Attachment "http.tcl.patch" to
ticket [2da8d6fb3d]
added by
andrew.brooks
2018-11-08 18:11:15.
--- tcl/library/http/http.tcl 2018-11-08 11:11:36.000000000 -0600
+++ fixed/library/http/http.tcl 2018-11-08 11:11:31.000000000 -0600
@@ -748,6 +748,7 @@
-validate boolean
}
set state(charset) $defaultCharset
+ set state(explicit_charset) 0
set options {
-binary -blocksize -channel -command -handler -headers -keepalive
-method -myaddr -progress -protocol -query -queryblocksize
@@ -2710,9 +2711,11 @@
{charset\s*=\s*\"((?:[^""]|\\\")*)\"} \
$state(type) -> cs]} {
set state(charset) [string map {{\"} \"} $cs]
+ set state(explicit_charset) 1
} else {
- regexp -nocase {charset\s*=\s*(\S+?);?} \
- $state(type) -> state(charset)
+ set state(explicit_charset) \
+ [regexp -nocase {charset\s*=\s*(\S+?);?} \
+ $state(type) -> state(charset)]
}
}
content-length {
@@ -3320,11 +3323,13 @@
return
}
- if {!$state(binary)} {
+ if {!$state(binary) || $state(explicit_charset)} {
# If we are getting text, set the incoming channel's encoding
# correctly. iso8859-1 is the RFC default, but this could be any
- # IANA charset. However, we only know how to convert what we have
- # encodings for.
+ # IANA charset. If we were given an explicit charset in the
+ # Content-Type header, RFC2068 3.7.1 demands that we respect the
+ # explicit charset if possible. However, we only know how to
+ # convert what we have encodings for.
set enc [CharsetToEncoding $state(charset)]
if {$enc ne "binary"} {