Tcl Source Code

Artifact [acae450d36]
Login

Artifact acae450d36eb6c469475b8c8adf81972499c2394aff87ef9bd11f1ae44fd9d71:

Attachment "http.tcl.patch" to ticket [2da8d6fb3d] added by andrew.brooks 2018-11-08 18:11:15. (unpublished)
--- tcl/library/http/http.tcl	2018-11-08 11:11:36.000000000 -0600
+++ fixed/library/http/http.tcl	2018-11-08 11:11:31.000000000 -0600
@@ -748,6 +748,7 @@
 	-validate	boolean
     }
     set state(charset)	$defaultCharset
+    set state(explicit_charset) 0
     set options {
 	-binary -blocksize -channel -command -handler -headers -keepalive
 	-method -myaddr -progress -protocol -query -queryblocksize
@@ -2710,9 +2711,11 @@
 				    {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \
 				    $state(type) -> cs]} {
 				set state(charset) [string map {{\"} \"} $cs]
+				set state(explicit_charset) 1
 			    } else {
-				regexp -nocase {charset\s*=\s*(\S+?);?} \
-					$state(type) -> state(charset)
+				set state(explicit_charset) \
+				    [regexp -nocase {charset\s*=\s*(\S+?);?} \
+					 $state(type) -> state(charset)]
 			    }
 			}
 			content-length {
@@ -3320,11 +3323,13 @@
 	    return
 	}
 
-	if {!$state(binary)} {
+	if {!$state(binary) || $state(explicit_charset)} {
 	    # If we are getting text, set the incoming channel's encoding
 	    # correctly.  iso8859-1 is the RFC default, but this could be any
-	    # IANA charset.  However, we only know how to convert what we have
-	    # encodings for.
+	    # IANA charset.  If the Content-Type header supplied an explicit
+	    # charset, RFC 2068 section 3.7.1 requires that we honor it when
+	    # possible, even if the body is otherwise treated as binary.
+	    # However, we can only convert charsets we have encodings for.
 
 	    set enc [CharsetToEncoding $state(charset)]
 	    if {$enc ne "binary"} {