Tcl Source Code

Artifact c0adf17a0a4645648e103713763528f30b350c33:

Attachment "httpparse.diff" to ticket [1358369fff] added by dkf 2005-11-18 21:01:13.
Index: ChangeLog
===================================================================
RCS file: /cvsroot/tcl/tcl/ChangeLog,v
retrieving revision 1.2873
diff -u -r1.2873 ChangeLog
--- ChangeLog	17 Nov 2005 23:33:41 -0000	1.2873
+++ ChangeLog	18 Nov 2005 13:56:47 -0000
@@ -1,3 +1,8 @@
+2005-11-18  Donal K. Fellows  <[email protected]>
+
+	* library/http/http.tcl (http::geturl): Improved syntactic validation
+	of URLs, and better error messages in some cases. [Bug 1358369]
+
 2005-11-17  Miguel Sofer <[email protected]>
 
 	* tests/namespace.test: fix comment
Index: library/http/http.tcl
===================================================================
RCS file: /cvsroot/tcl/tcl/library/http/http.tcl,v
retrieving revision 1.53
diff -u -r1.53 http.tcl
--- library/http/http.tcl	15 Nov 2005 22:58:18 -0000	1.53
+++ library/http/http.tcl	18 Nov 2005 13:56:47 -0000
@@ -1,30 +1,29 @@
 # http.tcl --
 #
-#	Client-side HTTP for GET, POST, and HEAD commands.
-#	These routines can be used in untrusted code that uses 
-#	the Safesock security policy.  These procedures use a 
-#	callback interface to avoid using vwait, which is not 
+#	Client-side HTTP for GET, POST, and HEAD commands. These routines can
+#	be used in untrusted code that uses the Safesock security policy. These
+#	procedures use a callback interface to avoid using vwait, which is not
 #	defined in the safe base.
 #
-# See the file "license.terms" for information on usage and
-# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+# See the file "license.terms" for information on usage and redistribution of
+# this file, and for a DISCLAIMER OF ALL WARRANTIES.
 #
 # RCS: @(#) $Id: http.tcl,v 1.53 2005/11/15 22:58:18 dgp Exp $
 
 # Rough version history:
-# 1.0	Old http_get interface
-# 2.0	http:: namespace and http::geturl
-# 2.1	Added callbacks to handle arriving data, and timeouts
-# 2.2	Added ability to fetch into a channel
-# 2.3	Added SSL support, and ability to post from a channel
-#	This version also cleans up error cases and eliminates the
-#	"ioerror" status in favor of raising an error
-# 2.4	Added -binary option to http::geturl and charset element
-#	to the state array.
+# 1.0	Old http_get interface.
+# 2.0	http:: namespace and http::geturl.
+# 2.1	Added callbacks to handle arriving data, and timeouts.
+# 2.2	Added ability to fetch into a channel.
+# 2.3	Added SSL support, and ability to post from a channel. This version
+#	also cleans up error cases and eliminates the "ioerror" status in
+#	favor of raising an error.
+# 2.4	Added -binary option to http::geturl and charset element to the state
+#	array.
 
 package require Tcl 8.4
-# keep this in sync with pkgIndex.tcl
-# and with the install directories in Makefiles
+# Keep this in sync with pkgIndex.tcl and with the install directories
+# in Makefiles
 package provide http 2.5.2
 
 namespace eval http {
@@ -39,12 +38,11 @@
     set http(-useragent) "Tcl http client package [package provide http]"
 
     proc init {} {
-	# Set up the map for quoting chars
-	# RFC3986 Section 2.3 say percent encode all except:
-	# "... percent-encoded octets in the ranges of ALPHA
-	# (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
-	# period (%2E), underscore (%5F), or tilde (%7E) should
-	# not be created by URI producers ..."
+	# Set up the map for quoting chars. RFC 3986 Section 2.3 says percent
+	# encode all except: "... percent-encoded octets in the ranges of ALPHA
+	# (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
+	# underscore (%5F), or tilde (%7E) should not be created by URI
+	# producers ..."
 	for {set i 0} {$i <= 256} {incr i} {
 	    set c [format %c $i]
 	    if {![string match {[-._~a-zA-Z0-9]} $c]} {
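
For reference, the classification applied by the loop above can be checked
directly (a standalone sketch, not part of the patch): characters in the
RFC 3986 unreserved set survive unchanged, everything else gets a %XX escape.

    puts [string match {[-._~a-zA-Z0-9]} ~]   ;# prints 1: unreserved, kept as-is
    puts [string match {[-._~a-zA-Z0-9]} /]   ;# prints 0: will be escaped (%2f)
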
@@ -152,9 +150,9 @@
 # Arguments:
 #	token	    Connection token.
 #	errormsg    (optional) If set, forces status to error.
-#       skipCB      (optional) If set, don't call the -command callback.  This
+#       skipCB      (optional) If set, don't call the -command callback. This
 #                   is useful when geturl wants to throw an exception instead
-#                   of calling the callback.  That way, the same error isn't
+#                   of calling the callback. That way, the same error isn't
 #                   reported to two places.
 #
 # Side Effects:
@@ -218,17 +216,16 @@
 #       args		Option value pairs. Valid options include:
 #				-blocksize, -validate, -headers, -timeout
 # Results:
-#	Returns a token for this connection.
-#	This token is the name of an array that the caller should
-#	unset to garbage collect the state.
+#	Returns a token for this connection. This token is the name of an array
+#	that the caller should unset to garbage collect the state.
 
 proc http::geturl { url args } {
     variable http
     variable urlTypes
     variable defaultCharset
 
-    # Initialize the state variable, an array.  We'll return the
-    # name of this array as the token for the transaction.
+    # Initialize the state variable, an array. We'll return the name of this
+    # array as the token for the transaction.
 
     if {![info exists http(uid)]} {
 	set http(uid) 0
@@ -301,17 +298,118 @@
     }
 
     # Validate URL, determine the server host and port, and check proxy case
-    # Recognize user:pass@host URLs also, although we do not do anything
-    # with that info yet.
+    # Recognize user:pass@host URLs also, although we do not do anything with
+    # that info yet.
 
-    set exp {^(([^:]*)://)?([^@]+@)?([^/:]+)(:([0-9]+))?(/.*)?$}
-    if {![regexp -nocase $exp $url x prefix proto user host y port srvurl]} {
+    # URLs have basically four parts.
+    # First, before the colon, is the protocol scheme (e.g. http)
+    # Second, for HTTP-like protocols, is the authority
+    #	The authority is preceded by // and lasts up to (but not including)
+    #	the following / and it identifies up to four parts, of which only one,
+    #	the host, is required (if an authority is present at all). All other
+    #	parts of the authority (user name, password, port number) are optional.
+    # Third is the resource name, which is split into two parts at a ?
+    #	The first part (from the single "/" up to "?") is the path, and the
+    #	second part (from that "?" up to "#") is the query. *HOWEVER*, we do
+    #	not need to separate them; we send the whole lot to the server.
+    # Fourth is the fragment identifier, which is everything after the first
+    #	"#" in the URL. The fragment identifier MUST NOT be sent to the server
+    #	and indeed, we don't bother to validate it (it could be an error to
+    #	pass it in here, but it's cheap to strip).
+    #
+    # An example of a URL that has all the parts:
+    #   http://jschmoe:[email protected]:8000/foo/bar.tml?q=foo#changes
+    # The "http" is the protocol, the user is "jschmoe", the password is
+    # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
+    # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
+    #
+    # Note that the RE actually combines the user and password parts, as
+    # recommended in RFC 3986. Indeed, that RFC states that putting passwords
+    # in URLs is a Really Bad Idea, something with which I would agree utterly.
+    # Also note that we do not currently support IPv6 addresses.
+    #
+    # From a validation perspective, we need to ensure that the parts of the
+    # URL that are going to the server are correctly encoded.
+
+    set URLmatcher {(?x)		# this is _expanded_ syntax
+	^
+	(?: (\w+) : )?			# <protocol scheme>
+	(?: //
+	    (?:
+		(
+		    [^@/\#?]+		# <userinfo part of authority>
+		) @
+	    )?
+	    ( [^/:\#?]+ )		# <host part of authority>
+	    (?: : (\d+) )?		# <port part of authority>
+	)?
+	( / [^\#?]* (?: \? [^\#?]* )? )?	# <path> (including query)
+	(?: \# (.*) )?			# <fragment>
+	$
+    }
+
+    # Phase one: parse
+    if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} {
 	unset $token
 	return -code error "Unsupported URL: $url"
     }
+    # Phase two: validate
+    if {$host eq ""} {
+	# Caller has to provide a host name; we do not have a "default host"
+	# that would enable us to handle relative URLs.
+	unset $token
+	return -code error "Missing host part: $url"
+	# Note that we don't check the hostname for validity here; if it's
+	# invalid, we'll simply fail to resolve it later on.
+    }
+    if {$port ne "" && $port>65535} {
+	unset $token
+	return -code error "Invalid port number: $port"
+    }
+    # The user identification and resource identification parts of the URL can
+    # have encoded characters in them; take care!
+    if {$user ne ""} {
+	# Check for validity according to RFC 3986, Appendix A
+	set validityRE {(?xi)
+	    ^
+	    (?: [\w-.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+
+	    $
+	}
+	if {![regexp -- $validityRE $user]} {
+	    unset $token
+	    # Provide a better error message in this error case
+	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} {
+		return -code error \
+			"Illegal encoding character usage \"$bad\" in URL user"
+	    }
+	    return -code error "Illegal characters in URL user"
+	}
+    }
+    if {$srvurl ne ""} {
+	# Check for validity according to RFC 3986, Appendix A
+	set validityRE {(?xi)
+	    ^
+	    # Path part (already must start with / character)
+	    (?:	      [\w-.~!$&'()*+,;=:@/]  | %[0-9a-f][0-9a-f] )*
+	    # Query part (optional, permits ? characters)
+	    (?: \? (?: [\w-.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )?
+	    $
+	}
+	if {![regexp -- $validityRE $srvurl]} {
+	    unset $token
+	    # Provide a better error message in this error case
+	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
+		return -code error \
+			"Illegal encoding character usage \"$bad\" in URL path"
+	    }
+	    return -code error "Illegal characters in URL path"
+	}
+    } else {
+	set srvurl /
+    }
     if {[string length $proto] == 0} {
 	set proto http
-	set url ${proto}://$url
+	set url ${proto}:$url
     }
     if {![info exists urlTypes($proto)]} {
 	unset $token
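
To make the behaviour of the new matcher concrete, here is a standalone sketch
(assuming URLmatcher holds the expanded RE added above) of how it decomposes
the example URL from the comment:

    set url http://jschmoe:[email protected]:8000/foo/bar.tml?q=foo#changes
    regexp -- $URLmatcher $url -> proto user host port srvurl
    # proto  = "http"
    # user   = "jschmoe:xyzzy"         (userinfo captured as one field)
    # host   = "www.bogus.net"
    # port   = "8000"
    # srvurl = "/foo/bar.tml?q=foo"    (path and query kept together)
    # The fragment "changes" is matched but deliberately not bound to a variable.
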
@@ -323,20 +421,27 @@
     if {[string length $port] == 0} {
 	set port $defport
     }
-    if {[string length $srvurl] == 0} {
-	set srvurl /
-    }
-    if {[string length $proto] == 0} {
-	set url http://$url
-    }
-    set state(url) $url
     if {![catch {$http(-proxyfilter) $host} proxy]} {
 	set phost [lindex $proxy 0]
 	set pport [lindex $proxy 1]
     }
 
-    # If a timeout is specified we set up the after event
-    # and arrange for an asynchronous socket connection.
+    # OK, now reassemble into a full URL
+    set url ${proto}://
+    if {$user ne ""} {
+	append url $user
+	append url @
+    }
+    append url $host
+    if {$port != $defport} {
+	append url : $port
+    }
+    append url $srvurl
+    # Don't append the fragment!
+    set state(url) $url
+
+    # If a timeout is specified we set up the after event and arrange for an
+    # asynchronous socket connection.
 
     if {$state(-timeout) > 0} {
 	set state(after) [after $state(-timeout) \
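
The reassembly above can be traced with a standalone sketch (hypothetical plain
variables stand in for the parsed fields; 80 is the default port registered for
plain http):

    set proto http; set user ""; set host www.bogus.net
    set port 8000;  set defport 80; set srvurl /foo/bar.tml

    set url ${proto}://
    if {$user ne ""} { append url $user @ }
    append url $host
    if {$port != $defport} { append url : $port }
    append url $srvurl
    puts $url    ;# http://www.bogus.net:8000/foo/bar.tml

With port 80 the ":8000" part would be omitted, and a fragment can never
reappear because the matcher split it off and it is simply dropped.
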
@@ -346,8 +451,8 @@
 	set async ""
     }
 
-    # If we are using the proxy, we must pass in the full URL that
-    # includes the server name.
+    # If we are using the proxy, we must pass in the full URL that includes
+    # the server name.
 
     if {[info exists phost] && [string length $phost]} {
 	set srvurl $url
@@ -355,11 +460,11 @@
     } else {
 	set conStat [catch {eval $defcmd $async {$host $port}} s]
     }
-    if {$conStat} {
 
-	# something went wrong while trying to establish the connection
-	# Clean up after events and such, but DON'T call the command callback
-	# (if available) because we're going to throw an exception from here
+    if {$conStat} {
+	# Something went wrong while trying to establish the connection. Clean
+	# up after events and such, but DON'T call the command callback (if
+	# available) because we're going to throw an exception from here
 	# instead.
 	Finish $token "" 1
 	cleanup $token
@@ -367,16 +472,16 @@
     }
     set state(sock) $s
 
-    # Wait for the connection to complete
+    # Wait for the connection to complete.
 
     if {$state(-timeout) > 0} {
 	fileevent $s writable [list http::Connect $token]
 	http::wait $token
 
 	if {$state(status) eq "error"} {
-	    # something went wrong while trying to establish the connection
+	    # Something went wrong while trying to establish the connection.
 	    # Clean up after events and such, but DON'T call the command
-	    # callback (if available) because we're going to throw an 
+	    # callback (if available) because we're going to throw an
 	    # exception from here instead.
 	    set err [lindex $state(error) 0]
 	    cleanup $token
@@ -392,8 +497,8 @@
 
     fconfigure $s -translation {auto crlf} -buffersize $state(-blocksize)
 
-    # The following is disallowed in safe interpreters, but the socket
-    # is already in non-blocking mode in that case.
+    # The following is disallowed in safe interpreters, but the socket is
+    # already in non-blocking mode in that case.
 
     catch {fconfigure $s -blocking off}
     set how GET
@@ -403,7 +508,7 @@
 	    set how POST
 	    set contDone 0
 	} else {
-	    # there's no query data
+	    # There's no query data.
 	    unset state(-query)
 	    set isQuery 0
 	}
@@ -421,8 +526,8 @@
 	puts $s "$how $srvurl HTTP/1.0"
 	puts $s "Accept: $http(-accept)"
 	if {$port == $defport} {
-	    # Don't add port in this case, to handle broken servers.
-	    # [Bug #504508]
+	    # Don't add port in this case, to handle broken servers. [Bug
+	    # 504508]
 	    puts $s "Host: $host"
 	} else {
 	    puts $s "Host: $host:$port"
@@ -440,8 +545,8 @@
 	    }
 	}
 	if {$isQueryChannel && $state(querylength) == 0} {
-	    # Try to determine size of data in channel
-	    # If we cannot seek, the surrounding catch will trap us
+	    # Try to determine size of data in channel. If we cannot seek, the
+	    # surrounding catch will trap us.
 
 	    set start [tell $state(-querychannel)]
 	    seek $state(-querychannel) 0 end
@@ -450,22 +555,21 @@
 	    seek $state(-querychannel) $start
 	}
 
-	# Flush the request header and set up the fileevent that will
-	# either push the POST data or read the response.
+	# Flush the request header and set up the fileevent that will either
+	# push the POST data or read the response.
 	#
 	# fileevent note:
 	#
-	# It is possible to have both the read and write fileevents active
-	# at this point.  The only scenario it seems to affect is a server
-	# that closes the connection without reading the POST data.
-	# (e.g., early versions TclHttpd in various error cases).
-	# Depending on the platform, the client may or may not be able to
-	# get the response from the server because of the error it will
-	# get trying to write the post data.  Having both fileevents active
-	# changes the timing and the behavior, but no two platforms
-	# (among Solaris, Linux, and NT)  behave the same, and none 
-	# behave all that well in any case.  Servers should always read thier
-	# POST data if they expect the client to read their response.
+	# It is possible to have both the read and write fileevents active at
+	# this point. The only scenario it seems to affect is a server that
+	# closes the connection without reading the POST data. (e.g., early
+	# versions of TclHttpd in various error cases). Depending on the platform,
+	# the client may or may not be able to get the response from the server
+	# because of the error it will get trying to write the post data.
+	# Having both fileevents active changes the timing and the behavior,
+	# but no two platforms (among Solaris, Linux, and NT) behave the same,
+	# and none behave all that well in any case. Servers should always read
+	# their POST data if they expect the client to read their response.
 
 	if {$isQuery || $isQueryChannel} {
 	    puts $s "Content-Type: $state(-type)"
@@ -482,9 +586,8 @@
 	}
 
 	if {! [info exists state(-command)]} {
-
-	    # geturl does EVERYTHING asynchronously, so if the user
-	    # calls it synchronously, we just do a wait here.
+	    # geturl does EVERYTHING asynchronously, so if the user calls it
+	    # synchronously, we just do a wait here.
 
 	    wait $token
 	    if {$state(status) eq "error"} {
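
A usage sketch of the two calling styles this comment distinguishes (the URL is
hypothetical, and the asynchronous form relies on the caller servicing the
event loop):

    # Synchronous: no -command, so geturl waits internally and only returns
    # once the transaction has completed, failed, or timed out.
    set tok [http::geturl http://www.bogus.net/]
    puts [http::status $tok]
    http::cleanup $tok

    # Asynchronous: with -command, geturl returns immediately and the
    # callback later receives the token.
    proc ::done {tok} {
        puts [http::status $tok]
        http::cleanup $tok
    }
    http::geturl http://www.bogus.net/ -command ::done
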
@@ -494,8 +597,8 @@
 	    }
 	}
     } err]} {
-	# The socket probably was never connected,
-	# or the connection dropped later.
+	# The socket probably was never connected, or the connection dropped
+	# later.
 
 	# Clean up after events and such, but DON'T call the command callback
 	# (if available) because we're going to throw an exception from here
@@ -622,8 +725,8 @@
 	# Catch I/O errors on dead sockets
 
 	if {[info exists state(-query)]} {
-	    # Chop up large query strings so queryprogress callback
-	    # can give smooth feedback
+	    # Chop up large query strings so queryprogress callback can give
+	    # smooth feedback.
 
 	    puts -nonewline $s \
 		    [string range $state(-query) $state(queryoffset) \
@@ -644,8 +747,8 @@
 	    }
 	}
     } err]} {
-	# Do not call Finish here, but instead let the read half of
-	# the socket process whatever server reply there is to get.
+	# Do not call Finish here, but instead let the read half of the socket
+	# process whatever server reply there is to get.
 
 	set state(posterror) $err
 	set done 1
@@ -656,7 +759,7 @@
 	fileevent $s readable [list http::Event $token]
     }
 
-    # Callback to the client after we've completely handled everything
+    # Callback to the client after we've completely handled everything.
 
     if {[string length $state(-queryprogress)]} {
 	eval $state(-queryprogress) [list $token $state(querylength)\
@@ -698,10 +801,10 @@
 		    fconfigure $state(-channel) -translation binary
 		}
 	    } else {
-		# If we are getting text, set the incoming channel's
-		# encoding correctly.  iso8859-1 is the RFC default, but
-		# this could be any IANA charset.  However, we only know
-		# how to convert what we have encodings for.
+		# If we are getting text, set the incoming channel's encoding
+		# correctly. iso8859-1 is the RFC default, but this could be
+		# any IANA charset. However, we only know how to convert what
+		# we have encodings for.
 		set idx [lsearch -exact $encodings \
 			[string tolower $state(charset)]]
 		if {$idx >= 0} {
@@ -855,16 +958,15 @@
 
 # http::formatQuery --
 #
-#	See documentaion for details.
-#	Call http::formatQuery with an even number of arguments, where 
-#	the first is a name, the second is a value, the third is another 
-#	name, and so on.
+#	See documentation for details. Call http::formatQuery with an even
+#	number of arguments, where the first is a name, the second is a value,
+#	the third is another name, and so on.
 #
 # Arguments:
 #	args	A list of name-value pairs.
 #
 # Results:
-#        TODO
+#	TODO
 
 proc http::formatQuery {args} {
     set result ""
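
A short example of the calling convention described above; the encoded result
is the same one test http-5.1 later in this patch expects:

    puts [http::formatQuery name1 value1 name2 "value two"]
    # prints: name1=value1&name2=value+two
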
@@ -894,9 +996,9 @@
     variable http
     variable formMap
 
-    # The spec says: "non-alphanumeric characters are replaced by '%HH'"
-    # Use a pre-computed map and [string map] to do the conversion
-    # (much faster than [regsub]/[subst]). [Bug 1020491]
+    # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
+    # a pre-computed map and [string map] to do the conversion (much faster
+    # than [regsub]/[subst]). [Bug 1020491]
 
     if {$http(-urlencoding) ne ""} {
 	set string [encoding convertto $http(-urlencoding) $string]
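
The precomputed-map technique the comment refers to can be shown with a
self-contained sketch (the package builds its real formMap when http::init runs
and treats a few characters specially, so this is the general idea rather than
the package's exact table):

    # Build a char -> %XX table once, then let [string map] rewrite the whole
    # string in a single pass (much cheaper than repeated regsub/subst calls).
    for {set i 0} {$i <= 255} {incr i} {
        set c [format %c $i]
        if {![string match {[a-zA-Z0-9]} $c]} {
            lappend encMap $c %[format %.2x $i]
        }
    }
    puts [string map $encMap {a=b&c}]    ;# prints a%3db%26c
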
@@ -913,7 +1015,7 @@
 }
 
 # http::ProxyRequired --
-#	Default proxy filter. 
+#	Default proxy filter.
 #
 # Arguments:
 #	host	The destination host
Index: tests/http.test
===================================================================
RCS file: /cvsroot/tcl/tcl/tests/http.test,v
retrieving revision 1.39
diff -u -r1.39 http.test
--- tests/http.test	5 Oct 2005 05:03:38 -0000	1.39
+++ tests/http.test	18 Nov 2005 13:56:48 -0000
@@ -82,19 +82,15 @@
     }
 }
 
-
 test http-1.1 {http::config} {
     http::config
 } [list -accept */* -proxyfilter http::ProxyRequired -proxyhost {} -proxyport {} -urlencoding utf-8 -useragent "Tcl http client package $version"]
-
 test http-1.2 {http::config} {
     http::config -proxyfilter
 } http::ProxyRequired
-
 test http-1.3 {http::config} {
     catch {http::config -junk}
 } 1
-
 test http-1.4 {http::config} {
     set savedconf [http::config]
     http::config -proxyhost nowhere.come -proxyport 8080 \
@@ -104,11 +100,9 @@
     http::config {expand}$savedconf
     set x
 } {-accept */* -proxyfilter myFilter -proxyhost nowhere.come -proxyport 8080 -urlencoding iso8859-1 -useragent {Tcl Test Suite}}
-
 test http-1.5 {http::config} {
     list [catch {http::config -proxyhost {} -junk 8080} msg] $msg
 } {1 {Unknown option -junk, must be: -accept, -proxyfilter, -proxyhost, -proxyport, -urlencoding, -useragent}}
-
 test http-1.6 {http::config} {
     set enc [list [http::config -urlencoding]]
     http::config -urlencoding iso8859-1
@@ -117,7 +111,6 @@
     set enc
 } {utf-8 iso8859-1}
 
-
 test http-2.1 {http::reset} {
     catch {http::reset http#1}
 } 0
@@ -125,12 +118,10 @@
 test http-3.1 {http::geturl} {
     list [catch {http::geturl -bogus flag} msg] $msg
 } {1 {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -handler, -headers, -progress, -query, -queryblocksize, -querychannel, -queryprogress, -validate, -timeout, -type}}
-
 test http-3.2 {http::geturl} {
     catch {http::geturl http:junk} err
     set err
 } {Unsupported URL: http:junk}
-
 set url [info hostname]:$port
 set badurl [info hostname]:6666
 test http-3.3 {http::geturl} {
@@ -140,14 +131,12 @@
 <h1>Hello, World!</h1>
 <h2>GET /</h2>
 </body></html>"
-
 set tail /a/b/c
 set url [info hostname]:$port/a/b/c
 set fullurl http://user:pass@[info hostname]:$port/a/b/c
 set binurl [info hostname]:$port/binary
 set posturl [info hostname]:$port/post
 set badposturl [info hostname]:$port/droppost
-
 test http-3.4 {http::geturl} {
     set token [http::geturl $url]
     http::data $token
@@ -155,7 +144,6 @@
 <h1>Hello, World!</h1>
 <h2>GET $tail</h2>
 </body></html>"
-
 proc selfproxy {host} {
     global port
     return [list [info hostname] $port]
@@ -169,7 +157,6 @@
 <h1>Hello, World!</h1>
 <h2>GET http://$url</h2>
 </body></html>"
-
 test http-3.6 {http::geturl} {
     http::config -proxyfilter bogus
     set token [http::geturl $url]
@@ -179,7 +166,6 @@
 <h1>Hello, World!</h1>
 <h2>GET $tail</h2>
 </body></html>"
-
 test http-3.7 {http::geturl} {
     set token [http::geturl $url -headers {Pragma no-cache}]
     http::data $token
@@ -187,7 +173,6 @@
 <h1>Hello, World!</h1>
 <h2>GET $tail</h2>
 </body></html>"
-
 test http-3.8 {http::geturl} {
     set token [http::geturl $url -query Name=Value&Foo=Bar -timeout 2000]
     http::data $token
@@ -200,12 +185,10 @@
 <dt>Foo<dd>Bar
 </dl>
 </body></html>"
-
 test http-3.9 {http::geturl} {
     set token [http::geturl $url -validate 1]
     http::code $token
 } "HTTP/1.0 200 OK"
-
 test http-3.10 {http::geturl queryprogress} {
     set query foo=bar
     set sep ""
@@ -227,7 +210,6 @@
     http::wait $t
     list [http::status $t] [string length $query] $postProgress [http::data $t]
 } {ok 122879 {16384 32768 49152 65536 81920 98304 114688 122879} {Got 122879 bytes}}
-
 test http-3.11 {http::geturl querychannel with -command} {
     set query foo=bar
     set sep ""
@@ -263,14 +245,11 @@
     removeFile outdata
     set testRes
 } {ok 122879 {Got 122880 bytes} ok {PostStart {Got 122880 bytes}}}
-
-# On Linux platforms when the client and server are on the same
-# host, the client is unable to read the server's response one
-# it hits the write error.  The status is "eof"
-
-# On Windows, the http::wait procedure gets a
-# "connection reset by peer" error while reading the reply
-
+# On Linux platforms when the client and server are on the same host, the
+# client is unable to read the server's response once it hits the write error.
+# The status is "eof".
+# On Windows, the http::wait procedure gets a "connection reset by peer" error
+# while reading the reply.
 test http-3.12 {http::geturl querychannel with aborted request} {nonPortable} {
     set query foo=bar
     set sep ""
@@ -308,21 +287,49 @@
     removeFile outdata
     list [http::status $t] [http::code $t]
 } {ok {HTTP/1.0 200 Data follows}}
-
 test http-3.13 {http::geturl socket leak test} {
     set chanCount [llength [file channels]]
     for {set i 0} {$i < 3} {incr i} {
-	catch {http::geturl $badurl -timeout 5000} 
+	catch {http::geturl $badurl -timeout 5000}
     }
 
     # No extra channels should be taken
     expr {[llength [file channels]] == $chanCount}
 } 1
-
 test http-3.14 "http::geturl $fullurl" {
     set token [http::geturl $fullurl -validate 1]
     http::code $token
 } "HTTP/1.0 200 OK"
+test http-3.15 {http::geturl parse failures} -body {
+    http::geturl "{invalid}:url"
+} -returnCodes error -result {Unsupported URL: {invalid}:url}
+test http-3.16 {http::geturl parse failures} -body {
+    http::geturl http:relative/url
+} -returnCodes error -result {Unsupported URL: http:relative/url}
+test http-3.17 {http::geturl parse failures} -body {
+    http::geturl /absolute/url
+} -returnCodes error -result {Missing host part: /absolute/url}
+test http-3.18 {http::geturl parse failures} -body {
+    http::geturl http://somewhere:123456789/
+} -returnCodes error -result {Invalid port number: 123456789}
+test http-3.19 {http::geturl parse failures} -body {
+    http::geturl http://{user}@somewhere
+} -returnCodes error -result {Illegal characters in URL user}
+test http-3.20 {http::geturl parse failures} -body {
+    http::geturl http://%user@somewhere
+} -returnCodes error -result {Illegal encoding character usage "%us" in URL user}
+test http-3.21 {http::geturl parse failures} -body {
+    http::geturl http://somewhere/{path}
+} -returnCodes error -result {Illegal characters in URL path}
+test http-3.22 {http::geturl parse failures} -body {
+    http::geturl http://somewhere/%path
+} -returnCodes error -result {Illegal encoding character usage "%pa" in URL path}
+test http-3.23 {http::geturl parse failures} -body {
+    http::geturl http://somewhere/path?{query}
+} -returnCodes error -result {Illegal characters in URL path}
+test http-3.24 {http::geturl parse failures} -body {
+    http::geturl http://somewhere/path?%query
+} -returnCodes error -result {Illegal encoding character usage "%qu" in URL path}
 
 test http-4.1 {http::Event} {
     set token [http::geturl $url]
@@ -330,19 +337,16 @@
     array set meta $data(meta)
     expr ($data(totalsize) == $meta(Content-Length))
 } 1
-
 test http-4.2 {http::Event} {
     set token [http::geturl $url]
     upvar #0 $token data
     array set meta $data(meta)
     string compare $data(type) [string trim $meta(Content-Type)]
 } 0
-
 test http-4.3 {http::Event} {
     set token [http::geturl $url]
     http::code $token
 } {HTTP/1.0 200 Data follows}
-
 test http-4.4 {http::Event} {
     set testfile [makeFile "" testfile]
     set out [open $testfile w]
@@ -357,7 +361,6 @@
 <h1>Hello, World!</h1>
 <h2>GET $tail</h2>
 </body></html>"
-
 test http-4.5 {http::Event} {
     set testfile [makeFile "" testfile]
     set out [open $testfile w]
@@ -367,7 +370,6 @@
     removeFile $testfile
     expr $data(currentsize) == $data(totalsize)
 } 1
-
 test http-4.6 {http::Event} {
     set testfile [makeFile "" testfile]
     set out [open $testfile w]
@@ -380,7 +382,6 @@
     removeFile $testfile
     set x
 } "$bindata$binurl"
-
 proc myProgress {token total current} {
     global progress httpLog
     if {[info exists httpLog] && $httpLog} {
@@ -391,7 +392,7 @@
 if 0 {
     # This test hangs on Windows95 because the client never gets EOF
     set httpLog 1
-    test http-4.6 {http::Event} {
+    test http-4.6.1 {http::Event} knownBug {
 	set token [http::geturl $url -blocksize 50 -progress myProgress]
 	set progress
     } {111 111}
@@ -412,38 +413,29 @@
     set token [http::geturl $url -progress myProgress]
     http::size $token
 } {111}
-
 # Timeout cases
-
-#	Short timeout to working server  (the test server)
-#	This lets us try a reset during the connection
-
+#	Short timeout to working server (the test server). This lets us try a
+#	reset during the connection.
 test http-4.11 {http::Event} {
     set token [http::geturl $url -timeout 1 -command {#}]
     http::reset $token
     http::status $token
 } {reset}
-
-#	Longer timeout with reset
-
+#	Longer timeout with reset.
 test http-4.12 {http::Event} {
     set token [http::geturl $url/?timeout=10 -command {#}]
     http::reset $token
     http::status $token
 } {reset}
-
-#	Medium timeout to working server that waits even longer
-#	The timeout hits while waiting for a reply
-
+#	Medium timeout to working server that waits even longer. The timeout
+#	hits while waiting for a reply.
 test http-4.13 {http::Event} {
     set token [http::geturl $url?timeout=30 -timeout 10 -command {#}]
     http::wait $token
     http::status $token
 } {timeout}
-
-#	Longer timeout to good host, bad port, gets an error
-#	after the connection "completes" but the socket is bad
-
+#	Longer timeout to good host, bad port, gets an error after the
+#	connection "completes" but the socket is bad.
 test http-4.14 {http::Event} {
     set code [catch {
 	set token [http::geturl $badurl/?timeout=10 -timeout 10000 -command {#}]
@@ -456,9 +448,7 @@
     # error code varies among platforms.
     list $code [regexp {(connect failed|couldn't open socket)} $err]
 } {1 1}
-
 # Bogus host
-
 test http-4.15 {http::Event} {
     # This test may fail if you use a proxy server.  That is to be
     # expected and is not a problem with Tcl.
@@ -474,17 +464,13 @@
 test http-5.1 {http::formatQuery} {
     http::formatQuery name1 value1 name2 "value two"
 } {name1=value1&name2=value+two}
-
 # test http-5.2 obsoleted by 5.4 and 5.5 with http 2.5
-
 test http-5.3 {http::formatQuery} {
     http::formatQuery lines "line1\nline2\nline3"
 } {lines=line1%0d%0aline2%0d%0aline3}
-
 test http-5.4 {http::formatQuery} {
     http::formatQuery name1 ~bwelch name2 \xa1\xa2\xa2
 } {name1=~bwelch&name2=%c2%a1%c2%a2%c2%a2}
-
 test http-5.5 {http::formatQuery} {
     set enc [http::config -urlencoding]
     http::config -urlencoding iso8859-1
@@ -508,13 +494,11 @@
 test http-7.1 {http::mapReply} {
     http::mapReply "abc\$\[\]\"\\()\}\{"
 } {abc%24%5b%5d%22%5c%28%29%7d%7b}
-
 test http-7.2 {http::mapReply} {
     # RFC 2718 specifies that we pass urlencoding on utf-8 chars by default,
     # so make sure this gets converted to utf-8 then urlencoded.
     http::mapReply "\u2208"
 } {%e2%88%88}
-
 test http-7.3 {http::formatQuery} {
     set enc [http::config -urlencoding]
     # this would be reverting to http <=2.4 behavior
@@ -523,7 +507,6 @@
     http::config -urlencoding $enc
     set res
 } [list 1 "can't read \"formMap(\u2208)\": no such element in array"]
-
 test http-7.4 {http::formatQuery} {
     set enc [http::config -urlencoding]
     # this would be reverting to http <=2.4 behavior w/o errors