Tcl Source Code

Artifact [92787796d1]
Login

Artifact 92787796d1acfddc019616ef4931f0ca7b06d89f:

Attachment "219210.diff.1" to ticket [219210ffff] added by andreas_kupries 2001-04-04 21:36:54.
? unix/httpd
Index: ChangeLog
===================================================================
RCS file: /cvsroot/tcl/tcl/ChangeLog,v
retrieving revision 1.390
diff -u -r1.390 ChangeLog
--- ChangeLog	2001/03/30 00:56:28	1.390
+++ ChangeLog	2001/04/04 14:30:23
@@ -1,3 +1,24 @@
+2001-04-04  Andreas Kupries <[email protected]>
+
+	* doc/fcopy.n: Updated to reflect the extended behaviour of 'fcopy'.
+
+	* tests/io.test: Added tests 'io-52.9', 'io-52.10' and 'io-52.11'
+	  to test the handling of encodings by 'fcopy' / 'TclCopyChannel'
+	  [Bug #209210].
+
+	* tests/iogt.test: Changed the tests iogt-2.1, -2.2 and -2.3. They
+	  now configure the used channels for encoding 'binary' and thus
+	  force them to use the old original fcopy loop. This ensures that
+	  the results don't change [Bug #209210].
+
+	* generic/tclIO.c: Split both 'Tcl_ReadChars' and
+	  'Tcl_WriteChars' into a public error checking and an internal
+	  working part. The public functions now use the new internal
+	  ones. The new functions are 'DoReadChars' and 'DoWriteChars'.
+	  Extended 'CopyData' to use the new functions 'DoXChars' when
+	  required by the encodings on the input and output channels
+	  [Bug #209210].
+
 2001-03-29  Mo DeJong  <[email protected]>
 
 	* tests/interp.test: Print out warning when
Index: doc/fcopy.n
===================================================================
RCS file: /cvsroot/tcl/tcl/doc/fcopy.n,v
retrieving revision 1.2
diff -u -r1.2 fcopy.n
--- doc/fcopy.n	1998/09/14 18:39:52	1.2
+++ doc/fcopy.n	2001/04/04 14:30:23
@@ -71,6 +71,19 @@
 Only the number of bytes written to \fIoutchan\fR is reported,
 either as the return value of a synchronous \fBfcopy\fP or
 as the argument to the callback for an asynchronous \fBfcopy\fP.
+.PP
+\fBfcopy\fR obeys the encodings configured for the channels. This
+means that the incoming characters are converted internally first
+to UTF-8 and then into the encoding of the channel \fBfcopy\fR writes
+to. See the manual entry for \fBfconfigure\fR for details on the
+\fB\-encoding\fR option. No conversion is done if both channels are
+set to encoding "binary". If only the output channel is set to
+encoding "binary" the system will write the internal UTF-8
+representation of the incoming characters. If only the input channel
+is set to encoding "binary" the system will assume that the incoming
+bytes are valid UTF-8 characters and convert them according to the
+output encoding. The behaviour of the system for bytes which are not
+valid UTF-8 characters is in this case undefined.
 
 .SH EXAMPLE
 .PP
Index: generic/tclIO.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclIO.c,v
retrieving revision 1.28
diff -u -r1.28 tclIO.c
--- generic/tclIO.c	2001/01/30 17:32:06	1.28
+++ generic/tclIO.c	2001/04/04 14:30:41
@@ -112,6 +112,10 @@
 				int slen));
 static int		DoWrite _ANSI_ARGS_((Channel *chanPtr, char *src,
 				int srcLen));
+static int		DoReadChars _ANSI_ARGS_ ((Channel* chan,
+				Tcl_Obj* objPtr, int toRead, int appendFlag));
+static int		DoWriteChars _ANSI_ARGS_ ((Channel* chan,
+				CONST char* src, int len));
 static int		FilterInputBytes _ANSI_ARGS_((Channel *chanPtr,
 				GetsState *statePtr));
 static int		FlushChannel _ANSI_ARGS_((Tcl_Interp *interp,
@@ -2466,7 +2470,10 @@
  *	Puts a sequence of bytes into an output buffer, may queue the
  *	buffer for output if it gets full, and also remembers whether the
  *	current buffer is ready e.g. if it contains a newline and we are in
- *	line buffering mode.
+ *	line buffering mode. Compensates for stacking, i.e. will redirect the
+ *	data from the specified channel to the topmost channel in a stack.
+ *
+ *	No encoding conversions are applied to the bytes being written.
  *
  * Results:
  *	The number of bytes written or -1 in case of error. If -1,
@@ -2513,8 +2520,11 @@
  *	Puts a sequence of bytes into an output buffer, may queue the
  *	buffer for output if it gets full, and also remembers whether the
  *	current buffer is ready e.g. if it contains a newline and we are in
- *	line buffering mode.
+ *	line buffering mode. Writes directly to the driver of the channel,
+ *	does not compensate for stacking.
  *
+ *	No encoding conversions are applied to the bytes being written.
+ *
  * Results:
  *	The number of bytes written or -1 in case of error. If -1,
  *	Tcl_GetErrno will return the error code.
@@ -2569,7 +2579,8 @@
  *	using the channel's current encoding, may queue the buffer for
  *	output if it gets full, and also remembers whether the current
  *	buffer is ready e.g. if it contains a newline and we are in
- *	line buffering mode.
+ *	line buffering mode. Compensates for stacking, i.e. will redirect the
+ *	data from the specified channel to the topmost channel in a stack.
  *
  * Results:
  *	The number of bytes written or -1 in case of error. If -1,
@@ -2589,18 +2600,55 @@
     int len;			/* Length of string in bytes, or < 0 for 
 				 * strlen(). */
 {
-    /*
-     * Always use the topmost channel of the stack
-     */
-    Channel *chanPtr;
     ChannelState *statePtr;	/* state info for channel */
 
     statePtr = ((Channel *) chan)->state;
-    chanPtr  = statePtr->topChanPtr;
 
     if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) {
 	return -1;
     }
+
+    return DoWriteChars ((Channel*) chan, src, len);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * DoWriteChars --
+ *
+ *	Takes a sequence of UTF-8 characters and converts them for output
+ *	using the channel's current encoding, may queue the buffer for
+ *	output if it gets full, and also remembers whether the current
+ *	buffer is ready e.g. if it contains a newline and we are in
+ *	line buffering mode. Compensates for stacking, i.e. will redirect the
+ *	data from the specified channel to the topmost channel in a stack.
+ *
+ * Results:
+ *	The number of bytes written or -1 in case of error. If -1,
+ *	Tcl_GetErrno will return the error code.
+ *
+ * Side effects:
+ *	May buffer up output and may cause output to be produced on the
+ *	channel.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+DoWriteChars(chanPtr, src, len)
+    Channel* chanPtr;		/* The channel to buffer output for. */
+    CONST char *src;		/* UTF-8 characters to queue in output buffer. */
+    int len;			/* Length of string in bytes, or < 0 for 
+				 * strlen(). */
+{
+    /*
+     * Always use the topmost channel of the stack
+     */
+    ChannelState *statePtr;	/* state info for channel */
+
+    statePtr = chanPtr->state;
+    chanPtr  = statePtr->topChanPtr;
+
     if (len < 0) {
         len = strlen(src);
     }
@@ -3995,12 +4043,8 @@
 				 * of the object. */
 
 {
-    Channel *chanPtr = (Channel *) chan;
-    ChannelState *statePtr = chanPtr->state;	/* state info for channel */
-    ChannelBuffer *bufPtr;
-    int offset, factor, copied, copiedNow, result;
-    Tcl_Encoding encoding;
-#define UTF_EXPANSION_FACTOR	1024
+    Channel*      chanPtr  = (Channel *) chan;
+    ChannelState* statePtr = chanPtr->state;	/* state info for channel */
     
     /*
      * This operation should occur at the top of a channel stack.
@@ -4009,12 +4053,64 @@
     chanPtr = statePtr->topChanPtr;
 
     if (CheckChannelErrors(statePtr, TCL_READABLE) != 0) {
-	copied = -1;
-	goto done;
+        /*
+	 * Update the notifier state so we don't block while there is still
+	 * data in the buffers.
+	 */
+        UpdateInterest(chanPtr);
+	return -1;
     }
 
+    return DoReadChars (chanPtr, objPtr, toRead, appendFlag);
+}
+/*
+ *---------------------------------------------------------------------------
+ *
+ * DoReadChars --
+ *
+ *	Reads from the channel until the requested number of characters
+ *	have been seen, EOF is seen, or the channel would block.  EOL
+ *	and EOF translation is done.  If reading binary data, the raw
+ *	bytes are wrapped in a Tcl byte array object.  Otherwise, the raw
+ *	bytes are converted to UTF-8 using the channel's current encoding
+ *	and stored in a Tcl string object.
+ *
+ * Results:
+ *	The number of characters read, or -1 on error. Use Tcl_GetErrno()
+ *	to retrieve the error code for the error that occurred.
+ *
+ * Side effects:
+ *	May cause input to be buffered.
+ *
+ *---------------------------------------------------------------------------
+ */
+ 
+int
+DoReadChars(chanPtr, objPtr, toRead, appendFlag)
+    Channel* chanPtr;		/* The channel to read. */
+    Tcl_Obj *objPtr;		/* Input data is stored in this object. */
+    int toRead;			/* Maximum number of characters to store,
+				 * or -1 to read all available data (up to EOF
+				 * or when channel blocks). */
+    int appendFlag;		/* If non-zero, data read from the channel
+				 * will be appended to the object.  Otherwise,
+				 * the data will replace the existing contents
+				 * of the object. */
+
+{
+    ChannelState *statePtr = chanPtr->state;	/* state info for channel */
+    ChannelBuffer *bufPtr;
+    int offset, factor, copied, copiedNow, result;
+    Tcl_Encoding encoding;
+#define UTF_EXPANSION_FACTOR	1024
+    
+    /*
+     * This operation should occur at the top of a channel stack.
+     */
+
+    chanPtr  = statePtr->topChanPtr;
     encoding = statePtr->encoding;
-    factor = UTF_EXPANSION_FACTOR;
+    factor   = UTF_EXPANSION_FACTOR;
 
     if (appendFlag == 0) {
 	if (encoding == NULL) {
@@ -6995,6 +7091,9 @@
     int result = TCL_OK;
     int size;
     int total;
+    int sizeb;
+    Tcl_Obj* bufObj = NULL;
+    char* buffer;
 
     inChan	= (Tcl_Channel) csPtr->readPtr;
     outChan	= (Tcl_Channel) csPtr->writePtr;
@@ -7011,6 +7110,11 @@
      * thus gets the bottom of the stack.
      */
 
+    if (inStatePtr->encoding != NULL) {
+        bufObj = Tcl_NewObj ();
+	Tcl_IncrRefCount (bufObj);
+    }
+
     while (csPtr->toRead != 0) {
 
 	/*
@@ -7036,8 +7140,13 @@
 	    size = csPtr->bufSize;
 	} else {
 	    size = csPtr->toRead;
+	}
+
+	if (inStatePtr->encoding == NULL) {
+	    size = DoRead(inStatePtr->topChanPtr, csPtr->buffer, size);
+	} else {
+	    size = DoReadChars(inStatePtr->topChanPtr, bufObj, size, 0 /* No append */);
 	}
-	size = DoRead(inStatePtr->topChanPtr, csPtr->buffer, size);
 
 	if (size < 0) {
 	    readError:
@@ -7063,6 +7172,10 @@
 		Tcl_CreateChannelHandler(inChan, TCL_READABLE,
 			CopyEventProc, (ClientData) csPtr);
 	    }
+	    if (bufObj != (Tcl_Obj*) NULL) {
+	        Tcl_DecrRefCount (bufObj);
+		bufObj = (Tcl_Obj*) NULL;
+	    }
 	    return TCL_OK;
 	}
 
@@ -7070,8 +7183,27 @@
 	 * Now write the buffer out.
 	 */
 
-	size = DoWrite(outStatePtr->topChanPtr, csPtr->buffer, size);
-	if (size < 0) {
+	if (inStatePtr->encoding == NULL) {
+	    buffer = csPtr->buffer;
+	    sizeb = size;
+	} else {
+	    buffer = Tcl_GetStringFromObj (bufObj, &sizeb);
+	}
+
+	if (outStatePtr->encoding == NULL) {
+	    sizeb = DoWrite(outStatePtr->topChanPtr, buffer, sizeb);
+	} else {
+	    sizeb = DoWriteChars(outStatePtr->topChanPtr, buffer, sizeb);
+	}
+
+	if (inStatePtr->encoding == NULL) {
+	    /* Both read and write functions counted bytes */
+	    size = sizeb;
+	} /* else : Read counted characters, write counted bytes, i.e. size != sizeb
+	   * Use sizeb for error checking and size for counting the transferred amount.
+	   */
+
+	if (sizeb < 0) {
 	    writeError:
 	    errObj = Tcl_NewObj();
 	    Tcl_AppendStringsToObj(errObj, "error writing \"",
@@ -7106,6 +7238,10 @@
 		Tcl_CreateChannelHandler(outChan, TCL_WRITABLE,
 			CopyEventProc, (ClientData) csPtr);
 	    }
+	    if (bufObj != (Tcl_Obj*) NULL) {
+	        Tcl_DecrRefCount (bufObj);
+		bufObj = (Tcl_Obj*) NULL;
+	    }
 	    return TCL_OK;
 	}
 
@@ -7124,8 +7260,17 @@
 		Tcl_CreateChannelHandler(outChan, TCL_WRITABLE,
 			CopyEventProc, (ClientData) csPtr);
 	    }
+	    if (bufObj != (Tcl_Obj*) NULL) {
+	        Tcl_DecrRefCount (bufObj);
+		bufObj = (Tcl_Obj*) NULL;
+	    }
 	    return TCL_OK;
 	}
+    } /* while */
+
+    if (bufObj != (Tcl_Obj*) NULL) {
+        Tcl_DecrRefCount (bufObj);
+	bufObj = (Tcl_Obj*) NULL;
     }
 
     /*
@@ -7175,6 +7320,8 @@
  * DoRead --
  *
  *	Reads a given number of bytes from a channel.
+ *
+ *	No encoding conversions are applied to the bytes being read.
  *
  * Results:
  *	The number of characters read, or -1 on error. Use Tcl_GetErrno()
Index: tests/io.test
===================================================================
RCS file: /cvsroot/tcl/tcl/tests/io.test,v
retrieving revision 1.14
diff -u -r1.14 io.test
--- tests/io.test	2000/04/10 17:19:00	1.14
+++ tests/io.test	2001/04/04 14:30:49
@@ -6369,6 +6369,84 @@
     list $s0 [file size test1]
 } {40 40}
 
+# Empty files, to register them with the test facility
+makeFile {} kyrillic.txt
+makeFile {} utf8-fcopy.txt
+makeFile {} utf8-rp.txt
+
+# Create kyrillic file
+set out [open kyrillic.txt w]
+fconfigure $out -encoding koi8-r
+puts       $out "\u0410\u0410"
+close      $out
+
+test io-52.9 {TclCopyChannel & encodings} {
+    # Copy kyrillic to UTF-8, using fcopy.
+
+    set in  [open kyrillic.txt r]
+    set out [open utf8-fcopy.txt w]
+
+    fconfigure $in  -encoding koi8-r
+    fconfigure $out -encoding utf-8
+
+    fcopy $in $out
+    close $in
+    close $out
+
+    # Do the same again, but differently (read/puts).
+
+    set in  [open kyrillic.txt r]
+    set out [open utf8-rp.txt w]
+
+    fconfigure $in  -encoding koi8-r
+    fconfigure $out -encoding utf-8
+
+    puts -nonewline $out [read $in]
+
+    close $in
+    close $out
+
+    list \
+	    [file size kyrillic.txt]   \
+	    [file size utf8-fcopy.txt] \
+	    [file size utf8-rp.txt]
+} {3 5 5}
+
+test io-52.10 {TclCopyChannel & encodings} {
+    # encoding to binary (=> implies that the
+    # internal utf-8 is written)
+
+    set in  [open kyrillic.txt r]
+    set out [open utf8-fcopy.txt w]
+
+    fconfigure $in  -encoding koi8-r
+    fconfigure $out -encoding binary
+
+    fcopy $in $out
+    close $in
+    close $out
+
+    file size utf8-fcopy.txt
+} 5
+
+test io-52.11 {TclCopyChannel & encodings} {
+    # binary to encoding => the input has to be
+    # in utf-8 to make sense to the encoder
+
+    set in  [open utf8-fcopy.txt r]
+    set out [open kyrillic.txt w]
+
+    fconfigure $in  -encoding binary
+    fconfigure $out -encoding koi8-r
+
+    fcopy $in $out
+    close $in
+    close $out
+
+    file size kyrillic.txt
+} 3
+
+
 test io-53.1 {CopyData} {
     removeFile test1
     set f1 [open $thisScript]
@@ -6730,16 +6808,3 @@
 ::tcltest::restoreState
 ::tcltest::cleanupTests
 return
-
-
-
-
-
-
-
-
-
-
-
-
-
Index: tests/iogt.test
===================================================================
RCS file: /cvsroot/tcl/tcl/tests/iogt.test,v
retrieving revision 1.2
diff -u -r1.2 iogt.test
--- tests/iogt.test	2000/09/28 06:38:22	1.2
+++ tests/iogt.test	2001/04/04 14:30:50
@@ -484,8 +484,8 @@
     audit_ops ain  -attach $fin
     audit_ops aout -attach $fout
 
-    fconfigure $fin  -buffersize 10
-    fconfigure $fout -buffersize 5
+    fconfigure $fin  -buffersize 10 -encoding binary
+    fconfigure $fout -buffersize 5  -encoding binary
 
     fcopy $fin $fout
 
@@ -535,8 +535,8 @@
     audit_flow ain  -attach $fin
     audit_flow aout -attach $fout
 
-    fconfigure $fin  -buffersize 10
-    fconfigure $fout -buffersize 5
+    fconfigure $fin  -buffersize 10 -encoding binary
+    fconfigure $fout -buffersize 5  -encoding binary
 
     fcopy $fin $fout
 
@@ -591,8 +591,8 @@
     audit_flow trail -attach $fin
     audit_flow trail -attach $fout
 
-    fconfigure $fin  -buffersize 20
-    fconfigure $fout -buffersize 10
+    fconfigure $fin  -buffersize 20 -encoding binary
+    fconfigure $fout -buffersize 10 -encoding binary
 
     fcopy $fin $fout