Tcl Source Code

Artifact [df3d1b49e3]
Login

Artifact df3d1b49e39fc626369e154cd2f28ba769ce7439:

Attachment "stringUtf.diff" to ticket [686782ffff] added by vincentdarley 2003-02-18 17:04:16.
Index: tclIO.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclIO.c,v
retrieving revision 1.60
diff -u -r1.60 tclIO.c
--- tclIO.c	17 Feb 2003 22:32:05 -0000	1.60
+++ tclIO.c	18 Feb 2003 09:54:14 -0000
@@ -3511,6 +3511,7 @@
      * find a newline in the available input.
      */
 
+    TclEnsurePureUtfString(objPtr);
     Tcl_GetStringFromObj(objPtr, &oldLength);
     oldFlags = statePtr->inputEncodingFlags;
     oldState = statePtr->inputEncodingState;
@@ -4397,13 +4398,14 @@
 	     * we must ensure that this is actually a string
 	     * object (otherwise it might have been pure Unicode).
 	     */
-	    Tcl_GetString(objPtr);
+	    TclEnsurePureUtfString(objPtr);
 	}
 	offset = 0;
     } else {
 	if (encoding == NULL) {
 	    Tcl_GetByteArrayFromObj(objPtr, &offset);
 	} else {
+	    TclEnsurePureUtfString(objPtr);
 	    Tcl_GetStringFromObj(objPtr, &offset);
 	}
     }
@@ -4609,7 +4611,10 @@
     Tcl_Obj *objPtr;		/* Input data is appended to this object.
 				 * objPtr->length is how much space has been
 				 * allocated to hold data, not how many bytes
-				 * of data have been stored in the object. */
+				 * of data have been stored in the object. 
+				 * This object must be pure utf-8, with no
+				 * Unicode representation, since we will 
+				 * modify objPtr->bytes directly. */
     int charsToRead;		/* Maximum number of characters to store,
 				 * or -1 to get all available characters.
 				 * Characters are obtained from the first
Index: tclInt.h
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclInt.h,v
retrieving revision 1.118
diff -u -r1.118 tclInt.h
--- tclInt.h	10 Feb 2003 10:26:25 -0000	1.118
+++ tclInt.h	18 Feb 2003 09:54:21 -0000
@@ -1643,6 +1643,7 @@
 			    CONST char *value));
 EXTERN void		TclExpandTokenArray _ANSI_ARGS_((
 			    Tcl_Parse *parsePtr));
+EXTERN void		TclEnsurePureUtfString _ANSI_ARGS_((Tcl_Obj *objPtr));
 EXTERN int		TclFileAttrsCmd _ANSI_ARGS_((Tcl_Interp *interp,
 			    int objc, Tcl_Obj *CONST objv[]));
 EXTERN int		TclFileCopyCmd _ANSI_ARGS_((Tcl_Interp *interp, 
Index: tclStringObj.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclStringObj.c,v
retrieving revision 1.30
diff -u -r1.30 tclStringObj.c
--- tclStringObj.c	15 Feb 2003 02:14:33 -0000	1.30
+++ tclStringObj.c	18 Feb 2003 09:54:25 -0000
@@ -727,6 +727,49 @@
 /*
  *----------------------------------------------------------------------
  *
+ * TclEnsurePureUtfString --
+ *
+ *	Takes an object and ensures that objPtr->bytes is the only
+ *	valid rep of the object. This then allows the calling code
+ *	to safely manipulate/modify objPtr->bytes directly.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	If the object has other representations, they will be
+ *	freed.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclEnsurePureUtfString(objPtr)
+    register Tcl_Obj *objPtr;	/* Pointer to object.  This object must
+			         * not currently be shared. */
+{
+    String *stringPtr;
+
+    SetStringFromAny(NULL, objPtr);
+    if (objPtr->bytes == NULL) {
+	/* Pure Unicode string.  Ensure we have an objPtr->bytes rep. */
+	Tcl_GetString(objPtr);
+    }
+    stringPtr = GET_STRING(objPtr);
+    /* Drop any Unicode rep and mark its cached state invalid.  NOTE(review):
+     * assumes String has numChars/hasUnicode as in Tcl 8.4 -- confirm. */
+    if (stringPtr->uallocated > 0) {
+	stringPtr = (String *) ckrealloc((char*) stringPtr, STRING_SIZE(0));
+	SET_STRING(objPtr, stringPtr);
+	stringPtr->uallocated = 0;
+	stringPtr->hasUnicode = 0;
+	stringPtr->numChars = -1;
+    }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_SetObjLength --
  *
  *	This procedure changes the length of the string representation