Attachment "stringUtf.diff" to
ticket [686782ffff]
added by
vincentdarley
2003-02-18 17:04:16.
Index: tclIO.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclIO.c,v
retrieving revision 1.60
diff -u -r1.60 tclIO.c
--- tclIO.c 17 Feb 2003 22:32:05 -0000 1.60
+++ tclIO.c 18 Feb 2003 09:54:14 -0000
@@ -3511,6 +3511,7 @@
* find a newline in the available input.
*/
+ TclEnsurePureUtfString(objPtr);
Tcl_GetStringFromObj(objPtr, &oldLength);
oldFlags = statePtr->inputEncodingFlags;
oldState = statePtr->inputEncodingState;
@@ -4397,13 +4398,14 @@
* we must ensure that this is actually a string
* object (otherwise it might have been pure Unicode).
*/
- Tcl_GetString(objPtr);
+ TclEnsurePureUtfString(objPtr);
}
offset = 0;
} else {
if (encoding == NULL) {
Tcl_GetByteArrayFromObj(objPtr, &offset);
} else {
+ TclEnsurePureUtfString(objPtr);
Tcl_GetStringFromObj(objPtr, &offset);
}
}
@@ -4609,7 +4611,10 @@
Tcl_Obj *objPtr; /* Input data is appended to this object.
* objPtr->length is how much space has been
* allocated to hold data, not how many bytes
- * of data have been stored in the object. */
+ * of data have been stored in the object.
+ * This object must be pure utf-8, with no
+ * Unicode representation, since we will
+ * modify objPtr->bytes directly. */
int charsToRead; /* Maximum number of characters to store,
* or -1 to get all available characters.
* Characters are obtained from the first
Index: tclInt.h
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclInt.h,v
retrieving revision 1.118
diff -u -r1.118 tclInt.h
--- tclInt.h 10 Feb 2003 10:26:25 -0000 1.118
+++ tclInt.h 18 Feb 2003 09:54:21 -0000
@@ -1643,6 +1643,7 @@
CONST char *value));
EXTERN void TclExpandTokenArray _ANSI_ARGS_((
Tcl_Parse *parsePtr));
+EXTERN void TclEnsurePureUtfString _ANSI_ARGS_((Tcl_Obj *objPtr));
EXTERN int TclFileAttrsCmd _ANSI_ARGS_((Tcl_Interp *interp,
int objc, Tcl_Obj *CONST objv[]));
EXTERN int TclFileCopyCmd _ANSI_ARGS_((Tcl_Interp *interp,
Index: tclStringObj.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclStringObj.c,v
retrieving revision 1.30
diff -u -r1.30 tclStringObj.c
--- tclStringObj.c 15 Feb 2003 02:14:33 -0000 1.30
+++ tclStringObj.c 18 Feb 2003 09:54:25 -0000
@@ -727,6 +727,49 @@
/*
*----------------------------------------------------------------------
*
+ * TclEnsurePureUtfString --
+ *
+ * Takes an object and ensures that objPtr->bytes is the only
+ * valid rep of the object. This then allows the calling code
+ * to safely manipulate/modify objPtr->bytes directly.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * If the object has other representations, they will be
+ * freed.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclEnsurePureUtfString(objPtr)
+ register Tcl_Obj *objPtr; /* Pointer to object. This object must
+ * not currently be shared. */
+{
+ String *stringPtr;
+
+ SetStringFromAny(NULL, objPtr);
+
+ if (objPtr->bytes == NULL) {
+ /* Pure Unicode string. Ensure we have an objPtr->bytes rep. */
+ Tcl_GetString(objPtr);
+ }
+
+ stringPtr = GET_STRING(objPtr);
+
+ /* Shrink away any Unicode rep. NOTE(review): only uallocated is reset here -- confirm no other String field needs clearing. */
+ if (stringPtr->uallocated > 0) {
+ stringPtr = (String *) ckrealloc((char*) stringPtr, STRING_SIZE(0));
+ SET_STRING(objPtr, stringPtr);
+ stringPtr->uallocated = 0;
+ }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_SetObjLength --
*
* This procedure changes the length of the string representation