Tcl Source Code

Artifact [7cda8ebc78]
Login

Artifact 7cda8ebc78b6e282620027f4330ad82e13b1dec3:

Attachment "1122671-tcl85.patch" to ticket [1122671fff] added by hobbs 2006-10-06 04:21:14.
Index: generic/tcl.h
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tcl.h,v
retrieving revision 1.216
diff -u -r1.216 tcl.h
--- generic/tcl.h	26 Sep 2006 14:08:36 -0000	1.216
+++ generic/tcl.h	5 Oct 2006 21:20:17 -0000
@@ -2302,7 +2302,11 @@
     /*
      * unsigned int isn't 100% accurate as it should be a strict 4-byte value
      * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
-     * value must be reflected correctly in regcustom.h.
+     * value must be reflected correctly in regcustom.h and
+     * in tclEncoding.c.
+     * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode
+     * XXX: string rep that Tcl_UniChar represents.  Changing the size
+     * XXX: of Tcl_UniChar is /not/ supported.
      */
 typedef unsigned int Tcl_UniChar;
 #else
Index: generic/tclEncoding.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclEncoding.c,v
retrieving revision 1.44
diff -u -r1.44 tclEncoding.c
--- generic/tclEncoding.c	26 Sep 2006 23:01:11 -0000	1.44
+++ generic/tclEncoding.c	5 Oct 2006 21:20:17 -0000
@@ -2288,9 +2288,10 @@
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd;
+    CONST char *srcStart, *srcEnd;
     char *dstEnd, *dstStart;
     int result, numChars;
+    Tcl_UniChar ch;
 
     result = TCL_OK;
     if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
@@ -2299,33 +2300,31 @@
 	srcLen *= sizeof(Tcl_UniChar);
     }
 
-    wSrc = (Tcl_UniChar *) src;
-
-    wSrcStart = (Tcl_UniChar *) src;
-    wSrcEnd = (Tcl_UniChar *) (src + srcLen);
+    srcStart = src;
+    srcEnd = src + srcLen;
 
     dstStart = dst;
     dstEnd = dst + dstLen - TCL_UTF_MAX;
 
-    for (numChars = 0; wSrc < wSrcEnd; numChars++) {
+    for (numChars = 0; src < srcEnd; numChars++) {
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-
 	/*
-	 * Special case for 1-byte utf chars for speed.
+	 * Special case for 1-byte utf chars for speed.  Emit the decoded
+	 * Tcl_UniChar value: src[0] is the high byte on big-endian hosts.
 	 */
-
-	if (*wSrc && *wSrc < 0x80) {
-	    *dst++ = (char) *wSrc;
+	ch = *(Tcl_UniChar *)src;
+	if (ch && ch < 0x80) {
+	    *dst++ = (char) ch;
 	} else {
-	    dst += Tcl_UniCharToUtf(*wSrc, dst);
+	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
-	wSrc++;
+	src += sizeof(Tcl_UniChar);
     }
 
-    *srcReadPtr = (char *) wSrc - (char *) wSrcStart;
+    *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
     return result;
@@ -2375,9 +2374,9 @@
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    CONST char *srcStart, *srcEnd, *srcClose;
-    Tcl_UniChar *wDst, *wDstStart, *wDstEnd;
+    CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
+    Tcl_UniChar ch;
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2386,9 +2385,8 @@
 	srcClose -= TCL_UTF_MAX;
     }
 
-    wDst = (Tcl_UniChar *) dst;
-    wDstStart = (Tcl_UniChar *) dst;
-    wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar));
+    dstStart = dst;
+    dstEnd   = dst + dstLen - sizeof(Tcl_UniChar);
 
     result = TCL_OK;
     for (numChars = 0; src < srcEnd; numChars++) {
@@ -2401,16 +2399,26 @@
 	    result = TCL_CONVERT_MULTIBYTE;
 	    break;
 	}
-	if (wDst > wDstEnd) {
+	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	src += TclUtfToUniChar(src, wDst);
-	wDst++;
+	src += TclUtfToUniChar(src, &ch);
+	/*
+	 * Store ch bytewise: casting dst to (Tcl_UniChar *) could be a
+	 * misaligned access. [Bug 1122671]
+	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
+	 */
+#ifdef WORDS_BIGENDIAN
+	*dst++ = (ch >> 8);
+	*dst++ = (ch & 0xFF);
+#else
+	*dst++ = (ch & 0xFF);
+	*dst++ = (ch >> 8);
+#endif
     }
-
     *srcReadPtr = src - srcStart;
-    *dstWrotePtr = (char *) wDst - (char *) wDstStart;
+    *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
     return result;
 }
Index: tests/encoding.test
===================================================================
RCS file: /cvsroot/tcl/tcl/tests/encoding.test,v
retrieving revision 1.24
diff -u -r1.24 encoding.test
--- tests/encoding.test	8 Feb 2006 21:41:28 -0000	1.24
+++ tests/encoding.test	5 Oct 2006 21:20:17 -0000
@@ -307,8 +307,9 @@
 } {1 2 c080}
 
 test encoding-16.1 {UnicodeToUtfProc} {
-    encoding convertfrom unicode NN
-} "\u4e4e"
+    set val [encoding convertfrom unicode NN]
+    list $val [format %x [scan $val %c]]
+} "\u4e4e 4e4e"
 
 test encoding-17.1 {UtfToUnicodeProc} {
 } {}