Attachment "utf.patch" to ticket [217987ffff], added by hobbs on 2001-06-28 08:06:55.
Index: generic/tclUtf.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclUtf.c,v
retrieving revision 1.15
diff -b -c -r1.15 tclUtf.c
*** generic/tclUtf.c 2001/04/06 10:50:00 1.15
--- generic/tclUtf.c 2001/06/28 01:05:24
***************
*** 111,117 ****
*---------------------------------------------------------------------------
*/
! static int
UtfCount(ch)
int ch; /* The Tcl_UniChar whose size is returned. */
{
--- 111,117 ----
*---------------------------------------------------------------------------
*/
! INLINE static int
UtfCount(ch)
int ch; /* The Tcl_UniChar whose size is returned. */
{
***************
*** 781,787 ****
* backslash sequence. */
{
register CONST char *p = src+1;
! int result, count, n;
char buf[TCL_UTF_MAX];
if (dst == NULL) {
--- 781,788 ----
* backslash sequence. */
{
register CONST char *p = src+1;
! Tcl_UniChar result;
! int count, n;
char buf[TCL_UTF_MAX];
if (dst == NULL) {
***************
*** 883,897 ****
result = (unsigned char)((result << 3) + (*p - '0'));
break;
}
result = *p;
count = 2;
break;
}
if (readPtr != NULL) {
*readPtr = count;
}
! return Tcl_UniCharToUtf(result, dst);
}
/*
--- 884,908 ----
result = (unsigned char)((result << 3) + (*p - '0'));
break;
}
+ if (UCHAR(*p) < UNICODE_SELF) {
result = *p;
count = 2;
+ } else {
+ /*
+ * The user has put a backslash in front of a multi-byte
+ * UTF-8 character. The backslash has no special meaning
+ * here, but we must convert the whole character so that a
+ * valid UTF-8 sequence is not split. [Bug #217987] test subst-3.2
+ */
+ count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */
+ }
break;
}
if (readPtr != NULL) {
*readPtr = count;
}
! return Tcl_UniCharToUtf((int) result, dst);
}
/*