Attachment "stringrev.diff" to
ticket [22324bcbdf]
added by
chw
2021-02-13 05:17:43.
Index: generic/tclStringObj.c
==================================================================
--- generic/tclStringObj.c
+++ generic/tclStringObj.c
@@ -2897,10 +2897,13 @@
TclStringReverse(
Tcl_Obj *objPtr)
{
String *stringPtr;
Tcl_UniChar ch = 0;
+#if TCL_UTF_MAX <= 4
+ int needFlip = 0;
+#endif
if (TclIsPureByteArray(objPtr)) {
int numBytes;
unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
@@ -2915,36 +2918,70 @@
stringPtr = GET_STRING(objPtr);
if (stringPtr->hasUnicode) {
Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
+ Tcl_UniChar *to;
if (Tcl_IsShared(objPtr)) {
- Tcl_UniChar *to;
-
/*
* Create a non-empty, pure unicode value, so we can coax
* Tcl_SetObjLength into growing the unicode rep buffer.
*/
objPtr = Tcl_NewUnicodeObj(&ch, 1);
Tcl_SetObjLength(objPtr, stringPtr->numChars);
to = Tcl_GetUnicode(objPtr);
while (--src >= from) {
+#if TCL_UTF_MAX <= 4
+ ch = *src;
+ if ((ch & 0xF800) == 0xD800) {
+ needFlip = 1;
+ }
+ *to++ = ch;
+#else
*to++ = *src;
+#endif
}
} else {
/*
* Reversing in place.
*/
+#if TCL_UTF_MAX <= 4
+ to = src;
+#endif
while (--src > from) {
ch = *src;
+#if TCL_UTF_MAX <= 4
+ if ((ch & 0xF800) == 0xD800) {
+ needFlip = 1;
+ }
+#endif
*src = *from;
*from++ = ch;
}
}
+#if TCL_UTF_MAX <= 4
+ if (needFlip) {
+ /*
+ * Flip back surrogate pairs. There might be a better way.
+ */
+
+ from = to - stringPtr->numChars;
+ while (--to >= from) {
+ ch = *to;
+ if ((ch & 0xFC00) == 0xD800) {
+ if ((to-1 >= from) && ((to[-1] & 0xFC00) == 0xDC00)) {
+ to[0] = to[-1];
+ to[-1] = ch;
+ --to;
+ }
+ }
+ }
+ }
+#endif
}
if (objPtr->bytes) {
int numChars = stringPtr->numChars;
int numBytes = objPtr->length;
@@ -2976,10 +3013,15 @@
* skip calling Tcl_UtfCharComplete() here.
*/
int bytesInChar = TclUtfToUniChar(from, &ch);
+#if TCL_UTF_MAX <= 4
+ if ((ch & 0xF800) == 0xD800) {
+ needFlip = 1;
+ }
+#endif
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);
to += bytesInChar;
from += bytesInChar;
bytesLeft -= bytesInChar;
@@ -2989,10 +3031,35 @@
from = to = objPtr->bytes;
stringPtr->numChars = charCount;
}
/* Pass 2. Reverse all the bytes. */
ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes);
+
+#if TCL_UTF_MAX <= 4
+ if (needFlip) {
+ /* Pass 3. Flip back surrogate pairs. Might be a better way. */
+ numBytes = objPtr->length;
+ from = to = objPtr->bytes;
+ from += numBytes; /* This is the end (The Doors, Jim Morrison) */
+ while (to < from) {
+ Tcl_UniChar ch1, ch2;
+ int len, len2;
+
+ len = TclUtfToUniChar(to, &ch1);
+ if (((ch1 & 0xFC00) == 0xDC00) &&
+ (to + len + TCL_UTF_MAX <= from)) {
+ len2 = TclUtfToUniChar(to + len, &ch2);
+ if ((ch2 & 0xFC00) == 0xD800) {
+ Tcl_UniCharToUtf(ch2, to);
+ Tcl_UniCharToUtf(ch1, to + len);
+ len += len2;
+ }
+ }
+ to += len;
+ }
+ }
+#endif
}
return objPtr;
}
Index: tests/string.test
==================================================================
--- tests/string.test
+++ tests/string.test
@@ -1799,10 +1799,28 @@
} 030201
test string-24.15 {string reverse command - pure bytearray} {
binary scan [tcl::string::reverse [binary format H* 010203]] H* x
set x
} 030201
+test string-24.16 {string reverse command - surrogates} {
+ string reverse \u0444bulb\ud83d\ude02
+} \ud83d\ude02blub\u0444
+test string-24.17 {string reverse command - surrogates} {
+ string reverse \ud83d\ude02hello\ud83d\ude02
+} \ud83d\ude02olleh\ud83d\ude02
+test string-24.18 {string reverse command - surrogates} {
+ set s \u0444bulb\ud83d\ude02
+ # shim shimmery ...
+ string index $s 0
+ string reverse $s
+} \ud83d\ude02blub\u0444
+test string-24.19 {string reverse command - surrogates} {
+ set s \ud83d\ude02hello\ud83d\ude02
+ # shim shimmery ...
+ string index $s 0
+ string reverse $s
+} \ud83d\ude02olleh\ud83d\ude02
test string-25.1 {string is list} {
string is list {a b c}
} 1
test string-25.2 {string is list} {