Tcl Source Code: Changes On Branch tip-619

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch tip-619 Excluding Merge-Ins

This is equivalent to a diff from ccbe007452 to 631cce8656

2022-07-04
11:21		TIP #619: New TCL_COMBINE flag for Tcl_UniCharToUtf() check-in: 3bb6f6602c user: jan.nijtmans tags: trunk, main
2022-06-10
20:24		Merge 8.7 check-in: 8c31c163af user: jan.nijtmans tags: trunk, main
13:45		Provide help with solving merge conflicts ;-) check-in: 71597277a7 user: jan.nijtmans tags: dgp-refactor
2022-06-09
20:25		Merge 9.0 Closed-Leaf check-in: 631cce8656 user: jan.nijtmans tags: tip-619
16:31		Merge 9.0 check-in: 475b98506a user: jan.nijtmans tags: tip-626
13:45		Internal bug in Tcl_GetIndexFromObjStruct re-definition (works, as long as sizeof(struct) == sizeof(... check-in: ccbe007452 user: jan.nijtmans tags: trunk, main
07:03		More (internal) int -> size_t, allowing values > 2^31 check-in: 5d828b481d user: jan.nijtmans tags: trunk, main
2022-05-19
15:05		Merge 9.0 check-in: 60d39427c4 user: jan.nijtmans tags: tip-619

Changes to doc/Utf.3.

Changes to generic/tcl.h.

Changes to generic/tclCmdMZ.c.

Changes to generic/tclDecls.h.

Changes to generic/tclEncoding.c.

Changes to generic/tclExecute.c.

Changes to generic/tclParse.c.

Changes to generic/tclStringObj.c.

Changes to generic/tclUtf.c.

Changes to tests/encoding.test.

Changes to tests/string.test.

Changes to tests/utf.test.

︙			︙
2312 2313 2314 2315 2316 2317 2318 ~~2319~~ 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 ~~2337~~ 2338 2339 2340 2341 2342 ~~2343~~ 2344 2345 2346 2347 2348 2349 2350 2351 2352 ~~2353 2354 2355 2356 2357~~ 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368	} else { char chbuf[2]; chbuf[0] = UCHAR(src++); chbuf[1] = 0; TclUtfToUCS4(chbuf, &ch); } dst += Tcl_UniCharToUtf(ch, dst); } else { ~~int low;~~ const char saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); if ((len < 2) && (ch != 0) && !(flags & TCL_ENCODING_NOCOMPLAIN) && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; } src += len; if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF / ch -= 0x10000; dst++ = 0xED; dst++ = (char) (((ch >> 16) & 0x0F) \| 0xA0); dst++ = (char) (((ch >> 10) & 0x3F) \| 0x80); ch = (ch & 0x0CFF) \| 0xDC00; } ~~~~goto~~ cesu8;~~ } else if ((ch \| 0x7FF) == 0xDFFF) { /* * A surrogate character is detected, handle especially. / ~~low = ch;~~ len = (src <= srcEnd-3) ? 
TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) \|\| (ch & 0x400)) { if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } ~~cesu8: dst++ = (char) (((ch >> 12) \| 0xE0) & 0xEF); dst++ = (char) (((ch >> 6) \| 0x80) & 0xBF); dst++ = (char) ((ch \| 0x80) & 0xBF); ~~continue;~~~~ } src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; } else if (!Tcl_UniCharIsUnicode(ch)) { if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } if (!(flags & TCL_ENCODING_MODIFIED)) {	< > \| > > > > > > \| \| < < < < >	2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371	} else { char chbuf[2]; chbuf[0] = UCHAR(src++); chbuf[1] = 0; TclUtfToUCS4(chbuf, &ch); } dst += Tcl_UniCharToUtf(ch, dst); } else { const char saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); if ((len < 2) && (ch != 0) && !(flags & TCL_ENCODING_NOCOMPLAIN) && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; } src += len; if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF / ch -= 0x10000; dst++ = 0xED; dst++ = (char) (((ch >> 16) & 0x0F) \| 0xA0); dst++ = (char) (((ch >> 10) & 0x3F) \| 0x80); ch = (ch & 0x0CFF) \| 0xDC00; } #if TCL_UTF_MAX < 4 cesu8: #endif dst++ = (char) (((ch >> 12) \| 0xE0) & 0xEF); dst++ = (char) (((ch >> 6) \| 0x80) & 0xBF); dst++ = (char) ((ch \| 0x80) & 0xBF); continue; #if TCL_UTF_MAX < 4 } else if ((ch \| 0x7FF) == 0xDFFF) { / * A surrogate character is detected, handle especially. */ int low = ch; len = (src <= srcEnd-3) ? 
TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) \|\| (ch & 0x400)) { if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } goto cesu8; } src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; #endif } else if (!Tcl_UniCharIsUnicode(ch)) { if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } if (!(flags & TCL_ENCODING_MODIFIED)) {
︙			︙
2663 2664 2665 2666 2667 2668 2669 ~~2670~~ 2671 2672 2673 2674 2675 2676 2677	* Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. */ if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { ~~dst += Tcl_UniCharToUtf(ch, dst);~~ } src += sizeof(unsigned short); } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars;	\|	2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680	* Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. */ if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch \| TCL_COMBINE, dst); } src += sizeof(unsigned short); } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars;
︙			︙

︙			︙
865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 ~~891 892 893~~ 894 895 896 897 898 899 900	case 'u': count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* * No hexdigits -> This is just "u". / result = 'u'; } else if (((result & 0xFC00) == 0xD800) && (count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { / If high surrogate is immediately followed by a low surrogate * escape, combine them into one character. / int low; int count2 = ParseHex(p+7, 4, &low); if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) { result = ((result & 0x3FF)<<10 \| (low & 0x3FF)) + 0x10000; count += count2 + 2; } } break; case 'U': count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { / * No hexdigits -> This is just "U". / result = 'U'; ~~} else if ((result \| 0x7FF) == 0xDFFF) {~~ ~~/ Upper or lower surrogate, not allowed in this syntax. */~~ ~~result = 0xFFFD;~~ } break; case '\n': count--; do { p++; count++;	> > < < <	865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899	case 'u': count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* * No hexdigits -> This is just "u". / result = 'u'; #if TCL_UTF_MAX < 4 } else if (((result & 0xFC00) == 0xD800) && (count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { / If high surrogate is immediately followed by a low surrogate * escape, combine them into one character. / int low; int count2 = ParseHex(p+7, 4, &low); if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) { result = ((result & 0x3FF)<<10 \| (low & 0x3FF)) + 0x10000; count += count2 + 2; } #endif } break; case 'U': count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { / * No hexdigits -> This is just "U". */ result = 'U'; } break; case '\n': count--; do { p++; count++;
︙			︙
950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967	} done: if (readPtr != NULL) { *readPtr = count; } count = Tcl_UniCharToUtf(result, dst); if ((result >= 0xD800) && (count < 3)) { /* Special case for handling high surrogates. */ count += Tcl_UniCharToUtf(-1, dst + count); } return count; } /* *---------------------------------------------------------------------- * * ParseComment --	> >	949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968	} done: if (readPtr != NULL) { *readPtr = count; } count = Tcl_UniCharToUtf(result, dst); #if TCL_UTF_MAX < 4 if ((result >= 0xD800) && (count < 3)) { /* Special case for handling high surrogates. */ count += Tcl_UniCharToUtf(-1, dst + count); } #endif return count; } /* *---------------------------------------------------------------------- * * ParseComment --
︙			︙

︙			︙
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82	static void GrowUnicodeBuffer(Tcl_Obj objPtr, size_t needed); static int SetStringFromAny(Tcl_Interp interp, Tcl_Obj objPtr); static void SetUnicodeObj(Tcl_Obj objPtr, const Tcl_UniChar unicode, size_t numChars); static size_t UnicodeLength(const Tcl_UniChar unicode); static void UpdateStringOfString(Tcl_Obj objPtr); #define ISCONTINUATION(bytes) (\ ((((bytes)[0] & 0xC0) == 0x80) \|\| (((bytes)[0] == '\xED') \ && (((bytes)[1] & 0xF0) == 0xB0) && (((bytes)[2] & 0xC0) == 0x80)))) / * The structure below defines the string Tcl object type by means of * functions that can be invoked by generic object code. */	> > > > >	66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87	static void GrowUnicodeBuffer(Tcl_Obj objPtr, size_t needed); static int SetStringFromAny(Tcl_Interp interp, Tcl_Obj objPtr); static void SetUnicodeObj(Tcl_Obj objPtr, const Tcl_UniChar unicode, size_t numChars); static size_t UnicodeLength(const Tcl_UniChar unicode); static void UpdateStringOfString(Tcl_Obj objPtr); #if TCL_UTF_MAX > 3 #define ISCONTINUATION(bytes) (\ ((bytes)[0] & 0xC0) == 0x80) #else #define ISCONTINUATION(bytes) (\ ((((bytes)[0] & 0xC0) == 0x80) \|\| (((bytes)[0] == '\xED') \ && (((bytes)[1] & 0xF0) == 0xB0) && (((bytes)[2] & 0xC0) == 0x80)))) #endif / * The structure below defines the string Tcl object type by means of * functions that can be invoked by generic object code. */
︙			︙
2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137	char buf[4] = ""; int code, length; if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { goto error; } length = Tcl_UniCharToUtf(code, buf); if ((code >= 0xD800) && (length < 3)) { /* Special case for handling high surrogates. */ length += Tcl_UniCharToUtf(-1, buf + length); } segment = Tcl_NewStringObj(buf, length); Tcl_IncrRefCount(segment); allocSegment = 1; break; } case 'u':	> >	2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144	char buf[4] = ""; int code, length; if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { goto error; } length = Tcl_UniCharToUtf(code, buf); #if TCL_UTF_MAX < 4 if ((code >= 0xD800) && (length < 3)) { /* Special case for handling high surrogates. */ length += Tcl_UniCharToUtf(-1, buf + length); } #endif segment = Tcl_NewStringObj(buf, length); Tcl_IncrRefCount(segment); allocSegment = 1; break; } case 'u':
︙			︙
4178 4179 4180 4181 4182 4183 4184 ~~4185 4186 4187 4188 4189 4190 4191 4192~~ 4193 4194 4195 4196 4197 4198 4199	stringPtr->numChars = needed; } else { numAppendChars = 0; } dst = stringPtr->unicode + numOrigChars; if (numAppendChars-- > 0) { bytes += TclUtfToUniChar(bytes, &unichar); ~~#if TCL_UTF_MAX > 3~~ ~~/* join upper/lower surrogate */~~ ~~if (bytes && (stringPtr->unicode[numOrigChars - 1] \| 0x3FF) == 0xDBFF && (unichar \| 0x3FF) == 0xDFFF) {~~ ~~stringPtr->numChars--;~~ ~~unichar = ((stringPtr->unicode[numOrigChars - 1] & 0x3FF) << 10) + (unichar & 0x3FF) + 0x10000;~~ ~~dst--;~~ ~~}~~ ~~#endif~~ *dst++ = unichar; while (numAppendChars-- > 0) { bytes += TclUtfToUniChar(bytes, &unichar); *dst++ = unichar; } } *dst = 0;	< < < < < < < <	4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198	stringPtr->numChars = needed; } else { numAppendChars = 0; } dst = stringPtr->unicode + numOrigChars; if (numAppendChars-- > 0) { bytes += TclUtfToUniChar(bytes, &unichar); *dst++ = unichar; while (numAppendChars-- > 0) { bytes += TclUtfToUniChar(bytes, &unichar); *dst++ = unichar; } } *dst = 0;
︙			︙

︙			︙
204 205 206 207 208 209 210 211 212 213 ~~214~~ 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 ~~231~~ 232 233 234 235 236 237 238	* * Side effects: * None. * --------------------------------------------------------------------------- / int Tcl_UniCharToUtf( int ch, /* The Tcl_UniChar to be stored in the * buffer. / char buf) /* Buffer in which the UTF-8 representation of * the Tcl_UniChar is stored. Buffer must be * large enough to hold the UTF-8 character * (at most 4 bytes). / { if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { buf[0] = (char) ch; return 1; } if (ch >= 0) { if (ch <= 0x7FF) { buf[1] = (char) ((ch \| 0x80) & 0xBF); buf[0] = (char) ((ch >> 6) \| 0xC0); return 2; } if (ch <= 0xFFFF) { ~~if ((ch & 0xF800) == 0xD800) {~~ if (ch & 0x0400) { / Low surrogate / if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) { / Previous Tcl_UniChar was a high surrogate, so combine */ buf[2] = (char) ((ch & 0x3F) \| 0x80); buf[1] \|= (char) (((ch >> 6) & 0x0F) \| 0x80); return 3;	> \| > > > > > > > > > > > \|	204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250	* * Side effects: * None. * --------------------------------------------------------------------------- / #undef Tcl_UniCharToUtf int Tcl_UniCharToUtf( int ch, /* The Tcl_UniChar to be stored in the * buffer. Can be or'ed with flag TCL_COMBINE / char buf) /* Buffer in which the UTF-8 representation of * the Tcl_UniChar is stored. Buffer must be * large enough to hold the UTF-8 character * (at most 4 bytes). 
/ { #if TCL_UTF_MAX > 3 int flags = ch; #endif if (ch >= TCL_COMBINE) { ch &= (TCL_COMBINE - 1); } if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { buf[0] = (char) ch; return 1; } if (ch >= 0) { if (ch <= 0x7FF) { buf[1] = (char) ((ch \| 0x80) & 0xBF); buf[0] = (char) ((ch >> 6) \| 0xC0); return 2; } if (ch <= 0xFFFF) { if ( #if TCL_UTF_MAX > 3 (flags & TCL_COMBINE) && #endif ((ch & 0xF800) == 0xD800)) { if (ch & 0x0400) { / Low surrogate / if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) { / Previous Tcl_UniChar was a high surrogate, so combine */ buf[2] = (char) ((ch & 0x3F) \| 0x80); buf[1] \|= (char) (((ch >> 6) & 0x0F) \| 0x80); return 3;
︙			︙
373 374 375 376 377 378 379 ~~380~~ 381 382 383 384 385 386 387	p = string; wEnd = uniStr + uniLength; for (w = uniStr; w < wEnd; ) { if (!len && ((w & 0xFC00) != 0xDC00)) { / Special case for handling high surrogates. / p += Tcl_UniCharToUtf(-1, p); } ~~len = Tcl_UniCharToUtf(w, p);~~ p += len; if ((w >= 0xD800) && (len < 3)) { len = 0; / Indication that high surrogate was found */ } w++; } if (!len) {	\|	385 386 387 388 389 390 391 392 393 394 395 396 397 398 399	p = string; wEnd = uniStr + uniLength; for (w = uniStr; w < wEnd; ) { if (!len && ((w & 0xFC00) != 0xDC00)) { / Special case for handling high surrogates. / p += Tcl_UniCharToUtf(-1, p); } len = Tcl_UniCharToUtf(w \| TCL_COMBINE, p); p += len; if ((w >= 0xD800) && (len < 3)) { len = 0; / Indication that high surrogate was found */ } w++; } if (!len) {
︙			︙
1344 1345 1346 1347 1348 1349 1350 ~~1351~~ 1352 1353 1354 1355 1356 1357 1358	/* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the upper case * char to dst if its size is <= the original char. */ ~~if ((len < TclUtfCount(upChar~~)) \|\| ((upChar & ~0x7FF) == 0xD800~~)) {~~ memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(upChar, dst); } src += len; }	\|	1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370	/* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the upper case * char to dst if its size is <= the original char. */ if (len < TclUtfCount(upChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(upChar, dst); } src += len; }
︙			︙
1397 1398 1399 1400 1401 1402 1403 ~~1404~~ 1405 1406 1407 1408 1409 1410 1411	/* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ ~~if ((len < TclUtfCount(lowChar~~)) \|\| ((lowChar & ~0x7FF) == 0xD800~~)) {~~ memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; }	\|	1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423	/* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ if (len < TclUtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; }
︙			︙
1447 1448 1449 1450 1451 1452 1453 ~~1454~~ 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 ~~1470~~ 1471 1472 1473 1474 1475 1476 1477	src = dst = str; if (src) { len = TclUtfToUCS4(src, &ch); titleChar = Tcl_UniCharToTitle(ch); ~~if ((len < TclUtfCount(titleChar~~)) \|\| ((titleChar & ~0x7FF) == 0xD800~~)) {~~ memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(titleChar, dst); } src += len; } while (src) { len = TclUtfToUCS4(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } ~~if ((len < TclUtfCount(lowChar~~)) \|\| ((lowChar & ~0x7FF) == 0xD800~~)) {~~ memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; }	\| \|	1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489	src = dst = str; if (src) { len = TclUtfToUCS4(src, &ch); titleChar = Tcl_UniCharToTitle(ch); if (len < TclUtfCount(titleChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(titleChar, dst); } src += len; } while (src) { len = TclUtfToUCS4(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } if (len < TclUtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; }
︙			︙

︙			︙
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50	# Some tests require the testencoding command testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] testConstraint exec [llength [info commands exec]] testConstraint testgetencpath [llength [info commands testgetencpath]] # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup { set old [encoding system] } -constraints {testencoding} -body { testencoding create foo [namespace origin toutf] [namespace origin fromutf]	> > > \|	36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53	# Some tests require the testencoding command testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] testConstraint exec [llength [info commands exec]] testConstraint testgetencpath [llength [info commands testgetencpath]] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint utf32 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup { set old [encoding system] } -constraints {testencoding} -body { testencoding create foo [namespace origin toutf] [namespace origin fromutf]
︙			︙
332 333 334 335 336 337 338 ~~339~~ 340 ~~341~~ 342 343 344 345 346 ~~347~~ 348 349 350 351 ~~352 353~~ 354 355 356 357 ~~358 359~~ 360 361 362 363 ~~364 365~~ 366 367 368 369 ~~370 371~~ 372 373 374 375 ~~376 377~~ 378 379 380 381 ~~382 383~~ 384 385 386 387 ~~388 389~~ 390 391 392 393 ~~394 395~~ 396 397 398 399 ~~400 401~~ 402 403 404 405 ~~406~~ 407 408 409 410 411 412 413	test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]] binary scan [teststringbytes $y] H* z set z } c080 test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 ~~set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82]~~ list [string length $x] $y ~~} -result "6 😂"~~ test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] list [string length $x] $y } "4 😂" ~~test encoding-15.6 {UtfToUtfProc emoji character output} {~~ set x \uDE02\uD83D\uDE02\uD83D set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z ~~} {10 edb~~882~~f09f~~9882~~e~~da0~~bd} test encoding-15.7 {UtfToUtfProc emoji character output} {~~ set x \uDE02\uD83D\uD83D set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {3 9 e~~db882eda0~~bde~~da0~~bd} test encoding-15.8 {UtfToUtfProc emoji character output} {~~ set x \uDE02\uD83Dé set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {3 8 ed~~b882eda0~~bdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} {~~ set x \uDE02\uD83DX set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {3 7 ed~~b882eda0~~bd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} {~~ set x 
\uDE02é set y [encoding convertto -nocomplain utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {2 5 e~~db882~~c3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} {~~ set x \uDA02é set y [encoding convertto -nocomplain utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {2 5 e~~da882~~c3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} {~~ set x \uDE02Y set y [encoding convertto -nocomplain utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {2 4 e~~db882~~59} test encoding-15.13 {UtfToUtfProc low surrogate character output} {~~ set x \uDA02Y set y [encoding convertto -nocomplain utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {2 4 e~~da882~~59} test encoding-15.14 {UtfToUtfProc high surrogate character output} {~~ set x \uDE02 set y [encoding convertto -nocomplain utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {1 3 e~~db882~~} test encoding-15.15 {UtfToUtfProc low surrogate character output} {~~ set x \uDA02 set y [encoding convertto -nocomplain utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z ~~} {1 3 e~~da882~~}~~ test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { set x 😂	\| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|	335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416	test encoding-15.3 {UtfToUtfProc null 
character input} teststringbytes { set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]] binary scan [teststringbytes $y] H* z set z } c080 test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 set y [encoding convertfrom -nocomplain utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y } -result "6 \uD83D\uDE02" test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] list [string length $x] $y } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} utf32 { set x \uDE02\uD83D\uDE02\uD83D set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {12 efbfbdefbfbdefbfbdefbfbd} test encoding-15.7 {UtfToUtfProc emoji character output} utf32 { set x \uDE02\uD83D\uD83D set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 efbfbdefbfbdefbfbd} test encoding-15.8 {UtfToUtfProc emoji character output} utf32 { set x \uDE02\uD83Dé set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 efbfbdefbfbdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} utf32 { set x \uDE02\uD83DX set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 efbfbdefbfbd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} utf32 { set x \uDE02é set y [encoding convertto -nocomplain utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 efbfbdc3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} utf32 { set x \uDA02é set y [encoding convertto -nocomplain utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 efbfbdc3a9} test encoding-15.12 {UtfToUtfProc high 
surrogate character output} utf32 { set x \uDE02Y set y [encoding convertto -nocomplain utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 efbfbd59} test encoding-15.13 {UtfToUtfProc low surrogate character output} utf32 { set x \uDA02Y set y [encoding convertto -nocomplain utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 efbfbd59} test encoding-15.14 {UtfToUtfProc high surrogate character output} utf32 { set x \uDE02 set y [encoding convertto -nocomplain utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 efbfbd} test encoding-15.15 {UtfToUtfProc low surrogate character output} utf32 { set x \uDA02 set y [encoding convertto -nocomplain utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 efbfbd} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { set x 😂
︙			︙

︙			︙
30 31 32 33 34 35 36 37 38 39 40 41 42 43	# Some tests require the testobj command testConstraint testobj [expr {[info commands testobj] ne {}}] testConstraint testindexobj [expr {[info commands testindexobj] ne {}}] testConstraint testevalex [expr {[info commands testevalex] ne {}}] testConstraint utf32 [expr {[string length \U010000] == 1}] testConstraint testbytestring [llength [info commands testbytestring]] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] if {[testConstraint memory]} { proc getbytes {} { set lines [split [memory info] \n] return [lindex $lines 3 3]	> >	30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45	# Some tests require the testobj command testConstraint testobj [expr {[info commands testobj] ne {}}] testConstraint testindexobj [expr {[info commands testindexobj] ne {}}] testConstraint testevalex [expr {[info commands testevalex] ne {}}] testConstraint utf32 [expr {[string length \U010000] == 1}] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint utf32 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] if {[testConstraint memory]} { proc getbytes {} { set lines [split [memory info] \n] return [lindex $lines 3 3]
︙			︙
1942 1943 1944 1945 1946 1947 1948 ~~1949~~ 1950 1951 1952 1953 1954 ~~1955~~ 1956 1957 1958 1959 1960 1961 1962	test string-21.21.$noComp {string trimleft, unicode} { run {string trimleft "\uF602Hello world!\uF602" \uD83D\uDE02} } "\uF602Hello world!\uF602" test string-21.22.$noComp {string trimright, unicode} { run {string trimright "\uF602Hello world!\uF602" \uD83D\uDE02} } "\uF602Hello world!\uF602" test string-21.23.$noComp {string trim, unicode} { ~~run {string trim "\uD83D\uDE02Hello world!\uD83D\uDE02" \uD93D~~\uDE02~~}~~ } "\uD83D\uDE02Hello world!\uD83D\uDE02" test string-21.24.$noComp {string trimleft, unicode} { run {string trimleft "\uD83D\uDE02Hello world!\uD83D\uDE02" \uD93D\uDE02} } "\uD83D\uDE02Hello world!\uD83D\uDE02" test string-21.25.$noComp {string trimright, unicode} { ~~run {string trimright "\uD83D\uDE02Hello world!\uD83D\uDE02" \uD93D~~\uDE02~~}~~ } "\uD83D\uDE02Hello world!\uD83D\uDE02" test string-22.1.$noComp {string wordstart} -body { list [catch {run {string word a}} msg] $msg } -result {1 {unknown or ambiguous subcommand "word": must be cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}} test string-22.2.$noComp {string wordstart} -body { list [catch {run {string wordstart a}} msg] $msg	\| \|	1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964	test string-21.21.$noComp {string trimleft, unicode} { run {string trimleft "\uF602Hello world!\uF602" \uD83D\uDE02} } "\uF602Hello world!\uF602" test string-21.22.$noComp {string trimright, unicode} { run {string trimright "\uF602Hello world!\uF602" \uD83D\uDE02} } "\uF602Hello world!\uF602" test string-21.23.$noComp {string trim, unicode} { run {string trim "\uD83D\uDE02Hello world!\uD83D\uDE02" \uD93D} } "\uD83D\uDE02Hello world!\uD83D\uDE02" test string-21.24.$noComp {string trimleft, unicode} { run {string 
trimleft "\uD83D\uDE02Hello world!\uD83D\uDE02" \uD93D\uDE02} } "\uD83D\uDE02Hello world!\uD83D\uDE02" test string-21.25.$noComp {string trimright, unicode} { run {string trimright "\uD83D\uDE02Hello world!\uD83D\uDE02" \uD93D} } "\uD83D\uDE02Hello world!\uD83D\uDE02" test string-22.1.$noComp {string wordstart} -body { list [catch {run {string word a}} msg] $msg } -result {1 {unknown or ambiguous subcommand "word": must be cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}} test string-22.2.$noComp {string wordstart} -body { list [catch {run {string wordstart a}} msg] $msg
︙			︙
2112 2113 2114 2115 2116 2117 2118 ~~2119~~ 2120 ~~2121 2122~~ 2123 ~~2124 2125~~ 2126 2127 2128 2129 ~~2130 2131~~ 2132 2133 2134 2135 ~~2136~~ 2137 2138 2139 2140 2141 2142 2143	binary scan [run {string reverse [binary format H* 010203]}] H* x set x } 030201 test string-24.15.$noComp {string reverse command - pure bytearray} { binary scan [run {tcl::string::reverse [binary format H* 010203]}] H* x set x } 030201 ~~test string-24.16.$noComp {string reverse command - surrogates} {~~ run {string reverse \u0444bulb\uD83D\uDE02} ~~} \uD83D~~\uDE02~~blub\u0444 test string-24.17.$noComp {string reverse command - surrogates} {~~ run {string reverse \uD83D\uDE02hello\uD83D\uDE02} ~~} \uD83D~~\uDE02~~olleh\uD83D~~\uDE02~~ test string-24.18.$noComp {string reverse command - surrogates} {~~ set s \u0444bulb\uD83D\uDE02 # shim shimmery ... string index $s 0 run {string reverse $s} ~~} \uD83D~~\uDE02~~blub\u0444 test string-24.19.$noComp {string reverse command - surrogates} {~~ set s \uD83D\uDE02hello\uD83D\uDE02 # shim shimmery ... 
string index $s 0 run {string reverse $s} ~~} \uD83D~~\uDE02~~olleh\uD83D~~\uDE02~~~~ test string-25.1.$noComp {string is list} { run {string is list {a b c}} } 1 test string-25.2.$noComp {string is list} { run {string is list "a \{b c"} } 0	\| \| \| \| \| \| \| \|	2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145	binary scan [run {string reverse [binary format H* 010203]}] H* x set x } 030201 test string-24.15.$noComp {string reverse command - pure bytearray} { binary scan [run {tcl::string::reverse [binary format H* 010203]}] H* x set x } 030201 test string-24.16.$noComp {string reverse command - surrogates} utf32 { run {string reverse \u0444bulb\uD83D\uDE02} } \uDE02\uD83Dblub\u0444 test string-24.17.$noComp {string reverse command - surrogates} utf32 { run {string reverse \uD83D\uDE02hello\uD83D\uDE02} } \uDE02\uD83Dolleh\uDE02\uD83D test string-24.18.$noComp {string reverse command - surrogates} utf32 { set s \u0444bulb\uD83D\uDE02 # shim shimmery ... string index $s 0 run {string reverse $s} } \uDE02\uD83Dblub\u0444 test string-24.19.$noComp {string reverse command - surrogates} utf32 { set s \uD83D\uDE02hello\uD83D\uDE02 # shim shimmery ... string index $s 0 run {string reverse $s} } \uDE02\uD83Dolleh\uDE02\uD83D test string-25.1.$noComp {string is list} { run {string is list {a b c}} } 1 test string-25.2.$noComp {string is list} { run {string is list "a \{b c"} } 0
︙			︙

︙			︙
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92	} 1 test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]} } 1 test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 ~~test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} ~~{Uesc ucs2}~~ {~~ expr {"\UD842" eq "\uD842"} } 1 test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {fullutf testbytestring} { ~~expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]}~~ } 1 test utf-1.14 {Tcl_UniCharToUtf: surrogate pairs from concat} { set lo \uDE02 return \uD83D$lo } \uD83D\uDE02 test utf-1.15 {Tcl_UniCharToUtf: surrogate pairs from concat} { set hi \uD83D	\| \|	74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92	} 1 test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]} } 1 test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} Uesc { expr {"\UD842" eq "\uD842"} } 1 test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {fullutf testbytestring} { expr {"\UD842" eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.14 {Tcl_UniCharToUtf: surrogate pairs from concat} { set lo \uDE02 return \uD83D$lo } \uD83D\uDE02 test utf-1.15 {Tcl_UniCharToUtf: surrogate pairs from concat} { set hi \uD83D
︙			︙
1110 1111 1112 1113 1114 1115 1116 ~~1117~~ 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 ~~1136~~ 1137 1138 1139 1140 1141 1142 1143	} ᲐᲐ test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} fullutf { string toupper 𐐨 } 𐐀 test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf { string toupper 𐐨 } 𐐀 ~~test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} {~~ string toupper \uDC24\uD824 } \uDC24\uD824 test utf-12.1 {Tcl_UtfToLower} { string tolower {} } {} test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { string tolower ÃGH } ãgh test utf-12.4 {Tcl_UtfToLower} { string tolower ǢGH } ǣgh test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower აᲐ } აა ~~test utf-12.6 {Tcl_UtfToLower low/high surrogate)} {~~ string tolower \uDC24\uD824 } \uDC24\uD824 test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} fullutf { string tolower 𐐀 } 𐐨 test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf { string tolower 𐐀	\| \|	1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143	} ᲐᲐ test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} fullutf { string toupper 𐐨 } 𐐀 test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf { string toupper 𐐨 } 𐐀 test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} utf32 { string toupper \uDC24\uD824 } \uDC24\uD824 test utf-12.1 {Tcl_UtfToLower} { string tolower {} } {} test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { string tolower ÃGH } ãgh test utf-12.4 {Tcl_UtfToLower} { string tolower ǢGH } ǣgh test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower აᲐ } აა test utf-12.6 {Tcl_UtfToLower low/high surrogate)} utf32 { string tolower \uDC24\uD824 } \uDC24\uD824 test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} fullutf { string tolower 𐐀 } 𐐨 test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf { string tolower 𐐀
︙			︙
1157 1158 1159 1160 1161 1162 1163 ~~1164~~ 1165 1166 1167 1168 1169 1170 1171	} ǲab test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle აᲐ } აᲐ test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle Აა } Აა ~~test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} {~~ string totitle \uDC24\uD824 } \uDC24\uD824 test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} fullutf { string totitle 𐐨𐐀 } 𐐀𐐨 test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf { string totitle 𐐨𐐀	\|	1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171	} ǲab test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle აᲐ } აᲐ test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle Აა } Აა test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} utf32 { string totitle \uDC24\uD824 } \uDC24\uD824 test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} fullutf { string totitle 𐐨𐐀 } 𐐀𐐨 test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf { string totitle 𐐨𐐀
︙			︙