Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | [Bug 3464428] string is graph \u0120 is wrong |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | e9a619e9dc3cc5d617ea1d0682e9a763 |
User & Date: | jan.nijtmans 2012-01-09 20:34:29 |
References
2013-06-17
| ||
04:50 | • Ticket [a876646efe] re_expr character class cntrl: should contain \u0000 - \u001f status still Open with 4 other changes artifact: fa040a5149 user: jan.nijtmans | |
Context
2012-01-12
| ||
13:23 | [Bug 3466506]: Document more environment variables. check-in: 6f0fbae68b user: dkf tags: trunk | |
2012-01-09
| ||
20:34 | [Bug 3464428] string is graph \u0120 is wrong check-in: e9a619e9dc user: jan.nijtmans tags: trunk | |
20:31 | [Bug 3464428] string is graph \u0120 is wrong check-in: 14fc5c19b7 user: jan.nijtmans tags: core-8-5-branch | |
13:50 | Revert mistaken commit. check-in: 38a63eef25 user: dgp tags: trunk | |
Changes
Changes to ChangeLog.
1 2 3 4 5 6 7 | 2012-01-08 Kevin B. Kenny <[email protected]> * library/clock.tcl (ReadZoneinfoFile): Corrected a bug where loading * tests/clock.test (clock-56.4): zoneinfo would fail if one timezone abbreviation was a proper tail of another, and zic used the same bytes of the file to represent both of them. Added a test case for the bug, using the same data that caused the observed failure | > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | 2012-01-09 Jan Nijtmans <[email protected]> * generic/tclUtf.c: [Bug 3464428] string is graph \u0120 is wrong * generic/regc_locale.c: Add table for Unicode [:cntrl:] class * tools/uniClass.tcl: Generate Unicode [:cntrl:] class table * tests/utf.test: 2012-01-08 Kevin B. Kenny <[email protected]> * library/clock.tcl (ReadZoneinfoFile): Corrected a bug where loading * tests/clock.test (clock-56.4): zoneinfo would fail if one timezone abbreviation was a proper tail of another, and zic used the same bytes of the file to represent both of them. Added a test case for the bug, using the same data that caused the observed failure |
︙ | ︙ |
Changes to generic/regc_locale.c.
︙ | ︙ | |||
219 220 221 222 223 224 225 226 227 228 229 230 231 232 | 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44 }; #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) /* * Unicode: decimal digit characters. */ static const crange digitRangeTable[] = { {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x07c0, 0x07c9}, {0x0966, 0x096f}, {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, | > > > > > > > > > > > > > > > > > | 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44 }; #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) /* * Unicode: control characters. */ static const crange controlRangeTable[] = { {0x007f, 0x009f}, {0x0600, 0x0603}, {0x200b, 0x200f}, {0x202a, 0x202e}, {0x2060, 0x2064}, {0x206a, 0x206f}, {0xe000, 0xf8ff}, {0xfff9, 0xfffb} }; #define NUM_CONTROL_RANGE (sizeof(controlRangeTable)/sizeof(crange)) static const chr controlCharTable[] = { 0x00ad, 0x06dd, 0x070f, 0x17b4, 0x17b5, 0xfeff }; #define NUM_CONTROL_CHAR (sizeof(controlCharTable)/sizeof(chr)) /* * Unicode: decimal digit characters. */ static const crange digitRangeTable[] = { {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x07c0, 0x07c9}, {0x0966, 0x096f}, {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, |
︙ | ︙ | |||
474 475 476 477 478 479 480 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) /* * Unicode: unicode print characters excluding space. */ static const crange graphRangeTable[] = { | | | 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) /* * Unicode: unicode print characters excluding space. */ static const crange graphRangeTable[] = { {0x0021, 0x007e}, {0x00a1, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x0527}, {0x0531, 0x0556}, {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7}, {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x061e, 0x06dc}, {0x06de, 0x070d}, {0x0710, 0x074a}, {0x074d, 0x07b1}, {0x07c0, 0x07fa}, {0x0800, 0x082d}, {0x0830, 0x083e}, {0x0840, 0x085b}, {0x0900, 0x0977}, {0x0979, 0x097f}, {0x0981, 0x0983}, {0x0985, 0x098c}, {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09bc, 0x09c4}, {0x09cb, 0x09ce}, {0x09df, 0x09e3}, |
︙ | ︙ | |||
509 510 511 512 513 514 515 | {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f47}, {0x0f49, 0x0f6c}, {0x0f71, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda}, {0x1000, 0x10c5}, {0x10d0, 0x10fc}, {0x1100, 0x1248}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a}, | | | | | | | | | | | | | | | | | | 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 | {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f47}, {0x0f49, 0x0f6c}, {0x0f71, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda}, {0x1000, 0x10c5}, {0x10d0, 0x10fc}, {0x1100, 0x1248}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a}, {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x167f}, {0x1681, 0x169c}, {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714}, {0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770}, {0x1780, 0x17b3}, {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1800, 0x180d}, {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa}, {0x18b0, 0x18f5}, {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x1944, 0x196d}, {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9}, {0x19d0, 0x19da}, {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a89}, {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b}, {0x1b50, 0x1b7c}, {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3}, {0x1bfc, 0x1c37}, {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, 
{0x1cd0, 0x1cf2}, {0x1d00, 0x1de6}, {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, {0x2010, 0x2027}, {0x2030, 0x205e}, {0x2074, 0x208e}, {0x2090, 0x209c}, {0x20a0, 0x20b9}, {0x20d0, 0x20f0}, {0x2100, 0x2189}, {0x2190, 0x23f3}, {0x2400, 0x2426}, {0x2440, 0x244a}, {0x2460, 0x26ff}, {0x2701, 0x27ca}, {0x27ce, 0x2b4c}, {0x2b50, 0x2b59}, {0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}, {0x2c60, 0x2cf1}, {0x2cf9, 0x2d25}, {0x2d30, 0x2d65}, {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 0x2dae}, {0x2db0, 0x2db6}, {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce}, {0x2dd0, 0x2dd6}, {0x2dd8, 0x2dde}, {0x2de0, 0x2e31}, {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3001, 0x303f}, {0x3041, 0x3096}, {0x3099, 0x30ff}, {0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31ba}, {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x32fe}, {0x3300, 0x4db5}, {0x4dc0, 0x9fcb}, {0xa000, 0xa48c}, {0xa490, 0xa4c6}, {0xa4d0, 0xa62b}, {0xa640, 0xa673}, {0xa67c, 0xa697}, {0xa6a0, 0xa6f7}, {0xa700, 0xa78e}, {0xa7a0, 0xa7a9}, {0xa7fa, 0xa82b}, {0xa830, 0xa839}, {0xa840, 0xa877}, {0xa880, 0xa8c4}, {0xa8ce, 0xa8d9}, {0xa8e0, 0xa8fb}, {0xa900, 0xa953}, {0xa95f, 0xa97c}, {0xa980, 0xa9cd}, {0xa9cf, 0xa9d9}, |
︙ | ︙ | |||
783 784 785 786 787 788 789 | * Extract the class name */ len = endp - startp; Tcl_DStringInit(&ds); np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); | < < < < < < < < < > > > > > > > > | 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 | * Extract the class name */ len = endp - startp; Tcl_DStringInit(&ds); np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); /* * Map the name to the corresponding enumerated value. */ index = -1; for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) { index = i; break; } } Tcl_DStringFree(&ds); if (index == -1) { ERR(REG_ECTYPE); return NULL; } /* * Remap lower and upper to alpha if the match is case insensitive. */ if (cases && ((index == CC_LOWER) || (index == CC_UPPER))) { index = CC_ALNUM; } /* * Now compute the character class contents. */ switch((enum classes) index) { case CC_ALNUM: |
︙ | ︙ | |||
854 855 856 857 858 859 860 | break; case CC_BLANK: cv = getcvec(v, 2, 0); addchr(cv, '\t'); addchr(cv, ' '); break; case CC_CNTRL: | | | > | > > > > > > | 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 | break; case CC_BLANK: cv = getcvec(v, 2, 0); addchr(cv, '\t'); addchr(cv, ' '); break; case CC_CNTRL: cv = getcvec(v, NUM_CONTROL_CHAR, NUM_CONTROL_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_CONTROL_RANGE ; i++) { addrange(cv, controlRangeTable[i].start, controlRangeTable[i].end); } for (i=0 ; (size_t)i<NUM_CONTROL_CHAR ; i++) { addchr(cv, controlCharTable[i]); } } break; case CC_DIGIT: cv = getcvec(v, 0, NUM_DIGIT_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, digitRangeTable[i].end); |
︙ | ︙ | |||
933 934 935 936 937 938 939 940 941 942 | } for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) { addchr(cv, upperCharTable[i]); } } break; case CC_PRINT: case CC_GRAPH: cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE); if (cv) { | > > > > > > > > > > > > > > > > > > < < < | | 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 | } for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) { addchr(cv, upperCharTable[i]); } } break; case CC_PRINT: cv = getcvec(v, NUM_SPACE_CHAR + NUM_GRAPH_CHAR, NUM_SPACE_RANGE + NUM_GRAPH_RANGE - 1); if (cv) { for (i=1 ; (size_t)i<NUM_SPACE_RANGE ; i++) { addrange(cv, spaceRangeTable[i].start, spaceRangeTable[i].end); } for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) { addchr(cv, spaceCharTable[i]); } for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, graphRangeTable[i].end); } for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) { addchr(cv, graphCharTable[i]); } } break; case CC_GRAPH: cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, graphRangeTable[i].end); } for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) { addchr(cv, graphCharTable[i]); } } |
︙ | ︙ |
Changes to generic/tclUtf.c.
︙ | ︙ | |||
22 23 24 25 26 27 28 29 30 31 32 33 | * values are shifted right by the category value to determine whether the * given category is included in the set. */ #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \ | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER)) #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER) #define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \ | (1 << PARAGRAPH_SEPARATOR)) | > > < < | < < < < < < < < > > > > > > > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | * values are shifted right by the category value to determine whether the * given category is included in the set. */ #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \ | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER)) #define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT) | (1 << PRIVATE_USE)) #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER) #define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \ | (1 << PARAGRAPH_SEPARATOR)) #define WORD_BITS (ALPHA_BITS | DIGIT_BITS | (1 << CONNECTOR_PUNCTUATION)) #define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \ (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION)) #define GRAPH_BITS (WORD_BITS | PUNCT_BITS | \ (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ (1 << OTHER_NUMBER) | \ (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) /* * Unicode characters less than this value are represented by themselves in * UTF-8 strings. */ #define UNICODE_SELF 0x80 |
︙ | ︙ | |||
1325 1326 1327 1328 1329 1330 1331 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { | < < | | 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsAlpha -- * |
︙ | ︙ | |||
1350 1351 1352 1353 1354 1355 1356 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlpha( int ch) /* Unicode character to test. */ { | < | | 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlpha( int ch) /* Unicode character to test. */ { return ((ALPHA_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsControl -- * |
︙ | ︙ | |||
1374 1375 1376 1377 1378 1379 1380 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { | | | 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { return ((CONTROL_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsDigit -- * |
︙ | ︙ | |||
1397 1398 1399 1400 1401 1402 1403 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { | | | 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsGraph -- * |
︙ | ︙ | |||
1420 1421 1422 1423 1424 1425 1426 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { | < | | 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { return ((GRAPH_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsLower -- * |
︙ | ︙ | |||
1444 1445 1446 1447 1448 1449 1450 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { | | | 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { return (GetCategory(ch) == LOWERCASE_LETTER); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsPrint -- * |
︙ | ︙ | |||
1467 1468 1469 1470 1471 1472 1473 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsPrint( int ch) /* Unicode character to test. */ { | < | | 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsPrint( int ch) /* Unicode character to test. */ { return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsPunct -- * |
︙ | ︙ | |||
1491 1492 1493 1494 1495 1496 1497 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. */ { | < | | 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. */ { return ((PUNCT_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsSpace -- * |
︙ | ︙ | |||
1515 1516 1517 1518 1519 1520 1521 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsSpace( int ch) /* Unicode character to test. */ { | < < | | < | | 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsSpace( int ch) /* Unicode character to test. */ { /* * If the character is within the first 127 characters, just use the * standard C function, otherwise consult the Unicode table. */ if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) { return isspace(UCHAR(ch)); /* INTL: ISO space */ } else { return ((SPACE_BITS >> GetCategory(ch)) & 1); } } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsUpper -- |
︙ | ︙ | |||
1550 1551 1552 1553 1554 1555 1556 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { | | | 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { return (GetCategory(ch) == UPPERCASE_LETTER); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsWordChar -- * |
︙ | ︙ | |||
1573 1574 1575 1576 1577 1578 1579 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsWordChar( int ch) /* Unicode character to test. */ { | < | < | 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsWordChar( int ch) /* Unicode character to test. */ { return ((WORD_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- * * Tcl_UniCharCaseMatch -- * |
︙ | ︙ |
Changes to tests/utf.test.
︙ | ︙ | |||
304 305 306 307 308 309 310 311 312 313 314 315 316 317 | test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \u0120 } {1} test utf-21.5 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {^[[:graph:]]+$} \u0120 } {1} test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { string wordend "x\u5080z123_bar\u203c fg" 0 | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 | test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \u0120 } {1} test utf-21.5 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {^[[:graph:]]+$} \u0120 } {1} test utf-21.6 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \u00a0 } {0} test utf-21.7 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029 } {0} test utf-21.8 {TclUniCharIsPrint} { # [Bug 3464428] string is print \u0009 } {0} test utf-21.9 {unicode print char in regc_locale.c} { # [Bug 3464428] regexp {[[:print:]]} \u0009 } {0} test utf-21.10 {unicode print char in regc_locale.c} { # [Bug 3464428] regexp {[[:print:]]} \u0009 } {0} test utf-21.11 {TclUniCharIsControl} { # [Bug 3464428] string is control \u00ad } {1} test utf-21.12 {unicode control char in regc_locale.c} { # [Bug 3464428] regexp {^[[:cntrl:]]$} \u00ad } {1} test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { string wordend "x\u5080z123_bar\u203c fg" 0 |
︙ | ︙ |
Changes to tools/uniClass.tcl.
︙ | ︙ | |||
83 84 85 86 87 88 89 90 91 92 93 94 95 96 | * is automatically generated by the tools/uniClass.tcl script * and used in generic/regc_locale.c. Do not modify by hand. */ " foreach {type desc} { alpha "alphabetic characters" digit "decimal digit characters" punct "punctuation characters" space "white space characters" lower "lowercase characters" upper "uppercase characters" graph "unicode print characters excluding space" } { | > | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | * is automatically generated by the tools/uniClass.tcl script * and used in generic/regc_locale.c. Do not modify by hand. */ " foreach {type desc} { alpha "alphabetic characters" control "control characters" digit "decimal digit characters" punct "punctuation characters" space "white space characters" lower "lowercase characters" upper "uppercase characters" graph "unicode print characters excluding space" } { |
︙ | ︙ |