Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | [Bug 3464428] string is graph \u0120 is wrong |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core-8-5-branch |
Files: | files | file ages | folders |
SHA1: |
005fc77cde8bf71bb89a9498c56e9b1b |
User & Date: | jan.nijtmans 2011-12-24 00:15:37 |
Context
2011-12-30
| ||
08:14 | Update to Olson's tzdata2011n check-in: f903a74e2f user: venkat tags: core-8-5-branch | |
2011-12-24
| ||
00:30 | [Bug 3464428] string is graph \u0120 is wrong check-in: 0c1ac83954 user: jan.nijtmans tags: trunk | |
00:15 | [Bug 3464428] string is graph \u0120 is wrong check-in: 005fc77cde user: jan.nijtmans tags: core-8-5-branch | |
2011-12-23
| ||
23:31 | [Bug 3464428] string is graph \u0120 is wrong check-in: 13071df962 user: jan.nijtmans tags: core-8-4-branch | |
2011-12-11
| ||
09:13 | [Bug 3457031]: Some Unicode 6.0 chars not in [:print:] class check-in: 6a31dc71dc user: jan.nijtmans tags: core-8-5-branch | |
Changes
Changes to ChangeLog.
1 2 3 4 5 6 7 | 2011-12-11 Jan Nijtmans <[email protected]> * generic/regc_locale.c: [Bug 3457031]: Some Unicode 6.0 chars not * tests/utf.test: in [:print:] class 2011-12-07 Jan Nijtmans <[email protected]> | > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | 2011-12-23 Jan Nijtmans <[email protected]> * generic/tclUtf.c: [Bug 3464428] string is graph \u0120 is wrong * generic/tclUniData.c: * generic/regc_locale.c: * tests/utf.test: * tools/uniParse.tcl: clean up some unused stuff, and be more robust against changes in UnicodeData.txt syntax 2011-12-11 Jan Nijtmans <[email protected]> * generic/regc_locale.c: [Bug 3457031]: Some Unicode 6.0 chars not * tests/utf.test: in [:print:] class 2011-12-07 Jan Nijtmans <[email protected]> |
︙ | ︙ |
Changes to generic/regc_locale.c.
︙ | ︙ | |||
214 215 216 217 218 219 220 | 0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x103f, 0x1061, 0x1065, 0x1066, 0x108e, 0x10fc, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1bae, 0x1baf, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, | | | 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | 0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x103f, 0x1061, 0x1065, 0x1066, 0x108e, 0x10fc, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1bae, 0x1baf, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44 }; #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) /* * Unicode: decimal digit characters. */ |
︙ | ︙ | |||
474 475 476 477 478 479 480 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) /* * Unicode: unicode print characters excluding space. */ static const crange graphRangeTable[] = { | | < | | | < | | | | | | | | | | | | < | | | | | | | | | | < | | | | | < | | | | | | | | | < | | | | | | < < < < < < < < < < < < < < < < < < < < < < | | | | | | | | | < < < < < < < < < < | < < | | | < | | | | < < < < < < < < < < < | | < | | | | | | | | | | | | | | | | | | < | 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) /* * Unicode: unicode print characters excluding space. */ static const crange graphRangeTable[] = { {0x0021, 0x007e}, {0x00a0, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x0527}, {0x0531, 0x0556}, {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7}, {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x061e, 0x06dc}, {0x06de, 0x070d}, {0x0710, 0x074a}, {0x074d, 0x07b1}, {0x07c0, 0x07fa}, {0x0800, 0x082d}, {0x0830, 0x083e}, {0x0840, 0x085b}, {0x0900, 0x0977}, {0x0979, 0x097f}, {0x0981, 0x0983}, {0x0985, 0x098c}, {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09bc, 0x09c4}, {0x09cb, 0x09ce}, {0x09df, 0x09e3}, {0x09e6, 0x09fb}, {0x0a01, 0x0a03}, {0x0a05, 0x0a0a}, {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a75}, {0x0a81, 0x0a83}, {0x0a85, 0x0a8d}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae0, 0x0ae3}, {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b35, 0x0b39}, {0x0b3c, 0x0b44}, {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b63}, {0x0b66, 0x0b77}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0be6, 0x0bfa}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, {0x0c3d, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c60, 0x0c63}, {0x0c66, 0x0c6f}, {0x0c78, 0x0c7f}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0cbc, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, {0x0ce0, 0x0ce3}, {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d3a}, {0x0d3d, 0x0d44}, {0x0d46, 0x0d48}, {0x0d4a, 0x0d4e}, {0x0d60, 0x0d63}, {0x0d66, 0x0d75}, {0x0d79, 0x0d7f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, {0x0df2, 0x0df4}, {0x0e01, 0x0e3a}, {0x0e3f, 0x0e5b}, {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f47}, {0x0f49, 0x0f6c}, {0x0f71, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda}, {0x1000, 0x10c5}, {0x10d0, 0x10fc}, {0x1100, 0x1248}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a}, {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x169c}, {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714}, {0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770}, {0x1780, 0x17b3}, {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1800, 0x180e}, {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa}, {0x18b0, 0x18f5}, {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x1944, 0x196d}, {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9}, {0x19d0, 0x19da}, {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a89}, {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b}, {0x1b50, 0x1b7c}, {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3}, {0x1bfc, 0x1c37}, {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2}, {0x1d00, 0x1de6}, {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, {0x2000, 0x200a}, {0x2010, 0x2029}, {0x202f, 0x205f}, {0x2074, 0x208e}, {0x2090, 0x209c}, {0x20a0, 0x20b9}, {0x20d0, 0x20f0}, {0x2100, 0x2189}, {0x2190, 0x23f3}, {0x2400, 0x2426}, {0x2440, 0x244a}, {0x2460, 0x26ff}, {0x2701, 0x27ca}, {0x27ce, 0x2b4c}, {0x2b50, 0x2b59}, {0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}, {0x2c60, 0x2cf1}, {0x2cf9, 0x2d25}, {0x2d30, 0x2d65}, {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 0x2dae}, {0x2db0, 0x2db6}, {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce}, {0x2dd0, 0x2dd6}, {0x2dd8, 0x2dde}, {0x2de0, 0x2e31}, {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3000, 0x303f}, {0x3041, 0x3096}, {0x3099, 0x30ff}, {0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31ba}, {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x32fe}, {0x3300, 0x4db5}, {0x4dc0, 0x9fcb}, {0xa000, 0xa48c}, {0xa490, 0xa4c6}, {0xa4d0, 0xa62b}, {0xa640, 0xa673}, {0xa67c, 0xa697}, {0xa6a0, 0xa6f7}, {0xa700, 0xa78e}, {0xa7a0, 0xa7a9}, {0xa7fa, 0xa82b}, {0xa830, 0xa839}, {0xa840, 0xa877}, {0xa880, 0xa8c4}, {0xa8ce, 0xa8d9}, {0xa8e0, 0xa8fb}, {0xa900, 0xa953}, {0xa95f, 0xa97c}, {0xa980, 0xa9cd}, {0xa9cf, 0xa9d9}, {0xaa00, 0xaa36}, {0xaa40, 0xaa4d}, {0xaa50, 0xaa59}, {0xaa5c, 0xaa7b}, {0xaa80, 0xaac2}, {0xaadb, 0xaadf}, {0xab01, 0xab06}, {0xab09, 0xab0e}, {0xab11, 0xab16}, {0xab20, 0xab26}, {0xab28, 0xab2e}, {0xabc0, 0xabed}, {0xabf0, 0xabf9}, {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfa2d}, {0xfa30, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1}, {0xfbd3, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd}, {0xfe00, 0xfe19}, {0xfe20, 0xfe26}, {0xfe30, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff01, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee} }; #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) static const chr graphCharTable[] = { 0x038c, 0x0589, 0x058a, 0x085e, 0x098f, 0x0990, 0x09b2, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39, 0x0a3c, 0x0a47, 0x0a48, 0x0a51, 0x0a5e, 0x0ab2, 0x0ab3, 0x0ad0, 0x0af1, 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c, 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0bd0, 0x0bd7, 0x0c55, 0x0c56, 0x0c58, 0x0c59, 0x0c82, 0x0c83, 0x0cd5, 0x0cd6, 0x0cde, 0x0cf1, 0x0cf2, 0x0d02, 0x0d03, 0x0d57, 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc, 0x0edd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x2071, 0x27cc, 0x2d6f, 0x2d70, 0xa790, 0xa791, 0xa9de, 0xa9df, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffc, 0xfffd }; #define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) /* * End of auto-generated Unicode character ranges declarations. */ |
︙ | ︙ |
Changes to generic/tclUniData.c.
︙ | ︙ | |||
771 772 773 774 775 776 777 | }; /* * The following constants are used to determine the category of a * Unicode character. */ | | | 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 | }; /* * The following constants are used to determine the category of a * Unicode character. */ #define UNICODE_CATEGORY_MASK 0x1f #define UNICODE_OUT_OF_RANGE 0x10000u enum { UNASSIGNED, UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, |
︙ | ︙ | |||
813 814 815 816 817 818 819 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ | | | | 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xe0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f) #define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) /* * This macro extracts the information about a character from the * Unicode character tables. */ #define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]]) |
Changes to generic/tclUtf.c.
︙ | ︙ | |||
1420 1421 1422 1423 1424 1425 1426 | */ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); | | | 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 | */ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return (((PRINT_BITS >> category) & 1) && (ch != ' ')); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsLower -- * |
︙ | ︙ |
Changes to tests/utf.test.
︙ | ︙ | |||
23 24 25 26 27 28 29 | } [bytestring "\xc0\x80"] test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { set x "\xe0" } [bytestring "\xc3\xa0"] test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { set x "\u4e4e" } [bytestring "\xe4\xb9\x8e"] | | > > > | | | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | } [bytestring "\xc0\x80"] test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { set x "\xe0" } [bytestring "\xc3\xa0"] test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { set x "\u4e4e" } [bytestring "\xe4\xb9\x8e"] test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} { format %c 0x110000 } [bytestring "\xef\xbf\xbd"] test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} { format %c -1 } [bytestring "\xef\xbf\xbd"] test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" } {3} test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} { string length [bytestring "\x82\x83\x84"] } {3} |
︙ | ︙ | |||
282 283 284 285 286 287 288 289 290 291 292 293 294 295 | # this returns 1 with Unicode 6 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220] } {1 1} test utf-21.3 {unicode print char in regc_locale.c} { # this returns 1 with Unicode 6 compliance regexp {^[[:print:]]+$} \ufbc1 } 1 test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { string wordend "x\u5080z123_bar\u203c fg" 0 } 10 | > > > > > > > > | 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 | # this returns 1 with Unicode 6 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220] } {1 1} test utf-21.3 {unicode print char in regc_locale.c} { # this returns 1 with Unicode 6 compliance regexp {^[[:print:]]+$} \ufbc1 } 1 test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \u0120 } {1} test utf-21.5 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {^[[:graph:]]+$} \u0120 } {1} test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { string wordend "x\u5080z123_bar\u203c fg" 0 } 10 |
︙ | ︙ |
Changes to tools/uniParse.tcl.
︙ | ︙ | |||
26 27 28 29 30 31 32 | # unassigned character group variable categories { Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So }; # Ordered list of character categories, must # match the enumeration in the header file. | < < < < < < | | | > > | < < | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | # unassigned character group variable categories { Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So }; # Ordered list of character categories, must # match the enumeration in the header file. } proc uni::getValue {items index} { variable categories # Extract character info set category [lindex $items 2] if {[scan [lindex $items 12] %x toupper] == 1} { set toupper [expr {$index - $toupper}] } else { set toupper 0 } if {[scan [lindex $items 13] %x tolower] == 1} { set tolower [expr {$tolower - $index}] } else { set tolower 0 } if {[scan [lindex $items 14] %x totitle] == 1} { set totitle [expr {$index - $totitle}] } elseif {$tolower} { set totitle 0 } else { set totitle $toupper } set categoryIndex [lsearch -exact $categories $category] if {$categoryIndex < 0} { puts "Unexpected character category: $index($category)" set categoryIndex 0 } return [list $categoryIndex $toupper $tolower $totitle] } proc uni::getGroup {value} { variable groups set gIndex [lsearch -exact $groups $value] if {$gIndex == -1} { |
︙ | ︙ | |||
96 97 98 99 100 101 102 | } proc uni::buildTables {data} { variable shift variable pMap {} variable pages {} | | | | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | } proc uni::buildTables {data} { variable shift variable pMap {} variable pages {} variable groups {{0 0 0 0}} variable next 0 set info {} ;# temporary page info set mask [expr {(1 << $shift) - 1}] foreach line [split $data \n] { if {$line eq ""} { if {!($next & $mask)} { # next character is already on page boundary continue } # fill remaining page set line [format %X [expr {($next-1)|$mask}]] append line ";;Cn;0;ON;;;;;N;;;;;\n" } set items [split $line \;] scan [lindex $items 0] %x index if {$index > 0xffff} then { # Ignore non-BMP characters, as long as Tcl doesn't support them continue } set index [format %d $index] set gIndex [getGroup [getValue $items $index]] |
︙ | ︙ | |||
167 168 169 170 171 172 173 | proc uni::main {} { global argc argv0 argv variable pMap variable pages variable groups variable shift | < < | 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | proc uni::main {} { global argc argv0 argv variable pMap variable pages variable groups variable shift variable next if {$argc != 2} { puts stderr "\nusage: $argv0 <datafile> <outdir>\n" exit 1 } set f [open [lindex $argv 0] r] set data [read $f] close $f buildTables $data puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" set size [expr {[llength $pMap]*2 + [llength $pages]*(1<<$shift)}] puts "shift = $shift, space = $size" set f [open [file join [lindex $argv 1] tclUniData.c] w] fconfigure $f -translation lf puts $f "/* * tclUniData.c -- * * Declarations of Unicode character information tables. This file is |
︙ | ︙ | |||
276 277 278 279 280 281 282 | * highest field so we can easily sign extend. */ static const int groups\[\] = {" set line " " set last [expr {[llength $groups] - 1}] for {set i 0} {$i <= $last} {incr i} { | | | 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 | * highest field so we can easily sign extend. */ static const int groups\[\] = {" set line " " set last [expr {[llength $groups] - 1}] for {set i 0} {$i <= $last} {incr i} { foreach {type toupper tolower totitle} [lindex $groups $i] {} # Compute the case conversion type and delta if {$totitle} { if {$totitle == $toupper} { # subtract delta for title or upper set case 4 |
︙ | ︙ | |||
325 326 327 328 329 330 331 | puts -nonewline $f "}; /* * The following constants are used to determine the category of a * Unicode character. */ | | | | 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 | puts -nonewline $f "}; /* * The following constants are used to determine the category of a * Unicode character. */ #define UNICODE_CATEGORY_MASK 0x1f #define UNICODE_OUT_OF_RANGE " puts $f [format 0x%xu $next] puts $f " enum { UNASSIGNED, UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, |
︙ | ︙ | |||
368 369 370 371 372 373 374 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ | | | | 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xe0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f) #define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) /* * This macro extracts the information about a character from the * Unicode character tables. */ |
︙ | ︙ |