Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | [Bug 3464428] string is graph \u0120 is wrong |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 0c1ac83954446a04679d583e0e4914cf |
User & Date: | jan.nijtmans 2011-12-24 00:30:08 |
Context
2011-12-31
| ||
15:04 | merge trunk check-in: d7da5e7e1e user: dkf tags: dkf-alias-encoding | |
2011-12-30
| ||
08:26 | Update to Olson's tzdata2011n check-in: fdf178b941 user: venkat tags: trunk | |
2011-12-28
| ||
23:29 | first attempt at fixing bug-3466099 check-in: 625e54fc08 user: jan.nijtmans tags: bug-3466099 | |
2011-12-24
| ||
00:30 | [Bug 3464428] string is graph \u0120 is wrong check-in: 0c1ac83954 user: jan.nijtmans tags: trunk | |
00:15 | [Bug 3464428] string is graph \u0120 is wrong check-in: 005fc77cde user: jan.nijtmans tags: core-8-5-branch | |
2011-12-13
| ||
17:45 | (TclInitAuxDataTypeTable): Extended to register the DictUpdateInfo structure as an AuxData type. For... check-in: 1afb7a55b3 user: andreask tags: trunk | |
Changes
Changes to ChangeLog.
1 2 3 4 5 6 7 | 2011-12-13 Andreas Kupries <[email protected]> * generic/tclCompile.c (TclInitAuxDataTypeTable): Extended to register the DictUpdateInfo structure as an AuxData type. For use by tbcload, tclcompiler. 2011-12-11 Jan Nijtmans <[email protected]> | > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | 2011-12-23 Jan Nijtmans <[email protected]> * generic/tclUtf.c: [Bug 3464428] string is graph \u0120 is wrong * generic/tclUniData.c: * generic/regc_locale.c: * tests/utf.test: * tools/uniParse.tcl: clean up some unused stuff, and be more robust against changes in UnicodeData.txt syntax 2011-12-13 Andreas Kupries <[email protected]> * generic/tclCompile.c (TclInitAuxDataTypeTable): Extended to register the DictUpdateInfo structure as an AuxData type. For use by tbcload, tclcompiler. 2011-12-11 Jan Nijtmans <[email protected]> |
︙ | ︙ |
Changes to generic/regc_locale.c.
︙ | ︙ | |||
214 215 216 217 218 219 220 | 0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x103f, 0x1061, 0x1065, 0x1066, 0x108e, 0x10fc, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1bae, 0x1baf, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, | | | 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | 0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x103f, 0x1061, 0x1065, 0x1066, 0x108e, 0x10fc, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1bae, 0x1baf, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44 }; #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) /* * Unicode: decimal digit characters. */ |
︙ | ︙ | |||
474 475 476 477 478 479 480 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) /* * Unicode: unicode print characters excluding space. */ static const crange graphRangeTable[] = { | | < | | | < | | | | | | | | | | | | < | | | | | | | | | | < | | | | | < | | | | | | | | | < | | | | | | < < < < < < < < < < < < < < < < < < < < < < | | | | | | | | | < < < < < < < < < < | < < | | | < | | | | < < < < < < < < < < < | | < | | | | | | | | | | | | | | | | | | < | 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) /* * Unicode: unicode print characters excluding space. 
*/ static const crange graphRangeTable[] = { {0x0021, 0x007e}, {0x00a0, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x0527}, {0x0531, 0x0556}, {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7}, {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x061e, 0x06dc}, {0x06de, 0x070d}, {0x0710, 0x074a}, {0x074d, 0x07b1}, {0x07c0, 0x07fa}, {0x0800, 0x082d}, {0x0830, 0x083e}, {0x0840, 0x085b}, {0x0900, 0x0977}, {0x0979, 0x097f}, {0x0981, 0x0983}, {0x0985, 0x098c}, {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09bc, 0x09c4}, {0x09cb, 0x09ce}, {0x09df, 0x09e3}, {0x09e6, 0x09fb}, {0x0a01, 0x0a03}, {0x0a05, 0x0a0a}, {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a75}, {0x0a81, 0x0a83}, {0x0a85, 0x0a8d}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae0, 0x0ae3}, {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b35, 0x0b39}, {0x0b3c, 0x0b44}, {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b63}, {0x0b66, 0x0b77}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0be6, 0x0bfa}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, {0x0c3d, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c60, 0x0c63}, {0x0c66, 0x0c6f}, {0x0c78, 0x0c7f}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0cbc, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, {0x0ce0, 0x0ce3}, {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d3a}, {0x0d3d, 0x0d44}, {0x0d46, 0x0d48}, {0x0d4a, 0x0d4e}, {0x0d60, 0x0d63}, {0x0d66, 0x0d75}, {0x0d79, 0x0d7f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, {0x0df2, 0x0df4}, {0x0e01, 
0x0e3a}, {0x0e3f, 0x0e5b}, {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f47}, {0x0f49, 0x0f6c}, {0x0f71, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda}, {0x1000, 0x10c5}, {0x10d0, 0x10fc}, {0x1100, 0x1248}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a}, {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x169c}, {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714}, {0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770}, {0x1780, 0x17b3}, {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1800, 0x180e}, {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa}, {0x18b0, 0x18f5}, {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x1944, 0x196d}, {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9}, {0x19d0, 0x19da}, {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a89}, {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b}, {0x1b50, 0x1b7c}, {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3}, {0x1bfc, 0x1c37}, {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2}, {0x1d00, 0x1de6}, {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, {0x2000, 0x200a}, {0x2010, 0x2029}, {0x202f, 0x205f}, {0x2074, 0x208e}, {0x2090, 0x209c}, {0x20a0, 0x20b9}, {0x20d0, 0x20f0}, {0x2100, 0x2189}, {0x2190, 0x23f3}, {0x2400, 0x2426}, {0x2440, 0x244a}, {0x2460, 0x26ff}, {0x2701, 0x27ca}, {0x27ce, 0x2b4c}, {0x2b50, 0x2b59}, {0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}, {0x2c60, 0x2cf1}, {0x2cf9, 0x2d25}, {0x2d30, 0x2d65}, {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 
0x2dae}, {0x2db0, 0x2db6}, {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce}, {0x2dd0, 0x2dd6}, {0x2dd8, 0x2dde}, {0x2de0, 0x2e31}, {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3000, 0x303f}, {0x3041, 0x3096}, {0x3099, 0x30ff}, {0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31ba}, {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x32fe}, {0x3300, 0x4db5}, {0x4dc0, 0x9fcb}, {0xa000, 0xa48c}, {0xa490, 0xa4c6}, {0xa4d0, 0xa62b}, {0xa640, 0xa673}, {0xa67c, 0xa697}, {0xa6a0, 0xa6f7}, {0xa700, 0xa78e}, {0xa7a0, 0xa7a9}, {0xa7fa, 0xa82b}, {0xa830, 0xa839}, {0xa840, 0xa877}, {0xa880, 0xa8c4}, {0xa8ce, 0xa8d9}, {0xa8e0, 0xa8fb}, {0xa900, 0xa953}, {0xa95f, 0xa97c}, {0xa980, 0xa9cd}, {0xa9cf, 0xa9d9}, {0xaa00, 0xaa36}, {0xaa40, 0xaa4d}, {0xaa50, 0xaa59}, {0xaa5c, 0xaa7b}, {0xaa80, 0xaac2}, {0xaadb, 0xaadf}, {0xab01, 0xab06}, {0xab09, 0xab0e}, {0xab11, 0xab16}, {0xab20, 0xab26}, {0xab28, 0xab2e}, {0xabc0, 0xabed}, {0xabf0, 0xabf9}, {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfa2d}, {0xfa30, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1}, {0xfbd3, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd}, {0xfe00, 0xfe19}, {0xfe20, 0xfe26}, {0xfe30, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff01, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee} }; #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) static const chr graphCharTable[] = { 0x038c, 0x0589, 0x058a, 0x085e, 0x098f, 0x0990, 0x09b2, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39, 0x0a3c, 0x0a47, 0x0a48, 0x0a51, 0x0a5e, 0x0ab2, 0x0ab3, 0x0ad0, 0x0af1, 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c, 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0bd0, 0x0bd7, 0x0c55, 
0x0c56, 0x0c58, 0x0c59, 0x0c82, 0x0c83, 0x0cd5, 0x0cd6, 0x0cde, 0x0cf1, 0x0cf2, 0x0d02, 0x0d03, 0x0d57, 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc, 0x0edd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x2071, 0x27cc, 0x2d6f, 0x2d70, 0xa790, 0xa791, 0xa9de, 0xa9df, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffc, 0xfffd }; #define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) /* * End of auto-generated Unicode character ranges declarations. */ |
︙ | ︙ |
Changes to generic/tclUniData.c.
︙ | ︙ | |||
771 772 773 774 775 776 777 | }; /* * The following constants are used to determine the category of a * Unicode character. */ | | | 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 | }; /* * The following constants are used to determine the category of a * Unicode character. */ #define UNICODE_CATEGORY_MASK 0x1f #define UNICODE_OUT_OF_RANGE 0x10000u enum { UNASSIGNED, UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, |
︙ | ︙ | |||
813 814 815 816 817 818 819 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ | | | | 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xe0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f) #define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) /* * This macro extracts the information about a character from the * Unicode character tables. */ #define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]]) |
Changes to generic/tclUtf.c.
︙ | ︙ | |||
1421 1422 1423 1424 1425 1426 1427 | */ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); | | | 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 | */ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return (((PRINT_BITS >> category) & 1) && (ch != ' ')); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsLower -- * |
︙ | ︙ |
Changes to tests/utf.test.
︙ | ︙ | |||
297 298 299 300 301 302 303 304 305 306 307 308 309 310 | # this returns 1 with Unicode 6 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220] } {1 1} test utf-21.3 {unicode print char in regc_locale.c} { # this returns 1 with Unicode 6 compliance regexp {^[[:print:]]+$} \ufbc1 } 1 test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { string wordend "x\u5080z123_bar\u203c fg" 0 } 10 | > > > > > > > > | 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | # this returns 1 with Unicode 6 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220] } {1 1} test utf-21.3 {unicode print char in regc_locale.c} { # this returns 1 with Unicode 6 compliance regexp {^[[:print:]]+$} \ufbc1 } 1 test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \u0120 } {1} test utf-21.5 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {^[[:graph:]]+$} \u0120 } {1} test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { string wordend "x\u5080z123_bar\u203c fg" 0 } 10 |
︙ | ︙ |
Changes to tools/uniParse.tcl.
︙ | ︙ | |||
26 27 28 29 30 31 32 | # unassigned character group variable categories { Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So }; # Ordered list of character categories, must # match the enumeration in the header file. | < < < < < < | | | > > | < < | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | # unassigned character group variable categories { Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So }; # Ordered list of character categories, must # match the enumeration in the header file. } proc uni::getValue {items index} { variable categories # Extract character info set category [lindex $items 2] if {[scan [lindex $items 12] %x toupper] == 1} { set toupper [expr {$index - $toupper}] } else { set toupper 0 } if {[scan [lindex $items 13] %x tolower] == 1} { set tolower [expr {$tolower - $index}] } else { set tolower 0 } if {[scan [lindex $items 14] %x totitle] == 1} { set totitle [expr {$index - $totitle}] } elseif {$tolower} { set totitle 0 } else { set totitle $toupper } set categoryIndex [lsearch -exact $categories $category] if {$categoryIndex < 0} { puts "Unexpected character category: $index($category)" set categoryIndex 0 } return [list $categoryIndex $toupper $tolower $totitle] } proc uni::getGroup {value} { variable groups set gIndex [lsearch -exact $groups $value] if {$gIndex == -1} { |
︙ | ︙ | |||
96 97 98 99 100 101 102 | } proc uni::buildTables {data} { variable shift variable pMap {} variable pages {} | | | | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | } proc uni::buildTables {data} { variable shift variable pMap {} variable pages {} variable groups {{0 0 0 0}} variable next 0 set info {} ;# temporary page info set mask [expr {(1 << $shift) - 1}] foreach line [split $data \n] { if {$line eq ""} { if {!($next & $mask)} { # next character is already on page boundary continue } # fill remaining page set line [format %X [expr {($next-1)|$mask}]] append line ";;Cn;0;ON;;;;;N;;;;;\n" } set items [split $line \;] scan [lindex $items 0] %x index if {$index > 0xffff} then { # Ignore non-BMP characters, as long as Tcl doesn't support them continue } set index [format %d $index] set gIndex [getGroup [getValue $items $index]] |
︙ | ︙ | |||
167 168 169 170 171 172 173 | proc uni::main {} { global argc argv0 argv variable pMap variable pages variable groups variable shift | < < | 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | proc uni::main {} { global argc argv0 argv variable pMap variable pages variable groups variable shift variable next if {$argc != 2} { puts stderr "\nusage: $argv0 <datafile> <outdir>\n" exit 1 } set f [open [lindex $argv 0] r] set data [read $f] close $f buildTables $data puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" set size [expr {[llength $pMap]*2 + [llength $pages]*(1<<$shift)}] puts "shift = $shift, space = $size" set f [open [file join [lindex $argv 1] tclUniData.c] w] fconfigure $f -translation lf puts $f "/* * tclUniData.c -- * * Declarations of Unicode character information tables. This file is |
︙ | ︙ | |||
276 277 278 279 280 281 282 | * highest field so we can easily sign extend. */ static const int groups\[\] = {" set line " " set last [expr {[llength $groups] - 1}] for {set i 0} {$i <= $last} {incr i} { | | | 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 | * highest field so we can easily sign extend. */ static const int groups\[\] = {" set line " " set last [expr {[llength $groups] - 1}] for {set i 0} {$i <= $last} {incr i} { foreach {type toupper tolower totitle} [lindex $groups $i] {} # Compute the case conversion type and delta if {$totitle} { if {$totitle == $toupper} { # subtract delta for title or upper set case 4 |
︙ | ︙ | |||
325 326 327 328 329 330 331 | puts -nonewline $f "}; /* * The following constants are used to determine the category of a * Unicode character. */ | | | | 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 | puts -nonewline $f "}; /* * The following constants are used to determine the category of a * Unicode character. */ #define UNICODE_CATEGORY_MASK 0x1f #define UNICODE_OUT_OF_RANGE " puts $f [format 0x%xu $next] puts $f " enum { UNASSIGNED, UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, |
︙ | ︙ | |||
368 369 370 371 372 373 374 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ | | | | 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xe0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f) #define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) /* * This macro extracts the information about a character from the * Unicode character tables. */ |
︙ | ︙ |