Tcl Source Code

Check-in [005fc77cde]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[Bug 3464428] string is graph \u0120 is wrong
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-5-branch
Files: files | file ages | folders
SHA1: 005fc77cde8bf71bb89a9498c56e9b1bb9d1dc85
User & Date: jan.nijtmans 2011-12-24 00:15:37
Context
2011-12-30
08:14
Update to Olson's tzdata2011n check-in: f903a74e2f user: venkat tags: core-8-5-branch
2011-12-24
00:30
[Bug 3464428] string is graph \u0120 is wrong check-in: 0c1ac83954 user: jan.nijtmans tags: trunk
00:15
[Bug 3464428] string is graph \u0120 is wrong check-in: 005fc77cde user: jan.nijtmans tags: core-8-5-branch
2011-12-23
23:31
[Bug 3464428] string is graph \u0120 is wrong check-in: 13071df962 user: jan.nijtmans tags: core-8-4-branch
2011-12-11
09:13
[Bug 3457031]: Some Unicode 6.0 chars not in [:print:] class check-in: 6a31dc71dc user: jan.nijtmans tags: core-8-5-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ChangeLog.










1
2
3
4
5
6
7









2011-12-11  Jan Nijtmans  <[email protected]>

	* generic/regc_locale.c: [Bug 3457031]: Some Unicode 6.0 chars not
	* tests/utf.test:        in [:print:] class

2011-12-07  Jan Nijtmans  <[email protected]>

>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
2011-12-23  Jan Nijtmans  <[email protected]>

	* generic/tclUtf.c:     [Bug 3464428] string is graph \u0120 is wrong
	* generic/tclUniData.c:
	* generic/regc_locale.c:
	* tests/utf.test:
	* tools/uniParse.tcl:   clean up some unused stuff, and be more robust
	against changes in UnicodeData.txt syntax

2011-12-11  Jan Nijtmans  <[email protected]>

	* generic/regc_locale.c: [Bug 3457031]: Some Unicode 6.0 chars not
	* tests/utf.test:        in [:print:] class

2011-12-07  Jan Nijtmans  <[email protected]>

Changes to generic/regc_locale.c.

214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
    0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2,
    0x0eb3, 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x103f, 0x1061, 0x1065,
    0x1066, 0x108e, 0x10fc, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7,
    0x1bae, 0x1baf, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102,
    0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2d6f,
    0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791,
    0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d,
    0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffe, 0xffff
};

#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))

/*
 * Unicode: decimal digit characters.
 */







|







214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
    0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2,
    0x0eb3, 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x103f, 0x1061, 0x1065,
    0x1066, 0x108e, 0x10fc, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7,
    0x1bae, 0x1baf, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102,
    0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2d6f,
    0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791,
    0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d,
    0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44
};

#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))

/*
 * Unicode: decimal digit characters.
 */
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))

/*
 * Unicode: unicode print characters excluding space.
 */

static const crange graphRangeTable[] = {
    {0x0021, 0x007e}, {0x00a0, 0x00ac}, {0x00ae, 0x011f}, {0x0121, 0x021f},
    {0x0221, 0x031f}, {0x0321, 0x0377}, {0x037a, 0x037e}, {0x0384, 0x038a},
    {0x038e, 0x03a1}, {0x03a3, 0x041f}, {0x0421, 0x051f}, {0x0521, 0x0527},
    {0x0531, 0x0556}, {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7},
    {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x0621, 0x06dc},
    {0x06de, 0x070d}, {0x0710, 0x071f}, {0x0721, 0x074a}, {0x074d, 0x07b1},
    {0x07c0, 0x07fa}, {0x0800, 0x081f}, {0x0821, 0x082d}, {0x0830, 0x083e},
    {0x0840, 0x085b}, {0x0900, 0x091f}, {0x0921, 0x0977}, {0x0979, 0x097f},
    {0x0981, 0x0983}, {0x0985, 0x098c}, {0x0993, 0x09a8}, {0x09aa, 0x09b0},
    {0x09b6, 0x09b9}, {0x09bc, 0x09c4}, {0x09cb, 0x09ce}, {0x09df, 0x09e3},
    {0x09e6, 0x09fb}, {0x0a01, 0x0a03}, {0x0a05, 0x0a0a}, {0x0a13, 0x0a1f},
    {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, {0x0a4b, 0x0a4d},
    {0x0a59, 0x0a5c}, {0x0a66, 0x0a75}, {0x0a81, 0x0a83}, {0x0a85, 0x0a8d},
    {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, {0x0ab5, 0x0ab9},
    {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae0, 0x0ae3},
    {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f},
    {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b35, 0x0b39}, {0x0b3c, 0x0b44},
    {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b63}, {0x0b66, 0x0b77}, {0x0b85, 0x0b8a},
    {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb9},
    {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0be6, 0x0bfa},
    {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c1f},
    {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, {0x0c3d, 0x0c44},
    {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c60, 0x0c63}, {0x0c66, 0x0c6f},
    {0x0c78, 0x0c7f}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8},
    {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0cbc, 0x0cc4}, {0x0cc6, 0x0cc8},
    {0x0cca, 0x0ccd}, {0x0ce0, 0x0ce3}, {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c},
    {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f}, {0x0d21, 0x0d3a}, {0x0d3d, 0x0d44},
    {0x0d46, 0x0d48}, {0x0d4a, 0x0d4e}, {0x0d60, 0x0d63}, {0x0d66, 0x0d75},
    {0x0d79, 0x0d7f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, {0x0db3, 0x0dbb},
    {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, {0x0df2, 0x0df4},
    {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b}, {0x0e94, 0x0e97},
    {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd},
    {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f1f},
    {0x0f21, 0x0f47}, {0x0f49, 0x0f6c}, {0x0f71, 0x0f97}, {0x0f99, 0x0fbc},
    {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda}, {0x1000, 0x101f}, {0x1021, 0x10c5},
    {0x10d0, 0x10fc}, {0x1100, 0x111f}, {0x1121, 0x121f}, {0x1221, 0x1248},
    {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288},
    {0x128a, 0x128d}, {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be},
    {0x12c2, 0x12c5}, {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315},
    {0x1318, 0x131f}, {0x1321, 0x135a}, {0x135d, 0x137c}, {0x1380, 0x1399},
    {0x13a0, 0x13f4}, {0x1400, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f},
    {0x1621, 0x169c}, {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714},
    {0x1721, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770},
    {0x1780, 0x17b3}, {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9},
    {0x1800, 0x180e}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18aa},
    {0x18b0, 0x18f5}, {0x1900, 0x191c}, {0x1921, 0x192b}, {0x1930, 0x193b},
    {0x1944, 0x196d}, {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9},
    {0x19d0, 0x19da}, {0x19de, 0x1a1b}, {0x1a21, 0x1a5e}, {0x1a60, 0x1a7c},
    {0x1a7f, 0x1a89}, {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b1f},
    {0x1b21, 0x1b4b}, {0x1b50, 0x1b7c}, {0x1b80, 0x1baa}, {0x1bae, 0x1bb9},
    {0x1bc0, 0x1bf3}, {0x1bfc, 0x1c1f}, {0x1c21, 0x1c37}, {0x1c3b, 0x1c49},
    {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2}, {0x1d00, 0x1d1f}, {0x1d21, 0x1de6},
    {0x1dfc, 0x1e1f}, {0x1e21, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f21, 0x1f45},
    {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
    {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef},
    {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, {0x2000, 0x200a}, {0x2010, 0x201f},
    {0x2021, 0x2029}, {0x202f, 0x205f}, {0x2074, 0x208e}, {0x2090, 0x209c},
    {0x20a0, 0x20b9}, {0x20d0, 0x20f0}, {0x2100, 0x211f}, {0x2121, 0x2189},
    {0x2190, 0x221f}, {0x2221, 0x231f}, {0x2321, 0x23f3}, {0x2400, 0x241f},
    {0x2421, 0x2426}, {0x2440, 0x244a}, {0x2460, 0x251f}, {0x2521, 0x261f},
    {0x2621, 0x26ff}, {0x2701, 0x271f}, {0x2721, 0x27ca}, {0x27ce, 0x281f},
    {0x2821, 0x291f}, {0x2921, 0x2a1f}, {0x2a21, 0x2b1f}, {0x2b21, 0x2b4c},
    {0x2b50, 0x2b59}, {0x2c00, 0x2c1f}, {0x2c21, 0x2c2e}, {0x2c30, 0x2c5e},
    {0x2c60, 0x2cf1}, {0x2cf9, 0x2d1f}, {0x2d21, 0x2d25}, {0x2d30, 0x2d65},
    {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 0x2dae}, {0x2db0, 0x2db6},
    {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce}, {0x2dd0, 0x2dd6},
    {0x2dd8, 0x2dde}, {0x2de0, 0x2e1f}, {0x2e21, 0x2e31}, {0x2e80, 0x2e99},
    {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5}, {0x2ff0, 0x2ffb},
    {0x3000, 0x301f}, {0x3021, 0x303f}, {0x3041, 0x3096}, {0x3099, 0x30ff},
    {0x3105, 0x311f}, {0x3121, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31ba},
    {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3221, 0x32fe}, {0x3300, 0x331f},
    {0x3321, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f},
    {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f},
    {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f},
    {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f},
    {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f},
    {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f},
    {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4dc0, 0x4e1f},
    {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f},
    {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f},
    {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f},
    {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f},
    {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f},
    {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f},
    {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f},
    {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f},
    {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f},
    {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f},
    {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f},
    {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f},
    {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f},
    {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f},
    {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f},
    {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f},
    {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f},
    {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f},
    {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f},
    {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f},
    {0x9e21, 0x9f1f}, {0x9f21, 0x9fcb}, {0xa000, 0xa01f}, {0xa021, 0xa11f},
    {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c},
    {0xa490, 0xa4c6}, {0xa4d0, 0xa51f}, {0xa521, 0xa61f}, {0xa621, 0xa62b},
    {0xa640, 0xa673}, {0xa67c, 0xa697}, {0xa6a0, 0xa6f7}, {0xa700, 0xa71f},
    {0xa721, 0xa78e}, {0xa7a0, 0xa7a9}, {0xa7fa, 0xa81f}, {0xa821, 0xa82b},
    {0xa830, 0xa839}, {0xa840, 0xa877}, {0xa880, 0xa8c4}, {0xa8ce, 0xa8d9},
    {0xa8e0, 0xa8fb}, {0xa900, 0xa91f}, {0xa921, 0xa953}, {0xa95f, 0xa97c},
    {0xa980, 0xa9cd}, {0xa9cf, 0xa9d9}, {0xaa00, 0xaa1f}, {0xaa21, 0xaa36},
    {0xaa40, 0xaa4d}, {0xaa50, 0xaa59}, {0xaa5c, 0xaa7b}, {0xaa80, 0xaac2},
    {0xaadb, 0xaadf}, {0xab01, 0xab06}, {0xab09, 0xab0e}, {0xab11, 0xab16},
    {0xab21, 0xab26}, {0xab28, 0xab2e}, {0xabc0, 0xabed}, {0xabf0, 0xabf9},
    {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f},
    {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f},
    {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f},
    {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f},
    {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f},
    {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f},
    {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f},
    {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f},
    {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f},
    {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f},
    {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f},
    {0xd721, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xf91f},
    {0xf921, 0xfa1f}, {0xfa21, 0xfa2d}, {0xfa30, 0xfa6d}, {0xfa70, 0xfad9},
    {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36},
    {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f},
    {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd},
    {0xfe00, 0xfe19}, {0xfe21, 0xfe26}, {0xfe30, 0xfe52}, {0xfe54, 0xfe66},
    {0xfe68, 0xfe6b}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff01, 0xff1f},
    {0xff21, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7},
    {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}, {0xfffc, 0xffff}
};

#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))

static const chr graphCharTable[] = {
    0x038c, 0x0589, 0x058a, 0x061e, 0x061f, 0x085e, 0x098f, 0x0990, 0x09b2,
    0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, 0x0a0f, 0x0a10, 0x0a32, 0x0a33,
    0x0a35, 0x0a36, 0x0a38, 0x0a39, 0x0a3c, 0x0a47, 0x0a48, 0x0a51, 0x0a5e,
    0x0ab2, 0x0ab3, 0x0ad0, 0x0af1, 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47,
    0x0b48, 0x0b56, 0x0b57, 0x0b5c, 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a,
    0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0bd0, 0x0bd7, 0x0c55, 0x0c56,
    0x0c58, 0x0c59, 0x0c82, 0x0c83, 0x0cd5, 0x0cd6, 0x0cde, 0x0cf1, 0x0cf2,
    0x0d02, 0x0d03, 0x0d57, 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81,
    0x0e82, 0x0e84, 0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa,
    0x0eab, 0x0ec6, 0x0edc, 0x0edd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940,
    0x1a1e, 0x1a1f, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x2071, 0x27cc, 0x2d6f,
    0x2d70, 0xa790, 0xa791, 0xa9de, 0xa9df, 0xfb3e, 0xfb40, 0xfb41, 0xfb43,
    0xfb44
};

#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))

/*
 *	End of auto-generated Unicode character ranges declarations.
 */







|
<
|
|
|
<
|
|


|
|
|
|
|
|
|
|
|
|
<
|
|
|
|
|
|



|
|
|
|
<
|
|
|
|
|
<
|
|
|
|
|
|
|
|
|
<
|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
|
<
<
|
|
|
<
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
|
|
<
|
|
|
|
|
|





|
|
|
|
|
|
|
|
|
|
|
|
<







474
475
476
477
478
479
480
481

482
483
484

485
486
487
488
489
490
491
492
493
494
495
496
497
498

499
500
501
502
503
504
505
506
507
508
509
510
511

512
513
514
515
516

517
518
519
520
521
522
523
524
525

526
527
528
529
530
531






















532
533
534
535
536
537
538
539
540










541


542
543
544

545
546
547
548











549
550

551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573

574
575
576
577
578
579
580
#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))

/*
 * Unicode: unicode print characters excluding space.
 */

static const crange graphRangeTable[] = {
    {0x0021, 0x007e}, {0x00a0, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e},

    {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x0527}, {0x0531, 0x0556},
    {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7}, {0x05d0, 0x05ea},
    {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x061e, 0x06dc}, {0x06de, 0x070d},

    {0x0710, 0x074a}, {0x074d, 0x07b1}, {0x07c0, 0x07fa}, {0x0800, 0x082d},
    {0x0830, 0x083e}, {0x0840, 0x085b}, {0x0900, 0x0977}, {0x0979, 0x097f},
    {0x0981, 0x0983}, {0x0985, 0x098c}, {0x0993, 0x09a8}, {0x09aa, 0x09b0},
    {0x09b6, 0x09b9}, {0x09bc, 0x09c4}, {0x09cb, 0x09ce}, {0x09df, 0x09e3},
    {0x09e6, 0x09fb}, {0x0a01, 0x0a03}, {0x0a05, 0x0a0a}, {0x0a13, 0x0a28},
    {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c},
    {0x0a66, 0x0a75}, {0x0a81, 0x0a83}, {0x0a85, 0x0a8d}, {0x0a8f, 0x0a91},
    {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5},
    {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae0, 0x0ae3}, {0x0ae6, 0x0aef},
    {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
    {0x0b35, 0x0b39}, {0x0b3c, 0x0b44}, {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b63},
    {0x0b66, 0x0b77}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, {0x0b92, 0x0b95},
    {0x0ba8, 0x0baa}, {0x0bae, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8},
    {0x0bca, 0x0bcd}, {0x0be6, 0x0bfa}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c},

    {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39},
    {0x0c3d, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c60, 0x0c63},
    {0x0c66, 0x0c6f}, {0x0c78, 0x0c7f}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90},
    {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0cbc, 0x0cc4},
    {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, {0x0ce0, 0x0ce3}, {0x0ce6, 0x0cef},
    {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d3a}, {0x0d3d, 0x0d44},
    {0x0d46, 0x0d48}, {0x0d4a, 0x0d4e}, {0x0d60, 0x0d63}, {0x0d66, 0x0d75},
    {0x0d79, 0x0d7f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, {0x0db3, 0x0dbb},
    {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, {0x0df2, 0x0df4},
    {0x0e01, 0x0e3a}, {0x0e3f, 0x0e5b}, {0x0e94, 0x0e97}, {0x0e99, 0x0e9f},
    {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4},
    {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f47}, {0x0f49, 0x0f6c},
    {0x0f71, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda},

    {0x1000, 0x10c5}, {0x10d0, 0x10fc}, {0x1100, 0x1248}, {0x124a, 0x124d},
    {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d},
    {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5},
    {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a},
    {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x169c},

    {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714}, {0x1720, 0x1736},
    {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770}, {0x1780, 0x17b3},
    {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1800, 0x180e},
    {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa}, {0x18b0, 0x18f5},
    {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x1944, 0x196d},
    {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9}, {0x19d0, 0x19da},
    {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a89},
    {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b}, {0x1b50, 0x1b7c},
    {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3}, {0x1bfc, 0x1c37},

    {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2}, {0x1d00, 0x1de6},
    {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d},
    {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4},
    {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4},
    {0x1ff6, 0x1ffe}, {0x2000, 0x200a}, {0x2010, 0x2029}, {0x202f, 0x205f},
    {0x2074, 0x208e}, {0x2090, 0x209c}, {0x20a0, 0x20b9}, {0x20d0, 0x20f0},






















    {0x2100, 0x2189}, {0x2190, 0x23f3}, {0x2400, 0x2426}, {0x2440, 0x244a},
    {0x2460, 0x26ff}, {0x2701, 0x27ca}, {0x27ce, 0x2b4c}, {0x2b50, 0x2b59},
    {0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}, {0x2c60, 0x2cf1}, {0x2cf9, 0x2d25},
    {0x2d30, 0x2d65}, {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 0x2dae},
    {0x2db0, 0x2db6}, {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce},
    {0x2dd0, 0x2dd6}, {0x2dd8, 0x2dde}, {0x2de0, 0x2e31}, {0x2e80, 0x2e99},
    {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3000, 0x303f},
    {0x3041, 0x3096}, {0x3099, 0x30ff}, {0x3105, 0x312d}, {0x3131, 0x318e},
    {0x3190, 0x31ba}, {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x32fe},










    {0x3300, 0x4db5}, {0x4dc0, 0x9fcb}, {0xa000, 0xa48c}, {0xa490, 0xa4c6},


    {0xa4d0, 0xa62b}, {0xa640, 0xa673}, {0xa67c, 0xa697}, {0xa6a0, 0xa6f7},
    {0xa700, 0xa78e}, {0xa7a0, 0xa7a9}, {0xa7fa, 0xa82b}, {0xa830, 0xa839},
    {0xa840, 0xa877}, {0xa880, 0xa8c4}, {0xa8ce, 0xa8d9}, {0xa8e0, 0xa8fb},

    {0xa900, 0xa953}, {0xa95f, 0xa97c}, {0xa980, 0xa9cd}, {0xa9cf, 0xa9d9},
    {0xaa00, 0xaa36}, {0xaa40, 0xaa4d}, {0xaa50, 0xaa59}, {0xaa5c, 0xaa7b},
    {0xaa80, 0xaac2}, {0xaadb, 0xaadf}, {0xab01, 0xab06}, {0xab09, 0xab0e},
    {0xab11, 0xab16}, {0xab20, 0xab26}, {0xab28, 0xab2e}, {0xabc0, 0xabed},











    {0xabf0, 0xabf9}, {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb},
    {0xf900, 0xfa2d}, {0xfa30, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06},

    {0xfb13, 0xfb17}, {0xfb1d, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1},
    {0xfbd3, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd},
    {0xfe00, 0xfe19}, {0xfe20, 0xfe26}, {0xfe30, 0xfe52}, {0xfe54, 0xfe66},
    {0xfe68, 0xfe6b}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff01, 0xffbe},
    {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc},
    {0xffe0, 0xffe6}, {0xffe8, 0xffee}
};

#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))

static const chr graphCharTable[] = {
    0x038c, 0x0589, 0x058a, 0x085e, 0x098f, 0x0990, 0x09b2, 0x09c7, 0x09c8,
    0x09d7, 0x09dc, 0x09dd, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36,
    0x0a38, 0x0a39, 0x0a3c, 0x0a47, 0x0a48, 0x0a51, 0x0a5e, 0x0ab2, 0x0ab3,
    0x0ad0, 0x0af1, 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56,
    0x0b57, 0x0b5c, 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e,
    0x0b9f, 0x0ba3, 0x0ba4, 0x0bd0, 0x0bd7, 0x0c55, 0x0c56, 0x0c58, 0x0c59,
    0x0c82, 0x0c83, 0x0cd5, 0x0cd6, 0x0cde, 0x0cf1, 0x0cf2, 0x0d02, 0x0d03,
    0x0d57, 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84,
    0x0e87, 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6,
    0x0edc, 0x0edd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940, 0x1f59, 0x1f5b,
    0x1f5d, 0x2070, 0x2071, 0x27cc, 0x2d6f, 0x2d70, 0xa790, 0xa791, 0xa9de,
    0xa9df, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffc, 0xfffd

};

#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))

/*
 *	End of auto-generated Unicode character ranges declarations.
 */

Changes to generic/tclUniData.c.

771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
};

/*
 * The following constants are used to determine the category of a
 * Unicode character.
 */

#define UNICODE_CATEGORY_MASK 0x1F
#define UNICODE_OUT_OF_RANGE 0x10000u

enum {
    UNASSIGNED,
    UPPERCASE_LETTER,
    LOWERCASE_LETTER,
    TITLECASE_LETTER,







|







771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
};

/*
 * The following constants are used to determine the category of a
 * Unicode character.
 */

#define UNICODE_CATEGORY_MASK 0x1f
#define UNICODE_OUT_OF_RANGE 0x10000u

enum {
    UNASSIGNED,
    UPPERCASE_LETTER,
    LOWERCASE_LETTER,
    TITLECASE_LETTER,
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830

/*
 * The following macros extract the fields of the character info.  The
 * GetDelta() macro is complicated because we can't rely on the C compiler
 * to do sign extension on right shifts.
 */

#define GetCaseType(info) (((info) & 0xE0) >> 5)
#define GetCategory(ch) (GetUniCharInfo(ch) & 0x1F)
#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15)))

/*
 * This macro extracts the information about a character from the
 * Unicode character tables.
 */

#define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]])








|
|









813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830

/*
 * The following macros extract the fields of the character info.  The
 * GetDelta() macro is complicated because we can't rely on the C compiler
 * to do sign extension on right shifts.
 */

#define GetCaseType(info) (((info) & 0xe0) >> 5)
#define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f)
#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15)))

/*
 * This macro extracts the information about a character from the
 * Unicode character tables.
 */

#define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]])

Changes to generic/tclUtf.c.

1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
 */

int
Tcl_UniCharIsGraph(
    int ch)			/* Unicode character to test. */
{
    /*
     * Extract the category number of ch from its character info; it is used
     * below as a bit index into PRINT_BITS.
     */
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
    /*
     * Result is non-zero when the category's bit is set in PRINT_BITS and
     * the character is not a space.
     *
     * NOTE(review): the (unsigned char) cast truncates ch to its low byte
     * before the comparison, so any character whose low byte is 0x20 (for
     * example U+0120) compares equal to ' ' and is rejected. This is the
     * behaviour reported as [Bug 3464428].
     */
    return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' '));
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsLower --
 *







|







1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
 */

int
Tcl_UniCharIsGraph(
    int ch)			/* Unicode character to test. */
{
    /*
     * Extract the category number of ch from its character info; it is used
     * below as a bit index into PRINT_BITS.
     */
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
    /*
     * Result is non-zero when the category's bit is set in PRINT_BITS and
     * the character is not a space. The comparison uses the full int value
     * of ch, so only the character ' ' itself is excluded; characters such
     * as U+0120 that merely share its low byte are not [Bug 3464428].
     */
    return (((PRINT_BITS >> category) & 1) && (ch != ' '));
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsLower --
 *

Changes to tests/utf.test.

23
24
25
26
27
28
29
30



31
32
33
34
35
36
37
38
39
} [bytestring "\xc0\x80"]
test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
    set x "\xe0"
} [bytestring "\xc3\xa0"]
test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
    set x "\u4e4e"
} [bytestring "\xe4\xb9\x8e"]
test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {



    string length [format %c -1]
} 1

test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
    string length "abc"
} {3}
test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
    string length [bytestring "\x82\x83\x84"]
} {3}







|
>
>
>
|
|







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
} [bytestring "\xc0\x80"]
test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
    set x "\xe0"
} [bytestring "\xc3\xa0"]
test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
    set x "\u4e4e"
} [bytestring "\xe4\xb9\x8e"]
test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} {
    format %c 0x110000
} [bytestring "\xef\xbf\xbd"]
test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
    format %c -1
} [bytestring "\xef\xbf\xbd"]

test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
    string length "abc"
} {3}
test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
    string length [bytestring "\x82\x83\x84"]
} {3}
282
283
284
285
286
287
288








289
290
291
292
293
294
295
    # this returns 1 with Unicode 6 compliance
    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220]
} {1 1}
test utf-21.3 {unicode print char in regc_locale.c} {
    # this returns 1 with Unicode 6 compliance
    regexp {^[[:print:]]+$} \ufbc1
} 1









test utf-22.1 {TclUniCharIsWordChar} {
    string wordend "xyz123_bar fg" 0
} 10
test utf-22.2 {TclUniCharIsWordChar} {
    string wordend "x\u5080z123_bar\u203c fg" 0
} 10







>
>
>
>
>
>
>
>







285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
    # this returns 1 with Unicode 6 compliance
    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220]
} {1 1}
test utf-21.3 {unicode print char in regc_locale.c} {
    # this returns 1 with Unicode 6 compliance
    regexp {^[[:print:]]+$} \ufbc1
} 1
test utf-21.4 {TclUniCharIsGraph} {
    # [Bug 3464428]
    string is graph \u0120
} {1}
test utf-21.5 {unicode graph char in regc_locale.c} {
    # [Bug 3464428]
    regexp {^[[:graph:]]+$} \u0120
} {1}

test utf-22.1 {TclUniCharIsWordChar} {
    string wordend "xyz123_bar fg" 0
} 10
test utf-22.2 {TclUniCharIsWordChar} {
    string wordend "x\u5080z123_bar\u203c fg" 0
} 10

Changes to tools/uniParse.tcl.

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
				# unassigned character group

    variable categories {
	Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp
	Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
    };				# Ordered list of character categories, must
				# match the enumeration in the header file.

    variable titleCount 0;	# Count of the number of title case
				# characters.  This value is used in the
				# regular expression code to allocate enough
				# space for the title case variants.
}

# uni::getValue --
#
#	Build the group description for one character record.
#
# Arguments:
#	items	List of fields of one data-file line, as produced by
#		splitting the line on semicolons. Field 2 is read as the
#		category and fields 12, 13 and 14 as the upper, lower and
#		title case mappings (presumably the UnicodeData.txt layout;
#		verify against the data file).
#	index	Code point of the character, used in arithmetic below.
#
# Results:
#	A comma-separated string "categoryIndex,toupper,tolower,totitle".
#
# Side effects:
#	Increments the namespace variable titleCount for every character
#	whose category is Lt.

proc uni::getValue {items index} {
    variable categories
    variable titleCount

    # Extract character info

    # Each case mapping is stored as a delta relative to index, or 0 when
    # the field does not scan as a hex number. Note the direction: the
    # upper and title deltas are index minus target, the lower delta is
    # target minus index. The %6x conversion reads at most 6 hex digits.
    set category [lindex $items 2]
    if {[scan [lindex $items 12] %6x toupper] == 1} {
	set toupper [expr {$index - $toupper}]
    } else {
	set toupper 0
    }
    if {[scan [lindex $items 13] %6x tolower] == 1} {
	set tolower [expr {$tolower - $index}]
    } else {
	set tolower 0
    }
    if {[scan [lindex $items 14] %6x totitle] == 1} {
	set totitle [expr {$index - $totitle}]


    } else {
	set totitle 0
    }

    # Map the category name to its position in the categories list. An
    # unknown name is reported and falls back to position 0, which is Cn
    # in that list.
    set categoryIndex [lsearch -exact $categories $category]
    if {$categoryIndex < 0} {
	# NOTE(review): inside double quotes a dollar-name followed by an
	# open parenthesis looks like an array element reference, so this
	# message may raise an error instead of printing; verify.
	puts "Unexpected character category: $index($category)"
	set categoryIndex 0
    } elseif {$category eq "Lt"} {
	incr titleCount
    }

    return "$categoryIndex,$toupper,$tolower,$totitle"
}

proc uni::getGroup {value} {
    variable groups

    set gIndex [lsearch -exact $groups $value]
    if {$gIndex == -1} {







<
<
<
<
<




<




|




|




|

>
>

|






<
<


|







26
27
28
29
30
31
32





33
34
35
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62


63
64
65
66
67
68
69
70
71
72
				# unassigned character group

    variable categories {
	Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp
	Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
    };				# Ordered list of character categories, must
				# match the enumeration in the header file.





}

# uni::getValue --
#
#	Build the group description for one character record.
#
# Arguments:
#	items	List of fields of one data-file line, as produced by
#		splitting the line on semicolons. Field 2 is read as the
#		category and fields 12, 13 and 14 as the upper, lower and
#		title case mappings (presumably the UnicodeData.txt layout;
#		verify against the data file).
#	index	Code point of the character, used in arithmetic below.
#
# Results:
#	A four-element list: categoryIndex toupper tolower totitle.

proc uni::getValue {items index} {
    variable categories


    # Extract character info

    # Each case mapping is stored as a delta relative to index, or 0 when
    # the field does not scan as a hex number. Note the direction: the
    # upper and title deltas are index minus target, the lower delta is
    # target minus index.
    set category [lindex $items 2]
    if {[scan [lindex $items 12] %x toupper] == 1} {
	set toupper [expr {$index - $toupper}]
    } else {
	set toupper 0
    }
    if {[scan [lindex $items 13] %x tolower] == 1} {
	set tolower [expr {$tolower - $index}]
    } else {
	set tolower 0
    }
    # When the title case field is missing, the title delta is 0 if the
    # character has a lower case mapping, otherwise it reuses the upper
    # case delta.
    if {[scan [lindex $items 14] %x totitle] == 1} {
	set totitle [expr {$index - $totitle}]
    } elseif {$tolower} {
	set totitle 0
    } else {
	set totitle $toupper
    }

    # Map the category name to its position in the categories list. An
    # unknown name is reported and falls back to position 0, which is Cn
    # in that list.
    set categoryIndex [lsearch -exact $categories $category]
    if {$categoryIndex < 0} {
	# NOTE(review): inside double quotes a dollar-name followed by an
	# open parenthesis looks like an array element reference, so this
	# message may raise an error instead of printing; verify.
	puts "Unexpected character category: $index($category)"
	set categoryIndex 0


    }

    return [list $categoryIndex $toupper $tolower $totitle]
}

proc uni::getGroup {value} {
    variable groups

    set gIndex [lsearch -exact $groups $value]
    if {$gIndex == -1} {
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
}

proc uni::buildTables {data} {
    variable shift

    variable pMap {}
    variable pages {}
    variable groups {{0,0,0,0}}
    variable next 0
    set info {}			;# temporary page info

    set mask [expr {(1 << $shift) - 1}]

    foreach line [split $data \n] {
	if {$line eq ""} {
	    if {!($next & $mask)} {
		# next character is already on page boundary
		continue
	    }
	    # fill remaining page
	    set line [format %X [expr {($next-1)|$mask}]]
	    append line ";;Cn;0;ON;;;;;N;;;;;\n"
	}

	set items [split $line \;]

	scan [lindex $items 0] %x index
	if {$index > 0xFFFF} then {
	    # Ignore non-BMP characters, as long as Tcl doesn't support them
	    continue
	}
	set index [format %d $index]

	set gIndex [getGroup [getValue $items $index]]








|



















|







90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
}

proc uni::buildTables {data} {
    variable shift

    variable pMap {}
    variable pages {}
    variable groups {{0 0 0 0}}
    variable next 0
    set info {}			;# temporary page info

    set mask [expr {(1 << $shift) - 1}]

    foreach line [split $data \n] {
	if {$line eq ""} {
	    if {!($next & $mask)} {
		# next character is already on page boundary
		continue
	    }
	    # fill remaining page
	    set line [format %X [expr {($next-1)|$mask}]]
	    append line ";;Cn;0;ON;;;;;N;;;;;\n"
	}

	set items [split $line \;]

	scan [lindex $items 0] %x index
	if {$index > 0xffff} then {
	    # Ignore non-BMP characters, as long as Tcl doesn't support them
	    continue
	}
	set index [format %d $index]

	set gIndex [getGroup [getValue $items $index]]

167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

proc uni::main {} {
    global argc argv0 argv
    variable pMap
    variable pages
    variable groups
    variable shift
    variable titleCount
    variable next

    if {$argc != 2} {
	puts stderr "\nusage: $argv0 <datafile> <outdir>\n"
	exit 1
    }
    set f [open [lindex $argv 0] r]
    set data [read $f]
    close $f

    buildTables $data
    puts "X = [llength $pMap]  Y= [llength $pages]  A= [llength $groups]"
    set size [expr {[llength $pMap]*2 + [llength $pages]*(1<<$shift)}]
    puts "shift = $shift, space = $size"
    puts "title case count = $titleCount"

    set f [open [file join [lindex $argv 1] tclUniData.c] w]
    fconfigure $f -translation lf
    puts $f "/*
 * tclUniData.c --
 *
 *	Declarations of Unicode character information tables.  This file is







<














<







161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181

182
183
184
185
186
187
188

proc uni::main {} {
    global argc argv0 argv
    variable pMap
    variable pages
    variable groups
    variable shift

    variable next

    if {$argc != 2} {
	puts stderr "\nusage: $argv0 <datafile> <outdir>\n"
	exit 1
    }
    set f [open [lindex $argv 0] r]
    set data [read $f]
    close $f

    buildTables $data
    puts "X = [llength $pMap]  Y= [llength $pages]  A= [llength $groups]"
    set size [expr {[llength $pMap]*2 + [llength $pages]*(1<<$shift)}]
    puts "shift = $shift, space = $size"


    set f [open [file join [lindex $argv 1] tclUniData.c] w]
    fconfigure $f -translation lf
    puts $f "/*
 * tclUniData.c --
 *
 *	Declarations of Unicode character information tables.  This file is
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
 *			    highest field so we can easily sign extend.
 */

static const int groups\[\] = {"
    set line "    "
    set last [expr {[llength $groups] - 1}]
    for {set i 0} {$i <= $last} {incr i} {
	foreach {type toupper tolower totitle} [split [lindex $groups $i] ,] {}

	# Compute the case conversion type and delta

	if {$totitle} {
	    if {$totitle == $toupper} {
		# subtract delta for title or upper
		set case 4







|







268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
 *			    highest field so we can easily sign extend.
 */

static const int groups\[\] = {"
    set line "    "
    set last [expr {[llength $groups] - 1}]
    for {set i 0} {$i <= $last} {incr i} {
	foreach {type toupper tolower totitle} [lindex $groups $i] {}

	# Compute the case conversion type and delta

	if {$totitle} {
	    if {$totitle == $toupper} {
		# subtract delta for title or upper
		set case 4
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
    puts -nonewline $f "};

/*
 * The following constants are used to determine the category of a
 * Unicode character.
 */

#define UNICODE_CATEGORY_MASK 0x1F
#define UNICODE_OUT_OF_RANGE "
    puts $f [format 0x%Xu $next]
    puts $f "
enum {
    UNASSIGNED,
    UPPERCASE_LETTER,
    LOWERCASE_LETTER,
    TITLECASE_LETTER,
    MODIFIER_LETTER,







|

|







317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
    puts -nonewline $f "};

/*
 * The following constants are used to determine the category of a
 * Unicode character.
 */

#define UNICODE_CATEGORY_MASK 0x1f
#define UNICODE_OUT_OF_RANGE "
    puts $f [format 0x%xu $next]
    puts $f "
enum {
    UNASSIGNED,
    UPPERCASE_LETTER,
    LOWERCASE_LETTER,
    TITLECASE_LETTER,
    MODIFIER_LETTER,
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383

/*
 * The following macros extract the fields of the character info.  The
 * GetDelta() macro is complicated because we can't rely on the C compiler
 * to do sign extension on right shifts.
 */

#define GetCaseType(info) (((info) & 0xE0) >> 5)
#define GetCategory(ch) (GetUniCharInfo(ch) & 0x1F)
#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15)))

/*
 * This macro extracts the information about a character from the
 * Unicode character tables.
 */








|
|







360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375

/*
 * The following macros extract the fields of the character info.  The
 * GetDelta() macro is complicated because we can't rely on the C compiler
 * to do sign extension on right shifts.
 */

#define GetCaseType(info) (((info) & 0xe0) >> 5)
#define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f)
#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15)))

/*
 * This macro extracts the information about a character from the
 * Unicode character tables.
 */