Tcl Source Code

Check-in [14fc5c19b7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[Bug 3464428] string is graph \u0120 is wrong
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-5-branch
Files: files | file ages | folders
SHA1: 14fc5c19b75591a508cf3d708a1d9450716b60ad
User & Date: jan.nijtmans 2012-01-09 20:31:27
Context
2012-01-12
20:34
[Bug 3472316] Document the destructive semantics of [fconfigure -error] on sockets. check-in: c840cf2371 user: ferrieux tags: core-8-5-branch
2012-01-11
21:24
Contributed patch aiming to fix [file writable] problems on Windows & Samba. check-in: 0aabf0529e user: dgp tags: fix-win-native-access
2012-01-09
20:34
[Bug 3464428] string is graph \u0120 is wrong check-in: e9a619e9dc user: jan.nijtmans tags: trunk
20:31
[Bug 3464428] string is graph \u0120 is wrong check-in: 14fc5c19b7 user: jan.nijtmans tags: core-8-5-branch
19:59
[Bug 3464428] string is graph \u0120 is wrong check-in: a0c0feafe9 user: jan.nijtmans tags: core-8-4-branch
2012-01-08
20:43
Fix loading of timezones where one abbreviation is a proper suffix of another [Bug 3470928] check-in: c7e8dc4c9b user: kbk tags: core-8-5-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ChangeLog.








1
2
3
4
5
6
7







2012-01-08  Kevin B. Kenny  <[email protected]>

	* library/clock.tcl (ReadZoneinfoFile): Corrected a bug where loading
	* tests/clock.test (clock-56.4):        zoneinfo would fail if one
	timezone abbreviation was a proper tail of another, and zic used the
	same bytes of the file to represent both of them.  Added a test case
	for the bug, using the same data that caused the observed failure
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
2012-01-09  Jan Nijtmans  <[email protected]>

	* generic/tclUtf.c:      [Bug 3464428] string is graph \u0120 is wrong
	* generic/regc_locale.c: Add table for Unicode [:cntrl:] class
	* tools/uniClass.tcl:    Generate Unicode [:cntrl:] class table
	* tests/utf.test:

2012-01-08  Kevin B. Kenny  <[email protected]>

	* library/clock.tcl (ReadZoneinfoFile): Corrected a bug where loading
	* tests/clock.test (clock-56.4):        zoneinfo would fail if one
	timezone abbreviation was a proper tail of another, and zic used the
	same bytes of the file to represent both of them.  Added a test case
	for the bug, using the same data that caused the observed failure

Changes to generic/regc_locale.c.

219
220
221
222
223
224
225

















226
227
228
229
230
231
232
    0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791,
    0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d,
    0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44
};

#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))


















/*
 * Unicode: decimal digit characters.
 */

static const crange digitRangeTable[] = {
    {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x07c0, 0x07c9},
    {0x0966, 0x096f}, {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef},







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
    0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa790, 0xa791,
    0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d,
    0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44
};

#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))

/*
 * Unicode: control characters.
 */

static const crange controlRangeTable[] = {
    {0x007f, 0x009f}, {0x0600, 0x0603}, {0x200b, 0x200f}, {0x202a, 0x202e},
    {0x2060, 0x2064}, {0x206a, 0x206f}, {0xe000, 0xf8ff}, {0xfff9, 0xfffb}
};

#define NUM_CONTROL_RANGE (sizeof(controlRangeTable)/sizeof(crange))

static const chr controlCharTable[] = {
    0x00ad, 0x06dd, 0x070f, 0x17b4, 0x17b5, 0xfeff
};

#define NUM_CONTROL_CHAR (sizeof(controlCharTable)/sizeof(chr))

/*
 * Unicode: decimal digit characters.
 */

static const crange digitRangeTable[] = {
    {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x07c0, 0x07c9},
    {0x0966, 0x096f}, {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef},
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))

/*
 * Unicode: unicode print characters excluding space.
 */

static const crange graphRangeTable[] = {
    {0x0021, 0x007e}, {0x00a0, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e},
    {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x0527}, {0x0531, 0x0556},
    {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7}, {0x05d0, 0x05ea},
    {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x061e, 0x06dc}, {0x06de, 0x070d},
    {0x0710, 0x074a}, {0x074d, 0x07b1}, {0x07c0, 0x07fa}, {0x0800, 0x082d},
    {0x0830, 0x083e}, {0x0840, 0x085b}, {0x0900, 0x0977}, {0x0979, 0x097f},
    {0x0981, 0x0983}, {0x0985, 0x098c}, {0x0993, 0x09a8}, {0x09aa, 0x09b0},
    {0x09b6, 0x09b9}, {0x09bc, 0x09c4}, {0x09cb, 0x09ce}, {0x09df, 0x09e3},







|







491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))

/*
 * Unicode: unicode print characters excluding space.
 */

static const crange graphRangeTable[] = {
    {0x0021, 0x007e}, {0x00a1, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e},
    {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x0527}, {0x0531, 0x0556},
    {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7}, {0x05d0, 0x05ea},
    {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x061e, 0x06dc}, {0x06de, 0x070d},
    {0x0710, 0x074a}, {0x074d, 0x07b1}, {0x07c0, 0x07fa}, {0x0800, 0x082d},
    {0x0830, 0x083e}, {0x0840, 0x085b}, {0x0900, 0x0977}, {0x0979, 0x097f},
    {0x0981, 0x0983}, {0x0985, 0x098c}, {0x0993, 0x09a8}, {0x09aa, 0x09b0},
    {0x09b6, 0x09b9}, {0x09bc, 0x09c4}, {0x09cb, 0x09ce}, {0x09df, 0x09e3},
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
    {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4},
    {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f47}, {0x0f49, 0x0f6c},
    {0x0f71, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda},
    {0x1000, 0x10c5}, {0x10d0, 0x10fc}, {0x1100, 0x1248}, {0x124a, 0x124d},
    {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d},
    {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5},
    {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a},
    {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x169c},
    {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714}, {0x1720, 0x1736},
    {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770}, {0x1780, 0x17b3},
    {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1800, 0x180e},
    {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa}, {0x18b0, 0x18f5},
    {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x1944, 0x196d},
    {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9}, {0x19d0, 0x19da},
    {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a89},
    {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b}, {0x1b50, 0x1b7c},
    {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3}, {0x1bfc, 0x1c37},
    {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2}, {0x1d00, 0x1de6},
    {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d},
    {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4},
    {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4},
    {0x1ff6, 0x1ffe}, {0x2000, 0x200a}, {0x2010, 0x2029}, {0x202f, 0x205f},
    {0x2074, 0x208e}, {0x2090, 0x209c}, {0x20a0, 0x20b9}, {0x20d0, 0x20f0},
    {0x2100, 0x2189}, {0x2190, 0x23f3}, {0x2400, 0x2426}, {0x2440, 0x244a},
    {0x2460, 0x26ff}, {0x2701, 0x27ca}, {0x27ce, 0x2b4c}, {0x2b50, 0x2b59},
    {0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}, {0x2c60, 0x2cf1}, {0x2cf9, 0x2d25},
    {0x2d30, 0x2d65}, {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 0x2dae},
    {0x2db0, 0x2db6}, {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce},
    {0x2dd0, 0x2dd6}, {0x2dd8, 0x2dde}, {0x2de0, 0x2e31}, {0x2e80, 0x2e99},
    {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3000, 0x303f},
    {0x3041, 0x3096}, {0x3099, 0x30ff}, {0x3105, 0x312d}, {0x3131, 0x318e},
    {0x3190, 0x31ba}, {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x32fe},
    {0x3300, 0x4db5}, {0x4dc0, 0x9fcb}, {0xa000, 0xa48c}, {0xa490, 0xa4c6},
    {0xa4d0, 0xa62b}, {0xa640, 0xa673}, {0xa67c, 0xa697}, {0xa6a0, 0xa6f7},
    {0xa700, 0xa78e}, {0xa7a0, 0xa7a9}, {0xa7fa, 0xa82b}, {0xa830, 0xa839},
    {0xa840, 0xa877}, {0xa880, 0xa8c4}, {0xa8ce, 0xa8d9}, {0xa8e0, 0xa8fb},
    {0xa900, 0xa953}, {0xa95f, 0xa97c}, {0xa980, 0xa9cd}, {0xa9cf, 0xa9d9},







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







|







526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
    {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4},
    {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, {0x0f00, 0x0f47}, {0x0f49, 0x0f6c},
    {0x0f71, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x0fce, 0x0fda},
    {0x1000, 0x10c5}, {0x10d0, 0x10fc}, {0x1100, 0x1248}, {0x124a, 0x124d},
    {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d},
    {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5},
    {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a},
    {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x167f},
    {0x1681, 0x169c}, {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714},
    {0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770},
    {0x1780, 0x17b3}, {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9},
    {0x1800, 0x180d}, {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa},
    {0x18b0, 0x18f5}, {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b},
    {0x1944, 0x196d}, {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9},
    {0x19d0, 0x19da}, {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c},
    {0x1a7f, 0x1a89}, {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b},
    {0x1b50, 0x1b7c}, {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3},
    {0x1bfc, 0x1c37}, {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2},
    {0x1d00, 0x1de6}, {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
    {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
    {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef},
    {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, {0x2010, 0x2027}, {0x2030, 0x205e},
    {0x2074, 0x208e}, {0x2090, 0x209c}, {0x20a0, 0x20b9}, {0x20d0, 0x20f0},
    {0x2100, 0x2189}, {0x2190, 0x23f3}, {0x2400, 0x2426}, {0x2440, 0x244a},
    {0x2460, 0x26ff}, {0x2701, 0x27ca}, {0x27ce, 0x2b4c}, {0x2b50, 0x2b59},
    {0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}, {0x2c60, 0x2cf1}, {0x2cf9, 0x2d25},
    {0x2d30, 0x2d65}, {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 0x2dae},
    {0x2db0, 0x2db6}, {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce},
    {0x2dd0, 0x2dd6}, {0x2dd8, 0x2dde}, {0x2de0, 0x2e31}, {0x2e80, 0x2e99},
    {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3001, 0x303f},
    {0x3041, 0x3096}, {0x3099, 0x30ff}, {0x3105, 0x312d}, {0x3131, 0x318e},
    {0x3190, 0x31ba}, {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x32fe},
    {0x3300, 0x4db5}, {0x4dc0, 0x9fcb}, {0xa000, 0xa48c}, {0xa490, 0xa4c6},
    {0xa4d0, 0xa62b}, {0xa640, 0xa673}, {0xa67c, 0xa697}, {0xa6a0, 0xa6f7},
    {0xa700, 0xa78e}, {0xa7a0, 0xa7a9}, {0xa7fa, 0xa82b}, {0xa830, 0xa839},
    {0xa840, 0xa877}, {0xa880, 0xa8c4}, {0xa8ce, 0xa8d9}, {0xa8e0, 0xa8fb},
    {0xa900, 0xa953}, {0xa95f, 0xa97c}, {0xa980, 0xa9cd}, {0xa9cf, 0xa9d9},
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814








815
816
817
818
819
820
821
     * Extract the class name
     */

    len = endp - startp;
    Tcl_DStringInit(&ds);
    np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);

    /*
     * Remap lower and upper to alpha if the match is case insensitive.
     */

    if (cases && len == 5 && (strncmp("lower", np, 5) == 0
	    || strncmp("upper", np, 5) == 0)) {
	np = "alpha";
    }

    /*
     * Map the name to the corresponding enumerated value.
     */

    index = -1;
    for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
	if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
	    index = i;
	    break;
	}
    }
    Tcl_DStringFree(&ds);
    if (index == -1) {
	ERR(REG_ECTYPE);
	return NULL;
    }









    /*
     * Now compute the character class contents.
     */

    switch((enum classes) index) {
    case CC_ALNUM:







<
<
<
<
<
<
<
<
<
















>
>
>
>
>
>
>
>







800
801
802
803
804
805
806









807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
     * Extract the class name
     */

    len = endp - startp;
    Tcl_DStringInit(&ds);
    np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);










    /*
     * Map the name to the corresponding enumerated value.
     */

    index = -1;
    for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
	if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
	    index = i;
	    break;
	}
    }
    Tcl_DStringFree(&ds);
    if (index == -1) {
	ERR(REG_ECTYPE);
	return NULL;
    }

    /*
     * Remap lower and upper to alpha if the match is case insensitive.
     */

    if (cases && ((index == CC_LOWER) || (index == CC_UPPER))) {
	index = CC_ALNUM;
    }

    /*
     * Now compute the character class contents.
     */

    switch((enum classes) index) {
    case CC_ALNUM:
854
855
856
857
858
859
860
861
862

863






864
865
866
867
868
869
870
	break;
    case CC_BLANK:
	cv = getcvec(v, 2, 0);
	addchr(cv, '\t');
	addchr(cv, ' ');
	break;
    case CC_CNTRL:
	cv = getcvec(v, 0, 2);
	addrange(cv, 0x0, 0x1f);

	addrange(cv, 0x7f, 0x9f);






	break;
    case CC_DIGIT:
	cv = getcvec(v, 0, NUM_DIGIT_RANGE);
	if (cv) {
	    for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
		addrange(cv, digitRangeTable[i].start,
			digitRangeTable[i].end);







|
|
>
|
>
>
>
>
>
>







870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
	break;
    case CC_BLANK:
	cv = getcvec(v, 2, 0);
	addchr(cv, '\t');
	addchr(cv, ' ');
	break;
    case CC_CNTRL:
	cv = getcvec(v, NUM_CONTROL_CHAR, NUM_CONTROL_RANGE);
	if (cv) {
	    for (i=0 ; (size_t)i<NUM_CONTROL_RANGE ; i++) {
		addrange(cv, controlRangeTable[i].start,
			controlRangeTable[i].end);
	    }
	    for (i=0 ; (size_t)i<NUM_CONTROL_CHAR ; i++) {
		addchr(cv, controlCharTable[i]);
	    }
	}
	break;
    case CC_DIGIT:
	cv = getcvec(v, 0, NUM_DIGIT_RANGE);
	if (cv) {
	    for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
		addrange(cv, digitRangeTable[i].start,
			digitRangeTable[i].end);
933
934
935
936
937
938
939


















940
941
942
943
944
945
946
947
948
949
950
951
952
953
	    }
	    for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) {
		addchr(cv, upperCharTable[i]);
	    }
	}
	break;
    case CC_PRINT:


















    case CC_GRAPH:
	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE);
	if (cv) {
		/* For CC_PRINT, include space as well */
		addrange(cv, graphRangeTable[0].start - (index == CC_PRINT),
			graphRangeTable[0].end);
	    for (i=1 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
		addrange(cv, graphRangeTable[i].start,
			graphRangeTable[i].end);
	    }
	    for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) {
		addchr(cv, graphCharTable[i]);
	    }
	}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



<
<
<
|







956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983



984
985
986
987
988
989
990
991
	    }
	    for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) {
		addchr(cv, upperCharTable[i]);
	    }
	}
	break;
    case CC_PRINT:
    	cv  = getcvec(v, NUM_SPACE_CHAR + NUM_GRAPH_CHAR, NUM_SPACE_RANGE + NUM_GRAPH_RANGE  - 1);
    	if (cv) {
    	    for (i=1 ; (size_t)i<NUM_SPACE_RANGE ; i++) {
    		addrange(cv, spaceRangeTable[i].start,
    				spaceRangeTable[i].end);
    	    }
    	    for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) {
    		addchr(cv, spaceCharTable[i]);
    	    }
    	    for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
    		addrange(cv, graphRangeTable[i].start,
    				graphRangeTable[i].end);
    	    }
    	    for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) {
    		addchr(cv, graphCharTable[i]);
    	    }
    	}
    	break;
    case CC_GRAPH:
	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE);
	if (cv) {



	    for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
		addrange(cv, graphRangeTable[i].start,
			graphRangeTable[i].end);
	    }
	    for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) {
		addchr(cv, graphCharTable[i]);
	    }
	}

Changes to generic/tclUtf.c.

22
23
24
25
26
27
28


29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49







50
51
52
53
54
55
56
 * values are shifted right by the category value to determine whether the
 * given category is included in the set.
 */

#define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \
	| (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER))



#define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)

#define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \
	| (1 << PARAGRAPH_SEPARATOR))

#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)

#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \
	(1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
	(1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
	(1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \
	(1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
	(1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
	(1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \
	(1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
	(1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))

#define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \
	(1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
	(1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
	(1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION))








/*
 * Unicode characters less than this value are represented by themselves in
 * UTF-8 strings.
 */

#define UNICODE_SELF	0x80







>
>





<
<
|
<
<
<
<
<
<
<
<





>
>
>
>
>
>
>







22
23
24
25
26
27
28
29
30
31
32
33
34
35


36








37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
 * values are shifted right by the category value to determine whether the
 * given category is included in the set.
 */

#define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \
	| (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER))

#define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT) | (1 << PRIVATE_USE))

#define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)

#define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \
	| (1 << PARAGRAPH_SEPARATOR))



#define WORD_BITS (ALPHA_BITS | DIGIT_BITS | (1 << CONNECTOR_PUNCTUATION))









#define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \
	(1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
	(1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
	(1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION))

#define GRAPH_BITS (WORD_BITS | PUNCT_BITS | \
	(1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
	(1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
	(1 << OTHER_NUMBER) | \
	(1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
	(1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))

/*
 * Unicode characters less than this value are represented by themselves in
 * UTF-8 strings.
 */

#define UNICODE_SELF	0x80
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsAlnum(
    int ch)			/* Unicode character to test. */
{
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);

    return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsAlpha --
 *







<
<
|







1323
1324
1325
1326
1327
1328
1329


1330
1331
1332
1333
1334
1335
1336
1337
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsAlnum(
    int ch)			/* Unicode character to test. */
{


    return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsAlpha --
 *
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsAlpha(
    int ch)			/* Unicode character to test. */
{
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
    return ((ALPHA_BITS >> category) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsControl --
 *







<
|







1346
1347
1348
1349
1350
1351
1352

1353
1354
1355
1356
1357
1358
1359
1360
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsAlpha(
    int ch)			/* Unicode character to test. */
{

    return ((ALPHA_BITS >> GetCategory(ch)) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsControl --
 *
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsControl(
    int ch)			/* Unicode character to test. */
{
    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsDigit --
 *







|







1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsControl(
    int ch)			/* Unicode character to test. */
{
    return ((CONTROL_BITS >> GetCategory(ch)) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsDigit --
 *
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsDigit(
    int ch)			/* Unicode character to test. */
{
    return (GetUniCharInfo(ch)&UNICODE_CATEGORY_MASK) == DECIMAL_DIGIT_NUMBER;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsGraph --
 *







|







1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsDigit(
    int ch)			/* Unicode character to test. */
{
    return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsGraph --
 *
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsGraph(
    int ch)			/* Unicode character to test. */
{
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
    return (((PRINT_BITS >> category) & 1) && (ch != ' '));
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsLower --
 *







<
|







1415
1416
1417
1418
1419
1420
1421

1422
1423
1424
1425
1426
1427
1428
1429
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsGraph(
    int ch)			/* Unicode character to test. */
{

    return ((GRAPH_BITS >> GetCategory(ch)) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsLower --
 *
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsLower(
    int ch)			/* Unicode character to test. */
{
    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsPrint --
 *







|







1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsLower(
    int ch)			/* Unicode character to test. */
{
    return (GetCategory(ch) == LOWERCASE_LETTER);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsPrint --
 *
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsPrint(
    int ch)			/* Unicode character to test. */
{
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
    return ((PRINT_BITS >> category) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsPunct --
 *







<
|







1461
1462
1463
1464
1465
1466
1467

1468
1469
1470
1471
1472
1473
1474
1475
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsPrint(
    int ch)			/* Unicode character to test. */
{

    return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsPunct --
 *
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsPunct(
    int ch)			/* Unicode character to test. */
{
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
    return ((PUNCT_BITS >> category) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsSpace --
 *







<
|







1484
1485
1486
1487
1488
1489
1490

1491
1492
1493
1494
1495
1496
1497
1498
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsPunct(
    int ch)			/* Unicode character to test. */
{

    return ((PUNCT_BITS >> GetCategory(ch)) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsSpace --
 *
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsSpace(
    int ch)			/* Unicode character to test. */
{
    register int category;

    /*
     * If the character is within the first 127 characters, just use the
     * standard C function, otherwise consult the Unicode table.
     */

    if (ch < 0x80) {
	return TclIsSpaceProc((char)ch);
    } else {
	category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
	return ((SPACE_BITS >> category) & 1);
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsUpper --







<
<





|
|

<
|







1507
1508
1509
1510
1511
1512
1513


1514
1515
1516
1517
1518
1519
1520
1521

1522
1523
1524
1525
1526
1527
1528
1529
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsSpace(
    int ch)			/* Unicode character to test. */
{


    /*
     * If the character is within the first 127 characters, just use the
     * standard C function, otherwise consult the Unicode table.
     */

    if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) {
	return isspace(UCHAR(ch)); /* INTL: ISO space */
    } else {

	return ((SPACE_BITS >> GetCategory(ch)) & 1);
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsUpper --
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsUpper(
    int ch)			/* Unicode character to test. */
{
    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsWordChar --
 *







|







1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsUpper(
    int ch)			/* Unicode character to test. */
{
    return (GetCategory(ch) == UPPERCASE_LETTER);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsWordChar --
 *
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsWordChar(
    int ch)			/* Unicode character to test. */
{
    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);

    return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharCaseMatch --
 *







<
|
<







1562
1563
1564
1565
1566
1567
1568

1569

1570
1571
1572
1573
1574
1575
1576
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsWordChar(
    int ch)			/* Unicode character to test. */
{

    return ((WORD_BITS >> GetCategory(ch)) & 1);

}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharCaseMatch --
 *

Changes to tests/utf.test.

292
293
294
295
296
297
298




























299
300
301
302
303
304
305
test utf-21.4 {TclUniCharIsGraph} {
    # [Bug 3464428]
    string is graph \u0120
} {1}
test utf-21.5 {unicode graph char in regc_locale.c} {
    # [Bug 3464428]
    regexp {^[[:graph:]]+$} \u0120




























} {1}

test utf-22.1 {TclUniCharIsWordChar} {
    string wordend "xyz123_bar fg" 0
} 10
test utf-22.2 {TclUniCharIsWordChar} {
    string wordend "x\u5080z123_bar\u203c fg" 0







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
test utf-21.4 {TclUniCharIsGraph} {
    # [Bug 3464428]
    string is graph \u0120
} {1}
test utf-21.5 {unicode graph char in regc_locale.c} {
    # [Bug 3464428]
    regexp {^[[:graph:]]+$} \u0120
} {1}
test utf-21.6 {TclUniCharIsGraph} {
    # [Bug 3464428]
    string is graph \u00a0
} {0}
test utf-21.7 {unicode graph char in regc_locale.c} {
    # [Bug 3464428]
    regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029
} {0}
test utf-21.8 {TclUniCharIsPrint} {
    # [Bug 3464428]
    string is print \u0009
} {0}
test utf-21.9 {unicode print char in regc_locale.c} {
    # [Bug 3464428]
    regexp {[[:print:]]} \u0009
} {0}
test utf-21.10 {unicode print char in regc_locale.c} {
    # [Bug 3464428]
    regexp {[[:print:]]} \u0009
} {0}
test utf-21.11 {TclUniCharIsControl} {
    # [Bug 3464428]
    string is control \u00ad
} {1}
test utf-21.12 {unicode control char in regc_locale.c} {
    # [Bug 3464428]
    regexp {^[[:cntrl:]]$} \u00ad
} {1}

test utf-22.1 {TclUniCharIsWordChar} {
    string wordend "xyz123_bar fg" 0
} 10
test utf-22.2 {TclUniCharIsWordChar} {
    string wordend "x\u5080z123_bar\u203c fg" 0

Changes to tools/uniClass.tcl.

83
84
85
86
87
88
89

90
91
92
93
94
95
96
 *	is automatically generated by the tools/uniClass.tcl script
 *	and used in generic/regc_locale.c.  Do not modify by hand.
 */
"

foreach {type desc} {
    alpha "alphabetic characters"

    digit "decimal digit characters"
    punct "punctuation characters"
    space "white space characters"
    lower "lowercase characters"
    upper "uppercase characters"
    graph "unicode print characters excluding space"
} {







>







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
 *	is automatically generated by the tools/uniClass.tcl script
 *	and used in generic/regc_locale.c.  Do not modify by hand.
 */
"

foreach {type desc} {
    alpha "alphabetic characters"
    control "control characters"
    digit "decimal digit characters"
    punct "punctuation characters"
    space "white space characters"
    lower "lowercase characters"
    upper "uppercase characters"
    graph "unicode print characters excluding space"
} {