Tcl Source Code

Changes On Branch utf-max-6
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch utf-max-6 Excluding Merge-Ins

This is equivalent to a diff from c6f4b50fbf to ab3bc46fe1

2019-03-08
21:27
Make -DTCL_UTF_MAX=6-build work on win32. Add travis builds to prove it. check-in: 1df1b22fad user: jan.nijtmans tags: core-8-branch
19:51
Code cleanup: less (size_t) casts Closed-Leaf check-in: ab3bc46fe1 user: jan.nijtmans tags: utf-max-6
14:47
merge 8.6 check-in: 8d5d640832 user: dgp tags: core-8-branch
2019-03-07
22:03
Merge 8.7 check-in: d6eb82a174 user: jan.nijtmans tags: trunk
22:02
Fixes for TCL_UTF_MAX=6, (gcc compiler warnings). Also make everything work on win32/win64. Patch ad... check-in: 650574e0fb user: jan.nijtmans tags: utf-max-6
21:59
Fix gcc compiler warning check-in: c6f4b50fbf user: jan.nijtmans tags: core-8-branch
14:17
(core-8-6-branch merge) Fix automatic pkgIndex generation for multiplatform installs. check-in: d48cced50f user: apnadkarni tags: core-8-branch

Changes to .travis.yml.

118
119
120
121
122
123
124
















125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140















141
142
143
144
145
146
147
148
149
150
            - gcc-mingw-w64
            - gcc-multilib
            - wine
      env:
        - BUILD_DIR=win
        - CFGOPT=--host=i686-w64-mingw32
        - NO_DIRECT_TEST=1
















# Test with mingw-w64 (64 bit)
    - os: linux
      dist: xenial
      compiler: x86_64-w64-mingw32-gcc
      addons:
        apt:
          packages:
            - gcc-mingw-w64-base
            - binutils-mingw-w64-x86-64
            - gcc-mingw-w64-x86-64
            - gcc-mingw-w64
            - wine
      env:
        - BUILD_DIR=win
        - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit"
        - NO_DIRECT_TEST=1
















before_install:
  - export ERROR_ON_FAILURES=1
  - cd ${BUILD_DIR}
install:
  - test -n "$NO_DIRECT_CONFIGURE" || ./configure ${CFGOPT}
script:
  - make
  # The styles=develop avoids some weird problems on OSX
  - test -n "$NO_DIRECT_TEST" || make test styles=develop







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
            - gcc-mingw-w64
            - gcc-multilib
            - wine
      env:
        - BUILD_DIR=win
        - CFGOPT=--host=i686-w64-mingw32
        - NO_DIRECT_TEST=1
    - os: linux
      dist: xenial
      compiler: i686-w64-mingw32-gcc
      addons:
        apt:
          packages:
            - gcc-mingw-w64-base
            - binutils-mingw-w64-i686
            - gcc-mingw-w64-i686
            - gcc-mingw-w64
            - gcc-multilib
            - wine
      env:
        - BUILD_DIR=win
        - CFGOPT="--host=i686-w64-mingw32 CFLAGS=-DTCL_UTF_MAX=6"
        - NO_DIRECT_TEST=1
# Test with mingw-w64 (64 bit)
    - os: linux
      dist: xenial
      compiler: x86_64-w64-mingw32-gcc
      addons:
        apt:
          packages:
            - gcc-mingw-w64-base
            - binutils-mingw-w64-x86-64
            - gcc-mingw-w64-x86-64
            - gcc-mingw-w64
            - wine
      env:
        - BUILD_DIR=win
        - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit"
        - NO_DIRECT_TEST=1
    - os: linux
      dist: xenial
      compiler: x86_64-w64-mingw32-gcc
      addons:
        apt:
          packages:
            - gcc-mingw-w64-base
            - binutils-mingw-w64-x86-64
            - gcc-mingw-w64-x86-64
            - gcc-mingw-w64
            - wine
      env:
        - BUILD_DIR=win
        - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit CFLAGS=-DTCL_UTF_MAX=6"
        - NO_DIRECT_TEST=1

before_install:
  - export ERROR_ON_FAILURES=1
  - cd ${BUILD_DIR}
install:
  - test -n "$NO_DIRECT_CONFIGURE" || ./configure ${CFGOPT}
script:
  - make
  # The styles=develop avoids some weird problems on OSX
  - test -n "$NO_DIRECT_TEST" || make test styles=develop

Changes to generic/regcomp.c.

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
    int cflags;			/* copy of compile flags */
    int lasttype;		/* type of previous token */
    int nexttype;		/* type of next token */
    chr nextvalue;		/* value (if any) of next token */
    int lexcon;			/* lexical context type (see lex.c) */
    int nsubexp;		/* subexpression count */
    struct subre **subs;	/* subRE pointer vector */
    size_t nsubs;		/* length of vector */
    struct subre *sub10[10];	/* initial vector, enough for most */
    struct nfa *nfa;		/* the NFA */
    struct colormap *cm;	/* character color map */
    color nlcolor;		/* color of newline */
    struct state *wordchrs;	/* state in nfa holding word-char outarcs */
    struct subre *tree;		/* subexpression tree */
    struct subre *treechain;	/* all tree nodes allocated */







|







206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
    int cflags;			/* copy of compile flags */
    int lasttype;		/* type of previous token */
    int nexttype;		/* type of next token */
    chr nextvalue;		/* value (if any) of next token */
    int lexcon;			/* lexical context type (see lex.c) */
    int nsubexp;		/* subexpression count */
    struct subre **subs;	/* subRE pointer vector */
    int nsubs;			/* length of vector */
    struct subre *sub10[10];	/* initial vector, enough for most */
    struct nfa *nfa;		/* the NFA */
    struct colormap *cm;	/* character color map */
    color nlcolor;		/* color of newline */
    struct state *wordchrs;	/* state in nfa holding word-char outarcs */
    struct subre *tree;		/* subexpression tree */
    struct subre *treechain;	/* all tree nodes allocated */
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
    regex_t *re,
    const chr *string,
    size_t len,
    int flags)
{
    AllocVars(v);
    struct guts *g;
    int i;
    size_t j;
    FILE *debug = (flags&REG_PROGRESS) ? stdout : NULL;
#define	CNOERR()	{ if (ISERR()) return freev(v, v->err); }

    /*
     * Sanity checks.
     */








|
<







283
284
285
286
287
288
289
290

291
292
293
294
295
296
297
    regex_t *re,
    const chr *string,
    size_t len,
    int flags)
{
    AllocVars(v);
    struct guts *g;
    int i, j;

    FILE *debug = (flags&REG_PROGRESS) ? stdout : NULL;
#define	CNOERR()	{ if (ISERR()) return freev(v, v->err); }

    /*
     * Sanity checks.
     */

473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
 */
static void
moresubs(
    struct vars *v,
    int wanted)			/* want enough room for this one */
{
    struct subre **p;
    size_t n;

    assert(wanted > 0 && (size_t)wanted >= v->nsubs);
    n = (size_t)wanted * 3 / 2 + 1;
    if (v->subs == v->sub10) {
	p = (struct subre **) MALLOC(n * sizeof(struct subre *));
	if (p != NULL) {
	    memcpy(p, v->subs, v->nsubs * sizeof(struct subre *));
	}
    } else {
	p = (struct subre **) REALLOC(v->subs, n*sizeof(struct subre *));
    }
    if (p == NULL) {
	ERR(REG_ESPACE);
	return;
    }

    v->subs = p;
    for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) {
	*p = NULL;
    }
    assert(v->nsubs == n);
    assert((size_t)wanted < v->nsubs);
}

/*
 - freev - free vars struct's substructures where necessary
 * Optionally does error-number setting, and always returns error code (if
 * any), to make error-handling code terser.
 ^ static int freev(struct vars *, int);







|

|
|


















|







472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
 */
static void
moresubs(
    struct vars *v,
    int wanted)			/* want enough room for this one */
{
    struct subre **p;
    int n;

    assert(wanted > 0 && wanted >= v->nsubs);
    n = wanted * 3 / 2 + 1;
    if (v->subs == v->sub10) {
	p = (struct subre **) MALLOC(n * sizeof(struct subre *));
	if (p != NULL) {
	    memcpy(p, v->subs, v->nsubs * sizeof(struct subre *));
	}
    } else {
	p = (struct subre **) REALLOC(v->subs, n*sizeof(struct subre *));
    }
    if (p == NULL) {
	ERR(REG_ESPACE);
	return;
    }

    v->subs = p;
    for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) {
	*p = NULL;
    }
    assert(v->nsubs == n);
    assert(wanted < v->nsubs);
}

/*
 - freev - free vars struct's substructures where necessary
 * Optionally does error-number setting, and always returns error code (if
 * any), to make error-handling code terser.
 ^ static int freev(struct vars *, int);
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
	 */

    case '(':			/* value flags as capturing or non */
	cap = (type == LACON) ? 0 : v->nextvalue;
	if (cap) {
	    v->nsubexp++;
	    subno = v->nsubexp;
	    if ((size_t)subno >= v->nsubs) {
		moresubs(v, subno);
	    }
	    assert((size_t)subno < v->nsubs);
	} else {
	    atomtype = PLAIN;	/* something that's not '(' */
	}
	NEXT();

	/*
	 * Need new endpoints because tree will contain pointers.







|


|







949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
	 */

    case '(':			/* value flags as capturing or non */
	cap = (type == LACON) ? 0 : v->nextvalue;
	if (cap) {
	    v->nsubexp++;
	    subno = v->nsubexp;
	    if (subno >= v->nsubs) {
		moresubs(v, subno);
	    }
	    assert(subno < v->nsubs);
	} else {
	    atomtype = PLAIN;	/* something that's not '(' */
	}
	NEXT();

	/*
	 * Need new endpoints because tree will contain pointers.

Changes to generic/regexec.c.

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
    rm_detail_t *details,
    size_t nmatch,
    regmatch_t pmatch[],
    int flags)
{
    AllocVars(v);
    int st, backref;
    size_t n;
    size_t i;
#define	LOCALMAT	20
    regmatch_t mat[LOCALMAT];
#define LOCALDFAS	40
    struct dfa *subdfas[LOCALDFAS];

    /*
     * Sanity checks.







|
|







168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
    rm_detail_t *details,
    size_t nmatch,
    regmatch_t pmatch[],
    int flags)
{
    AllocVars(v);
    int st, backref;
    int n;
    int i;
#define	LOCALMAT	20
    regmatch_t mat[LOCALMAT];
#define LOCALDFAS	40
    struct dfa *subdfas[LOCALDFAS];

    /*
     * Sanity checks.
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
	v->pmatch = pmatch;
    }
    v->details = details;
    v->start = (chr *)string;
    v->stop = (chr *)string + len;
    v->err = 0;
    assert(v->g->ntree >= 0);
    n = (size_t) v->g->ntree;
    if (n <= LOCALDFAS)
	v->subdfas = subdfas;
    else
	v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
    if (v->subdfas == NULL) {
	if (v->pmatch != pmatch && v->pmatch != mat)
	    FREE(v->pmatch);







|







232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
	v->pmatch = pmatch;
    }
    v->details = details;
    v->start = (chr *)string;
    v->stop = (chr *)string + len;
    v->err = 0;
    assert(v->g->ntree >= 0);
    n = v->g->ntree;
    if (n <= LOCALDFAS)
	v->subdfas = subdfas;
    else
	v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
    if (v->subdfas == NULL) {
	if (v->pmatch != pmatch && v->pmatch != mat)
	    FREE(v->pmatch);
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
    /*
     * Clean up.
     */

    if (v->pmatch != pmatch && v->pmatch != mat) {
	FREE(v->pmatch);
    }
    n = (size_t) v->g->ntree;
    for (i = 0; i < n; i++) {
	if (v->subdfas[i] != NULL)
	    freeDFA(v->subdfas[i]);
    }
    if (v->subdfas != subdfas)
	FREE(v->subdfas);
    FreeVars(v);







|







274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
    /*
     * Clean up.
     */

    if (v->pmatch != pmatch && v->pmatch != mat) {
	FREE(v->pmatch);
    }
    n = v->g->ntree;
    for (i = 0; i < n; i++) {
	if (v->subdfas[i] != NULL)
	    freeDFA(v->subdfas[i]);
    }
    if (v->subdfas != subdfas)
	FREE(v->subdfas);
    FreeVars(v);

Changes to generic/tclEncoding.c.

2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars, charLimit = INT_MAX;
    Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;

    if (flags & TCL_ENCODING_START) {
    	*statePtr = 0;
    }
    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    result = TCL_OK;
    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen /= sizeof(Tcl_UniChar);
	srcLen *= sizeof(Tcl_UniChar);
    }

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
	 * Tcl_UniChar-size data.
	 */

	*chPtr = *(Tcl_UniChar *)src;
	if (*chPtr && *chPtr < 0x80) {
	    *dst++ = (*chPtr & 0xFF);
	} else {
	    dst += Tcl_UniCharToUtf(*chPtr, dst);
	}
	src += sizeof(Tcl_UniChar);
    }

    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}







<
|
<
<
|




|

|
|
















|


|
|
|

|

|







2440
2441
2442
2443
2444
2445
2446

2447


2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars, charLimit = INT_MAX;

    unsigned short ch;



    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    result = TCL_OK;
    if ((srcLen % sizeof(unsigned short)) != 0) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen /= sizeof(unsigned short);
	srcLen *= sizeof(unsigned short);
    }

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
	 * unsigned short-size data.
	 */

	ch = *(unsigned short *)src;
	if (ch && ch < 0x80) {
	    *dst++ = (ch & 0xFF);
	} else {
	    dst += Tcl_UniCharToUtf(ch, dst);
	}
	src += sizeof(unsigned short);
    }

    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}
2572
2573
2574
2575
2576
2577
2578

2579
2580

2581
2582



2583
2584
2585
2586
2587
2588

2589
2590

2591

2592


2593
2594
2595
2596
2597
2598
2599
	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

#ifdef WORDS_BIGENDIAN
#if TCL_UTF_MAX > 4

	*dst++ = (*chPtr >> 24);
	*dst++ = ((*chPtr >> 16) & 0xFF);

	*dst++ = ((*chPtr >> 8) & 0xFF);
	*dst++ = (*chPtr & 0xFF);



#else
	*dst++ = (*chPtr >> 8);
	*dst++ = (*chPtr & 0xFF);
#endif
#else
#if TCL_UTF_MAX > 4

	*dst++ = (*chPtr & 0xFF);
	*dst++ = ((*chPtr >> 8) & 0xFF);

	*dst++ = ((*chPtr >> 16) & 0xFF);

	*dst++ = (*chPtr >> 24);


#else
	*dst++ = (*chPtr & 0xFF);
	*dst++ = (*chPtr >> 8);
#endif
#endif
    }
    *srcReadPtr = src - srcStart;







>
|
|
>
|
|
>
>
>






>
|
|
>
|
>
|
>
>







2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

#ifdef WORDS_BIGENDIAN
#if TCL_UTF_MAX > 4
	if (*chPtr <= 0xFFFF) {
	    *dst++ = (*chPtr >> 8);
	    *dst++ = (*chPtr & 0xFF);
	} else {
	    *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
	    *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
	}
#else
	*dst++ = (*chPtr >> 8);
	*dst++ = (*chPtr & 0xFF);
#endif
#else
#if TCL_UTF_MAX > 4
	if (*chPtr <= 0xFFFF) {
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (*chPtr >> 8);
	} else {
	    *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
	    *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	}
#else
	*dst++ = (*chPtr & 0xFF);
	*dst++ = (*chPtr >> 8);
#endif
#endif
    }
    *srcReadPtr = src - srcStart;

Changes to generic/tclMain.c.

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71






72

73
74
75
76
77
78
79
80
81
82

/*
 * Further on, in UNICODE mode we just use Tcl_NewUnicodeObj, otherwise
 * NewNativeObj is needed (which provides proper conversion from native
 * encoding to UTF-8).
 */

#ifdef UNICODE
#   define NewNativeObj Tcl_NewUnicodeObj
#else /* !UNICODE */
static inline Tcl_Obj *
NewNativeObj(
    char *string,
    int length)
{
    Tcl_DString ds;







    Tcl_ExternalToUtfDString(NULL, string, length, &ds);

    return TclDStringToObj(&ds);
}
#endif /* !UNICODE */

/*
 * Declarations for various library functions and variables (don't want to
 * include tclPort.h here, because people might copy this file out of the Tcl
 * source directory to make their own modified versions).
 */








|

|


|




>
>
>
>
>
>
|
>


|







55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

/*
 * Further on, in UNICODE mode we just use Tcl_NewUnicodeObj, otherwise
 * NewNativeObj is needed (which provides proper conversion from native
 * encoding to UTF-8).
 */

#if defined(UNICODE) && (TCL_UTF_MAX <= 4)
#   define NewNativeObj Tcl_NewUnicodeObj
#else /* !UNICODE || (TCL_UTF_MAX > 4) */
static inline Tcl_Obj *
NewNativeObj(
    TCHAR *string,
    int length)
{
    Tcl_DString ds;

#ifdef UNICODE
    if (length > 0) {
	length *= sizeof(WCHAR);
    }
    Tcl_WinTCharToUtf(string, length, &ds);
#else
    Tcl_ExternalToUtfDString(NULL, (char *) string, length, &ds);
#endif
    return TclDStringToObj(&ds);
}
#endif /* !UNICODE || (TCL_UTF_MAX > 4) */

/*
 * Declarations for various library functions and variables (don't want to
 * include tclPort.h here, because people might copy this file out of the Tcl
 * source directory to make their own modified versions).
 */

Changes to generic/tclUtf.c.

1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
	 * characters separated by "-").
	 */

	if (p == '[') {
	    Tcl_UniChar startChar, endChar;

	    uniPattern++;
	    ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
	    uniStr++;
	    while (1) {
		if ((*uniPattern == ']') || (*uniPattern == 0)) {
		    return 0;
		}
		startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
			: *uniPattern);
		uniPattern++;
		if (*uniPattern == '-') {
		    uniPattern++;
		    if (*uniPattern == 0) {
			return 0;
		    }
		    endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
			    : *uniPattern);
		    uniPattern++;
		    if (((startChar <= ch1) && (ch1 <= endChar))
			    || ((endChar <= ch1) && (ch1 <= startChar))) {
			/*
			 * Matches ranges of form [a-z] or [z-a].
			 */







|





|







|







1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
	 * characters separated by "-").
	 */

	if (p == '[') {
	    Tcl_UniChar startChar, endChar;

	    uniPattern++;
	    ch1 = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniStr) : *uniStr);
	    uniStr++;
	    while (1) {
		if ((*uniPattern == ']') || (*uniPattern == 0)) {
		    return 0;
		}
		startChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniPattern)
			: *uniPattern);
		uniPattern++;
		if (*uniPattern == '-') {
		    uniPattern++;
		    if (*uniPattern == 0) {
			return 0;
		    }
		    endChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniPattern)
			    : *uniPattern);
		    uniPattern++;
		    if (((startChar <= ch1) && (ch1 <= endChar))
			    || ((endChar <= ch1) && (ch1 <= startChar))) {
			/*
			 * Matches ranges of form [a-z] or [z-a].
			 */
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
	 * characters separated by "-").
	 */

	if (p == '[') {
	    Tcl_UniChar ch1, startChar, endChar;

	    pattern++;
	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
	    string++;
	    while (1) {
		if ((*pattern == ']') || (pattern == patternEnd)) {
		    return 0;
		}
		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
		pattern++;
		if (*pattern == '-') {
		    pattern++;
		    if (pattern == patternEnd) {
			return 0;
		    }
		    endChar = (nocase ? Tcl_UniCharToLower(*pattern)
			    : *pattern);
		    pattern++;
		    if (((startChar <= ch1) && (ch1 <= endChar))
			    || ((endChar <= ch1) && (ch1 <= startChar))) {
			/*
			 * Matches ranges of form [a-z] or [z-a].
			 */







|





|






|







2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
	 * characters separated by "-").
	 */

	if (p == '[') {
	    Tcl_UniChar ch1, startChar, endChar;

	    pattern++;
	    ch1 = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*string) : *string);
	    string++;
	    while (1) {
		if ((*pattern == ']') || (pattern == patternEnd)) {
		    return 0;
		}
		startChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*pattern) : *pattern);
		pattern++;
		if (*pattern == '-') {
		    pattern++;
		    if (pattern == patternEnd) {
			return 0;
		    }
		    endChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*pattern)
			    : *pattern);
		    pattern++;
		    if (((startChar <= ch1) && (ch1 <= endChar))
			    || ((endChar <= ch1) && (ch1 <= startChar))) {
			/*
			 * Matches ranges of form [a-z] or [z-a].
			 */

Changes to generic/tclUtil.c.

2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
		 * character
		 */

		if ((p != '[') && (p != '?') && (p != '\\')) {
		    if (nocase) {
			while (*str) {
			    charLen = TclUtfToUniChar(str, &ch1);
			    if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
				break;
			    }
			    str += charLen;
			}
		    } else {
			/*
			 * There's no point in trying to make this code







|







2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
		 * character
		 */

		if ((p != '[') && (p != '?') && (p != '\\')) {
		    if (nocase) {
			while (*str) {
			    charLen = TclUtfToUniChar(str, &ch1);
			    if (ch2==ch1 || ch2==(Tcl_UniChar)Tcl_UniCharToLower(ch1)) {
				break;
			    }
			    str += charLen;
			}
		    } else {
			/*
			 * There's no point in trying to make this code

Changes to generic/tclZipfs.c.

4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
 *-------------------------------------------------------------------------
 */

int
TclZipfs_AppHook(
    int *argcPtr,		/* Pointer to argc */
#ifdef _WIN32
    TCHAR
#else /* !_WIN32 */
    char
#endif /* _WIN32 */
    ***argvPtr)			/* Pointer to argv */
{
    char *archive;








|







4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
 *-------------------------------------------------------------------------
 */

int
TclZipfs_AppHook(
    int *argcPtr,		/* Pointer to argc */
#ifdef _WIN32
    WCHAR
#else /* !_WIN32 */
    char
#endif /* _WIN32 */
    ***argvPtr)			/* Pointer to argv */
{
    char *archive;

Changes to tests/encoding.test.

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
proc runtests {} {
    variable x

# Some tests require the testencoding command
testConstraint testencoding [llength [info commands testencoding]]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint teststringbytes [llength [info commands teststringbytes]]
testConstraint tip389 [expr {[string length \U010000] == 2}]
testConstraint exec [llength [info commands exec]]
testConstraint testgetencpath [llength [info commands testgetencpath]]

# TclInitEncodingSubsystem is tested by the rest of this file
# TclFinalizeEncodingSubsystem is not currently tested

test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup {







<







32
33
34
35
36
37
38

39
40
41
42
43
44
45
proc runtests {} {
    variable x

# Some tests require the testencoding command
testConstraint testencoding [llength [info commands testencoding]]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint teststringbytes [llength [info commands teststringbytes]]

testConstraint exec [llength [info commands exec]]
testConstraint testgetencpath [llength [info commands testgetencpath]]

# TclInitEncodingSubsystem is tested by the rest of this file
# TclFinalizeEncodingSubsystem is not currently tested

test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup {
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
} 00
test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
    set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]]
    binary scan [teststringbytes $y] H* z
    set z
} c080

test encoding-16.1 {UnicodeToUtfProc} -constraints tip389 -body {
    set val [encoding convertfrom unicode NN]
    list $val [format %x [scan $val %c]]
} -result "\u4e4e 4e4e"
test encoding-16.2 {UnicodeToUtfProc} -constraints tip389 -body {
    set val [encoding convertfrom unicode "\xd8\xd8\xdc\xdc"]
    list $val [format %x [scan $val %c]]
} -result "\U460dc 460dc"

test encoding-17.1 {UtfToUnicodeProc} -constraints tip389 -body {
    encoding convertto unicode "\U460dc"
} -result "\xd8\xd8\xdc\xdc"

test encoding-18.1 {TableToUtfProc} {
} {}

test encoding-19.1 {TableFromUtfProc} {







|



|




|







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
} 00
test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
    set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]]
    binary scan [teststringbytes $y] H* z
    set z
} c080

test encoding-16.1 {UnicodeToUtfProc} -body {
    set val [encoding convertfrom unicode NN]
    list $val [format %x [scan $val %c]]
} -result "\u4e4e 4e4e"
test encoding-16.2 {UnicodeToUtfProc} -body {
    set val [encoding convertfrom unicode "\xd8\xd8\xdc\xdc"]
    list $val [format %x [scan $val %c]]
} -result "\U460dc 460dc"

test encoding-17.1 {UtfToUnicodeProc} -body {
    encoding convertto unicode "\U460dc"
} -result "\xd8\xd8\xdc\xdc"

test encoding-18.1 {TableToUtfProc} {
} {}

test encoding-19.1 {TableFromUtfProc} {

Changes to win/tclWinFile.c.

927
928
929
930
931
932
933

934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
	Tcl_Obj *norm = Tcl_FSGetNormalizedPath(NULL, pathPtr);

	if (norm != NULL) {
	    /*
	     * Match a single file directly.
	     */


	    DWORD attr;
	    WIN32_FILE_ATTRIBUTE_DATA data;
	    const char *str = TclGetString(norm);

	    native = Tcl_FSGetNativePath(pathPtr);

	    if (GetFileAttributesEx(native,
		    GetFileExInfoStandard, &data) != TRUE) {
		return TCL_OK;
	    }
	    attr = data.dwFileAttributes;

	    if (NativeMatchType(WinIsDrive(str,norm->length), attr, native, types)) {
		Tcl_ListObjAppendElement(interp, resultPtr, pathPtr);
	    }
	}
	return TCL_OK;
    } else {
	DWORD attr;
	HANDLE handle;
	WIN32_FIND_DATA data;
	const char *dirName;	/* UTF-8 dir name, later with pattern
				 * appended. */
	size_t dirLength;
	int matchSpecialDots;
	Tcl_DString ds;		/* Native encoding of dir, also used
				 * temporarily for other things. */
	Tcl_DString dsOrig;	/* UTF-8 encoding of dir. */
	Tcl_Obj *fileNamePtr;
	char lastChar;








>


|









|










|







927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
	Tcl_Obj *norm = Tcl_FSGetNormalizedPath(NULL, pathPtr);

	if (norm != NULL) {
	    /*
	     * Match a single file directly.
	     */

	    int len;
	    DWORD attr;
	    WIN32_FILE_ATTRIBUTE_DATA data;
	    const char *str = TclGetStringFromObj(norm, &len);

	    native = Tcl_FSGetNativePath(pathPtr);

	    if (GetFileAttributesEx(native,
		    GetFileExInfoStandard, &data) != TRUE) {
		return TCL_OK;
	    }
	    attr = data.dwFileAttributes;

	    if (NativeMatchType(WinIsDrive(str,len), attr, native, types)) {
		Tcl_ListObjAppendElement(interp, resultPtr, pathPtr);
	    }
	}
	return TCL_OK;
    } else {
	DWORD attr;
	HANDLE handle;
	WIN32_FIND_DATA data;
	const char *dirName;	/* UTF-8 dir name, later with pattern
				 * appended. */
	int dirLength;
	int matchSpecialDots;
	Tcl_DString ds;		/* Native encoding of dir, also used
				 * temporarily for other things. */
	Tcl_DString dsOrig;	/* UTF-8 encoding of dir. */
	Tcl_Obj *fileNamePtr;
	char lastChar;

989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004

	/*
	 * Build up the directory name for searching, including a trailing
	 * directory separator.
	 */

	Tcl_DStringInit(&dsOrig);
	dirName = TclGetString(fileNamePtr);
	dirLength = fileNamePtr->length;
	Tcl_DStringAppend(&dsOrig, dirName, dirLength);

	lastChar = dirName[dirLength -1];
	if ((lastChar != '\\') && (lastChar != '/') && (lastChar != ':')) {
	    TclDStringAppendLiteral(&dsOrig, "/");
	    dirLength++;
	}







|
<







990
991
992
993
994
995
996
997

998
999
1000
1001
1002
1003
1004

	/*
	 * Build up the directory name for searching, including a trailing
	 * directory separator.
	 */

	Tcl_DStringInit(&dsOrig);
	dirName = TclGetStringFromObj(fileNamePtr, &dirLength);

	Tcl_DStringAppend(&dsOrig, dirName, dirLength);

	lastChar = dirName[dirLength -1];
	if ((lastChar != '\\') && (lastChar != '/') && (lastChar != ':')) {
	    TclDStringAppendLiteral(&dsOrig, "/");
	    dirLength++;
	}
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
		Tcl_JoinPath(1, &ptr, bufferPtr);
		rc = 1;
		result = Tcl_DStringValue(bufferPtr);
	    }
	}
	Tcl_DStringFree(&ds);
    } else {
	Tcl_DStringInit(&ds);
	wName = Tcl_UtfToUniCharDString(domain + 1, -1, &ds);
	rc = NetGetDCName(NULL, wName, (LPBYTE *) &wDomain);
	Tcl_DStringFree(&ds);
	nameLen = domain - name;
    }
    if (rc == 0) {
	Tcl_DStringInit(&ds);
	wName = Tcl_UtfToUniCharDString(name, nameLen, &ds);
	while (NetUserGetInfo(wDomain, wName, 1, (LPBYTE *) &uiPtr) != 0) {
	    /*
	     * user does not exists - if domain was not specified,
	     * try again using current domain.
	     */
	    rc = 1;
	    if (domain != NULL) break;
	    /* get current domain */
	    rc = NetGetDCName(NULL, NULL, (LPBYTE *) &wDomain);
	    if (rc != 0) break;
	    domain = INT2PTR(-1); /* repeat once */
	}
	if (rc == 0) {
	    DWORD i, size = MAX_PATH;
	    wHomeDir = uiPtr->usri1_home_dir;
	    if ((wHomeDir != NULL) && (wHomeDir[0] != L'\0')) {
		size = lstrlenW(wHomeDir);
		Tcl_UniCharToUtfDString(wHomeDir, size, bufferPtr);
	    } else {
		/*
		 * User exists but has no home dir. Return
		 * "{GetProfilesDirectory}/<user>".
		 */
		GetProfilesDirectoryW(buf, &size);
		Tcl_UniCharToUtfDString(buf, size-1, bufferPtr);
		Tcl_DStringAppend(bufferPtr, "/", 1);
		Tcl_DStringAppend(bufferPtr, name, nameLen);
	    }
	    result = Tcl_DStringValue(bufferPtr);
	    /* be sure we returns normalized path */
	    for (i = 0; i < size; ++i){
		if (result[i] == '\\') result[i] = '/';
	    }
	    NetApiBufferFree((void *) uiPtr);
	}
	Tcl_DStringFree(&ds);
    }







<
|





<
|

















|






|




|







1460
1461
1462
1463
1464
1465
1466

1467
1468
1469
1470
1471
1472

1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
		Tcl_JoinPath(1, &ptr, bufferPtr);
		rc = 1;
		result = Tcl_DStringValue(bufferPtr);
	    }
	}
	Tcl_DStringFree(&ds);
    } else {

	wName = Tcl_WinUtfToTChar(domain + 1, -1, &ds);
	rc = NetGetDCName(NULL, wName, (LPBYTE *) &wDomain);
	Tcl_DStringFree(&ds);
	nameLen = domain - name;
    }
    if (rc == 0) {

	wName = Tcl_WinUtfToTChar(name, nameLen, &ds);
	while (NetUserGetInfo(wDomain, wName, 1, (LPBYTE *) &uiPtr) != 0) {
	    /*
	     * user does not exists - if domain was not specified,
	     * try again using current domain.
	     */
	    rc = 1;
	    if (domain != NULL) break;
	    /* get current domain */
	    rc = NetGetDCName(NULL, NULL, (LPBYTE *) &wDomain);
	    if (rc != 0) break;
	    domain = INT2PTR(-1); /* repeat once */
	}
	if (rc == 0) {
	    DWORD i, size = MAX_PATH;
	    wHomeDir = uiPtr->usri1_home_dir;
	    if ((wHomeDir != NULL) && (wHomeDir[0] != L'\0')) {
		size = lstrlenW(wHomeDir);
		Tcl_WinTCharToUtf((TCHAR *) wHomeDir, size * sizeof(WCHAR), bufferPtr);
	    } else {
		/*
		 * User exists but has no home dir. Return
		 * "{GetProfilesDirectory}/<user>".
		 */
		GetProfilesDirectoryW(buf, &size);
		Tcl_WinTCharToUtf(buf, (size-1) * sizeof(WCHAR), bufferPtr);
		Tcl_DStringAppend(bufferPtr, "/", 1);
		Tcl_DStringAppend(bufferPtr, name, nameLen);
	    }
	    result = Tcl_DStringValue(bufferPtr);
	    /* be sure we return normalized path */
	    for (i = 0; i < size; ++i){
		if (result[i] == '\\') result[i] = '/';
	    }
	    NetApiBufferFree((void *) uiPtr);
	}
	Tcl_DStringFree(&ds);
    }
2748
2749
2750
2751
2752
2753
2754

2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
		Tcl_DStringLength(&dsNorm), &ds);
	nextCheckpoint = Tcl_DStringLength(&ds);
	if (*lastValidPathEnd != 0) {
	    /*
	     * Not the end of the string.
	     */


	    char *path;
	    Tcl_Obj *tmpPathPtr;

	    tmpPathPtr = Tcl_NewStringObj(Tcl_DStringValue(&ds),
		    nextCheckpoint);
	    Tcl_AppendToObj(tmpPathPtr, lastValidPathEnd, -1);
	    path = TclGetString(tmpPathPtr);
	    Tcl_SetStringObj(pathPtr, path, tmpPathPtr->length);
	    Tcl_DecrRefCount(tmpPathPtr);
	} else {
	    /*
	     * End of string was reached above.
	     */

	    Tcl_SetStringObj(pathPtr, Tcl_DStringValue(&ds), nextCheckpoint);







>






|
|







2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
		Tcl_DStringLength(&dsNorm), &ds);
	nextCheckpoint = Tcl_DStringLength(&ds);
	if (*lastValidPathEnd != 0) {
	    /*
	     * Not the end of the string.
	     */

	    int len;
	    char *path;
	    Tcl_Obj *tmpPathPtr;

	    tmpPathPtr = Tcl_NewStringObj(Tcl_DStringValue(&ds),
		    nextCheckpoint);
	    Tcl_AppendToObj(tmpPathPtr, lastValidPathEnd, -1);
	    path = TclGetStringFromObj(tmpPathPtr, &len);
	    Tcl_SetStringObj(pathPtr, path, len);
	    Tcl_DecrRefCount(tmpPathPtr);
	} else {
	    /*
	     * End of string was reached above.
	     */

	    Tcl_SetStringObj(pathPtr, Tcl_DStringValue(&ds), nextCheckpoint);
2838
2839
2840
2841
2842
2843
2844

2845
2846
2847
2848
2849
2850
2851
2852
2853
	 */
    } else {
	/*
	 * Path of form C:foo/bar, but this only makes sense if the cwd is
	 * also on drive C.
	 */


	const char *drive = TclGetString(useThisCwd);
	size_t cwdLen = useThisCwd->length;
	char drive_cur = path[0];

	if (drive_cur >= 'a') {
	    drive_cur -= ('a' - 'A');
	}
	if (drive[0] == drive_cur) {
	    absolutePath = Tcl_DuplicateObj(useThisCwd);







>
|
|







2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
	 */
    } else {
	/*
	 * Path of form C:foo/bar, but this only makes sense if the cwd is
	 * also on drive C.
	 */

	int cwdLen;
	const char *drive =
		TclGetStringFromObj(useThisCwd, &cwdLen);
	char drive_cur = path[0];

	if (drive_cur >= 'a') {
	    drive_cur -= ('a' - 'A');
	}
	if (drive[0] == drive_cur) {
	    absolutePath = Tcl_DuplicateObj(useThisCwd);