Tcl Source Code

Check-in [af900c390e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:merge novem
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | dgp-refactor
Files: files | file ages | folders
SHA1: af900c390e718c9bf7fb9e757cff3b3e393163ec
User & Date: dgp 2014-12-23 18:52:10
Context
2015-02-26
16:40
merge novem check-in: c0ddc62dbe user: dgp tags: dgp-refactor
2014-12-23
18:52
merge novem check-in: af900c390e user: dgp tags: dgp-refactor
18:44
merge trunk check-in: 526ce54517 user: dgp tags: novem
15:37
merge novem check-in: 37e28f586b user: dgp tags: dgp-refactor
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tcl.h.

1929
1930
1931
1932
1933
1934
1935















1936
1937
1938
1939
1940


1941
1942
1943
1944
1945
1946
1947
 *				immediately upon encountering an invalid byte
 *				sequence or a source character that has no
 *				mapping in the target encoding. If clear, then
 *				the converter will skip the problem,
 *				substituting one or more "close" characters in
 *				the destination buffer and then continue to
 *				convert the source.















 */

#define TCL_ENCODING_START		0x01
#define TCL_ENCODING_END		0x02
#define TCL_ENCODING_STOPONERROR	0x04



/*
 * The following definitions are the error codes returned by the conversion
 * routines:
 *
 * TCL_OK -			All characters were converted.
 * TCL_CONVERT_NOSPACE -	The output buffer would not have been large







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





>
>







1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
 *				immediately upon encountering an invalid byte
 *				sequence or a source character that has no
 *				mapping in the target encoding. If clear, then
 *				the converter will skip the problem,
 *				substituting one or more "close" characters in
 *				the destination buffer and then continue to
 *				convert the source.
 * TCL_ENCODING_NO_TERMINATE - 	If set, Tcl_ExternalToUtf will not append a
 *				terminating NUL byte.  Knowing that it will
 *				not need space to do so, it will fill all
 *				dstLen bytes with encoded UTF-8 content, as
 *				other circumstances permit.  If clear, the
 *				default behavior is to reserve a byte in
 *				the dst space for NUL termination, and to
 *				append the NUL byte.
 * TCL_ENCODING_CHAR_LIMIT -	If set and dstCharsPtr is not NULL, then
 *				Tcl_ExternalToUtf takes the initial value
 *				of *dstCharsPtr is taken as a limit of the
 *				maximum number of chars to produce in the
 *				encoded UTF-8 content.  Otherwise, the 
 *				number of chars produced is controlled only
 *				by other limiting factors.
 */

#define TCL_ENCODING_START		0x01
#define TCL_ENCODING_END		0x02
#define TCL_ENCODING_STOPONERROR	0x04
#define TCL_ENCODING_NO_TERMINATE	0x08
#define TCL_ENCODING_CHAR_LIMIT		0x10

/*
 * The following definitions are the error codes returned by the conversion
 * routines:
 *
 * TCL_OK -			All characters were converted.
 * TCL_CONVERT_NOSPACE -	The output buffer would not have been large

Changes to generic/tclEncoding.c.

1140
1141
1142
1143
1144
1145
1146
1147



1148
1149
1150
1151
1152
1153
1154
				 * stored in the output buffer as a result of
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const Encoding *encodingPtr;
    int result, srcRead, dstWrote, dstChars;



    Tcl_EncodingState state;

    if (encoding == NULL) {
	encoding = systemEncoding;
    }
    encodingPtr = (Encoding *) encoding;








|
>
>
>







1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
				 * stored in the output buffer as a result of
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const Encoding *encodingPtr;
    int result, srcRead, dstWrote, dstChars = 0;
    int noTerminate = flags & TCL_ENCODING_NO_TERMINATE;
    int charLimited = (flags & TCL_ENCODING_CHAR_LIMIT) && dstCharsPtr;
    int maxChars = INT_MAX;
    Tcl_EncodingState state;

    if (encoding == NULL) {
	encoding = systemEncoding;
    }
    encodingPtr = (Encoding *) encoding;

1165
1166
1167
1168
1169
1170
1171



1172
1173

1174
1175
1176
1177

1178
1179
1180





1181
1182
1183










1184

1185
1186
1187
1188
1189
1190
1191
	srcReadPtr = &srcRead;
    }
    if (dstWrotePtr == NULL) {
	dstWrotePtr = &dstWrote;
    }
    if (dstCharsPtr == NULL) {
	dstCharsPtr = &dstChars;



    }


    /*
     * If there are any null characters in the middle of the buffer, they will
     * converted to the UTF-8 null character (\xC080). To get the actual \0 at
     * the end of the destination buffer, we need to append it manually.

     */

    dstLen--;





    result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
	    flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
	    dstCharsPtr);










    dst[*dstWrotePtr] = '\0';


    return result;
}

/*
 *-------------------------------------------------------------------------
 *







>
>
>


>
|
|
|
|
>
|

|
>
>
>
>
>
|
|
|
>
>
>
>
>
>
>
>
>
>
|
>







1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
	srcReadPtr = &srcRead;
    }
    if (dstWrotePtr == NULL) {
	dstWrotePtr = &dstWrote;
    }
    if (dstCharsPtr == NULL) {
	dstCharsPtr = &dstChars;
	flags &= ~TCL_ENCODING_CHAR_LIMIT;
    } else if (charLimited) {
	maxChars = *dstCharsPtr;
    }

    if (!noTerminate) {
	/*
	 * If there are any null characters in the middle of the buffer,
	 * they will converted to the UTF-8 null character (\xC080). To get
	 * the actual \0 at the end of the destination buffer, we need to
	 * append it manually.  First make room for it...
	 */

	dstLen--;
    }
    do {
	int savedFlags = flags;
	Tcl_EncodingState savedState = *statePtr;

	result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
		flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
		dstCharsPtr);
	if (*dstCharsPtr <= maxChars) {
	    break;
	}
	dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX;
	flags = savedFlags;
	*statePtr = savedState;
    } while (1);
    if (!noTerminate) {
	/* ...and then append it */

	dst[*dstWrotePtr] = '\0';
    }

    return result;
}

/*
 *-------------------------------------------------------------------------
 *
2040
2041
2042
2043
2044
2045
2046



2047
2048
2049
2050
2051
2052
2053
{
    int result;

    result = TCL_OK;
    dstLen -= TCL_UTF_MAX - 1;
    if (dstLen < 0) {
	dstLen = 0;



    }
    if (srcLen > dstLen) {
	srcLen = dstLen;
	result = TCL_CONVERT_NOSPACE;
    }

    *srcReadPtr = srcLen;







>
>
>







2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
{
    int result;

    result = TCL_OK;
    dstLen -= TCL_UTF_MAX - 1;
    if (dstLen < 0) {
	dstLen = 0;
    }
    if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) {
	srcLen = *dstCharsPtr;
    }
    if (srcLen > dstLen) {
	srcLen = dstLen;
	result = TCL_CONVERT_NOSPACE;
    }

    *srcReadPtr = srcLen;
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218



2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
				 * output buffer. */
    int pureNullMode)		/* Convert embedded nulls from internal
				 * representation to real null-bytes or vice
				 * versa. */
{
    const char *srcStart, *srcEnd, *srcClose;
    const char *dstStart, *dstEnd;
    int result, numChars;
    Tcl_UniChar ch;

    result = TCL_OK;

    srcStart = src;
    srcEnd = src + srcLen;
    srcClose = srcEnd;
    if ((flags & TCL_ENCODING_END) == 0) {
	srcClose -= TCL_UTF_MAX;
    }




    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd; numChars++) {
	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
	    /*
	     * If there is more string to follow, this will ensure that the
	     * last UTF-8 character in the source buffer hasn't been cut off.
	     */

	    result = TCL_CONVERT_MULTIBYTE;







|










>
>
>




|







2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
				 * output buffer. */
    int pureNullMode)		/* Convert embedded nulls from internal
				 * representation to real null-bytes or vice
				 * versa. */
{
    const char *srcStart, *srcEnd, *srcClose;
    const char *dstStart, *dstEnd;
    int result, numChars, charLimit = INT_MAX;
    Tcl_UniChar ch;

    result = TCL_OK;

    srcStart = src;
    srcEnd = src + srcLen;
    srcClose = srcEnd;
    if ((flags & TCL_ENCODING_END) == 0) {
	srcClose -= TCL_UTF_MAX;
    }
    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
	    /*
	     * If there is more string to follow, this will ensure that the
	     * last UTF-8 character in the source buffer hasn't been cut off.
	     */

	    result = TCL_CONVERT_MULTIBYTE;
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321



2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars;
    Tcl_UniChar ch;




    result = TCL_OK;
    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen /= sizeof(Tcl_UniChar);
	srcLen *= sizeof(Tcl_UniChar);
    }

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with







|


>
>
>













|







2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars, charLimit = INT_MAX;
    Tcl_UniChar ch;

    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    result = TCL_OK;
    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen /= sizeof(Tcl_UniChar);
	srcLen *= sizeof(Tcl_UniChar);
    }

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508



2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart, *prefixBytes;
    int result, byte, numChars;
    Tcl_UniChar ch;
    const unsigned short *const *toUnicode;
    const unsigned short *pageZero;
    TableEncodingData *dataPtr = clientData;




    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    toUnicode = (const unsigned short *const *) dataPtr->toUnicode;
    prefixBytes = dataPtr->prefixBytes;
    pageZero = toUnicode[0];

    result = TCL_OK;
    for (numChars = 0; src < srcEnd; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	byte = *((unsigned char *) src);
	if (prefixBytes[byte]) {
	    src++;







|





>
>
>











|







2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart, *prefixBytes;
    int result, byte, numChars, charLimit = INT_MAX;
    Tcl_UniChar ch;
    const unsigned short *const *toUnicode;
    const unsigned short *pageZero;
    TableEncodingData *dataPtr = clientData;

    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    toUnicode = (const unsigned short *const *) dataPtr->toUnicode;
    prefixBytes = dataPtr->prefixBytes;
    pageZero = toUnicode[0];

    result = TCL_OK;
    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	byte = *((unsigned char *) src);
	if (prefixBytes[byte]) {
	    src++;
2727
2728
2729
2730
2731
2732
2733
2734
2735



2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars;




    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    result = TCL_OK;
    for (numChars = 0; src < srcEnd; numChars++) {
	Tcl_UniChar ch;

	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	ch = (Tcl_UniChar) *((unsigned char *) src);







|

>
>
>







|







2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars, charLimit = INT_MAX;

    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    result = TCL_OK;
    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	Tcl_UniChar ch;

	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	ch = (Tcl_UniChar) *((unsigned char *) src);
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961



2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    EscapeEncodingData *dataPtr = clientData;
    const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd;
    const unsigned short *const *tableToUnicode;
    const Encoding *encodingPtr;
    int state, result, numChars;
    const char *dstStart, *dstEnd;




    result = TCL_OK;
    tablePrefixBytes = NULL;	/* lint. */
    tableToUnicode = NULL;	/* lint. */
    prefixBytes = dataPtr->prefixBytes;
    encodingPtr = NULL;

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    state = PTR2INT(*statePtr);
    if (flags & TCL_ENCODING_START) {
	state = 0;
    }

    for (numChars = 0; src < srcEnd; ) {
	int byte, hi, lo, ch;

	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	byte = *((unsigned char *) src);







|


>
>
>

















|







2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    EscapeEncodingData *dataPtr = clientData;
    const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd;
    const unsigned short *const *tableToUnicode;
    const Encoding *encodingPtr;
    int state, result, numChars, charLimit = INT_MAX;
    const char *dstStart, *dstEnd;

    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    result = TCL_OK;
    tablePrefixBytes = NULL;	/* lint. */
    tableToUnicode = NULL;	/* lint. */
    prefixBytes = dataPtr->prefixBytes;
    encodingPtr = NULL;

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    state = PTR2INT(*statePtr);
    if (flags & TCL_ENCODING_START) {
	state = 0;
    }

    for (numChars = 0; src < srcEnd && numChars <= charLimit; ) {
	int byte, hi, lo, ch;

	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	byte = *((unsigned char *) src);

Changes to generic/tclIO.c.

4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586

4587
4588
4589
4590
4591
4592
4593
4594
4595
	    if (GotFlag(statePtr, INPUT_SAW_CR)) {
		ResetFlag(statePtr, INPUT_SAW_CR);
		if ((eol < dstEnd) && (*eol == '\n')) {
		    /*
		     * Skip the raw bytes that make up the '\n'.
		     */

		    char tmp[1 + TCL_UTF_MAX];
		    int rawRead;

		    bufPtr = gs.bufPtr;
		    Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr),
			    gs.rawRead, statePtr->inputEncodingFlags,

			    &gs.state, tmp, 1 + TCL_UTF_MAX, &rawRead, NULL,
			    NULL);
		    bufPtr->nextRemoved += rawRead;
		    gs.rawRead -= rawRead;
		    gs.bytesWrote--;
		    gs.charsWrote--;
		    memmove(dst, dst + 1, (size_t) (dstEnd - dst));
		    dstEnd--;
		}







|




|
>
|
<







4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588

4589
4590
4591
4592
4593
4594
4595
	    if (GotFlag(statePtr, INPUT_SAW_CR)) {
		ResetFlag(statePtr, INPUT_SAW_CR);
		if ((eol < dstEnd) && (*eol == '\n')) {
		    /*
		     * Skip the raw bytes that make up the '\n'.
		     */

		    char tmp[TCL_UTF_MAX];
		    int rawRead;

		    bufPtr = gs.bufPtr;
		    Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr),
			    gs.rawRead, statePtr->inputEncodingFlags
				| TCL_ENCODING_NO_TERMINATE, &gs.state, tmp,
			    TCL_UTF_MAX, &rawRead, NULL, NULL);

		    bufPtr->nextRemoved += rawRead;
		    gs.rawRead -= rawRead;
		    gs.bytesWrote--;
		    gs.charsWrote--;
		    memmove(dst, dst + 1, (size_t) (dstEnd - dst));
		    dstEnd--;
		}
4682
4683
4684
4685
4686
4687
4688

4689
4690
4691
4692
4693
4694
4695
4696
4697

    bufPtr = gs.bufPtr;
    if (bufPtr == NULL) {
	Tcl_Panic("Tcl_GetsObj: gotEOL reached with bufPtr==NULL");
    }
    statePtr->inputEncodingState = gs.state;
    Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), gs.rawRead,

	    statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst,
	    eol - dst + skip + TCL_UTF_MAX, &gs.rawRead, NULL,
	    &gs.charsWrote);
    bufPtr->nextRemoved += gs.rawRead;

    /*
     * Recycle all the emptied buffers.
     */








>
|
|







4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698

    bufPtr = gs.bufPtr;
    if (bufPtr == NULL) {
	Tcl_Panic("Tcl_GetsObj: gotEOL reached with bufPtr==NULL");
    }
    statePtr->inputEncodingState = gs.state;
    Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), gs.rawRead,
	    statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
	    &statePtr->inputEncodingState, dst,
	    eol - dst + skip + TCL_UTF_MAX - 1, &gs.rawRead, NULL,
	    &gs.charsWrote);
    bufPtr->nextRemoved += gs.rawRead;

    /*
     * Recycle all the emptied buffers.
     */

5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
	}
	spaceLeft = length - offset;
	dst = objPtr->bytes + offset;
	*gsPtr->dstPtr = dst;
    }
    gsPtr->state = statePtr->inputEncodingState;
    result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
	    statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
	    dst, spaceLeft+1, &gsPtr->rawRead, &gsPtr->bytesWrote,
	    &gsPtr->charsWrote);

    /*
     * Make sure that if we go through 'gets', that we reset the
     * TCL_ENCODING_START flag still. [Bug #523988]
     */

    statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;







|
|
|







5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
	}
	spaceLeft = length - offset;
	dst = objPtr->bytes + offset;
	*gsPtr->dstPtr = dst;
    }
    gsPtr->state = statePtr->inputEncodingState;
    result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
	    statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
	    &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
	    &gsPtr->bytesWrote, &gsPtr->charsWrote);

    /*
     * Make sure that if we go through 'gets', that we reset the
     * TCL_ENCODING_START flag still. [Bug #523988]
     */

    statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
5924
5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979
5980






5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
    Tcl_Encoding encoding = statePtr->encoding? statePtr->encoding
	    : GetBinaryEncoding();
    Tcl_EncodingState savedState = statePtr->inputEncodingState;
    ChannelBuffer *bufPtr = statePtr->inQueueHead;
    int savedIEFlags = statePtr->inputEncodingFlags;
    int savedFlags = statePtr->flags;
    char *dst, *src = RemovePoint(bufPtr);
    int dstLimit, numBytes, srcLen = BytesLeft(bufPtr);

    /*
     * One src byte can yield at most one character.  So when the
     * number of src bytes we plan to read is less than the limit on
     * character count to be read, clearly we will remain within that
     * limit, and we can use the value of "srcLen" as a tighter limit
     * for sizing receiving buffers.
     */

    int toRead = ((charsToRead<0)||(charsToRead > srcLen)) ? srcLen : charsToRead;

    /*
     * 'factor' is how much we guess that the bytes in the source buffer will
     * expand when converted to UTF-8 chars. This guess comes from analyzing
     * how many characters were produced by the previous pass.
     */
    
    int factor = *factorPtr;
    int dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;

    (void) TclGetStringFromObj(objPtr, &numBytes);
    Tcl_AppendToObj(objPtr, NULL, dstNeeded);
    if (toRead == srcLen) {
	unsigned int size;
	dst = TclGetStringStorage(objPtr, &size) + numBytes;
	dstNeeded = size - numBytes;
    } else {
	dst = TclGetString(objPtr) + numBytes;
    }

    /*
     * This routine is burdened with satisfying several constraints.
     * It cannot append more than 'charsToRead` chars onto objPtr.
     * This is measured after encoding and translation transformations
     * are completed.  There is no precise number of src bytes that can
     * be associated with the limit.  Yet, when we are done, we must know
     * precisely the number of src bytes that were consumed to produce
     * the appended chars, so that all subsequent bytes are left in
     * the buffers for future read operations.
     *
     * The consequence is that we have no choice but to implement a
     * "trial and error" approach, where in general we may need to
     * perform transformations and copies multiple times to achieve
     * a consistent set of results.  This takes the shape of a loop.
     */

    dstLimit = dstNeeded + 1;
    while (1) {
	int dstDecoded, dstRead, dstWrote, srcRead, numChars;







	/*
	 * Perform the encoding transformation.  Read no more than
	 * srcLen bytes, write no more than dstLimit bytes.
	 */

	int code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
		statePtr->inputEncodingFlags & (bufPtr->nextPtr
		? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
		dst, dstLimit, &srcRead, &dstDecoded, &numChars);

	/*
	 * Perform the translation transformation in place.  Read no more
	 * than the dstDecoded bytes the encoding transformation actually
	 * produced.  Capture the number of bytes written in dstWrote.
	 * Capture the number of bytes actually consumed in dstRead.
	 */







|


















|


|



|




















<

|
>
>
>
>
>
>






|
|
|
|







5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978

5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
    Tcl_Encoding encoding = statePtr->encoding? statePtr->encoding
	    : GetBinaryEncoding();
    Tcl_EncodingState savedState = statePtr->inputEncodingState;
    ChannelBuffer *bufPtr = statePtr->inQueueHead;
    int savedIEFlags = statePtr->inputEncodingFlags;
    int savedFlags = statePtr->flags;
    char *dst, *src = RemovePoint(bufPtr);
    int numBytes, srcLen = BytesLeft(bufPtr);

    /*
     * One src byte can yield at most one character.  So when the
     * number of src bytes we plan to read is less than the limit on
     * character count to be read, clearly we will remain within that
     * limit, and we can use the value of "srcLen" as a tighter limit
     * for sizing receiving buffers.
     */

    int toRead = ((charsToRead<0)||(charsToRead > srcLen)) ? srcLen : charsToRead;

    /*
     * 'factor' is how much we guess that the bytes in the source buffer will
     * expand when converted to UTF-8 chars. This guess comes from analyzing
     * how many characters were produced by the previous pass.
     */
    
    int factor = *factorPtr;
    int dstLimit = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;

    (void) TclGetStringFromObj(objPtr, &numBytes);
    Tcl_AppendToObj(objPtr, NULL, dstLimit);
    if (toRead == srcLen) {
	unsigned int size;
	dst = TclGetStringStorage(objPtr, &size) + numBytes;
	dstLimit = size - numBytes;
    } else {
	dst = TclGetString(objPtr) + numBytes;
    }

    /*
     * This routine is burdened with satisfying several constraints.
     * It cannot append more than 'charsToRead` chars onto objPtr.
     * This is measured after encoding and translation transformations
     * are completed.  There is no precise number of src bytes that can
     * be associated with the limit.  Yet, when we are done, we must know
     * precisely the number of src bytes that were consumed to produce
     * the appended chars, so that all subsequent bytes are left in
     * the buffers for future read operations.
     *
     * The consequence is that we have no choice but to implement a
     * "trial and error" approach, where in general we may need to
     * perform transformations and copies multiple times to achieve
     * a consistent set of results.  This takes the shape of a loop.
     */


    while (1) {
	int dstDecoded, dstRead, dstWrote, srcRead, numChars, code;
	int flags = statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE;
	
	if (charsToRead > 0) {
	    flags |= TCL_ENCODING_CHAR_LIMIT;
	    numChars = charsToRead;
	}

	/*
	 * Perform the encoding transformation.  Read no more than
	 * srcLen bytes, write no more than dstLimit bytes.
	 */

	code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
		flags & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
		&statePtr->inputEncodingState, dst, dstLimit, &srcRead,
		&dstDecoded, &numChars);

	/*
	 * Perform the translation transformation in place.  Read no more
	 * than the dstDecoded bytes the encoding transformation actually
	 * produced.  Capture the number of bytes written in dstWrote.
	 * Capture the number of bytes actually consumed in dstRead.
	 */
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
		    /*
		     * There are chars leading the buffer before the eof
		     * char.  Adjust the dstLimit so we go back and read
		     * only those and do not encounter the eof char this
		     * time.
		     */

		    dstLimit = dstRead + TCL_UTF_MAX;
		    statePtr->flags = savedFlags;
		    statePtr->inputEncodingFlags = savedIEFlags;
		    statePtr->inputEncodingState = savedState;
		    continue;
		}
	    }








|







6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
		    /*
		     * There are chars leading the buffer before the eof
		     * char.  Adjust the dstLimit so we go back and read
		     * only those and do not encounter the eof char this
		     * time.
		     */

		    dstLimit = dstRead - 1 + TCL_UTF_MAX;
		    statePtr->flags = savedFlags;
		    statePtr->inputEncodingFlags = savedIEFlags;
		    statePtr->inputEncodingState = savedState;
		    continue;
		}
	    }

6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108

6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
		/*
		 * There are chars we can read before we hit the bare cr.
		 * Go back with a smaller dstLimit so we get them in the
		 * next pass, compute a matching srcRead, and don't end
		 * up back here in this call.
		 */

		dstLimit = dstRead + TCL_UTF_MAX;
		statePtr->flags = savedFlags;
		statePtr->inputEncodingFlags = savedIEFlags;
		statePtr->inputEncodingState = savedState;
		continue;
	    }

	    assert(dstWrote == 0);
	    assert(dstRead == 0);

	    /*
	     * We decoded only the bare cr, and we cannot read a
	     * translated char from that alone.  We have to know what's
	     * next.  So why do we only have the one decoded char?
	     */

	    if (code != TCL_OK) {
		char buffer[TCL_UTF_MAX + 2];
		int read, decoded, count;

		/* 
		 * Didn't get everything the buffer could offer
		 */

		statePtr->flags = savedFlags;
		statePtr->inputEncodingFlags = savedIEFlags;
		statePtr->inputEncodingState = savedState;

		Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
		statePtr->inputEncodingFlags & (bufPtr->nextPtr

		? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
		buffer, TCL_UTF_MAX + 2, &read, &decoded, &count);

		if (count == 2) {
		    if (buffer[1] == '\n') {
			/* \r\n translate to \n */
			dst[0] = '\n';
			bufPtr->nextRemoved += read;
		    } else {
			dst[0] = '\r';
			bufPtr->nextRemoved += srcRead;
		    }

		    dst[1] = '\0';
		    statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;

		    Tcl_SetObjLength(objPtr, numBytes + 1);
		    return 1;
		}

	    } else if (statePtr->flags & CHANNEL_EOF) {







|
















|











|
>
|
|











<







6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128

6129
6130
6131
6132
6133
6134
6135
		/*
		 * There are chars we can read before we hit the bare cr.
		 * Go back with a smaller dstLimit so we get them in the
		 * next pass, compute a matching srcRead, and don't end
		 * up back here in this call.
		 */

		dstLimit = dstRead - 1 + TCL_UTF_MAX;
		statePtr->flags = savedFlags;
		statePtr->inputEncodingFlags = savedIEFlags;
		statePtr->inputEncodingState = savedState;
		continue;
	    }

	    assert(dstWrote == 0);
	    assert(dstRead == 0);

	    /*
	     * We decoded only the bare cr, and we cannot read a
	     * translated char from that alone.  We have to know what's
	     * next.  So why do we only have the one decoded char?
	     */

	    if (code != TCL_OK) {
		char buffer[TCL_UTF_MAX + 1];
		int read, decoded, count;

		/* 
		 * Didn't get everything the buffer could offer
		 */

		statePtr->flags = savedFlags;
		statePtr->inputEncodingFlags = savedIEFlags;
		statePtr->inputEncodingState = savedState;

		Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
		(statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE)
		& (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
		&statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1,
		&read, &decoded, &count);

		if (count == 2) {
		    if (buffer[1] == '\n') {
			/* \r\n translate to \n */
			dst[0] = '\n';
			bufPtr->nextRemoved += read;
		    } else {
			dst[0] = '\r';
			bufPtr->nextRemoved += srcRead;
		    }


		    statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;

		    Tcl_SetObjLength(objPtr, numBytes + 1);
		    return 1;
		}

	    } else if (statePtr->flags & CHANNEL_EOF) {
6156
6157
6158
6159
6160
6161
6162


6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
	 */

	numChars -= (dstRead - dstWrote);

	if (charsToRead > 0 && numChars > charsToRead) {

	    /* 


	     * We read more chars than allowed.  Reset limits to
	     * prevent that and try again.  Don't forget the extra
	     * padding of TCL_UTF_MAX bytes demanded by the
	     * Tcl_ExternalToUtf() call!
	     */

	    dstLimit = Tcl_UtfAtIndex(dst, charsToRead) + TCL_UTF_MAX - dst;
	    statePtr->flags = savedFlags;
	    statePtr->inputEncodingFlags = savedIEFlags;
	    statePtr->inputEncodingState = savedState;
	    continue;
	}

	if (dstWrote == 0) {







>
>






|







6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
	 */

	numChars -= (dstRead - dstWrote);

	if (charsToRead > 0 && numChars > charsToRead) {

	    /* 
	     * TODO: This cannot happen anymore.
	     *
	     * We read more chars than allowed.  Reset limits to
	     * prevent that and try again.  Don't forget the extra
	     * padding of TCL_UTF_MAX bytes demanded by the
	     * Tcl_ExternalToUtf() call!
	     */

	    dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - 1 + TCL_UTF_MAX - dst;
	    statePtr->flags = savedFlags;
	    statePtr->inputEncodingFlags = savedIEFlags;
	    statePtr->inputEncodingState = savedState;
	    continue;
	}

	if (dstWrote == 0) {