Tcl Source Code

Check-in [f0adfe7dac]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:(partial) fix for 00a27923ee: text/entry dysfunctional when pasting an emoji on MacOSX. Don't handle incoming valid 4-byte UTF-8 characters as invalid byte sequences (since they aren't), but as being the Unicode replacement character.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bug-00a27923ee
Files: files | file ages | folders
SHA3-256: f0adfe7dacbc6c229043189a4508f8d6c2199ce27a111ddee74430f0ef00b069
User & Date: jan.nijtmans 2018-01-09 11:15:14
Context
2018-01-10
08:25
Fix 00a27923ee: (Tcl part, re... check-in: 8481a52495 user: jan.nijtmans tags: core-8-branch
2018-01-09
11:15
(partial) fix for 00a27923ee:... Closed-Leaf check-in: f0adfe7dac user: jan.nijtmans tags: bug-00a27923ee
2018-01-04
13:00
(cherry-pick): Use http 2 instead of http 1 for Safe Base testing. check-in: 116dad3ca7 user: jan.nijtmans tags: core-8-6-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclUtf.c.

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
#if TCL_UTF_MAX > 3
    4,4,4,4,4,4,4,4,
#else
    1,1,1,1,1,1,1,1,
#endif
    1,1,1,1,1,1,1,1
};

/*
 * Functions used only in this module.
 */








<

<
<
<







64
65
66
67
68
69
70

71



72
73
74
75
76
77
78
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,

    4,4,4,4,4,4,4,4,



    1,1,1,1,1,1,1,1
};

/*
 * Functions used only in this module.
 */

330
331
332
333
334
335
336
337
338
339
340
341
342
343










344
345
346
347
348
349
350
	}

	/*
	 * A three-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }
#if TCL_UTF_MAX > 3
    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX == 4










	    Tcl_UniChar surrogate;

	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
	    surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
	    if (byte & 0x100000) {
		/* out of range, < 0x10000 or > 0x10ffff */







<





|
>
>
>
>
>
>
>
>
>
>







326
327
328
329
330
331
332

333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
	}

	/*
	 * A three-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }

    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX == 3
	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
	    if (byte & 0x100000) {
		/* out of range, < 0x10000 or > 0x10ffff */
	    } else {
		/* produce replacement character, and advance source pointer */
		*chPtr = (Tcl_UniChar) 0xFFFD;
		return 4;
	    }
#elif TCL_UTF_MAX == 4
	    Tcl_UniChar surrogate;

	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
	    surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
	    if (byte & 0x100000) {
		/* out of range, < 0x10000 or > 0x10ffff */
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
	}

	/*
	 * A four-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }
#endif

    *chPtr = (Tcl_UniChar) byte;
    return 1;
}

/*
 *---------------------------------------------------------------------------







<







372
373
374
375
376
377
378

379
380
381
382
383
384
385
	}

	/*
	 * A four-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }


    *chPtr = (Tcl_UniChar) byte;
    return 1;
}

/*
 *---------------------------------------------------------------------------
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
    if (length < 0) {
	while (*src != '\0') {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (i < 0) i = INT_MAX; /* Bug [2738427] */
    } else {
	register const char *endPtr = src + length - TCL_UTF_MAX;

	while (src < endPtr) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	endPtr += TCL_UTF_MAX;
	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (src < endPtr) {
	    i += endPtr - src;
	}







|





|







505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
    if (length < 0) {
	while (*src != '\0') {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (i < 0) i = INT_MAX; /* Bug [2738427] */
    } else {
	register const char *endPtr = src + length - 4;

	while (src < endPtr) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	endPtr += 4;
	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (src < endPtr) {
	    i += endPtr - src;
	}
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
    const char *start)		/* Pointer to the beginning of the string, to
				 * avoid going backwards too far. */
{
    const char *look;
    int i, byte;

    look = --src;
    for (i = 0; i < TCL_UTF_MAX; i++) {
	if (look < start) {
	    if (src < start) {
		src = start;
	    }
	    break;
	}
	byte = *((unsigned char *) look);







|







683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
    const char *start)		/* Pointer to the beginning of the string, to
				 * avoid going backwards too far. */
{
    const char *look;
    int i, byte;

    look = --src;
    for (i = 0; i < 4; i++) {
	if (look < start) {
	    if (src < start) {
		src = start;
	    }
	    break;
	}
	byte = *((unsigned char *) look);