Tcl Source Code

Check-in [39ae4108bf]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: fix tests utf-2.8 and utf-2.9
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | tip-389-impl
Files: files | file ages | folders
SHA1: 39ae4108bf48367379a0cbd35fdf767f92af72e8
User & Date: jan.nijtmans 2011-08-24 08:32:39
Context
2011-08-25
12:11
Merge to feature branch check-in: 4ddaab72c4 user: jan.nijtmans tags: tip-389-impl
2011-08-24
08:32
fix tests utf-2.8 and utf-2.9 check-in: 39ae4108bf user: jan.nijtmans tags: tip-389-impl
07:50
Upcoming TIP implementation: Full support for Unicode 6.0 check-in: 5721cf9ae6 user: jan.nijtmans tags: tip-389-impl
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclUtf.c.

560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580




581
582
583
584
585
586
587
588
589
590
591



592
593
594
595
596
597
598
/*
 * Tcl_NumUtfChars: count the characters in a UTF-8 string by walking it one
 * decoded unit at a time. Returns the number of steps taken to consume the
 * string (one step per loop iteration below).
 */
int
Tcl_NumUtfChars(
    register const char *src,	/* The UTF-8 string to measure. */
    int length)			/* The length of the string in bytes, or -1
				 * for strlen(string). */
{
    Tcl_UniChar ch = 0;		/* Scratch target for the decoders; its value
				 * is never read in this function. */
    register Tcl_UniChar *chPtr = &ch;
    register int i;		/* Running character count (the result). */

    /*
     * The separate implementations are faster.
     *
     * Since this is a time-sensitive function, we also do the check for the
     * single-byte char case specially.
     */

    i = 0;
    if (length < 0) {
	/*
	 * No explicit length: walk until the terminating NUL byte, advancing
	 * by however many bytes TclUtfToUniChar reports for each character.
	 */

	while (*src != '\0') {
	    src += TclUtfToUniChar(src, chPtr);




	    i++;
	}
    } else {
	register int n;		/* Bytes consumed by the current decode. */

	/*
	 * Explicit byte length: stop once 'length' bytes are used up.
	 */

	while (length > 0) {
	    if (UCHAR(*src) < 0xC0) {
		/*
		 * Fast path: any byte below 0xC0 (this includes 0x80-0xBF,
		 * not just ASCII) is counted as one character without calling
		 * the decoder.
		 */

		length--;
		src++;
	    } else {
		/*
		 * Lead byte of a multi-byte sequence: let the decoder report
		 * how many bytes it used.
		 * NOTE(review): nothing here checks n against the remaining
		 * length; if n > length the loop simply ends with length < 0.
		 * Whether the decoder can read past the caller's byte count
		 * on a truncated sequence depends on Tcl_UtfToUniChar --
		 * confirm.
		 */

		n = Tcl_UtfToUniChar(src, chPtr);



		length -= n;
		src += n;
	    }
	    i++;
	}
    }
    return i;







<
|











|
>
>
>
>



<
<





|
>
>
>







560
561
562
563
564
565
566

567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586


587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
/*
 * Tcl_NumUtfChars: count the characters in a UTF-8 string by walking it one
 * decoded unit at a time. Returns the number of steps taken to consume the
 * string (one step per loop iteration below).
 */
int
Tcl_NumUtfChars(
    register const char *src,	/* The UTF-8 string to measure. */
    int length)			/* The length of the string in bytes, or -1
				 * for strlen(string). */
{
    Tcl_UniChar ch = 0;		/* Scratch target for the decoders; the same
				 * variable is passed to the retry call, so
				 * whatever the first call stored is visible
				 * to the second. */

    register int i, n;		/* i: running character count (the result);
				 * n: bytes consumed by the current decode. */

    /*
     * The separate implementations are faster.
     *
     * Since this is a time-sensitive function, we also do the check for the
     * single-byte char case specially.
     */

    i = 0;
    if (length < 0) {
	/*
	 * No explicit length: walk until the terminating NUL byte.
	 */

	while (*src != '\0') {
	    n = TclUtfToUniChar(src, &ch);
	    if (!n) {
		/*
		 * The decoder consumed no bytes; call it once more on the
		 * same position so that src advances and the pair of calls
		 * is counted as a single character.
		 * NOTE(review): presumably a 0 return means the first half
		 * of a surrogate pair was stored in ch and the second call
		 * completes it (TIP #389) -- confirm against
		 * Tcl_UtfToUniChar. If the second call could also return 0,
		 * this loop would not advance.
		 */

	        n = Tcl_UtfToUniChar(src, &ch);
	    }
	    src += n;
	    i++;
	}
    } else {


	/*
	 * Explicit byte length: stop once 'length' bytes are used up.
	 */

	while (length > 0) {
	    if (UCHAR(*src) < 0xC0) {
		/*
		 * Fast path: any byte below 0xC0 (this includes 0x80-0xBF,
		 * not just ASCII) is counted as one character without calling
		 * the decoder.
		 */

		length--;
		src++;
	    } else {
		/*
		 * Lead byte of a multi-byte sequence. As in the branch
		 * above, a 0 return is retried once so the sequence is
		 * consumed and counted as one character.
		 * NOTE(review): n is not checked against the remaining
		 * length; if n > length the loop simply ends with
		 * length < 0 -- confirm the decoder cannot read past the
		 * caller's byte count on a truncated sequence.
		 */

		n = Tcl_UtfToUniChar(src, &ch);
		if (!n) {
		    n = Tcl_UtfToUniChar(src, &ch);
		}
		length -= n;
		src += n;
	    }
	    i++;
	}
    }
    return i;
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
    register const char *src,	/* The UTF-8 string. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;
    int len;

    while (index > 0) {
    index--;
	len = TclUtfToUniChar(src, &ch);
	if (!len) {
	    len = TclUtfToUniChar(src, &ch);
	}
	src += len;
    }
    return src;







|







823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
    register const char *src,	/* The UTF-8 string. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;
    int len;

    while (index > 0) {
	index--;
	len = TclUtfToUniChar(src, &ch);
	if (!len) {
	    len = TclUtfToUniChar(src, &ch);
	}
	src += len;
    }
    return src;