Tcl Source Code

Check-in [8cc7cdfbd6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[3613609]: Replace strcasecmp() with UTF-8-aware version.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-5-branch
Files: files | file ages | folders
SHA1: 8cc7cdfbd6122482fddb9317b5410cd4f9372ee6
User & Date: dkf 2013-05-22 12:59:45
Context
2013-05-22
13:14
silence compiler warning check-in: dcd61515be user: dgp tags: core-8-5-branch
13:07
[3613609]: Replace strcasecmp() with UTF-8-aware version. check-in: 89f027f118 user: dkf tags: trunk
12:59
[3613609]: Replace strcasecmp() with UTF-8-aware version. check-in: 8cc7cdfbd6 user: dkf tags: core-8-5-branch
12:55
Fixed the weird edge case. Closed-Leaf check-in: 93dd8bb33b user: dkf tags: bug-3613609
2013-05-19
20:26
Don't #define VOID on VxWorks, as it is already typdef'd to void. Eliminate possibly conflicting LOC... check-in: 157acf6411 user: jan.nijtmans tags: core-8-5-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ChangeLog.







1
2
3
4
5
6
7






2013-05-19  Jan Nijtmans  <[email protected]>

	* unix/tcl.m4:     Fix for FreeBSD, and remove support for older
	* unix/configure:  FreeBSD versions. Patch by Pietro Cerutti.

2013-05-16  Jan Nijtmans  <[email protected]>

>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
2013-05-22  Donal K. Fellows  <[email protected]>

	* generic/tclUtf.c (TclUtfCasecmp): [Bug 3613609]: Replace problematic
	uses of strcasecmp with a proper UTF-8-aware version. Affects both
	[lsearch -nocase] and [lsort -nocase].

2013-05-19  Jan Nijtmans  <[email protected]>

	* unix/tcl.m4:     Fix for FreeBSD, and remove support for older
	* unix/configure:  FreeBSD versions. Patch by Pietro Cerutti.

2013-05-16  Jan Nijtmans  <[email protected]>

Changes to generic/tclCmdIL.c.

2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
	case LSEARCH_INLINE:		/* -inline */
	    inlineReturn = 1;
	    break;
	case LSEARCH_INTEGER:		/* -integer */
	    dataType = INTEGER;
	    break;
	case LSEARCH_NOCASE:		/* -nocase */
	    strCmpFn = strcasecmp;
	    noCase = 1;
	    break;
	case LSEARCH_NOT:		/* -not */
	    negatedMatch = 1;
	    break;
	case LSEARCH_REAL:		/* -real */
	    dataType = REAL;







|







2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
	case LSEARCH_INLINE:		/* -inline */
	    inlineReturn = 1;
	    break;
	case LSEARCH_INTEGER:		/* -integer */
	    dataType = INTEGER;
	    break;
	case LSEARCH_NOCASE:		/* -nocase */
	    strCmpFn = TclUtfCasecmp;
	    noCase = 1;
	    break;
	case LSEARCH_NOT:		/* -not */
	    negatedMatch = 1;
	    break;
	case LSEARCH_REAL:		/* -real */
	    dataType = REAL;
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
		    if (length == elemLen) {
			/*
			 * This split allows for more optimal compilation of
			 * memcmp/strcasecmp.
			 */

			if (noCase) {
			    match = (strcasecmp(bytes, patternBytes) == 0);
			} else {
			    match = (memcmp(bytes, patternBytes,
				    (size_t) length) == 0);
			}
		    }
		    break;








|







3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
		    if (length == elemLen) {
			/*
			 * This split allows for more optimal compilation of
			 * memcmp/strcasecmp.
			 */

			if (noCase) {
			    match = (TclUtfCasecmp(bytes, patternBytes) == 0);
			} else {
			    match = (memcmp(bytes, patternBytes,
				    (size_t) length) == 0);
			}
		    }
		    break;

3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
	} else if (sortMode == SORTMODE_INTEGER) {
	    long a;
	    if (TclGetLongFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.intValue = a;
	} else if (sortInfo.sortMode == SORTMODE_REAL) {
	    double a;
	    if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.doubleValue = a;
	} else {







|







3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
	} else if (sortMode == SORTMODE_INTEGER) {
	    long a;
	    if (TclGetLongFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.intValue = a;
	} else if (sortMode == SORTMODE_REAL) {
	    double a;
	    if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.doubleValue = a;
	} else {
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
	Tcl_SetObjResult(interp, resultPtr);
    }

  done1:
    ckfree((char *)elementArray);

  done:
    if (sortInfo.sortMode == SORTMODE_COMMAND) {
	TclDecrRefCount(sortInfo.compareCmdPtr);
	TclDecrRefCount(listObj);
	sortInfo.compareCmdPtr = NULL;
    }
    if (sortInfo.indexc > 1) {
	ckfree((char *) sortInfo.indexv);
    }







|







3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
	Tcl_SetObjResult(interp, resultPtr);
    }

  done1:
    ckfree((char *)elementArray);

  done:
    if (sortMode == SORTMODE_COMMAND) {
	TclDecrRefCount(sortInfo.compareCmdPtr);
	TclDecrRefCount(listObj);
	sortInfo.compareCmdPtr = NULL;
    }
    if (sortInfo.indexc > 1) {
	ckfree((char *) sortInfo.indexv);
    }
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
{
    int order = 0;

    if (infoPtr->sortMode == SORTMODE_ASCII) {
	order = strcmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
	order = strcasecmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
	order = DictionaryCompare(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_INTEGER) {
	long a, b;








|







3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
{
    int order = 0;

    if (infoPtr->sortMode == SORTMODE_ASCII) {
	order = strcmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
	order = TclUtfCasecmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
	order = DictionaryCompare(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_INTEGER) {
	long a, b;

Changes to generic/tclCmdMZ.c.

3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
	     * General options.
	     */

	case OPT_LAST:
	    i++;
	    goto finishedOptions;
	case OPT_NOCASE:
	    strCmpFn = strcasecmp;
	    noCase = 1;
	    break;

	    /*
	     * Handle the different switch mode options.
	     */








|







3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
	     * General options.
	     */

	case OPT_LAST:
	    i++;
	    goto finishedOptions;
	case OPT_NOCASE:
	    strCmpFn = TclUtfCasecmp;
	    noCase = 1;
	    break;

	    /*
	     * Handle the different switch mode options.
	     */

Changes to generic/tclInt.h.

2758
2759
2760
2761
2762
2763
2764

2765
2766
2767
2768
2769
2770
2771
MODULE_SCOPE void	TclpFinalizeThreadDataThread(void);
MODULE_SCOPE void	TclFinalizeThreadStorage(void);
#ifdef TCL_WIDE_CLICKS
MODULE_SCOPE Tcl_WideInt TclpGetWideClicks(void);
MODULE_SCOPE double	TclpWideClicksToNanoseconds(Tcl_WideInt clicks);
#endif
MODULE_SCOPE Tcl_Obj *	TclDisassembleByteCodeObj(Tcl_Obj *objPtr);


/*
 *----------------------------------------------------------------
 * Command procedures in the generic core:
 *----------------------------------------------------------------
 */








>







2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
MODULE_SCOPE void	TclpFinalizeThreadDataThread(void);
MODULE_SCOPE void	TclFinalizeThreadStorage(void);
#ifdef TCL_WIDE_CLICKS
MODULE_SCOPE Tcl_WideInt TclpGetWideClicks(void);
MODULE_SCOPE double	TclpWideClicksToNanoseconds(Tcl_WideInt clicks);
#endif
MODULE_SCOPE Tcl_Obj *	TclDisassembleByteCodeObj(Tcl_Obj *objPtr);
MODULE_SCOPE int TclUtfCasecmp(CONST char *cs, CONST char *ct);

/*
 *----------------------------------------------------------------
 * Command procedures in the generic core:
 *----------------------------------------------------------------
 */

Changes to generic/tclUtf.c.

1097
1098
1099
1100
1101
1102
1103








































1104
1105
1106
1107
1108
1109
1110
	    if (ch1 != ch2) {
		return (ch1 - ch2);
	    }
	}
    }
    return 0;
}









































/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
	    if (ch1 != ch2) {
		return (ch1 - ch2);
	    }
	}
    }
    return 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UtfNcasecmp --
 *
 *	Compare UTF chars of string cs to string ct case insensitively.
 *	Replacement for strcasecmp in Tcl core, in places where UTF-8 should
 *	be handled.
 *
 * Results:
 *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclUtfCasecmp(
    CONST char *cs,		/* UTF string to compare to ct. */
    CONST char *ct)		/* UTF string cs is compared to. */
{
    while (*cs && *ct) {
	Tcl_UniChar ch1, ch2;

	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);
	if (ch1 != ch2) {
	    ch1 = Tcl_UniCharToLower(ch1);
	    ch2 = Tcl_UniCharToLower(ch2);
	    if (ch1 != ch2) {
		return ch1 - ch2;
	    }
	}
    }
    return UCHAR(*cs) - UCHAR(*ct);
}


/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.

Changes to tests/cmdIL.test.

390
391
392
393
394
395
396









397
398
399
400
401
402
403
} [list ! ` AA c CC]
test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d e c b a d35 d300 100 20}
} {100 20 a b c d d300 d35 e}
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}










test cmdIL-5.1 {lsort with list style index} {
    lsort -ascii -decreasing -index {0 1} {
	{{Jim Alpha} 20000410}
	{{Joe Bravo} 19990320}
	{{Jacky Charlie} 19390911}
    }







>
>
>
>
>
>
>
>
>







390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
} [list ! ` AA c CC]
test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d e c b a d35 d300 100 20}
} {100 20 a b c d d300 d35 e}
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}
test cmdIL-4.36 {SortCompare procedure, UTF-8 with -nocase option} {
    scan [lsort -ascii -nocase [list \u101 \u100]] %c%c%c
} {257 32 256}
test cmdIL-4.37 {SortCompare procedure, UTF-8 with -nocase option} {
    scan [lsort -ascii -nocase [list a\u0000a a]] %c%c%c%c%c
} {97 32 97 0 97}
test cmdIL-4.38 {SortCompare procedure, UTF-8 with -nocase option} {
    scan [lsort -ascii -nocase [list a a\u0000a]] %c%c%c%c%c
} {97 32 97 0 97}

test cmdIL-5.1 {lsort with list style index} {
    lsort -ascii -decreasing -index {0 1} {
	{{Jim Alpha} 20000410}
	{{Joe Bravo} 19990320}
	{{Jacky Charlie} 19390911}
    }