Tcl Source Code

Check-in [89f027f118]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[3613609]: Replace strcasecmp() with UTF-8-aware version.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 89f027f118c0cae741036295827ac6105d9dc781
User & Date: dkf 2013-05-22 13:07:33
Context
2013-05-22
13:34
silence compiler warning check-in: 13314a4573 user: dgp tags: trunk
13:07
[3613609]: Replace strcasecmp() with UTF-8-aware version. check-in: 89f027f118 user: dkf tags: trunk
12:59
[3613609]: Replace strcasecmp() with UTF-8-aware version. check-in: 8cc7cdfbd6 user: dkf tags: core-8-5-branch
10:36
* doc/file.n: [Bug 3613671]: Added note to portability section on the fact that [file owned] does ...
check-in: a9869d4e5a user: dkf tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ChangeLog.







1
2
3
4
5
6
7






2013-05-22  Donal K. Fellows  <[email protected]>

	* doc/file.n: [Bug 3613671]: Added note to portability section on the
	fact that [file owned] does not produce useful results on Windows.

2013-05-20  Donal K. Fellows  <[email protected]>

>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
2013-05-22  Donal K. Fellows  <[email protected]>

	* generic/tclUtf.c (TclUtfCasecmp): [Bug 3613609]: Replace problematic
	uses of strcasecmp with a proper UTF-8-aware version. Affects both
	[lsearch -nocase] and [lsort -nocase].

2013-05-22  Donal K. Fellows  <[email protected]>

	* doc/file.n: [Bug 3613671]: Added note to portability section on the
	fact that [file owned] does not produce useful results on Windows.

2013-05-20  Donal K. Fellows  <[email protected]>

Changes to generic/tclCmdIL.c.

3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
	case LSEARCH_INLINE:		/* -inline */
	    inlineReturn = 1;
	    break;
	case LSEARCH_INTEGER:		/* -integer */
	    dataType = INTEGER;
	    break;
	case LSEARCH_NOCASE:		/* -nocase */
	    strCmpFn = strcasecmp;
	    noCase = 1;
	    break;
	case LSEARCH_NOT:		/* -not */
	    negatedMatch = 1;
	    break;
	case LSEARCH_REAL:		/* -real */
	    dataType = REAL;







|







3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
	case LSEARCH_INLINE:		/* -inline */
	    inlineReturn = 1;
	    break;
	case LSEARCH_INTEGER:		/* -integer */
	    dataType = INTEGER;
	    break;
	case LSEARCH_NOCASE:		/* -nocase */
	    strCmpFn = TclUtfCasecmp;
	    noCase = 1;
	    break;
	case LSEARCH_NOT:		/* -not */
	    negatedMatch = 1;
	    break;
	case LSEARCH_REAL:		/* -real */
	    dataType = REAL;
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
		    if (length == elemLen) {
			/*
			 * This split allows for more optimal compilation of
			 * memcmp/strcasecmp.
			 */

			if (noCase) {
			    match = (strcasecmp(bytes, patternBytes) == 0);
			} else {
			    match = (memcmp(bytes, patternBytes,
				    (size_t) length) == 0);
			}
		    }
		    break;








|







3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
		    if (length == elemLen) {
			/*
			 * This split allows for more optimal compilation of
			 * memcmp/TclUtfCasecmp.
			 */

			if (noCase) {
			    match = (TclUtfCasecmp(bytes, patternBytes) == 0);
			} else {
			    match = (memcmp(bytes, patternBytes,
				    (size_t) length) == 0);
			}
		    }
		    break;

3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
	    long a;

	    if (TclGetLongFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].collationKey.intValue = a;
	} else if (sortInfo.sortMode == SORTMODE_REAL) {
	    double a;

	    if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr,
		    &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }







|







3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
	    long a;

	    if (TclGetLongFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].collationKey.intValue = a;
	} else if (sortMode == SORTMODE_REAL) {
	    double a;

	    if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr,
		    &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
	Tcl_SetObjResult(interp, resultPtr);
    }

  done1:
    TclStackFree(interp, elementArray);

  done:
    if (sortInfo.sortMode == SORTMODE_COMMAND) {
	TclDecrRefCount(sortInfo.compareCmdPtr);
	TclDecrRefCount(listObj);
	sortInfo.compareCmdPtr = NULL;
    }
  done2:
    if (allocatedIndexVector) {
	TclStackFree(interp, sortInfo.indexv);







|







4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
	Tcl_SetObjResult(interp, resultPtr);
    }

  done1:
    TclStackFree(interp, elementArray);

  done:
    if (sortMode == SORTMODE_COMMAND) {
	TclDecrRefCount(sortInfo.compareCmdPtr);
	TclDecrRefCount(listObj);
	sortInfo.compareCmdPtr = NULL;
    }
  done2:
    if (allocatedIndexVector) {
	TclStackFree(interp, sortInfo.indexv);
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
{
    int order = 0;

    if (infoPtr->sortMode == SORTMODE_ASCII) {
	order = strcmp(elemPtr1->collationKey.strValuePtr,
		elemPtr2->collationKey.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
	order = strcasecmp(elemPtr1->collationKey.strValuePtr,
		elemPtr2->collationKey.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
	order = DictionaryCompare(elemPtr1->collationKey.strValuePtr,
		elemPtr2->collationKey.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_INTEGER) {
	long a, b;








|







4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
{
    int order = 0;

    if (infoPtr->sortMode == SORTMODE_ASCII) {
	order = strcmp(elemPtr1->collationKey.strValuePtr,
		elemPtr2->collationKey.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
	order = TclUtfCasecmp(elemPtr1->collationKey.strValuePtr,
		elemPtr2->collationKey.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
	order = DictionaryCompare(elemPtr1->collationKey.strValuePtr,
		elemPtr2->collationKey.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_INTEGER) {
	long a, b;

Changes to generic/tclCmdMZ.c.

3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
	     * General options.
	     */

	case OPT_LAST:
	    i++;
	    goto finishedOptions;
	case OPT_NOCASE:
	    strCmpFn = strcasecmp;
	    noCase = 1;
	    break;

	    /*
	     * Handle the different switch mode options.
	     */








|







3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
	     * General options.
	     */

	case OPT_LAST:
	    i++;
	    goto finishedOptions;
	case OPT_NOCASE:
	    strCmpFn = TclUtfCasecmp;
	    noCase = 1;
	    break;

	    /*
	     * Handle the different switch mode options.
	     */

Changes to generic/tclInt.h.

3090
3091
3092
3093
3094
3095
3096

3097
3098
3099
3100
3101
3102
3103
MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
			    int count, int *tokensLeftPtr, int line,
			    int *clNextOuter, const char *outerScript);
MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);

MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int	TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
			    Tcl_LoadHandle *loadHandle,
			    Tcl_FSUnloadFileProc **unloadProcPtr, int flags);
MODULE_SCOPE int	TclpUtime(Tcl_Obj *pathPtr, struct utimbuf *tval);
#ifdef TCL_LOAD_FROM_MEMORY







>







3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
			    int count, int *tokensLeftPtr, int line,
			    int *clNextOuter, const char *outerScript);
MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int	TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
			    Tcl_LoadHandle *loadHandle,
			    Tcl_FSUnloadFileProc **unloadProcPtr, int flags);
MODULE_SCOPE int	TclpUtime(Tcl_Obj *pathPtr, struct utimbuf *tval);
#ifdef TCL_LOAD_FROM_MEMORY

Changes to generic/tclUtf.c.

1098
1099
1100
1101
1102
1103
1104








































1105
1106
1107
1108
1109
1110
1111
	    if (ch1 != ch2) {
		return (ch1 - ch2);
	    }
	}
    }
    return 0;
}









































/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
	    if (ch1 != ch2) {
		return (ch1 - ch2);
	    }
	}
    }
    return 0;
}

/*
 *----------------------------------------------------------------------
 *
 * TclUtfCasecmp --
 *
 *	Compare the UTF-8 strings cs and ct case insensitively, one decoded
 *	character at a time, until either string reaches its terminating NUL
 *	byte. Replacement for strcasecmp in Tcl core, in places where UTF-8
 *	should be handled.
 *
 * Results:
 *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. Characters
 *	that differ are ordered by their lower-case forms; when one string
 *	is a prefix of the other, the shorter string orders first.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclUtfCasecmp(
    const char *cs,		/* UTF string to compare to ct. */
    const char *ct)		/* UTF string cs is compared to. */
{
    while (*cs && *ct) {
	Tcl_UniChar ch1, ch2;

	/*
	 * Decode the next character of each string, advancing each pointer
	 * by the amount TclUtfToUniChar reports.
	 */

	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);
	if (ch1 != ch2) {
	    /*
	     * Case folding is only needed when the raw characters differ.
	     */

	    ch1 = Tcl_UniCharToLower(ch1);
	    ch2 = Tcl_UniCharToLower(ch2);
	    if (ch1 != ch2) {
		return ch1 - ch2;
	    }
	}
    }

    /*
     * At least one string is exhausted here. The difference of the current
     * bytes is 0 when both ended together; otherwise the string that ended
     * first compares as the smaller one.
     */

    return UCHAR(*cs) - UCHAR(*ct);
}


/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.

Changes to tests/cmdIL.test.

413
414
415
416
417
418
419









420
421
422
423
424
425
426
} [list ! ` AA c CC]
test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d e c b a d35 d300 100 20}
} {100 20 a b c d d300 d35 e}
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}










test cmdIL-5.1 {lsort with list style index} {
    lsort -ascii -decreasing -index {0 1} {
	{{Jim Alpha} 20000410}
	{{Joe Bravo} 19990320}
	{{Jacky Charlie} 19390911}
    }







>
>
>
>
>
>
>
>
>







413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
} [list ! ` AA c CC]
test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d e c b a d35 d300 100 20}
} {100 20 a b c d d300 d35 e}
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}
# U+0101 and U+0100 are the lower- and upper-case forms of the same letter.
# The expected result keeps them in their input order under -nocase.
test cmdIL-4.36 {SortCompare procedure, UTF-8 with -nocase option} {
    scan [lsort -ascii -nocase [list \u101 \u100]] %c%c%c
} {257 32 256}
# A string containing an embedded NUL character must sort after the
# one-character string that is its prefix.
test cmdIL-4.37 {SortCompare procedure, UTF-8 with -nocase option} {
    scan [lsort -ascii -nocase [list a\u0000a a]] %c%c%c%c%c
} {97 32 97 0 97}
# Same pair as the previous test with the input order reversed; the
# expected output is identical.
test cmdIL-4.38 {SortCompare procedure, UTF-8 with -nocase option} {
    scan [lsort -ascii -nocase [list a a\u0000a]] %c%c%c%c%c
} {97 32 97 0 97}

test cmdIL-5.1 {lsort with list style index} {
    lsort -ascii -decreasing -index {0 1} {
	{{Jim Alpha} 20000410}
	{{Joe Bravo} 19990320}
	{{Jacky Charlie} 19390911}
    }