Tcl Source Code

Check-in [44527c632e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid generating string representation when comparing the empty string.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-6-branch
Files: files | file ages | folders
SHA3-256: 44527c632ed609c24f146216eae23f91e017e8afc6e2ef60e0fe41a08ff75114
User & Date: pooryorick 2018-05-05 17:04:08
Context
2018-05-06
13:45
Preparation to deduplicate code between byte-compiled and legacy implementations of [string compare]... check-in: e423459fce user: pooryorick tags: core-8-6-branch
2018-05-05
17:04
Avoid generating string representation when comparing the empty string. check-in: 44527c632e user: pooryorick tags: core-8-6-branch
16:38
Avoid generating string representation when comparing the empty string. Closed-Leaf check-in: 14d9d1de63 user: pooryorick tags: pyk-emptystring-core-8-6
2018-05-04
19:06
Update TZ info to tzdata2018e. check-in: adc50e99d2 user: jima tags: core-8-6-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclExecute.c.

5436
5437
5438
5439
5440
5441
5442
5443
5444

5445
5446





















5447










5448




5449
5450
5451
5452
5453

5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469


5470
5471
5472
5473
5474
5475
5476
			s2len *= sizeof(Tcl_UniChar);
		    } else {
			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		    }
		}
	    } else {
		/*
		 * strcmp can't do a simple memcmp in order to handle the
		 * special Tcl \xC0\x80 null encoding for utf-8.

		 */






















		s1 = TclGetStringFromObj(valuePtr, &s1len);










		s2 = TclGetStringFromObj(value2Ptr, &s2len);




		if (checkEq) {
		    memCmpFn = memcmp;
		} else {
		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
		}

	    }

	    if (checkEq && (s1len != s2len)) {
		match = 1;
	    } else {
		/*
		 * The comparison function should compare up to the minimum
		 * byte length only.
		 */
		match = memCmpFn(s1, s2,
			(size_t) ((s1len < s2len) ? s1len : s2len));
		if (match == 0) {
		    match = s1len - s2len;
		}
	    }
	}



	/*
	 * Make sure only -1,0,1 is returned
	 * TODO: consider peephole opt.
	 */

	if (*pc != INST_STR_CMP) {







<
|
>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>





>




|











>
>







5436
5437
5438
5439
5440
5441
5442

5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485
5486
5487
5488
5489
5490
5491
5492
5493
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
			s2len *= sizeof(Tcl_UniChar);
		    } else {
			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		    }
		}
	    } else {
		/*

		 * In order to handle the special Tcl \xC0\x80 null encoding
		 * for utf-8, strcmp can't do a simple memcmp.
		 */

		{
		    int empty;
		    if ((empty = TclCheckEmptyString(valuePtr)) > 0) {
			switch (TclCheckEmptyString(value2Ptr)) {
			    case -1:
				s1 = "";
				s1len = 0;
				s2 = TclGetStringFromObj(value2Ptr, &s2len);
				break;
			    case 0:
				match = -1;
				goto matchdone;
			    case 1:
				match = 0;
				goto matchdone;
			}
		    } else if (TclCheckEmptyString(value2Ptr) > 0) {
			switch (empty) {
			    case -1:
				s2 = "";
				s2len = 0;
				s1 = TclGetStringFromObj(valuePtr, &s1len);
				break;
			    case 0:
				match = 1;
				goto matchdone;
			    case 1:
				match = 0;
				goto matchdone;
			}
		    } else {
			s1 = TclGetStringFromObj(valuePtr, &s1len);
			s2 = TclGetStringFromObj(value2Ptr, &s2len);
		    }
		}


		if (checkEq) {
		    memCmpFn = memcmp;
		} else {
		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
		}

	    }

	    if (checkEq && (s1len != s2len)) {
		match = 1;
	    }  else {
		/*
		 * The comparison function should compare up to the minimum
		 * byte length only.
		 */
		match = memCmpFn(s1, s2,
			(size_t) ((s1len < s2len) ? s1len : s2len));
		if (match == 0) {
		    match = s1len - s2len;
		}
	    }
	}

	matchdone:

	/*
	 * Make sure only -1,0,1 is returned
	 * TODO: consider peephole opt.
	 */

	if (*pc != INST_STR_CMP) {
6137
6138
6139
6140
6141
6142
6143








6144
6145
6146
6147
6148
6149
6150
    case INST_GT:
    case INST_LE:
    case INST_GE: {
	int iResult = 0, compare = 0;

	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;









	if (GetNumberFromObj(NULL, valuePtr, &ptr1, &type1) != TCL_OK
		|| GetNumberFromObj(NULL, value2Ptr, &ptr2, &type2) != TCL_OK) {
	    /*
	     * At least one non-numeric argument - compare as strings.
	     */








>
>
>
>
>
>
>
>







6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195
6196
    case INST_GT:
    case INST_LE:
    case INST_GE: {
	int iResult = 0, compare = 0;

	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;

	/*
	    Try to determine, without triggering generation of a string
	    representation, whether one value is not a number.
	*/
	if (TclCheckEmptyString(valuePtr) > 0 || TclCheckEmptyString(value2Ptr) > 0) {
	    goto stringCompare;
	}

	if (GetNumberFromObj(NULL, valuePtr, &ptr1, &type1) != TCL_OK
		|| GetNumberFromObj(NULL, value2Ptr, &ptr2, &type2) != TCL_OK) {
	    /*
	     * At least one non-numeric argument - compare as strings.
	     */

Changes to generic/tclInt.h.

2731
2732
2733
2734
2735
2736
2737




2738
2739
2740
2741
2742
2743
2744
 * the value of an empty string representation for an object. This value is
 * shared by all new objects allocated by Tcl_NewObj.
 */

MODULE_SCOPE char *	tclEmptyStringRep;
MODULE_SCOPE char	tclEmptyString;





/*
 *----------------------------------------------------------------
 * Procedures shared among Tcl modules but not used by the outside world,
 * introduced by/for NRE.
 *----------------------------------------------------------------
 */








>
>
>
>







2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
 * the value of an empty string representation for an object. This value is
 * shared by all new objects allocated by Tcl_NewObj.
 */

MODULE_SCOPE char *	tclEmptyStringRep;
MODULE_SCOPE char	tclEmptyString;

enum CheckEmptyStringResult {
	TCL_EMPTYSTRING_UNKNOWN = -1, TCL_EMPTYSTRING_NO, TCL_EMPTYSTRING_YES
};

/*
 *----------------------------------------------------------------
 * Procedures shared among Tcl modules but not used by the outside world,
 * introduced by/for NRE.
 *----------------------------------------------------------------
 */

2871
2872
2873
2874
2875
2876
2877

2878
2879
2880
2881
2882
2883
2884
MODULE_SCOPE double	TclCeil(const mp_int *a);
MODULE_SCOPE void	TclChannelPreserve(Tcl_Channel chan);
MODULE_SCOPE void	TclChannelRelease(Tcl_Channel chan);
MODULE_SCOPE int	TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr,
			    Var *arrayPtr, Tcl_Obj *name, int index);
MODULE_SCOPE int	TclCheckBadOctal(Tcl_Interp *interp,
			    const char *value);

MODULE_SCOPE int	TclChanCaughtErrorBypass(Tcl_Interp *interp,
			    Tcl_Channel chan);
MODULE_SCOPE Tcl_ObjCmdProc TclChannelNamesCmd;
MODULE_SCOPE Tcl_NRPostProc TclClearRootEnsemble;
MODULE_SCOPE ContLineLoc *TclContinuationsEnter(Tcl_Obj *objPtr, int num,
			    int *loc);
MODULE_SCOPE void	TclContinuationsEnterDerived(Tcl_Obj *objPtr,







>







2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
MODULE_SCOPE double	TclCeil(const mp_int *a);
MODULE_SCOPE void	TclChannelPreserve(Tcl_Channel chan);
MODULE_SCOPE void	TclChannelRelease(Tcl_Channel chan);
MODULE_SCOPE int	TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr,
			    Var *arrayPtr, Tcl_Obj *name, int index);
MODULE_SCOPE int	TclCheckBadOctal(Tcl_Interp *interp,
			    const char *value);
MODULE_SCOPE int	TclCheckEmptyString(Tcl_Obj *objPtr);
MODULE_SCOPE int	TclChanCaughtErrorBypass(Tcl_Interp *interp,
			    Tcl_Channel chan);
MODULE_SCOPE Tcl_ObjCmdProc TclChannelNamesCmd;
MODULE_SCOPE Tcl_NRPostProc TclClearRootEnsemble;
MODULE_SCOPE ContLineLoc *TclContinuationsEnter(Tcl_Obj *objPtr, int num,
			    int *loc);
MODULE_SCOPE void	TclContinuationsEnterDerived(Tcl_Obj *objPtr,
4451
4452
4453
4454
4455
4456
4457





4458
4459
4460
4461
4462
4463
4464
 *
 * MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
 *----------------------------------------------------------------
 */

#define TclIsPureByteArray(objPtr) \
	(((objPtr)->typePtr==&tclByteArrayType) && ((objPtr)->bytes==NULL))






/*
 *----------------------------------------------------------------
 * Macro used by the Tcl core to compare Unicode strings. On big-endian
 * systems we can use the more efficient memcmp, but this would not be
 * lexically correct on little-endian systems. The ANSI C "prototype" for
 * this macro is:







>
>
>
>
>







4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
 *
 * MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
 *----------------------------------------------------------------
 */

#define TclIsPureByteArray(objPtr) \
	(((objPtr)->typePtr==&tclByteArrayType) && ((objPtr)->bytes==NULL))
#define TclIsPureDict(objPtr) \
	(((objPtr)->bytes==NULL) && ((objPtr)->typePtr==&tclDictType))

#define TclIsPureList(objPtr) \
	(((objPtr)->bytes==NULL) && ((objPtr)->typePtr==&tclListType))

/*
 *----------------------------------------------------------------
 * Macro used by the Tcl core to compare Unicode strings. On big-endian
 * systems we can use the more efficient memcmp, but this would not be
 * lexically correct on little-endian systems. The ANSI C "prototype" for
 * this macro is:

Changes to generic/tclStringObj.c.

429
430
431
432
433
434
435

436
437
438
439
440
441
442

    if (objPtr->typePtr == &tclByteArrayType) {
	int length;

	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
	return length;
    }


    /*
     * OK, need to work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);







>







429
430
431
432
433
434
435
436
437
438
439
440
441
442
443

    if (objPtr->typePtr == &tclByteArrayType) {
	int length;

	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
	return length;
    }


    /*
     * OK, need to work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);
460
461
462
463
464
465
466












































467
468
469
470
471
472
473

	    FillUnicodeRep(objPtr);
	}
#endif
    }
    return numChars;
}













































/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *	Get the index'th Unicode character from the String object. The index







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518

	    FillUnicodeRep(objPtr);
	}
#endif
    }
    return numChars;
}



/*
 *----------------------------------------------------------------------
 *
 * TclCheckEmptyString --
 *
 *	Determine whether the string value of an object is or would be the
 *	empty string, without generating a string representation.
 *
 * Results:
 *	Returns 1 if empty, 0 if not, and -1 if unknown.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */
int
TclCheckEmptyString (
    Tcl_Obj *objPtr
) {
    int length = -1;

    if (objPtr->bytes == tclEmptyStringRep) {
	return TCL_EMPTYSTRING_YES;
    }

    if (TclIsPureList(objPtr)) {
	Tcl_ListObjLength(NULL, objPtr, &length);
	return length == 0;
    }

    if (TclIsPureDict(objPtr)) {
	Tcl_DictObjSize(NULL, objPtr, &length);
	return length == 0;
    }
    
    if (objPtr->bytes == NULL) {
	return TCL_EMPTYSTRING_UNKNOWN;
    }
    return objPtr->length == 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *	Get the index'th Unicode character from the String object. The index