Tcl Source Code

Check-in [0c26cd8178]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to tclconference@googlegroups.com
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:code review (typo fixed, compiler compat, etc) + more test cases (hashing of not canonical form of integer)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | tip-534-sebres-fast-number-hash
Files: files | file ages | folders
SHA3-256:0c26cd8178c6741e0fcd54d32734b24d77c9400bb6a16c133b506d03409f18fe
User & Date: sebres 2019-05-17 17:43:11
Context
2019-05-17
18:26
more clean-up, size_t-related consolidation (prepared for unsigned object length in 9.0) check-in: 9208e61ec9 user: sebres tags: tip-534-sebres-fast-number-hash
17:43
code review (typo fixed, compiler compat, etc) + more test cases (hashing of not canonical form of i... check-in: 0c26cd8178 user: sebres tags: tip-534-sebres-fast-number-hash
15:18
introduces fast hash algorithm for canonical numeric objects (all wide integer ranges covered now), ... check-in: a614b04af1 user: sebres tags: tip-534-sebres-fast-number-hash
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclObj.c.

4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
....
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261





4262
4263
4264
4265
4266
4267
4268
....
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368

4369
4370
4371
4372
4373
4374
4375
4376

4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
....
4393
4394
4395
4396
4397
4398
4399

4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459

4460
4461
4462
4463
4464
4465
4466
int
TclCompareObjKeys(
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    register Tcl_Obj *objPtr1 = keyPtr;
    register Tcl_Obj *objPtr2 = hPtr->key.objPtr;
    register int l1, l2;

    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller
     *
     * if (objPtr1 == objPtr2) return 1;
     *
................................................................................
    /*
     * Only compare string representations of the same length.
     */

    if (l1 != l2) {
        return 0;
    } else {
	register const char *p1 = objPtr1->bytes, *p2 = objPtr2->bytes;

	assert(p1 != NULL && p2 != NULL);
	if (!l1) {
	    return 0;
	}





	do {
	    if (*p1++ != *p2++) {
		return 0;
	    }
	} while (--l1);
    }
    return 1;
................................................................................
    /*
     * TIP #534, use fast integer hashing if it is canonical
     */
    if (objPtr->typePtr == &tclIntType && IntObjIsCanonical(objPtr)) {
	Tcl_WideUInt num = objPtr->internalRep.wideValue;
	/* remove sign and hash it differently */
	if (objPtr->internalRep.wideValue < 0) {
	    num = -num;
	    result = (TCL_HASH_TYPE)'-' << 31; /* 45<<31 == 0x(x64?16:0)80000000 */
	}

    #if ((TCL_HASH_TYPE)-1) > 0xffffffff
	/* unsigned 64-bit as unsigned 64-bit integer */
	result += (TCL_HASH_TYPE)objPtr->internalRep.wideValue;
    #else
	/* unsigned 64-bit as sum of parts in 32-bit unsigned */
	result += (TCL_HASH_TYPE)(num / 1000000000)
		+ (TCL_HASH_TYPE)(num % 1000000000);
    #endif

	return result;
    }

    /*
     * Hash string considering numeric (TIP #534), if it looks like a number
     * use fastest string to number conversion, thereby we don't care about
     * possible non-numeric characters, because it is just a hash value.
     */
    result = 0;
    string = TclGetString(objPtr);
    length = objPtr->length;

    if (!length) { return result; }

    if (*string == '-') {
	result = '-';
................................................................................
	string++; length--;
	if (!length) {
	    return result;
	}
    }

    if (length <= 19 && *string <= '9' && *string >= '0') {

    #if ((TCL_HASH_TYPE)-1) > 0xffffffff
        /* hash is 64-bit, assume compiled as x64 */
        Tcl_WideUInt num = 0;
	switch (length) {
	    /* signed 64-bit int is max 19 chars = (+/-)9223372036854775807L */
	    case 19:  num += (*string++ - '0') * 1000000000000000000;
	    case 18:  num += (*string++ - '0') * 100000000000000000;
	    case 17:  num += (*string++ - '0') * 10000000000000000;
	    case 16:  num += (*string++ - '0') * 1000000000000000;
	    case 15:  num += (*string++ - '0') * 100000000000000;
	    case 14:  num += (*string++ - '0') * 10000000000000;
	    case 13:  num += (*string++ - '0') * 1000000000000;
	    case 12:  num += (*string++ - '0') * 100000000000;
	    case 11:  num += (*string++ - '0') * 10000000000;
	    /* signed 32-bit int is max 10 chars = (+/-)2147483647 */
	    case 10:  num += (*string++ - '0') * 1000000000;
	    case  9:  num += (*string++ - '0') * 100000000;
	    case  8:  num += (*string++ - '0') * 10000000;
	    case  7:  num += (*string++ - '0') * 1000000;
	    case  6:  num += (*string++ - '0') * 100000;
	    case  5:  num += (*string++ - '0') * 10000;
	    case  4:  num += (*string++ - '0') * 1000;
	    case  3:  num += (*string++ - '0') * 100;
	    case  2:  num += (*string++ - '0') * 10;
	    case  1:  num += (*string++ - '0');
	}
	/* result considering sign (if result is '-', "negate" numeric) */
	result <<= 31; /* 45<<31 == 0x(x64?16:0)80000000 */
	result += (TCL_HASH_TYPE)num;
    #else 
        /* 32-bit hash (int calculation is faster) */
        unsigned int hnm = 0;
        unsigned int lnm = 0;
	switch (length) {
	    /* high part of hash (wide / 1000000000) */
	    case 19:  hnm += (*string++ - '0') * 100000000 * 10;
	    case 18:  hnm += (*string++ - '0') * 100000000;
	    case 17:  hnm += (*string++ - '0') * 10000000;
	    case 16:  hnm += (*string++ - '0') * 1000000;
	    case 15:  hnm += (*string++ - '0') * 100000;
	    case 14:  hnm += (*string++ - '0') * 10000;
	    case 13:  hnm += (*string++ - '0') * 1000;
	    case 12:  hnm += (*string++ - '0') * 100;
	    case 11:  hnm += (*string++ - '0') * 10;
	    case 10:  hnm += (*string++ - '0');
	    /* low part of hash (wide % 1000000000) */
	    case  9:  lnm += (*string++ - '0') * 100000000;
	    case  8:  lnm += (*string++ - '0') * 10000000;
	    case  7:  lnm += (*string++ - '0') * 1000000;
	    case  6:  lnm += (*string++ - '0') * 100000;
	    case  5:  lnm += (*string++ - '0') * 10000;
	    case  4:  lnm += (*string++ - '0') * 1000;
	    case  3:  lnm += (*string++ - '0') * 100;
	    case  2:  lnm += (*string++ - '0') * 10;
	    case  1:  lnm += (*string++ - '0');
	}
	/* result considering sign (if result is '-', "negate" numeric) */
	result <<= 31; /* 45<<31 == 0x(x64?16:0)80000000 */
	result += (TCL_HASH_TYPE)hnm + (TCL_HASH_TYPE)lnm;
    #endif


        return result;
    }

    /* 
     * Fast string hashing (non-numeric)
     */







|







 







|

<
|
|

>
>
>
>
>







 







|


>
|
|
|
|
|
|
|
<
>








|







 







>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
>







4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
....
4249
4250
4251
4252
4253
4254
4255
4256
4257

4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
....
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380

4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
....
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464

4465
4466
4467
4468
4469
4470
4471
4472
int
TclCompareObjKeys(
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    register Tcl_Obj *objPtr1 = keyPtr;
    register Tcl_Obj *objPtr2 = hPtr->key.objPtr;
    register size_t l1, l2;

    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller
     *
     * if (objPtr1 == objPtr2) return 1;
     *
................................................................................
    /*
     * Only compare string representations of the same length.
     */

    if (l1 != l2) {
        return 0;
    } else {
	register const char *p1, *p2;


	if (!l1) { /* empty string are equal */
	    return 1;
	}

	/* compare both strings */
	p1 = objPtr1->bytes; p2 = objPtr2->bytes;

	assert(p1 != NULL && p2 != NULL);
	do {
	    if (*p1++ != *p2++) {
		return 0;
	    }
	} while (--l1);
    }
    return 1;
................................................................................
    /*
     * TIP #534, use fast integer hashing if it is canonical
     */
    if (objPtr->typePtr == &tclIntType && IntObjIsCanonical(objPtr)) {
	Tcl_WideUInt num = objPtr->internalRep.wideValue;
	/* remove sign and hash it differently */
	if (objPtr->internalRep.wideValue < 0) {
	    num = -(Tcl_WideInt)num;
	    result = (TCL_HASH_TYPE)'-' << 31; /* 45<<31 == 0x(x64?16:0)80000000 */
	}
	/* if will be optimized to compile time */
	if (((TCL_HASH_TYPE)-1) >= UWIDE_MAX) {
	    /* unsigned 64-bit as unsigned 64-bit integer */
	    result += (TCL_HASH_TYPE)objPtr->internalRep.wideValue;
	} else {
	    /* unsigned 64-bit as sum of parts in 32-bit unsigned */
	    result += (TCL_HASH_TYPE)(num / 1000000000)
		    + (TCL_HASH_TYPE)(num % 1000000000);

	}
	return result;
    }

    /*
     * Hash string considering numeric (TIP #534), if it looks like a number
     * use fastest string to number conversion, thereby we don't care about
     * possible non-numeric characters, because it is just a hash value.
     */

    string = TclGetString(objPtr);
    length = objPtr->length;

    if (!length) { return result; }

    if (*string == '-') {
	result = '-';
................................................................................
	string++; length--;
	if (!length) {
	    return result;
	}
    }

    if (length <= 19 && *string <= '9' && *string >= '0') {
	/* if will be optimized to compile time */
	if (((TCL_HASH_TYPE)-1) >= UWIDE_MAX) {
	    /* hash is 64-bit, assume compiled as x64 */
	    Tcl_WideUInt num = 0;
	    switch (length) {
		/* signed 64-bit int is max 19 chars = (+/-)9223372036854775807L */
		case 19:  num += (*string++ - '0') * 1000000000000000000;
		case 18:  num += (*string++ - '0') * 100000000000000000;
		case 17:  num += (*string++ - '0') * 10000000000000000;
		case 16:  num += (*string++ - '0') * 1000000000000000;
		case 15:  num += (*string++ - '0') * 100000000000000;
		case 14:  num += (*string++ - '0') * 10000000000000;
		case 13:  num += (*string++ - '0') * 1000000000000;
		case 12:  num += (*string++ - '0') * 100000000000;
		case 11:  num += (*string++ - '0') * 10000000000;
		/* signed 32-bit int is max 10 chars = (+/-)2147483647 */
		case 10:  num += (*string++ - '0') * 1000000000;
		case  9:  num += (*string++ - '0') * 100000000;
		case  8:  num += (*string++ - '0') * 10000000;
		case  7:  num += (*string++ - '0') * 1000000;
		case  6:  num += (*string++ - '0') * 100000;
		case  5:  num += (*string++ - '0') * 10000;
		case  4:  num += (*string++ - '0') * 1000;
		case  3:  num += (*string++ - '0') * 100;
		case  2:  num += (*string++ - '0') * 10;
		case  1:  num += (*string++ - '0');
	    }
	    /* result considering sign (if result is '-', "negate" numeric) */
	    result <<= 31; /* 45<<31 == 0x(x64?16:0)80000000 */
	    result += (TCL_HASH_TYPE)num;
	} else {
	    /* 32-bit hash (int calculation is faster) */
	    unsigned int hnm = 0;
	    unsigned int lnm = 0;
	    switch (length) {
		/* high part of hash (wide / 1000000000) */
		case 19:  hnm += (*string++ - '0') * 100000000 * 10;
		case 18:  hnm += (*string++ - '0') * 100000000;
		case 17:  hnm += (*string++ - '0') * 10000000;
		case 16:  hnm += (*string++ - '0') * 1000000;
		case 15:  hnm += (*string++ - '0') * 100000;
		case 14:  hnm += (*string++ - '0') * 10000;
		case 13:  hnm += (*string++ - '0') * 1000;
		case 12:  hnm += (*string++ - '0') * 100;
		case 11:  hnm += (*string++ - '0') * 10;
		case 10:  hnm += (*string++ - '0');
		/* low part of hash (wide % 1000000000) */
		case  9:  lnm += (*string++ - '0') * 100000000;
		case  8:  lnm += (*string++ - '0') * 10000000;
		case  7:  lnm += (*string++ - '0') * 1000000;
		case  6:  lnm += (*string++ - '0') * 100000;
		case  5:  lnm += (*string++ - '0') * 10000;
		case  4:  lnm += (*string++ - '0') * 1000;
		case  3:  lnm += (*string++ - '0') * 100;
		case  2:  lnm += (*string++ - '0') * 10;
		case  1:  lnm += (*string++ - '0');
	    }
	    /* result considering sign (if result is '-', "negate" numeric) */
	    result <<= 31; /* 45<<31 == 0x(x64?16:0)80000000 */
	    result += (TCL_HASH_TYPE)hnm + (TCL_HASH_TYPE)lnm;

	}

        return result;
    }

    /* 
     * Fast string hashing (non-numeric)
     */

Changes to tests/dict.test.

814
815
816
817
818
819
820






























821
822
823
824
825
826
827
} -result {wrong # args: should be "dict set dictVarName key ?key ...? value"}
test dict-15.13 {dict set command} -returnCodes error -body {
    set dictVar a
    dict set dictVar b c
} -cleanup {
    unset dictVar
} -result {missing value to go with key}































test dict-16.1 {dict unset command} -body {
    set dictVar {a b c d}
    dict unset dictVar a
} -cleanup {
    unset dictVar
} -result {c d}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
} -result {wrong # args: should be "dict set dictVarName key ?key ...? value"}
test dict-15.13 {dict set command} -returnCodes error -body {
    set dictVar a
    dict set dictVar b c
} -cleanup {
    unset dictVar
} -result {missing value to go with key}

# note also similar test in var.test...
test dict-15.21 {corner cases of int-hash optimizations (TIP 534} -setup {
    unset -nocomplain d
} -body {
    # check integer hash is equal string hash for values from [$i-5 .. $i+5]:
    foreach i {
	0
	0x55555555 0xaaaaaaaa 0x7fffffff 0xffffffff
	0x5555555555555555 0xaaaaaaaaaaaaaaaa 0x7fffffffffffffff 0xffffffffffffffff
	0x7fffffffffffffffffffffffffffffff 0xffffffffffffffffffffffffffffffff
    } {
	# not canonical int's (positive / negative):
	expr {$i}; # make int
	dict set d $i 1; dict get $d [string trim "$i "]; dict unset d $i
	set j -$i
	expr {$j}; # make int
	dict set d $j 1; dict get $d [string trim "$j "]; dict unset d $j
	# canonical int form (positive / negative):
	set i [expr {$i - 5}]
	time { 
	    dict set d [incr i] 1; dict get $d [string trim "$i "]; dict unset d $i
	    set j [expr {-$i}]
	    dict set d [incr j] 1; dict get $d [string trim "$j "]; dict unset d $j
	} 10
    }
    dict size $d
} -cleanup {
    unset -nocomplain d
} -result 0

test dict-16.1 {dict unset command} -body {
    set dictVar {a b c d}
    dict unset dictVar a
} -cleanup {
    unset dictVar
} -result {c d}

Changes to tests/var.test.

1476
1477
1478
1479
1480
1481
1482

1483
1484
1485

1486
1487
1488
1489
1490
1491
1492
1493
1494


1495




1496

1497



1498
1499
1500
1501
1502
1503
1504
    unset -nocomplain ary
} -body {
    array default unset ary x
} -returnCodes error -cleanup {
    unset -nocomplain ary
} -result * -match glob


test var-25.1 {corner cases of int-hash optimizations (TIP 534} -setup {
    unset -nocomplain v
} -body {

    foreach i {
	0
	0x7fffffff -0x7fffffff
	0xffffffff -0xffffffff
	0x7fffffffffffffff -0x7fffffffffffffff
	0xffffffffffffffff -0xffffffffffffffff
	0x7fffffffffffffffffffffffffffffff -0x7fffffffffffffffffffffffffffffff
	0xffffffffffffffffffffffffffffffff -0xffffffffffffffffffffffffffffffff
    } {


	# check integer hash is equal string hash for values from [$i-5 .. $i+5]:




	set i [expr {$i - 5}]

	time { set v([incr i]) 1; set v([string trim "$i "]); unset v($i) } 10



    }
    array size v
} -cleanup {
    unset -nocomplain v
} -result 0
 
catch {namespace delete ns}







>



>


<
|
|
<
|
<

>
>
|
>
>
>
>

>
|
>
>
>







1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489

1490
1491

1492

1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
    unset -nocomplain ary
} -body {
    array default unset ary x
} -returnCodes error -cleanup {
    unset -nocomplain ary
} -result * -match glob

# note also similar test in dict.test...
test var-25.1 {corner cases of int-hash optimizations (TIP 534} -setup {
    unset -nocomplain v
} -body {
    # check integer hash is equal string hash for values from [$i-5 .. $i+5]:
    foreach i {
	0

	0x55555555 0xaaaaaaaa 0x7fffffff 0xffffffff
	0x5555555555555555 0xaaaaaaaaaaaaaaaa 0x7fffffffffffffff 0xffffffffffffffff

	0x7fffffffffffffffffffffffffffffff 0xffffffffffffffffffffffffffffffff

    } {
	# not canonical int's (positive / negative):
	expr {$i}; # make int
	set v($i) 1; set v([string trim "$i "]); unset v($i)
	set j -$i
	expr {$j}; # make int
	set v($j) 1; set v([string trim "$j "]); unset v($j)
	# canonical int form (positive / negative):
	set i [expr {$i - 5}]
	time { 
	    set v([incr i]) 1; set v([string trim "$i "]); unset v($i)
	    set j [expr {-$i}]
	    set v([incr j]) 1; set v([string trim "$j "]); unset v($j)
	} 10
    }
    array size v
} -cleanup {
    unset -nocomplain v
} -result 0
 
catch {namespace delete ns}