Tcl Source Code

Check-in [dc7f1a9b04]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:3396731 Revise the [string reverse] implementation to operate on the representation that comes in, avoid conversion to other reps.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: dc7f1a9b046aaecb206f6fbe9e3a502657064889
User & Date: dgp 2011-08-27 04:24:24
Context
2011-08-29
07:25
[3396731] inline string reverse: minor further improvements check-in: 69c582084d user: jan.nijtmans tags: trunk
2011-08-27
04:24
3396731 Revise the [string reverse] implementation to operate on the representation that comes in, a... check-in: dc7f1a9b04 user: dgp tags: trunk
02:28
Repaired the lost performance in the copy loop hotspots. Now meets or beats the former trunk (and ... Closed-Leaf check-in: 34daf5b5b3 user: dgp tags: revert-3396731
2011-08-25
12:00
[Enh 3396731] Follow-up: special case for Pure-unicode representation check-in: f9ddcf91ba user: jan.nijtmans tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ChangeLog.







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15






2011-08-23  Don Porter  <[email protected]>

	* generic/tclIORChan.c:	[Bug 3396948] Leak of ReflectedChannelMap.

2011-08-23  Jan Nijtmans  <[email protected]>

	* generic/tclStringObj.c: [FRQ 3396731] inline string reverse

2011-08-19  Don Porter  <[email protected]>

	* generic/tclIORTrans.c: [Bugs 3393279, 3393280] ReflectClose(.) is
	missing Tcl_EventuallyFree() calls at some of its exits.

	* generic/tclIO.c: [Bugs 3394654, 3393276] Revise FlushChannel() to
	account for the possibility that the ChanWrite() call might recycle
>
>
>
>
>
>




<
<
<
<







1
2
3
4
5
6
7
8
9
10




11
12
13
14
15
16
17
2011-08-27  Don Porter  <[email protected]>

	* generic/tclStringObj.c:  [RFE 3396731] Revise the [string reverse]
	* tests/string.test:	implementation to operate on the representation
	that comes in, avoid conversion to other reps.

2011-08-23  Don Porter  <[email protected]>

	* generic/tclIORChan.c:	[Bug 3396948] Leak of ReflectedChannelMap.





2011-08-19  Don Porter  <[email protected]>

	* generic/tclIORTrans.c: [Bugs 3393279, 3393280] ReflectClose(.) is
	missing Tcl_EventuallyFree() calls at some of its exits.

	* generic/tclIO.c: [Bugs 3394654, 3393276] Revise FlushChannel() to
	account for the possibility that the ChanWrite() call might recycle

Changes to generic/tclStringObj.c.

2648
2649
2650
2651
2652
2653
2654













































2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671

2672
2673
2674
2675
2676
2677
2678
2679
2680
2681

2682
2683
2684
2685
2686


2687


2688
2689
2690

2691
2692

2693

2694
2695
2696
2697
2698

2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710

2711
2712
2713
2714
2715

2716

2717
2718
2719
2720
2721
2722
2723
2724


2725





2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737





2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752

2753
2754
2755
2756
2757
2758
2759

2760
2761
2762
2763


2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
 *	argument with modifications done in place.
 *
 * Side effects:
 *	May allocate a new Tcl_Obj.
 *
 *---------------------------------------------------------------------------
 */














































Tcl_Obj *
TclStringObjReverse(
    Tcl_Obj *objPtr)
{
    char *src, *dest;
    Tcl_Obj *resultPtr = objPtr;
    char c;

    /* Special case: Pure Unicode array */
    if ((objPtr->typePtr == &tclStringType) && !objPtr->bytes) {
	String *strPtr = GET_STRING(objPtr);
	if (strPtr->hasUnicode) {
		String *dstStrPtr = stringAlloc(strPtr->numChars);
	    Tcl_UniChar *chars = strPtr->unicode;
	    Tcl_UniChar *dstChars = dstStrPtr->unicode + strPtr->numChars;


	    resultPtr = Tcl_NewObj();
	    resultPtr->bytes = NULL;
	    SET_STRING(resultPtr, dstStrPtr);
	    resultPtr->typePtr = &tclStringType;
	    dstStrPtr->maxChars = strPtr->numChars;
	    dstStrPtr->unicode[strPtr->numChars] = 0;
	    dstStrPtr->numChars = strPtr->numChars;
	    dstStrPtr->hasUnicode = 1;
	    dstStrPtr->allocated = 0;


	    while (--dstChars >= dstStrPtr->unicode) {
		*dstChars = *chars++;
	    }
	    return resultPtr;
	}


    }



    src = TclGetString(objPtr);
    if (Tcl_IsShared(objPtr)) {

	resultPtr = Tcl_NewObj();
	Tcl_SetObjLength(resultPtr, objPtr->length);

	dest = TclGetString(resultPtr);

	memcpy(dest, src, objPtr->length);
    } else {
	TclFreeIntRep(objPtr);
	dest = src;
    }


    src = dest + objPtr->length;

    /* Pass 1: reverse individual bytes of UTF-8 representation. */
    while (dest < src) {
	Tcl_UniChar ch = 0;
	switch (Tcl_UtfToUniChar(dest, &ch)) {
	case 1: {
		++dest;
		break;
	    }
	case 2: {

		c = dest[0];
		dest[0] = dest[1];
		dest[1] = c;
		dest += 2;
		break;

	    }

	case 3: {
		c = dest[0];
		dest[0] = dest[2];
		dest[2] = c;
		dest += 3;
		break;
	    }
#if TCL_UTF_MAX > 4


	case 5: {





		c = dest[0];
		dest[0] = dest[4];
		dest[4] = c;
		c = dest[1];
		dest[1] = dest[3];
		dest[3] = c;
		dest += 5;
		break;
	    }
#endif
#if TCL_UTF_MAX > 5
	case 6: {





		c = dest[0];
		dest[0] = dest[5];
		dest[5] = c;
		c = dest[1];
		dest[1] = dest[4];
		dest[4] = c;
		c = dest[0];
		dest[2] = dest[3];
		dest[3] = c;
		dest += 6;
		break;
	    }
#endif
	default: {
#if TCL_UTF_MAX > 3

		c = dest[0];
		dest[0] = dest[3];
		dest[3] = c;
		c = dest[1];
		dest[1] = dest[2];
		dest[2] = c;
		dest += 4;

#endif
		break;
	    }
	}


    }

	/* Pass 2: Reverse byte string. */
	dest = TclGetString(resultPtr);

	while (dest < --src) {
		c = *src;
		*src = *dest;
		*dest++ = c;
	}
    return resultPtr;
}

/*
 *---------------------------------------------------------------------------
 *
 * FillUnicodeRep --
 *







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
<
<

|
|
|
<
<
<
<

>
|
<
<
<
<
<
<
<
<
|
>
|
<
|
<
|
>
>
|
>
>

<
|
>
|
|
>
|
>
|
<
|
<
|
>
|
<

|
|
|
<
<
|
<
|
<
>
|
<
<
<
<
>
|
>
<
<
<
<
<
<
|
|
>
>
|
>
>
>
>
>
|
<
<
|
<
<
<
<
|
<
<
|
>
>
>
>
>
|
<
<
|
<
<
<
<
<
<
<
|
<
<
<
>
|
<
|
<
<
<
|
>
|
<

|
>
>
|
|
|
<
|
<
<
<
<
|
|







2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705


2706
2707
2708
2709




2710
2711
2712








2713
2714
2715

2716

2717
2718
2719
2720
2721
2722
2723

2724
2725
2726
2727
2728
2729
2730
2731

2732

2733
2734
2735

2736
2737
2738
2739


2740

2741

2742
2743




2744
2745
2746






2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757


2758




2759


2760
2761
2762
2763
2764
2765
2766


2767







2768



2769
2770

2771



2772
2773
2774

2775
2776
2777
2778
2779
2780
2781

2782




2783
2784
2785
2786
2787
2788
2789
2790
2791
 *	argument with modifications done in place.
 *
 * Side effects:
 *	May allocate a new Tcl_Obj.
 *
 *---------------------------------------------------------------------------
 */

void
ReverseBytes(
    unsigned char *to,		/* Copy bytes into here... */
    unsigned char *from,	/* ...from here... */
    int count)		/* Until this many are copied, */
				/* reversing as you go. */
{
    unsigned char *src = from + count - 1;
    if (to == from) {
	/* Reversing in place */
	while (to < src) {
	    unsigned char c = *src;
	    *src-- = *to;
	    *to++ = c;
	}
    }  else {
	while (src >= from) {
	    *to++ = *src--;
	}
    }
}

void
ReverseUniChars(
    Tcl_UniChar *to,		/* Copy Tcl_UniChars into here... */
    Tcl_UniChar *from,		/* ...from here... */
    unsigned int count)		/* Until this many are copied, */
				/* reversing as you go. */
{
    Tcl_UniChar *src = from + count - 1;
    if (to == from) {
	/* Reversing in place */
	from += count - 1;
	while (to < src) {
	    Tcl_UniChar c = *src;
	    *src-- = *to;
	    *to++ = c;
	}
    }  else {
	while (src >= from) {
	    *to++ = *src--;
	}
    }
}

Tcl_Obj *
TclStringObjReverse(
    Tcl_Obj *objPtr)
{
    String *stringPtr;



    if (TclIsPureByteArray(objPtr)) {
	int numBytes;
	unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);





	if (Tcl_IsShared(objPtr)) {
	    objPtr = Tcl_NewByteArrayObj(NULL, numBytes);








	}
	ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes);
	return objPtr;

    }


    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);

    if (stringPtr->hasUnicode) {
	Tcl_UniChar *from = Tcl_GetUnicode(objPtr);


	if (Tcl_IsShared(objPtr)) {
	    /*
	     * Create a non-empty, pure unicode value, so we can coax
	     * Tcl_SetObjLength into growing the unicode rep buffer.
	     */

	    Tcl_UniChar ch = 0;
	    objPtr = Tcl_NewUnicodeObj(&ch, 1);

	    Tcl_SetObjLength(objPtr, stringPtr->numChars);

	}
	ReverseUniChars(Tcl_GetUnicode(objPtr), from, stringPtr->numChars);
    }


    if (objPtr->bytes) {
	int numChars = stringPtr->numChars;
	int numBytes = objPtr->length;


	char *to, *from = objPtr->bytes;



	if (Tcl_IsShared(objPtr)) {
	    objPtr = Tcl_NewObj();




	    Tcl_SetObjLength(objPtr, numBytes);
	}
	to = objPtr->bytes;







	if (numChars < numBytes) {
	    /*
	     * Either numChars == -1 and we don't know how many chars are
	     * represented by objPtr->bytes and we need Pass 1 just in case,
	     * or numChars >= 0 and we know we have fewer chars than bytes,
	     * so we know there's a multibyte character needing Pass 1.
	     *
	     * Pass 1. Reverse the bytes of each multi-byte character.
	     */
	    int charCount = 0;


	    int bytesLeft = numBytes;







	    while (bytesLeft) {
		/*
		 * NOTE: We know that the from buffer is NUL-terminated.
		 * It's part of the contract for objPtr->bytes values.
		 * Thus, we can skip calling Tcl_UtfCharComplete() here.
		 */
		Tcl_UniChar ch = 0;


		int bytesInChar = Tcl_UtfToUniChar(from, &ch);











		ReverseBytes((unsigned char *)to, (unsigned char *)from,
			bytesInChar);

		to += bytesInChar;



		from += bytesInChar;
		bytesLeft -= bytesInChar;
		charCount++;

	    }

	    from = to = objPtr->bytes;
	    stringPtr->numChars = charCount;
	}
	/* Pass 2. Reverse all the bytes. */
	ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes);

    }





    return objPtr;
}

/*
 *---------------------------------------------------------------------------
 *
 * FillUnicodeRep --
 *

Changes to tests/string.test.

1622
1623
1624
1625
1626
1627
1628








1629
1630
1631
1632
1633
1634
1635
    set x \ubeef
    set y \udead
    string is ascii [string reverse $x$y]
} 0
test string-24.13 {string reverse command - pure Unicode string} {
    string reverse [string range \ubeef\udead\ubeef\udead\ubeef\udead 1 5]
} \udead\ubeef\udead\ubeef\udead









test string-25.1 {string is list} {
    string is list {a b c}
} 1
test string-25.2 {string is list} {
    string is list "a \{b c"
} 0







>
>
>
>
>
>
>
>







1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
    set x \ubeef
    set y \udead
    string is ascii [string reverse $x$y]
} 0
test string-24.13 {string reverse command - pure Unicode string} {
    string reverse [string range \ubeef\udead\ubeef\udead\ubeef\udead 1 5]
} \udead\ubeef\udead\ubeef\udead
test string-24.14 {string reverse command - pure bytearray} {
    binary scan [string reverse [binary format H* 010203]] H* x
    set x
} 030201
test string-24.15 {string reverse command - pure bytearray} {
    binary scan [tcl::string::reverse [binary format H* 010203]] H* x
    set x
} 030201

test string-25.1 {string is list} {
    string is list {a b c}
} 1
test string-25.2 {string is list} {
    string is list "a \{b c"
} 0