Tcl Source Code

Artifact [0e2f950eeb]
Login

Artifact 0e2f950eebe9bf76604339cb74d515a3915920b5:

Attachment "None" to ticket [403709ffff] added by dkf 2001-02-09 18:30:17.
Index: ChangeLog
===================================================================
RCS file: /cvsroot/tcl/tcl/ChangeLog,v
retrieving revision 1.367
diff -u -r1.367 ChangeLog
--- ChangeLog	2001/02/01 00:58:45	1.367
+++ ChangeLog	2001/02/15 17:09:10
@@ -1,3 +1,12 @@
+2001-02-15  Donal K. Fellows  <[email protected]>
+
+	* generic/tclCmdMZ.c (Tcl_SplitObjCmd): Improved efficiency of
+	splitting strings into individual characters by adding a hash
+	table so that only one Tcl_Obj per distinct character is created.
+	Improves performance of splitting short strings and makes a huge
+	difference to splitting long strings, such as is done in the
+	mime package in tcllib.  [Bug #131523]
+
 2001-01-31  Don Porter  <[email protected]>
 	* win/makefile.vc (install-libraries):  Corrected misdirected
 	install directory for the msgcat 1.2 package.
Index: generic/tclCmdMZ.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclCmdMZ.c,v
retrieving revision 1.30
diff -u -r1.30 tclCmdMZ.c
--- generic/tclCmdMZ.c	2000/09/20 01:50:38	1.30
+++ generic/tclCmdMZ.c	2001/02/15 17:09:11
@@ -939,15 +939,34 @@
 	 * Do nothing.
 	 */
     } else if (splitCharLen == 0) {
+	Tcl_HashTable charReuseTable;
+	Tcl_HashEntry *hPtr;
+	int isNew;
+
 	/*
 	 * Handle the special case of splitting on every character.
+	 *
+	 * Uses a hash table to ensure that each distinct character has
+	 * only one Tcl_Obj instance (multiply-referenced) in the
+	 * final list.  This is a *major* win when splitting a long
+	 * string (especially in the megabyte range!) - DKF
 	 */
 
+	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
 	for ( ; string < end; string += len) {
 	    len = Tcl_UtfToUniChar(string, &ch);
-	    objPtr = Tcl_NewStringObj(string, len);
+	    /* Assume Tcl_UniChar is an integral type, usable as a one-word key... */
+	    hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0 + ch, &isNew);
+	    if (isNew) {
+		objPtr = Tcl_NewStringObj(string, len);
+		/* Don't need to fiddle with refcount... */
+		Tcl_SetHashValue(hPtr, (ClientData) objPtr);
+	    } else {
+		objPtr = (Tcl_Obj*) Tcl_GetHashValue(hPtr);
+	    }
 	    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
 	}
+	Tcl_DeleteHashTable(&charReuseTable);
     } else {
 	char *element, *p, *splitEnd;
 	int splitLen;