Tcl Source Code

Artifact [2265d7a258]
Login

Artifact 2265d7a258b0cdf0575e47e7f5c26edcbfe653d5:

Attachment "1243354-update.patch" to ticket [1243354fff] added by dgp 2008-09-19 02:10:43.
Index: generic/tclCmdMZ.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclCmdMZ.c,v
retrieving revision 1.169
diff -u -r1.169 tclCmdMZ.c
--- generic/tclCmdMZ.c	31 Jul 2008 20:01:40 -0000	1.169
+++ generic/tclCmdMZ.c	18 Sep 2008 18:57:31 -0000
@@ -1097,8 +1097,7 @@
  * StringFirstCmd --
  *
  *	This procedure is invoked to process the "string first" Tcl command.
- *	See the user documentation for details on what it does. Note that this
- *	command only functions correctly on properly formed Tcl UTF strings.
+ *	See the user documentation for details on what it does.
  *
  * Results:
  *	A standard Tcl result.
@@ -1116,8 +1115,8 @@
     int objc,			/* Number of arguments. */
     Tcl_Obj *const objv[])	/* Argument objects. */
 {
-    Tcl_UniChar *ustring1, *ustring2;
-    int match, start, length1, length2;
+    int i, needleLength, haystackLength, end, start = 0;
+    Tcl_UniChar *needle, *haystack;
 
     if (objc < 3 || objc > 4) {
 	Tcl_WrongNumArgs(interp, 1, objv,
@@ -1125,76 +1124,47 @@
 	return TCL_ERROR;
     }
 
-    /*
-     * We are searching string2 for the sequence string1.
-     */
-
-    match = -1;
-    start = 0;
-    length2 = -1;
-
-    ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
-    ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
+    haystackLength = Tcl_GetCharLength(objv[2]);
 
     if (objc == 4) {
-	/*
-	 * If a startIndex is specified, we will need to fast forward to that
-	 * point in the string before we think about a match.
-	 */
-
-	if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){
+	/* Parse the startIndex value in any supported index format. */
+	if (TclGetIntForIndexM(interp, objv[3], haystackLength - 1, &start)
+		!= TCL_OK) {
 	    return TCL_ERROR;
 	}
 
-	/*
-	 * Reread to prevent shimmering problems.
-	 */
-
-	ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
-	ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
-
-	if (start >= length2) {
-	    goto str_first_done;
-	} else if (start > 0) {
-	    ustring2 += start;
-	    length2 -= start;
-	} else if (start < 0) {
-	    /*
-	     * Invalid start index mapped to string start; Bug #423581
-	     */
-
+	if (start < 0) {
 	    start = 0;
 	}
     }
 
-    if (length1 > 0) {
-	register Tcl_UniChar *p, *end;
-
-	end = ustring2 + length2 - length1 + 1;
-	for (p = ustring2;  p < end;  p++) {
-	    /*
-	     * Scan forward to find the first character.
-	     */
-
-	    if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p,
-		    (unsigned long) length1) == 0)) {
-		match = p - ustring2;
-		break;
-	    }
-	}
+    needle = Tcl_GetUnicodeFromObj(objv[1], &needleLength);
+    haystack = Tcl_GetUnicodeFromObj(objv[2], &haystackLength);
+    end = haystackLength - needleLength;
+
+    if (end < start) {
+	/* Set of search positions is empty; return not found */
+	Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
+	return TCL_OK;
     }
 
-    /*
-     * Compute the character index of the matching string by counting the
-     * number of characters before the match.
-     */
+    if (needleLength == 0) {
+	/* Empty string is found wherever we start looking */
+	Tcl_SetObjResult(interp, Tcl_NewIntObj(start));
+	return TCL_OK;
+    }
 
-    if ((match != -1) && (objc == 4)) {
-	match += start;
+    for (i = start; i <= end; i++) {
+	if ((needle[0] == haystack[i]) && (TclUniCharNcmp(needle, haystack+i,
+		(unsigned long) needleLength) == 0)) {
+	    /* Success!  Return location we found. */
+	    Tcl_SetObjResult(interp, Tcl_NewIntObj(i));
+	    return TCL_OK;
+	}
     }
 
-  str_first_done:
-    Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
+    /* Not found. */
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
     return TCL_OK;
 }
 
@@ -1204,8 +1174,7 @@
  * StringLastCmd --
  *
  *	This procedure is invoked to process the "string last" Tcl command.
- *	See the user documentation for details on what it does. Note that this
- *	command only functions correctly on properly formed Tcl UTF strings.
+ *	See the user documentation for details on what it does.
  *
  * Results:
  *	A standard Tcl result.
@@ -1223,8 +1192,8 @@
     int objc,			/* Number of arguments. */
     Tcl_Obj *const objv[])	/* Argument objects. */
 {
-    Tcl_UniChar *ustring1, *ustring2, *p;
-    int match, start, length1, length2;
+    int i, needleLength, haystackLength, start=INT_MAX;
+    Tcl_UniChar *needle, *haystack;
 
     if (objc < 3 || objc > 4) {
 	Tcl_WrongNumArgs(interp, 1, objv,
@@ -1232,61 +1201,54 @@
 	return TCL_ERROR;
     }
 
-    /*
-     * We are searching string2 for the sequence string1.
-     */
-
-    match = -1;
-    start = 0;
-    length2 = -1;
-
-    ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
-    ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
+    needleLength = Tcl_GetCharLength(objv[1]);
+    haystackLength = Tcl_GetCharLength(objv[2]);
 
     if (objc == 4) {
-	/*
-	 * If a startIndex is specified, we will need to restrict the string
-	 * range to that char index in the string
-	 */
-
-	if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){
+	/* Parse the startIndex value in any supported index format. */
+	if (TclGetIntForIndexM(interp, objv[3], haystackLength - 1, &start)
+		!= TCL_OK) {
 	    return TCL_ERROR;
 	}
 
 	/*
-	 * Reread to prevent shimmering problems.
+	 * Truncated search guided by the identity:
+	 *	[string last $n $h $i] ==
+	 *		[string last $n [string range $h 0 $i]]
 	 */
+	start += 1 - needleLength;
+    }
 
-	ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
-	ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
+    needle = Tcl_GetUnicodeFromObj(objv[1], &needleLength);
+    haystack = Tcl_GetUnicodeFromObj(objv[2], &haystackLength);
 
-	if (start < 0) {
-	    goto str_last_done;
-	} else if (start < length2) {
-	    p = ustring2 + start + 1 - length1;
-	} else {
-	    p = ustring2 + length2 - length1;
-	}
-    } else {
-	p = ustring2 + length2 - length1;
+    if (start > haystackLength - needleLength) {
+	start = haystackLength - needleLength;
     }
 
-    if (length1 > 0) {
-	for (; p >= ustring2; p--) {
-	    /*
-	     * Scan backwards to find the first character.
-	     */
+    if (start < 0) {
+	/* Set of search positions is empty; return not found */
+	Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
+	return TCL_OK;
+    }
 
-	    if ((*p == *ustring1) && !memcmp(ustring1, p,
-		    sizeof(Tcl_UniChar) * (size_t)length1)) {
-		match = p - ustring2;
-		break;
-	    }
+    if (needleLength == 0) {
+	/* Empty string is found wherever we start looking */
+	Tcl_SetObjResult(interp, Tcl_NewIntObj(start));
+	return TCL_OK;
+    }
+
+    for (i = start; i >= 0; i--) {
+	if ((needle[0] == haystack[i]) && (TclUniCharNcmp(needle, haystack+i,
+		(unsigned long) needleLength) == 0)) {
+	    /* Success!  Return location we found. */
+	    Tcl_SetObjResult(interp, Tcl_NewIntObj(i));
+	    return TCL_OK;
 	}
     }
 
-  str_last_done:
-    Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
+    /* Not found. */
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
     return TCL_OK;
 }
 
@@ -1296,8 +1258,7 @@
  * StringIndexCmd --
  *
  *	This procedure is invoked to process the "string index" Tcl command.
- *	See the user documentation for details on what it does. Note that this
- *	command only functions correctly on properly formed Tcl UTF strings.
+ *	See the user documentation for details on what it does.
  *
  * Results:
  *	A standard Tcl result.
Index: library/safe.tcl
===================================================================
RCS file: /cvsroot/tcl/tcl/library/safe.tcl,v
retrieving revision 1.17
diff -u -r1.17 safe.tcl
--- library/safe.tcl	25 Jun 2008 17:40:03 -0000	1.17
+++ library/safe.tcl	18 Sep 2008 18:57:31 -0000
@@ -999,19 +999,13 @@
 
     # AliasEncoding is the target of the "encoding" alias in safe interpreters.
 
-    proc AliasEncoding {slave args} {
-
-	set argc [llength $args]
-
-	set okpat "^(name.*|convert.*)\$"
-	set subcommand [lindex $args 0]
-
-	if {[regexp $okpat $subcommand]} {
-	    return [::interp invokehidden $slave encoding {*}$args]
+    proc AliasEncoding {slave option args} {
+	if {[regexp {^(n.*|convert.*)$} $option]} {
+	    return [::interp invokehidden $slave encoding $option {*}$args]
 	}
 
-	if {[string first $subcommand system] == 0} {
-	    if {$argc == 1} {
+	if {[string match s* $option] && [string first $option system] == 0} {
+	    if {[llength $args] == 0} {
 		# passed all the tests , lets source it:
 		if {[catch {::interp invokehidden \
 			$slave encoding system} msg]} {
@@ -1024,7 +1018,7 @@
 		error $msg
 	    }
 	} else {
-	    set msg "wrong # args: should be \"encoding option ?arg ...?\""
+	    set msg "bad option \"$option\": must be convertfrom, convertto, names, or system"
 	    Log $slave $msg
 	    error $msg
 	}
Index: tests/string.test
===================================================================
RCS file: /cvsroot/tcl/tcl/tests/string.test,v
retrieving revision 1.73
diff -u -r1.73 string.test
--- tests/string.test	19 Jul 2008 22:50:38 -0000	1.73
+++ tests/string.test	18 Sep 2008 18:57:32 -0000
@@ -194,7 +194,7 @@
 } 9
 test string-4.8 {string first} {
     string first "" x123xx345xxx789xxx012
-} -1
+} 0 
 test string-4.9 {string first, unicode} {
     string first x abc\u7266x
 } 4
Index: tests/stringComp.test
===================================================================
RCS file: /cvsroot/tcl/tcl/tests/stringComp.test,v
retrieving revision 1.16
diff -u -r1.16 stringComp.test
--- tests/stringComp.test	19 Jul 2008 22:50:38 -0000	1.16
+++ tests/stringComp.test	18 Sep 2008 18:57:32 -0000
@@ -250,7 +250,7 @@
 test stringComp-4.8 {string first} {
     proc foo {} {string first "" x123xx345xxx789xxx012}
     foo
-} -1
+} 0
 test stringComp-4.9 {string first, unicode} {
     proc foo {} {string first x abc\u7266x}
     foo