Tcl Source Code

Artifact [37e68e3782]
Login

Artifact 37e68e3782bdbef64333935ef8718a28f0e239e2:

Attachment "patchfile" to ticket [3408718fff] added by anonymous 2015-05-08 20:41:42. (unpublished)
Index: generic/regexec.c
==================================================================
--- generic/regexec.c
+++ generic/regexec.c
@@ -271,15 +271,36 @@
 
     /*
      * Copy (portion of) match vector over if necessary.
      */
 
+    n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
     if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
 	zapSubexpressions(pmatch, nmatch);
-	n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
 	memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
     }
+
+    /*
+     * Correct Unicode character indices (count a surrogate pair as one).
+     */
+
+    size_t i, startoffset, endoffset;
+    chr *chrPtr;
+    for (i = 0; i < n; i++) {
+        startoffset = 0; endoffset = 0;
+        for (chrPtr=(chr *)string; chrPtr < string+len && chrPtr < string+pmatch[i].rm_eo; chrPtr++) {
+            if ((*chrPtr & 0xfc00) == 0xd800) {
+                if (chrPtr < string+pmatch[i].rm_so) {
+                    startoffset++;
+                } else {
+                    endoffset++;
+                }
+            }
+        }
+        pmatch[i].rm_so -= startoffset;
+        pmatch[i].rm_eo -= (startoffset + endoffset);
+    }
 
     /*
      * Clean up.
      */
 

Index: generic/tclCmdMZ.c
==================================================================
--- generic/tclCmdMZ.c
+++ generic/tclCmdMZ.c
@@ -363,10 +363,19 @@
 		resultPtr = Tcl_NewObj();
 	    }
 	}
 	for (i = 0; i < objc; i++) {
 	    Tcl_Obj *newPtr;
+
+	    int j;
+	    Tcl_UniChar *chrv = Tcl_GetUnicode(objPtr);
+	    for (j = 0; j < offset; j++) {
+	        if ((chrv[j] & 0xfc00) == 0xd800) {
+	            info.matches[i].start--;
+	            info.matches[i].end--;
+	        }
+	    }
 
 	    if (indices) {
 		int start, end;
 		Tcl_Obj *objs[2];
 
@@ -1083,14 +1092,12 @@
 
 	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
 
 	for ( ; stringPtr < end; stringPtr += len) {
 	    len = TclUtfToUniChar(stringPtr, &ch);
-	    
-	    if (!len) {
-	        continue;
-	    }
+
+	    if (!len) continue;
 
 	    /*
 	     * Assume Tcl_UniChar is an integral type...
 	     */
 
@@ -2363,17 +2370,25 @@
     if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
 	    TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){
 	return TCL_ERROR;
     }
 
+    int i;
+    for (i = 0; i < length; i++) {
+        if ((ustring[i] & 0xfc00) == 0xd800) {
+            if (i < last) last++;
+            length++;
+        }
+    }
+    
     if ((last < first) || (last < 0) || (first > length)) {
 	Tcl_SetObjResult(interp, objv[1]);
     } else {
 	Tcl_Obj *resultPtr;
 
-	ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
-	length--;
+	//ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
+	//length--;
 
 	if (first < 0) {
 	    first = 0;
 	}
 

Index: generic/tclStringObj.c
==================================================================
--- generic/tclStringObj.c
+++ generic/tclStringObj.c
@@ -536,10 +536,11 @@
     Tcl_Obj *objPtr,		/* The object to get the Unicode charater
 				 * from. */
     int index)			/* Get the index'th Unicode character. */
 {
     String *stringPtr;
+    int i, offset = 0;
 
     /*
      * Optimize the case where we're really dealing with a bytearray object
      * without string representation; we don't need to convert to a string to
      * perform the indexing operation.
@@ -570,11 +571,17 @@
 	    return (Tcl_UniChar) objPtr->bytes[index];
 	}
 	FillUnicodeRep(objPtr);
 	stringPtr = GET_STRING(objPtr);
     }
-    return (int) stringPtr->unicode[index];
+    for (i = 0; i < index + offset; i++) {
+        if ((stringPtr->unicode[i] & 0xfc00) == 0xd800) offset++;
+    }
+    if ((stringPtr->unicode[index + offset] & 0xfc00) == 0xd800) {
+        return (((stringPtr->unicode[index + offset] & 0x3ff) << 10) | (stringPtr->unicode[index + offset + 1] & 0x3ff)) + 0x10000;
+    }
+    return (int) stringPtr->unicode[index + offset];
 }
 
 /*
  *----------------------------------------------------------------------
  *
@@ -725,11 +732,11 @@
                 lastoffset++;
             }
         }
     }
 
-    return Tcl_NewUnicodeObj(stringPtr->unicode + first + firstoffset, last-first+1 + lastoffset + firstoffset);
+    return Tcl_NewUnicodeObj(stringPtr->unicode + first + firstoffset, last-first+1 + lastoffset);
 }
 
 /*
  *----------------------------------------------------------------------
  *
@@ -1747,11 +1754,11 @@
     Tcl_UniChar ch = 0;
     static const char *mixedXPG =
 	    "cannot mix \"%\" and \"%n$\" conversion specifiers";
     static const char *const badIndex[2] = {
 	"not enough arguments for all format specifiers",
-	"\"%n$\" argument index out of range"
+	"\"%n$\" argument index out of range" //"
     };
     static const char *overflow = "max size for a Tcl value exceeded";
 
     if (Tcl_IsShared(appendObj)) {
 	Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj");
@@ -2875,24 +2882,28 @@
     const char *bytes,
     int numBytes,
     int numAppendChars)
 {
     String *stringPtr = GET_STRING(objPtr);
-    int incr, needed, numOrigChars = 0;
+    int i, incr, needed, numAppendChars2, numOrigChars = 0, offset = 0;
     Tcl_UniChar *dst, unichar = 0;
 
     if (stringPtr->hasUnicode) {
 	numOrigChars = stringPtr->numChars;
     }
     if (numAppendChars == -1) {
 	TclNumUtfChars(numAppendChars, bytes, numBytes);
     }
+    numAppendChars2 = numAppendChars;
+    for (i = numOrigChars; numAppendChars2-- + offset > 0; i++) {
+        if ((stringPtr->unicode[i] & 0xfc00) == 0xd800) offset++;
+    }
     needed = numOrigChars + numAppendChars;
-    stringCheckLimits(needed);
-	
-    if (needed > stringPtr->maxChars) {
-	GrowUnicodeBuffer(objPtr, needed);
+    stringCheckLimits(needed + offset);
+
+    if (needed + offset > stringPtr->maxChars) {
+	GrowUnicodeBuffer(objPtr, needed + offset);
 	stringPtr = GET_STRING(objPtr);
     }
 
     stringPtr->hasUnicode = 1;
     if (bytes) {