Attachment "patchfile" to
ticket [3408718fff]
added by
anonymous
2015-05-08 20:41:42.
Index: generic/regexec.c
==================================================================
--- generic/regexec.c
+++ generic/regexec.c
@@ -271,15 +271,36 @@
/*
* Copy (portion of) match vector over if necessary.
*/
+ n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
zapSubexpressions(pmatch, nmatch);
- n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
}
+
+ /*
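+ * Convert match offsets from UTF-16 code units to character indices: each high surrogate (0xD800-0xDBFF) found before an offset reduces that offset by one.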
+ */
+
+ size_t i, startoffset, endoffset;
+ chr *chrPtr;
+ for (i = 0; i < n; i++) {
+ startoffset = 0; endoffset = 0;
+ for (chrPtr=(chr *)string; chrPtr < string+len && chrPtr < string+pmatch[i].rm_eo; chrPtr++) {
+ if ((*chrPtr & 0xfc00) == 0xd800) {
+ if (chrPtr < string+pmatch[i].rm_so) {
+ startoffset++;
+ } else {
+ endoffset++;
+ }
+ }
+ }
+ pmatch[i].rm_so -= startoffset;
+ pmatch[i].rm_eo -= (startoffset + endoffset);
+ }
/*
* Clean up.
*/
Index: generic/tclCmdMZ.c
==================================================================
--- generic/tclCmdMZ.c
+++ generic/tclCmdMZ.c
@@ -363,10 +363,19 @@
resultPtr = Tcl_NewObj();
}
}
for (i = 0; i < objc; i++) {
Tcl_Obj *newPtr;
+
+ int j;
+ Tcl_UniChar *chrv = Tcl_GetUnicode(objPtr);
+ for (j = 0; j < offset; j++) {
+ if ((chrv[j] & 0xfc00) == 0xd800) {
+ info.matches[i].start--;
+ info.matches[i].end--;
+ }
+ }
if (indices) {
int start, end;
Tcl_Obj *objs[2];
@@ -1083,14 +1092,12 @@
Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
for ( ; stringPtr < end; stringPtr += len) {
len = TclUtfToUniChar(stringPtr, &ch);
-
- if (!len) {
- continue;
- }
+
+ if (!len) continue;
/*
* Assume Tcl_UniChar is an integral type...
*/
@@ -2363,17 +2370,25 @@
if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){
return TCL_ERROR;
}
+ int i;
+ for (i = 0; i < length; i++) {
+ if ((ustring[i] & 0xfc00) == 0xd800) {
+ if (i < last) last++;
+ length++;
+ }
+ }
+
if ((last < first) || (last < 0) || (first > length)) {
Tcl_SetObjResult(interp, objv[1]);
} else {
Tcl_Obj *resultPtr;
- ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
- length--;
+ //ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
+ //length--;
if (first < 0) {
first = 0;
}
Index: generic/tclStringObj.c
==================================================================
--- generic/tclStringObj.c
+++ generic/tclStringObj.c
@@ -536,10 +536,11 @@
Tcl_Obj *objPtr, /* The object to get the Unicode charater
* from. */
int index) /* Get the index'th Unicode character. */
{
String *stringPtr;
+ int i, offset = 0;
/*
* Optimize the case where we're really dealing with a bytearray object
* without string representation; we don't need to convert to a string to
* perform the indexing operation.
@@ -570,11 +571,17 @@
return (Tcl_UniChar) objPtr->bytes[index];
}
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
- return (int) stringPtr->unicode[index];
+ for (i = 0; i < index + offset; i++) {
+ if ((stringPtr->unicode[i] & 0xfc00) == 0xd800) offset++;
+ }
+ if ((stringPtr->unicode[index + offset] & 0xfc00) == 0xd800) {
+ return (((stringPtr->unicode[index + offset] & 0x3ff) << 10) | (stringPtr->unicode[index + offset + 1] & 0x3ff)) + 0x10000;
+ }
+ return (int) stringPtr->unicode[index + offset];
}
/*
*----------------------------------------------------------------------
*
@@ -725,11 +732,11 @@
lastoffset++;
}
}
}
- return Tcl_NewUnicodeObj(stringPtr->unicode + first + firstoffset, last-first+1 + lastoffset + firstoffset);
+ return Tcl_NewUnicodeObj(stringPtr->unicode + first + firstoffset, last-first+1 + lastoffset);
}
/*
*----------------------------------------------------------------------
*
@@ -1747,11 +1754,11 @@
Tcl_UniChar ch = 0;
static const char *mixedXPG =
"cannot mix \"%\" and \"%n$\" conversion specifiers";
static const char *const badIndex[2] = {
"not enough arguments for all format specifiers",
- "\"%n$\" argument index out of range"
+ "\"%n$\" argument index out of range" //"
};
static const char *overflow = "max size for a Tcl value exceeded";
if (Tcl_IsShared(appendObj)) {
Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj");
@@ -2875,24 +2882,28 @@
const char *bytes,
int numBytes,
int numAppendChars)
{
String *stringPtr = GET_STRING(objPtr);
- int incr, needed, numOrigChars = 0;
+ int i, incr, needed, numAppendChars2, numOrigChars = 0, offset = 0;
Tcl_UniChar *dst, unichar = 0;
if (stringPtr->hasUnicode) {
numOrigChars = stringPtr->numChars;
}
if (numAppendChars == -1) {
TclNumUtfChars(numAppendChars, bytes, numBytes);
}
+ numAppendChars2 = numAppendChars;
+ for (i = numOrigChars; numAppendChars2-- + offset > 0; i++) {
+ if ((stringPtr->unicode[i] & 0xfc00) == 0xd800) offset++;
+ }
needed = numOrigChars + numAppendChars;
- stringCheckLimits(needed);
-
- if (needed > stringPtr->maxChars) {
- GrowUnicodeBuffer(objPtr, needed);
+ stringCheckLimits(needed + offset);
+
+ if (needed + offset > stringPtr->maxChars) {
+ GrowUnicodeBuffer(objPtr, needed + offset);
stringPtr = GET_STRING(objPtr);
}
stringPtr->hasUnicode = 1;
if (bytes) {