Attachment "tcl_bug_2826551.patch" to
ticket [2948425fff]
added by
mdejong
2010-02-09 18:26:25.
2010-02-09 Mo DeJong <[email protected]>
Tcl Bug 2826551 regexp bugs related to
-all -line and -start and newlines
* generic/tclCmdMZ.c (Tcl_RegexpObjCmd):
If -offset is given, treat it as the start
of the line if the previous character was
a newline. Fix nasty edge case where a zero
length match would not advance the index.
* tests/regexp.test: Add regression tests back
ported from Jacl. Checks for a number of issues
related to -line and newline handling. A few
of tests were broken before the patch and
continue to be broken, marked as knownBug.
Index: generic/tclCmdMZ.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclCmdMZ.c,v
retrieving revision 1.198
diff -u -r1.198 tclCmdMZ.c
--- generic/tclCmdMZ.c 25 Dec 2009 22:45:05 -0000 1.198
+++ generic/tclCmdMZ.c 9 Feb 2010 11:14:20 -0000
@@ -108,7 +108,7 @@
Tcl_Obj *const objv[]) /* Argument objects. */
{
int i, indices, match, about, offset, all, doinline, numMatchesSaved;
- int cflags, eflags, stringLength;
+ int cflags, eflags, stringLength, matchLength;
Tcl_RegExp regExpr;
Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL;
Tcl_RegExpInfo info;
@@ -250,15 +250,6 @@
return TCL_ERROR;
}
- if (offset > 0) {
- /*
- * Add flag if using offset (string is part of a larger string), so
- * that "^" won't match.
- */
-
- eflags |= TCL_REG_NOTBOL;
- }
-
objc -= 2;
objv += 2;
@@ -286,11 +277,25 @@
*/
while (1) {
+ /*
+ * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing
+ * TCL_REG_NOTBOL indicates that the character at offset
+ * should not be considered the start of the line. If
+ * for example the pattern {^} is passed and -start is positive,
+ * then the pattern will not match the start of the string
+ * unless the previous character is a newline.
+ */
+
+ if ((offset == 0) ||
+ ((offset > 0) &&
+ (Tcl_GetUniChar(objPtr,offset-1) == (Tcl_UniChar)'\n'))) {
+ eflags = 0;
+ } else {
+ eflags = TCL_REG_NOTBOL;
+ }
+
match = Tcl_RegExpExecObj(interp, regExpr, objPtr,
- offset /* offset */, numMatchesSaved, eflags
- | ((offset > 0 &&
- (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n'))
- ? TCL_REG_NOTBOL : 0));
+ offset, numMatchesSaved, eflags);
if (match < 0) {
return TCL_ERROR;
@@ -408,12 +413,19 @@
* offset never changes).
*/
- if (info.matches[0].end == 0) {
+ matchLength = (info.matches[0].end - info.matches[0].start);
+
+ offset += info.matches[0].end;
+
+ /*
+ * A match of length zero could happen for {^} {$} or {.*}
+ * and in these cases we always want to bump the index up one.
+ */
+
+ if (matchLength == 0) {
offset++;
}
- offset += info.matches[0].end;
all++;
- eflags |= TCL_REG_NOTBOL;
if (offset >= stringLength) {
break;
}
Index: tests/regexp.test
===================================================================
RCS file: /cvsroot/tcl/tcl/tests/regexp.test,v
retrieving revision 1.34
diff -u -r1.34 regexp.test
--- tests/regexp.test 21 Sep 2009 21:30:41 -0000 1.34
+++ tests/regexp.test 9 Feb 2010 11:14:21 -0000
@@ -311,12 +311,39 @@
test regexp-7.18 {basic regsub replacement} {
list [regsub a+ aaa {&} foo] $foo
} {1 aaa}
-test regexp-7.19 {basic regsub backslash replacement} {
+test regexp-7.19 {basic regsub replacement} {
+ list [regsub a+ aaa {\&} foo] $foo
+} {1 &}
+test regexp-7.20 {basic regsub replacement} {
+ list [regsub a+ aaa {\\&} foo] $foo
+} {1 {\aaa}}
+test regexp-7.21 {basic regsub replacement} {
+ list [regsub a+ aaa {\\\&} foo] $foo
+} {1 {\&}}
+test regexp-7.22 {basic regsub replacement} {
list [regsub a+ aaa {\0} foo] $foo
} {1 aaa}
-test regexp-7.20 {basic regsub backslash replacement} {
+test regexp-7.23 {basic regsub replacement} {
+ list [regsub a+ aaa {\\0} foo] $foo
+} {1 {\0}}
+test regexp-7.24 {basic regsub replacement} {
list [regsub a+ aaa {\\\0} foo] $foo
} {1 {\aaa}}
+test regexp-7.25 {basic regsub replacement} {
+ list [regsub a+ aaa {\\\\0} foo] $foo
+} {1 {\\0}}
+test regexp-7.26 {dollar zero is not a backslash replacement} {
+ list [regsub a+ aaa {$0} foo] $foo
+} {1 {$0}}
+test regexp-7.27 {dollar zero is not a backslash replacement} {
+ list [regsub a+ aaa {\0$0} foo] $foo
+} {1 {aaa$0}}
+test regexp-7.28 {dollar zero is not a backslash replacement} {
+ list [regsub a+ aaa {\$0} foo] $foo
+} {1 {\$0}}
+test regexp-7.29 {dollar zero is not a backslash replacement} {
+ list [regsub a+ aaa {\\} foo] $foo
+} {1 \\}
test regexp-8.1 {case conversion in regsub} {
list [regsub -nocase a(a+) xaAAaAAay & foo] $foo
@@ -700,6 +727,337 @@
regexp -- {([bc])\1} bb
} 1
+test regexp-23.1 {regexp -all and -line} {
+ set string ""
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1}] \
+ [list {0 -1}] \
+ [list {0 -1}] \
+ ]
+
+test regexp-23.2 {regexp -all and -line} {
+ set string "\n"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1}] \
+ [list {0 -1}] \
+ [list {0 -1}] \
+ ]
+
+test regexp-23.3 {regexp -all and -line} {
+ set string "\n\n"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1} {1 0}] \
+ [list {0 -1} {1 0}] \
+ [list {0 -1} {1 0}] \
+ ]
+
+test regexp-23.4 {regexp -all and -line} {
+ set string "a"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^.*$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1}] \
+ [list {0 0}] \
+ [list {1 0}] \
+ ]
+
+test regexp-23.5 {regexp -all and -line} {knownBug} {
+ set string "a\n"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^.*$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1} {2 1}] \
+ [list {0 0} {2 1}] \
+ [list {1 0} {2 1}] \
+ ]
+
+test regexp-23.6 {regexp -all and -line} {
+ set string "\na"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^.*$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1} {1 0}] \
+ [list {0 -1} {1 1}] \
+ [list {0 -1} {2 1}] \
+ ]
+
+test regexp-23.7 {regexp -all and -line} {knownBug} {
+ set string "ab\n"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^.*$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1} {3 2}] \
+ [list {0 1} {3 2}] \
+ [list {2 1} {3 2}] \
+ ]
+
+test regexp-23.8 {regexp -all and -line} {
+ set string "a\nb"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^.*$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1} {2 1}] \
+ [list {0 0} {2 2}] \
+ [list {1 0} {3 2}] \
+ ]
+
+test regexp-23.9 {regexp -all and -line} {knownBug} {
+ set string "a\nb\n"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^.*$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1} {2 1} {4 3}] \
+ [list {0 0} {2 2} {4 3}] \
+ [list {1 0} {3 2} {4 3}] \
+ ]
+
+test regexp-23.10 {regexp -all and -line} {
+ set string "a\nb\nc"
+ list \
+ [regexp -all -inline -indices -line -- {^} $string] \
+ [regexp -all -inline -indices -line -- {^.*$} $string] \
+ [regexp -all -inline -indices -line -- {$} $string]
+} [list \
+ [list {0 -1} {2 1} {4 3}] \
+ [list {0 0} {2 2} {4 4}] \
+ [list {1 0} {3 2} {5 4}] \
+ ]
+
+test regexp-23.11 {regexp -all and -line} {
+ regexp -all -inline -indices -line -- {b} "abb\nb"
+} {{1 1} {2 2} {4 4}}
+
+
+test regexp-24.1 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string ""
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} {1 <> 1 <> 1 <>}
+
+test regexp-24.2 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "\n"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 2 "<>\n<>" 2 "<>\n<>" 2 "<>\n<>"]
+
+test regexp-24.3 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "\n\n"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 3 "<>\n<>\n<>" 3 "<>\n<>\n<>" 3 "<>\n<>\n<>"]
+
+test regexp-24.4 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "a"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^.*$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 1 "<>a" 1 "<a>" 1 "a<>"]
+
+test regexp-24.5 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "a\n"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^.*$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 2 "<>a\n<>" 2 "<a>\n<>" 2 "a<>\n<>"]
+
+test regexp-24.6 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "\na"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^.*$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 2 "<>\n<>a" 2 "<>\n<a>" 2 "<>\na<>"]
+
+test regexp-24.7 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "ab\n"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^.*$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 2 "<>ab\n<>" 2 "<ab>\n<>" 2 "ab<>\n<>"]
+
+test regexp-24.8 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "a\nb"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^.*$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 2 "<>a\n<>b" 2 "<a>\n<b>" 2 "a<>\nb<>"]
+
+test regexp-24.9 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "a\nb\n"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^.*$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 3 "<>a\n<>b\n<>" 3 "<a>\n<b>\n<>" 3 "a<>\nb<>\n<>"]
+
+test regexp-24.10 {regsub -all and -line} {
+ foreach {v1 v2 v3} {{} {} {}} {}
+ set string "a\nb\nc"
+ list \
+ [regsub -line -all {^} $string {<&>} v1] $v1 \
+ [regsub -line -all {^.*$} $string {<&>} v2] $v2 \
+ [regsub -line -all {$} $string {<&>} v3] $v3
+} [list 3 "<>a\n<>b\n<>c" 3 "<a>\n<b>\n<c>" 3 "a<>\nb<>\nc<>"]
+
+test regexp-24.11 {regsub -all and -line} {
+ regsub -line -all {b} "abb\nb" {<&>}
+} "a<b><b>\n<b>"
+
+
+test regexp-25.1 {regexp without -line option} {
+ set foo ""
+ list [regexp {a.*b} "dabc\naxyb\n" foo] $foo
+} [list 1 abc\naxyb]
+
+test regexp-25.2 {regexp without -line option} {
+ set foo ""
+ list [regexp {^a.*b$} "dabc\naxyb\n" foo] $foo
+} {0 {}}
+
+test regexp-25.3 {regexp with -line option} {
+ set foo ""
+ list [regexp -line {^a.*b$} "dabc\naxyb\n" foo] $foo
+} {1 axyb}
+
+test regexp-25.4 {regexp with -line option} {
+ set foo ""
+ list [regexp -line {^a.*b$} "dabc\naxyb\nxb" foo] $foo
+} {1 axyb}
+
+test regexp-25.5 {regexp without -line option} {
+ set foo ""
+ list [regexp {^a.*b$} "dabc\naxyb\nxb" foo] $foo
+} {0 {}}
+
+test regexp-25.6 {regexp without -line option} {
+ set foo ""
+ list [regexp {a.*b$} "dabc\naxyb\nxb" foo] $foo
+} "1 {abc\naxyb\nxb}"
+
+test regexp-25.7 {regexp with -lineanchor option} {
+ set foo ""
+ list [regexp -lineanchor {^a.*b$} "dabc\naxyb\nxb" foo] $foo
+} "1 {axyb\nxb}"
+
+test regexp-25.8 {regexp with -lineanchor and -linestop option} {
+ set foo ""
+ list [regexp -lineanchor -linestop {^a.*b$} "dabc\naxyb\nxb" foo] $foo
+} {1 axyb}
+
+test regexp-25.9 {regexp with -linestop option} {
+ set foo ""
+ list [regexp -linestop {a.*b} "ab\naxyb\nxb" foo] $foo
+} {1 ab}
+
+test regexp-26.1 {matches start of line 1 time} {
+ regexp -all -inline -- {^a+} "aab\naaa"
+} {aa}
+
+test regexp-26.2 {matches start of line(s) 2 times} {
+ regexp -all -inline -line -- {^a+} "aab\naaa"
+} {aa aaa}
+
+test regexp-26.3 {effect of -line -all and -start} {
+ list \
+ [regexp -all -inline -line -start 0 -- {^a+} "aab\naaa"] \
+ [regexp -all -inline -line -start 1 -- {^a+} "aab\naaa"] \
+ [regexp -all -inline -line -start 3 -- {^a+} "aab\naaa"] \
+ [regexp -all -inline -line -start 4 -- {^a+} "aab\naaa"] \
+} {{aa aaa} aaa aaa aaa}
+
+test regexp-26.5 {match length 0, match length 1} {
+ regexp -all -inline -line -- {^b*} "a\nb"
+} {{} b}
+
+test regexp-26.6 {non reporting capture group} {
+ regexp -all -inline -line -- {^(?:a+|b)} "aab\naaa"
+} {aa aaa}
+
+test regexp-26.7 {Tcl bug 2826551: -line sensitive regexp and -start} {
+ set match1 {}
+ set match2 {}
+ list \
+ [regexp -start 0 -indices -line {^a} "\nab" match1] $match1 \
+ [regexp -start 1 -indices -line {^a} "\nab" match2] $match2
+} {1 {1 1} 1 {1 1}}
+
+test regexp-26.8 {Tcl bug 2826551: diff regexp with -line option} {
+ set data "@1\n2\n+3\n@4\n-5\n+6\n7\n@8\n9\n"
+ regexp -all -inline -line {^@.*\n(?:[^@].*\n?)*} $data
+} [list \
+ "@1\n2\n+3\n" \
+ "@4\n-5\n+6\n7\n" \
+ "@8\n9\n" \
+ ]
+
+test regexp-26.9 {Tcl bug 2826551: diff regexp with embedded -line option} {
+ set data "@1\n2\n+3\n@4\n-5\n+6\n7\n@8\n9\n"
+ regexp -all -inline {(?n)^@.*\n(?:[^@].*\n?)*} $data
+} [list \
+ "@1\n2\n+3\n" \
+ "@4\n-5\n+6\n7\n" \
+ "@8\n9\n" \
+ ]
+
+test regexp-26.10 {regexp with -line option} {
+ regexp -all -inline -line -- {a*} "a\n"
+} {a {}}
+
+test regexp-26.11 {regexp without -line option} {
+ regexp -all -inline -- {a*} "a\n"
+} {a {}}
+
+test regexp-26.12 {regexp with -line option} {
+ regexp -all -inline -line -- {a*} "b\n"
+} {{} {}}
+
+test regexp-26.13 {regexp without -line option} {
+ regexp -all -inline -- {a*} "b\n"
+} {{} {}}
+
# cleanup
::tcltest::cleanupTests
return