Attachment "comment-words.diff" to
ticket [3522426fff]
added by
lars_h
2012-04-30 03:03:03.
Index: generic/tcl.h
===================================================================
--- generic/tcl.h
+++ generic/tcl.h
@@ -1998,10 +1998,15 @@
* TCL_TOKEN_EXPAND_WORD - This token is just like TCL_TOKEN_WORD except
* that it marks a word that began with the
* literal character prefix "{*}". This word is
* marked to be expanded - that is, broken into
* words after substitution is complete.
+ * TCL_TOKEN_COMMENT_WORD - This token is just like TCL_TOKEN_WORD except
+ * that it marks a word that began with the
+ * literal character prefix "{#}". This word is
+ * marked to be ignored - that is, treated as
+ * if it denotes an expansion of the empty list.
*/
#define TCL_TOKEN_WORD 1
#define TCL_TOKEN_SIMPLE_WORD 2
#define TCL_TOKEN_TEXT 4
@@ -2009,10 +2014,11 @@
#define TCL_TOKEN_COMMAND 16
#define TCL_TOKEN_VARIABLE 32
#define TCL_TOKEN_SUB_EXPR 64
#define TCL_TOKEN_OPERATOR 128
#define TCL_TOKEN_EXPAND_WORD 256
+#define TCL_TOKEN_COMMENT_WORD 512
/*
* Parsing error types. On any parsing error, one of these values will be
* stored in the error field of the Tcl_Parse structure defined below.
*/
Index: generic/tclBasic.c
===================================================================
--- generic/tclBasic.c
+++ generic/tclBasic.c
@@ -5147,13 +5147,25 @@
iPtr->evalFlags = 0;
if (code != TCL_OK) {
break;
}
+
+ if (tokenPtr->type == TCL_TOKEN_COMMENT_WORD) {
+ /*
+ * TIP #???. Word comments are handled by pretending
+ * that they are expansions of the empty list.
+ * There is probably a less roundabout way to achieve
+ * the same end, though.
+ */
+
+ Tcl_ResetResult(interp);
+ }
+
objv[objectsUsed] = Tcl_GetObjResult(interp);
Tcl_IncrRefCount(objv[objectsUsed]);
- if (tokenPtr->type == TCL_TOKEN_EXPAND_WORD) {
+ if (tokenPtr->type & (TCL_TOKEN_EXPAND_WORD | TCL_TOKEN_COMMENT_WORD)) {
int numElements;
code = TclListObjLength(interp, objv[objectsUsed],
&numElements);
if (code == TCL_ERROR) {
Index: generic/tclCompile.c
===================================================================
--- generic/tclCompile.c
+++ generic/tclCompile.c
@@ -1583,11 +1583,11 @@
*/
for (wordIdx = 0, tokenPtr = parsePtr->tokenPtr;
wordIdx < parsePtr->numWords;
wordIdx++, tokenPtr += tokenPtr->numComponents + 1) {
- if (tokenPtr->type == TCL_TOKEN_EXPAND_WORD) {
+ if (tokenPtr->type & (TCL_TOKEN_EXPAND_WORD | TCL_TOKEN_COMMENT_WORD)) {
expand = 1;
break;
}
}
@@ -1640,10 +1640,12 @@
TclCompileTokens(interp, tokenPtr+1,
tokenPtr->numComponents, envPtr);
if (tokenPtr->type == TCL_TOKEN_EXPAND_WORD) {
TclEmitInstInt4(INST_EXPAND_STKTOP,
envPtr->currStackDepth, envPtr);
+ } else if (tokenPtr->type == TCL_TOKEN_COMMENT_WORD) {
+ TclEmitOpcode(INST_POP, envPtr);
}
continue;
}
/*
Index: generic/tclParse.c
===================================================================
--- generic/tclParse.c
+++ generic/tclParse.c
@@ -295,11 +295,13 @@
* iteration through the loop.
*/
parsePtr->commandStart = src;
while (1) {
- int expandWord = 0;
+ int expandWord = 0; /* 0 = ordinary word,
+ * 1 = word with {*} prefix,
+ * 2 = word with {#} prefix. */
/*
* Create the token for the word.
*/
@@ -329,12 +331,13 @@
parsePtr->numTokens++;
parsePtr->numWords++;
/*
* At this point the word can have one of four forms: something
- * enclosed in quotes, something enclosed in braces, and expanding
- * word, or an unquoted word (anything else).
+ * enclosed in quotes, something enclosed in braces, a word with
+ * prefix (expanding or comment word), or an unquoted word
+ * (anything else).
*/
parseWord:
if (*src == '"') {
if (Tcl_ParseQuotedString(interp, src, numBytes, parsePtr, 1,
@@ -353,28 +356,29 @@
}
src = termPtr;
numBytes = parsePtr->end - src;
/*
- * Check whether the braces contained the word expansion prefix
- * {*}
+ * Check whether the braces contained the word expansion
+ * prefix {*} or the comment word prefix {#}.
*/
expPtr = &parsePtr->tokenPtr[expIdx];
if ((0 == expandWord)
/* Haven't seen prefix already */
&& (1 == parsePtr->numTokens - expIdx)
/* Only one token */
- && (((1 == (size_t) expPtr->size)
+ && ((1 == (size_t) expPtr->size)
/* Same length as prefix */
- && (expPtr->start[0] == '*')))
+ && ((expPtr->start[0] == '*')
+ || (expPtr->start[0] == '#')))
/* Is the prefix */
&& (numBytes > 0) && (0 == ParseWhiteSpace(termPtr,
numBytes, &parsePtr->incomplete, &type))
&& (type != TYPE_COMMAND_END)
/* Non-whitespace follows */) {
- expandWord = 1;
+ expandWord = (expPtr->start[0] == '#') ? 2 : 1;
parsePtr->numTokens--;
goto parseWord;
}
} else {
/*
@@ -396,11 +400,11 @@
*/
tokenPtr = &parsePtr->tokenPtr[wordIndex];
tokenPtr->size = src - tokenPtr->start;
tokenPtr->numComponents = parsePtr->numTokens - (wordIndex + 1);
- if (expandWord) {
+ if (expandWord == 1) {
int i, isLiteral = 1;
/*
* When a command includes a word that is an expanded literal; for
* example, {*}{1 2 3}, the parser performs that expansion
@@ -536,10 +540,52 @@
* expansion to compile/eval time by marking with a
* TCL_TOKEN_EXPAND_WORD token.
*/
tokenPtr->type = TCL_TOKEN_EXPAND_WORD;
+ }
+ } else if (expandWord == 2) {
+ int i, isLiteral = 1;
+
+ /*
+ * When a command includes a comment word then processing
+ * proceeds in much the same way as for expansion words, but
+ * several cases can be pruned. One that remains is that of
+ * distinguishing between a literal and non-literal comment,
+ * since substitution is carried out in a comment word even
+ * if the result of that substitution will always be discarded.
+ *
+ * First check whether the thing to be expanded is a literal,
+ * in the sense of being composed entirely of TCL_TOKEN_TEXT
+ * tokens.
+ */
+
+ for (i = 1; i <= tokenPtr->numComponents; i++) {
+ if (tokenPtr[i].type != TCL_TOKEN_TEXT) {
+ isLiteral = 0;
+ break;
+ }
+ }
+
+ if (isLiteral) {
+ /*
+ * The comment is a literal, so just forget about it
+ * right away. This is effectively the same as happens
+ * when {*} acts on a length 0 literate list.
+ */
+
+ parsePtr->numWords--;
+ parsePtr->numTokens = wordIndex;
+
+ } else {
+ /*
+ * The comment word is not a literal, so defer
+ * processing to compile/eval time by marking with a
+ * TCL_TOKEN_COMMENT_WORD token.
+ */
+
+ tokenPtr->type = TCL_TOKEN_COMMENT_WORD;
}
} else if ((tokenPtr->numComponents == 1)
&& (tokenPtr[1].type == TCL_TOKEN_TEXT)) {
tokenPtr->type = TCL_TOKEN_SIMPLE_WORD;
}
Index: generic/tclTest.c
===================================================================
--- generic/tclTest.c
+++ generic/tclTest.c
@@ -3616,10 +3616,13 @@
for (i = 0; i < parsePtr->numTokens; i++) {
tokenPtr = &parsePtr->tokenPtr[i];
switch (tokenPtr->type) {
case TCL_TOKEN_EXPAND_WORD:
typeString = "expand";
+ break;
+ case TCL_TOKEN_COMMENT_WORD:
+ typeString = "comment";
break;
case TCL_TOKEN_WORD:
typeString = "word";
break;
case TCL_TOKEN_SIMPLE_WORD:
Index: generic/tclUtil.c
===================================================================
--- generic/tclUtil.c
+++ generic/tclUtil.c
@@ -444,11 +444,11 @@
* detailed error message.
*
* If TCL_OK is returned, then *elementPtr will be set to point to the
* first element of list, and *nextPtr will be set to point to the
* character just after any white space following the last character
- * that's part of the element. If this is the last argument in the list,
+ * that's part of the element. If this is the last element in the list,
* then *nextPtr will point just after the last character in the list
* (i.e., at the character at list+listLength). If sizePtr is non-NULL,
* *sizePtr is filled in with the number of bytes in the element. If
* the element is in braces, then *elementPtr will point to the character
* after the opening brace and *sizePtr will not include either of the
@@ -494,10 +494,15 @@
const char *p = list;
const char *elemStart; /* Points to first byte of first element. */
const char *limit; /* Points just after list's last byte. */
int openBraces = 0; /* Brace nesting level during parse. */
int inQuotes = 0;
+ enum TFECommentState { /* TIP#??? */
+ ELEMENT_WORD, /* Not in a comment word (that we know). */
+ BEFORE_COMMENT, /* In a comment before the element. */
+ AFTER_COMMENT /* In a comment after the element. */
+ } inComment = ELEMENT_WORD;
int size = 0; /* lint. */
int numChars;
int literal = 1;
const char *p2;
@@ -524,17 +529,19 @@
p++;
}
elemStart = p;
/*
- * Find element's end (a space, close brace, or the end of the string).
+ * Find end of word (a space, close brace, or the end of the string).
*/
+ mainLoop: /* Comment words may cause jumping back
+ * to this point in the function. */
while (p < limit) {
switch (*p) {
/*
- * Open brace: don't treat specially unless the element is in
+ * Open brace: don't treat specially unless the word is in
* braces. In this case, keep a nesting count.
*/
case '{':
if (openBraces != 0) {
@@ -541,11 +548,11 @@
openBraces++;
}
break;
/*
- * Close brace: if element is in braces, keep nesting count and
+ * Close brace: if word is in braces, keep nesting count and
* quit when the last close brace is seen.
*/
case '}':
if (openBraces > 1) {
@@ -556,26 +563,51 @@
if ((p >= limit) || TclIsSpaceProc(*p)) {
goto done;
}
/*
- * Garbage after the closing brace; return an error.
+ * There is something after the closing brace. Could that
+ * be because it is the closing brace of a comment prefix?
+ */
+
+ if ((size != 1) || (inComment != ELEMENT_WORD) ||
+ (*elemStart != '#')) {
+
+ /*
+ * No, that was no comment prefix, so *p is simply
+ * garbage after the closing brace; return an error.
+ */
+
+ if (interp != NULL) {
+ p2 = p;
+ while ((p2 < limit) && (!TclIsSpaceProc(*p2))
+ && (p2 < p+20)) {
+ p2++;
+ }
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "list element in braces followed by \"%.*s\" "
+ "instead of space", (int) (p2-p), p));
+ Tcl_SetErrorCode(interp, "TCL", "VALUE", "LIST", "JUNK",
+ NULL);
+ }
+ return TCL_ERROR;
+ }
+
+ /*
+ * Yes, that was a comment prefix. Check if the comment
+ * is brace- or quote-delimited.
*/
-
- if (interp != NULL) {
- p2 = p;
- while ((p2 < limit) && (!TclIsSpaceProc(*p2))
- && (p2 < p+20)) {
- p2++;
- }
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "list element in braces followed by \"%.*s\" "
- "instead of space", (int) (p2-p), p));
- Tcl_SetErrorCode(interp, "TCL", "VALUE", "LIST", "JUNK",
- NULL);
- }
- return TCL_ERROR;
+
+ inComment = BEFORE_COMMENT;
+ openBraces = 0;
+ if (*p == '{') {
+ openBraces = 1;
+ p++;
+ } else if (*p == '"') {
+ inQuotes = 1;
+ p++;
+ }
}
break;
/*
* Backslash: skip over everything up to the end of the backslash
@@ -583,11 +615,11 @@
*/
case '\\':
if (openBraces == 0) {
/*
- * A backslash sequence not within a brace quoted element
+ * A backslash sequence not within a brace quoted word
* means the value of the element is different from the
* substring we are parsing. A call to TclCopyAndCollapse()
* is needed to produce the element value. Inform the caller.
*/
literal = 0;
@@ -595,12 +627,12 @@
TclParseBackslash(p, limit - p, &numChars, NULL);
p += (numChars - 1);
break;
/*
- * Space: ignore if element is in braces or quotes; otherwise
- * terminate element.
+ * Space: ignore if word is in braces or quotes; otherwise
+ * terminate word.
*/
case ' ':
case '\f':
case '\n':
@@ -612,11 +644,11 @@
goto done;
}
break;
/*
- * Double-quote: if element is in quotes then terminate it.
+ * Double-quote: if word is in quotes then terminate it.
*/
case '"':
if (inQuotes) {
size = (p - elemStart);
@@ -647,11 +679,11 @@
}
p++;
}
/*
- * End of list: terminate element.
+ * End of list: terminate word.
*/
if (p == limit) {
if (openBraces != 0) {
if (interp != NULL) {
@@ -675,18 +707,73 @@
done:
while ((p < limit) && (TclIsSpaceProc(*p))) {
p++;
}
- *elementPtr = elemStart;
+ if (inComment == BEFORE_COMMENT) {
+
+ /*
+ * The word which has just been read was a comment rather than
+ * a list element, so we'll have to do it all again.
+ */
+
+ inComment = ELEMENT_WORD;
+ literal = 1;
+ openBraces = 0;
+ inQuotes = 0;
+ if (*p == '{') {
+ openBraces = 1;
+ p++;
+ } else if (*p == '"') {
+ inQuotes = 1;
+ p++;
+ }
+ elemStart = p;
+ goto mainLoop;
+ }
+ if (inComment == ELEMENT_WORD) {
+
+ /*
+ * The word which has just been read was the sought list element.
+ */
+
+ *elementPtr = elemStart;
+ if (sizePtr != 0) {
+ *sizePtr = size;
+ }
+ if (literalPtr != 0) {
+ *literalPtr = literal;
+ }
+ }
*nextPtr = p;
- if (sizePtr != 0) {
- *sizePtr = size;
+
+ /*
+ * Could there be a comment word after what has been read so far?
+ */
+
+ if ((limit - p > 3) && (p[0] == '{') && (p[1] == '#') &&
+ (p[2] == '}') && !(TclIsSpaceProc(p[3]))) {
+ /*
+ * It appears there is, so go back and scan past it.
+ * This is needed because callers use (*nextPtr == limit) as
+ * a test for whether this was the last list element.
+ */
+
+ p += 3;
+ inComment = AFTER_COMMENT;
+ openBraces = 0;
+ inQuotes = 0;
+ if (*p == '{') {
+ openBraces = 1;
+ p++;
+ } else if (*p == '"') {
+ inQuotes = 1;
+ p++;
+ }
+ goto mainLoop;
}
- if (literalPtr != 0) {
- *literalPtr = literal;
- }
+
return TCL_OK;
}
/*
*----------------------------------------------------------------------
Index: tests/basic.test
===================================================================
--- tests/basic.test
+++ tests/basic.test
@@ -654,21 +654,41 @@
test basic-47.2.$noComp {Tcl_EvalEx: error during word expansion} -body {
run {{*}\{}
} -constraints $constraints -returnCodes error -result {unmatched open brace in list}
+test basic-47.2.$noComp.2 {Tcl_EvalEx: no error for non-list comment word} -body {
+ run {{#}\{}
+} -constraints $constraints
+
test basic-47.3.$noComp {Tcl_EvalEx, error during substitution} -body {
run {{*}[error foo]}
+} -constraints $constraints -returnCodes error -result foo
+
+test basic-47.3.$noComp.2 {Tcl_EvalEx, error during substitution} -body {
+ run {{#}[error foo]}
} -constraints $constraints -returnCodes error -result foo
test basic-47.4.$noComp {Tcl_EvalEx: no expansion} $constraints {
run {list {*} {*} {*}}
} {* * *}
+test basic-47.4.$noComp.2 {Tcl_EvalEx: not comment words} $constraints {
+ run {list {#} {#} {#}}
+} [list \# \# \#]
+
test basic-47.5.$noComp {Tcl_EvalEx: expansion} $constraints {
run {list {*}{} {*} {*}x {*}"y z"}
} {* x y z}
+
+test basic-47.5.$noComp.2 {Tcl_EvalEx: word comments} $constraints {
+ run {list {#}{} {#} {#}x {#}"y z"}
+} [list \#]
+
+test basic-47.5.$noComp.3 {Tcl_EvalEx: expansion/comment mix} $constraints {
+ run {list a {*}b {#}{c} {*} d {#}e {#}f\ g {*}h\ i {*}"j k" l}
+} {a b * d h i j k l}
test basic-47.6.$noComp {Tcl_EvalEx: expansion to zero args} $constraints {
run {list {*}{}}
} {}
@@ -683,10 +703,16 @@
test basic-47.9.$noComp {Tcl_EvalEx: expansion and subst order} $constraints {
set x 0
run {list [incr x] {*}[incr x] [incr x] \
{*}[list [incr x] [incr x]] [incr x]}
} {1 2 3 4 5 6}
+
+test basic-47.9.$noComp.2 {Tcl_EvalEx: word comment and subst order} $constraints {
+ set x 0
+ run {list [incr x] {#}[incr x] [incr x] \
+ {#}[list [incr x] [incr x]] [incr x]}
+} {1 3 6}
test basic-47.10.$noComp {Tcl_EvalEx: expand and memory management} $constraints {
run {concat {*}{} a b c d e f g h i j k l m n o p q r}
} {a b c d e f g h i j k l m n o p q r}
Index: tests/dict.test
===================================================================
--- tests/dict.test
+++ tests/dict.test
@@ -120,10 +120,18 @@
dict get $a(z) d
}}
} -returnCodes error -result {key "d" not known in dictionary}
test dict-3.16 {dict/list shimmering - Bug 3004007} {set l [list p 1 p 2 q 3];dict get $l q;set l} {p 1 p 2 q 3}
test dict-3.17 {dict/list shimmering - Bug 3004007} {set l [list p 1 p 2 q 3];dict get $l q;llength $l} 6
+test dict-3.18 {dict get command, comment words} -body {
+ dict get {
+ {#}"First heading"
+ key1 value1
+ {#}"Second heading" {#}{extra comment}
+ key2 {#}nothing value2
+ }
+} -result {key1 value1 key2 value2}
test dict-4.1 {dict replace command} {
dict replace {a b c d}
} {a b c d}
test dict-4.2 {dict replace command} {
Index: tests/lindex.test
===================================================================
--- tests/lindex.test
+++ tests/lindex.test
@@ -128,10 +128,12 @@
} f
test lindex-5.3 {three indices} testevalex {
testevalex {lindex {{{a b} {c d}} {{e f} {g h}}} 1 0 1}
} f
+# List parsing
+
test lindex-6.1 {error conditions in parsing list} testevalex {
list [catch {testevalex {lindex "a \{" 2}} msg] $msg
} {1 {unmatched open brace in list}}
test lindex-6.2 {error conditions in parsing list} testevalex {
list [catch {testevalex {lindex {a {b c}d e} 2}} msg] $msg
@@ -339,18 +341,23 @@
lindex {{{a b} {c d}} {{e f} {g h}}} 1 0 1
} result
set result
} f
+# List parsing
+
test lindex-14.1 {error conditions in parsing list} {
list [catch { lindex "a \{" 2 } msg] $msg
} {1 {unmatched open brace in list}}
test lindex-14.2 {error conditions in parsing list} {
list [catch { lindex {a {b c}d e} 2 } msg] $msg
} {1 {list element in braces followed by "d" instead of space}}
test lindex-14.3 {error conditions in parsing list} {
list [catch { lindex {a "b c"def ghi} 2 } msg] $msg
+} {1 {list element in quotes followed by "def" instead of space}}
+test lindex-14.4 {error conditions in parsing list} {
+ list [catch { lindex {a {#}"b c"def ghi} 2 } msg] $msg
} {1 {list element in quotes followed by "def" instead of space}}
test lindex-15.1 {quoted elements} {
catch {
lindex {a "b c" d} 1
@@ -373,10 +380,28 @@
catch {
lindex {a b {c d "e} {f g"}} 2
} result
set result
} {c d "e}
+test lindex-15.5 {comment words} {
+ catch {
+ lindex {a {#}b c d} 1
+ } result
+ set result
+} {c}
+test lindex-15.6 {comment words} {
+ catch {
+ lindex {a {#}"b c" d} 1
+ } result
+ set result
+} {d}
+test lindex-15.7 {comment words} {
+ catch {
+ lindex {{#}a "b c" {#}d} 0
+ } result
+ set result
+} {b c}
test lindex-16.1 {data reuse} {
set x 0
catch {
lindex $x $x
Index: tests/listObj.test
===================================================================
--- tests/listObj.test
+++ tests/listObj.test
@@ -105,10 +105,14 @@
} {}
test listobj-5.8 {Tcl_ListObjIndex, error in conversion} {
set x " \{"
list [catch {lindex $x 0} msg] $msg
} {1 {unmatched open brace in list}}
+test listobj-5.9 {Tcl_ListObjIndex, error in conversion} {
+ set x " {#}{a b}c "
+ list [catch {lindex $x 0} msg] $msg
+} {1 {list element in braces followed by "c" instead of space}}
test listobj-6.1 {Tcl_ListObjLength} {
llength {a b c d}
} 4
test listobj-6.2 {Tcl_ListObjLength} {
@@ -168,10 +172,13 @@
} {1 2 3 4 a b c d e f g h i j k l 5}
test listobj-8.1 {SetListFromAny} {
lindex {0 foo\x00help 2} 1
} "foo\x00help"
+test listobj-8.2 {SetListFromAny, comment} {
+ lindex {0 {#}foo\ help 2} 1
+} 2
test listobj-9.1 {UpdateStringOfList} {
string length [list foo\x00help]
} 8
Index: tests/llength.test
===================================================================
--- tests/llength.test
+++ tests/llength.test
@@ -23,10 +23,23 @@
llength {a b c {a b {c d}} d}
} 5
test llength-1.3 {length of list} {
llength {}
} 0
+test llength-1.4 {length of list with comment word} {
+ llength {a b {#}c d}
+} 3
+test llength-1.5 {length of list with comment word} {
+ llength {a {#}"b c" d}
+} 2
+test llength-1.6 {length of list with comment words} {
+ llength {{#}{a b} c {#}\ d}
+} 1
+test llength-1.7 {length of list with comment words only} {
+ llength {{#}"a b" {#}c {#}{d}}
+} 0
+
test llength-2.1 {error conditions} {
list [catch {llength} msg] $msg
} {1 {wrong # args: should be "llength list"}}
test llength-2.2 {error conditions} {