Tcl Source Code

Artifact [2bf216fce5]
Login

Artifact 2bf216fce51936e40d58bcf07b3656d353fbaa2b:

Attachment "lookbehind.patch" to ticket [d351de9d7b] added by tgl 2015-11-06 19:41:41. (unpublished)
diff -pcdr src/doc/re_syntax.n lookbehind/doc/re_syntax.n
*** src/doc/re_syntax.n	Mon Sep 21 18:12:24 2015
--- lookbehind/doc/re_syntax.n	Fri Nov  6 12:55:30 2015
*************** substring matching \fIre\fR begins
*** 152,161 ****
  .
  \fInegative lookahead\fR (AREs only), matches at any point where no
  substring matching \fIre\fR begins
  .RE
  .PP
! The lookahead constraints may not contain back references (see later),
! and all parentheses within them are considered non-capturing.
  .PP
  An RE may not end with
  .QW \fB\e\fR .
--- 152,171 ----
  .
  \fInegative lookahead\fR (AREs only), matches at any point where no
  substring matching \fIre\fR begins
+ .TP
+ \fB(?<=\fIre\fB)\fR
+ .
+ \fIpositive lookbehind\fR (AREs only), matches at any point where a
+ substring matching \fIre\fR ends
+ .TP
+ \fB(?<!\fIre\fB)\fR
+ .
+ \fInegative lookbehind\fR (AREs only), matches at any point where no
+ substring matching \fIre\fR ends
  .RE
  .PP
! Lookahead and lookbehind constraints may not contain back references
! (see later), and all parentheses within them are considered non-capturing.
  .PP
  An RE may not end with
  .QW \fB\e\fR .
*************** Incompatibilities of note include
*** 784,790 ****
  the lack of special treatment for a trailing newline, the addition of
  complemented bracket expressions to the things affected by
  newline-sensitive matching, the restrictions on parentheses and back
! references in lookahead constraints, and the longest/shortest-match
  (rather than first-match) matching semantics.
  .PP
  The matching rules for REs containing both normal and non-greedy
--- 794,800 ----
  the lack of special treatment for a trailing newline, the addition of
  complemented bracket expressions to the things affected by
  newline-sensitive matching, the restrictions on parentheses and back
! references in lookahead/lookbehind constraints, and the longest/shortest-match
  (rather than first-match) matching semantics.
  .PP
  The matching rules for REs containing both normal and non-greedy
diff -pcdr src/generic/regc_lex.c lookbehind/generic/regc_lex.c
*** src/generic/regc_lex.c	Mon Sep 21 18:12:24 2015
--- lookbehind/generic/regc_lex.c	Fri Nov  6 13:00:17 2015
*************** next(
*** 613,618 ****
--- 613,621 ----
  	if ((v->cflags&REG_ADVF) && NEXT1('?')) {
  	    NOTE(REG_UNONPOSIX);
  	    v->now++;
+ 	    if (ATEOS()) {
+ 		FAILW(REG_BADRPT);
+ 	    }
  	    switch (*v->now++) {
  	    case CHR(':'):	/* non-capturing paren */
  		RETV('(', 0);
*************** next(
*** 628,639 ****
  		return next(v);
  		break;
  	    case CHR('='):	/* positive lookahead */
! 		NOTE(REG_ULOOKAHEAD);
! 		RETV(LACON, 1);
  		break;
  	    case CHR('!'):	/* negative lookahead */
! 		NOTE(REG_ULOOKAHEAD);
! 		RETV(LACON, 0);
  		break;
  	    default:
  		FAILW(REG_BADRPT);
--- 631,661 ----
  		return next(v);
  		break;
  	    case CHR('='):	/* positive lookahead */
! 		NOTE(REG_ULOOKAROUND);
! 		RETV(LACON, LATYPE_AHEAD_POS);
  		break;
  	    case CHR('!'):	/* negative lookahead */
! 		NOTE(REG_ULOOKAROUND);
! 		RETV(LACON, LATYPE_AHEAD_NEG);
! 		break;
! 	    case CHR('<'):
! 		if (ATEOS()) {
! 		    FAILW(REG_BADRPT);
! 		}
! 		switch (*v->now++) {
! 		case CHR('='):	/* positive lookbehind */
! 		    NOTE(REG_ULOOKAROUND);
! 		    RETV(LACON, LATYPE_BEHIND_POS);
! 		    break;
! 		case CHR('!'):	/* negative lookbehind */
! 		    NOTE(REG_ULOOKAROUND);
! 		    RETV(LACON, LATYPE_BEHIND_NEG);
! 		    break;
! 		default:
! 		    FAILW(REG_BADRPT);
! 		    break;
! 		}
! 		assert(NOTREACHED);
  		break;
  	    default:
  		FAILW(REG_BADRPT);
diff -pcdr src/generic/regc_nfa.c lookbehind/generic/regc_nfa.c
*** src/generic/regc_nfa.c	Tue Oct 27 14:39:17 2015
--- lookbehind/generic/regc_nfa.c	Fri Nov  6 13:46:52 2015
*************** newarc(
*** 293,299 ****
  	    }
  	}
      }
!   
      /* no dup, so create the arc */
      createarc(nfa, t, co, from, to);
  }
--- 293,299 ----
  	    }
  	}
      }
! 
      /* no dup, so create the arc */
      createarc(nfa, t, co, from, to);
  }
*************** sortins_cmp(
*** 657,663 ****
      }
      return 0;
  }
!  
  /*
   * sortouts - sort the out arcs of a state by to/color/type
   */
--- 657,663 ----
      }
      return 0;
  }
! 
  /*
   * sortouts - sort the out arcs of a state by to/color/type
   */
*************** cleartraverse(
*** 1357,1362 ****
--- 1357,1407 ----
  }
  
  /*
+  - single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
+  * If traversing from s1 to s2 requires a single PLAIN match (possibly of any
+  * of a set of colors), return a state whose outarc list contains only PLAIN
+  * arcs of those color(s).  Otherwise return NULL.
+  * This is used before optimizing the NFA, so there may be EMPTY arcs, which
+  * we should ignore; the possibility of an EMPTY is why the result state could
+  * be different from s1.
+  * It's worth troubling to handle multiple parallel PLAIN arcs here because a
+  * bracket construct such as [abc] might yield either one or several parallel
+  * PLAIN arcs depending on earlier atoms in the expression.  We'd rather not
+  * let that implementation detail create user-visible performance differences.
+  */
+ static struct state *
+ single_color_transition(
+     struct state *s1,
+     struct state *s2)
+ {
+     struct arc *a;
+ 
+     /* Ignore leading EMPTY arc, if any */
+     if (s1->nouts == 1 && s1->outs->type == EMPTY) {
+ 	s1 = s1->outs->to;
+     }
+     /* Likewise for any trailing EMPTY arc */
+     if (s2->nins == 1 && s2->ins->type == EMPTY) {
+ 	s2 = s2->ins->from;
+     }
+     /* There could be a single-state loop in between; if so, reject */
+     if (s1 == s2) {
+ 	return NULL;
+     }
+     /* s1 must have at least one outarc... */
+     if (s1->outs == NULL) {
+ 	return NULL;
+     }
+     /* ... and they must all be PLAIN arcs to s2 */
+     for (a = s1->outs; a != NULL; a = a->outchain) {
+ 	if (a->type != PLAIN || a->to != s2)
+ 	    return NULL;
+     }
+     /* OK, return s1 as the possessor of the relevant outarcs */
+     return s1;
+ }
+ 
+ /*
   - specialcolors - fill in special colors for an NFA
   ^ static void specialcolors(struct nfa *);
   */
*************** fixempties(
*** 2020,2026 ****
  		    arcarray[arccount++] = a;
  		}
  	    }
!   
    	    /* Reset the tmp fields as we walk back */
    	    nexts = s2->tmp;
    	    s2->tmp = NULL;
--- 2065,2071 ----
  		    arcarray[arccount++] = a;
  		}
  	    }
! 
    	    /* Reset the tmp fields as we walk back */
    	    nexts = s2->tmp;
    	    s2->tmp = NULL;
*************** fixempties(
*** 2042,2048 ****
  	}
  	inarcsorig[s->no] = a;
      }
!   
      FREE(arcarray);
      FREE(inarcsorig);
  
--- 2087,2093 ----
  	}
  	inarcsorig[s->no] = a;
      }
! 
      FREE(arcarray);
      FREE(inarcsorig);
  
*************** fixconstraintloops(
*** 2193,2199 ****
   	    dropstate(nfa, s);
  	}
      }
!  
      /* Nothing to do if no remaining constraint arcs */
      if (NISERR() || !hasconstraints) {
  	return;
--- 2238,2244 ----
   	    dropstate(nfa, s);
  	}
      }
! 
      /* Nothing to do if no remaining constraint arcs */
      if (NISERR() || !hasconstraints) {
  	return;
*************** carc_cmp(
*** 2909,2915 ****
  {
      const struct carc *aa = (const struct carc *) a;
      const struct carc *bb = (const struct carc *) b;
!   
      if (aa->co < bb->co) {
  	return -1;
      }
--- 2954,2960 ----
  {
      const struct carc *aa = (const struct carc *) a;
      const struct carc *bb = (const struct carc *) b;
! 
      if (aa->co < bb->co) {
  	return -1;
      }
diff -pcdr src/generic/regcomp.c lookbehind/generic/regcomp.c
*** src/generic/regcomp.c	Tue Oct 27 14:39:17 2015
--- lookbehind/generic/regcomp.c	Fri Nov  6 13:45:32 2015
*************** static const chr *scanplain(struct vars 
*** 56,61 ****
--- 56,63 ----
  static void onechr(struct vars *, pchr, struct state *, struct state *);
  static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
  static void wordchrs(struct vars *);
+ static void processlacon(struct vars *, struct state *, struct state *, int,
+ 			 struct state *, struct state *);
  static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
  static void freesubre(struct vars *, struct subre *);
  static void freesrnode(struct vars *, struct subre *);
*************** static int numst(struct subre *, int);
*** 64,70 ****
  static void markst(struct subre *);
  static void cleanst(struct vars *);
  static long nfatree(struct vars *, struct subre *, FILE *);
! static long nfanode(struct vars *, struct subre *, FILE *);
  static int newlacon(struct vars *, struct state *, struct state *, int);
  static void freelacons(struct subre *, int);
  static void rfree(regex_t *);
--- 66,72 ----
  static void markst(struct subre *);
  static void cleanst(struct vars *);
  static long nfatree(struct vars *, struct subre *, FILE *);
! static long nfanode(struct vars *, struct subre *, int, FILE *);
  static int newlacon(struct vars *, struct state *, struct state *, int);
  static void freelacons(struct subre *, int);
  static void rfree(regex_t *);
*************** static void deltraverse(struct nfa *, st
*** 138,143 ****
--- 140,146 ----
  static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
  static void duptraverse(struct nfa *, struct state *, struct state *, int);
  static void cleartraverse(struct nfa *, struct state *);
+ static struct state *single_color_transition(struct state *, struct state *);
  static void specialcolors(struct nfa *);
  static long optimize(struct nfa *, FILE *);
  static void pullback(struct nfa *, FILE *);
*************** struct vars {
*** 222,229 ****
      int ntree;			/* number of tree nodes, plus one */
      struct cvec *cv;		/* interface cvec */
      struct cvec *cv2;		/* utility cvec */
!     struct subre *lacons;	/* lookahead-constraint vector */
!     int nlacons;		/* size of lacons */
      size_t spaceused;		/* approx. space used for compilation */
  };
  
--- 225,233 ----
      int ntree;			/* number of tree nodes, plus one */
      struct cvec *cv;		/* interface cvec */
      struct cvec *cv2;		/* utility cvec */
!     struct subre *lacons;	/* lookaround-constraint vector */
!     int nlacons;		/* size of lacons[]; note that only slots
! 				 * numbered 1 .. nlacons-1 are used */
      size_t spaceused;		/* approx. space used for compilation */
  };
  
*************** struct vars {
*** 254,260 ****
  #define	CCLASS	'C'		/* start of [: */
  #define	END	'X'		/* end of [. [= [: */
  #define	RANGE	'R'		/* - within [] which might be range delim. */
! #define	LACON	'L'		/* lookahead constraint subRE */
  #define	AHEAD	'a'		/* color-lookahead arc */
  #define	BEHIND	'r'		/* color-lookbehind arc */
  #define	WBDRY	'w'		/* word boundary constraint */
--- 258,264 ----
  #define	CCLASS	'C'		/* start of [: */
  #define	END	'X'		/* end of [. [= [: */
  #define	RANGE	'R'		/* - within [] which might be range delim. */
! #define	LACON	'L'		/* lookaround constraint subRE */
  #define	AHEAD	'a'		/* color-lookahead arc */
  #define	BEHIND	'r'		/* color-lookbehind arc */
  #define	WBDRY	'w'		/* word boundary constraint */
*************** compile(
*** 412,421 ****
      CNOERR();
      assert(v->nlacons == 0 || v->lacons != NULL);
      for (i = 1; i < v->nlacons; i++) {
  	if (debug != NULL) {
  	    fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
  	}
! 	nfanode(v, &v->lacons[i], debug);
      }
      CNOERR();
      if (v->tree->flags&SHORTER) {
--- 416,428 ----
      CNOERR();
      assert(v->nlacons == 0 || v->lacons != NULL);
      for (i = 1; i < v->nlacons; i++) {
+ 	struct subre *lasub = &v->lacons[i];
+ 
  	if (debug != NULL) {
  	    fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
  	}
! 	/* Prepend .* to pattern if it's a lookbehind LACON */
! 	nfanode(v, lasub, !LATYPE_IS_AHEAD(lasub->subno), debug);
      }
      CNOERR();
      if (v->tree->flags&SHORTER) {
*************** static struct subre *
*** 653,659 ****
  parse(
      struct vars *v,
      int stopper,		/* EOS or ')' */
!     int type,			/* LACON (lookahead subRE) or PLAIN */
      struct state *init,		/* initial state */
      struct state *final)	/* final state */
  {
--- 660,666 ----
  parse(
      struct vars *v,
      int stopper,		/* EOS or ')' */
!     int type,			/* LACON (lookaround subRE) or PLAIN */
      struct state *init,		/* initial state */
      struct state *final)	/* final state */
  {
*************** static struct subre *
*** 735,741 ****
  parsebranch(
      struct vars *v,
      int stopper,		/* EOS or ')' */
!     int type,			/* LACON (lookahead subRE) or PLAIN */
      struct state *left,		/* leftmost state */
      struct state *right,	/* rightmost state */
      int partial)		/* is this only part of a branch? */
--- 742,748 ----
  parsebranch(
      struct vars *v,
      int stopper,		/* EOS or ')' */
!     int type,			/* LACON (lookaround subRE) or PLAIN */
      struct state *left,		/* leftmost state */
      struct state *right,	/* rightmost state */
      int partial)		/* is this only part of a branch? */
*************** static void
*** 784,790 ****
  parseqatom(
      struct vars *v,
      int stopper,		/* EOS or ')' */
!     int type,			/* LACON (lookahead subRE) or PLAIN */
      struct state *lp,		/* left state to hang it on */
      struct state *rp,		/* right state to hang it on */
      struct subre *top)		/* subtree top */
--- 791,797 ----
  parseqatom(
      struct vars *v,
      int stopper,		/* EOS or ')' */
!     int type,			/* LACON (lookaround subRE) or PLAIN */
      struct state *lp,		/* left state to hang it on */
      struct state *rp,		/* right state to hang it on */
      struct subre *top)		/* subtree top */
*************** parseqatom(
*** 796,802 ****
      struct subre *atom;		/* atom's subtree */
      struct subre *t;
      int cap;			/* capturing parens? */
!     int pos;			/* positive lookahead? */
      int subno;			/* capturing-parens or backref number */
      int atomtype;
      int qprefer;		/* quantifier short/long preference */
--- 803,809 ----
      struct subre *atom;		/* atom's subtree */
      struct subre *t;
      int cap;			/* capturing parens? */
!     int latype;			/* lookaround constraint type */
      int subno;			/* capturing-parens or backref number */
      int atomtype;
      int qprefer;		/* quantifier short/long preference */
*************** parseqatom(
*** 879,897 ****
  	nonword(v, BEHIND, lp, s);
  	nonword(v, AHEAD, s, rp);
  	return;
!     case LACON:			/* lookahead constraint */
! 	pos = v->nextvalue;
  	NEXT();
  	s = newstate(v->nfa);
  	s2 = newstate(v->nfa);
  	NOERR();
  	t = parse(v, ')', LACON, s, s2);
  	freesubre(v, t);	/* internal structure irrelevant */
- 	assert(SEE(')') || ISERR());
- 	NEXT();
- 	n = newlacon(v, s, s2, pos);
  	NOERR();
! 	ARCV(LACON, n);
  	return;
  
  	/*
--- 886,903 ----
  	nonword(v, BEHIND, lp, s);
  	nonword(v, AHEAD, s, rp);
  	return;
!     case LACON:			/* lookaround constraint */
! 	latype = v->nextvalue;
  	NEXT();
  	s = newstate(v->nfa);
  	s2 = newstate(v->nfa);
  	NOERR();
  	t = parse(v, ')', LACON, s, s2);
  	freesubre(v, t);	/* internal structure irrelevant */
  	NOERR();
! 	assert(SEE(')'));
! 	NEXT();
! 	processlacon(v, s, s2, latype, lp, rp);
  	return;
  
  	/*
*************** wordchrs(
*** 1719,1724 ****
--- 1725,1794 ----
  }
  
  /*
+  - processlacon - generate the NFA representation of a LACON
+  * In the general case this is just newlacon() + newarc(), but some cases
+  * can be optimized.
+  */
+ static void
+ processlacon(
+     struct vars *v,
+     struct state *begin,	 /* start of parsed LACON sub-re */
+     struct state *end,		 /* end of parsed LACON sub-re */
+     int latype,
+     struct state *lp,		/* left state to hang it on */
+     struct state *rp)		/* right state to hang it on */
+ {
+     struct state *s1;
+     int n;
+ 
+     /*
+      * Check for lookaround RE consisting of a single plain color arc (or set
+      * of arcs); this would typically be a simple chr or a bracket expression.
+      */
+     s1 = single_color_transition(begin, end);
+     switch (latype) {
+     case LATYPE_AHEAD_POS:
+ 	/* If lookahead RE is just colorset C, convert to AHEAD(C) */
+ 	if (s1 != NULL) {
+ 	    cloneouts(v->nfa, s1, lp, rp, AHEAD);
+ 	    return;
+ 	}
+ 	break;
+     case LATYPE_AHEAD_NEG:
+ 	/* If lookahead RE is just colorset C, convert to AHEAD(^C)|$ */
+ 	if (s1 != NULL) {
+ 	    colorcomplement(v->nfa, v->cm, AHEAD, s1, lp, rp);
+ 	    newarc(v->nfa, '$', 1, lp, rp);
+ 	    newarc(v->nfa, '$', 0, lp, rp);
+ 	    return;
+ 	}
+ 	break;
+     case LATYPE_BEHIND_POS:
+ 	/* If lookbehind RE is just colorset C, convert to BEHIND(C) */
+ 	if (s1 != NULL) {
+ 	    cloneouts(v->nfa, s1, lp, rp, BEHIND);
+ 	    return;
+ 	}
+ 	break;
+     case LATYPE_BEHIND_NEG:
+ 	/* If lookbehind RE is just colorset C, convert to BEHIND(^C)|^ */
+ 	if (s1 != NULL) {
+ 	    colorcomplement(v->nfa, v->cm, BEHIND, s1, lp, rp);
+ 	    newarc(v->nfa, '^', 1, lp, rp);
+ 	    newarc(v->nfa, '^', 0, lp, rp);
+ 	    return;
+ 	}
+ 	break;
+     default:
+ 	assert(NOTREACHED);
+     }
+ 
+     /* General case: we need a LACON subre and arc */
+     n = newlacon(v, begin, end, latype);
+     newarc(v->nfa, LACON, n, lp, rp);
+ }
+ 
+ /*
   - subre - allocate a subre
   ^ static struct subre *subre(struct vars *, int, int, struct state *,
   ^	struct state *);
*************** nfatree(
*** 1926,1942 ****
  	(DISCARD) nfatree(v, t->right, f);
      }
  
!     return nfanode(v, t, f);
  }
  
  /*
!  - nfanode - do one NFA for nfatree
!  ^ static long nfanode(struct vars *, struct subre *, FILE *);
   */
  static long			/* optimize results */
  nfanode(
      struct vars *v,
      struct subre *t,
      FILE *f)			/* for debug output */
  {
      struct nfa *nfa;
--- 1996,2014 ----
  	(DISCARD) nfatree(v, t->right, f);
      }
  
!     return nfanode(v, t, 0, f);
  }
  
  /*
!  - nfanode - do one NFA for nfatree or lacons
!  * If converttosearch is true, apply makesearch() to the NFA.
!  ^ static long nfanode(struct vars *, struct subre *, int, FILE *);
   */
  static long			/* optimize results */
  nfanode(
      struct vars *v,
      struct subre *t,
+     int converttosearch,
      FILE *f)			/* for debug output */
  {
      struct nfa *nfa;
*************** nfanode(
*** 1954,1961 ****
--- 2026,2038 ----
      dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
      if (!ISERR()) {
  	specialcolors(nfa);
+     }
+     if (!ISERR()) {
  	ret = optimize(nfa, f);
      }
+     if (converttosearch && !ISERR()) {
+ 	makesearch(v, nfa);
+     }
      if (!ISERR()) {
  	compact(nfa, &t->cnfa);
      }
*************** nfanode(
*** 1965,1971 ****
  }
  
  /*
!  - newlacon - allocate a lookahead-constraint subRE
   ^ static int newlacon(struct vars *, struct state *, struct state *, int);
   */
  static int			/* lacon number */
--- 2042,2048 ----
  }
  
  /*
!  - newlacon - allocate a lookaround-constraint subRE
   ^ static int newlacon(struct vars *, struct state *, struct state *, int);
   */
  static int			/* lacon number */
*************** newlacon(
*** 1973,1979 ****
      struct vars *v,
      struct state *begin,
      struct state *end,
!     int pos)
  {
      int n;
      struct subre *newlacons;
--- 2050,2056 ----
      struct vars *v,
      struct state *begin,
      struct state *end,
!     int latype)
  {
      int n;
      struct subre *newlacons;
*************** newlacon(
*** 1998,2010 ****
      sub = &v->lacons[n];
      sub->begin = begin;
      sub->end = end;
!     sub->subno = pos;
      ZAPCNFA(sub->cnfa);
      return n;
  }
  
  /*
!  - freelacons - free lookahead-constraint subRE vector
   ^ static void freelacons(struct subre *, int);
   */
  static void
--- 2075,2087 ----
      sub = &v->lacons[n];
      sub->begin = begin;
      sub->end = end;
!     sub->subno = latype;
      ZAPCNFA(sub->cnfa);
      return n;
  }
  
  /*
!  - freelacons - free lookaround-constraint subRE vector
   ^ static void freelacons(struct subre *, int);
   */
  static void
*************** dump(
*** 2095,2103 ****
  	dumpcnfa(&g->search, f);
      }
      for (i = 1; i < g->nlacons; i++) {
! 	fprintf(f, "\nla%d (%s):\n", i,
! 		(g->lacons[i].subno) ? "positive" : "negative");
! 	dumpcnfa(&g->lacons[i].cnfa, f);
      }
      fprintf(f, "\n");
      dumpst(g->tree, f, 0);
--- 2172,2199 ----
  	dumpcnfa(&g->search, f);
      }
      for (i = 1; i < g->nlacons; i++) {
! 	struct subre *lasub = &g->lacons[i];
! 	const char *latype;
! 
! 	switch (lasub->subno) {
! 	case LATYPE_AHEAD_POS:
! 	    latype = "positive lookahead";
! 	    break;
! 	case LATYPE_AHEAD_NEG:
! 	    latype = "negative lookahead";
! 	    break;
! 	case LATYPE_BEHIND_POS:
! 	    latype = "positive lookbehind";
! 	    break;
! 	case LATYPE_BEHIND_NEG:
! 	    latype = "negative lookbehind";
! 	    break;
! 	default:
! 	    latype = "???";
! 	    break;
! 	}
! 	fprintf(f, "\nla%d (%s):\n", i, latype);
! 	dumpcnfa(&lasub->cnfa, f);
      }
      fprintf(f, "\n");
      dumpst(g->tree, f, 0);
diff -pcdr src/generic/rege_dfa.c lookbehind/generic/rege_dfa.c
*** src/generic/rege_dfa.c	Mon Sep 21 18:12:24 2015
--- lookbehind/generic/rege_dfa.c	Fri Nov  6 13:46:57 2015
*************** shortest(
*** 282,287 ****
--- 282,402 ----
  }
  
  /*
+  - matchuntil - incremental matching engine
+  * This is meant for use with a search-style NFA (that is, the pattern is
+  * known to act as though it had a leading .*).  We determine whether a
+  * match exists starting at v->start and ending at probe.  Multiple calls
+  * require only O(N) time not O(N^2) so long as the probe values are
+  * nondecreasing.  *lastcss and *lastcp must be initialized to NULL before
+  * starting a series of calls.
+  * Returns 1 if a match exists, 0 if not.
+  * Internal errors also return 0, with v->err set.
+  */
+ static int
+ matchuntil(
+     struct vars *v,
+     struct dfa *d,
+     chr *probe,			/* we want to know if a match ends here */
+     struct sset **lastcss,	/* state storage across calls */
+     chr **lastcp)		/* state storage across calls */
+ {
+     chr *cp = *lastcp;
+     color co;
+     struct sset *css = *lastcss;
+     struct sset *ss;
+     struct colormap *cm = d->cm;
+ 
+     /* initialize and startup, or restart, if necessary */
+     if (cp == NULL || cp > probe) {
+ 	cp = v->start;
+ 	css = initialize(v, d, cp);
+ 	if (css == NULL)
+ 	    return 0;
+ 
+ 	FDEBUG((">>> startup >>>\n"));
+ 	co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
+ 	FDEBUG(("color %ld\n", (long) co));
+ 
+ 	css = miss(v, d, css, co, cp, v->start);
+ 	if (css == NULL)
+ 	    return 0;
+ 	css->lastseen = cp;
+     }
+     else if (css == NULL) {
+ 	/* we previously found that no match is possible beyond *lastcp */
+ 	return 0;
+     }
+     ss = css;
+ 
+     /*
+      * This is the main text-scanning loop.  It seems worth having two copies
+      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
+      * builds, when you're not actively tracing.
+      */
+ #ifdef REG_DEBUG
+     if (v->eflags & REG_FTRACE) {
+ 	while (cp < probe) {
+ 	    FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
+ 	    co = GETCOLOR(cm, *cp);
+ 	    FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
+ 	    ss = css->outs[co];
+ 	    if (ss == NULL) {
+ 		ss = miss(v, d, css, co, cp + 1, v->start);
+ 		if (ss == NULL)
+ 		    break;	/* NOTE BREAK OUT */
+ 	    }
+ 	    cp++;
+ 	    ss->lastseen = cp;
+ 	    css = ss;
+ 	}
+     }
+     else
+ #endif
+     {
+ 	while (cp < probe) {
+ 	    co = GETCOLOR(cm, *cp);
+ 	    ss = css->outs[co];
+ 	    if (ss == NULL) {
+ 		ss = miss(v, d, css, co, cp + 1, v->start);
+ 		if (ss == NULL)
+ 		    break;	/* NOTE BREAK OUT */
+ 	    }
+ 	    cp++;
+ 	    ss->lastseen = cp;
+ 	    css = ss;
+ 	}
+     }
+ 
+     *lastcss = ss;
+     *lastcp = cp;
+ 
+     if (ss == NULL) {
+ 	return 0;		/* impossible match, or internal error */
+     }
+ 
+     /* We need to process one more chr, or the EOS symbol, to check match */
+     if (cp < v->stop) {
+ 	FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
+ 	co = GETCOLOR(cm, *cp);
+ 	FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
+ 	ss = css->outs[co];
+ 	if (ss == NULL) {
+ 	    ss = miss(v, d, css, co, cp + 1, v->start);
+ 	}
+     } else {
+ 	assert(cp == v->stop);
+ 	co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
+ 	FDEBUG(("color %ld\n", (long) co));
+ 	ss = miss(v, d, css, co, cp, v->start);
+     }
+ 
+     if (ss == NULL || !(ss->flags & POSTSTATE)) {
+ 	return 0;
+     }
+     return 1;
+ }
+ 
+ /*
   - lastCold - determine last point at which no progress had been made
   ^ static chr *lastCold(struct vars *, struct dfa *);
   */
*************** miss(
*** 593,599 ****
  	 */
      }
  
!     if (!sawLAConstraints) {	/* lookahead conds. always cache miss */
  	FDEBUG(("c%d[%d]->c%d\n",
  		(int) (css - d->ssets), co, (int) (p - d->ssets)));
  	css->outs[co] = p;
--- 708,714 ----
  	 */
      }
  
!     if (!sawLAConstraints) {	/* lookaround conds. always cache miss */
  	FDEBUG(("c%d[%d]->c%d\n",
  		(int) (css - d->ssets), co, (int) (p - d->ssets)));
  	css->outs[co] = p;
*************** miss(
*** 605,611 ****
  }
  
  /*
!  - checkLAConstraint - lookahead-constraint checker for miss()
   ^ static int checkLAConstraint(struct vars *, struct cnfa *, chr *, pcolor);
   */
  static int			/* predicate:  constraint satisfied? */
--- 720,726 ----
  }
  
  /*
!  - checkLAConstraint - lookaround-constraint checker for miss()
   ^ static int checkLAConstraint(struct vars *, struct cnfa *, chr *, pcolor);
   */
  static int			/* predicate:  constraint satisfied? */
*************** checkLAConstraint(
*** 613,639 ****
      struct vars *const v,
      struct cnfa *const pcnfa,	/* parent cnfa */
      chr *const cp,
!     const pcolor co)		/* "color" of the lookahead constraint */
  {
      int n;
      struct subre *sub;
      struct dfa *d;
-     struct smalldfa sd;
      chr *end;
  
      n = co - pcnfa->ncolors;
!     assert(n < v->g->nlacons && v->g->lacons != NULL);
      FDEBUG(("=== testing lacon %d\n", n));
      sub = &v->g->lacons[n];
!     d = newDFA(v, &sub->cnfa, &v->g->cmap, &sd);
      if (d == NULL) {
- 	ERR(REG_ESPACE);
  	return 0;
      }
!     end = longest(v, d, cp, v->stop, NULL);
!     freeDFA(d);
!     FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
!     return (sub->subno) ? (end != NULL) : (end == NULL);
  }
  
  /*
--- 728,769 ----
      struct vars *const v,
      struct cnfa *const pcnfa,	/* parent cnfa */
      chr *const cp,
!     const pcolor co)		/* "color" of the lookaround constraint */
  {
      int n;
      struct subre *sub;
      struct dfa *d;
      chr *end;
+     int satisfied;
  
      n = co - pcnfa->ncolors;
!     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
      FDEBUG(("=== testing lacon %d\n", n));
      sub = &v->g->lacons[n];
!     d = getladfa(v, n);
      if (d == NULL) {
  	return 0;
      }
!     if (LATYPE_IS_AHEAD(sub->subno)) {
! 	/* used to use longest() here, but shortest() could be much cheaper */
! 	end = shortest(v, d, cp, cp, v->stop, NULL, NULL);
! 	satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
!     } else {
! 	/*
! 	 * To avoid doing O(N^2) work when repeatedly testing a lookbehind
! 	 * constraint in an N-character string, we use matchuntil() which can
! 	 * cache the DFA state across calls.  We only need to restart if the
! 	 * probe point decreases, which is not common.  The NFA we're using is
! 	 * a search NFA, so it doesn't mind scanning over stuff before the
! 	 * nominal match.
! 	 */
! 	satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
! 	if (!LATYPE_IS_POS(sub->subno)) {
! 	    satisfied = !satisfied;
! 	}
!     }
!     FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
!     return satisfied;
  }
  
  /*
diff -pcdr src/generic/regex.h lookbehind/generic/regex.h
*** src/generic/regex.h	Tue Oct 27 14:39:17 2015
--- lookbehind/generic/regex.h	Fri Nov  6 13:37:48 2015
*************** typedef struct {
*** 163,169 ****
      size_t re_nsub;		/* number of subexpressions */
      long re_info;		/* information about RE */
  #define	REG_UBACKREF		000001
! #define	REG_ULOOKAHEAD		000002
  #define	REG_UBOUNDS		000004
  #define	REG_UBRACES		000010
  #define	REG_UBSALNUM		000020
--- 163,169 ----
      size_t re_nsub;		/* number of subexpressions */
      long re_info;		/* information about RE */
  #define	REG_UBACKREF		000001
! #define	REG_ULOOKAROUND		000002
  #define	REG_UBOUNDS		000004
  #define	REG_UBRACES		000010
  #define	REG_UBSALNUM		000020
diff -pcdr src/generic/regexec.c lookbehind/generic/regexec.c
*** src/generic/regexec.c	Tue Oct 20 18:38:00 2015
--- lookbehind/generic/regexec.c	Fri Nov  6 13:46:36 2015
*************** struct vars {
*** 107,113 ****
      chr *start;			/* start of string */
      chr *stop;			/* just past end of string */
      int err;			/* error code if any (0 none) */
!     struct dfa **subdfas;	/* per-subre DFAs */
      struct smalldfa dfa1;
      struct smalldfa dfa2;
  };
--- 107,116 ----
      chr *start;			/* start of string */
      chr *stop;			/* just past end of string */
      int err;			/* error code if any (0 none) */
!     struct dfa **subdfas;	/* per-tree-subre DFAs */
!     struct dfa **ladfas;	/* per-lacon-subre DFAs */
!     struct sset **lblastcss;	/* per-lacon-subre lookbehind restart data */
!     chr **lblastcp;		/* per-lacon-subre lookbehind restart data */
      struct smalldfa dfa1;
      struct smalldfa dfa2;
  };
*************** struct vars {
*** 127,132 ****
--- 130,136 ----
  /* === regexec.c === */
  int exec(regex_t *, const chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int);
  static struct dfa *getsubdfa(struct vars *, struct subre *);
+ static struct dfa *getladfa(struct vars *, int);
  static int simpleFind(struct vars *const, struct cnfa *const, struct colormap *const);
  static int complicatedFind(struct vars *const, struct cnfa *const, struct colormap *const);
  static int complicatedFindLoop(struct vars *const, struct cnfa *const, struct colormap *const, struct dfa *const, struct dfa *const, chr **const);
*************** static int creviterdissect(struct vars *
*** 143,148 ****
--- 147,153 ----
  /* === rege_dfa.c === */
  static chr *longest(struct vars *const, struct dfa *const, chr *const, chr *const, int *const);
  static chr *shortest(struct vars *const, struct dfa *const, chr *const, chr *const, chr *const, chr **const, int *const);
+ static int matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
  static chr *lastCold(struct vars *const, struct dfa *const);
  static struct dfa *newDFA(struct vars *const, struct cnfa *const, struct colormap *const, struct smalldfa *);
  static void freeDFA(struct dfa *const);
*************** exec(
*** 235,255 ****
      v->start = (chr *)string;
      v->stop = (chr *)string + len;
      v->err = 0;
      assert(v->g->ntree >= 0);
      n = (size_t) v->g->ntree;
!     if (n <= LOCALDFAS)
  	v->subdfas = subdfas;
!     else
  	v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
!     if (v->subdfas == NULL) {
! 	if (v->pmatch != pmatch && v->pmatch != mat)
! 	    FREE(v->pmatch);
! 	FreeVars(v);
! 	return REG_ESPACE;
      }
      for (i = 0; i < n; i++)
  	v->subdfas[i] = NULL;
  
      /*
       * Do it.
       */
--- 240,287 ----
      v->start = (chr *)string;
      v->stop = (chr *)string + len;
      v->err = 0;
+     v->subdfas = NULL;
+     v->ladfas = NULL;
+     v->lblastcss = NULL;
+     v->lblastcp = NULL;
+     /* below this point, "goto cleanup" will behave sanely */
+ 
      assert(v->g->ntree >= 0);
      n = (size_t) v->g->ntree;
!     if (n <= LOCALDFAS) {
  	v->subdfas = subdfas;
!     } else {
  	v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
! 	if (v->subdfas == NULL) {
! 	    st = REG_ESPACE;
! 	    goto cleanup;
! 	}
      }
      for (i = 0; i < n; i++)
  	v->subdfas[i] = NULL;
  
+     assert(v->g->nlacons >= 0);
+     n = (size_t) v->g->nlacons;
+     if (n > 0) {
+ 	v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
+ 	if (v->ladfas == NULL) {
+ 	    st = REG_ESPACE;
+ 	    goto cleanup;
+ 	}
+ 	for (i = 0; i < n; i++)
+ 	    v->ladfas[i] = NULL;
+ 	v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
+ 	v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
+ 	if (v->lblastcss == NULL || v->lblastcp == NULL) {
+ 	    st = REG_ESPACE;
+ 	    goto cleanup;
+ 	}
+ 	for (i = 0; i < n; i++) {
+ 	    v->lblastcss[i] = NULL;
+ 	    v->lblastcp[i] = NULL;
+ 	}
+     }
+ 
      /*
       * Do it.
       */
*************** exec(
*** 274,296 ****
      /*
       * Clean up.
       */
! 
      if (v->pmatch != pmatch && v->pmatch != mat) {
  	FREE(v->pmatch);
      }
!     n = (size_t) v->g->ntree;
!     for (i = 0; i < n; i++) {
! 	if (v->subdfas[i] != NULL)
! 	    freeDFA(v->subdfas[i]);
      }
!     if (v->subdfas != subdfas)
! 	FREE(v->subdfas);
      FreeVars(v);
      return st;
  }
  
  /*
!  - getsubdfa - create or re-fetch the DFA for a subre node
   * We only need to create the DFA once per overall regex execution.
   * The DFA will be freed by the cleanup step in exec().
   */
--- 306,342 ----
      /*
       * Clean up.
       */
! cleanup:
      if (v->pmatch != pmatch && v->pmatch != mat) {
  	FREE(v->pmatch);
      }
!     if (v->subdfas != NULL) {
! 	n = (size_t) v->g->ntree;
! 	for (i = 0; i < n; i++) {
! 	    if (v->subdfas[i] != NULL)
! 		freeDFA(v->subdfas[i]);
! 	}
! 	if (v->subdfas != subdfas)
! 	    FREE(v->subdfas);
      }
!     if (v->ladfas != NULL) {
! 	n = (size_t) v->g->nlacons;
! 	for (i = 0; i < n; i++) {
! 	    if (v->ladfas[i] != NULL)
! 		freeDFA(v->ladfas[i]);
! 	}
! 	FREE(v->ladfas);
!     }
!     if (v->lblastcss != NULL)
! 	FREE(v->lblastcss);
!     if (v->lblastcp != NULL)
! 	FREE(v->lblastcp);
      FreeVars(v);
      return st;
  }
  
  /*
!  - getsubdfa - create or re-fetch the DFA for a tree subre node
   * We only need to create the DFA once per overall regex execution.
   * The DFA will be freed by the cleanup step in exec().
   */
*************** getsubdfa(struct vars * v,
*** 307,312 ****
--- 353,377 ----
  }
  
  /*
+  - getladfa - create or re-fetch the DFA for a LACON subre node
+  * Built on first request for LACON n; exec()'s cleanup step frees it.
+  */
+ static struct dfa *
+ getladfa(struct vars *v,
+ 	 int n)
+ {
+     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
+     if (v->ladfas[n] == NULL) {	/* first request in this exec() */
+ 	struct subre *sub = &v->g->lacons[n];
+ 
+ 	v->ladfas[n] = newDFA(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
+ 	if (ISERR())
+ 	    return NULL;
+     }
+     return v->ladfas[n];
+ }
+ 
+ /*
   - simpleFind - find a match for the main NFA (no-complications case)
   ^ static int simpleFind(struct vars *, struct cnfa *, struct colormap *);
   */
diff -pcdr src/generic/regguts.h lookbehind/generic/regguts.h
*** src/generic/regguts.h	Tue Oct 27 14:39:17 2015
--- lookbehind/generic/regguts.h	Fri Nov  6 13:40:57 2015
***************
*** 96,108 ****
   */
  
  #define	NOTREACHED	0
- #define	xxx		1
  
  #define	DUPMAX	_POSIX2_RE_DUP_MAX
  #define	DUPINF	(DUPMAX+1)
  
  #define	REMAGIC	0xfed7		/* magic number for main struct */
  
  /*
   * debugging facilities
   */
--- 96,115 ----
   */
  
  #define	NOTREACHED	0
  
  #define	DUPMAX	_POSIX2_RE_DUP_MAX
  #define	DUPINF	(DUPMAX+1)
  
  #define	REMAGIC	0xfed7		/* magic number for main struct */
  
+ /* Lookaround type codes: bit 01 = positive, bit 02 = lookahead */
+ #define LATYPE_AHEAD_POS	03	/* positive lookahead */
+ #define LATYPE_AHEAD_NEG	02	/* negative lookahead */
+ #define LATYPE_BEHIND_POS	01	/* positive lookbehind */
+ #define LATYPE_BEHIND_NEG	00	/* negative lookbehind */
+ #define LATYPE_IS_POS(la)	((la) & 01)
+ #define LATYPE_IS_AHEAD(la) ((la) & 02)
+ 
  /*
   * debugging facilities
   */
*************** struct nfa {
*** 311,317 ****
   *
   * The non-dummy carc structs are of two types: plain arcs and LACON arcs.
   * Plain arcs just store the transition color number as "co".  LACON arcs
!  * store the lookahead constraint number plus cnfa.ncolors as "co".  LACON
   * arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
   */
  
--- 318,324 ----
   *
   * The non-dummy carc structs are of two types: plain arcs and LACON arcs.
   * Plain arcs just store the transition color number as "co".  LACON arcs
!  * store the lookaround constraint number plus cnfa.ncolors as "co".  LACON
   * arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
   */
  
*************** struct cnfa {
*** 324,330 ****
      int nstates;		/* number of states */
      int ncolors;		/* number of colors */
      int flags;
! #define	HASLACONS	01	/* uses lookahead constraints */
      int pre;			/* setup state number */
      int post;			/* teardown state number */
      color bos[2];		/* colors, if any, assigned to BOS and BOL */
--- 331,337 ----
      int nstates;		/* number of states */
      int ncolors;		/* number of colors */
      int flags;
! #define	HASLACONS	01	/* uses lookaround constraints */
      int pre;			/* setup state number */
      int post;			/* teardown state number */
      color bos[2];		/* colors, if any, assigned to BOS and BOL */
*************** struct subre {
*** 391,397 ****
  #define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
  #define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
      short id;			/* ID of subre (1..ntree-1) */
!     int subno;			/* subexpression number (for 'b' and '(') */
      short min;			/* min repetitions for iteration or backref */
      short max;			/* max repetitions for iteration or backref */
      struct subre *left;		/* left child, if any (also freelist chain) */
--- 398,405 ----
  #define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
  #define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
      short id;			/* ID of subre (1..ntree-1) */
!     int subno;			/* subexpression number for 'b' and '(', or
! 				 * LATYPE code for lookaround constraint */
      short min;			/* min repetitions for iteration or backref */
      short max;			/* max repetitions for iteration or backref */
      struct subre *left;		/* left child, if any (also freelist chain) */
*************** struct guts {
*** 426,433 ****
      int ntree;			/* number of subre's, plus one */
      struct colormap cmap;
      int FUNCPTR(compare, (const chr *, const chr *, size_t));
!     struct subre *lacons;	/* lookahead-constraint vector */
!     int nlacons;		/* size of lacons */
  };
  
  /*
--- 434,442 ----
      int ntree;			/* number of subre's, plus one */
      struct colormap cmap;
      int FUNCPTR(compare, (const chr *, const chr *, size_t));
!     struct subre *lacons;	/* lookaround-constraint vector */
!     int nlacons;		/* size of lacons[]; note that only slots
! 				 * numbered 1 .. nlacons-1 are used */
  };
  
  /*
diff -pcdr src/generic/tclRegexp.c lookbehind/generic/tclRegexp.c
*** src/generic/tclRegexp.c	Mon Sep 21 18:12:24 2015
--- lookbehind/generic/tclRegexp.c	Fri Nov  6 13:43:13 2015
*************** TclRegAbout(
*** 638,644 ****
      };
      static const struct infoname infonames[] = {
  	{REG_UBACKREF,		"REG_UBACKREF"},
! 	{REG_ULOOKAHEAD,	"REG_ULOOKAHEAD"},
  	{REG_UBOUNDS,		"REG_UBOUNDS"},
  	{REG_UBRACES,		"REG_UBRACES"},
  	{REG_UBSALNUM,		"REG_UBSALNUM"},
--- 638,644 ----
      };
      static const struct infoname infonames[] = {
  	{REG_UBACKREF,		"REG_UBACKREF"},
! 	{REG_ULOOKAROUND,	"REG_ULOOKAROUND"},
  	{REG_UBOUNDS,		"REG_UBOUNDS"},
  	{REG_UBRACES,		"REG_UBRACES"},
  	{REG_UBSALNUM,		"REG_UBSALNUM"},
diff -pcdr src/tests/reg.test lookbehind/tests/reg.test
*** src/tests/reg.test	Tue Oct 27 14:39:17 2015
--- lookbehind/tests/reg.test	Fri Nov  6 14:08:51 2015
*************** namespace eval RETest {
*** 111,117 ****
  	A REG_UBSALNUM
  	B REG_UBRACES
  	E REG_UBBS
! 	H REG_ULOOKAHEAD
  	I REG_UIMPOSSIBLE
  	L REG_ULOCALE
  	M REG_UUNPORT
--- 111,117 ----
  	A REG_UBSALNUM
  	B REG_UBRACES
  	E REG_UBBS
! 	H REG_ULOOKAROUND
  	I REG_UIMPOSSIBLE
  	L REG_ULOCALE
  	M REG_UUNPORT
*************** expectMatch	22.21 &+L	{a[^b]}		ach	ach
*** 831,837 ****
  expectNomatch	22.22 &+L	{a[^b]}		abe
  
  
! doing 23 "lookahead constraints"
  expectMatch	23.1 HP		a(?=b)b*	ab	ab
  expectNomatch	23.2 HP		a(?=b)b*	a
  expectMatch	23.3 HP		a(?=b)b*(?=c)c*	abc	abc
--- 831,838 ----
  expectNomatch	22.22 &+L	{a[^b]}		abe
  
  
! doing 23 "lookaround constraints"
! # lookahead
  expectMatch	23.1 HP		a(?=b)b*	ab	ab
  expectNomatch	23.2 HP		a(?=b)b*	a
  expectMatch	23.3 HP		a(?=b)b*(?=c)c*	abc	abc
*************** expectNomatch	23.5 HP		a(?!b)b*	ab
*** 840,845 ****
--- 841,869 ----
  expectMatch	23.6 HP		a(?!b)b*	a	a
  expectMatch	23.7 HP		(?=b)b		b	b
  expectNomatch	23.8 HP		(?=b)b		a
+ # lookbehind
+ expectMatch	23.9 HNP	(?<=a)b*	abb	bb
+ expectMatch	23.10 HP	a(?<=a)b*	a	a
+ expectMatch	23.11 HP	a(?<=a)b*(?<=b)c*	abc	abc
+ expectMatch	23.12 HP	a(?<=a)b*(?<=b)c*	ab	ab
+ expectMatch	23.13 HNP	a*(?<!a)b*	ab	{}
+ expectNomatch	23.14 HP	a*(?<!a)b+	ab
+ expectMatch	23.15 HP	a*(?<!a)b+	b	b
+ expectNomatch	23.16 HIP	a(?<!a)b*	a
+ expectNomatch	23.17 HP	(?<=b)b	b
+ expectNomatch	23.18 HP	(?<=f)b+	foobar
+ expectMatch	23.19 HP	(?<=foo)b+	foobar	b
+ expectMatch	23.20 HP	(?<=oo)b+	foobar	b
+ # Test optimization of single-chr-or-bracket-expression lookaround constraints
+ expectNomatch	23.21 HP	{x(?=[xy])}	xz
+ expectMatch	23.22 HP	{x(?=[xy])}	xy	x
+ expectMatch	23.23 HP	{x(?![xy])}	xz	x
+ expectNomatch	23.24 HP	{x(?![xy])}	xy
+ expectMatch	23.25 HP	{x(?![xy])}	x	x
+ expectMatch	23.26 HP	{(?<=[xy])yy+}	xyy	yy
+ expectNomatch	23.27 HP	{(?<=[xy])yy+}	zyy
+ expectNomatch	23.28 HP	{(?<![xy])yy+}	xyy
+ expectMatch	23.29 HP	{(?<![xy])yy+}	zyy	yy
  
  
  doing 24 "non-greedy quantifiers"