Tcl Source Code

Check-in [a3e6750fec]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix [8663689908d3]: regexp \\w missing characters
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-6-branch
Files: files | file ages | folders
SHA1: a3e6750fec49a04b731629504f3b0dd4c515f225
User & Date: jan.nijtmans 2016-04-08 12:26:19
Context
2016-04-08
13:27
Fix [2538f373ffc78d6d]: crash in Tcl_OpenTcpServer() on Win... check-in: e40eb76900 user: jan.nijtmans tags: core-8-6-branch
12:28
merge 8.6 check-in: 0fdb5892a5 user: jan.nijtmans tags: trunk
12:26
Fix [8663689908d3]: regexp \\w missing characters check-in: a3e6750fec user: jan.nijtmans tags: core-8-6-branch
12:23
Fix [8663689908d3]: regexp \\w missing characters check-in: 4ca1a7770f user: jan.nijtmans tags: core-8-5-branch
2016-04-07
14:54
Tidy up the last commit. check-in: ce1aa70a2e user: dgp tags: core-8-6-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/regc_lex.c.

252
253
254
255
256
257
258













259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
    CHR(':'), CHR(']'), CHR(']')
};
static const chr brbacks[] = {	/* \s within brackets */
    CHR('['), CHR(':'),
    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
    CHR(':'), CHR(']')
};













static const chr backw[] = {	/* \w */
    CHR('['), CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static const chr backW[] = {	/* \W */
    CHR('['), CHR('^'), CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static const chr brbackw[] = {	/* \w within brackets */
    CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), CHR('_')
};

/*
 - lexword - interpolate a bracket expression for word characters
 * Possibly ought to inquire whether there is a "word" character class.
 ^ static void lexword(struct vars *);
 */







>
>
>
>
>
>
>
>
>
>
>
>
>



|




|




|







252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
    CHR(':'), CHR(']'), CHR(']')
};
static const chr brbacks[] = {	/* \s within brackets */
    CHR('['), CHR(':'),
    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
    CHR(':'), CHR(']')
};

#define PUNCT_CONN \
	CHR('_'), \
	0x203f /* UNDERTIE */, \
	0x2040 /* CHARACTER TIE */,\
	0x2054 /* INVERTED UNDERTIE */,\
	0xfe33 /* PRESENTATION FORM FOR VERTICAL LOW LINE */, \
	0xfe34 /* PRESENTATION FORM FOR VERTICAL WAVY LOW LINE */, \
	0xfe4d /* DASHED LOW LINE */, \
	0xfe4e /* CENTRELINE LOW LINE */, \
	0xfe4f /* WAVY LOW LINE */, \
	0xff3f /* FULLWIDTH LOW LINE */

static const chr backw[] = {	/* \w */
    CHR('['), CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), PUNCT_CONN, CHR(']')
};
static const chr backW[] = {	/* \W */
    CHR('['), CHR('^'), CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), PUNCT_CONN, CHR(']')
};
static const chr brbackw[] = {	/* \w within brackets */
    CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), PUNCT_CONN
};

/*
 - lexword - interpolate a bracket expression for word characters
 * Possibly ought to inquire whether there is a "word" character class.
 ^ static void lexword(struct vars *);
 */

Changes to tests/utf.test.

298
299
300
301
302
303
304
305
306
307
308
309
310
311
312

test utf-21.1 {TclUniCharIsAlnum} {
    # this returns 1 with Unicode 7 compliance
    string is alnum \u1040\u021f\u0220
} {1}
test utf-21.2 {unicode alnum char in regc_locale.c} {
    # this returns 1 with Unicode 7 compliance
    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220]
} {1 1}
test utf-21.3 {unicode print char in regc_locale.c} {
    # this returns 1 with Unicode 7 compliance
    regexp {^[[:print:]]+$} \ufbc1
} 1
test utf-21.4 {TclUniCharIsGraph} {
    # [Bug 3464428]







|







298
299
300
301
302
303
304
305
306
307
308
309
310
311
312

test utf-21.1 {TclUniCharIsAlnum} {
    # this returns 1 with Unicode 7 compliance
    string is alnum \u1040\u021f\u0220
} {1}
test utf-21.2 {unicode alnum char in regc_locale.c} {
    # this returns 1 with Unicode 7 compliance
    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f]
} {1 1}
test utf-21.3 {unicode print char in regc_locale.c} {
    # this returns 1 with Unicode 7 compliance
    regexp {^[[:print:]]+$} \ufbc1
} 1
test utf-21.4 {TclUniCharIsGraph} {
    # [Bug 3464428]