Index: doc/FileSystem.3
==================================================================
--- doc/FileSystem.3
+++ doc/FileSystem.3
@@ -1743,11 +1743,11 @@
 .TP
 \fIlinkProc\fR
 .
 If \fItoPtr\fR is NULL, this should return a value with reference count 1 that
 has just been allocated and passed to \fBTcl_IncrRefCount\fR. If \fItoPtr\fR
-is not NULL, it should be returned on success. 
+is not NULL, it should be returned on success.
 .TP
 \fIlistVolumesProc\fR
 .
 The result value should be a list (if non-NULL); it will have its reference
 count decremented once (with \fBTcl_DecrRefCount\fR) by Tcl once done.

Index: doc/NRE.3
==================================================================
--- doc/NRE.3
+++ doc/NRE.3
@@ -243,11 +243,11 @@
 .PP
 Use \fBTcl_NRAddCallback\fR to schedule any required final decrementing of the
 reference counts of arguments to any of the other functions on this page, as
 with any other post-processing step in the non-recursive execution engine.
 .PP
-The 
+The
 .SH "SEE ALSO"
 Tcl_CreateCommand(3), Tcl_CreateObjCommand(3), Tcl_EvalObjEx(3), Tcl_GetCommandFromObj(3), Tcl_ExprObj(3)
 .SH KEYWORDS
 stackless, nonrecursive, execute, command, global, value, result, script
 .SH COPYRIGHT

Index: doc/UniCharIsAlpha.3
==================================================================
--- doc/UniCharIsAlpha.3
+++ doc/UniCharIsAlpha.3
@@ -6,11 +6,11 @@
 '\"
 .TH Tcl_UniCharIsAlpha 3 "8.1" Tcl "Tcl Library Procedures"
 .so man.macros
 .BS
 .SH NAME
-Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsUpper, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
+Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsUpper, Tcl_UniCharIsUnicode, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
 .SH SYNOPSIS
 .nf
 \fB#include <tcl.h>\fR
 .sp
 int
@@ -41,10 +41,13 @@
 \fBTcl_UniCharIsSpace\fR(\fIch\fR)
 .sp
 int
 \fBTcl_UniCharIsUpper\fR(\fIch\fR)
 .sp
+int
+\fBTcl_UniCharIsUnicode\fR(\fIch\fR)
+.sp
 int
 \fBTcl_UniCharIsWordChar\fR(\fIch\fR)
 .SH ARGUMENTS
 .AS int ch
 .AP int ch in
@@ -78,11 +81,14 @@
 \fBTcl_UniCharIsPunct\fR tests if the character is a Unicode punctuation character.
 .PP
 \fBTcl_UniCharIsSpace\fR tests if the character is a whitespace Unicode character.
 .PP
 \fBTcl_UniCharIsUpper\fR tests if the character is an uppercase Unicode character.
+.PP
+\fBTcl_UniCharIsUnicode\fR tests if the character is a Unicode character,
+that is, a code point up to U+10FFFF that is neither a surrogate nor a noncharacter.
 .PP
 \fBTcl_UniCharIsWordChar\fR tests if the character is alphanumeric or
 a connector punctuation mark.
 
 .SH KEYWORDS
 unicode, classification

Index: doc/string.n
==================================================================
--- doc/string.n
+++ doc/string.n
@@ -411,14 +411,14 @@
 ever be used by Tcl's implementation, the number of bytes used to
 store the representation is of very low value (except to C extension
 code, which has direct access for the purpose of memory management,
 etc.)
 .PP
-\fICompatibility note:\fR it is likely that this subcommand will be
-withdrawn in a future version of Tcl. It is better to use the
-\fBencoding convertto\fR command to convert a string to a known
-encoding and then apply \fBstring length\fR to that.
+\fICompatibility note:\fR This subcommand is deprecated and will
+be removed in Tcl 9.0. It is better to use the \fBencoding convertto\fR
+command to convert a string to a known encoding (e.g. "utf-8" or "cesu-8")
+and then apply \fBstring length\fR to that.
 .PP
 .CS
 \fBstring length\fR [encoding convertto utf-8 $theString]
 .CE
 .RE

Index: generic/tcl.decls
==================================================================
--- generic/tcl.decls
+++ generic/tcl.decls
@@ -2421,10 +2421,13 @@
     const char *Tcl_UtfNext(const char *src)
 }
 declare 656 {
     const char *Tcl_UtfPrev(const char *src, const char *start)
 }
+declare 657 {
+    int Tcl_UniCharIsUnicode(int ch)
+}
 
 # ----- BASELINE -- FOR -- 8.7.0 ----- #
 
 ##############################################################################
 

Index: generic/tclCmdMZ.c
==================================================================
--- generic/tclCmdMZ.c
+++ generic/tclCmdMZ.c
@@ -1531,20 +1531,20 @@
     static const char *const isClasses[] = {
 	"alnum",	"alpha",	"ascii",	"control",
 	"boolean",	"dict",		"digit",	"double",
 	"entier",	"false",	"graph",	"integer",
 	"list",		"lower",	"print",	"punct",
-	"space",	"true",		"upper",	"wideinteger",
-	"wordchar",	"xdigit",	NULL
+	"space",	"true",		"upper",	"unicode",
+	"wideinteger", "wordchar",	"xdigit",	NULL
     };
     enum isClassesEnum {
 	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
 	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
 	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
 	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
-	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_WIDE,
-	STR_IS_WORD,	STR_IS_XDIGIT
+	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_UNICODE,
+	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
     };
     static const char *const isOptions[] = {
 	"-strict", "-failindex", NULL
     };
     enum isOptionsEnum {
@@ -1870,10 +1870,13 @@
 	chcomp = Tcl_UniCharIsSpace;
 	break;
     case STR_IS_UPPER:
 	chcomp = Tcl_UniCharIsUpper;
 	break;
+    case STR_IS_UNICODE:
+	chcomp = Tcl_UniCharIsUnicode;
+	break;
     case STR_IS_WORD:
 	chcomp = Tcl_UniCharIsWordChar;
 	break;
     case STR_IS_XDIGIT:
 	chcomp = UniCharIsHexDigit;
@@ -2830,10 +2833,11 @@
  * Side effects:
  *	See the user documentation.
  *
  *----------------------------------------------------------------------
  */
+#if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED)
 static int
 StringBytesCmd(
     TCL_UNUSED(ClientData),
     Tcl_Interp *interp,		/* Current interpreter. */
     int objc,			/* Number of arguments. */
@@ -2848,10 +2852,11 @@
 
     (void) TclGetStringFromObj(objv[1], &length);
     Tcl_SetObjResult(interp, Tcl_NewWideIntObj(length));
     return TCL_OK;
 }
+#endif
 
 /*
  *----------------------------------------------------------------------
  *
  * StringLenCmd --
@@ -3304,11 +3309,13 @@
 Tcl_Command
 TclInitStringCmd(
     Tcl_Interp *interp)		/* Current interpreter. */
 {
     static const EnsembleImplMap stringImplMap[] = {
+#if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED)
 	{"bytelength",	StringBytesCmd,	TclCompileBasic1ArgCmd, NULL, NULL, 0},
+#endif
 	{"cat",		StringCatCmd,	TclCompileStringCatCmd, NULL, NULL, 0},
 	{"compare",	StringCmpCmd,	TclCompileStringCmpCmd, NULL, NULL, 0},
 	{"equal",	StringEqualCmd,	TclCompileStringEqualCmd, NULL, NULL, 0},
 	{"first",	StringFirstCmd,	TclCompileStringFirstCmd, NULL, NULL, 0},
 	{"index",	StringIndexCmd,	TclCompileStringIndexCmd, NULL, NULL, 0},

Index: generic/tclCompCmdsSZ.c
==================================================================
--- generic/tclCompCmdsSZ.c
+++ generic/tclCompCmdsSZ.c
@@ -503,23 +503,23 @@
 {
     DefineLineInformation;	/* TIP #280 */
     Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr);
     static const char *const isClasses[] = {
 	"alnum",	"alpha",	"ascii",	"control",
-	"boolean",	"dict", "digit",	"double",	"entier",
-	"false",	"graph",	"integer",	"list",
-	"lower",	"print",	"punct",	"space",
-	"true",		"upper",	"wideinteger",	"wordchar",
-	"xdigit",	NULL
+	"boolean",	"dict",		"digit",	"double",
+	"entier",	"false",	"graph",	"integer",
+	"list",		"lower",	"print",	"punct",
+	"space",	"true",		"upper",	"unicode",
+	"wideinteger", "wordchar",	"xdigit",	NULL
     };
     enum isClassesEnum {
 	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
-	STR_IS_BOOL,	STR_IS_DICT, STR_IS_DIGIT,	STR_IS_DOUBLE,	STR_IS_ENTIER,
-	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,	STR_IS_LIST,
-	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,	STR_IS_SPACE,
-	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_WIDE,	STR_IS_WORD,
-	STR_IS_XDIGIT
+	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
+	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
+	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
+	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_UNICODE,
+	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
     };
     int t, range, allowEmpty = 0, end;
     InstStringClassType strClassType;
     Tcl_Obj *isClass;
 
@@ -607,10 +607,13 @@
 	strClassType = STR_CLASS_SPACE;
 	goto compileStrClass;
     case STR_IS_UPPER:
 	strClassType = STR_CLASS_UPPER;
 	goto compileStrClass;
+    case STR_IS_UNICODE:
+	strClassType = STR_CLASS_UNICODE;
+	goto compileStrClass;
     case STR_IS_WORD:
 	strClassType = STR_CLASS_WORD;
 	goto compileStrClass;
     case STR_IS_XDIGIT:
 	strClassType = STR_CLASS_XDIGIT;
@@ -1413,10 +1416,11 @@
     {"punct",	Tcl_UniCharIsPunct},
     {"space",	Tcl_UniCharIsSpace},
     {"upper",	Tcl_UniCharIsUpper},
     {"word",	Tcl_UniCharIsWordChar},
     {"xdigit",	UniCharIsHexDigit},
+    {"unicode",	Tcl_UniCharIsUnicode},
     {"",	NULL}
 };
 
 /*
  *----------------------------------------------------------------------

Index: generic/tclCompile.h
==================================================================
--- generic/tclCompile.h
+++ generic/tclCompile.h
@@ -920,12 +920,13 @@
     STR_CLASS_PUNCT,		/* Unicode punctuation characters. */
     STR_CLASS_SPACE,		/* Unicode space characters. */
     STR_CLASS_UPPER,		/* Unicode upper-case alphabet characters. */
     STR_CLASS_WORD,		/* Unicode word (alphabetic, digit, connector
 				 * punctuation) characters. */
-    STR_CLASS_XDIGIT		/* Characters that can be used as digits in
+    STR_CLASS_XDIGIT,		/* Characters that can be used as digits in
 				 * hexadecimal numbers ([0-9A-Fa-f]). */
+    STR_CLASS_UNICODE		/* Unicode characters. */
 } InstStringClassType;
 
 typedef struct StringClassDesc {
     char name[8];		/* Name of the class. */
     int (*comparator)(int);	/* Function to test if a single unicode

Index: generic/tclDecls.h
==================================================================
--- generic/tclDecls.h
+++ generic/tclDecls.h
@@ -1935,10 +1935,12 @@
 EXTERN int		Tcl_UtfCharComplete(const char *src, int length);
 /* 655 */
 EXTERN const char *	Tcl_UtfNext(const char *src);
 /* 656 */
 EXTERN const char *	Tcl_UtfPrev(const char *src, const char *start);
+/* 657 */
+EXTERN int		Tcl_UniCharIsUnicode(int ch);
 
 typedef struct {
     const struct TclPlatStubs *tclPlatStubs;
     const struct TclIntStubs *tclIntStubs;
     const struct TclIntPlatStubs *tclIntPlatStubs;
@@ -2627,10 +2629,11 @@
     Tcl_UniChar * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */
     unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */
     int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */
     const char * (*tcl_UtfNext) (const char *src); /* 655 */
     const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
+    int (*tcl_UniCharIsUnicode) (int ch); /* 657 */
 } TclStubs;
 
 extern const TclStubs *tclStubsPtr;
 
 #ifdef __cplusplus
@@ -3969,10 +3972,12 @@
 	(tclStubsPtr->tcl_UtfCharComplete) /* 654 */
 #define Tcl_UtfNext \
 	(tclStubsPtr->tcl_UtfNext) /* 655 */
 #define Tcl_UtfPrev \
 	(tclStubsPtr->tcl_UtfPrev) /* 656 */
+#define Tcl_UniCharIsUnicode \
+	(tclStubsPtr->tcl_UniCharIsUnicode) /* 657 */
 
 #endif /* defined(USE_TCL_STUBS) */
 
 /* !END!: Do not edit above this line. */
 

Index: generic/tclEncoding.c
==================================================================
--- generic/tclEncoding.c
+++ generic/tclEncoding.c
@@ -508,15 +508,16 @@
  *	Depends on the memory, object, and IO subsystems.
  *
  *---------------------------------------------------------------------------
  */
 
-/* This flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
+/* These flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
+/* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and
+ * TCL_ENCODING_LE is only used for utf-16/ucs-2, re-use the same value */
 #define TCL_ENCODING_MODIFIED	0x20	/* Converting NULL bytes to 0xC0 0x80 */
-/* Since TCL_ENCODING_MODIFIED is only used for utf-8 and
- * TCL_ENCODING_LE is only used for  utf-16/ucs-2, re-use the same value */
 #define TCL_ENCODING_LE		TCL_ENCODING_MODIFIED	/* Little-endian encoding */
+#define TCL_ENCODING_UTF	0x200	/* For UTF-8 encoding, allow 4-byte output sequences */
 
 void
 TclInitEncodingSubsystem(void)
 {
     Tcl_EncodingType type;
@@ -554,11 +555,14 @@
     type.encodingName	= "utf-8";
     type.toUtfProc	= UtfToUtfProc;
     type.fromUtfProc	= UtfToUtfProc;
     type.freeProc	= NULL;
     type.nullSize	= 1;
-    type.clientData	= NULL;
+    type.clientData	= INT2PTR(TCL_ENCODING_UTF);
+    Tcl_CreateEncoding(&type);
+    type.clientData	= INT2PTR(0);
+    type.encodingName	= "cesu-8";
     Tcl_CreateEncoding(&type);
 
     type.toUtfProc	= Utf16ToUtfProc;
     type.fromUtfProc    = UtfToUcs2Proc;
     type.freeProc	= NULL;
@@ -1139,11 +1143,11 @@
 	srcLen = encodingPtr->lengthProc(src);
     }
 
     flags = TCL_ENCODING_START | TCL_ENCODING_END;
     if (encodingPtr->toUtfProc == UtfToUtfProc) {
-	flags |= TCL_ENCODING_MODIFIED;
+	flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
     }
 
     while (1) {
 	result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
 		flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars);
@@ -1256,11 +1260,11 @@
 	 */
 
 	dstLen--;
     }
     if (encodingPtr->toUtfProc == UtfToUtfProc) {
-	flags |= TCL_ENCODING_MODIFIED;
+	flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
     }
     do {
 	Tcl_EncodingState savedState = *statePtr;
 
 	result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
@@ -1335,20 +1339,20 @@
 	result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
 		srcLen, flags, &state, dst, dstLen,
 		&srcRead, &dstWrote, &dstChars);
 	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
 
+	src += srcRead;
 	if (result != TCL_CONVERT_NOSPACE) {
 	    if (encodingPtr->nullSize == 2) {
 		Tcl_DStringSetLength(dstPtr, soFar + 1);
 	    }
 	    Tcl_DStringSetLength(dstPtr, soFar);
 	    return Tcl_DStringValue(dstPtr);
 	}
 
 	flags &= ~TCL_ENCODING_START;
-	src += srcRead;
 	srcLen -= srcRead;
 	if (Tcl_DStringLength(dstPtr) == 0) {
 	    Tcl_DStringSetLength(dstPtr, dstLen);
 	}
 	Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
@@ -2213,11 +2217,11 @@
 	charLimit = *dstCharsPtr;
     }
 
     dstStart = dst;
     flags |= PTR2INT(clientData);
-    dstEnd = dst + dstLen - TCL_UTF_MAX;
+    dstEnd = dst + dstLen - ((flags & TCL_ENCODING_UTF) ? TCL_UTF_MAX : 6);
 
     for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
 	    /*
 	     * If there is more string to follow, this will ensure that the
@@ -2266,34 +2270,63 @@
 		TclUtfToUCS4(chbuf, &ch);
 	    }
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	} else {
 	    int low;
+	    const char *saveSrc = src;
 	    size_t len = TclUtfToUCS4(src, &ch);
 	    if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)
 		    && (flags & TCL_ENCODING_MODIFIED)) {
 		result = TCL_CONVERT_SYNTAX;
 		break;
 	    }
 	    src += len;
-	    if ((ch | 0x7FF) == 0xDFFF) {
+	    if (!(flags & TCL_ENCODING_UTF)) {
+		if (ch > 0xFFFF) {
+		    /* CESU-8: emit the high surrogate here; the low one goes via cesu8 */
+		    ch -= 0x10000;
+		    *dst++ = (char) 0xED;
+		    *dst++ = (char) (((ch >> 16) & 0x0F) | 0xA0);
+		    *dst++ = (char) (((ch >> 10) & 0x3F) | 0x80);
+		    ch = (ch & 0x03FF) | 0xDC00; /* low surrogate: keep all 10 low bits */
+		}
+		goto cesu8;
+	    } else if ((ch | 0x7FF) == 0xDFFF) {
 		/*
 		 * A surrogate character is detected, handle especially.
 		 */
 
 		low = ch;
 		len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
 
 		if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
+		    if (flags & TCL_ENCODING_STOPONERROR) {
+			result = TCL_CONVERT_UNKNOWN;
+			src = saveSrc;
+			break;
+		    }
+		    if (!(flags & TCL_ENCODING_MODIFIED)) {
+			ch = 0xFFFD;
+		    }
+		cesu8:
 		    *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);
 		    *dst++ = (char) (((ch >> 6) | 0x80) & 0xBF);
 		    *dst++ = (char) ((ch | 0x80) & 0xBF);
 		    continue;
 		}
 		src += len;
 		dst += Tcl_UniCharToUtf(ch, dst);
 		ch = low;
+	    } else if (!Tcl_UniCharIsUnicode(ch)) {
+		if (flags & TCL_ENCODING_STOPONERROR) {
+		    result = TCL_CONVERT_UNKNOWN;
+		    src = saveSrc;
+		    break;
+		}
+		if (!(flags & TCL_ENCODING_MODIFIED)) {
+		    ch = 0xFFFD;
+		}
 	    }
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
     }
 
@@ -2448,11 +2481,11 @@
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
     const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
-    int ch;
+    int ch, len;
 
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
     if ((flags & TCL_ENCODING_END) == 0) {
@@ -2476,11 +2509,19 @@
 	}
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	src += TclUtfToUCS4(src, &ch);
+	len = TclUtfToUCS4(src, &ch);
+	if (!Tcl_UniCharIsUnicode(ch)) {
+	    if (flags & TCL_ENCODING_STOPONERROR) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
+	    }
+	    ch = 0xFFFD;
+	}
+	src += len;
 	if (flags & TCL_ENCODING_LE) {
 	    if (ch <= 0xFFFF) {
 		*dst++ = (ch & 0xFF);
 		*dst++ = (ch >> 8);
 	    } else {

Index: generic/tclStubInit.c
==================================================================
--- generic/tclStubInit.c
+++ generic/tclStubInit.c
@@ -1939,8 +1939,9 @@
     TclGetUnicodeFromObj, /* 652 */
     TclGetByteArrayFromObj, /* 653 */
     Tcl_UtfCharComplete, /* 654 */
     Tcl_UtfNext, /* 655 */
     Tcl_UtfPrev, /* 656 */
+    Tcl_UniCharIsUnicode, /* 657 */
 };
 
 /* !END!: Do not edit above this line. */

Index: generic/tclUtf.c
==================================================================
--- generic/tclUtf.c
+++ generic/tclUtf.c
@@ -2177,10 +2177,40 @@
 	return 0;
     }
     return (GetCategory(ch) == UPPERCASE_LETTER);
 }
 
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharIsUnicode --
+ *
+ *	Test if a code point is a Unicode character (not a noncharacter).
+ *
+ * Results:
+ *	Returns non-zero if ch is in the range 0 - 0x10FFFF and is none of:
+ *
+ *	  1) A surrogate, U+D800 - U+DFFF
+ *	  2) One of the last 2 code points of a plane, U+??FFFE or U+??FFFF
+ *	  3) A noncharacter in the range U+FDD0 - U+FDEF
+ *
+ *	Returns zero otherwise; negative values fail the unsigned range test.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsUnicode(
+    int ch)			/* Unicode character to test. */
+{
+    return ((unsigned int)ch <= 0x10FFFF) && ((ch & 0xFFF800) != 0xD800)
+	    && ((ch & 0xFFFE) != 0xFFFE) && ((unsigned int)(ch - 0xFDD0) >= 32);
+}
+
 /*
  *----------------------------------------------------------------------
  *
  * Tcl_UniCharIsWordChar --
  *

Index: library/init.tcl
==================================================================
--- library/init.tcl
+++ library/init.tcl
@@ -212,13 +212,13 @@
 		# construct the stack trace.
 		#
 		set errInfo [dict get $opts -errorinfo]
 		set errCode [dict get $opts -errorcode]
 		set cinfo $args
-		if {[string bytelength $cinfo] > 150} {
+		if {[string length [encoding convertto utf-8 $cinfo]] > 150} {
 		    set cinfo [string range $cinfo 0 150]
-		    while {[string bytelength $cinfo] > 150} {
+		    while {[string length [encoding convertto utf-8 $cinfo]] > 150} {
 			set cinfo [string range $cinfo 0 end-1]
 		    }
 		    append cinfo ...
 		}
 		set tail "\n    (\"uplevel\" body line 1)\n    invoked\

Index: tests/encoding.test
==================================================================
--- tests/encoding.test
+++ tests/encoding.test
@@ -341,65 +341,65 @@
 test encoding-15.6 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83D\uDE02\uD83D
     set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
     binary scan $y H* z
     list [string length $y] $z
-} {10 edb882f09f9882eda0bd}
+} {10 efbfbdf09f9882efbfbd}
 test encoding-15.7 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83D\uD83D
     set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {3 9 edb882eda0bdeda0bd}
+} {3 9 efbfbdefbfbdefbfbd}
 test encoding-15.8 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83Dé
     set y [encoding convertto utf-8 \uDE02\uD83Dé]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {3 8 edb882eda0bdc3a9}
+} {3 8 efbfbdefbfbdc3a9}
 test encoding-15.9 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83DX
     set y [encoding convertto utf-8 \uDE02\uD83DX]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {3 7 edb882eda0bd58}
+} {3 7 efbfbdefbfbd58}
 test encoding-15.10 {UtfToUtfProc high surrogate character output} {
     set x \uDE02é
     set y [encoding convertto utf-8 \uDE02é]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 5 edb882c3a9}
+} {2 5 efbfbdc3a9}
 test encoding-15.11 {UtfToUtfProc low surrogate character output} {
     set x \uDA02é
     set y [encoding convertto utf-8 \uDA02é]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 5 eda882c3a9}
+} {2 5 efbfbdc3a9}
 test encoding-15.12 {UtfToUtfProc high surrogate character output} {
     set x \uDE02Y
     set y [encoding convertto utf-8 \uDE02Y]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 4 edb88259}
+} {2 4 efbfbd59}
 test encoding-15.13 {UtfToUtfProc low surrogate character output} {
     set x \uDA02Y
     set y [encoding convertto utf-8 \uDA02Y]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 4 eda88259}
+} {2 4 efbfbd59}
 test encoding-15.14 {UtfToUtfProc high surrogate character output} {
     set x \uDE02
     set y [encoding convertto utf-8 \uDE02]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {1 3 edb882}
+} {1 3 efbfbd}
 test encoding-15.15 {UtfToUtfProc low surrogate character output} {
     set x \uDA02
     set y [encoding convertto utf-8 \uDA02]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {1 3 eda882}
+} {1 3 efbfbd}
 test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
     set x \xF0\xA0\xA1\xC2
     set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]
     list [string length $x] $y
 } "4 \xF0\xA0\xA1\xC2"
@@ -407,10 +407,30 @@
     set x 😂
     set y [encoding convertto utf-8 😂]
     binary scan $y H* z
     list [string length $y] $z
 } {4 f09f9882}
+test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} {
+    set y [encoding convertto cesu-8 \U10000]
+    binary scan $y H* z
+    list [string length $y] $z
+} {6 eda080edb080}
+test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} {
+    set y [encoding convertto cesu-8 \uD800]
+    binary scan $y H* z
+    list [string length $y] $z
+} {3 eda080}
+test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} {
+    set y [encoding convertto cesu-8 \uDC00]
+    binary scan $y H* z
+    list [string length $y] $z
+} {3 edb080}
+test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} {
+    set y [encoding convertto cesu-8 \uFFFF]
+    binary scan $y H* z
+    list [string length $y] $z
+} {3 efbfbf}
 
 test encoding-16.1 {Utf16ToUtfProc} -body {
     set val [encoding convertfrom utf-16 NN]
     list $val [format %x [scan $val %c]]
 } -result "乎 4e4e"
@@ -432,19 +452,19 @@
 } -result "\U460DC 460dc"
 
 test encoding-17.1 {UtfToUtf16Proc} -body {
     encoding convertto utf-16 "\U460DC"
 } -result "\xD8\xD8\xDC\xDC"
-test encoding-17.2 {UtfToUtf16Proc} -body {
-    encoding convertto utf-16 "\uDCDC"
-} -result "\xDC\xDC"
-test encoding-17.3 {UtfToUtf16Proc} -body {
-    encoding convertto utf-16 "\uD8D8"
-} -result "\xD8\xD8"
-test encoding-17.4 {UtfToUcs2Proc} -body {
+test encoding-17.2 {UtfToUcs2Proc} -body {
     encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
 } -result "\uFFFD"
+test encoding-17.3 {UtfToUtf16Proc} -body {
+    encoding convertto utf-16be "\uDCDC"
+} -result "\xFF\xFD"
+test encoding-17.4 {UtfToUtf16Proc} -body {
+    encoding convertto utf-16le "\uD8D8"
+} -result "\xFD\xFF"
 
 test encoding-18.1 {TableToUtfProc} {
 } {}
 
 test encoding-19.1 {TableFromUtfProc} {
@@ -740,11 +760,11 @@
 		# discard the cached internal representation of Tcl_Encoding
 		# Unfortunately, without this, encoding 2-1 fails.
 		llength $name
 	}
 	return $count
-} -result [expr {[info exists ::tcl_precision] ? 86 : 85}]
+} -result [expr {[info exists ::tcl_precision] ? 87 : 86}]
 
 runtests
 
 }
 

Index: tests/info.test
==================================================================
--- tests/info.test
+++ tests/info.test
@@ -20,11 +20,11 @@
     namespace import -force ::tcltest::*
 }
 ::tcltest::loadTestedCommands
 catch [list package require -exact tcl::test [info patchlevel]]
 testConstraint zlib [llength [info commands zlib]]
-
+testConstraint nodep [info exists tcl_precision]
 # Set up namespaces needed to test operation of "info args", "info body",
 # "info default", and "info procs" with imported procedures.
 
 catch {namespace delete test_ns_info1 test_ns_info2}
 
@@ -99,11 +99,11 @@
     eval [info body foo]
 } -returnCodes error -result {can't read "args": no such variable}
 # Fix for problem tested for in info-2.5 caused problems when
 # procedure body had no string rep (i.e. was not yet bytecode)
 # causing an empty string to be returned [Bug #545644]
-test info-2.6 {info body option, returning list bodies} {
+test info-2.6 {info body option, returning list bodies} nodep {
     proc foo args [list subst bar]
     list [string bytelength [info body foo]] \
 	    [foo; string bytelength [info body foo]]
 } {9 9}
 

Index: tests/io.test
==================================================================
--- tests/io.test
+++ tests/io.test
@@ -2414,11 +2414,11 @@
 	set chan [chan create r {apply {{cmd chan args} {
 	    switch $cmd {
 		blocking - finalize {
 		}
 		watch {
-		    chan postevent $chan read 
+		    chan postevent $chan read
 		}
 		initialize {
 		    list initialize finalize watch read write configure blocking
 		}
 		default {
@@ -2435,11 +2435,11 @@
 	    return
 	}
     } [namespace current]]]
     vwait [namespace current]::done
 return success
-} success 
+} success
 
 
 
 test io-29.1 {Tcl_WriteChars, channel not writable} {
     list [catch {puts stdin hello} msg] $msg

Index: tests/regexp.test
==================================================================
--- tests/regexp.test
+++ tests/regexp.test
@@ -17,10 +17,11 @@
 }
 
 unset -nocomplain foo
 
 testConstraint exec [llength [info commands exec]]
+testConstraint nodep [info exists tcl_precision]
 
 # Used for constraining memory leak tests
 testConstraint memory [llength [info commands memory]]
 if {[testConstraint memory]} {
     proc memtest script {
@@ -763,11 +764,11 @@
     regsub -all {@} {@hel@lo@} "\0a\0" result
     set expected "\0a\0hel\0a\0lo\0a\0"
     string equal $result $expected
 } 1
 
-test regexp-20.1 {regsub shared object shimmering} -body {
+test regexp-20.1 {regsub shared object shimmering} -constraints nodep -body {
     # Bug #461322
     set a abcdefghijklmnopqurstuvwxyz
     set b $a
     set c abcdefghijklmnopqurstuvwxyz0123456789
     regsub $a $c $b d

Index: tests/regexpComp.test
==================================================================
--- tests/regexpComp.test
+++ tests/regexpComp.test
@@ -13,10 +13,12 @@
 
 if {"::tcltest" ni [namespace children]} {
     package require tcltest 2.5
     namespace import -force ::tcltest::*
 }
+
+testConstraint nodep [info exists tcl_precision]
 
 # Procedure to evaluate a script within a proc, to test compilation
 # functionality
 
 proc evalInProc { script } {
@@ -789,11 +791,11 @@
 	regsub -all {@} {@hel@lo@} "\0a\0" result
 	list $result [string length $result]
     }
 } "\0a\0hel\0a\0lo\0a\0 14"
 
-test regexpComp-20.1 {regsub shared object shimmering} {
+test regexpComp-20.1 {regsub shared object shimmering} nodep {
     evalInProc {
 	# Bug #461322
 	set a abcdefghijklmnopqurstuvwxyz
 	set b $a
 	set c abcdefghijklmnopqurstuvwxyz0123456789

Index: tests/string.test
==================================================================
--- tests/string.test
+++ tests/string.test
@@ -31,10 +31,11 @@
 testConstraint testobj [expr {[info commands testobj] ne {}}]
 testConstraint testindexobj [expr {[info commands testindexobj] ne {}}]
 testConstraint testevalex [expr {[info commands testevalex] ne {}}]
 testConstraint utf16 [expr {[string length \U010000] == 2}]
 testConstraint testbytestring   [llength [info commands testbytestring]]
+testConstraint nodep [info exists tcl_precision]
 
 # Used for constraining memory leak tests
 testConstraint memory [llength [info commands memory]]
 if {[testConstraint memory]} {
     proc getbytes {} {
@@ -70,13 +71,13 @@
     interp alias {} run {} try
     set constraints {}
 }
 
 
-test string-1.1.$noComp {error conditions} {
+test string-1.1.$noComp {error conditions} -body {
     list [catch {run {string gorp a b}} msg] $msg
-} {1 {unknown or ambiguous subcommand "gorp": must be bytelength, cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
+} -match glob -result {1 {unknown or ambiguous subcommand "gorp": must be *cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
 test string-1.2.$noComp {error conditions} {
     list [catch {run {string}} msg] $msg
 } {1 {wrong # args: should be "string subcommand ?arg ...?"}}
 test stringComp-1.3.$noComp {error condition - undefined method during compile} {
     # We don't want this to complain about 'never' because it may never
@@ -523,14 +524,14 @@
 test string-6.4.$noComp {string is, too many args} {
     list [catch {run {string is alpha -failin var -strict str more}} msg] $msg
 } {1 {wrong # args: should be "string is class ?-strict? ?-failindex var? str"}}
 test string-6.5.$noComp {string is, class check} {
     list [catch {run {string is bogus str}} msg] $msg
-} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, wideinteger, wordchar, or xdigit}}
+} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
 test string-6.6.$noComp {string is, ambiguous class} {
     list [catch {run {string is al str}} msg] $msg
-} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, wideinteger, wordchar, or xdigit}}
+} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
 test string-6.7.$noComp {string is alpha, all ok} {
     run {string is alpha -strict -failindex var abc}
 } 1
 test string-6.8.$noComp {string is, error in var} {
     list [run {string is alpha -failindex var abc5def}] $var
@@ -959,10 +960,32 @@
     list [run {string is entier -fail var 0o1234561123412345612345656234561234561234561234561234561234561234561234561234561234536963}] $var
 } {0 87}
 test string-6.131.$noComp {string is entier, false on bad hex} {
     list [run {string is entier -fail var 0X12345611234123456123456562345612345612345612345612345612345612345612345612345612345345XYZ}] $var
 } {0 88}
+test string-6.132.$noComp {string is unicode} {
+    run {string is unicode \U10FFFD\uD7FF\uE000\uFDCF\uFDF0}
+} 1
+test string-6.133.$noComp {string is unicode, upper surrogate} {
+    run {string is unicode \uD800}
+} 0
+test string-6.134.$noComp {string is unicode, lower surrogate} {
+    run {string is unicode \uDFFF}
+} 0
+test string-6.135.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFFFE}
+} 0
+test string-6.136.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFFFF}
+} 0
+test string-6.137.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFDD0}
+} 0
+test string-6.138.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFDEF}
+} 0
+
 
 test string-7.1.$noComp {string last, not enough args} {
     list [catch {run {string last a}} msg] $msg
 } {1 {wrong # args: should be "string last needleString haystackString ?lastIndex?"}}
 test string-7.2.$noComp {string last, bad args} {
@@ -1011,20 +1034,20 @@
 } -1
 test string-7.16.$noComp {string last, start index} {
     run {string last Üa ÜadÜad end-1}
 } 3
 
-test string-8.1.$noComp {string bytelength} {
+test string-8.1.$noComp {string bytelength} nodep {
     list [catch {run {string bytelength}} msg] $msg
 } {1 {wrong # args: should be "string bytelength string"}}
-test string-8.2.$noComp {string bytelength} {
+test string-8.2.$noComp {string bytelength} nodep {
     list [catch {run {string bytelength a b}} msg] $msg
 } {1 {wrong # args: should be "string bytelength string"}}
-test string-8.3.$noComp {string bytelength} {
+test string-8.3.$noComp {string bytelength} nodep {
     run {string bytelength "\xC7"}
 } 2
-test string-8.4.$noComp {string bytelength} {
+test string-8.4.$noComp {string bytelength} nodep {
     run {string b ""}
 } 0
 
 test string-9.1.$noComp {string length} {
     list [catch {run {string length}} msg] $msg
@@ -1815,13 +1838,13 @@
 } \u1361ABC
 
 test string-20.1.$noComp {string trimright errors} {
     list [catch {run {string trimright}} msg] $msg
 } {1 {wrong # args: should be "string trimright string ?chars?"}}
-test string-20.2.$noComp {string trimright errors} {
+test string-20.2.$noComp {string trimright errors} -body {
     list [catch {run {string trimg a}} msg] $msg
-} {1 {unknown or ambiguous subcommand "trimg": must be bytelength, cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
+} -match glob -result {1 {unknown or ambiguous subcommand "trimg": must be *cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
 test string-20.3.$noComp {string trimright} {
     run {string trimright "    XYZ      "}
 } {    XYZ}
 test string-20.4.$noComp {string trimright} {
     run {string trimright "   "}
@@ -1937,11 +1960,11 @@
     run {string trimright "\uD83D\uDE02Hello world!\uD83D\uDE02" \uD93D\uDE02}
 } "\uD83D\uDE02Hello world!\uD83D\uDE02"
 
 test string-22.1.$noComp {string wordstart} -body {
     list [catch {run {string word a}} msg] $msg
-} -result {1 {unknown or ambiguous subcommand "word": must be bytelength, cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
+} -match glob -result {1 {unknown or ambiguous subcommand "word": must be *cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
 test string-22.2.$noComp {string wordstart} -body {
     list [catch {run {string wordstart a}} msg] $msg
 } -result {1 {wrong # args: should be "string wordstart string index"}}
 test string-22.3.$noComp {string wordstart} -body {
     list [catch {run {string wordstart a b c}} msg] $msg