Index: generic/tkEntry.c ================================================================== --- generic/tkEntry.c +++ generic/tkEntry.c @@ -1922,23 +1922,23 @@ * If we're displaying a special character instead of the value of the * entry, recompute the displayString. */ if (entryPtr->showChar != NULL) { - Tcl_UniChar ch; - char buf[TCL_UTF_MAX]; + int ch; + char buf[6]; int size; /* * Normalize the special character so we can safely duplicate it in * the display string. If we didn't do this, then two malformed * characters might end up looking like one valid UTF character in the * resulting string. */ - Tcl_UtfToUniChar(entryPtr->showChar, &ch); - size = Tcl_UniCharToUtf(ch, buf); + TkUtfToUniChar(entryPtr->showChar, &ch); + size = TkUniCharToUtf(ch, buf); entryPtr->numDisplayBytes = entryPtr->numChars * size; p = ckalloc(entryPtr->numDisplayBytes + 1); entryPtr->displayString = p; @@ -3410,11 +3410,11 @@ { int spaceNeeded, cvtFlags; /* Used to substitute string as proper Tcl * list element. */ int number, length; register const char *string; - Tcl_UniChar ch; + int ch; char numStorage[2*TCL_INTEGER_SPACE]; while (1) { if (*before == '\0') { break; @@ -3443,11 +3443,11 @@ * There's a percent sequence here. Process it. */ before++; /* skip over % */ if (*before != '\0') { - before += Tcl_UtfToUniChar(before, &ch); + before += TkUtfToUniChar(before, &ch); } else { ch = '%'; } if (type == VALIDATE_BUTTON) { /* @@ -3463,11 +3463,11 @@ break; case 'W': /* widget name */ string = Tk_PathName(entryPtr->tkwin); break; default: - length = Tcl_UniCharToUtf(ch, numStorage); + length = TkUniCharToUtf(ch, numStorage); numStorage[length] = '\0'; string = numStorage; break; } } else { @@ -3523,11 +3523,11 @@ break; case 'W': /* widget name */ string = Tk_PathName(entryPtr->tkwin); break; default: - length = Tcl_UniCharToUtf(ch, numStorage); + length = TkUniCharToUtf(ch, numStorage); numStorage[length] = '\0'; string = numStorage; break; } } Index: generic/tkFont.c ================================================================== --- generic/tkFont.c +++ generic/tkFont.c @@ -495,11 +495,11 @@ case FONT_ACTUAL: { int skip, result, n; const char *s; Tk_Font tkfont; Tcl_Obj *optPtr, *charPtr, *resultPtr; - Tcl_UniChar uniChar = 0; + int uniChar = 0; const TkFontAttributes *faPtr; TkFontAttributes fa; /* * Params 0 and 1 are 'font actual'. Param 2 is the font name. 3-4 may @@ -560,21 +560,23 @@ /* * The 'charPtr' arg must be a single Unicode. */ if (charPtr != NULL) { - if (Tcl_GetCharLength(charPtr) != 1) { + const char *string = Tcl_GetString(charPtr); + int len = TkUtfToUniChar(string, &uniChar); + + if (len != charPtr->length) { resultPtr = Tcl_NewStringObj( "expected a single character but got \"", -1); - Tcl_AppendLimitedToObj(resultPtr, Tcl_GetString(charPtr), + Tcl_AppendLimitedToObj(resultPtr, string, -1, 40, "..."); Tcl_AppendToObj(resultPtr, "\"", -1); Tcl_SetObjResult(interp, resultPtr); Tcl_SetErrorCode(interp, "TK", "VALUE", "FONT_SAMPLE", NULL); return TCL_ERROR; } - uniChar = Tcl_GetUniChar(charPtr, 0); } /* * Find the font. */ @@ -1692,11 +1694,11 @@ } else if (strcasecmp(family, "ZapfChancery") == 0) { family = "ZapfChancery"; } else if (strcasecmp(family, "ZapfDingbats") == 0) { family = "ZapfDingbats"; } else { - Tcl_UniChar ch; + int ch; /* * Inline, capitalize the first letter of each word, lowercase the * rest of the letters in each word, and then take out the spaces * between the words. This may make the DString shorter, which is safe @@ -1710,18 +1712,18 @@ for (; *src != '\0'; ) { while (isspace(UCHAR(*src))) { /* INTL: ISO space */ src++; upper = 1; } - src += Tcl_UtfToUniChar(src, &ch); + src += TkUtfToUniChar(src, &ch); if (upper) { ch = Tcl_UniCharToUpper(ch); upper = 0; } else { ch = Tcl_UniCharToLower(ch); } - dest += Tcl_UniCharToUtf(ch, dest); + dest += TkUniCharToUtf(ch, dest); } *dest = '\0'; Tcl_DStringSetLength(dsPtr, dest - Tcl_DStringValue(dsPtr)); family = Tcl_DStringValue(dsPtr) + len; } @@ -3247,11 +3249,11 @@ int baseline = chunkPtr->y; Tcl_Obj *psObj = Tcl_NewObj(); int i, j, len; const char *p, *glyphname; char uindex[5], c, *ps; - Tcl_UniChar ch; + int ch; Tcl_AppendToObj(psObj, "[(", -1); for (i = 0; i < layoutPtr->numChunks; i++, chunkPtr++) { if (baseline != chunkPtr->y) { Tcl_AppendToObj(psObj, ")]\n[(", -1); @@ -3270,11 +3272,11 @@ * from the standard set defined by Adobe. The rest get punted. * Eventually this should be revised to handle more sophsticiated * international postscript fonts. */ - p += Tcl_UtfToUniChar(p, &ch); + p += TkUtfToUniChar(p, &ch); if ((ch == '(') || (ch == ')') || (ch == '\\') || (ch < 0x20)) { /* * Tricky point: the "03" is necessary in the sprintf below, * so that a full three digits of octal are always generated. * Without the "03", a number following this sequence could be @@ -3296,10 +3298,13 @@ /* * This character doesn't belong to the ASCII character set, so we * use the full glyph name. */ + if (ch > 0xffff) { + goto noMapping; + } sprintf(uindex, "%04X", ch); /* endianness? */ glyphname = Tcl_GetVar2(interp, "::tk::psglyphs", uindex, 0); if (glyphname) { ps = Tcl_GetStringFromObj(psObj, &len); if (ps[len-1] == '(') { @@ -3316,10 +3321,11 @@ } else { /* * No known mapping for the character into the space of * PostScript glyphs. Ignore it. :-( */ +noMapping: ; #ifdef TK_DEBUG_POSTSCRIPT_OUTPUT fprintf(stderr, "Warning: no mapping to PostScript " "glyphs for \\u%04x\n", ch); #endif Index: generic/tkText.c ================================================================== --- generic/tkText.c +++ generic/tkText.c @@ -4457,11 +4457,11 @@ { int objc, i, count; Tcl_Obj **objv; TkTextTabArray *tabArrayPtr; TkTextTab *tabPtr; - Tcl_UniChar ch; + int ch; double prevStop, lastStop; /* * Map these strings to TkTextTabAlign values. */ static const char *const tabOptionStrings[] = { @@ -4564,11 +4564,11 @@ /* * There may be a more efficient way of getting this. */ - Tcl_UtfToUniChar(Tcl_GetString(objv[i+1]), &ch); + TkUtfToUniChar(Tcl_GetString(objv[i+1]), &ch); if (!Tcl_UniCharIsAlpha(ch)) { continue; } i += 1; @@ -5878,11 +5878,11 @@ int maxExtraLines = 0; const char *startOfLine = Tcl_GetString(theLine); CLANG_ASSERT(pattern); do { - Tcl_UniChar ch; + int ch; const char *p; int lastFullLine = lastOffset; if (firstNewLine == -1) { if (searchSpecPtr->strictLimits @@ -6108,11 +6108,11 @@ if (alreadySearchOffset < 0) { break; } } else { firstOffset = p - startOfLine + - Tcl_UtfToUniChar(startOfLine+matchOffset,&ch); + TkUtfToUniChar(startOfLine+matchOffset,&ch); } } } while (searchSpecPtr->all); } else { int maxExtraLines = 0; Index: generic/tkTextDisp.c ================================================================== --- generic/tkTextDisp.c +++ generic/tkTextDisp.c @@ -7579,12 +7579,12 @@ chunkPtr->x, maxX, TK_ISOLATE_END, &nextX); #endif /* TK_LAYOUT_WITH_BASE_CHUNKS */ if (bytesThatFit < maxBytes) { if ((bytesThatFit == 0) && noCharsYet) { - Tcl_UniChar ch; - int chLen = Tcl_UtfToUniChar(p, &ch); + int ch; + int chLen = TkUtfToUniChar(p, &ch); #if TK_LAYOUT_WITH_BASE_CHUNKS bytesThatFit = CharChunkMeasureChars(chunkPtr, line, lineOffset+chLen, lineOffset, -1, chunkPtr->x, -1, 0, &nextX); Index: generic/tkTextIndex.c ================================================================== --- generic/tkTextIndex.c +++ generic/tkTextIndex.c @@ -2296,13 +2296,13 @@ segPtr = TkTextIndexToSeg(indexPtr, &offset); while (1) { int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { - Tcl_UniChar ch; + int ch; - chSize = Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch); + chSize = TkUtfToUniChar(segPtr->body.chars + offset, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } firstChar = 0; } @@ -2341,13 +2341,13 @@ segPtr = TkTextIndexToSeg(indexPtr, &offset); while (1) { int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { - Tcl_UniChar ch; - Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch); + int ch; + TkUtfToUniChar(segPtr->body.chars + offset, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } if (offset > 0) { chSize = (segPtr->body.chars + offset Index: generic/ttk/ttkEntry.c ================================================================== --- generic/ttk/ttkEntry.c +++ generic/ttk/ttkEntry.c @@ -280,15 +280,15 @@ */ static char *EntryDisplayString(const char *showChar, int numChars) { char *displayString, *p; int size; - Tcl_UniChar ch; - char buf[TCL_UTF_MAX]; + int ch; + char buf[6]; - Tcl_UtfToUniChar(showChar, &ch); - size = Tcl_UniCharToUtf(ch, buf); + TkUtfToUniChar(showChar, &ch); + size = TkUniCharToUtf(ch, buf); p = displayString = ckalloc(numChars * size + 1); while (numChars--) { memcpy(p, buf, size); p += size; @@ -404,11 +404,11 @@ { int spaceNeeded, cvtFlags; int number, length; const char *string; int stringLength; - Tcl_UniChar ch; + int ch; char numStorage[2*TCL_INTEGER_SPACE]; while (*template) { /* Find everything up to the next % character and append it * to the result string. @@ -428,11 +428,11 @@ /* There's a percent sequence here. Process it. */ ++template; /* skip over % */ if (*template != '\0') { - template += Tcl_UtfToUniChar(template, &ch); + template += TkUtfToUniChar(template, &ch); } else { ch = '%'; } stringLength = -1; @@ -478,11 +478,11 @@ break; case 'W': /* widget name */ string = Tk_PathName(entryPtr->core.tkwin); break; default: - length = Tcl_UniCharToUtf(ch, numStorage); + length = TkUniCharToUtf(ch, numStorage); numStorage[length] = '\0'; string = numStorage; break; } Index: unix/tkUnixFont.c ================================================================== --- unix/tkUnixFont.c +++ unix/tkUnixFont.c @@ -404,11 +404,11 @@ * correspond to the bytes stored in the * output buffer. */ { const char *srcStart, *srcEnd; char *dstStart, *dstEnd; - Tcl_UniChar ch; + int ch; int result; static char hexChars[] = "0123456789abcdef"; static char mapChars[] = { 0, 0, 0, 0, 0, 0, 0, 'a', 'b', 't', 'n', 'v', 'f', 'r' @@ -425,27 +425,35 @@ for ( ; src < srcEnd; ) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - src += Tcl_UtfToUniChar(src, &ch); + src += TkUtfToUniChar(src, &ch); dst[0] = '\\'; - if ((ch < sizeof(mapChars)) && (mapChars[ch] != 0)) { + if (((size_t) ch < sizeof(mapChars)) && (mapChars[ch] != 0)) { dst[1] = mapChars[ch]; dst += 2; } else if (ch < 256) { dst[1] = 'x'; dst[2] = hexChars[(ch >> 4) & 0xf]; dst[3] = hexChars[ch & 0xf]; dst += 4; - } else { + } else if (ch < 0x10000) { dst[1] = 'u'; dst[2] = hexChars[(ch >> 12) & 0xf]; dst[3] = hexChars[(ch >> 8) & 0xf]; dst[4] = hexChars[(ch >> 4) & 0xf]; dst[5] = hexChars[ch & 0xf]; dst += 6; + } else { + /* TODO we can do better here */ + dst[1] = 'u'; + dst[2] = 'f'; + dst[3] = 'f'; + dst[4] = 'f'; + dst[5] = 'd'; + dst += 6; } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = dst - dstStart; @@ -1026,11 +1034,11 @@ if (numBytes == 0) { curX = 0; curByte = 0; } else if (maxLength < 0) { const char *p, *end, *next; - Tcl_UniChar ch; + int ch; SubFont *thisSubFontPtr; FontFamily *familyPtr; Tcl_DString runString; /* @@ -1042,11 +1050,11 @@ */ curX = 0; end = source + numBytes; for (p = source; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); if (thisSubFontPtr != lastSubFontPtr) { familyPtr = lastSubFontPtr->familyPtr; Tcl_UtfToExternalDString(familyPtr->encoding, source, p - source, &runString); Index: unix/tkUnixRFont.c ================================================================== --- unix/tkUnixRFont.c +++ unix/tkUnixRFont.c @@ -666,13 +666,13 @@ curX = 0; curByte = 0; sawNonSpace = 0; while (numBytes > 0) { - Tcl_UniChar unichar; + int unichar; - clen = Tcl_UtfToUniChar(source, &unichar); + clen = TkUtfToUniChar(source, &unichar); c = (FcChar32) unichar; if (clen <= 0) { /* * This can't happen (but see #1185640) Index: win/tkWinFont.c ================================================================== --- win/tkWinFont.c +++ win/tkWinFont.c @@ -826,11 +826,11 @@ { HDC hdc; HFONT oldFont; WinFont *fontPtr; int curX, moretomeasure; - Tcl_UniChar ch; + int ch; SIZE size; FontFamily *familyPtr; Tcl_DString runString; SubFont *thisSubFontPtr, *lastSubFontPtr; const char *p, *end, *next = NULL, *start; @@ -857,11 +857,11 @@ moretomeasure = 0; curX = 0; start = source; end = start + numBytes; for (p = start; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); if (thisSubFontPtr != lastSubFontPtr) { familyPtr = lastSubFontPtr->familyPtr; Tcl_UtfToExternalDString(familyPtr->encoding, start, (int) (p - start), &runString); @@ -919,11 +919,11 @@ int lastSize = 0; familyPtr = lastSubFontPtr->familyPtr; Tcl_DStringInit(&runString); for (p = start; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); Tcl_UtfToExternal(NULL, familyPtr->encoding, p, (int) (next - p), 0, NULL, buf, sizeof(buf), NULL, &dstWrote, NULL); Tcl_DStringAppend(&runString,buf,dstWrote); size.cx = 0; @@ -968,17 +968,17 @@ * Scan the string for the last word break and than repeat the whole * procedure without the maxLength limit or any flags. */ const char *lastWordBreak = NULL; - Tcl_UniChar ch2; + int ch2; end = p; p = source; ch = ' '; while (p < end) { - next = p + Tcl_UtfToUniChar(p, &ch2); + next = p + TkUtfToUniChar(p, &ch2); if ((ch != ' ') && (ch2 == ' ')) { lastWordBreak = p; } p = next; ch = ch2; @@ -1441,11 +1441,11 @@ int numBytes, /* Length of string in bytes. */ int x, int y, /* Coordinates at which to place origin of * string when drawing. */ double angle) { - Tcl_UniChar ch; + int ch; SIZE size; HFONT oldFont; FontFamily *familyPtr; Tcl_DString runString; const char *p, *end, *next; @@ -1456,11 +1456,11 @@ oldFont = SelectFont(hdc, fontPtr, lastSubFontPtr, angle); GetTextMetricsA(hdc, &tm); end = source + numBytes; for (p = source; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); if (thisSubFontPtr != lastSubFontPtr) { if (p > source) { familyPtr = lastSubFontPtr->familyPtr; Tcl_UtfToExternalDString(familyPtr->encoding, source, Index: win/tkWinX.c ================================================================== --- win/tkWinX.c +++ win/tkWinX.c @@ -1524,11 +1524,11 @@ /* *---------------------------------------------------------------------- * * HandleIMEComposition -- * - * This function works around a definciency in some versions of Windows + * This function works around a deficiency in some versions of Windows * 2000 to make it possible to entry multi-lingual characters under all * versions of Windows 2000. * * When an Input Method Editor (IME) is ready to send input characters to * an application, it sends a WM_IME_COMPOSITION message with the @@ -1554,10 +1554,11 @@ HWND hwnd, /* Window receiving the message. */ LPARAM lParam) /* Flags for the WM_IME_COMPOSITION message */ { HIMC hIMC; int n; + int high = 0; if ((lParam & GCS_RESULTSTR) == 0) { /* * Composition is not finished yet. */ @@ -1610,10 +1611,18 @@ * UNICODE character in the composition. */ event.xkey.keycode = buff[i++]; + if ((event.xkey.keycode & 0xfc00) == 0xd800) { + high = ((event.xkey.keycode & 0x3ff) << 10) + 0x10000; + break; + } else if (high && (event.xkey.keycode & 0xfc00) == 0xdc00) { + event.xkey.keycode &= 0x3ff; + event.xkey.keycode += high; + high = 0; + } event.type = KeyPress; Tk_QueueWindowEvent(&event, TCL_QUEUE_TAIL); event.type = KeyRelease; Tk_QueueWindowEvent(&event, TCL_QUEUE_TAIL);