Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Start work towards being able to work with utf8 fully and utf16 and other things outside the BMP. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | dkf-utf16-branch |
Files: | files | file ages | folders |
SHA1: | f9f8c8425cd135c423fdfe3b89becb91 |
User & Date: | dkf 2011-07-27 10:40:43 |
Original Comment: | Start work towards being able to work with utf8 fully and utf16 and other things outside the BMP. |
Context
2011-07-28
| ||
14:24 | Actual possible implementation of code to convert external UTF-8 to internal surrogate pairs and bac... check-in: fed6086a43 user: dkf tags: dkf-utf16-branch | |
2011-07-27
| ||
10:40 | Start work towards being able to work with utf8 fully and utf16 and other things outside the BMP. check-in: f9f8c8425c user: dkf tags: dkf-utf16-branch | |
2011-07-26
| ||
20:00 | Ensure that TclOO is properly found by all the various package mechanisms (by adding a dummy ifneede... check-in: b90f247299 user: dkf tags: trunk | |
Changes
Changes to ChangeLog.
1 2 3 4 5 6 7 | 2011-07-26 Donal K. Fellows <[email protected]> * generic/tclOO.c (initScript): Ensure that TclOO is properly found by all the various package mechanisms (by adding a dummy ifneeded script) and not just some of them. 2011-07-21 Jan Nijtmans <[email protected]> | > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 | 2011-07-27 Donal K. Fellows <[email protected]> * generic/tclEncoding.c (UtfToUtfProc): Start to rough out what needs to change to transition Tcl to being able to work with non-BMP characters, at least at a basic level. 2011-07-26 Donal K. Fellows <[email protected]> * generic/tclOO.c (initScript): Ensure that TclOO is properly found by all the various package mechanisms (by adding a dummy ifneeded script) and not just some of them. 2011-07-21 Jan Nijtmans <[email protected]> |
︙ | ︙ |
Changes to generic/tclEncoding.c.
︙ | ︙ | |||
186 187 188 189 190 191 192 193 194 195 196 197 198 199 | /* * The following variable is used in the sparse matrix code for a * TableEncoding to represent a page in the table that has no entries. */ static unsigned short emptyPage[256]; /* * Functions used only in this module. */ static int BinaryProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, | > > > > > > > > | 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | /* * The following variable is used in the sparse matrix code for a * TableEncoding to represent a page in the table that has no entries. */ static unsigned short emptyPage[256]; /* * Constants used in the (external) UTF-8 <--> (internal) Modified UTF-8 * conversion code. */ #define FROM_STANDARD_UTF8 0 #define TO_STANDARD_UTF8 1 /* * Functions used only in this module. */ static int BinaryProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, |
︙ | ︙ | |||
2154 2155 2156 2157 2158 2159 2160 | * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, | | | 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 | * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, TO_STANDARD_UTF8); } /* *------------------------------------------------------------------------- * * UtfExtToUtfIntProc -- * |
︙ | ︙ | |||
2203 2204 2205 2206 2207 2208 2209 | * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, | | | 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 | * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, FROM_STANDARD_UTF8); } /* *------------------------------------------------------------------------- * * UtfToUtfProc -- * |
︙ | ︙ | |||
2250 2251 2252 2253 2254 2255 2256 | * characters. */ int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr, /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ | | | 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 | * characters. */ int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr, /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ int conversionMode) /* Convert embedded nulls from internal * representation to real null-bytes or vice * versa. */ { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars; Tcl_UniChar ch; |
︙ | ︙ | |||
2285 2286 2287 2288 2289 2290 2291 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } | | > | | | > > > > > > | 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && conversionMode == FROM_STANDARD_UTF8)) { /* * Copy 7-bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xc080. */ *dst++ = *src++; } else if (conversionMode == TO_STANDARD_UTF8 && UCHAR(*src) == 0xc0 && UCHAR(*(src+1)) == 0x80) { /* * Convert 0xc080 to real nulls when we are in output mode. */ *dst++ = 0; src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using Tcl_UtfToUniChar. Not doing so can * cause it to run beyond the end of the buffer! If we happen upon such an * incomplete char, its bytes are made to represent themselves. */ ch = (unsigned char) *src; src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { /* * This is where we ought to do surrogate pair handling, with the * correct way of doing it depending on the conversionMode * parameter. But we don't. Yet. KNOWN BUG/MISFEATURE! */ src += Tcl_UtfToUniChar(src, &ch); dst += Tcl_UniCharToUtf(ch, dst); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; |
︙ | ︙ |
Changes to generic/tclUtf.c.
︙ | ︙ | |||
130 131 132 133 134 135 136 | } if (ch <= 0x7FFFFFFF) { return 6; } #endif return 3; } | | | 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 | } if (ch <= 0x7FFFFFFF) { return 6; } #endif return 3; } /* *--------------------------------------------------------------------------- * * Tcl_UniCharToUtf -- * * Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the * provided buffer. Equivalent to Plan 9 runetochar(). |
︙ | ︙ |