Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Add test of UTF-8 -> internal -> UTF-8 round trip (and make it pass). |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | dkf-utf16-branch |
Files: | files | file ages | folders |
SHA1: | 7c97e3ea95fb60eca40a0b97b23181a4 |
User & Date: | dkf 2011-07-31 23:14:02 |
Context
2011-07-31
| ||
23:16 | Bring up to date with mainline. check-in: e878fe8df0 user: dkf tags: dkf-utf16-branch | |
23:14 | Add test of UTF-8 -> internal -> UTF-8 round trip (and make it pass). check-in: 7c97e3ea95 user: dkf tags: dkf-utf16-branch | |
2011-07-28
| ||
14:24 | Actual possible implementation of code to convert external UTF-8 to internal surrogate pairs and bac... check-in: fed6086a43 user: dkf tags: dkf-utf16-branch | |
Changes
Changes to generic/tclEncoding.c.
︙ | ︙ | |||
2245 2246 2247 2248 2249 2250 2251 | * large enough to hold the UTF-8 character * (at most 6 bytes). */ { if ((ch > 0) && (ch < 0x80)) { buf[0] = (char) ch; return 1; } | < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < | 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 | * large enough to hold the UTF-8 character * (at most 6 bytes). */ { if ((ch > 0) && (ch < 0x80)) { buf[0] = (char) ch; return 1; } if (ch <= 0x7FF) { buf[1] = (char) ((ch | 0x80) & 0xBF); buf[0] = (char) ((ch >> 6) | 0xC0); return 2; } if (ch <= 0xFFFF) { three: buf[2] = (char) ((ch | 0x80) & 0xBF); buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 12) | 0xE0); return 3; } if (ch <= 0x1FFFFF) { buf[3] = (char) ((ch | 0x80) & 0xBF); buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 18) | 0xF0); return 4; } if (ch <= 0x3FFFFFF) { buf[4] = (char) ((ch | 0x80) & 0xBF); buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF); buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 24) | 0xF8); return 5; } if (ch <= 0x7FFFFFFF) { buf[5] = (char) ((ch | 0x80) & 0xBF); buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF); buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF); buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 30) | 0xFC); return 6; } ch = 0xFFFD; goto three; } static INLINE int |
︙ | ︙ | |||
2484 2485 2486 2487 2488 2489 2490 | src += Tcl_UtfToUniChar(src, &ch); if (ch >= 0xD800 && ch < 0xDBFF) { unsigned fullChar = ((unsigned)(ch - 0xD800)) << 10; src += Tcl_UtfToUniChar(src, &ch); if (ch >= 0xDC00 && ch < 0xDFFF) { | | | | 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 | src += Tcl_UtfToUniChar(src, &ch); if (ch >= 0xD800 && ch < 0xDBFF) { unsigned fullChar = ((unsigned)(ch - 0xD800)) << 10; src += Tcl_UtfToUniChar(src, &ch); if (ch >= 0xDC00 && ch < 0xDFFF) { fullChar += 0x2400 + (unsigned) ch; dst += IntToUtf(fullChar, dst); continue; } else { src = origin + Tcl_UtfToUniChar(origin, &ch); } } dst += Tcl_UniCharToUtf(ch, dst); } else { unsigned fullChar; src += UtfToInt(src, &fullChar); if (fullChar > 0xFFFF) { fullChar -= 0x10000; ch = (Tcl_UniChar) ((fullChar >> 10) + 0xD800); dst += Tcl_UniCharToUtf(ch, dst); ch = (Tcl_UniChar) ((fullChar & 0x3FF) + 0xDC00); dst += Tcl_UniCharToUtf(ch, dst); } else { ch = (Tcl_UniChar) fullChar; dst += Tcl_UniCharToUtf(ch, dst); } |
︙ | ︙ |
Changes to tests/encoding.test.
︙ | ︙ | |||
311 312 313 314 315 316 317 318 319 320 321 322 323 324 | test encoding-15.3 {UtfToUtfProc null character input} { set x [encoding convertfrom identity \x00] set y [encoding convertfrom utf-8 $x] binary scan [encoding convertto identity $y] H* z list [string bytelength $x] [string bytelength $y] $z } {1 2 c080} test encoding-16.1 {UnicodeToUtfProc} { set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] } "\u4e4e 4e4e" test encoding-17.1 {UtfToUnicodeProc} { | > > > > > > > > > | 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 | test encoding-15.3 {UtfToUtfProc null character input} { set x [encoding convertfrom identity \x00] set y [encoding convertfrom utf-8 $x] binary scan [encoding convertto identity $y] H* z list [string bytelength $x] [string bytelength $y] $z } {1 2 c080} test encoding-15.4 {UtfToUtfProc: UTF-8 to UTF-16 and back} { set x \xF0\xA4\xAD\xA2; # U+024B62 set y [encoding convertfrom utf-8 $x] set z [encoding convertto utf-8 $y] list [string length $x] [string length $y] [string length $z] \ [format 0x%04x.0x%04x {*}[scan $y %c%c]] \ [format %02x.%02x.%02x.%02x {*}[scan $z %c%c%c%c]] } {4 2 4 0xd852.0xdf62 f0.a4.ad.a2} test encoding-16.1 {UnicodeToUtfProc} { set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] } "\u4e4e 4e4e" test encoding-17.1 {UtfToUnicodeProc} { |
︙ | ︙ |