Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | [Bug 3444754] string tolower \u01c5 is wrong |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 1f5f4a676e1181cbafe2ac97dc839ded |
User & Date: | jan.nijtmans 2011-12-07 06:23:59 |
Context
2011-12-11
| ||
09:17 | Some Unicode 6.0 chars not in [:print:] class check-in: 95f8aa7255 user: jan.nijtmans tags: trunk | |
2011-12-07
| ||
06:23 | [Bug 3444754] string tolower \u01c5 is wrong check-in: 1f5f4a676e user: jan.nijtmans tags: trunk | |
06:17 | [Bug 3444754] string tolower \u01c5 is wrong check-in: 74cab55c44 user: jan.nijtmans tags: core-8-5-branch | |
2011-12-01
| ||
09:18 | Note incompatibility caused by switch of flags to dlopen() for Bug 3216070. check-in: 41679f73e6 user: dkf tags: trunk | |
Changes
Changes to ChangeLog.
1 2 3 4 5 6 7 | 2011-11-30 Jan Nijtmans <[email protected]> * library/tcltest/tcltest.tcl: [Bug 967195]: Make tcltest work when tclsh is compiled without using the setargv() function on mingw. (no need to incr the version, since 2.2.10 is never released) 2011-11-29 Jan Nijtmans <[email protected]> | > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 | 2011-12-07 Jan Nijtmans <[email protected]> * tools/uniParse.tcl: [Bug 3444754] string tolower \u01c5 is wrong * generic/tclUniData.c: * tests/utf.test: 2011-11-30 Jan Nijtmans <[email protected]> * library/tcltest/tcltest.tcl: [Bug 967195]: Make tcltest work when tclsh is compiled without using the setargv() function on mingw. (no need to incr the version, since 2.2.10 is never released) 2011-11-29 Jan Nijtmans <[email protected]> |
︙ | ︙ |
Changes to generic/tclUniData.c.
︙ | ︙ | |||
747 748 749 750 751 752 753 | static const int groups[] = { 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 1048641, 28, 19, 1048706, 29, 2, 23, 16, 11, -24346494, 24, -3964798, 32833, 32898, -6520767, 7602306, -3964863, 9830530, -6389630, 6881345, 6750273, 6717505, 2588737, 6619201, 6651969, 6783041, -3178366, 6914113, 6848577, -5341054, 6979649, -4259710, 7012417, 7143489, 7110721, 7176257, | | | 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 | static const int groups[] = { 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 1048641, 28, 19, 1048706, 29, 2, 23, 16, 11, -24346494, 24, -3964798, 32833, 32898, -6520767, 7602306, -3964863, 9830530, -6389630, 6881345, 6750273, 6717505, 2588737, 6619201, 6651969, 6783041, -3178366, 6914113, 6848577, -5341054, 6979649, -4259710, 7012417, 7143489, 7110721, 7176257, 5, -1834878, 65633, 32963, 65698, 2588802, -3178431, -1834943, -4259775, 353730625, -5341119, 353632321, -354385790, -6389695, 2261057, 2326593, -353337214, -353238910, -353304446, 6881410, 6750338, 6717570, 6619266, 6652034, 6783106, -1385430910, 6848642, 6914178, -352026494, -352223102, 6979714, 7012482, -351502206, 7143554, 2261122, 7110786, 2326658, 7176322, 4, 6, -2752378, 1245249, 1212481, 2097217, 2064449, 1245314, 1212546, 1015938, 2097282, 2064514, 262209, 2031746, 1867906, 1, 1540226, 1769602, 262274, |
︙ | ︙ | |||
771 772 773 774 775 776 777 | }; /* * The following constants are used to determine the category of a * Unicode character. */ | | > | 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 | }; /* * The following constants are used to determine the category of a * Unicode character. */ #define UNICODE_CATEGORY_MASK 0x1F #define UNICODE_OUT_OF_RANGE 0x10000u enum { UNASSIGNED, UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, |
︙ | ︙ | |||
813 814 815 816 817 818 819 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xE0) >> 5) | | | 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xE0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1F) #define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) /* * This macro extracts the information about a character from the * Unicode character tables. */ #define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]]) |
Changes to tests/utf.test.
︙ | ︙ | |||
255 256 257 258 259 260 261 | string toupper ! } ! test utf-16.1 {Tcl_UniCharToLower, negative delta} { string tolower aA } aa test utf-16.2 {Tcl_UniCharToLower, positive delta} { | | | | 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 | string toupper ! } ! test utf-16.1 {Tcl_UniCharToLower, negative delta} { string tolower aA } aa test utf-16.2 {Tcl_UniCharToLower, positive delta} { string tolower \u0178\u00ff\uA78D\u01c5 } \u00ff\u00ff\u0265\u01c6 test utf-17.1 {Tcl_UniCharToLower, no delta} { string tolower ! } ! test utf-18.1 {Tcl_UniCharToTitle, add one for title} { string totitle \u01c4 |
︙ | ︙ |
Changes to tools/uniParse.tcl.
︙ | ︙ | |||
40 41 42 43 44 45 46 | proc uni::getValue {items index} { variable categories variable titleCount # Extract character info set category [lindex $items 2] | | | | | | | | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | proc uni::getValue {items index} { variable categories variable titleCount # Extract character info set category [lindex $items 2] if {[scan [lindex $items 12] %6x toupper] == 1} { set toupper [expr {$index - $toupper}] } else { set toupper 0 } if {[scan [lindex $items 13] %6x tolower] == 1} { set tolower [expr {$tolower - $index}] } else { set tolower 0 } if {[scan [lindex $items 14] %6x totitle] == 1} { set totitle [expr {$index - $totitle}] } else { set totitle 0 } set categoryIndex [lsearch -exact $categories $category] if {$categoryIndex < 0} { puts "Unexpected character category: $index($category)" set categoryIndex 0 } elseif {$category eq "Lt"} { |
︙ | ︙ | |||
96 97 98 99 100 101 102 | } proc uni::buildTables {data} { variable shift variable pMap {} variable pages {} | | > < < > > > > > > | | | 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | } proc uni::buildTables {data} { variable shift variable pMap {} variable pages {} variable groups {{0,0,0,0}} variable next 0 set info {} ;# temporary page info set mask [expr {(1 << $shift) - 1}] foreach line [split $data \n] { if {$line eq ""} { if {!($next & $mask)} { # next character is already on page boundary continue } # fill remaining page set line [format %X [expr {($next-1)|$mask}]] append line ";;Cn;0;ON;;;;;N;;;;;\n" } set items [split $line \;] scan [lindex $items 0] %x index if {$index > 0xFFFF} then { # Ignore non-BMP characters, as long as Tcl doesn't support them continue } set index [format %d $index] set gIndex [getGroup [getValue $items $index]] # Since the input table omits unassigned characters, these will # show up as gaps in the index sequence. There are a few special cases # where the gaps correspond to a uniform block of assigned characters. # These are indicated as such in the character name. |
︙ | ︙ | |||
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | proc uni::main {} { global argc argv0 argv variable pMap variable pages variable groups variable shift variable titleCount if {$argc != 2} { puts stderr "\nusage: $argv0 <datafile> <outdir>\n" exit 1 } set f [open [lindex $argv 0] r] set data [read $f] close $f buildTables $data puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" | > | | 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | proc uni::main {} { global argc argv0 argv variable pMap variable pages variable groups variable shift variable titleCount variable next if {$argc != 2} { puts stderr "\nusage: $argv0 <datafile> <outdir>\n" exit 1 } set f [open [lindex $argv 0] r] set data [read $f] close $f buildTables $data puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" set size [expr {[llength $pMap]*2 + [llength $pages]*(1<<$shift)}] puts "shift = $shift, space = $size" puts "title case count = $titleCount" set f [open [file join [lindex $argv 1] tclUniData.c] w] fconfigure $f -translation lf puts $f "/* * tclUniData.c -- |
︙ | ︙ | |||
274 275 276 277 278 279 280 | set line " " set last [expr {[llength $groups] - 1}] for {set i 0} {$i <= $last} {incr i} { foreach {type toupper tolower totitle} [split [lindex $groups $i] ,] {} # Compute the case conversion type and delta | | | | | | | | > > | 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 | set line " " set last [expr {[llength $groups] - 1}] for {set i 0} {$i <= $last} {incr i} { foreach {type toupper tolower totitle} [split [lindex $groups $i] ,] {} # Compute the case conversion type and delta if {$totitle} { if {$totitle == $toupper} { # subtract delta for title or upper set case 4 set delta $toupper } elseif {$toupper} { # subtract delta for upper, subtract 1 for title set case 5 set delta $toupper } else { # add delta for lower, add 1 for title set case 3 set delta $tolower } } elseif {$toupper} { # subtract delta for upper, add delta for lower set case 6 set delta $toupper } elseif {$tolower} { # add delta for lower set case 2 set delta $tolower } else { # noop set case 0 set delta 0 } append line [expr {($delta << 15) | ($case << 5) | $type}] if {$i != $last} { append line ", " } if {[string length $line] > 65} { puts $f [string trimright $line] set line " " } } puts $f $line puts -nonewline $f "}; /* * The following constants are used to determine the category of a * Unicode character. */ #define UNICODE_CATEGORY_MASK 0x1F #define UNICODE_OUT_OF_RANGE " puts $f [format 0x%Xu $next] puts $f " enum { UNASSIGNED, UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, OTHER_LETTER, |
︙ | ︙ | |||
361 362 363 364 365 366 367 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xE0) >> 5) | | | 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | /* * The following macros extract the fields of the character info. The * GetDelta() macro is complicated because we can't rely on the C compiler * to do sign extension on right shifts. */ #define GetCaseType(info) (((info) & 0xE0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1F) #define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) /* * This macro extracts the information about a character from the * Unicode character tables. */ |
︙ | ︙ |