Index: generic/tkTextDisp.c ================================================================== --- generic/tkTextDisp.c +++ generic/tkTextDisp.c @@ -1370,11 +1370,11 @@ lastChunkPtr->numBytes += elidesize; breakByteOffset = lastChunkPtr->breakIndex = lastChunkPtr->numBytes; /* - * If have we have a tag toggle, there is a chance that + * If we have a tag toggle, there is a chance that * invisibility state changed, so bail out. */ } else if ((segPtr->typePtr == &tkTextToggleOffType) || (segPtr->typePtr == &tkTextToggleOnType)) { if (segPtr->body.toggle.tagPtr->elideString != NULL) { @@ -1475,23 +1475,33 @@ gotTab = 0; maxBytes = segPtr->size - byteOffset; if (segPtr->typePtr == &tkTextCharType) { - /* - * See if there is a tab in the current chunk; if so, only layout - * characters up to (and including) the tab. - */ + /* + * See if there is a tab or soft hyphen in the current segment; if so, + * only layout characters up to (and including) this character. + */ + if (!elide) { + const char *p; + + p = Tcl_UtfFindFirst(segPtr->body.chars + byteOffset, 0x00AD); + if (p != NULL) { + maxBytes = (p + 2 - segPtr->body.chars) - byteOffset; + } + } if (!elide && justify == TK_JUSTIFY_LEFT) { char *p; for (p = segPtr->body.chars + byteOffset; *p != 0; p++) { if (*p == '\t') { - maxBytes = (p + 1 - segPtr->body.chars) - byteOffset; - gotTab = 1; - break; + if ((p + 1 - segPtr->body.chars) - byteOffset <= maxBytes) { + maxBytes = (p + 1 - segPtr->body.chars) - byteOffset; + gotTab = 1; + break; + } } } } #if TK_LAYOUT_WITH_BASE_CHUNKS @@ -7518,15 +7528,16 @@ /* Structure to fill in with information about * this chunk. The x field has already been * set by the caller. 
*/ { Tk_Font tkfont; - int nextX, bytesThatFit, count; + int nextX, bytesThatFit; CharInfo *ciPtr; - char *p; + char *p, *p2; TkTextSegment *nextPtr; Tk_FontMetrics fm; + int ch, nBytes; #if TK_LAYOUT_WITH_BASE_CHUNKS const char *line; int lineOffset; BaseCharInfo *bciPtr; Tcl_DString *baseString; @@ -7594,12 +7605,11 @@ #else /* !TK_LAYOUT_WITH_BASE_CHUNKS */ bytesThatFit = CharChunkMeasureChars(chunkPtr, p, chLen, 0, -1, chunkPtr->x, -1, 0, &nextX); #endif /* TK_LAYOUT_WITH_BASE_CHUNKS */ } - if ((nextX < maxX) && ((p[bytesThatFit] == ' ') - || (p[bytesThatFit] == '\t'))) { + if ((nextX < maxX) && (p[bytesThatFit] == ' ')) { /* * Space characters are funny, in that they are considered to fit * if there is at least one pixel of space left on the line. Just * give the space character whatever space is left. */ @@ -7682,16 +7692,22 @@ Tcl_DStringSetLength(baseString,lineOffset+ciPtr->numBytes); bciPtr->width = nextX - baseCharChunkPtr->x; /* - * Finalize the base chunk if this chunk ends in a tab, which definitly - * breaks the context and needs to be handled on a higher level. + * Finalize the base chunk if this chunk ends in a tab or soft hyphen, + * which definitely breaks the context and needs to be handled on a + * higher level. */ if (ciPtr->numBytes > 0 && p[ciPtr->numBytes - 1] == '\t') { FinalizeBaseChunk(chunkPtr); + } else { + TkUtfToUniChar(Tcl_UtfPrev(p + ciPtr->numBytes, p), &ch); + if (ch == 0x00AD) { + FinalizeBaseChunk(chunkPtr); + } } #endif /* TK_LAYOUT_WITH_BASE_CHUNKS */ /* * Compute a break location. If we're in word wrap mode, a break can occur @@ -7700,26 +7716,32 @@ */ if (wrapMode != TEXT_WRAPMODE_WORD) { chunkPtr->breakIndex = chunkPtr->numBytes; } else { - for (count = bytesThatFit, p += bytesThatFit - 1; count > 0; - count--, p--) { - /* + p2 = p + bytesThatFit; + while (p2 - p > 0) { + /* * Don't use isspace(); effects are unpredictable and can lead to * odd word-wrapping problems on some platforms. 
Also don't use * Tcl_UniCharIsSpace here either, as it identifies non-breaking * spaces as places to break. What we actually want is only the * ASCII space characters, so use them explicitly... + * 0x09 is '\t', 0x0A is '\n', 0x0B is '\v', 0x0C is '\f', + * 0x0D is '\r', 0x20 is ' ', 0x2D is '-', and 0x00AD is the soft + * hyphen */ - switch (*p) { - case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': - chunkPtr->breakIndex = count; + nBytes = TkUtfToUniChar(Tcl_UtfPrev(p2, p), &ch); + switch (ch) { + case 0x09: case 0x0A: case 0x0B: case 0x0C: case 0x0D: + case 0x20: case 0x2D: case 0x00AD: + chunkPtr->breakIndex = p2 - p; goto checkForNextChunk; - } - } + } + p2 -= nBytes; + } checkForNextChunk: if ((bytesThatFit + byteOffset) == segPtr->size) { for (nextPtr = segPtr->nextPtr; nextPtr != NULL; nextPtr = nextPtr->nextPtr) { if (nextPtr->size != 0) { @@ -7866,14 +7888,15 @@ Drawable dst, /* Pixmap or window in which to draw chunk. */ int screenY) /* Y-coordinate in text window that * corresponds to y. */ { CharInfo *ciPtr = chunkPtr->clientData; - const char *string; + char *string; TextStyle *stylePtr; StyleValues *sValuePtr; int numBytes, offsetBytes, offsetX; + int ch, nBytes; #if TK_DRAW_IN_CONTEXT BaseCharInfo *bciPtr; #endif /* TK_DRAW_IN_CONTEXT */ if ((x + chunkPtr->width) <= 0) { @@ -7936,10 +7959,44 @@ int xDisplacement = x - chunkPtr->x; if ((len > 0) && (string[start + len - 1] == '\t')) { len--; } + + /* + * Don't draw any soft hyphen unless it is the last character + * of the display line. Soft hyphens can only show up at the + * end of a chunk, so test their presence at this end position + * only. + */ + + if (chunkPtr->nextPtr != NULL) { + nBytes = TkUtfToUniChar(Tcl_UtfPrev(string + start + len, + string + start), &ch); + if (ch == 0x00AD) { + len -= nBytes; + } + } else { + + /* + * On OS X, the soft hyphen does not render (there is no + * corresponding glyph in OS X fonts). Display a regular + * hard hyphen instead. 
This could be done on all platforms + * but for performance reasons do it only if it's needed. + */ + +#ifdef MAC_OSX_TK + nBytes = TkUtfToUniChar(Tcl_UtfPrev(string + start + len, + string + start), &ch); + if (ch == 0x00AD) { + string[start + len - nBytes] = '-'; + string[start + len - nBytes + 1] = '\0'; + len -= nBytes - 1; + } +#endif + } + if (len <= 0) { return; } TkpDrawCharsInContext(display, dst, stylePtr->fgGC, sValuePtr->tkfont, @@ -7970,10 +8027,57 @@ numBytes -= offsetBytes; if ((numBytes > 0) && (string[numBytes - 1] == '\t')) { numBytes--; } + + /* + * Don't draw any soft hyphen unless it is the last character + * of the display line. Soft hyphens can only show up at the + * end of a chunk, so test their presence at this end position + * only. + * In case TK_LAYOUT_WITH_BASE_CHUNKS is true, chunkPtr needs + * to be adjusted to point to the chunk displaying the final + * part of the stretch, so that the test below: + * if (chunkPtr->nextPtr != NULL) + * really checks whether the last character of this chunk + * is the last character of the display line. + */ + +#if TK_LAYOUT_WITH_BASE_CHUNKS + nBytes = ciPtr->numBytes; + while ((nBytes < numBytes) && (chunkPtr->nextPtr != NULL)) { + chunkPtr = chunkPtr->nextPtr; + nBytes += chunkPtr->numBytes; + } +#endif /* TK_LAYOUT_WITH_BASE_CHUNKS */ + if (chunkPtr->nextPtr != NULL) { + nBytes = TkUtfToUniChar(Tcl_UtfPrev(string + numBytes, string), + &ch); + if (ch == 0x00AD) { + numBytes -= nBytes; + } + } else { + + /* + * On OS X, the soft hyphen does not render (there is no + * corresponding glyph in OS X fonts). Display a regular + * hard hyphen instead. This could be done on all platforms + * but for performance reasons do it only if it's needed. 
+ */ + +#ifdef MAC_OSX_TK + nBytes = TkUtfToUniChar(Tcl_UtfPrev(string + numBytes, string), + &ch); + if (ch == 0x00AD) { + string[numBytes - nBytes] = '-'; + string[numBytes - nBytes + 1] = '\0'; + numBytes -= nBytes - 1; + } +#endif + } + Tk_DrawChars(display, dst, stylePtr->fgGC, sValuePtr->tkfont, string, numBytes, offsetX, y + baseline - sValuePtr->offset); if (sValuePtr->underline) { Tk_UnderlineChars(display, dst, stylePtr->ulGC, sValuePtr->tkfont, string, offsetX, @@ -8536,12 +8640,12 @@ * Determine the number of characters from the string that will fit in * the given horizontal span. The measurement is done under the * assumption that Tk_DrawChars will be used to actually display the * characters. * - * If tabs are encountered in the string, they will be ignored (they - * should only occur as last character of the string anyway). + * If tabs or soft hyphens are encountered in the string, they will be + * ignored (they can only occur as last character of the string). * * If a newline is encountered in the string, the line will be broken at * that point. * * Results: @@ -8571,14 +8675,15 @@ * cross this x-position. */ int flags, /* Flags to pass to Tk_MeasureChars. */ int *nextXPtr) /* Return x-position of terminating character * here. */ { - int curX, width, ch; + int curX, width, ch, nBytes; const char *special, *end, *start; ch = 0; /* lint. */ + nBytes = 0; /* Silence compiler warning. */ curX = startX; start = source + rangeStart; end = start + rangeLength; special = start; while (start < end) { @@ -8585,15 +8690,17 @@ if (start >= special) { /* * Find the next special character in the string. 
*/ - for (special = start; special < end; special++) { - ch = *special; - if ((ch == '\t') || (ch == '\n')) { + special = start; + while (special < end) { + nBytes = TkUtfToUniChar(special, &ch); + if ((ch == 0x09) || (ch == 0x0A) || (ch == 0x00AD)) { break; } + special += nBytes; } } /* * Special points at the next special character (or the end of the @@ -8619,14 +8726,18 @@ */ break; } if (special < end) { - if (ch != '\t') { + if (ch == 0x0A) { break; } - start++; + if (ch == 0x00AD) { + start += nBytes; + } else { + start++; + } } } *nextXPtr = curX; return start - (source+rangeStart); Index: macosx/tkMacOSXFont.c ================================================================== --- macosx/tkMacOSXFont.c +++ macosx/tkMacOSXFont.c @@ -874,10 +874,20 @@ whitespaceCharacterSet : lineendingCharacterSet; while (index > start && [cs characterIsMember:[string characterAtIndex:(index - 1)]]) { index--; } + + /* + * CTTypesetterSuggestClusterBreak et al. above may return an index + * larger than the end of the range to consider. Limit to given end. + */ + + if (index > start + len) { + index = start + len; + } + if (index <= start && (flags & TK_AT_LEAST_ONE)) { index = start + 1; } if (index > 0) { range.length = index; Index: tests/textDisp.test ================================================================== --- tests/textDisp.test +++ tests/textDisp.test @@ -518,10 +518,69 @@ test textDisp-2.30 {LayoutDLine, tabs, running out of space in dline} {textfonts} { .t delete 1.0 end .t insert 1.0 "a\tx\tabc" .t bbox 1.4 } [list 117 5 7 $fixedHeight] +test textDisp-2.31 {LayoutDLine, word wrap on ordinary hyphen} {textfonts} { + .t delete 1.0 end + .t insert 1.0 "This is a *normal* hyphen test: abc-123\n" + # test robustness: we only want to check that line wrapped at the hyphen, i.e. 
+ # that the hyphen is still on the second display line while "123" is on the + # third one - check exactly this and not, say, a bbox + .t sync + list [.t count -displaylines 1.0 1.36] [.t count -displaylines 1.36 1.37] +} [list 1 1] +test textDisp-2.32 {LayoutDLine, word wrap on soft hyphen} {textfonts} { + .t delete 1.0 end + .t insert 1.0 "This is a -*soft*- hyphen test: abc\u00AD123\n" + # test robustness: we only want to check that line wrapped at the hyphen, i.e. + # that the hyphen is still on the second display line while "123" is on the + # third one - check exactly this and not, say, a bbox + .t sync + list [.t count -displaylines 1.0 1.39] [.t count -displaylines 1.39 1.40] +} [list 1 1] +test textDisp-2.33 {soft hyphen is not visible when not at display line end} {textfonts} { + .t delete 1.0 end + .t insert 1.0 "\u00ADSoft\u00AD\u00AD\u00ADhyphen test.\nAgain!\u00AD" + set res [lindex [.t bbox 1.0] 2] + lappend res [expr [lindex [.t bbox 1.1] 2] > 0 ] + lappend res [lindex [.t bbox 1.5] 2] + lappend res [lindex [.t bbox 1.6] 2] + lappend res [lindex [.t bbox 1.7] 2] + lappend res [expr [lindex [.t bbox 1.8] 2] > 0 ] + lappend res [lindex [.t bbox 2.6] 2] +} [list 0 1 0 0 0 1 0] +test textDisp-2.34 {soft hyphen is visible when at display line end} {textfonts} { + .t delete 1.0 end + .t insert 1.0 "This is a -*soft*- hyphen test: abc\u00AD123\n" + # soft hyphens always have zero width in their bounding boxes, + # even if they are accidentally displayed + set res [expr [lindex [.t bbox 1.39] 2] == 0] +} {1} +test textDisp-2.35 {LayoutDline, maxBytes calculation for chunk with both soft hyphen and tab} {textfonts} { + .t delete 1.0 end + .t insert end "Test\twith soft\u00ADhyphen and\ttabs.\n" + .t insert end "Test\u00ADwith soft\thyphen and\u00ADtabs.\n" + set res [expr [lindex [.t bbox 1.4] 2] > 0] + lappend res [lindex [.t bbox 1.14] 2] + lappend res [expr [lindex [.t bbox 1.25] 2] > 0] + lappend res [lindex [.t bbox 2.4] 2] + lappend res [expr [lindex 
[.t bbox 2.14] 2] > 0] + lappend res [lindex [.t bbox 2.25] 2] +} [list 1 0 1 0 1 0] +test textDisp-2.36 {a soft hyphen counts for 1 char even if not visible} {textfonts} { + .t delete 1.0 end + .t insert 1.0 "\u00ADSoft\u00AD\u00AD\u00ADhyphen test." + set res [.t count -chars 1.0 1.14] + lappend res [.t count -displaychars 1.0 1.14] + lappend res [.t count -indices 1.0 1.14] + lappend res [.t count -displayindices 1.0 1.14] + # All the above results must be the same. With or without the "display" modifier + # makes no difference: despite not being visible the soft hyphens are not + # *elided* (i.e. not tagged as elided). In other words soft hyphens are + # displayed and count for 1 char each, but with zero width +} [list 14 14 14 14] test textDisp-3.1 {different character sizes} {textfonts} { .t configure -wrap word .t delete 1.0 end .t insert end "Some sample text, including both large\n" Index: win/tkWinFont.c ================================================================== --- win/tkWinFont.c +++ win/tkWinFont.c @@ -954,11 +954,11 @@ SelectObject(hdc, oldFont); ReleaseDC(fontPtr->hwnd, hdc); if ((flags & TK_WHOLE_WORDS) && (p < end)) { /* - * Scan the string for the last word break and than repeat the whole + * Scan the string for the last word break and then repeat the whole * procedure without the maxLength limit or any flags. */ const char *lastWordBreak = NULL; int ch2;