Tcl Source Code

View Ticket
Login
Ticket UUID: 217987
Title: Backslash substitution is broken with non-ASCII chars
Type: Bug Version: obsolete: 8.3.2
Submitter: nobody Created on: 2000-10-26 00:47:11
Subsystem: 44. UTF-8 Strings Assigned To: hobbs
Priority: 2 Severity:
Status: Closed Last Modified: 2001-06-28 08:06:56
Resolution: Fixed Closed By: hobbs
    Closed on: 2001-06-28 01:06:55
Description:
OriginalBugID: 6162 Bug
Version: 8.3.2
SubmitDate: '2000-08-23'
LastModified: '2000-09-05'
Severity: MED
Status: Closed
Submitter: techsupp
ChangedBy: hobbs
RelatedBugIDs: 6166
OS: Windows NT
FixedDate: '2000-09-05'
ClosedDate: '2000-10-25'


Name:

Benjamin Riefenstahl



CVS:

tclUtf.c,v 1.11 2000/01/11 22:09:00 hobbs Exp



Comments:

The patch is against the tcl-8.3.3 branch.



ReproducibleScript:

puts \ä



ObservedBehavior:

The script above displays garbage.



DesiredBehavior:

The script above should produce one line with an "ä".



Patch:

Index: tclUtf.c

===================================================================

RCS file: /cvsroot/tcl/generic/tclUtf.c,v

retrieving revision 1.11

diff -c -r1.11 tclUtf.c

*** tclUtf.c 2000/01/11 22:09:00 1.11

--- tclUtf.c 2000/08/23 18:54:26

***************

*** 781,787 ****

   * backslash sequence. */

  {

      register CONST char *p = src+1;

!     int result, count, n;

      char buf[TCL_UTF_MAX];

  

      if (dst == NULL) {

--- 781,787 ----

   * backslash sequence. */

  {

      register CONST char *p = src+1;

!     int result, count, n, utfconvert;

      char buf[TCL_UTF_MAX];

  

      if (dst == NULL) {

***************

*** 789,794 ****

--- 789,795 ----

      }

  

      count = 2;

+     utfconvert = 0;

      switch (*p) {

  /*

           * Note: in the conversions below, use absolute values (e.g.,

***************

*** 823,828 ****

--- 824,830 ----

              if (isxdigit(UCHAR(p[1]))) { /* INTL: digit */

                  char *end;

  

+ utfconvert = 1;

                  result = (unsigned char) strtoul(p+1, &end, 16);

                  count = end - src;

              } else {

***************

*** 831,836 ****

--- 833,839 ----

              }

              break;

  case 'u':

+     utfconvert = 1;

      result = 0;

      for (count = 0; count < 4; count++) {

  p++;

***************

*** 868,873 ****

--- 871,877 ----

       * Check for an octal number \oo?o?

       */

      if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */

+ utfconvert = 1;

  result = (unsigned char)(*p - '0');

  p++;

  if (!isdigit(UCHAR(*p)) || (UCHAR(*p) >= '8')) { /* INTL: digit */

***************

*** 891,897 ****

      if (readPtr != NULL) {

  *readPtr = count;

      }

!     return Tcl_UniCharToUtf(result, dst);

  }

  

  /*

--- 895,906 ----

      if (readPtr != NULL) {

  *readPtr = count;

      }

!     if (!utfconvert) {

! *dst = (char) result;

! return 1;

!     } else {

! return Tcl_UniCharToUtf(result, dst);

!     }

  }

  

  /*



PatchFiles:

tclUtf.c
User Comments: hobbs added on 2001-06-28 08:06:56:

File Added - 7876: utf.patch

hobbs added on 2001-06-28 08:06:55:
Logged In: YES 
user_id=72656

I solved this with a slightly cleaner patch that is 
attached for 8.4a3cvs.

hobbs added on 2001-06-20 13:31:55:
Logged In: YES 
user_id=72656

Not sure why this got closed, but it is still a valid bug 
in 8.3.3/8.4a2.

cc_benny added on 2001-01-29 23:47:35:
The problem still exists in 8.4 in CVS, and it doesn't seem to be fixed in the 8.3.2 branch either.

I attach a patch against the main branch in CVS here:

Index: tclUtf.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclUtf.c,v
retrieving revision 1.14
diff -c -r1.14 tclUtf.c
*** tclUtf.c 2000/06/05 23:36:21 1.14
--- tclUtf.c 2001/01/29 16:46:49
***************
*** 781,787 ****
   * backslash sequence. */
  {
      register CONST char *p = src+1;
!     int result, count, n;
      char buf[TCL_UTF_MAX];
  
      if (dst == NULL) {
--- 781,787 ----
   * backslash sequence. */
  {
      register CONST char *p = src+1;
!     int result, count, n, utfconvert;
      char buf[TCL_UTF_MAX];
  
      if (dst == NULL) {
***************
*** 789,794 ****
--- 789,795 ----
      }
  
      count = 2;
+     utfconvert = 0; /*false*/
      switch (*p) {
  /*
           * Note: in the conversions below, use absolute values (e.g.,
***************
*** 820,825 ****
--- 821,827 ----
              result = 0xb;
              break;
          case 'x':
+     utfconvert = 1; /*true*/
              if (isxdigit(UCHAR(p[1]))) { /* INTL: digit */
                  char *end;
  
***************
*** 831,836 ****
--- 833,839 ----
              }
              break;
  case 'u':
+     utfconvert = 1; /*true*/
      result = 0;
      for (count = 0; count < 4; count++) {
  p++;
***************
*** 868,873 ****
--- 871,877 ----
       * Check for an octal number \oo?o?
       */
      if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */
+         utfconvert = 1; /*true*/
  result = (unsigned char)(*p - '0');
  p++;
  if (!isdigit(UCHAR(*p)) || (UCHAR(*p) >= '8')) { /* INTL: digit */
***************
*** 891,897 ****
      if (readPtr != NULL) {
  *readPtr = count;
      }
!     return Tcl_UniCharToUtf(result, dst);
  }
  
  /*
--- 895,909 ----
      if (readPtr != NULL) {
  *readPtr = count;
      }
!     if (!utfconvert)
!     {
! *dst = (char)result;
!         return 1;
!     }
!     else
!     {
! return Tcl_UniCharToUtf(result, dst);
!     }
  }
  
  /*

Attachments: