Tcl Source Code

Artifact [3ceed0d82c]
Login

Artifact 3ceed0d82c0367dfbb182ef0141f668a32b3c075:

Attachment "langinfo3.diff" to ticket [418645ffff] added by hobbs 2001-11-20 16:14:58.
Index: unix/tclUnixInit.c
===================================================================
RCS file: /cvsroot/tcl/tcl/unix/tclUnixInit.c,v
retrieving revision 1.24
diff -b -u -r1.24 tclUnixInit.c
--- unix/tclUnixInit.c	2001/08/27 02:14:08	1.24
+++ unix/tclUnixInit.c	2001/11/20 08:59:51
@@ -7,12 +7,15 @@
  * Copyright (c) 1999 by Scriptics Corporation.
  * All rights reserved.
  *
- * RCS: @(#) $Id: tclUnixInit.c,v 1.24 2001/08/27 02:14:08 dgp Exp $
+ * RCS: @(#) $Id: tclUnixInit.c,v 1.27 2001/11/17 00:10:40 hobbs Exp $
  */
 
 #include "tclInt.h"
 #include "tclPort.h"
 #include <locale.h>
+#ifdef HAVE_LANGINFO
+#include <langinfo.h>
+#endif
 #if defined(__FreeBSD__)
 #   include <floatingpoint.h>
 #endif
@@ -35,6 +38,16 @@
 static int libraryPathEncodingFixed = 0;
 
 /*
+ * Tcl tries to use standard and homebrew methods to guess the right
+ * encoding on the platform.  However, there is always a final fallback,
+ * and this value is it.  Make sure it is a real Tcl encoding.
+ */
+
+#ifndef TCL_DEFAULT_ENCODING
+#define TCL_DEFAULT_ENCODING "iso8859-1"
+#endif
+
+/*
  * Default directory in which to look for Tcl library scripts.  The
  * symbol is defined by Makefile.
  */
@@ -51,7 +64,10 @@
 
 /*
  * The following table is used to map from Unix locale strings to
- * encoding files.
+ * encoding files.  If HAVE_LANGINFO is defined, then this is a fallback
+ * table when the result from nl_langinfo isn't a recognized encoding.
+ * Otherwise this is the first list checked for a mapping from env
+ * encoding to Tcl encoding name.
  */
 
 typedef struct LocaleTable {
@@ -60,6 +76,29 @@
 } LocaleTable;
 
 static CONST LocaleTable localeTable[] = {
+#ifdef HAVE_LANGINFO
+    {"gb2312-1980",	"gb2312"},
+#ifdef __hpux
+    {"SJIS",		"shiftjis"},
+    {"eucjp",		"euc-jp"},
+    {"euckr",		"euc-kr"},
+    {"euctw",		"euc-cn"},
+    {"greek8",		"cp869"},
+    {"iso88591",	"iso8859-1"},
+    {"iso88592",	"iso8859-2"},
+    {"iso88595",	"iso8859-5"},
+    {"iso88596",	"iso8859-6"},
+    {"iso88597",	"iso8859-7"},
+    {"iso88598",	"iso8859-8"},
+    {"iso88599",	"iso8859-9"},
+    {"iso885915",	"iso8859-15"},
+    {"roman8",		"iso8859-1"},
+    {"tis620",		"tis-620"},
+    {"turkish8",	"cp857"},
+    {"utf8",		"utf-8"},
+#endif /* __hpux */
+#endif /* HAVE_LANGINFO */
+
     {"ja_JP.SJIS",	"shiftjis"},
     {"ja_JP.EUC",	"euc-jp"},
     {"ja_JP.eucJP",     "euc-jp"},
@@ -393,19 +432,91 @@
 TclpSetInitialEncodings()
 {
     if (libraryPathEncodingFixed == 0) {
-	CONST char *encoding;
-	int i;
+	CONST char *encoding = NULL;
+	int i, setSysEncCode = TCL_ERROR;
 	Tcl_Obj *pathPtr;
-	char *langEnv;
 
 	/*
 	 * Determine the current encoding from the LC_* or LANG environment
 	 * variables.  We previously used setlocale() to determine the locale,
 	 * but this does not work on some systems (e.g. Linux/i386 RH 5.0).
 	 */
+#ifdef HAVE_LANGINFO
+	if (setlocale(LC_CTYPE, "") != NULL) {
+	    Tcl_DString ds;
+
+	    /*
+	     * Use a DString so we can overwrite it in name compatability
+	     * checks below.
+	     */
 
-	langEnv = getenv("LC_ALL");
+	    Tcl_DStringInit(&ds);
+	    Tcl_DStringAppend(&ds, nl_langinfo(CODESET), -1);
+	    encoding = Tcl_DStringValue(&ds);
+
+	    Tcl_UtfToLower(Tcl_DStringValue(&ds));
+#ifdef HAVE_LANGINFO_DEBUG
+	    fprintf(stderr, "encoding '%s'", encoding);
+#endif
+	    if (encoding[0] == 'i' && encoding[1] == 's' && encoding[2] == 'o'
+		    && encoding[3] == '-') {
+		char *p, *q;
+		/* need to strip '-' from iso-* encoding */
+		for(p = Tcl_DStringValue(&ds)+3, q = Tcl_DStringValue(&ds)+4;
+		    *p; *p++ = *q++);
+	    } else if (encoding[0] == 'i' && encoding[1] == 'b'
+		    && encoding[2] == 'm' && encoding[3] >= '0'
+		    && encoding[3] <= '9') {
+		char *p, *q;
+		/* if langinfo reports "ibm*" we should use "cp*" */
+		p = Tcl_DStringValue(&ds);
+		*p++ = 'c'; *p++ = 'p';
+		for(q = p+1; *p ; *p++ = *q++);
+	    } else if ((*encoding == '\0')
+		    || !strcmp(encoding, "ansi_x3.4-1968")) {
+		/* Use iso8859-1 for empty or 'ansi_x3.4-1968' encoding */
+		encoding = "iso8859-1";
+	    }
+#ifdef HAVE_LANGINFO_DEBUG
+	    fprintf(stderr, " ?%s?", encoding);
+#endif
+	    setSysEncCode = Tcl_SetSystemEncoding(NULL, encoding);
+	    if (setSysEncCode != TCL_OK) {
+		/*
+		 * If this doesn't return TCL_OK, the encoding returned by
+		 * nl_langinfo or as we translated it wasn't accepted.  Do
+		 * this fallback check.  If this fails, we will enter the
+		 * old fallback below.
+		 */
 
+		for (i = 0; localeTable[i].lang != NULL; i++) {
+		    if (strcmp(localeTable[i].lang, encoding) == 0) {
+			setSysEncCode = Tcl_SetSystemEncoding(NULL,
+				localeTable[i].encoding);
+			break;
+		    }
+		}
+	    }
+#ifdef HAVE_LANGINFO_DEBUG
+	    fprintf(stderr, " => '%s'\n", encoding);
+#endif
+	    Tcl_DStringFree(&ds);
+	}
+#ifdef HAVE_LANGINFO_DEBUG
+	else {
+	    fprintf(stderr, "setlocale returned NULL\n");
+	}
+#endif
+#endif /* HAVE_LANGINFO */
+
+	if (setSysEncCode != TCL_OK) {
+	    /*
+	     * Classic fallback check.  This tries a homebrew algorithm to
+	     * determine what encoding should be used based on env vars.
+	     */
+	    char *langEnv = getenv("LC_ALL");
+	    encoding = NULL;
+
 	if (langEnv == NULL || langEnv[0] == '\0') {
 	    langEnv = getenv("LC_CTYPE");
 	}
@@ -416,7 +527,6 @@
 	    langEnv = NULL;
 	}
 
-	encoding = NULL;
 	if (langEnv != NULL) {
 	    for (i = 0; localeTable[i].lang != NULL; i++) {
 		if (strcmp(localeTable[i].lang, langEnv) == 0) {
@@ -444,32 +554,39 @@
 
 		    encoding = Tcl_DStringValue(&ds);
 		    Tcl_UtfToLower(Tcl_DStringValue(&ds));
-		    if (Tcl_SetSystemEncoding(NULL, encoding) == TCL_OK) {
-			Tcl_DStringFree(&ds);
-			goto resetPath;
+			setSysEncCode = Tcl_SetSystemEncoding(NULL, encoding);
+			if (setSysEncCode != TCL_OK) {
+			    encoding = NULL;
 		    }
 		    Tcl_DStringFree(&ds);
-		    encoding = NULL;
 		}
 	    }
+#ifdef HAVE_LANGINFO_DEBUG
+		fprintf(stderr, "encoding fallback check '%s' => '%s'\n",
+			langEnv, encoding);
+#endif
 	}
+	    if (setSysEncCode != TCL_OK) {
 	if (encoding == NULL) {
-	    encoding = "iso8859-1";
+		    encoding = TCL_DEFAULT_ENCODING;
 	}
 
 	Tcl_SetSystemEncoding(NULL, encoding);
+	    }
 
-	resetPath:
 	/*
 	 * Initialize the C library's locale subsystem.  This is required
 	 * for input methods to work properly on X11.  We only do this for
 	 * LC_CTYPE because that's the necessary one, and we don't want to
-	 * affect LC_TIME here.  The side effect of setting the default locale
-	 * should be to load any locale specific modules that are needed by X.
-	 * [BUG: 5422 3345 4236 2522 2521].
+	     * affect LC_TIME here.  The side effect of setting the default
+	     * locale should be to load any locale specific modules that are
+	     * needed by X.  [BUG: 5422 3345 4236 2522 2521].
+	     * In HAVE_LANGINFO, this call is already done above.
 	 */
-
+#ifndef HAVE_LANGINFO
 	setlocale(LC_CTYPE, "");
+#endif
+	}
 
 	/*
 	 * In case the initial locale is not "C", ensure that the numeric