Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | another solution with auto recognition (without fixed cpBomTable) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | bug-3466099 |
Files: | files | file ages | folders |
SHA1: |
8da0451f94965ca03f713ebfc5f99756 |
User & Date: | sebres 2012-03-06 14:18:48 |
Context
2012-03-06
| ||
14:18 | another solution with auto recognition (without fixed cpBomTable) Closed-Leaf check-in: 8da0451f94 user: sebres tags: bug-3466099 | |
2012-03-01
| ||
22:48 | experiment: Handle BOM by switching to utf-8 when the system encoding is cp1250-cp1258 or identity (... check-in: 110d7c7820 user: jan.nijtmans tags: bug-3466099 | |
Changes
Changes to generic/tclIOUtil.c.
︙ | ︙ | |||
1728 1729 1730 1731 1732 1733 1734 | * Depends on the commands in the file. During the evaluation of the * contents of the file, iPtr->scriptFile is made to point to pathPtr * (the old value is cached and replaced when this function returns). * *---------------------------------------------------------------------- */ | < < < < < < < < < < < < < < < | < > | 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 | * Depends on the commands in the file. During the evaluation of the * contents of the file, iPtr->scriptFile is made to point to pathPtr * (the old value is cached and replaced when this function returns). * *---------------------------------------------------------------------- */ int Tcl_FSEvalFileEx( Tcl_Interp *interp, /* Interpreter in which to process file. */ Tcl_Obj *pathPtr, /* Path of file to process. Tilde-substitution * will be performed on this name. */ const char *encodingName) /* If non-NULL, then use this encoding for the * file. NULL means use the system encoding. */ { int length, result = TCL_ERROR; Tcl_StatBuf statBuf; Tcl_Obj *oldScriptFile; Interp *iPtr; Tcl_Encoding encoding = NULL; char *string; int len; Tcl_Channel chan; Tcl_Obj *objPtr; if (Tcl_FSGetNormalizedPath(interp, pathPtr) == NULL) { return result; } |
︙ | ︙ | |||
1787 1788 1789 1790 1791 1792 1793 | * The eofchar is \32 (^Z). This is the usual on Windows, but we effect * this cross-platform to allow for scripted documents. [Bug: 2040] */ Tcl_SetChannelOption(interp, chan, "-eofchar", "\32"); /* | | < < < | | | | < < < < < < < < < < < < < < | | | > | < > | | > > > > > > > > > | | | > > | > > > > > > > > > | > > | > > > > > > > > > > > > > > > > > > | 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 | * The eofchar is \32 (^Z). This is the usual on Windows, but we effect * this cross-platform to allow for scripted documents. [Bug: 2040] */ Tcl_SetChannelOption(interp, chan, "-eofchar", "\32"); /* * Set channel encoding to utf-8 to read first characters (BOM char?). */ if (Tcl_SetChannelOption(interp, chan, "-encoding", "utf-8") != TCL_OK) { Tcl_Close(interp,chan); return result; } objPtr = Tcl_NewObj(); Tcl_IncrRefCount(objPtr); /* Try to read first 4 characters of stream in utf-8, so we can * check for utf-8 BOM to be handled especially. [Bug 3466099] */ if (Tcl_ReadChars(chan, objPtr, 4, 0) < 0) { Tcl_Close(interp, chan); Tcl_AppendResult(interp, "couldn't read file \"", Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL); goto end; } string = TclGetStringFromObj(objPtr, &len); /* * If first characters is not a BOM, convert it to given or system encoding. */ if (len < 3 || memcmp(string, "\xef\xbb\xbf", 3) != 0) { Tcl_DString ds; /* * If the encoding is not specified, set it to the system encoding, * hereafter, bypass utf-8, otherwise set it to the channel. * Report error on unknown encoding. */ if (encodingName == NULL || *encodingName == 0) { encodingName = Tcl_GetEncodingName(NULL); } if (strcmp(encodingName, "utf-8") != 0) { if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) != TCL_OK) { Tcl_Close(interp, chan); goto end; } /* * Convert first char from utf-8 (as it were binary) to * encoding 'encodingName'. */ encoding = Tcl_GetEncoding(interp, encodingName); if (encoding == NULL) { Tcl_Close(interp, chan); goto end; }; string = (char *) Tcl_GetByteArrayFromObj(objPtr, &len); Tcl_ExternalToUtfDString(encoding, string, len, &ds); Tcl_DecrRefCount(objPtr); objPtr = Tcl_NewStringObj( Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); Tcl_IncrRefCount(objPtr); Tcl_DStringFree(&ds); }; } else if (len >= 1) { /* * Remove BOM characters. */ Tcl_Obj * newPtr = Tcl_NewStringObj(string+3, len-3); Tcl_DecrRefCount(objPtr); objPtr = newPtr; Tcl_IncrRefCount(objPtr); } /* * Append the remaining characters read from channel. */ if (Tcl_ReadChars(chan, objPtr, -1, 1) < 0) { Tcl_Close(interp, chan); Tcl_AppendResult(interp, "couldn't read file \"", Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL); goto end; } |
︙ | ︙ | |||
1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 | "\n (file \"%.*s%s\" line %d)", (overflow ? limit : length), pathString, (overflow ? "..." : ""), interp->errorLine)); } end: Tcl_DecrRefCount(objPtr); return result; } /* *---------------------------------------------------------------------- * * Tcl_GetErrno -- | > > > | 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 | "\n (file \"%.*s%s\" line %d)", (overflow ? limit : length), pathString, (overflow ? "..." : ""), interp->errorLine)); } end: Tcl_DecrRefCount(objPtr); if (encoding != NULL) Tcl_FreeEncoding(encoding); return result; } /* *---------------------------------------------------------------------- * * Tcl_GetErrno -- |
︙ | ︙ |