[Mono-dev] Encoding bug in System.Console
wj
wall_john at sohu.com
Mon Jan 16 05:02:39 EST 2006
In mcs/class/corlib/System/Console.cs, there is a bug in how the console's
encoding is determined:
static Console ()
{
int code_page = 0;
Encoding.InternalCodePage (ref code_page);
Encoding encoding;
if (Environment.IsRunningOnWindows) {
//
// On Windows, follow the Windows tradition
//
encoding = Encoding.Default;
} else {
//
// On Unix systems (128), do not output the
// UTF-8 ZWNBSP (zero-width non-breaking space).
//
if (code_page == UTF8Encoding.UTF8_CODE_PAGE || ((code_page &
0x10000000) != 0))
encoding = Encoding.UTF8Unmarked;
else
encoding = Encoding.Default;
}
...
}
First, if the function "InternalCodePage" cannot determine a suitable code page
number,
it sets "code_page" to -1 (all bits set), so "(code_page & 0x10000000) != 0"
evaluates to true and UTF8Unmarked is chosen, which is not correct.
Second, "code_page == UTF8Encoding.UTF8_CODE_PAGE" can never be true,
because
after "InternalCodePage" is invoked, "code_page" is an integer in the range 0
to 6, or -1.
mcs/class/corlib/System.Text/Encoding.cs contains a correct
example:
public static Encoding Default
{
...
if (defaultEncoding == null) {
// See if the underlying system knows what
// code page handler we should be using.
int code_page = 1;
string code_page_name = InternalCodePage (ref code_page);
try {
if (code_page == -1)
defaultEncoding = GetEncoding (code_page_name);
else {
// map the codepage from internal to our numbers
code_page = code_page & 0x0fffffff;
switch (code_page){
case 1: code_page = ASCIIEncoding.ASCII_CODE_PAGE; break;
case 2: code_page = UTF7Encoding.UTF7_CODE_PAGE; break;
case 3: code_page = UTF8Encoding.UTF8_CODE_PAGE; break;
case 4: code_page = UnicodeEncoding.UNICODE_CODE_PAGE; break;
case 5: code_page = UnicodeEncoding.BIG_UNICODE_CODE_PAGE; break;
case 6: code_page = Latin1Encoding.ISOLATIN_CODE_PAGE; break;
}
defaultEncoding = GetEncoding (code_page);
}
} catch (NotSupportedException) {
defaultEncoding = UTF8Unmarked;
}
defaultEncoding.is_readonly = true;
}
...
}
patch:
diff -urN mcs/class/corlib/System/Console.cs.orig
mcs/class/corlib/System/Console.cs
--- mcs/class/corlib/System/Console.cs.orig 2006-01-16
16:48:24.000000000 +0800
+++ mcs/class/corlib/System/Console.cs 2006-01-16 17:08:49.000000000
+0800
@@ -63,7 +63,8 @@
// On Unix systems (128), do not output
the
// UTF-8 ZWNBSP (zero-width non-breaking
space).
//
- if (code_page ==
UTF8Encoding.UTF8_CODE_PAGE || ((code_page & 0x10000000) != 0))
+ if (code_page != -1 && ((code_page &
0x0fffffff) == 3 // UTF8Encoding.UTF8_CODE_PAGE
+ || ((code_page & 0x10000000) !=
0)))
encoding =
Encoding.UTF8Unmarked;
else
encoding = Encoding.Default;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.ximian.com/pipermail/mono-devel-list/attachments/20060116/30aa0bdb/attachment.html
More information about the Mono-devel-list
mailing list