[Mono-dev] Encoding bug in System.Console

wj wall_john at sohu.com
Mon Jan 16 05:02:39 EST 2006


In mcs/class/corlib/System/Console.cs, there is a bug in how the console's
encoding is determined:
				  
static Console ()
{
	int code_page = 0;
	Encoding.InternalCodePage (ref code_page);
	Encoding encoding;

	if (Environment.IsRunningOnWindows) {
		//
		// On Windows, follow the Windows tradition
		//
		encoding = Encoding.Default;
	} else {
		//
		// On Unix systems (128), do not output the
		// UTF-8 ZWNBSP (zero-width non-breaking space).
		//
		if (code_page == UTF8Encoding.UTF8_CODE_PAGE || ((code_page &
0x10000000) != 0))
			encoding = Encoding.UTF8Unmarked;
		else
			encoding = Encoding.Default;
	}
    ...
}
If the function "InternalCodePage" cannot determine a suitable code page
number, it sets "code_page" to -1. Because -1 has every bit set,
"(code_page & 0x10000000) != 0" is then true, which is not correct.
Second, "code_page == UTF8Encoding.UTF8_CODE_PAGE" will never be true,
because after "InternalCodePage" is invoked, "code_page" is either -1 or
an internal index in the range 0 to 6, not an actual code page number.

In mcs/class/corlib/System.Text/Encoding.cs, there is a correct
example:
public static Encoding Default
{
	...
	if (defaultEncoding == null) {
		// See if the underlying system knows what
		// code page handler we should be using.
		int code_page = 1;
		
		string code_page_name = InternalCodePage (ref code_page);
		try {
			if (code_page == -1)
				defaultEncoding = GetEncoding (code_page_name);
			else {
				// map the codepage from internal to our numbers
				code_page = code_page & 0x0fffffff;
				switch (code_page){
				case 1: code_page = ASCIIEncoding.ASCII_CODE_PAGE; break;
				case 2: code_page = UTF7Encoding.UTF7_CODE_PAGE; break;
				case 3: code_page = UTF8Encoding.UTF8_CODE_PAGE; break;
				case 4: code_page = UnicodeEncoding.UNICODE_CODE_PAGE; break;
				case 5: code_page = UnicodeEncoding.BIG_UNICODE_CODE_PAGE; break;
				case 6: code_page = Latin1Encoding.ISOLATIN_CODE_PAGE; break;
				}
				defaultEncoding = GetEncoding (code_page);
			}
		} catch (NotSupportedException) {
			defaultEncoding = UTF8Unmarked;
		}
		defaultEncoding.is_readonly = true;
	}
	...
}

patch:

diff -urN mcs/class/corlib/System/Console.cs.orig
mcs/class/corlib/System/Console.cs
--- mcs/class/corlib/System/Console.cs.orig     2006-01-16
16:48:24.000000000 +0800
+++ mcs/class/corlib/System/Console.cs  2006-01-16 17:08:49.000000000
+0800
@@ -63,7 +63,8 @@
                                // On Unix systems (128), do not output
the
                                // UTF-8 ZWNBSP (zero-width non-breaking
space).
                                //
-                               if (code_page ==
UTF8Encoding.UTF8_CODE_PAGE || ((code_page & 0x10000000) != 0))
+                               if (code_page != -1 && ((code_page &
0x0fffffff) == 3 // UTF8Encoding.UTF8_CODE_PAGE
+                                        || ((code_page & 0x10000000) !=
0)))
                                        encoding =
Encoding.UTF8Unmarked;
                                else
                                        encoding = Encoding.Default;

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.ximian.com/pipermail/mono-devel-list/attachments/20060116/30aa0bdb/attachment.html 


More information about the Mono-devel-list mailing list