[Mono-dev] [PATCH] Encoding bug in System.Console

wj wall_john at sohu.com
Tue Jan 17 00:20:25 EST 2006


hi,

In mcs/class/corlib/System/Console.cs, there is a bug in how the console's
encoding is determined:
				  
static Console ()
{
	int code_page = 0;
	Encoding.InternalCodePage (ref code_page);
	Encoding encoding;

	if (Environment.IsRunningOnWindows) {
		//
		// On Windows, follow the Windows tradition
		//
		encoding = Encoding.Default;
	} else {
		//
		// On Unix systems (128), do not output the
		// UTF-8 ZWNBSP (zero-width non-breaking space).
		//
		if (code_page == UTF8Encoding.UTF8_CODE_PAGE || ((code_page &
0x10000000) != 0))
			encoding = Encoding.UTF8Unmarked;
		else
			encoding = Encoding.Default;
	}
    ...
}
First, if the function "InternalCodePage" cannot determine a suitable code
page number, it sets "code_page" to -1. Since -1 has every bit set,
"(code_page & 0x10000000) != 0" is then true and UTF-8 is selected, which is
not correct.
Second, "code_page == UTF8Encoding.UTF8_CODE_PAGE" will never be true,
because after "InternalCodePage" has been invoked, "code_page" is an integer
in the range 0 to 6, or -1.

In mcs/class/corlib/System.Text/Encoding.cs, there is a correct
example:
public static Encoding Default
{
	...
	if (defaultEncoding == null) {
		// See if the underlying system knows what
		// code page handler we should be using.
		int code_page = 1;
		
		string code_page_name = InternalCodePage (ref code_page);
		try {
			if (code_page == -1)
				defaultEncoding = GetEncoding (code_page_name);
			else {
				// map the codepage from internal to our numbers
				code_page = code_page & 0x0fffffff;
				switch (code_page){
				case 1: code_page = ASCIIEncoding.ASCII_CODE_PAGE; break;
				case 2: code_page = UTF7Encoding.UTF7_CODE_PAGE; break;
				case 3: code_page = UTF8Encoding.UTF8_CODE_PAGE; break;
				case 4: code_page = UnicodeEncoding.UNICODE_CODE_PAGE; break;
				case 5: code_page = UnicodeEncoding.BIG_UNICODE_CODE_PAGE; break;
				case 6: code_page = Latin1Encoding.ISOLATIN_CODE_PAGE; break;
				}
				defaultEncoding = GetEncoding (code_page);
			}
		} catch (NotSupportedException) {
			defaultEncoding = UTF8Unmarked;
		}
		defaultEncoding.is_readonly = true;
	}
	...
}

patch:

diff -urN mcs/class/corlib/System/Console.cs.orig mcs/class/corlib/System/Console.cs
--- mcs/class/corlib/System/Console.cs.orig	2006-01-16 16:48:24.000000000 +0800
+++ mcs/class/corlib/System/Console.cs	2006-01-16 17:08:49.000000000 +0800
@@ -63,7 +63,8 @@
 				// On Unix systems (128), do not output the
 				// UTF-8 ZWNBSP (zero-width non-breaking space).
 				//
-				if (code_page == UTF8Encoding.UTF8_CODE_PAGE || ((code_page & 0x10000000) != 0))
+				if (code_page != -1 && ((code_page & 0x0fffffff) == 3 // UTF8Encoding.UTF8_CODE_PAGE
+                                        || ((code_page & 0x10000000) != 0)))
 					encoding = Encoding.UTF8Unmarked;
 				else
 					encoding = Encoding.Default;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: console_encoding.diff
Type: text/x-patch
Size: 623 bytes
Desc: not available
Url : http://lists.ximian.com/pipermail/mono-devel-list/attachments/20060117/2820d2fb/attachment.bin 


More information about the Mono-devel-list mailing list