[Mono-dev] Re: [Mono-devel-list] mcs patch for default encoding
Marek Safar
marek.safar at seznam.cz
Tue Aug 23 03:06:43 EDT 2005
Hello Eno,
Could you write some tests to cover this functionality? I mean, e.g., a
simple test file with a UTF header.
Thanks,
Marek
> Hi again,
>
>> Agreed. In fact, I was also fixing bug #75065, maybe duplicate.
>> I have a fix for UTF8Encoding, but it uncovered another mcs bug
>> which does not handle files with BOM with specific encoding.
>> To summarize the situation:
>>
>> - Currently driver.cs does not process source files with
>> the default encoding.
>> - UTF8Encoding.cs does not handle U+FEFF correctly.
>> - When we fix UTF8Encoding.cs to handle U+FEFF, it starts
>> to reject some source files which have a BOM.
>> (CS8025:Parsing error)
>> - Even if we fix driver.cs to let StreamReader consider BOM
>> (currently we disable it), there are still some files
>> borking.
>>
>> I am digging into this bug in depth. Hopefully I'll post a set of
>> fixes later.
>
>
> ... and now I have finished the fixes, as done in the attached patch:
>
> - driver.cs :
> a) uses Encoding.Default for the default input.
> b) always passes true for BOM detection, in all cases.
> - support.cs : Handle preamble_size precisely.
> - UTF8Encoding.cs : it should not skip U+FEFF. This fixes
> bug #73086 and #75065.
>
> They should be applied at the same time, except for a).
>
> Atsushi Eno
>
>------------------------------------------------------------------------
>
>Index: mcs/driver.cs
>===================================================================
>--- mcs/driver.cs (revision 48630)
>+++ mcs/driver.cs (working copy)
>@@ -91,16 +91,11 @@
> static DateTime last_time, first_time;
>
> //
>- // Encoding: ISO-Latin1 is 28591
>+ // Encoding.
> //
> static Encoding encoding;
>
>- //
>- // Whether the user has specified a different encoder manually
>- //
>- static bool using_default_encoder = true;
>
>-
> static public void Reset ()
> {
> want_debugging_support = false;
>@@ -114,7 +109,6 @@
> defines = null;
> output_file = null;
> encoding = null;
>- using_default_encoder = true;
> first_source = null;
> }
>
>@@ -158,7 +152,7 @@
> }
>
> using (input){
>- SeekableStreamReader reader = new SeekableStreamReader (input, encoding, using_default_encoder);
>+ SeekableStreamReader reader = new SeekableStreamReader (input, encoding, true);
> Tokenizer lexer = new Tokenizer (reader, file, defines);
> int token, tokens = 0, errors = 0;
>
>@@ -186,7 +180,7 @@
> return;
> }
>
>- SeekableStreamReader reader = new SeekableStreamReader (input, encoding, using_default_encoder);
>+ SeekableStreamReader reader = new SeekableStreamReader (input, encoding, true);
>
> // Check 'MZ' header
> if (reader.Read () == 77 && reader.Read () == 90) {
>@@ -1308,21 +1302,15 @@
>
> if (value == "utf8"){
> encoding = new UTF8Encoding();
>- using_default_encoder = false;
> return true;
> }
> if (value == "reset"){
>- //
>- // 28591 is the code page for ISO-8859-1 encoding.
>- //
>- cp = 28591;
>- using_default_encoder = true;
>+ cp = Encoding.Default.CodePage;
> }
>
> try {
> cp = Int32.Parse (value);
> encoding = Encoding.GetEncoding (cp);
>- using_default_encoder = false;
> } catch {
> Report.Error (2016, "Code page `{0}' is invalid or not installed", value);
> }
>@@ -1373,13 +1361,8 @@
> int i;
> bool parsing_options = true;
>
>- try {
>- encoding = Encoding.GetEncoding (28591);
>- } catch {
>- Console.WriteLine ("Error: could not load encoding 28591, trying 1252");
>- encoding = Encoding.GetEncoding (1252);
>- }
>-
>+ encoding = Encoding.Default;
>+
> references = new ArrayList ();
> soft_references = new ArrayList ();
> modules = new ArrayList ();
>Index: mcs/support.cs
>===================================================================
>--- mcs/support.cs (revision 48630)
>+++ mcs/support.cs (working copy)
>@@ -359,36 +359,8 @@
> // Let the StreamWriter autodetect the encoder
> reader.Peek ();
>
>- reader.BaseStream.Position = 0;
> Encoding enc = reader.CurrentEncoding;
>- // First of all, get at least a char
>-
>- byte[] auxb = new byte [50];
>- int num_bytes = 0;
>- int num_chars = 0;
>- int br = 0;
>- do {
>- br = reader.BaseStream.Read (auxb, num_bytes, auxb.Length - num_bytes);
>- num_bytes += br;
>- num_chars = enc.GetCharCount (auxb, 0, num_bytes);
>- }
>- while (num_chars == 0 && br > 0);
>-
>- if (num_chars != 0)
>- {
>- // Now, check which bytes at the beginning have no effect in the
>- // char count
>-
>- int p = 0;
>- while (enc.GetCharCount (auxb, p, num_bytes-p) >= num_chars)
>- p++;
>-
>- preamble_size = p - 1;
>- reader.BaseStream.Position = 0;
>- reader.DiscardBufferedData ();
>-
>- buffer_start = preamble_size;
>- }
>+ preamble_size = (int) reader.BaseStream.Position;
> }
>
> public SeekableStreamReader (Stream stream, Encoding encoding, bool detect_encoding_from_bytemarks)
>Index: class/corlib/System.Text/UTF8Encoding.cs
>===================================================================
>--- class/corlib/System.Text/UTF8Encoding.cs (revision 48630)
>+++ class/corlib/System.Text/UTF8Encoding.cs (working copy)
>@@ -426,7 +426,7 @@
> if (++leftSoFar >= leftSize) {
> // We have a complete character now.
> if (leftBits < (uint)0x10000) {
>- if (leftBits != (uint)0xFEFF) {
>+// if (leftBits != (uint)0xFEFF) {
> // is it an overlong ?
> bool overlong = false;
> switch (leftSize) {
>@@ -452,7 +452,7 @@
> }
> else
> ++length;
>- }
>+// }
> } else if (leftBits < (uint)0x110000) {
> length += 2;
> } else if (throwOnInvalid) {
>@@ -571,7 +571,7 @@
> if (++leftSoFar >= leftSize) {
> // We have a complete character now.
> if (leftBits < (uint)0x10000) {
>- if (leftBits != (uint)0xFEFF) {
>+// if (leftBits != (uint)0xFEFF) {
> // is it an overlong ?
> bool overlong = false;
> switch (leftSize) {
>@@ -602,7 +602,7 @@
> }
> chars[posn++] = (char)leftBits;
> }
>- }
>+// }
> } else if (leftBits < (uint)0x110000) {
> if ((posn + 2) > length) {
> throw new ArgumentException
>
>
>------------------------------------------------------------------------
>
>_______________________________________________
>Mono-devel-list mailing list
>Mono-devel-list at lists.ximian.com
>http://lists.ximian.com/mailman/listinfo/mono-devel-list
>
>
More information about the Mono-devel-list
mailing list