[Mono-dev] mcs patch to say goodbye to SeekableStreamReader
Kornél Pál
kornelpal at hotmail.com
Tue Aug 30 10:26:58 EDT 2005
Hi,
I tried your patch with ComIStreamMarshaler.cs and used -codepage:65001. It
worked as expected, but it fails with SVN head. The expected behaviour is to
compile without errors, so your patch solves the bug.
And I agree that SeekableStreamReader is too tricky a solution and seems to
be the cause of the bug. Maybe SeekableStreamReader could be fixed, but if
there is no need to use it, I think it's better to remove it to avoid
possible bugs.
I've done some tests with files read as UTF-8 but containing invalid byte
sequences, even with a BOM, and it worked as expected.
If this patch does not cause any regressions I support it.
I think the patch should be ported to gmcs as well.
I think we should return to using Encoding.Default as the default encoding in
mcs but should configure mcs using -codepage:28591 -codepage:1252 in
mcs/build to use that encoding.
Kornél
----- Original Message -----
From: "Atsushi Eno" <atsushi at ximian.com>
To: "mono-devel mailing list" <mono-devel-list at lists.ximian.com>
Sent: Monday, August 29, 2005 1:55 PM
Subject: [Mono-dev] mcs patch to say goodbye to SeekableStreamReader
> Hi,
>
> Now I blame SeekableStreamReader for bringing in the UTF8-related bug
> (without proof ;-) so I made a patch to eliminate this class.
>
> Additionally, I made a tiny modification for the 'MZ' executable check
> (even with a test case bom-mz.cs that differentiates csc and mcs).
>
> I'm not sure if it really is the culprit, but now we don't have
> tricky stream usage, so now the code should be a bit healthier.
>
> Atsushi Eno
>
--------------------------------------------------------------------------------
> Index: support.cs
> ===================================================================
> --- support.cs (revision 49060)
> +++ support.cs (working copy)
> @@ -344,6 +344,7 @@
> }
> }
>
> +/*
> /// <summary>
> /// This is a wrapper around StreamReader which is seekable.
> /// </summary>
> @@ -442,6 +443,7 @@
> return buffer [pos++];
> }
> }
> +*/
>
> public class DoubleHash {
> const int DEFAULT_INITIAL_BUCKETS = 100;
> Index: cs-tokenizer.cs
> ===================================================================
> --- cs-tokenizer.cs (revision 49060)
> +++ cs-tokenizer.cs (working copy)
> @@ -31,7 +31,7 @@
>
> public class Tokenizer : yyParser.yyInput
> {
> - SeekableStreamReader reader;
> + StreamReader reader;
> SourceFile ref_name;
> SourceFile file_name;
> int ref_line = 1;
> @@ -45,6 +45,8 @@
> Location current_location;
> Location current_comment_location = Location.Null;
> ArrayList escapedIdentifiers = new ArrayList ();
> + SavedToken saved_token = SavedToken.Null;
> + bool putback_ambiguous_close_parens = false;
>
> //
> // XML documentation buffer. The save point is used to divide
> @@ -385,7 +387,7 @@
> defines [def] = true;
> }
>
> - public Tokenizer (SeekableStreamReader input, SourceFile file, ArrayList
> defs)
> + public Tokenizer (StreamReader input, SourceFile file, ArrayList defs)
> {
> this.ref_name = file;
> this.file_name = file;
> @@ -467,19 +469,11 @@
>
> --deambiguate_close_parens;
>
> - // Save current position and parse next token.
> - int old = reader.Position;
> - int old_ref_line = ref_line;
> - int old_col = col;
> -
> - // disable preprocessing directives when peeking
> - process_directives = false;
> + // Save next token.
> + Location cur_loc = current_location;
> int new_token = token ();
> - process_directives = true;
> - reader.Position = old;
> - ref_line = old_ref_line;
> - col = old_col;
> - putback_char = -1;
> + saved_token = new SavedToken (new_token, val, Location);
> + current_location = cur_loc;
>
> if (new_token == Token.OPEN_PARENS)
> return Token.CLOSE_PARENS_OPEN_PARENS;
> @@ -658,7 +652,7 @@
>
> public void Deambiguate_CloseParens ()
> {
> - putback (')');
> + putback_ambiguous_close_parens = true;
> deambiguate_close_parens++;
> }
>
> @@ -1087,6 +1081,10 @@
> int getChar ()
> {
> int x;
> + if (putback_ambiguous_close_parens) {
> + putback_ambiguous_close_parens = false;
> + return ')';
> + }
> if (putback_char != -1) {
> x = putback_char;
> putback_char = -1;
> @@ -1106,6 +1104,8 @@
>
> int peekChar ()
> {
> + if (putback_ambiguous_close_parens)
> + return ')';
> if (putback_char != -1)
> return putback_char;
> putback_char = reader.Read ();
> @@ -1114,6 +1114,8 @@
>
> int peekChar2 ()
> {
> + if (putback_ambiguous_close_parens)
> + return ')';
> if (putback_char != -1)
> return putback_char;
> return reader.Peek ();
> @@ -1202,7 +1204,14 @@
>
> public int token ()
> {
> - current_token = xtoken ();
> + if (!saved_token.Location.IsNull) {
> + current_token = saved_token.Token;
> + val = saved_token.Value;
> + current_location = saved_token.Location;
> + saved_token = SavedToken.Null;
> + }
> + else
> + current_token = xtoken ();
> return current_token;
> }
>
> @@ -1844,29 +1853,21 @@
> }
>
> if (res == Token.PARTIAL) {
> - // Save current position and parse next token.
> - int old = reader.Position;
> - int old_putback = putback_char;
> - int old_ref_line = ref_line;
> - int old_col = col;
> -
> - putback_char = -1;
> -
> + // Save next token.
> + Location cur_loc = Location;
> int next_token = token ();
> + saved_token = new SavedToken (next_token, val, Location);
> + current_location = cur_loc;
> bool ok = (next_token == Token.CLASS) ||
> (next_token == Token.STRUCT) ||
> (next_token == Token.INTERFACE) ||
> (next_token == Token.ENUM); // "partial" is a keyword in 'partial enum',
> even though it's not valid
>
> - reader.Position = old;
> - ref_line = old_ref_line;
> - col = old_col;
> - putback_char = old_putback;
> -
> if (ok)
> return res;
> else {
> val = new LocatedToken (Location, "partial");
> + saved_token = SavedToken.Null;
> return Token.IDENTIFIER;
> }
> }
> @@ -2309,6 +2310,23 @@
> }
>
> }
> +
> + public struct SavedToken
> + {
> + public static readonly SavedToken Null =
> + new SavedToken (0, null, Location.Null);
> +
> + public readonly int Token;
> + public readonly object Value;
> + public readonly Location Location;
> +
> + public SavedToken (int token, object value, Location loc)
> + {
> + Token = token;
> + Value = value;
> + Location = loc;
> + }
> + }
> }
>
> //
> Index: cs-parser.jay
> ===================================================================
> --- cs-parser.jay (revision 49060)
> +++ cs-parser.jay (working copy)
> @@ -4856,7 +4856,7 @@
> }
> }
>
> -public CSharpParser (SeekableStreamReader reader, SourceFile file,
> ArrayList defines)
> +public CSharpParser (StreamReader reader, SourceFile file, ArrayList
> defines)
> {
> current_namespace = new NamespaceEntry (null, file, null, Location.Null);
> this.name = file.Name;
> Index: driver.cs
> ===================================================================
> --- driver.cs (revision 49060)
> +++ driver.cs (working copy)
> @@ -153,7 +153,7 @@
> }
>
> using (input){
> - SeekableStreamReader reader = new SeekableStreamReader (input,
> encoding);
> + StreamReader reader = new StreamReader (input, encoding, true);
> Tokenizer lexer = new Tokenizer (reader, file, defines);
> int token, tokens = 0, errors = 0;
>
> @@ -181,16 +181,16 @@
> return;
> }
>
> - SeekableStreamReader reader = new SeekableStreamReader (input,
> encoding);
> -
> // Check 'MZ' header
> - if (reader.Read () == 77 && reader.Read () == 90) {
> + if (input.ReadByte () == 77 && input.ReadByte () == 90) {
> Report.Error (2015, "Source file `{0}' is a binary file and not a text
> file", file.Name);
> input.Close ();
> return;
> }
> + input.Position = 0;
>
> - reader.Position = 0;
> + StreamReader reader = new StreamReader (input, encoding, true);
> +
> parser = new CSharpParser (reader, file, defines);
> parser.ErrorOutput = Report.Stderr;
> try {
>
--------------------------------------------------------------------------------
> ・ソMZ
--------------------------------------------------------------------------------
> _______________________________________________
> Mono-devel-list mailing list
> Mono-devel-list at lists.ximian.com
> http://lists.ximian.com/mailman/listinfo/mono-devel-list
>
More information about the Mono-devel-list
mailing list