[Mono-dev] mcs patch to say goodbye to SeekableStreamReader

Kornél Pál kornelpal at hotmail.com
Tue Aug 30 10:26:58 EDT 2005


Hi,

I tried your patch with ComIStreamMarshaler.cs and used -codepage:65001. It
worked as expected but it fails with SVN head. The expected behaviour is to
compile without errors so you patch solves the bug.

And I agree that SeekableStreamReader is a too tricky solution and seems to
be the cause of the bug. Maybe SeekableStreamReader could be fixed but if
there is no need to use it I think it's better to remove it to avoid
possible bugs.

I've done some tests with files readed as UTF-8 but containing invalid byte
sequences even with BOM and it worked as expected.

If this patch does not cause any regressions I support it.

I think the patch should be ported to gmcs as well.

I think we should return using Encoding.Default as the default encoding in
mcs but should comfigure mcs using -codepage:28591 -codepage:1252 in
mcs/build to use that encoding.

Kornél

----- Original Message -----
From: "Atsushi Eno" <atsushi at ximian.com>
To: "mono-devel mailing list" <mono-devel-list at lists.ximian.com>
Sent: Monday, August 29, 2005 1:55 PM
Subject: [Mono-dev] mcs patch to say goodbye to SeekableStreamReader


> Hi,
>
> Now I blame SeekableStreamReader on bringing UTF8 related bug
> (without proof ;-) so I made a patch to eliminate this class.
>
> Additionally I made tiny modification for 'MZ' executable check
> (even with a test case bom-mz.cs that differentiates csc and mcs).
>
> I'm not sure if it really is the culprit, but now we don't have
> tricky stream usage, so now the code should be a bit healthy.
>
> Atsushi Eno
>


--------------------------------------------------------------------------------


> Index: support.cs
> ===================================================================
> --- support.cs (revision 49060)
> +++ support.cs (working copy)
> @@ -344,6 +344,7 @@
>  }
>  }
>
> +/*
>  /// <summary>
>  ///   This is a wrapper around StreamReader which is seekable.
>  /// </summary>
> @@ -442,6 +443,7 @@
>  return buffer [pos++];
>  }
>  }
> +*/
>
>  public class DoubleHash {
>  const int DEFAULT_INITIAL_BUCKETS = 100;
> Index: cs-tokenizer.cs
> ===================================================================
> --- cs-tokenizer.cs (revision 49060)
> +++ cs-tokenizer.cs (working copy)
> @@ -31,7 +31,7 @@
>
>  public class Tokenizer : yyParser.yyInput
>  {
> - SeekableStreamReader reader;
> + StreamReader reader;
>  SourceFile ref_name;
>  SourceFile file_name;
>  int ref_line = 1;
> @@ -45,6 +45,8 @@
>  Location current_location;
>  Location current_comment_location = Location.Null;
>  ArrayList escapedIdentifiers = new ArrayList ();
> + SavedToken saved_token = SavedToken.Null;
> + bool putback_ambiguous_close_parens = false;
>
>  //
>  // XML documentation buffer. The save point is used to divide
> @@ -385,7 +387,7 @@
>  defines [def] = true;
>  }
>
> - public Tokenizer (SeekableStreamReader input, SourceFile file, ArrayList
> defs)
> + public Tokenizer (StreamReader input, SourceFile file, ArrayList defs)
>  {
>  this.ref_name = file;
>  this.file_name = file;
> @@ -467,19 +469,11 @@
>
>  --deambiguate_close_parens;
>
> - // Save current position and parse next token.
> - int old = reader.Position;
> - int old_ref_line = ref_line;
> - int old_col = col;
> -
> - // disable preprocessing directives when peeking
> - process_directives = false;
> + // Save next token.
> + Location cur_loc = current_location;
>  int new_token = token ();
> - process_directives = true;
> - reader.Position = old;
> - ref_line = old_ref_line;
> - col = old_col;
> - putback_char = -1;
> + saved_token = new SavedToken (new_token, val, Location);
> + current_location = cur_loc;
>
>  if (new_token == Token.OPEN_PARENS)
>  return Token.CLOSE_PARENS_OPEN_PARENS;
> @@ -658,7 +652,7 @@
>
>  public void Deambiguate_CloseParens ()
>  {
> - putback (')');
> + putback_ambiguous_close_parens = true;
>  deambiguate_close_parens++;
>  }
>
> @@ -1087,6 +1081,10 @@
>  int getChar ()
>  {
>  int x;
> + if (putback_ambiguous_close_parens) {
> + putback_ambiguous_close_parens = false;
> + return ')';
> + }
>  if (putback_char != -1) {
>  x = putback_char;
>  putback_char = -1;
> @@ -1106,6 +1104,8 @@
>
>  int peekChar ()
>  {
> + if (putback_ambiguous_close_parens)
> + return ')';
>  if (putback_char != -1)
>  return putback_char;
>  putback_char = reader.Read ();
> @@ -1114,6 +1114,8 @@
>
>  int peekChar2 ()
>  {
> + if (putback_ambiguous_close_parens)
> + return ')';
>  if (putback_char != -1)
>  return putback_char;
>  return reader.Peek ();
> @@ -1202,7 +1204,14 @@
>
>  public int token ()
>                 {
> - current_token = xtoken ();
> + if (!saved_token.Location.IsNull) {
> + current_token = saved_token.Token;
> + val = saved_token.Value;
> + current_location = saved_token.Location;
> + saved_token = SavedToken.Null;
> + }
> + else
> + current_token = xtoken ();
>                         return current_token;
>                 }
>
> @@ -1844,29 +1853,21 @@
>  }
>
>  if (res == Token.PARTIAL) {
> - // Save current position and parse next token.
> - int old = reader.Position;
> - int old_putback = putback_char;
> - int old_ref_line = ref_line;
> - int old_col = col;
> -
> - putback_char = -1;
> -
> + // Save next token.
> + Location cur_loc = Location;
>  int next_token = token ();
> + saved_token = new SavedToken (next_token, val, Location);
> + current_location = cur_loc;
>  bool ok = (next_token == Token.CLASS) ||
>  (next_token == Token.STRUCT) ||
>  (next_token == Token.INTERFACE) ||
>  (next_token == Token.ENUM); // "partial" is a keyword in 'partial enum',
> even though it's not valid
>
> - reader.Position = old;
> - ref_line = old_ref_line;
> - col = old_col;
> - putback_char = old_putback;
> -
>  if (ok)
>  return res;
>  else {
>  val = new LocatedToken (Location, "partial");
> + saved_token = SavedToken.Null;
>  return Token.IDENTIFIER;
>  }
>  }
> @@ -2309,6 +2310,23 @@
>  }
>
>  }
> +
> + public struct SavedToken
> + {
> + public static readonly SavedToken Null =
> + new SavedToken (0, null, Location.Null);
> +
> + public readonly int Token;
> + public readonly object Value;
> + public readonly Location Location;
> +
> + public SavedToken (int token, object value, Location loc)
> + {
> + Token = token;
> + Value = value;
> + Location = loc;
> + }
> + }
>  }
>
>  //
> Index: cs-parser.jay
> ===================================================================
> --- cs-parser.jay (revision 49060)
> +++ cs-parser.jay (working copy)
> @@ -4856,7 +4856,7 @@
>  }
> }
>
> -public CSharpParser (SeekableStreamReader reader, SourceFile file,
> ArrayList defines)
> +public CSharpParser (StreamReader reader, SourceFile file, ArrayList
> defines)
> {
>  current_namespace = new NamespaceEntry (null, file, null, Location.Null);
>  this.name = file.Name;
> Index: driver.cs
> ===================================================================
> --- driver.cs (revision 49060)
> +++ driver.cs (working copy)
> @@ -153,7 +153,7 @@
>  }
>
>  using (input){
> - SeekableStreamReader reader = new SeekableStreamReader (input,
> encoding);
> + StreamReader reader = new StreamReader (input, encoding, true);
>  Tokenizer lexer = new Tokenizer (reader, file, defines);
>  int token, tokens = 0, errors = 0;
>
> @@ -181,16 +181,16 @@
>  return;
>  }
>
> - SeekableStreamReader reader = new SeekableStreamReader (input,
> encoding);
> -
>  // Check 'MZ' header
> - if (reader.Read () == 77 && reader.Read () == 90) {
> + if (input.ReadByte () == 77 && input.ReadByte () == 90) {
>  Report.Error (2015, "Source file `{0}' is a binary file and not a text
> file", file.Name);
>  input.Close ();
>  return;
>  }
> + input.Position = 0;
>
> - reader.Position = 0;
> + StreamReader reader = new StreamReader (input, encoding, true);
> +
>  parser = new CSharpParser (reader, file, defines);
>  parser.ErrorOutput = Report.Stderr;
>  try {
>


--------------------------------------------------------------------------------


> ・ソMZ


--------------------------------------------------------------------------------


> _______________________________________________
> Mono-devel-list mailing list
> Mono-devel-list at lists.ximian.com
> http://lists.ximian.com/mailman/listinfo/mono-devel-list
>




More information about the Mono-devel-list mailing list