[Mono-dev] mcs patch to say goodbye to SeekableStreamReader

Miguel de Icaza miguel at ximian.com
Wed Aug 31 00:19:04 EDT 2005


Hello,

> I agree that xtoken() could be performance sensitive, but on the
> other hand, the current code moves the stream back, which results in
> an extra parse of every parenthesized expression. Actually, as
> mentioned later, the patched code seems faster. Note that
> SeekableStreamReader has to call the encoding's GetCharCount() when
> we reposition the stream.

Good point.  Thanks for the benchmarks; let's get this patch into SVN.
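
For anyone skimming the patch: as I read it, the core idea is to replace
"seek the reader back and lex the same characters again" with "lex ahead
once and stash the token".  Here is a minimal, self-contained sketch of
that one-token-lookahead trick; the SavedToken name mirrors the patch,
but the toy tokenizer and everything else below is made up for
illustration and is not the actual mcs code:

// One-token lookahead without repositioning the input.
using System;

class LookaheadDemo
{
	struct SavedToken
	{
		public readonly int Token;
		public readonly bool HasValue;
		public SavedToken (int token) { Token = token; HasValue = true; }
	}

	// Stand-in for the character stream: lexing just consumes ints.
	static int [] input = { 1, 2, 3, 4 };
	static int pos = 0;
	static SavedToken saved;

	// The underlying lexer: consumes one token from the input.
	static int xtoken () { return pos < input.Length ? input [pos++] : -1; }

	// token () drains the stash first, so a peek never forces us to
	// reposition the reader and lex the same text twice.
	static int token ()
	{
		if (saved.HasValue) {
			int t = saved.Token;
			saved = new SavedToken ();
			return t;
		}
		return xtoken ();
	}

	// Peek: lex the next token once and stash it for the next token () call.
	static int peek ()
	{
		if (!saved.HasValue)
			saved = new SavedToken (xtoken ());
		return saved.Token;
	}

	static void Main ()
	{
		Console.WriteLine (token ()); // 1
		Console.WriteLine (peek ());  // 2 (lexed once, stashed)
		Console.WriteLine (token ()); // 2 (served from the stash)
		Console.WriteLine (token ()); // 3
	}
}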

> Actually the resulting performance is better than with the existing code:
> 
> existing code at *best*:
> real    0m2.711s
> user    0m0.080s
> sys     0m0.050s
> 
> Total memory allocated: 34835 KB
> 
> patched code at *worst*:
> real    0m2.635s
> user    0m0.020s
> sys     0m0.030s
> Total memory allocated: 34083 KB
> 
> Most of the memory consumption difference comes from
> SeekableStreamReader:

Lovely, thanks for the patch and the measurements.

> 
> ########################
>      731 KB Mono.CSharp.SeekableStreamReader::.ctor(StreamReader)
>          731 KB      363 System.Char[]
>   Callers (with count) that contribute at least for 1%:
>          363  100 % Mono.CSharp.SeekableStreamReader::.ctor(Stream,Encoding)
> 
> Atsushi Eno
> Plain text document attachment (goodbye-seekablestreamreader.patch)
> Index: support.cs
> ===================================================================
> --- support.cs	(revision 49144)
> +++ support.cs	(working copy)
> @@ -344,6 +344,7 @@
>  		}
>  	}
>  
> +/*
>  	/// <summary>
>  	///   This is a wrapper around StreamReader which is seekable.
>  	/// </summary>
> @@ -442,6 +443,7 @@
>  			return buffer [pos++];
>  		}
>  	}
> +*/
>  
>  	public class DoubleHash {
>  		const int DEFAULT_INITIAL_BUCKETS = 100;
> Index: cs-tokenizer.cs
> ===================================================================
> --- cs-tokenizer.cs	(revision 49144)
> +++ cs-tokenizer.cs	(working copy)
> @@ -31,7 +31,7 @@
>  
>  	public class Tokenizer : yyParser.yyInput
>  	{
> -		SeekableStreamReader reader;
> +		StreamReader reader;
>  		SourceFile ref_name;
>  		SourceFile file_name;
>  		int ref_line = 1;
> @@ -45,6 +45,8 @@
>  		Location current_location;
>  		Location current_comment_location = Location.Null;
>  		ArrayList escapedIdentifiers = new ArrayList ();
> +		SavedToken saved_token = SavedToken.Null;
> +		bool putback_ambiguous_close_parens = false;
>  
>  		//
>  		// XML documentation buffer. The save point is used to divide
> @@ -385,7 +387,7 @@
>  			defines [def] = true;
>  		}
>  		
> -		public Tokenizer (SeekableStreamReader input, SourceFile file, ArrayList defs)
> +		public Tokenizer (StreamReader input, SourceFile file, ArrayList defs)
>  		{
>  			this.ref_name = file;
>  			this.file_name = file;
> @@ -467,19 +469,11 @@
>  
>  				--deambiguate_close_parens;
>  
> -				// Save current position and parse next token.
> -				int old = reader.Position;
> -				int old_ref_line = ref_line;
> -				int old_col = col;
> -
> -				// disable preprocessing directives when peeking
> -				process_directives = false;
> +				// Save next token.
> +				Location cur_loc = current_location;
>  				int new_token = token ();
> -				process_directives = true;
> -				reader.Position = old;
> -				ref_line = old_ref_line;
> -				col = old_col;
> -				putback_char = -1;
> +				saved_token = new SavedToken (new_token, val, Location);
> +				current_location = cur_loc;
>  
>  				if (new_token == Token.OPEN_PARENS)
>  					return Token.CLOSE_PARENS_OPEN_PARENS;
> @@ -658,7 +652,7 @@
>  
>  		public void Deambiguate_CloseParens ()
>  		{
> -			putback (')');
> +			putback_ambiguous_close_parens = true;
>  			deambiguate_close_parens++;
>  		}
>  
> @@ -1087,6 +1081,10 @@
>  		int getChar ()
>  		{
>  			int x;
> +			if (putback_ambiguous_close_parens) {
> +				putback_ambiguous_close_parens = false;
> +				return ')';
> +			}
>  			if (putback_char != -1) {
>  				x = putback_char;
>  				putback_char = -1;
> @@ -1106,6 +1104,8 @@
>  
>  		int peekChar ()
>  		{
> +			if (putback_ambiguous_close_parens)
> +				return ')';
>  			if (putback_char != -1)
>  				return putback_char;
>  			putback_char = reader.Read ();
> @@ -1114,6 +1114,8 @@
>  
>  		int peekChar2 ()
>  		{
> +			if (putback_ambiguous_close_parens)
> +				return ')';
>  			if (putback_char != -1)
>  				return putback_char;
>  			return reader.Peek ();
> @@ -1202,7 +1204,14 @@
>  
>  		public int token ()
>                  {
> -			current_token = xtoken ();
> +			if (!saved_token.Location.IsNull) {
> +				current_token = saved_token.Token;
> +				val = saved_token.Value;
> +				current_location = saved_token.Location;
> +				saved_token = SavedToken.Null;
> +			}
> +			else
> +				current_token = xtoken ();
>                          return current_token;
>                  }
>  
> @@ -1844,25 +1853,16 @@
>  			}
>  
>  			if (res == Token.PARTIAL) {
> -				// Save current position and parse next token.
> -				int old = reader.Position;
> -				int old_putback = putback_char;
> -				int old_ref_line = ref_line;
> -				int old_col = col;
> -
> -				putback_char = -1;
> -
> +				// Save next token.
> +				Location cur_loc = Location;
>  				int next_token = token ();
> +				saved_token = new SavedToken (next_token, val, Location);
> +				current_location = cur_loc;
>  				bool ok = (next_token == Token.CLASS) ||
>  					(next_token == Token.STRUCT) ||
>  					(next_token == Token.INTERFACE) ||
>  					(next_token == Token.ENUM); // "partial" is a keyword in 'partial enum', even though it's not valid
>  
> -				reader.Position = old;
> -				ref_line = old_ref_line;
> -				col = old_col;
> -				putback_char = old_putback;
> -
>  				if (ok)
>  					return res;
>  				else {
> @@ -2309,6 +2309,23 @@
>  			}
>  				
>  		}
> +
> +		public struct SavedToken
> +		{
> +			public static readonly SavedToken Null =
> +				new SavedToken (0, null, Location.Null);
> +
> +			public readonly int Token;
> +			public readonly object Value;
> +			public readonly Location Location;
> +
> +			public SavedToken (int token, object value, Location loc)
> +			{
> +				Token = token;
> +				Value = value;
> +				Location = loc;
> +			}
> +		}
>  	}
>  
>  	//
> Index: cs-parser.jay
> ===================================================================
> --- cs-parser.jay	(revision 49144)
> +++ cs-parser.jay	(working copy)
> @@ -4856,7 +4856,7 @@
>  	}
>  }		   
>  
> -public CSharpParser (SeekableStreamReader reader, SourceFile file, ArrayList defines)
> +public CSharpParser (StreamReader reader, SourceFile file, ArrayList defines)
>  {
>  	current_namespace = new NamespaceEntry (null, file, null, Location.Null);
>  	this.name = file.Name;
> Index: driver.cs
> ===================================================================
> --- driver.cs	(revision 49144)
> +++ driver.cs	(working copy)
> @@ -153,7 +153,7 @@
>  			}
>  
>  			using (input){
> -				SeekableStreamReader reader = new SeekableStreamReader (input, encoding);
> +				StreamReader reader = new StreamReader (input, encoding, true);
>  				Tokenizer lexer = new Tokenizer (reader, file, defines);
>  				int token, tokens = 0, errors = 0;
>  
> @@ -181,16 +181,16 @@
>  				return;
>  			}
>  
> -			SeekableStreamReader reader = new SeekableStreamReader (input, encoding);
> -
>  			// Check 'MZ' header
> -			if (reader.Read () == 77 && reader.Read () == 90) {
> +			if (input.ReadByte () == 77 && input.ReadByte () == 90) {
>  				Report.Error (2015, "Source file `{0}' is a binary file and not a text file", file.Name);
>  				input.Close ();
>  				return;
>  			}
> +			input.Seek (0, SeekOrigin.Begin);
>  
> -			reader.Position = 0;
> +			StreamReader reader = new StreamReader (input, encoding, true);
> +
>  			parser = new CSharpParser (reader, file, defines);
>  			parser.ErrorOutput = Report.Stderr;
>  			try {
-- 
Miguel de Icaza <miguel at ximian.com>


