[Mono-list] PATCH: ignore upper case in System.Text.RegularExpressions

Marco Craveiro marco_craveiro@oceanus.plus.com
11 Jan 2004 13:08:50 +0000


--=-CGfPbmcm+xz8m6Z6FfKa
Content-Type: text/plain
Content-Transfer-Encoding: 7bit

hello all,

this is my first attempt at fixing bugs #45966 and #45976, so i'm
posting it to the list first for comments; i'll attach it to the bug reports
once everyone is happy. i'm sure the code is not the best ever written
:-) but according to my tests it fixes the problems reported - which are
related to ignoring case on intervals. a couple of questions though:

- can you declare static variables inside a method? if yes, how? i would like to
remove the two member variables i've added since they are only used
within AddRange.

- does anyone know of a test suite of regexp tests done for another
language? any language would do as long as the tests are fairly
complete. i would like to update RegexTest.cs (hopefully that's our unit
test for this subsystem) but unfortunately my knowledge of regexps is 
limited.

thanks for your time,

marco
-- 
serenese y apunte bien! va usted a matar un hombre! -- ultimas palavras
de che guevara

--=-CGfPbmcm+xz8m6Z6FfKa
Content-Disposition: attachment; filename=regular_expressions.patch
Content-Type: text/plain; name=regular_expressions.patch; charset=UTF-8
Content-Transfer-Encoding: 7bit

--- interval.cs.~1.1.~	2002-01-31 08:00:16.000000000 +0000
+++ interval.cs	2004-01-10 19:58:53.000000000 +0000
@@ -95,6 +95,14 @@
 			return low <= i && i <= high;
 		}
 
+		public bool Intersects (Interval i) {
+			if (IsEmpty || i.IsEmpty)
+				return false;
+
+			return ((Contains (i.low) && !Contains (i.high)) ||
+					  (Contains (i.high) && !Contains (i.low)));
+		}
+
 		public void Merge (Interval i) {
 			if (i.IsEmpty)
 				return;
--- syntax.cs.~1.1.~	2002-01-31 08:00:16.000000000 +0000
+++ syntax.cs	2004-01-11 12:51:32.000000000 +0000
@@ -779,11 +779,39 @@
 		}
 
 		public void AddCharacter (char c) {
-			intervals.Add (new Interval (c, c));
+			// TODO: this is certainly not the most efficient way of doing things 
+			// TODO: but at least it produces correct results. 
+			AddRange (c, c);
 		}
 
 		public void AddRange (char lo, char hi) {
-			intervals.Add (new Interval (lo, hi));
+			Interval new_interval = new Interval (lo, hi);
+
+			// ignore case is on. we must make sure our interval does not
+			// use upper case. if it does, we must normalize the upper case
+			// characters into lower case. 
+			if (ignore) {
+				if (upper_case_characters.Intersects (new_interval)) {
+					Interval partial_new_interval;
+
+					if (new_interval.low < upper_case_characters.low) {
+						partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case, 
+																		 new_interval.high + distance_between_upper_and_lower_case);
+						new_interval.high = upper_case_characters.low - 1;
+					}
+					else {
+						partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case, 
+																	 	 upper_case_characters.high + distance_between_upper_and_lower_case);
+						new_interval.low = upper_case_characters.high + 1;
+					}
+					intervals.Add (partial_new_interval);
+				}
+				else if (upper_case_characters.Contains (new_interval)) {
+					new_interval.high += distance_between_upper_and_lower_case;
+					new_interval.low += distance_between_upper_and_lower_case;
+				}
+			}
+			intervals.Add (new_interval);
 		}
 
 		public override void Compile (ICompiler cmp, bool reverse) {
@@ -871,6 +899,8 @@
 				return 3;					// Range
 		}
 
+		private static Interval upper_case_characters = new Interval ((char)65, (char)90);
+		private const int distance_between_upper_and_lower_case = 32;
 		private bool negate, ignore;
 		private bool[] pos_cats, neg_cats;
 		private IntervalCollection intervals;

--=-CGfPbmcm+xz8m6Z6FfKa--