[Mono-list] PATCH: ignore upper case in System.Text.RegularExpressions
Marco Craveiro
marco_craveiro@oceanus.plus.com
11 Jan 2004 13:08:50 +0000
--=-CGfPbmcm+xz8m6Z6FfKa
Content-Type: text/plain
Content-Transfer-Encoding: 7bit
hello all,
this is my first attempt at fixing bugs #45966 and #45976, so i'm
posting it to the list first for comments; i'll attach it to the bug reports
once everyone is happy. i'm sure the code is not the best ever written
:-) but according to my tests it fixes the problems reported - which are
related to ignoring case on intervals. a couple of questions though:
- can you declare static variables inside a method? if yes, how? i would like to
remove the two member variables i've added since they are only used
within AddRange.
- does anyone know of a regexp test suite written for another
language? any language would do as long as the tests are fairly
complete. i would like to update RegexTest.cs (hopefully that's our unit
test for this subsystem) but unfortunately my knowledge of regexps is
limited.
thanks for your time,
marco
--
serenese y apunte bien! va usted a matar un hombre! -- ultimas palavras
de che guevara
--=-CGfPbmcm+xz8m6Z6FfKa
Content-Disposition: attachment; filename=regular_expressions.patch
Content-Type: text/plain; name=regular_expressions.patch; charset=UTF-8
Content-Transfer-Encoding: 7bit
--- interval.cs.~1.1.~ 2002-01-31 08:00:16.000000000 +0000
+++ interval.cs 2004-01-10 19:58:53.000000000 +0000
@@ -95,6 +95,14 @@
return low <= i && i <= high;
}
+ public bool Intersects (Interval i) {
+ if (IsEmpty || i.IsEmpty)
+ return false;
+
+ return ((Contains (i.low) && !Contains (i.high)) ||
+ (Contains (i.high) && !Contains (i.low)));
+ }
+
public void Merge (Interval i) {
if (i.IsEmpty)
return;
--- syntax.cs.~1.1.~ 2002-01-31 08:00:16.000000000 +0000
+++ syntax.cs 2004-01-11 12:51:32.000000000 +0000
@@ -779,11 +779,39 @@
}
public void AddCharacter (char c) {
- intervals.Add (new Interval (c, c));
+ // TODO: this is certainly not the most efficient way of doing things
+ // TODO: but at least it produces correct results.
+ AddRange (c, c);
}
public void AddRange (char lo, char hi) {
- intervals.Add (new Interval (lo, hi));
+ Interval new_interval = new Interval (lo, hi);
+
+ // ignore case is on. we must make sure our interval does not
+ // use upper case. if it does, we must normalize the upper case
+ // characters into lower case.
+ if (ignore) {
+ if (upper_case_characters.Intersects (new_interval)) {
+ Interval partial_new_interval;
+
+ if (new_interval.low < upper_case_characters.low) {
+ partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case,
+ new_interval.high + distance_between_upper_and_lower_case);
+ new_interval.high = upper_case_characters.low - 1;
+ }
+ else {
+ partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case,
+ upper_case_characters.high + distance_between_upper_and_lower_case);
+ new_interval.low = upper_case_characters.high + 1;
+ }
+ intervals.Add (partial_new_interval);
+ }
+ else if (upper_case_characters.Contains (new_interval)) {
+ new_interval.high += distance_between_upper_and_lower_case;
+ new_interval.low += distance_between_upper_and_lower_case;
+ }
+ }
+ intervals.Add (new_interval);
}
public override void Compile (ICompiler cmp, bool reverse) {
@@ -871,6 +899,8 @@
return 3; // Range
}
+ private static Interval upper_case_characters = new Interval ((char)65, (char)90);
+ private const int distance_between_upper_and_lower_case = 32;
private bool negate, ignore;
private bool[] pos_cats, neg_cats;
private IntervalCollection intervals;
--=-CGfPbmcm+xz8m6Z6FfKa--