/*
 * Regsub.java
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 * SCCS: %Z% %M% %I% %E% %U%
 */
// Included in SQLite3 port to C# for use in testharness only;  2008 Noah B Hart
//$Header$

using System;

namespace sunlabs.brazil.util.regexp
{
  /// <summary>
  /// The <c>Regsub</c> class provides an iterator-like object to
  /// extract the matched and unmatched portions of a string with respect to
  /// a given regular expression.
  /// </summary>
  /// <remarks>
  /// <para>
  /// After each match is found, the portions of the string already
  /// checked are not searched again -- searching for the next match will
  /// begin at the character just after where the last match ended.
  /// </para>
  /// <para>
  /// Author: Colin Stevens (colin.stevens@sun.com). Version: 1.4, 99/10/14.
  /// </para>
  /// </remarks>
  /// <example>
  /// Here is an example (from the original Java sources) of using
  /// <c>Regsub</c> to replace all "%XX" sequences in a string with the
  /// ASCII character represented by the hex digits "XX":
  /// <code>
  /// public static void
  /// main(String[] args)
  ///     throws Exception
  /// {
  ///     Regexp re = new Regexp("%[a-fA-F0-9][a-fA-F0-9]");
  ///     Regsub rs = new Regsub(re, args[0]);
  ///
  ///     StringBuffer sb = new StringBuffer();
  ///
  ///     while (rs.nextMatch()) {
  ///         sb.append(rs.skipped());
  ///
  ///         String match = rs.matched();
  ///
  ///         int hi = Character.digit(match.charAt(1), 16);
  ///         int lo = Character.digit(match.charAt(2), 16);
  ///         sb.append((char) ((hi &lt;&lt; 4) | lo));
  ///     }
  ///     sb.append(rs.rest());
  ///
  ///     System.out.println(sb);
  /// }
  /// </code>
  /// </example>
  public class Regsub
  {
    /// <summary>The compiled regular expression used for every search.</summary>
    internal Regexp r;

    /// <summary>The string being searched.</summary>
    internal string str;

    /// <summary>
    /// Start index of the skipped (unmatched) region that precedes the
    /// current match; set to the previous <see cref="end"/> by
    /// <see cref="nextMatch"/>.
    /// </summary>
    internal int ustart;

    /// <summary>Start index of the current match; -1 until a match has been found.</summary>
    internal int mstart;

    /// <summary>
    /// Index just past the last character of the current match (0 until a
    /// match has been found).
    /// </summary>
    internal int end;

    /// <summary>
    /// Result of the most recent <c>r.exec</c> call; null before the first
    /// search and after a search that found nothing.
    /// </summary>
    internal Regexp.Match m;

    /// <summary>
    /// Construct a new <c>Regsub</c> that can be used to step
    /// through the given string, finding each substring that matches
    /// the given regular expression.
    /// </summary>
    /// <remarks>
    /// <c>Regexp</c> contains two substitution methods,
    /// <c>sub</c> and <c>subAll</c>, that can be used instead
    /// of <c>Regsub</c> if just simple substitutions are being done.
    /// </remarks>
    /// <param name="r">The compiled regular expression.</param>
    /// <param name="str">The string to search.</param>
    public Regsub( Regexp r, string str )
    {
      this.r = r;
      this.str = str;
      this.ustart = 0;
      this.mstart = -1; // sentinel: no match found yet
      this.end = 0;
    }

    /// <summary>
    /// Searches for the next substring that matches the regular expression.
    /// After calling this method, the caller would call methods like
    /// <see cref="skipped"/>, <see cref="matched"/>, etc. to query attributes
    /// of the matched region.
    /// </summary>
    /// <remarks>
    /// Calling this function again will search for the next match, beginning
    /// at the character just after where the last match ended.
    /// </remarks>
    /// <returns>
    /// <c>true</c> if a match was found, <c>false</c>
    /// if there are no more matches.
    /// </returns>
    public bool nextMatch()
    {
      ustart = end;

      /*
       * Consume one character if the last match didn't consume any
       * characters, to avoid an infinite loop.
       */
      int off = ustart;
      if ( off == mstart )
      {
        off++;
        if ( off >= str.Length )
        {
          return false;
        }
      }

      // NOTE(review): the literal 0 is passed through to Regexp.exec unchanged
      // from the original port; its meaning is defined by Regexp, not here.
      m = r.exec( str, 0, off );
      if ( m == null )
      {
        return false;
      }

      // indices[0] / indices[1] delimit the whole match.
      mstart = m.indices[0];
      end = m.indices[1];
      return true;
    }

    /// <summary>
    /// Returns a substring consisting of all the characters skipped
    /// between the end of the last match (or the start of the original
    /// search string) and the start of this match.
    /// </summary>
    /// <remarks>
    /// This method can be used to extract all the portions of the string that
    /// didn't match the regular expression.
    /// </remarks>
    /// <returns>The characters that didn't match.</returns>
    public string skipped()
    {
      return str.Substring( ustart, mstart - ustart );
    }

    /// <summary>
    /// Returns a substring consisting of the characters that matched
    /// the entire regular expression during the last call to
    /// <see cref="nextMatch"/>.
    /// </summary>
    /// <returns>The characters that did match.</returns>
    public string matched()
    {
      return str.Substring( mstart, end - mstart );
    }

    /// <summary>
    /// Returns a substring consisting of the characters that matched
    /// the given parenthesized subexpression during the last call to
    /// <see cref="nextMatch"/>.
    /// </summary>
    /// <param name="i">
    /// The index of the parenthesized subexpression. Index 0 selects the
    /// index pair that <see cref="nextMatch"/> uses for the entire match.
    /// </param>
    /// <returns>
    /// The characters that matched the subexpression, or
    /// <c>null</c> if the given subexpression did not
    /// exist or did not match. <c>null</c> is also returned when there is
    /// no current match result or when <paramref name="i"/> is negative.
    /// </returns>
    public string submatch( int i )
    {
      // No match result is available before the first successful search or
      // after r.exec returned null; report "did not match" instead of
      // dereferencing a null reference.
      if ( m == null )
      {
        return null;
      }

      // A subexpression exists only if both slots 2*i and 2*i+1 are inside
      // the indices array. Comparing against Length / 2 is equivalent to
      // "i * 2 + 1 >= Length" but cannot overflow, and the explicit i < 0
      // test keeps negative indexes from reaching the array.
      if ( i < 0 || i >= m.indices.Length / 2 )
      {
        return null;
      }

      int subStart = m.indices[i * 2];
      int subEnd = m.indices[i * 2 + 1];

      // A negative slot means the subexpression did not participate in the match.
      if ( subStart < 0 || subEnd < 0 )
      {
        return null;
      }

      return str.Substring( subStart, subEnd - subStart );
    }

    /// <summary>
    /// Returns a substring consisting of all the characters that come
    /// after the last match. As the matches progress, the <c>rest</c>
    /// gets shorter. When <see cref="nextMatch"/> returns <c>false</c>,
    /// then this method will return the rest of the string that can't be
    /// matched.
    /// </summary>
    /// <returns>The rest of the characters after the last match.</returns>
    public string rest()
    {
      return str.Substring( end );
    }
  }
}