/// The Regexp class can be used to match a pattern against a
/// string and optionally replace the matched parts with new strings.
///
/// Regular expressions were implemented by translating Henry Spencer's
/// regular expression package for tcl8.0.
/// Much of the description below is copied verbatim from the tcl8.0 regsub
/// manual entry.
///
/// A regular expression is zero or more branches, separated by
/// "|". It matches anything that matches one of the branches.
///
/// A branch is zero or more pieces, concatenated.
/// It matches a match for the first piece, followed by a match for the
/// second piece, etc.
///
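/// A piece is an atom, possibly followed by "*", "+", or
/// "?".
///   - An atom followed by "*" matches a sequence of 0 or more matches of
///     the atom.
///   - An atom followed by "+" matches a sequence of 1 or more matches of
///     the atom.
///   - An atom followed by "?" matches either 0 or 1 matches of the atom.
///
/// An atom is one of:
///   - a regular expression in parentheses (matching a match for the
///     regular expression)
///   - a range (see below)
///   - "." (matching any single character)
///   - "^" (matching the null string at the beginning of the input string)
///   - "$" (matching the null string at the end of the input string)
///   - a "\" followed by a single character (matching that character)
///   - a single character with no other significance (matching that
///     character).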
///
/// A range is a sequence of characters enclosed in "[]".
/// The range normally matches any single character from the sequence.
/// If the sequence begins with "^", the range matches any single character
/// not from the rest of the sequence.
/// If two characters in the sequence are separated by "-", this is shorthand
/// for the full list of characters between them (e.g. "[0-9]" matches any
/// decimal digit). To include a literal "]" in the sequence, make it the
/// first character (following a possible "^"). To include a literal "-",
/// make it the first or last character.
///
/// In general there may be more than one way to match a regular expression
/// to an input string. For example, consider the command
///
/// String[] match = new String[2];
/// Regexp.match("(a*)b*", "aabaaabb", match);
///
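/// Considering only the rules given so far, match[0] and
/// match[1] could end up with the values
///   - "aabb" and "aa"
///   - "aaab" and "aaa"
///   - "ab" and "a"
/// or any of several other combinations. To resolve this potential
/// ambiguity, Regexp chooses among alternatives using the rule "first then
/// longest". In other words, it considers the possible matches in order,
/// working from left to right across the input string and the pattern, and
/// it attempts to match longer pieces of the input string before shorter
/// ones. More specifically, the following rules apply in decreasing order
/// of priority:
///   1. If a regular expression could match two different parts of an
///      input string then it will match the one that begins earliest.
///   2. If a regular expression contains "|" operators then the leftmost
///      matching sub-expression is chosen.
///   3. In "*", "+", and "?" constructs, longer matches are chosen in
///      preference to shorter ones.
///   4. In sequences of expression components the components are
///      considered from left to right.
///
/// In the example from above, "(a*)b*" therefore matches exactly "aab"; the
/// "(a*)" portion of the pattern is matched first and it consumes the leading
/// "aa", then the "b*" portion of the pattern consumes the next "b". Or,
/// consider the following example:
///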
/// String[] match = new String[3];
/// Regexp.match("(ab|a)(b*)c", "abc", match);
///
/// After this command, match[0] will be "abc",
/// match[1] will be "ab", and match[2] will be an
/// empty string.
/// Rule 4 specifies that the "(ab|a)" component gets first shot at the input
/// string and Rule 2 specifies that the "ab" sub-expression
/// is checked before the "a" sub-expression.
/// Thus the "b" has already been claimed before the "(b*)"
/// component is checked and therefore "(b*)" must match an empty string.
///
/// Regular expression substitution matches a string against a regular
/// expression, transforming the string by replacing the matched region(s)
/// with new substring(s).
///
/// What gets substituted into the result is controlled by a
/// subspec. The subspec is a formatting string that specifies
/// what portions of the matched region should be substituted into the
/// result.
///
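///   - "&amp;" (an ampersand) or "\0" is replaced with a copy of the entire
///     matched region.
///   - "\n", where n is a digit from 1 to 9,
///     is replaced with a copy of the nth subexpression.
///   - "\&amp;" or "\\" are replaced with just "&amp;" or "\" to escape their
///     special meaning.
///   - any other character is passed through.
///
/// In the above, strings like "\2" represent the two characters
/// backslash and "2", not the Unicode character 0002.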
///
///
/// public static void
/// main(String[] args)
/// throws Exception
/// {
/// Regexp re;
/// String[] matches;
/// String s;
///
/// /*
/// * A regular expression to match the first line of a HTTP request.
/// *
/// * 1. ^ - starting at the beginning of the line
/// * 2. ([A-Z]+) - match and remember some upper case characters
/// * 3. [ \t]+ - skip blank space
/// * 4. ([^ \t]+) - match and remember up to the next blank space
/// * 5. [ \t]+ - skip more blank space
/// * 6. (HTTP/1\\.[01]) - match and remember HTTP/1.0 or HTTP/1.1
/// * 7. $ - end of string - no chars left.
/// */
///
/// s = "GET http://a.b.com:1234/index.html HTTP/1.1";
///
/// re = new Regexp("^([A-Z]+)[ \t]+([^ \t]+)[ \t]+(HTTP/1\\.[01])$");
/// matches = new String[4];
/// if (re.match(s, matches)) {
/// System.out.println("METHOD " + matches[1]);
/// System.out.println("URL " + matches[2]);
/// System.out.println("VERSION " + matches[3]);
/// }
///
/// /*
/// * A regular expression to extract some simple comma-separated data,
/// * reorder some of the columns, and discard column 2.
/// */
///
/// s = "abc,def,ghi,klm,nop,pqr";
///
/// re = new Regexp("^([^,]+),([^,]+),([^,]+),(.*)");
/// System.out.println(re.sub(s, "\\3,\\1,\\4"));
/// }
///
///
/// true if the pattern must match the beginning of the
/// string, so we don't have to waste time matching against all possible
/// starting locations in the string.
///
/// It takes a certain amount of time to parse and validate a regular
/// expression pattern before it can be used to perform matches
/// or substitutions. If the caller caches the new Regexp object, that
/// parsing time will be saved because the same Regexp can be used with
/// respect to many different strings.
///
/// If ignoreCase is true then this regular expression will
/// do case-insensitive matching. If false, then
/// the matches are case-sensitive. Regular expressions
/// generated by Regexp(String) are case-sensitive.
///
/// @throws IllegalArgumentException if the pattern is malformed.
/// The detail message for the exception will be set to a
/// string indicating how the pattern was malformed.
///
public Regexp( string pat, bool ignoreCase )
{
    // Record the requested case-sensitivity mode on the instance first.
    this.ignoreCase = ignoreCase;

    // A case-insensitive expression is compiled from the lower-cased
    // pattern; a case-sensitive one is compiled exactly as supplied.
    // NOTE(review): string.ToLower() folds using the current culture.
    // Confirm how the matcher folds its input before considering
    // ToLowerInvariant() here, so both sides stay consistent.
    string source = ignoreCase ? pat.ToLower() : pat;
    compile( source );
}
/// Returns the substring of str that matched the entire
/// regular expression, or null if the string did not
/// match this regular expression.
///
/// substrs[0] is set to the range of str
/// that matched the entire regular expression.
///
/// substrs[1] is set to the range of str
/// that matched the first (leftmost) parenthesized subexpression.
/// substrs[n] is set to the range that matched the
/// nth subexpression, and so on.
///
/// If subexpression n did not match, then
/// substrs[n] is set to null. Not to
/// be confused with "", which is a valid value for a
/// subexpression that matched 0 characters.
///
/// The length that the caller should use when allocating the
/// substr array is the return value of
/// Regexp.subspecs. The array
/// can be shorter (in which case not all the information will
/// be returned), or longer (in which case the remainder of the
/// elements are initialized to null), or
/// null (to ignore the subexpressions).
///
///
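/// substrs: an array of strings allocated by the caller, and filled in
/// with information about the portions of str that
/// matched the regular expression. May be null.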
///
///
/// true if str matched this
/// regular expression, false otherwise.
/// If false is returned, then the contents of
/// substrs are unchanged.
///
///
/// For the indices specified below, the range extends from the character
/// at the starting index up to, but not including, the character at the
/// ending index.
///
/// indices[0] and indices[1] are set to
/// starting and ending indices of the range of str
/// that matched the entire regular expression.
///
/// indices[2] and indices[3] are set to the
/// starting and ending indices of the range of str that
/// matched the first (leftmost) parenthesized subexpression.
/// indices[n * 2] and indices[n * 2 + 1]
/// are set to the range that matched the nth
/// subexpression, and so on.
///
/// If subexpression n did not match, then
/// indices[n * 2] and indices[n * 2 + 1]
/// are both set to -1.
///
/// The length that the caller should use when allocating the
/// indices array is twice the return value of
/// Regexp.subspecs. The array
/// can be shorter (in which case not all the information will
/// be returned), or longer (in which case the remainder of the
/// elements are initialized to -1), or
/// null (to ignore the subexpressions).
///
///
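/// indices: an array of integers allocated by the caller, and filled in
/// with information about the portions of str that
/// matched all the parts of the regular expression.
/// May be null.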
///
///
/// true if the string matched the regular expression,
/// false otherwise. If false is
/// returned, then the contents of indices are
/// unchanged.
///
/// Returns the string formed by replacing the first match in
/// str with the string generated from
/// subspec. If no matches were found, then
/// the return value is null.
/// Returns the string formed by replacing all the matches in
/// str with the strings generated from
/// subspec. If no matches were found, then
/// the return value is a copy of str.
/// Utility method to give access to the standard substitution algorithm
/// used by sub and subAll. Appends to the
/// string buffer the string generated by applying the substitution
/// parameter to the matched region.
///
/// A Filter is used by the Regexp class to generate
/// the replacement string for each pattern match found in the source
/// string.
///
///
/// The implementation can use whatever rules it chooses
/// to generate the replacement string. For example, here is an
/// example of a filter that replaces the first 5
/// occurrences of "%XX" in a string with the ASCII character
/// represented by the hex digits "XX":
///
/// String str = ...;
///
/// Regexp re = new Regexp("%[a-fA-F0-9][a-fA-F0-9]");
///
/// Regexp.Filter rf = new Regexp.Filter() {
/// int count = 5;
/// public boolean filter(Regsub rs, StringBuffer sb) {
/// String match = rs.matched();
/// int hi = Character.digit(match.charAt(1), 16);
/// int lo = Character.digit(match.charAt(2), 16);
/// sb.append((char) ((hi << 4) | lo));
/// return (--count > 0);
/// }
/// };
///
/// String result = re.sub(str, rf);
///
///
/// rs: the Regsub containing the state of the current
/// match.
///
///
/// sb
/// The string buffer that this filter should append the
/// generated string to. This string buffer actually
/// contains the results the calling Regexp has
/// generated up to this point.
///
///
/// false if no further matches should be
/// considered in this string, true to allow
/// Regexp to continue looking for further
/// matches.
///