/// The <c>Regexp</c> class can be used to match a pattern against a
/// string and optionally replace the matched parts with new strings.
///
/// Regular expressions were implemented by translating Henry Spencer's
/// regular expression package for tcl8.0.
/// Much of the description below is copied verbatim from the tcl8.0 regsub
/// manual entry.
///
/// A regular expression is zero or more branches, separated by
/// "|". It matches anything that matches one of the branches.
///
/// A branch is zero or more pieces, concatenated.
/// It matches a match for the first piece, followed by a match for the
/// second piece, etc.
///
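/// A piece is an atom, possibly followed by "*", "+", or
/// "?". An atom followed by "*" matches a sequence of 0 or more matches
/// of the atom; followed by "+", a sequence of 1 or more matches of the
/// atom; followed by "?", either a match of the atom or the null string.
/// An atom is a regular expression in parentheses (matching a match for
/// the regular expression), a range (see below), "." (matching any single
/// character), "^" (matching the null string at the beginning of the input
/// string), "$" (matching the null string at the end of the input string),
/// a "\" followed by a single character (matching that character), or a
/// single character with no other significance (matching that character).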
///
/// A range is a sequence of characters enclosed in "[]".
/// The range normally matches any single character from the sequence.
/// If the sequence begins with "^", the range matches any single character
/// not from the rest of the sequence.
/// If two characters in the sequence are separated by "-", this is shorthand
/// for the full list of characters between them (e.g. "[0-9]" matches any
/// decimal digit). To include a literal "]" in the sequence, make it the
/// first character (following a possible "^"). To include a literal "-",
/// make it the first or last character.
///
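/// In general there may be more than one way to match a regular expression
/// to an input string. For example, consider the command
///
///     String[] match = new String[2];
///     Regexp.match("(a*)b*", "aabaaabb", match);
///
/// Considering only the rules given so far, <c>match[0]</c> and
/// <c>match[1]</c> could end up with the values "aabb" and "aa",
/// "aaab" and "aaa", "ab" and "a", or any of several other combinations.
/// To resolve this potential ambiguity, Regexp chooses among alternatives
/// using the rule "first then longest": it considers the possible matches
/// in order, working from left to right across the input string and the
/// pattern, and it attempts to match longer pieces of the input string
/// before shorter ones. More specifically, the following rules apply in
/// decreasing order of priority:
///
///   1. If a regular expression could match two different parts of an
///      input string then it will match the one that begins earliest.
///   2. If a regular expression contains "|" operators then the leftmost
///      matching sub-expression is chosen.
///   3. In "*", "+", and "?" constructs, longer matches are chosen in
///      preference to shorter ones.
///   4. In sequences of expression components the components are
///      considered from left to right.
///
/// In the example from above, "(a*)b*" therefore matches exactly "aab"; the
/// "(a*)" portion of the pattern is matched first and it consumes the leading
/// "aa", then the "b*" portion of the pattern consumes the next "b". Or,
/// consider the following example:
///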
///     String[] match = new String[3];
///     Regexp.match("(ab|a)(b*)c", "abc", match);
///
/// After this command, <c>match[0]</c> will be "abc",
/// <c>match[1]</c> will be "ab", and <c>match[2]</c> will be an
/// empty string.
/// Rule 4 specifies that the "(ab|a)" component gets first shot at the input
/// string and Rule 2 specifies that the "ab" sub-expression
/// is checked before the "a" sub-expression.
/// Thus the "b" has already been claimed before the "(b*)"
/// component is checked and therefore "(b*)" must match an empty string.
///
/// Regular expression substitution matches a string against a regular
/// expression, transforming the string by replacing the matched region(s)
/// with new substring(s).
///
/// What gets substituted into the result is controlled by a
/// subspec. The subspec is a formatting string that specifies
/// what portions of the matched region should be substituted into the
/// result.
///
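/// "\<c>n</c>", where <c>n</c> is a digit from 1 to 9,
/// is replaced with a copy of the <c>n</c>th subexpression.
/// In source code, write the subspec as "\\2" so that the string contains a
/// backslash and "2", not the Unicode character 0002.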
///
///     public static void
///     main(String[] args)
///         throws Exception
///     {
///         Regexp re;
///         String[] matches;
///         String s;
///
///         /*
///          * A regular expression to match the first line of a HTTP request.
///          *
///          * 1. ^               - starting at the beginning of the line
///          * 2. ([A-Z]+)        - match and remember some upper case characters
///          * 3. [ \t]+          - skip blank space
///          * 4. ([^ \t]+)       - match and remember up to the next blank space
///          * 5. [ \t]+          - skip more blank space
///          * 6. (HTTP/1\\.[01]) - match and remember HTTP/1.0 or HTTP/1.1
///          * 7. $               - end of string - no chars left.
///          */
///
///         s = "GET http://a.b.com:1234/index.html HTTP/1.1";
///
///         re = new Regexp("^([A-Z]+)[ \t]+([^ \t]+)[ \t]+(HTTP/1\\.[01])$");
///         matches = new String[4];
///         if (re.match(s, matches)) {
///             System.out.println("METHOD  " + matches[1]);
///             System.out.println("URL     " + matches[2]);
///             System.out.println("VERSION " + matches[3]);
///         }
///
///         /*
///          * A regular expression to extract some simple comma-separated data,
///          * reorder some of the columns, and discard column 2.
///          */
///
///         s = "abc,def,ghi,klm,nop,pqr";
///
///         re = new Regexp("^([^,]+),([^,]+),([^,]+),(.*)");
///         System.out.println(re.sub(s, "\\3,\\1,\\4"));
///     }
///
/// <c>true</c> if the pattern must match the beginning of the
/// string, so we don't have to waste time matching against all possible
/// starting locations in the string.
///
/// It takes a certain amount of time to parse and validate a regular
/// expression pattern before it can be used to perform matches
/// or substitutions. If the caller caches the new Regexp object, that
/// parsing time will be saved because the same Regexp can be used with
/// respect to many different strings.
///
/// If <c>ignoreCase</c> is <c>true</c> then this regular expression will
/// do case-insensitive matching. If <c>false</c>, then
/// the matches are case-sensitive. Regular expressions
/// generated by <c>Regexp(String)</c> are case-sensitive.
///
/// @throws IllegalArgumentException if the pattern is malformed.
/// The detail message for the exception will be set to a
/// string indicating how the pattern was malformed.
///
public Regexp( string pat, bool ignoreCase )
{
  // Record the case mode first: the flag is already set by the time the
  // pattern is handed to compile().
  this.ignoreCase = ignoreCase;

  // A case-insensitive expression is compiled from the lower-cased pattern
  // text; otherwise the pattern is compiled exactly as given.
  // NOTE(review): string.ToLower() uses the current culture - confirm the
  // matcher folds input characters the same way.
  string source = ignoreCase ? pat.ToLower() : pat;
  compile( source );
}
/// Returns the substring of <c>str</c> that matched the entire
/// regular expression, or <c>null</c> if the string did not
/// match this regular expression.
///
/// <c>substrs[0]</c> is set to the range of <c>str</c>
/// that matched the entire regular expression.
///
/// <c>substrs[1]</c> is set to the range of <c>str</c>
/// that matched the first (leftmost) parenthesized subexpression.
/// <c>substrs[n]</c> is set to the range that matched the
/// <c>n</c>th subexpression, and so on.
///
/// If subexpression <c>n</c> did not match, then
/// <c>substrs[n]</c> is set to <c>null</c>. Not to
/// be confused with "", which is a valid value for a
/// subexpression that matched 0 characters.
///
/// The length that the caller should use when allocating the
/// <c>substrs</c> array is the return value of
/// <c>Regexp.subspecs</c>. The array
/// can be shorter (in which case not all the information will
/// be returned), or longer (in which case the remainder of the
/// elements are initialized to <c>null</c>), or
/// <c>null</c> (to ignore the subexpressions).
///
///
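/// <c>substrs</c>: an array of strings allocated by the caller, filled in
/// with information about the portions of <c>str</c> that
/// matched the regular expression. May be <c>null</c>.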
///
///
/// <c>true</c> if <c>str</c> matched this
/// regular expression, <c>false</c> otherwise.
/// If <c>false</c> is returned, then the contents of
/// <c>substrs</c> are unchanged.
///
///
/// For the indices specified below, the range extends from the character
/// at the starting index up to, but not including, the character at the
/// ending index.
///
/// <c>indices[0]</c> and <c>indices[1]</c> are set to the
/// starting and ending indices of the range of <c>str</c>
/// that matched the entire regular expression.
///
/// <c>indices[2]</c> and <c>indices[3]</c> are set to the
/// starting and ending indices of the range of <c>str</c> that
/// matched the first (leftmost) parenthesized subexpression.
/// <c>indices[n * 2]</c> and <c>indices[n * 2 + 1]</c>
/// are set to the range that matched the <c>n</c>th
/// subexpression, and so on.
///
/// If subexpression <c>n</c> did not match, then
/// <c>indices[n * 2]</c> and <c>indices[n * 2 + 1]</c>
/// are both set to <c>-1</c>.
///
/// The length that the caller should use when allocating the
/// <c>indices</c> array is twice the return value of
/// <c>Regexp.subspecs</c>. The array
/// can be shorter (in which case not all the information will
/// be returned), or longer (in which case the remainder of the
/// elements are initialized to <c>-1</c>), or
/// <c>null</c> (to ignore the subexpressions).
///
///
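/// <c>indices</c>: an array of integers allocated by the caller, filled in
/// with the indices of the portions of <c>str</c> that
/// matched all the parts of the regular expression.
/// May be <c>null</c>.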
///
///
/// <c>true</c> if the string matched the regular expression,
/// <c>false</c> otherwise. If <c>false</c> is
/// returned, then the contents of <c>indices</c> are
/// unchanged.
///
/// Returns the string formed by replacing the first match in
/// <c>str</c> with the string generated from
/// <c>subspec</c>. If no matches were found, then
/// the return value is <c>null</c>.
/// Returns the string formed by replacing all the matches in
/// <c>str</c> with the strings generated from
/// <c>subspec</c>. If no matches were found, then
/// the return value is a copy of <c>str</c>.
/// See <c>sub</c> and <c>subAll</c>. Appends to the
/// string buffer the string generated by applying the substitution
/// parameter to the matched region.
///
/// Used by the <c>Regexp</c> class to generate
/// the replacement string for each pattern match found in the source
/// string.
///
///
/// The implementation can use whatever rules it chooses
/// to generate the replacement string. For example, here is an
/// example of a filter that replaces the first 5
/// occurrences of "%XX" in a string with the ASCII character
/// represented by the hex digits "XX":
///
///     String str = ...;
///
///     Regexp re = new Regexp("%[a-fA-F0-9][a-fA-F0-9]");
///
///     Regexp.Filter rf = new Regexp.Filter() {
///         int count = 5;
///         public boolean filter(Regsub rs, StringBuffer sb) {
///             String match = rs.matched();
///             int hi = Character.digit(match.charAt(1), 16);
///             int lo = Character.digit(match.charAt(2), 16);
///             sb.append((char) ((hi << 4) | lo));
///             return (--count > 0);
///         }
///     }
///
///     String result = re.sub(str, rf);
///
/// <c>Regsub</c> containing the state of the current
/// match.
///
///
/// <c>sb</c>:
/// The string buffer that this filter should append the
/// generated string to. This string buffer actually
/// contains the results the calling <c>Regexp</c> has
/// generated up to this point.
///
///
/// <c>false</c> if no further matches should be
/// considered in this string, <c>true</c> to allow
/// <c>Regexp</c> to continue looking for further
/// matches.
///