/* |
|
* Copyright (c) 1995, 2001, Oracle and/or its affiliates. All rights reserved. |
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
* |
|
* This code is free software; you can redistribute it and/or modify it |
|
* under the terms of the GNU General Public License version 2 only, as |
|
* published by the Free Software Foundation. Oracle designates this |
|
* particular file as subject to the "Classpath" exception as provided |
|
* by Oracle in the LICENSE file that accompanied this code. |
|
* |
|
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
* version 2 for more details (a copy is included in the LICENSE file that |
|
* accompanied this code). |
|
* |
|
* You should have received a copy of the GNU General Public License version |
|
* 2 along with this work; if not, write to the Free Software Foundation, |
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
* |
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
* or visit www.oracle.com if you need additional information or have any |
|
* questions. |
|
*/ |
|
package sun.misc; |
|
import java.io.*; |
|
/** |
|
* A class to represent a pool of regular expressions. A string |
|
* can be matched against the whole pool all at once. It is much |
|
* faster than doing individual regular expression matches one-by-one. |
|
* |
|
* @see java.misc.RegexpTarget |
|
* @author James Gosling |
|
*/ |
|
public class RegexpPool { |
|
private RegexpNode prefixMachine = new RegexpNode(); |
|
private RegexpNode suffixMachine = new RegexpNode(); |
|
private static final int BIG = 0x7FFFFFFF; |
|
private int lastDepth = BIG; |
|
public RegexpPool () { |
|
} |
|
/** |
|
* Add a regular expression to the pool of regular expressions. |
|
* @param re The regular expression to add to the pool. |
|
For now, only handles strings that either begin or end with |
|
a '*'. |
|
* @param ret The object to be returned when this regular expression is |
|
matched. If ret is an instance of the RegexpTarget class, ret.found |
|
is called with the string fragment that matched the '*' as its |
|
parameter. |
|
* @exception REException error |
|
*/ |
|
public void add(String re, Object ret) throws REException { |
|
add(re, ret, false); |
|
} |
|
/** |
|
* Replace the target for the regular expression with a different |
|
* target. |
|
* |
|
* @param re The regular expression to be replaced in the pool. |
|
* For now, only handles strings that either begin or end with |
|
* a '*'. |
|
* @param ret The object to be returned when this regular expression is |
|
* matched. If ret is an instance of the RegexpTarget class, ret.found |
|
* is called with the string fragment that matched the '*' as its |
|
* parameter. |
|
*/ |
|
public void replace(String re, Object ret) { |
|
try { |
|
add(re, ret, true); |
|
} catch(Exception e) { |
|
// should never occur if replace is true |
|
} |
|
} |
|
/** |
|
* Delete the regular expression and its target. |
|
* @param re The regular expression to be deleted from the pool. |
|
* must begin or end with a '*' |
|
* @return target - the old target. |
|
*/ |
|
public Object delete(String re) { |
|
Object o = null; |
|
RegexpNode p = prefixMachine; |
|
RegexpNode best = p; |
|
int len = re.length() - 1; |
|
int i; |
|
boolean prefix = true; |
|
if (!re.startsWith("*") || |
|
!re.endsWith("*")) |
|
len++; |
|
if (len <= 0) |
|
return null; |
|
/* March forward through the prefix machine */ |
|
for (i = 0; p != null; i++) { |
|
if (p.result != null && p.depth < BIG |
|
&& (!p.exact || i == len)) { |
|
best = p; |
|
} |
|
if (i >= len) |
|
break; |
|
p = p.find(re.charAt(i)); |
|
} |
|
/* march backward through the suffix machine */ |
|
p = suffixMachine; |
|
for (i = len; --i >= 0 && p != null;) { |
|
if (p.result != null && p.depth < BIG) { |
|
prefix = false; |
|
best = p; |
|
} |
|
p = p.find(re.charAt(i)); |
|
} |
|
// delete only if there is an exact match |
|
if (prefix) { |
|
if (re.equals(best.re)) { |
|
o = best.result; |
|
best.result = null; |
|
} |
|
} else { |
|
if (re.equals(best.re)) { |
|
o = best.result; |
|
best.result = null; |
|
} |
|
} |
|
return o; |
|
} |
|
/** Search for a match to a string & return the object associated |
|
with it with the match. When multiple regular expressions |
|
would match the string, the best match is returned first. |
|
The next best match is returned the next time matchNext is |
|
called. |
|
@param s The string to match against the regular expressions |
|
in the pool. |
|
@return null on failure, otherwise the object associated with |
|
the regular expression when it was added to the pool. |
|
If the object is an instance of RegexpTarget, then |
|
the return value is the result from calling |
|
return.found(string_that_matched_wildcard). |
|
*/ |
|
public Object match(String s) { |
|
return matchAfter(s, BIG); |
|
} |
|
/** Identical to match except that it will only find matches to |
|
regular expressions that were added to the pool <i>after</i> |
|
the last regular expression that matched in the last call |
|
to match() or matchNext() */ |
|
public Object matchNext(String s) { |
|
return matchAfter(s, lastDepth); |
|
} |
|
private void add(String re, Object ret, boolean replace) throws REException { |
|
int len = re.length(); |
|
RegexpNode p; |
|
if (re.charAt(0) == '*') { |
|
p = suffixMachine; |
|
while (len > 1) |
|
p = p.add(re.charAt(--len)); |
|
} else { |
|
boolean exact = false; |
|
if (re.charAt(len - 1) == '*') |
|
len--; |
|
else |
|
exact = true; |
|
p = prefixMachine; |
|
for (int i = 0; i < len; i++) |
|
p = p.add(re.charAt(i)); |
|
p.exact = exact; |
|
} |
|
if (p.result != null && !replace) |
|
throw new REException(re + " is a duplicate"); |
|
p.re = re; |
|
p.result = ret; |
|
} |
|
private Object matchAfter(String s, int lastMatchDepth) { |
|
RegexpNode p = prefixMachine; |
|
RegexpNode best = p; |
|
int bst = 0; |
|
int bend = 0; |
|
int len = s.length(); |
|
int i; |
|
if (len <= 0) |
|
return null; |
|
/* March forward through the prefix machine */ |
|
for (i = 0; p != null; i++) { |
|
if (p.result != null && p.depth < lastMatchDepth |
|
&& (!p.exact || i == len)) { |
|
lastDepth = p.depth; |
|
best = p; |
|
bst = i; |
|
bend = len; |
|
} |
|
if (i >= len) |
|
break; |
|
p = p.find(s.charAt(i)); |
|
} |
|
/* march backward through the suffix machine */ |
|
p = suffixMachine; |
|
for (i = len; --i >= 0 && p != null;) { |
|
if (p.result != null && p.depth < lastMatchDepth) { |
|
lastDepth = p.depth; |
|
best = p; |
|
bst = 0; |
|
bend = i+1; |
|
} |
|
p = p.find(s.charAt(i)); |
|
} |
|
Object o = best.result; |
|
if (o != null && o instanceof RegexpTarget) |
|
o = ((RegexpTarget) o).found(s.substring(bst, bend)); |
|
return o; |
|
} |
|
/** Resets the pool so that the next call to matchNext looks |
|
at all regular expressions in the pool. match(s); is equivalent |
|
to reset(); matchNext(s); |
|
<p><b>Multithreading note:</b> reset/nextMatch leave state in the |
|
regular expression pool. If multiple threads could be using this |
|
pool this way, they should be syncronized to avoid race hazards. |
|
match() was done in such a way that there are no such race |
|
hazards: multiple threads can be matching in the same pool |
|
simultaneously. */ |
|
public void reset() { |
|
lastDepth = BIG; |
|
} |
|
/** Print this pool to standard output */ |
|
public void print(PrintStream out) { |
|
out.print("Regexp pool:\n"); |
|
if (suffixMachine.firstchild != null) { |
|
out.print(" Suffix machine: "); |
|
suffixMachine.firstchild.print(out); |
|
out.print("\n"); |
|
} |
|
if (prefixMachine.firstchild != null) { |
|
out.print(" Prefix machine: "); |
|
prefixMachine.firstchild.print(out); |
|
out.print("\n"); |
|
} |
|
} |
|
} |
|
/* A node in a regular expression finite state machine. */ |
|
class RegexpNode { |
|
char c; |
|
RegexpNode firstchild; |
|
RegexpNode nextsibling; |
|
int depth; |
|
boolean exact; |
|
Object result; |
|
String re = null; |
|
RegexpNode () { |
|
c = '#'; |
|
depth = 0; |
|
} |
|
RegexpNode (char C, int depth) { |
|
c = C; |
|
this.depth = depth; |
|
} |
|
RegexpNode add(char C) { |
|
RegexpNode p = firstchild; |
|
if (p == null) |
|
p = new RegexpNode (C, depth+1); |
|
else { |
|
while (p != null) |
|
if (p.c == C) |
|
return p; |
|
else |
|
p = p.nextsibling; |
|
p = new RegexpNode (C, depth+1); |
|
p.nextsibling = firstchild; |
|
} |
|
firstchild = p; |
|
return p; |
|
} |
|
RegexpNode find(char C) { |
|
for (RegexpNode p = firstchild; |
|
p != null; |
|
p = p.nextsibling) |
|
if (p.c == C) |
|
return p; |
|
return null; |
|
} |
|
void print(PrintStream out) { |
|
if (nextsibling != null) { |
|
RegexpNode p = this; |
|
out.print("("); |
|
while (p != null) { |
|
out.write(p.c); |
|
if (p.firstchild != null) |
|
p.firstchild.print(out); |
|
p = p.nextsibling; |
|
out.write(p != null ? '|' : ')'); |
|
} |
|
} else { |
|
out.write(c); |
|
if (firstchild != null) |
|
firstchild.print(out); |
|
} |
|
} |
|
} |