300 lines
10 KiB
Java
300 lines
10 KiB
Java
![]() |
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2001-2004, International Business Machines Corporation and *
 * others. All Rights Reserved. *
 *******************************************************************************
 */
|
||
|
package android.icu.text;
|
||
|
import android.icu.impl.Utility;
|
||
|
|
||
|
/**
 * An object that matches a fixed input string, implementing the
 * UnicodeMatcher API. This object also implements the
 * UnicodeReplacer API, allowing it to emit the matched text as
 * output. Since the match text may contain flexible match elements,
 * such as UnicodeSets, the emitted text is not the match pattern, but
 * instead a substring of the actual matched text. Following
 * convention, the output text is the rightmost match seen up to this
 * point.
 *
 * A StringMatcher may represent a segment, in which case it has a
 * positive segment number. This affects how the matcher converts
 * itself to a pattern but does not otherwise affect its function.
 *
 * A StringMatcher that is not a segment should not be used as a
 * UnicodeReplacer.
 */
|
||
|
class StringMatcher implements UnicodeMatcher, UnicodeReplacer {
|
||
|
|
||
|
/**
|
||
|
* The text to be matched.
|
||
|
*/
|
||
|
private String pattern;
|
||
|
|
||
|
/**
|
||
|
* Start offset, in the match text, of the <em>rightmost</em>
|
||
|
* match.
|
||
|
*/
|
||
|
private int matchStart;
|
||
|
|
||
|
/**
|
||
|
* Limit offset, in the match text, of the <em>rightmost</em>
|
||
|
* match.
|
||
|
*/
|
||
|
private int matchLimit;
|
||
|
|
||
|
/**
|
||
|
* The segment number, 1-based, or 0 if not a segment.
|
||
|
*/
|
||
|
private int segmentNumber;
|
||
|
|
||
|
/**
|
||
|
* Context object that maps stand-ins to matcher and replacer
|
||
|
* objects.
|
||
|
*/
|
||
|
private final RuleBasedTransliterator.Data data;
|
||
|
|
||
|
/**
|
||
|
* Construct a matcher that matches the given pattern string.
|
||
|
* @param theString the pattern to be matched, possibly containing
|
||
|
* stand-ins that represent nested UnicodeMatcher objects.
|
||
|
* @param segmentNum the segment number from 1..n, or 0 if this is
|
||
|
* not a segment.
|
||
|
* @param theData context object mapping stand-ins to
|
||
|
* UnicodeMatcher objects.
|
||
|
*/
|
||
|
public StringMatcher(String theString,
|
||
|
int segmentNum,
|
||
|
RuleBasedTransliterator.Data theData) {
|
||
|
data = theData;
|
||
|
pattern = theString;
|
||
|
matchStart = matchLimit = -1;
|
||
|
segmentNumber = segmentNum;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Construct a matcher that matches a substring of the given
|
||
|
* pattern string.
|
||
|
* @param theString the pattern to be matched, possibly containing
|
||
|
* stand-ins that represent nested UnicodeMatcher objects.
|
||
|
* @param start first character of theString to be matched
|
||
|
* @param limit index after the last character of theString to be
|
||
|
* matched.
|
||
|
* @param segmentNum the segment number from 1..n, or 0 if this is
|
||
|
* not a segment.
|
||
|
* @param theData context object mapping stand-ins to
|
||
|
* UnicodeMatcher objects.
|
||
|
*/
|
||
|
public StringMatcher(String theString,
|
||
|
int start,
|
||
|
int limit,
|
||
|
int segmentNum,
|
||
|
RuleBasedTransliterator.Data theData) {
|
||
|
this(theString.substring(start, limit), segmentNum, theData);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Implement UnicodeMatcher
|
||
|
*/
|
||
|
@Override
|
||
|
public int matches(Replaceable text,
|
||
|
int[] offset,
|
||
|
int limit,
|
||
|
boolean incremental) {
|
||
|
// Note (1): We process text in 16-bit code units, rather than
|
||
|
// 32-bit code points. This works because stand-ins are
|
||
|
// always in the BMP and because we are doing a literal match
|
||
|
// operation, which can be done 16-bits at a time.
|
||
|
int i;
|
||
|
int[] cursor = new int[] { offset[0] };
|
||
|
if (limit < cursor[0]) {
|
||
|
// Match in the reverse direction
|
||
|
for (i=pattern.length()-1; i>=0; --i) {
|
||
|
char keyChar = pattern.charAt(i); // OK; see note (1) above
|
||
|
UnicodeMatcher subm = data.lookupMatcher(keyChar);
|
||
|
if (subm == null) {
|
||
|
if (cursor[0] > limit &&
|
||
|
keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
|
||
|
--cursor[0];
|
||
|
} else {
|
||
|
return U_MISMATCH;
|
||
|
}
|
||
|
} else {
|
||
|
int m =
|
||
|
subm.matches(text, cursor, limit, incremental);
|
||
|
if (m != U_MATCH) {
|
||
|
return m;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Record the match position, but adjust for a normal
|
||
|
// forward start, limit, and only if a prior match does not
|
||
|
// exist -- we want the rightmost match.
|
||
|
if (matchStart < 0) {
|
||
|
matchStart = cursor[0]+1;
|
||
|
matchLimit = offset[0]+1;
|
||
|
}
|
||
|
} else {
|
||
|
for (i=0; i<pattern.length(); ++i) {
|
||
|
if (incremental && cursor[0] == limit) {
|
||
|
// We've reached the context limit without a mismatch and
|
||
|
// without completing our match.
|
||
|
return U_PARTIAL_MATCH;
|
||
|
}
|
||
|
char keyChar = pattern.charAt(i); // OK; see note (1) above
|
||
|
UnicodeMatcher subm = data.lookupMatcher(keyChar);
|
||
|
if (subm == null) {
|
||
|
// Don't need the cursor < limit check if
|
||
|
// incremental is true (because it's done above); do need
|
||
|
// it otherwise.
|
||
|
if (cursor[0] < limit &&
|
||
|
keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
|
||
|
++cursor[0];
|
||
|
} else {
|
||
|
return U_MISMATCH;
|
||
|
}
|
||
|
} else {
|
||
|
int m =
|
||
|
subm.matches(text, cursor, limit, incremental);
|
||
|
if (m != U_MATCH) {
|
||
|
return m;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Record the match position
|
||
|
matchStart = offset[0];
|
||
|
matchLimit = cursor[0];
|
||
|
}
|
||
|
|
||
|
offset[0] = cursor[0];
|
||
|
return U_MATCH;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Implement UnicodeMatcher
|
||
|
*/
|
||
|
@Override
|
||
|
public String toPattern(boolean escapeUnprintable) {
|
||
|
StringBuffer result = new StringBuffer();
|
||
|
StringBuffer quoteBuf = new StringBuffer();
|
||
|
if (segmentNumber > 0) { // i.e., if this is a segment
|
||
|
result.append('(');
|
||
|
}
|
||
|
for (int i=0; i<pattern.length(); ++i) {
|
||
|
char keyChar = pattern.charAt(i); // OK; see note (1) above
|
||
|
UnicodeMatcher m = data.lookupMatcher(keyChar);
|
||
|
if (m == null) {
|
||
|
Utility.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf);
|
||
|
} else {
|
||
|
Utility.appendToRule(result, m.toPattern(escapeUnprintable),
|
||
|
true, escapeUnprintable, quoteBuf);
|
||
|
}
|
||
|
}
|
||
|
if (segmentNumber > 0) { // i.e., if this is a segment
|
||
|
result.append(')');
|
||
|
}
|
||
|
// Flush quoteBuf out to result
|
||
|
Utility.appendToRule(result, -1,
|
||
|
true, escapeUnprintable, quoteBuf);
|
||
|
return result.toString();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Implement UnicodeMatcher
|
||
|
*/
|
||
|
@Override
|
||
|
public boolean matchesIndexValue(int v) {
|
||
|
if (pattern.length() == 0) {
|
||
|
return true;
|
||
|
}
|
||
|
int c = UTF16.charAt(pattern, 0);
|
||
|
UnicodeMatcher m = data.lookupMatcher(c);
|
||
|
return (m == null) ? ((c & 0xFF) == v) : m.matchesIndexValue(v);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Implementation of UnicodeMatcher API. Union the set of all
|
||
|
* characters that may be matched by this object into the given
|
||
|
* set.
|
||
|
* @param toUnionTo the set into which to union the source characters
|
||
|
*/
|
||
|
@Override
|
||
|
public void addMatchSetTo(UnicodeSet toUnionTo) {
|
||
|
int ch;
|
||
|
for (int i=0; i<pattern.length(); i+=UTF16.getCharCount(ch)) {
|
||
|
ch = UTF16.charAt(pattern, i);
|
||
|
UnicodeMatcher matcher = data.lookupMatcher(ch);
|
||
|
if (matcher == null) {
|
||
|
toUnionTo.add(ch);
|
||
|
} else {
|
||
|
matcher.addMatchSetTo(toUnionTo);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* UnicodeReplacer API
|
||
|
*/
|
||
|
@Override
|
||
|
public int replace(Replaceable text,
|
||
|
int start,
|
||
|
int limit,
|
||
|
int[] cursor) {
|
||
|
|
||
|
int outLen = 0;
|
||
|
|
||
|
// Copy segment with out-of-band data
|
||
|
int dest = limit;
|
||
|
// If there was no match, that means that a quantifier
|
||
|
// matched zero-length. E.g., x (a)* y matched "xy".
|
||
|
if (matchStart >= 0) {
|
||
|
if (matchStart != matchLimit) {
|
||
|
text.copy(matchStart, matchLimit, dest);
|
||
|
outLen = matchLimit - matchStart;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
text.replace(start, limit, ""); // delete original text
|
||
|
|
||
|
return outLen;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* UnicodeReplacer API
|
||
|
*/
|
||
|
@Override
|
||
|
public String toReplacerPattern(boolean escapeUnprintable) {
|
||
|
// assert(segmentNumber > 0);
|
||
|
StringBuffer rule = new StringBuffer("$");
|
||
|
Utility.appendNumber(rule, segmentNumber, 10, 1);
|
||
|
return rule.toString();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Remove any match data. This must be called before performing a
|
||
|
* set of matches with this segment.
|
||
|
*/
|
||
|
public void resetMatch() {
|
||
|
matchStart = matchLimit = -1;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Union the set of all characters that may output by this object
|
||
|
* into the given set.
|
||
|
* @param toUnionTo the set into which to union the output characters
|
||
|
*/
|
||
|
@Override
|
||
|
public void addReplacementSetTo(UnicodeSet toUnionTo) {
|
||
|
// The output of this replacer varies; it is the source text between
|
||
|
// matchStart and matchLimit. Since this varies depending on the
|
||
|
// input text, we can't compute it here. We can either do nothing
|
||
|
// or we can add ALL characters to the set. It's probably more useful
|
||
|
// to do nothing.
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//eof
|