227 lines
7.1 KiB
Java
227 lines
7.1 KiB
Java
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
|
|
package android.icu.impl;
|
|
|
|
import android.icu.lang.UCharacter;
|
|
import android.icu.text.UnicodeSet;
|
|
|
|
/**
|
|
* A mutable String wrapper with a variable offset and length and
|
|
* support for case folding. The charAt, length, and subSequence methods all
|
|
* operate relative to the fixed offset into the String.
|
|
*
|
|
* Intended to be useful for parsing.
|
|
*
|
|
* CAUTION: Since this class is mutable, it must not be used anywhere that an
|
|
* immutable object is required, like in a cache or as the key of a hash map.
|
|
*
|
|
* @author sffc (Shane Carr)
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public class StringSegment implements CharSequence {
|
|
private final String str;
|
|
private int start;
|
|
private int end;
|
|
private boolean foldCase;
|
|
|
|
public StringSegment(String str, boolean foldCase) {
|
|
this.str = str;
|
|
this.start = 0;
|
|
this.end = str.length();
|
|
this.foldCase = foldCase;
|
|
}
|
|
|
|
public int getOffset() {
|
|
return start;
|
|
}
|
|
|
|
public void setOffset(int start) {
|
|
assert start <= end;
|
|
this.start = start;
|
|
}
|
|
|
|
/**
|
|
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
|
|
*
|
|
* <p>
|
|
* Number parsing note: This method is usually called by a Matcher to register that a char was
|
|
* consumed. If the char is strong (it usually is, except for things like whitespace), follow this
|
|
* with a call to ParsedNumber#setCharsConsumed(). For more information on strong chars, see that
|
|
* method.
|
|
*/
|
|
public void adjustOffset(int delta) {
|
|
assert start + delta >= 0;
|
|
assert start + delta <= end;
|
|
start += delta;
|
|
}
|
|
|
|
/**
|
|
* Adjusts the offset by the width of the current lead code point, either 1 or 2 chars.
|
|
*/
|
|
public void adjustOffsetByCodePoint() {
|
|
start += Character.charCount(getCodePoint());
|
|
}
|
|
|
|
public void setLength(int length) {
|
|
assert length >= 0;
|
|
assert start + length <= str.length();
|
|
end = start + length;
|
|
}
|
|
|
|
public void resetLength() {
|
|
end = str.length();
|
|
}
|
|
|
|
@Override
|
|
public int length() {
|
|
return end - start;
|
|
}
|
|
|
|
@Override
|
|
public char charAt(int index) {
|
|
return str.charAt(index + start);
|
|
}
|
|
|
|
@Override
|
|
public CharSequence subSequence(int start, int end) {
|
|
return str.subSequence(start + this.start, end + this.start);
|
|
}
|
|
|
|
/**
|
|
* Returns the first code point in the string segment.
|
|
*
|
|
* <p>
|
|
* <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles
|
|
* case folding logic, instead of this method.
|
|
*/
|
|
public int getCodePoint() {
|
|
assert start < end;
|
|
char lead = str.charAt(start);
|
|
char trail;
|
|
if (Character.isHighSurrogate(lead)
|
|
&& start + 1 < end
|
|
&& Character.isLowSurrogate(trail = str.charAt(start + 1))) {
|
|
return Character.toCodePoint(lead, trail);
|
|
}
|
|
return lead;
|
|
}
|
|
|
|
/**
|
|
* Returns the code point at the given index relative to the current offset.
|
|
*/
|
|
public int codePointAt(int index) {
|
|
return str.codePointAt(start + index);
|
|
}
|
|
|
|
/**
|
|
* Returns true if the first code point of this StringSegment equals the given code point.
|
|
*
|
|
* <p>
|
|
* This method will perform case folding if case folding is enabled for the parser.
|
|
*/
|
|
public boolean startsWith(int otherCp) {
|
|
return codePointsEqual(getCodePoint(), otherCp, foldCase);
|
|
}
|
|
|
|
/**
|
|
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
|
|
*/
|
|
public boolean startsWith(UnicodeSet uniset) {
|
|
// TODO: Move UnicodeSet case-folding logic here.
|
|
// TODO: Handle string matches here instead of separately.
|
|
int cp = getCodePoint();
|
|
if (cp == -1) {
|
|
return false;
|
|
}
|
|
return uniset.contains(cp);
|
|
}
|
|
|
|
/**
|
|
* Returns true if there is at least one code point of overlap between this StringSegment and the
|
|
* given CharSequence. Null-safe.
|
|
*/
|
|
public boolean startsWith(CharSequence other) {
|
|
if (other == null || other.length() == 0 || length() == 0) {
|
|
return false;
|
|
}
|
|
int cp1 = Character.codePointAt(this, 0);
|
|
int cp2 = Character.codePointAt(other, 0);
|
|
return codePointsEqual(cp1, cp2, foldCase);
|
|
}
|
|
|
|
/**
|
|
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
|
|
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
|
|
* since the first 2 characters are the same.
|
|
*
|
|
* <p>
|
|
* This method only returns offsets along code point boundaries.
|
|
*
|
|
* <p>
|
|
* This method will perform case folding if case folding was enabled in the constructor.
|
|
*
|
|
* <p>
|
|
* IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check.
|
|
*/
|
|
public int getCommonPrefixLength(CharSequence other) {
|
|
return getPrefixLengthInternal(other, foldCase);
|
|
}
|
|
|
|
/**
|
|
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding was
|
|
* enabled in the constructor.
|
|
*/
|
|
public int getCaseSensitivePrefixLength(CharSequence other) {
|
|
return getPrefixLengthInternal(other, false);
|
|
}
|
|
|
|
private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
|
|
assert other.length() != 0;
|
|
int offset = 0;
|
|
for (; offset < Math.min(length(), other.length());) {
|
|
// TODO: case-fold code points, not chars
|
|
int cp1 = Character.codePointAt(this, offset);
|
|
int cp2 = Character.codePointAt(other, offset);
|
|
if (!codePointsEqual(cp1, cp2, foldCase)) {
|
|
break;
|
|
}
|
|
offset += Character.charCount(cp1);
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
|
|
if (cp1 == cp2) {
|
|
return true;
|
|
}
|
|
if (!foldCase) {
|
|
return false;
|
|
}
|
|
cp1 = UCharacter.foldCase(cp1, true);
|
|
cp2 = UCharacter.foldCase(cp2, true);
|
|
return cp1 == cp2;
|
|
}
|
|
|
|
/**
|
|
* Returns true if this segment contains the same characters as the other CharSequence.
|
|
*
|
|
* <p>This method does not perform case folding; if you want case-insensitive equality, use
|
|
* {@link #getCommonPrefixLength}.
|
|
*/
|
|
public boolean contentEquals(CharSequence other) {
|
|
return Utility.charSequenceEquals(this, other);
|
|
}
|
|
|
|
/** Returns a string representation useful for debugging. */
|
|
@Override
|
|
public String toString() {
|
|
return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
|
|
}
|
|
|
|
/** Returns a String that is equivalent to the CharSequence representation. */
|
|
public String asString() {
|
|
return str.substring(start, end);
|
|
}
|
|
}
|