1203 lines
44 KiB
Java
1203 lines
44 KiB
Java
/* GENERATED SOURCE. DO NOT MODIFY. */
|
|
// © 2018 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
// created: 2018may04 Markus W. Scherer
|
|
|
|
package android.icu.util;
|
|
|
|
import java.io.DataOutputStream;
|
|
import java.io.IOException;
|
|
import java.io.OutputStream;
|
|
import java.nio.ByteBuffer;
|
|
import java.nio.ByteOrder;
|
|
|
|
import android.icu.impl.ICUBinary;
|
|
import android.icu.impl.Normalizer2Impl.UTF16Plus;
|
|
|
|
/**
|
|
* Immutable Unicode code point trie.
|
|
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
|
* For details see https://icu.unicode.org/design/struct/utrie
|
|
*
|
|
* <p>This class is not intended for public subclassing.
|
|
*
|
|
* @see MutableCodePointTrie
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public abstract class CodePointTrie extends CodePointMap {
|
|
/**
 * Identifies which of the two trie layouts a CodePointTrie uses.
 * The layouts trade data size against lookup speed.
 *
 * <p>Pass null to {@link #fromBinary} to accept either layout;
 * {@link #getType} then reports the layout that was actually found.
 *
 * <p>The declaration order matters: the ordinal of each constant is what
 * {@link #toBinary} stores in the type bits of the serialized header.
 *
 * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
 * @see #fromBinary
 * @see #getType
 * @hide Only a subset of ICU is exposed in Android
 */
public enum Type {
    /**
     * Larger layout with a simple, fast structure for BMP code points.
     * The {@link Fast} subclasses offer extra lookup functions for
     * BMP and supplementary code points.
     *
     * @see Fast
     */
    FAST,

    /**
     * Smaller layout whose BMP lookups are slower.
     *
     * @see Small
     */
    SMALL
}
|
|
|
|
/**
 * Identifies how many bits a CodePointTrie stores for each data value.
 *
 * <p>Pass null to {@link #fromBinary} to accept any width;
 * {@link #getValueWidth} then reports the width that was actually found.
 *
 * <p>The declaration order matters: the ordinal of each constant is what
 * {@link #toBinary} stores in the value-width bits of the serialized header.
 *
 * @hide Only a subset of ICU is exposed in Android
 */
public enum ValueWidth {
    /**
     * 16 bits per data value.
     * Values are returned unsigned, in the range 0..0xffff=65535.
     */
    BITS_16,

    /**
     * 32 bits per data value.
     */
    BITS_32,

    /**
     * 8 bits per data value.
     * Values are returned unsigned, in the range 0..0xff=255.
     */
    BITS_8
}
|
|
|
|
private CodePointTrie(char[] index, Data data, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
this.ascii = new int[ASCII_LIMIT];
|
|
this.index = index;
|
|
this.data = data;
|
|
this.dataLength = data.getDataLength();
|
|
this.highStart = highStart;
|
|
this.index3NullOffset = index3NullOffset;
|
|
this.dataNullOffset = dataNullOffset;
|
|
|
|
for (int c = 0; c < ASCII_LIMIT; ++c) {
|
|
ascii[c] = data.getFromIndex(c);
|
|
}
|
|
|
|
int nullValueOffset = dataNullOffset;
|
|
if (nullValueOffset >= dataLength) {
|
|
nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
nullValue = data.getFromIndex(nullValueOffset);
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form,
|
|
* stored in the ByteBuffer starting at the current position.
|
|
* Advances the buffer position to just after the trie data.
|
|
* Inverse of {@link #toBinary(OutputStream)}.
|
|
*
|
|
* <p>The data is copied from the buffer;
|
|
* later modification of the buffer will not affect the trie.
|
|
*
|
|
* @param type selects the trie type; this method throws an exception
|
|
* if the type does not match the binary data;
|
|
* use null to accept any type
|
|
* @param valueWidth selects the number of bits in a data value; this method throws an exception
|
|
* if the valueWidth does not match the binary data;
|
|
* use null to accept any data value width
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
* @see MutableCodePointTrie#MutableCodePointTrie(int, int)
|
|
* @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
|
|
* @see #toBinary(OutputStream)
|
|
*/
|
|
public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) {
|
|
ByteOrder outerByteOrder = bytes.order();
|
|
try {
|
|
// Enough data for a trie header?
|
|
if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) {
|
|
throw new ICUUncheckedIOException("Buffer too short for a CodePointTrie header");
|
|
}
|
|
|
|
// struct UCPTrieHeader
|
|
/** "Tri3" in big-endian US-ASCII (0x54726933) */
|
|
int signature = bytes.getInt();
|
|
|
|
// Check the signature.
|
|
switch (signature) {
|
|
case 0x54726933:
|
|
// The buffer is already set to the trie data byte order.
|
|
break;
|
|
case 0x33697254:
|
|
// Temporarily reverse the byte order.
|
|
boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
|
|
bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
|
|
signature = 0x54726933;
|
|
break;
|
|
default:
|
|
throw new ICUUncheckedIOException("Buffer does not contain a serialized CodePointTrie");
|
|
}
|
|
|
|
// struct UCPTrieHeader continued
|
|
/**
|
|
* Options bit field:
|
|
* Bits 15..12: Data length bits 19..16.
|
|
* Bits 11..8: Data null block offset bits 19..16.
|
|
* Bits 7..6: UCPTrieType
|
|
* Bits 5..3: Reserved (0).
|
|
* Bits 2..0: UCPTrieValueWidth
|
|
*/
|
|
int options = bytes.getChar();
|
|
|
|
/** Total length of the index tables. */
|
|
int indexLength = bytes.getChar();
|
|
|
|
/** Data length bits 15..0. */
|
|
int dataLength = bytes.getChar();
|
|
|
|
/** Index-3 null block offset, 0x7fff or 0xffff if none. */
|
|
int index3NullOffset = bytes.getChar();
|
|
|
|
/** Data null block offset bits 15..0, 0xfffff if none. */
|
|
int dataNullOffset = bytes.getChar();
|
|
|
|
/**
|
|
* First code point of the single-value range ending with U+10ffff,
|
|
* rounded up and then shifted right by SHIFT_2.
|
|
*/
|
|
int shiftedHighStart = bytes.getChar();
|
|
// struct UCPTrieHeader end
|
|
|
|
int typeInt = (options >> 6) & 3;
|
|
Type actualType;
|
|
switch (typeInt) {
|
|
case 0: actualType = Type.FAST; break;
|
|
case 1: actualType = Type.SMALL; break;
|
|
default:
|
|
throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported type");
|
|
}
|
|
|
|
int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK;
|
|
ValueWidth actualValueWidth;
|
|
switch (valueWidthInt) {
|
|
case 0: actualValueWidth = ValueWidth.BITS_16; break;
|
|
case 1: actualValueWidth = ValueWidth.BITS_32; break;
|
|
case 2: actualValueWidth = ValueWidth.BITS_8; break;
|
|
default:
|
|
throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported value width");
|
|
}
|
|
|
|
if ((options & OPTIONS_RESERVED_MASK) != 0) {
|
|
throw new ICUUncheckedIOException("CodePointTrie data header has unsupported options");
|
|
}
|
|
|
|
if (type == null) {
|
|
type = actualType;
|
|
}
|
|
if (valueWidth == null) {
|
|
valueWidth = actualValueWidth;
|
|
}
|
|
if (type != actualType || valueWidth != actualValueWidth) {
|
|
throw new ICUUncheckedIOException("CodePointTrie data header has a different type or value width than required");
|
|
}
|
|
|
|
// Get the length values and offsets.
|
|
dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4);
|
|
dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8);
|
|
|
|
int highStart = shiftedHighStart << SHIFT_2;
|
|
|
|
// Calculate the actual length, minus the header.
|
|
int actualLength = indexLength * 2;
|
|
if (valueWidth == ValueWidth.BITS_16) {
|
|
actualLength += dataLength * 2;
|
|
} else if (valueWidth == ValueWidth.BITS_32) {
|
|
actualLength += dataLength * 4;
|
|
} else {
|
|
actualLength += dataLength;
|
|
}
|
|
if (bytes.remaining() < actualLength) {
|
|
throw new ICUUncheckedIOException("Buffer too short for the CodePointTrie data");
|
|
}
|
|
|
|
char[] index = ICUBinary.getChars(bytes, indexLength, 0);
|
|
switch (valueWidth) {
|
|
case BITS_16: {
|
|
char[] data16 = ICUBinary.getChars(bytes, dataLength, 0);
|
|
return type == Type.FAST ?
|
|
new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) :
|
|
new Small16(index, data16, highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
case BITS_32: {
|
|
int[] data32 = ICUBinary.getInts(bytes, dataLength, 0);
|
|
return type == Type.FAST ?
|
|
new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) :
|
|
new Small32(index, data32, highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
case BITS_8: {
|
|
byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0);
|
|
return type == Type.FAST ?
|
|
new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) :
|
|
new Small8(index, data8, highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
default:
|
|
throw new AssertionError("should be unreachable");
|
|
}
|
|
} finally {
|
|
bytes.order(outerByteOrder);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the trie type.
|
|
*
|
|
* @return the trie type
|
|
*/
|
|
public abstract Type getType();
|
|
/**
|
|
* Returns the number of bits in a trie data value.
|
|
*
|
|
* @return the number of bits in a trie data value
|
|
*/
|
|
public final ValueWidth getValueWidth() { return data.getValueWidth(); }
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public int get(int c) {
|
|
return data.getFromIndex(cpIndex(c));
|
|
}
|
|
|
|
/**
|
|
* Returns a trie value for an ASCII code point, without range checking.
|
|
*
|
|
* @param c the input code point; must be U+0000..U+007F
|
|
* @return The ASCII code point's trie value.
|
|
*/
|
|
public final int asciiGet(int c) {
|
|
return ascii[c];
|
|
}
|
|
|
|
private static final int MAX_UNICODE = 0x10ffff;
|
|
|
|
private static final int ASCII_LIMIT = 0x80;
|
|
|
|
private static final int maybeFilterValue(int value, int trieNullValue, int nullValue,
|
|
ValueFilter filter) {
|
|
if (value == trieNullValue) {
|
|
value = nullValue;
|
|
} else if (filter != null) {
|
|
value = filter.apply(value);
|
|
}
|
|
return value;
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final boolean getRange(int start, ValueFilter filter, Range range) {
|
|
if (start < 0 || MAX_UNICODE < start) {
|
|
return false;
|
|
}
|
|
if (start >= highStart) {
|
|
int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
|
|
int value = data.getFromIndex(di);
|
|
if (filter != null) { value = filter.apply(value); }
|
|
range.set(start, MAX_UNICODE, value);
|
|
return true;
|
|
}
|
|
|
|
int nullValue = this.nullValue;
|
|
if (filter != null) { nullValue = filter.apply(nullValue); }
|
|
Type type = getType();
|
|
|
|
int prevI3Block = -1;
|
|
int prevBlock = -1;
|
|
int c = start;
|
|
// Initialize to make compiler happy. Real value when haveValue is true.
|
|
int trieValue = 0, value = 0;
|
|
boolean haveValue = false;
|
|
do {
|
|
int i3Block;
|
|
int i3;
|
|
int i3BlockLength;
|
|
int dataBlockLength;
|
|
if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) {
|
|
i3Block = 0;
|
|
i3 = c >> FAST_SHIFT;
|
|
i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH;
|
|
dataBlockLength = FAST_DATA_BLOCK_LENGTH;
|
|
} else {
|
|
// Use the multi-stage index.
|
|
int i1 = c >> SHIFT_1;
|
|
if (type == Type.FAST) {
|
|
assert(0xffff < c && c < highStart);
|
|
i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
|
|
} else {
|
|
assert(c < highStart && highStart > SMALL_LIMIT);
|
|
i1 += SMALL_INDEX_LENGTH;
|
|
}
|
|
i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
|
|
if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) {
|
|
// The index-3 block is the same as the previous one, and filled with value.
|
|
assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
|
|
c += CP_PER_INDEX_2_ENTRY;
|
|
continue;
|
|
}
|
|
prevI3Block = i3Block;
|
|
if (i3Block == index3NullOffset) {
|
|
// This is the index-3 null block.
|
|
if (haveValue) {
|
|
if (nullValue != value) {
|
|
range.set(start, c - 1, value);
|
|
return true;
|
|
}
|
|
} else {
|
|
trieValue = this.nullValue;
|
|
value = nullValue;
|
|
haveValue = true;
|
|
}
|
|
prevBlock = dataNullOffset;
|
|
c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1);
|
|
continue;
|
|
}
|
|
i3 = (c >> SHIFT_3) & INDEX_3_MASK;
|
|
i3BlockLength = INDEX_3_BLOCK_LENGTH;
|
|
dataBlockLength = SMALL_DATA_BLOCK_LENGTH;
|
|
}
|
|
// Enumerate data blocks for one index-3 block.
|
|
do {
|
|
int block;
|
|
if ((i3Block & 0x8000) == 0) {
|
|
block = index[i3Block + i3];
|
|
} else {
|
|
// 18-bit indexes stored in groups of 9 entries per 8 indexes.
|
|
int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
|
|
int gi = i3 & 7;
|
|
block = (index[group++] << (2 + (2 * gi))) & 0x30000;
|
|
block |= index[group + gi];
|
|
}
|
|
if (block == prevBlock && (c - start) >= dataBlockLength) {
|
|
// The block is the same as the previous one, and filled with value.
|
|
assert((c & (dataBlockLength - 1)) == 0);
|
|
c += dataBlockLength;
|
|
} else {
|
|
int dataMask = dataBlockLength - 1;
|
|
prevBlock = block;
|
|
if (block == dataNullOffset) {
|
|
// This is the data null block.
|
|
if (haveValue) {
|
|
if (nullValue != value) {
|
|
range.set(start, c - 1, value);
|
|
return true;
|
|
}
|
|
} else {
|
|
trieValue = this.nullValue;
|
|
value = nullValue;
|
|
haveValue = true;
|
|
}
|
|
c = (c + dataBlockLength) & ~dataMask;
|
|
} else {
|
|
int di = block + (c & dataMask);
|
|
int trieValue2 = data.getFromIndex(di);
|
|
if (haveValue) {
|
|
if (trieValue2 != trieValue) {
|
|
if (filter == null ||
|
|
maybeFilterValue(trieValue2, this.nullValue, nullValue,
|
|
filter) != value) {
|
|
range.set(start, c - 1, value);
|
|
return true;
|
|
}
|
|
trieValue = trieValue2; // may or may not help
|
|
}
|
|
} else {
|
|
trieValue = trieValue2;
|
|
value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter);
|
|
haveValue = true;
|
|
}
|
|
while ((++c & dataMask) != 0) {
|
|
trieValue2 = data.getFromIndex(++di);
|
|
if (trieValue2 != trieValue) {
|
|
if (filter == null ||
|
|
maybeFilterValue(trieValue2, this.nullValue, nullValue,
|
|
filter) != value) {
|
|
range.set(start, c - 1, value);
|
|
return true;
|
|
}
|
|
trieValue = trieValue2; // may or may not help
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} while (++i3 < i3BlockLength);
|
|
} while (c < highStart);
|
|
assert(haveValue);
|
|
int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
|
|
int highValue = data.getFromIndex(di);
|
|
if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) {
|
|
--c;
|
|
} else {
|
|
c = MAX_UNICODE;
|
|
}
|
|
range.set(start, c, value);
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Writes a representation of the trie to the output stream.
|
|
* Inverse of {@link #fromBinary}.
|
|
*
|
|
* @param os the output stream
|
|
* @return the number of bytes written
|
|
*/
|
|
public final int toBinary(OutputStream os) {
|
|
try {
|
|
DataOutputStream dos = new DataOutputStream(os);
|
|
|
|
// Write the UCPTrieHeader
|
|
dos.writeInt(0x54726933); // signature="Tri3"
|
|
dos.writeChar( // options
|
|
((dataLength & 0xf0000) >> 4) |
|
|
((dataNullOffset & 0xf0000) >> 8) |
|
|
(getType().ordinal() << 6) |
|
|
getValueWidth().ordinal());
|
|
dos.writeChar(index.length);
|
|
dos.writeChar(dataLength);
|
|
dos.writeChar(index3NullOffset);
|
|
dos.writeChar(dataNullOffset);
|
|
dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart
|
|
int length = 16; // sizeof(UCPTrieHeader)
|
|
|
|
for (char i : index) { dos.writeChar(i); }
|
|
length += index.length * 2;
|
|
length += data.write(dos);
|
|
return length;
|
|
} catch (IOException e) {
|
|
throw new ICUUncheckedIOException(e);
|
|
}
|
|
}
|
|
|
|
/** @hide draft / provisional / internal are hidden on Android*/
|
|
static final int FAST_SHIFT = 6;
|
|
|
|
/** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
|
|
static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT;
|
|
|
|
/** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
|
|
private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1;
|
|
|
|
/** @hide draft / provisional / internal are hidden on Android*/
|
|
private static final int SMALL_MAX = 0xfff;
|
|
|
|
/**
|
|
* Offset from dataLength (to be subtracted) for fetching the
|
|
* value returned for out-of-range code points and ill-formed UTF-8/16.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1;
|
|
/**
|
|
* Offset from dataLength (to be subtracted) for fetching the
|
|
* value returned for code points highStart..U+10FFFF.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2;
|
|
|
|
// ucptrie_impl.h
|
|
|
|
/** The length of the BMP index table. 1024=0x400 */
|
|
private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT;
|
|
|
|
static final int SMALL_LIMIT = 0x1000;
|
|
private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT;
|
|
|
|
/** Shift size for getting the index-3 table offset. */
|
|
static final int SHIFT_3 = 4;
|
|
|
|
/** Shift size for getting the index-2 table offset. */
|
|
private static final int SHIFT_2 = 5 + SHIFT_3;
|
|
|
|
/** Shift size for getting the index-1 table offset. */
|
|
private static final int SHIFT_1 = 5 + SHIFT_2;
|
|
|
|
/**
|
|
* Difference between two shift sizes,
|
|
* for getting an index-2 offset from an index-3 offset. 5=9-4
|
|
*/
|
|
static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3;
|
|
|
|
/**
|
|
* Difference between two shift sizes,
|
|
* for getting an index-1 offset from an index-2 offset. 5=14-9
|
|
*/
|
|
static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2;
|
|
|
|
/**
|
|
* Number of index-1 entries for the BMP. (4)
|
|
* This part of the index-1 table is omitted from the serialized form.
|
|
*/
|
|
private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;
|
|
|
|
/** Number of entries in an index-2 block. 32=0x20 */
|
|
static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;
|
|
|
|
/** Mask for getting the lower bits for the in-index-2-block offset. */
|
|
static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;
|
|
|
|
/** Number of code points per index-2 table entry. 512=0x200 */
|
|
static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2;
|
|
|
|
/** Number of entries in an index-3 block. 32=0x20 */
|
|
static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3;
|
|
|
|
/** Mask for getting the lower bits for the in-index-3-block offset. */
|
|
private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1;
|
|
|
|
/** Number of entries in a small data block. 16=0x10 */
|
|
static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3;
|
|
|
|
/** Mask for getting the lower bits for the in-small-data-block offset. */
|
|
static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1;
|
|
|
|
// ucptrie_impl.h: Constants for use with UCPTrieHeader.options.
|
|
private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000;
|
|
private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00;
|
|
private static final int OPTIONS_RESERVED_MASK = 0x38;
|
|
private static final int OPTIONS_VALUE_BITS_MASK = 7;
|
|
/**
|
|
* Value for index3NullOffset which indicates that there is no index-3 null block.
|
|
* Bit 15 is unused for this value because this bit is used if the index-3 contains
|
|
* 18-bit indexes.
|
|
*/
|
|
static final int NO_INDEX3_NULL_OFFSET = 0x7fff;
|
|
static final int NO_DATA_NULL_OFFSET = 0xfffff;
|
|
|
|
private static abstract class Data {
|
|
abstract ValueWidth getValueWidth();
|
|
abstract int getDataLength();
|
|
abstract int getFromIndex(int index);
|
|
abstract int write(DataOutputStream dos) throws IOException;
|
|
}
|
|
|
|
private static final class Data16 extends Data {
|
|
char[] array;
|
|
Data16(char[] a) { array = a; }
|
|
@Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; }
|
|
@Override int getDataLength() { return array.length; }
|
|
@Override int getFromIndex(int index) { return array[index]; }
|
|
@Override int write(DataOutputStream dos) throws IOException {
|
|
for (char v : array) { dos.writeChar(v); }
|
|
return array.length * 2;
|
|
}
|
|
}
|
|
|
|
private static final class Data32 extends Data {
|
|
int[] array;
|
|
Data32(int[] a) { array = a; }
|
|
@Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; }
|
|
@Override int getDataLength() { return array.length; }
|
|
@Override int getFromIndex(int index) { return array[index]; }
|
|
@Override int write(DataOutputStream dos) throws IOException {
|
|
for (int v : array) { dos.writeInt(v); }
|
|
return array.length * 4;
|
|
}
|
|
}
|
|
|
|
private static final class Data8 extends Data {
|
|
byte[] array;
|
|
Data8(byte[] a) { array = a; }
|
|
@Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; }
|
|
@Override int getDataLength() { return array.length; }
|
|
@Override int getFromIndex(int index) { return array[index] & 0xff; }
|
|
@Override int write(DataOutputStream dos) throws IOException {
|
|
for (byte v : array) { dos.writeByte(v); }
|
|
return array.length;
|
|
}
|
|
}
|
|
|
|
/** @hide draft / provisional / internal are hidden on Android*/
|
|
private final int[] ascii;
|
|
|
|
/** @hide draft / provisional / internal are hidden on Android*/
|
|
private final char[] index;
|
|
|
|
/**
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
protected final Data data;
|
|
/**
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
protected final int dataLength;
|
|
/**
|
|
* Start of the last range which ends at U+10FFFF.
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
protected final int highStart;
|
|
|
|
/**
|
|
* Internal index-3 null block offset.
|
|
* Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
private final int index3NullOffset;
|
|
/**
|
|
* Internal data null block offset, not shifted.
|
|
* Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
private final int dataNullOffset;
|
|
/** @hide draft / provisional / internal are hidden on Android*/
|
|
private final int nullValue;
|
|
|
|
/**
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
protected final int fastIndex(int c) {
|
|
return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK);
|
|
}
|
|
|
|
/**
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
protected final int smallIndex(Type type, int c) {
|
|
// Split into two methods to make this part inline-friendly.
|
|
// In C, this part is a macro.
|
|
if (c >= highStart) {
|
|
return dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
return internalSmallIndex(type, c);
|
|
}
|
|
|
|
private final int internalSmallIndex(Type type, int c) {
|
|
int i1 = c >> SHIFT_1;
|
|
if (type == Type.FAST) {
|
|
assert(0xffff < c && c < highStart);
|
|
i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
|
|
} else {
|
|
assert(0 <= c && c < highStart && highStart > SMALL_LIMIT);
|
|
i1 += SMALL_INDEX_LENGTH;
|
|
}
|
|
int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
|
|
int i3 = (c >> SHIFT_3) & INDEX_3_MASK;
|
|
int dataBlock;
|
|
if ((i3Block & 0x8000) == 0) {
|
|
// 16-bit indexes
|
|
dataBlock = index[i3Block + i3];
|
|
} else {
|
|
// 18-bit indexes stored in groups of 9 entries per 8 indexes.
|
|
i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
|
|
i3 &= 7;
|
|
dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000;
|
|
dataBlock |= index[i3Block + i3];
|
|
}
|
|
return dataBlock + (c & SMALL_DATA_MASK);
|
|
}
|
|
|
|
/**
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
protected abstract int cpIndex(int c);
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#FAST}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static abstract class Fast extends CodePointTrie {
|
|
private Fast(char[] index, Data data, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, data, highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#FAST}.
|
|
*
|
|
* @param valueWidth selects the number of bits in a data value; this method throws an exception
|
|
* if the valueWidth does not match the binary data;
|
|
* use null to accept any data value width
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
|
|
return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes);
|
|
}
|
|
|
|
/**
|
|
* @return {@link Type#FAST}
|
|
*/
|
|
@Override
|
|
public final Type getType() { return Type.FAST; }
|
|
|
|
/**
|
|
* Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
|
|
* Can be used to look up a value for a UTF-16 code unit if other parts of
|
|
* the string processing check for surrogates.
|
|
*
|
|
* @param c the input code point, must be U+0000..U+FFFF
|
|
* @return The BMP code point's trie value.
|
|
*/
|
|
public abstract int bmpGet(int c);
|
|
|
|
/**
|
|
* Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
|
|
* without range checking.
|
|
*
|
|
* @param c the input code point, must be U+10000..U+10FFFF
|
|
* @return The supplementary code point's trie value.
|
|
*/
|
|
public abstract int suppGet(int c);
|
|
|
|
/**
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
@Override
|
|
protected final int cpIndex(int c) {
|
|
if (c >= 0) {
|
|
if (c <= 0xffff) {
|
|
return fastIndex(c);
|
|
} else if (c <= 0x10ffff) {
|
|
return smallIndex(Type.FAST, c);
|
|
}
|
|
}
|
|
return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final StringIterator stringIterator(CharSequence s, int sIndex) {
|
|
return new FastStringIterator(s, sIndex);
|
|
}
|
|
|
|
private final class FastStringIterator extends StringIterator {
|
|
private FastStringIterator(CharSequence s, int sIndex) {
|
|
super(s, sIndex);
|
|
}
|
|
|
|
@Override
|
|
public boolean next() {
|
|
if (sIndex >= s.length()) {
|
|
return false;
|
|
}
|
|
char lead = s.charAt(sIndex++);
|
|
c = lead;
|
|
int dataIndex;
|
|
if (!Character.isSurrogate(lead)) {
|
|
dataIndex = fastIndex(c);
|
|
} else {
|
|
char trail;
|
|
if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
|
|
Character.isLowSurrogate(trail = s.charAt(sIndex))) {
|
|
++sIndex;
|
|
c = Character.toCodePoint(lead, trail);
|
|
dataIndex = smallIndex(Type.FAST, c);
|
|
} else {
|
|
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
}
|
|
value = data.getFromIndex(dataIndex);
|
|
return true;
|
|
}
|
|
|
|
@Override
|
|
public boolean previous() {
|
|
if (sIndex <= 0) {
|
|
return false;
|
|
}
|
|
char trail = s.charAt(--sIndex);
|
|
c = trail;
|
|
int dataIndex;
|
|
if (!Character.isSurrogate(trail)) {
|
|
dataIndex = fastIndex(c);
|
|
} else {
|
|
char lead;
|
|
if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
|
|
Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
|
|
--sIndex;
|
|
c = Character.toCodePoint(lead, trail);
|
|
dataIndex = smallIndex(Type.FAST, c);
|
|
} else {
|
|
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
}
|
|
value = data.getFromIndex(dataIndex);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#SMALL}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static abstract class Small extends CodePointTrie {
|
|
private Small(char[] index, Data data, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, data, highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#SMALL}.
|
|
*
|
|
* @param valueWidth selects the number of bits in a data value; this method throws an exception
|
|
* if the valueWidth does not match the binary data;
|
|
* use null to accept any data value width
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
|
|
return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes);
|
|
}
|
|
|
|
/**
|
|
* @return {@link Type#SMALL}
|
|
*/
|
|
@Override
|
|
public final Type getType() { return Type.SMALL; }
|
|
|
|
/**
|
|
* @deprecated This API is ICU internal only.
|
|
* @hide draft / provisional / internal are hidden on Android
|
|
*/
|
|
@Deprecated
|
|
@Override
|
|
protected final int cpIndex(int c) {
|
|
if (c >= 0) {
|
|
if (c <= SMALL_MAX) {
|
|
return fastIndex(c);
|
|
} else if (c <= 0x10ffff) {
|
|
return smallIndex(Type.SMALL, c);
|
|
}
|
|
}
|
|
return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final StringIterator stringIterator(CharSequence s, int sIndex) {
|
|
return new SmallStringIterator(s, sIndex);
|
|
}
|
|
|
|
private final class SmallStringIterator extends StringIterator {
|
|
private SmallStringIterator(CharSequence s, int sIndex) {
|
|
super(s, sIndex);
|
|
}
|
|
|
|
@Override
|
|
public boolean next() {
|
|
if (sIndex >= s.length()) {
|
|
return false;
|
|
}
|
|
char lead = s.charAt(sIndex++);
|
|
c = lead;
|
|
int dataIndex;
|
|
if (!Character.isSurrogate(lead)) {
|
|
dataIndex = cpIndex(c);
|
|
} else {
|
|
char trail;
|
|
if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
|
|
Character.isLowSurrogate(trail = s.charAt(sIndex))) {
|
|
++sIndex;
|
|
c = Character.toCodePoint(lead, trail);
|
|
dataIndex = smallIndex(Type.SMALL, c);
|
|
} else {
|
|
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
}
|
|
value = data.getFromIndex(dataIndex);
|
|
return true;
|
|
}
|
|
|
|
@Override
|
|
public boolean previous() {
|
|
if (sIndex <= 0) {
|
|
return false;
|
|
}
|
|
char trail = s.charAt(--sIndex);
|
|
c = trail;
|
|
int dataIndex;
|
|
if (!Character.isSurrogate(trail)) {
|
|
dataIndex = cpIndex(c);
|
|
} else {
|
|
char lead;
|
|
if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
|
|
Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
|
|
--sIndex;
|
|
c = Character.toCodePoint(lead, trail);
|
|
dataIndex = smallIndex(Type.SMALL, c);
|
|
} else {
|
|
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
|
|
}
|
|
}
|
|
value = data.getFromIndex(dataIndex);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static final class Fast16 extends Fast {
|
|
private final char[] dataArray;
|
|
|
|
Fast16(char[] index, char[] data16, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
|
|
this.dataArray = data16;
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
|
|
*
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Fast16 fromBinary(ByteBuffer bytes) {
|
|
return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes);
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int get(int c) {
|
|
return dataArray[cpIndex(c)];
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int bmpGet(int c) {
|
|
assert 0 <= c && c <= 0xffff;
|
|
return dataArray[fastIndex(c)];
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int suppGet(int c) {
|
|
assert 0x10000 <= c && c <= 0x10ffff;
|
|
return dataArray[smallIndex(Type.FAST, c)];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static final class Fast32 extends Fast {
|
|
private final int[] dataArray;
|
|
|
|
Fast32(char[] index, int[] data32, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
|
|
this.dataArray = data32;
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
|
|
*
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Fast32 fromBinary(ByteBuffer bytes) {
|
|
return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes);
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int get(int c) {
|
|
return dataArray[cpIndex(c)];
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int bmpGet(int c) {
|
|
assert 0 <= c && c <= 0xffff;
|
|
return dataArray[fastIndex(c)];
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int suppGet(int c) {
|
|
assert 0x10000 <= c && c <= 0x10ffff;
|
|
return dataArray[smallIndex(Type.FAST, c)];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static final class Fast8 extends Fast {
|
|
private final byte[] dataArray;
|
|
|
|
Fast8(char[] index, byte[] data8, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
|
|
this.dataArray = data8;
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
|
|
*
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Fast8 fromBinary(ByteBuffer bytes) {
|
|
return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes);
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int get(int c) {
|
|
return dataArray[cpIndex(c)] & 0xff;
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int bmpGet(int c) {
|
|
assert 0 <= c && c <= 0xffff;
|
|
return dataArray[fastIndex(c)] & 0xff;
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public final int suppGet(int c) {
|
|
assert 0x10000 <= c && c <= 0x10ffff;
|
|
return dataArray[smallIndex(Type.FAST, c)] & 0xff;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static final class Small16 extends Small {
|
|
Small16(char[] index, char[] data16, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
|
|
*
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Small16 fromBinary(ByteBuffer bytes) {
|
|
return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static final class Small32 extends Small {
|
|
Small32(char[] index, int[] data32, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
|
|
*
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Small32 fromBinary(ByteBuffer bytes) {
|
|
return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
|
|
*
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static final class Small8 extends Small {
|
|
Small8(char[] index, byte[] data8, int highStart,
|
|
int index3NullOffset, int dataNullOffset) {
|
|
super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
|
|
}
|
|
|
|
/**
|
|
* Creates a trie from its binary form.
|
|
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
|
|
* with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
|
|
*
|
|
* @param bytes a buffer containing the binary data of a CodePointTrie
|
|
* @return the trie
|
|
*/
|
|
public static Small8 fromBinary(ByteBuffer bytes) {
|
|
return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes);
|
|
}
|
|
}
|
|
}
|