260 lines
10 KiB
Java
260 lines
10 KiB
Java
![]() |
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2009-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
|
||
|
|
||
|
package android.icu.impl;
|
||
|
|
||
|
import java.io.DataOutputStream;
|
||
|
import java.io.IOException;
|
||
|
import java.io.OutputStream;
|
||
|
import java.nio.ByteBuffer;
|
||
|
|
||
|
/**
 * @author aheninger
 *
 * A read-only Trie2, holding 32 bit data values.
 *
 * A Trie2 is a highly optimized data structure for mapping from Unicode
 * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value.
 *
 * See class Trie2 for descriptions of the API for accessing the contents of a trie.
 *
 * The fundamental data access methods are declared final in this class, with
 * the intent that applications might gain a little extra performance, when compared
 * with calling the same methods via the abstract Trie2 base class.
 * @hide Only a subset of ICU is exposed in Android
 */
|
||
|
|
||
|
public class Trie2_32 extends Trie2 {
|
||
|
|
||
|
/**
|
||
|
* Internal constructor, not for general use.
|
||
|
*/
|
||
|
Trie2_32() {
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Create a Trie2 from its serialized form. Inverse of utrie2_serialize().
|
||
|
* The serialized format is identical between ICU4C and ICU4J, so this function
|
||
|
* will work with serialized Trie2s from either.
|
||
|
*
|
||
|
* The serialized Trie2 in the bytes may be in either little or big endian byte order.
|
||
|
* This allows using serialized Tries from ICU4C without needing to consider the
|
||
|
* byte order of the system that created them.
|
||
|
*
|
||
|
* @param bytes a byte buffer to the serialized form of a UTrie2.
|
||
|
* @return An unserialized Trie_32, ready for use.
|
||
|
* @throws IllegalArgumentException if the stream does not contain a serialized Trie2.
|
||
|
* @throws IOException if a read error occurs in the buffer.
|
||
|
* @throws ClassCastException if the bytes contains a serialized Trie2_16
|
||
|
*/
|
||
|
public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException {
|
||
|
return (Trie2_32) Trie2.createFromSerialized(bytes);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Get the value for a code point as stored in the Trie2.
|
||
|
*
|
||
|
* @param codePoint the code point
|
||
|
* @return the value
|
||
|
*/
|
||
|
@Override
|
||
|
public final int get(int codePoint) {
|
||
|
int value;
|
||
|
int ix;
|
||
|
|
||
|
if (codePoint >= 0) {
|
||
|
if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
|
||
|
// Ordinary BMP code point, excluding leading surrogates.
|
||
|
// BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index.
|
||
|
// 32 bit data is stored in the index array itself.
|
||
|
ix = index[codePoint >> UTRIE2_SHIFT_2];
|
||
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
|
||
|
value = data32[ix];
|
||
|
return value;
|
||
|
}
|
||
|
if (codePoint <= 0xffff) {
|
||
|
// Lead Surrogate Code Point. A Separate index section is stored for
|
||
|
// lead surrogate code units and code points.
|
||
|
// The main index has the code unit data.
|
||
|
// For this function, we need the code point data.
|
||
|
// Note: this expression could be refactored for slightly improved efficiency, but
|
||
|
// surrogate code points will be so rare in practice that it's not worth it.
|
||
|
ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
|
||
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
|
||
|
value = data32[ix];
|
||
|
return value;
|
||
|
}
|
||
|
if (codePoint < highStart) {
|
||
|
// Supplemental code point, use two-level lookup.
|
||
|
ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
|
||
|
ix = index[ix];
|
||
|
ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
|
||
|
ix = index[ix];
|
||
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
|
||
|
value = data32[ix];
|
||
|
return value;
|
||
|
}
|
||
|
if (codePoint <= 0x10ffff) {
|
||
|
value = data32[highValueIndex];
|
||
|
return value;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Fall through. The code point is outside of the legal range of 0..0x10ffff.
|
||
|
return errorValue;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Get a Trie2 value for a UTF-16 code unit.
|
||
|
*
|
||
|
* This function returns the same value as get() if the input
|
||
|
* character is outside of the lead surrogate range
|
||
|
*
|
||
|
* There are two values stored in a Trie2 for inputs in the lead
|
||
|
* surrogate range. This function returns the alternate value,
|
||
|
* while Trie2.get() returns the main value.
|
||
|
*
|
||
|
* @param codeUnit a 16 bit code unit or lead surrogate value.
|
||
|
* @return the value
|
||
|
*/
|
||
|
@Override
|
||
|
public int getFromU16SingleLead(char codeUnit){
|
||
|
int value;
|
||
|
int ix;
|
||
|
|
||
|
ix = index[codeUnit >> UTRIE2_SHIFT_2];
|
||
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
|
||
|
value = data32[ix];
|
||
|
return value;
|
||
|
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Serialize a Trie2_32 onto an OutputStream.
|
||
|
*
|
||
|
* A Trie2 can be serialized multiple times.
|
||
|
* The serialized data is compatible with ICU4C UTrie2 serialization.
|
||
|
* Trie2 serialization is unrelated to Java object serialization.
|
||
|
*
|
||
|
* @param os the stream to which the serialized Trie2 data will be written.
|
||
|
* @return the number of bytes written.
|
||
|
* @throw IOException on an error writing to the OutputStream.
|
||
|
*/
|
||
|
public int serialize(OutputStream os) throws IOException {
|
||
|
DataOutputStream dos = new DataOutputStream(os);
|
||
|
int bytesWritten = 0;
|
||
|
|
||
|
bytesWritten += serializeHeader(dos);
|
||
|
for (int i=0; i<dataLength; i++) {
|
||
|
dos.writeInt(data32[i]);
|
||
|
}
|
||
|
bytesWritten += dataLength*4;
|
||
|
return bytesWritten;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @return the number of bytes of the serialized trie
|
||
|
*/
|
||
|
public int getSerializedLength() {
|
||
|
return 16+header.indexLength*2+dataLength*4;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Given a starting code point, find the last in a range of code points,
|
||
|
* all with the same value.
|
||
|
*
|
||
|
* This function is part of the implementation of iterating over the
|
||
|
* Trie2's contents.
|
||
|
* @param startingCP The code point at which to begin looking.
|
||
|
* @return The last code point with the same value as the starting code point.
|
||
|
*/
|
||
|
@Override
|
||
|
int rangeEnd(int startingCP, int limit, int value) {
|
||
|
int cp = startingCP;
|
||
|
int block = 0;
|
||
|
int index2Block = 0;
|
||
|
|
||
|
// Loop runs once for each of
|
||
|
// - a partial data block
|
||
|
// - a reference to the null (default) data block.
|
||
|
// - a reference to the index2 null block
|
||
|
|
||
|
outerLoop:
|
||
|
for (;;) {
|
||
|
if (cp >= limit) {
|
||
|
break;
|
||
|
}
|
||
|
if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
|
||
|
// Ordinary BMP code point, excluding leading surrogates.
|
||
|
// BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index.
|
||
|
// 16 bit data is stored in the index array itself.
|
||
|
index2Block = 0;
|
||
|
block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
|
||
|
} else if (cp < 0xffff) {
|
||
|
// Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
|
||
|
index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
|
||
|
block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
|
||
|
} else if (cp < highStart) {
|
||
|
// Supplemental code point, use two-level lookup.
|
||
|
int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
|
||
|
index2Block = index[ix];
|
||
|
block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
|
||
|
} else {
|
||
|
// Code point above highStart.
|
||
|
if (value == data32[highValueIndex]) {
|
||
|
cp = limit;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (index2Block == index2NullOffset) {
|
||
|
if (value != initialValue) {
|
||
|
break;
|
||
|
}
|
||
|
cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
|
||
|
} else if (block == dataNullOffset) {
|
||
|
// The block at dataNullOffset has all values == initialValue.
|
||
|
// Because Trie2 iteration always proceeds in ascending order, we will always
|
||
|
// encounter a null block at its beginning, and can skip over
|
||
|
// a number of code points equal to the length of the block.
|
||
|
if (value != initialValue) {
|
||
|
break;
|
||
|
}
|
||
|
cp += UTRIE2_DATA_BLOCK_LENGTH;
|
||
|
} else {
|
||
|
// Current position refers to an ordinary data block.
|
||
|
// Walk over the data entries, checking the values.
|
||
|
int startIx = block + (cp & UTRIE2_DATA_MASK);
|
||
|
int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
|
||
|
for (int ix = startIx; ix<limitIx; ix++) {
|
||
|
if (data32[ix] != value) {
|
||
|
// We came to an entry with a different value.
|
||
|
// We are done.
|
||
|
cp += (ix - startIx);
|
||
|
break outerLoop;
|
||
|
}
|
||
|
}
|
||
|
// The ordinary data block contained our value until its end.
|
||
|
// Advance the current code point, and continue the outer loop.
|
||
|
cp += limitIx - startIx;
|
||
|
}
|
||
|
}
|
||
|
if (cp > limit) {
|
||
|
cp = limit;
|
||
|
}
|
||
|
|
||
|
return cp - 1;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|