570 lines
19 KiB
Java
570 lines
19 KiB
Java
![]() |
/* GENERATED SOURCE. DO NOT MODIFY. */
|
||
|
// © 2016 and later: Unicode, Inc. and others.
|
||
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||
|
/*
|
||
|
******************************************************************************
|
||
|
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||
|
* others. All Rights Reserved. *
|
||
|
******************************************************************************
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* Store bits (Unicode character properties) in bit set vectors.
|
||
|
*
|
||
|
* This is a port of the C++ class UPropsVectors from ICU4C
|
||
|
*
|
||
|
* @author Shaopeng Jia
|
||
|
* @hide draft / provisional / internal are hidden on Android
|
||
|
*/
|
||
|
|
||
|
package android.icu.impl;
|
||
|
|
||
|
import java.util.Arrays;
|
||
|
import java.util.Comparator;
|
||
|
|
||
|
/**
|
||
|
* Unicode Properties Vectors associated with code point ranges.
|
||
|
*
|
||
|
* Rows of primitive integers in a contiguous array store the range limits and
|
||
|
* the properties vectors.
|
||
|
*
|
||
|
* In each row, row[0] contains the start code point and row[1] contains the
|
||
|
* limit code point, which is the start of the next range.
|
||
|
*
|
||
|
* Initially, there is only one range [0..0x110000] with values 0.
|
||
|
*
|
||
|
* It would be possible to store only one range boundary per row, but
|
||
|
* self-contained rows allow to later sort them by contents.
|
||
|
* @hide Only a subset of ICU is exposed in Android
|
||
|
*/
|
||
|
public class PropsVectors {
|
||
|
private int v[];
|
||
|
private int columns; // number of columns, plus two for start
|
||
|
// and limit values
|
||
|
private int maxRows;
|
||
|
private int rows;
|
||
|
private int prevRow; // search optimization: remember last row seen
|
||
|
private boolean isCompacted;
|
||
|
|
||
|
// internal function to compare elements in v and target. Return true iff
|
||
|
// elements in v starting from index1 to index1 + length - 1
|
||
|
// are exactly the same as elements in target
|
||
|
// starting from index2 to index2 + length - 1
|
||
|
private boolean areElementsSame(int index1, int[] target, int index2,
|
||
|
int length) {
|
||
|
for (int i = 0; i < length; ++i) {
|
||
|
if (v[index1 + i] != target[index2 + i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// internal function which given rangeStart, returns
|
||
|
// index where v[index]<=rangeStart<v[index+1].
|
||
|
// The returned index is a multiple of columns, and therefore
|
||
|
// points to the start of a row.
|
||
|
private int findRow(int rangeStart) {
|
||
|
int index = 0;
|
||
|
|
||
|
// check the vicinity of the last-seen row (start
|
||
|
// searching with an unrolled loop)
|
||
|
|
||
|
index = prevRow * columns;
|
||
|
if (rangeStart >= v[index]) {
|
||
|
if (rangeStart < v[index + 1]) {
|
||
|
// same row as last seen
|
||
|
return index;
|
||
|
} else {
|
||
|
index += columns;
|
||
|
if (rangeStart < v[index + 1]) {
|
||
|
++prevRow;
|
||
|
return index;
|
||
|
} else {
|
||
|
index += columns;
|
||
|
if (rangeStart < v[index + 1]) {
|
||
|
prevRow += 2;
|
||
|
return index;
|
||
|
} else if ((rangeStart - v[index + 1]) < 10) {
|
||
|
// we are close, continue looping
|
||
|
prevRow += 2;
|
||
|
do {
|
||
|
++prevRow;
|
||
|
index += columns;
|
||
|
} while (rangeStart >= v[index + 1]);
|
||
|
return index;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} else if (rangeStart < v[1]) {
|
||
|
// the very first row
|
||
|
prevRow = 0;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// do a binary search for the start of the range
|
||
|
int start = 0;
|
||
|
int mid = 0;
|
||
|
int limit = rows;
|
||
|
while (start < limit - 1) {
|
||
|
mid = (start + limit) / 2;
|
||
|
index = columns * mid;
|
||
|
if (rangeStart < v[index]) {
|
||
|
limit = mid;
|
||
|
} else if (rangeStart < v[index + 1]) {
|
||
|
prevRow = mid;
|
||
|
return index;
|
||
|
} else {
|
||
|
start = mid;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// must be found because all ranges together always cover
|
||
|
// all of Unicode
|
||
|
prevRow = start;
|
||
|
index = start * columns;
|
||
|
return index;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Special pseudo code points for storing the initialValue and the
|
||
|
* errorValue which are used to initialize a Trie or similar.
|
||
|
*/
|
||
|
public final static int FIRST_SPECIAL_CP = 0x110000;
|
||
|
public final static int INITIAL_VALUE_CP = 0x110000;
|
||
|
public final static int ERROR_VALUE_CP = 0x110001;
|
||
|
public final static int MAX_CP = 0x110001;
|
||
|
|
||
|
public final static int INITIAL_ROWS = 1 << 12;
|
||
|
public final static int MEDIUM_ROWS = 1 << 16;
|
||
|
public final static int MAX_ROWS = MAX_CP + 1;
|
||
|
|
||
|
/*
|
||
|
* Constructor.
|
||
|
* @param numOfColumns Number of value integers (32-bit int) per row.
|
||
|
*/
|
||
|
public PropsVectors(int numOfColumns) {
|
||
|
if (numOfColumns < 1) {
|
||
|
throw new IllegalArgumentException("numOfColumns need to be no "
|
||
|
+ "less than 1; but it is " + numOfColumns);
|
||
|
}
|
||
|
columns = numOfColumns + 2; // count range start and limit columns
|
||
|
v = new int[INITIAL_ROWS * columns];
|
||
|
maxRows = INITIAL_ROWS;
|
||
|
rows = 2 + (MAX_CP - FIRST_SPECIAL_CP);
|
||
|
prevRow = 0;
|
||
|
isCompacted = false;
|
||
|
v[0] = 0;
|
||
|
v[1] = 0x110000;
|
||
|
int index = columns;
|
||
|
for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) {
|
||
|
v[index] = cp;
|
||
|
v[index + 1] = cp + 1;
|
||
|
index += columns;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* In rows for code points [start..end], select the column, reset the mask
|
||
|
* bits and set the value bits (ANDed with the mask).
|
||
|
*
|
||
|
* @throws IllegalArgumentException
|
||
|
*
|
||
|
* @throws IllegalStateException
|
||
|
*
|
||
|
* @throws IndexOutOfBoundsException
|
||
|
*/
|
||
|
public void setValue(int start, int end, int column, int value, int mask) {
|
||
|
if (start < 0 || start > end || end > MAX_CP || column < 0
|
||
|
|| column >= (columns - 2)) {
|
||
|
throw new IllegalArgumentException();
|
||
|
}
|
||
|
if (isCompacted) {
|
||
|
throw new IllegalStateException("Shouldn't be called after"
|
||
|
+ "compact()!");
|
||
|
}
|
||
|
|
||
|
int firstRow, lastRow;
|
||
|
int limit = end + 1;
|
||
|
boolean splitFirstRow, splitLastRow;
|
||
|
// skip range start and limit columns
|
||
|
column += 2;
|
||
|
value &= mask;
|
||
|
|
||
|
// find the rows whose ranges overlap with the input range
|
||
|
// find the first and last row, always successful
|
||
|
firstRow = findRow(start);
|
||
|
lastRow = findRow(end);
|
||
|
|
||
|
/*
|
||
|
* Rows need to be split if they partially overlap with the input range
|
||
|
* (only possible for the first and last rows) and if their value
|
||
|
* differs from the input value.
|
||
|
*/
|
||
|
splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask));
|
||
|
splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask));
|
||
|
|
||
|
// split first/last rows if necessary
|
||
|
if (splitFirstRow || splitLastRow) {
|
||
|
int rowsToExpand = 0;
|
||
|
if (splitFirstRow) {
|
||
|
++rowsToExpand;
|
||
|
}
|
||
|
if (splitLastRow) {
|
||
|
++rowsToExpand;
|
||
|
}
|
||
|
int newMaxRows = 0;
|
||
|
if ((rows + rowsToExpand) > maxRows) {
|
||
|
if (maxRows < MEDIUM_ROWS) {
|
||
|
newMaxRows = MEDIUM_ROWS;
|
||
|
} else if (maxRows < MAX_ROWS) {
|
||
|
newMaxRows = MAX_ROWS;
|
||
|
} else {
|
||
|
throw new IndexOutOfBoundsException(
|
||
|
"MAX_ROWS exceeded! Increase it to a higher value" +
|
||
|
"in the implementation");
|
||
|
}
|
||
|
int[] temp = new int[newMaxRows * columns];
|
||
|
System.arraycopy(v, 0, temp, 0, rows * columns);
|
||
|
v = temp;
|
||
|
maxRows = newMaxRows;
|
||
|
}
|
||
|
|
||
|
// count the number of row cells to move after the last row,
|
||
|
// and move them
|
||
|
int count = (rows * columns) - (lastRow + columns);
|
||
|
if (count > 0) {
|
||
|
System.arraycopy(v, lastRow + columns, v, lastRow
|
||
|
+ (1 + rowsToExpand) * columns, count);
|
||
|
}
|
||
|
rows += rowsToExpand;
|
||
|
|
||
|
// split the first row, and move the firstRow pointer
|
||
|
// to the second part
|
||
|
if (splitFirstRow) {
|
||
|
// copy all affected rows up one and move the lastRow pointer
|
||
|
count = lastRow - firstRow + columns;
|
||
|
System.arraycopy(v, firstRow, v, firstRow + columns, count);
|
||
|
lastRow += columns;
|
||
|
|
||
|
// split the range and move the firstRow pointer
|
||
|
v[firstRow + 1] = v[firstRow + columns] = start;
|
||
|
firstRow += columns;
|
||
|
}
|
||
|
|
||
|
// split the last row
|
||
|
if (splitLastRow) {
|
||
|
// copy the last row data
|
||
|
System.arraycopy(v, lastRow, v, lastRow + columns, columns);
|
||
|
|
||
|
// split the range and move the firstRow pointer
|
||
|
v[lastRow + 1] = v[lastRow + columns] = limit;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// set the "row last seen" to the last row for the range
|
||
|
prevRow = lastRow / columns;
|
||
|
|
||
|
// set the input value in all remaining rows
|
||
|
firstRow += column;
|
||
|
lastRow += column;
|
||
|
mask = ~mask;
|
||
|
for (;;) {
|
||
|
v[firstRow] = (v[firstRow] & mask) | value;
|
||
|
if (firstRow == lastRow) {
|
||
|
break;
|
||
|
}
|
||
|
firstRow += columns;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Always returns 0 if called after compact().
|
||
|
*/
|
||
|
public int getValue(int c, int column) {
|
||
|
if (isCompacted || c < 0 || c > MAX_CP || column < 0
|
||
|
|| column >= (columns - 2)) {
|
||
|
return 0;
|
||
|
}
|
||
|
int index = findRow(c);
|
||
|
return v[index + 2 + column];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Returns an array which contains value elements
|
||
|
* in row rowIndex.
|
||
|
*
|
||
|
* @throws IllegalStateException
|
||
|
* @throws IllegalArgumentException
|
||
|
*/
|
||
|
public int[] getRow(int rowIndex) {
|
||
|
if (isCompacted) {
|
||
|
throw new IllegalStateException(
|
||
|
"Illegal Invocation of the method after compact()");
|
||
|
}
|
||
|
if (rowIndex < 0 || rowIndex > rows) {
|
||
|
throw new IllegalArgumentException("rowIndex out of bound!");
|
||
|
}
|
||
|
int[] rowToReturn = new int[columns - 2];
|
||
|
System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0,
|
||
|
columns - 2);
|
||
|
return rowToReturn;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Returns an int which is the start codepoint
|
||
|
* in row rowIndex.
|
||
|
*
|
||
|
* @throws IllegalStateException
|
||
|
*
|
||
|
* @throws IllegalArgumentException
|
||
|
*/
|
||
|
public int getRowStart(int rowIndex) {
|
||
|
if (isCompacted) {
|
||
|
throw new IllegalStateException(
|
||
|
"Illegal Invocation of the method after compact()");
|
||
|
}
|
||
|
if (rowIndex < 0 || rowIndex > rows) {
|
||
|
throw new IllegalArgumentException("rowIndex out of bound!");
|
||
|
}
|
||
|
return v[rowIndex * columns];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Returns an int which is the limit codepoint
|
||
|
* minus 1 in row rowIndex.
|
||
|
*
|
||
|
* @throws IllegalStateException
|
||
|
*
|
||
|
* @throws IllegalArgumentException
|
||
|
*/
|
||
|
public int getRowEnd(int rowIndex) {
|
||
|
if (isCompacted) {
|
||
|
throw new IllegalStateException(
|
||
|
"Illegal Invocation of the method after compact()");
|
||
|
}
|
||
|
if (rowIndex < 0 || rowIndex > rows) {
|
||
|
throw new IllegalArgumentException("rowIndex out of bound!");
|
||
|
}
|
||
|
return v[rowIndex * columns + 1] - 1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Compact the vectors:
|
||
|
* - modify the memory
|
||
|
* - keep only unique vectors
|
||
|
* - store them contiguously from the beginning of the memory
|
||
|
* - for each (non-unique) row, call the respective function in
|
||
|
* CompactHandler
|
||
|
*
|
||
|
* The handler's rowIndex is the index of the row in the compacted
|
||
|
* memory block. Therefore, it starts at 0 increases in increments of the
|
||
|
* columns value.
|
||
|
*
|
||
|
* In a first phase, only special values are delivered (each exactly once).
|
||
|
* Then CompactHandler::startRealValues() is called
|
||
|
* where rowIndex is the length of the compacted array.
|
||
|
* Then, in the second phase, the CompactHandler::setRowIndexForRange() is
|
||
|
* called for each row of real values.
|
||
|
*/
|
||
|
public void compact(CompactHandler compactor) {
|
||
|
if (isCompacted) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Set the flag now: Sorting and compacting destroys the builder
|
||
|
// data structure.
|
||
|
isCompacted = true;
|
||
|
int valueColumns = columns - 2; // not counting start & limit
|
||
|
|
||
|
// sort the properties vectors to find unique vector values
|
||
|
Integer[] indexArray = new Integer[rows];
|
||
|
for (int i = 0; i < rows; ++i) {
|
||
|
indexArray[i] = columns * i;
|
||
|
}
|
||
|
|
||
|
Arrays.sort(indexArray, new Comparator<Integer>() {
|
||
|
@Override
|
||
|
public int compare(Integer o1, Integer o2) {
|
||
|
int indexOfRow1 = o1.intValue();
|
||
|
int indexOfRow2 = o2.intValue();
|
||
|
int count = columns; // includes start/limit columns
|
||
|
|
||
|
// start comparing after start/limit
|
||
|
// but wrap around to them
|
||
|
int index = 2;
|
||
|
do {
|
||
|
if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) {
|
||
|
return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1
|
||
|
: 1;
|
||
|
}
|
||
|
if (++index == columns) {
|
||
|
index = 0;
|
||
|
}
|
||
|
} while (--count > 0);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
});
|
||
|
|
||
|
/*
|
||
|
* Find and set the special values. This has to do almost the same work
|
||
|
* as the compaction below, to find the indexes where the special-value
|
||
|
* rows will move.
|
||
|
*/
|
||
|
int count = -valueColumns;
|
||
|
for (int i = 0; i < rows; ++i) {
|
||
|
int start = v[indexArray[i].intValue()];
|
||
|
|
||
|
// count a new values vector if it is different
|
||
|
// from the current one
|
||
|
if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v,
|
||
|
indexArray[i-1].intValue() + 2, valueColumns)) {
|
||
|
count += valueColumns;
|
||
|
}
|
||
|
|
||
|
if (start == INITIAL_VALUE_CP) {
|
||
|
compactor.setRowIndexForInitialValue(count);
|
||
|
} else if (start == ERROR_VALUE_CP) {
|
||
|
compactor.setRowIndexForErrorValue(count);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// count is at the beginning of the last vector,
|
||
|
// add valueColumns to include that last vector
|
||
|
count += valueColumns;
|
||
|
|
||
|
// Call the handler once more to signal the start of
|
||
|
// delivering real values.
|
||
|
compactor.startRealValues(count);
|
||
|
|
||
|
/*
|
||
|
* Move vector contents up to a contiguous array with only unique
|
||
|
* vector values, and call the handler function for each vector.
|
||
|
*
|
||
|
* This destroys the Properties Vector structure and replaces it
|
||
|
* with an array of just vector values.
|
||
|
*/
|
||
|
int[] temp = new int[count];
|
||
|
count = -valueColumns;
|
||
|
for (int i = 0; i < rows; ++i) {
|
||
|
int start = v[indexArray[i].intValue()];
|
||
|
int limit = v[indexArray[i].intValue() + 1];
|
||
|
|
||
|
// count a new values vector if it is different
|
||
|
// from the current one
|
||
|
if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
|
||
|
temp, count, valueColumns)) {
|
||
|
count += valueColumns;
|
||
|
System.arraycopy(v, indexArray[i].intValue() + 2, temp, count,
|
||
|
valueColumns);
|
||
|
}
|
||
|
|
||
|
if (start < FIRST_SPECIAL_CP) {
|
||
|
compactor.setRowIndexForRange(start, limit - 1, count);
|
||
|
}
|
||
|
}
|
||
|
v = temp;
|
||
|
|
||
|
// count is at the beginning of the last vector,
|
||
|
// add one to include that last vector
|
||
|
rows = count / valueColumns + 1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Get the vectors array after calling compact().
|
||
|
*
|
||
|
* @throws IllegalStateException
|
||
|
*/
|
||
|
public int[] getCompactedArray() {
|
||
|
if (!isCompacted) {
|
||
|
throw new IllegalStateException(
|
||
|
"Illegal Invocation of the method before compact()");
|
||
|
}
|
||
|
return v;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Get the number of rows for the compacted array.
|
||
|
*
|
||
|
* @throws IllegalStateException
|
||
|
*/
|
||
|
public int getCompactedRows() {
|
||
|
if (!isCompacted) {
|
||
|
throw new IllegalStateException(
|
||
|
"Illegal Invocation of the method before compact()");
|
||
|
}
|
||
|
return rows;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Get the number of columns for the compacted array.
|
||
|
*
|
||
|
* @throws IllegalStateException
|
||
|
*/
|
||
|
public int getCompactedColumns() {
|
||
|
if (!isCompacted) {
|
||
|
throw new IllegalStateException(
|
||
|
"Illegal Invocation of the method before compact()");
|
||
|
}
|
||
|
return columns - 2;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Call compact(), create a IntTrie with indexes into the compacted
|
||
|
* vectors array.
|
||
|
*/
|
||
|
public IntTrie compactToTrieWithRowIndexes() {
|
||
|
PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler();
|
||
|
compact(compactor);
|
||
|
return compactor.builder.serialize(new DefaultGetFoldedValue(
|
||
|
compactor.builder), new DefaultGetFoldingOffset());
|
||
|
}
|
||
|
|
||
|
// inner class implementation of Trie.DataManipulate
|
||
|
private static class DefaultGetFoldingOffset implements Trie.DataManipulate {
|
||
|
@Override
|
||
|
public int getFoldingOffset(int value) {
|
||
|
return value;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// inner class implementation of TrieBuilder.DataManipulate
|
||
|
private static class DefaultGetFoldedValue implements
|
||
|
TrieBuilder.DataManipulate {
|
||
|
private IntTrieBuilder builder;
|
||
|
|
||
|
public DefaultGetFoldedValue(IntTrieBuilder inBuilder) {
|
||
|
builder = inBuilder;
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
public int getFoldedValue(int start, int offset) {
|
||
|
int initialValue = builder.m_initialValue_;
|
||
|
int limit = start + 0x400;
|
||
|
while (start < limit) {
|
||
|
boolean[] inBlockZero = new boolean[1];
|
||
|
int value = builder.getValue(start, inBlockZero);
|
||
|
if (inBlockZero[0]) {
|
||
|
start += TrieBuilder.DATA_BLOCK_LENGTH;
|
||
|
} else if (value != initialValue) {
|
||
|
return offset;
|
||
|
} else {
|
||
|
++start;
|
||
|
}
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @hide Only a subset of ICU is exposed in Android
|
||
|
*/
|
||
|
public static interface CompactHandler {
|
||
|
public void setRowIndexForRange(int start, int end, int rowIndex);
|
||
|
public void setRowIndexForInitialValue(int rowIndex);
|
||
|
public void setRowIndexForErrorValue(int rowIndex);
|
||
|
public void startRealValues(int rowIndex);
|
||
|
}
|
||
|
}
|