508 lines
18 KiB
Java
508 lines
18 KiB
Java
/* GENERATED SOURCE. DO NOT MODIFY. */
|
|
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/*
|
|
*******************************************************************************
|
|
* Copyright (C) 2013-2014, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*******************************************************************************
|
|
* CollationRootElements.java, ported from collationrootelements.h/.cpp
|
|
*
|
|
* C++ version created on: 2013mar01
|
|
* created by: Markus W. Scherer
|
|
*/
|
|
|
|
package android.icu.impl.coll;
|
|
|
|
/**
|
|
* Container and access methods for collation elements and weights
|
|
* that occur in the root collator.
|
|
* Needed for finding boundaries for building a tailoring.
|
|
*
|
|
* This class takes and returns 16-bit secondary and tertiary weights.
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public final class CollationRootElements {
|
|
public CollationRootElements(long[] rootElements) {
|
|
elements = rootElements;
|
|
}
|
|
|
|
/**
|
|
* Higher than any root primary.
|
|
*/
|
|
public static final long PRIMARY_SENTINEL = 0xffffff00L;
|
|
|
|
/**
|
|
* Flag in a root element, set if the element contains secondary & tertiary weights,
|
|
* rather than a primary.
|
|
*/
|
|
public static final int SEC_TER_DELTA_FLAG = 0x80;
|
|
/**
|
|
* Mask for getting the primary range step value from a primary-range-end element.
|
|
*/
|
|
public static final int PRIMARY_STEP_MASK = 0x7f;
|
|
|
|
/**
|
|
* Index of the first CE with a non-zero tertiary weight.
|
|
* Same as the start of the compact root elements table.
|
|
*/
|
|
public static final int IX_FIRST_TERTIARY_INDEX = 0;
|
|
/**
|
|
* Index of the first CE with a non-zero secondary weight.
|
|
*/
|
|
static final int IX_FIRST_SECONDARY_INDEX = 1;
|
|
/**
|
|
* Index of the first CE with a non-zero primary weight.
|
|
*/
|
|
static final int IX_FIRST_PRIMARY_INDEX = 2;
|
|
/**
|
|
* Must match Collation.COMMON_SEC_AND_TER_CE.
|
|
*/
|
|
static final int IX_COMMON_SEC_AND_TER_CE = 3;
|
|
/**
|
|
* Secondary & tertiary boundaries.
|
|
* Bits 31..24: [fixed last secondary common byte 45]
|
|
* Bits 23..16: [fixed first ignorable secondary byte 80]
|
|
* Bits 15.. 8: reserved, 0
|
|
* Bits 7.. 0: [fixed first ignorable tertiary byte 3C]
|
|
*/
|
|
static final int IX_SEC_TER_BOUNDARIES = 4;
|
|
/**
|
|
* The current number of indexes.
|
|
* Currently the same as elements[IX_FIRST_TERTIARY_INDEX].
|
|
*/
|
|
static final int IX_COUNT = 5;
|
|
|
|
/**
|
|
* Returns the boundary between tertiary weights of primary/secondary CEs
|
|
* and those of tertiary CEs.
|
|
* This is the upper limit for tertiaries of primary/secondary CEs.
|
|
* This minus one is the lower limit for tertiaries of tertiary CEs.
|
|
*/
|
|
public int getTertiaryBoundary() {
|
|
return ((int)elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00;
|
|
}
|
|
|
|
/**
|
|
* Returns the first assigned tertiary CE.
|
|
*/
|
|
long getFirstTertiaryCE() {
|
|
return elements[(int)elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
|
|
}
|
|
|
|
/**
|
|
* Returns the last assigned tertiary CE.
|
|
*/
|
|
long getLastTertiaryCE() {
|
|
return elements[(int)elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
|
|
}
|
|
|
|
/**
|
|
* Returns the last common secondary weight.
|
|
* This is the lower limit for secondaries of primary CEs.
|
|
*/
|
|
public int getLastCommonSecondary() {
|
|
return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00;
|
|
}
|
|
|
|
/**
|
|
* Returns the boundary between secondary weights of primary CEs
|
|
* and those of secondary CEs.
|
|
* This is the upper limit for secondaries of primary CEs.
|
|
* This minus one is the lower limit for secondaries of secondary CEs.
|
|
*/
|
|
public int getSecondaryBoundary() {
|
|
return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00;
|
|
}
|
|
|
|
/**
|
|
* Returns the first assigned secondary CE.
|
|
*/
|
|
long getFirstSecondaryCE() {
|
|
return elements[(int)elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
|
|
}
|
|
|
|
/**
|
|
* Returns the last assigned secondary CE.
|
|
*/
|
|
long getLastSecondaryCE() {
|
|
return elements[(int)elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
|
|
}
|
|
|
|
/**
|
|
* Returns the first assigned primary weight.
|
|
*/
|
|
long getFirstPrimary() {
|
|
return elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]; // step=0: cannot be a range end
|
|
}
|
|
|
|
/**
|
|
* Returns the first assigned primary CE.
|
|
*/
|
|
long getFirstPrimaryCE() {
|
|
return Collation.makeCE(getFirstPrimary());
|
|
}
|
|
|
|
/**
|
|
* Returns the last root CE with a primary weight before p.
|
|
* Intended only for reordering group boundaries.
|
|
*/
|
|
long lastCEWithPrimaryBefore(long p) {
|
|
if(p == 0) { return 0; }
|
|
assert(p > elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]);
|
|
int index = findP(p);
|
|
long q = elements[index];
|
|
long secTer;
|
|
if(p == (q & 0xffffff00L)) {
|
|
// p == elements[index] is a root primary. Find the CE before it.
|
|
// We must not be in a primary range.
|
|
assert((q & PRIMARY_STEP_MASK) == 0);
|
|
secTer = elements[index - 1];
|
|
if((secTer & SEC_TER_DELTA_FLAG) == 0) {
|
|
// Primary CE just before p.
|
|
p = secTer & 0xffffff00L;
|
|
secTer = Collation.COMMON_SEC_AND_TER_CE;
|
|
} else {
|
|
// secTer = last secondary & tertiary for the previous primary
|
|
index -= 2;
|
|
for(;;) {
|
|
p = elements[index];
|
|
if((p & SEC_TER_DELTA_FLAG) == 0) {
|
|
p &= 0xffffff00L;
|
|
break;
|
|
}
|
|
--index;
|
|
}
|
|
}
|
|
} else {
|
|
// p > elements[index] which is the previous primary.
|
|
// Find the last secondary & tertiary weights for it.
|
|
p = q & 0xffffff00L;
|
|
secTer = Collation.COMMON_SEC_AND_TER_CE;
|
|
for(;;) {
|
|
q = elements[++index];
|
|
if((q & SEC_TER_DELTA_FLAG) == 0) {
|
|
// We must not be in a primary range.
|
|
assert((q & PRIMARY_STEP_MASK) == 0);
|
|
break;
|
|
}
|
|
secTer = q;
|
|
}
|
|
}
|
|
return (p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
|
|
}
|
|
|
|
/**
|
|
* Returns the first root CE with a primary weight of at least p.
|
|
* Intended only for reordering group boundaries.
|
|
*/
|
|
long firstCEWithPrimaryAtLeast(long p) {
|
|
if(p == 0) { return 0; }
|
|
int index = findP(p);
|
|
if(p != (elements[index] & 0xffffff00L)) {
|
|
for(;;) {
|
|
p = elements[++index];
|
|
if((p & SEC_TER_DELTA_FLAG) == 0) {
|
|
// First primary after p. We must not be in a primary range.
|
|
assert((p & PRIMARY_STEP_MASK) == 0);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
// The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
|
|
return (p << 32) | Collation.COMMON_SEC_AND_TER_CE;
|
|
}
|
|
|
|
/**
|
|
* Returns the primary weight before p.
|
|
* p must be greater than the first root primary.
|
|
*/
|
|
long getPrimaryBefore(long p, boolean isCompressible) {
|
|
int index = findPrimary(p);
|
|
int step;
|
|
long q = elements[index];
|
|
if(p == (q & 0xffffff00L)) {
|
|
// Found p itself. Return the previous primary.
|
|
// See if p is at the end of a previous range.
|
|
step = (int)q & PRIMARY_STEP_MASK;
|
|
if(step == 0) {
|
|
// p is not at the end of a range. Look for the previous primary.
|
|
do {
|
|
p = elements[--index];
|
|
} while((p & SEC_TER_DELTA_FLAG) != 0);
|
|
return p & 0xffffff00L;
|
|
}
|
|
} else {
|
|
// p is in a range, and not at the start.
|
|
long nextElement = elements[index + 1];
|
|
assert(isEndOfPrimaryRange(nextElement));
|
|
step = (int)nextElement & PRIMARY_STEP_MASK;
|
|
}
|
|
// Return the previous range primary.
|
|
if((p & 0xffff) == 0) {
|
|
return Collation.decTwoBytePrimaryByOneStep(p, isCompressible, step);
|
|
} else {
|
|
return Collation.decThreeBytePrimaryByOneStep(p, isCompressible, step);
|
|
}
|
|
}
|
|
|
|
/** Returns the secondary weight before [p, s]. */
|
|
int getSecondaryBefore(long p, int s) {
|
|
int index;
|
|
int previousSec, sec;
|
|
if(p == 0) {
|
|
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
|
|
// Gap at the beginning of the secondary CE range.
|
|
previousSec = 0;
|
|
sec = (int)(elements[index] >> 16);
|
|
} else {
|
|
index = findPrimary(p) + 1;
|
|
previousSec = Collation.BEFORE_WEIGHT16;
|
|
sec = (int)getFirstSecTerForPrimary(index) >>> 16;
|
|
}
|
|
assert(s >= sec);
|
|
while(s > sec) {
|
|
previousSec = sec;
|
|
assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
|
|
sec = (int)(elements[index++] >> 16);
|
|
}
|
|
assert(sec == s);
|
|
return previousSec;
|
|
}
|
|
|
|
/** Returns the tertiary weight before [p, s, t]. */
|
|
int getTertiaryBefore(long p, int s, int t) {
|
|
assert((t & ~Collation.ONLY_TERTIARY_MASK) == 0);
|
|
int index;
|
|
int previousTer;
|
|
long secTer;
|
|
if(p == 0) {
|
|
if(s == 0) {
|
|
index = (int)elements[IX_FIRST_TERTIARY_INDEX];
|
|
// Gap at the beginning of the tertiary CE range.
|
|
previousTer = 0;
|
|
} else {
|
|
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
|
|
previousTer = Collation.BEFORE_WEIGHT16;
|
|
}
|
|
secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
|
|
} else {
|
|
index = findPrimary(p) + 1;
|
|
previousTer = Collation.BEFORE_WEIGHT16;
|
|
secTer = getFirstSecTerForPrimary(index);
|
|
}
|
|
long st = ((long)s << 16) | t;
|
|
while(st > secTer) {
|
|
if((int)(secTer >> 16) == s) { previousTer = (int)secTer; }
|
|
assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
|
|
secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
|
|
}
|
|
assert(secTer == st);
|
|
return previousTer & 0xffff;
|
|
}
|
|
|
|
/**
|
|
* Finds the index of the input primary.
|
|
* p must occur as a root primary, and must not be 0.
|
|
*/
|
|
int findPrimary(long p) {
|
|
// Requirement: p must occur as a root primary.
|
|
assert((p & 0xff) == 0); // at most a 3-byte primary
|
|
int index = findP(p);
|
|
// If p is in a range, then we just assume that p is an actual primary in this range.
|
|
// (Too cumbersome/expensive to check.)
|
|
// Otherwise, it must be an exact match.
|
|
assert(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00L));
|
|
return index;
|
|
}
|
|
|
|
/**
|
|
* Returns the primary weight after p where index=findPrimary(p).
|
|
* p must be at least the first root primary.
|
|
*/
|
|
long getPrimaryAfter(long p, int index, boolean isCompressible) {
|
|
assert(p == (elements[index] & 0xffffff00L) || isEndOfPrimaryRange(elements[index + 1]));
|
|
long q = elements[++index];
|
|
int step;
|
|
if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int)q & PRIMARY_STEP_MASK) != 0) {
|
|
// Return the next primary in this range.
|
|
if((p & 0xffff) == 0) {
|
|
return Collation.incTwoBytePrimaryByOffset(p, isCompressible, step);
|
|
} else {
|
|
return Collation.incThreeBytePrimaryByOffset(p, isCompressible, step);
|
|
}
|
|
} else {
|
|
// Return the next primary in the list.
|
|
while((q & SEC_TER_DELTA_FLAG) != 0) {
|
|
q = elements[++index];
|
|
}
|
|
assert((q & PRIMARY_STEP_MASK) == 0);
|
|
return q;
|
|
}
|
|
}
|
|
/**
|
|
* Returns the secondary weight after [p, s] where index=findPrimary(p)
|
|
* except use index=0 for p=0.
|
|
*
|
|
* <p>Must return a weight for every root [p, s] as well as for every weight
|
|
* returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16.
|
|
*
|
|
* <p>Exception: [0, 0] is handled by the CollationBuilder:
|
|
* Both its lower and upper boundaries are special.
|
|
*/
|
|
int getSecondaryAfter(int index, int s) {
|
|
long secTer;
|
|
int secLimit;
|
|
if(index == 0) {
|
|
// primary = 0
|
|
assert(s != 0);
|
|
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
|
|
secTer = elements[index];
|
|
// Gap at the end of the secondary CE range.
|
|
secLimit = 0x10000;
|
|
} else {
|
|
assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
|
|
secTer = getFirstSecTerForPrimary(index + 1);
|
|
// If this is an explicit sec/ter unit, then it will be read once more.
|
|
// Gap for secondaries of primary CEs.
|
|
secLimit = getSecondaryBoundary();
|
|
}
|
|
for(;;) {
|
|
int sec = (int)(secTer >> 16);
|
|
if(sec > s) { return sec; }
|
|
secTer = elements[++index];
|
|
if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
|
|
}
|
|
}
|
|
/**
|
|
* Returns the tertiary weight after [p, s, t] where index=findPrimary(p)
|
|
* except use index=0 for p=0.
|
|
*
|
|
* <p>Must return a weight for every root [p, s, t] as well as for every weight
|
|
* returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16.
|
|
*
|
|
* <p>Exception: [0, 0, 0] is handled by the CollationBuilder:
|
|
* Both its lower and upper boundaries are special.
|
|
*/
|
|
int getTertiaryAfter(int index, int s, int t) {
|
|
long secTer;
|
|
int terLimit;
|
|
if(index == 0) {
|
|
// primary = 0
|
|
if(s == 0) {
|
|
assert(t != 0);
|
|
index = (int)elements[IX_FIRST_TERTIARY_INDEX];
|
|
// Gap at the end of the tertiary CE range.
|
|
terLimit = 0x4000;
|
|
} else {
|
|
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
|
|
// Gap for tertiaries of primary/secondary CEs.
|
|
terLimit = getTertiaryBoundary();
|
|
}
|
|
secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
|
|
} else {
|
|
assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
|
|
secTer = getFirstSecTerForPrimary(index + 1);
|
|
// If this is an explicit sec/ter unit, then it will be read once more.
|
|
terLimit = getTertiaryBoundary();
|
|
}
|
|
long st = (((long)s & 0xffffffffL) << 16) | t;
|
|
for(;;) {
|
|
if(secTer > st) {
|
|
assert((secTer >> 16) == s);
|
|
return (int)secTer & 0xffff;
|
|
}
|
|
secTer = elements[++index];
|
|
// No tertiary greater than t for this primary+secondary.
|
|
if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
|
|
secTer &= ~SEC_TER_DELTA_FLAG;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1.
|
|
*/
|
|
private long getFirstSecTerForPrimary(int index) {
|
|
long secTer = elements[index];
|
|
if((secTer & SEC_TER_DELTA_FLAG) == 0) {
|
|
// No sec/ter delta.
|
|
return Collation.COMMON_SEC_AND_TER_CE;
|
|
}
|
|
secTer &= ~SEC_TER_DELTA_FLAG;
|
|
if(secTer > Collation.COMMON_SEC_AND_TER_CE) {
|
|
// Implied sec/ter.
|
|
return Collation.COMMON_SEC_AND_TER_CE;
|
|
}
|
|
// Explicit sec/ter below common/common.
|
|
return secTer;
|
|
}
|
|
|
|
/**
|
|
* Finds the largest index i where elements[i]<=p.
|
|
* Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).
|
|
* Does not require that p is a root collator primary.
|
|
*/
|
|
private int findP(long p) {
|
|
// p need not occur as a root primary.
|
|
// For example, it might be a reordering group boundary.
|
|
assert((p >> 24) != Collation.UNASSIGNED_IMPLICIT_BYTE);
|
|
// modified binary search
|
|
int start = (int)elements[IX_FIRST_PRIMARY_INDEX];
|
|
assert(p >= elements[start]);
|
|
int limit = elements.length - 1;
|
|
assert(elements[limit] >= PRIMARY_SENTINEL);
|
|
assert(p < elements[limit]);
|
|
while((start + 1) < limit) {
|
|
// Invariant: elements[start] and elements[limit] are primaries,
|
|
// and elements[start]<=p<=elements[limit].
|
|
int i = (int)(((long)start + (long)limit) / 2);
|
|
long q = elements[i];
|
|
if((q & SEC_TER_DELTA_FLAG) != 0) {
|
|
// Find the next primary.
|
|
int j = i + 1;
|
|
for(;;) {
|
|
if(j == limit) { break; }
|
|
q = elements[j];
|
|
if((q & SEC_TER_DELTA_FLAG) == 0) {
|
|
i = j;
|
|
break;
|
|
}
|
|
++j;
|
|
}
|
|
if((q & SEC_TER_DELTA_FLAG) != 0) {
|
|
// Find the preceding primary.
|
|
j = i - 1;
|
|
for(;;) {
|
|
if(j == start) { break; }
|
|
q = elements[j];
|
|
if((q & SEC_TER_DELTA_FLAG) == 0) {
|
|
i = j;
|
|
break;
|
|
}
|
|
--j;
|
|
}
|
|
if((q & SEC_TER_DELTA_FLAG) != 0) {
|
|
// No primary between start and limit.
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if(p < (q & 0xffffff00L)) { // Reset the "step" bits of a range end primary.
|
|
limit = i;
|
|
} else {
|
|
start = i;
|
|
}
|
|
}
|
|
return start;
|
|
}
|
|
|
|
private static boolean isEndOfPrimaryRange(long q) {
|
|
return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0;
|
|
}
|
|
|
|
/**
|
|
* Data structure: See ICU4C source/i18n/collationrootelements.h.
|
|
*/
|
|
private long[] elements;
|
|
}
|