314 lines
12 KiB
Java
314 lines
12 KiB
Java
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2013-2014, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * ContractionsAndExpansions.java, ported from collationsets.h/.cpp
 *
 * C++ version created on: 2013feb09
 * created by: Markus W. Scherer
 */
|
|
|
|
package android.icu.impl.coll;
|
|
|
|
import java.util.Iterator;
|
|
|
|
import android.icu.impl.Trie2;
|
|
import android.icu.text.UnicodeSet;
|
|
import android.icu.util.CharsTrie;
|
|
import android.icu.util.CharsTrie.Entry;
|
|
|
|
/**
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public final class ContractionsAndExpansions {
|
|
// C++: The following fields are @internal, only public for access by callback.
|
|
private CollationData data;
|
|
private UnicodeSet contractions;
|
|
private UnicodeSet expansions;
|
|
private CESink sink;
|
|
private boolean addPrefixes;
|
|
private int checkTailored = 0; // -1: collected tailored +1: exclude tailored
|
|
private UnicodeSet tailored = new UnicodeSet();
|
|
private UnicodeSet ranges;
|
|
private StringBuilder unreversedPrefix = new StringBuilder();
|
|
private String suffix;
|
|
private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH];
|
|
|
|
/**
|
|
* @hide Only a subset of ICU is exposed in Android
|
|
*/
|
|
public static interface CESink {
|
|
void handleCE(long ce);
|
|
void handleExpansion(long ces[], int start, int length);
|
|
}
|
|
|
|
public ContractionsAndExpansions(UnicodeSet con, UnicodeSet exp, CESink s, boolean prefixes) {
|
|
contractions = con;
|
|
expansions = exp;
|
|
sink = s;
|
|
addPrefixes = prefixes;
|
|
}
|
|
|
|
public void forData(CollationData d) {
|
|
// Add all from the data, can be tailoring or base.
|
|
if (d.base != null) {
|
|
checkTailored = -1;
|
|
}
|
|
data = d;
|
|
Iterator<Trie2.Range> trieIterator = data.trie.iterator();
|
|
Trie2.Range range;
|
|
while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
|
|
enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this);
|
|
}
|
|
if (d.base == null) {
|
|
return;
|
|
}
|
|
// Add all from the base data but only for un-tailored code points.
|
|
tailored.freeze();
|
|
checkTailored = 1;
|
|
data = d.base;
|
|
trieIterator = data.trie.iterator();
|
|
while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
|
|
enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this);
|
|
}
|
|
}
|
|
|
|
private void enumCnERange(int start, int end, int ce32, ContractionsAndExpansions cne) {
|
|
if (cne.checkTailored == 0) {
|
|
// There is no tailoring.
|
|
// No need to collect nor check the tailored set.
|
|
} else if (cne.checkTailored < 0) {
|
|
// Collect the set of code points with mappings in the tailoring data.
|
|
if (ce32 == Collation.FALLBACK_CE32) {
|
|
return; // fallback to base, not tailored
|
|
} else {
|
|
cne.tailored.add(start, end);
|
|
}
|
|
// checkTailored > 0: Exclude tailored ranges from the base data enumeration.
|
|
} else if (start == end) {
|
|
if (cne.tailored.contains(start)) {
|
|
return;
|
|
}
|
|
} else if (cne.tailored.containsSome(start, end)) {
|
|
if (cne.ranges == null) {
|
|
cne.ranges = new UnicodeSet();
|
|
}
|
|
cne.ranges.set(start, end).removeAll(cne.tailored);
|
|
int count = cne.ranges.getRangeCount();
|
|
for (int i = 0; i < count; ++i) {
|
|
cne.handleCE32(cne.ranges.getRangeStart(i), cne.ranges.getRangeEnd(i), ce32);
|
|
}
|
|
}
|
|
cne.handleCE32(start, end, ce32);
|
|
}
|
|
|
|
public void forCodePoint(CollationData d, int c) {
|
|
int ce32 = d.getCE32(c);
|
|
if (ce32 == Collation.FALLBACK_CE32) {
|
|
d = d.base;
|
|
ce32 = d.getCE32(c);
|
|
}
|
|
data = d;
|
|
handleCE32(c, c, ce32);
|
|
}
|
|
|
|
private void handleCE32(int start, int end, int ce32) {
|
|
for (;;) {
|
|
if ((ce32 & 0xff) < Collation.SPECIAL_CE32_LOW_BYTE) {
|
|
// !isSpecialCE32()
|
|
if (sink != null) {
|
|
sink.handleCE(Collation.ceFromSimpleCE32(ce32));
|
|
}
|
|
return;
|
|
}
|
|
switch (Collation.tagFromCE32(ce32)) {
|
|
case Collation.FALLBACK_TAG:
|
|
return;
|
|
case Collation.RESERVED_TAG_3:
|
|
case Collation.BUILDER_DATA_TAG:
|
|
case Collation.LEAD_SURROGATE_TAG:
|
|
// Java porting note: U_INTERNAL_PROGRAM_ERROR is set to errorCode in ICU4C.
|
|
throw new AssertionError(
|
|
String.format("Unexpected CE32 tag type %d for ce32=0x%08x",
|
|
Collation.tagFromCE32(ce32), ce32));
|
|
case Collation.LONG_PRIMARY_TAG:
|
|
if (sink != null) {
|
|
sink.handleCE(Collation.ceFromLongPrimaryCE32(ce32));
|
|
}
|
|
return;
|
|
case Collation.LONG_SECONDARY_TAG:
|
|
if (sink != null) {
|
|
sink.handleCE(Collation.ceFromLongSecondaryCE32(ce32));
|
|
}
|
|
return;
|
|
case Collation.LATIN_EXPANSION_TAG:
|
|
if (sink != null) {
|
|
ces[0] = Collation.latinCE0FromCE32(ce32);
|
|
ces[1] = Collation.latinCE1FromCE32(ce32);
|
|
sink.handleExpansion(ces, 0, 2);
|
|
}
|
|
// Optimization: If we have a prefix,
|
|
// then the relevant strings have been added already.
|
|
if (unreversedPrefix.length() == 0) {
|
|
addExpansions(start, end);
|
|
}
|
|
return;
|
|
case Collation.EXPANSION32_TAG:
|
|
if (sink != null) {
|
|
int idx = Collation.indexFromCE32(ce32);
|
|
int length = Collation.lengthFromCE32(ce32);
|
|
for (int i = 0; i < length; ++i) {
|
|
ces[i] = Collation.ceFromCE32(data.ce32s[idx + i]);
|
|
}
|
|
sink.handleExpansion(ces, 0, length);
|
|
}
|
|
// Optimization: If we have a prefix,
|
|
// then the relevant strings have been added already.
|
|
if (unreversedPrefix.length() == 0) {
|
|
addExpansions(start, end);
|
|
}
|
|
return;
|
|
case Collation.EXPANSION_TAG:
|
|
if (sink != null) {
|
|
int idx = Collation.indexFromCE32(ce32);
|
|
int length = Collation.lengthFromCE32(ce32);
|
|
sink.handleExpansion(data.ces, idx, length);
|
|
}
|
|
// Optimization: If we have a prefix,
|
|
// then the relevant strings have been added already.
|
|
if (unreversedPrefix.length() == 0) {
|
|
addExpansions(start, end);
|
|
}
|
|
return;
|
|
case Collation.PREFIX_TAG:
|
|
handlePrefixes(start, end, ce32);
|
|
return;
|
|
case Collation.CONTRACTION_TAG:
|
|
handleContractions(start, end, ce32);
|
|
return;
|
|
case Collation.DIGIT_TAG:
|
|
// Fetch the non-numeric-collation CE32 and continue.
|
|
ce32 = data.ce32s[Collation.indexFromCE32(ce32)];
|
|
break;
|
|
case Collation.U0000_TAG:
|
|
assert (start == 0 && end == 0);
|
|
// Fetch the normal ce32 for U+0000 and continue.
|
|
ce32 = data.ce32s[0];
|
|
break;
|
|
case Collation.HANGUL_TAG:
|
|
if (sink != null) {
|
|
// TODO: This should be optimized,
|
|
// especially if [start..end] is the complete Hangul range. (assert that)
|
|
UTF16CollationIterator iter = new UTF16CollationIterator(data);
|
|
StringBuilder hangul = new StringBuilder(1);
|
|
for (int c = start; c <= end; ++c) {
|
|
hangul.setLength(0);
|
|
hangul.appendCodePoint(c);
|
|
iter.setText(false, hangul, 0);
|
|
int length = iter.fetchCEs();
|
|
// Ignore the terminating non-CE.
|
|
assert (length >= 2 && iter.getCE(length - 1) == Collation.NO_CE);
|
|
sink.handleExpansion(iter.getCEs(), 0, length - 1);
|
|
}
|
|
}
|
|
// Optimization: If we have a prefix,
|
|
// then the relevant strings have been added already.
|
|
if (unreversedPrefix.length() == 0) {
|
|
addExpansions(start, end);
|
|
}
|
|
return;
|
|
case Collation.OFFSET_TAG:
|
|
// Currently no need to send offset CEs to the sink.
|
|
return;
|
|
case Collation.IMPLICIT_TAG:
|
|
// Currently no need to send implicit CEs to the sink.
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
private void handlePrefixes(int start, int end, int ce32) {
|
|
int index = Collation.indexFromCE32(ce32);
|
|
ce32 = data.getCE32FromContexts(index); // Default if no prefix match.
|
|
handleCE32(start, end, ce32);
|
|
if (!addPrefixes) {
|
|
return;
|
|
}
|
|
CharsTrie.Iterator prefixes = new CharsTrie(data.contexts, index + 2).iterator();
|
|
while (prefixes.hasNext()) {
|
|
Entry e = prefixes.next();
|
|
setPrefix(e.chars);
|
|
// Prefix/pre-context mappings are special kinds of contractions
|
|
// that always yield expansions.
|
|
addStrings(start, end, contractions);
|
|
addStrings(start, end, expansions);
|
|
handleCE32(start, end, e.value);
|
|
}
|
|
resetPrefix();
|
|
}
|
|
|
|
void handleContractions(int start, int end, int ce32) {
|
|
int index = Collation.indexFromCE32(ce32);
|
|
if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
|
|
// No match on the single code point.
|
|
// We are underneath a prefix, and the default mapping is just
|
|
// a fallback to the mappings for a shorter prefix.
|
|
assert (unreversedPrefix.length() != 0);
|
|
} else {
|
|
ce32 = data.getCE32FromContexts(index); // Default if no suffix match.
|
|
assert (!Collation.isContractionCE32(ce32));
|
|
handleCE32(start, end, ce32);
|
|
}
|
|
CharsTrie.Iterator suffixes = new CharsTrie(data.contexts, index + 2).iterator();
|
|
while (suffixes.hasNext()) {
|
|
Entry e = suffixes.next();
|
|
suffix = e.chars.toString();
|
|
addStrings(start, end, contractions);
|
|
if (unreversedPrefix.length() != 0) {
|
|
addStrings(start, end, expansions);
|
|
}
|
|
handleCE32(start, end, e.value);
|
|
}
|
|
suffix = null;
|
|
}
|
|
|
|
void addExpansions(int start, int end) {
|
|
if (unreversedPrefix.length() == 0 && suffix == null) {
|
|
if (expansions != null) {
|
|
expansions.add(start, end);
|
|
}
|
|
} else {
|
|
addStrings(start, end, expansions);
|
|
}
|
|
}
|
|
|
|
void addStrings(int start, int end, UnicodeSet set) {
|
|
if (set == null) {
|
|
return;
|
|
}
|
|
StringBuilder s = new StringBuilder(unreversedPrefix);
|
|
do {
|
|
s.appendCodePoint(start);
|
|
if (suffix != null) {
|
|
s.append(suffix);
|
|
}
|
|
set.add(s);
|
|
s.setLength(unreversedPrefix.length());
|
|
} while (++start <= end);
|
|
}
|
|
|
|
// Prefixes are reversed in the data structure.
|
|
private void setPrefix(CharSequence pfx) {
|
|
unreversedPrefix.setLength(0);
|
|
unreversedPrefix.append(pfx).reverse();
|
|
}
|
|
|
|
private void resetPrefix() {
|
|
unreversedPrefix.setLength(0);
|
|
}
|
|
}
|