/* GENERATED SOURCE. DO NOT MODIFY. */ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2005-2016 International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ package android.icu.text; import static android.icu.impl.CharacterIteration.DONE32; import static android.icu.impl.CharacterIteration.next32; import static android.icu.impl.CharacterIteration.nextTrail32; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.text.CharacterIterator; import java.util.MissingResourceException; import java.util.concurrent.ConcurrentLinkedQueue; import android.icu.impl.CharacterIteration; import android.icu.impl.ICUBinary; import android.icu.impl.ICUDebug; import android.icu.impl.RBBIDataWrapper; import android.icu.impl.breakiter.BurmeseBreakEngine; import android.icu.impl.breakiter.CjkBreakEngine; import android.icu.impl.breakiter.DictionaryBreakEngine; import android.icu.impl.breakiter.KhmerBreakEngine; import android.icu.impl.breakiter.LSTMBreakEngine; import android.icu.impl.breakiter.LanguageBreakEngine; import android.icu.impl.breakiter.LaoBreakEngine; import android.icu.impl.breakiter.ThaiBreakEngine; import android.icu.impl.breakiter.UnhandledBreakEngine; import android.icu.lang.UCharacter; import android.icu.lang.UProperty; import android.icu.lang.UScript; import android.icu.util.CodePointTrie; /** * Rule Based Break Iterator * This is a port of the C++ class RuleBasedBreakIterator from ICU4C. 
* * @hide Only a subset of ICU is exposed in Android */ public class RuleBasedBreakIterator extends BreakIterator { //======================================================================= // Constructors & Factories //======================================================================= /** * private constructor */ private RuleBasedBreakIterator() { fDictionaryCharCount = 0; } /** * Create a break iterator from a precompiled set of break rules. * * Creating a break iterator from the binary rules is much faster than * creating one from source rules. * * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. * Binary break iterator rules are not guaranteed to be compatible between * different versions of ICU. * * @param is an input stream supplying the compiled binary rules. * @throws IOException if there is an error while reading the rules from the InputStream. * @see #compileRules(String, OutputStream) */ public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException { RuleBasedBreakIterator This = new RuleBasedBreakIterator(); This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStreamAndCloseStream(is)); This.fLookAheadMatches = new int[This.fRData.fFTable.fLookAheadResultsSize]; return This; } /** * This factory method doesn't have an access modifier; it is only accessible in the same * package. * * Create a break iterator from a precompiled set of break rules. * * Creating a break iterator from the binary rules is much faster than * creating one from source rules. * * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. * Binary break iterator rules are not guaranteed to be compatible between * different versions of ICU. * * @param bytes a buffer supplying the compiled binary rules. * @param phraseBreaking a flag indicating if phrase breaking is required. * @throws IOException if there is an error while reading the rules from the buffer. 
* @see #compileRules(String, OutputStream) * @hide draft / provisional / internal are hidden on Android */ /* package-potected */ static RuleBasedBreakIterator getInstanceFromCompiledRules( ByteBuffer bytes, boolean phraseBreaking) throws IOException { RuleBasedBreakIterator instance = getInstanceFromCompiledRules(bytes); instance.fPhraseBreaking = phraseBreaking; return instance; } /** * Create a break iterator from a precompiled set of break rules. * * Creating a break iterator from the binary rules is much faster than * creating one from source rules. * * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. * Binary break iterator rules are not guaranteed to be compatible between * different versions of ICU. * * @param bytes a buffer supplying the compiled binary rules. * @throws IOException if there is an error while reading the rules from the buffer. * @see #compileRules(String, OutputStream) * @deprecated This API is ICU internal only. * @hide draft / provisional / internal are hidden on Android */ @Deprecated public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException { RuleBasedBreakIterator This = new RuleBasedBreakIterator(); This.fRData = RBBIDataWrapper.get(bytes); This.fLookAheadMatches = new int[This.fRData.fFTable.fLookAheadResultsSize]; return This; } /** * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. * @param rules The break rules to be used. */ public RuleBasedBreakIterator(String rules) { this(); try { ByteArrayOutputStream ruleOS = new ByteArrayOutputStream(); compileRules(rules, ruleOS); fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray())); fLookAheadMatches = new int[fRData.fFTable.fLookAheadResultsSize]; } catch (IOException e) { ///CLOVER:OFF // An IO exception can only arrive here if there is a bug in the RBBI Rule compiler, // causing bogus compiled rules to be produced, but with no compile error raised. 
RuntimeException rte = new RuntimeException("RuleBasedBreakIterator rule compilation internal error: " + e.getMessage()); throw rte; ///CLOVER:ON } } //======================================================================= // Boilerplate //======================================================================= /** * Clones this iterator. * @return A newly-constructed RuleBasedBreakIterator with the same * behavior as this one. */ @Override public Object clone() { RuleBasedBreakIterator result; result = (RuleBasedBreakIterator)super.clone(); if (fText != null) { result.fText = (CharacterIterator)(fText.clone()); } result.fLookAheadMatches = new int[fRData.fFTable.fLookAheadResultsSize]; result.fBreakCache = result.new BreakCache(fBreakCache); result.fDictionaryCache = result.new DictionaryCache(fDictionaryCache); return result; } /** * Returns true if both BreakIterators are of the same class, have the same * rules, and iterate over the same text. */ @Override public boolean equals(Object that) { if (that == null) { return false; } if (this == that) { return true; } try { RuleBasedBreakIterator other = (RuleBasedBreakIterator) that; if (fRData != other.fRData && (fRData == null || other.fRData == null)) { return false; } if (fRData != null && other.fRData != null && (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) { return false; } if (fText == null && other.fText == null) { return true; } if (fText == null || other.fText == null || !fText.equals(other.fText)) { return false; } return fPosition == other.fPosition; } catch(ClassCastException e) { return false; } } /** * Returns the description (rules) used to create this iterator. 
* (In ICU4C, the same function is RuleBasedBreakIterator::getRules()) */ @Override public String toString() { String retStr = ""; if (fRData != null) { retStr = fRData.fRuleSource; } return retStr; } /** * Compute a hashcode for this BreakIterator * @return A hash code */ @Override public int hashCode() { return fRData.fRuleSource.hashCode(); } private static final int START_STATE = 1; // The state number of the starting state private static final int STOP_STATE = 0; // The state-transition value indicating "stop" // RBBIRunMode - the state machine runs an extra iteration at the beginning and end // of user text. A variable with this enum type keeps track of where we // are. The state machine only fetches user text input while in RUN mode. private static final int RBBI_START = 0; private static final int RBBI_RUN = 1; private static final int RBBI_END = 2; /** * The character iterator through which this BreakIterator accesses the text. */ private CharacterIterator fText = new java.text.StringCharacterIterator(""); /** * The rule data for this BreakIterator instance. * Not intended for public use. Declared public for testing purposes only. * @deprecated This API is ICU internal only. * @hide draft / provisional / internal are hidden on Android */ @Deprecated public RBBIDataWrapper fRData; /** * The iteration state - current position, rule status for the current position, * and whether the iterator ran off the end, yielding UBRK_DONE. * Current position is pinned to be 0 < position <= text.length. * Current position is always set to a boundary. * * The current position of the iterator. Pinned, 0 < fPosition <= text.length. * Never has the value UBRK_DONE (-1). */ private int fPosition; /** * Index of the Rule {tag} values for the most recent match. */ private int fRuleStatusIndex; /** * True when iteration has run off the end, and iterator functions should return UBRK_DONE. */ private boolean fDone; /** * Array of look-ahead tentative results. 
*/ private int[] fLookAheadMatches; /** * Cache of previously determined boundary positions. */ private BreakCache fBreakCache = new BreakCache(); /** * Flag used to indicate if phrase breaking is required. */ private boolean fPhraseBreaking = false; /** * Counter for the number of characters encountered with the "dictionary" * flag set. Normal RBBI iterators don't use it, although the code * for updating it is live. Dictionary Based break iterators (a subclass * of us) access this field directly. * @hide draft / provisional / internal are hidden on Android */ private int fDictionaryCharCount; private DictionaryCache fDictionaryCache = new DictionaryCache(); /** * ICU debug argument name for RBBI */ private static final String RBBI_DEBUG_ARG = "rbbi"; /** * Debugging flag. Trace operation of state machine when true. */ private static final boolean TRACE = ICUDebug.enabled(RBBI_DEBUG_ARG) && ICUDebug.value(RBBI_DEBUG_ARG).indexOf("trace") >= 0; /** * The "default" break engine - just skips over ranges of dictionary words, * producing no breaks. Should only be used if characters need to be handled * by a dictionary but we have no dictionary implementation for them. * * Only one instance; shared by all break iterators. */ private static final UnhandledBreakEngine gUnhandledBreakEngine; /** * List of all known break engines, common for all break iterators. * Lazily updated as break engines are needed, because instantiation of * break engines is expensive. * * Important notes: *
* Of the standard types of ICU break iterators, only the word and line break
* iterator provides status values. The values are defined in
* class RuleBasedBreakIterator, and allow distinguishing between words
* that contain alphabetic letters, "words" that appear to be numbers,
* punctuation and spaces, words containing ideographic characters, and
* more. Call {@code getRuleStatus()} after obtaining a boundary
* position from {@code next()}, {@code previous()}, or
* any other break iterator function that returns a boundary position.
*
* Note that {@code getRuleStatus()} returns the value corresponding to
* the {@code current()} index even after {@code next()} has returned DONE.
*
* @return the status from the break rule that determined the boundary * at the current iteration position. */ @Override public int getRuleStatus() { // Status records have this form: // Count N <-- fLastRuleStatusIndex points here. // Status val 0 // Status val 1 // ... // Status val N-1 <-- the value we need to return // The status values are sorted in ascending order. // This function returns the last (largest) of the array of status values. int idx = fRuleStatusIndex + fRData.fStatusTable[fRuleStatusIndex]; int tagVal = fRData.fStatusTable[idx]; return tagVal; } /** * Get the status (tag) values from the break rule(s) that determined the boundary * at the current iteration position. The values appear in the rule source * within brackets, {123}, for example. The default status value for rules * that do not explicitly provide one is zero. *
* The status values used by the standard ICU break rules are defined * as public constants in class RuleBasedBreakIterator. *
* If the size of the output array is insufficient to hold the data,
* the output will be truncated to the available length. No exception
* will be thrown.
*
* @param fillInArray an array to be filled in with the status values.
* @return The number of rule status values from the rules that determined
* the boundary at the current iteration position.
* In the event that the array is too small, the return value
* is the total number of status values that were available,
* not the reduced number that were actually returned.
*/
@Override
public int getRuleStatusVec(int[] fillInArray) {
int numStatusVals = fRData.fStatusTable[fRuleStatusIndex];
if (fillInArray != null) {
int numToCopy = Math.min(numStatusVals, fillInArray.length);
for (int i=0; i
* The returned CharacterIterator is a reference
* to the actual iterator being used by the BreakIterator.
* No guarantees are made about the current position
* of this iterator when it is returned; it may differ from the
* BreakIterators current position. If you need to move that
* position to examine the text, clone this function's return value first.
* @return An iterator over the text being analyzed.
*/
@Override
public CharacterIterator getText() {
    // The caller receives the live iterator this break iterator works on, not a copy.
    final CharacterIterator liveText = this.fText;
    return liveText;
}
/**
 * Points this break iterator at a new piece of text and moves the iteration
 * position back to the start of that text. The previously set iterator is dropped.
 *
 * Caution: the supplied CharacterIterator is not copied; the break iterator uses it
 * directly. Code outside of the break iterator must not alter it in any way,
 * otherwise the behavior of the break iterator is undefined.
 *
 * @param newText An iterator over the text to analyze. May be null, in which case
 *                the boundary cache restarts at position 0.
 */
@Override
public void setText(CharacterIterator newText) {
    // With no text the boundary cache restarts at 0 (what the no-argument reset does);
    // otherwise it restarts at the first index of the new text. Rule status is 0 either way.
    final int cacheStart = (newText == null) ? 0 : newText.getBeginIndex();
    fBreakCache.reset(cacheStart, 0);

    // Boundaries computed by dictionary engines belong to the old text as well.
    fDictionaryCache.reset();

    fText = newText;
    this.first();
}
/**
* Control debug, trace and dump options.
* Holds the value that ICUDebug reports for the RBBI debug argument (RBBI_DEBUG_ARG)
* when that argument is enabled, and null when it is not. Evaluated once, when the
* class is initialized.
* @deprecated This API is ICU internal only.
* @hide draft / provisional / internal are hidden on Android
*/
@Deprecated
public static final String fDebugEnv = ICUDebug.enabled(RBBI_DEBUG_ARG) ?
ICUDebug.value(RBBI_DEBUG_ARG) : null;
/**
 * Finds the language break engine that handles a dictionary character, creating
 * and registering a new engine for the character's script when none exists yet.
 *
 * @param c the dictionary code point that needs an engine.
 * @return an engine that handles {@code c}; the shared unhandled engine when the
 *         script has no dedicated engine; or null when constructing the engine
 *         failed with an IOException.
 */
private LanguageBreakEngine getLanguageBreakEngine(int c) {
    // Optimistic pass without holding the lock. It can miss an engine that another
    // thread is adding right now, which is why the search is repeated under the lock.
    LanguageBreakEngine known = findInstantiatedEngine(c);
    if (known != null) {
        return known;
    }

    synchronized (gAllBreakEngines) {
        // Another break iterator may have instantiated the desired engine meanwhile.
        known = findInstantiatedEngine(c);
        if (known != null) {
            return known;
        }

        // Nothing registered handles this character: build an engine for its script.
        final int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
        LanguageBreakEngine created;
        try {
            switch (script) {
            case UScript.THAI:
                try {
                    created = LSTMBreakEngine.create(script, LSTMBreakEngine.createData(script));
                } catch (MissingResourceException e) {
                    // No LSTM data available; use the Thai-specific engine instead.
                    created = new ThaiBreakEngine();
                }
                break;
            case UScript.LAO:
                created = new LaoBreakEngine();
                break;
            case UScript.MYANMAR:
                try {
                    created = LSTMBreakEngine.create(script, LSTMBreakEngine.createData(script));
                } catch (MissingResourceException e) {
                    // No LSTM data available; use the Burmese-specific engine instead.
                    created = new BurmeseBreakEngine();
                }
                break;
            case UScript.KHMER:
                created = new KhmerBreakEngine();
                break;
            case UScript.HAN:
            case UScript.KATAKANA:
            case UScript.HIRAGANA:
                // Katakana, Hiragana and Han are handled by the same dictionary engine.
                created = new CjkBreakEngine(false);
                break;
            case UScript.HANGUL:
                created = new CjkBreakEngine(true);
                break;
            default:
                // No dedicated engine: let the shared unhandled engine take this character.
                gUnhandledBreakEngine.handleChar(c);
                created = gUnhandledBreakEngine;
                break;
            }
        } catch (IOException e) {
            // Engine construction failed; the caller is told so with a null result.
            created = null;
        }

        // Only newly built engines are appended; the shared unhandled engine is not re-added.
        if (created != null && created != gUnhandledBreakEngine) {
            gAllBreakEngines.add(created);
        }
        return created;
    } // end synchronized(gAllBreakEngines)
}

/**
 * Searches the engines instantiated so far for one that handles the character.
 *
 * @param c the code point to look up.
 * @return the first registered engine that handles {@code c}, or null if there is none.
 */
private LanguageBreakEngine findInstantiatedEngine(int c) {
    for (LanguageBreakEngine engine : gAllBreakEngines) {
        if (engine.handles(c)) {
            return engine;
        }
    }
    return null;
}
/**
* The State Machine Engine for moving forward is here.
* This function is the heart of the RBBI run time engine.
*
* Input
* fPosition, the position in the text to begin from.
* Output
* fPosition: the boundary following the starting position.
* fDictionaryCharCount the number of dictionary characters encountered.
* If > 0, the segment will be further subdivided
* fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
* fDone set to true when no starting character could be read (DONE32).
*
* If the state machine finds no boundary beyond the starting position, the
* position is forced ahead by one code point and fRuleStatusIndex is set to 0.
*
* @return the new iterator position, or BreakIterator.DONE when the starting
* character resolved to DONE32.
*
* A note on supplementary characters and the position of underlying
* Java CharacterIterator: Normally, a character iterator is positioned at
* the char most recently returned by next(). Within this function, when
* a supplementary char is being processed, the char iterator is left
* sitting on the trail surrogate, in the middle of the code point.
* This is different from everywhere else, where an iterator always
* points at the lead surrogate of a supplementary.
*/
private int handleNext() {
if (TRACE) {
System.out.println("Handle Next pos char state category");
}
// handleNext always sets the break tag value.
// Set the default for it.
fRuleStatusIndex = 0;
fDictionaryCharCount = 0;
// caches for quicker access
CharacterIterator text = fText;
CodePointTrie trie = fRData.fTrie;
char[] stateTable = fRData.fFTable.fTable;
int initialPosition = fPosition;
text.setIndex(initialPosition);
int result = initialPosition;
// Set up the starting char.
// A value at or above the lead surrogate range is handed to nextTrail32 for
// further resolution; a DONE32 result means there is nothing to iterate over.
int c = text.current();
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
c = nextTrail32(text, c);
if (c == DONE32) {
fDone = true;
return BreakIterator.DONE;
}
}
// Set the initial state for the state machine
int state = START_STATE;
int row = fRData.getRowIndex(state);
short category = 3;
int flagsState = fRData.fFTable.fFlags;
int dictStart = fRData.fFTable.fDictCategoriesStart;
int mode = RBBI_RUN;
// When the rules require it, run one extra iteration before any input is consumed,
// using the preset beginning-of-input category (2).
if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
category = 2;
mode = RBBI_START;
if (TRACE) {
System.out.print(" " + RBBIDataWrapper.intToString(text.getIndex(), 5));
System.out.print(RBBIDataWrapper.intToHexString(c, 10));
System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
}
}
// loop until we reach the end of the text or transition to state 0
while (state != STOP_STATE) {
if (c == DONE32) {
// Reached end of input string.
if (mode == RBBI_END) {
// We have already run the loop one last time with the
// character set to the pseudo {eof} value. Now it is time
// to unconditionally bail out.
break;
}
// Run the loop one last time with the fake end-of-input character category
mode = RBBI_END;
category = 1;
}
else if (mode == RBBI_RUN) {
// Get the char category. An incoming category of 1 or 2 means that
// we are preset for doing the beginning or end of input, and
// that we shouldn't get a category from an actual text input character.
//
// look up the current character's character category, which tells us
// which column in the state table to look at.
//
category = (short) trie.get(c);
// Check for categories that require word dictionary handling.
if (category >= dictStart) {
fDictionaryCharCount++;
}
if (TRACE) {
System.out.print(" " + RBBIDataWrapper.intToString(text.getIndex(), 5));
System.out.print(RBBIDataWrapper.intToHexString(c, 10));
System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
}
// Advance to the next character.
// (A beginning-of-input iteration never reaches this branch, so it does not
// advance; the iteration after it processes the first real input character.)
c = text.next();
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
c = nextTrail32(text, c);
}
}
else {
// Beginning-of-input iteration: keep the preset category, consume no input,
// and switch to normal running for the next iteration.
mode = RBBI_RUN;
}
// look up a state transition in the state table
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
row = fRData.getRowIndex(state);
int accepting = stateTable[row + RBBIDataWrapper.ACCEPTING];
if (accepting == RBBIDataWrapper.ACCEPTING_UNCONDITIONAL) {
// Match found, common case
result = text.getIndex();
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
// The iterator has been left in the middle of a surrogate pair.
// We want the start of it.
result--;
}
// Remember the break status (tag) values.
fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGSIDX];
} else if (accepting > RBBIDataWrapper.ACCEPTING_UNCONDITIONAL) {
// Lookahead match is completed.
// The accepting value indexes the position recorded earlier for this look-ahead
// rule; a non-negative recorded position is returned immediately as the boundary.
int lookaheadResult = fLookAheadMatches[accepting];
if (lookaheadResult >= 0) {
fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGSIDX];
fPosition = lookaheadResult;
return lookaheadResult;
}
}
// If we are at the position of the '/' in a look-ahead (hard break) rule;
// record the current position, to be returned later, if the full rule matches.
// TODO: Move this check before the previous check of fAccepting.
// This would enable hard-break rules with no following context.
// But there are line break test failures when trying this. Investigate.
// Issue ICU-20837
int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
if (rule != 0) {
int pos = text.getIndex();
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
// The iterator has been left in the middle of a surrogate pair.
// We want the beginning of it.
pos--;
}
fLookAheadMatches[rule] = pos;
}
} // End of state machine main loop
// The state machine is done. Check whether it found a match...
// If the iterator failed to advance in the match engine force it ahead by one.
// This indicates a defect in the break rules, which should always match
// at least one character.
if (result == initialPosition) {
if (TRACE) {
System.out.println("Iterator did not move. Advancing by 1.");
}
text.setIndex(initialPosition);
next32(text);
result = text.getIndex();
fRuleStatusIndex = 0;
}
// Leave the iterator at our result position.
// (we may have advanced beyond the last accepting position chasing after
// longer matches that never completed.)
fPosition = result;
if (TRACE) {
System.out.println("result = " + result);
}
return result;
}
/**
 * Walks backwards from an arbitrary text position, driving the Safe Reverse state
 * table, until a "Safe Position" is reached: a position from which the forward
 * break rules will operate correctly. A Safe Position is not necessarily a
 * boundary itself.
 *
 * The structure mirrors handleNext(), but is simpler because the safe table
 * does not require as many options.
 *
 * @param fromPosition the position in the input text to begin the iteration.
 * @return the safe position found, or BreakIterator.DONE if the (adjusted) starting
 *         position is already the beginning of the text.
 * @hide draft / provisional / internal are hidden on Android
 */
private int handleSafePrevious(int fromPosition) {
    // Local copies for quicker access.
    final CharacterIterator text = fText;
    final CodePointTrie trie = fRData.fTrie;
    final char[] stateTable = fRData.fRTable.fTable;

    CISetIndex32(text, fromPosition);
    if (TRACE) {
        System.out.print("Handle Previous pos char state category");
    }

    // Nothing precedes the beginning of the text.
    if (text.getIndex() == text.getBeginIndex()) {
        return BreakIterator.DONE;
    }

    // Fetch the first character and put the state machine in its starting state.
    int c = CharacterIteration.previous32(text);
    char state = START_STATE;
    int row = fRData.getRowIndex(state);

    // Step backwards one code point at a time until the text is exhausted
    // or the machine transitions to the stop state.
    while (c != DONE32) {
        // The character's category selects the column of the state table.
        final short category = (short) trie.get(c);
        if (TRACE) {
            System.out.print(" " + RBBIDataWrapper.intToString(text.getIndex(), 5));
            System.out.print(RBBIDataWrapper.intToHexString(c, 10));
            System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
        }

        // Move the machine to its next state.
        assert category < fRData.fHeader.fCatCount;
        state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
        row = fRData.getRowIndex(state);
        if (state == STOP_STATE) {
            // Normal exit: reaching state zero means a safe point has been found.
            break;
        }

        c = CharacterIteration.previous32(text);
    }

    // Wherever the text iterator stopped is the answer.
    final int safePosition = text.getIndex();
    if (TRACE) {
        System.out.println("result = " + safePosition);
    }
    return safePosition;
}
/**
 * Positions a CharacterIterator at a code point boundary at or near an index.
 * The index is pinned to the range BeginIndex <= index <= EndIndex. When it
 * lands on the trail (low) surrogate of a supplementary character, the iterator
 * is moved back to the lead (high) surrogate.
 *
 * @param ci A CharacterIterator to set
 * @param index the index to set
 * @return the resulting index, possibly pinned or adjusted.
 */
private static int CISetIndex32(CharacterIterator ci, int index) {
    final int begin = ci.getBeginIndex();
    if (index <= begin) {
        // At or before the start: pin to the first position.
        ci.first();
        return ci.getIndex();
    }

    final int end = ci.getEndIndex();
    if (index >= end) {
        // At or past the end: pin to the end position.
        ci.setIndex(end);
        return ci.getIndex();
    }

    final char landedOn = ci.setIndex(index);
    if (Character.isLowSurrogate(landedOn)) {
        // Possibly in the middle of a surrogate pair: peek at the preceding char,
        // and step forward again if it is not the matching lead surrogate.
        final char preceding = ci.previous();
        if (!Character.isHighSurrogate(preceding)) {
            ci.next();
        }
    }
    return ci.getIndex();
}
/** DictionaryCache stores the boundaries obtained from a run of dictionary characters.
* Dictionary boundaries are moved first to this cache, then from here
* to the main BreakCache, where they may inter-leave with non-dictionary
* boundaries. The public BreakIterator API always fetches directly
* from the main BreakCache, not from here.
*
* In common situations, the number of boundaries in a single dictionary run
* should be quite small, it will be terminated by punctuation, spaces,
* or any other non-dictionary characters. The main BreakCache may end
* up with boundaries from multiple dictionary based runs.
*
* The boundaries are stored in a simple ArrayList (vector), with the
* assumption that they will be accessed sequentially.
*/
class DictionaryCache {
void reset() {
fPositionInCache = -1;
fStart = 0;
fLimit = 0;
fFirstRuleStatusIndex = 0;
fOtherRuleStatusIndex = 0;
fBreaks.removeAllElements();
};
boolean following(int fromPos) {
if (fromPos >= fLimit || fromPos < fStart) {
fPositionInCache = -1;
return false;
}
// Sequential iteration, move from previous boundary to the following
int r = 0;
if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) {
++fPositionInCache;
if (fPositionInCache >= fBreaks.size()) {
fPositionInCache = -1;
return false;
}
r = fBreaks.elementAt(fPositionInCache);
assert(r > fromPos);
fBoundary = r;
fStatusIndex = fOtherRuleStatusIndex;
return true;
}
// Random indexing. Linear search for the boundary following the given position.
for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
r= fBreaks.elementAt(fPositionInCache);
if (r > fromPos) {
fBoundary = r;
fStatusIndex = fOtherRuleStatusIndex;
return true;
}
}
// Internal error. fStart <= fromPos < fLimit, but no cached boundary.
assert(false);
fPositionInCache = -1;
return false;
};
boolean preceding(int fromPos) {
if (fromPos <= fStart || fromPos > fLimit) {
fPositionInCache = -1;
return false;
}
if (fromPos == fLimit) {
fPositionInCache = fBreaks.size() - 1;
if (fPositionInCache >= 0) {
assert(fBreaks.elementAt(fPositionInCache) == fromPos);
}
}
int r;
if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) {
--fPositionInCache;
r = fBreaks.elementAt(fPositionInCache);
assert(r < fromPos);
fBoundary = r;
fStatusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
return true;
}
if (fPositionInCache == 0) {
fPositionInCache = -1;
return false;
}
for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
r = fBreaks.elementAt(fPositionInCache);
if (r < fromPos) {
fBoundary = r;
fStatusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
return true;
}
}
assert(false);
fPositionInCache = -1;
return false;
};
/**
* Populate the cache with the dictionary based boundaries within a region of text.
* @param startPos The start position of a range of text
* @param endPos The end position of a range of text
* @param firstRuleStatus The rule status index that applies to the break at startPos
* @param otherRuleStatus The rule status index that applies to boundaries other than startPos
* @hide draft / provisional / internal are hidden on Android
*/
void populateDictionary(int startPos, int endPos,
int firstRuleStatus, int otherRuleStatus) {
if ((endPos - startPos) <= 1) {
return;
}
reset();
fFirstRuleStatusIndex = firstRuleStatus;
fOtherRuleStatusIndex = otherRuleStatus;
int rangeStart = startPos;
int rangeEnd = endPos;
int category;
int current;
int foundBreakCount = 0;
// Loop through the text, looking for ranges of dictionary characters.
// For each span, find the appropriate break engine, and ask it to find
// any breaks within the span.
fText.setIndex(rangeStart);
int c = CharacterIteration.current32(fText);
category = (short)fRData.fTrie.get(c);
int dictStart = fRData.fFTable.fDictCategoriesStart;
while(true) {
while((current = fText.getIndex()) < rangeEnd && (category < dictStart)) {
c = CharacterIteration.next32(fText); // pre-increment
category = (short)fRData.fTrie.get(c);
}
if (current >= rangeEnd) {
break;
}
// We now have a dictionary character. Get the appropriate language object
// to deal with it.
LanguageBreakEngine lbe = getLanguageBreakEngine(c);
// Ask the language object if there are any breaks. It will add them to the cache and
// leave the text pointer on the other side of its range, ready to search for the next one.
if (lbe != null) {
foundBreakCount += lbe.findBreaks(fText, rangeStart, rangeEnd, fBreaks, fPhraseBreaking);
}
// Reload the loop variables for the next go-round
c = CharacterIteration.current32(fText);
category = (short)fRData.fTrie.get(c);
}
// If we found breaks, ensure that the first and last entries are
// the original starting and ending position. And initialize the
// cache iteration position to the first entry.
// System.out.printf("foundBreakCount = %d%n", foundBreakCount);
if (foundBreakCount > 0) {
assert(foundBreakCount == fBreaks.size());
if (startPos < fBreaks.elementAt(0)) {
// The dictionary did not place a boundary at the start of the segment of text.
// Add one now. This should not commonly happen, but it would be easy for interactions
// of the rules for dictionary segments and the break engine implementations to
// inadvertently cause it. Cover it here, just in case.
fBreaks.offer(startPos);
}
if (endPos > fBreaks.peek()) {
fBreaks.push(endPos);
}
fPositionInCache = 0;
// Note: Dictionary matching may extend beyond the original limit.
fStart = fBreaks.elementAt(0);
fLimit = fBreaks.peek();
} else {
// there were no language-based breaks, even though the segment contained
// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
// for this range will fail, and the calling code will fall back to the rule based boundaries.
}
};
DictionaryCache() {
fPositionInCache = -1;
fBreaks = new DictionaryBreakEngine.DequeI();
}
/**
* copy constructor. Used by RuleBasedBreakIterator.clone().
*
* @param src the source object to be copied.
*/
DictionaryCache(DictionaryCache src) {
try {
fBreaks = (DictionaryBreakEngine.DequeI)src.fBreaks.clone();
}
catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
fPositionInCache = src.fPositionInCache;
fStart = src.fStart;
fLimit = src.fLimit;
fFirstRuleStatusIndex = src.fFirstRuleStatusIndex;
fOtherRuleStatusIndex = src.fOtherRuleStatusIndex;
fBoundary = src.fBoundary;
fStatusIndex = src.fStatusIndex;
}
// A data structure containing the boundaries themselves. Essentially a vector of raw ints.
DictionaryBreakEngine.DequeI fBreaks;
// Index in fBreaks of the last boundary returned by following() or preceding().
// Optimizes sequential access. Set to -1 by the default constructor and to 0
// when populateDictionary() finds breaks.
int fPositionInCache;
// Text position of the first boundary in the cache.
int fStart;
// Last boundary in the cache, which is the limit of the text segment being
// handled by the dictionary. Dictionary matching may extend beyond the limit
// originally requested.
int fLimit;
int fFirstRuleStatusIndex; // Rule status info for first boundary.
int fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries.
int fBoundary; // Current boundary. Set by preceding(), following().
int fStatusIndex; // Current rule status index. Set by preceding(), following().
};
/*
* class BreakCache
*
* Cache of break boundary positions and rule status values.
* Break iterator API functions, next(), previous(), etc., will use cached results
* when possible, and otherwise cache new results as they are obtained.
*
* Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
*
* The cache is implemented as a single circular buffer.
*/
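/*
* The size of the circular cache buffer is fixed by BreakCache.CACHE_SIZE,
* which must be a power of two so that buffer indexes can wrap with a bit mask.
*/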
class BreakCache {
BreakCache() {
    // A new cache holds a single boundary at text index 0 with rule status 0.
    reset(0, 0);
}
/**
 * Discard the cache contents and restart it with one boundary.
 *
 * @param pos        text index of the sole boundary the cache will hold.
 * @param ruleStatus rule status index recorded for that boundary (stored as a short).
 */
void reset(int pos, int ruleStatus) {
    // Slot 0 becomes the only occupied slot: start, end and current all point at it.
    fBoundaries[0] = pos;
    fStatuses[0] = (short) ruleStatus;
    fBufIdx = 0;
    fEndBufIdx = 0;
    fStartBufIdx = 0;
    fTextIdx = pos;
}
/** Reset the cache to a single boundary at text index 0 with rule status index 0. */
void reset() {
    final int initialPosition = 0;
    final int initialRuleStatus = 0;
    reset(initialPosition, initialRuleStatus);
}
/**
 * Advance to the next boundary, pushing the resulting position and rule status
 * out to the enclosing iterator. When the cache has no further entries it is
 * extended first; fDone is set if that extension fails.
 */
void next() {
    if (fBufIdx != fEndBufIdx) {
        // Fast path: the following boundary is already cached.
        fBufIdx = modChunkSize(fBufIdx + 1);
        fPosition = fBoundaries[fBufIdx];
        fTextIdx = fPosition;
    } else {
        // At the last cached boundary: try to extend the cache forward.
        fDone = !populateFollowing();
        fPosition = fTextIdx;
    }
    fRuleStatusIndex = fStatuses[fBufIdx];
}
/**
 * Move back to the preceding boundary, pushing the resulting position and rule
 * status out to the enclosing iterator. fDone is set when the buffer index
 * did not change, i.e. no earlier boundary could be reached.
 */
void previous() {
    final int bufIdxOnEntry = fBufIdx;
    if (bufIdxOnEntry != fStartBufIdx) {
        // The preceding boundary is already in the cache.
        fBufIdx = modChunkSize(bufIdxOnEntry - 1);
        fTextIdx = fBoundaries[fBufIdx];
    } else {
        // Sitting on the first cached boundary: prepend to the cache.
        populatePreceding();
    }
    fRuleStatusIndex = fStatuses[fBufIdx];
    fPosition = fTextIdx;
    fDone = (fBufIdx == bufIdxOnEntry);
}
// Move the iteration state to the position following the startPosition.
// Input position must be pinned to the input length.
void following(int startPos) {
    // Position the cache on startPos (or the boundary before it), trying the cheapest
    // option first: already there, then a seek within the cache, then a cache refill.
    final boolean positioned =
            startPos == fTextIdx || seek(startPos) || populateNear(startPos);
    if (!positioned) {
        return;
    }
    // Neither seek() nor the cached fast path of next() clears fDone, so clear it
    // here to cover an iterator that had previously run off the end of the text.
    fDone = false;
    next();
}
/**
 * Move the iteration state to the boundary preceding startPos.
 * Nothing happens if the cache cannot be positioned at or near startPos.
 */
void preceding(int startPos) {
    if (startPos != fTextIdx && !seek(startPos) && !populateNear(startPos)) {
        return;
    }
    if (startPos != fTextIdx) {
        // startPos fell between two boundaries, and the cache has been left on the
        // earlier one. That is the answer; current() publishes it to the iterator.
        assert(startPos > fTextIdx);
        current();
    } else {
        // startPos is itself a boundary: step back one.
        previous();
    }
}
/**
* Update the state of the enclosing break iterator (fPosition, fRuleStatusIndex
* and fDone) to reflect the current state of the break iterator cache (this).
* Returns the current text index of the cache.
*/
int current() {
    final int textIdx = fTextIdx;
    fDone = false;
    fRuleStatusIndex = fStatuses[fBufIdx];
    fPosition = textIdx;
    return textIdx;
}
/**
* Add boundaries to the cache near the specified position.
* The given position need not be a boundary itself.
* The input position must be within the range of the text, and
* on a code point boundary.
* If the requested position is a break boundary, leave the iteration
* position on it.
* If the requested position is not a boundary, leave the iteration
* position on the preceding boundary and include both the
* preceding and following boundaries in the cache.
* Additional boundaries, either preceding or following, may be added
* to the cache as a side effect.
*
* Return false if the operation failed.
*/
boolean populateNear(int position) {
// The callers in this class (following(), preceding()) only reach this method after
// seek() has failed, i.e. the position lies outside the currently cached range.
assert(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
// Add boundaries to the cache near the specified position.
// The given position need not be a boundary itself.
// The input position must be within the range of the text, and
// on a code point boundary.
// If the requested position is a break boundary, leave the iteration
// position on it.
// If the requested position is not a boundary, leave the iteration
// position on the preceding boundary and include both the
// preceding and following boundaries in the cache.
// Additional boundaries, either preceding or following, may be added
// to the cache as a side effect.
//
// The work is done in two phases:
//   1. Decide whether the existing cache contents can be kept (retainCache) or whether
//      the cache must be restarted from a freshly located boundary (aBoundary).
//   2. Extend the cache forwards or backwards until it spans the requested position,
//      then settle the iteration position at or just before it.
// If the requested position is not near already cached positions, clear the existing cache,
// find a near-by boundary and begin new cache contents there.
// Threshold for a text position to be considered near to existing cache contents.
// TODO: See issue ICU-22024 "perf tuning of Cache needed."
// This value is subject to change. See the ticket for more details.
final int CACHE_NEAR = 15;
int startOfText = fText.getBeginIndex();
// -1 means "no restart boundary chosen"; it must be replaced before reset() is called.
int aBoundary = -1;
int ruleStatusIndex = 0;
boolean retainCache = false;
if ((position > fBoundaries[fStartBufIdx] - CACHE_NEAR) && position < (fBoundaries[fEndBufIdx] + CACHE_NEAR)) {
// Requested position is near the existing cache. Retain it.
retainCache = true;
} else if (position <= startOfText + CACHE_NEAR) {
// Requested position is near the start of the text. Fill cache from start, skipping
// the need to find a safe point.
retainCache = false;
aBoundary = startOfText;
} else {
// Requested position is not near the existing cache.
// Find a safe point to refill the cache from.
int backupPos = handleSafePrevious(position);
if (fBoundaries[fEndBufIdx] < position && fBoundaries[fEndBufIdx] >= (backupPos - CACHE_NEAR)) {
// The requested position is beyond the end of the existing cache, but the
// reverse rules produced a position near or before the cached region.
// Retain the existing cache, and fill from the end of it.
retainCache = true;
} else if (backupPos < startOfText + CACHE_NEAR) {
// The safe reverse rules moved us to near the start of text.
// Take that (index 0) as the backup boundary, avoiding the complication
// (in the following block) of moving forward from the safe point to a known boundary.
//
// Retain the cache if it begins not too far from the requested position.
aBoundary = startOfText;
retainCache = (fBoundaries[fStartBufIdx] <= (position + CACHE_NEAR));
} else {
// The safe reverse rules produced a position that is neither near the existing
// cache, nor near the start of text.
// Advance to the boundary following.
// There is a complication: the safe reverse rules identify pairs of code points
// that are safe. If advancing from the safe point moves forwards by less than
// two code points, we need to advance one more time to ensure that the boundary
// is good, including a correct rules status value.
//
retainCache = false;
// handleNext() takes its starting point from fPosition.
fPosition = backupPos;
aBoundary = handleNext();
// "Advanced by one code point" means either one char, or two chars that form
// a surrogate pair starting at backupPos.
if (aBoundary == backupPos + 1 ||
(aBoundary == backupPos + 2 &&
Character.isHighSurrogate(fText.setIndex(backupPos)) &&
Character.isLowSurrogate(fText.next()))) {
// The initial handleNext() only advanced by a single code point. Go again.
// Safe rules identify safe pairs.
aBoundary = handleNext();
}
if (aBoundary == BreakIterator.DONE) {
// Ran off the end of the text; use the end of text as the restart boundary.
aBoundary = fText.getEndIndex();
}
// Rule status that goes with aBoundary, as left in fRuleStatusIndex after handleNext().
ruleStatusIndex = fRuleStatusIndex;
}
}
if (!retainCache) {
assert(aBoundary != -1);
reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
}
// Fill in boundaries between existing cache content and the new requested position.
if (fBoundaries[fEndBufIdx] < position) {
// The last position in the cache precedes the requested position.
// Add following position(s) to the cache.
while (fBoundaries[fEndBufIdx] < position) {
if (!populateFollowing()) {
// Not expected: the requested position is required to be within the text.
assert false;
return false;
}
}
fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer.
fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries.
while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos.
previous();
}
return true;
}
if (fBoundaries[fStartBufIdx] > position) {
// The first position in the cache is beyond the requested position.
// back up more until we get a boundary <= the requested position.
while (fBoundaries[fStartBufIdx] > position) {
populatePreceding();
}
fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer.
fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries.
while (fTextIdx < position) { // Move forwards to a position at or following the requested pos.
next();
}
if (fTextIdx > position) {
// If position is not itself a boundary, the next() loop above will overshoot.
// Back up one, leaving cache position at the boundary preceding the requested position.
previous();
}
return true;
}
// Neither fill direction was needed: the restart boundary is the requested position itself.
assert fTextIdx == position;
return true;
};
/**
* Add boundary(s) to the cache following the current last boundary.
* Return false if at the end of the text, and no more boundaries can be added.
* Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
*/
boolean populateFollowing() {
    final int lastCachedPos = fBoundaries[fEndBufIdx];
    final int lastCachedStatusIdx = fStatuses[fEndBufIdx];

    // First choice: the dictionary cache may already hold the boundary that follows.
    if (fDictionaryCache.following(lastCachedPos)) {
        addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
        return true;
    }

    // Otherwise run the rules forward from the last cached boundary.
    fPosition = lastCachedPos;
    int boundary = handleNext();
    if (boundary == BreakIterator.DONE) {
        return false;
    }
    final int boundaryStatusIdx = fRuleStatusIndex;

    if (fDictionaryCharCount > 0) {
        // The rule-based segment contains dictionary characters. Subdivide it, with the
        // pieces going into the dictionary cache, then take the first piece from there.
        fDictionaryCache.populateDictionary(lastCachedPos, boundary, lastCachedStatusIdx, boundaryStatusIdx);
        if (fDictionaryCache.following(lastCachedPos)) {
            addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
            return true;
            // TODO: may want to move a sizable chunk of the dictionary cache to the break cache at this point.
            // But be careful with interactions with populateNear().
        }
    }

    // Either the segment had no dictionary characters, or the dictionary segmenter did not
    // handle them. In both cases the rule-based end point is the boundary to cache.
    addFollowing(boundary, boundaryStatusIdx, UpdateCachePosition);

    // Opportunistically cache a few more non-dictionary boundaries, so that subsequent
    // next() calls take the fast path. The iteration position is left where it is.
    int extraAdded = 0;
    while (extraAdded < 6) {
        boundary = handleNext();
        if (boundary == BreakIterator.DONE || fDictionaryCharCount > 0) {
            break;
        }
        addFollowing(boundary, fRuleStatusIndex, RetainCachePosition);
        ++extraAdded;
    }
    return true;
}
/**
* Add one or more boundaries to the cache preceding the first currently cached boundary.
* Leave the iteration position on the first added boundary.
* Return false if no boundaries could be added (if at the start of the text.)
*/
boolean populatePreceding() {
int textBegin = fText.getBeginIndex();
// The first boundary currently in the cache; new boundaries must all precede it.
int fromPosition = fBoundaries[fStartBufIdx];
if (fromPosition == textBegin) {
// Already at the start of the text; nothing can precede it.
return false;
}
int position = textBegin;
int positionStatusIdx = 0;
// First choice: the dictionary cache may already hold the boundary that precedes.
if (fDictionaryCache.preceding(fromPosition)) {
addPreceding(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
return true;
}
int backupPosition = fromPosition;
// Find a boundary somewhere preceding the first already-cached boundary
// Each pass backs up a further 30 text positions; the loop repeats while the boundary
// found is still not before fromPosition.
do {
backupPosition = backupPosition - 30;
if (backupPosition <= textBegin) {
backupPosition = textBegin;
} else {
backupPosition = handleSafePrevious(backupPosition);
}
if (backupPosition == BreakIterator.DONE || backupPosition == textBegin) {
// The start of the text is used as a boundary with rule status index 0.
position = textBegin;
positionStatusIdx = 0;
} else {
// Advance to the boundary following the backup position.
// There is a complication: the safe reverse rules identify pairs of code points
// that are safe. If advancing from the safe point moves forwards by less than
// two code points, we need to advance one more time to ensure that the boundary
// is good, including a correct rules status value.
//
fPosition = backupPosition; // TODO: pass starting position in a clearer way.
position = handleNext();
// "Advanced by one code point" means either one char, or two chars that form
// a surrogate pair starting at backupPosition.
if (position == backupPosition + 1 ||
(position == backupPosition + 2 &&
Character.isHighSurrogate(fText.setIndex(backupPosition)) &&
Character.isLowSurrogate(fText.next()))) {
// The initial handleNext() only advanced by a single code point. Go again.
// Safe rules identify safe pairs.
position = handleNext();
}
positionStatusIdx = fRuleStatusIndex;
}
} while (position >= fromPosition);
// Find boundaries between the one we just located and the first already-cached boundary
// Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.
// Each entry is pushed as two ints: the position first, then its rule status index.
fSideBuffer.removeAllElements();
fSideBuffer.push(position);
fSideBuffer.push(positionStatusIdx);
do {
// handleNext() is started from the most recently found boundary.
int prevPosition = fPosition = position;
int prevStatusIdx = positionStatusIdx;
position = handleNext();
positionStatusIdx = fRuleStatusIndex;
if (position == BreakIterator.DONE) {
break;
}
boolean segmentHandledByDictionary = false;
if (fDictionaryCharCount != 0) {
// Segment from the rules includes dictionary characters.
// Subdivide it, with subdivided results going into the dictionary cache.
int dictSegEndPosition = position;
fDictionaryCache.populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
// Walk the dictionary boundaries of this segment in order, collecting those
// that fall before fromPosition.
while (fDictionaryCache.following(prevPosition)) {
position = fDictionaryCache.fBoundary;
positionStatusIdx = fDictionaryCache.fStatusIndex;
segmentHandledByDictionary = true;
assert(position > prevPosition);
if (position >= fromPosition) {
break;
}
assert(position <= dictSegEndPosition);
fSideBuffer.push(position);
fSideBuffer.push(positionStatusIdx);
prevPosition = position;
}
assert(position==dictSegEndPosition || position>=fromPosition);
}
// A plain rule-based boundary (or a dictionary segment the dictionary did not divide)
// is collected here, provided it still precedes fromPosition.
if (!segmentHandledByDictionary && position < fromPosition) {
fSideBuffer.push(position);
fSideBuffer.push(positionStatusIdx);
}
} while (position < fromPosition);
// Move boundaries from the side buffer to the main circular buffer.
// Entries pop in the reverse of push order (status index, then position); assuming
// DequeI.pop() is LIFO, the boundary closest to fromPosition comes out first.
boolean success = false;
if (!fSideBuffer.isEmpty()) {
// The first boundary popped becomes the new iteration position.
positionStatusIdx = fSideBuffer.pop();
position = fSideBuffer.pop();
addPreceding(position, positionStatusIdx, UpdateCachePosition);
success = true;
}
// The remaining boundaries are prepended without moving the iteration position.
while (!fSideBuffer.isEmpty()) {
positionStatusIdx = fSideBuffer.pop();
position = fSideBuffer.pop();
if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
// No space in circular buffer to hold a new preceding result while
// also retaining the current cache (iteration) position.
// Bailing out is safe; the cache will refill again if needed.
break;
}
}
return success;
};
// Values for the "update" parameter of addFollowing() and addPreceding().
// RetainCachePosition: leave the iteration position where it was.
static final boolean RetainCachePosition = false;
// UpdateCachePosition: move the iteration position to the newly added boundary.
static final boolean UpdateCachePosition = true;
/**
* Add the boundary following the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
*/
void addFollowing(int position, int ruleStatusIdx, boolean update) {
    assert(position > fBoundaries[fEndBufIdx]);
    assert(ruleStatusIdx <= Short.MAX_VALUE);

    final int slot = modChunkSize(fEndBufIdx + 1);
    if (slot == fStartBufIdx) {
        // Buffer is full: drop the oldest entries from the front to make room.
        fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
    }
    fEndBufIdx = slot;
    fBoundaries[slot] = position;
    fStatuses[slot] = (short) ruleStatusIdx;

    if (update != UpdateCachePosition) {
        // The original cache position is being retained. Callers are responsible for not
        // adding so many boundaries that the buffer wraps around onto that position.
        assert(slot != fBufIdx);
        return;
    }
    // Make the newly added boundary the current position.
    fBufIdx = slot;
    fTextIdx = position;
}
/**
* Add the boundary preceding the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
*/
boolean addPreceding(int position, int ruleStatusIdx, boolean update) {
    assert(position < fBoundaries[fStartBufIdx]);
    assert(ruleStatusIdx <= Short.MAX_VALUE);

    final int slot = modChunkSize(fStartBufIdx - 1);
    final boolean bufferFull = (slot == fEndBufIdx);
    if (bufferFull) {
        final boolean keepPosition = (update == RetainCachePosition);
        if (keepPosition && fBufIdx == fEndBufIdx) {
            // Failure. The new boundary would claim the slot holding the current iteration
            // position, which the caller asked to retain. (The buffer is already completely
            // full of entries that precede the iteration position.)
            return false;
        }
        // Give up the last entry so that the new first entry can take its slot.
        fEndBufIdx = modChunkSize(fEndBufIdx - 1);
    }
    fStartBufIdx = slot;
    fBoundaries[slot] = position;
    fStatuses[slot] = (short) ruleStatusIdx;

    if (update == UpdateCachePosition) {
        fBufIdx = slot;
        fTextIdx = position;
    }
    return true;
}
/**
* Set the cache position to the specified position, or, if the position
* falls between two cached boundaries, to the preceding boundary.
* Fails if the requested position is outside of the range of boundaries currently held by the cache.
* The startPosition must be on a code point boundary.
*
* Return true if successful, false if the specified position is after
* the last cached boundary or before the first.
*/
boolean seek(int pos) {
    final int firstBoundary = fBoundaries[fStartBufIdx];
    final int lastBoundary = fBoundaries[fEndBufIdx];
    if (pos < firstBoundary || pos > lastBoundary) {
        return false;
    }
    if (pos == firstBoundary) {
        // Common case: seek(0), from BreakIterator::first()
        fBufIdx = fStartBufIdx;
        fTextIdx = firstBoundary;
        return true;
    }
    if (pos == lastBoundary) {
        fBufIdx = fEndBufIdx;
        fTextIdx = lastBoundary;
        return true;
    }

    // Binary search over the circular buffer for the first boundary beyond pos.
    // When the occupied range wraps (lo > hi), the midpoint is computed as if hi
    // were unwrapped, then folded back into the buffer.
    int lo = fStartBufIdx;
    int hi = fEndBufIdx;
    while (lo != hi) {
        final int unwrap = (lo > hi) ? CACHE_SIZE : 0;
        final int mid = modChunkSize((lo + hi + unwrap) / 2);
        if (fBoundaries[mid] > pos) {
            hi = mid;
        } else {
            lo = modChunkSize(mid + 1);
        }
    }
    assert(fBoundaries[hi] > pos);

    // The entry just before it is the boundary at or preceding pos.
    fBufIdx = modChunkSize(hi - 1);
    fTextIdx = fBoundaries[fBufIdx];
    assert(fTextIdx <= pos);
    return true;
}
/**
* copy constructor, used from RuleBasedBreakIterator.clone().
*
* @param src
*/
BreakCache(BreakCache src) {
    // The arrays are cloned so the copy gets arrays of its own.
    fStatuses = src.fStatuses.clone();
    fBoundaries = src.fBoundaries.clone();
    // Index and position state is copied as-is.
    fBufIdx = src.fBufIdx;
    fTextIdx = src.fTextIdx;
    fEndBufIdx = src.fEndBufIdx;
    fStartBufIdx = src.fStartBufIdx;
    // Transient, no need to clone contents.
    fSideBuffer = new DictionaryBreakEngine.DequeI();
}
/** Debugging aid: print the iteration state, then each cached (buffer index, boundary) pair. */
void dumpCache() {
    System.out.printf("fTextIdx:%d fBufIdx:%d%n", fTextIdx, fBufIdx);
    int bufIdx = fStartBufIdx;
    while (true) {
        System.out.printf("%d %d%n", bufIdx, fBoundaries[bufIdx]);
        if (bufIdx == fEndBufIdx) {
            return;
        }
        bufIdx = modChunkSize(bufIdx + 1);
    }
}
/**
 * Wrap an index into the range of the circular buffer. A bit mask is used, so an
 * index of -1 wraps to the last slot.
 */
private final int modChunkSize(int index) {
    final int wrapMask = CACHE_SIZE - 1;
    return index & wrapMask;
}
// Number of slots in the circular buffer. modChunkSize() wraps indexes with
// (CACHE_SIZE - 1) as a bit mask, which requires this to be a power of two.
static final int CACHE_SIZE = 128;
// static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
// Buffer index of the first (lowest text position) cached boundary.
int fStartBufIdx;
// Buffer index of the last cached boundary.
int fEndBufIdx; // inclusive
// Text position of the current iteration position; kept equal to fBoundaries[fBufIdx].
int fTextIdx;
// Buffer index of the current iteration position.
int fBufIdx;
// Cached boundary positions, used as a circular buffer from fStartBufIdx to fEndBufIdx.
int[] fBoundaries = new int[CACHE_SIZE];
// Rule status index for each cached boundary, parallel to fBoundaries.
short[] fStatuses = new short[CACHE_SIZE];
// Scratch storage used by populatePreceding() to collect (position, status) pairs
// before they are moved into the circular buffer.
DictionaryBreakEngine.DequeI fSideBuffer = new DictionaryBreakEngine.DequeI();
};
}