/* GENERATED SOURCE. DO NOT MODIFY. */ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2005-2016 International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ package android.icu.text; import static android.icu.impl.CharacterIteration.DONE32; import static android.icu.impl.CharacterIteration.next32; import static android.icu.impl.CharacterIteration.nextTrail32; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.text.CharacterIterator; import java.util.MissingResourceException; import java.util.concurrent.ConcurrentLinkedQueue; import android.icu.impl.CharacterIteration; import android.icu.impl.ICUBinary; import android.icu.impl.ICUDebug; import android.icu.impl.RBBIDataWrapper; import android.icu.impl.breakiter.BurmeseBreakEngine; import android.icu.impl.breakiter.CjkBreakEngine; import android.icu.impl.breakiter.DictionaryBreakEngine; import android.icu.impl.breakiter.KhmerBreakEngine; import android.icu.impl.breakiter.LSTMBreakEngine; import android.icu.impl.breakiter.LanguageBreakEngine; import android.icu.impl.breakiter.LaoBreakEngine; import android.icu.impl.breakiter.ThaiBreakEngine; import android.icu.impl.breakiter.UnhandledBreakEngine; import android.icu.lang.UCharacter; import android.icu.lang.UProperty; import android.icu.lang.UScript; import android.icu.util.CodePointTrie; /** * Rule Based Break Iterator * This is a port of the C++ class RuleBasedBreakIterator from ICU4C. 
* * @hide Only a subset of ICU is exposed in Android */ public class RuleBasedBreakIterator extends BreakIterator { //======================================================================= // Constructors & Factories //======================================================================= /** * private constructor */ private RuleBasedBreakIterator() { fDictionaryCharCount = 0; } /** * Create a break iterator from a precompiled set of break rules. * * Creating a break iterator from the binary rules is much faster than * creating one from source rules. * * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. * Binary break iterator rules are not guaranteed to be compatible between * different versions of ICU. * * @param is an input stream supplying the compiled binary rules. * @throws IOException if there is an error while reading the rules from the InputStream. * @see #compileRules(String, OutputStream) */ public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException { RuleBasedBreakIterator This = new RuleBasedBreakIterator(); This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStreamAndCloseStream(is)); This.fLookAheadMatches = new int[This.fRData.fFTable.fLookAheadResultsSize]; return This; } /** * This factory method doesn't have an access modifier; it is only accessible in the same * package. * * Create a break iterator from a precompiled set of break rules. * * Creating a break iterator from the binary rules is much faster than * creating one from source rules. * * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. * Binary break iterator rules are not guaranteed to be compatible between * different versions of ICU. * * @param bytes a buffer supplying the compiled binary rules. * @param phraseBreaking a flag indicating if phrase breaking is required. * @throws IOException if there is an error while reading the rules from the buffer. 
* @see #compileRules(String, OutputStream) * @hide draft / provisional / internal are hidden on Android */ /* package-potected */ static RuleBasedBreakIterator getInstanceFromCompiledRules( ByteBuffer bytes, boolean phraseBreaking) throws IOException { RuleBasedBreakIterator instance = getInstanceFromCompiledRules(bytes); instance.fPhraseBreaking = phraseBreaking; return instance; } /** * Create a break iterator from a precompiled set of break rules. * * Creating a break iterator from the binary rules is much faster than * creating one from source rules. * * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. * Binary break iterator rules are not guaranteed to be compatible between * different versions of ICU. * * @param bytes a buffer supplying the compiled binary rules. * @throws IOException if there is an error while reading the rules from the buffer. * @see #compileRules(String, OutputStream) * @deprecated This API is ICU internal only. * @hide draft / provisional / internal are hidden on Android */ @Deprecated public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException { RuleBasedBreakIterator This = new RuleBasedBreakIterator(); This.fRData = RBBIDataWrapper.get(bytes); This.fLookAheadMatches = new int[This.fRData.fFTable.fLookAheadResultsSize]; return This; } /** * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. * @param rules The break rules to be used. */ public RuleBasedBreakIterator(String rules) { this(); try { ByteArrayOutputStream ruleOS = new ByteArrayOutputStream(); compileRules(rules, ruleOS); fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray())); fLookAheadMatches = new int[fRData.fFTable.fLookAheadResultsSize]; } catch (IOException e) { ///CLOVER:OFF // An IO exception can only arrive here if there is a bug in the RBBI Rule compiler, // causing bogus compiled rules to be produced, but with no compile error raised. 
RuntimeException rte = new RuntimeException("RuleBasedBreakIterator rule compilation internal error: " + e.getMessage()); throw rte; ///CLOVER:ON } } //======================================================================= // Boilerplate //======================================================================= /** * Clones this iterator. * @return A newly-constructed RuleBasedBreakIterator with the same * behavior as this one. */ @Override public Object clone() { RuleBasedBreakIterator result; result = (RuleBasedBreakIterator)super.clone(); if (fText != null) { result.fText = (CharacterIterator)(fText.clone()); } result.fLookAheadMatches = new int[fRData.fFTable.fLookAheadResultsSize]; result.fBreakCache = result.new BreakCache(fBreakCache); result.fDictionaryCache = result.new DictionaryCache(fDictionaryCache); return result; } /** * Returns true if both BreakIterators are of the same class, have the same * rules, and iterate over the same text. */ @Override public boolean equals(Object that) { if (that == null) { return false; } if (this == that) { return true; } try { RuleBasedBreakIterator other = (RuleBasedBreakIterator) that; if (fRData != other.fRData && (fRData == null || other.fRData == null)) { return false; } if (fRData != null && other.fRData != null && (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) { return false; } if (fText == null && other.fText == null) { return true; } if (fText == null || other.fText == null || !fText.equals(other.fText)) { return false; } return fPosition == other.fPosition; } catch(ClassCastException e) { return false; } } /** * Returns the description (rules) used to create this iterator. 
* (In ICU4C, the same function is RuleBasedBreakIterator::getRules()) */ @Override public String toString() { String retStr = ""; if (fRData != null) { retStr = fRData.fRuleSource; } return retStr; } /** * Compute a hashcode for this BreakIterator * @return A hash code */ @Override public int hashCode() { return fRData.fRuleSource.hashCode(); } private static final int START_STATE = 1; // The state number of the starting state private static final int STOP_STATE = 0; // The state-transition value indicating "stop" // RBBIRunMode - the state machine runs an extra iteration at the beginning and end // of user text. A variable with this enum type keeps track of where we // are. The state machine only fetches user text input while in RUN mode. private static final int RBBI_START = 0; private static final int RBBI_RUN = 1; private static final int RBBI_END = 2; /** * The character iterator through which this BreakIterator accesses the text. */ private CharacterIterator fText = new java.text.StringCharacterIterator(""); /** * The rule data for this BreakIterator instance. * Not intended for public use. Declared public for testing purposes only. * @deprecated This API is ICU internal only. * @hide draft / provisional / internal are hidden on Android */ @Deprecated public RBBIDataWrapper fRData; /** * The iteration state - current position, rule status for the current position, * and whether the iterator ran off the end, yielding UBRK_DONE. * Current position is pinned to be 0 < position <= text.length. * Current position is always set to a boundary. * * The current position of the iterator. Pinned, 0 < fPosition <= text.length. * Never has the value UBRK_DONE (-1). */ private int fPosition; /** * Index of the Rule {tag} values for the most recent match. */ private int fRuleStatusIndex; /** * True when iteration has run off the end, and iterator functions should return UBRK_DONE. */ private boolean fDone; /** * Array of look-ahead tentative results. 
*/
    private int[]              fLookAheadMatches;

    /**
     *  Cache of previously determined boundary positions.
     */
    private BreakCache         fBreakCache = new BreakCache();

    /**
     * Flag used to indicate if phrase breaking is required.
     * Defaults to false.
     */
    private boolean            fPhraseBreaking = false;

    /**
     * Counter for the number of characters encountered with the "dictionary"
     *   flag set.  Normal RBBI iterators don't use it, although the code
     *   for updating it is live.  Dictionary Based break iterators (a subclass
     *   of us) access this field directly.
     *
     * NOTE(review): the field is declared private, so a subclass outside this
     * file could not access it directly as the sentence above says - the
     * description looks out of date; confirm before relying on it.
     * @hide draft / provisional / internal are hidden on Android
     */
    private int fDictionaryCharCount;

    // Per-iterator DictionaryCache instance. NOTE(review): presumably holds
    // boundaries produced by the dictionary break engines - confirm against the
    // DictionaryCache definition, which is not visible in this excerpt.
    private DictionaryCache     fDictionaryCache = new DictionaryCache();

    /**
     * ICU debug argument name for RBBI
     */
    private static final String RBBI_DEBUG_ARG = "rbbi";

    /**
     * Debugging flag.  Trace operation of state machine when true.
     * True only when the "rbbi" ICU debug argument is enabled and its value
     * contains the substring "trace".
     */
    private static final boolean TRACE = ICUDebug.enabled(RBBI_DEBUG_ARG)
            && ICUDebug.value(RBBI_DEBUG_ARG).indexOf("trace") >= 0;

    /**
     * The "default" break engine - just skips over ranges of dictionary words,
     * producing no breaks. Should only be used if characters need to be handled
     * by a dictionary but we have no dictionary implementation for them.
     *
     * Only one instance; shared by all break iterators.
     *
     * Declared here without an initializer; being static final, it has to be
     * assigned in a static initializer, which is not part of this excerpt.
     */
    private static final UnhandledBreakEngine gUnhandledBreakEngine;

    /**
     * List of all known break engines, common for all break iterators.
     * Lazily updated as break engines are needed, because instantiation of
     * break engines is expensive.
     *
     * Important notes:
     *