/* GENERATED SOURCE. DO NOT MODIFY. */ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 1996-2016, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package android.icu.text; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.Locale; import java.util.MissingResourceException; import android.icu.impl.CSCharacterIterator; import android.icu.impl.CacheValue; import android.icu.impl.ICUDebug; import android.icu.util.ICUCloneNotSupportedException; import android.icu.util.ULocale; /** * [icu enhancement] ICU's replacement for {@link java.text.BreakIterator}. Methods, fields, and other functionality specific to ICU are labeled '[icu]'. * *
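 * <p>A class that locates boundaries in text. This class defines a protocol for
 * objects that break up a piece of natural-language text according to a set
 * of criteria. Instances or subclasses of BreakIterator can be provided, for
 * example, to break a piece of text into words, sentences, or logical characters
 * according to the conventions of some language or group of languages.
 *
 * <p>We provide five built-in types of BreakIterator, one per factory-method family:
 * <ul>
 * <li>{@code getCharacterInstance()} returns a BreakIterator that locates
 * logical-character boundaries.
 * <li>{@code getWordInstance()} returns a BreakIterator that locates word boundaries.
 * <li>{@code getLineInstance()} returns a BreakIterator that locates legal
 * line-wrapping positions.
 * <li>{@code getSentenceInstance()} returns a BreakIterator that locates sentence
 * boundaries.
 * <li>{@code getTitleInstance()} returns a BreakIterator that locates title
 * boundaries (deprecated; use the word iterator instead).
 * </ul>
 *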
* BreakIterator's interface follows an "iterator" model (hence the name), meaning it * has a concept of a "current position" and methods like first(), last(), next(), * and previous() that update the current position. All BreakIterators uphold the * following invariants: *
 * <p>Examples:
 *
 * <p>Creating and using text boundaries
 * <pre>
 * public static void main(String args[]) {
 *     if (args.length == 1) {
 *         String stringToExamine = args[0];
 *         //print each word in order
 *         BreakIterator boundary = BreakIterator.getWordInstance();
 *         boundary.setText(stringToExamine);
 *         printEachForward(boundary, stringToExamine);
 *         //print each sentence in reverse order
 *         boundary = BreakIterator.getSentenceInstance(Locale.US);
 *         boundary.setText(stringToExamine);
 *         printEachBackward(boundary, stringToExamine);
 *         printFirst(boundary, stringToExamine);
 *         printLast(boundary, stringToExamine);
 *     }
 * }
 * </pre>
 *
 * <p>Print each element in order
 * <pre>
 * public static void printEachForward(BreakIterator boundary, String source) {
 *     int start = boundary.first();
 *     for (int end = boundary.next();
 *          end != BreakIterator.DONE;
 *          start = end, end = boundary.next()) {
 *         System.out.println(source.substring(start,end));
 *     }
 * }
 * </pre>
 *
 * <p>Print each element in reverse order
 * <pre>
 * public static void printEachBackward(BreakIterator boundary, String source) {
 *     int end = boundary.last();
 *     for (int start = boundary.previous();
 *          start != BreakIterator.DONE;
 *          end = start, start = boundary.previous()) {
 *         System.out.println(source.substring(start,end));
 *     }
 * }
 * </pre>
 *
 * <p>Print first element
 * <pre>
 * public static void printFirst(BreakIterator boundary, String source) {
 *     int start = boundary.first();
 *     int end = boundary.next();
 *     System.out.println(source.substring(start,end));
 * }
 * </pre>
 *
 * <p>Print last element
 * <pre>
 * public static void printLast(BreakIterator boundary, String source) {
 *     int end = boundary.last();
 *     int start = boundary.previous();
 *     System.out.println(source.substring(start,end));
 * }
 * </pre>
 *
 * <p>Print the element at a specified position
 * <pre>
 * public static void printAt(BreakIterator boundary, int pos, String source) {
 *     int end = boundary.following(pos);
 *     int start = boundary.previous();
 *     System.out.println(source.substring(start,end));
 * }
 * </pre>
 *
 * <p>Find the next word: see the {@code nextWordStartAfter} example given in the
 * documentation of {@link #getRuleStatus}.
 *
** * @see CharacterIterator * */ public abstract class BreakIterator implements Cloneable { private static final boolean DEBUG = ICUDebug.enabled("breakiterator"); /** * Default constructor. There is no state that is carried by this abstract * base class. */ protected BreakIterator() { } /** * Clone method. Creates another BreakIterator with the same behavior and * current state as this one. * @return The clone. */ @Override public Object clone() { try { return super.clone(); } catch (CloneNotSupportedException e) { ///CLOVER:OFF throw new ICUCloneNotSupportedException(e); ///CLOVER:ON } } /** * DONE is returned by previous() and next() after all valid * boundaries have been returned. */ public static final int DONE = -1; /** * Set the iterator to the first boundary position. This is always the beginning * index of the text this iterator iterates over. For example, if * the iterator iterates over a whole string, this function will * always return 0. * @return The character offset of the beginning of the stretch of text * being broken. */ public abstract int first(); /** * Set the iterator to the last boundary position. This is always the "past-the-end" * index of the text this iterator iterates over. For example, if the * iterator iterates over a whole string (call it "text"), this function * will always return text.length(). * @return The character offset of the end of the stretch of text * being broken. */ public abstract int last(); /** * Move the iterator by the specified number of steps in the text. * A positive number moves the iterator forward; a negative number * moves the iterator backwards. If this causes the iterator * to move off either end of the text, this function returns DONE; * otherwise, this function returns the position of the appropriate * boundary. Calling this function is equivalent to calling next() or * previous() n times. * @param n The number of boundaries to advance over (if positive, moves * forward; if negative, moves backwards). 
* @return The position of the boundary n boundaries from the current * iteration position, or DONE if moving n boundaries causes the iterator * to advance off either end of the text. */ public abstract int next(int n); /** * Advances the iterator forward one boundary. The current iteration * position is updated to point to the next boundary position after the * current position, and this is also the value that is returned. If * the current position is equal to the value returned by last(), or to * DONE, this function returns DONE and sets the current position to * DONE. * @return The position of the first boundary position following the * iteration position. */ public abstract int next(); /** * Move the iterator backward one boundary. The current iteration * position is updated to point to the last boundary position before * the current position, and this is also the value that is returned. If * the current position is equal to the value returned by first(), or to * DONE, this function returns DONE and sets the current position to * DONE. * @return The position of the last boundary position preceding the * iteration position. */ public abstract int previous(); /** * Sets the iterator's current iteration position to be the first * boundary position following the specified position. (Whether the * specified position is itself a boundary position or not doesn't * matter-- this function always moves the iteration position to the * first boundary after the specified position.) If the specified * position is the past-the-end position, returns DONE. * @param offset The character position to start searching from. * @return The position of the first boundary position following * "offset" (whether or not "offset" itself is a boundary position), * or DONE if "offset" is the past-the-end offset. */ public abstract int following(int offset); /** * Sets the iterator's current iteration position to be the last * boundary position preceding the specified position. 
(Whether the * specified position is itself a boundary position or not doesn't * matter-- this function always moves the iteration position to the * last boundary before the specified position.) If the specified * position is the starting position, returns DONE. * @param offset The character position to start searching from. * @return The position of the last boundary position preceding * "offset" (whether of not "offset" itself is a boundary position), * or DONE if "offset" is the starting offset of the iterator. */ public int preceding(int offset) { // NOTE: This implementation is here solely because we can't add new // abstract methods to an existing class. There is almost ALWAYS a // better, faster way to do this. int pos = following(offset); while (pos >= offset && pos != DONE) pos = previous(); return pos; } /** * Return true if the specified position is a boundary position. If the * function returns true, the current iteration position is set to the * specified position; if the function returns false, the current * iteration position is set as though following() had been called. * @param offset the offset to check. * @return True if "offset" is a boundary position. */ public boolean isBoundary(int offset) { // Again, this is the default implementation, which is provided solely because // we couldn't add a new abstract method to an existing class. The real // implementations will usually need to do a little more work. if (offset == 0) { return true; } else return following(offset - 1) == offset; } /** * Return the iterator's current position. * @return The iterator's current position. */ public abstract int current(); /** * Tag value for "words" that do not fit into any of other categories. * Includes spaces and most punctuation. */ public static final int WORD_NONE = 0; /** * Upper bound for tags for uncategorized words. */ public static final int WORD_NONE_LIMIT = 100; /** * Tag value for words that appear to be numbers, lower limit. 
*/ public static final int WORD_NUMBER = 100; /** * Tag value for words that appear to be numbers, upper limit. */ public static final int WORD_NUMBER_LIMIT = 200; /** * Tag value for words that contain letters, excluding * hiragana, katakana or ideographic characters, lower limit. */ public static final int WORD_LETTER = 200; /** * Tag value for words containing letters, upper limit */ public static final int WORD_LETTER_LIMIT = 300; /** * Tag value for words containing kana characters, lower limit */ public static final int WORD_KANA = 300; /** * Tag value for words containing kana characters, upper limit */ public static final int WORD_KANA_LIMIT = 400; /** * Tag value for words containing ideographic characters, lower limit */ public static final int WORD_IDEO = 400; /** * Tag value for words containing ideographic characters, upper limit */ public static final int WORD_IDEO_LIMIT = 500; /** * For RuleBasedBreakIterators, return the status tag from the * break rule that determined the boundary at the current iteration position. ** public static int nextWordStartAfter(int pos, String text) { * BreakIterator wb = BreakIterator.getWordInstance(); * wb.setText(text); * int wordStart = wb.following(pos); * for (;;) { * int wordLimit = wb.next(); * if (wordLimit == BreakIterator.DONE) { * return BreakIterator.DONE; * } * int wordStatus = wb.getRuleStatus(); * if (wordStatus != BreakIterator.WORD_NONE) { * return wordStart; * } * wordStart = wordLimit; * } * } ** The iterator returned by {@link #getWordInstance} is unique in that * the break positions it returns don't represent both the start and end of the * thing being iterated over. That is, a sentence-break iterator returns breaks * that each represent the end of one sentence and the beginning of the next. * With the word-break iterator, the characters between two boundaries might be a * word, or they might be the punctuation or whitespace between two words. 
The * above code uses {@link #getRuleStatus} to identify and ignore boundaries associated * with punctuation or other non-word characters. *
* For break iterator types that do not support a rule status, * a default value of 0 is returned. *
* @return The status from the break rule that determined the boundary * at the current iteration position. */ public int getRuleStatus() { return 0; } /** * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) * that determined the boundary at the current iteration position. *
* For break iterator types that do not support rule status, * no values are returned. *
* If the size of the output array is insufficient to hold the data, * the output will be truncated to the available length. No exception * will be thrown. * * @param fillInArray an array to be filled in with the status values. * @return The number of rule status values from rules that determined * the boundary at the current iteration position. * In the event that the array is too small, the return value * is the total number of status values that were available, * not the reduced number that were actually returned. */ public int getRuleStatusVec(int[] fillInArray) { if (fillInArray != null && fillInArray.length > 0) { fillInArray[0] = 0; } return 1; } /** * Returns a CharacterIterator over the text being analyzed. *
* Caution: The state of the returned CharacterIterator * must not be modified in any way while the BreakIterator is still in use. * Doing so will lead to undefined behavior of the BreakIterator. * Clone the returned CharacterIterator first and work with that. *
* The returned CharacterIterator is a reference * to the actual iterator being used by the BreakIterator. * No guarantees are made about the current position * of this iterator when it is returned; it may differ from the * BreakIterators current position. If you need to move that * position to examine the text, clone this function's return value first. * * @return A CharacterIterator over the text being analyzed. */ public abstract CharacterIterator getText(); /** * Sets the iterator to analyze a new piece of text. The new * piece of text is passed in as a String, and the current * iteration position is reset to the beginning of the string. * (The old text is dropped.) * @param newText A String containing the text to analyze with * this BreakIterator. */ public void setText(String newText) { setText(new StringCharacterIterator(newText)); } /** * Sets the iterator to analyze a new piece of text. The new * piece of text is passed in as a CharSequence, and the current * iteration position is reset to the beginning of the text. * (The old text is dropped.) *
* The text underlying the CharSequence must not be modified while * the BreakIterator holds a reference to it. (As could possibly occur * with a StringBuilder, for example). * @param newText A CharSequence containing the text to analyze with * this BreakIterator. */ public void setText(CharSequence newText) { setText(new CSCharacterIterator(newText)); } /** * Sets the iterator to analyze a new piece of text. This function resets * the current iteration position to the beginning of the text. * (The old iterator is dropped.) *
* Caution: The supplied CharacterIterator is used
* directly by the BreakIterator, and must not be altered in any
* way by code outside of the BreakIterator.
* Doing so will lead to undefined behavior of the BreakIterator.
*
* @param newText A CharacterIterator referring to the text
* to analyze with this BreakIterator (the iterator's current
* position is ignored, but its other state is significant).
*/
public abstract void setText(CharacterIterator newText);
/**
 * [icu] Kind code for character-boundary iterators. This is the value the
 * {@code getCharacterInstance} factory methods pass to the instance lookup.
 */
public static final int KIND_CHARACTER = 0;
/**
 * [icu] Kind code for word-boundary iterators. This is the value the
 * {@code getWordInstance} factory methods pass to the instance lookup.
 */
public static final int KIND_WORD = 1;
/**
 * [icu] Kind code for line-break iterators. This is the value the
 * {@code getLineInstance} factory methods pass to the instance lookup.
 */
public static final int KIND_LINE = 2;
/**
 * [icu] Kind code for sentence-boundary iterators. This is the value the
 * {@code getSentenceInstance} factory methods pass to the instance lookup.
 */
public static final int KIND_SENTENCE = 3;
/**
 * [icu] Kind code for title-boundary iterators. This is the value the
 * deprecated {@code getTitleInstance} factory methods pass to the instance lookup.
 * @see #getTitleInstance
 * @see #getWordInstance
 * @deprecated ICU 64 Use {@link #getWordInstance} instead.
 */
@Deprecated
public static final int KIND_TITLE = 4;
/**
 * Number of kind codes declared above ({@code KIND_CHARACTER} through
 * {@code KIND_TITLE}, i.e. the values 0 to 4).
 */
private static final int KIND_COUNT = 5;
private static final CacheValue>[] iterCache = new CacheValue>[5];
/**
 * Creates a {@code BreakIterator} that finds word boundaries, treating the
 * text to be analyzed as written in the language of the default locale.
 *
 * <p>Equivalent to {@code getWordInstance(ULocale.getDefault())}.
 *
 * @return a new {@code BreakIterator} that locates word boundaries
 */
public static BreakIterator getWordInstance() {
    final ULocale defaultLocale = ULocale.getDefault();
    return getWordInstance(defaultLocale);
}
/**
 * Creates a {@code BreakIterator} that finds word boundaries in text written
 * in the language of the given locale.
 *
 * @param where the locale specifying the language of the text to be analyzed;
 *        it is converted with {@code ULocale.forLocale} before the lookup
 * @return a new {@code BreakIterator} that locates word boundaries
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getWordInstance(Locale where) {
    final ULocale icuLocale = ULocale.forLocale(where);
    return getBreakInstance(icuLocale, KIND_WORD);
}
/**
 * [icu] Creates a {@code BreakIterator} that finds word boundaries in text
 * written in the language of the given locale.
 *
 * @param where the locale specifying the language of the text to be analyzed
 * @return a new {@code BreakIterator} that locates word boundaries
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getWordInstance(ULocale where) {
    final int requestedKind = KIND_WORD;
    return getBreakInstance(where, requestedKind);
}
/**
 * Creates a {@code BreakIterator} that finds legal line-wrapping positions,
 * treating the text being broken as written in the language of the default
 * locale.
 *
 * <p>Equivalent to {@code getLineInstance(ULocale.getDefault())}.
 *
 * @return a new {@code BreakIterator} that locates legal line-wrapping positions
 */
public static BreakIterator getLineInstance() {
    final ULocale defaultLocale = ULocale.getDefault();
    return getLineInstance(defaultLocale);
}
/**
 * Creates a {@code BreakIterator} that finds legal line-wrapping positions in
 * text written in the language of the given locale.
 *
 * @param where the locale specifying the language of the text being broken;
 *        it is converted with {@code ULocale.forLocale} before the lookup
 * @return a new {@code BreakIterator} that locates legal line-wrapping positions
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getLineInstance(Locale where) {
    final ULocale icuLocale = ULocale.forLocale(where);
    return getBreakInstance(icuLocale, KIND_LINE);
}
/**
 * [icu] Creates a {@code BreakIterator} that finds legal line-wrapping
 * positions in text written in the language of the given locale.
 *
 * @param where the locale specifying the language of the text being broken
 * @return a new {@code BreakIterator} that locates legal line-wrapping positions
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getLineInstance(ULocale where) {
    final int requestedKind = KIND_LINE;
    return getBreakInstance(where, requestedKind);
}
/**
 * Creates a {@code BreakIterator} that finds logical-character boundaries,
 * treating the text being analyzed as written in the language of the default
 * locale.
 *
 * <p>Equivalent to {@code getCharacterInstance(ULocale.getDefault())}.
 *
 * @return a new {@code BreakIterator} that locates logical-character boundaries
 */
public static BreakIterator getCharacterInstance() {
    final ULocale defaultLocale = ULocale.getDefault();
    return getCharacterInstance(defaultLocale);
}
/**
 * Creates a {@code BreakIterator} that finds logical-character boundaries in
 * text written in the language of the given locale.
 *
 * @param where the locale specifying the language of the text being analyzed;
 *        it is converted with {@code ULocale.forLocale} before the lookup
 * @return a new {@code BreakIterator} that locates logical-character boundaries
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getCharacterInstance(Locale where) {
    final ULocale icuLocale = ULocale.forLocale(where);
    return getBreakInstance(icuLocale, KIND_CHARACTER);
}
/**
 * [icu] Creates a {@code BreakIterator} that finds logical-character
 * boundaries in text written in the language of the given locale.
 *
 * @param where the locale specifying the language of the text being analyzed
 * @return a new {@code BreakIterator} that locates logical-character boundaries
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getCharacterInstance(ULocale where) {
    final int requestedKind = KIND_CHARACTER;
    return getBreakInstance(where, requestedKind);
}
/**
 * Creates a {@code BreakIterator} that finds sentence boundaries, treating
 * the text being analyzed as written in the language of the default locale.
 *
 * <p>Equivalent to {@code getSentenceInstance(ULocale.getDefault())}.
 *
 * @return a new {@code BreakIterator} that locates sentence boundaries
 */
public static BreakIterator getSentenceInstance() {
    final ULocale defaultLocale = ULocale.getDefault();
    return getSentenceInstance(defaultLocale);
}
/**
 * Creates a {@code BreakIterator} that finds sentence boundaries in text
 * written in the language of the given locale.
 *
 * @param where the locale specifying the language of the text being analyzed;
 *        it is converted with {@code ULocale.forLocale} before the lookup
 * @return a new {@code BreakIterator} that locates sentence boundaries
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getSentenceInstance(Locale where) {
    final ULocale icuLocale = ULocale.forLocale(where);
    return getBreakInstance(icuLocale, KIND_SENTENCE);
}
/**
 * [icu] Creates a {@code BreakIterator} that finds sentence boundaries in
 * text written in the language of the given locale.
 *
 * @param where the locale specifying the language of the text being analyzed
 * @return a new {@code BreakIterator} that locates sentence boundaries
 * @throws NullPointerException if {@code where} is null
 */
public static BreakIterator getSentenceInstance(ULocale where) {
    final int requestedKind = KIND_SENTENCE;
    return getBreakInstance(where, requestedKind);
}
/**
 * [icu] Creates a {@code BreakIterator} that finds title boundaries, treating
 * the text being analyzed as written in the language of the default locale.
 * The returned iterator locates title boundaries as described for Unicode 3.2
 * only; for title boundary iteration under Unicode 4.0 and above, use a word
 * boundary iterator (see {@link #getWordInstance}).
 *
 * <p>Equivalent to {@code getTitleInstance(ULocale.getDefault())}.
 *
 * @return a new {@code BreakIterator} that locates title boundaries
 * @deprecated ICU 64 Use {@link #getWordInstance} instead.
 */
@Deprecated
public static BreakIterator getTitleInstance() {
    final ULocale defaultLocale = ULocale.getDefault();
    return getTitleInstance(defaultLocale);
}
/**
 * [icu] Creates a {@code BreakIterator} that finds title boundaries in text
 * written in the language of the given locale. The returned iterator locates
 * title boundaries as described for Unicode 3.2 only; for title boundary
 * iteration under Unicode 4.0 and above, use a word boundary iterator
 * (see {@link #getWordInstance}).
 *
 * @param where the locale specifying the language of the text being analyzed;
 *        it is converted with {@code ULocale.forLocale} before the lookup
 * @return a new {@code BreakIterator} that locates title boundaries
 * @throws NullPointerException if {@code where} is null
 * @deprecated ICU 64 Use {@link #getWordInstance} instead.
 */
@Deprecated
public static BreakIterator getTitleInstance(Locale where) {
    final ULocale icuLocale = ULocale.forLocale(where);
    return getBreakInstance(icuLocale, KIND_TITLE);
}
/**
 * [icu] Creates a {@code BreakIterator} that finds title boundaries in text
 * written in the language of the given locale. The returned iterator locates
 * title boundaries as described for Unicode 3.2 only; for title boundary
 * iteration under Unicode 4.0 and above, use a word boundary iterator
 * (see {@link #getWordInstance}).
 *
 * @param where the locale specifying the language of the text being analyzed
 * @return a new {@code BreakIterator} that locates title boundaries
 * @throws NullPointerException if {@code where} is null
 * @deprecated ICU 64 Use {@link #getWordInstance} instead.
 */
@Deprecated
public static BreakIterator getTitleInstance(ULocale where) {
    final int requestedKind = KIND_TITLE;
    return getBreakInstance(where, requestedKind);
}
/**
 * [icu] Registers a break iterator of the given kind for use in the given
 * locale. A request for a break iterator of that kind which matches, or falls
 * back to, this locale is answered with a clone of the registered iterator.
 *
 * <p>Because ICU may choose to cache BreakIterator objects internally, this must
 * be called at application startup, prior to any calls to
 * BreakIterator.getInstance to avoid undefined behavior.
 *
 * @param iter the BreakIterator instance to adopt.
 * @param locale the Locale for which this instance is to be registered
 * @param kind the type of iterator for which this instance is to be registered
 * @return a registry key that can be used to unregister this instance
 * @hide unsupported on Android
 */
public static Object registerInstance(BreakIterator iter, Locale locale, int kind) {
    // Convert once, then share the ULocale overload's cache handling.
    final ULocale icuLocale = ULocale.forLocale(locale);
    return registerInstance(iter, icuLocale, kind);
}
/**
 * [icu] Registers a break iterator of the given kind for use in the given
 * locale. A request for a break iterator of that kind which matches, or falls
 * back to, this locale is answered with a clone of the registered iterator.
 *
 * <p>Because ICU may choose to cache BreakIterator objects internally, this must
 * be called at application startup, prior to any calls to
 * BreakIterator.getInstance to avoid undefined behavior.
 *
 * @param iter the BreakIterator instance to adopt.
 * @param locale the Locale for which this instance is to be registered
 * @param kind the type of iterator for which this instance is to be registered
 * @return a registry key that can be used to unregister this instance
 * @hide unsupported on Android
 */
public static Object registerInstance(BreakIterator iter, ULocale locale, int kind) {
    // Flush the cache slot for this kind when it currently holds an entry
    // for exactly the locale being registered.
    if (iterCache[kind] != null) {
        final BreakIteratorCache cached = (BreakIteratorCache) iterCache[kind].get();
        if (cached != null && cached.getLocale().equals(locale)) {
            iterCache[kind] = null;
        }
    }
    return getShim().registerInstance(iter, locale, kind);
}
/**
* [icu] Unregisters a previously-registered BreakIterator using the key returned
* from the register call. Key becomes invalid after this call and should not be used
* again.
* @param key the registry key returned by a previous call to registerInstance
* @return true if the iterator for the key was successfully unregistered
* @hide unsupported on Android
*/
public static boolean unregister(Object key) {
if (key == null) {
throw new IllegalArgumentException("registry key must not be null");
}
// TODO: we don't do code coverage for the following lines
// because in getBreakInstance we always instantiate the shim,
// and test execution is such that we always instantiate a
// breakiterator before we get to the break iterator tests.
// this is for modularization, and we could remove the
// dependencies in getBreakInstance by rewriting part of the
// LocaleData code, or perhaps by accepting it into the
// module.
///CLOVER:OFF
if (shim != null) {
// Unfortunately, we don't know what is being unregistered
// -- what `kind' and what locale -- so we flush all
// caches. This is safe but inefficient if people are
// actively registering and unregistering.
for (int kind=0; kind Note: The actual locale is returned correctly, but the valid
* locale is not, in most cases.
* @param type type of information requested, either {@link
* android.icu.util.ULocale#VALID_LOCALE} or {@link
* android.icu.util.ULocale#ACTUAL_LOCALE}.
* @return the information specified by type, or null if
* this object was not constructed from locale data.
* @see android.icu.util.ULocale
* @see android.icu.util.ULocale#VALID_LOCALE
* @see android.icu.util.ULocale#ACTUAL_LOCALE
* @hide draft / provisional / internal are hidden on Android
*/
public final ULocale getLocale(ULocale.Type type) {
    // Only ACTUAL_LOCALE selects the actual locale; every other type
    // (including null) yields the valid locale.
    if (type == ULocale.ACTUAL_LOCALE) {
        return this.actualLocale;
    }
    return this.validLocale;
}
/**
 * Records the locales that were used to create this object. Pass null for
 * both arguments when the object was not constructed from locale data;
 * otherwise pass non-null values for both. The actual locale must be at the
 * same level as, or less specific than, the valid locale. Intended for
 * factories and other code that creates objects of this class.
 *
 * @param valid the most specific locale containing any resource
 * data, or null
 * @param actual the locale containing data used to construct this
 * object, or null
 * @throws IllegalArgumentException if exactly one of the two arguments is null
 * @see android.icu.util.ULocale
 * @see android.icu.util.ULocale#VALID_LOCALE
 * @see android.icu.util.ULocale#ACTUAL_LOCALE
 */
final void setLocale(ULocale valid, ULocale actual) {
    // Candidate for conversion to an assertion later: the two locales must
    // be either both present or both absent.
    final boolean validMissing = (valid == null);
    final boolean actualMissing = (actual == null);
    if (validMissing != actualMissing) {
        ///CLOVER:OFF
        throw new IllegalArgumentException();
        ///CLOVER:ON
    }
    // Not checked here: that the actual locale is at the same level as, or
    // less specific than, the valid locale.
    this.validLocale = valid;
    this.actualLocale = actual;
}
/**
* The most specific locale containing any resource data, or null.
* @see android.icu.util.ULocale
*/
private ULocale validLocale;
/**
* The locale containing data used to construct this object, or
* null.
* @see android.icu.util.ULocale
*/
private ULocale actualLocale;
// -------- END ULocale boilerplate --------
}