/* * Copyright (C) 2022 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package android.text; import android.annotation.IntRange; import android.annotation.NonNull; import android.icu.text.BreakIterator; import android.icu.util.ULocale; import android.text.method.WordIterator; /** * Implementation of {@link SegmentFinder} using words as the text segment. Word boundaries are * found using {@code WordIterator}. Whitespace characters are excluded, so they are not included in * any text segments. * *
For example, the text "Hello, World!" would be subdivided into four text segments: "Hello", * ",", "World", "!". The space character does not belong to any text segments. * * @see Unicode Text Segmentation - Word * Boundaries */ public class WordSegmentFinder extends SegmentFinder { private final CharSequence mText; private final WordIterator mWordIterator; /** * Constructs a WordSegmentFinder instance for the specified text which uses the provided locale * to determine word boundaries. * * @param text text to be segmented * @param locale locale used for analyzing the text */ public WordSegmentFinder( @NonNull CharSequence text, @NonNull ULocale locale) { mText = text; mWordIterator = new WordIterator(locale); mWordIterator.setCharSequence(text, 0, text.length()); } /** * Constructs a WordSegmentFinder instance for the specified text which uses the provided * WordIterator to determine word boundaries. * * @param text text to be segmented * @param wordIterator word iterator used to find word boundaries in the text * @hide */ public WordSegmentFinder(@NonNull CharSequence text, @NonNull WordIterator wordIterator) { mText = text; mWordIterator = wordIterator; } @Override public int previousStartBoundary(@IntRange(from = 0) int offset) { int boundary = offset; do { boundary = mWordIterator.prevBoundary(boundary); if (boundary == BreakIterator.DONE) { return DONE; } } while (Character.isWhitespace(mText.charAt(boundary))); return boundary; } @Override public int previousEndBoundary(@IntRange(from = 0) int offset) { int boundary = offset; do { boundary = mWordIterator.prevBoundary(boundary); if (boundary == BreakIterator.DONE || boundary == 0) { return DONE; } } while (Character.isWhitespace(mText.charAt(boundary - 1))); return boundary; } @Override public int nextStartBoundary(@IntRange(from = 0) int offset) { int boundary = offset; do { boundary = mWordIterator.nextBoundary(boundary); if (boundary == BreakIterator.DONE || boundary == mText.length()) { return DONE; } } while (Character.isWhitespace(mText.charAt(boundary))); return boundary; } @Override public int nextEndBoundary(@IntRange(from = 0) int offset) { int boundary = offset; do { boundary = mWordIterator.nextBoundary(boundary); if (boundary == BreakIterator.DONE) { return DONE; } } while (Character.isWhitespace(mText.charAt(boundary - 1))); return boundary; } }