script-astra/Android/Sdk/sources/android-35/android/text/WordSegmentFinder.java

/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.text;

import android.annotation.IntRange;
import android.annotation.NonNull;
import android.icu.text.BreakIterator;
import android.icu.util.ULocale;
import android.text.method.WordIterator;

/**
 * Implementation of {@link SegmentFinder} using words as the text segment. Word boundaries are
 * found using {@code WordIterator}. Whitespace characters are excluded, so they are not included in
 * any text segments.
 *
 * <p>For example, the text "Hello, World!" would be subdivided into four text segments: "Hello",
 * ",", "World", "!". The space character does not belong to any text segments.
 *
 * @see <a href="https://unicode.org/reports/tr29/#Word_Boundaries">Unicode Text Segmentation - Word
 *     Boundaries</a>
 */
public class WordSegmentFinder extends SegmentFinder {
    private final CharSequence mText;
    private final WordIterator mWordIterator;

    /**
     * Constructs a WordSegmentFinder instance for the specified text which uses the provided locale
     * to determine word boundaries.
     *
     * @param text text to be segmented
     * @param locale locale used for analyzing the text
     */
    public WordSegmentFinder(
            @NonNull CharSequence text, @NonNull ULocale locale) {
        mText = text;
        mWordIterator = new WordIterator(locale);
        mWordIterator.setCharSequence(text, 0, text.length());
    }

    /**
     * Constructs a WordSegmentFinder instance for the specified text which uses the provided
     * WordIterator to determine word boundaries.
     *
     * @param text text to be segmented
     * @param wordIterator word iterator used to find word boundaries in the text
     * @hide
     */
    public WordSegmentFinder(@NonNull CharSequence text, @NonNull WordIterator wordIterator) {
        mText = text;
        mWordIterator = wordIterator;
    }

    @Override
    public int previousStartBoundary(@IntRange(from = 0) int offset) {
        int boundary = offset;
        do {
            boundary = mWordIterator.prevBoundary(boundary);
            if (boundary == BreakIterator.DONE) {
                return DONE;
            }
        } while (Character.isWhitespace(mText.charAt(boundary)));
        return boundary;
    }

    @Override
    public int previousEndBoundary(@IntRange(from = 0) int offset) {
        int boundary = offset;
        do {
            boundary = mWordIterator.prevBoundary(boundary);
            if (boundary == BreakIterator.DONE || boundary == 0) {
                return DONE;
            }
        } while (Character.isWhitespace(mText.charAt(boundary - 1)));
        return boundary;
    }

    @Override
    public int nextStartBoundary(@IntRange(from = 0) int offset) {
        int boundary = offset;
        do {
            boundary = mWordIterator.nextBoundary(boundary);
            if (boundary == BreakIterator.DONE || boundary == mText.length()) {
                return DONE;
            }
        } while (Character.isWhitespace(mText.charAt(boundary)));
        return boundary;
    }

    @Override
    public int nextEndBoundary(@IntRange(from = 0) int offset) {
        int boundary = offset;
        do {
            boundary = mWordIterator.nextBoundary(boundary);
            if (boundary == BreakIterator.DONE) {
                return DONE;
            }
        } while (Character.isWhitespace(mText.charAt(boundary - 1)));
        return boundary;
    }
}
init 2025-01-20 15:15:20 +00:00			`/*`
			`* Copyright (C) 2022 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`package android.text;`

			`import android.annotation.IntRange;`
			`import android.annotation.NonNull;`
			`import android.icu.text.BreakIterator;`
			`import android.icu.util.ULocale;`
			`import android.text.method.WordIterator;`

			`/**`
			`* Implementation of {@link SegmentFinder} using words as the text segment. Word boundaries are`
			`* found using {@code WordIterator}. Whitespace characters are excluded, so they are not included in`
			`* any text segments.`
			`*`
			`* <p>For example, the text "Hello, World!" would be subdivided into four text segments: "Hello",`
			`* ",", "World", "!". The space character does not belong to any text segments.`
			`*`
			`* @see <a href="https://unicode.org/reports/tr29/#Word_Boundaries">Unicode Text Segmentation - Word`
			`* Boundaries</a>`
			`*/`
			`public class WordSegmentFinder extends SegmentFinder {`
			`private final CharSequence mText;`
			`private final WordIterator mWordIterator;`

			`/**`
			`* Constructs a WordSegmentFinder instance for the specified text which uses the provided locale`
			`* to determine word boundaries.`
			`*`
			`* @param text text to be segmented`
			`* @param locale locale used for analyzing the text`
			`*/`
			`public WordSegmentFinder(`
			`@NonNull CharSequence text, @NonNull ULocale locale) {`
			`mText = text;`
			`mWordIterator = new WordIterator(locale);`
			`mWordIterator.setCharSequence(text, 0, text.length());`
			`}`

			`/**`
			`* Constructs a WordSegmentFinder instance for the specified text which uses the provided`
			`* WordIterator to determine word boundaries.`
			`*`
			`* @param text text to be segmented`
			`* @param wordIterator word iterator used to find word boundaries in the text`
			`* @hide`
			`*/`
			`public WordSegmentFinder(@NonNull CharSequence text, @NonNull WordIterator wordIterator) {`
			`mText = text;`
			`mWordIterator = wordIterator;`
			`}`

			`@Override`
			`public int previousStartBoundary(@IntRange(from = 0) int offset) {`
			`int boundary = offset;`
			`do {`
			`boundary = mWordIterator.prevBoundary(boundary);`
			`if (boundary == BreakIterator.DONE) {`
			`return DONE;`
			`}`
			`} while (Character.isWhitespace(mText.charAt(boundary)));`
			`return boundary;`
			`}`

			`@Override`
			`public int previousEndBoundary(@IntRange(from = 0) int offset) {`
			`int boundary = offset;`
			`do {`
			`boundary = mWordIterator.prevBoundary(boundary);`
			`if (boundary == BreakIterator.DONE \|\| boundary == 0) {`
			`return DONE;`
			`}`
			`} while (Character.isWhitespace(mText.charAt(boundary - 1)));`
			`return boundary;`
			`}`

			`@Override`
			`public int nextStartBoundary(@IntRange(from = 0) int offset) {`
			`int boundary = offset;`
			`do {`
			`boundary = mWordIterator.nextBoundary(boundary);`
			`if (boundary == BreakIterator.DONE \|\| boundary == mText.length()) {`
			`return DONE;`
			`}`
			`} while (Character.isWhitespace(mText.charAt(boundary)));`
			`return boundary;`
			`}`

			`@Override`
			`public int nextEndBoundary(@IntRange(from = 0) int offset) {`
			`int boundary = offset;`
			`do {`
			`boundary = mWordIterator.nextBoundary(boundary);`
			`if (boundary == BreakIterator.DONE) {`
			`return DONE;`
			`}`
			`} while (Character.isWhitespace(mText.charAt(boundary - 1)));`
			`return boundary;`
			`}`
			`}`