171 lines
6.9 KiB
Java
171 lines
6.9 KiB
Java
![]() |
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
|
||
|
package android.speech.tts;
|
||
|
|
||
|
import android.annotation.IntDef;
|
||
|
import android.annotation.IntRange;
|
||
|
import android.media.AudioFormat;
|
||
|
|
||
|
import java.lang.annotation.Retention;
|
||
|
import java.lang.annotation.RetentionPolicy;
|
||
|
|
||
|
/**
|
||
|
* A callback to return speech data synthesized by a text to speech engine.
|
||
|
*
|
||
|
* The engine can provide streaming audio by calling
|
||
|
* {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
|
||
|
* {@link #done}.
|
||
|
*
|
||
|
* {@link #error} can be called at any stage in the synthesis process to
|
||
|
* indicate that an error has occurred, but if the call is made after a call
|
||
|
* to {@link #done}, it might be discarded.
|
||
|
*
|
||
|
* {@link #done} must be called at the end of synthesis, regardless of errors.
|
||
|
*
|
||
|
* All methods can be only called on the synthesis thread.
|
||
|
*/
|
||
|
public interface SynthesisCallback {
|
||
|
|
||
|
/** @hide */
|
||
|
@Retention(RetentionPolicy.SOURCE)
|
||
|
@IntDef({
|
||
|
AudioFormat.ENCODING_PCM_8BIT,
|
||
|
AudioFormat.ENCODING_PCM_16BIT,
|
||
|
AudioFormat.ENCODING_PCM_FLOAT
|
||
|
})
|
||
|
@interface SupportedAudioFormat {};
|
||
|
|
||
|
/**
|
||
|
* @return the maximum number of bytes that the TTS engine can pass in a single call of {@link
|
||
|
* #audioAvailable}. Calls to {@link #audioAvailable} with data lengths larger than this
|
||
|
* value will not succeed.
|
||
|
*/
|
||
|
int getMaxBufferSize();
|
||
|
|
||
|
/**
|
||
|
* The service should call this when it starts to synthesize audio for this request.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*
|
||
|
* @param sampleRateInHz Sample rate in HZ of the generated audio.
|
||
|
* @param audioFormat Audio format of the generated audio. Must be one of {@link
|
||
|
* AudioFormat#ENCODING_PCM_8BIT} or {@link AudioFormat#ENCODING_PCM_16BIT}. Can also be
|
||
|
* {@link AudioFormat#ENCODING_PCM_FLOAT} when targetting Android N and above.
|
||
|
* @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
|
||
|
* @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
|
||
|
* android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
|
||
|
*/
|
||
|
int start(
|
||
|
int sampleRateInHz,
|
||
|
@SupportedAudioFormat int audioFormat,
|
||
|
@IntRange(from = 1, to = 2) int channelCount);
|
||
|
|
||
|
/**
|
||
|
* The service should call this method when synthesized audio is ready for consumption.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*
|
||
|
* @param buffer The generated audio data. This method will not hold on to {@code buffer}, so the
|
||
|
* caller is free to modify it after this method returns.
|
||
|
* @param offset The offset into {@code buffer} where the audio data starts.
|
||
|
* @param length The number of bytes of audio data in {@code buffer}. This must be less than or
|
||
|
* equal to the return value of {@link #getMaxBufferSize}.
|
||
|
* @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
|
||
|
* android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
|
||
|
*/
|
||
|
int audioAvailable(byte[] buffer, int offset, int length);
|
||
|
|
||
|
/**
|
||
|
* The service should call this method when all the synthesized audio for a request has been
|
||
|
* passed to {@link #audioAvailable}.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*
|
||
|
* <p>This method has to be called if {@link #start} and/or {@link #error} was called.
|
||
|
*
|
||
|
* @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
|
||
|
* android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
|
||
|
*/
|
||
|
int done();
|
||
|
|
||
|
/**
|
||
|
* The service should call this method if the speech synthesis fails.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*/
|
||
|
void error();
|
||
|
|
||
|
/**
|
||
|
* The service should call this method if the speech synthesis fails.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*
|
||
|
* @param errorCode Error code to pass to the client. One of the ERROR_ values from {@link
|
||
|
* android.speech.tts.TextToSpeech}
|
||
|
*/
|
||
|
void error(@TextToSpeech.Error int errorCode);
|
||
|
|
||
|
/**
|
||
|
* Check if {@link #start} was called or not.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*
|
||
|
* <p>Useful for checking if a fallback from network request is possible.
|
||
|
*/
|
||
|
boolean hasStarted();
|
||
|
|
||
|
/**
|
||
|
* Check if {@link #done} was called or not.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*
|
||
|
* <p>Useful for checking if a fallback from network request is possible.
|
||
|
*/
|
||
|
boolean hasFinished();
|
||
|
|
||
|
/**
|
||
|
* The service may call this method to provide timing information about the spoken text.
|
||
|
*
|
||
|
* <p>Calling this method means that at the given audio frame, the given range of the input is
|
||
|
* about to be spoken. If this method is called the client will receive a callback on the
|
||
|
* listener ({@link UtteranceProgressListener#onRangeStart}) at the moment that frame has been
|
||
|
* reached by the playback head.
|
||
|
*
|
||
|
* <p>This information can be used by the client, for example, to highlight ranges of the text
|
||
|
* while it is spoken.
|
||
|
*
|
||
|
* <p>The markerInFrames is a frame index into the audio for this synthesis request, i.e. into
|
||
|
* the concatenation of the audio bytes sent to audioAvailable for this synthesis request. The
|
||
|
* definition of a frame depends on the format given by {@link #start}. See {@link AudioFormat}
|
||
|
* for more information.
|
||
|
*
|
||
|
* <p>This method should only be called on the synthesis thread, while in {@link
|
||
|
* TextToSpeechService#onSynthesizeText}.
|
||
|
*
|
||
|
* @param markerInFrames The position in frames in the audio where this range is spoken.
|
||
|
* @param start The start index of the range in the input text.
|
||
|
* @param end The end index (exclusive) of the range in the input text.
|
||
|
*/
|
||
|
default void rangeStart(int markerInFrames, int start, int end) {}
|
||
|
}
|