/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;
import android.annotation.NonNull;
import android.app.Service;
import android.content.Intent;
import android.media.AudioAttributes;
import android.media.AudioManager;
import android.net.Uri;
import android.os.Binder;
import android.os.Bundle;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.IBinder;
import android.os.Looper;
import android.os.Message;
import android.os.MessageQueue;
import android.os.ParcelFileDescriptor;
import android.os.RemoteCallbackList;
import android.os.RemoteException;
import android.provider.Settings;
import android.speech.tts.TextToSpeech.Engine;
import android.text.TextUtils;
import android.util.Log;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.Set;
/**
 * Abstract base class for TTS engine implementations. The following methods
 * need to be implemented:
 * <ul>
 * <li>{@link #onIsLanguageAvailable}</li>
 * <li>{@link #onLoadLanguage}</li>
 * <li>{@link #onGetLanguage}</li>
 * <li>{@link #onSynthesizeText}</li>
 * <li>{@link #onStop}</li>
 * </ul>
 * <p>
 * The first three deal primarily with language management, and are used to
 * query the engine for its support for a given language and indicate to it
 * that requests in a given language are imminent.
 *
 * <p>
 * {@link #onSynthesizeText} is central to the engine implementation. The
 * implementation should synthesize text as per the request parameters and
 * return synthesized data via the supplied callback. This class and its helpers
 * will then consume that data, which might mean queuing it for playback or writing
 * it to a file or similar. All calls to this method will be on a single thread,
 * which will be different from the main thread of the service. Synthesis must be
 * synchronous which means the engine must NOT hold on to the callback or call any
 * methods on it after the method returns.
 *
 * <p>
 * {@link #onStop} tells the engine that it should stop
 * all ongoing synthesis, if any. Any pending data from the current synthesis
 * will be discarded.
 *
 * <p>
 * {@link #onGetLanguage} is not required as of JELLYBEAN_MR2 (API 18) and later, it is only
 * called on earlier versions of Android.
 *
 * <p>
 * API Level 20 adds support for Voice objects. Voices are an abstraction that allows the TTS
 * service to expose multiple backends for a single locale. Each one of them can have a different
 * feature set. In order to fully take advantage of voices, an engine should implement
 * the following methods:
 * <ul>
 * <li>{@link #onGetVoices()}</li>
 * <li>{@link #onIsValidVoiceName(String)}</li>
 * <li>{@link #onLoadVoice(String)}</li>
 * <li>{@link #onGetDefaultVoiceNameFor(String, String, String)}</li>
 * </ul>
 * The first three methods are siblings of the {@link #onGetLanguage},
 * {@link #onIsLanguageAvailable} and {@link #onLoadLanguage} methods. The last one,
 * {@link #onGetDefaultVoiceNameFor(String, String, String)} is a link between locale and voice
 * based methods. Since API level 21 {@link TextToSpeech#setLanguage} is implemented by
 * calling {@link TextToSpeech#setVoice} with the voice returned by
 * {@link #onGetDefaultVoiceNameFor(String, String, String)}.
 *
 * <p>
 * If the client uses a voice instead of a locale, {@link SynthesisRequest} will contain the
 * requested voice name.
 *
 * <p>
 * The default implementations of Voice-related methods implement them using the
 * pre-existing locale-based implementation.
 */
public abstract class TextToSpeechService extends Service {
// Guards the Log.d debug tracing in this class.
private static final boolean DBG = false;
// Log tag used with Log.d.
private static final String TAG = "TextToSpeechService";
// NOTE(review): presumably the name given to the synthesis HandlerThread; its use is not
// visible in this part of the file — confirm.
private static final String SYNTH_THREAD_NAME = "SynthThread";
// Handler created in onCreate() from the SynthThread's looper.
private SynthHandler mSynthHandler;
// A thread and its associated handler for playing back any audio
// associated with this TTS engine. Will handle all requests except synthesis
// to file requests, which occur on the synthesis thread.
@NonNull private AudioPlaybackHandler mAudioPlaybackHandler;
// Created in onCreate(); used by getSettingsLocale() to look up this engine's locale preference.
private TtsEngines mEngineHelper;
// Created in onCreate(). NOTE(review): presumably tracks registered client callbacks — confirm.
private CallbackMap mCallbacks;
// Package name of this service's application, read in onCreate().
private String mPackageName;
// NOTE(review): lock object; what it guards is not visible in this part of the file.
private final Object mVoicesInfoLock = new Object();
public void onCreate() {
if (DBG) Log.d(TAG, "onCreate()");
SynthThread synthThread = new SynthThread();
mSynthHandler = new SynthHandler(synthThread.getLooper());
mAudioPlaybackHandler = new AudioPlaybackHandler();
mEngineHelper = new TtsEngines(this);
mCallbacks = new CallbackMap();
mPackageName = getApplicationInfo().packageName;
String[] defaultLocale = getSettingsLocale();
// Load default language
onLoadLanguage(defaultLocale[0], defaultLocale[1], defaultLocale[2]);
// Service teardown hook. Emits a debug log line when DBG is set.
// NOTE(review): the three comments below describe teardown steps (stop the synthesizer, stop
// the audio playback thread, unregister callbacks), but no corresponding statements and no
// closing brace are visible in this view — confirm against the full file before relying on it.
public void onDestroy() {
if (DBG) Log.d(TAG, "onDestroy()");
// Tell the synthesizer to stop
// Tell the audio playback thread to stop.
// Unregister all callbacks.
* Checks whether the engine supports a given language.
* Can be called on multiple threads.
* Its return values HAVE to be consistent with onLoadLanguage.
* @param lang ISO-3 language code.
* @param country ISO-3 country code. May be empty or null.
* @param variant Language variant. May be empty or null.
* @return Code indicating the support status for the locale.
* One of {@link TextToSpeech#LANG_AVAILABLE},
* {@link TextToSpeech#LANG_COUNTRY_AVAILABLE},
* {@link TextToSpeech#LANG_MISSING_DATA}
* {@link TextToSpeech#LANG_NOT_SUPPORTED}.
protected abstract int onIsLanguageAvailable(String lang, String country, String variant);
* Returns the language, country and variant currently being used by the TTS engine.
* This method will be called only on Android 4.2 and before (API <= 17). In later versions
* this method is not called by the Android TTS framework.
* Can be called on multiple threads.
* @return A 3-element array, containing language (ISO 3-letter code),
* country (ISO 3-letter code) and variant used by the engine.
* The country and variant may be {@code ""}. If country is empty, then variant must
* be empty too.
* @see Locale#getISO3Language()
* @see Locale#getISO3Country()
* @see Locale#getVariant()
protected abstract String[] onGetLanguage();
* Notifies the engine that it should load a speech synthesis language. There is no guarantee
* that this method is always called before the language is used for synthesis. It is merely
* a hint to the engine that it will probably get some synthesis requests for this language
* at some point in the future.
* Can be called on multiple threads.
* In <= Android 4.2 (<= API 17) can be called on main and service binder threads.
* In > Android 4.2 (> API 17) can be called on main and synthesis threads.
* @param lang ISO-3 language code.
* @param country ISO-3 country code. May be empty or null.
* @param variant Language variant. May be empty or null.
* @return Code indicating the support status for the locale.
* One of {@link TextToSpeech#LANG_AVAILABLE},
* {@link TextToSpeech#LANG_COUNTRY_AVAILABLE},
* {@link TextToSpeech#LANG_MISSING_DATA}
* {@link TextToSpeech#LANG_NOT_SUPPORTED}.
protected abstract int onLoadLanguage(String lang, String country, String variant);
* Notifies the service that it should stop any in-progress speech synthesis.
* This method can be called even if no speech synthesis is currently in progress.
* Can be called on multiple threads, but not on the synthesis thread.
protected abstract void onStop();
* Tells the service to synthesize speech from the given text. This method should block until
* the synthesis is finished. Called on the synthesis thread.
* @param request The synthesis request.
* @param callback The callback that the engine must use to make data available for playback or
* for writing to a file.
protected abstract void onSynthesizeText(SynthesisRequest request, SynthesisCallback callback);
* Queries the service for a set of features supported for a given language.
* Can be called on multiple threads.
* @param lang ISO-3 language code.
* @param country ISO-3 country code. May be empty or null.
* @param variant Language variant. May be empty or null.
* @return A list of features supported for the given language.
protected Set onGetFeaturesForLanguage(String lang, String country, String variant) {
return new HashSet();
private int getExpectedLanguageAvailableStatus(Locale locale) {
int expectedStatus = TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE;
if (locale.getVariant().isEmpty()) {
if (locale.getCountry().isEmpty()) {
expectedStatus = TextToSpeech.LANG_AVAILABLE;
} else {
expectedStatus = TextToSpeech.LANG_COUNTRY_AVAILABLE;
return expectedStatus;
* Queries the service for a set of supported voices.
* Can be called on multiple threads.
* The default implementation tries to enumerate all available locales, pass them to
* {@link #onIsLanguageAvailable(String, String, String)} and create Voice instances (using
* the locale's BCP-47 language tag as the voice name) for the ones that are supported.
* Note, that this implementation is suitable only for engines that don't have multiple voices
* for a single locale. Also, this implementation won't work with Locales not listed in the
* set returned by the {@link Locale#getAvailableLocales()} method.
* @return A list of voices supported.
public List onGetVoices() {
// Enumerate all locales and check if they are available
ArrayList voices = new ArrayList();
for (Locale locale : Locale.getAvailableLocales()) {
int expectedStatus = getExpectedLanguageAvailableStatus(locale);
try {
int localeStatus = onIsLanguageAvailable(locale.getISO3Language(),
locale.getISO3Country(), locale.getVariant());
if (localeStatus != expectedStatus) {
} catch (MissingResourceException e) {
// Ignore locale without iso 3 codes
Set features = onGetFeaturesForLanguage(locale.getISO3Language(),
locale.getISO3Country(), locale.getVariant());
String voiceName = onGetDefaultVoiceNameFor(locale.getISO3Language(),
locale.getISO3Country(), locale.getVariant());
voices.add(new Voice(voiceName, locale, Voice.QUALITY_NORMAL,
Voice.LATENCY_NORMAL, false, features));
return voices;
* Return a name of the default voice for a given locale.
* This method provides a mapping between locales and available voices. This method is
* used in {@link TextToSpeech#setLanguage}, which calls this method and then calls
* {@link TextToSpeech#setVoice} with the voice returned by this method.
* Also, it's used by {@link TextToSpeech#getDefaultVoice()} to find a default voice for
* the default locale.
* @param lang ISO-3 language code.
* @param country ISO-3 country code. May be empty or null.
* @param variant Language variant. May be empty or null.
* @return A name of the default voice for a given locale.
public String onGetDefaultVoiceNameFor(String lang, String country, String variant) {
int localeStatus = onIsLanguageAvailable(lang, country, variant);
Locale iso3Locale = null;
switch (localeStatus) {
case TextToSpeech.LANG_AVAILABLE:
iso3Locale = new Locale(lang);
iso3Locale = new Locale(lang, country);
iso3Locale = new Locale(lang, country, variant);
return null;
Locale properLocale = TtsEngines.normalizeTTSLocale(iso3Locale);
String voiceName = properLocale.toLanguageTag();
if (onIsValidVoiceName(voiceName) == TextToSpeech.SUCCESS) {
return voiceName;
} else {
return null;
* Notifies the engine that it should load a speech synthesis voice. There is no guarantee
* that this method is always called before the voice is used for synthesis. It is merely
* a hint to the engine that it will probably get some synthesis requests for this voice
* at some point in the future.
* Will be called only on synthesis thread.
* The default implementation creates a Locale from the voice name (by interpreting the name as
* a BCP-47 tag for the locale), and passes it to
* {@link #onLoadLanguage(String, String, String)}.
* @param voiceName Name of the voice.
* @return {@link TextToSpeech#ERROR} or {@link TextToSpeech#SUCCESS}.
public int onLoadVoice(String voiceName) {
Locale locale = Locale.forLanguageTag(voiceName);
if (locale == null) {
return TextToSpeech.ERROR;
int expectedStatus = getExpectedLanguageAvailableStatus(locale);
try {
int localeStatus = onIsLanguageAvailable(locale.getISO3Language(),
locale.getISO3Country(), locale.getVariant());
if (localeStatus != expectedStatus) {
return TextToSpeech.ERROR;
locale.getISO3Country(), locale.getVariant());
return TextToSpeech.SUCCESS;
} catch (MissingResourceException e) {
return TextToSpeech.ERROR;
* Checks whether the engine supports a voice with a given name.
* Can be called on multiple threads.
* The default implementation treats the voice name as a language tag, creating a Locale from
* the voice name, and passes it to {@link #onIsLanguageAvailable(String, String, String)}.
* @param voiceName Name of the voice.
* @return {@link TextToSpeech#ERROR} or {@link TextToSpeech#SUCCESS}.
public int onIsValidVoiceName(String voiceName) {
Locale locale = Locale.forLanguageTag(voiceName);
if (locale == null) {
return TextToSpeech.ERROR;
int expectedStatus = getExpectedLanguageAvailableStatus(locale);
try {
int localeStatus = onIsLanguageAvailable(locale.getISO3Language(),
locale.getISO3Country(), locale.getVariant());
if (localeStatus != expectedStatus) {
return TextToSpeech.ERROR;
return TextToSpeech.SUCCESS;
} catch (MissingResourceException e) {
return TextToSpeech.ERROR;
private int getDefaultSpeechRate() {
return getSecureSettingInt(Settings.Secure.TTS_DEFAULT_RATE, Engine.DEFAULT_RATE);
private int getDefaultPitch() {
return getSecureSettingInt(Settings.Secure.TTS_DEFAULT_PITCH, Engine.DEFAULT_PITCH);
private String[] getSettingsLocale() {
final Locale locale = mEngineHelper.getLocalePrefForEngine(mPackageName);
return TtsEngines.toOldLocaleStringFormat(locale);
private int getSecureSettingInt(String name, int defaultValue) {
return Settings.Secure.getInt(getContentResolver(), name, defaultValue);
/** Synthesizer thread. This thread is used to run {@link SynthHandler}. */
// HandlerThread subclass that also implements MessageQueue.IdleHandler.
// NOTE(review): the constructor and method bodies below appear truncated in this view (no
// statements and no closing braces are visible) — confirm against the full file.
private class SynthThread extends HandlerThread implements MessageQueue.IdleHandler {
// Starts out true and is cleared the first time queueIdle() runs.
private boolean mFirstIdle = true;
// NOTE(review): constructor body not visible; presumably names the thread — confirm.
public SynthThread() {
// NOTE(review): body not visible; presumably registers this object as the queue's idle
// handler — confirm.
protected void onLooperPrepared() {
// Idle callback from the message queue.
public boolean queueIdle() {
// The first idle callback only clears the flag.
if (mFirstIdle) {
mFirstIdle = false;
} else {
// NOTE(review): no statement for the else branch is visible before the return below;
// presumably it calls broadcastTtsQueueProcessingCompleted() — confirm.
return true;
private void broadcastTtsQueueProcessingCompleted() {
// NOTE(review): `i` is not declared in the visible lines; presumably the Intent that is
// broadcast — confirm.
if (DBG) Log.d(TAG, "Broadcasting: " + i);
private class SynthHandler extends Handler {
private SpeechItem mCurrentSpeechItem = null;
// When a message with QUEUE_FLUSH arrives we add the caller identity to the List and when a
// message with QUEUE_DESTROY arrives we increment mFlushAll. Then a message is added to the
// handler queue that removes the caller identity from the list and decrements the mFlushAll
// counter. This is so that when a message is processed and the caller identity is in the
// list or mFlushAll is not zero, we know that the message should be flushed.
// It's important that mFlushedObjects is a List and not a Set, and that mFlushAll is an
// int and not a bool. This is because when multiple messages arrive with QUEUE_FLUSH or
// QUEUE_DESTROY, we want to keep flushing messages until we arrive at the last QUEUE_FLUSH
// or QUEUE_DESTROY message.
private List