/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package android.speech.tts;

import android.annotation.NonNull;
import android.media.AudioFormat;
import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
import android.util.Log;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.Arrays;

/**
 * Speech synthesis request that writes the audio to a WAV file.
 */
class FileSynthesisCallback extends AbstractSynthesisCallback {
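
    // Lifecycle, as driven by the engine through TextToSpeechService:
    //   1. start(sampleRateInHz, audioFormat, channelCount) reserves space for
    //      the WAV header at the start of the file.
    //   2. audioAvailable(...) appends raw PCM data, possibly many times.
    //   3. done() seeks back to the start of the file and writes the real
    //      44-byte header, now that the total data length is known; stop() or
    //      error(...) abandons the request instead.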

    private static final String TAG = "FileSynthesisRequest";
    private static final boolean DBG = false;

    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;

    private static final int WAV_HEADER_LENGTH = 44;
    private static final short WAV_FORMAT_PCM = 0x0001;

    private final Object mStateLock = new Object();

    private int mSampleRateInHz;
    private int mAudioFormat;
    private int mChannelCount;

    private FileChannel mFileChannel;

    private final UtteranceProgressDispatcher mDispatcher;

    private boolean mStarted = false;
    private boolean mDone = false;

    /** Status code of synthesis */
    protected int mStatusCode;

    FileSynthesisCallback(@NonNull FileChannel fileChannel,
            @NonNull UtteranceProgressDispatcher dispatcher, boolean clientIsUsingV2) {
        super(clientIsUsingV2);
        mFileChannel = fileChannel;
        mDispatcher = dispatcher;
        mStatusCode = TextToSpeech.SUCCESS;
    }

    @Override
    void stop() {
        synchronized (mStateLock) {
            if (mDone) {
                return;
            }
            if (mStatusCode == TextToSpeech.STOPPED) {
                return;
            }

            mStatusCode = TextToSpeech.STOPPED;
            cleanUp();
            mDispatcher.dispatchOnStop();
        }
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void cleanUp() {
        closeFile();
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void closeFile() {
        // File will be closed by the SpeechItem in the speech service.
        mFileChannel = null;
    }

    @Override
    public int getMaxBufferSize() {
        return MAX_AUDIO_BUFFER_SIZE;
    }

    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
                    + "," + channelCount + ")");
        }
        if (audioFormat != AudioFormat.ENCODING_PCM_8BIT &&
                audioFormat != AudioFormat.ENCODING_PCM_16BIT &&
                audioFormat != AudioFormat.ENCODING_PCM_FLOAT) {
            // Log only; the request still proceeds, and the header written in
            // done() declares PCM regardless of the actual encoding.
            Log.e(TAG, "Audio format encoding " + audioFormat + " not supported. Please use one " +
                    "of AudioFormat.ENCODING_PCM_8BIT, AudioFormat.ENCODING_PCM_16BIT or " +
                    "AudioFormat.ENCODING_PCM_FLOAT");
        }
        mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount);

        FileChannel fileChannel = null;
        synchronized (mStateLock) {
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS) {
                if (DBG) Log.d(TAG, "Error was raised");
                return TextToSpeech.ERROR;
            }
            if (mStarted) {
                Log.e(TAG, "Start called twice");
                return TextToSpeech.ERROR;
            }
            mStarted = true;
            mSampleRateInHz = sampleRateInHz;
            mAudioFormat = audioFormat;
            mChannelCount = channelCount;

            mDispatcher.dispatchOnStart();
            fileChannel = mFileChannel;
        }
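
        // Reserve space for the header: write WAV_HEADER_LENGTH zero bytes
        // now, then overwrite them with the real header in done(), once the
        // total audio data length is known.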
        try {
            fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
            return TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public int audioAvailable(byte[] buffer, int offset, int length) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + Arrays.toString(buffer)
                    + "," + offset + "," + length + ")");
        }
        FileChannel fileChannel = null;
        synchronized (mStateLock) {
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS) {
                if (DBG) Log.d(TAG, "Error was raised");
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
                return TextToSpeech.ERROR;
            }
            if (!mStarted) {
                Log.e(TAG, "Start method was not called");
                return TextToSpeech.ERROR;
            }
            fileChannel = mFileChannel;
        }
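
        // Dispatch a private copy: listeners get their own buffer, while the
        // original range is written to the file below.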
        final byte[] bufferCopy = new byte[length];
        System.arraycopy(buffer, offset, bufferCopy, 0, length);
        mDispatcher.dispatchOnAudioAvailable(bufferCopy);

        try {
            fileChannel.write(ByteBuffer.wrap(buffer, offset, length));
            return TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public int done() {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
        FileChannel fileChannel = null;

        int sampleRateInHz = 0;
        int audioFormat = 0;
        int channelCount = 0;

        synchronized (mStateLock) {
            if (mDone) {
                Log.w(TAG, "Duplicate call to done()");
                // This is not an error that would prevent synthesis, so the
                // status code is left unchanged.
                return TextToSpeech.ERROR;
            }
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS && mStatusCode != TextToSpeech.STOPPED) {
                mDispatcher.dispatchOnError(mStatusCode);
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                return TextToSpeech.ERROR;
            }
            mDone = true;
            fileChannel = mFileChannel;
            sampleRateInHz = mSampleRateInHz;
            audioFormat = mAudioFormat;
            channelCount = mChannelCount;
        }
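
        // The channel position is now at the end of the audio data; the total
        // data length is the file size minus the space reserved in start().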
        try {
            // Write WAV header at start of file
            fileChannel.position(0);
            int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
            fileChannel.write(
                    makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));

            synchronized (mStateLock) {
                closeFile();
                mDispatcher.dispatchOnSuccess();
                return TextToSpeech.SUCCESS;
            }
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public void error() {
        error(TextToSpeech.ERROR_SYNTHESIS);
    }

    @Override
    public void error(int errorCode) {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
        synchronized (mStateLock) {
            if (mDone) {
                return;
            }
            cleanUp();
            mStatusCode = errorCode;
        }
    }

    @Override
    public boolean hasStarted() {
        synchronized (mStateLock) {
            return mStarted;
        }
    }

    @Override
    public boolean hasFinished() {
        synchronized (mStateLock) {
            return mDone;
        }
    }
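
    /**
     * Builds the canonical 44-byte WAV header: a RIFF chunk descriptor
     * ("RIFF", chunk size, "WAVE"), a 16-byte "fmt " sub-chunk (format tag,
     * channel count, sample rate, byte rate, block align, bits per sample),
     * and the "data" sub-chunk header carrying the audio data length. All
     * multi-byte fields are little-endian.
     */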
    private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
            int dataLength) {
        int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat);
        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
        short blockAlign = (short) (sampleSizeInBytes * channelCount);
        short bitsPerSample = (short) (sampleSizeInBytes * 8);

        byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
        ByteBuffer header = ByteBuffer.wrap(headerBuf);
        header.order(ByteOrder.LITTLE_ENDIAN);

        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
        header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size
        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
        header.put(new byte[]{ 'f', 'm', 't', ' ' });
        header.putInt(16); // size of fmt chunk
        header.putShort(WAV_FORMAT_PCM);
        header.putShort((short) channelCount);
        header.putInt(sampleRateInHz);
        header.putInt(byteRate);
        header.putShort(blockAlign);
        header.putShort(bitsPerSample);
        header.put(new byte[]{ 'd', 'a', 't', 'a' });
        header.putInt(dataLength);
        header.flip();

        return header;
    }
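
    // For example, 16-bit mono audio at 22050 Hz gives sampleSizeInBytes = 2,
    // byteRate = 22050 * 2 * 1 = 44100, blockAlign = 2 and bitsPerSample = 16.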

    @Override
    public void rangeStart(int markerInFrames, int start, int end) {
        mDispatcher.dispatchOnRangeStart(markerInFrames, start, end);
    }
}
|