/**
*******************************************************************************
* Copyright (C) 1996-2006, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
*******************************************************************************
*/
|
||
|
/**
 * A JNI interface for ICU converters.
 *
 *
 * @author Ram Viswanadha, IBM
 */
|
||
|
package com.android.icu.charset;
|
||
|
|
||
|
import dalvik.annotation.optimization.ReachabilitySensitive;
|
||
|
import java.nio.ByteBuffer;
|
||
|
import java.nio.CharBuffer;
|
||
|
import java.nio.charset.Charset;
|
||
|
import java.nio.charset.CharsetEncoder;
|
||
|
import java.nio.charset.CoderResult;
|
||
|
import java.nio.charset.CodingErrorAction;
|
||
|
import java.util.HashMap;
|
||
|
import java.util.Map;
|
||
|
|
||
|
final class CharsetEncoderICU extends CharsetEncoder {
|
||
|
private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>();
|
||
|
static {
|
||
|
// ICU has different default replacements to the RI in some cases. There are many
|
||
|
// additional cases, but this covers all the charsets that Java guarantees will be
|
||
|
// available, which is where compatibility seems most important. (The RI even uses
|
||
|
// the byte corresponding to '?' in ASCII as the replacement byte for charsets where that
|
||
|
// byte corresponds to an entirely different character.)
|
||
|
// It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it
|
||
|
// can represent it, but this is what the RI does...
|
||
|
byte[] questionMark = new byte[] { (byte) '?' };
|
||
|
DEFAULT_REPLACEMENTS.put("UTF-8", questionMark);
|
||
|
DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark);
|
||
|
DEFAULT_REPLACEMENTS.put("US-ASCII", questionMark);
|
||
|
}
|
||
|
|
||
|
private static final int INPUT_OFFSET = 0;
|
||
|
private static final int OUTPUT_OFFSET = 1;
|
||
|
private static final int INVALID_CHAR_COUNT = 2;
|
||
|
private static final char[] EMPTY_CHAR_ARRAY = new char[0];
|
||
|
/*
|
||
|
* data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed
|
||
|
* data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written
|
||
|
* data[INVALID_CHARS] = number of invalid chars
|
||
|
*/
|
||
|
private int[] data = new int[3];
|
||
|
|
||
|
/* handle to the ICU converter that is opened */
|
||
|
@ReachabilitySensitive
|
||
|
private final long converterHandle;
|
||
|
|
||
|
private char[] input = null;
|
||
|
private byte[] output = null;
|
||
|
|
||
|
private char[] allocatedInput = null;
|
||
|
private byte[] allocatedOutput = null;
|
||
|
|
||
|
// These instance variables are always assigned in the methods before being used. This class
|
||
|
// is inherently thread-unsafe so we don't have to worry about synchronization.
|
||
|
private int inEnd;
|
||
|
private int outEnd;
|
||
|
|
||
|
public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) {
|
||
|
// This complexity is necessary to ensure that even if the constructor, superclass
|
||
|
// constructor, or call to updateCallback throw, we still free the native peer.
|
||
|
long address = 0;
|
||
|
CharsetEncoderICU result;
|
||
|
try {
|
||
|
address = NativeConverter.openConverter(icuCanonicalName);
|
||
|
float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address);
|
||
|
float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address);
|
||
|
byte[] replacement = makeReplacement(icuCanonicalName, address);
|
||
|
result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address);
|
||
|
} catch (Throwable t) {
|
||
|
if (address != 0) {
|
||
|
NativeConverter.closeConverter(address);
|
||
|
}
|
||
|
throw t;
|
||
|
}
|
||
|
// An exception in registerConverter() will deallocate address:
|
||
|
NativeConverter.registerConverter(result, address);
|
||
|
result.updateCallback();
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
private static byte[] makeReplacement(String icuCanonicalName, long address) {
|
||
|
// We have our own map of RI-compatible default replacements (where ICU disagrees)...
|
||
|
byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName);
|
||
|
if (replacement != null) {
|
||
|
return replacement.clone();
|
||
|
}
|
||
|
// ...but fall back to asking ICU.
|
||
|
return NativeConverter.getSubstitutionBytes(address);
|
||
|
}
|
||
|
|
||
|
private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) {
|
||
|
super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true);
|
||
|
// Our native peer needs to know what just happened...
|
||
|
this.converterHandle = address;
|
||
|
}
|
||
|
|
||
|
@Override protected void implReplaceWith(byte[] newReplacement) {
|
||
|
updateCallback();
|
||
|
}
|
||
|
|
||
|
@Override protected void implOnMalformedInput(CodingErrorAction newAction) {
|
||
|
updateCallback();
|
||
|
}
|
||
|
|
||
|
@Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
|
||
|
updateCallback();
|
||
|
}
|
||
|
|
||
|
private void updateCallback() {
|
||
|
NativeConverter.setCallbackEncode(converterHandle, this);
|
||
|
}
|
||
|
|
||
|
@Override protected void implReset() {
|
||
|
NativeConverter.resetCharToByte(converterHandle);
|
||
|
data[INPUT_OFFSET] = 0;
|
||
|
data[OUTPUT_OFFSET] = 0;
|
||
|
data[INVALID_CHAR_COUNT] = 0;
|
||
|
output = null;
|
||
|
input = null;
|
||
|
allocatedInput = null;
|
||
|
allocatedOutput = null;
|
||
|
inEnd = 0;
|
||
|
outEnd = 0;
|
||
|
}
|
||
|
|
||
|
@Override protected CoderResult implFlush(ByteBuffer out) {
|
||
|
try {
|
||
|
// ICU needs to see an empty input.
|
||
|
input = EMPTY_CHAR_ARRAY;
|
||
|
inEnd = 0;
|
||
|
data[INPUT_OFFSET] = 0;
|
||
|
|
||
|
data[OUTPUT_OFFSET] = getArray(out);
|
||
|
data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
|
||
|
|
||
|
int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true);
|
||
|
if (NativeConverter.U_FAILURE(error)) {
|
||
|
if (error == NativeConverter.U_BUFFER_OVERFLOW_ERROR) {
|
||
|
return CoderResult.OVERFLOW;
|
||
|
} else if (error == NativeConverter.U_TRUNCATED_CHAR_FOUND) {
|
||
|
if (data[INVALID_CHAR_COUNT] > 0) {
|
||
|
return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return CoderResult.UNDERFLOW;
|
||
|
} finally {
|
||
|
setPosition(out);
|
||
|
implReset();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
|
||
|
if (!in.hasRemaining()) {
|
||
|
return CoderResult.UNDERFLOW;
|
||
|
}
|
||
|
|
||
|
data[INPUT_OFFSET] = getArray(in);
|
||
|
data[OUTPUT_OFFSET]= getArray(out);
|
||
|
data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
|
||
|
|
||
|
try {
|
||
|
int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false);
|
||
|
if (NativeConverter.U_FAILURE(error)) {
|
||
|
if (error == NativeConverter.U_BUFFER_OVERFLOW_ERROR) {
|
||
|
return CoderResult.OVERFLOW;
|
||
|
} else if (error == NativeConverter.U_INVALID_CHAR_FOUND) {
|
||
|
return CoderResult.unmappableForLength(data[INVALID_CHAR_COUNT]);
|
||
|
} else if (error == NativeConverter.U_ILLEGAL_CHAR_FOUND) {
|
||
|
return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
|
||
|
} else {
|
||
|
throw new AssertionError(error);
|
||
|
}
|
||
|
}
|
||
|
// Decoding succeeded: give us more data.
|
||
|
return CoderResult.UNDERFLOW;
|
||
|
} finally {
|
||
|
setPosition(in);
|
||
|
setPosition(out);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private int getArray(ByteBuffer out) {
|
||
|
if (out.hasArray()) {
|
||
|
output = out.array();
|
||
|
outEnd = out.arrayOffset() + out.limit();
|
||
|
return out.arrayOffset() + out.position();
|
||
|
} else {
|
||
|
outEnd = out.remaining();
|
||
|
if (allocatedOutput == null || outEnd > allocatedOutput.length) {
|
||
|
allocatedOutput = new byte[outEnd];
|
||
|
}
|
||
|
// The array's start position is 0
|
||
|
output = allocatedOutput;
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private int getArray(CharBuffer in) {
|
||
|
if (in.hasArray()) {
|
||
|
input = in.array();
|
||
|
inEnd = in.arrayOffset() + in.limit();
|
||
|
return in.arrayOffset() + in.position();
|
||
|
} else {
|
||
|
inEnd = in.remaining();
|
||
|
if (allocatedInput == null || inEnd > allocatedInput.length) {
|
||
|
allocatedInput = new char[inEnd];
|
||
|
}
|
||
|
// Copy the input buffer into the allocated array.
|
||
|
int pos = in.position();
|
||
|
in.get(allocatedInput, 0, inEnd);
|
||
|
in.position(pos);
|
||
|
// The array's start position is 0
|
||
|
input = allocatedInput;
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void setPosition(ByteBuffer out) {
|
||
|
if (out.hasArray()) {
|
||
|
out.position(data[OUTPUT_OFFSET] - out.arrayOffset());
|
||
|
} else {
|
||
|
out.put(output, 0, data[OUTPUT_OFFSET]);
|
||
|
}
|
||
|
// release reference to output array, which may not be ours
|
||
|
output = null;
|
||
|
}
|
||
|
|
||
|
private void setPosition(CharBuffer in) {
|
||
|
int position = in.position() + data[INPUT_OFFSET] - data[INVALID_CHAR_COUNT];
|
||
|
if (position < 0) {
|
||
|
// The calculated position might be negative if we encountered an
|
||
|
// invalid char that spanned input buffers. We adjust it to 0 in this case.
|
||
|
//
|
||
|
// NOTE: The API doesn't allow us to adjust the position of the previous
|
||
|
// input buffer. (Doing that wouldn't serve any useful purpose anyway.)
|
||
|
position = 0;
|
||
|
}
|
||
|
|
||
|
in.position(position);
|
||
|
// release reference to input array, which may not be ours
|
||
|
input = null;
|
||
|
}
|
||
|
}
|