/* GENERATED SOURCE. DO NOT MODIFY. */ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2003-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ package android.icu.impl; import android.icu.lang.UCharacter; import android.icu.text.StringPrepParseException; import android.icu.text.UTF16; import android.icu.util.ICUInputTooLongException; /** * Ported code from ICU punycode.c * @author ram * @hide Only a subset of ICU is exposed in Android */ public final class Punycode { /* Punycode parameters for Bootstring */ private static final int BASE = 36; private static final int TMIN = 1; private static final int TMAX = 26; private static final int SKEW = 38; private static final int DAMP = 700; private static final int INITIAL_BIAS = 72; private static final int INITIAL_N = 0x80; /* "Basic" Unicode/ASCII code points */ private static final char HYPHEN = 0x2d; private static final char DELIMITER = HYPHEN; private static final int ZERO = 0x30; //private static final int NINE = 0x39; private static final int SMALL_A = 0x61; private static final int SMALL_Z = 0x7a; private static final int CAPITAL_A = 0x41; private static final int CAPITAL_Z = 0x5a; private static int adaptBias(int delta, int length, boolean firstTime){ if(firstTime){ delta /=DAMP; }else{ delta /= 2; } delta += delta/length; int count=0; for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { delta/=(BASE-TMIN); } return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); } /** * @return the numeric value of a basic code point (for use in representing integers) * in the range 0 to BASE-1, or a negative value if cp is invalid. */ private static final int decodeDigit(int cp) { if(cp<='Z') { if(cp<='9') { if(cp<'0') { return -1; } else { return cp-'0'+26; // 0..9 -> 26..35 } } else { return cp-'A'; // A-Z -> 0..25 } } else if(cp<='z') { return cp-'a'; // a..z -> 0..25 } else { return -1; } } ///CLOVER:OFF private static char asciiCaseMap(char b, boolean uppercase) { if(uppercase) { if(SMALL_A<=b && b<=SMALL_Z) { b-=(SMALL_A-CAPITAL_A); } } else { if(CAPITAL_A<=b && b<=CAPITAL_Z) { b+=(SMALL_A-CAPITAL_A); } } return b; } ///CLOVER:ON /** * digitToBasic() returns the basic code point whose value * (when used for representing integers) is d, which must be in the * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is * nonzero, in which case the uppercase form is used. */ private static char digitToBasic(int digit, boolean uppercase) { /* 0..25 map to ASCII a..z or A..Z */ /* 26..35 map to ASCII 0..9 */ if(digit<26) { if(uppercase) { return (char)(CAPITAL_A+digit); } else { return (char)(SMALL_A+digit); } } else { return (char)((ZERO-26)+digit); } } // ICU-13727: Limit input length for n^2 algorithm // where well-formed strings are at most 59 characters long. private static final int ENCODE_MAX_CODE_UNITS = 1000; private static final int DECODE_MAX_CHARS = 2000; /** * Converts Unicode to Punycode. * The input string must not contain single, unpaired surrogates. * The output will be represented as an array of ASCII code points. * * @param src The source of the String Buffer passed. * @param caseFlags The boolean array of case flags. * @return An array of ASCII code points. */ public static StringBuilder encode(CharSequence src, boolean[] caseFlags) throws StringPrepParseException{ int n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, srcCPCount; char c, c2; int srcLength = src.length(); if (srcLength > ENCODE_MAX_CODE_UNITS) { throw new ICUInputTooLongException( "input too long: " + srcLength + " UTF-16 code units"); } int[] cpBuffer = new int[srcLength]; StringBuilder dest = new StringBuilder(srcLength); /* * Handle the basic code points and * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): */ srcCPCount=0; for(j=0; j0) { dest.append(DELIMITER); } /* * handledCPCount is the number of code points that have been handled * basicLength is the number of basic code points * destLength is the number of chars that have been output */ /* Initialize the state: */ n=INITIAL_N; delta=0; bias=INITIAL_BIAS; /* Main encoding loop: */ for(handledCPCount=basicLength; handledCPCount state to , but guard against overflow: */ if(m-n>(0x7fffffff-handledCPCount-delta)/(handledCPCount+1)) { throw new IllegalStateException("Internal program error"); } delta+=(m-n)*(handledCPCount+1); n=m; /* Encode a sequence of same code points n */ for(j=0; jTMAX) { t=TMAX; } */ t=k-bias; if(t=(bias+TMAX)) { t=TMAX; } if(q= CAPITAL_Z); } ///CLOVER:ON private static boolean isSurrogate(int ch){ return (((ch)&0xfffff800)==0xd800); } /** * Converts Punycode to Unicode. * The Unicode string will be at most as long as the Punycode string. * * @param src The source of the string buffer being passed. * @param caseFlags The array of boolean case flags. * @return StringBuilder string. */ public static StringBuilder decode(CharSequence src, boolean[] caseFlags) throws StringPrepParseException{ int srcLength = src.length(); if (srcLength > DECODE_MAX_CHARS) { throw new ICUInputTooLongException("input too long: " + srcLength + " characters"); } StringBuilder dest = new StringBuilder(src.length()); int n, i, bias, basicLength, j, in, oldi, w, k, digit, t, destCPCount, firstSupplementaryIndex, cpLength; char b; /* * Handle the basic code points: * Let basicLength be the number of input code points * before the last delimiter, or 0 if there is none, * then copy the first basicLength code points to the output. * * The following loop iterates backward. */ for(j=srcLength; j>0;) { if(src.charAt(--j)==DELIMITER) { break; } } basicLength=destCPCount=j; for(j=0; j0 ? basicLength+1 : 0; in=srcLength) { throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); } digit=decodeDigit(src.charAt(in++)); if(digit<0) { throw new StringPrepParseException("Invalid char found", StringPrepParseException.INVALID_CHAR_FOUND); } if(digit>(0x7fffffff-i)/w) { /* integer overflow */ throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); } i+=digit*w; t=k-bias; if(t=(bias+TMAX)) { t=TMAX; } if(digit0x7fffffff/(BASE-t)) { /* integer overflow */ throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); } w*=BASE-t; } /* * Modification from sample code: * Increments destCPCount here, * where needed instead of in for() loop tail. */ ++destCPCount; bias=adaptBias(i-oldi, destCPCount, (oldi==0)); /* * i was supposed to wrap around from (incremented) destCPCount to 0, * incrementing n each time, so we'll fix that now: */ if(i/destCPCount>(0x7fffffff-n)) { /* integer overflow */ throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); } n+=i/destCPCount; i%=destCPCount; /* not needed for Punycode: */ /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ if(n>0x10ffff || isSurrogate(n)) { /* Unicode code point overflow */ throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); } /* Insert n at position i of the output: */ cpLength=Character.charCount(n); int codeUnitIndex; /* * Handle indexes when supplementary code points are present. * * In almost all cases, there will be only BMP code points before i * and even in the entire string. * This is handled with the same efficiency as with UTF-32. * * Only the rare cases with supplementary code points are handled * more slowly - but not too bad since this is an insertion anyway. */ if(i<=firstSupplementaryIndex) { codeUnitIndex=i; if(cpLength>1) { firstSupplementaryIndex=codeUnitIndex; } else { ++firstSupplementaryIndex; } } else { codeUnitIndex=dest.offsetByCodePoints(firstSupplementaryIndex, i-firstSupplementaryIndex); } /* use the UChar index codeUnitIndex instead of the code point index i */ if(caseFlags!=null && (dest.length()+cpLength)<=caseFlags.length) { if(codeUnitIndex