A fold(int options,
CharSequence src, A dest, Edits edits) {
try {
if (edits != null) {
edits.reset();
}
internalToLower(-1, options, src, 0, src.length(), null, dest, edits);
return dest;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
}
private static final class GreekUpper {
// Data bits.
private static final int UPPER_MASK = 0x3ff;
private static final int HAS_VOWEL = 0x1000;
private static final int HAS_YPOGEGRAMMENI = 0x2000;
private static final int HAS_ACCENT = 0x4000;
private static final int HAS_DIALYTIKA = 0x8000;
// Further bits during data building and processing, not stored in the data map.
private static final int HAS_COMBINING_DIALYTIKA = 0x10000;
private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000;
private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
// State bits.
private static final int AFTER_CASED = 1;
private static final int AFTER_VOWEL_WITH_COMBINING_ACCENT = 2;
private static final int AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT = 4;
// Data generated by prototype code, see
// https://icu.unicode.org/design/case/greek-upper
// TODO: Move this data into ucase.icu.
private static final char[] data0370 = {
// U+0370..03FF
0x0370, // Ͱ
0x0370, // ͱ
0x0372, // Ͳ
0x0372, // ͳ
0,
0,
0x0376, // Ͷ
0x0376, // ͷ
0,
0,
0x037A, // ͺ
0x03FD, // ͻ
0x03FE, // ͼ
0x03FF, // ͽ
0,
0x037F, // Ϳ
0,
0,
0,
0,
0,
0,
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά
0,
0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί
0,
0x039F | HAS_VOWEL | HAS_ACCENT, // Ό
0,
0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ
0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ
0x0391 | HAS_VOWEL, // Α
0x0392, // Β
0x0393, // Γ
0x0394, // Δ
0x0395 | HAS_VOWEL, // Ε
0x0396, // Ζ
0x0397 | HAS_VOWEL, // Η
0x0398, // Θ
0x0399 | HAS_VOWEL, // Ι
0x039A, // Κ
0x039B, // Λ
0x039C, // Μ
0x039D, // Ν
0x039E, // Ξ
0x039F | HAS_VOWEL, // Ο
0x03A0, // Π
0x03A1, // Ρ
0,
0x03A3, // Σ
0x03A4, // Τ
0x03A5 | HAS_VOWEL, // Υ
0x03A6, // Φ
0x03A7, // Χ
0x03A8, // Ψ
0x03A9 | HAS_VOWEL, // Ω
0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // Ϊ
0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // Ϋ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ά
0x0395 | HAS_VOWEL | HAS_ACCENT, // έ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ή
0x0399 | HAS_VOWEL | HAS_ACCENT, // ί
0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ
0x0391 | HAS_VOWEL, // α
0x0392, // β
0x0393, // γ
0x0394, // δ
0x0395 | HAS_VOWEL, // ε
0x0396, // ζ
0x0397 | HAS_VOWEL, // η
0x0398, // θ
0x0399 | HAS_VOWEL, // ι
0x039A, // κ
0x039B, // λ
0x039C, // μ
0x039D, // ν
0x039E, // ξ
0x039F | HAS_VOWEL, // ο
0x03A0, // π
0x03A1, // ρ
0x03A3, // ς
0x03A3, // σ
0x03A4, // τ
0x03A5 | HAS_VOWEL, // υ
0x03A6, // φ
0x03A7, // χ
0x03A8, // ψ
0x03A9 | HAS_VOWEL, // ω
0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // ϊ
0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // ϋ
0x039F | HAS_VOWEL | HAS_ACCENT, // ό
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ
0x03CF, // Ϗ
0x0392, // ϐ
0x0398, // ϑ
0x03D2, // ϒ
0x03D2 | HAS_ACCENT, // ϓ
0x03D2 | HAS_DIALYTIKA, // ϔ
0x03A6, // ϕ
0x03A0, // ϖ
0x03CF, // ϗ
0x03D8, // Ϙ
0x03D8, // ϙ
0x03DA, // Ϛ
0x03DA, // ϛ
0x03DC, // Ϝ
0x03DC, // ϝ
0x03DE, // Ϟ
0x03DE, // ϟ
0x03E0, // Ϡ
0x03E0, // ϡ
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0x039A, // ϰ
0x03A1, // ϱ
0x03F9, // ϲ
0x037F, // ϳ
0x03F4, // ϴ
0x0395 | HAS_VOWEL, // ϵ
0,
0x03F7, // Ϸ
0x03F7, // ϸ
0x03F9, // Ϲ
0x03FA, // Ϻ
0x03FA, // ϻ
0x03FC, // ϼ
0x03FD, // Ͻ
0x03FE, // Ͼ
0x03FF, // Ͽ
};
private static final char[] data1F00 = {
// U+1F00..1FFF
0x0391 | HAS_VOWEL, // ἀ
0x0391 | HAS_VOWEL, // ἁ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ἂ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ἃ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ἄ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ἅ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ἆ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ἇ
0x0391 | HAS_VOWEL, // Ἀ
0x0391 | HAS_VOWEL, // Ἁ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἂ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἃ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἄ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἅ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἆ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἇ
0x0395 | HAS_VOWEL, // ἐ
0x0395 | HAS_VOWEL, // ἑ
0x0395 | HAS_VOWEL | HAS_ACCENT, // ἒ
0x0395 | HAS_VOWEL | HAS_ACCENT, // ἓ
0x0395 | HAS_VOWEL | HAS_ACCENT, // ἔ
0x0395 | HAS_VOWEL | HAS_ACCENT, // ἕ
0,
0,
0x0395 | HAS_VOWEL, // Ἐ
0x0395 | HAS_VOWEL, // Ἑ
0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἒ
0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἓ
0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἔ
0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἕ
0,
0,
0x0397 | HAS_VOWEL, // ἠ
0x0397 | HAS_VOWEL, // ἡ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ἢ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ἣ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ἤ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ἥ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ἦ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ἧ
0x0397 | HAS_VOWEL, // Ἠ
0x0397 | HAS_VOWEL, // Ἡ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἢ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἣ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἤ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἥ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἦ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἧ
0x0399 | HAS_VOWEL, // ἰ
0x0399 | HAS_VOWEL, // ἱ
0x0399 | HAS_VOWEL | HAS_ACCENT, // ἲ
0x0399 | HAS_VOWEL | HAS_ACCENT, // ἳ
0x0399 | HAS_VOWEL | HAS_ACCENT, // ἴ
0x0399 | HAS_VOWEL | HAS_ACCENT, // ἵ
0x0399 | HAS_VOWEL | HAS_ACCENT, // ἶ
0x0399 | HAS_VOWEL | HAS_ACCENT, // ἷ
0x0399 | HAS_VOWEL, // Ἰ
0x0399 | HAS_VOWEL, // Ἱ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἲ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἳ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἴ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἵ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἶ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἷ
0x039F | HAS_VOWEL, // ὀ
0x039F | HAS_VOWEL, // ὁ
0x039F | HAS_VOWEL | HAS_ACCENT, // ὂ
0x039F | HAS_VOWEL | HAS_ACCENT, // ὃ
0x039F | HAS_VOWEL | HAS_ACCENT, // ὄ
0x039F | HAS_VOWEL | HAS_ACCENT, // ὅ
0,
0,
0x039F | HAS_VOWEL, // Ὀ
0x039F | HAS_VOWEL, // Ὁ
0x039F | HAS_VOWEL | HAS_ACCENT, // Ὂ
0x039F | HAS_VOWEL | HAS_ACCENT, // Ὃ
0x039F | HAS_VOWEL | HAS_ACCENT, // Ὄ
0x039F | HAS_VOWEL | HAS_ACCENT, // Ὅ
0,
0,
0x03A5 | HAS_VOWEL, // ὐ
0x03A5 | HAS_VOWEL, // ὑ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὒ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὓ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὔ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὕ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὖ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὗ
0,
0x03A5 | HAS_VOWEL, // Ὑ
0,
0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὓ
0,
0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὕ
0,
0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὗ
0x03A9 | HAS_VOWEL, // ὠ
0x03A9 | HAS_VOWEL, // ὡ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὢ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὣ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὤ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὥ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὦ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὧ
0x03A9 | HAS_VOWEL, // Ὠ
0x03A9 | HAS_VOWEL, // Ὡ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὢ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὣ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὤ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὥ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὦ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὧ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ὰ
0x0391 | HAS_VOWEL | HAS_ACCENT, // ά
0x0395 | HAS_VOWEL | HAS_ACCENT, // ὲ
0x0395 | HAS_VOWEL | HAS_ACCENT, // έ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ὴ
0x0397 | HAS_VOWEL | HAS_ACCENT, // ή
0x0399 | HAS_VOWEL | HAS_ACCENT, // ὶ
0x0399 | HAS_VOWEL | HAS_ACCENT, // ί
0x039F | HAS_VOWEL | HAS_ACCENT, // ὸ
0x039F | HAS_VOWEL | HAS_ACCENT, // ό
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὺ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὼ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ
0,
0,
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾀ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾁ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾂ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾃ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾄ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾅ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾆ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾇ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾈ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾉ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾊ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾋ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾌ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾍ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾎ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾏ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾐ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾑ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾒ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾓ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾔ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾕ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾖ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾗ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾘ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾙ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾚ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾛ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾜ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾝ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾞ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾟ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾠ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾡ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾢ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾣ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾤ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾥ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾦ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾧ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾨ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾩ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾪ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾫ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾬ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾭ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾮ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾯ
0x0391 | HAS_VOWEL, // ᾰ
0x0391 | HAS_VOWEL, // ᾱ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾲ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾳ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾴ
0,
0x0391 | HAS_VOWEL | HAS_ACCENT, // ᾶ
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾷ
0x0391 | HAS_VOWEL, // Ᾰ
0x0391 | HAS_VOWEL, // Ᾱ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ὰ
0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά
0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾼ
0,
0x0399 | HAS_VOWEL, // ι
0,
0,
0,
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῂ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῃ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῄ
0,
0x0397 | HAS_VOWEL | HAS_ACCENT, // ῆ
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῇ
0x0395 | HAS_VOWEL | HAS_ACCENT, // Ὲ
0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ὴ
0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῌ
0,
0,
0,
0x0399 | HAS_VOWEL, // ῐ
0x0399 | HAS_VOWEL, // ῑ
0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῒ
0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ
0,
0,
0x0399 | HAS_VOWEL | HAS_ACCENT, // ῖ
0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῗ
0x0399 | HAS_VOWEL, // Ῐ
0x0399 | HAS_VOWEL, // Ῑ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ὶ
0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί
0,
0,
0,
0,
0x03A5 | HAS_VOWEL, // ῠ
0x03A5 | HAS_VOWEL, // ῡ
0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῢ
0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ
0x03A1, // ῤ
0x03A1, // ῥ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // ῦ
0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῧ
0x03A5 | HAS_VOWEL, // Ῠ
0x03A5 | HAS_VOWEL, // Ῡ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὺ
0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ
0x03A1, // Ῥ
0,
0,
0,
0,
0,
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῲ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῳ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῴ
0,
0x03A9 | HAS_VOWEL | HAS_ACCENT, // ῶ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῷ
0x039F | HAS_VOWEL | HAS_ACCENT, // Ὸ
0x039F | HAS_VOWEL | HAS_ACCENT, // Ό
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὼ
0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ
0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῼ
0,
0,
0,
};
// U+2126 Ohm sign
private static final char data2126 = 0x03A9 | HAS_VOWEL; // Ω
private static final int getLetterData(int c) {
if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) {
return 0;
} else if (c <= 0x3ff) {
return data0370[c - 0x370];
} else if (c <= 0x1fff) {
return data1F00[c - 0x1f00];
} else if (c == 0x2126) {
return data2126;
} else {
return 0;
}
}
/**
* Returns a non-zero value for each of the Greek combining diacritics
* listed in The Unicode Standard, version 8, chapter 7.2 Greek,
* plus some perispomeni look-alikes.
*/
private static final int getDiacriticData(int c) {
switch (c) {
case '\u0300': // varia
case '\u0301': // tonos = oxia
case '\u0342': // perispomeni
case '\u0302': // circumflex can look like perispomeni
case '\u0303': // tilde can look like perispomeni
case '\u0311': // inverted breve can look like perispomeni
return HAS_ACCENT;
case '\u0308': // dialytika = diaeresis
return HAS_COMBINING_DIALYTIKA;
case '\u0344': // dialytika tonos
return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
case '\u0345': // ypogegrammeni = iota subscript
return HAS_YPOGEGRAMMENI;
case '\u0304': // macron
case '\u0306': // breve
case '\u0313': // comma above
case '\u0314': // reversed comma above
case '\u0343': // koronis
return HAS_OTHER_GREEK_DIACRITIC;
default:
return 0;
}
}
private static boolean isFollowedByCasedLetter(CharSequence s, int i) {
while (i < s.length()) {
int c = Character.codePointAt(s, i);
int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
if ((type & UCaseProps.IGNORABLE) != 0) {
// Case-ignorable, continue with the loop.
i += Character.charCount(c);
} else if (type != UCaseProps.NONE) {
return true; // Followed by cased letter.
} else {
return false; // Uncased and not case-ignorable.
}
}
return false; // Not followed by cased letter.
}
/**
* Greek string uppercasing with a state machine.
* Probably simpler than a stateless function that has to figure out complex context-before
* for each character.
* TODO: Try to re-consolidate one way or another with the non-Greek function.
*
* Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
* @throws IOException
*/
private static A toUpper(int options,
CharSequence src, A dest, Edits edits) throws IOException {
int state = 0;
for (int i = 0; i < src.length();) {
int c = Character.codePointAt(src, i);
int nextIndex = i + Character.charCount(c);
int nextState = 0;
int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
if ((type & UCaseProps.IGNORABLE) != 0) {
// c is case-ignorable
nextState |= (state & AFTER_CASED);
} else if (type != UCaseProps.NONE) {
// c is cased
nextState |= AFTER_CASED;
}
int data = getLetterData(c);
if (data > 0) {
int upper = data & UPPER_MASK;
// Add a dialytika to this iota or ypsilon vowel
// if we removed a tonos from the previous vowel,
// and that previous vowel did not also have (or gain) a dialytika.
// Adding one only to the final vowel in a longer sequence
// (which does not occur in normal writing) would require lookahead.
// Set the same flag as for preserving an existing dialytika.
if ((data & HAS_VOWEL) != 0
&& (state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) != 0
&& (upper == 'Ι' || upper == 'Υ')) {
data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) != 0 ? HAS_DIALYTIKA
: HAS_COMBINING_DIALYTIKA;
}
int numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
if ((data & HAS_YPOGEGRAMMENI) != 0) {
numYpogegrammeni = 1;
}
final boolean hasPrecomposedAccent = (data & HAS_ACCENT) != 0;
// Skip combining diacritics after this Greek letter.
while (nextIndex < src.length()) {
int diacriticData = getDiacriticData(src.charAt(nextIndex));
if (diacriticData != 0) {
data |= diacriticData;
if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
++numYpogegrammeni;
}
++nextIndex;
} else {
break; // not a Greek diacritic
}
}
if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT
: AFTER_VOWEL_WITH_COMBINING_ACCENT;
}
// Map according to Greek rules.
boolean addTonos = false;
if (upper == 'Η' &&
(data & HAS_ACCENT) != 0 &&
numYpogegrammeni == 0 &&
(state & AFTER_CASED) == 0 &&
!isFollowedByCasedLetter(src, nextIndex)) {
// Keep disjunctive "or" with (only) a tonos.
// We use the same "word boundary" conditions as for the Final_Sigma test.
if (hasPrecomposedAccent) {
upper = 'Ή'; // Preserve the precomposed form.
} else {
addTonos = true;
}
} else if ((data & HAS_DIALYTIKA) != 0) {
// Preserve a vowel with dialytika in precomposed form if it exists.
if (upper == 'Ι') {
upper = 'Ϊ';
data &= ~HAS_EITHER_DIALYTIKA;
} else if (upper == 'Υ') {
upper = 'Ϋ';
data &= ~HAS_EITHER_DIALYTIKA;
}
}
boolean change;
if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) {
change = true; // common, simple usage
} else {
// Find out first whether we are changing the text.
change = src.charAt(i) != upper || numYpogegrammeni > 0;
int i2 = i + 1;
if ((data & HAS_EITHER_DIALYTIKA) != 0) {
change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
++i2;
}
if (addTonos) {
change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
++i2;
}
int oldLength = nextIndex - i;
int newLength = (i2 - i) + numYpogegrammeni;
change |= oldLength != newLength;
if (change) {
if (edits != null) {
edits.addReplace(oldLength, newLength);
}
} else {
if (edits != null) {
edits.addUnchanged(oldLength);
}
// Write unchanged text?
change = (options & OMIT_UNCHANGED_TEXT) == 0;
}
}
if (change) {
dest.append((char)upper);
if ((data & HAS_EITHER_DIALYTIKA) != 0) {
dest.append('\u0308'); // restore or add a dialytika
}
if (addTonos) {
dest.append('\u0301');
}
while (numYpogegrammeni > 0) {
dest.append('Ι');
--numYpogegrammeni;
}
}
} else {
c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
appendResult(c, dest, nextIndex - i, options, edits);
}
i = nextIndex;
state = nextState;
}
return dest;
}
}
}