|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
/* |
|
******************************************************************************* |
|
* Copyright (C) 1996-2011, International Business Machines Corporation and * |
|
* others. All Rights Reserved. * |
|
******************************************************************************* |
|
*/ |
|
|
|
package sun.text.normalizer; |
|
|
|
import java.io.IOException; |
|
import java.util.Locale; |
|
|
|
final class Utility { |
|
|
|
|
|
|
|
|
|
*/ |
|
public static final String escape(String s) { |
|
StringBuilder buf = new StringBuilder(); |
|
for (int i=0; i<s.length(); ) { |
|
int c = Character.codePointAt(s, i); |
|
i += UTF16.getCharCount(c); |
|
if (c >= ' ' && c <= 0x007F) { |
|
if (c == '\\') { |
|
buf.append("\\\\"); |
|
} else { |
|
buf.append((char)c); |
|
} |
|
} else { |
|
boolean four = c <= 0xFFFF; |
|
buf.append(four ? "\\u" : "\\U"); |
|
buf.append(hex(c, four ? 4 : 8)); |
|
} |
|
} |
|
return buf.toString(); |
|
} |
|
|
|
|
|
private static final char[] UNESCAPE_MAP = { |
|
/*" 0x22, 0x22 */ |
|
/*' 0x27, 0x27 */ |
|
/*? 0x3F, 0x3F */ |
|
/*\ 0x5C, 0x5C */ |
|
0x61, 0x07, |
|
0x62, 0x08, |
|
0x65, 0x1b, |
|
0x66, 0x0c, |
|
0x6E, 0x0a, |
|
0x72, 0x0d, |
|
0x74, 0x09, |
|
0x76, 0x0b |
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static int unescapeAt(String s, int[] offset16) { |
|
int c; |
|
int result = 0; |
|
int n = 0; |
|
int minDig = 0; |
|
int maxDig = 0; |
|
int bitsPerDigit = 4; |
|
int dig; |
|
int i; |
|
boolean braces = false; |
|
|
|
|
|
int offset = offset16[0]; |
|
int length = s.length(); |
|
if (offset < 0 || offset >= length) { |
|
return -1; |
|
} |
|
|
|
|
|
c = Character.codePointAt(s, offset); |
|
offset += UTF16.getCharCount(c); |
|
|
|
|
|
switch (c) { |
|
case 'u': |
|
minDig = maxDig = 4; |
|
break; |
|
case 'U': |
|
minDig = maxDig = 8; |
|
break; |
|
case 'x': |
|
minDig = 1; |
|
if (offset < length && UTF16.charAt(s, offset) == 0x7B ) { |
|
++offset; |
|
braces = true; |
|
maxDig = 8; |
|
} else { |
|
maxDig = 2; |
|
} |
|
break; |
|
default: |
|
dig = UCharacter.digit(c, 8); |
|
if (dig >= 0) { |
|
minDig = 1; |
|
maxDig = 3; |
|
n = 1; |
|
bitsPerDigit = 3; |
|
result = dig; |
|
} |
|
break; |
|
} |
|
if (minDig != 0) { |
|
while (offset < length && n < maxDig) { |
|
c = UTF16.charAt(s, offset); |
|
dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16); |
|
if (dig < 0) { |
|
break; |
|
} |
|
result = (result << bitsPerDigit) | dig; |
|
offset += UTF16.getCharCount(c); |
|
++n; |
|
} |
|
if (n < minDig) { |
|
return -1; |
|
} |
|
if (braces) { |
|
if (c != 0x7D ) { |
|
return -1; |
|
} |
|
++offset; |
|
} |
|
if (result < 0 || result >= 0x110000) { |
|
return -1; |
|
} |
|
// If an escape sequence specifies a lead surrogate, see |
|
// if there is a trail surrogate after it, either as an |
|
// escape or as a literal. If so, join them up into a |
|
|
|
if (offset < length && |
|
UTF16.isLeadSurrogate((char) result)) { |
|
int ahead = offset+1; |
|
c = s.charAt(offset); |
|
if (c == '\\' && ahead < length) { |
|
int o[] = new int[] { ahead }; |
|
c = unescapeAt(s, o); |
|
ahead = o[0]; |
|
} |
|
if (UTF16.isTrailSurrogate((char) c)) { |
|
offset = ahead; |
|
result = UCharacterProperty.getRawSupplementary( |
|
(char) result, (char) c); |
|
} |
|
} |
|
offset16[0] = offset; |
|
return result; |
|
} |
|
|
|
|
|
for (i=0; i<UNESCAPE_MAP.length; i+=2) { |
|
if (c == UNESCAPE_MAP[i]) { |
|
offset16[0] = offset; |
|
return UNESCAPE_MAP[i+1]; |
|
} else if (c < UNESCAPE_MAP[i]) { |
|
break; |
|
} |
|
} |
|
|
|
|
|
if (c == 'c' && offset < length) { |
|
c = UTF16.charAt(s, offset); |
|
offset16[0] = offset + UTF16.getCharCount(c); |
|
return 0x1F & c; |
|
} |
|
|
|
|
|
* the backslash to generically escape the next character. */ |
|
offset16[0] = offset; |
|
return c; |
|
} |
|
|
|
|
|
|
|
*/ |
|
public static String hex(long i, int places) { |
|
if (i == Long.MIN_VALUE) return "-8000000000000000"; |
|
boolean negative = i < 0; |
|
if (negative) { |
|
i = -i; |
|
} |
|
String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH); |
|
if (result.length() < places) { |
|
result = "0000000000000000".substring(result.length(),places) + result; |
|
} |
|
if (negative) { |
|
return '-' + result; |
|
} |
|
return result; |
|
} |
|
|
|
static final char DIGITS[] = { |
|
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', |
|
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', |
|
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', |
|
'U', 'V', 'W', 'X', 'Y', 'Z' |
|
}; |
|
|
|
|
|
|
|
|
|
*/ |
|
public static boolean isUnprintable(int c) { |
|
|
|
return !(c >= 0x20 && c <= 0x7E); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static <T extends Appendable> boolean escapeUnprintable(T result, int c) { |
|
try { |
|
if (isUnprintable(c)) { |
|
result.append('\\'); |
|
if ((c & ~0xFFFF) != 0) { |
|
result.append('U'); |
|
result.append(DIGITS[0xF&(c>>28)]); |
|
result.append(DIGITS[0xF&(c>>24)]); |
|
result.append(DIGITS[0xF&(c>>20)]); |
|
result.append(DIGITS[0xF&(c>>16)]); |
|
} else { |
|
result.append('u'); |
|
} |
|
result.append(DIGITS[0xF&(c>>12)]); |
|
result.append(DIGITS[0xF&(c>>8)]); |
|
result.append(DIGITS[0xF&(c>>4)]); |
|
result.append(DIGITS[0xF&c]); |
|
return true; |
|
} |
|
return false; |
|
} catch (IOException e) { |
|
throw new IllegalArgumentException(e); |
|
} |
|
} |
|
} |