|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
|
|
package sun.nio.cs.ext; |
|
|
|
import java.nio.ByteBuffer; |
|
import java.nio.CharBuffer; |
|
import java.nio.charset.Charset; |
|
import java.nio.charset.CharsetEncoder; |
|
import java.nio.charset.CharsetDecoder; |
|
import java.nio.charset.CoderResult; |
|
import java.security.AccessController; |
|
import java.security.PrivilegedAction; |
|
import java.util.Arrays; |
|
import sun.nio.cs.CharsetMapping; |
|
|
|
/*
 *  5 types of entry in SJIS_X_0213/Unicode mapping table
 *
 *  (1)Single-Byte
 *     JIS_X_0213 does not define single-byte characters itself; the
 *     JIS_X_0201 entries are added in for the SJIS implementation.
 *
 *  (2)Double-Byte SJIS <-> BMP Unicode
 *     ex: 0x8140 U+3000    # IDEOGRAPHIC SPACE
 *
 *  (3)Double-Byte SJIS <-> Supplementary
 *     ex: 0xFCF0 U+2A61A   # <cjk> [2000] [Unicode3.1]
 *
 *  (4)Double-Byte SJIS <-> Composite
 *     ex: 0x83F6 U+31F7+309A    # [2000]
 *
 *  (5)"Windows-only" special mapping entries
 *     are handled by MS932_0213.
 */
|
|
|
public class SJIS_0213 extends Charset { |
|
public SJIS_0213() { |
|
super("x-SJIS_0213", ExtendedCharsets.aliasesFor("SJIS_0213")); |
|
} |
|
|
|
public boolean contains(Charset cs) { |
|
return ((cs.name().equals("US-ASCII")) |
|
|| (cs instanceof SJIS) |
|
|| (cs instanceof SJIS_0213)); |
|
} |
|
|
|
public CharsetDecoder newDecoder() { |
|
return new Decoder(this); |
|
} |
|
|
|
public CharsetEncoder newEncoder() { |
|
return new Encoder(this); |
|
} |
|
|
|
static CharsetMapping mapping = AccessController.doPrivileged( |
|
new PrivilegedAction<CharsetMapping>() { |
|
public CharsetMapping run() { |
|
return CharsetMapping.get(SJIS_0213.class.getResourceAsStream("sjis0213.dat")); |
|
} |
|
}); |
|
|
|
protected static class Decoder extends CharsetDecoder { |
|
protected static final char UNMAPPABLE = CharsetMapping.UNMAPPABLE_DECODING; |
|
|
|
protected Decoder(Charset cs) { |
|
super(cs, 0.5f, 1.0f); |
|
} |
|
|
|
private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { |
|
byte[] sa = src.array(); |
|
int sp = src.arrayOffset() + src.position(); |
|
int sl = src.arrayOffset() + src.limit(); |
|
|
|
char[] da = dst.array(); |
|
int dp = dst.arrayOffset() + dst.position(); |
|
int dl = dst.arrayOffset() + dst.limit(); |
|
|
|
try { |
|
while (sp < sl) { |
|
int b1 = sa[sp] & 0xff; |
|
char c = decodeSingle(b1); |
|
int inSize = 1, outSize = 1; |
|
char[] cc = null; |
|
if (c == UNMAPPABLE) { |
|
if (sl - sp < 2) |
|
return CoderResult.UNDERFLOW; |
|
int b2 = sa[sp + 1] & 0xff; |
|
c = decodeDouble(b1, b2); |
|
inSize++; |
|
if (c == UNMAPPABLE) { |
|
cc = decodeDoubleEx(b1, b2); |
|
if (cc == null) { |
|
if (decodeSingle(b2) == UNMAPPABLE) |
|
return CoderResult.unmappableForLength(2); |
|
else |
|
return CoderResult.unmappableForLength(1); |
|
} |
|
outSize++; |
|
} |
|
} |
|
if (dl - dp < outSize) |
|
return CoderResult.OVERFLOW; |
|
if (outSize == 2) { |
|
da[dp++] = cc[0]; |
|
da[dp++] = cc[1]; |
|
} else { |
|
da[dp++] = c; |
|
} |
|
sp += inSize; |
|
} |
|
return CoderResult.UNDERFLOW; |
|
} finally { |
|
src.position(sp - src.arrayOffset()); |
|
dst.position(dp - dst.arrayOffset()); |
|
} |
|
} |
|
|
|
private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { |
|
int mark = src.position(); |
|
try { |
|
while (src.hasRemaining()) { |
|
char[] cc = null; |
|
int b1 = src.get() & 0xff; |
|
char c = decodeSingle(b1); |
|
int inSize = 1, outSize = 1; |
|
if (c == UNMAPPABLE) { |
|
if (src.remaining() < 1) |
|
return CoderResult.UNDERFLOW; |
|
int b2 = src.get() & 0xff; |
|
inSize++; |
|
c = decodeDouble(b1, b2); |
|
if (c == UNMAPPABLE) { |
|
cc = decodeDoubleEx(b1, b2); |
|
if (cc == null) { |
|
if (decodeSingle(b2) == UNMAPPABLE) |
|
return CoderResult.unmappableForLength(2); |
|
else |
|
return CoderResult.unmappableForLength(1); |
|
} |
|
outSize++; |
|
} |
|
} |
|
if (dst.remaining() < outSize) |
|
return CoderResult.OVERFLOW; |
|
if (outSize == 2) { |
|
dst.put(cc[0]); |
|
dst.put(cc[1]); |
|
} else { |
|
dst.put(c); |
|
} |
|
mark += inSize; |
|
} |
|
return CoderResult.UNDERFLOW; |
|
} finally { |
|
src.position(mark); |
|
} |
|
} |
|
|
|
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { |
|
if (src.hasArray() && dst.hasArray()) |
|
return decodeArrayLoop(src, dst); |
|
else |
|
return decodeBufferLoop(src, dst); |
|
} |
|
|
|
protected char decodeSingle(int b) { |
|
return mapping.decodeSingle(b); |
|
} |
|
|
|
protected char decodeDouble(int b1, int b2) { |
|
return mapping.decodeDouble(b1, b2); |
|
} |
|
|
|
private char[] cc = new char[2]; |
|
private CharsetMapping.Entry comp = new CharsetMapping.Entry(); |
|
protected char[] decodeDoubleEx(int b1, int b2) { |
|
int db = (b1 << 8) | b2; |
|
if (mapping.decodeSurrogate(db, cc) != null) |
|
return cc; |
|
comp.bs = db; |
|
if (mapping.decodeComposite(comp, cc) != null) |
|
return cc; |
|
return null; |
|
} |
|
} |
|
|
|
protected static class Encoder extends CharsetEncoder { |
|
protected static final int UNMAPPABLE = CharsetMapping.UNMAPPABLE_ENCODING; |
|
protected static final int MAX_SINGLEBYTE = 0xff; |
|
|
|
protected Encoder(Charset cs) { |
|
super(cs, 2.0f, 2.0f); |
|
} |
|
|
|
public boolean canEncode(char c) { |
|
return (encodeChar(c) != UNMAPPABLE); |
|
} |
|
|
|
protected int encodeChar(char ch) { |
|
return mapping.encodeChar(ch); |
|
} |
|
|
|
protected int encodeSurrogate(char hi, char lo) { |
|
return mapping.encodeSurrogate(hi, lo); |
|
} |
|
|
|
private CharsetMapping.Entry comp = new CharsetMapping.Entry(); |
|
protected int encodeComposite(char base, char cc) { |
|
comp.cp = base; |
|
comp.cp2 = cc; |
|
return mapping.encodeComposite(comp); |
|
} |
|
|
|
protected boolean isCompositeBase(char ch) { |
|
comp.cp = ch; |
|
return mapping.isCompositeBase(comp); |
|
} |
|
|
|
// Unlike surrogate pair, the base character of a base+cc composite |
|
// itself is a legal codepoint in 0213, if we simply return UNDERFLOW |
|
// when a base candidate is the last input char in the CharBuffer, like |
|
// what we do for the surrogte pair, encoding will fail if this base |
|
// character is indeed the last character of the input char sequence. |
|
// Keep this base candidate in "leftoverBase" so we can flush it out |
|
|
|
char leftoverBase = 0; |
|
protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { |
|
char[] sa = src.array(); |
|
int sp = src.arrayOffset() + src.position(); |
|
int sl = src.arrayOffset() + src.limit(); |
|
byte[] da = dst.array(); |
|
int dp = dst.arrayOffset() + dst.position(); |
|
int dl = dst.arrayOffset() + dst.limit(); |
|
|
|
try { |
|
while (sp < sl) { |
|
int db; |
|
char c = sa[sp]; |
|
if (leftoverBase != 0) { |
|
boolean isComp = false; |
|
db = encodeComposite(leftoverBase, c); |
|
if (db == UNMAPPABLE) |
|
db = encodeChar(leftoverBase); |
|
else |
|
isComp = true; |
|
if (dl - dp < 2) |
|
return CoderResult.OVERFLOW; |
|
da[dp++] = (byte)(db >> 8); |
|
da[dp++] = (byte)db; |
|
leftoverBase = 0; |
|
if (isComp) { |
|
sp++; |
|
continue; |
|
} |
|
} |
|
if (isCompositeBase(c)) { |
|
leftoverBase = c; |
|
} else { |
|
db = encodeChar(c); |
|
if (db <= MAX_SINGLEBYTE) { |
|
if (dl <= dp) |
|
return CoderResult.OVERFLOW; |
|
da[dp++] = (byte)db; |
|
} else if (db != UNMAPPABLE) { |
|
if (dl - dp < 2) |
|
return CoderResult.OVERFLOW; |
|
da[dp++] = (byte)(db >> 8); |
|
da[dp++] = (byte)db; |
|
} else if (Character.isHighSurrogate(c)) { |
|
if ((sp + 1) == sl) |
|
return CoderResult.UNDERFLOW; |
|
char c2 = sa[sp + 1]; |
|
if (!Character.isLowSurrogate(c2)) |
|
return CoderResult.malformedForLength(1); |
|
db = encodeSurrogate(c, c2); |
|
if (db == UNMAPPABLE) |
|
return CoderResult.unmappableForLength(2); |
|
if (dl - dp < 2) |
|
return CoderResult.OVERFLOW; |
|
da[dp++] = (byte)(db >> 8); |
|
da[dp++] = (byte)db; |
|
sp++; |
|
} else if (Character.isLowSurrogate(c)) { |
|
return CoderResult.malformedForLength(1); |
|
} else { |
|
return CoderResult.unmappableForLength(1); |
|
} |
|
} |
|
sp++; |
|
} |
|
return CoderResult.UNDERFLOW; |
|
} finally { |
|
src.position(sp - src.arrayOffset()); |
|
dst.position(dp - dst.arrayOffset()); |
|
} |
|
} |
|
|
|
protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { |
|
int mark = src.position(); |
|
try { |
|
while (src.hasRemaining()) { |
|
int db; |
|
char c = src.get(); |
|
if (leftoverBase != 0) { |
|
boolean isComp = false; |
|
db = encodeComposite(leftoverBase, c); |
|
if (db == UNMAPPABLE) |
|
db = encodeChar(leftoverBase); |
|
else |
|
isComp = true; |
|
if (dst.remaining() < 2) |
|
return CoderResult.OVERFLOW; |
|
dst.put((byte)(db >> 8)); |
|
dst.put((byte)(db)); |
|
leftoverBase = 0; |
|
if (isComp) { |
|
mark++; |
|
continue; |
|
} |
|
} |
|
if (isCompositeBase(c)) { |
|
leftoverBase = c; |
|
} else { |
|
db = encodeChar(c); |
|
if (db <= MAX_SINGLEBYTE) { |
|
if (dst.remaining() < 1) |
|
return CoderResult.OVERFLOW; |
|
dst.put((byte)db); |
|
} else if (db != UNMAPPABLE) { |
|
if (dst.remaining() < 2) |
|
return CoderResult.OVERFLOW; |
|
dst.put((byte)(db >> 8)); |
|
dst.put((byte)(db)); |
|
} else if (Character.isHighSurrogate(c)) { |
|
if (!src.hasRemaining()) |
|
return CoderResult.UNDERFLOW; |
|
char c2 = src.get(); |
|
if (!Character.isLowSurrogate(c2)) |
|
return CoderResult.malformedForLength(1); |
|
db = encodeSurrogate(c, c2); |
|
if (db == UNMAPPABLE) |
|
return CoderResult.unmappableForLength(2); |
|
if (dst.remaining() < 2) |
|
return CoderResult.OVERFLOW; |
|
dst.put((byte)(db >> 8)); |
|
dst.put((byte)(db)); |
|
mark++; |
|
} else if (Character.isLowSurrogate(c)) { |
|
return CoderResult.malformedForLength(1); |
|
} else { |
|
return CoderResult.unmappableForLength(1); |
|
} |
|
} |
|
mark++; |
|
} |
|
return CoderResult.UNDERFLOW; |
|
} finally { |
|
src.position(mark); |
|
} |
|
} |
|
|
|
protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { |
|
if (src.hasArray() && dst.hasArray()) |
|
return encodeArrayLoop(src, dst); |
|
else |
|
return encodeBufferLoop(src, dst); |
|
} |
|
|
|
protected CoderResult implFlush(ByteBuffer dst) { |
|
if (leftoverBase > 0) { |
|
if (dst.remaining() < 2) |
|
return CoderResult.OVERFLOW; |
|
int db = encodeChar(leftoverBase); |
|
dst.put((byte)(db >> 8)); |
|
dst.put((byte)(db)); |
|
leftoverBase = 0; |
|
} |
|
return CoderResult.UNDERFLOW; |
|
} |
|
|
|
protected void implReset() { |
|
leftoverBase = 0; |
|
} |
|
} |
|
} |