Back to index...

	/*
	* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/

	package java.lang;

	import java.io.UnsupportedEncodingException;
	import java.lang.ref.SoftReference;
	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetDecoder;
	import java.nio.charset.CharsetEncoder;
	import java.nio.charset.CharacterCodingException;
	import java.nio.charset.CoderResult;
	import java.nio.charset.CodingErrorAction;
	import java.nio.charset.IllegalCharsetNameException;
	import java.nio.charset.MalformedInputException;
	import java.nio.charset.UnmappableCharacterException;
	import java.nio.charset.UnsupportedCharsetException;
	import java.util.Arrays;
	import jdk.internal.HotSpotIntrinsicCandidate;
	import sun.nio.cs.HistoricallyNamedCharset;
	import sun.nio.cs.ArrayDecoder;
	import sun.nio.cs.ArrayEncoder;

	import static java.lang.String.LATIN1;
	import static java.lang.String.UTF16;
	import static java.lang.String.COMPACT_STRINGS;
	import static java.lang.Character.isSurrogate;
	import static java.lang.Character.highSurrogate;
	import static java.lang.Character.lowSurrogate;
	import static java.lang.Character.isSupplementaryCodePoint;
	import static java.lang.StringUTF16.putChar;

	/**
	* Utility class for string encoding and decoding.
	*/

	class StringCoding {

	private StringCoding() { }

	/** The cached coders for each thread */
	private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
	new ThreadLocal<>();
	private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
	new ThreadLocal<>();

	private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
	private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
	private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;

	private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
	SoftReference<T> sr = tl.get();
	if (sr == null)
	return null;
	return sr.get();
	}

	private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
	tl.set(new SoftReference<>(ob));
	}

	// Trim the given byte array to the given length
	private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
	if (len == ba.length && (isTrusted \|\| System.getSecurityManager() == null))
	return ba;
	else
	return Arrays.copyOf(ba, len);
	}

	private static int scale(int len, float expansionFactor) {
	// We need to perform double, not float, arithmetic; otherwise
	// we lose low order bits when len is larger than 2**24.
	return (int)(len * (double)expansionFactor);
	}

	private static Charset lookupCharset(String csn) {
	if (Charset.isSupported(csn)) {
	try {
	return Charset.forName(csn);
	} catch (UnsupportedCharsetException x) {
	throw new Error(x);
	}
	}
	return null;
	}

	static class Result {
	byte[] value;
	byte coder;

	Result with() {
	coder = COMPACT_STRINGS ? LATIN1 : UTF16;
	value = new byte[0];
	return this;
	}

	Result with(char[] val, int off, int len) {
	if (String.COMPACT_STRINGS) {
	byte[] bs = StringUTF16.compress(val, off, len);
	if (bs != null) {
	value = bs;
	coder = LATIN1;
	return this;
	}
	}
	coder = UTF16;
	value = StringUTF16.toBytes(val, off, len);
	return this;
	}

	Result with(byte[] val, byte coder) {
	this.coder = coder;
	value = val;
	return this;
	}
	}

	@HotSpotIntrinsicCandidate
	public static boolean hasNegatives(byte[] ba, int off, int len) {
	for (int i = off; i < off + len; i++) {
	if (ba[i] < 0) {
	return true;
	}
	}
	return false;
	}

	// -- Decoding --
	static class StringDecoder {
	private final String requestedCharsetName;
	private final Charset cs;
	private final boolean isASCIICompatible;
	private final CharsetDecoder cd;
	protected final Result result;

	StringDecoder(Charset cs, String rcn) {
	this.requestedCharsetName = rcn;
	this.cs = cs;
	this.cd = cs.newDecoder()
	.onMalformedInput(CodingErrorAction.REPLACE)
	.onUnmappableCharacter(CodingErrorAction.REPLACE);
	this.result = new Result();
	this.isASCIICompatible = (cd instanceof ArrayDecoder) &&
	((ArrayDecoder)cd).isASCIICompatible();
	}

	String charsetName() {
	if (cs instanceof HistoricallyNamedCharset)
	return ((HistoricallyNamedCharset)cs).historicalName();
	return cs.name();
	}

	final String requestedCharsetName() {
	return requestedCharsetName;
	}

	Result decode(byte[] ba, int off, int len) {
	if (len == 0) {
	return result.with();
	}
	// fastpath for ascii compatible
	if (isASCIICompatible && !hasNegatives(ba, off, len)) {
	if (COMPACT_STRINGS) {
	return result.with(Arrays.copyOfRange(ba, off, off + len),
	LATIN1);
	} else {
	return result.with(StringLatin1.inflate(ba, off, len), UTF16);
	}
	}
	int en = scale(len, cd.maxCharsPerByte());
	char[] ca = new char[en];
	if (cd instanceof ArrayDecoder) {
	int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
	return result.with(ca, 0, clen);
	}
	cd.reset();
	ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
	CharBuffer cb = CharBuffer.wrap(ca);
	try {
	CoderResult cr = cd.decode(bb, cb, true);
	if (!cr.isUnderflow())
	cr.throwException();
	cr = cd.flush(cb);
	if (!cr.isUnderflow())
	cr.throwException();
	} catch (CharacterCodingException x) {
	// Substitution is always enabled,
	// so this shouldn't happen
	throw new Error(x);
	}
	return result.with(ca, 0, cb.position());
	}
	}

	static Result decode(String charsetName, byte[] ba, int off, int len)
	throws UnsupportedEncodingException
	{
	StringDecoder sd = deref(decoder);
	String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
	if ((sd == null) \|\| !(csn.equals(sd.requestedCharsetName())
	\|\| csn.equals(sd.charsetName()))) {
	sd = null;
	try {
	Charset cs = lookupCharset(csn);
	if (cs != null) {
	if (cs == UTF_8) {
	return decodeUTF8(ba, off, len, true);
	}
	if (cs == ISO_8859_1) {
	return decodeLatin1(ba, off, len);
	}
	if (cs == US_ASCII) {
	return decodeASCII(ba, off, len);
	}
	sd = new StringDecoder(cs, csn);
	}
	} catch (IllegalCharsetNameException x) {}
	if (sd == null)
	throw new UnsupportedEncodingException(csn);
	set(decoder, sd);
	}
	return sd.decode(ba, off, len);
	}

	static Result decode(Charset cs, byte[] ba, int off, int len) {
	if (cs == UTF_8) {
	return decodeUTF8(ba, off, len, true);
	}
	if (cs == ISO_8859_1) {
	return decodeLatin1(ba, off, len);
	}
	if (cs == US_ASCII) {
	return decodeASCII(ba, off, len);
	}

	// (1)We never cache the "external" cs, the only benefit of creating
	// an additional StringDe/Encoder object to wrap it is to share the
	// de/encode() method. These SD/E objects are short-lived, the young-gen
	// gc should be able to take care of them well. But the best approach
	// is still not to generate them if not really necessary.
	// (2)The defensive copy of the input byte/char[] has a big performance
	// impact, as well as the outgoing result byte/char[]. Need to do the
	// optimization check of (sm==null && classLoader0==null) for both.
	// (3)There might be a timing gap in isTrusted setting. getClassLoader0()
	// is only checked (and then isTrusted gets set) when (SM==null). It is
	// possible that the SM==null for now but then SM is NOT null later
	// when safeTrim() is invoked...the "safe" way to do is to redundant
	// check (... && (isTrusted \|\| SM == null \|\| getClassLoader0())) in trim
	// but it then can be argued that the SM is null when the operation
	// is started...
	CharsetDecoder cd = cs.newDecoder();
	// ascii fastpath
	if ((cd instanceof ArrayDecoder) &&
	((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
	return decodeLatin1(ba, off, len);
	}
	int en = scale(len, cd.maxCharsPerByte());
	if (len == 0) {
	return new Result().with();
	}
	cd.onMalformedInput(CodingErrorAction.REPLACE)
	.onUnmappableCharacter(CodingErrorAction.REPLACE)
	.reset();
	char[] ca = new char[en];
	if (cd instanceof ArrayDecoder) {
	int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
	return new Result().with(ca, 0, clen);
	}
	if (cs.getClass().getClassLoader0() != null &&
	System.getSecurityManager() != null) {
	ba = Arrays.copyOfRange(ba, off, off + len);
	off = 0;
	}
	ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
	CharBuffer cb = CharBuffer.wrap(ca);
	try {
	CoderResult cr = cd.decode(bb, cb, true);
	if (!cr.isUnderflow())
	cr.throwException();
	cr = cd.flush(cb);
	if (!cr.isUnderflow())
	cr.throwException();
	} catch (CharacterCodingException x) {
	// Substitution is always enabled,
	// so this shouldn't happen
	throw new Error(x);
	}
	return new Result().with(ca, 0, cb.position());
	}

	static Result decode(byte[] ba, int off, int len) {
	Charset cs = Charset.defaultCharset();
	if (cs == UTF_8) {
	return decodeUTF8(ba, off, len, true);
	}
	if (cs == ISO_8859_1) {
	return decodeLatin1(ba, off, len);
	}
	if (cs == US_ASCII) {
	return decodeASCII(ba, off, len);
	}
	StringDecoder sd = deref(decoder);
	if (sd == null \|\| !cs.name().equals(sd.cs.name())) {
	sd = new StringDecoder(cs, cs.name());
	set(decoder, sd);
	}
	return sd.decode(ba, off, len);
	}

	// -- Encoding --
	private static class StringEncoder {
	private Charset cs;
	private CharsetEncoder ce;
	private final boolean isASCIICompatible;
	private final String requestedCharsetName;
	private final boolean isTrusted;

	private StringEncoder(Charset cs, String rcn) {
	this.requestedCharsetName = rcn;
	this.cs = cs;
	this.ce = cs.newEncoder()
	.onMalformedInput(CodingErrorAction.REPLACE)
	.onUnmappableCharacter(CodingErrorAction.REPLACE);
	this.isTrusted = (cs.getClass().getClassLoader0() == null);
	this.isASCIICompatible = (ce instanceof ArrayEncoder) &&
	((ArrayEncoder)ce).isASCIICompatible();
	}

	String charsetName() {
	if (cs instanceof HistoricallyNamedCharset)
	return ((HistoricallyNamedCharset)cs).historicalName();
	return cs.name();
	}

	final String requestedCharsetName() {
	return requestedCharsetName;
	}

	byte[] encode(byte coder, byte[] val) {
	// fastpath for ascii compatible
	if (coder == LATIN1 && isASCIICompatible &&
	!hasNegatives(val, 0, val.length)) {
	return Arrays.copyOf(val, val.length);
	}
	int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
	int en = scale(len, ce.maxBytesPerChar());
	byte[] ba = new byte[en];
	if (len == 0) {
	return ba;
	}
	if (ce instanceof ArrayEncoder) {
	int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
	: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
	if (blen != -1) {
	return safeTrim(ba, blen, isTrusted);
	}
	}
	char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
	: StringUTF16.toChars(val);
	ce.reset();
	ByteBuffer bb = ByteBuffer.wrap(ba);
	CharBuffer cb = CharBuffer.wrap(ca, 0, len);
	try {
	CoderResult cr = ce.encode(cb, bb, true);
	if (!cr.isUnderflow())
	cr.throwException();
	cr = ce.flush(bb);
	if (!cr.isUnderflow())
	cr.throwException();
	} catch (CharacterCodingException x) {
	// Substitution is always enabled,
	// so this shouldn't happen
	throw new Error(x);
	}
	return safeTrim(ba, bb.position(), isTrusted);
	}
	}

	static byte[] encode(String charsetName, byte coder, byte[] val)
	throws UnsupportedEncodingException
	{
	StringEncoder se = deref(encoder);
	String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
	if ((se == null) \|\| !(csn.equals(se.requestedCharsetName())
	\|\| csn.equals(se.charsetName()))) {
	se = null;
	try {
	Charset cs = lookupCharset(csn);
	if (cs != null) {
	if (cs == UTF_8) {
	return encodeUTF8(coder, val, true);
	}
	if (cs == ISO_8859_1) {
	return encode8859_1(coder, val);
	}
	if (cs == US_ASCII) {
	return encodeASCII(coder, val);
	}
	se = new StringEncoder(cs, csn);
	}
	} catch (IllegalCharsetNameException x) {}
	if (se == null) {
	throw new UnsupportedEncodingException (csn);
	}
	set(encoder, se);
	}
	return se.encode(coder, val);
	}

	static byte[] encode(Charset cs, byte coder, byte[] val) {
	if (cs == UTF_8) {
	return encodeUTF8(coder, val, true);
	}
	if (cs == ISO_8859_1) {
	return encode8859_1(coder, val);
	}
	if (cs == US_ASCII) {
	return encodeASCII(coder, val);
	}
	CharsetEncoder ce = cs.newEncoder();
	// fastpath for ascii compatible
	if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
	((ArrayEncoder)ce).isASCIICompatible() &&
	!hasNegatives(val, 0, val.length)))) {
	return Arrays.copyOf(val, val.length);
	}
	int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
	int en = scale(len, ce.maxBytesPerChar());
	byte[] ba = new byte[en];
	if (len == 0) {
	return ba;
	}
	ce.onMalformedInput(CodingErrorAction.REPLACE)
	.onUnmappableCharacter(CodingErrorAction.REPLACE)
	.reset();
	if (ce instanceof ArrayEncoder) {
	int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
	: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
	if (blen != -1) {
	return safeTrim(ba, blen, true);
	}
	}
	boolean isTrusted = cs.getClass().getClassLoader0() == null \|\|
	System.getSecurityManager() == null;
	char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
	: StringUTF16.toChars(val);
	ByteBuffer bb = ByteBuffer.wrap(ba);
	CharBuffer cb = CharBuffer.wrap(ca, 0, len);
	try {
	CoderResult cr = ce.encode(cb, bb, true);
	if (!cr.isUnderflow())
	cr.throwException();
	cr = ce.flush(bb);
	if (!cr.isUnderflow())
	cr.throwException();
	} catch (CharacterCodingException x) {
	throw new Error(x);
	}
	return safeTrim(ba, bb.position(), isTrusted);
	}

	static byte[] encode(byte coder, byte[] val) {
	Charset cs = Charset.defaultCharset();
	if (cs == UTF_8) {
	return encodeUTF8(coder, val, true);
	}
	if (cs == ISO_8859_1) {
	return encode8859_1(coder, val);
	}
	if (cs == US_ASCII) {
	return encodeASCII(coder, val);
	}
	StringEncoder se = deref(encoder);
	if (se == null \|\| !cs.name().equals(se.cs.name())) {
	se = new StringEncoder(cs, cs.name());
	set(encoder, se);
	}
	return se.encode(coder, val);
	}

	/**
	* Print a message directly to stderr, bypassing all character conversion
	* methods.
	* @param msg message to print
	*/
	private static native void err(String msg);

	/* The cached Result for each thread */
	private static final ThreadLocal<StringCoding.Result>
	resultCached = new ThreadLocal<>() {
	protected StringCoding.Result initialValue() {
	return new StringCoding.Result();
	}};

	////////////////////////// ascii //////////////////////////////

	private static Result decodeASCII(byte[] ba, int off, int len) {
	Result result = resultCached.get();
	if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
	return result.with(Arrays.copyOfRange(ba, off, off + len),
	LATIN1);
	}
	byte[] dst = new byte[len<<1];
	int dp = 0;
	while (dp < len) {
	int b = ba[off++];
	putChar(dst, dp++, (b >= 0) ? (char)b : repl);
	}
	return result.with(dst, UTF16);
	}

	private static byte[] encodeASCII(byte coder, byte[] val) {
	if (coder == LATIN1) {
	byte[] dst = new byte[val.length];
	for (int i = 0; i < val.length; i++) {
	if (val[i] < 0) {
	dst[i] = '?';
	} else {
	dst[i] = val[i];
	}
	}
	return dst;
	}
	int len = val.length >> 1;
	byte[] dst = new byte[len];
	int dp = 0;
	for (int i = 0; i < len; i++) {
	char c = StringUTF16.getChar(val, i);
	if (c < 0x80) {
	dst[dp++] = (byte)c;
	continue;
	}
	if (Character.isHighSurrogate(c) && i + 1 < len &&
	Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
	i++;
	}
	dst[dp++] = '?';
	}
	if (len == dp) {
	return dst;
	}
	return Arrays.copyOf(dst, dp);
	}

	////////////////////////// latin1/8859_1 ///////////////////////////

	private static Result decodeLatin1(byte[] ba, int off, int len) {
	Result result = resultCached.get();
	if (COMPACT_STRINGS) {
	return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
	} else {
	return result.with(StringLatin1.inflate(ba, off, len), UTF16);
	}
	}

	@HotSpotIntrinsicCandidate
	private static int implEncodeISOArray(byte[] sa, int sp,
	byte[] da, int dp, int len) {
	int i = 0;
	for (; i < len; i++) {
	char c = StringUTF16.getChar(sa, sp++);
	if (c > '\u00FF')
	break;
	da[dp++] = (byte)c;
	}
	return i;
	}

	private static byte[] encode8859_1(byte coder, byte[] val) {
	return encode8859_1(coder, val, true);
	}

	private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
	if (coder == LATIN1) {
	return Arrays.copyOf(val, val.length);
	}
	int len = val.length >> 1;
	byte[] dst = new byte[len];
	int dp = 0;
	int sp = 0;
	int sl = len;
	while (sp < sl) {
	int ret = implEncodeISOArray(val, sp, dst, dp, len);
	sp = sp + ret;
	dp = dp + ret;
	if (ret != len) {
	if (!doReplace) {
	throwUnmappable(sp, 1);
	}
	char c = StringUTF16.getChar(val, sp++);
	if (Character.isHighSurrogate(c) && sp < sl &&
	Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
	sp++;
	}
	dst[dp++] = '?';
	len = sl - sp;
	}
	}
	if (dp == dst.length) {
	return dst;
	}
	return Arrays.copyOf(dst, dp);
	}

	//////////////////////////////// utf8 ////////////////////////////////////

	private static boolean isNotContinuation(int b) {
	return (b & 0xc0) != 0x80;
	}

	private static boolean isMalformed3(int b1, int b2, int b3) {
	return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) \|\|
	(b2 & 0xc0) != 0x80 \|\| (b3 & 0xc0) != 0x80;
	}

	private static boolean isMalformed3_2(int b1, int b2) {
	return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) \|\|
	(b2 & 0xc0) != 0x80;
	}

	private static boolean isMalformed4(int b2, int b3, int b4) {
	return (b2 & 0xc0) != 0x80 \|\| (b3 & 0xc0) != 0x80 \|\|
	(b4 & 0xc0) != 0x80;
	}

	private static boolean isMalformed4_2(int b1, int b2) {
	return (b1 == 0xf0 && (b2 < 0x90 \|\| b2 > 0xbf)) \|\|
	(b1 == 0xf4 && (b2 & 0xf0) != 0x80) \|\|
	(b2 & 0xc0) != 0x80;
	}

	private static boolean isMalformed4_3(int b3) {
	return (b3 & 0xc0) != 0x80;
	}

	// for nb == 3/4
	private static int malformedN(byte[] src, int sp, int nb) {
	if (nb == 3) {
	int b1 = src[sp++];
	int b2 = src[sp++]; // no need to lookup b3
	return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) \|\|
	isNotContinuation(b2)) ? 1 : 2;
	} else if (nb == 4) { // we don't care the speed here
	int b1 = src[sp++] & 0xff;
	int b2 = src[sp++] & 0xff;
	if (b1 > 0xf4 \|\|
	(b1 == 0xf0 && (b2 < 0x90 \|\| b2 > 0xbf)) \|\|
	(b1 == 0xf4 && (b2 & 0xf0) != 0x80) \|\|
	isNotContinuation(b2))
	return 1;
	if (isNotContinuation(src[sp++]))
	return 2;
	return 3;
	}
	assert false;
	return -1;
	}

	private static void throwMalformed(int off, int nb) {
	String msg = "malformed input off : " + off + ", length : " + nb;
	throw new IllegalArgumentException(msg, new MalformedInputException(nb));
	}

	private static void throwMalformed(byte[] val) {
	int dp = 0;
	while (dp < val.length && val[dp] >=0) { dp++; }
	throwMalformed(dp, 1);
	}

	private static void throwUnmappable(int off, int nb) {
	String msg = "malformed input off : " + off + ", length : " + nb;
	throw new IllegalArgumentException(msg, new UnmappableCharacterException(nb));
	}

	private static void throwUnmappable(byte[] val) {
	int dp = 0;
	while (dp < val.length && val[dp] >=0) { dp++; }
	throwUnmappable(dp, 1);
	}

	private static char repl = '\ufffd';

	private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
	// ascii-bais, which has a relative impact to the non-ascii-only bytes
	if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
	return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
	LATIN1);
	return decodeUTF8_0(src, sp, len, doReplace);
	}

	private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
	Result ret = resultCached.get();

	int sl = sp + len;
	int dp = 0;
	byte[] dst = new byte[len];

	if (COMPACT_STRINGS) {
	while (sp < sl) {
	int b1 = src[sp];
	if (b1 >= 0) {
	dst[dp++] = (byte)b1;
	sp++;
	continue;
	}
	if ((b1 == (byte)0xc2 \|\| b1 == (byte)0xc3) &&
	sp + 1 < sl) {
	int b2 = src[sp + 1];
	if (!isNotContinuation(b2)) {
	dst[dp++] = (byte)(((b1 << 6) ^ b2)^
	(((byte) 0xC0 << 6) ^
	((byte) 0x80 << 0)));
	sp += 2;
	continue;
	}
	}
	// anything not a latin1, including the repl
	// we have to go with the utf16
	break;
	}
	if (sp == sl) {
	if (dp != dst.length) {
	dst = Arrays.copyOf(dst, dp);
	}
	return ret.with(dst, LATIN1);
	}
	}
	if (dp == 0) {
	dst = new byte[len << 1];
	} else {
	byte[] buf = new byte[len << 1];
	StringLatin1.inflate(dst, 0, buf, 0, dp);
	dst = buf;
	}
	while (sp < sl) {
	int b1 = src[sp++];
	if (b1 >= 0) {
	putChar(dst, dp++, (char) b1);
	} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
	if (sp < sl) {
	int b2 = src[sp++];
	if (isNotContinuation(b2)) {
	if (!doReplace) {
	throwMalformed(sp - 1, 1);
	}
	putChar(dst, dp++, repl);
	sp--;
	} else {
	putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
	(((byte) 0xC0 << 6) ^
	((byte) 0x80 << 0))));
	}
	continue;
	}
	if (!doReplace) {
	throwMalformed(sp, 1); // underflow()
	}
	putChar(dst, dp++, repl);
	break;
	} else if ((b1 >> 4) == -2) {
	if (sp + 1 < sl) {
	int b2 = src[sp++];
	int b3 = src[sp++];
	if (isMalformed3(b1, b2, b3)) {
	if (!doReplace) {
	throwMalformed(sp - 3, 3);
	}
	putChar(dst, dp++, repl);
	sp -= 3;
	sp += malformedN(src, sp, 3);
	} else {
	char c = (char)((b1 << 12) ^
	(b2 << 6) ^
	(b3 ^
	(((byte) 0xE0 << 12) ^
	((byte) 0x80 << 6) ^
	((byte) 0x80 << 0))));
	if (isSurrogate(c)) {
	if (!doReplace) {
	throwMalformed(sp - 3, 3);
	}
	putChar(dst, dp++, repl);
	} else {
	putChar(dst, dp++, c);
	}
	}
	continue;
	}
	if (sp < sl && isMalformed3_2(b1, src[sp])) {
	if (!doReplace) {
	throwMalformed(sp - 1, 2);
	}
	putChar(dst, dp++, repl);
	continue;
	}
	if (!doReplace){
	throwMalformed(sp, 1);
	}
	putChar(dst, dp++, repl);
	break;
	} else if ((b1 >> 3) == -2) {
	if (sp + 2 < sl) {
	int b2 = src[sp++];
	int b3 = src[sp++];
	int b4 = src[sp++];
	int uc = ((b1 << 18) ^
	(b2 << 12) ^
	(b3 << 6) ^
	(b4 ^
	(((byte) 0xF0 << 18) ^
	((byte) 0x80 << 12) ^
	((byte) 0x80 << 6) ^
	((byte) 0x80 << 0))));
	if (isMalformed4(b2, b3, b4) \|\|
	!isSupplementaryCodePoint(uc)) { // shortest form check
	if (!doReplace) {
	throwMalformed(sp - 4, 4);
	}
	putChar(dst, dp++, repl);
	sp -= 4;
	sp += malformedN(src, sp, 4);
	} else {
	putChar(dst, dp++, highSurrogate(uc));
	putChar(dst, dp++, lowSurrogate(uc));
	}
	continue;
	}
	b1 &= 0xff;
	if (b1 > 0xf4 \|\|
	sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
	if (!doReplace) {
	throwMalformed(sp - 1, 1); // or 2
	}
	putChar(dst, dp++, repl);
	continue;
	}
	if (!doReplace) {
	throwMalformed(sp - 1, 1);
	}
	sp++;
	putChar(dst, dp++, repl);
	if (sp < sl && isMalformed4_3(src[sp])) {
	continue;
	}
	break;
	} else {
	if (!doReplace) {
	throwMalformed(sp - 1, 1);
	}
	putChar(dst, dp++, repl);
	}
	}
	if (dp != len) {
	dst = Arrays.copyOf(dst, dp << 1);
	}
	return ret.with(dst, UTF16);
	}

	private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
	if (coder == UTF16)
	return encodeUTF8_UTF16(val, doReplace);

	if (!hasNegatives(val, 0, val.length))
	return Arrays.copyOf(val, val.length);

	int dp = 0;
	byte[] dst = new byte[val.length << 1];
	for (int sp = 0; sp < val.length; sp++) {
	byte c = val[sp];
	if (c < 0) {
	dst[dp++] = (byte)(0xc0 \| ((c & 0xff) >> 6));
	dst[dp++] = (byte)(0x80 \| (c & 0x3f));
	} else {
	dst[dp++] = c;
	}
	}
	if (dp == dst.length)
	return dst;
	return Arrays.copyOf(dst, dp);
	}

	private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
	int dp = 0;
	int sp = 0;
	int sl = val.length >> 1;
	byte[] dst = new byte[sl * 3];
	char c;
	while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
	// ascii fast loop;
	dst[dp++] = (byte)c;
	sp++;
	}
	while (sp < sl) {
	c = StringUTF16.getChar(val, sp++);
	if (c < 0x80) {
	dst[dp++] = (byte)c;
	} else if (c < 0x800) {
	dst[dp++] = (byte)(0xc0 \| (c >> 6));
	dst[dp++] = (byte)(0x80 \| (c & 0x3f));
	} else if (Character.isSurrogate(c)) {
	int uc = -1;
	char c2;
	if (Character.isHighSurrogate(c) && sp < sl &&
	Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
	uc = Character.toCodePoint(c, c2);
	}
	if (uc < 0) {
	if (doReplace) {
	dst[dp++] = '?';
	} else {
	throwUnmappable(sp - 1, 1); // or 2, does not matter here
	}
	} else {
	dst[dp++] = (byte)(0xf0 \| ((uc >> 18)));
	dst[dp++] = (byte)(0x80 \| ((uc >> 12) & 0x3f));
	dst[dp++] = (byte)(0x80 \| ((uc >> 6) & 0x3f));
	dst[dp++] = (byte)(0x80 \| (uc & 0x3f));
	sp++; // 2 chars
	}
	} else {
	// 3 bytes, 16 bits
	dst[dp++] = (byte)(0xe0 \| ((c >> 12)));
	dst[dp++] = (byte)(0x80 \| ((c >> 6) & 0x3f));
	dst[dp++] = (byte)(0x80 \| (c & 0x3f));
	}
	}
	if (dp == dst.length) {
	return dst;
	}
	return Arrays.copyOf(dst, dp);
	}

	////////////////////// for j.u.z.ZipCoder //////////////////////////

	/*
	* Throws iae, instead of replacing, if malformed or unmappable.
	*/
	static String newStringUTF8NoRepl(byte[] src, int off, int len) {
	if (COMPACT_STRINGS && !hasNegatives(src, off, len))
	return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
	Result ret = decodeUTF8_0(src, off, len, false);
	return new String(ret.value, ret.coder);
	}

	/*
	* Throws iae, instead of replacing, if unmappable.
	*/
	static byte[] getBytesUTF8NoRepl(String s) {
	return encodeUTF8(s.coder(), s.value(), false);
	}

	////////////////////// for j.n.f.Files //////////////////////////

	private static boolean isASCII(byte[] src) {
	return !hasNegatives(src, 0, src.length);
	}

	private static String newStringLatin1(byte[] src) {
	if (COMPACT_STRINGS)
	return new String(src, LATIN1);
	return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
	}

	static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
	try {
	return newStringNoRepl1(src, cs);
	} catch (IllegalArgumentException e) {
	//newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
	Throwable cause = e.getCause();
	if (cause instanceof MalformedInputException) {
	throw (MalformedInputException)cause;
	}
	throw (CharacterCodingException)cause;
	}
	}

	static String newStringNoRepl1(byte[] src, Charset cs) {
	if (cs == UTF_8) {
	if (COMPACT_STRINGS && isASCII(src))
	return new String(src, LATIN1);
	Result ret = decodeUTF8_0(src, 0, src.length, false);
	return new String(ret.value, ret.coder);
	}
	if (cs == ISO_8859_1) {
	return newStringLatin1(src);
	}
	if (cs == US_ASCII) {
	if (isASCII(src)) {
	return newStringLatin1(src);
	} else {
	throwMalformed(src);
	}
	}

	CharsetDecoder cd = cs.newDecoder();
	// ascii fastpath
	if ((cd instanceof ArrayDecoder) &&
	((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
	return newStringLatin1(src);
	}
	int len = src.length;
	if (len == 0) {
	return "";
	}
	int en = scale(len, cd.maxCharsPerByte());
	char[] ca = new char[en];
	if (cs.getClass().getClassLoader0() != null &&
	System.getSecurityManager() != null) {
	src = Arrays.copyOf(src, len);
	}
	ByteBuffer bb = ByteBuffer.wrap(src);
	CharBuffer cb = CharBuffer.wrap(ca);
	try {
	CoderResult cr = cd.decode(bb, cb, true);
	if (!cr.isUnderflow())
	cr.throwException();
	cr = cd.flush(cb);
	if (!cr.isUnderflow())
	cr.throwException();
	} catch (CharacterCodingException x) {
	throw new IllegalArgumentException(x); // todo
	}
	Result ret = resultCached.get().with(ca, 0, cb.position());
	return new String(ret.value, ret.coder);
	}

	/*
	* Throws CCE, instead of replacing, if unmappable.
	*/
	static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
	try {
	return getBytesNoRepl1(s, cs);
	} catch (IllegalArgumentException e) {
	//getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
	Throwable cause = e.getCause();
	if (cause instanceof UnmappableCharacterException) {
	throw (UnmappableCharacterException)cause;
	}
	throw (CharacterCodingException)cause;
	}
	}

	static byte[] getBytesNoRepl1(String s, Charset cs) {
	byte[] val = s.value();
	byte coder = s.coder();
	if (cs == UTF_8) {
	if (isASCII(val)) {
	return val;
	}
	return encodeUTF8(coder, val, false);
	}
	if (cs == ISO_8859_1) {
	if (coder == LATIN1) {
	return val;
	}
	return encode8859_1(coder, val, false);
	}
	if (cs == US_ASCII) {
	if (coder == LATIN1) {
	if (isASCII(val)) {
	return val;
	} else {
	throwUnmappable(val);
	}
	}
	}
	CharsetEncoder ce = cs.newEncoder();
	// fastpath for ascii compatible
	if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
	((ArrayEncoder)ce).isASCIICompatible() &&
	isASCII(val)))) {
	return val;
	}
	int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
	int en = scale(len, ce.maxBytesPerChar());
	byte[] ba = new byte[en];
	if (len == 0) {
	return ba;
	}
	if (ce instanceof ArrayEncoder) {
	int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
	: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
	if (blen != -1) {
	return safeTrim(ba, blen, true);
	}
	}
	boolean isTrusted = cs.getClass().getClassLoader0() == null \|\|
	System.getSecurityManager() == null;
	char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
	: StringUTF16.toChars(val);
	ByteBuffer bb = ByteBuffer.wrap(ba);
	CharBuffer cb = CharBuffer.wrap(ca, 0, len);
	try {
	CoderResult cr = ce.encode(cb, bb, true);
	if (!cr.isUnderflow())
	cr.throwException();
	cr = ce.flush(bb);
	if (!cr.isUnderflow())
	cr.throwException();
	} catch (CharacterCodingException x) {
	throw new IllegalArgumentException(x);
	}
	return safeTrim(ba, bb.position(), isTrusted);
	}
	}

Back to index...