/* |
|
* reserved comment block |
|
* DO NOT REMOVE OR ALTER! |
|
*/ |
|
/* |
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
|
* contributor license agreements. See the NOTICE file distributed with |
|
* this work for additional information regarding copyright ownership. |
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
|
* (the "License"); you may not use this file except in compliance with |
|
* the License. You may obtain a copy of the License at |
|
* |
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
* |
|
* Unless required by applicable law or agreed to in writing, software |
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
* See the License for the specific language governing permissions and |
|
* limitations under the License. |
|
*/ |
|
package com.sun.org.apache.xerces.internal.impl.io; |
|
import java.io.IOException; |
|
import java.io.InputStream; |
|
import java.io.Reader; |
|
import java.util.Locale; |
|
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; |
|
import com.sun.org.apache.xerces.internal.util.MessageFormatter; |
|
/** |
|
* <p> |
|
* A UTF-16 reader. Can also be used for UCS-2 (i.e. ISO-10646-UCS-2).</p> |
|
* |
|
* @xerces.internal |
|
* |
|
* @author Michael Glavassevich, IBM |
|
* |
|
* @version $Id: UTF16Reader.java 718095 2008-11-16 20:00:14Z mrglavas $ |
|
*/ |
|
public final class UTF16Reader |
|
extends Reader { |
|
// |
|
// Constants |
|
// |
|
/** |
|
* Default byte buffer size (4096). |
|
*/ |
|
public static final int DEFAULT_BUFFER_SIZE = 4096; |
|
// |
|
// Data |
|
// |
|
/** |
|
* Input stream. |
|
*/ |
|
protected final InputStream fInputStream; |
|
/** |
|
* Byte buffer. |
|
*/ |
|
protected final byte[] fBuffer; |
|
/** |
|
* Endianness. |
|
*/ |
|
protected final boolean fIsBigEndian; |
|
// message formatter; used to produce localized exception messages |
|
private final MessageFormatter fFormatter; |
|
// Locale to use for messages |
|
private final Locale fLocale; |
|
// |
|
// Constructors |
|
// |
|
/** |
|
* Constructs a UTF-16 reader from the specified input stream using the |
|
* default buffer size. Primarily for testing. |
|
* |
|
* @param inputStream The input stream. |
|
* @param isBigEndian The byte order. |
|
*/ |
|
public UTF16Reader(InputStream inputStream, boolean isBigEndian) { |
|
this(inputStream, DEFAULT_BUFFER_SIZE, isBigEndian, |
|
new XMLMessageFormatter(), Locale.getDefault()); |
|
} // <init>(InputStream, boolean) |
|
/** |
|
* Constructs a UTF-16 reader from the specified input stream using the |
|
* default buffer size and the given MessageFormatter. |
|
* |
|
* @param inputStream The input stream. |
|
* @param isBigEndian The byte order. |
|
*/ |
|
public UTF16Reader(InputStream inputStream, boolean isBigEndian, |
|
MessageFormatter messageFormatter, Locale locale) { |
|
this(inputStream, DEFAULT_BUFFER_SIZE, isBigEndian, messageFormatter, locale); |
|
} // <init>(InputStream, boolean, MessageFormatter, Locale) |
|
/** |
|
* Constructs a UTF-16 reader from the specified input stream and buffer |
|
* size and given MessageFormatter. |
|
* |
|
* @param inputStream The input stream. |
|
* @param size The initial buffer size. |
|
* @param isBigEndian The byte order. |
|
* @param messageFormatter Given MessageFormatter |
|
* @param locale Locale to use for messages |
|
*/ |
|
public UTF16Reader(InputStream inputStream, int size, boolean isBigEndian, |
|
MessageFormatter messageFormatter, Locale locale) { |
|
this(inputStream, new byte[size], isBigEndian, messageFormatter, locale); |
|
} // <init>(InputStream, int, boolean, MessageFormatter, Locale) |
|
/** |
|
* Constructs a UTF-16 reader from the specified input stream, buffer and |
|
* MessageFormatter. |
|
* |
|
* @param inputStream The input stream. |
|
* @param buffer The byte buffer. |
|
* @param isBigEndian The byte order. |
|
* @param messageFormatter Given MessageFormatter |
|
* @param locale Locale to use for messages |
|
*/ |
|
public UTF16Reader(InputStream inputStream, byte[] buffer, boolean isBigEndian, |
|
MessageFormatter messageFormatter, Locale locale) { |
|
fInputStream = inputStream; |
|
fBuffer = buffer; |
|
fIsBigEndian = isBigEndian; |
|
fFormatter = messageFormatter; |
|
fLocale = locale; |
|
} // <init>(InputStream, byte[], boolean, MessageFormatter, Locale) |
|
// |
|
// Reader methods |
|
// |
|
/** |
|
* Read a single character. This method will block until a character is |
|
* available, an I/O error occurs, or the end of the stream is reached. |
|
* |
|
* <p> |
|
* Subclasses that intend to support efficient single-character input should |
|
* override this method. |
|
* |
|
* @return The character read, as an integer in the range 0 to 65535 |
|
* (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has been reached |
|
* |
|
* @exception IOException If an I/O error occurs |
|
*/ |
|
public int read() throws IOException { |
|
final int b0 = fInputStream.read(); |
|
if (b0 == -1) { |
|
return -1; |
|
} |
|
final int b1 = fInputStream.read(); |
|
if (b1 == -1) { |
|
expectedTwoBytes(); |
|
} |
|
// UTF-16BE |
|
if (fIsBigEndian) { |
|
return (b0 << 8) | b1; |
|
} |
|
// UTF-16LE |
|
return (b1 << 8) | b0; |
|
} // read():int |
|
/** |
|
* Read characters into a portion of an array. This method will block until |
|
* some input is available, an I/O error occurs, or the end of the stream is |
|
* reached. |
|
* |
|
* @param ch Destination buffer |
|
* @param offset Offset at which to start storing characters |
|
* @param length Maximum number of characters to read |
|
* |
|
* @return The number of characters read, or -1 if the end of the stream has |
|
* been reached |
|
* |
|
* @exception IOException If an I/O error occurs |
|
*/ |
|
public int read(char ch[], int offset, int length) throws IOException { |
|
int byteLength = length << 1; |
|
if (byteLength > fBuffer.length) { |
|
byteLength = fBuffer.length; |
|
} |
|
int byteCount = fInputStream.read(fBuffer, 0, byteLength); |
|
if (byteCount == -1) { |
|
return -1; |
|
} |
|
// If an odd number of bytes were read, we still need to read one more. |
|
if ((byteCount & 1) != 0) { |
|
int b = fInputStream.read(); |
|
if (b == -1) { |
|
expectedTwoBytes(); |
|
} |
|
fBuffer[byteCount++] = (byte) b; |
|
} |
|
final int charCount = byteCount >> 1; |
|
if (fIsBigEndian) { |
|
processBE(ch, offset, charCount); |
|
} else { |
|
processLE(ch, offset, charCount); |
|
} |
|
return charCount; |
|
} // read(char[],int,int) |
|
/** |
|
* Skip characters. This method will block until some characters are |
|
* available, an I/O error occurs, or the end of the stream is reached. |
|
* |
|
* @param n The number of characters to skip |
|
* |
|
* @return The number of characters actually skipped |
|
* |
|
* @exception IOException If an I/O error occurs |
|
*/ |
|
public long skip(long n) throws IOException { |
|
long bytesSkipped = fInputStream.skip(n << 1); |
|
if ((bytesSkipped & 1) != 0) { |
|
int b = fInputStream.read(); |
|
if (b == -1) { |
|
expectedTwoBytes(); |
|
} |
|
++bytesSkipped; |
|
} |
|
return bytesSkipped >> 1; |
|
} // skip(long):long |
|
/** |
|
* Tell whether this stream is ready to be read. |
|
* |
|
* @return True if the next read() is guaranteed not to block for input, |
|
* false otherwise. Note that returning false does not guarantee that the |
|
* next read will block. |
|
* |
|
* @exception IOException If an I/O error occurs |
|
*/ |
|
public boolean ready() throws IOException { |
|
return false; |
|
} // ready() |
|
/** |
|
* Tell whether this stream supports the mark() operation. |
|
*/ |
|
public boolean markSupported() { |
|
return false; |
|
} // markSupported() |
|
/** |
|
* Mark the present position in the stream. Subsequent calls to reset() will |
|
* attempt to reposition the stream to this point. Not all character-input |
|
* streams support the mark() operation. |
|
* |
|
* @param readAheadLimit Limit on the number of characters that may be read |
|
* while still preserving the mark. After reading this many characters, |
|
* attempting to reset the stream may fail. |
|
* |
|
* @exception IOException If the stream does not support mark(), or if some |
|
* other I/O error occurs |
|
*/ |
|
public void mark(int readAheadLimit) throws IOException { |
|
throw new IOException(fFormatter.formatMessage(fLocale, "OperationNotSupported", new Object[]{"mark()", "UTF-16"})); |
|
} // mark(int) |
|
/** |
|
* Reset the stream. If the stream has been marked, then attempt to |
|
* reposition it at the mark. If the stream has not been marked, then |
|
* attempt to reset it in some way appropriate to the particular stream, for |
|
* example by repositioning it to its starting point. Not all |
|
* character-input streams support the reset() operation, and some support |
|
* reset() without supporting mark(). |
|
* |
|
* @exception IOException If the stream has not been marked, or if the mark |
|
* has been invalidated, or if the stream does not support reset(), or if |
|
* some other I/O error occurs |
|
*/ |
|
public void reset() throws IOException { |
|
} // reset() |
|
/** |
|
* Close the stream. Once a stream has been closed, further read(), ready(), |
|
* mark(), or reset() invocations will throw an IOException. Closing a |
|
* previously-closed stream, however, has no effect. |
|
* |
|
* @exception IOException If an I/O error occurs |
|
*/ |
|
public void close() throws IOException { |
|
fInputStream.close(); |
|
} // close() |
|
// |
|
// Private methods |
|
// |
|
/** |
|
* Decodes UTF-16BE * |
|
*/ |
|
private void processBE(final char ch[], int offset, final int count) { |
|
int curPos = 0; |
|
for (int i = 0; i < count; ++i) { |
|
final int b0 = fBuffer[curPos++] & 0xff; |
|
final int b1 = fBuffer[curPos++] & 0xff; |
|
ch[offset++] = (char) ((b0 << 8) | b1); |
|
} |
|
} // processBE(char[],int,int) |
|
/** |
|
* Decodes UTF-16LE * |
|
*/ |
|
private void processLE(final char ch[], int offset, final int count) { |
|
int curPos = 0; |
|
for (int i = 0; i < count; ++i) { |
|
final int b0 = fBuffer[curPos++] & 0xff; |
|
final int b1 = fBuffer[curPos++] & 0xff; |
|
ch[offset++] = (char) ((b1 << 8) | b0); |
|
} |
|
} // processLE(char[],int,int) |
|
/** |
|
* Throws an exception for expected byte. |
|
*/ |
|
private void expectedTwoBytes() |
|
throws MalformedByteSequenceException { |
|
throw new MalformedByteSequenceException(fFormatter, |
|
fLocale, |
|
XMLMessageFormatter.XML_DOMAIN, |
|
"ExpectedByte", |
|
new Object[]{"2", "2"}); |
|
} // expectedTwoBytes() |
|
} // class UTF16Reader |