/* | 
|
 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. | 
|
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 
|
 * | 
|
 * This code is free software; you can redistribute it and/or modify it | 
|
 * under the terms of the GNU General Public License version 2 only, as | 
|
 * published by the Free Software Foundation.  Oracle designates this | 
|
 * particular file as subject to the "Classpath" exception as provided | 
|
 * by Oracle in the LICENSE file that accompanied this code. | 
|
 * | 
|
 * This code is distributed in the hope that it will be useful, but WITHOUT | 
|
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|
 * version 2 for more details (a copy is included in the LICENSE file that | 
|
 * accompanied this code). | 
|
 * | 
|
 * You should have received a copy of the GNU General Public License version | 
|
 * 2 along with this work; if not, write to the Free Software Foundation, | 
|
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 
|
 * | 
|
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | 
|
 * or visit www.oracle.com if you need additional information or have any | 
|
 * questions. | 
|
*/  | 
|
package jdk.internal.util.xml.impl;  | 
|
import java.io.Reader;  | 
|
import java.io.InputStream;  | 
|
import java.io.IOException;  | 
|
import java.io.UnsupportedEncodingException;  | 
|
/**
 * UTF-8 transformed UCS-2 character stream reader.
 *
 * <p>This reader converts UTF-8 transformed UCS-2 characters to Java characters.
 * The UCS-2 subset of the UTF-8 transformation is described in RFC 2279,
 * section 2, "UTF-8 definition":
 * <pre>
 *  0000 0000-0000 007F   0xxxxxxx
 *  0000 0080-0000 07FF   110xxxxx 10xxxxxx
 *  0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 * </pre>
 *
 * <p>This reader will return an incorrect last character on a broken UTF-8 stream.
 */
|
public class ReaderUTF8 extends Reader {

    /** Message reported when a four-byte (UCS-4) sequence is encountered. */
    private static final String UCS4_NOT_SUPPORTED =
            "UTF-32 (or UCS-4) encoding not supported.";

    /** Byte stream supplying the UTF-8 encoded input. */
    private final InputStream is;

    /**
     * Constructor.
     *
     * @param is A byte input stream.
     */
    public ReaderUTF8(InputStream is) {
        this.is = is;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * The requested range is validated before any byte is taken from the
     * underlying stream, so an invalid range neither consumes input nor
     * partially fills the buffer.
     *
     * @param cbuf Destination buffer.
     * @param off Offset at which to start storing characters.
     * @param len Maximum number of characters to read.
     * @return The number of characters stored, or -1 if the end of the stream
     *  was reached before any character could be read.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If UCS-4 character occur in the stream.
     * @exception IndexOutOfBoundsException If off or len is negative, or len is
     *  greater than cbuf.length - off.
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        // "len > cbuf.length - off" avoids the int overflow that "off + len" could hit.
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        }

        int num = 0;
        while (num < len) {
            int val = is.read();
            if (val < 0) {
                // End of stream: report what was decoded so far, or -1 if nothing was.
                return (num != 0) ? num : -1;
            }
            // Every supported sequence decodes to at most 16 bits, so the cast is lossless.
            cbuf[off + num] = (char) decode(val);
            num++;
        }
        return num;
    }

    /**
     * Reads a single character.
     *
     * @return The character read, as an integer in the range 0 to 65535
     *  (0x00-0xffff), or -1 if the end of the stream has been reached.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If UCS-4 character occur in the stream.
     */
    @Override
    public int read() throws IOException {
        int val = is.read();
        return (val < 0) ? -1 : decode(val);
    }

    /**
     * Closes the stream.
     *
     * @exception IOException If any IO errors occur.
     */
    @Override
    public void close() throws IOException {
        is.close();
    }

    /**
     * Decodes one character whose first byte has already been read, pulling
     * any continuation bytes from the underlying stream.
     *
     * Decoding is lenient: continuation bytes are not validated, and a
     * sequence cut short by the end of the stream is not detected (the -1
     * returned by the stream is masked like any other byte), so a broken
     * stream yields an incorrect character rather than an error.
     *
     * @param lead First byte of the sequence, in the range 0 to 255.
     * @return The decoded character value, in the range 0 to 65535.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If the lead byte starts a UCS-4 sequence.
     */
    private int decode(int lead) throws IOException {
        switch (lead & 0xf0) {
            case 0xc0:
            case 0xd0:
                // 110xxxxx 10xxxxxx
                return ((lead & 0x1f) << 6) | (is.read() & 0x3f);

            case 0xe0: {
                // 1110xxxx 10xxxxxx 10xxxxxx; the middle byte is read before the low byte.
                int middle = is.read() & 0x3f;
                int low = is.read() & 0x3f;
                return ((lead & 0x0f) << 12) | (middle << 6) | low;
            }

            case 0xf0:      // UCS-4 character
                throw new UnsupportedEncodingException(UCS4_NOT_SUPPORTED);

            default:
                // Single-byte value; any other lead byte is passed through unchanged.
                return lead;
        }
    }
}