/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
package com.sun.org.apache.xml.internal.serializer; |
|
|
|
import java.io.IOException; |
|
import java.io.OutputStream; |
|
import java.io.UnsupportedEncodingException; |
|
import java.io.Writer; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
final class WriterToUTF8Buffered extends Writer implements WriterChain |
|
{ |
|
|
|
|
|
|
|
*/ |
|
private static final int BYTES_MAX=16*1024; |
|
|
|
|
|
|
|
*/ |
|
private static final int CHARS_MAX=(BYTES_MAX/3); |
|
|
|
// private static final int |
|
|
|
|
|
private final OutputStream m_os; |
|
|
|
|
|
|
|
|
|
*/ |
|
private final byte m_outputBytes[]; |
|
|
|
private final char m_inputChars[]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private int count; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public WriterToUTF8Buffered(OutputStream out) |
|
throws UnsupportedEncodingException |
|
{ |
|
m_os = out; |
|
// get 3 extra bytes to make buffer overflow checking simpler and faster |
|
|
|
m_outputBytes = new byte[BYTES_MAX + 3]; |
|
|
|
// Big enough to hold the input chars that will be transformed |
|
|
|
m_inputChars = new char[CHARS_MAX + 2]; |
|
count = 0; |
|
|
|
// the old body of this constructor, before the buffersize was changed to a constant |
|
// this(out, 8*1024); |
|
} |
|
|
|
/** |
|
* Create an buffered UTF-8 writer to write data to the |
|
* specified underlying output stream with the specified buffer |
|
* size. |
|
* |
|
* @param out the underlying output stream. |
|
* @param size the buffer size. |
|
* @exception IllegalArgumentException if size <= 0. |
|
*/ |
|
// public WriterToUTF8Buffered(final OutputStream out, final int size) |
|
// { |
|
// |
|
// m_os = out; |
|
// |
|
// if (size <= 0) |
|
// { |
|
// throw new IllegalArgumentException( |
|
// SerializerMessages.createMessage(SerializerErrorResources.ER_BUFFER_SIZE_LESSTHAN_ZERO, null)); //"Buffer size <= 0"); |
|
// } |
|
// |
|
// m_outputBytes = new byte[size]; |
|
// count = 0; |
|
// } |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void write(final int c) throws IOException |
|
{ |
|
|
|
|
|
|
|
*/ |
|
if (count >= BYTES_MAX) |
|
flushBuffer(); |
|
|
|
if (c < 0x80) |
|
{ |
|
m_outputBytes[count++] = (byte) (c); |
|
} |
|
else if (c < 0x800) |
|
{ |
|
m_outputBytes[count++] = (byte) (0xc0 + (c >> 6)); |
|
m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f)); |
|
} |
|
else if (c < 0x10000) |
|
{ |
|
m_outputBytes[count++] = (byte) (0xe0 + (c >> 12)); |
|
m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f)); |
|
m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f)); |
|
} |
|
else |
|
{ |
|
m_outputBytes[count++] = (byte) (0xf0 + (c >> 18)); |
|
m_outputBytes[count++] = (byte) (0x80 + ((c >> 12) & 0x3f)); |
|
m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f)); |
|
m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f)); |
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void write(final char chars[], final int start, final int length) |
|
throws java.io.IOException |
|
{ |
|
|
|
// We multiply the length by three since this is the maximum length |
|
// of the characters that we can put into the buffer. It is possible |
|
// for each Unicode character to expand to three bytes. |
|
|
|
int lengthx3 = 3*length; |
|
|
|
if (lengthx3 >= BYTES_MAX - count) |
|
{ |
|
|
|
flushBuffer(); |
|
|
|
if (lengthx3 > BYTES_MAX) |
|
{ |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
int split = length/CHARS_MAX; |
|
final int chunks; |
|
if (length % CHARS_MAX > 0) |
|
chunks = split + 1; |
|
else |
|
chunks = split; |
|
int end_chunk = start; |
|
for (int chunk = 1; chunk <= chunks; chunk++) |
|
{ |
|
int start_chunk = end_chunk; |
|
end_chunk = start + (int) ((((long) length) * chunk) / chunks); |
|
|
|
// Adjust the end of the chunk if it ends on a high char |
|
// of a Unicode surrogate pair and low char of the pair |
|
|
|
final char c = chars[end_chunk - 1]; |
|
int ic = chars[end_chunk - 1]; |
|
if (c >= 0xD800 && c <= 0xDBFF) { |
|
// The last Java char that we were going |
|
// to process is the first of a |
|
// Java surrogate char pair that |
|
// represent a Unicode character. |
|
|
|
if (end_chunk < start + length) { |
|
// Avoid spanning by including the low |
|
|
|
end_chunk++; |
|
} else { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
end_chunk--; |
|
} |
|
} |
|
|
|
|
|
int len_chunk = (end_chunk - start_chunk); |
|
this.write(chars,start_chunk, len_chunk); |
|
} |
|
return; |
|
} |
|
} |
|
|
|
|
|
|
|
final int n = length+start; |
|
final byte[] buf_loc = m_outputBytes; |
|
int count_loc = count; |
|
int i = start; |
|
{ |
|
|
|
|
|
|
|
|
|
*/ |
|
char c; |
|
for(; i < n && (c = chars[i])< 0x80 ; i++ ) |
|
buf_loc[count_loc++] = (byte)c; |
|
} |
|
for (; i < n; i++) |
|
{ |
|
|
|
final char c = chars[i]; |
|
|
|
if (c < 0x80) |
|
buf_loc[count_loc++] = (byte) (c); |
|
else if (c < 0x800) |
|
{ |
|
buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6)); |
|
buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f)); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
else if (c >= 0xD800 && c <= 0xDBFF) |
|
{ |
|
char high, low; |
|
high = c; |
|
i++; |
|
low = chars[i]; |
|
|
|
buf_loc[count_loc++] = (byte) (0xF0 | (((high + 0x40) >> 8) & 0xf0)); |
|
buf_loc[count_loc++] = (byte) (0x80 | (((high + 0x40) >> 2) & 0x3f)); |
|
buf_loc[count_loc++] = (byte) (0x80 | ((low >> 6) & 0x0f) + ((high << 4) & 0x30)); |
|
buf_loc[count_loc++] = (byte) (0x80 | (low & 0x3f)); |
|
} |
|
else |
|
{ |
|
buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12)); |
|
buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f)); |
|
buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f)); |
|
} |
|
} |
|
|
|
count = count_loc; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void write(final String s) throws IOException |
|
{ |
|
|
|
// We multiply the length by three since this is the maximum length |
|
// of the characters that we can put into the buffer. It is possible |
|
|
|
final int length = s.length(); |
|
int lengthx3 = 3*length; |
|
|
|
if (lengthx3 >= BYTES_MAX - count) |
|
{ |
|
|
|
flushBuffer(); |
|
|
|
if (lengthx3 > BYTES_MAX) |
|
{ |
|
|
|
|
|
|
|
*/ |
|
final int start = 0; |
|
int split = length/CHARS_MAX; |
|
final int chunks; |
|
if (length % CHARS_MAX > 0) |
|
chunks = split + 1; |
|
else |
|
chunks = split; |
|
int end_chunk = 0; |
|
for (int chunk = 1; chunk <= chunks; chunk++) |
|
{ |
|
int start_chunk = end_chunk; |
|
end_chunk = start + (int) ((((long) length) * chunk) / chunks); |
|
s.getChars(start_chunk,end_chunk, m_inputChars,0); |
|
int len_chunk = (end_chunk - start_chunk); |
|
|
|
// Adjust the end of the chunk if it ends on a high char |
|
// of a Unicode surrogate pair and low char of the pair |
|
|
|
final char c = m_inputChars[len_chunk - 1]; |
|
if (c >= 0xD800 && c <= 0xDBFF) { |
|
// Exclude char in this chunk, |
|
// to avoid spanning a Unicode character |
|
|
|
end_chunk--; |
|
len_chunk--; |
|
if (chunk == chunks) { |
|
/* TODO: error message needed. |
|
* The String incorrectly ends in a high char |
|
* of a high/low surrogate pair, but there is |
|
* no corresponding low as the high is the last char |
|
* Recover by ignoring this last char. |
|
*/ |
|
} |
|
} |
|
|
|
this.write(m_inputChars,0, len_chunk); |
|
} |
|
return; |
|
} |
|
} |
|
|
|
|
|
s.getChars(0, length , m_inputChars, 0); |
|
final char[] chars = m_inputChars; |
|
final int n = length; |
|
final byte[] buf_loc = m_outputBytes; |
|
int count_loc = count; |
|
int i = 0; |
|
{ |
|
|
|
|
|
|
|
|
|
*/ |
|
char c; |
|
for(; i < n && (c = chars[i])< 0x80 ; i++ ) |
|
buf_loc[count_loc++] = (byte)c; |
|
} |
|
for (; i < n; i++) |
|
{ |
|
|
|
final char c = chars[i]; |
|
|
|
if (c < 0x80) |
|
buf_loc[count_loc++] = (byte) (c); |
|
else if (c < 0x800) |
|
{ |
|
buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6)); |
|
buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f)); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
else if (c >= 0xD800 && c <= 0xDBFF) |
|
{ |
|
char high, low; |
|
high = c; |
|
i++; |
|
low = chars[i]; |
|
|
|
buf_loc[count_loc++] = (byte) (0xF0 | (((high + 0x40) >> 8) & 0xf0)); |
|
buf_loc[count_loc++] = (byte) (0x80 | (((high + 0x40) >> 2) & 0x3f)); |
|
buf_loc[count_loc++] = (byte) (0x80 | ((low >> 6) & 0x0f) + ((high << 4) & 0x30)); |
|
buf_loc[count_loc++] = (byte) (0x80 | (low & 0x3f)); |
|
} |
|
else |
|
{ |
|
buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12)); |
|
buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f)); |
|
buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f)); |
|
} |
|
} |
|
|
|
count = count_loc; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void flushBuffer() throws IOException |
|
{ |
|
|
|
if (count > 0) |
|
{ |
|
m_os.write(m_outputBytes, 0, count); |
|
|
|
count = 0; |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void flush() throws java.io.IOException |
|
{ |
|
flushBuffer(); |
|
m_os.flush(); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void close() throws java.io.IOException |
|
{ |
|
flushBuffer(); |
|
m_os.close(); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public OutputStream getOutputStream() |
|
{ |
|
return m_os; |
|
} |
|
|
|
public Writer getWriter() |
|
{ |
|
// Only one of getWriter() or getOutputStream() can return null |
|
|
|
return null; |
|
} |
|
} |