Back to index...

	/*
	* reserved comment block
	* DO NOT REMOVE OR ALTER!
	*/
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	/*
	* $Id: WriterToUTF8Buffered.java,v 1.2.4.1 2005/09/15 08:15:31 suresh_emailid Exp $
	*/
	package com.sun.org.apache.xml.internal.serializer;

	import java.io.IOException;
	import java.io.OutputStream;
	import java.io.UnsupportedEncodingException;
	import java.io.Writer;


	/**
	* This class writes unicode characters to a byte stream (java.io.OutputStream)
	* as quickly as possible. It buffers the output in an internal
	* buffer which must be flushed to the OutputStream when done. This flushing
	* is done via the close() flush() or flushBuffer() method.
	*
	* This class is only used internally within Xalan.
	*
	* @xsl.usage internal
	*/
	final class WriterToUTF8Buffered extends Writer implements WriterChain
	{

	/** number of bytes that the byte buffer can hold.
	* This is a fixed constant is used rather than m_outputBytes.lenght for performance.
	*/
	private static final int BYTES_MAX=16*1024;
	/** number of characters that the character buffer can hold.
	* This is 1/3 of the number of bytes because UTF-8 encoding
	* can expand one unicode character by up to 3 bytes.
	*/
	private static final int CHARS_MAX=(BYTES_MAX/3);

	// private static final int

	/** The byte stream to write to. (sc & sb remove final to compile in JDK 1.1.8) */
	private final OutputStream m_os;

	/**
	* The internal buffer where data is stored.
	* (sc & sb remove final to compile in JDK 1.1.8)
	*/
	private final byte m_outputBytes[];

	private final char m_inputChars[];

	/**
	* The number of valid bytes in the buffer. This value is always
	* in the range <tt>0</tt> through <tt>m_outputBytes.length</tt>; elements
	* <tt>m_outputBytes[0]</tt> through <tt>m_outputBytes[count-1]</tt> contain valid
	* byte data.
	*/
	private int count;

	/**
	* Create an buffered UTF-8 writer.
	*
	*
	* @param out the underlying output stream.
	*
	* @throws UnsupportedEncodingException
	*/
	public WriterToUTF8Buffered(OutputStream out)
	throws UnsupportedEncodingException
	{
	m_os = out;
	// get 3 extra bytes to make buffer overflow checking simpler and faster
	// we won't have to keep checking for a few extra characters
	m_outputBytes = new byte[BYTES_MAX + 3];

	// Big enough to hold the input chars that will be transformed
	// into output bytes in m_ouputBytes.
	m_inputChars = new char[CHARS_MAX + 2];
	count = 0;

	// the old body of this constructor, before the buffersize was changed to a constant
	// this(out, 8*1024);
	}

	/**
	* Create an buffered UTF-8 writer to write data to the
	* specified underlying output stream with the specified buffer
	* size.
	*
	* @param out the underlying output stream.
	* @param size the buffer size.
	* @exception IllegalArgumentException if size <= 0.
	*/
	// public WriterToUTF8Buffered(final OutputStream out, final int size)
	// {
	//
	// m_os = out;
	//
	// if (size <= 0)
	// {
	// throw new IllegalArgumentException(
	// SerializerMessages.createMessage(SerializerErrorResources.ER_BUFFER_SIZE_LESSTHAN_ZERO, null)); //"Buffer size <= 0");
	// }
	//
	// m_outputBytes = new byte[size];
	// count = 0;
	// }

	/**
	* Write a single character. The character to be written is contained in
	* the 16 low-order bits of the given integer value; the 16 high-order bits
	* are ignored.
	*
	* <p> Subclasses that intend to support efficient single-character output
	* should override this method.
	*
	* @param c int specifying a character to be written.
	* @exception IOException If an I/O error occurs
	*/
	public void write(final int c) throws IOException
	{

	/* If we are close to the end of the buffer then flush it.
	* Remember the buffer can hold a few more bytes than BYTES_MAX
	*/
	if (count >= BYTES_MAX)
	flushBuffer();

	if (c < 0x80)
	{
	m_outputBytes[count++] = (byte) (c);
	}
	else if (c < 0x800)
	{
	m_outputBytes[count++] = (byte) (0xc0 + (c >> 6));
	m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
	}
	else if (c < 0x10000)
	{
	m_outputBytes[count++] = (byte) (0xe0 + (c >> 12));
	m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
	m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
	}
	else
	{
	m_outputBytes[count++] = (byte) (0xf0 + (c >> 18));
	m_outputBytes[count++] = (byte) (0x80 + ((c >> 12) & 0x3f));
	m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
	m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
	}

	}


	/**
	* Write a portion of an array of characters.
	*
	* @param chars Array of characters
	* @param start Offset from which to start writing characters
	* @param length Number of characters to write
	*
	* @exception IOException If an I/O error occurs
	*
	* @throws java.io.IOException
	*/
	public void write(final char chars[], final int start, final int length)
	throws java.io.IOException
	{

	// We multiply the length by three since this is the maximum length
	// of the characters that we can put into the buffer. It is possible
	// for each Unicode character to expand to three bytes.

	int lengthx3 = 3*length;

	if (lengthx3 >= BYTES_MAX - count)
	{
	// The requested length is greater than the unused part of the buffer
	flushBuffer();

	if (lengthx3 > BYTES_MAX)
	{
	/*
	* The requested length exceeds the size of the buffer.
	* Cut the buffer up into chunks, each of which will
	* not cause an overflow to the output buffer m_outputBytes,
	* and make multiple recursive calls.
	* Be careful about integer overflows in multiplication.
	*/
	int split = length/CHARS_MAX;
	final int chunks;
	if (length % CHARS_MAX > 0)
	chunks = split + 1;
	else
	chunks = split;
	int end_chunk = start;
	for (int chunk = 1; chunk <= chunks; chunk++)
	{
	int start_chunk = end_chunk;
	end_chunk = start + (int) ((((long) length) * chunk) / chunks);

	// Adjust the end of the chunk if it ends on a high char
	// of a Unicode surrogate pair and low char of the pair
	// is not going to be in the same chunk
	final char c = chars[end_chunk - 1];
	int ic = chars[end_chunk - 1];
	if (c >= 0xD800 && c <= 0xDBFF) {
	// The last Java char that we were going
	// to process is the first of a
	// Java surrogate char pair that
	// represent a Unicode character.

	if (end_chunk < start + length) {
	// Avoid spanning by including the low
	// char in the current chunk of chars.
	end_chunk++;
	} else {
	/* This is the last char of the last chunk,
	* and it is the high char of a high/low pair with
	* no low char provided.
	* TODO: error message needed.
	* The char array incorrectly ends in a high char
	* of a high/low surrogate pair, but there is
	* no corresponding low as the high is the last char
	*/
	end_chunk--;
	}
	}


	int len_chunk = (end_chunk - start_chunk);
	this.write(chars,start_chunk, len_chunk);
	}
	return;
	}
	}



	final int n = length+start;
	final byte[] buf_loc = m_outputBytes; // local reference for faster access
	int count_loc = count; // local integer for faster access
	int i = start;
	{
	/* This block could be omitted and the code would produce
	* the same result. But this block exists to give the JIT
	* a better chance of optimizing a tight and common loop which
	* occurs when writing out ASCII characters.
	*/
	char c;
	for(; i < n && (c = chars[i])< 0x80 ; i++ )
	buf_loc[count_loc++] = (byte)c;
	}
	for (; i < n; i++)
	{

	final char c = chars[i];

	if (c < 0x80)
	buf_loc[count_loc++] = (byte) (c);
	else if (c < 0x800)
	{
	buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
	buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
	}
	/**
	* The following else if condition is added to support XML 1.1 Characters for
	* UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
	* Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
	* [1101 11yy] [yyxx xxxx] (low surrogate)
	* * uuuuu = wwww + 1
	*/
	else if (c >= 0xD800 && c <= 0xDBFF)
	{
	char high, low;
	high = c;
	i++;
	low = chars[i];

	buf_loc[count_loc++] = (byte) (0xF0 \| (((high + 0x40) >> 8) & 0xf0));
	buf_loc[count_loc++] = (byte) (0x80 \| (((high + 0x40) >> 2) & 0x3f));
	buf_loc[count_loc++] = (byte) (0x80 \| ((low >> 6) & 0x0f) + ((high << 4) & 0x30));
	buf_loc[count_loc++] = (byte) (0x80 \| (low & 0x3f));
	}
	else
	{
	buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
	buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f));
	buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
	}
	}
	// Store the local integer back into the instance variable
	count = count_loc;

	}

	/**
	* Write a string.
	*
	* @param s String to be written
	*
	* @exception IOException If an I/O error occurs
	*/
	public void write(final String s) throws IOException
	{

	// We multiply the length by three since this is the maximum length
	// of the characters that we can put into the buffer. It is possible
	// for each Unicode character to expand to three bytes.
	final int length = s.length();
	int lengthx3 = 3*length;

	if (lengthx3 >= BYTES_MAX - count)
	{
	// The requested length is greater than the unused part of the buffer
	flushBuffer();

	if (lengthx3 > BYTES_MAX)
	{
	/*
	* The requested length exceeds the size of the buffer,
	* so break it up in chunks that don't exceed the buffer size.
	*/
	final int start = 0;
	int split = length/CHARS_MAX;
	final int chunks;
	if (length % CHARS_MAX > 0)
	chunks = split + 1;
	else
	chunks = split;
	int end_chunk = 0;
	for (int chunk = 1; chunk <= chunks; chunk++)
	{
	int start_chunk = end_chunk;
	end_chunk = start + (int) ((((long) length) * chunk) / chunks);
	s.getChars(start_chunk,end_chunk, m_inputChars,0);
	int len_chunk = (end_chunk - start_chunk);

	// Adjust the end of the chunk if it ends on a high char
	// of a Unicode surrogate pair and low char of the pair
	// is not going to be in the same chunk
	final char c = m_inputChars[len_chunk - 1];
	if (c >= 0xD800 && c <= 0xDBFF) {
	// Exclude char in this chunk,
	// to avoid spanning a Unicode character
	// that is in two Java chars as a high/low surrogate
	end_chunk--;
	len_chunk--;
	if (chunk == chunks) {
	/* TODO: error message needed.
	* The String incorrectly ends in a high char
	* of a high/low surrogate pair, but there is
	* no corresponding low as the high is the last char
	* Recover by ignoring this last char.
	*/
	}
	}

	this.write(m_inputChars,0, len_chunk);
	}
	return;
	}
	}


	s.getChars(0, length , m_inputChars, 0);
	final char[] chars = m_inputChars;
	final int n = length;
	final byte[] buf_loc = m_outputBytes; // local reference for faster access
	int count_loc = count; // local integer for faster access
	int i = 0;
	{
	/* This block could be omitted and the code would produce
	* the same result. But this block exists to give the JIT
	* a better chance of optimizing a tight and common loop which
	* occurs when writing out ASCII characters.
	*/
	char c;
	for(; i < n && (c = chars[i])< 0x80 ; i++ )
	buf_loc[count_loc++] = (byte)c;
	}
	for (; i < n; i++)
	{

	final char c = chars[i];

	if (c < 0x80)
	buf_loc[count_loc++] = (byte) (c);
	else if (c < 0x800)
	{
	buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
	buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
	}
	/**
	* The following else if condition is added to support XML 1.1 Characters for
	* UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
	* Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
	* [1101 11yy] [yyxx xxxx] (low surrogate)
	* * uuuuu = wwww + 1
	*/
	else if (c >= 0xD800 && c <= 0xDBFF)
	{
	char high, low;
	high = c;
	i++;
	low = chars[i];

	buf_loc[count_loc++] = (byte) (0xF0 \| (((high + 0x40) >> 8) & 0xf0));
	buf_loc[count_loc++] = (byte) (0x80 \| (((high + 0x40) >> 2) & 0x3f));
	buf_loc[count_loc++] = (byte) (0x80 \| ((low >> 6) & 0x0f) + ((high << 4) & 0x30));
	buf_loc[count_loc++] = (byte) (0x80 \| (low & 0x3f));
	}
	else
	{
	buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
	buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f));
	buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
	}
	}
	// Store the local integer back into the instance variable
	count = count_loc;

	}

	/**
	* Flush the internal buffer
	*
	* @throws IOException
	*/
	public void flushBuffer() throws IOException
	{

	if (count > 0)
	{
	m_os.write(m_outputBytes, 0, count);

	count = 0;
	}
	}

	/**
	* Flush the stream. If the stream has saved any characters from the
	* various write() methods in a buffer, write them immediately to their
	* intended destination. Then, if that destination is another character or
	* byte stream, flush it. Thus one flush() invocation will flush all the
	* buffers in a chain of Writers and OutputStreams.
	*
	* @exception IOException If an I/O error occurs
	*
	* @throws java.io.IOException
	*/
	public void flush() throws java.io.IOException
	{
	flushBuffer();
	m_os.flush();
	}

	/**
	* Close the stream, flushing it first. Once a stream has been closed,
	* further write() or flush() invocations will cause an IOException to be
	* thrown. Closing a previously-closed stream, however, has no effect.
	*
	* @exception IOException If an I/O error occurs
	*
	* @throws java.io.IOException
	*/
	public void close() throws java.io.IOException
	{
	flushBuffer();
	m_os.close();
	}

	/**
	* Get the output stream where the events will be serialized to.
	*
	* @return reference to the result stream, or null of only a writer was
	* set.
	*/
	public OutputStream getOutputStream()
	{
	return m_os;
	}

	public Writer getWriter()
	{
	// Only one of getWriter() or getOutputStream() can return null
	// This type of writer wraps an OutputStream, not a Writer.
	return null;
	}
	}

Back to index...