Back to index...
/*
 * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 *******************************************************************************
 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */
package sun.text.normalizer;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
/**
 * Trie implementation which stores data in char, 16 bits.
 * @author synwee
 * @see com.ibm.icu.impl.Trie
 * @since release 2.1, Jan 01 2002
 */
 // note that i need to handle the block calculations later, since chartrie
 // in icu4c uses the same index array.
public class CharTrie extends Trie
{
    // public constructors ---------------------------------------------
    /**
    * <p>Creates a new Trie with the settings for the trie data.</p>
    * <p>Unserialize the 32-bit-aligned input stream and use the data for the
    * trie.</p>
    * @param inputStream file input stream to a ICU data file, containing
    *                    the trie
    * @param dataManipulate object which provides methods to parse the char
    *                        data
    * @throws IOException thrown when data reading fails
    * @draft 2.1
    */
    public CharTrie(InputStream inputStream,
                    DataManipulate dataManipulate) throws IOException
    {
        super(inputStream, dataManipulate);
        if (!isCharTrie()) {
            throw new IllegalArgumentException(
                               "Data given does not belong to a char trie.");
        }
        m_friendAgent_ = new FriendAgent();
    }
    /**
     * Make a dummy CharTrie.
     * A dummy trie is an empty runtime trie, used when a real data trie cannot
     * be loaded.
     *
     * The trie always returns the initialValue,
     * or the leadUnitValue for lead surrogate code points.
     * The Latin-1 part is always set up to be linear.
     *
     * @param initialValue the initial value that is set for all code points
     * @param leadUnitValue the value for lead surrogate code _units_ that do not
     *                      have associated supplementary data
     * @param dataManipulate object which provides methods to parse the char data
     */
    public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
        super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
        int dataLength, latin1Length, i, limit;
        char block;
        /* calculate the actual size of the dummy trie data */
        /* max(Latin-1, block 0) */
        dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
        if(leadUnitValue!=initialValue) {
            dataLength+=DATA_BLOCK_LENGTH;
        }
        m_data_=new char[dataLength];
        m_dataLength_=dataLength;
        m_initialValue_=(char)initialValue;
        /* fill the index and data arrays */
        /* indexes are preset to 0 (block 0) */
        /* Latin-1 data */
        for(i=0; i<latin1Length; ++i) {
            m_data_[i]=(char)initialValue;
        }
        if(leadUnitValue!=initialValue) {
            /* indexes for lead surrogate code units to the block after Latin-1 */
            block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
            i=0xd800>>INDEX_STAGE_1_SHIFT_;
            limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
            for(; i<limit; ++i) {
                m_index_[i]=block;
            }
            /* data for lead surrogate code units */
            limit=latin1Length+DATA_BLOCK_LENGTH;
            for(i=latin1Length; i<limit; ++i) {
                m_data_[i]=(char)leadUnitValue;
            }
        }
        m_friendAgent_ = new FriendAgent();
    }
    /**
     * Java friend implementation
     */
    public class FriendAgent
    {
        /**
         * Gives out the index array of the trie
         * @return index array of trie
         */
        public char[] getPrivateIndex()
        {
            return m_index_;
        }
        /**
         * Gives out the data array of the trie
         * @return data array of trie
         */
        public char[] getPrivateData()
        {
            return m_data_;
        }
        /**
         * Gives out the data offset in the trie
         * @return data offset in the trie
         */
        public int getPrivateInitialValue()
        {
            return m_initialValue_;
        }
    }
    // public methods --------------------------------------------------
    /**
     * Java friend implementation
     * To store the index and data array into the argument.
     * @param friend java friend UCharacterProperty object to store the array
     */
    public void putIndexData(UCharacterProperty friend)
    {
        friend.setIndexData(m_friendAgent_);
    }
    /**
    * Gets the value associated with the codepoint.
    * If no value is associated with the codepoint, a default value will be
    * returned.
    * @param ch codepoint
    * @return offset to data
    * @draft 2.1
    */
    public final char getCodePointValue(int ch)
    {
        int offset;
        // fastpath for U+0000..U+D7FF
        if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            // copy of getRawOffset()
            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
                    + (ch & INDEX_STAGE_3_MASK_);
            return m_data_[offset];
        }
        // handle U+D800..U+10FFFF
        offset = getCodePointOffset(ch);
        // return -1 if there is an error, in this case we return the default
        // value: m_initialValue_
        return (offset >= 0) ? m_data_[offset] : m_initialValue_;
    }
    /**
    * Gets the value to the data which this lead surrogate character points
    * to.
    * Returned data may contain folding offset information for the next
    * trailing surrogate character.
    * This method does not guarantee correct results for trail surrogates.
    * @param ch lead surrogate character
    * @return data value
    * @draft 2.1
    */
    public final char getLeadValue(char ch)
    {
       return m_data_[getLeadOffset(ch)];
    }
    /**
    * Get the value associated with a pair of surrogates.
    * @param lead a lead surrogate
    * @param trail a trail surrogate
    * @draft 2.1
    */
    public final char getSurrogateValue(char lead, char trail)
    {
        int offset = getSurrogateOffset(lead, trail);
        if (offset > 0) {
            return m_data_[offset];
        }
        return m_initialValue_;
    }
    /**
    * <p>Get a value from a folding offset (from the value of a lead surrogate)
    * and a trail surrogate.</p>
    * <p>If the
    * @param leadvalue value associated with the lead surrogate which contains
    *        the folding offset
    * @param trail surrogate
    * @return trie data value associated with the trail character
    * @draft 2.1
    */
    public final char getTrailValue(int leadvalue, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }
        int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
        if (offset > 0) {
            return m_data_[getRawOffset(offset,
                                        (char)(trail & SURROGATE_MASK_))];
        }
        return m_initialValue_;
    }
    // protected methods -----------------------------------------------
    /**
    * <p>Parses the input stream and stores its trie content into a index and
    * data array</p>
    * @param inputStream data input stream containing trie data
    * @exception IOException thrown when data reading fails
    */
    protected final void unserialize(InputStream inputStream)
                                                throws IOException
    {
        DataInputStream input = new DataInputStream(inputStream);
        int indexDataLength = m_dataOffset_ + m_dataLength_;
        m_index_ = new char[indexDataLength];
        for (int i = 0; i < indexDataLength; i ++) {
            m_index_[i] = input.readChar();
        }
        m_data_           = m_index_;
        m_initialValue_   = m_data_[m_dataOffset_];
    }
    /**
    * Gets the offset to the data which the surrogate pair points to.
    * @param lead lead surrogate
    * @param trail trailing surrogate
    * @return offset to data
    * @draft 2.1
    */
    protected final int getSurrogateOffset(char lead, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }
        // get fold position for the next trail surrogate
        int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
        // get the real data from the folded lead/trail units
        if (offset > 0) {
            return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
        }
        // return -1 if there is an error, in this case we return the default
        // value: m_initialValue_
        return -1;
    }
    /**
    * Gets the value at the argument index.
    * For use internally in TrieIterator.
    * @param index value at index will be retrieved
    * @return 32 bit value
    * @see com.ibm.icu.impl.TrieIterator
    * @draft 2.1
    */
    protected final int getValue(int index)
    {
        return m_data_[index];
    }
    /**
    * Gets the default initial value
    * @return 32 bit value
    * @draft 2.1
    */
    protected final int getInitialValue()
    {
        return m_initialValue_;
    }
    // private data members --------------------------------------------
    /**
    * Default value
    */
    private char m_initialValue_;
    /**
    * Array of char data
    */
    private char m_data_[];
    /**
     * Agent for friends
     */
    private FriendAgent m_friendAgent_;
}
Back to index...