/*

* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.

* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

*

* This code is free software; you can redistribute it and/or modify it

* under the terms of the GNU General Public License version 2 only, as

* published by the Free Software Foundation. Oracle designates this

* particular file as subject to the "Classpath" exception as provided

* by Oracle in the LICENSE file that accompanied this code.

*

* This code is distributed in the hope that it will be useful, but WITHOUT

* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

* version 2 for more details (a copy is included in the LICENSE file that

* accompanied this code).

*

* You should have received a copy of the GNU General Public License version

* 2 along with this work; if not, write to the Free Software Foundation,

* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

*

* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

* or visit www.oracle.com if you need additional information or have any

* questions.

*/
|
/* |
|
******************************************************************************* |
|
* Copyright (C) 2009-2014, International Business Machines Corporation and |
|
* others. All Rights Reserved. |
|
******************************************************************************* |
|
*/ |
|
package sun.text.normalizer; |
|
import java.io.IOException; |
|
import java.nio.ByteBuffer; |
|
/** |
|
* @author aheninger |
|
* |
|
* A read-only Trie2, holding 16 bit data values. |
|
* |
|
* A Trie2 is a highly optimized data structure for mapping from Unicode |
|
* code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. |
|
* |
|
* See class Trie2 for descriptions of the API for accessing the contents of a trie. |
|
* |
|
* The fundamental data access methods are declared final in this class, with |
|
* the intent that applications might gain a little extra performance, when compared |
|
* with calling the same methods via the abstract UTrie2 base class. |
|
*/ |
|
public final class Trie2_16 extends Trie2 { |
|
/** |
|
* Internal constructor, not for general use. |
|
*/ |
|
Trie2_16() { |
|
} |
|
/** |
|
* Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). |
|
* The serialized format is identical between ICU4C and ICU4J, so this function |
|
* will work with serialized Trie2s from either. |
|
* |
|
* The serialized Trie2 in the bytes may be in either little or big endian byte order. |
|
* This allows using serialized Tries from ICU4C without needing to consider the |
|
* byte order of the system that created them. |
|
* |
|
* @param bytes a byte buffer to the serialized form of a UTrie2. |
|
* @return An unserialized Trie2_16, ready for use. |
|
* @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. |
|
* @throws IOException if a read error occurs in the buffer. |
|
* @throws ClassCastException if the bytes contain a serialized Trie2_32 |
|
*/ |
|
public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { |
|
return (Trie2_16) Trie2.createFromSerialized(bytes); |
|
} |
|
/** |
|
* Get the value for a code point as stored in the Trie2. |
|
* |
|
* @param codePoint the code point |
|
* @return the value |
|
*/ |
|
@Override |
|
public final int get(int codePoint) { |
|
int value; |
|
int ix; |
|
if (codePoint >= 0) { |
|
if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { |
|
// Ordinary BMP code point, excluding leading surrogates. |
|
// BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. |
|
// 16 bit data is stored in the index array itself. |
|
ix = index[codePoint >> UTRIE2_SHIFT_2]; |
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); |
|
value = index[ix]; |
|
return value; |
|
} |
|
if (codePoint <= 0xffff) { |
|
// Lead Surrogate Code Point. A Separate index section is stored for |
|
// lead surrogate code units and code points. |
|
// The main index has the code unit data. |
|
// For this function, we need the code point data. |
|
// Note: this expression could be refactored for slightly improved efficiency, but |
|
// surrogate code points will be so rare in practice that it's not worth it. |
|
ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; |
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); |
|
value = index[ix]; |
|
return value; |
|
} |
|
if (codePoint < highStart) { |
|
// Supplemental code point, use two-level lookup. |
|
ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); |
|
ix = index[ix]; |
|
ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; |
|
ix = index[ix]; |
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); |
|
value = index[ix]; |
|
return value; |
|
} |
|
if (codePoint <= 0x10ffff) { |
|
value = index[highValueIndex]; |
|
return value; |
|
} |
|
} |
|
// Fall through. The code point is outside of the legal range of 0..0x10ffff. |
|
return errorValue; |
|
} |
|
/** |
|
* Get a Trie2 value for a UTF-16 code unit. |
|
* |
|
* This function returns the same value as get() if the input |
|
* character is outside of the lead surrogate range |
|
* |
|
* There are two values stored in a Trie2 for inputs in the lead |
|
* surrogate range. This function returns the alternate value, |
|
* while Trie2.get() returns the main value. |
|
* |
|
* @param codeUnit a 16 bit code unit or lead surrogate value. |
|
* @return the value |
|
*/ |
|
@Override |
|
public int getFromU16SingleLead(char codeUnit) { |
|
int value; |
|
int ix; |
|
// Because the input is a 16 bit char, we can skip the tests for it being in |
|
// the BMP range. It is. |
|
ix = index[codeUnit >> UTRIE2_SHIFT_2]; |
|
ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); |
|
value = index[ix]; |
|
return value; |
|
} |
|
/** |
|
* @return the number of bytes of the serialized trie |
|
*/ |
|
public int getSerializedLength() { |
|
return 16+(header.indexLength+dataLength)*2; |
|
} |
|
} |