|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
/* |
|
******************************************************************************* |
|
* Copyright (C) 1996-2014, International Business Machines Corporation and |
|
* others. All Rights Reserved. |
|
******************************************************************************* |
|
*/ |
|
|
|
package jdk.internal.icu.impl; |
|
|
|
import java.io.IOException; |
|
import java.nio.ByteBuffer; |
|
import java.util.Iterator; |
|
import java.util.MissingResourceException; |
|
|
|
import jdk.internal.icu.lang.UCharacter.HangulSyllableType; |
|
import jdk.internal.icu.lang.UCharacter.NumericType; |
|
import jdk.internal.icu.text.UTF16; |
|
import jdk.internal.icu.text.UnicodeSet; |
|
import jdk.internal.icu.util.VersionInfo; |
|
|
|
/**
 * <p>Internal class used for the Unicode character property database.</p>
 * <p>This class stores binary data read from uprops.icu.
 * It does not have the capability to parse the data into more high-level
 * information. It only returns bytes of information when required.</p>
 * <p>Due to the form most commonly used for retrieval, an array of char is
 * used to store the binary data.</p>
 * <p>UCharacterPropertyDB also contains information on accessing indexes to
 * significant points in the binary data.</p>
 * <p>Responsibility for molding the binary data into a more meaningful form
 * lies with <a href="UCharacter.html">UCharacter</a>.</p>
 * @author Syn Wee Quek
 * @since release 2.1, February 1st 2002
 */
|
|
|
public final class UCharacterProperty |
|
{ |
|
// public data members ----------------------------------------------- |
|
|
|
|
|
|
|
*/ |
|
public static final UCharacterProperty INSTANCE; |
|
|
|
|
|
|
|
*/ |
|
public Trie2_16 m_trie_; |
|
|
|
|
|
|
|
*/ |
|
public VersionInfo m_unicodeVersion_; |
|
|
|
|
|
|
|
*/ |
|
public static final int TYPE_MASK = 0x1F; |
|
|
|
// uprops.h enum UPropertySource --------------------------------------- *** |
|
|
|
|
|
public static final int SRC_CHAR=1; |
|
|
|
public static final int SRC_PROPSVEC=2; |
|
|
|
public static final int SRC_BIDI=5; |
|
|
|
public static final int SRC_NFC=8; |
|
|
|
public static final int SRC_NFKC=9; |
|
|
|
// public methods ---------------------------------------------------- |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public final int getProperty(int ch) |
|
{ |
|
return m_trie_.get(ch); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public int getAdditional(int codepoint, int column) { |
|
assert column >= 0; |
|
if (column >= m_additionalColumnsCount_) { |
|
return 0; |
|
} |
|
return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column]; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public VersionInfo getAge(int codepoint) |
|
{ |
|
int version = getAdditional(codepoint, 0) >> AGE_SHIFT_; |
|
return VersionInfo.getInstance( |
|
(version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_, |
|
version & LAST_NIBBLE_MASK_, 0, 0); |
|
} |
|
|
|
// int-value and enumerated properties --------------------------------- *** |
|
|
|
public int getType(int c) { |
|
return getProperty(c)&TYPE_MASK; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
private static final int gcbToHst[]={ |
|
HangulSyllableType.NOT_APPLICABLE, |
|
HangulSyllableType.NOT_APPLICABLE, |
|
HangulSyllableType.NOT_APPLICABLE, |
|
HangulSyllableType.NOT_APPLICABLE, |
|
HangulSyllableType.LEADING_JAMO, |
|
HangulSyllableType.NOT_APPLICABLE, |
|
HangulSyllableType.LV_SYLLABLE, |
|
HangulSyllableType.LVT_SYLLABLE, |
|
HangulSyllableType.TRAILING_JAMO, |
|
HangulSyllableType.VOWEL_JAMO /* U_GCB_V */ |
|
/* |
|
* Omit GCB values beyond what we need for hst. |
|
* The code below checks for the array length. |
|
*/ |
|
}; |
|
|
|
private class IntProperty { |
|
int column; |
|
int mask; |
|
int shift; |
|
|
|
IntProperty(int column, int mask, int shift) { |
|
this.column=column; |
|
this.mask=mask; |
|
this.shift=shift; |
|
} |
|
|
|
IntProperty(int source) { |
|
this.column=source; |
|
this.mask=0; |
|
} |
|
|
|
int getValue(int c) { |
|
|
|
return (getAdditional(c, column)&mask)>>>shift; |
|
} |
|
} |
|
|
|
private class BiDiIntProperty extends IntProperty { |
|
BiDiIntProperty() { |
|
super(SRC_BIDI); |
|
} |
|
} |
|
|
|
private class CombiningClassIntProperty extends IntProperty { |
|
CombiningClassIntProperty(int source) { |
|
super(source); |
|
} |
|
} |
|
|
|
private class NormQuickCheckIntProperty extends IntProperty { |
|
int which; |
|
int max; |
|
|
|
NormQuickCheckIntProperty(int source, int which, int max) { |
|
super(source); |
|
this.which=which; |
|
this.max=max; |
|
} |
|
} |
|
|
|
private IntProperty intProp = new BiDiIntProperty() { |
|
int getValue(int c) { |
|
return UBiDiProps.INSTANCE.getPairedBracketType(c); |
|
} |
|
}; |
|
|
|
public int getIntPropertyValue(int c, int which) { |
|
if (which == BIDI_PAIRED_BRACKET_TYPE) { |
|
return intProp.getValue(c); |
|
} |
|
return 0; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static int getRawSupplementary(char lead, char trail) |
|
{ |
|
return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static final int getMask(int type) |
|
{ |
|
return 1 << type; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static int getEuropeanDigit(int ch) { |
|
if ((ch > 0x7a && ch < 0xff21) |
|
|| ch < 0x41 || (ch > 0x5a && ch < 0x61) |
|
|| ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) { |
|
return -1; |
|
} |
|
if (ch <= 0x7a) { |
|
|
|
return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61); |
|
} |
|
|
|
if (ch <= 0xff3a) { |
|
return ch + 10 - 0xff21; |
|
} |
|
|
|
return ch + 10 - 0xff41; |
|
} |
|
|
|
public int digit(int c) { |
|
int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_; |
|
if(value<=9) { |
|
return value; |
|
} else { |
|
return -1; |
|
} |
|
} |
|
|
|
// protected variables ----------------------------------------------- |
|
|
|
|
|
|
|
*/ |
|
Trie2_16 m_additionalTrie_; |
|
|
|
|
|
|
|
*/ |
|
int m_additionalVectors_[]; |
|
|
|
|
|
*/ |
|
int m_additionalColumnsCount_; |
|
|
|
|
|
|
|
*/ |
|
int m_maxBlockScriptValue_; |
|
|
|
|
|
|
|
*/ |
|
int m_maxJTGValue_; |
|
|
|
|
|
*/ |
|
public char[] m_scriptExtensions_; |
|
|
|
// private variables ------------------------------------------------- |
|
|
|
|
|
|
|
*/ |
|
@SuppressWarnings("deprecation") |
|
private static final String DATA_FILE_NAME_ = |
|
"/jdk/internal/icu/impl/data/icudt" + |
|
VersionInfo.ICU_DATA_VERSION_PATH + |
|
"/uprops.icu"; |
|
|
|
|
|
|
|
*/ |
|
private static final int LEAD_SURROGATE_SHIFT_ = 10; |
|
|
|
|
|
*/ |
|
private static final int SURROGATE_OFFSET_ = |
|
UTF16.SUPPLEMENTARY_MIN_VALUE - |
|
(UTF16.SURROGATE_MIN_VALUE << |
|
LEAD_SURROGATE_SHIFT_) - |
|
UTF16.TRAIL_SURROGATE_MIN_VALUE; |
|
|
|
|
|
// property data constants ------------------------------------------------- |
|
|
|
|
|
|
|
*/ |
|
private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6; |
|
private static final int getNumericTypeValue(int props) { |
|
return props >> NUMERIC_TYPE_VALUE_SHIFT_; |
|
} |
|
|
|
/* constants for the storage form of numeric types and values */ |
|
|
|
private static final int NTV_NONE_ = 0; |
|
|
|
private static final int NTV_DECIMAL_START_ = 1; |
|
|
|
private static final int NTV_DIGIT_START_ = 11; |
|
|
|
private static final int NTV_NUMERIC_START_ = 21; |
|
|
|
private static final int ntvGetType(int ntv) { |
|
return |
|
(ntv==NTV_NONE_) ? NumericType.NONE : |
|
(ntv<NTV_DIGIT_START_) ? NumericType.DECIMAL : |
|
(ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT : |
|
NumericType.NUMERIC; |
|
} |
|
|
|
/*
 * Properties in vector word 0
 * Bits
 * 31..24   DerivedAge version major/minor one nibble each
 * 23..22   3..1: Bits 21..20 & 7..0 = Script_Extensions index
 *             3: Script value from Script_Extensions
 *             2: Script=Inherited
 *             1: Script=Common
 *             0: Script=bits 21..20 & 7..0
 * 21..20   Bits 9..8 of the UScriptCode, or index to Script_Extensions
 * 19..17   East Asian Width
 * 16.. 8   UBlockCode
 *  7.. 0   UScriptCode, or index to Script_Extensions
 */
|
|
|
|
|
|
|
*/ |
|
public static final int SCRIPT_X_MASK = 0x00f000ff; |
|
//private static final int SCRIPT_X_SHIFT = 22; |
|
|
|
// The UScriptCode or Script_Extensions index is split across two bit fields. |
|
// (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.) |
|
|
|
public static final int SCRIPT_HIGH_MASK = 0x00300000; |
|
public static final int SCRIPT_HIGH_SHIFT = 12; |
|
public static final int MAX_SCRIPT = 0x3ff; |
|
|
|
|
|
|
|
|
|
*/ |
|
private static final int EAST_ASIAN_MASK_ = 0x000e0000; |
|
|
|
|
|
|
|
*/ |
|
private static final int EAST_ASIAN_SHIFT_ = 17; |
|
|
|
|
|
|
|
*/ |
|
private static final int BLOCK_MASK_ = 0x0001ff00; |
|
|
|
|
|
|
|
*/ |
|
private static final int BLOCK_SHIFT_ = 8; |
|
|
|
|
|
|
|
*/ |
|
public static final int SCRIPT_LOW_MASK = 0x000000ff; |
|
|
|
public static final int mergeScriptCodeOrIndex(int scriptX) { |
|
return |
|
((scriptX & SCRIPT_HIGH_MASK) >> SCRIPT_HIGH_SHIFT) | |
|
(scriptX & SCRIPT_LOW_MASK); |
|
} |
|
|
|
/** |
|
* Additional properties used in internal trie data |
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private static final int WHITE_SPACE_PROPERTY_ = 0; |
|
private static final int DASH_PROPERTY_ = 1; |
|
private static final int HYPHEN_PROPERTY_ = 2; |
|
private static final int QUOTATION_MARK_PROPERTY_ = 3; |
|
private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4; |
|
private static final int MATH_PROPERTY_ = 5; |
|
private static final int HEX_DIGIT_PROPERTY_ = 6; |
|
private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7; |
|
private static final int ALPHABETIC_PROPERTY_ = 8; |
|
private static final int IDEOGRAPHIC_PROPERTY_ = 9; |
|
private static final int DIACRITIC_PROPERTY_ = 10; |
|
private static final int EXTENDER_PROPERTY_ = 11; |
|
private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12; |
|
private static final int GRAPHEME_EXTEND_PROPERTY_ = 13; |
|
private static final int GRAPHEME_LINK_PROPERTY_ = 14; |
|
private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15; |
|
private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16; |
|
private static final int RADICAL_PROPERTY_ = 17; |
|
private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18; |
|
private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19; |
|
private static final int DEPRECATED_PROPERTY_ = 20; |
|
private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21; |
|
private static final int XID_START_PROPERTY_ = 22; |
|
private static final int XID_CONTINUE_PROPERTY_ = 23; |
|
private static final int ID_START_PROPERTY_ = 24; |
|
private static final int ID_CONTINUE_PROPERTY_ = 25; |
|
private static final int GRAPHEME_BASE_PROPERTY_ = 26; |
|
private static final int S_TERM_PROPERTY_ = 27; |
|
private static final int VARIATION_SELECTOR_PROPERTY_ = 28; |
|
private static final int PATTERN_SYNTAX = 29; |
|
private static final int PATTERN_WHITE_SPACE = 30; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private static final int LB_MASK = 0x03f00000; |
|
private static final int LB_SHIFT = 20; |
|
|
|
private static final int SB_MASK = 0x000f8000; |
|
private static final int SB_SHIFT = 15; |
|
|
|
private static final int WB_MASK = 0x00007c00; |
|
private static final int WB_SHIFT = 10; |
|
|
|
private static final int GCB_MASK = 0x000003e0; |
|
private static final int GCB_SHIFT = 5; |
|
|
|
|
|
|
|
|
|
*/ |
|
private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f; |
|
|
|
|
|
|
|
*/ |
|
private static final int FIRST_NIBBLE_SHIFT_ = 0x4; |
|
|
|
|
|
*/ |
|
private static final int LAST_NIBBLE_MASK_ = 0xF; |
|
|
|
|
|
*/ |
|
private static final int AGE_SHIFT_ = 24; |
|
|
|
// private constructors -------------------------------------------------- |
|
|
|
|
|
|
|
|
|
*/ |
|
private UCharacterProperty() throws IOException |
|
{ |
|
|
|
ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_); |
|
m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable()); |
|
|
|
int propertyOffset = bytes.getInt(); |
|
bytes.getInt(); |
|
bytes.getInt(); |
|
int additionalOffset = bytes.getInt(); |
|
int additionalVectorsOffset = bytes.getInt(); |
|
m_additionalColumnsCount_ = bytes.getInt(); |
|
int scriptExtensionsOffset = bytes.getInt(); |
|
int reservedOffset7 = bytes.getInt(); |
|
bytes.getInt(); |
|
bytes.getInt(); |
|
m_maxBlockScriptValue_ = bytes.getInt(); |
|
m_maxJTGValue_ = bytes.getInt(); |
|
ICUBinary.skipBytes(bytes, (16 - 12) << 2); |
|
|
|
|
|
m_trie_ = Trie2_16.createFromSerialized(bytes); |
|
int expectedTrieLength = (propertyOffset - 16) * 4; |
|
int trieLength = m_trie_.getSerializedLength(); |
|
if(trieLength > expectedTrieLength) { |
|
throw new IOException("uprops.icu: not enough bytes for main trie"); |
|
} |
|
|
|
ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); |
|
|
|
|
|
ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4); |
|
|
|
if(m_additionalColumnsCount_ > 0) { |
|
|
|
m_additionalTrie_ = Trie2_16.createFromSerialized(bytes); |
|
expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4; |
|
trieLength = m_additionalTrie_.getSerializedLength(); |
|
if(trieLength > expectedTrieLength) { |
|
throw new IOException("uprops.icu: not enough bytes for additional-properties trie"); |
|
} |
|
|
|
ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); |
|
|
|
|
|
int size = scriptExtensionsOffset - additionalVectorsOffset; |
|
m_additionalVectors_ = new int[size]; |
|
for (int i = 0; i < size; i ++) { |
|
m_additionalVectors_[i] = bytes.getInt(); |
|
} |
|
} |
|
|
|
|
|
int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2; |
|
if(numChars > 0) { |
|
m_scriptExtensions_ = new char[numChars]; |
|
for(int i = 0; i < numChars; ++i) { |
|
m_scriptExtensions_[i] = bytes.getChar(); |
|
} |
|
} |
|
} |
|
|
|
private static final class IsAcceptable implements ICUBinary.Authenticate { |
|
|
|
public boolean isDataVersionAcceptable(byte version[]) { |
|
return version[0] == 7; |
|
} |
|
} |
|
|
|
private static final int DATA_FORMAT = 0x5550726F; |
|
|
|
public void upropsvec_addPropertyStarts(UnicodeSet set) { |
|
|
|
if(m_additionalColumnsCount_>0) { |
|
|
|
Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator(); |
|
Trie2.Range range; |
|
while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { |
|
set.add(range.startCodePoint); |
|
} |
|
} |
|
} |
|
|
|
// This static initializer block must be placed after the other static
// member declarations: the constructor it runs reads static members of
// this class (DATA_FILE_NAME_, DATA_FORMAT).
static {
    try {
        INSTANCE = new UCharacterProperty();
    } catch (IOException e) {
        // MissingResourceException has no public constructor that accepts a
        // cause, so attach the IOException explicitly; otherwise its stack
        // trace is lost and only the message survives.
        MissingResourceException missing =
                new MissingResourceException(e.getMessage(), DATA_FILE_NAME_, "");
        missing.initCause(e);
        throw missing;
    }
}
|
|
|
|
|
// Moved from UProperty.java |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015; |
|
|
|
} |