|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
package jdk.internal.icu.impl; |
|
|
|
import java.io.IOException; |
|
import java.nio.ByteBuffer; |
|
|
|
import jdk.internal.icu.lang.UCharacter; |
|
import jdk.internal.icu.text.Normalizer2; |
|
import jdk.internal.icu.text.UTF16; |
|
import jdk.internal.icu.util.CodePointTrie; |
|
import jdk.internal.icu.util.VersionInfo; |
|
|
|
|
|
public final class NormalizerImpl { |
|
public static final class Hangul { |
|
|
|
public static final int JAMO_L_BASE=0x1100; |
|
public static final int JAMO_V_BASE=0x1161; |
|
public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */ |
|
|
|
public static final int HANGUL_BASE=0xac00; |
|
public static final int HANGUL_END=0xd7a3; |
|
|
|
public static final int JAMO_L_COUNT=19; |
|
public static final int JAMO_V_COUNT=21; |
|
public static final int JAMO_T_COUNT=28; |
|
|
|
public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT; |
|
public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT; |
|
|
|
public static boolean isHangul(int c) { |
|
return HANGUL_BASE<=c && c<HANGUL_LIMIT; |
|
} |
|
public static boolean isHangulLV(int c) { |
|
c-=HANGUL_BASE; |
|
return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
public static int decompose(int c, Appendable buffer) { |
|
try { |
|
c-=HANGUL_BASE; |
|
int c2=c%JAMO_T_COUNT; |
|
c/=JAMO_T_COUNT; |
|
buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT)); |
|
buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT)); |
|
if(c2==0) { |
|
return 2; |
|
} else { |
|
buffer.append((char)(JAMO_T_BASE+c2)); |
|
return 3; |
|
} |
|
} catch(IOException e) { |
|
throw new InternalError(e); |
|
} |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static final class ReorderingBuffer implements Appendable { |
|
public ReorderingBuffer(NormalizerImpl ni, Appendable dest, int destCapacity) { |
|
impl=ni; |
|
app=dest; |
|
if (app instanceof StringBuilder) { |
|
appIsStringBuilder=true; |
|
str=(StringBuilder)dest; |
|
|
|
str.ensureCapacity(destCapacity); |
|
reorderStart=0; |
|
if(str.length()==0) { |
|
lastCC=0; |
|
} else { |
|
setIterator(); |
|
lastCC=previousCC(); |
|
|
|
if(lastCC>1) { |
|
while(previousCC()>1) {} |
|
} |
|
reorderStart=codePointLimit; |
|
} |
|
} else { |
|
appIsStringBuilder=false; |
|
str=new StringBuilder(); |
|
reorderStart=0; |
|
lastCC=0; |
|
} |
|
} |
|
|
|
public boolean isEmpty() { return str.length()==0; } |
|
public int length() { return str.length(); } |
|
public int getLastCC() { return lastCC; } |
|
|
|
public StringBuilder getStringBuilder() { return str; } |
|
|
|
public boolean equals(CharSequence s, int start, int limit) { |
|
return UTF16Plus.equal(str, 0, str.length(), s, start, limit); |
|
} |
|
|
|
public void append(int c, int cc) { |
|
if(lastCC<=cc || cc==0) { |
|
str.appendCodePoint(c); |
|
lastCC=cc; |
|
if(cc<=1) { |
|
reorderStart=str.length(); |
|
} |
|
} else { |
|
insert(c, cc); |
|
} |
|
} |
|
public void append(CharSequence s, int start, int limit, boolean isNFD, |
|
int leadCC, int trailCC) { |
|
if(start==limit) { |
|
return; |
|
} |
|
if(lastCC<=leadCC || leadCC==0) { |
|
if(trailCC<=1) { |
|
reorderStart=str.length()+(limit-start); |
|
} else if(leadCC<=1) { |
|
reorderStart=str.length()+1; |
|
} |
|
str.append(s, start, limit); |
|
lastCC=trailCC; |
|
} else { |
|
int c=Character.codePointAt(s, start); |
|
start+=Character.charCount(c); |
|
insert(c, leadCC); |
|
while(start<limit) { |
|
c=Character.codePointAt(s, start); |
|
start+=Character.charCount(c); |
|
if(start<limit) { |
|
if (isNFD) { |
|
leadCC = getCCFromYesOrMaybe(impl.getNorm16(c)); |
|
} else { |
|
leadCC = impl.getCC(impl.getNorm16(c)); |
|
} |
|
} else { |
|
leadCC=trailCC; |
|
} |
|
append(c, leadCC); |
|
} |
|
} |
|
} |
|
// The following append() methods work like C++ appendZeroCC(). |
|
// They assume that the cc or trailCC of their input is 0. |
|
|
|
@Override |
|
public ReorderingBuffer append(char c) { |
|
str.append(c); |
|
lastCC=0; |
|
reorderStart=str.length(); |
|
return this; |
|
} |
|
public void appendZeroCC(int c) { |
|
str.appendCodePoint(c); |
|
lastCC=0; |
|
reorderStart=str.length(); |
|
} |
|
@Override |
|
public ReorderingBuffer append(CharSequence s) { |
|
if(s.length()!=0) { |
|
str.append(s); |
|
lastCC=0; |
|
reorderStart=str.length(); |
|
} |
|
return this; |
|
} |
|
@Override |
|
public ReorderingBuffer append(CharSequence s, int start, int limit) { |
|
if(start!=limit) { |
|
str.append(s, start, limit); |
|
lastCC=0; |
|
reorderStart=str.length(); |
|
} |
|
return this; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void flush() { |
|
if(appIsStringBuilder) { |
|
reorderStart=str.length(); |
|
} else { |
|
try { |
|
app.append(str); |
|
str.setLength(0); |
|
reorderStart=0; |
|
} catch(IOException e) { |
|
throw new InternalError(e); |
|
} |
|
} |
|
lastCC=0; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) { |
|
if(appIsStringBuilder) { |
|
str.append(s, start, limit); |
|
reorderStart=str.length(); |
|
} else { |
|
try { |
|
app.append(str).append(s, start, limit); |
|
str.setLength(0); |
|
reorderStart=0; |
|
} catch(IOException e) { |
|
throw new InternalError(e); |
|
} |
|
} |
|
lastCC=0; |
|
return this; |
|
} |
|
public void remove() { |
|
str.setLength(0); |
|
lastCC=0; |
|
reorderStart=0; |
|
} |
|
public void removeSuffix(int suffixLength) { |
|
int oldLength=str.length(); |
|
str.delete(oldLength-suffixLength, oldLength); |
|
lastCC=0; |
|
reorderStart=str.length(); |
|
} |
|
|
|
// Inserts c somewhere before the last character. |
|
|
|
private void insert(int c, int cc) { |
|
for(setIterator(), skipPrevious(); previousCC()>cc;) {} |
|
|
|
if(c<=0xffff) { |
|
str.insert(codePointLimit, (char)c); |
|
if(cc<=1) { |
|
reorderStart=codePointLimit+1; |
|
} |
|
} else { |
|
str.insert(codePointLimit, Character.toChars(c)); |
|
if(cc<=1) { |
|
reorderStart=codePointLimit+2; |
|
} |
|
} |
|
} |
|
|
|
private final NormalizerImpl impl; |
|
private final Appendable app; |
|
private final StringBuilder str; |
|
private final boolean appIsStringBuilder; |
|
private int reorderStart; |
|
private int lastCC; |
|
|
|
|
|
private void setIterator() { codePointStart=str.length(); } |
|
private void skipPrevious() { |
|
codePointLimit=codePointStart; |
|
codePointStart=str.offsetByCodePoints(codePointStart, -1); |
|
} |
|
private int previousCC() { |
|
codePointLimit=codePointStart; |
|
if(reorderStart>=codePointStart) { |
|
return 0; |
|
} |
|
int c=str.codePointBefore(codePointStart); |
|
codePointStart-=Character.charCount(c); |
|
return impl.getCCFromYesOrMaybeCP(c); |
|
} |
|
private int codePointStart, codePointLimit; |
|
} |
|
|
|
// TODO: Propose as public API on the UTF16 class. |
|
// TODO: Propose widening UTF16 methods that take char to take int. |
|
|
|
public static final class UTF16Plus { |
|
|
|
|
|
|
|
|
|
*/ |
|
public static boolean isLeadSurrogate(int c) { return (c & 0xfffffc00) == 0xd800; } |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static boolean isSurrogateLead(int c) { return (c&0x400)==0; } |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public static boolean equal(CharSequence s1, int start1, int limit1, |
|
CharSequence s2, int start2, int limit2) { |
|
if((limit1-start1)!=(limit2-start2)) { |
|
return false; |
|
} |
|
if(s1==s2 && start1==start2) { |
|
return true; |
|
} |
|
while(start1<limit1) { |
|
if(s1.charAt(start1++)!=s2.charAt(start2++)) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
} |
|
} |
|
|
|
/**
 * Creates an instance with no data; the data fields are filled in by one of
 * the load() methods.
 */
public NormalizerImpl() {
}
|
|
|
private static final class IsAcceptable implements ICUBinary.Authenticate { |
|
public boolean isDataVersionAcceptable(byte version[]) { |
|
return version[0]==4; |
|
} |
|
} |
|
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); |
|
private static final int DATA_FORMAT = 0x4e726d32; |
|
|
|
public NormalizerImpl load(ByteBuffer bytes) { |
|
try { |
|
dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE); |
|
int indexesLength=bytes.getInt()/4; |
|
if(indexesLength<=IX_MIN_LCCC_CP) { |
|
throw new InternalError("Normalizer2 data: not enough indexes"); |
|
} |
|
int[] inIndexes=new int[indexesLength]; |
|
inIndexes[0]=indexesLength*4; |
|
for(int i=1; i<indexesLength; ++i) { |
|
inIndexes[i]=bytes.getInt(); |
|
} |
|
|
|
minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP]; |
|
minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP]; |
|
minLcccCP=inIndexes[IX_MIN_LCCC_CP]; |
|
|
|
minYesNo=inIndexes[IX_MIN_YES_NO]; |
|
minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]; |
|
minNoNo=inIndexes[IX_MIN_NO_NO]; |
|
minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]; |
|
minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]; |
|
minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY]; |
|
limitNoNo=inIndexes[IX_LIMIT_NO_NO]; |
|
minMaybeYes=inIndexes[IX_MIN_MAYBE_YES]; |
|
assert((minMaybeYes&7)==0); |
|
centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1; |
|
|
|
|
|
int offset=inIndexes[IX_NORM_TRIE_OFFSET]; |
|
int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; |
|
int triePosition = bytes.position(); |
|
normTrie = CodePointTrie.Fast16.fromBinary(bytes); |
|
int trieLength = bytes.position() - triePosition; |
|
if(trieLength>(nextOffset-offset)) { |
|
throw new InternalError("Normalizer2 data: not enough bytes for normTrie"); |
|
} |
|
ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength); |
|
|
|
|
|
offset=nextOffset; |
|
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; |
|
int numChars=(nextOffset-offset)/2; |
|
if(numChars!=0) { |
|
maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0); |
|
extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT); |
|
} |
|
|
|
|
|
offset=nextOffset; |
|
smallFCD=new byte[0x100]; |
|
bytes.get(smallFCD); |
|
|
|
return this; |
|
} catch(IOException e) { |
|
throw new InternalError(e); |
|
} |
|
} |
|
public NormalizerImpl load(String name) { |
|
return load(ICUBinary.getRequiredData(name)); |
|
} |
|
|
|
// The trie stores values for lead surrogate code *units*. |
|
|
|
public int getNorm16(int c) { |
|
return UTF16Plus.isLeadSurrogate(c) ? INERT : normTrie.get(c); |
|
} |
|
public int getRawNorm16(int c) { return normTrie.get(c); } |
|
public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; } |
|
public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; } |
|
public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; } |
|
|
|
public int getCC(int norm16) { |
|
if(norm16>=MIN_NORMAL_MAYBE_YES) { |
|
return getCCFromNormalYesOrMaybe(norm16); |
|
} |
|
if(norm16<minNoNo || limitNoNo<=norm16) { |
|
return 0; |
|
} |
|
return getCCFromNoNo(norm16); |
|
} |
|
public static int getCCFromNormalYesOrMaybe(int norm16) { |
|
return (norm16 >> OFFSET_SHIFT) & 0xff; |
|
} |
|
public static int getCCFromYesOrMaybe(int norm16) { |
|
return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0; |
|
} |
|
public int getCCFromYesOrMaybeCP(int c) { |
|
if (c < minCompNoMaybeCP) { return 0; } |
|
return getCCFromYesOrMaybe(getNorm16(c)); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public int getFCD16(int c) { |
|
if(c<minDecompNoCP) { |
|
return 0; |
|
} else if(c<=0xffff) { |
|
if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; } |
|
} |
|
return getFCD16FromNormData(c); |
|
} |
|
|
|
public boolean singleLeadMightHaveNonZeroFCD16(int lead) { |
|
|
|
byte bits=smallFCD[lead>>8]; |
|
if(bits==0) { return false; } |
|
return ((bits>>((lead>>5)&7))&1)!=0; |
|
} |
|
|
|
|
|
public int getFCD16FromNormData(int c) { |
|
int norm16=getNorm16(c); |
|
if (norm16 >= limitNoNo) { |
|
if(norm16>=MIN_NORMAL_MAYBE_YES) { |
|
|
|
norm16=getCCFromNormalYesOrMaybe(norm16); |
|
return norm16|(norm16<<8); |
|
} else if(norm16>=minMaybeYes) { |
|
return 0; |
|
} else { |
|
int deltaTrailCC = norm16 & DELTA_TCCC_MASK; |
|
if (deltaTrailCC <= DELTA_TCCC_1) { |
|
return deltaTrailCC >> OFFSET_SHIFT; |
|
} |
|
|
|
c=mapAlgorithmic(c, norm16); |
|
norm16=getRawNorm16(c); |
|
} |
|
} |
|
if(norm16<=minYesNo || isHangulLVT(norm16)) { |
|
|
|
return 0; |
|
} |
|
|
|
int mapping=norm16>>OFFSET_SHIFT; |
|
int firstUnit=extraData.charAt(mapping); |
|
int fcd16=firstUnit>>8; |
|
if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { |
|
fcd16|=extraData.charAt(mapping-1)&0xff00; |
|
} |
|
return fcd16; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public String getDecomposition(int c) { |
|
int norm16; |
|
if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) { |
|
|
|
return null; |
|
} |
|
int decomp = -1; |
|
if(isDecompNoAlgorithmic(norm16)) { |
|
|
|
decomp=c=mapAlgorithmic(c, norm16); |
|
|
|
norm16 = getRawNorm16(c); |
|
} |
|
if (norm16 < minYesNo) { |
|
if(decomp<0) { |
|
return null; |
|
} else { |
|
return UTF16.valueOf(decomp); |
|
} |
|
} else if(isHangulLV(norm16) || isHangulLVT(norm16)) { |
|
|
|
StringBuilder buffer=new StringBuilder(); |
|
Hangul.decompose(c, buffer); |
|
return buffer.toString(); |
|
} |
|
|
|
int mapping=norm16>>OFFSET_SHIFT; |
|
int length=extraData.charAt(mapping++)&MAPPING_LENGTH_MASK; |
|
return extraData.substring(mapping, mapping+length); |
|
} |
|
|
|
|
|
public static final int MIN_YES_YES_WITH_CC=0xfe02; |
|
public static final int JAMO_VT=0xfe00; |
|
public static final int MIN_NORMAL_MAYBE_YES=0xfc00; |
|
public static final int JAMO_L=2; |
|
public static final int INERT=1; |
|
|
|
|
|
public static final int HAS_COMP_BOUNDARY_AFTER=1; |
|
public static final int OFFSET_SHIFT=1; |
|
|
|
// For algorithmic one-way mappings, norm16 bits 2..1 indicate the |
|
|
|
public static final int DELTA_TCCC_0=0; |
|
public static final int DELTA_TCCC_1=2; |
|
public static final int DELTA_TCCC_GT_1=4; |
|
public static final int DELTA_TCCC_MASK=6; |
|
public static final int DELTA_SHIFT=3; |
|
|
|
public static final int MAX_DELTA=0x40; |
|
|
|
|
|
public static final int IX_NORM_TRIE_OFFSET=0; |
|
public static final int IX_EXTRA_DATA_OFFSET=1; |
|
public static final int IX_SMALL_FCD_OFFSET=2; |
|
public static final int IX_RESERVED3_OFFSET=3; |
|
public static final int IX_TOTAL_SIZE=7; |
|
public static final int MIN_CCC_LCCC_CP=0x300; |
|
|
|
public static final int IX_MIN_DECOMP_NO_CP=8; |
|
public static final int IX_MIN_COMP_NO_MAYBE_CP=9; |
|
|
|
// Norm16 value thresholds for quick check combinations and types of extra data. |
|
|
|
|
|
public static final int IX_MIN_YES_NO=10; |
|
|
|
public static final int IX_MIN_NO_NO=11; |
|
public static final int IX_LIMIT_NO_NO=12; |
|
public static final int IX_MIN_MAYBE_YES=13; |
|
|
|
|
|
public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14; |
|
|
|
public static final int IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE=15; |
|
|
|
public static final int IX_MIN_NO_NO_COMP_NO_MAYBE_CC=16; |
|
|
|
public static final int IX_MIN_NO_NO_EMPTY=17; |
|
|
|
public static final int IX_MIN_LCCC_CP=18; |
|
public static final int IX_COUNT=20; |
|
|
|
public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80; |
|
public static final int MAPPING_HAS_RAW_MAPPING=0x40; |
|
|
|
public static final int MAPPING_LENGTH_MASK=0x1f; |
|
|
|
public static final int COMP_1_LAST_TUPLE=0x8000; |
|
public static final int COMP_1_TRIPLE=1; |
|
public static final int COMP_1_TRAIL_LIMIT=0x3400; |
|
public static final int COMP_1_TRAIL_MASK=0x7ffe; |
|
public static final int COMP_1_TRAIL_SHIFT=9; |
|
public static final int COMP_2_TRAIL_SHIFT=6; |
|
public static final int COMP_2_TRAIL_MASK=0xffc0; |
|
|
|
// higher-level functionality ------------------------------------------ *** |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public void decompose(CharSequence s, int src, int limit, StringBuilder dest, |
|
int destLengthEstimate) { |
|
if(destLengthEstimate<0) { |
|
destLengthEstimate=limit-src; |
|
} |
|
dest.setLength(0); |
|
ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate); |
|
decompose(s, src, limit, buffer); |
|
} |
|
|
|
// Dual functionality: |
|
// buffer!=NULL: normalize |
|
|
|
public int decompose(CharSequence s, int src, int limit, |
|
ReorderingBuffer buffer) { |
|
int minNoCP=minDecompNoCP; |
|
|
|
int prevSrc; |
|
int c=0; |
|
int norm16=0; |
|
|
|
|
|
int prevBoundary=src; |
|
int prevCC=0; |
|
|
|
for(;;) { |
|
|
|
for(prevSrc=src; src!=limit;) { |
|
if( (c=s.charAt(src))<minNoCP || |
|
isMostDecompYesAndZeroCC(norm16=normTrie.bmpGet(c)) |
|
) { |
|
++src; |
|
} else if(!UTF16Plus.isLeadSurrogate(c)) { |
|
break; |
|
} else { |
|
char c2; |
|
if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) { |
|
c = Character.toCodePoint((char)c, c2); |
|
norm16 = normTrie.suppGet(c); |
|
if (isMostDecompYesAndZeroCC(norm16)) { |
|
src += 2; |
|
} else { |
|
break; |
|
} |
|
} else { |
|
++src; |
|
} |
|
} |
|
} |
|
|
|
if(src!=prevSrc) { |
|
if(buffer!=null) { |
|
buffer.flushAndAppendZeroCC(s, prevSrc, src); |
|
} else { |
|
prevCC=0; |
|
prevBoundary=src; |
|
} |
|
} |
|
if(src==limit) { |
|
break; |
|
} |
|
|
|
|
|
src+=Character.charCount(c); |
|
if(buffer!=null) { |
|
decompose(c, norm16, buffer); |
|
} else { |
|
if(isDecompYes(norm16)) { |
|
int cc=getCCFromYesOrMaybe(norm16); |
|
if(prevCC<=cc || cc==0) { |
|
prevCC=cc; |
|
if(cc<=1) { |
|
prevBoundary=src; |
|
} |
|
continue; |
|
} |
|
} |
|
return prevBoundary; |
|
} |
|
} |
|
return src; |
|
} |
|
public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) { |
|
int limit=s.length(); |
|
if(limit==0) { |
|
return; |
|
} |
|
if(doDecompose) { |
|
decompose(s, 0, limit, buffer); |
|
return; |
|
} |
|
|
|
int c=Character.codePointAt(s, 0); |
|
int src=0; |
|
int firstCC, prevCC, cc; |
|
firstCC=prevCC=cc=getCC(getNorm16(c)); |
|
while(cc!=0) { |
|
prevCC=cc; |
|
src+=Character.charCount(c); |
|
if(src>=limit) { |
|
break; |
|
} |
|
c=Character.codePointAt(s, src); |
|
cc=getCC(getNorm16(c)); |
|
}; |
|
buffer.append(s, 0, src, false, firstCC, prevCC); |
|
buffer.append(s, src, limit); |
|
} |
|
|
|
// Very similar to composeQuickCheck(): Make the same changes in both places if relevant. |
|
// doCompose: normalize |
|
|
|
public boolean compose(CharSequence s, int src, int limit, |
|
boolean onlyContiguous, |
|
boolean doCompose, |
|
ReorderingBuffer buffer) { |
|
int prevBoundary=src; |
|
int minNoMaybeCP=minCompNoMaybeCP; |
|
|
|
for (;;) { |
|
// Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, |
|
|
|
int prevSrc; |
|
int c = 0; |
|
int norm16 = 0; |
|
for (;;) { |
|
if (src == limit) { |
|
if (prevBoundary != limit && doCompose) { |
|
buffer.append(s, prevBoundary, limit); |
|
} |
|
return true; |
|
} |
|
if( (c=s.charAt(src))<minNoMaybeCP || |
|
isCompYesAndZeroCC(norm16=normTrie.bmpGet(c)) |
|
) { |
|
++src; |
|
} else { |
|
prevSrc = src++; |
|
if (!UTF16Plus.isLeadSurrogate(c)) { |
|
break; |
|
} else { |
|
char c2; |
|
if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) { |
|
++src; |
|
c = Character.toCodePoint((char)c, c2); |
|
norm16 = normTrie.suppGet(c); |
|
if (!isCompYesAndZeroCC(norm16)) { |
|
break; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. |
|
// The current character is either a "noNo" (has a mapping) |
|
// or a "maybeYes" (combines backward) |
|
// or a "yesYes" with ccc!=0. |
|
// It is not a Hangul syllable or Jamo L because those have "yes" properties. |
|
|
|
|
|
if (!isMaybeOrNonZeroCC(norm16)) { |
|
if (!doCompose) { |
|
return false; |
|
} |
|
// Fast path for mapping a character that is immediately surrounded by boundaries. |
|
|
|
if (isDecompNoAlgorithmic(norm16)) { |
|
// Maps to a single isCompYesAndZeroCC character |
|
|
|
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || |
|
hasCompBoundaryBefore(s, src, limit)) { |
|
if (prevBoundary != prevSrc) { |
|
buffer.append(s, prevBoundary, prevSrc); |
|
} |
|
buffer.append(mapAlgorithmic(c, norm16), 0); |
|
prevBoundary = src; |
|
continue; |
|
} |
|
} else if (norm16 < minNoNoCompBoundaryBefore) { |
|
|
|
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || |
|
hasCompBoundaryBefore(s, src, limit)) { |
|
if (prevBoundary != prevSrc) { |
|
buffer.append(s, prevBoundary, prevSrc); |
|
} |
|
int mapping = norm16 >> OFFSET_SHIFT; |
|
int length = extraData.charAt(mapping++) & MAPPING_LENGTH_MASK; |
|
buffer.append(extraData, mapping, mapping + length); |
|
prevBoundary = src; |
|
continue; |
|
} |
|
} else if (norm16 >= minNoNoEmpty) { |
|
// The current character maps to nothing. |
|
// Simply omit it from the output if there is a boundary before _or_ after it. |
|
|
|
if (hasCompBoundaryBefore(s, src, limit) || |
|
hasCompBoundaryAfter(s, prevBoundary, prevSrc, onlyContiguous)) { |
|
if (prevBoundary != prevSrc) { |
|
buffer.append(s, prevBoundary, prevSrc); |
|
} |
|
prevBoundary = src; |
|
continue; |
|
} |
|
} |
|
// Other "noNo" type, or need to examine more text around this character: |
|
// Fall through to the slow path. |
|
} else if (isJamoVT(norm16) && prevBoundary != prevSrc) { |
|
char prev=s.charAt(prevSrc-1); |
|
if(c<Hangul.JAMO_T_BASE) { |
|
// The current character is a Jamo Vowel, |
|
|
|
char l = (char)(prev-Hangul.JAMO_L_BASE); |
|
if(l<Hangul.JAMO_L_COUNT) { |
|
if (!doCompose) { |
|
return false; |
|
} |
|
int t; |
|
if (src != limit && |
|
0 < (t = (s.charAt(src) - Hangul.JAMO_T_BASE)) && |
|
t < Hangul.JAMO_T_COUNT) { |
|
|
|
++src; |
|
} else if (hasCompBoundaryBefore(s, src, limit)) { |
|
|
|
t = 0; |
|
} else { |
|
t = -1; |
|
} |
|
if (t >= 0) { |
|
int syllable = Hangul.HANGUL_BASE + |
|
(l*Hangul.JAMO_V_COUNT + (c-Hangul.JAMO_V_BASE)) * |
|
Hangul.JAMO_T_COUNT + t; |
|
--prevSrc; |
|
if (prevBoundary != prevSrc) { |
|
buffer.append(s, prevBoundary, prevSrc); |
|
} |
|
buffer.append((char)syllable); |
|
prevBoundary = src; |
|
continue; |
|
} |
|
// If we see L+V+x where x!=T then we drop to the slow path, |
|
// decompose and recompose. |
|
// This is to deal with NFKC finding normal L and V but a |
|
// compatibility variant of a T. |
|
// We need to either fully compose that combination here |
|
// (which would complicate the code and may not work with strange custom data) |
|
// or use the slow path. |
|
} |
|
} else if (Hangul.isHangulLV(prev)) { |
|
// The current character is a Jamo Trailing consonant, |
|
|
|
if (!doCompose) { |
|
return false; |
|
} |
|
int syllable = prev + c - Hangul.JAMO_T_BASE; |
|
--prevSrc; |
|
if (prevBoundary != prevSrc) { |
|
buffer.append(s, prevBoundary, prevSrc); |
|
} |
|
buffer.append((char)syllable); |
|
prevBoundary = src; |
|
continue; |
|
} |
|
// No matching context, or may need to decompose surrounding text first: |
|
// Fall through to the slow path. |
|
} else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC |
|
// One or more combining marks that do not combine-back: |
|
// Check for canonical order, copy unchanged if ok and |
|
// if followed by a character with a boundary-before. |
|
int cc = getCCFromNormalYesOrMaybe(norm16); |
|
if (onlyContiguous && getPreviousTrailCC(s, prevBoundary, prevSrc) > cc) { |
|
|
|
if (!doCompose) { |
|
return false; |
|
} |
|
} else { |
|
// If !onlyContiguous (not FCC), then we ignore the tccc of |
|
|
|
int n16; |
|
for (;;) { |
|
if (src == limit) { |
|
if (doCompose) { |
|
buffer.append(s, prevBoundary, limit); |
|
} |
|
return true; |
|
} |
|
int prevCC = cc; |
|
c = Character.codePointAt(s, src); |
|
n16 = normTrie.get(c); |
|
if (n16 >= MIN_YES_YES_WITH_CC) { |
|
cc = getCCFromNormalYesOrMaybe(n16); |
|
if (prevCC > cc) { |
|
if (!doCompose) { |
|
return false; |
|
} |
|
break; |
|
} |
|
} else { |
|
break; |
|
} |
|
src += Character.charCount(c); |
|
} |
|
// p is after the last in-order combining mark. |
|
|
|
if (norm16HasCompBoundaryBefore(n16)) { |
|
if (isCompYesAndZeroCC(n16)) { |
|
src += Character.charCount(c); |
|
} |
|
continue; |
|
} |
|
// Use the slow path. There is no boundary in [prevSrc, src[. |
|
} |
|
} |
|
|
|
// Slow path: Find the nearest boundaries around the current character, |
|
|
|
if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { |
|
c = Character.codePointBefore(s, prevSrc); |
|
norm16 = normTrie.get(c); |
|
if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { |
|
prevSrc -= Character.charCount(c); |
|
} |
|
} |
|
if (doCompose && prevBoundary != prevSrc) { |
|
buffer.append(s, prevBoundary, prevSrc); |
|
} |
|
int recomposeStartIndex=buffer.length(); |
|
|
|
decomposeShort(s, prevSrc, src, false , onlyContiguous, |
|
buffer); |
|
|
|
src = decomposeShort(s, src, limit, true , onlyContiguous, |
|
buffer); |
|
recompose(buffer, recomposeStartIndex, onlyContiguous); |
|
if(!doCompose) { |
|
if(!buffer.equals(s, prevSrc, src)) { |
|
return false; |
|
} |
|
buffer.remove(); |
|
} |
|
prevBoundary=src; |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public int composeQuickCheck(CharSequence s, int src, int limit, |
|
boolean onlyContiguous, boolean doSpan) { |
|
int qcResult=0; |
|
int prevBoundary=src; |
|
int minNoMaybeCP=minCompNoMaybeCP; |
|
|
|
for(;;) { |
|
// Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, |
|
|
|
int prevSrc; |
|
int c = 0; |
|
int norm16 = 0; |
|
for (;;) { |
|
if(src==limit) { |
|
return (src<<1)|qcResult; |
|
} |
|
if( (c=s.charAt(src))<minNoMaybeCP || |
|
isCompYesAndZeroCC(norm16=normTrie.bmpGet(c)) |
|
) { |
|
++src; |
|
} else { |
|
prevSrc = src++; |
|
if (!UTF16Plus.isLeadSurrogate(c)) { |
|
break; |
|
} else { |
|
char c2; |
|
if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) { |
|
++src; |
|
c = Character.toCodePoint((char)c, c2); |
|
norm16 = normTrie.suppGet(c); |
|
if (!isCompYesAndZeroCC(norm16)) { |
|
break; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. |
|
// The current character is either a "noNo" (has a mapping) |
|
// or a "maybeYes" (combines backward) |
|
// or a "yesYes" with ccc!=0. |
|
// It is not a Hangul syllable or Jamo L because those have "yes" properties. |
|
|
|
int prevNorm16 = INERT; |
|
if (prevBoundary != prevSrc) { |
|
prevBoundary = prevSrc; |
|
if (!norm16HasCompBoundaryBefore(norm16)) { |
|
c = Character.codePointBefore(s, prevSrc); |
|
int n16 = getNorm16(c); |
|
if (!norm16HasCompBoundaryAfter(n16, onlyContiguous)) { |
|
prevBoundary -= Character.charCount(c); |
|
prevNorm16 = n16; |
|
} |
|
} |
|
} |
|
|
|
if(isMaybeOrNonZeroCC(norm16)) { |
|
int cc=getCCFromYesOrMaybe(norm16); |
|
if (onlyContiguous && cc != 0 && |
|
getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) { |
|
// The [prevBoundary..prevSrc[ character |
|
// passed the quick check "yes && ccc==0" test |
|
// but is out of canonical order with the current combining mark. |
|
} else { |
|
// If !onlyContiguous (not FCC), then we ignore the tccc of |
|
|
|
for (;;) { |
|
if (norm16 < MIN_YES_YES_WITH_CC) { |
|
if (!doSpan) { |
|
qcResult = 1; |
|
} else { |
|
return prevBoundary << 1; |
|
} |
|
} |
|
if (src == limit) { |
|
return (src<<1) | qcResult; |
|
} |
|
int prevCC = cc; |
|
c = Character.codePointAt(s, src); |
|
norm16 = getNorm16(c); |
|
if (isMaybeOrNonZeroCC(norm16)) { |
|
cc = getCCFromYesOrMaybe(norm16); |
|
if (!(prevCC <= cc || cc == 0)) { |
|
break; |
|
} |
|
} else { |
|
break; |
|
} |
|
src += Character.charCount(c); |
|
} |
|
|
|
if (isCompYesAndZeroCC(norm16)) { |
|
prevBoundary = src; |
|
src += Character.charCount(c); |
|
continue; |
|
} |
|
} |
|
} |
|
return prevBoundary<<1; |
|
} |
|
} |
|
public void composeAndAppend(CharSequence s, |
|
boolean doCompose, |
|
boolean onlyContiguous, |
|
ReorderingBuffer buffer) { |
|
int src=0, limit=s.length(); |
|
if(!buffer.isEmpty()) { |
|
int firstStarterInSrc=findNextCompBoundary(s, 0, limit, onlyContiguous); |
|
if(0!=firstStarterInSrc) { |
|
int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(), |
|
buffer.length(), onlyContiguous); |
|
StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+ |
|
firstStarterInSrc+16); |
|
middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length()); |
|
buffer.removeSuffix(buffer.length()-lastStarterInDest); |
|
middle.append(s, 0, firstStarterInSrc); |
|
compose(middle, 0, middle.length(), onlyContiguous, true, buffer); |
|
src=firstStarterInSrc; |
|
} |
|
} |
|
if(doCompose) { |
|
compose(s, src, limit, onlyContiguous, true, buffer); |
|
} else { |
|
buffer.append(s, src, limit); |
|
} |
|
} |
|
// Dual functionality: |
|
// buffer!=NULL: normalize |
|
|
|
public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) { |
|
// Note: In this function we use buffer->appendZeroCC() because we track |
|
// the lead and trail combining classes here, rather than leaving it to |
|
// the ReorderingBuffer. |
|
// The exception is the call to decomposeShort() which uses the buffer |
|
// in the normal way. |
|
|
|
// Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. |
|
|
|
int prevBoundary=src; |
|
int prevSrc; |
|
int c=0; |
|
int prevFCD16=0; |
|
int fcd16=0; |
|
|
|
for(;;) { |
|
|
|
for(prevSrc=src; src!=limit;) { |
|
if((c=s.charAt(src))<minLcccCP) { |
|
prevFCD16=~c; |
|
++src; |
|
} else if(!singleLeadMightHaveNonZeroFCD16(c)) { |
|
prevFCD16=0; |
|
++src; |
|
} else { |
|
if (UTF16Plus.isLeadSurrogate(c)) { |
|
char c2; |
|
if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) { |
|
c = Character.toCodePoint((char)c, c2); |
|
} |
|
} |
|
if((fcd16=getFCD16FromNormData(c))<=0xff) { |
|
prevFCD16=fcd16; |
|
src+=Character.charCount(c); |
|
} else { |
|
break; |
|
} |
|
} |
|
} |
|
|
|
if(src!=prevSrc) { |
|
if(src==limit) { |
|
if(buffer!=null) { |
|
buffer.flushAndAppendZeroCC(s, prevSrc, src); |
|
} |
|
break; |
|
} |
|
prevBoundary=src; |
|
|
|
if(prevFCD16<0) { |
|
|
|
int prev=~prevFCD16; |
|
if(prev<minDecompNoCP) { |
|
prevFCD16=0; |
|
} else { |
|
prevFCD16=getFCD16FromNormData(prev); |
|
if(prevFCD16>1) { |
|
--prevBoundary; |
|
} |
|
} |
|
} else { |
|
int p=src-1; |
|
if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p && |
|
Character.isHighSurrogate(s.charAt(p-1)) |
|
) { |
|
--p; |
|
// Need to fetch the previous character's FCD value because |
|
|
|
prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1))); |
|
// Still known to have lccc==0 because its lead surrogate unit had lccc==0. |
|
} |
|
if(prevFCD16>1) { |
|
prevBoundary=p; |
|
} |
|
} |
|
if(buffer!=null) { |
|
// The last lccc==0 character is excluded from the |
|
|
|
buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary); |
|
buffer.append(s, prevBoundary, src); |
|
} |
|
|
|
prevSrc=src; |
|
} else if(src==limit) { |
|
break; |
|
} |
|
|
|
src+=Character.charCount(c); |
|
// The current character (c) at [prevSrc..src[ has a non-zero lead combining class. |
|
|
|
if((prevFCD16&0xff)<=(fcd16>>8)) { |
|
|
|
if((fcd16&0xff)<=1) { |
|
prevBoundary=src; |
|
} |
|
if(buffer!=null) { |
|
buffer.appendZeroCC(c); |
|
} |
|
prevFCD16=fcd16; |
|
continue; |
|
} else if(buffer==null) { |
|
return prevBoundary; |
|
} else { |
|
|
|
|
|
|
|
|
|
*/ |
|
buffer.removeSuffix(prevSrc-prevBoundary); |
|
|
|
|
|
|
|
*/ |
|
src=findNextFCDBoundary(s, src, limit); |
|
|
|
|
|
|
|
*/ |
|
decomposeShort(s, prevBoundary, src, false, false, buffer); |
|
prevBoundary=src; |
|
prevFCD16=0; |
|
} |
|
} |
|
return src; |
|
} |
|
|
|
public boolean hasDecompBoundaryBefore(int c) { |
|
return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) || |
|
norm16HasDecompBoundaryBefore(getNorm16(c)); |
|
} |
|
public boolean norm16HasDecompBoundaryBefore(int norm16) { |
|
if (norm16 < minNoNoCompNoMaybeCC) { |
|
return true; |
|
} |
|
if (norm16 >= limitNoNo) { |
|
return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; |
|
} |
|
|
|
int mapping=norm16>>OFFSET_SHIFT; |
|
int firstUnit=extraData.charAt(mapping); |
|
|
|
return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0; |
|
} |
|
public boolean hasDecompBoundaryAfter(int c) { |
|
if (c < minDecompNoCP) { |
|
return true; |
|
} |
|
if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) { |
|
return true; |
|
} |
|
return norm16HasDecompBoundaryAfter(getNorm16(c)); |
|
} |
|
public boolean norm16HasDecompBoundaryAfter(int norm16) { |
|
if(norm16 <= minYesNo || isHangulLVT(norm16)) { |
|
return true; |
|
} |
|
if (norm16 >= limitNoNo) { |
|
if (isMaybeOrNonZeroCC(norm16)) { |
|
return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; |
|
} |
|
|
|
return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1; |
|
} |
|
|
|
int mapping=norm16>>OFFSET_SHIFT; |
|
int firstUnit=extraData.charAt(mapping); |
|
// decomp after-boundary: same as hasFCDBoundaryAfter(), |
|
|
|
if(firstUnit>0x1ff) { |
|
return false; |
|
} |
|
if(firstUnit<=0xff) { |
|
return true; |
|
} |
|
// if(trailCC==1) test leadCC==0, same as checking for before-boundary |
|
|
|
return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0; |
|
} |
|
public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); } |
|
|
|
public boolean hasCompBoundaryBefore(int c) { |
|
return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c)); |
|
} |
|
public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous) { |
|
return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous); |
|
} |
|
|
|
private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } |
|
private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; } |
|
private static boolean isInert(int norm16) { return norm16==INERT; } |
|
private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; } |
|
private int hangulLVT() { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; } |
|
private boolean isHangulLV(int norm16) { return norm16==minYesNo; } |
|
private boolean isHangulLVT(int norm16) { |
|
return norm16==hangulLVT(); |
|
} |
|
private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; } |
|
// UBool isCompYes(uint16_t norm16) const {
//     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
// }
// UBool isCompYesOrMaybe(uint16_t norm16) const {
//     return norm16<minNoNo || minMaybeYes<=norm16;
// }
// private boolean hasZeroCCFromDecompYes(int norm16) {
//     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
// }
|
private boolean isDecompYesAndZeroCC(int norm16) { |
|
return norm16<minYesNo || |
|
norm16==JAMO_VT || |
|
(minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES); |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
private boolean isMostDecompYesAndZeroCC(int norm16) { |
|
return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; |
|
} |
|
private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; } |
|
|
|
// For use with isCompYes().
// Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
// static uint8_t getCCFromYes(uint16_t norm16) {
//     return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
// }
|
private int getCCFromNoNo(int norm16) { |
|
int mapping=norm16>>OFFSET_SHIFT; |
|
if((extraData.charAt(mapping)&MAPPING_HAS_CCC_LCCC_WORD)!=0) { |
|
return extraData.charAt(mapping-1)&0xff; |
|
} else { |
|
return 0; |
|
} |
|
} |
|
int getTrailCCFromCompYesAndZeroCC(int norm16) { |
|
if(norm16<=minYesNo) { |
|
return 0; |
|
} else { |
|
// For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here. |
|
return extraData.charAt(norm16>>OFFSET_SHIFT)>>8; |
|
} |
|
} |
|
|
|
|
|
private int mapAlgorithmic(int c, int norm16) { |
|
return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta; |
|
} |
|
|
|
// Requires minYesNo<norm16<limitNoNo. |
|
// private int getMapping(int norm16) { return extraData+(norm16>>OFFSET_SHIFT); } |
|
|
|
|
|
|
|
*/ |
|
private int getCompositionsListForDecompYes(int norm16) { |
|
if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) { |
|
return -1; |
|
} else { |
|
if((norm16-=minMaybeYes)<0) { |
|
// norm16<minMaybeYes: index into extraData which is a substring at |
|
// maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes] |
|
// same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16 |
|
norm16+=MIN_NORMAL_MAYBE_YES; |
|
} |
|
return norm16>>OFFSET_SHIFT; |
|
} |
|
} |
|
|
|
|
|
*/ |
|
private int getCompositionsListForComposite(int norm16) { |
|
|
|
int list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT; |
|
int firstUnit=maybeYesCompositions.charAt(list); |
|
return list+ |
|
1+ |
|
(firstUnit&MAPPING_LENGTH_MASK); |
|
} |
|
|
|
// Decompose a short piece of text which is likely to contain characters that |
|
// fail the quick check loop and/or where the quick check loop's overhead |
|
// is unlikely to be amortized. |
|
// Called by the compose() and makeFCD() implementations. |
|
|
|
private int decomposeShort( |
|
CharSequence s, int src, int limit, |
|
boolean stopAtCompBoundary, boolean onlyContiguous, |
|
ReorderingBuffer buffer) { |
|
while(src<limit) { |
|
int c=Character.codePointAt(s, src); |
|
if (stopAtCompBoundary && c < minCompNoMaybeCP) { |
|
return src; |
|
} |
|
int norm16 = getNorm16(c); |
|
if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) { |
|
return src; |
|
} |
|
src+=Character.charCount(c); |
|
decompose(c, norm16, buffer); |
|
if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { |
|
return src; |
|
} |
|
} |
|
return src; |
|
} |
|
private void decompose(int c, int norm16, ReorderingBuffer buffer) { |
|
|
|
if (norm16 >= limitNoNo) { |
|
if (isMaybeOrNonZeroCC(norm16)) { |
|
buffer.append(c, getCCFromYesOrMaybe(norm16)); |
|
return; |
|
} |
|
|
|
c=mapAlgorithmic(c, norm16); |
|
norm16=getRawNorm16(c); |
|
} |
|
if (norm16 < minYesNo) { |
|
|
|
buffer.append(c, 0); |
|
} else if(isHangulLV(norm16) || isHangulLVT(norm16)) { |
|
|
|
Hangul.decompose(c, buffer); |
|
} else { |
|
|
|
int mapping=norm16>>OFFSET_SHIFT; |
|
int firstUnit=extraData.charAt(mapping); |
|
int length=firstUnit&MAPPING_LENGTH_MASK; |
|
int leadCC, trailCC; |
|
trailCC=firstUnit>>8; |
|
if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { |
|
leadCC=extraData.charAt(mapping-1)>>8; |
|
} else { |
|
leadCC=0; |
|
} |
|
++mapping; |
|
buffer.append(extraData, mapping, mapping+length, true, leadCC, trailCC); |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private static int combine(String compositions, int list, int trail) { |
|
int key1, firstUnit; |
|
if(trail<COMP_1_TRAIL_LIMIT) { |
|
// trail character is 0..33FF |
|
|
|
key1=(trail<<1); |
|
while(key1>(firstUnit=compositions.charAt(list))) { |
|
list+=2+(firstUnit&COMP_1_TRIPLE); |
|
} |
|
if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { |
|
if((firstUnit&COMP_1_TRIPLE)!=0) { |
|
return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2); |
|
} else { |
|
return compositions.charAt(list+1); |
|
} |
|
} |
|
} else { |
|
// trail character is 3400..10FFFF |
|
|
|
key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE); |
|
int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff; |
|
int secondUnit; |
|
for(;;) { |
|
if(key1>(firstUnit=compositions.charAt(list))) { |
|
list+=2+(firstUnit&COMP_1_TRIPLE); |
|
} else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { |
|
if(key2>(secondUnit=compositions.charAt(list+1))) { |
|
if((firstUnit&COMP_1_LAST_TUPLE)!=0) { |
|
break; |
|
} else { |
|
list+=3; |
|
} |
|
} else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { |
|
return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2); |
|
} else { |
|
break; |
|
} |
|
} else { |
|
break; |
|
} |
|
} |
|
} |
|
return -1; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private void recompose(ReorderingBuffer buffer, int recomposeStartIndex, |
|
boolean onlyContiguous) { |
|
StringBuilder sb=buffer.getStringBuilder(); |
|
int p=recomposeStartIndex; |
|
if(p==sb.length()) { |
|
return; |
|
} |
|
|
|
int starter, pRemove; |
|
int compositionsList; |
|
int c, compositeAndFwd; |
|
int norm16; |
|
int cc, prevCC; |
|
boolean starterIsSupplementary; |
|
|
|
// Some of the following variables are not used until we have a forward-combining starter |
|
// and are only initialized now to avoid compiler warnings. |
|
compositionsList=-1; |
|
starter=-1; |
|
starterIsSupplementary=false; |
|
prevCC=0; |
|
|
|
for(;;) { |
|
c=sb.codePointAt(p); |
|
p+=Character.charCount(c); |
|
norm16=getNorm16(c); |
|
cc=getCCFromYesOrMaybe(norm16); |
|
if( |
|
isMaybe(norm16) && |
|
|
|
compositionsList>=0 && |
|
|
|
(prevCC<cc || prevCC==0) |
|
) { |
|
if(isJamoVT(norm16)) { |
|
|
|
if(c<Hangul.JAMO_T_BASE) { |
|
|
|
char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE); |
|
if(prev<Hangul.JAMO_L_COUNT) { |
|
pRemove=p-1; |
|
char syllable=(char) |
|
(Hangul.HANGUL_BASE+ |
|
(prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))* |
|
Hangul.JAMO_T_COUNT); |
|
char t; |
|
if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) { |
|
++p; |
|
syllable+=t; |
|
} |
|
sb.setCharAt(starter, syllable); |
|
|
|
sb.delete(pRemove, p); |
|
p=pRemove; |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
if(p==sb.length()) { |
|
break; |
|
} |
|
compositionsList=-1; |
|
continue; |
|
} else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) { |
|
|
|
int composite=compositeAndFwd>>1; |
|
|
|
// Remove the combining mark. |
|
pRemove=p-Character.charCount(c); |
|
sb.delete(pRemove, p); |
|
p=pRemove; |
|
|
|
if(starterIsSupplementary) { |
|
if(composite>0xffff) { |
|
|
|
sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); |
|
sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite)); |
|
} else { |
|
sb.setCharAt(starter, (char)c); |
|
sb.deleteCharAt(starter+1); |
|
// The composite is shorter than the starter, |
|
|
|
starterIsSupplementary=false; |
|
--p; |
|
} |
|
} else if(composite>0xffff) { |
|
// The composite is longer than the starter, |
|
|
|
starterIsSupplementary=true; |
|
sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); |
|
sb.insert(starter+1, UTF16.getTrailSurrogate(composite)); |
|
++p; |
|
} else { |
|
|
|
sb.setCharAt(starter, (char)composite); |
|
} |
|
|
|
// Keep prevCC because we removed the combining mark. |
|
|
|
if(p==sb.length()) { |
|
break; |
|
} |
|
|
|
if((compositeAndFwd&1)!=0) { |
|
compositionsList= |
|
getCompositionsListForComposite(getRawNorm16(composite)); |
|
} else { |
|
compositionsList=-1; |
|
} |
|
|
|
|
|
continue; |
|
} |
|
} |
|
|
|
|
|
prevCC=cc; |
|
if(p==sb.length()) { |
|
break; |
|
} |
|
|
|
|
|
if(cc==0) { |
|
|
|
if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) { |
|
|
|
if(c<=0xffff) { |
|
starterIsSupplementary=false; |
|
starter=p-1; |
|
} else { |
|
starterIsSupplementary=true; |
|
starter=p-2; |
|
} |
|
} |
|
} else if(onlyContiguous) { |
|
|
|
compositionsList=-1; |
|
} |
|
} |
|
buffer.flush(); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private boolean hasCompBoundaryBefore(int c, int norm16) { |
|
return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16); |
|
} |
|
private boolean norm16HasCompBoundaryBefore(int norm16) { |
|
return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16); |
|
} |
|
private boolean hasCompBoundaryBefore(CharSequence s, int src, int limit) { |
|
return src == limit || hasCompBoundaryBefore(Character.codePointAt(s, src)); |
|
} |
|
private boolean norm16HasCompBoundaryAfter(int norm16, boolean onlyContiguous) { |
|
return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 && |
|
(!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16)); |
|
} |
|
private boolean hasCompBoundaryAfter(CharSequence s, int start, int p, boolean onlyContiguous) { |
|
return start == p || hasCompBoundaryAfter(Character.codePointBefore(s, p), onlyContiguous); |
|
} |
|
|
|
private boolean isTrailCC01ForCompBoundaryAfter(int norm16) { |
|
return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ? |
|
(norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff); |
|
} |
|
|
|
private int findPreviousCompBoundary(CharSequence s, int p, boolean onlyContiguous) { |
|
while(p>0) { |
|
int c=Character.codePointBefore(s, p); |
|
int norm16 = getNorm16(c); |
|
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { |
|
break; |
|
} |
|
p-=Character.charCount(c); |
|
if(hasCompBoundaryBefore(c, norm16)) { |
|
break; |
|
} |
|
} |
|
return p; |
|
} |
|
private int findNextCompBoundary(CharSequence s, int p, int limit, boolean onlyContiguous) { |
|
while(p<limit) { |
|
int c=Character.codePointAt(s, p); |
|
int norm16=normTrie.get(c); |
|
if(hasCompBoundaryBefore(c, norm16)) { |
|
break; |
|
} |
|
p+=Character.charCount(c); |
|
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { |
|
break; |
|
} |
|
} |
|
return p; |
|
} |
|
|
|
|
|
private int findNextFCDBoundary(CharSequence s, int p, int limit) { |
|
while(p<limit) { |
|
int c=Character.codePointAt(s, p); |
|
int norm16; |
|
if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16 = getNorm16(c))) { |
|
break; |
|
} |
|
p+=Character.charCount(c); |
|
if (norm16HasDecompBoundaryAfter(norm16)) { |
|
break; |
|
} |
|
} |
|
return p; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
public static int getDecompose(int chars[], String decomps[]) { |
|
Normalizer2 impl = Normalizer2.getNFDInstance(); |
|
|
|
int length=0; |
|
int norm16 = 0; |
|
int ch = -1; |
|
int i = 0; |
|
|
|
while (++ch < 0x2fa1e) { //no cannoical above 0x3ffff |
|
//TBD !!!! the hack code heres save us about 50ms for startup |
|
|
|
if (ch == 0x30ff) |
|
ch = 0xf900; |
|
else if (ch == 0x115bc) |
|
ch = 0x1d15e; |
|
else if (ch == 0x1d1c1) |
|
ch = 0x2f800; |
|
|
|
String s = impl.getDecomposition(ch); |
|
|
|
if(s != null && i < chars.length) { |
|
chars[i] = ch; |
|
decomps[i++] = s; |
|
} |
|
} |
|
return i; |
|
} |
|
|
|
//------------------------------------------------------ |
|
// special method for Collation (RBTableBuilder.build()) |
|
|
|
private static boolean needSingleQuotation(char c) { |
|
return (c >= 0x0009 && c <= 0x000D) || |
|
(c >= 0x0020 && c <= 0x002F) || |
|
(c >= 0x003A && c <= 0x0040) || |
|
(c >= 0x005B && c <= 0x0060) || |
|
(c >= 0x007B && c <= 0x007E); |
|
} |
|
|
|
public static String canonicalDecomposeWithSingleQuotation(String string) { |
|
Normalizer2 impl = Normalizer2.getNFDInstance(); |
|
char[] src = string.toCharArray(); |
|
int srcIndex = 0; |
|
int srcLimit = src.length; |
|
char[] dest = new char[src.length * 3]; |
|
int destIndex = 0; |
|
int destLimit = dest.length; |
|
|
|
int prevSrc; |
|
String norm; |
|
int reorderStartIndex, length; |
|
char c1, c2; |
|
int cp; |
|
int minNoMaybe = 0x00c0; |
|
int cc, prevCC, trailCC; |
|
char[] p; |
|
int pStart; |
|
|
|
|
|
reorderStartIndex = 0; |
|
prevCC = 0; |
|
norm = null; |
|
cp = 0; |
|
pStart = 0; |
|
|
|
cc = trailCC = -1; |
|
c1 = 0; |
|
for (;;) { |
|
prevSrc=srcIndex; |
|
|
|
while (srcIndex != srcLimit && |
|
((c1 = src[srcIndex]) < minNoMaybe || |
|
(norm = impl.getDecomposition(cp = string.codePointAt(srcIndex))) == null || |
|
(c1 >= '\uac00' && c1 <= '\ud7a3'))) { |
|
prevCC = 0; |
|
srcIndex += (cp < 0x10000) ? 1 : 2; |
|
} |
|
|
|
|
|
if (srcIndex != prevSrc) { |
|
length = srcIndex - prevSrc; |
|
if ((destIndex + length) <= destLimit) { |
|
System.arraycopy(src,prevSrc,dest,destIndex,length); |
|
} |
|
|
|
destIndex += length; |
|
reorderStartIndex = destIndex; |
|
} |
|
|
|
|
|
if (srcIndex == srcLimit) { |
|
break; |
|
} |
|
|
|
|
|
srcIndex += (cp < 0x10000) ? 1 : 2; |
|
|
|
if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
|
c2 = 0; |
|
length = 1; |
|
|
|
if (Character.isHighSurrogate(c1) |
|
|| Character.isLowSurrogate(c1)) { |
|
norm = null; |
|
} |
|
} else { |
|
length = 2; |
|
c2 = src[srcIndex-1]; |
|
} |
|
|
|
|
|
if (norm == null) { |
|
|
|
cc = trailCC = UCharacter.getCombiningClass(cp); |
|
p = null; |
|
pStart = -1; |
|
} else { |
|
|
|
pStart = 0; |
|
p = norm.toCharArray(); |
|
length = p.length; |
|
int cpNum = norm.codePointCount(0, length); |
|
cc= UCharacter.getCombiningClass(norm.codePointAt(0)); |
|
trailCC= UCharacter.getCombiningClass(norm.codePointAt(cpNum-1)); |
|
if (length == 1) { |
|
|
|
c1 = p[pStart]; |
|
c2 = 0; |
|
p = null; |
|
pStart = -1; |
|
} |
|
} |
|
|
|
if((destIndex + length * 3) >= destLimit) { // 2 SingleQuotations |
|
|
|
char[] tmpBuf = new char[destLimit * 2]; |
|
System.arraycopy(dest, 0, tmpBuf, 0, destIndex); |
|
dest = tmpBuf; |
|
destLimit = dest.length; |
|
} |
|
|
|
|
|
{ |
|
int reorderSplit = destIndex; |
|
if (p == null) { |
|
|
|
if (needSingleQuotation(c1)) { |
|
//if we need single quotation, no need to consider "prevCC" |
|
|
|
dest[destIndex++] = '\''; |
|
dest[destIndex++] = c1; |
|
dest[destIndex++] = '\''; |
|
trailCC = 0; |
|
} else if(cc != 0 && cc < prevCC) { |
|
// (c1, c2) is out of order with respect to the preceding |
|
|
|
destIndex += length; |
|
trailCC = insertOrdered(dest, reorderStartIndex, |
|
reorderSplit, destIndex, c1, c2, cc); |
|
} else { |
|
|
|
dest[destIndex++] = c1; |
|
if(c2 != 0) { |
|
dest[destIndex++] = c2; |
|
} |
|
} |
|
} else { |
|
// general: multiple code points (ordered by themselves) |
|
|
|
if (needSingleQuotation(p[pStart])) { |
|
dest[destIndex++] = '\''; |
|
dest[destIndex++] = p[pStart++]; |
|
dest[destIndex++] = '\''; |
|
length--; |
|
do { |
|
dest[destIndex++] = p[pStart++]; |
|
} while(--length > 0); |
|
} else if (cc != 0 && cc < prevCC) { |
|
destIndex += length; |
|
trailCC = mergeOrdered(dest, reorderStartIndex, |
|
reorderSplit, p, pStart, |
|
pStart+length); |
|
} else { |
|
|
|
do { |
|
dest[destIndex++] = p[pStart++]; |
|
} while (--length > 0); |
|
} |
|
} |
|
} |
|
prevCC = trailCC; |
|
if(prevCC == 0) { |
|
reorderStartIndex = destIndex; |
|
} |
|
} |
|
|
|
return new String(dest, 0, destIndex); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private static int insertOrdered(char[] source, |
|
int start, |
|
int current, int p, |
|
char c1, char c2, |
|
int cc) { |
|
int back, preBack; |
|
int r; |
|
int prevCC, trailCC=cc; |
|
|
|
if (start<current && cc!=0) { |
|
|
|
preBack=back=current; |
|
|
|
PrevArgs prevArgs = new PrevArgs(); |
|
prevArgs.current = current; |
|
prevArgs.start = start; |
|
prevArgs.src = source; |
|
prevArgs.c1 = c1; |
|
prevArgs.c2 = c2; |
|
|
|
|
|
prevCC=getPrevCC(prevArgs); |
|
preBack = prevArgs.current; |
|
|
|
if(cc<prevCC) { |
|
|
|
trailCC=prevCC; |
|
back=preBack; |
|
while(start<preBack) { |
|
prevCC=getPrevCC(prevArgs); |
|
preBack=prevArgs.current; |
|
if(cc>=prevCC) { |
|
break; |
|
} |
|
back=preBack; |
|
} |
|
|
|
// this is where we are right now with all these indicies: |
|
// [start]..[pPreBack] 0..? code points that we can ignore |
|
// [pPreBack]..[pBack] 0..1 code points with prevCC<=cc |
|
// [pBack]..[current] 0..n code points with >cc, move up to insert (c, c2) |
|
// [current]..[p] 1 code point (c, c2) with cc |
|
|
|
|
|
r=p; |
|
do { |
|
source[--r]=source[--current]; |
|
} while (back!=current); |
|
} |
|
} |
|
|
|
|
|
source[current] = c1; |
|
if (c2!=0) { |
|
source[(current+1)] = c2; |
|
} |
|
|
|
|
|
return trailCC; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
private static int mergeOrdered(char[] source, |
|
int start, |
|
int current, |
|
char[] data, |
|
int next, |
|
int limit) { |
|
int r; |
|
int cc, trailCC=0; |
|
boolean adjacent; |
|
|
|
adjacent= current==next; |
|
NextCCArgs ncArgs = new NextCCArgs(); |
|
ncArgs.source = data; |
|
ncArgs.next = next; |
|
ncArgs.limit = limit; |
|
|
|
if(start!=current) { |
|
|
|
while(ncArgs.next<ncArgs.limit) { |
|
cc=getNextCC(ncArgs); |
|
if(cc==0) { |
|
|
|
trailCC=0; |
|
if(adjacent) { |
|
current=ncArgs.next; |
|
} else { |
|
data[current++]=ncArgs.c1; |
|
if(ncArgs.c2!=0) { |
|
data[current++]=ncArgs.c2; |
|
} |
|
} |
|
break; |
|
} else { |
|
r=current+(ncArgs.c2==0 ? 1 : 2); |
|
trailCC=insertOrdered(source,start, current, r, |
|
ncArgs.c1, ncArgs.c2, cc); |
|
current=r; |
|
} |
|
} |
|
} |
|
|
|
if(ncArgs.next==ncArgs.limit) { |
|
|
|
return trailCC; |
|
} else { |
|
if(!adjacent) { |
|
|
|
do { |
|
source[current++]=data[ncArgs.next++]; |
|
} while(ncArgs.next!=ncArgs.limit); |
|
ncArgs.limit=current; |
|
} |
|
PrevArgs prevArgs = new PrevArgs(); |
|
prevArgs.src = data; |
|
prevArgs.start = start; |
|
prevArgs.current = ncArgs.limit; |
|
return getPrevCC(prevArgs); |
|
} |
|
|
|
} |
|
private static final class PrevArgs{ |
|
char[] src; |
|
int start; |
|
int current; |
|
char c1; |
|
char c2; |
|
} |
|
|
|
private static final class NextCCArgs{ |
|
char[] source; |
|
int next; |
|
int limit; |
|
char c1; |
|
char c2; |
|
} |
|
private static int getNextCC(NextCCArgs args) { |
|
args.c1=args.source[args.next++]; |
|
args.c2=0; |
|
|
|
if (UTF16.isTrailSurrogate(args.c1)) { |
|
|
|
return 0; |
|
} else if (!UTF16.isLeadSurrogate(args.c1)) { |
|
return UCharacter.getCombiningClass(args.c1); |
|
} else if (args.next!=args.limit && |
|
UTF16.isTrailSurrogate(args.c2=args.source[args.next])){ |
|
++args.next; |
|
return UCharacter.getCombiningClass(Character.toCodePoint(args.c1, args.c2)); |
|
} else { |
|
|
|
args.c2=0; |
|
return 0; |
|
} |
|
} |
|
private static int getPrevCC(PrevArgs args) { |
|
args.c1=args.src[--args.current]; |
|
args.c2=0; |
|
|
|
if (args.c1 < MIN_CCC_LCCC_CP) { |
|
return 0; |
|
} else if (UTF16.isLeadSurrogate(args.c1)) { |
|
|
|
return 0; |
|
} else if (!UTF16.isTrailSurrogate(args.c1)) { |
|
return UCharacter.getCombiningClass(args.c1); |
|
} else if (args.current!=args.start && |
|
UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) { |
|
--args.current; |
|
return UCharacter.getCombiningClass(Character.toCodePoint(args.c2, args.c1)); |
|
} else { |
|
|
|
args.c2=0; |
|
return 0; |
|
} |
|
} |
|
|
|
private int getPreviousTrailCC(CharSequence s, int start, int p) { |
|
if (start == p) { |
|
return 0; |
|
} |
|
return getFCD16(Character.codePointBefore(s, p)); |
|
} |
|
|
|
private VersionInfo dataVersion; |
|
|
|
|
|
private int minDecompNoCP; |
|
private int minCompNoMaybeCP; |
|
private int minLcccCP; |
|
|
|
|
|
private int minYesNo; |
|
private int minYesNoMappingsOnly; |
|
private int minNoNo; |
|
private int minNoNoCompBoundaryBefore; |
|
private int minNoNoCompNoMaybeCC; |
|
private int minNoNoEmpty; |
|
private int limitNoNo; |
|
private int centerNoNoDelta; |
|
private int minMaybeYes; |
|
|
|
private CodePointTrie.Fast16 normTrie; |
|
private String maybeYesCompositions; |
|
private String extraData; |
|
private byte[] smallFCD; |
|
} |