|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
package sun.text.normalizer; |
|
|
|
import java.io.IOException; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
class FilteredNormalizer2 extends Normalizer2 { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*/ |
|
public FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet) { |
|
norm2=n2; |
|
set=filterSet; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public StringBuilder normalize(CharSequence src, StringBuilder dest) { |
|
if(dest==src) { |
|
throw new IllegalArgumentException(); |
|
} |
|
dest.setLength(0); |
|
normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE); |
|
return dest; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public Appendable normalize(CharSequence src, Appendable dest) { |
|
if(dest==src) { |
|
throw new IllegalArgumentException(); |
|
} |
|
return normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE); |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public StringBuilder normalizeSecondAndAppend( |
|
StringBuilder first, CharSequence second) { |
|
return normalizeSecondAndAppend(first, second, true); |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public StringBuilder append(StringBuilder first, CharSequence second) { |
|
return normalizeSecondAndAppend(first, second, false); |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public String getDecomposition(int c) { |
|
return set.contains(c) ? norm2.getDecomposition(c) : null; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public int getCombiningClass(int c) { |
|
return set.contains(c) ? norm2.getCombiningClass(c) : 0; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public boolean isNormalized(CharSequence s) { |
|
UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE; |
|
for(int prevSpanLimit=0; prevSpanLimit<s.length();) { |
|
int spanLimit=set.span(s, prevSpanLimit, spanCondition); |
|
if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { |
|
spanCondition=UnicodeSet.SpanCondition.SIMPLE; |
|
} else { |
|
if(!norm2.isNormalized(s.subSequence(prevSpanLimit, spanLimit))) { |
|
return false; |
|
} |
|
spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; |
|
} |
|
prevSpanLimit=spanLimit; |
|
} |
|
return true; |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public int spanQuickCheckYes(CharSequence s) { |
|
UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE; |
|
for(int prevSpanLimit=0; prevSpanLimit<s.length();) { |
|
int spanLimit=set.span(s, prevSpanLimit, spanCondition); |
|
if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { |
|
spanCondition=UnicodeSet.SpanCondition.SIMPLE; |
|
} else { |
|
int yesLimit= |
|
prevSpanLimit+ |
|
norm2.spanQuickCheckYes(s.subSequence(prevSpanLimit, spanLimit)); |
|
if(yesLimit<spanLimit) { |
|
return yesLimit; |
|
} |
|
spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; |
|
} |
|
prevSpanLimit=spanLimit; |
|
} |
|
return s.length(); |
|
} |
|
|
|
|
|
|
|
|
|
*/ |
|
@Override |
|
public boolean hasBoundaryBefore(int c) { |
|
return !set.contains(c) || norm2.hasBoundaryBefore(c); |
|
} |
|
|
|
// Internal: No argument checking, and appends to dest. |
|
// Pass as input spanCondition the one that is likely to yield a non-zero |
|
// span length at the start of src. |
|
// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2, |
|
// UnicodeSet.SpanCondition.SIMPLE should be passed in for the start of src |
|
// and UnicodeSet.SpanCondition.NOT_CONTAINED should be passed in if we continue after |
|
|
|
private Appendable normalize(CharSequence src, Appendable dest, |
|
UnicodeSet.SpanCondition spanCondition) { |
|
|
|
StringBuilder tempDest=new StringBuilder(); |
|
try { |
|
for(int prevSpanLimit=0; prevSpanLimit<src.length();) { |
|
int spanLimit=set.span(src, prevSpanLimit, spanCondition); |
|
int spanLength=spanLimit-prevSpanLimit; |
|
if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { |
|
if(spanLength!=0) { |
|
dest.append(src, prevSpanLimit, spanLimit); |
|
} |
|
spanCondition=UnicodeSet.SpanCondition.SIMPLE; |
|
} else { |
|
if(spanLength!=0) { |
|
// Not norm2.normalizeSecondAndAppend() because we do not want |
|
|
|
dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest)); |
|
} |
|
spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; |
|
} |
|
prevSpanLimit=spanLimit; |
|
} |
|
} catch(IOException e) { |
|
throw new InternalError(e.toString(), e); |
|
} |
|
return dest; |
|
} |
|
|
|
private StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second, |
|
boolean doNormalize) { |
|
if(first==second) { |
|
throw new IllegalArgumentException(); |
|
} |
|
if(first.length()==0) { |
|
if(doNormalize) { |
|
return normalize(second, first); |
|
} else { |
|
return first.append(second); |
|
} |
|
} |
|
|
|
int prefixLimit=set.span(second, 0, UnicodeSet.SpanCondition.SIMPLE); |
|
if(prefixLimit!=0) { |
|
CharSequence prefix=second.subSequence(0, prefixLimit); |
|
int suffixStart=set.spanBack(first, 0x7fffffff, UnicodeSet.SpanCondition.SIMPLE); |
|
if(suffixStart==0) { |
|
if(doNormalize) { |
|
norm2.normalizeSecondAndAppend(first, prefix); |
|
} else { |
|
norm2.append(first, prefix); |
|
} |
|
} else { |
|
StringBuilder middle=new StringBuilder( |
|
first.subSequence(suffixStart, first.length())); |
|
if(doNormalize) { |
|
norm2.normalizeSecondAndAppend(middle, prefix); |
|
} else { |
|
norm2.append(middle, prefix); |
|
} |
|
first.delete(suffixStart, 0x7fffffff).append(middle); |
|
} |
|
} |
|
if(prefixLimit<second.length()) { |
|
CharSequence rest=second.subSequence(prefixLimit, second.length()); |
|
if(doNormalize) { |
|
normalize(rest, first, UnicodeSet.SpanCondition.NOT_CONTAINED); |
|
} else { |
|
first.append(rest); |
|
} |
|
} |
|
return first; |
|
} |
|
|
|
private Normalizer2 norm2; |
|
private UnicodeSet set; |
|
}; |