Back to index...

	/*
	* Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/
	/*
	*******************************************************************************
	* (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
	* *
	* The original version of this source code and documentation is copyrighted *
	* and owned by IBM, These materials are provided under terms of a License *
	* Agreement between IBM and Sun. This technology is protected by multiple *
	* US and International patents. This notice and attribution to IBM may not *
	* to removed. *
	*******************************************************************************
	*/

	package sun.text.normalizer;

	import java.io.BufferedInputStream;
	import java.io.ByteArrayInputStream;
	import java.io.IOException;
	import java.io.BufferedInputStream;
	import java.io.InputStream;

	/**
	* @author Ram Viswanadha
	*/
	public final class NormalizerImpl {
	// Static block for the class to initialize its own self
	/**
	 * Singleton instance; constructing it loads the normalization data into
	 * the static tables of this class.
	 */
	static final NormalizerImpl IMPL;

	static
	{
	    try
	    {
	        IMPL = new NormalizerImpl();
	    }
	    catch (Exception e)
	    {
	        // keep the original exception as the cause so that a data
	        // loading failure can still be diagnosed from the stack trace
	        throw new RuntimeException(e.getMessage(), e);
	    }
	}

	/* mask that keeps the low 8 bits of an int */
	static final int UNSIGNED_BYTE_MASK =0xFF;
	/* mask that keeps the low 32 bits of a long */
	static final long UNSIGNED_INT_MASK = 0xffffffffL;
	/*
	* This new implementation of the normalization code loads its data from
	* unorm.icu, which is generated with the gennorm tool.
	* The format of that file is described at the end of this file.
	*/
	private static final String DATA_FILE_NAME = "/sun/text/resources/unorm.icu";

	// norm32 value constants

	// quick check flags 0..3 set mean "no" for their forms
	public static final int QC_NFC=0x11; /* no|maybe */
	public static final int QC_NFKC=0x22; /* no|maybe */
	public static final int QC_NFD=4; /* no */
	public static final int QC_NFKD=8; /* no */

	/* all four "no" flags (bits 0..3) */
	public static final int QC_ANY_NO=0xf;

	/* quick check flags 4..5 mean "maybe" for their forms;
	* test flags>=QC_MAYBE
	*/
	public static final int QC_MAYBE=0x10;
	public static final int QC_ANY_MAYBE=0x30;

	/* all quick check flags (bits 0..5) */
	public static final int QC_MASK=0x3f;

	/* combining flags in bits 6 and 7; COMBINES_ANY is the union of both */
	private static final int COMBINES_FWD=0x40;
	private static final int COMBINES_BACK=0x80;
	public static final int COMBINES_ANY=0xc0;
	// UnicodeData.txt combining class in bits 15..8
	private static final int CC_SHIFT=8;
	public static final int CC_MASK=0xff00;
	// 16 bits for the index to UChars and other extra data
	private static final int EXTRA_SHIFT=16;

	/* norm32 value constants using >16 bits */
	/* the int literals are masked with UNSIGNED_INT_MASK so that they are
	* held as non-negative long values
	*/
	private static final long MIN_SPECIAL = 0xfc000000 & UNSIGNED_INT_MASK;
	private static final long SURROGATES_TOP = 0xfff00000 & UNSIGNED_INT_MASK;
	private static final long MIN_HANGUL = 0xfff00000 & UNSIGNED_INT_MASK;
	// private static final long MIN_JAMO_V = 0xfff20000 & UNSIGNED_INT_MASK;
	private static final long JAMO_V_TOP = 0xfff30000 & UNSIGNED_INT_MASK;


	/* indexes[] value names */
	/* number of bytes in normalization trie */
	static final int INDEX_TRIE_SIZE = 0;
	/* number of chars in extra data */
	static final int INDEX_CHAR_COUNT = 1;
	/* number of uint16_t words for combining data */
	static final int INDEX_COMBINE_DATA_COUNT = 2;
	/* first code point with quick check NFC NO/MAYBE */
	public static final int INDEX_MIN_NFC_NO_MAYBE = 6;
	/* first code point with quick check NFKC NO/MAYBE */
	public static final int INDEX_MIN_NFKC_NO_MAYBE = 7;
	/* first code point with quick check NFD NO/MAYBE */
	public static final int INDEX_MIN_NFD_NO_MAYBE = 8;
	/* first code point with quick check NFKD NO/MAYBE */
	public static final int INDEX_MIN_NFKD_NO_MAYBE = 9;
	/* number of bytes in FCD trie */
	static final int INDEX_FCD_TRIE_SIZE = 10;
	/* number of bytes in the auxiliary trie */
	static final int INDEX_AUX_TRIE_SIZE = 11;
	/* changing this requires a new formatVersion */
	static final int INDEX_TOP = 32;


	/* AUX constants */
	/* value constants for auxTrie */
	private static final int AUX_UNSAFE_SHIFT = 11;
	private static final int AUX_COMP_EX_SHIFT = 10;
	private static final int AUX_NFC_SKIPPABLE_F_SHIFT = 12;

	/* AUX_FNC_MASK covers the bits below AUX_COMP_EX_SHIFT (bits 9..0);
	* the other masks are the single bits selected by their shift constants
	*/
	private static final int AUX_MAX_FNC = 1<<AUX_COMP_EX_SHIFT;
	private static final int AUX_UNSAFE_MASK = (int)((1<<AUX_UNSAFE_SHIFT) & UNSIGNED_INT_MASK);
	private static final int AUX_FNC_MASK = (int)((AUX_MAX_FNC-1) & UNSIGNED_INT_MASK);
	private static final int AUX_COMP_EX_MASK = (int)((1<<AUX_COMP_EX_SHIFT) & UNSIGNED_INT_MASK);
	private static final long AUX_NFC_SKIP_F_MASK = ((UNSIGNED_INT_MASK&1)<<AUX_NFC_SKIPPABLE_F_SHIFT);

	/* NOTE(review): not referenced in the visible part of this file */
	private static final int MAX_BUFFER_SIZE = 20;

	/*******************************/

	/* Wrappers for Trie implementations */
	/** Folding-offset callback and holder for the 32-bit normalization trie. */
	static final class NormTrieImpl implements Trie.DataManipulate{
	    /** The normalization trie; assigned by the NormalizerImpl constructor. */
	    static IntTrie normTrie;

	    /**
	     * Called by the Trie to extract from a lead surrogate's
	     * data the index array offset of the indexes for that lead surrogate.
	     * The 32-bit trie result may contain a special extraData index with
	     * the folding offset.
	     * @param value data value for a surrogate from the trie, including
	     *        the folding offset
	     * @return data offset or 0 if there is no data for the lead surrogate
	     */
	    public int getFoldingOffset(int value){
	        final int blockMask=0x3ff<<SURROGATE_BLOCK_BITS;
	        final int shifted=value>>(EXTRA_SHIFT-SURROGATE_BLOCK_BITS);
	        return BMP_INDEX_LENGTH+(shifted&blockMask);
	    }
	}
	/** Folding-offset callback and holder for the FCD trie. */
	static final class FCDTrieImpl implements Trie.DataManipulate{
	    /** The FCD trie; assigned by the NormalizerImpl constructor. */
	    static CharTrie fcdTrie;

	    /**
	     * Called by the Trie to extract from a lead surrogate's
	     * data the index array offset of the indexes for that lead surrogate.
	     * For the FCD trie the folding offset is the lead FCD value itself.
	     * @param value data value for a surrogate from the trie, including
	     *        the folding offset
	     * @return data offset or 0 if there is no data for the lead surrogate
	     */
	    public int getFoldingOffset(int value){
	        final int offset=value;
	        return offset;
	    }
	}

	/** Folding-offset callback and holder for the auxiliary trie. */
	static final class AuxTrieImpl implements Trie.DataManipulate{
	    /** The auxiliary trie; assigned by the NormalizerImpl constructor. */
	    static CharTrie auxTrie;

	    /**
	     * Called by the Trie to extract from a lead surrogate's
	     * data the index array offset of the indexes for that lead surrogate.
	     * For the aux trie the folding offset is in bits 9..0 of the 16-bit
	     * trie result.
	     * @param value data value for a surrogate from the trie, including
	     *        the folding offset
	     * @return data offset or 0 if there is no data for the lead surrogate
	     */
	    public int getFoldingOffset(int value){
	        final int fncBits=value&AUX_FNC_MASK;
	        return fncBits<<SURROGATE_BLOCK_BITS;
	    }
	}

	/****************************************************/


	/* folding-offset callbacks handed to the three tries; created by the
	* constructor
	*/
	private static FCDTrieImpl fcdTrieImpl;
	private static NormTrieImpl normTrieImpl;
	private static AuxTrieImpl auxTrieImpl;
	/* header values read from the data file, addressed with the INDEX_*
	* constants
	*/
	private static int[] indexes;
	/* sized from indexes[INDEX_COMBINE_DATA_COUNT] and filled by the reader */
	private static char[] combiningTable;
	/* sized from indexes[INDEX_CHAR_COUNT] and filled by the reader */
	private static char[] extraData;

	/* set to true by the constructor once the tries have been built */
	private static boolean isDataLoaded;
	/* true if the data format version is at least 2.1 */
	private static boolean isFormatVersion_2_1;
	/* true if the data format version is at least 2.2 */
	private static boolean isFormatVersion_2_2;
	/* Unicode version bytes as reported by the data reader */
	private static byte[] unicodeVersion;

	/**
	* Default buffer size of datafile
	*/
	private static final int DATA_BUFFER_SIZE = 25000;

	/**
	* FCD check: everything below this code point is known to have a 0
	* lead combining class
	*/
	public static final int MIN_WITH_LEAD_CC=0x300;


	/**
	* Bit 7 of the length byte for a decomposition string in extra data is
	* a flag indicating whether the decomposition string is
	* preceded by a 16-bit word with the leading and trailing cc
	* of the decomposition (like for A-umlaut);
	* if not, then both cc's are zero (like for compatibility ideographs).
	*/
	private static final int DECOMP_FLAG_LENGTH_HAS_CC=0x80;
	/**
	* Bits 6..0 of the length byte contain the actual length.
	*/
	private static final int DECOMP_LENGTH_MASK=0x7f;

	/** Length of the BMP portion of the index (stage 1) array. */
	private static final int BMP_INDEX_LENGTH=0x10000>>Trie.INDEX_STAGE_1_SHIFT_;
	/** Number of bits of a trail surrogate that are used in index table
	* lookups.
	*/
	private static final int SURROGATE_BLOCK_BITS=10-Trie.INDEX_STAGE_1_SHIFT_;


	// public utility
	/**
	 * Reads one entry of the indexes[] header array loaded from the data file.
	 * @param index position in indexes[], normally one of the INDEX_* constants
	 * @return the value stored at that position
	 */
	public static int getFromIndexesArr(int index){
	    final int[] table=indexes;
	    return table[index];
	}

	// private constructor -----------------------------------------------

	/**
	 * Constructor. Loads the normalization data from DATA_FILE_NAME into the
	 * static tables of this class (indexes, extra data, combining table and
	 * the three tries) unless the data has already been loaded.
	 * The input stream is closed whether or not loading succeeds.
	 * @exception IOException thrown when data reading fails or data corrupted
	 */
	private NormalizerImpl() throws IOException {
	    //data should be loaded only once
	    if(isDataLoaded){
	        return;
	    }

	    // jar access
	    InputStream i = ICUData.getRequiredStream(DATA_FILE_NAME);
	    BufferedInputStream b = new BufferedInputStream(i,DATA_BUFFER_SIZE);
	    try {
	        NormalizerDataReader reader = new NormalizerDataReader(b);

	        // read the indexes
	        indexes = reader.readIndexes(NormalizerImpl.INDEX_TOP);

	        byte[] normBytes = new byte[indexes[NormalizerImpl.INDEX_TRIE_SIZE]];

	        int combiningTableTop = indexes[NormalizerImpl.INDEX_COMBINE_DATA_COUNT];
	        combiningTable = new char[combiningTableTop];

	        int extraDataTop = indexes[NormalizerImpl.INDEX_CHAR_COUNT];
	        extraData = new char[extraDataTop];

	        byte[] fcdBytes = new byte[indexes[NormalizerImpl.INDEX_FCD_TRIE_SIZE]];
	        byte[] auxBytes = new byte[indexes[NormalizerImpl.INDEX_AUX_TRIE_SIZE]];

	        fcdTrieImpl = new FCDTrieImpl();
	        normTrieImpl = new NormTrieImpl();
	        auxTrieImpl = new AuxTrieImpl();

	        // load the rest of the data and initialize the data members
	        reader.read(normBytes, fcdBytes,auxBytes, extraData, combiningTable);

	        NormTrieImpl.normTrie = new IntTrie( new ByteArrayInputStream(normBytes),normTrieImpl );
	        FCDTrieImpl.fcdTrie = new CharTrie( new ByteArrayInputStream(fcdBytes),fcdTrieImpl );
	        AuxTrieImpl.auxTrie = new CharTrie( new ByteArrayInputStream(auxBytes),auxTrieImpl );

	        // we reached here without any exceptions so the data is fully
	        // loaded set the variable to true
	        isDataLoaded = true;

	        // get the data format version
	        byte[] formatVersion = reader.getDataFormatVersion();

	        isFormatVersion_2_1 =( formatVersion[0]>2
	                               ||
	                               (formatVersion[0]==2 && formatVersion[1]>=1)
	                             );
	        isFormatVersion_2_2 =( formatVersion[0]>2
	                               ||
	                               (formatVersion[0]==2 && formatVersion[1]>=2)
	                             );
	        unicodeVersion = reader.getUnicodeVersion();
	    } finally {
	        // release the stream even when reading or trie construction fails
	        b.close();
	    }
	}

	/* ---------------------------------------------------------------------- */

	/* Korean Hangul and Jamo constants */

	public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */
	public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */
	public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */

	/* first precomposed Hangul syllable */
	public static final int HANGUL_BASE=0xac00;

	public static final int JAMO_L_COUNT=19;
	public static final int JAMO_V_COUNT=21;
	public static final int JAMO_T_COUNT=28;
	/* number of precomposed syllables: one per (L, V, T) combination */
	public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;

	/*
	 * is c a precomposed Hangul syllable whose offset from HANGUL_BASE is a
	 * multiple of JAMO_T_COUNT (i.e. a syllable without a trailing jamo)?
	 */
	private static boolean isHangulWithoutJamoT(char c) {
	    // char arithmetic wraps, so code units below HANGUL_BASE become
	    // large values and fail the range test
	    final char syllableOffset=(char)(c-HANGUL_BASE);
	    if(syllableOffset>=HANGUL_COUNT) {
	        return false;
	    }
	    return syllableOffset%JAMO_T_COUNT==0;
	}

	/* norm32 helpers */

	/* true if norm32 lies below MIN_SPECIAL, i.e. it has a regular index */
	private static boolean isNorm32Regular(long norm32) {
	    return !(norm32>=MIN_SPECIAL);
	}

	/*
	 * true if norm32 lies in [MIN_SPECIAL, SURROGATES_TOP[, i.e. it carries
	 * the special index for a lead surrogate
	 */
	private static boolean isNorm32LeadSurrogate(long norm32) {
	    if(norm32<MIN_SPECIAL) {
	        return false;
	    }
	    return norm32<SURROGATES_TOP;
	}

	/*
	 * true if norm32 is at or above MIN_HANGUL, i.e. it carries the special
	 * index for a Hangul syllable or a Jamo
	 */
	private static boolean isNorm32HangulOrJamo(long norm32) {
	    return !(norm32<MIN_HANGUL);
	}

	/*
	 * Precondition: norm32 belongs to a Jamo V or a Jamo T.
	 * Returns true for a Jamo V, which is any such value below JAMO_V_TOP.
	 */
	private static boolean isJamoVTNorm32JamoV(long norm32) {
	    return JAMO_V_TOP>norm32;
	}

	/* data access primitives ----------------------------------------------- */

	public static long/unsigned/ getNorm32(char c) {
	return ((UNSIGNED_INT_MASK) & (NormTrieImpl.normTrie.getLeadValue(c)));
	}

	public static long/unsigned/ getNorm32FromSurrogatePair(long norm32,
	char c2) {
	/*
	* the surrogate index in norm32 stores only the number of the surrogate
	* index block see gennorm/store.c/getFoldedNormValue()
	*/
	return ((UNSIGNED_INT_MASK) &
	NormTrieImpl.normTrie.getTrailValue((int)norm32, c2));
	}
	///CLOVER:OFF
	/*
	 * get the norm32 value for a whole code point, as an unsigned 32-bit
	 * quantity held in a long
	 */
	private static long getNorm32(int c){
	    return NormTrieImpl.normTrie.getCodePointValue(c) & UNSIGNED_INT_MASK;
	}

	/*
	* get a norm32 from text with complete code points
	* (like from decompositions)
	*/
	private static long/unsigned/ getNorm32(char[] p,int start,
	int/unsigned/ mask) {
	long/unsigned/ norm32= getNorm32(p[start]);
	if(((norm32&mask)>0) && isNorm32LeadSurrogate(norm32)) {
	/* p is a lead surrogate, get the real norm32 /
	norm32=getNorm32FromSurrogatePair(norm32, p[start+1]);
	}
	return norm32;
	}

	//// for StringPrep
	/**
	 * Builds a VersionInfo from the four Unicode version bytes that were
	 * read from the data file.
	 * @return the Unicode version of the loaded normalization data
	 */
	public static VersionInfo getUnicodeVersion(){
	    final byte[] version=unicodeVersion;
	    return VersionInfo.getInstance(version[0], version[1],
	                                   version[2], version[3]);
	}

	/**
	 * Gets the FCD value for a single UTF-16 code unit from the FCD trie's
	 * lead-value lookup.
	 * @param c the code unit to look up
	 * @return the 16-bit FCD trie value
	 */
	public static char getFCD16(char c) {
	    final CharTrie trie=FCDTrieImpl.fcdTrie;
	    return trie.getLeadValue(c);
	}

	/**
	 * Gets the FCD value of a supplementary code point from the FCD value
	 * of its lead surrogate and its trail surrogate.
	 * @param fcd16 the FCD value obtained for the lead surrogate
	 * @param c2 the trail surrogate
	 * @return the 16-bit FCD trie value
	 */
	public static char getFCD16FromSurrogatePair(char fcd16, char c2) {
	    /* the surrogate index in fcd16 is an absolute offset over the
	     * start of stage 1
	     */
	    final CharTrie trie=FCDTrieImpl.fcdTrie;
	    return trie.getTrailValue(fcd16, c2);
	}
	/**
	 * Gets the FCD value for a whole code point.
	 * @param c the code point to look up
	 * @return the FCD trie value, widened to int
	 */
	public static int getFCD16(int c) {
	    final int fcd16=FCDTrieImpl.fcdTrie.getCodePointValue(c);
	    return fcd16;
	}

	/* the extra data index is stored in the bits above EXTRA_SHIFT */
	private static int getExtraDataIndex(long norm32) {
	    final long index=norm32>>EXTRA_SHIFT;
	    return (int)index;
	}

	private static final class DecomposeArgs{
	int /unsigned byte/ cc;
	int /unsigned byte/ trailCC;
	int length;
	}
	/**
	*
	* get the canonical or compatibility decomposition for one character
	*
	* @return index into the extraData array
	*/
	private static int/index/ decompose(long/unsigned/ norm32,
	int/unsigned/ qcMask,
	DecomposeArgs args) {
	int p= getExtraDataIndex(norm32);
	args.length=extraData[p++];

	if((norm32&qcMask&QC_NFKD)!=0 && args.length>=0x100) {
	/* use compatibility decomposition, skip canonical data */
	p+=((args.length>>7)&1)+(args.length&DECOMP_LENGTH_MASK);
	args.length>>=8;
	}

	if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) {
	/* get the lead and trail cc's */
	char bothCCs=extraData[p++];
	args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8);
	args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs;
	} else {
	/* lead and trail cc's are both 0 */
	args.cc=args.trailCC=0;
	}

	args.length&=DECOMP_LENGTH_MASK;
	return p;
	}


	/**
	* get the canonical decomposition for one character
	* @return index into the extraData array
	*/
	private static int decompose(long/unsigned/ norm32,
	DecomposeArgs args) {

	int p= getExtraDataIndex(norm32);
	args.length=extraData[p++];

	if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) {
	/* get the lead and trail cc's */
	char bothCCs=extraData[p++];
	args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8);
	args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs;
	} else {
	/* lead and trail cc's are both 0 */
	args.cc=args.trailCC=0;
	}

	args.length&=DECOMP_LENGTH_MASK;
	return p;
	}


	/** In/out state for getNextCC(): the text, the read window and the code unit(s) read. */
	private static final class NextCCArgs{
	    /** text being read */
	    char[] source;
	    /** index of the next code unit to read, and the end of the text */
	    int next, limit;
	    /** code unit(s) read last; c2 is 0 if only one code unit was used */
	    char c, c2;
	}

	/*
	* get the combining class of (c, c2)= args.source[args.next++]
	* before: args.next<args.limit after: args.next<=args.limit
	* if only one code unit is used, then c2==0
	*/
	private static int /unsigned byte/ getNextCC(NextCCArgs args) {
	long /unsigned/ norm32;

	args.c=args.source[args.next++];

	norm32= getNorm32(args.c);
	if((norm32 & CC_MASK)==0) {
	args.c2=0;
	return 0;
	} else {
	if(!isNorm32LeadSurrogate(norm32)) {
	args.c2=0;
	} else {
	/* c is a lead surrogate, get the real norm32 */
	if(args.next!=args.limit &&
	UTF16.isTrailSurrogate(args.c2=args.source[args.next])){
	++args.next;
	norm32=getNorm32FromSurrogatePair(norm32, args.c2);
	} else {
	args.c2=0;
	return 0;
	}
	}

	return (int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT));
	}
	}

	/** In/out state for getPrevNorm32(): the text, the read window and the code unit(s) read. */
	private static final class PrevArgs{
	    /** text being read backwards */
	    char[] src;
	    /** lower bound of the text, and the index just after the next code unit to read */
	    int start, current;
	    /** code unit(s) read last; c2 is 0 unless (c2, c) is a surrogate pair */
	    char c, c2;
	}

	/*
	* read backwards and get norm32
	* return 0 if the character is <minC
	* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
	* surrogate but read second!)
	*/
	private static long /unsigned/ getPrevNorm32(PrevArgs args,
	int/unsigned/ minC,
	int/unsigned/ mask) {
	long/unsigned/ norm32;

	args.c=args.src[--args.current];
	args.c2=0;

	/* check for a surrogate before getting norm32 to see if we need to
	* predecrement further
	*/
	if(args.c<minC) {
	return 0;
	} else if(!UTF16.isSurrogate(args.c)) {
	return getNorm32(args.c);
	} else if(UTF16.isLeadSurrogate(args.c)) {
	/* unpaired first surrogate */
	return 0;
	} else if(args.current!=args.start &&
	UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) {
	--args.current;
	norm32=getNorm32(args.c2);

	if((norm32&mask)==0) {
	/* all surrogate pairs with this lead surrogate have
	* only irrelevant data
	*/
	return 0;
	} else {
	/* norm32 must be a surrogate special */
	return getNorm32FromSurrogatePair(norm32, args.c);
	}
	} else {
	/* unpaired second surrogate */
	args.c2=0;
	return 0;
	}
	}

	/*
	* get the combining class of (c, c2)=*--p
	* before: start<p after: start<=p
	*/
	private static int /unsigned byte/ getPrevCC(PrevArgs args) {

	return (int)((UNSIGNED_BYTE_MASK)&(getPrevNorm32(args, MIN_WITH_LEAD_CC,
	CC_MASK)>>CC_SHIFT));
	}

	/*
	* is this a safe boundary character for NF*D?
	* (lead cc==0)
	*/
	public static boolean isNFDSafe(long/unsigned/ norm32,
	int/unsigned/ccOrQCMask,
	int/unsigned/ decompQCMask) {
	if((norm32&ccOrQCMask)==0) {
	return true; /* cc==0 and no decomposition: this is NFD safe /
	}

	/* inspect its decomposition - maybe a Hangul but not a surrogate here*/
	if(isNorm32Regular(norm32) && (norm32&decompQCMask)!=0) {
	DecomposeArgs args=new DecomposeArgs();
	/* decomposes, get everything from the variable-length extra data */
	decompose(norm32, decompQCMask, args);
	return args.cc==0;
	} else {
	/* no decomposition (or Hangul), test the cc directly */
	return (norm32&CC_MASK)==0;
	}
	}

	/*
	* is this (or does its decomposition begin with) a "true starter"?
	* (cc==0 and NF*C_YES)
	*/
	public static boolean isTrueStarter(long/unsigned/ norm32,
	int/unsigned/ ccOrQCMask,
	int/unsigned/ decompQCMask) {
	if((norm32&ccOrQCMask)==0) {
	return true; /* this is a true starter (could be Hangul or Jamo L)*/
	}

	/* inspect its decomposition - not a Hangul or a surrogate here */
	if((norm32&decompQCMask)!=0) {
	int p; /* index into extra data array */
	DecomposeArgs args=new DecomposeArgs();
	/* decomposes, get everything from the variable-length extra data */
	p=decompose(norm32, decompQCMask, args);

	if(args.cc==0) {
	int/unsigned/ qcMask=ccOrQCMask&QC_MASK;

	/* does it begin with NFC_YES? */
	if((getNorm32(extraData,p, qcMask)&qcMask)==0) {
	/* yes, the decomposition begins with a true starter */
	return true;
	}
	}
	}
	return false;
	}

	/* reorder UTF-16 in-place ---------------------------------------------- */

	/**
	* simpler, single-character version of mergeOrdered() -
	* bubble-insert one single code point into the preceding string
	* which is already canonically ordered
	* (c, c2) may or may not yet have been inserted at src[current]..src[p]
	*
	* it must be p=current+lengthof(c, c2) i.e. p=current+(c2==0 ? 1 : 2)
	*
	* before: src[start]..src[current] is already ordered, and
	* src[current]..src[p] may or may not hold (c, c2) but
	* must be exactly the same length as (c, c2)
	* after: src[start]..src[p] is ordered
	*
	* @return the trailing combining class
	*/
	private static int/unsigned byte/ insertOrdered(char[] source,
	int start,
	int current, int p,
	char c, char c2,
	int/unsigned byte/ cc) {
	int back, preBack;
	int r;
	int prevCC, trailCC=cc;

	if(start<current && cc!=0) {
	// search for the insertion point where cc>=prevCC
	preBack=back=current;
	PrevArgs prevArgs = new PrevArgs();
	prevArgs.current = current;
	prevArgs.start = start;
	prevArgs.src = source;
	// get the prevCC
	prevCC=getPrevCC(prevArgs);
	preBack = prevArgs.current;

	if(cc<prevCC) {
	// this will be the last code point, so keep its cc
	trailCC=prevCC;
	back=preBack;
	while(start<preBack) {
	prevCC=getPrevCC(prevArgs);
	preBack=prevArgs.current;
	if(cc>=prevCC) {
	break;
	}
	back=preBack;
	}


	// this is where we are right now with all these indicies:
	// [start]..[pPreBack] 0..? code points that we can ignore
	// [pPreBack]..[pBack] 0..1 code points with prevCC<=cc
	// [pBack]..[current] 0..n code points with >cc, move up to insert (c, c2)
	// [current]..[p] 1 code point (c, c2) with cc

	// move the code units in between up
	r=p;
	do {
	source[--r]=source[--current];
	} while(back!=current);
	}
	}

	// insert (c, c2)
	source[current]=c;
	if(c2!=0) {
	source[(current+1)]=c2;
	}

	// we know the cc of the last code point
	return trailCC;
	}

	/**
	* merge two UTF-16 string parts together
	* to canonically order (order by combining classes) their concatenation
	*
	* the two strings may already be adjacent, so that the merging is done
	* in-place if the two strings are not adjacent, then the buffer holding the
	* first one must be large enough
	* the second string may or may not be ordered in itself
	*
	* before: [start]..[current] is already ordered, and
	* [next]..[limit] may be ordered in itself, but
	* is not in relation to [start..current[
	* after: [start..current+(limit-next)[ is ordered
	*
	* the algorithm is a simple bubble-sort that takes the characters from
	* src[next++] and inserts them in correct combining class order into the
	* preceding part of the string
	*
	* since this function is called much less often than the single-code point
	* insertOrdered(), it just uses that for easier maintenance
	*
	* @return the trailing combining class
	*/
	private static int /unsigned byte/ mergeOrdered(char[] source,
	int start,
	int current,
	char[] data,
	int next,
	int limit,
	boolean isOrdered) {
	int r;
	int /unsigned byte/ cc, trailCC=0;
	boolean adjacent;

	adjacent= current==next;
	NextCCArgs ncArgs = new NextCCArgs();
	ncArgs.source = data;
	ncArgs.next = next;
	ncArgs.limit = limit;

	if(start!=current \|\| !isOrdered) {

	while(ncArgs.next<ncArgs.limit) {
	cc=getNextCC(ncArgs);
	if(cc==0) {
	// does not bubble back
	trailCC=0;
	if(adjacent) {
	current=ncArgs.next;
	} else {
	data[current++]=ncArgs.c;
	if(ncArgs.c2!=0) {
	data[current++]=ncArgs.c2;
	}
	}
	if(isOrdered) {
	break;
	} else {
	start=current;
	}
	} else {
	r=current+(ncArgs.c2==0 ? 1 : 2);
	trailCC=insertOrdered(source,start, current, r,
	ncArgs.c, ncArgs.c2, cc);
	current=r;
	}
	}
	}

	if(ncArgs.next==ncArgs.limit) {
	// we know the cc of the last code point
	return trailCC;
	} else {
	if(!adjacent) {
	// copy the second string part
	do {
	source[current++]=data[ncArgs.next++];
	} while(ncArgs.next!=ncArgs.limit);
	ncArgs.limit=current;
	}
	PrevArgs prevArgs = new PrevArgs();
	prevArgs.src = data;
	prevArgs.start = start;
	prevArgs.current = ncArgs.limit;
	return getPrevCC(prevArgs);
	}

	}
	private static int /unsigned byte/ mergeOrdered(char[] source,
	int start,
	int current,
	char[] data,
	final int next,
	final int limit) {
	return mergeOrdered(source,start,current,data,next,limit,true);
	}

	public static NormalizerBase.QuickCheckResult quickCheck(char[] src,
	int srcStart,
	int srcLimit,
	int minNoMaybe,
	int qcMask,
	int options,
	boolean allowMaybe,
	UnicodeSet nx){

	int ccOrQCMask;
	long norm32;
	char c, c2;
	char cc, prevCC;
	long qcNorm32;
	NormalizerBase.QuickCheckResult result;
	ComposePartArgs args = new ComposePartArgs();
	char[] buffer ;
	int start = srcStart;

	if(!isDataLoaded) {
	return NormalizerBase.MAYBE;
	}
	// initialize
	ccOrQCMask=CC_MASK\|qcMask;
	result=NormalizerBase.YES;
	prevCC=0;

	for(;;) {
	for(;;) {
	if(srcStart==srcLimit) {
	return result;
	} else if((c=src[srcStart++])>=minNoMaybe &&
	(( norm32=getNorm32(c)) & ccOrQCMask)!=0) {
	break;
	}
	prevCC=0;
	}


	// check one above-minimum, relevant code unit
	if(isNorm32LeadSurrogate(norm32)) {
	// c is a lead surrogate, get the real norm32
	if(srcStart!=srcLimit&& UTF16.isTrailSurrogate(c2=src[srcStart])) {
	++srcStart;
	norm32=getNorm32FromSurrogatePair(norm32,c2);
	} else {
	norm32=0;
	c2=0;
	}
	}else{
	c2=0;
	}
	if(nx_contains(nx, c, c2)) {
	/* excluded: norm32==0 */
	norm32=0;
	}

	// check the combining order
	cc=(char)((norm32>>CC_SHIFT)&0xFF);
	if(cc!=0 && cc<prevCC) {
	return NormalizerBase.NO;
	}
	prevCC=cc;

	// check for "no" or "maybe" quick check flags
	qcNorm32 = norm32 & qcMask;
	if((qcNorm32& QC_ANY_NO)>=1) {
	result= NormalizerBase.NO;
	break;
	} else if(qcNorm32!=0) {
	// "maybe" can only occur for NFC and NFKC
	if(allowMaybe){
	result=NormalizerBase.MAYBE;
	}else{
	// normalize a section around here to see if it is really
	// normalized or not
	int prevStarter;
	int/unsigned/ decompQCMask;

	decompQCMask=(qcMask<<2)&0xf; // decomposition quick check mask

	// find the previous starter

	// set prevStarter to the beginning of the current character
	prevStarter=srcStart-1;
	if(UTF16.isTrailSurrogate(src[prevStarter])) {
	// safe because unpaired surrogates do not result
	// in "maybe"
	--prevStarter;
	}
	prevStarter=findPreviousStarter(src, start, prevStarter,
	ccOrQCMask, decompQCMask,
	(char)minNoMaybe);

	// find the next true starter in [src..limit[ - modifies
	// src to point to the next starter
	srcStart=findNextStarter(src,srcStart, srcLimit, qcMask,
	decompQCMask,(char) minNoMaybe);

	//set the args for compose part
	args.prevCC = prevCC;

	// decompose and recompose [prevStarter..src[
	buffer = composePart(args,prevStarter,src,srcStart,srcLimit,options,nx);

	// compare the normalized version with the original
	if(0!=strCompare(buffer,0,args.length,src,prevStarter,srcStart, false)) {
	result=NormalizerBase.NO; // normalization differs
	break;
	}

	// continue after the next starter
	}
	}
	}
	return result;
	}


	//------------------------------------------------------
	// make NFD & NFKD
	//------------------------------------------------------

	public static int decompose(char[] src,int srcStart,int srcLimit,
	char[] dest,int destStart,int destLimit,
	boolean compat,int[] outTrailCC,
	UnicodeSet nx) {

	char[] buffer = new char[3];
	int prevSrc;
	long norm32;
	int ccOrQCMask, qcMask;
	int reorderStartIndex, length;
	char c, c2, minNoMaybe;
	int/unsigned byte/ cc, prevCC, trailCC;
	char[] p;
	int pStart;
	int destIndex = destStart;
	int srcIndex = srcStart;
	if(!compat) {
	minNoMaybe=(char)indexes[INDEX_MIN_NFD_NO_MAYBE];
	qcMask=QC_NFD;
	} else {
	minNoMaybe=(char)indexes[INDEX_MIN_NFKD_NO_MAYBE];
	qcMask=QC_NFKD;
	}

	/* initialize */
	ccOrQCMask=CC_MASK\|qcMask;
	reorderStartIndex=0;
	prevCC=0;
	norm32=0;
	c=0;
	pStart=0;

	cc=trailCC=-1;//initialize to bogus value

	for(;;) {
	/* count code units below the minimum or with irrelevant data for
	* the quick check
	*/
	prevSrc=srcIndex;

	while(srcIndex!=srcLimit &&((c=src[srcIndex])<minNoMaybe \|\|
	((norm32=getNorm32(c))&ccOrQCMask)==0)){
	prevCC=0;
	++srcIndex;
	}

	/* copy these code units all at once */
	if(srcIndex!=prevSrc) {
	length=srcIndex-prevSrc;
	if((destIndex+length)<=destLimit) {
	System.arraycopy(src,prevSrc,dest,destIndex,length);
	}

	destIndex+=length;
	reorderStartIndex=destIndex;
	}

	/* end of source reached? */
	if(srcIndex==srcLimit) {
	break;
	}

	/* c already contains src and norm32 is set for it, increment src/
	++srcIndex;

	/* check one above-minimum, relevant code unit */
	/*
	* generally, set p and length to the decomposition string
	* in simple cases, p==NULL and (c, c2) will hold the length code
	* units to append in all cases, set cc to the lead and trailCC to
	* the trail combining class
	*
	* the following merge-sort of the current character into the
	* preceding, canonically ordered result text will use the
	* optimized insertOrdered()
	* if there is only one single code point to process;
	* this is indicated with p==NULL, and (c, c2) is the character to
	* insert
	* ((c, 0) for a BMP character and (lead surrogate, trail surrogate)
	* for a supplementary character)
	* otherwise, p[length] is merged in with _mergeOrdered()
	*/
	if(isNorm32HangulOrJamo(norm32)) {
	if(nx_contains(nx, c)) {
	c2=0;
	p=null;
	length=1;
	} else {
	// Hangul syllable: decompose algorithmically
	p=buffer;
	pStart=0;
	cc=trailCC=0;

	c-=HANGUL_BASE;

	c2=(char)(c%JAMO_T_COUNT);
	c/=JAMO_T_COUNT;
	if(c2>0) {
	buffer[2]=(char)(JAMO_T_BASE+c2);
	length=3;
	} else {
	length=2;
	}

	buffer[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
	buffer[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
	}
	} else {
	if(isNorm32Regular(norm32)) {
	c2=0;
	length=1;
	} else {
	// c is a lead surrogate, get the real norm32
	if(srcIndex!=srcLimit &&
	UTF16.isTrailSurrogate(c2=src[srcIndex])) {
	++srcIndex;
	length=2;
	norm32=getNorm32FromSurrogatePair(norm32, c2);
	} else {
	c2=0;
	length=1;
	norm32=0;
	}
	}

	/* get the decomposition and the lead and trail cc's */
	if(nx_contains(nx, c, c2)) {
	/* excluded: norm32==0 */
	cc=trailCC=0;
	p=null;
	} else if((norm32&qcMask)==0) {
	/* c does not decompose */
	cc=trailCC=(int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT));
	p=null;
	pStart=-1;
	} else {
	DecomposeArgs arg = new DecomposeArgs();
	/* c decomposes, get everything from the variable-length
	* extra data
	*/
	pStart=decompose(norm32, qcMask, arg);
	p=extraData;
	length=arg.length;
	cc=arg.cc;
	trailCC=arg.trailCC;
	if(length==1) {
	/* fastpath a single code unit from decomposition */
	c=p[pStart];
	c2=0;
	p=null;
	pStart=-1;
	}
	}
	}

	/* append the decomposition to the destination buffer, assume
	* length>0
	*/
	if((destIndex+length)<=destLimit) {
	int reorderSplit=destIndex;
	if(p==null) {
	/* fastpath: single code point */
	if(cc!=0 && cc<prevCC) {
	/* (c, c2) is out of order with respect to the preceding
	* text
	*/
	destIndex+=length;
	trailCC=insertOrdered(dest,reorderStartIndex,
	reorderSplit, destIndex, c, c2, cc);
	} else {
	/* just append (c, c2) */
	dest[destIndex++]=c;
	if(c2!=0) {
	dest[destIndex++]=c2;
	}
	}
	} else {
	/* general: multiple code points (ordered by themselves)
	* from decomposition
	*/
	if(cc!=0 && cc<prevCC) {
	/* the decomposition is out of order with respect to the
	* preceding text
	*/
	destIndex+=length;
	trailCC=mergeOrdered(dest,reorderStartIndex,
	reorderSplit,p, pStart,pStart+length);
	} else {
	/* just append the decomposition */
	do {
	dest[destIndex++]=p[pStart++];
	} while(--length>0);
	}
	}
	} else {
	/* buffer overflow */
	/* keep incrementing the destIndex for preflighting */
	destIndex+=length;
	}

	prevCC=trailCC;
	if(prevCC==0) {
	reorderStartIndex=destIndex;
	}
	}

	outTrailCC[0]=prevCC;

	return destIndex - destStart;
	}

	/* make NFC & NFKC ------------------------------------------------------ */
	private static final class NextCombiningArgs{
	char[] source;
	int start;
	//int limit;
	char c;
	char c2;
	int/unsigned/ combiningIndex;
	char /unsigned byte/ cc;
	}

	/* get the composition properties of the next character */
	private static int /unsigned/ getNextCombining(NextCombiningArgs args,
	int limit,
	UnicodeSet nx) {
	long/unsigned/ norm32;
	int combineFlags;
	/* get properties */
	args.c=args.source[args.start++];
	norm32=getNorm32(args.c);

	/* preset output values for most characters */
	args.c2=0;
	args.combiningIndex=0;
	args.cc=0;

	if((norm32&(CC_MASK\|COMBINES_ANY))==0) {
	return 0;
	} else {
	if(isNorm32Regular(norm32)) {
	/* set cc etc. below */
	} else if(isNorm32HangulOrJamo(norm32)) {
	/* a compatibility decomposition contained Jamos */
	args.combiningIndex=(int)((UNSIGNED_INT_MASK)&(0xfff0\|
	(norm32>>EXTRA_SHIFT)));
	return (int)(norm32&COMBINES_ANY);
	} else {
	/* c is a lead surrogate, get the real norm32 */
	if(args.start!=limit && UTF16.isTrailSurrogate(args.c2=
	args.source[args.start])) {
	++args.start;
	norm32=getNorm32FromSurrogatePair(norm32, args.c2);
	} else {
	args.c2=0;
	return 0;
	}
	}

	if(nx_contains(nx, args.c, args.c2)) {
	return 0; /* excluded: norm32==0 */
	}

	args.cc= (char)((norm32>>CC_SHIFT)&0xff);

	combineFlags=(int)(norm32&COMBINES_ANY);
	if(combineFlags!=0) {
	int index = getExtraDataIndex(norm32);
	args.combiningIndex=index>0 ? extraData[(index-1)] :0;
	}

	return combineFlags;
	}
	}

	/*
	* given a composition-result starter (c, c2) - which means its cc==0,
	* it combines forward, it has extra data, its norm32!=0,
	* it is not a Hangul or Jamo,
	* get just its combineFwdIndex
	*
	* norm32(c) is special if and only if c2!=0
	*/
	private static int/unsigned/ getCombiningIndexFromStarter(char c,char c2){
	long/unsigned/ norm32;

	norm32=getNorm32(c);
	if(c2!=0) {
	norm32=getNorm32FromSurrogatePair(norm32, c2);
	}
	return extraData[(getExtraDataIndex(norm32)-1)];
	}

	/*
	* Find the recomposition result for
	* a forward-combining character
	* (specified with a pointer to its part of the combiningTable[])
	* and a backward-combining character
	* (specified with its combineBackIndex).
	*
	* If these two characters combine, then set (value, value2)
	* with the code unit(s) of the composition character.
	*
	* Return value:
	* 0 do not combine
	* 1 combine
	* >1 combine, and the composition is a forward-combining starter
	*
	* See unormimp.h for a description of the composition table format.
	*/
	private static int/unsigned/ combine(char[]table,int tableStart,
	int/unsinged/ combineBackIndex,
	int[] outValues) {
	int/unsigned/ key;
	int value,value2;

	if(outValues.length<2){
	throw new IllegalArgumentException();
	}

	/* search in the starter's composition table */
	for(;;) {
	key=table[tableStart++];
	if(key>=combineBackIndex) {
	break;
	}
	tableStart+= ((table[tableStart]&0x8000) != 0)? 2 : 1;
	}

	/* mask off bit 15, the last-entry-in-the-list flag */
	if((key&0x7fff)==combineBackIndex) {
	/* found! combine! */
	value=table[tableStart];

	/* is the composition a starter that combines forward? */
	key=(int)((UNSIGNED_INT_MASK)&((value&0x2000)+1));

	/* get the composition result code point from the variable-length
	* result value
	*/
	if((value&0x8000) != 0) {
	if((value&0x4000) != 0) {
	/* surrogate pair composition result */
	value=(int)((UNSIGNED_INT_MASK)&((value&0x3ff)\|0xd800));
	value2=table[tableStart+1];
	} else {
	/* BMP composition result U+2000..U+ffff */
	value=table[tableStart+1];
	value2=0;
	}
	} else {
	/* BMP composition result U+0000..U+1fff */
	value&=0x1fff;
	value2=0;
	}
	outValues[0]=value;
	outValues[1]=value2;
	return key;
	} else {
	/* not found */
	return 0;
	}
	}


	/*
	 * In/out parameter bundle for recompose(): the text is recomposed in
	 * place inside source, and start/limit are updated as text is consumed
	 * and removed.
	 */
	private static final class RecomposeArgs{
	    /* buffer holding the text to recompose */
	    char[] source;
	    /* [start..limit[ is the part of source that is still to be processed */
	    int start, limit;
	}
	/*
	* recompose the characters in [p..limit[
	* (which is in NFD - decomposed and canonically ordered),
	* adjust limit, and return the trailing cc
	*
	* since for NFKC we may get Jamos in decompositions, we need to
	* recompose those too
	*
	* note that recomposition never lengthens the text:
	* any character consists of either one or two code units;
	* a composition may contain at most one more code unit than the original
	* starter, while the combining mark that is removed has at least one code
	* unit
	*/
	private static char/unsigned byte/ recompose(RecomposeArgs args, int options, UnicodeSet nx) {
	int remove, q, r;
	int /unsigned/ combineFlags;
	int /unsigned/ combineFwdIndex, combineBackIndex;
	int /unsigned/ result, value=0, value2=0;
	int /unsigned byte/ prevCC;
	boolean starterIsSupplementary;
	int starter;
	int[] outValues = new int[2];
	starter=-1; /* no starter */
	combineFwdIndex=0; /* will not be used until starter!=NULL */
	starterIsSupplementary=false; /* will not be used until starter!=NULL */
	prevCC=0;

	NextCombiningArgs ncArg = new NextCombiningArgs();
	ncArg.source = args.source;

	ncArg.cc =0;
	ncArg.c2 =0;

	for(;;) {
	ncArg.start = args.start;
	combineFlags=getNextCombining(ncArg,args.limit,nx);
	combineBackIndex=ncArg.combiningIndex;
	args.start = ncArg.start;

	if(((combineFlags&COMBINES_BACK)!=0) && starter!=-1) {
	if((combineBackIndex&0x8000)!=0) {
	/* c is a Jamo V/T, see if we can compose it with the
	* previous character
	*/
	/* for the PRI #29 fix, check that there is no intervening combining mark */
	if((options&BEFORE_PRI_29)!=0 \|\| prevCC==0) {
	remove=-1; /* NULL while no Hangul composition */
	combineFlags=0;
	ncArg.c2=args.source[starter];
	if(combineBackIndex==0xfff2) {
	/* Jamo V, compose with previous Jamo L and following
	* Jamo T
	*/
	ncArg.c2=(char)(ncArg.c2-JAMO_L_BASE);
	if(ncArg.c2<JAMO_L_COUNT) {
	remove=args.start-1;
	ncArg.c=(char)(HANGUL_BASE+(ncArg.c2*JAMO_V_COUNT+
	(ncArg.c-JAMO_V_BASE))*JAMO_T_COUNT);
	if(args.start!=args.limit &&
	(ncArg.c2=(char)(args.source[args.start]
	-JAMO_T_BASE))<JAMO_T_COUNT) {
	++args.start;
	ncArg.c+=ncArg.c2;
	} else {
	/* the result is an LV syllable, which is a starter (unlike LVT) */
	combineFlags=COMBINES_FWD;
	}
	if(!nx_contains(nx, ncArg.c)) {
	args.source[starter]=ncArg.c;
	} else {
	/* excluded */
	if(!isHangulWithoutJamoT(ncArg.c)) {
	--args.start; /* undo the ++args.start from reading the Jamo T */
	}
	/* c is modified but not used any more -- c=(p-1); -- re-read the Jamo V/T /
	remove=args.start;
	}
	}

	/*
	* Normally, the following can not occur:
	* Since the input is in NFD, there are no Hangul LV syllables that
	* a Jamo T could combine with.
	* All Jamo Ts are combined above when handling Jamo Vs.
	*
	* However, before the PRI #29 fix, this can occur due to
	* an intervening combining mark between the Hangul LV and the Jamo T.
	*/
	} else {
	/* Jamo T, compose with previous Hangul that does not have a Jamo T */
	if(isHangulWithoutJamoT(ncArg.c2)) {
	ncArg.c2+=ncArg.c-JAMO_T_BASE;
	if(!nx_contains(nx, ncArg.c2)) {
	remove=args.start-1;
	args.source[starter]=ncArg.c2;
	}
	}
	}

	if(remove!=-1) {
	/* remove the Jamo(s) */
	q=remove;
	r=args.start;
	while(r<args.limit) {
	args.source[q++]=args.source[r++];
	}
	args.start=remove;
	args.limit=q;
	}

	ncArg.c2=0; /* c2 held starter temporarily /

	if(combineFlags!=0) {
	/*
	* not starter=NULL because the composition is a Hangul LV syllable
	* and might combine once more (but only before the PRI #29 fix)
	*/

	/* done? */
	if(args.start==args.limit) {
	return (char)prevCC;
	}

	/* the composition is a Hangul LV syllable which is a starter that combines forward */
	combineFwdIndex=0xfff0;

	/* we combined; continue with looking for compositions */
	continue;
	}
	}

	/*
	* now: cc==0 and the combining index does not include
	* "forward" -> the rest of the loop body will reset starter
	* to NULL; technically, a composed Hangul syllable is a
	* starter, but it does not combine forward now that we have
	* consumed all eligible Jamos; for Jamo V/T, combineFlags
	* does not contain _NORM_COMBINES_FWD
	*/

	} else if(
	/* the starter is not a Hangul LV or Jamo V/T and */
	!((combineFwdIndex&0x8000)!=0) &&
	/* the combining mark is not blocked and */
	((options&BEFORE_PRI_29)!=0 ?
	(prevCC!=ncArg.cc \|\| prevCC==0) :
	(prevCC<ncArg.cc \|\| prevCC==0)) &&
	/* the starter and the combining mark (c, c2) do combine */
	0!=(result=combine(combiningTable,combineFwdIndex,
	combineBackIndex, outValues)) &&
	/* the composition result is not excluded */
	!nx_contains(nx, (char)value, (char)value2)
	) {
	value=outValues[0];
	value2=outValues[1];
	/* replace the starter with the composition, remove the
	* combining mark
	*/
	remove= ncArg.c2==0 ? args.start-1 : args.start-2; /* index to the combining mark */

	/* replace the starter with the composition */
	args.source[starter]=(char)value;
	if(starterIsSupplementary) {
	if(value2!=0) {
	/* both are supplementary */
	args.source[starter+1]=(char)value2;
	} else {
	/* the composition is shorter than the starter,
	* move the intermediate characters forward one */
	starterIsSupplementary=false;
	q=starter+1;
	r=q+1;
	while(r<remove) {
	args.source[q++]=args.source[r++];
	}
	--remove;
	}
	} else if(value2!=0) { // for U+1109A, U+1109C, and U+110AB
	starterIsSupplementary=true;
	args.source[starter+1]=(char)value2;
	/* } else { both are on the BMP, nothing more to do */
	}

	/* remove the combining mark by moving the following text
	* over it */
	if(remove<args.start) {
	q=remove;
	r=args.start;
	while(r<args.limit) {
	args.source[q++]=args.source[r++];
	}
	args.start=remove;
	args.limit=q;
	}

	/* keep prevCC because we removed the combining mark */

	/* done? */
	if(args.start==args.limit) {
	return (char)prevCC;
	}

	/* is the composition a starter that combines forward? */
	if(result>1) {
	combineFwdIndex=getCombiningIndexFromStarter((char)value,
	(char)value2);
	} else {
	starter=-1;
	}

	/* we combined; continue with looking for compositions */
	continue;
	}
	}

	/* no combination this time */
	prevCC=ncArg.cc;
	if(args.start==args.limit) {
	return (char)prevCC;
	}

	/* if (c, c2) did not combine, then check if it is a starter */
	if(ncArg.cc==0) {
	/* found a new starter; combineFlags==0 if (c, c2) is excluded */
	if((combineFlags&COMBINES_FWD)!=0) {
	/* it may combine with something, prepare for it */
	if(ncArg.c2==0) {
	starterIsSupplementary=false;
	starter=args.start-1;
	} else {
	starterIsSupplementary=false;
	starter=args.start-2;
	}
	combineFwdIndex=combineBackIndex;
	} else {
	/* it will not combine with anything */
	starter=-1;
	}
	} else if((options&OPTIONS_COMPOSE_CONTIGUOUS)!=0) {
	/* FCC: no discontiguous compositions; any intervening character blocks */
	starter=-1;
	}
	}
	}

	// find the last true starter between src[start]....src[current] going
	// backwards and return its index
	private static int findPreviousStarter(char[]src, int srcStart, int current,
	int/unsigned/ ccOrQCMask,
	int/unsigned/ decompQCMask,
	char minNoMaybe) {
	long norm32;
	PrevArgs args = new PrevArgs();
	args.src = src;
	args.start = srcStart;
	args.current = current;

	while(args.start<args.current) {
	norm32= getPrevNorm32(args, minNoMaybe, ccOrQCMask\|decompQCMask);
	if(isTrueStarter(norm32, ccOrQCMask, decompQCMask)) {
	break;
	}
	}
	return args.current;
	}

	/* find the first true starter in [src..limit[ and return the
	* pointer to it
	*/
	private static int/index/ findNextStarter(char[] src,int start,int limit,
	int/unsigned/ qcMask,
	int/unsigned/ decompQCMask,
	char minNoMaybe) {
	int p;
	long/unsigned/ norm32;
	int ccOrQCMask;
	char c, c2;

	ccOrQCMask=CC_MASK\|qcMask;

	DecomposeArgs decompArgs = new DecomposeArgs();

	for(;;) {
	if(start==limit) {
	break; /* end of string */
	}
	c=src[start];
	if(c<minNoMaybe) {
	break; /* catches NUL terminater, too */
	}

	norm32=getNorm32(c);
	if((norm32&ccOrQCMask)==0) {
	break; /* true starter */
	}

	if(isNorm32LeadSurrogate(norm32)) {
	/* c is a lead surrogate, get the real norm32 */
	if((start+1)==limit \|\|
	!UTF16.isTrailSurrogate(c2=(src[start+1]))){
	/* unmatched first surrogate: counts as a true starter */
	break;
	}
	norm32=getNorm32FromSurrogatePair(norm32, c2);

	if((norm32&ccOrQCMask)==0) {
	break; /* true starter */
	}
	} else {
	c2=0;
	}

	/* (c, c2) is not a true starter but its decomposition may be */
	if((norm32&decompQCMask)!=0) {
	/* (c, c2) decomposes, get everything from the variable-length
	* extra data */
	p=decompose(norm32, decompQCMask, decompArgs);

	/* get the first character's norm32 to check if it is a true
	* starter */
	if(decompArgs.cc==0 && (getNorm32(extraData,p, qcMask)&qcMask)==0) {
	break; /* true starter */
	}
	}

	start+= c2==0 ? 1 : 2; /* not a true starter, continue */
	}

	return start;
	}


	/* In/out parameter bundle for composePart(). */
	private static final class ComposePartArgs{
	    /* combining class carried into composePart(); replaced by the value
	     * recompose() returns when recomposition runs */
	    int prevCC;
	    /* on return from composePart(): number of code units in its result */
	    int length;
	}

	/*
	 * Decompose src[prevStarter..start[ into a fresh side buffer and then
	 * recompose that buffer in place.
	 *
	 * args        receives the result length in args.length; args.prevCC is
	 *             overwritten with the trailing cc from recompose() when the
	 *             decomposition has at least two code units
	 * prevStarter index in src where the piece begins
	 * src         source text
	 * start       index in src where the piece ends (exclusive)
	 * limit       only used to size the initial side buffer
	 * options     option bits; OPTIONS_COMPAT selects compatibility decomposition
	 * nx          normalization exclusion set, may be null
	 *
	 * Returns the side buffer; its first args.length units are the result.
	 */
	private static char[] composePart(ComposePartArgs args,
	                                  int prevStarter,
	                                  char[] src, int start, int limit,
	                                  int options,
	                                  UnicodeSet nx) {
	    final boolean compat = (options & OPTIONS_COMPAT) != 0;
	    final int[] outTrailCC = new int[1];
	    char[] sideBuffer = new char[(limit - prevStarter) * MAX_BUFFER_SIZE];

	    /* decompose; whenever the buffer proved too small, retry with one of
	     * exactly the size that the previous attempt reported */
	    args.length = decompose(src, prevStarter, start,
	                            sideBuffer, 0, sideBuffer.length,
	                            compat, outTrailCC, nx);
	    while (args.length > sideBuffer.length) {
	        sideBuffer = new char[args.length];
	        args.length = decompose(src, prevStarter, start,
	                                sideBuffer, 0, sideBuffer.length,
	                                compat, outTrailCC, nx);
	    }

	    /* a single code unit cannot recompose with anything */
	    int resultLength = args.length;
	    if (resultLength >= 2) {
	        RecomposeArgs recomposeArgs = new RecomposeArgs();
	        recomposeArgs.source = sideBuffer;
	        recomposeArgs.start = 0;
	        recomposeArgs.limit = resultLength;
	        args.prevCC = recompose(recomposeArgs, options, nx);
	        resultLength = recomposeArgs.limit;
	    }

	    /* hand back the buffer together with the length of its valid part */
	    args.length = resultLength;
	    return sideBuffer;
	}

	private static boolean composeHangul(char prev, char c,
	long/unsigned/ norm32,
	char[] src,int[] srcIndex, int limit,
	boolean compat,
	char[] dest,int destIndex,
	UnicodeSet nx) {
	int start=srcIndex[0];
	if(isJamoVTNorm32JamoV(norm32)) {
	/* c is a Jamo V, compose with previous Jamo L and
	* following Jamo T */
	prev=(char)(prev-JAMO_L_BASE);
	if(prev<JAMO_L_COUNT) {
	c=(char)(HANGUL_BASE+(prev*JAMO_V_COUNT+
	(c-JAMO_V_BASE))*JAMO_T_COUNT);

	/* check if the next character is a Jamo T (normal or
	* compatibility) */
	if(start!=limit) {
	char next, t;

	next=src[start];
	if((t=(char)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
	/* normal Jamo T */
	++start;
	c+=t;
	} else if(compat) {
	/* if NFKC, then check for compatibility Jamo T
	* (BMP only) */
	norm32=getNorm32(next);
	if(isNorm32Regular(norm32) && ((norm32&QC_NFKD)!=0)) {
	int p /index into extra data array/;
	DecomposeArgs dcArgs = new DecomposeArgs();
	p=decompose(norm32, QC_NFKD, dcArgs);
	if(dcArgs.length==1 &&
	(t=(char)(extraData[p]-JAMO_T_BASE))
	<JAMO_T_COUNT) {
	/* compatibility Jamo T */
	++start;
	c+=t;
	}
	}
	}
	}
	if(nx_contains(nx, c)) {
	if(!isHangulWithoutJamoT(c)) {
	--start; /* undo ++start from reading the Jamo T */
	}
	return false;
	}
	dest[destIndex]=c;
	srcIndex[0]=start;
	return true;
	}
	} else if(isHangulWithoutJamoT(prev)) {
	/* c is a Jamo T, compose with previous Hangul LV that does not
	* contain a Jamo T */
	c=(char)(prev+(c-JAMO_T_BASE));
	if(nx_contains(nx, c)) {
	return false;
	}
	dest[destIndex]=c;
	srcIndex[0]=start;
	return true;
	}
	return false;
	}
	/*
	public static int compose(char[] src, char[] dest,boolean compat, UnicodeSet nx){
	return compose(src,0,src.length,dest,0,dest.length,compat, nx);
	}
	*/

	public static int compose(char[] src, int srcStart, int srcLimit,
	char[] dest,int destStart,int destLimit,
	int options,UnicodeSet nx) {

	int prevSrc, prevStarter;
	long/unsigned/ norm32;
	int ccOrQCMask, qcMask;
	int reorderStartIndex, length;
	char c, c2, minNoMaybe;
	int/unsigned byte/ cc, prevCC;
	int[] ioIndex = new int[1];
	int destIndex = destStart;
	int srcIndex = srcStart;

	if((options&OPTIONS_COMPAT)!=0) {
	minNoMaybe=(char)indexes[INDEX_MIN_NFKC_NO_MAYBE];
	qcMask=QC_NFKC;
	} else {
	minNoMaybe=(char)indexes[INDEX_MIN_NFC_NO_MAYBE];
	qcMask=QC_NFC;
	}

	/*
	* prevStarter points to the last character before the current one
	* that is a "true" starter with cc==0 and quick check "yes".
	*
	* prevStarter will be used instead of looking for a true starter
	* while incrementally decomposing [prevStarter..prevSrc[
	* in _composePart(). Having a good prevStarter allows to just decompose
	* the entire [prevStarter..prevSrc[.
	*
	* When _composePart() backs out from prevSrc back to prevStarter,
	* then it also backs out destIndex by the same amount.
	* Therefore, at all times, the (prevSrc-prevStarter) source units
	* must correspond 1:1 to destination units counted with destIndex,
	* except for reordering.
	* This is true for the qc "yes" characters copied in the fast loop,
	* and for pure reordering.
	* prevStarter must be set forward to src when this is not true:
	* In _composePart() and after composing a Hangul syllable.
	*
	* This mechanism relies on the assumption that the decomposition of a
	* true starter also begins with a true starter. gennorm/store.c checks
	* for this.
	*/
	prevStarter=srcIndex;

	ccOrQCMask=CC_MASK\|qcMask;
	/destIndex=/reorderStartIndex=0;/* ####TODO#### check this **/
	prevCC=0;

	/* avoid compiler warnings */
	norm32=0;
	c=0;

	for(;;) {
	/* count code units below the minimum or with irrelevant data for
	* the quick check */
	prevSrc=srcIndex;

	while(srcIndex!=srcLimit && ((c=src[srcIndex])<minNoMaybe \|\|
	((norm32=getNorm32(c))&ccOrQCMask)==0)) {
	prevCC=0;
	++srcIndex;
	}


	/* copy these code units all at once */
	if(srcIndex!=prevSrc) {
	length=srcIndex-prevSrc;
	if((destIndex+length)<=destLimit) {
	System.arraycopy(src,prevSrc,dest,destIndex,length);
	}
	destIndex+=length;
	reorderStartIndex=destIndex;

	/* set prevStarter to the last character in the quick check
	* loop */
	prevStarter=srcIndex-1;
	if(UTF16.isTrailSurrogate(src[prevStarter]) &&
	prevSrc<prevStarter &&
	UTF16.isLeadSurrogate(src[(prevStarter-1)])) {
	--prevStarter;
	}

	prevSrc=srcIndex;
	}

	/* end of source reached? */
	if(srcIndex==srcLimit) {
	break;
	}

	/* c already contains src and norm32 is set for it, increment src/
	++srcIndex;

	/*
	* source buffer pointers:
	*
	* all done quick check current char not yet
	* "yes" but (c, c2) processed
	* may combine
	* forward
	* [-------------[-------------[-------------[-------------[
	* \| \| \| \| \|
	* start prevStarter prevSrc src limit
	*
	*
	* destination buffer pointers and indexes:
	*
	* all done might take not filled yet
	* characters for
	* reordering
	* [-------------[-------------[-------------[
	* \| \| \| \|
	* dest reorderStartIndex destIndex destCapacity
	*/

	/* check one above-minimum, relevant code unit */
	/*
	* norm32 is for c=*(src-1), and the quick check flag is "no" or
	* "maybe", and/or cc!=0
	* check for Jamo V/T, then for surrogates and regular characters
	* c is not a Hangul syllable or Jamo L because
	* they are not marked with no/maybe for NFC & NFKC(and their cc==0)
	*/
	if(isNorm32HangulOrJamo(norm32)) {
	/*
	* c is a Jamo V/T:
	* try to compose with the previous character, Jamo V also with
	* a following Jamo T, and set values here right now in case we
	* just continue with the main loop
	*/
	prevCC=cc=0;
	reorderStartIndex=destIndex;
	ioIndex[0]=srcIndex;
	if(
	destIndex>0 &&
	composeHangul(src[(prevSrc-1)], c, norm32,src, ioIndex,
	srcLimit, (options&OPTIONS_COMPAT)!=0, dest,
	destIndex<=destLimit ? destIndex-1: 0,
	nx)
	) {
	srcIndex=ioIndex[0];
	prevStarter=srcIndex;
	continue;
	}

	srcIndex = ioIndex[0];

	/* the Jamo V/T did not compose into a Hangul syllable, just
	* append to dest */
	c2=0;
	length=1;
	prevStarter=prevSrc;
	} else {
	if(isNorm32Regular(norm32)) {
	c2=0;
	length=1;
	} else {
	/* c is a lead surrogate, get the real norm32 */
	if(srcIndex!=srcLimit &&
	UTF16.isTrailSurrogate(c2=src[srcIndex])) {
	++srcIndex;
	length=2;
	norm32=getNorm32FromSurrogatePair(norm32, c2);
	} else {
	/* c is an unpaired lead surrogate, nothing to do */
	c2=0;
	length=1;
	norm32=0;
	}
	}
	ComposePartArgs args =new ComposePartArgs();

	/* we are looking at the character (c, c2) at [prevSrc..src[ */
	if(nx_contains(nx, c, c2)) {
	/* excluded: norm32==0 */
	cc=0;
	} else if((norm32&qcMask)==0) {
	cc=(int)((UNSIGNED_BYTE_MASK)&(norm32>>CC_SHIFT));
	} else {
	char[] p;

	/*
	* find appropriate boundaries around this character,
	* decompose the source text from between the boundaries,
	* and recompose it
	*
	* this puts the intermediate text into the side buffer because
	* it might be longer than the recomposition end result,
	* or the destination buffer may be too short or missing
	*
	* note that destIndex may be adjusted backwards to account
	* for source text that passed the quick check but needed to
	* take part in the recomposition
	*/
	int decompQCMask=(qcMask<<2)&0xf; /* decomposition quick check mask */
	/*
	* find the last true starter in [prevStarter..src[
	* it is either the decomposition of the current character (at prevSrc),
	* or prevStarter
	*/
	if(isTrueStarter(norm32, CC_MASK\|qcMask, decompQCMask)) {
	prevStarter=prevSrc;
	} else {
	/* adjust destIndex: back out what had been copied with qc "yes" */
	destIndex-=prevSrc-prevStarter;
	}

	/* find the next true starter in [src..limit[ */
	srcIndex=findNextStarter(src, srcIndex,srcLimit, qcMask,
	decompQCMask, minNoMaybe);
	//args.prevStarter = prevStarter;
	args.prevCC = prevCC;
	//args.destIndex = destIndex;
	args.length = length;
	p=composePart(args,prevStarter,src,srcIndex,srcLimit,options,nx);

	if(p==null) {
	/* an error occurred (out of memory) */
	break;
	}

	prevCC = args.prevCC;
	length = args.length;

	/* append the recomposed buffer contents to the destination
	* buffer */
	if((destIndex+args.length)<=destLimit) {
	int i=0;
	while(i<args.length) {
	dest[destIndex++]=p[i++];
	--length;
	}
	} else {
	/* buffer overflow */
	/* keep incrementing the destIndex for preflighting */
	destIndex+=length;
	}

	prevStarter=srcIndex;
	continue;
	}
	}

	/* append the single code point (c, c2) to the destination buffer */
	if((destIndex+length)<=destLimit) {
	if(cc!=0 && cc<prevCC) {
	/* (c, c2) is out of order with respect to the preceding
	* text */
	int reorderSplit= destIndex;
	destIndex+=length;
	prevCC=insertOrdered(dest,reorderStartIndex, reorderSplit,
	destIndex, c, c2, cc);
	} else {
	/* just append (c, c2) */
	dest[destIndex++]=c;
	if(c2!=0) {
	dest[destIndex++]=c2;
	}
	prevCC=cc;
	}
	} else {
	/* buffer overflow */
	/* keep incrementing the destIndex for preflighting */
	destIndex+=length;
	prevCC=cc;
	}
	}

	return destIndex - destStart;
	}

	/*
	 * Return the combining class of code point c: the 8-bit field at
	 * CC_SHIFT in its norm32 value.
	 */
	public static int getCombiningClass(int c) {
	    final long properties = getNorm32(c);
	    final long combiningClass = (properties >> CC_SHIFT) & 0xFF;
	    return (int) combiningClass;
	}

	/*
	 * Return true if the auxiliary trie marks code point c with
	 * AUX_COMP_EX_MASK. Always false when the data is not format
	 * version 2.1 (isFormatVersion_2_1 is false).
	 */
	public static boolean isFullCompositionExclusion(int c) {
	    if (!isFormatVersion_2_1) {
	        return false;
	    }
	    final int auxValue = AuxTrieImpl.auxTrie.getCodePointValue(c);
	    final int exclusionBits = auxValue & AUX_COMP_EX_MASK;
	    return exclusionBits != 0;
	}

	/*
	 * Return true if the auxiliary trie does not mark code point c with
	 * AUX_UNSAFE_MASK. Always false when the data is not format
	 * version 2.1 (isFormatVersion_2_1 is false).
	 */
	public static boolean isCanonSafeStart(int c) {
	    if (!isFormatVersion_2_1) {
	        return false;
	    }
	    final int auxValue = AuxTrieImpl.auxTrie.getCodePointValue(c);
	    final int unsafeBits = auxValue & AUX_UNSAFE_MASK;
	    return unsafeBits == 0;
	}

	/* Is c an NF<mode>-skippable code point? See unormimp.h. */
	public static boolean isNFSkippable(int c, NormalizerBase.Mode mode, long mask) {
	long /unsigned int/ norm32;
	mask = mask & UNSIGNED_INT_MASK;
	char aux;

	/* check conditions (a)..(e), see unormimp.h */
	norm32 = getNorm32(c);

	if((norm32&mask)!=0) {
	return false; /* fails (a)..(e), not skippable */
	}

	if(mode == NormalizerBase.NFD \|\| mode == NormalizerBase.NFKD \|\| mode == NormalizerBase.NONE){
	return true; /* NFD, passed (a)..(c), is skippable /
	}
	/* check conditions (a)..(e), see unormimp.h */

	/* NFC/FCC, passed (a)..(e) /
	if((norm32& QC_NFD)==0) {
	return true; /* no canonical decomposition, is skippable */
	}

	/* check Hangul syllables algorithmically */
	if(isNorm32HangulOrJamo(norm32)) {
	/* Jamo passed (a)..(e) above, must be Hangul */
	return !isHangulWithoutJamoT((char)c); /* LVT are skippable, LV are not */
	}

	/* if(mode<=UNORM_NFKC) { -- enable when implementing FCC */
	/* NFC, test (f) flag /
	if(!isFormatVersion_2_2) {
	return false; /* no (f) data, say not skippable to be safe */
	}


	aux = AuxTrieImpl.auxTrie.getCodePointValue(c);
	return (aux&AUX_NFC_SKIP_F_MASK)==0; /* TRUE=skippable if the (f) flag is not set */

	/* } else { FCC, test fcd<=1 instead of the above } */
	}

	/*
	 * Add to set the start code point of every same-value range of the
	 * norm trie, the FCD trie and (for format version 2.1 data) the aux
	 * trie, plus the Hangul boundaries that matter for skippables.
	 * Returns set itself so that calls can be chained.
	 */
	public static UnicodeSet addPropertyStarts(UnicodeSet set) {
	    /* range starts of the normalization trie */
	    //utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, set);
	    final TrieIterator normRanges = new TrieIterator(NormTrieImpl.normTrie);
	    final RangeValueIterator.Element normRange = new RangeValueIterator.Element();
	    for (boolean more = normRanges.next(normRange); more; more = normRanges.next(normRange)) {
	        set.add(normRange.start);
	    }

	    /* range starts of the FCD trie */
	    //utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
	    final TrieIterator fcdRanges = new TrieIterator(FCDTrieImpl.fcdTrie);
	    final RangeValueIterator.Element fcdRange = new RangeValueIterator.Element();
	    for (boolean more = fcdRanges.next(fcdRange); more; more = fcdRanges.next(fcdRange)) {
	        set.add(fcdRange.start);
	    }

	    /* range starts of the auxiliary trie, present only in 2.1 data */
	    if (isFormatVersion_2_1) {
	        //utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, set);
	        final TrieIterator auxRanges = new TrieIterator(AuxTrieImpl.auxTrie);
	        final RangeValueIterator.Element auxRange = new RangeValueIterator.Element();
	        for (boolean more = auxRanges.next(auxRange); more; more = auxRanges.next(auxRange)) {
	            set.add(auxRange.start);
	        }
	    }

	    /* every Hangul LV syllable and the LVT right after it, because of skippables */
	    final int hangulEnd = HANGUL_BASE + HANGUL_COUNT;
	    int syllable = HANGUL_BASE;
	    while (syllable < hangulEnd) {
	        set.add(syllable);
	        set.add(syllable + 1);
	        syllable += JAMO_T_COUNT;
	    }
	    /* first code point after the Hangul block, to continue with other properties */
	    set.add(hangulEnd);

	    return set; // for chaining
	}

	/**
	* Internal API, used in UCharacter.getIntPropertyValue().
	* @internal
	* @param c code point
	* @param modeValue numeric value compatible with Mode
	* @return numeric value compatible with QuickCheck
	*/
	public static final int quickCheck(int c, int modeValue) {
	final int qcMask[/UNORM_MODE_COUNT/]={
	0, 0, QC_NFD, QC_NFKD, QC_NFC, QC_NFKC
	};

	int norm32=(int)getNorm32(c)&qcMask[modeValue];

	if(norm32==0) {
	return 1; // YES
	} else if((norm32&QC_ANY_NO)!=0) {
	return 0; // NO
	} else /* _NORM_QC_ANY_MAYBE */ {
	return 2; // MAYBE;
	}
	}

	private static int strCompare(char[] s1, int s1Start, int s1Limit,
	char[] s2, int s2Start, int s2Limit,
	boolean codePointOrder) {

	int start1, start2, limit1, limit2;

	char c1, c2;

	/* setup for fix-up */
	start1=s1Start;
	start2=s2Start;

	int length1, length2;

	length1 = s1Limit - s1Start;
	length2 = s2Limit - s2Start;

	int lengthResult;

	if(length1<length2) {
	lengthResult=-1;
	limit1=start1+length1;
	} else if(length1==length2) {
	lengthResult=0;
	limit1=start1+length1;
	} else /* length1>length2 */ {
	lengthResult=1;
	limit1=start1+length2;
	}

	if(s1==s2) {
	return lengthResult;
	}

	for(;;) {
	/* check pseudo-limit */
	if(s1Start==limit1) {
	return lengthResult;
	}

	c1=s1[s1Start];
	c2=s2[s2Start];
	if(c1!=c2) {
	break;
	}
	++s1Start;
	++s2Start;
	}

	/* setup for fix-up */
	limit1=start1+length1;
	limit2=start2+length2;


	/* if both values are in or above the surrogate range, fix them up */
	if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
	/* subtract 0x2800 from BMP code points to make them smaller than
	* supplementary ones */
	if(
	( c1<=0xdbff && (s1Start+1)!=limit1 &&
	UTF16.isTrailSurrogate(s1[(s1Start+1)])
	) \|\|
	( UTF16.isTrailSurrogate(c1) && start1!=s1Start &&
	UTF16.isLeadSurrogate(s1[(s1Start-1)])
	)
	) {
	/* part of a surrogate pair, leave >=d800 */
	} else {
	/* BMP code point - may be surrogate code point - make <d800 */
	c1-=0x2800;
	}

	if(
	( c2<=0xdbff && (s2Start+1)!=limit2 &&
	UTF16.isTrailSurrogate(s2[(s2Start+1)])
	) \|\|
	( UTF16.isTrailSurrogate(c2) && start2!=s2Start &&
	UTF16.isLeadSurrogate(s2[(s2Start-1)])
	)
	) {
	/* part of a surrogate pair, leave >=d800 */
	} else {
	/* BMP code point - may be surrogate code point - make <d800 */
	c2-=0x2800;
	}
	}

	/* now c1 and c2 are in UTF-32-compatible order */
	return (int)c1-(int)c2;
	}


	/*
	* Status of tailored normalization
	*
	* This was done initially for investigation on Unicode public review issue 7
	* (http://www.unicode.org/review/). See Jitterbug 2481.
	* While the UTC at meeting #94 (2003mar) did not take up the issue, this is
	* a permanent feature in ICU 2.6 in support of IDNA which requires true
	* Unicode 3.2 normalization.
	* (NormalizationCorrections are rolled into IDNA mapping tables.)
	*
	* Tailored normalization as implemented here allows to "normalize less"
	* than full Unicode normalization would.
	* Based internally on a UnicodeSet of code points that are
	* "excluded from normalization", the normalization functions leave those
	* code points alone ("inert"). This means that tailored normalization
	* still transforms text into a canonically equivalent form.
	* It does not add decompositions to code points that do not have any or
	* change decomposition results.
	*
	* Any function that searches for a safe boundary has not been touched,
	* which means that these functions will be over-pessimistic when
	* exclusions are applied.
	* This should not matter because subsequent checks and normalizations
	* do apply the exclusions; only a little more of the text may be processed
	* than necessary under exclusions.
	*
	* Normalization exclusions have the following effect on excluded code points c:
	* - c is not decomposed
	* - c is not a composition target
	* - c does not combine forward or backward for composition
	* except that this is not implemented for Jamo
	* - c is treated as having a combining class of 0
	*/

	/*
	* Constants for the bit fields in the options bit set parameter.
	* These need not be public.
	* A user only needs to know the currently assigned values.
	* The number and positions of reserved bits per field can remain private.
	*/
	/* Bits 0..4 of the options: selects the basic exclusion sets (NX_HANGUL, NX_CJK_COMPAT). */
	private static final int OPTIONS_NX_MASK=0x1f;
	/* Bits 5..7 of the options: selects a Unicode-version exclusion set. */
	private static final int OPTIONS_UNICODE_MASK=0xe0;
	/* Bits 0..7 of the options: all bits that select exclusion sets. */
	public static final int OPTIONS_SETS_MASK=0xff;
	// private static final int OPTIONS_UNICODE_SHIFT=5;
	/* Cache of exclusion sets, indexed by (options & OPTIONS_SETS_MASK); entries are built on first use. */
	private static final UnicodeSet[] nxCache = new UnicodeSet[OPTIONS_SETS_MASK+1];

	/* Constants for options flags for normalization.*/

	/**
	* Options bit 0, do not decompose Hangul syllables.
	* @draft ICU 2.6
	*/
	private static final int NX_HANGUL = 1;
	/**
	* Options bit 1, do not decompose CJK compatibility characters.
	* @draft ICU 2.6
	*/
	private static final int NX_CJK_COMPAT=2;
	/**
	* Options bit 8, use buggy recomposition described in
	* Unicode Public Review Issue #29
	* at http://www.unicode.org/review/resolved-pri.html#pri29
	*
	* Used in IDNA implementation according to strict interpretation
	* of IDNA definition based on Unicode 3.2 which predates PRI #29.
	*
	* See ICU4C unormimp.h
	*
	* @draft ICU 3.2
	*/
	public static final int BEFORE_PRI_29=0x100;

	/*
	* The following options are used only in some composition functions.
	* They use bits 12 and up to preserve lower bits for the available options
	* space in unorm_compare() -
	* see documentation for UNORM_COMPARE_NORM_OPTIONS_SHIFT.
	*/

	/** Options bit 12, for compatibility vs. canonical decomposition. */
	public static final int OPTIONS_COMPAT=0x1000;
	/** Options bit 13, no discontiguous composition (FCC vs. NFC). */
	public static final int OPTIONS_COMPOSE_CONTIGUOUS=0x2000;

	/* normalization exclusion sets --------------------------------------------- */

	/*
	* Normalization exclusion UnicodeSets are used for tailored normalization;
	* see the comment near the beginning of this file.
	*
	* By specifying one or several sets of code points,
	* those code points become inert for normalization.
	*/
	/*
	 * Return the cached exclusion set for NX_HANGUL, creating it on first
	 * use as the range U+AC00..U+D7A3.
	 */
	private static final synchronized UnicodeSet internalGetNXHangul() {
	    /* internal function, does not check for incoming U_FAILURE */
	    UnicodeSet hangul = nxCache[NX_HANGUL];
	    if (hangul == null) {
	        hangul = new UnicodeSet(0xac00, 0xd7a3);
	        nxCache[NX_HANGUL] = hangul;
	    }
	    return hangul;
	}

	/*
	 * Return the cached exclusion set for NX_CJK_COMPAT, creating it on
	 * first use as [ideographs] & [has canonical decomposition].
	 */
	private static final synchronized UnicodeSet internalGetNXCJKCompat() {
	    /* internal function, does not check for incoming U_FAILURE */
	    final UnicodeSet cached = nxCache[NX_CJK_COMPAT];
	    if (cached != null) {
	        return cached;
	    }

	    /* all ideographs; the ones that decompose are collected below */
	    final UnicodeSet ideographs = new UnicodeSet("[:Ideographic:]");
	    final UnicodeSet decomposing = new UnicodeSet();

	    /* walk the ideographs range by range, stopping at a string element */
	    final UnicodeSetIterator ranges = new UnicodeSetIterator(ideographs);
	    while (ranges.nextRange() && (ranges.codepoint != UnicodeSetIterator.IS_STRING)) {
	        final int last = ranges.codepointEnd;
	        for (int cp = ranges.codepoint; cp <= last; ++cp) {
	            final long norm32 = getNorm32(cp);
	            if ((norm32 & QC_NFD) > 0) {
	                decomposing.add(cp);
	            }
	        }
	    }

	    /* decomposing now holds every ideograph that decomposes canonically */
	    nxCache[NX_CJK_COMPAT] = decomposing;
	    return decomposing;
	}

	/*
	 * Returns the cached exclusion set for the Unicode-version bits of options
	 * (options & OPTIONS_UNICODE_MASK), or null when no version bits are set or
	 * the version is not NormalizerBase.UNICODE_3_2.
	 */
	private static final synchronized UnicodeSet internalGetNXUnicode(int options) {
	    final int versionBits = options & OPTIONS_UNICODE_MASK;
	    if (versionBits == 0) {
	        return null;
	    }

	    UnicodeSet cached = nxCache[versionBits];
	    if (cached != null) {
	        return cached;
	    }

	    /* only one Unicode version is supported here */
	    if (versionBits != NormalizerBase.UNICODE_3_2) {
	        return null;
	    }

	    /* all code points that were not designated by the specified Unicode version */
	    UnicodeSet undesignated = new UnicodeSet();
	    undesignated.applyPattern("[:^Age=3.2:]");

	    nxCache[versionBits] = undesignated;
	    return undesignated;
	}

	/*
	 * Get a decomposition exclusion set. The data must be loaded.
	 *
	 * Only the bits in OPTIONS_SETS_MASK are considered. A request for a single
	 * basic set is delegated to the matching helper; any other combination is
	 * built as the union of the requested basic sets and cached in nxCache.
	 */
	private static final synchronized UnicodeSet internalGetNX(int options) {
	    final int key = options & OPTIONS_SETS_MASK;

	    UnicodeSet cached = nxCache[key];
	    if (cached != null) {
	        return cached;
	    }

	    /* basic sets are owned by their own helpers */
	    if (key == NX_HANGUL) {
	        return internalGetNXHangul();
	    }
	    if (key == NX_CJK_COMPAT) {
	        return internalGetNXCJKCompat();
	    }
	    if ((key & OPTIONS_UNICODE_MASK) != 0 && (key & OPTIONS_NX_MASK) == 0) {
	        return internalGetNXUnicode(key);
	    }

	    /* build a set from multiple subsets */
	    UnicodeSet combined = new UnicodeSet();

	    if ((key & NX_HANGUL) != 0) {
	        UnicodeSet hangul = internalGetNXHangul();
	        if (hangul != null) {
	            combined.addAll(hangul);
	        }
	    }
	    if ((key & NX_CJK_COMPAT) != 0) {
	        UnicodeSet cjkCompat = internalGetNXCJKCompat();
	        if (cjkCompat != null) {
	            combined.addAll(cjkCompat);
	        }
	    }
	    if ((key & OPTIONS_UNICODE_MASK) != 0) {
	        UnicodeSet byVersion = internalGetNXUnicode(key);
	        if (byVersion != null) {
	            combined.addAll(byVersion);
	        }
	    }

	    nxCache[key] = combined;
	    return combined;
	}

	/*
	 * Returns the exclusion set selected by the OPTIONS_SETS_MASK bits of
	 * options, or null when none of those bits is set (incoming failure, or no
	 * decomposition exclusions requested).
	 */
	public static final UnicodeSet getNX(int options) {
	    final int setBits = options & OPTIONS_SETS_MASK;
	    return (setBits == 0) ? null : internalGetNX(setBits);
	}

	/* Tells whether code point c is in nx; a null set contains nothing. */
	private static final boolean nx_contains(UnicodeSet nx, int c) {
	    if (nx == null) {
	        return false;
	    }
	    return nx.contains(c);
	}

	/*
	 * Tells whether the code point given as (c, c2) is in nx; a null set
	 * contains nothing. When c2 is 0 the code point is c itself, otherwise it
	 * is UCharacterProperty.getRawSupplementary(c, c2).
	 */
	private static final boolean nx_contains(UnicodeSet nx, char c, char c2) {
	    if (nx == null) {
	        return false;
	    }
	    final int codePoint = (c2 == 0) ? c : UCharacterProperty.getRawSupplementary(c, c2);
	    return nx.contains(codePoint);
	}

	/*****************************************************************************/

	/**
	 * Get the canonical decomposition
	 * sherman for ComposedCharIter
	 *
	 * Scans code points in ascending order and, for each one whose norm32 has
	 * the QC_NFD bit set, stores the code point in chars and its decomposition
	 * (taken from extraData) at the same index in decomps.
	 *
	 * @param chars   receives the code points; its length limits how many
	 *                entries are stored
	 * @param decomps receives the decomposition strings; only chars.length is
	 *                checked, so it must be at least as long as chars
	 * @return the number of entries stored
	 */

	public static int getDecompose(int chars[], String decomps[]) {
	    DecomposeArgs args = new DecomposeArgs();
	    long norm32;
	    int ch = -1;
	    int i = 0;

	    // Stop as soon as chars is full: nothing further could be stored, so
	    // continuing would only perform lookups whose results are discarded.
	    // (Original note: no canonical above 0x3ffff; the scan ends before 0x2fa1e.)
	    while (i < chars.length && ++ch < 0x2fa1e) {
	        //TBD !!!! the hack code here saves us about 50ms for startup
	        //need a better solution/lookup
	        // Jump over three hard-coded ranges that are not looked up.
	        if (ch == 0x30ff) {
	            ch = 0xf900;
	        } else if (ch == 0x10000) {
	            ch = 0x1d15e;
	        } else if (ch == 0x1d1c1) {
	            ch = 0x2f800;
	        }

	        norm32 = NormalizerImpl.getNorm32(ch);
	        if ((norm32 & QC_NFD) != 0) {
	            chars[i] = ch;
	            // decompose() returns the start offset in extraData and
	            // reports the length through args.
	            int index = decompose(norm32, args);
	            decomps[i++] = new String(extraData, index, args.length);
	        }
	    }
	    return i;
	}

	//------------------------------------------------------
	// special method for Collation
	//------------------------------------------------------
	private static boolean needSingleQuotation(char c) {
	return (c >= 0x0009 && c <= 0x000D) \|\|
	(c >= 0x0020 && c <= 0x002F) \|\|
	(c >= 0x003A && c <= 0x0040) \|\|
	(c >= 0x005B && c <= 0x0060) \|\|
	(c >= 0x007B && c <= 0x007E);
	}

	public static String canonicalDecomposeWithSingleQuotation(String string) {
	char[] src = string.toCharArray();
	int srcIndex = 0;
	int srcLimit = src.length;
	char[] dest = new char[src.length * 3]; //MAX_BUF_SIZE_DECOMPOSE = 3
	int destIndex = 0;
	int destLimit = dest.length;

	char[] buffer = new char[3];
	int prevSrc;
	long norm32;
	int ccOrQCMask;
	int qcMask = QC_NFD;
	int reorderStartIndex, length;
	char c, c2;
	char minNoMaybe = (char)indexes[INDEX_MIN_NFD_NO_MAYBE];
	int cc, prevCC, trailCC;
	char[] p;
	int pStart;


	// initialize
	ccOrQCMask = CC_MASK \| qcMask;
	reorderStartIndex = 0;
	prevCC = 0;
	norm32 = 0;
	c = 0;
	pStart = 0;

	cc = trailCC = -1; // initialize to bogus value
	for(;;) {
	prevSrc=srcIndex;
	//quick check (1)less than minNoMaybe (2)no decomp (3)hangual
	while (srcIndex != srcLimit &&
	(( c = src[srcIndex]) < minNoMaybe \|\|
	((norm32 = getNorm32(c)) & ccOrQCMask) == 0 \|\|
	( c >= '\uac00' && c <= '\ud7a3'))){

	prevCC = 0;
	++srcIndex;
	}

	// copy these code units all at once
	if (srcIndex != prevSrc) {
	length = srcIndex - prevSrc;
	if ((destIndex + length) <= destLimit) {
	System.arraycopy(src,prevSrc,dest,destIndex,length);
	}

	destIndex += length;
	reorderStartIndex = destIndex;
	}

	// end of source reached?
	if(srcIndex == srcLimit) {
	break;
	}
	// c already contains *src and norm32 is set for it, increment src
	++srcIndex;

	if(isNorm32Regular(norm32)) {
	c2 = 0;
	length = 1;
	} else {
	// c is a lead surrogate, get the real norm32
	if(srcIndex != srcLimit &&
	Character.isLowSurrogate(c2 = src[srcIndex])) {
	++srcIndex;
	length = 2;
	norm32 = getNorm32FromSurrogatePair(norm32, c2);
	} else {
	c2 = 0;
	length = 1;
	norm32 = 0;
	}
	}

	// get the decomposition and the lead and trail cc's
	if((norm32 & qcMask) == 0) {
	// c does not decompose
	cc = trailCC = (int)((UNSIGNED_BYTE_MASK) & (norm32 >> CC_SHIFT));
	p = null;
	pStart = -1;
	} else {
	DecomposeArgs arg = new DecomposeArgs();
	// c decomposes, get everything from the variable-length
	// extra data
	pStart = decompose(norm32, qcMask, arg);
	p = extraData;
	length = arg.length;
	cc = arg.cc;
	trailCC = arg.trailCC;
	if(length == 1) {
	// fastpath a single code unit from decomposition
	c = p[pStart];
	c2 = 0;
	p = null;
	pStart = -1;
	}
	}

	if((destIndex + length * 3) >= destLimit) { // 2 SingleQuotations
	// buffer overflow
	char[] tmpBuf = new char[destLimit * 2];
	System.arraycopy(dest, 0, tmpBuf, 0, destIndex);
	dest = tmpBuf;
	destLimit = dest.length;
	}
	// append the decomposition to the destination buffer, assume length>0
	{
	int reorderSplit = destIndex;
	if(p == null) {
	// fastpath: single code point
	if (needSingleQuotation(c)) {
	//if we need single quotation, no need to consider "prevCC"
	//and it must NOT be a supplementary pair
	dest[destIndex++] = '\'';
	dest[destIndex++] = c;
	dest[destIndex++] = '\'';
	trailCC = 0;
	} else if(cc != 0 && cc < prevCC) {
	// (c, c2) is out of order with respect to the preceding
	// text
	destIndex += length;
	trailCC = insertOrdered(dest,reorderStartIndex,
	reorderSplit, destIndex, c, c2, cc);
	} else {
	// just append (c, c2)
	dest[destIndex++] = c;
	if(c2 != 0) {
	dest[destIndex++] = c2;
	}
	}
	} else {
	// general: multiple code points (ordered by themselves)
	// from decomposition
	if (needSingleQuotation(p[pStart])) {
	dest[destIndex++] = '\'';
	dest[destIndex++] = p[pStart++];
	dest[destIndex++] = '\'';
	length--;
	do {
	dest[destIndex++] = p[pStart++];
	} while(--length > 0);
	} else
	if(cc != 0 && cc < prevCC) {
	destIndex += length;
	trailCC = mergeOrdered(dest,reorderStartIndex,
	reorderSplit,p, pStart,pStart+length);
	} else {
	// just append the decomposition
	do {
	dest[destIndex++] = p[pStart++];
	} while(--length > 0);
	}
	}
	}
	prevCC = trailCC;
	if(prevCC == 0) {
	reorderStartIndex = destIndex;
	}
	}
	return new String(dest, 0, destIndex);
	}

	//------------------------------------------------------
	// mapping method for IDNA/StringPrep
	//------------------------------------------------------

	/*
	* Normalization using NormalizerBase.UNICODE_3_2 option supports Unicode
	* 3.2 normalization with Corrigendum 4 corrections. However, normalization
	* without the corrections is necessary for IDNA/StringPrep support.
	* This method is called when NormalizerBase.UNICODE_3_2_0_ORIGINAL option
	* (= sun.text.Normalizer.UNICODE_3_2) is used and normalizes five
	* characters in Corrigendum 4 before normalization in order to avoid
	* incorrect normalization.
	* For the Corrigendum 4 issue, refer to
	* http://www.unicode.org/versions/corrigendum4.html
	*/

	/*
	* Option used in NormalizerBase.UNICODE_3_2_0_ORIGINAL.
	*/
	public static final int WITHOUT_CORRIGENDUM4_CORRECTIONS=0x40000;

	/*
	* Replacement UTF-16 sequences appended by convert(String).
	* Entry i is used for the i-th case of the switch in convert(); the
	* trailing comment on each row names the code point that is replaced.
	*/
	private static final char[][] corrigendum4MappingTable = {
	{'\uD844', '\uDF6A'}, // 0x2F868
	{'\u5F33'}, // 0x2F874
	{'\u43AB'}, // 0x2F91F
	{'\u7AAE'}, // 0x2F95F
	{'\u4D57'}}; // 0x2F9BF

	/*
	 * Removing Corrigendum 4 fix.
	 *
	 * Copies str code point by code point, replacing 0x2F868, 0x2F874,
	 * 0x2F91F, 0x2F95F and 0x2F9BF with the matching entry of
	 * corrigendum4MappingTable and appending every other code point as is.
	 *
	 * @param str the text to convert; may be null
	 * @return normalized text, or null when str is null
	 */
	public static String convert(String str) {
	    if (str == null) {
	        return null;
	    }

	    StringBuffer dest = new StringBuffer();
	    UCharacterIterator iter = UCharacterIterator.getInstance(str);

	    for (int ch = iter.nextCodePoint();
	         ch != UCharacterIterator.DONE;
	         ch = iter.nextCodePoint()) {

	        // index into corrigendum4MappingTable, or -1 for "copy unchanged"
	        final int tableIndex;
	        if (ch == 0x2F868) {
	            tableIndex = 0;
	        } else if (ch == 0x2F874) {
	            tableIndex = 1;
	        } else if (ch == 0x2F91F) {
	            tableIndex = 2;
	        } else if (ch == 0x2F95F) {
	            tableIndex = 3;
	        } else if (ch == 0x2F9BF) {
	            tableIndex = 4;
	        } else {
	            tableIndex = -1;
	        }

	        if (tableIndex >= 0) {
	            dest.append(corrigendum4MappingTable[tableIndex]);
	        } else {
	            UTF16.append(dest, ch);
	        }
	    }

	    return dest.toString();
	}
	}

Back to index...