Back to index...

	/*
	* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/

	/*
	* (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
	* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
	*
	* The original version of this source code and documentation is copyrighted
	* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
	* materials are provided under terms of a License Agreement between Taligent
	* and Sun. This technology is protected by multiple US and International
	* patents. This notice and attribution to Taligent may not be removed.
	* Taligent is a registered trademark of Taligent, Inc.
	*
	*/

	package java.text;

	import java.lang.ref.SoftReference;
	import java.text.spi.CollatorProvider;
	import java.util.Locale;
	import java.util.ResourceBundle;
	import java.util.concurrent.ConcurrentHashMap;
	import java.util.concurrent.ConcurrentMap;
	import sun.util.locale.provider.LocaleProviderAdapter;
	import sun.util.locale.provider.LocaleServiceProviderPool;


	/**
	* The <code>Collator</code> class performs locale-sensitive
	* <code>String</code> comparison. You use this class to build
	* searching and sorting routines for natural language text.
	*
	* <p>
	* <code>Collator</code> is an abstract base class. Subclasses
	* implement specific collation strategies. One subclass,
	* <code>RuleBasedCollator</code>, is currently provided with
	* the Java Platform and is applicable to a wide set of languages. Other
	* subclasses may be created to handle more specialized needs.
	*
	* <p>
	* Like other locale-sensitive classes, you can use the static
	* factory method, <code>getInstance</code>, to obtain the appropriate
	* <code>Collator</code> object for a given locale. You will only need
	* to look at the subclasses of <code>Collator</code> if you need
	* to understand the details of a particular collation strategy or
	* if you need to modify that strategy.
	*
	* <p>
	* The following example shows how to compare two strings using
	* the <code>Collator</code> for the default locale.
	* <blockquote>
	* <pre>{@code
	* // Compare two strings in the default locale
	* Collator myCollator = Collator.getInstance();
	* if (myCollator.compare("abc", "ABC") < 0) {
	*     System.out.println("abc is less than ABC");
	* } else {
	*     System.out.println("abc is greater than or equal to ABC");
	* }
	* }</pre>
	* </blockquote>
	*
	* <p>
	* You can set a <code>Collator</code>'s <em>strength</em> property
	* to determine the level of difference considered significant in
	* comparisons. Four strengths are provided: <code>PRIMARY</code>,
	* <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
	* The exact assignment of strengths to language features is
	* locale dependent. For example, in Czech, "e" and "f" are considered
	* primary differences, while "e" and "ě" are secondary differences,
	* "e" and "E" are tertiary differences and "e" and "e" are identical.
	* The following shows how both case and accents could be ignored for
	* US English.
	* <blockquote>
	* <pre>
	* // Get the Collator for US English and set its strength to PRIMARY
	* Collator usCollator = Collator.getInstance(Locale.US);
	* usCollator.setStrength(Collator.PRIMARY);
	* if (usCollator.compare("abc", "ABC") == 0) {
	*     System.out.println("Strings are equivalent");
	* }
	* </pre>
	* </blockquote>
	* <p>
	* For comparing <code>String</code>s exactly once, the <code>compare</code>
	* method provides the best performance. When sorting a list of
	* <code>String</code>s however, it is generally necessary to compare each
	* <code>String</code> multiple times. In this case, <code>CollationKey</code>s
	* provide better performance. The <code>CollationKey</code> class converts
	* a <code>String</code> to a series of bits that can be compared bitwise
	* against other <code>CollationKey</code>s. A <code>CollationKey</code> is
	* created by a <code>Collator</code> object for a given <code>String</code>.
	* <br>
	* <strong>Note:</strong> <code>CollationKey</code>s produced by different
	* <code>Collator</code>s cannot be compared with each other. See the class
	* description for {@link CollationKey}
	* for an example using <code>CollationKey</code>s.
	*
	* @see RuleBasedCollator
	* @see CollationKey
	* @see CollationElementIterator
	* @see Locale
	* @author Helena Shih, Laura Werner, Richard Gillam
	* @since 1.1
	*/

	public abstract class Collator
	        implements java.util.Comparator<Object>, Cloneable {

	    // ---------------------------------------------------------------
	    // Strength levels
	    // ---------------------------------------------------------------

	    /**
	     * Strength setting under which a comparison takes only PRIMARY
	     * differences into account. How strengths map onto language features
	     * varies from locale to locale; typically two distinct base letters
	     * (such as "a" and "b") differ at the PRIMARY level.
	     * @see java.text.Collator#setStrength
	     * @see java.text.Collator#getStrength
	     */
	    public static final int PRIMARY = 0;

	    /**
	     * Strength setting under which a comparison takes SECONDARY and
	     * stronger differences into account. How strengths map onto language
	     * features varies from locale to locale; typically two accented
	     * variants of one base letter (such as "a" and "\u00E4") differ at the
	     * SECONDARY level.
	     * @see java.text.Collator#setStrength
	     * @see java.text.Collator#getStrength
	     */
	    public static final int SECONDARY = 1;

	    /**
	     * Strength setting under which a comparison takes TERTIARY and
	     * stronger differences into account. How strengths map onto language
	     * features varies from locale to locale; typically a difference in
	     * letter case (such as "a" and "A") is a TERTIARY difference.
	     * @see java.text.Collator#setStrength
	     * @see java.text.Collator#getStrength
	     */
	    public static final int TERTIARY = 2;

	    /**
	     * Strength setting under which every difference is significant.
	     * How strengths map onto language features varies from locale to
	     * locale. Control characters (such as "\u0001" and "\u0002") are a
	     * typical example: they compare equal at the PRIMARY, SECONDARY and
	     * TERTIARY levels and differ only at the IDENTICAL level. Likewise,
	     * when decomposition is NO_DECOMPOSITION, a pre-composed accented
	     * character such as "\u00C0" (A-grave) and its combining-sequence
	     * form "A\u0300" (A, combining-grave) are distinguished at the
	     * IDENTICAL level.
	     */
	    public static final int IDENTICAL = 3;

	    // ---------------------------------------------------------------
	    // Decomposition modes
	    // ---------------------------------------------------------------

	    /**
	     * Decomposition mode in which accented characters are left as they
	     * are, i.e. not decomposed before collation. It gives the fastest
	     * collation, but results are only correct for languages that do not
	     * use accents. This mode has historically been documented as the
	     * default setting; note, however, that the protected constructor of
	     * this class starts out in CANONICAL_DECOMPOSITION, so the mode an
	     * instance actually begins with depends on how it was created.
	     * @see java.text.Collator#getDecomposition
	     * @see java.text.Collator#setDecomposition
	     */
	    public static final int NO_DECOMPOSITION = 0;

	    /**
	     * Decomposition mode in which characters that are canonical variants
	     * according to the Unicode standard are decomposed before collation.
	     * Choose this mode to have accented characters collated correctly.
	     * <p>
	     * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
	     * described in
	     * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
	     * Technical Report #15</a>.
	     * @see java.text.Collator#getDecomposition
	     * @see java.text.Collator#setDecomposition
	     */
	    public static final int CANONICAL_DECOMPOSITION = 1;

	    /**
	     * Decomposition mode in which Unicode canonical variants as well as
	     * Unicode compatibility variants are decomposed before collation.
	     * Besides accented characters, this also makes characters with
	     * special formats collate with their nominal form; half-width and
	     * full-width ASCII and Katakana characters, for instance, are then
	     * collated together. Being the most complete mode,
	     * FULL_DECOMPOSITION is also the slowest one.
	     * <p>
	     * FULL_DECOMPOSITION corresponds to Normalization Form KD as
	     * described in
	     * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
	     * Technical Report #15</a>.
	     * @see java.text.Collator#getDecomposition
	     * @see java.text.Collator#setDecomposition
	     */
	    public static final int FULL_DECOMPOSITION = 2;

	    // ---------------------------------------------------------------
	    // Package-private comparison results
	    // ---------------------------------------------------------------

	    //
	    // FIXME: These three constants should be removed.
	    //
	    /**
	     * Result value of compare() meaning that the source string sorts
	     * before the target string.
	     * @see java.text.Collator#compare
	     */
	    static final int LESS = -1;

	    /**
	     * Result value of compare() meaning that the source string and the
	     * target string are considered equal.
	     * @see java.text.Collator#compare
	     */
	    static final int EQUAL = 0;

	    /**
	     * Result value of compare() meaning that the source string sorts
	     * after the target string.
	     * @see java.text.Collator#compare
	     */
	    static final int GREATER = 1;

	    // ---------------------------------------------------------------
	    // State
	    // ---------------------------------------------------------------

	    /**
	     * Per-locale cache of prototype collators. Values are softly
	     * referenced, so an entry may be present while its referent has
	     * already been cleared. Callers of getInstance only ever receive
	     * clones of the cached prototypes.
	     */
	    private static final ConcurrentMap<Locale, SoftReference<Collator>> cache
	            = new ConcurrentHashMap<>();

	    /** Current strength; one of PRIMARY, SECONDARY, TERTIARY, IDENTICAL. */
	    private int strength;

	    /** Current decomposition mode; one of the *_DECOMPOSITION constants. */
	    private int decmp;

	    // ---------------------------------------------------------------
	    // Construction and factories
	    // ---------------------------------------------------------------

	    /**
	     * Sole constructor, protected so that subclasses can invoke it.
	     * A new instance starts with TERTIARY strength and
	     * CANONICAL_DECOMPOSITION. Application code normally obtains a
	     * Collator subclass instance through the getInstance factory
	     * methods instead.
	     * @see java.text.Collator#getInstance
	     */
	    protected Collator() {
	        strength = TERTIARY;
	        decmp = CANONICAL_DECOMPOSITION;
	    }

	    /**
	     * Returns the Collator for the current default locale, i.e. the
	     * locale reported by java.util.Locale.getDefault.
	     * @return the Collator for the default locale (for example, en_US).
	     * @see java.util.Locale#getDefault
	     */
	    public static synchronized Collator getInstance() {
	        Locale defaultLocale = Locale.getDefault();
	        return getInstance(defaultLocale);
	    }

	    /**
	     * Returns the Collator for the given locale.
	     * <p>
	     * A prototype is looked up in an internal per-locale cache first.
	     * When no live prototype is cached, one is requested from the
	     * CollatorProvider of the locale provider adapter selected for the
	     * locale, falling back to the JRE adapter's provider if that yields
	     * <code>null</code>, and is then published to the cache. The caller
	     * always receives a clone of the prototype, so changing the returned
	     * object does not affect other callers.
	     * @param desiredLocale the desired locale.
	     * @return the Collator for the desired locale.
	     * @see java.util.Locale
	     * @see java.util.ResourceBundle
	     */
	    public static Collator getInstance(Locale desiredLocale) {
	        SoftReference<Collator> cachedRef = cache.get(desiredLocale);
	        Collator prototype = (cachedRef == null) ? null : cachedRef.get();
	        if (prototype != null) {
	            // Cache hit with a live referent: hand out a private copy.
	            return (Collator) prototype.clone();
	        }

	        // Cache miss (or cleared reference): build a fresh prototype.
	        CollatorProvider provider = LocaleProviderAdapter
	                .getAdapter(CollatorProvider.class, desiredLocale)
	                .getCollatorProvider();
	        prototype = provider.getInstance(desiredLocale);
	        if (prototype == null) {
	            prototype = LocaleProviderAdapter.forJRE()
	                    .getCollatorProvider()
	                    .getInstance(desiredLocale);
	        }

	        // Publish the prototype. Another thread may have raced us; keep
	        // retrying until either our reference is installed or a live
	        // prototype installed by someone else is observed.
	        for (;;) {
	            if (cachedRef != null) {
	                // Evict the reference whose referent has been cleared.
	                cache.remove(desiredLocale, cachedRef);
	            }
	            cachedRef = cache.putIfAbsent(desiredLocale, new SoftReference<>(prototype));
	            if (cachedRef == null) {
	                // Our reference is now the cached one.
	                break;
	            }
	            Collator winner = cachedRef.get();
	            if (winner != null) {
	                // Somebody else published first; use their prototype.
	                prototype = winner;
	                break;
	            }
	        }
	        return (Collator) prototype.clone(); // make the world safe
	    }

	    /**
	     * Returns all locales for which the <code>getInstance</code> methods
	     * of this class can return localized instances. The result is the
	     * union of the locales supported by the Java runtime and those
	     * supported by installed
	     * {@link java.text.spi.CollatorProvider CollatorProvider} implementations.
	     * It must contain at least a Locale instance equal to
	     * {@link java.util.Locale#US Locale.US}.
	     *
	     * @return An array of locales for which localized
	     *         <code>Collator</code> instances are available.
	     */
	    public static synchronized Locale[] getAvailableLocales() {
	        return LocaleServiceProviderPool.getPool(CollatorProvider.class)
	                .getAvailableLocales();
	    }

	    // ---------------------------------------------------------------
	    // Comparison
	    // ---------------------------------------------------------------

	    /**
	     * Compares the source string with the target string using the
	     * collation rules of this Collator. The result is negative, zero or
	     * positive when the source string is respectively less than, equal
	     * to or greater than the target string. The Collator class
	     * description contains a usage example.
	     * <p>
	     * This method performs best for a one-off comparison. When the same
	     * String takes part in many comparisons, CollationKey.compareTo
	     * performs best; see the Collator class description for an example
	     * using CollationKeys.
	     * @param source the source string.
	     * @param target the target string.
	     * @return an integer value: less than zero if source is less than
	     *         target, zero if source and target are equal, greater than
	     *         zero if source is greater than target.
	     * @see java.text.CollationKey
	     * @see java.text.Collator#getCollationKey
	     */
	    public abstract int compare(String source, String target);

	    /**
	     * Compares two objects for order, yielding a negative integer, zero
	     * or a positive integer when the first argument is respectively less
	     * than, equal to or greater than the second.
	     * <p>
	     * This implementation simply casts both arguments to String and
	     * returns <code> compare((String)o1, (String)o2) </code>.
	     *
	     * @param o1 the first object to compare; must be a String.
	     * @param o2 the second object to compare; must be a String.
	     * @return a negative integer, zero, or a positive integer as the
	     *         first argument is less than, equal to, or greater than the
	     *         second.
	     * @exception ClassCastException the arguments cannot be cast to Strings.
	     * @see java.util.Comparator
	     * @since 1.2
	     */
	    @Override
	    public int compare(Object o1, Object o2) {
	        String source = (String) o1;
	        String target = (String) o2;
	        return compare(source, target);
	    }

	    /**
	     * Converts the String into a series of bits that can be compared
	     * bitwise against other CollationKeys. When Strings take part in
	     * many comparisons, CollationKeys perform better than
	     * Collator.compare. See the Collator class description for an
	     * example using CollationKeys.
	     * @param source the string to be transformed into a collation key.
	     * @return the CollationKey for the given String according to this
	     *         Collator's collation rules. If the source String is null,
	     *         a null CollationKey is returned.
	     * @see java.text.CollationKey
	     * @see java.text.Collator#compare
	     */
	    public abstract CollationKey getCollationKey(String source);

	    /**
	     * Convenience method that tells whether two strings are equal under
	     * this Collator's collation rules, i.e. whether compare reports them
	     * as equal.
	     * @param source the source string to be compared with.
	     * @param target the target string to be compared with.
	     * @return true if the strings are equal according to the collation
	     *         rules. false, otherwise.
	     * @see java.text.Collator#compare
	     */
	    public boolean equals(String source, String target) {
	        int order = compare(source, target);
	        return order == EQUAL;
	    }

	    // ---------------------------------------------------------------
	    // Properties
	    // ---------------------------------------------------------------

	    /**
	     * Returns the strength property of this Collator, which defines the
	     * minimum level of difference that is significant during
	     * comparison. See the Collator class description for an example of
	     * use.
	     * @return this Collator's current strength property.
	     * @see java.text.Collator#setStrength
	     * @see java.text.Collator#PRIMARY
	     * @see java.text.Collator#SECONDARY
	     * @see java.text.Collator#TERTIARY
	     * @see java.text.Collator#IDENTICAL
	     */
	    public synchronized int getStrength() {
	        return strength;
	    }

	    /**
	     * Changes the strength property of this Collator, which defines the
	     * minimum level of difference that is significant during
	     * comparison. See the Collator class description for an example of
	     * use.
	     * @param newStrength the new strength value.
	     * @see java.text.Collator#getStrength
	     * @see java.text.Collator#PRIMARY
	     * @see java.text.Collator#SECONDARY
	     * @see java.text.Collator#TERTIARY
	     * @see java.text.Collator#IDENTICAL
	     * @exception IllegalArgumentException If the new strength value is not one of
	     * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
	     */
	    public synchronized void setStrength(int newStrength) {
	        switch (newStrength) {
	            case PRIMARY:
	            case SECONDARY:
	            case TERTIARY:
	            case IDENTICAL:
	                strength = newStrength;
	                break;
	            default:
	                throw new IllegalArgumentException("Incorrect comparison level.");
	        }
	    }

	    /**
	     * Returns the decomposition mode of this Collator. The mode governs
	     * how Unicode composed characters are treated, letting the user
	     * trade collation speed against completeness.
	     * <p>The three values for decomposition mode are:
	     * <UL>
	     * <LI>NO_DECOMPOSITION,
	     * <LI>CANONICAL_DECOMPOSITION
	     * <LI>FULL_DECOMPOSITION.
	     * </UL>
	     * The documentation of these three constants describes what each of
	     * them means.
	     * @return the decomposition mode
	     * @see java.text.Collator#setDecomposition
	     * @see java.text.Collator#NO_DECOMPOSITION
	     * @see java.text.Collator#CANONICAL_DECOMPOSITION
	     * @see java.text.Collator#FULL_DECOMPOSITION
	     */
	    public synchronized int getDecomposition() {
	        return decmp;
	    }

	    /**
	     * Changes the decomposition mode of this Collator. See
	     * getDecomposition for a description of decomposition mode.
	     * @param decompositionMode the new decomposition mode.
	     * @see java.text.Collator#getDecomposition
	     * @see java.text.Collator#NO_DECOMPOSITION
	     * @see java.text.Collator#CANONICAL_DECOMPOSITION
	     * @see java.text.Collator#FULL_DECOMPOSITION
	     * @exception IllegalArgumentException If the given value is not a valid decomposition
	     * mode.
	     */
	    public synchronized void setDecomposition(int decompositionMode) {
	        boolean recognized = decompositionMode == NO_DECOMPOSITION
	                || decompositionMode == CANONICAL_DECOMPOSITION
	                || decompositionMode == FULL_DECOMPOSITION;
	        if (!recognized) {
	            throw new IllegalArgumentException("Wrong decomposition mode.");
	        }
	        decmp = decompositionMode;
	    }

	    // ---------------------------------------------------------------
	    // Object overrides
	    // ---------------------------------------------------------------

	    /**
	     * Returns a copy of this Collator created through
	     * <code>Object.clone</code>. A CloneNotSupportedException raised by
	     * the superclass is rethrown wrapped in an InternalError.
	     * @return a clone of this Collator.
	     */
	    @Override
	    public Object clone() {
	        try {
	            Object copy = super.clone();
	            return (Collator) copy;
	        } catch (CloneNotSupportedException e) {
	            throw new InternalError(e);
	        }
	    }

	    /**
	     * Tells whether another object is a Collator equal to this one. As
	     * implemented here, two collators are equal when they have the same
	     * runtime class, the same strength and the same decomposition mode.
	     * @param that the Collator to be compared with this.
	     * @return true if this Collator is the same as that Collator;
	     *         false otherwise.
	     */
	    @Override
	    public boolean equals(Object that) {
	        if (this == that) {
	            return true;
	        }
	        if (that == null || getClass() != that.getClass()) {
	            return false;
	        }
	        Collator other = (Collator) that;
	        return strength == other.strength && decmp == other.decmp;
	    }

	    /**
	     * Generates the hash code for this Collator. Declared abstract so
	     * that each concrete subclass supplies its own implementation.
	     * @return the hash code for this Collator.
	     */
	    @Override
	    public abstract int hashCode();
	}

Back to index...