Back to index...

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.commons.lang3;

	/**
	* <p>Operations on {@link CharSequence} that are
	* {@code null} safe.</p>
	*
	* @see CharSequence
	* @since 3.0
	*/
	public class CharSequenceUtils {

	/** Index value returned by the character search methods of this class when nothing matches. */
	private static final int NOT_FOUND = -1;

	/**
	* <p>{@code CharSequenceUtils} instances should NOT be constructed in
	* standard programming. </p>
	*
	* <p>This constructor is public to permit tools that require a JavaBean
	* instance to operate.</p>
	*/
	public CharSequenceUtils() {
	super();
	}

	//-----------------------------------------------------------------------
	/**
	 * <p>Returns the tail of a {@code CharSequence}, beginning with the
	 * {@code char} at the given index and running to the end of the sequence.</p>
	 *
	 * <p>This is the {@code CharSequence} counterpart of {@link String#substring(int)}.
	 * The result contains {@code cs.length() - start} chars, so passing
	 * {@code start == cs.length()} produces an empty sequence.</p>
	 *
	 * @param cs  the sequence to cut, {@code null} returns {@code null}
	 * @param start  the index of the first char to keep, inclusive
	 * @return the subsequence from {@code start} to the end, {@code null} if {@code cs} is {@code null}
	 * @throws IndexOutOfBoundsException if {@code start} is negative or
	 *  greater than {@code cs.length()}
	 */
	public static CharSequence subSequence(final CharSequence cs, final int start) {
	    if (cs == null) {
	        return null;
	    }
	    final int end = cs.length();
	    return cs.subSequence(start, end);
	}

	//-----------------------------------------------------------------------
	/**
	 * Returns the index within {@code cs} of the first occurrence of the
	 * specified character, starting the search at the specified index.
	 * <p>
	 * If a character with value {@code searchChar} occurs in the
	 * character sequence represented by the {@code cs}
	 * object at an index no smaller than {@code start}, then
	 * the index of the first such occurrence is returned. For values
	 * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
	 * this is the smallest value <i>k</i> such that:
	 * <blockquote><pre>
	 * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
	 * </pre></blockquote>
	 * is true. For other values of {@code searchChar}, it is the
	 * smallest value <i>k</i> such that:
	 * <blockquote><pre>
	 * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
	 * </pre></blockquote>
	 * is true. In either case, if no such character occurs in {@code cs}
	 * at or after position {@code start}, then
	 * {@code -1} is returned. A {@code searchChar} that is negative or
	 * greater than {@link Character#MAX_CODE_POINT} is never found.
	 *
	 * <p>
	 * There is no restriction on the value of {@code start}. If it
	 * is negative, it has the same effect as if it were zero: the entire
	 * {@code CharSequence} may be searched. If it is greater than
	 * the length of {@code cs}, it has the same effect as if it were
	 * equal to the length of {@code cs}: {@code -1} is returned.
	 *
	 * <p>All indices are specified in {@code char} values
	 * (Unicode code units).
	 *
	 * @param cs  the {@code CharSequence} to be processed, not null
	 * @param searchChar  the char (code point) to be searched for
	 * @param start  the start index, negative starts at the string start
	 * @return the index where the search char was found, -1 if not found
	 * @since 3.6 updated to behave more like {@code String}
	 */
	static int indexOf(final CharSequence cs, final int searchChar, int start) {
	    if (cs instanceof String) {
	        return ((String) cs).indexOf(searchChar, start);
	    }
	    final int sz = cs.length();
	    if (start < 0) {
	        start = 0;
	    }
	    if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
	        for (int i = start; i < sz; i++) {
	            if (cs.charAt(i) == searchChar) {
	                return i;
	            }
	        }
	        // Stop here: a value below the supplementary range is never encoded as a
	        // surrogate pair. Falling through would rescan the sequence for nothing and
	        // would let Character.toChars throw IllegalArgumentException for a negative
	        // searchChar, where String.indexOf simply reports -1.
	        return NOT_FOUND;
	    }
	    //supplementary characters (LANG1300)
	    if (searchChar <= Character.MAX_CODE_POINT) {
	        final char[] chars = Character.toChars(searchChar);
	        final char high = chars[0];
	        final char low = chars[1];
	        // A pair occupies two chars, so the last candidate position is sz - 2.
	        for (int i = start; i < sz - 1; i++) {
	            if (cs.charAt(i) == high && cs.charAt(i + 1) == low) {
	                return i;
	            }
	        }
	    }
	    return NOT_FOUND;
	}

	/**
	 * Green implementation of indexOf, used by the indexOf(CharSequence) family of methods.
	 *
	 * <p>{@code StringBuilder} and {@code StringBuffer} are searched through their own
	 * {@code indexOf}; every other sequence (including {@code String}) is searched
	 * through its {@code toString()} form. The search sequence is always converted
	 * with {@code toString()}.</p>
	 *
	 * @param cs  the {@code CharSequence} to be processed
	 * @param searchChar  the {@code CharSequence} to be searched for
	 * @param start  the start index
	 * @return the index where the search sequence was found
	 */
	static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
	    if (cs instanceof StringBuilder) {
	        final StringBuilder builder = (StringBuilder) cs;
	        return builder.indexOf(searchChar.toString(), start);
	    }
	    if (cs instanceof StringBuffer) {
	        final StringBuffer buffer = (StringBuffer) cs;
	        return buffer.indexOf(searchChar.toString(), start);
	    }
	    // For a String, toString() hands back the very same object, so this one
	    // path serves both String and arbitrary CharSequence implementations.
	    // TODO: search arbitrary sequences directly rather than converting to String
	    final String text = cs.toString();
	    return text.indexOf(searchChar.toString(), start);
	}

	/**
	 * Returns the index within {@code cs} of the last occurrence of
	 * the specified character, searching backward starting at the
	 * specified index. For values of {@code searchChar} in the range
	 * from 0 to 0xFFFF (inclusive), the index returned is the largest
	 * value <i>k</i> such that:
	 * <blockquote><pre>
	 * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
	 * </pre></blockquote>
	 * is true. For other values of {@code searchChar}, it is the
	 * largest value <i>k</i> such that:
	 * <blockquote><pre>
	 * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
	 * </pre></blockquote>
	 * is true. In either case, if no such character occurs in {@code cs}
	 * at or before position {@code start}, then {@code -1} is returned.
	 * A {@code searchChar} that is negative or greater than
	 * {@link Character#MAX_CODE_POINT} is never found.
	 *
	 * <p>All indices are specified in {@code char} values
	 * (Unicode code units).
	 *
	 * @param cs  the {@code CharSequence} to be processed
	 * @param searchChar  the char (code point) to be searched for
	 * @param start  the start index, negative returns -1, beyond length starts at end
	 * @return the index where the search char was found, -1 if not found
	 * @since 3.6 updated to behave more like {@code String}
	 */
	static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
	    if (cs instanceof String) {
	        return ((String) cs).lastIndexOf(searchChar, start);
	    }
	    final int sz = cs.length();
	    if (start < 0) {
	        return NOT_FOUND;
	    }
	    if (start >= sz) {
	        start = sz - 1;
	    }
	    if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
	        for (int i = start; i >= 0; --i) {
	            if (cs.charAt(i) == searchChar) {
	                return i;
	            }
	        }
	        // Stop here: a value below the supplementary range is never encoded as a
	        // surrogate pair, and Character.toChars below would throw
	        // IllegalArgumentException for a negative searchChar, where
	        // String.lastIndexOf simply reports -1.
	        return NOT_FOUND;
	    }
	    //supplementary characters (LANG1300)
	    if (searchChar <= Character.MAX_CODE_POINT) {
	        final char[] chars = Character.toChars(searchChar);
	        final char high = chars[0];
	        final char low = chars[1];
	        // A pair occupies two chars, so the latest position it can begin at is
	        // sz - 2. Clamp to that (as String.lastIndexOf does) instead of giving up
	        // when start is the last index; earlier pairs still satisfy k <= start.
	        for (int i = Math.min(start, sz - 2); i >= 0; i--) {
	            if (cs.charAt(i) == high && cs.charAt(i + 1) == low) {
	                return i;
	            }
	        }
	    }
	    return NOT_FOUND;
	}

	static final int TO_STRING_LIMIT = 16;

	/**
	* Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
	*
	* @param cs the {@code CharSequence} to be processed
	* @param searchChar the {@code CharSequence} to be searched for
	* @param start the start index
	* @return the index where the search sequence was found
	*/
	static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
	if (searchChar instanceof String) {
	if (cs instanceof String) {
	return ((String) cs).lastIndexOf((String) searchChar, start);
	} else if (cs instanceof StringBuilder) {
	return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
	} else if (cs instanceof StringBuffer) {
	return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
	}
	}

	final int len1 = cs.length();
	final int len2 = searchChar.length();

	if (start > len1) {
	start = len1;
	}

	if (start < 0 \|\| len2 < 0 \|\| len2 > len1) {
	return -1;
	}

	if (len2 == 0) {
	return start;
	}

	if (len2 <= TO_STRING_LIMIT) {
	if (cs instanceof String) {
	return ((String) cs).lastIndexOf(searchChar.toString(), start);
	} else if (cs instanceof StringBuilder) {
	return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
	} else if (cs instanceof StringBuffer) {
	return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
	}
	}

	if (start + len2 > len1) {
	start = len1 - len2;
	}

	final char char0 = searchChar.charAt(0);

	int i = start;
	while (true) {
	while (cs.charAt(i) != char0) {
	i--;
	if (i < 0) {
	return -1;
	}
	}
	if (checkLaterThan1(cs, searchChar, len2, i)) {
	return i;
	}
	i--;
	if (i < 0) {
	return -1;
	}
	}
	}

	private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
	for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
	if (cs.charAt(start1 + i) != searchChar.charAt(i)
	\|\|
	cs.charAt(start1 + j) != searchChar.charAt(j)
	) {
	return false;
	}
	}
	return true;
	}

	/**
	 * Copies the chars of the given CharSequence into a char[].
	 *
	 * <p>When {@code StringUtils.length(source)} is zero the shared
	 * {@code ArrayUtils.EMPTY_CHAR_ARRAY} is returned; a {@code String} is
	 * converted with {@link String#toCharArray()}; any other sequence is copied
	 * char by char.</p>
	 *
	 * @param source  the {@code CharSequence} to be processed.
	 * @return the resulting char array, never null.
	 * @since 3.11
	 */
	public static char[] toCharArray(final CharSequence source) {
	    final int count = StringUtils.length(source);
	    if (count == 0) {
	        return ArrayUtils.EMPTY_CHAR_ARRAY;
	    }
	    if (source instanceof String) {
	        final String text = (String) source;
	        return text.toCharArray();
	    }
	    final char[] chars = new char[count];
	    int pos = 0;
	    while (pos < count) {
	        chars[pos] = source.charAt(pos);
	        pos++;
	    }
	    return chars;
	}

	/**
	* Green implementation of regionMatches.
	*
	* @param cs the {@code CharSequence} to be processed
	* @param ignoreCase whether or not to be case insensitive
	* @param thisStart the index to start on the {@code cs} CharSequence
	* @param substring the {@code CharSequence} to be looked for
	* @param start the index to start on the {@code substring} CharSequence
	* @param length character length of the region
	* @return whether the region matched
	*/
	static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
	final CharSequence substring, final int start, final int length) {
	if (cs instanceof String && substring instanceof String) {
	return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
	}
	int index1 = thisStart;
	int index2 = start;
	int tmpLen = length;

	// Extract these first so we detect NPEs the same as the java.lang.String version
	final int srcLen = cs.length() - thisStart;
	final int otherLen = substring.length() - start;

	// Check for invalid parameters
	if (thisStart < 0 \|\| start < 0 \|\| length < 0) {
	return false;
	}

	// Check that the regions are long enough
	if (srcLen < length \|\| otherLen < length) {
	return false;
	}

	while (tmpLen-- > 0) {
	final char c1 = cs.charAt(index1++);
	final char c2 = substring.charAt(index2++);

	if (c1 == c2) {
	continue;
	}

	if (!ignoreCase) {
	return false;
	}

	// The real same check as in String.regionMatches():
	final char u1 = Character.toUpperCase(c1);
	final char u2 = Character.toUpperCase(c2);
	if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
	return false;
	}
	}

	return true;
	}
	}

Back to index...