/* |
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
|
* contributor license agreements. See the NOTICE file distributed with |
|
* this work for additional information regarding copyright ownership. |
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
|
* (the "License"); you may not use this file except in compliance with |
|
* the License. You may obtain a copy of the License at |
|
* |
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
* |
|
* Unless required by applicable law or agreed to in writing, software |
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
* See the License for the specific language governing permissions and |
|
* limitations under the License. |
|
*/ |
|
package org.apache.commons.lang3; |
|
/** |
|
* <p>Operations on {@link CharSequence} that are |
|
* {@code null} safe.</p> |
|
* |
|
* @see CharSequence |
|
* @since 3.0 |
|
*/ |
|
public class CharSequenceUtils { |
|
private static final int NOT_FOUND = -1; |
|
/** |
|
* <p>{@code CharSequenceUtils} instances should NOT be constructed in |
|
* standard programming. </p> |
|
* |
|
* <p>This constructor is public to permit tools that require a JavaBean |
|
* instance to operate.</p> |
|
*/ |
|
public CharSequenceUtils() { |
|
super(); |
|
} |
|
//----------------------------------------------------------------------- |
|
/** |
|
* <p>Returns a new {@code CharSequence} that is a subsequence of this |
|
* sequence starting with the {@code char} value at the specified index.</p> |
|
* |
|
* <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}. |
|
* The length (in {@code char}) of the returned sequence is {@code length() - start}, |
|
* so if {@code start == end} then an empty sequence is returned.</p> |
|
* |
|
* @param cs the specified subsequence, null returns null |
|
* @param start the start index, inclusive, valid |
|
* @return a new subsequence, may be null |
|
* @throws IndexOutOfBoundsException if {@code start} is negative or if |
|
* {@code start} is greater than {@code length()} |
|
*/ |
|
public static CharSequence subSequence(final CharSequence cs, final int start) { |
|
return cs == null ? null : cs.subSequence(start, cs.length()); |
|
} |
|
//----------------------------------------------------------------------- |
|
/** |
|
* Returns the index within {@code cs} of the first occurrence of the |
|
* specified character, starting the search at the specified index. |
|
* <p> |
|
* If a character with value {@code searchChar} occurs in the |
|
* character sequence represented by the {@code cs} |
|
* object at an index no smaller than {@code start}, then |
|
* the index of the first such occurrence is returned. For values |
|
* of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), |
|
* this is the smallest value <i>k</i> such that: |
|
* <blockquote><pre> |
|
* (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) |
|
* </pre></blockquote> |
|
* is true. For other values of {@code searchChar}, it is the |
|
* smallest value <i>k</i> such that: |
|
* <blockquote><pre> |
|
* (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) |
|
* </pre></blockquote> |
|
* is true. In either case, if no such character occurs inm {@code cs} |
|
* at or after position {@code start}, then |
|
* {@code -1} is returned. |
|
* |
|
* <p> |
|
* There is no restriction on the value of {@code start}. If it |
|
* is negative, it has the same effect as if it were zero: the entire |
|
* {@code CharSequence} may be searched. If it is greater than |
|
* the length of {@code cs}, it has the same effect as if it were |
|
* equal to the length of {@code cs}: {@code -1} is returned. |
|
* |
|
* <p>All indices are specified in {@code char} values |
|
* (Unicode code units). |
|
* |
|
* @param cs the {@code CharSequence} to be processed, not null |
|
* @param searchChar the char to be searched for |
|
* @param start the start index, negative starts at the string start |
|
* @return the index where the search char was found, -1 if not found |
|
* @since 3.6 updated to behave more like {@code String} |
|
*/ |
|
static int indexOf(final CharSequence cs, final int searchChar, int start) { |
|
if (cs instanceof String) { |
|
return ((String) cs).indexOf(searchChar, start); |
|
} |
|
final int sz = cs.length(); |
|
if (start < 0) { |
|
start = 0; |
|
} |
|
if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
|
for (int i = start; i < sz; i++) { |
|
if (cs.charAt(i) == searchChar) { |
|
return i; |
|
} |
|
} |
|
} |
|
//supplementary characters (LANG1300) |
|
if (searchChar <= Character.MAX_CODE_POINT) { |
|
final char[] chars = Character.toChars(searchChar); |
|
for (int i = start; i < sz - 1; i++) { |
|
final char high = cs.charAt(i); |
|
final char low = cs.charAt(i + 1); |
|
if (high == chars[0] && low == chars[1]) { |
|
return i; |
|
} |
|
} |
|
} |
|
return NOT_FOUND; |
|
} |
|
/** |
|
* Used by the indexOf(CharSequence methods) as a green implementation of indexOf. |
|
* |
|
* @param cs the {@code CharSequence} to be processed |
|
* @param searchChar the {@code CharSequence} to be searched for |
|
* @param start the start index |
|
* @return the index where the search sequence was found |
|
*/ |
|
static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { |
|
if (cs instanceof String) { |
|
return ((String) cs).indexOf(searchChar.toString(), start); |
|
} else if (cs instanceof StringBuilder) { |
|
return ((StringBuilder) cs).indexOf(searchChar.toString(), start); |
|
} else if (cs instanceof StringBuffer) { |
|
return ((StringBuffer) cs).indexOf(searchChar.toString(), start); |
|
} |
|
return cs.toString().indexOf(searchChar.toString(), start); |
|
// if (cs instanceof String && searchChar instanceof String) { |
|
// // TODO: Do we assume searchChar is usually relatively small; |
|
// // If so then calling toString() on it is better than reverting to |
|
// // the green implementation in the else block |
|
// return ((String) cs).indexOf((String) searchChar, start); |
|
// } else { |
|
// // TODO: Implement rather than convert to String |
|
// return cs.toString().indexOf(searchChar.toString(), start); |
|
// } |
|
} |
|
/** |
|
* Returns the index within {@code cs} of the last occurrence of |
|
* the specified character, searching backward starting at the |
|
* specified index. For values of {@code searchChar} in the range |
|
* from 0 to 0xFFFF (inclusive), the index returned is the largest |
|
* value <i>k</i> such that: |
|
* <blockquote><pre> |
|
* (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) |
|
* </pre></blockquote> |
|
* is true. For other values of {@code searchChar}, it is the |
|
* largest value <i>k</i> such that: |
|
* <blockquote><pre> |
|
* (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) |
|
* </pre></blockquote> |
|
* is true. In either case, if no such character occurs in {@code cs} |
|
* at or before position {@code start}, then {@code -1} is returned. |
|
* |
|
* <p>All indices are specified in {@code char} values |
|
* (Unicode code units). |
|
* |
|
* @param cs the {@code CharSequence} to be processed |
|
* @param searchChar the char to be searched for |
|
* @param start the start index, negative returns -1, beyond length starts at end |
|
* @return the index where the search char was found, -1 if not found |
|
* @since 3.6 updated to behave more like {@code String} |
|
*/ |
|
static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { |
|
if (cs instanceof String) { |
|
return ((String) cs).lastIndexOf(searchChar, start); |
|
} |
|
final int sz = cs.length(); |
|
if (start < 0) { |
|
return NOT_FOUND; |
|
} |
|
if (start >= sz) { |
|
start = sz - 1; |
|
} |
|
if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
|
for (int i = start; i >= 0; --i) { |
|
if (cs.charAt(i) == searchChar) { |
|
return i; |
|
} |
|
} |
|
} |
|
//supplementary characters (LANG1300) |
|
//NOTE - we must do a forward traversal for this to avoid duplicating code points |
|
if (searchChar <= Character.MAX_CODE_POINT) { |
|
final char[] chars = Character.toChars(searchChar); |
|
//make sure it's not the last index |
|
if (start == sz - 1) { |
|
return NOT_FOUND; |
|
} |
|
for (int i = start; i >= 0; i--) { |
|
final char high = cs.charAt(i); |
|
final char low = cs.charAt(i + 1); |
|
if (chars[0] == high && chars[1] == low) { |
|
return i; |
|
} |
|
} |
|
} |
|
return NOT_FOUND; |
|
} |
|
static final int TO_STRING_LIMIT = 16; |
|
/** |
|
* Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf |
|
* |
|
* @param cs the {@code CharSequence} to be processed |
|
* @param searchChar the {@code CharSequence} to be searched for |
|
* @param start the start index |
|
* @return the index where the search sequence was found |
|
*/ |
|
static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) { |
|
if (searchChar instanceof String) { |
|
if (cs instanceof String) { |
|
return ((String) cs).lastIndexOf((String) searchChar, start); |
|
} else if (cs instanceof StringBuilder) { |
|
return ((StringBuilder) cs).lastIndexOf((String) searchChar, start); |
|
} else if (cs instanceof StringBuffer) { |
|
return ((StringBuffer) cs).lastIndexOf((String) searchChar, start); |
|
} |
|
} |
|
final int len1 = cs.length(); |
|
final int len2 = searchChar.length(); |
|
if (start > len1) { |
|
start = len1; |
|
} |
|
if (start < 0 || len2 < 0 || len2 > len1) { |
|
return -1; |
|
} |
|
if (len2 == 0) { |
|
return start; |
|
} |
|
if (len2 <= TO_STRING_LIMIT) { |
|
if (cs instanceof String) { |
|
return ((String) cs).lastIndexOf(searchChar.toString(), start); |
|
} else if (cs instanceof StringBuilder) { |
|
return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start); |
|
} else if (cs instanceof StringBuffer) { |
|
return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start); |
|
} |
|
} |
|
if (start + len2 > len1) { |
|
start = len1 - len2; |
|
} |
|
final char char0 = searchChar.charAt(0); |
|
int i = start; |
|
while (true) { |
|
while (cs.charAt(i) != char0) { |
|
i--; |
|
if (i < 0) { |
|
return -1; |
|
} |
|
} |
|
if (checkLaterThan1(cs, searchChar, len2, i)) { |
|
return i; |
|
} |
|
i--; |
|
if (i < 0) { |
|
return -1; |
|
} |
|
} |
|
} |
|
private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) { |
|
for (int i = 1, j = len2 - 1; i <= j; i++, j--) { |
|
if (cs.charAt(start1 + i) != searchChar.charAt(i) |
|
|| |
|
cs.charAt(start1 + j) != searchChar.charAt(j) |
|
) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
} |
|
/** |
|
* Converts the given CharSequence to a char[]. |
|
* |
|
* @param source the {@code CharSequence} to be processed. |
|
* @return the resulting char array, never null. |
|
* @since 3.11 |
|
*/ |
|
public static char[] toCharArray(final CharSequence source) { |
|
final int len = StringUtils.length(source); |
|
if (len == 0) { |
|
return ArrayUtils.EMPTY_CHAR_ARRAY; |
|
} |
|
if (source instanceof String) { |
|
return ((String) source).toCharArray(); |
|
} |
|
final char[] array = new char[len]; |
|
for (int i = 0; i < len; i++) { |
|
array[i] = source.charAt(i); |
|
} |
|
return array; |
|
} |
|
/** |
|
* Green implementation of regionMatches. |
|
* |
|
* @param cs the {@code CharSequence} to be processed |
|
* @param ignoreCase whether or not to be case insensitive |
|
* @param thisStart the index to start on the {@code cs} CharSequence |
|
* @param substring the {@code CharSequence} to be looked for |
|
* @param start the index to start on the {@code substring} CharSequence |
|
* @param length character length of the region |
|
* @return whether the region matched |
|
*/ |
|
static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, |
|
final CharSequence substring, final int start, final int length) { |
|
if (cs instanceof String && substring instanceof String) { |
|
return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); |
|
} |
|
int index1 = thisStart; |
|
int index2 = start; |
|
int tmpLen = length; |
|
// Extract these first so we detect NPEs the same as the java.lang.String version |
|
final int srcLen = cs.length() - thisStart; |
|
final int otherLen = substring.length() - start; |
|
// Check for invalid parameters |
|
if (thisStart < 0 || start < 0 || length < 0) { |
|
return false; |
|
} |
|
// Check that the regions are long enough |
|
if (srcLen < length || otherLen < length) { |
|
return false; |
|
} |
|
while (tmpLen-- > 0) { |
|
final char c1 = cs.charAt(index1++); |
|
final char c2 = substring.charAt(index2++); |
|
if (c1 == c2) { |
|
continue; |
|
} |
|
if (!ignoreCase) { |
|
return false; |
|
} |
|
// The real same check as in String.regionMatches(): |
|
final char u1 = Character.toUpperCase(c1); |
|
final char u2 = Character.toUpperCase(c2); |
|
if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
} |
|
} |