/* |
|
* Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. |
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
* |
|
* This code is free software; you can redistribute it and/or modify it |
|
* under the terms of the GNU General Public License version 2 only, as |
|
* published by the Free Software Foundation. Oracle designates this |
|
* particular file as subject to the "Classpath" exception as provided |
|
* by Oracle in the LICENSE file that accompanied this code. |
|
* |
|
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
* version 2 for more details (a copy is included in the LICENSE file that |
|
* accompanied this code). |
|
* |
|
* You should have received a copy of the GNU General Public License version |
|
* 2 along with this work; if not, write to the Free Software Foundation, |
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
* |
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
* or visit www.oracle.com if you need additional information or have any |
|
* questions. |
|
*/ |
|
package java.util; |
|
import java.util.concurrent.CountedCompleter; |
|
import java.util.concurrent.RecursiveTask; |
|
/** |
|
* This class implements powerful and fully optimized versions, both |
|
* sequential and parallel, of the Dual-Pivot Quicksort algorithm by |
|
* Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm |
|
* offers O(n log(n)) performance on all data sets, and is typically |
|
* faster than traditional (one-pivot) Quicksort implementations. |
|
* |
|
* There are also additional algorithms, invoked from the Dual-Pivot |
|
* Quicksort, such as mixed insertion sort, merging of runs and heap |
|
* sort, counting sort and parallel merge sort. |
|
* |
|
* @author Vladimir Yaroslavskiy |
|
* @author Jon Bentley |
|
* @author Josh Bloch |
|
* @author Doug Lea |
|
* |
|
* @version 2018.08.18 |
|
* |
|
* @since 1.7 * 14 |
|
*/ |
|
final class DualPivotQuicksort { |
|
/** |
|
* Prevents instantiation. |
|
*/ |
|
private DualPivotQuicksort() {} |
|
/** |
|
* Max array size to use mixed insertion sort. |
|
*/ |
|
private static final int MAX_MIXED_INSERTION_SORT_SIZE = 65; |
|
/** |
|
* Max array size to use insertion sort. |
|
*/ |
|
private static final int MAX_INSERTION_SORT_SIZE = 44; |
|
/** |
|
* Min array size to perform sorting in parallel. |
|
*/ |
|
private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10; |
|
/** |
|
* Min array size to try merging of runs. |
|
*/ |
|
private static final int MIN_TRY_MERGE_SIZE = 4 << 10; |
|
/** |
|
* Min size of the first run to continue with scanning. |
|
*/ |
|
private static final int MIN_FIRST_RUN_SIZE = 16; |
|
/** |
|
* Min factor for the first runs to continue scanning. |
|
*/ |
|
private static final int MIN_FIRST_RUNS_FACTOR = 7; |
|
/** |
|
* Max capacity of the index array for tracking runs. |
|
*/ |
|
private static final int MAX_RUN_CAPACITY = 5 << 10; |
|
/** |
|
* Min number of runs, required by parallel merging. |
|
*/ |
|
private static final int MIN_RUN_COUNT = 4; |
|
/** |
|
* Min array size to use parallel merging of parts. |
|
*/ |
|
private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10; |
|
/** |
|
* Min size of a byte array to use counting sort. |
|
*/ |
|
private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64; |
|
/** |
|
* Min size of a short or char array to use counting sort. |
|
*/ |
|
private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750; |
|
/** |
|
* Threshold of mixed insertion sort is incremented by this value. |
|
*/ |
|
private static final int DELTA = 3 << 1; |
|
/** |
|
* Max recursive partitioning depth before using heap sort. |
|
*/ |
|
private static final int MAX_RECURSION_DEPTH = 64 * DELTA; |
|
/** |
|
* Calculates the double depth of parallel merging. |
|
* Depth is negative, if tasks split before sorting. |
|
* |
|
* @param parallelism the parallelism level |
|
* @param size the target size |
|
* @return the depth of parallel merging |
|
*/ |
|
private static int getDepth(int parallelism, int size) { |
|
int depth = 0; |
|
while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) { |
|
depth -= 2; |
|
} |
|
return depth; |
|
} |
|
/** |
|
* Sorts the specified range of the array using parallel merge |
|
* sort and/or Dual-Pivot Quicksort. |
|
* |
|
* To balance the faster splitting and parallelism of merge sort |
|
* with the faster element partitioning of Quicksort, ranges are |
|
* subdivided in tiers such that, if there is enough parallelism, |
|
* the four-way parallel merge is started, still ensuring enough |
|
* parallelism to process the partitions. |
|
* |
|
* @param a the array to be sorted |
|
* @param parallelism the parallelism level |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(int[] a, int parallelism, int low, int high) { |
|
int size = high - low; |
|
if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
|
int depth = getDepth(parallelism, size >> 12); |
|
int[] b = depth == 0 ? null : new int[size]; |
|
new Sorter(null, a, b, low, size, low, depth).invoke(); |
|
} else { |
|
sort(null, a, 0, low, high); |
|
} |
|
} |
|
/** |
|
* Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
* other sorts in special-cases, possibly with parallel partitions. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param bits the combination of recursion depth and bit flag, where |
|
* the right bit "0" indicates that array is the leftmost part |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(Sorter sorter, int[] a, int bits, int low, int high) { |
|
while (true) { |
|
int end = high - 1, size = high - low; |
|
/* |
|
* Run mixed insertion sort on small non-leftmost parts. |
|
*/ |
|
if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
|
mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
|
return; |
|
} |
|
/* |
|
* Invoke insertion sort on small leftmost part. |
|
*/ |
|
if (size < MAX_INSERTION_SORT_SIZE) { |
|
insertionSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Check if the whole array or large non-leftmost |
|
* parts are nearly sorted and then merge runs. |
|
*/ |
|
if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
|
&& tryMergeRuns(sorter, a, low, size)) { |
|
return; |
|
} |
|
/* |
|
* Switch to heap sort if execution |
|
* time is becoming quadratic. |
|
*/ |
|
if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
heapSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Use an inexpensive approximation of the golden ratio |
|
* to select five sample elements and determine pivots. |
|
*/ |
|
int step = (size >> 3) * 3 + 3; |
|
/* |
|
* Five elements around (and including) the central element |
|
* will be used for pivot selection as described below. The |
|
* unequal choice of spacing these elements was empirically |
|
* determined to work well on a wide variety of inputs. |
|
*/ |
|
int e1 = low + step; |
|
int e5 = end - step; |
|
int e3 = (e1 + e5) >>> 1; |
|
int e2 = (e1 + e3) >>> 1; |
|
int e4 = (e3 + e5) >>> 1; |
|
int a3 = a[e3]; |
|
/* |
|
* Sort these elements in place by the combination |
|
* of 4-element sorting network and insertion sort. |
|
* |
|
* 5 ------o-----------o------------ |
|
* | | |
|
* 4 ------|-----o-----o-----o------ |
|
* | | | |
|
* 2 ------o-----|-----o-----o------ |
|
* | | |
|
* 1 ------------o-----o------------ |
|
*/ |
|
if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
|
if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
|
if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
|
if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
|
if (a3 < a[e2]) { |
|
if (a3 < a[e1]) { |
|
a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
|
} else { |
|
a[e3] = a[e2]; a[e2] = a3; |
|
} |
|
} else if (a3 > a[e4]) { |
|
if (a3 > a[e5]) { |
|
a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
|
} else { |
|
a[e3] = a[e4]; a[e4] = a3; |
|
} |
|
} |
|
// Pointers |
|
int lower = low; // The index of the last element of the left part |
|
int upper = end; // The index of the first element of the right part |
|
/* |
|
* Partitioning with 2 pivots in case of different elements. |
|
*/ |
|
if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
|
/* |
|
* Use the first and fifth of the five sorted elements as |
|
* the pivots. These values are inexpensive approximation |
|
* of tertiles. Note, that pivot1 < pivot2. |
|
*/ |
|
int pivot1 = a[e1]; |
|
int pivot2 = a[e5]; |
|
/* |
|
* The first and the last elements to be sorted are moved |
|
* to the locations formerly occupied by the pivots. When |
|
* partitioning is completed, the pivots are swapped back |
|
* into their final positions, and excluded from the next |
|
* subsequent sorting. |
|
*/ |
|
a[e1] = a[lower]; |
|
a[e5] = a[upper]; |
|
/* |
|
* Skip elements, which are less or greater than the pivots. |
|
*/ |
|
while (a[++lower] < pivot1); |
|
while (a[--upper] > pivot2); |
|
/* |
|
* Backward 3-interval partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------------+ |
|
* | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
|
* +------------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot1 |
|
* pivot1 <= all in (k, upper) <= pivot2 |
|
* all in [upper, end) > pivot2 |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int unused = --lower, k = ++upper; --k > lower; ) { |
|
int ak = a[k]; |
|
if (ak < pivot1) { // Move a[k] to the left side |
|
while (lower < k) { |
|
if (a[++lower] >= pivot1) { |
|
if (a[lower] > pivot2) { |
|
a[k] = a[--upper]; |
|
a[upper] = a[lower]; |
|
} else { |
|
a[k] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
break; |
|
} |
|
} |
|
} else if (ak > pivot2) { // Move a[k] to the right side |
|
a[k] = a[--upper]; |
|
a[upper] = ak; |
|
} |
|
} |
|
/* |
|
* Swap the pivots into their final positions. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot1; |
|
a[end] = a[upper]; a[upper] = pivot2; |
|
/* |
|
* Sort non-left parts recursively (possibly in parallel), |
|
* excluding known pivots. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, lower + 1, upper); |
|
sorter.forkSorter(bits | 1, upper + 1, high); |
|
} else { |
|
sort(sorter, a, bits | 1, lower + 1, upper); |
|
sort(sorter, a, bits | 1, upper + 1, high); |
|
} |
|
} else { // Use single pivot in case of many equal elements |
|
/* |
|
* Use the third of the five sorted elements as the pivot. |
|
* This value is inexpensive approximation of the median. |
|
*/ |
|
int pivot = a[e3]; |
|
/* |
|
* The first element to be sorted is moved to the |
|
* location formerly occupied by the pivot. After |
|
* completion of partitioning the pivot is swapped |
|
* back into its final position, and excluded from |
|
* the next subsequent sorting. |
|
*/ |
|
a[e3] = a[lower]; |
|
/* |
|
* Traditional 3-way (Dutch National Flag) partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------+ |
|
* | < pivot | ? | == pivot | > pivot | |
|
* +------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot |
|
* all in (k, upper) == pivot |
|
* all in [upper, end] > pivot |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int k = ++upper; --k > lower; ) { |
|
int ak = a[k]; |
|
if (ak != pivot) { |
|
a[k] = pivot; |
|
if (ak < pivot) { // Move a[k] to the left side |
|
while (a[++lower] < pivot); |
|
if (a[lower] > pivot) { |
|
a[--upper] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
} else { // ak > pivot - Move a[k] to the right side |
|
a[--upper] = ak; |
|
} |
|
} |
|
} |
|
/* |
|
* Swap the pivot into its final position. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot; |
|
/* |
|
* Sort the right part (possibly in parallel), excluding |
|
* known pivot. All elements from the central part are |
|
* equal and therefore already sorted. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, upper, high); |
|
} else { |
|
sort(sorter, a, bits | 1, upper, high); |
|
} |
|
} |
|
high = lower; // Iterate along the left part |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using mixed insertion sort. |
|
* |
|
* Mixed insertion sort is combination of simple insertion sort, |
|
* pin insertion sort and pair insertion sort. |
|
* |
|
* In the context of Dual-Pivot Quicksort, the pivot element |
|
* from the left part plays the role of sentinel, because it |
|
* is less than any elements from the given part. Therefore, |
|
* expensive check of the left range can be skipped on each |
|
* iteration unless it is the leftmost call. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param end the index of the last element for simple insertion sort |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void mixedInsertionSort(int[] a, int low, int end, int high) { |
|
if (end == high) { |
|
/* |
|
* Invoke simple insertion sort on tiny array. |
|
*/ |
|
for (int i; ++low < end; ) { |
|
int ai = a[i = low]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} else { |
|
/* |
|
* Start with pin insertion sort on small part. |
|
* |
|
* Pin insertion sort is extended simple insertion sort. |
|
* The main idea of this sort is to put elements larger |
|
* than an element called pin to the end of array (the |
|
* proper area for such elements). It avoids expensive |
|
* movements of these elements through the whole array. |
|
*/ |
|
int pin = a[end]; |
|
for (int i, p = high; ++low < end; ) { |
|
int ai = a[i = low]; |
|
if (ai < a[i - 1]) { // Small element |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
a[i] = a[--i]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} else if (p > i && ai > pin) { // Large element |
|
/* |
|
* Find element smaller than pin. |
|
*/ |
|
while (a[--p] > pin); |
|
/* |
|
* Swap it with large element. |
|
*/ |
|
if (p > i) { |
|
ai = a[p]; |
|
a[p] = a[i]; |
|
} |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
/* |
|
* Continue with pair insertion sort on remain part. |
|
*/ |
|
for (int i; low < high; ++low) { |
|
int a1 = a[i = low], a2 = a[++low]; |
|
/* |
|
* Insert two elements per iteration: at first, insert the |
|
* larger element and then insert the smaller element, but |
|
* from the position where the larger element was inserted. |
|
*/ |
|
if (a1 > a2) { |
|
while (a1 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a1; |
|
while (a2 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a2; |
|
} else if (a1 < a[i - 1]) { |
|
while (a2 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a2; |
|
while (a1 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a1; |
|
} |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void insertionSort(int[] a, int low, int high) { |
|
for (int i, k = low; ++k < high; ) { |
|
int ai = a[i = k]; |
|
if (ai < a[i - 1]) { |
|
while (--i >= low && ai < a[i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using heap sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void heapSort(int[] a, int low, int high) { |
|
for (int k = (low + high) >>> 1; k > low; ) { |
|
pushDown(a, --k, a[k], low, high); |
|
} |
|
while (--high > low) { |
|
int max = a[low]; |
|
pushDown(a, low, a[high], low, high); |
|
a[high] = max; |
|
} |
|
} |
|
/** |
|
* Pushes specified element down during heap sort. |
|
* |
|
* @param a the given array |
|
* @param p the start index |
|
* @param value the given element |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void pushDown(int[] a, int p, int value, int low, int high) { |
|
for (int k ;; a[p] = a[p = k]) { |
|
k = (p << 1) - low + 2; // Index of the right child |
|
if (k > high) { |
|
break; |
|
} |
|
if (k == high || a[k] < a[k - 1]) { |
|
--k; |
|
} |
|
if (a[k] <= value) { |
|
break; |
|
} |
|
} |
|
a[p] = value; |
|
} |
|
/** |
|
* Tries to sort the specified range of the array. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param low the index of the first element to be sorted |
|
* @param size the array size |
|
* @return true if finally sorted, false otherwise |
|
*/ |
|
private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) { |
|
/* |
|
* The run array is constructed only if initial runs are |
|
* long enough to continue, run[i] then holds start index |
|
* of the i-th sequence of elements in non-descending order. |
|
*/ |
|
int[] run = null; |
|
int high = low + size; |
|
int count = 1, last = low; |
|
/* |
|
* Identify all possible runs. |
|
*/ |
|
for (int k = low + 1; k < high; ) { |
|
/* |
|
* Find the end index of the current run. |
|
*/ |
|
if (a[k - 1] < a[k]) { |
|
// Identify ascending sequence |
|
while (++k < high && a[k - 1] <= a[k]); |
|
} else if (a[k - 1] > a[k]) { |
|
// Identify descending sequence |
|
while (++k < high && a[k - 1] >= a[k]); |
|
// Reverse into ascending order |
|
for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { |
|
int ai = a[i]; a[i] = a[j]; a[j] = ai; |
|
} |
|
} else { // Identify constant sequence |
|
for (int ak = a[k]; ++k < high && ak == a[k]; ); |
|
if (k < high) { |
|
continue; |
|
} |
|
} |
|
/* |
|
* Check special cases. |
|
*/ |
|
if (run == null) { |
|
if (k == high) { |
|
/* |
|
* The array is monotonous sequence, |
|
* and therefore already sorted. |
|
*/ |
|
return true; |
|
} |
|
if (k - low < MIN_FIRST_RUN_SIZE) { |
|
/* |
|
* The first run is too small |
|
* to proceed with scanning. |
|
*/ |
|
return false; |
|
} |
|
run = new int[((size >> 10) | 0x7F) & 0x3FF]; |
|
run[0] = low; |
|
} else if (a[last - 1] > a[last]) { |
|
if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { |
|
/* |
|
* The first runs are not long |
|
* enough to continue scanning. |
|
*/ |
|
return false; |
|
} |
|
if (++count == MAX_RUN_CAPACITY) { |
|
/* |
|
* Array is not highly structured. |
|
*/ |
|
return false; |
|
} |
|
if (count == run.length) { |
|
/* |
|
* Increase capacity of index array. |
|
*/ |
|
run = Arrays.copyOf(run, count << 1); |
|
} |
|
} |
|
run[count] = (last = k); |
|
} |
|
/* |
|
* Merge runs of highly structured array. |
|
*/ |
|
if (count > 1) { |
|
int[] b; int offset = low; |
|
if (sorter == null || (b = (int[]) sorter.b) == null) { |
|
b = new int[size]; |
|
} else { |
|
offset = sorter.offset; |
|
} |
|
mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); |
|
} |
|
return true; |
|
} |
|
/** |
|
* Merges the specified runs. |
|
* |
|
* @param a the source array |
|
* @param b the temporary buffer used in merging |
|
* @param offset the start index in the source, inclusive |
|
* @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) |
|
* @param parallel indicates whether merging is performed in parallel |
|
* @param run the start indexes of the runs, inclusive |
|
* @param lo the start index of the first run, inclusive |
|
* @param hi the start index of the last run, inclusive |
|
* @return the destination where runs are merged |
|
*/ |
|
private static int[] mergeRuns(int[] a, int[] b, int offset, |
|
int aim, boolean parallel, int[] run, int lo, int hi) { |
|
if (hi - lo == 1) { |
|
if (aim >= 0) { |
|
return a; |
|
} |
|
for (int i = run[hi], j = i - offset, low = run[lo]; i > low; |
|
b[--j] = a[--i] |
|
); |
|
return b; |
|
} |
|
/* |
|
* Split into approximately equal parts. |
|
*/ |
|
int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; |
|
while (run[++mi + 1] <= rmi); |
|
/* |
|
* Merge the left and right parts. |
|
*/ |
|
int[] a1, a2; |
|
if (parallel && hi - lo > MIN_RUN_COUNT) { |
|
RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); |
|
a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); |
|
a2 = (int[]) merger.getDestination(); |
|
} else { |
|
a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); |
|
a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); |
|
} |
|
int[] dst = a1 == a ? b : a; |
|
int k = a1 == a ? run[lo] - offset : run[lo]; |
|
int lo1 = a1 == b ? run[lo] - offset : run[lo]; |
|
int hi1 = a1 == b ? run[mi] - offset : run[mi]; |
|
int lo2 = a2 == b ? run[mi] - offset : run[mi]; |
|
int hi2 = a2 == b ? run[hi] - offset : run[hi]; |
|
if (parallel) { |
|
new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); |
|
} else { |
|
mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); |
|
} |
|
return dst; |
|
} |
|
/** |
|
* Merges the sorted parts. |
|
* |
|
* @param merger parallel context |
|
* @param dst the destination where parts are merged |
|
* @param k the start index of the destination, inclusive |
|
* @param a1 the first part |
|
* @param lo1 the start index of the first part, inclusive |
|
* @param hi1 the end index of the first part, exclusive |
|
* @param a2 the second part |
|
* @param lo2 the start index of the second part, inclusive |
|
* @param hi2 the end index of the second part, exclusive |
|
*/ |
|
private static void mergeParts(Merger merger, int[] dst, int k, |
|
int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) { |
|
if (merger != null && a1 == a2) { |
|
while (true) { |
|
/* |
|
* The first part must be larger. |
|
*/ |
|
if (hi1 - lo1 < hi2 - lo2) { |
|
int lo = lo1; lo1 = lo2; lo2 = lo; |
|
int hi = hi1; hi1 = hi2; hi2 = hi; |
|
} |
|
/* |
|
* Small parts will be merged sequentially. |
|
*/ |
|
if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { |
|
break; |
|
} |
|
/* |
|
* Find the median of the larger part. |
|
*/ |
|
int mi1 = (lo1 + hi1) >>> 1; |
|
int key = a1[mi1]; |
|
int mi2 = hi2; |
|
/* |
|
* Partition the smaller part. |
|
*/ |
|
for (int loo = lo2; loo < mi2; ) { |
|
int t = (loo + mi2) >>> 1; |
|
if (key > a2[t]) { |
|
loo = t + 1; |
|
} else { |
|
mi2 = t; |
|
} |
|
} |
|
int d = mi2 - lo2 + mi1 - lo1; |
|
/* |
|
* Merge the right sub-parts in parallel. |
|
*/ |
|
merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); |
|
/* |
|
* Process the sub-left parts. |
|
*/ |
|
hi1 = mi1; |
|
hi2 = mi2; |
|
} |
|
} |
|
/* |
|
* Merge small parts sequentially. |
|
*/ |
|
while (lo1 < hi1 && lo2 < hi2) { |
|
dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; |
|
} |
|
if (dst != a1 || k < lo1) { |
|
while (lo1 < hi1) { |
|
dst[k++] = a1[lo1++]; |
|
} |
|
} |
|
if (dst != a2 || k < lo2) { |
|
while (lo2 < hi2) { |
|
dst[k++] = a2[lo2++]; |
|
} |
|
} |
|
} |
|
// [long] |
|
/** |
|
* Sorts the specified range of the array using parallel merge |
|
* sort and/or Dual-Pivot Quicksort. |
|
* |
|
* To balance the faster splitting and parallelism of merge sort |
|
* with the faster element partitioning of Quicksort, ranges are |
|
* subdivided in tiers such that, if there is enough parallelism, |
|
* the four-way parallel merge is started, still ensuring enough |
|
* parallelism to process the partitions. |
|
* |
|
* @param a the array to be sorted |
|
* @param parallelism the parallelism level |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(long[] a, int parallelism, int low, int high) { |
|
int size = high - low; |
|
if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
|
int depth = getDepth(parallelism, size >> 12); |
|
long[] b = depth == 0 ? null : new long[size]; |
|
new Sorter(null, a, b, low, size, low, depth).invoke(); |
|
} else { |
|
sort(null, a, 0, low, high); |
|
} |
|
} |
|
/** |
|
* Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
* other sorts in special-cases, possibly with parallel partitions. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param bits the combination of recursion depth and bit flag, where |
|
* the right bit "0" indicates that array is the leftmost part |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(Sorter sorter, long[] a, int bits, int low, int high) { |
|
while (true) { |
|
int end = high - 1, size = high - low; |
|
/* |
|
* Run mixed insertion sort on small non-leftmost parts. |
|
*/ |
|
if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
|
mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
|
return; |
|
} |
|
/* |
|
* Invoke insertion sort on small leftmost part. |
|
*/ |
|
if (size < MAX_INSERTION_SORT_SIZE) { |
|
insertionSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Check if the whole array or large non-leftmost |
|
* parts are nearly sorted and then merge runs. |
|
*/ |
|
if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
|
&& tryMergeRuns(sorter, a, low, size)) { |
|
return; |
|
} |
|
/* |
|
* Switch to heap sort if execution |
|
* time is becoming quadratic. |
|
*/ |
|
if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
heapSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Use an inexpensive approximation of the golden ratio |
|
* to select five sample elements and determine pivots. |
|
*/ |
|
int step = (size >> 3) * 3 + 3; |
|
/* |
|
* Five elements around (and including) the central element |
|
* will be used for pivot selection as described below. The |
|
* unequal choice of spacing these elements was empirically |
|
* determined to work well on a wide variety of inputs. |
|
*/ |
|
int e1 = low + step; |
|
int e5 = end - step; |
|
int e3 = (e1 + e5) >>> 1; |
|
int e2 = (e1 + e3) >>> 1; |
|
int e4 = (e3 + e5) >>> 1; |
|
long a3 = a[e3]; |
|
/* |
|
* Sort these elements in place by the combination |
|
* of 4-element sorting network and insertion sort. |
|
* |
|
* 5 ------o-----------o------------ |
|
* | | |
|
* 4 ------|-----o-----o-----o------ |
|
* | | | |
|
* 2 ------o-----|-----o-----o------ |
|
* | | |
|
* 1 ------------o-----o------------ |
|
*/ |
|
if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
|
if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
|
if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
|
if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
|
if (a3 < a[e2]) { |
|
if (a3 < a[e1]) { |
|
a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
|
} else { |
|
a[e3] = a[e2]; a[e2] = a3; |
|
} |
|
} else if (a3 > a[e4]) { |
|
if (a3 > a[e5]) { |
|
a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
|
} else { |
|
a[e3] = a[e4]; a[e4] = a3; |
|
} |
|
} |
|
// Pointers |
|
int lower = low; // The index of the last element of the left part |
|
int upper = end; // The index of the first element of the right part |
|
/* |
|
* Partitioning with 2 pivots in case of different elements. |
|
*/ |
|
if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
|
/* |
|
* Use the first and fifth of the five sorted elements as |
|
* the pivots. These values are inexpensive approximation |
|
* of tertiles. Note, that pivot1 < pivot2. |
|
*/ |
|
long pivot1 = a[e1]; |
|
long pivot2 = a[e5]; |
|
/* |
|
* The first and the last elements to be sorted are moved |
|
* to the locations formerly occupied by the pivots. When |
|
* partitioning is completed, the pivots are swapped back |
|
* into their final positions, and excluded from the next |
|
* subsequent sorting. |
|
*/ |
|
a[e1] = a[lower]; |
|
a[e5] = a[upper]; |
|
/* |
|
* Skip elements, which are less or greater than the pivots. |
|
*/ |
|
while (a[++lower] < pivot1); |
|
while (a[--upper] > pivot2); |
|
/* |
|
* Backward 3-interval partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------------+ |
|
* | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
|
* +------------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot1 |
|
* pivot1 <= all in (k, upper) <= pivot2 |
|
* all in [upper, end) > pivot2 |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int unused = --lower, k = ++upper; --k > lower; ) { |
|
long ak = a[k]; |
|
if (ak < pivot1) { // Move a[k] to the left side |
|
while (lower < k) { |
|
if (a[++lower] >= pivot1) { |
|
if (a[lower] > pivot2) { |
|
a[k] = a[--upper]; |
|
a[upper] = a[lower]; |
|
} else { |
|
a[k] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
break; |
|
} |
|
} |
|
} else if (ak > pivot2) { // Move a[k] to the right side |
|
a[k] = a[--upper]; |
|
a[upper] = ak; |
|
} |
|
} |
|
/* |
|
* Swap the pivots into their final positions. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot1; |
|
a[end] = a[upper]; a[upper] = pivot2; |
|
/* |
|
* Sort non-left parts recursively (possibly in parallel), |
|
* excluding known pivots. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, lower + 1, upper); |
|
sorter.forkSorter(bits | 1, upper + 1, high); |
|
} else { |
|
sort(sorter, a, bits | 1, lower + 1, upper); |
|
sort(sorter, a, bits | 1, upper + 1, high); |
|
} |
|
} else { // Use single pivot in case of many equal elements |
|
/* |
|
* Use the third of the five sorted elements as the pivot. |
|
* This value is inexpensive approximation of the median. |
|
*/ |
|
long pivot = a[e3]; |
|
/* |
|
* The first element to be sorted is moved to the |
|
* location formerly occupied by the pivot. After |
|
* completion of partitioning the pivot is swapped |
|
* back into its final position, and excluded from |
|
* the next subsequent sorting. |
|
*/ |
|
a[e3] = a[lower]; |
|
/* |
|
* Traditional 3-way (Dutch National Flag) partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------+ |
|
* | < pivot | ? | == pivot | > pivot | |
|
* +------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot |
|
* all in (k, upper) == pivot |
|
* all in [upper, end] > pivot |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int k = ++upper; --k > lower; ) { |
|
long ak = a[k]; |
|
if (ak != pivot) { |
|
a[k] = pivot; |
|
if (ak < pivot) { // Move a[k] to the left side |
|
while (a[++lower] < pivot); |
|
if (a[lower] > pivot) { |
|
a[--upper] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
} else { // ak > pivot - Move a[k] to the right side |
|
a[--upper] = ak; |
|
} |
|
} |
|
} |
|
/* |
|
* Swap the pivot into its final position. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot; |
|
/* |
|
* Sort the right part (possibly in parallel), excluding |
|
* known pivot. All elements from the central part are |
|
* equal and therefore already sorted. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, upper, high); |
|
} else { |
|
sort(sorter, a, bits | 1, upper, high); |
|
} |
|
} |
|
high = lower; // Iterate along the left part |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using mixed insertion sort. |
|
* |
|
* Mixed insertion sort is combination of simple insertion sort, |
|
* pin insertion sort and pair insertion sort. |
|
* |
|
* In the context of Dual-Pivot Quicksort, the pivot element |
|
* from the left part plays the role of sentinel, because it |
|
* is less than any elements from the given part. Therefore, |
|
* expensive check of the left range can be skipped on each |
|
* iteration unless it is the leftmost call. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param end the index of the last element for simple insertion sort |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void mixedInsertionSort(long[] a, int low, int end, int high) { |
|
if (end == high) { |
|
/* |
|
* Invoke simple insertion sort on tiny array. |
|
*/ |
|
for (int i; ++low < end; ) { |
|
long ai = a[i = low]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} else { |
|
/* |
|
* Start with pin insertion sort on small part. |
|
* |
|
* Pin insertion sort is extended simple insertion sort. |
|
* The main idea of this sort is to put elements larger |
|
* than an element called pin to the end of array (the |
|
* proper area for such elements). It avoids expensive |
|
* movements of these elements through the whole array. |
|
*/ |
|
long pin = a[end]; |
|
for (int i, p = high; ++low < end; ) { |
|
long ai = a[i = low]; |
|
if (ai < a[i - 1]) { // Small element |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
a[i] = a[--i]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} else if (p > i && ai > pin) { // Large element |
|
/* |
|
* Find element smaller than pin. |
|
*/ |
|
while (a[--p] > pin); |
|
/* |
|
* Swap it with large element. |
|
*/ |
|
if (p > i) { |
|
ai = a[p]; |
|
a[p] = a[i]; |
|
} |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
/* |
|
* Continue with pair insertion sort on remain part. |
|
*/ |
|
for (int i; low < high; ++low) { |
|
long a1 = a[i = low], a2 = a[++low]; |
|
/* |
|
* Insert two elements per iteration: at first, insert the |
|
* larger element and then insert the smaller element, but |
|
* from the position where the larger element was inserted. |
|
*/ |
|
if (a1 > a2) { |
|
while (a1 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a1; |
|
while (a2 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a2; |
|
} else if (a1 < a[i - 1]) { |
|
while (a2 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a2; |
|
while (a1 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a1; |
|
} |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void insertionSort(long[] a, int low, int high) { |
|
for (int i, k = low; ++k < high; ) { |
|
long ai = a[i = k]; |
|
if (ai < a[i - 1]) { |
|
while (--i >= low && ai < a[i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using heap sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void heapSort(long[] a, int low, int high) { |
|
for (int k = (low + high) >>> 1; k > low; ) { |
|
pushDown(a, --k, a[k], low, high); |
|
} |
|
while (--high > low) { |
|
long max = a[low]; |
|
pushDown(a, low, a[high], low, high); |
|
a[high] = max; |
|
} |
|
} |
|
/** |
|
* Pushes specified element down during heap sort. |
|
* |
|
* @param a the given array |
|
* @param p the start index |
|
* @param value the given element |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void pushDown(long[] a, int p, long value, int low, int high) { |
|
for (int k ;; a[p] = a[p = k]) { |
|
k = (p << 1) - low + 2; // Index of the right child |
|
if (k > high) { |
|
break; |
|
} |
|
if (k == high || a[k] < a[k - 1]) { |
|
--k; |
|
} |
|
if (a[k] <= value) { |
|
break; |
|
} |
|
} |
|
a[p] = value; |
|
} |
|
/** |
|
* Tries to sort the specified range of the array. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param low the index of the first element to be sorted |
|
* @param size the array size |
|
* @return true if finally sorted, false otherwise |
|
*/ |
|
private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) { |
|
/* |
|
* The run array is constructed only if initial runs are |
|
* long enough to continue, run[i] then holds start index |
|
* of the i-th sequence of elements in non-descending order. |
|
*/ |
|
int[] run = null; |
|
int high = low + size; |
|
int count = 1, last = low; |
|
/* |
|
* Identify all possible runs. |
|
*/ |
|
for (int k = low + 1; k < high; ) { |
|
/* |
|
* Find the end index of the current run. |
|
*/ |
|
if (a[k - 1] < a[k]) { |
|
// Identify ascending sequence |
|
while (++k < high && a[k - 1] <= a[k]); |
|
} else if (a[k - 1] > a[k]) { |
|
// Identify descending sequence |
|
while (++k < high && a[k - 1] >= a[k]); |
|
// Reverse into ascending order |
|
for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { |
|
long ai = a[i]; a[i] = a[j]; a[j] = ai; |
|
} |
|
} else { // Identify constant sequence |
|
for (long ak = a[k]; ++k < high && ak == a[k]; ); |
|
if (k < high) { |
|
continue; |
|
} |
|
} |
|
/* |
|
* Check special cases. |
|
*/ |
|
if (run == null) { |
|
if (k == high) { |
|
/* |
|
* The array is monotonous sequence, |
|
* and therefore already sorted. |
|
*/ |
|
return true; |
|
} |
|
if (k - low < MIN_FIRST_RUN_SIZE) { |
|
/* |
|
* The first run is too small |
|
* to proceed with scanning. |
|
*/ |
|
return false; |
|
} |
|
run = new int[((size >> 10) | 0x7F) & 0x3FF]; |
|
run[0] = low; |
|
} else if (a[last - 1] > a[last]) { |
|
if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { |
|
/* |
|
* The first runs are not long |
|
* enough to continue scanning. |
|
*/ |
|
return false; |
|
} |
|
if (++count == MAX_RUN_CAPACITY) { |
|
/* |
|
* Array is not highly structured. |
|
*/ |
|
return false; |
|
} |
|
if (count == run.length) { |
|
/* |
|
* Increase capacity of index array. |
|
*/ |
|
run = Arrays.copyOf(run, count << 1); |
|
} |
|
} |
|
run[count] = (last = k); |
|
} |
|
/* |
|
* Merge runs of highly structured array. |
|
*/ |
|
if (count > 1) { |
|
long[] b; int offset = low; |
|
if (sorter == null || (b = (long[]) sorter.b) == null) { |
|
b = new long[size]; |
|
} else { |
|
offset = sorter.offset; |
|
} |
|
mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); |
|
} |
|
return true; |
|
} |
|
/** |
|
* Merges the specified runs. |
|
* |
|
* @param a the source array |
|
* @param b the temporary buffer used in merging |
|
* @param offset the start index in the source, inclusive |
|
* @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) |
|
* @param parallel indicates whether merging is performed in parallel |
|
* @param run the start indexes of the runs, inclusive |
|
* @param lo the start index of the first run, inclusive |
|
* @param hi the start index of the last run, inclusive |
|
* @return the destination where runs are merged |
|
*/ |
|
private static long[] mergeRuns(long[] a, long[] b, int offset, |
|
int aim, boolean parallel, int[] run, int lo, int hi) { |
|
if (hi - lo == 1) { |
|
if (aim >= 0) { |
|
return a; |
|
} |
|
for (int i = run[hi], j = i - offset, low = run[lo]; i > low; |
|
b[--j] = a[--i] |
|
); |
|
return b; |
|
} |
|
/* |
|
* Split into approximately equal parts. |
|
*/ |
|
int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; |
|
while (run[++mi + 1] <= rmi); |
|
/* |
|
* Merge the left and right parts. |
|
*/ |
|
long[] a1, a2; |
|
if (parallel && hi - lo > MIN_RUN_COUNT) { |
|
RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); |
|
a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); |
|
a2 = (long[]) merger.getDestination(); |
|
} else { |
|
a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); |
|
a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); |
|
} |
|
long[] dst = a1 == a ? b : a; |
|
int k = a1 == a ? run[lo] - offset : run[lo]; |
|
int lo1 = a1 == b ? run[lo] - offset : run[lo]; |
|
int hi1 = a1 == b ? run[mi] - offset : run[mi]; |
|
int lo2 = a2 == b ? run[mi] - offset : run[mi]; |
|
int hi2 = a2 == b ? run[hi] - offset : run[hi]; |
|
if (parallel) { |
|
new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); |
|
} else { |
|
mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); |
|
} |
|
return dst; |
|
} |
|
/** |
|
* Merges the sorted parts. |
|
* |
|
* @param merger parallel context |
|
* @param dst the destination where parts are merged |
|
* @param k the start index of the destination, inclusive |
|
* @param a1 the first part |
|
* @param lo1 the start index of the first part, inclusive |
|
* @param hi1 the end index of the first part, exclusive |
|
* @param a2 the second part |
|
* @param lo2 the start index of the second part, inclusive |
|
* @param hi2 the end index of the second part, exclusive |
|
*/ |
|
private static void mergeParts(Merger merger, long[] dst, int k, |
|
long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) { |
|
if (merger != null && a1 == a2) { |
|
while (true) { |
|
/* |
|
* The first part must be larger. |
|
*/ |
|
if (hi1 - lo1 < hi2 - lo2) { |
|
int lo = lo1; lo1 = lo2; lo2 = lo; |
|
int hi = hi1; hi1 = hi2; hi2 = hi; |
|
} |
|
/* |
|
* Small parts will be merged sequentially. |
|
*/ |
|
if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { |
|
break; |
|
} |
|
/* |
|
* Find the median of the larger part. |
|
*/ |
|
int mi1 = (lo1 + hi1) >>> 1; |
|
long key = a1[mi1]; |
|
int mi2 = hi2; |
|
/* |
|
* Partition the smaller part. |
|
*/ |
|
for (int loo = lo2; loo < mi2; ) { |
|
int t = (loo + mi2) >>> 1; |
|
if (key > a2[t]) { |
|
loo = t + 1; |
|
} else { |
|
mi2 = t; |
|
} |
|
} |
|
int d = mi2 - lo2 + mi1 - lo1; |
|
/* |
|
* Merge the right sub-parts in parallel. |
|
*/ |
|
merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); |
|
/* |
|
* Process the sub-left parts. |
|
*/ |
|
hi1 = mi1; |
|
hi2 = mi2; |
|
} |
|
} |
|
/* |
|
* Merge small parts sequentially. |
|
*/ |
|
while (lo1 < hi1 && lo2 < hi2) { |
|
dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; |
|
} |
|
if (dst != a1 || k < lo1) { |
|
while (lo1 < hi1) { |
|
dst[k++] = a1[lo1++]; |
|
} |
|
} |
|
if (dst != a2 || k < lo2) { |
|
while (lo2 < hi2) { |
|
dst[k++] = a2[lo2++]; |
|
} |
|
} |
|
} |
|
// [byte] |
|
/** |
|
* Sorts the specified range of the array using |
|
* counting sort or insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(byte[] a, int low, int high) { |
|
if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) { |
|
countingSort(a, low, high); |
|
} else { |
|
insertionSort(a, low, high); |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void insertionSort(byte[] a, int low, int high) { |
|
for (int i, k = low; ++k < high; ) { |
|
byte ai = a[i = k]; |
|
if (ai < a[i - 1]) { |
|
while (--i >= low && ai < a[i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
} |
|
/** |
|
* The number of distinct byte values. |
|
*/ |
|
private static final int NUM_BYTE_VALUES = 1 << 8; |
|
/** |
|
* Max index of byte counter. |
|
*/ |
|
private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1; |
|
/** |
|
* Sorts the specified range of the array using counting sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void countingSort(byte[] a, int low, int high) { |
|
int[] count = new int[NUM_BYTE_VALUES]; |
|
/* |
|
* Compute a histogram with the number of each values. |
|
*/ |
|
for (int i = high; i > low; ++count[a[--i] & 0xFF]); |
|
/* |
|
* Place values on their final positions. |
|
*/ |
|
if (high - low > NUM_BYTE_VALUES) { |
|
for (int i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) { |
|
int value = i & 0xFF; |
|
for (low = high - count[value]; high > low; |
|
a[--high] = (byte) value |
|
); |
|
} |
|
} else { |
|
for (int i = MAX_BYTE_INDEX; high > low; ) { |
|
while (count[--i & 0xFF] == 0); |
|
int value = i & 0xFF; |
|
int c = count[value]; |
|
do { |
|
a[--high] = (byte) value; |
|
} while (--c > 0); |
|
} |
|
} |
|
} |
|
// [char] |
|
/** |
|
* Sorts the specified range of the array using |
|
* counting sort or Dual-Pivot Quicksort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(char[] a, int low, int high) { |
|
if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { |
|
countingSort(a, low, high); |
|
} else { |
|
sort(a, 0, low, high); |
|
} |
|
} |
|
/** |
|
* Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
* other sorts in special-cases, possibly with parallel partitions. |
|
* |
|
* @param a the array to be sorted |
|
* @param bits the combination of recursion depth and bit flag, where |
|
* the right bit "0" indicates that array is the leftmost part |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(char[] a, int bits, int low, int high) { |
|
while (true) { |
|
int end = high - 1, size = high - low; |
|
/* |
|
* Invoke insertion sort on small leftmost part. |
|
*/ |
|
if (size < MAX_INSERTION_SORT_SIZE) { |
|
insertionSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Switch to counting sort if execution |
|
* time is becoming quadratic. |
|
*/ |
|
if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
countingSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Use an inexpensive approximation of the golden ratio |
|
* to select five sample elements and determine pivots. |
|
*/ |
|
int step = (size >> 3) * 3 + 3; |
|
/* |
|
* Five elements around (and including) the central element |
|
* will be used for pivot selection as described below. The |
|
* unequal choice of spacing these elements was empirically |
|
* determined to work well on a wide variety of inputs. |
|
*/ |
|
int e1 = low + step; |
|
int e5 = end - step; |
|
int e3 = (e1 + e5) >>> 1; |
|
int e2 = (e1 + e3) >>> 1; |
|
int e4 = (e3 + e5) >>> 1; |
|
char a3 = a[e3]; |
|
/* |
|
* Sort these elements in place by the combination |
|
* of 4-element sorting network and insertion sort. |
|
* |
|
* 5 ------o-----------o------------ |
|
* | | |
|
* 4 ------|-----o-----o-----o------ |
|
* | | | |
|
* 2 ------o-----|-----o-----o------ |
|
* | | |
|
* 1 ------------o-----o------------ |
|
*/ |
|
if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
|
if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
|
if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
|
if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
|
if (a3 < a[e2]) { |
|
if (a3 < a[e1]) { |
|
a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
|
} else { |
|
a[e3] = a[e2]; a[e2] = a3; |
|
} |
|
} else if (a3 > a[e4]) { |
|
if (a3 > a[e5]) { |
|
a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
|
} else { |
|
a[e3] = a[e4]; a[e4] = a3; |
|
} |
|
} |
|
// Pointers |
|
int lower = low; // The index of the last element of the left part |
|
int upper = end; // The index of the first element of the right part |
|
/* |
|
* Partitioning with 2 pivots in case of different elements. |
|
*/ |
|
if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
|
/* |
|
* Use the first and fifth of the five sorted elements as |
|
* the pivots. These values are inexpensive approximation |
|
* of tertiles. Note, that pivot1 < pivot2. |
|
*/ |
|
char pivot1 = a[e1]; |
|
char pivot2 = a[e5]; |
|
/* |
|
* The first and the last elements to be sorted are moved |
|
* to the locations formerly occupied by the pivots. When |
|
* partitioning is completed, the pivots are swapped back |
|
* into their final positions, and excluded from the next |
|
* subsequent sorting. |
|
*/ |
|
a[e1] = a[lower]; |
|
a[e5] = a[upper]; |
|
/* |
|
* Skip elements, which are less or greater than the pivots. |
|
*/ |
|
while (a[++lower] < pivot1); |
|
while (a[--upper] > pivot2); |
|
/* |
|
* Backward 3-interval partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------------+ |
|
* | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
|
* +------------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot1 |
|
* pivot1 <= all in (k, upper) <= pivot2 |
|
* all in [upper, end) > pivot2 |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int unused = --lower, k = ++upper; --k > lower; ) { |
|
char ak = a[k]; |
|
if (ak < pivot1) { // Move a[k] to the left side |
|
while (lower < k) { |
|
if (a[++lower] >= pivot1) { |
|
if (a[lower] > pivot2) { |
|
a[k] = a[--upper]; |
|
a[upper] = a[lower]; |
|
} else { |
|
a[k] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
break; |
|
} |
|
} |
|
} else if (ak > pivot2) { // Move a[k] to the right side |
|
a[k] = a[--upper]; |
|
a[upper] = ak; |
|
} |
|
} |
|
/* |
|
* Swap the pivots into their final positions. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot1; |
|
a[end] = a[upper]; a[upper] = pivot2; |
|
/* |
|
* Sort non-left parts recursively, |
|
* excluding known pivots. |
|
*/ |
|
sort(a, bits | 1, lower + 1, upper); |
|
sort(a, bits | 1, upper + 1, high); |
|
} else { // Use single pivot in case of many equal elements |
|
/* |
|
* Use the third of the five sorted elements as the pivot. |
|
* This value is inexpensive approximation of the median. |
|
*/ |
|
char pivot = a[e3]; |
|
/* |
|
* The first element to be sorted is moved to the |
|
* location formerly occupied by the pivot. After |
|
* completion of partitioning the pivot is swapped |
|
* back into its final position, and excluded from |
|
* the next subsequent sorting. |
|
*/ |
|
a[e3] = a[lower]; |
|
/* |
|
* Traditional 3-way (Dutch National Flag) partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------+ |
|
* | < pivot | ? | == pivot | > pivot | |
|
* +------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot |
|
* all in (k, upper) == pivot |
|
* all in [upper, end] > pivot |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int k = ++upper; --k > lower; ) { |
|
char ak = a[k]; |
|
if (ak != pivot) { |
|
a[k] = pivot; |
|
if (ak < pivot) { // Move a[k] to the left side |
|
while (a[++lower] < pivot); |
|
if (a[lower] > pivot) { |
|
a[--upper] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
} else { // ak > pivot - Move a[k] to the right side |
|
a[--upper] = ak; |
|
} |
|
} |
|
} |
|
/* |
|
* Swap the pivot into its final position. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot; |
|
/* |
|
* Sort the right part, excluding known pivot. |
|
* All elements from the central part are |
|
* equal and therefore already sorted. |
|
*/ |
|
sort(a, bits | 1, upper, high); |
|
} |
|
high = lower; // Iterate along the left part |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void insertionSort(char[] a, int low, int high) { |
|
for (int i, k = low; ++k < high; ) { |
|
char ai = a[i = k]; |
|
if (ai < a[i - 1]) { |
|
while (--i >= low && ai < a[i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
} |
|
/** |
|
* The number of distinct char values. |
|
*/ |
|
private static final int NUM_CHAR_VALUES = 1 << 16; |
|
/** |
|
* Sorts the specified range of the array using counting sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void countingSort(char[] a, int low, int high) { |
|
int[] count = new int[NUM_CHAR_VALUES]; |
|
/* |
|
* Compute a histogram with the number of each values. |
|
*/ |
|
for (int i = high; i > low; ++count[a[--i]]); |
|
/* |
|
* Place values on their final positions. |
|
*/ |
|
if (high - low > NUM_CHAR_VALUES) { |
|
for (int i = NUM_CHAR_VALUES; i > 0; ) { |
|
for (low = high - count[--i]; high > low; |
|
a[--high] = (char) i |
|
); |
|
} |
|
} else { |
|
for (int i = NUM_CHAR_VALUES; high > low; ) { |
|
while (count[--i] == 0); |
|
int c = count[i]; |
|
do { |
|
a[--high] = (char) i; |
|
} while (--c > 0); |
|
} |
|
} |
|
} |
|
// [short] |
|
/** |
|
* Sorts the specified range of the array using |
|
* counting sort or Dual-Pivot Quicksort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(short[] a, int low, int high) { |
|
if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { |
|
countingSort(a, low, high); |
|
} else { |
|
sort(a, 0, low, high); |
|
} |
|
} |
|
/** |
|
* Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
* other sorts in special-cases, possibly with parallel partitions. |
|
* |
|
* @param a the array to be sorted |
|
* @param bits the combination of recursion depth and bit flag, where |
|
* the right bit "0" indicates that array is the leftmost part |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(short[] a, int bits, int low, int high) { |
|
while (true) { |
|
int end = high - 1, size = high - low; |
|
/* |
|
* Invoke insertion sort on small leftmost part. |
|
*/ |
|
if (size < MAX_INSERTION_SORT_SIZE) { |
|
insertionSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Switch to counting sort if execution |
|
* time is becoming quadratic. |
|
*/ |
|
if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
countingSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Use an inexpensive approximation of the golden ratio |
|
* to select five sample elements and determine pivots. |
|
*/ |
|
int step = (size >> 3) * 3 + 3; |
|
/* |
|
* Five elements around (and including) the central element |
|
* will be used for pivot selection as described below. The |
|
* unequal choice of spacing these elements was empirically |
|
* determined to work well on a wide variety of inputs. |
|
*/ |
|
int e1 = low + step; |
|
int e5 = end - step; |
|
int e3 = (e1 + e5) >>> 1; |
|
int e2 = (e1 + e3) >>> 1; |
|
int e4 = (e3 + e5) >>> 1; |
|
short a3 = a[e3]; |
|
/* |
|
* Sort these elements in place by the combination |
|
* of 4-element sorting network and insertion sort. |
|
* |
|
* 5 ------o-----------o------------ |
|
* | | |
|
* 4 ------|-----o-----o-----o------ |
|
* | | | |
|
* 2 ------o-----|-----o-----o------ |
|
* | | |
|
* 1 ------------o-----o------------ |
|
*/ |
|
if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
|
if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
|
if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
|
if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
|
if (a3 < a[e2]) { |
|
if (a3 < a[e1]) { |
|
a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
|
} else { |
|
a[e3] = a[e2]; a[e2] = a3; |
|
} |
|
} else if (a3 > a[e4]) { |
|
if (a3 > a[e5]) { |
|
a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
|
} else { |
|
a[e3] = a[e4]; a[e4] = a3; |
|
} |
|
} |
|
// Pointers |
|
int lower = low; // The index of the last element of the left part |
|
int upper = end; // The index of the first element of the right part |
|
/* |
|
* Partitioning with 2 pivots in case of different elements. |
|
*/ |
|
if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
|
/* |
|
* Use the first and fifth of the five sorted elements as |
|
* the pivots. These values are inexpensive approximation |
|
* of tertiles. Note, that pivot1 < pivot2. |
|
*/ |
|
short pivot1 = a[e1]; |
|
short pivot2 = a[e5]; |
|
/* |
|
* The first and the last elements to be sorted are moved |
|
* to the locations formerly occupied by the pivots. When |
|
* partitioning is completed, the pivots are swapped back |
|
* into their final positions, and excluded from the next |
|
* subsequent sorting. |
|
*/ |
|
a[e1] = a[lower]; |
|
a[e5] = a[upper]; |
|
/* |
|
* Skip elements, which are less or greater than the pivots. |
|
*/ |
|
while (a[++lower] < pivot1); |
|
while (a[--upper] > pivot2); |
|
/* |
|
* Backward 3-interval partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------------+ |
|
* | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
|
* +------------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot1 |
|
* pivot1 <= all in (k, upper) <= pivot2 |
|
* all in [upper, end) > pivot2 |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int unused = --lower, k = ++upper; --k > lower; ) { |
|
short ak = a[k]; |
|
if (ak < pivot1) { // Move a[k] to the left side |
|
while (lower < k) { |
|
if (a[++lower] >= pivot1) { |
|
if (a[lower] > pivot2) { |
|
a[k] = a[--upper]; |
|
a[upper] = a[lower]; |
|
} else { |
|
a[k] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
break; |
|
} |
|
} |
|
} else if (ak > pivot2) { // Move a[k] to the right side |
|
a[k] = a[--upper]; |
|
a[upper] = ak; |
|
} |
|
} |
|
/* |
|
* Swap the pivots into their final positions. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot1; |
|
a[end] = a[upper]; a[upper] = pivot2; |
|
/* |
|
* Sort non-left parts recursively, |
|
* excluding known pivots. |
|
*/ |
|
sort(a, bits | 1, lower + 1, upper); |
|
sort(a, bits | 1, upper + 1, high); |
|
} else { // Use single pivot in case of many equal elements |
|
/* |
|
* Use the third of the five sorted elements as the pivot. |
|
* This value is inexpensive approximation of the median. |
|
*/ |
|
short pivot = a[e3]; |
|
/* |
|
* The first element to be sorted is moved to the |
|
* location formerly occupied by the pivot. After |
|
* completion of partitioning the pivot is swapped |
|
* back into its final position, and excluded from |
|
* the next subsequent sorting. |
|
*/ |
|
a[e3] = a[lower]; |
|
/* |
|
* Traditional 3-way (Dutch National Flag) partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------+ |
|
* | < pivot | ? | == pivot | > pivot | |
|
* +------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot |
|
* all in (k, upper) == pivot |
|
* all in [upper, end] > pivot |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int k = ++upper; --k > lower; ) { |
|
short ak = a[k]; |
|
if (ak != pivot) { |
|
a[k] = pivot; |
|
if (ak < pivot) { // Move a[k] to the left side |
|
while (a[++lower] < pivot); |
|
if (a[lower] > pivot) { |
|
a[--upper] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
} else { // ak > pivot - Move a[k] to the right side |
|
a[--upper] = ak; |
|
} |
|
} |
|
} |
|
/* |
|
* Swap the pivot into its final position. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot; |
|
/* |
|
* Sort the right part, excluding known pivot. |
|
* All elements from the central part are |
|
* equal and therefore already sorted. |
|
*/ |
|
sort(a, bits | 1, upper, high); |
|
} |
|
high = lower; // Iterate along the left part |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void insertionSort(short[] a, int low, int high) { |
|
for (int i, k = low; ++k < high; ) { |
|
short ai = a[i = k]; |
|
if (ai < a[i - 1]) { |
|
while (--i >= low && ai < a[i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
} |
|
/** |
|
* The number of distinct short values. |
|
*/ |
|
private static final int NUM_SHORT_VALUES = 1 << 16; |
|
/** |
|
* Max index of short counter. |
|
*/ |
|
private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1; |
|
/** |
|
* Sorts the specified range of the array using counting sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void countingSort(short[] a, int low, int high) { |
|
int[] count = new int[NUM_SHORT_VALUES]; |
|
/* |
|
* Compute a histogram with the number of each values. |
|
*/ |
|
for (int i = high; i > low; ++count[a[--i] & 0xFFFF]); |
|
/* |
|
* Place values on their final positions. |
|
*/ |
|
if (high - low > NUM_SHORT_VALUES) { |
|
for (int i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) { |
|
int value = i & 0xFFFF; |
|
for (low = high - count[value]; high > low; |
|
a[--high] = (short) value |
|
); |
|
} |
|
} else { |
|
for (int i = MAX_SHORT_INDEX; high > low; ) { |
|
while (count[--i & 0xFFFF] == 0); |
|
int value = i & 0xFFFF; |
|
int c = count[value]; |
|
do { |
|
a[--high] = (short) value; |
|
} while (--c > 0); |
|
} |
|
} |
|
} |
|
// [float] |
|
/** |
|
* Sorts the specified range of the array using parallel merge |
|
* sort and/or Dual-Pivot Quicksort. |
|
* |
|
* To balance the faster splitting and parallelism of merge sort |
|
* with the faster element partitioning of Quicksort, ranges are |
|
* subdivided in tiers such that, if there is enough parallelism, |
|
* the four-way parallel merge is started, still ensuring enough |
|
* parallelism to process the partitions. |
|
* |
|
* @param a the array to be sorted |
|
* @param parallelism the parallelism level |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(float[] a, int parallelism, int low, int high) { |
|
/* |
|
* Phase 1. Count the number of negative zero -0.0f, |
|
* turn them into positive zero, and move all NaNs |
|
* to the end of the array. |
|
*/ |
|
int numNegativeZero = 0; |
|
for (int k = high; k > low; ) { |
|
float ak = a[--k]; |
|
if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f |
|
numNegativeZero += 1; |
|
a[k] = 0.0f; |
|
} else if (ak != ak) { // ak is NaN |
|
a[k] = a[--high]; |
|
a[high] = ak; |
|
} |
|
} |
|
/* |
|
* Phase 2. Sort everything except NaNs, |
|
* which are already in place. |
|
*/ |
|
int size = high - low; |
|
if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
|
int depth = getDepth(parallelism, size >> 12); |
|
float[] b = depth == 0 ? null : new float[size]; |
|
new Sorter(null, a, b, low, size, low, depth).invoke(); |
|
} else { |
|
sort(null, a, 0, low, high); |
|
} |
|
/* |
|
* Phase 3. Turn positive zero 0.0f |
|
* back into negative zero -0.0f. |
|
*/ |
|
if (++numNegativeZero == 1) { |
|
return; |
|
} |
|
/* |
|
* Find the position one less than |
|
* the index of the first zero. |
|
*/ |
|
while (low <= high) { |
|
int middle = (low + high) >>> 1; |
|
if (a[middle] < 0) { |
|
low = middle + 1; |
|
} else { |
|
high = middle - 1; |
|
} |
|
} |
|
/* |
|
* Replace the required number of 0.0f by -0.0f. |
|
*/ |
|
while (--numNegativeZero > 0) { |
|
a[++high] = -0.0f; |
|
} |
|
} |
|
/** |
|
* Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
* other sorts in special-cases, possibly with parallel partitions. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param bits the combination of recursion depth and bit flag, where |
|
* the right bit "0" indicates that array is the leftmost part |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(Sorter sorter, float[] a, int bits, int low, int high) { |
|
while (true) { |
|
int end = high - 1, size = high - low; |
|
/* |
|
* Run mixed insertion sort on small non-leftmost parts. |
|
*/ |
|
if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
|
mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
|
return; |
|
} |
|
/* |
|
* Invoke insertion sort on small leftmost part. |
|
*/ |
|
if (size < MAX_INSERTION_SORT_SIZE) { |
|
insertionSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Check if the whole array or large non-leftmost |
|
* parts are nearly sorted and then merge runs. |
|
*/ |
|
if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
|
&& tryMergeRuns(sorter, a, low, size)) { |
|
return; |
|
} |
|
/* |
|
* Switch to heap sort if execution |
|
* time is becoming quadratic. |
|
*/ |
|
if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
heapSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Use an inexpensive approximation of the golden ratio |
|
* to select five sample elements and determine pivots. |
|
*/ |
|
int step = (size >> 3) * 3 + 3; |
|
/* |
|
* Five elements around (and including) the central element |
|
* will be used for pivot selection as described below. The |
|
* unequal choice of spacing these elements was empirically |
|
* determined to work well on a wide variety of inputs. |
|
*/ |
|
int e1 = low + step; |
|
int e5 = end - step; |
|
int e3 = (e1 + e5) >>> 1; |
|
int e2 = (e1 + e3) >>> 1; |
|
int e4 = (e3 + e5) >>> 1; |
|
float a3 = a[e3]; |
|
/* |
|
* Sort these elements in place by the combination |
|
* of 4-element sorting network and insertion sort. |
|
* |
|
* 5 ------o-----------o------------ |
|
* | | |
|
* 4 ------|-----o-----o-----o------ |
|
* | | | |
|
* 2 ------o-----|-----o-----o------ |
|
* | | |
|
* 1 ------------o-----o------------ |
|
*/ |
|
if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
|
if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
|
if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
|
if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
|
if (a3 < a[e2]) { |
|
if (a3 < a[e1]) { |
|
a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
|
} else { |
|
a[e3] = a[e2]; a[e2] = a3; |
|
} |
|
} else if (a3 > a[e4]) { |
|
if (a3 > a[e5]) { |
|
a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
|
} else { |
|
a[e3] = a[e4]; a[e4] = a3; |
|
} |
|
} |
|
// Pointers |
|
int lower = low; // The index of the last element of the left part |
|
int upper = end; // The index of the first element of the right part |
|
/* |
|
* Partitioning with 2 pivots in case of different elements. |
|
*/ |
|
if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
|
/* |
|
* Use the first and fifth of the five sorted elements as |
|
* the pivots. These values are inexpensive approximation |
|
* of tertiles. Note, that pivot1 < pivot2. |
|
*/ |
|
float pivot1 = a[e1]; |
|
float pivot2 = a[e5]; |
|
/* |
|
* The first and the last elements to be sorted are moved |
|
* to the locations formerly occupied by the pivots. When |
|
* partitioning is completed, the pivots are swapped back |
|
* into their final positions, and excluded from the next |
|
* subsequent sorting. |
|
*/ |
|
a[e1] = a[lower]; |
|
a[e5] = a[upper]; |
|
/* |
|
* Skip elements, which are less or greater than the pivots. |
|
*/ |
|
while (a[++lower] < pivot1); |
|
while (a[--upper] > pivot2); |
|
/* |
|
* Backward 3-interval partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------------+ |
|
* | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
|
* +------------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot1 |
|
* pivot1 <= all in (k, upper) <= pivot2 |
|
* all in [upper, end) > pivot2 |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int unused = --lower, k = ++upper; --k > lower; ) { |
|
float ak = a[k]; |
|
if (ak < pivot1) { // Move a[k] to the left side |
|
while (lower < k) { |
|
if (a[++lower] >= pivot1) { |
|
if (a[lower] > pivot2) { |
|
a[k] = a[--upper]; |
|
a[upper] = a[lower]; |
|
} else { |
|
a[k] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
break; |
|
} |
|
} |
|
} else if (ak > pivot2) { // Move a[k] to the right side |
|
a[k] = a[--upper]; |
|
a[upper] = ak; |
|
} |
|
} |
|
/* |
|
* Swap the pivots into their final positions. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot1; |
|
a[end] = a[upper]; a[upper] = pivot2; |
|
/* |
|
* Sort non-left parts recursively (possibly in parallel), |
|
* excluding known pivots. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, lower + 1, upper); |
|
sorter.forkSorter(bits | 1, upper + 1, high); |
|
} else { |
|
sort(sorter, a, bits | 1, lower + 1, upper); |
|
sort(sorter, a, bits | 1, upper + 1, high); |
|
} |
|
} else { // Use single pivot in case of many equal elements |
|
/* |
|
* Use the third of the five sorted elements as the pivot. |
|
* This value is inexpensive approximation of the median. |
|
*/ |
|
float pivot = a[e3]; |
|
/* |
|
* The first element to be sorted is moved to the |
|
* location formerly occupied by the pivot. After |
|
* completion of partitioning the pivot is swapped |
|
* back into its final position, and excluded from |
|
* the next subsequent sorting. |
|
*/ |
|
a[e3] = a[lower]; |
|
/* |
|
* Traditional 3-way (Dutch National Flag) partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------+ |
|
* | < pivot | ? | == pivot | > pivot | |
|
* +------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot |
|
* all in (k, upper) == pivot |
|
* all in [upper, end] > pivot |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int k = ++upper; --k > lower; ) { |
|
float ak = a[k]; |
|
if (ak != pivot) { |
|
a[k] = pivot; |
|
if (ak < pivot) { // Move a[k] to the left side |
|
while (a[++lower] < pivot); |
|
if (a[lower] > pivot) { |
|
a[--upper] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
} else { // ak > pivot - Move a[k] to the right side |
|
a[--upper] = ak; |
|
} |
|
} |
|
} |
|
/* |
|
* Swap the pivot into its final position. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot; |
|
/* |
|
* Sort the right part (possibly in parallel), excluding |
|
* known pivot. All elements from the central part are |
|
* equal and therefore already sorted. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, upper, high); |
|
} else { |
|
sort(sorter, a, bits | 1, upper, high); |
|
} |
|
} |
|
high = lower; // Iterate along the left part |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using mixed insertion sort. |
|
* |
|
* Mixed insertion sort is combination of simple insertion sort, |
|
* pin insertion sort and pair insertion sort. |
|
* |
|
* In the context of Dual-Pivot Quicksort, the pivot element |
|
* from the left part plays the role of sentinel, because it |
|
* is less than any elements from the given part. Therefore, |
|
* expensive check of the left range can be skipped on each |
|
* iteration unless it is the leftmost call. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param end the index of the last element for simple insertion sort |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void mixedInsertionSort(float[] a, int low, int end, int high) { |
|
if (end == high) { |
|
/* |
|
* Invoke simple insertion sort on tiny array. |
|
*/ |
|
for (int i; ++low < end; ) { |
|
float ai = a[i = low]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} else { |
|
/* |
|
* Start with pin insertion sort on small part. |
|
* |
|
* Pin insertion sort is extended simple insertion sort. |
|
* The main idea of this sort is to put elements larger |
|
* than an element called pin to the end of array (the |
|
* proper area for such elements). It avoids expensive |
|
* movements of these elements through the whole array. |
|
*/ |
|
float pin = a[end]; |
|
for (int i, p = high; ++low < end; ) { |
|
float ai = a[i = low]; |
|
if (ai < a[i - 1]) { // Small element |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
a[i] = a[--i]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} else if (p > i && ai > pin) { // Large element |
|
/* |
|
* Find element smaller than pin. |
|
*/ |
|
while (a[--p] > pin); |
|
/* |
|
* Swap it with large element. |
|
*/ |
|
if (p > i) { |
|
ai = a[p]; |
|
a[p] = a[i]; |
|
} |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
/* |
|
* Continue with pair insertion sort on remain part. |
|
*/ |
|
for (int i; low < high; ++low) { |
|
float a1 = a[i = low], a2 = a[++low]; |
|
/* |
|
* Insert two elements per iteration: at first, insert the |
|
* larger element and then insert the smaller element, but |
|
* from the position where the larger element was inserted. |
|
*/ |
|
if (a1 > a2) { |
|
while (a1 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a1; |
|
while (a2 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a2; |
|
} else if (a1 < a[i - 1]) { |
|
while (a2 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a2; |
|
while (a1 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a1; |
|
} |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void insertionSort(float[] a, int low, int high) { |
|
for (int i, k = low; ++k < high; ) { |
|
float ai = a[i = k]; |
|
if (ai < a[i - 1]) { |
|
while (--i >= low && ai < a[i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using heap sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void heapSort(float[] a, int low, int high) { |
|
for (int k = (low + high) >>> 1; k > low; ) { |
|
pushDown(a, --k, a[k], low, high); |
|
} |
|
while (--high > low) { |
|
float max = a[low]; |
|
pushDown(a, low, a[high], low, high); |
|
a[high] = max; |
|
} |
|
} |
|
/** |
|
* Pushes specified element down during heap sort. |
|
* |
|
* @param a the given array |
|
* @param p the start index |
|
* @param value the given element |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void pushDown(float[] a, int p, float value, int low, int high) { |
|
for (int k ;; a[p] = a[p = k]) { |
|
k = (p << 1) - low + 2; // Index of the right child |
|
if (k > high) { |
|
break; |
|
} |
|
if (k == high || a[k] < a[k - 1]) { |
|
--k; |
|
} |
|
if (a[k] <= value) { |
|
break; |
|
} |
|
} |
|
a[p] = value; |
|
} |
|
/** |
|
* Tries to sort the specified range of the array. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param low the index of the first element to be sorted |
|
* @param size the array size |
|
* @return true if finally sorted, false otherwise |
|
*/ |
|
private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) { |
|
/* |
|
* The run array is constructed only if initial runs are |
|
* long enough to continue, run[i] then holds start index |
|
* of the i-th sequence of elements in non-descending order. |
|
*/ |
|
int[] run = null; |
|
int high = low + size; |
|
int count = 1, last = low; |
|
/* |
|
* Identify all possible runs. |
|
*/ |
|
for (int k = low + 1; k < high; ) { |
|
/* |
|
* Find the end index of the current run. |
|
*/ |
|
if (a[k - 1] < a[k]) { |
|
// Identify ascending sequence |
|
while (++k < high && a[k - 1] <= a[k]); |
|
} else if (a[k - 1] > a[k]) { |
|
// Identify descending sequence |
|
while (++k < high && a[k - 1] >= a[k]); |
|
// Reverse into ascending order |
|
for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { |
|
float ai = a[i]; a[i] = a[j]; a[j] = ai; |
|
} |
|
} else { // Identify constant sequence |
|
for (float ak = a[k]; ++k < high && ak == a[k]; ); |
|
if (k < high) { |
|
continue; |
|
} |
|
} |
|
/* |
|
* Check special cases. |
|
*/ |
|
if (run == null) { |
|
if (k == high) { |
|
/* |
|
* The array is monotonous sequence, |
|
* and therefore already sorted. |
|
*/ |
|
return true; |
|
} |
|
if (k - low < MIN_FIRST_RUN_SIZE) { |
|
/* |
|
* The first run is too small |
|
* to proceed with scanning. |
|
*/ |
|
return false; |
|
} |
|
run = new int[((size >> 10) | 0x7F) & 0x3FF]; |
|
run[0] = low; |
|
} else if (a[last - 1] > a[last]) { |
|
if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { |
|
/* |
|
* The first runs are not long |
|
* enough to continue scanning. |
|
*/ |
|
return false; |
|
} |
|
if (++count == MAX_RUN_CAPACITY) { |
|
/* |
|
* Array is not highly structured. |
|
*/ |
|
return false; |
|
} |
|
if (count == run.length) { |
|
/* |
|
* Increase capacity of index array. |
|
*/ |
|
run = Arrays.copyOf(run, count << 1); |
|
} |
|
} |
|
run[count] = (last = k); |
|
} |
|
/* |
|
* Merge runs of highly structured array. |
|
*/ |
|
if (count > 1) { |
|
float[] b; int offset = low; |
|
if (sorter == null || (b = (float[]) sorter.b) == null) { |
|
b = new float[size]; |
|
} else { |
|
offset = sorter.offset; |
|
} |
|
mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); |
|
} |
|
return true; |
|
} |
|
/** |
|
* Merges the specified runs. |
|
* |
|
* @param a the source array |
|
* @param b the temporary buffer used in merging |
|
* @param offset the start index in the source, inclusive |
|
* @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) |
|
* @param parallel indicates whether merging is performed in parallel |
|
* @param run the start indexes of the runs, inclusive |
|
* @param lo the start index of the first run, inclusive |
|
* @param hi the start index of the last run, inclusive |
|
* @return the destination where runs are merged |
|
*/ |
|
private static float[] mergeRuns(float[] a, float[] b, int offset, |
|
int aim, boolean parallel, int[] run, int lo, int hi) { |
|
if (hi - lo == 1) { |
|
if (aim >= 0) { |
|
return a; |
|
} |
|
for (int i = run[hi], j = i - offset, low = run[lo]; i > low; |
|
b[--j] = a[--i] |
|
); |
|
return b; |
|
} |
|
/* |
|
* Split into approximately equal parts. |
|
*/ |
|
int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; |
|
while (run[++mi + 1] <= rmi); |
|
/* |
|
* Merge the left and right parts. |
|
*/ |
|
float[] a1, a2; |
|
if (parallel && hi - lo > MIN_RUN_COUNT) { |
|
RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); |
|
a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); |
|
a2 = (float[]) merger.getDestination(); |
|
} else { |
|
a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); |
|
a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); |
|
} |
|
float[] dst = a1 == a ? b : a; |
|
int k = a1 == a ? run[lo] - offset : run[lo]; |
|
int lo1 = a1 == b ? run[lo] - offset : run[lo]; |
|
int hi1 = a1 == b ? run[mi] - offset : run[mi]; |
|
int lo2 = a2 == b ? run[mi] - offset : run[mi]; |
|
int hi2 = a2 == b ? run[hi] - offset : run[hi]; |
|
if (parallel) { |
|
new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); |
|
} else { |
|
mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); |
|
} |
|
return dst; |
|
} |
|
/** |
|
* Merges the sorted parts. |
|
* |
|
* @param merger parallel context |
|
* @param dst the destination where parts are merged |
|
* @param k the start index of the destination, inclusive |
|
* @param a1 the first part |
|
* @param lo1 the start index of the first part, inclusive |
|
* @param hi1 the end index of the first part, exclusive |
|
* @param a2 the second part |
|
* @param lo2 the start index of the second part, inclusive |
|
* @param hi2 the end index of the second part, exclusive |
|
*/ |
|
private static void mergeParts(Merger merger, float[] dst, int k, |
|
float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) { |
|
if (merger != null && a1 == a2) { |
|
while (true) { |
|
/* |
|
* The first part must be larger. |
|
*/ |
|
if (hi1 - lo1 < hi2 - lo2) { |
|
int lo = lo1; lo1 = lo2; lo2 = lo; |
|
int hi = hi1; hi1 = hi2; hi2 = hi; |
|
} |
|
/* |
|
* Small parts will be merged sequentially. |
|
*/ |
|
if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { |
|
break; |
|
} |
|
/* |
|
* Find the median of the larger part. |
|
*/ |
|
int mi1 = (lo1 + hi1) >>> 1; |
|
float key = a1[mi1]; |
|
int mi2 = hi2; |
|
/* |
|
* Partition the smaller part. |
|
*/ |
|
for (int loo = lo2; loo < mi2; ) { |
|
int t = (loo + mi2) >>> 1; |
|
if (key > a2[t]) { |
|
loo = t + 1; |
|
} else { |
|
mi2 = t; |
|
} |
|
} |
|
int d = mi2 - lo2 + mi1 - lo1; |
|
/* |
|
* Merge the right sub-parts in parallel. |
|
*/ |
|
merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); |
|
/* |
|
* Process the sub-left parts. |
|
*/ |
|
hi1 = mi1; |
|
hi2 = mi2; |
|
} |
|
} |
|
/* |
|
* Merge small parts sequentially. |
|
*/ |
|
while (lo1 < hi1 && lo2 < hi2) { |
|
dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; |
|
} |
|
if (dst != a1 || k < lo1) { |
|
while (lo1 < hi1) { |
|
dst[k++] = a1[lo1++]; |
|
} |
|
} |
|
if (dst != a2 || k < lo2) { |
|
while (lo2 < hi2) { |
|
dst[k++] = a2[lo2++]; |
|
} |
|
} |
|
} |
|
// [double] |
|
/** |
|
* Sorts the specified range of the array using parallel merge |
|
* sort and/or Dual-Pivot Quicksort. |
|
* |
|
* To balance the faster splitting and parallelism of merge sort |
|
* with the faster element partitioning of Quicksort, ranges are |
|
* subdivided in tiers such that, if there is enough parallelism, |
|
* the four-way parallel merge is started, still ensuring enough |
|
* parallelism to process the partitions. |
|
* |
|
* @param a the array to be sorted |
|
* @param parallelism the parallelism level |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(double[] a, int parallelism, int low, int high) { |
|
/* |
|
* Phase 1. Count the number of negative zero -0.0d, |
|
* turn them into positive zero, and move all NaNs |
|
* to the end of the array. |
|
*/ |
|
int numNegativeZero = 0; |
|
for (int k = high; k > low; ) { |
|
double ak = a[--k]; |
|
if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d |
|
numNegativeZero += 1; |
|
a[k] = 0.0d; |
|
} else if (ak != ak) { // ak is NaN |
|
a[k] = a[--high]; |
|
a[high] = ak; |
|
} |
|
} |
|
/* |
|
* Phase 2. Sort everything except NaNs, |
|
* which are already in place. |
|
*/ |
|
int size = high - low; |
|
if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
|
int depth = getDepth(parallelism, size >> 12); |
|
double[] b = depth == 0 ? null : new double[size]; |
|
new Sorter(null, a, b, low, size, low, depth).invoke(); |
|
} else { |
|
sort(null, a, 0, low, high); |
|
} |
|
/* |
|
* Phase 3. Turn positive zero 0.0d |
|
* back into negative zero -0.0d. |
|
*/ |
|
if (++numNegativeZero == 1) { |
|
return; |
|
} |
|
/* |
|
* Find the position one less than |
|
* the index of the first zero. |
|
*/ |
|
while (low <= high) { |
|
int middle = (low + high) >>> 1; |
|
if (a[middle] < 0) { |
|
low = middle + 1; |
|
} else { |
|
high = middle - 1; |
|
} |
|
} |
|
/* |
|
* Replace the required number of 0.0d by -0.0d. |
|
*/ |
|
while (--numNegativeZero > 0) { |
|
a[++high] = -0.0d; |
|
} |
|
} |
|
/** |
|
* Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
* other sorts in special-cases, possibly with parallel partitions. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param bits the combination of recursion depth and bit flag, where |
|
* the right bit "0" indicates that array is the leftmost part |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
static void sort(Sorter sorter, double[] a, int bits, int low, int high) { |
|
while (true) { |
|
int end = high - 1, size = high - low; |
|
/* |
|
* Run mixed insertion sort on small non-leftmost parts. |
|
*/ |
|
if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
|
mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
|
return; |
|
} |
|
/* |
|
* Invoke insertion sort on small leftmost part. |
|
*/ |
|
if (size < MAX_INSERTION_SORT_SIZE) { |
|
insertionSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Check if the whole array or large non-leftmost |
|
* parts are nearly sorted and then merge runs. |
|
*/ |
|
if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
|
&& tryMergeRuns(sorter, a, low, size)) { |
|
return; |
|
} |
|
/* |
|
* Switch to heap sort if execution |
|
* time is becoming quadratic. |
|
*/ |
|
if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
heapSort(a, low, high); |
|
return; |
|
} |
|
/* |
|
* Use an inexpensive approximation of the golden ratio |
|
* to select five sample elements and determine pivots. |
|
*/ |
|
int step = (size >> 3) * 3 + 3; |
|
/* |
|
* Five elements around (and including) the central element |
|
* will be used for pivot selection as described below. The |
|
* unequal choice of spacing these elements was empirically |
|
* determined to work well on a wide variety of inputs. |
|
*/ |
|
int e1 = low + step; |
|
int e5 = end - step; |
|
int e3 = (e1 + e5) >>> 1; |
|
int e2 = (e1 + e3) >>> 1; |
|
int e4 = (e3 + e5) >>> 1; |
|
double a3 = a[e3]; |
|
/* |
|
* Sort these elements in place by the combination |
|
* of 4-element sorting network and insertion sort. |
|
* |
|
* 5 ------o-----------o------------ |
|
* | | |
|
* 4 ------|-----o-----o-----o------ |
|
* | | | |
|
* 2 ------o-----|-----o-----o------ |
|
* | | |
|
* 1 ------------o-----o------------ |
|
*/ |
|
if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
|
if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
|
if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
|
if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
|
if (a3 < a[e2]) { |
|
if (a3 < a[e1]) { |
|
a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
|
} else { |
|
a[e3] = a[e2]; a[e2] = a3; |
|
} |
|
} else if (a3 > a[e4]) { |
|
if (a3 > a[e5]) { |
|
a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
|
} else { |
|
a[e3] = a[e4]; a[e4] = a3; |
|
} |
|
} |
|
// Pointers |
|
int lower = low; // The index of the last element of the left part |
|
int upper = end; // The index of the first element of the right part |
|
/* |
|
* Partitioning with 2 pivots in case of different elements. |
|
*/ |
|
if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
|
/* |
|
* Use the first and fifth of the five sorted elements as |
|
* the pivots. These values are inexpensive approximation |
|
* of tertiles. Note, that pivot1 < pivot2. |
|
*/ |
|
double pivot1 = a[e1]; |
|
double pivot2 = a[e5]; |
|
/* |
|
* The first and the last elements to be sorted are moved |
|
* to the locations formerly occupied by the pivots. When |
|
* partitioning is completed, the pivots are swapped back |
|
* into their final positions, and excluded from the next |
|
* subsequent sorting. |
|
*/ |
|
a[e1] = a[lower]; |
|
a[e5] = a[upper]; |
|
/* |
|
* Skip elements, which are less or greater than the pivots. |
|
*/ |
|
while (a[++lower] < pivot1); |
|
while (a[--upper] > pivot2); |
|
/* |
|
* Backward 3-interval partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------------+ |
|
* | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
|
* +------------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot1 |
|
* pivot1 <= all in (k, upper) <= pivot2 |
|
* all in [upper, end) > pivot2 |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int unused = --lower, k = ++upper; --k > lower; ) { |
|
double ak = a[k]; |
|
if (ak < pivot1) { // Move a[k] to the left side |
|
while (lower < k) { |
|
if (a[++lower] >= pivot1) { |
|
if (a[lower] > pivot2) { |
|
a[k] = a[--upper]; |
|
a[upper] = a[lower]; |
|
} else { |
|
a[k] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
break; |
|
} |
|
} |
|
} else if (ak > pivot2) { // Move a[k] to the right side |
|
a[k] = a[--upper]; |
|
a[upper] = ak; |
|
} |
|
} |
|
/* |
|
* Swap the pivots into their final positions. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot1; |
|
a[end] = a[upper]; a[upper] = pivot2; |
|
/* |
|
* Sort non-left parts recursively (possibly in parallel), |
|
* excluding known pivots. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, lower + 1, upper); |
|
sorter.forkSorter(bits | 1, upper + 1, high); |
|
} else { |
|
sort(sorter, a, bits | 1, lower + 1, upper); |
|
sort(sorter, a, bits | 1, upper + 1, high); |
|
} |
|
} else { // Use single pivot in case of many equal elements |
|
/* |
|
* Use the third of the five sorted elements as the pivot. |
|
* This value is inexpensive approximation of the median. |
|
*/ |
|
double pivot = a[e3]; |
|
/* |
|
* The first element to be sorted is moved to the |
|
* location formerly occupied by the pivot. After |
|
* completion of partitioning the pivot is swapped |
|
* back into its final position, and excluded from |
|
* the next subsequent sorting. |
|
*/ |
|
a[e3] = a[lower]; |
|
/* |
|
* Traditional 3-way (Dutch National Flag) partitioning |
|
* |
|
* left part central part right part |
|
* +------------------------------------------------------+ |
|
* | < pivot | ? | == pivot | > pivot | |
|
* +------------------------------------------------------+ |
|
* ^ ^ ^ |
|
* | | | |
|
* lower k upper |
|
* |
|
* Invariants: |
|
* |
|
* all in (low, lower] < pivot |
|
* all in (k, upper) == pivot |
|
* all in [upper, end] > pivot |
|
* |
|
* Pointer k is the last index of ?-part |
|
*/ |
|
for (int k = ++upper; --k > lower; ) { |
|
double ak = a[k]; |
|
if (ak != pivot) { |
|
a[k] = pivot; |
|
if (ak < pivot) { // Move a[k] to the left side |
|
while (a[++lower] < pivot); |
|
if (a[lower] > pivot) { |
|
a[--upper] = a[lower]; |
|
} |
|
a[lower] = ak; |
|
} else { // ak > pivot - Move a[k] to the right side |
|
a[--upper] = ak; |
|
} |
|
} |
|
} |
|
/* |
|
* Swap the pivot into its final position. |
|
*/ |
|
a[low] = a[lower]; a[lower] = pivot; |
|
/* |
|
* Sort the right part (possibly in parallel), excluding |
|
* known pivot. All elements from the central part are |
|
* equal and therefore already sorted. |
|
*/ |
|
if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
sorter.forkSorter(bits | 1, upper, high); |
|
} else { |
|
sort(sorter, a, bits | 1, upper, high); |
|
} |
|
} |
|
high = lower; // Iterate along the left part |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using mixed insertion sort. |
|
* |
|
* Mixed insertion sort is combination of simple insertion sort, |
|
* pin insertion sort and pair insertion sort. |
|
* |
|
* In the context of Dual-Pivot Quicksort, the pivot element |
|
* from the left part plays the role of sentinel, because it |
|
* is less than any elements from the given part. Therefore, |
|
* expensive check of the left range can be skipped on each |
|
* iteration unless it is the leftmost call. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param end the index of the last element for simple insertion sort |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void mixedInsertionSort(double[] a, int low, int end, int high) { |
|
if (end == high) { |
|
/* |
|
* Invoke simple insertion sort on tiny array. |
|
*/ |
|
for (int i; ++low < end; ) { |
|
double ai = a[i = low]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} else { |
|
/* |
|
* Start with pin insertion sort on small part. |
|
* |
|
* Pin insertion sort is extended simple insertion sort. |
|
* The main idea of this sort is to put elements larger |
|
* than an element called pin to the end of array (the |
|
* proper area for such elements). It avoids expensive |
|
* movements of these elements through the whole array. |
|
*/ |
|
double pin = a[end]; |
|
for (int i, p = high; ++low < end; ) { |
|
double ai = a[i = low]; |
|
if (ai < a[i - 1]) { // Small element |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
a[i] = a[--i]; |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} else if (p > i && ai > pin) { // Large element |
|
/* |
|
* Find element smaller than pin. |
|
*/ |
|
while (a[--p] > pin); |
|
/* |
|
* Swap it with large element. |
|
*/ |
|
if (p > i) { |
|
ai = a[p]; |
|
a[p] = a[i]; |
|
} |
|
/* |
|
* Insert small element into sorted part. |
|
*/ |
|
while (ai < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
/* |
|
* Continue with pair insertion sort on remain part. |
|
*/ |
|
for (int i; low < high; ++low) { |
|
double a1 = a[i = low], a2 = a[++low]; |
|
/* |
|
* Insert two elements per iteration: at first, insert the |
|
* larger element and then insert the smaller element, but |
|
* from the position where the larger element was inserted. |
|
*/ |
|
if (a1 > a2) { |
|
while (a1 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a1; |
|
while (a2 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a2; |
|
} else if (a1 < a[i - 1]) { |
|
while (a2 < a[--i]) { |
|
a[i + 2] = a[i]; |
|
} |
|
a[++i + 1] = a2; |
|
while (a1 < a[--i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = a1; |
|
} |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using insertion sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void insertionSort(double[] a, int low, int high) { |
|
for (int i, k = low; ++k < high; ) { |
|
double ai = a[i = k]; |
|
if (ai < a[i - 1]) { |
|
while (--i >= low && ai < a[i]) { |
|
a[i + 1] = a[i]; |
|
} |
|
a[i + 1] = ai; |
|
} |
|
} |
|
} |
|
/** |
|
* Sorts the specified range of the array using heap sort. |
|
* |
|
* @param a the array to be sorted |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void heapSort(double[] a, int low, int high) { |
|
for (int k = (low + high) >>> 1; k > low; ) { |
|
pushDown(a, --k, a[k], low, high); |
|
} |
|
while (--high > low) { |
|
double max = a[low]; |
|
pushDown(a, low, a[high], low, high); |
|
a[high] = max; |
|
} |
|
} |
|
/** |
|
* Pushes specified element down during heap sort. |
|
* |
|
* @param a the given array |
|
* @param p the start index |
|
* @param value the given element |
|
* @param low the index of the first element, inclusive, to be sorted |
|
* @param high the index of the last element, exclusive, to be sorted |
|
*/ |
|
private static void pushDown(double[] a, int p, double value, int low, int high) { |
|
for (int k ;; a[p] = a[p = k]) { |
|
k = (p << 1) - low + 2; // Index of the right child |
|
if (k > high) { |
|
break; |
|
} |
|
if (k == high || a[k] < a[k - 1]) { |
|
--k; |
|
} |
|
if (a[k] <= value) { |
|
break; |
|
} |
|
} |
|
a[p] = value; |
|
} |
|
/** |
|
* Tries to sort the specified range of the array. |
|
* |
|
* @param sorter parallel context |
|
* @param a the array to be sorted |
|
* @param low the index of the first element to be sorted |
|
* @param size the array size |
|
* @return true if finally sorted, false otherwise |
|
*/ |
|
private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) { |
|
/* |
|
* The run array is constructed only if initial runs are |
|
* long enough to continue, run[i] then holds start index |
|
* of the i-th sequence of elements in non-descending order. |
|
*/ |
|
int[] run = null; |
|
int high = low + size; |
|
int count = 1, last = low; |
|
/* |
|
* Identify all possible runs. |
|
*/ |
|
for (int k = low + 1; k < high; ) { |
|
/* |
|
* Find the end index of the current run. |
|
*/ |
|
if (a[k - 1] < a[k]) { |
|
// Identify ascending sequence |
|
while (++k < high && a[k - 1] <= a[k]); |
|
} else if (a[k - 1] > a[k]) { |
|
// Identify descending sequence |
|
while (++k < high && a[k - 1] >= a[k]); |
|
// Reverse into ascending order |
|
for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { |
|
double ai = a[i]; a[i] = a[j]; a[j] = ai; |
|
} |
|
} else { // Identify constant sequence |
|
for (double ak = a[k]; ++k < high && ak == a[k]; ); |
|
if (k < high) { |
|
continue; |
|
} |
|
} |
|
/* |
|
* Check special cases. |
|
*/ |
|
if (run == null) { |
|
if (k == high) { |
|
/* |
|
* The array is monotonous sequence, |
|
* and therefore already sorted. |
|
*/ |
|
return true; |
|
} |
|
if (k - low < MIN_FIRST_RUN_SIZE) { |
|
/* |
|
* The first run is too small |
|
* to proceed with scanning. |
|
*/ |
|
return false; |
|
} |
|
run = new int[((size >> 10) | 0x7F) & 0x3FF]; |
|
run[0] = low; |
|
} else if (a[last - 1] > a[last]) { |
|
if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { |
|
/* |
|
* The first runs are not long |
|
* enough to continue scanning. |
|
*/ |
|
return false; |
|
} |
|
if (++count == MAX_RUN_CAPACITY) { |
|
/* |
|
* Array is not highly structured. |
|
*/ |
|
return false; |
|
} |
|
if (count == run.length) { |
|
/* |
|
* Increase capacity of index array. |
|
*/ |
|
run = Arrays.copyOf(run, count << 1); |
|
} |
|
} |
|
run[count] = (last = k); |
|
} |
|
/* |
|
* Merge runs of highly structured array. |
|
*/ |
|
if (count > 1) { |
|
double[] b; int offset = low; |
|
if (sorter == null || (b = (double[]) sorter.b) == null) { |
|
b = new double[size]; |
|
} else { |
|
offset = sorter.offset; |
|
} |
|
mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); |
|
} |
|
return true; |
|
} |
|
/** |
|
* Merges the specified runs. |
|
* |
|
* @param a the source array |
|
* @param b the temporary buffer used in merging |
|
* @param offset the start index in the source, inclusive |
|
* @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) |
|
* @param parallel indicates whether merging is performed in parallel |
|
* @param run the start indexes of the runs, inclusive |
|
* @param lo the start index of the first run, inclusive |
|
* @param hi the start index of the last run, inclusive |
|
* @return the destination where runs are merged |
|
*/ |
|
private static double[] mergeRuns(double[] a, double[] b, int offset, |
|
int aim, boolean parallel, int[] run, int lo, int hi) { |
|
if (hi - lo == 1) { |
|
if (aim >= 0) { |
|
return a; |
|
} |
|
for (int i = run[hi], j = i - offset, low = run[lo]; i > low; |
|
b[--j] = a[--i] |
|
); |
|
return b; |
|
} |
|
/* |
|
* Split into approximately equal parts. |
|
*/ |
|
int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; |
|
while (run[++mi + 1] <= rmi); |
|
/* |
|
* Merge the left and right parts. |
|
*/ |
|
double[] a1, a2; |
|
if (parallel && hi - lo > MIN_RUN_COUNT) { |
|
RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); |
|
a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); |
|
a2 = (double[]) merger.getDestination(); |
|
} else { |
|
a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); |
|
a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); |
|
} |
|
double[] dst = a1 == a ? b : a; |
|
int k = a1 == a ? run[lo] - offset : run[lo]; |
|
int lo1 = a1 == b ? run[lo] - offset : run[lo]; |
|
int hi1 = a1 == b ? run[mi] - offset : run[mi]; |
|
int lo2 = a2 == b ? run[mi] - offset : run[mi]; |
|
int hi2 = a2 == b ? run[hi] - offset : run[hi]; |
|
if (parallel) { |
|
new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); |
|
} else { |
|
mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); |
|
} |
|
return dst; |
|
} |
|
/** |
|
* Merges the sorted parts. |
|
* |
|
* @param merger parallel context |
|
* @param dst the destination where parts are merged |
|
* @param k the start index of the destination, inclusive |
|
* @param a1 the first part |
|
* @param lo1 the start index of the first part, inclusive |
|
* @param hi1 the end index of the first part, exclusive |
|
* @param a2 the second part |
|
* @param lo2 the start index of the second part, inclusive |
|
* @param hi2 the end index of the second part, exclusive |
|
*/ |
|
private static void mergeParts(Merger merger, double[] dst, int k, |
|
double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) { |
|
if (merger != null && a1 == a2) { |
|
while (true) { |
|
/* |
|
* The first part must be larger. |
|
*/ |
|
if (hi1 - lo1 < hi2 - lo2) { |
|
int lo = lo1; lo1 = lo2; lo2 = lo; |
|
int hi = hi1; hi1 = hi2; hi2 = hi; |
|
} |
|
/* |
|
* Small parts will be merged sequentially. |
|
*/ |
|
if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { |
|
break; |
|
} |
|
/* |
|
* Find the median of the larger part. |
|
*/ |
|
int mi1 = (lo1 + hi1) >>> 1; |
|
double key = a1[mi1]; |
|
int mi2 = hi2; |
|
/* |
|
* Partition the smaller part. |
|
*/ |
|
for (int loo = lo2; loo < mi2; ) { |
|
int t = (loo + mi2) >>> 1; |
|
if (key > a2[t]) { |
|
loo = t + 1; |
|
} else { |
|
mi2 = t; |
|
} |
|
} |
|
int d = mi2 - lo2 + mi1 - lo1; |
|
/* |
|
* Merge the right sub-parts in parallel. |
|
*/ |
|
merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); |
|
/* |
|
* Process the sub-left parts. |
|
*/ |
|
hi1 = mi1; |
|
hi2 = mi2; |
|
} |
|
} |
|
/* |
|
* Merge small parts sequentially. |
|
*/ |
|
while (lo1 < hi1 && lo2 < hi2) { |
|
dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; |
|
} |
|
if (dst != a1 || k < lo1) { |
|
while (lo1 < hi1) { |
|
dst[k++] = a1[lo1++]; |
|
} |
|
} |
|
if (dst != a2 || k < lo2) { |
|
while (lo2 < hi2) { |
|
dst[k++] = a2[lo2++]; |
|
} |
|
} |
|
} |
|
// [class] |
|
/** |
|
* This class implements parallel sorting. |
|
*/ |
|
private static final class Sorter extends CountedCompleter<Void> { |
|
private static final long serialVersionUID = 20180818L; |
|
private final Object a, b; |
|
private final int low, size, offset, depth; |
|
private Sorter(CountedCompleter<?> parent, |
|
Object a, Object b, int low, int size, int offset, int depth) { |
|
super(parent); |
|
this.a = a; |
|
this.b = b; |
|
this.low = low; |
|
this.size = size; |
|
this.offset = offset; |
|
this.depth = depth; |
|
} |
|
@Override |
|
public final void compute() { |
|
if (depth < 0) { |
|
setPendingCount(2); |
|
int half = size >> 1; |
|
new Sorter(this, b, a, low, half, offset, depth + 1).fork(); |
|
new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute(); |
|
} else { |
|
if (a instanceof int[]) { |
|
sort(this, (int[]) a, depth, low, low + size); |
|
} else if (a instanceof long[]) { |
|
sort(this, (long[]) a, depth, low, low + size); |
|
} else if (a instanceof float[]) { |
|
sort(this, (float[]) a, depth, low, low + size); |
|
} else if (a instanceof double[]) { |
|
sort(this, (double[]) a, depth, low, low + size); |
|
} else { |
|
throw new IllegalArgumentException( |
|
"Unknown type of array: " + a.getClass().getName()); |
|
} |
|
} |
|
tryComplete(); |
|
} |
|
@Override |
|
public final void onCompletion(CountedCompleter<?> caller) { |
|
if (depth < 0) { |
|
int mi = low + (size >> 1); |
|
boolean src = (depth & 1) == 0; |
|
new Merger(null, |
|
a, |
|
src ? low : low - offset, |
|
b, |
|
src ? low - offset : low, |
|
src ? mi - offset : mi, |
|
b, |
|
src ? mi - offset : mi, |
|
src ? low + size - offset : low + size |
|
).invoke(); |
|
} |
|
} |
|
private void forkSorter(int depth, int low, int high) { |
|
addToPendingCount(1); |
|
Object a = this.a; // Use local variable for performance |
|
new Sorter(this, a, b, low, high - low, offset, depth).fork(); |
|
} |
|
} |
|
/** |
|
* This class implements parallel merging. |
|
*/ |
|
private static final class Merger extends CountedCompleter<Void> { |
|
private static final long serialVersionUID = 20180818L; |
|
private final Object dst, a1, a2; |
|
private final int k, lo1, hi1, lo2, hi2; |
|
private Merger(CountedCompleter<?> parent, Object dst, int k, |
|
Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { |
|
super(parent); |
|
this.dst = dst; |
|
this.k = k; |
|
this.a1 = a1; |
|
this.lo1 = lo1; |
|
this.hi1 = hi1; |
|
this.a2 = a2; |
|
this.lo2 = lo2; |
|
this.hi2 = hi2; |
|
} |
|
@Override |
|
public final void compute() { |
|
if (dst instanceof int[]) { |
|
mergeParts(this, (int[]) dst, k, |
|
(int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2); |
|
} else if (dst instanceof long[]) { |
|
mergeParts(this, (long[]) dst, k, |
|
(long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2); |
|
} else if (dst instanceof float[]) { |
|
mergeParts(this, (float[]) dst, k, |
|
(float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2); |
|
} else if (dst instanceof double[]) { |
|
mergeParts(this, (double[]) dst, k, |
|
(double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2); |
|
} else { |
|
throw new IllegalArgumentException( |
|
"Unknown type of array: " + dst.getClass().getName()); |
|
} |
|
propagateCompletion(); |
|
} |
|
private void forkMerger(Object dst, int k, |
|
Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { |
|
addToPendingCount(1); |
|
new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork(); |
|
} |
|
} |
|
/** |
|
* This class implements parallel merging of runs. |
|
*/ |
|
private static final class RunMerger extends RecursiveTask<Object> { |
|
private static final long serialVersionUID = 20180818L; |
|
private final Object a, b; |
|
private final int[] run; |
|
private final int offset, aim, lo, hi; |
|
private RunMerger(Object a, Object b, int offset, |
|
int aim, int[] run, int lo, int hi) { |
|
this.a = a; |
|
this.b = b; |
|
this.offset = offset; |
|
this.aim = aim; |
|
this.run = run; |
|
this.lo = lo; |
|
this.hi = hi; |
|
} |
|
@Override |
|
protected final Object compute() { |
|
if (a instanceof int[]) { |
|
return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi); |
|
} |
|
if (a instanceof long[]) { |
|
return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi); |
|
} |
|
if (a instanceof float[]) { |
|
return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi); |
|
} |
|
if (a instanceof double[]) { |
|
return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi); |
|
} |
|
throw new IllegalArgumentException( |
|
"Unknown type of array: " + a.getClass().getName()); |
|
} |
|
private RunMerger forkMe() { |
|
fork(); |
|
return this; |
|
} |
|
private Object getDestination() { |
|
join(); |
|
return getRawResult(); |
|
} |
|
} |
|
} |