/** * The maximum number of runs in merge sort. */ private static final int MAX_RUN_COUNT = 67; /** * The maximum length of run in merge sort. */ private static final int MAX_RUN_LENGTH = 33; /** * If the length of an array to be sorted is less than this * constant, Quicksort is used in preference to merge sort. */ private static final int QUICKSORT_THRESHOLD = 286; /** * If the length of an array to be sorted is less than this * constant, insertion sort is used in preference to Quicksort. */ private static final int INSERTION_SORT_THRESHOLD = 47; /** * If the length of a byte array to be sorted is greater than this * constant, counting sort is used in preference to insertion sort. */ private static final int COUNTING_SORT_THRESHOLD_FOR_BYTE = 29; /** * If the length of a short or char array to be sorted is greater * than this constant, counting sort is used in preference to Quicksort. */ private static final int COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR = 3200;
就是“分治”思想,先将序列元素拆解,然后归并,即合并相邻有序子序列。
Timsort的思想是,“分”的时候,直接从左往右,划分成各种不同长度的、有序的子序列,然后对这些子序列进行归并,这样一来,复杂度就大大降低了。
序列分为两部分,一部分有序,一部分无序,不断从无序的部分选元素出来,插入到有序的部分。(一开始是认为第一个元素是有序的部分,其他元素都是无序的部分)
第一步:在无序部分拿两个元素a1,a2,并调整使a1>a2;
第二步:a1往左比较,找到合适位置后插入;
第三步:a2只需在a1的左边进行比较(a1>a2),找到合适的位置插入即可。
第一步:选其中一个元素出来作为轴。
第二步:两边同时开始遍历,比轴大的元素放在左边,比轴小的元素放在右边。(详见下动图)
第三步:对上面被轴分开的两个序列,进行递归处理,重复执行一二步。最终得到一个有序序列
第一步:k向右遍历,great向左遍历,把遍历的元素放进合适的区间。
第二步:对三个区间进行递归处理,即得到有序序列。
①:先创建一个length为元素种数的数组count,里面的元素全部为0。
②:遍历要排序的序列,根据序列元素大小a找到数组count的位置,对count[a]+=1;
(举个例子:若刚好遍历到的元素是55,则找到count[55]+=1)
③:从左到右遍历count[],元素不是0的位置都拿出来,根据count[a]拿多少个。
④:最终得到有效序列。
/* * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved. * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. * * * * * * * * * * * * * * * * * * * * */ package java.utill; /** * This class implements the Dual-Pivot Quicksort algorithm by * Vladimir Yaroslavskiy, Jon Bentley, and Josh Bloch. The algorithm * offers O(n log(n)) performance on many data sets that cause other * quicksorts to degrade to quadratic performance, and is typically * faster than traditional (one-pivot) Quicksort implementations. * 此处介绍一些双轴快排的优势,对很多两次循环后引起低效率的数据排列方式, * 都保持O(nlogn)复杂度 * * All exposed methods are package-private, designed to be invoked * from public methods (in class Arrays) after performing any * necessary array bounds checks and expanding parameters into the * required forms. * 关于重用这个私有类的介绍 * * @author Vladimir Yaroslavskiy * @author Jon Bentley * @author Josh Bloch * * @version 2011.02.11 m765.827.12i:5\7pm * @since 1.7 */ final class DualPivotQuicksort { /** * Prevents instantiation.防止这个类被实例化 */ private DualPivotQuicksort() { } /* * Tuning parameters.一些设置好的阈值数据,这些数据经过实验证明是最优的 */ /** * The maximum number of runs in merge sort. * 待合并的序列的最大数量 */ private static final int MAX_RUN_COUNT = 67; /** * If the length of an array to be sorted is less than this * constant, Quicksort is used in preference to merge sort. * 如果参与排序的数组长度小于这个值,优先使用快速排序而不是归并排序 */ private static final int QUICKSORT_THRESHOLD = 286; /** * If the length of an array to be sorted is less than this * constant, insertion sort is used in preference to Quicksort. * 如果参与排序的数组长度小于这个值,考虑插入排序,而不是快速排序 */ private static final int INSERTION_SORT_THRESHOLD = 47; /** * If the length of a byte array to be sorted is greater than this * constant, counting sort is used in preference to insertion sort. * 这个是byte数组的 */ private static final int COUNTING_SORT_THRESHOLD_FOR_BYTE = 29; /** * If the length of a short or char array to be sorted is greater * than this constant, counting sort is used in preference to Quicksort. * 这个是short或char数组的启用计数排序的阈值 */ private static final int COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR = 3200; /* * Sorting methods for seven primitive types. * 针对7种基本类型的排序方法,这篇博客仅讨论int类型和short类型 */ /** * Sorts the specified range of the array using the given * workspace array slice if possible for merging * * @param a the array to be sorted * @param left the index of the first element, inclusive, to be sorted * @param right the index of the last element, inclusive, to be sorted * @param work a workspace array (slice) * @param workBase origin of usable space in work array * @param workLen usable size of work array */ static void sort(int[] a, int left, int right, int[] work, int workBase, int workLen) { // Use Quicksort on small arrays //小序列直接使用快排。 if (right - left < QUICKSORT_THRESHOLD) { sort(a, left, right, true); return; } /* * Index run[i] is the start of i-th run * (ascending or descending sequence). * run[i] 意味着第i个有序数列开始的位置,(升序或者降序) */ int[] run = new int[MAX_RUN_COUNT + 1]; int count = 0; run[0] = left; // Check if the array is nearly sorted // 检查数组是不是已经接近有序状态。这个循环用于分割有序序列 for (int k = left; k < right; run[count] = k) { // Equal items in the beginning of the sequence //跳过序列开头一些相等的元素 while (k < right && a[k] == a[k + 1]) k++; if (k == right) break; // Sequence finishes with equal items直接结束,run[count]~k全部元素相等 if (a[k] < a[k + 1]) { // ascending升序 while (++k <= right && a[k - 1] <= a[k]) ;//run[account]~k的都是升序 } else if (a[k] > a[k + 1]) { // descending降序 while (++k <= right && a[k - 1] >= a[k]) ; // Transform into an ascending sequence转为升序 for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) { int t = a[lo]; a[lo] = a[hi]; a[hi] = t; } //处理到这里,run[account]~k的都是升序(包含相等) } // Merge a transformed descending sequence followed by an // ascending sequence //合并两个升序列,其中前一个是由降序转化而来,详情见图一 if (run[count] > left && a[run[count]] >= a[run[count] - 1]) { count--; } /* * The array is not highly structured, * use Quicksort instead of merge sort. * 若待归并序列超过阈值,表明该序列不是高度接近有序,那么改为使用快速排序 */ if (++count == MAX_RUN_COUNT) { sort(a, left, right, true); return; } } // These invariants should hold true: // run[0] = 0 // run[<last>] = right + 1; (terminator) //这三个判断用来处理最后一个子序列的终止问题 if (count == 0) { // A single equal run //这个序列所有元素相等,则直接返回。显然是由上面的break打破循环产生。 return; } else if (count == 1 && run[count] > right) { // Either a single ascending or a transformed descending run. // Always check that a final run is a proper terminator, otherwise // we have an unterminated trailing run, to handle downstream. //这里表示序列已经是升序列,直接返回 return; } /* * 这个地方的触发有两种情况: * 1、run[count]==right(未加1前) * 2、由上面的break触发,也即run[count]后面是一串有相等元素的数列 * 这样就必须为不在(left~right)范围内的下一个子序列设定起始, * 这样才能终止范围内的最后一个子序列 * 详见图二 * 同时,对照上面的elseif,也是一个有关终止条件的行为 */ right++; if (run[count] < right) { // Corner case: the final run is not a terminator. This may happen // if a final run is an equals run, or there is a single-element run // at the end. Fix up by adding a proper terminator at the end. // Note that we terminate with (right + 1), incremented earlier. run[++count] = right; } // Determine alternation base for merge //决定产生变化的基数组,即在work[]上变化还是在a[]上变化(参见下面第一个if-else) //这个优化确实看不太懂 byte odd = 0; //由下面语句知n和odd的关系为: //odd 0 1 0 1 …… //n 1 2 4 8 …… for (int n = 1; (n <<= 1) < count; odd ^= 1) ; // Use or create temporary array b for merging //为归并过程创建一个临时数组b int[] b; // temp array; alternates with a int ao, bo; // array offsets from 'left' int blen = right - left; // space needed for b if (work == null || workLen < blen || workBase + blen > work.length) { work = new int[blen]; workBase = 0; } //决定a和b的指向,指向原a[]还是work[] if (odd == 0) { System.arraycopy(a, left, work, workBase, blen); b = a; bo = 0; a = work; ao = workBase - left; } else { b = work; ao = 0; bo = workBase - left; } // Merging // 归并 // 最外层循环,直到count为1,也就是栈中待合并的序列只有一个的时候,标志归并成功 // a 做原始数组,b 做临时数组 for (int last; count > 1; count = last) { // 遍历数组,合并相邻的两个升序序列 for (int k = (last = 0) + 2; k <= count; k += 2) { // 合并run[k-2] 与 run[k-1]两个序列 int hi = run[k], mi = run[k - 1]; for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) { if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) { b[i + bo] = a[p++ + ao]; } else { b[i + bo] = a[q++ + ao]; } } // 这里把合并之后的数列往前移动 run[++last] = hi; } // 如果栈的长度为奇数,那么把最后落单的有序数列copy过对面 if ((count & 1) != 0) { for (int i = right, lo = run[count - 1]; --i >= lo; b[i + bo] = a[i + ao] ) ; run[++last] = right; } //这里为什么要交换a、b呢?详见图三 int[] t = a; a = b; b = t; int o = ao; ao = bo; bo = o; } } /** * Sorts the specified range of the array by Dual-Pivot Quicksort. * * @param a the array to be sorted * @param left the index of the first element, inclusive, to be sorted * @param right the index of the last element, inclusive, to be sorted * @param leftmost indicates if this part is the leftmost in the range */ private static void sort(int[] a, int left, int right, boolean leftmost) { int length = right - left + 1; // Use insertion sort on tiny arrays //小数组使用插入排序 if (length < INSERTION_SORT_THRESHOLD) { if (leftmost) {//代表要比较的序列位于数组最左边 /* * Traditional (without sentinel) insertion sort, * optimized for server VM, is used in case of * the leftmost part. * 经典的插入排序算法,不带哨兵。做了优化,在leftmost情况下使用 */ for (int i = left, j = i; i < right; j = ++i) { int ai = a[i + 1]; while (ai < a[j]) { a[j + 1] = a[j]; if (j-- == left) { break; } } a[j + 1] = ai; } } else { /* * Skip the longest ascending sequence. * 首先跨过开头的升序的部分 */ do { if (left >= right) { return; } } while (a[++left] >= a[left - 1]); /* * Every element from adjoining part plays the role * of sentinel, therefore this allows us to avoid the * left range check on each iteration. Moreover, we use * the more optimized algorithm, so called pair insertion * sort, which is faster (in the context of Quicksort) * than traditional implementation of insertion sort. * 成对插入排序,常见图四 * 具体执行过程:上面的do-while循环已经排好的最前面的数据 * (1)将要插入的数据,第一个值赋值a1,第二个值赋值a2, * (2)然后判断a1与a2的大小,使a1>a2(关键点) * (3)接下来,首先是插入大的数值a1,将a1与k之前的数字一一比较, * 直到数值小于a1为止,把a1插入到合适的位置, * 注意:这里的相隔距离为2,目的是为了给a2留下插入的空隙 * (4)接下来,插入小的数值a2,将a2与此时k之前的数字一一比较, * (这个k已经变化到a1左边,即此时,只需在a1左边插入a2即可, * 因此减少了a2的比较次数,优化就发生在这里) * 直到数值小于a2为止,将a2插入到合适的位置, * 注意:这里的相隔距离为1 * (5)最后把最后一个没有遍历到的数据插入到合适位置 * * 还有一个问题:为什么一定不能是leftmost呢? * 如果是leftmost,可能会发生左越界 * */ for (int k = left; ++left <= right; k = ++left) { int a1 = a[k], a2 = a[left]; if (a1 < a2) { a2 = a1; a1 = a[left]; } while (a1 < a[--k]) { a[k + 2] = a[k]; } a[++k + 1] = a1; while (a2 < a[--k]) { a[k + 1] = a[k]; } a[k + 1] = a2; } int last = a[right]; while (last < a[--right]) { a[right + 1] = a[right]; } a[right + 1] = last; } return; } //开始双轴快排的选轴工作 // Inexpensive approximation of length / 7 // length / 7 的一种低复杂度的实现 int seventh = (length >> 3) + (length >> 6) + 1; /* * Sort five evenly spaced elements around (and including) the * center element in the range. These elements will be used for * pivot selection as described below. The choice for spacing * these elements was empirically determined to work well on * a wide variety of inputs. * 取五个靠近中间位置的元素,这五个位置的间隔为length/7, * 对这五个元素进行排序,这些元素最终会被用来做轴(见图五) */ int e3 = (left + right) >>> 1; // The midpoint int e2 = e3 - seventh; int e1 = e2 - seventh; int e4 = e3 + seventh; int e5 = e4 + seventh; // Sort these elements using insertion sort //使用插入排序将这五个位置的元素排序起来 if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } if (a[e3] < a[e2]) { int t = a[e3]; a[e3] = a[e2]; a[e2] = t; if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } } if (a[e4] < a[e3]) { int t = a[e4]; a[e4] = a[e3]; a[e3] = t; if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } } } if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t; if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } } } } // Pointers int less = left; // 中间区域的首个元素的位置 int great = right; // 右边区域的首个元素的位置 //若满足下面这个条件,则以e2和e4进行双轴快排,否则以e3进行单轴快排 if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) { /* * Use the second and fourth of the five sorted elements as pivots. * These values are inexpensive approximations of the first and * second terciles of the array. Note that pivot1 <= pivot2. * 使用5个元素中的e2,e4两个位置的元素作为轴,他们两个大致处在四分位的位置上。 * 需要注意的是pivot1 <= pivot2 */ int pivot1 = a[e2]; int pivot2 = a[e4]; /* * The first and the last elements to be sorted are moved to the * locations formerly occupied by the pivots. When partitioning * is complete, the pivots are swapped back into their final * positions, and excluded from subsequent sorting. * 这里就是用第一个和最后一个元素覆盖e2和e4上的元素, * 这样e2和e4上的元素就被排除出下面的排序,因为他们已经是枢轴 */ a[e2] = a[left]; a[e4] = a[right]; /* * Skip elements, which are less or greater than pivot values. * 跳过一些队首的小于pivot1的值,跳过队尾的大于pivot2的值 */ while (a[++less] < pivot1) ; while (a[--great] > pivot2) ; /* * Partitioning: * * left part center part right part * +--------------------------------------------------------------+ * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | * +--------------------------------------------------------------+ * ^ ^ ^ * | | | * less k great * * Invariants: * * all in (left, less) < pivot1 * pivot1 <= all in [less, k) <= pivot2 * all in (great, right) > pivot2 * * Pointer k is the first index of ?-part. * 这里的思想是:k不断右移,将元素放进相对应的区域,直到碰上great。 */ outer: for (int k = less - 1; ++k <= great; ) { int ak = a[k]; if (ak < pivot1) { // Move a[k] to left part //挪到left part中去 a[k] = a[less]; /* * Here and below we use "a[i] = b; i++;" instead * of "a[i++] = b;" due to performance issue. * "a[i] = b; i--;"的效率比"a[i--] = b;"的效率高 */ a[less] = ak; ++less; } else if (ak > pivot2) { // Move a[k] to right part //挪到right part中去 while (a[great] > pivot2) {//这里先将一波大于privot2的放在right part if (great-- == k) { // k遇到great,本次分割终止 break outer; } } if (a[great] < pivot1) { // a[great] <= pivot2 a[k] = a[less]; a[less] = a[great]; ++less; } else { // pivot1 <= a[great] <= pivot2 a[k] = a[great]; } /* * Here and below we use "a[i] = b; i--;" instead * of "a[i--] = b;" due to performance issue. */ a[great] = ak; --great; } } // Swap pivots into their final positions /* * 由于排序的部分是a[left+1...right-1],而原范围却是a[left...right], * 而那两个被扣掉的元素就是pivot1和pivot2, * 因此,经过交换,将pivot1和pivot2插入到a[less-1]和a[great+1]中去, * 作为真正的枢轴,并维持left-part、center-part、right-part三个部分 */ a[left] = a[less - 1]; a[less - 1] = pivot1; a[right] = a[great + 1]; a[great + 1] = pivot2; // Sort left and right parts recursively, excluding known pivots //递归快排左右两边的序列 sort(a, left, less - 2, leftmost); sort(a, great + 2, right, false); /* * If center part is too large (comprises > 4/7 of the array), * swap internal pivot values to ends. * 如果中心区域太大,超过数组长度的 4/7。就先进行预处理,再参与递归排序。 * 预处理的方法是把等于pivot1的元素统一放到左边, * 等于pivot2的元素统一放到右边,最终产生一个不包含pivot1和pivot2的数列, * 再拿去参与快排中的递归。 */ if (less < e1 && e5 < great) { /* * Skip elements, which are equal to pivot values. */ while (a[less] == pivot1) { ++less; } while (a[great] == pivot2) { --great; } /* * Partitioning: * * left part center part right part * +----------------------------------------------------------+ * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | * +----------------------------------------------------------+ * ^ ^ ^ * | | | * less k great * * Invariants: * * all in (*, less) == pivot1 * pivot1 < all in [less, k) < pivot2 * all in (great, *) == pivot2 * * Pointer k is the first index of ?-part. */ outer: for (int k = less - 1; ++k <= great; ) { int ak = a[k]; if (ak == pivot1) { // Move a[k] to left part //挪到left part中去 a[k] = a[less]; a[less] = ak; ++less; } else if (ak == pivot2) { // Move a[k] to right part //挪到left part中去 while (a[great] == pivot2) { if (great-- == k) { break outer; } } if (a[great] == pivot1) { // a[great] < pivot2 a[k] = a[less]; /* * Even though a[great] equals to pivot1, the * assignment a[less] = pivot1 may be incorrect, * if a[great] and pivot1 are floating-point zeros * of different signs. Therefore in float and * double sorting methods we have to use more * accurate assignment a[less] = a[great]. */ a[less] = pivot1; ++less; } else { // pivot1 < a[great] < pivot2 a[k] = a[great]; } a[great] = ak; --great; } } } // Sort center part recursively sort(a, less, great, false); } else { // Partitioning with one pivot //用单轴3-way进行分区,因为e1-e5至少存在一对相等的元素, // 因此判定这个数组中重复的元素居多 /* * Use the third of the five sorted elements as pivot. * This value is inexpensive approximation of the median. */ int pivot = a[e3]; /* * Partitioning degenerates to the traditional 3-way * (or "Dutch National Flag") schema: * * left part center part right part * +-------------------------------------------------+ * | < pivot | == pivot | ? | > pivot | * +-------------------------------------------------+ * ^ ^ ^ * | | | * less k great * * Invariants: * * all in (left, less) < pivot * all in [less, k) == pivot * all in (great, right) > pivot * * Pointer k is the first index of ?-part. * 单轴快排和上面的双轴差不多,一个动图介绍图五 */ for (int k = less; k <= great; ++k) { if (a[k] == pivot) { continue; } int ak = a[k]; if (ak < pivot) { // Move a[k] to left part a[k] = a[less]; a[less] = ak; ++less; } else { // a[k] > pivot - Move a[k] to right part while (a[great] > pivot) { --great; } if (a[great] < pivot) { // a[great] <= pivot a[k] = a[less]; a[less] = a[great]; ++less; } else { // a[great] == pivot /* * Even though a[great] equals to pivot, the * assignment a[k] = pivot may be incorrect, * if a[great] and pivot are floating-point * zeros of different signs. Therefore in float * and double sorting methods we have to use * more accurate assignment a[k] = a[great]. */ a[k] = pivot; } a[great] = ak; --great; } } /* * Sort left and right parts recursively. * All elements from center part are equal * and, therefore, already sorted. * 递归快排 */ sort(a, left, less - 1, leftmost); sort(a, great + 1, right, false); } } /** * Sorts the specified range of the array using the given * workspace array slice if possible for merging * * @param a the array to be sorted * @param left the index of the first element, inclusive, to be sorted * @param right the index of the last element, inclusive, to be sorted * @param work a workspace array (slice) * @param workBase origin of usable space in work array * @param workLen usable size of work array * 统计排序适用于元素个数远大于元素种数的情况, * 适用于Short、Byte、Char等元素种数较少的类型。 */ static void sort(short[] a, int left, int right, short[] work, int workBase, int workLen) { // Use counting sort on large arrays if (right - left > COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR) { int[] count = new int[NUM_SHORT_VALUES]; //从左往右,存在的数的count加1 for (int i = left - 1; ++i <= right; count[a[i] - Short.MIN_VALUE]++ ); //从右往左,将count不是0的数提取出来,这样的序列就变成有序了 for (int i = NUM_SHORT_VALUES, k = right + 1; k > left; ) { while (count[--i] == 0); short value = (short) (i + Short.MIN_VALUE); int s = count[i]; do { a[--k] = value; } while (--s > 0); } } else { // Use Dual-Pivot Quicksort on small arrays //doSort(a, left, right, work, workBase, workLen); //上面的doSort与之前的快排sort差不多,这里就不赘述了。 } } /** The number of distinct short values. */ private static final int NUM_SHORT_VALUES = 1 << 16; }