字符串排序算法:
适用性:适用于小整数键的排序算法
稳定性:稳定的排序算法,排序后键相同的元素的相对位置没有变化
思路:计算数据每个键的起始位置
算法实现:
/** * 键索引计数法 * */ public class KeyIndexCount { private static final int R = 100; // 数字范围 [0, R) public static void sort(int[] data) { int n = data.length; // 统计频率 int[] count = new int[R + 1]; for (int i = 0; i < n; i++) { count[data[i] + 1]++; } // 将频率转换成索引 for (int r = 0; r < R; r++) { count[r + 1] += count[r]; } // 数据分类 int[] aux = new int[n]; for (int i = 0; i < n; i++) { aux[count[data[i]]] = data[i]; count[data[i]]++; } // 回写 System.arraycopy(aux, 0, data, 0, n); } }
测试:
class KeyIndexCountTest { @Test void sort() { Random random = new Random(); int[] data = new int[100]; for (int i = 0; i < 100; i++) { data[i] = random.nextInt(100); } KeyIndexCount.sort(data); Assertions.assertTrue(SortUtils.isSorted(data)); } }
适用性:适用于定长的字符串排序
思路:从右往左对每个字符应用键索引计数法
算法实现:
/** * 低位优先的字符串排序算法 * */ public class LSD { private static final int R = 256; // 字符集大小 /** * @param w 按前 w 位排序 * */ public static void sort(String[] data, int w) { int n = data.length; String[] aux = new String[n]; // 从右往左对每个字符应用键索引计数法 for (int d = w - 1; d >= 0; d--) { // 统计频率 int[] count = new int[R + 1]; for (int i = 0; i < n; i++) { count[data[i].charAt(d) + 1]++; } // 将频率转换为索引 for (int r = 0; r < R; r++) { count[r + 1] += count[r]; } // 数据分类 for (int i = 0; i < n; i++) { aux[count[data[i].charAt(d)]] = data[i]; count[data[i].charAt(d)]++; } // 回写 System.arraycopy(aux, 0, data, 0, n); } } }
测试:
class LSDTest { @Test void sort() { String[] data = new String[]{"4PGC938", "2IYE230", "3CIO720", "1ICK750", "1OHV845", "4JZY524", "1ICK750", "3CIO720", "1OHV845", "1OHV845", "2RLA629", "2RLA629", "3ATW723"}; LSD.sort(data, 7); Assertions.assertEquals( "[1ICK750, 1ICK750, 1OHV845, 1OHV845, 1OHV845, 2IYE230, 2RLA629, 2RLA629, 3ATW723, 3CIO720, 3CIO720, 4JZY524, 4PGC938]", Arrays.toString(data) ); } }
适用性:适用于变长字符串
思路:将字符串从左到右按字符排序
算法实现:
/** * 高位优先字符串排序算法 * */ public class MSD { private static final int R = 256; // 字符集大小 private static String[] aux; public static void sort(String[] data) { aux = new String[data.length]; sort(data, 0, data.length - 1, 0); } /** * @param lo 子数组开始位置 * @param hi 子数组结束位置 * @param d 处理索引位置为 d 的字符 * */ private static void sort(String[] data, int lo, int hi, int d) { if (lo >= hi) { return; } int n = data.length; // 统计频率 int[] count = new int[R + 2]; for (int i = lo; i <= hi; i++) { count[charAt(data[i], d) + 2]++; } // 将频率转换为索引 for (int r = 0; r < R + 1; r++) { count[r + 1] += count[r]; } // 数据分类 for (int i = lo; i <= hi; i++) { aux[count[charAt(data[i], d) + 1]] = data[i]; count[charAt(data[i], d) + 1]++; } // 回写 for (int i = lo; i <= hi; i++) { data[i] = aux[i - lo]; } // 递归处理子数组 for (int r = 0; r < R; r++) { sort(data, lo + count[r], lo + count[r + 1] - 1, d + 1); } } /** * @return 字符串中索引位置为 d 的字符,超过字符串长度返回 -1 * */ private static int charAt(String s, int d) { if (d < s.length()) { return s.charAt(d); } return -1; } }
测试:
class MSDTest { @Test void sort() { String[] data = new String[]{"she", "sells", "seashells", "by", "the", "seashore", "the", "shells", "she", "sells", "are", "surely", "seashells"}; MSD.sort(data); Assertions.assertEquals( "[are, by, seashells, seashells, seashore, sells, sells, she, she, shells, surely, the, the]", Arrays.toString(data) ); } }
算法实现:
/** * 三向字符串快速排序 * */ public class Quick3String { public static void sort(String[] data) { sort(data, 0, data.length - 1, 0); } private static void sort(String[] data, int lo, int hi, int d) { if (lo >= hi) { return; } int lt = lo; int gt = hi; int v = chatAt(data[lo], d); int i = lo + 1; while (i <= gt) { int t = chatAt(data[i], d); if (t < v) { exch(data, lt++, i++); } else if (t > v) { exch(data, gt--, i); } else { i++; } } sort(data, lo, lt - 1, d); if (v > 0) sort(data, lt, gt, d + 1); sort(data, gt + 1, hi, d); } private static int chatAt(String s, int d) { if (d < s.length()) { return s.charAt(d); } return -1; } private static void exch(String[] data, int i, int j) { String t = data[i]; data[i] = data[j]; data[j] = t; } }
测试:
class Quick3StringTest { @Test void sort() { String[] data = new String[]{"edu.princeton.cs", "com.apple", "edu.princeton.cs", "com.cnn", "com.google", "edu.uva.cs", "edu.princeton.cs", "edu.princeton.cs.www", "edu.uva.cs", "edu.uva.cs", "edu.uva.cs", "com.adobe", "edu.princeton.ee"}; Quick3String.sort(data); Assertions.assertEquals( "[com.adobe, com.apple, com.cnn, com.google, edu.princeton.cs, edu.princeton.cs, edu.princeton.cs, edu.princeton.cs.www, edu.princeton.ee, edu.uva.cs, edu.uva.cs, edu.uva.cs, edu.uva.cs]", Arrays.toString(data) ); } }