节点类
/** * 节点类 */ class Node implements Comparable<Node> { Byte data; int weight; Node left; Node right; public Node(int weight) { this.weight = weight; } public Node(Byte data, int weight) { this.data = data; this.weight = weight; } /** * 前序遍历 */ public void preOrder() { System.out.println(this); if (this.left != null) { this.left.preOrder(); } if (this.right != null) { this.right.preOrder(); } } @Override public int compareTo(Node o) { return this.weight - o.weight; } @Override public String toString() { return "Node{" + "data=" + data + ", weight=" + weight + '}'; } }
首先根据字符串数据创建Node集合
/** * 将字符数组按出现个数转换成一个个Node,并存到List返回 * * @param bytes 被转换的字符数组 * @return 转换后的list */ private static List<Node> getNodes(byte[] bytes) { ArrayList<Node> nodes = new ArrayList<>(); // 遍历统计出现的次数 HashMap<Byte, Integer> map = new HashMap<>(); for (byte b : bytes) { Integer integer = map.get(b); if (integer == null) { map.put(b, 1); } else { map.put(b, integer + 1); } } // 将map里的元素转成一个个Node Set<Map.Entry<Byte, Integer>> entries = map.entrySet(); for (Map.Entry<Byte, Integer> entry : entries) { Node node = new Node(entry.getKey(), entry.getValue()); // 并加入list nodes.add(node); } return nodes; }
根据Node集合创建哈夫曼树
/** * 根据Node集合创建哈夫曼树 */ private static Node createHuffmanTree(List<Node> nodes) { while (nodes.size() > 1) { Collections.sort(nodes); Node left = nodes.get(0); Node right = nodes.get(1); // 通过另一个构造器 Node parent = new Node(left.weight + right.weight); parent.left = left; parent.right = right; nodes.remove(left); nodes.remove(right); nodes.add(parent); } return nodes.get(0); }
根据哈夫曼树构造哈夫曼编码表
static HashMap<Byte, String> huffmanCodes = new HashMap<>(); static StringBuilder stringbuilder = new StringBuilder(); /** * 为了方便调用,重载 */ private static HashMap<Byte, String> createEncoding(Node root) { if (root == null) { return null; } // 为了减少root的一次 createEncoding(root.left, "0", stringbuilder); createEncoding(root.right, "1", stringbuilder); return huffmanCodes; } /** * 根据哈夫曼树创建对应的哈夫曼编码表 * * @param node 传入的节点 * @param code 路径:左字节为0,右子节点为1 * @param stringBuilder 用于拼接路径 */ private static void createEncoding(Node node, String code, StringBuilder stringBuilder) { StringBuilder builder = new StringBuilder(stringBuilder); builder.append(code); if (node != null) { // 判断是不是应该编码的 if (node.data == null) { // 左右递归 createEncoding(node.left, "0", builder); createEncoding(node.right, "1", builder); } else { // 如果是 huffmanCodes.put(node.data, builder.toString()); } } }
将需要压缩的字节数组根据编码表压缩
/** * 将字节数组根据编码表压缩 * * @param bytes 被转换的字节数组 * @return 压缩后的数组 */ private static byte[] zip(byte[] bytes, HashMap<Byte, String> huffmanCodes) { StringBuilder builder = new StringBuilder(); // 遍历bytes for (byte b : bytes) { // 根据编码表拿到对应的编码 String code = huffmanCodes.get(b); stringbuilder.append(code); } // 编码后再次压缩,转成byte数组 // 先得到byte[]的长度 int length = stringbuilder.length(); int len; // 或者写成len = (length + 7) / 8; if (length % 8 == 0) { len = length / 8; } else { len = length / 8 + 1; } byte[] result = new byte[len]; // 每8位对应 int index = 0; for (int i = 0; i < length; i += 8) { String str; if (i + 8 < length) { // 够八位 str = stringbuilder.substring(i, i + 8); } else { // 最后不够八位 str = stringbuilder.substring(i); } // 转成一个byte存入 result[index++] = (byte) Integer.parseInt(str); } return result; }
把所有方法调用封装起来,方便调用
/** * 封装,方便调用 * @param str 传入的字符串 * @return 经过哈夫曼编码后的字节数组 */ private static byte[] huffmanZip(String str) { byte[] bytes = str.getBytes(); // 转成Node集合 List<Node> nodes = getNodes(bytes); // 创建哈夫曼树 Node huffmanTree = createHuffmanTree(nodes); // 创建编码表 HashMap<Byte, String> encoding = createEncoding(huffmanTree); // 压缩并返回 return zip(bytes, huffmanCodes); }
测试方法
public static void main(String[] args) { // 需要被转换的字符串 String text = "i like like like java do you like a java"; byte[] bytes = huffmanZip(text); System.out.println(Arrays.toString(bytes)); }