问题描述:有一个大文件,包含很多行,每一行都是一个 int 类型的数据,要求将这些数据按照从小到大的顺序进行排序。
package com.example.test;

import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Random;

/**
 * External sort of a large text file that holds one {@code int} per line.
 *
 * <p>The work is done in three phases: the source file is distributed round-robin over a fixed
 * number of temporary files, each temporary file is sorted in memory and rewritten in place, and
 * finally the sorted temporary files are merged (k-way merge) into a single sorted output file.
 * Every line written by this class is terminated with {@code "\r\n"}.
 */
public class BigFileSort {

    /** Directory that holds the large source file and receives the sorted result. */
    private static final String SOURCE_FILE_PATH = "/Users/enjoy/Documents/test";

    /** Directory that holds the intermediate temporary files. */
    private static final String TEMP_FILE_PATH = "/Users/enjoy/Documents/test/temp";

    /** Base name (without suffix) of the large source file. */
    private static final String SOURCE_FILE_NAME = "data";

    /** Base name (without suffix) of the generated sorted file. */
    private static final String SORTED_FILE_NAME = "sorted";

    /** Name prefix of the temporary files; the 1-based file index is appended to it. */
    private static final String TEMP_FILE_NAME_PREFIX = "temp-";

    /** Suffix shared by every file this class reads or writes. */
    private static final String FILE_SUFFIX = ".txt";

    /** Number of lines generated for the source file; also the exclusive upper bound of the values. */
    private static final int SOURCE_DATA_COUNT = 1000000;

    /** Number of temporary files the source file is split into. */
    private static final int TEMP_FILE_COUNT = 10;

    /** Line terminator appended to every value written. */
    private static final String LINE_SEPARATOR = "\r\n";

    /** Orders merge nodes by the value currently at the head of their file. */
    private static final Comparator<ReadNode> BY_VALUE = Comparator.comparingInt(node -> node.value);

    /**
     * Generates test data, then splits, sorts and merges it, printing progress and the elapsed time.
     *
     * @param args unused
     * @throws IOException if any of the phases fails to read or write a file
     */
    public static void main(String[] args) throws IOException {
        long startMillis = System.currentTimeMillis();
        String sourceFileName = SOURCE_FILE_PATH + "/" + SOURCE_FILE_NAME + FILE_SUFFIX;

        // Generate the test data.
        mockBigDataFile(sourceFileName, SOURCE_DATA_COUNT);
        System.out.println("存储完毕");

        // Distribute the large file over several small files.
        splitBigFile(SOURCE_FILE_PATH, TEMP_FILE_PATH, TEMP_FILE_COUNT);
        System.out.println("文件切割完毕!");

        // Sort the contents of each small file.
        sortTempFile(TEMP_FILE_PATH, TEMP_FILE_COUNT);
        System.out.println("每个子文件排序完毕!");

        // Merge the sorted small files into the final result.
        mergeTempSortedFile(SOURCE_FILE_PATH, TEMP_FILE_PATH, TEMP_FILE_COUNT);
        System.out.println("整合完毕");

        long stopMillis = System.currentTimeMillis();
        System.out.println("耗时" + (stopMillis - startMillis) + "毫秒");
    }

    /**
     * Writes {@code count} random non-negative integers below {@code SOURCE_DATA_COUNT} to
     * {@code fileName}, one per line, replacing any previous content.
     *
     * @param fileName full path of the file to generate
     * @param count    number of lines to write
     * @throws IOException if the file cannot be created or written
     */
    public static void mockBigDataFile(String fileName, int count) throws IOException {
        makeSureFileExists(SOURCE_FILE_PATH, fileName);
        // One generator for the whole run instead of a fresh instance per line.
        Random random = new Random();
        try (FileWriter fileWriter = new FileWriter(fileName);
             BufferedWriter writer = new BufferedWriter(fileWriter)) {
            for (int i = 0; i < count; i++) {
                writer.write(random.nextInt(SOURCE_DATA_COUNT) + LINE_SEPARATOR);
            }
        }
    }

    /**
     * Makes sure the directory {@code filePath}, the parent directory of {@code fileName} and the
     * file {@code fileName} itself all exist, creating whatever is missing.
     *
     * @param filePath directory to create if absent
     * @param fileName full path of the file to create if absent
     * @throws IOException if a directory or the file cannot be created
     */
    public static void makeSureFileExists(String filePath, String fileName) throws IOException {
        ensureDirectory(new File(filePath));
        File file = new File(fileName);
        File parent = file.getParentFile();
        if (parent != null) {
            // fileName is a full path and is not required to live inside filePath.
            ensureDirectory(parent);
        }
        if (!file.exists()) {
            file.createNewFile();
        }
    }

    /** Creates {@code dir} (and missing ancestors) unless it already is a directory. */
    private static void ensureDirectory(File dir) throws IOException {
        // The second isDirectory() check tolerates another process creating it concurrently.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Could not create directory " + dir);
        }
    }

    /**
     * Distributes the lines of the source file round-robin over {@code fileCount} temporary files
     * named {@code temp-1.txt} .. {@code temp-<fileCount>.txt} inside {@code tempFilePath}.
     * Existing temporary files are overwritten.
     *
     * @param sourceFilePath directory containing the source file
     * @param tempFilePath   directory that receives the temporary files
     * @param fileCount      number of temporary files to write; must be at least 1
     * @throws IOException              if the source cannot be read or a temporary file cannot be written
     * @throws IllegalArgumentException if {@code fileCount} is less than 1
     */
    public static void splitBigFile(String sourceFilePath, String tempFilePath, int fileCount) throws IOException {
        if (fileCount < 1) {
            // With no target file the input could never be consumed.
            throw new IllegalArgumentException("fileCount must be at least 1, got " + fileCount);
        }
        String sourceFile = sourceFilePath + "/" + SOURCE_FILE_NAME + FILE_SUFFIX;
        try (FileReader fileReader = new FileReader(sourceFile);
             BufferedReader reader = new BufferedReader(fileReader);
             ResourceGroup<BufferedWriter> writers = new ResourceGroup<>()) {
            for (int j = 1; j <= fileCount; j++) {
                String fileName = tempFilePath + "/" + TEMP_FILE_NAME_PREFIX + j + FILE_SUFFIX;
                makeSureFileExists(tempFilePath, fileName);
                writers.register(new BufferedWriter(new FileWriter(fileName, false)));
            }

            int target = 0;
            String line;
            // readLine() returning null is the end-of-file signal; ready() is not.
            while ((line = reader.readLine()) != null) {
                writers.get(target).write(line + LINE_SEPARATOR);
                // After one file received a line, move on to the next one, wrapping around.
                target = (target + 1) % fileCount;
            }
        }
    }

    /**
     * Sorts the integers of every temporary file {@code temp-1.txt} .. {@code temp-<fileCount>.txt}
     * in ascending order and rewrites each file in place.
     *
     * <p>A file that cannot be read or that contains a non-integer line is reported on standard
     * error and left untouched; processing continues with the next file.
     *
     * @param filePath  directory containing the temporary files
     * @param fileCount number of temporary files to sort
     */
    public static void sortTempFile(String filePath, int fileCount) {
        for (int i = 1; i <= fileCount; i++) {
            String path = filePath + "/" + TEMP_FILE_NAME_PREFIX + i + FILE_SUFFIX;
            LinkedList<Integer> nums = new LinkedList<>();
            try (FileReader fileReader = new FileReader(path);
                 BufferedReader reader = new BufferedReader(fileReader)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    nums.add(Integer.valueOf(line));
                }
            } catch (NumberFormatException | IOException e) {
                // Best effort, as before: report the problem and keep the file as it is.
                e.printStackTrace();
                continue;
            }
            // The reader is closed at this point, so the same path can be overwritten safely.
            Collections.sort(nums);
            sortedToFile(nums, path);
        }
    }

    /**
     * Writes the given values to {@code path}, one per line, replacing any previous content.
     * An I/O failure is reported on standard error and not propagated.
     *
     * @param list values to write, in iteration order
     * @param path full path of the file to write
     */
    public static void sortedToFile(LinkedList<Integer> list, String path) {
        try (FileWriter fileWriter = new FileWriter(path);
             BufferedWriter writer = new BufferedWriter(fileWriter)) {
            for (Integer value : list) {
                writer.write(value + LINE_SEPARATOR);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Merges the sorted temporary files {@code temp-1.txt} .. {@code temp-<fileCount>.txt} into
     * {@code sorted.txt} inside {@code filepath}, replacing any previous content.
     *
     * <p>Each temporary file is expected to be sorted ascending already; the merge repeatedly
     * emits the smallest value among the current heads of all files.
     *
     * @param filepath      directory that receives the sorted file
     * @param splitFilePath directory containing the temporary files
     * @param fileCount     number of temporary files to merge
     * @throws IOException           if a temporary file cannot be read or the result cannot be written
     * @throws NumberFormatException if a temporary file contains a line that is not an integer
     */
    public static void mergeTempSortedFile(String filepath, String splitFilePath, int fileCount) throws IOException {
        String sortedFile = filepath + "/" + SORTED_FILE_NAME + FILE_SUFFIX;
        try (FileWriter fileWriter = new FileWriter(sortedFile, false);
             BufferedWriter sortedWriter = new BufferedWriter(fileWriter);
             ResourceGroup<BufferedReader> readers = new ResourceGroup<>()) {
            // Holds one node per file that still has data, smallest head value first.
            PriorityQueue<ReadNode> heads = new PriorityQueue<>(BY_VALUE);
            for (int j = 1; j <= fileCount; j++) {
                String tempFile = splitFilePath + "/" + TEMP_FILE_NAME_PREFIX + j + FILE_SUFFIX;
                BufferedReader reader = readers.register(new BufferedReader(new FileReader(tempFile)));
                String first = reader.readLine();
                if (first != null) {
                    heads.add(new ReadNode(Integer.valueOf(first), reader));
                }
            }

            while (!heads.isEmpty()) {
                ReadNode smallest = heads.poll();
                sortedWriter.write(smallest.value + LINE_SEPARATOR);
                String next = smallest.getBr().readLine();
                if (next != null) {
                    // The node is outside the queue here, so changing its key is safe.
                    smallest.setValue(Integer.valueOf(next));
                    heads.add(smallest);
                }
            }
        }
    }

    /**
     * A list of closeable resources that is itself closeable, so that a variable number of
     * streams can be managed by a single try-with-resources statement.
     */
    private static final class ResourceGroup<T extends Closeable> implements Closeable {

        private final List<T> resources = new ArrayList<>();

        /** Adds {@code resource} to the group and returns it. */
        T register(T resource) {
            resources.add(resource);
            return resource;
        }

        /** Returns the resource registered at the given 0-based position. */
        T get(int index) {
            return resources.get(index);
        }

        /**
         * Closes every registered resource, even if some of them fail. The first failure is
         * rethrown with the later ones attached as suppressed exceptions.
         */
        @Override
        public void close() throws IOException {
            IOException failure = null;
            for (T resource : resources) {
                try {
                    resource.close();
                } catch (IOException e) {
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }
            if (failure != null) {
                throw failure;
            }
        }
    }
}

/**
 * Pairs the value most recently read from a temporary file with the reader it came from, so the
 * merge step can fetch the next value from the same file.
 */
class ReadNode {

    /** Value most recently read from {@link #br}. */
    Integer value;

    /** Reader positioned just after {@link #value}. */
    BufferedReader br;

    public ReadNode(Integer value, BufferedReader br) {
        this.value = value;
        this.br = br;
    }

    public void setValue(Integer value) {
        this.value = value;
    }

    public BufferedReader getBr() {
        return br;
    }
}