归并排序算法介绍,请参照Wikipeida
zh.wikipedia.org/wiki/%E5%BD%92%E5%B9%B6%E6%8E%92%E5%BA%8F
基本思想:
大文件分割成行数相等的两个子文件,递归(归并排序)两个子文件,直到递归到分割成的子文件低于限制行数
低于限制行数的子文件直接排序
两个排序好的子文件归并到父文件
直到最后所有排序好的父文件归并到输入的大文件并返回
之前看了网上很多示例代码,写的很不简洁, 引入了过多的临时变量i, j, k等等, 导致程序基本没法看,
只好自己写了一个,没有很关心执行效率, 只求够用,以后有机会再优化一下吧。
Performance:
输入999999行 on
Intel(R) Core(TM)2 Duo CPU @ 2.66GHz 硬盘 5400转
cost: 8951 MILLISECONDS
cost: 8 MICROSECONDS per line
JDK要求
Java 8
package com.java.sort.merge; import com.google.common.base.Charsets; import com.google.common.base.Stopwatch; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; import com.google.common.collect.PeekingIterator; import com.google.common.io.Files; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.io.filefilter.AndFileFilter; import org.apache.commons.io.filefilter.PrefixFileFilter; import org.apache.commons.io.filefilter.SuffixFileFilter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import java.io.*; import java.util.Iterator; import java.util.TreeSet; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; public class FileMergeSort { private static final Logger log = LogManager.getLogger(); private static final long total = 999999L; private static final int limit = 99999; private static void cleanTempFiles() { FilenameFilter filter = new AndFileFilter(ImmutableList.of(new PrefixFileFilter("sort"), new SuffixFileFilter(".part"))); ImmutableList.copyOf(FileUtils.getTempDirectory().listFiles(filter)).forEach(File::delete); } private static long lineNumber(File input) throws IOException { try (Stream<String> stream = Files.newReader(input, Charsets.UTF_8).lines()) { return stream.count(); } } private static File split(File input, long from, long to) throws IOException { File part = File.createTempFile("sort", ".part"); long lineNumber = 0L; String line = null; try (Stream<String> stream = Files.newReader(input, Charsets.UTF_8).lines(); Writer writer = Files.newWriter(part, Charsets.UTF_8)) { Iterator<String> lineIterator = stream.iterator(); while (lineIterator.hasNext() && lineNumber <= to) { line = lineIterator.next(); if (lineNumber >= from) { writer.write(line.concat(IOUtils.LINE_SEPARATOR)); } lineNumber++; } } return part; } private static File merge(File source, File left, File right) throws IOException { try (Stream<String> leftStream = Files.newReader(left, Charsets.UTF_8).lines(); Stream<String> rightStream = Files.newReader(right, Charsets.UTF_8).lines(); Writer writer = Files.newWriter(source, Charsets.UTF_8)) { PeekingIterator<String> leftPeekingIterator = Iterators.peekingIterator(leftStream.iterator()); PeekingIterator<String> rightPeekingIterator = Iterators.peekingIterator(rightStream.iterator()); String leftLine, rightLine; writer.write(""); while (leftPeekingIterator.hasNext() && rightPeekingIterator.hasNext()) { leftLine = leftPeekingIterator.peek(); rightLine = rightPeekingIterator.peek(); if (leftLine.compareTo(rightLine) < 0) { writer.append(leftLine.concat(IOUtils.LINE_SEPARATOR)); leftPeekingIterator.next(); } else { writer.append(rightLine.concat(IOUtils.LINE_SEPARATOR)); rightPeekingIterator.next(); } } while (leftPeekingIterator.hasNext()) { writer.append(leftPeekingIterator.next().concat(IOUtils.LINE_SEPARATOR)); } while (rightPeekingIterator.hasNext()) { writer.append(rightPeekingIterator.next().concat(IOUtils.LINE_SEPARATOR)); } } return source; } private static File inMemorySort(File input) throws IOException { TreeSet<String> lines = new TreeSet<>(String::compareTo); try (BufferedReader reader = Files.newReader(input, Charsets.UTF_8);) { String line; while ((line = reader.readLine()) != null) { lines.add(line); } } FileUtils.writeLines(input, lines); return input; } public static File mergeSort(File input) throws IOException { long total = lineNumber(input); if (total <= limit) { return inMemorySort(input); } long half = total / 2L; File left = mergeSort(split(input, 0, half)); File right = mergeSort(split(input, half + 1, total)); return merge(input, left, right); } @BeforeClass public static void init() throws IOException { cleanTempFiles(); int minLength = String.valueOf(total).length(); try (Writer writer = Files.newWriter(new File("z:\\long.txt"), Charsets.UTF_8)) { writer.write(""); for (long i = total; i > 0L; i--) { writer.append(Strings.padStart(String.valueOf(i), minLength, '0').concat(IOUtils.LINE_SEPARATOR)); } } } @AfterClass public static void clean() { cleanTempFiles(); } @Test public void testSort() throws IOException { Stopwatch watch = Stopwatch.createStarted(); File sorted = mergeSort(new File("z:\\long.txt")); watch.stop(); log.info(String.format("cost: %s MILLISECONDS", watch.elapsed(TimeUnit.MILLISECONDS))); log.info(String.format("cost: %s MICROSECONDS per line", watch.elapsed(TimeUnit.MICROSECONDS) / total)); } }
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.java.app</groupId> <artifactId>sample</artifactId> <version>1.0-SNAPSHOT</version> <dependencies> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.4</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> <version>18.0</version> </dependency> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-api</artifactId> <version>2.1</version> </dependency> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-core</artifactId> <version>2.1</version> </dependency> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-jcl</artifactId> <version>2.1</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> <scope>test</scope> </dependency> </dependencies> </project>
相关推荐
根据算法导论实现的归并排序算法
C语言二路归并排序算法, 写了个二路归并的归并排序小代码,直接贴上来
C++实现希尔、快速、堆排序、归并排序算法,一些中文注释可能成乱码了,但是不影响代码执行。
实现以下常用的内部排序算法并进行性能比较:"直接插入排序"," 折半插入排序"," 2—路插入排序"," 表插入排序"," 希尔排序"," 起泡排序"," 快速排序"," 简单选择排序"," 树形选择排序"," 堆排序"," 归并排序"," 链式...
本人自己写的一些排序算法,这是系列1归并排序算法实现,
C语言算法之归并排序C语言算法之归并排序C语言算法之归并排序C语言算法之归并排序
随机生成一千个数,并进行排序。四中排序算法,冒泡,简单选择,归并排序,堆排序。
7大排序算法(快速排序,冒泡排序,选择排序,归并排序,插入排序,希尔排序,堆排序)实现源码
易语言归并排序算法源码,归并排序算法,归并排序,子程序_有序数组合并
快速排序、归并排序、基数排序等排序算法比较,比较时间性能,采用C++语言实现。。。
随机产生10000个浮点数,保存到a.txt文件中,再读取到内存中并分别用简单选择排序、冒泡排序、快速排序、希尔排序、归并排序、堆排序算法进行排序,显示排序过的数列的第1、10、100、1000、10000的具体数字和每个...
自己写的三个排序算法的比较。快速排序、归并排序、简单排序 对三个排序算法所消耗时间进行统计,比较时间效率 程序是在Linux下用C写的,vc下并未做测试。
算法设计,给出归并排序的C++实现代码,并利用给随机数方式求运行时间
归并排序算法C语言版
根号n段归并排序算法时间复杂度分析过程: 1.合并 根号n向下取整 段子数组使用的是自底向上两两归并的策略 2.根号n段归并排序算法时间复杂度的数学推导
排序算法java版,速度排行:冒泡排序、简单选择排序、直接插入排序、折半插入排序、希尔排序、堆排序、归并排序、快速排序.mht
本文件是7种常用排序算法的实现(C++),包括冒泡排序、选择排序、直接插入排序、希尔排序、堆排序、归并排序以及快速排序。代码详细有注释且有测试用例。
完整的实现了归并排序的算法,使用C语言实现,相信看过本程序之后,会对归并排序了如指掌
MATLAB实现《算法设计与分析》中的插入排序、二分归并排序、归并排序实验,其中包括.m文件和实验报告,安徽大学本科课程。