把本地文件夹下的所有文件上传到 HDFS 并合并成一个文件

uttq1257 8年前

来自: http://my.oschina.net/u/914897/blog/616682


需要自己写代码来实现:

/*
 * PutMerge: uploads every regular file found directly inside a local directory
 * to HDFS, concatenating them into one HDFS file.
 */
package com.jason.hadoop.example;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Command-line tool that merges the files of a local directory into a single
 * file on HDFS.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - local input directory</li>
 *   <li>{@code args[1]} - URI of the target file system, e.g. {@code hdfs://localhost:9000}</li>
 *   <li>{@code args[2]} - path of the merged output file on the target file system</li>
 * </ol>
 *
 * <p>Files are appended in the order returned by {@code FileSystem.listStatus};
 * no sorting is applied. Sub-directories of the input directory are skipped.
 *
 * @author jason
 */
public class PutMerge {

    /** Size in bytes of the copy buffer shared by all input files. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Entry point: merges the files under {@code args[0]} into {@code args[2]}
     * on the file system identified by {@code args[1]}.
     *
     * @param args local input directory, target file system URI, output file path
     * @throws IOException if listing, reading or writing fails; the failure is
     *         propagated so the process ends with a non-zero exit status instead
     *         of reporting success after a partial upload
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 3) {
            System.err.println(
                    "Usage: PutMerge <local input dir> <file system uri> <output file>");
            System.exit(1);
        }

        Configuration conf = new Configuration();
        // NOTE(review): Configuration.addResource(String) looks the name up on the
        // classpath; the "classpath:/" prefix may prevent these files from being
        // found at all. Names are left unchanged here - confirm and adjust.
        conf.addResource("classpath:/hadoop/core-site.xml");
        conf.addResource("classpath:/hadoop/hdfs-site.xml");
        conf.addResource("classpath:/hadoop/mapred-site.xml");

        FileSystem hdfs = FileSystem.get(URI.create(args[1]), conf);
        FileSystem local = FileSystem.getLocal(conf);
        Path inputDir = new Path(args[0]);
        Path hdfsFile = new Path(args[2]);

        FileStatus[] inputFiles = local.listStatus(inputDir);
        // Presumably some Hadoop versions return null (rather than throwing) when
        // the path does not exist; guard so we fail with a clear message.
        if (inputFiles == null) {
            throw new IOException("Cannot list local input directory: " + inputDir);
        }

        FSDataOutputStream out = hdfs.create(hdfsFile);
        try {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (FileStatus inputFile : inputFiles) {
                // Opening a directory as a stream fails, so only plain files are merged.
                if (inputFile.isDir()) {
                    continue;
                }
                System.out.println(inputFile.getPath().getName());
                appendFile(local, inputFile.getPath(), out, buffer);
            }
        } finally {
            // Always release the output stream, even when a copy fails midway.
            out.close();
        }
    }

    /**
     * Copies the full content of {@code source} to {@code out}, closing the
     * input stream afterwards regardless of outcome.
     *
     * @param fs     file system that holds {@code source}
     * @param source file to read
     * @param out    destination stream; left open for the caller
     * @param buffer scratch buffer reused across files
     * @throws IOException if opening, reading or writing fails
     */
    private static void appendFile(FileSystem fs, Path source, FSDataOutputStream out,
            byte[] buffer) throws IOException {
        FSDataInputStream in = fs.open(source);
        try {
            int bytesRead;
            // read() signals end of stream with -1.
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        } finally {
            in.close();
        }
    }
}

把工程打成jar包后,执行以下命令即可(三个参数依次为:本地输入目录、HDFS 的 URI、合并后文件在 HDFS 上的路径):

hadoop jar hadoopExample-1.0-SNAPSHOT.jar com.jason.hadoop.example.PutMerge /home/jason/hadoop-1.0.1/put_merge hdfs://localhost:9000 /example/put_merge/in/merge_222.txt