使用PDFBox读取PDF

13年前
import java.io.File;  import java.io.FileInputStream;  import java.io.IOException;  import org.pdfbox.pdfparser.PDFParser;  import org.pdfbox.pdmodel.PDDocument;  import org.pdfbox.util.PDFTextStripper;  /**   *   * @author 刘毅   * @date 2010-2-24   * @ClassName ReaderForPDF.java   * @Email liu_yi126@163.com   * @param 读取PDF   * @param   */  public class ReaderForPDF {    /**     * 读PDF文件,使用了pdfbox开源项目     * @param fileName     */    public void readPDF(String fileName) {     File file = new File(fileName);     FileInputStream in = null;     try {    in = new FileInputStream(fileName);     //新建一个PDF解析器对象      PDFParser parser = new PDFParser(in);      //对PDF文件进行解析      parser.parse();    //获取解析后得到的PDF文档对象    PDDocument pdfdocument = parser.getPDDocument();      //新建一个PDF文本剥离器    PDFTextStripper stripper = new PDFTextStripper();    //从PDF文档对象中剥离文本      String result = stripper.getText(pdfdocument);      System.out.println("PDF文件" + file.getAbsolutePath() + "的文本内容如下:");      System.out.println(result);     } catch (Exception e) {    System.out.println("读取PDF文件"+ file.getAbsolutePath() + "生失败!" + e);      e.printStackTrace();     } finally {      if (in != null){       try {        in.close();       } catch (IOException e1) {       }      }     }    }      public static void main(String[] args) {     ReaderForPDF pdf = new ReaderForPDF();      String fileName = "src/tempPDF.pdf";      try {     pdf.readPDF(fileName);    } catch (Exception e) {     e.printStackTrace();    }     }  }