java PPT 转成PDF,中文乱码解决

jopen 8年前

        ppt转成pdf,原理是先把ppt转成图片,再用图片生成pdf。过程中有个问题:不管是ppt还是pptx,都会遇到中文乱码、变成方框的问题。其中ppt后缀的网上随便找就有解决方案,就是把字体设置为统一字体;pptx如果一个页面只有一种中文字体不会有问题,如果一个页面同时有微软雅黑和宋体,就会导致部分中文变成方框。怀疑是poi处理的时候只读取第一种字体,所以导致多种中文字体并存时出现乱码。

        百度和谷歌都找了很久,有看到说apache官网有人说是bug(https://bz.apache.org/bugzilla/show_bug.cgi?id=54880),但他们回复说是字体问题,这个问题其实我觉得poi可能可以自己做,读取原来字体设置成当前字体,不过性能应该会有很多消耗,反正我估计很多人跟我一样花费大量时间找解决方案,网上几乎没有现成的方案。自己也是一步步尝试,最终找到解决办法,ppt格式的就不说了网上找得到,pptx后缀的网上我是没找到。

    问题前的pptx转成图片:

解决后的pptx转成图片:

解决方法:

提取每个shape,将其中的文字转成统一的字体。网上找到的那段代码不可行,我自己改的方案如下:

             for( XSLFShape shape : slide[i].getShapes() ){                      if ( shape instanceof XSLFTextShape ){                          XSLFTextShape txtshape = (XSLFTextShape)shape ;                          System.out.println("txtshape" + (i+1) + ":"  + txtshape.getShapeName());                          System.out.println("text:" +txtshape.getText());                                                    for ( XSLFTextParagraph textPara : txtshape.getTextParagraphs() ){                              List<XSLFTextRun> textRunList = textPara.getTextRuns();                              for(XSLFTextRun textRun: textRunList) {                                  textRun.setFontFamily("宋体");                              }                          }                      }                  }

完整代码如下(除了以上自己的解决方案,大部分是网上摘抄):

public static void convertPPTToPDF(String sourcepath, String destinationPath, String fileType) throws Exception {          FileInputStream inputStream = new FileInputStream(sourcepath);          double zoom = 2;          AffineTransform at = new AffineTransform();          at.setToScale(zoom, zoom);          Document pdfDocument = new Document();          PdfWriter pdfWriter = PdfWriter.getInstance(pdfDocument, new FileOutputStream(destinationPath));          PdfPTable table = new PdfPTable(1);          pdfWriter.open();          pdfDocument.open();          Dimension pgsize = null;          Image slideImage = null;          BufferedImage img = null;          if (fileType.equalsIgnoreCase(".ppt")) {              SlideShow ppt = new SlideShow(inputStream);              inputStream.close();              pgsize = ppt.getPageSize();              Slide slide[] = ppt.getSlides();              pdfDocument.setPageSize(new Rectangle((float) pgsize.getWidth(), (float) pgsize.getHeight()));              pdfWriter.open();              pdfDocument.open();              for (int i = 0; i < slide.length; i++) {                                    TextRun[] truns = slide[i].getTextRuns();                        for ( int k=0;k<truns.length;k++){                           RichTextRun[] rtruns = truns[k].getRichTextRuns();                          for(int l=0;l<rtruns.length;l++){        //                       int index = rtruns[l].getFontIndex();        //                        String name = rtruns[l].getFontName();                                          rtruns[l].setFontIndex(1);                                rtruns[l].setFontName("宋体");                                               }                        }                                                            img = new BufferedImage((int) Math.ceil(pgsize.width * zoom), (int) Math.ceil(pgsize.height * zoom), BufferedImage.TYPE_INT_RGB);                  Graphics2D graphics = img.createGraphics();                  
graphics.setTransform(at);                    graphics.setPaint(Color.white);                  graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));                  slide[i].draw(graphics);                  graphics.getPaint();                  slideImage = Image.getInstance(img, null);                  table.addCell(new PdfPCell(slideImage, true));              }          }          if (fileType.equalsIgnoreCase(".pptx")) {              XMLSlideShow ppt = new XMLSlideShow(inputStream);              pgsize = ppt.getPageSize();              XSLFSlide slide[] = ppt.getSlides();              pdfDocument.setPageSize(new Rectangle((float) pgsize.getWidth(), (float) pgsize.getHeight()));              pdfWriter.open();              pdfDocument.open();                                          for (int i = 0; i < slide.length; i++) {                  for( XSLFShape shape : slide[i].getShapes() ){                      if ( shape instanceof XSLFTextShape ){                          XSLFTextShape txtshape = (XSLFTextShape)shape ;                         // System.out.println("txtshape" + (i+1) + ":"  + txtshape.getShapeName());                          //System.out.println("text:" +txtshape.getText());                                                    for ( XSLFTextParagraph textPara : txtshape.getTextParagraphs() ){                              List<XSLFTextRun> textRunList = textPara.getTextRuns();                              for(XSLFTextRun textRun: textRunList) {                                  textRun.setFontFamily("宋体");                              }                          }                      }                  }                  img = new BufferedImage((int) Math.ceil(pgsize.width * zoom), (int) Math.ceil(pgsize.height * zoom), BufferedImage.TYPE_INT_RGB);                  Graphics2D graphics = img.createGraphics();                  graphics.setTransform(at);                  graphics.setPaint(Color.white);                  graphics.fill(new 
Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));                  slide[i].draw(graphics);                                      //                FileOutputStream out = new FileOutputStream("src/main/resources/test"+i+".jpg");    //                javax.imageio.ImageIO.write(img, "jpg", out);                                                                        graphics.getPaint();                  slideImage = Image.getInstance(img, null);                  table.addCell(new PdfPCell(slideImage, true));              }          }          pdfDocument.add(table);          pdfDocument.close();          pdfWriter.close();          System.out.println("Powerpoint file converted to PDF successfully");      }

maven配置:

<!-- Apache POI core; 3.13 was an alternative version left commented out. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.9</version>
</dependency>
<!-- Apache POI OOXML support; 3.10-FINAL was an alternative version left commented out. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.9</version>
</dependency>
<!-- iText PDF library. -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.7</version>
</dependency>
<!-- iText XML Worker. -->
<dependency>
    <groupId>com.itextpdf.tool</groupId>
    <artifactId>xmlworker</artifactId>
    <version>5.5.7</version>
</dependency>
<!-- Apache POI scratchpad; 3.12 was an alternative version left commented out. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.9</version>
</dependency>

参考资料:

http://www.tutorialspoint.com/apache_poi_ppt/apache_poi_ppt_quick_guide.htm