本文介绍一种将PDF文档转换为HTML文档的实现方式:先把PDF的每一页渲染为PNG图片,再将各页图片纵向拼接成一张长图,然后把拼接后的图片编码为Base64字符串,最后将其嵌入HTML内容并写入HTML文件。
引入maven依赖
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<!-- Maven element names are case-sensitive: groupId / artifactId, not groupid / artifactid. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.12</version>
</dependency>
工具实现类
package com.frame.utils;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

/**
 * Converts a PDF document into a single-image HTML document.
 *
 * <p>Each PDF page is rendered to an image, the page images are stacked vertically into one
 * picture, that picture is PNG-encoded and Base64-encoded, and the result is embedded in an
 * HTML file as a {@code data:} URI.
 *
 * @author lxw
 * @date 2020/6/17 16:45
 */
public class PdfConvertHtmlUtil {

    /** Logger used to report conversion failures. */
    private static final Logger logger = LoggerFactory.getLogger(PdfConvertHtmlUtil.class);

    /** Resolution, in dots per inch, at which every PDF page is rendered. */
    private static final float RENDER_DPI = 144;

    /**
     * Renders every page of a PDF and stacks the pages vertically into one image.
     *
     * <p>The stream is read but not closed by this method; closing it is the caller's job.
     *
     * @param pdfFileInputStream stream positioned at the start of the PDF data
     * @return the combined image, or {@code null} if the document has no pages or an I/O error
     *         occurs while loading, rendering or closing it
     */
    public static BufferedImage pdfStreamToPng(InputStream pdfFileInputStream) {
        try (PDDocument doc = PDDocument.load(pdfFileInputStream)) {
            int pageCount = doc.getNumberOfPages();
            if (pageCount == 0) {
                // Nothing to draw; combining zero images would fail on a 0x0 canvas.
                logger.warn("PDF document contains no pages; nothing to render");
                return null;
            }

            PDFRenderer renderer = new PDFRenderer(doc);
            // Render all pages first and combine once, instead of re-copying the growing
            // image after every page.
            BufferedImage[] pages = new BufferedImage[pageCount];
            for (int i = 0; i < pageCount; i++) {
                pages[i] = renderer.renderImageWithDPI(i, RENDER_DPI);
            }
            return combineBufferedImages(pages);
        } catch (IOException e) {
            logger.error("Failed to render PDF document to an image", e);
            return null;
        }
    }

    /**
     * Stacks the given images vertically, in order, into a single ARGB image.
     *
     * <p>Before each image is painted, a 2px grey line is stroked across the canvas at that
     * image's top edge; the image is then drawn over it at the same position. Images are
     * left-aligned. The canvas is as wide as the widest image and as tall as the sum of all
     * heights.
     *
     * @param images images to stack, top to bottom; must contain at least one image
     * @return the combined image
     * @throws IllegalArgumentException if {@code images} is empty
     */
    public static BufferedImage combineBufferedImages(BufferedImage... images) {
        if (images.length == 0) {
            throw new IllegalArgumentException("At least one image is required");
        }

        int height = 0;
        int width = 0;
        for (BufferedImage image : images) {
            height += image.getHeight();
            // Use the widest image so that no page is cropped on the right.
            width = Math.max(width, image.getWidth());
        }

        BufferedImage combo = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
        Graphics2D g2 = combo.createGraphics();
        try {
            g2.setStroke(new BasicStroke(2.0f));      // separator thickness
            g2.setColor(new Color(193, 193, 193));    // separator colour
            int y = 0;
            for (BufferedImage image : images) {
                g2.drawLine(0, y, width, y);          // separator at the image's top edge
                g2.drawImage(image, 0, y, null);
                y += image.getHeight();
            }
        } finally {
            // Release the native resources held by the graphics context.
            g2.dispose();
        }
        return combo;
    }

    /**
     * Writes an HTML file that displays the given Base64-encoded PNG.
     *
     * <p>The page also contains a fixed "back to top" link. The file is written as UTF-8,
     * matching the charset declared in the generated {@code <meta>} tag. I/O failures are
     * logged and not rethrown.
     *
     * @param base64   Base64-encoded PNG data, without the {@code data:} prefix
     * @param htmlPath path of the HTML file to create or overwrite
     */
    public static void createHtmlByBase64(String base64, String htmlPath) {
        StringBuilder html = new StringBuilder();
        html.append("<html><head>");
        html.append("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">");
        html.append("<title></title>");
        html.append("</head>");
        html.append("<body style=\"\r\n"
                + " text-align: center;\r\n"
                + " background-color: #c1c1c1;\r\n"
                + "\">");
        html.append("<img src=\"data:image/png;base64,").append(base64).append("\" />");
        // Anchor at the top of the page, targeted by the "back to top" link below.
        html.append("<a name=\"head\" style=\"position:absolute;top:0px;\"></a>");
        html.append("<a style=\"position:fixed;bottom:10px;right:10px\" href=\"#head\">回到首页</a>");
        html.append("</body></html>");

        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(htmlPath), StandardCharsets.UTF_8))) {
            writer.write(html.toString());
            writer.write(System.lineSeparator());
        } catch (IOException e) {
            logger.error("Failed to write HTML file {}", htmlPath, e);
        }
    }

    /**
     * Encodes an image as PNG and returns the PNG bytes as a Base64 string.
     *
     * @param bufferedImage image to encode
     * @return the Base64 text on a single line (no line breaks), or an empty string if the
     *         image could not be written as PNG
     */
    public static String bufferedImageToBase64(BufferedImage bufferedImage) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try {
            if (!ImageIO.write(bufferedImage, "png", buffer)) {
                logger.error("No PNG writer available for the given image");
                return "";
            }
            // java.util.Base64 replaces the internal sun.misc encoder and emits no line breaks.
            return Base64.getEncoder().encodeToString(buffer.toByteArray());
        } catch (IOException e) {
            logger.error("Failed to encode image as PNG", e);
            return "";
        }
    }
}
测试demo
/**
 * Demo: converts a sample PDF file into an HTML file next to it.
 *
 * <p>NOTE(review): the calls below are unqualified, so this method is presumably meant to
 * live inside the utility class that declares them; confirm before pasting elsewhere.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Every backslash in a Windows path must be doubled. A bare "\1" is an octal escape
    // (the character U+0001), not a backslash followed by the digit 1.
    File file = new File("f:\\1\\files\\mysql查询语句大全集锦(经典珍藏).pdf");
    String htmlPath = "f:\\1\\files\\mysql查询语句大全集锦(经典珍藏).html";

    try (InputStream inputStream = new FileInputStream(file)) {
        BufferedImage bufferedImage = pdfStreamToPng(inputStream);
        if (bufferedImage == null) {
            // Rendering failed or produced nothing; there is no image to embed.
            System.err.println("Could not render PDF: " + file);
            return;
        }
        String base64Png = bufferedImageToBase64(bufferedImage);
        createHtmlByBase64(base64Png, htmlPath);
    } catch (IOException e) {
        System.err.println("Could not read PDF " + file + ": " + e);
    }
}
最终结果 转换后文件
转换后的文件内容
文件预览效果
到此这篇关于java实现pdf转html文档的示例代码的文章就介绍到这了,更多相关java pdf转html内容请搜索www.887551.com以前的文章或继续浏览下面的相关文章,希望大家以后多多支持www.887551.com!
黄山市民网:https://www.huangshanshimin.com/