Java将HTML文档转换成PDF文件

本文使用 iText 来完成 HTML 转 PDF 的工作。
一、引入相关环境
itext-asian-5.2.0.jar
itextpdf-5.5.13.2.jar
xmlworker-5.5.13.2.jar
jsoup-1.10.3.jar:HTML 解析器,用来规范化 HTML 文档
为支持中文需要引入相关字体:
GB2312.ttf、simhei.ttf、simsun.ttc
二、相关代码实例及说明
1、首先,将要转换成 PDF 的 HTML 文档规范化,补全缺失的标签。
/**
 * Normalizes an HTML string so that it can be fed to an XHTML parser.
 *
 * <p>The input is parsed with jsoup, inline styles that mention {@code width}
 * are cleared on the document and on every {@code div}, every {@code td} that
 * already has a non-empty inline style gets a fixed thin black border instead,
 * and the result is serialized with XML syntax and XHTML entity escaping.
 *
 * @param html the raw HTML markup to normalize
 * @return the normalized markup as produced by {@code Document.html()}
 */
private static String formatHtml(String html) {
    org.jsoup.nodes.Document parsed = Jsoup.parse(html);

    // Remove overly large widths: any inline style mentioning "width" is wiped entirely.
    clearStyleIfItMentionsWidth(parsed);
    for (org.jsoup.nodes.Element block : parsed.select("div")) {
        clearStyleIfItMentionsWidth(block);
    }

    // Work around display problems caused by table cell attributes:
    // a styled cell has its whole style replaced by a plain border.
    for (org.jsoup.nodes.Element cell : parsed.select("td")) {
        if (cell.attr("style").isEmpty()) {
            continue;
        }
        cell.attr("style", "border:#000000 0.5pt solid;");
    }

    // Have jsoup serialize with XML syntax so that every tag is closed.
    parsed.outputSettings()
            .syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml)
            .escapeMode(Entities.EscapeMode.xhtml);
    return parsed.html();
}

/** Empties the inline style of {@code element} when that style contains the text "width". */
private static void clearStyleIfItMentionsWidth(org.jsoup.nodes.Element element) {
    String inlineStyle = element.attr("style");
    if (inlineStyle.isEmpty() || !inlineStyle.contains("width")) {
        return;
    }
    element.attr("style", "");
}
2、将html文档转换成pdf
public void html2pdf(String html,boolean image) {
【Java将HTML文档转换成PDF文件】String DEST = "d:/tmp/sample.pdf";
String FONT = "D:/tmp/simsun.ttc";
String FONT2 = "D:/tmp/simhei.ttf";
String FONT3 = "D:/tmp/GB2312.ttf";
// step 1
Document document = new Document();
// step 2
PdfWriter writer = null;
try {
writer = PdfWriter.getInstance(document, new FileOutputStream(DEST));
//给每页加背景图案
if(image){
writer.setPageEvent(new BackgroundImage("D:/tmp/Image.png"));
}
// step 3
document.open();
// step 4
XMLWorkerFontProvider fontImp = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
fontImp.register(FONT);
fontImp.register(FONT2);
fontImp.register(FONT3);
XMLWorkerHelper.getInstance().parseXHtml(writer, document,
new ByteArrayInputStream(html.getBytes("UTF-8")), null, Charset.forName("UTF-8"),fontImp);
// step 5
document.close();
} catch (DocumentException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
3、给 PDF 的每页加上背景图案的页面事件类。
/**
 * Page event that adds a background image to every page as the page starts.
 *
 * <p>The image file is loaded on first use and the same {@code Image} object
 * is reused for later pages, so the file is not read again for each page.
 * Changing the path with {@link #setPath(String)} discards the loaded image.
 */
public class BackgroundImage extends PdfPageEventHelper {
    /** Location of the image file. */
    private String path = "";
    /** Image loaded from {@link #path}; {@code null} until first needed or after the path changes. */
    private Image cachedImage;

    /**
     * @param path location of the image file to draw on each page
     */
    public BackgroundImage(String path) {
        this.path = path;
    }

    /**
     * @return the location of the image file currently in use
     */
    public String getPath() {
        return path;
    }

    /**
     * Switches to a different image file.
     *
     * @param path location of the new image file
     */
    public void setPath(String path) {
        this.path = path;
        // Force a reload from the new location on the next page.
        this.cachedImage = null;
    }

    /**
     * Adds the background image at the lower-left corner of the new page,
     * stretched to the document's page size. Loading or adding failures are
     * printed to standard error and the page is left without a background.
     */
    @Override
    public void onStartPage(PdfWriter writer, Document document) {
        try {
            if (cachedImage == null) {
                Image loaded = Image.getInstance(path);
                loaded.setAlignment(Image.UNDERLYING);
                loaded.setAbsolutePosition(0, 0);
                cachedImage = loaded;
            }
            // Scale to the actual page size (595 x 842 for the default A4 page).
            cachedImage.scaleAbsolute(
                    document.getPageSize().getWidth(),
                    document.getPageSize().getHeight());
            document.add(cachedImage);
        } catch (DocumentException e) {
            // Also covers BadElementException raised while loading the image.
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        super.onStartPage(writer, document);
    }
}

相关经验推荐