通过 LibreOffice 获取文件文本信息,以及转换 pdf 添加水印

导入jar包

需要对应的maven包

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
<!--转换工具-->
<!-- 文件转换,通过OpenOffice或LibreOffice来转换 -->
<!--libreoffice文档在线预览-->
<dependency>
<groupId>org.jodconverter</groupId>
<artifactId>jodconverter-core</artifactId>
<version>4.2.0</version>
</dependency>
<dependency>
<groupId>org.jodconverter</groupId>
<artifactId>jodconverter-local</artifactId>
<version>4.2.0</version>
</dependency>
<dependency>
<groupId>org.jodconverter</groupId>
<artifactId>jodconverter-spring-boot-starter</artifactId>
<version>4.2.0</version>
</dependency>
<dependency>
<groupId>org.libreoffice</groupId>
<artifactId>ridl</artifactId>
<version>6.4.3</version>
</dependency>
<!-- slf4j 日志门面 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.32</version>
</dependency>
<!-- slf4j 内置的简单实现 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.32</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>RELEASE</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.13</version>
</dependency>
<!-- itext7html转pdf -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>html2pdf</artifactId>
<version>3.0.2</version>
</dependency>
<!-- 中文字体支持 -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>font-asian</artifactId>
<version>7.1.13</version>
</dependency>

application.yml 配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
server:
  port: 8026
  tomcat:
    max-http-form-post-size: 100MB
    max-http-post-size: 100MB

jodconverter:
  local:
    enabled: true
    # office-home: /usr/lib64/libreoffice
    # libreoffice 应用地址目录
    office-home: D:/LibreOffice
    port-numbers: 8100
    max-tasks-per-process: 100
spring:
  application:
    name: office
  servlet:
    multipart:
      enabled: true
      max-file-size: 100MB
      max-request-size: 100MB

LibreOfficeUtil 类 用于 office 转换 pdf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
@Slf4j
public class LibreOfficeUtil {

private static DocumentConverter documentConverter;

private static void init() {
documentConverter = SpringUtil.getBean(DocumentConverter.class);
}

public static ByteArrayOutputStream convertToPdf(InputStream inputStream) {
init();
try {
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
log.info(">>> 待转换的文档类型:{}", inputStream);
log.info(">>> 转换的目标文档类型:{}", "PDF");
documentConverter.convert(inputStream).as(DefaultDocumentFormatRegistry.PDF).to(byteArrayOutputStream).as(DefaultDocumentFormatRegistry.PDF).execute();
return byteArrayOutputStream;
} catch (OfficeException e) {
e.printStackTrace();
}
log.info(">>> 文件转换结束");
return null;
}

}

PdfUtil 类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
/**
 * iText 5 helper that stamps text watermarks onto every page of a PDF.
 */
public class PdfUtil {

    /**
     * Stamps watermarks on every page of a PDF and writes the result to {@code outputStream}.
     *
     * <p>Each page gets one large centred watermark ({@code watermarkcorporation}, size 80,
     * rotated 50 degrees) and, when {@code waterMark} is not empty, a grid of smaller
     * watermarks (size 30, rotated 38 degrees). Existing outlines are replaced with an
     * empty list and viewer preferences are set to 0, as in the original implementation.
     *
     * @param inputStream          source PDF
     * @param outputStream         destination for the stamped PDF
     * @param waterMark            text of the tiled, slanted watermark; skipped when empty
     * @param watermarkcorporation text of the centred watermark; skipped when empty
     * @throws RuntimeException if reading, stamping or writing fails; the original
     *                          exception is attached as the cause
     */
    public static void pdfWaterMark(InputStream inputStream, OutputStream outputStream, String waterMark, String watermarkcorporation) {
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(inputStream);
            PdfStamper stamper = new PdfStamper(pdfReader, outputStream);
            int total = pdfReader.getNumberOfPages();

            // Load the font once and close the classpath stream afterwards.
            byte[] fontBytes;
            try (InputStream fontStream = new ClassPathResource("font/ChillKai.ttf").getInputStream()) {
                fontBytes = IOUtils.toByteArray(fontStream);
            }
            // Loop-invariant objects are created once instead of once (or twice) per page.
            BaseFont baseFont = BaseFont.createFont("ChillKai.ttf", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED, true, fontBytes, null);
            BaseColor fillColor = new BaseColor(128, 128, 128);
            PdfGState gs = new PdfGState();
            gs.setFillOpacity(0.3f);

            List<HashMap<String, Object>> outlines = new ArrayList<HashMap<String, Object>>();
            stamper.setOutlines(outlines);
            stamper.setViewerPreferences(0);

            boolean drawCenter = !StringUtils.isEmpty(watermarkcorporation);
            boolean drawTiled = !StringUtils.isEmpty(waterMark);

            for (int i = 1; i <= total; i++) {
                PdfContentByte content = stamper.getOverContent(i);
                Rectangle pageSize = pdfReader.getPageSize(i);
                float width = pageSize.getWidth();
                float height = pageSize.getHeight();

                content.setGState(gs);
                content.beginText();
                content.setColorFill(fillColor);

                if (drawCenter) {
                    content.setFontAndSize(baseFont, 80F);
                    content.showTextAligned(Element.ALIGN_CENTER, watermarkcorporation, width / 2, height / 2, 50F);
                }

                if (drawTiled) {
                    content.setFontAndSize(baseFont, 30F);
                    float x = 0;
                    float y = 0;
                    // Step 180 units across; wrap to x = 0 and move up 280 units at the right edge.
                    while (x < width && y < height) {
                        x += 180;
                        if (x >= width) {
                            x = 0;
                            y += 280;
                        }
                        content.showTextAligned(Element.ALIGN_CENTER, waterMark, x, y, 38F);
                    }
                }
                content.endText();
            }
            stamper.close();
            // Printed only after the stamper closed successfully.
            System.out.println("--------------添加水印完成--------------------");
        } catch (Exception e) {
            // Keep the original failure as the cause so callers can diagnose it.
            throw new RuntimeException("添加水印失败", e);
        } finally {
            // Release the reader on both the success and the failure path.
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
    }
}

SpringUtil 类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/**
 * Spring component that stores the {@link ApplicationContext} in a static field so
 * that static code can look up beans.
 */
@Component
public class SpringUtil implements ApplicationContextAware {

    /** Context captured by {@link #setApplicationContext}; {@code null} until then. */
    private static ApplicationContext applicationContext = null;

    /**
     * Stores the supplied context the first time it is called; later calls leave
     * an already stored context untouched.
     */
    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        if (SpringUtil.applicationContext != null) {
            return;
        }
        SpringUtil.applicationContext = applicationContext;
    }

    /** Returns the stored application context. */
    public static ApplicationContext getApplicationContext() {
        return applicationContext;
    }

    /** Looks up a bean by name. */
    public static Object getBean(String name) {
        ApplicationContext context = getApplicationContext();
        return context.getBean(name);
    }

    /** Looks up a bean by type. */
    public static <T> T getBean(Class<T> clazz) {
        ApplicationContext context = getApplicationContext();
        return context.getBean(clazz);
    }

    /** Looks up a bean by name and type. */
    public static <T> T getBean(String name, Class<T> clazz) {
        ApplicationContext context = getApplicationContext();
        return context.getBean(name, clazz);
    }

}

HtmlToPdfUtils 类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.kernel.events.PdfDocumentEvent;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.font.FontProvider;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.example.handler.WaterMarkEventHandler;
import org.springframework.core.io.ClassPathResource;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Itext7转换工具类
*/
@Slf4j
public class HtmlToPdfUtils {

/**
* html转pdf
*
* @param inputStream 输入流
* @param waterMark 水印
* @param fontPath 字体路径,ttc后缀的字体需要添加<b>,0<b/>
* @param outputStream 输出流
* @date : 2022/11/15 14:07
*/
public static void convertToPdf(InputStream inputStream, OutputStream outputStream) throws IOException {

PdfWriter pdfWriter = new PdfWriter(outputStream);
PdfDocument pdfDocument = new PdfDocument(pdfWriter);
//设置为A4大小
pdfDocument.setDefaultPageSize(PageSize.A4);
//添加水印
// pdfDocument.addEventHandler(PdfDocumentEvent.END_PAGE, new WaterMarkEventHandler(waterMark));

//添加中文字体支持
ConverterProperties properties = new ConverterProperties();
FontProvider fontProvider = new FontProvider();

// 设置字体
/*PdfFont sysFont = PdfFontFactory.createFont("STSongStd-Light", "UniGB-UCS2-H", false);
fontProvider.addFont(sysFont.getFontProgram(), "UniGB-UCS2-H");*/

//添加自定义字体
byte[] fontBytes = IOUtils.toByteArray(new ClassPathResource("font/ChillKai.ttf").getInputStream());
PdfFont microsoft = PdfFontFactory.createFont(fontBytes, PdfEncodings.IDENTITY_H, false);
fontProvider.addFont(microsoft.getFontProgram(), PdfEncodings.IDENTITY_H);

properties.setFontProvider(fontProvider);
// 读取Html文件流,查找出当中的 或出现类似的符号空格字符
inputStream = readInputStrem(inputStream);
if (inputStream != null) {
// 生成pdf文档
HtmlConverter.convertToPdf(inputStream, pdfDocument, properties);
pdfWriter.close();
pdfDocument.close();
return;
} else {
log.error("转换失败!");
}
}

/**
* 读取HTML 流文件,并查询当中的 或类似符号直接替换为空格
*
* @param inputStream
* @return
*/
public static InputStream readInputStrem(InputStream inputStream) {
// 定义一些特殊字符的正则表达式 如:
String regEx_special = "\\&[a-zA-Z]{1,10};";
try {
//<1>创建字节数组输出流,用来输出读取到的内容
ByteArrayOutputStream baos = new ByteArrayOutputStream();
//<2>创建缓存大小
byte[] buffer = new byte[1024]; // 1KB
//每次读取到内容的长度
int len = -1;
//<3>开始读取输入流中的内容
while ((len = inputStream.read(buffer)) != -1) { //当等于-1说明没有数据可以读取了
baos.write(buffer, 0, len); //把读取到的内容写到输出流中
}
//<4> 把字节数组转换为字符串
String content = baos.toString();
//<5>关闭输入流和输出流
// inputStream.close();
baos.close();
// log.info("读取的内容:{}", content);
// 判断HTML内容是否具有HTML的特殊字符标记
Pattern compile = Pattern.compile(regEx_special, Pattern.CASE_INSENSITIVE);
Matcher matcher = compile.matcher(content);
String replaceAll = matcher.replaceAll("");
// log.info("替换后的内容:{}", replaceAll);
// 将字符串转化为输入流返回
InputStream stringStream = getStringStream(replaceAll);
//<6>返回结果
return stringStream;
} catch (Exception e) {
e.printStackTrace();
log.error("错误信息:{}", e.getMessage());
return null;
}
}

/**
* 将一个字符串转化为输入流
* @param sInputString 字符串
* @return
*/
public static InputStream getStringStream(String sInputString) {
if (sInputString != null && !sInputString.trim().equals("")) {
try {
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(sInputString.getBytes());
return byteArrayInputStream;
} catch (Exception e) {
e.printStackTrace();
}
}
return null;
}

}