Word到PDF的奇幻之旅
Word文档就像个穿着睡衣在家办公的程序员——舒服但有点随意。而PDF呢?就是穿上西装打上领带,准备去参加董事会的同一个人——专业且纹丝不动!
这转变过程好比:
- word文档:“哈!我的字体可以随便换,边距可以随意调,图片还能拖来拖去~”
- pdf:“闭嘴!现在开始我说了算,每个像素都给我站好岗!”
用Spring Boot实现这个转换,就像是请了个文档变形金刚,把自由散漫的Word驯化成纪律严明的PDF士兵。下面就让我带你见证这场“格式驯化仪式”!
准备阶段:装备你的“变形工具箱”
第一步:maven依赖大采购
<!-- pom.xml 里加入这些法宝 -->
<dependencies>
    <!-- Spring Boot web starter. No version is given here; presumably it is
         inherited from the Spring Boot parent POM / BOM (confirm). -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- Apache POI: reads the Word document. -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>5.2.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>5.2.3</version>
    </dependency>
    <!-- OpenPDF: writes the PDF output. -->
    <dependency>
        <groupId>com.github.librepdf</groupId>
        <artifactId>openpdf</artifactId>
        <version>1.3.30</version>
    </dependency>
    <!-- Apache Tika: file type detection, intended to stop non-Word uploads.
         NOTE(review): none of the code shown alongside this snippet calls Tika;
         the upload check only looks at the file extension. -->
    <dependency>
        <groupId>org.apache.tika</groupId>
        <artifactId>tika-core</artifactId>
        <version>2.7.0</version>
    </dependency>
</dependencies>
第二步:配置属性文件
# application.yml
# Custom converter settings.
# NOTE(review): no code shown alongside this snippet reads these keys - confirm the binding.
word-to-pdf:
  upload-dir: "uploads/"      # temporary landing area for uploaded Word files
  output-dir: "pdf-output/"   # where generated PDFs are stored
  max-file-size: 10MB         # upper bound for a single upload
# Servlet multipart limits. Size suffixes must be upper-case (B, KB, MB, GB, TB).
spring:
  servlet:
    multipart:
      max-file-size: 10MB
      max-request-size: 10MB
核心代码:变身吧,word君!
1. 文件上传控制器(接待员)
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
/**
 * REST endpoint that accepts an uploaded Word file and answers with the
 * converted PDF as a file download.
 */
@RestController
@RequestMapping("/api/doc-transform")
public class WordToPdfController {

    /**
     * Converts the uploaded Word document and writes the PDF to the response.
     *
     * <p>Uploads whose name does not end in {@code .docx} or {@code .doc} are
     * answered with HTTP 400 and a plain-text message.
     *
     * @param wordFile the multipart upload sent under the form field {@code file}
     * @param response the servlet response the PDF (or the error text) is written to
     * @throws IOException if the upload cannot be stored or the conversion fails
     */
    @PostMapping("/word-to-pdf")
    public void convertWordToPdf(
            @RequestParam("file") MultipartFile wordFile,
            HttpServletResponse response) throws IOException {
        // 1. Reject anything that does not look like a Word upload.
        if (!isWordDocument(wordFile)) {
            // Status, content type and charset are set BEFORE the body is written:
            // headers cannot change once the response is committed, and the
            // servlet default encoding cannot represent the Chinese message.
            response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
            response.setCharacterEncoding("UTF-8");
            response.setContentType("text/plain;charset=UTF-8");
            response.getWriter().write("喂!这不是word文档,别骗我!");
            return;
        }

        // 2. Park the upload in a uniquely named file in the system temp directory.
        //    An absolute path avoids the container resolving a relative name
        //    against its own multipart location.
        File tempWordFile = File.createTempFile("temp_", ".docx");
        byte[] pdfBytes;
        try {
            wordFile.transferTo(tempWordFile);
            // 3. Convert.
            pdfBytes = WordToPdfConverter.convert(tempWordFile);
        } finally {
            // 4. Always clean up, including when the conversion throws.
            if (!tempWordFile.delete()) {
                tempWordFile.deleteOnExit();
            }
        }

        // 5. Hand the PDF to the client as a download.
        response.setContentType("application/pdf");
        response.setHeader("Content-Disposition",
                "attachment; filename=\"" + toPdfFileName(wordFile.getOriginalFilename()) + "\"");
        response.getOutputStream().write(pdfBytes);
        System.out.println("转换成功!又一个word被成功驯化成pdf!");
    }

    /**
     * Tells whether the upload's original file name ends in {@code .docx} or
     * {@code .doc} (case-insensitive). A missing file name is not a Word document.
     *
     * <p>NOTE(review): {@code .doc} passes this check, but the converter opens
     * uploads with {@code XWPFDocument}, which presumably only reads the OOXML
     * {@code .docx} format - confirm whether legacy {@code .doc} should be
     * rejected here instead of failing during conversion.
     */
    private boolean isWordDocument(MultipartFile file) {
        String originalName = file.getOriginalFilename();
        if (originalName == null) {
            return false;
        }
        String fileName = originalName.toLowerCase(java.util.Locale.ROOT);
        return fileName.endsWith(".docx") || fileName.endsWith(".doc");
    }

    /**
     * Derives the download name: swaps a trailing {@code .doc}/{@code .docx}
     * (any case) for {@code .pdf} and replaces characters that would break out
     * of the quoted header value (quotes, CR, LF).
     */
    private String toPdfFileName(String originalName) {
        String safeName = originalName == null ? "document.docx" : originalName;
        return safeName
                .replaceAll("[\\r\\n\"]", "_")
                .replaceFirst("(?i)\\.docx?$", ".pdf");
    }
}
2. 转换器核心(真正的变形引擎)
import org.apache.poi.xwpf.usermodel.*;
import com.lowagie.text.*;
import com.lowagie.text.pdf.pdfwriter;
import java.io.*;
@component
public class wordtopdfconverter {
public static byte[] convert(file wordfile) throws ioexception {
bytearrayoutputstream pdfoutputstream = new bytearrayoutputstream();
try (fileinputstream fis = new fileinputstream(wordfile)) {
// 1. 打开word文档(像打开潘多拉魔盒)
xwpfdocument document = new xwpfdocument(fis);
// 2. 创建pdf文档(准备新家)
document pdfdocument = new document();
pdfwriter.getinstance(pdfdocument, pdfoutputstream);
pdfdocument.open();
// 3. 逐段搬运内容(像蚂蚁搬家)
system.out.println("开始搬运段落,共" + document.getparagraphs().size() + "段...");
for (xwpfparagraph para : document.getparagraphs()) {
if (para.gettext().trim().isempty()) continue;
// 处理文本样式
font font = new font();
if (para.getstyle() != null) {
switch (para.getstyle()) {
case "heading1":
font = new font(font.helvetica, 18, font.bold);
break;
case "heading2":
font = new font(font.helvetica, 16, font.bold);
break;
default:
font = new font(font.helvetica, 12, font.normal);
}
}
paragraph pdfpara = new paragraph(para.gettext(), font);
pdfdocument.add(pdfpara);
pdfdocument.add(chunk.newline); // 加个换行,喘口气
}
// 4. 处理图片(最难搬家的部分)
system.out.println("开始处理图片,共" + document.getallpictures().size() + "张...");
for (xwpfpicturedata picture : document.getallpictures()) {
try {
byte[] picturedata = picture.getdata();
image image = image.getinstance(picturedata);
image.scaletofit(500, 500); // 给图片上个紧箍咒,别太大
image.setalignment(element.align_center);
pdfdocument.add(image);
pdfdocument.add(chunk.newline);
} catch (exception e) {
system.err.println("图片" + picture.getfilename() + "太调皮,转换失败: " + e.getmessage());
}
}
// 5. 处理表格(excel表示:我也想来凑热闹)
for (xwpftable table : document.gettables()) {
com.lowagie.text.table pdftable =
new com.lowagie.text.table(table.getnumberofrows());
for (xwpftablerow row : table.getrows()) {
for (xwpftablecell cell : row.gettablecells()) {
pdftable.addcell(cell.gettext());
}
}
pdfdocument.add(pdftable);
}
pdfdocument.close();
document.close();
system.out.println("转换完成!生成pdf大小: " +
(pdfoutputstream.size() / 1024) + " kb");
} catch (exception e) {
system.err.println("转换过程出现意外: " + e.getmessage());
throw new ioexception("转换失败,word文档可能被施了魔法", e);
}
return pdfoutputstream.tobytearray();
}
}
3. 异常处理(变形失败的救护车)
/**
 * Application-wide translation of conversion failures into HTTP responses
 * with a plain-text explanation.
 */
@ControllerAdvice
public class DocumentConversionExceptionHandler {

    /**
     * Answers any {@link IOException} with HTTP 500 and a list of likely causes
     * followed by the exception message.
     *
     * @param e the I/O failure raised while handling the request
     * @return a 500 response whose body ends with {@code e.getMessage()}
     */
    @ExceptionHandler(IOException.class)
    public ResponseEntity<String> handleIOException(IOException e) {
        return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
                .body("文档转换失败,可能原因:\n" +
                        "1. word文档被外星人加密了\n" +
                        "2. 文件太大,服务器举不动了\n" +
                        "3. 网络连接在打瞌睡\n" +
                        "错误详情: " + e.getMessage());
    }

    /**
     * Answers an {@code InvalidFormatException} with HTTP 400 and a hint to
     * upload a real Word file.
     *
     * <p>NOTE(review): the converter shown in this file wraps every failure in
     * an {@link IOException}, so this handler is presumably only reached when
     * some other code path lets the exception escape - confirm.
     *
     * @param e the format failure (not used in the response body)
     * @return a 400 response with a fixed message
     */
    @ExceptionHandler(InvalidFormatException.class)
    public ResponseEntity<String> handleInvalidFormat(Exception e) {
        return ResponseEntity.badRequest()
                .body("喂!你上传的是word文档吗?\n" +
                        "我猜你上传的是:\n" +
                        "□ 猫咪图片 \n" +
                        "□ excel表格 \n" +
                        "□ 心灵鸡汤文本 \n" +
                        "请上传正经的.docx或.doc文件!");
    }
}
4. 进度监控(变形过程直播)
@component
public class conversionprogressservice {
private map<string, integer> progressmap = new concurrenthashmap<>();
public void startconversion(string fileid) {
progressmap.put(fileid, 0);
system.out.println("开始转换文件: " + fileid);
}
public void updateprogress(string fileid, int percent) {
progressmap.put(fileid, percent);
// 打印进度条(假装很高级)
stringbuilder progressbar = new stringbuilder("[");
for (int i = 0; i < 20; i++) {
progressbar.append(i * 5 < percent ? "█" : "░");
}
progressbar.append("] ").append(percent).append("%");
system.out.println(fileid + " 转换进度: " + progressbar.tostring());
// 说点骚话鼓励一下
if (percent == 50) {
system.out.println("转换过半,坚持住!");
} else if (percent == 90) {
system.out.println("马上完成,准备发射pdf!");
}
}
public void completeconversion(string fileid) {
progressmap.remove(fileid);
system.out.println(fileid + " 转换完成,深藏功与名~");
}
}
前端调用示例(用户操作界面)
<!doctype html>
<html>
<head>
<title>word转pdf变形工坊</title>
<style>
body { font-family: 'comic sans ms', cursive; padding: 20px; }
.container { max-width: 600px; margin: 0 auto; }
.drop-zone {
border: 3px dashed #4caf50;
border-radius: 10px;
padding: 40px;
text-align: center;
background: #f9f9f9;
cursor: pointer;
}
.drop-zone:hover { background: #e8f5e9; }
.convert-btn {
background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
color: white;
border: none;
padding: 15px 30px;
border-radius: 25px;
font-size: 18px;
cursor: pointer;
margin-top: 20px;
}
.progress-bar {
width: 100%;
height: 20px;
background: #ddd;
border-radius: 10px;
margin-top: 20px;
overflow: hidden;
display: none;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, #4caf50, #8bc34a);
width: 0%;
transition: width 0.3s;
}
</style>
</head>
<body>
<div class="container">
<h1>word转pdf变形工坊</h1>
<p>把你的word文档扔进来,还你一个乖巧的pdf!</p>
<div class="drop-zone" id="dropzone">
<h2>拖拽word文件到这里</h2>
<p>或者 <label style="color: #2196f3; cursor: pointer;">点击选择文件
<input type="file" id="fileinput" accept=".docx,.doc" hidden>
</label></p>
</div>
<button class="convert-btn" onclick="converttopdf()">
开始变形!
</button>
<div class="progress-bar" id="progressbar">
<div class="progress-fill" id="progressfill"></div>
</div>
<div id="status" style="margin-top: 20px;"></div>
</div>
<script>
// File selection: via drag-and-drop onto the drop zone or via the hidden file input.
const dropzone = document.getElementById('dropzone');
const fileinput = document.getElementById('fileinput');
let selectedfile = null; // the file the convert button will upload

// Remembers the chosen file and shows its name in the status area.
// The name is inserted as text, never as HTML, because it is user-controlled.
function showSelectedFile(file) {
    if (!file) {
        return; // empty drop or cancelled file picker: keep the previous selection
    }
    selectedfile = file;
    const status = document.getElementById('status');
    const name = document.createElement('strong');
    name.textContent = file.name;
    status.textContent = '已选择: ';
    status.appendChild(name);
}

// preventDefault on dragover is what allows the element to receive a drop.
dropzone.addEventListener('dragover', (e) => {
    e.preventDefault();
    dropzone.style.background = '#e8f5e9';
});
// Restore the idle colour when the drag leaves without dropping.
dropzone.addEventListener('dragleave', () => {
    dropzone.style.background = '#f9f9f9';
});
dropzone.addEventListener('drop', (e) => {
    e.preventDefault();
    dropzone.style.background = '#f9f9f9';
    showSelectedFile(e.dataTransfer.files[0]);
});
fileinput.addEventListener('change', (e) => {
    showSelectedFile(e.target.files[0]);
});
// 转换函数
/**
 * Uploads the selected file to the conversion endpoint and, on success,
 * triggers a browser download of the returned PDF. Progress shown while
 * waiting is simulated (10% every 300 ms, capped at 90%), not reported by
 * the server.
 */
async function converttopdf() {
    if (!selectedfile) {
        alert('请先选择一个word文件!');
        return;
    }
    const formData = new FormData();
    formData.append('file', selectedfile);

    const status = document.getElementById('status');
    const progressBar = document.getElementById('progressbar');
    const progressFill = document.getElementById('progressfill');
    progressBar.style.display = 'block';

    // Simulated progress; stops by itself once it reaches 90%.
    let progress = 0;
    const interval = setInterval(() => {
        progress += 10;
        progressFill.style.width = `${progress}%`;
        if (progress >= 90) clearInterval(interval);
    }, 300);

    try {
        const response = await fetch('/api/doc-transform/word-to-pdf', {
            method: 'POST',
            body: formData
        });
        progressFill.style.width = '100%';
        if (response.ok) {
            // Download the PDF through a temporary object URL and a synthetic click.
            const blob = await response.blob();
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            a.download = selectedfile.name.replace(/\.docx?$/i, '.pdf');
            document.body.appendChild(a);
            a.click();
            a.remove();
            status.textContent = '转换成功!pdf已开始下载~';
            // Reset the UI after 3 seconds and release the blob behind the URL.
            setTimeout(() => {
                window.URL.revokeObjectURL(url);
                progressBar.style.display = 'none';
                progressFill.style.width = '0%';
                status.textContent = '';
            }, 3000);
        } else {
            // Server-provided text is shown as text, never parsed as HTML.
            const errorText = await response.text();
            status.textContent = `转换失败: ${errorText}`;
        }
    } catch (error) {
        status.textContent = `网络错误: ${error.message}`;
    } finally {
        // Stop the simulated progress on every path, including network failure.
        clearInterval(interval);
    }
}
</script>
</body>
</html>
高级功能扩展
批量转换(群变模式)
/**
 * Converts several uploaded Word files to PDF in parallel.
 */
@Service
public class BatchConversionService {

    /**
     * Converts every upload and returns the PDFs that were produced. A file
     * that fails to convert is logged and left out of the result; it does not
     * fail the batch.
     *
     * <p>Each PDF is written to a uniquely named temp file, so concurrent
     * batches cannot overwrite each other's output.
     *
     * <p>NOTE(review): this method runs asynchronously, and the servlet
     * container presumably discards multipart uploads once the originating
     * request completes - confirm that callers keep the request open or copy
     * the uploads before calling.
     *
     * @param files the uploads to convert
     * @return a completed future holding the successfully written PDF files,
     *         in the order of their inputs
     */
    @Async
    public CompletableFuture<List<File>> convertMultiple(List<MultipartFile> files) {
        System.out.println("开始批量转换,共" + files.size() + "个文件,冲鸭!");

        List<CompletableFuture<File>> futures = new ArrayList<>();
        for (int i = 0; i < files.size(); i++) {
            final int index = i;
            final MultipartFile upload = files.get(i);
            futures.add(CompletableFuture.supplyAsync(() -> convertOne(upload, index)));
        }

        // Wait for all conversions to finish.
        CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();

        List<File> pdfFiles = new ArrayList<>();
        for (CompletableFuture<File> future : futures) {
            // Every future is already complete, and convertOne reports failure
            // as null rather than by throwing, so join() cannot fail here.
            File pdf = future.join();
            if (pdf != null) {
                pdfFiles.add(pdf);
            }
        }

        System.out.println("批量转换完成!成功: " + pdfFiles.size() +
                "/" + files.size() + " 个文件");
        return CompletableFuture.completedFuture(pdfFiles);
    }

    /** Converts one upload; returns the written PDF, or null if anything failed. */
    private File convertOne(MultipartFile upload, int index) {
        File wordFile = null;
        try {
            System.out.println("正在转换第" + (index + 1) + "个文件...");
            wordFile = convertToFile(upload);
            byte[] pdfBytes = WordToPdfConverter.convert(wordFile);
            File pdfFile = File.createTempFile("converted_" + index + "_", ".pdf");
            Files.write(pdfFile.toPath(), pdfBytes);
            return pdfFile;
        } catch (Exception e) {
            System.err.println("第" + (index + 1) + "个文件转换失败: " + e.getMessage());
            return null;
        } finally {
            // The intermediate Word copy is no longer needed once conversion ends.
            if (wordFile != null && !wordFile.delete()) {
                wordFile.deleteOnExit();
            }
        }
    }

    /** Stores the upload in a uniquely named temp file so the converter can read it. */
    private File convertToFile(MultipartFile upload) throws java.io.IOException {
        File target = File.createTempFile("batch_upload_", ".docx");
        upload.transferTo(target);
        return target;
    }
}
转换记录(变形档案室)
@entity
@table(name = "conversion_records")
@data
@noargsconstructor
@allargsconstructor
public class conversionrecord {
@id
@generatedvalue(strategy = generationtype.identity)
private long id;
private string originalfilename;
private string pdffilename;
private long originalsize;
private long pdfsize;
private localdatetime conversiontime;
private string status; // success, failed, processing
private string errormessage;
@prepersist
protected void oncreate() {
conversiontime = localdatetime.now();
}
}
@repository
public interface conversionrecordrepository extends jparepository<conversionrecord, long> {
list<conversionrecord> findbystatusorderbyconversiontimedesc(string status);
}
部署与优化建议
1. 性能优化
# application.yml 添加
server:
  tomcat:
    # Request-handling threads available for concurrent conversions.
    # NOTE(review): newer Spring Boot versions presumably expect
    # server.tomcat.threads.max / threads.min-spare instead - confirm against
    # the Boot version in use.
    max-threads: 200
    min-spare-threads: 20
spring:
  task:
    execution:
      pool:
        # Thread pool used for asynchronous tasks.
        # NOTE(review): max-size presumably only takes effect when a bounded
        # queue-capacity is also configured - confirm.
        core-size: 10
        max-size: 50
2. 内存管理
/**
 * Periodically checks JVM heap usage and prints a warning when it is high.
 */
@Component
public class MemoryWatcher {

    /**
     * Runs once a minute. Computes used heap (total minus free) as a
     * percentage of the maximum heap; above 80 percent it prints a warning and
     * requests a garbage collection.
     */
    @Scheduled(fixedRate = 60000)
    public void monitorMemory() {
        Runtime runtime = Runtime.getRuntime();
        long usedMemory = runtime.totalMemory() - runtime.freeMemory();
        long maxMemory = runtime.maxMemory();
        double usagePercentage = (double) usedMemory / maxMemory * 100;

        if (usagePercentage > 80) {
            System.out.println("内存警告:使用率 " +
                    String.format("%.1f", usagePercentage) + "%");
            // NOTE(review): System.gc() is only a hint to the JVM and can
            // trigger a full collection pause; consider dropping it and
            // relying on the collector plus proper heap sizing.
            System.gc();
        }
    }
}
总结:word转pdf的奇幻旅程终点站
经过这一番折腾,我们成功打造了一个Spring Boot牌文档变形金刚!总结一下这场冒险:
我们实现了什么
- 格式驯化:把自由的word变成规矩的pdf
- 异步处理:大文件转换不卡界面
- 进度监控:实时查看转换进度
- 错误处理:优雅处理各种意外情况
- 批量操作:一次性驯化整个word文档家族
注意事项
- 字体问题:有些特殊字体pdf可能不认识,需要额外处理
- 复杂格式:word里的高级排版(如文本框、艺术字)可能变形
- 内存消耗:大文档转换时注意内存溢出
- 并发限制:同时转换太多文档可能导致服务器喘不过气
word转pdf就像给文档穿上“防改铠甲”,springboot就是我们打造这副铠甲的智能工厂。虽然过程中会遇到各种奇葩格式的“刺头文档”,但只要有耐心调试,最终都能把它们治理得服服帖帖!
到此,这篇关于Spring Boot三步将Word转为PDF的实战指南的文章就介绍到这了。更多相关Spring Boot Word转PDF的内容,请搜索代码网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持代码网!
发表评论