1.需求
我有一些数学考题试卷数据存在数据库中,考题数据以大文本的形式存储,其中公式部分的格式为 LaTeX。现在需要将这些数据生成 Word 文档供用户下载,并要求公式能够在文档中正确显示。一种方案是将 LaTeX 转换成图片后写入 Word 文档,缺点是清晰度不高,而且不可编辑,所以需要一个原生的解决方案。自然就想到 Word 对 LaTeX 的支持,遗憾的是 Word 不支持 LaTeX,而是有自己的格式 OMML,所以需要一个工具将 LaTeX 转换成 OMML。现有的资料显示,需要先将 LaTeX 转换成 MathML,然后再将 MathML 转换成 OMML。
2.代码结构

3.pom文件
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build descriptor for the LaTeX to OMML demo.
  Maven element names and namespace URIs are case-sensitive, so they are written
  in their canonical form (modelVersion, groupId, artifactId, ...).
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.5.8</version>
        <relativePath/>
    </parent>
    <groupId>com.latex</groupId>
    <artifactId>latex</artifactId>
    <version>1.0.0</version>
    <name>latex</name>
    <description>latex</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Dependencies required for the conversion and for writing Word documents -->
        <dependency>
            <groupId>de.rototor.snuggletex</groupId>
            <artifactId>snuggletex-core</artifactId>
            <version>1.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.2</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.11.0</version>
        </dependency>
    </dependencies>
    <build>
        <!--
        Optional: enable only if mml2omml.xsl is not copied to the classpath.
        Note that an includes list restricts the resources that are copied to the listed files.
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <filtering>true</filtering>
                <includes>
                    <include>mml2omml.xsl</include>
                </includes>
            </resource>
        </resources>
        -->
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                            <!-- maven-compiler-plugin before 3.12.0 needs an explicit version here;
                                 lombok.version is defined by the Spring Boot parent. -->
                            <version>${lombok.version}</version>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
4.转换工具:LatexUtil
package com.latex.util;

import com.latex.enums.TextCircledEnum;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMath;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMathPara;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTR;
import uk.ac.ed.ph.snuggletex.SnuggleEngine;
import uk.ac.ed.ph.snuggletex.SnuggleInput;
import uk.ac.ed.ph.snuggletex.SnuggleSession;

import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;

/**
 * Converts LaTeX formulas to Office Math (OMML) and inserts them into Apache POI paragraphs.
 *
 * <p>Pipeline: LaTeX is converted to MathML by SnuggleTeX, MathML is converted to OMML by the
 * {@code mml2omml.xsl} stylesheet, and the OMML is parsed into a {@link CTOMath}.
 */
public class LatexUtil {

    /** Classpath name of the MathML-to-OMML stylesheet; classpath lookup is case-sensitive. */
    private static final String STYLESHEET_RESOURCE = "mml2omml.xsl";

    /** Font written on every math run (needed for Word 2007, see {@link #toCTOMath(String)}). */
    private static final String MATH_FONT = "Cambria Math";

    /** Compiled stylesheet, created on first use and guarded by the class lock. */
    private static Templates templates;

    private static final SnuggleEngine snuggleEngine = new SnuggleEngine();

    /**
     * Appends a LaTeX formula to a POI paragraph as a native Word equation.
     *
     * @param paragraph paragraph that receives the equation
     * @param latex     formula without the surrounding {@code $} delimiters
     * @throws Exception if any conversion step fails
     */
    public static void addToParagraph(XWPFParagraph paragraph, String latex) throws Exception {
        // Replace symbols SnuggleTeX cannot parse, then wrap the formula in math-mode delimiters.
        latex = "$" + filter(latex) + "$";
        paragraph.getCTP().addNewOMath().set(getCTOMath(latex));
    }

    /**
     * Converts a LaTeX formula to a {@link CTOMath} that can be written into a Word document.
     * The intermediate representations are printed to standard output.
     *
     * @param latex formula wrapped in {@code $...$}, e.g. {@code $\sin^2 \theta + \cos^2 \theta = 1$}
     * @return the parsed Office Math element
     * @throws Exception if any conversion step fails
     */
    public static CTOMath getCTOMath(String latex) throws Exception {
        System.out.println("latex: " + latex);
        String mathml = toMathML(latex);
        System.out.println("mathml: " + mathml);
        String omml = toOMML(mathml);
        System.out.println("omml: " + omml);
        return toCTOMath(omml);
    }

    /**
     * Converts LaTeX to MathML using a fresh SnuggleTeX session.
     *
     * @param latex formula wrapped in {@code $...$}
     * @return the XML string built by the session
     * @throws IOException if SnuggleTeX fails to read the input
     */
    public static String toMathML(String latex) throws IOException {
        SnuggleSession session = snuggleEngine.createSession();
        session.parseInput(new SnuggleInput(latex));
        return session.buildXMLString();
    }

    /**
     * Converts MathML to OMML by applying the {@code mml2omml.xsl} stylesheet.
     *
     * @param mathml MathML document
     * @return the transformation result, including the XML declaration emitted by the stylesheet
     * @throws TransformerException if the stylesheet cannot be compiled or the transformation fails
     * @throws IOException          if the stylesheet resource is missing or cannot be read
     */
    public static String toOMML(String mathml) throws TransformerException, IOException {
        Transformer transformer = getTransformer();
        StringWriter writer = new StringWriter();
        transformer.transform(new StreamSource(new StringReader(mathml)), new StreamResult(writer));
        return writer.toString();
    }

    /**
     * Parses OMML into the POI/XMLBeans type {@link CTOMath}.
     *
     * @param omml OMML document
     * @return the first {@code oMath} element of the parsed document
     * @throws XmlException if the OMML cannot be parsed
     */
    public static CTOMath toCTOMath(String omml) throws XmlException {
        CTOMathPara ctOMathPara = CTOMathPara.Factory.parse(omml);
        CTOMath ctOMath = ctOMathPara.getOMathArray(0);
        // For making this work with Office 2007 Word as well, special font settings are necessary.
        XmlCursor cursor = ctOMath.newCursor();
        try {
            while (cursor.hasNextToken()) {
                XmlCursor.TokenType tokenType = cursor.toNextToken();
                if (tokenType.isStart() && cursor.getObject() instanceof CTR) {
                    CTR ctr = (CTR) cursor.getObject();
                    ctr.addNewRPr2().addNewRFonts().setAscii(MATH_FONT);
                    ctr.getRPr2().getRFonts().setHAnsi(MATH_FONT); // up to Apache POI 4.1.2
                    // Since Apache POI 5.0.0 use: ctr.getRPr2().getRFontsArray(0).setHAnsi(MATH_FONT);
                }
            }
        } finally {
            // Release the cursor once the traversal is finished.
            cursor.dispose();
        }
        return ctOMath;
    }

    /**
     * Replaces symbols the parser does not recognise before conversion. Circled numbers written
     * as {@code \textcircled{n}} are replaced by the corresponding characters.
     *
     * @param latex LaTeX expression
     * @return the expression with the unsupported commands replaced
     */
    public static String filter(String latex) {
        if (!latex.contains("textcircled")) {
            return latex;
        }
        return TextCircledEnum.replaceTextCircled(latex);
    }

    /**
     * Returns a new {@link Transformer} for the MathML-to-OMML stylesheet. The stylesheet is
     * compiled once into {@link Templates}; each call gets its own transformer so that callers
     * never share a transformer instance.
     *
     * <p>On JDK 21 compiling the stylesheet was observed to fail with
     * {@code TransformerConfigurationException: JAXP0801001} unless the XPath limits are lifted
     * before this method runs, for example:
     * {@code System.setProperty("jdk.xml.xpathExprGrpLimit", "0")} and
     * {@code System.setProperty("jdk.xml.xpathExprOpLimit", "0")} (0 means unlimited).
     *
     * @throws FileNotFoundException if the stylesheet is not on the classpath; check that the
     *                               build copies it from {@code src/main/resources}
     */
    private static synchronized Transformer getTransformer() throws TransformerException, IOException {
        if (templates == null) {
            try (InputStream in = LatexUtil.class.getClassLoader().getResourceAsStream(STYLESHEET_RESOURCE)) {
                if (in == null) {
                    throw new FileNotFoundException("Classpath resource not found: " + STYLESHEET_RESOURCE);
                }
                templates = TransformerFactory.newInstance().newTemplates(new StreamSource(in));
            }
        }
        return templates.newTransformer();
    }
}
5.特殊符号处理:TextCircledEnum
package com.latex.enums;
import java.util.regex.Pattern;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Maps the LaTeX commands {@code \textcircled{0}} to {@code \textcircled{10}} onto the
 * matching Unicode circled-number characters (U+24EA for 0, U+2460 to U+2469 for 1 to 10).
 *
 * <p>{@code code} is a regular expression matching the LaTeX command literally; {@code value}
 * is the replacement character. The glyphs are written as Unicode escapes so the source does
 * not depend on the compiler's file encoding.
 */
public enum TextCircledEnum {
    ZERO("\\\\textcircled\\{0\\}", "\u24EA"),
    ONE("\\\\textcircled\\{1\\}", "\u2460"),
    TWO("\\\\textcircled\\{2\\}", "\u2461"),
    THREE("\\\\textcircled\\{3\\}", "\u2462"),
    FOUR("\\\\textcircled\\{4\\}", "\u2463"),
    FIVE("\\\\textcircled\\{5\\}", "\u2464"),
    SIX("\\\\textcircled\\{6\\}", "\u2465"),
    SEVEN("\\\\textcircled\\{7\\}", "\u2466"),
    EIGHT("\\\\textcircled\\{8\\}", "\u2467"),
    NINE("\\\\textcircled\\{9\\}", "\u2468"),
    TEN("\\\\textcircled\\{10\\}", "\u2469");

    /** Regular expression that matches the LaTeX command. */
    private final String code;

    /** Replacement character. */
    private final String value;

    /** {@link #code} compiled once, so replacing does not recompile the regex on every call. */
    private final Pattern pattern;

    TextCircledEnum(String code, String value) {
        this.code = code;
        this.value = value;
        this.pattern = Pattern.compile(code);
    }

    /** @return the regular expression that matches the LaTeX command */
    public String getCode() {
        return code;
    }

    /** @return the replacement character */
    public String getValue() {
        return value;
    }

    /**
     * Replaces every supported {@code \textcircled{n}} command in the given expression.
     *
     * @param latex LaTeX expression
     * @return the expression with all supported commands replaced by their characters
     */
    public static String replaceTextCircled(String latex) {
        for (TextCircledEnum c : values()) {
            latex = c.pattern.matcher(latex).replaceAll(c.value);
        }
        return latex;
    }
}
6.测试类:LatexUtilTest
package com.latex.test;

import com.latex.util.LatexUtil;
import org.apache.poi.xwpf.usermodel.ParagraphAlignment;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;

import java.io.FileOutputStream;

/**
 * Manual smoke test: writes a Word document containing text, one LaTeX formula and more text.
 */
public class LatexUtilTest {

    /**
     * Builds the document and saves it under a time-stamped file name.
     * The target directory must already exist.
     *
     * @param args unused
     * @throws Exception if the conversion or the file write fails
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the document even when the conversion or the write fails.
        try (XWPFDocument document = new XWPFDocument()) {
            XWPFParagraph paragraph = document.createParagraph();
            paragraph.setAlignment(ParagraphAlignment.LEFT);
            paragraph.setFontAlignment(ParagraphAlignment.LEFT.getValue());
            paragraph.createRun().setText("前置文本:");
            LatexUtil.addToParagraph(paragraph, "\\sin^2 \\theta + \\cos^2 \\theta = 1");
            paragraph.createRun().setText("后置文本");

            // Save the document to a file.
            String filePath = "d:\\test\\math_document_" + System.currentTimeMillis() + ".docx";
            try (FileOutputStream out = new FileOutputStream(filePath)) {
                document.write(out);
                System.out.println("word文档已成功保存");
            }
        }
    }
}
7.测试输出
latex: $\sin^2 \theta + \cos^2 \theta = 1$
mathml: <math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>sin</mi><mn>2</mn></msup><mi>θ</mi><mo>+</mo><msup><mi>cos</mi><mn>2</mn></msup><mi>θ</mi><mo>=</mo><mn>1</mn></math>
omml: <?xml version="1.0" encoding="UTF-8"?><m:oMath xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:mml="http://www.w3.org/1998/Math/MathML"><m:sSup><m:e><m:r><m:rPr><m:sty m:val="p"/></m:rPr><m:t>sin</m:t></m:r></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:t>θ</m:t></m:r><m:r><m:t>+</m:t></m:r><m:sSup><m:e><m:r><m:rPr><m:sty m:val="p"/></m:rPr><m:t>cos</m:t></m:r></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:t>θ</m:t></m:r><m:r><m:t>=</m:t></m:r><m:r><m:t>1</m:t></m:r></m:oMath>
word文档已成功保存
8.word文档内容

9.mml2omml.xsl文件获取
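该样式表随 Microsoft Office 一起安装,文件名为 MML2OMML.XSL,一般位于 C 盘的 Office 安装目录:C:\Program Files\Microsoft Office\root\Office16。将其复制到项目的 src/main/resources 目录下,并确保文件名与代码中读取的资源名(mml2omml.xsl)大小写完全一致。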
到此这篇关于 Java 实现 LaTeX 转为 OMML 并写入 Word 文档的文章就介绍到这了,更多相关 Java LaTeX 转 OMML 并写入 Word 的内容,请搜索代码网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持代码网!
发表评论