当前位置: 代码网 > it编程>编程语言>Java > Java实现LaTeX转为OMML并写入word文档

Java实现LaTeX转为OMML并写入word文档

2026年04月03日 Java 我要评论
1.需求我有一些数学考题试卷数据存在数据库,考题数据是以大文本的形式存储,其中公式部分的格式为latex,现在需要将这些数据生成word文档供用户下载,要求公式能够正确在文档中显示,一种方案是将lat

1.需求

我有一些数学考题试卷数据存在数据库中,考题数据以大文本的形式存储,其中公式部分的格式为 LaTeX。现在需要将这些数据生成 Word 文档供用户下载,并要求公式能够在文档中正确显示。一种方案是将 LaTeX 转换成图片,然后写入 Word 文档,缺点是清晰度不高,而且不可编辑,所以需要一个原生的解决方案。自然就想到 Word 对 LaTeX 的支持,遗憾的是 Word 不支持 LaTeX,而是有自己的格式 OMML,所以需要一个工具来将 LaTeX 转换成 OMML。现有的资料显示,需要先将 LaTeX 转换成 MathML,然后再将 MathML 转换成 OMML。

2.代码结构

3.pom文件

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.5.8</version>
        <relativePath/>
    </parent>
    <groupId>com.latex</groupId>
    <artifactId>latex</artifactId>
    <version>1.0.0</version>
    <name>latex</name>
    <description>latex</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Dependencies below are required for the LaTeX conversion and for writing Word documents -->
        <dependency>
            <groupId>de.rototor.snuggletex</groupId>
            <artifactId>snuggletex-core</artifactId>
            <version>1.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.2</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.11.0</version>
        </dependency>
    </dependencies>
    <build>
        <!--
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <filtering>true</filtering>
                <includes>
                    <include>mml2omml.xsl</include>
                </includes>
            </resource>
        </resources>
        -->
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

4.转换工具:LatexUtil

package com.latex.util;

import com.latex.enums.TextCircledEnum;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMath;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMathPara;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTR;
import uk.ac.ed.ph.snuggletex.SnuggleEngine;
import uk.ac.ed.ph.snuggletex.SnuggleInput;
import uk.ac.ed.ph.snuggletex.SnuggleSession;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;

/**
 * Converts LaTeX formulas to OMML (Office Math Markup Language) and inserts them into
 * Apache POI Word paragraphs.
 *
 * <p>The conversion runs in two steps: LaTeX to MathML via SnuggleTeX, then MathML to OMML
 * via the {@code mml2omml.xsl} stylesheet loaded from the classpath.
 *
 * <p>The XSLT {@link Transformer} is created lazily and cached in a static field without any
 * synchronization, so this class must not be used from several threads concurrently.
 */
public class LatexUtil {
    /** Name of the MathML-to-OMML stylesheet as looked up on the classpath (lookup is case-sensitive). */
    private static final String STYLESHEET_RESOURCE = "mml2omml.xsl";

    /** Lazily created transformer for {@link #STYLESHEET_RESOURCE}; see {@link #getTransformer()}. */
    private static Transformer transformer;
    private static final SnuggleEngine SNUGGLE_ENGINE = new SnuggleEngine();

    /**
     * Appends a LaTeX formula to a POI paragraph as a native Word equation.
     *
     * @param paragraph target paragraph
     * @param latex     formula source <em>without</em> the surrounding {@code $} delimiters
     * @throws IllegalArgumentException if {@code latex} is {@code null}
     * @throws Exception                if the conversion to OMML fails
     */
    public static void addToParagraph(XWPFParagraph paragraph, String latex) throws Exception {
        if (latex == null) {
            throw new IllegalArgumentException("latex must not be null");
        }
        // Replace symbols the converter cannot handle, then wrap in math-mode delimiters.
        latex = "$" + filter(latex) + "$";
        paragraph.getCTP().addNewOMath().set(getCTOMath(latex));
    }

    /**
     * Converts a LaTeX formula into a {@link CTOMath} that can be written to Word directly.
     *
     * <p>Each intermediate representation is printed to standard output for debugging.
     *
     * @param latex formula source; it must be wrapped in {@code $...$},
     *              e.g. {@code $\sin^2 \theta + \cos^2 \theta = 1$}
     * @return the parsed OMML element
     * @throws Exception if any conversion step fails
     */
    public static CTOMath getCTOMath(String latex) throws Exception {
        System.out.println("latex: " + latex);
        String mathml = toMathML(latex);
        System.out.println("mathml: " + mathml);
        String omml = toOMML(mathml);
        System.out.println("omml: " + omml);
        return toCTOMath(omml);
    }

    /**
     * Converts LaTeX to MathML using SnuggleTeX.
     *
     * @param latex formula source including its {@code $} delimiters
     * @return the MathML markup as a string
     * @throws IOException if SnuggleTeX fails to read the input
     */
    public static String toMathML(String latex) throws IOException {
        SnuggleSession session = SNUGGLE_ENGINE.createSession();
        session.parseInput(new SnuggleInput(latex));
        return session.buildXMLString();
    }

    /**
     * Converts MathML to OMML by applying the {@code mml2omml.xsl} stylesheet.
     *
     * @param mathml MathML markup
     * @return the OMML markup as a string
     * @throws TransformerException if the stylesheet cannot be loaded or the transformation fails
     * @throws IOException          if the stylesheet resource cannot be read or closed
     */
    public static String toOMML(String mathml) throws TransformerException, IOException {
        Transformer xslt = getTransformer();
        StringWriter buffer = new StringWriter();
        xslt.transform(new StreamSource(new StringReader(mathml)), new StreamResult(buffer));
        return buffer.toString();
    }

    /**
     * Parses OMML markup into the POI schema object {@link CTOMath}.
     *
     * @param omml OMML markup
     * @return the first {@code oMath} element of the parsed markup
     * @throws XmlException if the markup cannot be parsed
     */
    public static CTOMath toCTOMath(String omml) throws XmlException {
        CTOMathPara ctOMathPara = CTOMathPara.Factory.parse(omml);
        CTOMath ctOMath = ctOMathPara.getOMathArray(0);
        // For making this work with Office 2007 Word as well, special font settings are necessary:
        // every math run gets explicit "Cambria Math" fonts.
        XmlCursor xmlCursor = ctOMath.newCursor();
        try {
            while (xmlCursor.hasNextToken()) {
                XmlCursor.TokenType tokenType = xmlCursor.toNextToken();
                if (tokenType.isStart() && xmlCursor.getObject() instanceof CTR) {
                    CTR ctr = (CTR) xmlCursor.getObject();
                    ctr.addNewRPr2().addNewRFonts().setAscii("Cambria Math");
                    ctr.getRPr2().getRFonts().setHAnsi("Cambria Math"); // up to Apache POI 4.1.2
                    //ctr.getRPr2().getRFontsArray(0).setHAnsi("Cambria Math"); // since Apache POI 5.0.0
                }
            }
        } finally {
            // Cursors hold on to the underlying XML store until they are released.
            xmlCursor.dispose();
        }
        return ctOMath;
    }

    /**
     * Pre-processes symbols that the converter does not recognize.
     *
     * <p>Circled numbers written as {@code \textcircled{n}} are not recognized, so they are
     * replaced up front by the matching Unicode characters (e.g. ①②③④⑤).
     *
     * @param latex LaTeX expression; {@code null} is returned unchanged
     * @return the expression with every supported {@code \textcircled{n}} replaced
     */
    public static String filter(String latex) {
        if (latex == null || !latex.contains("textcircled")) {
            return latex;
        }
        return TextCircledEnum.replaceTextCircled(latex);
    }

    /**
     * Returns the cached stylesheet transformer, creating it on first use.
     *
     * @throws TransformerConfigurationException if the stylesheet is not on the classpath
     * @throws TransformerException              if the stylesheet cannot be compiled
     * @throws IOException                       if closing the stylesheet stream fails
     */
    private static Transformer getTransformer() throws TransformerException, IOException {
        if (transformer == null) {
            // Observed on JDK 21: the limits below have to be lifted, otherwise newTransformer fails with
            // javax.xml.transform.TransformerConfigurationException: JAXP0801001
            //System.setProperty("jdk.xml.xpathExprGrpLimit", "0"); // 0 means unlimited
            //System.setProperty("jdk.xml.xpathExprOpLimit", "0");  // lift the operator limit as well

            // If mml2omml.xsl cannot be read, check the pom: under
            // build->resources->resource->includes add <include>mml2omml.xsl</include>
            try (InputStream in = LatexUtil.class.getClassLoader().getResourceAsStream(STYLESHEET_RESOURCE)) {
                if (in == null) {
                    throw new TransformerConfigurationException(
                            "Stylesheet " + STYLESHEET_RESOURCE + " was not found on the classpath");
                }
                TransformerFactory tFactory = TransformerFactory.newInstance();
                transformer = tFactory.newTransformer(new StreamSource(in));
            }
        }
        return transformer;
    }
}

5.特殊符号处理:TextCircledEnum

package com.latex.enums;

import lombok.allargsconstructor;
import lombok.getter;

@getter
@allargsconstructor
public enum textcircledenum {
    zero("\\\\textcircled\\{0\\}","⓪"),
    one("\\\\textcircled\\{1\\}","①"),
    two("\\\\textcircled\\{2\\}","②"),
    three("\\\\textcircled\\{3\\}","③"),
    four("\\\\textcircled\\{4\\}","④"),
    five("\\\\textcircled\\{5\\}","⑤"),
    six("\\\\textcircled\\{6\\}","⑥"),
    seven("\\\\textcircled\\{7\\}","⑦"),
    eight("\\\\textcircled\\{8\\}","⑧"),
    nine("\\\\textcircled\\{9\\}","⑨"),
    ten("\\\\textcircled\\{10\\}","⑩");

    private final string code;
    private final string value;

    public static string replacetextcircled(string latex){
        for (textcircledenum c : values()) {
            latex = latex.replaceall(c.getcode(), c.getvalue());
        }
        return latex;
    }
}

6.测试类:LatexUtilTest

package com.latex.test;

import com.latex.util.LatexUtil;
import org.apache.poi.xwpf.usermodel.ParagraphAlignment;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;

import java.io.FileOutputStream;

/**
 * Manual smoke test: writes a Word document containing text runs and one LaTeX formula.
 */
public class LatexUtilTest {
    /**
     * Builds a one-paragraph document and saves it under {@code d:\test\}.
     *
     * @param args unused
     * @throws Exception if the formula conversion or writing the file fails
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the document even when conversion or writing fails.
        try (XWPFDocument document = new XWPFDocument()) {
            XWPFParagraph paragraph = document.createParagraph();
            paragraph.setAlignment(ParagraphAlignment.LEFT);
            paragraph.setFontAlignment(ParagraphAlignment.LEFT.getValue());

            paragraph.createRun().setText("前置文本:");
            LatexUtil.addToParagraph(paragraph, "\\sin^2 \\theta + \\cos^2 \\theta = 1");
            paragraph.createRun().setText("后置文本");

            // Save the document; the target directory must already exist.
            String filePath = "d:\\test\\math_document_" + System.currentTimeMillis() + ".docx";
            try (FileOutputStream out = new FileOutputStream(filePath)) {
                document.write(out);
                System.out.println("word文档已成功保存");
            }
        }
    }
}

7.测试输出

latex: $\sin^2 \theta + \cos^2 \theta = 1$
mathml: <math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>sin</mi><mn>2</mn></msup><mi>θ</mi><mo>+</mo><msup><mi>cos</mi><mn>2</mn></msup><mi>θ</mi><mo>=</mo><mn>1</mn></math>
omml: <?xml version="1.0" encoding="UTF-8"?><m:oMath xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:mml="http://www.w3.org/1998/Math/MathML"><m:sSup><m:e><m:r><m:rPr><m:sty m:val="p"/></m:rPr><m:t>sin</m:t></m:r></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:t>θ</m:t></m:r><m:r><m:t>+</m:t></m:r><m:sSup><m:e><m:r><m:rPr><m:sty m:val="p"/></m:rPr><m:t>cos</m:t></m:r></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:t>θ</m:t></m:r><m:r><m:t>=</m:t></m:r><m:r><m:t>1</m:t></m:r></m:oMath>
word文档已成功保存

8.word文档内容

9.mml2omml.xsl文件获取

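MML2OMML.XSL 文件随 Microsoft Office 一起安装,路径一般在 C 盘:C:\Program Files\Microsoft Office\root\Office16。将该文件复制到项目的 src/main/resources 目录下,并保证文件名与代码中 getResourceAsStream 使用的名称(含大小写)一致。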

到此这篇关于Java实现LaTeX转为OMML并写入Word文档的文章就介绍到这了,更多相关Java LaTeX转OMML并写入Word内容请搜索代码网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持代码网!

(0)

相关文章:

版权声明:本文内容由互联网用户贡献,该文观点仅代表作者本人。本站仅提供信息存储服务,不拥有所有权,不承担相关法律责任。 如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 2386932994@qq.com 举报,一经查实将立刻删除。

发表评论

验证码:
Copyright © 2017-2026  代码网 保留所有权利. 粤ICP备2024248653号
站长QQ:2386932994 | 联系邮箱:2386932994@qq.com