使用 Open XML SDK 将 HTML 富文本转换为 DOCX 格式文档相对复杂。下面是一个示例:手动检测 <strong> 和 <em> 标签,并应用相应的文本格式。
using System;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

/// <summary>
/// Minimal hand-rolled HTML-to-DOCX sample built on the Open XML SDK.
/// Each &lt;p&gt; element becomes a Word paragraph; &lt;strong&gt; and &lt;em&gt;
/// are mapped to bold and italic runs. No other markup is interpreted.
/// </summary>
class Program
{
    // The only inline tags this sample recognises.
    private static readonly string[] InlineTags = { "<strong>", "</strong>", "<em>", "</em>" };

    static void Main()
    {
        string htmlContent = "<p>this is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Create a new DOCX document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Split the HTML on paragraph tags and build one DOCX paragraph per piece.
            string[] paragraphs = htmlContent.Split(new[] { "<p>", "</p>" }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string paragraphContent in paragraphs)
            {
                body.Append(BuildParagraph(paragraphContent));
            }
        }

        Console.WriteLine("html to docx conversion complete.");
    }

    /// <summary>
    /// Converts the inner HTML of one paragraph into a <see cref="Paragraph"/>.
    /// The text is scanned left to right; each recognised tag toggles the
    /// bold/italic state, and every stretch of text between tags becomes its
    /// own run carrying the state that was active at that point.
    /// </summary>
    /// <param name="paragraphContent">Paragraph markup without the surrounding &lt;p&gt; tags.</param>
    /// <returns>A paragraph containing one run per text segment.</returns>
    static Paragraph BuildParagraph(string paragraphContent)
    {
        Paragraph paragraph = new Paragraph();
        bool isBold = false;
        bool isItalic = false;
        int position = 0;

        while (position < paragraphContent.Length)
        {
            // Locate the nearest recognised tag at or after the current position.
            int tagIndex = -1;
            string tag = null;
            foreach (string candidate in InlineTags)
            {
                int index = paragraphContent.IndexOf(candidate, position, StringComparison.Ordinal);
                if (index >= 0 && (tagIndex < 0 || index < tagIndex))
                {
                    tagIndex = index;
                    tag = candidate;
                }
            }

            // Emit the text that precedes the tag (or the remainder when no tag is left).
            int textEnd = tag == null ? paragraphContent.Length : tagIndex;
            if (textEnd > position)
            {
                string segment = paragraphContent.Substring(position, textEnd - position);
                paragraph.Append(CreateRun(segment, isBold, isItalic));
            }

            if (tag == null)
            {
                break;
            }

            // The formatting state must be tracked here: once the tags are
            // consumed they are no longer part of the text segments.
            switch (tag)
            {
                case "<strong>":
                    isBold = true;
                    break;
                case "</strong>":
                    isBold = false;
                    break;
                case "<em>":
                    isItalic = true;
                    break;
                case "</em>":
                    isItalic = false;
                    break;
            }

            position = tagIndex + tag.Length;
        }

        return paragraph;
    }

    /// <summary>
    /// Creates a run holding <paramref name="text"/> with the requested formatting.
    /// </summary>
    /// <param name="text">Literal text of the run.</param>
    /// <param name="isBold">Whether the run is bold.</param>
    /// <param name="isItalic">Whether the run is italic.</param>
    /// <returns>A run with at most one properties element followed by the text.</returns>
    static Run CreateRun(string text, bool isBold, bool isItalic)
    {
        Run run = new Run();

        if (isBold || isItalic)
        {
            // Run properties are appended before the text so they are the run's first child.
            RunProperties runProperties = new RunProperties();
            if (isBold)
            {
                runProperties.Bold = new Bold();
            }
            if (isItalic)
            {
                runProperties.Italic = new Italic();
            }
            run.Append(runProperties);
        }

        // Preserve leading/trailing spaces so adjacent runs do not collapse together.
        run.Append(new Text(text) { Space = SpaceProcessingModeValues.Preserve });
        return run;
    }
}
需要根据 HTML 标记的不同来创建相应的 DOCX 元素,例如将 <p> 标签映射到 DOCX 段落,将 <strong> 标签映射到粗体等。
using System;
using System.IO;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

/// <summary>
/// Skeleton showing where HTML markup is mapped onto DOCX elements.
/// Only a single top-level &lt;p&gt; element is handled; extend
/// <see cref="ProcessHtmlContent"/> for further tags.
/// </summary>
class Program
{
    private const string ParagraphOpenTag = "<p>";
    private const string ParagraphCloseTag = "</p>";

    static void Main()
    {
        string htmlContent = "<p>this is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Create a new DOCX document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Parse the HTML content and create the matching DOCX elements.
            ProcessHtmlContent(htmlContent, body);
            doc.Save();
        }

        Console.WriteLine("html to docx conversion complete.");
    }

    /// <summary>
    /// Maps HTML content onto DOCX elements appended to <paramref name="parentElement"/>.
    /// When the content is wrapped in a single &lt;p&gt;...&lt;/p&gt; pair, its inner
    /// text becomes one paragraph. Any other input appends nothing.
    /// </summary>
    /// <remarks>
    /// Inline tags inside the paragraph (for example &lt;strong&gt;) are not
    /// interpreted here and end up as literal text in the document.
    /// </remarks>
    /// <param name="htmlContent">HTML fragment to convert.</param>
    /// <param name="parentElement">Element that receives the generated content.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    static void ProcessHtmlContent(string htmlContent, OpenXmlElement parentElement)
    {
        if (htmlContent == null)
        {
            throw new ArgumentNullException(nameof(htmlContent));
        }
        if (parentElement == null)
        {
            throw new ArgumentNullException(nameof(parentElement));
        }

        // Tags are machine-readable tokens, so compare ordinally rather than by culture.
        bool isParagraph = htmlContent.StartsWith(ParagraphOpenTag, StringComparison.Ordinal)
            && htmlContent.EndsWith(ParagraphCloseTag, StringComparison.Ordinal);

        if (isParagraph)
        {
            int innerLength = htmlContent.Length - ParagraphOpenTag.Length - ParagraphCloseTag.Length;
            string paragraphText = htmlContent.Substring(ParagraphOpenTag.Length, innerLength);

            // Preserve leading/trailing spaces of the paragraph text.
            Text text = new Text(paragraphText) { Space = SpaceProcessingModeValues.Preserve };
            parentElement.Append(new Paragraph(new Run(text)));
        }

        // Add handling for further HTML tags here as needed.
    }
}
using System;
using System.IO;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;

/// <summary>
/// Converts an HTML fragment to DOCX by delegating the parsing to the
/// HtmlToOpenXml library instead of handling tags by hand.
/// </summary>
class Program
{
    static void Main()
    {
        string htmlContent = "<p>this is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Create a new DOCX document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Let HtmlToOpenXml translate the HTML into DOCX elements.
            HtmlConverter converter = new HtmlConverter(mainPart);
            converter.ImageProcessing = ImageProcessing.AutomaticDownload;

            var convertedElements = converter.Parse(htmlContent);
            foreach (var element in convertedElements)
            {
                body.Append(element);
            }

            doc.Save();
        }

        Console.WriteLine("html to docx conversion complete.");
    }
}
带内联 CSS 样式的示例。
using System;
using System.IO;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;

/// <summary>
/// Converts an HTML fragment that carries an inline <c>style</c> attribute
/// to DOCX using the HtmlToOpenXml library.
/// </summary>
class Program
{
    static void Main()
    {
        string htmlContent = "<p style=\"color: blue; font-size: 14px;\">this is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Create a new DOCX document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Let HtmlToOpenXml translate the HTML into DOCX elements.
            HtmlConverter converter = new HtmlConverter(mainPart);
            converter.ImageProcessing = ImageProcessing.AutomaticDownload;

            var convertedElements = converter.Parse(htmlContent);
            foreach (var element in convertedElements)
            {
                body.Append(element);
            }

            doc.Save();
        }

        Console.WriteLine("html to docx conversion complete.");
    }
}
应用外部 CSS 样式的示例(先将外部样式转换为内联样式)。
using System;
using System.IO;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;

/// <summary>
/// Converts HTML that is styled through CSS classes to DOCX: the class rules are
/// first rewritten as inline <c>style</c> attributes, then HtmlToOpenXml does
/// the conversion.
/// </summary>
class Program
{
    static void Main()
    {
        string htmlContent = "<p class=\"my-paragraph\">this is <strong>bold</strong> and <em>italic</em> text.</p>";
        string externalCss = ".my-paragraph { color: blue; font-size: 14px; }";

        // Create a new DOCX document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Turn the external CSS rules into inline styles.
            htmlContent = ApplyExternalCssToHtml(htmlContent, externalCss);

            // Let HtmlToOpenXml translate the HTML into DOCX elements.
            HtmlConverter converter = new HtmlConverter(mainPart);
            converter.ImageProcessing = ImageProcessing.AutomaticDownload;

            var convertedElements = converter.Parse(htmlContent);
            foreach (var element in convertedElements)
            {
                body.Append(element);
            }

            doc.Save();
        }

        Console.WriteLine("html to docx conversion complete.");
    }

    /// <summary>
    /// Replaces <c>class="name"</c> attributes in the HTML with a <c>style</c>
    /// attribute holding the declarations of the matching <c>.name { ... }</c> rule.
    /// </summary>
    /// <remarks>
    /// This is a deliberately simple textual substitution. Only rules whose
    /// selector is a single class (<c>.name</c>) are applied; other selectors and
    /// malformed rules are skipped. The class attribute must contain exactly that
    /// one class name, and an existing <c>style</c> attribute is not merged.
    /// </remarks>
    /// <param name="htmlContent">HTML whose class attributes should be inlined.</param>
    /// <param name="externalCss">CSS text made of <c>selector { declarations }</c> rules.</param>
    /// <returns>The HTML with matching class attributes replaced by inline styles.</returns>
    static string ApplyExternalCssToHtml(string htmlContent, string externalCss)
    {
        if (string.IsNullOrEmpty(htmlContent) || string.IsNullOrWhiteSpace(externalCss))
        {
            return htmlContent;
        }

        // Each rule ends with '}', so splitting on it yields "selector { declarations".
        foreach (string rule in externalCss.Split('}'))
        {
            if (string.IsNullOrWhiteSpace(rule))
            {
                continue;
            }

            string[] parts = rule.Split(new[] { '{' }, 2);
            if (parts.Length != 2)
            {
                // No '{' in this rule: nothing that can be applied.
                continue;
            }

            string selector = parts[0].Trim();
            if (selector.Length < 2 || selector[0] != '.')
            {
                // Not a simple class selector.
                continue;
            }

            // The HTML attribute holds the bare class name, without the selector's leading dot.
            string className = selector.Substring(1);

            // Double quotes inside the declarations would terminate the attribute value early.
            string style = parts[1].Trim().Replace("\"", "'");

            htmlContent = htmlContent.Replace($"class=\"{className}\"", $"style=\"{style}\"");
        }

        return htmlContent;
    }
}
发表评论