如何将POI数据高效生成Word文档?

摘要:通过配置Word模板生成相应的Word文档,支持文字、列表,并保持原样式输出。采用Java POI开源库。
p, blockquote, ul, ol, dl, table, pre { margin: 15px 0 } h1, h2, h3, h4, h5, h6 { margin: 20px 0 10px; padding: 0; font-weight: bold; -webkit-font-smoothing: antialiased } h1 tt, h1 code, h2 tt, h2 code, h3 tt, h3 code, h4 tt, h4 code, h5 tt, h5 code, h6 tt, h6 code { font-size: inherit } h1 { font-size: 28px; color: rgba(0, 0, 0, 1) } h2 { font-size: 24px; border-bottom: 1px solid rgba(204, 204, 204, 1); color: rgba(0, 0, 0, 1) } h3 { font-size: 18px } h4 { font-size: 16px } h5 { font-size: 14px } h6 { color: rgba(119, 119, 119, 1); font-size: 14px } body>h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h4:first-child, body>h5:first-child, body>h6:first-child { margin-top: 0; padding-top: 0 } a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 { margin-top: 0; padding-top: 0 } h1+p, h2+p, h3+p, h4+p, h5+p, h6+p { margin-top: 10px } a { color: rgba(65, 131, 196, 1); text-decoration: none } a:hover { text-decoration: underline } ul, ol { padding-left: 30px } ul li>:first-child, ol li>:first-child, ul li ul:first-of-type, ol li ol:first-of-type, ul li ol:first-of-type, ol li ul:first-of-type { margin-top: 0 } ul ul, ul ol, ol ol, ol ul { margin-bottom: 0 } dl { padding: 0 } dl dt { font-size: 14px; font-weight: bold; font-style: italic; padding: 0; margin: 15px 0 5px } dl dt:first-child { padding: 0 } dl dt>:first-child { margin-top: 0 } dl dt>:last-child { margin-bottom: 0 } dl dd { margin: 0 0 15px; padding: 0 15px } dl dd>:first-child { margin-top: 0 } dl dd>:last-child { margin-bottom: 0 } pre, code, tt { font-size: 12px; font-family: Consolas, "Liberation Mono", Courier, monospace } code, tt { margin: 0; padding: 0; white-space: nowrap; border: 1px solid rgba(234, 234, 234, 1); background-color: rgba(248, 248, 248, 1); border-radius: 3px } pre>code { margin: 0; padding: 0; white-space: pre; border: none; background: rgba(0, 0, 0, 0) } pre { background-color: rgba(248, 
248, 248, 1); border: 1px solid rgba(204, 204, 204, 1); font-size: 13px; line-height: 19px; overflow: auto; padding: 6px 10px; border-radius: 3px } pre code, pre tt { background-color: rgba(0, 0, 0, 0); border: none } kbd { -moz-border-bottom-colors: none; -moz-border-left-colors: none; -moz-border-right-colors: none; -moz-border-top-colors: none; background-color: rgba(221, 221, 221, 1); background-image: linear-gradient(rgba(241, 241, 241, 1), rgba(221, 221, 221, 1)); background-repeat: repeat-x; border-top: 1px solid rgba(221, 221, 221, 1); border-right: 1px solid rgba(204, 204, 204, 1); border-bottom: 1px solid rgba(204, 204, 204, 1); border-left: 1px solid rgba(221, 221, 221, 1); border-image: none; border-radius: 2px; font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; line-height: 10px; padding: 1px 4px } blockquote { border-left: 4px solid rgba(221, 221, 221, 1); padding: 0 15px; color: rgba(119, 119, 119, 1) } blockquote>:first-child { margin-top: 0 } blockquote>:last-child { margin-bottom: 0 } hr { clear: both; margin: 15px 0; height: 0; overflow: hidden; border-top: none; border-right: none; border-bottom: 4px solid rgba(221, 221, 221, 1); border-left: none; background: rgba(0, 0, 0, 0); padding: 0 } img { max-width: 100% } .highlight { background: rgba(255, 255, 255, 1) } .highlight .c { color: rgba(153, 153, 136, 1); font-style: italic } .highlight .err { color: rgba(166, 23, 23, 1); background-color: rgba(227, 210, 210, 1) } .highlight .k { font-weight: bold } .highlight .o { font-weight: bold } .highlight .cm { color: rgba(153, 153, 136, 1); font-style: italic } .highlight .cp { color: rgba(153, 153, 153, 1); font-weight: bold } .highlight .c1 { color: rgba(153, 153, 136, 1); font-style: italic } .highlight .cs { color: rgba(153, 153, 153, 1); font-weight: bold; font-style: italic } .highlight .gd { color: rgba(0, 0, 0, 1); background-color: rgba(255, 221, 221, 1) } .highlight .gd .x { color: rgba(0, 0, 0, 1); background-color: rgba(255, 
170, 170, 1) } .highlight .ge { font-style: italic } .highlight .gr { color: rgba(170, 0, 0, 1) } .highlight .gh { color: rgba(153, 153, 153, 1) } .highlight .gi { color: rgba(0, 0, 0, 1); background-color: rgba(221, 255, 221, 1) } .highlight .gi .x { color: rgba(0, 0, 0, 1); background-color: rgba(170, 255, 170, 1) } .highlight .go { color: rgba(136, 136, 136, 1) } .highlight .gp { color: rgba(85, 85, 85, 1) } .highlight .gs { font-weight: bold } .highlight .gu { color: rgba(170, 170, 170, 1) } .highlight .gt { color: rgba(170, 0, 0, 1) } .highlight .kc { font-weight: bold } .highlight .kd { font-weight: bold } .highlight .kp { font-weight: bold } .highlight .kr { font-weight: bold } .highlight .kt { color: rgba(68, 85, 136, 1); font-weight: bold } .highlight .m { color: rgba(0, 153, 153, 1) } .highlight .s { color: rgba(221, 17, 68, 1) } .highlight .na { color: rgba(0, 128, 128, 1) } .highlight .nb { color: rgba(0, 134, 179, 1) } .highlight .nc { color: rgba(68, 85, 136, 1); font-weight: bold } .highlight .no { color: rgba(0, 128, 128, 1) } .highlight .ni { color: rgba(128, 0, 128, 1) } .highlight .ne { color: rgba(153, 0, 0, 1); font-weight: bold } .highlight .nf { color: rgba(153, 0, 0, 1); font-weight: bold } .highlight .nn { color: rgba(85, 85, 85, 1) } .highlight .nt { color: rgba(0, 0, 128, 1) } .highlight .nv { color: rgba(0, 128, 128, 1) } .highlight .ow { font-weight: bold } .highlight .w { color: rgba(187, 187, 187, 1) } .highlight .mf { color: rgba(0, 153, 153, 1) } .highlight .mh { color: rgba(0, 153, 153, 1) } .highlight .mi { color: rgba(0, 153, 153, 1) } .highlight .mo { color: rgba(0, 153, 153, 1) } .highlight .sb { color: rgba(221, 17, 68, 1) } .highlight .sc { color: rgba(221, 17, 68, 1) } .highlight .sd { color: rgba(221, 17, 68, 1) } .highlight .s2 { color: rgba(221, 17, 68, 1) } .highlight .se { color: rgba(221, 17, 68, 1) } .highlight .sh { color: rgba(221, 17, 68, 1) } .highlight .si { color: rgba(221, 17, 68, 1) } .highlight .sx { color: 
rgba(221, 17, 68, 1) } .highlight .sr { color: rgba(0, 153, 38, 1) } .highlight .s1 { color: rgba(221, 17, 68, 1) } .highlight .ss { color: rgba(153, 0, 115, 1) } .highlight .bp { color: rgba(153, 153, 153, 1) } .highlight .vc { color: rgba(0, 128, 128, 1) } .highlight .vg { color: rgba(0, 128, 128, 1) } .highlight .vi { color: rgba(0, 128, 128, 1) } .highlight .il { color: rgba(0, 153, 153, 1) } .pl-c { color: rgba(150, 152, 150, 1) } .pl-c1, .pl-mdh, .pl-mm, .pl-mp, .pl-mr, .pl-s1 .pl-v, .pl-s3, .pl-sc, .pl-sv { color: rgba(0, 134, 179, 1) } .pl-e, .pl-en { color: rgba(121, 93, 163, 1) } .pl-s1 .pl-s2, .pl-smi, .pl-smp, .pl-stj, .pl-vo, .pl-vpf { color: rgba(51, 51, 51, 1) } .pl-ent { color: rgba(99, 163, 92, 1) } .pl-k, .pl-s, .pl-st { color: rgba(167, 29, 93, 1) } .pl-pds, .pl-s1, .pl-s1 .pl-pse .pl-s2, .pl-sr, .pl-sr .pl-cce, .pl-sr .pl-sra, .pl-sr .pl-sre, .pl-src, .pl-v { color: rgba(223, 80, 0, 1) } .pl-id { color: rgba(181, 42, 29, 1) } .pl-ii { background-color: rgba(181, 42, 29, 1); color: rgba(248, 248, 248, 1) } .pl-sr .pl-cce { color: rgba(99, 163, 92, 1); font-weight: bold } .pl-ml { color: rgba(105, 58, 23, 1) } .pl-mh, .pl-mh .pl-en, .pl-ms { color: rgba(29, 62, 129, 1); font-weight: bold } .pl-mq { color: rgba(0, 128, 128, 1) } .pl-mi { color: rgba(51, 51, 51, 1); font-style: italic } .pl-mb { color: rgba(51, 51, 51, 1); font-weight: bold } .pl-md, .pl-mdhf { background-color: rgba(255, 236, 236, 1); color: rgba(189, 44, 0, 1) } .pl-mdht, .pl-mi1 { background-color: rgba(234, 255, 234, 1); color: rgba(85, 165, 50, 1) } .pl-mdr { color: rgba(121, 93, 163, 1); font-weight: bold } .pl-mo { color: rgba(29, 62, 129, 1) } .task-list { padding-left: 10px; margin-bottom: 0 } .task-list li { margin-left: 20px } .task-list-item { list-style-type: none; padding-left: 10px } .task-list-item label { font-weight: 400 } .task-list-item.enabled label { cursor: pointer } .task-list-item+.task-list-item { margin-top: 3px } .task-list-item-checkbox { display: 
inline-block; margin-left: -20px; margin-right: 3px; vertical-align: 1px } POI生成WORD文档 POI为Java系处理office文档的比较优秀的开源库,其中对于Excel的处理最为优秀,文档也写得很详细。不过很多网友都认为它在word文档处理方面就逊色很多,不过对于我本次的完成文档的生成我依然选择了POI。添加微信回复POI邀请你加群 需要完成功能 配置Word模板文件,包括表格 解析配置的Word文档,返回配置的特殊标记 构造数据,替换配置的标签,以及生成表格 配置word模版 采用${xx}方式配置标签,如果是表格在对应一行一列配置表格名称 注意在word文档中,如果两个相近的字符样式不同,word默认会保存在不同的RUN元素中,由此很多朋友在配置好以后都需要保存为一个单独的文件,然后再把不在一起的标签合并到一个RUN元素中,如果文件比较大,我相信这绝对是一个比较痛苦的事情,这里将会侧重处理这个问题。我的解决方案是只保留第一个RUN的样式,其他的删掉 解析word模板 首先需要将文件转换为XWPFDocument对象,可以通过流的方式,也可以通过opcpackage,不过如果使用opcpackage打开的方式,打开的文件和最终生成的文件不能够是同一个文件,我这里采用文件流的方式 public XWPFDocument openDocument() { XWPFDocument xdoc = null; InputStream is = null; try { is = new FileInputStream(saveFile); xdoc = new XWPFDocument(is); } catch (IOException e) { e.printStackTrace(); } return xdoc; } 获取非列表的标签,实现方式XWPFDocument对象有当前所有段落以及表格,这里暂不考虑表格嵌套表格的情况,每个段落的文本信息是可以通过p.getText()获取,获取段落中文档配置信息如下: // 获取段落集合中所有文本 public List<TagInfo> getWordTag(XWPFDocument doc, String regex) { List<TagInfo> tags = new ArrayList<TagInfo>(); // 普通段落 List<XWPFParagraph> pars = doc.getParagraphs(); for (int i = 0; i < pars.size(); i++) { XWPFParagraph p = pars.get(i); setTagInfoList(tags, p, regex); } // Table中段落 List<XWPFTable> commTables = getDocTables(doc, false, regex); for (XWPFTable table : commTables) { List<XWPFParagraph> tparags = getTableParagraph(table); for (int i = 0; i < tparags.size(); i++) { XWPFParagraph p = tparags.get(i); setTagInfoList(tags, p, regex); } } return tags; } 获取文本后通过正则解析,并依次保存到TagInfo中 // 向 taglist中添加新解析的段落信息 private void setTagInfoList(List<TagInfo> list, XWPFParagraph p, String regex) { if (regex == "") regex = defaultRegex; Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(p.getText()); int startPosition = 0; while (matcher.find(startPosition)) { String match = matcher.group(); if (!list.contains(new TagInfo(match, match, ""))) { list.add(new TagInfo(match, match, "")); } startPosition = matcher.end(); } } 解析表格 // 获取Table列表中的配置信息 
public Map<String, List<List<TagInfo>>> getTableTag(XWPFDocument doc, String regex) { Map<String, List<List<TagInfo>>> mapList = new HashMap<String, List<List<TagInfo>>>(); List<XWPFTable> lstTables = getDocTables(doc, true, regex); for (XWPFTable table : lstTables) { // 获取每个表格第一个单元格,以及最后一行 String strTableName = getTableListName(table, regex); List<List<TagInfo>> list = new ArrayList<List<TagInfo>>(); List<TagInfo> lstTag = new ArrayList<TagInfo>(); int rowSize = table.getRows().size(); XWPFTableRow lastRow = table.getRow(rowSize - 1); for (XWPFTableCell cell : lastRow.getTableCells()) { for (XWPFParagraph p : cell.getParagraphs()) { // 去掉空白字符串 if (p.getText() != null && p.getText().length() > 0) { setTagInfoList(lstTag, p, regex); } } } list.add(lstTag); // 添加到数据集 mapList.put(strTableName, list); } return mapList; } 生成WORD文档 难点替换标签 传入数据格式包含三个formtag以及一个tableTag {"formTags": [{"TagName":"${xxxx}","TagText":"${xxxx}","TagValue":""}, {"TagName":"${123}","TagText":"${123}","TagValue":""}, {"TagName":"${ddd}","TagText":"${ddd}","TagValue":""}], "tableTags":{ "${table}":[ [{"TagName":"${COL1}","TagText":"${COL1}","TagValue":""},{"TagName":"${COL2}","TagText":"${COL2}","TagValue":""}] ]} } 普通文档生成,并且保留配置样式,这里主要使用POI中提供的searchText方法,返回Tag所在的所有RUN标签,通过逐个字符做比较,如果找到第一个匹配的文本就开始计数,所以在当前条件下类似 $${xxx} 这样的标签是无法实现替换的 替换普通文本Tag public void ReplaceInParagraph(List<TagInfo> tagList, XWPFParagraph para, String regex) { if (regex == "") regex = defaultRegex; List<XWPFRun> runs = para.getRuns(); for (TagInfo ti : tagList) { String find = ti.TagText; String replValue = ti.TagValue; TextSegement found = para.searchText(find, new PositionInParagraph()); if (found != null) { // 判断查找内容是否在同一个Run标签中 if (found.getBeginRun() == found.getEndRun()) { XWPFRun run = runs.get(found.getBeginRun()); String runText = run.getText(run.getTextPosition()); String replaced = runText.replace(find, replValue); run.setText(replaced, 0); } else { // 存在多个Run标签 StringBuilder sb = new StringBuilder(); for (int runPos = 
found.getBeginRun(); runPos <= found .getEndRun(); runPos++) { XWPFRun run = runs.get(runPos); sb.append(run.getText((run.getTextPosition()))); } String connectedRuns = sb.toString(); String replaced = connectedRuns.replace(find, replValue); XWPFRun firstRun = runs.get(found.getBeginRun()); firstRun.setText(replaced, 0); // 删除后边的run标签 for (int runPos = found.getBeginRun() + 1; runPos <= found .getEndRun(); runPos++) { // 清空其他标签内容 XWPFRun partNext = runs.get(runPos); partNext.setText("", 0); } } } } // 完成第一遍查找,检测段落中的标签是否已经替换完 Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(para.getText()); boolean find = matcher.find(); if (find) { ReplaceInParagraph(tagList, para, regex); find = false; } } 表格主要是通过复制模版行,然后对模版行中的内容做修改 复制文本标签RUN private void CopyRun(XWPFRun target, XWPFRun source) { target.getCTR().setRPr(source.getCTR().getRPr()); // 设置文本 target.setText(source.text()); } 复制段落XWPFParagraph private void copyParagraph(XWPFParagraph target, XWPFParagraph source) { // 设置段落样式 target.getCTP().setPPr(source.getCTP().getPPr()); // 添加Run标签 for (int pos = 0; pos < target.getRuns().size(); pos++) { target.removeRun(pos); } for (XWPFRun s : source.getRuns()) { XWPFRun targetrun = target.createRun(); CopyRun(targetrun, s); } } 复制单元格XWPFTableCell private void copyTableCell(XWPFTableCell target, XWPFTableCell source) { // 列属性 target.getCTTc().setTcPr(source.getCTTc().getTcPr()); // 删除目标 targetCell 所有单元格 for (int pos = 0; pos < target.getParagraphs().size(); pos++) { target.removeParagraph(pos); } // 添加段落 for (XWPFParagraph sp : source.getParagraphs()) { XWPFParagraph targetP = target.addParagraph(); copyParagraph(targetP, sp); } } 复制行XWPFTableRow private void CopytTableRow(XWPFTableRow target, XWPFTableRow source) { // 复制样式 target.getCtRow().setTrPr(source.getCtRow().getTrPr()); // 复制单元格 for (int i = 0; i < target.getTableCells().size(); i++) { copyTableCell(target.getCell(i), source.getCell(i)); } } 以上就完成了所有功能,只要你配置规范,可以完全原样输出模版内容。这里特别感谢下肖哥哥大力支持。 
其次,Java的编码真的让人很无语,GET或POST时中文各种乱码。