当前位置:首页 » 《我的小黑屋》 » 正文

Java html 转 word,根据html文件生成word文档

20 人参与  2024年09月30日 08:01  分类 : 《我的小黑屋》  评论

点击全文阅读


获取 HTML 文件路径和 Word 模板路径,并指定 HTML 中图片路径的前缀(filePathPre)

public class Html2Word {    public static void main(String[] args) throws Exception {        String html2WordTemplatePath = "D:\\test\\test\\html2word\\html to word template (1).docx";        Document htmlDocument = Jsoup.parse(new File("D:\\test\\test\\html2word\\test-1.htm"));        NiceXWPFDocument document = new NiceXWPFDocument(Files.newInputStream(Paths.get(html2WordTemplatePath))); boolean[] returnFlag = {false};            String filePathPre = "D:\\test\\test\\html2word\\";        List<Node> nodes = htmlDocument.body().childNodes();        for (Node node : nodes) {            if (!(node instanceof Element)){                continue;            }            if (StringUtils.isEmpty(node.toString().trim())){                continue;            }            XWPFParagraph paragraph;            if (node.nodeName().equalsIgnoreCase("table") || node.nodeName().equalsIgnoreCase("ul") || node.nodeName().equalsIgnoreCase("span")){                paragraph = document.getLastParagraph();            }else {                paragraph = document.createParagraph();            }            if (node.nodeName().equalsIgnoreCase("span")){                paragraph.setSpacingAfter(200);                continue;            }            SarHtml2WordUtils.parseHtmlToWord(node,document,paragraph,returnFlag,filePathPre);            if (returnFlag[0]){                break;            }        }        document.getLastParagraph().createRun().addBreak(BreakType.PAGE);        // 写入到输出流        String outPath = "D:\\test\\test\\html2word" + System.currentTimeMillis() + ".docx";        OutputStream outputStream1 = Files.newOutputStream(Paths.get(outPath));        document.write(outputStream1);        outputStream1.close();    }}

读取html中的换行,颜色等信息,绘制到word中
 

public class Html2WordUtils {    /**     * 解析 html 格式内容 转变为 word     *     * @param node          HTML的node 节点     * @param doc           word 文档对象     * @param xwpfParagraph 段落     * @throws Exception 异常信息     */    public static void parseHtmlToWord(Node node, NiceXWPFDocument doc, XWPFParagraph xwpfParagraph,boolean[] returnFlag, String filePathPre) throws Exception {        List<Node> nodes = node.childNodes();        if (CollectionUtils.isNotEmpty(nodes)) {            for (Node childNode : nodes) {                parseHtmlToWord(childNode, doc, xwpfParagraph,returnFlag,filePathPre);            }        }        //处理table标签        if ("table".equalsIgnoreCase(node.nodeName())) {            parseTableToWord(doc, node, xwpfParagraph,filePathPre);            returnFlag[0] = true;            return;        }        if (CollectionUtils.isNotEmpty(node.childNodes())) {            return;        }        String nodeValue = node.toString();        Node parent = node.parent();        boolean boldFlag = false;        String color = "";        boolean subFlag = false;        boolean supFlag = false;        boolean ulFlag = false;        boolean tableFlag = false;        if (null != parent) {            String parentNodeName = parent.nodeName();            if (parentNodeName.equalsIgnoreCase("strong") || parentNodeName.equalsIgnoreCase("b")) {                boldFlag = true;            } else if (parentNodeName.equalsIgnoreCase("font")) {                if (Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("strong")                        || Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("b")) {                    boldFlag = true;                }                String color1 = parent.attr("color");                if (StringUtils.isNotEmpty(color1)){                    if (!Objects.equals("#ff0000",color1)){                        return;                    }                    color = color1.substring(1);                }             
   Node parented = parent.parent();                if (null != parented){                    if (parented.nodeName().equalsIgnoreCase("li")) {                        if (Objects.requireNonNull(parented.parent()).nodeName().equalsIgnoreCase("ul")) {                            ulFlag = true;                        }                    }                }            } else if (parentNodeName.equalsIgnoreCase("sub")) {                subFlag = true;            } else if (parentNodeName.equalsIgnoreCase("sup")) {                supFlag = true;            } else if (parentNodeName.equalsIgnoreCase("li")) {                if (Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("ul")) {                    ulFlag = true;                }            } else if (parentNodeName.equalsIgnoreCase("td")) {                tableFlag = true;            }        }        if (node.nodeName().equalsIgnoreCase("br")){            Node preNode = node.previousSibling();            if (null != preNode && null != preNode.parentNode()){                if (preNode.parentNode().nodeName().equalsIgnoreCase("font")) {                    String color1 = preNode.attr("color");                    if (StringUtils.isNotEmpty(color1)){                        if (!Objects.equals("#ff0000",color1)){                            return;                        }                    }                }            }        }        if ("#text".equalsIgnoreCase(node.nodeName()) && !tableFlag && !nodeValue.contains("<")) {            XWPFRun run = xwpfParagraph.createRun();            run.setFontFamily("Times New Roman");            run.setFontSize(10);            if (boldFlag) {                run.setBold(true);            }            if (StringUtils.isNotEmpty(color)) {                run.setColor(color);            }            if (supFlag) {                run.setSubscript(VerticalAlign.SUPERSCRIPT);            }            if (subFlag) {                run.setSubscript(VerticalAlign.SUBSCRIPT); 
           }            if (ulFlag && StringUtils.isNotEmpty(nodeValue.trim())) {                XWPFParagraph paragraph = doc.createParagraph();                paragraph.setIndentFromLeft(0);                paragraph.setFirstLineIndent(0);                paragraph.setIndentationLeftChars(125);                XWPFRun run1 = paragraph.createRun();                run1.setFontFamily("宋体");                run1.setFontSize(8);                run1.setText("● ");                run1.addTab();                XWPFRun run2 = paragraph.createRun();                run2.setText(nodeValue.trim());                run2.setFontFamily("宋体");                run2.setFontSize(10);            }            if (StringUtils.isNotEmpty(nodeValue) && !ulFlag){                run.setText(nodeValue.trim());            }        }        boolean enabledBreak = ReUtil.isMatch("(h[12345]|li|img|br)", node.nodeName().toLowerCase());            if (enabledBreak) {            XWPFRun run = xwpfParagraph.createRun();            run.addCarriageReturn();        }    }    private static void parseTableToWord(NiceXWPFDocument doc, Node node, XWPFParagraph paragraph,String filePathPre) throws Exception {        //简化表格html        String string = node.toString();        org.jsoup.nodes.Document tableDoc = Jsoup.parse(Objects.requireNonNull(simplifyTable(string)));        Elements trList = tableDoc.getElementsByTag("tr");        // 获取页边距        BigInteger right = (BigInteger) doc.getDocument().getBody().getSectPr().getPgMar().getRight();        BigInteger left = (BigInteger) doc.getDocument().getBody().getSectPr().getPgMar().getLeft();        // word 工作区域范围宽度        double wordWorkAreaWidth = 21 - ((double) (right.intValue() + left.intValue()) / 567);        //创建表格        XWPFTable xwpfTable = doc.insertNewTbl(paragraph.getCTP().newCursor());        if (null == xwpfTable) {            return;        }        //设置样式        xwpfTable.setWidth("100%");        //写入表格行和列内容        for (int row = 0; row < 
trList.size(); row++) {            XWPFTableRow tableRow = xwpfTable.getRow(row);            if (null == tableRow){                tableRow = xwpfTable.createRow();            }            Element trElement = trList.get(row);            Elements tds = trElement.getElementsByTag("td");            double widthTotal = 0.0;            for (int col = 0; col < tds.size(); col++) {                Element colElement = tds.get(col);                List<Node> nodes = colElement.childNodes();                for (Node tdNode : nodes) {                    if ("img".equalsIgnoreCase(tdNode.nodeName())) {                        String width = tdNode.attr("width");                        if (NumberUtils.isNumeric(width.trim())){                            widthTotal = widthTotal + Double.parseDouble(width.trim());                        }                    }                }            }            for (int col = 0; col < tds.size(); col++) {                XWPFTableCell tableCell = tableRow.getCell(col);                if (null == tableCell){                    tableCell = tableRow.createCell();                }                CTTcPr tcPr = tableCell.getCTTc().isSetTcPr() ? 
tableCell.getCTTc().getTcPr() : tableCell.getCTTc().addNewTcPr();                CTTcBorders ctTcBorders = tcPr.addNewTcBorders();                ctTcBorders.addNewLeft().setVal(STBorder.NIL);                ctTcBorders.addNewRight().setVal(STBorder.NIL);                ctTcBorders.addNewTop().setVal(STBorder.NIL);                ctTcBorders.addNewBottom().setVal(STBorder.NIL);                Element colElement = tds.get(col);                List<Node> nodes = colElement.childNodes();                for (Node tdNode : nodes) {                    if ("img".equalsIgnoreCase(tdNode.nodeName())) {                        String src = tdNode.attr("src");                        String width = tdNode.attr("width");                        String height = tdNode.attr("height");                        src = src.replaceAll("%20", " ").replaceAll("%26","&");                        String picturePath = filePathPre + src;                        InputStream inputStream = Files.newInputStream(Paths.get(picturePath));                        XWPFRun xwpfRun = tableCell.getParagraphs().get(0).createRun();                        double picWidth = wordWorkAreaWidth * ( Double.parseDouble(width.trim()) / widthTotal);                        double picHeight = picWidth * Double.parseDouble(height.trim()) / Double.parseDouble(width.trim());                        xwpfRun.addPicture(inputStream, Document.PICTURE_TYPE_PNG, src,                                (int) (picWidth * Units.EMU_PER_CENTIMETER), (int) (picHeight * Units.EMU_PER_CENTIMETER));                    }else if ("#text".equalsIgnoreCase(tdNode.nodeName())){                         parseHtmlToWordTable(colElement, doc,  tableCell.getParagraphs().get(0));                    }                }            }        }    }    private static void parseHtmlToWordTable(Node node, NiceXWPFDocument doc, XWPFParagraph xwpfParagraph) {            List<Node> nodes = node.childNodes();            if (CollectionUtils.isNotEmpty(nodes)) {       
         for (Node childNode : nodes) {                    parseHtmlToWordTable(childNode, doc, xwpfParagraph);                }            }            //处理table标签            if ("table".equalsIgnoreCase(node.nodeName())) {                return;            }            if (CollectionUtils.isNotEmpty(node.childNodes())) {                return;            }            String nodeValue = node.toString();            Node parent = node.parent();            boolean boldFlag = false;            String color = "";            boolean subFlag = false;            boolean supFlag = false;            boolean ulFlag = false;            if (null != parent) {                String parentNodeName = parent.nodeName();                if (parentNodeName.equalsIgnoreCase("strong") || parentNodeName.equalsIgnoreCase("b")) {                    boldFlag = true;                } else if (parentNodeName.equalsIgnoreCase("font")) {                    if (Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("strong")                            || Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("b")) {                        boldFlag = true;                    }                    String color1 = parent.attr("color");                    if (StringUtils.isNotEmpty(color1)){                        color = color1.substring(1);                    }                    Node parented = parent.parent();                    if (null != parented){                        if (parented.nodeName().equalsIgnoreCase("li")) {                            if (Objects.requireNonNull(parented.parent()).nodeName().equalsIgnoreCase("ul")) {                                ulFlag = true;                            }                        }                    }                } else if (parentNodeName.equalsIgnoreCase("sub")) {                    subFlag = true;                } else if (parentNodeName.equalsIgnoreCase("sup")) {                    supFlag = true;                } 
else if (parentNodeName.equalsIgnoreCase("li")) {                    if (Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("ul")) {                        ulFlag = true;                    }                }            }        if ("#text".equalsIgnoreCase(node.nodeName()) && !nodeValue.contains("<")) {            XWPFRun run = xwpfParagraph.createRun();            run.setFontFamily("Times New Roman");            run.setFontSize(10);            if (boldFlag) {                run.setBold(true);            }            if (StringUtils.isNotEmpty(color)) {                run.setColor(color);            }            if (supFlag) {                run.setSubscript(VerticalAlign.SUPERSCRIPT);            }            if (subFlag) {                run.setSubscript(VerticalAlign.SUBSCRIPT);            }            if (ulFlag && StringUtils.isNotEmpty(nodeValue.trim())) {                XWPFParagraph paragraph = doc.createParagraph();                paragraph.setIndentFromLeft(0);                paragraph.setFirstLineIndent(0);                paragraph.setIndentationLeftChars(125);                XWPFRun run1 = paragraph.createRun();                run1.setFontFamily("Times New Roman");                run1.setFontSize(8);                run1.setText("●");                run1.addTab();                XWPFRun run2 = paragraph.createRun();                run2.setText(nodeValue.trim());                run2.setFontFamily("Times New Roman");                run2.setFontSize(10);            }            if (StringUtils.isNotEmpty(nodeValue) && !ulFlag) {                run.setText(nodeValue.trim());            }        }        boolean enabledBreak = ReUtil.isMatch("(|h[12345]|li|img|br)", node.nodeName().toLowerCase());        if (enabledBreak) {            XWPFRun run = xwpfParagraph.createRun();            run.addCarriageReturn();        }    }    public static String simplifyTable(String tableContent) {        if (StringUtils.isEmpty(tableContent)) {            
return null;        }        org.jsoup.nodes.Document tableDoc = Jsoup.parse(tableContent);        Elements trElements = tableDoc.getElementsByTag("tr");        // 针对于colspan操作        for (Element trElement : trElements) {            //去除所有样式            trElement.removeAttr("class");            Elements tdElements = trElement.getElementsByTag("td");            List<Element> tdEleList = covertElements2List(tdElements);            for (Element curTdElement : tdEleList) {                //去除所有样式                curTdElement.removeAttr("class");                Element ele = curTdElement.clone();                String colspanValStr = curTdElement.attr("colspan");                if (!StringUtils.isEmpty(colspanValStr)) {                    ele.removeAttr("colspan");                    int colspanVal = Integer.parseInt(colspanValStr);                    for (int k = 0; k < colspanVal - 1; k++) {                        curTdElement.after(ele.outerHtml());                    }                }            }        }        // 针对于rowspan操作        List<Element> trEleList = covertElements2List(trElements);        Element firstTrEle = trElements.first();        if (null == firstTrEle){            return "";        }        Elements tdElements = firstTrEle.getElementsByTag("td");        Integer tdCount = tdElements.size();        //获取该列下所有单元格        for (int i = 0; i < tdElements.size(); i++) {            for (Element trElement : trEleList) {                List<Element> tdElementList = covertElements2List(trElement.getElementsByTag("td"));                Node curTdNode = tdElementList.get(i);                Node cNode = curTdNode.clone();                String rowspanValStr = curTdNode.attr("rowspan");                if (!StringUtils.isEmpty(rowspanValStr)) {                    cNode.removeAttr("rowspan");                    Element nextTrElement = trElement.nextElementSibling();                    int rowspanVal = Integer.parseInt(rowspanValStr);                    for (int j = 
0; j < rowspanVal - 1; j++) {                        Node tempNode = cNode.clone();                        List<Node> nodeList = new ArrayList<Node>();                        nodeList.add(tempNode);                        if (j > 0 && null != nextTrElement) {                            nextTrElement = nextTrElement.nextElementSibling();                        }                        Integer indexNum = i + 1;                        if (i == 0) {                            indexNum = 0;                        }                        if (null != nextTrElement){                            if (indexNum.equals(tdCount)) {                                nextTrElement.appendChild(tempNode);                            } else {                                nextTrElement.insertChildren(indexNum, nodeList);                            }                        }                    }                }            }        }        Element tableEle = tableDoc.getElementsByTag("table").first();        if (null == tableEle){            return "";        }        return tableEle.outerHtml();    }    private static List<Element> covertElements2List(Elements curElements) {        return new ArrayList<>(curElements);    }}


点击全文阅读


本文链接:http://zhangshiyu.com/post/166456.html

<< 上一篇 下一篇 >>

  • 评论(0)
  • 赞助本站

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。

关于我们 | 我要投稿 | 免责声明

Copyright © 2020-2022 ZhangShiYu.com Rights Reserved.豫ICP备2022013469号-1