|
@@ -10,7 +10,11 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
|
import org.jsoup.Jsoup;
|
|
|
import org.jsoup.nodes.Document;
|
|
|
import org.jsoup.nodes.Element;
|
|
|
+import org.jsoup.nodes.Node;
|
|
|
+import org.jsoup.nodes.TextNode;
|
|
|
import org.jsoup.select.Elements;
|
|
|
+import org.jsoup.select.NodeTraversor;
|
|
|
+import org.jsoup.select.NodeVisitor;
|
|
|
import org.springblade.core.tool.utils.IoUtil;
|
|
|
import org.springblade.core.tool.utils.StringUtil;
|
|
|
import org.springblade.manager.utils.FileUtils;
|
|
@@ -23,17 +27,17 @@ import java.util.stream.Collectors;
|
|
|
|
|
|
public class HtmlTableToExcelConverter {
|
|
|
|
|
|
-// public static void main(String[] args) throws Exception {
|
|
|
-// String html = "D:\\tools\\html\\1892816666778140672.html";
|
|
|
-//
|
|
|
-//
|
|
|
-// InputStream inputStreamByUrl = FileUtils.getInputStreamByUrl(html);
|
|
|
-// String htmlString = IoUtil.readToString(inputStreamByUrl);
|
|
|
-//
|
|
|
-// convertHtmlTableToExcel(htmlString, "D:\\tools\\html\\123.xlsx");
|
|
|
-//
|
|
|
-//
|
|
|
-// }
|
|
|
+ /**
+  * Manual smoke-test entry point: reads an HTML document and passes its content
+  * to {@link #convertHtmlTableToExcel(String)}.
+  *
+  * @param args optional; when present, {@code args[0]} is used as the HTML location
+  *             instead of the built-in sample path
+  * @throws Exception if reading the input or converting it fails
+  */
+ public static void main(String[] args) throws Exception {
+     // Prefer a location given on the command line; otherwise keep the original sample path.
+     String html = (args != null && args.length > 0)
+             ? args[0]
+             : "D:\\tools\\html\\1927983851351572480.html";
+
+     String htmlString;
+     // try-with-resources guarantees the stream is released even if reading throws.
+     try (InputStream inputStreamByUrl = FileUtils.getInputStreamByUrl(html)) {
+         htmlString = IoUtil.readToString(inputStreamByUrl);
+     }
+
+     convertHtmlTableToExcel(htmlString);
+ }
|
|
|
|
|
|
public static Workbook convertHtmlTableToExcel(String html) throws Exception {
|
|
|
Document doc = Jsoup.parse(html);
|
|
@@ -57,6 +61,8 @@ public class HtmlTableToExcelConverter {
|
|
|
|
|
|
// 合并区域跟踪列表
|
|
|
List<CellRangeAddress> mergedRegions = new ArrayList<>();
|
|
|
+ // 合并区域是否存在边框
|
|
|
+ Map<CellRangeAddress, CellStyle> mergedFrame = new HashMap<>();
|
|
|
// 单元格占用跟踪器
|
|
|
Map<Integer, Set<Integer>> occupiedCells = new HashMap<>();
|
|
|
|
|
@@ -120,6 +126,7 @@ public class HtmlTableToExcelConverter {
|
|
|
|
|
|
// 获取单元格内容
|
|
|
String cellText = extractCellText(td);
|
|
|
+// String cellText = extractCellTextWithFormElements(td);
|
|
|
|
|
|
// 创建单元格
|
|
|
Cell cell = excelRow.createCell(excelColNum);
|
|
@@ -132,7 +139,7 @@ public class HtmlTableToExcelConverter {
|
|
|
styleCache.put(styleKey, style);
|
|
|
|
|
|
// 检查是否需要自动换行
|
|
|
- if (shouldWrapText(td, cssRules,cellText)) {
|
|
|
+ if (shouldWrapText(td, cssRules, cellText)) {
|
|
|
style.setWrapText(true);
|
|
|
}
|
|
|
}
|
|
@@ -176,6 +183,10 @@ public class HtmlTableToExcelConverter {
|
|
|
if (!isOverlapping(mergedRegions, region)) {
|
|
|
sheet.addMergedRegion(region);
|
|
|
mergedRegions.add(region);
|
|
|
+ CellStyle cellStyle = styleCache.get(styleKey);
|
|
|
+ mergedFrame.put(region, cellStyle);
|
|
|
+ // 为合并区域设置边框(使用左上角单元格的样式)
|
|
|
+// setMergedRegionBorders(workbook, sheet, region, styleCache.get(styleKey));
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -184,7 +195,7 @@ public class HtmlTableToExcelConverter {
|
|
|
excelRowNum++;
|
|
|
}
|
|
|
// 修复合并单元格边框问题
|
|
|
- fixMergedRegionBorders(workbook, sheet, mergedRegions);
|
|
|
+ fixMergedRegionBorders(workbook, sheet, mergedRegions, mergedFrame);
|
|
|
|
|
|
// 应用列宽
|
|
|
for (Map.Entry<Integer, Float> entry : columnWidths.entrySet()) {
|
|
@@ -216,83 +227,103 @@ public class HtmlTableToExcelConverter {
|
|
|
adjustRowHeights(sheet);
|
|
|
|
|
|
// 写入文件
|
|
|
-// try (FileOutputStream fos = new FileOutputStream(outputPath)) {
|
|
|
-// workbook.write(fos);
|
|
|
-// }
|
|
|
-// workbook.close();
|
|
|
+ try (FileOutputStream fos = new FileOutputStream("D:\\tools\\html\\234.xlsx")) {
|
|
|
+ workbook.write(fos);
|
|
|
+ }
|
|
|
+ workbook.close();
|
|
|
|
|
|
- return workbook;
|
|
|
+ return null;
|
|
|
}
|
|
|
|
|
|
- // 修复合并单元格边框问题
|
|
|
- private static void fixMergedRegionBorders(Workbook workbook, Sheet sheet, List<CellRangeAddress> mergedRegions) {
|
|
|
- for (CellRangeAddress region : mergedRegions) {
|
|
|
- // 设置合并区域的外边框
|
|
|
- RegionUtil.setBorderTop(BorderStyle.THIN, region, sheet);
|
|
|
- RegionUtil.setBorderRight(BorderStyle.THIN, region, sheet);
|
|
|
- RegionUtil.setBorderBottom(BorderStyle.THIN, region, sheet);
|
|
|
- RegionUtil.setBorderLeft(BorderStyle.THIN, region, sheet);
|
|
|
-
|
|
|
- RegionUtil.setTopBorderColor(IndexedColors.BLACK.getIndex(), region, sheet);
|
|
|
- RegionUtil.setRightBorderColor(IndexedColors.BLACK.getIndex(), region, sheet);
|
|
|
- RegionUtil.setBottomBorderColor(IndexedColors.BLACK.getIndex(), region, sheet);
|
|
|
- RegionUtil.setLeftBorderColor(IndexedColors.BLACK.getIndex(), region, sheet);
|
|
|
-
|
|
|
- // 特殊处理:确保内部单元格边框完整
|
|
|
- int firstRow = region.getFirstRow();
|
|
|
- int lastRow = region.getLastRow();
|
|
|
- int firstCol = region.getFirstColumn();
|
|
|
- int lastCol = region.getLastColumn();
|
|
|
-
|
|
|
- // 对于多行多列的合并区域,需要额外设置内部边框
|
|
|
- if (lastRow - firstRow > 0 || lastCol - firstCol > 0) {
|
|
|
- // 设置内部水平边框
|
|
|
- for (int row = firstRow; row <= lastRow; row++) {
|
|
|
- Row excelRow = sheet.getRow(row);
|
|
|
- if (excelRow == null) continue;
|
|
|
-
|
|
|
- for (int col = firstCol; col <= lastCol; col++) {
|
|
|
- Cell cell = excelRow.getCell(col);
|
|
|
- if (cell == null) continue;
|
|
|
-
|
|
|
- CellStyle style = cell.getCellStyle();
|
|
|
-
|
|
|
- // 设置顶部边框(如果是区域的第一行)
|
|
|
- if (row == firstRow) {
|
|
|
- style.setBorderTop(BorderStyle.THIN);
|
|
|
- }
|
|
|
-
|
|
|
- // 设置底部边框(如果是区域的最后一行)
|
|
|
- if (row == lastRow) {
|
|
|
- style.setBorderBottom(BorderStyle.THIN);
|
|
|
- }
|
|
|
+ /**
+  * Applies the border styles and border colours read from {@code style} to the
+  * outline of {@code region} through {@link RegionUtil}. For regions that span
+  * more than one cell it then walks the cells that already exist inside the
+  * region and sets the matching edge border on each cell's own style.
+  *
+  * NOTE(review): the only call to this method visible in this change is commented out;
+  * confirm whether the method is still needed.
+  * NOTE(review): the per-cell setters modify the object returned by
+  * {@code cell.getCellStyle()}; if that style is shared between cells, cells outside
+  * the region would change too. Confirm against the style cache.
+  *
+  * @param workbook not referenced in the method body
+  * @param sheet    sheet that contains the merged region
+  * @param region   merged region whose borders are set
+  * @param style    style whose border styles and colours are copied onto the region
+  */
|
|
|
+ private static void setMergedRegionBorders(Workbook workbook, Sheet sheet,
|
|
|
+ CellRangeAddress region, CellStyle style) {
|
|
|
+ // Apply the border styles of the given style to the merged region
|
|
|
+ RegionUtil.setBorderTop(style.getBorderTopEnum(), region, sheet);
|
|
|
+ RegionUtil.setBorderRight(style.getBorderRightEnum(), region, sheet);
|
|
|
+ RegionUtil.setBorderBottom(style.getBorderBottomEnum(), region, sheet);
|
|
|
+ RegionUtil.setBorderLeft(style.getBorderLeftEnum(), region, sheet);
|
|
|
+
|
|
|
+ // Apply the border colours of the given style to the merged region
|
|
|
+ RegionUtil.setTopBorderColor(style.getTopBorderColor(), region, sheet);
|
|
|
+ RegionUtil.setRightBorderColor(style.getRightBorderColor(), region, sheet);
|
|
|
+ RegionUtil.setBottomBorderColor(style.getBottomBorderColor(), region, sheet);
|
|
|
+ RegionUtil.setLeftBorderColor(style.getLeftBorderColor(), region, sheet);
|
|
|
+
|
|
|
+ // Special handling: also set the borders on the individual cells inside the region
|
|
|
+ int firstRow = region.getFirstRow();
|
|
|
+ int lastRow = region.getLastRow();
|
|
|
+ int firstCol = region.getFirstColumn();
|
|
|
+ int lastCol = region.getLastColumn();
|
|
|
+
|
|
|
+ // Only regions that span more than one row or more than one column need the per-cell pass
|
|
|
+ if (lastRow - firstRow > 0 || lastCol - firstCol > 0) {
|
|
|
+ // Visit every row of the region; rows that do not exist on the sheet are skipped
|
|
|
+ for (int r = firstRow; r <= lastRow; r++) {
|
|
|
+ Row excelRow = sheet.getRow(r);
|
|
|
+ if (excelRow == null) continue;
|
|
|
+
|
|
|
+ for (int c = firstCol; c <= lastCol; c++) {
|
|
|
+ Cell cell = excelRow.getCell(c);
|
|
|
+ if (cell == null) continue;
|
|
|
+
|
|
|
+ CellStyle cellStyle = cell.getCellStyle();
|
|
|
+
|
|
|
+ // Top border, only for cells in the first row of the region
|
|
|
+ if (r == firstRow) {
|
|
|
+ cellStyle.setBorderTop(style.getBorderTopEnum());
|
|
|
+ }
|
|
|
|
|
|
|
- // 设置左侧边框(如果是区域的第一列)
|
|
|
- if (col == firstCol) {
|
|
|
- style.setBorderLeft(BorderStyle.THIN);
|
|
|
- }
|
|
|
+ // Bottom border, only for cells in the last row of the region
|
|
|
+ if (r == lastRow) {
|
|
|
+ cellStyle.setBorderBottom(style.getBorderBottomEnum());
|
|
|
+ }
|
|
|
|
|
|
|
- // 设置右侧边框(如果是区域的最后一列)
|
|
|
- if (col == lastCol) {
|
|
|
- style.setBorderRight(BorderStyle.THIN);
|
|
|
- }
|
|
|
+ // Left border, only for cells in the first column of the region
|
|
|
+ if (c == firstCol) {
|
|
|
+ cellStyle.setBorderLeft(style.getBorderLeftEnum());
|
|
|
+ }
|
|
|
|
|
|
|
- // 设置内部边框
|
|
|
- if (row < lastRow) {
|
|
|
- style.setBorderBottom(BorderStyle.THIN);
|
|
|
- }
|
|
|
- if (col < lastCol) {
|
|
|
- style.setBorderRight(BorderStyle.THIN);
|
|
|
- }
|
|
|
+ // Right border, only for cells in the last column of the region
|
|
|
+ if (c == lastCol) {
|
|
|
+ cellStyle.setBorderRight(style.getBorderRightEnum());
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ /**
+  * Redraws the outer border of every merged region as a thin black line, but only on
+  * the edges for which the style recorded for that region declares a border
+  * (any border style other than {@link BorderStyle#NONE}).
+  *
+  * @param workbook      not referenced by this method; kept so existing callers compile unchanged
+  * @param sheet         sheet that contains the merged regions
+  * @param mergedRegions regions to process, in the order they were added to the sheet
+  * @param mergedFrame   style recorded for each merged region; regions without an entry
+  *                      are left untouched
+  */
+ private static void fixMergedRegionBorders(Workbook workbook, Sheet sheet, List<CellRangeAddress> mergedRegions, Map<CellRangeAddress, CellStyle> mergedFrame) {
+     final short black = IndexedColors.BLACK.getIndex();
+
+     for (CellRangeAddress region : mergedRegions) {
+         CellStyle cellStyle = mergedFrame.get(region);
+         if (cellStyle == null) {
+             // Without a recorded style there is no information about which edges to draw.
+             continue;
+         }
+
+         // Each edge gets its own style AND its own colour; the colour setter must match the edge.
+         if (cellStyle.getBorderTopEnum() != BorderStyle.NONE) {
+             RegionUtil.setBorderTop(BorderStyle.THIN, region, sheet);
+             RegionUtil.setTopBorderColor(black, region, sheet);
+         }
+         if (cellStyle.getBorderBottomEnum() != BorderStyle.NONE) {
+             RegionUtil.setBorderBottom(BorderStyle.THIN, region, sheet);
+             RegionUtil.setBottomBorderColor(black, region, sheet);
+         }
+         if (cellStyle.getBorderLeftEnum() != BorderStyle.NONE) {
+             RegionUtil.setBorderLeft(BorderStyle.THIN, region, sheet);
+             RegionUtil.setLeftBorderColor(black, region, sheet);
+         }
+         if (cellStyle.getBorderRightEnum() != BorderStyle.NONE) {
+             RegionUtil.setBorderRight(BorderStyle.THIN, region, sheet);
+             RegionUtil.setRightBorderColor(black, region, sheet);
+         }
+     }
+ }
|
|
|
+
|
|
|
|
|
|
// 新增方法:检查是否需要自动换行
|
|
|
- private static boolean shouldWrapText(Element td, Map<String, String> cssRules,String cellText) {
|
|
|
+ private static boolean shouldWrapText(Element td, Map<String, String> cssRules, String cellText) {
|
|
|
// 1. 检查内联样式
|
|
|
String style = td.attr("style");
|
|
|
if (style != null && style.toLowerCase().contains("overflow-wrap") ||
|
|
@@ -351,7 +382,6 @@ public class HtmlTableToExcelConverter {
|
|
|
}
|
|
|
|
|
|
|
|
|
-
|
|
|
// 辅助方法:检查单元格是否被占用
|
|
|
private static boolean isCellOccupied(Map<Integer, Set<Integer>> occupiedCells, int row, int col) {
|
|
|
if (!occupiedCells.containsKey(row)) {
|
|
@@ -474,14 +504,14 @@ public class HtmlTableToExcelConverter {
|
|
|
style.setFont(fontCache.get(fontKey));
|
|
|
|
|
|
// 设置边框(细线)
|
|
|
- style.setBorderTop(BorderStyle.THIN);
|
|
|
- style.setBorderBottom(BorderStyle.THIN);
|
|
|
- style.setBorderLeft(BorderStyle.THIN);
|
|
|
- style.setBorderRight(BorderStyle.THIN);
|
|
|
- style.setTopBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
- style.setBottomBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
- style.setLeftBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
- style.setRightBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
+// style.setBorderTop(BorderStyle.THIN);
|
|
|
+// style.setBorderBottom(BorderStyle.THIN);
|
|
|
+// style.setBorderLeft(BorderStyle.THIN);
|
|
|
+// style.setBorderRight(BorderStyle.THIN);
|
|
|
+// style.setTopBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
+// style.setBottomBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
+// style.setLeftBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
+// style.setRightBorderColor(IndexedColors.BLACK.getIndex());
|
|
|
|
|
|
return style;
|
|
|
}
|
|
@@ -490,18 +520,17 @@ public class HtmlTableToExcelConverter {
|
|
|
private static void applyCssStyle(CellStyle style, Font font, String css) {
|
|
|
if (css == null || css.isEmpty()) return;
|
|
|
|
|
|
- // 解析CSS属性
|
|
|
String[] properties = css.split(";");
|
|
|
for (String prop : properties) {
|
|
|
String[] parts = prop.split(":");
|
|
|
if (parts.length < 2) continue;
|
|
|
|
|
|
- String key = parts[0].trim();
|
|
|
- String value = parts[1].trim();
|
|
|
+ String key = parts[0].trim().toLowerCase();
|
|
|
+ String value = parts[1].trim().toLowerCase();
|
|
|
|
|
|
switch (key) {
|
|
|
case "background-color":
|
|
|
-// setBackgroundColor(style, value);
|
|
|
+ setBackgroundColor(style, value);
|
|
|
break;
|
|
|
case "font-family":
|
|
|
font.setFontName(value);
|
|
@@ -510,7 +539,7 @@ public class HtmlTableToExcelConverter {
|
|
|
setFontSize(font, value);
|
|
|
break;
|
|
|
case "font-weight":
|
|
|
- if ("bold".equalsIgnoreCase(value)) {
|
|
|
+ if ("bold".equals(value)) {
|
|
|
font.setBold(true);
|
|
|
}
|
|
|
break;
|
|
@@ -520,10 +549,59 @@ public class HtmlTableToExcelConverter {
|
|
|
case "vertical-align":
|
|
|
setVerticalAlignment(style, value);
|
|
|
break;
|
|
|
+ case "white-space":
|
|
|
+ // 这个属性在shouldWrapText方法中处理
|
|
|
+ break;
|
|
|
+ // 处理边框样式
|
|
|
+ case "border-top-style":
|
|
|
+ style.setBorderTop(mapBorderStyle(value));
|
|
|
+ break;
|
|
|
+ case "border-right-style":
|
|
|
+ style.setBorderRight(mapBorderStyle(value));
|
|
|
+ break;
|
|
|
+ case "border-bottom-style":
|
|
|
+ style.setBorderBottom(mapBorderStyle(value));
|
|
|
+ break;
|
|
|
+ case "border-left-style":
|
|
|
+ style.setBorderLeft(mapBorderStyle(value));
|
|
|
+ break;
|
|
|
+ case "border-style": // 简写属性,设置所有边框
|
|
|
+ style.setBorderTop(mapBorderStyle(value));
|
|
|
+ style.setBorderRight(mapBorderStyle(value));
|
|
|
+ style.setBorderBottom(mapBorderStyle(value));
|
|
|
+ style.setBorderLeft(mapBorderStyle(value));
|
|
|
+ break;
|
|
|
+ // 处理边框宽度
|
|
|
+ case "border-top-width":
|
|
|
+ case "border-right-width":
|
|
|
+ case "border-bottom-width":
|
|
|
+ case "border-left-width":
|
|
|
+ // POI中边框宽度由BorderStyle控制,这里忽略具体数值
|
|
|
+ break;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ /**
+  * Maps a CSS {@code border-style} keyword to the corresponding POI {@link BorderStyle}.
+  *
+  * <p>The keyword is matched case-insensitively after trimming surrounding whitespace.
+  * {@code none} and {@code hidden} both mean "no border". Unrecognised keywords fall back
+  * to a thin line, as before.
+  *
+  * @param cssStyle CSS keyword such as {@code solid} or {@code dashed}; may be {@code null}
+  * @return the mapped border style; {@link BorderStyle#NONE} when {@code cssStyle} is {@code null}
+  */
+ private static BorderStyle mapBorderStyle(String cssStyle) {
+     if (cssStyle == null) {
+         // CSS treats an unspecified border-style as "none".
+         return BorderStyle.NONE;
+     }
+     // Locale.ROOT keeps the lowercasing independent of the JVM default locale.
+     switch (cssStyle.trim().toLowerCase(java.util.Locale.ROOT)) {
+         case "solid":
+             return BorderStyle.THIN;
+         case "dashed":
+             return BorderStyle.DASHED;
+         case "dotted":
+             return BorderStyle.DOTTED;
+         case "double":
+             return BorderStyle.DOUBLE;
+         case "thick":
+             // Not a CSS border-style keyword (it is a border-width keyword); kept for compatibility.
+             return BorderStyle.MEDIUM;
+         case "none":
+         case "hidden":
+             return BorderStyle.NONE;
+         default:
+             return BorderStyle.THIN; // unknown keyword: default to a thin line
+     }
+ }
|
|
|
+
|
|
|
// 辅助方法:设置背景色
|
|
|
private static void setBackgroundColor(CellStyle style, String color) {
|
|
|
// 简化处理:只处理rgb格式
|
|
@@ -593,4 +671,80 @@ public class HtmlTableToExcelConverter {
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ /**
+  * Extracts the text of a table cell by walking its DOM subtree with a
+  * {@code FormElementVisitor}, which collects text nodes, line breaks and the
+  * displayed state of form controls.
+  *
+  * @param td the cell element to read
+  * @return the collected text with leading and trailing whitespace removed
+  */
+ private static String extractCellTextWithFormElements(Element td) {
+     FormElementVisitor collector = new FormElementVisitor();
+     new NodeTraversor(collector).traverse(td);
+     String collected = collector.getText();
+     return collected.trim();
+ }
|
|
|
+
|
|
|
+ /**
+  * {@link NodeVisitor} that flattens a DOM subtree into plain text.
+  *
+  * <ul>
+  *   <li>Text nodes are appended as-is.</li>
+  *   <li>{@code <br>} becomes a newline; directly consecutive {@code <br>} tags yield one newline.</li>
+  *   <li>Radio buttons and checkboxes become "✓" when checked and "□" otherwise.</li>
+  *   <li>{@code <select>} contributes the text of its selected option, or of its first option
+  *       when none is marked selected.</li>
+  *   <li>Text inputs and {@code <textarea>} contribute their value, falling back to the
+  *       placeholder when the value is empty. An {@code <input>} without a {@code type}
+  *       attribute is treated as a text input.</li>
+  * </ul>
+  *
+  * <p>The children of {@code <select>} and {@code <textarea>} are skipped after the element
+  * itself has been handled, so option texts and textarea content are not appended again.
+  */
+ private static class FormElementVisitor implements NodeVisitor {
+     private final StringBuilder text = new StringBuilder();
+     private boolean lastWasBr = false;
+     // Depth of the <select>/<textarea> whose descendants are being skipped; -1 when not skipping.
+     private int skippedSubtreeDepth = -1;
+
+     @Override
+     public void head(Node node, int depth) {
+         if (skippedSubtreeDepth >= 0 && depth > skippedSubtreeDepth) {
+             // Inside a control whose displayed value has already been appended.
+             return;
+         }
+         if (node instanceof TextNode) {
+             text.append(((TextNode) node).text());
+             lastWasBr = false;
+             return;
+         }
+         if (!(node instanceof Element)) {
+             return;
+         }
+
+         Element el = (Element) node;
+         switch (el.tagName().toLowerCase(java.util.Locale.ROOT)) {
+             case "br":
+                 if (!lastWasBr) {
+                     text.append("\n");
+                 }
+                 lastWasBr = true;
+                 break;
+             case "input":
+                 appendInput(el);
+                 break;
+             case "select":
+                 appendSelectedOption(el);
+                 skippedSubtreeDepth = depth;
+                 break;
+             case "textarea":
+                 appendTextValue(el);
+                 skippedSubtreeDepth = depth;
+                 break;
+             default:
+                 break;
+         }
+     }
+
+     @Override
+     public void tail(Node node, int depth) {
+         // Leaving the control that started the skip: resume normal collection.
+         if (depth == skippedSubtreeDepth) {
+             skippedSubtreeDepth = -1;
+         }
+     }
+
+     public String getText() {
+         return text.toString();
+     }
+
+     // Appends the representation of an <input>; types other than radio/checkbox/text add nothing.
+     private void appendInput(Element el) {
+         String type = el.attr("type");
+         if ("radio".equalsIgnoreCase(type) || "checkbox".equalsIgnoreCase(type)) {
+             text.append(el.hasAttr("checked") ? "✓" : "□");
+             lastWasBr = false;
+         } else if (type.isEmpty() || "text".equalsIgnoreCase(type)) {
+             // A missing type attribute means a text field in HTML.
+             appendTextValue(el);
+         }
+     }
+
+     // Appends the selected option's text, or the first option's text when none is selected.
+     private void appendSelectedOption(Element el) {
+         Element option = el.select("option[selected]").first();
+         if (option == null) {
+             option = el.select("option").first();
+         }
+         if (option != null) {
+             text.append(option.text());
+         }
+         lastWasBr = false;
+     }
+
+     // Appends a text control's value (attribute or content), falling back to its placeholder.
+     private void appendTextValue(Element el) {
+         String value = el.hasAttr("value") ? el.attr("value") : el.text();
+         if (value.isEmpty() && el.hasAttr("placeholder")) {
+             value = el.attr("placeholder");
+         }
+         text.append(value);
+         lastWasBr = false;
+     }
+ }
|
|
|
}
|