|
@@ -0,0 +1,207 @@
|
|
|
|
+package org.springblade.manager.utils;
|
|
|
|
+
|
|
|
|
+import org.jsoup.Jsoup;
|
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
|
+import org.jsoup.select.Elements;
|
|
|
|
+import org.springblade.core.tool.utils.IoUtil;
|
|
|
|
+
|
|
|
|
+import java.io.File;
|
|
|
|
+import java.io.IOException;
|
|
|
|
+import java.io.InputStream;
|
|
|
|
+import java.net.URL;
|
|
|
|
+import java.util.*;
|
|
|
|
+
|
|
|
|
+public class TableCoordinates {
|
|
|
|
+ static Map<Integer, Set<Integer>> occupiedCells = new HashMap<>();
|
|
|
|
+ public static void main12(String[] args) throws Exception {
|
|
|
|
+ String htmlUrl = "/Users/hongchuangyanfa/fsdownload/1582300079941746688.html";
|
|
|
|
+ // String htmlUrl = "/mnt/sdc/Users/hongchuangyanfa/Desktop/privateUrlCopy/1630011899725201410/1582300079941746688.html";
|
|
|
|
+ getHtmlDataInfo(htmlUrl);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static List<String> getHtmlDataInfo(String htmlUrl) throws Exception {
|
|
|
|
+ occupiedCells.clear();
|
|
|
|
+ List<String> redata = new ArrayList<>();
|
|
|
|
+ InputStream fileInputStream = FileUtils.getInputStreamByUrl(htmlUrl);
|
|
|
|
+ try {
|
|
|
|
+ String htmlString = IoUtil.readToString(fileInputStream);
|
|
|
|
+ Document doc = Jsoup.parse(htmlString, "UTF-8");
|
|
|
|
+
|
|
|
|
+ Element table = doc.select("table").first();
|
|
|
|
+ if (table == null) {
|
|
|
|
+ System.out.println("未找到表格!");
|
|
|
|
+ return redata;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ Map<String, Map<String, Object>> tableKeys = new HashMap<>();
|
|
|
|
+ List<Map<String, Object>> dataList = new ArrayList<>();
|
|
|
|
+ int scX1 = 0;
|
|
|
|
+ int scX2 = 0;
|
|
|
|
+ int scY1 = 0;
|
|
|
|
+ int scY2 = 0;
|
|
|
|
+ int y = 0;
|
|
|
|
+
|
|
|
|
+ for (Element row : table.select("tr")) {
|
|
|
|
+ int x = 0; // 列计数器
|
|
|
|
+ for (Element cell : row.select("td, th")) {
|
|
|
|
+ // 跳过已被合并单元格占用的位置
|
|
|
|
+ while (isCellOccupied(x, y)) {
|
|
|
|
+ x++;
|
|
|
|
+ }
|
|
|
|
+ // 获取单元格的合并属性
|
|
|
|
+ int colspan = cell.hasAttr("colspan") ? Integer.parseInt(cell.attr("colspan")) : 1;
|
|
|
|
+ int rowspan = cell.hasAttr("rowspan") ? Integer.parseInt(cell.attr("rowspan")) : 1;
|
|
|
|
+ // 计算绝对坐标
|
|
|
|
+ Map<String, Object> coord = new HashMap<>();
|
|
|
|
+ String cellName = cell.text().replaceAll("\\s+", "");
|
|
|
|
+ coord.put("name", cellName);
|
|
|
|
+ coord.put("x1", x);
|
|
|
|
+ coord.put("y1", y);
|
|
|
|
+ coord.put("x2", x + colspan - 1);
|
|
|
|
+ coord.put("y2", y + rowspan - 1);
|
|
|
|
+ coord.put("key", findKeyname(cell));
|
|
|
|
+
|
|
|
|
+ if (cellName != null && cellName.indexOf("实测项目") >= 0) {
|
|
|
|
+ scX1 = x;
|
|
|
|
+ scX2 = x + colspan - 1;
|
|
|
|
+ scY1 = y;
|
|
|
|
+ scY2 = y + rowspan - 1;
|
|
|
|
+ } else if (cellName != null && cellName.indexOf("合格判定") >= 0) {
|
|
|
|
+ tableKeys.put("合格判定", coord);
|
|
|
|
+ } else if (cellName != null && cellName.indexOf("实测值或实测偏差值") >= 0) {
|
|
|
|
+ tableKeys.put("实测值或实测偏差值", coord);
|
|
|
|
+ } else if (cellName != null && cellName.indexOf("项次") >= 0) {
|
|
|
|
+ tableKeys.put("项次", coord);
|
|
|
|
+ } else if (cellName != null && cellName.indexOf("检查项目") >= 0) {
|
|
|
|
+ tableKeys.put("检查项目", coord);
|
|
|
|
+ } else if (cellName != null && cellName.indexOf("合格率") >= 0) {
|
|
|
|
+ tableKeys.put("合格率", coord);
|
|
|
|
+ } else {
|
|
|
|
+ dataList.add(coord);
|
|
|
|
+ }
|
|
|
|
+ // 记录合并单元格占用区域
|
|
|
|
+ markOccupiedCells(x, y, colspan, rowspan);
|
|
|
|
+ x += colspan;
|
|
|
|
+ }
|
|
|
|
+ y++;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ Map<String, List<Map<String, Object>>> coleMap = new HashMap<>();
|
|
|
|
+ int finalScY = scY2;
|
|
|
|
+ int finalScY1 = scY1;
|
|
|
|
+ tableKeys.forEach((k, v) -> {
|
|
|
|
+ int sdX1 = (int) v.get("x1");
|
|
|
|
+ int sdX2 = (int) v.get("x2");
|
|
|
|
+ int sdY1 = (int) v.get("y1");
|
|
|
|
+ int sdY2 = (int) v.get("y2");
|
|
|
|
+ List<Map<String, Object>> list = new ArrayList<>();
|
|
|
|
+ dataList.forEach(data -> {
|
|
|
|
+ int x1 = (int) data.get("x1");
|
|
|
|
+ int x2 = (int) data.get("x2");
|
|
|
|
+ int y1 = (int) data.get("y1");
|
|
|
|
+ int y2 = (int) data.get("y2");
|
|
|
|
+ if (y1 >= sdY1 && x1 >= sdX1 && x2 <= sdX2 && y2 <= finalScY && y1 >= finalScY1) {
|
|
|
|
+ list.add(data);
|
|
|
|
+ }
|
|
|
|
+ coleMap.put(k, list);
|
|
|
|
+ });
|
|
|
|
+ System.out.println(k + ": " + list);
|
|
|
|
+ });
|
|
|
|
+
|
|
|
|
+ List<Map<String, Object>> hgCol = coleMap.get("合格判定");
|
|
|
|
+ List<Map<String, Object>> xcCol = coleMap.get("项次");
|
|
|
|
+ List<Map<String, Object>> jcxmCol = coleMap.get("检查项目");
|
|
|
|
+ List<Map<String, Object>> sczCol = coleMap.get("实测值或实测偏差值");
|
|
|
|
+ List<Map<String, Object>> hglCol = coleMap.get("合格率");
|
|
|
|
+ for (int i = 0; i < hgCol.size(); i++) {
|
|
|
|
+ Map<String, Object> hgInfo = hgCol.get(i);
|
|
|
|
+ String keyName = hgInfo.get("key") + "";
|
|
|
|
+ int x1 = (int) hgInfo.get("x1");
|
|
|
|
+ int x2 = (int) hgInfo.get("x2");
|
|
|
|
+ int y1 = (int) hgInfo.get("y1");
|
|
|
|
+ int y2 = (int) hgInfo.get("y2");
|
|
|
|
+
|
|
|
|
+ // 判断是否合格
|
|
|
|
+ List<String> hekeysList = new ArrayList<>();
|
|
|
|
+ for (int j = 0; j < sczCol.size(); j++) {
|
|
|
|
+ Map<String, Object> jcxm = sczCol.get(j);
|
|
|
|
+ int hgx1 = (int) jcxm.get("x1");
|
|
|
|
+ int hgx2 = (int) jcxm.get("x2");
|
|
|
|
+ int hgy1 = (int) jcxm.get("y1");
|
|
|
|
+ int hgy2 = (int) jcxm.get("y2");
|
|
|
|
+ if (hgx2 < x1 && hgy1 >= y1 && hgy2 <= y2) {
|
|
|
|
+ hekeysList.add(jcxm.get("key") + "");
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ String hekeys = String.join(",", hekeysList);
|
|
|
|
+
|
|
|
|
+ List<String> jcxNameList = new ArrayList<>();
|
|
|
|
+ for (int j = 0; j < jcxmCol.size(); j++) {
|
|
|
|
+ Map<String, Object> jcxm = jcxmCol.get(j);
|
|
|
|
+ int hgx1 = (int) jcxm.get("x1");
|
|
|
|
+ int hgx2 = (int) jcxm.get("x2");
|
|
|
|
+ int hgy1 = (int) jcxm.get("y1");
|
|
|
|
+ int hgy2 = (int) jcxm.get("y2");
|
|
|
|
+ if (hgx2 < x1 && hgy1 <= y1 && hgy2 >= y2) {
|
|
|
|
+ jcxNameList.add(jcxm.get("name") + "");
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ String jcxName = String.join("--", jcxNameList); // 输出 "C-B-A"
|
|
|
|
+ String xhName = "";
|
|
|
|
+ for (int j = 0; j < xcCol.size(); j++) {
|
|
|
|
+ Map<String, Object> jcxm = xcCol.get(j);
|
|
|
|
+ int hgx1 = (int) jcxm.get("x1");
|
|
|
|
+ int hgx2 = (int) jcxm.get("x2");
|
|
|
|
+ int hgy1 = (int) jcxm.get("y1");
|
|
|
|
+ int hgy2 = (int) jcxm.get("y2");
|
|
|
|
+ if (hgx2 < x1 && hgy1 <= y1 && hgy2 >= y2) {
|
|
|
|
+ xhName = xhName + jcxm.get("name");
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ String hglName = "";
|
|
|
|
+ for (int j = 0; j < hglCol.size(); j++) {
|
|
|
|
+ Map<String, Object> jcxm = hglCol.get(j);
|
|
|
|
+ int hgx1 = (int) jcxm.get("x1");
|
|
|
|
+ int hgx2 = (int) jcxm.get("x2");
|
|
|
|
+ int hgy1 = (int) jcxm.get("y1");
|
|
|
|
+ int hgy2 = (int) jcxm.get("y2");
|
|
|
|
+ if (hgx2 < x1 && hgy1 <= y1 && hgy2 >= y2) {
|
|
|
|
+ hglName =jcxm.get("key")+"";
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ String reVal = keyName + "_*_" + xhName + jcxName + "_*_" + hekeys+"_*_" + hglName;
|
|
|
|
+ System.out.println(reVal);
|
|
|
|
+ redata.add(reVal);
|
|
|
|
+ }
|
|
|
|
+ }finally {
|
|
|
|
+ fileInputStream.close();
|
|
|
|
+ }
|
|
|
|
+ return redata;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 检查坐标是否被占用
|
|
|
|
+ public static boolean isCellOccupied(int x, int y) {
|
|
|
|
+ return occupiedCells.getOrDefault(y, Collections.emptySet()).contains(x);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 标记合并单元格占用的区域
|
|
|
|
+ public static void markOccupiedCells(int startX, int startY, int colspan, int rowspan) {
|
|
|
|
+ for (int dy = 0; dy < rowspan; dy++) {
|
|
|
|
+ for (int dx = 0; dx < colspan; dx++) {
|
|
|
|
+ occupiedCells.computeIfAbsent(startY + dy, k -> new HashSet<>())
|
|
|
|
+ .add(startX + dx);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ public static String findKeyname(Element element) {
|
|
|
|
+ Elements elementsWithKeyname = element.select("[keyname]"); // 查找所有带 keyname 属性的元素
|
|
|
|
+ if (!elementsWithKeyname.isEmpty()) {
|
|
|
|
+ String keynameValue = elementsWithKeyname.first().attr("keyname");
|
|
|
|
+ return keynameValue;
|
|
|
|
+ }
|
|
|
|
+ return "";
|
|
|
|
+ }
|
|
|
|
+}
|