zhuwei 1 сар өмнө
parent
commit
dd5c239845

+ 73 - 25
blade-service/blade-e-visa/src/main/java/org/springblade/evisa/controller/ArchiveController.java

@@ -26,6 +26,7 @@ import org.springframework.web.bind.annotation.RestController;
 
 import javax.annotation.Resource;
 import java.io.*;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CompletableFuture;
@@ -103,33 +104,49 @@ public class ArchiveController {
             String archiveId = taskSign.getArchiveId();
             String id = taskSign.getId();
             String taskId = taskSign.getTaskId();
+            List<String> listPdf = new ArrayList<>();
+            int startPage = 0;
+            for (int i = 2; i <= 10; i++) {
+                // 获取pdf第二页的数据
+                String firstUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "first__"+i+"__.pdf";
+                File file = new File(firstUrl);
+                if (!file.exists()) {
+                    getPdfByPage(i, i, fileUrl, firstUrl);
+                }
 
-            // 获取pdf第二页的数据
-            String firstUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_001.pdf";
-            File file = new File(firstUrl);
-            if (!file.exists()) {
-                getPdfByPage(2, 2, fileUrl, firstUrl);
-            }
-
-            // 保存第一页为300DPI图片
-            String imagePath = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_001.png";
-            File imgfile = new File(imagePath);
-            if (!imgfile.exists()) {
-                savePdfAsImage(1, firstUrl, imagePath);
+                // 保存第一页为300DPI图片
+                String imagePath = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "first__"+i+"__.png";
+                File imgfile = new File(imagePath);
+                if (!imgfile.exists()) {
+                    savePdfAsImage(1, firstUrl, imagePath);
+                }
+                // 删除pdf
+                file.delete();
+                String state = OcrTitle(imagePath,"1");
+                if(state.equals("1")){
+                    if(startPage<2){
+                        startPage = i ;
+                    }
+                    listPdf.add(imagePath);
+                }else{
+                    imgfile.delete();
+                    break;
+                }
             }
-
+            System.out.println(listPdf.size());
+            String filePath = startPage+"--"+(listPdf.size()+1);
             //判断
             List<Map<String, Object>> mapList = jdbcTemplate.queryForList("select * from u_archives_split_info where id=" + id + "");
             if (mapList != null && Func.isNotEmpty(mapList) && mapList.size() >= 1) {
                 String status = mapList.get(0).get("status") + "";
                 if (status.equals("3")) {
-                    String updateSql = "update u_archives_auto set split_status=1 where id=" + id;
+                    String updateSql = "update u_archives_auto set split_status=1 where id=" + archiveId;
                     jdbcTemplate.execute(updateSql);
                 }
             } else {
-                String sql22 = "insert into u_archives_split_info(id,status,file_url,first_file_url,task_id,archive_id,create_time) VALUES(" + id + ",2,'" + fileUrl + "','" + imagePath + "'," + taskId + "," + archiveId + ",SYSDATE())";
+                String sql22 = "insert into u_archives_split_info(id,status,file_url,first_file_url,task_id,archive_id,create_time) VALUES(" + id + ",2,'" + fileUrl + "','" + filePath + "'," + taskId + "," + archiveId + ",SYSDATE())";
                 jdbcTemplate.execute(sql22);
-                String updateSql = "update u_archives_auto set split_status=2 where id=" + id;
+                String updateSql = "update u_archives_auto set split_status=2 where id=" + archiveId;
                 jdbcTemplate.execute(updateSql);
             }
             RedisTemplate.delete("splitpng-" + archiveId);
@@ -139,7 +156,7 @@ public class ArchiveController {
     }
 
 
-    @Scheduled(cron = "0/30 * * * * ?")
+   // @Scheduled(cron = "0/30 * * * * ?")
     public void SplitPdfInfo() {
         //执行代码
 
@@ -181,12 +198,7 @@ public class ArchiveController {
             String firstPage = FileUtils.getSysLocalFileUrl() + "archiveSplit/";
             int bkb = 0 ;
             //将imagePath 的数据转成一个可解析的html
-            String htmlUrl = pngToHtml(firstPage, archiveId);
-            /*String htmlUrl2 = pngToHtml(firstPage, archiveId);
-            String htmlUrl = "";
-            if (htmlUrl1.equals(htmlUrl2) && htmlUrl1.indexOf("_001.html") >= 0 && htmlUrl1.indexOf("archiveSplit") >= 0) {
-                htmlUrl = htmlUrl2;
-            }*/
+            String htmlUrl = pngToHtml(firstPage, archiveId,taskSign.getFirstFileUrl());
 
             if (htmlUrl.indexOf("_001.html") >= 0 && htmlUrl.indexOf("archiveSplit") >= 0) {
                 String htmlString = IoUtil.readToString(new FileInputStream(htmlUrl));
@@ -267,13 +279,13 @@ public class ArchiveController {
         }
     }
 
-    public static String pngToHtml(String fileUrl, String pKeyId) {
+    public static String pngToHtml(String fileUrl, String pKeyId,String pageNum) {
         String lasHhtmlUrl = "";
         try {
             // 定义Python解释器路径和脚本路径
             String pythonScript = "/Users/hongchuangyanfa/Desktop/PycharmProjects/splitPngToHtml.py";
             // 构建命令
-            ProcessBuilder pb = new ProcessBuilder("python3", pythonScript, fileUrl, pKeyId);
+            ProcessBuilder pb = new ProcessBuilder("python3", pythonScript, fileUrl, pKeyId, pageNum);
             Process process = pb.start();
 
             // 读取Python脚本输出
@@ -299,6 +311,42 @@ public class ArchiveController {
         }
     }
 
+/*    public static void main(String[] args) {
+        String mingUrl = "/Users/hongchuangyanfa/Desktop/archiveSplit/1935941065903177728first_002.png";
+        OcrTitle(mingUrl,"1");
+    }*/
+    /**
+     * Runs the {@code splitPngByTitle.py} script on a single image and reports
+     * whether the script printed a positive marker line.
+     *
+     * <p>A line counts as positive when it contains both
+     * {@code "图片中是否有卷内目录"} (roughly "does the image contain the in-volume
+     * table of contents") and {@code "True"}.
+     *
+     * <p>NOTE(review): every failure path (non-zero exit code, I/O error,
+     * interruption) also returns {@code "1"}, i.e. the same value as a positive
+     * match. Callers compare the result with {@code "1"}, so this is kept as-is;
+     * confirm that failing "open" is really intended.
+     *
+     * <p>NOTE(review): the script output is decoded with the platform default
+     * charset, which is assumed to match the encoding Python uses for its piped
+     * stdout - confirm on the deployment host.
+     *
+     * @param fileUrl path of the image, passed to the script as its first argument
+     * @param type    passed through to the script as its second argument
+     * @return {@code "1"} when a positive marker line was printed, when the script
+     *         exits with a non-zero code, or when any exception occurs;
+     *         an empty string when the script exits with code 0 without printing
+     *         a positive marker line
+     */
+    public static String OcrTitle(String fileUrl, String type) {
+        // Location of the Python script that inspects the image.
+        String pythonScript = "/Users/hongchuangyanfa/Desktop/PycharmProjects/splitPngByTitle.py";
+        Process process = null;
+        try {
+            // Build the command line.
+            ProcessBuilder pb = new ProcessBuilder("python3", pythonScript, fileUrl, type);
+            // Forward the child's stderr to this JVM's stderr so that an unread
+            // stderr pipe can never fill up and block the script.
+            pb.redirectError(ProcessBuilder.Redirect.INHERIT);
+            process = pb.start();
+
+            boolean matched = false;
+            // Read the script output; try-with-resources guarantees the pipe is closed.
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(process.getInputStream()))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    System.out.println("222" + line);
+                    if (line.indexOf("图片中是否有卷内目录") >= 0 && line.indexOf("True") >= 0) {
+                        // Keep draining instead of returning right away, so the
+                        // child can finish normally and is not left running.
+                        matched = true;
+                    }
+                }
+            }
+
+            // Wait for the script to finish.
+            int exitCode = process.waitFor();
+            if (matched) {
+                return "1";
+            }
+            return exitCode == 0 ? "" : "1";
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers can still observe the interruption.
+            Thread.currentThread().interrupt();
+            e.printStackTrace();
+            return "1";
+        } catch (Exception e) {
+            e.printStackTrace();
+            return "1";
+        } finally {
+            if (process != null) {
+                // No-op when the script already exited; otherwise releases the child.
+                process.destroy();
+            }
+        }
+    }
+
 
     public static void getPdfByPage(int startPage, int endPage, String filePath, String savePath) {
         try {