|
@@ -26,6 +26,7 @@ import org.springframework.web.bind.annotation.RestController;
|
|
|
|
|
|
import javax.annotation.Resource;
|
|
import javax.annotation.Resource;
|
|
import java.io.*;
|
|
import java.io.*;
|
|
|
|
+import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
import java.util.concurrent.CompletableFuture;
|
|
import java.util.concurrent.CompletableFuture;
|
|
@@ -103,33 +104,49 @@ public class ArchiveController {
|
|
String archiveId = taskSign.getArchiveId();
|
|
String archiveId = taskSign.getArchiveId();
|
|
String id = taskSign.getId();
|
|
String id = taskSign.getId();
|
|
String taskId = taskSign.getTaskId();
|
|
String taskId = taskSign.getTaskId();
|
|
|
|
+ List<String> listPdf = new ArrayList<>();
|
|
|
|
+ int startPage = 0;
|
|
|
|
+ for (int i = 2; i <= 10; i++) {
|
|
|
|
+ // 获取pdf第二页的数据
|
|
|
|
+ String firstUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "first__"+i+"__.pdf";
|
|
|
|
+ File file = new File(firstUrl);
|
|
|
|
+ if (!file.exists()) {
|
|
|
|
+ getPdfByPage(i, i, fileUrl, firstUrl);
|
|
|
|
+ }
|
|
|
|
|
|
- // 获取pdf第二页的数据
|
|
|
|
- String firstUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_001.pdf";
|
|
|
|
- File file = new File(firstUrl);
|
|
|
|
- if (!file.exists()) {
|
|
|
|
- getPdfByPage(2, 2, fileUrl, firstUrl);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // 保存第一页为300DPI图片
|
|
|
|
- String imagePath = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_001.png";
|
|
|
|
- File imgfile = new File(imagePath);
|
|
|
|
- if (!imgfile.exists()) {
|
|
|
|
- savePdfAsImage(1, firstUrl, imagePath);
|
|
|
|
|
|
+ // 保存第一页为300DPI图片
|
|
|
|
+ String imagePath = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "first__"+i+"__.png";
|
|
|
|
+ File imgfile = new File(imagePath);
|
|
|
|
+ if (!imgfile.exists()) {
|
|
|
|
+ savePdfAsImage(1, firstUrl, imagePath);
|
|
|
|
+ }
|
|
|
|
+ // 删除pdf
|
|
|
|
+ file.delete();
|
|
|
|
+ String state = OcrTitle(imagePath,"1");
|
|
|
|
+ if(state.equals("1")){
|
|
|
|
+ if(startPage<2){
|
|
|
|
+ startPage = i ;
|
|
|
|
+ }
|
|
|
|
+ listPdf.add(imagePath);
|
|
|
|
+ }else{
|
|
|
|
+ imgfile.delete();
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+ System.out.println(listPdf.size());
|
|
|
|
+ String filePath = startPage+"--"+(listPdf.size()+1);
|
|
//判断
|
|
//判断
|
|
List<Map<String, Object>> mapList = jdbcTemplate.queryForList("select * from u_archives_split_info where id=" + id + "");
|
|
List<Map<String, Object>> mapList = jdbcTemplate.queryForList("select * from u_archives_split_info where id=" + id + "");
|
|
if (mapList != null && Func.isNotEmpty(mapList) && mapList.size() >= 1) {
|
|
if (mapList != null && Func.isNotEmpty(mapList) && mapList.size() >= 1) {
|
|
String status = mapList.get(0).get("status") + "";
|
|
String status = mapList.get(0).get("status") + "";
|
|
if (status.equals("3")) {
|
|
if (status.equals("3")) {
|
|
- String updateSql = "update u_archives_auto set split_status=1 where id=" + id;
|
|
|
|
|
|
+ String updateSql = "update u_archives_auto set split_status=1 where id=" + archiveId;
|
|
jdbcTemplate.execute(updateSql);
|
|
jdbcTemplate.execute(updateSql);
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- String sql22 = "insert into u_archives_split_info(id,status,file_url,first_file_url,task_id,archive_id,create_time) VALUES(" + id + ",2,'" + fileUrl + "','" + imagePath + "'," + taskId + "," + archiveId + ",SYSDATE())";
|
|
|
|
|
|
+ String sql22 = "insert into u_archives_split_info(id,status,file_url,first_file_url,task_id,archive_id,create_time) VALUES(" + id + ",2,'" + fileUrl + "','" + filePath + "'," + taskId + "," + archiveId + ",SYSDATE())";
|
|
jdbcTemplate.execute(sql22);
|
|
jdbcTemplate.execute(sql22);
|
|
- String updateSql = "update u_archives_auto set split_status=2 where id=" + id;
|
|
|
|
|
|
+ String updateSql = "update u_archives_auto set split_status=2 where id=" + archiveId;
|
|
jdbcTemplate.execute(updateSql);
|
|
jdbcTemplate.execute(updateSql);
|
|
}
|
|
}
|
|
RedisTemplate.delete("splitpng-" + archiveId);
|
|
RedisTemplate.delete("splitpng-" + archiveId);
|
|
@@ -139,7 +156,7 @@ public class ArchiveController {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
- @Scheduled(cron = "0/30 * * * * ?")
|
|
|
|
|
|
+ // @Scheduled(cron = "0/30 * * * * ?")
|
|
public void SplitPdfInfo() {
|
|
public void SplitPdfInfo() {
|
|
//执行代码
|
|
//执行代码
|
|
|
|
|
|
@@ -181,12 +198,7 @@ public class ArchiveController {
|
|
String firstPage = FileUtils.getSysLocalFileUrl() + "archiveSplit/";
|
|
String firstPage = FileUtils.getSysLocalFileUrl() + "archiveSplit/";
|
|
int bkb = 0 ;
|
|
int bkb = 0 ;
|
|
//将imagePath 的数据转成一个可解析的html
|
|
//将imagePath 的数据转成一个可解析的html
|
|
- String htmlUrl = pngToHtml(firstPage, archiveId);
|
|
|
|
- /*String htmlUrl2 = pngToHtml(firstPage, archiveId);
|
|
|
|
- String htmlUrl = "";
|
|
|
|
- if (htmlUrl1.equals(htmlUrl2) && htmlUrl1.indexOf("_001.html") >= 0 && htmlUrl1.indexOf("archiveSplit") >= 0) {
|
|
|
|
- htmlUrl = htmlUrl2;
|
|
|
|
- }*/
|
|
|
|
|
|
+ String htmlUrl = pngToHtml(firstPage, archiveId,taskSign.getFirstFileUrl());
|
|
|
|
|
|
if (htmlUrl.indexOf("_001.html") >= 0 && htmlUrl.indexOf("archiveSplit") >= 0) {
|
|
if (htmlUrl.indexOf("_001.html") >= 0 && htmlUrl.indexOf("archiveSplit") >= 0) {
|
|
String htmlString = IoUtil.readToString(new FileInputStream(htmlUrl));
|
|
String htmlString = IoUtil.readToString(new FileInputStream(htmlUrl));
|
|
@@ -267,13 +279,13 @@ public class ArchiveController {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- public static String pngToHtml(String fileUrl, String pKeyId) {
|
|
|
|
|
|
+ public static String pngToHtml(String fileUrl, String pKeyId,String pageNum) {
|
|
String lasHhtmlUrl = "";
|
|
String lasHhtmlUrl = "";
|
|
try {
|
|
try {
|
|
// 定义Python解释器路径和脚本路径
|
|
// 定义Python解释器路径和脚本路径
|
|
String pythonScript = "/Users/hongchuangyanfa/Desktop/PycharmProjects/splitPngToHtml.py";
|
|
String pythonScript = "/Users/hongchuangyanfa/Desktop/PycharmProjects/splitPngToHtml.py";
|
|
// 构建命令
|
|
// 构建命令
|
|
- ProcessBuilder pb = new ProcessBuilder("python3", pythonScript, fileUrl, pKeyId);
|
|
|
|
|
|
+ ProcessBuilder pb = new ProcessBuilder("python3", pythonScript, fileUrl, pKeyId, pageNum);
|
|
Process process = pb.start();
|
|
Process process = pb.start();
|
|
|
|
|
|
// 读取Python脚本输出
|
|
// 读取Python脚本输出
|
|
@@ -299,6 +311,42 @@ public class ArchiveController {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/* public static void main(String[] args) {
|
|
|
|
+ String mingUrl = "/Users/hongchuangyanfa/Desktop/archiveSplit/1935941065903177728first_002.png";
|
|
|
|
+ OcrTitle(mingUrl,"1");
|
|
|
|
+ }*/
|
|
|
|
+ public static String OcrTitle(String fileUrl, String type) {
|
|
|
|
+ String lasHhtmlUrl = "";
|
|
|
|
+ try {
|
|
|
|
+ // 定义Python解释器路径和脚本路径
|
|
|
|
+ String pythonScript = "/Users/hongchuangyanfa/Desktop/PycharmProjects/splitPngByTitle.py";
|
|
|
|
+ // 构建命令
|
|
|
|
+ ProcessBuilder pb = new ProcessBuilder("python3", pythonScript, fileUrl, type);
|
|
|
|
+ Process process = pb.start();
|
|
|
|
+
|
|
|
|
+ // 读取Python脚本输出
|
|
|
|
+ BufferedReader reader = new BufferedReader(
|
|
|
|
+ new InputStreamReader(process.getInputStream()));
|
|
|
|
+ String htmlUrl;
|
|
|
|
+ while ((htmlUrl = reader.readLine()) != null) {
|
|
|
|
+ System.out.println("222" + htmlUrl);
|
|
|
|
+ if (htmlUrl.indexOf("图片中是否有卷内目录") >= 0 && htmlUrl.indexOf("True") >=0) {
|
|
|
|
+ return "1";
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ // 等待进程结束
|
|
|
|
+ int exitCode = process.waitFor();
|
|
|
|
+ if (exitCode == 0) {
|
|
|
|
+ return lasHhtmlUrl;
|
|
|
|
+ } else {
|
|
|
|
+ return "1";
|
|
|
|
+ }
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ return "1";
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
|
|
public static void getPdfByPage(int startPage, int endPage, String filePath, String savePath) {
|
|
public static void getPdfByPage(int startPage, int endPage, String filePath, String savePath) {
|
|
try {
|
|
try {
|