|
|
@@ -59,13 +59,16 @@ public class Archive2Controller {
|
|
|
@Resource(name = "archivePoolExecutor")
|
|
|
private ThreadPoolExecutor archExecutor;
|
|
|
|
|
|
- @Scheduled(cron = "0/30 * * * * ?")
|
|
|
+ @Scheduled(cron = "0/10 * * * * ?")
|
|
|
public void SignTaskBatchPng() {
|
|
|
//执行代码
|
|
|
log.info("分解pdf专图片");
|
|
|
- // String sql = "SELECT distinct b.id,b.archive_id as archiveId ,REPLACE(b.file_url,'https://xinan1.zos.ctyun.cn','http://100.86.2.1:80') as fileUrl from u_archives_auto a ,u_archive_file b where a.id=b.archive_id and a.is_deleted=0 and b.is_deleted=0 and a.split_status=10 LIMIT 20";
|
|
|
- String sql = "SELECT distinct b.id,b.archive_id as archiveId ,b.file_url as fileUrl from u_archives_auto a ,u_archive_file b where a.id=b.archive_id and a.is_deleted=0 and b.is_deleted=0 and a.split_status=2 LIMIT 20";
|
|
|
- List<TaskArchiveSplitVO> query = jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(TaskArchiveSplitVO.class));
|
|
|
+ // String sql = "SELECT distinct b.id,b.archive_id as archiveId ,REPLACE(b.file_url,'https://xinan1.zos.ctyun.cn','http://100.86.2.1:80') as fileUrl from u_archives_auto a ,u_archive_file b where a.id=b.archive_id and a.is_deleted=0 and b.is_deleted=0 and a.split_status=2 LIMIT 20";
|
|
|
+ // String sql = "SELECT distinct b.id,b.archive_id as archiveId ,b.file_url as fileUrl from u_archives_auto a ,u_archive_file b where a.id=b.archive_id and a.is_deleted=0 and b.is_deleted=0 and a.split_status=2 LIMIT 20";
|
|
|
+ // String sql = "SELECT distinct b.id,b.archive_id as archiveId ,'/Users/hongchuangyanfa/Downloads/ab13344e4943222efffa89f6db3604a6.pdf' as fileUrl from u_archives_auto a ,u_archive_file b where a.id=b.archive_id and a.is_deleted=0 and b.is_deleted=0 and a.id=1989148760491425792 LIMIT 20";
|
|
|
+ //String sql = "SELECT distinct b.id,b.archive_id as archiveId ,'/Users/hongchuangyanfa/Downloads/1a39d7b4610904104cc65739cddf24c6.pdf' as fileUrl from u_archives_auto a ,u_archive_file b where a.id=b.archive_id and a.is_deleted=0 and b.is_deleted=0 and a.id=1989154852734763008 LIMIT 20";
|
|
|
+ String sql = "SELECT distinct b.id,b.archive_id as archiveId ,'/Users/hongchuangyanfa/Downloads/293ec019215ba23e78ff11ba1f3e6361.pdf' as fileUrl from u_archives_auto a ,u_archive_file b where a.id=b.archive_id and a.is_deleted=0 and b.is_deleted=0 and a.id=1989148775205044224 LIMIT 20";
|
|
|
+ List<TaskArchiveSplitVO> query = jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(TaskArchiveSplitVO.class));
|
|
|
|
|
|
if (query != null && query.size() >= 1) {
|
|
|
for (TaskArchiveSplitVO dataInfo : query) {
|
|
|
@@ -126,6 +129,7 @@ public class Archive2Controller {
|
|
|
}
|
|
|
|
|
|
String filePath = startPage + "--" + (listPdf.size() + 1);
|
|
|
+ System.out.println("filePath="+filePath);
|
|
|
//
|
|
|
ArchivesSplitInfoVO data =new ArchivesSplitInfoVO();
|
|
|
data.setId(taskSign.getId());
|
|
|
@@ -147,6 +151,7 @@ public class Archive2Controller {
|
|
|
String fileUlr = taskSign.getFileUrl();
|
|
|
String firstPage = FileUtils.getSysLocalFileUrl() + "archiveSplit/";
|
|
|
String firstFileUrl = taskSign.getFirstFileUrl();
|
|
|
+
|
|
|
String firstUrl[] = firstFileUrl.split("--");
|
|
|
int basePage = Integer.parseInt(firstUrl[1]);
|
|
|
int baseStart = Integer.parseInt(firstUrl[0]);
|
|
|
@@ -155,6 +160,7 @@ public class Archive2Controller {
|
|
|
//将imagePath 的数据转成一个可解析的html
|
|
|
String htmlUrl = pngToHtml(firstPage, archiveId, taskSign.getFirstFileUrl());
|
|
|
System.out.println("分解002=" + htmlUrl);
|
|
|
+ int pdfPage = FileUtils.getPdfNum(fileUlr);
|
|
|
|
|
|
if (htmlUrl.indexOf("_001.html") >= 0 && htmlUrl.indexOf("archiveSplit") >= 0) {
|
|
|
String htmlString = IoUtil.readToString(new FileInputStream(htmlUrl));
|
|
|
@@ -164,7 +170,9 @@ public class Archive2Controller {
|
|
|
//由于解析已经成功,可能数据已经分解过,需要删除
|
|
|
if (trs != null && trs.size() >= 1) {
|
|
|
String sql = "delete from u_archive_file where id<>'" + taskSign.getId() + "' and archive_id='" + archiveId + "'";
|
|
|
+ String sql2 = "update u_archive_file set file_name='整份pdf' where id='" + taskSign.getId() + "' and archive_id='" + archiveId + "'";
|
|
|
jdbcTemplate.execute(sql);
|
|
|
+ jdbcTemplate.execute(sql2);
|
|
|
}
|
|
|
|
|
|
for (int i = 0; i <= trs.size() - 1; i++) {
|
|
|
@@ -179,7 +187,11 @@ public class Archive2Controller {
|
|
|
int startYm = 0;
|
|
|
int endYm = 0;
|
|
|
if (i < trs.size() - 1) {
|
|
|
- startYm = Func.toInt(ym);
|
|
|
+ if(i==1){
|
|
|
+ startYm=1;
|
|
|
+ }else{
|
|
|
+ startYm = Func.toInt(ym);
|
|
|
+ }
|
|
|
String enData = trs.get(i + 1).select("td").get(3).text();
|
|
|
if (enData.indexOf("页") >= 0) {
|
|
|
enData = trs.get(i + 2).select("td").get(3).text();
|
|
|
@@ -210,6 +222,7 @@ public class Archive2Controller {
|
|
|
bkb = endYm;
|
|
|
}
|
|
|
} else {
|
|
|
+ RedisTemplate.delete("splithtml-" + archiveId);
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
@@ -223,30 +236,36 @@ public class Archive2Controller {
|
|
|
getPdfByPage(baseStart, basePage, fileUlr, jnmuUrl);
|
|
|
saveDataToMysql(jnmuUrl, "卷内目录", taskSign.getId(), 1, -3, dutyUser, "");
|
|
|
|
|
|
- // 卷内备考表
|
|
|
- String jnbkbUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_jnbkb_001.pdf";
|
|
|
- getPdfByPage(bkb + 1, bkb + 1, fileUlr, jnbkbUrl);
|
|
|
-
|
|
|
- File jlPdfFile = new File(jnbkbUrl);
|
|
|
- if (jlPdfFile.exists()) {
|
|
|
- saveDataToMysql(jnbkbUrl, "卷内备考表", taskSign.getId(), 1, 100, dutyUser, "");
|
|
|
- }
|
|
|
-
|
|
|
- // 背脊表
|
|
|
- String bjbUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_beiji_001.pdf";
|
|
|
- String bjbUrlPng = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_beiji_001.png";
|
|
|
|
|
|
- int pdfByPage = getPdfByPage(bkb + 2, bkb + 2, fileUlr, bjbUrl);
|
|
|
- if(pdfByPage==0){
|
|
|
- File bgImgFile = new File(bjbUrlPng);
|
|
|
- if (!bgImgFile.exists()) {
|
|
|
- int dataNum = savePdfAsImage(1, bjbUrl, bjbUrlPng);
|
|
|
- }
|
|
|
- String state = OcrTitle(bjbUrlPng, "3");
|
|
|
- if (state.equals("1")) {
|
|
|
- saveDataToMysql(bjbUrl, "背脊表", taskSign.getId(), 1, 101, dutyUser, "");
|
|
|
+ // 卷内备考表
|
|
|
+ for (int i = 0; i < 3; i++) {
|
|
|
+ int pageNo = pdfPage -i;
|
|
|
+ // 背脊表
|
|
|
+ String bjbUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_beiji_001.pdf";
|
|
|
+ String bjbUrlPng = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + archiveId + "_beiji_001.png";
|
|
|
+
|
|
|
+ int pdfByPage = getPdfByPage(pageNo, pageNo, fileUlr, bjbUrl);
|
|
|
+ if (pdfByPage == 0) {
|
|
|
+ File bgImgFile = new File(bjbUrlPng);
|
|
|
+ if (!bgImgFile.exists()) {
|
|
|
+ int dataNum = savePdfAsImage(1, bjbUrl, bjbUrlPng);
|
|
|
+ }
|
|
|
+ //卷内备考表
|
|
|
+ String state = OcrTitle(bjbUrlPng, "2");
|
|
|
+ if (state.equals("1")) {
|
|
|
+ saveDataToMysql(bjbUrl, "卷内备考表", taskSign.getId(), 1, 100, dutyUser, "");
|
|
|
+ }
|
|
|
+ String state2 = OcrTitle(bjbUrlPng, "3");
|
|
|
+ if (state2.equals("1")) {
|
|
|
+ saveDataToMysql(bjbUrl, "背脊表", taskSign.getId(), 1, 101, dutyUser, "");
|
|
|
+ }
|
|
|
+ bgImgFile.delete();
|
|
|
+ }else{
|
|
|
+ File bgImgFile = new File(bjbUrl);
|
|
|
+ if (!bgImgFile.exists()) {
|
|
|
+ bgImgFile.delete();
|
|
|
+ }
|
|
|
}
|
|
|
- bgImgFile.delete();
|
|
|
}
|
|
|
// 修改任务状态
|
|
|
String updateSql = "update u_archives_split_info set status=3 where id=" + taskSign.getId();
|
|
|
@@ -286,7 +305,7 @@ public class Archive2Controller {
|
|
|
new InputStreamReader(process.getInputStream()));
|
|
|
String htmlUrl;
|
|
|
while ((htmlUrl = reader.readLine()) != null) {
|
|
|
- System.out.println(htmlUrl);
|
|
|
+
|
|
|
if (htmlUrl.indexOf("html文件路径") >= 0 && htmlUrl.indexOf("_001.html") >= 0 && htmlUrl.indexOf("archiveSplit") >= 0) {
|
|
|
lasHhtmlUrl = htmlUrl.replace("html文件路径", "");
|
|
|
}
|
|
|
@@ -305,12 +324,13 @@ public class Archive2Controller {
|
|
|
}
|
|
|
|
|
|
public static String OcrTitle(String fileUrl, String type) {
|
|
|
- String lasHhtmlUrl = "";
|
|
|
try {
|
|
|
// 定义Python解释器路径和脚本路径
|
|
|
String pythonScript = "/Users/hongchuangyanfa/Desktop/PycharmProjects/splitPngByTitle.py";
|
|
|
// 构建命令
|
|
|
ProcessBuilder pb = new ProcessBuilder("python3", pythonScript, fileUrl, type);
|
|
|
+ // ProcessBuilder pb = new ProcessBuilder("conda", "run", "-n", "paddle_env","python3", pythonScript, fileUrl, type);
|
|
|
+
|
|
|
Process process = pb.start();
|
|
|
|
|
|
// 读取Python脚本输出
|
|
|
@@ -326,7 +346,7 @@ public class Archive2Controller {
|
|
|
// 等待进程结束
|
|
|
int exitCode = process.waitFor();
|
|
|
if (exitCode == 0) {
|
|
|
- return lasHhtmlUrl;
|
|
|
+ return "0";
|
|
|
} else {
|
|
|
return "1";
|
|
|
}
|
|
|
@@ -339,7 +359,8 @@ public class Archive2Controller {
|
|
|
|
|
|
public static int getPdfByPage(int startPage, int endPage, String filePath, String savePath) {
|
|
|
try {
|
|
|
- InputStream inputStreamByUrl = CommonUtil.getOSSInputStream(filePath);
|
|
|
+ // InputStream inputStreamByUrl = CommonUtil.getOSSInputStream(filePath);
|
|
|
+ InputStream inputStreamByUrl = new FileInputStream( new File(filePath));// CommonUtil.getOSSInputStream(filePath);
|
|
|
// 加载PDF文件
|
|
|
PDDocument document = PDDocument.load(inputStreamByUrl);
|
|
|
// 创建新文档
|
|
|
@@ -366,7 +387,10 @@ public class Archive2Controller {
|
|
|
}
|
|
|
|
|
|
public static int savePdfAsImage(int pageNum, String filePath, String outputPath) {
|
|
|
- try (InputStream inputStream = FileUtils.getInputStreamByUrl(filePath);
|
|
|
+ try (
|
|
|
+
|
|
|
+ // InputStream inputStream = FileUtils.getInputStreamByUrl(filePath);
|
|
|
+ InputStream inputStream = new FileInputStream(new File(filePath));
|
|
|
PDDocument document = PDDocument.load(inputStream)) {
|
|
|
|
|
|
// 验证页码范围
|
|
|
@@ -431,16 +455,16 @@ public class Archive2Controller {
|
|
|
return 200;
|
|
|
}
|
|
|
|
|
|
-/* public static void main(String[] args) {
|
|
|
+ public static void main11(String[] args) { // Manual scratch harness: calls getPdfByPage, then savePdfAsImage on the resulting path. Named main11, so it is not a JVM entry point; args is never read.
|
|
|
// Get the data of page 2 of the PDF
|
|
|
- String fileUrl = "/Users/hongchuangyanfa/Desktop/archiveSplit/PDF合并.pdf";
|
|
|
- String firstUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + 123 + "first__" + 1 + "__.pdf";
|
|
|
- int pdfByPage = getPdfByPage(0, 1, fileUrl, firstUrl);
|
|
|
+ String fileUrl = "/Users/hongchuangyanfa/Downloads/e4a7bbe7ae34444206cb989364314f12.pdf"; // hard-coded absolute local path used as the input PDF
|
|
|
+ String firstUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + 123 + "first__" + 2 + "__.pdf"; // output path under archiveSplit/; 123 and 2 are literal placeholders baked into the file name
|
|
|
+ int pdfByPage = getPdfByPage(2, 2, fileUrl, firstUrl); // passes startPage=2, endPage=2, filePath=fileUrl, savePath=firstUrl; the returned int is not inspected here
|
|
|
File file = new File(firstUrl); // not used again in the visible body
|
|
|
|
|
|
// Save the first page as a 300DPI image (the DPI is stated by the original comment; savePdfAsImage's body is not visible here - TODO confirm)
|
|
|
- String imagePath = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + 123 + "first__" + 1 + "__.png";
|
|
|
+ String imagePath = FileUtils.getSysLocalFileUrl() + "archiveSplit/" + 123 + "first__" + 2 + "__.png"; // same naming scheme as firstUrl, with a .png extension
|
|
|
File imgfile = new File(imagePath); // not used again in the visible body
|
|
|
- int dataNum = savePdfAsImage(1, fileUrl, imagePath);
|
|
|
- }*/
|
|
|
+ int dataNum = savePdfAsImage(1, firstUrl, imagePath); // passes pageNum=1 with firstUrl (not fileUrl) as the input; presumably firstUrl is the file getPdfByPage just wrote - verify. The returned int is not inspected
|
|
|
+ }
|
|
|
}
|