|
@@ -22,6 +22,7 @@ import org.springframework.data.redis.core.StringRedisTemplate;
|
|
|
import org.springframework.jdbc.core.BeanPropertyRowMapper;
|
|
|
import org.springframework.jdbc.core.JdbcTemplate;
|
|
|
import org.springframework.scheduling.annotation.Scheduled;
|
|
|
+import org.springframework.transaction.annotation.Transactional;
|
|
|
import org.springframework.web.bind.annotation.RestController;
|
|
|
|
|
|
import javax.annotation.Resource;
|
|
@@ -77,7 +78,7 @@ public class ArchiveController {
|
|
|
if (!aBoolean) {
|
|
|
|
|
|
if (!aBoolean) {
|
|
|
- RedisTemplate.opsForValue().set("splitpng-" + dataInfo.getArchiveId(), "1", 7200, TimeUnit.SECONDS);
|
|
|
+ RedisTemplate.opsForValue().set("splitpng-" + dataInfo.getArchiveId(), "1", 600, TimeUnit.SECONDS);
|
|
|
CompletableFuture<Void> runAsync = CompletableFuture.runAsync(() -> {
|
|
|
try {
|
|
|
/*===============执行批量任务===============*/
|
|
@@ -149,6 +150,10 @@ public class ArchiveController {
|
|
|
String updateSql = "update u_archives_auto set split_status=2 where id=" + archiveId;
|
|
|
jdbcTemplate.execute(updateSql);
|
|
|
}
|
|
|
+
|
|
|
+ String sql = "delete from u_archive_file where id<>'"+id+"' and archive_id='"+archiveId+"'";
|
|
|
+ jdbcTemplate.execute(sql);
|
|
|
+
|
|
|
RedisTemplate.delete("splitpng-" + archiveId);
|
|
|
} catch (Exception e) {
|
|
|
e.printStackTrace();
|
|
@@ -156,7 +161,7 @@ public class ArchiveController {
|
|
|
}
|
|
|
|
|
|
|
|
|
- // @Scheduled(cron = "0/30 * * * * ?")
|
|
|
+ @Scheduled(cron = "0/30 * * * * ?")
|
|
|
public void SplitPdfInfo() {
|
|
|
//执行代码
|
|
|
|
|
@@ -171,7 +176,7 @@ public class ArchiveController {
|
|
|
if (!aBoolean) {
|
|
|
|
|
|
if (!aBoolean) {
|
|
|
- RedisTemplate.opsForValue().set("splithtml-" + dataInfo.getArchiveId(), "1", 600, TimeUnit.SECONDS);
|
|
|
+ RedisTemplate.opsForValue().set("splithtml-" + dataInfo.getArchiveId(), "1", 1200, TimeUnit.SECONDS);
|
|
|
CompletableFuture<Void> runAsync = CompletableFuture.runAsync(() -> {
|
|
|
try {
|
|
|
/*===============执行批量任务===============*/
|
|
@@ -196,67 +201,103 @@ public class ArchiveController {
|
|
|
String archiveId = taskSign.getArchiveId();
|
|
|
String fileUlr = taskSign.getFileUrl();
|
|
|
String firstPage = FileUtils.getSysLocalFileUrl() + "archiveSplit/";
|
|
|
+ String firstFileUrl = taskSign.getFirstFileUrl();
|
|
|
+ String firstUrl[] = firstFileUrl.split("--");
|
|
|
+ int basePage = Integer.parseInt(firstUrl[1]);
|
|
|
+ int baseStart = Integer.parseInt(firstUrl[0]);
|
|
|
+ String dutyUser = "";
|
|
|
int bkb = 0 ;
|
|
|
//将imagePath 的数据转成一个可解析的html
|
|
|
String htmlUrl = pngToHtml(firstPage, archiveId,taskSign.getFirstFileUrl());
|
|
|
|
|
|
+
|
|
|
if (htmlUrl.indexOf("_001.html") >= 0 && htmlUrl.indexOf("archiveSplit") >= 0) {
|
|
|
String htmlString = IoUtil.readToString(new FileInputStream(htmlUrl));
|
|
|
Document doc = Jsoup.parse(htmlString);
|
|
|
Element table = doc.select("table").first();
|
|
|
Elements trs = table.select("tr");
|
|
|
+ //由于解析已经成功,可能数据已经分解过,需要删除
|
|
|
+ if(trs!=null && trs.size()>=1){
|
|
|
+ String sql = "delete from u_archive_file where id<>'"+taskSign.getId()+"' and archive_id='"+archiveId+"'";
|
|
|
+ jdbcTemplate.execute(sql);
|
|
|
+ }
|
|
|
|
|
|
- for (int i = 1; i <= trs.size() - 1; i++) {
|
|
|
+ for (int i = 0; i <= trs.size() - 1; i++) {
|
|
|
Element tr = trs.get(i);
|
|
|
String zrz = tr.select("td").get(0).text();
|
|
|
String wjtm = tr.select("td").get(1).text();
|
|
|
String rq = tr.select("td").get(2).text();
|
|
|
String ym = tr.select("td").get(3).text();
|
|
|
+ if(zrz.equals("责任者") && wjtm.equals("文件题名") && rq.equals("日期")){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
int startYm = 0;
|
|
|
int endYm = 0;
|
|
|
if(i<trs.size()-1){
|
|
|
startYm = Func.toInt(ym);
|
|
|
String enData = trs.get(i+1).select("td").get(3).text();
|
|
|
- if(enData.indexOf("-")>=0){
|
|
|
- endYm = Func.toInt(enData.split("-")[0])-1;
|
|
|
- }else{
|
|
|
- endYm = Func.toInt(enData)-1;
|
|
|
+ if(enData.indexOf("页")>=0){
|
|
|
+ enData = trs.get(i+2).select("td").get(3).text();
|
|
|
+ }
|
|
|
+
|
|
|
+ String[] parts = enData.split("(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)");
|
|
|
+ if(parts!=null && parts.length>=1){
|
|
|
+ endYm = Func.toInt(parts[0]);
|
|
|
}
|
|
|
}else{
|
|
|
- if(ym.indexOf("-")>=0){
|
|
|
- String[] split = ym.split("-");
|
|
|
+ String[] split = ym.split("(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)");
|
|
|
+ if(split!=null && split.length>=3){
|
|
|
startYm = Func.toInt(split[0]);
|
|
|
- endYm = Func.toInt(split[1]);
|
|
|
+ endYm = Func.toInt(split[2]);
|
|
|
+ }else{
|
|
|
+ startYm = Func.toInt(split[0]);
|
|
|
+ endYm = Func.toInt(split[0]);
|
|
|
}
|
|
|
}
|
|
|
- startYm = startYm + 2 ;
|
|
|
- endYm = endYm + 2 ;
|
|
|
- System.out.println(zrz + " " + wjtm + " " + rq + " " + ym);
|
|
|
+ startYm = basePage+startYm ;
|
|
|
+ endYm = basePage+endYm ;
|
|
|
+ dutyUser = zrz;
|
|
|
+ System.out.println("序号="+i+"--文件提名:"+wjtm +"--开始("+startYm+"-"+endYm+")---页数"+(endYm-startYm+1));
|
|
|
// 分解文件
|
|
|
String fmlUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/"+archiveId+"_cf_00"+i+".pdf";
|
|
|
getPdfByPage(startYm,endYm,fileUlr,fmlUrl);
|
|
|
- saveDataToMysql(fmlUrl,wjtm,taskSign.getId(),endYm-startYm+1);
|
|
|
+ saveDataToMysql(fmlUrl,wjtm,taskSign.getId(),endYm-startYm+1,i,zrz);
|
|
|
bkb = endYm ;
|
|
|
}
|
|
|
-
|
|
|
} else {
|
|
|
-
|
|
|
+ return;
|
|
|
}
|
|
|
|
|
|
// 添加封面信息
|
|
|
String fmlUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/"+archiveId+"_fm_001.pdf";
|
|
|
getPdfByPage(1,1,fileUlr,fmlUrl);
|
|
|
- saveDataToMysql(fmlUrl,"封面",taskSign.getId(),1);
|
|
|
+ saveDataToMysql(fmlUrl,"封面",taskSign.getId(),1,-4,dutyUser);
|
|
|
|
|
|
// 卷内目录
|
|
|
String jnmuUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/"+archiveId+"_jnml_001.pdf";
|
|
|
- getPdfByPage(2,2,fileUlr,jnmuUrl);
|
|
|
- saveDataToMysql(jnmuUrl,"卷内目录",taskSign.getId(),1);
|
|
|
+ getPdfByPage(baseStart,basePage,fileUlr,jnmuUrl);
|
|
|
+ saveDataToMysql(jnmuUrl,"卷内目录",taskSign.getId(),1,-3,dutyUser);
|
|
|
|
|
|
// 卷内备考表
|
|
|
String jnbkbUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/"+archiveId+"_jnbkb_001.pdf";
|
|
|
getPdfByPage(bkb+1,bkb+1,fileUlr,jnbkbUrl);
|
|
|
- saveDataToMysql(jnbkbUrl,"卷内备考表",taskSign.getId(),1);
|
|
|
+ saveDataToMysql(jnbkbUrl,"卷内备考表",taskSign.getId(),1,100,dutyUser);
|
|
|
+
|
|
|
+ // 背脊表
|
|
|
+ String bjbUrl = FileUtils.getSysLocalFileUrl() + "archiveSplit/"+archiveId+"_beiji_001.pdf";
|
|
|
+ String bjbUrlPng = FileUtils.getSysLocalFileUrl() + "archiveSplit/"+archiveId+"_beiji_001.png";
|
|
|
+ getPdfByPage(bkb+2,bkb+2,fileUlr,bjbUrl);
|
|
|
+
|
|
|
+ File bgImgFile = new File(bjbUrl);
|
|
|
+ if (!bgImgFile.exists()) {
|
|
|
+ savePdfAsImage(1, bjbUrl, bjbUrlPng);
|
|
|
+ }
|
|
|
+ String state = OcrTitle(bjbUrl,"3");
|
|
|
+ if(state.equals("1")){
|
|
|
+ saveDataToMysql(jnbkbUrl,"背脊表",taskSign.getId(),1,101,dutyUser);
|
|
|
+ }
|
|
|
+ bgImgFile.delete();
|
|
|
+
|
|
|
|
|
|
// 修改任务状态
|
|
|
String updateSql = "update u_archives_split_info set status=3 where id=" + taskSign.getId();
|
|
@@ -272,7 +313,6 @@ public class ArchiveController {
|
|
|
jdbcTemplate.execute(taxkSql2);
|
|
|
|
|
|
// 修改完成情况
|
|
|
-
|
|
|
RedisTemplate.delete("splithtml-" + archiveId);
|
|
|
} catch (Exception e) {
|
|
|
throw new RuntimeException(e);
|
|
@@ -293,7 +333,6 @@ public class ArchiveController {
|
|
|
new InputStreamReader(process.getInputStream()));
|
|
|
String htmlUrl;
|
|
|
while ((htmlUrl = reader.readLine()) != null) {
|
|
|
- System.out.println("222" + htmlUrl);
|
|
|
if (htmlUrl.indexOf("html文件路径") >= 0 && htmlUrl.indexOf("_001.html") >= 0 && htmlUrl.indexOf("archiveSplit") >= 0) {
|
|
|
lasHhtmlUrl = htmlUrl.replace("html文件路径", "");
|
|
|
}
|
|
@@ -311,11 +350,7 @@ public class ArchiveController {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/* public static void main(String[] args) {
|
|
|
- String mingUrl = "/Users/hongchuangyanfa/Desktop/archiveSplit/1935941065903177728first_002.png";
|
|
|
- OcrTitle(mingUrl,"1");
|
|
|
- }*/
|
|
|
- public static String OcrTitle(String fileUrl, String type) {
|
|
|
+ public String OcrTitle(String fileUrl, String type) {
|
|
|
String lasHhtmlUrl = "";
|
|
|
try {
|
|
|
// 定义Python解释器路径和脚本路径
|
|
@@ -377,7 +412,7 @@ public class ArchiveController {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- public void savePdfAsImage(int pageNum, String filePath, String outputPath) {
|
|
|
+ public static void savePdfAsImage(int pageNum, String filePath, String outputPath) {
|
|
|
try (InputStream inputStream = FileUtils.getInputStreamByUrl(filePath);
|
|
|
PDDocument document = PDDocument.load(inputStream)) {
|
|
|
|
|
@@ -410,7 +445,8 @@ public class ArchiveController {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- public int saveDataToMysql(String upFileUrl,String fileName,String fileId,int filePage) {
|
|
|
+ public int saveDataToMysql(String upFileUrl,String fileName,String fileId,int filePage,int sort,String dutyUser) {
|
|
|
+
|
|
|
// 获取封面信息
|
|
|
long newPkId = SnowFlakeUtil.getId(); //主键Id
|
|
|
File fmfile = new File(upFileUrl);
|
|
@@ -423,13 +459,13 @@ public class ArchiveController {
|
|
|
" drawing_no,cite_change_number,certification_time,e_visa_file,node_ext_id,file_type,archive_id,origin_id,filming_time,filmingor_time,tag_id,pic_code,refer_code,film_code,width,height,ftime,utime,del_time,sort,box_name,box_number,is_auto_file,is_archive,page_num, " +
|
|
|
" file_size,source_type,is_element,pdf_page_url,fid,rectification,classify,m_wbs_tree_contract_p_key_id,u_image_classification_file_id,archive_file_storage_type,node_tree_structure,date_name,archive_file_stroage_type,out_id,sort_num " +
|
|
|
" ) " +
|
|
|
- " SELECT "+newPkId+",project_id,contract_id,node_id,file_number,'" + fileName + "',file_time,'" + FmPdfUrl + "','" + FmPdfUrl + "',"+filePage+",is_approval,is_certification,is_need_certification,duty_user,create_user,create_dept,create_time,update_user,update_time,status,is_deleted,sheet_type,sheet_source, " +
|
|
|
- " drawing_no,cite_change_number,certification_time,e_visa_file,node_ext_id,file_type,archive_id,origin_id,filming_time,filmingor_time,tag_id,pic_code,refer_code,film_code,width,height,ftime,utime,del_time,sort,box_name,box_number,is_auto_file,is_archive,page_num, " +
|
|
|
+ " SELECT "+newPkId+",project_id,contract_id,node_id,file_number,'" + fileName + "',file_time,'" + FmPdfUrl + "','" + FmPdfUrl + "',"+filePage+",is_approval,is_certification,is_need_certification,'"+dutyUser+"',create_user,create_dept,create_time,update_user,update_time,status,is_deleted,sheet_type,sheet_source, " +
|
|
|
+ " drawing_no,cite_change_number,certification_time,e_visa_file,node_ext_id,file_type,archive_id,origin_id,filming_time,filmingor_time,tag_id,pic_code,refer_code,film_code,width,height,ftime,utime,del_time,"+sort+",box_name,box_number,is_auto_file,is_archive,page_num, " +
|
|
|
" file_size,source_type,is_element,pdf_page_url,fid,rectification,classify,m_wbs_tree_contract_p_key_id,u_image_classification_file_id,archive_file_storage_type,node_tree_structure,date_name,archive_file_stroage_type,out_id,sort_num " +
|
|
|
" from u_archive_file where id=" + fileId;
|
|
|
System.out.println(fileName + "----" + sql);
|
|
|
jdbcTemplate.execute(sql);
|
|
|
- return 200;
|
|
|
+
|
|
|
} else {
|
|
|
// 检查一下oss是否启动
|
|
|
System.out.println("oss服务未启动,无法上传文件到oss");
|
|
@@ -438,5 +474,7 @@ public class ArchiveController {
|
|
|
}else{
|
|
|
return 404;
|
|
|
}
|
|
|
+ fmfile.delete();
|
|
|
+ return 200;
|
|
|
}
|
|
|
}
|