1 сар өмнө · 607f0dae9d
--- a/blade-service/blade-archive/src/main/java/org/springblade/archive/controller/ArchivesAutoController.java
+++ b/blade-service/blade-archive/src/main/java/org/springblade/archive/controller/ArchivesAutoController.java
@@ -277,6 +277,13 @@ public class ArchivesAutoController extends BladeController {
 
				         archivesAutoService.fileNumberFlush(projectId,contractId,ids,isArchive,startNumber);
			
 
				         return R.success("正在刷新档号中,请稍后刷新");
			
 
				     }
			
 
				+	@GetMapping("/atuoOCR")
			
 
				+	@ApiOperationSupport(order = 5)
			
 
				+	@ApiOperation(value = "档案自动识别")
			
 
				+	public R atuoOCR(String ids) throws Exception {
			
 
				+		archivesAutoService.atuoOCR(ids);
			
 
				+		return R.success("正在识别中");
			
 
				+	}
			
 
				 
			
 
				 
			
 
				 
			
--- a/blade-service/blade-archive/src/main/java/org/springblade/archive/service/IArchivesAutoService.java
+++ b/blade-service/blade-archive/src/main/java/org/springblade/archive/service/IArchivesAutoService.java
@@ -187,4 +187,6 @@ public interface IArchivesAutoService extends BaseService<ArchivesAuto> {
 
				 	void reomoveArchiveAndFile(List<Long> archiveIds);
			
 
				 
			
 
				     void reCreateArchiveAuto1(String ids);
			
 
				+
			
 
				/**
				 * Runs automatic (OCR based) recognition of archive metadata for the given archives.
				 *
				 * @param ids archive ids packed into one string; the implementation in this change
				 *            converts it with {@code Func.toLongList} (presumably comma separated —
				 *            TODO confirm)
				 * @return {@code true} from the implementation in this change; NOTE(review): the
				 *         controller answers "recognition in progress" right after calling this,
				 *         so callers should not treat the value as proof that recognition completed
				 * @throws Exception declared for implementations that cannot handle a failure themselves
				 */
				+	boolean atuoOCR(String ids) throws Exception;
			
 
				 }
			
--- a/blade-service/blade-archive/src/main/java/org/springblade/archive/service/impl/ArchivesAutoServiceImpl.java
+++ b/blade-service/blade-archive/src/main/java/org/springblade/archive/service/impl/ArchivesAutoServiceImpl.java
@@ -19,6 +19,9 @@ package org.springblade.archive.service.impl;
 
				 import com.alibaba.fastjson.JSON;
			
 
				 import com.alibaba.fastjson.JSONArray;
			
 
				 import com.alibaba.fastjson.JSONObject;
			
 
				+import com.google.gson.Gson;
			
 
				+import com.google.gson.reflect.TypeToken;
			
 
				+import java.lang.reflect.Type;
			
 
				 
			
 
				 import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
			
 
				 import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
			
@@ -96,7 +99,9 @@ import java.nio.file.Files;
 
				 import java.nio.file.Path;
			
 
				 import java.nio.file.Paths;
			
 
				 import java.rmi.ServerException;
			
 
				+import java.time.LocalDate;
			
 
				 import java.time.LocalDateTime;
			
 
				+import java.time.format.DateTimeFormatter;
			
 
				 import java.util.*;
			
 
				 import java.util.List;
			
 
				 import java.util.concurrent.ExecutorService;
			
@@ -5254,6 +5259,132 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
 
				 				.mapToInt(file -> file.getFilePage() != null ? file.getFilePage() : 0)
			
 
				 				.sum();
			
 
				 	}
			
 
				+
			
 
				+	@Override
			
 
				+	@Async
			
 
				+	public boolean atuoOCR(String ids) throws Exception {
			
 
				+		//String url="/mnt/sdc/AutoPdf/";
			
 
				+		String url="D:\\AutoPdf\\";
			
 
				+		List<Long> idsList = Func.toLongList(ids);
			
 
				+		List<ArchivesAuto> archivesAutoList = this.list(new LambdaQueryWrapper<ArchivesAuto>().in(ArchivesAuto::getId, idsList));
			
 
				+		for (ArchivesAuto auto : archivesAutoList) {
			
 
				+			String fileUrl=auto.getOutUrl().substring(0,auto.getOutUrl().indexOf("@"));
			
 
				+			String filePath=url+auto.getName()+".pdf";
			
 
				+			Boolean b = FileUtils.saveInputStreamByUrl(fileUrl, filePath);
			
 
				+			try {
			
 
				+				if(b){
			
 
				+					List<String> list = extractTextFromPDF(filePath);
			
 
				+					if(!list.isEmpty()){
			
 
				+						StringBuilder fileName=new StringBuilder();
			
 
				+						for (String result : list) {
			
 
				+							if(result.contains("档号")){
			
 
				+								String fileNum=result.replace("档号","").replace(":","").replace("：","");
			
 
				+								auto.setFileNumber(fileNum);
			
 
				+							}else if(result.contains("立卷单位")){
			
 
				+								String unit=result.replace("立卷单位","").replace(":","").replace("：","");
			
 
				+								auto.setUnit(unit);
			
 
				+							} else if (result.contains("起止日期")) {
			
 
				+								String time=result.replace("起止日期","").replace(":","").replace("：","");
			
 
				+								if(result.contains("~")){
			
 
				+									LocalDateTime[] localDateTimes = convertDateRange(time, "~");
			
 
				+									auto.setStartDate(localDateTimes[0]);
			
 
				+									auto.setEndDate(localDateTimes[1]);
			
 
				+								} else if (result.contains("-")) {
			
 
				+									LocalDateTime[] localDateTimes = convertDateRange(time, "-");
			
 
				+									auto.setStartDate(localDateTimes[0]);
			
 
				+									auto.setEndDate(localDateTimes[1]);
			
 
				+								}
			
 
				+							} else if (result.contains("保管期限")||result.contains("保管限期")) {
			
 
				+								String storageTime=result.replace("保管期限","").replace("保管限期","").replace(":","").replace("：","");
			
 
				+								auto.setStorageTime(storageTime);
			
 
				+							} else if (result.contains("密1")||result.contains("密级")) {
			
 
				+								String secretLevel=result.replace("密1","").replace("密级","");
			
 
				+								auto.setSecretLevel(secretLevel);
			
 
				+							} else {
			
 
				+								fileName.append(result);
			
 
				+							}
			
 
				+						}
			
 
				+						auto.setName(fileName.toString());
			
 
				+					}
			
 
				+				}
			
 
				+			}catch (Exception e){
			
 
				+
			
 
				+			}finally {
			
 
				+				FileUtils.removeFile(filePath);
			
 
				+			}
			
 
				+		}
			
 
				+		this.updateBatchById(archivesAutoList);
			
 
				+		return true;
			
 
				+	}
			
 
				+
			
 
				+	public static LocalDateTime[] convertDateRange(String dateRange,String split) {
			
 
				+		String[] dates = dateRange.split(split);
			
 
				+
			
 
				+		DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyyMMdd");
			
 
				+
			
 
				+		LocalDate startLocalDate = LocalDate.parse(dates[0], formatter);
			
 
				+		LocalDate endLocalDate = LocalDate.parse(dates[1], formatter);
			
 
				+
			
 
				+		LocalDateTime startDateTime = startLocalDate.atStartOfDay(); // 00:00:00
			
 
				+		LocalDateTime endDateTime = endLocalDate.atStartOfDay();; // 00:00:00
			
 
				+
			
 
				+		return new LocalDateTime[]{startDateTime, endDateTime};
			
 
				+	}
			
 
				+
			
 
				+	public List<String> extractTextFromPDF(String pdfFilePath) throws IOException, InterruptedException {
			
 
				+		String PYTHON_SCRIPT_PATH = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\Python\\pdfTextExtractorWindows.py";
			
 
				+		String PYTHON_INTERPRETER = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
			
 
				+
			
 
				+		String[] command = {
			
 
				+				PYTHON_INTERPRETER,
			
 
				+				PYTHON_SCRIPT_PATH,
			
 
				+				pdfFilePath
			
 
				+		};
			
 
				+
			
 
				+		Process process = new ProcessBuilder(command)
			
 
				+				.redirectErrorStream(true)
			
 
				+				.start();
			
 
				+
			
 
				+		// 读取Python输出
			
 
				+		StringBuilder output = new StringBuilder();
			
 
				+		try (InputStream inputStream = process.getInputStream();
			
 
				+			 BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
			
 
				+
			
 
				+			String line;
			
 
				+			while ((line = reader.readLine()) != null) {
			
 
				+				output.append(line);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		int exitCode = process.waitFor();
			
 
				+		if (exitCode != 0) {
			
 
				+			throw new RuntimeException("Python脚本执行失败，退出码: " + exitCode + ", 输出: " + output.toString());
			
 
				+		}
			
 
				+
			
 
				+		// -------------------------- 关键修改：提取纯JSON部分 --------------------------
			
 
				+		String rawOutput = output.toString();
			
 
				+		// 找到JSON的起始位置（第一个'{'）和结束位置（最后一个'}'）
			
 
				+		int jsonStart = rawOutput.indexOf('{');
			
 
				+		int jsonEnd = rawOutput.lastIndexOf('}');
			
 
				+		if (jsonStart == -1 || jsonEnd == -1 || jsonStart >= jsonEnd) {
			
 
				+			throw new RuntimeException("无法提取有效的JSON结果，原始输出: " + rawOutput);
			
 
				+		}
			
 
				+		// 截取纯JSON字符串
			
 
				+		String jsonStr = rawOutput.substring(jsonStart, jsonEnd + 1);
			
 
				+
			
 
				+		// 解析清理后的JSON
			
 
				+		Gson gson = new Gson();
			
 
				+		Type type = new TypeToken<Map<String, Object>>(){}.getType();
			
 
				+		Map<String, Object> resultMap = gson.fromJson(jsonStr, type);
			
 
				+
			
 
				+		if (!"success".equals(resultMap.get("status"))) {
			
 
				+			String message = (String) resultMap.get("message");
			
 
				+			throw new RuntimeException("处理PDF失败: " + (message != null ? message : "未知错误"));
			
 
				+		}
			
 
				+
			
 
				+		Type listType = new TypeToken<List<String>>(){}.getType();
			
 
				+		return gson.fromJson(gson.toJson(resultMap.get("lines")), listType);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 
			
--- a/blade-service/blade-archive/src/main/java/org/springblade/archive/utils/FileUtils.java
+++ b/blade-service/blade-archive/src/main/java/org/springblade/archive/utils/FileUtils.java
@@ -897,4 +897,35 @@ public class FileUtils {
 
				         return "";
			
 
				     }
			
 
				 
			
 
				+    // 获取OSS文件流并且保存到本地
			
 
				+    public static Boolean saveInputStreamByUrl(String fileUrl,String filePath) throws Exception {
			
 
				+        InputStream fileInputStream = CommonUtil.getOSSInputStream(fileUrl);
			
 
				+        if(fileInputStream==null){
			
 
				+            return false;
			
 
				+        }
			
 
				+        try (FileOutputStream outputStream = new FileOutputStream(filePath)) {
			
 
				+            byte[] buffer = new byte[1024];
			
 
				+            int length;
			
 
				+
			
 
				+            while ((length = fileInputStream.read(buffer)) > 0) {
			
 
				+                outputStream.write(buffer, 0, length);
			
 
				+            }
			
 
				+            return true;
			
 
				+        } catch (IOException e) {
			
 
				+            e.printStackTrace();
			
 
				+            System.err.println("文件下载失败: " + e.getMessage());
			
 
				+            return false;
			
 
				+        }
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+    public static Boolean removeFile(String filePath){
			
 
				+        File file=new File(filePath);
			
 
				+        if(file.exists()){
			
 
				+           return file.delete();
			
 
				+        }
			
 
				+        return true;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				 }