6 өдөр өмнө · b6e2fb90b1
--- a/blade-service/blade-archive/src/main/java/org/springblade/archive/service/impl/ArchivesAutoServiceImpl.java
+++ b/blade-service/blade-archive/src/main/java/org/springblade/archive/service/impl/ArchivesAutoServiceImpl.java
@@ -5378,11 +5378,12 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
 
				 				.sum();
			
 
				 	}
			
 
				 
			
 
				+
			
 
				 	@Override
			
 
				 	@Async
			
 
				 	public Boolean atuoOCR(List<Long> idsList) throws Exception {
			
 
				-		//String url="/mnt/sdc/AutoPdf/";
			
 
				-		String url="D:\\AutoPdf\\";
			
 
				+		String url="/mnt/sdc/AutoPdf/";
			
 
				+		//String url="D:\\AutoPdf\\";
			
 
				 		List<ArchivesAuto> archivesAutoList = this.list(new LambdaQueryWrapper<ArchivesAuto>().in(ArchivesAuto::getId, idsList));
			
 
				 		this.update(Wrappers.<ArchivesAuto>lambdaUpdate().set(ArchivesAuto::getColourStatus, 2).in(ArchivesAuto::getId, idsList));
			
 
				 		for (ArchivesAuto auto : archivesAutoList) {
			
@@ -5403,46 +5404,61 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
 
				 				if(b){
			
 
				 					System.out.println("开始识别："+filePath);
			
 
				 					List<String> list = extractTextFromPDF(filePath);
			
 
				-					System.out.println("识别完成："+list);
			
 
				 					if(!list.isEmpty()){
			
 
				 						StringBuilder fileName=new StringBuilder();
			
 
				 						for (String result : list) {
			
 
				+							System.out.println("识别结果："+result);
			
 
				+							result=result.replaceAll("\\s+", "");
			
 
				 							if(result.contains("档号")){
			
 
				 								String fileNum=result.replace("档号","").replace(":","").replace("：","");
			
 
				 								auto.setFileNumber(fileNum);
			
 
				 							}else if(result.contains("立卷单位")){
			
 
				-								String unit=result.replace("立卷单位","").replace(":","").replace("：","");
			
 
				+								String unit=result.replace("立卷单位","").replace(":","").replace("：","").replaceAll("_","").replace("密级","").replace("级密","");
			
 
				 								auto.setUnit(unit);
			
 
				-							} else if (result.contains("起止日期")) {
			
 
				-								String time=result.replace("起止日期","").replace(":","").replace("：","");
			
 
				+							}else if (result.contains("起止日期")) {
			
 
				+								String time=result.replace("起止日期","").replace(":","").replace("：","").replaceAll("_","");
			
 
				 								if(result.contains("~")){
			
 
				-									String[] split = time.split("~");
			
 
				-									auto.setStartDate(split[0]);
			
 
				-									auto.setEndDate(split[1]);
			
 
				+									String[] localDateTimes = result.split("~");
			
 
				+									auto.setStartDate(localDateTimes[0]);
			
 
				+									auto.setEndDate(localDateTimes[1]);
			
 
				 								} else if (result.contains("-")) {
			
 
				-									String[] split = time.split("-");
			
 
				-									auto.setStartDate(split[0]);
			
 
				-									auto.setEndDate(split[1]);
			
 
				+									String[] localDateTimes = result.split("-");
			
 
				+									auto.setStartDate(localDateTimes[0]);
			
 
				+									auto.setEndDate(localDateTimes[1]);
			
 
				 								} else if (result.contains("～")) {
			
 
				-									String[] split = time.split("～");
			
 
				-									auto.setStartDate(split[0]);
			
 
				-									auto.setEndDate(split[1]);
			
 
				+									String[] localDateTimes = result.split("～");
			
 
				+									auto.setStartDate(localDateTimes[0]);
			
 
				+									auto.setEndDate(localDateTimes[1]);
			
 
				 								}
			
 
				 							} else if (result.contains("保管期限")||result.contains("保管限期")) {
			
 
				-								String storageTime=result.replace("保管期限","").replace("保管限期","").replace(":","").replace("：","");
			
 
				-								auto.setStorageTime(storageTime);
			
 
				+								String storageTime=result.replace("保管期限","").replace("保管限期","").replace(":","").replace("：","").replaceAll("_","");
			
 
				+								if(StringUtils.isNotEmpty(storageTime)){
			
 
				+									if("10年".equals(storageTime)){
			
 
				+										auto.setStorageTime("1");
			
 
				+									} else if("30年".equals(storageTime)){
			
 
				+										auto.setStorageTime("2");
			
 
				+									} else{
			
 
				+										auto.setStorageTime("3");
			
 
				+									}
			
 
				+								}
			
 
				 							} else if (result.contains("密1")||result.contains("密级")) {
			
 
				-								String secretLevel=result.replace("密1","").replace("密级","");
			
 
				+								String secretLevel=result.replace("密1","").replace("密级","").replaceAll("_","");
			
 
				 								auto.setSecretLevel(secretLevel);
			
 
				 							} else {
			
 
				-								fileName.append(result);
			
 
				+								if(StringUtils.isNotEmpty(result)){
			
 
				+									fileName.append(result);
			
 
				+									System.out.println("文件题名识别结果："+fileName);
			
 
				+								}
			
 
				 							}
			
 
				 						}
			
 
				-						auto.setName(fileName.toString());
			
 
				+						System.out.println("文件题名最终结果：===================="+fileName);
			
 
				+						if(StringUtils.isNotEmpty(fileName.toString())){
			
 
				+							auto.setName(fileName.toString().replace("密级","").replace("级密",""));
			
 
				+						}
			
 
				 					}
			
 
				 				}
			
 
				 			}catch (Exception e){
			
 
				-
			
 
				+				e.printStackTrace();
			
 
				 			}finally {
			
 
				 				FileUtils.removeFile(filePath);
			
 
				 				String updateSql="update u_archives_auto set colour_status=1 where id="+auto.getId();
			
@@ -5452,105 +5468,65 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
 
				 		this.updateBatchById(archivesAutoList);
			
 
				 		return true;
			
 
				 	}
			
 
				-
			
 
				-	public static LocalDateTime[] convertDateRange(String dateRange,String split) {
			
 
				-		String[] dates = dateRange.split(split);
			
 
				-
			
 
				-		DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyyMMdd");
			
 
				-
			
 
				-		LocalDate startLocalDate = LocalDate.parse(dates[0], formatter);
			
 
				-		LocalDate endLocalDate = LocalDate.parse(dates[1], formatter);
			
 
				-
			
 
				-		LocalDateTime startDateTime = startLocalDate.atStartOfDay(); // 00:00:00
			
 
				-		LocalDateTime endDateTime = endLocalDate.atStartOfDay();; // 00:00:00
			
 
				-
			
 
				-		return new LocalDateTime[]{startDateTime, endDateTime};
			
 
				-	}
			
 
				-	// 自定义实体类（与 JSON 结构匹配）
			
 
				-	static class OcrResponse {
			
 
				-		private String status;
			
 
				-		private String message;
			
 
				-		private List<String> lines;
			
 
				-
			
 
				-		public String getStatus() { return status; }
			
 
				-		public void setStatus(String status) { this.status = status; }
			
 
				-		public String getMessage() { return message; }
			
 
				-		public void setMessage(String message) { this.message = message; }
			
 
				-		public List<String> getLines() { return lines; }
			
 
				-		public void setLines(List<String> lines) { this.lines = lines; }
			
 
				-	}
			
 
				-
			
 
				 	public List<String> extractTextFromPDF(String pdfFilePath) throws IOException, InterruptedException {
			
 
				-		// 1. 配置路径（确保正确）
			
 
				-		String PYTHON_SCRIPT_PATH = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\Python\\pdf.py";
			
 
				-		String PYTHON_INTERPRETER = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
			
 
				+		//String PYTHON_SCRIPT_PATH = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\Python\\pdfTextExtractorWindows.py";
			
 
				+		//String PYTHON_INTERPRETER = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
			
 
				+		System.out.println("进入识别1");
			
 
				+		String PYTHON_SCRIPT_PATH = "/www/wwwlogs/python/pdfTextExtractorWindows.py";
			
 
				+		String PYTHON_INTERPRETER = "python3";
			
 
				+		String[] command = {
			
 
				+				PYTHON_INTERPRETER,
			
 
				+				PYTHON_SCRIPT_PATH,
			
 
				+				pdfFilePath
			
 
				+		};
			
 
				 
			
 
				-		// 2. 构建命令
			
 
				-		String[] command = {PYTHON_INTERPRETER, PYTHON_SCRIPT_PATH, pdfFilePath};
			
 
				-
			
 
				-		// 3. 执行进程（分离输出流）
			
 
				 		Process process = new ProcessBuilder(command)
			
 
				-				.redirectErrorStream(false)
			
 
				+				.redirectErrorStream(true)
			
 
				 				.start();
			
 
				+		System.out.println("进入识别2");
			
 
				+		// 读取Python输出
			
 
				+		StringBuilder output = new StringBuilder();
			
 
				+		try (InputStream inputStream = process.getInputStream();
			
 
				+			 BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
			
 
				 
			
 
				-		// 4. 读取 JSON 结果（stdout）
			
 
				-		StringBuilder jsonOutput = new StringBuilder();
			
 
				-		try (BufferedReader reader = new BufferedReader(
			
 
				-				new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
			
 
				 			String line;
			
 
				 			while ((line = reader.readLine()) != null) {
			
 
				-				jsonOutput.append(line.trim()); // 去除多余空格/换行
			
 
				+				output.append(line);
			
 
				 			}
			
 
				 		}
			
 
				-
			
 
				-		// 5. 读取错误日志（stderr，用于调试）
			
 
				-		StringBuilder errorOutput = new StringBuilder();
			
 
				-		try (BufferedReader errorReader = new BufferedReader(
			
 
				-				new InputStreamReader(process.getErrorStream(), StandardCharsets.UTF_8))) {
			
 
				-			String line;
			
 
				-			while ((line = errorReader.readLine()) != null) {
			
 
				-				errorOutput.append(line).append("\n");
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		// 6. 检查进程退出码
			
 
				+		System.out.println("进入识别3");
			
 
				 		int exitCode = process.waitFor();
			
 
				 		if (exitCode != 0) {
			
 
				-			throw new RuntimeException(
			
 
				-					"Python脚本执行失败（退出码：" + exitCode + "）\n" +
			
 
				-							"错误日志：" + errorOutput.toString() + "\n" +
			
 
				-							"标准输出：" + jsonOutput.toString()
			
 
				-			);
			
 
				-		}
			
 
				-
			
 
				-		// 7. 解析 JSON（核心修复：用实体类直接解析）
			
 
				-		String jsonStr = jsonOutput.toString();
			
 
				-		if (jsonStr.isEmpty()) {
			
 
				-			throw new RuntimeException("Python脚本未输出任何结果（JSON 为空）");
			
 
				-		}
			
 
				-
			
 
				-		// 提取纯 JSON 字符串（过滤可能的前缀日志）
			
 
				-		int jsonStart = jsonStr.indexOf('{');
			
 
				-		int jsonEnd = jsonStr.lastIndexOf('}');
			
 
				-		if (jsonStart == -1 || jsonEnd == -1) {
			
 
				-			throw new RuntimeException(
			
 
				-					"无法提取JSON结果，原始输出：" + jsonStr + "\n" +
			
 
				-							"Python错误日志：" + errorOutput.toString()
			
 
				-			);
			
 
				-		}
			
 
				-		jsonStr = jsonStr.substring(jsonStart, jsonEnd + 1);
			
 
				-
			
 
				-		// 直接解析为实体类，避免 Map 转换问题
			
 
				+			System.out.println("进入识别4");
			
 
				+			throw new RuntimeException("Python脚本执行失败，退出码: " + exitCode + ", 输出: " + output.toString());
			
 
				+		}
			
 
				+
			
 
				+		// -------------------------- 关键修改：提取纯JSON部分 --------------------------
			
 
				+		String rawOutput = output.toString();
			
 
				+		// 找到JSON的起始位置（第一个'{'）和结束位置（最后一个'}'）
			
 
				+		int jsonStart = rawOutput.indexOf('{');
			
 
				+		int jsonEnd = rawOutput.lastIndexOf('}');
			
 
				+		System.out.println("进入识别5");
			
 
				+		if (jsonStart == -1 || jsonEnd == -1 || jsonStart >= jsonEnd) {
			
 
				+			System.out.println("进入识别6");
			
 
				+			throw new RuntimeException("无法提取有效的JSON结果，原始输出: " + rawOutput);
			
 
				+		}
			
 
				+		// 截取纯JSON字符串
			
 
				+		String jsonStr = rawOutput.substring(jsonStart, jsonEnd + 1);
			
 
				+		System.out.println("进入识别7");
			
 
				+		// 解析清理后的JSON
			
 
				 		Gson gson = new Gson();
			
 
				-		OcrResponse ocrResponse = gson.fromJson(jsonStr, OcrResponse.class);
			
 
				+		Type type = new TypeToken<Map<String, Object>>(){}.getType();
			
 
				+		Map<String, Object> resultMap = gson.fromJson(jsonStr, type);
			
 
				 
			
 
				-		// 8. 校验结果状态
			
 
				-		if (!"success".equals(ocrResponse.getStatus())) {
			
 
				-			throw new RuntimeException("PDF处理失败: " + ocrResponse.getMessage() + "\nPython日志：" + errorOutput.toString());
			
 
				+		if (!"success".equals(resultMap.get("status"))) {
			
 
				+			System.out.println("进入识别8");
			
 
				+			String message = (String) resultMap.get("message");
			
 
				+			throw new RuntimeException("处理PDF失败: " + (message != null ? message : "未知错误"));
			
 
				 		}
			
 
				-
			
 
				-		// 9. 返回 List<String>（直接从实体类获取）
			
 
				-		return ocrResponse.getLines();
			
 
				+		System.out.println("进入识别9");
			
 
				+		Type listType = new TypeToken<List<String>>(){}.getType();
			
 
				+		return gson.fromJson(gson.toJson(resultMap.get("lines")), listType);
			
 
				 	}
			
 
				 
			
 
				 	@Scheduled(fixedDelay = 1000 * 60 * 10)