فهرست منبع

档案自动识别

cr 1 هفته پیش
والد
کامیت
607f0dae9d

+ 7 - 0
blade-service/blade-archive/src/main/java/org/springblade/archive/controller/ArchivesAutoController.java

@@ -277,6 +277,13 @@ public class ArchivesAutoController extends BladeController {
         archivesAutoService.fileNumberFlush(projectId,contractId,ids,isArchive,startNumber);
         return R.success("正在刷新档号中,请稍后刷新");
     }
+	/**
+	 * Starts automatic recognition (OCR) of archive metadata for the selected archives.
+	 * <p>
+	 * The work itself is delegated to {@code archivesAutoService.atuoOCR}; this endpoint only
+	 * triggers it and reports that recognition is in progress.
+	 *
+	 * @param ids archive ids as one string (presumably comma-separated, since the service
+	 *            parses it with {@code Func.toLongList} — confirm against the front end)
+	 * @return a failure result when {@code ids} is blank, otherwise a success result
+	 * @throws Exception propagated from the service call
+	 */
+	@GetMapping("/atuoOCR")
+	@ApiOperationSupport(order = 5)
+	@ApiOperation(value = "档案自动识别")
+	public R atuoOCR(String ids) throws Exception {
+		// Reject an empty selection up front instead of starting a job that has nothing to do.
+		if (ids == null || ids.trim().isEmpty()) {
+			return R.fail("请选择需要识别的档案");
+		}
+		archivesAutoService.atuoOCR(ids);
+		return R.success("正在识别中");
+	}
 
 
 

+ 2 - 0
blade-service/blade-archive/src/main/java/org/springblade/archive/service/IArchivesAutoService.java

@@ -187,4 +187,6 @@ public interface IArchivesAutoService extends BaseService<ArchivesAuto> {
 	void reomoveArchiveAndFile(List<Long> archiveIds);
 
     void reCreateArchiveAuto1(String ids);
+
+	/**
+	 * Runs automatic recognition (OCR) on the archives identified by {@code ids} and writes
+	 * the recognized metadata back to those archive records.
+	 *
+	 * @param ids archive ids as one string (presumably comma-separated — confirm with callers)
+	 * @return {@code true} when the run completed
+	 * @throws Exception if the recognition run fails
+	 */
+	boolean atuoOCR(String ids) throws Exception;
 }

+ 131 - 0
blade-service/blade-archive/src/main/java/org/springblade/archive/service/impl/ArchivesAutoServiceImpl.java

@@ -19,6 +19,9 @@ package org.springblade.archive.service.impl;
 import com.alibaba.fastjson.JSON;
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
+import com.google.gson.Gson;
+import com.google.gson.reflect.TypeToken;
+import java.lang.reflect.Type;
 
 import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
 import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
@@ -96,7 +99,9 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.rmi.ServerException;
+import java.time.LocalDate;
 import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
 import java.util.*;
 import java.util.List;
 import java.util.concurrent.ExecutorService;
@@ -5254,6 +5259,132 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
 				.mapToInt(file -> file.getFilePage() != null ? file.getFilePage() : 0)
 				.sum();
 	}
+
+	/**
+	 * Recognizes archive metadata from each archive's PDF and stores it on the archive record.
+	 * <p>
+	 * For every archive found for {@code ids}: the PDF behind {@code outUrl} is downloaded to a
+	 * temporary file, its text lines are extracted with {@link #extractTextFromPDF(String)},
+	 * the labelled lines are mapped onto the record, and the temporary file is removed again.
+	 * A failure on one archive is logged and does not stop the remaining archives. All loaded
+	 * records are then saved with a single {@code updateBatchById} call.
+	 * <p>
+	 * NOTE(review): the method is {@code @Async} but declares a primitive {@code boolean}
+	 * return type. Spring's async support expects {@code void} or a {@code Future}; confirm
+	 * that callers going through the proxy do not fail on the missing return value.
+	 *
+	 * @param ids archive ids as one string, parsed with {@code Func.toLongList}
+	 * @return {@code false} when there is nothing to process, {@code true} otherwise
+	 * @throws Exception declared by the interface; per-archive failures are logged instead
+	 */
+	@Override
+	@Async
+	public boolean atuoOCR(String ids) throws Exception {
+		List<Long> idsList = Func.toLongList(ids);
+		if (idsList == null || idsList.isEmpty()) {
+			// Nothing to look up; also avoids issuing an IN query with an empty id list.
+			return false;
+		}
+		List<ArchivesAuto> archivesAutoList = this.list(new LambdaQueryWrapper<ArchivesAuto>().in(ArchivesAuto::getId, idsList));
+		if (archivesAutoList == null || archivesAutoList.isEmpty()) {
+			return false;
+		}
+		for (ArchivesAuto auto : archivesAutoList) {
+			String outUrl = auto.getOutUrl();
+			if (outUrl == null || outUrl.trim().isEmpty()) {
+				// No PDF to recognize for this archive.
+				continue;
+			}
+			// Everything after the first '@' is dropped from the URL; a URL without '@' is
+			// used as-is (presumably the '@' part is a storage suffix — confirm).
+			int suffixStart = outUrl.indexOf('@');
+			String fileUrl = suffixStart >= 0 ? outUrl.substring(0, suffixStart) : outUrl;
+
+			// A unique temp file avoids clashes between archives that share a name and
+			// keeps archive names (which may contain path characters) out of the file path.
+			Path tempPdf = null;
+			try {
+				tempPdf = Files.createTempFile("archive-ocr-", ".pdf");
+				String filePath = tempPdf.toString();
+				if (Boolean.TRUE.equals(FileUtils.saveInputStreamByUrl(fileUrl, filePath))) {
+					List<String> lines = extractTextFromPDF(filePath);
+					if (lines != null && !lines.isEmpty()) {
+						applyOcrLines(auto, lines);
+					}
+				}
+			} catch (InterruptedException e) {
+				// Restore the interrupt flag and stop; what was recognized so far is still saved.
+				Thread.currentThread().interrupt();
+				java.util.logging.Logger.getLogger(getClass().getName())
+						.log(java.util.logging.Level.WARNING, "档案自动识别被中断, id=" + auto.getId(), e);
+				break;
+			} catch (Exception e) {
+				// One unreadable PDF must not abort the whole batch, but it must be visible.
+				java.util.logging.Logger.getLogger(getClass().getName())
+						.log(java.util.logging.Level.WARNING, "档案自动识别失败, id=" + auto.getId(), e);
+			} finally {
+				if (tempPdf != null) {
+					FileUtils.removeFile(tempPdf.toString());
+				}
+			}
+		}
+		this.updateBatchById(archivesAutoList);
+		return true;
+	}
+
+	/**
+	 * Maps recognized text lines onto the archive: labelled lines fill the matching field,
+	 * all remaining lines are concatenated into the archive name.
+	 */
+	private void applyOcrLines(ArchivesAuto auto, List<String> lines) {
+		StringBuilder title = new StringBuilder();
+		for (String line : lines) {
+			if (line == null) {
+				continue;
+			}
+			if (line.contains("档号")) {
+				auto.setFileNumber(stripOcrLabels(line, "档号"));
+			} else if (line.contains("立卷单位")) {
+				auto.setUnit(stripOcrLabels(line, "立卷单位"));
+			} else if (line.contains("起止日期")) {
+				// Whitespace inside the range would break the yyyyMMdd parsing.
+				String range = stripOcrLabels(line, "起止日期").replaceAll("\\s+", "");
+				String separator = range.contains("~") ? "~" : (range.contains("-") ? "-" : null);
+				if (separator != null) {
+					try {
+						LocalDateTime[] bounds = convertDateRange(range, separator);
+						auto.setStartDate(bounds[0]);
+						auto.setEndDate(bounds[1]);
+					} catch (RuntimeException e) {
+						// A misread date must not discard the other recognized fields.
+						java.util.logging.Logger.getLogger(getClass().getName())
+								.log(java.util.logging.Level.WARNING, "起止日期解析失败: " + range, e);
+					}
+				}
+			} else if (line.contains("保管期限") || line.contains("保管限期")) {
+				// "保管限期" is accepted as a variant of the label.
+				auto.setStorageTime(stripOcrLabels(line, "保管期限", "保管限期"));
+			} else if (line.contains("密1") || line.contains("密级")) {
+				// "密1" is accepted as a variant of "密级" (presumably an OCR misread).
+				auto.setSecretLevel(stripOcrLabels(line, "密1", "密级"));
+			} else {
+				title.append(line);
+			}
+		}
+		// Keep the existing name when no title text was recognized.
+		if (title.length() > 0) {
+			auto.setName(title.toString());
+		}
+	}
+
+	/**
+	 * Removes the given labels plus ASCII and full-width colons from a line and trims it.
+	 */
+	private static String stripOcrLabels(String line, String... labels) {
+		String value = line;
+		for (String label : labels) {
+			value = value.replace(label, "");
+		}
+		return value.replace(":", "").replace("\uFF1A", "").trim();
+	}
+
+	/**
+	 * Parses a date range such as {@code 20200101~20201231} into two start-of-day timestamps.
+	 * <p>
+	 * The range is split with {@link String#split(String)}, so {@code split} is interpreted as
+	 * a regular expression. Only the first two parts are used; each is trimmed and parsed
+	 * with the pattern {@code yyyyMMdd}.
+	 *
+	 * @param dateRange the range text, start date first
+	 * @param split     separator between the two dates (regular expression)
+	 * @return a two-element array: start date and end date, both at 00:00:00
+	 * @throws IllegalArgumentException if an argument is {@code null} or the range does not
+	 *                                  contain two parts
+	 * @throws java.time.format.DateTimeParseException if a part is not a {@code yyyyMMdd} date
+	 */
+	public static LocalDateTime[] convertDateRange(String dateRange,String split) {
+		if (dateRange == null || split == null) {
+			throw new IllegalArgumentException("dateRange and split must not be null");
+		}
+		String[] dates = dateRange.split(split);
+		if (dates.length < 2) {
+			// Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
+			throw new IllegalArgumentException(
+					"Expected two dates separated by '" + split + "' but got: " + dateRange);
+		}
+
+		DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyyMMdd");
+
+		// Recognized text often carries stray spaces around the separator.
+		LocalDate startLocalDate = LocalDate.parse(dates[0].trim(), formatter);
+		LocalDate endLocalDate = LocalDate.parse(dates[1].trim(), formatter);
+
+		return new LocalDateTime[]{startLocalDate.atStartOfDay(), endLocalDate.atStartOfDay()};
+	}
+
+	/**
+	 * Extracts text lines from a PDF by running an external Python script.
+	 * <p>
+	 * The script is started as {@code <interpreter> <script> <pdfFilePath>} with stderr merged
+	 * into stdout. Its output is expected to contain one JSON object with a {@code status}
+	 * field, an optional {@code message} and a {@code lines} array; anything printed before the
+	 * first <code>{</code> or after the last <code>}</code> is ignored.
+	 * <p>
+	 * The interpreter and script locations default to the original fixed paths and can be
+	 * overridden with the system properties {@code archive.ocr.python.interpreter} and
+	 * {@code archive.ocr.python.script}.
+	 *
+	 * @param pdfFilePath local path of the PDF to read
+	 * @return the recognized lines; empty when the script reports no {@code lines}
+	 * @throws IOException          if the process cannot be started or its output cannot be read
+	 * @throws InterruptedException if the thread is interrupted while waiting for the script
+	 * @throws RuntimeException     if the script exits with a non-zero code, prints no JSON
+	 *                              object, or reports a status other than {@code success}
+	 */
+	public List<String> extractTextFromPDF(String pdfFilePath) throws IOException, InterruptedException {
+		String pythonScript = System.getProperty("archive.ocr.python.script",
+				"C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\Python\\pdfTextExtractorWindows.py");
+		String pythonInterpreter = System.getProperty("archive.ocr.python.interpreter",
+				"C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\python.exe");
+
+		Process process = new ProcessBuilder(pythonInterpreter, pythonScript, pdfFilePath)
+				.redirectErrorStream(true)
+				.start();
+
+		// Collect everything the script prints.
+		StringBuilder output = new StringBuilder();
+		int exitCode;
+		try {
+			try (BufferedReader reader = new BufferedReader(
+					new InputStreamReader(process.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
+				String line;
+				while ((line = reader.readLine()) != null) {
+					// Keep line breaks so error output stays readable; JSON ignores them.
+					output.append(line).append('\n');
+				}
+			}
+			exitCode = process.waitFor();
+		} finally {
+			// Do not leave the child process behind when reading fails or the wait is interrupted.
+			process.destroy();
+		}
+		if (exitCode != 0) {
+			throw new RuntimeException("Python脚本执行失败,退出码: " + exitCode + ", 输出: " + output.toString());
+		}
+
+		// The merged output may contain log lines around the result, so cut out the span
+		// from the first '{' to the last '}'.
+		String rawOutput = output.toString();
+		int jsonStart = rawOutput.indexOf('{');
+		int jsonEnd = rawOutput.lastIndexOf('}');
+		if (jsonStart == -1 || jsonEnd == -1 || jsonStart >= jsonEnd) {
+			throw new RuntimeException("无法提取有效的JSON结果,原始输出: " + rawOutput);
+		}
+		String jsonStr = rawOutput.substring(jsonStart, jsonEnd + 1);
+
+		Gson gson = new Gson();
+		Type type = new TypeToken<Map<String, Object>>(){}.getType();
+		Map<String, Object> resultMap = gson.fromJson(jsonStr, type);
+		if (resultMap == null) {
+			throw new RuntimeException("无法提取有效的JSON结果,原始输出: " + rawOutput);
+		}
+
+		if (!"success".equals(resultMap.get("status"))) {
+			// The message is not guaranteed to be a JSON string, so do not cast it.
+			Object message = resultMap.get("message");
+			throw new RuntimeException("处理PDF失败: " + (message != null ? message : "未知错误"));
+		}
+
+		Object lines = resultMap.get("lines");
+		if (lines == null) {
+			return Collections.emptyList();
+		}
+		// Round-trip through JSON to convert the untyped value into a List<String>.
+		Type listType = new TypeToken<List<String>>(){}.getType();
+		List<String> parsed = gson.fromJson(gson.toJson(lines), listType);
+		return parsed != null ? parsed : Collections.<String>emptyList();
+	}
 }
 
 

+ 31 - 0
blade-service/blade-archive/src/main/java/org/springblade/archive/utils/FileUtils.java

@@ -897,4 +897,35 @@ public class FileUtils {
         return "";
     }
 
+    /**
+     * Fetches a file from OSS and saves it to a local path.
+     * <p>
+     * The stream is obtained from {@code CommonUtil.getOSSInputStream} and copied to
+     * {@code filePath}; both streams are closed before the method returns.
+     *
+     * @param fileUrl  URL of the file to fetch
+     * @param filePath local destination path; an existing file is overwritten
+     * @return {@code true} when the file was written completely, {@code false} when no
+     *         stream was available or the copy failed
+     * @throws Exception propagated from {@code CommonUtil.getOSSInputStream}
+     */
+    public static Boolean saveInputStreamByUrl(String fileUrl,String filePath) throws Exception {
+        InputStream fileInputStream = CommonUtil.getOSSInputStream(fileUrl);
+        if(fileInputStream==null){
+            return false;
+        }
+        // Both streams are managed here so the remote stream is released even when the
+        // local file cannot be opened or the copy fails midway.
+        try (InputStream source = fileInputStream;
+             FileOutputStream outputStream = new FileOutputStream(filePath)) {
+            byte[] buffer = new byte[8192];
+            int length;
+
+            // read() signals end of stream with -1.
+            while ((length = source.read(buffer)) != -1) {
+                outputStream.write(buffer, 0, length);
+            }
+            return true;
+        } catch (IOException e) {
+            java.util.logging.Logger.getLogger(FileUtils.class.getName())
+                    .log(java.util.logging.Level.WARNING, "文件下载失败: " + e.getMessage(), e);
+            return false;
+        }
+
+    }
+
+    /**
+     * Deletes the file at the given path if it exists.
+     *
+     * @param filePath path of the file to delete
+     * @return {@code true} when the file does not exist or was deleted,
+     *         {@code false} when it exists but could not be deleted
+     */
+    public static Boolean removeFile(String filePath){
+        File target = new File(filePath);
+        // A missing file counts as success; otherwise report the outcome of the delete.
+        return !target.exists() || target.delete();
+    }
+
+
 }