|
@@ -39,10 +39,7 @@ import lombok.AllArgsConstructor;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.apache.commons.lang.StringUtils;
|
|
import org.apache.commons.lang.StringUtils;
|
|
|
|
|
|
-import org.springblade.archive.dto.ArchiveWarningDTO;
|
|
|
|
-import org.springblade.archive.dto.FindAndReplaceDto;
|
|
|
|
-import org.springblade.archive.dto.JiLinQueryDto;
|
|
|
|
-import org.springblade.archive.dto.SaveApplyDTO;
|
|
|
|
|
|
+import org.springblade.archive.dto.*;
|
|
import org.springblade.archive.entity.*;
|
|
import org.springblade.archive.entity.*;
|
|
import org.springblade.archive.mapper.ArchiveConclusionMapper;
|
|
import org.springblade.archive.mapper.ArchiveConclusionMapper;
|
|
import org.springblade.archive.service.*;
|
|
import org.springblade.archive.service.*;
|
|
@@ -5342,17 +5339,25 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
|
|
|
|
|
|
@Override
|
|
@Override
|
|
@Async
|
|
@Async
|
|
- public boolean atuoOCR(List<Long> idsList) throws Exception {
|
|
|
|
|
|
+ public Boolean atuoOCR(List<Long> idsList) throws Exception {
|
|
String url="/mnt/sdc/AutoPdf/";
|
|
String url="/mnt/sdc/AutoPdf/";
|
|
//String url="D:\\AutoPdf\\";
|
|
//String url="D:\\AutoPdf\\";
|
|
|
|
+ //List<Long> idsList=Func.toLongList(ids);
|
|
List<ArchivesAuto> archivesAutoList = this.list(new LambdaQueryWrapper<ArchivesAuto>().in(ArchivesAuto::getId, idsList));
|
|
List<ArchivesAuto> archivesAutoList = this.list(new LambdaQueryWrapper<ArchivesAuto>().in(ArchivesAuto::getId, idsList));
|
|
for (ArchivesAuto auto : archivesAutoList) {
|
|
for (ArchivesAuto auto : archivesAutoList) {
|
|
|
|
+ if(auto.getOutUrl()==null||auto.getOutUrl().isEmpty()){
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
String fileUrl=auto.getOutUrl().substring(0,auto.getOutUrl().indexOf("@"));
|
|
String fileUrl=auto.getOutUrl().substring(0,auto.getOutUrl().indexOf("@"));
|
|
String filePath=url+auto.getName()+".pdf";
|
|
String filePath=url+auto.getName()+".pdf";
|
|
|
|
+ System.out.println("开始保存:"+fileUrl);
|
|
Boolean b = FileUtils.saveInputStreamByUrl(fileUrl, filePath);
|
|
Boolean b = FileUtils.saveInputStreamByUrl(fileUrl, filePath);
|
|
|
|
+ System.out.println("保存完成:"+b);
|
|
try {
|
|
try {
|
|
if(b){
|
|
if(b){
|
|
|
|
+ System.out.println("开始识别:"+filePath);
|
|
List<String> list = extractTextFromPDF(filePath);
|
|
List<String> list = extractTextFromPDF(filePath);
|
|
|
|
+ System.out.println("识别完成:"+list);
|
|
if(!list.isEmpty()){
|
|
if(!list.isEmpty()){
|
|
StringBuilder fileName=new StringBuilder();
|
|
StringBuilder fileName=new StringBuilder();
|
|
for (String result : list) {
|
|
for (String result : list) {
|
|
@@ -5411,9 +5416,9 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
|
|
}
|
|
}
|
|
|
|
|
|
public List<String> extractTextFromPDF(String pdfFilePath) throws IOException, InterruptedException {
|
|
public List<String> extractTextFromPDF(String pdfFilePath) throws IOException, InterruptedException {
|
|
-// String PYTHON_SCRIPT_PATH = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\Python\\pdfTextExtractorWindows.py";
|
|
|
|
-// String PYTHON_INTERPRETER = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
|
|
|
|
-
|
|
|
|
|
|
+ //String PYTHON_SCRIPT_PATH = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\Python\\pdfTextExtractorWindows.py";
|
|
|
|
+ //String PYTHON_INTERPRETER = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
|
|
|
|
+ System.out.println("进入识别1");
|
|
String PYTHON_SCRIPT_PATH = "/www/wwwlogs/python/pdfTextExtractorWindows.py";
|
|
String PYTHON_SCRIPT_PATH = "/www/wwwlogs/python/pdfTextExtractorWindows.py";
|
|
String PYTHON_INTERPRETER = "python3";
|
|
String PYTHON_INTERPRETER = "python3";
|
|
String[] command = {
|
|
String[] command = {
|
|
@@ -5425,7 +5430,7 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
|
|
Process process = new ProcessBuilder(command)
|
|
Process process = new ProcessBuilder(command)
|
|
.redirectErrorStream(true)
|
|
.redirectErrorStream(true)
|
|
.start();
|
|
.start();
|
|
-
|
|
|
|
|
|
+ System.out.println("进入识别2");
|
|
// 读取Python输出
|
|
// 读取Python输出
|
|
StringBuilder output = new StringBuilder();
|
|
StringBuilder output = new StringBuilder();
|
|
try (InputStream inputStream = process.getInputStream();
|
|
try (InputStream inputStream = process.getInputStream();
|
|
@@ -5436,9 +5441,10 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
|
|
output.append(line);
|
|
output.append(line);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+ System.out.println("进入识别3");
|
|
int exitCode = process.waitFor();
|
|
int exitCode = process.waitFor();
|
|
if (exitCode != 0) {
|
|
if (exitCode != 0) {
|
|
|
|
+ System.out.println("进入识别4");
|
|
throw new RuntimeException("Python脚本执行失败,退出码: " + exitCode + ", 输出: " + output.toString());
|
|
throw new RuntimeException("Python脚本执行失败,退出码: " + exitCode + ", 输出: " + output.toString());
|
|
}
|
|
}
|
|
|
|
|
|
@@ -5447,22 +5453,25 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
|
|
// 找到JSON的起始位置(第一个'{')和结束位置(最后一个'}')
|
|
// 找到JSON的起始位置(第一个'{')和结束位置(最后一个'}')
|
|
int jsonStart = rawOutput.indexOf('{');
|
|
int jsonStart = rawOutput.indexOf('{');
|
|
int jsonEnd = rawOutput.lastIndexOf('}');
|
|
int jsonEnd = rawOutput.lastIndexOf('}');
|
|
|
|
+ System.out.println("进入识别5");
|
|
if (jsonStart == -1 || jsonEnd == -1 || jsonStart >= jsonEnd) {
|
|
if (jsonStart == -1 || jsonEnd == -1 || jsonStart >= jsonEnd) {
|
|
|
|
+ System.out.println("进入识别6");
|
|
throw new RuntimeException("无法提取有效的JSON结果,原始输出: " + rawOutput);
|
|
throw new RuntimeException("无法提取有效的JSON结果,原始输出: " + rawOutput);
|
|
}
|
|
}
|
|
// 截取纯JSON字符串
|
|
// 截取纯JSON字符串
|
|
String jsonStr = rawOutput.substring(jsonStart, jsonEnd + 1);
|
|
String jsonStr = rawOutput.substring(jsonStart, jsonEnd + 1);
|
|
-
|
|
|
|
|
|
+ System.out.println("进入识别7");
|
|
// 解析清理后的JSON
|
|
// 解析清理后的JSON
|
|
Gson gson = new Gson();
|
|
Gson gson = new Gson();
|
|
Type type = new TypeToken<Map<String, Object>>(){}.getType();
|
|
Type type = new TypeToken<Map<String, Object>>(){}.getType();
|
|
Map<String, Object> resultMap = gson.fromJson(jsonStr, type);
|
|
Map<String, Object> resultMap = gson.fromJson(jsonStr, type);
|
|
|
|
|
|
if (!"success".equals(resultMap.get("status"))) {
|
|
if (!"success".equals(resultMap.get("status"))) {
|
|
|
|
+ System.out.println("进入识别8");
|
|
String message = (String) resultMap.get("message");
|
|
String message = (String) resultMap.get("message");
|
|
throw new RuntimeException("处理PDF失败: " + (message != null ? message : "未知错误"));
|
|
throw new RuntimeException("处理PDF失败: " + (message != null ? message : "未知错误"));
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+ System.out.println("进入识别9");
|
|
Type listType = new TypeToken<List<String>>(){}.getType();
|
|
Type listType = new TypeToken<List<String>>(){}.getType();
|
|
return gson.fromJson(gson.toJson(resultMap.get("lines")), listType);
|
|
return gson.fromJson(gson.toJson(resultMap.get("lines")), listType);
|
|
}
|
|
}
|