|
|
@@ -5412,27 +5412,17 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
|
|
|
if(result.contains("档号")){
|
|
|
String fileNum=result.replace("档号","").replace(":","").replace(":","");
|
|
|
auto.setFileNumber(fileNum);
|
|
|
- }else if(result.contains("立卷单位")){
|
|
|
- String unit=result.replace("立卷单位","").replace(":","").replace(":","").replaceAll("_","").replace("密级","").replace("级密","");
|
|
|
+ }else if(result.contains("立卷单位")||result.contains("立港单位")){
|
|
|
+ String unit=result.replace("立卷单位","").replace("立港单位","").replace(":","").replace(":","").replaceAll("_","").replace("密级","").replace("级密","");
|
|
|
if(unit.startsWith(",")||unit.startsWith(".")||unit.startsWith("。")||unit.startsWith("——")||unit.startsWith("-")||unit.startsWith(",")){
|
|
|
unit=unit.substring(1);
|
|
|
}
|
|
|
auto.setUnit(unit);
|
|
|
}else if (result.contains("起止日期")) {
|
|
|
String time=result.replace("起止日期","").replace(":","").replace(":","").replaceAll("_","");
|
|
|
- if(result.contains("~")){
|
|
|
- String[] localDateTimes = time.split("~");
|
|
|
- auto.setStartDate(localDateTimes[0]);
|
|
|
- auto.setEndDate(localDateTimes[1]);
|
|
|
- } else if (result.contains("-")) {
|
|
|
- String[] localDateTimes = time.split("-");
|
|
|
- auto.setStartDate(localDateTimes[0]);
|
|
|
- auto.setEndDate(localDateTimes[1]);
|
|
|
- } else if (result.contains("~")) {
|
|
|
- String[] localDateTimes = time.split("~");
|
|
|
- auto.setStartDate(localDateTimes[0]);
|
|
|
- auto.setEndDate(localDateTimes[1]);
|
|
|
- }
|
|
|
+ String[] strings = extractTimeNumbers(time);
|
|
|
+ auto.setStartDate(strings[0]);
|
|
|
+ auto.setEndDate(strings[1]);
|
|
|
} else if (result.contains("保管期限")||result.contains("保管限期")) {
|
|
|
String storageTime=result.replace("保管期限","").replace("保管限期","").replace(":","").replace(":","").replaceAll("_","");
|
|
|
if(StringUtils.isNotEmpty(storageTime)){
|
|
|
@@ -5471,6 +5461,29 @@ public class ArchivesAutoServiceImpl extends BaseServiceImpl<ArchivesAutoMapper,
|
|
|
this.updateBatchById(archivesAutoList);
|
|
|
return true;
|
|
|
}
|
|
|
+
|
|
|
+ public String[] extractTimeNumbers(String time) {
|
|
|
+ if (StringUtils.isEmpty(time)) {
|
|
|
+ return new String[]{"", ""};
|
|
|
+ }
|
|
|
+
|
|
|
+ // 使用正则表达式分割,匹配一个或多个非数字字符作为分隔符
|
|
|
+ String[] parts = time.split("[^0-9]+");
|
|
|
+
|
|
|
+ // 过滤掉空字符串,只保留数字部分
|
|
|
+ List<String> numbers = Arrays.stream(parts)
|
|
|
+ .filter(part -> !part.isEmpty())
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ // 确保返回数组始终包含两个元素
|
|
|
+ if (numbers.size() >= 2) {
|
|
|
+ return new String[]{numbers.get(0), numbers.get(1)};
|
|
|
+ } else if (numbers.size() == 1) {
|
|
|
+ return new String[]{numbers.get(0), ""};
|
|
|
+ } else {
|
|
|
+ return new String[]{"", ""};
|
|
|
+ }
|
|
|
+ }
|
|
|
public List<String> extractTextFromPDF(String pdfFilePath) throws IOException, InterruptedException {
|
|
|
//String PYTHON_SCRIPT_PATH = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\Python\\pdfTextExtractorWindows.py";
|
|
|
//String PYTHON_INTERPRETER = "C:\\Users\\hc01\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
|