|
|
@@ -10,6 +10,8 @@ import com.itextpdf.text.pdf.PdfCopy;
|
|
|
import com.itextpdf.text.pdf.PdfReader;
|
|
|
import net.coobird.thumbnailator.Thumbnails;
|
|
|
import org.apache.commons.lang.StringUtils;
|
|
|
+import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
+import org.apache.pdfbox.text.PDFTextStripper;
|
|
|
import org.apache.poi.ss.usermodel.ClientAnchor;
|
|
|
import org.apache.poi.ss.usermodel.Sheet;
|
|
|
import org.apache.poi.ss.util.CellRangeAddress;
|
|
|
@@ -91,9 +93,12 @@ public class FileUtils {
|
|
|
fileName = array[1];
|
|
|
symbol = url.substring(url.lastIndexOf("."));
|
|
|
}
|
|
|
-
|
|
|
- //获取文件流
|
|
|
- inputStream = CommonUtil.getOSSInputStream(url);
|
|
|
+ if (!url.contains("http://") && !url.contains("https://") && Files.exists(Paths.get(url))) {
|
|
|
+ inputStream = Files.newInputStream(Paths.get(url));
|
|
|
+ } else {
|
|
|
+ //获取文件流
|
|
|
+ inputStream = CommonUtil.getOSSInputStream(url);
|
|
|
+ }
|
|
|
//转换
|
|
|
byte[] bytes = CommonUtil.InputStreamToBytes(inputStream);
|
|
|
|
|
|
@@ -740,4 +745,190 @@ public class FileUtils {
|
|
|
throw new ServiceException("IO错误");
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 从电签pdf中提取指定未电签pdf的页面,并生成新的pdf
|
|
|
+ */
|
|
|
+ public static String extractPdfPages(String eVisaPdfUrl, String pdfUrl){
|
|
|
+ if (eVisaPdfUrl == null || eVisaPdfUrl.trim().isEmpty() || eVisaPdfUrl.equals(pdfUrl)) {
|
|
|
+ return pdfUrl;
|
|
|
+ }
|
|
|
+ try (
|
|
|
+ InputStream eVisaIs = CommonUtil.getOSSInputStream(eVisaPdfUrl);
|
|
|
+ InputStream pdfIs = CommonUtil.getOSSInputStream(pdfUrl);
|
|
|
+ // 使用pdfbox读取电签pdf
|
|
|
+ PDDocument eVisaDocument = PDDocument.load(eVisaIs);
|
|
|
+ PDDocument pdfDocument = PDDocument.load(pdfIs);
|
|
|
+ PDDocument newDocument = new PDDocument();
|
|
|
+ ) {
|
|
|
+ int page = eVisaDocument.getNumberOfPages();
|
|
|
+ int page1 = pdfDocument.getNumberOfPages();
|
|
|
+ int j = 0;
|
|
|
+ for (int i = 0; i < page; i++) {
|
|
|
+ String eVisaText = getPdfContent(eVisaDocument, i + 1, i + 1);
|
|
|
+ if (j >= page1) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ String pdfText = getPdfContent(pdfDocument, j + 1, j + 1);
|
|
|
+ if (!StringUtils.isBlank(eVisaText) && calculateDiffRatio(eVisaText,pdfText) < 0.10) {
|
|
|
+ newDocument.addPage(eVisaDocument.getPage(i));
|
|
|
+ j++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 判断新的pdf是否为空,不为空则保存新的pdf
|
|
|
+ if (newDocument.getNumberOfPages() > 0) {
|
|
|
+ // 获取临时文件目录
|
|
|
+ String tempDir = System.getProperty("java.io.tmpdir");
|
|
|
+ String tempFile = tempDir + File.separator + "new_" + System.currentTimeMillis() + ".pdf";
|
|
|
+ File file = new File(tempFile);
|
|
|
+ try {
|
|
|
+ newDocument.save(file);
|
|
|
+ return tempFile;
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ return pdfUrl;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static String extractPdfPages(String eVisaPdfUrl, String[] strList){
|
|
|
+ if (eVisaPdfUrl == null || eVisaPdfUrl.trim().isEmpty()) {
|
|
|
+ return eVisaPdfUrl;
|
|
|
+ }
|
|
|
+ try (
|
|
|
+ InputStream eVisaIs = CommonUtil.getOSSInputStream(eVisaPdfUrl);
|
|
|
+ // 使用pdfbox读取电签pdf
|
|
|
+ PDDocument eVisaDocument = PDDocument.load(eVisaIs);
|
|
|
+ PDDocument newDocument = new PDDocument();
|
|
|
+ ) {
|
|
|
+ int page = eVisaDocument.getNumberOfPages();
|
|
|
+ for (int i = 0; i < page; i++) {
|
|
|
+ String eVisaText = getPdfContent(eVisaDocument, i + 1, i + 1);
|
|
|
+ if (!StringUtils.isBlank(eVisaText)) {
|
|
|
+ boolean flag = true;
|
|
|
+ for (String s : strList) {
|
|
|
+ flag = eVisaText.contains(s);
|
|
|
+ if (!flag) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (flag) {
|
|
|
+ newDocument.addPage(eVisaDocument.getPage(i));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 判断新的pdf是否为空,不为空则保存新的pdf
|
|
|
+ if (newDocument.getNumberOfPages() > 0) {
|
|
|
+ // 获取临时文件目录
|
|
|
+ String tempDir = System.getProperty("java.io.tmpdir");
|
|
|
+ String tempFile = tempDir + File.separator + "new_" + System.currentTimeMillis() + ".pdf";
|
|
|
+ File file = new File(tempFile);
|
|
|
+ try {
|
|
|
+ newDocument.save(file);
|
|
|
+ return tempFile;
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ return eVisaPdfUrl;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取pdf 指定页面的内容
|
|
|
+ */
|
|
|
+ private static String getPdfContent(PDDocument doc, int startPage, int endPage) {
|
|
|
+ try {
|
|
|
+ PDFTextStripper stripper = new PDFTextStripper();
|
|
|
+ stripper.setStartPage(startPage);
|
|
|
+ stripper.setEndPage(endPage);
|
|
|
+ return stripper.getText(doc);
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /**
|
|
|
+ * 计算两个文本之间的差异比例
|
|
|
+ * @param original 原始文本
|
|
|
+ * @param modified 修改后的文本
|
|
|
+ * @return 差异比例 (0.0 - 1.0),0表示完全相同,1表示完全不同
|
|
|
+ */
|
|
|
+ public static double calculateDiffRatio(String original, String modified) {
|
|
|
+ if (original == null && modified == null) {
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (original == null || modified == null) {
|
|
|
+ return 1.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (original.equals(modified)) {
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 按行分割文本
|
|
|
+ String[] originalLines = original.split("\n", -1);
|
|
|
+ String[] modifiedLines = modified.split("\n", -1);
|
|
|
+
|
|
|
+ // 使用动态规划计算编辑距离
|
|
|
+ int editDistance = computeEditDistance(originalLines, modifiedLines);
|
|
|
+
|
|
|
+ // 计算最大可能的编辑距离
|
|
|
+ int maxLength = Math.max(originalLines.length, modifiedLines.length);
|
|
|
+
|
|
|
+ if (maxLength == 0) {
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 返回差异比例
|
|
|
+ return (double) editDistance / maxLength;
|
|
|
+ }
|
|
|
+ /**
|
|
|
+ * 使用动态规划算法计算两个字符串数组之间的编辑距离(Levenshtein距离)
|
|
|
+ * @param original 原始字符串数组
|
|
|
+ * @param modified 修改后的字符串数组
|
|
|
+ * @return 编辑距离
|
|
|
+ */
|
|
|
+ private static int computeEditDistance(String[] original, String[] modified) {
|
|
|
+ int m = original.length;
|
|
|
+ int n = modified.length;
|
|
|
+
|
|
|
+ // 创建DP表
|
|
|
+ int[][] dp = new int[m + 1][n + 1];
|
|
|
+
|
|
|
+ // 初始化边界条件
|
|
|
+ for (int i = 0; i <= m; i++) {
|
|
|
+ dp[i][0] = i;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int j = 0; j <= n; j++) {
|
|
|
+ dp[0][j] = j;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 填充DP表
|
|
|
+ for (int i = 1; i <= m; i++) {
|
|
|
+ for (int j = 1; j <= n; j++) {
|
|
|
+ if (original[i - 1].equals(modified[j - 1])) {
|
|
|
+ dp[i][j] = dp[i - 1][j - 1]; // 字符相同,无需操作
|
|
|
+ } else {
|
|
|
+ // 取三种操作的最小值+1
|
|
|
+ dp[i][j] = 1 + Math.min(
|
|
|
+ Math.min(dp[i - 1][j], // 删除
|
|
|
+ dp[i][j - 1]), // 插入
|
|
|
+ dp[i - 1][j - 1] // 替换
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return dp[m][n];
|
|
|
+ }
|
|
|
+
|
|
|
}
|