Jelajahi Sumber

检测档案文件pdf质量

lvy 1 Minggu lalu
induk
melakukan
75a0d9005f

+ 16 - 0
blade-service/blade-archive/pom.xml

@@ -21,8 +21,24 @@
             <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
         </repository>
     </repositories>
+    <properties>
+        <javacv.version>1.5.8</javacv.version>
+        <javacv.platform>linux-x86_64</javacv.platform>
+    </properties>
 
     <dependencies>
+        <dependency>
+            <groupId>org.bytedeco</groupId>
+            <artifactId>javacv</artifactId>
+            <version>${javacv.version}</version>
+            <!--            <classifier>${javacv.platform}</classifier>-->
+        </dependency>
+        <dependency>
+            <groupId>org.bytedeco</groupId>
+            <artifactId>javacpp</artifactId>
+            <version>${javacv.version}</version>
+            <!--            <classifier>${javacv.platform}</classifier>-->
+        </dependency>
         <!--        iText生成中文-->
         <dependency>
             <groupId>com.itextpdf</groupId>

+ 28 - 15
blade-service/blade-archive/src/main/java/org/springblade/archive/controller/ScanFileController.java

@@ -8,6 +8,7 @@ import io.swagger.annotations.ApiImplicitParam;
 import io.swagger.annotations.ApiImplicitParams;
 import io.swagger.annotations.ApiOperation;
 import lombok.AllArgsConstructor;
+import org.apache.pdfbox.io.MemoryUsageSetting;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.springblade.archive.dto.ScanFileMoveDTO;
@@ -29,7 +30,9 @@ import org.springframework.stereotype.Controller;
 import org.springframework.web.bind.annotation.*;
 
 import javax.annotation.Resource;
+import java.io.InputStream;
 import java.net.URLEncoder;
+import java.time.LocalTime;
 import java.util.Collection;
 import java.util.Date;
 import java.util.List;
@@ -191,7 +194,8 @@ public class ScanFileController {
      */
     @Scheduled(cron = "0 0 3 * * ?")
     public void checkFileInfo(){
-        String sql = "select id,node_id,duty_user,file_time,file_url,pdf_file_url from u_archive_file where is_deleted = 0 and check_status >= 0 and node_id is not null and project_is is not null limit ? offset ?";
+        System.out.println("开始执行文件完整性检测");
+        String sql = "select id,node_id,duty_user,file_time,file_url,pdf_file_url from u_archive_file where is_deleted = 0 and check_status >= 0 and node_id is not null and project_id is not null limit %d offset %d";
         int size = 10000;
         int offset = 0;
         Pattern datePattern = Pattern.compile("[0-9]{4}.?[0-9]{2}.?[0-9]{2}");
@@ -215,7 +219,7 @@ public class ScanFileController {
                 } else if (file.getFileUrl() != null && file.getFileUrl().contains("http")) {
                     url = file.getPdfFileUrl();
                 }
-                if (url == null || url.isEmpty()) {
+                if (url == null || !url.endsWith(".pdf")) {
                     status += "3";
                     file.setCheckStatus(Integer.parseInt(status));
                     continue;
@@ -225,18 +229,21 @@ public class ScanFileController {
                 String suffix = url.substring(lastIndexOf + 1);
                 try {
                     url = prefix + URLEncoder.encode(suffix, "UTF-8");
-                    PDDocument document = PDDocument.load(CommonUtil.getOSSInputStream(url));
-                    PDPage page = document.getPage(0);
-                    // 获取pdf 的dpi信息
-                    int dpi = (int) (page.getCropBox().getWidth() / page.getTrimBox().getWidth() * 72);
-                    if (dpi < 300) {
-                        status += "3";
-                    }
-                    // todo 检测pdf是否有遮挡或者污渍
-                    if (status.isEmpty()) {
-                        file.setCheckStatus(-1);
-                    } else {
-                        file.setCheckStatus(Integer.parseInt( status));
+                    // 使用内存限制设置 10MB 主内存
+                    MemoryUsageSetting memUsage = MemoryUsageSetting.setupMainMemoryOnly(10_000_000);
+                    try (InputStream ossIs = CommonUtil.getOSSInputStream(url);PDDocument document = PDDocument.load(ossIs, memUsage);) {
+                        PDPage page = document.getPage(0);
+                        // 获取pdf 的dpi信息
+                        int dpi = (int) (page.getCropBox().getWidth() / page.getTrimBox().getWidth() * 72);
+                        if (dpi < 300) {
+                            status += "3";
+                        }
+                        // todo 检测pdf是否有遮挡或者污渍
+                        if (status.isEmpty()) {
+                            file.setCheckStatus(-1);
+                        } else {
+                            file.setCheckStatus(Integer.parseInt( status));
+                        }
                     }
                 } catch (Exception e) {
                     status += "3";
@@ -245,8 +252,14 @@ public class ScanFileController {
             }
             Date date = new Date();
             List<Object[]> params = query.stream().map(entry -> new Object[]{entry.getCheckStatus(), date, entry.getId()}).collect(Collectors.toList());
-            jdbcTemplate.batchUpdate("update archive_file set check_status = ?, update_time = ? where id = ?", params);
+            jdbcTemplate.batchUpdate("update u_archive_file set check_status = ?, update_time = ? where id = ?", params);
+            System.out.println("已检测了" + (query.size() < size ? offset - size + query.size() : offset) + "条数据");
+            // The job is scheduled for 03:00 (see the cron expression on this method); stop starting
+            // new batches once the wall clock has passed 06:30. The previous isBefore(...) check was
+            // inverted: it was already true after the first batch, so the loop always exited early.
+            if (LocalTime.now().isAfter(LocalTime.of(6, 30))) {
+                System.out.println("时间超过6:30, 停止检测");
+                break;
+            }
         }
+        System.out.println("文件完整性检测执行完毕");
 
 
     }

+ 183 - 0
blade-service/blade-archive/src/main/java/org/springblade/archive/utils/ImageQualityDetectorUtils.java

@@ -0,0 +1,183 @@
+package org.springblade.archive.utils;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.bytedeco.javacpp.BytePointer;
+import org.bytedeco.javacv.Frame;
+import org.bytedeco.javacv.Java2DFrameConverter;
+import org.bytedeco.javacv.OpenCVFrameConverter;
+import org.bytedeco.opencv.opencv_core.*;
+import org.bytedeco.opencv.opencv_imgproc.*;
+import static org.bytedeco.opencv.global.opencv_core.*;
+import org.bytedeco.opencv.global.opencv_imgcodecs;
+import org.opencv.core.CvType;
+import org.springblade.common.utils.CommonUtil;
+
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+
+import static org.bytedeco.opencv.global.opencv_imgcodecs.*;
+import static org.bytedeco.opencv.global.opencv_imgproc.*;
+
+/**
+ * 简单检测图片质量
+ */
+public class ImageQualityDetectorUtils {
+
+//    public static void main(String[] args) throws IOException {
+//        try (PDDocument document = PDDocument.load(new File("C:\\Users\\泓创02\\Downloads\\73ef17c2f24ea5b83747a1ac5c2c3f0f.pdf"));
+//             Java2DFrameConverter converter1 = new Java2DFrameConverter();
+//             OpenCVFrameConverter.ToMat converter2 = new OpenCVFrameConverter.ToMat();) {
+//            if (document.isEncrypted()) {
+//                throw new RuntimeException("PDF文件已加密,请先解密");
+//            }
+//            // 创建PDF渲染器
+//            PDFRenderer pdfRenderer = new PDFRenderer(document);
+//            // 获取总页数
+//            int pages = document.getNumberOfPages();
+//            for (int i = 0; i < pages; i++) {
+//                Frame frame = converter1.convert(pdfRenderer.renderImageWithDPI(i, 300));
+//                Mat image = converter2.convert(frame);
+//                if (image.empty()) {
+//                    System.out.println("无法加载图像");
+//                    return;
+//                }
+//                // 检测各种质量问题
+//                boolean hasStains = detectLargeStains(image);
+//                boolean isObstructed = detectTextObstruction(image);
+//                boolean hasShadows = detectShadows(image);
+//
+//                System.out.println("检测结果:");
+//                System.out.println("大面积污渍: " + (hasStains ? "是" : "否"));
+//                System.out.println("文字遮挡: " + (isObstructed ? "是" : "否"));
+//                System.out.println("黑影/阴影: " + (hasShadows ? "是" : "否"));
+//
+//                if (hasStains || isObstructed || hasShadows) {
+//                    System.out.println("图像质量不佳,建议重新扫描");
+//                } else {
+//                    System.out.println("图像质量良好");
+//                }
+//            }
+//        }
+//    }
+
+    /**
+     * 检测图像中是否存在大面积污渍
+     * @param image 输入图像
+     * @return 污渍检测结果
+     */
+    public static boolean detectLargeStains(Mat image) {
+        // 转换为灰度图
+        Mat gray = new Mat();
+        cvtColor(image, gray, COLOR_BGR2GRAY);
+
+        // 应用阈值处理,突出污渍区域
+        Mat thresh = new Mat();
+        threshold(gray, thresh, 0, 255, THRESH_BINARY_INV + THRESH_OTSU);
+
+        // 形态学操作去除噪声
+        Mat kernel = getStructuringElement(MORPH_ELLIPSE, new Size(5, 5));
+        morphologyEx(thresh, thresh, MORPH_OPEN, kernel);
+
+        // 查找轮廓
+        MatVector contours = new MatVector();
+        findContours(thresh, contours, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
+
+        // 检查是否有大面积的污渍
+        double imageArea = image.rows() * image.cols();
+        for (int i = 0; i < contours.size(); i++) {
+            double area = contourArea(contours.get(i));
+            // 如果污渍面积超过图像面积的5%,则认为存在大面积污渍
+            if (area > imageArea * 0.05) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * 检测图像中文字是否被遮挡
+     * @param image 输入图像
+     * @return 遮挡检测结果
+     */
+    public static boolean detectTextObstruction(Mat image) {
+        // 转换为灰度图
+        Mat gray = new Mat();
+        cvtColor(image, gray, COLOR_BGR2GRAY);
+
+        // 边缘检测
+        Mat edges = new Mat();
+        Canny(gray, edges, 50, 150);
+
+        // 形态学操作连接边缘
+        Mat kernel = getStructuringElement(MORPH_RECT, new Size(3, 3));
+        morphologyEx(edges, edges, MORPH_CLOSE, kernel);
+
+        // 查找轮廓
+        MatVector contours = new MatVector();
+        findContours(edges, contours, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
+
+        // 分析轮廓特征判断是否有遮挡
+        for (int i = 0; i < contours.size(); i++) {
+            Mat contour = contours.get(i);
+            double area = contourArea(contour);
+
+            // 计算轮廓的边界矩形
+            Rect boundingRect = boundingRect(contour);
+
+            // 如果区域较大但边缘不规则,可能是遮挡
+            if (area > 1000) {
+                double aspectRatio = (double) boundingRect.width() / boundingRect.height();
+                if (aspectRatio > 5 || aspectRatio < 0.2) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * 检测图像中是否存在黑影或阴影
+     * @param image 输入图像
+     * @return 阴影检测结果
+     */
+    public static boolean detectShadows(Mat image) {
+        // 转换为灰度图
+        Mat gray = new Mat();
+        cvtColor(image, gray, COLOR_BGR2GRAY);
+
+        // 应用高斯模糊减少噪声
+        Mat blurred = new Mat();
+        GaussianBlur(gray, blurred, new Size(5, 5), 0);
+
+        // 计算图像的梯度
+        Mat gradX = new Mat();
+        Mat gradY = new Mat();
+        Sobel(blurred, gradX, CV_32F, 1, 0);
+        Sobel(blurred, gradY, CV_32F, 0, 1);
+
+        // 计算梯度幅值
+        Mat gradient = new Mat();
+        magnitude(gradX, gradY, gradient);
+
+        // 转换为8位图像
+        Mat gradient8u = new Mat();
+        convertScaleAbs(gradient, gradient8u);
+
+        // 应用阈值分割
+        Mat binary = new Mat();
+        threshold(gradient8u, binary, 30, 255, THRESH_BINARY);
+
+        // 计算阴影区域占比
+        double totalPixels = binary.rows() * binary.cols();
+        double shadowPixels = totalPixels - countNonZero(binary);
+        double shadowRatio = shadowPixels / totalPixels;
+
+        // 如果阴影区域超过10%,则认为存在明显阴影
+        return shadowRatio > 0.1;
+    }
+
+}