SathvikGanta committed on
Commit
1575c76
·
verified ·
1 Parent(s): d1e3fd5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ import cv2
4
+ from pdf2image import convert_from_path
5
+ import numpy as np
6
+ import os
7
+ from fpdf import FPDF
8
+
9
+ # Convert PDFs to images
10
def convert_pdf_to_images(pdf_path, dpi=300, *, poppler_path="/usr/bin"):
    """Rasterize every page of a PDF into a BGR image array.

    Args:
        pdf_path: Filesystem path of the PDF to render.
        dpi: Rendering resolution passed to ``convert_from_path``.
        poppler_path: Directory containing the Poppler binaries. Defaults to
            ``/usr/bin`` (the previously hard-coded location) so existing
            callers behave the same; override it on systems where Poppler
            lives elsewhere.

    Returns:
        A list with one array per page, channel order converted from RGB to
        BGR for use with OpenCV.
    """
    pages = convert_from_path(pdf_path, dpi=dpi, poppler_path=poppler_path)
    # The rendered pages are treated as RGB; OpenCV routines expect BGR.
    return [cv2.cvtColor(np.array(page), cv2.COLOR_RGB2BGR) for page in pages]
13
+
14
+ # Align images
15
def align_images(img1, img2):
    """Warp ``img2`` into the pixel frame of ``img1`` using ORB feature matches.

    Args:
        img1: Reference image (BGR array); defines the output width/height.
        img2: Image to be aligned (BGR array).

    Returns:
        ``img2`` perspective-warped to the size of ``img1``.

    Raises:
        ValueError: If either image yields no descriptors, fewer than 10
            matches are found, or no homography can be estimated.
    """
    failure = "Alignment failed. Insufficient matches between images."

    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(gray1, None)
    kp2, des2 = orb.detectAndCompute(gray2, None)

    # A page without detectable features (e.g. blank) produces no descriptors;
    # fail with the documented error instead of crashing inside the matcher.
    if des1 is None or des2 is None:
        raise ValueError(failure)

    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)

    # Validate BEFORE estimating the homography: findHomography itself
    # errors out when given fewer than 4 point pairs.
    if len(matches) < 10:
        raise ValueError(failure)

    ref_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    moving_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    # warpPerspective (without WARP_INVERSE_MAP) treats the matrix as the
    # forward map from the warped image to the output, so the homography must
    # take img2 coordinates to img1 coordinates, not the other way round.
    matrix, _ = cv2.findHomography(moving_pts, ref_pts, cv2.RANSAC, 5.0)
    if matrix is None:
        raise ValueError(failure)

    return cv2.warpPerspective(img2, matrix, (img1.shape[1], img1.shape[0]))
34
+
35
+ # Compare visual changes
36
def compare_visual_changes(orig_img, edit_img):
    """Return a copy of ``edit_img`` with red boxes around differing regions.

    Both inputs are BGR arrays and are assumed to share the same shape
    (``cv2.absdiff`` is applied to them directly). Only difference regions
    whose contour area exceeds 100 pixels are outlined.
    """
    # Per-pixel absolute difference, collapsed to one channel.
    delta = cv2.cvtColor(cv2.absdiff(orig_img, edit_img), cv2.COLOR_BGR2GRAY)

    # Blur first so isolated noisy pixels do not survive the threshold.
    smoothed = cv2.GaussianBlur(delta, (5, 5), 0)
    _, mask = cv2.threshold(smoothed, 70, 255, cv2.THRESH_BINARY)

    # Morphological closing merges nearby fragments of the same change.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, closing_kernel)

    regions, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    annotated = edit_img.copy()
    red = (0, 0, 255)  # BGR
    for region in regions:
        if cv2.contourArea(region) <= 100:
            continue  # too small to be a meaningful change
        left, top, width, height = cv2.boundingRect(region)
        cv2.rectangle(annotated, (left, top), (left + width, top + height), red, 2)

    return annotated
59
+
60
+ # Generate visual comparison report
61
def generate_visual_report(images, output_path):
    """Write each image onto its own page of a PDF saved at ``output_path``.

    Args:
        images: Iterable of image arrays writable by ``cv2.imwrite``.
        output_path: Destination path of the PDF; its parent directory is
            created if it does not exist yet.

    Returns:
        ``output_path``, unchanged.
    """
    import tempfile  # local import keeps this function self-contained

    # pdf.output() cannot create missing directories itself.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    pdf = FPDF()
    for img in images:
        # A unique temp file per page avoids clashes between concurrent
        # requests that would otherwise share one fixed file name.
        fd, temp_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)
        try:
            cv2.imwrite(temp_path, img)
            pdf.add_page()
            pdf.image(temp_path, x=10, y=10, w=190)
        finally:
            # Clean up even if embedding the image fails.
            os.remove(temp_path)

    pdf.output(output_path)
    return output_path
72
+
73
+ # Perform only visual comparison
74
def generate_visual_comparison(original_pdf, edited_pdf):
    """Build the side-by-side visual diff report for two PDF files.

    Pages are paired in order; because the pairing uses ``zip``, pages beyond
    the length of the shorter document are not compared. Each output page
    shows the original page on the left and the aligned, annotated edited
    page on the right.

    Returns:
        Path of the generated report, ``outputs/visual_changes.pdf``.
    """
    report_path = "outputs/visual_changes.pdf"

    page_pairs = zip(
        convert_pdf_to_images(original_pdf),
        convert_pdf_to_images(edited_pdf),
    )

    side_by_side = []
    for reference_page, revised_page in page_pairs:
        registered = align_images(reference_page, revised_page)
        marked = compare_visual_changes(reference_page, registered)
        side_by_side.append(np.hstack((reference_page, marked)))

    return generate_visual_report(side_by_side, report_path)
90
+
91
+ # Gradio interface function
92
def pdf_visual_comparison(original_pdf, edited_pdf):
    """Gradio callback: compare two uploaded PDFs and return the report path.

    Args:
        original_pdf: Upload for the original document. Either an object
            exposing the file path as ``.name`` or a plain path string,
            depending on how the ``gr.File`` input delivers its value.
        edited_pdf: Upload for the edited document, same forms as above.

    Returns:
        Path of the generated visual-changes PDF.

    Raises:
        ValueError: If either upload is missing.
    """

    def _as_path(upload, label):
        # Resolve an upload value to a filesystem path.
        if upload is None:
            raise ValueError(f"No {label} PDF was uploaded.")
        return getattr(upload, "name", upload)

    original_path = _as_path(original_pdf, "original")
    edited_path = _as_path(edited_pdf, "edited")
    return generate_visual_comparison(original_path, edited_path)
95
+
96
+ # Gradio interface
97
# Two PDF uploads in, one downloadable report out.
interface = gr.Interface(
    title="PDF Visual Comparison Tool",
    description="Upload two PDFs: the original and the edited version. The tool generates a visual changes report.",
    fn=pdf_visual_comparison,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload Original PDF"),
        gr.File(file_types=[".pdf"], label="Upload Edited PDF"),
    ],
    outputs=[gr.File(label="Download Visual Changes Report")],
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()