SathvikGanta committed on
Commit
e4f149d
·
verified ·
1 Parent(s): 5a9fd52

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ import cv2
4
+ from pdf2image import convert_from_path
5
+ import pytesseract
6
+ import numpy as np
7
+ import os
8
+ from fpdf import FPDF
9
+
10
+ # Convert PDFs to images
11
def convert_pdf_to_images(pdf_path, dpi=300):
    """Render each page of a PDF as a BGR pixel array.

    Args:
        pdf_path: Path of the PDF passed to ``convert_from_path``.
        dpi: Rendering resolution forwarded to ``convert_from_path``.

    Returns:
        A list holding one array per rendered page, with the channel order
        swapped from RGB to BGR so the pages can be fed to OpenCV routines.
    """
    bgr_pages = []
    for page in convert_from_path(pdf_path, dpi=dpi):
        rgb_pixels = np.array(page)
        bgr_pages.append(cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR))
    return bgr_pages
14
+
15
+ # Align images
16
def align_images(img1, img2):
    """Warp ``img2`` into the pixel frame of ``img1`` using ORB feature matches.

    ORB keypoints are detected in both images, matched with a cross-checked
    Hamming brute-force matcher, and a RANSAC homography mapping ``img2``
    coordinates onto ``img1`` coordinates is estimated and applied to
    ``img2``.

    Args:
        img1: Reference BGR image; the result has its width and height.
        img2: BGR image to be aligned to ``img1``.

    Returns:
        ``img2`` warped into the frame of ``img1``. When alignment is not
        possible (no descriptors on one of the images, fewer than four
        matches, or no homography found), ``img2`` is returned unwarped but
        sized to ``img1`` so that pixel-wise comparison still works.
    """
    height, width = img1.shape[:2]

    def _unaligned():
        # Best-effort fallback: keep the content, but guarantee the
        # reference's dimensions so cv2.absdiff in the caller does not fail.
        if img2.shape[:2] == (height, width):
            return img2.copy()
        return cv2.resize(img2, (width, height))

    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(gray1, None)
    kp2, des2 = orb.detectAndCompute(gray2, None)
    # Featureless pages (e.g. blank ones) yield None descriptors, which
    # BFMatcher.match cannot handle.
    if des1 is None or des2 is None:
        return _unaligned()

    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)
    # A homography has 8 degrees of freedom: at least 4 point pairs needed.
    if len(matches) < 4:
        return _unaligned()

    ref_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    moving_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    # The image being warped is img2, so its points must be the *source* of
    # the homography and the reference points the *destination*.
    matrix, _ = cv2.findHomography(moving_pts, ref_pts, cv2.RANSAC, 5.0)
    if matrix is None:
        return _unaligned()

    return cv2.warpPerspective(img2, matrix, (width, height))
30
+
31
+ # Compare images
32
def compare_images(img1, img2):
    """Build a binary mask marking where two BGR images differ.

    The per-pixel absolute difference is converted to grayscale and
    thresholded at 50 with ``cv2.THRESH_BINARY`` and a maximum value of 255.

    Args:
        img1: First BGR image.
        img2: Second BGR image, passed together with ``img1`` to
            ``cv2.absdiff``.

    Returns:
        The thresholded single-channel difference mask.
    """
    grayscale_delta = cv2.cvtColor(cv2.absdiff(img1, img2), cv2.COLOR_BGR2GRAY)
    change_mask = cv2.threshold(grayscale_delta, 50, 255, cv2.THRESH_BINARY)[1]
    return change_mask
37
+
38
+ # Highlight changes
39
def highlight_changes(img, mask):
    """Draw a red box around every changed region on a copy of ``img``.

    Args:
        img: BGR image to annotate; it is copied, not modified.
        mask: Single-channel mask whose outer contours are boxed.

    Returns:
        A copy of ``img`` with a 2-pixel rectangle in BGR ``(0, 0, 255)``
        around the bounding box of each external contour of ``mask``.
    """
    annotated = img.copy()
    box_colour = (0, 0, 255)
    regions, _hierarchy = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    for region in regions:
        left, top, box_w, box_h = cv2.boundingRect(region)
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(annotated, top_left, bottom_right, box_colour, 2)
    return annotated
46
+
47
+ # Generate combined comparison PDF
48
def generate_comparison_pdf(original_pdf, edited_pdf):
    """Create a PDF showing each original page next to its highlighted edit.

    Both documents are rasterized, pages are paired in order, the edited
    page is aligned to the original, differing regions are boxed, and the
    two images are placed side by side on one page of the report.

    Pages are paired with ``zip``, so when the documents have different page
    counts the surplus pages of the longer one are not included.

    Args:
        original_pdf: Path of the original PDF.
        edited_pdf: Path of the edited PDF.

    Returns:
        The path of the written report, ``"outputs/comparison_result.pdf"``.

    Raises:
        IOError: If an intermediate page image cannot be written.
    """
    # Function-scope import so this block does not depend on a change to the
    # file's import section.
    import tempfile

    original_images = convert_pdf_to_images(original_pdf)
    edited_images = convert_pdf_to_images(edited_pdf)

    combined_images = []
    for orig_img, edit_img in zip(original_images, edited_images):
        aligned_img = align_images(orig_img, edit_img)
        diff_mask = compare_images(orig_img, aligned_img)
        # The mask lives in the aligned frame, so the boxes must be drawn on
        # the aligned page. It also shares the original's height, which
        # np.hstack requires.
        highlighted_img = highlight_changes(aligned_img, diff_mask)
        combined_images.append(np.hstack((orig_img, highlighted_img)))

    output_path = "outputs/comparison_result.pdf"
    # Do not rely on the caller having created the output directory.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    pdf = FPDF()
    # A private scratch directory avoids collisions between concurrent
    # requests and is cleaned up even when an exception is raised.
    with tempfile.TemporaryDirectory() as scratch_dir:
        for page_number, img in enumerate(combined_images, start=1):
            # FPDF caches embedded images by file name; a distinct name per
            # page prevents every page from reusing the first image.
            temp_path = os.path.join(scratch_dir, f"page_{page_number}.png")
            if not cv2.imwrite(temp_path, img):
                raise IOError(f"Could not write page image: {temp_path}")
            pdf.add_page()
            pdf.image(temp_path, x=10, y=10, w=190)
        # Write the report while the page images still exist on disk.
        pdf.output(output_path)
    return output_path
69
+
70
+ # Gradio interface function
71
def pdf_comparison(original_pdf, edited_pdf):
    """Gradio callback: compare two uploaded PDFs and return the report path.

    Args:
        original_pdf: Upload for the original document. Either a path string
            or an object exposing the path as ``.name``.
        edited_pdf: Upload for the edited document, in the same form.

    Returns:
        The path of the generated comparison PDF.

    Raises:
        ValueError: If either upload is missing.
    """
    # Fail with a readable message instead of an AttributeError on None.
    if original_pdf is None or edited_pdf is None:
        raise ValueError("Both an original and an edited PDF must be uploaded.")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("outputs", exist_ok=True)

    # Accept both file-like upload objects (path in .name) and plain paths.
    original_path = getattr(original_pdf, "name", original_pdf)
    edited_path = getattr(edited_pdf, "name", edited_pdf)
    return generate_comparison_pdf(original_path, edited_path)
76
+
77
+ # Gradio app
78
interface = gr.Interface(
    fn=pdf_comparison,
    # Two PDF-only upload widgets, passed positionally to pdf_comparison.
    inputs=[
        gr.File(label="Upload Original PDF", file_types=[".pdf"]),
        gr.File(label="Upload Edited PDF", file_types=[".pdf"]),
    ],
    # pdf_comparison returns a file path, offered to the user as a download.
    outputs=gr.File(label="Download Comparison Report"),
    title="PDF Comparison Tool",
    # The report contains side-by-side pages with boxed differences only;
    # no textual summary is generated, so the description must not promise one.
    description="Upload two PDFs for a side-by-side comparison with changed regions highlighted.",
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()