Spaces:

SathvikGanta
/

UC2_Image_Based_PDF_omparison

Sleeping

App Files Community

SathvikGanta committed on Dec 2, 2024

Commit

979e3c2

verified ·

1 Parent(s): 8db3ca1

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -44

app.py CHANGED Viewed

@@ -51,7 +51,7 @@ def compare_images(img1, img2):
     return cleaned
-# Compare text and generate differences
 def generate_text_differences(orig_text, edit_text, start_position):
     diff = difflib.ndiff(orig_text.splitlines(), edit_text.splitlines())
     changes = []
@@ -64,11 +64,11 @@ def generate_text_differences(orig_text, edit_text, start_position):
         position_number += 1
     return changes, position_number
-# Highlight changes and generate visual summary
 def highlight_visual_changes(orig_img, edit_img, mask, start_position):
     overlay = edit_img.copy()
     contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    visual_changes = []  # To store visual changes
     font = cv2.FONT_HERSHEY_SIMPLEX
     font_scale = 0.8
     thickness = 2
@@ -84,13 +84,8 @@ def highlight_visual_changes(orig_img, edit_img, mask, start_position):
     return overlay, visual_changes, position_counter
-# Sanitize text for PDF compatibility
-def sanitize_text(text):
-    """Sanitize text for FPDF by replacing unsupported characters."""
-    return text.encode('latin-1', errors='replace').decode('latin-1')
-# Generate comparison PDF with visual and text-based summaries
-def generate_comparison_pdf(original_pdf, edited_pdf):
     original_images = convert_pdf_to_images(original_pdf)
     edited_images = convert_pdf_to_images(edited_pdf)
     combined_images = []
@@ -116,48 +111,46 @@ def generate_comparison_pdf(original_pdf, edited_pdf):
         highlighted_img_resized = highlighted_img[:height]
         combined_images.append(np.hstack((orig_img_resized, highlighted_img_resized)))
-    output_path = "outputs/comparison_result.pdf"
-    pdf = FPDF()
-    # Add each comparison image to the PDF
     for img in combined_images:
-        temp_path = "temp_image.png"
         cv2.imwrite(temp_path, img)
-        pdf.add_page()
-        pdf.image(temp_path, x=10, y=10, w=190)
         os.remove(temp_path)
-    # Add Visual Changes section
-    pdf.add_page()
-    pdf.set_font("Arial", size=12)
-    pdf.cell(0, 10, sanitize_text("Visual Changes"), ln=True, align="C")
-    pdf.ln(10)  # Add a line break
     for _, change in visual_changes:
-        pdf.cell(0, 10, sanitize_text(change), ln=True)
-    # Add Text Changes section
-    pdf.add_page()
-    pdf.cell(0, 10, sanitize_text("Text Changes"), ln=True, align="C")
-    pdf.ln(10)  # Add a line break
     for _, change in text_changes:
-        pdf.cell(0, 10, sanitize_text(change), ln=True)
-    pdf.output(output_path)
-    return output_path
 # Gradio interface function
 def pdf_comparison(original_pdf, edited_pdf):
-    # Get the file size in bytes
-    original_file_size = os.path.getsize(original_pdf.name)
-    edited_file_size = os.path.getsize(edited_pdf.name)
-    # Check if either file exceeds 50 MB (50 * 1024 * 1024 bytes)
-    if original_file_size > 50 * 1024 * 1024 or edited_file_size > 50 * 1024 * 1024:
-        return "Error: File size exceeds 50 MB. Please upload smaller files."
-    # Proceed with PDF comparison
-    result_path = generate_comparison_pdf(original_pdf.name, edited_pdf.name)
-    return result_path
 # Gradio interface
 interface = gr.Interface(
@@ -166,9 +159,12 @@ interface = gr.Interface(
         gr.File(label="Upload Original PDF", file_types=[".pdf"]),
         gr.File(label="Upload Edited PDF", file_types=[".pdf"])
     ],
-    outputs=gr.File(label="Download Comparison Report"),
     title="PDF Comparison Tool with Separate Reports",
-    description="Upload two PDFs: the original and the edited version. The tool highlights changes and provides separate summaries for visual and text changes."
 )
 if __name__ == "__main__":

     return cleaned
+# Generate text-based differences
 def generate_text_differences(orig_text, edit_text, start_position):
     diff = difflib.ndiff(orig_text.splitlines(), edit_text.splitlines())
     changes = []
         position_number += 1
     return changes, position_number
+# Highlight visual changes
 def highlight_visual_changes(orig_img, edit_img, mask, start_position):
     overlay = edit_img.copy()
     contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    visual_changes = []
     font = cv2.FONT_HERSHEY_SIMPLEX
     font_scale = 0.8
     thickness = 2
     return overlay, visual_changes, position_counter
+# Generate separate PDFs for visual and text changes
+def generate_separate_pdfs(original_pdf, edited_pdf):
     original_images = convert_pdf_to_images(original_pdf)
     edited_images = convert_pdf_to_images(edited_pdf)
     combined_images = []
         highlighted_img_resized = highlighted_img[:height]
         combined_images.append(np.hstack((orig_img_resized, highlighted_img_resized)))
+    # Generate Visual Changes PDF
+    visual_pdf_path = "outputs/visual_changes.pdf"
+    pdf_visual = FPDF()
     for img in combined_images:
+        temp_path = "temp_image_visual.png"
         cv2.imwrite(temp_path, img)
+        pdf_visual.add_page()
+        pdf_visual.image(temp_path, x=10, y=10, w=190)
         os.remove(temp_path)
+    pdf_visual.add_page()
+    pdf_visual.set_font("Arial", size=12)
+    pdf_visual.cell(0, 10, "Visual Changes", ln=True, align="C")
+    pdf_visual.ln(10)
     for _, change in visual_changes:
+        pdf_visual.cell(0, 10, change, ln=True)
+    pdf_visual.output(visual_pdf_path)
+    # Generate Text Changes PDF
+    text_pdf_path = "outputs/text_changes.pdf"
+    pdf_text = FPDF()
+    for img in combined_images:
+        temp_path = "temp_image_text.png"
+        cv2.imwrite(temp_path, img)
+        pdf_text.add_page()
+        pdf_text.image(temp_path, x=10, y=10, w=190)
+        os.remove(temp_path)
+    pdf_text.add_page()
+    pdf_text.set_font("Arial", size=12)
+    pdf_text.cell(0, 10, "Text Changes", ln=True, align="C")
+    pdf_text.ln(10)
     for _, change in text_changes:
+        pdf_text.cell(0, 10, change, ln=True)
+    pdf_text.output(text_pdf_path)
+    return visual_pdf_path, text_pdf_path
 # Gradio interface function
 def pdf_comparison(original_pdf, edited_pdf):
+    visual_path, text_path = generate_separate_pdfs(original_pdf.name, edited_pdf.name)
+    return visual_path, text_path
 # Gradio interface
 interface = gr.Interface(
         gr.File(label="Upload Original PDF", file_types=[".pdf"]),
         gr.File(label="Upload Edited PDF", file_types=[".pdf"])
     ],
+    outputs=[
+        gr.File(label="Download Visual Changes Report"),
+        gr.File(label="Download Text Changes Report")
+    ],
     title="PDF Comparison Tool with Separate Reports",
+    description="Upload two PDFs: the original and the edited version. The tool generates two separate reports: one for visual changes and another for text changes."
 )
 if __name__ == "__main__":