# Contour-area thresholds used to classify a changed region.
SMALL_DIFF_MIN_AREA = 100
MAJOR_DIFF_MIN_AREA = 500

# Upload size limit enforced by the Gradio handler (50 MB).
MAX_FILE_SIZE_BYTES = 50 * 1024 * 1024


# Highlight changes and categorize small and large differences
def highlight_changes(img, mask, small_area=SMALL_DIFF_MIN_AREA,
                      major_area=MAJOR_DIFF_MIN_AREA):
    """Draw boxes around changed regions and describe each one.

    External contours are extracted from ``mask``. A contour whose area is
    greater than ``major_area`` is boxed with colour ``(0, 0, 255)`` and
    reported as a "Major Difference"; one whose area is greater than
    ``small_area`` (and at most ``major_area``) is boxed with ``(255, 0, 0)``
    and reported as a "Small Difference". Smaller contours are ignored.

    Args:
        img: Image to annotate. It is copied; the caller's array is untouched.
        mask: Difference mask passed to ``cv2.findContours``.
        small_area: Contours with area <= this value are skipped.
        major_area: Contours with area > this value count as major.

    Returns:
        ``(overlay, summary)``: the annotated copy of ``img`` and a list of
        one description string per reported contour.
    """
    overlay = img.copy()
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; [-2] is the contour list on all.
    contours = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    summary = []
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area <= small_area:
            continue  # too small to report

        if area > major_area:
            label, color = "Major", (0, 0, 255)  # Red
        else:
            label, color = "Small", (255, 0, 0)  # Blue

        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(overlay, (x, y), (x + w, y + h), color, 2)
        summary.append(
            f"{label} Difference: Location=({x}, {y}), Size=({w}x{h}), Area={area}"
        )

    return overlay, summary


# Generate comparison PDF with detailed summary
def generate_comparison_pdf(original_pdf, edited_pdf):
    """Build a side-by-side comparison PDF and a plain-text summary.

    Each page pair is aligned, diffed, annotated via ``highlight_changes`` and
    stacked horizontally (original left, annotated edit right). The resulting
    images are placed one per page in the output PDF, followed by a final page
    containing the textual summary. The same summary is written to a text file.

    Args:
        original_pdf: Path of the original PDF.
        edited_pdf: Path of the edited PDF.

    Returns:
        ``(output_path, summary_path)``: paths of the generated PDF and of the
        summary text file, both inside the ``outputs`` directory.

    Raises:
        RuntimeError: If a page image cannot be written to its temporary file.
    """
    # Local import: the file's import block is outside this section.
    import tempfile

    # list() so the page counts can be compared even if the helper returns an
    # iterator rather than a list.
    original_images = list(convert_pdf_to_images(original_pdf))
    edited_images = list(convert_pdf_to_images(edited_pdf))

    combined_images = []
    all_summaries = []

    # zip() below stops at the shorter document; say so instead of silently
    # dropping the unmatched pages.
    if len(original_images) != len(edited_images):
        compared = min(len(original_images), len(edited_images))
        all_summaries.append(
            f"Page count differs: original has {len(original_images)} page(s), "
            f"edited has {len(edited_images)} page(s); "
            f"only the first {compared} page(s) were compared."
        )

    for page_num, (orig_img, edit_img) in enumerate(
        zip(original_images, edited_images), start=1
    ):
        aligned_img = align_images(orig_img, edit_img)
        diff_mask = compare_images(orig_img, aligned_img)
        # NOTE(review): the mask is computed against aligned_img but the boxes
        # are drawn on edit_img; if align_images shifts content, the boxes may
        # be offset. Confirm whether aligned_img should be annotated instead.
        highlighted_img, summary = highlight_changes(edit_img, diff_mask)

        # Add page number to summary
        all_summaries.append(f"Page {page_num}:")
        all_summaries.extend(summary)

        # Ensure dimensions match: hstack needs equal heights, so crop both
        # images to the shorter one.
        height = min(orig_img.shape[0], highlighted_img.shape[0])
        combined_images.append(
            np.hstack((orig_img[:height], highlighted_img[:height]))
        )

    # The output directory may not exist on a fresh checkout.
    os.makedirs("outputs", exist_ok=True)
    output_path = "outputs/comparison_result.pdf"
    summary_path = "outputs/summary.txt"
    summary_text = "\n".join(all_summaries)

    # Generate the PDF
    pdf = FPDF()
    for img in combined_images:
        # A unique temp file per image avoids collisions between concurrent
        # requests; try/finally removes it even if embedding fails.
        fd, temp_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)
        try:
            if not cv2.imwrite(temp_path, img):
                raise RuntimeError(
                    f"Failed to write temporary page image: {temp_path}"
                )
            pdf.add_page()
            pdf.image(temp_path, x=10, y=10, w=190)
        finally:
            os.remove(temp_path)

    # Add detailed summary to the PDF
    with open(summary_path, "w", encoding="utf-8") as f:
        f.write(summary_text)

    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary_text)

    pdf.output(output_path)
    return output_path, summary_path


# Gradio interface function
def pdf_comparison(original_pdf, edited_pdf):
    """Validate the two uploads and run the PDF comparison.

    Args:
        original_pdf: Uploaded original PDF, either an object exposing the
            file path as ``.name`` or the path string itself.
        edited_pdf: Uploaded edited PDF, same forms as ``original_pdf``.

    Returns:
        ``(result_path, summary_path)`` as returned by
        ``generate_comparison_pdf``, matching the interface's two outputs.

    Raises:
        gr.Error: If an upload is missing or a file exceeds 50 MB. Raising
            (rather than returning a single string) keeps the return shape
            consistent with the two File outputs.
    """
    if original_pdf is None or edited_pdf is None:
        raise gr.Error("Please upload both the original and the edited PDF.")

    # Accept both upload objects (path in .name) and plain path strings.
    original_path = getattr(original_pdf, "name", original_pdf)
    edited_path = getattr(edited_pdf, "name", edited_pdf)

    # Check if either file exceeds the size limit
    largest = max(os.path.getsize(original_path), os.path.getsize(edited_path))
    if largest > MAX_FILE_SIZE_BYTES:
        raise gr.Error(
            "Error: File size exceeds 50 MB. Please upload smaller files."
        )

    # Proceed with PDF comparison
    return generate_comparison_pdf(original_path, edited_path)


# Gradio interface
interface = gr.Interface(
    fn=pdf_comparison,
    inputs=[
        gr.File(label="Upload Original PDF", file_types=[".pdf"]),
        gr.File(label="Upload Edited PDF", file_types=[".pdf"]),
    ],
    outputs=[
        gr.File(label="Download Comparison Report"),
        gr.File(label="Download Detailed Summary"),
    ],
)

if __name__ == "__main__":
    interface.launch()