Spaces:

SuriRaja
/

usecase2

Sleeping

App Files Community

SuriRaja committed on Nov 11, 2024

Commit

95fb28f

verified ·

1 Parent(s): 9231f24

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -10

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ from reportlab.pdfgen import canvas
 from reportlab.lib.pagesizes import letter
 import tempfile
 import os
 # Initialize the easyocr Reader
 ocr_reader = easyocr.Reader(['en'])
@@ -25,12 +26,12 @@ def load_and_compare_documents(file1, file2):
     ocr_differences, marked_images = perform_ocr_and_compare(file1_content, file2_content)
     # Generate a PDF with marked OCR differences and positions
-    pdf_path = create_pdf_with_differences(marked_images, ocr_differences)
     # Compile an overall summary of differences
     overall_summary = generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences)
-    return text_differences, text_property_changes, special_char_changes, placement_changes, pdf_path, overall_summary
 def pdf_to_images(file_content):
     images = []
@@ -160,8 +161,9 @@ def perform_ocr_and_compare(content1, content2):
     return ocr_differences, marked_images
 def create_pdf_with_differences(marked_images, ocr_differences):
-    output_pdf_path = "/mnt/data/marked_differences.pdf"
-    c = canvas.Canvas(output_pdf_path, pagesize=letter)
     for page_num, img in marked_images.items():
         # Save the marked image to a temporary file
@@ -188,14 +190,15 @@ def create_pdf_with_differences(marked_images, ocr_differences):
         # Move to the next page and delete the temporary image file
         c.showPage()
         temp_img_file.close()
-        # Remove the temporary file to avoid clutter
         try:
             os.remove(temp_img_path)
         except OSError:
             pass
     c.save()
-    return output_pdf_path
 def generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences):
     overall_summary = {
@@ -222,7 +225,7 @@ def main():
             st.error("One or both files are empty. Please upload valid PDF files.")
             return
-        text_differences, text_property_changes, special_char_changes, placement_changes, pdf_path, overall_summary = load_and_compare_documents(customer_file, output_file)
         st.subheader("Overall Comparison Summary")
         for key, value in overall_summary.items():
@@ -230,9 +233,7 @@ def main():
         # Provide download link for generated PDF with marked differences
         st.subheader("Download PDF with Marked OCR Differences")
-        with open(pdf_path, "rb") as pdf_file:
-            pdf_bytes = pdf_file.read()
-            st.download_button("Download Marked PDF", data=pdf_bytes, file_name="marked_differences.pdf", mime="application/pdf")
 if __name__ == "__main__":
     main()

 from reportlab.lib.pagesizes import letter
 import tempfile
 import os
+from io import BytesIO
 # Initialize the easyocr Reader
 ocr_reader = easyocr.Reader(['en'])
     ocr_differences, marked_images = perform_ocr_and_compare(file1_content, file2_content)
     # Generate a PDF with marked OCR differences and positions
+    pdf_buffer = create_pdf_with_differences(marked_images, ocr_differences)
     # Compile an overall summary of differences
     overall_summary = generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences)
+    return text_differences, text_property_changes, special_char_changes, placement_changes, pdf_buffer, overall_summary
 def pdf_to_images(file_content):
     images = []
     return ocr_differences, marked_images
 def create_pdf_with_differences(marked_images, ocr_differences):
+    # Use BytesIO to create an in-memory PDF file
+    pdf_buffer = BytesIO()
+    c = canvas.Canvas(pdf_buffer, pagesize=letter)
     for page_num, img in marked_images.items():
         # Save the marked image to a temporary file
         # Move to the next page and delete the temporary image file
         c.showPage()
         temp_img_file.close()
         try:
             os.remove(temp_img_path)
         except OSError:
             pass
+    # Save the PDF to the in-memory buffer
     c.save()
+    pdf_buffer.seek(0)
+    return pdf_buffer
 def generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences):
     overall_summary = {
             st.error("One or both files are empty. Please upload valid PDF files.")
             return
+        text_differences, text_property_changes, special_char_changes, placement_changes, pdf_buffer, overall_summary = load_and_compare_documents(customer_file, output_file)
         st.subheader("Overall Comparison Summary")
         for key, value in overall_summary.items():
         # Provide download link for generated PDF with marked differences
         st.subheader("Download PDF with Marked OCR Differences")
+        st.download_button("Download Marked PDF", data=pdf_buffer, file_name="marked_differences.pdf", mime="application/pdf")
 if __name__ == "__main__":
     main()