SuriRaja committed on
Commit
95fb28f
·
verified ·
1 Parent(s): 9231f24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -10,6 +10,7 @@ from reportlab.pdfgen import canvas
10
  from reportlab.lib.pagesizes import letter
11
  import tempfile
12
  import os
 
13
 
14
  # Initialize the easyocr Reader
15
  ocr_reader = easyocr.Reader(['en'])
@@ -25,12 +26,12 @@ def load_and_compare_documents(file1, file2):
25
  ocr_differences, marked_images = perform_ocr_and_compare(file1_content, file2_content)
26
 
27
  # Generate a PDF with marked OCR differences and positions
28
- pdf_path = create_pdf_with_differences(marked_images, ocr_differences)
29
 
30
  # Compile an overall summary of differences
31
  overall_summary = generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences)
32
 
33
- return text_differences, text_property_changes, special_char_changes, placement_changes, pdf_path, overall_summary
34
 
35
  def pdf_to_images(file_content):
36
  images = []
@@ -160,8 +161,9 @@ def perform_ocr_and_compare(content1, content2):
160
  return ocr_differences, marked_images
161
 
162
  def create_pdf_with_differences(marked_images, ocr_differences):
163
- output_pdf_path = "/mnt/data/marked_differences.pdf"
164
- c = canvas.Canvas(output_pdf_path, pagesize=letter)
 
165
 
166
  for page_num, img in marked_images.items():
167
  # Save the marked image to a temporary file
@@ -188,14 +190,15 @@ def create_pdf_with_differences(marked_images, ocr_differences):
188
  # Move to the next page and delete the temporary image file
189
  c.showPage()
190
  temp_img_file.close()
191
- # Remove the temporary file to avoid clutter
192
  try:
193
  os.remove(temp_img_path)
194
  except OSError:
195
  pass
196
 
 
197
  c.save()
198
- return output_pdf_path
 
199
 
200
  def generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences):
201
  overall_summary = {
@@ -222,7 +225,7 @@ def main():
222
  st.error("One or both files are empty. Please upload valid PDF files.")
223
  return
224
 
225
- text_differences, text_property_changes, special_char_changes, placement_changes, pdf_path, overall_summary = load_and_compare_documents(customer_file, output_file)
226
 
227
  st.subheader("Overall Comparison Summary")
228
  for key, value in overall_summary.items():
@@ -230,9 +233,7 @@ def main():
230
 
231
  # Provide download link for generated PDF with marked differences
232
  st.subheader("Download PDF with Marked OCR Differences")
233
- with open(pdf_path, "rb") as pdf_file:
234
- pdf_bytes = pdf_file.read()
235
- st.download_button("Download Marked PDF", data=pdf_bytes, file_name="marked_differences.pdf", mime="application/pdf")
236
 
237
  if __name__ == "__main__":
238
  main()
 
10
  from reportlab.lib.pagesizes import letter
11
  import tempfile
12
  import os
13
+ from io import BytesIO
14
 
15
  # Initialize the easyocr Reader
16
  ocr_reader = easyocr.Reader(['en'])
 
26
  ocr_differences, marked_images = perform_ocr_and_compare(file1_content, file2_content)
27
 
28
  # Generate a PDF with marked OCR differences and positions
29
+ pdf_buffer = create_pdf_with_differences(marked_images, ocr_differences)
30
 
31
  # Compile an overall summary of differences
32
  overall_summary = generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences)
33
 
34
+ return text_differences, text_property_changes, special_char_changes, placement_changes, pdf_buffer, overall_summary
35
 
36
  def pdf_to_images(file_content):
37
  images = []
 
161
  return ocr_differences, marked_images
162
 
163
  def create_pdf_with_differences(marked_images, ocr_differences):
164
+ # Use BytesIO to create an in-memory PDF file
165
+ pdf_buffer = BytesIO()
166
+ c = canvas.Canvas(pdf_buffer, pagesize=letter)
167
 
168
  for page_num, img in marked_images.items():
169
  # Save the marked image to a temporary file
 
190
  # Move to the next page and delete the temporary image file
191
  c.showPage()
192
  temp_img_file.close()
 
193
  try:
194
  os.remove(temp_img_path)
195
  except OSError:
196
  pass
197
 
198
+ # Save the PDF to the in-memory buffer
199
  c.save()
200
+ pdf_buffer.seek(0)
201
+ return pdf_buffer
202
 
203
  def generate_overall_summary(text_differences, text_property_changes, special_char_changes, placement_changes, ocr_differences):
204
  overall_summary = {
 
225
  st.error("One or both files are empty. Please upload valid PDF files.")
226
  return
227
 
228
+ text_differences, text_property_changes, special_char_changes, placement_changes, pdf_buffer, overall_summary = load_and_compare_documents(customer_file, output_file)
229
 
230
  st.subheader("Overall Comparison Summary")
231
  for key, value in overall_summary.items():
 
233
 
234
  # Provide download link for generated PDF with marked differences
235
  st.subheader("Download PDF with Marked OCR Differences")
236
+ st.download_button("Download Marked PDF", data=pdf_buffer, file_name="marked_differences.pdf", mime="application/pdf")
 
 
237
 
238
  if __name__ == "__main__":
239
  main()