Spaces:

Samarth991
/

Summarize-PhotoDocument

Sleeping

Samarth991 committed on Mar 28, 2024

Commit

e433400

verified ·

1 Parent(s): 951cf22

Update read_photodocument.py

Files changed (1) hide show

read_photodocument.py CHANGED Viewed

@@ -19,6 +19,8 @@ from spellchecker import SpellChecker
 from tqdm.auto import tqdm
 import nltk
 import contextlib
 nltk.download("stopwords")  # TODO=find where this requirement originates from
@@ -28,6 +30,19 @@ def simple_rename(filepath, target_ext=".txt"):
     return f"OCR_{basename}_{target_ext}"
 def rm_local_text_files(name_contains="RESULT_"):
     """
     rm_local_text_files - remove local text files

 from tqdm.auto import tqdm
 import nltk
 import contextlib
+import img2pdf
+from PIL import Image
 nltk.download("stopwords")  # TODO=find where this requirement originates from
     return f"OCR_{basename}_{target_ext}"
+def convert_image_to_pdf(image_path,model=None):
+    pdf_path = image_path.replace('.jpg','.pdf')
+    image = Image.open(image_path)
+    pdf_bytes = img2pdf.convert(image.filename)
+    file = open(pdf_path, "wb")
+    file.write(pdf_bytes)
+    image.close()
+    file.close()
+    return convert_PDF_to_Text(PDF_file=pdf_path,ocr_model=model)
 def rm_local_text_files(name_contains="RESULT_"):
     """
     rm_local_text_files - remove local text files