Spaces:

kxx-kkk
/

pdf_reader_try

Sleeping

kxx-kkk committed on Feb 9, 2024

Commit

9108bfe

verified ·

1 Parent(s): 9cd6a25

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pytesseract
 import PyPDF2
 from pdf2image import convert_from_path
 from PIL import Image
-import doctr
 def extract_text(file_path):
@@ -18,11 +18,10 @@ def extract_text(file_path):
             text = page.extract_text()
             st.write(text)  # Display the extracted selectable text
-    model = doctr.models.OCR()
-    images = model(file_path)  # Convert PDF pages to images
-    for i, page in enumerate(images.pages):
         st.write(f"Page {i + 1}")
-        text = page.content
         st.write(text)  # Display the extracted text from the image
 def main():

 import PyPDF2
 from pdf2image import convert_from_path
 from PIL import Image
 def extract_text(file_path):
             text = page.extract_text()
             st.write(text)  # Display the extracted selectable text
+    images = convert_from_path(file_path)  # Convert PDF pages to images
+    for i, image in enumerate(images):
         st.write(f"Page {i + 1}")
+        text = pytesseract.image_to_string(image)
         st.write(text)  # Display the extracted text from the image
 def main():