Spaces:

kxx-kkk
/

pdf_reader_try

Sleeping

kxx-kkk committed on Feb 9, 2024

Commit

ba24d1a

verified ·

1 Parent(s): 18df6af

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import PyPDF2
-from pdf2image import convert_from_path
 import streamlit as st
 import pytesseract
 from PIL import Image
 def extract_text_from_pdf(pdf_path):
@@ -44,8 +46,8 @@ def main():
         text = extract_text_from_pdf(file_path)
-        st.header("Extracted Text:")
-        st.write(text)
         pages = convert_from_path(file_path)
         st.header("Extracted Images:")
@@ -53,8 +55,10 @@ def main():
             st.subheader(f"Page {page_num + 1}")
             st.image(page_img, use_column_width=True)
             image_text = extract_text_from_image(page_img)
-            st.write("Image Text:")
-            st.write(image_text)
         # Delete the temporary file
         os.remove(file_path)

 import PyPDF2
 import streamlit as st
 import pytesseract
+from pdf2image import convert_from_path
 from PIL import Image
+import tempfile
+import os
 def extract_text_from_pdf(pdf_path):
         text = extract_text_from_pdf(file_path)
+        # st.header("Extracted Text:")
+        # st.write(text)
         pages = convert_from_path(file_path)
         st.header("Extracted Images:")
             st.subheader(f"Page {page_num + 1}")
             st.image(page_img, use_column_width=True)
             image_text = extract_text_from_image(page_img)
+            text += image_text
+            st.write("Text:")
+            st.write(text)
         # Delete the temporary file
         os.remove(file_path)