kxx-kkk committed on
Commit
ba24d1a
·
verified ·
1 Parent(s): 18df6af

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import PyPDF2
2
- from pdf2image import convert_from_path
3
  import streamlit as st
4
  import pytesseract
 
5
  from PIL import Image
 
 
6
 
7
 
8
  def extract_text_from_pdf(pdf_path):
@@ -44,8 +46,8 @@ def main():
44
 
45
  text = extract_text_from_pdf(file_path)
46
 
47
- st.header("Extracted Text:")
48
- st.write(text)
49
 
50
  pages = convert_from_path(file_path)
51
  st.header("Extracted Images:")
@@ -53,8 +55,10 @@ def main():
53
  st.subheader(f"Page {page_num + 1}")
54
  st.image(page_img, use_column_width=True)
55
  image_text = extract_text_from_image(page_img)
56
- st.write("Image Text:")
57
- st.write(image_text)
 
 
58
 
59
  # Delete the temporary file
60
  os.remove(file_path)
 
1
  import PyPDF2
 
2
  import streamlit as st
3
  import pytesseract
4
+ from pdf2image import convert_from_path
5
  from PIL import Image
6
+ import tempfile
7
+ import os
8
 
9
 
10
  def extract_text_from_pdf(pdf_path):
 
46
 
47
  text = extract_text_from_pdf(file_path)
48
 
49
+ # st.header("Extracted Text:")
50
+ # st.write(text)
51
 
52
  pages = convert_from_path(file_path)
53
  st.header("Extracted Images:")
 
55
  st.subheader(f"Page {page_num + 1}")
56
  st.image(page_img, use_column_width=True)
57
  image_text = extract_text_from_image(page_img)
58
+
59
+ text += image_text
60
+ st.write("Text:")
61
+ st.write(text)
62
 
63
  # Delete the temporary file
64
  os.remove(file_path)