kxx-kkk committed on
Commit
9108bfe
·
verified ·
1 Parent(s): 9cd6a25

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -4,7 +4,7 @@ import pytesseract
4
  import PyPDF2
5
  from pdf2image import convert_from_path
6
  from PIL import Image
7
- import doctr
8
 
9
 
10
  def extract_text(file_path):
@@ -18,11 +18,10 @@ def extract_text(file_path):
18
  text = page.extract_text()
19
  st.write(text) # Display the extracted selectable text
20
 
21
- model = doctr.models.OCR()
22
- images = model(file_path) # Convert PDF pages to images
23
- for i, page in enumerate(images.pages):
24
  st.write(f"Page {i + 1}")
25
- text = page.content
26
  st.write(text) # Display the extracted text from the image
27
 
28
  def main():
 
4
  import PyPDF2
5
  from pdf2image import convert_from_path
6
  from PIL import Image
7
+
8
 
9
 
10
  def extract_text(file_path):
 
18
  text = page.extract_text()
19
  st.write(text) # Display the extracted selectable text
20
 
21
+ images = convert_from_path(file_path) # Convert PDF pages to images
22
+ for i, image in enumerate(images):
 
23
  st.write(f"Page {i + 1}")
24
+ text = pytesseract.image_to_string(image)
25
  st.write(text) # Display the extracted text from the image
26
 
27
  def main():