Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
import PyPDF2
|
| 2 |
-
from pdf2image import convert_from_path
|
| 3 |
import streamlit as st
|
| 4 |
import pytesseract
|
|
|
|
| 5 |
from PIL import Image
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def extract_text_from_pdf(pdf_path):
|
|
@@ -44,8 +46,8 @@ def main():
|
|
| 44 |
|
| 45 |
text = extract_text_from_pdf(file_path)
|
| 46 |
|
| 47 |
-
st.header("Extracted Text:")
|
| 48 |
-
st.write(text)
|
| 49 |
|
| 50 |
pages = convert_from_path(file_path)
|
| 51 |
st.header("Extracted Images:")
|
|
@@ -53,8 +55,10 @@ def main():
|
|
| 53 |
st.subheader(f"Page {page_num + 1}")
|
| 54 |
st.image(page_img, use_column_width=True)
|
| 55 |
image_text = extract_text_from_image(page_img)
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# Delete the temporary file
|
| 60 |
os.remove(file_path)
|
|
|
|
| 1 |
import PyPDF2
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
import pytesseract
|
| 4 |
+
from pdf2image import convert_from_path
|
| 5 |
from PIL import Image
|
| 6 |
+
import tempfile
|
| 7 |
+
import os
|
| 8 |
|
| 9 |
|
| 10 |
def extract_text_from_pdf(pdf_path):
|
|
|
|
| 46 |
|
| 47 |
text = extract_text_from_pdf(file_path)
|
| 48 |
|
| 49 |
+
# st.header("Extracted Text:")
|
| 50 |
+
# st.write(text)
|
| 51 |
|
| 52 |
pages = convert_from_path(file_path)
|
| 53 |
st.header("Extracted Images:")
|
|
|
|
| 55 |
st.subheader(f"Page {page_num + 1}")
|
| 56 |
st.image(page_img, use_column_width=True)
|
| 57 |
image_text = extract_text_from_image(page_img)
|
| 58 |
+
|
| 59 |
+
text += image_text
|
| 60 |
+
st.write("Text:")
|
| 61 |
+
st.write(text)
|
| 62 |
|
| 63 |
# Delete the temporary file
|
| 64 |
os.remove(file_path)
|