Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import pytesseract
|
|
| 4 |
import PyPDF2
|
| 5 |
from pdf2image import convert_from_path
|
| 6 |
from PIL import Image
|
| 7 |
-
|
| 8 |
|
| 9 |
|
| 10 |
def extract_text(file_path):
|
|
@@ -18,11 +18,10 @@ def extract_text(file_path):
|
|
| 18 |
text = page.extract_text()
|
| 19 |
st.write(text) # Display the extracted selectable text
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
for i, page in enumerate(images.pages):
|
| 24 |
st.write(f"Page {i + 1}")
|
| 25 |
-
text =
|
| 26 |
st.write(text) # Display the extracted text from the image
|
| 27 |
|
| 28 |
def main():
|
|
|
|
| 4 |
import PyPDF2
|
| 5 |
from pdf2image import convert_from_path
|
| 6 |
from PIL import Image
|
| 7 |
+
|
| 8 |
|
| 9 |
|
| 10 |
def extract_text(file_path):
|
|
|
|
| 18 |
text = page.extract_text()
|
| 19 |
st.write(text) # Display the extracted selectable text
|
| 20 |
|
| 21 |
+
images = convert_from_path(file_path) # Convert PDF pages to images
|
| 22 |
+
for i, image in enumerate(images):
|
|
|
|
| 23 |
st.write(f"Page {i + 1}")
|
| 24 |
+
text = pytesseract.image_to_string(image)
|
| 25 |
st.write(text) # Display the extracted text from the image
|
| 26 |
|
| 27 |
def main():
|