Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -16,12 +16,13 @@ def extract_text(file_path):
|
|
| 16 |
st.write(f"Page {page_number + 1}")
|
| 17 |
page = pdf_reader.pages[page_number]
|
| 18 |
text = page.extract_text()
|
| 19 |
-
st.write(text) # Display the extracted selectable text
|
| 20 |
|
| 21 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
| 22 |
for i, image in enumerate(images):
|
| 23 |
st.write(f"Page {i + 1}")
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
st.write(text) # Display the extracted text from the image
|
| 26 |
|
| 27 |
def main():
|
|
|
|
| 16 |
st.write(f"Page {page_number + 1}")
|
| 17 |
page = pdf_reader.pages[page_number]
|
| 18 |
text = page.extract_text()
|
|
|
|
| 19 |
|
| 20 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
| 21 |
for i, image in enumerate(images):
|
| 22 |
st.write(f"Page {i + 1}")
|
| 23 |
+
image_text = pytesseract.image_to_string(image)
|
| 24 |
+
|
| 25 |
+
text += image_text
|
| 26 |
st.write(text) # Display the extracted text from the image
|
| 27 |
|
| 28 |
def main():
|