Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -18,19 +18,19 @@ def extract_text(file_path):
|
|
| 18 |
for page_number in range(num_pages):
|
| 19 |
# st.write(f"Page {page_number + 1}")
|
| 20 |
page = pdf_reader.pages[page_number]
|
| 21 |
-
text = page.extract_text()
|
| 22 |
|
| 23 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
| 24 |
for i, image in enumerate(images):
|
| 25 |
# st.write(f"Page {i + 1}")
|
| 26 |
-
image_text = pytesseract.image_to_string(image)
|
| 27 |
|
| 28 |
st.write("text")
|
| 29 |
st.write(text)
|
| 30 |
|
| 31 |
st.write("image_text")
|
| 32 |
st.write(image_text)
|
| 33 |
-
|
| 34 |
text = text + image_text
|
| 35 |
st.write("plus")
|
| 36 |
st.write(text) # Display the extracted text from the image
|
|
|
|
| 18 |
for page_number in range(num_pages):
|
| 19 |
# st.write(f"Page {page_number + 1}")
|
| 20 |
page = pdf_reader.pages[page_number]
|
| 21 |
+
text += page.extract_text()
|
| 22 |
|
| 23 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
| 24 |
for i, image in enumerate(images):
|
| 25 |
# st.write(f"Page {i + 1}")
|
| 26 |
+
image_text += pytesseract.image_to_string(image)
|
| 27 |
|
| 28 |
st.write("text")
|
| 29 |
st.write(text)
|
| 30 |
|
| 31 |
st.write("image_text")
|
| 32 |
st.write(image_text)
|
| 33 |
+
|
| 34 |
text = text + image_text
|
| 35 |
st.write("plus")
|
| 36 |
st.write(text) # Display the extracted text from the image
|