Spaces:
Sleeping
Sleeping
| from pdf2image import convert_from_path | |
| import pytesseract | |
| import tempfile | |
def extract_text_from_pdf(pdf_path, dpi=300):
    """Extract text from a PDF by rendering its pages to images and OCR-ing them.

    Args:
        pdf_path: Path of the PDF file to read.
        dpi: Resolution passed to ``convert_from_path`` when rendering pages.
            Defaults to 300, the value that was previously hard-coded.

    Returns:
        The OCR output of every rendered page, in the order
        ``convert_from_path`` returned them, joined with newlines. An empty
        string if no pages were produced.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        # Rendered pages are written into tempdir instead of being held
        # entirely in memory.
        pages = convert_from_path(pdf_path, dpi=dpi, output_folder=tempdir)
        # OCR has to run while tempdir still exists: with output_folder set,
        # the returned images are backed by files in that directory and may
        # only be read from disk when first accessed.
        page_texts = [pytesseract.image_to_string(page) for page in pages]
    return "\n".join(page_texts)