Spaces:
Sleeping
Sleeping
File size: 410 Bytes
a50fdc7 |
1 2 3 4 5 6 7 8 9 10 11 12 |
from pdf2image import convert_from_path
import pytesseract
import tempfile
def extract_text_from_pdf(pdf_path, *, dpi: int = 300, lang=None) -> str:
    """Return the OCR text of every page of a PDF, joined by newlines.

    The PDF is rasterized with ``pdf2image.convert_from_path`` into a
    temporary directory, each page image is passed to
    ``pytesseract.image_to_string``, and the per-page results are joined
    with ``"\\n"``.

    Args:
        pdf_path: Path of the PDF file to read.
        dpi: Resolution used to rasterize the pages. Defaults to 300,
            the value this function has always used.
        lang: Optional Tesseract language code(s) forwarded to
            ``pytesseract.image_to_string``; ``None`` keeps the library
            default.

    Returns:
        The recognized text of all pages, one page after another,
        separated by a newline. An empty string if no pages are produced.
    """
    page_texts = []
    # The rendered page files live in the temporary directory, so all OCR
    # has to happen before the ``with`` block exits and removes it.
    with tempfile.TemporaryDirectory() as tempdir:
        pages = convert_from_path(pdf_path, dpi=dpi, output_folder=tempdir)
        for page in pages:
            try:
                page_texts.append(pytesseract.image_to_string(page, lang=lang))
            finally:
                # Release the page's file handle and pixel data right away
                # instead of keeping every processed page alive until the
                # function returns.
                page.close()
    return "\n".join(page_texts)