Spaces:
Sleeping
Sleeping
File size: 486 Bytes
96df7b1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | import fitz # PyMuPDF
import logging
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
def extract_text_from_pdf(filepath: str) -> str:
    """Extract the text of every page of a PDF file.

    The document is opened with PyMuPDF (``fitz``), the text of each page is
    read in iteration order and concatenated without any separator, and
    leading/trailing whitespace is stripped from the combined result.

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        The concatenated page text with surrounding whitespace removed.

    Raises:
        ValueError: If opening the document or reading any page raises.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        with fitz.open(filepath) as doc:
            # Build the result with a single join instead of growing a
            # string with ``+=`` once per page.
            text = "".join(page.get_text() for page in doc)
    except Exception as e:
        # Every failure is logged and surfaced to callers as ValueError;
        # ``from e`` keeps the original error reachable for debugging.
        logger.error("PDF extraction error for %s: %s", filepath, e)
        raise ValueError(f"Could not extract text from PDF: {e}") from e
    return text.strip()
|