Spaces:
Sleeping
Sleeping
| import fitz # PyMuPDF | |
| import logging | |
| logger = logging.getLogger(__name__) | |
def extract_text_from_pdf(filepath: str) -> str:
    """Extract all text from a PDF file.

    Opens the document with PyMuPDF (``fitz``), collects the text of each
    page in iteration order, and returns the concatenation with leading and
    trailing whitespace stripped.

    Args:
        filepath: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        The concatenated page text, stripped at both ends.

    Raises:
        ValueError: If opening the document or reading any page raises; the
            original exception is attached as ``__cause__``.
    """
    try:
        with fitz.open(filepath) as doc:
            # Join once instead of growing a string with += on every page.
            text = "".join(page.get_text() for page in doc)
    except Exception as e:
        # Same message text as before, but formatted lazily by the logger.
        logger.error("PDF extraction error for %s: %s", filepath, e)
        # Chain explicitly so callers can still inspect the underlying error.
        raise ValueError(f"Could not extract text from PDF: {e}") from e
    return text.strip()