Spaces:
Sleeping
Sleeping
| """PDF text extraction using PyMuPDF.""" | |
| import fitz | |
def extract(file_path: str) -> str:
    """Extract text from all pages of a PDF file.

    Pages are read in document order. A page whose extracted text is empty
    or whitespace-only is skipped; the remaining page texts are joined with
    a blank line ("\\n\\n") between them.

    Args:
        file_path: Path of the document, passed unchanged to ``fitz.open``.

    Returns:
        The concatenated page text, or an empty string when no page yields
        any non-whitespace text.

    Raises:
        Any exception raised by ``fitz.open`` or ``page.get_text`` is
        propagated unchanged; the document is closed before it propagates.
    """
    pages = []
    # Open via the context manager so the document is closed even when
    # text extraction fails part-way through the page loop.
    with fitz.open(file_path) as doc:
        for page in doc:
            text = page.get_text()
            # Skip pages with no visible text so the output does not
            # accumulate runs of empty separators.
            if text.strip():
                pages.append(text)
    return "\n\n".join(pages)