Spaces:
Sleeping
Sleeping
| import subprocess | |
| import sys | |
| import pdfplumber | |
def ensure_spacy_model(name: str) -> None:
    """Make sure the spaCy model ``name`` is available, fetching it if needed.

    The model is probed by importing spaCy and loading it. If that probe
    raises ``OSError``, ``python -m spacy download <name>`` is run with the
    interpreter that is executing this code.

    Args:
        name: Model identifier passed both to ``spacy.load`` and to the
            ``spacy download`` command.

    Raises:
        subprocess.CalledProcessError: If the download command exits with a
            non-zero status (``check=True``).
    """
    try:
        import spacy

        spacy.load(name)
    except OSError:
        # Use sys.executable so the download runs under the same interpreter
        # as this process rather than whatever "python" is on PATH.
        download_cmd = [sys.executable, "-m", "spacy", "download", name]
        subprocess.run(download_cmd, check=True)
def extract_pdf_text(pdf_file) -> str:
    """Extract plain text from all pages of a PDF file.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path of the PDF through a ``name``
            attribute (for example an uploaded temp-file wrapper).

    Returns:
        The text of every page, in page order, joined with newlines. A page
        for which ``extract_text()`` returns a falsy value contributes an
        empty string, so the number of separators always matches the page
        count.

    Raises:
        AttributeError: If ``pdf_file`` is neither path-like nor has a
            ``name`` attribute (e.g. ``None``).
    """
    import os

    # Path-like inputs are used directly. Reading ``.name`` from a
    # ``pathlib.Path`` would yield only the final path component, and a plain
    # ``str`` has no ``.name`` at all.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    with pdfplumber.open(pdf_path) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_texts)