AiAnonymize / utils.py
TomassiniDigital's picture
Upload 6 files
632bc00 verified
import subprocess
import sys
import pdfplumber
def ensure_spacy_model(name: str) -> None:
    """Make sure the spaCy model *name* is available, downloading it if needed.

    The model is probed by attempting to load it. If that attempt raises
    ``OSError`` (treated here as "model not installed"), the model is fetched
    by running ``python -m spacy download <name>`` with the current
    interpreter.

    Args:
        name: Name of the spaCy model/pipeline package to check for.

    Raises:
        subprocess.CalledProcessError: If the download command exits with a
            non-zero status (``check=True``).
    """
    # Use the running interpreter so the model lands in the same environment.
    download_cmd = [sys.executable, "-m", "spacy", "download", name]
    try:
        # Imported lazily so that importing this module does not require spaCy.
        import spacy as _spacy

        _spacy.load(name)
    except OSError:
        # Load failed: install the model via spaCy's own CLI.
        subprocess.run(download_cmd, check=True)
def extract_pdf_text(pdf_file) -> str:
    """Extract plain text from all pages of a PDF file.

    Args:
        pdf_file: The PDF to read. May be:

            * a path given as ``str`` or any ``os.PathLike`` object
              (passed to ``pdfplumber.open`` unchanged);
            * an object exposing the file's path as ``.name`` (for example
              an uploaded/temporary file wrapper) -- the ``.name`` value is
              opened;
            * any other object (e.g. a binary stream without ``.name``),
              which is handed to ``pdfplumber.open`` as-is.

    Returns:
        The text of every page joined with ``"\\n"``. A page for which
        ``extract_text()`` returns a falsy value (``None`` or ``""``)
        contributes an empty string, so page count is preserved.
    """
    # Check for real paths first: ``pathlib.Path`` also has a ``.name``
    # attribute, but it is only the basename and would open the wrong file.
    if isinstance(pdf_file, str) or hasattr(pdf_file, "__fspath__"):
        source = pdf_file
    else:
        source = getattr(pdf_file, "name", pdf_file)

    with pdfplumber.open(source) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)