"""Generate synthetic UCP-student-shaped resume PDFs for Module 8 tests + demo.

Idempotent — re-running overwrites the same filenames. Output goes to
backend/tests/fixtures/resumes/.

What each fixture exercises (see fixtures/resumes/README.md for details):
  resume_ds_strong.pdf         — dense DS skills (Python/pandas/sklearn/...)
  resume_fe_junior.pdf         — junior FE profile (React/JS/HTML/CSS/Git)
  resume_fullstack_mixed.pdf   — composite Django+React+Postgres+Docker
  resume_minimal.pdf           — few literal skill names — forces SBERT/fuzzy
  resume_scanned.pdf           — text-free PDF (vector shapes only) → triggers ResumeParseError

Dev-only deps (requirements-dev.txt): reportlab.
"""
from __future__ import annotations

from pathlib import Path

from reportlab.lib.pagesizes import LETTER
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
from reportlab.pdfgen import canvas


# Two directory levels above this script.
BASE_DIR = Path(__file__).resolve().parent.parent
# Every generated fixture PDF is written into this directory.
FIXTURES_DIR = BASE_DIR.joinpath("tests", "fixtures", "resumes")


def _write_paragraphs(path: Path, paragraphs: list[str]) -> None:
    """Render ``paragraphs`` into a text-only, single-column PDF at ``path``.

    The first entry is styled as a ``Heading2`` headline and every later
    entry as ``BodyText``; each one is followed by an 8-point spacer. The
    strings are handed to ``Paragraph`` as-is, so inline markup such as
    ``<b>...</b>`` is interpreted rather than printed literally.
    """
    template = SimpleDocTemplate(
        str(path),
        pagesize=LETTER,
        leftMargin=54,
        rightMargin=54,
        topMargin=54,
        bottomMargin=54,
    )
    sheet = getSampleStyleSheet()
    flowables = []
    for position, markup in enumerate(paragraphs):
        # Only the very first paragraph is treated as the headline.
        style_name = "BodyText" if position else "Heading2"
        flowables.extend((Paragraph(markup, sheet[style_name]), Spacer(1, 8)))
    template.build(flowables)


def resume_ds_strong() -> list[str]:
    """Return the paragraphs of the skill-dense data-science resume fixture.

    The first entry is the name/degree headline; the remaining entries are
    sections labelled with ``<b>`` inline markup.
    """
    headline = "Ayesha Khan — Data Science BS (UCP, Semester 7)"
    summary = (
        "<b>Summary:</b> Final-year Data Science student with 2 internships in "
        "machine learning and dashboarding. Experienced in Python, SQL, Pandas, "
        "NumPy, Scikit-learn, TensorFlow, and PyTorch."
    )
    skills = (
        "<b>Skills:</b> Python (advanced, 3 years), SQL (advanced), Pandas, "
        "NumPy, Matplotlib, Scikit-learn, TensorFlow, PyTorch, Tableau, "
        "Statistical Hypothesis Testing, Git, Jupyter."
    )
    projects = (
        "<b>Projects:</b> Built an end-to-end churn-prediction pipeline using "
        "scikit-learn and XGBoost; deployed a neural-network image classifier "
        "with PyTorch and served it via FastAPI."
    )
    internship = (
        "<b>Internship — Data Analyst @ TechCorp (2024):</b> Designed SQL "
        "reports and Tableau dashboards for marketing funnel analysis."
    )
    education = (
        "<b>Education:</b> UCP Lahore, BS Data Science, CGPA 3.8/4.0. "
        "Relevant coursework: Machine Learning, Deep Learning, Statistics."
    )
    return [headline, summary, skills, projects, internship, education]


def resume_fe_junior() -> list[str]:
    """Return the paragraphs of the junior frontend resume fixture.

    The first entry is the name/degree headline; the remaining entries are
    sections labelled with ``<b>`` inline markup.
    """
    headline = "Bilal Ahmed — Computer Science BS (UCP, Semester 5)"
    summary = (
        "<b>Summary:</b> Junior frontend developer with a focus on React, "
        "HTML, and CSS. Familiar with basic Git workflows and modern "
        "JavaScript (ES2020+)."
    )
    skills = (
        "<b>Skills:</b> React, JavaScript, HTML, CSS, Git, Figma (beginner), "
        "Bootstrap, TailwindCSS, Node.js (basic)."
    )
    projects = (
        "<b>Projects:</b> Personal portfolio site (React, Vite, Tailwind). "
        "Classroom attendance dashboard UI with React and Ant Design."
    )
    education = "<b>Education:</b> UCP Lahore, BS Computer Science, CGPA 3.2/4.0."
    return [headline, summary, skills, projects, education]


def resume_fullstack_mixed() -> list[str]:
    """Return the paragraphs of the mixed full-stack resume fixture.

    The first entry is the name/degree headline; the remaining entries are
    sections labelled with ``<b>`` inline markup.
    """
    headline = "Zainab Ali — Software Engineering BS (UCP, Semester 7)"
    summary = (
        "<b>Summary:</b> Full-stack developer with production experience in "
        "Django and React. Built and deployed containerised web apps using "
        "Docker Compose and PostgreSQL."
    )
    skills = (
        "<b>Skills:</b> Python, Django, Django REST Framework, JavaScript, "
        "React, PostgreSQL, Redis, Docker, Git, GitHub, Linux, Nginx, "
        "RESTful APIs, HTML, CSS."
    )
    projects = (
        "<b>Projects:</b> UCP club management portal (Django + React + "
        "Postgres, deployed via Docker on a DigitalOcean droplet). "
        "CI/CD with GitHub Actions."
    )
    education = (
        "<b>Education:</b> UCP Lahore, BS Software Engineering, CGPA 3.7/4.0."
    )
    return [headline, summary, skills, projects, education]


def resume_minimal() -> list[str]:
    """Return the paragraphs of the deliberately vague resume fixture.

    Skills are given as paraphrases / near-names (data visualisation, cloud
    computing, databases, version control) rather than literal catalog
    names, so the lexical layer largely misses and SBERT/pgvector has to map
    the aliases to canonical catalog entries.
    """
    headline = "Usman Shah — BS Applied Mathematics (UCP, Semester 4)"
    summary = (
        "<b>Summary:</b> Math undergraduate interested in data-driven work. "
        "Some exposure to programming and statistical analysis."
    )
    skills = (
        "<b>Skills:</b> programming in scripting languages, data visualisation, "
        "spreadsheet modelling, basic cloud computing exposure, version "
        "control, databases, statistical analysis."
    )
    coursework = "<b>Coursework:</b> Probability, Linear Algebra, Numerical Methods."
    return [headline, summary, skills, coursework]


def generate_text_pdfs() -> None:
    """Write the four text-based resume fixtures into ``FIXTURES_DIR``.

    Creates the directory when it is missing, overwrites any existing file
    of the same name, and prints one progress line (with the size in whole
    KB) per PDF written.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    # All paragraph lists are built up front, before any file is written.
    fixtures = (
        ("resume_ds_strong.pdf", resume_ds_strong()),
        ("resume_fe_junior.pdf", resume_fe_junior()),
        ("resume_fullstack_mixed.pdf", resume_fullstack_mixed()),
        ("resume_minimal.pdf", resume_minimal()),
    )
    for filename, content in fixtures:
        target = FIXTURES_DIR / filename
        _write_paragraphs(target, content)
        size_kb = target.stat().st_size // 1024
        print(f"  wrote {filename}  ({size_kb} KB)")


def generate_scanned_pdf() -> None:
    """Emit a PDF with *no* extractable text — just vector shapes.

    Exercises the ResumeParseError('No text could be extracted') branch in
    resume_parser._extract_text. A real scanned CV would be a rasterised
    image; reportlab can't easily make one without Pillow, but an empty-text
    PDF lands in the same code path because extract_text() returns ''.

    The fixtures directory is created if it is missing, so this function
    can be called on its own without generate_text_pdfs() having run first.
    Any existing resume_scanned.pdf is overwritten.
    """
    # Don't rely on a sibling generator having created the directory.
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    out = FIXTURES_DIR / "resume_scanned.pdf"
    c = canvas.Canvas(str(out), pagesize=LETTER)
    # Draw two gray rectangles where "text" would be — no drawString calls,
    # so the page carries no text content at all.
    c.setFillColorRGB(0.85, 0.85, 0.85)
    c.rect(54, 600, 500, 120, fill=1, stroke=0)
    c.rect(54, 420, 500, 150, fill=1, stroke=0)
    c.showPage()
    c.save()
    print(f"  wrote resume_scanned.pdf  ({out.stat().st_size // 1024} KB)")


def main() -> None:
    """Regenerate every resume fixture, reporting progress on stdout."""
    print(f"Generating resume fixtures into {FIXTURES_DIR}")
    # Text fixtures go first; that step also creates the output directory.
    for step in (generate_text_pdfs, generate_scanned_pdf):
        step()
    print("Done.")


# Generate the fixtures only when run as a script, never on import.
if __name__ == "__main__":
    main()