"""Additional resume fixtures for NER-chain evaluation. Extends the original 4 fixtures in generate_resume_fixtures.py with 6 more personas covering Backend, Data Engineer, AI Engineer (GenAI), DevOps/SRE, BI Analyst, and ML Engineer — mapped against the 71-skill seeded catalog so ground-truth canonical names line up 1:1 with Skill.skill_name. Run: python backend/scripts/generate_resume_fixtures_eval.py """ from __future__ import annotations from pathlib import Path from reportlab.lib.pagesizes import LETTER from reportlab.lib.styles import getSampleStyleSheet from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer BASE_DIR = Path(__file__).resolve().parent.parent FIXTURES_DIR = BASE_DIR / "tests" / "fixtures" / "resumes" def _write_paragraphs(path: Path, paragraphs: list[str]) -> None: styles = getSampleStyleSheet() doc = SimpleDocTemplate( str(path), pagesize=LETTER, leftMargin=54, rightMargin=54, topMargin=54, bottomMargin=54, ) story = [] for i, text in enumerate(paragraphs): style = styles["Heading2"] if i == 0 else styles["BodyText"] story.append(Paragraph(text, style)) story.append(Spacer(1, 8)) doc.build(story) def resume_backend_dev() -> list[str]: return [ "Hassan Raza - Software Engineering BS (UCP, Semester 7)", "Summary: Backend developer focused on Python and Django. " "Built production REST APIs for a logistics dashboard and a " "student course-registration portal. Comfortable with PostgreSQL " "schema design, Docker-based deployments, and Linux administration.", "Skills: Python (advanced), Django, Django REST Framework, " "PostgreSQL, Docker, Git, GitHub, Linux, REST API Design, JWT / OAuth, " "Bash Scripting.", "Projects: Internal billing API (Django + PostgreSQL, " "deployed with Docker Compose). Multi-tenant SaaS auth service " "using JWT token rotation. CI pipeline with GitHub Actions.", "Internship - Backend Engineer Intern @ Systems Ltd (2025): " "Optimised slow Django ORM queries, reduced p99 latency by 40%.", "Education: UCP Lahore, BS Software Engineering, CGPA 3.6/4.0.", ] def resume_data_engineer() -> list[str]: return [ "Sara Malik - BS Data Science (UCP, Semester 8)", "Summary: Data engineer with experience building batch and " "near-real-time pipelines. Lead author of an Airflow-orchestrated " "ETL stack that backs weekly marketing attribution reports.", "Skills: Python, SQL, Apache Spark, Apache Kafka, " "Apache Airflow, Snowflake, dbt, ETL / ELT, Docker, Git, " "Google BigQuery, PostgreSQL.", "Projects: Ingestion pipeline pulling click-stream events " "from Kafka into Snowflake via Spark Structured Streaming. " "dbt-based warehouse modelling with star-schema marts. Data " "quality tests with Great Expectations.", "Internship - Data Engineer @ Careem (2025): Authored 12 " "dbt models serving the rider-acquisition funnel.", "Education: UCP Lahore, BS Data Science, CGPA 3.85/4.0.", ] def resume_ai_engineer_genai() -> list[str]: return [ "Ahmad Tariq - BS Artificial Intelligence (UCP, Semester 7)", "Summary: AI engineer focused on LLM applications. " "Built retrieval-augmented chatbots using OpenAI and Anthropic " "models on internal corporate knowledge bases. Strong Python " "background, familiar with vector-DB retrieval patterns.", "Skills: Python, LangChain, LangGraph, OpenAI API, " "Anthropic Claude API, RAG, HuggingFace Transformers, " "Prompt Engineering, Sentence Transformers, pgvector, " "FastAPI, Git.", "Projects: Legal-document Q&A assistant using LangChain, " "OpenAI embeddings, and pgvector retrieval. Agentic research " "tool with LangGraph routing between Claude and GPT-4 per task.", "Research: Undergraduate thesis on prompt-ensembling for " "structured extraction tasks. Submitted workshop paper to a " "local NLP meetup.", "Education: UCP Lahore, BS Artificial Intelligence, " "CGPA 3.92/4.0.", ] def resume_devops_sre() -> list[str]: return [ "Umer Farooq - BS Computer Science (UCP, Semester 8)", "Summary: DevOps/SRE-track engineer. Manages production " "Kubernetes clusters for a fintech platform. Deep experience in " "infrastructure-as-code, container orchestration, and observability.", "Skills: Docker, Kubernetes, Ansible, Terraform, Prometheus, " "Grafana, CI/CD Pipelines, Jenkins, Linux, Bash Scripting, " "AWS, Git, GitHub Actions.", "Projects: Migrated a 30-service monolith from plain EC2 " "to EKS with Helm charts. Set up Prometheus + Grafana dashboards " "covering request-rate, error-rate, and saturation across services. " "Ansible-driven bootstrap for new clusters.", "Internship - DevOps Engineer @ Tier5 (2025): On-call rotation, " "authored 14 runbooks, reduced MTTR by 2x.", "Education: UCP Lahore, BS Computer Science, CGPA 3.7/4.0.", ] def resume_bi_analyst() -> list[str]: return [ "Mehwish Anwar - BS Data Science (UCP, Semester 6)", "Summary: BI analyst intern with experience in Power BI " "dashboards and SQL-based reporting. Comfortable with star-schema " "dimensional modelling and DAX measures.", "Skills: SQL, Microsoft Excel, Microsoft Power BI, Tableau, " "DAX, Power Query / M Language, Data Modeling, Data Storytelling, " "Star Schema Modeling, PostgreSQL.", "Projects: Executive-level sales dashboard on Power BI " "backed by a PostgreSQL star schema. Monthly churn-cohort Tableau " "workbook for a telecom case competition.", "Internship - BI Analyst @ Telenor (2025): Wrote 30+ SQL " "reports for the customer-experience team; rewrote 4 DAX measures " "that halved dashboard load time.", "Education: UCP Lahore, BS Data Science, CGPA 3.6/4.0.", ] def resume_ml_engineer() -> list[str]: return [ "Taimoor Abbas - BS Data Science (UCP, Semester 8)", "Summary: Machine learning engineer with strong deep-learning " "foundations. Shipped a computer-vision defect-detection model to " "production on a factory-floor edge device. Experienced in " "experiment-tracking and model-serving pipelines.", "Skills: Python, PyTorch, TensorFlow, Scikit-learn, MLflow, " "Docker, Feature Engineering, HuggingFace Transformers, NumPy, " "Pandas, Matplotlib, Git, Statistical Hypothesis Testing.", "Projects: PyTorch-based ResNet defect-classifier with " "MLflow run tracking. Dockerised model server with FastAPI. " "Sklearn baseline pipelines for a churn-prediction kaggle comp.", "Internship - ML Engineer Intern @ Arbisoft (2025): Built " "a feature store prototype; documented 18 features with lineage " "and freshness SLAs.", "Education: UCP Lahore, BS Data Science, CGPA 3.8/4.0.", ] def main() -> None: FIXTURES_DIR.mkdir(parents=True, exist_ok=True) jobs = { "resume_backend_dev.pdf": resume_backend_dev(), "resume_data_engineer.pdf": resume_data_engineer(), "resume_ai_engineer_genai.pdf": resume_ai_engineer_genai(), "resume_devops_sre.pdf": resume_devops_sre(), "resume_bi_analyst.pdf": resume_bi_analyst(), "resume_ml_engineer.pdf": resume_ml_engineer(), } print(f"Generating eval fixtures into {FIXTURES_DIR}") for filename, paragraphs in jobs.items(): out = FIXTURES_DIR / filename _write_paragraphs(out, paragraphs) print(f" wrote {filename} ({out.stat().st_size // 1024} KB)") print("Done.") if __name__ == "__main__": main()