"""Additional resume fixtures for NER-chain evaluation.
Extends the original 4 fixtures in generate_resume_fixtures.py with 6 more
personas covering Backend, Data Engineer, AI Engineer (GenAI), DevOps/SRE,
BI Analyst, and ML Engineer — mapped against the 71-skill seeded catalog so
ground-truth canonical names line up 1:1 with Skill.skill_name.
Run: python backend/scripts/generate_resume_fixtures_eval.py
"""
from __future__ import annotations
from pathlib import Path
from reportlab.lib.pagesizes import LETTER
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
# Two levels above this script; per the "Run:" line in the module docstring
# that is the ``backend/`` directory.
BASE_DIR = Path(__file__).resolve().parent.parent
# Directory the generated resume PDFs are written into.
FIXTURES_DIR = BASE_DIR.joinpath("tests", "fixtures", "resumes")
def _write_paragraphs(path: Path, paragraphs: list[str]) -> None:
    """Render ``paragraphs`` as a LETTER-sized PDF written to ``path``.

    The first entry is styled with the sample stylesheet's ``Heading2``
    style; every later entry uses ``BodyText``. Each paragraph is followed
    by a ``Spacer(1, 8)``.

    Entries are treated as plain text: ``&``, ``<`` and ``>`` are escaped
    before being handed to ``Paragraph``, which parses its input as XML-like
    markup. Without escaping, a literal such as "Q&A" can be misread as an
    entity reference and corrupt the text stored in the fixture.

    Args:
        path: Destination PDF file; passed to ReportLab as a string.
        paragraphs: Plain-text paragraphs, heading first.
    """
    # Imported locally so this function does not rely on a change to the
    # module-level import block.
    from xml.sax.saxutils import escape

    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(
        str(path),
        pagesize=LETTER,
        leftMargin=54,
        rightMargin=54,
        topMargin=54,
        bottomMargin=54,
    )
    story = []
    for i, text in enumerate(paragraphs):
        style = styles["Heading2"] if i == 0 else styles["BodyText"]
        # Escape so the resume text reaches the PDF verbatim.
        story.append(Paragraph(escape(text), style))
        story.append(Spacer(1, 8))
    doc.build(story)
def resume_backend_dev() -> list[str]:
    """Return the paragraphs of the backend-developer persona resume.

    The list holds six strings in order: heading line, summary, skills,
    projects, internship, education. A new list is built on every call.
    """
    heading = "Hassan Raza - Software Engineering BS (UCP, Semester 7)"
    summary = (
        "Summary: Backend developer focused on Python and Django. "
        "Built production REST APIs for a logistics dashboard and a "
        "student course-registration portal. Comfortable with PostgreSQL "
        "schema design, Docker-based deployments, and Linux administration."
    )
    skills = (
        "Skills: Python (advanced), Django, Django REST Framework, "
        "PostgreSQL, Docker, Git, GitHub, Linux, REST API Design, JWT / OAuth, "
        "Bash Scripting."
    )
    projects = (
        "Projects: Internal billing API (Django + PostgreSQL, "
        "deployed with Docker Compose). Multi-tenant SaaS auth service "
        "using JWT token rotation. CI pipeline with GitHub Actions."
    )
    internship = (
        "Internship - Backend Engineer Intern @ Systems Ltd (2025): "
        "Optimised slow Django ORM queries, reduced p99 latency by 40%."
    )
    education = "Education: UCP Lahore, BS Software Engineering, CGPA 3.6/4.0."
    return [heading, summary, skills, projects, internship, education]
def resume_data_engineer() -> list[str]:
    """Return the paragraphs of the data-engineer persona resume.

    The list holds six strings in order: heading line, summary, skills,
    projects, internship, education. A new list is built on every call.
    """
    heading = "Sara Malik - BS Data Science (UCP, Semester 8)"
    summary = (
        "Summary: Data engineer with experience building batch and "
        "near-real-time pipelines. Lead author of an Airflow-orchestrated "
        "ETL stack that backs weekly marketing attribution reports."
    )
    skills = (
        "Skills: Python, SQL, Apache Spark, Apache Kafka, "
        "Apache Airflow, Snowflake, dbt, ETL / ELT, Docker, Git, "
        "Google BigQuery, PostgreSQL."
    )
    projects = (
        "Projects: Ingestion pipeline pulling click-stream events "
        "from Kafka into Snowflake via Spark Structured Streaming. "
        "dbt-based warehouse modelling with star-schema marts. Data "
        "quality tests with Great Expectations."
    )
    internship = (
        "Internship - Data Engineer @ Careem (2025): Authored 12 "
        "dbt models serving the rider-acquisition funnel."
    )
    education = "Education: UCP Lahore, BS Data Science, CGPA 3.85/4.0."
    return [heading, summary, skills, projects, internship, education]
def resume_ai_engineer_genai() -> list[str]:
    """Return the paragraphs of the AI-engineer (GenAI) persona resume.

    The list holds six strings in order: heading line, summary, skills,
    projects, research, education. A new list is built on every call.
    """
    heading = "Ahmad Tariq - BS Artificial Intelligence (UCP, Semester 7)"
    summary = (
        "Summary: AI engineer focused on LLM applications. "
        "Built retrieval-augmented chatbots using OpenAI and Anthropic "
        "models on internal corporate knowledge bases. Strong Python "
        "background, familiar with vector-DB retrieval patterns."
    )
    skills = (
        "Skills: Python, LangChain, LangGraph, OpenAI API, "
        "Anthropic Claude API, RAG, HuggingFace Transformers, "
        "Prompt Engineering, Sentence Transformers, pgvector, "
        "FastAPI, Git."
    )
    projects = (
        "Projects: Legal-document Q&A assistant using LangChain, "
        "OpenAI embeddings, and pgvector retrieval. Agentic research "
        "tool with LangGraph routing between Claude and GPT-4 per task."
    )
    research = (
        "Research: Undergraduate thesis on prompt-ensembling for "
        "structured extraction tasks. Submitted workshop paper to a "
        "local NLP meetup."
    )
    education = (
        "Education: UCP Lahore, BS Artificial Intelligence, "
        "CGPA 3.92/4.0."
    )
    return [heading, summary, skills, projects, research, education]
def resume_devops_sre() -> list[str]:
    """Return the paragraphs of the DevOps/SRE persona resume.

    The list holds six strings in order: heading line, summary, skills,
    projects, internship, education. A new list is built on every call.
    """
    heading = "Umer Farooq - BS Computer Science (UCP, Semester 8)"
    summary = (
        "Summary: DevOps/SRE-track engineer. Manages production "
        "Kubernetes clusters for a fintech platform. Deep experience in "
        "infrastructure-as-code, container orchestration, and observability."
    )
    skills = (
        "Skills: Docker, Kubernetes, Ansible, Terraform, Prometheus, "
        "Grafana, CI/CD Pipelines, Jenkins, Linux, Bash Scripting, "
        "AWS, Git, GitHub Actions."
    )
    projects = (
        "Projects: Migrated a 30-service monolith from plain EC2 "
        "to EKS with Helm charts. Set up Prometheus + Grafana dashboards "
        "covering request-rate, error-rate, and saturation across services. "
        "Ansible-driven bootstrap for new clusters."
    )
    internship = (
        "Internship - DevOps Engineer @ Tier5 (2025): On-call rotation, "
        "authored 14 runbooks, reduced MTTR by 2x."
    )
    education = "Education: UCP Lahore, BS Computer Science, CGPA 3.7/4.0."
    return [heading, summary, skills, projects, internship, education]
def resume_bi_analyst() -> list[str]:
    """Return the paragraphs of the BI-analyst persona resume.

    The list holds six strings in order: heading line, summary, skills,
    projects, internship, education. A new list is built on every call.
    """
    heading = "Mehwish Anwar - BS Data Science (UCP, Semester 6)"
    summary = (
        "Summary: BI analyst intern with experience in Power BI "
        "dashboards and SQL-based reporting. Comfortable with star-schema "
        "dimensional modelling and DAX measures."
    )
    skills = (
        "Skills: SQL, Microsoft Excel, Microsoft Power BI, Tableau, "
        "DAX, Power Query / M Language, Data Modeling, Data Storytelling, "
        "Star Schema Modeling, PostgreSQL."
    )
    projects = (
        "Projects: Executive-level sales dashboard on Power BI "
        "backed by a PostgreSQL star schema. Monthly churn-cohort Tableau "
        "workbook for a telecom case competition."
    )
    internship = (
        "Internship - BI Analyst @ Telenor (2025): Wrote 30+ SQL "
        "reports for the customer-experience team; rewrote 4 DAX measures "
        "that halved dashboard load time."
    )
    education = "Education: UCP Lahore, BS Data Science, CGPA 3.6/4.0."
    return [heading, summary, skills, projects, internship, education]
def resume_ml_engineer() -> list[str]:
    """Return the paragraphs of the ML-engineer persona resume.

    The list holds six strings in order: heading line, summary, skills,
    projects, internship, education. A new list is built on every call.
    """
    heading = "Taimoor Abbas - BS Data Science (UCP, Semester 8)"
    summary = (
        "Summary: Machine learning engineer with strong deep-learning "
        "foundations. Shipped a computer-vision defect-detection model to "
        "production on a factory-floor edge device. Experienced in "
        "experiment-tracking and model-serving pipelines."
    )
    skills = (
        "Skills: Python, PyTorch, TensorFlow, Scikit-learn, MLflow, "
        "Docker, Feature Engineering, HuggingFace Transformers, NumPy, "
        "Pandas, Matplotlib, Git, Statistical Hypothesis Testing."
    )
    projects = (
        "Projects: PyTorch-based ResNet defect-classifier with "
        "MLflow run tracking. Dockerised model server with FastAPI. "
        "Sklearn baseline pipelines for a churn-prediction kaggle comp."
    )
    internship = (
        "Internship - ML Engineer Intern @ Arbisoft (2025): Built "
        "a feature store prototype; documented 18 features with lineage "
        "and freshness SLAs."
    )
    education = "Education: UCP Lahore, BS Data Science, CGPA 3.8/4.0."
    return [heading, summary, skills, projects, internship, education]
def main() -> None:
    """Generate every evaluation resume PDF under ``FIXTURES_DIR``.

    Creates the fixtures directory (and any missing parents) if needed,
    writes one PDF per persona, and prints a progress line with the
    resulting file size in whole kilobytes for each.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    # (output filename, paragraph builder) pairs, written in this order.
    personas = (
        ("resume_backend_dev.pdf", resume_backend_dev),
        ("resume_data_engineer.pdf", resume_data_engineer),
        ("resume_ai_engineer_genai.pdf", resume_ai_engineer_genai),
        ("resume_devops_sre.pdf", resume_devops_sre),
        ("resume_bi_analyst.pdf", resume_bi_analyst),
        ("resume_ml_engineer.pdf", resume_ml_engineer),
    )
    print(f"Generating eval fixtures into {FIXTURES_DIR}")
    for filename, build_paragraphs in personas:
        target = FIXTURES_DIR / filename
        _write_paragraphs(target, build_paragraphs())
        size_kb = target.stat().st_size // 1024
        print(f" wrote {filename} ({size_kb} KB)")
    print("Done.")
# Generate the fixtures only when executed as a script, not on import.
if __name__ == "__main__":
    main()