Spaces:
Sleeping
Sleeping
"""Additional resume fixtures for NER-chain evaluation.

Extends the original 4 fixtures in generate_resume_fixtures.py with 6 more
personas covering Backend, Data Engineer, AI Engineer (GenAI), DevOps/SRE,
BI Analyst, and ML Engineer — mapped against the 71-skill seeded catalog so
ground-truth canonical names line up 1:1 with Skill.skill_name.

Run: python backend/scripts/generate_resume_fixtures_eval.py
"""
| from __future__ import annotations | |
| from pathlib import Path | |
| from reportlab.lib.pagesizes import LETTER | |
| from reportlab.lib.styles import getSampleStyleSheet | |
| from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer | |
# Two directory levels above this script (the module docstring runs it as
# backend/scripts/..., which makes this the backend/ directory).
BASE_DIR = Path(__file__).resolve().parent.parent
# Output directory that receives the generated resume PDFs.
FIXTURES_DIR = BASE_DIR.joinpath("tests", "fixtures", "resumes")
def _write_paragraphs(path: Path, paragraphs: list[str]) -> None:
    """Render ``paragraphs`` into a LETTER-sized PDF written to ``path``.

    The first paragraph is set in the sample stylesheet's ``Heading2`` style
    and every later one in ``BodyText``. Each paragraph is followed by a
    ``Spacer(1, 8)``. The text is passed to ``Paragraph`` unchanged, so any
    inline markup in the strings (e.g. ``<b>``) is left for reportlab to
    interpret.

    Args:
        path: Destination file for the PDF.
        paragraphs: Paragraph texts in output order; index 0 is the heading.
    """
    margin = 54  # same margin on all four sides
    document = SimpleDocTemplate(
        str(path),
        pagesize=LETTER,
        leftMargin=margin,
        rightMargin=margin,
        topMargin=margin,
        bottomMargin=margin,
    )
    sheet = getSampleStyleSheet()

    flowables = []
    for index, text in enumerate(paragraphs):
        # Only the very first entry is rendered as a heading.
        style_name = "Heading2" if index == 0 else "BodyText"
        flowables.append(Paragraph(text, sheet[style_name]))
        flowables.append(Spacer(1, 8))

    document.build(flowables)
def resume_backend_dev() -> list[str]:
    """Return the paragraph texts for the backend-developer persona.

    The list is ordered heading first, then summary, skills, projects,
    internship, and education. Section labels carry inline ``<b>`` markup.
    """
    heading = "Hassan Raza - Software Engineering BS (UCP, Semester 7)"
    summary = (
        "<b>Summary:</b> Backend developer focused on Python and Django. "
        "Built production REST APIs for a logistics dashboard and a "
        "student course-registration portal. Comfortable with PostgreSQL "
        "schema design, Docker-based deployments, and Linux administration."
    )
    skills = (
        "<b>Skills:</b> Python (advanced), Django, Django REST Framework, "
        "PostgreSQL, Docker, Git, GitHub, Linux, REST API Design, JWT / OAuth, "
        "Bash Scripting."
    )
    projects = (
        "<b>Projects:</b> Internal billing API (Django + PostgreSQL, "
        "deployed with Docker Compose). Multi-tenant SaaS auth service "
        "using JWT token rotation. CI pipeline with GitHub Actions."
    )
    internship = (
        "<b>Internship - Backend Engineer Intern @ Systems Ltd (2025):</b> "
        "Optimised slow Django ORM queries, reduced p99 latency by 40%."
    )
    education = "<b>Education:</b> UCP Lahore, BS Software Engineering, CGPA 3.6/4.0."

    return [heading, summary, skills, projects, internship, education]
def resume_data_engineer() -> list[str]:
    """Return the paragraph texts for the data-engineer persona.

    The list is ordered heading first, then summary, skills, projects,
    internship, and education. Section labels carry inline ``<b>`` markup.
    """
    heading = "Sara Malik - BS Data Science (UCP, Semester 8)"
    summary = (
        "<b>Summary:</b> Data engineer with experience building batch and "
        "near-real-time pipelines. Lead author of an Airflow-orchestrated "
        "ETL stack that backs weekly marketing attribution reports."
    )
    skills = (
        "<b>Skills:</b> Python, SQL, Apache Spark, Apache Kafka, "
        "Apache Airflow, Snowflake, dbt, ETL / ELT, Docker, Git, "
        "Google BigQuery, PostgreSQL."
    )
    projects = (
        "<b>Projects:</b> Ingestion pipeline pulling click-stream events "
        "from Kafka into Snowflake via Spark Structured Streaming. "
        "dbt-based warehouse modelling with star-schema marts. Data "
        "quality tests with Great Expectations."
    )
    internship = (
        "<b>Internship - Data Engineer @ Careem (2025):</b> Authored 12 "
        "dbt models serving the rider-acquisition funnel."
    )
    education = "<b>Education:</b> UCP Lahore, BS Data Science, CGPA 3.85/4.0."

    return [heading, summary, skills, projects, internship, education]
def resume_ai_engineer_genai() -> list[str]:
    """Return the paragraph texts for the GenAI-focused AI-engineer persona.

    The list is ordered heading first, then summary, skills, projects,
    research, and education. Section labels carry inline ``<b>`` markup.
    """
    heading = "Ahmad Tariq - BS Artificial Intelligence (UCP, Semester 7)"
    summary = (
        "<b>Summary:</b> AI engineer focused on LLM applications. "
        "Built retrieval-augmented chatbots using OpenAI and Anthropic "
        "models on internal corporate knowledge bases. Strong Python "
        "background, familiar with vector-DB retrieval patterns."
    )
    skills = (
        "<b>Skills:</b> Python, LangChain, LangGraph, OpenAI API, "
        "Anthropic Claude API, RAG, HuggingFace Transformers, "
        "Prompt Engineering, Sentence Transformers, pgvector, "
        "FastAPI, Git."
    )
    projects = (
        "<b>Projects:</b> Legal-document Q&A assistant using LangChain, "
        "OpenAI embeddings, and pgvector retrieval. Agentic research "
        "tool with LangGraph routing between Claude and GPT-4 per task."
    )
    research = (
        "<b>Research:</b> Undergraduate thesis on prompt-ensembling for "
        "structured extraction tasks. Submitted workshop paper to a "
        "local NLP meetup."
    )
    education = (
        "<b>Education:</b> UCP Lahore, BS Artificial Intelligence, "
        "CGPA 3.92/4.0."
    )

    return [heading, summary, skills, projects, research, education]
def resume_devops_sre() -> list[str]:
    """Return the paragraph texts for the DevOps/SRE persona.

    The list is ordered heading first, then summary, skills, projects,
    internship, and education. Section labels carry inline ``<b>`` markup.
    """
    heading = "Umer Farooq - BS Computer Science (UCP, Semester 8)"
    summary = (
        "<b>Summary:</b> DevOps/SRE-track engineer. Manages production "
        "Kubernetes clusters for a fintech platform. Deep experience in "
        "infrastructure-as-code, container orchestration, and observability."
    )
    skills = (
        "<b>Skills:</b> Docker, Kubernetes, Ansible, Terraform, Prometheus, "
        "Grafana, CI/CD Pipelines, Jenkins, Linux, Bash Scripting, "
        "AWS, Git, GitHub Actions."
    )
    projects = (
        "<b>Projects:</b> Migrated a 30-service monolith from plain EC2 "
        "to EKS with Helm charts. Set up Prometheus + Grafana dashboards "
        "covering request-rate, error-rate, and saturation across services. "
        "Ansible-driven bootstrap for new clusters."
    )
    internship = (
        "<b>Internship - DevOps Engineer @ Tier5 (2025):</b> On-call rotation, "
        "authored 14 runbooks, reduced MTTR by 2x."
    )
    education = "<b>Education:</b> UCP Lahore, BS Computer Science, CGPA 3.7/4.0."

    return [heading, summary, skills, projects, internship, education]
def resume_bi_analyst() -> list[str]:
    """Return the paragraph texts for the BI-analyst persona.

    The list is ordered heading first, then summary, skills, projects,
    internship, and education. Section labels carry inline ``<b>`` markup.
    """
    heading = "Mehwish Anwar - BS Data Science (UCP, Semester 6)"
    summary = (
        "<b>Summary:</b> BI analyst intern with experience in Power BI "
        "dashboards and SQL-based reporting. Comfortable with star-schema "
        "dimensional modelling and DAX measures."
    )
    skills = (
        "<b>Skills:</b> SQL, Microsoft Excel, Microsoft Power BI, Tableau, "
        "DAX, Power Query / M Language, Data Modeling, Data Storytelling, "
        "Star Schema Modeling, PostgreSQL."
    )
    projects = (
        "<b>Projects:</b> Executive-level sales dashboard on Power BI "
        "backed by a PostgreSQL star schema. Monthly churn-cohort Tableau "
        "workbook for a telecom case competition."
    )
    internship = (
        "<b>Internship - BI Analyst @ Telenor (2025):</b> Wrote 30+ SQL "
        "reports for the customer-experience team; rewrote 4 DAX measures "
        "that halved dashboard load time."
    )
    education = "<b>Education:</b> UCP Lahore, BS Data Science, CGPA 3.6/4.0."

    return [heading, summary, skills, projects, internship, education]
def resume_ml_engineer() -> list[str]:
    """Return the paragraph texts for the ML-engineer persona.

    The list is ordered heading first, then summary, skills, projects,
    internship, and education. Section labels carry inline ``<b>`` markup.
    """
    heading = "Taimoor Abbas - BS Data Science (UCP, Semester 8)"
    summary = (
        "<b>Summary:</b> Machine learning engineer with strong deep-learning "
        "foundations. Shipped a computer-vision defect-detection model to "
        "production on a factory-floor edge device. Experienced in "
        "experiment-tracking and model-serving pipelines."
    )
    skills = (
        "<b>Skills:</b> Python, PyTorch, TensorFlow, Scikit-learn, MLflow, "
        "Docker, Feature Engineering, HuggingFace Transformers, NumPy, "
        "Pandas, Matplotlib, Git, Statistical Hypothesis Testing."
    )
    projects = (
        "<b>Projects:</b> PyTorch-based ResNet defect-classifier with "
        "MLflow run tracking. Dockerised model server with FastAPI. "
        "Sklearn baseline pipelines for a churn-prediction kaggle comp."
    )
    internship = (
        "<b>Internship - ML Engineer Intern @ Arbisoft (2025):</b> Built "
        "a feature store prototype; documented 18 features with lineage "
        "and freshness SLAs."
    )
    education = "<b>Education:</b> UCP Lahore, BS Data Science, CGPA 3.8/4.0."

    return [heading, summary, skills, projects, internship, education]
def main() -> None:
    """Generate every evaluation resume PDF under ``FIXTURES_DIR``.

    Creates the output directory if it does not exist, writes one PDF per
    persona, and prints each file name with its on-disk size in whole KB
    (integer division, so files under 1024 bytes report 0 KB).
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)

    # Output file name paired with the function that produces its paragraphs.
    builders = (
        ("resume_backend_dev.pdf", resume_backend_dev),
        ("resume_data_engineer.pdf", resume_data_engineer),
        ("resume_ai_engineer_genai.pdf", resume_ai_engineer_genai),
        ("resume_devops_sre.pdf", resume_devops_sre),
        ("resume_bi_analyst.pdf", resume_bi_analyst),
        ("resume_ml_engineer.pdf", resume_ml_engineer),
    )

    print(f"Generating eval fixtures into {FIXTURES_DIR}")
    for filename, build in builders:
        target = FIXTURES_DIR / filename
        _write_paragraphs(target, build())
        size_kb = target.stat().st_size // 1024
        print(f" wrote {filename} ({size_kb} KB)")
    print("Done.")
# Generate the fixtures only when executed as a script, not when imported.
if __name__ == "__main__":
    main()