import logging import re from datetime import date from pathlib import Path from pydantic import BaseModel from pydantic_ai import Agent from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.output import PromptedOutput from agents.modal_model import build_modal_model from models.config import AppSettings from models.cv import CVData from models.resume import HTMLResume logger = logging.getLogger(__name__) TEMPLATE_DIR = Path(__file__).resolve().parent.parent / "templates" class OptimizerResult(BaseModel): html: str changes: list[str] = [] def _load_resume_guide() -> str: guide_path = TEMPLATE_DIR / "resume_guide.md" if not guide_path.exists(): return "" return guide_path.read_text(encoding="utf-8") OPTIMIZER_BASE = r""" You are a resume optimization expert. Use the parsed resume data and create optimized HTML for a job posting. INPUT: Parsed candidate resume JSON and job posting text. OUTPUT: Generate HTML for the of a resume PDF. Do NOT include , , or tags - only the body content. CONTENT RULES: - When describing job experiences, show concrete results: focus on impact, not tasks. - Include specific technologies within achievement descriptions. - Feature keywords matching job requirements IF they exist in the original resume. You can add umbrella terms if relevant (e.g. if user was making transformer LLM models you can add "NLP"). - Prioritize and highlight experiences most relevant to the role. - If going over one page: remove unrelated content to save space. - Remove obvious skills (Excel, VS Code, Jupyter, GitHub, Jira) unless specifically required by job or very relevant to it. - Exclude: location, language proficiency, age, hobbies unless required by job posting. - Add a summary section highlighting the most relevant experiences. - Try to preserve the original writing style if possible. - Avoid leaving empty space at the bottom of the page if useful relevant content can fill it. - PROJECTS: Only include projects directly relevant to this job. Skip projects already listed under Publications. If no projects are relevant, omit the section. - PUBLICATIONS: Always use "PUBLICATIONS" as the section title when publications are present. - EDUCATION: By default include only the most recent / highest degree. Include multiple degrees only if both are relevant. {content_rules} CONTENT BUDGET: - Target: about 500 words and about 4000 characters. - The pipeline will validate length, structure, keyword coverage, hallucination risk, and renderability after you return. - If previous feedback is provided, make the smallest possible change to address that feedback. LINKS: - Preserve contact info from the original and never delete it. - Preserve URLs from the original resume: email, LinkedIn, GitHub, website, project links. - Use full URLs (include https://) in the href attribute of every tag. - Link display text must NOT start with https:// or http://. Show just the domain+path. PUBLICATIONS: - Always append the DOI in parentheses at the end if available, e.g. "Author et al., Title, Venue Year (DOI: 10.xxxx/xxxx)". TEMPLATE AND CSS: - Use the provided template guide and CSS classes exactly where possible. - Prefer semantic tags from the guide: header.header, h1.name, div.contact-line, section.section, h2.section-title, div.entry, ul.bullets, div.skills-list, ul.simple-list. - You MUST include a header with the candidate name and available contact links. - Do not emit Markdown. - Do not emit wrapper tags. - The guide examples are FORMAT EXAMPLES ONLY. Never copy example facts from the guide, including fake GPA, Dean's List, dates, companies, emails, URLs, projects, certifications, or publication titles. - For education notes, include GPA, honors, coursework, or awards ONLY if they appear in the parsed resume JSON or original resume text. {resume_guide} """ OPTIMIZER_STRICT_RULES = """ ALLOWED: - You CAN add related technologies plausible from context (e.g. Python user likely knows pip, venv; React user likely knows npm, webpack). - General/umbrella terms inferable from context: "NLP" if they did text processing, "SQL" if they used databases. - Rephrasing metrics with same values: "1% - 10%" -> "1-10%", "$10k" -> "$10,000". - Reordering and emphasizing existing content. STRICT RULES - NEVER VIOLATE: - NEVER add specific named products or platforms absent from the original unless they are a direct, obvious companion to something explicitly present and there is no other way to improve fit. - NEVER fabricate job titles, companies, degrees, certifications, achievements, publications, patents, awards, or projects. - NEVER copy example facts from the template guide into the candidate resume. - NEVER invent metrics, numbers, and achievements not in original. - Do NOT drop critical work experience or achievements unless they decrease fit. - Never use the em dash symbol, the word "delve", or other common markers of LLM-generated text. - NEVER add