Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| from typing import List, Optional | |
| import logging | |
| import re | |
| import textwrap | |
| from datetime import datetime | |
| from models.schemas import UserProfile, JobPosting, ResumeDraft | |
| from memory.store import memory_store | |
| from utils.text import extract_keywords_from_text, clamp_to_char_limit | |
| from utils.ats import ( | |
| format_resume_header, | |
| format_experience_section, | |
| format_skills_section, | |
| basic_resume_template, | |
| ensure_keywords, | |
| ACTION_VERBS, | |
| strengthen_action_verbs, | |
| ) | |
| from utils.consistency import allowed_keywords_from_profile, coverage_score, conciseness_score | |
| from utils.config import AgentConfig, LLMConfig | |
| from services.web_research import get_role_guidelines | |
| from services.llm import llm | |
| from utils.langextractor import distill_text | |
| try: | |
| from utils.langextractor_enhanced import extract_structured_info, extract_ats_keywords | |
| ENHANCED_EXTRACTION = True | |
| except ImportError: | |
| ENHANCED_EXTRACTION = False | |
| logger = logging.getLogger(__name__) | |
| def _clamp_words(text: str, max_words: int) -> str: | |
| if not text: | |
| return "" | |
| words = text.strip().split() | |
| if len(words) <= max_words: | |
| return text.strip() | |
| return " ".join(words[:max_words]).strip() | |
| def _extract_year(s: Optional[str]) -> Optional[int]: | |
| if not s: | |
| return None | |
| m = re.search(r"(19|20)\d{2}", s) | |
| return int(m.group(0)) if m else None | |
| def _uk_month_name(m: int) -> str: | |
| return ["", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][max(0, min(12, m))] | |
| def _uk_date_str(s: Optional[str]) -> Optional[str]: | |
| if not s: | |
| return None | |
| ss = s.strip() | |
| if ss.lower() == "present": | |
| return "Present" | |
| # YYYY-MM or YYYY/M or YYYY/MM | |
| m = re.match(r"^(\d{4})[-/](\d{1,2})$", ss) | |
| if m: | |
| y = int(m.group(1)); mo = int(m.group(2)) | |
| return f"{_uk_month_name(mo)} {y}" | |
| # MM/YYYY | |
| m = re.match(r"^(\d{1,2})/(\d{4})$", ss) | |
| if m: | |
| mo = int(m.group(1)); y = int(m.group(2)) | |
| return f"{_uk_month_name(mo)} {y}" | |
| # YYYY only | |
| m = re.match(r"^(\d{4})$", ss) | |
| if m: | |
| return m.group(1) | |
| return ss | |
| def _postprocess_bullets(text: str) -> str: | |
| if not text: | |
| return text | |
| lines = [] | |
| for line in text.splitlines(): | |
| newline = line | |
| if newline.lstrip().startswith("-"): | |
| # Remove first-person pronouns at bullet start | |
| newline = re.sub(r"^(\s*-\s*)(?:I|We|My)\s+", r"\1", newline, flags=re.IGNORECASE) | |
| # Remove trailing period | |
| newline = re.sub(r"\.(\s*)$", r"\1", newline) | |
| # Normalise percent and GBP | |
| newline = re.sub(r"\bper\s*cent\b", "%", newline, flags=re.IGNORECASE) | |
| newline = re.sub(r"\bpercent\b", "%", newline, flags=re.IGNORECASE) | |
| newline = newline.replace("GBP", "£") | |
| lines.append(newline) | |
| return "\n".join(lines) | |
| def _strip_personal_info(text: str) -> str: | |
| if not text: | |
| return text | |
| # Remove DOB lines and photo references | |
| text = re.sub(r"^.*\b(date of birth|dob)\b.*$", "", text, flags=re.IGNORECASE | re.MULTILINE) | |
| text = re.sub(r"^.*\b(photo|headshot)\b.*$", "", text, flags=re.IGNORECASE | re.MULTILINE) | |
| # Clean extra blank lines | |
| text = re.sub(r"\n{3,}", "\n\n", text) | |
| return text.strip() + "\n" | |
class CVOwnerAgent:
    """Agent that drafts a CV for a job posting and refines it over several cycles.

    Each cycle injects job keywords, optionally rewrites the draft through the
    LLM, applies plain-text clean-ups, scores the result and records the draft
    plus a set of signals in ``memory_store``.
    """

    # Phrases removed from every draft; matched case-insensitively.
    _BUZZWORDS = (
        "results-driven", "team player", "works well alone", "people person",
        "perfectionist", "multi-tasker", "multi tasker", "dynamic go-getter",
    )
    # Lower-case month abbreviations used to order free-form dates ("Jan 2020").
    _MONTH_ABBREVS = (
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec",
    )

    def __init__(self) -> None:
        self.name = "cv_owner"
        self.max_chars = AgentConfig.RESUME_MAX_CHARS

    @staticmethod
    def _date_sort_key(value: Optional[str]) -> tuple:
        """Return a ``(year, month)`` key so dates order chronologically.

        Empty values and "present" sort last-in-time as ``(9999, 12)``.
        "YYYY-MM", "YYYY/M" and "MM/YYYY" are parsed numerically; other text
        falls back to the first 19xx/20xx year plus a month abbreviation if one
        is present. Text with no recognisable year sorts first as ``(0, 0)``.
        """
        text = (value or "").strip().lower()
        if not text or text == "present":
            return (9999, 12)
        m = re.match(r"^(\d{4})[-/](\d{1,2})", text)
        if m:
            return (int(m.group(1)), int(m.group(2)))
        m = re.match(r"^(\d{1,2})/(\d{4})$", text)
        if m:
            return (int(m.group(2)), int(m.group(1)))
        m = re.search(r"(?:19|20)\d{2}", text)
        if m:
            month = next(
                (i for i, abbr in enumerate(CVOwnerAgent._MONTH_ABBREVS, 1) if abbr in text),
                0,
            )
            return (int(m.group(0)), month)
        return (0, 0)

    @staticmethod
    def _has_employment_gap(experiences_sorted, min_gap_years: int = 2) -> bool:
        """Tell whether two adjacent roles are separated by ``min_gap_years`` or more.

        ``experiences_sorted`` must be newest-first. The gap is measured from the
        OLDER role's end year to the NEWER role's start year; roles without a
        parseable year (e.g. an end date of "Present") are skipped.
        """
        for newer, older in zip(experiences_sorted, experiences_sorted[1:]):
            newer_start = _extract_year(newer.start_date)
            older_end = _extract_year(older.end_date)
            if newer_start and older_end and (newer_start - older_end) >= min_gap_years:
                return True
        return False

    @staticmethod
    def _scrub_buzzwords(text: str) -> str:
        """Delete every buzzword phrase from *text*, ignoring letter case."""
        pattern = "|".join(re.escape(phrase) for phrase in CVOwnerAgent._BUZZWORDS)
        return re.sub(pattern, "", text, flags=re.IGNORECASE)

    @staticmethod
    def _merge_keywords(existing, new) -> List[str]:
        """Union two keyword collections, keeping first-seen order and no duplicates."""
        return list(dict.fromkeys([*existing, *new]))

    def create_resume(
        self,
        profile: UserProfile,
        job: JobPosting,
        user_id: str = "default_user",
        user_chat: Optional[str] = None,
        seed_text: Optional[str] = None,
        agent2_notes: Optional[str] = None,
        layout_preset: Optional[str] = None,
    ) -> ResumeDraft:
        """Create an optimized resume for a specific job posting.

        Args:
            profile: Candidate data; the header, summary, skills, experiences and
                education are read from it, plus optional languages,
                certifications, projects and skill_proficiency attributes.
            job: Target posting; its title, company, description and id are used.
            user_id: Key under which drafts are saved in ``memory_store``.
            user_chat: Optional free-text instructions appended to the LLM prompt.
            seed_text: Optional existing CV text. When given it replaces the
                composed draft; text longer than 2000 characters is prefixed with
                distilled bullet points and truncated to 4000 characters.
            agent2_notes: Optional notes appended to the LLM prompt.
            layout_preset: One of "traditional"/"classic", "modern", "minimalist"
                or "executive"; anything else uses ``basic_resume_template``.

        Returns:
            A ``ResumeDraft`` with the final text (clamped to ``self.max_chars``)
            and the keywords that were injected.
        """
        jd_keywords: List[str] = extract_keywords_from_text(
            job.description or "",
            top_k=AgentConfig.JOB_KEYWORDS_COUNT,
        )
        allowed = allowed_keywords_from_profile(profile.skills, profile.experiences)

        # Format resume sections
        header = format_resume_header(
            full_name=profile.full_name,
            headline=profile.headline or job.title,
            email=profile.email,
            phone=profile.phone,
            location=profile.location,
            links=profile.links,
        )

        # Sort experiences reverse-chronologically (Reed/Indeed best practice).
        # Keys are parsed to (year, month) so "MM/YYYY" orders correctly too.
        experiences_sorted = sorted(
            profile.experiences,
            key=lambda e: (self._date_sort_key(e.end_date), self._date_sort_key(e.start_date)),
            reverse=True,
        )

        # Simple gap signal: >= 2 years between an older role ending and the next starting.
        gap_years_flag = self._has_employment_gap(experiences_sorted)

        # Limit achievements depth: recent roles get more bullets, older roles compressed
        current_year = datetime.now().year
        experience_payload = []
        for idx, e in enumerate(experiences_sorted):
            ach = e.achievements or []
            # Compress if older than 15 years
            start_y = _extract_year(e.start_date or "")
            older = bool(start_y and (current_year - start_y > 15))
            if older:
                limited = []
            elif idx < 2:
                limited = ach[:6]
            else:
                limited = ach[:1]
            experience_payload.append({
                "title": e.title,
                "company": e.company,
                "start_date": _uk_date_str(e.start_date) or e.start_date,
                "end_date": _uk_date_str(e.end_date) or (e.end_date or ""),
                "achievements": limited,
            })
        experience = format_experience_section(experience_payload)
        skills = format_skills_section(profile.skills)

        # Personal statement (Summary) refinement (~150 words), tailored to job.
        # No summary provided: keep empty to avoid adding new sections implicitly.
        summary_text = profile.summary or ""
        if summary_text:
            if llm.enabled:
                sys_ps = (
                    "You write CV personal statements (Summary) for UK job applications. Keep to ~150 words (100–180). "
                    "Use active voice and clear, specific language; avoid clichés/buzzwords; no personal info. "
                    "Structure: 1) who you are/pro background; 2) key skills + 1–2 quantified achievements relevant to the role; "
                    "3) concise career goal aligned to the target role/company. Tailor to the job's keywords."
                )
                usr_ps = (
                    f"Target role: {job.title} at {job.company}\n"
                    f"Job keywords: {', '.join(jd_keywords[:15])}\n\n"
                    f"Existing summary (edit and improve):\n{summary_text}"
                )
                summary_text = llm.generate(sys_ps, usr_ps, max_tokens=220, agent="cv")
            summary_text = _clamp_words(summary_text, 180)
            # Ensure critical JD keywords appear in summary: up to 3 of the top 6 that are missing.
            try:
                low = (summary_text or "").lower()
                needed = [k for k in jd_keywords[:6] if k and k.lower() not in low][:3]
                if needed:
                    summary_text = (
                        (summary_text or "").strip() + " Key strengths: " + ", ".join(needed) + "."
                    ).strip()
            except Exception:
                # Best-effort step: keep the summary as-is, but leave a trace.
                logger.debug("Could not append keywords to summary", exc_info=True)

        education_text = "\n".join(
            [f"{ed.degree or ''} {ed.field_of_study or ''} — {ed.school} ({ed.end_date or ''})"
             for ed in profile.education]
        ).strip()

        # Process seed text if provided
        base_text = seed_text.strip() if seed_text else None
        if base_text and len(base_text) > 2000:
            # Distill dense seed into key points to guide the draft
            bullets = distill_text(base_text, max_points=AgentConfig.DISTILL_MAX_POINTS)
            base_text = ("\n".join(f"- {b}" for b in bullets) + "\n\n") + base_text[:4000]

        # Compose initial draft by layout preset (ATS-friendly, single column)
        preset = (layout_preset or "").strip().lower()
        preset = {
            "traditional": "classic",
            "classic": "classic",
            "modern": "modern",
            "minimalist": "minimalist",
            "executive": "executive",
        }.get(preset, "")

        def sec_summary(s: str) -> str:
            return ("\nSummary\n" + textwrap.fill(s, width=100)) if s else ""

        def sec_skills(sk: str) -> str:
            return ("\n" + sk) if sk else ""

        def sec_experience(ex: str) -> str:
            return ("\n\nExperience\n" + ex) if ex else ""

        def sec_education(ed: str) -> str:
            return ("\n\nEducation\n" + ed) if ed else ""

        def sec_languages() -> str:
            langs = getattr(profile, "languages", []) or []
            pairs = []
            for it in langs[:8]:
                if not isinstance(it, dict):
                    continue
                name = it.get("language") or it.get("name") or ""
                lvl = it.get("level") or ""
                if name:
                    pairs.append(f"{name} ({lvl})" if lvl else f"{name}")
            return ("\n\nLanguages\n- " + "\n- ".join(pairs)) if pairs else ""

        def sec_certs() -> str:
            certs = getattr(profile, "certifications", []) or []
            lines = []
            for c in certs[:6]:
                if not isinstance(c, dict):
                    continue
                name = c.get("name") or ""
                issuer = c.get("issuer") or ""
                year = c.get("year") or ""
                if name:
                    fields = [name]
                    if issuer:
                        fields.append(issuer)
                    if year:
                        fields.append(str(year))
                    lines.append(" — ".join(fields))
            return ("\n\nCertifications\n- " + "\n- ".join(lines)) if lines else ""

        def sec_projects() -> str:
            projs = getattr(profile, "projects", []) or []
            lines = []
            for p in projs[:4]:
                if not isinstance(p, dict):
                    continue
                title = p.get("title") or ""
                link = p.get("link") or ""
                impact = p.get("impact") or ""
                if title or impact:
                    line = title
                    if link:
                        line += f" — {link}"
                    if impact:
                        line += f" — {impact}"
                    lines.append(line)
            return ("\n\nSelected Projects\n- " + "\n- ".join(lines)) if lines else ""

        def sec_achievements() -> str:
            # First 5 non-empty achievements taken from the two most recent roles.
            bul = []
            for e in experiences_sorted[:2]:
                for a in (e.achievements or []):
                    if a and len(bul) < 5:
                        bul.append(a)
            return ("\n\nSelected Achievements\n- " + "\n- ".join(bul)) if bul else ""

        # Section order per preset; built lazily so only the chosen layout is rendered.
        layouts = {
            "classic": lambda: [
                header, sec_summary(summary_text), sec_skills(skills), sec_experience(experience),
                sec_education(education_text), sec_certs(), sec_languages(),
            ],
            "modern": lambda: [
                header, sec_summary(summary_text), sec_experience(experience), sec_skills(skills),
                sec_projects(), sec_certs(), sec_education(education_text),
            ],
            "minimalist": lambda: [
                header, sec_summary(summary_text), sec_skills(skills), sec_experience(experience),
                sec_education(education_text),
            ],
            "executive": lambda: [
                header, sec_summary(summary_text), sec_achievements(), sec_experience(experience),
                sec_skills(skills), sec_education(education_text), sec_certs(),
            ],
        }
        if base_text:
            draft = base_text
        elif preset in layouts:
            draft = "".join(layouts[preset]()).strip() + "\n"
        else:
            # Default formatting
            draft = basic_resume_template(
                header=header,
                summary=(summary_text or None),
                skills=skills,
                experience=experience,
                education=education_text,
            )

        # If profile.skill_proficiency exists, append a simple proficiency hint line
        # under the first "Skills:" line (ATS-safe).
        # NOTE(review): the original also requires profile.links to be non-None before
        # injecting; that gate looks unrelated to skills but is kept — confirm intent.
        try:
            if getattr(profile, "skills", None) and getattr(profile, "links", None) is not None:
                prof_map = getattr(profile, "skill_proficiency", {}) or {}
                if prof_map:
                    profs = ", ".join(f"{k}: {v}" for k, v in list(prof_map.items())[:8])
                    # partition() splits on the first marker only, so later
                    # "Skills:" occurrences and the text after them are preserved.
                    before, marker, after = draft.partition("\nSkills:")
                    if marker:
                        skills_line, _, rest = after.partition("\n")
                        draft = before + marker + skills_line + "\n" + profs + "\n" + rest
        except Exception:
            # Best-effort enhancement: never fail resume creation over it.
            logger.debug("Could not inject skill proficiency line", exc_info=True)

        guidance = get_role_guidelines(job.title, job.description)
        used_keywords: List[str] = []

        # Optimization cycles
        for cycle in range(AgentConfig.OPTIMIZATION_CYCLES):
            draft, used_cycle = ensure_keywords(
                draft,
                jd_keywords,
                max_new=AgentConfig.MAX_NEW_KEYWORDS,
                allowed_keywords=allowed,
            )
            used_keywords = self._merge_keywords(used_keywords, used_cycle)

            if llm.enabled:
                system = (
                    "You refine resumes. Preserve factual accuracy. Keep ATS-friendly text-only formatting. "
                    "Follow UK best practices (Indeed/Reed/StandOut/Novorésumé): keep concise (prefer 1 page; <= 2 pages for senior roles), use clear section headings. "
                    "Present work experience in reverse chronological order, highlight recent quantified achievements, and keep older roles brief. "
                    "Use bullet points for skimmability, maintain consistent spacing and layout, avoid irrelevant info. Do not add images/tables or unusual symbols. "
                    "Tailor to the job's keywords. Prefer quantification where truthful (%, £, time, team size); never fabricate metrics. "
                    "AVOID vague buzzwords (e.g., 'results-driven', 'team player', 'people person', 'perfectionist', 'multi-tasker'). Replace with specific, measurable achievements. "
                    "Use active voice and strong action verbs (e.g., Achieved, Led, Implemented, Improved, Generated, Managed, Completed, Designed). "
                    "Skills: when possible, separate Hard skills vs Soft skills (hard skills first, max ~10), then soft skills. Keep Education concise (highest/most recent first). "
                    "Contact hygiene: prefer professional email; include relevant links (e.g., LinkedIn/portfolio) if provided; never include DOB or photos. "
                    "If a 'Summary'/'Personal Statement' section exists, keep it ~150 words with the intro–skills/achievements–goal structure; do not add new sections. "
                    "UK English, UK date style (MMM YYYY). Use present tense for the current role and past tense for previous roles. Remove first-person pronouns in bullets. "
                    "Use digits for numbers (e.g., 7, 12%, £1,200). Include critical JD keywords verbatim inside bullets (not only in Skills). "
                    f"Apply latest guidance: {guidance}."
                )
                notes = (f"\nNotes from Agent 2: {agent2_notes}" if agent2_notes else "")
                custom = f"\nUser instructions: {user_chat}" if user_chat else ""
                user = (
                    f"Role: {job.title}. Company: {job.company}.\n"
                    f"Job keywords: {', '.join(jd_keywords[:AgentConfig.RESUME_KEYWORDS_COUNT])}.\n"
                    f"Allowed keywords (from user profile): {', '.join(sorted(list(allowed))[:40])}.\n"
                    f"Rewrite the following resume content to strengthen alignment without inventing new skills.{custom}{notes}\n"
                    f"Enforce reverse chronological experience ordering, bullet points, and consistent headings. Keep within {self.max_chars} characters.\n\n"
                    f"Resume content:\n{draft}"
                )
                draft = llm.generate(system, user, max_tokens=LLMConfig.RESUME_MAX_TOKENS, agent="cv")

            # Simple buzzword scrub per Reed guidance; removal is case-insensitive
            # so capitalised variants ("Results-driven") are caught as well.
            draft = self._scrub_buzzwords(draft)
            # Strengthen weak bullet openers to action verbs (The Muse)
            draft = strengthen_action_verbs(draft)
            # ATS plain-text scrub: remove tabs
            draft = draft.replace("\t", " ")
            # Pronoun/punctuation/currency/percent normalisation
            draft = _postprocess_bullets(draft)
            # Strip DOB/photo lines if present
            draft = _strip_personal_info(draft)

            cov = coverage_score(draft, jd_keywords)
            conc = conciseness_score(draft, self.max_chars)
            if conc < 1.0:
                draft = clamp_to_char_limit(draft, self.max_chars)

            # Signals for orchestrator/observability (StandOut CV + Novorésumé)
            text = draft or ""
            lowered = text.lower()
            draft_lines = text.splitlines()
            bullet_lines = sum(1 for ln in draft_lines if ln.strip().startswith("-"))
            line_count = max(1, len(draft_lines))
            bullet_density = round(bullet_lines / line_count, 3)
            # Counts every digit character plus every '%' and '£' sign.
            quant_count = sum(1 for ch in text if ch.isdigit()) + text.count('%') + text.count('£')
            email_ok = bool(re.match(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$", profile.email or ""))
            links_present = ("http://" in lowered) or ("https://" in lowered) or ("linkedin" in lowered)
            skills_split_hint = ("hard skills" in lowered) or ("soft skills" in lowered)
            languages_section = "\nlanguages" in lowered
            action_verb_count = sum(1 for v in ACTION_VERBS if v.lower() in lowered)
            # Page estimate assumes 2400 characters per page.
            approx_pages = round(max(1, len(text)) / 2400.0, 2)
            approx_one_page = approx_pages <= 1.2

            memory_store.save(user_id, self.name, {
                "job_id": job.id,
                "cycle": cycle + 1,
                "coverage": cov,
                "conciseness": conc,
                "keywords_used": used_keywords,
                "guidance": guidance[:500],
                "user_chat": (user_chat or "")[:500],
                "agent2_notes": (agent2_notes or "")[:500],
                "draft": draft,
                "signals": {
                    "bullet_density": bullet_density,
                    "quant_count": quant_count,
                    "email_ok": email_ok,
                    "gap_years_flag": gap_years_flag,
                    "skills_split_hint": skills_split_hint,
                    "languages_section": languages_section,
                    "links_present": links_present,
                    "action_verb_count": action_verb_count,
                    "approx_pages": approx_pages,
                    "approx_one_page": approx_one_page,
                },
            }, job_id=job.id)
            logger.debug(
                "Resume optimization cycle %d: coverage=%.2f, conciseness=%.2f",
                cycle + 1, cov, conc,
            )

        # Final cleanup
        draft = clamp_to_char_limit(draft, self.max_chars)
        memory_store.save(user_id, self.name, {
            "job_id": job.id,
            "final": True,
            "keywords_used": used_keywords,
            "draft": draft,
        }, job_id=job.id)
        logger.info("Resume created for job %s with %d keywords", job.id, len(used_keywords))
        return ResumeDraft(job_id=job.id, text=draft, keywords_used=used_keywords)