Noo88ear's picture
🚀 Initial deployment of Multi-Agent Job Application Assistant
7498f2c
from __future__ import annotations
from typing import List, Optional
import logging
import re
import textwrap
from datetime import datetime
from models.schemas import UserProfile, JobPosting, ResumeDraft
from memory.store import memory_store
from utils.text import extract_keywords_from_text, clamp_to_char_limit
from utils.ats import (
format_resume_header,
format_experience_section,
format_skills_section,
basic_resume_template,
ensure_keywords,
ACTION_VERBS,
strengthen_action_verbs,
)
from utils.consistency import allowed_keywords_from_profile, coverage_score, conciseness_score
from utils.config import AgentConfig, LLMConfig
from services.web_research import get_role_guidelines
from services.llm import llm
from utils.langextractor import distill_text
try:
from utils.langextractor_enhanced import extract_structured_info, extract_ats_keywords
ENHANCED_EXTRACTION = True
except ImportError:
ENHANCED_EXTRACTION = False
logger = logging.getLogger(__name__)
def _clamp_words(text: str, max_words: int) -> str:
    """Limit *text* to at most *max_words* whitespace-separated words.

    Falsy input yields an empty string. Text that already fits is returned
    with only its outer whitespace removed (inner spacing is untouched);
    longer text is cut to its first *max_words* words joined by single spaces.
    """
    if not text:
        return ""
    trimmed = text.strip()
    tokens = trimmed.split()
    if len(tokens) > max_words:
        return " ".join(tokens[:max_words])
    return trimmed
def _extract_year(s: Optional[str]) -> Optional[int]:
    """Return the first 19xx/20xx year found in *s*, or ``None``.

    ``None`` is returned for falsy input and for text that contains no
    four-digit run starting with 19 or 20.
    """
    found = re.search(r"(19|20)\d{2}", s) if s else None
    if found is None:
        return None
    return int(found.group(0))
def _uk_month_name(m: int) -> str:
    """Return the three-letter English abbreviation for month number *m*.

    The number is clamped into 0..12 before lookup: values below 1 give an
    empty string and values above 12 give "Dec".
    """
    names = (
        "", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    if m < 0:
        index = 0
    elif m > 12:
        index = 12
    else:
        index = m
    return names[index]
def _uk_date_str(s: Optional[str]) -> Optional[str]:
    """Render a loosely formatted date as UK-style "MMM YYYY" text.

    Recognised inputs (after stripping outer whitespace):

    * "present" in any letter case -> "Present"
    * "YYYY-MM", "YYYY/M", "YYYY/MM" -> e.g. "Mar 2021"
    * "MM/YYYY" -> e.g. "Mar 2021"
    * "YYYY" -> returned as is

    Anything else is returned stripped but otherwise unchanged, including
    strings whose month number is not in 1..12. Rendering those through the
    clamping month lookup would turn "2020-13" into "Dec 2020" and "2020-00"
    into " 2020", which misreports the date.

    Returns ``None`` for falsy input.
    """
    if not s:
        return None
    ss = s.strip()
    if ss.lower() == "present":
        return "Present"

    year: Optional[int] = None
    month: Optional[int] = None

    # YYYY-MM or YYYY/M or YYYY/MM
    m = re.match(r"^(\d{4})[-/](\d{1,2})$", ss)
    if m:
        year, month = int(m.group(1)), int(m.group(2))
    else:
        # MM/YYYY
        m = re.match(r"^(\d{1,2})/(\d{4})$", ss)
        if m:
            month, year = int(m.group(1)), int(m.group(2))

    if month is not None and 1 <= month <= 12:
        return f"{_uk_month_name(month)} {year}"

    # Year-only values, invalid month numbers and unrecognised formats all
    # pass through untouched.
    return ss
def _postprocess_bullets(text: str) -> str:
    """Normalise the bullet lines (those starting with "-") of a CV draft.

    For each bullet line, in order:

    1. A leading first-person pronoun ("I", "We", "My", any case) is removed
       and the word that now opens the bullet is capitalised, so
       "- I led ..." becomes "- Led ..." rather than "- led ...".
    2. A trailing full stop is removed (trailing whitespace is kept).
    3. "percent" / "per cent" become "%"; when they follow a digit the
       whitespace in between is dropped too ("12 percent" -> "12%").
    4. "GBP" becomes "£"; when it precedes a digit the whitespace in between
       is dropped too ("GBP 1,200" -> "£1,200").

    Non-bullet lines are passed through unchanged. Lines are re-joined with
    "\\n", so a trailing newline on the input is not preserved. Falsy input
    is returned as is.
    """
    if not text:
        return text

    def _drop_pronoun(match: "re.Match[str]") -> str:
        # group(1) is the bullet marker, group(2) the first character of the
        # remaining text (possibly empty).
        return match.group(1) + match.group(2).upper()

    cleaned = []
    for line in text.splitlines():
        if line.lstrip().startswith("-"):
            # Remove first-person pronouns at bullet start
            line = re.sub(
                r"^(\s*-\s*)(?:I|We|My)\s+(\S?)",
                _drop_pronoun,
                line,
                flags=re.IGNORECASE,
            )
            # Remove trailing period
            line = re.sub(r"\.(\s*)$", r"\1", line)
            # Normalise percent: attach to a preceding number first, then
            # catch any remaining stand-alone occurrences.
            line = re.sub(r"(?<=\d)\s*per\s*cent\b", "%", line, flags=re.IGNORECASE)
            line = re.sub(r"\bper\s*cent\b", "%", line, flags=re.IGNORECASE)
            # Normalise GBP: attach to a following number first, then replace
            # any remaining occurrences verbatim.
            line = re.sub(r"GBP\s*(?=\d)", "£", line)
            line = line.replace("GBP", "£")
        cleaned.append(line)
    return "\n".join(cleaned)
def _strip_personal_info(text: str) -> str:
    """Blank out lines that mention a date of birth or a photo.

    Any line containing the whole words "date of birth", "dob", "photo" or
    "headshot" (case-insensitive) is emptied. Runs of three or more newlines
    are then collapsed to a single blank line, and the result is returned
    stripped with exactly one trailing newline. Falsy input is returned
    unchanged.
    """
    if not text:
        return text
    line_patterns = (
        r"^.*\b(date of birth|dob)\b.*$",
        r"^.*\b(photo|headshot)\b.*$",
    )
    for pattern in line_patterns:
        text = re.sub(pattern, "", text, flags=re.IGNORECASE | re.MULTILINE)
    # Emptied lines leave gaps behind; squeeze them to one blank line.
    collapsed = re.sub(r"\n{3,}", "\n\n", text)
    return collapsed.strip() + "\n"
class CVOwnerAgent:
    """Agent that builds a plain-text CV tailored to one job posting.

    ``create_resume`` assembles header, summary, skills, experience and
    education sections from the user's profile (or starts from caller-supplied
    seed text), then runs ``AgentConfig.OPTIMIZATION_CYCLES`` optimisation
    cycles. Each cycle injects job keywords, optionally rewrites the text with
    the LLM, scrubs buzzwords and personal details, clamps the length and
    stores the draft plus a set of metrics in ``memory_store``.
    """

    # Vague phrases removed from every draft, whatever their letter case.
    _BUZZWORDS = (
        "results-driven", "team player", "works well alone", "people person",
        "perfectionist", "multi-tasker", "multi tasker", "dynamic go-getter",
    )

    # Full English month names, used to recognise dates such as "Sep 2018".
    _MONTH_NAMES = (
        "january", "february", "march", "april", "may", "june",
        "july", "august", "september", "october", "november", "december",
    )

    # Accepted layout preset spellings -> canonical preset name.
    _PRESET_ALIASES = {
        "traditional": "classic",
        "classic": "classic",
        "modern": "modern",
        "minimalist": "minimalist",
        "executive": "executive",
    }

    def __init__(self) -> None:
        self.name = "cv_owner"
        self.max_chars = AgentConfig.RESUME_MAX_CHARS

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------

    @classmethod
    def _date_sort_key(cls, value: Optional[str]) -> tuple[int, int]:
        """Turn a loosely formatted date into a comparable ``(year, month)``.

        Empty values and "present" sort as the most recent date. Raw strings
        are not compared directly because mixed formats ("03/2021" versus
        "2019-05") order incorrectly as text. Dates with no recognisable year
        sort last as ``(0, 0)``; an unknown or invalid month is 0.
        """
        text = (value or "").strip().lower()
        if not text or text == "present":
            return (9999, 12)

        iso = re.match(r"^(\d{4})[-/](\d{1,2})(?:\D|$)", text)
        uk = re.match(r"^(\d{1,2})/(\d{4})$", text)
        if iso:
            year, month = int(iso.group(1)), int(iso.group(2))
        elif uk:
            month, year = int(uk.group(1)), int(uk.group(2))
        else:
            year_match = re.search(r"(19|20)\d{2}", text)
            if not year_match:
                return (0, 0)
            year = int(year_match.group(0))
            # Accept month names or abbreviations of three or more letters.
            month = next(
                (
                    number
                    for word in re.findall(r"[a-z]{3,}", text)
                    for number, name in enumerate(cls._MONTH_NAMES, start=1)
                    if name.startswith(word)
                ),
                0,
            )
        if not 1 <= month <= 12:
            month = 0
        return (year, month)

    @staticmethod
    def _format_education_line(ed) -> str:
        """Format one education entry as "Degree Field, School (End)".

        Missing parts are skipped, so no stray separators or empty
        parentheses are produced.
        """
        qualification = " ".join(
            part
            for part in (
                str(ed.degree or "").strip(),
                str(ed.field_of_study or "").strip(),
            )
            if part
        )
        school = str(ed.school or "").strip()
        line = ", ".join(part for part in (qualification, school) if part)
        end = str(ed.end_date).strip() if ed.end_date else ""
        if not end:
            return line
        return f"{line} ({end})" if line else end

    @classmethod
    def _scrub_buzzwords(cls, text: str) -> str:
        """Delete every buzzword phrase from *text*, ignoring letter case."""
        for phrase in cls._BUZZWORDS:
            text = re.sub(re.escape(phrase), "", text, flags=re.IGNORECASE)
        return text

    @staticmethod
    def _inject_skill_proficiency(draft: str, prof_map: dict) -> str:
        """Insert a "skill: level" line right after the first "Skills:" line.

        At most eight proficiencies are listed. The draft is returned
        unchanged when it has no line starting with "Skills:" after a newline.
        Only the first such line is touched; everything after it is kept.
        """
        head, marker, tail = draft.partition("\nSkills:")
        if not marker:
            return draft
        profs = ", ".join(f"{k}: {v}" for k, v in list(prof_map.items())[:8])
        skills_line, _, rest = tail.partition("\n")
        return head + marker + skills_line + "\n" + profs + "\n" + rest

    @staticmethod
    def _append_missing_keywords(summary: str, jd_keywords: List[str]) -> str:
        """Append up to three of the top six job keywords absent from *summary*.

        The keywords are added as a trailing "Key strengths: ..." sentence.
        The summary is returned untouched when nothing is missing.
        """
        haystack = (summary or "").lower()
        missing = [
            k for k in jd_keywords[:6]
            if isinstance(k, str) and k and k.lower() not in haystack
        ][:3]
        if not missing:
            return summary
        return (summary or "").strip() + " " + ("Key strengths: " + ", ".join(missing) + ".")

    @staticmethod
    def _bullet_section(heading: str, items: List[str]) -> str:
        """Render *items* as a "- " bullet list under *heading* ("" if empty)."""
        return ("\n\n" + heading + "\n- " + "\n- ".join(items)) if items else ""

    @classmethod
    def _languages_section(cls, profile) -> str:
        """Build the Languages section from up to 8 ``profile.languages`` dicts."""
        entries = []
        for item in (getattr(profile, "languages", []) or [])[:8]:
            if not isinstance(item, dict):
                continue
            name = item.get("language") or item.get("name") or ""
            level = item.get("level") or ""
            if name:
                entries.append(f"{name} ({level})" if level else f"{name}")
        return cls._bullet_section("Languages", entries)

    @classmethod
    def _certifications_section(cls, profile) -> str:
        """Build the Certifications section from up to 6 ``profile.certifications`` dicts."""
        entries = []
        for cert in (getattr(profile, "certifications", []) or [])[:6]:
            if not isinstance(cert, dict):
                continue
            name = cert.get("name") or ""
            if not name:
                continue
            fields = [name]
            issuer = cert.get("issuer") or ""
            year = cert.get("year") or ""
            if issuer:
                fields.append(issuer)
            if year:
                fields.append(str(year))
            entries.append(" — ".join(fields))
        return cls._bullet_section("Certifications", entries)

    @classmethod
    def _projects_section(cls, profile) -> str:
        """Build the Selected Projects section from up to 4 ``profile.projects`` dicts."""
        entries = []
        for project in (getattr(profile, "projects", []) or [])[:4]:
            if not isinstance(project, dict):
                continue
            title = project.get("title") or ""
            link = project.get("link") or ""
            impact = project.get("impact") or ""
            if not (title or impact):
                continue
            line = title
            if link:
                line += f" — {link}"
            if impact:
                line += f" — {impact}"
            entries.append(line)
        return cls._bullet_section("Selected Projects", entries)

    @classmethod
    def _achievements_section(cls, roles_newest_first) -> str:
        """Collect up to 5 achievements from the two most recent roles."""
        picked = [
            achievement
            for role in roles_newest_first[:2]
            for achievement in (role.achievements or [])
            if achievement
        ][:5]
        return cls._bullet_section("Selected Achievements", picked)

    # ------------------------------------------------------------------
    # Helpers that rely on module-level utilities
    # ------------------------------------------------------------------

    @staticmethod
    def _has_employment_gap(roles_newest_first) -> bool:
        """Report whether two adjacent roles are separated by 2+ calendar years.

        The list is newest first, so the gap is measured from the older
        role's end year to the newer role's start year. Pairs where either
        year is unknown (including an older role marked "Present") are
        skipped.
        """
        for newer, older in zip(roles_newest_first, roles_newest_first[1:]):
            older_end = _extract_year(older.end_date)
            newer_start = _extract_year(newer.start_date)
            if older_end and newer_start and (newer_start - older_end) >= 2:
                return True
        return False

    @staticmethod
    def _experience_payload(roles_newest_first) -> List[dict]:
        """Build the dicts passed to ``format_experience_section``.

        The two most recent roles keep up to 6 achievements and later roles
        keep 1. A role that *ended* more than 15 years ago keeps none. The age
        is judged from the end date, falling back to the start date only when
        the end date has no year, so a long-running current role keeps its
        bullets.
        """
        current_year = datetime.now().year
        payload = []
        for idx, role in enumerate(roles_newest_first):
            achievements = role.achievements or []
            end_raw = (role.end_date or "").strip()
            ongoing = not end_raw or end_raw.lower() == "present"
            reference_year = None if ongoing else (
                _extract_year(end_raw) or _extract_year(role.start_date)
            )
            dated = bool(reference_year and (current_year - reference_year > 15))
            if dated:
                kept = []
            elif idx < 2:
                kept = achievements[:6]
            else:
                kept = achievements[:1]
            payload.append({
                "title": role.title,
                "company": role.company,
                "start_date": _uk_date_str(role.start_date) or role.start_date,
                "end_date": _uk_date_str(role.end_date) or (role.end_date or ""),
                "achievements": kept,
            })
        return payload

    def _build_summary(self, summary_text: str, job: JobPosting, jd_keywords: List[str]) -> str:
        """Refine the profile summary for the target job.

        Returns "" when the profile has no summary, so that no new section is
        added implicitly. Otherwise the summary is optionally rewritten by the
        LLM, clamped to 180 words and topped up with missing job keywords.
        """
        if not summary_text:
            return ""
        if llm.enabled:
            sys_ps = (
                "You write CV personal statements (Summary) for UK job applications. Keep to ~150 words (100–180). "
                "Use active voice and clear, specific language; avoid clichés/buzzwords; no personal info. "
                "Structure: 1) who you are/pro background; 2) key skills + 1–2 quantified achievements relevant to the role; "
                "3) concise career goal aligned to the target role/company. Tailor to the job's keywords."
            )
            usr_ps = (
                f"Target role: {job.title} at {job.company}\n"
                f"Job keywords: {', '.join(jd_keywords[:15])}\n\n"
                f"Existing summary (edit and improve):\n{summary_text}"
            )
            refined = llm.generate(sys_ps, usr_ps, max_tokens=220, agent="cv")
            # Keep the user's own summary if the model returns nothing usable.
            if refined and refined.strip():
                summary_text = refined
        summary_text = _clamp_words(summary_text, 180)
        # Ensure critical JD keywords appear in summary (top 3)
        return self._append_missing_keywords(summary_text, jd_keywords)

    @staticmethod
    def _prepare_seed(seed_text: Optional[str]) -> Optional[str]:
        """Strip the seed text; prefix long seeds (>2000 chars) with distilled bullets."""
        base_text = seed_text.strip() if seed_text else None
        if base_text and len(base_text) > 2000:
            # Distill dense seed into key points to guide the draft
            bullets = distill_text(base_text, max_points=AgentConfig.DISTILL_MAX_POINTS)
            base_text = ("\n".join(f"- {b}" for b in bullets) + "\n\n") + base_text[:4000]
        return base_text

    def _render_preset(
        self,
        preset: str,
        profile: UserProfile,
        roles_newest_first,
        header: str,
        summary_text: str,
        skills: str,
        experience: str,
        education_text: str,
    ) -> Optional[str]:
        """Compose the draft for a named layout preset (``None`` if unknown).

        All presets are single-column text; they differ only in which
        sections are included and in what order.
        """
        if preset not in ("classic", "modern", "minimalist", "executive"):
            return None

        summary_sec = ("\nSummary\n" + textwrap.fill(summary_text, width=100)) if summary_text else ""
        skills_sec = ("\n" + skills) if skills else ""
        experience_sec = ("\n\nExperience\n" + experience) if experience else ""
        education_sec = ("\n\nEducation\n" + education_text) if education_text else ""

        if preset == "classic":
            parts = [
                header, summary_sec, skills_sec, experience_sec, education_sec,
                self._certifications_section(profile), self._languages_section(profile),
            ]
        elif preset == "modern":
            parts = [
                header, summary_sec, experience_sec, skills_sec,
                self._projects_section(profile), self._certifications_section(profile),
                education_sec,
            ]
        elif preset == "minimalist":
            parts = [header, summary_sec, skills_sec, experience_sec, education_sec]
        else:  # executive
            parts = [
                header, summary_sec, self._achievements_section(roles_newest_first),
                experience_sec, skills_sec, education_sec,
                self._certifications_section(profile),
            ]
        return "".join(parts).strip() + "\n"

    def _refine_with_llm(
        self,
        draft: str,
        job: JobPosting,
        jd_keywords: List[str],
        allowed,
        guidance,
        user_chat: Optional[str],
        agent2_notes: Optional[str],
    ) -> str:
        """Ask the LLM to rewrite *draft*; keep *draft* if the reply is empty."""
        system = (
            "You refine resumes. Preserve factual accuracy. Keep ATS-friendly text-only formatting. "
            "Follow UK best practices (Indeed/Reed/StandOut/Novorésumé): keep concise (prefer 1 page; <= 2 pages for senior roles), use clear section headings. "
            "Present work experience in reverse chronological order, highlight recent quantified achievements, and keep older roles brief. "
            "Use bullet points for skimmability, maintain consistent spacing and layout, avoid irrelevant info. Do not add images/tables or unusual symbols. "
            "Tailor to the job's keywords. Prefer quantification where truthful (%, £, time, team size); never fabricate metrics. "
            "AVOID vague buzzwords (e.g., 'results-driven', 'team player', 'people person', 'perfectionist', 'multi-tasker'). Replace with specific, measurable achievements. "
            "Use active voice and strong action verbs (e.g., Achieved, Led, Implemented, Improved, Generated, Managed, Completed, Designed). "
            "Skills: when possible, separate Hard skills vs Soft skills (hard skills first, max ~10), then soft skills. Keep Education concise (highest/most recent first). "
            "Contact hygiene: prefer professional email; include relevant links (e.g., LinkedIn/portfolio) if provided; never include DOB or photos. "
            "If a 'Summary'/'Personal Statement' section exists, keep it ~150 words with the intro–skills/achievements–goal structure; do not add new sections. "
            "UK English, UK date style (MMM YYYY). Use present tense for the current role and past tense for previous roles. Remove first-person pronouns in bullets. "
            "Use digits for numbers (e.g., 7, 12%, £1,200). Include critical JD keywords verbatim inside bullets (not only in Skills). "
            f"Apply latest guidance: {guidance}."
        )
        notes = (f"\nNotes from Agent 2: {agent2_notes}" if agent2_notes else "")
        custom = f"\nUser instructions: {user_chat}" if user_chat else ""
        user = (
            f"Role: {job.title}. Company: {job.company}.\n"
            f"Job keywords: {', '.join(jd_keywords[:AgentConfig.RESUME_KEYWORDS_COUNT])}.\n"
            f"Allowed keywords (from user profile): {', '.join(sorted(allowed)[:40])}.\n"
            f"Rewrite the following resume content to strengthen alignment without inventing new skills.{custom}{notes}\n"
            f"Enforce reverse chronological experience ordering, bullet points, and consistent headings. Keep within {self.max_chars} characters.\n\n"
            f"Resume content:\n{draft}"
        )
        refined = llm.generate(system, user, max_tokens=LLMConfig.RESUME_MAX_TOKENS, agent="cv")
        # An empty reply must not wipe out the draft built so far.
        return refined if refined and refined.strip() else draft

    @staticmethod
    def _draft_signals(draft: str, email: Optional[str], gap_years_flag: bool) -> dict:
        """Compute the observability metrics stored alongside each cycle's draft."""
        text = draft or ""
        lowered = text.lower()
        rows = text.splitlines()
        bullet_lines = sum(1 for row in rows if row.strip().startswith("-"))
        approx_pages = round(max(1, len(text)) / 2400.0, 2)
        return {
            "bullet_density": round(bullet_lines / max(1, len(rows)), 3),
            # Counts every digit character plus every % and £ sign.
            "quant_count": sum(1 for ch in text if ch.isdigit()) + text.count('%') + text.count('£'),
            "email_ok": bool(re.match(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$", email or "")),
            "gap_years_flag": gap_years_flag,
            "skills_split_hint": ("hard skills" in lowered) or ("soft skills" in lowered),
            "languages_section": "\nlanguages" in lowered,
            "links_present": ("http://" in lowered) or ("https://" in lowered) or ("linkedin" in lowered),
            "action_verb_count": sum(1 for v in ACTION_VERBS if v.lower() in lowered),
            "approx_pages": approx_pages,
            "approx_one_page": approx_pages <= 1.2,
        }

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def create_resume(
        self,
        profile: UserProfile,
        job: JobPosting,
        user_id: str = "default_user",
        user_chat: Optional[str] = None,
        seed_text: Optional[str] = None,
        agent2_notes: Optional[str] = None,
        layout_preset: Optional[str] = None,
    ) -> ResumeDraft:
        """Create an optimized resume for a specific job posting.

        Args:
            profile: Source of contact details, summary, skills, experiences
                and education (plus optional languages, certifications,
                projects and skill_proficiency attributes).
            job: Target posting; its title, company, description and id are used.
            user_id: Key under which drafts are stored in ``memory_store``.
            user_chat: Extra user instructions appended to the LLM prompt.
            seed_text: Existing CV text to start from; when given it replaces
                the draft generated from the profile.
            agent2_notes: Reviewer notes appended to the LLM prompt.
            layout_preset: "classic"/"traditional", "modern", "minimalist" or
                "executive"; anything else uses ``basic_resume_template``.

        Returns:
            A ``ResumeDraft`` with the final text, clamped to
            ``self.max_chars``, and the keywords reported by
            ``ensure_keywords`` across all cycles in first-seen order.
        """
        jd_keywords: List[str] = extract_keywords_from_text(
            job.description or "",
            top_k=AgentConfig.JOB_KEYWORDS_COUNT,
        )
        allowed = allowed_keywords_from_profile(profile.skills, profile.experiences)

        # Format resume sections
        header = format_resume_header(
            full_name=profile.full_name,
            headline=profile.headline or job.title,
            email=profile.email,
            phone=profile.phone,
            location=profile.location,
            links=profile.links,
        )

        # Sort experiences reverse-chronologically (Reed/Indeed best practice)
        experiences_sorted = sorted(
            profile.experiences,
            key=lambda e: (self._date_sort_key(e.end_date), self._date_sort_key(e.start_date)),
            reverse=True,
        )
        gap_years_flag = self._has_employment_gap(experiences_sorted)

        experience = format_experience_section(self._experience_payload(experiences_sorted))
        skills = format_skills_section(profile.skills)
        summary_text = self._build_summary(profile.summary or "", job, jd_keywords)
        education_lines = (self._format_education_line(ed) for ed in profile.education)
        education_text = "\n".join(line for line in education_lines if line).strip()

        # Compose initial draft: seed text wins, then a layout preset, then
        # the default template.
        base_text = self._prepare_seed(seed_text)
        preset = self._PRESET_ALIASES.get((layout_preset or "").strip().lower(), "")
        if base_text:
            draft = base_text
        else:
            draft = self._render_preset(
                preset, profile, experiences_sorted,
                header, summary_text, skills, experience, education_text,
            )
            if draft is None:
                # Default formatting
                draft = basic_resume_template(
                    header=header,
                    summary=(summary_text or None),
                    skills=skills,
                    experience=experience,
                    education=education_text,
                )

        # If profile.skill_proficiency exists, append a simple proficiency
        # hint line under Skills (ATS-safe).
        # NOTE(review): the gate on profile.links is carried over from the
        # original code and looks unrelated to proficiencies — confirm whether
        # it can be dropped.
        prof_map = getattr(profile, "skill_proficiency", None)
        if (
            isinstance(prof_map, dict)
            and prof_map
            and getattr(profile, "skills", None)
            and getattr(profile, "links", None) is not None
        ):
            draft = self._inject_skill_proficiency(draft, prof_map)

        guidance = get_role_guidelines(job.title, job.description)
        used_keywords: List[str] = []

        # Optimization cycles
        for cycle in range(AgentConfig.OPTIMIZATION_CYCLES):
            draft, used_cycle = ensure_keywords(
                draft,
                jd_keywords,
                max_new=AgentConfig.MAX_NEW_KEYWORDS,
                allowed_keywords=allowed,
            )
            # Order-preserving de-duplication keeps the result deterministic.
            used_keywords = list(dict.fromkeys([*used_keywords, *used_cycle]))

            if llm.enabled:
                draft = self._refine_with_llm(
                    draft, job, jd_keywords, allowed, guidance, user_chat, agent2_notes
                )

            # Simple buzzword scrub per Reed guidance; rely on achievements
            # to convey value instead.
            draft = self._scrub_buzzwords(draft)
            # Strengthen weak bullet openers to action verbs (The Muse)
            draft = strengthen_action_verbs(draft)
            # ATS plain-text scrub: remove tabs
            draft = draft.replace("\t", " ")
            # Pronoun/punctuation/currency/percent normalisation
            draft = _postprocess_bullets(draft)
            # Strip DOB/photo lines if present
            draft = _strip_personal_info(draft)

            cov = coverage_score(draft, jd_keywords)
            conc = conciseness_score(draft, self.max_chars)
            if conc < 1.0:
                draft = clamp_to_char_limit(draft, self.max_chars)

            # Signals for orchestrator/observability (StandOut CV + Novorésumé)
            memory_store.save(user_id, self.name, {
                "job_id": job.id,
                "cycle": cycle + 1,
                "coverage": cov,
                "conciseness": conc,
                "keywords_used": used_keywords,
                "guidance": (guidance or "")[:500],
                "user_chat": (user_chat or "")[:500],
                "agent2_notes": (agent2_notes or "")[:500],
                "draft": draft,
                "signals": self._draft_signals(draft, profile.email, gap_years_flag),
            }, job_id=job.id)
            logger.debug(
                "Resume optimization cycle %d: coverage=%.2f, conciseness=%.2f",
                cycle + 1, cov, conc,
            )

        # Final cleanup
        draft = clamp_to_char_limit(draft, self.max_chars)
        memory_store.save(user_id, self.name, {
            "job_id": job.id,
            "final": True,
            "keywords_used": used_keywords,
            "draft": draft,
        }, job_id=job.id)
        logger.info("Resume created for job %s with %d keywords", job.id, len(used_keywords))
        return ResumeDraft(job_id=job.id, text=draft, keywords_used=used_keywords)