Spaces:

Noo88ear
/

Job-Application-Assistant

Runtime error

App Files Files Community

Job-Application-Assistant / agents /cv_owner.py

Noo88ear

🚀 Initial deployment of Multi-Agent Job Application Assistant

7498f2c 8 months ago

raw

history blame contribute delete

18.3 kB

	from __future__ import annotations
	from typing import List, Optional
	import logging
	import re
	import textwrap
	from datetime import datetime

	from models.schemas import UserProfile, JobPosting, ResumeDraft
	from memory.store import memory_store
	from utils.text import extract_keywords_from_text, clamp_to_char_limit
	from utils.ats import (
	format_resume_header,
	format_experience_section,
	format_skills_section,
	basic_resume_template,
	ensure_keywords,
	ACTION_VERBS,
	strengthen_action_verbs,
	)
	from utils.consistency import allowed_keywords_from_profile, coverage_score, conciseness_score
	from utils.config import AgentConfig, LLMConfig
	from services.web_research import get_role_guidelines
	from services.llm import llm
	from utils.langextractor import distill_text
	# Optional dependency: attempt to import the enhanced extractor and record in
	# ENHANCED_EXTRACTION whether that import succeeded.
	try:
	from utils.langextractor_enhanced import extract_structured_info, extract_ats_keywords
	ENHANCED_EXTRACTION = True
	except ImportError:
	ENHANCED_EXTRACTION = False

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)


	def _clamp_words(text: str, max_words: int) -> str:
	    """Return *text* limited to at most *max_words* whitespace-separated words.

	    Text already within the limit is returned stripped, with its internal
	    whitespace untouched. When truncation happens the kept words are re-joined
	    with single spaces. Falsy text, or a limit of zero or less, yields "".
	    """
	    if not text:
	        return ""
	    stripped = text.strip()
	    words = stripped.split()
	    if len(words) <= max_words:
	        return stripped
	    # Floor the limit at zero: a negative slice bound (words[:-1]) would keep
	    # all but the last word instead of keeping nothing.
	    return " ".join(words[:max(0, max_words)])


	def _extract_year(s: Optional[str]) -> Optional[int]:
	    """Return the first 19xx/20xx year found in *s*, or None when there is none.

	    Falsy input (None or "") returns None.
	    """
	    if not s:
	        return None
	    # The alternation bar must stay unescaped: in a raw string "\|" matches a
	    # literal "|" character, so no real date would ever match.
	    m = re.search(r"(?:19|20)\d{2}", s)
	    return int(m.group(0)) if m else None


	def _uk_month_name(m: int) -> str:
	    """Return the three-letter English abbreviation for month number *m*.

	    Numbers outside 1-12 return "" instead of being clamped, so an invalid
	    month is never reported as a real one (13 is not "Dec").
	    """
	    names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
	             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
	    return names[m - 1] if 1 <= m <= 12 else ""


	def _uk_date_str(s: Optional[str]) -> Optional[str]:
	    """Convert a numeric year/month string to UK "MMM YYYY" style.

	    Recognised inputs are "YYYY-MM", "YYYY/M", "YYYY/MM" and "MM/YYYY".
	    "present" (any case) becomes "Present". Anything else, including a bare
	    year or a month outside 1-12, is returned stripped but otherwise unchanged.
	    Falsy input returns None.
	    """
	    if not s:
	        return None
	    ss = s.strip()
	    if ss.lower() == "present":
	        return "Present"

	    year = month = None
	    # YYYY-MM or YYYY/M or YYYY/MM
	    m = re.fullmatch(r"(\d{4})[-/](\d{1,2})", ss)
	    if m:
	        year, month = int(m.group(1)), int(m.group(2))
	    else:
	        # MM/YYYY
	        m = re.fullmatch(r"(\d{1,2})/(\d{4})", ss)
	        if m:
	            month, year = int(m.group(1)), int(m.group(2))

	    if year is not None:
	        month_name = _uk_month_name(month)
	        # An impossible month (0, 13, ...) is left as typed rather than being
	        # rendered as " 2020" or silently turned into a valid month.
	        return f"{month_name} {year}" if month_name else ss
	    return ss


	def _postprocess_bullets(text: str) -> str:
	    """Normalise the "-" bullet lines of *text*; other lines pass through.

	    For each line whose first non-blank character is "-":
	      * a leading first-person pronoun (I / We / My, any case) is dropped;
	      * one trailing full stop is removed;
	      * "per cent" / "percent" becomes "%", attached to a preceding digit;
	      * "GBP" becomes "£", attached to a following digit.

	    Lines are re-joined with "\\n", so a trailing newline in the input is not
	    preserved. Falsy input is returned unchanged.
	    """
	    if not text:
	        return text
	    processed = []
	    for line in text.splitlines():
	        if line.lstrip().startswith("-"):
	            # Any indentation and spacing around the dash is allowed; the
	            # alternation bars are unescaped so each pronoun really matches.
	            line = re.sub(r"^(\s*-\s*)(?:I|We|My)\s+", r"\1", line, flags=re.IGNORECASE)
	            # Remove trailing period, keeping any trailing whitespace.
	            line = re.sub(r"\.(\s*)$", r"\1", line)
	            # "12 per cent" -> "12%" (no gap after a digit) ...
	            line = re.sub(r"(?<=\d)\s*per\s*cent\b", "%", line, flags=re.IGNORECASE)
	            # ... and any remaining "per cent"/"percent" -> "%".
	            line = re.sub(r"\bper\s*cent\b", "%", line, flags=re.IGNORECASE)
	            # "GBP 1,200" -> "£1,200"; any other "GBP" is still replaced.
	            line = re.sub(r"\bGBP\s*(?=\d)", "£", line)
	            line = line.replace("GBP", "£")
	        processed.append(line)
	    return "\n".join(processed)

	def _strip_personal_info(text: str) -> str:
	    """Blank out lines that mention a date of birth or a photo.

	    Any line containing the whole word/phrase "date of birth", "dob", "photo"
	    or "headshot" (case-insensitive) is emptied, runs of three or more
	    newlines are collapsed to two, and the result is stripped and terminated
	    with a single newline. Falsy input is returned unchanged.

	    NOTE(review): the match is line-wide, so a legitimate bullet containing
	    the standalone word "photo" is removed as well.
	    """
	    if not text:
	        return text
	    # Flags are combined with a bitwise OR; MULTILINE makes ^/$ work per line.
	    flags = re.IGNORECASE | re.MULTILINE
	    # Remove DOB lines and photo references (".*" spans the rest of the line).
	    text = re.sub(r"^.*\b(?:date of birth|dob)\b.*$", "", text, flags=flags)
	    text = re.sub(r"^.*\b(?:photo|headshot)\b.*$", "", text, flags=flags)
	    # Clean extra blank lines left behind by the removals.
	    text = re.sub(r"\n{3,}", "\n\n", text)
	    return text.strip() + "\n"


	class CVOwnerAgent:
	    """Agent that drafts a resume for one job posting and refines it in cycles.

	    Each cycle injects job-description keywords, optionally asks the LLM to
	    rewrite the draft, applies plain-text clean-ups, and saves the draft
	    together with quality signals to ``memory_store``.
	    """

	    # Vague phrases removed from every draft, matched case-insensitively.
	    _BUZZWORDS = (
	        "results-driven", "team player", "works well alone", "people person",
	        "perfectionist", "multi-tasker", "multi tasker", "dynamic go-getter",
	    )
	    _BUZZWORD_RE = re.compile("|".join(map(re.escape, _BUZZWORDS)), re.IGNORECASE)

	    # Accepted layout names mapped to their canonical preset.
	    _PRESET_ALIASES = {
	        "traditional": "classic",
	        "classic": "classic",
	        "modern": "modern",
	        "minimalist": "minimalist",
	        "executive": "executive",
	    }
	    # Section order per canonical preset (single column, text only).
	    _LAYOUTS = {
	        "classic": ("header", "summary", "skills", "experience", "education", "certs", "languages"),
	        "modern": ("header", "summary", "experience", "skills", "projects", "certs", "education"),
	        "minimalist": ("header", "summary", "skills", "experience", "education"),
	        "executive": ("header", "summary", "achievements", "experience", "skills", "education", "certs"),
	    }
	    # Lower-case month abbreviation -> month number, used only for sorting.
	    _MONTH_INDEX = {
	        name: number
	        for number, name in enumerate(
	            ("jan", "feb", "mar", "apr", "may", "jun",
	             "jul", "aug", "sep", "oct", "nov", "dec"),
	            start=1,
	        )
	    }

	    def __init__(self) -> None:
	        """Set the agent name (used as the memory-store key) and the size limit."""
	        self.name = "cv_owner"
	        self.max_chars = AgentConfig.RESUME_MAX_CHARS

	    # ------------------------------------------------------------------
	    # Small pure helpers
	    # ------------------------------------------------------------------
	    @classmethod
	    def _date_sort_key(cls, value: Optional[str]) -> tuple:
	        """Return a sortable (year, month, day) tuple for a free-form date.

	        Missing dates and "present" sort as the most recent, (9999, 12, 31).
	        "YYYY[-/.MM[-/.DD]]", "MM/YYYY" and "Mon YYYY" are understood; text
	        without a 19xx/20xx year sorts as the oldest, (0, 0, 0). Comparing
	        parsed numbers instead of raw strings keeps "12/2019" before "01/2020".
	        """
	        text = (value or "").strip().lower()
	        if not text or text == "present":
	            return (9999, 12, 31)
	        m = re.match(r"(\d{4})(?:[-/.](\d{1,2})(?:[-/.](\d{1,2}))?)?(?!\d)", text)
	        if m:
	            return (int(m.group(1)), int(m.group(2) or 0), int(m.group(3) or 0))
	        m = re.fullmatch(r"(\d{1,2})[-/.](\d{4})", text)
	        if m:
	            return (int(m.group(2)), int(m.group(1)), 0)
	        year = re.search(r"(?:19|20)\d{2}", text)
	        if not year:
	            return (0, 0, 0)
	        month = re.search(r"\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)", text)
	        return (int(year.group(0)), cls._MONTH_INDEX[month.group(1)] if month else 0, 0)

	    @staticmethod
	    def _merge_keywords(existing: List[str], new: Optional[List[str]]) -> List[str]:
	        """Return *existing* followed by unseen items of *new*, order preserved."""
	        # dict.fromkeys de-duplicates while keeping first-seen order; a set
	        # literal of the two lists would raise TypeError (lists are unhashable).
	        return list(dict.fromkeys([*existing, *(new or [])]))

	    @classmethod
	    def _scrub_buzzwords(cls, text: str) -> str:
	        """Delete every buzzword occurrence from *text*, ignoring case."""
	        return cls._BUZZWORD_RE.sub("", text)

	    @staticmethod
	    def _inject_skill_proficiency(draft: str, prof_map, limit: int = 8) -> str:
	        """Insert a "skill: level" line directly under the first "Skills:" line.

	        At most *limit* entries of *prof_map* are used. The draft is returned
	        unchanged when *prof_map* is not a non-empty dict or when no line
	        starts with "Skills:". Text after the skills line, including any later
	        "Skills:" line, is kept intact.
	        """
	        if not isinstance(prof_map, dict) or not prof_map:
	            return draft
	        head, marker, tail = draft.partition("\nSkills:")
	        if not marker:
	            return draft
	        profs = ", ".join(f"{k}: {v}" for k, v in list(prof_map.items())[:limit])
	        skills_line, _, remainder = tail.partition("\n")
	        return head + marker + skills_line + "\n" + profs + "\n" + remainder

	    @staticmethod
	    def _format_education_line(ed) -> str:
	        """Format one education entry as "Degree Field — School (End)".

	        Missing parts are skipped, so no empty "()" or doubled spaces appear.
	        """
	        qualification = " ".join(str(p) for p in (ed.degree, ed.field_of_study) if p)
	        line = " — ".join(str(p) for p in (qualification, ed.school) if p)
	        return f"{line} ({ed.end_date})" if ed.end_date else line

	    @staticmethod
	    def _has_employment_gap(experiences_sorted, min_gap_years: int = 2) -> bool:
	        """Report whether two adjacent roles are at least *min_gap_years* apart.

	        *experiences_sorted* is newest-first, so each pair is (newer, older):
	        the gap runs from the older role's end year to the newer role's start
	        year. Pairs where either year cannot be read are skipped.
	        """
	        for newer, older in zip(experiences_sorted, experiences_sorted[1:]):
	            older_end = _extract_year(older.end_date)
	            newer_start = _extract_year(newer.start_date)
	            if older_end and newer_start and (newer_start - older_end) >= min_gap_years:
	                return True
	        return False

	    @staticmethod
	    def _build_experience_payload(experiences_sorted) -> List[dict]:
	        """Build the dicts passed to ``format_experience_section``.

	        Bullet depth: the two most recent roles keep up to 6 achievements,
	        later roles keep 1, and roles that ended more than 15 years ago keep
	        none. Age is judged by the end date (a missing or "present" end counts
	        as this year) so a long-running current role is not stripped; the
	        start year is the fallback when the end year is unreadable.
	        """
	        current_year = datetime.now().year
	        payload = []
	        for idx, exp in enumerate(experiences_sorted):
	            achievements = exp.achievements or []
	            end_raw = (exp.end_date or "").strip()
	            if not end_raw or end_raw.lower() == "present":
	                last_active = current_year
	            else:
	                last_active = _extract_year(end_raw) or _extract_year(exp.start_date)
	            stale = bool(last_active and (current_year - last_active > 15))
	            if stale:
	                limited = []
	            elif idx < 2:
	                limited = achievements[:6]
	            else:
	                limited = achievements[:1]
	            payload.append({
	                "title": exp.title,
	                "company": exp.company,
	                "start_date": _uk_date_str(exp.start_date) or exp.start_date,
	                "end_date": _uk_date_str(exp.end_date) or (exp.end_date or ""),
	                "achievements": limited,
	            })
	        return payload

	    # ------------------------------------------------------------------
	    # Optional sections (each returns "" when there is nothing to show)
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _languages_section(profile) -> str:
	        """Render up to 8 dict entries of ``profile.languages`` as bullets."""
	        entries = []
	        for item in (getattr(profile, "languages", []) or [])[:8]:
	            if not isinstance(item, dict):
	                continue
	            name = item.get("language") or item.get("name") or ""
	            level = item.get("level") or ""
	            if name:
	                entries.append(f"{name} ({level})" if level else f"{name}")
	        return ("\n\nLanguages\n- " + "\n- ".join(entries)) if entries else ""

	    @staticmethod
	    def _certifications_section(profile) -> str:
	        """Render up to 6 dict entries of ``profile.certifications`` as bullets."""
	        lines = []
	        for cert in (getattr(profile, "certifications", []) or [])[:6]:
	            if not isinstance(cert, dict):
	                continue
	            name = cert.get("name") or ""
	            if name:
	                extras = (cert.get("issuer") or "", cert.get("year") or "")
	                lines.append(" — ".join(str(p) for p in (name, *extras) if p))
	        return ("\n\nCertifications\n- " + "\n- ".join(lines)) if lines else ""

	    @staticmethod
	    def _projects_section(profile) -> str:
	        """Render up to 4 dict entries of ``profile.projects`` as bullets."""
	        lines = []
	        for proj in (getattr(profile, "projects", []) or [])[:4]:
	            if not isinstance(proj, dict):
	                continue
	            title = proj.get("title") or ""
	            link = proj.get("link") or ""
	            impact = proj.get("impact") or ""
	            if title or impact:
	                # Joining only the non-empty parts avoids a dangling leading
	                # separator when a project has an impact but no title.
	                lines.append(" — ".join(str(p) for p in (title, link, impact) if p))
	        return ("\n\nSelected Projects\n- " + "\n- ".join(lines)) if lines else ""

	    @staticmethod
	    def _achievements_section(experiences_sorted) -> str:
	        """Render the first 5 non-empty achievements of the two newest roles."""
	        bullets = [
	            a
	            for exp in experiences_sorted[:2]
	            for a in (exp.achievements or [])
	            if a
	        ][:5]
	        return ("\n\nSelected Achievements\n- " + "\n- ".join(bullets)) if bullets else ""

	    # ------------------------------------------------------------------
	    # Draft construction
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _refine_summary(summary_text: str, job: JobPosting, jd_keywords: List[str]) -> str:
	        """Tailor the personal statement to the job; "" stays "".

	        With the LLM enabled the summary is rewritten (an empty reply keeps
	        the original). The text is then capped at 180 words and up to three of
	        the top six job keywords that are still missing are appended as a
	        "Key strengths" sentence.
	        """
	        if not summary_text:
	            # No summary provided: keep empty to avoid adding new sections implicitly
	            return ""
	        if llm.enabled:
	            sys_ps = (
	                "You write CV personal statements (Summary) for UK job applications. Keep to ~150 words (100–180). "
	                "Use active voice and clear, specific language; avoid clichés/buzzwords; no personal info. "
	                "Structure: 1) who you are/pro background; 2) key skills + 1–2 quantified achievements relevant to the role; "
	                "3) concise career goal aligned to the target role/company. Tailor to the job's keywords."
	            )
	            usr_ps = (
	                f"Target role: {job.title} at {job.company}\n"
	                f"Job keywords: {', '.join(jd_keywords[:15])}\n\n"
	                f"Existing summary (edit and improve):\n{summary_text}"
	            )
	            generated = llm.generate(sys_ps, usr_ps, max_tokens=220, agent="cv")
	            # An empty reply must not erase the user's own summary.
	            if generated and generated.strip():
	                summary_text = generated
	        summary_text = _clamp_words(summary_text, 180)

	        # Ensure critical JD keywords appear in summary (top 3)
	        lowered = summary_text.lower()
	        missing = [
	            k for k in jd_keywords[:6]
	            if isinstance(k, str) and k and k.lower() not in lowered
	        ][:3]
	        if missing:
	            strengths = "Key strengths: " + ", ".join(missing) + "."
	            body = summary_text.strip()
	            summary_text = f"{body} {strengths}" if body else strengths
	        return summary_text

	    def _compose_from_profile(
	        self,
	        profile: UserProfile,
	        job: JobPosting,
	        jd_keywords: List[str],
	        experiences_sorted,
	        layout_preset: Optional[str],
	    ) -> str:
	        """Build the initial draft from profile data using the chosen layout."""
	        header = format_resume_header(
	            full_name=profile.full_name,
	            headline=profile.headline or job.title,
	            email=profile.email,
	            phone=profile.phone,
	            location=profile.location,
	            links=profile.links,
	        )
	        experience = format_experience_section(self._build_experience_payload(experiences_sorted))
	        skills = format_skills_section(profile.skills)
	        # Personal statement (Summary) refinement (~150 words), tailored to job
	        summary_text = self._refine_summary(profile.summary or "", job, jd_keywords)
	        education_text = "\n".join(
	            self._format_education_line(ed) for ed in profile.education
	        ).strip()

	        preset = self._PRESET_ALIASES.get((layout_preset or "").strip().lower(), "")
	        layout = self._LAYOUTS.get(preset)
	        if layout is None:
	            # Default formatting
	            return basic_resume_template(
	                header=header,
	                summary=(summary_text or None),
	                skills=skills,
	                experience=experience,
	                education=education_text,
	            )

	        # Builders are evaluated lazily so only the sections a layout actually
	        # uses touch the optional profile attributes.
	        builders = {
	            "header": lambda: header,
	            "summary": lambda: ("\nSummary\n" + textwrap.fill(summary_text, width=100)) if summary_text else "",
	            "skills": lambda: ("\n" + skills) if skills else "",
	            "experience": lambda: ("\n\nExperience\n" + experience) if experience else "",
	            "education": lambda: ("\n\nEducation\n" + education_text) if education_text else "",
	            "languages": lambda: self._languages_section(profile),
	            "certs": lambda: self._certifications_section(profile),
	            "projects": lambda: self._projects_section(profile),
	            "achievements": lambda: self._achievements_section(experiences_sorted),
	        }
	        return "".join(builders[key]() for key in layout).strip() + "\n"

	    # ------------------------------------------------------------------
	    # Per-cycle steps
	    # ------------------------------------------------------------------
	    def _llm_refine(
	        self,
	        draft: str,
	        job: JobPosting,
	        jd_keywords: List[str],
	        allowed,
	        guidance,
	        user_chat: Optional[str],
	        agent2_notes: Optional[str],
	    ) -> str:
	        """Ask the LLM to rewrite *draft* for the job and return its reply."""
	        system = (
	            "You refine resumes. Preserve factual accuracy. Keep ATS-friendly text-only formatting. "
	            "Follow UK best practices (Indeed/Reed/StandOut/Novorésumé): keep concise (prefer 1 page; <= 2 pages for senior roles), use clear section headings. "
	            "Present work experience in reverse chronological order, highlight recent quantified achievements, and keep older roles brief. "
	            "Use bullet points for skimmability, maintain consistent spacing and layout, avoid irrelevant info. Do not add images/tables or unusual symbols. "
	            "Tailor to the job's keywords. Prefer quantification where truthful (%, £, time, team size); never fabricate metrics. "
	            "AVOID vague buzzwords (e.g., 'results-driven', 'team player', 'people person', 'perfectionist', 'multi-tasker'). Replace with specific, measurable achievements. "
	            "Use active voice and strong action verbs (e.g., Achieved, Led, Implemented, Improved, Generated, Managed, Completed, Designed). "
	            "Skills: when possible, separate Hard skills vs Soft skills (hard skills first, max ~10), then soft skills. Keep Education concise (highest/most recent first). "
	            "Contact hygiene: prefer professional email; include relevant links (e.g., LinkedIn/portfolio) if provided; never include DOB or photos. "
	            "If a 'Summary'/'Personal Statement' section exists, keep it ~150 words with the intro–skills/achievements–goal structure; do not add new sections. "
	            "UK English, UK date style (MMM YYYY). Use present tense for the current role and past tense for previous roles. Remove first-person pronouns in bullets. "
	            "Use digits for numbers (e.g., 7, 12%, £1,200). Include critical JD keywords verbatim inside bullets (not only in Skills). "
	            f"Apply latest guidance: {guidance}."
	        )
	        notes = f"\nNotes from Agent 2: {agent2_notes}" if agent2_notes else ""
	        custom = f"\nUser instructions: {user_chat}" if user_chat else ""
	        user = (
	            f"Role: {job.title}. Company: {job.company}.\n"
	            f"Job keywords: {', '.join(jd_keywords[:AgentConfig.RESUME_KEYWORDS_COUNT])}.\n"
	            f"Allowed keywords (from user profile): {', '.join(sorted(allowed)[:40])}.\n"
	            f"Rewrite the following resume content to strengthen alignment without inventing new skills.{custom}{notes}\n"
	            f"Enforce reverse chronological experience ordering, bullet points, and consistent headings. Keep within {self.max_chars} characters.\n\n"
	            f"Resume content:\n{draft}"
	        )
	        return llm.generate(system, user, max_tokens=LLMConfig.RESUME_MAX_TOKENS, agent="cv")

	    def _clean_draft(self, draft: str) -> str:
	        """Apply the plain-text clean-up passes, in a fixed order."""
	        # Simple buzzword scrub per Reed guidance; rely on achievements to convey value
	        draft = self._scrub_buzzwords(draft)
	        # Strengthen weak bullet openers to action verbs (The Muse)
	        draft = strengthen_action_verbs(draft)
	        # ATS plain-text scrub: remove tabs
	        draft = draft.replace("\t", " ")
	        # Pronoun/punctuation/currency/percent normalisation
	        draft = _postprocess_bullets(draft)
	        # Strip DOB/photo lines if present
	        return _strip_personal_info(draft)

	    @staticmethod
	    def _collect_signals(draft: str, email: Optional[str], gap_years_flag: bool) -> dict:
	        """Compute the observability signals stored with each cycle's draft."""
	        text = draft or ""
	        lowered = text.lower()
	        lines = text.splitlines()
	        bullet_lines = sum(1 for line in lines if line.strip().startswith("-"))
	        approx_pages = round(max(1, len(text)) / 2400.0, 2)
	        return {
	            "bullet_density": round(bullet_lines / max(1, len(lines)), 3),
	            "quant_count": sum(1 for ch in text if ch.isdigit()) + text.count("%") + text.count("£"),
	            "email_ok": bool(re.match(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$", email or "")),
	            "gap_years_flag": gap_years_flag,
	            "skills_split_hint": ("hard skills" in lowered) or ("soft skills" in lowered),
	            "languages_section": "\nlanguages" in lowered,
	            "links_present": any(token in lowered for token in ("http://", "https://", "linkedin")),
	            "action_verb_count": sum(1 for verb in ACTION_VERBS if verb.lower() in lowered),
	            "approx_pages": approx_pages,
	            "approx_one_page": approx_pages <= 1.2,
	        }

	    # ------------------------------------------------------------------
	    # Public entry point
	    # ------------------------------------------------------------------
	    def create_resume(
	        self,
	        profile: UserProfile,
	        job: JobPosting,
	        user_id: str = "default_user",
	        user_chat: Optional[str] = None,
	        seed_text: Optional[str] = None,
	        agent2_notes: Optional[str] = None,
	        layout_preset: Optional[str] = None,
	    ) -> ResumeDraft:
	        """Create an optimized resume for a specific job posting.

	        Args:
	            profile: Candidate data; header fields, skills, experiences,
	                education and the optional summary are read from it.
	            job: Target posting; its title, company, description and id are used.
	            user_id: Key under which drafts are saved in ``memory_store``.
	            user_chat: Optional free-text instructions forwarded to the LLM.
	            seed_text: Optional existing resume text. When given it is used as
	                the starting draft instead of one built from the profile;
	                seeds longer than 2000 characters are prefixed with distilled
	                bullet points and cut to their first 4000 characters.
	            agent2_notes: Optional notes forwarded to the LLM.
	            layout_preset: "classic" (alias "traditional"), "modern",
	                "minimalist" or "executive"; any other value falls back to
	                ``basic_resume_template``.

	        Returns:
	            A ``ResumeDraft`` holding the final text and the keywords reported
	            by ``ensure_keywords`` over all optimisation cycles.
	        """
	        jd_keywords: List[str] = extract_keywords_from_text(
	            job.description or "",
	            top_k=AgentConfig.JOB_KEYWORDS_COUNT,
	        )
	        allowed = allowed_keywords_from_profile(profile.skills, profile.experiences)

	        # Sort experiences reverse-chronologically (Reed/Indeed best practice)
	        experiences_sorted = sorted(
	            profile.experiences,
	            key=lambda exp: (self._date_sort_key(exp.end_date), self._date_sort_key(exp.start_date)),
	            reverse=True,
	        )
	        # Simple gap signal based on years between adjacent roles
	        gap_years_flag = self._has_employment_gap(experiences_sorted)

	        # Process seed text if provided
	        base_text = seed_text.strip() if seed_text else None
	        if base_text and len(base_text) > 2000:
	            # Distill dense seed into key points to guide the draft
	            bullets = distill_text(base_text, max_points=AgentConfig.DISTILL_MAX_POINTS)
	            base_text = ("\n".join(f"- {b}" for b in bullets) + "\n\n") + base_text[:4000]

	        if base_text:
	            # The seed replaces the profile-built draft, so the profile sections
	            # (and the summary LLM call) are not computed at all.
	            draft = base_text
	        else:
	            draft = self._compose_from_profile(profile, job, jd_keywords, experiences_sorted, layout_preset)

	        # If profile.skill_proficiency exists, append a simple proficiency hint
	        # line under Skills (ATS-safe).
	        # NOTE(review): the `links is not None` precondition is kept from the
	        # earlier implementation; it looks unrelated to proficiencies — confirm.
	        if getattr(profile, "skills", None) and getattr(profile, "links", None) is not None:
	            draft = self._inject_skill_proficiency(draft, getattr(profile, "skill_proficiency", None))

	        guidance = get_role_guidelines(job.title, job.description)
	        used_keywords: List[str] = []

	        # Optimization cycles
	        for cycle in range(AgentConfig.OPTIMIZATION_CYCLES):
	            draft, used_cycle = ensure_keywords(
	                draft,
	                jd_keywords,
	                max_new=AgentConfig.MAX_NEW_KEYWORDS,
	                allowed_keywords=allowed,
	            )
	            used_keywords = self._merge_keywords(used_keywords, used_cycle)

	            if llm.enabled:
	                refined = self._llm_refine(
	                    draft, job, jd_keywords, allowed, guidance, user_chat, agent2_notes
	                )
	                if refined and refined.strip():
	                    draft = refined
	                else:
	                    # Keep the current draft rather than replacing it with nothing.
	                    logger.warning(
	                        "LLM returned an empty resume in cycle %d; keeping previous draft",
	                        cycle + 1,
	                    )

	            draft = self._clean_draft(draft)

	            cov = coverage_score(draft, jd_keywords)
	            conc = conciseness_score(draft, self.max_chars)
	            if conc < 1.0:
	                draft = clamp_to_char_limit(draft, self.max_chars)

	            # Signals for orchestrator/observability (StandOut CV + Novorésumé)
	            memory_store.save(user_id, self.name, {
	                "job_id": job.id,
	                "cycle": cycle + 1,
	                "coverage": cov,
	                "conciseness": conc,
	                "keywords_used": used_keywords,
	                "guidance": (guidance or "")[:500],
	                "user_chat": (user_chat or "")[:500],
	                "agent2_notes": (agent2_notes or "")[:500],
	                "draft": draft,
	                "signals": self._collect_signals(draft, profile.email, gap_years_flag),
	            }, job_id=job.id)

	            logger.debug(
	                "Resume optimization cycle %d: coverage=%.2f, conciseness=%.2f",
	                cycle + 1, cov, conc,
	            )

	        # Final cleanup
	        draft = clamp_to_char_limit(draft, self.max_chars)

	        memory_store.save(user_id, self.name, {
	            "job_id": job.id,
	            "final": True,
	            "keywords_used": used_keywords,
	            "draft": draft,
	        }, job_id=job.id)

	        logger.info("Resume created for job %s with %d keywords", job.id, len(used_keywords))

	        return ResumeDraft(job_id=job.id, text=draft, keywords_used=used_keywords)