Noo88ear's picture
🚀 Initial deployment of Multi-Agent Job Application Assistant
7498f2c
from __future__ import annotations
import os
import re
import httpx
import logging
from typing import Dict, Any, Optional
from utils.security import validate_url, sanitize_user_input
logger = logging.getLogger(__name__)
# Generic resume / cover-letter advice. get_role_guidelines() returns this
# string whenever the Tavily lookup cannot supply something better: no API key
# configured, non-200 response, timeout, unexpected error, or a response with
# neither a usable answer nor any result snippets.
_DEFAULT_GUIDANCE = (
"Use concise, achievement-oriented bullets with metrics; prioritize recent, role-relevant skills; "
"ensure ATS-friendly formatting; avoid images/tables; tailor keywords to the job posting; keep resume to 1-2 pages and cover letter to <= 1 page; "
"reflect current tooling (e.g., modern cloud, MLOps/DevOps practices) only if you have real experience."
)
def _extract_result_snippets(results: Any, limit: int = 3) -> list[str]:
    """Return sanitized text snippets from the first *limit* search results.

    Entries that are not dicts, or whose ``content``/``snippet`` value is not
    a non-empty string, are skipped so that one malformed entry does not
    discard the usable ones.
    """
    if not isinstance(results, list):
        return []

    snippets = []
    for item in results[:limit]:
        if not isinstance(item, dict):
            continue
        text = item.get("content") or item.get("snippet")
        if isinstance(text, str) and text:
            snippets.append(sanitize_user_input(text, max_length=500))
    return snippets


def get_role_guidelines(role_title: str, job_description: str) -> str:
    """Fetch role-specific resume/cover-letter guidance via the Tavily search API.

    This is a best-effort lookup: every failure path returns
    ``_DEFAULT_GUIDANCE`` instead of raising.

    Args:
        role_title: Job title used to build the search query. It is passed
            through ``sanitize_user_input`` (max 200 chars) before use.
        job_description: Accepted for interface compatibility. It is not
            currently part of the search query.

    Returns:
        The API's synthesized ``answer`` (sanitized, max 2000 chars) when it
        is a string longer than 40 characters; otherwise up to three result
        snippets joined by spaces and truncated to 1500 characters; otherwise
        ``_DEFAULT_GUIDANCE``.
    """
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        logger.debug("No Tavily API key, using default guidance")
        return _DEFAULT_GUIDANCE

    try:
        # Sanitize the user-supplied title before embedding it in the query.
        role_title = sanitize_user_input(role_title, max_length=200)
        payload = {
            "api_key": api_key,
            "query": f"best practices {role_title} resume cover letter ats 2025 latest guidance",
            "include_answer": True,
            "max_results": 5,
        }
        with httpx.Client(timeout=20.0) as client:
            resp = client.post("https://api.tavily.com/search", json=payload)

        if resp.status_code != 200:
            logger.warning("Tavily API returned status %s", resp.status_code)
            return _DEFAULT_GUIDANCE

        data = resp.json()
        if not isinstance(data, dict):
            logger.warning("Tavily API returned an unexpected payload type")
            return _DEFAULT_GUIDANCE

        # Prefer the synthesized answer; very short answers are treated as
        # unhelpful and we fall through to the raw result snippets.
        answer = data.get("answer")
        if isinstance(answer, str) and len(answer) > 40:
            return sanitize_user_input(answer, max_length=2000)

        snippets = _extract_result_snippets(data.get("results"))
        if snippets:
            return " ".join(snippets)[:1500]
        return _DEFAULT_GUIDANCE
    except httpx.TimeoutException:
        logger.warning("Tavily API timeout")
        return _DEFAULT_GUIDANCE
    except Exception as e:
        # Boundary of a best-effort feature: log with traceback, then degrade.
        logger.exception("Error fetching role guidelines: %s", e)
        return _DEFAULT_GUIDANCE
def _strip_html(html: str) -> str:
"""Remove HTML tags from text."""
text = re.sub(r"<script[\s\S]*?</script>", " ", html, flags=re.IGNORECASE)
text = re.sub(r"<style[\s\S]*?</style>", " ", text, flags=re.IGNORECASE)
text = re.sub(r"<[^>]+>", " ", text)
text = re.sub(r"\s+", " ", text).strip()
return text
def fetch_url_text(url: str, timeout: float = 20.0) -> Optional[str]:
    """Fetch a URL and return its text content with HTML stripped.

    The URL is checked with ``validate_url`` before any request is made.
    Redirects are followed manually (at most 5) so that every redirect target
    is checked with ``validate_url`` as well; with automatic redirect
    following, a validated URL could bounce the client to a target that was
    never validated.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds passed to ``httpx.Client``.

    Returns:
        The page text (tags removed, sanitized, max 10000 chars), or ``None``
        when validation fails, the final response is not a non-empty 200,
        the redirect limit is exceeded, or a network error occurs.
    """
    # Validate URL before fetching
    if not validate_url(url):
        logger.warning(f"URL validation failed for: {url}")
        return None

    max_redirects = 5
    redirect_statuses = {301, 302, 303, 307, 308}
    # Add headers to appear more like a regular browser
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }

    try:
        with httpx.Client(timeout=timeout, follow_redirects=False) as client:
            current_url = url
            # One initial request plus up to ``max_redirects`` redirect hops.
            for _ in range(max_redirects + 1):
                resp = client.get(current_url, headers=headers)
                location = resp.headers.get("location")
                if resp.status_code not in redirect_statuses or not location:
                    break
                # Resolve relative Location values against the current URL,
                # then re-validate before following.
                next_url = str(resp.url.join(location))
                if not validate_url(next_url):
                    logger.warning(
                        "Redirect target failed URL validation: %s", next_url
                    )
                    return None
                current_url = next_url
            else:
                logger.warning("Too many redirects fetching URL: %s", url)
                return None

        if resp.status_code != 200 or not resp.text:
            logger.warning(f"Failed to fetch URL {url}: status {resp.status_code}")
            return None

        # Sanitize the fetched content
        text = _strip_html(resp.text)
        return sanitize_user_input(text, max_length=10000)
    except httpx.TimeoutException:
        logger.warning(f"Timeout fetching URL: {url}")
        return None
    except Exception as e:
        logger.error(f"Error fetching URL {url}: {e}")
        return None
def cover_letter_inspiration_from_url(url: Optional[str]) -> str:
    """Turn an example page into high-level style notes, never verbatim text.

    Returns an empty string when no URL is given. When the URL fails
    ``validate_url`` or no text can be fetched from it, a generic tone
    recommendation is returned instead. Otherwise the page text is scanned
    for a few keywords and the matching meta-guidelines are joined with
    spaces, always ending with a closing reminder about clarity and brevity.
    """
    fallback = "Use a light, personable tone when appropriate; avoid copying examples; keep it professional and concise."

    if not url:
        return ""

    # Reject bad URLs up front so the warning names the inspiration URL.
    if not validate_url(url):
        logger.warning(f"Invalid inspiration URL: {url}")
        return fallback

    page_text = fetch_url_text(url)
    if not page_text:
        return fallback

    # Simple keyword heuristics: each group of trigger words maps to one note.
    haystack = page_text.lower()
    keyword_cues = (
        (
            ("funny", "humor", "humour"),
            "Incorporate subtle, tasteful humor without undermining professionalism.",
        ),
        (
            ("cover letter",),
            "Maintain standard cover letter structure (greeting, body, closing).",
        ),
        (
            ("example", "examples"),
            "Use the site as inspiration only; do not reuse sentences or unique phrasing.",
        ),
    )
    notes = [
        note
        for triggers, note in keyword_cues
        if any(word in haystack for word in triggers)
    ]
    notes.append("Focus on clarity, brevity, and role alignment; avoid clichés and excessive jokes.")
    return " ".join(notes)