"""Web-research helpers: role guidance lookup and URL text extraction."""
from __future__ import annotations

import logging
import os
import re
from typing import Any, Dict, Optional

import httpx

from utils.security import sanitize_user_input, validate_url

logger = logging.getLogger(__name__)

# Returned by get_role_guidelines whenever the Tavily lookup is unavailable
# or yields nothing usable.
_DEFAULT_GUIDANCE = (
    "Use concise, achievement-oriented bullets with metrics; prioritize recent, role-relevant skills; "
    "ensure ATS-friendly formatting; avoid images/tables; tailor keywords to the job posting; keep resume to 1-2 pages and cover letter to <= 1 page; "
    "reflect current tooling (e.g., modern cloud, MLOps/DevOps practices) only if you have real experience."
)

# Returned by cover_letter_inspiration_from_url when the page cannot be used.
_DEFAULT_INSPIRATION = (
    "Use a light, personable tone when appropriate; avoid copying examples; "
    "keep it professional and concise."
)

# Patterns used by _strip_html, compiled once at import time.
_SCRIPT_RE = re.compile(r"<script\b[^>]*>.*?</script\s*>", re.IGNORECASE | re.DOTALL)
_STYLE_RE = re.compile(r"<style\b[^>]*>.*?</style\s*>", re.IGNORECASE | re.DOTALL)
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


def get_role_guidelines(role_title: str, job_description: str) -> str:
    """Fetch role-specific resume/cover-letter guidance from the Tavily search API.

    Args:
        role_title: Job title interpolated into the search query after being
            passed through ``sanitize_user_input`` (capped at 200 characters).
        job_description: Accepted for interface compatibility; it is not part
            of the search query and is currently unused.

    Returns:
        The API's ``answer`` field when it is a string longer than 40
        characters; otherwise up to three result snippets joined with spaces
        and truncated to 1500 characters; otherwise ``_DEFAULT_GUIDANCE``.
        ``_DEFAULT_GUIDANCE`` is also returned when ``TAVILY_API_KEY`` is not
        set, on a non-200 response, on timeout, and on any other error.
    """
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        logger.debug("No Tavily API key, using default guidance")
        return _DEFAULT_GUIDANCE

    try:
        role_title = sanitize_user_input(role_title, max_length=200)

        payload = {
            "api_key": api_key,
            "query": f"best practices {role_title} resume cover letter ats 2025 latest guidance",
            "include_answer": True,
            "max_results": 5,
        }

        with httpx.Client(timeout=20.0) as client:
            resp = client.post("https://api.tavily.com/search", json=payload)

        if resp.status_code != 200:
            logger.warning("Tavily API returned status %s", resp.status_code)
            return _DEFAULT_GUIDANCE

        data: Dict[str, Any] = resp.json()
        if not isinstance(data, dict):
            # Unexpected JSON shape (e.g. a list); nothing to extract.
            return _DEFAULT_GUIDANCE

        answer = data.get("answer")
        if isinstance(answer, str) and len(answer) > 40:
            return sanitize_user_input(answer, max_length=2000)

        results = data.get("results")
        if not isinstance(results, list):
            results = []

        snippets = []
        for item in results[:3]:
            # Skip malformed entries instead of letting one bad item discard
            # the remaining usable snippets via the broad handler below.
            if not isinstance(item, dict):
                continue
            snippet = item.get("content") or item.get("snippet")
            if isinstance(snippet, str) and snippet:
                snippets.append(sanitize_user_input(snippet, max_length=500))

        if snippets:
            return " ".join(snippets)[:1500]

        return _DEFAULT_GUIDANCE

    except httpx.TimeoutException:
        logger.warning("Tavily API timeout")
        return _DEFAULT_GUIDANCE
    except Exception as e:
        # Best-effort lookup: any failure degrades to the static guidance.
        logger.error("Error fetching role guidelines: %s", e)
        return _DEFAULT_GUIDANCE


def _strip_html(html: str) -> str:
    """Reduce an HTML document to whitespace-normalized plain text.

    ``<script>`` and ``<style>`` elements are removed together with their
    contents, every remaining tag is replaced by a space, and runs of
    whitespace are collapsed to single spaces. HTML entities are left as-is.
    """
    # Drop script/style bodies first so their source does not leak into the
    # text once the surrounding tags are stripped.
    text = _SCRIPT_RE.sub(" ", html)
    text = _STYLE_RE.sub(" ", text)
    text = _TAG_RE.sub(" ", text)
    return _WHITESPACE_RE.sub(" ", text).strip()


class _UnsafeURLError(Exception):
    """Raised when a request target (including a redirect hop) fails validation."""


def _reject_unsafe_request(request: httpx.Request) -> None:
    """httpx request hook: abort any request whose URL fails ``validate_url``."""
    target = str(request.url)
    if not validate_url(target):
        raise _UnsafeURLError(target)


def fetch_url_text(url: str, timeout: float = 20.0) -> Optional[str]:
    """Fetch a URL and return its text content with HTML markup removed.

    Args:
        url: Address to fetch. It must pass ``validate_url``; every redirect
            target is re-validated as well, so a redirect cannot lead to a
            URL the validator would reject.
        timeout: Timeout in seconds passed to the httpx client.

    Returns:
        The page text after ``_strip_html`` and ``sanitize_user_input``
        (capped at 10000 characters), or ``None`` when validation fails, the
        response status is not 200, the body is empty, the request times out,
        or any other error occurs.
    """
    if not validate_url(url):
        logger.warning("URL validation failed for: %s", url)
        return None

    # Browser-like headers; some sites refuse requests from default clients.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }

    try:
        with httpx.Client(
            timeout=timeout,
            follow_redirects=True,
            max_redirects=5,
            # The hook runs before every outgoing request, redirects included.
            event_hooks={"request": [_reject_unsafe_request]},
        ) as client:
            resp = client.get(url, headers=headers)

        if resp.status_code != 200 or not resp.text:
            logger.warning("Failed to fetch URL %s: status %s", url, resp.status_code)
            return None

        text = _strip_html(resp.text)
        return sanitize_user_input(text, max_length=10000)

    except httpx.TimeoutException:
        logger.warning("Timeout fetching URL: %s", url)
        return None
    except _UnsafeURLError as exc:
        logger.warning("URL validation failed for: %s", exc)
        return None
    except Exception as e:
        logger.error("Error fetching URL %s: %s", url, e)
        return None


def cover_letter_inspiration_from_url(url: Optional[str]) -> str:
    """Turn a page into high-level stylistic notes, never verbatim content.

    Args:
        url: Page to use as inspiration; may be ``None`` or empty.

    Returns:
        ``""`` when no URL is given; a generic fallback note when the URL
        fails validation or yields no text; otherwise a space-joined list of
        guidance sentences chosen by keyword matches in the page text.
    """
    if not url:
        return ""

    if not validate_url(url):
        logger.warning("Invalid inspiration URL: %s", url)
        return _DEFAULT_INSPIRATION

    text = fetch_url_text(url)
    if not text:
        return _DEFAULT_INSPIRATION

    # Simple keyword heuristics: the page only selects which meta-guidelines
    # are emitted; none of its wording is reused.
    lower = text.lower()
    cues = []
    if any(word in lower for word in ("funny", "humor", "humour")):
        cues.append("Incorporate subtle, tasteful humor without undermining professionalism.")
    if "cover letter" in lower:
        cues.append("Maintain standard cover letter structure (greeting, body, closing).")
    if "example" in lower:  # also covers "examples"
        cues.append("Use the site as inspiration only; do not reuse sentences or unique phrasing.")
    cues.append("Focus on clarity, brevity, and role alignment; avoid clichés and excessive jokes.")
    return " ".join(cues)