Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| import os | |
| import re | |
| import httpx | |
| import logging | |
| from typing import Dict, Any, Optional | |
| from utils.security import validate_url, sanitize_user_input | |
| logger = logging.getLogger(__name__) | |
| _DEFAULT_GUIDANCE = ( | |
| "Use concise, achievement-oriented bullets with metrics; prioritize recent, role-relevant skills; " | |
| "ensure ATS-friendly formatting; avoid images/tables; tailor keywords to the job posting; keep resume to 1-2 pages and cover letter to <= 1 page; " | |
| "reflect current tooling (e.g., modern cloud, MLOps/DevOps practices) only if you have real experience." | |
| ) | |
def get_role_guidelines(role_title: str, job_description: str) -> str:
    """Return resume/cover-letter guidance for a role via the Tavily search API.

    The function never raises: any missing API key, non-200 response,
    unexpected payload shape, timeout or other error results in
    ``_DEFAULT_GUIDANCE`` being returned instead.

    Args:
        role_title: Job title used to build the search query (sanitized and
            capped at 200 characters before use).
        job_description: Job posting text. It is sanitized but is not
            currently part of the search query.

    Returns:
        The API's synthesized answer (sanitized, at most 2000 characters) when
        it is a string longer than 40 characters; otherwise the sanitized
        content of up to the first three results joined with spaces and cut
        to 1500 characters; otherwise ``_DEFAULT_GUIDANCE``.
    """
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        logger.debug("No Tavily API key, using default guidance")
        return _DEFAULT_GUIDANCE

    try:
        # Sanitize inputs before they are interpolated into the query.
        role_title = sanitize_user_input(role_title, max_length=200)
        # NOTE(review): the sanitized description is not used below; the call
        # is kept in case the sanitizer also validates its input - confirm.
        job_description = sanitize_user_input(job_description, max_length=5000)

        payload = {
            "api_key": api_key,
            "query": f"best practices {role_title} resume cover letter ats 2025 latest guidance",
            "include_answer": True,
            "max_results": 5,
        }
        with httpx.Client(timeout=20.0) as client:
            resp = client.post("https://api.tavily.com/search", json=payload)

        if resp.status_code != 200:
            logger.warning("Tavily API returned status %s", resp.status_code)
            return _DEFAULT_GUIDANCE

        data = resp.json()
        if not isinstance(data, dict):
            # A JSON array/scalar has no "answer"/"results" keys to read.
            logger.warning("Tavily API returned an unexpected payload type")
            return _DEFAULT_GUIDANCE

        # Prefer the synthesized answer when it is long enough to be useful.
        answer = data.get("answer")
        if isinstance(answer, str) and len(answer) > 40:
            return sanitize_user_input(answer, max_length=2000)

        results = data.get("results")
        if not isinstance(results, list):
            results = []

        snippets = []
        for entry in results[:3]:
            # Skip malformed entries instead of letting one bad item discard
            # every usable snippet via the broad exception handler below.
            if not isinstance(entry, dict):
                continue
            text = entry.get("content") or entry.get("snippet")
            if isinstance(text, str) and text:
                snippets.append(sanitize_user_input(text, max_length=500))

        if snippets:
            return " ".join(snippets)[:1500]
        return _DEFAULT_GUIDANCE
    except httpx.TimeoutException:
        logger.warning("Tavily API timeout")
        return _DEFAULT_GUIDANCE
    except Exception as e:
        # Best-effort lookup: guidance is optional, so degrade to the default.
        logger.error("Error fetching role guidelines: %s", e)
        return _DEFAULT_GUIDANCE
| def _strip_html(html: str) -> str: | |
| """Remove HTML tags from text.""" | |
| text = re.sub(r"<script[\s\S]*?</script>", " ", html, flags=re.IGNORECASE) | |
| text = re.sub(r"<style[\s\S]*?</style>", " ", text, flags=re.IGNORECASE) | |
| text = re.sub(r"<[^>]+>", " ", text) | |
| text = re.sub(r"\s+", " ", text).strip() | |
| return text | |
def fetch_url_text(url: str, timeout: float = 20.0) -> Optional[str]:
    """Fetch a URL and return its text content, validating every hop.

    The initial URL and each redirect target are passed through
    ``validate_url`` before a request is sent, so a URL that passes validation
    cannot bounce the fetch to a target that would have been rejected.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds handed to the HTTP client.

    Returns:
        The page text with HTML stripped and sanitized (at most 10000
        characters), or ``None`` if validation fails, the redirect limit is
        exceeded, the final status is not 200, the body is empty, or any
        error occurs. This function does not raise.
    """
    # Validate URL before fetching
    if not validate_url(url):
        logger.warning("URL validation failed for: %s", url)
        return None

    max_redirects = 5
    # Headers that make the request look like a regular browser.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }

    try:
        # Redirects are followed manually so each target can be validated;
        # automatic following would only ever check the first URL.
        with httpx.Client(timeout=timeout, follow_redirects=False) as client:
            resp = client.get(url, headers=headers)
            hops = 0
            # httpx sets next_request on a redirect response that carries a
            # Location header when automatic redirect following is disabled.
            while resp.next_request is not None:
                if hops >= max_redirects:
                    logger.warning("Too many redirects fetching URL: %s", url)
                    return None
                target = str(resp.next_request.url)
                if not validate_url(target):
                    logger.warning("Redirect target failed validation: %s", target)
                    return None
                resp = client.send(resp.next_request)
                hops += 1

        if resp.status_code != 200 or not resp.text:
            logger.warning("Failed to fetch URL %s: status %s", url, resp.status_code)
            return None

        # Sanitize the fetched content
        text = _strip_html(resp.text)
        return sanitize_user_input(text, max_length=10000)
    except httpx.TimeoutException:
        logger.warning("Timeout fetching URL: %s", url)
        return None
    except Exception as e:
        # Best-effort fetch: callers treat None as "no content available".
        logger.error("Error fetching URL %s: %s", url, e)
        return None
def cover_letter_inspiration_from_url(url: Optional[str]) -> str:
    """Turn a reference page into short, high-level style notes.

    The page text is only scanned for a few keywords; none of it is copied
    into the result.

    Args:
        url: Page to draw inspiration from. ``None`` or an empty string
            yields an empty result.

    Returns:
        ``""`` when no URL is given; a generic tone hint when the URL fails
        validation or no text could be fetched; otherwise the
        keyword-triggered notes followed by a closing note, joined with
        single spaces.
    """
    fallback = (
        "Use a light, personable tone when appropriate; avoid copying examples; "
        "keep it professional and concise."
    )

    if not url:
        return ""

    # Reject bad URLs up front so the log entry names the inspiration URL.
    if not validate_url(url):
        logger.warning(f"Invalid inspiration URL: {url}")
        return fallback

    page_text = fetch_url_text(url)
    if not page_text:
        return fallback

    # Simple heuristics: each group of keywords maps to one meta-guideline.
    haystack = page_text.lower()
    keyword_cues = (
        (
            ("funny", "humor", "humour"),
            "Incorporate subtle, tasteful humor without undermining professionalism.",
        ),
        (
            ("cover letter",),
            "Maintain standard cover letter structure (greeting, body, closing).",
        ),
        (
            ("example", "examples"),
            "Use the site as inspiration only; do not reuse sentences or unique phrasing.",
        ),
    )
    notes = [
        cue
        for keywords, cue in keyword_cues
        if any(keyword in haystack for keyword in keywords)
    ]
    # The closing note is always included, whatever the page contains.
    notes.append("Focus on clarity, brevity, and role alignment; avoid clichés and excessive jokes.")
    return " ".join(notes)