Spaces:
Runtime error
Runtime error
File size: 5,536 Bytes
7498f2c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | from __future__ import annotations
import os
import re
import httpx
import logging
from typing import Dict, Any, Optional
from utils.security import validate_url, sanitize_user_input
logger = logging.getLogger(__name__)
_DEFAULT_GUIDANCE = (
"Use concise, achievement-oriented bullets with metrics; prioritize recent, role-relevant skills; "
"ensure ATS-friendly formatting; avoid images/tables; tailor keywords to the job posting; keep resume to 1-2 pages and cover letter to <= 1 page; "
"reflect current tooling (e.g., modern cloud, MLOps/DevOps practices) only if you have real experience."
)
def get_role_guidelines(role_title: str, job_description: str) -> str:
    """Look up resume/cover-letter advice for a role via the Tavily search API.

    The API key is read from the ``TAVILY_API_KEY`` environment variable.
    Without a key, or on any non-200 response, timeout, or other exception,
    the module-level ``_DEFAULT_GUIDANCE`` string is returned instead.

    Args:
        role_title: Job title; passed through ``sanitize_user_input`` and
            interpolated into the search query.
        job_description: Job posting text; passed through
            ``sanitize_user_input`` but not otherwise used by this function.

    Returns:
        The API's ``answer`` field when it is a string longer than 40
        characters (passed through ``sanitize_user_input`` with
        ``max_length=2000``); otherwise the ``content``/``snippet`` text of
        up to the first three results, space-joined and cut to 1500
        characters; otherwise ``_DEFAULT_GUIDANCE``.
    """
    tavily_key = os.environ.get("TAVILY_API_KEY")
    if not tavily_key:
        logger.debug("No Tavily API key, using default guidance")
        return _DEFAULT_GUIDANCE

    try:
        # Clean both inputs up front; only the role title feeds the query.
        role_title = sanitize_user_input(role_title, max_length=200)
        job_description = sanitize_user_input(job_description, max_length=5000)

        search_request = {
            "api_key": tavily_key,
            "query": f"best practices {role_title} resume cover letter ats 2025 latest guidance",
            "include_answer": True,
            "max_results": 5,
        }
        with httpx.Client(timeout=20.0) as client:
            response = client.post("https://api.tavily.com/search", json=search_request)

        if response.status_code != 200:
            logger.warning(f"Tavily API returned status {response.status_code}")
            return _DEFAULT_GUIDANCE

        body: Dict[str, Any] = response.json()

        # Prefer the synthesized answer when it is long enough to be useful.
        summary = body.get("answer")
        if isinstance(summary, str) and len(summary) > 40:
            return sanitize_user_input(summary, max_length=2000)

        # Otherwise stitch together text from the top three search hits.
        hits = body.get("results") or []
        raw_snippets = (hit.get("content") or hit.get("snippet") for hit in hits[:3])
        cleaned = [
            sanitize_user_input(raw, max_length=500) for raw in raw_snippets if raw
        ]
        if not cleaned:
            return _DEFAULT_GUIDANCE
        return " ".join(cleaned)[:1500]
    except httpx.TimeoutException:
        logger.warning("Tavily API timeout")
        return _DEFAULT_GUIDANCE
    except Exception as e:
        # Best-effort lookup: any failure degrades to the default guidance.
        logger.error(f"Error fetching role guidelines: {e}")
        return _DEFAULT_GUIDANCE
def _strip_html(html: str) -> str:
"""Remove HTML tags from text."""
text = re.sub(r"<script[\s\S]*?</script>", " ", html, flags=re.IGNORECASE)
text = re.sub(r"<style[\s\S]*?</style>", " ", text, flags=re.IGNORECASE)
text = re.sub(r"<[^>]+>", " ", text)
text = re.sub(r"\s+", " ", text).strip()
return text
def fetch_url_text(url: str, timeout: float = 20.0) -> Optional[str]:
    """Fetch a URL and return its tag-stripped text, validating every hop.

    The initial URL and each redirect target are checked with
    ``validate_url`` before being requested. Redirects are followed manually
    (at most five) so that an approved URL cannot bounce the request to a
    target that was never validated.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds handed to ``httpx.Client``.

    Returns:
        The page text after ``_strip_html`` and ``sanitize_user_input``
        (``max_length=10000``), or ``None`` when validation fails, the
        redirect limit is exceeded, the final status is not 200, the body is
        empty, or any exception (including a timeout) occurs.
    """
    # Validate URL before fetching
    if not validate_url(url):
        logger.warning(f"URL validation failed for: {url}")
        return None

    max_redirects = 5
    # Add headers to appear more like a regular browser
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }

    try:
        # Automatic redirect following is disabled on purpose: httpx would
        # request the redirect target without it ever passing validate_url.
        with httpx.Client(timeout=timeout, follow_redirects=False) as client:
            resp = client.get(url, headers=headers)
            hops = 0
            # next_request is populated by httpx only for redirect responses
            # that carry a Location header.
            while resp.next_request is not None:
                if hops >= max_redirects:
                    logger.warning(f"Too many redirects fetching URL: {url}")
                    return None
                target = str(resp.next_request.url)
                if not validate_url(target):
                    logger.warning(f"URL validation failed for redirect target: {target}")
                    return None
                resp = client.send(resp.next_request)
                hops += 1

            if resp.status_code != 200 or not resp.text:
                logger.warning(f"Failed to fetch URL {url}: status {resp.status_code}")
                return None

            # Sanitize the fetched content
            text = _strip_html(resp.text)
            return sanitize_user_input(text, max_length=10000)
    except httpx.TimeoutException:
        logger.warning(f"Timeout fetching URL: {url}")
        return None
    except Exception as e:
        # Best-effort fetch: callers treat None as "no content available".
        logger.error(f"Error fetching URL {url}: {e}")
        return None
def cover_letter_inspiration_from_url(url: Optional[str]) -> str:
    """Derive high-level style notes from a page without quoting its content.

    Args:
        url: Page to draw inspiration from; ``None`` or an empty string
            yields an empty result.

    Returns:
        ``""`` when no URL is given. A fixed generic tone hint when the URL
        fails ``validate_url`` or ``fetch_url_text`` returns nothing.
        Otherwise a space-joined list of guideline sentences chosen by
        keyword matches in the lower-cased page text, always ending with a
        general clarity/brevity reminder.
    """
    if not url:
        return ""

    fallback = "Use a light, personable tone when appropriate; avoid copying examples; keep it professional and concise."

    # Validate URL first
    if not validate_url(url):
        logger.warning(f"Invalid inspiration URL: {url}")
        return fallback

    page_text = fetch_url_text(url)
    if not page_text:
        return fallback

    # Simple heuristics: each rule maps trigger words to one meta-guideline.
    haystack = page_text.lower()
    keyword_rules = (
        (
            ("funny", "humor", "humour"),
            "Incorporate subtle, tasteful humor without undermining professionalism.",
        ),
        (
            ("cover letter",),
            "Maintain standard cover letter structure (greeting, body, closing).",
        ),
        (
            ("example", "examples"),
            "Use the site as inspiration only; do not reuse sentences or unique phrasing.",
        ),
    )
    notes = [
        guideline
        for triggers, guideline in keyword_rules
        if any(word in haystack for word in triggers)
    ]
    notes.append("Focus on clarity, brevity, and role alignment; avoid clichés and excessive jokes.")
    return " ".join(notes)