File size: 5,536 Bytes
7498f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from __future__ import annotations
import os
import re
import httpx
import logging
from typing import Dict, Any, Optional

from utils.security import validate_url, sanitize_user_input

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Generic resume / cover-letter advice. get_role_guidelines() returns this text
# whenever it cannot obtain role-specific guidance: no TAVILY_API_KEY is set,
# the API call fails or times out, or the response has no usable content.
_DEFAULT_GUIDANCE = (
    "Use concise, achievement-oriented bullets with metrics; prioritize recent, role-relevant skills; "
    "ensure ATS-friendly formatting; avoid images/tables; tailor keywords to the job posting; keep resume to 1-2 pages and cover letter to <= 1 page; "
    "reflect current tooling (e.g., modern cloud, MLOps/DevOps practices) only if you have real experience."
)


def get_role_guidelines(role_title: str, job_description: str) -> str:
    """Return resume / cover-letter guidance for a role via the Tavily search API.

    The function never raises: every failure path falls back to
    ``_DEFAULT_GUIDANCE``.

    Resolution order:

    1. No ``TAVILY_API_KEY`` in the environment -> default guidance.
    2. A non-200 response, a timeout, or any other error -> default guidance.
    3. A string ``answer`` longer than 40 characters -> that answer, sanitised
       and capped at 2000 characters.
    4. Otherwise the ``content`` / ``snippet`` text of the first three
       ``results`` entries, each sanitised to 500 characters, joined with
       spaces and capped at 1500 characters.
    5. Nothing usable -> default guidance.

    Args:
        role_title: Job title used to build the search query.
        job_description: Job posting text. It is sanitised but is not
            currently part of the query.

    Returns:
        Guidance text suitable for inclusion in a prompt.
    """
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        logger.debug("No Tavily API key, using default guidance")
        return _DEFAULT_GUIDANCE

    try:
        # Sanitize inputs before they reach the outbound query.
        role_title = sanitize_user_input(role_title, max_length=200)
        # NOTE(review): kept so that any validation sanitize_user_input performs
        # still runs, but the result is not used in the query below.
        job_description = sanitize_user_input(job_description, max_length=5000)

        payload = {
            "api_key": api_key,
            # NOTE(review): the year is hard-coded and will go stale.
            "query": f"best practices {role_title} resume cover letter ats 2025 latest guidance",
            "include_answer": True,
            "max_results": 5,
        }

        with httpx.Client(timeout=20.0) as client:
            resp = client.post("https://api.tavily.com/search", json=payload)

        if resp.status_code != 200:
            logger.warning("Tavily API returned status %s", resp.status_code)
            return _DEFAULT_GUIDANCE

        data = resp.json()
        if not isinstance(data, dict):
            # A JSON array/scalar has no "answer"/"results" keys to read.
            logger.warning(
                "Tavily API returned unexpected payload type %s", type(data).__name__
            )
            return _DEFAULT_GUIDANCE

        answer = data.get("answer")
        if isinstance(answer, str) and len(answer) > 40:
            return sanitize_user_input(answer, max_length=2000)

        results = data.get("results")
        if not isinstance(results, list):
            results = []

        snippets = []
        for entry in results[:3]:
            # Skip malformed entries individually so that one bad item does not
            # discard the usable snippets around it.
            if not isinstance(entry, dict):
                continue
            raw = entry.get("content") or entry.get("snippet")
            if not isinstance(raw, str) or not raw:
                continue
            cleaned = sanitize_user_input(raw, max_length=500)
            if cleaned:
                snippets.append(cleaned)

        if snippets:
            return " ".join(snippets)[:1500]

        return _DEFAULT_GUIDANCE

    except httpx.TimeoutException:
        logger.warning("Tavily API timeout")
        return _DEFAULT_GUIDANCE
    except Exception as e:
        # Deliberate best-effort boundary: guidance is optional, so any failure
        # degrades to the default text instead of propagating.
        logger.error("Error fetching role guidelines: %s", e)
        return _DEFAULT_GUIDANCE


def _strip_html(html: str) -> str:
    """Reduce an HTML document to plain, whitespace-normalised text.

    Processing order:

    1. ``<script>`` and ``<style>`` elements are removed together with their
       contents (close tags such as ``</script >`` are recognised too).
    2. HTML comments are removed, including ones whose text contains ``>``.
    3. Every remaining tag is replaced by a space.
    4. Character references (``&amp;``, ``&nbsp;``, ``&#39;`` ...) are decoded.
       This happens after tag removal, so escaped markup such as
       ``&lt;b&gt;`` survives as the literal text ``<b>``.
    5. Runs of whitespace collapse to a single space and the ends are trimmed.

    This is a regex-based best effort, not a full HTML parser.

    Args:
        html: Raw markup.

    Returns:
        The extracted text; ``""`` when the input holds nothing but markup.
    """
    # Imported locally and by name: the ``html`` parameter shadows the stdlib
    # module of the same name inside this function.
    from html import unescape

    # Scripts/styles go first so that a "<!--" inside JavaScript cannot make the
    # comment pattern swallow real page content.
    text = re.sub(
        r"<script\b[^>]*>[\s\S]*?</script\b[^>]*>", " ", html, flags=re.IGNORECASE
    )
    text = re.sub(
        r"<style\b[^>]*>[\s\S]*?</style\b[^>]*>", " ", text, flags=re.IGNORECASE
    )
    text = re.sub(r"<!--[\s\S]*?-->", " ", text)
    text = re.sub(r"<[^>]+>", " ", text)
    # Decode before collapsing whitespace so that &nbsp; is normalised as well.
    text = unescape(text)
    return re.sub(r"\s+", " ", text).strip()


def fetch_url_text(
    url: str, timeout: float = 20.0, max_redirects: int = 5
) -> Optional[str]:
    """Fetch a URL and return its text content, validating every hop.

    The URL is checked with ``validate_url`` before any request is made.
    Redirects are followed by hand rather than by httpx so that each redirect
    target goes through ``validate_url`` as well; otherwise a URL that passes
    validation could redirect the client to an address that would not.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds passed to ``httpx.Client``.
        max_redirects: Maximum number of redirects to follow before giving up.

    Returns:
        The page text with HTML stripped, sanitised and capped at 10000
        characters, or ``None`` when validation fails, the final status is
        not 200, the body is empty, the redirect limit is exceeded, or any
        error occurs. The function never raises.
    """
    # Validate URL before fetching
    if not validate_url(url):
        logger.warning("URL validation failed for: %s", url)
        return None

    # Add headers to appear more like a regular browser
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }

    try:
        with httpx.Client(timeout=timeout, follow_redirects=False) as client:
            resp = client.get(url, headers=headers)

            # With follow_redirects disabled, httpx exposes the request it
            # would have sent next as ``next_request``; it is None once the
            # response is not a redirect.
            hops = 0
            while resp.next_request is not None:
                hops += 1
                if hops > max_redirects:
                    logger.warning("Too many redirects fetching URL: %s", url)
                    return None

                target = str(resp.next_request.url)
                if not validate_url(target):
                    logger.warning(
                        "URL validation failed for redirect target %s (from %s)",
                        target,
                        url,
                    )
                    return None

                resp = client.send(resp.next_request)

        if resp.status_code != 200 or not resp.text:
            logger.warning("Failed to fetch URL %s: status %s", url, resp.status_code)
            return None

        # Sanitize the fetched content
        text = _strip_html(resp.text)
        return sanitize_user_input(text, max_length=10000)

    except httpx.TimeoutException:
        logger.warning("Timeout fetching URL: %s", url)
        return None
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "no content".
        logger.error("Error fetching URL %s: %s", url, e)
        return None


def cover_letter_inspiration_from_url(url: Optional[str]) -> str:
    """Turn a reference page into short stylistic notes for a cover letter.

    The page itself is never quoted; only the presence of a few keywords in
    its text decides which meta-guidelines are emitted.

    Args:
        url: Page to use as inspiration. ``None`` or ``""`` means no
            inspiration was requested.

    Returns:
        ``""`` when no URL is given; a generic fallback sentence when the URL
        is rejected by ``validate_url`` or yields no text; otherwise the
        matching cues followed by a closing cue, joined with single spaces.
    """
    fallback = "Use a light, personable tone when appropriate; avoid copying examples; keep it professional and concise."

    if not url:
        return ""

    # Reject bad URLs up front so the rejection is logged as an inspiration issue.
    if not validate_url(url):
        logger.warning(f"Invalid inspiration URL: {url}")
        return fallback

    page_text = fetch_url_text(url)
    if not page_text:
        return fallback

    # Keyword groups, in output order, mapped to the guideline each one triggers.
    haystack = page_text.lower()
    keyword_cues = (
        (
            ("funny", "humor", "humour"),
            "Incorporate subtle, tasteful humor without undermining professionalism.",
        ),
        (
            ("cover letter",),
            "Maintain standard cover letter structure (greeting, body, closing).",
        ),
        (
            ("example", "examples"),
            "Use the site as inspiration only; do not reuse sentences or unique phrasing.",
        ),
    )
    notes = [
        cue
        for keywords, cue in keyword_cues
        if any(word in haystack for word in keywords)
    ]

    # This cue is always included, whatever the page contains.
    notes.append("Focus on clarity, brevity, and role alignment; avoid clichés and excessive jokes.")

    return " ".join(notes)