"""Estimate annual base-salary ranges for a role from web-search snippets.

Salary figures are scraped from Tavily search results, normalised to GBP
using the static ``FX_RATES`` table, trimmed of outliers and reported in
GBP, USD and EUR. When no usable figures are found, a location-based
heuristic range is returned instead.
"""
from __future__ import annotations

import logging
import math
import os
import re
import time
from threading import RLock
from typing import Dict, List, Optional, Tuple

import httpx

from .cache import TTLCache

logger = logging.getLogger(__name__)

# Simple FX rates; in production pull from a rates API
FX_RATES = {
    ("GBP", "USD"): 1.27,
    ("GBP", "EUR"): 1.17,
    ("USD", "GBP"): 1 / 1.27,
    ("EUR", "GBP"): 1 / 1.17,
}

_SALARY_CACHE = TTLCache(ttl_seconds=3600, max_items=512)
# Maps a cache key to the wall-clock time its lookup started; guarded by _LOCK.
_INFLIGHT: Dict[str, float] = {}
_LOCK = RLock()

# Amounts outside this band are treated as noise (hourly rates, revenue, ...).
_MIN_SALARY = 20_000
_MAX_SALARY = 350_000

_DEBOUNCE_WINDOW_SECONDS = 5.0
_DEBOUNCE_WAIT_SECONDS = 0.25

_SEARCH_URL = "https://api.tavily.com/search"
_SEARCH_ATTEMPTS = 3

# Group 1: the figure ("85", "85,000" or "85000").
# The negative lookahead rejects longer numbers such as "120,000,000".
# Group 2: an optional thousands suffix ("k"/"K") that must end a word,
# so "£50 kept" is not read as 50k.
_AMOUNT = r"\s?(\d{2,3},\d{3}|\d{5,6}|\d{2,3})(?!,?\d)(?:\s?([kK])\b)?"
_SALARY_PATTERNS = (
    (re.compile(r"£" + _AMOUNT), "GBP"),
    (re.compile(r"\$" + _AMOUNT), "USD"),
    (re.compile(r"(?:EUR|€)" + _AMOUNT), "EUR"),
)


def _key(role: str, location: Optional[str], industry: Optional[str]) -> str:
    """Build the case-insensitive cache key for a role/location/industry."""
    parts = (role, location or "", industry or "")
    return "::".join(part.lower() for part in parts)


def _convert(amount: float, src: str, dst: str) -> float:
    """Convert *amount* from currency *src* to *dst* using ``FX_RATES``.

    Pairs without a direct rate are crossed through GBP when neither side
    is GBP; a missing leg of that cross is treated as a rate of 1.0. If
    one side is GBP and the rate is unknown, the amount is returned
    unchanged (best-effort behaviour, no error is raised).
    """
    if src == dst:
        return amount

    direct = FX_RATES.get((src, dst))
    if direct is not None:
        return amount * direct

    if "GBP" in (src, dst):
        # No rate available and nothing to cross through.
        return amount

    to_gbp = FX_RATES.get((src, "GBP"), 1.0)
    from_gbp = FX_RATES.get(("GBP", dst), 1.0)
    return amount * to_gbp * from_gbp


def _parse_salaries(text: str) -> List[Tuple[float, str]]:
    """Extract plausible annual salary figures from free text.

    Recognises amounts prefixed with ``£``, ``$``, ``€`` or ``EUR``, written
    as ``50k``/``50 K``, ``50,000`` or ``50000``. Only figures between
    20,000 and 350,000 (in their own currency) are kept.

    Returns:
        ``(amount, currency_code)`` tuples, grouped by currency in the
        order GBP, USD, EUR.
    """
    found: List[Tuple[float, str]] = []
    if not text:
        return found

    for pattern, ccy in _SALARY_PATTERNS:
        for match in pattern.finditer(text):
            amount = float(match.group(1).replace(",", ""))
            # The suffix is captured by the regex itself; inspecting the text
            # after the match would miss it, because the match consumes it.
            if match.group(2):
                amount *= 1000
            if _MIN_SALARY <= amount <= _MAX_SALARY:
                found.append((amount, ccy))
    return found


def _fallback_range(location: Optional[str]) -> Dict[str, Dict[str, int]]:
    """Return the heuristic range used when search yields no usable figures."""
    base = 90000 if (location and location.lower().startswith("london")) else 110000
    return _make_range(int(base * 0.8), int(base * 1.4))


def _extract_texts(data: dict) -> List[str]:
    """Pull the answer and up to five result snippets out of a search response."""
    texts: List[str] = []
    answer = data.get("answer") or ""
    if answer:
        texts.append(answer)
    for item in (data.get("results") or [])[:5]:
        snippet = item.get("content") or item.get("snippet") or ""
        if snippet:
            texts.append(snippet)
    return texts


def _search_salary_texts(query: str, api_key: str) -> List[str]:
    """Query the search API, retrying transient failures with backoff.

    Makes up to three attempts. Exceptions and HTTP 429/5xx responses are
    retried after 1s, then 2s; any other non-200 status ends the attempts
    immediately. Returns an empty list when nothing could be fetched.
    """
    payload = {
        "api_key": api_key,
        "query": query,
        "include_answer": True,
        "max_results": 6,
    }
    backoff = 1.0
    for attempt in range(1, _SEARCH_ATTEMPTS + 1):
        retryable = True
        try:
            with httpx.Client(timeout=20.0) as client:
                resp = client.post(_SEARCH_URL, json=payload)
            if resp.status_code == 200:
                return _extract_texts(resp.json())
            retryable = resp.status_code == 429 or resp.status_code >= 500
            logger.warning("Salary search returned HTTP %s", resp.status_code)
        except Exception:
            # Best-effort boundary: any failure here degrades to the fallback.
            logger.warning(
                "Salary search attempt %d failed", attempt, exc_info=True
            )
        if not retryable or attempt == _SEARCH_ATTEMPTS:
            break
        time.sleep(backoff)
        backoff *= 2
    return []


def _range_from_texts(texts: List[str]) -> Optional[Tuple[int, int]]:
    """Derive a ``(low, high)`` GBP range from text snippets, or ``None``."""
    values_gbp = sorted(
        value
        for value in (
            _convert(amount, ccy, "GBP")
            for text in texts
            for amount, ccy in _parse_salaries(text)
        )
        # Re-check the band: conversion can push a figure outside it.
        if _MIN_SALARY <= value <= _MAX_SALARY
    )
    if not values_gbp:
        return None

    # Drop the bottom and top 10% to blunt outliers, keeping at least one value.
    n = len(values_gbp)
    start = int(n * 0.1)
    end = max(start + 1, int(n * 0.9))
    trimmed = values_gbp[start:end] or values_gbp
    low, high = int(trimmed[0]), int(trimmed[-1])

    # Widen implausibly narrow ranges to +/-10% around the midpoint.
    if high - low < 15000:
        mid = (high + low) // 2
        low, high = int(mid * 0.9), int(mid * 1.1)
    return low, high


def estimate_salary_range(
    role: str,
    location: Optional[str],
    industry: Optional[str],
    skills: List[str],
) -> Dict[str, Dict[str, int]]:
    """Estimate the annual base-salary range for a role.

    Results are cached per (role, location, industry). If another lookup
    for the same key started within the last five seconds, this call waits
    briefly and reuses that result when it has landed. Search is only
    attempted when ``TAVILY_API_KEY`` is set; otherwise, or when no usable
    figures are found, a location-based heuristic range is returned (and
    cached as well).

    Args:
        role: Job title to search for.
        location: Optional location; also selects the fallback baseline.
        industry: Optional industry to narrow the search.
        skills: Accepted for interface compatibility; currently unused.

    Returns:
        ``{"GBP": {"low": ..., "high": ...}, "USD": {...}, "EUR": {...}}``.
    """
    key = _key(role, location, industry)

    cached = _SALARY_CACHE.get(key)
    if cached is not None:
        return cached

    with _LOCK:
        started = _INFLIGHT.get(key)
        in_flight = (
            started is not None
            and time.time() - started < _DEBOUNCE_WINDOW_SECONDS
        )
    if in_flight:
        # Sleep outside the lock so callers for other keys are not stalled.
        time.sleep(_DEBOUNCE_WAIT_SECONDS)
        cached = _SALARY_CACHE.get(key)
        if cached is not None:
            return cached
    with _LOCK:
        _INFLIGHT[key] = time.time()

    try:
        query = f"salary {role} {location or ''} {industry or ''} base compensation annual"
        api_key = os.getenv("TAVILY_API_KEY")
        texts = _search_salary_texts(query, api_key) if api_key else []

        bounds = _range_from_texts(texts) if texts else None
        if bounds is None:
            # Fallback heuristics by location if search fails
            result = _fallback_range(location)
        else:
            result = _make_range(*bounds)

        _SALARY_CACHE.set(key, result)
        return result
    finally:
        # Always clear the marker, even if something unexpected is raised.
        with _LOCK:
            _INFLIGHT.pop(key, None)


def _make_range(low_gbp: int, high_gbp: int) -> Dict[str, Dict[str, int]]:
    """Express a GBP salary range in GBP, USD and EUR.

    The bounds are reordered if given high-first; converted values are
    truncated to whole units.
    """
    low, high = min(low_gbp, high_gbp), max(low_gbp, high_gbp)
    result: Dict[str, Dict[str, int]] = {"GBP": {"low": low, "high": high}}
    for ccy in ("USD", "EUR"):
        result[ccy] = {
            "low": int(_convert(low, "GBP", ccy)),
            "high": int(_convert(high, "GBP", ccy)),
        }
    return result