Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| from typing import Dict, List, Optional, Tuple | |
| import math | |
| import re | |
| import os | |
| import httpx | |
| import time | |
| from threading import RLock | |
| from .cache import TTLCache | |
# Static FX table keyed by (source, destination) currency code.
# In production these should be pulled from a rates API instead.
# Only GBP quotes are listed; each reverse rate is the reciprocal of its quote.
FX_RATES: Dict[Tuple[str, str], float] = {
    ("GBP", quote): rate for quote, rate in (("USD", 1.27), ("EUR", 1.17))
}
FX_RATES.update(
    {(quote, base): 1 / rate for (base, quote), rate in list(FX_RATES.items())}
)
# Serialises access to the in-flight bookkeeping dict below.
_LOCK = RLock()
# Lookup key -> time.time() at which a computation for that key was started.
_INFLIGHT: Dict[str, float] = {}
# Finished salary ranges by lookup key. NOTE(review): presumably entries
# expire after ttl_seconds (one hour) -- confirm against TTLCache.
_SALARY_CACHE = TTLCache(max_items=512, ttl_seconds=3600)
| def _key(role: str, location: Optional[str], industry: Optional[str]) -> str: | |
| return f"{role.lower()}::{(location or '').lower()}::{(industry or '').lower()}" | |
| def _convert(amount: float, src: str, dst: str) -> float: | |
| if src == dst: | |
| return amount | |
| rate = FX_RATES.get((src, dst)) | |
| if rate is None: | |
| if src != "GBP" and dst != "GBP": | |
| to_gbp = FX_RATES.get((src, "GBP"), 1.0) | |
| from_gbp = FX_RATES.get(("GBP", dst), 1.0) | |
| return amount * to_gbp * from_gbp | |
| return amount | |
| return amount * rate | |
| def _parse_salaries(text: str) -> List[Tuple[float, str]]: | |
| patterns = [ | |
| (r"Β£\s?([0-9]{2,3}(?:[,][0-9]{3})?|[0-9]{2,3})\s?(?:k|,\d{3})?", "GBP"), | |
| (r"\$\s?([0-9]{2,3}(?:[,][0-9]{3})?|[0-9]{2,3})\s?(?:k|,\d{3})?", "USD"), | |
| (r"(?:EUR|β¬)\s?([0-9]{2,3}(?:[,][0-9]{3})?|[0-9]{2,3})\s?(?:k|,\d{3})?", "EUR"), | |
| ] | |
| found: List[Tuple[float, str]] = [] | |
| lower = text.lower() | |
| for pat, ccy in patterns: | |
| for m in re.finditer(pat, text): | |
| raw = m.group(1) | |
| try: | |
| if "," in raw: | |
| num = float(raw.replace(",", "")) | |
| else: | |
| num = float(raw) | |
| span = m.span() | |
| tail = lower[span[1]: span[1] + 2] | |
| if 'k' in tail: | |
| num *= 1000 | |
| if 20000 <= num <= 350000: | |
| found.append((num, ccy)) | |
| except Exception: | |
| continue | |
| return found | |
_TAVILY_SEARCH_URL = "https://api.tavily.com/search"
_SEARCH_ATTEMPTS = 3


def _fallback_salary_range(location: Optional[str]) -> Dict[str, Dict[str, int]]:
    """Return the static, location-based range used when no figures are found."""
    base = 90000 if (location and location.lower().startswith("london")) else 110000
    return _make_range(int(base * 0.8), int(base * 1.4))


def _texts_from_search_payload(data: object) -> List[str]:
    """Pull the answer and up to five result snippets out of a search response."""
    if not isinstance(data, dict):
        return []
    texts: List[str] = []
    answer = data.get("answer")
    if isinstance(answer, str) and answer:
        texts.append(answer)
    results = data.get("results")
    if isinstance(results, list):
        for item in results[:5]:
            if not isinstance(item, dict):
                continue
            snippet = item.get("content") or item.get("snippet")
            if isinstance(snippet, str) and snippet:
                texts.append(snippet)
    return texts


def _search_salary_texts(query: str, api_key: str) -> List[str]:
    """Run the web search, retrying transient failures with exponential backoff.

    Returns an empty list when every attempt fails or the response holds no text.
    """
    payload = {"api_key": api_key, "query": query, "include_answer": True, "max_results": 6}
    backoff = 1.0
    for attempt in range(_SEARCH_ATTEMPTS):
        try:
            with httpx.Client(timeout=20.0) as client:
                resp = client.post(_TAVILY_SEARCH_URL, json=payload)
            if resp.status_code == 200:
                return _texts_from_search_payload(resp.json())
            # Only rate limiting and server-side errors are worth retrying;
            # other statuses (bad key, bad request) will not change.
            retryable = resp.status_code == 429 or resp.status_code >= 500
        except (httpx.HTTPError, ValueError):
            # Transport failure or a body that is not valid JSON.
            retryable = True
        if not retryable:
            break
        if attempt < _SEARCH_ATTEMPTS - 1:
            # No point sleeping after the final attempt.
            time.sleep(backoff)
            backoff *= 2
    return []


def _salary_bounds_gbp(texts: List[str]) -> Optional[Tuple[int, int]]:
    """Derive a (low, high) GBP band from salary figures found in ``texts``.

    Returns ``None`` when no figure survives parsing and range filtering.
    """
    values = sorted(
        gbp
        for text in texts
        for amount, ccy in _parse_salaries(text)
        for gbp in (_convert(amount, ccy, "GBP"),)
        if 20000 <= gbp <= 350000
    )
    if not values:
        return None

    # Drop roughly the bottom and top 10% to blunt outliers; the slice always
    # keeps at least one value.
    count = len(values)
    start = int(count * 0.1)
    end = max(start + 1, int(count * 0.9))
    trimmed = values[start:end]

    low, high = int(trimmed[0]), int(trimmed[-1])
    if high - low < 15000:
        # Too narrow to be a useful band: widen to +/-10% around the midpoint.
        mid = (high + low) // 2
        low, high = int(mid * 0.9), int(mid * 1.1)
    return low, high


def estimate_salary_range(role: str, location: Optional[str], industry: Optional[str], skills: List[str]) -> Dict[str, Dict[str, int]]:
    """Estimate an annual salary band for a role, in GBP, USD and EUR.

    Results are cached per (role, location, industry). On a cache miss a web
    search is made (only when ``TAVILY_API_KEY`` is set) and salary figures
    are parsed from the returned text. If the search yields nothing usable, a
    static location-based fallback is returned and cached as well.

    Args:
        role: Job title to look up.
        location: Optional location; also selects the fallback band.
        industry: Optional industry, added to the search query.
        skills: Accepted for interface compatibility; currently unused.

    Returns:
        ``{"GBP": {"low", "high"}, "USD": {...}, "EUR": {...}}`` as built by
        ``_make_range``.
    """
    k = _key(role, location, industry)

    cached = _SALARY_CACHE.get(k)
    if cached is not None:
        return cached

    # Debounce: if the same lookup started very recently, give it a moment to
    # finish. The wait happens outside the lock so callers working on other
    # keys are never blocked by this sleep.
    with _LOCK:
        started = _INFLIGHT.get(k)
    if started and time.time() - started < 5.0:
        time.sleep(0.25)
        cached = _SALARY_CACHE.get(k)
        if cached is not None:
            return cached

    with _LOCK:
        _INFLIGHT[k] = time.time()
    try:
        query = f"salary {role} {location or ''} {industry or ''} base compensation annual"
        api_key = os.getenv("TAVILY_API_KEY")
        texts = _search_salary_texts(query, api_key) if api_key else []

        bounds = _salary_bounds_gbp(texts) if texts else None
        if bounds is None:
            result = _fallback_salary_range(location)
        else:
            result = _make_range(*bounds)

        _SALARY_CACHE.set(k, result)
        return result
    finally:
        # Always clear the in-flight marker, including on unexpected errors.
        with _LOCK:
            _INFLIGHT.pop(k, None)
def _make_range(low_gbp: int, high_gbp: int) -> Dict[str, Dict[str, int]]:
    """Build the per-currency ``{"low", "high"}`` bands from a GBP range.

    The two bounds are put in ascending order first. USD and EUR figures are
    obtained with ``_convert`` and truncated to whole units.
    """
    if high_gbp < low_gbp:
        low_gbp, high_gbp = high_gbp, low_gbp

    bands: Dict[str, Dict[str, int]] = {"GBP": {"low": low_gbp, "high": high_gbp}}
    for ccy in ("USD", "EUR"):
        bands[ccy] = {
            "low": int(_convert(low_gbp, "GBP", ccy)),
            "high": int(_convert(high_gbp, "GBP", ccy)),
        }
    return bands