Spaces:
Runtime error
Runtime error
File size: 5,595 Bytes
from __future__ import annotations
from typing import Dict, List, Optional, Tuple
import math
import re
import os
import httpx
import time
from threading import RLock
from .cache import TTLCache
# Static FX table keyed by (source, target) currency code. These are fixed
# placeholder quotes; a production deployment should pull them from a rates API.
FX_RATES = {
    ("GBP", "USD"): 1.27,
    ("GBP", "EUR"): 1.17,
}
# Each reverse pair is the reciprocal of its forward quote, appended in the
# same order as the forward pairs.
FX_RATES.update(
    {(dst, src): 1 / rate for (src, dst), rate in list(FX_RATES.items())}
)
# Finished estimates, stored under the string that _key() builds. Constructed
# with a 3600-second TTL and a 512-item cap (TTLCache is defined in .cache;
# presumably it enforces both limits - confirm there).
_SALARY_CACHE = TTLCache(ttl_seconds=3600, max_items=512)
# Cache key -> time.time() stamp recorded when a lookup for that key started;
# the entry is removed again once that lookup has produced its result.
_INFLIGHT: Dict[str, float] = {}
# Held while reading or updating _INFLIGHT.
_LOCK = RLock()
def _key(role: str, location: Optional[str], industry: Optional[str]) -> str:
return f"{role.lower()}::{(location or '').lower()}::{(industry or '').lower()}"
def _convert(amount: float, src: str, dst: str) -> float:
    """Convert ``amount`` from currency ``src`` to ``dst`` using ``FX_RATES``.

    Resolution order:
      1. identical currencies: the amount is returned untouched;
      2. a direct ``(src, dst)`` quote: the amount times that quote;
      3. no direct quote and one side is GBP: the amount is returned unchanged;
      4. otherwise the amount is routed through GBP, and any leg missing from
         the table is treated as a rate of 1.0.
    """
    if src == dst:
        return amount

    direct = FX_RATES.get((src, dst))
    if direct is not None:
        return amount * direct

    # No direct quote: a GBP leg cannot be triangulated any further.
    if "GBP" in (src, dst):
        return amount

    into_gbp = FX_RATES.get((src, "GBP"), 1.0)
    out_of_gbp = FX_RATES.get(("GBP", dst), 1.0)
    return amount * into_gbp * out_of_gbp
def _parse_salaries(text: str) -> List[Tuple[float, str]]:
patterns = [
(r"£\s?([0-9]{2,3}(?:[,][0-9]{3})?|[0-9]{2,3})\s?(?:k|,\d{3})?", "GBP"),
(r"\$\s?([0-9]{2,3}(?:[,][0-9]{3})?|[0-9]{2,3})\s?(?:k|,\d{3})?", "USD"),
(r"(?:EUR|€)\s?([0-9]{2,3}(?:[,][0-9]{3})?|[0-9]{2,3})\s?(?:k|,\d{3})?", "EUR"),
]
found: List[Tuple[float, str]] = []
lower = text.lower()
for pat, ccy in patterns:
for m in re.finditer(pat, text):
raw = m.group(1)
try:
if "," in raw:
num = float(raw.replace(",", ""))
else:
num = float(raw)
span = m.span()
tail = lower[span[1]: span[1] + 2]
if 'k' in tail:
num *= 1000
if 20000 <= num <= 350000:
found.append((num, ccy))
except Exception:
continue
return found
def _search_salary_texts(query: str, api_key: str) -> List[str]:
    """Fetch text snippets for ``query`` from the Tavily search API.

    Makes up to three attempts. An attempt that raises is followed by an
    exponentially growing pause (1s, then 2s) before the next one; no pause
    follows the final attempt. A non-200 response moves straight on to the
    next attempt.

    Returns:
        The answer text (if any) followed by the content/snippet of up to five
        results, or an empty list when every attempt failed.
    """
    payload = {"api_key": api_key, "query": query, "include_answer": True, "max_results": 6}
    max_attempts = 3
    backoff = 1.0
    for attempt in range(max_attempts):
        try:
            with httpx.Client(timeout=20.0) as client:
                resp = client.post("https://api.tavily.com/search", json=payload)
                if resp.status_code == 200:
                    data = resp.json()
                    # Build a fresh list per attempt so a failure part-way
                    # through cannot leave partial snippets to be duplicated
                    # by the retry.
                    texts: List[str] = []
                    answer = data.get("answer") or ""
                    if answer:
                        texts.append(answer)
                    for item in data.get("results", [])[:5]:
                        content = item.get("content") or item.get("snippet") or ""
                        if content:
                            texts.append(content)
                    return texts
        except Exception:
            # Deliberately best-effort: any network or parsing failure just
            # means the caller falls back to its heuristic range.
            if attempt < max_attempts - 1:
                time.sleep(backoff)
                backoff *= 2
    return []


def estimate_salary_range(role: str, location: Optional[str], industry: Optional[str], skills: List[str]) -> Dict[str, Dict[str, int]]:
    """Estimate a base-salary range for a role, expressed in GBP, USD and EUR.

    Results are cached per (role, location, industry). On a cache miss, and
    when ``TAVILY_API_KEY`` is set, a web search is run and salary figures are
    parsed out of the returned text, converted to GBP and trimmed to roughly
    the 10th-90th percentile. If no usable figure is found, a fixed heuristic
    range is used instead (lower base for locations starting with "london").

    Args:
        role: Job title to look up.
        location: Optional location; also selects the fallback base.
        industry: Optional industry added to the search query.
        skills: Accepted for interface compatibility; not used.

    Returns:
        The mapping produced by ``_make_range``: one entry per currency code
        (GBP, USD, EUR), each holding integer ``"low"`` and ``"high"`` values.
    """
    k = _key(role, location, industry)

    cached = _SALARY_CACHE.get(k)
    if cached is not None:
        return cached

    # Debounce: if another lookup for this key started within the last five
    # seconds, give it a moment to finish. The lock is released before
    # sleeping so callers working on other keys are not stalled.
    with _LOCK:
        last = _INFLIGHT.get(k)
    if last and time.time() - last < 5.0:
        time.sleep(0.25)
        cached = _SALARY_CACHE.get(k)
        if cached is not None:
            return cached
    with _LOCK:
        _INFLIGHT[k] = time.time()

    try:
        texts: List[str] = []
        api_key = os.getenv("TAVILY_API_KEY")
        if api_key:
            query = f"salary {role} {location or ''} {industry or ''} base compensation annual"
            texts = _search_salary_texts(query, api_key)

        # Normalise every parsed figure to GBP and re-apply the plausibility
        # window, since conversion can push a value outside it.
        values_gbp = sorted(
            value
            for value in (
                _convert(amount, ccy, "GBP")
                for text in texts
                for amount, ccy in _parse_salaries(text)
            )
            if 20000 <= value <= 350000
        )

        if values_gbp:
            # Trim roughly the bottom and top 10% to blunt outliers, always
            # keeping at least one value.
            n = len(values_gbp)
            start = int(n * 0.1)
            end = max(start + 1, int(n * 0.9))
            trimmed = values_gbp[start:end] or values_gbp
            low = int(trimmed[0])
            high = int(trimmed[-1])
            if high - low < 15000:
                # Too narrow to be a useful range: widen to +/-10% of the midpoint.
                mid = (high + low) // 2
                low = int(mid * 0.9)
                high = int(mid * 1.1)
            result = _make_range(low, high)
        else:
            # Search unavailable or nothing parseable: heuristic by location.
            base = 90000 if (location and location.lower().startswith("london")) else 110000
            result = _make_range(int(base * 0.8), int(base * 1.4))

        _SALARY_CACHE.set(k, result)
        return result
    finally:
        # Always clear the in-flight marker, even if something above raised.
        with _LOCK:
            _INFLIGHT.pop(k, None)
def _make_range(low_gbp: int, high_gbp: int) -> Dict[str, Dict[str, int]]:
    """Express a GBP salary range in GBP, USD and EUR.

    The two bounds may be given in either order; the smaller always becomes
    ``"low"``. Converted bounds are truncated to integers.
    """
    lower, upper = min(low_gbp, high_gbp), max(low_gbp, high_gbp)
    bands: Dict[str, Dict[str, int]] = {"GBP": {"low": lower, "high": upper}}
    for ccy in ("USD", "EUR"):
        bands[ccy] = {
            "low": int(_convert(lower, "GBP", ccy)),
            "high": int(_convert(upper, "GBP", ccy)),
        }
    return bands