"""
UNHCR Data Query Helper
========================
Used internally by Fugee's country_lookup tool.
Queries the local processed JSON files — no network required at runtime.
This is the query layer between the agent tools and the downloaded data.
All functions return real data or raise DataNotAvailableError — never fake data.
Co-authored-by: Codex <noreply@openai.com>
"""
import json
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
from typing import Optional
# Directory holding the processed JSON files: the "processed" folder that sits
# next to this module's own directory (two levels up from this file).
PROC_DIR = Path(__file__).parent.parent / "processed"
class DataNotAvailableError(Exception):
    """Signal that the data needed to answer a query is absent from the local cache."""
# ── Data loading (cached) ────────────────────────────────────────────────────
@lru_cache(maxsize=16)
def _load(endpoint: str) -> list[dict]:
    """Load and cache the processed rows for one endpoint.

    Reads ``PROC_DIR / "<endpoint>.json"`` and returns its rows. The file may
    either be an object with an ``"items"`` list or a bare top-level list.

    Args:
        endpoint: Endpoint name, used as the JSON file stem.

    Returns:
        The list of row dicts (empty when ``"items"`` is missing or null).
        The list is cached by ``lru_cache`` and therefore shared between
        callers; treat it as read-only.

    Raises:
        DataNotAvailableError: If the file for ``endpoint`` does not exist.
    """
    path = PROC_DIR / f"{endpoint}.json"
    if not path.exists():
        raise DataNotAvailableError(
            f"Data for '{endpoint}' not found. "
            f"Run: python3 data/scripts/unhcr_downloader.py --endpoint {endpoint}"
        )
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding (country names contain non-ASCII characters).
    data = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(data, list):
        return data
    # ``or []`` also covers an explicit ``"items": null``.
    return data.get("items") or []
# ── Country lookup ───────────────────────────────────────────────────────────
@dataclass
class CountryInfo:
    """Identity and regional classification of a single country record."""

    unhcr_code: str    # value of the record's "code" field
    iso3: str          # value of the record's "iso3" field
    name: str          # country name as stored in the record
    unhcr_region: str  # region name taken from the record's "unhcr_region" field
    unsd_region: str   # value of the record's "region" field
def get_country(code: str) -> CountryInfo:
    """Look up a country by UNHCR code, ISO3 code, or country name.

    Matching is case-insensitive and ignores surrounding whitespace.

    Args:
        code: UNHCR code, ISO3 code, or full country name.

    Returns:
        The first matching record from the "countries" dataset.

    Raises:
        DataNotAvailableError: If no record matches, if ``code`` is empty, or
            if the "countries" dataset is not available locally.
    """
    requested = (code or "").strip()
    wanted = requested.casefold()
    # An empty query must not match: otherwise it would equal the "" fallback
    # of any record that lacks a name and return an arbitrary country.
    if wanted:
        for row in _load("countries"):
            identifiers = (row.get("code"), row.get("iso3"), row.get("name"))
            if not any(
                isinstance(value, str) and value.strip().casefold() == wanted
                for value in identifiers
            ):
                continue
            region = row.get("unhcr_region")
            if isinstance(region, dict):
                region_name = region.get("name") or ""
            else:
                region_name = region or ""
            # ``or ""`` keeps the str-typed fields free of None when the
            # record stores an explicit null.
            return CountryInfo(
                unhcr_code=row.get("code") or "",
                iso3=row.get("iso3") or "",
                name=row.get("name") or "",
                unhcr_region=region_name,
                unsd_region=row.get("region") or "",
            )
    raise DataNotAvailableError(f"Country not found: {requested.upper()}")
def list_african_asylum_countries() -> list[CountryInfo]:
    """Collect every country whose UNHCR region name contains an African region label.

    Returns:
        ``CountryInfo`` entries in the order they appear in the "countries"
        dataset.

    Raises:
        DataNotAvailableError: If the "countries" dataset is not available.
    """
    african_regions = {
        "East and Horn of Africa",
        "West and Central Africa",
        "Southern Africa",
        "North Africa",
        "Great Lakes and Central Africa",
    }

    def _region_name(row: dict) -> str:
        # The region is either a nested object carrying a "name" or a plain value.
        raw = row.get("unhcr_region", {})
        return raw.get("name", "") if isinstance(raw, dict) else str(raw)

    matches = []
    for row in _load("countries"):
        label = _region_name(row)
        # Substring test, so a combined region name that merely contains one of
        # the labels also matches.
        # NOTE(review): a region such as "Middle East and North Africa" would
        # match "North Africa" — confirm that is intended.
        if not any(candidate in label for candidate in african_regions):
            continue
        matches.append(
            CountryInfo(
                unhcr_code=row.get("code", ""),
                iso3=row.get("iso3", ""),
                name=row.get("name", ""),
                unhcr_region=label,
                unsd_region=row.get("region", ""),
            )
        )
    return matches
# ── Acceptance rates ─────────────────────────────────────────────────────────
@dataclass
class AcceptanceRate:
    """Asylum decision counts and derived rates for one origin/asylum pair and year.

    Attributes:
        coo: Country of origin code.
        coa: Country of asylum code.
        year: Year the decisions refer to.
        recognised: Decisions granting Convention status.
        complementary: Decisions granting complementary protection.
        rejected: Rejected decisions.
        otherwise_closed: Cases closed without a substantive decision.
        total_decisions: Total number of decisions.
        recognition_rate: recognised / total (UNHCR definition), or None.
        total_protection_rate: (recognised + complementary) / total, or None.
    """

    coo: str
    coa: str
    year: int
    recognised: int
    complementary: int
    rejected: int
    otherwise_closed: int
    total_decisions: int
    recognition_rate: Optional[float]
    total_protection_rate: Optional[float]
def get_acceptance_rates(
    origin_code: str,
    asylum_code: str,
    year_from: int = 2018,
    year_to: int = 2025,
) -> list[AcceptanceRate]:
    """Get historical acceptance rates for a specific origin × asylum pair.

    Args:
        origin_code: Country-of-origin code (ISO3 or UNHCR), any case.
        asylum_code: Country-of-asylum code (ISO3 or UNHCR), any case.
        year_from: First year to include (inclusive).
        year_to: Last year to include (inclusive).

    Returns:
        One ``AcceptanceRate`` per matching row, sorted by year ascending.
        Both rates are ``None`` for rows whose total decision count is zero.

    Raises:
        DataNotAvailableError: If the "asylum-decisions" dataset is missing.
    """
    origin_code = origin_code.upper()
    asylum_code = asylum_code.upper()

    def _count(row: dict, key: str) -> int:
        # Missing and null counts are both treated as zero.
        return int(row.get(key) or 0)

    rates = []
    for row in _load("asylum-decisions"):
        # Real API rows expose ISO3 as coo_iso/coa_iso and UNHCR codes as
        # coo/coa; coo_code/coa_code is an older layout. Compare against every
        # identifier present so a caller may pass either kind of code (taking
        # only the first non-empty key would make UNHCR codes unmatchable
        # whenever the ISO3 field is filled in).
        origin_ids = {
            str(row[key]).upper()
            for key in ("coo_iso", "coo", "coo_code")
            if row.get(key)
        }
        asylum_ids = {
            str(row[key]).upper()
            for key in ("coa_iso", "coa", "coa_code")
            if row.get(key)
        }
        if origin_code not in origin_ids or asylum_code not in asylum_ids:
            continue

        year = _count(row, "year")  # tolerate a null year instead of crashing
        if not year_from <= year <= year_to:
            continue

        recognised = _count(row, "dec_recognized")
        complementary = _count(row, "dec_other")  # complementary protection
        rejected = _count(row, "dec_rejected")
        closed = _count(row, "dec_closed")
        total = _count(row, "dec_total")

        # NOTE(review): both rates divide by dec_total, which presumably
        # includes otherwise-closed cases — confirm this is the intended
        # denominator.
        if total > 0:
            rec_rate = round(recognised / total, 4)
            tpr = round((recognised + complementary) / total, 4)
        else:
            rec_rate = None
            tpr = None

        rates.append(
            AcceptanceRate(
                coo=origin_code,
                coa=asylum_code,
                year=year,
                recognised=recognised,
                complementary=complementary,
                rejected=rejected,
                otherwise_closed=closed,
                total_decisions=total,
                recognition_rate=rec_rate,
                total_protection_rate=tpr,
            )
        )
    return sorted(rates, key=lambda r: r.year)
def latest_acceptance_rate(origin_code: str, asylum_code: str) -> Optional[AcceptanceRate]:
    """Return the most recent available acceptance rate for an origin × asylum pair.

    Args:
        origin_code: Country-of-origin code.
        asylum_code: Country-of-asylum code.

    Returns:
        The entry with the highest year from ``get_acceptance_rates`` (its
        default lower bound still applies), or ``None`` when nothing matches.

    Raises:
        DataNotAvailableError: If the "asylum-decisions" dataset is missing.
    """
    from datetime import MAXYEAR

    # Lift the upper year bound: relying on get_acceptance_rates' fixed
    # default would silently hide any data newer than that default year.
    rates = get_acceptance_rates(origin_code, asylum_code, year_to=MAXYEAR)
    return rates[-1] if rates else None
# ── Population figures ───────────────────────────────────────────────────────
@dataclass
class PopulationFigure:
    """Population stock counts for one origin/asylum pair in a single year."""

    coo: str             # country of origin code
    coa: str             # country of asylum code
    year: int            # year the counts refer to
    refugees: int        # refugee count
    asylum_seekers: int  # asylum-seeker count
    idps: int            # internally displaced persons count
    stateless: int       # stateless persons count
    total: int           # combined figure supplied by whoever builds the record
def get_population(
    origin_code: str,
    asylum_code: str,
    year: Optional[int] = None,
) -> list[PopulationFigure]:
    """Get population stock figures for an origin × asylum pair.

    Args:
        origin_code: Country-of-origin code (ISO3 or UNHCR), any case.
        asylum_code: Country-of-asylum code (ISO3 or UNHCR), any case.
        year: When given, only rows for exactly this year are returned.

    Returns:
        Matching figures sorted by year ascending. ``total`` is the sum of
        refugees and asylum seekers only.

    Raises:
        DataNotAvailableError: If the "population" dataset is missing.
    """
    origin_code = origin_code.upper()
    asylum_code = asylum_code.upper()

    def _count(row: dict, key: str) -> int:
        # Missing and null counts are both treated as zero.
        return int(row.get(key) or 0)

    results = []
    for row in _load("population"):
        # Accept every identifier layout: coo_iso/coa_iso (ISO3), coo/coa
        # (UNHCR codes) and the older coo_code/coa_code. Filtering on
        # coo_code/coa_code alone finds nothing in rows that lack those keys.
        origin_ids = {
            str(row[key]).upper()
            for key in ("coo_iso", "coo", "coo_code")
            if row.get(key)
        }
        asylum_ids = {
            str(row[key]).upper()
            for key in ("coa_iso", "coa", "coa_code")
            if row.get(key)
        }
        if origin_code not in origin_ids or asylum_code not in asylum_ids:
            continue

        row_year = _count(row, "year")  # tolerate a null year
        if year is not None and row_year != year:
            continue

        refugees = _count(row, "refugees")
        asylum_seekers = _count(row, "asylum_seekers")
        results.append(
            PopulationFigure(
                coo=origin_code,
                coa=asylum_code,
                year=row_year,
                refugees=refugees,
                asylum_seekers=asylum_seekers,
                idps=_count(row, "idps"),
                stateless=_count(row, "stateless"),
                total=refugees + asylum_seekers,
            )
        )
    return sorted(results, key=lambda r: r.year)
# ── Applications ─────────────────────────────────────────────────────────────
def get_application_volume(
    origin_code: str,
    asylum_code: str,
    year_from: int = 2018,
) -> list[dict]:
    """How many people from origin X applied in asylum country Y per year.

    Args:
        origin_code: Country-of-origin code (ISO3 or UNHCR), any case.
        asylum_code: Country-of-asylum code (ISO3 or UNHCR), any case.
        year_from: First year to include (inclusive).

    Returns:
        Dicts with ``"year"``, ``"new_applications"`` (from the row's
        ``applied`` field) and ``"pending_start"`` (from the row's
        ``asylum_seekers`` field), sorted by year ascending.

    Raises:
        DataNotAvailableError: If the "asylum-applications" dataset is missing.
    """
    origin_code = origin_code.upper()
    asylum_code = asylum_code.upper()

    results = []
    for row in _load("asylum-applications"):
        # Accept every identifier layout: coo_iso/coa_iso (ISO3), coo/coa
        # (UNHCR codes) and the older coo_code/coa_code. Filtering on
        # coo_code/coa_code alone finds nothing in rows that lack those keys.
        origin_ids = {
            str(row[key]).upper()
            for key in ("coo_iso", "coo", "coo_code")
            if row.get(key)
        }
        asylum_ids = {
            str(row[key]).upper()
            for key in ("coa_iso", "coa", "coa_code")
            if row.get(key)
        }
        if origin_code not in origin_ids or asylum_code not in asylum_ids:
            continue

        year = int(row.get("year") or 0)  # tolerate a null year
        if year < year_from:
            continue

        results.append({
            "year": year,
            "new_applications": int(row.get("applied") or 0),
            "pending_start": int(row.get("asylum_seekers") or 0),
        })
    return sorted(results, key=lambda r: r["year"])
# ── IDP context ──────────────────────────────────────────────────────────────
def get_idp_context(country_code: str, year: Optional[int] = None) -> list[dict]:
    """Internal displacement figures for an origin country.

    Args:
        country_code: Country code (ISO3 or UNHCR), any case.
        year: When given, only rows for exactly this year are returned.

    Returns:
        Dicts with ``"year"``, ``"idps_conflict"``, ``"idps_disasters"`` and
        ``"total_idps"`` (the sum of the two), sorted by year ascending.

    Raises:
        DataNotAvailableError: If the "idmc" dataset is missing.
    """
    country_code = country_code.upper()

    results = []
    for row in _load("idmc"):
        # Accept every identifier layout: coo_iso (ISO3), coo (UNHCR code) and
        # the older coo_code. Filtering on coo_code alone finds nothing in
        # rows that lack that key.
        origin_ids = {
            str(row[key]).upper()
            for key in ("coo_iso", "coo", "coo_code")
            if row.get(key)
        }
        if country_code not in origin_ids:
            continue

        row_year = int(row.get("year") or 0)  # tolerate a null year
        if year is not None and row_year != year:
            continue

        conflict = int(row.get("idps_conflict") or 0)
        disasters = int(row.get("idps_disasters") or 0)
        results.append({
            "year": row_year,
            "idps_conflict": conflict,
            "idps_disasters": disasters,
            "total_idps": conflict + disasters,
        })
    return sorted(results, key=lambda r: r["year"])
# ── Resettlement ─────────────────────────────────────────────────────────────
def get_resettlement(origin_code: str, year_from: int = 2018) -> list[dict]:
    """How many people from origin X were resettled globally.

    Args:
        origin_code: Country-of-origin code (ISO3 or UNHCR), any case.
        year_from: First year to include (inclusive).

    Returns:
        Dicts with ``"year"``, ``"coa"`` and ``"resettled"`` for every matching
        row with a positive resettlement count, sorted by year ascending.

    Raises:
        DataNotAvailableError: If the "solutions" dataset is missing.
    """
    origin_code = origin_code.upper()

    results = []
    for row in _load("solutions"):
        # Accept every identifier layout: coo_iso (ISO3), coo (UNHCR code) and
        # the older coo_code. Filtering on coo_code alone finds nothing in
        # rows that lack that key.
        origin_ids = {
            str(row[key]).upper()
            for key in ("coo_iso", "coo", "coo_code")
            if row.get(key)
        }
        if origin_code not in origin_ids:
            continue

        year = int(row.get("year") or 0)  # tolerate a null year
        if year < year_from:
            continue

        resettled = int(row.get("resettlement") or 0)
        if resettled <= 0:
            continue

        results.append({
            "year": year,
            # Keep coa_code first for backward compatibility, then fall back
            # to the other identifier keys so the field is not left blank.
            "coa": row.get("coa_code") or row.get("coa_iso") or row.get("coa") or "",
            "resettled": resettled,
        })
    return sorted(results, key=lambda r: r["year"])
# ── Footnotes / data quality ─────────────────────────────────────────────────
def get_footnotes(country_code: str) -> list[str]:
    """Return any data quality notes for a country.

    A note is included when the country appears on the row as either the
    country of asylum or the country of origin.

    Args:
        country_code: Country code (ISO3 or UNHCR), any case.

    Returns:
        The non-empty ``note`` values of matching rows, in dataset order.
        Returns an empty list when the "footnotes" dataset is not available
        (footnotes are optional, so this is deliberately not an error).
    """
    try:
        rows = _load("footnotes")
    except DataNotAvailableError:
        return []

    country_code = country_code.upper()
    # Accept every identifier layout (ISO3, UNHCR code, and the older *_code
    # keys) on both the asylum and the origin side.
    code_keys = ("coa_iso", "coa", "coa_code", "coo_iso", "coo", "coo_code")

    notes = []
    for row in rows:
        note = row.get("note")
        if not note:
            continue
        # ``row.get(key)`` skips null values, which would otherwise crash on
        # ``.upper()``.
        row_codes = {str(row[key]).upper() for key in code_keys if row.get(key)}
        if country_code in row_codes:
            notes.append(note)
    return notes
# ── Composite: full country profile for Fugee ────────────────────────────────
@dataclass
class CountryProfile:
    """Bundle of everything Fugee needs about one asylum country for a given origin.

    Attributes:
        country: Identity of the asylum country.
        latest_acceptance_rate: Most recent acceptance rate, or None.
        application_trend: Per-year application figures (last 5 years).
        current_population: Most recent population figure, or None.
        origin_idp_context: IDP figures for the origin country.
        resettlement_available: Whether any resettlement was recorded.
        data_footnotes: Data quality notes.
    """

    country: CountryInfo
    latest_acceptance_rate: Optional[AcceptanceRate]
    application_trend: list[dict]
    current_population: Optional[PopulationFigure]
    origin_idp_context: list[dict]
    resettlement_available: bool
    data_footnotes: list[str]
def build_country_profile(
    origin_code: str,
    asylum_code: str,
) -> CountryProfile:
    """Assemble the full profile of ``asylum_code`` as a destination for ``origin_code``.

    Used by the country_lookup agent tool.

    Args:
        origin_code: Code of the country the person comes from.
        asylum_code: Code of the candidate asylum country.

    Returns:
        A ``CountryProfile`` combining the country record, the latest
        acceptance rate, the last five application entries, the latest
        population figure, the last three IDP entries for the origin, a
        resettlement flag and any footnotes for the asylum country.

    Raises:
        DataNotAvailableError: If essential data is missing.
    """
    # Lookups run in this fixed order, so the first missing dataset is the one
    # reported.
    asylum_country = get_country(asylum_code)
    newest_rate = latest_acceptance_rate(origin_code, asylum_code)

    application_history = get_application_volume(origin_code, asylum_code)
    recent_applications = application_history[-5:]  # last 5 years

    population_history = get_population(origin_code, asylum_code)
    if population_history:
        newest_population = population_history[-1]
    else:
        newest_population = None

    idp_history = get_idp_context(origin_code)
    recent_idp = idp_history[-3:]  # last 3 years

    resettlement_rows = get_resettlement(origin_code)
    notes = get_footnotes(asylum_code)

    return CountryProfile(
        country=asylum_country,
        latest_acceptance_rate=newest_rate,
        application_trend=recent_applications,
        current_population=newest_population,
        origin_idp_context=recent_idp,
        resettlement_available=bool(resettlement_rows),
        data_footnotes=notes,
    )
|