Spaces:
Running
Running
| """ | |
| UNHCR Data Query Helper | |
| ======================== | |
| Used internally by Fugee's country_lookup tool. | |
| Queries the local processed JSON files β no network required at runtime. | |
| This is the query layer between the agent tools and the downloaded data. | |
| All functions return real data or raise DataNotAvailableError β never fake data. | |
| Co-authored-by: Codex <noreply@openai.com> | |
| """ | |
| import json | |
| from dataclasses import dataclass, field | |
| from functools import lru_cache | |
| from pathlib import Path | |
| from typing import Optional | |
| PROC_DIR = Path(__file__).parent.parent / "processed" | |
| class DataNotAvailableError(Exception): | |
| """Raised when data for a query cannot be found in local cache.""" | |
| pass | |
| # ββ Data loading (cached) βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _load(endpoint: str) -> list[dict]: | |
| p = PROC_DIR / f"{endpoint}.json" | |
| if not p.exists(): | |
| raise DataNotAvailableError( | |
| f"Data for '{endpoint}' not found. " | |
| f"Run: python3 data/scripts/unhcr_downloader.py --endpoint {endpoint}" | |
| ) | |
| data = json.loads(p.read_text()) | |
| return data.get("items", []) | |
| # ββ Country lookup ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class CountryInfo: | |
| unhcr_code: str | |
| iso3: str | |
| name: str | |
| unhcr_region: str | |
| unsd_region: str | |
| def get_country(code: str) -> CountryInfo: | |
| """Look up a country by UNHCR code or ISO3 code.""" | |
| countries = _load("countries") | |
| code = code.upper() | |
| for c in countries: | |
| if c.get("code") == code or c.get("iso3") == code or c.get("name", "").lower() == code.lower(): | |
| return CountryInfo( | |
| unhcr_code=c.get("code", ""), | |
| iso3=c.get("iso3", ""), | |
| name=c.get("name", ""), | |
| unhcr_region=c.get("unhcr_region", {}).get("name", "") if isinstance(c.get("unhcr_region"), dict) else c.get("unhcr_region", ""), | |
| unsd_region=c.get("region", ""), | |
| ) | |
| raise DataNotAvailableError(f"Country not found: {code}") | |
| def list_african_asylum_countries() -> list[CountryInfo]: | |
| """Return all countries in UNHCR's African regions.""" | |
| african_regions = { | |
| "East and Horn of Africa", | |
| "West and Central Africa", | |
| "Southern Africa", | |
| "North Africa", | |
| "Great Lakes and Central Africa", | |
| } | |
| countries = _load("countries") | |
| results = [] | |
| for c in countries: | |
| region = c.get("unhcr_region", {}) | |
| region_name = region.get("name", "") if isinstance(region, dict) else str(region) | |
| if any(ar in region_name for ar in african_regions): | |
| results.append(CountryInfo( | |
| unhcr_code=c.get("code", ""), | |
| iso3=c.get("iso3", ""), | |
| name=c.get("name", ""), | |
| unhcr_region=region_name, | |
| unsd_region=c.get("region", ""), | |
| )) | |
| return results | |
| # ββ Acceptance rates ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class AcceptanceRate: | |
| coo: str # country of origin code | |
| coa: str # country of asylum code | |
| year: int | |
| recognised: int # granted Convention status | |
| complementary: int # granted complementary protection | |
| rejected: int | |
| otherwise_closed: int | |
| total_decisions: int | |
| recognition_rate: Optional[float] # recognised / total (UNHCR definition) | |
| total_protection_rate: Optional[float] # (recognised + complementary) / total | |
| def get_acceptance_rates( | |
| origin_code: str, | |
| asylum_code: str, | |
| year_from: int = 2018, | |
| year_to: int = 2025, | |
| ) -> list[AcceptanceRate]: | |
| """ | |
| Get historical acceptance rates for a specific origin Γ asylum pair. | |
| Returns list sorted by year ascending. | |
| """ | |
| decisions = _load("asylum-decisions") | |
| origin_code = origin_code.upper() | |
| asylum_code = asylum_code.upper() | |
| rates = [] | |
| for row in decisions: | |
| # Real API rows expose ISO3 as coo_iso/coa_iso (and UNHCR codes as | |
| # coo/coa); the older coo_code/coa_code keys do not exist. | |
| row_coo = (row.get("coo_iso") or row.get("coo") or row.get("coo_code") or "").upper() | |
| row_coa = (row.get("coa_iso") or row.get("coa") or row.get("coa_code") or "").upper() | |
| if row_coo != origin_code or row_coa != asylum_code: | |
| continue | |
| year = int(row.get("year", 0)) | |
| if not (year_from <= year <= year_to): | |
| continue | |
| recognised = int(row.get("dec_recognized", 0) or 0) | |
| complementary = int(row.get("dec_other", 0) or 0) # complementary protection | |
| rejected = int(row.get("dec_rejected", 0) or 0) | |
| closed = int(row.get("dec_closed", 0) or 0) | |
| total = int(row.get("dec_total", 0) or 0) | |
| rec_rate = round(recognised / total, 4) if total > 0 else None | |
| tpr = round((recognised + complementary) / total, 4) if total > 0 else None | |
| rates.append(AcceptanceRate( | |
| coo=origin_code, | |
| coa=asylum_code, | |
| year=year, | |
| recognised=recognised, | |
| complementary=complementary, | |
| rejected=rejected, | |
| otherwise_closed=closed, | |
| total_decisions=total, | |
| recognition_rate=rec_rate, | |
| total_protection_rate=tpr, | |
| )) | |
| return sorted(rates, key=lambda r: r.year) | |
| def latest_acceptance_rate(origin_code: str, asylum_code: str) -> Optional[AcceptanceRate]: | |
| """Return the most recent available acceptance rate for an origin Γ asylum pair.""" | |
| rates = get_acceptance_rates(origin_code, asylum_code) | |
| if not rates: | |
| return None | |
| return rates[-1] | |
| # ββ Population figures ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class PopulationFigure: | |
| coo: str | |
| coa: str | |
| year: int | |
| refugees: int | |
| asylum_seekers: int | |
| idps: int | |
| stateless: int | |
| total: int | |
| def get_population( | |
| origin_code: str, | |
| asylum_code: str, | |
| year: Optional[int] = None, | |
| ) -> list[PopulationFigure]: | |
| """Get population stock figures for an origin Γ asylum pair.""" | |
| pop = _load("population") | |
| origin_code = origin_code.upper() | |
| asylum_code = asylum_code.upper() | |
| results = [] | |
| for row in pop: | |
| if ( | |
| row.get("coo_code", "").upper() != origin_code | |
| or row.get("coa_code", "").upper() != asylum_code | |
| ): | |
| continue | |
| row_year = int(row.get("year", 0)) | |
| if year and row_year != year: | |
| continue | |
| refugees = int(row.get("refugees", 0) or 0) | |
| asylum_seekers = int(row.get("asylum_seekers", 0) or 0) | |
| idps = int(row.get("idps", 0) or 0) | |
| stateless = int(row.get("stateless", 0) or 0) | |
| results.append(PopulationFigure( | |
| coo=origin_code, | |
| coa=asylum_code, | |
| year=row_year, | |
| refugees=refugees, | |
| asylum_seekers=asylum_seekers, | |
| idps=idps, | |
| stateless=stateless, | |
| total=refugees + asylum_seekers, | |
| )) | |
| return sorted(results, key=lambda r: r.year) | |
| # ββ Applications ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def get_application_volume( | |
| origin_code: str, | |
| asylum_code: str, | |
| year_from: int = 2018, | |
| ) -> list[dict]: | |
| """How many people from origin X applied in asylum country Y per year.""" | |
| apps = _load("asylum-applications") | |
| origin_code = origin_code.upper() | |
| asylum_code = asylum_code.upper() | |
| results = [] | |
| for row in apps: | |
| if ( | |
| row.get("coo_code", "").upper() != origin_code | |
| or row.get("coa_code", "").upper() != asylum_code | |
| ): | |
| continue | |
| year = int(row.get("year", 0)) | |
| if year < year_from: | |
| continue | |
| results.append({ | |
| "year": year, | |
| "new_applications": int(row.get("applied", 0) or 0), | |
| "pending_start": int(row.get("asylum_seekers", 0) or 0), | |
| }) | |
| return sorted(results, key=lambda r: r["year"]) | |
| # ββ IDP context βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def get_idp_context(country_code: str, year: Optional[int] = None) -> list[dict]: | |
| """Internal displacement figures for an origin country.""" | |
| idmc = _load("idmc") | |
| country_code = country_code.upper() | |
| results = [] | |
| for row in idmc: | |
| if row.get("coo_code", "").upper() != country_code: | |
| continue | |
| row_year = int(row.get("year", 0)) | |
| if year and row_year != year: | |
| continue | |
| results.append({ | |
| "year": row_year, | |
| "idps_conflict": int(row.get("idps_conflict", 0) or 0), | |
| "idps_disasters": int(row.get("idps_disasters", 0) or 0), | |
| "total_idps": int(row.get("idps_conflict", 0) or 0) + int(row.get("idps_disasters", 0) or 0), | |
| }) | |
| return sorted(results, key=lambda r: r["year"]) | |
| # ββ Resettlement ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def get_resettlement(origin_code: str, year_from: int = 2018) -> list[dict]: | |
| """How many people from origin X were resettled globally.""" | |
| solutions = _load("solutions") | |
| origin_code = origin_code.upper() | |
| results = [] | |
| for row in solutions: | |
| if row.get("coo_code", "").upper() != origin_code: | |
| continue | |
| year = int(row.get("year", 0)) | |
| if year < year_from: | |
| continue | |
| resettled = int(row.get("resettlement", 0) or 0) | |
| if resettled > 0: | |
| results.append({ | |
| "year": year, | |
| "coa": row.get("coa_code", ""), | |
| "resettled": resettled, | |
| }) | |
| return sorted(results, key=lambda r: r["year"]) | |
| # ββ Footnotes / data quality ββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def get_footnotes(country_code: str) -> list[str]: | |
| """Return any data quality notes for a country.""" | |
| try: | |
| notes = _load("footnotes") | |
| except DataNotAvailableError: | |
| return [] | |
| country_code = country_code.upper() | |
| return [ | |
| row.get("note", "") | |
| for row in notes | |
| if row.get("coa_code", "").upper() == country_code | |
| or row.get("coo_code", "").upper() == country_code | |
| if row.get("note") | |
| ] | |
| # ββ Composite: full country profile for Fugee βββββββββββββββββββββββββββββββ | |
| class CountryProfile: | |
| """Everything Fugee needs about a potential asylum country for a given origin.""" | |
| country: CountryInfo | |
| latest_acceptance_rate: Optional[AcceptanceRate] | |
| application_trend: list[dict] # last 5 years of applications | |
| current_population: Optional[PopulationFigure] | |
| origin_idp_context: list[dict] # IDP figures for origin country | |
| resettlement_available: bool | |
| data_footnotes: list[str] | |
| def build_country_profile( | |
| origin_code: str, | |
| asylum_code: str, | |
| ) -> CountryProfile: | |
| """ | |
| Build a full profile for asylum_code as a destination for someone from origin_code. | |
| Used by the country_lookup agent tool. | |
| Raises DataNotAvailableError if essential data is missing. | |
| """ | |
| country = get_country(asylum_code) | |
| acceptance = latest_acceptance_rate(origin_code, asylum_code) | |
| apps = get_application_volume(origin_code, asylum_code)[-5:] # last 5 years | |
| pop_series = get_population(origin_code, asylum_code) | |
| current_pop = pop_series[-1] if pop_series else None | |
| idp = get_idp_context(origin_code)[-3:] # last 3 years | |
| resettlement = get_resettlement(origin_code) | |
| footnotes = get_footnotes(asylum_code) | |
| return CountryProfile( | |
| country=country, | |
| latest_acceptance_rate=acceptance, | |
| application_trend=apps, | |
| current_population=current_pop, | |
| origin_idp_context=idp, | |
| resettlement_available=len(resettlement) > 0, | |
| data_footnotes=footnotes, | |
| ) | |