""" UNHCR Data Refresh Script ========================== Run this to update the local UNHCR data cache with the latest figures. Schedule: run once after the hackathon, then annually (UNHCR updates mid-year). For nowcasting: run monthly if needed. Usage ----- python3 refresh_data.py # refresh all annual endpoints python3 refresh_data.py --full # re-download everything including raw python3 refresh_data.py --nowcast # refresh only nowcasting (monthly data) python3 refresh_data.py --check # check what data exists and how old it is Output ------ Prints a table of each endpoint, record count, last download date, staleness. Updates data/processed/*.json and data/processed/metadata.json. Co-authored-by: Codex """ import argparse import json import subprocess import sys from datetime import datetime, timezone from pathlib import Path PROC_DIR = Path(__file__).parent.parent / "processed" DOWNLOADER = Path(__file__).parent / "unhcr_downloader.py" # How many days before we consider data stale STALE_DAYS = { "annual": 366, "monthly": 31, } def load_manifest() -> dict: p = PROC_DIR / "metadata.json" if not p.exists(): return {} return json.loads(p.read_text()) def check_staleness(): manifest = load_manifest() downloads = {d["endpoint"]: d for d in manifest.get("downloads", [])} print(f"\n{'Endpoint':<25} {'Records':>8} {'Downloaded':<22} {'Status'}") print("-" * 75) from unhcr_downloader import ENDPOINTS # noqa: E402 now = datetime.now(timezone.utc) for name, cfg in ENDPOINTS.items(): if name in downloads: d = downloads[name] ts = datetime.fromisoformat(d["downloaded_at"].replace("Z", "+00:00")) age_days = (now - ts).days threshold = STALE_DAYS.get(cfg["refresh_frequency"], 366) status = "✅ fresh" if age_days <= threshold else f"⚠️ stale ({age_days}d)" records = d.get("records", "?") print(f"{name:<25} {records:>8} {d['downloaded_at']:<22} {status}") else: print(f"{name:<25} {'—':>8} {'not downloaded':<22} ❌ missing") print() def run_downloader(args: list[str]): cmd = [sys.executable, str(DOWNLOADER)] + args print(f"Running: {' '.join(cmd)}\n") result = subprocess.run(cmd) return result.returncode def main(): parser = argparse.ArgumentParser(description="Refresh UNHCR data for Fugee.") parser.add_argument("--full", action="store_true", help="Re-download all endpoints") parser.add_argument("--nowcast", action="store_true", help="Refresh nowcasting only") parser.add_argument("--check", action="store_true", help="Check data freshness and exit") args = parser.parse_args() if args.check: check_staleness() return if args.nowcast: run_downloader(["--endpoint", "nowcasting"]) return if args.full: run_downloader(["--all"]) return # Default: refresh annual endpoints + check staleness after check_staleness() print("Refreshing annual endpoints (this may take several minutes)...\n") rc = run_downloader(["--refresh"]) if rc == 0: print("\nRefresh complete. Updated manifest:\n") check_staleness() else: print(f"\nRefresh exited with code {rc}. Check logs above.") if __name__ == "__main__": main()