"""Data fetcher service — mirrors PHP fetchers.php. Fetches handbook data from the two external JSON APIs (source of truth), normalises the payloads, and returns typed dicts identical to what the PHP code produced. """ from __future__ import annotations import json import logging from typing import Any import httpx from app.core.config import get_settings logger = logging.getLogger(__name__) def _normalize_section_json(raw: Any, context: str, sid: Any = None) -> dict | list: """Mirrors PHP handbook_normalize_section_json.""" if isinstance(raw, dict) or isinstance(raw, list): return raw if isinstance(raw, str): raw = raw.strip() if not raw: return {} try: decoded = json.loads(raw) if isinstance(decoded, (dict, list)): return decoded except (json.JSONDecodeError, ValueError): logger.warning( "section_json parse failed ctx=%s id=%s snippet=%.180s", context, sid, raw, ) return {} return {} def _is_truthy(val: Any) -> bool: """Mirrors PHP handbook_true.""" if isinstance(val, bool): return val if isinstance(val, int): return val != 0 s = str(val).lower().strip() return s not in ("0", "false", "") def _tier_section_rank(section_key: str) -> int: """Return sort priority for tier-related section keys. Tier One sections sort before Tier Two; non-tier sections get 99 (neutral). """ k = section_key.lower().replace("-", "_").replace(" ", "_") if "tier_one" in k or "non_cosigner" in k: return 0 if "tier_two" in k or k in ("cosigner_schools", "cosigner"): return 1 return 99 def _sort_sections_stable(sections: list[dict]) -> list[dict]: """Mirrors PHP sortHandbookSectionsStable with tier-aware tiebreaker.""" for i, s in enumerate(sections): s.setdefault("_i", i) def sort_key(s: dict): so = s.get("sort_order") sid = s.get("id") # None values sort after numeric values so_key = (0, so) if so is not None else (1, 0) # Tier-aware tiebreaker: Tier One before Tier Two when sort_order ties tier_rank = _tier_section_rank(str(s.get("section_key", ""))) sid_key = (0, sid) if sid is not None else (1, 0) return (so_key, tier_rank, sid_key, s.get("_i", 0)) sections.sort(key=sort_key) for s in sections: s.pop("_i", None) return sections async def fetch_global_sections(catalog_id: int = 0) -> list[dict[str, Any]]: """Fetch and normalise global handbook sections from the external API. Mirrors PHP fetchGlobalSections(). """ settings = get_settings() url = settings.general_endpoint_url if catalog_id: sep = "&" if "?" in url else "?" url += f"{sep}catalog_id={catalog_id}" try: async with httpx.AsyncClient(verify=False, timeout=settings.http_timeout) as client: resp = await client.get(url) resp.raise_for_status() payload = resp.json() except Exception as exc: logger.error("Global sections fetch failed: %s url=%s", exc, url) return [] if not payload.get("ok"): logger.warning("Global sections API returned ok=false: %s", payload) return [] # Accept common shapes sections_raw = ( payload.get("general_sections") or payload.get("sections") or payload.get("globals") or payload.get("data") or [] ) if not isinstance(sections_raw, list): sections_raw = [] out: list[dict[str, Any]] = [] for i, s in enumerate(sections_raw): if not isinstance(s, dict): continue k = str(s.get("section_key", "")) t = str(s.get("section_title", "")) j = _normalize_section_json(s.get("section_json", {}), "global", s.get("id")) sort_raw = s.get("sort_order") or s.get("sortOrder") sort_val = int(sort_raw) if sort_raw is not None and str(sort_raw).lstrip("-").isdigit() else None if not k and not t and (not j or j == {}): continue out.append({ "section_key": k, "section_title": t, "section_json": j, "sort_order": sort_val, "id": int(s["id"]) if s.get("id") is not None else None, "_i": i, }) out = _sort_sections_stable(out) logger.info( "Global sections fetched catalog_id=%d count=%d keys=%s", catalog_id, len(out), [s.get("section_key") for s in out], ) return out async def fetch_university_sections() -> dict[int, dict[str, Any]]: """Fetch and normalise university handbook sections. Returns dict keyed by university_id. Mirrors PHP fetchUniversitySections(). """ settings = get_settings() url = settings.university_endpoint_url try: async with httpx.AsyncClient(verify=False, timeout=settings.http_timeout) as client: resp = await client.get(url) resp.raise_for_status() payload = resp.json() except Exception as exc: logger.error("University sections fetch failed: %s url=%s", exc, url) return {} if not payload.get("ok"): logger.warning("University sections API returned ok=false") return {} universities = payload.get("universities", []) if not isinstance(universities, list): universities = [] by_uni: dict[int, dict[str, Any]] = {} for u in universities: if not isinstance(u, dict): continue uid = int(u.get("university_id", 0)) if uid <= 0: continue name = str(u.get("university_name", f"University #{uid}")) is_active_raw = u.get("is_active", u.get("isActive", 1)) website = str(u.get("website", u.get("website_url", ""))) is_active = _is_truthy(is_active_raw) sections_raw = u.get("sections", []) if not isinstance(sections_raw, list): sections_raw = [] norm_sections: list[dict[str, Any]] = [] for s in sections_raw: if not isinstance(s, dict): continue k = str(s.get("section_key", "")) t = str(s.get("section_title", "")) j = _normalize_section_json(s.get("section_json", {}), "university", s.get("id")) if not k and not t and (not j or j == {}): continue norm_sections.append({ "section_key": k, "section_title": t, "section_json": j, }) # Derive tier from school_category (backward-compatible — older APIs may omit these) school_category = str(u.get("school_category", "")).strip() tier = u.get("tier") tier_label = u.get("tier_label", "") if tier is None and school_category: # Derive from school_category if tier not explicitly provided if school_category == "non_cosigner": tier, tier_label = 1, "Tier One" elif school_category == "cosigner": tier, tier_label = 2, "Tier Two" by_uni[uid] = { "university_name": name, "sections": norm_sections, "is_active": is_active, "website": website, "school_category": school_category, "tier": tier, "tier_label": tier_label or "", } # Sort: Tier One (non_cosigner) first, then Tier Two (cosigner), then by name def _uni_sort_key(item: tuple[int, dict]) -> tuple: uid, data = item t = data.get("tier") tier_rank = t if isinstance(t, int) else 99 return (tier_rank, data.get("university_name", "").lower(), uid) return dict(sorted(by_uni.items(), key=_uni_sort_key))