"""
catalog_loader.py — Loads and validates the SHL catalog from disk.

Why a separate module?
  Separates I/O from business logic. If the catalog source changes (e.g., live API
  instead of JSON file), only this file changes; retrieval.py and agent.py are untouched.

Interview Q: "How would you scale to a live catalog?"
A: Replace load_catalog() with an HTTP fetch + TTL cache. The rest of the system
   is unaware of the source.

Trade-off: We load the full catalog into memory at startup. At ~35 items this is trivial.
For a catalog with tens of thousands of items, a streaming/lazy approach would be needed.
"""

import json
import os
from typing import Any, Dict, List, Optional

# Anchor the catalog location to this module's own directory instead of the
# process working directory, so the lookup does not depend on where the
# program was launched from.
_CATALOG_PATH = os.path.join(
    os.path.dirname(__file__),
    "..",
    "data",
    "shl_catalog.json",
)


def load_catalog(path: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Load, validate, and return the SHL catalog as a list of dicts.

    Intended to be called at startup so that a missing or malformed catalog
    surfaces immediately rather than at request time (fail-fast principle).

    Each item must be a JSON object with at minimum: name, url, test_type,
    description. Extra fields (duration, languages, keys, seniority, domains)
    are optional; they are returned untouched and are not validated here.

    Args:
        path: Catalog file to read. When omitted, the module-level
            ``_CATALOG_PATH`` (data/shl_catalog.json next to this package)
            is used, which preserves the original no-argument behaviour.

    Returns:
        The catalog exactly as decoded from the JSON file.

    Raises:
        FileNotFoundError: The catalog file does not exist.
        ValueError: The file is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass), the top-level value is not a non-empty
            array, an item is not a JSON object, or an item lacks one of the
            required fields.
    """
    catalog_path = os.path.abspath(_CATALOG_PATH if path is None else path)

    # Open directly instead of checking os.path.exists() first: this avoids a
    # check-then-use race and still yields the same descriptive error.
    try:
        with open(catalog_path, "r", encoding="utf-8") as f:
            catalog = json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"SHL catalog not found at {catalog_path}. "
            "Ensure data/shl_catalog.json exists before starting the server."
        ) from err

    if not isinstance(catalog, list) or not catalog:
        raise ValueError("Catalog must be a non-empty JSON array.")

    # Basic validation: every item must be an object carrying the four
    # mandatory fields.
    required_fields = {"name", "url", "test_type", "description"}
    for i, item in enumerate(catalog):
        # A non-object element (string, number, list, null) has no keys;
        # report it as a schema error rather than failing with AttributeError.
        if not isinstance(item, dict):
            raise ValueError(
                f"Catalog item {i} must be a JSON object, "
                f"got {type(item).__name__}."
            )
        missing = required_fields.difference(item)
        if missing:
            raise ValueError(
                f"Catalog item {i} is missing required fields: {missing}"
            )

    return catalog