# shlaiagent / catalog_loader.py
"""
catalog_loader.py — Loads and validates the SHL catalog from disk.
Why a separate module?
Separates I/O from business logic. If the catalog source changes (e.g., live API
instead of JSON file), only this file changes; retrieval.py and agent.py are untouched.
Interview Q: "How would you scale to a live catalog?"
A: Replace load_catalog() with an HTTP fetch + TTL cache. The rest of the system
is unaware of the source.
Trade-off: We load the full catalog into memory at startup. At ~35 items this is trivial.
For a catalog with tens of thousands of items, a streaming/lazy approach would be needed.
"""
import json
import os
from typing import List, Dict, Any
# Anchored to this module's directory rather than the process cwd, so the
# catalog is found no matter where the server is launched from.
_CATALOG_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "shl_catalog.json")
def load_catalog() -> List[Dict[str, Any]]:
    """
    Load, validate, and return the SHL catalog as a list of dicts.

    The catalog is read from ``_CATALOG_PATH`` (resolved to an absolute path)
    and must be a non-empty JSON array of objects. Every object must carry the
    four mandatory fields: name, url, test_type, description. Any additional
    fields (e.g. duration, languages, keys, seniority, domains) are optional;
    they are not validated here and are returned unchanged.

    Returns:
        The parsed catalog, one dict per catalog item.

    Raises:
        FileNotFoundError: if the catalog file does not exist. Raised eagerly
            so a missing catalog is reported as soon as the loader is called.
        ValueError: if the file content is not a non-empty JSON array, if an
            entry is not a JSON object, or if an entry lacks a required field.
            Malformed JSON also surfaces as ValueError, because
            json.JSONDecodeError is a subclass of it.
    """
    catalog_path = os.path.abspath(_CATALOG_PATH)
    if not os.path.exists(catalog_path):
        raise FileNotFoundError(
            f"SHL catalog not found at {catalog_path}. "
            "Ensure data/shl_catalog.json exists before starting the server."
        )

    with open(catalog_path, "r", encoding="utf-8") as f:
        catalog = json.load(f)

    if not isinstance(catalog, list) or not catalog:
        raise ValueError("Catalog must be a non-empty JSON array.")

    # Basic validation: every item must have the four mandatory fields.
    required_fields = {"name", "url", "test_type", "description"}
    for i, item in enumerate(catalog):
        # A JSON array may hold strings, numbers, nulls or nested arrays;
        # reject those explicitly instead of failing with AttributeError
        # on the key lookup below.
        if not isinstance(item, dict):
            raise ValueError(
                f"Catalog item {i} must be a JSON object, "
                f"got {type(item).__name__}."
            )
        missing = required_fields - item.keys()
        if missing:
            # Sorted so the message is identical from run to run
            # (set iteration order is not stable across processes).
            raise ValueError(
                f"Catalog item {i} is missing required fields: {sorted(missing)}"
            )
    return catalog