Spaces:
Build error
Build error
| """ | |
| catalog_loader.py — Loads and validates the SHL catalog from disk. | |
| Why a separate module? | |
| Separates I/O from business logic. If the catalog source changes (e.g., live API | |
| instead of JSON file), only this file changes; retrieval.py and agent.py are untouched. | |
| Interview Q: "How would you scale to a live catalog?" | |
| A: Replace load_catalog() with an HTTP fetch + TTL cache. The rest of the system | |
| is unaware of the source. | |
| Trade-off: We load the full catalog into memory at startup. At ~35 items this is trivial. | |
| For a catalog with tens of thousands of items, a streaming/lazy approach would be needed. | |
| """ | |
import json
import os
from typing import Any, Dict, List, Optional
# Anchor the catalog location to this module's own directory instead of the
# process working directory, so the lookup does not depend on where the app
# was launched from.
_MODULE_DIR = os.path.dirname(__file__)
_CATALOG_PATH = os.path.join(_MODULE_DIR, "..", "data", "shl_catalog.json")
def load_catalog(path: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Load, validate, and return the SHL catalog as a list of dicts.

    Args:
        path: Optional location of the catalog JSON file. When omitted, the
            module-level ``_CATALOG_PATH`` is used, which preserves the
            original zero-argument behavior.

    Returns:
        The parsed catalog: a non-empty list in which every item is a dict
        containing at least ``name``, ``url``, ``test_type`` and
        ``description``. Any additional keys are passed through unvalidated.

    Raises:
        FileNotFoundError: If the catalog file does not exist. Callers that
            load at startup therefore fail fast rather than at request time.
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass), is not a non-empty JSON array,
            contains an item that is not a JSON object, or contains an item
            missing one of the required fields.
    """
    catalog_path = os.path.abspath(_CATALOG_PATH if path is None else path)

    # Open directly and translate the failure instead of checking
    # os.path.exists() first: that avoids a check-then-use race and still
    # gives the operator an actionable message.
    try:
        with open(catalog_path, "r", encoding="utf-8") as f:
            catalog = json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"SHL catalog not found at {catalog_path}. "
            "Ensure data/shl_catalog.json exists before starting the server."
        ) from err

    if not isinstance(catalog, list) or not catalog:
        raise ValueError("Catalog must be a non-empty JSON array.")

    # Basic validation: every item must be an object carrying the four
    # mandatory fields.
    required_fields = {"name", "url", "test_type", "description"}
    for i, item in enumerate(catalog):
        # A bare string/number/list entry has no keys; report it as a
        # catalog validation error rather than leaking an AttributeError.
        if not isinstance(item, dict):
            raise ValueError(
                f"Catalog item {i} must be a JSON object, "
                f"got {type(item).__name__}."
            )
        missing = required_fields.difference(item)
        if missing:
            raise ValueError(
                f"Catalog item {i} is missing required fields: {missing}"
            )
    return catalog