Spaces:
Build error
Build error
| """ | |
| main.py — FastAPI application entry point. | |
| Why FastAPI? | |
| - Native async support, Pydantic integration, automatic OpenAPI docs. | |
| - Extremely low boilerplate for typed REST APIs. | |
| - Standard choice for Python ML/LLM API services. | |
| Startup pattern: we load the catalog and build/load the index once at startup using | |
| FastAPI's lifespan context manager (the modern replacement for @app.on_event("startup")). | |
| All request handlers then receive these pre-loaded objects via app.state — no global | |
| variables, no singleton anti-patterns. | |
| Interview Q: "Why app.state instead of module-level globals?" | |
| A: Module-level globals can't be easily mocked in tests, and their initialization order | |
| is implicit. app.state is explicit, testable, and scoped to the application instance. | |
| Interview Q: "How do you make this horizontally scalable?" | |
| A: The service is stateless — no per-user data is stored. Multiple instances can run | |
| behind a load balancer with no sticky sessions required. | |
| """ | |
| import os | |
| import logging | |
| from contextlib import asynccontextmanager | |
| from fastapi import FastAPI, HTTPException, Request | |
| from fastapi.responses import JSONResponse | |
| from .schemas import ChatRequest, ChatResponse | |
| from .catalog_loader import load_catalog | |
| from .retrieval import get_or_build_index | |
| from .agent import run_agent | |
# ---------------------------------------------------------------------------
# Logging: one plain-text line per record (timestamp, level, logger name,
# message). basicConfig is called without a `stream`/`handlers` argument, so
# the handler it installs on the root logger uses the logging module's default
# stream (standard error). NOTE(review): container runtimes usually capture
# stderr alongside stdout — confirm for the target platform.
# ---------------------------------------------------------------------------
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"

logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-scoped logger used by every function in this file.
logger = logging.getLogger(__name__)
| # --------------------------------------------------------------------------- | |
| # Lifespan: load all expensive artifacts once at startup. | |
| # --------------------------------------------------------------------------- | |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan context manager passed to ``FastAPI(lifespan=...)``.

    Code before ``yield`` runs once at startup; code after it runs at shutdown.
    Loading the catalog and index here means:
      1. A failure in either step raises during startup instead of on the
         first request.
      2. Request handlers read ready-made objects from ``app.state`` rather
         than loading anything themselves.

    Args:
        app: The application instance whose ``state`` is populated with
            ``catalog``, ``catalog_url_set``, ``vectorizer`` and
            ``tfidf_matrix``.

    Yields:
        None. Control returns to the framework while the app serves requests.
    """
    logger.info("Loading SHL catalog...")
    catalog = load_catalog()
    logger.info(f"Catalog loaded: {len(catalog)} items.")

    logger.info("Building/loading TF-IDF index...")
    vectorizer, tfidf_matrix = get_or_build_index(catalog)
    logger.info("Index ready.")

    # Set of every catalog URL so membership checks are constant-time.
    # Each catalog item must be subscriptable by the "url" key.
    catalog_url_set = {item["url"] for item in catalog}

    # Publish on app.state so handlers reach these via request.app.state
    # instead of module-level globals.
    app.state.catalog = catalog
    app.state.catalog_url_set = catalog_url_set
    app.state.vectorizer = vectorizer
    app.state.tfidf_matrix = tfidf_matrix

    logger.info("SHL Agent ready.")
    yield

    # Shutdown: nothing acquired above needs explicit release.
    logger.info("Shutting down SHL Agent.")
| # --------------------------------------------------------------------------- | |
| # Application instance | |
| # --------------------------------------------------------------------------- | |
# The ASGI application object. `lifespan` wires in the startup/shutdown hook;
# title, description and version are the metadata given to FastAPI.
app = FastAPI(
    lifespan=lifespan,
    title="SHL Assessment Recommendation Agent",
    version="1.0.0",
    description=(
        "Conversational agent for recommending SHL psychometric assessments. "
        + "Stateless API — send full conversation history on every POST /chat call."
    ),
)
| # --------------------------------------------------------------------------- | |
| # Global exception handler: returns JSON (not HTML) for unexpected errors. | |
| # This is important for automated evaluators that expect JSON responses. | |
| # --------------------------------------------------------------------------- | |
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """
    Catch-all handler: log the exception and answer with a JSON 500 body.

    Registered for ``Exception`` so that errors not handled elsewhere produce
    a JSON payload rather than the framework's default error response.

    Args:
        request: The request being processed when the error occurred (unused).
        exc: The exception that escaped the route handler.

    Returns:
        A 500 ``JSONResponse`` with a generic ``detail`` message; the real
        error text is written to the log only, never to the client.
    """
    # Pass the exception object itself: exc_info=True would depend on this
    # coroutine being awaited from inside an active `except` block.
    logger.error(f"Unhandled exception: {exc}", exc_info=exc)
    return JSONResponse(
        status_code=500,
        content={"detail": "Internal server error. Please check server logs."},
    )
| # --------------------------------------------------------------------------- | |
| # Routes | |
| # --------------------------------------------------------------------------- | |
@app.get("/health")
async def health() -> dict:
    """
    Health check endpoint.

    Always returns ``{"status": "ok"}``; it performs no checks of its own.
    It deliberately does not call the LLM API, so the probe stays cheap and
    is not affected by upstream availability.

    Returns:
        The constant mapping ``{"status": "ok"}``.
    """
    return {"status": "ok"}
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest, req: Request) -> ChatResponse:
    """
    Main conversational endpoint.

    Accepts the full conversation history (the caller owns the state) and
    hands it to ``run_agent`` together with the catalog and index objects
    stored on ``app.state`` at startup.

    Args:
        request: Parsed request body; ``request.messages`` is the history.
        req: The raw request, used only to reach ``req.app.state``.

    Returns:
        The object returned by ``run_agent``; it exposes
        ``end_of_conversation`` and ``recommendations``.

    Raises:
        HTTPException: 400 when ``run_agent`` raises ``ValueError``; the
            exception text becomes the response ``detail``.

    Error handling:
        - The body is declared as ``ChatRequest``, so request-shape validation
          happens in the framework before this function runs.
        - ``ValueError`` from the agent is translated to HTTP 400.
        - Every other exception, including upstream LLM API errors, propagates
          unchanged; no 502 mapping is implemented here.
    """
    logger.info(f"POST /chat — {len(request.messages)} message(s) in history.")

    state = req.app.state
    # NOTE(review): run_agent is called synchronously inside a coroutine. If it
    # performs blocking I/O it will hold the event loop for the duration of
    # the call — confirm, and consider offloading to a worker thread.
    try:
        response = run_agent(
            messages=request.messages,
            vectorizer=state.vectorizer,
            tfidf_matrix=state.tfidf_matrix,
            catalog=state.catalog,
            catalog_url_set=state.catalog_url_set,
        )
    except ValueError as e:
        logger.warning(f"Bad request: {e}")
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(status_code=400, detail=str(e)) from e

    logger.info(
        f"Response: end_of_conversation={response.end_of_conversation}, "
        f"recommendations={len(response.recommendations)}"
    )
    return response