"""
main.py — FastAPI application entry point.

Why FastAPI?
  - Native async support, Pydantic integration, automatic OpenAPI docs.
  - Extremely low boilerplate for typed REST APIs.
  - Standard choice for Python ML/LLM API services.

Startup pattern: we load the catalog and build/load the index once at startup using
FastAPI's lifespan context manager (the modern replacement for @app.on_event("startup")).
All request handlers then receive these pre-loaded objects via app.state — no global
variables, no singleton anti-patterns.

Interview Q: "Why app.state instead of module-level globals?"
A: Module-level globals can't be easily mocked in tests, and their initialization order
   is implicit. app.state is explicit, testable, and scoped to the application instance.

Interview Q: "How do you make this horizontally scalable?"
A: The service is stateless — no per-user data is stored. Multiple instances can run
   behind a load balancer with no sticky sessions required.
"""

import logging
import os
import sys
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse

from .schemas import ChatRequest, ChatResponse
from .catalog_loader import load_catalog
from .retrieval import get_or_build_index
from .agent import run_agent

# ---------------------------------------------------------------------------
# Logging: structured logs to stdout so HF Spaces / Docker captures them.
# ---------------------------------------------------------------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    # basicConfig attaches a StreamHandler on sys.stderr unless a stream is
    # given; pass stdout explicitly so the logs land where intended.
    stream=sys.stdout,
)
# Module-level logger; its name follows this module's import path.
logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Lifespan: load all expensive artifacts once at startup.
# ---------------------------------------------------------------------------
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan hook: prepare shared artifacts, then serve.

    Code ahead of ``yield`` executes while the app starts; code after it
    executes while the app stops.

    Startup work, in order:
    1. Load the catalog via ``load_catalog()``.
    2. Obtain the vectorizer and TF-IDF matrix via ``get_or_build_index()``.
    3. Collect every catalog item's ``"url"`` value into a set.
    4. Publish all four objects on ``app.state`` for the request handlers.

    Doing the work here means a failure in any step aborts startup instead
    of surfacing on the first request, and handlers reuse the prepared
    objects rather than rebuilding them.
    """
    logger.info("Loading SHL catalog...")
    items = load_catalog()
    item_count = len(items)
    logger.info(f"Catalog loaded: {item_count} items.")

    logger.info("Building/loading TF-IDF index...")
    vec, matrix = get_or_build_index(items)
    logger.info("Index ready.")

    # Set of known catalog URLs, so membership checks are constant-time.
    known_urls = set()
    for entry in items:
        known_urls.add(entry["url"])

    # Publish everything on app.state; handlers read it from there instead
    # of relying on module-level globals.
    shared = app.state
    shared.catalog = items
    shared.catalog_url_set = known_urls
    shared.vectorizer = vec
    shared.tfidf_matrix = matrix

    logger.info("SHL Agent ready.")
    yield
    # Nothing acquired above needs releasing; just record the shutdown.
    logger.info("Shutting down SHL Agent.")


# ---------------------------------------------------------------------------
# Application instance
# ---------------------------------------------------------------------------
# Human-readable summary passed to FastAPI as the application description.
_APP_DESCRIPTION = (
    "Conversational agent for recommending SHL psychometric assessments. "
    "Stateless API — send full conversation history on every POST /chat call."
)

# The single application object; `lifespan` performs the startup loading.
app = FastAPI(
    lifespan=lifespan,
    title="SHL Assessment Recommendation Agent",
    version="1.0.0",
    description=_APP_DESCRIPTION,
)


# ---------------------------------------------------------------------------
# Global exception handler: returns JSON (not HTML) for unexpected errors.
# This is important for automated evaluators that expect JSON responses.
# ---------------------------------------------------------------------------
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """
    Convert any otherwise-unhandled exception into a JSON 500 response.

    Args:
        request: The request being processed when the exception escaped.
        exc: The exception that was not handled elsewhere.

    Returns:
        A JSONResponse with status 500 and a generic ``detail`` message; the
        exception text itself is only written to the log, not to the client.
    """
    # Pass the exception object itself rather than exc_info=True: True reads
    # the ambient sys.exc_info(), which is empty if this handler is ever
    # invoked outside an active `except` block, and the traceback is lost.
    logger.error("Unhandled exception: %s", exc, exc_info=exc)
    return JSONResponse(
        status_code=500,
        content={"detail": "Internal server error. Please check server logs."},
    )


# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------

@app.get("/health")
async def health() -> dict:
    """
    Liveness probe.

    Always answers with the constant payload {"status": "ok"}; the handler
    performs no checks of its own. Catalog loading happens in the lifespan
    hook, not here.

    Design: intended as the probe target for HF Spaces and load balancers.
    The LLM API is deliberately not contacted — doing so would make health
    checks flaky and expensive. LLM availability is only exercised by /chat.
    """
    return dict(status="ok")


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest, req: Request) -> ChatResponse:
    """
    Main conversational endpoint.

    Accepts: full conversation history (stateless — caller owns state).
    Returns: reply, recommendations (0–10 items), end_of_conversation flag.

    Args:
        request: Validated request body; ``request.messages`` is the history.
        req: The underlying HTTP request, used only to reach ``app.state``
            (vectorizer, TF-IDF matrix, catalog, catalog URL set).

    Raises:
        HTTPException: 400 when ``run_agent`` raises ``ValueError``.

    Error handling:
    - Pydantic validates the request shape; FastAPI returns 422 on invalid input.
    - ValueError raised by the agent is returned as 400.
    - Every other exception propagates unchanged to the global handler (500).
      No upstream-specific mapping (e.g. 502 for LLM API errors) is done here.

    Concurrency: ``run_agent`` is a plain synchronous call, so it is run in
    the worker thread pool. Calling it directly inside this coroutine would
    block the event loop — and every other request, including /health —
    for the full duration of the agent run.
    """
    logger.info("POST /chat — %d message(s) in history.", len(request.messages))

    state = req.app.state
    try:
        response = await run_in_threadpool(
            run_agent,
            messages=request.messages,
            vectorizer=state.vectorizer,
            tfidf_matrix=state.tfidf_matrix,
            catalog=state.catalog,
            catalog_url_set=state.catalog_url_set,
        )
    except ValueError as e:
        logger.warning("Bad request: %s", e)
        # Chain the cause so the original ValueError stays visible in traces.
        raise HTTPException(status_code=400, detail=str(e)) from e

    logger.info(
        "Response: end_of_conversation=%s, recommendations=%d",
        response.end_of_conversation,
        len(response.recommendations),
    )
    return response