# sadickam's picture
# Fix: remove /api prefix from routes - nginx already strips it
# 4644d09
"""FastAPI application factory and configuration.
This module provides the create_app factory function for creating
and configuring the FastAPI application. The application includes:
- CORS middleware for frontend access
- Error handling middleware
- Route mounting for health, query, and provider status endpoints
- Lifespan management for startup/shutdown
Architecture Overview:
The application uses the factory pattern to enable:
- Lazy loading of heavy dependencies (FastAPI, Starlette)
- Configuration injection for testing
- Multiple application instances if needed
Lifespan Management:
The application uses FastAPI's lifespan context manager to handle:
- Startup: Initialize logging, load retrieval indexes (lazy)
- Shutdown: Clean up resources, flush pending logs
CORS Configuration:
CORS origins are loaded from the Settings class, which reads
from the CORS_ORIGINS environment variable. This allows
flexible configuration for different deployment environments.
Lazy Loading:
FastAPI is loaded on first use to avoid import overhead.
This is especially important for CLI tools that import
the module but may not need the web server.
Example:
-------
>>> from rag_chatbot.api import create_app
>>> app = create_app()
>>> # Run with uvicorn
>>> import uvicorn
>>> uvicorn.run(app, host="0.0.0.0", port=8000)
"""
from __future__ import annotations
import logging
from contextlib import asynccontextmanager
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from collections.abc import AsyncIterator
from fastapi import FastAPI
# =============================================================================
# Module Exports
# =============================================================================
__all__: list[str] = ["create_app"]
# =============================================================================
# Module-level Logger
# =============================================================================
# Logger is configured at module level for use in lifespan events.
# The actual log level is set by the application configuration.
logger = logging.getLogger(__name__)
# =============================================================================
# Lifespan Context Manager
# =============================================================================
@asynccontextmanager
async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Manage application lifespan events for startup and shutdown.

    Startup Phase:
        - Log application startup with the version string
        - Obtain the resource manager via get_resource_manager()
        - Store it on app.state.resource_manager for route handlers
        - Start the query logging service

    Shutdown Phase:
        - Log application shutdown
        - Stop the query logging service
        - Call resource_manager.shutdown()
        - Log shutdown completion

    Cleanup Guarantees:
        The shutdown phase runs inside a ``finally`` clause, so it executes
        even when an exception or cancellation is raised into this
        generator at the yield point. The resource manager shutdown is
        itself protected by a nested ``finally`` so that a failure while
        stopping the query logging service cannot skip it.

    Resource Loading Strategy:
        No explicit resource loading is triggered here; only the manager
        object is obtained and stored. Resources are expected to load
        lazily on the first request (NOTE(review): confirm against
        rag_chatbot.api.resources).

    Args:
    ----
        app: The FastAPI application instance. Its ``state`` attribute
            receives the ``resource_manager`` reference.

    Yields:
    ------
        None. Control is handed to the application after startup and
        regained when the application shuts down.

    Example:
    -------
        This is used internally by create_app() and should not be
        called directly:

        >>> app = FastAPI(lifespan=_lifespan)

    """
    # =========================================================================
    # Startup Phase
    # =========================================================================
    logger.info(
        "Starting Pythermalcomfort RAG Chatbot API (version %s)",
        "0.1.0",
    )

    # Imported inside the function so that importing this module does not
    # pull in the resources module.
    from rag_chatbot.api.resources import get_resource_manager

    resource_manager = get_resource_manager()

    # Expose the manager to route handlers through the shared app state.
    app.state.resource_manager = resource_manager
    logger.info("Resource manager initialized (resources will load on first request)")

    # Start the query logging service. An exception raised by this await
    # propagates and aborts startup; on_startup is presumably responsible
    # for handling its own recoverable failures (TODO confirm).
    from rag_chatbot.qlog.service import on_startup as qlog_on_startup

    await qlog_on_startup()

    try:
        # Yield control to the application - this is where requests are served
        yield
    finally:
        # =====================================================================
        # Shutdown Phase (always runs once startup has completed)
        # =====================================================================
        logger.info("Shutting down Pythermalcomfort RAG Chatbot API")

        from rag_chatbot.qlog.service import on_shutdown as qlog_on_shutdown

        try:
            # Stop the query logging service first, matching the original
            # shutdown order.
            await qlog_on_shutdown()
        finally:
            # Release resources even if stopping the query logger failed.
            await resource_manager.shutdown()

        logger.info("Application shutdown complete")
# =============================================================================
# Application Factory
# =============================================================================
def create_app() -> Any:  # noqa: ANN401 - Returns FastAPI instance
    """Build and return the configured FastAPI application.

    The factory performs these steps in order:

    1. Imports FastAPI, the CORS middleware, Settings, and the three
       route modules locally, so none of them load at module import time.
    2. Instantiates Settings and logs the CORS origins at debug level.
    3. Creates the FastAPI app with its OpenAPI metadata, the _lifespan
       context manager, and the /docs, /redoc and /openapi.json URLs.
    4. Adds CORSMiddleware using ``settings.cors_origins``, with
       credentials, all methods and all headers allowed.
    5. Mounts the health router under ``/health`` and the query and
       providers routers without a prefix. A router that is ``None`` is
       skipped and a warning is logged instead.

    Returns:
    -------
        FastAPI: The configured application instance, ready to be served
        by uvicorn or another ASGI server.

    Example:
    -------
        >>> app = create_app()
        >>> # Run with uvicorn programmatically
        >>> import uvicorn
        >>> uvicorn.run(app, host="0.0.0.0", port=8000)

        Or from command line:

        >>> # uvicorn rag_chatbot.api:create_app --factory --host 0.0.0.0

    """
    # -------------------------------------------------------------------------
    # Deferred imports (kept out of module import time)
    # -------------------------------------------------------------------------
    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware

    from rag_chatbot.config.settings import Settings

    from .routes.health import router as health_router
    from .routes.providers import router as providers_router
    from .routes.query import router as query_router

    # -------------------------------------------------------------------------
    # Configuration
    # -------------------------------------------------------------------------
    settings = Settings()
    logger.debug(
        "Creating app with CORS origins: %s",
        settings.cors_origins,
    )

    # -------------------------------------------------------------------------
    # Application object
    # -------------------------------------------------------------------------
    # Text shown on the interactive documentation page.
    api_description = (
        "A Retrieval-Augmented Generation (RAG) chatbot API for the "
        "pythermalcomfort library. This API provides:\n\n"
        "- **Query Endpoint**: Ask questions about pythermalcomfort and "
        "receive AI-generated answers with source citations.\n"
        "- **Streaming Responses**: Real-time response streaming via "
        "Server-Sent Events (SSE).\n"
        "- **Health Checks**: Endpoints for monitoring application status "
        "and readiness.\n\n"
        "The chatbot uses hybrid retrieval (dense embeddings + BM25) to "
        "find relevant documentation chunks, then generates responses "
        "using a multi-provider LLM fallback chain."
    )

    app = FastAPI(
        title="Pythermalcomfort RAG Chatbot API",
        version="0.1.0",
        description=api_description,
        # Startup/shutdown handling lives in the lifespan context manager.
        lifespan=_lifespan,
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
    )

    # -------------------------------------------------------------------------
    # CORS
    # -------------------------------------------------------------------------
    # Origins come from settings; credentials, every method and every
    # header are permitted so the frontend can call the API.
    cors_options = {
        "allow_origins": settings.cors_origins,
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **cors_options)

    # -------------------------------------------------------------------------
    # Routers
    # -------------------------------------------------------------------------
    # Each entry: (router object, include_router keyword arguments,
    # warning to log when the router is None). Order is the mount order.
    #
    # Only the health router gets a prefix. The query and providers
    # routers are mounted at the root because nginx strips the /api
    # prefix before forwarding (frontend /api/query -> backend /query,
    # frontend /api/providers -> backend /providers).
    router_table = (
        (
            health_router,
            {"prefix": "/health", "tags": ["Health"]},
            "Health router is None - health endpoints not mounted. "
            "This will be implemented in a subsequent step.",
        ),
        (
            query_router,
            {"tags": ["Query"]},
            "Query router is None - query endpoints not mounted. "
            "This will be implemented in a subsequent step.",
        ),
        (
            providers_router,
            {"tags": ["Providers"]},
            "Providers router is None - provider status endpoints not mounted. "
            "This will be implemented in a subsequent step.",
        ),
    )

    for router, include_options, missing_message in router_table:
        if router is None:
            # Placeholder router: nothing to mount, so just report it.
            logger.warning(missing_message)
            continue
        app.include_router(router, **include_options)  # type: ignore[arg-type]

    logger.info("FastAPI application created successfully")
    return app