"""FastAPI application factory and configuration.

This module provides the create_app factory function for creating and
configuring the FastAPI application. The application includes:
    - CORS middleware for frontend access
    - Route mounting for health, query, and provider-status endpoints
    - Lifespan management for startup/shutdown

Note: create_app() in this module registers CORSMiddleware only; it does not
install a dedicated error-handling middleware.

Architecture Overview:
    The application uses the factory pattern to enable:
    - Lazy loading of heavy dependencies (FastAPI, Starlette)
    - Configuration that is read when create_app() is called (a new
      Settings() instance per call) rather than at import time
    - Multiple application instances if needed

Lifespan Management:
    The application uses FastAPI's lifespan context manager to handle:
    - Startup: obtain the ResourceManager, store it on app.state, and start
      the query logging service
    - Shutdown: stop the query logging service, then call
      ResourceManager.shutdown()

CORS Configuration:
    CORS origins are taken from settings.cors_origins. The Settings class is
    documented as reading them from the CORS_ORIGINS environment variable,
    which allows flexible configuration for different deployment
    environments.

Lazy Loading:
    FastAPI is imported inside create_app() rather than at module import
    time to avoid import overhead. This is especially important for CLI
    tools that import the module but may not need the web server.

Example:
-------
    >>> from rag_chatbot.api import create_app
    >>> app = create_app()
    >>> # Run with uvicorn
    >>> import uvicorn
    >>> uvicorn.run(app, host="0.0.0.0", port=8000)

"""

from __future__ import annotations

import logging
from contextlib import asynccontextmanager
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from collections.abc import AsyncIterator

    from fastapi import FastAPI

# =============================================================================
# Module Exports
# =============================================================================
__all__: list[str] = ["create_app"]

# =============================================================================
# Module-level Logger
# =============================================================================
# Logger is configured at module level for use in lifespan events.
# The actual log level is set by the application configuration.
logger = logging.getLogger(__name__)

# Single source of truth for the API version: used both in the startup log
# line and in the OpenAPI metadata so the two can never drift apart.
_API_VERSION = "0.1.0"


# =============================================================================
# Lifespan Context Manager
# =============================================================================


@asynccontextmanager
async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Manage application lifespan events for startup and shutdown.

    Startup Phase:
        - Log application startup with version info
        - Obtain the ResourceManager via get_resource_manager()
        - Store the ResourceManager in app.state for route handler access
        - Start the query logging service

    Shutdown Phase (always executed once startup has completed):
        - Log application shutdown
        - Stop the query logging service
        - Call ResourceManager.shutdown(), even if stopping the query
          logging service raised
        - Log shutdown completion (only when both steps succeeded)

    The shutdown phase lives in a ``finally`` block. Without it, an exception
    or cancellation delivered to the generator at the ``yield`` would skip
    every cleanup step.

    The lifespan pattern replaces the deprecated @app.on_event decorators.

    Resource Loading Strategy:
        Nothing in this function loads resources eagerly; it only obtains the
        manager object. Route handlers are expected to call ensure_loaded()
        before using resources (ResourceManager is defined in
        rag_chatbot.api.resources and is not visible from this module).

    Args:
    ----
        app: The FastAPI application instance. Its ``state`` attribute
            receives the ResourceManager as ``resource_manager``.

    Yields:
    ------
        None. Control is handed to the application after startup and is
        regained on shutdown.

    Example:
    -------
        This is used internally by create_app() and should not be called
        directly:

        >>> app = FastAPI(lifespan=_lifespan)

    """
    # =========================================================================
    # Startup Phase
    # =========================================================================
    logger.info(
        "Starting Pythermalcomfort RAG Chatbot API (version %s)",
        _API_VERSION,
    )

    # Imported here (not at module level) to avoid loading heavy dependencies
    # at module import time.
    from rag_chatbot.api.resources import get_resource_manager

    resource_manager = get_resource_manager()

    # Route handlers read the manager from app.state.
    app.state.resource_manager = resource_manager
    logger.info("Resource manager initialized (resources will load on first request)")

    # Start the query logging service.
    # NOTE(review): an exception raised by on_startup() propagates from here
    # and aborts application startup. If query logging must never block
    # startup, that guarantee has to be provided inside on_startup() itself —
    # confirm against rag_chatbot.qlog.service.
    from rag_chatbot.qlog.service import on_startup as qlog_on_startup

    await qlog_on_startup()

    try:
        # Yield control to the application - this is where requests are served
        yield
    finally:
        # =====================================================================
        # Shutdown Phase
        # =====================================================================
        logger.info("Shutting down Pythermalcomfort RAG Chatbot API")

        try:
            # Stop the query logging service first so it can persist pending
            # entries while the rest of the application is still intact.
            from rag_chatbot.qlog.service import on_shutdown as qlog_on_shutdown

            await qlog_on_shutdown()
        finally:
            # Release resources even when the query logging shutdown failed;
            # the original error (if any) still propagates afterwards.
            await resource_manager.shutdown()

        logger.info("Application shutdown complete")


# =============================================================================
# Application Factory
# =============================================================================


def _mount_router(
    app: FastAPI,
    router: Any,  # noqa: ANN401 - APIRouter, or None for a placeholder module
    *,
    tag: str,
    missing_endpoints: str,
    prefix: str = "",
) -> None:
    """Include ``router`` on ``app``, or log a warning when it is ``None``.

    Args:
    ----
        app: Application that receives the routes.
        router: Router object exported by a routes module, or ``None`` when
            that module is still a placeholder.
        tag: OpenAPI tag for the routes; also used as the router's name in
            the warning message.
        missing_endpoints: Human-readable description of the endpoints that
            stay unmounted when ``router`` is ``None``.
        prefix: URL prefix for the routes. The empty default mounts the
            routes at their declared paths.

    """
    if router is None:
        logger.warning(
            "%s router is None - %s endpoints not mounted. "
            "This will be implemented in a subsequent step.",
            tag,
            missing_endpoints,
        )
        return

    app.include_router(router, prefix=prefix, tags=[tag])


def create_app() -> Any:  # noqa: ANN401 - Returns FastAPI instance
    """Create and configure the FastAPI application.

    Application Components:
        1. Lifespan Management: ``_lifespan`` handles startup/shutdown.
        2. CORS Middleware: Configured from ``settings.cors_origins``.
        3. OpenAPI Documentation: /docs, /redoc and /openapi.json.
        4. Route Handlers:
            - Health routes mounted under the /health prefix
            - Query routes mounted without a prefix
            - Provider status routes mounted without a prefix

        A routes module may export ``router = None`` as a placeholder; such a
        router is skipped and a warning is logged instead.

    Configuration:
        A new ``Settings()`` instance is created on every call. Only
        ``cors_origins`` is read here.

    Returns:
    -------
        FastAPI: Configured FastAPI application instance ready to be run
            with uvicorn or another ASGI server.

    Example:
    -------
        >>> app = create_app()
        >>> # Run with uvicorn programmatically
        >>> import uvicorn
        >>> uvicorn.run(app, host="0.0.0.0", port=8000)

        Or from command line:
        >>> # uvicorn rag_chatbot.api:create_app --factory --host 0.0.0.0

    Note:
    ----
        FastAPI and the route modules are imported inside this function so
        that importing this module stays cheap (e.g. for CLI tools).

    """
    # =========================================================================
    # Lazy Import Dependencies
    # =========================================================================
    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware

    from rag_chatbot.config.settings import Settings

    from .routes.health import router as health_router
    from .routes.providers import router as providers_router
    from .routes.query import router as query_router

    # =========================================================================
    # Load Configuration
    # =========================================================================
    settings = Settings()

    # Debug level only, so configuration details stay out of normal logs.
    logger.debug(
        "Creating app with CORS origins: %s",
        settings.cors_origins,
    )

    # =========================================================================
    # Create FastAPI Application
    # =========================================================================
    app = FastAPI(
        # OpenAPI metadata shown on the /docs page and in the schema.
        title="Pythermalcomfort RAG Chatbot API",
        version=_API_VERSION,
        description=(
            "A Retrieval-Augmented Generation (RAG) chatbot API for the "
            "pythermalcomfort library. This API provides:\n\n"
            "- **Query Endpoint**: Ask questions about pythermalcomfort and "
            "receive AI-generated answers with source citations.\n"
            "- **Streaming Responses**: Real-time response streaming via "
            "Server-Sent Events (SSE).\n"
            "- **Health Checks**: Endpoints for monitoring application status "
            "and readiness.\n\n"
            "The chatbot uses hybrid retrieval (dense embeddings + BM25) to "
            "find relevant documentation chunks, then generates responses "
            "using a multi-provider LLM fallback chain."
        ),
        # Startup/shutdown handling (replaces the deprecated @app.on_event).
        lifespan=_lifespan,
        # Documentation endpoints.
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
    )

    # =========================================================================
    # Configure CORS Middleware
    # =========================================================================
    # NOTE(review): allow_credentials=True must not be combined with a
    # wildcard origin ("*"); browsers refuse credentialed responses carrying
    # "Access-Control-Allow-Origin: *". Verify that CORS_ORIGINS always lists
    # explicit origins in deployed environments.
    app.add_middleware(
        CORSMiddleware,
        # Origins allowed to make requests to this API.
        allow_origins=settings.cors_origins,
        # Allow credentials (cookies, authorization headers).
        allow_credentials=True,
        # Allow all HTTP methods and all request headers.
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # =========================================================================
    # Mount Route Handlers
    # =========================================================================
    # Health check routes for monitoring and readiness probes.
    _mount_router(
        app,
        health_router,
        tag="Health",
        missing_endpoints="health",
        prefix="/health",
    )

    # Query routes for chat functionality.
    # Note: No /api prefix here because nginx strips it before forwarding.
    # Frontend calls /api/query -> nginx strips /api/ -> backend receives /query
    _mount_router(
        app,
        query_router,
        tag="Query",
        missing_endpoints="query",
    )

    # Provider status routes for monitoring LLM provider availability.
    # Note: No /api prefix here because nginx strips it before forwarding.
    # Frontend calls /api/providers -> nginx strips /api/ -> backend receives
    # /providers
    _mount_router(
        app,
        providers_router,
        tag="Providers",
        missing_endpoints="provider status",
    )

    # =========================================================================
    # Return Configured Application
    # =========================================================================
    logger.info("FastAPI application created successfully")

    return app