| """FastAPI application factory and configuration. | |
| This module provides the create_app factory function for creating | |
| and configuring the FastAPI application. The application includes: | |
| - CORS middleware for frontend access | |
| - Error handling middleware | |
| - Route mounting for health, query, and provider status endpoints | |
| - Lifespan management for startup/shutdown | |
| Architecture Overview: | |
| The application uses the factory pattern to enable: | |
| - Lazy loading of heavy dependencies (FastAPI, Starlette) | |
| - Configuration injection for testing | |
| - Multiple application instances if needed | |
| Lifespan Management: | |
| The application uses FastAPI's lifespan context manager to handle: | |
| - Startup: Create the resource manager and start the query logging service (retrieval resources load lazily on first request) | |
| - Shutdown: Clean up resources, flush pending logs | |
| CORS Configuration: | |
| CORS origins are loaded from the Settings class, which reads | |
| from the CORS_ORIGINS environment variable. This allows | |
| flexible configuration for different deployment environments. | |
| Lazy Loading: | |
| FastAPI is loaded on first use to avoid import overhead. | |
| This is especially important for CLI tools that import | |
| the module but may not need the web server. | |
| Example: | |
| ------- | |
| >>> from rag_chatbot.api import create_app | |
| >>> app = create_app() | |
| >>> # Run with uvicorn | |
| >>> import uvicorn | |
| >>> uvicorn.run(app, host="0.0.0.0", port=8000) | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from contextlib import asynccontextmanager | |
| from typing import TYPE_CHECKING, Any | |
| if TYPE_CHECKING: | |
| from collections.abc import AsyncIterator | |
| from fastapi import FastAPI | |
| # ============================================================================= | |
| # Module Exports | |
| # ============================================================================= | |
| __all__: list[str] = ["create_app"] | |
| # ============================================================================= | |
| # Module-level Logger | |
| # ============================================================================= | |
| # Shared by the lifespan handler and create_app for their startup, shutdown, | |
| # and configuration messages. No handlers or levels are configured in this module. | |
| logger = logging.getLogger(__name__) | |
| # ============================================================================= | |
| # Lifespan Context Manager | |
| # ============================================================================= | |
@asynccontextmanager
async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Manage application startup and shutdown around the serving phase.

    The function is wrapped with ``contextlib.asynccontextmanager`` so that
    it can be handed to ``FastAPI(lifespan=...)`` as a real async context
    manager rather than as a bare async generator function.

    Startup Phase:
        - Log application startup with the version string.
        - Obtain the resource manager via ``get_resource_manager()`` and
          store it on ``app.state.resource_manager`` for route handlers.
        - Await the query logging service's ``on_startup`` hook.

    Shutdown Phase:
        - Log application shutdown.
        - Await the query logging service's ``on_shutdown`` hook.
        - Await ``resource_manager.shutdown()``.
        - Log shutdown completion.

    The shutdown phase runs inside ``finally`` blocks: it still executes when
    an exception or cancellation is thrown in at the ``yield``, and the
    resource manager is still shut down when the query logging hook raises.
    The original exception propagates after cleanup.

    Args:
    ----
        app: The FastAPI application instance. Its ``state`` attribute
            receives the resource manager.

    Yields:
    ------
        None. Control is handed to the application after startup and
        returns here on shutdown.

    Example:
    -------
        This is used internally by create_app() and should not be
        called directly:
        >>> app = FastAPI(lifespan=_lifespan)

    """
    logger.info(
        "Starting Pythermalcomfort RAG Chatbot API (version %s)",
        "0.1.0",
    )

    # Project imports stay function-local so that importing this module does
    # not pull in the resource and query-logging packages.
    from rag_chatbot.api.resources import get_resource_manager
    from rag_chatbot.qlog.service import on_shutdown as qlog_on_shutdown
    from rag_chatbot.qlog.service import on_startup as qlog_on_startup

    resource_manager = get_resource_manager()

    # Route handlers read the manager from app.state.
    app.state.resource_manager = resource_manager
    logger.info("Resource manager initialized (resources will load on first request)")

    await qlog_on_startup()

    try:
        # Requests are served while the generator is suspended here.
        yield
    finally:
        logger.info("Shutting down Pythermalcomfort RAG Chatbot API")
        try:
            await qlog_on_shutdown()
        finally:
            # Runs even if the query logging hook raised, so the resource
            # manager is never left without its shutdown call.
            await resource_manager.shutdown()
        logger.info("Application shutdown complete")
| # ============================================================================= | |
| # Application Factory | |
| # ============================================================================= | |
def _mount_router(
    app: FastAPI,
    router: Any,
    *,
    label: str,
    endpoints: str,
    **include_kwargs: Any,
) -> None:
    """Include ``router`` on ``app``, or warn and skip it when it is ``None``.

    Args:
    ----
        app: Application the router is attached to.
        router: Object exported as ``router`` by a routes module, or
            ``None`` when that module does not provide one.
        label: Capitalised router name used at the start of the warning.
        endpoints: Lower-case description of the endpoints that stay
            unmounted, used in the warning.
        **include_kwargs: Passed unchanged to ``app.include_router``
            (for example ``prefix`` and ``tags``).

    """
    if router is None:
        logger.warning(
            "%s router is None - %s endpoints not mounted. "
            "This will be implemented in a subsequent step.",
            label,
            endpoints,
        )
        return
    app.include_router(router, **include_kwargs)


def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Application Components:
        1. Lifespan Management: ``_lifespan`` is registered as the
           application's lifespan handler.
        2. CORS Middleware: Allowed origins come from
           ``Settings().cors_origins``; credentials, all methods, and all
           headers are allowed.
        3. OpenAPI Documentation: Served at /docs, /redoc, and
           /openapi.json.
        4. Route Handlers:
            - Health router, mounted under the /health prefix
            - Query router, mounted without a prefix
            - Providers router, mounted without a prefix
           A routes module that exports ``router = None`` is skipped and a
           warning is logged instead.

    Returns:
    -------
        FastAPI: Configured FastAPI application instance ready to be
            run with uvicorn or another ASGI server.

    Example:
    -------
        >>> app = create_app()
        >>> # Run with uvicorn programmatically
        >>> import uvicorn
        >>> uvicorn.run(app, host="0.0.0.0", port=8000)
        Or from command line:
        >>> # uvicorn rag_chatbot.api:create_app --factory --host 0.0.0.0

    Note:
    ----
        FastAPI, the settings class, and the route modules are imported
        inside this function, so importing this module does not load them.

    """
    # Function-local imports: only paid for when an app is actually built.
    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware

    from rag_chatbot.config.settings import Settings

    from .routes.health import router as health_router
    from .routes.providers import router as providers_router
    from .routes.query import router as query_router

    settings = Settings()
    cors_origins = settings.cors_origins
    logger.debug(
        "Creating app with CORS origins: %s",
        cors_origins,
    )

    app = FastAPI(
        # OpenAPI metadata shown on the documentation pages.
        title="Pythermalcomfort RAG Chatbot API",
        version="0.1.0",
        description=(
            "A Retrieval-Augmented Generation (RAG) chatbot API for the "
            "pythermalcomfort library. This API provides:\n\n"
            "- **Query Endpoint**: Ask questions about pythermalcomfort and "
            "receive AI-generated answers with source citations.\n"
            "- **Streaming Responses**: Real-time response streaming via "
            "Server-Sent Events (SSE).\n"
            "- **Health Checks**: Endpoints for monitoring application status "
            "and readiness.\n\n"
            "The chatbot uses hybrid retrieval (dense embeddings + BM25) to "
            "find relevant documentation chunks, then generates responses "
            "using a multi-provider LLM fallback chain."
        ),
        # Startup/shutdown handling lives in _lifespan.
        lifespan=_lifespan,
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
    )

    # Browsers refuse credentialed responses that carry a literal "*" origin,
    # so a wildcard combined with allow_credentials=True is almost always a
    # misconfiguration. Behaviour is left unchanged; the warning only makes
    # the combination visible in the logs.
    if "*" in cors_origins:
        logger.warning(
            "CORS is configured with a wildcard origin while credentials are "
            "allowed; set CORS_ORIGINS to explicit origins for production use."
        )

    app.add_middleware(
        CORSMiddleware,
        allow_origins=cors_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    _mount_router(
        app,
        health_router,
        label="Health",
        endpoints="health",
        prefix="/health",
        tags=["Health"],
    )
    # Note: No /api prefix here because nginx strips it before forwarding.
    # Frontend calls /api/query -> nginx strips /api/ -> backend receives /query
    _mount_router(
        app,
        query_router,
        label="Query",
        endpoints="query",
        tags=["Query"],
    )
    # Same nginx prefix-stripping applies: /api/providers arrives as /providers.
    _mount_router(
        app,
        providers_router,
        label="Providers",
        endpoints="provider status",
        tags=["Providers"],
    )

    logger.info("FastAPI application created successfully")
    return app