""" Configuration management for A11y Expert system. This module provides centralized configuration using Pydantic settings. All settings can be configured via environment variables or .env file. """ from pydantic_settings import BaseSettings from pydantic import Field, field_validator from functools import lru_cache import os from typing import Optional class Settings(BaseSettings): """ Application settings loaded from environment variables or .env file. All settings have sensible defaults except for the OpenAI API key, which must be provided via the OPENAI_API_KEY environment variable. Attributes: openai_api_key: OpenAI API key (required) llm_model: Language model to use for chat completions llm_base_url: Base URL for OpenAI API (supports GitHub Models) embedding_model: Model to use for text embeddings lancedb_uri: Path to LanceDB database directory lancedb_table: Name of the LanceDB table chunk_size: Target size for text chunks in characters chunk_overlap: Overlap between consecutive chunks log_level: Logging level (DEBUG, INFO, WARNING, ERROR) server_host: Gradio server host address server_port: Gradio server port Examples: >>> settings = get_settings() >>> print(settings.llm_model) 'gpt-4o' """ # API Configuration (required) openai_api_key: str = Field( default="", description="OpenAI API key - required for LLM and embeddings" ) # LLM Configuration llm_model: str = Field( default="gpt-4o", description="Language model for chat completions" ) llm_base_url: Optional[str] = Field( default=None, description="Base URL for OpenAI-compatible API (optional)" ) # Embeddings Configuration embedding_model: str = Field( default="text-embedding-3-large", description="Model for text embeddings" ) # Database Configuration lancedb_uri: str = Field( default="./lancedb", description="Path to LanceDB database directory" ) lancedb_table: str = Field( default="a11y_expert", description="Name of the LanceDB table" ) # ETL Configuration chunk_size: int = Field( default=1000, ge=100, le=4000, description="Target chunk size in characters" ) chunk_overlap: int = Field( default=200, ge=0, le=1000, description="Overlap between chunks in characters" ) # Logging Configuration log_level: str = Field( default="INFO", description="Logging level (DEBUG, INFO, WARNING, ERROR)" ) # UI Configuration server_host: str = Field( default="127.0.0.1", description="Gradio server host address" ) server_port: int = Field( default=7860, ge=1024, le=65535, description="Gradio server port" ) @field_validator("openai_api_key") @classmethod def validate_api_key(cls, v): """Ensure API key is provided and not empty.""" v = v or "" v = v.strip() if not v: import os if not os.getenv("SPACE_ID"): raise ValueError( "OPENAI_API_KEY is required. " "Set it in your .env file or environment variables." ) return v @field_validator("log_level") @classmethod def validate_log_level(cls, v): """Ensure log level is valid.""" valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] v_upper = v.upper() if v_upper not in valid_levels: raise ValueError( f"Invalid log level: {v}. " f"Must be one of: {', '.join(valid_levels)}" ) return v_upper @field_validator("chunk_overlap") @classmethod def validate_overlap(cls, v, info): """Ensure chunk overlap is less than chunk size.""" if info.data and "chunk_size" in info.data and v >= info.data["chunk_size"]: raise ValueError( f"chunk_overlap ({v}) must be less than chunk_size ({info.data['chunk_size']})" ) return v model_config = { "env_file": ".env", "env_file_encoding": "utf-8", "case_sensitive": False, "extra": "ignore", } @lru_cache() def get_settings() -> Settings: """ Get cached settings instance (singleton pattern). Returns: Settings: Configured application settings Raises: ValidationError: If required settings are missing or invalid Examples: >>> settings = get_settings() >>> print(settings.llm_model) 'gpt-4o' """ return Settings()