# Source: JacekAI / config.py
# Author: Jacek Zadrożny
# Commit: 637ed9b — "Fix HuggingFace deployment issues"
"""
Configuration management for A11y Expert system.
This module provides centralized configuration using Pydantic settings.
All settings can be configured via environment variables or .env file.
"""
from pydantic_settings import BaseSettings
from pydantic import Field, field_validator
from functools import lru_cache
import os
from typing import Optional
class Settings(BaseSettings):
    """
    Application settings loaded from environment variables or a .env file.

    All settings have sensible defaults except for the OpenAI API key,
    which must be provided via the OPENAI_API_KEY environment variable —
    unless the app is running on a HuggingFace Space (detected via the
    SPACE_ID environment variable), where an empty key is tolerated so the
    Space can boot and the key can be supplied at runtime.

    Attributes:
        openai_api_key: OpenAI API key (required outside HuggingFace Spaces)
        llm_model: Language model to use for chat completions
        llm_base_url: Base URL for OpenAI API (supports GitHub Models)
        embedding_model: Model to use for text embeddings
        lancedb_uri: Path to LanceDB database directory
        lancedb_table: Name of the LanceDB table
        chunk_size: Target size for text chunks in characters
        chunk_overlap: Overlap between consecutive chunks
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
        server_host: Gradio server host address
        server_port: Gradio server port

    Examples:
        >>> settings = get_settings()
        >>> print(settings.llm_model)
        'gpt-4o'
    """

    # API Configuration (required outside HuggingFace Spaces)
    openai_api_key: str = Field(
        default="",
        description="OpenAI API key - required for LLM and embeddings"
    )

    # LLM Configuration
    llm_model: str = Field(
        default="gpt-4o",
        description="Language model for chat completions"
    )
    llm_base_url: Optional[str] = Field(
        default=None,
        description="Base URL for OpenAI-compatible API (optional)"
    )

    # Embeddings Configuration
    embedding_model: str = Field(
        default="text-embedding-3-large",
        description="Model for text embeddings"
    )

    # Database Configuration
    lancedb_uri: str = Field(
        default="./lancedb",
        description="Path to LanceDB database directory"
    )
    lancedb_table: str = Field(
        default="a11y_expert",
        description="Name of the LanceDB table"
    )

    # ETL Configuration
    chunk_size: int = Field(
        default=1000,
        ge=100,
        le=4000,
        description="Target chunk size in characters"
    )
    chunk_overlap: int = Field(
        default=200,
        ge=0,
        le=1000,
        description="Overlap between chunks in characters"
    )

    # Logging Configuration
    log_level: str = Field(
        default="INFO",
        description="Logging level (DEBUG, INFO, WARNING, ERROR)"
    )

    # UI Configuration
    server_host: str = Field(
        default="127.0.0.1",
        description="Gradio server host address"
    )
    server_port: int = Field(
        default=7860,
        ge=1024,
        le=65535,
        description="Gradio server port"
    )

    @field_validator("openai_api_key")
    @classmethod
    def validate_api_key(cls, v: str) -> str:
        """Ensure the API key is provided and not empty.

        An empty key is accepted when SPACE_ID is set (i.e. running on a
        HuggingFace Space), so deployment does not fail before the key can
        be configured at runtime.

        Raises:
            ValueError: If the key is empty and not running on a Space.
        """
        # Normalize: treat None/whitespace-only values as empty.
        # `os` is imported at module level; no local re-import needed.
        v = (v or "").strip()
        if not v and not os.getenv("SPACE_ID"):
            raise ValueError(
                "OPENAI_API_KEY is required. "
                "Set it in your .env file or environment variables."
            )
        return v

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v: str) -> str:
        """Ensure log level is one of the standard logging levels.

        Returns the level upper-cased so downstream logging config is
        case-insensitive to the environment value.

        Raises:
            ValueError: If the level is not a recognized logging level.
        """
        valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        v_upper = v.upper()
        if v_upper not in valid_levels:
            raise ValueError(
                f"Invalid log level: {v}. "
                f"Must be one of: {', '.join(valid_levels)}"
            )
        return v_upper

    @field_validator("chunk_overlap")
    @classmethod
    def validate_overlap(cls, v: int, info) -> int:
        """Ensure chunk overlap is strictly less than chunk size.

        Note: this cross-field check only fires when chunk_size was
        successfully validated first (it appears earlier in `info.data`).

        Raises:
            ValueError: If overlap >= chunk_size.
        """
        if info.data and "chunk_size" in info.data and v >= info.data["chunk_size"]:
            raise ValueError(
                f"chunk_overlap ({v}) must be less than chunk_size ({info.data['chunk_size']})"
            )
        return v

    # pydantic-settings configuration: read .env, ignore unknown vars,
    # match environment variable names case-insensitively.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }
@lru_cache()
def get_settings() -> Settings:
    """
    Return the process-wide Settings instance (singleton pattern).

    The first call constructs and validates a Settings object; every
    subsequent call returns the same cached object via lru_cache.

    Returns:
        Settings: Configured application settings.

    Raises:
        ValidationError: If required settings are missing or invalid.

    Examples:
        >>> settings = get_settings()
        >>> print(settings.llm_model)
        'gpt-4o'
    """
    settings = Settings()
    return settings