import os
from functools import lru_cache
from typing import Optional

from dotenv import load_dotenv
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

# Pull variables from a local .env file (when one exists) into the process
# environment before any settings are read.
load_dotenv()


class Settings(BaseSettings):
    """Application settings loaded from environment variables."""

    # --- Application identity ---
    APP_NAME: str = Field(default="rag-agent-workbench")
    APP_VERSION: str = Field(default="0.1.0")

    # --- Pinecone ---
    # The first three fields are declared with `...` and therefore have no
    # default value.
    PINECONE_API_KEY: str = Field(..., description="Pinecone API key")
    PINECONE_INDEX_NAME: str = Field(
        ...,
        description="Name of the Pinecone index (used for configuration checks)",
    )
    PINECONE_HOST: str = Field(
        ...,
        description="Pinecone index host URL for data-plane operations",
    )
    PINECONE_NAMESPACE: str = Field(
        default="dev",
        description="Default Pinecone namespace",
    )
    PINECONE_TEXT_FIELD: str = Field(
        default="chunk_text",
        description=(
            "Text field name used by the Pinecone integrated embedding index. "
            "For example, set to 'content' if your index field_map uses that name."
        ),
    )

    # --- Logging ---
    LOG_LEVEL: str = Field(default="INFO", description="Application log level")

    # --- Outbound HTTP client defaults ---
    HTTP_TIMEOUT_SECONDS: float = Field(
        default=10.0,
        description="Default timeout for outbound HTTP requests",
    )
    HTTP_MAX_RETRIES: int = Field(
        default=3,
        description="Max retries for outbound HTTP requests",
    )

    # --- Groq / LLM ---
    GROQ_API_KEY: Optional[str] = Field(
        default=None,
        description="Groq API key (required for /chat endpoints)",
    )
    GROQ_BASE_URL: str = Field(
        default="https://api.groq.com/openai/v1",
        description="Groq OpenAI-compatible base URL",
    )
    GROQ_MODEL: str = Field(
        default="llama-3.1-8b-instant",
        description="Default Groq chat model used for /chat",
    )

    # --- Web search / Tavily ---
    TAVILY_API_KEY: Optional[str] = Field(
        default=None,
        description="Tavily API key for web search fallback (optional)",
    )

    # --- RAG defaults (each one range-checked through ge/le) ---
    RAG_DEFAULT_TOP_K: int = Field(
        default=5,
        ge=1,
        le=100,
        description="Default number of documents to retrieve for RAG",
    )
    RAG_MIN_SCORE: float = Field(
        default=0.25,
        ge=0.0,
        le=1.0,
        description="Default minimum relevance score to trust retrieval without web fallback",
    )
    RAG_MAX_WEB_RESULTS: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Maximum number of web search results to fetch when using Tavily",
    )

    # --- Operational toggles ---
    RATE_LIMIT_ENABLED: bool = Field(
        default=True,
        description="Enable SlowAPI rate limiting middleware when true.",
    )
    CACHE_ENABLED: bool = Field(
        default=True,
        description="Enable in-memory TTL caching for /search and /chat when true.",
    )

    # Settings source configuration: read `.env` as UTF-8; `extra` is set to
    # "ignore" for entries that do not correspond to a declared field.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )


@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the application settings once and hand back the cached object.

    The function takes no arguments, so the single-slot ``lru_cache`` means
    ``Settings`` is constructed on the first call only; later calls return
    that same instance.
    """
    settings = Settings()  # type: ignore[call-arg]
    return settings


def get_env_bool(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean flag.

    Args:
        name: Name of the environment variable to look up.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` if the variable is unset. Otherwise ``True`` when the
        value, stripped of surrounding whitespace and lower-cased, is one of
        ``"1"``, ``"true"``, ``"yes"`` or ``"on"``; ``False`` for any other
        value (including an empty string).
    """
    value: Optional[str] = os.getenv(name)
    if value is not None:
        normalized = value.strip().lower()
        return normalized in {"1", "true", "yes", "on"}
    return default