"""Centralized configuration loaded from environment variables."""
import os
from dataclasses import dataclass, field
from pathlib import Path

from dotenv import load_dotenv
# Load .env from the project root. The path is resolved relative to this
# file (two directory levels up from it), not the current working directory.
# This runs at import time, before any of the os.environ lookups below.
load_dotenv(Path(__file__).resolve().parent.parent / ".env")
@dataclass(frozen=True)
class Settings:
    """Application-wide settings populated from environment variables.

    Instances are immutable (``frozen=True``). The field order below is the
    positional order of the generated constructor, so new fields should be
    appended rather than inserted.

    Credential fields (the ``*_api_key`` attributes) are declared with
    ``field(repr=False)``: they remain ordinary required attributes and
    still take part in equality, but they are left out of ``repr()`` so
    that printing or logging a ``Settings`` object does not expose secrets.
    """

    # Provider selection
    llm_provider: str
    embedding_provider: str

    # Evaluator (RAGAS judge) — independent of generation LLM so a strong
    # cloud judge can score outputs from a small local generation model.
    # Empty evaluator_llm_provider means "reuse the generation LLM".
    evaluator_llm_provider: str
    evaluator_llm_model: str

    # General
    qdrant_path: str
    qdrant_url: str
    collection_name: str
    embedding_model: str
    embedding_dimension: int
    generation_model: str
    reranker_model: str
    chunk_strategy: str
    chunk_size: int
    chunk_overlap: int
    top_k: int
    bm25_weight: float
    dense_weight: float
    log_level: str

    # Ollama
    ollama_base_url: str
    ollama_model: str

    # OpenAI
    openai_api_key: str = field(repr=False)  # secret: hidden from repr()
    openai_base_url: str
    openai_model: str
    openai_embedding_model: str

    # Azure OpenAI
    azure_openai_api_key: str = field(repr=False)  # secret: hidden from repr()
    azure_openai_endpoint: str
    azure_openai_api_version: str
    azure_openai_deployment: str
    azure_openai_embedding_deployment: str

    # AWS Bedrock
    aws_region: str
    aws_bedrock_model: str
    aws_bedrock_embedding_model: str

    # Groq
    groq_api_key: str = field(repr=False)  # secret: hidden from repr()
    groq_model: str

    # Anthropic
    anthropic_api_key: str = field(repr=False)  # secret: hidden from repr()
    anthropic_model: str

    # Google GenAI
    google_api_key: str = field(repr=False)  # secret: hidden from repr()
    google_model: str
    google_embedding_model: str

    # Local embeddings (HuggingFace)
    local_embedding_model: str

    # Query translation
    translate_query: bool

    # Agent mode: "pipeline" (fixed DAG) or "react" (tool-calling ReAct loop)
    agent_mode: str

    # Token budget (Stage 1: measure-only). When True, prompt sizes are
    # logged at known injection points; truncation is NOT applied yet.
    token_budget_enabled: bool

    # LLM provider fallback. When enabled, the primary generation LLM is
    # wrapped with LangChain's with_fallbacks across ``llm_fallback_providers``
    # in order. DEFAULT OFF because an automatic switch from a local
    # privacy-preserving provider (e.g. Ollama) to a cloud provider (e.g.
    # OpenAI) has both cost and data-exfiltration implications.
    llm_fallback_enabled: bool
    llm_fallback_providers: tuple[str, ...]
def _parse_bool(value: str, *, default: bool) -> bool:
    """Parse a boolean environment variable string.

    Matching is case-insensitive and ignores surrounding whitespace. A value
    that is unset, empty, or made up only of whitespace counts as "not
    provided" and yields ``default``.

    Args:
        value: Raw env var value (may be empty).
        default: Fallback when value is empty, blank, or unset.

    Returns:
        ``default`` for blank input; otherwise ``True`` when the value is
        one of "1", "true" or "yes", and ``False`` for anything else.
    """
    # Strip before the emptiness test so a whitespace-only value falls back
    # to the default instead of being read as an explicit False.
    normalized = value.strip().lower() if value else ""
    if not normalized:
        return default
    return normalized in ("1", "true", "yes")
def _env_int(name: str, default: int) -> int:
    """Read integer env var ``name``; unset or blank yields ``default``."""
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


def _env_float(name: str, default: float) -> float:
    """Read float env var ``name``; unset or blank yields ``default``."""
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from err


def load_settings() -> Settings:
    """Load and return application settings from environment variables.

    Every value is read from ``os.environ`` with a built-in default. Numeric
    variables that are set but blank (e.g. ``TOP_K=`` in a ``.env`` file)
    fall back to their default, matching how blank boolean variables are
    handled, instead of failing inside ``int()`` / ``float()``.

    Returns:
        Settings: Frozen dataclass with all configuration values.

    Raises:
        ValueError: If a numeric variable holds a non-blank value that is
            not a valid number; the message names the variable.
    """
    env = os.environ.get
    # Read once: used both as a setting and to pick the translation default.
    llm_provider = env("LLM_PROVIDER", "ollama")

    return Settings(
        # Provider selection
        llm_provider=llm_provider,
        embedding_provider=env("EMBEDDING_PROVIDER", "local"),
        # Evaluator (RAGAS judge) — empty provider means "reuse generation LLM"
        evaluator_llm_provider=env("EVALUATOR_LLM_PROVIDER", ""),
        evaluator_llm_model=env("EVALUATOR_LLM_MODEL", ""),
        # General
        qdrant_path=env("QDRANT_PATH", "./qdrant_data"),
        qdrant_url=env("QDRANT_URL", ""),
        collection_name=env("COLLECTION_NAME", "ku_documents"),
        embedding_model=env(
            "EMBEDDING_MODEL", "paraphrase-multilingual-MiniLM-L12-v2"
        ),
        embedding_dimension=_env_int("EMBEDDING_DIMENSION", 384),
        generation_model=env("GENERATION_MODEL", "gemma4:e4b"),
        reranker_model=env(
            "RERANKER_MODEL", "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1"
        ),
        chunk_strategy=env("CHUNK_STRATEGY", "semantic"),
        chunk_size=_env_int("CHUNK_SIZE", 512),
        chunk_overlap=_env_int("CHUNK_OVERLAP", 64),
        top_k=_env_int("TOP_K", 5),
        bm25_weight=_env_float("BM25_WEIGHT", 0.3),
        dense_weight=_env_float("DENSE_WEIGHT", 0.7),
        log_level=env("LOG_LEVEL", "INFO"),
        # Ollama
        ollama_base_url=env("OLLAMA_BASE_URL", "http://localhost:11434"),
        ollama_model=env("OLLAMA_MODEL", "gemma4:e4b"),
        # OpenAI
        openai_api_key=env("OPENAI_API_KEY", ""),
        openai_base_url=env("OPENAI_BASE_URL", ""),
        openai_model=env("OPENAI_MODEL", "gpt-4o-mini"),
        openai_embedding_model=env(
            "OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
        ),
        # Azure OpenAI
        azure_openai_api_key=env("AZURE_OPENAI_API_KEY", ""),
        azure_openai_endpoint=env("AZURE_OPENAI_ENDPOINT", ""),
        azure_openai_api_version=env("AZURE_OPENAI_API_VERSION", "2024-02-01"),
        azure_openai_deployment=env("AZURE_OPENAI_DEPLOYMENT", ""),
        azure_openai_embedding_deployment=env(
            "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", ""
        ),
        # AWS Bedrock
        aws_region=env("AWS_REGION", "eu-west-1"),
        aws_bedrock_model=env(
            "AWS_BEDROCK_MODEL", "anthropic.claude-sonnet-4-20250514-v1:0"
        ),
        aws_bedrock_embedding_model=env(
            "AWS_BEDROCK_EMBEDDING_MODEL", "amazon.titan-embed-text-v2:0"
        ),
        # Groq
        groq_api_key=env("GROQ_API_KEY", ""),
        groq_model=env("GROQ_MODEL", "qwen/qwen3-32b"),
        # Anthropic
        anthropic_api_key=env("ANTHROPIC_API_KEY", ""),
        anthropic_model=env("ANTHROPIC_MODEL", "claude-sonnet-4-20250514"),
        # Google GenAI
        google_api_key=env("GOOGLE_API_KEY", ""),
        google_model=env("GOOGLE_LLM_MODEL", "gemini-2.5-flash"),
        google_embedding_model=env(
            "GOOGLE_EMBEDDING_MODEL", "models/embedding-001"
        ),
        # Local embeddings
        local_embedding_model=env(
            "LOCAL_EMBEDDING_MODEL", "paraphrase-multilingual-MiniLM-L12-v2"
        ),
        # Query translation — when TRANSLATE_QUERY is not provided, default
        # to on only if the configured LLM provider is exactly "ollama".
        translate_query=_parse_bool(
            env("TRANSLATE_QUERY", ""),
            default=llm_provider == "ollama",
        ),
        # Agent mode: "pipeline" keeps the existing fixed DAG; "react" enables
        # the multi-step ReAct loop (requires an LLM with tool-calling support).
        agent_mode=env("AGENT_MODE", "pipeline"),
        # Token budget — measure-only logging, off by default.
        token_budget_enabled=_parse_bool(
            env("TOKEN_BUDGET_ENABLED", ""), default=False
        ),
        # LLM fallback chain — off by default for privacy / cost reasons.
        llm_fallback_enabled=_parse_bool(
            env("LLM_FALLBACK_ENABLED", ""), default=False
        ),
        # Comma-separated list; entries are trimmed and lower-cased, and
        # empty entries (e.g. from a trailing comma) are dropped.
        llm_fallback_providers=tuple(
            p.strip().lower()
            for p in env("LLM_FALLBACK_PROVIDERS", "").split(",")
            if p.strip()
        ),
    )
|