docmind / config.py
AI Engineer
Initial commit for DocMind
6cca5b1
Raw
History Blame Contribute Delete
4.73 kB
"""
DocMind — Centralized Configuration
All magic numbers, thresholds, model names, and environment-dependent
settings live here so they can be tuned in one place.
"""
import os
from dataclasses import dataclass, field
from typing import List
from dotenv import load_dotenv
load_dotenv()
@dataclass(frozen=True)
class ChunkingConfig:
    """Immutable parameters for token-based chunking.

    Instances are frozen: build a new instance to change a value.
    """

    # Chunk size setting, expressed in tokens.
    max_tokens: int = 400

    # Overlap setting, expressed in tokens.
    overlap_tokens: int = 50

    # Name of the token encoding (presumably consumed by the tokenizer
    # used for chunking — confirm against the chunker).
    encoding_name: str = "cl100k_base"
@dataclass(frozen=True)
class RetrievalConfig:
    """Immutable tuning knobs for hybrid retrieval."""

    # Top-k settings for the BM25 and dense retrieval stages.
    bm25_top_k: int = 20
    dense_top_k: int = 20

    # RRF constant: a higher value gives more uniform weighting.
    rrf_k: int = 60

    # Number of final chunks sent to the LLM.
    rrf_top_k: int = 8
@dataclass(frozen=True)
class GroundingConfig:
    """NLI grounding gate thresholds and model selection.

    ``nli_model_name`` is resolved when an instance is created:

    1. an explicitly passed ``nli_model_name`` argument, if given;
    2. otherwise the ``NLI_MODEL_NAME`` environment variable, if set;
    3. otherwise the built-in default model name.
    """

    high_threshold: float = 0.65
    medium_threshold: float = 0.45

    # Resolved through a default factory rather than __post_init__ so the
    # environment only supplies the *default*; an explicit constructor
    # argument (or dataclasses.replace) is no longer silently overwritten.
    nli_model_name: str = field(
        default_factory=lambda: os.getenv(
            "NLI_MODEL_NAME", "cross-encoder/nli-deberta-v3-base"
        )
    )
@dataclass(frozen=True)
class LLMConfig:
    """Groq LLM settings.

    ``api_key``, ``model_name`` and ``fallback_model`` take their defaults
    from the ``GROQ_API_KEY``, ``GROQ_MODEL_NAME`` and ``GROQ_FALLBACK_MODEL``
    environment variables, read when an instance is created. Values passed
    explicitly to the constructor take precedence over the environment.
    """

    # Environment-backed defaults use default_factory (not __post_init__) so
    # explicit constructor arguments are honored instead of being overwritten.
    # repr=False keeps the secret out of logs and tracebacks that print the config.
    api_key: str = field(
        default_factory=lambda: os.getenv("GROQ_API_KEY", ""),
        repr=False,
    )
    model_name: str = field(
        default_factory=lambda: os.getenv(
            "GROQ_MODEL_NAME", "llama-3.3-70b-versatile"
        )
    )
    fallback_model: str = field(
        default_factory=lambda: os.getenv(
            "GROQ_FALLBACK_MODEL", "llama-3.1-8b-instant"
        )
    )
    max_tokens: int = 400
    temperature: float = 0.1
    max_context_tokens: int = 6000  # Budget for document chunks in prompt
    max_response_sentences: int = 4
@dataclass(frozen=True)
class EmbeddingConfig:
    """Dense embedding model settings.

    ``model_name`` and ``vector_size`` take their defaults from the
    ``EMBEDDING_MODEL_NAME`` and ``EMBEDDING_VECTOR_SIZE`` environment
    variables, read when an instance is created. Values passed explicitly to
    the constructor take precedence over the environment.

    Raises:
        ValueError: If ``vector_size`` is not passed explicitly and
            ``EMBEDDING_VECTOR_SIZE`` is set to something ``int()`` rejects.
    """

    # default_factory (instead of __post_init__) lets the environment supply
    # only the default, so explicit constructor arguments are not overwritten.
    model_name: str = field(
        default_factory=lambda: os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-m3")
    )
    # The factory is skipped when vector_size is passed explicitly, so a
    # malformed environment value cannot break an explicit configuration.
    vector_size: int = field(
        default_factory=lambda: int(os.getenv("EMBEDDING_VECTOR_SIZE", "1024"))
    )
    batch_size: int = 32
@dataclass(frozen=True)
class QdrantConfig:
    """Qdrant vector store settings.

    ``url`` and ``api_key`` take their defaults from the ``QDRANT_URL`` and
    ``QDRANT_API_KEY`` environment variables, read when an instance is
    created. Values passed explicitly to the constructor take precedence.

    ``use_local`` is always derived: it is ``False`` only when both ``url``
    and ``api_key`` are non-empty, and ``True`` otherwise. A value passed for
    ``use_local`` is ignored.
    """

    # default_factory (instead of overwriting in __post_init__) lets explicit
    # constructor arguments win over the environment.
    url: str = field(default_factory=lambda: os.getenv("QDRANT_URL", ""))
    # repr=False keeps the secret out of logs that print the config.
    api_key: str = field(
        default_factory=lambda: os.getenv("QDRANT_API_KEY", ""),
        repr=False,
    )
    collection_name: str = "docmind_chunks"
    use_local: bool = True  # Default to local in-memory for easy setup
    local_path: str = ""  # Empty = pure in-memory (no persistence)

    def __post_init__(self):
        """Derive ``use_local`` from the effective ``url`` and ``api_key``."""
        # NOTE(review): a URL without an API key still selects local mode —
        # confirm this is intended for unauthenticated self-hosted servers.
        has_remote = bool(self.url and self.api_key)
        # frozen=True blocks normal assignment, hence object.__setattr__.
        object.__setattr__(self, "use_local", not has_remote)
@dataclass(frozen=True)
class RouterConfig:
    """Immutable settings for the intent router: sensitive keywords and the handoff text."""

    # Keywords the intent router treats as sensitive.
    sensitive_keywords: tuple = (
        "refund",
        "compensation",
        "legal",
        "liability",
        "sue",
        "medical",
        "diagnose",
        "toxic",
        "dangerous",
        "illegal",
        "lawsuit",
        "attorney",
        "malpractice",
        "fraud",
    )

    # Canned handoff reply (presumably returned when a sensitive keyword
    # matches — confirm against the router).
    handoff_message: str = (
        "This query involves a sensitive topic I cannot advise on "
        "from documents alone. "
        "Please consult a qualified professional."
    )
@dataclass(frozen=True)
class UIConfig:
    """Immutable display settings for the user interface."""

    # Limits.
    max_documents: int = 3
    max_chat_history: int = 10
    max_upload_mb: int = 20

    # Hex colors and their names, listed in the same order
    # (blue, green, orange).
    document_colors: tuple = (
        "#3B82F6",
        "#10B981",
        "#F59E0B",
    )
    document_color_names: tuple = (
        "blue",
        "green",
        "orange",
    )

    # Branding strings.
    app_name: str = "DocMind"
    app_tagline: str = "Grounded RAG Document Intelligence"
@dataclass(frozen=True)
class AppConfig:
    """Root configuration object that bundles every sub-config.

    Each field is built through ``default_factory``, so every ``AppConfig()``
    call constructs fresh sub-config instances rather than sharing class-level
    defaults.
    """

    # Document processing and retrieval.
    chunking: ChunkingConfig = field(default_factory=ChunkingConfig)
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    grounding: GroundingConfig = field(default_factory=GroundingConfig)

    # Models and storage.
    llm: LLMConfig = field(default_factory=LLMConfig)
    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    qdrant: QdrantConfig = field(default_factory=QdrantConfig)

    # Routing and presentation.
    router: RouterConfig = field(default_factory=RouterConfig)
    ui: UIConfig = field(default_factory=UIConfig)
# ── Singleton instance used across the app ──────────────────────────
# Built once when this module is imported. The sub-configs read their
# environment variables while being constructed, so load_dotenv() must
# have run before this line for .env values to be visible.
CONFIG: AppConfig = AppConfig()