| """ |
| DocMind — Centralized Configuration |
| |
| All magic numbers, thresholds, model names, and environment-dependent |
| settings live here so they can be tuned in one place. |
| """ |
|
|
| import os |
| from dataclasses import dataclass, field |
| from typing import List |
|
|
| from dotenv import load_dotenv |
|
|
# Load variables from a local .env file into the process environment at
# import time, so the os.getenv() lookups made by the config classes in this
# module can see them.
load_dotenv()
|
|
|
|
@dataclass(frozen=True)
class ChunkingConfig:
    """Immutable parameters for token-based chunking.

    Every default is a literal constant; nothing in this class is read from
    the environment.
    """

    # Token budget for a chunk.
    max_tokens: int = 400

    # Token overlap setting.
    # NOTE(review): presumably the overlap between neighbouring chunks --
    # confirm against the chunker that consumes it.
    overlap_tokens: int = 50

    # Name of the token encoding to use.
    # NOTE(review): the value looks like a tiktoken encoding name -- confirm.
    encoding_name: str = "cl100k_base"
|
|
|
|
@dataclass(frozen=True)
class RetrievalConfig:
    """Immutable tuning knobs for hybrid retrieval.

    All values are plain integer constants with no environment overrides.
    NOTE(review): the per-field notes below are inferred from the field
    names -- confirm against the retriever that consumes them.
    """

    # Presumably the number of candidates kept from the BM25 search.
    bm25_top_k: int = 20

    # Presumably the number of candidates kept from the dense search.
    dense_top_k: int = 20

    # Presumably the "k" constant of the rank-fusion formula.
    rrf_k: int = 60

    # Presumably the number of fused results kept.
    rrf_top_k: int = 8
|
|
|
|
@dataclass(frozen=True)
class GroundingConfig:
    """NLI grounding gate thresholds and model selection.

    ``nli_model_name`` defaults to the ``NLI_MODEL_NAME`` environment
    variable, read when an instance is created, and falls back to
    ``"cross-encoder/nli-deberta-v3-base"`` when the variable is unset.
    A value passed explicitly to the constructor is kept as given.
    """

    # Score thresholds for the grounding gate.
    # NOTE(review): presumably scores at or above these values map to the
    # "high" / "medium" confidence tiers -- confirm against the gate itself.
    high_threshold: float = 0.65
    medium_threshold: float = 0.45

    # The environment supplies only the *default*. Resolving it in a default
    # factory (instead of overwriting the field in ``__post_init__``) means
    # explicit arguments and ``dataclasses.replace`` are no longer silently
    # discarded.
    nli_model_name: str = field(
        default_factory=lambda: os.getenv(
            "NLI_MODEL_NAME", "cross-encoder/nli-deberta-v3-base"
        )
    )
|
|
|
|
@dataclass(frozen=True)
class LLMConfig:
    """Groq LLM settings.

    Three fields take their defaults from the environment, read when an
    instance is created:

    * ``api_key``        -- ``GROQ_API_KEY`` (default ``""``)
    * ``model_name``     -- ``GROQ_MODEL_NAME``
      (default ``"llama-3.3-70b-versatile"``)
    * ``fallback_model`` -- ``GROQ_FALLBACK_MODEL``
      (default ``"llama-3.1-8b-instant"``)

    Values passed explicitly to the constructor are kept as given; the
    environment only fills in arguments that were omitted.
    """

    # repr=False keeps the secret out of the generated __repr__, so printing
    # or logging a config object does not leak the key.
    api_key: str = field(
        default_factory=lambda: os.getenv("GROQ_API_KEY", ""),
        repr=False,
    )
    # Default factories (rather than a __post_init__ overwrite) ensure that
    # explicit arguments and dataclasses.replace() are honoured.
    model_name: str = field(
        default_factory=lambda: os.getenv(
            "GROQ_MODEL_NAME", "llama-3.3-70b-versatile"
        )
    )
    fallback_model: str = field(
        default_factory=lambda: os.getenv(
            "GROQ_FALLBACK_MODEL", "llama-3.1-8b-instant"
        )
    )

    # Generation limits and sampling temperature (plain constants).
    # NOTE(review): presumably max_tokens caps the completion length and
    # max_context_tokens caps the prompt context -- confirm against the
    # LLM client that consumes them.
    max_tokens: int = 400
    temperature: float = 0.1
    max_context_tokens: int = 6000
    max_response_sentences: int = 4
|
|
|
|
def _embedding_vector_size_from_env() -> int:
    """Return ``EMBEDDING_VECTOR_SIZE`` as an int, or 1024 if unset or blank.

    Raises:
        ValueError: if the variable is set to something that is not an
            integer. The message names the variable so a bad ``.env`` entry
            is easy to locate.
    """
    raw = os.getenv("EMBEDDING_VECTOR_SIZE", "").strip()
    if not raw:
        # A blank entry (e.g. "EMBEDDING_VECTOR_SIZE=" left in a .env
        # template) is treated the same as an unset variable.
        return 1024
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"EMBEDDING_VECTOR_SIZE must be an integer, got {raw!r}"
        ) from err


@dataclass(frozen=True)
class EmbeddingConfig:
    """Dense embedding model settings.

    ``model_name`` defaults to the ``EMBEDDING_MODEL_NAME`` environment
    variable (fallback ``"BAAI/bge-m3"``) and ``vector_size`` to
    ``EMBEDDING_VECTOR_SIZE`` (fallback ``1024``). Both are read when an
    instance is created. Values passed explicitly to the constructor are
    kept as given.
    """

    # Default factories (rather than a __post_init__ overwrite) ensure that
    # explicit arguments and dataclasses.replace() are honoured.
    model_name: str = field(
        default_factory=lambda: os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-m3")
    )
    vector_size: int = field(default_factory=_embedding_vector_size_from_env)
    batch_size: int = 32
|
|
|
|
@dataclass(frozen=True)
class QdrantConfig:
    """Qdrant vector store settings.

    ``url`` and ``api_key`` default to the ``QDRANT_URL`` and
    ``QDRANT_API_KEY`` environment variables (both ``""`` when unset), read
    when an instance is created. Values passed explicitly to the
    constructor are kept as given.

    ``use_local`` is a derived flag: it is always recomputed as
    ``not (url and api_key)``, so it is ``True`` unless both connection
    values are non-empty. Any value passed for it is ignored.
    """

    # Default factories (rather than a __post_init__ overwrite) ensure that
    # explicit arguments and dataclasses.replace() are honoured.
    url: str = field(default_factory=lambda: os.getenv("QDRANT_URL", ""))
    # repr=False keeps the secret out of the generated __repr__.
    api_key: str = field(
        default_factory=lambda: os.getenv("QDRANT_API_KEY", ""),
        repr=False,
    )
    collection_name: str = "docmind_chunks"
    use_local: bool = True
    local_path: str = ""

    def __post_init__(self) -> None:
        """Derive ``use_local`` from the final ``url`` / ``api_key`` values."""
        # The dataclass is frozen, so normal attribute assignment would raise;
        # object.__setattr__ is the standard way to set a derived field here.
        object.__setattr__(
            self, "use_local", not (self.url and self.api_key)
        )
|
|
|
|
@dataclass(frozen=True)
class RouterConfig:
    """Immutable intent-router settings: sensitive keywords and handoff text.

    Both fields are literal constants with no environment overrides.
    """

    # Keywords the router treats as sensitive.
    # NOTE(review): how they are matched (case, whole-word vs. substring)
    # is decided by the router, not here -- confirm there.
    sensitive_keywords: tuple = (
        "refund",
        "compensation",
        "legal",
        "liability",
        "sue",
        "medical",
        "diagnose",
        "toxic",
        "dangerous",
        "illegal",
        "lawsuit",
        "attorney",
        "malpractice",
        "fraud",
    )

    # Canned handoff reply.
    # NOTE(review): presumably returned when a query is flagged as
    # sensitive -- confirm against the router.
    handoff_message: str = (
        "This query involves a sensitive topic "
        "I cannot advise on from documents alone. "
        "Please consult a qualified professional."
    )
|
|
|
|
@dataclass(frozen=True)
class UIConfig:
    """Immutable UI display settings.

    All values are literal constants with no environment overrides.
    """

    # Numeric limits for the UI.
    # NOTE(review): presumably the document count, the number of chat turns
    # kept, and the upload size in megabytes -- confirm against the UI code.
    max_documents: int = 3
    max_chat_history: int = 10
    max_upload_mb: int = 20

    # Two parallel tuples: hex colour codes and their names. Each has three
    # entries, the same number as the default max_documents.
    document_colors: tuple = (
        "#3B82F6",
        "#10B981",
        "#F59E0B",
    )
    document_color_names: tuple = (
        "blue",
        "green",
        "orange",
    )

    # Branding strings.
    app_name: str = "DocMind"
    app_tagline: str = "Grounded RAG Document Intelligence"
|
|
|
|
@dataclass(frozen=True)
class AppConfig:
    """Top-level, immutable bundle of every sub-config.

    Each field is built by calling the matching sub-config class with no
    arguments when an ``AppConfig`` is instantiated. Pass an instance
    explicitly to replace any one section.
    """

    # Document chunking
    chunking: ChunkingConfig = field(default_factory=ChunkingConfig)

    # Retrieval and grounding
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    grounding: GroundingConfig = field(default_factory=GroundingConfig)

    # Models and vector store
    llm: LLMConfig = field(default_factory=LLMConfig)
    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    qdrant: QdrantConfig = field(default_factory=QdrantConfig)

    # Routing and presentation
    router: RouterConfig = field(default_factory=RouterConfig)
    ui: UIConfig = field(default_factory=UIConfig)
|
|
|
|
| |
# Module-level AppConfig instance, built once at import time with every
# sub-config at its default value.
CONFIG = AppConfig()
|
|