"""Application configuration: environment loading, log quieting, and settings."""

import os
import logging
import warnings
from dataclasses import dataclass, field

from dotenv import load_dotenv

# Load variables from a .env file before anything below reads os.environ.
load_dotenv()

# Suppress noisy third-party logs
os.environ["TOKENIZERS_PARALLELISM"] = "false"
logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
logging.getLogger("transformers").setLevel(logging.WARNING)
logging.getLogger("huggingface_hub").setLevel(logging.WARNING)

warnings.filterwarnings("ignore", message=".*Pydantic V1.*")
warnings.filterwarnings("ignore", message=".*urllib3.*")
warnings.filterwarnings("ignore", message=".*HuggingFaceEmbeddings.*")
# NOTE: this silences every DeprecationWarning process-wide, not only the
# third-party ones matched above.
warnings.filterwarnings("ignore", category=DeprecationWarning)


@dataclass
class Settings:
    """Runtime settings, with string values read from environment variables.

    Environment-backed fields use ``default_factory`` so the environment is
    consulted each time a ``Settings`` instance is created, not once when the
    class is defined. Any field can still be overridden by passing it to the
    constructor.
    """

    # --- LLM endpoint (LLM_BASE_URL / LLM_MODEL / LLM_API_KEY) ---
    llm_base_url: str = field(
        default_factory=lambda: os.getenv("LLM_BASE_URL", "")
    )
    llm_model: str = field(
        default_factory=lambda: os.getenv("LLM_MODEL", "")
    )
    # repr=False keeps the secret out of the generated __repr__, so printing
    # or logging a Settings object does not expose the API key.
    llm_api_key: str = field(
        default_factory=lambda: os.getenv("LLM_API_KEY", ""),
        repr=False,
    )

    # --- Embeddings and storage locations ---
    embedding_model: str = field(
        default_factory=lambda: os.getenv(
            "EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
        )
    )
    faiss_index_path: str = field(
        default_factory=lambda: os.getenv("FAISS_INDEX_PATH", "rag/faiss_index")
    )
    memory_dir: str = field(
        default_factory=lambda: os.getenv("MEMORY_DIR", "memory/data")
    )

    # --- Fixed tuning values (not read from the environment) ---
    ocr_confidence_threshold: float = 0.6
    asr_confidence_threshold: float = 0.6
    verifier_confidence_threshold: float = 0.7
    rag_top_k: int = 5
    max_solver_retries: int = 2

    @property
    def is_llm_configured(self) -> bool:
        """Return True when both the LLM base URL and model name are non-empty.

        The API key is deliberately not part of this check.
        """
        return bool(self.llm_base_url and self.llm_model)


# Module-level instance created at import time from the current environment.
settings = Settings()