"""Configuration management for RAG Application.""" from pydantic_settings import BaseSettings from typing import Optional, List import os def is_huggingface_space() -> bool: """Check if running on Hugging Face Spaces.""" return os.environ.get("SPACE_ID") is not None class Settings(BaseSettings): """Application settings.""" # LLM Provider Selection: "groq" or "ollama" # On HuggingFace Spaces, default to groq (ollama not available) llm_provider: str = "groq" # API Keys - Support multiple keys for rotation (for Groq) # On HuggingFace, these come from Secrets groq_api_key: str = os.environ.get("GROQ_API_KEY", "") groq_api_keys: List[str] = [] # Multiple keys for rotation (comma-separated in .env) # Ollama Configuration (Local LLM - NOT available on HuggingFace Spaces) ollama_host: str = "http://localhost:11434" ollama_model: str = "qwen2.5:7b" # Default model (fast + good quality) ollama_models: List[str] = ["qwen2.5:7b", "mistral:7b", "gemma3:12b", "llama3.3"] # Available models # ChromaDB - Use /tmp for ephemeral storage on HuggingFace Spaces chroma_persist_directory: str = "/tmp/chroma_db" if is_huggingface_space() else "./chroma_db" # Vector Store Provider: "chroma" (local/ephemeral) or "qdrant" (cloud/persistent) # Use Qdrant for persistent storage on HuggingFace Spaces vector_store_provider: str = "qdrant" if is_huggingface_space() else "chroma" # Qdrant Cloud Configuration (for persistent storage on HuggingFace Spaces) # Sign up free at: https://cloud.qdrant.io qdrant_url: str = os.environ.get("QDRANT_URL", "") qdrant_api_key: str = os.environ.get("QDRANT_API_KEY", "") # Rate Limiting (for Groq API only) # 30 RPM = 2 seconds minimum between requests to stay under limit groq_rpm_limit: int = 30 rate_limit_delay: float = 2.5 # Increased from 2.0 to 2.5 for safety margin (ensures ~24 RPM actual) # Retry settings for rate limit errors (for Groq API only) max_retries: int = 3 retry_delay: float = 60.0 # Wait 60 seconds on rate limit error before retry # Batch processing settings batch_size: int = 10 # Process in smaller batches with checkpoints checkpoint_enabled: bool = True # Save progress to resume if interrupted # Embedding Models embedding_models: list = [ "sentence-transformers/all-mpnet-base-v2", # Stable, high quality "emilyalsentzer/Bio_ClinicalBERT", # Clinical domain "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract", # Medical domain "sentence-transformers/all-MiniLM-L6-v2", # Fast, lightweight # "sentence-transformers/multilingual-MiniLM-L12-v2", # Multilingual (disabled for lighter build) # "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", # Paraphrase (disabled for lighter build) "allenai/specter", # Academic papers "ProsusAI/finbert", # Financial domain "nlpaueb/legal-bert-base-uncased", # Legal domain "sentence-transformers/all-mpnet-base-v2-legal", # Legal domain specialized "sentence-transformers/paraphrase-mpnet-base-v2-customer-service", # Customer service "sentence-transformers/all-MiniLM-L6-v2-customer-service", # Customer service lightweight # "gemini-embedding-001" # Gemini API (disabled for lighter build) ] # LLM Models llm_models: list = [ "meta-llama/llama-4-maverick-17b-128e-instruct", "llama-3.1-8b-instant", "openai/gpt-oss-120b" ] # Chunking Strategies chunking_strategies: list = ["dense", "sparse", "hybrid", "re-ranking", "row-based", "entity-based"] # RAG Bench Datasets (from rungalileo/ragbench) ragbench_datasets: list = [ "covidqa", "cuad", "delucionqa", "emanual", "expertqa", "finqa", "hagrid", "hotpotqa", "msmarco", "pubmedqa", "tatqa", "techqa" ] class Config: env_file = ".env" case_sensitive = False extra = "allow" settings = Settings()