Spaces:
Sleeping
Sleeping
| """Configuration management for RAG Application.""" | |
| from pydantic_settings import BaseSettings | |
| from typing import Optional, List | |
| import os | |
def is_huggingface_space() -> bool:
    """Return True when the app is running on Hugging Face Spaces.

    Spaces injects a SPACE_ID environment variable into every running
    Space, so its mere presence (even if empty) signals that platform.
    """
    return "SPACE_ID" in os.environ
class Settings(BaseSettings):
    """Application settings.

    Values may be overridden via environment variables or a ``.env`` file
    (see the inner ``Config``); field-name matching is case-insensitive.

    Fix: the four fields previously annotated with bare ``list``
    (``embedding_models``, ``llm_models``, ``chunking_strategies``,
    ``ragbench_datasets``) are now ``List[str]`` for consistency with the
    other list fields and so pydantic validates their items.
    """

    # LLM Provider Selection: "groq" or "ollama"
    # On HuggingFace Spaces, default to groq (ollama not available)
    llm_provider: str = "groq"

    # API Keys - Support multiple keys for rotation (for Groq)
    # On HuggingFace, these come from Secrets
    groq_api_key: str = os.environ.get("GROQ_API_KEY", "")
    groq_api_keys: List[str] = []  # Multiple keys for rotation (comma-separated in .env)

    # Ollama Configuration (Local LLM - NOT available on HuggingFace Spaces)
    ollama_host: str = "http://localhost:11434"
    ollama_model: str = "qwen2.5:7b"  # Default model (fast + good quality)
    ollama_models: List[str] = ["qwen2.5:7b", "mistral:7b", "gemma3:12b", "llama3.3"]  # Available models

    # ChromaDB - Use /tmp for ephemeral storage on HuggingFace Spaces
    # NOTE: evaluated once at class-definition (import) time.
    chroma_persist_directory: str = "/tmp/chroma_db" if is_huggingface_space() else "./chroma_db"

    # Vector Store Provider: "chroma" (local/ephemeral) or "qdrant" (cloud/persistent)
    # Use Qdrant for persistent storage on HuggingFace Spaces
    vector_store_provider: str = "qdrant" if is_huggingface_space() else "chroma"

    # Qdrant Cloud Configuration (for persistent storage on HuggingFace Spaces)
    # Sign up free at: https://cloud.qdrant.io
    qdrant_url: str = os.environ.get("QDRANT_URL", "")
    qdrant_api_key: str = os.environ.get("QDRANT_API_KEY", "")

    # Rate Limiting (for Groq API only)
    # 30 RPM = 2 seconds minimum between requests to stay under limit
    groq_rpm_limit: int = 30
    rate_limit_delay: float = 2.5  # Increased from 2.0 to 2.5 for safety margin (ensures ~24 RPM actual)

    # Retry settings for rate limit errors (for Groq API only)
    max_retries: int = 3
    retry_delay: float = 60.0  # Wait 60 seconds on rate limit error before retry

    # Batch processing settings
    batch_size: int = 10  # Process in smaller batches with checkpoints
    checkpoint_enabled: bool = True  # Save progress to resume if interrupted

    # Embedding Models
    embedding_models: List[str] = [
        "sentence-transformers/all-mpnet-base-v2",  # Stable, high quality
        "emilyalsentzer/Bio_ClinicalBERT",  # Clinical domain
        "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",  # Medical domain
        "sentence-transformers/all-MiniLM-L6-v2",  # Fast, lightweight
        # "sentence-transformers/multilingual-MiniLM-L12-v2",  # Multilingual (disabled for lighter build)
        # "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",  # Paraphrase (disabled for lighter build)
        "allenai/specter",  # Academic papers
        "ProsusAI/finbert",  # Financial domain
        "nlpaueb/legal-bert-base-uncased",  # Legal domain
        "sentence-transformers/all-mpnet-base-v2-legal",  # Legal domain specialized
        "sentence-transformers/paraphrase-mpnet-base-v2-customer-service",  # Customer service
        "sentence-transformers/all-MiniLM-L6-v2-customer-service",  # Customer service lightweight
        # "gemini-embedding-001"  # Gemini API (disabled for lighter build)
    ]

    # LLM Models
    llm_models: List[str] = [
        "meta-llama/llama-4-maverick-17b-128e-instruct",
        "llama-3.1-8b-instant",
        "openai/gpt-oss-120b"
    ]

    # Chunking Strategies
    chunking_strategies: List[str] = ["dense", "sparse", "hybrid", "re-ranking", "row-based", "entity-based"]

    # RAG Bench Datasets (from rungalileo/ragbench)
    ragbench_datasets: List[str] = [
        "covidqa",
        "cuad",
        "delucionqa",
        "emanual",
        "expertqa",
        "finqa",
        "hagrid",
        "hotpotqa",
        "msmarco",
        "pubmedqa",
        "tatqa",
        "techqa"
    ]

    class Config:
        # Load overrides from a local .env file; ignore unknown keys so
        # extra env entries don't raise validation errors.
        env_file = ".env"
        case_sensitive = False
        extra = "allow"
| settings = Settings() | |