# CapStoneRAG10 / config.py
# Commit 7569ab7: Update config to include domain-specific embedding models in dropdown
"""Configuration management for RAG Application."""
from pydantic_settings import BaseSettings
from typing import Optional, List
import os
def is_huggingface_space() -> bool:
    """Return True when running on Hugging Face Spaces.

    Spaces injects a ``SPACE_ID`` environment variable into every runtime;
    its mere presence (regardless of value) is the detection signal.
    """
    return "SPACE_ID" in os.environ
class Settings(BaseSettings):
    """Application settings.

    Loaded by pydantic-settings from the environment and an optional ``.env``
    file (case-insensitive, extra keys allowed); the defaults below apply when
    no override is present. Fix: the dropdown/list fields previously annotated
    with bare ``list`` are now ``List[str]`` so pydantic validates item types,
    consistent with ``groq_api_keys`` / ``ollama_models``.
    """
    # LLM Provider Selection: "groq" or "ollama"
    # On HuggingFace Spaces, default to groq (ollama not available)
    llm_provider: str = "groq"
    # API Keys - Support multiple keys for rotation (for Groq)
    # On HuggingFace, these come from Secrets
    # NOTE(review): the os.environ.get defaults are evaluated once at import
    # time; pydantic-settings would also pick these env vars up by field name,
    # so the explicit lookup is belt-and-braces.
    groq_api_key: str = os.environ.get("GROQ_API_KEY", "")
    groq_api_keys: List[str] = []  # Multiple keys for rotation (comma-separated in .env)
    # Ollama Configuration (Local LLM - NOT available on HuggingFace Spaces)
    ollama_host: str = "http://localhost:11434"
    ollama_model: str = "qwen2.5:7b"  # Default model (fast + good quality)
    ollama_models: List[str] = ["qwen2.5:7b", "mistral:7b", "gemma3:12b", "llama3.3"]  # Available models
    # ChromaDB - Use /tmp for ephemeral storage on HuggingFace Spaces
    chroma_persist_directory: str = "/tmp/chroma_db" if is_huggingface_space() else "./chroma_db"
    # Vector Store Provider: "chroma" (local/ephemeral) or "qdrant" (cloud/persistent)
    # Use Qdrant for persistent storage on HuggingFace Spaces
    vector_store_provider: str = "qdrant" if is_huggingface_space() else "chroma"
    # Qdrant Cloud Configuration (for persistent storage on HuggingFace Spaces)
    # Sign up free at: https://cloud.qdrant.io
    qdrant_url: str = os.environ.get("QDRANT_URL", "")
    qdrant_api_key: str = os.environ.get("QDRANT_API_KEY", "")
    # Rate Limiting (for Groq API only)
    # 30 RPM = 2 seconds minimum between requests to stay under limit
    groq_rpm_limit: int = 30
    rate_limit_delay: float = 2.5  # Increased from 2.0 to 2.5 for safety margin (ensures ~24 RPM actual)
    # Retry settings for rate limit errors (for Groq API only)
    max_retries: int = 3
    retry_delay: float = 60.0  # Wait 60 seconds on rate limit error before retry
    # Batch processing settings
    batch_size: int = 10  # Process in smaller batches with checkpoints
    checkpoint_enabled: bool = True  # Save progress to resume if interrupted
    # Embedding Models offered in the UI dropdown.
    # NOTE(review): some entries look like unverified model ids (e.g. the
    # "-legal" / "-customer-service" sentence-transformers variants) — confirm
    # they exist on the Hub before relying on them.
    embedding_models: List[str] = [
        "sentence-transformers/all-mpnet-base-v2",  # Stable, high quality
        "emilyalsentzer/Bio_ClinicalBERT",  # Clinical domain
        "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",  # Medical domain
        "sentence-transformers/all-MiniLM-L6-v2",  # Fast, lightweight
        # "sentence-transformers/multilingual-MiniLM-L12-v2",  # Multilingual (disabled for lighter build)
        # "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",  # Paraphrase (disabled for lighter build)
        "allenai/specter",  # Academic papers
        "ProsusAI/finbert",  # Financial domain
        "nlpaueb/legal-bert-base-uncased",  # Legal domain
        "sentence-transformers/all-mpnet-base-v2-legal",  # Legal domain specialized
        "sentence-transformers/paraphrase-mpnet-base-v2-customer-service",  # Customer service
        "sentence-transformers/all-MiniLM-L6-v2-customer-service",  # Customer service lightweight
        # "gemini-embedding-001"  # Gemini API (disabled for lighter build)
    ]
    # LLM Models selectable for generation (Groq-hosted model ids)
    llm_models: List[str] = [
        "meta-llama/llama-4-maverick-17b-128e-instruct",
        "llama-3.1-8b-instant",
        "openai/gpt-oss-120b",
    ]
    # Chunking Strategies exposed to the user
    chunking_strategies: List[str] = ["dense", "sparse", "hybrid", "re-ranking", "row-based", "entity-based"]
    # RAG Bench Datasets (from rungalileo/ragbench)
    ragbench_datasets: List[str] = [
        "covidqa",
        "cuad",
        "delucionqa",
        "emanual",
        "expertqa",
        "finqa",
        "hagrid",
        "hotpotqa",
        "msmarco",
        "pubmedqa",
        "tatqa",
        "techqa",
    ]

    class Config:
        # pydantic-settings loader configuration: read .env, match env var
        # names case-insensitively, and tolerate unknown keys.
        env_file = ".env"
        case_sensitive = False
        extra = "allow"
# Module-level singleton, instantiated (and env/.env parsed) at import time;
# other modules do `from config import settings`.
settings = Settings()