File size: 4,182 Bytes
1d10b0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81f53e2
 
1d10b0a
7569ab7
 
 
 
 
81f53e2
1d10b0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""Configuration management for RAG Application."""
from pydantic_settings import BaseSettings
from typing import Optional, List
import os


def is_huggingface_space() -> bool:
    """Return True when the app is running on Hugging Face Spaces.

    Spaces always injects the ``SPACE_ID`` environment variable; its mere
    presence (regardless of value) is the detection signal.
    """
    return "SPACE_ID" in os.environ


class Settings(BaseSettings):
    """Application settings for the RAG application.

    Loaded by pydantic-settings: values come from environment variables
    and the ``.env`` file (see the inner ``Config``), falling back to the
    defaults declared below. Field-name matching is case-insensitive and
    unknown keys are tolerated (``extra = "allow"``).
    """
    
    # LLM Provider Selection: "groq" or "ollama"
    # On HuggingFace Spaces, default to groq (ollama not available)
    llm_provider: str = "groq"
    
    # API Keys - Support multiple keys for rotation (for Groq)
    # On HuggingFace, these come from Secrets
    # NOTE(review): the os.environ.get defaults are evaluated once at import
    # time; pydantic-settings would also read GROQ_API_KEY itself, so this is
    # presumably belt-and-braces — confirm before removing either path.
    groq_api_key: str = os.environ.get("GROQ_API_KEY", "")
    groq_api_keys: List[str] = []  # Multiple keys for rotation (comma-separated in .env)
    
    # Ollama Configuration (Local LLM - NOT available on HuggingFace Spaces)
    ollama_host: str = "http://localhost:11434"
    ollama_model: str = "qwen2.5:7b"  # Default model (fast + good quality)
    ollama_models: List[str] = ["qwen2.5:7b", "mistral:7b", "gemma3:12b", "llama3.3"]  # Available models
    
    # ChromaDB - Use /tmp for ephemeral storage on HuggingFace Spaces
    # (Spaces' filesystem outside /tmp may be read-only or not persisted)
    chroma_persist_directory: str = "/tmp/chroma_db" if is_huggingface_space() else "./chroma_db"
    
    # Vector Store Provider: "chroma" (local/ephemeral) or "qdrant" (cloud/persistent)
    # Use Qdrant for persistent storage on HuggingFace Spaces
    vector_store_provider: str = "qdrant" if is_huggingface_space() else "chroma"
    
    # Qdrant Cloud Configuration (for persistent storage on HuggingFace Spaces)
    # Sign up free at: https://cloud.qdrant.io
    qdrant_url: str = os.environ.get("QDRANT_URL", "")
    qdrant_api_key: str = os.environ.get("QDRANT_API_KEY", "")
    
    # Rate Limiting (for Groq API only)
    # 30 RPM = 2 seconds minimum between requests to stay under limit
    groq_rpm_limit: int = 30
    rate_limit_delay: float = 2.5  # Increased from 2.0 to 2.5 for safety margin (ensures ~24 RPM actual)
    
    # Retry settings for rate limit errors (for Groq API only)
    max_retries: int = 3
    retry_delay: float = 60.0  # Wait 60 seconds on rate limit error before retry
    
    # Batch processing settings
    batch_size: int = 10  # Process in smaller batches with checkpoints
    checkpoint_enabled: bool = True  # Save progress to resume if interrupted
    
    # Embedding Models
    # NOTE(review): annotated as bare `list` (not List[str]) — pydantic will
    # not coerce elements; presumably intentional, confirm before tightening.
    embedding_models: list = [
        "sentence-transformers/all-mpnet-base-v2",  # Stable, high quality
        "emilyalsentzer/Bio_ClinicalBERT",  # Clinical domain
        "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",  # Medical domain
        "sentence-transformers/all-MiniLM-L6-v2",  # Fast, lightweight
        # "sentence-transformers/multilingual-MiniLM-L12-v2",  # Multilingual (disabled for lighter build)
        # "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",  # Paraphrase (disabled for lighter build)
        "allenai/specter",  # Academic papers
        "ProsusAI/finbert",  # Financial domain
        "nlpaueb/legal-bert-base-uncased",  # Legal domain
        "sentence-transformers/all-mpnet-base-v2-legal",  # Legal domain specialized
        "sentence-transformers/paraphrase-mpnet-base-v2-customer-service",  # Customer service
        "sentence-transformers/all-MiniLM-L6-v2-customer-service",  # Customer service lightweight
        # "gemini-embedding-001"  # Gemini API (disabled for lighter build)
    ]
    
    # LLM Models
    llm_models: list = [
        "meta-llama/llama-4-maverick-17b-128e-instruct",
        "llama-3.1-8b-instant",
        "openai/gpt-oss-120b"
    ]
    
    # Chunking Strategies
    chunking_strategies: list = ["dense", "sparse", "hybrid", "re-ranking", "row-based", "entity-based"]
    
    # RAG Bench Datasets (from rungalileo/ragbench)
    ragbench_datasets: list = [
        "covidqa",
        "cuad",
        "delucionqa",
        "emanual",
        "expertqa",
        "finqa",
        "hagrid",
        "hotpotqa",
        "msmarco",
        "pubmedqa",
        "tatqa",
        "techqa"
    ]
    
    # pydantic v1-style settings config: read from .env, match field names
    # case-insensitively, and allow (keep) unknown keys instead of erroring.
    class Config:
        env_file = ".env"
        case_sensitive = False
        extra = "allow"


settings = Settings()