# NOTE(review): removed non-Python residue accidentally captured from the
# Hugging Face Spaces file-viewer page (status badges, file size, commit
# hashes, line-number gutter) — it was not part of the module source.
# DEPENDENCIES
import os
import time
import torch
from pathlib import Path
from pydantic import Field
from typing import Literal
from typing import Optional
from pydantic import field_validator
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """
    Application configuration with environment variable support.

    Environment variables take precedence over the defaults declared here
    (loaded by pydantic-settings from the process environment and `.env`).
    Instantiating this class has a side effect: the `create_directories`
    validator creates the upload/vector-store/log/backup directories.
    """
    # Huggingface Space Deployment mode detection (SPACE_ID is injected by HF Spaces)
    IS_HF_SPACE : bool = Field(default = os.getenv("SPACE_ID") is not None, description = "Running in HF Space")
    # Application Settings
    APP_NAME : str = "QuerySphere"
    APP_VERSION : str = "1.0.0"
    DEBUG : bool = Field(default = False, description = "Enable debug mode")
    HOST : str = Field(default = "0.0.0.0", description = "API host")
    PORT : int = Field(default = int(os.getenv("PORT", 8000)), description = "API port (7860 for HF Spaces)")
    # LLM Provider Selection
    OLLAMA_ENABLED : bool = Field(default = os.getenv("OLLAMA_ENABLED", "true").lower() == "true", description = "Enable Ollama (set false for HF Spaces)")
    USE_OPENAI : bool = Field(default = os.getenv("USE_OPENAI", "false").lower() == "true", description = "Use OpenAI API instead of local LLM")
    # File Upload Settings
    MAX_FILE_SIZE_MB : int = Field(default = 100, description = "Max file size in MB")
    MAX_BATCH_FILES : int = Field(default = 10, description = "Max files per upload")
    ALLOWED_EXTENSIONS : list[str] = Field(default = ["pdf", "docx", "txt"], description = "Allowed file extensions")
    UPLOAD_DIR : Path = Field(default = Path("data/uploads"), description = "Directory for uploaded files")
    # Ollama LLM Settings
    OLLAMA_BASE_URL : str = Field(default = "http://localhost:11434", description = "Ollama API endpoint")
    OLLAMA_MODEL : str = Field(default = "mistral:7b", description = "Ollama model name")
    OLLAMA_TIMEOUT : int = Field(default = 120, description = "Ollama request timeout (seconds)")
    # Generation parameters
    DEFAULT_TEMPERATURE : float = Field(default = 0.1, ge = 0.0, le = 1.0, description = "LLM temperature (0=deterministic, 1=creative)")
    TOP_P : float = Field(default = 0.9, ge = 0.0, le = 1.0, description = "Nucleus sampling threshold")
    MAX_TOKENS : int = Field(default = 1000, description = "Max output tokens")
    CONTEXT_WINDOW : int = Field(default = 8192, description = "Model context window size")
    # OpenAI Settings
    OPENAI_API_KEY : Optional[str] = Field(default = os.getenv("OPENAI_API_KEY"), description = "OpenAI API secret key")
    OPENAI_MODEL : str = Field(default = "gpt-3.5-turbo", description = "OpenAI model name")
    # Embedding Settings
    EMBEDDING_MODEL : str = Field(default = "BAAI/bge-small-en-v1.5", description = "HuggingFace embedding model")
    EMBEDDING_DIMENSION : int = Field(default = 384, description = "Embedding vector dimension")
    EMBEDDING_DEVICE : Literal["cpu", "cuda", "mps"] = Field(default = "cpu", description = "Device for embedding generation")
    EMBEDDING_BATCH_SIZE : int = Field(default = 32, description = "Batch size for embedding generation")
    # Chunking Settings
    # Fixed chunking
    FIXED_CHUNK_SIZE : int = Field(default = 512, description = "Fixed chunk size in tokens")
    FIXED_CHUNK_OVERLAP : int = Field(default = 25, description = "Overlap between chunks")
    # Semantic chunking
    SEMANTIC_BREAKPOINT_THRESHOLD : float = Field(default = 0.80, description = "Percentile for semantic breakpoints")
    # Hierarchical chunking
    PARENT_CHUNK_SIZE : int = Field(default = 2048, description = "Parent chunk size")
    CHILD_CHUNK_SIZE : int = Field(default = 512, description = "Child chunk size")
    # Adaptive thresholds (document size in tokens selects the chunking strategy)
    SMALL_DOC_THRESHOLD : int = Field(default = 1000, description = "Token threshold for fixed chunking")
    LARGE_DOC_THRESHOLD : int = Field(default = 500000, description = "Token threshold for hierarchical chunking")
    # Retrieval Settings
    # Vector search
    TOP_K_RETRIEVE : int = Field(default = 10, description = "Top chunks to retrieve")
    TOP_K_FINAL : int = Field(default = 5, description = "Final chunks after reranking")
    FAISS_NPROBE : int = Field(default = 10, description = "FAISS search probes")
    # Hybrid search weights (validated below to sum to 1.0)
    VECTOR_WEIGHT : float = Field(default = 0.6, description = "Vector search weight")
    BM25_WEIGHT : float = Field(default = 0.4, description = "BM25 search weight")
    # BM25 parameters
    BM25_K1 : float = Field(default = 1.5, description = "BM25 term saturation")
    BM25_B : float = Field(default = 0.75, description = "BM25 length normalization")
    # Reranking
    ENABLE_RERANKING : bool = Field(default = True, description = "Enable cross-encoder reranking")
    RERANKER_MODEL : str = Field(default = "cross-encoder/ms-marco-MiniLM-L-6-v2", description = "Reranker model")
    # Storage Settings
    VECTOR_STORE_DIR : Path = Field(default = Path("data/vector_store"), description = "FAISS index storage")
    METADATA_DB_PATH : Path = Field(default = Path("data/metadata.db"), description = "SQLite metadata database")
    # Backup
    AUTO_BACKUP : bool = Field(default = True, description = "Enable auto-backup")
    BACKUP_INTERVAL : int = Field(default = 1000, description = "Backup every N documents")
    BACKUP_DIR : Path = Field(default = Path("data/backups"), description = "Backup directory")
    # Cache Settings
    ENABLE_CACHE : bool = Field(default = True, description = "Enable embedding cache")
    CACHE_TYPE : Literal["memory", "redis"] = Field(default = "memory", description = "Cache backend")
    CACHE_TTL : int = Field(default = 3600, description = "Cache TTL in seconds")
    CACHE_MAX_SIZE : int = Field(default = 1000, description = "Max cached items")
    # Redis (only used when CACHE_TYPE == "redis")
    REDIS_HOST : str = Field(default = "localhost", description = "Redis host")
    REDIS_PORT : int = Field(default = 6379, description = "Redis port")
    REDIS_DB : int = Field(default = 0, description = "Redis database number")
    # Logging Settings
    LOG_LEVEL : Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default = "INFO", description = "Logging level")
    LOG_DIR : Path = Field(default = Path("logs"), description = "Log file directory")
    LOG_FORMAT : str = Field(default = "%(asctime)s - %(name)s - %(levelname)s - %(message)s", description = "Log format string")
    LOG_ROTATION : str = Field(default = "500 MB", description = "Log rotation size")
    LOG_RETENTION : str = Field(default = "30 days", description = "Log retention period")
    # Evaluation Settings
    ENABLE_RAGAS : bool = Field(default = True, description = "Enable Ragas evaluation")
    RAGAS_ENABLE_GROUND_TRUTH : bool = Field(default = False, description = "Enable RAGAS metrics requiring ground truth")
    RAGAS_METRICS : list[str] = Field(default = ["answer_relevancy", "faithfulness", "context_utilization", "context_relevancy"], description = "Ragas metrics to compute (base metrics without ground truth)")
    RAGAS_GROUND_TRUTH_METRICS : list[str] = Field(default = ["context_precision", "context_recall", "answer_similarity", "answer_correctness"], description = "Ragas metrics requiring ground truth")
    RAGAS_EVALUATION_TIMEOUT : int = Field(default = 60, description = "RAGAS evaluation timeout in seconds")
    RAGAS_BATCH_SIZE : int = Field(default = 10, description = "Batch size for RAGAS evaluations")
    # Web Scraping Settings (for future)
    SCRAPING_ENABLED : bool = Field(default = False, description = "Enable web scraping")
    USER_AGENT : str = Field(default = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", description = "User agent for scraping")
    REQUEST_DELAY : float = Field(default = 2.0, description = "Delay between requests (seconds)")
    MAX_RETRIES : int = Field(default = 3, description = "Max scraping retries")
    # Performance Settings
    MAX_WORKERS : int = Field(default = 4, description = "Max parallel workers")
    ASYNC_BATCH_SIZE : int = Field(default = 10, description = "Async batch size")
    # Security Settings
    ENABLE_AUTH : bool = Field(default = False, description = "Enable authentication")
    SECRET_KEY : str = Field(default = os.getenv("SECRET_KEY", "dev-key-change-in-production"), description = "Secret key for auth/signing (must be overridden in production)")
    # NOTE(review): chunking-related field placed under Security — consider
    # moving it next to the other Chunking Settings; kept here to avoid churn.
    FIXED_CHUNK_STRATEGY : str = Field(default = "fixed", description = "Default chunking strategy")
    class Config:
        # pydantic-settings configuration: read overrides from a local .env file
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = True
    @field_validator("UPLOAD_DIR", "VECTOR_STORE_DIR", "LOG_DIR", "BACKUP_DIR", "METADATA_DB_PATH")
    @classmethod
    def create_directories(cls, v: Path) -> Path:
        """
        Ensure the directory for each path-valued setting exists.

        A path with a suffix (e.g. metadata.db) is treated as a file, so its
        parent directory is created; otherwise the path itself is created.
        Runs at Settings instantiation, i.e. at module import time.
        """
        if v.suffix: # It's a file path (like metadata.db)
            v.parent.mkdir(parents = True, exist_ok = True)
        else: # It's a directory
            v.mkdir(parents = True, exist_ok = True)
        return v
    @field_validator("VECTOR_WEIGHT", "BM25_WEIGHT")
    @classmethod
    def validate_weights_sum(cls, v: float, info) -> float:
        """
        Ensure the hybrid-search weights sum to 1.0 (within a 0.01 tolerance).

        The check only fires on BM25_WEIGHT, after VECTOR_WEIGHT has been
        validated and is available in info.data.

        Raises:
            ValueError: if VECTOR_WEIGHT + BM25_WEIGHT deviates from 1.0.
        """
        if ((info.field_name == "BM25_WEIGHT") and ("VECTOR_WEIGHT" in info.data)):
            vector_weight = info.data["VECTOR_WEIGHT"]
            if (abs(vector_weight + v - 1.0) > 0.01):
                raise ValueError(f"VECTOR_WEIGHT ({vector_weight}) + BM25_WEIGHT ({v}) must sum to 1.0")
        return v
    @property
    def max_file_size_bytes(self) -> int:
        """
        MAX_FILE_SIZE_MB converted to bytes.
        """
        return self.MAX_FILE_SIZE_MB * 1024 * 1024
    @property
    def is_cuda_available(self) -> bool:
        """
        True when a CUDA device is both requested and actually usable.

        Returns False for non-CUDA devices, and also when probing CUDA fails
        for any reason (e.g. a broken driver/runtime install).
        """
        if self.EMBEDDING_DEVICE != "cuda":
            return False
        try:
            return torch.cuda.is_available()
        except Exception:
            # BUGFIX: the previous `except ImportError` was dead code — torch
            # is imported at module level, so an import failure would already
            # have prevented this module from loading. A broad catch keeps the
            # defensive fall-back-to-False intent for misconfigured CUDA.
            return False
    def get_ollama_url(self, endpoint: str) -> str:
        """
        Construct a full Ollama API URL from the base URL and an endpoint,
        normalizing slashes at the join point.
        """
        return f"{self.OLLAMA_BASE_URL.rstrip('/')}/{endpoint.lstrip('/')}"
    @classmethod
    def get_timestamp_ms(cls) -> int:
        """
        Current Unix timestamp in milliseconds.
        """
        return int(time.time() * 1000)
    def summary(self) -> dict:
        """
        Configuration summary for display/logging, excluding sensitive data
        (API keys and SECRET_KEY are deliberately omitted).
        """
        return {"app_name" : self.APP_NAME,
                "version" : self.APP_VERSION,
                "ollama_model" : self.OLLAMA_MODEL,
                "embedding_model" : self.EMBEDDING_MODEL,
                "embedding_device" : self.EMBEDDING_DEVICE,
                "max_file_size_mb" : self.MAX_FILE_SIZE_MB,
                "allowed_extensions" : self.ALLOWED_EXTENSIONS,
                "chunking_strategy" : {"small_threshold" : self.SMALL_DOC_THRESHOLD, "large_threshold" : self.LARGE_DOC_THRESHOLD},
                "retrieval" : {"top_k" : self.TOP_K_RETRIEVE, "hybrid_weights" : {"vector" : self.VECTOR_WEIGHT, "bm25" : self.BM25_WEIGHT}},
                "evaluation" : {"ragas_enabled" : self.ENABLE_RAGAS, "ragas_ground_truth" : self.RAGAS_ENABLE_GROUND_TRUTH, "ragas_metrics" : self.RAGAS_METRICS},
                }
# Global settings instance.
# NOTE: constructing Settings here runs the directory-creation validator,
# so importing this module creates data/log directories as a side effect.
settings = Settings()
# Convenience accessor so callers can depend on a function instead of
# importing the module-level instance directly (eases testing/overrides).
def get_settings() -> Settings:
    """Return the module-level :data:`settings` singleton."""
    return settings