# QuerySphere application configuration (pydantic-settings based).
| # DEPENDENCIES | |
| import os | |
| import time | |
| import torch | |
| from pathlib import Path | |
| from pydantic import Field | |
| from typing import Literal | |
| from typing import Optional | |
| from pydantic import field_validator | |
| from pydantic_settings import BaseSettings | |
class Settings(BaseSettings):
    """
    Application configuration with environment variable support.

    Values are resolved in order of precedence: environment variables,
    the ``.env`` file (see the inner ``Config`` class), then the defaults
    declared here. Environment variable names are case-sensitive and must
    match the field names exactly.
    """

    # HuggingFace Space deployment detection: the Spaces runtime injects
    # SPACE_ID, so its presence identifies that environment.
    IS_HF_SPACE: bool = Field(default=os.getenv("SPACE_ID") is not None, description="Running in HF Space")

    # Application settings
    APP_NAME: str = "QuerySphere"
    APP_VERSION: str = "1.0.0"
    DEBUG: bool = Field(default=False, description="Enable debug mode")
    HOST: str = Field(default="0.0.0.0", description="API host")
    PORT: int = Field(default=int(os.getenv("PORT", 8000)), description="API port (7860 for HF Spaces)")

    # LLM provider selection
    OLLAMA_ENABLED: bool = Field(default=os.getenv("OLLAMA_ENABLED", "true").lower() == "true", description="Enable Ollama (set false for HF Spaces)")
    USE_OPENAI: bool = Field(default=os.getenv("USE_OPENAI", "false").lower() == "true", description="Use OpenAI API instead of local LLM")

    # File upload settings
    MAX_FILE_SIZE_MB: int = Field(default=100, description="Max file size in MB")
    MAX_BATCH_FILES: int = Field(default=10, description="Max files per upload")
    ALLOWED_EXTENSIONS: list[str] = Field(default=["pdf", "docx", "txt"], description="Allowed file extensions")
    UPLOAD_DIR: Path = Field(default=Path("data/uploads"), description="Directory for uploaded files")

    # Ollama LLM settings
    OLLAMA_BASE_URL: str = Field(default="http://localhost:11434", description="Ollama API endpoint")
    OLLAMA_MODEL: str = Field(default="mistral:7b", description="Ollama model name")
    OLLAMA_TIMEOUT: int = Field(default=120, description="Ollama request timeout (seconds)")

    # Generation parameters
    DEFAULT_TEMPERATURE: float = Field(default=0.1, ge=0.0, le=1.0, description="LLM temperature (0=deterministic, 1=creative)")
    TOP_P: float = Field(default=0.9, ge=0.0, le=1.0, description="Nucleus sampling threshold")
    MAX_TOKENS: int = Field(default=1000, description="Max output tokens")
    CONTEXT_WINDOW: int = Field(default=8192, description="Model context window size")

    # OpenAI settings
    OPENAI_API_KEY: Optional[str] = Field(default=os.getenv("OPENAI_API_KEY"), description="Open AI API secret key")
    OPENAI_MODEL: str = Field(default="gpt-3.5-turbo", description="OpenAI model name")

    # Embedding settings
    EMBEDDING_MODEL: str = Field(default="BAAI/bge-small-en-v1.5", description="HuggingFace embedding model")
    EMBEDDING_DIMENSION: int = Field(default=384, description="Embedding vector dimension")
    EMBEDDING_DEVICE: Literal["cpu", "cuda", "mps"] = Field(default="cpu", description="Device for embedding generation")
    EMBEDDING_BATCH_SIZE: int = Field(default=32, description="Batch size for embedding generation")

    # Chunking settings — fixed chunking
    FIXED_CHUNK_SIZE: int = Field(default=512, description="Fixed chunk size in tokens")
    FIXED_CHUNK_OVERLAP: int = Field(default=25, description="Overlap between chunks")
    # Semantic chunking
    SEMANTIC_BREAKPOINT_THRESHOLD: float = Field(default=0.80, description="Percentile for semantic breakpoints")
    # Hierarchical chunking
    PARENT_CHUNK_SIZE: int = Field(default=2048, description="Parent chunk size")
    CHILD_CHUNK_SIZE: int = Field(default=512, description="Child chunk size")
    # Adaptive thresholds
    SMALL_DOC_THRESHOLD: int = Field(default=1000, description="Token threshold for fixed chunking")
    LARGE_DOC_THRESHOLD: int = Field(default=500000, description="Token threshold for hierarchical chunking")

    # Retrieval settings — vector search
    TOP_K_RETRIEVE: int = Field(default=10, description="Top chunks to retrieve")
    TOP_K_FINAL: int = Field(default=5, description="Final chunks after reranking")
    FAISS_NPROBE: int = Field(default=10, description="FAISS search probes")
    # Hybrid search weights (must sum to 1.0 — enforced by validate_weights_sum)
    VECTOR_WEIGHT: float = Field(default=0.6, description="Vector search weight")
    BM25_WEIGHT: float = Field(default=0.4, description="BM25 search weight")
    # BM25 parameters
    BM25_K1: float = Field(default=1.5, description="BM25 term saturation")
    BM25_B: float = Field(default=0.75, description="BM25 length normalization")
    # Reranking
    ENABLE_RERANKING: bool = Field(default=True, description="Enable cross-encoder reranking")
    RERANKER_MODEL: str = Field(default="cross-encoder/ms-marco-MiniLM-L-6-v2", description="Reranker model")

    # Storage settings
    VECTOR_STORE_DIR: Path = Field(default=Path("data/vector_store"), description="FAISS index storage")
    METADATA_DB_PATH: Path = Field(default=Path("data/metadata.db"), description="SQLite metadata database")
    # Backup
    AUTO_BACKUP: bool = Field(default=True, description="Enable auto-backup")
    BACKUP_INTERVAL: int = Field(default=1000, description="Backup every N documents")
    BACKUP_DIR: Path = Field(default=Path("data/backups"), description="Backup directory")

    # Cache settings
    ENABLE_CACHE: bool = Field(default=True, description="Enable embedding cache")
    CACHE_TYPE: Literal["memory", "redis"] = Field(default="memory", description="Cache backend")
    CACHE_TTL: int = Field(default=3600, description="Cache TTL in seconds")
    CACHE_MAX_SIZE: int = Field(default=1000, description="Max cached items")
    # Redis (only used when CACHE_TYPE == "redis")
    REDIS_HOST: str = Field(default="localhost", description="Redis host")
    REDIS_PORT: int = Field(default=6379, description="Redis port")
    REDIS_DB: int = Field(default=0, description="Redis database number")

    # Logging settings
    LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default="INFO", description="Logging level")
    LOG_DIR: Path = Field(default=Path("logs"), description="Log file directory")
    LOG_FORMAT: str = Field(default="%(asctime)s - %(name)s - %(levelname)s - %(message)s", description="Log format string")
    LOG_ROTATION: str = Field(default="500 MB", description="Log rotation size")
    LOG_RETENTION: str = Field(default="30 days", description="Log retention period")

    # Evaluation settings
    ENABLE_RAGAS: bool = Field(default=True, description="Enable Ragas evaluation")
    RAGAS_ENABLE_GROUND_TRUTH: bool = Field(default=False, description="Enable RAGAS metrics requiring ground truth")
    RAGAS_METRICS: list[str] = Field(default=["answer_relevancy", "faithfulness", "context_utilization", "context_relevancy"], description="Ragas metrics to compute (base metrics without ground truth)")
    RAGAS_GROUND_TRUTH_METRICS: list[str] = Field(default=["context_precision", "context_recall", "answer_similarity", "answer_correctness"], description="Ragas metrics requiring ground truth")
    RAGAS_EVALUATION_TIMEOUT: int = Field(default=60, description="RAGAS evaluation timeout in seconds")
    RAGAS_BATCH_SIZE: int = Field(default=10, description="Batch size for RAGAS evaluations")

    # Web scraping settings (future use)
    SCRAPING_ENABLED: bool = Field(default=False, description="Enable web scraping")
    USER_AGENT: str = Field(default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", description="User agent for scraping")
    REQUEST_DELAY: float = Field(default=2.0, description="Delay between requests (seconds)")
    MAX_RETRIES: int = Field(default=3, description="Max scraping retries")

    # Performance settings
    MAX_WORKERS: int = Field(default=4, description="Max parallel workers")
    ASYNC_BATCH_SIZE: int = Field(default=10, description="Async batch size")

    # Security settings
    ENABLE_AUTH: bool = Field(default=False, description="Enable authentication")
    SECRET_KEY: str = Field(default=os.getenv("SECRET_KEY", "dev-key-change-in-production"))
    FIXED_CHUNK_STRATEGY: str = Field(default="fixed", description="Default chunking strategy")

    class Config:
        # pydantic-settings: load overrides from .env; env var names must
        # match field names exactly (case_sensitive = True).
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = True

    # BUG FIX: the two methods below were written with validator signatures
    # (cls + info) but were never registered via @field_validator, so they
    # never ran — directories were never created and the weight-sum check
    # was never enforced. They are now registered and run at Settings()
    # construction time.
    @field_validator("UPLOAD_DIR", "VECTOR_STORE_DIR", "METADATA_DB_PATH", "BACKUP_DIR", "LOG_DIR")
    @classmethod
    def create_directories(cls, v: Path) -> Path:
        """
        Ensure the configured directory (or a file path's parent) exists.
        """
        if v.suffix:  # It's a file path (like metadata.db)
            v.parent.mkdir(parents=True, exist_ok=True)
        else:  # It's a directory
            v.mkdir(parents=True, exist_ok=True)
        return v

    @field_validator("BM25_WEIGHT")
    @classmethod
    def validate_weights_sum(cls, v: float, info) -> float:
        """
        Reject configurations where VECTOR_WEIGHT + BM25_WEIGHT != 1.0.

        VECTOR_WEIGHT is declared before BM25_WEIGHT, so it is already
        available in ``info.data`` when this validator runs.
        """
        if "VECTOR_WEIGHT" in info.data:
            vector_weight = info.data["VECTOR_WEIGHT"]
            if abs(vector_weight + v - 1.0) > 0.01:  # small tolerance for float rounding
                raise ValueError(f"VECTOR_WEIGHT ({vector_weight}) + BM25_WEIGHT ({v}) must sum to 1.0")
        return v

    def max_file_size_bytes(self) -> int:
        """
        Return MAX_FILE_SIZE_MB converted to bytes.
        """
        return self.MAX_FILE_SIZE_MB * 1024 * 1024

    def is_cuda_available(self) -> bool:
        """
        Return True only when CUDA is both requested and usable.
        """
        if self.EMBEDDING_DEVICE == "cuda":
            try:
                return torch.cuda.is_available()
            except ImportError:
                # torch is imported at module level, so this guard is purely
                # defensive; kept in case the import strategy changes.
                return False
        return False

    def get_ollama_url(self, endpoint: str) -> str:
        """
        Join OLLAMA_BASE_URL and *endpoint* with exactly one slash.
        """
        return f"{self.OLLAMA_BASE_URL.rstrip('/')}/{endpoint.lstrip('/')}"

    # BUG FIX: previously declared with a bare `cls` parameter but no
    # decorator, so `cls` silently received the instance and calling it on
    # the class (Settings.get_timestamp_ms()) raised TypeError. It reads no
    # state, so an explicit @staticmethod is correct and backward compatible
    # for instance calls.
    @staticmethod
    def get_timestamp_ms() -> int:
        """
        Return the current Unix timestamp in milliseconds.
        """
        return int(time.time() * 1000)

    def summary(self) -> dict:
        """
        Return a configuration summary (excluding sensitive data).
        """
        return {
            "app_name": self.APP_NAME,
            "version": self.APP_VERSION,
            "ollama_model": self.OLLAMA_MODEL,
            "embedding_model": self.EMBEDDING_MODEL,
            "embedding_device": self.EMBEDDING_DEVICE,
            "max_file_size_mb": self.MAX_FILE_SIZE_MB,
            "allowed_extensions": self.ALLOWED_EXTENSIONS,
            "chunking_strategy": {"small_threshold": self.SMALL_DOC_THRESHOLD, "large_threshold": self.LARGE_DOC_THRESHOLD},
            "retrieval": {"top_k": self.TOP_K_RETRIEVE, "hybrid_weights": {"vector": self.VECTOR_WEIGHT, "bm25": self.BM25_WEIGHT}},
            "evaluation": {"ragas_enabled": self.ENABLE_RAGAS, "ragas_ground_truth": self.RAGAS_ENABLE_GROUND_TRUTH, "ragas_metrics": self.RAGAS_METRICS},
        }
# Module-level singleton, constructed once at import time.
settings = Settings()


def get_settings() -> Settings:
    """
    Return the shared global Settings instance.
    """
    return settings