"""
Optimized configuration for ALL RAG systems - BACKWARD COMPATIBLE.

Importing this module has two side effects: it creates the data, models and
cache directories next to this file, and (when imported rather than run as a
script) it calls ``validate_config()``, which prints its findings.
"""
import os
from pathlib import Path

# Base paths
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / "data"
MODELS_DIR = BASE_DIR / "models"
CACHE_DIR = BASE_DIR / ".cache"

# Ensure directories exist
for directory in [DATA_DIR, MODELS_DIR, CACHE_DIR]:
    directory.mkdir(exist_ok=True)

# Model Configuration
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
LLM_MODEL = "microsoft/phi-2"

# ===== BACKWARD COMPATIBLE CONFIGS =====
# For Naive RAG and Optimized RAG
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
TOP_K = 5  # For backward compatibility

# For Optimized RAG
TOP_K_DYNAMIC_OPTIMIZED = {
    "short": 2,   # < 10 tokens
    "medium": 3,  # 10-30 tokens
    "long": 4     # > 30 tokens
}

# For Hyper RAG (more aggressive)
TOP_K_DYNAMIC_HYPER = {
    "short": 3,   # < 5 words
    "medium": 4,  # 5-15 words
    "long": 5     # > 15 words
}

# Alias for backward compatibility.
# NOTE: this is the same dict object as TOP_K_DYNAMIC_OPTIMIZED, not a copy,
# so mutating one mutates the other.
TOP_K_DYNAMIC = TOP_K_DYNAMIC_OPTIMIZED

# FAISS Configuration
FAISS_INDEX_PATH = DATA_DIR / "faiss_index.bin"
DOCSTORE_PATH = DATA_DIR / "docstore.db"

# Cache Configuration
EMBEDDING_CACHE_PATH = DATA_DIR / "embedding_cache.db"
QUERY_CACHE_TTL = 3600

# LLM Inference Configuration
MAX_TOKENS = 1024
TEMPERATURE = 0.1
CONTEXT_SIZE = 2048

# Performance Settings
ENABLE_EMBEDDING_CACHE = True
ENABLE_QUERY_CACHE = True
USE_QUANTIZED_LLM = False
BATCH_SIZE = 1

# FILTERING SETTINGS
ENABLE_PRE_FILTER = True
ENABLE_PROMPT_COMPRESSION = True
MIN_FILTER_MATCHES = 1
FILTER_EXPANSION_FACTOR = 2.0

# Dataset Configuration
SAMPLE_DOCUMENTS = 1000

# Monitoring
ENABLE_METRICS = True
METRICS_FILE = DATA_DIR / "metrics.csv"

# HYPER RAG SPECIFIC OPTIMIZATIONS
HYPER_CACHE_SIZE = 1000
HYPER_THREAD_WORKERS = 4
HYPER_MIN_CHUNKS = 1


# ===== CONFIG VALIDATION =====
def _numeric_setting_errors() -> list:
    """Return error messages for chunking / top-k values that are out of range."""
    problems = []

    if CHUNK_SIZE <= 0:
        problems.append(f"CHUNK_SIZE must be positive: {CHUNK_SIZE}")
    elif not 0 <= CHUNK_OVERLAP < CHUNK_SIZE:
        # An overlap as large as the chunk itself leaves no room for new text.
        problems.append(
            f"CHUNK_OVERLAP must be >= 0 and smaller than CHUNK_SIZE "
            f"({CHUNK_SIZE}): {CHUNK_OVERLAP}"
        )

    if TOP_K < 1:
        problems.append(f"TOP_K must be at least 1: {TOP_K}")

    dynamic_tables = (
        ("TOP_K_DYNAMIC_OPTIMIZED", TOP_K_DYNAMIC_OPTIMIZED),
        ("TOP_K_DYNAMIC_HYPER", TOP_K_DYNAMIC_HYPER),
    )
    for table_name, table in dynamic_tables:
        for bucket, k in table.items():
            if k < 1:
                problems.append(
                    f"{table_name}[{bucket!r}] must be at least 1: {k}"
                )

    return problems


def validate_config() -> bool:
    """Validate configuration settings.

    Checks that the data and models directories exist and that the chunking
    and top-k settings are within a usable range. A missing FAISS index or
    embedding cache only produces a printed warning, not an error.

    Returns:
        True when no errors were found, False otherwise. All findings are
        printed to stdout.
    """
    errors = []

    # Check required directories
    for dir_name, dir_path in [("DATA", DATA_DIR), ("MODELS", MODELS_DIR)]:
        if not dir_path.exists():
            errors.append(f"{dir_name} directory does not exist: {dir_path}")

    # Check numeric settings
    errors.extend(_numeric_setting_errors())

    # Check FAISS index
    if not FAISS_INDEX_PATH.exists():
        print(f"⚠ WARNING: FAISS index not found at {FAISS_INDEX_PATH}")
        print(" Run: python scripts/initialize_rag.py")

    # Check embedding cache
    if ENABLE_EMBEDDING_CACHE and not EMBEDDING_CACHE_PATH.exists():
        print(f"⚠ WARNING: Embedding cache not found at {EMBEDDING_CACHE_PATH}")
        print(" It will be created automatically on first use.")

    if errors:
        print("\n❌ CONFIGURATION ERRORS:")
        for error in errors:
            print(f" - {error}")
        return False

    print("✅ Configuration validated successfully")
    return True


# Auto-validate on import (skipped when this file is executed as a script)
if __name__ != "__main__":
    validate_config()