# Ariyan-Pro's picture
# Deploy RAG Latency Optimization v1.0
# 04ab625
"""
Optimized configuration for ALL RAG systems - BACKWARD COMPATIBLE.
"""
import os
from pathlib import Path
# Base paths: every location below is anchored to the directory that
# contains this file.
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / "data"
MODELS_DIR = BASE_DIR / "models"
CACHE_DIR = BASE_DIR / ".cache"

# Ensure directories exist. This runs at import time, so a failure here
# aborts the import. parents=True keeps the call from raising
# FileNotFoundError if one of these paths is ever pointed at a nested
# location; exist_ok=True makes repeated imports harmless.
for directory in (DATA_DIR, MODELS_DIR, CACHE_DIR):
    directory.mkdir(parents=True, exist_ok=True)
# --- Model configuration ---
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
LLM_MODEL = "microsoft/phi-2"

# ===== BACKWARD COMPATIBLE CONFIGS =====

# Shared by Naive RAG and Optimized RAG.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
TOP_K = 5  # fixed top-k, kept for backward compatibility

# Optimized RAG: top-k per query-length bucket.
TOP_K_DYNAMIC_OPTIMIZED = {
    "short": 2,   # < 10 tokens
    "medium": 3,  # 10-30 tokens
    "long": 4,    # > 30 tokens
}

# Hyper RAG (more aggressive): top-k per query-length bucket.
TOP_K_DYNAMIC_HYPER = {
    "short": 3,   # < 5 words
    "medium": 4,  # 5-15 words
    "long": 5,    # > 15 words
}

# Backward-compatible alias. This is the same dict object as
# TOP_K_DYNAMIC_OPTIMIZED, not a copy, so mutating one mutates the other.
TOP_K_DYNAMIC = TOP_K_DYNAMIC_OPTIMIZED
# --- FAISS configuration ---
# Both files live directly under DATA_DIR.
FAISS_INDEX_PATH = DATA_DIR.joinpath("faiss_index.bin")
DOCSTORE_PATH = DATA_DIR.joinpath("docstore.db")

# --- Cache configuration ---
# The embedding cache is stored under DATA_DIR (not CACHE_DIR).
EMBEDDING_CACHE_PATH = DATA_DIR.joinpath("embedding_cache.db")
# 3600; presumably seconds (one hour) - confirm against the query cache code.
QUERY_CACHE_TTL = 60 * 60
# --- LLM inference configuration ---
# NOTE(review): units are not stated in this file; MAX_TOKENS and
# CONTEXT_SIZE are presumably token counts - confirm against the LLM loader.
MAX_TOKENS = 1_024
TEMPERATURE = 0.1
CONTEXT_SIZE = 2_048

# --- Performance settings ---
ENABLE_EMBEDDING_CACHE = True
ENABLE_QUERY_CACHE = True
USE_QUANTIZED_LLM = False
BATCH_SIZE = 1

# --- Filtering settings ---
ENABLE_PRE_FILTER = True
ENABLE_PROMPT_COMPRESSION = True
MIN_FILTER_MATCHES = 1
FILTER_EXPANSION_FACTOR = 2.0

# --- Dataset configuration ---
SAMPLE_DOCUMENTS = 1_000
# --- Monitoring ---
ENABLE_METRICS = True
# Metrics CSV is stored directly under DATA_DIR.
METRICS_FILE = DATA_DIR.joinpath("metrics.csv")
# --- Hyper RAG specific optimizations ---
# NOTE(review): meanings are inferred from the names only (cache entry
# limit, worker-thread count, minimum chunk count) - confirm at the use sites.
HYPER_CACHE_SIZE = 1_000
HYPER_THREAD_WORKERS = 4
HYPER_MIN_CHUNKS = 1
# ===== CONFIG VALIDATION =====
def validate_config() -> bool:
    """Validate configuration settings.

    Checks that the required DATA and MODELS directories are present and
    prints non-fatal warnings when the FAISS index file or the embedding
    cache file is missing. All diagnostics are written with ``print``.

    Returns:
        ``True`` when both required directories are present, ``False``
        otherwise. A missing index or cache file only produces a warning
        and never causes a ``False`` result.
    """
    # Check required directories. is_dir() is used instead of exists() so
    # that a regular file sitting at the path is not accepted as the
    # directory.
    errors = [
        f"{dir_name} directory does not exist: {dir_path}"
        for dir_name, dir_path in (("DATA", DATA_DIR), ("MODELS", MODELS_DIR))
        if not dir_path.is_dir()
    ]

    # Check FAISS index (warning only).
    if not FAISS_INDEX_PATH.exists():
        print(f"⚠ WARNING: FAISS index not found at {FAISS_INDEX_PATH}")
        print(" Run: python scripts/initialize_rag.py")

    # Check embedding cache (warning only; skipped when the cache is disabled).
    if ENABLE_EMBEDDING_CACHE and not EMBEDDING_CACHE_PATH.exists():
        print(f"⚠ WARNING: Embedding cache not found at {EMBEDDING_CACHE_PATH}")
        print(" It will be created automatically on first use.")

    if errors:
        print("\n❌ CONFIGURATION ERRORS:")
        for error in errors:
            print(f" - {error}")
        return False

    print("✅ Configuration validated successfully")
    return True
# Auto-validate on import: the check runs whenever this module is imported
# and is skipped when the file is executed directly as a script. The boolean
# result of validate_config() is discarded here; only its printed output is
# visible.
if __name__ != "__main__":
    validate_config()