# llm_clients/performance_utils.py
"""
Performance optimization utilities to reduce startup time and memory usage.

Every helper here is best-effort: optional dependencies (``transformers``,
``torch``, the package-local ``shared_models`` module) are imported lazily
inside the function that needs them, and a missing dependency is tolerated
rather than raised.
"""

import os
import warnings


def apply_performance_optimizations() -> None:
    """Set process-wide environment variables and warning filters.

    Side effects:
        * Overwrites six entries in ``os.environ`` (any value already
          present is replaced).
        * Installs three ``"ignore"`` filters in the ``warnings`` module.
        * Prints a one-line confirmation to stdout.

    NOTE(review): these variables are presumably read by the respective
    libraries when they are imported/initialised, so this should run before
    TensorFlow, PyTorch or tokenizers are first imported -- confirm against
    the application's startup order.
    """
    os.environ.update(
        {
            # Disable TensorFlow warnings and optimizations
            "TF_ENABLE_ONEDNN_OPTS": "0",
            "TF_CPP_MIN_LOG_LEVEL": "3",  # Only show errors
            # Disable PyTorch compilation for CPU-only inference
            "TORCH_COMPILE_DISABLE": "1",
            "TORCHDYNAMO_DISABLE": "1",
            # Optimize memory usage
            "TOKENIZERS_PARALLELISM": "false",  # Reduce tokenizer overhead
            "OMP_NUM_THREADS": "1",  # Reduce CPU threading overhead
        }
    )

    # Disable various warnings to reduce console noise
    warnings.filterwarnings("ignore", category=FutureWarning)
    warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
    warnings.filterwarnings("ignore", category=UserWarning, module="torch")

    print("⚡ Applied performance optimizations")


def setup_model_sharing():
    """Initialize shared model manager early to control loading order.

    Returns:
        The ``shared_model_manager`` object exported by the sibling
        ``shared_models`` module, or ``None`` when that module cannot be
        imported (a warning line is printed in that case).
    """
    try:
        from .shared_models import shared_model_manager
    except ImportError:
        # Best-effort: the caller can carry on without the shared manager.
        print("⚠️ Could not initialize shared model manager")
        return None
    else:
        print("🔗 Shared model manager initialized")
        return shared_model_manager


def optimize_transformers() -> None:
    """Apply transformers-specific optimizations.

    Sets the ``transformers`` logging verbosity to error level and prints a
    confirmation. Does nothing, silently, when ``transformers`` is not
    installed.
    """
    try:
        import transformers

        # Disable transformers warnings
        transformers.logging.set_verbosity_error()

        print("🤖 Transformers logging optimized")
    except ImportError:
        # transformers is optional; nothing to tune without it.
        pass


def optimize_for_cpu() -> None:
    """Apply CPU-specific optimizations.

    Limits PyTorch to a single thread and turns gradient tracking off, then
    prints a confirmation. Does nothing, silently, when ``torch`` is not
    installed.

    NOTE(review): PyTorch's grad mode is understood to be thread-local --
    confirm whether worker threads need to disable it themselves.
    """
    try:
        import torch

        # Set number of threads for CPU inference
        torch.set_num_threads(1)

        # Disable autograd for inference-only mode
        torch.autograd.set_grad_enabled(False)

        print("🧠 CPU inference optimized")
    except ImportError:
        # torch is optional; nothing to tune without it.
        pass


def apply_all_optimizations() -> None:
    """Apply all available performance optimizations.

    Runs, in order: ``apply_performance_optimizations`` (environment
    variables first, before the optional libraries get imported),
    ``optimize_transformers``, ``optimize_for_cpu`` and
    ``setup_model_sharing``. The value returned by ``setup_model_sharing``
    is discarded.
    """
    apply_performance_optimizations()
    optimize_transformers()
    optimize_for_cpu()
    setup_model_sharing()