"""VRAM management helpers: processor registry, GPU->CPU offloading, diagnostics."""

import gc

import torch

# Registry of processor instances, keyed by the name given at registration.
_active_processors = {}

# Number of bytes in one GiB; used to convert raw byte counts for reporting.
_BYTES_PER_GIB = 1024**3

# For each known processor name: the attributes that may hold a model, paired
# with the human-readable label used in the offloading log line.
_OFFLOAD_TARGETS = {
    "greek": (
        ("trocr_model", "Greek TrOCR"),
    ),
    "latin": (
        ("tridis_model", "Latin TRIDIS"),
        ("trocr_latin_model", "Latin TrOCR"),
    ),
    "cuneiform": (
        ("clip_model", "Cuneiform CLIP"),
        ("cuneiform_model", "Cuneiform Translator"),
    ),
    "egyptian": (
        ("_model", "Egyptian T5"),
    ),
}


def register_processor(name, processor_instance):
    """Register a processor instance for active VRAM offloading.

    Args:
        name: Key under which the processor is stored. Only the names listed
            in ``_OFFLOAD_TARGETS`` are acted on by ``reclaim_vram_for``.
        processor_instance: Object holding the model attributes to offload.
            Registering the same name again replaces the previous instance.
    """
    _active_processors[name] = processor_instance
    print(f"[VRAM MANAGER] Registered processor: {name}")


def _offload_model_to_cpu(model, label):
    """Move ``model`` to the CPU if its first parameter lives on a CUDA device.

    Args:
        model: Object exposing ``parameters()`` and ``to(device)``.
        label: Human-readable model name used in the log line.

    Returns:
        True if the model was moved, False if it was left untouched.
    """
    first_param = next(model.parameters(), None)
    if first_param is None:
        # No parameters means there is no device to inspect; skip quietly
        # instead of surfacing a StopIteration as an offload failure.
        return False
    if not str(first_param.device).startswith("cuda"):
        return False
    print(f"[VRAM MANAGER] Offloading {label} to CPU...")
    model.to("cpu")
    return True


def _offload_processor_models(name, proc):
    """Offload every known model of processor ``name``; return True if any moved.

    Each model is handled in its own ``try`` so that a failure on one model
    does not leave the processor's remaining models on the GPU.
    """
    moved_any = False
    for attr, label in _OFFLOAD_TARGETS.get(name, ()):
        try:
            model = getattr(proc, attr, None)
            if model is not None and _offload_model_to_cpu(model, label):
                moved_any = True
        except Exception as e:
            # Best effort: report the failure and carry on with the next model.
            print(f"[WARN] Failed to offload '{name}' models: {e}")
    return moved_any


def reclaim_vram_for(target_processor_name):
    """Offload other processors' models from GPU to CPU to avoid OOM crashes.

    Every registered processor except ``target_processor_name`` has its known
    models moved to the CPU. If at least one model was moved, garbage
    collection runs and the CUDA cache is emptied. Does nothing when CUDA is
    unavailable.

    Args:
        target_processor_name: Name of the processor that should keep its
            models where they are.
    """
    if not torch.cuda.is_available():
        return

    print(f"[VRAM MANAGER] Reclaiming GPU VRAM for '{target_processor_name}'...")

    offloaded = False
    # Iterate over a snapshot so the registry may change while we work.
    for name, proc in list(_active_processors.items()):
        if name == target_processor_name:
            continue
        if _offload_processor_models(name, proc):
            offloaded = True

    if offloaded:
        gc.collect()
        torch.cuda.empty_cache()
        print("[VRAM MANAGER] VRAM cache cleared successfully.")


def get_gpu_info(device_index=0):
    """Get diagnostic information about the NVIDIA GPU if available.

    Args:
        device_index: CUDA device index to inspect. Defaults to 0.

    Returns:
        A dict with the keys ``cuda_available``, ``gpu_name``,
        ``vram_total_gb``, ``vram_allocated_gb``, ``vram_cached_gb``,
        ``vram_free_gb``, ``cuda_version`` and ``device``. Without CUDA the
        placeholder values (``"N/A"``, ``0.0``, ``"cpu"``) are returned. If a
        query fails, a warning is printed and the fields gathered so far are
        kept.
    """
    cuda_available = torch.cuda.is_available()
    info = {
        "cuda_available": cuda_available,
        "gpu_name": "N/A",
        "vram_total_gb": 0.0,
        "vram_allocated_gb": 0.0,
        "vram_cached_gb": 0.0,
        "vram_free_gb": 0.0,
        "cuda_version": torch.version.cuda if cuda_available else "N/A",
        "device": "cpu",
    }
    if not cuda_available:
        return info

    info["device"] = "cuda"
    try:
        info["gpu_name"] = torch.cuda.get_device_name(device_index)
        props = torch.cuda.get_device_properties(device_index)
        info["vram_total_gb"] = round(props.total_memory / _BYTES_PER_GIB, 2)

        allocated = torch.cuda.memory_allocated(device_index)
        cached = torch.cuda.memory_reserved(device_index)
        info["vram_allocated_gb"] = round(allocated / _BYTES_PER_GIB, 3)
        info["vram_cached_gb"] = round(cached / _BYTES_PER_GIB, 3)

        try:
            free_mem, _ = torch.cuda.mem_get_info(device_index)
            info["vram_free_gb"] = round(free_mem / _BYTES_PER_GIB, 3)
        except Exception:
            # Fall back to an estimate based on this process's own allocations
            # when the driver-level query is not usable.
            info["vram_free_gb"] = round(
                (props.total_memory - allocated) / _BYTES_PER_GIB, 3
            )
    except Exception as e:
        print(f"[WARN] Error gathering detailed GPU info: {e}")

    return info


def log_gpu_info():
    """Print clean diagnostic logs at startup."""
    info = get_gpu_info()
    separator = "=" * 60

    print(separator)
    print(" NVIDIA GPU & CUDA INITIALIZATION DIAGNOSTICS")
    print(separator)
    print(f"CUDA Available: {info['cuda_available']}")
    if info["cuda_available"]:
        print(f"CUDA Version: {info['cuda_version']}")
        print(f"GPU Model: {info['gpu_name']}")
        print(f"Total VRAM: {info['vram_total_gb']} GB")
        print(f"Free VRAM: {info['vram_free_gb']} GB")
        print("Active Device: CUDA (Dynamic Offloading Enabled)")
    else:
        print("Active Device: CPU (GPU acceleration not available)")
    print(separator)


def log_model_device(model_name, device):
    """Log the device selected for a specific model.

    Args:
        model_name: Name of the model, echoed in the log line.
        device: Device the model was assigned to; it is converted with
            ``str()`` and upper-cased for display.
    """
    print(f"[DEVICE LOG] Model '{model_name}' -> Assigned to: {str(device).upper()}")


def clear_gpu_cache():
    """Run garbage collection and empty the CUDA cache; no-op without CUDA."""
    if torch.cuda.is_available():
        gc.collect()
        torch.cuda.empty_cache()