# Source: decipherai-api/utils/gpu_diagnostics.py
# Commit 2f4af3f (Akshay30): "Initial DecipherAI backend deployment"
import torch
import gc
# Module-level registry mapping a processor name to its live instance.
# register_processor() fills it; reclaim_vram_for() iterates it to find
# models that can be moved off the GPU.
_active_processors = {}
def register_processor(name, processor_instance):
    """Store a processor instance in the VRAM-offloading registry.

    Args:
        name: Key under which the processor is stored. Registering the same
            name again replaces the previous instance.
        processor_instance: The processor object to keep a reference to.
    """
    _active_processors[name] = processor_instance
    confirmation = f"[VRAM MANAGER] Registered processor: {name}"
    print(confirmation)
# Processor name -> (model attribute on the processor, label used in logs).
# Processors registered under any other name are left untouched.
_OFFLOAD_TARGETS = {
    "greek": (("trocr_model", "Greek TrOCR"),),
    "latin": (
        ("tridis_model", "Latin TRIDIS"),
        ("trocr_latin_model", "Latin TrOCR"),
    ),
    "cuneiform": (
        ("clip_model", "Cuneiform CLIP"),
        ("cuneiform_model", "Cuneiform Translator"),
    ),
    "egyptian": (("_model", "Egyptian T5"),),
}


def _offload_model_to_cpu(proc, attr, label):
    """Move ``proc.<attr>`` to the CPU if its first parameter lives on CUDA.

    Args:
        proc: Processor instance that may hold the model.
        attr: Name of the attribute holding the model.
        label: Human-readable model name used in the log line.

    Returns:
        True if the model was moved, False if there was nothing to do
        (attribute missing/None, model has no parameters, or the model is
        not on a CUDA device).
    """
    model = getattr(proc, attr, None)
    if model is None:
        return False
    # The device of the first parameter is used as the model's device.
    # A default of None avoids StopIteration on parameter-less models.
    first_param = next(model.parameters(), None)
    if first_param is None:
        return False
    if not str(first_param.device).startswith("cuda"):
        return False
    print(f"[VRAM MANAGER] Offloading {label} to CPU...")
    model.to("cpu")
    return True


def reclaim_vram_for(target_processor_name):
    """Offload other processors' models from GPU to CPU to avoid OOM crashes.

    Every registered processor except ``target_processor_name`` has its known
    model attributes moved to the CPU when they currently sit on a CUDA
    device. If at least one model was moved, garbage collection is run and
    the CUDA cache is emptied. Does nothing when CUDA is unavailable.

    Args:
        target_processor_name: Name of the processor that is about to use
            the GPU; its own models are never offloaded.
    """
    if not torch.cuda.is_available():
        return

    print(f"[VRAM MANAGER] Reclaiming GPU VRAM for '{target_processor_name}'...")
    offloaded = False

    # Iterate over a snapshot so the registry may change while we work.
    for name, proc in list(_active_processors.items()):
        if name == target_processor_name:
            continue
        for attr, label in _OFFLOAD_TARGETS.get(name, ()):
            # Best-effort per model: a failure on one model must not stop
            # the processor's remaining models from being offloaded.
            try:
                if _offload_model_to_cpu(proc, attr, label):
                    offloaded = True
            except Exception as e:
                print(f"[WARN] Failed to offload '{name}' models: {e}")

    if offloaded:
        # Drop dangling Python references first so the freed tensors can
        # actually be released from the CUDA cache.
        gc.collect()
        torch.cuda.empty_cache()
        print("[VRAM MANAGER] VRAM cache cleared successfully.")
def get_gpu_info():
    """Collect a diagnostic snapshot of CUDA device 0.

    Returns:
        dict with the keys ``cuda_available``, ``gpu_name``,
        ``vram_total_gb``, ``vram_allocated_gb``, ``vram_cached_gb``,
        ``vram_free_gb``, ``cuda_version`` and ``device``. Without CUDA the
        placeholder values ("N/A", 0.0, "cpu") are returned. If querying the
        device fails part-way, a warning is printed and the fields gathered
        so far are kept.
    """
    bytes_per_gib = 1024**3
    has_cuda = torch.cuda.is_available()

    report = {
        "cuda_available": has_cuda,
        "gpu_name": "N/A",
        "vram_total_gb": 0.0,
        "vram_allocated_gb": 0.0,
        "vram_cached_gb": 0.0,
        "vram_free_gb": 0.0,
        "cuda_version": torch.version.cuda if has_cuda else "N/A",
        "device": "cuda" if has_cuda else "cpu",
    }
    if not has_cuda:
        return report

    try:
        report["gpu_name"] = torch.cuda.get_device_name(0)
        device_props = torch.cuda.get_device_properties(0)
        report["vram_total_gb"] = round(device_props.total_memory / bytes_per_gib, 2)

        allocated_bytes = torch.cuda.memory_allocated(0)
        reserved_bytes = torch.cuda.memory_reserved(0)
        report["vram_allocated_gb"] = round(allocated_bytes / bytes_per_gib, 3)
        report["vram_cached_gb"] = round(reserved_bytes / bytes_per_gib, 3)

        try:
            free_bytes, _ = torch.cuda.mem_get_info(0)
            report["vram_free_gb"] = round(free_bytes / bytes_per_gib, 3)
        except Exception:
            # Fallback estimate when mem_get_info fails: total minus what
            # this process has allocated.
            report["vram_free_gb"] = round(
                (device_props.total_memory - allocated_bytes) / bytes_per_gib, 3
            )
    except Exception as err:
        print(f"[WARN] Error gathering detailed GPU info: {err}")

    return report
def log_gpu_info():
    """Print a banner-framed summary of the GPU/CUDA state.

    The values come from get_gpu_info(). With CUDA available the CUDA
    version, GPU model, total and free VRAM are listed; otherwise a single
    line states that the CPU is the active device.
    """
    gpu = get_gpu_info()
    rule = "=" * 60

    lines = [
        rule,
        " NVIDIA GPU & CUDA INITIALIZATION DIAGNOSTICS",
        rule,
        f"CUDA Available: {gpu['cuda_available']}",
    ]
    if gpu["cuda_available"]:
        lines.extend(
            [
                f"CUDA Version: {gpu['cuda_version']}",
                f"GPU Model: {gpu['gpu_name']}",
                f"Total VRAM: {gpu['vram_total_gb']} GB",
                f"Free VRAM: {gpu['vram_free_gb']} GB",
                "Active Device: CUDA (Dynamic Offloading Enabled)",
            ]
        )
    else:
        lines.append("Active Device: CPU (GPU acceleration not available)")
    lines.append(rule)

    for line in lines:
        print(line)
def log_model_device(model_name, device):
    """Print which device a model was assigned to.

    Args:
        model_name: Name of the model shown in the log line.
        device: Device the model was placed on; it is converted with
            ``str()`` and upper-cased for display.
    """
    device_label = str(device).upper()
    print(f"[DEVICE LOG] Model '{model_name}' -> Assigned to: {device_label}")
def clear_gpu_cache():
    """Run garbage collection and empty the CUDA cache.

    Intended for use during benchmarks or processing. Does nothing when
    CUDA is not available.
    """
    if not torch.cuda.is_available():
        return
    gc.collect()
    torch.cuda.empty_cache()