# Page header captured with this file: Spaces status "Sleeping"; file size 6,064 bytes; revision 2f4af3f.
import torch
import gc
# Registered processor instances keyed by the name they were registered under;
# reclaim_vram_for() walks this mapping to decide which models to offload.
_active_processors = {}


def register_processor(name, processor_instance):
    """Store ``processor_instance`` in the module registry under ``name``.

    Registering the same ``name`` again replaces the earlier instance.
    A confirmation line is printed to stdout.
    """
    _active_processors.update({name: processor_instance})
    print(f"[VRAM MANAGER] Registered processor: {name}")
# Processor name -> ((model attribute on the processor, label used in log lines), ...).
_OFFLOAD_TARGETS = {
    "greek": (("trocr_model", "Greek TrOCR"),),
    "latin": (
        ("tridis_model", "Latin TRIDIS"),
        ("trocr_latin_model", "Latin TrOCR"),
    ),
    "cuneiform": (
        ("clip_model", "Cuneiform CLIP"),
        ("cuneiform_model", "Cuneiform Translator"),
    ),
    "egyptian": (("_model", "Egyptian T5"),),
}


def _offload_model_to_cpu(model, label):
    """Move ``model`` to the CPU if any of its parameters is on a CUDA device.

    Returns True when a move was performed, False when the model had no
    parameter on a CUDA device (including a model with no parameters at all).
    """
    # any() rather than next(): a parameter-less model is simply skipped, and a
    # model whose first parameter is on the CPU but others are not is still moved.
    on_gpu = any(
        str(param.device).startswith("cuda") for param in model.parameters()
    )
    if not on_gpu:
        return False
    print(f"[VRAM MANAGER] Offloading {label} to CPU...")
    model.to("cpu")
    return True


def reclaim_vram_for(target_processor_name):
    """Offload other processors' models from GPU to CPU to avoid Out of Memory (OOM) crashes.

    Every registered processor except ``target_processor_name`` is inspected;
    for the processor names listed in ``_OFFLOAD_TARGETS`` each known model
    attribute that is set and currently on a CUDA device is moved to the CPU.
    If at least one model was moved, garbage collection is run and the CUDA
    cache is emptied.

    Does nothing when CUDA is unavailable. Failures are reported with a
    ``[WARN]`` line and never raised; each model is handled independently, so
    a failure on one model does not prevent the remaining ones from being
    offloaded.

    Args:
        target_processor_name: Registry name of the processor whose models
            must stay where they are.

    Returns:
        None.
    """
    if not torch.cuda.is_available():
        return

    print(f"[VRAM MANAGER] Reclaiming GPU VRAM for '{target_processor_name}'...")
    offloaded = False

    # Iterate over a snapshot of the registry rather than the live dict.
    for name, proc in list(_active_processors.items()):
        if name == target_processor_name:
            continue
        for attr, label in _OFFLOAD_TARGETS.get(name, ()):
            # Best-effort by design: report the problem and keep going.
            try:
                model = getattr(proc, attr, None)
                if model is not None and _offload_model_to_cpu(model, label):
                    offloaded = True
            except Exception as exc:
                print(f"[WARN] Failed to offload '{name}' models: {exc}")

    if offloaded:
        gc.collect()
        torch.cuda.empty_cache()
        print("[VRAM MANAGER] VRAM cache cleared successfully.")
def get_gpu_info(device_index=0):
    """Get diagnostic information about the NVIDIA GPU if available.

    Args:
        device_index: CUDA device index to inspect. Defaults to 0, which
            matches the previous hard-coded behaviour.

    Returns:
        A dict with the keys ``cuda_available``, ``gpu_name``,
        ``vram_total_gb``, ``vram_allocated_gb``, ``vram_cached_gb``,
        ``vram_free_gb``, ``cuda_version`` and ``device``. Memory figures are
        byte counts divided by 1024**3 and rounded. Without CUDA the
        placeholder values (``"N/A"``, ``0.0``, ``"cpu"``) are returned.
        ``device`` is ``"cuda"`` for index 0 and ``"cuda:<index>"`` otherwise.

    Errors raised while querying the device are reported with a ``[WARN]``
    line and leave the affected fields at their placeholder values.
    """
    bytes_per_gib = 1024**3
    # Probe once so "cuda_available" and "cuda_version" cannot disagree.
    cuda_available = torch.cuda.is_available()

    info = {
        "cuda_available": cuda_available,
        "gpu_name": "N/A",
        "vram_total_gb": 0.0,
        "vram_allocated_gb": 0.0,
        "vram_cached_gb": 0.0,
        "vram_free_gb": 0.0,
        "cuda_version": torch.version.cuda if cuda_available else "N/A",
        "device": "cpu",
    }
    if not cuda_available:
        return info

    info["device"] = "cuda" if device_index == 0 else f"cuda:{device_index}"
    try:
        info["gpu_name"] = torch.cuda.get_device_name(device_index)
        props = torch.cuda.get_device_properties(device_index)
        info["vram_total_gb"] = round(props.total_memory / bytes_per_gib, 2)

        allocated = torch.cuda.memory_allocated(device_index)
        cached = torch.cuda.memory_reserved(device_index)
        info["vram_allocated_gb"] = round(allocated / bytes_per_gib, 3)
        info["vram_cached_gb"] = round(cached / bytes_per_gib, 3)

        try:
            free_mem, _ = torch.cuda.mem_get_info(device_index)
        except Exception:
            # Fallback estimate when mem_get_info cannot be used: total
            # memory minus what this process has allocated.
            free_mem = props.total_memory - allocated
        info["vram_free_gb"] = round(free_mem / bytes_per_gib, 3)
    except Exception as exc:
        print(f"[WARN] Error gathering detailed GPU info: {exc}")
    return info
def log_gpu_info():
    """Print a framed GPU/CUDA diagnostics report to stdout.

    The report is built from ``get_gpu_info()``: it always shows whether CUDA
    is available, then either the CUDA version, GPU model, total and free
    VRAM, or a single line stating that the CPU is the active device.
    """
    gpu = get_gpu_info()
    rule = "=" * 60

    report = [
        rule,
        " NVIDIA GPU & CUDA INITIALIZATION DIAGNOSTICS",
        rule,
        f"CUDA Available: {gpu['cuda_available']}",
    ]
    if not gpu["cuda_available"]:
        report.append("Active Device: CPU (GPU acceleration not available)")
    else:
        report.extend(
            [
                f"CUDA Version: {gpu['cuda_version']}",
                f"GPU Model: {gpu['gpu_name']}",
                f"Total VRAM: {gpu['vram_total_gb']} GB",
                f"Free VRAM: {gpu['vram_free_gb']} GB",
                "Active Device: CUDA (Dynamic Offloading Enabled)",
            ]
        )
    report.append(rule)

    for line in report:
        print(line)
def log_model_device(model_name, device):
    """Print which device ``model_name`` was assigned to.

    ``device`` is converted with ``str()`` and upper-cased for the log line.
    """
    device_label = str(device).upper()
    print(f"[DEVICE LOG] Model '{model_name}' -> Assigned to: {device_label}")
def clear_gpu_cache():
    """Run garbage collection and empty the CUDA cache when CUDA is available.

    Does nothing when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        return
    gc.collect()
    torch.cuda.empty_cache()
|