Spaces:

Akshay30
/

decipherai-api

Sleeping

App Files Files Community

decipherai-api / utils /gpu_diagnostics.py

Akshay30

Initial DecipherAI backend deployment

2f4af3f 4 days ago

raw

history blame contribute delete

6.06 kB

	import torch
	import gc

	# Registry of processor instances keyed by name. register_processor() adds
	# entries; reclaim_vram_for() iterates them to find models to move off the GPU.
	_active_processors = {}

	def register_processor(name, processor_instance):
	    """Record ``processor_instance`` under ``name`` in the module registry.

	    The registry is what ``reclaim_vram_for`` walks when it moves other
	    processors' models off the GPU. Registering a name that already exists
	    replaces the previous entry.
	    """
	    _active_processors.update({name: processor_instance})
	    print("[VRAM MANAGER] Registered processor: {}".format(name))

	# Maps a registered processor name to the (attribute name, log label) pairs
	# of the models that processor may be holding on the GPU.
	_OFFLOAD_TARGETS = {
	    "greek": (("trocr_model", "Greek TrOCR"),),
	    "latin": (
	        ("tridis_model", "Latin TRIDIS"),
	        ("trocr_latin_model", "Latin TrOCR"),
	    ),
	    "cuneiform": (
	        ("clip_model", "Cuneiform CLIP"),
	        ("cuneiform_model", "Cuneiform Translator"),
	    ),
	    "egyptian": (("_model", "Egyptian T5"),),
	}


	def _offload_model_to_cpu(model, label):
	    """Move ``model`` to the CPU if its first parameter lives on a CUDA device.

	    Returns True when the model was moved. Returns False when ``model`` is
	    None, has no parameters, or its first parameter is not on a CUDA device.
	    Exceptions raised by the model itself propagate to the caller.
	    """
	    if model is None:
	        return False

	    # A default of None avoids StopIteration for parameterless models.
	    first_param = next(iter(model.parameters()), None)
	    if first_param is None:
	        return False

	    if not str(first_param.device).startswith("cuda"):
	        return False

	    print(f"[VRAM MANAGER] Offloading {label} to CPU...")
	    model.to("cpu")
	    return True


	def reclaim_vram_for(target_processor_name):
	    """Offload other processors' models from GPU to CPU to reduce OOM risk.

	    Every registered processor except ``target_processor_name`` is inspected
	    for the model attributes listed in ``_OFFLOAD_TARGETS``; each model found
	    on a CUDA device is moved to the CPU. Processors with unknown names are
	    ignored. If at least one model was moved, garbage collection runs and the
	    CUDA cache is emptied. Does nothing when CUDA is unavailable.

	    Each model is handled independently: a failure while offloading one model
	    is logged as a warning and does not prevent the remaining models (of the
	    same or other processors) from being offloaded.

	    Args:
	        target_processor_name: Registry name of the processor that is about
	            to use the GPU and must therefore be left untouched.

	    Returns:
	        None.
	    """
	    if not torch.cuda.is_available():
	        return

	    print(f"[VRAM MANAGER] Reclaiming GPU VRAM for '{target_processor_name}'...")
	    offloaded = False

	    # Iterate over a snapshot so the registry may change without
	    # invalidating the loop.
	    for name, proc in list(_active_processors.items()):
	        if name == target_processor_name:
	            continue

	        for attr, label in _OFFLOAD_TARGETS.get(name, ()):
	            try:
	                if _offload_model_to_cpu(getattr(proc, attr, None), label):
	                    offloaded = True
	            except Exception as e:
	                # Best effort: keep going so one broken model cannot pin
	                # the others on the GPU.
	                print(f"[WARN] Failed to offload '{name}' models: {e}")

	    if offloaded:
	        gc.collect()
	        torch.cuda.empty_cache()
	        print("[VRAM MANAGER] VRAM cache cleared successfully.")

	def get_gpu_info():
	    """Collect a snapshot of CUDA availability and GPU memory figures.

	    Returns:
	        A dict with the keys ``cuda_available``, ``gpu_name``,
	        ``vram_total_gb``, ``vram_allocated_gb``, ``vram_cached_gb``,
	        ``vram_free_gb``, ``cuda_version`` and ``device``.

	    Without CUDA the placeholder values ("N/A", 0.0, "cpu") are returned.
	    With CUDA, device index 0 is queried; if a query raises, a warning is
	    printed and the fields not yet filled keep their placeholders.
	    """
	    gib = 1024**3
	    has_cuda = torch.cuda.is_available()

	    info = {
	        "cuda_available": has_cuda,
	        "gpu_name": "N/A",
	        "vram_total_gb": 0.0,
	        "vram_allocated_gb": 0.0,
	        "vram_cached_gb": 0.0,
	        "vram_free_gb": 0.0,
	        "cuda_version": "N/A",
	        "device": "cpu",
	    }
	    if not has_cuda:
	        return info

	    info["cuda_version"] = torch.version.cuda
	    info["device"] = "cuda"

	    try:
	        info["gpu_name"] = torch.cuda.get_device_name(0)
	        props = torch.cuda.get_device_properties(0)
	        total_bytes = props.total_memory
	        info["vram_total_gb"] = round(total_bytes / gib, 2)

	        allocated_bytes = torch.cuda.memory_allocated(0)
	        reserved_bytes = torch.cuda.memory_reserved(0)
	        info["vram_allocated_gb"] = round(allocated_bytes / gib, 3)
	        info["vram_cached_gb"] = round(reserved_bytes / gib, 3)

	        try:
	            free_bytes, _total_bytes = torch.cuda.mem_get_info(0)
	            free_gb = round(free_bytes / gib, 3)
	        except Exception:
	            # Fallback estimate when mem_get_info cannot be used: total
	            # memory minus what this process has allocated.
	            free_gb = round((total_bytes - allocated_bytes) / gib, 3)
	        info["vram_free_gb"] = free_gb
	    except Exception as err:
	        print(f"[WARN] Error gathering detailed GPU info: {err}")

	    return info

	def log_gpu_info():
	    """Print a banner summarising the result of ``get_gpu_info()``.

	    With CUDA available the banner lists the CUDA version, GPU model, total
	    and free VRAM; otherwise it reports that the CPU is the active device.
	    """
	    info = get_gpu_info()
	    rule = "=" * 60

	    report = [
	        rule,
	        " NVIDIA GPU & CUDA INITIALIZATION DIAGNOSTICS",
	        rule,
	        f"CUDA Available: {info['cuda_available']}",
	    ]
	    if info["cuda_available"]:
	        report.extend(
	            [
	                f"CUDA Version: {info['cuda_version']}",
	                f"GPU Model: {info['gpu_name']}",
	                f"Total VRAM: {info['vram_total_gb']} GB",
	                f"Free VRAM: {info['vram_free_gb']} GB",
	                "Active Device: CUDA (Dynamic Offloading Enabled)",
	            ]
	        )
	    else:
	        report.append("Active Device: CPU (GPU acceleration not available)")
	    report.append(rule)

	    for line in report:
	        print(line)

	def log_model_device(model_name, device):
	    """Print which device ``model_name`` was assigned to.

	    ``device`` is converted with ``str()`` and upper-cased for display.
	    """
	    device_label = str(device).upper()
	    print(
	        "[DEVICE LOG] Model '{}' -> Assigned to: {}".format(model_name, device_label)
	    )

	def clear_gpu_cache():
	    """Run garbage collection and empty the CUDA cache when CUDA is available.

	    Does nothing on machines without CUDA.
	    """
	    if not torch.cuda.is_available():
	        return
	    gc.collect()
	    torch.cuda.empty_cache()