Spaces:

Akshay30
/

decipherai-api

Sleeping

File size: 6,064 Bytes

2f4af3f

import torch
import gc

# Registry of processor instances keyed by processor name. Entries are added
# by register_processor() and scanned by reclaim_vram_for() to find models
# that can be moved off the GPU.
_active_processors = {}

def register_processor(name, processor_instance):
    """Record ``processor_instance`` under ``name`` in the module registry.

    Registered processors become candidates for GPU-to-CPU offloading when
    another processor asks for VRAM. Registering an existing name replaces
    the previous entry.
    """
    _active_processors.update({name: processor_instance})
    print(f"[VRAM MANAGER] Registered processor: {name}")

# Processor name -> ((attribute holding a model, label used in log lines), ...).
# Only the attributes listed here are considered for offloading.
_OFFLOAD_TARGETS = {
    "greek": (
        ("trocr_model", "Greek TrOCR"),
    ),
    "latin": (
        ("tridis_model", "Latin TRIDIS"),
        ("trocr_latin_model", "Latin TrOCR"),
    ),
    "cuneiform": (
        ("clip_model", "Cuneiform CLIP"),
        ("cuneiform_model", "Cuneiform Translator"),
    ),
    "egyptian": (
        ("_model", "Egyptian T5"),
    ),
}


def _offload_model_to_cpu(model, label):
    """Move ``model`` to the CPU if any of its parameters is on a CUDA device.

    Returns True when a move was performed, False when the model was already
    off the GPU (or has no parameters at all).
    """
    # any() instead of next(): a parameterless module yields False rather than
    # raising StopIteration, and a model whose first parameter is on the CPU
    # but others are on the GPU is still detected.
    if not any(param.is_cuda for param in model.parameters()):
        return False
    print(f"[VRAM MANAGER] Offloading {label} to CPU...")
    model.to("cpu")
    return True


def reclaim_vram_for(target_processor_name):
    """Offload other processors' models from GPU to CPU to avoid OOM crashes.

    Every registered processor except ``target_processor_name`` is inspected;
    each of its known model attributes that currently sits on a CUDA device is
    moved to the CPU. If at least one model was moved, Python garbage
    collection is run and the CUDA caching allocator is emptied.

    Args:
        target_processor_name: Registry name of the processor that is about to
            use the GPU; its own models are left where they are.

    Returns:
        None. Does nothing when CUDA is unavailable. Offload failures are
        logged and never raised.
    """
    if not torch.cuda.is_available():
        return

    print(f"[VRAM MANAGER] Reclaiming GPU VRAM for '{target_processor_name}'...")
    offloaded = False

    # Iterate over a snapshot so a registration during the scan cannot break
    # the loop.
    for name, proc in list(_active_processors.items()):
        if name == target_processor_name:
            continue

        for attr, label in _OFFLOAD_TARGETS.get(name, ()):
            model = getattr(proc, attr, None)
            if model is None:
                continue
            # Guard each model separately: a failure on one model must not
            # prevent its siblings on the same processor from being offloaded.
            try:
                if _offload_model_to_cpu(model, label):
                    offloaded = True
            except Exception as e:
                print(f"[WARN] Failed to offload '{name}' models: {e}")

    if offloaded:
        gc.collect()
        torch.cuda.empty_cache()
        print("[VRAM MANAGER] VRAM cache cleared successfully.")

def get_gpu_info():
    """Collect a diagnostic snapshot of CUDA device 0.

    Returns:
        A dict with the keys ``cuda_available``, ``gpu_name``,
        ``vram_total_gb``, ``vram_allocated_gb``, ``vram_cached_gb``,
        ``vram_free_gb``, ``cuda_version`` and ``device``. Memory figures are
        bytes divided by 1024**3 and rounded. Without CUDA the placeholder
        values ("N/A", 0.0, "cpu") are returned. If a detailed query fails, a
        warning is printed and the fields gathered so far are kept.
    """
    has_cuda = torch.cuda.is_available()
    gib = 1024**3

    report = dict(
        cuda_available=has_cuda,
        gpu_name="N/A",
        vram_total_gb=0.0,
        vram_allocated_gb=0.0,
        vram_cached_gb=0.0,
        vram_free_gb=0.0,
        cuda_version=torch.version.cuda if has_cuda else "N/A",
        device="cpu",
    )
    if not has_cuda:
        return report

    report["device"] = "cuda"
    try:
        report["gpu_name"] = torch.cuda.get_device_name(0)
        device_props = torch.cuda.get_device_properties(0)
        report["vram_total_gb"] = round(device_props.total_memory / gib, 2)

        bytes_allocated = torch.cuda.memory_allocated(0)
        bytes_reserved = torch.cuda.memory_reserved(0)
        report["vram_allocated_gb"] = round(bytes_allocated / gib, 3)
        report["vram_cached_gb"] = round(bytes_reserved / gib, 3)

        try:
            bytes_free, _bytes_total = torch.cuda.mem_get_info(0)
            report["vram_free_gb"] = round(bytes_free / gib, 3)
        except Exception:
            # Fall back to an estimate when the driver query is unavailable.
            estimate = device_props.total_memory - bytes_allocated
            report["vram_free_gb"] = round(estimate / gib, 3)
    except Exception as e:
        print(f"[WARN] Error gathering detailed GPU info: {e}")

    return report

def log_gpu_info():
    """Print a banner summarising CUDA/GPU status, for use at startup.

    The figures come from get_gpu_info(). GPU details are listed only when
    CUDA is available; otherwise a single CPU line is shown.
    """
    gpu = get_gpu_info()
    rule = "=" * 60

    banner = [
        rule,
        "        NVIDIA GPU & CUDA INITIALIZATION DIAGNOSTICS",
        rule,
        f"CUDA Available:      {gpu['cuda_available']}",
    ]
    if not gpu["cuda_available"]:
        banner.append("Active Device:       CPU (GPU acceleration not available)")
    else:
        banner.extend(
            [
                f"CUDA Version:        {gpu['cuda_version']}",
                f"GPU Model:           {gpu['gpu_name']}",
                f"Total VRAM:          {gpu['vram_total_gb']} GB",
                f"Free VRAM:           {gpu['vram_free_gb']} GB",
                "Active Device:       CUDA (Dynamic Offloading Enabled)",
            ]
        )
    banner.append(rule)

    for line in banner:
        print(line)

def log_model_device(model_name, device):
    """Print which device ``model_name`` was placed on.

    ``device`` is converted with ``str()`` and upper-cased for display, so
    both strings and device objects are accepted.
    """
    device_label = str(device).upper()
    print(f"[DEVICE LOG] Model '{model_name}' -> Assigned to: {device_label}")

def clear_gpu_cache():
    """Run garbage collection and empty the CUDA cache; no-op without CUDA.

    Intended for use between benchmark runs or processing steps.
    """
    if not torch.cuda.is_available():
        return
    gc.collect()
    torch.cuda.empty_cache()