Spaces:
Sleeping
Sleeping
| import torch | |
| import gc | |
# Registry of processor instances keyed by processor name. It is filled by
# register_processor() and scanned by reclaim_vram_for() to find models that
# can be moved off the GPU.
_active_processors = {}
def register_processor(name, processor_instance):
    """Store *processor_instance* in the module registry under *name*.

    An entry already registered under the same name is replaced. A
    confirmation line is printed after the registry has been updated.

    Args:
        name: Key used to look the processor up later (e.g. when deciding
            which processors may be offloaded).
        processor_instance: The processor object to keep track of.
    """
    _active_processors[name] = processor_instance
    confirmation = f"[VRAM MANAGER] Registered processor: {name}"
    print(confirmation)
# Offload plan: processor name -> ((model attribute, label used in logs), ...).
# Processors whose name is not listed here are left untouched.
_OFFLOAD_TARGETS = {
    "greek": (("trocr_model", "Greek TrOCR"),),
    "latin": (
        ("tridis_model", "Latin TRIDIS"),
        ("trocr_latin_model", "Latin TrOCR"),
    ),
    "cuneiform": (
        ("clip_model", "Cuneiform CLIP"),
        ("cuneiform_model", "Cuneiform Translator"),
    ),
    "egyptian": (("_model", "Egyptian T5"),),
}


def _offload_model_to_cpu(model, label):
    """Move *model* to the CPU if its first parameter lives on a CUDA device.

    Args:
        model: Object exposing ``parameters()`` and ``to()`` (a torch module).
        label: Human-readable model name inserted into the log line.

    Returns:
        True if the model was moved, False if it was already off the GPU or
        has no parameters to inspect.
    """
    # A default for next() keeps a parameterless module from raising
    # StopIteration, which would otherwise be reported as an offload failure.
    first_param = next(model.parameters(), None)
    if first_param is None or first_param.device.type != "cuda":
        return False
    print(f"[VRAM MANAGER] Offloading {label} to CPU...")
    model.to("cpu")
    return True


def reclaim_vram_for(target_processor_name):
    """Offload other processors' models from GPU to CPU to avoid OOM crashes.

    Every registered processor except *target_processor_name* is inspected;
    each of its known models that currently sits on a CUDA device is moved to
    the CPU. If at least one model was moved, garbage collection is run and
    the CUDA cache is emptied. Does nothing when CUDA is unavailable.

    Failures are handled per model: an error while offloading one model is
    logged as a warning and does not prevent the remaining models (of the same
    or of other processors) from being offloaded.

    Args:
        target_processor_name: Name of the processor that is about to use the
            GPU and therefore must be left in place.
    """
    if not torch.cuda.is_available():
        return

    print(f"[VRAM MANAGER] Reclaiming GPU VRAM for '{target_processor_name}'...")

    offloaded = False
    # Iterate over a snapshot so the registry may change while we work.
    for name, proc in list(_active_processors.items()):
        if name == target_processor_name:
            continue
        for attr_name, label in _OFFLOAD_TARGETS.get(name, ()):
            try:
                model = getattr(proc, attr_name, None)
                if model is not None and _offload_model_to_cpu(model, label):
                    offloaded = True
            except Exception as e:
                # Best effort: report and carry on with the next model.
                print(f"[WARN] Failed to offload '{name}' models: {e}")

    if offloaded:
        gc.collect()
        torch.cuda.empty_cache()
        print("[VRAM MANAGER] VRAM cache cleared successfully.")
def get_gpu_info():
    """Collect a diagnostic snapshot of CUDA device 0.

    Returns:
        A dict with the keys ``cuda_available``, ``gpu_name``,
        ``vram_total_gb``, ``vram_allocated_gb``, ``vram_cached_gb``,
        ``vram_free_gb``, ``cuda_version`` and ``device``. Memory figures are
        byte counts divided by 1024**3 and rounded. Without CUDA the
        placeholder values ("N/A", 0.0, "cpu") are returned unchanged.

    Errors raised while querying the device are printed as a warning; the
    fields gathered up to that point are still returned.
    """
    bytes_per_unit = 1024**3
    has_cuda = torch.cuda.is_available()

    report = {
        "cuda_available": has_cuda,
        "gpu_name": "N/A",
        "vram_total_gb": 0.0,
        "vram_allocated_gb": 0.0,
        "vram_cached_gb": 0.0,
        "vram_free_gb": 0.0,
        "cuda_version": torch.version.cuda if has_cuda else "N/A",
        "device": "cpu",
    }
    if not has_cuda:
        return report

    report["device"] = "cuda"
    try:
        report["gpu_name"] = torch.cuda.get_device_name(0)
        device_props = torch.cuda.get_device_properties(0)
        report["vram_total_gb"] = round(device_props.total_memory / bytes_per_unit, 2)

        allocated_bytes = torch.cuda.memory_allocated(0)
        reserved_bytes = torch.cuda.memory_reserved(0)
        report["vram_allocated_gb"] = round(allocated_bytes / bytes_per_unit, 3)
        report["vram_cached_gb"] = round(reserved_bytes / bytes_per_unit, 3)

        try:
            free_bytes, _ = torch.cuda.mem_get_info(0)
        except Exception:
            # Fall back to an estimate when the driver query is unavailable.
            free_bytes = device_props.total_memory - allocated_bytes
        report["vram_free_gb"] = round(free_bytes / bytes_per_unit, 3)
    except Exception as e:
        print(f"[WARN] Error gathering detailed GPU info: {e}")
    return report
def log_gpu_info():
    """Print a framed GPU/CUDA diagnostics report.

    The report is built from ``get_gpu_info()``. With CUDA available it lists
    the CUDA version, GPU model, total and free VRAM and the active device;
    otherwise it states that the CPU is used.
    """
    gpu = get_gpu_info()
    rule = "=" * 60

    report_lines = [
        rule,
        " NVIDIA GPU & CUDA INITIALIZATION DIAGNOSTICS",
        rule,
        f"CUDA Available: {gpu['cuda_available']}",
    ]
    if gpu["cuda_available"]:
        report_lines.extend(
            [
                f"CUDA Version: {gpu['cuda_version']}",
                f"GPU Model: {gpu['gpu_name']}",
                f"Total VRAM: {gpu['vram_total_gb']} GB",
                f"Free VRAM: {gpu['vram_free_gb']} GB",
                "Active Device: CUDA (Dynamic Offloading Enabled)",
            ]
        )
    else:
        report_lines.append("Active Device: CPU (GPU acceleration not available)")
    report_lines.append(rule)

    for line in report_lines:
        print(line)
def log_model_device(model_name, device):
    """Print which device a model has been assigned to.

    Args:
        model_name: Name of the model, shown in quotes in the log line.
        device: Device identifier; converted with ``str()`` and upper-cased
            for display.
    """
    device_label = str(device).upper()
    print(f"[DEVICE LOG] Model '{model_name}' -> Assigned to: {device_label}")
def clear_gpu_cache():
    """Run garbage collection and empty the CUDA cache.

    Intended for use between benchmark or processing steps. Does nothing
    when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return
    gc.collect()
    torch.cuda.empty_cache()