"""GPU preflight diagnostics for Nomen-AI training. Run before smoke/SFT/DPO to fail early if CUDA or VRAM is unavailable. """ import sys import torch def main(): print('torch:', torch.__version__) print('cuda_available:', torch.cuda.is_available()) if not torch.cuda.is_available(): raise SystemExit('ERROR: CUDA is not available. Use a Colab GPU/T4 runtime or Docker with NVIDIA runtime.') device = torch.cuda.current_device() name = torch.cuda.get_device_name(device) props = torch.cuda.get_device_properties(device) total_gb = props.total_memory / 1e9 print('gpu_name:', name) print('total_vram_gb:', round(total_gb, 2)) print('compute_capability:', f'{props.major}.{props.minor}') if total_gb < 14: raise SystemExit(f'ERROR: VRAM {total_gb:.1f}GB is below expected T4-class 15GB. Use T4/A10G or larger.') print('GPU_PREFLIGHT_PASS') if __name__ == '__main__': main()