Text Generation
PEFT
lora
trl
naming
brand-generation
controllable-generation
nomen-ai / scripts /preflight_gpu.py
krystv's picture
Add GPU preflight diagnostics
a544dd6 verified
"""GPU preflight diagnostics for Nomen-AI training.
Run before smoke/SFT/DPO to fail early if CUDA or VRAM is unavailable.
"""
import sys
import torch
def main(min_vram_gb: float = 14) -> None:
    """Print GPU diagnostics and abort unless a large-enough CUDA GPU exists.

    Prints the torch version and CUDA availability, then, for the current
    CUDA device, its name, total VRAM and compute capability. On success the
    last line printed is ``GPU_PREFLIGHT_PASS``.

    Args:
        min_vram_gb: Minimum total VRAM the current device must report, in
            decimal gigabytes (bytes / 1e9). Defaults to 14, the threshold
            this check has always used.

    Raises:
        SystemExit: With an ``ERROR: ...`` message when CUDA is unavailable
            or the device reports less than ``min_vram_gb`` of total memory.
    """
    cuda_ok = torch.cuda.is_available()
    print('torch:', torch.__version__)
    print('cuda_available:', cuda_ok)
    if not cuda_ok:
        raise SystemExit(
            'ERROR: CUDA is not available. Use a Colab GPU/T4 runtime or '
            'Docker with NVIDIA runtime.'
        )

    device = torch.cuda.current_device()
    name = torch.cuda.get_device_name(device)
    props = torch.cuda.get_device_properties(device)
    # total_memory is in bytes; report decimal GB to match the threshold unit.
    total_gb = props.total_memory / 1e9

    print('gpu_name:', name)
    print('total_vram_gb:', round(total_gb, 2))
    print('compute_capability:', f'{props.major}.{props.minor}')

    if total_gb < min_vram_gb:
        # Report the threshold actually applied so the message stays truthful
        # when a caller raises or lowers the requirement.
        raise SystemExit(
            f'ERROR: VRAM {total_gb:.1f}GB is below the required '
            f'{min_vram_gb:g}GB minimum (expected T4-class 15GB). '
            'Use T4/A10G or larger.'
        )
    print('GPU_PREFLIGHT_PASS')


if __name__ == '__main__':
    main()