File size: 1,286 Bytes
"""GPU smoke test — verify Blackwell SM_120 kernels work with the installed torch."""
import sys
import torch
def main() -> int:
    """Run a CUDA smoke test and report the outcome as an exit status.

    Prints the torch version, whether CUDA is available, the CUDA version
    torch was built against, the device count, and the name and compute
    capability of device 0. It then runs a 1024x1024 bfloat16 matmul on the
    GPU and finally prints the free and total memory of device 0.

    Returns:
        0 when every step succeeded, 1 when CUDA is not available, and 2
        when the bfloat16 matmul (or reading back its norm) raised.
    """
    exit_ok, exit_no_cuda, exit_matmul_failed = 0, 1, 2

    print(f"torch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if not torch.cuda.is_available():
        print("FAIL: CUDA not available", file=sys.stderr)
        return exit_no_cuda

    print(f"CUDA version (torch built against): {torch.version.cuda}")
    print(f"Device count: {torch.cuda.device_count()}")

    cap = torch.cuda.get_device_capability(0)
    name = torch.cuda.get_device_name(0)
    print(f"Device 0: {name} (SM_{cap[0]}{cap[1]})")

    # Launch an actual kernel: querying device metadata alone can succeed
    # even when kernels for this SM architecture are missing.
    shape = (1024, 1024)
    tensor_opts = {"device": "cuda", "dtype": torch.bfloat16}
    try:
        lhs = torch.randn(shape, **tensor_opts)
        rhs = torch.randn(shape, **tensor_opts)
        c = torch.matmul(lhs, rhs)
        # Wait for the GPU so a failure is raised inside this try block.
        torch.cuda.synchronize()
        print(f"bf16 matmul OK, result norm: {c.float().norm().item():.4f}")
    except Exception as e:
        print(f"FAIL: bf16 matmul: {e}", file=sys.stderr)
        return exit_matmul_failed

    free, total = torch.cuda.mem_get_info(0)
    print(f"VRAM free / total: {free / 1e9:.2f} GB / {total / 1e9:.2f} GB")
    return exit_ok
# Script entry point: run the smoke test and use its result as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|