"""GPU smoke test — verify Blackwell SM_120 kernels work with the installed torch.""" import sys import torch def main() -> int: print(f"torch version: {torch.__version__}") print(f"CUDA available: {torch.cuda.is_available()}") if not torch.cuda.is_available(): print("FAIL: CUDA not available", file=sys.stderr) return 1 print(f"CUDA version (torch built against): {torch.version.cuda}") print(f"Device count: {torch.cuda.device_count()}") name = torch.cuda.get_device_name(0) cap = torch.cuda.get_device_capability(0) print(f"Device 0: {name} (SM_{cap[0]}{cap[1]})") # Real kernel test — pure metadata access can pass even when SM kernels are missing. try: a = torch.randn(1024, 1024, device="cuda", dtype=torch.bfloat16) b = torch.randn(1024, 1024, device="cuda", dtype=torch.bfloat16) c = a @ b torch.cuda.synchronize() print(f"bf16 matmul OK, result norm: {c.float().norm().item():.4f}") except Exception as e: print(f"FAIL: bf16 matmul: {e}", file=sys.stderr) return 2 free, total = torch.cuda.mem_get_info(0) print(f"VRAM free / total: {free / 1e9:.2f} GB / {total / 1e9:.2f} GB") return 0 if __name__ == "__main__": sys.exit(main())