+

HF Kernels - SwiGLU Activation

+

GPU Info

+
+
▼ code | ▼ output | ▶ uv-logs | Cell: nv | 0.25s | Raw | GitHub
+
+
+
# Print the host GPU inventory by shelling out to nvidia-smi.
# capture_output + text=True gives us the table as a str on .stdout.
import subprocess

print(subprocess.run(["nvidia-smi"], capture_output=True, text=True).stdout)
+
+ +
+
+
+
+
Wed Oct 22 08:58:23 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
| N/A   26C    P8             22W /  350W |       0MiB /  46068MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI              PID   Type   Process name                        GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|  No running processes found                                                             |
+-----------------------------------------------------------------------------------------+
+
+
+ +

SwiGLU Benchmark

+
+
▼ code | ▼ output | ▶ uv-logs | Cell: benchmark | 0.01s | FAILED | Raw | GitHub
+
+
+
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch",
#     "kernels-benchmark-tools",
#     "kernels",
# ]
#
# [tool.uv.sources]
# NOTE(review): this path is machine-specific; the recorded run failed with
# "Distribution not found" because it did not exist on the executing host.
# kernels-benchmark-tools = { path = "/home/ubuntu/Projects/kernels-benchmarks-consolidated/tools", editable = true }
# ///
import sys

import torch

import kernels_benchmark_tools as kbt
from kernels import get_kernel

# Load the fused activation kernel (presumably fetched from the Hugging Face
# Hub by the `kernels` library — confirm caching/network behavior if offline).
activation = get_kernel("kernels-community/activation")
+
+
def hf_kernels_swiglu(input_tensor):
    """SwiGLU via the HF kernels-community fused ``silu_and_mul`` op.

    The last dimension of ``input_tensor`` is treated as two stacked halves
    of size ``hidden_dim`` each (gate | up); the kernel computes
    ``silu(gate) * up`` into a freshly allocated output buffer.

    Args:
        input_tensor: tensor whose last dimension is ``2 * hidden_dim``.

    Returns:
        Tensor with the same leading shape and last dimension ``hidden_dim``.
    """
    hidden_dim = input_tensor.shape[-1] // 2
    out_shape = input_tensor.shape[:-1] + (hidden_dim,)
    out = torch.empty(out_shape, dtype=input_tensor.dtype, device=input_tensor.device)
    # The op writes into `out` in place. Return `out` explicitly rather than
    # the call's result: in-place custom ops of this style commonly return
    # None, which would have propagated to the benchmark comparator.
    activation.silu_and_mul(out, input_tensor)
    return out
+
+
# Register the implementation with the benchmark harness so kbt.run can
# discover it by name alongside other SwiGLU backends; tags feed the summary.
kbt.add(
    "hf_kernels_swiglu",
    hf_kernels_swiglu,
    tags={"family": "hf-kernels", "backend": "triton", "compile": "none"},
)
+
if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # The fused kernel is CUDA-only; exit gracefully (success) on CPU hosts
    # so CI on GPU-less machines does not report a failure.
    if device == "cpu":
        print("HF Kernels SwiGLU requires CUDA - skipping benchmark")
        sys.exit(0)

    dtype = "bfloat16"

    # Use only the first 3 LLaMA-shaped workloads to keep the run short.
    wl = list(kbt.activation.llama_workloads(dtype=dtype))[:3]

    print(f"Running SwiGLU benchmarks on {device} with {dtype}")
    print(f"Testing {len(wl)} workloads")

    # Benchmark against the harness's reference SwiGLU with an allclose
    # comparison; results append to activation.jsonl, and a profiler trace
    # is captured for each run.
    kbt.run(
        wl,
        jsonl="activation.jsonl",
        reps=5,
        warmup=2,
        gen=kbt.activation.gen_inputs,
        ref=kbt.activation.ref_swiglu,
        cmp=kbt.activation.cmp_allclose,
        profile_trace=True,
    )

    kbt.summarize(["activation.jsonl"])
+
+ +
+
+
+
+
× Failed to resolve script requirement
  ╰─▶ Distribution not found at:
      file:///home/ubuntu/Projects/kernels-benchmarks-consolidated/tools
+
+
+