+

PyTorch Native - SwiGLU Activation (macOS)

+

System Info

+
+
+ +▼ code +▼ output + ▶ uv-logs + | +Cell: sysinfo | 6.52s + | + +Raw +GitHub +
+
+
+
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch==2.8.0",
# ]
# ///
# Report the platform, Python, and PyTorch details for this benchmark run.
# numpy is listed as a dependency only so that importing torch does not emit
# the "Failed to initialize NumPy: No module named 'numpy'" UserWarning.
import platform
import subprocess

print(f"Platform: {platform.system()} {platform.machine()}")
print(f"Python: {platform.python_version()}")

# torch is imported after the platform lines on purpose: if the import fails,
# the platform and interpreter details have already been printed.
import torch

print(f"PyTorch: {torch.__version__}")
# Report whether the Metal Performance Shaders (MPS) backend is usable here.
print(f"MPS available: {torch.backends.mps.is_available()}")
+
+ +
+
+
+
+
Platform: Darwin arm64
+Python: 3.11.14
+PyTorch: 2.8.0
+MPS available: True
+
+
+
▶ UV Install Logs
+ +
+
/Users/runner/work/_temp/setup-uv-cache/environments-v2/sysinfo-29fbb5e6dd1955a1/lib/python3.11/site-packages/torch/_subclasses/functional_tensor.py:279: UserWarning: Failed to initialize NumPy: No module named 'numpy' (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/tensor_numpy.cpp:81.) + cpu = _conversion_method_template(device=torch.device("cpu"))
+
+
+ +

SwiGLU Benchmark (PyTorch Native - macOS)

+
+
+ +▼ code +▼ output + ▶ uv-logs + | +Cell: benchmark | 36.20s + | + +Raw +GitHub +
+
+
+
# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "numpy",
+#     "torch==2.8.0",
+#     "kernels-benchmark-tools",
+# ]
+#
+# [tool.uv.sources]
+# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
+# ///
+import torch
+import sys
+from kernels_benchmark_tools import KernelTypeEnum, run_benchmark
+import torch, torch.nn.functional as F
+
+
def swiglu_eager(x):
    """Apply SwiGLU to ``x`` using plain PyTorch eager ops.

    The last dimension of ``x`` is split into two equal halves and the result
    is ``silu(first_half) * second_half``.

    Args:
        x: Tensor whose last dimension has an even size.

    Returns:
        Tensor with the same leading dimensions as ``x`` and a last dimension
        of size ``x.shape[-1] // 2``.

    Raises:
        ValueError: If the last dimension of ``x`` has an odd size.
    """
    width = x.shape[-1]
    if width % 2:
        # Unequal halves either fail inside the multiply with an opaque size
        # mismatch or, for widths 1 and 3, broadcast silently into a result
        # of the wrong shape.
        raise ValueError(
            f"swiglu_eager expects an even last dimension, got {width}"
        )
    d = width // 2
    return F.silu(x[..., :d]) * x[..., d:]
+
+
# Hand the eager SwiGLU implementation to the benchmark harness imported from
# kernels_benchmark_tools, registered as "torch_eager_darwin" with tags that
# identify it as the PyTorch eager path on darwin.
# NOTE(review): the captured output of this cell reports CORRECTNESS FAILURE
# (ok=False) for all nine cpu workloads. The reference values look more
# coarsely rounded than the outputs (e.g. ref=-7.75 vs out=-7.7991), so this
# may be a dtype/tolerance mismatch in the harness rather than an error in
# swiglu_eager. Confirm against the harness before trusting the timings.
+run_benchmark(
+    kernel_type=KernelTypeEnum.ACTIVATION,
+    impl_name="torch_eager_darwin",
+    impl_tags={"family":"pytorch", "backend":"eager", "platform": "darwin"},
+    impl_func=swiglu_eager,
+)
+
+ +
+
+
+
+
Running activation benchmark on cpu with 9 workloads.
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T128_D768
+======================================================================
+tensor([-0.1372, -0.0743, -0.1620, -0.0605,  0.9656])
+tensor([-0.1367, -0.0742, -0.1611, -0.0608,  0.9648])
+torch.Size([128, 768])
+torch.Size([128, 768])
+Diff stats:
+  max: 0.04913330078125
+  mean: 0.0008915023063309491
+  mse: 4.496400833886582e-06
+Top 5 most different elements (index: value):
+  9070: diff=0.04913330078125, out=-7.79913330078125, ref=-7.75
+  24359: diff=0.04716157913208008, out=-7.04658842086792, ref=-7.09375
+  32951: diff=0.04714775085449219, out=4.952852249145508, ref=5.0
+  73101: diff=0.04227447509765625, out=5.261024475097656, ref=5.21875
+  69062: diff=0.040175437927246094, out=-4.553574562072754, ref=-4.59375
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T128_D768
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        44.59%       6.820ms       100.00%      15.295ms      15.295ms             1  
+            aten::silu        28.59%       4.373ms        28.59%       4.373ms       1.458ms             3  
+           aten::slice        22.18%       3.392ms        22.42%       3.429ms     571.467us             6  
+             aten::mul         4.41%     673.951us         4.41%     673.951us     224.650us             3  
+      aten::as_strided         0.24%      36.334us         0.24%      36.334us       6.056us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 15.295ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T128_D1024
+======================================================================
+tensor([0.2989, 0.0356, 0.0747, 0.2235, 0.3924])
+tensor([0.2988, 0.0356, 0.0742, 0.2246, 0.3926])
+torch.Size([128, 1024])
+torch.Size([128, 1024])
+Diff stats:
+  max: 0.06802082061767578
+  mean: 0.0008884685230441391
+  mse: 4.475335117604118e-06
+Top 5 most different elements (index: value):
+  98566: diff=0.06802082061767578, out=9.005520820617676, ref=8.9375
+  105439: diff=0.058165550231933594, out=-7.816834449768066, ref=-7.875
+  101051: diff=0.050549983978271484, out=-4.8005499839782715, ref=-4.75
+  108736: diff=0.04906034469604492, out=4.794689655303955, ref=4.84375
+  113765: diff=0.048272132873535156, out=-5.857977867126465, ref=-5.90625
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T128_D1024
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        14.33%     153.966us       100.00%       1.074ms       1.074ms             1  
+            aten::silu        67.17%     721.584us        67.17%     721.584us     240.528us             3  
+             aten::mul        15.74%     169.043us        15.74%     169.043us      56.348us             3  
+           aten::slice         2.20%      23.619us         2.76%      29.702us       4.950us             6  
+      aten::as_strided         0.57%       6.083us         0.57%       6.083us       1.014us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 1.074ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T128_D2048
+======================================================================
+tensor([-0.1836, -0.2447, -0.0512,  0.1101, -0.2620])
+tensor([-0.1826, -0.2451, -0.0510,  0.1104, -0.2617])
+torch.Size([128, 2048])
+torch.Size([128, 2048])
+Diff stats:
+  max: 0.07091712951660156
+  mean: 0.0008893357589840889
+  mse: 4.469751274882583e-06
+Top 5 most different elements (index: value):
+  179851: diff=0.07091712951660156, out=7.414667129516602, ref=7.34375
+  21382: diff=0.06355762481689453, out=-5.3114423751831055, ref=-5.375
+  210130: diff=0.059961795806884766, out=-7.690038204193115, ref=-7.75
+  11530: diff=0.05908966064453125, out=10.184089660644531, ref=10.125
+  176690: diff=0.05261039733886719, out=4.740110397338867, ref=4.6875
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T128_D2048
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        15.48%     486.211us       100.00%       3.142ms       3.142ms             1  
+            aten::silu        66.09%       2.076ms        66.09%       2.076ms     692.102us             3  
+             aten::mul        16.13%     506.835us        16.13%     506.835us     168.945us             3  
+           aten::slice         1.78%      56.043us         2.30%      72.251us      12.042us             6  
+      aten::as_strided         0.52%      16.208us         0.52%      16.208us       2.701us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 3.142ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T256_D768
+======================================================================
+tensor([-0.1372, -0.0743, -0.1620, -0.0605,  0.9656])
+tensor([-0.1367, -0.0742, -0.1611, -0.0608,  0.9648])
+torch.Size([256, 768])
+torch.Size([256, 768])
+Diff stats:
+  max: 0.04913330078125
+  mean: 0.0008873133920133114
+  mse: 4.3958548303635325e-06
+Top 5 most different elements (index: value):
+  9070: diff=0.04913330078125, out=-7.79913330078125, ref=-7.75
+  24359: diff=0.04716157913208008, out=-7.04658842086792, ref=-7.09375
+  32951: diff=0.04714775085449219, out=4.952852249145508, ref=5.0
+  154833: diff=0.0467376708984375, out=-5.7032623291015625, ref=-5.75
+  136168: diff=0.04372358322143555, out=5.8312764167785645, ref=5.875
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T256_D768
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        14.43%     258.000us       100.00%       1.787ms       1.787ms             1  
+            aten::silu        53.16%     950.142us        53.16%     950.142us     316.714us             3  
+             aten::mul        29.94%     535.121us        29.94%     535.121us     178.374us             3  
+           aten::slice         2.01%      35.962us         2.47%      44.214us       7.369us             6  
+      aten::as_strided         0.46%       8.252us         0.46%       8.252us       1.375us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 1.787ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T256_D1024
+======================================================================
+tensor([0.2989, 0.0356, 0.0747, 0.2235, 0.3924])
+tensor([0.2988, 0.0356, 0.0742, 0.2246, 0.3926])
+torch.Size([256, 1024])
+torch.Size([256, 1024])
+Diff stats:
+  max: 0.06802082061767578
+  mean: 0.0008889895398169756
+  mse: 4.431089109857567e-06
+Top 5 most different elements (index: value):
+  98566: diff=0.06802082061767578, out=9.005520820617676, ref=8.9375
+  105439: diff=0.058165550231933594, out=-7.816834449768066, ref=-7.875
+  101051: diff=0.050549983978271484, out=-4.8005499839782715, ref=-4.75
+  108736: diff=0.04906034469604492, out=4.794689655303955, ref=4.84375
+  113765: diff=0.048272132873535156, out=-5.857977867126465, ref=-5.90625
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T256_D1024
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        11.50%     358.973us       100.00%       3.121ms       3.121ms             1  
+            aten::silu        62.91%       1.963ms        62.91%       1.963ms     654.428us             3  
+             aten::mul        24.31%     758.572us        24.31%     758.572us     252.857us             3  
+           aten::slice         1.04%      32.498us         1.29%      40.124us       6.687us             6  
+      aten::as_strided         0.24%       7.626us         0.24%       7.626us       1.271us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 3.121ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T256_D2048
+======================================================================
+tensor([-0.1836, -0.2447, -0.0512,  0.1101, -0.2620])
+tensor([-0.1826, -0.2451, -0.0510,  0.1104, -0.2617])
+torch.Size([256, 2048])
+torch.Size([256, 2048])
+Diff stats:
+  max: 0.08395957946777344
+  mean: 0.0008889408782124519
+  mse: 4.476671620068373e-06
+Top 5 most different elements (index: value):
+  439480: diff=0.08395957946777344, out=9.978540420532227, ref=10.0625
+  179851: diff=0.07091712951660156, out=7.414667129516602, ref=7.34375
+  21382: diff=0.06355762481689453, out=-5.3114423751831055, ref=-5.375
+  210130: diff=0.059961795806884766, out=-7.690038204193115, ref=-7.75
+  11530: diff=0.05908966064453125, out=10.184089660644531, ref=10.125
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T256_D2048
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        15.81%       1.194ms       100.00%       7.551ms       7.551ms             1  
+            aten::silu        61.23%       4.623ms        61.23%       4.623ms       1.541ms             3  
+             aten::mul        21.01%       1.586ms        21.01%       1.586ms     528.681us             3  
+           aten::slice         1.55%     117.206us         1.96%     147.833us      24.639us             6  
+      aten::as_strided         0.41%      30.627us         0.41%      30.627us       5.104us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 7.551ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T512_D768
+======================================================================
+tensor([-0.1372, -0.0743, -0.1620, -0.0605,  0.9656])
+tensor([-0.1367, -0.0742, -0.1611, -0.0608,  0.9648])
+torch.Size([512, 768])
+torch.Size([512, 768])
+Diff stats:
+  max: 0.05687236785888672
+  mean: 0.0008884922135621309
+  mse: 4.399109002406476e-06
+Top 5 most different elements (index: value):
+  361157: diff=0.05687236785888672, out=-8.994372367858887, ref=-8.9375
+  324514: diff=0.04921436309814453, out=4.1382856369018555, ref=4.1875
+  9070: diff=0.04913330078125, out=-7.79913330078125, ref=-7.75
+  240463: diff=0.0480494499206543, out=-5.608200550079346, ref=-5.65625
+  24359: diff=0.04716157913208008, out=-7.04658842086792, ref=-7.09375
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T512_D768
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        20.25%     695.500us       100.00%       3.435ms       3.435ms             1  
+            aten::silu        54.52%       1.873ms        54.52%       1.873ms     624.208us             3  
+             aten::mul        23.71%     814.334us        23.71%     814.334us     271.445us             3  
+           aten::slice         1.25%      43.042us         1.53%      52.541us       8.757us             6  
+      aten::as_strided         0.28%       9.499us         0.28%       9.499us       1.583us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 3.435ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T512_D1024
+======================================================================
+tensor([0.2989, 0.0356, 0.0747, 0.2235, 0.3924])
+tensor([0.2988, 0.0356, 0.0742, 0.2246, 0.3926])
+torch.Size([512, 1024])
+torch.Size([512, 1024])
+Diff stats:
+  max: 0.06802082061767578
+  mean: 0.0008890957687981427
+  mse: 4.448749677976593e-06
+Top 5 most different elements (index: value):
+  98566: diff=0.06802082061767578, out=9.005520820617676, ref=8.9375
+  497777: diff=0.058727264404296875, out=8.433727264404297, ref=8.375
+  105439: diff=0.058165550231933594, out=-7.816834449768066, ref=-7.875
+  367857: diff=0.05556774139404297, out=7.631932258605957, ref=7.6875
+  341485: diff=0.052660465240478516, out=-4.5410895347595215, ref=-4.59375
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T512_D1024
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        39.22%       2.899ms       100.00%       7.392ms       7.392ms             1  
+            aten::silu        44.24%       3.270ms        44.24%       3.270ms       1.090ms             3  
+             aten::mul        15.74%       1.164ms        15.74%       1.164ms     387.903us             3  
+           aten::slice         0.63%      46.920us         0.80%      59.461us       9.910us             6  
+      aten::as_strided         0.17%      12.541us         0.17%      12.541us       2.090us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 7.392ms
+
+
+
+======================================================================
+CORRECTNESS FAILURE: torch_eager_darwin | cpu_T512_D2048
+======================================================================
+tensor([-0.1836, -0.2447, -0.0512,  0.1101, -0.2620])
+tensor([-0.1826, -0.2451, -0.0510,  0.1104, -0.2617])
+torch.Size([512, 2048])
+torch.Size([512, 2048])
+Diff stats:
+  max: 0.09098148345947266
+  mean: 0.0008892239420674741
+  mse: 4.500504473980982e-06
+Top 5 most different elements (index: value):
+  869075: diff=0.09098148345947266, out=-9.403481483459473, ref=-9.3125
+  439480: diff=0.08395957946777344, out=9.978540420532227, ref=10.0625
+  179851: diff=0.07091712951660156, out=7.414667129516602, ref=7.34375
+  21382: diff=0.06355762481689453, out=-5.3114423751831055, ref=-5.375
+  776526: diff=0.060305118560791016, out=-5.377194881439209, ref=-5.4375
+
+
+======================================================================
+PROFILE TRACE: torch_eager_darwin | cpu_T512_D2048
+======================================================================
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+    torch_eager_darwin        10.59%       2.513ms       100.00%      23.734ms      23.734ms             1  
+            aten::silu        55.91%      13.269ms        55.91%      13.269ms       4.423ms             3  
+             aten::mul        32.61%       7.739ms        32.61%       7.739ms       2.580ms             3  
+           aten::slice         0.74%     176.334us         0.90%     214.043us      35.674us             6  
+      aten::as_strided         0.16%      37.709us         0.16%      37.709us       6.285us             6  
+----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
+Self CPU time total: 23.734ms
+
+
+impl                     wl                  p50(ms)  ok
+torch_eager_darwin       cpu_T128_D1024         0.18  False
+torch_eager_darwin       cpu_T128_D2048         0.38  False
+torch_eager_darwin       cpu_T128_D768          0.14  False
+torch_eager_darwin       cpu_T256_D1024         0.40  False
+torch_eager_darwin       cpu_T256_D2048         0.98  False
+torch_eager_darwin       cpu_T256_D768          0.33  False
+torch_eager_darwin       cpu_T512_D1024         1.63  False
+torch_eager_darwin       cpu_T512_D2048         3.37  False
+torch_eager_darwin       cpu_T512_D768          0.87  False
+
+
+
▶ UV Install Logs
+ +
+
Matplotlib is building the font cache; this may take a moment.
+
+

Artifacts:

+activation.jsonl +
+
+
+