{"ts": "2025-10-23T17:22:01Z", "run": "2868ab5dc1ce4d49ac015295dd5ab8d5", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "triton", "compile": "none"}, "wl": {"name": "llama_T512_D4096", "num_tokens": 512, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.023811000119167147, "p50": 0.024261000135084032, "p90": 0.024421000034635654, "mean": 0.024255200014522416, "iqr": 0.00023000006876827683, "raw_times": [0.024261000135084032, 0.023811000119167147, 0.024190999965867377, 0.024591999817857868, 0.024421000034635654], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03041099989786744, "peak_bytes": 46139392, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.01, "atol": 0.1, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "relmax": 0.0, "ref": "swiglu_bfloat16"}, "err": null} {"ts": "2025-10-23T17:22:01Z", "run": "2868ab5dc1ce4d49ac015295dd5ab8d5", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "triton", "compile": "none"}, "wl": {"name": "llama_T512_D8192", "num_tokens": 512, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.030561000130546745, "p50": 0.031221000199366244, "p90": 0.031622000051356736, "mean": 0.031125600116865826, "iqr": 0.001030000021273736, "raw_times": [0.030561000130546745, 0.031221000199366244, 0.030592000030083, 0.031622000051356736, 0.031632000172976404], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03244200001972786, "peak_bytes": 92276736, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.01, "atol": 0.1, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "relmax": 0.0, "ref": "swiglu_bfloat16"}, "err": null} {"ts": "2025-10-23T17:22:01Z", "run": "2868ab5dc1ce4d49ac015295dd5ab8d5", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "triton", "compile": "none"}, "wl": {"name": "llama_T512_D11008", "num_tokens": 512, "hidden_dim": 11008, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0339219998295448, "p50": 0.03464199994596129, "p90": 0.0347420000252896, "mean": 0.03469179991952842, "iqr": 0.00024100017981254496, "raw_times": [0.0339219998295448, 0.0347420000252896, 0.03464199994596129, 0.03565199995136936, 0.034500999845477054], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03648099982456188, "peak_bytes": 124520448, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.01, "atol": 0.1, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "relmax": 0.0, "ref": "swiglu_bfloat16"}, "err": null}