| {"ts": "2025-11-10T22:11:32Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D4096", "batch": 16, "seq_len": 2048, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.8238129998971999, "p50": 0.8308330000090791, "p90": 0.8364840000467666, "mean": 0.8310172000165039, "iqr": 0.012130999948567478, "raw_times": [0.8243530000981991, 0.8396030000312749, 0.8364840000467666, 0.8238129998971999, 0.8308330000090791], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.8376129999305704, "peak_bytes": 2415935488, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null} | |
| {"ts": "2025-11-10T22:11:32Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D8192", "batch": 16, "seq_len": 2048, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.6412459999628481, "p50": 1.64963599991097, "p90": 1.6500760000326409, "mean": 1.6485119999288145, "iqr": 0.005011000212107319, "raw_times": [1.6500760000326409, 1.6450649998205336, 1.6412459999628481, 1.6565369999170798, 1.64963599991097], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.6503259998899011, "peak_bytes": 4831870976, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1086463928222656e-05, "ref": "layer_norm_ref"}, "err": null} | |
| {"ts": "2025-11-10T22:11:33Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D4096", "batch": 16, "seq_len": 4096, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.635675999978048, "p50": 1.649695999958567, "p90": 1.6533860000436107, "mean": 1.6475760000048467, "iqr": 0.008750000006330083, "raw_times": [1.649695999958567, 1.635675999978048, 1.6533860000436107, 1.6544860000067274, 1.6446360000372806], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.6516960001808911, "peak_bytes": 4831854592, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null} | |
| {"ts": "2025-11-10T22:11:33Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D8192", "batch": 16, "seq_len": 4096, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.2460209999953804, "p50": 3.252669999938007, "p90": 3.25303099998564, "mean": 3.2530550000046787, "iqr": 0.002919999815276242, "raw_times": [3.2634419999340025, 3.25303099998564, 3.252669999938007, 3.2501110001703637, 3.2460209999953804], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.265330999965954, "peak_bytes": 9663709184, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1026859283447266e-05, "ref": "layer_norm_ref"}, "err": null} | |