File size: 4,325 Bytes
3b25788 |
1 2 3 4 5 |
{"ts": "2025-12-19T23:02:16Z", "run": "32d018bc53624a45997f9dda67216816", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D4096", "batch": 16, "seq_len": 2048, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.8274980000351206, "p50": 0.8322979999775271, "p90": 0.8378580000680813, "mean": 0.8332618000167713, "iqr": 0.0071710001066094264, "raw_times": [0.8322979999775271, 0.8378580000680813, 0.8379680000416556, 0.8306869999614719, 0.8274980000351206], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.8445380001376179, "peak_bytes": 2415935488, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null}
{"ts": "2025-12-19T23:02:16Z", "run": "32d018bc53624a45997f9dda67216816", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D8192", "batch": 16, "seq_len": 2048, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.6395549998833303, "p50": 1.6463560000374855, "p90": 1.6514159999587719, "mean": 1.6487175999827741, "iqr": 0.00707099979990744, "raw_times": [1.6395549998833303, 1.6514159999587719, 1.6463560000374855, 1.6443450001588644, 1.6619159998754185], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.6726759999983187, "peak_bytes": 4831870976, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1086463928222656e-05, "ref": "layer_norm_ref"}, "err": null}
{"ts": "2025-12-19T23:02:16Z", "run": "32d018bc53624a45997f9dda67216816", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D4096", "batch": 16, "seq_len": 4096, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.6412159998253628, "p50": 1.644736000116609, "p90": 1.6461760001220682, "mean": 1.6448379999474128, "iqr": 0.0036900003124173963, "raw_times": [1.644736000116609, 1.6412159998253628, 1.649575999863373, 1.6461760001220682, 1.6424859998096508], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.646575999984634, "peak_bytes": 4831854592, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null}
{"ts": "2025-12-19T23:02:17Z", "run": "32d018bc53624a45997f9dda67216816", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D8192", "batch": 16, "seq_len": 4096, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.2493999999587686, "p50": 3.2569499999226537, "p90": 3.2582300000285613, "mean": 3.2570102000136103, "iqr": 0.006920000032550888, "raw_times": [3.2493999999587686, 3.2691610001620575, 3.2513099999960104, 3.2569499999226537, 3.2582300000285613], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.2572910001817945, "peak_bytes": 9663709184, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1026859283447266e-05, "ref": "layer_norm_ref"}, "err": null}
|