drbh's picture
drbh HF Staff
Upload folder using huggingface_hub
81fff32 verified
raw
history blame
8.73 kB
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S512_D4096", "batch": 1, "seq_len": 512, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.039122000089264475, "p50": 0.04020200003651553, "p90": 0.04062199991494708, "mean": 0.040302199977304554, "iqr": 0.00047999992602854036, "raw_times": [0.04020200003651553, 0.04142299985687714, 0.04062199991494708, 0.04014199998891854, 0.039122000089264475], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.049882999974215636, "peak_bytes": 37765120, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0015716552734375, "mse": 1.1444091796875e-05, "ref": "layer_norm_fp32"}, "err": null}
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S512_D8192", "batch": 1, "seq_len": 512, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.03869200008921325, "p50": 0.039361000062854146, "p90": 0.03952199995183037, "mean": 0.039353600004687905, "iqr": 0.0002899998889915878, "raw_times": [0.03923200006283878, 0.03996099985670298, 0.03952199995183037, 0.039361000062854146, 0.03869200008921325], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04323200005273975, "peak_bytes": 75530240, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.00157928466796875, "mse": 1.150369644165039e-05, "ref": "layer_norm_fp32"}, "err": null}
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S1024_D4096", "batch": 1, "seq_len": 1024, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.038322000136759016, "p50": 0.039080999840734876, "p90": 0.03983200008406129, "mean": 0.03918759998668975, "iqr": 0.0012000000424450263, "raw_times": [0.038322000136759016, 0.038632000041616266, 0.03983200008406129, 0.039080999840734876, 0.04007099983027729], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04126199996790092, "peak_bytes": 75513856, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0016021728515625, "mse": 1.1682510375976562e-05, "ref": "layer_norm_fp32"}, "err": null}
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S1024_D8192", "batch": 1, "seq_len": 1024, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.038531999962287955, "p50": 0.03957200010518136, "p90": 0.040011999999478576, "mean": 0.040755799955149996, "iqr": 0.0013210001270635985, "raw_times": [0.04697199983638711, 0.03957200010518136, 0.040011999999478576, 0.03869099987241498, 0.038531999962287955], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04860299986830796, "peak_bytes": 151027712, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.00156402587890625, "mse": 1.1444091796875e-05, "ref": "layer_norm_fp32"}, "err": null}
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S2048_D4096", "batch": 1, "seq_len": 2048, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.03818100003627478, "p50": 0.039942000057635596, "p90": 0.04086199987796135, "mean": 0.044605999983104994, "iqr": 0.0025399997412023367, "raw_times": [0.06572299980689422, 0.04086199987796135, 0.03818100003627478, 0.039942000057635596, 0.038322000136759016], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.046752999878663104, "peak_bytes": 151011328, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.00156402587890625, "mse": 1.1444091796875e-05, "ref": "layer_norm_fp32"}, "err": null}
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S2048_D8192", "batch": 1, "seq_len": 2048, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.046271999963209964, "p50": 0.046712000084880856, "p90": 0.0469120000161638, "mean": 0.04688640001404565, "iqr": 0.00020900006347801536, "raw_times": [0.046712000084880856, 0.04783300005328783, 0.046271999963209964, 0.046702999952685786, 0.0469120000161638], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.049573000069358386, "peak_bytes": 302022656, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.00152587890625, "mse": 1.1146068572998047e-05, "ref": "layer_norm_fp32"}, "err": null}
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S4096_D4096", "batch": 1, "seq_len": 4096, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.044022000110999215, "p50": 0.04556199996841315, "p90": 0.045742000111204106, "mean": 0.04578840002977813, "iqr": 0.000619000047663576, "raw_times": [0.04512300006354053, 0.04556199996841315, 0.045742000111204106, 0.044022000110999215, 0.04849299989473366], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04771299995809386, "peak_bytes": 302006272, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_fp32"}, "err": null}
{"ts": "2025-10-23T17:21:54Z", "run": "f94366cee8c44bb99e36bc03ca51ad49", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "llama_S4096_D8192", "batch": 1, "seq_len": 4096, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.20399000004545087, "p50": 0.20584999992934172, "p90": 0.20648999998229556, "mean": 0.20627999997486768, "iqr": 0.0007099999947968172, "raw_times": [0.20399000004545087, 0.2092899999297515, 0.20577999998749874, 0.20648999998229556, 0.20584999992934172], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.20653999990827288, "peak_bytes": 604012544, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.00152587890625, "mse": 1.1205673217773438e-05, "ref": "layer_norm_fp32"}, "err": null}