drbh's picture
drbh HF Staff
Upload folder using huggingface_hub
0c5e7b7 verified
raw
history blame
8.88 kB
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T128_D768", "num_tokens": 128, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.022969999974975508, "p50": 0.023499999997511622, "p90": 0.023961000010785938, "mean": 0.02361460000201987, "iqr": 0.0009899999895424116, "raw_times": [0.022971000021243526, 0.022969999974975508, 0.023961000010785938, 0.023499999997511622, 0.024671000005582755], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03073999999969601, "peak_bytes": 1966080, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T128_D1024", "num_tokens": 128, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.027540000019143918, "p50": 0.029130999962490023, "p90": 0.03002100004323438, "mean": 0.029014800009008468, "iqr": 0.0016900000332498166, "raw_times": [0.027540000019143918, 0.030051000010189455, 0.03002100004323438, 0.029130999962490023, 0.028331000009984564], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03343999998151048, "peak_bytes": 2621440, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T128_D2048", "num_tokens": 128, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.02829999999676147, "p50": 0.029119999965132592, "p90": 0.03051000004461457, "mean": 0.029939999990347133, "iqr": 0.0019500000689731678, "raw_times": [0.02829999999676147, 0.03051000004461457, 0.033209999969585624, 0.029119999965132592, 0.028559999975641404], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.031761000002461515, "peak_bytes": 5242880, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T256_D768", "num_tokens": 256, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.027531000000635686, "p50": 0.028170999996746104, "p90": 0.028501000031155854, "mean": 0.028293000002577173, "iqr": 0.0008900000239009387, "raw_times": [0.027611000007254916, 0.028170999996746104, 0.029650999977093306, 0.027531000000635686, 0.028501000031155854], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03130000004603062, "peak_bytes": 3932160, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T256_D1024", "num_tokens": 256, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.02755000002707675, "p50": 0.02861000001530556, "p90": 0.028831000008722185, "mean": 0.02867660001584227, "iqr": 0.00023000001192485797, "raw_times": [0.028600999996797327, 0.029791000031309522, 0.028831000008722185, 0.02755000002707675, 0.02861000001530556], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03139000000373926, "peak_bytes": 5242880, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T256_D2048", "num_tokens": 256, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.02618100000972845, "p50": 0.027131000024382956, "p90": 0.02731099999664366, "mean": 0.026918799994746223, "iqr": 0.0007610000238855719, "raw_times": [0.02618100000972845, 0.027131000024382956, 0.027420999970217963, 0.02731099999664366, 0.026549999972758087], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03008099997714453, "peak_bytes": 10485760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T512_D768", "num_tokens": 512, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.026950999995278835, "p50": 0.02748000002839035, "p90": 0.02804100000730614, "mean": 0.02758480000011332, "iqr": 0.0006300000450210064, "raw_times": [0.026950999995278835, 0.02804100000730614, 0.027410999962285132, 0.02804100000730614, 0.02748000002839035], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03104999996139668, "peak_bytes": 7864320, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T512_D1024", "num_tokens": 512, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.026300000001810986, "p50": 0.02733100001250932, "p90": 0.0275399999623005, "mean": 0.02720039998393986, "iqr": 0.0004789999934473599, "raw_times": [0.02706099996885314, 0.02733100001250932, 0.027769999974225357, 0.0275399999623005, 0.026300000001810986], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03032000000757762, "peak_bytes": 10485760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}
{"ts": "2025-10-30T15:52:35Z", "run": "37230f1ce31641f2b6ebd7aba7f793c9", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T512_D2048", "num_tokens": 512, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.02642100002958614, "p50": 0.027860999978202017, "p90": 0.02790100000993334, "mean": 0.027615000010428048, "iqr": 0.00036000000136482413, "raw_times": [0.02642100002958614, 0.028351000025850226, 0.027541000008568517, 0.02790100000993334, 0.027860999978202017], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03163999997468636, "peak_bytes": 20971520, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null}