drbh's picture
drbh HF Staff
Upload folder using huggingface_hub
d8c3a70 verified
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S128_W2", "batch": 2, "dim": 64, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0682910000477932, "p50": 0.0693509999791786, "p90": 0.06985099997791622, "mean": 0.0695532000008825, "iqr": 0.0006490000146186503, "raw_times": [0.0710710000362269, 0.06985099997791622, 0.0693509999791786, 0.0682910000477932, 0.06920199996329757], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.07755100000395032, "peak_bytes": 295936, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S128_W4", "batch": 2, "dim": 64, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08078100000830091, "p50": 0.08191099999521612, "p90": 0.08361100003639876, "mean": 0.08277140000245709, "iqr": 0.0022690000491820683, "raw_times": [0.08191099999521612, 0.08361100003639876, 0.08078100000830091, 0.0813419999872167, 0.08621199998515294], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08947200001330202, "peak_bytes": 296448, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S512_W2", "batch": 2, "dim": 64, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07914100001471525, "p50": 0.0805310000373538, "p90": 0.08094200001096397, "mean": 0.08066520001648314, "iqr": 0.0007310000000870787, "raw_times": [0.0805310000373538, 0.0825010000085058, 0.08094200001096397, 0.08021100001087689, 0.07914100001471525], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0867210000023988, "peak_bytes": 1180672, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S512_W4", "batch": 2, "dim": 64, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08032099998445119, "p50": 0.08104099998718084, "p90": 0.08252100002437146, "mean": 0.08145320000494394, "iqr": 0.0017790000015338592, "raw_times": [0.08104099998718084, 0.0826410000058786, 0.08252100002437146, 0.0807420000228376, 0.08032099998445119], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08690100003150292, "peak_bytes": 1181184, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S2048_W2", "batch": 2, "dim": 64, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08014200000161509, "p50": 0.08045200002015918, "p90": 0.08049199999504708, "mean": 0.08065180001040062, "iqr": 9.099994713324122e-05, "raw_times": [0.08049199999504708, 0.08177199998726792, 0.08045200002015918, 0.08014200000161509, 0.08040100004791384], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08574200001021381, "peak_bytes": 4719616, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S2048_W4", "batch": 2, "dim": 64, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08046099998182399, "p50": 0.08075099998450241, "p90": 0.08139099998061283, "mean": 0.0811031999774059, "iqr": 0.0006690000304843124, "raw_times": [0.08075099998450241, 0.08046099998182399, 0.08072199995012852, 0.08139099998061283, 0.08219099998996171], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08660099996404824, "peak_bytes": 4720128, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S128_W2", "batch": 2, "dim": 2048, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07871100001466402, "p50": 0.0798610000174449, "p90": 0.08112200004006809, "mean": 0.07994760002247858, "iqr": 0.0023900000201138027, "raw_times": [0.07873200001995428, 0.07871100001466402, 0.08112200004006809, 0.08131200002026162, 0.0798610000174449], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0850010000021939, "peak_bytes": 9461760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S128_W4", "batch": 2, "dim": 2048, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08012099999632483, "p50": 0.08051100002148814, "p90": 0.08112199998322467, "mean": 0.08078719999957684, "iqr": 0.0007209999921542476, "raw_times": [0.08051100002148814, 0.08040099999107042, 0.08012099999632483, 0.08112199998322467, 0.08178100000577615], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08605099998248988, "peak_bytes": 9478144, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S512_W2", "batch": 2, "dim": 2048, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07783099999869592, "p50": 0.07915100002264808, "p90": 0.07922200001075907, "mean": 0.0788234000083321, "iqr": 0.0006800000278417428, "raw_times": [0.07915100002264808, 0.0793710000266401, 0.07854199998291733, 0.07922200001075907, 0.07783099999869592], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08282100003498272, "peak_bytes": 37773312, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S512_W4", "batch": 2, "dim": 2048, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07959199996321331, "p50": 0.08125099998324004, "p90": 0.08163100000047052, "mean": 0.08102919999828373, "iqr": 0.0016999999843392288, "raw_times": [0.08163100000047052, 0.08274100002836349, 0.07959199996321331, 0.07993100001613129, 0.08125099998324004], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08671099999446596, "peak_bytes": 37789696, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S2048_W2", "batch": 2, "dim": 2048, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.15852199999244476, "p50": 0.15985300001375435, "p90": 0.15988199999128483, "mean": 0.15945239999837213, "iqr": 0.0009890000001178123, "raw_times": [0.15988199999128483, 0.15985300001375435, 0.1601120000032097, 0.15889299999116702, 0.15852199999244476], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.16028200002438098, "peak_bytes": 151019520, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S2048_W4", "batch": 2, "dim": 2048, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.16347299998642484, "p50": 0.1640229999679832, "p90": 0.16425199999048345, "mean": 0.1702108000017688, "iqr": 0.00036999995245423634, "raw_times": [0.16388200003802922, 0.1954240000259233, 0.16347299998642484, 0.1640229999679832, 0.16425199999048345], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.1656729999695017, "peak_bytes": 151035904, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S128_W2", "batch": 4, "dim": 64, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07862100000011196, "p50": 0.07903099998429752, "p90": 0.08047100004660024, "mean": 0.08115100000622988, "iqr": 0.001730000064981141, "raw_times": [0.07862100000011196, 0.0787409999816191, 0.08047100004660024, 0.08889100001852057, 0.07903099998429752], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08358100001260027, "peak_bytes": 33727488, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S128_W4", "batch": 4, "dim": 64, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08011200003466001, "p50": 0.0809910000043601, "p90": 0.08145100002820982, "mean": 0.0812654000128532, "iqr": 0.0010090000159834744, "raw_times": [0.08044200001222634, 0.08333099998480975, 0.08145100002820982, 0.0809910000043601, 0.08011200003466001], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08477100004711247, "peak_bytes": 591360, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S512_W2", "batch": 4, "dim": 64, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07822099996701581, "p50": 0.07905100000016319, "p90": 0.07909100003189451, "mean": 0.07904699999699005, "iqr": 0.00011000003041772288, "raw_times": [0.07822099996701581, 0.07989099998439997, 0.07898100000147679, 0.07905100000016319, 0.07909100003189451], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08219099998996171, "peak_bytes": 2360320, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S512_W4", "batch": 4, "dim": 64, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07998199998837663, "p50": 0.08066199995937495, "p90": 0.08123099996737437, "mean": 0.08075179998741078, "iqr": 0.0008489999459015962, "raw_times": [0.07998199998837663, 0.08150200000045515, 0.08123099996737437, 0.08066199995937495, 0.08038200002147278], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08594100000891558, "peak_bytes": 2360832, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S2048_W2", "batch": 4, "dim": 64, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07915100002264808, "p50": 0.0798309999936464, "p90": 0.07988099997646714, "mean": 0.07979119999390605, "iqr": 0.0006789999815737247, "raw_times": [0.07915100002264808, 0.08089099998187521, 0.07988099997646714, 0.07920199999489341, 0.0798309999936464], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08425199996509036, "peak_bytes": 9438208, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S2048_W4", "batch": 4, "dim": 64, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08051100002148814, "p50": 0.08109199995942618, "p90": 0.08230200000980403, "mean": 0.08190759998569774, "iqr": 0.0016110000160551863, "raw_times": [0.08109199995942618, 0.08494199994402152, 0.08069099999374885, 0.08051100002148814, 0.08230200000980403], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09682099999963611, "peak_bytes": 9438720, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S128_W2", "batch": 4, "dim": 2048, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07942100000946084, "p50": 0.08037099996727193, "p90": 0.08038100003204818, "mean": 0.08024699999396034, "iqr": 0.0001800000291041215, "raw_times": [0.07942100000946084, 0.08038100003204818, 0.08020100000294406, 0.08037099996727193, 0.08086099995807672], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0851419999889913, "peak_bytes": 18931712, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S128_W4", "batch": 4, "dim": 2048, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07963100000552004, "p50": 0.08073099996863675, "p90": 0.08166100002426901, "mean": 0.08793740000783146, "iqr": 0.0012990000186619, "raw_times": [0.07963100000552004, 0.08073099996863675, 0.11730200003512437, 0.08166100002426901, 0.08036200000560711], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08495099996252975, "peak_bytes": 18948096, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S512_W2", "batch": 4, "dim": 2048, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0912609999659253, "p50": 0.09163199996464755, "p90": 0.09213200002022859, "mean": 0.09177579999004593, "iqr": 0.0006200000370881753, "raw_times": [0.09151199998314041, 0.09163199996464755, 0.0912609999659253, 0.09213200002022859, 0.09234200001628778], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0949509999941256, "peak_bytes": 75522048, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S512_W4", "batch": 4, "dim": 2048, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09837099997866972, "p50": 0.09879099997078811, "p90": 0.09935200000654731, "mean": 0.09890359998507847, "iqr": 0.0009400000067216752, "raw_times": [0.09841199999982564, 0.09837099997866972, 0.09879099997078811, 0.09959199996956158, 0.09935200000654731], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.10164200000417623, "peak_bytes": 75538432, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S2048_W2", "batch": 4, "dim": 2048, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.47857700002396086, "p50": 0.48315699996237527, "p90": 0.4835080000020753, "mean": 0.48229559999981575, "iqr": 0.0009899999895424116, "raw_times": [0.48251800001253287, 0.4837179999981345, 0.4835080000020753, 0.47857700002396086, 0.48315699996237527], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.48609800001031545, "peak_bytes": 302014464, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}
{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S2048_W4", "batch": 4, "dim": 2048, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.49741800000902003, "p50": 0.5014380000147867, "p90": 0.5020579999950314, "mean": 0.501099999996768, "iqr": 0.0024500000108673703, "raw_times": [0.5020579999950314, 0.5014380000147867, 0.5049779999808379, 0.49960799998416405, 0.49741800000902003], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.5005179999670872, "peak_bytes": 302030848, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null}