drbh's picture
drbh HF Staff
Upload folder using huggingface_hub
3b25788 verified
raw
history blame
6.41 kB
{"ts": "2025-12-19T23:02:00Z", "run": "d08cdddcbd814f0a98850e99a3cc8f3c", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L128_bfloat16", "batch": 1, "seq_len": 4224, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.2105559999326942, "p50": 1.2135660001604265, "p90": 1.214856999922631, "mean": 1.213000200004899, "iqr": 0.0038309999581542797, "raw_times": [1.2149960000442661, 1.2110259999644768, 1.2105559999326942, 1.2135660001604265, 1.214856999922631], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.2067360000855842, "peak_bytes": 295567360, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null}
{"ts": "2025-12-19T23:02:00Z", "run": "d08cdddcbd814f0a98850e99a3cc8f3c", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L256_bfloat16", "batch": 1, "seq_len": 4352, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.2594280001394509, "p50": 1.2752780000937491, "p90": 1.2771070000781037, "mean": 1.2731776000691752, "iqr": 0.010640000027706265, "raw_times": [1.2752780000937491, 1.2664670000503975, 1.2771070000781037, 1.2594280001394509, 1.287607999984175], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.2718570001197804, "peak_bytes": 304742400, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7418136596679688e-06, "ref": "sdpa_math_fp32"}, "err": null}
{"ts": "2025-12-19T23:02:00Z", "run": "d08cdddcbd814f0a98850e99a3cc8f3c", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L320_bfloat16", "batch": 1, "seq_len": 4416, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.2881479999578005, "p50": 1.2985280000066268, "p90": 1.2987470001917245, "mean": 1.2992600000416132, "iqr": 0.008449000233667903, "raw_times": [1.2902979999580566, 1.2881479999578005, 1.2985280000066268, 1.2987470001917245, 1.3205790000938578], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.2919179998789332, "peak_bytes": 307494912, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null}
{"ts": "2025-12-19T23:02:00Z", "run": "d08cdddcbd814f0a98850e99a3cc8f3c", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L384_bfloat16", "batch": 1, "seq_len": 4480, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.32487900009437, "p50": 1.3346000000638014, "p90": 1.337429000159318, "mean": 1.3341430000764376, "iqr": 0.006821000170020852, "raw_times": [1.32487900009437, 1.337429000159318, 1.3346000000638014, 1.3306079999892972, 1.3431990000754013], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.327048999883118, "peak_bytes": 311296000, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7418136596679688e-06, "ref": "sdpa_math_fp32"}, "err": null}
{"ts": "2025-12-19T23:02:00Z", "run": "d08cdddcbd814f0a98850e99a3cc8f3c", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L448_bfloat16", "batch": 1, "seq_len": 4544, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.4795820000017557, "p50": 1.4878020001560799, "p90": 1.4919819998340245, "mean": 1.4892582000356924, "iqr": 0.004879999778495403, "raw_times": [1.4795820000017557, 1.4919819998340245, 1.487102000055529, 1.499823000131073, 1.4878020001560799], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.4706619999742543, "peak_bytes": 315621376, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7418136596679688e-06, "ref": "sdpa_math_fp32"}, "err": null}
{"ts": "2025-12-19T23:02:00Z", "run": "d08cdddcbd814f0a98850e99a3cc8f3c", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L512_bfloat16", "batch": 1, "seq_len": 4608, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.507972999888807, "p50": 1.5174029999798222, "p90": 1.518043000032776, "mean": 1.5156109999679757, "iqr": 0.005300000111674308, "raw_times": [1.518043000032776, 1.5174029999798222, 1.5218930000173714, 1.507972999888807, 1.5127429999211017], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.517042999921614, "peak_bytes": 319946752, "ok": true, "absmax": 0.125, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.125, "mae": 0.0003566741943359375, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null}