| {"ts": "2025-10-23T17:22:14Z", "run": "1b435099e2fc4712a8a21c79100926bd", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "flux_L128", "batch": 1, "seq_len": 1152, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.17766900009519304, "p50": 0.1805790000162233, "p90": 0.1809689999845432, "mean": 0.18065700000988727, "iqr": 0.0005199999577598646, "raw_times": [0.17766900009519304, 0.18044900002678332, 0.1836189999266935, 0.1805790000162233, 0.1809689999845432], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.18813999986377894, "peak_bytes": 87425024, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.0003452301025390625, "mse": 2.8014183044433594e-06, "ref": "sdpa_math_fp32"}, "err": null} | |
| {"ts": "2025-10-23T17:22:14Z", "run": "1b435099e2fc4712a8a21c79100926bd", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "flux_L256", "batch": 1, "seq_len": 1280, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.2040300000771822, "p50": 0.208629999860932, "p90": 0.20883999991383462, "mean": 0.2071937999517104, "iqr": 0.004771000021719374, "raw_times": [0.208629999860932, 0.2104000000144879, 0.20883999991383462, 0.20406899989211524, 0.2040300000771822], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.2113399998506793, "peak_bytes": 95027200, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000347137451171875, "mse": 2.7865171432495117e-06, "ref": "sdpa_math_fp32"}, "err": null} | |
| {"ts": "2025-10-23T17:22:15Z", "run": "1b435099e2fc4712a8a21c79100926bd", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "flux_L320", "batch": 1, "seq_len": 1344, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.21214000003055844, "p50": 0.22414099998968595, "p90": 0.22725099984199915, "mean": 0.22296499996627972, "iqr": 0.014549999832524918, "raw_times": [0.22414099998968595, 0.21270100000947423, 0.23859199995968083, 0.21214000003055844, 0.22725099984199915], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.215200000184268, "peak_bytes": 99680256, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000347137451171875, "mse": 2.7865171432495117e-06, "ref": "sdpa_math_fp32"}, "err": null} | |
| {"ts": "2025-10-23T17:22:15Z", "run": "1b435099e2fc4712a8a21c79100926bd", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "flux_L384", "batch": 1, "seq_len": 1408, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.2172510000946204, "p50": 0.21762999995189602, "p90": 0.229150999984995, "mean": 0.22471280003628635, "iqr": 0.011839999842777615, "raw_times": [0.2172510000946204, 0.229150999984995, 0.21762999995189602, 0.24222100000770297, 0.21731100014221738], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.22035099982531392, "peak_bytes": 104726528, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.0003452301025390625, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null} | |
| {"ts": "2025-10-23T17:22:15Z", "run": "1b435099e2fc4712a8a21c79100926bd", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "flux_L448", "batch": 1, "seq_len": 1472, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.2690430001166533, "p50": 0.2719639999213541, "p90": 0.2809840000281838, "mean": 0.27520160006133665, "iqr": 0.011710999842762249, "raw_times": [0.2719639999213541, 0.26927300018542155, 0.2690430001166533, 0.2809840000281838, 0.2847440000550705], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.26890300000559364, "peak_bytes": 108855296, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.00034332275390625, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null} | |
| {"ts": "2025-10-23T17:22:15Z", "run": "1b435099e2fc4712a8a21c79100926bd", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "flux_L512", "batch": 1, "seq_len": 1536, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.27566299991121923, "p50": 0.2808829999594309, "p90": 0.29306400006134936, "mean": 0.2846773999863217, "iqr": 0.01699100016594457, "raw_times": [0.29306400006134936, 0.2760729998954048, 0.2808829999594309, 0.29770400010420417, 0.27566299991121923], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.2820939998855465, "peak_bytes": 114425856, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.00034332275390625, "mse": 2.7418136596679688e-06, "ref": "sdpa_math_fp32"}, "err": null} | |