{"ts": "2025-12-19T23:01:31Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B1_S512_E2", "batch": 1, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 2.642566999838891, "p50": 2.6590969998778746, "p90": 2.673486999810848, "mean": 2.659981199894901, "iqr": 0.023999999939405825, "raw_times": [2.6590969998778746, 2.675268000075448, 2.649486999871442, 2.642566999838891, 2.673486999810848], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 2.7064890000474406, "peak_bytes": 311252992, "ok": true, "absmax": 1.0818243026733398e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.0818243026733398e-05, "mae": 1.0733322142186807e-06, "mse": 1.9560496885423495e-12, "ref": "naive_moe"}, "err": null} {"ts": "2025-12-19T23:01:32Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B1_S512_E4", "batch": 1, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.936204999945403, "p50": 3.9538260000426817, "p90": 3.9835660002154327, "mean": 3.9606518000255164, "iqr": 0.039130000232034945, "raw_times": [3.936204999945403, 3.9538260000426817, 3.9835660002154327, 3.985225999940667, 3.944435999983398], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.9596259998688765, "peak_bytes": 632822272, "ok": true, "absmax": 7.82310962677002e-06, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 7.82310962677002e-06, "mae": 5.576844728238939e-07, "mse": 5.436189692842319e-13, "ref": "naive_moe"}, "err": null} {"ts": "2025-12-19T23:01:33Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B1_S1024_E2", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.817872999889005, "p50": 3.868872999873929, "p90": 3.9019339999413205, "mean": 3.8749997999275365, "iqr": 0.044331000026431866, "raw_times": [3.817872999889005, 3.8576029999148886, 3.9287160000185395, 3.9019339999413205, 3.868872999873929], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.836012999954619, "peak_bytes": 645417472, "ok": true, "absmax": 1.5497207641601562e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.5497207641601562e-05, "mae": 1.1454358173068613e-06, "mse": 2.2412421311207575e-12, "ref": "naive_moe"}, "err": null} {"ts": "2025-12-19T23:01:34Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B1_S1024_E4", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 5.3247949999786215, "p50": 5.3401449999910255, "p90": 5.39184700005535, "mean": 5.356893600037438, "iqr": 0.06286200004979037, "raw_times": [5.39184700005535, 5.3247949999786215, 5.3401449999910255, 5.328985000005559, 5.398696000156633], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 5.315443999961644, "peak_bytes": 657099264, "ok": true, "absmax": 6.556510925292969e-06, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 6.556510925292969e-06, "mae": 4.852234951613354e-07, "mse": 4.015021550906467e-13, "ref": "naive_moe"}, "err": null} {"ts": "2025-12-19T23:01:36Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B4_S512_E2", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 6.797146999815595, "p50": 6.804686999885234, "p90": 6.806136000022889, "mean": 6.814822799969988, "iqr": 0.0027099999897473026, "raw_times": [6.862718000093082, 6.806136000022889, 6.797146999815595, 6.8034260000331415, 6.804686999885234], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 6.6412220000984235, "peak_bytes": 678357504, "ok": true, "absmax": 1.3589859008789062e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.3589859008789062e-05, "mae": 1.1745952406272409e-06, "mse": 2.316181968442521e-12, "ref": "naive_moe"}, "err": null} {"ts": "2025-12-19T23:01:38Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B4_S512_E4", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 7.520542000065689, "p50": 7.530022999844732, "p90": 7.53409300000385, "mean": 7.531816999926377, "iqr": 0.0065400001858506585, "raw_times": [7.520542000065689, 7.527552999817999, 7.546873999899617, 7.530022999844732, 7.53409300000385], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 7.356247999950938, "peak_bytes": 701983232, "ok": true, "absmax": 8.58306884765625e-06, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 8.58306884765625e-06, "mae": 5.268635732136318e-07, "mse": 4.753664909623589e-13, "ref": "naive_moe"}, "err": null} {"ts": "2025-12-19T23:01:42Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B4_S1024_E2", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 13.378247999980886, "p50": 13.385679999828426, "p90": 13.397299999951429, "mean": 13.394303199947899, "iqr": 0.012501999890446314, "raw_times": [13.378247999980886, 13.384798000060982, 13.425489999917772, 13.385679999828426, 13.397299999951429], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 13.166785000066739, "peak_bytes": 1012207616, "ok": true, "absmax": 1.71661376953125e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.71661376953125e-05, "mae": 1.797086838450923e-06, "mse": 5.3811247992252564e-12, "ref": "naive_moe"}, "err": null} {"ts": "2025-12-19T23:01:46Z", "run": "7f201a1c2ed74237ad40459314918ba0", "impl": "gpt_oss_experts", "tags": {"family": "reference", "backend": "pytorch"}, "wl": {"name": "cuda_B4_S1024_E4", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 13.328448000038406, "p50": 13.40927800015379, "p90": 13.441681000131211, "mean": 13.402939000070546, "iqr": 0.0636730001133401, "raw_times": [13.457280000011451, 13.441681000131211, 13.40927800015379, 13.378008000017871, 13.328448000038406], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 13.036729999839736, "peak_bytes": 910968320, "ok": true, "absmax": 8.344650268554688e-06, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 8.344650268554688e-06, "mae": 5.471991357808292e-07, "mse": 5.06310813587485e-13, "ref": "naive_moe"}, "err": null}