+
+▶ code
+▼ output
+ ▶ uv-logs
+ |
+Cell: combine | 4.26s
+ |
+
+Raw
+
+
+
+
+
+
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+
+
+
+
+
+
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+# "numpy",
+# "torch==2.8.0",
+# "kernels-benchmark-tools",
+# "matplotlib",
+# ]
+#
+# [tool.uv.sources]
+# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
+# ///
+from kernels_benchmark_tools.core.visuals import generate_combined_results
+
+# Map display names to uvnote environment variables
+cache_env_map = {
+ # "PyTorch OpenAI MoE": "UVNOTE_FILE_TORCH_OPENAI_MOE_BENCHMARK",
+ "Binned PyTorch": "UVNOTE_FILE_BINNED_TORCH_BENCHMARK",
+ "GptOssExperts": "UVNOTE_FILE_GPT_OSS_MOE_BENCHMARK",
+}
+
+# Generate combined results with visualization
+generate_combined_results(
+ cache_env_map=cache_env_map,
+ output_filename="openai_moe.jsonl",
+ svg_filename="latency.svg"
+)
+
+
+
+====================================================================== +LOADING BENCHMARK DATA +====================================================================== +✓ Binned PyTorch : /__w/kernels-benchmarks/kernels-benchmarks/benches/openai_moe/impls/.uvnote/cache/fd01907ce582015b5dd52e56081cc8e2a21813f73271b422308d60a8ab9391af +✓ GptOssExperts : /__w/kernels-benchmarks/kernels-benchmarks/benches/openai_moe/impls/.uvnote/cache/002e3e7d42f2dbf6d5e5216db57e56aa649bc6ac59ce4131ce80c5849e52482b + + ✓ Found Binned PyTorch + Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/openai_moe/impls/.uvnote/cache/fd01907ce582015b5dd52e56081cc8e2a21813f73271b422308d60a8ab9391af/openai_moe.jsonl + ✓ Found GptOssExperts + Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/openai_moe/impls/.uvnote/cache/002e3e7d42f2dbf6d5e5216db57e56aa649bc6ac59ce4131ce80c5849e52482b/openai_moe.jsonl + +====================================================================== +Summary: 2 found, 0 skipped, 0 missing +====================================================================== + +COMBINED BENCHMARK SUMMARY + +impl wl p50(ms) ok +binned_torch cuda_B1_S1024_E2 372.79 True +binned_torch cuda_B1_S1024_E4 382.68 True +binned_torch cuda_B1_S512_E2 150.05 True +binned_torch cuda_B1_S512_E4 200.26 True +binned_torch cuda_B4_S1024_E2 1486.48 True +binned_torch cuda_B4_S1024_E4 1524.50 True +binned_torch cuda_B4_S512_E2 742.02 True +binned_torch cuda_B4_S512_E4 801.90 True +gpt_oss_experts cuda_B1_S1024_E2 3.79 True +gpt_oss_experts cuda_B1_S1024_E4 5.24 True +gpt_oss_experts cuda_B1_S512_E2 2.63 True +gpt_oss_experts cuda_B1_S512_E4 3.89 True +gpt_oss_experts cuda_B4_S1024_E2 13.28 True +gpt_oss_experts cuda_B4_S1024_E4 13.19 True +gpt_oss_experts cuda_B4_S512_E2 6.74 True +gpt_oss_experts cuda_B4_S512_E4 7.36 True + +GENERATING COMBINED VISUALIZATION + +Loaded 16 records +✓ Visualization saved as latency.svg +Saved latency.png +✓ Visualization saved as latency.svg +✓ SVG visualization ready! + +ANALYSIS COMPLETE +Total implementations analyzed: 2 + +Implementations included: + ✓ Binned PyTorch + ✓ GptOssExperts +
+
+
+▶ UV Install Logs
+
+