+
+▶ code
+▼ output
+ ▶ uv-logs
+ |
+Cell: combine | 4.36s
+ |
+
+Raw
+
+
+
+
+
+
+
+
+
+
+
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+# "numpy",
+# "torch==2.8.0",
+# "kernels-benchmark-tools",
+# "matplotlib",
+# ]
+#
+# [tool.uv.sources]
+# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
+# ///
+from kernels_benchmark_tools.core.visuals import generate_combined_results
+
+# Map display names to uvnote environment variables. Each key is the label printed in the loading and "Implementations included" sections of the captured output; each value names an environment variable that presumably holds that benchmark cell's cache directory (the output shows one cache path per label) -- TODO confirm against kernels_benchmark_tools.
+cache_env_map = {
+ "HF Kernels Rotary": "UVNOTE_FILE_HF_KERNELS_ROTARY_BENCHMARK",  # rows reported as impl "hf_kernels_rotary" in the captured summary
+ "PyTorch Rotary": "UVNOTE_FILE_TORCH_ROTARY_BENCHMARK",  # rows reported as impl "torch_eager" in the captured summary
+}
+
+# Generate combined results with visualization. In the captured run this located output_filename inside each cache directory, printed a combined table (impl / wl / p50(ms) / ok) and reported saving latency.svg and latency.png.
+generate_combined_results(
+ cache_env_map=cache_env_map,
+ output_filename="rotary.jsonl",  # per-implementation results file; the captured "Path:" lines end in this name
+ svg_filename="latency.svg"  # name reported by the "Visualization saved as" lines in the captured output
+)
+
+
+
+====================================================================== +LOADING BENCHMARK DATA +====================================================================== +✓ HF Kernels Rotary : /__w/kernels-benchmarks/kernels-benchmarks/benches/rotary/impls/.uvnote/cache/49ec9501b131c967277abe3cccb638422565260339bb30f5ea386b0076f2183e +✓ PyTorch Rotary : /__w/kernels-benchmarks/kernels-benchmarks/benches/rotary/impls/.uvnote/cache/abf801d6445dfa81a8dd7b2e6257930c39c18160a9b97a739858c3b244e16cc5 + + ✓ Found HF Kernels Rotary + Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/rotary/impls/.uvnote/cache/49ec9501b131c967277abe3cccb638422565260339bb30f5ea386b0076f2183e/rotary.jsonl + ✓ Found PyTorch Rotary + Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/rotary/impls/.uvnote/cache/abf801d6445dfa81a8dd7b2e6257930c39c18160a9b97a739858c3b244e16cc5/rotary.jsonl + +====================================================================== +Summary: 2 found, 0 skipped, 0 missing +====================================================================== + +COMBINED BENCHMARK SUMMARY + +impl wl p50(ms) ok +hf_kernels_rotary cuda_B1_S128_H32_D128_R64 0.09 False +hf_kernels_rotary cuda_B1_S128_H32_D64_R32 0.09 False +hf_kernels_rotary cuda_B1_S128_H8_D128_R64 0.09 False +hf_kernels_rotary cuda_B1_S128_H8_D64_R32 0.08 False +hf_kernels_rotary cuda_B1_S2048_H32_D128_R64 0.09 False +hf_kernels_rotary cuda_B1_S2048_H32_D64_R32 0.09 False +hf_kernels_rotary cuda_B1_S2048_H8_D128_R64 0.09 False +hf_kernels_rotary cuda_B1_S2048_H8_D64_R32 0.09 False +hf_kernels_rotary cuda_B1_S512_H32_D128_R64 0.09 False +hf_kernels_rotary cuda_B1_S512_H32_D64_R32 0.09 False +hf_kernels_rotary cuda_B1_S512_H8_D128_R64 0.09 False +hf_kernels_rotary cuda_B1_S512_H8_D64_R32 0.09 False +hf_kernels_rotary cuda_B2_S128_H32_D128_R64 0.09 False +hf_kernels_rotary cuda_B2_S128_H32_D64_R32 0.09 False +hf_kernels_rotary cuda_B2_S128_H8_D128_R64 0.09 False +hf_kernels_rotary cuda_B2_S128_H8_D64_R32 0.09 
False +hf_kernels_rotary cuda_B2_S2048_H32_D128_R64 0.28 False +hf_kernels_rotary cuda_B2_S2048_H32_D64_R32 0.10 False +hf_kernels_rotary cuda_B2_S2048_H8_D128_R64 0.09 False +hf_kernels_rotary cuda_B2_S2048_H8_D64_R32 0.09 False +hf_kernels_rotary cuda_B2_S512_H32_D128_R64 0.09 False +hf_kernels_rotary cuda_B2_S512_H32_D64_R32 0.09 False +hf_kernels_rotary cuda_B2_S512_H8_D128_R64 0.09 False +hf_kernels_rotary cuda_B2_S512_H8_D64_R32 0.09 False +torch_eager cuda_B1_S128_H32_D128_R64 0.22 True +torch_eager cuda_B1_S128_H32_D64_R32 0.22 True +torch_eager cuda_B1_S128_H8_D128_R64 0.23 True +torch_eager cuda_B1_S128_H8_D64_R32 0.17 True +torch_eager cuda_B1_S2048_H32_D128_R64 0.22 True +torch_eager cuda_B1_S2048_H32_D64_R32 0.22 True +torch_eager cuda_B1_S2048_H8_D128_R64 0.22 True +torch_eager cuda_B1_S2048_H8_D64_R32 0.22 True +torch_eager cuda_B1_S512_H32_D128_R64 0.22 True +torch_eager cuda_B1_S512_H32_D64_R32 0.22 True +torch_eager cuda_B1_S512_H8_D128_R64 0.22 True +torch_eager cuda_B1_S512_H8_D64_R32 0.22 True +torch_eager cuda_B2_S128_H32_D128_R64 0.22 True +torch_eager cuda_B2_S128_H32_D64_R32 0.22 True +torch_eager cuda_B2_S128_H8_D128_R64 0.22 True +torch_eager cuda_B2_S128_H8_D64_R32 0.22 True +torch_eager cuda_B2_S2048_H32_D128_R64 0.64 True +torch_eager cuda_B2_S2048_H32_D64_R32 0.23 True +torch_eager cuda_B2_S2048_H8_D128_R64 0.22 True +torch_eager cuda_B2_S2048_H8_D64_R32 0.22 True +torch_eager cuda_B2_S512_H32_D128_R64 0.22 True +torch_eager cuda_B2_S512_H32_D64_R32 0.22 True +torch_eager cuda_B2_S512_H8_D128_R64 0.22 True +torch_eager cuda_B2_S512_H8_D64_R32 0.22 True + +GENERATING COMBINED VISUALIZATION + +Loaded 48 records +✓ Visualization saved as latency.svg +Saved latency.png +✓ Visualization saved as latency.svg +✓ SVG visualization ready! + +ANALYSIS COMPLETE +Total implementations analyzed: 2 + +Implementations included: + ✓ HF Kernels Rotary + ✓ PyTorch Rotary +
+
+
+▶ UV Install Logs
+
+