style: apply yapf + isort formatting

Browse files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1)

benchmarks/common/bench_framework.py +13 -7

benchmarks/common/bench_framework.py (changed)

@@ -4,8 +4,8 @@ import re
 from typing import Any, Dict, Sequence
 import torch
-from torch.profiler import ProfilerActivity, profile
 import triton
 from .diff_engine import DiffCase
@@ -42,8 +42,8 @@ def _compute_bytes(inputs, forward_fn, obj):
     if isinstance(output, torch.Tensor):
         output_bytes = output.nbytes
     elif isinstance(output, (tuple, list)):
-        output_bytes = sum(
-            o.nbytes for o in output if isinstance(o, torch.Tensor))
     else:
         output_bytes = 0
     return input_bytes + output_bytes
@@ -158,7 +158,9 @@ def make_fwd_benchmark_for_case(
         key = make_fwd_key(dim, batch_size, seq_len)
         I = case.build_inputs(batch_size, seq_len, dim, dtype, eps)
         if provider == "speedup":
-            return round(timings_ms["naive"][key] / _get_best_cuda_timing(timings_ms, key), 2)
         if provider.endswith("_bw"):
             base = provider[:-3]
             ms = timings_ms[base][key]
@@ -227,7 +229,8 @@ def make_fwd_benchmark_plot_for_case(
         ms = profile_bench(run, total_bytes=nbytes)
         timings_ms[provider][config] = ms
         if provider == "cuda":
-            ratio = timings_ms["naive"][config] / _get_best_cuda_timing(timings_ms, config)
             spdup_ratio.append(ratio)
             return round(ratio, 2)
         else:
@@ -267,7 +270,9 @@ def make_bwd_benchmark_for_case(
         key = make_bwd_key(dim, batch_size, seq_len)
         I = case.build_inputs(batch_size, seq_len, dim, dtype, eps)
         if provider == "speedup":
-            return round(timings_ms["naive"][key] / _get_best_cuda_timing(timings_ms, key), 2)
         if provider.endswith("_bw"):
             base = provider[:-3]
             ms = timings_ms[base][key]
@@ -360,7 +365,8 @@ def make_bwd_benchmark_plot_for_case(
         ms = profile_bench(run, total_bytes=nbytes)
         timings_ms[provider][config] = ms
         if provider == "cuda":
-            ratio = timings_ms["naive"][config] / _get_best_cuda_timing(timings_ms, config)
             spdup_ratio.append(ratio)
             return round(ratio, 2)
         else:

 from typing import Any, Dict, Sequence
 import torch
 import triton
+from torch.profiler import ProfilerActivity, profile
 from .diff_engine import DiffCase
     if isinstance(output, torch.Tensor):
         output_bytes = output.nbytes
     elif isinstance(output, (tuple, list)):
+        output_bytes = sum(o.nbytes for o in output
+                           if isinstance(o, torch.Tensor))
     else:
         output_bytes = 0
     return input_bytes + output_bytes
         key = make_fwd_key(dim, batch_size, seq_len)
         I = case.build_inputs(batch_size, seq_len, dim, dtype, eps)
         if provider == "speedup":
+            return round(
+                timings_ms["naive"][key] /
+                _get_best_cuda_timing(timings_ms, key), 2)
         if provider.endswith("_bw"):
             base = provider[:-3]
             ms = timings_ms[base][key]
         ms = profile_bench(run, total_bytes=nbytes)
         timings_ms[provider][config] = ms
         if provider == "cuda":
+            ratio = timings_ms["naive"][config] / _get_best_cuda_timing(
+                timings_ms, config)
             spdup_ratio.append(ratio)
             return round(ratio, 2)
         else:
         key = make_bwd_key(dim, batch_size, seq_len)
         I = case.build_inputs(batch_size, seq_len, dim, dtype, eps)
         if provider == "speedup":
+            return round(
+                timings_ms["naive"][key] /
+                _get_best_cuda_timing(timings_ms, key), 2)
         if provider.endswith("_bw"):
             base = provider[:-3]
             ms = timings_ms[base][key]
         ms = profile_bench(run, total_bytes=nbytes)
         timings_ms[provider][config] = ms
         if provider == "cuda":
+            ratio = timings_ms["naive"][config] / _get_best_cuda_timing(
+                timings_ms, config)
             spdup_ratio.append(ratio)
             return round(ratio, 2)
         else: