Spaces:

JetBrains-Research
/

SWE-bench-Costs-Calculator

Sleeping

App Files Community

IgorSlinko committed 2 days ago

Commit

403adb5

1 Parent(s): 10a0b43

Add Use Cache and Tokenizer Overhead support for single trajectory charts (v0.3.38)

Browse files

Files changed (1) hide show

app.py +55 -23

app.py CHANGED Viewed

@@ -444,7 +444,7 @@ def create_single_trajectory_meta_cost_chart(steps: list[dict], input_price: flo
     return fig
-def create_single_trajectory_chart(steps: list[dict]):
     """Create stacked bar chart for a single trajectory showing tokens per step."""
     import plotly.graph_objects as go
@@ -454,18 +454,31 @@ def create_single_trajectory_chart(steps: list[dict]):
     per_step_data = calculate_per_step_tokens(steps)
     x_labels = [f"Step {d['step']}" for d in per_step_data]
-    cache_read = [d["cache_read"] / 1e3 for d in per_step_data]
-    cache_creation = [d["cache_creation"] / 1e3 for d in per_step_data]
-    completion = [d["completion"] / 1e3 for d in per_step_data]
-    prompt_tokens = [(d["cache_read"] + d["uncached_input"]) / 1e3 for d in per_step_data]
-    uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens, cache_read, cache_creation)]
     fig = go.Figure()
     fig.add_trace(go.Bar(
         name="Uncached Input",
         x=x_labels,
-        y=uncached,
         marker_color="#EF553B",
         hovertemplate="Step %{x}<br>Uncached Input: %{y:.2f}K<extra></extra>",
     ))
@@ -473,7 +486,7 @@ def create_single_trajectory_chart(steps: list[dict]):
     fig.add_trace(go.Bar(
         name="Cache Read",
         x=x_labels,
-        y=cache_read,
         marker_color="#19D3F3",
         hovertemplate="Step %{x}<br>Cache Read: %{y:.2f}K<extra></extra>",
     ))
@@ -481,7 +494,7 @@ def create_single_trajectory_chart(steps: list[dict]):
     fig.add_trace(go.Bar(
         name="Cache Creation",
         x=x_labels,
-        y=cache_creation,
         marker_color="#FFA15A",
         hovertemplate="Step %{x}<br>Cache Creation: %{y:.2f}K<extra></extra>",
     ))
@@ -489,7 +502,7 @@ def create_single_trajectory_chart(steps: list[dict]):
     fig.add_trace(go.Bar(
         name="Completion",
         x=x_labels,
-        y=completion,
         marker_color="#AB63FA",
         hovertemplate="Step %{x}<br>Completion: %{y:.2f}K<extra></extra>",
     ))
@@ -505,7 +518,7 @@ def create_single_trajectory_chart(steps: list[dict]):
     return fig
-def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, cache_read_price: float, cache_creation_price: float, completion_price: float):
     """Create stacked bar chart for a single trajectory showing cost per step."""
     import plotly.graph_objects as go
@@ -515,16 +528,24 @@ def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, c
     per_step_data = calculate_per_step_tokens(steps)
     x_labels = [f"Step {d['step']}" for d in per_step_data]
-    cache_read = [d["cache_read"] for d in per_step_data]
-    cache_creation = [d["cache_creation"] for d in per_step_data]
-    completion = [d["completion"] for d in per_step_data]
-    prompt_tokens = [d["cache_read"] + d["uncached_input"] for d in per_step_data]
-    uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens, cache_read, cache_creation)]
     uncached_cost = [u * input_price / 1e6 for u in uncached]
     cache_read_cost = [cr * cache_read_price / 1e6 for cr in cache_read]
     cache_creation_cost = [cc * cache_creation_price / 1e6 for cc in cache_creation]
-    completion_cost = [c * completion_price / 1e6 for c in completion]
     fig = go.Figure()
@@ -1818,7 +1839,7 @@ def build_app():
         """)
         trajectories_state = gr.State(None)
-        gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.37`")
         gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
         with gr.Row():
@@ -2860,15 +2881,15 @@ def build_app():
                 gr.update(),
             )
-        def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
             if state_data is None or not issue_id:
                 return None, None
             trajectory_steps = state_data.get("steps", {})
             if issue_id not in trajectory_steps:
                 return None, None
             steps = trajectory_steps[issue_id]
-            tokens_chart = create_single_trajectory_chart(steps)
-            cost_chart = create_single_trajectory_cost_chart(steps, input_price, cache_read_price, cache_creation_price, completion_price)
             return tokens_chart, cost_chart
         def on_single_traj_meta_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
@@ -2904,7 +2925,7 @@ def build_app():
             ],
         ).then(
             fn=on_single_traj_select,
-            inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion],
             outputs=[single_traj_plot, single_traj_cost_plot],
         ).then(
             fn=on_single_traj_meta_select,
@@ -2969,7 +2990,7 @@ def build_app():
         single_traj_dropdown.change(
             fn=on_single_traj_select,
-            inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion],
             outputs=[single_traj_plot, single_traj_cost_plot],
         )
@@ -2979,16 +3000,27 @@ def build_app():
             outputs=[single_traj_meta_plot, single_traj_meta_cost_plot],
         )
         thinking_overhead.change(
             fn=on_calc_options_change,
             inputs=calc_options_inputs,
             outputs=calc_options_outputs,
         )
         use_cache.change(
             fn=on_calc_options_change,
             inputs=calc_options_inputs,
             outputs=calc_options_outputs,
         )
     return app

     return fig
+def create_single_trajectory_chart(steps: list[dict], overhead: float = 1.0, with_cache: bool = True):
     """Create stacked bar chart for a single trajectory showing tokens per step."""
     import plotly.graph_objects as go
     per_step_data = calculate_per_step_tokens(steps)
     x_labels = [f"Step {d['step']}" for d in per_step_data]
+    cache_read_raw = [d["cache_read"] * overhead for d in per_step_data]
+    cache_creation_raw = [d["cache_creation"] * overhead for d in per_step_data]
+    completion_raw = [d["completion"] * overhead for d in per_step_data]
+    prompt_tokens_raw = [(d["cache_read"] + d["uncached_input"]) * overhead for d in per_step_data]
+    if with_cache:
+        uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens_raw, cache_read_raw, cache_creation_raw)]
+        cache_read = cache_read_raw
+        cache_creation = cache_creation_raw
+    else:
+        uncached = prompt_tokens_raw
+        cache_read = [0] * len(per_step_data)
+        cache_creation = [0] * len(per_step_data)
+    uncached_k = [u / 1e3 for u in uncached]
+    cache_read_k = [cr / 1e3 for cr in cache_read]
+    cache_creation_k = [cc / 1e3 for cc in cache_creation]
+    completion_k = [c / 1e3 for c in completion_raw]
     fig = go.Figure()
     fig.add_trace(go.Bar(
         name="Uncached Input",
         x=x_labels,
+        y=uncached_k,
         marker_color="#EF553B",
         hovertemplate="Step %{x}<br>Uncached Input: %{y:.2f}K<extra></extra>",
     ))
     fig.add_trace(go.Bar(
         name="Cache Read",
         x=x_labels,
+        y=cache_read_k,
         marker_color="#19D3F3",
         hovertemplate="Step %{x}<br>Cache Read: %{y:.2f}K<extra></extra>",
     ))
     fig.add_trace(go.Bar(
         name="Cache Creation",
         x=x_labels,
+        y=cache_creation_k,
         marker_color="#FFA15A",
         hovertemplate="Step %{x}<br>Cache Creation: %{y:.2f}K<extra></extra>",
     ))
     fig.add_trace(go.Bar(
         name="Completion",
         x=x_labels,
+        y=completion_k,
         marker_color="#AB63FA",
         hovertemplate="Step %{x}<br>Completion: %{y:.2f}K<extra></extra>",
     ))
     return fig
+def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, cache_read_price: float, cache_creation_price: float, completion_price: float, overhead: float = 1.0, with_cache: bool = True):
     """Create stacked bar chart for a single trajectory showing cost per step."""
     import plotly.graph_objects as go
     per_step_data = calculate_per_step_tokens(steps)
     x_labels = [f"Step {d['step']}" for d in per_step_data]
+    cache_read_raw = [d["cache_read"] * overhead for d in per_step_data]
+    cache_creation_raw = [d["cache_creation"] * overhead for d in per_step_data]
+    completion_raw = [d["completion"] * overhead for d in per_step_data]
+    prompt_tokens_raw = [(d["cache_read"] + d["uncached_input"]) * overhead for d in per_step_data]
+    if with_cache:
+        uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens_raw, cache_read_raw, cache_creation_raw)]
+        cache_read = cache_read_raw
+        cache_creation = cache_creation_raw
+    else:
+        uncached = prompt_tokens_raw
+        cache_read = [0] * len(per_step_data)
+        cache_creation = [0] * len(per_step_data)
     uncached_cost = [u * input_price / 1e6 for u in uncached]
     cache_read_cost = [cr * cache_read_price / 1e6 for cr in cache_read]
     cache_creation_cost = [cc * cache_creation_price / 1e6 for cc in cache_creation]
+    completion_cost = [c * completion_price / 1e6 for c in completion_raw]
     fig = go.Figure()
         """)
         trajectories_state = gr.State(None)
+        gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.38`")
         gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
         with gr.Row():
                 gr.update(),
             )
+        def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
             if state_data is None or not issue_id:
                 return None, None
             trajectory_steps = state_data.get("steps", {})
             if issue_id not in trajectory_steps:
                 return None, None
             steps = trajectory_steps[issue_id]
+            tokens_chart = create_single_trajectory_chart(steps, overhead, with_cache)
+            cost_chart = create_single_trajectory_cost_chart(steps, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache)
             return tokens_chart, cost_chart
         def on_single_traj_meta_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
             ],
         ).then(
             fn=on_single_traj_select,
+            inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
             outputs=[single_traj_plot, single_traj_cost_plot],
         ).then(
             fn=on_single_traj_meta_select,
         single_traj_dropdown.change(
             fn=on_single_traj_select,
+            inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
             outputs=[single_traj_plot, single_traj_cost_plot],
         )
             outputs=[single_traj_meta_plot, single_traj_meta_cost_plot],
         )
+        single_traj_inputs = [trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
+        single_traj_outputs = [single_traj_plot, single_traj_cost_plot]
         thinking_overhead.change(
             fn=on_calc_options_change,
             inputs=calc_options_inputs,
             outputs=calc_options_outputs,
+        ).then(
+            fn=on_single_traj_select,
+            inputs=single_traj_inputs,
+            outputs=single_traj_outputs,
         )
         use_cache.change(
             fn=on_calc_options_change,
             inputs=calc_options_inputs,
             outputs=calc_options_outputs,
+        ).then(
+            fn=on_single_traj_select,
+            inputs=single_traj_inputs,
+            outputs=single_traj_outputs,
         )
     return app