Commit
·
403adb5
1
Parent(s):
10a0b43
Add Use Cache and Tokenizer Overhead support for single trajectory charts (v0.3.38)
Browse files
app.py
CHANGED
|
@@ -444,7 +444,7 @@ def create_single_trajectory_meta_cost_chart(steps: list[dict], input_price: flo
|
|
| 444 |
return fig
|
| 445 |
|
| 446 |
|
| 447 |
-
def create_single_trajectory_chart(steps: list[dict]):
|
| 448 |
"""Create stacked bar chart for a single trajectory showing tokens per step."""
|
| 449 |
import plotly.graph_objects as go
|
| 450 |
|
|
@@ -454,18 +454,31 @@ def create_single_trajectory_chart(steps: list[dict]):
|
|
| 454 |
per_step_data = calculate_per_step_tokens(steps)
|
| 455 |
|
| 456 |
x_labels = [f"Step {d['step']}" for d in per_step_data]
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
fig = go.Figure()
|
| 464 |
|
| 465 |
fig.add_trace(go.Bar(
|
| 466 |
name="Uncached Input",
|
| 467 |
x=x_labels,
|
| 468 |
-
y=
|
| 469 |
marker_color="#EF553B",
|
| 470 |
hovertemplate="Step %{x}<br>Uncached Input: %{y:.2f}K<extra></extra>",
|
| 471 |
))
|
|
@@ -473,7 +486,7 @@ def create_single_trajectory_chart(steps: list[dict]):
|
|
| 473 |
fig.add_trace(go.Bar(
|
| 474 |
name="Cache Read",
|
| 475 |
x=x_labels,
|
| 476 |
-
y=
|
| 477 |
marker_color="#19D3F3",
|
| 478 |
hovertemplate="Step %{x}<br>Cache Read: %{y:.2f}K<extra></extra>",
|
| 479 |
))
|
|
@@ -481,7 +494,7 @@ def create_single_trajectory_chart(steps: list[dict]):
|
|
| 481 |
fig.add_trace(go.Bar(
|
| 482 |
name="Cache Creation",
|
| 483 |
x=x_labels,
|
| 484 |
-
y=
|
| 485 |
marker_color="#FFA15A",
|
| 486 |
hovertemplate="Step %{x}<br>Cache Creation: %{y:.2f}K<extra></extra>",
|
| 487 |
))
|
|
@@ -489,7 +502,7 @@ def create_single_trajectory_chart(steps: list[dict]):
|
|
| 489 |
fig.add_trace(go.Bar(
|
| 490 |
name="Completion",
|
| 491 |
x=x_labels,
|
| 492 |
-
y=
|
| 493 |
marker_color="#AB63FA",
|
| 494 |
hovertemplate="Step %{x}<br>Completion: %{y:.2f}K<extra></extra>",
|
| 495 |
))
|
|
@@ -505,7 +518,7 @@ def create_single_trajectory_chart(steps: list[dict]):
|
|
| 505 |
return fig
|
| 506 |
|
| 507 |
|
| 508 |
-
def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, cache_read_price: float, cache_creation_price: float, completion_price: float):
|
| 509 |
"""Create stacked bar chart for a single trajectory showing cost per step."""
|
| 510 |
import plotly.graph_objects as go
|
| 511 |
|
|
@@ -515,16 +528,24 @@ def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, c
|
|
| 515 |
per_step_data = calculate_per_step_tokens(steps)
|
| 516 |
|
| 517 |
x_labels = [f"Step {d['step']}" for d in per_step_data]
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
|
| 524 |
uncached_cost = [u * input_price / 1e6 for u in uncached]
|
| 525 |
cache_read_cost = [cr * cache_read_price / 1e6 for cr in cache_read]
|
| 526 |
cache_creation_cost = [cc * cache_creation_price / 1e6 for cc in cache_creation]
|
| 527 |
-
completion_cost = [c * completion_price / 1e6 for c in
|
| 528 |
|
| 529 |
fig = go.Figure()
|
| 530 |
|
|
@@ -1818,7 +1839,7 @@ def build_app():
|
|
| 1818 |
""")
|
| 1819 |
trajectories_state = gr.State(None)
|
| 1820 |
|
| 1821 |
-
gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.
|
| 1822 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1823 |
|
| 1824 |
with gr.Row():
|
|
@@ -2860,15 +2881,15 @@ def build_app():
|
|
| 2860 |
gr.update(),
|
| 2861 |
)
|
| 2862 |
|
| 2863 |
-
def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
|
| 2864 |
if state_data is None or not issue_id:
|
| 2865 |
return None, None
|
| 2866 |
trajectory_steps = state_data.get("steps", {})
|
| 2867 |
if issue_id not in trajectory_steps:
|
| 2868 |
return None, None
|
| 2869 |
steps = trajectory_steps[issue_id]
|
| 2870 |
-
tokens_chart = create_single_trajectory_chart(steps)
|
| 2871 |
-
cost_chart = create_single_trajectory_cost_chart(steps, input_price, cache_read_price, cache_creation_price, completion_price)
|
| 2872 |
return tokens_chart, cost_chart
|
| 2873 |
|
| 2874 |
def on_single_traj_meta_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
|
|
@@ -2904,7 +2925,7 @@ def build_app():
|
|
| 2904 |
],
|
| 2905 |
).then(
|
| 2906 |
fn=on_single_traj_select,
|
| 2907 |
-
inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion],
|
| 2908 |
outputs=[single_traj_plot, single_traj_cost_plot],
|
| 2909 |
).then(
|
| 2910 |
fn=on_single_traj_meta_select,
|
|
@@ -2969,7 +2990,7 @@ def build_app():
|
|
| 2969 |
|
| 2970 |
single_traj_dropdown.change(
|
| 2971 |
fn=on_single_traj_select,
|
| 2972 |
-
inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion],
|
| 2973 |
outputs=[single_traj_plot, single_traj_cost_plot],
|
| 2974 |
)
|
| 2975 |
|
|
@@ -2979,16 +3000,27 @@ def build_app():
|
|
| 2979 |
outputs=[single_traj_meta_plot, single_traj_meta_cost_plot],
|
| 2980 |
)
|
| 2981 |
|
|
|
|
|
|
|
|
|
|
| 2982 |
thinking_overhead.change(
|
| 2983 |
fn=on_calc_options_change,
|
| 2984 |
inputs=calc_options_inputs,
|
| 2985 |
outputs=calc_options_outputs,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2986 |
)
|
| 2987 |
|
| 2988 |
use_cache.change(
|
| 2989 |
fn=on_calc_options_change,
|
| 2990 |
inputs=calc_options_inputs,
|
| 2991 |
outputs=calc_options_outputs,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2992 |
)
|
| 2993 |
|
| 2994 |
return app
|
|
|
|
| 444 |
return fig
|
| 445 |
|
| 446 |
|
| 447 |
+
def create_single_trajectory_chart(steps: list[dict], overhead: float = 1.0, with_cache: bool = True):
|
| 448 |
"""Create stacked bar chart for a single trajectory showing tokens per step."""
|
| 449 |
import plotly.graph_objects as go
|
| 450 |
|
|
|
|
| 454 |
per_step_data = calculate_per_step_tokens(steps)
|
| 455 |
|
| 456 |
x_labels = [f"Step {d['step']}" for d in per_step_data]
|
| 457 |
+
cache_read_raw = [d["cache_read"] * overhead for d in per_step_data]
|
| 458 |
+
cache_creation_raw = [d["cache_creation"] * overhead for d in per_step_data]
|
| 459 |
+
completion_raw = [d["completion"] * overhead for d in per_step_data]
|
| 460 |
+
prompt_tokens_raw = [(d["cache_read"] + d["uncached_input"]) * overhead for d in per_step_data]
|
| 461 |
+
|
| 462 |
+
if with_cache:
|
| 463 |
+
uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens_raw, cache_read_raw, cache_creation_raw)]
|
| 464 |
+
cache_read = cache_read_raw
|
| 465 |
+
cache_creation = cache_creation_raw
|
| 466 |
+
else:
|
| 467 |
+
uncached = prompt_tokens_raw
|
| 468 |
+
cache_read = [0] * len(per_step_data)
|
| 469 |
+
cache_creation = [0] * len(per_step_data)
|
| 470 |
+
|
| 471 |
+
uncached_k = [u / 1e3 for u in uncached]
|
| 472 |
+
cache_read_k = [cr / 1e3 for cr in cache_read]
|
| 473 |
+
cache_creation_k = [cc / 1e3 for cc in cache_creation]
|
| 474 |
+
completion_k = [c / 1e3 for c in completion_raw]
|
| 475 |
|
| 476 |
fig = go.Figure()
|
| 477 |
|
| 478 |
fig.add_trace(go.Bar(
|
| 479 |
name="Uncached Input",
|
| 480 |
x=x_labels,
|
| 481 |
+
y=uncached_k,
|
| 482 |
marker_color="#EF553B",
|
| 483 |
hovertemplate="Step %{x}<br>Uncached Input: %{y:.2f}K<extra></extra>",
|
| 484 |
))
|
|
|
|
| 486 |
fig.add_trace(go.Bar(
|
| 487 |
name="Cache Read",
|
| 488 |
x=x_labels,
|
| 489 |
+
y=cache_read_k,
|
| 490 |
marker_color="#19D3F3",
|
| 491 |
hovertemplate="Step %{x}<br>Cache Read: %{y:.2f}K<extra></extra>",
|
| 492 |
))
|
|
|
|
| 494 |
fig.add_trace(go.Bar(
|
| 495 |
name="Cache Creation",
|
| 496 |
x=x_labels,
|
| 497 |
+
y=cache_creation_k,
|
| 498 |
marker_color="#FFA15A",
|
| 499 |
hovertemplate="Step %{x}<br>Cache Creation: %{y:.2f}K<extra></extra>",
|
| 500 |
))
|
|
|
|
| 502 |
fig.add_trace(go.Bar(
|
| 503 |
name="Completion",
|
| 504 |
x=x_labels,
|
| 505 |
+
y=completion_k,
|
| 506 |
marker_color="#AB63FA",
|
| 507 |
hovertemplate="Step %{x}<br>Completion: %{y:.2f}K<extra></extra>",
|
| 508 |
))
|
|
|
|
| 518 |
return fig
|
| 519 |
|
| 520 |
|
| 521 |
+
def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, cache_read_price: float, cache_creation_price: float, completion_price: float, overhead: float = 1.0, with_cache: bool = True):
|
| 522 |
"""Create stacked bar chart for a single trajectory showing cost per step."""
|
| 523 |
import plotly.graph_objects as go
|
| 524 |
|
|
|
|
| 528 |
per_step_data = calculate_per_step_tokens(steps)
|
| 529 |
|
| 530 |
x_labels = [f"Step {d['step']}" for d in per_step_data]
|
| 531 |
+
cache_read_raw = [d["cache_read"] * overhead for d in per_step_data]
|
| 532 |
+
cache_creation_raw = [d["cache_creation"] * overhead for d in per_step_data]
|
| 533 |
+
completion_raw = [d["completion"] * overhead for d in per_step_data]
|
| 534 |
+
prompt_tokens_raw = [(d["cache_read"] + d["uncached_input"]) * overhead for d in per_step_data]
|
| 535 |
+
|
| 536 |
+
if with_cache:
|
| 537 |
+
uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens_raw, cache_read_raw, cache_creation_raw)]
|
| 538 |
+
cache_read = cache_read_raw
|
| 539 |
+
cache_creation = cache_creation_raw
|
| 540 |
+
else:
|
| 541 |
+
uncached = prompt_tokens_raw
|
| 542 |
+
cache_read = [0] * len(per_step_data)
|
| 543 |
+
cache_creation = [0] * len(per_step_data)
|
| 544 |
|
| 545 |
uncached_cost = [u * input_price / 1e6 for u in uncached]
|
| 546 |
cache_read_cost = [cr * cache_read_price / 1e6 for cr in cache_read]
|
| 547 |
cache_creation_cost = [cc * cache_creation_price / 1e6 for cc in cache_creation]
|
| 548 |
+
completion_cost = [c * completion_price / 1e6 for c in completion_raw]
|
| 549 |
|
| 550 |
fig = go.Figure()
|
| 551 |
|
|
|
|
| 1839 |
""")
|
| 1840 |
trajectories_state = gr.State(None)
|
| 1841 |
|
| 1842 |
+
gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.38`")
|
| 1843 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1844 |
|
| 1845 |
with gr.Row():
|
|
|
|
| 2881 |
gr.update(),
|
| 2882 |
)
|
| 2883 |
|
| 2884 |
+
def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
|
| 2885 |
if state_data is None or not issue_id:
|
| 2886 |
return None, None
|
| 2887 |
trajectory_steps = state_data.get("steps", {})
|
| 2888 |
if issue_id not in trajectory_steps:
|
| 2889 |
return None, None
|
| 2890 |
steps = trajectory_steps[issue_id]
|
| 2891 |
+
tokens_chart = create_single_trajectory_chart(steps, overhead, with_cache)
|
| 2892 |
+
cost_chart = create_single_trajectory_cost_chart(steps, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache)
|
| 2893 |
return tokens_chart, cost_chart
|
| 2894 |
|
| 2895 |
def on_single_traj_meta_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
|
|
|
|
| 2925 |
],
|
| 2926 |
).then(
|
| 2927 |
fn=on_single_traj_select,
|
| 2928 |
+
inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
|
| 2929 |
outputs=[single_traj_plot, single_traj_cost_plot],
|
| 2930 |
).then(
|
| 2931 |
fn=on_single_traj_meta_select,
|
|
|
|
| 2990 |
|
| 2991 |
single_traj_dropdown.change(
|
| 2992 |
fn=on_single_traj_select,
|
| 2993 |
+
inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
|
| 2994 |
outputs=[single_traj_plot, single_traj_cost_plot],
|
| 2995 |
)
|
| 2996 |
|
|
|
|
| 3000 |
outputs=[single_traj_meta_plot, single_traj_meta_cost_plot],
|
| 3001 |
)
|
| 3002 |
|
| 3003 |
+
single_traj_inputs = [trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
|
| 3004 |
+
single_traj_outputs = [single_traj_plot, single_traj_cost_plot]
|
| 3005 |
+
|
| 3006 |
thinking_overhead.change(
|
| 3007 |
fn=on_calc_options_change,
|
| 3008 |
inputs=calc_options_inputs,
|
| 3009 |
outputs=calc_options_outputs,
|
| 3010 |
+
).then(
|
| 3011 |
+
fn=on_single_traj_select,
|
| 3012 |
+
inputs=single_traj_inputs,
|
| 3013 |
+
outputs=single_traj_outputs,
|
| 3014 |
)
|
| 3015 |
|
| 3016 |
use_cache.change(
|
| 3017 |
fn=on_calc_options_change,
|
| 3018 |
inputs=calc_options_inputs,
|
| 3019 |
outputs=calc_options_outputs,
|
| 3020 |
+
).then(
|
| 3021 |
+
fn=on_single_traj_select,
|
| 3022 |
+
inputs=single_traj_inputs,
|
| 3023 |
+
outputs=single_traj_outputs,
|
| 3024 |
)
|
| 3025 |
|
| 3026 |
return app
|