IgorSlinko committed on
Commit
403adb5
·
1 Parent(s): 10a0b43

Add Use Cache and Tokenizer Overhead support for single trajectory charts (v0.3.38)

Browse files
Files changed (1) hide show
  1. app.py +55 -23
app.py CHANGED
@@ -444,7 +444,7 @@ def create_single_trajectory_meta_cost_chart(steps: list[dict], input_price: flo
444
  return fig
445
 
446
 
447
- def create_single_trajectory_chart(steps: list[dict]):
448
  """Create stacked bar chart for a single trajectory showing tokens per step."""
449
  import plotly.graph_objects as go
450
 
@@ -454,18 +454,31 @@ def create_single_trajectory_chart(steps: list[dict]):
454
  per_step_data = calculate_per_step_tokens(steps)
455
 
456
  x_labels = [f"Step {d['step']}" for d in per_step_data]
457
- cache_read = [d["cache_read"] / 1e3 for d in per_step_data]
458
- cache_creation = [d["cache_creation"] / 1e3 for d in per_step_data]
459
- completion = [d["completion"] / 1e3 for d in per_step_data]
460
- prompt_tokens = [(d["cache_read"] + d["uncached_input"]) / 1e3 for d in per_step_data]
461
- uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens, cache_read, cache_creation)]
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
  fig = go.Figure()
464
 
465
  fig.add_trace(go.Bar(
466
  name="Uncached Input",
467
  x=x_labels,
468
- y=uncached,
469
  marker_color="#EF553B",
470
  hovertemplate="Step %{x}<br>Uncached Input: %{y:.2f}K<extra></extra>",
471
  ))
@@ -473,7 +486,7 @@ def create_single_trajectory_chart(steps: list[dict]):
473
  fig.add_trace(go.Bar(
474
  name="Cache Read",
475
  x=x_labels,
476
- y=cache_read,
477
  marker_color="#19D3F3",
478
  hovertemplate="Step %{x}<br>Cache Read: %{y:.2f}K<extra></extra>",
479
  ))
@@ -481,7 +494,7 @@ def create_single_trajectory_chart(steps: list[dict]):
481
  fig.add_trace(go.Bar(
482
  name="Cache Creation",
483
  x=x_labels,
484
- y=cache_creation,
485
  marker_color="#FFA15A",
486
  hovertemplate="Step %{x}<br>Cache Creation: %{y:.2f}K<extra></extra>",
487
  ))
@@ -489,7 +502,7 @@ def create_single_trajectory_chart(steps: list[dict]):
489
  fig.add_trace(go.Bar(
490
  name="Completion",
491
  x=x_labels,
492
- y=completion,
493
  marker_color="#AB63FA",
494
  hovertemplate="Step %{x}<br>Completion: %{y:.2f}K<extra></extra>",
495
  ))
@@ -505,7 +518,7 @@ def create_single_trajectory_chart(steps: list[dict]):
505
  return fig
506
 
507
 
508
- def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, cache_read_price: float, cache_creation_price: float, completion_price: float):
509
  """Create stacked bar chart for a single trajectory showing cost per step."""
510
  import plotly.graph_objects as go
511
 
@@ -515,16 +528,24 @@ def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, c
515
  per_step_data = calculate_per_step_tokens(steps)
516
 
517
  x_labels = [f"Step {d['step']}" for d in per_step_data]
518
- cache_read = [d["cache_read"] for d in per_step_data]
519
- cache_creation = [d["cache_creation"] for d in per_step_data]
520
- completion = [d["completion"] for d in per_step_data]
521
- prompt_tokens = [d["cache_read"] + d["uncached_input"] for d in per_step_data]
522
- uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens, cache_read, cache_creation)]
 
 
 
 
 
 
 
 
523
 
524
  uncached_cost = [u * input_price / 1e6 for u in uncached]
525
  cache_read_cost = [cr * cache_read_price / 1e6 for cr in cache_read]
526
  cache_creation_cost = [cc * cache_creation_price / 1e6 for cc in cache_creation]
527
- completion_cost = [c * completion_price / 1e6 for c in completion]
528
 
529
  fig = go.Figure()
530
 
@@ -1818,7 +1839,7 @@ def build_app():
1818
  """)
1819
  trajectories_state = gr.State(None)
1820
 
1821
- gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.37`")
1822
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1823
 
1824
  with gr.Row():
@@ -2860,15 +2881,15 @@ def build_app():
2860
  gr.update(),
2861
  )
2862
 
2863
- def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
2864
  if state_data is None or not issue_id:
2865
  return None, None
2866
  trajectory_steps = state_data.get("steps", {})
2867
  if issue_id not in trajectory_steps:
2868
  return None, None
2869
  steps = trajectory_steps[issue_id]
2870
- tokens_chart = create_single_trajectory_chart(steps)
2871
- cost_chart = create_single_trajectory_cost_chart(steps, input_price, cache_read_price, cache_creation_price, completion_price)
2872
  return tokens_chart, cost_chart
2873
 
2874
  def on_single_traj_meta_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
@@ -2904,7 +2925,7 @@ def build_app():
2904
  ],
2905
  ).then(
2906
  fn=on_single_traj_select,
2907
- inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion],
2908
  outputs=[single_traj_plot, single_traj_cost_plot],
2909
  ).then(
2910
  fn=on_single_traj_meta_select,
@@ -2969,7 +2990,7 @@ def build_app():
2969
 
2970
  single_traj_dropdown.change(
2971
  fn=on_single_traj_select,
2972
- inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion],
2973
  outputs=[single_traj_plot, single_traj_cost_plot],
2974
  )
2975
 
@@ -2979,16 +3000,27 @@ def build_app():
2979
  outputs=[single_traj_meta_plot, single_traj_meta_cost_plot],
2980
  )
2981
 
 
 
 
2982
  thinking_overhead.change(
2983
  fn=on_calc_options_change,
2984
  inputs=calc_options_inputs,
2985
  outputs=calc_options_outputs,
 
 
 
 
2986
  )
2987
 
2988
  use_cache.change(
2989
  fn=on_calc_options_change,
2990
  inputs=calc_options_inputs,
2991
  outputs=calc_options_outputs,
 
 
 
 
2992
  )
2993
 
2994
  return app
 
444
  return fig
445
 
446
 
447
+ def create_single_trajectory_chart(steps: list[dict], overhead: float = 1.0, with_cache: bool = True):
448
  """Create stacked bar chart for a single trajectory showing tokens per step."""
449
  import plotly.graph_objects as go
450
 
 
454
  per_step_data = calculate_per_step_tokens(steps)
455
 
456
  x_labels = [f"Step {d['step']}" for d in per_step_data]
457
+ cache_read_raw = [d["cache_read"] * overhead for d in per_step_data]
458
+ cache_creation_raw = [d["cache_creation"] * overhead for d in per_step_data]
459
+ completion_raw = [d["completion"] * overhead for d in per_step_data]
460
+ prompt_tokens_raw = [(d["cache_read"] + d["uncached_input"]) * overhead for d in per_step_data]
461
+
462
+ if with_cache:
463
+ uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens_raw, cache_read_raw, cache_creation_raw)]
464
+ cache_read = cache_read_raw
465
+ cache_creation = cache_creation_raw
466
+ else:
467
+ uncached = prompt_tokens_raw
468
+ cache_read = [0] * len(per_step_data)
469
+ cache_creation = [0] * len(per_step_data)
470
+
471
+ uncached_k = [u / 1e3 for u in uncached]
472
+ cache_read_k = [cr / 1e3 for cr in cache_read]
473
+ cache_creation_k = [cc / 1e3 for cc in cache_creation]
474
+ completion_k = [c / 1e3 for c in completion_raw]
475
 
476
  fig = go.Figure()
477
 
478
  fig.add_trace(go.Bar(
479
  name="Uncached Input",
480
  x=x_labels,
481
+ y=uncached_k,
482
  marker_color="#EF553B",
483
  hovertemplate="Step %{x}<br>Uncached Input: %{y:.2f}K<extra></extra>",
484
  ))
 
486
  fig.add_trace(go.Bar(
487
  name="Cache Read",
488
  x=x_labels,
489
+ y=cache_read_k,
490
  marker_color="#19D3F3",
491
  hovertemplate="Step %{x}<br>Cache Read: %{y:.2f}K<extra></extra>",
492
  ))
 
494
  fig.add_trace(go.Bar(
495
  name="Cache Creation",
496
  x=x_labels,
497
+ y=cache_creation_k,
498
  marker_color="#FFA15A",
499
  hovertemplate="Step %{x}<br>Cache Creation: %{y:.2f}K<extra></extra>",
500
  ))
 
502
  fig.add_trace(go.Bar(
503
  name="Completion",
504
  x=x_labels,
505
+ y=completion_k,
506
  marker_color="#AB63FA",
507
  hovertemplate="Step %{x}<br>Completion: %{y:.2f}K<extra></extra>",
508
  ))
 
518
  return fig
519
 
520
 
521
+ def create_single_trajectory_cost_chart(steps: list[dict], input_price: float, cache_read_price: float, cache_creation_price: float, completion_price: float, overhead: float = 1.0, with_cache: bool = True):
522
  """Create stacked bar chart for a single trajectory showing cost per step."""
523
  import plotly.graph_objects as go
524
 
 
528
  per_step_data = calculate_per_step_tokens(steps)
529
 
530
  x_labels = [f"Step {d['step']}" for d in per_step_data]
531
+ cache_read_raw = [d["cache_read"] * overhead for d in per_step_data]
532
+ cache_creation_raw = [d["cache_creation"] * overhead for d in per_step_data]
533
+ completion_raw = [d["completion"] * overhead for d in per_step_data]
534
+ prompt_tokens_raw = [(d["cache_read"] + d["uncached_input"]) * overhead for d in per_step_data]
535
+
536
+ if with_cache:
537
+ uncached = [max(0, p - cr - cc) for p, cr, cc in zip(prompt_tokens_raw, cache_read_raw, cache_creation_raw)]
538
+ cache_read = cache_read_raw
539
+ cache_creation = cache_creation_raw
540
+ else:
541
+ uncached = prompt_tokens_raw
542
+ cache_read = [0] * len(per_step_data)
543
+ cache_creation = [0] * len(per_step_data)
544
 
545
  uncached_cost = [u * input_price / 1e6 for u in uncached]
546
  cache_read_cost = [cr * cache_read_price / 1e6 for cr in cache_read]
547
  cache_creation_cost = [cc * cache_creation_price / 1e6 for cc in cache_creation]
548
+ completion_cost = [c * completion_price / 1e6 for c in completion_raw]
549
 
550
  fig = go.Figure()
551
 
 
1839
  """)
1840
  trajectories_state = gr.State(None)
1841
 
1842
+ gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.38`")
1843
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1844
 
1845
  with gr.Row():
 
2881
  gr.update(),
2882
  )
2883
 
2884
+ def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
2885
  if state_data is None or not issue_id:
2886
  return None, None
2887
  trajectory_steps = state_data.get("steps", {})
2888
  if issue_id not in trajectory_steps:
2889
  return None, None
2890
  steps = trajectory_steps[issue_id]
2891
+ tokens_chart = create_single_trajectory_chart(steps, overhead, with_cache)
2892
+ cost_chart = create_single_trajectory_cost_chart(steps, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache)
2893
  return tokens_chart, cost_chart
2894
 
2895
  def on_single_traj_meta_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price):
 
2925
  ],
2926
  ).then(
2927
  fn=on_single_traj_select,
2928
+ inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
2929
  outputs=[single_traj_plot, single_traj_cost_plot],
2930
  ).then(
2931
  fn=on_single_traj_meta_select,
 
2990
 
2991
  single_traj_dropdown.change(
2992
  fn=on_single_traj_select,
2993
+ inputs=[trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
2994
  outputs=[single_traj_plot, single_traj_cost_plot],
2995
  )
2996
 
 
3000
  outputs=[single_traj_meta_plot, single_traj_meta_cost_plot],
3001
  )
3002
 
3003
+ single_traj_inputs = [trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
3004
+ single_traj_outputs = [single_traj_plot, single_traj_cost_plot]
3005
+
3006
  thinking_overhead.change(
3007
  fn=on_calc_options_change,
3008
  inputs=calc_options_inputs,
3009
  outputs=calc_options_outputs,
3010
+ ).then(
3011
+ fn=on_single_traj_select,
3012
+ inputs=single_traj_inputs,
3013
+ outputs=single_traj_outputs,
3014
  )
3015
 
3016
  use_cache.change(
3017
  fn=on_calc_options_change,
3018
  inputs=calc_options_inputs,
3019
  outputs=calc_options_outputs,
3020
+ ).then(
3021
+ fn=on_single_traj_select,
3022
+ inputs=single_traj_inputs,
3023
+ outputs=single_traj_outputs,
3024
  )
3025
 
3026
  return app