IgorSlinko committed on
Commit
f734731
1 Parent(s): b8e0bce

Fix routing charts: remove uncached_input, add Use Cache support (v0.3.43)

Browse files

- Apply same uncached formula to routing charts (uncached_input = 0 with cache)
- Recalculate routing charts when Use Cache or Tokenizer Overhead changes

Files changed (1) hide show
  1. app.py +55 -5
app.py CHANGED
@@ -1839,7 +1839,7 @@ def build_app():
1839
  """)
1840
  trajectories_state = gr.State(None)
1841
 
1842
- gr.Markdown("# 🧮 SWE-bench analytics tool `v0.3.42`")
1843
  gr.Markdown("### *Calculate cost savings with different routing strategies.*")
1844
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1845
 
@@ -2638,8 +2638,32 @@ def build_app():
2638
  result_lines.append(f'| **Savings** | <span style="color: {savings_color}; font-weight: bold;">${savings:.2f} · {savings_pct:.1f}%</span> |')
2639
  result_text = "\n".join(result_lines)
2640
 
2641
- additional_token_models = [(rc["name"], rc["tokens"]) for rc in routing_costs_list]
2642
- additional_cost_models = [(rc["name"], rc["costs"]) for rc in routing_costs_list]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2643
 
2644
  if df_calc is not None and not df_calc.empty:
2645
  df_temp = df_for_cost.copy()
@@ -2651,12 +2675,12 @@ def build_app():
2651
  "completion": df_for_cost["completion_tokens"].sum(),
2652
  }
2653
  else:
2654
- original_tokens_from_df = total_original_tokens
2655
 
2656
  original_costs = tokens_to_costs(original_tokens_from_df, base_prices)
2657
 
2658
  base_model_name = detected_model_val or "Base"
2659
- tokens_chart = create_routed_token_chart(original_tokens_from_df, total_base_tokens, additional_token_models, base_model_name)
2660
  cost_chart = create_routed_cost_chart(original_costs, base_costs, additional_cost_models, base_model_name)
2661
 
2662
  yield (
@@ -3013,6 +3037,24 @@ def build_app():
3013
  single_traj_inputs = [trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
3014
  single_traj_outputs = [single_traj_plot, single_traj_cost_plot]
3015
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3016
  thinking_overhead.change(
3017
  fn=on_calc_options_change,
3018
  inputs=calc_options_inputs,
@@ -3021,6 +3063,10 @@ def build_app():
3021
  fn=on_single_traj_select,
3022
  inputs=single_traj_inputs,
3023
  outputs=single_traj_outputs,
 
 
 
 
3024
  )
3025
 
3026
  use_cache.change(
@@ -3031,6 +3077,10 @@ def build_app():
3031
  fn=on_single_traj_select,
3032
  inputs=single_traj_inputs,
3033
  outputs=single_traj_outputs,
 
 
 
 
3034
  )
3035
 
3036
  return app
 
1839
  """)
1840
  trajectories_state = gr.State(None)
1841
 
1842
+ gr.Markdown("# 🧮 SWE-bench analytics tool `v0.3.43`")
1843
  gr.Markdown("### *Calculate cost savings with different routing strategies.*")
1844
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1845
 
 
2638
  result_lines.append(f'| **Savings** | <span style="color: {savings_color}; font-weight: bold;">${savings:.2f} · {savings_pct:.1f}%</span> |')
2639
  result_text = "\n".join(result_lines)
2640
 
2641
+ def apply_display_formula(tokens: dict) -> dict:
2642
+ prompt = tokens["cache_read"] + tokens["uncached_input"]
2643
+ if with_cache:
2644
+ uncached_display = max(0, prompt - tokens["cache_read"] - tokens["cache_creation"])
2645
+ return {
2646
+ "uncached_input": uncached_display,
2647
+ "cache_read": tokens["cache_read"],
2648
+ "cache_creation": tokens["cache_creation"],
2649
+ "completion": tokens["completion"],
2650
+ }
2651
+ else:
2652
+ return {
2653
+ "uncached_input": prompt,
2654
+ "cache_read": 0,
2655
+ "cache_creation": 0,
2656
+ "completion": tokens["completion"],
2657
+ }
2658
+
2659
+ total_base_tokens_display = apply_display_formula(total_base_tokens)
2660
+ base_costs = tokens_to_costs(total_base_tokens_display, base_prices)
2661
+
2662
+ additional_token_models = [(rc["name"], apply_display_formula(rc["tokens"])) for rc in routing_costs_list]
2663
+ additional_cost_models = []
2664
+ for i, rc in enumerate(routing_costs_list):
2665
+ model_prices = routing_models[i]["prices"]
2666
+ additional_cost_models.append((rc["name"], tokens_to_costs(apply_display_formula(rc["tokens"]), model_prices)))
2667
 
2668
  if df_calc is not None and not df_calc.empty:
2669
  df_temp = df_for_cost.copy()
 
2675
  "completion": df_for_cost["completion_tokens"].sum(),
2676
  }
2677
  else:
2678
+ original_tokens_from_df = apply_display_formula(total_original_tokens)
2679
 
2680
  original_costs = tokens_to_costs(original_tokens_from_df, base_prices)
2681
 
2682
  base_model_name = detected_model_val or "Base"
2683
+ tokens_chart = create_routed_token_chart(original_tokens_from_df, total_base_tokens_display, additional_token_models, base_model_name)
2684
  cost_chart = create_routed_cost_chart(original_costs, base_costs, additional_cost_models, base_model_name)
2685
 
2686
  yield (
 
3037
  single_traj_inputs = [trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
3038
  single_traj_outputs = [single_traj_plot, single_traj_cost_plot]
3039
 
3040
+ routing_inputs = [
3041
+ trajectories_state,
3042
+ price_input, price_cache_read, price_cache_creation, price_completion,
3043
+ routing_model_1, routing_price_1_input, routing_price_1_cache_read, routing_price_1_cache_creation, routing_price_1_completion,
3044
+ routing_model_2, routing_price_2_input, routing_price_2_cache_read, routing_price_2_cache_creation, routing_price_2_completion,
3045
+ routing_model_3, routing_price_3_input, routing_price_3_cache_read, routing_price_3_cache_creation, routing_price_3_completion,
3046
+ selected_strategy,
3047
+ weight_base, weight_model_1, weight_model_2, weight_model_3,
3048
+ k_model_1, k_model_2, k_model_3,
3049
+ slice_model_1, slice_model_2, slice_model_3,
3050
+ grep_model_1, grep_model_2, grep_model_3,
3051
+ resolved_model, unresolved_model,
3052
+ part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
3053
+ thinking_overhead, use_cache,
3054
+ detected_model,
3055
+ ]
3056
+ routing_outputs = [routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot]
3057
+
3058
  thinking_overhead.change(
3059
  fn=on_calc_options_change,
3060
  inputs=calc_options_inputs,
 
3063
  fn=on_single_traj_select,
3064
  inputs=single_traj_inputs,
3065
  outputs=single_traj_outputs,
3066
+ ).then(
3067
+ fn=run_routing,
3068
+ inputs=routing_inputs,
3069
+ outputs=routing_outputs,
3070
  )
3071
 
3072
  use_cache.change(
 
3077
  fn=on_single_traj_select,
3078
  inputs=single_traj_inputs,
3079
  outputs=single_traj_outputs,
3080
+ ).then(
3081
+ fn=run_routing,
3082
+ inputs=routing_inputs,
3083
+ outputs=routing_outputs,
3084
  )
3085
 
3086
  return app