Commit
·
f734731
1
Parent(s):
b8e0bce
Fix routing charts: remove uncached_input, add Use Cache support (v0.3.43)
Browse files
- Apply same uncached formula to routing charts (uncached_input = 0 with cache)
- Recalculate routing charts when Use Cache or Tokenizer Overhead changes
app.py
CHANGED
|
@@ -1839,7 +1839,7 @@ def build_app():
|
|
| 1839 |
""")
|
| 1840 |
trajectories_state = gr.State(None)
|
| 1841 |
|
| 1842 |
-
gr.Markdown("# 🧮 SWE-bench analytics tool `v0.3.
|
| 1843 |
gr.Markdown("### *Calculate cost savings with different routing strategies.*")
|
| 1844 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1845 |
|
|
@@ -2638,8 +2638,32 @@ def build_app():
|
|
| 2638 |
result_lines.append(f'| **Savings** | <span style="color: {savings_color}; font-weight: bold;">${savings:.2f} · {savings_pct:.1f}%</span> |')
|
| 2639 |
result_text = "\n".join(result_lines)
|
| 2640 |
|
| 2641 |
-
|
| 2642 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2643 |
|
| 2644 |
if df_calc is not None and not df_calc.empty:
|
| 2645 |
df_temp = df_for_cost.copy()
|
|
@@ -2651,12 +2675,12 @@ def build_app():
|
|
| 2651 |
"completion": df_for_cost["completion_tokens"].sum(),
|
| 2652 |
}
|
| 2653 |
else:
|
| 2654 |
-
original_tokens_from_df = total_original_tokens
|
| 2655 |
|
| 2656 |
original_costs = tokens_to_costs(original_tokens_from_df, base_prices)
|
| 2657 |
|
| 2658 |
base_model_name = detected_model_val or "Base"
|
| 2659 |
-
tokens_chart = create_routed_token_chart(original_tokens_from_df,
|
| 2660 |
cost_chart = create_routed_cost_chart(original_costs, base_costs, additional_cost_models, base_model_name)
|
| 2661 |
|
| 2662 |
yield (
|
|
@@ -3013,6 +3037,24 @@ def build_app():
|
|
| 3013 |
single_traj_inputs = [trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
|
| 3014 |
single_traj_outputs = [single_traj_plot, single_traj_cost_plot]
|
| 3015 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3016 |
thinking_overhead.change(
|
| 3017 |
fn=on_calc_options_change,
|
| 3018 |
inputs=calc_options_inputs,
|
|
@@ -3021,6 +3063,10 @@ def build_app():
|
|
| 3021 |
fn=on_single_traj_select,
|
| 3022 |
inputs=single_traj_inputs,
|
| 3023 |
outputs=single_traj_outputs,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3024 |
)
|
| 3025 |
|
| 3026 |
use_cache.change(
|
|
@@ -3031,6 +3077,10 @@ def build_app():
|
|
| 3031 |
fn=on_single_traj_select,
|
| 3032 |
inputs=single_traj_inputs,
|
| 3033 |
outputs=single_traj_outputs,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3034 |
)
|
| 3035 |
|
| 3036 |
return app
|
|
|
|
| 1839 |
""")
|
| 1840 |
trajectories_state = gr.State(None)
|
| 1841 |
|
| 1842 |
+
gr.Markdown("# 🧮 SWE-bench analytics tool `v0.3.43`")
|
| 1843 |
gr.Markdown("### *Calculate cost savings with different routing strategies.*")
|
| 1844 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1845 |
|
|
|
|
| 2638 |
result_lines.append(f'| **Savings** | <span style="color: {savings_color}; font-weight: bold;">${savings:.2f} · {savings_pct:.1f}%</span> |')
|
| 2639 |
result_text = "\n".join(result_lines)
|
| 2640 |
|
| 2641 |
+
def apply_display_formula(tokens: dict) -> dict:
|
| 2642 |
+
prompt = tokens["cache_read"] + tokens["uncached_input"]
|
| 2643 |
+
if with_cache:
|
| 2644 |
+
uncached_display = max(0, prompt - tokens["cache_read"] - tokens["cache_creation"])
|
| 2645 |
+
return {
|
| 2646 |
+
"uncached_input": uncached_display,
|
| 2647 |
+
"cache_read": tokens["cache_read"],
|
| 2648 |
+
"cache_creation": tokens["cache_creation"],
|
| 2649 |
+
"completion": tokens["completion"],
|
| 2650 |
+
}
|
| 2651 |
+
else:
|
| 2652 |
+
return {
|
| 2653 |
+
"uncached_input": prompt,
|
| 2654 |
+
"cache_read": 0,
|
| 2655 |
+
"cache_creation": 0,
|
| 2656 |
+
"completion": tokens["completion"],
|
| 2657 |
+
}
|
| 2658 |
+
|
| 2659 |
+
total_base_tokens_display = apply_display_formula(total_base_tokens)
|
| 2660 |
+
base_costs = tokens_to_costs(total_base_tokens_display, base_prices)
|
| 2661 |
+
|
| 2662 |
+
additional_token_models = [(rc["name"], apply_display_formula(rc["tokens"])) for rc in routing_costs_list]
|
| 2663 |
+
additional_cost_models = []
|
| 2664 |
+
for i, rc in enumerate(routing_costs_list):
|
| 2665 |
+
model_prices = routing_models[i]["prices"]
|
| 2666 |
+
additional_cost_models.append((rc["name"], tokens_to_costs(apply_display_formula(rc["tokens"]), model_prices)))
|
| 2667 |
|
| 2668 |
if df_calc is not None and not df_calc.empty:
|
| 2669 |
df_temp = df_for_cost.copy()
|
|
|
|
| 2675 |
"completion": df_for_cost["completion_tokens"].sum(),
|
| 2676 |
}
|
| 2677 |
else:
|
| 2678 |
+
original_tokens_from_df = apply_display_formula(total_original_tokens)
|
| 2679 |
|
| 2680 |
original_costs = tokens_to_costs(original_tokens_from_df, base_prices)
|
| 2681 |
|
| 2682 |
base_model_name = detected_model_val or "Base"
|
| 2683 |
+
tokens_chart = create_routed_token_chart(original_tokens_from_df, total_base_tokens_display, additional_token_models, base_model_name)
|
| 2684 |
cost_chart = create_routed_cost_chart(original_costs, base_costs, additional_cost_models, base_model_name)
|
| 2685 |
|
| 2686 |
yield (
|
|
|
|
| 3037 |
single_traj_inputs = [trajectories_state, single_traj_dropdown, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
|
| 3038 |
single_traj_outputs = [single_traj_plot, single_traj_cost_plot]
|
| 3039 |
|
| 3040 |
+
routing_inputs = [
|
| 3041 |
+
trajectories_state,
|
| 3042 |
+
price_input, price_cache_read, price_cache_creation, price_completion,
|
| 3043 |
+
routing_model_1, routing_price_1_input, routing_price_1_cache_read, routing_price_1_cache_creation, routing_price_1_completion,
|
| 3044 |
+
routing_model_2, routing_price_2_input, routing_price_2_cache_read, routing_price_2_cache_creation, routing_price_2_completion,
|
| 3045 |
+
routing_model_3, routing_price_3_input, routing_price_3_cache_read, routing_price_3_cache_creation, routing_price_3_completion,
|
| 3046 |
+
selected_strategy,
|
| 3047 |
+
weight_base, weight_model_1, weight_model_2, weight_model_3,
|
| 3048 |
+
k_model_1, k_model_2, k_model_3,
|
| 3049 |
+
slice_model_1, slice_model_2, slice_model_3,
|
| 3050 |
+
grep_model_1, grep_model_2, grep_model_3,
|
| 3051 |
+
resolved_model, unresolved_model,
|
| 3052 |
+
part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
|
| 3053 |
+
thinking_overhead, use_cache,
|
| 3054 |
+
detected_model,
|
| 3055 |
+
]
|
| 3056 |
+
routing_outputs = [routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot]
|
| 3057 |
+
|
| 3058 |
thinking_overhead.change(
|
| 3059 |
fn=on_calc_options_change,
|
| 3060 |
inputs=calc_options_inputs,
|
|
|
|
| 3063 |
fn=on_single_traj_select,
|
| 3064 |
inputs=single_traj_inputs,
|
| 3065 |
outputs=single_traj_outputs,
|
| 3066 |
+
).then(
|
| 3067 |
+
fn=run_routing,
|
| 3068 |
+
inputs=routing_inputs,
|
| 3069 |
+
outputs=routing_outputs,
|
| 3070 |
)
|
| 3071 |
|
| 3072 |
use_cache.change(
|
|
|
|
| 3077 |
fn=on_single_traj_select,
|
| 3078 |
inputs=single_traj_inputs,
|
| 3079 |
outputs=single_traj_outputs,
|
| 3080 |
+
).then(
|
| 3081 |
+
fn=run_routing,
|
| 3082 |
+
inputs=routing_inputs,
|
| 3083 |
+
outputs=routing_outputs,
|
| 3084 |
)
|
| 3085 |
|
| 3086 |
return app
|