Spaces:

JetBrains-Research
/

SWE-bench-Costs-Calculator

Sleeping

App Files Community

IgorSlinko committed 7 days ago

Commit

745add3

1 Parent(s): 0c41621

Fix router strategy params visibility

Browse files

Files changed (1) hide show

app.py +211 -171

app.py CHANGED Viewed

@@ -700,7 +700,7 @@ def create_token_charts(df: pd.DataFrame, input_price: float, cache_read_price:
         xaxis_title="Trajectory (sorted by total tokens)",
         yaxis_title="Tokens (M)",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
-        margin=dict(l=50, r=20, t=60, b=40),
     )
     return fig_tokens, fig_tokens_cost, fig_stacked
@@ -836,7 +836,7 @@ def create_basic_histograms(df: pd.DataFrame, input_price: float, cache_read_pri
         xaxis_title="Trajectory (sorted by total tokens)",
         yaxis_title="Tokens (M)",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
-        margin=dict(l=50, r=20, t=60, b=40),
     )
     return fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked
@@ -905,7 +905,7 @@ def create_cost_breakdown(df: pd.DataFrame, input_price: float, cache_read_price
         xaxis_title="Trajectory (sorted by total tokens)",
         yaxis_title="Cost ($)",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
-        margin=dict(l=50, r=20, t=60, b=40),
     )
     fig.add_annotation(
@@ -1155,7 +1155,7 @@ def build_app():
     with gr.Blocks(title="SWE-bench Routing Cost Calculator") as app:
         trajectories_state = gr.State(None)
-        gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard")
         gr.Markdown("Select a model to use as base for cost analysis")
         with gr.Row():
@@ -1170,21 +1170,37 @@ def build_app():
                 with gr.Column(visible=False) as analysis_section:
                     gr.Markdown("## 📊 Trajectory Analysis")
-                    with gr.Row():
-                        plot_steps = gr.Plot(label="API Calls Distribution")
-                        plot_cost = gr.Plot(label="Cost Distribution")
-                    with gr.Row():
-                        plot_tokens = gr.Plot(label="Token Usage by Type")
-                        plot_tokens_cost = gr.Plot(label="Cost by Token Type ($)")
-                    with gr.Row():
-                        plot_stacked = gr.Plot(label="Tokens per Trajectory")
-                        plot_cost_breakdown = gr.Plot(label="Cost per Trajectory ($)")
-                    with gr.Row(visible=False) as routing_plots_row:
-                        routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
-                        routing_cost_plot = gr.Plot(label="Cost by Type (per Model)")
             with gr.Column(scale=1):
                 selected_folder = gr.State("")
@@ -1204,23 +1220,17 @@ def build_app():
                     price_completion = gr.Number(label="Completion", value=0, precision=2, scale=1)
                 gr.Markdown("---")
-                gr.Markdown("### 📊 Token Count Source")
-                token_source = gr.Radio(
-                    choices=["Metadata", "Calculated"],
-                    value="Metadata",
-                )
                 thinking_overhead = gr.Number(
-                    label="🔢 Tokenizer Overhead",
                     value=1.21,
                     precision=2,
                     info="Multiplier for Calculated tokens (tiktoken → native)",
-                    visible=False,
                 )
                 use_cache = gr.Checkbox(
                     label="Use Cache",
                     value=True,
                     info="If disabled, all tokens are Uncached Input or Completion",
-                    visible=False,
                 )
                 gr.Markdown("---")
@@ -1282,37 +1292,40 @@ def build_app():
                     gr.Markdown("### 🎯 Router Strategy")
                     selected_strategy = gr.Radio(
-                        choices=["Random weights", "Every k-th step", "Replace part of trajectory"],
-                        value="Random weights",
                         label="Strategy",
                         interactive=True,
                     )
-                    random_hint = gr.Markdown("*Weights must sum to 1.0*", visible=True)
-                    weight_base = gr.Number(label="Base weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True, visible=True)
-                    weight_model_1 = gr.Number(label="Model 1 weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True, visible=True)
-                    weight_model_2 = gr.Number(label="Model 2 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
-                    weight_model_3 = gr.Number(label="Model 3 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
-                    every_k_hint = gr.Markdown("*First model has priority on overlaps*", visible=False)
-                    k_model_1 = gr.Number(label="k₁ (Model 1)", value=2, minimum=1, precision=0, interactive=True, visible=False)
-                    k_model_2 = gr.Number(label="k₂ (Model 2)", value=3, minimum=1, precision=0, interactive=True, visible=False)
-                    k_model_3 = gr.Number(label="k₃ (Model 3)", value=5, minimum=1, precision=0, interactive=True, visible=False)
-                    part_hint = gr.Markdown("*Ranges must not overlap*", visible=False)
-                    part_mode = gr.Radio(
-                        choices=["Indexes", "Percentages"],
-                        value="Percentages",
-                        label="Mode",
-                        interactive=True,
-                        visible=False,
-                    )
-                    start_1 = gr.Number(label="M1 Start", value=0, minimum=0, precision=0, interactive=True, visible=False)
-                    end_1 = gr.Number(label="M1 End", value=30, minimum=0, precision=0, interactive=True, visible=False)
-                    start_2 = gr.Number(label="M2 Start", value=30, minimum=0, precision=0, interactive=True, visible=False)
-                    end_2 = gr.Number(label="M2 End", value=60, minimum=0, precision=0, interactive=True, visible=False)
-                    start_3 = gr.Number(label="M3 Start", value=60, minimum=0, precision=0, interactive=True, visible=False)
-                    end_3 = gr.Number(label="M3 End", value=100, minimum=0, precision=0, interactive=True, visible=False)
                     gr.Markdown("---")
                     route_btn = gr.Button("🚀 Let's ROUTE!!", variant="primary", size="lg", interactive=False)
@@ -1327,30 +1340,43 @@ def build_app():
             outputs=[routing_section],
         )
-        def on_strategy_change(strategy):
-            is_random = strategy == "Random weights"
-            is_every_k = strategy == "Every k-th step"
-            is_part = strategy == "Replace part of trajectory"
-            print(f"DEBUG on_strategy_change: strategy={strategy}")
-            return (
-                gr.update(visible=is_random),
-                gr.update(visible=is_random),
-                gr.update(visible=is_random),
-                gr.update(visible=is_every_k),
-                gr.update(visible=is_every_k),
-                gr.update(visible=is_part),
-                gr.update(visible=is_part),
-                gr.update(visible=is_part),
-                gr.update(visible=is_part),
-            )
         selected_strategy.change(
             fn=on_strategy_change,
-            inputs=[selected_strategy],
             outputs=[
-                random_hint, weight_base, weight_model_1,
-                every_k_hint, k_model_1,
-                part_hint, part_mode, start_1, end_1,
             ],
         )
@@ -1425,22 +1451,23 @@ def build_app():
         )
         def show_model_2(strategy):
-            is_random = strategy == "Random weights"
             is_every_k = strategy == "Every k-th step"
             is_part = strategy == "Replace part of trajectory"
             return (
-                gr.update(visible=True),
-                gr.update(visible=False),
-                gr.update(visible=is_random),
-                gr.update(visible=is_every_k),
-                gr.update(visible=is_part),
-                gr.update(visible=is_part),
             )
         add_model_2_btn.click(
             fn=show_model_2,
             inputs=[selected_strategy],
-            outputs=[routing_block_2, add_model_2_btn, weight_model_2, k_model_2, start_2, end_2],
         )
         routing_model_2.change(
@@ -1450,22 +1477,23 @@ def build_app():
         )
         def show_model_3(strategy):
-            is_random = strategy == "Random weights"
             is_every_k = strategy == "Every k-th step"
             is_part = strategy == "Replace part of trajectory"
             return (
-                gr.update(visible=True),
-                gr.update(visible=False),
-                gr.update(visible=is_random),
-                gr.update(visible=is_every_k),
-                gr.update(visible=is_part),
-                gr.update(visible=is_part),
             )
         add_model_3_btn.click(
             fn=show_model_3,
             inputs=[selected_strategy],
-            outputs=[routing_block_3, add_model_3_btn, weight_model_3, k_model_3, start_3, end_3],
         )
         routing_model_3.change(
@@ -1484,7 +1512,7 @@ def build_app():
             weight_base_val, weight_1_val, weight_2_val, weight_3_val,
             k_1_val, k_2_val, k_3_val,
             part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
-            source, overhead, with_cache
         ):
             if state_data is None:
                 yield (
@@ -1571,7 +1599,7 @@ def build_app():
                             return
             weights = None
-            if strategy_val == "Random weights":
                 weights = [weight_base_val, weight_1_val]
                 if len(routing_models) > 1:
                     weights.append(weight_2_val)
@@ -1599,7 +1627,7 @@ def build_app():
                 step_to_model = {}
-                if strategy_val == "Random weights":
                     model_choices = [BASE_MODEL] + [f"__routing_{j}__" for j in range(len(routing_models))]
                     for i in range(total_steps):
                         step_to_model[i] = random.choices(model_choices, weights=weights)[0]
@@ -1629,7 +1657,7 @@ def build_app():
                     modified_steps.append({
                         "model": model,
                         "system_user": step.get("system_user", 0),
-                        "completion": int(step.get("completion", 0) * (overhead if source == "Calculated" else 1)),
                         "observation": step.get("observation"),
                     })
@@ -1647,7 +1675,7 @@ def build_app():
                     original_steps.append({
                         "model": BASE_MODEL,
                         "system_user": step.get("system_user", 0),
-                        "completion": int(step.get("completion", 0) * (overhead if source == "Calculated" else 1)),
                         "observation": step.get("observation"),
                     })
                 original_totals = calculate_routing_tokens(original_steps)
@@ -1738,32 +1766,24 @@ def build_app():
                 weight_base, weight_model_1, weight_model_2, weight_model_3,
                 k_model_1, k_model_2, k_model_3,
                 part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
-                token_source, thinking_overhead, use_cache,
             ],
             outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
         )
-        def update_calculated_options_visibility(source):
-            is_calc = source == "Calculated"
-            return gr.update(visible=is_calc), gr.update(visible=is_calc)
-        token_source.change(
-            fn=update_calculated_options_visibility,
-            inputs=[token_source],
-            outputs=[thinking_overhead, use_cache],
-        )
         leaderboard_table.select(
             fn=on_row_select,
             inputs=[leaderboard_table],
             outputs=[selected_folder, selected_name, analyze_btn, price_input, price_cache_read, price_cache_creation, price_completion, detected_model, thinking_overhead],
         )
-        def load_and_analyze(folder, input_price, cache_read_price, cache_creation_price, completion_price, source, overhead, with_cache, progress=gr.Progress()):
             empty_result = (
                 "",
                 gr.update(visible=False),
-                None, None, None, None, None, None,
                 None,
                 gr.update(visible=False),
             )
@@ -1776,7 +1796,9 @@ def build_app():
                 yield (
                     "⏳ Downloading trajectories...",
                     gr.update(visible=False),
-                    None, None, None, None, None, None,
                     None,
                     gr.update(visible=False),
                 )
@@ -1785,7 +1807,9 @@ def build_app():
                     yield (
                         status,
                         gr.update(visible=False),
-                        None, None, None, None, None, None,
                         None,
                         gr.update(visible=False),
                     )
@@ -1794,7 +1818,9 @@ def build_app():
             yield (
                 "⏳ Loading trajectories...",
                 gr.update(visible=True),
-                None, None, None, None, None, None,
                 None,
                 gr.update(visible=False),
             )
@@ -1807,115 +1833,129 @@ def build_app():
             state_data = {"meta": df_meta, "calculated": df_calc, "folder": folder, "steps": trajectory_steps}
-            if source == "Metadata":
-                df = df_meta
-            else:
-                df = apply_thinking_overhead(df_calc.copy(), overhead)
-                if not with_cache:
-                    df = apply_no_cache(df)
-            if df.empty:
                 yield (
                     "❌ No trajectories found",
                     gr.update(visible=False),
-                    None, None, None, None, None, None,
                     None,
                     gr.update(visible=False),
                 )
                 return
-            fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked = create_basic_histograms(
-                df, input_price, cache_read_price, cache_creation_price, completion_price
             )
-            fig_cost_breakdown = create_cost_breakdown(df, input_price, cache_read_price, cache_creation_price, completion_price)
             yield (
-                f"✅ Loaded {len(df)} trajectories",
                 gr.update(visible=True),
-                fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked, fig_cost_breakdown,
                 state_data,
                 gr.update(visible=True),
             )
         analyze_btn.click(
             fn=load_and_analyze,
-            inputs=[selected_folder, price_input, price_cache_read, price_cache_creation, price_completion, token_source, thinking_overhead, use_cache],
             outputs=[
                 download_status,
                 analysis_section,
-                plot_steps, plot_cost, plot_tokens, plot_tokens_cost, plot_stacked, plot_cost_breakdown,
                 trajectories_state,
                 add_routing_btn,
             ],
         )
-        def recalculate_costs(state_data, input_price, cache_read_price, cache_creation_price, completion_price, source, overhead, with_cache):
             if state_data is None:
-                return None, None
-            if source == "Metadata":
-                df = state_data["meta"]
-            else:
-                df = apply_thinking_overhead(state_data["calculated"].copy(), overhead)
-                if not with_cache:
-                    df = apply_no_cache(df)
-            if df.empty:
-                return None, None
-            fig_tokens_cost = create_cost_by_type_chart(df, input_price, cache_read_price, cache_creation_price, completion_price)
-            fig_cost_breakdown = create_cost_breakdown(df, input_price, cache_read_price, cache_creation_price, completion_price)
-            return fig_tokens_cost, fig_cost_breakdown
-        price_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, token_source, thinking_overhead, use_cache]
-        price_outputs = [plot_tokens_cost, plot_cost_breakdown]
         price_input.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
         price_cache_read.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
         price_cache_creation.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
         price_completion.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
-        def on_source_change(state_data, input_price, cache_read_price, cache_creation_price, completion_price, source, overhead, with_cache):
-            """Recalculate only token-dependent charts when source changes"""
             if state_data is None:
                 return None, None, None, None
-            if source == "Metadata":
-                df = state_data["meta"]
-            else:
-                df = apply_thinking_overhead(state_data["calculated"].copy(), overhead)
-                if not with_cache:
-                    df = apply_no_cache(df)
-            if df.empty:
                 return None, None, None, None
-            fig_tokens, fig_tokens_cost, fig_stacked = create_token_charts(
-                df, input_price, cache_read_price, cache_creation_price, completion_price
-            )
-            fig_cost_breakdown = create_cost_breakdown(df, input_price, cache_read_price, cache_creation_price, completion_price)
-            return fig_tokens, fig_tokens_cost, fig_stacked, fig_cost_breakdown
-        source_change_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, token_source, thinking_overhead, use_cache]
-        source_change_outputs = [plot_tokens, plot_tokens_cost, plot_stacked, plot_cost_breakdown]
-        token_source.change(
-            fn=on_source_change,
-            inputs=source_change_inputs,
-            outputs=source_change_outputs,
-        )
         thinking_overhead.change(
-            fn=on_source_change,
-            inputs=source_change_inputs,
-            outputs=source_change_outputs,
         )
         use_cache.change(
-            fn=on_source_change,
-            inputs=source_change_inputs,
-            outputs=source_change_outputs,
         )
     return app

         xaxis_title="Trajectory (sorted by total tokens)",
         yaxis_title="Tokens (M)",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+        margin=dict(l=50, r=20, t=80, b=40),
     )
     return fig_tokens, fig_tokens_cost, fig_stacked
         xaxis_title="Trajectory (sorted by total tokens)",
         yaxis_title="Tokens (M)",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+        margin=dict(l=50, r=20, t=80, b=40),
     )
     return fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked
         xaxis_title="Trajectory (sorted by total tokens)",
         yaxis_title="Cost ($)",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+        margin=dict(l=50, r=20, t=80, b=40),
     )
     fig.add_annotation(
     with gr.Blocks(title="SWE-bench Routing Cost Calculator") as app:
         trajectories_state = gr.State(None)
+        gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.9`")
         gr.Markdown("Select a model to use as base for cost analysis")
         with gr.Row():
                 with gr.Column(visible=False) as analysis_section:
                     gr.Markdown("## 📊 Trajectory Analysis")
+                    with gr.Accordion("Leaderboard data", open=True):
+                        with gr.Row():
+                            plot_steps = gr.Plot(label="API Calls Distribution")
+                            plot_cost = gr.Plot(label="Cost Distribution")
+                    with gr.Accordion("Metadata from .traj", open=True):
+                        with gr.Row():
+                            plot_tokens_meta = gr.Plot(label="Token Usage by Type")
+                            plot_tokens_cost_meta = gr.Plot(label="Cost by Token Type")
+                    with gr.Accordion("Metadata from .traj by trajectory", open=False):
+                        with gr.Row():
+                            plot_stacked_meta = gr.Plot(label="Tokens per Trajectory")
+                        with gr.Row():
+                            plot_cost_breakdown_meta = gr.Plot(label="Cost per Trajectory")
+                    with gr.Accordion("Calculated from .traj messages", open=True):
+                        with gr.Row():
+                            plot_tokens_calc = gr.Plot(label="Token Usage by Type")
+                            plot_tokens_cost_calc = gr.Plot(label="Cost by Token Type")
+                    with gr.Accordion("Calculated from .traj messages by trajectory", open=False):
+                        with gr.Row():
+                            plot_stacked_calc = gr.Plot(label="Tokens per Trajectory")
+                        with gr.Row():
+                            plot_cost_breakdown_calc = gr.Plot(label="Cost per Trajectory")
+                    with gr.Accordion("Calculated with routing", open=False, visible=False) as routing_plots_row:
+                        with gr.Row():
+                            routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
+                            routing_cost_plot = gr.Plot(label="Cost by Type (per Model)")
             with gr.Column(scale=1):
                 selected_folder = gr.State("")
                     price_completion = gr.Number(label="Completion", value=0, precision=2, scale=1)
                 gr.Markdown("---")
+                gr.Markdown("### 🔢 Calculated Token Options")
                 thinking_overhead = gr.Number(
+                    label="Tokenizer Overhead",
                     value=1.21,
                     precision=2,
                     info="Multiplier for Calculated tokens (tiktoken → native)",
                 )
                 use_cache = gr.Checkbox(
                     label="Use Cache",
                     value=True,
                     info="If disabled, all tokens are Uncached Input or Completion",
                 )
                 gr.Markdown("---")
                     gr.Markdown("### 🎯 Router Strategy")
                     selected_strategy = gr.Radio(
+                        choices=["Random router", "Every k-th step", "Replace part of trajectory"],
+                        value="Random router",
                         label="Strategy",
                         interactive=True,
                     )
+                    num_routing_models = gr.State(1)
+                    with gr.Column(visible=True) as random_block:
+                        random_hint = gr.Markdown("*Weights must sum to 1.0*")
+                        weight_base = gr.Number(label="Base weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True)
+                        weight_model_1 = gr.Number(label="Model 1 weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True)
+                        weight_model_2 = gr.Number(label="Model 2 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
+                        weight_model_3 = gr.Number(label="Model 3 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
+                    with gr.Column(visible=False) as every_k_block:
+                        every_k_hint = gr.Markdown("*First model has priority on overlaps*")
+                        k_model_1 = gr.Number(label="k₁ (Model 1)", value=2, minimum=1, precision=0, interactive=True)
+                        k_model_2 = gr.Number(label="k₂ (Model 2)", value=3, minimum=1, precision=0, interactive=True, visible=False)
+                        k_model_3 = gr.Number(label="k₃ (Model 3)", value=5, minimum=1, precision=0, interactive=True, visible=False)
+                    with gr.Column(visible=False) as part_block:
+                        part_hint = gr.Markdown("*Ranges must not overlap*")
+                        part_mode = gr.Radio(
+                            choices=["Indexes", "Percentages"],
+                            value="Percentages",
+                            label="Mode",
+                            interactive=True,
+                        )
+                        start_1 = gr.Number(label="M1 Start", value=0, minimum=0, precision=0, interactive=True)
+                        end_1 = gr.Number(label="M1 End", value=30, minimum=0, precision=0, interactive=True)
+                        start_2 = gr.Number(label="M2 Start", value=30, minimum=0, precision=0, interactive=True, visible=False)
+                        end_2 = gr.Number(label="M2 End", value=60, minimum=0, precision=0, interactive=True, visible=False)
+                        start_3 = gr.Number(label="M3 Start", value=60, minimum=0, precision=0, interactive=True, visible=False)
+                        end_3 = gr.Number(label="M3 End", value=100, minimum=0, precision=0, interactive=True, visible=False)
                     gr.Markdown("---")
                     route_btn = gr.Button("🚀 Let's ROUTE!!", variant="primary", size="lg", interactive=False)
             outputs=[routing_section],
         )
+        def on_strategy_change(strategy, num_models):
+            show_random = strategy == "Random router"
+            show_every_k = strategy == "Every k-th step"
+            show_part = strategy == "Replace part of trajectory"
+            has_m2 = num_models >= 2
+            has_m3 = num_models >= 3
+            return [
+                gr.update(visible=show_random),       # random_block
+                gr.update(visible=show_every_k),      # every_k_block
+                gr.update(visible=show_part),         # part_block
+                gr.update(visible=show_random),       # random_hint
+                gr.update(visible=show_random),       # weight_base
+                gr.update(visible=show_random),       # weight_model_1
+                gr.update(visible=show_random and has_m2),  # weight_model_2
+                gr.update(visible=show_random and has_m3),  # weight_model_3
+                gr.update(visible=show_every_k),      # every_k_hint
+                gr.update(visible=show_every_k),      # k_model_1
+                gr.update(visible=show_every_k and has_m2), # k_model_2
+                gr.update(visible=show_every_k and has_m3), # k_model_3
+                gr.update(visible=show_part),         # part_hint
+                gr.update(visible=show_part),         # part_mode
+                gr.update(visible=show_part),         # start_1
+                gr.update(visible=show_part),         # end_1
+                gr.update(visible=show_part and has_m2), # start_2
+                gr.update(visible=show_part and has_m2), # end_2
+                gr.update(visible=show_part and has_m3), # start_3
+                gr.update(visible=show_part and has_m3), # end_3
+            ]
         selected_strategy.change(
             fn=on_strategy_change,
+            inputs=[selected_strategy, num_routing_models],
             outputs=[
+                random_block, every_k_block, part_block,
+                random_hint, weight_base, weight_model_1, weight_model_2, weight_model_3,
+                every_k_hint, k_model_1, k_model_2, k_model_3,
+                part_hint, part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
             ],
         )
         )
         def show_model_2(strategy):
+            is_random = strategy == "Random router"
             is_every_k = strategy == "Every k-th step"
             is_part = strategy == "Replace part of trajectory"
             return (
+                gr.update(visible=True),   # show block 2
+                gr.update(visible=False),  # hide add button
+                gr.update(visible=is_random),  # weight2
+                gr.update(visible=is_every_k), # k2
+                gr.update(visible=is_part),    # start2
+                gr.update(visible=is_part),    # end2
+                2,
             )
         add_model_2_btn.click(
             fn=show_model_2,
             inputs=[selected_strategy],
+            outputs=[routing_block_2, add_model_2_btn, weight_model_2, k_model_2, start_2, end_2, num_routing_models],
         )
         routing_model_2.change(
         )
         def show_model_3(strategy):
+            is_random = strategy == "Random router"
             is_every_k = strategy == "Every k-th step"
             is_part = strategy == "Replace part of trajectory"
             return (
+                gr.update(visible=True),   # show block 3
+                gr.update(visible=False),  # hide add button
+                gr.update(visible=is_random),  # weight3
+                gr.update(visible=is_every_k), # k3
+                gr.update(visible=is_part),    # start3
+                gr.update(visible=is_part),    # end3
+                3,
             )
         add_model_3_btn.click(
             fn=show_model_3,
             inputs=[selected_strategy],
+            outputs=[routing_block_3, add_model_3_btn, weight_model_3, k_model_3, start_3, end_3, num_routing_models],
         )
         routing_model_3.change(
             weight_base_val, weight_1_val, weight_2_val, weight_3_val,
             k_1_val, k_2_val, k_3_val,
             part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
+            overhead, with_cache
         ):
             if state_data is None:
                 yield (
                             return
             weights = None
+            if strategy_val == "Random router":
                 weights = [weight_base_val, weight_1_val]
                 if len(routing_models) > 1:
                     weights.append(weight_2_val)
                 step_to_model = {}
+                if strategy_val == "Random router":
                     model_choices = [BASE_MODEL] + [f"__routing_{j}__" for j in range(len(routing_models))]
                     for i in range(total_steps):
                         step_to_model[i] = random.choices(model_choices, weights=weights)[0]
                     modified_steps.append({
                         "model": model,
                         "system_user": step.get("system_user", 0),
+                        "completion": int(step.get("completion", 0) * overhead),
                         "observation": step.get("observation"),
                     })
                     original_steps.append({
                         "model": BASE_MODEL,
                         "system_user": step.get("system_user", 0),
+                        "completion": int(step.get("completion", 0) * overhead),
                         "observation": step.get("observation"),
                     })
                 original_totals = calculate_routing_tokens(original_steps)
                 weight_base, weight_model_1, weight_model_2, weight_model_3,
                 k_model_1, k_model_2, k_model_3,
                 part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
+                thinking_overhead, use_cache,
             ],
             outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
         )
         # Selecting a leaderboard row runs on_row_select with the table as its only
         # input; its return values are written, in order, to the selection fields,
         # the analyze button, the four price inputs, the detected model and the
         # thinking-overhead control listed in `outputs`.
         leaderboard_table.select(
             fn=on_row_select,
             inputs=[leaderboard_table],
             outputs=[selected_folder, selected_name, analyze_btn, price_input, price_cache_read, price_cache_creation, price_completion, detected_model, thinking_overhead],
         )
+        def load_and_analyze(folder, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache, progress=gr.Progress()):
             # Generator handler wired to analyze_btn.click: it yields progress tuples
             # while trajectories are fetched/loaded and finally the full set of charts.
             # Every yielded tuple is positional and has 14 slots that must line up with
             # the `outputs` list of analyze_btn.click:
             #   status text, analysis-section update,
             #   2 histogram plots (steps, cost),
             #   4 "meta" plots, 4 "calculated" plots,
             #   the state payload, add-routing-button update.
             # All-empty payload: blank status, both sections hidden, every plot and
             # the state cleared.
             empty_result = (
                 "",
                 gr.update(visible=False),
+                None, None,
+                None, None, None, None,
+                None, None, None, None,
                 None,
                 gr.update(visible=False),
             )
                 yield (
                     "⏳ Downloading trajectories...",
                     gr.update(visible=False),
+                    None, None,
+                    None, None, None, None,
+                    None, None, None, None,
                     None,
                     gr.update(visible=False),
                 )
                     yield (
                         status,
                         gr.update(visible=False),
+                        None, None,
+                        None, None, None, None,
+                        None, None, None, None,
                         None,
                         gr.update(visible=False),
                     )
             yield (
                 "⏳ Loading trajectories...",
                 gr.update(visible=True),
+                None, None,
+                None, None, None, None,
+                None, None, None, None,
                 None,
                 gr.update(visible=False),
             )
             # Payload yielded into the trajectories_state slot; recalculate_costs and
             # on_calc_options_change read its "meta" / "calculated" frames later.
             state_data = {"meta": df_meta, "calculated": df_calc, "folder": folder, "steps": trajectory_steps}
+            if df_meta.empty:
                 yield (
                     "❌ No trajectories found",
                     gr.update(visible=False),
+                    None, None,
+                    None, None, None, None,
+                    None, None, None, None,
                     None,
                     gr.update(visible=False),
                 )
                 return
             # "Meta" charts are drawn from df_meta as-is. Only the first two figures
             # of create_basic_histograms are kept; its other three are discarded and
             # the token charts are taken from create_token_charts instead.
+            fig_steps, fig_cost, _, _, _ = create_basic_histograms(
+                df_meta, input_price, cache_read_price, cache_creation_price, completion_price
+            )
+            fig_tokens_meta, fig_tokens_cost_meta, fig_stacked_meta = create_token_charts(
+                df_meta, input_price, cache_read_price, cache_creation_price, completion_price
+            )
+            fig_cost_breakdown_meta = create_cost_breakdown(
+                df_meta, input_price, cache_read_price, cache_creation_price, completion_price
+            )
             # "Calculated" charts use a copy of df_calc with the thinking overhead
             # applied and, when with_cache is falsy, apply_no_cache on top.
             # NOTE(review): the .copy() suggests apply_thinking_overhead may mutate
             # its argument -- confirm against its definition.
+            df_calc_processed = apply_thinking_overhead(df_calc.copy(), overhead)
+            if not with_cache:
+                df_calc_processed = apply_no_cache(df_calc_processed)
+            fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc = create_token_charts(
+                df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
+            )
+            fig_cost_breakdown_calc = create_cost_breakdown(
+                df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
             )
             # Final update: show the analysis section and the add-routing button,
             # publish all ten figures and store state_data.
             yield (
+                f"✅ Loaded {len(df_meta)} trajectories",
                 gr.update(visible=True),
+                fig_steps, fig_cost,
+                fig_tokens_meta, fig_tokens_cost_meta, fig_stacked_meta, fig_cost_breakdown_meta,
+                fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc, fig_cost_breakdown_calc,
                 state_data,
                 gr.update(visible=True),
             )
         # Analyze button: load_and_analyze is a generator, so each tuple it yields
         # is mapped positionally onto the 14 components in `outputs` below. Keep
         # this list and the yielded tuples in the same order.
         analyze_btn.click(
             fn=load_and_analyze,
+            inputs=[selected_folder, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
             outputs=[
                 download_status,
                 analysis_section,
+                plot_steps, plot_cost,
+                plot_tokens_meta, plot_tokens_cost_meta, plot_stacked_meta, plot_cost_breakdown_meta,
+                plot_tokens_calc, plot_tokens_cost_calc, plot_stacked_calc, plot_cost_breakdown_calc,
                 trajectories_state,
                 add_routing_btn,
             ],
         )
+        def recalculate_costs(state_data, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
             # Rebuild only the price-dependent charts from the cached state:
             # cost-by-type and cost breakdown, once for the "meta" frame and once for
             # the "calculated" frame. Returns a 4-tuple in the order
             # (tokens_cost_meta, cost_breakdown_meta, tokens_cost_calc, cost_breakdown_calc),
             # or four Nones when no state is loaded or the meta frame is empty.
             if state_data is None:
+                return None, None, None, None
+            df_meta = state_data["meta"]
+            df_calc = state_data["calculated"]
+            if df_meta.empty:
+                return None, None, None, None
+            fig_tokens_cost_meta = create_cost_by_type_chart(df_meta, input_price, cache_read_price, cache_creation_price, completion_price)
+            fig_cost_breakdown_meta = create_cost_breakdown(df_meta, input_price, cache_read_price, cache_creation_price, completion_price)
             # The calculated frame is copied before the overhead (and, when
             # with_cache is falsy, the no-cache transform) is applied.
             # NOTE(review): presumably this keeps the frame cached in state_data
             # untouched -- confirm apply_thinking_overhead's mutation behavior.
+            df_calc_processed = apply_thinking_overhead(df_calc.copy(), overhead)
+            if not with_cache:
+                df_calc_processed = apply_no_cache(df_calc_processed)
+            fig_tokens_cost_calc = create_cost_by_type_chart(df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price)
+            fig_cost_breakdown_calc = create_cost_breakdown(df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price)
+            return fig_tokens_cost_meta, fig_cost_breakdown_meta, fig_tokens_cost_calc, fig_cost_breakdown_calc
+        price_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
+        price_outputs = [plot_tokens_cost_meta, plot_cost_breakdown_meta, plot_tokens_cost_calc, plot_cost_breakdown_calc]
         # Editing any of the four price fields re-runs recalculate_costs, which
         # refreshes only the four cost plots listed in price_outputs (order must
         # match the tuple recalculate_costs returns).
         price_input.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
         price_cache_read.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
         price_cache_creation.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
         price_completion.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
+        def on_calc_options_change(state_data, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
+            """Recalculate only calculated charts when overhead or cache options change"""
             # Returns (tokens, tokens_cost, stacked, cost_breakdown) figures for the
             # "calculated" frame, or four Nones when no state is loaded or that
             # frame is empty. Charts built from the "meta" frame are not touched here.
             if state_data is None:
                 return None, None, None, None
+            df_calc = state_data["calculated"]
+            if df_calc.empty:
                 return None, None, None, None
             # Transform a copy: thinking overhead first, then the no-cache
             # transform when with_cache is falsy.
+            df_calc_processed = apply_thinking_overhead(df_calc.copy(), overhead)
+            if not with_cache:
+                df_calc_processed = apply_no_cache(df_calc_processed)
+            fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc = create_token_charts(
+                df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
+            )
+            fig_cost_breakdown_calc = create_cost_breakdown(
+                df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
+            )
+            return fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc, fig_cost_breakdown_calc
+        calc_options_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
+        calc_options_outputs = [plot_tokens_calc, plot_tokens_cost_calc, plot_stacked_calc, plot_cost_breakdown_calc]
         # Changing the thinking overhead or the cache toggle re-runs
         # on_calc_options_change and refreshes the four "calculated" plots; the
         # output order must match the tuple that handler returns.
         thinking_overhead.change(
+            fn=on_calc_options_change,
+            inputs=calc_options_inputs,
+            outputs=calc_options_outputs,
         )
         use_cache.change(
+            fn=on_calc_options_change,
+            inputs=calc_options_inputs,
+            outputs=calc_options_outputs,
         )
     return app