Spaces:

JetBrains-Research
/

SWE-bench-Costs-Calculator

Sleeping

IgorSlinko committed 8 days ago

Commit

c63e9d7

1 Parent(s): 1ae03ef

Support multiple routing models (up to 3)

- Add interactive=True to all part_mode Radio buttons
- Rewrite run_routing to support all 3 models
- Each model can have different strategy and parameters
- Results table shows cost breakdown per model
- Charts support any number of models with different colors
- Validate Start < End for each model separately

Files changed (1) hide show

app.py +103 -64

app.py CHANGED Viewed

@@ -1310,6 +1310,7 @@ def build_app():
                                     choices=["Indexes", "Percentages"],
                                     value="Percentages",
                                     label="Mode",
                                 )
                                 with gr.Row():
                                     start_step_1 = gr.Number(label="Start", value=0, minimum=0, precision=0, interactive=True)
@@ -1346,6 +1347,7 @@ def build_app():
                                         choices=["Indexes", "Percentages"],
                                         value="Percentages",
                                         label="Mode",
                                     )
                                     with gr.Row():
                                         start_step_2 = gr.Number(label="Start", value=0, minimum=0, precision=0, interactive=True)
@@ -1382,6 +1384,7 @@ def build_app():
                                         choices=["Indexes", "Percentages"],
                                         value="Percentages",
                                         label="Mode",
                                     )
                                     with gr.Row():
                                         start_step_3 = gr.Number(label="Start", value=0, minimum=0, precision=0, interactive=True)
@@ -1522,6 +1525,10 @@ def build_app():
             base_input, base_cache_read, base_cache_creation, base_completion,
             routing_model_1_val, r1_input, r1_cache_read, r1_cache_creation, r1_completion,
             strategy_1_val, random_pct_1_val, step_k_1_val, part_mode_1_val, start_1_val, end_1_val,
             source, overhead, with_cache
         ):
             if state_data is None:
@@ -1571,47 +1578,74 @@ def build_app():
                 "cache_creation": base_cache_creation,
                 "completion": base_completion,
             }
-            routing_prices = {
-                "input": r1_input,
-                "cache_read": r1_cache_read,
-                "cache_creation": r1_cache_creation,
-                "completion": r1_completion,
-            }
-            strategy_params = {}
-            if strategy_1_val == "Replace on random steps":
-                strategy_params["percentage"] = random_pct_1_val
-            elif strategy_1_val == "Replace every step k":
-                strategy_params["k"] = step_k_1_val
-            elif strategy_1_val == "Replace part of trajectory":
-                strategy_params["mode"] = part_mode_1_val
-                strategy_params["start"] = start_1_val
-                strategy_params["end"] = end_1_val
-                if start_1_val >= end_1_val:
-                    yield (
-                        gr.update(visible=True, value="❌ Start must be less than End"),
-                        gr.update(visible=False),
-                        None, None,
-                    )
                     return
-            total_base_tokens = {"uncached_input": 0, "cache_read": 0, "cache_creation": 0, "completion": 0}
-            total_routing_tokens = {"uncached_input": 0, "cache_read": 0, "cache_creation": 0, "completion": 0}
-            total_original_tokens = {"uncached_input": 0, "cache_read": 0, "cache_creation": 0, "completion": 0}
             BASE_MODEL = "__base__"
-            ROUTING_MODEL = "__routing__"
             for instance_id, steps in trajectory_steps.items():
                 if not steps:
                     continue
                 total_steps = len(steps)
-                routed_step_indices = get_routed_steps(total_steps, strategy_1_val, strategy_params)
                 modified_steps = []
                 for i, step in enumerate(steps):
-                    model = ROUTING_MODEL if i in routed_step_indices else BASE_MODEL
                     modified_steps.append({
                         "model": model,
                         "system_user": step.get("system_user", 0),
@@ -1621,22 +1655,12 @@ def build_app():
                 model_totals = calculate_routing_tokens(modified_steps)
-                base_totals = model_totals.get(BASE_MODEL, {
-                    "cache_read": 0, "uncached_input": 0, "completion": 0, "cache_creation": 0
-                })
-                routing_totals = model_totals.get(ROUTING_MODEL, {
-                    "cache_read": 0, "uncached_input": 0, "completion": 0, "cache_creation": 0
-                })
-                total_base_tokens["cache_read"] += base_totals.get("cache_read", 0)
-                total_base_tokens["uncached_input"] += base_totals.get("uncached_input", 0)
-                total_base_tokens["completion"] += base_totals.get("completion", 0)
-                total_base_tokens["cache_creation"] += base_totals.get("cache_creation", 0)
-                total_routing_tokens["cache_read"] += routing_totals.get("cache_read", 0)
-                total_routing_tokens["uncached_input"] += routing_totals.get("uncached_input", 0)
-                total_routing_tokens["completion"] += routing_totals.get("completion", 0)
-                total_routing_tokens["cache_creation"] += routing_totals.get("cache_creation", 0)
                 original_steps = []
                 for step in steps:
@@ -1661,11 +1685,23 @@ def build_app():
                     tokens["completion"] * prices["completion"] / 1e6
                 )
-            base_costs = {k: total_base_tokens[k] * base_prices[{"uncached_input": "input", "cache_read": "cache_read", "cache_creation": "cache_creation", "completion": "completion"}[k]] / 1e6 for k in total_base_tokens}
-            routing_costs = {k: total_routing_tokens[k] * routing_prices[{"uncached_input": "input", "cache_read": "cache_read", "cache_creation": "cache_creation", "completion": "completion"}[k]] / 1e6 for k in total_routing_tokens}
             total_base_cost = calc_cost(total_base_tokens, base_prices)
-            total_routing_cost = calc_cost(total_routing_tokens, routing_prices)
             if total_original_cost_from_df is not None:
                 total_original_cost = total_original_cost_from_df
@@ -1676,23 +1712,22 @@ def build_app():
             savings = total_original_cost - total_routed_cost
             savings_pct = (savings / total_original_cost * 100) if total_original_cost > 0 else 0
-            result_text = f"""
-## 🚀 Routing Results
-| Metric | Value |
-|--------|-------|
-| **Original Cost (base model only)** | ${total_original_cost:.2f} |
-| **Routed Cost** | ${total_routed_cost:.2f} |
-| ↳ Base model portion | ${total_base_cost:.2f} |
-| ↳ Routing model portion | ${total_routing_cost:.2f} |
-| **Savings** | ${savings:.2f} ({savings_pct:+.1f}%) |
-*Strategy: {strategy_1_val}*
-*Routing model: {routing_model_1_val}*
-"""
-            additional_token_models = [(routing_model_1_val, total_routing_tokens)]
-            additional_cost_models = [(routing_model_1_val, routing_costs)]
             yield (
                 gr.update(visible=True, value="⏳ Creating charts..."),
@@ -1718,6 +1753,10 @@ def build_app():
                 price_input, price_cache_read, price_cache_creation, price_completion,
                 routing_model_1, routing_price_1_input, routing_price_1_cache_read, routing_price_1_cache_creation, routing_price_1_completion,
                 strategy_1, random_pct_1, step_k_1, part_mode_1, start_step_1, end_step_1,
                 token_source, thinking_overhead, use_cache,
             ],
             outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],

                                     choices=["Indexes", "Percentages"],
                                     value="Percentages",
                                     label="Mode",
+                                    interactive=True,
                                 )
                                 with gr.Row():
                                     start_step_1 = gr.Number(label="Start", value=0, minimum=0, precision=0, interactive=True)
                                         choices=["Indexes", "Percentages"],
                                         value="Percentages",
                                         label="Mode",
+                                        interactive=True,
                                     )
                                     with gr.Row():
                                         start_step_2 = gr.Number(label="Start", value=0, minimum=0, precision=0, interactive=True)
                                         choices=["Indexes", "Percentages"],
                                         value="Percentages",
                                         label="Mode",
+                                        interactive=True,
                                     )
                                     with gr.Row():
                                         start_step_3 = gr.Number(label="Start", value=0, minimum=0, precision=0, interactive=True)
             base_input, base_cache_read, base_cache_creation, base_completion,
             routing_model_1_val, r1_input, r1_cache_read, r1_cache_creation, r1_completion,
             strategy_1_val, random_pct_1_val, step_k_1_val, part_mode_1_val, start_1_val, end_1_val,
+            routing_model_2_val, r2_input, r2_cache_read, r2_cache_creation, r2_completion,
+            strategy_2_val, random_pct_2_val, step_k_2_val, part_mode_2_val, start_2_val, end_2_val,
+            routing_model_3_val, r3_input, r3_cache_read, r3_cache_creation, r3_completion,
+            strategy_3_val, random_pct_3_val, step_k_3_val, part_mode_3_val, start_3_val, end_3_val,
             source, overhead, with_cache
         ):
             if state_data is None:
                 "cache_creation": base_cache_creation,
                 "completion": base_completion,
             }
+            def build_strategy_params(strategy, random_pct, step_k, part_mode, start_val, end_val):
+                params = {}
+                if strategy == "Replace on random steps":
+                    params["percentage"] = random_pct
+                elif strategy == "Replace every step k":
+                    params["k"] = step_k
+                elif strategy == "Replace part of trajectory":
+                    params["mode"] = part_mode
+                    params["start"] = start_val
+                    params["end"] = end_val
+                return params
+            routing_models = []
+            if routing_model_1_val:
+                if strategy_1_val == "Replace part of trajectory" and start_1_val >= end_1_val:
+                    yield (gr.update(visible=True, value="❌ Model 1: Start must be less than End"), gr.update(visible=False), None, None)
                     return
+                routing_models.append({
+                    "name": routing_model_1_val,
+                    "prices": {"input": r1_input, "cache_read": r1_cache_read, "cache_creation": r1_cache_creation, "completion": r1_completion},
+                    "strategy": strategy_1_val,
+                    "params": build_strategy_params(strategy_1_val, random_pct_1_val, step_k_1_val, part_mode_1_val, start_1_val, end_1_val),
+                })
+            if routing_model_2_val:
+                if strategy_2_val == "Replace part of trajectory" and start_2_val >= end_2_val:
+                    yield (gr.update(visible=True, value="❌ Model 2: Start must be less than End"), gr.update(visible=False), None, None)
+                    return
+                routing_models.append({
+                    "name": routing_model_2_val,
+                    "prices": {"input": r2_input, "cache_read": r2_cache_read, "cache_creation": r2_cache_creation, "completion": r2_completion},
+                    "strategy": strategy_2_val,
+                    "params": build_strategy_params(strategy_2_val, random_pct_2_val, step_k_2_val, part_mode_2_val, start_2_val, end_2_val),
+                })
+            if routing_model_3_val:
+                if strategy_3_val == "Replace part of trajectory" and start_3_val >= end_3_val:
+                    yield (gr.update(visible=True, value="❌ Model 3: Start must be less than End"), gr.update(visible=False), None, None)
+                    return
+                routing_models.append({
+                    "name": routing_model_3_val,
+                    "prices": {"input": r3_input, "cache_read": r3_cache_read, "cache_creation": r3_cache_creation, "completion": r3_completion},
+                    "strategy": strategy_3_val,
+                    "params": build_strategy_params(strategy_3_val, random_pct_3_val, step_k_3_val, part_mode_3_val, start_3_val, end_3_val),
+                })
             BASE_MODEL = "__base__"
+            model_keys = [BASE_MODEL] + [f"__routing_{i}__" for i in range(len(routing_models))]
+            all_tokens = {key: {"uncached_input": 0, "cache_read": 0, "cache_creation": 0, "completion": 0} for key in model_keys}
+            total_original_tokens = {"uncached_input": 0, "cache_read": 0, "cache_creation": 0, "completion": 0}
             for instance_id, steps in trajectory_steps.items():
                 if not steps:
                     continue
                 total_steps = len(steps)
+                routed_sets = []
+                for rm in routing_models:
+                    routed_sets.append(get_routed_steps(total_steps, rm["strategy"], rm["params"]))
                 modified_steps = []
                 for i, step in enumerate(steps):
+                    model = BASE_MODEL
+                    for j, routed_set in enumerate(routed_sets):
+                        if i in routed_set:
+                            model = f"__routing_{j}__"
+                            break
                     modified_steps.append({
                         "model": model,
                         "system_user": step.get("system_user", 0),
                 model_totals = calculate_routing_tokens(modified_steps)
+                for key in model_keys:
+                    totals = model_totals.get(key, {})
+                    all_tokens[key]["cache_read"] += totals.get("cache_read", 0)
+                    all_tokens[key]["uncached_input"] += totals.get("uncached_input", 0)
+                    all_tokens[key]["completion"] += totals.get("completion", 0)
+                    all_tokens[key]["cache_creation"] += totals.get("cache_creation", 0)
                 original_steps = []
                 for step in steps:
                     tokens["completion"] * prices["completion"] / 1e6
                 )
+            def tokens_to_costs(tokens: dict, prices: dict) -> dict:
+                price_map = {"uncached_input": "input", "cache_read": "cache_read", "cache_creation": "cache_creation", "completion": "completion"}
+                return {k: tokens[k] * prices[price_map[k]] / 1e6 for k in tokens}
+            total_base_tokens = all_tokens[BASE_MODEL]
+            base_costs = tokens_to_costs(total_base_tokens, base_prices)
             total_base_cost = calc_cost(total_base_tokens, base_prices)
+            routing_costs_list = []
+            total_routing_cost = 0
+            for i, rm in enumerate(routing_models):
+                key = f"__routing_{i}__"
+                tokens = all_tokens[key]
+                costs = tokens_to_costs(tokens, rm["prices"])
+                cost = calc_cost(tokens, rm["prices"])
+                routing_costs_list.append({"name": rm["name"], "tokens": tokens, "costs": costs, "cost": cost})
+                total_routing_cost += cost
             if total_original_cost_from_df is not None:
                 total_original_cost = total_original_cost_from_df
             savings = total_original_cost - total_routed_cost
             savings_pct = (savings / total_original_cost * 100) if total_original_cost > 0 else 0
+            result_lines = [
+                "## 🚀 Routing Results",
+                "",
+                "| Metric | Value |",
+                "|--------|-------|",
+                f"| **Original Cost (base model only)** | ${total_original_cost:.2f} |",
+                f"| **Routed Cost** | ${total_routed_cost:.2f} |",
+                f"| ↳ Base model portion | ${total_base_cost:.2f} |",
+            ]
+            for rc in routing_costs_list:
+                result_lines.append(f"| ↳ {rc['name']} | ${rc['cost']:.2f} |")
+            result_lines.append(f"| **Savings** | ${savings:.2f} ({savings_pct:+.1f}%) |")
+            result_text = "\n".join(result_lines)
+            additional_token_models = [(rc["name"], rc["tokens"]) for rc in routing_costs_list]
+            additional_cost_models = [(rc["name"], rc["costs"]) for rc in routing_costs_list]
             yield (
                 gr.update(visible=True, value="⏳ Creating charts..."),
                 price_input, price_cache_read, price_cache_creation, price_completion,
                 routing_model_1, routing_price_1_input, routing_price_1_cache_read, routing_price_1_cache_creation, routing_price_1_completion,
                 strategy_1, random_pct_1, step_k_1, part_mode_1, start_step_1, end_step_1,
+                routing_model_2, routing_price_2_input, routing_price_2_cache_read, routing_price_2_cache_creation, routing_price_2_completion,
+                strategy_2, random_pct_2, step_k_2, part_mode_2, start_step_2, end_step_2,
+                routing_model_3, routing_price_3_input, routing_price_3_cache_read, routing_price_3_cache_creation, routing_price_3_completion,
+                strategy_3, random_pct_3, step_k_3, part_mode_3, start_step_3, end_step_3,
                 token_source, thinking_overhead, use_cache,
             ],
             outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],