IgorSlinko committed on
Commit
745add3
·
1 Parent(s): 0c41621

Fix router strategy params visibility

Browse files
Files changed (1) hide show
  1. app.py +211 -171
app.py CHANGED
@@ -700,7 +700,7 @@ def create_token_charts(df: pd.DataFrame, input_price: float, cache_read_price:
700
  xaxis_title="Trajectory (sorted by total tokens)",
701
  yaxis_title="Tokens (M)",
702
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
703
- margin=dict(l=50, r=20, t=60, b=40),
704
  )
705
 
706
  return fig_tokens, fig_tokens_cost, fig_stacked
@@ -836,7 +836,7 @@ def create_basic_histograms(df: pd.DataFrame, input_price: float, cache_read_pri
836
  xaxis_title="Trajectory (sorted by total tokens)",
837
  yaxis_title="Tokens (M)",
838
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
839
- margin=dict(l=50, r=20, t=60, b=40),
840
  )
841
 
842
  return fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked
@@ -905,7 +905,7 @@ def create_cost_breakdown(df: pd.DataFrame, input_price: float, cache_read_price
905
  xaxis_title="Trajectory (sorted by total tokens)",
906
  yaxis_title="Cost ($)",
907
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
908
- margin=dict(l=50, r=20, t=60, b=40),
909
  )
910
 
911
  fig.add_annotation(
@@ -1155,7 +1155,7 @@ def build_app():
1155
  with gr.Blocks(title="SWE-bench Routing Cost Calculator") as app:
1156
  trajectories_state = gr.State(None)
1157
 
1158
- gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard")
1159
  gr.Markdown("Select a model to use as base for cost analysis")
1160
 
1161
  with gr.Row():
@@ -1170,21 +1170,37 @@ def build_app():
1170
  with gr.Column(visible=False) as analysis_section:
1171
  gr.Markdown("## 📊 Trajectory Analysis")
1172
 
1173
- with gr.Row():
1174
- plot_steps = gr.Plot(label="API Calls Distribution")
1175
- plot_cost = gr.Plot(label="Cost Distribution")
1176
-
1177
- with gr.Row():
1178
- plot_tokens = gr.Plot(label="Token Usage by Type")
1179
- plot_tokens_cost = gr.Plot(label="Cost by Token Type ($)")
1180
-
1181
- with gr.Row():
1182
- plot_stacked = gr.Plot(label="Tokens per Trajectory")
1183
- plot_cost_breakdown = gr.Plot(label="Cost per Trajectory ($)")
1184
-
1185
- with gr.Row(visible=False) as routing_plots_row:
1186
- routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
1187
- routing_cost_plot = gr.Plot(label="Cost by Type (per Model)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1188
 
1189
  with gr.Column(scale=1):
1190
  selected_folder = gr.State("")
@@ -1204,23 +1220,17 @@ def build_app():
1204
  price_completion = gr.Number(label="Completion", value=0, precision=2, scale=1)
1205
 
1206
  gr.Markdown("---")
1207
- gr.Markdown("### 📊 Token Count Source")
1208
- token_source = gr.Radio(
1209
- choices=["Metadata", "Calculated"],
1210
- value="Metadata",
1211
- )
1212
  thinking_overhead = gr.Number(
1213
- label="🔢 Tokenizer Overhead",
1214
  value=1.21,
1215
  precision=2,
1216
  info="Multiplier for Calculated tokens (tiktoken → native)",
1217
- visible=False,
1218
  )
1219
  use_cache = gr.Checkbox(
1220
  label="Use Cache",
1221
  value=True,
1222
  info="If disabled, all tokens are Uncached Input or Completion",
1223
- visible=False,
1224
  )
1225
 
1226
  gr.Markdown("---")
@@ -1282,37 +1292,40 @@ def build_app():
1282
  gr.Markdown("### 🎯 Router Strategy")
1283
 
1284
  selected_strategy = gr.Radio(
1285
- choices=["Random weights", "Every k-th step", "Replace part of trajectory"],
1286
- value="Random weights",
1287
  label="Strategy",
1288
  interactive=True,
1289
  )
1290
-
1291
- random_hint = gr.Markdown("*Weights must sum to 1.0*", visible=True)
1292
- weight_base = gr.Number(label="Base weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True, visible=True)
1293
- weight_model_1 = gr.Number(label="Model 1 weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True, visible=True)
1294
- weight_model_2 = gr.Number(label="Model 2 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
1295
- weight_model_3 = gr.Number(label="Model 3 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
1296
-
1297
- every_k_hint = gr.Markdown("*First model has priority on overlaps*", visible=False)
1298
- k_model_1 = gr.Number(label="k₁ (Model 1)", value=2, minimum=1, precision=0, interactive=True, visible=False)
1299
- k_model_2 = gr.Number(label="k₂ (Model 2)", value=3, minimum=1, precision=0, interactive=True, visible=False)
1300
- k_model_3 = gr.Number(label="k₃ (Model 3)", value=5, minimum=1, precision=0, interactive=True, visible=False)
1301
-
1302
- part_hint = gr.Markdown("*Ranges must not overlap*", visible=False)
1303
- part_mode = gr.Radio(
1304
- choices=["Indexes", "Percentages"],
1305
- value="Percentages",
1306
- label="Mode",
1307
- interactive=True,
1308
- visible=False,
1309
- )
1310
- start_1 = gr.Number(label="M1 Start", value=0, minimum=0, precision=0, interactive=True, visible=False)
1311
- end_1 = gr.Number(label="M1 End", value=30, minimum=0, precision=0, interactive=True, visible=False)
1312
- start_2 = gr.Number(label="M2 Start", value=30, minimum=0, precision=0, interactive=True, visible=False)
1313
- end_2 = gr.Number(label="M2 End", value=60, minimum=0, precision=0, interactive=True, visible=False)
1314
- start_3 = gr.Number(label="M3 Start", value=60, minimum=0, precision=0, interactive=True, visible=False)
1315
- end_3 = gr.Number(label="M3 End", value=100, minimum=0, precision=0, interactive=True, visible=False)
 
 
 
1316
 
1317
  gr.Markdown("---")
1318
  route_btn = gr.Button("🚀 Let's ROUTE!!", variant="primary", size="lg", interactive=False)
@@ -1327,30 +1340,43 @@ def build_app():
1327
  outputs=[routing_section],
1328
  )
1329
 
1330
- def on_strategy_change(strategy):
1331
- is_random = strategy == "Random weights"
1332
- is_every_k = strategy == "Every k-th step"
1333
- is_part = strategy == "Replace part of trajectory"
1334
- print(f"DEBUG on_strategy_change: strategy={strategy}")
1335
- return (
1336
- gr.update(visible=is_random),
1337
- gr.update(visible=is_random),
1338
- gr.update(visible=is_random),
1339
- gr.update(visible=is_every_k),
1340
- gr.update(visible=is_every_k),
1341
- gr.update(visible=is_part),
1342
- gr.update(visible=is_part),
1343
- gr.update(visible=is_part),
1344
- gr.update(visible=is_part),
1345
- )
 
 
 
 
 
 
 
 
 
 
 
 
1346
 
1347
  selected_strategy.change(
1348
  fn=on_strategy_change,
1349
- inputs=[selected_strategy],
1350
  outputs=[
1351
- random_hint, weight_base, weight_model_1,
1352
- every_k_hint, k_model_1,
1353
- part_hint, part_mode, start_1, end_1,
 
1354
  ],
1355
  )
1356
 
@@ -1425,22 +1451,23 @@ def build_app():
1425
  )
1426
 
1427
  def show_model_2(strategy):
1428
- is_random = strategy == "Random weights"
1429
  is_every_k = strategy == "Every k-th step"
1430
  is_part = strategy == "Replace part of trajectory"
1431
  return (
1432
- gr.update(visible=True),
1433
- gr.update(visible=False),
1434
- gr.update(visible=is_random),
1435
- gr.update(visible=is_every_k),
1436
- gr.update(visible=is_part),
1437
- gr.update(visible=is_part),
 
1438
  )
1439
 
1440
  add_model_2_btn.click(
1441
  fn=show_model_2,
1442
  inputs=[selected_strategy],
1443
- outputs=[routing_block_2, add_model_2_btn, weight_model_2, k_model_2, start_2, end_2],
1444
  )
1445
 
1446
  routing_model_2.change(
@@ -1450,22 +1477,23 @@ def build_app():
1450
  )
1451
 
1452
  def show_model_3(strategy):
1453
- is_random = strategy == "Random weights"
1454
  is_every_k = strategy == "Every k-th step"
1455
  is_part = strategy == "Replace part of trajectory"
1456
  return (
1457
- gr.update(visible=True),
1458
- gr.update(visible=False),
1459
- gr.update(visible=is_random),
1460
- gr.update(visible=is_every_k),
1461
- gr.update(visible=is_part),
1462
- gr.update(visible=is_part),
 
1463
  )
1464
 
1465
  add_model_3_btn.click(
1466
  fn=show_model_3,
1467
  inputs=[selected_strategy],
1468
- outputs=[routing_block_3, add_model_3_btn, weight_model_3, k_model_3, start_3, end_3],
1469
  )
1470
 
1471
  routing_model_3.change(
@@ -1484,7 +1512,7 @@ def build_app():
1484
  weight_base_val, weight_1_val, weight_2_val, weight_3_val,
1485
  k_1_val, k_2_val, k_3_val,
1486
  part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
1487
- source, overhead, with_cache
1488
  ):
1489
  if state_data is None:
1490
  yield (
@@ -1571,7 +1599,7 @@ def build_app():
1571
  return
1572
 
1573
  weights = None
1574
- if strategy_val == "Random weights":
1575
  weights = [weight_base_val, weight_1_val]
1576
  if len(routing_models) > 1:
1577
  weights.append(weight_2_val)
@@ -1599,7 +1627,7 @@ def build_app():
1599
 
1600
  step_to_model = {}
1601
 
1602
- if strategy_val == "Random weights":
1603
  model_choices = [BASE_MODEL] + [f"__routing_{j}__" for j in range(len(routing_models))]
1604
  for i in range(total_steps):
1605
  step_to_model[i] = random.choices(model_choices, weights=weights)[0]
@@ -1629,7 +1657,7 @@ def build_app():
1629
  modified_steps.append({
1630
  "model": model,
1631
  "system_user": step.get("system_user", 0),
1632
- "completion": int(step.get("completion", 0) * (overhead if source == "Calculated" else 1)),
1633
  "observation": step.get("observation"),
1634
  })
1635
 
@@ -1647,7 +1675,7 @@ def build_app():
1647
  original_steps.append({
1648
  "model": BASE_MODEL,
1649
  "system_user": step.get("system_user", 0),
1650
- "completion": int(step.get("completion", 0) * (overhead if source == "Calculated" else 1)),
1651
  "observation": step.get("observation"),
1652
  })
1653
  original_totals = calculate_routing_tokens(original_steps)
@@ -1738,32 +1766,24 @@ def build_app():
1738
  weight_base, weight_model_1, weight_model_2, weight_model_3,
1739
  k_model_1, k_model_2, k_model_3,
1740
  part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
1741
- token_source, thinking_overhead, use_cache,
1742
  ],
1743
  outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
1744
  )
1745
 
1746
- def update_calculated_options_visibility(source):
1747
- is_calc = source == "Calculated"
1748
- return gr.update(visible=is_calc), gr.update(visible=is_calc)
1749
-
1750
- token_source.change(
1751
- fn=update_calculated_options_visibility,
1752
- inputs=[token_source],
1753
- outputs=[thinking_overhead, use_cache],
1754
- )
1755
-
1756
  leaderboard_table.select(
1757
  fn=on_row_select,
1758
  inputs=[leaderboard_table],
1759
  outputs=[selected_folder, selected_name, analyze_btn, price_input, price_cache_read, price_cache_creation, price_completion, detected_model, thinking_overhead],
1760
  )
1761
 
1762
- def load_and_analyze(folder, input_price, cache_read_price, cache_creation_price, completion_price, source, overhead, with_cache, progress=gr.Progress()):
1763
  empty_result = (
1764
  "",
1765
  gr.update(visible=False),
1766
- None, None, None, None, None, None,
 
 
1767
  None,
1768
  gr.update(visible=False),
1769
  )
@@ -1776,7 +1796,9 @@ def build_app():
1776
  yield (
1777
  "⏳ Downloading trajectories...",
1778
  gr.update(visible=False),
1779
- None, None, None, None, None, None,
 
 
1780
  None,
1781
  gr.update(visible=False),
1782
  )
@@ -1785,7 +1807,9 @@ def build_app():
1785
  yield (
1786
  status,
1787
  gr.update(visible=False),
1788
- None, None, None, None, None, None,
 
 
1789
  None,
1790
  gr.update(visible=False),
1791
  )
@@ -1794,7 +1818,9 @@ def build_app():
1794
  yield (
1795
  "⏳ Loading trajectories...",
1796
  gr.update(visible=True),
1797
- None, None, None, None, None, None,
 
 
1798
  None,
1799
  gr.update(visible=False),
1800
  )
@@ -1807,115 +1833,129 @@ def build_app():
1807
 
1808
  state_data = {"meta": df_meta, "calculated": df_calc, "folder": folder, "steps": trajectory_steps}
1809
 
1810
- if source == "Metadata":
1811
- df = df_meta
1812
- else:
1813
- df = apply_thinking_overhead(df_calc.copy(), overhead)
1814
- if not with_cache:
1815
- df = apply_no_cache(df)
1816
-
1817
- if df.empty:
1818
  yield (
1819
  "❌ No trajectories found",
1820
  gr.update(visible=False),
1821
- None, None, None, None, None, None,
 
 
1822
  None,
1823
  gr.update(visible=False),
1824
  )
1825
  return
1826
 
1827
- fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked = create_basic_histograms(
1828
- df, input_price, cache_read_price, cache_creation_price, completion_price
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1829
  )
1830
- fig_cost_breakdown = create_cost_breakdown(df, input_price, cache_read_price, cache_creation_price, completion_price)
1831
 
1832
  yield (
1833
- f"✅ Loaded {len(df)} trajectories",
1834
  gr.update(visible=True),
1835
- fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked, fig_cost_breakdown,
 
 
1836
  state_data,
1837
  gr.update(visible=True),
1838
  )
1839
 
1840
  analyze_btn.click(
1841
  fn=load_and_analyze,
1842
- inputs=[selected_folder, price_input, price_cache_read, price_cache_creation, price_completion, token_source, thinking_overhead, use_cache],
1843
  outputs=[
1844
  download_status,
1845
  analysis_section,
1846
- plot_steps, plot_cost, plot_tokens, plot_tokens_cost, plot_stacked, plot_cost_breakdown,
 
 
1847
  trajectories_state,
1848
  add_routing_btn,
1849
  ],
1850
  )
1851
 
1852
- def recalculate_costs(state_data, input_price, cache_read_price, cache_creation_price, completion_price, source, overhead, with_cache):
1853
  if state_data is None:
1854
- return None, None
1855
 
1856
- if source == "Metadata":
1857
- df = state_data["meta"]
1858
- else:
1859
- df = apply_thinking_overhead(state_data["calculated"].copy(), overhead)
1860
- if not with_cache:
1861
- df = apply_no_cache(df)
1862
 
1863
- if df.empty:
1864
- return None, None
 
 
 
 
 
 
 
1865
 
1866
- fig_tokens_cost = create_cost_by_type_chart(df, input_price, cache_read_price, cache_creation_price, completion_price)
1867
- fig_cost_breakdown = create_cost_breakdown(df, input_price, cache_read_price, cache_creation_price, completion_price)
1868
- return fig_tokens_cost, fig_cost_breakdown
1869
 
1870
- price_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, token_source, thinking_overhead, use_cache]
1871
- price_outputs = [plot_tokens_cost, plot_cost_breakdown]
 
 
1872
 
1873
  price_input.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1874
  price_cache_read.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1875
  price_cache_creation.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1876
  price_completion.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1877
 
1878
- def on_source_change(state_data, input_price, cache_read_price, cache_creation_price, completion_price, source, overhead, with_cache):
1879
- """Recalculate only token-dependent charts when source changes"""
1880
  if state_data is None:
1881
  return None, None, None, None
1882
 
1883
- if source == "Metadata":
1884
- df = state_data["meta"]
1885
- else:
1886
- df = apply_thinking_overhead(state_data["calculated"].copy(), overhead)
1887
- if not with_cache:
1888
- df = apply_no_cache(df)
1889
-
1890
- if df.empty:
1891
  return None, None, None, None
1892
 
1893
- fig_tokens, fig_tokens_cost, fig_stacked = create_token_charts(
1894
- df, input_price, cache_read_price, cache_creation_price, completion_price
1895
- )
1896
- fig_cost_breakdown = create_cost_breakdown(df, input_price, cache_read_price, cache_creation_price, completion_price)
1897
 
1898
- return fig_tokens, fig_tokens_cost, fig_stacked, fig_cost_breakdown
 
 
 
 
 
1899
 
1900
- source_change_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, token_source, thinking_overhead, use_cache]
1901
- source_change_outputs = [plot_tokens, plot_tokens_cost, plot_stacked, plot_cost_breakdown]
1902
 
1903
- token_source.change(
1904
- fn=on_source_change,
1905
- inputs=source_change_inputs,
1906
- outputs=source_change_outputs,
1907
- )
1908
 
1909
  thinking_overhead.change(
1910
- fn=on_source_change,
1911
- inputs=source_change_inputs,
1912
- outputs=source_change_outputs,
1913
  )
1914
 
1915
  use_cache.change(
1916
- fn=on_source_change,
1917
- inputs=source_change_inputs,
1918
- outputs=source_change_outputs,
1919
  )
1920
 
1921
  return app
 
700
  xaxis_title="Trajectory (sorted by total tokens)",
701
  yaxis_title="Tokens (M)",
702
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
703
+ margin=dict(l=50, r=20, t=80, b=40),
704
  )
705
 
706
  return fig_tokens, fig_tokens_cost, fig_stacked
 
836
  xaxis_title="Trajectory (sorted by total tokens)",
837
  yaxis_title="Tokens (M)",
838
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
839
+ margin=dict(l=50, r=20, t=80, b=40),
840
  )
841
 
842
  return fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked
 
905
  xaxis_title="Trajectory (sorted by total tokens)",
906
  yaxis_title="Cost ($)",
907
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
908
+ margin=dict(l=50, r=20, t=80, b=40),
909
  )
910
 
911
  fig.add_annotation(
 
1155
  with gr.Blocks(title="SWE-bench Routing Cost Calculator") as app:
1156
  trajectories_state = gr.State(None)
1157
 
1158
+ gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.9`")
1159
  gr.Markdown("Select a model to use as base for cost analysis")
1160
 
1161
  with gr.Row():
 
1170
  with gr.Column(visible=False) as analysis_section:
1171
  gr.Markdown("## 📊 Trajectory Analysis")
1172
 
1173
+ with gr.Accordion("Leaderboard data", open=True):
1174
+ with gr.Row():
1175
+ plot_steps = gr.Plot(label="API Calls Distribution")
1176
+ plot_cost = gr.Plot(label="Cost Distribution")
1177
+
1178
+ with gr.Accordion("Metadata from .traj", open=True):
1179
+ with gr.Row():
1180
+ plot_tokens_meta = gr.Plot(label="Token Usage by Type")
1181
+ plot_tokens_cost_meta = gr.Plot(label="Cost by Token Type")
1182
+
1183
+ with gr.Accordion("Metadata from .traj by trajectory", open=False):
1184
+ with gr.Row():
1185
+ plot_stacked_meta = gr.Plot(label="Tokens per Trajectory")
1186
+ with gr.Row():
1187
+ plot_cost_breakdown_meta = gr.Plot(label="Cost per Trajectory")
1188
+
1189
+ with gr.Accordion("Calculated from .traj messages", open=True):
1190
+ with gr.Row():
1191
+ plot_tokens_calc = gr.Plot(label="Token Usage by Type")
1192
+ plot_tokens_cost_calc = gr.Plot(label="Cost by Token Type")
1193
+
1194
+ with gr.Accordion("Calculated from .traj messages by trajectory", open=False):
1195
+ with gr.Row():
1196
+ plot_stacked_calc = gr.Plot(label="Tokens per Trajectory")
1197
+ with gr.Row():
1198
+ plot_cost_breakdown_calc = gr.Plot(label="Cost per Trajectory")
1199
+
1200
+ with gr.Accordion("Calculated with routing", open=False, visible=False) as routing_plots_row:
1201
+ with gr.Row():
1202
+ routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
1203
+ routing_cost_plot = gr.Plot(label="Cost by Type (per Model)")
1204
 
1205
  with gr.Column(scale=1):
1206
  selected_folder = gr.State("")
 
1220
  price_completion = gr.Number(label="Completion", value=0, precision=2, scale=1)
1221
 
1222
  gr.Markdown("---")
1223
+ gr.Markdown("### 🔢 Calculated Token Options")
 
 
 
 
1224
  thinking_overhead = gr.Number(
1225
+ label="Tokenizer Overhead",
1226
  value=1.21,
1227
  precision=2,
1228
  info="Multiplier for Calculated tokens (tiktoken → native)",
 
1229
  )
1230
  use_cache = gr.Checkbox(
1231
  label="Use Cache",
1232
  value=True,
1233
  info="If disabled, all tokens are Uncached Input or Completion",
 
1234
  )
1235
 
1236
  gr.Markdown("---")
 
1292
  gr.Markdown("### 🎯 Router Strategy")
1293
 
1294
  selected_strategy = gr.Radio(
1295
+ choices=["Random router", "Every k-th step", "Replace part of trajectory"],
1296
+ value="Random router",
1297
  label="Strategy",
1298
  interactive=True,
1299
  )
1300
+ num_routing_models = gr.State(1)
1301
+
1302
+ with gr.Column(visible=True) as random_block:
1303
+ random_hint = gr.Markdown("*Weights must sum to 1.0*")
1304
+ weight_base = gr.Number(label="Base weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True)
1305
+ weight_model_1 = gr.Number(label="Model 1 weight", value=0.5, minimum=0, maximum=1, precision=2, interactive=True)
1306
+ weight_model_2 = gr.Number(label="Model 2 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
1307
+ weight_model_3 = gr.Number(label="Model 3 weight", value=0, minimum=0, maximum=1, precision=2, interactive=True, visible=False)
1308
+
1309
+ with gr.Column(visible=False) as every_k_block:
1310
+ every_k_hint = gr.Markdown("*First model has priority on overlaps*")
1311
+ k_model_1 = gr.Number(label="k₁ (Model 1)", value=2, minimum=1, precision=0, interactive=True)
1312
+ k_model_2 = gr.Number(label="k₂ (Model 2)", value=3, minimum=1, precision=0, interactive=True, visible=False)
1313
+ k_model_3 = gr.Number(label="k₃ (Model 3)", value=5, minimum=1, precision=0, interactive=True, visible=False)
1314
+
1315
+ with gr.Column(visible=False) as part_block:
1316
+ part_hint = gr.Markdown("*Ranges must not overlap*")
1317
+ part_mode = gr.Radio(
1318
+ choices=["Indexes", "Percentages"],
1319
+ value="Percentages",
1320
+ label="Mode",
1321
+ interactive=True,
1322
+ )
1323
+ start_1 = gr.Number(label="M1 Start", value=0, minimum=0, precision=0, interactive=True)
1324
+ end_1 = gr.Number(label="M1 End", value=30, minimum=0, precision=0, interactive=True)
1325
+ start_2 = gr.Number(label="M2 Start", value=30, minimum=0, precision=0, interactive=True, visible=False)
1326
+ end_2 = gr.Number(label="M2 End", value=60, minimum=0, precision=0, interactive=True, visible=False)
1327
+ start_3 = gr.Number(label="M3 Start", value=60, minimum=0, precision=0, interactive=True, visible=False)
1328
+ end_3 = gr.Number(label="M3 End", value=100, minimum=0, precision=0, interactive=True, visible=False)
1329
 
1330
  gr.Markdown("---")
1331
  route_btn = gr.Button("🚀 Let's ROUTE!!", variant="primary", size="lg", interactive=False)
 
1340
  outputs=[routing_section],
1341
  )
1342
 
1343
+ def on_strategy_change(strategy, num_models):
1344
+ show_random = strategy == "Random router"
1345
+ show_every_k = strategy == "Every k-th step"
1346
+ show_part = strategy == "Replace part of trajectory"
1347
+ has_m2 = num_models >= 2
1348
+ has_m3 = num_models >= 3
1349
+ return [
1350
+ gr.update(visible=show_random), # random_block
1351
+ gr.update(visible=show_every_k), # every_k_block
1352
+ gr.update(visible=show_part), # part_block
1353
+ gr.update(visible=show_random), # random_hint
1354
+ gr.update(visible=show_random), # weight_base
1355
+ gr.update(visible=show_random), # weight_model_1
1356
+ gr.update(visible=show_random and has_m2), # weight_model_2
1357
+ gr.update(visible=show_random and has_m3), # weight_model_3
1358
+ gr.update(visible=show_every_k), # every_k_hint
1359
+ gr.update(visible=show_every_k), # k_model_1
1360
+ gr.update(visible=show_every_k and has_m2), # k_model_2
1361
+ gr.update(visible=show_every_k and has_m3), # k_model_3
1362
+ gr.update(visible=show_part), # part_hint
1363
+ gr.update(visible=show_part), # part_mode
1364
+ gr.update(visible=show_part), # start_1
1365
+ gr.update(visible=show_part), # end_1
1366
+ gr.update(visible=show_part and has_m2), # start_2
1367
+ gr.update(visible=show_part and has_m2), # end_2
1368
+ gr.update(visible=show_part and has_m3), # start_3
1369
+ gr.update(visible=show_part and has_m3), # end_3
1370
+ ]
1371
 
1372
  selected_strategy.change(
1373
  fn=on_strategy_change,
1374
+ inputs=[selected_strategy, num_routing_models],
1375
  outputs=[
1376
+ random_block, every_k_block, part_block,
1377
+ random_hint, weight_base, weight_model_1, weight_model_2, weight_model_3,
1378
+ every_k_hint, k_model_1, k_model_2, k_model_3,
1379
+ part_hint, part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
1380
  ],
1381
  )
1382
 
 
1451
  )
1452
 
1453
  def show_model_2(strategy):
1454
+ is_random = strategy == "Random router"
1455
  is_every_k = strategy == "Every k-th step"
1456
  is_part = strategy == "Replace part of trajectory"
1457
  return (
1458
+ gr.update(visible=True), # show block 2
1459
+ gr.update(visible=False), # hide add button
1460
+ gr.update(visible=is_random), # weight2
1461
+ gr.update(visible=is_every_k), # k2
1462
+ gr.update(visible=is_part), # start2
1463
+ gr.update(visible=is_part), # end2
1464
+ 2,
1465
  )
1466
 
1467
  add_model_2_btn.click(
1468
  fn=show_model_2,
1469
  inputs=[selected_strategy],
1470
+ outputs=[routing_block_2, add_model_2_btn, weight_model_2, k_model_2, start_2, end_2, num_routing_models],
1471
  )
1472
 
1473
  routing_model_2.change(
 
1477
  )
1478
 
1479
  def show_model_3(strategy):
1480
+ is_random = strategy == "Random router"
1481
  is_every_k = strategy == "Every k-th step"
1482
  is_part = strategy == "Replace part of trajectory"
1483
  return (
1484
+ gr.update(visible=True), # show block 3
1485
+ gr.update(visible=False), # hide add button
1486
+ gr.update(visible=is_random), # weight3
1487
+ gr.update(visible=is_every_k), # k3
1488
+ gr.update(visible=is_part), # start3
1489
+ gr.update(visible=is_part), # end3
1490
+ 3,
1491
  )
1492
 
1493
  add_model_3_btn.click(
1494
  fn=show_model_3,
1495
  inputs=[selected_strategy],
1496
+ outputs=[routing_block_3, add_model_3_btn, weight_model_3, k_model_3, start_3, end_3, num_routing_models],
1497
  )
1498
 
1499
  routing_model_3.change(
 
1512
  weight_base_val, weight_1_val, weight_2_val, weight_3_val,
1513
  k_1_val, k_2_val, k_3_val,
1514
  part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
1515
+ overhead, with_cache
1516
  ):
1517
  if state_data is None:
1518
  yield (
 
1599
  return
1600
 
1601
  weights = None
1602
+ if strategy_val == "Random router":
1603
  weights = [weight_base_val, weight_1_val]
1604
  if len(routing_models) > 1:
1605
  weights.append(weight_2_val)
 
1627
 
1628
  step_to_model = {}
1629
 
1630
+ if strategy_val == "Random router":
1631
  model_choices = [BASE_MODEL] + [f"__routing_{j}__" for j in range(len(routing_models))]
1632
  for i in range(total_steps):
1633
  step_to_model[i] = random.choices(model_choices, weights=weights)[0]
 
1657
  modified_steps.append({
1658
  "model": model,
1659
  "system_user": step.get("system_user", 0),
1660
+ "completion": int(step.get("completion", 0) * overhead),
1661
  "observation": step.get("observation"),
1662
  })
1663
 
 
1675
  original_steps.append({
1676
  "model": BASE_MODEL,
1677
  "system_user": step.get("system_user", 0),
1678
+ "completion": int(step.get("completion", 0) * overhead),
1679
  "observation": step.get("observation"),
1680
  })
1681
  original_totals = calculate_routing_tokens(original_steps)
 
1766
  weight_base, weight_model_1, weight_model_2, weight_model_3,
1767
  k_model_1, k_model_2, k_model_3,
1768
  part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
1769
+ thinking_overhead, use_cache,
1770
  ],
1771
  outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
1772
  )
1773
 
 
 
 
 
 
 
 
 
 
 
1774
  leaderboard_table.select(
1775
  fn=on_row_select,
1776
  inputs=[leaderboard_table],
1777
  outputs=[selected_folder, selected_name, analyze_btn, price_input, price_cache_read, price_cache_creation, price_completion, detected_model, thinking_overhead],
1778
  )
1779
 
1780
+ def load_and_analyze(folder, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache, progress=gr.Progress()):
1781
  empty_result = (
1782
  "",
1783
  gr.update(visible=False),
1784
+ None, None,
1785
+ None, None, None, None,
1786
+ None, None, None, None,
1787
  None,
1788
  gr.update(visible=False),
1789
  )
 
1796
  yield (
1797
  "⏳ Downloading trajectories...",
1798
  gr.update(visible=False),
1799
+ None, None,
1800
+ None, None, None, None,
1801
+ None, None, None, None,
1802
  None,
1803
  gr.update(visible=False),
1804
  )
 
1807
  yield (
1808
  status,
1809
  gr.update(visible=False),
1810
+ None, None,
1811
+ None, None, None, None,
1812
+ None, None, None, None,
1813
  None,
1814
  gr.update(visible=False),
1815
  )
 
1818
  yield (
1819
  "⏳ Loading trajectories...",
1820
  gr.update(visible=True),
1821
+ None, None,
1822
+ None, None, None, None,
1823
+ None, None, None, None,
1824
  None,
1825
  gr.update(visible=False),
1826
  )
 
1833
 
1834
  state_data = {"meta": df_meta, "calculated": df_calc, "folder": folder, "steps": trajectory_steps}
1835
 
1836
+ if df_meta.empty:
 
 
 
 
 
 
 
1837
  yield (
1838
  "❌ No trajectories found",
1839
  gr.update(visible=False),
1840
+ None, None,
1841
+ None, None, None, None,
1842
+ None, None, None, None,
1843
  None,
1844
  gr.update(visible=False),
1845
  )
1846
  return
1847
 
1848
+ fig_steps, fig_cost, _, _, _ = create_basic_histograms(
1849
+ df_meta, input_price, cache_read_price, cache_creation_price, completion_price
1850
+ )
1851
+
1852
+ fig_tokens_meta, fig_tokens_cost_meta, fig_stacked_meta = create_token_charts(
1853
+ df_meta, input_price, cache_read_price, cache_creation_price, completion_price
1854
+ )
1855
+ fig_cost_breakdown_meta = create_cost_breakdown(
1856
+ df_meta, input_price, cache_read_price, cache_creation_price, completion_price
1857
+ )
1858
+
1859
+ df_calc_processed = apply_thinking_overhead(df_calc.copy(), overhead)
1860
+ if not with_cache:
1861
+ df_calc_processed = apply_no_cache(df_calc_processed)
1862
+
1863
+ fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc = create_token_charts(
1864
+ df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
1865
+ )
1866
+ fig_cost_breakdown_calc = create_cost_breakdown(
1867
+ df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
1868
  )
 
1869
 
1870
  yield (
1871
+ f"✅ Loaded {len(df_meta)} trajectories",
1872
  gr.update(visible=True),
1873
+ fig_steps, fig_cost,
1874
+ fig_tokens_meta, fig_tokens_cost_meta, fig_stacked_meta, fig_cost_breakdown_meta,
1875
+ fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc, fig_cost_breakdown_calc,
1876
  state_data,
1877
  gr.update(visible=True),
1878
  )
1879
 
1880
  analyze_btn.click(
1881
  fn=load_and_analyze,
1882
+ inputs=[selected_folder, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache],
1883
  outputs=[
1884
  download_status,
1885
  analysis_section,
1886
+ plot_steps, plot_cost,
1887
+ plot_tokens_meta, plot_tokens_cost_meta, plot_stacked_meta, plot_cost_breakdown_meta,
1888
+ plot_tokens_calc, plot_tokens_cost_calc, plot_stacked_calc, plot_cost_breakdown_calc,
1889
  trajectories_state,
1890
  add_routing_btn,
1891
  ],
1892
  )
1893
 
1894
+ def recalculate_costs(state_data, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
1895
  if state_data is None:
1896
+ return None, None, None, None
1897
 
1898
+ df_meta = state_data["meta"]
1899
+ df_calc = state_data["calculated"]
 
 
 
 
1900
 
1901
+ if df_meta.empty:
1902
+ return None, None, None, None
1903
+
1904
+ fig_tokens_cost_meta = create_cost_by_type_chart(df_meta, input_price, cache_read_price, cache_creation_price, completion_price)
1905
+ fig_cost_breakdown_meta = create_cost_breakdown(df_meta, input_price, cache_read_price, cache_creation_price, completion_price)
1906
+
1907
+ df_calc_processed = apply_thinking_overhead(df_calc.copy(), overhead)
1908
+ if not with_cache:
1909
+ df_calc_processed = apply_no_cache(df_calc_processed)
1910
 
1911
+ fig_tokens_cost_calc = create_cost_by_type_chart(df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price)
1912
+ fig_cost_breakdown_calc = create_cost_breakdown(df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price)
 
1913
 
1914
+ return fig_tokens_cost_meta, fig_cost_breakdown_meta, fig_tokens_cost_calc, fig_cost_breakdown_calc
1915
+
1916
+ price_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
1917
+ price_outputs = [plot_tokens_cost_meta, plot_cost_breakdown_meta, plot_tokens_cost_calc, plot_cost_breakdown_calc]
1918
 
1919
  price_input.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1920
  price_cache_read.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1921
  price_cache_creation.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1922
  price_completion.change(fn=recalculate_costs, inputs=price_inputs, outputs=price_outputs)
1923
 
1924
+ def on_calc_options_change(state_data, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
1925
+ """Recalculate only calculated charts when overhead or cache options change"""
1926
  if state_data is None:
1927
  return None, None, None, None
1928
 
1929
+ df_calc = state_data["calculated"]
1930
+ if df_calc.empty:
 
 
 
 
 
 
1931
  return None, None, None, None
1932
 
1933
+ df_calc_processed = apply_thinking_overhead(df_calc.copy(), overhead)
1934
+ if not with_cache:
1935
+ df_calc_processed = apply_no_cache(df_calc_processed)
 
1936
 
1937
+ fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc = create_token_charts(
1938
+ df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
1939
+ )
1940
+ fig_cost_breakdown_calc = create_cost_breakdown(
1941
+ df_calc_processed, input_price, cache_read_price, cache_creation_price, completion_price
1942
+ )
1943
 
1944
+ return fig_tokens_calc, fig_tokens_cost_calc, fig_stacked_calc, fig_cost_breakdown_calc
 
1945
 
1946
+ calc_options_inputs = [trajectories_state, price_input, price_cache_read, price_cache_creation, price_completion, thinking_overhead, use_cache]
1947
+ calc_options_outputs = [plot_tokens_calc, plot_tokens_cost_calc, plot_stacked_calc, plot_cost_breakdown_calc]
 
 
 
1948
 
1949
  thinking_overhead.change(
1950
+ fn=on_calc_options_change,
1951
+ inputs=calc_options_inputs,
1952
+ outputs=calc_options_outputs,
1953
  )
1954
 
1955
  use_cache.change(
1956
+ fn=on_calc_options_change,
1957
+ inputs=calc_options_inputs,
1958
+ outputs=calc_options_outputs,
1959
  )
1960
 
1961
  return app