IgorSlinko committed on
Commit
c89587b
·
1 Parent(s): 33c5576

Improve routing charts with comparison view (v0.3.30)

Browse files

- Add side-by-side comparison: [no routing] vs [with routing] bars
- Use hatched pattern for [no routing] bars to distinguish from routed
- Fix data source for [no routing] to match "Calculated from .traj" values
- Add base model name to legend items
- Improve legend order and styling
- Add gap between bar groups for better readability
- Keep original color palette for Metadata/Calculated charts

Files changed (1) hide show
  1. app.py +99 -51
app.py CHANGED
@@ -742,7 +742,6 @@ def create_cost_by_type_chart(df: pd.DataFrame, input_price: float, cache_read_p
742
  cost_data,
743
  x="Token Type",
744
  y="Cost ($)",
745
- title="",
746
  color="Token Type",
747
  color_discrete_sequence=["#EF553B", "#19D3F3", "#FFA15A", "#AB63FA"],
748
  )
@@ -1162,58 +1161,74 @@ def select_first_row(df: pd.DataFrame):
1162
  return _build_selection_payload(default_idx, df)
1163
 
1164
 
1165
- def create_routed_token_chart(base_tokens: dict, additional_models: list):
1166
  """
1167
- Create stacked bar chart for tokens by type, comparing base vs additional models.
1168
- X-axis: token types, bars stacked by model.
1169
 
1170
  Args:
1171
- base_tokens: dict with uncached_input, cache_read, cache_creation, completion
 
1172
  additional_models: list of (model_name, tokens_dict) tuples
 
1173
  """
1174
  import plotly.graph_objects as go
1175
 
1176
  categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
1177
  token_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
1178
- colors = ["#636EFA", "#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
 
 
1179
 
1180
  fig = go.Figure()
1181
 
1182
- base_total = sum(base_tokens.get(k, 0) for k in token_keys)
1183
- base_values = [base_tokens.get(k, 0) / 1e6 for k in token_keys]
1184
  fig.add_trace(go.Bar(
1185
- name="Base Model",
1186
  x=categories,
1187
- y=base_values,
1188
- marker_color=colors[0],
1189
- hovertemplate="%{x}<br>Base Model: %{y:.3f}M<extra></extra>",
 
 
 
 
 
1190
  ))
1191
 
1192
- model_totals = [("Base Model", base_total)]
 
 
 
 
 
 
 
1193
 
1194
  for i, (model_name, tokens) in enumerate(additional_models):
1195
- model_total = sum(tokens.get(k, 0) for k in token_keys)
1196
- model_totals.append((model_name or f"Model {i+1}", model_total))
1197
- values = [tokens.get(k, 0) / 1e6 for k in token_keys]
1198
- color = colors[(i + 1) % len(colors)]
1199
  fig.add_trace(go.Bar(
1200
  name=model_name or f"Model {i+1}",
1201
  x=categories,
1202
- y=values,
1203
- marker_color=color,
 
1204
  hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": %{y:.3f}M<extra></extra>",
1205
  ))
1206
 
1207
- grand_total = sum(t for _, t in model_totals)
1208
- annotation_lines = [f"<b>Total: {grand_total/1e6:.2f}M</b>"]
1209
- for name, total in model_totals:
1210
- annotation_lines.append(f"{name}: {total/1e6:.2f}M")
 
 
 
 
 
1211
 
1212
  fig.update_layout(
1213
  yaxis_title="Tokens (M)",
1214
  barmode="stack",
 
1215
  margin=dict(l=40, r=40, t=40, b=40),
1216
- legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
1217
  )
1218
  fig.add_annotation(
1219
  text="<br>".join(annotation_lines),
@@ -1228,58 +1243,74 @@ def create_routed_token_chart(base_tokens: dict, additional_models: list):
1228
  return fig
1229
 
1230
 
1231
- def create_routed_cost_chart(base_costs: dict, additional_models: list):
1232
  """
1233
- Create stacked bar chart for costs by type, comparing base vs additional models.
1234
- X-axis: cost types, bars stacked by model.
1235
 
1236
  Args:
1237
- base_costs: dict with uncached_input, cache_read, cache_creation, completion
 
1238
  additional_models: list of (model_name, costs_dict) tuples
 
1239
  """
1240
  import plotly.graph_objects as go
1241
 
1242
  categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
1243
  cost_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
1244
- colors = ["#636EFA", "#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
 
 
1245
 
1246
  fig = go.Figure()
1247
 
1248
- base_total = sum(base_costs.get(k, 0) for k in cost_keys)
1249
- base_values = [base_costs.get(k, 0) for k in cost_keys]
1250
  fig.add_trace(go.Bar(
1251
- name="Base Model",
1252
  x=categories,
1253
- y=base_values,
1254
- marker_color=colors[0],
1255
- hovertemplate="%{x}<br>Base Model: $%{y:.2f}<extra></extra>",
 
 
 
 
 
1256
  ))
1257
 
1258
- model_totals = [("Base Model", base_total)]
 
 
 
 
 
 
 
1259
 
1260
  for i, (model_name, costs) in enumerate(additional_models):
1261
- model_total = sum(costs.get(k, 0) for k in cost_keys)
1262
- model_totals.append((model_name or f"Model {i+1}", model_total))
1263
- values = [costs.get(k, 0) for k in cost_keys]
1264
- color = colors[(i + 1) % len(colors)]
1265
  fig.add_trace(go.Bar(
1266
  name=model_name or f"Model {i+1}",
1267
  x=categories,
1268
- y=values,
1269
- marker_color=color,
 
1270
  hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": $%{y:.2f}<extra></extra>",
1271
  ))
1272
 
1273
- grand_total = sum(t for _, t in model_totals)
1274
- annotation_lines = [f"<b>Total: ${grand_total:.2f}</b>"]
1275
- for name, total in model_totals:
1276
- annotation_lines.append(f"{name}: ${total:.2f}")
 
 
 
 
 
1277
 
1278
  fig.update_layout(
1279
  yaxis_title="Cost ($)",
1280
  barmode="stack",
 
1281
  margin=dict(l=40, r=40, t=40, b=40),
1282
- legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
1283
  )
1284
  fig.add_annotation(
1285
  text="<br>".join(annotation_lines),
@@ -1322,7 +1353,7 @@ def build_app():
1322
  """)
1323
  trajectories_state = gr.State(None)
1324
 
1325
- gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.22`")
1326
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1327
 
1328
  with gr.Row():
@@ -1806,7 +1837,8 @@ def build_app():
1806
  grep_1_val, grep_2_val, grep_3_val,
1807
  resolved_model_val, unresolved_model_val,
1808
  part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
1809
- overhead, with_cache
 
1810
  ):
1811
  if state_data is None:
1812
  yield (
@@ -2103,6 +2135,20 @@ def build_app():
2103
  additional_token_models = [(rc["name"], rc["tokens"]) for rc in routing_costs_list]
2104
  additional_cost_models = [(rc["name"], rc["costs"]) for rc in routing_costs_list]
2105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2106
  yield (
2107
  gr.update(visible=True, value="⏳ Creating charts..."),
2108
  gr.update(visible=True),
@@ -2110,8 +2156,9 @@ def build_app():
2110
  None,
2111
  )
2112
 
2113
- tokens_chart = create_routed_token_chart(total_base_tokens, additional_token_models)
2114
- cost_chart = create_routed_cost_chart(base_costs, additional_cost_models)
 
2115
 
2116
  yield (
2117
  gr.update(visible=True, value=result_text),
@@ -2136,6 +2183,7 @@ def build_app():
2136
  resolved_model, unresolved_model,
2137
  part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
2138
  thinking_overhead, use_cache,
 
2139
  ],
2140
  outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
2141
  )
 
742
  cost_data,
743
  x="Token Type",
744
  y="Cost ($)",
 
745
  color="Token Type",
746
  color_discrete_sequence=["#EF553B", "#19D3F3", "#FFA15A", "#AB63FA"],
747
  )
 
1161
  return _build_selection_payload(default_idx, df)
1162
 
1163
 
1164
+ def create_routed_token_chart(original_tokens: dict, base_tokens: dict, additional_models: list, base_model_name: str = "Base"):
1165
  """
1166
+ Create grouped+stacked bar chart comparing Calculated vs Routed tokens.
 
1167
 
1168
  Args:
1169
+ original_tokens: dict with uncached_input, cache_read, cache_creation, completion (from Calculated)
1170
+ base_tokens: dict with uncached_input, cache_read, cache_creation, completion (base portion in routing)
1171
  additional_models: list of (model_name, tokens_dict) tuples
1172
+ base_model_name: name of the base model
1173
  """
1174
  import plotly.graph_objects as go
1175
 
1176
  categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
1177
  token_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
1178
+ base_color_dark = "#636EFA"
1179
+ base_color_light = "#A0C4FF"
1180
+ model_colors = ["#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
1181
 
1182
  fig = go.Figure()
1183
 
 
 
1184
  fig.add_trace(go.Bar(
1185
+ name=f"{base_model_name} [no routing]",
1186
  x=categories,
1187
+ y=[original_tokens.get(k, 0) / 1e6 for k in token_keys],
1188
+ marker_color="rgba(99, 110, 250, 0.3)",
1189
+ marker_line_color=base_color_dark,
1190
+ marker_line_width=1,
1191
+ marker_pattern_shape="/",
1192
+ marker_pattern_fgcolor=base_color_dark,
1193
+ offsetgroup="calculated",
1194
+ hovertemplate="%{x}<br>" + base_model_name + " [no routing]: %{y:.3f}M<extra></extra>",
1195
  ))
1196
 
1197
+ fig.add_trace(go.Bar(
1198
+ name=f"{base_model_name} [with routing]",
1199
+ x=categories,
1200
+ y=[base_tokens.get(k, 0) / 1e6 for k in token_keys],
1201
+ marker_color=base_color_dark,
1202
+ offsetgroup="routed",
1203
+ hovertemplate="%{x}<br>" + base_model_name + " [with routing]: %{y:.3f}M<extra></extra>",
1204
+ ))
1205
 
1206
  for i, (model_name, tokens) in enumerate(additional_models):
 
 
 
 
1207
  fig.add_trace(go.Bar(
1208
  name=model_name or f"Model {i+1}",
1209
  x=categories,
1210
+ y=[tokens.get(k, 0) / 1e6 for k in token_keys],
1211
+ marker_color=model_colors[i % len(model_colors)],
1212
+ offsetgroup="routed",
1213
  hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": %{y:.3f}M<extra></extra>",
1214
  ))
1215
 
1216
+ original_total = sum(original_tokens.get(k, 0) for k in token_keys)
1217
+ routed_total = sum(base_tokens.get(k, 0) for k in token_keys) + sum(
1218
+ sum(m[1].get(k, 0) for k in token_keys) for m in additional_models
1219
+ )
1220
+
1221
+ annotation_lines = [
1222
+ f"<b>No routing: {original_total/1e6:.2f}M</b>",
1223
+ f"<b>With routing: {routed_total/1e6:.2f}M</b>",
1224
+ ]
1225
 
1226
  fig.update_layout(
1227
  yaxis_title="Tokens (M)",
1228
  barmode="stack",
1229
+ bargroupgap=0.1,
1230
  margin=dict(l=40, r=40, t=40, b=40),
1231
+ legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, traceorder="normal"),
1232
  )
1233
  fig.add_annotation(
1234
  text="<br>".join(annotation_lines),
 
1243
  return fig
1244
 
1245
 
1246
+ def create_routed_cost_chart(original_costs: dict, base_costs: dict, additional_models: list, base_model_name: str = "Base"):
1247
  """
1248
+ Create grouped+stacked bar chart comparing Calculated vs Routed costs.
 
1249
 
1250
  Args:
1251
+ original_costs: dict with uncached_input, cache_read, cache_creation, completion (from Calculated)
1252
+ base_costs: dict with uncached_input, cache_read, cache_creation, completion (base portion in routing)
1253
  additional_models: list of (model_name, costs_dict) tuples
1254
+ base_model_name: name of the base model
1255
  """
1256
  import plotly.graph_objects as go
1257
 
1258
  categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
1259
  cost_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
1260
+ base_color_dark = "#636EFA"
1261
+ base_color_light = "#A0C4FF"
1262
+ model_colors = ["#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
1263
 
1264
  fig = go.Figure()
1265
 
 
 
1266
  fig.add_trace(go.Bar(
1267
+ name=f"{base_model_name} [no routing]",
1268
  x=categories,
1269
+ y=[original_costs.get(k, 0) for k in cost_keys],
1270
+ marker_color="rgba(99, 110, 250, 0.3)",
1271
+ marker_line_color=base_color_dark,
1272
+ marker_line_width=1,
1273
+ marker_pattern_shape="/",
1274
+ marker_pattern_fgcolor=base_color_dark,
1275
+ offsetgroup="calculated",
1276
+ hovertemplate="%{x}<br>" + base_model_name + " [no routing]: $%{y:.2f}<extra></extra>",
1277
  ))
1278
 
1279
+ fig.add_trace(go.Bar(
1280
+ name=f"{base_model_name} [with routing]",
1281
+ x=categories,
1282
+ y=[base_costs.get(k, 0) for k in cost_keys],
1283
+ marker_color=base_color_dark,
1284
+ offsetgroup="routed",
1285
+ hovertemplate="%{x}<br>" + base_model_name + " [with routing]: $%{y:.2f}<extra></extra>",
1286
+ ))
1287
 
1288
  for i, (model_name, costs) in enumerate(additional_models):
 
 
 
 
1289
  fig.add_trace(go.Bar(
1290
  name=model_name or f"Model {i+1}",
1291
  x=categories,
1292
+ y=[costs.get(k, 0) for k in cost_keys],
1293
+ marker_color=model_colors[i % len(model_colors)],
1294
+ offsetgroup="routed",
1295
  hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": $%{y:.2f}<extra></extra>",
1296
  ))
1297
 
1298
+ original_total = sum(original_costs.get(k, 0) for k in cost_keys)
1299
+ routed_total = sum(base_costs.get(k, 0) for k in cost_keys) + sum(
1300
+ sum(m[1].get(k, 0) for k in cost_keys) for m in additional_models
1301
+ )
1302
+
1303
+ annotation_lines = [
1304
+ f"<b>No routing: ${original_total:.2f}</b>",
1305
+ f"<b>With routing: ${routed_total:.2f}</b>",
1306
+ ]
1307
 
1308
  fig.update_layout(
1309
  yaxis_title="Cost ($)",
1310
  barmode="stack",
1311
+ bargroupgap=0.1,
1312
  margin=dict(l=40, r=40, t=40, b=40),
1313
+ legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, traceorder="normal"),
1314
  )
1315
  fig.add_annotation(
1316
  text="<br>".join(annotation_lines),
 
1353
  """)
1354
  trajectories_state = gr.State(None)
1355
 
1356
+ gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.30`")
1357
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1358
 
1359
  with gr.Row():
 
1837
  grep_1_val, grep_2_val, grep_3_val,
1838
  resolved_model_val, unresolved_model_val,
1839
  part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
1840
+ overhead, with_cache,
1841
+ detected_model_val
1842
  ):
1843
  if state_data is None:
1844
  yield (
 
2135
  additional_token_models = [(rc["name"], rc["tokens"]) for rc in routing_costs_list]
2136
  additional_cost_models = [(rc["name"], rc["costs"]) for rc in routing_costs_list]
2137
 
2138
+ if df_calc is not None and not df_calc.empty:
2139
+ df_temp = df_for_cost.copy()
2140
+ df_temp["uncached_input"] = (df_temp["prompt_tokens"] - df_temp["cache_read_tokens"] - df_temp["cache_creation_tokens"]).clip(lower=0)
2141
+ original_tokens_from_df = {
2142
+ "uncached_input": df_temp["uncached_input"].sum(),
2143
+ "cache_read": df_for_cost["cache_read_tokens"].sum(),
2144
+ "cache_creation": df_for_cost["cache_creation_tokens"].sum(),
2145
+ "completion": df_for_cost["completion_tokens"].sum(),
2146
+ }
2147
+ else:
2148
+ original_tokens_from_df = total_original_tokens
2149
+
2150
+ original_costs = tokens_to_costs(original_tokens_from_df, base_prices)
2151
+
2152
  yield (
2153
  gr.update(visible=True, value="⏳ Creating charts..."),
2154
  gr.update(visible=True),
 
2156
  None,
2157
  )
2158
 
2159
+ base_model_name = detected_model_val or "Base"
2160
+ tokens_chart = create_routed_token_chart(original_tokens_from_df, total_base_tokens, additional_token_models, base_model_name)
2161
+ cost_chart = create_routed_cost_chart(original_costs, base_costs, additional_cost_models, base_model_name)
2162
 
2163
  yield (
2164
  gr.update(visible=True, value=result_text),
 
2183
  resolved_model, unresolved_model,
2184
  part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
2185
  thinking_overhead, use_cache,
2186
+ detected_model,
2187
  ],
2188
  outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
2189
  )