Commit
·
c89587b
1
Parent(s):
33c5576
Improve routing charts with comparison view (v0.3.30)
Browse files
- Add side-by-side comparison: [no routing] vs [with routing] bars
- Use hatched pattern for [no routing] bars to distinguish from routed
- Fix data source for [no routing] to match "Calculated from .traj" values
- Add base model name to legend items
- Improve legend order and styling
- Add gap between bar groups for better readability
- Keep original color palette for Metadata/Calculated charts
app.py
CHANGED
|
@@ -742,7 +742,6 @@ def create_cost_by_type_chart(df: pd.DataFrame, input_price: float, cache_read_p
|
|
| 742 |
cost_data,
|
| 743 |
x="Token Type",
|
| 744 |
y="Cost ($)",
|
| 745 |
-
title="",
|
| 746 |
color="Token Type",
|
| 747 |
color_discrete_sequence=["#EF553B", "#19D3F3", "#FFA15A", "#AB63FA"],
|
| 748 |
)
|
|
@@ -1162,58 +1161,74 @@ def select_first_row(df: pd.DataFrame):
|
|
| 1162 |
return _build_selection_payload(default_idx, df)
|
| 1163 |
|
| 1164 |
|
| 1165 |
-
def create_routed_token_chart(base_tokens: dict, additional_models: list):
|
| 1166 |
"""
|
| 1167 |
-
Create stacked bar chart
|
| 1168 |
-
X-axis: token types, bars stacked by model.
|
| 1169 |
|
| 1170 |
Args:
|
| 1171 |
-
|
|
|
|
| 1172 |
additional_models: list of (model_name, tokens_dict) tuples
|
|
|
|
| 1173 |
"""
|
| 1174 |
import plotly.graph_objects as go
|
| 1175 |
|
| 1176 |
categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
|
| 1177 |
token_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
|
| 1178 |
-
|
|
|
|
|
|
|
| 1179 |
|
| 1180 |
fig = go.Figure()
|
| 1181 |
|
| 1182 |
-
base_total = sum(base_tokens.get(k, 0) for k in token_keys)
|
| 1183 |
-
base_values = [base_tokens.get(k, 0) / 1e6 for k in token_keys]
|
| 1184 |
fig.add_trace(go.Bar(
|
| 1185 |
-
name="
|
| 1186 |
x=categories,
|
| 1187 |
-
y=
|
| 1188 |
-
marker_color=
|
| 1189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1190 |
))
|
| 1191 |
|
| 1192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1193 |
|
| 1194 |
for i, (model_name, tokens) in enumerate(additional_models):
|
| 1195 |
-
model_total = sum(tokens.get(k, 0) for k in token_keys)
|
| 1196 |
-
model_totals.append((model_name or f"Model {i+1}", model_total))
|
| 1197 |
-
values = [tokens.get(k, 0) / 1e6 for k in token_keys]
|
| 1198 |
-
color = colors[(i + 1) % len(colors)]
|
| 1199 |
fig.add_trace(go.Bar(
|
| 1200 |
name=model_name or f"Model {i+1}",
|
| 1201 |
x=categories,
|
| 1202 |
-
y=
|
| 1203 |
-
marker_color=
|
|
|
|
| 1204 |
hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": %{y:.3f}M<extra></extra>",
|
| 1205 |
))
|
| 1206 |
|
| 1207 |
-
|
| 1208 |
-
|
| 1209 |
-
|
| 1210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
|
| 1212 |
fig.update_layout(
|
| 1213 |
yaxis_title="Tokens (M)",
|
| 1214 |
barmode="stack",
|
|
|
|
| 1215 |
margin=dict(l=40, r=40, t=40, b=40),
|
| 1216 |
-
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
|
| 1217 |
)
|
| 1218 |
fig.add_annotation(
|
| 1219 |
text="<br>".join(annotation_lines),
|
|
@@ -1228,58 +1243,74 @@ def create_routed_token_chart(base_tokens: dict, additional_models: list):
|
|
| 1228 |
return fig
|
| 1229 |
|
| 1230 |
|
| 1231 |
-
def create_routed_cost_chart(base_costs: dict, additional_models: list):
|
| 1232 |
"""
|
| 1233 |
-
Create stacked bar chart
|
| 1234 |
-
X-axis: cost types, bars stacked by model.
|
| 1235 |
|
| 1236 |
Args:
|
| 1237 |
-
|
|
|
|
| 1238 |
additional_models: list of (model_name, costs_dict) tuples
|
|
|
|
| 1239 |
"""
|
| 1240 |
import plotly.graph_objects as go
|
| 1241 |
|
| 1242 |
categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
|
| 1243 |
cost_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
|
| 1244 |
-
|
|
|
|
|
|
|
| 1245 |
|
| 1246 |
fig = go.Figure()
|
| 1247 |
|
| 1248 |
-
base_total = sum(base_costs.get(k, 0) for k in cost_keys)
|
| 1249 |
-
base_values = [base_costs.get(k, 0) for k in cost_keys]
|
| 1250 |
fig.add_trace(go.Bar(
|
| 1251 |
-
name="
|
| 1252 |
x=categories,
|
| 1253 |
-
y=
|
| 1254 |
-
marker_color=
|
| 1255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1256 |
))
|
| 1257 |
|
| 1258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1259 |
|
| 1260 |
for i, (model_name, costs) in enumerate(additional_models):
|
| 1261 |
-
model_total = sum(costs.get(k, 0) for k in cost_keys)
|
| 1262 |
-
model_totals.append((model_name or f"Model {i+1}", model_total))
|
| 1263 |
-
values = [costs.get(k, 0) for k in cost_keys]
|
| 1264 |
-
color = colors[(i + 1) % len(colors)]
|
| 1265 |
fig.add_trace(go.Bar(
|
| 1266 |
name=model_name or f"Model {i+1}",
|
| 1267 |
x=categories,
|
| 1268 |
-
y=
|
| 1269 |
-
marker_color=
|
|
|
|
| 1270 |
hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": $%{y:.2f}<extra></extra>",
|
| 1271 |
))
|
| 1272 |
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1277 |
|
| 1278 |
fig.update_layout(
|
| 1279 |
yaxis_title="Cost ($)",
|
| 1280 |
barmode="stack",
|
|
|
|
| 1281 |
margin=dict(l=40, r=40, t=40, b=40),
|
| 1282 |
-
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
|
| 1283 |
)
|
| 1284 |
fig.add_annotation(
|
| 1285 |
text="<br>".join(annotation_lines),
|
|
@@ -1322,7 +1353,7 @@ def build_app():
|
|
| 1322 |
""")
|
| 1323 |
trajectories_state = gr.State(None)
|
| 1324 |
|
| 1325 |
-
gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.
|
| 1326 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1327 |
|
| 1328 |
with gr.Row():
|
|
@@ -1806,7 +1837,8 @@ def build_app():
|
|
| 1806 |
grep_1_val, grep_2_val, grep_3_val,
|
| 1807 |
resolved_model_val, unresolved_model_val,
|
| 1808 |
part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
|
| 1809 |
-
overhead, with_cache
|
|
|
|
| 1810 |
):
|
| 1811 |
if state_data is None:
|
| 1812 |
yield (
|
|
@@ -2103,6 +2135,20 @@ def build_app():
|
|
| 2103 |
additional_token_models = [(rc["name"], rc["tokens"]) for rc in routing_costs_list]
|
| 2104 |
additional_cost_models = [(rc["name"], rc["costs"]) for rc in routing_costs_list]
|
| 2105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2106 |
yield (
|
| 2107 |
gr.update(visible=True, value="⏳ Creating charts..."),
|
| 2108 |
gr.update(visible=True),
|
|
@@ -2110,8 +2156,9 @@ def build_app():
|
|
| 2110 |
None,
|
| 2111 |
)
|
| 2112 |
|
| 2113 |
-
|
| 2114 |
-
|
|
|
|
| 2115 |
|
| 2116 |
yield (
|
| 2117 |
gr.update(visible=True, value=result_text),
|
|
@@ -2136,6 +2183,7 @@ def build_app():
|
|
| 2136 |
resolved_model, unresolved_model,
|
| 2137 |
part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
|
| 2138 |
thinking_overhead, use_cache,
|
|
|
|
| 2139 |
],
|
| 2140 |
outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
|
| 2141 |
)
|
|
|
|
| 742 |
cost_data,
|
| 743 |
x="Token Type",
|
| 744 |
y="Cost ($)",
|
|
|
|
| 745 |
color="Token Type",
|
| 746 |
color_discrete_sequence=["#EF553B", "#19D3F3", "#FFA15A", "#AB63FA"],
|
| 747 |
)
|
|
|
|
| 1161 |
return _build_selection_payload(default_idx, df)
|
| 1162 |
|
| 1163 |
|
| 1164 |
+
def create_routed_token_chart(original_tokens: dict, base_tokens: dict, additional_models: list, base_model_name: str = "Base"):
|
| 1165 |
"""
|
| 1166 |
+
Create grouped+stacked bar chart comparing Calculated vs Routed tokens.
|
|
|
|
| 1167 |
|
| 1168 |
Args:
|
| 1169 |
+
original_tokens: dict with uncached_input, cache_read, cache_creation, completion (from Calculated)
|
| 1170 |
+
base_tokens: dict with uncached_input, cache_read, cache_creation, completion (base portion in routing)
|
| 1171 |
additional_models: list of (model_name, tokens_dict) tuples
|
| 1172 |
+
base_model_name: name of the base model
|
| 1173 |
"""
|
| 1174 |
import plotly.graph_objects as go
|
| 1175 |
|
| 1176 |
categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
|
| 1177 |
token_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
|
| 1178 |
+
base_color_dark = "#636EFA"
|
| 1179 |
+
base_color_light = "#A0C4FF"
|
| 1180 |
+
model_colors = ["#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
|
| 1181 |
|
| 1182 |
fig = go.Figure()
|
| 1183 |
|
|
|
|
|
|
|
| 1184 |
fig.add_trace(go.Bar(
|
| 1185 |
+
name=f"{base_model_name} [no routing]",
|
| 1186 |
x=categories,
|
| 1187 |
+
y=[original_tokens.get(k, 0) / 1e6 for k in token_keys],
|
| 1188 |
+
marker_color="rgba(99, 110, 250, 0.3)",
|
| 1189 |
+
marker_line_color=base_color_dark,
|
| 1190 |
+
marker_line_width=1,
|
| 1191 |
+
marker_pattern_shape="/",
|
| 1192 |
+
marker_pattern_fgcolor=base_color_dark,
|
| 1193 |
+
offsetgroup="calculated",
|
| 1194 |
+
hovertemplate="%{x}<br>" + base_model_name + " [no routing]: %{y:.3f}M<extra></extra>",
|
| 1195 |
))
|
| 1196 |
|
| 1197 |
+
fig.add_trace(go.Bar(
|
| 1198 |
+
name=f"{base_model_name} [with routing]",
|
| 1199 |
+
x=categories,
|
| 1200 |
+
y=[base_tokens.get(k, 0) / 1e6 for k in token_keys],
|
| 1201 |
+
marker_color=base_color_dark,
|
| 1202 |
+
offsetgroup="routed",
|
| 1203 |
+
hovertemplate="%{x}<br>" + base_model_name + " [with routing]: %{y:.3f}M<extra></extra>",
|
| 1204 |
+
))
|
| 1205 |
|
| 1206 |
for i, (model_name, tokens) in enumerate(additional_models):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1207 |
fig.add_trace(go.Bar(
|
| 1208 |
name=model_name or f"Model {i+1}",
|
| 1209 |
x=categories,
|
| 1210 |
+
y=[tokens.get(k, 0) / 1e6 for k in token_keys],
|
| 1211 |
+
marker_color=model_colors[i % len(model_colors)],
|
| 1212 |
+
offsetgroup="routed",
|
| 1213 |
hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": %{y:.3f}M<extra></extra>",
|
| 1214 |
))
|
| 1215 |
|
| 1216 |
+
original_total = sum(original_tokens.get(k, 0) for k in token_keys)
|
| 1217 |
+
routed_total = sum(base_tokens.get(k, 0) for k in token_keys) + sum(
|
| 1218 |
+
sum(m[1].get(k, 0) for k in token_keys) for m in additional_models
|
| 1219 |
+
)
|
| 1220 |
+
|
| 1221 |
+
annotation_lines = [
|
| 1222 |
+
f"<b>No routing: {original_total/1e6:.2f}M</b>",
|
| 1223 |
+
f"<b>With routing: {routed_total/1e6:.2f}M</b>",
|
| 1224 |
+
]
|
| 1225 |
|
| 1226 |
fig.update_layout(
|
| 1227 |
yaxis_title="Tokens (M)",
|
| 1228 |
barmode="stack",
|
| 1229 |
+
bargroupgap=0.1,
|
| 1230 |
margin=dict(l=40, r=40, t=40, b=40),
|
| 1231 |
+
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, traceorder="normal"),
|
| 1232 |
)
|
| 1233 |
fig.add_annotation(
|
| 1234 |
text="<br>".join(annotation_lines),
|
|
|
|
| 1243 |
return fig
|
| 1244 |
|
| 1245 |
|
| 1246 |
+
def create_routed_cost_chart(original_costs: dict, base_costs: dict, additional_models: list, base_model_name: str = "Base"):
|
| 1247 |
"""
|
| 1248 |
+
Create grouped+stacked bar chart comparing Calculated vs Routed costs.
|
|
|
|
| 1249 |
|
| 1250 |
Args:
|
| 1251 |
+
original_costs: dict with uncached_input, cache_read, cache_creation, completion (from Calculated)
|
| 1252 |
+
base_costs: dict with uncached_input, cache_read, cache_creation, completion (base portion in routing)
|
| 1253 |
additional_models: list of (model_name, costs_dict) tuples
|
| 1254 |
+
base_model_name: name of the base model
|
| 1255 |
"""
|
| 1256 |
import plotly.graph_objects as go
|
| 1257 |
|
| 1258 |
categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
|
| 1259 |
cost_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
|
| 1260 |
+
base_color_dark = "#636EFA"
|
| 1261 |
+
base_color_light = "#A0C4FF"
|
| 1262 |
+
model_colors = ["#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
|
| 1263 |
|
| 1264 |
fig = go.Figure()
|
| 1265 |
|
|
|
|
|
|
|
| 1266 |
fig.add_trace(go.Bar(
|
| 1267 |
+
name=f"{base_model_name} [no routing]",
|
| 1268 |
x=categories,
|
| 1269 |
+
y=[original_costs.get(k, 0) for k in cost_keys],
|
| 1270 |
+
marker_color="rgba(99, 110, 250, 0.3)",
|
| 1271 |
+
marker_line_color=base_color_dark,
|
| 1272 |
+
marker_line_width=1,
|
| 1273 |
+
marker_pattern_shape="/",
|
| 1274 |
+
marker_pattern_fgcolor=base_color_dark,
|
| 1275 |
+
offsetgroup="calculated",
|
| 1276 |
+
hovertemplate="%{x}<br>" + base_model_name + " [no routing]: $%{y:.2f}<extra></extra>",
|
| 1277 |
))
|
| 1278 |
|
| 1279 |
+
fig.add_trace(go.Bar(
|
| 1280 |
+
name=f"{base_model_name} [with routing]",
|
| 1281 |
+
x=categories,
|
| 1282 |
+
y=[base_costs.get(k, 0) for k in cost_keys],
|
| 1283 |
+
marker_color=base_color_dark,
|
| 1284 |
+
offsetgroup="routed",
|
| 1285 |
+
hovertemplate="%{x}<br>" + base_model_name + " [with routing]: $%{y:.2f}<extra></extra>",
|
| 1286 |
+
))
|
| 1287 |
|
| 1288 |
for i, (model_name, costs) in enumerate(additional_models):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1289 |
fig.add_trace(go.Bar(
|
| 1290 |
name=model_name or f"Model {i+1}",
|
| 1291 |
x=categories,
|
| 1292 |
+
y=[costs.get(k, 0) for k in cost_keys],
|
| 1293 |
+
marker_color=model_colors[i % len(model_colors)],
|
| 1294 |
+
offsetgroup="routed",
|
| 1295 |
hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": $%{y:.2f}<extra></extra>",
|
| 1296 |
))
|
| 1297 |
|
| 1298 |
+
original_total = sum(original_costs.get(k, 0) for k in cost_keys)
|
| 1299 |
+
routed_total = sum(base_costs.get(k, 0) for k in cost_keys) + sum(
|
| 1300 |
+
sum(m[1].get(k, 0) for k in cost_keys) for m in additional_models
|
| 1301 |
+
)
|
| 1302 |
+
|
| 1303 |
+
annotation_lines = [
|
| 1304 |
+
f"<b>No routing: ${original_total:.2f}</b>",
|
| 1305 |
+
f"<b>With routing: ${routed_total:.2f}</b>",
|
| 1306 |
+
]
|
| 1307 |
|
| 1308 |
fig.update_layout(
|
| 1309 |
yaxis_title="Cost ($)",
|
| 1310 |
barmode="stack",
|
| 1311 |
+
bargroupgap=0.1,
|
| 1312 |
margin=dict(l=40, r=40, t=40, b=40),
|
| 1313 |
+
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, traceorder="normal"),
|
| 1314 |
)
|
| 1315 |
fig.add_annotation(
|
| 1316 |
text="<br>".join(annotation_lines),
|
|
|
|
| 1353 |
""")
|
| 1354 |
trajectories_state = gr.State(None)
|
| 1355 |
|
| 1356 |
+
gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.30`")
|
| 1357 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1358 |
|
| 1359 |
with gr.Row():
|
|
|
|
| 1837 |
grep_1_val, grep_2_val, grep_3_val,
|
| 1838 |
resolved_model_val, unresolved_model_val,
|
| 1839 |
part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
|
| 1840 |
+
overhead, with_cache,
|
| 1841 |
+
detected_model_val
|
| 1842 |
):
|
| 1843 |
if state_data is None:
|
| 1844 |
yield (
|
|
|
|
| 2135 |
additional_token_models = [(rc["name"], rc["tokens"]) for rc in routing_costs_list]
|
| 2136 |
additional_cost_models = [(rc["name"], rc["costs"]) for rc in routing_costs_list]
|
| 2137 |
|
| 2138 |
+
if df_calc is not None and not df_calc.empty:
|
| 2139 |
+
df_temp = df_for_cost.copy()
|
| 2140 |
+
df_temp["uncached_input"] = (df_temp["prompt_tokens"] - df_temp["cache_read_tokens"] - df_temp["cache_creation_tokens"]).clip(lower=0)
|
| 2141 |
+
original_tokens_from_df = {
|
| 2142 |
+
"uncached_input": df_temp["uncached_input"].sum(),
|
| 2143 |
+
"cache_read": df_for_cost["cache_read_tokens"].sum(),
|
| 2144 |
+
"cache_creation": df_for_cost["cache_creation_tokens"].sum(),
|
| 2145 |
+
"completion": df_for_cost["completion_tokens"].sum(),
|
| 2146 |
+
}
|
| 2147 |
+
else:
|
| 2148 |
+
original_tokens_from_df = total_original_tokens
|
| 2149 |
+
|
| 2150 |
+
original_costs = tokens_to_costs(original_tokens_from_df, base_prices)
|
| 2151 |
+
|
| 2152 |
yield (
|
| 2153 |
gr.update(visible=True, value="⏳ Creating charts..."),
|
| 2154 |
gr.update(visible=True),
|
|
|
|
| 2156 |
None,
|
| 2157 |
)
|
| 2158 |
|
| 2159 |
+
base_model_name = detected_model_val or "Base"
|
| 2160 |
+
tokens_chart = create_routed_token_chart(original_tokens_from_df, total_base_tokens, additional_token_models, base_model_name)
|
| 2161 |
+
cost_chart = create_routed_cost_chart(original_costs, base_costs, additional_cost_models, base_model_name)
|
| 2162 |
|
| 2163 |
yield (
|
| 2164 |
gr.update(visible=True, value=result_text),
|
|
|
|
| 2183 |
resolved_model, unresolved_model,
|
| 2184 |
part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
|
| 2185 |
thinking_overhead, use_cache,
|
| 2186 |
+
detected_model,
|
| 2187 |
],
|
| 2188 |
outputs=[routing_result, routing_plots_row, routing_tokens_plot, routing_cost_plot],
|
| 2189 |
)
|