Commit
·
33c5576
1
Parent(s):
8a1286e
Fix routing charts: stack models within each category (v0.3.22)
Browse files
- X-axis: token/cost types (Uncached Input, Cache Read, etc.)
- Within each category, models are stacked (Base on bottom, others on top)
app.py
CHANGED
|
@@ -1165,6 +1165,7 @@ def select_first_row(df: pd.DataFrame):
|
|
| 1165 |
def create_routed_token_chart(base_tokens: dict, additional_models: list):
|
| 1166 |
"""
|
| 1167 |
Create stacked bar chart for tokens by type, comparing base vs additional models.
|
|
|
|
| 1168 |
|
| 1169 |
Args:
|
| 1170 |
base_tokens: dict with uncached_input, cache_read, cache_creation, completion
|
|
@@ -1172,30 +1173,37 @@ def create_routed_token_chart(base_tokens: dict, additional_models: list):
|
|
| 1172 |
"""
|
| 1173 |
import plotly.graph_objects as go
|
| 1174 |
|
| 1175 |
-
|
| 1176 |
-
|
| 1177 |
-
colors = ["#
|
| 1178 |
-
|
| 1179 |
-
model_names = ["Base Model"] + [m[0] or f"Model {i+1}" for i, m in enumerate(additional_models)]
|
| 1180 |
-
all_tokens = [base_tokens] + [m[1] for m in additional_models]
|
| 1181 |
|
| 1182 |
fig = go.Figure()
|
| 1183 |
|
| 1184 |
-
|
| 1185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1186 |
fig.add_trace(go.Bar(
|
| 1187 |
-
name=
|
| 1188 |
-
x=
|
| 1189 |
y=values,
|
| 1190 |
marker_color=color,
|
| 1191 |
-
hovertemplate="%{x}<br>" +
|
| 1192 |
))
|
| 1193 |
|
| 1194 |
-
model_totals = []
|
| 1195 |
-
for name, tokens in zip(model_names, all_tokens):
|
| 1196 |
-
total = sum(tokens.get(k, 0) for k in token_types)
|
| 1197 |
-
model_totals.append((name, total))
|
| 1198 |
-
|
| 1199 |
grand_total = sum(t for _, t in model_totals)
|
| 1200 |
annotation_lines = [f"<b>Total: {grand_total/1e6:.2f}M</b>"]
|
| 1201 |
for name, total in model_totals:
|
|
@@ -1223,6 +1231,7 @@ def create_routed_token_chart(base_tokens: dict, additional_models: list):
|
|
| 1223 |
def create_routed_cost_chart(base_costs: dict, additional_models: list):
|
| 1224 |
"""
|
| 1225 |
Create stacked bar chart for costs by type, comparing base vs additional models.
|
|
|
|
| 1226 |
|
| 1227 |
Args:
|
| 1228 |
base_costs: dict with uncached_input, cache_read, cache_creation, completion
|
|
@@ -1230,30 +1239,37 @@ def create_routed_cost_chart(base_costs: dict, additional_models: list):
|
|
| 1230 |
"""
|
| 1231 |
import plotly.graph_objects as go
|
| 1232 |
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
colors = ["#
|
| 1236 |
-
|
| 1237 |
-
model_names = ["Base Model"] + [m[0] or f"Model {i+1}" for i, m in enumerate(additional_models)]
|
| 1238 |
-
all_costs = [base_costs] + [m[1] for m in additional_models]
|
| 1239 |
|
| 1240 |
fig = go.Figure()
|
| 1241 |
|
| 1242 |
-
|
| 1243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1244 |
fig.add_trace(go.Bar(
|
| 1245 |
-
name=
|
| 1246 |
-
x=
|
| 1247 |
y=values,
|
| 1248 |
marker_color=color,
|
| 1249 |
-
hovertemplate="%{x}<br>" +
|
| 1250 |
))
|
| 1251 |
|
| 1252 |
-
model_totals = []
|
| 1253 |
-
for name, costs in zip(model_names, all_costs):
|
| 1254 |
-
total = sum(costs.get(k, 0) for k in cost_types)
|
| 1255 |
-
model_totals.append((name, total))
|
| 1256 |
-
|
| 1257 |
grand_total = sum(t for _, t in model_totals)
|
| 1258 |
annotation_lines = [f"<b>Total: ${grand_total:.2f}</b>"]
|
| 1259 |
for name, total in model_totals:
|
|
@@ -1306,7 +1322,7 @@ def build_app():
|
|
| 1306 |
""")
|
| 1307 |
trajectories_state = gr.State(None)
|
| 1308 |
|
| 1309 |
-
gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.
|
| 1310 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1311 |
|
| 1312 |
with gr.Row():
|
|
|
|
| 1165 |
def create_routed_token_chart(base_tokens: dict, additional_models: list):
|
| 1166 |
"""
|
| 1167 |
Create stacked bar chart for tokens by type, comparing base vs additional models.
|
| 1168 |
+
X-axis: token types, bars stacked by model.
|
| 1169 |
|
| 1170 |
Args:
|
| 1171 |
base_tokens: dict with uncached_input, cache_read, cache_creation, completion
|
|
|
|
| 1173 |
"""
|
| 1174 |
import plotly.graph_objects as go
|
| 1175 |
|
| 1176 |
+
categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
|
| 1177 |
+
token_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
|
| 1178 |
+
colors = ["#636EFA", "#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
|
|
|
|
|
|
|
|
|
|
| 1179 |
|
| 1180 |
fig = go.Figure()
|
| 1181 |
|
| 1182 |
+
base_total = sum(base_tokens.get(k, 0) for k in token_keys)
|
| 1183 |
+
base_values = [base_tokens.get(k, 0) / 1e6 for k in token_keys]
|
| 1184 |
+
fig.add_trace(go.Bar(
|
| 1185 |
+
name="Base Model",
|
| 1186 |
+
x=categories,
|
| 1187 |
+
y=base_values,
|
| 1188 |
+
marker_color=colors[0],
|
| 1189 |
+
hovertemplate="%{x}<br>Base Model: %{y:.3f}M<extra></extra>",
|
| 1190 |
+
))
|
| 1191 |
+
|
| 1192 |
+
model_totals = [("Base Model", base_total)]
|
| 1193 |
+
|
| 1194 |
+
for i, (model_name, tokens) in enumerate(additional_models):
|
| 1195 |
+
model_total = sum(tokens.get(k, 0) for k in token_keys)
|
| 1196 |
+
model_totals.append((model_name or f"Model {i+1}", model_total))
|
| 1197 |
+
values = [tokens.get(k, 0) / 1e6 for k in token_keys]
|
| 1198 |
+
color = colors[(i + 1) % len(colors)]
|
| 1199 |
fig.add_trace(go.Bar(
|
| 1200 |
+
name=model_name or f"Model {i+1}",
|
| 1201 |
+
x=categories,
|
| 1202 |
y=values,
|
| 1203 |
marker_color=color,
|
| 1204 |
+
hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": %{y:.3f}M<extra></extra>",
|
| 1205 |
))
|
| 1206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1207 |
grand_total = sum(t for _, t in model_totals)
|
| 1208 |
annotation_lines = [f"<b>Total: {grand_total/1e6:.2f}M</b>"]
|
| 1209 |
for name, total in model_totals:
|
|
|
|
| 1231 |
def create_routed_cost_chart(base_costs: dict, additional_models: list):
|
| 1232 |
"""
|
| 1233 |
Create stacked bar chart for costs by type, comparing base vs additional models.
|
| 1234 |
+
X-axis: cost types, bars stacked by model.
|
| 1235 |
|
| 1236 |
Args:
|
| 1237 |
base_costs: dict with uncached_input, cache_read, cache_creation, completion
|
|
|
|
| 1239 |
"""
|
| 1240 |
import plotly.graph_objects as go
|
| 1241 |
|
| 1242 |
+
categories = ["Uncached Input", "Cache Read", "Cache Creation", "Completion"]
|
| 1243 |
+
cost_keys = ["uncached_input", "cache_read", "cache_creation", "completion"]
|
| 1244 |
+
colors = ["#636EFA", "#EF553B", "#00CC96", "#AB63FA", "#FFA15A"]
|
|
|
|
|
|
|
|
|
|
| 1245 |
|
| 1246 |
fig = go.Figure()
|
| 1247 |
|
| 1248 |
+
base_total = sum(base_costs.get(k, 0) for k in cost_keys)
|
| 1249 |
+
base_values = [base_costs.get(k, 0) for k in cost_keys]
|
| 1250 |
+
fig.add_trace(go.Bar(
|
| 1251 |
+
name="Base Model",
|
| 1252 |
+
x=categories,
|
| 1253 |
+
y=base_values,
|
| 1254 |
+
marker_color=colors[0],
|
| 1255 |
+
hovertemplate="%{x}<br>Base Model: $%{y:.2f}<extra></extra>",
|
| 1256 |
+
))
|
| 1257 |
+
|
| 1258 |
+
model_totals = [("Base Model", base_total)]
|
| 1259 |
+
|
| 1260 |
+
for i, (model_name, costs) in enumerate(additional_models):
|
| 1261 |
+
model_total = sum(costs.get(k, 0) for k in cost_keys)
|
| 1262 |
+
model_totals.append((model_name or f"Model {i+1}", model_total))
|
| 1263 |
+
values = [costs.get(k, 0) for k in cost_keys]
|
| 1264 |
+
color = colors[(i + 1) % len(colors)]
|
| 1265 |
fig.add_trace(go.Bar(
|
| 1266 |
+
name=model_name or f"Model {i+1}",
|
| 1267 |
+
x=categories,
|
| 1268 |
y=values,
|
| 1269 |
marker_color=color,
|
| 1270 |
+
hovertemplate="%{x}<br>" + (model_name or f"Model {i+1}") + ": $%{y:.2f}<extra></extra>",
|
| 1271 |
))
|
| 1272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1273 |
grand_total = sum(t for _, t in model_totals)
|
| 1274 |
annotation_lines = [f"<b>Total: ${grand_total:.2f}</b>"]
|
| 1275 |
for name, total in model_totals:
|
|
|
|
| 1322 |
""")
|
| 1323 |
trajectories_state = gr.State(None)
|
| 1324 |
|
| 1325 |
+
gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.22`")
|
| 1326 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1327 |
|
| 1328 |
with gr.Row():
|