Spaces:
Running on CPU Spr
Running on CPU Spr
Fix Fibble Arena (and similar) suite link routing
Browse files
Suite hrefs in the eval browser had a fallback chain of
rollup → matrix-preview → undefined. Suites without either (e.g.
fibble_arena: 5 leaf sub-evals, no parent rollup, not in matrix) ended
up navigating to whichever leaf eval's href leaked up to the suite
node. Add `/evals/aggregate__<suite_key>` as the third fallback — it's
in `aggregate_eval_summaries.parquet` for any suite with ≥2 sub-evals,
which is exactly the population that needs the fallback.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- app/evals/page.tsx +21 -14
app/evals/page.tsx
CHANGED
|
@@ -1159,13 +1159,20 @@ export default function EvalsPage() {
|
|
| 1159 |
summaries: suiteSummaries,
|
| 1160 |
card: suiteCard,
|
| 1161 |
sourceLabel: suiteLabel,
|
| 1162 |
-
href:
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1169 |
scopeKeys: suiteScopeKeys,
|
| 1170 |
matrixPreview: suiteMatrixPreview,
|
| 1171 |
descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,
|
|
@@ -1288,13 +1295,13 @@ export default function EvalsPage() {
|
|
| 1288 |
family.key
|
| 1289 |
),
|
| 1290 |
sourceLabel: suiteLabel,
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
scopeKeys: suiteScopeKeys,
|
| 1299 |
matrixPreview: suiteMatrixPreview,
|
| 1300 |
descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,
|
|
|
|
| 1159 |
summaries: suiteSummaries,
|
| 1160 |
card: suiteCard,
|
| 1161 |
sourceLabel: suiteLabel,
|
| 1162 |
+
// Suite href priority: real rollup > synthetic matrix >
|
| 1163 |
+
// synthetic aggregate (`aggregate__<suite_key>`). The
|
| 1164 |
+
// aggregate fallback was missing so suites with neither a
|
| 1165 |
+
// parent rollup nor a single-metric matrix (e.g.
|
| 1166 |
+
// `fibble_arena` — 5 leaves but no rollup) used to land on
|
| 1167 |
+
// a leaf-eval href that leaked from a child node, navigating
|
| 1168 |
+
// users to the alphabetically-first sub-eval. The aggregate
|
| 1169 |
+
// exists in `aggregate_eval_summaries.parquet` whenever the
|
| 1170 |
+
// suite has ≥2 distinct sub-evals.
|
| 1171 |
+
href: hasSuiteRollup
|
| 1172 |
+
? `/evals/${rollupSummary.evaluation_id}`
|
| 1173 |
+
: syntheticMatrixEvalId
|
| 1174 |
+
? `/evals/${syntheticMatrixEvalId}`
|
| 1175 |
+
: `/evals/aggregate__${composite.key}`,
|
| 1176 |
scopeKeys: suiteScopeKeys,
|
| 1177 |
matrixPreview: suiteMatrixPreview,
|
| 1178 |
descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,
|
|
|
|
| 1295 |
family.key
|
| 1296 |
),
|
| 1297 |
sourceLabel: suiteLabel,
|
| 1298 |
+
// Same fallback chain as the nested-suite branch above:
|
| 1299 |
+
// rollup > matrix > aggregate.
|
| 1300 |
+
href: hasSuiteRollup
|
| 1301 |
+
? `/evals/${rollupSummary.evaluation_id}`
|
| 1302 |
+
: syntheticMatrixEvalId
|
| 1303 |
+
? `/evals/${syntheticMatrixEvalId}`
|
| 1304 |
+
: `/evals/aggregate__${suiteKey}`,
|
| 1305 |
scopeKeys: suiteScopeKeys,
|
| 1306 |
matrixPreview: suiteMatrixPreview,
|
| 1307 |
descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,
|