j-chim Claude Opus 4.7 (1M context) committed on
Commit
c569d0f
·
1 Parent(s): 819e7c9

Fix Fibble Arena (and similar) suite link routing

Browse files

Suite hrefs in the eval browser were only set when the suite had a matrix
preview, and then followed a fallback chain of rollup → synthetic matrix →
undefined. Suites without either (e.g.
fibble_arena: 5 leaf sub-evals, no parent rollup, not in matrix) ended
up navigating to whichever leaf eval's href leaked up to the suite
node. Drop the matrix-preview gate and add `/evals/aggregate__<suite_key>`
as the third fallback — it's
in `aggregate_eval_summaries.parquet` for any suite with ≥2 sub-evals,
which is exactly the population that needs the fallback.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. app/evals/page.tsx +21 -14
app/evals/page.tsx CHANGED
@@ -1159,13 +1159,20 @@ export default function EvalsPage() {
1159
  summaries: suiteSummaries,
1160
  card: suiteCard,
1161
  sourceLabel: suiteLabel,
1162
- href: suiteMatrixPreview
1163
- ? hasSuiteRollup
1164
- ? `/evals/${rollupSummary.evaluation_id}`
1165
- : syntheticMatrixEvalId
1166
- ? `/evals/${syntheticMatrixEvalId}`
1167
- : undefined
1168
- : undefined,
 
 
 
 
 
 
 
1169
  scopeKeys: suiteScopeKeys,
1170
  matrixPreview: suiteMatrixPreview,
1171
  descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,
@@ -1288,13 +1295,13 @@ export default function EvalsPage() {
1288
  family.key
1289
  ),
1290
  sourceLabel: suiteLabel,
1291
- href: suiteMatrixPreview
1292
- ? hasSuiteRollup
1293
- ? `/evals/${rollupSummary.evaluation_id}`
1294
- : syntheticMatrixEvalId
1295
- ? `/evals/${syntheticMatrixEvalId}`
1296
- : undefined
1297
- : undefined,
1298
  scopeKeys: suiteScopeKeys,
1299
  matrixPreview: suiteMatrixPreview,
1300
  descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,
 
1159
  summaries: suiteSummaries,
1160
  card: suiteCard,
1161
  sourceLabel: suiteLabel,
1162
+ // Suite href priority: real rollup > synthetic matrix >
1163
+ // synthetic aggregate (`aggregate__<suite_key>`). The
1164
+ // aggregate fallback was missing so suites with neither a
1165
+ // parent rollup nor a single-metric matrix (e.g.
1166
+ // `fibble_arena` — 5 leaves but no rollup) used to land on
1167
+ // a leaf-eval href that leaked from a child node, navigating
1168
+ // users to the alphabetically-first sub-eval. The aggregate
1169
+ // exists in `aggregate_eval_summaries.parquet` whenever the
1170
+ // suite has ≥2 distinct sub-evals.
1171
+ href: hasSuiteRollup
1172
+ ? `/evals/${rollupSummary.evaluation_id}`
1173
+ : syntheticMatrixEvalId
1174
+ ? `/evals/${syntheticMatrixEvalId}`
1175
+ : `/evals/aggregate__${composite.key}`,
1176
  scopeKeys: suiteScopeKeys,
1177
  matrixPreview: suiteMatrixPreview,
1178
  descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,
 
1295
  family.key
1296
  ),
1297
  sourceLabel: suiteLabel,
1298
+ // Same fallback chain as the nested-suite branch above:
1299
+ // rollup > matrix > aggregate.
1300
+ href: hasSuiteRollup
1301
+ ? `/evals/${rollupSummary.evaluation_id}`
1302
+ : syntheticMatrixEvalId
1303
+ ? `/evals/${syntheticMatrixEvalId}`
1304
+ : `/evals/aggregate__${suiteKey}`,
1305
  scopeKeys: suiteScopeKeys,
1306
  matrixPreview: suiteMatrixPreview,
1307
  descriptionFallback: `Browse the {label} suite and then open its benchmark children.`,