bradnow committed on
Commit
b384f8e
·
1 Parent(s): 84f5427

feat: bars show absolute % of metric max (EOG/100, EVA/1.0)

Browse files
.claude/skills/sync-nowai-leaderboard/lib/transform.mjs CHANGED
@@ -52,7 +52,6 @@ export function vendorFromName(name) {
52
 
53
  export function deriveEog(eogArray) {
54
  const sorted = [...eogArray].sort((a, b) => b.avg - a.avg).slice(0, 5);
55
- const top = sorted[0]?.avg ?? 0;
56
  return {
57
  metricLabel: 'Task Success Rate · Oracle mode',
58
  rows: sorted.map((m, i) => ({
@@ -61,7 +60,8 @@ export function deriveEog(eogArray) {
61
  org: vendorFromName(m.model),
62
  type: m.type,
63
  score: round1(m.avg),
64
- bar: top ? Math.round((m.avg / top) * 100) : 0,
 
65
  })),
66
  };
67
  }
@@ -80,7 +80,6 @@ export function deriveEvaBoard(systems, metricKey) {
80
  .filter((x) => x.pooled && typeof x.pooled.point === 'number')
81
  .sort((a, b) => b.pooled.point - a.pooled.point)
82
  .slice(0, 5);
83
- const top = scored[0]?.pooled.point ?? 0;
84
  return {
85
  metricLabel: 'Pass@1',
86
  rows: scored.map(({ s, pooled }, i) => ({
@@ -93,7 +92,8 @@ export function deriveEvaBoard(systems, metricKey) {
93
  score: round2(pooled.point),
94
  ciLower: round2(pooled.ci_lower),
95
  ciUpper: round2(pooled.ci_upper),
96
- bar: top ? Math.round((pooled.point / top) * 100) : 0,
 
97
  })),
98
  };
99
  }
 
52
 
53
  export function deriveEog(eogArray) {
54
  const sorted = [...eogArray].sort((a, b) => b.avg - a.avg).slice(0, 5);
 
55
  return {
56
  metricLabel: 'Task Success Rate · Oracle mode',
57
  rows: sorted.map((m, i) => ({
 
60
  org: vendorFromName(m.model),
61
  type: m.type,
62
  score: round1(m.avg),
63
+ // bar = absolute % of max. EOG score is already a 0–100 success rate.
64
+ bar: Math.round(m.avg),
65
  })),
66
  };
67
  }
 
80
  .filter((x) => x.pooled && typeof x.pooled.point === 'number')
81
  .sort((a, b) => b.pooled.point - a.pooled.point)
82
  .slice(0, 5);
 
83
  return {
84
  metricLabel: 'Pass@1',
85
  rows: scored.map(({ s, pooled }, i) => ({
 
92
  score: round2(pooled.point),
93
  ciLower: round2(pooled.ci_lower),
94
  ciUpper: round2(pooled.ci_upper),
95
+ // bar = absolute % of max. EVA Pass@1 is a 0–1 proportion.
96
+ bar: Math.round(pooled.point * 100),
97
  })),
98
  };
99
  }
.claude/skills/sync-nowai-leaderboard/test/transform.test.mjs CHANGED
@@ -45,9 +45,9 @@ test('deriveEog sorts by avg desc, takes top 5, computes bars', () => {
45
  assert.deepEqual(out.rows.map(r => r.model), ['B', 'C', 'A']);
46
  assert.equal(out.rows[0].rank, 1);
47
  assert.equal(out.rows[0].score, 40);
48
- assert.equal(out.rows[0].bar, 100);
49
- assert.equal(out.rows[1].bar, 75); // 30/40
50
- assert.equal(out.rows[2].bar, 50); // 20/40
51
  assert.equal(out.rows[0].type, 'open');
52
  });
53
 
@@ -84,11 +84,11 @@ test('deriveEvaBoard ranks by accuracy point, builds subtitle + ci + bars', () =
84
  assert.deepEqual(out.rows.map(r => r.name), ['Nova + GPT + Sonic', 'Gemini Live']);
85
  assert.equal(out.rows[0].score, 0.41);
86
  assert.equal(out.rows[0].subtitle, 'Mixed Models · Cascade');
87
- assert.equal(out.rows[0].bar, 100);
88
  assert.equal(out.rows[0].ciLower, 0.39);
89
  assert.equal(out.rows[0].ciUpper, 0.43);
90
  assert.equal(out.rows[1].subtitle, 'Google · Speech-to-Speech');
91
- assert.equal(out.rows[1].bar, 73); // 0.30/0.41 = 0.7317 → 73
92
  });
93
 
94
  test('deriveEvaBoard ranks experience board independently', () => {
 
45
  assert.deepEqual(out.rows.map(r => r.model), ['B', 'C', 'A']);
46
  assert.equal(out.rows[0].rank, 1);
47
  assert.equal(out.rows[0].score, 40);
48
+ assert.equal(out.rows[0].bar, 40); // absolute: avg 40 of max 100
49
+ assert.equal(out.rows[1].bar, 30); // avg 30
50
+ assert.equal(out.rows[2].bar, 20); // avg 20
51
  assert.equal(out.rows[0].type, 'open');
52
  });
53
 
 
84
  assert.deepEqual(out.rows.map(r => r.name), ['Nova + GPT + Sonic', 'Gemini Live']);
85
  assert.equal(out.rows[0].score, 0.41);
86
  assert.equal(out.rows[0].subtitle, 'Mixed Models · Cascade');
87
+ assert.equal(out.rows[0].bar, 41); // absolute: 0.41 × 100
88
  assert.equal(out.rows[0].ciLower, 0.39);
89
  assert.equal(out.rows[0].ciUpper, 0.43);
90
  assert.equal(out.rows[1].subtitle, 'Google · Speech-to-Speech');
91
+ assert.equal(out.rows[1].bar, 30); // 0.30 × 100
92
  });
93
 
94
  test('deriveEvaBoard ranks experience board independently', () => {