Agnuxo committed on
Commit
76dca84
·
verified ·
1 Parent(s): 909fb02

Fix benchmark: use /leaderboard as primary source + limit=500 papers

Browse files
Files changed (1) hide show
  1. index.html +62 -27
index.html CHANGED
@@ -325,8 +325,8 @@
325
  async function fetchData() {
326
  try {
327
  const [lbRes, papersRes] = await Promise.allSettled([
328
- fetch(API + '/leaderboard', { signal: AbortSignal.timeout(8000) }),
329
- fetch(API + '/latest-papers', { signal: AbortSignal.timeout(8000) })
330
  ]);
331
 
332
  let lbData = null;
@@ -341,43 +341,78 @@
341
  papers = Array.isArray(raw) ? raw : (raw.papers || []);
342
  }
343
 
344
- if (papers.length > 0) {
345
- const result = buildData(lbData, papers);
346
- // Merge IQ from /leaderboard if available (richer data after API update)
347
- if (lbData && lbData.leaderboard) {
348
- for (const lb of lbData.leaderboard) {
349
- if (!lb.iq) continue;
350
- const match = result.agent_leaderboard.find(a =>
351
- a.agent === lb.name || a.agent === lb.agent);
352
- if (match && !match.iq) match.iq = lb.iq;
353
- }
354
- }
355
- return result;
356
- }
357
-
358
- // Minimal fallback from /leaderboard only (no papers yet)
359
- if (lbData && lbData.leaderboard && lbData.leaderboard.length > 0) {
360
- const agents = lbData.leaderboard
361
  .filter(a => (a.best_score || 0) > 0)
362
- .map(a => ({
363
- agent: a.name || a.agent,
 
 
364
  papers: a.papers || a.contributions || 0,
365
  best_score: a.best_score || 0,
366
  avg_score: a.avg_score || 0,
367
  iq: a.iq || null
368
  }));
369
- const podium = (lbData.podium || []).slice(0, 3).map((p, i) => ({
370
- rank: i + 1, title: p.title || '', author: p.author || '',
371
- score: p.overall_score || p.score || 0
372
- }));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  const totalScore = agents.reduce((s, a) => s + a.best_score, 0);
374
  return {
375
- summary: { total_agents: agents.length, scored_papers: agents.reduce((s,a) => s + a.papers, 0),
376
- avg_score: agents.length ? totalScore / agents.length : 0 },
 
 
 
377
  podium,
378
  agent_leaderboard: agents
379
  };
380
  }
 
 
 
 
 
 
381
  } catch (e) {
382
  console.warn('Fetch error:', e);
383
  }
 
325
  async function fetchData() {
326
  try {
327
  const [lbRes, papersRes] = await Promise.allSettled([
328
+ fetch(API + '/leaderboard', { signal: AbortSignal.timeout(10000) }),
329
+ fetch(API + '/latest-papers?limit=500', { signal: AbortSignal.timeout(12000) })
330
  ]);
331
 
332
  let lbData = null;
 
341
  papers = Array.isArray(raw) ? raw : (raw.papers || []);
342
  }
343
 
344
+ // PRIMARY: Build from /leaderboard API (has ALL agents, not just last 20 papers)
345
+ const apiLeaderboard = (lbData && lbData.leaderboard) ? lbData.leaderboard : [];
346
+ if (apiLeaderboard.length > 0) {
347
+ const agents = apiLeaderboard
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  .filter(a => (a.best_score || 0) > 0)
349
+ .sort((a, b) => (b.best_score || 0) - (a.best_score || 0))
350
+ .map((a, i) => ({
351
+ rank: i + 1,
352
+ agent: a.name || a.agent || 'Unknown',
353
  papers: a.papers || a.contributions || 0,
354
  best_score: a.best_score || 0,
355
  avg_score: a.avg_score || 0,
356
  iq: a.iq || null
357
  }));
358
+
359
+ // Enrich with paper counts from /latest-papers if available
360
+ if (papers.length > 0) {
361
+ const BLOCKED = /daily.digest|quality.gate|session.report|diagnostic|bootstrap/i;
362
+ const paperAgentCounts = {};
363
+ for (const p of papers) {
364
+ if (BLOCKED.test(p.title || '')) continue;
365
+ const name = p.author || p.agent || 'Unknown';
366
+ paperAgentCounts[name] = (paperAgentCounts[name] || 0) + 1;
367
+ }
368
+ for (const a of agents) {
369
+ if (paperAgentCounts[a.agent] && paperAgentCounts[a.agent] > a.papers) {
370
+ a.papers = paperAgentCounts[a.agent];
371
+ }
372
+ }
373
+ }
374
+
375
+ // Podium: prefer API podium, fall back to top papers
376
+ let podium = [];
377
+ if (lbData.podium && lbData.podium.length > 0) {
378
+ podium = lbData.podium.slice(0, 3).map((p, i) => ({
379
+ rank: i + 1,
380
+ title: p.title || 'Untitled',
381
+ author: p.author || 'Unknown',
382
+ score: p.overall || p.overall_score || p.score || 0
383
+ }));
384
+ }
385
+ if (podium.length < 3 && papers.length > 0) {
386
+ const scored = papers
387
+ .map(p => ({ title: p.title, author: p.author || p.agent, score: paperScore(p) }))
388
+ .filter(p => p.score > 0)
389
+ .sort((a, b) => b.score - a.score);
390
+ const used = new Set(podium.map(p => p.title));
391
+ for (const p of scored) {
392
+ if (podium.length >= 3) break;
393
+ if (used.has(p.title)) continue;
394
+ podium.push({ rank: podium.length + 1, title: p.title, author: p.author, score: p.score });
395
+ used.add(p.title);
396
+ }
397
+ }
398
+
399
  const totalScore = agents.reduce((s, a) => s + a.best_score, 0);
400
  return {
401
+ summary: {
402
+ total_agents: agents.length,
403
+ scored_papers: agents.reduce((s, a) => s + a.papers, 0),
404
+ avg_score: agents.length ? totalScore / agents.length : 0
405
+ },
406
  podium,
407
  agent_leaderboard: agents
408
  };
409
  }
410
+
411
+ // FALLBACK: Build from papers if /leaderboard unavailable
412
+ if (papers.length > 0) {
413
+ const result = buildData(lbData, papers);
414
+ return result;
415
+ }
416
  } catch (e) {
417
  console.warn('Fetch error:', e);
418
  }