rafmacalaba's picture
feat: multi-corpus support
a2c885c
import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN, getCorpora, getLinksRepoPath, getDocRepoPath } from '../../../utils/config.js';
// NOTE(review): this looks like the Next.js route segment option that makes the
// handler run on every request instead of being cached/prerendered, which would
// match the 'no-store' fetches and no-cache response headers used by GET below
// — confirm against the framework version in use.
export const dynamic = 'force-dynamic';
/**
 * Builds the options shared by every dataset fetch in this route.
 *
 * The Authorization header is attached only when HF_TOKEN is set, so the
 * literal string "Bearer undefined" is never sent.
 *
 * @returns {{ headers: Object, cache: string }} Options for `fetch`.
 */
function buildHfRequestInit() {
  const token = process.env.HF_TOKEN;
  return {
    headers: token ? { 'Authorization': `Bearer ${token}` } : {},
    cache: 'no-store',
  };
}

/**
 * Returns the mutable stats record for an annotator, creating it on first use.
 *
 * A Map keyed by the stringified name is used instead of a plain object so
 * that annotator names such as "constructor" or "__proto__" cannot collide
 * with inherited Object.prototype members.
 *
 * @param {Map<string, Object>} stats - Accumulator shared across all corpora.
 * @param {*} annotator - Annotator identifier read from the document data.
 * @returns {Object} Record with verified/correct/incorrect/humanAdded counters and a `docs` Set.
 */
function getAnnotatorStats(stats, annotator) {
  const key = String(annotator);
  let entry = stats.get(key);
  if (!entry) {
    entry = { verified: 0, correct: 0, incorrect: 0, docs: new Set(), humanAdded: 0 };
    stats.set(key, entry);
  }
  return entry;
}

/**
 * Folds one document's pages into the per-annotator stats.
 *
 * @param {Map<string, Object>} stats - Accumulator to update in place.
 * @param {string} docKey - `${corpus.id}:${link.index}`, used to count distinct documents.
 * @param {Iterable<Object>} pagesData - Parsed document JSON; each page may carry a `datasets` list.
 */
function tallyDocument(stats, docKey, pagesData) {
  for (const page of pagesData) {
    for (const ds of (page.datasets || [])) {
      // A dataset entry added by a human counts towards that annotator's humanAdded.
      if (ds.source === 'human' && ds.annotator) {
        const entry = getAnnotatorStats(stats, ds.annotator);
        entry.humanAdded++;
        entry.docs.add(docKey);
      }
      for (const v of (ds.validations || [])) {
        if (!v.annotator || !v.human_validated) continue;
        const entry = getAnnotatorStats(stats, v.annotator);
        entry.verified++;
        // Anything other than an explicit `true` verdict is counted as incorrect.
        if (v.human_verdict === true) {
          entry.correct++;
        } else {
          entry.incorrect++;
        }
        entry.docs.add(docKey);
      }
    }
  }
}

/**
 * Scans one corpus: loads its links file, then fetches and tallies every
 * active document (status 'success' with has_revalidation === true), capped
 * at MAX_DOCS_TO_SCAN.
 *
 * A non-OK links response skips the corpus. A non-OK or failing document is
 * skipped without aborting the others; failures are logged rather than
 * silently discarded.
 *
 * @param {Object} corpus - Corpus descriptor as returned by getCorpora().
 * @param {Map<string, Object>} stats - Accumulator to update in place.
 * @returns {Promise<void>}
 */
async function scanCorpus(corpus, stats) {
  const linksUrl = `${HF_DATASET_BASE_URL}/raw/main/${getLinksRepoPath(corpus)}`;
  const linksRes = await fetch(linksUrl, buildHfRequestInit());
  if (!linksRes.ok) return;

  const links = await linksRes.json();
  const activeLinks = links
    .filter((l) => l.status === 'success' && l.has_revalidation === true)
    .slice(0, MAX_DOCS_TO_SCAN);

  const outcomes = await Promise.allSettled(
    activeLinks.map(async (link) => {
      const docUrl = `${HF_DATASET_BASE_URL}/raw/main/${getDocRepoPath(corpus, link.index)}`;
      const docRes = await fetch(docUrl, buildHfRequestInit());
      if (!docRes.ok) return;
      tallyDocument(stats, `${corpus.id}:${link.index}`, await docRes.json());
    })
  );

  // allSettled never rejects, so surface per-document failures explicitly.
  outcomes.forEach((outcome, i) => {
    if (outcome.status === 'rejected') {
      console.warn(
        `Leaderboard: skipped document ${corpus.id}:${activeLinks[i].index}:`,
        outcome.reason
      );
    }
  });
}

/**
 * GET /api/leaderboard
 * Scans ALL corpora and returns annotator rankings.
 *
 * Responds with `{ leaderboard: [...] }` where each entry has `annotator`,
 * `verified`, `correct`, `incorrect`, `humanAdded`, `docsWorked` (distinct
 * documents touched) and `score` (verified + humanAdded), sorted by score
 * in descending order.
 *
 * @returns {Promise<Response>} 200 with the leaderboard JSON, or 500 with
 *   `{ error }` if scanning throws.
 */
export async function GET() {
  try {
    const stats = new Map(); // annotator -> { verified, correct, incorrect, docs, humanAdded }

    // Corpora are scanned one after another; documents within a corpus run concurrently.
    for (const corpus of getCorpora()) {
      await scanCorpus(corpus, stats);
    }

    const leaderboard = Array.from(stats, ([annotator, s]) => ({
      annotator,
      verified: s.verified,
      correct: s.correct,
      incorrect: s.incorrect,
      humanAdded: s.humanAdded,
      docsWorked: s.docs.size,
      score: s.verified + s.humanAdded,
    })).sort((a, b) => b.score - a.score);

    return new Response(JSON.stringify({ leaderboard }), {
      status: 200,
      headers: {
        'Content-Type': 'application/json',
        'Cache-Control': 'no-cache, no-store, must-revalidate'
      }
    });
  } catch (error) {
    console.error('Leaderboard API error:', error);
    return new Response(
      JSON.stringify({ error: 'Failed to compute leaderboard' }),
      { status: 500, headers: { 'Content-Type': 'application/json' } }
    );
  }
}