Spaces:
Sleeping
Sleeping
Add release-date plots to HF leaderboard Space.
Browse files
Replicate the FreshStack release-date visualizations with monthly date axes, baseline reference lines, and improved plot spacing while keeping BM25/Fusion as dashed lines only.
Made-with: Cursor
- index.html +8 -0
- leaderboard_data.json +3 -3
- main.js +95 -5
index.html
CHANGED
|
@@ -93,6 +93,14 @@
|
|
| 93 |
<div id="plot-avg-r50" class="plot-box"></div>
|
| 94 |
</section>
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
<section class="citation">
|
| 97 |
<div class="citation-head">
|
| 98 |
<h3>Cite FreshStack</h3>
|
|
|
|
| 93 |
<div id="plot-avg-r50" class="plot-box"></div>
|
| 94 |
</section>
|
| 95 |
|
| 96 |
+
<section class="plots">
|
| 97 |
+
<h3>FreshStack Metrics vs. Model Release Date</h3>
|
| 98 |
+
<p class="plot-sub">Average scores across 5 domains vs model release date; points are colored by model family.</p>
|
| 99 |
+
<div id="plot-date-avg-alpha10" class="plot-box"></div>
|
| 100 |
+
<div id="plot-date-avg-c20" class="plot-box"></div>
|
| 101 |
+
<div id="plot-date-avg-r50" class="plot-box"></div>
|
| 102 |
+
</section>
|
| 103 |
+
|
| 104 |
<section class="citation">
|
| 105 |
<div class="citation-head">
|
| 106 |
<h3>Cite FreshStack</h3>
|
leaderboard_data.json
CHANGED
|
@@ -46,7 +46,7 @@
|
|
| 46 |
"name": "BGE (Gemma-2)",
|
| 47 |
"size": "9B",
|
| 48 |
"type": "open_source",
|
| 49 |
-
"date": "2024-
|
| 50 |
"link": "https://huggingface.co/BAAI/bge-multilingual-gemma2"
|
| 51 |
},
|
| 52 |
"datasets": {
|
|
@@ -87,7 +87,7 @@
|
|
| 87 |
"name": "E5 (Mistral-7B)",
|
| 88 |
"size": "7B",
|
| 89 |
"type": "open_source",
|
| 90 |
-
"date": "2024-
|
| 91 |
"link": "https://huggingface.co/intfloat/e5-mistral-7b-instruct"
|
| 92 |
},
|
| 93 |
"datasets": {
|
|
@@ -128,7 +128,7 @@
|
|
| 128 |
"name": "Voyage Large 2",
|
| 129 |
"size": "-",
|
| 130 |
"type": "proprietary",
|
| 131 |
-
"date": "2024-
|
| 132 |
"link": "https://docs.voyageai.com/docs/embeddings"
|
| 133 |
},
|
| 134 |
"datasets": {
|
|
|
|
| 46 |
"name": "BGE (Gemma-2)",
|
| 47 |
"size": "9B",
|
| 48 |
"type": "open_source",
|
| 49 |
+
"date": "2024-07-30",
|
| 50 |
"link": "https://huggingface.co/BAAI/bge-multilingual-gemma2"
|
| 51 |
},
|
| 52 |
"datasets": {
|
|
|
|
| 87 |
"name": "E5 (Mistral-7B)",
|
| 88 |
"size": "7B",
|
| 89 |
"type": "open_source",
|
| 90 |
+
"date": "2024-01-01",
|
| 91 |
"link": "https://huggingface.co/intfloat/e5-mistral-7b-instruct"
|
| 92 |
},
|
| 93 |
"datasets": {
|
|
|
|
| 128 |
"name": "Voyage Large 2",
|
| 129 |
"size": "-",
|
| 130 |
"type": "proprietary",
|
| 131 |
+
"date": "2024-05-05",
|
| 132 |
"link": "https://docs.voyageai.com/docs/embeddings"
|
| 133 |
},
|
| 134 |
"datasets": {
|
main.js
CHANGED
|
@@ -29,6 +29,11 @@ const PLOT_METRICS = [
|
|
| 29 |
{ id: 'coverage_20', key: 'avg_c20', plotId: 'plot-avg-c20', title: 'Coverage@20', yLabel: 'C@20 (Avg. 5)', yMin: 0.25, yMax: 0.868 },
|
| 30 |
{ id: 'recall_50', key: 'avg_r50', plotId: 'plot-avg-r50', title: 'Recall@50', yLabel: 'R@50 (Avg. 5)', yMin: 0.15, yMax: 0.755 }
|
| 31 |
];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
function num(v) {
|
| 34 |
return typeof v === 'number' ? v.toFixed(3) : '-';
|
|
@@ -84,6 +89,21 @@ function inferFamily(name) {
|
|
| 84 |
return 'Other';
|
| 85 |
}
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
const FAMILY_COLORS = {
|
| 88 |
'Stella': '#1f77b4', 'Harrier OSS': '#ff7f0e', 'Voyage': '#009688', 'Jina': '#d62728',
|
| 89 |
'Qwen3': '#9467bd', 'IBM Granite': '#8c564b', 'Arctic Embed': '#e377c2', 'Perplexity Embed': '#17becf',
|
|
@@ -193,10 +213,11 @@ function renderPlots() {
|
|
| 193 |
if (typeof Plotly === 'undefined') return;
|
| 194 |
const active = activeTypes();
|
| 195 |
const filtered = rows.filter(r => active.includes(r.type));
|
|
|
|
| 196 |
|
| 197 |
PLOT_METRICS.forEach(metric => {
|
| 198 |
const grouped = {};
|
| 199 |
-
|
| 200 |
const x = parseSizeToBillions(r.size);
|
| 201 |
const y = r[metric.key];
|
| 202 |
if (x === null || typeof y !== 'number') return;
|
|
@@ -233,10 +254,79 @@ function renderPlots() {
|
|
| 233 |
|
| 234 |
Plotly.newPlot(metric.plotId, traces, {
|
| 235 |
title: { text: metric.title, x: 0.01, xanchor: 'left', font: { size: 16 } },
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
hovermode: 'closest'
|
| 241 |
}, { responsive: true, displaylogo: false });
|
| 242 |
});
|
|
|
|
| 29 |
{ id: 'coverage_20', key: 'avg_c20', plotId: 'plot-avg-c20', title: 'Coverage@20', yLabel: 'C@20 (Avg. 5)', yMin: 0.25, yMax: 0.868 },
|
| 30 |
{ id: 'recall_50', key: 'avg_r50', plotId: 'plot-avg-r50', title: 'Recall@50', yLabel: 'R@50 (Avg. 5)', yMin: 0.15, yMax: 0.755 }
|
| 31 |
];
|
| 32 |
+
/**
 * Definitions of the "metric vs. model release date" plots.
 *
 * Each entry names the row field to read (`key`), the DOM element the chart is
 * drawn into (`plotId`), the chart title, the y-axis label and the fixed
 * y-axis range (`yMin`..`yMax`).
 *
 * The table and its entries are frozen: it is shared, read-only configuration,
 * so an accidental write fails instead of silently changing every later render.
 */
const DATE_PLOT_METRICS = Object.freeze([
  Object.freeze({
    id: 'alpha_ndcg_10',
    key: 'avg_a10',
    plotId: 'plot-date-avg-alpha10',
    title: 'alpha-nDCG@10',
    yLabel: 'α@10 (Avg. 5)',
    yMin: 0.1,
    yMax: 0.541
  }),
  Object.freeze({
    id: 'coverage_20',
    key: 'avg_c20',
    plotId: 'plot-date-avg-c20',
    title: 'Coverage@20',
    yLabel: 'C@20 (Avg. 5)',
    yMin: 0.25,
    yMax: 0.868
  }),
  Object.freeze({
    id: 'recall_50',
    key: 'avg_r50',
    plotId: 'plot-date-avg-r50',
    title: 'Recall@50',
    yLabel: 'R@50 (Avg. 5)',
    yMin: 0.15,
    yMax: 0.755
  })
]);
|
| 37 |
|
| 38 |
function num(v) {
|
| 39 |
return typeof v === 'number' ? v.toFixed(3) : '-';
|
|
|
|
| 89 |
return 'Other';
|
| 90 |
}
|
| 91 |
|
| 92 |
+
/**
 * Canonicalize a model name so names can be compared reliably.
 *
 * The name is converted to a string, trimmed, lower-cased, and a leading
 * "oracle:" prefix (plus any whitespace after it) is removed. Falsy input
 * (null, undefined, '') yields the empty string.
 *
 * @param {*} rawName - Model name as it appears in the leaderboard rows.
 * @returns {string} The normalized name.
 */
function normalizeModelName(rawName) {
  // Trim BEFORE stripping the prefix: the pattern is anchored with ^, so
  // leading whitespace would otherwise leave "oracle:" in place.
  const lowered = String(rawName || '').trim().toLowerCase();
  return lowered.replace(/^oracle:\s*/, '');
}
|
| 95 |
+
|
| 96 |
+
/**
 * Report whether a model is one of the reference baselines, i.e. its
 * normalized name is "bm25" or "fusion (bm25, bge, e5, voyage)".
 *
 * @param {*} rawName - Model name as it appears in the leaderboard rows.
 * @returns {boolean} True for the BM25 and Fusion baseline rows.
 */
function isReferenceBaselineModel(rawName) {
  const baselineNames = ['bm25', 'fusion (bm25, bge, e5, voyage)'];
  const normalized = normalizeModelName(rawName);
  return baselineNames.includes(normalized);
}
|
| 100 |
+
|
| 101 |
+
/**
 * Parse a model release date into a Date object.
 *
 * Date-only strings in `YYYY-MM-DD` form are built as LOCAL midnight of that
 * calendar day. `new Date('YYYY-MM-DD')` would instead produce UTC midnight,
 * and the local accessors the plotting code uses (`getFullYear()`,
 * `getMonth()`) then report the previous day/month in time zones west of UTC.
 * Impossible calendar days (e.g. 2024-02-30) are rejected rather than rolled
 * over into the next month.
 *
 * Any other input falls back to the built-in Date parser.
 *
 * @param {*} dateStr - Release date, normally a `YYYY-MM-DD` string.
 * @returns {Date|null} The parsed date, or null when the value is missing or
 *   cannot be parsed.
 */
function parseReleaseDate(dateStr) {
  if (!dateStr) return null;

  const isoDay = /^(\d{4})-(\d{2})-(\d{2})$/.exec(String(dateStr).trim());
  if (isoDay) {
    const year = Number(isoDay[1]);
    const monthIndex = Number(isoDay[2]) - 1; // Date months are 0-based
    const day = Number(isoDay[3]);
    const localDay = new Date(year, monthIndex, day);
    // The Date constructor silently rolls over out-of-range fields; a
    // mismatch after the round trip means the input was not a real day.
    const isRealDay = localDay.getFullYear() === year
      && localDay.getMonth() === monthIndex
      && localDay.getDate() === day;
    return isRealDay ? localDay : null;
  }

  const parsed = new Date(dateStr);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
|
| 106 |
+
|
| 107 |
const FAMILY_COLORS = {
|
| 108 |
'Stella': '#1f77b4', 'Harrier OSS': '#ff7f0e', 'Voyage': '#009688', 'Jina': '#d62728',
|
| 109 |
'Qwen3': '#9467bd', 'IBM Granite': '#8c564b', 'Arctic Embed': '#e377c2', 'Perplexity Embed': '#17becf',
|
|
|
|
| 213 |
if (typeof Plotly === 'undefined') return;
|
| 214 |
const active = activeTypes();
|
| 215 |
const filtered = rows.filter(r => active.includes(r.type));
|
| 216 |
+
const filteredNoBaselines = filtered.filter(r => !isReferenceBaselineModel(r.name));
|
| 217 |
|
| 218 |
PLOT_METRICS.forEach(metric => {
|
| 219 |
const grouped = {};
|
| 220 |
+
filteredNoBaselines.forEach(r => {
|
| 221 |
const x = parseSizeToBillions(r.size);
|
| 222 |
const y = r[metric.key];
|
| 223 |
if (x === null || typeof y !== 'number') return;
|
|
|
|
| 254 |
|
| 255 |
Plotly.newPlot(metric.plotId, traces, {
|
| 256 |
title: { text: metric.title, x: 0.01, xanchor: 'left', font: { size: 16 } },
|
| 257 |
+
height: 430,
|
| 258 |
+
margin: { t: 46, r: 12, b: 130, l: 56 },
|
| 259 |
+
xaxis: { title: { text: 'Model Parameters (Billions)', standoff: 26 }, type: 'log', automargin: true, showgrid: true },
|
| 260 |
+
yaxis: { title: metric.yLabel, range: [metric.yMin, metric.yMax], tickformat: '.2f', automargin: true, showgrid: true },
|
| 261 |
+
legend: { orientation: 'h', y: -0.36, x: 0.5, xanchor: 'center' },
|
| 262 |
+
hovermode: 'closest'
|
| 263 |
+
}, { responsive: true, displaylogo: false });
|
| 264 |
+
});
|
| 265 |
+
|
| 266 |
+
DATE_PLOT_METRICS.forEach(metric => {
|
| 267 |
+
const grouped = {};
|
| 268 |
+
filteredNoBaselines.forEach(r => {
|
| 269 |
+
const x = parseReleaseDate(r.date);
|
| 270 |
+
const y = r[metric.key];
|
| 271 |
+
if (x === null || typeof y !== 'number') return;
|
| 272 |
+
const fam = inferFamily(r.name);
|
| 273 |
+
if (!grouped[fam]) grouped[fam] = { x: [], y: [], text: [] };
|
| 274 |
+
grouped[fam].x.push(x);
|
| 275 |
+
grouped[fam].y.push(y);
|
| 276 |
+
grouped[fam].text.push(r.name);
|
| 277 |
+
});
|
| 278 |
+
|
| 279 |
+
const traces = Object.keys(grouped).sort().map(fam => ({
|
| 280 |
+
type: 'scatter',
|
| 281 |
+
mode: 'markers',
|
| 282 |
+
name: fam,
|
| 283 |
+
x: grouped[fam].x,
|
| 284 |
+
y: grouped[fam].y,
|
| 285 |
+
text: grouped[fam].text,
|
| 286 |
+
marker: { color: FAMILY_COLORS[fam] || '#9e9e9e', size: 11, line: { width: 1, color: '#fff' } },
|
| 287 |
+
hovertemplate: '<b>%{text}</b><br>Release date: %{x|%Y-%m-%d}<br>Score: %{y:.3f}<extra></extra>'
|
| 288 |
+
}));
|
| 289 |
+
|
| 290 |
+
const bm25 = filtered.find(r => normalizeModelName(r.name) === 'bm25');
|
| 291 |
+
const fusion = filtered.find(r => normalizeModelName(r.name) === 'fusion (bm25, bge, e5, voyage)');
|
| 292 |
+
const xs = traces.flatMap(t => t.x || []);
|
| 293 |
+
const xMin = xs.length ? new Date(Math.min(...xs.map(d => d.getTime()))) : null;
|
| 294 |
+
const xMax = xs.length ? new Date(Math.max(...xs.map(d => d.getTime()))) : null;
|
| 295 |
+
const xMinMonthStart = xMin ? new Date(xMin.getFullYear(), xMin.getMonth(), 1) : null;
|
| 296 |
+
if (xMin && xMax) {
|
| 297 |
+
if (bm25 && typeof bm25[metric.key] === 'number') {
|
| 298 |
+
traces.push({ type: 'scatter', mode: 'lines', name: 'BM25', x: [xMin, xMax], y: [bm25[metric.key], bm25[metric.key]], line: { color: 'rgba(97,97,97,0.55)', width: 1.1, dash: 'dash' } });
|
| 299 |
+
}
|
| 300 |
+
if (fusion && typeof fusion[metric.key] === 'number') {
|
| 301 |
+
traces.push({ type: 'scatter', mode: 'lines', name: 'Fusion', x: [xMin, xMax], y: [fusion[metric.key], fusion[metric.key]], line: { color: 'rgba(106,27,154,0.55)', width: 1.1, dash: 'dot' } });
|
| 302 |
+
}
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
Plotly.newPlot(metric.plotId, traces, {
|
| 306 |
+
title: { text: metric.title, x: 0.01, xanchor: 'left', font: { size: 16 } },
|
| 307 |
+
height: 430,
|
| 308 |
+
margin: { t: 46, r: 12, b: 160, l: 56 },
|
| 309 |
+
xaxis: {
|
| 310 |
+
title: { text: 'Model Release Date', standoff: 34 },
|
| 311 |
+
type: 'date',
|
| 312 |
+
tickmode: 'linear',
|
| 313 |
+
tick0: xMinMonthStart ? xMinMonthStart.toISOString().slice(0, 10) : undefined,
|
| 314 |
+
dtick: 'M1',
|
| 315 |
+
tickformat: '%b %Y',
|
| 316 |
+
tickangle: -45,
|
| 317 |
+
automargin: true,
|
| 318 |
+
showgrid: true
|
| 319 |
+
},
|
| 320 |
+
yaxis: { title: metric.yLabel, range: [metric.yMin, metric.yMax], tickformat: '.2f', automargin: true, showgrid: true },
|
| 321 |
+
legend: {
|
| 322 |
+
orientation: 'h',
|
| 323 |
+
y: -0.44,
|
| 324 |
+
x: 0.5,
|
| 325 |
+
xanchor: 'center',
|
| 326 |
+
entrywidthmode: 'pixels',
|
| 327 |
+
entrywidth: 155,
|
| 328 |
+
itemsizing: 'constant'
|
| 329 |
+
},
|
| 330 |
hovermode: 'closest'
|
| 331 |
}, { responsive: true, displaylogo: false });
|
| 332 |
});
|