nthakur committed on
Commit
dc4ca26
·
1 Parent(s): 9fa3c9a

Add release-date plots to HF leaderboard Space.

Browse files

Replicate the FreshStack release-date visualizations with monthly date axes, baseline reference lines, and improved plot spacing while keeping BM25/Fusion as dashed lines only.

Made-with: Cursor

Files changed (3) hide show
  1. index.html +8 -0
  2. leaderboard_data.json +3 -3
  3. main.js +95 -5
index.html CHANGED
@@ -93,6 +93,14 @@
93
  <div id="plot-avg-r50" class="plot-box"></div>
94
  </section>
95
 
 
 
 
 
 
 
 
 
96
  <section class="citation">
97
  <div class="citation-head">
98
  <h3>Cite FreshStack</h3>
 
93
  <div id="plot-avg-r50" class="plot-box"></div>
94
  </section>
95
 
96
+ <section class="plots">
97
+ <h3>FreshStack Metrics vs. Model Release Date</h3>
98
+ <p class="plot-sub">Average scores across 5 domains vs model release date; points are colored by model family.</p>
99
+ <div id="plot-date-avg-alpha10" class="plot-box"></div>
100
+ <div id="plot-date-avg-c20" class="plot-box"></div>
101
+ <div id="plot-date-avg-r50" class="plot-box"></div>
102
+ </section>
103
+
104
  <section class="citation">
105
  <div class="citation-head">
106
  <h3>Cite FreshStack</h3>
leaderboard_data.json CHANGED
@@ -46,7 +46,7 @@
46
  "name": "BGE (Gemma-2)",
47
  "size": "9B",
48
  "type": "open_source",
49
- "date": "2024-11-01",
50
  "link": "https://huggingface.co/BAAI/bge-multilingual-gemma2"
51
  },
52
  "datasets": {
@@ -87,7 +87,7 @@
87
  "name": "E5 (Mistral-7B)",
88
  "size": "7B",
89
  "type": "open_source",
90
- "date": "2024-11-01",
91
  "link": "https://huggingface.co/intfloat/e5-mistral-7b-instruct"
92
  },
93
  "datasets": {
@@ -128,7 +128,7 @@
128
  "name": "Voyage Large 2",
129
  "size": "-",
130
  "type": "proprietary",
131
- "date": "2024-11-01",
132
  "link": "https://docs.voyageai.com/docs/embeddings"
133
  },
134
  "datasets": {
 
46
  "name": "BGE (Gemma-2)",
47
  "size": "9B",
48
  "type": "open_source",
49
+ "date": "2024-07-30",
50
  "link": "https://huggingface.co/BAAI/bge-multilingual-gemma2"
51
  },
52
  "datasets": {
 
87
  "name": "E5 (Mistral-7B)",
88
  "size": "7B",
89
  "type": "open_source",
90
+ "date": "2024-01-01",
91
  "link": "https://huggingface.co/intfloat/e5-mistral-7b-instruct"
92
  },
93
  "datasets": {
 
128
  "name": "Voyage Large 2",
129
  "size": "-",
130
  "type": "proprietary",
131
+ "date": "2024-05-05",
132
  "link": "https://docs.voyageai.com/docs/embeddings"
133
  },
134
  "datasets": {
main.js CHANGED
@@ -29,6 +29,11 @@ const PLOT_METRICS = [
29
  { id: 'coverage_20', key: 'avg_c20', plotId: 'plot-avg-c20', title: 'Coverage@20', yLabel: 'C@20 (Avg. 5)', yMin: 0.25, yMax: 0.868 },
30
  { id: 'recall_50', key: 'avg_r50', plotId: 'plot-avg-r50', title: 'Recall@50', yLabel: 'R@50 (Avg. 5)', yMin: 0.15, yMax: 0.755 }
31
  ];
 
 
 
 
 
32
 
33
  function num(v) {
34
  return typeof v === 'number' ? v.toFixed(3) : '-';
@@ -84,6 +89,21 @@ function inferFamily(name) {
84
  return 'Other';
85
  }
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  const FAMILY_COLORS = {
88
  'Stella': '#1f77b4', 'Harrier OSS': '#ff7f0e', 'Voyage': '#009688', 'Jina': '#d62728',
89
  'Qwen3': '#9467bd', 'IBM Granite': '#8c564b', 'Arctic Embed': '#e377c2', 'Perplexity Embed': '#17becf',
@@ -193,10 +213,11 @@ function renderPlots() {
193
  if (typeof Plotly === 'undefined') return;
194
  const active = activeTypes();
195
  const filtered = rows.filter(r => active.includes(r.type));
 
196
 
197
  PLOT_METRICS.forEach(metric => {
198
  const grouped = {};
199
- filtered.forEach(r => {
200
  const x = parseSizeToBillions(r.size);
201
  const y = r[metric.key];
202
  if (x === null || typeof y !== 'number') return;
@@ -233,10 +254,79 @@ function renderPlots() {
233
 
234
  Plotly.newPlot(metric.plotId, traces, {
235
  title: { text: metric.title, x: 0.01, xanchor: 'left', font: { size: 16 } },
236
- margin: { t: 46, r: 12, b: 76, l: 56 },
237
- xaxis: { title: { text: 'Model Parameters (Billions)', standoff: 18 }, type: 'log', showgrid: true },
238
- yaxis: { title: metric.yLabel, range: [metric.yMin, metric.yMax], tickformat: '.2f', showgrid: true },
239
- legend: { orientation: 'h', y: -0.26, x: 0.5, xanchor: 'center' },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  hovermode: 'closest'
241
  }, { responsive: true, displaylogo: false });
242
  });
 
29
  { id: 'coverage_20', key: 'avg_c20', plotId: 'plot-avg-c20', title: 'Coverage@20', yLabel: 'C@20 (Avg. 5)', yMin: 0.25, yMax: 0.868 },
30
  { id: 'recall_50', key: 'avg_r50', plotId: 'plot-avg-r50', title: 'Recall@50', yLabel: 'R@50 (Avg. 5)', yMin: 0.15, yMax: 0.755 }
31
  ];
32
+ const DATE_PLOT_METRICS = [
33
+ { id: 'alpha_ndcg_10', key: 'avg_a10', plotId: 'plot-date-avg-alpha10', title: 'alpha-nDCG@10', yLabel: 'α@10 (Avg. 5)', yMin: 0.1, yMax: 0.541 },
34
+ { id: 'coverage_20', key: 'avg_c20', plotId: 'plot-date-avg-c20', title: 'Coverage@20', yLabel: 'C@20 (Avg. 5)', yMin: 0.25, yMax: 0.868 },
35
+ { id: 'recall_50', key: 'avg_r50', plotId: 'plot-date-avg-r50', title: 'Recall@50', yLabel: 'R@50 (Avg. 5)', yMin: 0.15, yMax: 0.755 }
36
+ ];
37
 
38
  function num(v) {
39
  return typeof v === 'number' ? v.toFixed(3) : '-';
 
89
  return 'Other';
90
  }
91
 
92
+ function normalizeModelName(rawName) {
93
+ return String(rawName || '').toLowerCase().replace(/^oracle:\s*/i, '').trim();
94
+ }
95
+
96
+ function isReferenceBaselineModel(rawName) {
97
+ const name = normalizeModelName(rawName);
98
+ return name === 'bm25' || name === 'fusion (bm25, bge, e5, voyage)';
99
+ }
100
+
101
+ function parseReleaseDate(dateStr) {
102
+ if (!dateStr) return null;
103
+ const d = new Date(dateStr);
104
+ return Number.isNaN(d.getTime()) ? null : d;
105
+ }
106
+
107
  const FAMILY_COLORS = {
108
  'Stella': '#1f77b4', 'Harrier OSS': '#ff7f0e', 'Voyage': '#009688', 'Jina': '#d62728',
109
  'Qwen3': '#9467bd', 'IBM Granite': '#8c564b', 'Arctic Embed': '#e377c2', 'Perplexity Embed': '#17becf',
 
213
  if (typeof Plotly === 'undefined') return;
214
  const active = activeTypes();
215
  const filtered = rows.filter(r => active.includes(r.type));
216
+ const filteredNoBaselines = filtered.filter(r => !isReferenceBaselineModel(r.name));
217
 
218
  PLOT_METRICS.forEach(metric => {
219
  const grouped = {};
220
+ filteredNoBaselines.forEach(r => {
221
  const x = parseSizeToBillions(r.size);
222
  const y = r[metric.key];
223
  if (x === null || typeof y !== 'number') return;
 
254
 
255
  Plotly.newPlot(metric.plotId, traces, {
256
  title: { text: metric.title, x: 0.01, xanchor: 'left', font: { size: 16 } },
257
+ height: 430,
258
+ margin: { t: 46, r: 12, b: 130, l: 56 },
259
+ xaxis: { title: { text: 'Model Parameters (Billions)', standoff: 26 }, type: 'log', automargin: true, showgrid: true },
260
+ yaxis: { title: metric.yLabel, range: [metric.yMin, metric.yMax], tickformat: '.2f', automargin: true, showgrid: true },
261
+ legend: { orientation: 'h', y: -0.36, x: 0.5, xanchor: 'center' },
262
+ hovermode: 'closest'
263
+ }, { responsive: true, displaylogo: false });
264
+ });
265
+
266
+ DATE_PLOT_METRICS.forEach(metric => {
267
+ const grouped = {};
268
+ filteredNoBaselines.forEach(r => {
269
+ const x = parseReleaseDate(r.date);
270
+ const y = r[metric.key];
271
+ if (x === null || typeof y !== 'number') return;
272
+ const fam = inferFamily(r.name);
273
+ if (!grouped[fam]) grouped[fam] = { x: [], y: [], text: [] };
274
+ grouped[fam].x.push(x);
275
+ grouped[fam].y.push(y);
276
+ grouped[fam].text.push(r.name);
277
+ });
278
+
279
+ const traces = Object.keys(grouped).sort().map(fam => ({
280
+ type: 'scatter',
281
+ mode: 'markers',
282
+ name: fam,
283
+ x: grouped[fam].x,
284
+ y: grouped[fam].y,
285
+ text: grouped[fam].text,
286
+ marker: { color: FAMILY_COLORS[fam] || '#9e9e9e', size: 11, line: { width: 1, color: '#fff' } },
287
+ hovertemplate: '<b>%{text}</b><br>Release date: %{x|%Y-%m-%d}<br>Score: %{y:.3f}<extra></extra>'
288
+ }));
289
+
290
+ const bm25 = filtered.find(r => normalizeModelName(r.name) === 'bm25');
291
+ const fusion = filtered.find(r => normalizeModelName(r.name) === 'fusion (bm25, bge, e5, voyage)');
292
+ const xs = traces.flatMap(t => t.x || []);
293
+ const xMin = xs.length ? new Date(Math.min(...xs.map(d => d.getTime()))) : null;
294
+ const xMax = xs.length ? new Date(Math.max(...xs.map(d => d.getTime()))) : null;
295
+ const xMinMonthStart = xMin ? new Date(xMin.getFullYear(), xMin.getMonth(), 1) : null;
296
+ if (xMin && xMax) {
297
+ if (bm25 && typeof bm25[metric.key] === 'number') {
298
+ traces.push({ type: 'scatter', mode: 'lines', name: 'BM25', x: [xMin, xMax], y: [bm25[metric.key], bm25[metric.key]], line: { color: 'rgba(97,97,97,0.55)', width: 1.1, dash: 'dash' } });
299
+ }
300
+ if (fusion && typeof fusion[metric.key] === 'number') {
301
+ traces.push({ type: 'scatter', mode: 'lines', name: 'Fusion', x: [xMin, xMax], y: [fusion[metric.key], fusion[metric.key]], line: { color: 'rgba(106,27,154,0.55)', width: 1.1, dash: 'dot' } });
302
+ }
303
+ }
304
+
305
+ Plotly.newPlot(metric.plotId, traces, {
306
+ title: { text: metric.title, x: 0.01, xanchor: 'left', font: { size: 16 } },
307
+ height: 430,
308
+ margin: { t: 46, r: 12, b: 160, l: 56 },
309
+ xaxis: {
310
+ title: { text: 'Model Release Date', standoff: 34 },
311
+ type: 'date',
312
+ tickmode: 'linear',
313
+ tick0: xMinMonthStart ? xMinMonthStart.toISOString().slice(0, 10) : undefined,
314
+ dtick: 'M1',
315
+ tickformat: '%b %Y',
316
+ tickangle: -45,
317
+ automargin: true,
318
+ showgrid: true
319
+ },
320
+ yaxis: { title: metric.yLabel, range: [metric.yMin, metric.yMax], tickformat: '.2f', automargin: true, showgrid: true },
321
+ legend: {
322
+ orientation: 'h',
323
+ y: -0.44,
324
+ x: 0.5,
325
+ xanchor: 'center',
326
+ entrywidthmode: 'pixels',
327
+ entrywidth: 155,
328
+ itemsizing: 'constant'
329
+ },
330
  hovermode: 'closest'
331
  }, { responsive: true, displaylogo: false });
332
  });