nthakur committed on
Commit
26aa131
·
1 Parent(s): 5921ea4

Refine leaderboard labels, controls, and plots.

Browse files

Update metric labels and grouped header alignment, add metric details toggle, highlight Avg(5) scores, and embed FreshStack metric-vs-params plots below the table.

Made-with: Cursor

Files changed (3) hide show
  1. index.html +17 -1
  2. main.js +104 -4
  3. style.css +13 -0
index.html CHANGED
@@ -9,6 +9,7 @@
9
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Outfit:wght@400;500;700&display=swap" rel="stylesheet">
10
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
11
  <link rel="stylesheet" href="./style.css">
 
12
  </head>
13
  <body>
14
  <div class="bg-blobs">
@@ -25,13 +26,20 @@
25
  </p>
26
 
27
  <div class="top-actions">
28
- <a href="https://arxiv.org/abs/2504.13128" target="_blank" class="action-btn"><i class="fa-solid fa-file-lines"></i> Paper</a>
 
29
  <a href="https://github.com/fresh-stack/freshstack" target="_blank" class="action-btn"><i class="fa-brands fa-github"></i> Code</a>
30
  <a href="https://huggingface.co/freshstack" target="_blank" class="action-btn"><i class="fa-solid fa-database"></i> Dataset</a>
31
  <a href="https://fresh-stack.github.io/" target="_blank" class="action-btn"><i class="fa-solid fa-house"></i> Project Home</a>
32
  <button class="action-btn" id="toggle-submit"><i class="fa-solid fa-paper-plane"></i> Submit Here</button>
33
  </div>
34
 
 
 
 
 
 
 
35
  <div id="submit-panel" class="panel hidden">
36
  <p>Submit your results by adding a new row to <code>leaderboard_data.json</code> and opening a PR.</p>
37
  <p><a href="https://github.com/fresh-stack/fresh-stack.github.io/blob/master/leaderboard_data.json" target="_blank">Open leaderboard_data.json</a></p>
@@ -77,6 +85,14 @@
77
  </div>
78
  </div>
79
 
 
 
 
 
 
 
 
 
80
  <section class="citation">
81
  <div class="citation-head">
82
  <h3>Cite FreshStack</h3>
 
9
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Outfit:wght@400;500;700&display=swap" rel="stylesheet">
10
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
11
  <link rel="stylesheet" href="./style.css">
12
+ <script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
13
  </head>
14
  <body>
15
  <div class="bg-blobs">
 
26
  </p>
27
 
28
  <div class="top-actions">
29
+ <a href="https://openreview.net/forum?id=54TTgXlS2U" target="_blank" class="action-btn"><i class="fa-solid fa-file-lines"></i> Paper</a>
30
+ <button class="action-btn" id="toggle-metrics"><i class="fa-solid fa-chart-line"></i> Metric Details</button>
31
  <a href="https://github.com/fresh-stack/freshstack" target="_blank" class="action-btn"><i class="fa-brands fa-github"></i> Code</a>
32
  <a href="https://huggingface.co/freshstack" target="_blank" class="action-btn"><i class="fa-solid fa-database"></i> Dataset</a>
33
  <a href="https://fresh-stack.github.io/" target="_blank" class="action-btn"><i class="fa-solid fa-house"></i> Project Home</a>
34
  <button class="action-btn" id="toggle-submit"><i class="fa-solid fa-paper-plane"></i> Submit Here</button>
35
  </div>
36
 
37
+ <div id="metrics-panel" class="panel hidden">
38
+ <p><b>alpha-nDCG@10 (α@10)</b>: diversity-aware ranking quality in top results.</p>
39
+ <p><b>Coverage@20 (C@20)</b>: fraction of nuggets supported by top-20 retrieved chunks.</p>
40
+ <p><b>Recall@50 (R@50)</b>: fraction of relevant chunks recovered in top-50.</p>
41
+ </div>
42
+
43
  <div id="submit-panel" class="panel hidden">
44
  <p>Submit your results by adding a new row to <code>leaderboard_data.json</code> and opening a PR.</p>
45
  <p><a href="https://github.com/fresh-stack/fresh-stack.github.io/blob/master/leaderboard_data.json" target="_blank">Open leaderboard_data.json</a></p>
 
85
  </div>
86
  </div>
87
 
88
+ <section class="plots">
89
+ <h3>FreshStack Metrics vs. Model Parameters</h3>
90
+ <p class="plot-sub">Average scores across 5 domains vs model parameter size; points are colored by model family.</p>
91
+ <div id="plot-avg-alpha10" class="plot-box"></div>
92
+ <div id="plot-avg-c20" class="plot-box"></div>
93
+ <div id="plot-avg-r50" class="plot-box"></div>
94
+ </section>
95
+
96
  <section class="citation">
97
  <div class="citation-head">
98
  <h3>Cite FreshStack</h3>
main.js CHANGED
@@ -16,7 +16,7 @@ const GROUPS = [
16
  ];
17
 
18
  const METRICS = [
19
- { id: 'a10', label: 'alpha@10' },
20
  { id: 'c20', label: 'C@20' },
21
  { id: 'r50', label: 'R@50' }
22
  ];
@@ -24,6 +24,11 @@ const METRICS = [
24
  let rows = [];
25
  let sortKey = 'avg_r50';
26
  let sortAsc = false;
 
 
 
 
 
27
 
28
  function num(v) {
29
  return typeof v === 'number' ? v.toFixed(3) : '-';
@@ -38,6 +43,45 @@ function typeBadge(type) {
38
  return `<span class="type-pill type-${type}">${labels[type] || type}</span>`;
39
  }
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  function mapRow(item) {
42
  return {
43
  name: item.info.name,
@@ -73,7 +117,7 @@ function renderHeaders() {
73
  <th rowspan="2" data-key="type">Type</th>
74
  <th rowspan="2" data-key="size">Params</th>
75
  <th rowspan="2" data-key="date">Date</th>
76
- ${GROUPS.map(g => `<th colspan="3">${g.label}</th>`).join('')}
77
  `;
78
 
79
  headerRowSub.innerHTML = GROUPS.map(g =>
@@ -123,7 +167,7 @@ function renderBody() {
123
  <td>${typeBadge(r.type)}</td>
124
  <td>${r.size || '-'}</td>
125
  <td>${r.date || '-'}</td>
126
- <td>${num(r.avg_a10)}</td><td>${num(r.avg_c20)}</td><td>${num(r.avg_r50)}</td>
127
  <td>${num(r.lc_a10)}</td><td>${num(r.lc_c20)}</td><td>${num(r.lc_r50)}</td>
128
  <td>${num(r.yolo_a10)}</td><td>${num(r.yolo_c20)}</td><td>${num(r.yolo_r50)}</td>
129
  <td>${num(r.laravel_a10)}</td><td>${num(r.laravel_c20)}</td><td>${num(r.laravel_r50)}</td>
@@ -133,19 +177,75 @@ function renderBody() {
133
  `).join('');
134
  }
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  async function init() {
137
  const resp = await fetch('./leaderboard_data.json');
138
  const data = await resp.json();
139
  rows = data.leaderboardData.map(mapRow);
140
  renderHeaders();
141
  renderBody();
 
142
  }
143
 
144
  searchInput.addEventListener('input', renderBody);
145
- typeFilters.forEach(cb => cb.addEventListener('change', renderBody));
146
  document.getElementById('toggle-submit').addEventListener('click', () => {
147
  document.getElementById('submit-panel').classList.toggle('hidden');
148
  });
 
 
 
149
 
150
  copyCitationBtn.addEventListener('click', async () => {
151
  try {
 
16
  ];
17
 
18
  const METRICS = [
19
+ { id: 'a10', label: 'α@10' },
20
  { id: 'c20', label: 'C@20' },
21
  { id: 'r50', label: 'R@50' }
22
  ];
 
24
  let rows = [];
25
  let sortKey = 'avg_r50';
26
  let sortAsc = false;
27
// Configuration for the metric-vs-parameters plots, one entry per chart.
// As read by renderPlots: `key` is the row field used for the y value,
// `plotId` is the DOM id handed to Plotly.newPlot, `yLabel` is the y-axis
// title, and `yMin`/`yMax` form the fixed y-axis range.
// NOTE(review): `id` is not read by renderPlots in the code visible here —
// confirm whether anything else consumes it.
+ const PLOT_METRICS = [
28
+ { id: 'alpha_ndcg_10', key: 'avg_a10', plotId: 'plot-avg-alpha10', yLabel: 'α@10 (Avg. 5)', yMin: 0.1, yMax: 0.541 },
29
+ { id: 'coverage_20', key: 'avg_c20', plotId: 'plot-avg-c20', yLabel: 'C@20 (Avg. 5)', yMin: 0.25, yMax: 0.868 },
30
+ { id: 'recall_50', key: 'avg_r50', plotId: 'plot-avg-r50', yLabel: 'R@50 (Avg. 5)', yMin: 0.15, yMax: 0.755 }
31
+ ];
32
 
33
  function num(v) {
34
  return typeof v === 'number' ? v.toFixed(3) : '-';
 
43
  return `<span class="type-pill type-${type}">${labels[type] || type}</span>`;
44
  }
45
 
46
/**
 * Convert a human-readable parameter count such as "7B", "335M" or "500K"
 * into a number expressed in billions of parameters.
 *
 * The unit suffix is case-insensitive and may be separated from the number
 * by whitespace. Anything that is not a well-formed decimal numeral followed
 * by exactly one of B/M/K yields null.
 *
 * @param {*} sizeStr - Raw size value; falsy values and the '-' placeholder
 *   are treated as "unknown".
 * @returns {number|null} Parameter count in billions, or null when the
 *   input is missing or cannot be parsed.
 */
function parseSizeToBillions(sizeStr) {
  if (!sizeStr || sizeStr === '-') return null;

  // Require a proper decimal numeral. A loose character class like [\d.]+
  // would accept "1.2.3B" and parseFloat would silently read it as 1.2.
  const m = String(sizeStr).trim().match(/^(\d+(?:\.\d*)?|\.\d+)\s*([BMK])$/i);
  if (!m) return null;

  const numValue = Number.parseFloat(m[1]);
  if (Number.isNaN(numValue)) return null;

  const unit = m[2].toUpperCase();
  if (unit === 'B') return numValue;
  if (unit === 'M') return numValue / 1000;
  if (unit === 'K') return numValue / 1e6;
  return null;
}
58
+
59
/**
 * Guess the model family label for a leaderboard row from its display name.
 *
 * Matching is a case-insensitive substring search over an ordered rule list;
 * the first rule with a matching substring wins.
 *
 * @param {*} name - Model display name; null/undefined/empty are allowed.
 * @returns {string} A family label, or 'Other' when no rule matches.
 */
function inferFamily(name) {
  const n = String(name || '').toLowerCase();

  // Order matters. 'fusion' must be tested first: the fusion row's name lists
  // its components ("Fusion (BM25, BGE, E5, Voyage)"), so any component rule
  // placed earlier would claim it (previously it was labelled 'Voyage').
  const rules = [
    [['fusion'], 'Fusion'],
    [['stella', 'jasper'], 'Stella'],
    [['harrier'], 'Harrier OSS'],
    [['voyage'], 'Voyage'],
    [['jina'], 'Jina'],
    [['qwen3'], 'Qwen3'],
    [['granite'], 'IBM Granite'],
    [['arctic embed'], 'Arctic Embed'],
    [['perplexity embed'], 'Perplexity Embed'],
    [['nomic embed', 'coderankembed'], 'Nomic Embed'],
    [['bge'], 'BGE'],
    [['e5'], 'E5'],
    [['gte'], 'GTE'],
    [['bm25'], 'BM25']
  ];

  const hit = rules.find(rule => rule[0].some(needle => n.includes(needle)));
  return hit ? hit[1] : 'Other';
}
77
+
78
// Marker color per model-family label. The keys are the labels returned by
// inferFamily; renderPlots looks a family up here and falls back to
// '#9e9e9e' when a label has no entry.
+ const FAMILY_COLORS = {
79
+ 'Stella': '#1f77b4', 'Harrier OSS': '#ff7f0e', 'Voyage': '#009688', 'Jina': '#d62728',
80
+ 'Qwen3': '#9467bd', 'IBM Granite': '#8c564b', 'Arctic Embed': '#e377c2', 'Perplexity Embed': '#17becf',
81
+ 'Nomic Embed': '#6a1b9a', 'BGE': '#7f7f7f', 'E5': '#393b79', 'GTE': '#bcbd22',
82
+ 'BM25': '#969696', 'Fusion': '#e6550d', 'Other': '#9e9e9e'
83
+ };
84
+
85
  function mapRow(item) {
86
  return {
87
  name: item.info.name,
 
117
  <th rowspan="2" data-key="type">Type</th>
118
  <th rowspan="2" data-key="size">Params</th>
119
  <th rowspan="2" data-key="date">Date</th>
120
+ ${GROUPS.map(g => `<th colspan="3" class="group-name">${g.label}</th>`).join('')}
121
  `;
122
 
123
  headerRowSub.innerHTML = GROUPS.map(g =>
 
167
  <td>${typeBadge(r.type)}</td>
168
  <td>${r.size || '-'}</td>
169
  <td>${r.date || '-'}</td>
170
+ <td class="avg-score">${num(r.avg_a10)}</td><td class="avg-score">${num(r.avg_c20)}</td><td class="avg-score">${num(r.avg_r50)}</td>
171
  <td>${num(r.lc_a10)}</td><td>${num(r.lc_c20)}</td><td>${num(r.lc_r50)}</td>
172
  <td>${num(r.yolo_a10)}</td><td>${num(r.yolo_c20)}</td><td>${num(r.yolo_r50)}</td>
173
  <td>${num(r.laravel_a10)}</td><td>${num(r.laravel_c20)}</td><td>${num(r.laravel_r50)}</td>
 
177
  `).join('');
178
  }
179
 
180
/**
 * Draw one "metric vs. model parameters" scatter plot per PLOT_METRICS entry.
 *
 * Rows are limited to the currently active types, grouped into one trace per
 * inferred model family, and plotted against a log-scaled x axis. When the
 * BM25 and/or Fusion rows are present among the filtered rows, their score is
 * added as a horizontal reference line spanning the plotted x range.
 *
 * Does nothing when the Plotly global is not available.
 */
function renderPlots() {
  if (typeof Plotly === 'undefined') return;
  const active = activeTypes();
  const filtered = rows.filter(r => active.includes(r.type));

  // The baseline rows do not depend on the metric, so look them up once
  // instead of once per plot.
  const bm25 = filtered.find(r => String(r.name).toLowerCase() === 'bm25');
  const fusion = filtered.find(r => String(r.name).toLowerCase() === 'fusion (bm25, bge, e5, voyage)');

  PLOT_METRICS.forEach(metric => {
    // A missing container would make Plotly.newPlot throw and abort the
    // remaining plots; skip just this metric instead.
    if (!document.getElementById(metric.plotId)) return;

    const grouped = {};
    filtered.forEach(r => {
      const x = parseSizeToBillions(r.size);
      const y = r[metric.key];
      if (x === null || typeof y !== 'number') return;
      // The x axis is logarithmic, so a zero (or otherwise non-positive /
      // non-finite) size has no position and would also drag xmin to 0 for
      // the baseline lines below.
      if (!Number.isFinite(x) || x <= 0) return;
      const fam = inferFamily(r.name);
      if (!grouped[fam]) grouped[fam] = { x: [], y: [], text: [] };
      grouped[fam].x.push(x);
      grouped[fam].y.push(y);
      grouped[fam].text.push(r.name);
    });

    // One marker trace per family, in alphabetical order of family label.
    const traces = Object.keys(grouped).sort().map(fam => ({
      type: 'scatter',
      mode: 'markers',
      name: fam,
      x: grouped[fam].x,
      y: grouped[fam].y,
      text: grouped[fam].text,
      marker: { color: FAMILY_COLORS[fam] || '#9e9e9e', size: 11, line: { width: 1, color: '#fff' } },
      hovertemplate: '<b>%{text}</b><br>Params: %{x:.3f}B<br>Score: %{y:.3f}<extra></extra>'
    }));

    // Horizontal reference lines need an x extent, so they are only added
    // when at least one point was plotted.
    const xs = traces.flatMap(t => t.x || []);
    if (xs.length) {
      const xmin = Math.min(...xs);
      const xmax = Math.max(...xs);
      if (bm25 && typeof bm25[metric.key] === 'number') {
        traces.push({ type: 'scatter', mode: 'lines', name: 'BM25', x: [xmin, xmax], y: [bm25[metric.key], bm25[metric.key]], line: { color: 'rgba(97,97,97,0.55)', width: 1.1, dash: 'dash' } });
      }
      if (fusion && typeof fusion[metric.key] === 'number') {
        traces.push({ type: 'scatter', mode: 'lines', name: 'Fusion', x: [xmin, xmax], y: [fusion[metric.key], fusion[metric.key]], line: { color: 'rgba(106,27,154,0.55)', width: 1.1, dash: 'dot' } });
      }
    }

    Plotly.newPlot(metric.plotId, traces, {
      margin: { t: 20, r: 12, b: 76, l: 56 },
      xaxis: { title: { text: 'Model Parameters (Billions)', standoff: 18 }, type: 'log', showgrid: true },
      yaxis: { title: metric.yLabel, range: [metric.yMin, metric.yMax], tickformat: '.2f', showgrid: true },
      legend: { orientation: 'h', y: -0.26, x: 0.5, xanchor: 'center' },
      hovermode: 'closest'
    }, { responsive: true, displaylogo: false });
  });
}
231
+
232
  async function init() {
233
  const resp = await fetch('./leaderboard_data.json');
234
  const data = await resp.json();
235
  rows = data.leaderboardData.map(mapRow);
236
  renderHeaders();
237
  renderBody();
238
+ renderPlots();
239
  }
240
 
241
  searchInput.addEventListener('input', renderBody);
242
+ typeFilters.forEach(cb => cb.addEventListener('change', () => { renderBody(); renderPlots(); }));
243
  document.getElementById('toggle-submit').addEventListener('click', () => {
244
  document.getElementById('submit-panel').classList.toggle('hidden');
245
  });
246
+ document.getElementById('toggle-metrics').addEventListener('click', () => {
247
+ document.getElementById('metrics-panel').classList.toggle('hidden');
248
+ });
249
 
250
  copyCitationBtn.addEventListener('click', async () => {
251
  try {
style.css CHANGED
@@ -98,11 +98,13 @@ thead th {
98
  }
99
  #header-row-top th { top: 0; font-weight: 700; border-bottom: 1px solid #dbe3f0; }
100
  #header-row-sub th { top: 38px; font-weight: 600; color: #374151; }
 
101
  .metric-header { cursor: pointer; }
102
  tbody tr:hover { background: #f8fbff; }
103
  .model-cell { font-weight: 600; max-width: 360px; overflow: hidden; text-overflow: ellipsis; }
104
  .model-cell a { color: #1d4ed8; text-decoration: none; }
105
  .model-cell a:hover { text-decoration: underline; }
 
106
  .type-pill {
107
  display: inline-flex;
108
  align-items: center;
@@ -121,6 +123,17 @@ tbody tr:hover { background: #f8fbff; }
121
  border-radius: 14px;
122
  padding: 14px;
123
  }
 
 
 
 
 
 
 
 
 
 
 
124
  .citation-head {
125
  display: flex;
126
  align-items: center;
 
98
  }
99
  #header-row-top th { top: 0; font-weight: 700; border-bottom: 1px solid #dbe3f0; }
100
  #header-row-sub th { top: 38px; font-weight: 600; color: #374151; }
101
+ .group-name { text-align: center !important; }
102
  .metric-header { cursor: pointer; }
103
  tbody tr:hover { background: #f8fbff; }
104
  .model-cell { font-weight: 600; max-width: 360px; overflow: hidden; text-overflow: ellipsis; }
105
  .model-cell a { color: #1d4ed8; text-decoration: none; }
106
  .model-cell a:hover { text-decoration: underline; }
107
+ .avg-score { color: #1d4ed8; font-weight: 700; }
108
  .type-pill {
109
  display: inline-flex;
110
  align-items: center;
 
123
  border-radius: 14px;
124
  padding: 14px;
125
  }
126
+ .plots {
127
+ margin-top: 20px;
128
+ background: #fff;
129
+ border: 1px solid #e5e7eb;
130
+ border-radius: 14px;
131
+ padding: 14px;
132
+ }
133
+ .plots h3 { margin: 0 0 6px; }
134
+ .plot-sub { margin: 0 0 12px; color: #475569; }
135
+ .plot-box { width: 100%; height: 390px; margin-bottom: 12px; }
136
+ .plot-box:last-child { margin-bottom: 0; }
137
  .citation-head {
138
  display: flex;
139
  align-items: center;