Spaces:
Sleeping
Sleeping
minor nits
Browse files- index.html +9 -9
- main.js +13 -1
- style.css +1 -0
index.html
CHANGED
|
@@ -21,7 +21,7 @@
|
|
| 21 |
<h1>FreshStack Leaderboard</h1>
|
| 22 |
<p class="subtitle">Realistic Retrieval Benchmarking on Technical Documentation</p>
|
| 23 |
<p class="intro">
|
| 24 |
-
FreshStack evaluates retrieval models on five domains: <b>LangChain</b>, <b>Yolo v7 & v8</b>, <b>Laravel 10 & 11</b>,
|
| 25 |
<b>Angular 16, 17 & 18</b>, and <b>Godot4</b>. Metrics include <b>alpha-nDCG@10</b>, <b>Coverage@20</b>, and <b>Recall@50</b>.
|
| 26 |
</p>
|
| 27 |
|
|
@@ -35,9 +35,9 @@
|
|
| 35 |
</div>
|
| 36 |
|
| 37 |
<div id="metrics-panel" class="panel hidden">
|
| 38 |
-
<p><b>alpha-nDCG@10 (α@10)</b>: diversity-aware ranking
|
| 39 |
-
<p><b>Coverage@20 (C@20)</b>: fraction of nuggets supported by top-20 retrieved
|
| 40 |
-
<p><b>Recall@50 (R@50)</b>: fraction of relevant
|
| 41 |
</div>
|
| 42 |
|
| 43 |
<div id="submit-panel" class="panel hidden">
|
|
@@ -45,11 +45,11 @@
|
|
| 45 |
<p><a href="https://github.com/fresh-stack/fresh-stack.github.io/blob/master/leaderboard_data.json" target="_blank">Open leaderboard_data.json</a></p>
|
| 46 |
<textarea readonly rows="14">{
|
| 47 |
"info": {
|
| 48 |
-
"name": "Your Model Name",
|
| 49 |
-
"size": "600M",
|
| 50 |
-
"type": "open_source",
|
| 51 |
-
"date": "2026-04-07",
|
| 52 |
-
"link": "https://model-or-paper-link"
|
| 53 |
},
|
| 54 |
"datasets": {
|
| 55 |
"langchain": {"alpha_ndcg_10": 0.000, "coverage_20": 0.000, "recall_50": 0.000},
|
|
|
|
| 21 |
<h1>FreshStack Leaderboard</h1>
|
| 22 |
<p class="subtitle">Realistic Retrieval Benchmarking on Technical Documentation</p>
|
| 23 |
<p class="intro">
|
| 24 |
+
FreshStack is a holistic framework for building realistic & challenging RAG benchmarks from community-asked questions and answers on niche and fast-growing domains. FreshStack evaluates retrieval models on five domains: <b>LangChain</b>, <b>Yolo v7 & v8</b>, <b>Laravel 10 & 11</b>,
|
| 25 |
<b>Angular 16, 17 & 18</b>, and <b>Godot4</b>. Metrics include <b>alpha-nDCG@10</b>, <b>Coverage@20</b>, and <b>Recall@50</b>.
|
| 26 |
</p>
|
| 27 |
|
|
|
|
| 35 |
</div>
|
| 36 |
|
| 37 |
<div id="metrics-panel" class="panel hidden">
|
| 38 |
+
<p><b>alpha-nDCG@10 (α@10)</b>: a diversity-aware ranking metric that builds on nDCG@10 but penalizes redundant documents (i.e., documents supporting the same nugget) by a geometric factor of alpha. Read more in <a href="https://dl.acm.org/doi/abs/10.1145/1390334.1390446" target="_blank">[Clarke et al. 2008]</a>.</p>
|
| 39 |
+
<p><b>Coverage@20 (C@20)</b>: fraction of unique nuggets supported by the top-20 retrieved documents. Defined in our <a href="https://openreview.net/forum?id=54TTgXlS2U" target="_blank">[paper]</a>.</p>
|
| 40 |
+
<p><b>Recall@50 (R@50)</b>: traditional retrieval metric measuring the fraction of relevant documents that appear among the top-50 retrieved documents.</p>
|
| 41 |
</div>
|
| 42 |
|
| 43 |
<div id="submit-panel" class="panel hidden">
|
|
|
|
| 45 |
<p><a href="https://github.com/fresh-stack/fresh-stack.github.io/blob/master/leaderboard_data.json" target="_blank">Open leaderboard_data.json</a></p>
|
| 46 |
<textarea readonly rows="14">{
|
| 47 |
"info": {
|
| 48 |
+
"name": "Your Model Name", // try to follow the format of other models
|
| 49 |
+
"size": "600M", // in millions (<1B) or billions (7B)
|
| 50 |
+
"type": "open_source", // open_source, proprietary
|
| 51 |
+
"date": "2026-04-07", // date of model release
|
| 52 |
+
"link": "https://model-or-paper-link" // link to model or documentation
|
| 53 |
},
|
| 54 |
"datasets": {
|
| 55 |
"langchain": {"alpha_ndcg_10": 0.000, "coverage_20": 0.000, "recall_50": 0.000},
|
main.js
CHANGED
|
@@ -43,6 +43,15 @@ function typeBadge(type) {
|
|
| 43 |
return `<span class="type-pill type-${type}">${labels[type] || type}</span>`;
|
| 44 |
}
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
function parseSizeToBillions(sizeStr) {
|
| 47 |
if (!sizeStr || sizeStr === '-') return null;
|
| 48 |
const m = String(sizeStr).trim().match(/^([\d.]+)\s*([BMK])$/i);
|
|
@@ -163,7 +172,10 @@ function renderBody() {
|
|
| 163 |
bodyRow.innerHTML = filtered.map((r, idx) => `
|
| 164 |
<tr>
|
| 165 |
<td>${idx + 1}</td>
|
| 166 |
-
<td class="model-cell">
|
|
|
|
|
|
|
|
|
|
| 167 |
<td>${typeBadge(r.type)}</td>
|
| 168 |
<td>${r.size || '-'}</td>
|
| 169 |
<td>${r.date || '-'}</td>
|
|
|
|
| 43 |
return `<span class="type-pill type-${type}">${labels[type] || type}</span>`;
|
| 44 |
}
|
| 45 |
|
| 46 |
+
/**
 * Decide whether a model should be flagged as "new".
 *
 * A model is new when its release date is not in the future relative to
 * `now` and is at most `maxAgeDays` days old (boundary inclusive).
 *
 * @param {string} dateStr - Release date in any form `new Date()` can parse
 *   (e.g. "2026-04-07"). Empty or missing values are treated as "not new".
 * @param {Date|number} [now=new Date()] - Reference instant to compare
 *   against; defaults to the current time. Injectable so callers and tests
 *   can get deterministic results.
 * @param {number} [maxAgeDays=90] - Size of the "new" window in days.
 * @returns {boolean} `true` when the model's age is within `[0, maxAgeDays]`
 *   days; `false` otherwise, including for unparsable dates.
 */
function isNewModel(dateStr, now = new Date(), maxAgeDays = 90) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  if (!dateStr) return false;

  const modelTime = new Date(dateStr).getTime();
  if (Number.isNaN(modelTime)) return false;

  // Normalise the reference so both Date objects and epoch numbers work.
  const nowTime = new Date(now).getTime();
  if (Number.isNaN(nowTime)) return false;

  const daysDiff = (nowTime - modelTime) / MS_PER_DAY;

  // A negative difference means the release date is still in the future.
  return daysDiff >= 0 && daysDiff <= maxAgeDays;
}
|
| 54 |
+
|
| 55 |
function parseSizeToBillions(sizeStr) {
|
| 56 |
if (!sizeStr || sizeStr === '-') return null;
|
| 57 |
const m = String(sizeStr).trim().match(/^([\d.]+)\s*([BMK])$/i);
|
|
|
|
| 172 |
bodyRow.innerHTML = filtered.map((r, idx) => `
|
| 173 |
<tr>
|
| 174 |
<td>${idx + 1}</td>
|
| 175 |
+
<td class="model-cell">
|
| 176 |
+
${r.link ? `<a href="${r.link}" target="_blank">${r.name}</a>` : r.name}
|
| 177 |
+
${isNewModel(r.date) ? '<span class="new-badge">🆕</span>' : ''}
|
| 178 |
+
</td>
|
| 179 |
<td>${typeBadge(r.type)}</td>
|
| 180 |
<td>${r.size || '-'}</td>
|
| 181 |
<td>${r.date || '-'}</td>
|
style.css
CHANGED
|
@@ -104,6 +104,7 @@ tbody tr:hover { background: #f8fbff; }
|
|
| 104 |
.model-cell { font-weight: 600; max-width: 360px; overflow: hidden; text-overflow: ellipsis; }
|
| 105 |
.model-cell a { color: #1d4ed8; text-decoration: none; }
|
| 106 |
.model-cell a:hover { text-decoration: underline; }
|
|
|
|
| 107 |
.avg-score { color: #1d4ed8; font-weight: 700; }
|
| 108 |
.type-pill {
|
| 109 |
display: inline-flex;
|
|
|
|
| 104 |
.model-cell { font-weight: 600; max-width: 360px; overflow: hidden; text-overflow: ellipsis; }
|
| 105 |
.model-cell a { color: #1d4ed8; text-decoration: none; }
|
| 106 |
.model-cell a:hover { text-decoration: underline; }
|
| 107 |
+
.new-badge { margin-left: 6px; font-size: 0.9em; vertical-align: middle; }
|
| 108 |
.avg-score { color: #1d4ed8; font-weight: 700; }
|
| 109 |
.type-pill {
|
| 110 |
display: inline-flex;
|