Upload index.html with huggingface_hub
Browse files- index.html +4 -134
index.html
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>
|
| 7 |
<script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
|
| 8 |
<style>
|
| 9 |
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
@@ -16,9 +16,7 @@
|
|
| 16 |
|
| 17 |
/* ── Page header ─────────────────────────────── */
|
| 18 |
.page-header {
|
| 19 |
-
|
| 20 |
-
align-items: center;
|
| 21 |
-
justify-content: space-between;
|
| 22 |
margin-bottom: 24px;
|
| 23 |
}
|
| 24 |
.page-header h1 {
|
|
@@ -347,7 +345,8 @@
|
|
| 347 |
</head>
|
| 348 |
<body>
|
| 349 |
<div class="page-header">
|
| 350 |
-
<h1>
|
|
|
|
| 351 |
</div>
|
| 352 |
|
| 353 |
<div id="init-loading">Initializing DuckDB...</div>
|
|
@@ -356,8 +355,6 @@
|
|
| 356 |
|
| 357 |
<div class="add-panel-row" id="add-panel-row" style="display:none">
|
| 358 |
<button class="btn btn-primary" id="btn-add-panel">+ Add Panel</button>
|
| 359 |
-
<button class="btn" id="btn-scorecard-csv">Export Scorecard CSV</button>
|
| 360 |
-
<span id="scorecard-progress" style="font-size:0.8rem;color:#6c757d;display:none"></span>
|
| 361 |
</div>
|
| 362 |
|
| 363 |
<script type="module">
|
|
@@ -1689,132 +1686,6 @@
|
|
| 1689 |
return panel;
|
| 1690 |
}
|
| 1691 |
|
| 1692 |
-
// ── Scorecard CSV export ────────────────────────────────────
|
| 1693 |
-
/**
 * Build a "scorecard" CSV covering every benchmark task/metric pair that has
 * checkpoint data, and hand it to the browser as a download named
 * `benchmark_scorecard.csv`.
 *
 * Steps: read the `scores` table through `query()`, regroup the rows per
 * task+metric and per model, ask `computeTaskQualityMetrics()` for the quality
 * metrics of each group, and emit one CSV row per task+metric with one
 * `<stage>_<metric>` column for every stage/metric combination.
 *
 * While running, `#btn-scorecard-csv` is disabled and `#scorecard-progress`
 * shows status text. Failures are shown in the progress element and logged to
 * the console; they are not rethrown.
 *
 * @returns {Promise<void>} Settles once the export finished or failed.
 */
async function exportScorecardCSV() {
  const progress = document.getElementById('scorecard-progress');
  const btn = document.getElementById('btn-scorecard-csv');
  btn.disabled = true;
  progress.style.display = 'inline';
  progress.textContent = 'Querying all benchmark data...';

  try {
    // Every benchmark-level task/metric pair that has checkpoint data.
    const tasks = await query(`
      SELECT DISTINCT task, task_display_name, metric, higher_is_better
      FROM scores
      WHERE task_type = 'benchmark'
        AND is_checkpoint = true
        AND tokens_trained IS NOT NULL
      ORDER BY task, metric
    `);

    // Fetch all checkpoint rows in one query instead of one query per task.
    progress.textContent = 'Loading checkpoint scores...';
    const allRows = await query(`
      SELECT task, metric, model_display_name, tokens_trained, score,
             is_checkpoint, higher_is_better
      FROM scores
      WHERE task_type = 'benchmark'
        AND is_checkpoint = true
        AND tokens_trained IS NOT NULL
      ORDER BY task, metric, model_display_name, tokens_trained
    `);

    // Group rows by task+metric.
    const grouped = new Map();
    for (const r of allRows) {
      const key = `${r.task}|||${r.metric}`;
      let bucket = grouped.get(key);
      if (!bucket) {
        bucket = [];
        grouped.set(key, bucket);
      }
      bucket.push(r);
    }

    const stages = ['overall', 'early', 'late'];
    const metricKeys = ['monotonicity', 'signalStrength', 'noise', 'orderingConsistency', 'discrimination'];

    // CSV header: fixed identification columns, then one column per stage/metric.
    const headers = ['task', 'task_display_name', 'metric', 'higher_is_better', 'n_models', 'n_checkpoints'];
    for (const stage of stages) {
      for (const mk of metricKeys) {
        headers.push(`${stage}_${mk}`);
      }
    }
    const csvRows = [headers.join(',')];

    let processed = 0;
    const total = tasks.length;

    for (const t of tasks) {
      const rows = grouped.get(`${t.task}|||${t.metric}`);
      if (!rows || rows.length === 0) continue;

      // Collect points per model in a Map: model names come from the data, and
      // looking them up on a plain object would hit inherited members for
      // names such as "constructor" or "toString".
      const pointsByModel = new Map();
      for (const r of rows) {
        const name = String(r.model_display_name);
        let entry = pointsByModel.get(name);
        if (!entry) {
          entry = { points: [], isCheckpoint: true };
          pointsByModel.set(name, entry);
        }
        entry.points.push({ x: Number(r.tokens_trained), y: r.score });
      }
      for (const entry of pointsByModel.values()) {
        entry.points.sort((a, b) => a.x - b.x);
      }

      // computeTaskQualityMetrics is handed a plain object keyed by model name,
      // as before; Object.fromEntries defines every key as an own property.
      const byModel = Object.fromEntries(pointsByModel);

      const nModels = pointsByModel.size;
      const nCheckpoints = Math.max(...[...pointsByModel.values()].map((d) => d.points.length));
      const metrics = computeTaskQualityMetrics(byModel, t.higher_is_better);

      const vals = [
        csvEsc(t.task),
        csvEsc(t.task_display_name),
        csvEsc(t.metric),
        t.higher_is_better,
        nModels,
        nCheckpoints,
      ];
      for (const stage of stages) {
        for (const mk of metricKeys) {
          const v = metrics?.[stage]?.[mk];
          // Only real numbers are formatted; anything else (missing, NaN, or a
          // non-number value) becomes an empty cell instead of throwing from
          // .toFixed and aborting the whole export.
          vals.push(typeof v === 'number' && !Number.isNaN(v) ? v.toFixed(6) : '');
        }
      }
      csvRows.push(vals.join(','));

      processed++;
      if (processed % 50 === 0) {
        progress.textContent = `Computing metrics... ${processed}/${total}`;
        await new Promise((resolve) => setTimeout(resolve, 0)); // yield to UI
      }
    }

    // Download via a temporary object URL and a synthetic anchor click.
    const blob = new Blob([csvRows.join('\n')], { type: 'text/csv' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = 'benchmark_scorecard.csv';
    a.click();
    // Release the object URL on a later tick rather than synchronously after
    // click(), so the revoke cannot race the start of the download.
    // NOTE(review): the 1s delay is a conservative choice, not a measured value.
    setTimeout(() => URL.revokeObjectURL(url), 1000);

    progress.textContent = `Done! ${processed} task-metric combos exported.`;
    setTimeout(() => { progress.style.display = 'none'; }, 3000);
  } catch (err) {
    // Keep the message visible so the user sees why nothing was downloaded.
    progress.textContent = `Error: ${err.message}`;
    console.error('Scorecard export failed:', err);
  } finally {
    btn.disabled = false;
  }
}
|
| 1808 |
-
|
| 1809 |
-
/**
 * Escape a single value for use as one CSV field.
 *
 * `null` and `undefined` become an empty field. Any other value is converted
 * with `String()`. If the text contains a comma, a double quote, or a line
 * break character (`\n` or `\r`), it is wrapped in double quotes and embedded
 * double quotes are doubled; otherwise it is returned unchanged.
 *
 * @param {*} val - Value to place in a CSV cell.
 * @returns {string} The field text, quoted when required.
 */
function csvEsc(val) {
  if (val == null) return '';
  const s = String(val);
  // A bare carriage return also terminates a record for many CSV readers, so
  // it has to trigger quoting just like "\n".
  if (/[",\r\n]/.test(s)) {
    return '"' + s.replace(/"/g, '""') + '"';
  }
  return s;
}
|
| 1817 |
-
|
| 1818 |
// ── Init ────────────────────────────────────────────────────
|
| 1819 |
const elInitLoading = document.getElementById('init-loading');
|
| 1820 |
const elAddPanelRow = document.getElementById('add-panel-row');
|
|
@@ -1848,7 +1719,6 @@
|
|
| 1848 |
}
|
| 1849 |
|
| 1850 |
document.getElementById('btn-add-panel').addEventListener('click', () => addPanel());
|
| 1851 |
-
document.getElementById('btn-scorecard-csv').addEventListener('click', () => exportScorecardCSV());
|
| 1852 |
|
| 1853 |
init();
|
| 1854 |
</script>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>ellamind base-eval</title>
|
| 7 |
<script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
|
| 8 |
<style>
|
| 9 |
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
|
|
| 16 |
|
| 17 |
/* ── Page header ─────────────────────────────── */
|
| 18 |
.page-header {
|
| 19 |
+
text-align: center;
|
|
|
|
|
|
|
| 20 |
margin-bottom: 24px;
|
| 21 |
}
|
| 22 |
.page-header h1 {
|
|
|
|
| 345 |
</head>
|
| 346 |
<body>
|
| 347 |
<div class="page-header">
|
| 348 |
+
<h1>ellamind base-eval</h1>
|
| 349 |
+
<p style="margin:4px 0 0;font-size:13px;color:#6c757d;"><a href="https://github.com/ellamind/base-eval" target="_blank" rel="noopener" style="color:#4361ee;">ellamind/base-eval</a> · Data: <a href="https://huggingface.co/datasets/ellamind/eval-scores-ref" target="_blank" rel="noopener" style="color:#4361ee;">ellamind/eval-scores-ref</a></p>
|
| 350 |
</div>
|
| 351 |
|
| 352 |
<div id="init-loading">Initializing DuckDB...</div>
|
|
|
|
| 355 |
|
| 356 |
<div class="add-panel-row" id="add-panel-row" style="display:none">
|
| 357 |
<button class="btn btn-primary" id="btn-add-panel">+ Add Panel</button>
|
|
|
|
|
|
|
| 358 |
</div>
|
| 359 |
|
| 360 |
<script type="module">
|
|
|
|
| 1686 |
return panel;
|
| 1687 |
}
|
| 1688 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1689 |
// ── Init ────────────────────────────────────────────────────
|
| 1690 |
const elInitLoading = document.getElementById('init-loading');
|
| 1691 |
const elAddPanelRow = document.getElementById('add-panel-row');
|
|
|
|
| 1719 |
}
|
| 1720 |
|
| 1721 |
document.getElementById('btn-add-panel').addEventListener('click', () => addPanel());
|
|
|
|
| 1722 |
|
| 1723 |
init();
|
| 1724 |
</script>
|