maxidl commited on
Commit
fd32731
·
verified ·
1 Parent(s): cdbce30

Upload index.html with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.html +4 -134
index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Eval Suite Visualization</title>
7
  <script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
8
  <style>
9
  * { box-sizing: border-box; margin: 0; padding: 0; }
@@ -16,9 +16,7 @@
16
 
17
  /* ── Page header ─────────────────────────────── */
18
  .page-header {
19
- display: flex;
20
- align-items: center;
21
- justify-content: space-between;
22
  margin-bottom: 24px;
23
  }
24
  .page-header h1 {
@@ -347,7 +345,8 @@
347
  </head>
348
  <body>
349
  <div class="page-header">
350
- <h1>Eval Suite Visualization</h1>
 
351
  </div>
352
 
353
  <div id="init-loading">Initializing DuckDB...</div>
@@ -356,8 +355,6 @@
356
 
357
  <div class="add-panel-row" id="add-panel-row" style="display:none">
358
  <button class="btn btn-primary" id="btn-add-panel">+ Add Panel</button>
359
- <button class="btn" id="btn-scorecard-csv">Export Scorecard CSV</button>
360
- <span id="scorecard-progress" style="font-size:0.8rem;color:#6c757d;display:none"></span>
361
  </div>
362
 
363
  <script type="module">
@@ -1689,132 +1686,6 @@
1689
  return panel;
1690
  }
1691
 
1692
- // ── Scorecard CSV export ────────────────────────────────────
1693
- async function exportScorecardCSV() {
1694
- const progress = document.getElementById('scorecard-progress');
1695
- const btn = document.getElementById('btn-scorecard-csv');
1696
- btn.disabled = true;
1697
- progress.style.display = 'inline';
1698
- progress.textContent = 'Querying all benchmark data...';
1699
-
1700
- try {
1701
- // Get all benchmark-level tasks with checkpoint data
1702
- const tasks = await query(`
1703
- SELECT DISTINCT task, task_display_name, metric, higher_is_better
1704
- FROM scores
1705
- WHERE task_type = 'benchmark'
1706
- AND is_checkpoint = true
1707
- AND tokens_trained IS NOT NULL
1708
- ORDER BY task, metric
1709
- `);
1710
-
1711
- // Fetch all checkpoint rows in one query
1712
- progress.textContent = 'Loading checkpoint scores...';
1713
- const allRows = await query(`
1714
- SELECT task, metric, model_display_name, tokens_trained, score,
1715
- is_checkpoint, higher_is_better
1716
- FROM scores
1717
- WHERE task_type = 'benchmark'
1718
- AND is_checkpoint = true
1719
- AND tokens_trained IS NOT NULL
1720
- ORDER BY task, metric, model_display_name, tokens_trained
1721
- `);
1722
-
1723
- // Group rows by task+metric
1724
- const grouped = {};
1725
- for (const r of allRows) {
1726
- const key = r.task + '|||' + r.metric;
1727
- if (!grouped[key]) grouped[key] = [];
1728
- grouped[key].push(r);
1729
- }
1730
-
1731
- // Compute metrics for each task+metric combo
1732
- const stages = ['overall', 'early', 'late'];
1733
- const metricKeys = ['monotonicity', 'signalStrength', 'noise', 'orderingConsistency', 'discrimination'];
1734
-
1735
- // CSV header
1736
- const headers = ['task', 'task_display_name', 'metric', 'higher_is_better', 'n_models', 'n_checkpoints'];
1737
- for (const stage of stages) {
1738
- for (const mk of metricKeys) {
1739
- headers.push(`${stage}_${mk}`);
1740
- }
1741
- }
1742
- const csvRows = [headers.join(',')];
1743
-
1744
- let processed = 0;
1745
- const total = tasks.length;
1746
-
1747
- for (const t of tasks) {
1748
- const key = t.task + '|||' + t.metric;
1749
- const rows = grouped[key];
1750
- if (!rows || rows.length === 0) continue;
1751
-
1752
- // Build byModel structure (same as drawLineChart)
1753
- const byModel = {};
1754
- for (const r of rows) {
1755
- const name = r.model_display_name;
1756
- if (!byModel[name]) byModel[name] = { points: [], isCheckpoint: true };
1757
- byModel[name].points.push({ x: Number(r.tokens_trained), y: r.score });
1758
- }
1759
- for (const d of Object.values(byModel)) {
1760
- d.points.sort((a, b) => a.x - b.x);
1761
- }
1762
-
1763
- const nModels = Object.keys(byModel).length;
1764
- const nCheckpoints = Math.max(...Object.values(byModel).map(d => d.points.length));
1765
- const metrics = computeTaskQualityMetrics(byModel, t.higher_is_better);
1766
-
1767
- const vals = [
1768
- csvEsc(t.task),
1769
- csvEsc(t.task_display_name),
1770
- csvEsc(t.metric),
1771
- t.higher_is_better,
1772
- nModels,
1773
- nCheckpoints,
1774
- ];
1775
- for (const stage of stages) {
1776
- for (const mk of metricKeys) {
1777
- const v = metrics?.[stage]?.[mk];
1778
- vals.push(v != null && !isNaN(v) ? v.toFixed(6) : '');
1779
- }
1780
- }
1781
- csvRows.push(vals.join(','));
1782
-
1783
- processed++;
1784
- if (processed % 50 === 0) {
1785
- progress.textContent = `Computing metrics... ${processed}/${total}`;
1786
- await new Promise(r => setTimeout(r, 0)); // yield to UI
1787
- }
1788
- }
1789
-
1790
- // Download
1791
- const blob = new Blob([csvRows.join('\n')], { type: 'text/csv' });
1792
- const url = URL.createObjectURL(blob);
1793
- const a = document.createElement('a');
1794
- a.href = url;
1795
- a.download = 'benchmark_scorecard.csv';
1796
- a.click();
1797
- URL.revokeObjectURL(url);
1798
-
1799
- progress.textContent = `Done! ${processed} task-metric combos exported.`;
1800
- setTimeout(() => { progress.style.display = 'none'; }, 3000);
1801
- } catch (err) {
1802
- progress.textContent = `Error: ${err.message}`;
1803
- console.error('Scorecard export failed:', err);
1804
- } finally {
1805
- btn.disabled = false;
1806
- }
1807
- }
1808
-
1809
- function csvEsc(val) {
1810
- if (val == null) return '';
1811
- const s = String(val);
1812
- if (s.includes(',') || s.includes('"') || s.includes('\n')) {
1813
- return '"' + s.replace(/"/g, '""') + '"';
1814
- }
1815
- return s;
1816
- }
1817
-
1818
  // ── Init ────────────────────────────────────────────────────
1819
  const elInitLoading = document.getElementById('init-loading');
1820
  const elAddPanelRow = document.getElementById('add-panel-row');
@@ -1848,7 +1719,6 @@
1848
  }
1849
 
1850
  document.getElementById('btn-add-panel').addEventListener('click', () => addPanel());
1851
- document.getElementById('btn-scorecard-csv').addEventListener('click', () => exportScorecardCSV());
1852
 
1853
  init();
1854
  </script>
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>ellamind base-eval</title>
7
  <script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
8
  <style>
9
  * { box-sizing: border-box; margin: 0; padding: 0; }
 
16
 
17
  /* ── Page header ─────────────────────────────── */
18
  .page-header {
19
+ text-align: center;
 
 
20
  margin-bottom: 24px;
21
  }
22
  .page-header h1 {
 
345
  </head>
346
  <body>
347
  <div class="page-header">
348
+ <h1>ellamind base-eval</h1>
349
+ <p style="margin:4px 0 0;font-size:13px;color:#6c757d;"><a href="https://github.com/ellamind/base-eval" target="_blank" rel="noopener" style="color:#4361ee;">ellamind/base-eval</a> · Data: <a href="https://huggingface.co/datasets/ellamind/eval-scores-ref" target="_blank" rel="noopener" style="color:#4361ee;">ellamind/eval-scores-ref</a></p>
350
  </div>
351
 
352
  <div id="init-loading">Initializing DuckDB...</div>
 
355
 
356
  <div class="add-panel-row" id="add-panel-row" style="display:none">
357
  <button class="btn btn-primary" id="btn-add-panel">+ Add Panel</button>
 
 
358
  </div>
359
 
360
  <script type="module">
 
1686
  return panel;
1687
  }
1688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1689
  // ── Init ────────────────────────────────────────────────────
1690
  const elInitLoading = document.getElementById('init-loading');
1691
  const elAddPanelRow = document.getElementById('add-panel-row');
 
1719
  }
1720
 
1721
  document.getElementById('btn-add-panel').addEventListener('click', () => addPanel());
 
1722
 
1723
  init();
1724
  </script>