maxidl committed on
Commit
33c9ddc
·
verified ·
1 Parent(s): c4a5bf1

Upload index.html with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.html +928 -18
index.html CHANGED
@@ -1,19 +1,929 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Eval Suite Visualization</title>
7
+ <script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
8
+ <style>
9
+ * { box-sizing: border-box; margin: 0; padding: 0; }
10
+ body {
11
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
12
+ background: #f8f9fa;
13
+ color: #1a1a2e;
14
+ padding: 24px;
15
+ }
16
+
17
+ /* ── Page header ─────────────────────────────── */
18
+ .page-header {
19
+ display: flex;
20
+ align-items: center;
21
+ justify-content: space-between;
22
+ margin-bottom: 24px;
23
+ }
24
+ .page-header h1 {
25
+ font-size: 1.5rem;
26
+ font-weight: 600;
27
+ color: #1a1a2e;
28
+ }
29
+ .btn {
30
+ padding: 8px 16px;
31
+ border: 1px solid #dee2e6;
32
+ border-radius: 6px;
33
+ background: #fff;
34
+ font-size: 0.875rem;
35
+ color: #495057;
36
+ cursor: pointer;
37
+ transition: background 0.15s;
38
+ }
39
+ .btn:hover { background: #e9ecef; }
40
+ .btn-primary {
41
+ background: #4361ee;
42
+ color: #fff;
43
+ border-color: #4361ee;
44
+ }
45
+ .btn-primary:hover { background: #3a56d4; }
46
+ .btn-sm {
47
+ padding: 4px 10px;
48
+ font-size: 0.75rem;
49
+ }
50
+ .btn-danger { color: #e63946; border-color: #e6394640; }
51
+ .btn-danger:hover { background: #e6394610; }
52
+
53
+ /* ── Panel ───────────────────────────────────── */
54
+ .panel {
55
+ background: #fff;
56
+ border: 1px solid #dee2e6;
57
+ border-radius: 8px;
58
+ margin-bottom: 20px;
59
+ overflow: hidden;
60
+ }
61
+ .panel-header {
62
+ display: flex;
63
+ align-items: center;
64
+ gap: 12px;
65
+ padding: 12px 16px;
66
+ border-bottom: 1px solid #dee2e6;
67
+ background: #f8f9fa;
68
+ }
69
+ .panel-title {
70
+ flex: 1;
71
+ font-size: 0.875rem;
72
+ font-weight: 600;
73
+ color: #1a1a2e;
74
+ white-space: nowrap;
75
+ overflow: hidden;
76
+ text-overflow: ellipsis;
77
+ }
78
+ .panel-info {
79
+ font-size: 0.7rem;
80
+ color: #6c757d;
81
+ font-weight: 400;
82
+ }
83
+ .panel-controls {
84
+ padding: 16px;
85
+ border-bottom: 1px solid #dee2e6;
86
+ }
87
+ .panel-controls.collapsed { display: none; }
88
+ .controls-row {
89
+ display: flex;
90
+ flex-wrap: wrap;
91
+ gap: 12px;
92
+ align-items: flex-end;
93
+ }
94
+ .controls-row + .controls-row { margin-top: 12px; }
95
+ .control-group {
96
+ display: flex;
97
+ flex-direction: column;
98
+ gap: 4px;
99
+ }
100
+ .control-group label {
101
+ font-size: 0.7rem;
102
+ font-weight: 600;
103
+ text-transform: uppercase;
104
+ letter-spacing: 0.05em;
105
+ color: #6c757d;
106
+ }
107
+ select {
108
+ padding: 6px 10px;
109
+ border: 1px solid #dee2e6;
110
+ border-radius: 6px;
111
+ background: #fff;
112
+ font-size: 0.8rem;
113
+ color: #1a1a2e;
114
+ min-width: 160px;
115
+ cursor: pointer;
116
+ }
117
+ select:focus {
118
+ outline: none;
119
+ border-color: #4361ee;
120
+ box-shadow: 0 0 0 3px rgba(67, 97, 238, 0.15);
121
+ }
122
+
123
+ /* ── Models section ──────────────────────────── */
124
+ .models-section {
125
+ margin-top: 12px;
126
+ }
127
+ .models-header {
128
+ display: flex;
129
+ align-items: center;
130
+ gap: 8px;
131
+ margin-bottom: 8px;
132
+ }
133
+ .models-header span {
134
+ font-size: 0.7rem;
135
+ font-weight: 600;
136
+ text-transform: uppercase;
137
+ letter-spacing: 0.05em;
138
+ color: #6c757d;
139
+ }
140
+ .checkbox-grid {
141
+ display: flex;
142
+ flex-wrap: wrap;
143
+ gap: 6px 16px;
144
+ }
145
+ .checkbox-item {
146
+ display: flex;
147
+ align-items: center;
148
+ gap: 5px;
149
+ cursor: pointer;
150
+ font-size: 0.8rem;
151
+ }
152
+ .checkbox-item input[type="checkbox"] {
153
+ width: 14px;
154
+ height: 14px;
155
+ cursor: pointer;
156
+ accent-color: #4361ee;
157
+ }
158
+ .model-separator {
159
+ width: 100%;
160
+ border-top: 1px solid #eee;
161
+ margin: 4px 0;
162
+ }
163
+
164
+ /* ── Chart ───────────────────────────────────── */
165
+ .panel-chart {
166
+ min-height: 100px;
167
+ }
168
+ .loading {
169
+ display: flex;
170
+ align-items: center;
171
+ justify-content: center;
172
+ padding: 1rem 0;
173
+ color: #adb5bd;
174
+ font-size: 0.85rem;
175
+ }
176
+
177
+ /* ── Custom tooltip ──────────────────────────── */
178
+ .custom-tooltip {
179
+ position: fixed;
180
+ pointer-events: none;
181
+ background: rgba(0, 0, 0, 0.8);
182
+ color: #fff;
183
+ padding: 6px 10px;
184
+ border-radius: 4px;
185
+ font-size: 12px;
186
+ line-height: 1.4;
187
+ z-index: 9999;
188
+ display: none;
189
+ white-space: nowrap;
190
+ }
191
+
192
+ /* ── Add panel button ────────────────────────── */
193
+ .add-panel-row {
194
+ display: flex;
195
+ justify-content: center;
196
+ padding: 20px;
197
+ }
198
+
199
+ /* ── Init loading ────────────────────────────── */
200
+ #init-loading {
201
+ display: flex;
202
+ align-items: center;
203
+ justify-content: center;
204
+ height: 300px;
205
+ color: #6c757d;
206
+ font-size: 1rem;
207
+ }
208
+ </style>
209
+ </head>
210
+ <body>
211
+ <div class="page-header">
212
+ <h1>Eval Suite Visualization</h1>
213
+ </div>
214
+
215
+ <div id="init-loading">Initializing DuckDB...</div>
216
+ <div id="panels-container"></div>
217
+ <div class="custom-tooltip" id="custom-tooltip"></div>
218
+
219
+ <div class="add-panel-row" id="add-panel-row" style="display:none">
220
+ <button class="btn btn-primary" id="btn-add-panel">+ Add Panel</button>
221
+ </div>
222
+
223
+ <script type="module">
224
+ import * as duckdb from 'https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.29.0/+esm';
225
+ import jsyaml from 'https://cdn.jsdelivr.net/npm/js-yaml@4.1.0/+esm';
226
+
227
+ // ── Globals ─────────────────────────────────────────────────
228
+ let db = null;
229
+ let conn = null;
230
+ let panelCounter = 0;
231
+ const panels = new Map();
232
+
233
+ // Fallback palette for models without a config color
234
+ const COLOR_PALETTE = [
235
+ '#4361ee', '#e63946', '#2a9d8f', '#e9c46a', '#f4a261',
236
+ '#264653', '#7209b7', '#06d6a0', '#ef476f', '#ff6b6b',
237
+ '#48bfe3', '#d4a017', '#b5838d', '#588157', '#9d4edd',
238
+ '#f77f00', '#3a86a7', '#8338ec', '#ff006e', '#fb5607',
239
+ ];
240
+
241
+ const PARQUET_URL = 'https://huggingface.co/datasets/ellamind/eval-scores/resolve/main/scores.parquet';
242
+
243
+ // Shared model info (loaded once)
244
+ let ALL_MODELS = []; // [{model, model_display_name, is_checkpoint}]
245
+ let MODEL_COLORS = {};
246
+ let CONFIG = {}; // parsed config.yaml
247
+
248
+ // ── DuckDB init ─────────────────────────────────────────────
249
/**
 * Start the DuckDB-WASM engine and open the shared connection.
 *
 * Selects a jsDelivr bundle, starts its worker through a small blob script
 * that calls `importScripts` on the bundle's worker URL, instantiates the
 * database and stores the results in the module-level `db` and `conn`.
 *
 * @returns {Promise<void>}
 * @throws Propagates any error raised while selecting the bundle, creating
 *   the worker, instantiating the database or connecting.
 */
async function initDuckDB() {
  const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());
  const workerUrl = URL.createObjectURL(
    new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
  );
  try {
    const worker = new Worker(workerUrl);
    db = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker);
    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
  } finally {
    // Release the blob URL even when start-up fails, so it is never leaked.
    URL.revokeObjectURL(workerUrl);
  }
  conn = await db.connect();
}
262
+
263
/**
 * Download the scores parquet file, register it with DuckDB under the name
 * `scores.parquet` and expose it as the `scores` view.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the download does not return a successful HTTP status.
 */
async function loadParquet() {
  const response = await fetch(PARQUET_URL);
  if (!response.ok) {
    // Fail here with a clear message instead of handing an error page to
    // DuckDB and failing later on an unreadable "parquet" buffer.
    throw new Error(
      `Failed to download scores.parquet: HTTP ${response.status} ${response.statusText}`
    );
  }
  const buffer = new Uint8Array(await response.arrayBuffer());
  await db.registerFileBuffer('scores.parquet', buffer);
  await conn.query(`CREATE VIEW scores AS SELECT * FROM 'scores.parquet'`);
}
269
+
270
+ // ── SQL helpers ─────────────────────────────────────────────
271
/**
 * Run a SQL statement on the shared connection and return plain row objects.
 *
 * @param {string} sql - Statement to execute.
 * @returns {Promise<object[]>} One `toJSON()` object per result row.
 */
async function query(sql) {
  const table = await conn.query(sql);
  const records = [];
  for (const row of table.toArray()) {
    records.push(row.toJSON());
  }
  return records;
}
275
+
276
/**
 * Escape a value for use inside a single-quoted SQL string literal by
 * doubling every single quote. Non-string values are stringified first.
 *
 * @param {*} s - Value to escape.
 * @returns {string} The escaped text (without surrounding quotes).
 */
function esc(s) {
  return String(s).replaceAll("'", "''");
}

/**
 * Build the comma-separated body of a SQL `IN (...)` list from string values.
 *
 * @param {Array<*>} vals - Values to quote and join.
 * @returns {string} Quoted, escaped values joined by ", "; for an empty list
 *   the literal `NULL`, so that `IN (NULL)` stays valid SQL and matches no row.
 */
function sqlIn(vals) {
  if (vals.length === 0) return 'NULL';
  return vals.map((v) => `'${esc(v)}'`).join(', ');
}
278
+
279
+ // ── Shared helpers ──────────────────────────────────────────
280
/**
 * Replace the options of a `<select>` element.
 *
 * @param {HTMLSelectElement} el - Select element to refill.
 * @param {Array<string|{value: string, label: string}>} options - Entries to
 *   show; plain values are used as both value and label.
 * @param {string} [selected] - Value to pre-select, applied only when it is
 *   truthy and present among the options.
 */
function populateSelect(el, options, selected) {
  const valueOf = (entry) => (typeof entry === 'object' ? entry.value : entry);
  const labelOf = (entry) => (typeof entry === 'object' ? entry.label : entry);

  el.innerHTML = '';
  for (const entry of options) {
    const node = document.createElement('option');
    node.value = valueOf(entry);
    node.textContent = labelOf(entry);
    el.appendChild(node);
  }

  if (!selected) return;
  const isOffered = options.some((entry) => valueOf(entry) === selected);
  if (isOffered) {
    el.value = selected;
  }
}
292
+
293
/**
 * Format a token count with a T/B/M/K suffix for axis ticks and labels.
 *
 * Trillions always show one decimal. Billions, millions and thousands show
 * one decimal only when it is non-zero, so 1.5e9 renders as "1.5B" instead
 * of being rounded to the same "2B" label as 2e9.
 *
 * @param {number} value - Token count.
 * @returns {string} Compact label; values below 1000 are returned unscaled.
 */
function formatTokens(value) {
  if (value >= 1e12) return (value / 1e12).toFixed(1) + 'T';

  const units = [
    [1e9, 'B'],
    [1e6, 'M'],
    [1e3, 'K'],
  ];
  for (const [size, suffix] of units) {
    if (value >= size) {
      // Keep one decimal, but drop a trailing ".0" so round values stay short.
      return (value / size).toFixed(1).replace(/\.0$/, '') + suffix;
    }
  }
  return value.toString();
}
300
+
301
/**
 * Compute rounded, human-friendly tick positions covering [min, max].
 *
 * The step is chosen from 1, 2, 5 or 10 × 10^n so that roughly `maxTicks`
 * ticks fit in the range. Ticks are rounded to integers, and the range
 * endpoints are added when the nearest tick is more than 30% of a step away.
 *
 * @param {number} min - Lower bound of the range.
 * @param {number} max - Upper bound of the range.
 * @param {number} [maxTicks=8] - Approximate number of intervals wanted.
 * @returns {number[]} Ascending tick values without duplicates; `[min]` when
 *   the range is empty.
 */
function niceTicks(min, max, maxTicks = 8) {
  if (min === max) return [min];
  const range = max - min;
  // Find a "nice" step size: 1, 2, 5 × 10^n
  const rawStep = range / maxTicks;
  const mag = Math.pow(10, Math.floor(Math.log10(rawStep)));
  const normalized = rawStep / mag;
  let step;
  if (normalized <= 1.5) step = 1 * mag;
  else if (normalized <= 3.5) step = 2 * mag;
  else if (normalized <= 7.5) step = 5 * mag;
  else step = 10 * mag;

  // Walk integer multiples of the step rather than accumulating `v += step`,
  // so floating-point drift cannot drop the last tick. The small epsilon
  // absorbs division error such as 0.3 / 0.1 = 2.9999999999999996.
  const first = Math.ceil(min / step);
  const last = Math.floor(max / step + 1e-9);
  const ticks = [];
  for (let k = first; k <= last; k++) {
    ticks.push(Math.round(k * step));
  }
  // Always include min/max endpoints if not already close
  if (ticks.length === 0 || ticks[0] - min > step * 0.3) ticks.unshift(Math.round(min));
  if (max - ticks[ticks.length - 1] > step * 0.3) ticks.push(Math.round(max));

  // Rounding a sub-integer step maps several ticks to the same integer;
  // emit each value once (insertion order keeps the result ascending).
  return [...new Set(ticks)];
}
324
+
325
/**
 * Trailing moving average.
 *
 * Each output element is the mean of the current value and up to `w - 1`
 * values before it; the window is shorter at the start of the series.
 *
 * @param {number[]} values - Input series.
 * @param {number} w - Window size; 1 or less returns `values` itself.
 * @returns {number[]} Smoothed series of the same length.
 */
function movingAverage(values, w) {
  if (w <= 1) return values;

  const smoothed = [];
  for (let end = 0; end < values.length; end++) {
    const span = Math.min(end + 1, w);
    let total = 0;
    for (let k = end - span + 1; k <= end; k++) {
      total += values[k];
    }
    smoothed.push(total / span);
  }
  return smoothed;
}
334
+
335
/**
 * Best-effort load of `config.yaml` into the module-level `CONFIG`.
 *
 * A non-OK response leaves `CONFIG` untouched; a fetch or parse failure is
 * logged as a warning and otherwise ignored, so the app runs on defaults.
 *
 * @returns {Promise<void>}
 */
async function loadConfig() {
  try {
    const resp = await fetch('config.yaml');
    if (!resp.ok) return;
    const parsed = jsyaml.load(await resp.text());
    CONFIG = parsed || {};
  } catch (e) {
    console.warn('Could not load config.yaml, using defaults:', e);
  }
}
345
+
346
/**
 * Load the distinct models from the `scores` view into `ALL_MODELS` and
 * assign every model display name a color in `MODEL_COLORS`.
 *
 * Colors from `CONFIG.model_colors` win; the remaining models take the
 * fallback palette entries in order, wrapping around when it is exhausted.
 *
 * @returns {Promise<void>}
 */
async function loadModels() {
  ALL_MODELS = await query(`
    SELECT DISTINCT model, model_display_name, is_checkpoint
    FROM scores
    ORDER BY is_checkpoint DESC, model_display_name
  `);

  const overrides = CONFIG.model_colors || {};
  const assigned = {};
  let nextFallback = 0;
  for (const { model_display_name: label } of ALL_MODELS) {
    const override = overrides[label];
    if (override) {
      assigned[label] = override;
      continue;
    }
    assigned[label] = COLOR_PALETTE[nextFallback % COLOR_PALETTE.length];
    nextFallback += 1;
  }
  MODEL_COLORS = assigned;
}
367
+
368
+ // ── Panel class ─────────────────────────────────────────────
369
+ class Panel {
370
+ constructor(id) {
371
+ this.id = id;
372
+ this.el = {};
373
+ this.collapsed = false;
374
+ this.build();
375
+ }
376
+
377
+ build() {
378
+ const container = document.getElementById('panels-container');
379
+ const panel = document.createElement('div');
380
+ panel.className = 'panel';
381
+ panel.id = `panel-${this.id}`;
382
+
383
+ panel.innerHTML = `
384
+ <div class="panel-header">
385
+ <span class="panel-title" id="ptitle-${this.id}">New Panel</span>
386
+ <span class="panel-info" id="pinfo-${this.id}"></span>
387
+ <button class="btn btn-sm" id="ptoggle-${this.id}">Collapse</button>
388
+ <button class="btn btn-sm" id="pexport-png-${this.id}">PNG</button>
389
+ <button class="btn btn-sm" id="pexport-svg-${this.id}">SVG</button>
390
+ <button class="btn btn-sm btn-danger" id="premove-${this.id}">Remove</button>
391
+ </div>
392
+ <div class="panel-controls" id="pcontrols-${this.id}">
393
+ <div class="controls-row">
394
+ <div class="control-group">
395
+ <label>Eval Suite</label>
396
+ <select id="psuite-${this.id}"></select>
397
+ </div>
398
+ <div class="control-group">
399
+ <label>Group</label>
400
+ <select id="pgroup-${this.id}"></select>
401
+ </div>
402
+ <div class="control-group">
403
+ <label>Task</label>
404
+ <select id="ptask-${this.id}"></select>
405
+ </div>
406
+ <div class="control-group">
407
+ <label>Metric</label>
408
+ <select id="pmetric-${this.id}"></select>
409
+ </div>
410
+ <div class="control-group">
411
+ <label>Smoothing</label>
412
+ <select id="psmooth-${this.id}">
413
+ <option value="1" selected>None</option>
414
+ <option value="2">2</option>
415
+ <option value="3">3</option>
416
+ <option value="4">4</option>
417
+ <option value="5">5</option>
418
+ </select>
419
+ </div>
420
+ <div class="control-group">
421
+ <label>Chart Type</label>
422
+ <select id="pchart-type-${this.id}">
423
+ <option value="auto" selected>Auto</option>
424
+ <option value="line">Line</option>
425
+ <option value="bar">Bar</option>
426
+ </select>
427
+ </div>
428
+ </div>
429
+ <div class="models-section">
430
+ <div class="models-header">
431
+ <span>Models</span>
432
+ <button class="btn btn-sm" id="pmodels-all-${this.id}">All</button>
433
+ <button class="btn btn-sm" id="pmodels-none-${this.id}">None</button>
434
+ <button class="btn btn-sm" id="pmodels-ckpt-${this.id}">Checkpoints</button>
435
+ <button class="btn btn-sm" id="pmodels-base-${this.id}">Baselines</button>
436
+ </div>
437
+ <div class="checkbox-grid" id="pmodels-${this.id}"></div>
438
+ </div>
439
+ </div>
440
+ <div class="panel-chart" id="pchart-${this.id}"></div>
441
+ `;
442
+
443
+ container.appendChild(panel);
444
+
445
+ // Cache refs
446
+ this.el.panel = panel;
447
+ this.el.title = panel.querySelector(`#ptitle-${this.id}`);
448
+ this.el.info = panel.querySelector(`#pinfo-${this.id}`);
449
+ this.el.controls = panel.querySelector(`#pcontrols-${this.id}`);
450
+ this.el.suite = panel.querySelector(`#psuite-${this.id}`);
451
+ this.el.group = panel.querySelector(`#pgroup-${this.id}`);
452
+ this.el.task = panel.querySelector(`#ptask-${this.id}`);
453
+ this.el.metric = panel.querySelector(`#pmetric-${this.id}`);
454
+ this.el.smooth = panel.querySelector(`#psmooth-${this.id}`);
455
+ this.el.chartType = panel.querySelector(`#pchart-type-${this.id}`);
456
+ this.el.models = panel.querySelector(`#pmodels-${this.id}`);
457
+ this.el.chart = panel.querySelector(`#pchart-${this.id}`);
458
+
459
+ // Events
460
+ panel.querySelector(`#ptoggle-${this.id}`).addEventListener('click', () => this.toggleControls());
461
+ panel.querySelector(`#premove-${this.id}`).addEventListener('click', () => this.remove());
462
+ panel.querySelector(`#pexport-png-${this.id}`).addEventListener('click', () => this.export('png'));
463
+ panel.querySelector(`#pexport-svg-${this.id}`).addEventListener('click', () => this.export('svg'));
464
+
465
+ this.el.suite.addEventListener('change', () => this.onSuiteChange());
466
+ this.el.group.addEventListener('change', () => this.onGroupChange());
467
+ this.el.task.addEventListener('change', () => this.onTaskChange());
468
+ this.el.metric.addEventListener('change', () => this.renderChart());
469
+ this.el.smooth.addEventListener('change', () => this.renderChart());
470
+ this.el.chartType.addEventListener('change', () => this.renderChart());
471
+
472
+ panel.querySelector(`#pmodels-all-${this.id}`).addEventListener('click', () => this.setModels(true));
473
+ panel.querySelector(`#pmodels-none-${this.id}`).addEventListener('click', () => this.setModels(false));
474
+ panel.querySelector(`#pmodels-ckpt-${this.id}`).addEventListener('click', () => this.setModelsByType(true));
475
+ panel.querySelector(`#pmodels-base-${this.id}`).addEventListener('click', () => this.setModelsByType(false));
476
+
477
+ this.buildModelCheckboxes();
478
+ }
479
+
480
+ toggleControls() {
481
+ this.collapsed = !this.collapsed;
482
+ this.el.controls.classList.toggle('collapsed', this.collapsed);
483
+ this.el.panel.querySelector(`#ptoggle-${this.id}`).textContent =
484
+ this.collapsed ? 'Expand' : 'Collapse';
485
+ }
486
+
487
+ remove() {
488
+ this.el.panel.remove();
489
+ panels.delete(this.id);
490
+ }
491
+
492
+ buildModelCheckboxes() {
493
+ const container = this.el.models;
494
+ container.innerHTML = '';
495
+ let lastCkpt = null;
496
+
497
+ for (const m of ALL_MODELS) {
498
+ if (lastCkpt !== null && lastCkpt !== m.is_checkpoint) {
499
+ const sep = document.createElement('div');
500
+ sep.className = 'model-separator';
501
+ container.appendChild(sep);
502
+ }
503
+ lastCkpt = m.is_checkpoint;
504
+
505
+ const lbl = document.createElement('label');
506
+ lbl.className = 'checkbox-item';
507
+
508
+ const cb = document.createElement('input');
509
+ cb.type = 'checkbox';
510
+ cb.value = m.model_display_name;
511
+ cb.checked = true;
512
+ cb.dataset.isCheckpoint = m.is_checkpoint;
513
+ cb.addEventListener('change', () => this.renderChart());
514
+
515
+ const dot = document.createElement('span');
516
+ dot.style.cssText = `display:inline-block;width:9px;height:9px;border-radius:50%;background:${MODEL_COLORS[m.model_display_name]}`;
517
+
518
+ const name = document.createElement('span');
519
+ name.textContent = ' ' + m.model_display_name;
520
+ if (!m.is_checkpoint) {
521
+ name.style.fontStyle = 'italic';
522
+ name.title = 'Baseline';
523
+ }
524
+
525
+ lbl.append(cb, dot, name);
526
+ container.appendChild(lbl);
527
+ }
528
+ }
529
+
530
+ setModels(checked) {
531
+ this.el.models.querySelectorAll('input').forEach(cb => cb.checked = checked);
532
+ this.renderChart();
533
+ }
534
+
535
+ setModelsByType(isCheckpoint) {
536
+ this.el.models.querySelectorAll('input').forEach(cb => {
537
+ cb.checked = (cb.dataset.isCheckpoint === String(isCheckpoint));
538
+ });
539
+ this.renderChart();
540
+ }
541
+
542
+ getSelectedModels() {
543
+ return Array.from(this.el.models.querySelectorAll('input:checked')).map(cb => cb.value);
544
+ }
545
+
546
+ getSmoothing() {
547
+ return parseInt(this.el.smooth.value, 10) || 1;
548
+ }
549
+
550
+ getChartType() {
551
+ return this.el.chartType.value;
552
+ }
553
+
554
+ getSelectedTask() {
555
+ const v = this.el.task.value;
556
+ return v === '__group__' ? this.el.group.value : v;
557
+ }
558
+
559
+ // ── Populate cascades ──────────────────────────────────────
560
+ async populateSuites(defaults) {
561
+ const rows = await query(`
562
+ SELECT DISTINCT task AS value, task_display_name AS label
563
+ FROM scores
564
+ WHERE task_type = 'eval_suite' AND task != 'test_fix'
565
+ ORDER BY task
566
+ `);
567
+ populateSelect(this.el.suite, rows, defaults?.suite);
568
+ await this.onSuiteChange(defaults);
569
+ }
570
+
571
+ async onSuiteChange(defaults) {
572
+ const suite = this.el.suite.value;
573
+ if (!suite) return;
574
+
575
+ const rows = await query(`
576
+ SELECT DISTINCT task AS value, task_display_name AS label
577
+ FROM scores
578
+ WHERE parent_task = '${esc(suite)}'
579
+ AND task_type = 'task_group'
580
+ ORDER BY task
581
+ `);
582
+ const options = [
583
+ { value: suite, label: `${suite} (aggregate)` },
584
+ ...rows,
585
+ ];
586
+ populateSelect(this.el.group, options, defaults?.group);
587
+ await this.onGroupChange(defaults);
588
+ }
589
+
590
+ async onGroupChange(defaults) {
591
+ const group = this.el.group.value;
592
+ if (!group) return;
593
+
594
+ const rows = await query(`
595
+ SELECT DISTINCT task AS value, task_display_name AS label
596
+ FROM scores
597
+ WHERE parent_task = '${esc(group)}'
598
+ AND task_type = 'benchmark'
599
+ ORDER BY task
600
+ `);
601
+
602
+ if (rows.length === 0) {
603
+ populateSelect(this.el.task, [{ value: '__group__', label: '(aggregate)' }]);
604
+ } else {
605
+ populateSelect(this.el.task, [
606
+ { value: '__group__', label: `(aggregate: ${group})` },
607
+ ...rows,
608
+ ]);
609
+ }
610
+ if (defaults?.task) this.el.task.value = defaults.task;
611
+ await this.onTaskChange(defaults);
612
+ }
613
+
614
+ async onTaskChange(defaults) {
615
+ const task = this.getSelectedTask();
616
+ if (!task) return;
617
+
618
+ const rows = await query(`
619
+ SELECT DISTINCT metric FROM scores WHERE task = '${esc(task)}' ORDER BY metric
620
+ `);
621
+ const prev = defaults?.metric || this.el.metric.value;
622
+ populateSelect(this.el.metric, rows.map(r => r.metric), prev);
623
+ if (defaults?.chartType) this.el.chartType.value = defaults.chartType;
624
+ await this.renderChart();
625
+ }
626
+
627
+ // ── Chart rendering ────────────────────────────────────────
628
+ async renderChart() {
629
+ const task = this.getSelectedTask();
630
+ const metric = this.el.metric.value;
631
+ const models = this.getSelectedModels();
632
+
633
+ if (!task || !metric || models.length === 0) {
634
+ this.el.chart.innerHTML = '';
635
+ this.updateTitle(task, metric);
636
+ return;
637
+ }
638
+
639
+ const rows = await query(`
640
+ SELECT model_display_name, tokens_trained, score, score_stderr,
641
+ is_checkpoint, higher_is_better
642
+ FROM scores
643
+ WHERE task = '${esc(task)}'
644
+ AND metric = '${esc(metric)}'
645
+ AND model_display_name IN (${sqlIn(models)})
646
+ AND tokens_trained IS NOT NULL
647
+ ORDER BY model_display_name, tokens_trained
648
+ `);
649
+
650
+ if (rows.length === 0) {
651
+ this.el.chart.innerHTML = '<div class="loading">No data for this selection</div>';
652
+ this.updateTitle(task, metric);
653
+ return;
654
+ }
655
+
656
+ // Determine chart type
657
+ const chartType = this.resolveChartType(rows);
658
+ const higherIsBetter = rows[0]?.higher_is_better;
659
+
660
+ if (chartType === 'bar') {
661
+ this.drawBarChart(rows, task, metric, higherIsBetter);
662
+ } else {
663
+ this.drawLineChart(rows, task, metric, higherIsBetter);
664
+ }
665
+
666
+ this.updateTitle(task, metric, higherIsBetter);
667
+ }
668
+
669
+ resolveChartType(rows) {
670
+ const pref = this.getChartType();
671
+ if (pref !== 'auto') return pref;
672
+
673
+ // Auto-detect: if every model has <= 1 unique tokens_trained, use bar
674
+ const byModel = {};
675
+ for (const r of rows) {
676
+ if (!byModel[r.model_display_name]) byModel[r.model_display_name] = new Set();
677
+ byModel[r.model_display_name].add(Number(r.tokens_trained));
678
+ }
679
+ const allSingle = Object.values(byModel).every(s => s.size <= 1);
680
+ return allSingle ? 'bar' : 'line';
681
+ }
682
+
683
+ formatChartTitle(task, metric, higherIsBetter) {
684
+ const arrow = higherIsBetter === true ? ' \u2191' : higherIsBetter === false ? ' \u2193' : '';
685
+ return `${task} \u2014 ${metric}${arrow}`;
686
+ }
687
+
688
+ updateTitle(task, metric, higherIsBetter) {
689
+ const w = this.getSmoothing();
690
+ const smooth = w > 1 ? ` (smooth=${w})` : '';
691
+ const arrow = higherIsBetter === true ? ' \u2191' : higherIsBetter === false ? ' \u2193' : '';
692
+ this.el.title.textContent = `${task || '...'} \u2014 ${metric || '...'}${arrow}${smooth}`;
693
+ }
694
+
695
+ cleanupTooltip() {
696
+ const tooltip = document.getElementById('custom-tooltip');
697
+ tooltip.style.display = 'none';
698
+ const chart = this.el.chart;
699
+ chart.removeAllListeners?.('plotly_hover');
700
+ chart.removeAllListeners?.('plotly_unhover');
701
+ if (this._tooltipMouseMove) {
702
+ chart.removeEventListener('mousemove', this._tooltipMouseMove);
703
+ this._tooltipMouseMove = null;
704
+ }
705
+ if (this._tooltipMouseLeave) {
706
+ chart.removeEventListener('mouseleave', this._tooltipMouseLeave);
707
+ this._tooltipMouseLeave = null;
708
+ }
709
+ }
710
+
711
+ drawLineChart(rows, task, metric, higherIsBetter) {
712
+ this.cleanupTooltip();
713
+ const w = this.getSmoothing();
714
+
715
+ // Group by model
716
+ const byModel = {};
717
+ for (const r of rows) {
718
+ const name = r.model_display_name;
719
+ if (!byModel[name]) byModel[name] = { points: [], isCheckpoint: r.is_checkpoint };
720
+ byModel[name].points.push({ x: Number(r.tokens_trained), y: r.score });
721
+ }
722
+ for (const d of Object.values(byModel)) d.points.sort((a, b) => a.x - b.x);
723
+
724
+ // X range for baselines
725
+ let xMin = Infinity, xMax = -Infinity;
726
+ for (const d of Object.values(byModel)) {
727
+ if (d.isCheckpoint) {
728
+ for (const p of d.points) {
729
+ xMin = Math.min(xMin, p.x);
730
+ xMax = Math.max(xMax, p.x);
731
+ }
732
+ }
733
+ }
734
+ if (!isFinite(xMin)) { xMin = 0; xMax = 1; }
735
+
736
+ const traces = [];
737
+ for (const [name, d] of Object.entries(byModel)) {
738
+ const color = MODEL_COLORS[name] || '#999';
739
+ if (d.isCheckpoint && d.points.length > 1) {
740
+ traces.push({
741
+ x: d.points.map(p => p.x),
742
+ y: movingAverage(d.points.map(p => p.y), w),
743
+ name, mode: 'lines+markers',
744
+ line: { color, width: 2 }, marker: { size: 5 },
745
+ });
746
+ } else {
747
+ const score = d.points[0]?.y;
748
+ if (score != null) {
749
+ traces.push({
750
+ x: [xMin, xMax], y: [score, score],
751
+ name, mode: 'lines',
752
+ line: { color, width: 2, dash: 'dash' },
753
+ });
754
+ }
755
+ }
756
+ }
757
+
758
+ // Compute nice tick values from data range
759
+ const tickVals = niceTicks(xMin, xMax);
760
+
761
+ Plotly.react(this.el.chart, traces, {
762
+ title: { text: this.formatChartTitle(task, metric, higherIsBetter), font: { size: 14, color: '#1a1a2e' } },
763
+ hoverlabel: { namelength: -1 },
764
+ xaxis: {
765
+ title: { text: 'Tokens Trained', font: { size: 12 } },
766
+ tickfont: { size: 10 }, tickvals: tickVals, ticktext: tickVals.map(formatTokens),
767
+ gridcolor: '#e9ecef', zeroline: false,
768
+ },
769
+ yaxis: {
770
+ title: { text: 'Score', font: { size: 12 } },
771
+ tickfont: { size: 10 }, gridcolor: '#e9ecef', zeroline: false, autorange: true,
772
+ },
773
+ legend: { orientation: 'h', yanchor: 'bottom', y: 1.05, x: 0, font: { size: 11 } },
774
+ margin: { t: 80, r: 20, b: 70, l: 50 },
775
+ plot_bgcolor: '#fff', paper_bgcolor: '#fff',
776
+ font: { family: '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif' },
777
+ height: 500,
778
+ }, { responsive: true });
779
+ }
780
+
781
+ drawBarChart(rows, task, metric, higherIsBetter) {
782
+ this.cleanupTooltip();
783
+ // For bar chart, use latest checkpoint per model
784
+ const byModel = {};
785
+ for (const r of rows) {
786
+ const name = r.model_display_name;
787
+ const tokens = Number(r.tokens_trained);
788
+ if (!byModel[name] || tokens > byModel[name].tokens) {
789
+ byModel[name] = { score: r.score, tokens, isCheckpoint: r.is_checkpoint };
790
+ }
791
+ }
792
+
793
+ // Sort by score
794
+ const sorted = Object.entries(byModel)
795
+ .sort((a, b) => higherIsBetter !== false ? b[1].score - a[1].score : a[1].score - b[1].score);
796
+
797
+ const names = sorted.map(([n]) => n);
798
+ const scores = sorted.map(([, d]) => d.score);
799
+ const colors = sorted.map(([n]) => MODEL_COLORS[n] || '#999');
800
+ const tokens = sorted.map(([, d]) => formatTokens(d.tokens));
801
+ const hovertext = sorted.map(([n, d]) =>
802
+ `${n}<br>Score: ${d.score.toFixed(4)}<br>Tokens: ${formatTokens(d.tokens)}`
803
+ );
804
+
805
+ // Annotations for tokens trained at the start of each bar
806
+ const annotations = names.map((name, i) => ({
807
+ x: 0,
808
+ y: name,
809
+ text: 'tokens trained: ' + tokens[i],
810
+ xanchor: 'left',
811
+ yanchor: 'middle',
812
+ showarrow: false,
813
+ font: { size: 10, color: '#000' },
814
+ xshift: 4,
815
+ }));
816
+
817
+ Plotly.react(this.el.chart, [{
818
+ type: 'bar',
819
+ orientation: 'h',
820
+ y: names,
821
+ x: scores,
822
+ marker: { color: colors },
823
+ text: scores.map(s => s.toFixed(4)),
824
+ textposition: 'outside',
825
+ textfont: { size: 11 },
826
+ hoverinfo: 'none',
827
+ customdata: hovertext,
828
+ }], {
829
+ title: { text: this.formatChartTitle(task, metric, higherIsBetter), font: { size: 14, color: '#1a1a2e' } },
830
+ hovermode: 'closest',
831
+ annotations,
832
+ xaxis: {
833
+ title: { text: 'Score', font: { size: 12 } },
834
+ tickfont: { size: 10 }, gridcolor: '#e9ecef', zeroline: false,
835
+ },
836
+ yaxis: {
837
+ tickfont: { size: 11 }, automargin: true,
838
+ categoryorder: 'array', categoryarray: names.slice().reverse(),
839
+ },
840
+ margin: { t: 60, r: 80, b: 60, l: 10 },
841
+ plot_bgcolor: '#fff', paper_bgcolor: '#fff',
842
+ font: { family: '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif' },
843
+ height: Math.max(300, names.length * 40 + 100),
844
+ showlegend: false,
845
+ }, { responsive: true });
846
+
847
+ // Custom cursor-following tooltip
848
+ const tooltip = document.getElementById('custom-tooltip');
849
+ const chart = this.el.chart;
850
+ chart.on('plotly_hover', (data) => {
851
+ const pt = data.points[0];
852
+ tooltip.innerHTML = pt.customdata;
853
+ tooltip.style.display = 'block';
854
+ });
855
+ chart.on('plotly_unhover', () => {
856
+ tooltip.style.display = 'none';
857
+ });
858
+ this._tooltipMouseMove = (e) => {
859
+ if (tooltip.style.display === 'block') {
860
+ tooltip.style.left = (e.clientX + 12) + 'px';
861
+ tooltip.style.top = (e.clientY - 10) + 'px';
862
+ }
863
+ };
864
+ this._tooltipMouseLeave = () => {
865
+ tooltip.style.display = 'none';
866
+ };
867
+ chart.addEventListener('mousemove', this._tooltipMouseMove);
868
+ chart.addEventListener('mouseleave', this._tooltipMouseLeave);
869
+ }
870
+
871
+ export(format) {
872
+ const task = this.getSelectedTask();
873
+ const metric = this.el.metric.value;
874
+ let filename = `${task}_${metric}`.replace(/[^a-zA-Z0-9_-]/g, '_');
875
+ Plotly.downloadImage(this.el.chart, { format, scale: 3, filename });
876
+ }
877
+ }
878
+
879
+ // ── Panel management ────────────────────────────────────────
880
/**
 * Create a new panel with the next free id, register it in `panels` and
 * populate its selects (which also draws its first chart).
 *
 * @param {object} [defaults] - Optional preselection for the new panel.
 * @returns {Promise<Panel>} The created panel.
 */
async function addPanel(defaults) {
  const id = panelCounter;
  panelCounter += 1;

  const created = new Panel(id);
  panels.set(id, created);
  await created.populateSuites(defaults);
  return created;
}
887
+
888
+ // ── Init ────────────────────────────────────────────────────
889
+ const elInitLoading = document.getElementById('init-loading');
890
+ const elAddPanelRow = document.getElementById('add-panel-row');
891
+
892
/**
 * Application start-up: load the config, start DuckDB, load the data and the
 * model list, then reveal the "Add Panel" button and create the default
 * panel. Progress is shown in the `#init-loading` element.
 *
 * Any failure is logged and shown in `#init-loading`. The element is made
 * visible again first, because it may already have been hidden when a late
 * step (creating the default panel) fails.
 *
 * @returns {Promise<void>}
 */
async function init() {
  try {
    elInitLoading.textContent = 'Loading config...';
    await loadConfig();

    elInitLoading.textContent = 'Initializing DuckDB...';
    await initDuckDB();

    elInitLoading.textContent = 'Loading data from HuggingFace...';
    await loadParquet();

    elInitLoading.textContent = 'Loading models...';
    await loadModels();

    elInitLoading.style.display = 'none';
    elAddPanelRow.style.display = '';

    // Create default panel: deu_base_easy, aggregate, bits_per_byte, bar chart
    await addPanel({
      suite: 'deu_base_easy',
      metric: 'bits_per_byte',
      chartType: 'bar',
    });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);

    // Build the message from DOM nodes so the error text is never parsed as HTML.
    const box = document.createElement('span');
    box.style.color = '#e63946';
    const hint = document.createElement('small');
    hint.textContent = 'Check browser console for details.';
    box.append(`Error: ${message}`, document.createElement('br'), hint);

    elInitLoading.replaceChildren(box);
    // Clearing the inline style restores the stylesheet's display value.
    elInitLoading.style.display = '';
    console.error('Init failed:', err);
  }
}
923
+
924
+ document.getElementById('btn-add-panel').addEventListener('click', () => addPanel());
925
+
926
+ init();
927
+ </script>
928
+ </body>
929
  </html>