joelniklaus HF Staff committed on
Commit
1da160c
·
1 Parent(s): e575fa7

made presentation together with script to convert to standalone file

Browse files
app/presentation/se2026/assets/academia-hub.png ADDED

Git LFS Details

  • SHA256: bd09292616583ae80d8d0af2e45f03b1677695bd9ffa4f445fafe4beb41dba68
  • Pointer size: 131 Bytes
  • Size of remote file: 765 kB
app/presentation/se2026/assets/bern-skyline.png ADDED

Git LFS Details

  • SHA256: ee85a87bc197f02967a63414ef7b7051772a5152106e8e2817dfddd76dc96de4
  • Pointer size: 130 Bytes
  • Size of remote file: 44.3 kB
app/presentation/se2026/assets/dclm-filtering-pipeline.png ADDED

Git LFS Details

  • SHA256: 9253a7197ab67f6f0661b295ac7347451cb7c7629bf8d545970989e0a1cab965
  • Pointer size: 130 Bytes
  • Size of remote file: 70.3 kB
app/presentation/se2026/assets/profile.jpg ADDED

Git LFS Details

  • SHA256: 9777155d9c0f3ba6feb90d44711979d852ab1b2362cbf8279ff72eb450adc85a
  • Pointer size: 130 Bytes
  • Size of remote file: 74.3 kB
app/presentation/se2026/build-standalone.mjs ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Build helper for the SE2026 one-file presentation.
3
+ *
4
+ * What this script does:
5
+ * - Reads `index.html` as the base reveal deck
6
+ * - Inlines local CSS (`style.css`)
7
+ * - Inlines local image assets as data URIs
8
+ * - Rewrites chart iframe sources to embedded `data:` HTML documents
9
+ * - Injects chart data (`data/benchmark-results.csv`, `data/rephrasing_metadata.json`)
10
+ * directly into chart pages so they no longer fetch local files
11
+ * - Writes the final output to `standalone.html`
12
+ *
13
+ * How to run:
14
+ * - From this folder:
15
+ * `node build-standalone.mjs`
16
+ * - Or from repo root:
17
+ * `node app/presentation/se2026/build-standalone.mjs`
18
+ */
19
+ import { readFileSync, writeFileSync } from 'fs';
20
+ import { dirname, join } from 'path';
21
+ import { fileURLToPath } from 'url';
22
+
23
+ const __dirname = dirname(fileURLToPath(import.meta.url));
24
+
25
/** Absolute path of a file addressed relative to this script's directory. */
const resolveLocal = (relativePath) => join(__dirname, relativePath);

/** Read a file that lives next to this script and decode it as UTF-8 text. */
const readText = (relativePath) => {
  return readFileSync(resolveLocal(relativePath), 'utf8');
};

/** Read a file that lives next to this script as a raw Buffer. */
const readBinary = (relativePath) => {
  return readFileSync(resolveLocal(relativePath));
};

/** Encode a local file as a base64 `data:` URI carrying the given MIME type. */
const toDataUri = (relativePath, mimeType) => {
  const base64Payload = readBinary(relativePath).toString('base64');
  return `data:${mimeType};base64,${base64Payload}`;
};
30
+
31
+ const css = readText('style.css');
32
+ const csvData = readText('data/benchmark-results.csv');
33
+ const jsonData = readText('data/rephrasing_metadata.json');
34
+
35
// Which inline payloads each chart page needs. The key order doubles as the
// order in which the charts are processed.
const chartDataNeeds = {
  'experiment-flow': { csv: false, json: true },
  benchmark: { csv: true, json: false },
  'benchmark-prompts': { csv: true, json: false },
  'benchmark-family': { csv: true, json: false },
  throughput: { csv: false, json: false },
  'cost-efficiency': { csv: true, json: true },
  pipeline: { csv: false, json: false },
};

// Chart ids, taken from the map above so the two can never drift apart.
const chartIds = Object.keys(chartDataNeeds);
54
+
55
// Local images referenced by index.html, paired with their MIME types.
const imageAssets = [
  ['assets/bern-skyline.png', 'image/png'],
  ['assets/dclm-filtering-pipeline.png', 'image/png'],
  ['assets/profile.jpg', 'image/jpeg'],
  ['assets/academia-hub.png', 'image/png'],
];

// Maps each asset path (as written in a `src` attribute) to its data URI.
const imageDataUris = {};
for (const [assetPath, mimeType] of imageAssets) {
  imageDataUris[assetPath] = toDataUri(assetPath, mimeType);
}
61
+
62
/**
 * Embed the data payloads a chart needs as globals inside the chart's HTML.
 *
 * The CSV text is exposed as the string `window.__INLINE_CSV_DATA__`; the JSON
 * file is pasted verbatim as a JavaScript expression and exposed as
 * `window.__INLINE_JSON_DATA__`. The generated `<script>` is placed right
 * before the first `</head>`, or prepended when the page has no `</head>`.
 *
 * @param {string} chartHtml - Source of the chart page.
 * @param {{csv: boolean, json: boolean}} needs - Which payloads to embed.
 * @returns {string} The chart HTML, unchanged when no payload is needed.
 */
function injectInlineData(chartHtml, needs) {
  // A literal `</script` inside a payload would end the inline <script> early.
  // `<\/script` is the same text once parsed as a JS string and keeps the
  // original letter case of the matched tag name.
  const escapeScriptClose = (source) => source.replace(/<\/(script)/gi, '<\\/$1');

  const snippets = [];

  if (needs.csv) {
    snippets.push(`window.__INLINE_CSV_DATA__ = ${escapeScriptClose(JSON.stringify(csvData))};`);
  }

  if (needs.json) {
    snippets.push(`window.__INLINE_JSON_DATA__ = ${escapeScriptClose(jsonData)};`);
  }

  if (snippets.length === 0) {
    return chartHtml;
  }

  const inlineScript = `<script>\n${snippets.join('\n')}\n</script>`;
  if (!chartHtml.includes('</head>')) {
    return `${inlineScript}\n${chartHtml}`;
  }

  // Use a replacer function: with a string replacement, sequences such as
  // `$&` or `$$` occurring in the payload would be interpreted as replacement
  // patterns and corrupt the embedded data.
  return chartHtml.replace('</head>', () => `${inlineScript}\n</head>`);
}
86
+
87
/**
 * Rewrite the data-loading statements of a chart page so they read the
 * inlined `window.__INLINE_*` globals instead of fetching local files.
 * Pages that contain none of the known statements come back unchanged.
 *
 * @param {string} chartHtml - Source of the chart page.
 * @returns {string} The patched chart source.
 */
function patchChartFetches(chartHtml) {
  // [pattern, replacement] pairs, applied in this order.
  const rewrites = [
    // Benchmark chart family: read CSV from inline payload.
    [
      /const text = await fetchFirstAvailable\(csvPaths\);/g,
      'const text = window.__INLINE_CSV_DATA__;',
    ],
    // Experiment flow chart: read JSON from inline payload.
    [
      /fetchFirst\(dataPaths\)\.then\(data => buildChart\(data\)\)/g,
      'Promise.resolve(window.__INLINE_JSON_DATA__).then(data => buildChart(data))',
    ],
    // Cost efficiency chart: read both JSON and CSV from inline payloads.
    [
      /Promise\.all\(\[\s*fetchFirst\(dataPaths\),\s*fetchFirst\(csvPaths, d3\.csvParse\)\s*\]\)\.then\(\(\[data, csvRows\]\) => buildChart\(data, csvRows\)\)/g,
      'Promise.resolve([window.__INLINE_JSON_DATA__, d3.csvParse(window.__INLINE_CSV_DATA__)]).then(([data, csvRows]) => buildChart(data, csvRows))',
    ],
  ];

  return rewrites.reduce(
    (patched, [pattern, replacement]) => patched.replace(pattern, replacement),
    chartHtml
  );
}
110
+
111
/**
 * Wrap an HTML document in a base64 `data:text/html` URL.
 *
 * @param {string} html - Document source; encoded as UTF-8 before base64.
 * @returns {string} The data URL.
 */
function toDataUrl(html) {
  const encodedHtml = Buffer.from(html, 'utf8').toString('base64');
  return `data:text/html;base64,${encodedHtml}`;
}
114
+
115
// Turn every chart page into a self-contained `data:` URL: inline the data it
// needs, then redirect its fetch calls to that inline data.
const chartDataUrls = {};
for (const chartId of chartIds) {
  let chartHtml = readText(`charts/${chartId}.html`);
  chartHtml = injectInlineData(chartHtml, chartDataNeeds[chartId]);
  chartHtml = patchChartFetches(chartHtml);
  chartDataUrls[chartId] = toDataUrl(chartHtml);
}

// Replace every occurrence of `needle` literally. Unlike a RegExp built from
// the needle, no character of a path (e.g. a second `.`) is treated as a
// pattern, and `$` sequences in the replacement are inserted as-is.
const replaceAllLiteral = (text, needle, replacement) => text.split(needle).join(replacement);

let output = readText('index.html');

// Replacer function so that `$` sequences inside the stylesheet are not
// interpreted as replacement patterns.
output = output.replace(
  '<link rel="stylesheet" href="style.css">',
  () => `<style>\n${css}\n</style>`
);

for (const [path, dataUri] of Object.entries(imageDataUris)) {
  output = replaceAllLiteral(output, `src="${path}"`, `src="${dataUri}"`);
}

for (const chartId of chartIds) {
  output = replaceAllLiteral(output, `src="charts/${chartId}.html"`, `src="${chartDataUrls[chartId]}"`);
}

writeFileSync(join(__dirname, 'standalone.html'), output, 'utf8');

console.log('Built standalone.html');
console.log('Size:', (Buffer.byteLength(output) / 1024 / 1024).toFixed(2), 'MB');
app/presentation/se2026/charts/benchmark-family.html ADDED
@@ -0,0 +1,837 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="dark">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Benchmark Comparison</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
8
+ <style>
9
+ :root {
10
+ --text-color: rgba(255,255,255,0.88);
11
+ --muted-color: rgba(255,255,255,0.45);
12
+ --surface-bg: rgba(30,30,40,0.95);
13
+ --border-color: rgba(255,255,255,0.1);
14
+ --axis-color: rgba(255,255,255,0.15);
15
+ --tick-color: rgba(255,255,255,0.5);
16
+ --grid-color: rgba(255,255,255,0.06);
17
+ --primary-color: #7c6ff7;
18
+ }
19
+ * { box-sizing: border-box; margin: 0; padding: 0; }
20
+ html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
21
+ </style>
22
+ </head>
23
+ <body>
24
+ <div class="d3-benchmark-comparison" data-config='{"defaultSetup":"average","setups":{"Article":{"datasets":{"mix-fw_edu_hq-article_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-article_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-article_granite3_1b_hq":"Granite3","mix-fw_edu_hq-article_1b_hq":"Gemma-3","mix-fw_edu_hq-article_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-article_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Discussion":{"datasets":{"mix-fw_edu_hq-discussion_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-discussion_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-discussion_granite3_1b_hq":"Granite3","mix-fw_edu_hq-discussion_1b_hq":"Gemma-3","mix-fw_edu_hq-discussion_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-discussion_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Tutorial":{"datasets":{"mix-fw_edu_hq-tutorial_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-tutorial_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-tutorial_qwen3_1.7b_hq":"Qwen3","mix-fw_edu_hq-tutorial_1b_hq":"Gemma-3","mix-fw_edu_hq-tutorial_granite3_1b_hq":"Granite3","mix-fw_edu_hq-tutorial_llama3.2_1b_hq":"Llama-3.2","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"FAQ":{"datasets":{"mix-fw_edu_hq-faq_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-faq_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-faq_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-faq_1b_hq":"Gemma-3","mix-fw_edu_hq-faq_granite3_1b_hq":"Granite3","mix-fw_edu_hq-faq_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Table":{"datasets":{"mix-fw_edu_hq-table_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-table_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-table_granite3_1b_hq":"Granite3","mix-fw_edu_hq-table_qwen3_1.7b_hq":"Qwen3","mix-fw_edu_hq-table_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-table_1b_hq":"Gemma-3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Math":{"datasets":{"mix-fw_edu_hq-math_smollm2_1.7b_hq":"SmolLM2","mix-fw_e
du_hq-math_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-math_granite3_1b_hq":"Granite3","mix-fw_edu_hq-math_1b_hq":"Gemma-3","mix-fw_edu_hq-math_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-math_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}}}}' data-datafiles="../data/benchmark-results.csv"></div>
25
+ <style>
26
+ .d3-benchmark-comparison { position: relative; }
27
+ .d3-benchmark-comparison .controls {
28
+ display: flex;
29
+ gap: 16px;
30
+ align-items: flex-end;
31
+ justify-content: center;
32
+ margin: 10px 0 0 0;
33
+ }
34
+ .d3-benchmark-comparison .controls .control-group {
35
+ display: flex;
36
+ flex-direction: column;
37
+ align-items: flex-start;
38
+ gap: 6px;
39
+ }
40
+ .d3-benchmark-comparison .controls label {
41
+ font-size: 18px;
42
+ font-weight: 700;
43
+ color: var(--text-color);
44
+ }
45
+ .d3-benchmark-comparison .controls select {
46
+ appearance: none;
47
+ -webkit-appearance: none;
48
+ -moz-appearance: none;
49
+ border: 1px solid var(--border-color);
50
+ border-radius: 8px;
51
+ padding: 6px 28px 6px 10px;
52
+ background-color: var(--surface-bg);
53
+ color: var(--text-color);
54
+ font-size: 18px;
55
+ line-height: 1.2;
56
+ background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
57
+ background-repeat: no-repeat;
58
+ background-position: right 8px center;
59
+ }
60
+ .d3-benchmark-comparison .controls select:focus-visible {
61
+ outline: 2px solid var(--primary-color);
62
+ outline-offset: 2px;
63
+ }
64
+ .d3-benchmark-comparison .legend {
65
+ display: flex;
66
+ flex-direction: column;
67
+ align-items: flex-start;
68
+ gap: 6px;
69
+ margin: 8px 0 0 0;
70
+ padding-bottom: 4px;
71
+ }
72
+ .d3-benchmark-comparison .legend .legend-title {
73
+ font-size: 18px;
74
+ font-weight: 700;
75
+ color: var(--text-color);
76
+ }
77
+ .d3-benchmark-comparison .legend .items {
78
+ display: flex;
79
+ flex-wrap: wrap;
80
+ gap: 8px 14px;
81
+ }
82
+ .d3-benchmark-comparison .legend .item {
83
+ display: inline-flex;
84
+ align-items: center;
85
+ gap: 6px;
86
+ white-space: nowrap;
87
+ font-size: 18px;
88
+ color: var(--text-color);
89
+ cursor: pointer;
90
+ }
91
+ .d3-benchmark-comparison .legend .item.ghost { opacity: .25; }
92
+ .d3-benchmark-comparison .legend .swatch {
93
+ width: 14px;
94
+ height: 14px;
95
+ border-radius: 3px;
96
+ border: 1px solid var(--border-color);
97
+ }
98
+ .d3-benchmark-comparison .bar.ghost { opacity: .25; }
99
+ .d3-benchmark-comparison .value-label.ghost { opacity: .25; }
100
+ .d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; }
101
+ .d3-benchmark-comparison .line-path.ghost { opacity: .15; }
102
+ .d3-benchmark-comparison .line-dot.ghost { opacity: .15; }
103
+ .d3-benchmark-comparison .baseline.ghost { opacity: .1; }
104
+ .d3-benchmark-comparison .axes path { display: none; }
105
+ .d3-benchmark-comparison .axes line { stroke: var(--axis-color); }
106
+ .d3-benchmark-comparison .axes text { fill: var(--tick-color); }
107
+ .d3-benchmark-comparison .grid line { stroke: var(--grid-color); }
108
+ .d3-benchmark-comparison .hover-line {
109
+ stroke: var(--text-color);
110
+ stroke-opacity: 0.25;
111
+ stroke-width: 1;
112
+ pointer-events: none;
113
+ }
114
+ .d3-benchmark-comparison .d3-tooltip {
115
+ position: absolute;
116
+ top: 0px;
117
+ left: 0px;
118
+ transform: translate(-9999px, -9999px);
119
+ pointer-events: none;
120
+ padding: 8px 10px;
121
+ border-radius: 8px;
122
+ font-size: 18px;
123
+ line-height: 1.35;
124
+ border: 1px solid var(--border-color);
125
+ background: var(--surface-bg);
126
+ color: var(--text-color);
127
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
128
+ opacity: 0;
129
+ transition: opacity .12s ease;
130
+ text-align: left;
131
+ z-index: 10;
132
+ }
133
+ .d3-benchmark-comparison .d3-tooltip .tip-dot {
134
+ display: inline-block;
135
+ width: 10px;
136
+ height: 10px;
137
+ border-radius: 3px;
138
+ border: 1px solid var(--border-color);
139
+ margin-right: 6px;
140
+ vertical-align: middle;
141
+ }
142
+ </style>
143
+ <script>
144
+ (() => {
145
// Run `cb` as soon as D3 is usable. When it is not loaded yet, reuse (or
// create) the script tag with id `d3-cdn-script` and wait for its `load` event.
const ensureD3 = (cb) => {
  const d3Usable = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  if (d3Usable()) {
    return cb();
  }

  let tag = document.getElementById('d3-cdn-script');
  if (!tag) {
    tag = document.createElement('script');
    tag.id = 'd3-cdn-script';
    tag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(tag);
  }

  const runWhenUsable = () => {
    if (d3Usable()) cb();
  };
  tag.addEventListener('load', runWhenUsable, { once: true });
  if (window.d3) runWhenUsable();
};
152
+
153
+ const bootstrap = () => {
154
+ const scriptEl = document.currentScript;
155
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
156
+ if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) {
157
+ const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
158
+ container = cs[cs.length - 1] || null;
159
+ }
160
+ if (!container) return;
161
+ if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
162
+
163
+ container.style.position = container.style.position || 'relative';
164
+
165
+ // ─── READ CONFIG ───
166
+ let mountEl = container;
167
+ while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
168
+ let cfg = {};
169
+ try {
170
+ const raw = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
171
+ if (raw && raw.trim()) cfg = raw.trim().startsWith('{') ? JSON.parse(raw) : {};
172
+ } catch (_) {}
173
+
174
+ // ─── NORMALIZE DATASETS CONFIG ───
175
+ // Accepts: { "key": "Name" } or { "key": { display, color, shaded, baseline } }
176
+ // Returns: { key: { display, color, shaded, baseline } }
177
// Bring a datasets config into the uniform shape { key: { ...options } }.
// A string value is shorthand for { display: <string> }; any other value is
// shallow-copied. A missing config yields an empty object.
function normalizeDatasets(raw) {
  const source = raw || {};
  const normalized = {};
  for (const key of Object.keys(source)) {
    const entry = source[key];
    if (typeof entry === 'string') {
      normalized[key] = { display: entry };
    } else {
      normalized[key] = { ...entry };
    }
  }
  return normalized;
}
184
+
185
+ // ─── SETUP SUPPORT ───
186
+ const SETUPS = cfg.setups || null;
187
+ const setupNames = SETUPS ? Object.keys(SETUPS) : [];
188
+ const AVG_SETUP_KEY = 'Average (all setups)';
189
+ const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 ? 'average' : null);
190
+ let currentSetup = SETUPS ? (defaultSetupCfg === 'average' ? AVG_SETUP_KEY : (defaultSetupCfg && setupNames.includes(defaultSetupCfg) ? defaultSetupCfg : setupNames[0])) : null;
191
+ let DATASETS = SETUPS ? (currentSetup === AVG_SETUP_KEY ? {} : normalizeDatasets(SETUPS[currentSetup].datasets)) : normalizeDatasets(cfg.datasets);
192
+ let avgDatasets = {};
193
+ let parsedData = [];
194
+
195
+ const RUN_COL = cfg.runColumn || 'runname';
196
+ const STEP_COL = cfg.stepColumn || 'steps';
197
+ const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
198
+ const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
199
+ const defaultView = cfg.defaultView || 'bar';
200
+ const uid = Math.random().toString(36).slice(2, 8);
201
+
202
+ // ─── DATASET ACCESSORS ───
203
// Label configured for a run, or the raw run name when it has no entry.
function displayName(raw) {
  const entry = DATASETS[raw];
  return entry ? entry.display : raw;
}

// True when the run's config entry has a truthy `baseline` option.
function isBaseline(raw) {
  const entry = DATASETS[raw];
  return Boolean(entry && entry.baseline);
}

// True when the run's config entry has a truthy `shaded` option.
function isShaded(raw) {
  const entry = DATASETS[raw];
  return Boolean(entry && entry.shaded);
}

// Colour fixed in the config for this run; falsy when none is set.
function pinnedColor(raw) {
  const entry = DATASETS[raw];
  return entry && entry.color;
}

// Id for this run's stripe <pattern>, made unique per chart instance via `uid`.
function stripePatternId(raw) {
  const safeName = raw.replace(/[^a-zA-Z0-9]/g, '_');
  return 'stripe-' + uid + '-' + safeName;
}
208
+
209
+ const METRIC_NAMES = {
210
+ 'agg_score_macro': 'Aggregate Score (Macro)',
211
+ 'agg_score_micro': 'Aggregate Score (Micro)',
212
+ 'agg_score_RC': 'Reading Comprehension',
213
+ 'agg_score_GK': 'General Knowledge',
214
+ 'agg_score_NLU': 'Natural Language Understanding',
215
+ 'agg_score_MATH': 'Math',
216
+ 'agg_score_TABLE': 'Table Understanding',
217
+ 'agg_score_RES': 'Reasoning',
218
+ 'lighteval|arc_cf:easy|3/prob_norm_token': 'ARC-Easy',
219
+ 'lighteval|drop|3/prob_norm_token': 'DROP',
220
+ 'lighteval|gsm8k|3/prob_norm_token': 'GSM8K',
221
+ 'lighteval|hellaswag_cf|3/prob_norm_token': 'HellaSwag',
222
+ 'lighteval|openbookqa_cf|3/prob_norm_token': 'OpenBookQA',
223
+ 'lighteval|piqa_cf|3/prob_norm_token': 'PIQA',
224
+ 'lighteval|squad_v2|3/prob_norm_token': 'SQuAD v2',
225
+ 'lighteval|treb_qa|3/prob_norm_token': 'TriviaQA',
226
+ 'lighteval|wikitablequestions|3/prob_norm_token': 'WikiTableQuestions',
227
+ 'lighteval|winogrande_cf|3/prob_norm_token': 'Winogrande',
228
+ 'lighteval|xcsqa_cf|3/prob_norm_token': 'XCSQA',
229
+ 'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux'
230
+ };
231
+
232
+ // Tooltip
233
+ let tip = container.querySelector('.d3-tooltip'), tipInner;
234
+ if (!tip) {
235
+ tip = document.createElement('div'); tip.className = 'd3-tooltip';
236
+ tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tip.appendChild(tipInner);
237
+ container.appendChild(tip);
238
+ } else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
239
+
240
+ // SVG
241
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
242
+ const gRoot = svg.append('g');
243
+ const defs = svg.append('defs');
244
+
245
+ // State
246
+ let allData = [];
247
+ let metricKeys = [];
248
+ let currentMetric = defaultMetric;
249
+ let currentView = defaultView;
250
+ let colorMap = {};
251
+ let highlight = null;
252
+
253
+ // ─── HELPERS ───
254
// Human-readable metric title; unknown keys are shown as-is.
function metricName(key) {
  const label = METRIC_NAMES[key];
  return label || key;
}

// Convert a training step count into a token count.
function stepsToTokens(step) {
  return step * TOKENS_PER_STEP;
}

// Abbreviate a token count: billions as "B", millions as "M", otherwise grouped digits.
function formatTokens(tokens) {
  if (tokens >= 1e9) {
    return d3.format('.2f')(tokens / 1e9) + 'B';
  }
  if (tokens >= 1e6) {
    return d3.format('.1f')(tokens / 1e6) + 'M';
  }
  return d3.format(',')(tokens);
}

// Abbreviate a step count: thousands as "K", smaller values unchanged.
function formatStep(step) {
  return step >= 1000 ? d3.format('.0f')(step / 1000) + 'K' : String(step);
}

// Compact "tokens (steps)" label.
function stepLabelShort(step) {
  const tokensText = formatTokens(stepsToTokens(step));
  const stepsText = formatStep(step);
  return `${tokensText} (${stepsText})`;
}

// Verbose "tokens Tokens (steps Steps)" label.
function stepLabelLong(step) {
  const tokensText = formatTokens(stepsToTokens(step));
  const stepsText = formatStep(step);
  return `${tokensText} Tokens (${stepsText} Steps)`;
}
268
+
269
// Return `n` categorical colours. Uses `window.ColorPalettes.getColors` when
// it exists; if that is missing or throws, falls back to D3's Tableau10
// scheme (or a hard-coded copy of it), truncated to `n` entries.
function getCategoricalColors(n) {
  try {
    const palettes = window.ColorPalettes;
    if (palettes && typeof palettes.getColors === 'function') {
      return palettes.getColors('categorical', n);
    }
  } catch (_) {
    // Deliberately ignored: any failure here means "use the fallback below".
  }
  const fallback = d3.schemeTableau10 || [
    '#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f',
    '#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ac',
  ];
  return fallback.slice(0, n);
}
273
+
274
// Fill `colorMap` (run name -> colour) once; does nothing when it already has
// entries. Runs with a pinned colour keep it, the remaining runs are given
// palette colours in alphabetical order of their raw names.
function initColors() {
  if (Object.keys(colorMap).length > 0) return;

  const runNames = [...d3.group(allData, d => d[RUN_COL]).keys()].sort();
  const needsPalette = [];
  for (const run of runNames) {
    const fixed = pinnedColor(run);
    if (fixed) {
      colorMap[run] = fixed;
    } else {
      needsPalette.push(run);
    }
  }

  const palette = getCategoricalColors(needsPalette.length);
  needsPalette.forEach((run, index) => {
    // Wrap around when there are more runs than palette colours.
    colorMap[run] = palette[index % palette.length];
  });
}
286
+
287
+ // ─── SETUP HELPERS ───
288
// Point `allData` at the rows whose run is configured in DATASETS (all rows
// when DATASETS is empty) and carry the column list over from `parsedData`.
function filterData() {
  const known = new Set(Object.keys(DATASETS));
  if (known.size === 0) {
    allData = parsedData;
  } else {
    allData = parsedData.filter(row => known.has(row[RUN_COL]));
  }
  allData.columns = parsedData.columns;
}
293
+
294
// Build the synthetic "average over all setups" series.
//
// Runs are matched across setups by display name. A display name is kept when
// it was collected at least as many times as there are setups. For each kept
// name and each step, every non-key column is averaged over the matching rows
// that exist at that step (values that are not numbers are ignored; a column
// with no numeric value becomes 0).
//
// rawData: parsed CSV rows (optionally with a `columns` array).
// Returns { data, datasets }: the averaged rows, keyed by a synthetic
// `__avg__<name>` run name, and the dataset options for those run names.
// With fewer than two setups both are empty.
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasets: {} };

  const perSetup = setupNames.map(sName => normalizeDatasets(SETUPS[sName].datasets));

  const displayToRaws = {};
  for (const ds of perSetup) {
    for (const [raw, opts] of Object.entries(ds)) {
      if (!displayToRaws[opts.display]) displayToRaws[opts.display] = [];
      displayToRaws[opts.display].push(raw);
    }
  }
  const fullDisplay = Object.entries(displayToRaws)
    .filter(([, raws]) => raws.length >= setupNames.length);

  // Index rows by run and NUMERIC step. The lookup below uses numeric steps,
  // so keying on the raw cell text would miss rows written as e.g. "1000.0".
  const byRunStep = {};
  for (const row of rawData) byRunStep[row[RUN_COL] + '|' + (+row[STEP_COL])] = row;

  const steps = Array.from(new Set(rawData.map(r => +r[STEP_COL]))).sort((a, b) => a - b);
  const cols = rawData.columns || Object.keys(rawData[0] || {});
  const result = [];
  const dsMap = {};

  for (const [display, raws] of fullDisplay) {
    const avgRaw = '__avg__' + display.replace(/[^a-zA-Z0-9]/g, '_');

    // Take the options from the first setup that has this display name.
    let firstOpts = {};
    for (const ds of perSetup) {
      const match = Object.values(ds).find(o => o.display === display);
      if (match) { firstOpts = match; break; }
    }
    dsMap[avgRaw] = { display, ...firstOpts };

    for (const step of steps) {
      const rows = raws.map(r => byRunStep[r + '|' + step]).filter(Boolean);
      if (!rows.length) continue;
      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of cols) {
        if (col === RUN_COL || col === STEP_COL) continue;
        const vals = rows.map(r => +r[col]).filter(v => !Number.isNaN(v));
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      result.push(avgRow);
    }
  }
  return { data: result, datasets: dsMap };
}
331
+
332
// Make `name` the active setup: rebuild DATASETS for it, re-attach baseline
// runs, reset the colour assignment, then re-filter and redraw.
function switchSetup(name) {
  currentSetup = name;
  DATASETS = name === AVG_SETUP_KEY
    ? { ...avgDatasets }
    : normalizeDatasets(SETUPS[name].datasets);

  // Re-add baselines from any setup, provided the loaded data has rows for them.
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const raw of Object.keys(ds)) {
      const opts = ds[raw];
      if (!opts.baseline || DATASETS[raw]) continue;
      if (!parsedData.some(row => row[RUN_COL] === raw)) continue;
      DATASETS[raw] = { ...opts };
    }
  }

  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
354
+
355
// Show the tooltip with `html` near (x, y). It sits to the right of the
// point unless that would overflow the container, in which case it flips to
// the left; it is never placed above the container's top edge.
function showTip(html, x, y) {
  tipInner.innerHTML = html;
  const tipWidth = tip.offsetWidth || 180;
  const containerWidth = container.clientWidth || 800;
  const overflowsRight = x + tipWidth + 20 > containerWidth;
  const left = overflowsRight ? x - tipWidth - 12 : x + 12;
  const top = Math.max(0, y - 20);
  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
}

// Fade the tooltip out and park it far outside the visible area.
function hideTip() {
  const { style } = tip;
  style.opacity = '0';
  style.transform = 'translate(-9999px, -9999px)';
}
367
+
368
// Apply the current `highlight` to the chart: every mark and legend item
// whose name differs from it gets the `ghost` class; with no highlight the
// class is cleared everywhere.
function updateHighlight() {
  const isDimmed = d => highlight && d.name !== highlight;
  const markSelectors = [
    'rect.bar',
    'text.value-label',
    '.line-path',
    '.line-dot',
    '.baseline-vline',
    '.baseline-vlabel',
    '.baseline-hline',
    '.baseline-hlabel',
  ];
  for (const selector of markSelectors) {
    gRoot.selectAll(selector).classed('ghost', isDimmed);
  }

  // Legend items are plain DOM nodes that carry their name in `data-name`.
  container.querySelectorAll('.legend .item').forEach(el => {
    el.classList.toggle('ghost', highlight && el.getAttribute('data-name') !== highlight);
  });
}
381
+
382
+ // ─── AUTO-DETECT METRICS from CSV columns ───
383
// Work out which CSV columns are plottable metrics.
//
// Known aggregate scores come first, in a fixed order, followed by every
// other column whose value in the first data row converts to a number. The
// run, step and `seed` columns are never metrics. When no rows are loaded
// there is nothing to probe, so only the aggregate columns are returned.
function detectMetrics(columns) {
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter(k => columns.includes(k));
  const aggSet = new Set(agg);

  // Guard: indexing into a missing first row would throw.
  const sample = allData[0];
  const ind = sample
    ? columns.filter(k => !skip.has(k) && !aggSet.has(k) && !Number.isNaN(Number(sample[k])))
    : [];

  return [...agg, ...ind];
}
390
+
391
+ // ─── BAR CHART ───
392
// Draw the horizontal bar chart for `currentMetric`.
//
// Each run contributes its value at its highest recorded step. Runs flagged
// as baseline are not drawn as bars; they appear as dashed vertical reference
// lines with a label above the plot. Bars, labels and reference lines are
// data-joined by display name so that re-renders animate in place.
function renderBar() {
  const width = container.clientWidth || 800;
  const hasBaselines = allData.some(r => isBaseline(r[RUN_COL]));
  // Extra top margin leaves room for the baseline labels drawn above the plot.
  const margin = { top: hasBaselines ? 20 : 12, right: 56, bottom: 32, left: 190 };

  // One entry per run: the metric value at that run's last step.
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const finalData = [];
  for (const [raw, rows] of grouped) {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const row = rows.find(r => +r[STEP_COL] === maxStep);
    if (row) finalData.push({ name: displayName(raw), rawName: raw, value: +row[currentMetric] });
  }
  finalData.sort((a, b) => b.value - a.value);

  const barData = finalData.filter(d => !isBaseline(d.rawName));
  const baselineData = finalData.filter(d => isBaseline(d.rawName));

  const height = window.innerHeight || 480;
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // Fall back to 1 when there is no usable maximum (no rows, all NaN, or all
  // zero) so the scale never gets a NaN or zero-width domain.
  const maxValue = d3.max(finalData, d => d.value) || 1;
  const x = d3.scaleLinear().domain([0, maxValue * 1.05]).range([0, innerWidth]);
  const y = d3.scaleBand().domain(barData.map(d => d.name)).range([0, innerHeight]).padding(0.2);

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(x.ticks(5)).join('line')
      .attr('x1', d => x(d)).attr('x2', d => x(d)).attr('y1', 0).attr('y2', innerHeight);
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--text-color)').style('font-size', '18px').style('font-weight', '600');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Stripe patterns for shaded bars
  barData.forEach(d => {
    if (!isShaded(d.rawName)) return;
    const c = colorMap[d.rawName] || '#999';
    const patternId = stripePatternId(d.rawName);
    // Drop the pattern left by a previous render: otherwise every render adds
    // another element with the same id, and the oldest one (possibly with an
    // outdated colour) keeps being the one the bars resolve to.
    defs.select(`#${patternId}`).remove();
    const pat = defs.append('pattern').attr('id', patternId)
      .attr('width', 6).attr('height', 6).attr('patternUnits', 'userSpaceOnUse').attr('patternTransform', 'rotate(45)');
    pat.append('rect').attr('width', 6).attr('height', 6).attr('fill', c).attr('opacity', 0.35);
    pat.append('line').attr('x1', 0).attr('y1', 0).attr('x2', 0).attr('y2', 6).attr('stroke', c).attr('stroke-width', 2.5);
  });

  function barFill(d) {
    if (isShaded(d.rawName)) return `url(#${stripePatternId(d.rawName)})`;
    return colorMap[d.rawName] || 'var(--primary-color)';
  }

  // Hover behaviour shared by entering and updating bars.
  const barTip = (ev, d) => {
    const [mx, my] = d3.pointer(ev, container);
    showTip(`<strong>${d.name}</strong><br/>${metricName(currentMetric)}: <strong>${d.value.toFixed(3)}</strong>`, mx, my);
  };
  const onBarEnter = (ev, d) => { highlight = d.name; updateHighlight(); };
  const onBarLeave = () => { hideTip(); highlight = null; updateHighlight(); };

  // Bars
  gRoot.selectAll('rect.bar').data(barData, d => d.name).join(
    enter => enter.append('rect').attr('class', 'bar')
      .attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
      .attr('fill', d => barFill(d))
      .attr('width', 0)
      .on('mouseenter', onBarEnter)
      .on('mousemove', barTip)
      .on('mouseleave', onBarLeave)
      .transition().duration(300).attr('width', d => Math.max(0, x(d.value))),
    update => update
      .on('mouseenter', onBarEnter)
      .on('mousemove', barTip)
      .on('mouseleave', onBarLeave)
      .transition().duration(300)
      .attr('y', d => y(d.name)).attr('height', y.bandwidth())
      .attr('width', d => Math.max(0, x(d.value)))
      .attr('fill', d => barFill(d)),
    exit => exit.transition().duration(200).attr('width', 0).remove()
  );

  // Value labels
  gRoot.selectAll('text.value-label').data(barData, d => d.name).join(
    enter => enter.append('text').attr('class', 'value-label')
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .attr('dy', '0.35em').attr('fill', 'var(--text-color)').attr('font-size', 18)
      .text(d => d.value.toFixed(3)),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .text(d => d.value.toFixed(3)),
    exit => exit.remove()
  );

  // Baseline vertical reference lines
  gRoot.selectAll('.baseline-vline').data(baselineData, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-vline baseline')
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-vlabel').data(baselineData, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-vlabel baseline')
      .attr('x', d => x(d.value)).attr('y', -4)
      .attr('text-anchor', 'middle').attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value))
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    exit => exit.remove()
  );
}
521
+
522
+ // ─── LINE CHART ───
523
/**
 * Draw the line ("Training Progression") view into gRoot.
 *
 * Non-baseline runs become one line plus dots over training steps; baseline
 * runs become a dashed horizontal reference line at the value of their
 * highest step. Reads allData, currentMetric and colorMap from the enclosing
 * scope and sizes the SVG from the container width and the window height.
 */
function renderLine() {
  const width = container.clientWidth || 800;
  const margin = { top: 16, right: 50, bottom: 48, left: 60 };
  const height = window.innerHeight || 480;
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // Build series: one entry per run, points sorted by step
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const series = [];
  const baselineSeries = [];
  for (const [raw, rows] of grouped) {
    const pts = rows.map(r => ({ step: +r[STEP_COL], value: +r[currentMetric] })).sort((a, b) => a.step - b.step);
    const entry = { name: displayName(raw), rawName: raw, values: pts };
    if (isBaseline(raw)) {
      // Baselines are reduced to the value at their last step
      entry.finalValue = pts[pts.length - 1].value;
      baselineSeries.push(entry);
    } else {
      series.push(entry);
    }
  }

  // The x domain and hover snapping only consider steps of non-baseline runs
  const allSteps = Array.from(new Set(allData.filter(r => !isBaseline(r[RUN_COL])).map(r => +r[STEP_COL]))).sort((a, b) => a - b);
  const allValues = [...series, ...baselineSeries].flatMap(s => s.finalValue != null ? [s.finalValue] : s.values.map(v => v.value));

  const x = d3.scaleLinear().domain(d3.extent(allSteps)).range([0, innerWidth]);
  const yMin = d3.min(allValues), yMax = d3.max(allValues), yPad = (yMax - yMin) * 0.08;
  const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(y.ticks(6)).join('line')
      .attr('x1', 0).attr('x2', innerWidth).attr('y1', d => y(d)).attr('y2', d => y(d));
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).ticks(6).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Axis labels
  gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
    .attr('x', innerWidth / 2).attr('y', innerHeight + 38)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text('Tokens (Steps)');

  gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
    .attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text(metricName(currentMetric));

  // Baseline horizontal reference lines
  gRoot.selectAll('.baseline-hline').data(baselineSeries, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-hline baseline')
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-hlabel').data(baselineSeries, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-hlabel baseline')
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .attr('text-anchor', 'start')
      .attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    exit => exit.remove()
  );

  // Lines (non-baseline)
  const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
  gRoot.selectAll('.line-path').data(series, d => d.name).join(
    enter => enter.append('path').attr('class', 'line-path')
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    update => update.transition().duration(300)
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    exit => exit.remove()
  );

  // Dots (non-baseline)
  const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
  gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
    enter => enter.append('circle').attr('class', 'line-dot')
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value)).attr('r', 3)
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('stroke', 'var(--surface-bg)').attr('stroke-width', 1),
    update => update.transition().duration(300)
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value))
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)'),
    exit => exit.remove()
  );

  // Hover overlay: a transparent rect that snaps a vertical guide to the nearest step
  gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
    .attr('y1', 0).attr('y2', innerHeight).style('display', 'none');

  gRoot.selectAll('.hover-overlay').data([0]).join('rect').attr('class', 'hover-overlay')
    .attr('width', innerWidth).attr('height', innerHeight)
    .attr('fill', 'none').attr('pointer-events', 'all')
    .on('mousemove', (ev) => {
      // Nothing to snap to when the data holds only baseline runs
      if (!allSteps.length) return;
      const [mx] = d3.pointer(ev, gRoot.node());
      // Invert once; the pointer position does not change during the scan
      const target = x.invert(mx);
      const nearest = allSteps.reduce((best, s) => Math.abs(s - target) < Math.abs(best - target) ? s : best, allSteps[0]);
      gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);

      // Runs without a point at this exact step are left out of the tooltip
      const entries = series.map(s => {
        const pt = s.values.find(v => v.step === nearest);
        return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
      }).filter(Boolean);
      baselineSeries.forEach(s => {
        entries.push({ name: s.name, rawName: s.rawName, value: s.finalValue });
      });
      entries.sort((a, b) => b.value - a.value);

      let html = `<div style="font-weight:700;margin-bottom:4px;">${stepLabelLong(nearest)}</div>`;
      entries.forEach(e => {
        // Same fallbacks as the drawn marks, so a swatch never gets "background:undefined"
        const swatch = colorMap[e.rawName] || (isBaseline(e.rawName) ? '#999' : 'var(--primary-color)');
        html += `<div><span class="tip-dot" style="background:${swatch}"></span>${e.name}: <strong>${e.value.toFixed(3)}</strong></div>`;
      });
      const [cx, cy] = d3.pointer(ev, container);
      showTip(html, cx, cy);
    })
    .on('mouseleave', () => {
      gRoot.select('.hover-line').style('display', 'none');
      hideTip();
    });
}
674
+
675
+ // ─── RENDER ───
676
// Redraw the active view from scratch; does nothing while no rows are loaded.
function render() {
  const hasRows = allData.length > 0;
  if (!hasRows) return;

  initColors();

  // Wipe the previous drawing (chart layer first, then pattern defs)
  for (const layer of [gRoot, defs]) {
    layer.selectAll('*').remove();
  }

  if (currentView === 'bar') {
    renderBar();
  } else {
    renderLine();
  }
}
683
+
684
+ // ─── UI ───
685
/**
 * Build the static chrome of the chart and append it to `container`:
 * a controls row (optional Setup select, View select, empty Metric select)
 * followed by the legend shell. The Metric select gets no options here.
 */
function buildUI() {
  // Local DOM factories
  const makeOption = (value, text, selected) => {
    const node = document.createElement('option');
    node.value = value;
    node.textContent = text;
    if (selected) node.selected = true;
    return node;
  };
  const makeLabelledSelect = (prefix, caption) => {
    const group = document.createElement('div');
    group.className = 'control-group';
    const label = document.createElement('label');
    label.setAttribute('for', prefix + '-' + uid);
    label.textContent = caption;
    const select = document.createElement('select');
    select.id = prefix + '-' + uid;
    group.appendChild(label);
    group.appendChild(select);
    return { group, select };
  };

  const controls = document.createElement('div');
  controls.className = 'controls';

  // Setup selector: only when the config declares setups
  if (SETUPS && setupNames.length > 0) {
    const { group, select } = makeLabelledSelect('setup', 'Setup');
    for (const name of setupNames) {
      select.appendChild(makeOption(name, name, name === currentSetup));
    }
    // The averaged entry is offered once there are at least two setups
    if (setupNames.length >= 2) {
      select.appendChild(makeOption(AVG_SETUP_KEY, AVG_SETUP_KEY, currentSetup === AVG_SETUP_KEY));
    }
    select.addEventListener('change', () => { switchSetup(select.value); });
    controls.appendChild(group);
  }

  // View selector
  const view = makeLabelledSelect('view', 'View');
  const viewChoices = [['bar', 'Final Score'], ['line', 'Training Progression']];
  for (const [val, text] of viewChoices) {
    view.select.appendChild(makeOption(val, text, val === currentView));
  }
  view.select.addEventListener('change', () => { currentView = view.select.value; render(); });
  controls.appendChild(view.group);

  // Metric selector (left without options)
  const metric = makeLabelledSelect('metric', 'Metric');
  controls.appendChild(metric.group);

  container.appendChild(controls);

  // Legend shell
  const legend = document.createElement('div');
  legend.className = 'legend';
  legend.innerHTML = '<div class="legend-title">Legend</div><div class="items"></div>';
  container.appendChild(legend);
}
731
+
732
/**
 * (Re)fill the Metric select with one option per entry of `metricKeys`,
 * grouped into "Aggregate Scores" (keys starting with `agg_score`) and
 * "Individual Benchmarks", pre-selecting `currentMetric`.
 *
 * Safe to call repeatedly: existing options are cleared and the change
 * handler is assigned rather than added, so a change triggers one render.
 */
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;
  sel.innerHTML = '';
  const aggGroup = document.createElement('optgroup'); aggGroup.label = 'Aggregate Scores';
  const indGroup = document.createElement('optgroup'); indGroup.label = 'Individual Benchmarks';
  metricKeys.forEach(key => {
    const opt = document.createElement('option'); opt.value = key; opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    if (key.startsWith('agg_score')) aggGroup.appendChild(opt); else indGroup.appendChild(opt);
  });
  // Empty groups are not attached
  if (aggGroup.children.length) sel.appendChild(aggGroup);
  if (indGroup.children.length) sel.appendChild(indGroup);
  // Property assignment replaces the previous handler; addEventListener would
  // stack one more listener (and one more render per change) on every call.
  sel.onchange = () => { currentMetric = sel.value; render(); };
}
747
+
748
/**
 * Rebuild the legend entries: one item per non-baseline run, ordered by the
 * run's `defaultMetric` value at its highest step (descending). Hovering an
 * item highlights that run in the chart.
 */
function buildLegend() {
  const items = container.querySelector('.legend .items');
  if (!items) return;
  items.innerHTML = '';

  // Value of defaultMetric on the row with the largest step (0 if none found)
  const finalScore = (rows) => {
    const lastStep = d3.max(rows, r => +r[STEP_COL]);
    const lastRow = rows.find(r => +r[STEP_COL] === lastStep);
    return lastRow ? +lastRow[defaultMetric] : 0;
  };

  const ranked = Array.from(
    d3.group(allData, d => d[RUN_COL]),
    ([raw, rows]) => ({ raw, score: finalScore(rows) })
  );
  ranked.sort((a, b) => b.score - a.score);

  for (const { raw } of ranked) {
    if (isBaseline(raw)) continue;
    const name = displayName(raw);

    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = colorMap[raw] || '#999';
    if (isShaded(raw)) {
      // Diagonal stripes layered over the solid colour
      swatch.style.backgroundImage = 'repeating-linear-gradient(45deg, transparent, transparent 2px, rgba(255,255,255,0.4) 2px, rgba(255,255,255,0.4) 4px)';
    }

    const caption = document.createElement('span');
    caption.textContent = name;

    const item = document.createElement('span');
    item.className = 'item';
    item.setAttribute('data-name', name);
    item.appendChild(swatch);
    item.appendChild(caption);
    items.appendChild(item);

    item.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    item.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  }
}
778
+
779
+ buildUI();
780
+
781
+ // ─── DATA LOADING ───
782
/**
 * Try each candidate path in order and resolve with the body text of the
 * first one that answers with an OK status.
 *
 * @param {string[]} paths - candidate URLs, tried sequentially.
 * @returns {Promise<string>} text of the first successful response.
 * @throws {Error} 'CSV not found' when every candidate fails; `cause` holds
 *   the last failure (network error or non-OK status) when there was one.
 */
const fetchFirstAvailable = async (paths) => {
  let lastFailure;
  for (const p of paths ?? []) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.text();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Best-effort: remember why this candidate failed, then try the next one
      lastFailure = err;
    }
  }
  throw new Error('CSV not found', lastFailure ? { cause: lastFailure } : undefined);
};
788
+
789
// Walk up from the chart container to the nearest element carrying data-datafiles.
let dataMountEl = container;
while (dataMountEl && !dataMountEl.getAttribute?.('data-datafiles')) { dataMountEl = dataMountEl.parentElement; }

// The attribute is either a JSON array of paths (starts with "[") or one plain path.
let providedData = null;
try {
  const attr = dataMountEl && dataMountEl.getAttribute ? dataMountEl.getAttribute('data-datafiles') : null;
  if (attr && attr.trim()) providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
} catch (err) {
  // Still best-effort (the default path below is used), but no longer silent:
  // a typo in the attribute would otherwise load the default CSV unnoticed.
  console.warn('Ignoring malformed data-datafiles attribute:', err);
}

// Bare file names (no "/") are looked up under /data/; other values are used as given.
const ensurePrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
const csvPaths = providedData
  ? (Array.isArray(providedData) ? providedData.map(ensurePrefix) : [ensurePrefix(providedData)])
  : ['../data/benchmark-results.csv'];
801
+
802
// Load the CSV, add averaged rows when several setups exist, draw the chart
// and keep it redrawn on resize. Any failure is shown inside the container.
(async () => {
  try {
    const csvText = await fetchFirstAvailable(csvPaths);
    const rows = d3.csvParse(csvText);
    parsedData = rows;

    const hasSeveralSetups = SETUPS && setupNames.length >= 2;
    if (hasSeveralSetups) {
      const { data: averagedRows, datasets: averagedDatasets } = computeAverageData(rows);
      avgDatasets = averagedDatasets;
      parsedData = rows.concat(averagedRows);
      // concat() returns a plain array, so carry the column list over
      parsedData.columns = rows.columns;
      if (currentSetup === AVG_SETUP_KEY) DATASETS = { ...avgDatasets };
    }

    filterData();
    metricKeys = detectMetrics(allData.columns);
    if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];

    populateMetricSelect();
    render();
    buildLegend();

    const redraw = () => render();
    if (window.ResizeObserver) {
      new ResizeObserver(redraw).observe(container);
    } else {
      window.addEventListener('resize', redraw);
    }
  } catch (e) {
    const pre = document.createElement('pre');
    pre.textContent = 'Data load error: ' + (e && e.message ? e.message : e);
    pre.style.color = 'var(--danger, #b00020)';
    pre.style.fontSize = '12px';
    container.appendChild(pre);
  }
})();
830
+ };
831
+
832
+ if (document.readyState === 'loading') { document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true }); }
833
+ else { ensureD3(bootstrap); }
834
+ })();
835
+ </script>
836
+ </body>
837
+ </html>
app/presentation/se2026/charts/benchmark-prompts.html ADDED
@@ -0,0 +1,837 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="dark">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Benchmark Comparison</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
8
+ <style>
9
+ :root {
10
+ --text-color: rgba(255,255,255,0.88);
11
+ --muted-color: rgba(255,255,255,0.45);
12
+ --surface-bg: rgba(30,30,40,0.95);
13
+ --border-color: rgba(255,255,255,0.1);
14
+ --axis-color: rgba(255,255,255,0.15);
15
+ --tick-color: rgba(255,255,255,0.5);
16
+ --grid-color: rgba(255,255,255,0.06);
17
+ --primary-color: #7c6ff7;
18
+ }
19
+ * { box-sizing: border-box; margin: 0; padding: 0; }
20
+ html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
21
+ </style>
22
+ </head>
23
+ <body>
24
+ <div class="d3-benchmark-comparison" data-config='{"datasets":{"mix-fw_edu_hq-table_1b_hq":{"display":"Table","color":"#EBA937"},"mix-fw_edu_hq-math_1b_hq":{"display":"Math","color":"#e06b9e"},"mix-fw_edu_hq-faq_1b_hq":{"display":"FAQ","color":"#5b9bd5"},"mix-fw_edu_hq-tutorial_1b_hq":{"display":"Tutorial","color":"#8bc474"},"mix-fw_edu_hq-article_1b_hq":{"display":"Article","color":"#9a8ec2"},"mix-fw_edu_hq-commentary_1b_hq":{"display":"Commentary","color":"#c9a046"},"mix-fw_edu_hq-discussion_1b_hq":{"display":"Discussion","color":"#e07b54"},"dclm":{"display":"DCLM","baseline":true}}}' data-datafiles="../data/benchmark-results.csv"></div>
25
+ <style>
26
+ .d3-benchmark-comparison { position: relative; }
27
+ .d3-benchmark-comparison .controls {
28
+ display: flex;
29
+ gap: 16px;
30
+ align-items: flex-end;
31
+ justify-content: center;
32
+ margin: 10px 0 0 0;
33
+ }
34
+ .d3-benchmark-comparison .controls .control-group {
35
+ display: flex;
36
+ flex-direction: column;
37
+ align-items: flex-start;
38
+ gap: 6px;
39
+ }
40
+ .d3-benchmark-comparison .controls label {
41
+ font-size: 18px;
42
+ font-weight: 700;
43
+ color: var(--text-color);
44
+ }
45
+ .d3-benchmark-comparison .controls select {
46
+ appearance: none;
47
+ -webkit-appearance: none;
48
+ -moz-appearance: none;
49
+ border: 1px solid var(--border-color);
50
+ border-radius: 8px;
51
+ padding: 6px 28px 6px 10px;
52
+ background-color: var(--surface-bg);
53
+ color: var(--text-color);
54
+ font-size: 18px;
55
+ line-height: 1.2;
56
+ background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
57
+ background-repeat: no-repeat;
58
+ background-position: right 8px center;
59
+ }
60
+ .d3-benchmark-comparison .controls select:focus-visible {
61
+ outline: 2px solid var(--primary-color);
62
+ outline-offset: 2px;
63
+ }
64
+ .d3-benchmark-comparison .legend {
65
+ display: flex;
66
+ flex-direction: column;
67
+ align-items: flex-start;
68
+ gap: 6px;
69
+ margin: 8px 0 0 0;
70
+ padding-bottom: 4px;
71
+ }
72
+ .d3-benchmark-comparison .legend .legend-title {
73
+ font-size: 18px;
74
+ font-weight: 700;
75
+ color: var(--text-color);
76
+ }
77
+ .d3-benchmark-comparison .legend .items {
78
+ display: flex;
79
+ flex-wrap: wrap;
80
+ gap: 8px 14px;
81
+ }
82
+ .d3-benchmark-comparison .legend .item {
83
+ display: inline-flex;
84
+ align-items: center;
85
+ gap: 6px;
86
+ white-space: nowrap;
87
+ font-size: 18px;
88
+ color: var(--text-color);
89
+ cursor: pointer;
90
+ }
91
+ .d3-benchmark-comparison .legend .item.ghost { opacity: .25; }
92
+ .d3-benchmark-comparison .legend .swatch {
93
+ width: 14px;
94
+ height: 14px;
95
+ border-radius: 3px;
96
+ border: 1px solid var(--border-color);
97
+ }
98
+ .d3-benchmark-comparison .bar.ghost { opacity: .25; }
99
+ .d3-benchmark-comparison .value-label.ghost { opacity: .25; }
100
+ .d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; }
101
+ .d3-benchmark-comparison .line-path.ghost { opacity: .15; }
102
+ .d3-benchmark-comparison .line-dot.ghost { opacity: .15; }
103
+ .d3-benchmark-comparison .baseline.ghost { opacity: .1; }
104
+ .d3-benchmark-comparison .axes path { display: none; }
105
+ .d3-benchmark-comparison .axes line { stroke: var(--axis-color); }
106
+ .d3-benchmark-comparison .axes text { fill: var(--tick-color); }
107
+ .d3-benchmark-comparison .grid line { stroke: var(--grid-color); }
108
+ .d3-benchmark-comparison .hover-line {
109
+ stroke: var(--text-color);
110
+ stroke-opacity: 0.25;
111
+ stroke-width: 1;
112
+ pointer-events: none;
113
+ }
114
+ .d3-benchmark-comparison .d3-tooltip {
115
+ position: absolute;
116
+ top: 0px;
117
+ left: 0px;
118
+ transform: translate(-9999px, -9999px);
119
+ pointer-events: none;
120
+ padding: 8px 10px;
121
+ border-radius: 8px;
122
+ font-size: 18px;
123
+ line-height: 1.35;
124
+ border: 1px solid var(--border-color);
125
+ background: var(--surface-bg);
126
+ color: var(--text-color);
127
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
128
+ opacity: 0;
129
+ transition: opacity .12s ease;
130
+ text-align: left;
131
+ z-index: 10;
132
+ }
133
+ .d3-benchmark-comparison .d3-tooltip .tip-dot {
134
+ display: inline-block;
135
+ width: 10px;
136
+ height: 10px;
137
+ border-radius: 3px;
138
+ border: 1px solid var(--border-color);
139
+ margin-right: 6px;
140
+ vertical-align: middle;
141
+ }
142
+ </style>
143
+ <script>
144
+ (() => {
145
/**
 * Run `cb` once a usable d3 (one exposing `select`) is on `window`.
 * If d3 is already usable, `cb` runs synchronously and its result is returned.
 * Otherwise the CDN script tag with id "d3-cdn-script" is reused or created,
 * and `cb` runs from its load event.
 */
const ensureD3 = (cb) => {
  const d3Usable = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (d3Usable()) return cb();

  const SCRIPT_ID = 'd3-cdn-script';
  let tag = document.getElementById(SCRIPT_ID);
  if (!tag) {
    tag = document.createElement('script');
    tag.id = SCRIPT_ID;
    tag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(tag);
  }

  const runWhenUsable = () => {
    if (d3Usable()) cb();
  };
  tag.addEventListener('load', runWhenUsable, { once: true });
  if (window.d3) runWhenUsable();
};
152
+
153
+ const bootstrap = () => {
154
+ const scriptEl = document.currentScript;
155
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
156
+ if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) {
157
+ const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
158
+ container = cs[cs.length - 1] || null;
159
+ }
160
+ if (!container) return;
161
+ if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
162
+
163
+ container.style.position = container.style.position || 'relative';
164
+
165
+ // ─── READ CONFIG ───
166
+ let mountEl = container;
167
+ while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
168
+ let cfg = {};
169
+ try {
170
+ const raw = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
171
+ if (raw && raw.trim()) cfg = raw.trim().startsWith('{') ? JSON.parse(raw) : {};
172
+ } catch (_) {}
173
+
174
+ // ─── NORMALIZE DATASETS CONFIG ───
175
+ // Accepts: { "key": "Name" } or { "key": { display, color, shaded, baseline } }
176
+ // Returns: { key: { display, color, shaded, baseline } }
177
/**
 * Normalize a datasets config into `{ key: { display, ...options } }`.
 *
 * Each value may be a display-name string or an options object
 * (display, color, shaded, baseline). Option objects are shallow-copied so
 * the config itself is never mutated.
 *
 * @param {Object<string, string|Object>} [raw] - datasets section of the config.
 * @returns {Object<string, Object>} one options object per key; `display`
 *   falls back to the key when it is missing or empty.
 */
function normalizeDatasets(raw) {
  const out = {};
  for (const [k, v] of Object.entries(raw || {})) {
    const opts = typeof v === 'string' ? { display: v } : { ...v };
    // Without this, an entry such as { color: "#fff" } would be labelled "undefined"
    if (opts.display == null || opts.display === '') opts.display = k;
    out[k] = opts;
  }
  return out;
}
184
+
185
+ // ─── SETUP SUPPORT ───
186
// An empty `setups` object is treated like no setups at all; otherwise
// setupNames[0] would be undefined and the DATASETS lookup below would throw.
const SETUPS = cfg.setups && Object.keys(cfg.setups).length ? cfg.setups : null;
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
const AVG_SETUP_KEY = 'Average (all setups)';
const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 ? 'average' : null);
// Averaged data is only computed for two or more setups, so "average" is
// only honoured in that case; with one setup we fall through to that setup.
const wantsAverage = defaultSetupCfg === 'average' && setupNames.length >= 2;
let currentSetup = SETUPS
  ? (wantsAverage ? AVG_SETUP_KEY : (defaultSetupCfg && setupNames.includes(defaultSetupCfg) ? defaultSetupCfg : setupNames[0]))
  : null;
// For the averaged setup DATASETS starts empty at this point
let DATASETS = SETUPS
  ? (currentSetup === AVG_SETUP_KEY ? {} : normalizeDatasets(SETUPS[currentSetup]?.datasets))
  : normalizeDatasets(cfg.datasets);
let avgDatasets = {};
let parsedData = [];

// Column names and defaults, each overridable from the config
const RUN_COL = cfg.runColumn || 'runname';
const STEP_COL = cfg.stepColumn || 'steps';
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
const defaultView = cfg.defaultView || 'bar';
// Short random suffix used to build ids for this chart instance
const uid = Math.random().toString(36).slice(2, 8);
201
+
202
+ // ─── DATASET ACCESSORS ───
203
// Label for a run: its configured display name, or the raw run name when unconfigured.
function displayName(raw) {
  const entry = DATASETS[raw];
  return entry ? entry.display : raw;
}

// True when the run is configured with a truthy `baseline` flag.
function isBaseline(raw) {
  const entry = DATASETS[raw];
  return Boolean(entry && entry.baseline);
}

// True when the run is configured with a truthy `shaded` flag.
function isShaded(raw) {
  const entry = DATASETS[raw];
  return Boolean(entry && entry.shaded);
}

// Colour fixed in the config for this run, if any.
function pinnedColor(raw) {
  const entry = DATASETS[raw];
  return entry && entry.color;
}

// Id for the run's stripe pattern: every non-alphanumeric character becomes "_".
function stripePatternId(raw) {
  const safe = raw.replace(/[^a-zA-Z0-9]/g, '_');
  return `stripe-${uid}-${safe}`;
}
208
+
209
+       const METRIC_NAMES = { // Display labels keyed by metric column name
210
+         'agg_score_macro': 'Aggregate Score (Macro)',
211
+         'agg_score_micro': 'Aggregate Score (Micro)',
212
+         'agg_score_RC': 'Reading Comprehension',
213
+         'agg_score_GK': 'General Knowledge',
214
+         'agg_score_NLU': 'Natural Language Understanding',
215
+         'agg_score_MATH': 'Math',
216
+         'agg_score_TABLE': 'Table Understanding',
217
+         'agg_score_RES': 'Reasoning',
218
+         'lighteval|arc_cf:easy|3/prob_norm_token': 'ARC-Easy',
219
+         'lighteval|drop|3/prob_norm_token': 'DROP',
220
+         'lighteval|gsm8k|3/prob_norm_token': 'GSM8K',
221
+         'lighteval|hellaswag_cf|3/prob_norm_token': 'HellaSwag',
222
+         'lighteval|openbookqa_cf|3/prob_norm_token': 'OpenBookQA',
223
+         'lighteval|piqa_cf|3/prob_norm_token': 'PIQA',
224
+         'lighteval|squad_v2|3/prob_norm_token': 'SQuAD v2',
225
+         'lighteval|treb_qa|3/prob_norm_token': 'TriviaQA', // NOTE(review): key says `treb_qa` but the label says TriviaQA; confirm the label names the right benchmark
226
+         'lighteval|wikitablequestions|3/prob_norm_token': 'WikiTableQuestions',
227
+         'lighteval|winogrande_cf|3/prob_norm_token': 'Winogrande',
228
+         'lighteval|xcsqa_cf|3/prob_norm_token': 'XCSQA',
229
+         'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux'
230
+       };
231
+
232
+ // Tooltip
233
+ let tip = container.querySelector('.d3-tooltip'), tipInner;
234
+ if (!tip) {
235
+ tip = document.createElement('div'); tip.className = 'd3-tooltip';
236
+ tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tip.appendChild(tipInner);
237
+ container.appendChild(tip);
238
+ } else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
239
+
240
+ // SVG
241
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
242
+ const gRoot = svg.append('g');
243
+ const defs = svg.append('defs');
244
+
245
+ // State
246
+ let allData = [];
247
+ let metricKeys = [];
248
+ let currentMetric = defaultMetric;
249
+ let currentView = defaultView;
250
+ let colorMap = {};
251
+ let highlight = null;
252
+
253
+ // ─── HELPERS ───
254
// Human-readable metric label; unknown keys are shown as-is.
function metricName(key) {
  const label = METRIC_NAMES[key];
  return label || key;
}

// Convert a step count into a token count.
function stepsToTokens(step) {
  return TOKENS_PER_STEP * step;
}

// Token count as "x.xxB", "x.xM", or a comma-grouped number below one million.
function formatTokens(tokens) {
  const BILLION = 1e9;
  const MILLION = 1e6;
  if (tokens >= BILLION) {
    return d3.format('.2f')(tokens / BILLION) + 'B';
  }
  if (tokens >= MILLION) {
    return d3.format('.1f')(tokens / MILLION) + 'M';
  }
  return d3.format(',')(tokens);
}

// Step count with a "K" suffix from 1000 upwards (formatted with zero decimals).
function formatStep(step) {
  return step >= 1000
    ? d3.format('.0f')(step / 1000) + 'K'
    : String(step);
}

// Short label: "<tokens> (<steps>)".
function stepLabelShort(step) {
  const tokens = formatTokens(stepsToTokens(step));
  return `${tokens} (${formatStep(step)})`;
}

// Long label: "<tokens> Tokens (<steps> Steps)".
function stepLabelLong(step) {
  const tokens = formatTokens(stepsToTokens(step));
  return `${tokens} Tokens (${formatStep(step)} Steps)`;
}
268
+
269
// Return n categorical colours: from window.ColorPalettes when it offers
// getColors, otherwise the first n of d3.schemeTableau10 (or a built-in copy).
function getCategoricalColors(n) {
  try {
    const palettes = window.ColorPalettes;
    if (palettes && typeof palettes.getColors === 'function') {
      return palettes.getColors('categorical', n);
    }
  } catch (_) {}

  const builtIn = ['#4e79a7','#f28e2b','#e15759','#76b7b2','#59a14f','#edc948','#b07aa1','#ff9da7','#9c755f','#bab0ac'];
  const source = d3.schemeTableau10 || builtIn;
  return source.slice(0, n);
}
273
+
274
// Fill colorMap once: runs with a colour pinned in the config keep it, the
// rest take palette colours in sorted run-name order. Does nothing when
// colorMap already has entries.
function initColors() {
  const alreadyAssigned = Object.keys(colorMap).length > 0;
  if (alreadyAssigned) return;

  const runNames = [...d3.group(allData, row => row[RUN_COL]).keys()].sort();

  const needsPalette = [];
  for (const run of runNames) {
    const fixed = pinnedColor(run);
    if (fixed) {
      colorMap[run] = fixed;
    } else {
      needsPalette.push(run);
    }
  }

  const palette = getCategoricalColors(needsPalette.length);
  needsPalette.forEach((run, idx) => {
    // Wrap around if the palette is shorter than the list of runs
    colorMap[run] = palette[idx % palette.length];
  });
}
286
+
287
+ // ─── SETUP HELPERS ───
288
// Narrow parsedData down to the runs configured in DATASETS (all rows when
// DATASETS is empty) and store the result, with the column list, in allData.
function filterData() {
  const configuredRuns = new Set(Object.keys(DATASETS));
  if (configuredRuns.size > 0) {
    allData = parsedData.filter(row => configuredRuns.has(row[RUN_COL]));
  } else {
    allData = parsedData;
  }
  // filter() returns a plain array, so the column list has to be carried over
  allData.columns = parsedData.columns;
}
293
+
294
/**
 * Build synthetic "average across setups" rows.
 *
 * Runs are matched across setups by display name; a display name is averaged
 * only when it collected at least as many runs as there are setups. For each
 * such name and each step, every column except the run and step columns is
 * averaged over the runs that have a row at that step.
 *
 * @param {Array<Object>} rawData - parsed CSV rows (optionally with `.columns`).
 * @returns {{data: Array<Object>, datasets: Object}} averaged rows keyed by a
 *   synthetic `__avg__<name>` run name, plus the matching datasets config.
 *   Both are empty when fewer than two setups are configured.
 */
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasets: {} };

  // display name -> raw run names that carry it, across all setups
  const displayToRaws = {};
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const [raw, opts] of Object.entries(ds)) {
      if (!displayToRaws[opts.display]) displayToRaws[opts.display] = [];
      displayToRaws[opts.display].push(raw);
    }
  }
  const fullDisplay = Object.entries(displayToRaws)
    .filter(([, raws]) => raws.length >= setupNames.length);

  // Index rows by run and NUMERIC step so the lookup below (which uses the
  // numeric step) also finds rows whose step is written as e.g. "1000.0".
  const byRunStep = {};
  for (const row of rawData) byRunStep[row[RUN_COL] + '|' + (+row[STEP_COL])] = row;
  const steps = Array.from(new Set(rawData.map(r => +r[STEP_COL]))).sort((a, b) => a - b);
  const cols = rawData.columns || Object.keys(rawData[0] || {});

  // Blank cells coerce to 0 with unary plus; treat them as missing instead.
  const isBlank = (v) => v == null || String(v).trim() === '';

  const result = [];
  const dsMap = {};
  for (const [display, raws] of fullDisplay) {
    const avgRaw = '__avg__' + display.replace(/[^a-zA-Z0-9]/g, '_');
    // Options (colour, flags) are taken from the first setup that has this display name
    const firstOpts = Object.values(normalizeDatasets(SETUPS[setupNames[0]].datasets)).find(o => o.display === display) || {};
    dsMap[avgRaw] = { display, ...firstOpts };
    for (const step of steps) {
      const rows = raws.map(r => byRunStep[r + '|' + step]).filter(Boolean);
      if (!rows.length) continue;
      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of cols) {
        if (col === RUN_COL || col === STEP_COL) continue;
        const vals = rows
          .map(r => r[col])
          .filter(v => !isBlank(v))
          .map(Number)
          .filter(v => !Number.isNaN(v));
        // Columns with no numeric value at this step keep the 0 placeholder
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      result.push(avgRow);
    }
  }
  return { data: result, datasets: dsMap };
}
331
+
332
// Activate the setup called `name` (or the synthetic average setup) and redraw.
// DATASETS is rebuilt for the selection, baseline runs declared in any setup are
// added back when they exist in the loaded data, colors are reassigned from
// scratch, and the chart and legend are refreshed.
function switchSetup(name) {
  currentSetup = name;
  DATASETS = name === AVG_SETUP_KEY
    ? { ...avgDatasets }
    : normalizeDatasets(SETUPS[name].datasets);

  // Re-add baselines from any setup
  setupNames.forEach(setupName => {
    const candidates = normalizeDatasets(SETUPS[setupName].datasets);
    Object.entries(candidates).forEach(([raw, opts]) => {
      if (!opts.baseline) return;
      if (DATASETS[raw]) return;
      if (!parsedData.some(row => row[RUN_COL] === raw)) return;
      DATASETS[raw] = { ...opts };
    });
  });

  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
354
+
355
// Show the tooltip with the given HTML next to container-relative point (x, y).
// The tooltip is placed to the right of the pointer unless it would overflow the
// container, in which case it flips to the left; it is never placed above y = 0.
function showTip(html, x, y) {
  tipInner.innerHTML = html;
  const tooltipWidth = tip.offsetWidth || 180;
  const containerWidth = container.clientWidth || 800;
  const overflowsRight = x + tooltipWidth + 20 > containerWidth;
  const left = overflowsRight ? x - tooltipWidth - 12 : x + 12;
  const top = Math.max(0, y - 20);
  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
}
363
// Hide the tooltip: make it transparent and move it far off-screen.
function hideTip() {
  const { style } = tip;
  style.opacity = '0';
  style.transform = 'translate(-9999px, -9999px)';
}
367
+
368
// Apply the current `highlight` (a display name, or null) to the chart and legend:
// while a highlight is set, every mark and legend item that belongs to a different
// name gets the "ghost" class; with no highlight the class is removed everywhere.
function updateHighlight() {
  const isGhost = d => highlight && d.name !== highlight;
  const markSelectors = [
    'rect.bar',
    'text.value-label',
    '.line-path',
    '.line-dot',
    '.baseline-vline',
    '.baseline-vlabel',
    '.baseline-hline',
    '.baseline-hlabel',
  ];
  markSelectors.forEach(selector => {
    gRoot.selectAll(selector).classed('ghost', isGhost);
  });

  container.querySelectorAll('.legend .item').forEach(item => {
    const itemName = item.getAttribute('data-name');
    item.classList.toggle('ghost', highlight && itemName !== highlight);
  });
}
381
+
382
+ // ─── AUTO-DETECT METRICS from CSV columns ───
383
// Derive the list of selectable metrics from the CSV column names.
// Known aggregate-score columns come first (in a fixed order), followed by every
// other column that holds numeric data. The run, step and 'seed' columns are
// never metrics.
//
// A column counts as numeric when its first non-empty cell parses as a number.
// Looking past empty cells (instead of only at the first row) avoids two problems:
// an exception when the filtered data is empty, and empty cells being mistaken for
// the number 0 (`+''` is 0).
function detectMetrics(columns) {
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter(k => columns.includes(k));
  const aggSet = new Set(agg);

  const isNumericColumn = (k) => {
    for (const row of allData) {
      const cell = row[k];
      if (cell === '' || cell == null) continue;
      return !Number.isNaN(+cell);
    }
    return false; // no data at all in this column
  };

  const ind = columns.filter(k => !skip.has(k) && !aggSet.has(k) && isNumericColumn(k));
  return [...agg, ...ind];
}
390
+
391
+ // ─── BAR CHART ───
392
// Draw the "Final Score" view: one horizontal bar per non-baseline run showing the
// current metric at that run's largest step, plus a dashed vertical reference line
// (with label) for every baseline run.
//
// Runs whose final value for the current metric is missing or not a finite number
// are left out instead of being drawn as "NaN"/"0.000".
// render() empties gRoot before each call, so in practice only the `enter`
// branches of the joins below run; the update/exit branches are kept unchanged.
function renderBar() {
  const width = container.clientWidth || 800;
  const hasBaselines = allData.some(r => isBaseline(r[RUN_COL]));
  // Slightly larger top margin when baselines exist; their labels sit at y = -4,
  // i.e. above the plot area (presumably the reason for the extra space).
  const margin = { top: hasBaselines ? 20 : 12, right: 56, bottom: 32, left: 190 };

  // Final score per run = metric value on the row with the largest step.
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const finalData = [];
  for (const [raw, rows] of grouped) {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const row = rows.find(r => +r[STEP_COL] === maxStep);
    if (!row) continue;
    const cell = row[currentMetric];
    // An empty CSV cell is missing data, not a score of 0.
    const value = (cell === '' || cell == null) ? NaN : +cell;
    if (!Number.isFinite(value)) continue;
    finalData.push({ name: displayName(raw), rawName: raw, value });
  }
  finalData.sort((a, b) => b.value - a.value);

  const barData = finalData.filter(d => !isBaseline(d.rawName));
  const baselineData = finalData.filter(d => isBaseline(d.rawName));

  const height = window.innerHeight || 480;
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // 5% headroom to the right of the longest bar. Fall back to [0, 1] when there
  // is no positive value, so the scale never gets a degenerate [0, 0] domain.
  const maxValue = d3.max(finalData, d => d.value);
  const x = d3.scaleLinear().domain([0, maxValue > 0 ? maxValue * 1.05 : 1]).range([0, innerWidth]);
  const y = d3.scaleBand().domain(barData.map(d => d.name)).range([0, innerHeight]).padding(0.2);

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(x.ticks(5)).join('line')
      .attr('x1', d => x(d)).attr('x2', d => x(d)).attr('y1', 0).attr('y2', innerHeight);
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--text-color)').style('font-size', '18px').style('font-weight', '600');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Stripe patterns for shaded bars
  barData.forEach(d => {
    if (!isShaded(d.rawName)) return;
    const c = colorMap[d.rawName] || '#999';
    const pat = defs.append('pattern').attr('id', stripePatternId(d.rawName))
      .attr('width', 6).attr('height', 6).attr('patternUnits', 'userSpaceOnUse').attr('patternTransform', 'rotate(45)');
    pat.append('rect').attr('width', 6).attr('height', 6).attr('fill', c).attr('opacity', 0.35);
    pat.append('line').attr('x1', 0).attr('y1', 0).attr('x2', 0).attr('y2', 6).attr('stroke', c).attr('stroke-width', 2.5);
  });

  // Shaded runs are filled with their stripe pattern, all others with a flat color.
  function barFill(d) {
    if (isShaded(d.rawName)) return `url(#${stripePatternId(d.rawName)})`;
    return colorMap[d.rawName] || 'var(--primary-color)';
  }

  // Bars
  const barTip = (ev, d) => {
    const [mx, my] = d3.pointer(ev, container);
    showTip(`<strong>${d.name}</strong><br/>${metricName(currentMetric)}: <strong>${d.value.toFixed(3)}</strong>`, mx, my);
  };
  gRoot.selectAll('rect.bar').data(barData, d => d.name).join(
    enter => enter.append('rect').attr('class', 'bar')
      .attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
      .attr('fill', d => barFill(d))
      .attr('width', 0)
      .on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
      .on('mousemove', barTip)
      .on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); })
      .transition().duration(300).attr('width', d => Math.max(0, x(d.value))),
    update => update
      .on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
      .on('mousemove', barTip)
      .on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); })
      .transition().duration(300)
      .attr('y', d => y(d.name)).attr('height', y.bandwidth())
      .attr('width', d => Math.max(0, x(d.value)))
      .attr('fill', d => barFill(d)),
    exit => exit.transition().duration(200).attr('width', 0).remove()
  );

  // Value labels
  gRoot.selectAll('text.value-label').data(barData, d => d.name).join(
    enter => enter.append('text').attr('class', 'value-label')
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .attr('dy', '0.35em').attr('fill', 'var(--text-color)').attr('font-size', 18)
      .text(d => d.value.toFixed(3)),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .text(d => d.value.toFixed(3)),
    exit => exit.remove()
  );

  // Baseline vertical reference lines
  gRoot.selectAll('.baseline-vline').data(baselineData, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-vline baseline')
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-vlabel').data(baselineData, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-vlabel baseline')
      .attr('x', d => x(d.value)).attr('y', -4)
      .attr('text-anchor', 'middle').attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value))
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    exit => exit.remove()
  );
}
521
+
522
+ // ─── LINE CHART ───
523
// Draw the "Training Progression" view: one line (with dots) per non-baseline run
// showing the current metric over training steps, a dashed horizontal reference
// line for every baseline run at its last recorded value, and a hover overlay that
// snaps to the nearest step and lists all values at that step in the tooltip.
//
// Points whose metric value is missing or not a finite number are skipped, and a
// run without any usable point is not drawn at all.
// render() empties gRoot before each call, so in practice only the `enter`
// branches of the joins below run; the update/exit branches are kept unchanged.
function renderLine() {
  const width = container.clientWidth || 800;
  const margin = { top: 16, right: 50, bottom: 48, left: 60 };
  const height = window.innerHeight || 480;
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // An empty CSV cell is missing data, not a score of 0.
  const metricValue = (r) => {
    const cell = r[currentMetric];
    return (cell === '' || cell == null) ? NaN : +cell;
  };

  // Build series
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const series = [];
  const baselineSeries = [];
  for (const [raw, rows] of grouped) {
    const pts = rows
      .map(r => ({ step: +r[STEP_COL], value: metricValue(r) }))
      .filter(p => Number.isFinite(p.value))
      .sort((a, b) => a.step - b.step);
    if (!pts.length) continue;
    const entry = { name: displayName(raw), rawName: raw, values: pts };
    if (isBaseline(raw)) {
      // Baselines are summarized by their value at the largest step.
      entry.finalValue = pts[pts.length - 1].value;
      baselineSeries.push(entry);
    } else {
      series.push(entry);
    }
  }

  const allSteps = Array.from(new Set(allData.filter(r => !isBaseline(r[RUN_COL])).map(r => +r[STEP_COL]))).sort((a, b) => a - b);
  const allValues = [...series, ...baselineSeries].flatMap(s => s.finalValue != null ? [s.finalValue] : s.values.map(v => v.value));
  // Nothing usable for this metric: leave the (already cleared) chart empty
  // rather than building scales from undefined extents.
  if (!allValues.length) return;

  const x = d3.scaleLinear().domain(d3.extent(allSteps)).range([0, innerWidth]);
  const yMin = d3.min(allValues), yMax = d3.max(allValues), yPad = (yMax - yMin) * 0.08;
  const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(y.ticks(6)).join('line')
      .attr('x1', 0).attr('x2', innerWidth).attr('y1', d => y(d)).attr('y2', d => y(d));
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).ticks(6).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Axis labels
  gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
    .attr('x', innerWidth / 2).attr('y', innerHeight + 38)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text('Tokens (Steps)');

  gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
    .attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text(metricName(currentMetric));

  // Baseline horizontal reference lines
  gRoot.selectAll('.baseline-hline').data(baselineSeries, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-hline baseline')
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-hlabel').data(baselineSeries, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-hlabel baseline')
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .attr('text-anchor', 'start')
      .attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    exit => exit.remove()
  );

  // Lines (non-baseline)
  const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
  gRoot.selectAll('.line-path').data(series, d => d.name).join(
    enter => enter.append('path').attr('class', 'line-path')
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    update => update.transition().duration(300)
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    exit => exit.remove()
  );

  // Dots (non-baseline)
  const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
  gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
    enter => enter.append('circle').attr('class', 'line-dot')
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value)).attr('r', 3)
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('stroke', 'var(--surface-bg)').attr('stroke-width', 1),
    update => update.transition().duration(300)
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value))
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)'),
    exit => exit.remove()
  );

  // Hover overlay
  gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
    .attr('y1', 0).attr('y2', innerHeight).style('display', 'none');

  gRoot.selectAll('.hover-overlay').data([0]).join('rect').attr('class', 'hover-overlay')
    .attr('width', innerWidth).attr('height', innerHeight)
    .attr('fill', 'none').attr('pointer-events', 'all')
    .on('mousemove', (ev) => {
      if (!allSteps.length) return; // only baselines present: nothing to snap to
      const [mx] = d3.pointer(ev, gRoot.node());
      // Invert once, then pick the recorded step closest to the pointer.
      const target = x.invert(mx);
      const nearest = allSteps.reduce((best, s) => Math.abs(s - target) < Math.abs(best - target) ? s : best, allSteps[0]);
      gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);

      const entries = series.map(s => {
        const pt = s.values.find(v => v.step === nearest);
        return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
      }).filter(Boolean);
      baselineSeries.forEach(s => {
        entries.push({ name: s.name, rawName: s.rawName, value: s.finalValue });
      });
      entries.sort((a, b) => b.value - a.value);

      let html = `<div style="font-weight:700;margin-bottom:4px;">${stepLabelLong(nearest)}</div>`;
      entries.forEach(e => {
        // Same '#999' fallback as the baseline marks, instead of "background:undefined".
        html += `<div><span class="tip-dot" style="background:${colorMap[e.rawName] || '#999'}"></span>${e.name}: <strong>${e.value.toFixed(3)}</strong></div>`;
      });
      const [cx, cy] = d3.pointer(ev, container);
      showTip(html, cx, cy);
    })
    .on('mouseleave', () => {
      gRoot.select('.hover-line').style('display', 'none');
      hideTip();
    });
}
674
+
675
+ // ─── RENDER ───
676
// Redraw the chart from scratch for the current view. Does nothing until data
// rows are available. Colors are (lazily) assigned first, then everything under
// gRoot and defs is removed before the bar or line renderer draws again.
function render() {
  const hasRows = Boolean(allData.length);
  if (!hasRows) return;

  initColors();
  gRoot.selectAll('*').remove();
  defs.selectAll('*').remove();

  const draw = currentView === 'bar' ? renderBar : renderLine;
  draw();
}
683
+
684
+ // ─── UI ───
685
// Create the control bar (optional "Setup" select, "View" select, empty "Metric"
// select) and the legend skeleton, and append both to the container.
// The metric options and the legend items are filled in elsewhere.
function buildUI() {
  // Small DOM helpers local to this function.
  const makeGroup = (idPrefix, labelText) => {
    const group = document.createElement('div');
    group.className = 'control-group';
    const label = document.createElement('label');
    label.setAttribute('for', idPrefix + uid);
    label.textContent = labelText;
    const select = document.createElement('select');
    select.id = idPrefix + uid;
    group.appendChild(label);
    group.appendChild(select);
    return { group, select };
  };
  const addOption = (select, value, text, isSelected) => {
    const opt = document.createElement('option');
    opt.value = value;
    opt.textContent = text;
    if (isSelected) opt.selected = true;
    select.appendChild(opt);
  };

  const controls = document.createElement('div');
  controls.className = 'controls';

  // Setup selector: one entry per configured setup, plus the synthetic average
  // entry when there are at least two setups.
  if (SETUPS && setupNames.length > 0) {
    const { group: setupGroup, select: setupSelect } = makeGroup('setup-', 'Setup');
    setupNames.forEach(name => addOption(setupSelect, name, name, name === currentSetup));
    if (setupNames.length >= 2) {
      addOption(setupSelect, AVG_SETUP_KEY, AVG_SETUP_KEY, currentSetup === AVG_SETUP_KEY);
    }
    setupSelect.addEventListener('change', () => { switchSetup(setupSelect.value); });
    controls.appendChild(setupGroup);
  }

  // View selector: final-score bars vs. training-progression lines.
  const { group: viewGroup, select: viewSelect } = makeGroup('view-', 'View');
  const viewChoices = [['bar', 'Final Score'], ['line', 'Training Progression']];
  viewChoices.forEach(([val, text]) => addOption(viewSelect, val, text, val === currentView));
  viewSelect.addEventListener('change', () => { currentView = viewSelect.value; render(); });
  controls.appendChild(viewGroup);

  // Metric selector: created empty here.
  const { group: metricGroup } = makeGroup('metric-', 'Metric');
  controls.appendChild(metricGroup);

  container.appendChild(controls);

  const legend = document.createElement('div');
  legend.className = 'legend';
  legend.innerHTML = '<div class="legend-title">Legend</div><div class="items"></div>';
  container.appendChild(legend);
}
731
+
732
// (Re)fill the metric <select> from `metricKeys`, grouped into "Aggregate Scores"
// (keys starting with 'agg_score') and "Individual Benchmarks"; empty groups are
// omitted and the current metric is pre-selected.
// The change handler is assigned through `onchange` so that calling this function
// again replaces the handler instead of stacking another listener (which would
// trigger one extra render per previous call).
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;
  sel.innerHTML = '';
  const aggGroup = document.createElement('optgroup'); aggGroup.label = 'Aggregate Scores';
  const indGroup = document.createElement('optgroup'); indGroup.label = 'Individual Benchmarks';
  metricKeys.forEach(key => {
    const opt = document.createElement('option'); opt.value = key; opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    if (key.startsWith('agg_score')) aggGroup.appendChild(opt); else indGroup.appendChild(opt);
  });
  if (aggGroup.children.length) sel.appendChild(aggGroup);
  if (indGroup.children.length) sel.appendChild(indGroup);
  sel.onchange = () => { currentMetric = sel.value; render(); };
}
747
+
748
// Rebuild the legend items: one entry per non-baseline run, ordered by the run's
// final value of `defaultMetric` (highest first). Shaded runs get a striped
// swatch. Hovering an entry highlights the matching marks in the chart.
function buildLegend() {
  const items = container.querySelector('.legend .items');
  if (!items) return;
  items.innerHTML = '';

  // Score of a run = defaultMetric on the row with the largest step (0 if none).
  const finalScore = (rows) => {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const lastRow = rows.find(r => +r[STEP_COL] === maxStep);
    return lastRow ? +lastRow[defaultMetric] : 0;
  };
  const ranked = Array.from(
    d3.group(allData, d => d[RUN_COL]),
    ([raw, rows]) => ({ raw, score: finalScore(rows) }),
  );
  ranked.sort((a, b) => b.score - a.score);

  for (const { raw } of ranked) {
    if (isBaseline(raw)) continue;

    const name = displayName(raw);
    const el = document.createElement('span');
    el.className = 'item';
    el.setAttribute('data-name', name);

    const sw = document.createElement('span');
    sw.className = 'swatch';
    sw.style.background = colorMap[raw] || '#999';
    if (isShaded(raw)) {
      sw.style.backgroundImage = 'repeating-linear-gradient(45deg, transparent, transparent 2px, rgba(255,255,255,0.4) 2px, rgba(255,255,255,0.4) 4px)';
    }

    const txt = document.createElement('span');
    txt.textContent = name;

    el.appendChild(sw);
    el.appendChild(txt);
    items.appendChild(el);

    el.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    el.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  }
}
778
+
779
+ buildUI();
780
+
781
+ // ─── DATA LOADING ───
782
// Try each path in order and resolve with the body text of the first one that
// answers with an OK status. Network errors and non-OK responses move on to the
// next candidate, but are remembered so the final error can say what was tried.
//
// paths: array of URLs/paths to try, in priority order.
// Resolves with the response text; rejects with an Error whose message starts
// with 'CSV not found' (followed by the per-path failure reasons, if any).
const fetchFirstAvailable = async (paths) => {
  const failures = [];
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.text();
      failures.push(`${p}: HTTP ${r.status}`);
    } catch (err) {
      failures.push(`${p}: ${err && err.message ? err.message : err}`);
    }
  }
  const detail = failures.length ? ` (tried ${failures.join('; ')})` : '';
  throw new Error('CSV not found' + detail);
};
788
+
789
// Locate the data file configuration: walk up from the container to the nearest
// element that carries a non-empty `data-datafiles` attribute.
let dataMountEl = container;
while (dataMountEl && !dataMountEl.getAttribute?.('data-datafiles')) {
  dataMountEl = dataMountEl.parentElement;
}

// The attribute is either a JSON array of paths (starts with '[') or one plain
// path. A malformed value is ignored and the default path is used instead.
let providedData = null;
try {
  const rawAttr = (dataMountEl && dataMountEl.getAttribute)
    ? dataMountEl.getAttribute('data-datafiles')
    : null;
  const trimmed = rawAttr ? rawAttr.trim() : '';
  if (trimmed) {
    providedData = trimmed.startsWith('[') ? JSON.parse(rawAttr) : trimmed;
  }
} catch (_) {}

// Bare file names (no '/') are looked up under /data/; anything else is used as given.
const ensurePrefix = (p) => {
  const isBareName = typeof p === 'string' && p && !p.includes('/');
  return isBareName ? `/data/${p}` : p;
};

let csvPaths;
if (!providedData) {
  csvPaths = ['../data/benchmark-results.csv'];
} else if (Array.isArray(providedData)) {
  csvPaths = providedData.map(ensurePrefix);
} else {
  csvPaths = [ensurePrefix(providedData)];
}
801
+
802
// Load the CSV, derive the averaged rows when several setups exist, then draw the
// chart, fill the metric selector and legend, and re-render on resize.
// Any failure is shown to the user as a small "Data load error" message inside
// the container.
(async () => {
  try {
    const csvText = await fetchFirstAvailable(csvPaths);
    const rows = d3.csvParse(csvText);
    parsedData = rows;

    const canAverage = SETUPS && setupNames.length >= 2;
    if (canAverage) {
      const { data: avgRows, datasets } = computeAverageData(rows);
      avgDatasets = datasets;
      // concat() returns a plain array, so the column list is carried over by hand.
      parsedData = rows.concat(avgRows);
      parsedData.columns = rows.columns;
      if (currentSetup === AVG_SETUP_KEY) DATASETS = { ...avgDatasets };
    }

    filterData();
    metricKeys = detectMetrics(allData.columns);
    if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
    populateMetricSelect();
    render();
    buildLegend();

    const rerender = () => render();
    if (window.ResizeObserver) {
      new ResizeObserver(rerender).observe(container);
    } else {
      window.addEventListener('resize', rerender);
    }
  } catch (e) {
    const pre = document.createElement('pre');
    pre.textContent = 'Data load error: ' + (e && e.message ? e.message : e);
    pre.style.color = 'var(--danger, #b00020)';
    pre.style.fontSize = '12px';
    container.appendChild(pre);
  }
})();
830
+ };
831
+
832
// Boot the chart right away when the document is already parsed; otherwise wait
// for DOMContentLoaded (once).
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
}
834
+ })();
835
+ </script>
836
+ </body>
837
+ </html>
app/presentation/se2026/charts/benchmark.html ADDED
@@ -0,0 +1,837 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="dark">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Benchmark Comparison</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
8
+ <style>
9
+ :root {
10
+ --text-color: rgba(255,255,255,0.88);
11
+ --muted-color: rgba(255,255,255,0.45);
12
+ --surface-bg: rgba(30,30,40,0.95);
13
+ --border-color: rgba(255,255,255,0.1);
14
+ --axis-color: rgba(255,255,255,0.15);
15
+ --tick-color: rgba(255,255,255,0.5);
16
+ --grid-color: rgba(255,255,255,0.06);
17
+ --primary-color: #7c6ff7;
18
+ }
19
+ * { box-sizing: border-box; margin: 0; padding: 0; }
20
+ html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
21
+ </style>
22
+ </head>
23
+ <body>
24
+ <div class="d3-benchmark-comparison" data-config='{"datasets":{"mix-fw_edu_hq-table_smollm2_1.7b_hq":{"display":"FinePhrase","color":"#EBA937"},"dclm":{"display":"DCLM","baseline":true},"nemotron_hq_synth":{"display":"Nemotron-HQ-Synth","color":"#76b900","shaded":true},"rewire":{"display":"REWIRE","color":"#1877F2","shaded":true},"cosmopedia":"Cosmopedia","fw_edu_hq":"FW-Edu HQ","synth_query_reasoning_answer":"SYNTH","ultra-fineweb":"Ultra-FineWeb"}}' data-datafiles="../data/benchmark-results.csv"></div>
25
+ <style>
26
+ .d3-benchmark-comparison { position: relative; }
27
+ .d3-benchmark-comparison .controls {
28
+ display: flex;
29
+ gap: 16px;
30
+ align-items: flex-end;
31
+ justify-content: center;
32
+ margin: 10px 0 0 0;
33
+ }
34
+ .d3-benchmark-comparison .controls .control-group {
35
+ display: flex;
36
+ flex-direction: column;
37
+ align-items: flex-start;
38
+ gap: 6px;
39
+ }
40
+ .d3-benchmark-comparison .controls label {
41
+ font-size: 18px;
42
+ font-weight: 700;
43
+ color: var(--text-color);
44
+ }
45
+ .d3-benchmark-comparison .controls select {
46
+ appearance: none;
47
+ -webkit-appearance: none;
48
+ -moz-appearance: none;
49
+ border: 1px solid var(--border-color);
50
+ border-radius: 8px;
51
+ padding: 6px 28px 6px 10px;
52
+ background-color: var(--surface-bg);
53
+ color: var(--text-color);
54
+ font-size: 18px;
55
+ line-height: 1.2;
56
+ background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
57
+ background-repeat: no-repeat;
58
+ background-position: right 8px center;
59
+ }
60
+ .d3-benchmark-comparison .controls select:focus-visible {
61
+ outline: 2px solid var(--primary-color);
62
+ outline-offset: 2px;
63
+ }
64
+ .d3-benchmark-comparison .legend {
65
+ display: flex;
66
+ flex-direction: column;
67
+ align-items: flex-start;
68
+ gap: 6px;
69
+ margin: 8px 0 0 0;
70
+ padding-bottom: 4px;
71
+ }
72
+ .d3-benchmark-comparison .legend .legend-title {
73
+ font-size: 18px;
74
+ font-weight: 700;
75
+ color: var(--text-color);
76
+ }
77
+ .d3-benchmark-comparison .legend .items {
78
+ display: flex;
79
+ flex-wrap: wrap;
80
+ gap: 8px 14px;
81
+ }
82
+ .d3-benchmark-comparison .legend .item {
83
+ display: inline-flex;
84
+ align-items: center;
85
+ gap: 6px;
86
+ white-space: nowrap;
87
+ font-size: 18px;
88
+ color: var(--text-color);
89
+ cursor: pointer;
90
+ }
91
+ .d3-benchmark-comparison .legend .item.ghost { opacity: .25; }
92
+ .d3-benchmark-comparison .legend .swatch {
93
+ width: 14px;
94
+ height: 14px;
95
+ border-radius: 3px;
96
+ border: 1px solid var(--border-color);
97
+ }
98
+ .d3-benchmark-comparison .bar.ghost { opacity: .25; }
99
+ .d3-benchmark-comparison .value-label.ghost { opacity: .25; }
100
+ .d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; }
101
+ .d3-benchmark-comparison .line-path.ghost { opacity: .15; }
102
+ .d3-benchmark-comparison .line-dot.ghost { opacity: .15; }
103
+ .d3-benchmark-comparison .baseline.ghost { opacity: .1; }
104
+ .d3-benchmark-comparison .axes path { display: none; }
105
+ .d3-benchmark-comparison .axes line { stroke: var(--axis-color); }
106
+ .d3-benchmark-comparison .axes text { fill: var(--tick-color); }
107
+ .d3-benchmark-comparison .grid line { stroke: var(--grid-color); }
108
+ .d3-benchmark-comparison .hover-line {
109
+ stroke: var(--text-color);
110
+ stroke-opacity: 0.25;
111
+ stroke-width: 1;
112
+ pointer-events: none;
113
+ }
114
+ .d3-benchmark-comparison .d3-tooltip {
115
+ position: absolute;
116
+ top: 0px;
117
+ left: 0px;
118
+ transform: translate(-9999px, -9999px);
119
+ pointer-events: none;
120
+ padding: 8px 10px;
121
+ border-radius: 8px;
122
+ font-size: 18px;
123
+ line-height: 1.35;
124
+ border: 1px solid var(--border-color);
125
+ background: var(--surface-bg);
126
+ color: var(--text-color);
127
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
128
+ opacity: 0;
129
+ transition: opacity .12s ease;
130
+ text-align: left;
131
+ z-index: 10;
132
+ }
133
+ .d3-benchmark-comparison .d3-tooltip .tip-dot {
134
+ display: inline-block;
135
+ width: 10px;
136
+ height: 10px;
137
+ border-radius: 3px;
138
+ border: 1px solid var(--border-color);
139
+ margin-right: 6px;
140
+ vertical-align: middle;
141
+ }
142
+ </style>
143
+ <script>
144
+ (() => {
145
/**
 * Run `cb` once D3 is available on `window`.
 *
 * If D3 is already loaded, `cb` runs synchronously and its return value is
 * returned. Otherwise a single shared <script id="d3-cdn-script"> tag is
 * created (or reused) and `cb` runs when that script fires `load`.
 *
 * @param {Function} cb - Callback to invoke once `window.d3.select` exists.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  let script = document.getElementById('d3-cdn-script');
  if (!script) {
    script = document.createElement('script');
    script.id = 'd3-cdn-script';
    script.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(script);
  }
  script.addEventListener('load', () => { if (hasD3()) cb(); }, { once: true });
  // Without this, a blocked or offline CDN leaves the chart blank with no hint why.
  script.addEventListener('error', () => {
    console.error('[d3-benchmark-comparison] Failed to load D3 from ' + script.src);
  }, { once: true });
};
152
+
153
+ const bootstrap = () => {
154
+ const scriptEl = document.currentScript;
155
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
156
+ if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) {
157
+ const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
158
+ container = cs[cs.length - 1] || null;
159
+ }
160
+ if (!container) return;
161
+ if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
162
+
163
+ container.style.position = container.style.position || 'relative';
164
+
165
+ // ─── READ CONFIG ───
166
// Walk up from the chart container to the nearest ancestor carrying `data-config`.
let mountEl = container;
while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
let cfg = {};
try {
  const raw = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
  // Only values that look like a JSON object are parsed; anything else keeps the empty config.
  if (raw && raw.trim()) cfg = raw.trim().startsWith('{') ? JSON.parse(raw) : {};
} catch (err) {
  // Keep rendering with defaults, but report the problem instead of hiding it.
  console.warn('[d3-benchmark-comparison] Ignoring invalid data-config JSON:', err);
}
173
+
174
+ // ─── NORMALIZE DATASETS CONFIG ───
175
+ // Accepts: { "key": "Name" } or { "key": { display, color, shaded, baseline } }
176
+ // Returns: { key: { display, color, shaded, baseline } }
177
/**
 * Normalize a datasets config map into one uniform shape.
 *
 * Accepts `{ key: "Name" }` or `{ key: { display, color, shaded, baseline } }`.
 * Each returned entry is a fresh object, so callers may mutate it freely.
 * An entry without a `display` value falls back to its key, so every
 * dataset always has a usable label.
 *
 * @param {Object|null|undefined} raw - Datasets map from the chart config.
 * @returns {Object} Map of run key to `{ display, ...options }`.
 */
function normalizeDatasets(raw) {
  const normalized = {};
  for (const [key, value] of Object.entries(raw || {})) {
    const opts = typeof value === 'string' ? { display: value } : { ...value };
    if (opts.display == null) opts.display = key;
    normalized[key] = opts;
  }
  return normalized;
}
184
+
185
+ // ─── SETUP SUPPORT ───
186
// Optional multi-setup config: `cfg.setups` maps a setup name to `{ datasets }`.
const SETUPS = cfg.setups || null;
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
const AVG_SETUP_KEY = 'Average (all setups)';
// The averaged view needs at least two setups to average over.
const canAverage = setupNames.length >= 2;
const defaultSetupCfg = cfg.defaultSetup || (canAverage ? 'average' : null);

let currentSetup = null;
if (SETUPS) {
  if (defaultSetupCfg === 'average' && canAverage) {
    currentSetup = AVG_SETUP_KEY;
  } else if (defaultSetupCfg && setupNames.includes(defaultSetupCfg)) {
    currentSetup = defaultSetupCfg;
  } else {
    // Unknown or unusable default: fall back to the first setup (undefined when there are none).
    currentSetup = setupNames[0];
  }
}

let DATASETS;
if (!SETUPS) {
  DATASETS = normalizeDatasets(cfg.datasets);
} else if (currentSetup === AVG_SETUP_KEY) {
  // Stays empty here; the averaged datasets are assigned after the CSV has been loaded.
  DATASETS = {};
} else {
  // `?.` keeps an empty `setups: {}` config from throwing on SETUPS[undefined].
  DATASETS = normalizeDatasets(SETUPS[currentSetup]?.datasets);
}
let avgDatasets = {};
let parsedData = [];
194
+
195
+ const RUN_COL = cfg.runColumn || 'runname';
196
+ const STEP_COL = cfg.stepColumn || 'steps';
197
+ const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
198
+ const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
199
+ const defaultView = cfg.defaultView || 'bar';
200
+ const uid = Math.random().toString(36).slice(2, 8);
201
+
202
+ // ─── DATASET ACCESSORS ───
203
/** Human-readable label for a run key; falls back to the key itself when no label is configured. */
function displayName(raw) {
  const label = DATASETS[raw]?.display;
  return label == null ? raw : label;
}

/** True when the run is configured as a baseline. */
function isBaseline(raw) { return Boolean(DATASETS[raw]?.baseline); }

/** True when the run is configured as shaded (the bar view fills it with a stripe pattern). */
function isShaded(raw) { return Boolean(DATASETS[raw]?.shaded); }

/** Color pinned to the run in the config, or undefined when none is set. */
function pinnedColor(raw) { return DATASETS[raw]?.color; }

/** SVG pattern id for a run's stripe fill; `uid` keeps ids distinct per chart instance. */
function stripePatternId(raw) { return 'stripe-' + uid + '-' + raw.replace(/[^a-zA-Z0-9]/g, '_'); }
208
+
209
+ const METRIC_NAMES = {
210
+ 'agg_score_macro': 'Aggregate Score (Macro)',
211
+ 'agg_score_micro': 'Aggregate Score (Micro)',
212
+ 'agg_score_RC': 'Reading Comprehension',
213
+ 'agg_score_GK': 'General Knowledge',
214
+ 'agg_score_NLU': 'Natural Language Understanding',
215
+ 'agg_score_MATH': 'Math',
216
+ 'agg_score_TABLE': 'Table Understanding',
217
+ 'agg_score_RES': 'Reasoning',
218
+ 'lighteval|arc_cf:easy|3/prob_norm_token': 'ARC-Easy',
219
+ 'lighteval|drop|3/prob_norm_token': 'DROP',
220
+ 'lighteval|gsm8k|3/prob_norm_token': 'GSM8K',
221
+ 'lighteval|hellaswag_cf|3/prob_norm_token': 'HellaSwag',
222
+ 'lighteval|openbookqa_cf|3/prob_norm_token': 'OpenBookQA',
223
+ 'lighteval|piqa_cf|3/prob_norm_token': 'PIQA',
224
+ 'lighteval|squad_v2|3/prob_norm_token': 'SQuAD v2',
225
+ 'lighteval|treb_qa|3/prob_norm_token': 'TriviaQA',
226
+ 'lighteval|wikitablequestions|3/prob_norm_token': 'WikiTableQuestions',
227
+ 'lighteval|winogrande_cf|3/prob_norm_token': 'Winogrande',
228
+ 'lighteval|xcsqa_cf|3/prob_norm_token': 'XCSQA',
229
+ 'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux'
230
+ };
231
+
232
+ // Tooltip
233
+ let tip = container.querySelector('.d3-tooltip'), tipInner;
234
+ if (!tip) {
235
+ tip = document.createElement('div'); tip.className = 'd3-tooltip';
236
+ tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tip.appendChild(tipInner);
237
+ container.appendChild(tip);
238
+ } else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
239
+
240
+ // SVG
241
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
242
+ const gRoot = svg.append('g');
243
+ const defs = svg.append('defs');
244
+
245
+ // State
246
+ let allData = [];
247
+ let metricKeys = [];
248
+ let currentMetric = defaultMetric;
249
+ let currentView = defaultView;
250
+ let colorMap = {};
251
+ let highlight = null;
252
+
253
+ // ─── HELPERS ───
254
+ function metricName(key) { return METRIC_NAMES[key] || key; }
255
+
256
/** Convert a training step count to a token count using the configured tokens-per-step. */
function stepsToTokens(step) { return step * TOKENS_PER_STEP; }

/** Format a token count as e.g. "2.10B", "5.2M", or a comma-grouped number below one million. */
function formatTokens(tokens) {
  if (tokens >= 1e9) return d3.format('.2f')(tokens / 1e9) + 'B';
  if (tokens >= 1e6) return d3.format('.1f')(tokens / 1e6) + 'M';
  return d3.format(',')(tokens);
}

/**
 * Format a step count compactly: "500", "1K", "1.5K", "12K".
 * One decimal is kept (trailing ".0" dropped) so that steps such as 1500
 * and 2000 do not both collapse to "2K".
 */
function formatStep(step) {
  if (step >= 1000) return `${Number((step / 1000).toFixed(1))}K`;
  return String(step);
}

/** Short label in the form "<tokens> (<steps>)". */
function stepLabelShort(step) { return `${formatTokens(stepsToTokens(step))} (${formatStep(step)})`; }

/** Long label in the form "<tokens> Tokens (<steps> Steps)". */
function stepLabelLong(step) { return `${formatTokens(stepsToTokens(step))} Tokens (${formatStep(step)} Steps)`; }
268
+
269
/**
 * Return up to `n` categorical colors.
 *
 * Uses `window.ColorPalettes.getColors('categorical', n)` when that helper is
 * present; otherwise slices D3's Tableau10 scheme (or a hard-coded copy of it).
 */
function getCategoricalColors(n) {
  try {
    const palettes = window.ColorPalettes;
    if (palettes && typeof palettes.getColors === 'function') {
      return palettes.getColors('categorical', n);
    }
  } catch (_) {
    // Best effort: any failure in the optional palette helper falls through to the default scheme.
  }
  const fallback = d3.schemeTableau10 || [
    '#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f',
    '#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ac',
  ];
  return fallback.slice(0, n);
}
273
+
274
/**
 * Fill `colorMap` (run key -> color) for the runs present in `allData`.
 * Does nothing when `colorMap` already has entries. Runs with a pinned
 * color keep it; the rest receive palette colors in sorted run-key order.
 */
function initColors() {
  if (Object.keys(colorMap).length > 0) return;

  const runKeys = Array.from(d3.group(allData, (row) => row[RUN_COL]).keys()).sort();
  const withoutPin = [];
  for (const raw of runKeys) {
    const pinned = pinnedColor(raw);
    if (pinned) {
      colorMap[raw] = pinned;
    } else {
      withoutPin.push(raw);
    }
  }

  const palette = getCategoricalColors(withoutPin.length);
  withoutPin.forEach((raw, idx) => {
    colorMap[raw] = palette[idx % palette.length];
  });
}
286
+
287
+ // ─── SETUP HELPERS ───
288
/**
 * Recompute `allData` from `parsedData`, keeping only rows whose run key is
 * listed in `DATASETS`. With no configured datasets, every row is kept
 * (`allData` then aliases `parsedData`). The CSV column list is carried over.
 */
function filterData() {
  // Set lookup keeps filtering linear in the number of rows.
  const knownRuns = new Set(Object.keys(DATASETS));
  allData = knownRuns.size > 0
    ? parsedData.filter((row) => knownRuns.has(row[RUN_COL]))
    : parsedData;
  allData.columns = parsedData.columns;
}
293
+
294
/**
 * Build synthetic "average across setups" rows.
 *
 * Runs are matched across setups by display name. A display name takes part
 * only when it is listed at least `setupNames.length` times across all
 * setups. For each such name and each step, every other column is averaged
 * over the matching rows that exist at that step.
 *
 * @param {Array<Object>} rawData - Parsed CSV rows (optionally with `.columns`).
 * @returns {{data: Array<Object>, datasets: Object}} Averaged rows keyed by a
 *   synthetic `__avg__…` run name, plus the dataset options for those names.
 *   Both are empty when fewer than two setups are configured.
 */
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasets: {} };

  // display name -> raw run keys that use it, across every setup
  const displayToRaws = {};
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const [raw, opts] of Object.entries(ds)) {
      if (!displayToRaws[opts.display]) displayToRaws[opts.display] = [];
      displayToRaws[opts.display].push(raw);
    }
  }
  const fullDisplay = Object.entries(displayToRaws)
    .filter(([, raws]) => raws.length >= setupNames.length);

  // Index rows by run and *numeric* step so "1000" and "1000.0" resolve to the same row.
  const byRunStep = {};
  for (const row of rawData) byRunStep[row[RUN_COL] + '|' + (+row[STEP_COL])] = row;

  const steps = Array.from(new Set(rawData.map((r) => +r[STEP_COL]))).sort((a, b) => a - b);
  const cols = rawData.columns || Object.keys(rawData[0] || {});
  // Options for the averaged entry come from the first setup; normalise it once, not per display name.
  const firstSetupOptions = Object.values(normalizeDatasets(SETUPS[setupNames[0]].datasets));
  const isBlank = (cell) => cell == null || String(cell).trim() === '';

  const result = [];
  const dsMap = {};
  for (const [display, raws] of fullDisplay) {
    const avgRaw = '__avg__' + display.replace(/[^a-zA-Z0-9]/g, '_');
    const firstOpts = firstSetupOptions.find((o) => o.display === display) || {};
    dsMap[avgRaw] = { display, ...firstOpts };
    for (const step of steps) {
      const rows = raws.map((r) => byRunStep[r + '|' + step]).filter(Boolean);
      if (!rows.length) continue;
      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of cols) {
        if (col === RUN_COL || col === STEP_COL) continue;
        // Blank cells are missing data, not zeros: leave them out of the mean.
        const vals = rows
          .filter((r) => !isBlank(r[col]))
          .map((r) => Number(r[col]))
          .filter((v) => !Number.isNaN(v));
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      result.push(avgRow);
    }
  }
  return { data: result, datasets: dsMap };
}
331
+
332
/**
 * Activate a setup (or the averaged pseudo-setup) and redraw the chart.
 *
 * Baseline runs declared in any setup are added to the active datasets when
 * rows for them exist in the loaded data, so baselines stay visible whichever
 * setup is selected. Colors are reset and reassigned for the new run set.
 *
 * @param {string} name - A key of `SETUPS`, or `AVG_SETUP_KEY`.
 */
function switchSetup(name) {
  currentSetup = name;
  DATASETS = name === AVG_SETUP_KEY
    ? { ...avgDatasets }
    : normalizeDatasets(SETUPS[name].datasets);

  // Collect the loaded run keys once instead of rescanning every row per candidate baseline.
  const loadedRuns = new Set(parsedData.map((row) => row[RUN_COL]));
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const [raw, opts] of Object.entries(ds)) {
      if (opts.baseline && !DATASETS[raw] && loadedRuns.has(raw)) {
        DATASETS[raw] = { ...opts };
      }
    }
  }

  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
354
+
355
/**
 * Show the tooltip with `html` near the container-relative point (x, y).
 * The tooltip sits to the right of the pointer, or flips to the left when it
 * would extend past the container's right edge.
 */
function showTip(html, x, y) {
  // Set the content first so the measured width reflects what is being shown.
  tipInner.innerHTML = html;
  const tipWidth = tip.offsetWidth || 180;
  const containerWidth = container.clientWidth || 800;
  const overflowsRight = x + tipWidth + 20 > containerWidth;
  const left = overflowsRight ? x - tipWidth - 12 : x + 12;
  const top = Math.max(0, y - 20);
  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
}

/** Hide the tooltip and move it far off-screen. */
function hideTip() {
  const { style } = tip;
  style.opacity = '0';
  style.transform = 'translate(-9999px, -9999px)';
}
367
+
368
/**
 * Apply the current `highlight` (a display name, or null) to the chart:
 * every mark and legend item whose name differs from it gets the `ghost`
 * class, and the class is cleared everywhere when nothing is highlighted.
 */
function updateHighlight() {
  const isGhost = (d) => highlight && d.name !== highlight;
  const markSelectors = [
    'rect.bar',
    'text.value-label',
    '.line-path',
    '.line-dot',
    '.baseline-vline',
    '.baseline-vlabel',
    '.baseline-hline',
    '.baseline-hlabel',
  ];
  for (const selector of markSelectors) {
    gRoot.selectAll(selector).classed('ghost', isGhost);
  }
  // Legend items are plain DOM nodes carrying the display name in `data-name`.
  container.querySelectorAll('.legend .item').forEach((item) => {
    item.classList.toggle('ghost', highlight && item.getAttribute('data-name') !== highlight);
  });
}
381
+
382
+ // ─── AUTO-DETECT METRICS from CSV columns ───
383
/**
 * Work out which CSV columns can be plotted as metrics.
 *
 * Known aggregate-score columns come first, in a fixed order. They are
 * followed by every other column (excluding the run, step and `seed`
 * columns) whose value in the first data row converts to a number.
 * With no rows to sample, only the aggregate columns are returned.
 *
 * @param {string[]} columns - Column names of the loaded CSV.
 * @returns {string[]} Metric column names in display order.
 */
function detectMetrics(columns) {
  const cols = Array.isArray(columns) ? columns : [];
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter((k) => cols.includes(k));
  const aggSet = new Set(agg);
  // Filtering may leave no rows; in that case there is nothing to sample.
  const sample = allData[0];
  const ind = sample
    ? cols.filter((k) => !skip.has(k) && !aggSet.has(k) && !Number.isNaN(Number(sample[k])))
    : [];
  return [...agg, ...ind];
}
390
+
391
+ // ─── BAR CHART ───
392
/**
 * Draw the "Final Score" view: one horizontal bar per non-baseline run,
 * showing the current metric at that run's highest step, sorted descending.
 * Baseline runs are drawn as dashed vertical reference lines with a label
 * above the plot area.
 */
function renderBar() {
  const width = container.clientWidth || 800;
  const height = window.innerHeight || 480;
  const hasBaselines = allData.some((row) => isBaseline(row[RUN_COL]));
  // Baseline labels sit at y = -4, above the plot, so they get a taller top margin.
  const margin = { top: hasBaselines ? 20 : 12, right: 56, bottom: 32, left: 190 };
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // One entry per run, taken from the row with the highest step.
  const finalData = [];
  for (const [raw, rows] of d3.group(allData, (d) => d[RUN_COL])) {
    const maxStep = d3.max(rows, (r) => +r[STEP_COL]);
    const row = rows.find((r) => +r[STEP_COL] === maxStep);
    if (row) finalData.push({ name: displayName(raw), rawName: raw, value: +row[currentMetric] });
  }
  finalData.sort((a, b) => b.value - a.value);

  const barData = finalData.filter((d) => !isBaseline(d.rawName));
  const baselineData = finalData.filter((d) => isBaseline(d.rawName));

  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const x = d3.scaleLinear().domain([0, d3.max(finalData, (d) => d.value) * 1.05]).range([0, innerWidth]);
  const y = d3.scaleBand().domain(barData.map((d) => d.name)).range([0, innerHeight]).padding(0.2);

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call((g) => {
    g.selectAll('line').data(x.ticks(5)).join('line')
      .attr('x1', (d) => x(d)).attr('x2', (d) => x(d)).attr('y1', 0).attr('y2', innerHeight);
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call((g) => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).tickSizeOuter(0))
    .call((g) => {
      g.selectAll('text').attr('fill', 'var(--text-color)').style('font-size', '18px').style('font-weight', '600');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Stripe patterns for shaded bars
  barData.forEach((d) => {
    if (!isShaded(d.rawName)) return;
    const c = colorMap[d.rawName] || '#999';
    const pat = defs.append('pattern').attr('id', stripePatternId(d.rawName))
      .attr('width', 6).attr('height', 6).attr('patternUnits', 'userSpaceOnUse').attr('patternTransform', 'rotate(45)');
    pat.append('rect').attr('width', 6).attr('height', 6).attr('fill', c).attr('opacity', 0.35);
    pat.append('line').attr('x1', 0).attr('y1', 0).attr('x2', 0).attr('y2', 6).attr('stroke', c).attr('stroke-width', 2.5);
  });

  const barFill = (d) => (isShaded(d.rawName)
    ? `url(#${stripePatternId(d.rawName)})`
    : colorMap[d.rawName] || 'var(--primary-color)');

  // Hover behaviour shared by entering and updating bars.
  const barTip = (ev, d) => {
    const [mx, my] = d3.pointer(ev, container);
    showTip(`<strong>${d.name}</strong><br/>${metricName(currentMetric)}: <strong>${d.value.toFixed(3)}</strong>`, mx, my);
  };
  const attachHover = (sel) => sel
    .on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
    .on('mousemove', barTip)
    .on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); });

  // Bars
  gRoot.selectAll('rect.bar').data(barData, (d) => d.name).join(
    (enter) => enter.append('rect').attr('class', 'bar')
      .attr('x', 0).attr('y', (d) => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
      .attr('fill', (d) => barFill(d))
      .attr('width', 0)
      .call(attachHover)
      .transition().duration(300).attr('width', (d) => Math.max(0, x(d.value))),
    (update) => update
      .call(attachHover)
      .transition().duration(300)
      .attr('y', (d) => y(d.name)).attr('height', y.bandwidth())
      .attr('width', (d) => Math.max(0, x(d.value)))
      .attr('fill', (d) => barFill(d)),
    (exit) => exit.transition().duration(200).attr('width', 0).remove()
  );

  // Value labels
  gRoot.selectAll('text.value-label').data(barData, (d) => d.name).join(
    (enter) => enter.append('text').attr('class', 'value-label')
      .attr('x', (d) => x(d.value) + 5).attr('y', (d) => y(d.name) + y.bandwidth() / 2)
      .attr('dy', '0.35em').attr('fill', 'var(--text-color)').attr('font-size', 18)
      .text((d) => d.value.toFixed(3)),
    (update) => update.transition().duration(300)
      .attr('x', (d) => x(d.value) + 5).attr('y', (d) => y(d.name) + y.bandwidth() / 2)
      .text((d) => d.value.toFixed(3)),
    (exit) => exit.remove()
  );

  // Baseline vertical reference lines
  gRoot.selectAll('.baseline-vline').data(baselineData, (d) => d.name).join(
    (enter) => enter.append('line').attr('class', 'baseline-vline baseline')
      .attr('x1', (d) => x(d.value)).attr('x2', (d) => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', (d) => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    (update) => update.transition().duration(300)
      .attr('x1', (d) => x(d.value)).attr('x2', (d) => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', (d) => colorMap[d.rawName] || '#999'),
    (exit) => exit.remove()
  );
  gRoot.selectAll('.baseline-vlabel').data(baselineData, (d) => d.name).join(
    (enter) => enter.append('text').attr('class', 'baseline-vlabel baseline')
      .attr('x', (d) => x(d.value)).attr('y', -4)
      .attr('text-anchor', 'middle').attr('fill', (d) => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text((d) => `${d.name} (${d.value.toFixed(3)})`),
    (update) => update.transition().duration(300)
      .attr('x', (d) => x(d.value))
      .text((d) => `${d.name} (${d.value.toFixed(3)})`),
    (exit) => exit.remove()
  );
}
521
+
522
+ // ─── LINE CHART ───
523
/**
 * Draw the "Training Progression" view: one line (with dots) per
 * non-baseline run showing the current metric over training steps.
 * Baseline runs appear as dashed horizontal reference lines at the value of
 * their last step. Hovering snaps to the nearest step and lists every
 * series' value there in the tooltip.
 */
function renderLine() {
  const width = container.clientWidth || 800;
  const height = window.innerHeight || 480;
  const margin = { top: 16, right: 50, bottom: 48, left: 60 };
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // Build series
  const series = [];
  const baselineSeries = [];
  for (const [raw, rows] of d3.group(allData, (d) => d[RUN_COL])) {
    const pts = rows
      .map((r) => ({ step: +r[STEP_COL], value: +r[currentMetric] }))
      .sort((a, b) => a.step - b.step);
    const entry = { name: displayName(raw), rawName: raw, values: pts };
    if (isBaseline(raw)) {
      entry.finalValue = pts[pts.length - 1].value;
      baselineSeries.push(entry);
    } else {
      series.push(entry);
    }
  }

  // The x domain only covers steps of non-baseline runs.
  const allSteps = Array.from(
    new Set(allData.filter((r) => !isBaseline(r[RUN_COL])).map((r) => +r[STEP_COL]))
  ).sort((a, b) => a - b);
  // Baselines contribute only their final value to the y domain.
  const allValues = [...series, ...baselineSeries]
    .flatMap((s) => (s.finalValue != null ? [s.finalValue] : s.values.map((v) => v.value)));

  const x = d3.scaleLinear().domain(d3.extent(allSteps)).range([0, innerWidth]);
  const yMin = d3.min(allValues);
  const yMax = d3.max(allValues);
  const yPad = (yMax - yMin) * 0.08;
  const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call((g) => {
    g.selectAll('line').data(y.ticks(6)).join('line')
      .attr('x1', 0).attr('x2', innerWidth).attr('y1', (d) => y(d)).attr('y2', (d) => y(d));
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(6).tickFormat((d) => stepLabelShort(d)).tickSizeOuter(0))
    .call((g) => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).ticks(6).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call((g) => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Axis labels
  gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
    .attr('x', innerWidth / 2).attr('y', innerHeight + 38)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text('Tokens (Steps)');

  gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
    .attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text(metricName(currentMetric));

  // Baseline horizontal reference lines
  gRoot.selectAll('.baseline-hline').data(baselineSeries, (d) => d.name).join(
    (enter) => enter.append('line').attr('class', 'baseline-hline baseline')
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', (d) => y(d.finalValue)).attr('y2', (d) => y(d.finalValue))
      .attr('stroke', (d) => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    (update) => update.transition().duration(300)
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', (d) => y(d.finalValue)).attr('y2', (d) => y(d.finalValue))
      .attr('stroke', (d) => colorMap[d.rawName] || '#999'),
    (exit) => exit.remove()
  );
  gRoot.selectAll('.baseline-hlabel').data(baselineSeries, (d) => d.name).join(
    (enter) => enter.append('text').attr('class', 'baseline-hlabel baseline')
      .attr('x', 4).attr('y', (d) => y(d.finalValue) - 6)
      .attr('text-anchor', 'start')
      .attr('fill', (d) => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text((d) => `${d.name} (${d.finalValue.toFixed(3)})`),
    (update) => update.transition().duration(300)
      .attr('x', 4).attr('y', (d) => y(d.finalValue) - 6)
      .text((d) => `${d.name} (${d.finalValue.toFixed(3)})`),
    (exit) => exit.remove()
  );

  // Lines (non-baseline)
  const line = d3.line().x((d) => x(d.step)).y((d) => y(d.value)).curve(d3.curveMonotoneX);
  gRoot.selectAll('.line-path').data(series, (d) => d.name).join(
    (enter) => enter.append('path').attr('class', 'line-path')
      .attr('stroke', (d) => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', (d) => line(d.values)),
    (update) => update.transition().duration(300)
      .attr('stroke', (d) => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', (d) => line(d.values)),
    (exit) => exit.remove()
  );

  // Dots (non-baseline)
  const dotData = series.flatMap((s) => s.values.map((v) => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
  gRoot.selectAll('.line-dot').data(dotData, (d) => d.name + '-' + d.step).join(
    (enter) => enter.append('circle').attr('class', 'line-dot')
      .attr('cx', (d) => x(d.step)).attr('cy', (d) => y(d.value)).attr('r', 3)
      .attr('fill', (d) => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('stroke', 'var(--surface-bg)').attr('stroke-width', 1),
    (update) => update.transition().duration(300)
      .attr('cx', (d) => x(d.step)).attr('cy', (d) => y(d.value))
      .attr('fill', (d) => colorMap[d.rawName] || 'var(--primary-color)'),
    (exit) => exit.remove()
  );

  // Hover overlay
  gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
    .attr('y1', 0).attr('y2', innerHeight).style('display', 'none');

  gRoot.selectAll('.hover-overlay').data([0]).join('rect').attr('class', 'hover-overlay')
    .attr('width', innerWidth).attr('height', innerHeight)
    .attr('fill', 'none').attr('pointer-events', 'all')
    .on('mousemove', (ev) => {
      // Only baselines are present: there is no step to snap to.
      if (!allSteps.length) return;
      const [mx] = d3.pointer(ev, gRoot.node());
      const target = x.invert(mx);
      const nearest = allSteps.reduce(
        (best, s) => (Math.abs(s - target) < Math.abs(best - target) ? s : best),
        allSteps[0]
      );
      gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);

      const entries = series.map((s) => {
        const pt = s.values.find((v) => v.step === nearest);
        return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
      }).filter(Boolean);
      baselineSeries.forEach((s) => {
        entries.push({ name: s.name, rawName: s.rawName, value: s.finalValue });
      });
      entries.sort((a, b) => b.value - a.value);

      let html = `<div style="font-weight:700;margin-bottom:4px;">${stepLabelLong(nearest)}</div>`;
      entries.forEach((e) => {
        // Same '#999' fallback the baseline lines use, so the swatch never renders "background:undefined".
        html += `<div><span class="tip-dot" style="background:${colorMap[e.rawName] || '#999'}"></span>${e.name}: <strong>${e.value.toFixed(3)}</strong></div>`;
      });
      const [cx, cy] = d3.pointer(ev, container);
      showTip(html, cx, cy);
    })
    .on('mouseleave', () => {
      gRoot.select('.hover-line').style('display', 'none');
      hideTip();
    });
}
674
+
675
+ // ─── RENDER ───
676
/**
 * Redraw the chart from scratch for the current view.
 * Does nothing while no data rows are available.
 */
function render() {
  if (allData.length === 0) return;
  initColors();
  // Start from an empty canvas: drop all marks and all pattern definitions.
  for (const layer of [gRoot, defs]) {
    layer.selectAll('*').remove();
  }
  if (currentView === 'bar') {
    renderBar();
  } else {
    renderLine();
  }
}
683
+
684
+ // ─── UI ───
685
/**
 * Build the control bar (optional Setup select, View select, empty Metric
 * select) and the legend shell, and append both to the chart container.
 * The Metric select is left without options or listeners here.
 */
function buildUI() {
  // Create a labelled <select>; the group is assembled later by `mount`.
  const createControl = (prefix, labelText) => {
    const group = document.createElement('div'); group.className = 'control-group';
    const label = document.createElement('label'); label.setAttribute('for', prefix + '-' + uid); label.textContent = labelText;
    const select = document.createElement('select'); select.id = prefix + '-' + uid;
    return { group, label, select };
  };
  const addOption = (select, value, text, isSelected) => {
    const opt = document.createElement('option'); opt.value = value; opt.textContent = text;
    if (isSelected) opt.selected = true;
    select.appendChild(opt);
  };
  const mount = (parent, { group, label, select }) => {
    group.appendChild(label); group.appendChild(select);
    parent.appendChild(group);
  };

  const controls = document.createElement('div'); controls.className = 'controls';

  if (SETUPS && setupNames.length > 0) {
    const setup = createControl('setup', 'Setup');
    for (const name of setupNames) addOption(setup.select, name, name, name === currentSetup);
    // The averaged pseudo-setup is only offered when there are at least two setups.
    if (setupNames.length >= 2) {
      addOption(setup.select, AVG_SETUP_KEY, AVG_SETUP_KEY, currentSetup === AVG_SETUP_KEY);
    }
    setup.select.addEventListener('change', () => { switchSetup(setup.select.value); });
    mount(controls, setup);
  }

  const view = createControl('view', 'View');
  for (const [val, text] of [['bar', 'Final Score'], ['line', 'Training Progression']]) {
    addOption(view.select, val, text, val === currentView);
  }
  view.select.addEventListener('change', () => { currentView = view.select.value; render(); });
  mount(controls, view);

  mount(controls, createControl('metric', 'Metric'));

  container.appendChild(controls);

  const legend = document.createElement('div'); legend.className = 'legend';
  legend.innerHTML = '<div class="legend-title">Legend</div><div class="items"></div>';
  container.appendChild(legend);
}
731
+
732
/**
 * Fill the Metric <select> from `metricKeys`, grouped into "Aggregate
 * Scores" (keys starting with `agg_score`) and "Individual Benchmarks",
 * with the current metric preselected. Safe to call repeatedly: the change
 * handler is assigned rather than stacked.
 */
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;
  sel.innerHTML = '';

  const aggGroup = document.createElement('optgroup'); aggGroup.label = 'Aggregate Scores';
  const indGroup = document.createElement('optgroup'); indGroup.label = 'Individual Benchmarks';
  for (const key of metricKeys) {
    const opt = document.createElement('option'); opt.value = key; opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    (key.startsWith('agg_score') ? aggGroup : indGroup).appendChild(opt);
  }
  // Empty groups are left out so the dropdown shows no bare headings.
  if (aggGroup.children.length) sel.appendChild(aggGroup);
  if (indGroup.children.length) sel.appendChild(indGroup);

  // Property assignment replaces any previous handler, so repopulating never fires render() twice.
  sel.onchange = () => { currentMetric = sel.value; render(); };
}
747
+
748
/**
 * Rebuild the legend: one swatch-and-label item per non-baseline run,
 * ordered by the run's value for `defaultMetric` at its highest step
 * (descending). Hovering an item highlights the matching series.
 */
function buildLegend() {
  const itemsEl = container.querySelector('.legend .items');
  if (!itemsEl) return;
  itemsEl.innerHTML = '';

  // Score used for ordering: the default metric at the run's last step, or 0 when unavailable.
  const finalScore = (rows) => {
    const maxStep = d3.max(rows, (r) => +r[STEP_COL]);
    const row = rows.find((r) => +r[STEP_COL] === maxStep);
    return row ? +row[defaultMetric] : 0;
  };
  const ranked = Array.from(
    d3.group(allData, (d) => d[RUN_COL]),
    ([raw, rows]) => ({ raw, score: finalScore(rows) })
  ).sort((a, b) => b.score - a.score);

  for (const { raw } of ranked) {
    if (isBaseline(raw)) continue;
    const name = displayName(raw);

    const item = document.createElement('span'); item.className = 'item'; item.setAttribute('data-name', name);
    const swatch = document.createElement('span'); swatch.className = 'swatch';
    swatch.style.background = colorMap[raw] || '#999';
    if (isShaded(raw)) {
      // Overlay diagonal stripes on the base color for shaded runs.
      swatch.style.backgroundImage = 'repeating-linear-gradient(45deg, transparent, transparent 2px, rgba(255,255,255,0.4) 2px, rgba(255,255,255,0.4) 4px)';
    }
    const label = document.createElement('span'); label.textContent = name;

    item.appendChild(swatch); item.appendChild(label); itemsEl.appendChild(item);
    item.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    item.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  }
}
778
+
779
+ buildUI();
780
+
781
+ // ─── DATA LOADING ───
782
/**
 * Fetch the first path that responds successfully and return its body text.
 * Paths are tried in order; network errors and non-OK responses move on to
 * the next candidate.
 *
 * @param {string[]} paths - Candidate URLs, in priority order.
 * @returns {Promise<string>} Body text of the first successful response.
 * @throws {Error} "CSV not found" when every path fails; `cause` carries the
 *   last failure so the reason is not lost.
 */
const fetchFirstAvailable = async (paths) => {
  let lastError = null;
  for (const path of paths) {
    try {
      const response = await fetch(path, { cache: 'no-cache' });
      if (response.ok) return await response.text();
      lastError = new Error(`HTTP ${response.status} for ${path}`);
    } catch (err) {
      lastError = err;
    }
  }
  throw new Error('CSV not found', lastError ? { cause: lastError } : undefined);
};
788
+
789
+ let dataMountEl = container;
790
+ while (dataMountEl && !dataMountEl.getAttribute?.('data-datafiles')) { dataMountEl = dataMountEl.parentElement; }
791
+ let providedData = null;
792
+ try {
793
+ const attr = dataMountEl && dataMountEl.getAttribute ? dataMountEl.getAttribute('data-datafiles') : null;
794
+ if (attr && attr.trim()) providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
795
+ } catch (_) {}
796
+
797
+ const ensurePrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
798
+ const csvPaths = providedData
799
+ ? (Array.isArray(providedData) ? providedData.map(ensurePrefix) : [ensurePrefix(providedData)])
800
+ : ['../data/benchmark-results.csv'];
801
+
802
+ (async () => {
803
+ try {
804
+ const text = await fetchFirstAvailable(csvPaths);
805
+ const parsed = d3.csvParse(text);
806
+ parsedData = parsed;
807
+ if (SETUPS && setupNames.length >= 2) {
808
+ const avg = computeAverageData(parsed);
809
+ avgDatasets = avg.datasets;
810
+ parsedData = parsed.concat(avg.data);
811
+ parsedData.columns = parsed.columns;
812
+ if (currentSetup === AVG_SETUP_KEY) DATASETS = { ...avgDatasets };
813
+ }
814
+ filterData();
815
+ metricKeys = detectMetrics(allData.columns);
816
+ if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
817
+ populateMetricSelect();
818
+ render();
819
+ buildLegend();
820
+ if (window.ResizeObserver) { new ResizeObserver(() => render()).observe(container); }
821
+ else { window.addEventListener('resize', () => render()); }
822
+ } catch (e) {
823
+ const pre = document.createElement('pre');
824
+ pre.textContent = 'Data load error: ' + (e && e.message ? e.message : e);
825
+ pre.style.color = 'var(--danger, #b00020)';
826
+ pre.style.fontSize = '12px';
827
+ container.appendChild(pre);
828
+ }
829
+ })();
830
+ };
831
+
832
+ if (document.readyState === 'loading') { document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true }); }
833
+ else { ensureD3(bootstrap); }
834
+ })();
835
+ </script>
836
+ </body>
837
+ </html>
app/presentation/se2026/charts/cost-efficiency.html ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html data-theme="dark">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Cost Efficiency Chart</title>
7
+ <style>
8
+ :root {
9
+ --text-color: rgba(255,255,255,0.88);
10
+ --muted-color: rgba(255,255,255,0.45);
11
+ --surface-bg: rgba(30,30,40,0.95);
12
+ --border-color: rgba(255,255,255,0.1);
13
+ --axis-color: rgba(255,255,255,0.15);
14
+ --tick-color: rgba(255,255,255,0.5);
15
+ --grid-color: rgba(255,255,255,0.06);
16
+ --primary-color: #7c6ff7;
17
+ }
18
+ * { box-sizing: border-box; margin: 0; padding: 0; }
19
+ html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
20
+ </style>
21
+ <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
22
+ </head>
23
+ <body>
24
+ <div class="d3-cost-efficiency" data-datafiles="../data/rephrasing_metadata.json" style="width:100%;margin:10px 0;min-height:400px;"></div>
25
+ <style>
26
+ .d3-cost-efficiency { font-family: system-ui, -apple-system, sans-serif; position: relative; }
27
+ .d3-cost-efficiency .d3-tooltip {
28
+ position: absolute; top: 0; left: 0;
29
+ transform: translate(-9999px, -9999px);
30
+ pointer-events: none;
31
+ padding: 10px 14px; border-radius: 10px;
32
+ font-size: 18px; line-height: 1.4;
33
+ border: 1px solid var(--border-color);
34
+ background: var(--surface-bg); color: var(--text-color);
35
+ box-shadow: 0 6px 24px rgba(0,0,0,.22);
36
+ opacity: 0; transition: opacity .12s ease;
37
+ z-index: 20; max-width: 340px;
38
+ }
39
+ .d3-cost-efficiency .controls {
40
+ display: flex; gap: 16px; align-items: center; justify-content: flex-start; flex-wrap: wrap;
41
+ margin-top: 4px;
42
+ }
43
+ .d3-cost-efficiency .control-group {
44
+ display: flex; flex-direction: column; align-items: flex-start; gap: 4px;
45
+ }
46
+ .d3-cost-efficiency .controls label {
47
+ font-size: 18px; font-weight: 700; color: var(--text-color);
48
+ }
49
+ .d3-cost-efficiency .controls select {
50
+ font-size: 18px; padding: 6px 28px 6px 10px; border: 1px solid var(--border-color);
51
+ border-radius: 8px; background: var(--surface-bg); color: var(--text-color);
52
+ appearance: none; cursor: pointer;
53
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath d='M3 5l3 3 3-3' stroke='%23888' stroke-width='1.5' fill='none'/%3E%3C/svg%3E");
54
+ background-repeat: no-repeat; background-position: right 8px center;
55
+ }
56
+ .d3-cost-efficiency .legend {
57
+ display: flex; align-items: center; gap: 10px; margin-top: 0; margin-left: auto;
58
+ }
59
+ .d3-cost-efficiency .legend-title { font-size: 18px; font-weight: 700; color: var(--text-color); }
60
+ .d3-cost-efficiency .legend .items { display: flex; flex-wrap: wrap; gap: 6px 14px; }
61
+ .d3-cost-efficiency .legend .item {
62
+ display: inline-flex; align-items: center; gap: 6px; white-space: nowrap;
63
+ font-size: 18px; color: var(--text-color); cursor: pointer;
64
+ }
65
+ .d3-cost-efficiency .legend .swatch {
66
+ width: 14px; height: 14px; border-radius: 3px; border: 1px solid var(--border-color);
67
+ }
68
+ </style>
69
+ <script>
70
+ (() => {
71
+ const bootstrap = () => {
72
+ const container = document.querySelector('.d3-cost-efficiency');
73
+ if (!container) return;
74
+ if (container.dataset.mounted === 'true') return;
75
+ container.dataset.mounted = 'true';
76
+
77
+ let mountEl = container;
78
+ while (mountEl && !mountEl.getAttribute?.('data-datafiles')) mountEl = mountEl.parentElement;
79
+ const dataAttr = mountEl?.getAttribute?.('data-datafiles');
80
+ const dataPaths = dataAttr
81
+ ? [dataAttr.includes('/') ? dataAttr : `../data/${dataAttr}`]
82
+ : ['../data/rephrasing_metadata.json', './assets/data/rephrasing_metadata.json'];
83
+
84
/**
 * Fetch the first candidate path that responds successfully and decodes cleanly.
 *
 * Paths are tried in order. A path is skipped when the request throws, the
 * response is not OK, or decoding the body throws.
 *
 * @param {string[]} paths - Candidate URLs, in priority order.
 * @param {(text: string) => *} [parse] - Optional parser applied to the body
 *   text; when omitted the body is decoded as JSON.
 * @returns {Promise<*>} Decoded payload of the first usable path.
 * @throws {Error} 'Data not found' when no path is usable; the last failure,
 *   if there was one, is attached as `cause`.
 */
const fetchFirst = async (paths, parse) => {
  let lastError = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (!r.ok) continue;
      // Await inside the try so a malformed JSON body falls through to the
      // next candidate, the same way a throwing `parse` already does.
      return parse ? parse(await r.text()) : await r.json();
    } catch (err) {
      // Best-effort lookup: remember the failure and try the next path.
      lastError = err;
    }
  }
  const error = new Error('Data not found');
  if (lastError) error.cause = lastError;
  throw error;
};
90
+
91
+ const csvPaths = ['../data/benchmark-results.csv', './assets/data/benchmark-results.csv'];
92
+
93
+ Promise.all([
94
+ fetchFirst(dataPaths),
95
+ fetchFirst(csvPaths, d3.csvParse)
96
+ ]).then(([data, csvRows]) => buildChart(data, csvRows)).catch(err => {
97
+ container.innerHTML = `<pre style="color:red;padding:12px;">Error loading data: ${err.message}</pre>`;
98
+ });
99
+
100
+ function buildChart(rawData, csvRows) {
101
+ const SOURCE_MAP = {
102
+ 'fineweb-edu-hq-20BT': 'FW-Edu HQ', 'fineweb-edu-lq-20BT': 'FW-Edu LQ',
103
+ 'dclm-37BT': 'DCLM', 'cosmopedia-25BT': 'Cosmopedia'
104
+ };
105
+ const PROMPT_LABELS = {
106
+ 'article': 'Article', 'commentary': 'Commentary', 'discussion': 'Discussion',
107
+ 'faq': 'FAQ', 'math': 'Math', 'table': 'Table', 'tutorial': 'Tutorial',
108
+ 'distill': 'Distill', 'diverse_qa_pairs': 'Diverse QA',
109
+ 'extract_knowledge': 'Extract Knowledge', 'knowledge_list': 'Knowledge List',
110
+ 'wikipedia_style_rephrasing': 'Wikipedia Style',
111
+ 'guided_rewrite_improved': 'Guided Rewrite+', 'guided_rewrite_original': 'Guided Rewrite'
112
+ };
113
+ const CAT_MAP = { 'format': 'Format', 'nemotron': 'Nemotron', 'rewire': 'REWIRE' };
114
+ const getFamily = (m) => { // Map a model identifier string to a display family name.
115
+ const ml = m.toLowerCase(); // lowercase once so every substring check below is case-insensitive
116
+ if (ml.includes('smollm')) return 'SmolLM2';
117
+ if (ml.includes('gemma')) return 'Gemma';
118
+ if (ml.includes('qwen')) return 'Qwen';
119
+ if (ml.includes('falcon')) return 'Falcon';
120
+ if (ml.includes('granite')) return 'Granite';
121
+ if (ml.includes('llama')) return 'Llama';
122
+ return 'Other'; // none of the known family substrings matched
123
+ };
124
+ const familyColors = {
125
+ 'Gemma': '#5b9bd5', 'Qwen': '#e07b54', 'SmolLM2': '#e06b9e',
126
+ 'Falcon': '#c9a046', 'Granite': '#9a8ec2', 'Llama': '#8bc474'
127
+ };
128
+ const familyOrder = ['Gemma', 'Qwen', 'SmolLM2', 'Falcon', 'Granite', 'Llama'];
129
+
130
+ const METRICS = [
131
+ { key: 'agg_score_macro', label: 'Aggregate Score (Macro)', group: 'Aggregate' },
132
+ { key: 'agg_score_micro', label: 'Aggregate Score (Micro)', group: 'Aggregate' },
133
+ { key: 'agg_score_RC', label: 'Reading Comprehension', group: 'Aggregate' },
134
+ { key: 'agg_score_GK', label: 'General Knowledge', group: 'Aggregate' },
135
+ { key: 'agg_score_NLU', label: 'Natural Language Understanding', group: 'Aggregate' },
136
+ { key: 'agg_score_MATH', label: 'Math', group: 'Aggregate' },
137
+ { key: 'agg_score_TABLE', label: 'Table Understanding', group: 'Aggregate' },
138
+ { key: 'agg_score_RES', label: 'Reasoning', group: 'Aggregate' },
139
+ { key: 'arc_cf:easy', label: 'ARC-Easy', group: 'Individual' },
140
+ { key: 'drop', label: 'DROP', group: 'Individual' },
141
+ { key: 'gsm8k', label: 'GSM8K', group: 'Individual' },
142
+ { key: 'hellaswag_cf', label: 'HellaSwag', group: 'Individual' },
143
+ { key: 'openbookqa_cf', label: 'OpenBookQA', group: 'Individual' },
144
+ { key: 'piqa_cf', label: 'PIQA', group: 'Individual' },
145
+ { key: 'squad_v2', label: 'SQuAD v2', group: 'Individual' },
146
+ { key: 'treb_qa', label: 'TriviaQA', group: 'Individual' },
147
+ { key: 'wikitablequestions', label: 'WikiTableQuestions', group: 'Individual' },
148
+ { key: 'winogrande_cf', label: 'Winogrande', group: 'Individual' },
149
+ { key: 'xcsqa_cf', label: 'XCSQA', group: 'Individual' },
150
+ { key: 'mmlu_redux_cf:_average', label: 'MMLU Redux', group: 'Individual' }
151
+ ];
152
+ const CSV_COL = (key) => { // Resolve a METRICS key to the CSV column name used to index a parsed row.
153
+ if (key.startsWith('agg_score_')) return key; // aggregate keys are returned unchanged as the column name
154
+ return `lighteval|${key}|3/prob_norm_token`;
155
+ };
156
+
157
+ const experiments = rawData.map(d => {
158
+ const [cat, promptFile] = d.prompt.split('/');
159
+ const promptKey = promptFile.replace('.md', '');
160
+ return {
161
+ run: d.run,
162
+ cat: CAT_MAP[cat] || cat,
163
+ prompt: PROMPT_LABELS[promptKey] || promptKey,
164
+ model: d.model.split('/').pop(),
165
+ source: SOURCE_MAP[d.source_dataset] || d.source_dataset,
166
+ family: getFamily(d.model),
167
+ gpuSeconds: d.gpu_time_seconds,
168
+ tpsPerGpu: d.output_tps_per_gpu,
169
+ outputTokens: d.output_tokens,
170
+ numDocs: d.num_documents,
171
+ results: d.results
172
+ };
173
+ });
174
+
175
/**
 * Format a duration in seconds as a compact label with at most two units.
 *
 * Uses fixed approximations: 1y = 365d, 1mo = 30d, 1w = 7d. The smaller unit
 * is rounded; if rounding fills a whole larger unit (7d, 4w or 12mo) it is
 * carried over, so labels such as "1w 7d" or "1y 12mo" are never produced.
 *
 * @param {number} sec - Duration in seconds.
 * @returns {string} Label such as "3d", "1w 3d", "2mo" or "1y 4mo".
 */
const fmtGpuTime = (sec) => {
  const days = sec / 86400;

  // Split `days` into whole `unitDays`-sized units plus a rounded remainder
  // counted in `subDays`-sized units, carrying when the remainder fills up.
  const twoUnits = (unitDays, unit, subDays, sub, subPerUnit) => {
    let whole = Math.floor(days / unitDays);
    let part = Math.round((days % unitDays) / subDays);
    if (part >= subPerUnit) {
      whole += 1;
      part = 0;
    }
    return part ? `${whole}${unit} ${part}${sub}` : `${whole}${unit}`;
  };

  if (days >= 365) return twoUnits(365, 'y', 30, 'mo', 12);
  if (days >= 30) return twoUnits(30, 'mo', 7, 'w', 4);
  if (days >= 7) return twoUnits(7, 'w', 1, 'd', 7);
  return `${Math.round(days)}d`;
};
182
+
183
/**
 * Compute the cost/score Pareto frontier of a set of points.
 *
 * A point is on the frontier when no point with lower-or-equal `gpuSeconds`
 * has a score at least as high. Points without a score for `metricKey`
 * (missing `results`, `null` or `undefined`) are ignored.
 *
 * @param {Array<{gpuSeconds: number, results?: Object}>} data - Candidate points; not mutated.
 * @param {string} metricKey - Key into each point's `results`.
 * @returns {Array} Frontier points ordered by increasing `gpuSeconds`.
 */
const pareto = (data, metricKey) => {
  const scoreOf = (pt) => pt.results?.[metricKey];

  // `filter` yields a fresh array, so the in-place sort never touches `data`.
  // Equal-cost points are ordered best score first, so the dominated ones
  // fail the `score > bestScore` test instead of being added before the
  // better point is seen.
  const candidates = data
    .filter((pt) => scoreOf(pt) != null)
    .sort((a, b) => (a.gpuSeconds - b.gpuSeconds) || (scoreOf(b) - scoreOf(a)));

  const frontier = [];
  let bestScore = -Infinity;
  for (const pt of candidates) {
    const score = scoreOf(pt);
    if (score > bestScore) {
      bestScore = score;
      frontier.push(pt);
    }
  }
  return frontier;
};
197
+
198
+ const BASELINE_RUNS = {
199
+ 'dclm': { label: 'DCLM', synthetic: false },
200
+ 'fw_edu_hq': { label: 'FW-Edu HQ', synthetic: false },
201
+ 'fw_edu_lq': { label: 'FW-Edu LQ', synthetic: false },
202
+ 'ultra-fineweb': { label: 'Ultra-FineWeb', synthetic: false },
203
+ 'cosmopedia': { label: 'Cosmopedia', synthetic: true },
204
+ 'nemotron_hq_synth': { label: 'Nemotron-HQ-Synth', synthetic: true },
205
+ 'rewire': { label: 'REWIRE', synthetic: true },
206
+ 'synth_query_reasoning_answer': { label: 'SYNTH', synthetic: true }
207
+ };
208
+ const BASELINE_COLOR = '#86a1a9';
209
+ const SYNTH_BASELINE_COLOR = '#b07cc8';
210
+ const metricKeys = METRICS.map(m => m.key);
211
+ const baselines = [];
212
+ const bestStep = {};
213
+ for (const row of csvRows) {
214
+ const run = row.runname;
215
+ if (!(run in BASELINE_RUNS)) continue;
216
+ const step = +row.steps;
217
+ if (!(run in bestStep) || step > bestStep[run].step) {
218
+ const results = {};
219
+ for (const k of metricKeys) results[k] = +row[CSV_COL(k)];
220
+ bestStep[run] = { step, results };
221
+ }
222
+ }
223
+ for (const [run, info] of Object.entries(BASELINE_RUNS)) {
224
+ if (run in bestStep) baselines.push({ run, label: info.label, synthetic: info.synthetic, results: bestStep[run].results });
225
+ }
226
+
227
+ let currentMetric = METRICS[0].key;
228
+
229
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
230
+ const gGrid = svg.append('g').attr('class', 'grid');
231
+ const gPareto = svg.append('g').attr('class', 'pareto');
232
+ const gDots = svg.append('g').attr('class', 'dots');
233
+ const gBaselines = svg.append('g').attr('class', 'baselines');
234
+ const gAxes = svg.append('g').attr('class', 'axes');
235
+
236
+ let tip = container.querySelector('.d3-tooltip');
237
+ let tipInner;
238
+ if (!tip) {
239
+ tip = document.createElement('div'); tip.className = 'd3-tooltip';
240
+ tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner';
241
+ tipInner.style.textAlign = 'left';
242
+ tip.appendChild(tipInner); container.appendChild(tip);
243
+ } else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
244
+
245
+ const margin = { top: 12, right: 16, bottom: 56, left: 70 };
246
+
247
+ function render() {
248
+ const width = container.clientWidth || 800;
249
+ const height = Math.min((window.innerHeight || 420) - 120, Math.max(200, Math.round(width / 3.5)));
250
+ svg.attr('width', width).attr('height', height);
251
+ const iw = width - margin.left - margin.right;
252
+ const ih = height - margin.top - margin.bottom;
253
+
254
+ const metricLabel = METRICS.find(m => m.key === currentMetric)?.label || currentMetric;
255
+
256
+ const xScale = d3.scaleLog()
257
+ .domain([5 * 86400, d3.max(experiments, d => d.gpuSeconds) * 1.2])
258
+ .range([margin.left, width - margin.right]);
259
+
260
+ const yVals = experiments.map(d => d.results[currentMetric]).filter(v => v != null)
261
+ .concat(baselines.map(d => d.results[currentMetric]).filter(v => v != null));
262
+ const yPad = (d3.max(yVals) - d3.min(yVals)) * 0.08;
263
+ const yScale = d3.scaleLinear()
264
+ .domain([d3.min(yVals) - yPad, d3.max(yVals) + yPad])
265
+ .range([height - margin.bottom, margin.top]);
266
+
267
+ const yTicks = yScale.ticks(6);
268
+ gGrid.selectAll('line').data(yTicks).join('line')
269
+ .attr('x1', margin.left).attr('x2', width - margin.right)
270
+ .attr('y1', d => yScale(d)).attr('y2', d => yScale(d))
271
+ .attr('stroke', 'var(--grid-color)').attr('stroke-width', 0.5);
272
+
273
+ gAxes.selectAll('*').remove();
274
+ const tickDays = [7, 14, 30, 60, 120, 240, 480];
275
+ const [xMin, xMax] = xScale.domain();
276
+ const tickValues = tickDays.map(d => d * 86400).filter(v => v >= xMin && v <= xMax);
277
+ const xAxis = d3.axisBottom(xScale).tickValues(tickValues).tickFormat(fmtGpuTime);
278
+ gAxes.append('g')
279
+ .attr('transform', `translate(0,${height - margin.bottom})`)
280
+ .call(xAxis)
281
+ .call(g => g.select('.domain').attr('stroke', 'var(--axis-color)'))
282
+ .call(g => g.selectAll('.tick line').attr('stroke', 'var(--tick-color)'))
283
+ .call(g => g.selectAll('.tick text').attr('fill', 'var(--tick-color)').attr('font-size', '18px'));
284
+
285
+ const yAxis = d3.axisLeft(yScale).ticks(6).tickFormat(v => { const s = v.toFixed(3); return s.replace(/0$/, ''); });
286
+ gAxes.append('g')
287
+ .attr('transform', `translate(${margin.left},0)`)
288
+ .call(yAxis)
289
+ .call(g => g.select('.domain').attr('stroke', 'var(--axis-color)'))
290
+ .call(g => g.selectAll('.tick line').attr('stroke', 'var(--tick-color)'))
291
+ .call(g => g.selectAll('.tick text').attr('fill', 'var(--tick-color)').attr('font-size', '18px'));
292
+
293
+ gAxes.append('text')
294
+ .attr('x', margin.left + iw / 2).attr('y', height - 4)
295
+ .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)')
296
+ .attr('font-size', '18px').attr('font-weight', '600')
297
+ .text('GPU time (log scale)');
298
+
299
+ gAxes.append('text')
300
+ .attr('transform', `rotate(-90)`)
301
+ .attr('x', -(margin.top + ih / 2)).attr('y', 14)
302
+ .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)')
303
+ .attr('font-size', '18px').attr('font-weight', '600')
304
+ .text(metricLabel);
305
+
306
+ const frontierPts = pareto(experiments, currentMetric);
307
+ const lineGen = d3.line()
308
+ .x(d => xScale(d.gpuSeconds))
309
+ .y(d => yScale(d.results[currentMetric]));
310
+
311
+ const extendedFrontier = [...frontierPts];
312
+ if (frontierPts.length > 0) {
313
+ const last = frontierPts[frontierPts.length - 1];
314
+ extendedFrontier.push({ gpuSeconds: xScale.domain()[1], results: { [currentMetric]: last.results[currentMetric] } });
315
+ }
316
+
317
+ gPareto.selectAll('path').data([extendedFrontier]).join('path')
318
+ .attr('d', lineGen)
319
+ .attr('fill', 'none')
320
+ .attr('stroke', 'var(--primary-color)')
321
+ .attr('stroke-width', 2)
322
+ .attr('stroke-dasharray', '6,4')
323
+ .attr('opacity', 0.6);
324
+
325
+ const rBase = Math.max(5, Math.min(9, width * 0.008));
326
+
327
+ gDots.selectAll('circle').data(experiments, d => d.run).join('circle')
328
+ .attr('cx', d => xScale(d.gpuSeconds))
329
+ .attr('cy', d => yScale(d.results[currentMetric]))
330
+ .attr('r', rBase)
331
+ .attr('fill', d => familyColors[d.family] || '#999')
332
+ .attr('fill-opacity', 0.8)
333
+ .attr('stroke', d => familyColors[d.family] || '#999')
334
+ .attr('stroke-width', 1.5)
335
+ .attr('stroke-opacity', 0.3)
336
+ .attr('cursor', 'pointer')
337
+ .on('mouseenter', function(ev, d) {
338
+ d3.select(this).attr('r', rBase * 1.6).attr('fill-opacity', 1).attr('stroke-opacity', 0.8);
339
+ gDots.selectAll('circle').filter(c => c !== d)
340
+ .attr('fill-opacity', 0.2).attr('stroke-opacity', 0.1);
341
+ gBaselines.selectAll('circle').attr('fill-opacity', 0.12).attr('stroke-opacity', 0.2);
342
+ const score = d.results[currentMetric];
343
+ tipInner.innerHTML =
344
+ `<div style="font-weight:700;font-size:14px;margin-bottom:4px;">${d.prompt} (${d.cat})</div>` +
345
+ `<div style="font-size:12px;color:var(--muted-color);margin-bottom:6px;">` +
346
+ `<span style="display:inline-block;width:10px;height:10px;border-radius:50%;background:${familyColors[d.family]};margin-right:4px;vertical-align:middle;"></span>` +
347
+ `${d.model} · ${d.source}</div>` +
348
+ `<div style="display:grid;grid-template-columns:auto 1fr;gap:2px 10px;font-size:13px;">` +
349
+ `<span style="color:var(--muted-color);">GPU time</span><span>${fmtGpuTime(d.gpuSeconds)}</span>` +
350
+ `<span style="color:var(--muted-color);">TPS/GPU</span><span>${d.tpsPerGpu.toLocaleString()}</span>` +
351
+ `<span style="color:var(--muted-color);">Output tokens</span><span>${(d.outputTokens / 1e9).toFixed(1)}B</span>` +
352
+ `<span style="color:var(--muted-color);">Documents</span><span>${(d.numDocs / 1e6).toFixed(1)}M</span>` +
353
+ `<span style="color:var(--muted-color);">${metricLabel}</span><span style="font-weight:700;">${score != null ? score.toFixed(4) : 'N/A'}</span>` +
354
+ `</div>`;
355
+ tip.style.opacity = '1';
356
+ })
357
+ .on('mousemove', (ev) => {
358
+ const [mx, my] = d3.pointer(ev, container);
359
+ const bw = tip.offsetWidth || 280;
360
+ const bh = tip.offsetHeight || 160;
361
+ const ox = (mx + bw + 20 > width) ? -(bw + 12) : 14;
362
+ const oy = (my + bh + 20 > (height + 60)) ? -(bh + 12) : 14;
363
+ tip.style.transform = `translate(${Math.round(mx + ox)}px,${Math.round(my + oy)}px)`;
364
+ })
365
+ .on('mouseleave', function() {
366
+ gDots.selectAll('circle').attr('r', rBase).attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
367
+ gBaselines.selectAll('circle').attr('r', rBase * 1.1).attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
368
+ tip.style.opacity = '0';
369
+ tip.style.transform = 'translate(-9999px,-9999px)';
370
+ });
371
+
372
+ const bx = margin.left + rBase + 10;
373
+ const bColor = d => d.synthetic ? SYNTH_BASELINE_COLOR : BASELINE_COLOR;
374
+ gBaselines.selectAll('circle').data(baselines, d => d.run).join('circle')
375
+ .attr('cx', bx)
376
+ .attr('cy', d => yScale(d.results[currentMetric]))
377
+ .attr('r', rBase * 1.1)
378
+ .attr('fill', bColor)
379
+ .attr('fill-opacity', 0.35)
380
+ .attr('stroke', bColor)
381
+ .attr('stroke-width', 2)
382
+ .attr('stroke-opacity', 0.6)
383
+ .attr('cursor', 'pointer')
384
+ .on('mouseenter', function(ev, d) {
385
+ d3.select(this).attr('r', rBase * 1.8).attr('fill-opacity', 0.6).attr('stroke-opacity', 1);
386
+ gDots.selectAll('circle').attr('fill-opacity', 0.15).attr('stroke-opacity', 0.08);
387
+ const score = d.results[currentMetric];
388
+ const tag = d.synthetic ? 'synthetic baseline' : 'baseline';
389
+ tipInner.innerHTML =
390
+ `<div style="font-weight:700;font-size:14px;margin-bottom:4px;">${d.label} <span style="font-weight:400;font-size:12px;color:var(--muted-color);">(${tag})</span></div>` +
391
+ `<div style="display:grid;grid-template-columns:auto 1fr;gap:2px 10px;font-size:13px;">` +
392
+ `<span style="color:var(--muted-color);">GPU time</span><span>0 (no rephrasing)</span>` +
393
+ `<span style="color:var(--muted-color);">${metricLabel}</span><span style="font-weight:700;">${score != null ? score.toFixed(4) : 'N/A'}</span>` +
394
+ `</div>`;
395
+ tip.style.opacity = '1';
396
+ })
397
+ .on('mousemove', (ev) => {
398
+ const [mx, my] = d3.pointer(ev, container);
399
+ const bw = tip.offsetWidth || 280;
400
+ const bh = tip.offsetHeight || 100;
401
+ const ox = (mx + bw + 20 > width) ? -(bw + 12) : 14;
402
+ const oy = (my + bh + 20 > (height + 60)) ? -(bh + 12) : 14;
403
+ tip.style.transform = `translate(${Math.round(mx + ox)}px,${Math.round(my + oy)}px)`;
404
+ })
405
+ .on('mouseleave', function() {
406
+ gBaselines.selectAll('circle').attr('r', rBase * 1.1).attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
407
+ gDots.selectAll('circle').attr('r', rBase).attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
408
+ tip.style.opacity = '0';
409
+ tip.style.transform = 'translate(-9999px,-9999px)';
410
+ });
411
+ }
412
+
413
+ const controls = document.createElement('div'); controls.className = 'controls';
414
+ const cg = document.createElement('div'); cg.className = 'control-group';
415
+ const lbl = document.createElement('label'); lbl.textContent = 'Metric'; lbl.setAttribute('for', 'ce-metric-select');
416
+ const sel = document.createElement('select'); sel.id = 'ce-metric-select';
417
+ const groups = {};
418
+ METRICS.forEach(m => { (groups[m.group] = groups[m.group] || []).push(m); });
419
+ for (const [gName, gMetrics] of Object.entries(groups)) {
420
+ const og = document.createElement('optgroup'); og.label = gName;
421
+ gMetrics.forEach(m => { const o = document.createElement('option'); o.value = m.key; o.textContent = m.label; og.appendChild(o); });
422
+ sel.appendChild(og);
423
+ }
424
+ sel.value = currentMetric;
425
+ sel.addEventListener('change', () => { currentMetric = sel.value; render(); });
426
+ cg.appendChild(lbl); cg.appendChild(sel); controls.appendChild(cg);
427
+
428
+ const legend = document.createElement('div'); legend.className = 'legend';
429
+ const ltitle = document.createElement('div'); ltitle.className = 'legend-title'; ltitle.textContent = 'Legend';
430
+ const items = document.createElement('div'); items.className = 'items';
431
+ familyOrder.forEach(fam => {
432
+ const el = document.createElement('span'); el.className = 'item';
433
+ const sw = document.createElement('span'); sw.className = 'swatch'; sw.style.background = familyColors[fam];
434
+ const txt = document.createElement('span'); txt.textContent = fam;
435
+ el.appendChild(sw); el.appendChild(txt); items.appendChild(el);
436
+ el.addEventListener('mouseenter', () => {
437
+ gDots.selectAll('circle').attr('fill-opacity', d => d.family === fam ? 0.9 : 0.1)
438
+ .attr('stroke-opacity', d => d.family === fam ? 0.6 : 0.05);
439
+ gBaselines.selectAll('circle').attr('fill-opacity', 0.12).attr('stroke-opacity', 0.2);
440
+ });
441
+ el.addEventListener('mouseleave', () => {
442
+ gDots.selectAll('circle').attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
443
+ gBaselines.selectAll('circle').attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
444
+ });
445
+ });
446
+ legend.appendChild(ltitle); legend.appendChild(items); controls.appendChild(legend); container.appendChild(controls);
447
+
448
+ [[false, BASELINE_COLOR, 'Baselines'],
449
+ [true, SYNTH_BASELINE_COLOR, 'Synthetic baselines']].forEach(([isSynth, c, text]) => {
450
+ const el = document.createElement('span'); el.className = 'item';
451
+ el.innerHTML = `<svg width="14" height="14" style="vertical-align:middle;"><circle cx="7" cy="7" r="6" fill="${c}" fill-opacity="0.35" stroke="${c}" stroke-width="2" stroke-opacity="0.6"/></svg><span>${text}</span>`;
452
+ items.appendChild(el);
453
+ el.addEventListener('mouseenter', () => {
454
+ gBaselines.selectAll('circle')
455
+ .attr('fill-opacity', d => d.synthetic === isSynth ? 0.6 : 0.1)
456
+ .attr('stroke-opacity', d => d.synthetic === isSynth ? 1 : 0.15);
457
+ gDots.selectAll('circle').attr('fill-opacity', 0.15).attr('stroke-opacity', 0.08);
458
+ });
459
+ el.addEventListener('mouseleave', () => {
460
+ gBaselines.selectAll('circle').attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
461
+ gDots.selectAll('circle').attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
462
+ });
463
+ });
464
+
465
+ const paretoItem = document.createElement('span'); paretoItem.className = 'item';
466
+ paretoItem.innerHTML = `<svg width="20" height="14" style="vertical-align:middle;"><line x1="0" y1="7" x2="20" y2="7" stroke="var(--primary-color)" stroke-width="2" stroke-dasharray="4,3" opacity="0.6"/></svg><span>Pareto frontier</span>`;
467
+ items.appendChild(paretoItem);
468
+
469
+ render();
470
+ if (window.ResizeObserver) new ResizeObserver(() => render()).observe(container);
471
+ else window.addEventListener('resize', render);
472
+ }
473
+ };
474
+
475
+ if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
476
+ else bootstrap();
477
+ })();
478
+ </script>
479
+ </body>
480
+ </html>
app/presentation/se2026/charts/experiment-flow.html ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html data-theme="dark" lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Experiment Flow</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/d3-sankey@0.12.3/dist/d3-sankey.min.js"></script>
9
+ <style>
10
+ * { box-sizing: border-box; margin: 0; padding: 0; }
11
+ html, body { width: 100%; height: 100%; background: transparent; overflow: visible; }
12
+ :root {
13
+ --text-color: rgba(255,255,255,0.88);
14
+ --muted-color: rgba(255,255,255,0.45);
15
+ --surface-bg: rgba(30,30,40,0.95);
16
+ --border-color: rgba(255,255,255,0.1);
17
+ --primary-color: #7c6ff7;
18
+ }
19
+ .d3-experiment-overview { position: relative; font-family: system-ui, -apple-system, sans-serif; }
20
+ </style>
21
+ </head>
22
+ <body>
23
+ <div class="d3-experiment-overview" data-datafiles="../data/rephrasing_metadata.json" style="width:100%;height:100%;min-height:300px;"></div>
24
+ <script>
25
+ (() => {
26
/**
 * Run `cb` once both d3 and the d3-sankey plugin are available on `window`.
 *
 * Missing libraries are loaded from the jsDelivr CDN by appending `<script>`
 * elements to `document.head`; existing elements with the ids
 * `d3-cdn-script` / `d3-sankey-cdn` are reused rather than duplicated.
 *
 * @param {Function} cb - Invoked with no arguments once both libraries exist.
 * @returns {*} The result of `cb()` when everything is already loaded,
 *   otherwise `undefined` (the callback then fires from a `load` event).
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3 && typeof window.d3.select === 'function');
  if (hasD3() && typeof window.d3.sankey === 'function') return cb();

  const loadSankey = () => {
    if (typeof window.d3.sankey === 'function') return cb();
    let s2 = document.getElementById('d3-sankey-cdn');
    if (!s2) {
      s2 = document.createElement('script');
      s2.id = 'd3-sankey-cdn';
      s2.src = 'https://cdn.jsdelivr.net/npm/d3-sankey@0.12.3/dist/d3-sankey.min.js';
      document.head.appendChild(s2);
    }
    s2.addEventListener('load', cb, { once: true });
  };

  // Check for d3 before injecting anything: when only the plugin is missing,
  // appending another d3 <script> would download and re-run the bundle for
  // no benefit.
  if (hasD3()) {
    loadSankey();
    return;
  }

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }
  s.addEventListener('load', loadSankey, { once: true });
};
49
+
50
+ const bootstrap = () => {
51
+ const scriptEl = document.currentScript;
52
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
53
+ if (!(container && container.classList && container.classList.contains('d3-experiment-overview'))) {
54
+ const cs = Array.from(document.querySelectorAll('.d3-experiment-overview'))
55
+ .filter(el => !(el.dataset && el.dataset.mounted === 'true'));
56
+ container = cs[cs.length - 1] || null;
57
+ }
58
+ if (!container) return;
59
+ if (container.dataset) {
60
+ if (container.dataset.mounted === 'true') return;
61
+ container.dataset.mounted = 'true';
62
+ }
63
+
64
+ // Read data path from HtmlEmbed attribute
65
+ let mountEl = container;
66
+ while (mountEl && !mountEl.getAttribute?.('data-datafiles')) mountEl = mountEl.parentElement;
67
+ const dataAttr = mountEl?.getAttribute?.('data-datafiles');
68
+ const dataPaths = dataAttr
69
+ ? [dataAttr.includes('/') ? dataAttr : `/data/${dataAttr}`]
70
+ : ['../data/rephrasing_metadata.json'];
71
+
72
/**
 * Fetch the first candidate path that responds successfully with valid JSON.
 *
 * Paths are tried in order. A path is skipped when the request throws, the
 * response is not OK, or the body is not valid JSON.
 *
 * @param {string[]} paths - Candidate URLs, in priority order.
 * @returns {Promise<*>} Decoded JSON payload of the first usable path.
 * @throws {Error} 'Data not found' when no path is usable; the last failure,
 *   if there was one, is attached as `cause`.
 */
const fetchFirst = async (paths) => {
  let lastError = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (!r.ok) continue;
      // Await inside the try so a malformed body moves on to the next
      // candidate instead of rejecting the whole lookup.
      return await r.json();
    } catch (err) {
      // Best-effort lookup: remember the failure and try the next path.
      lastError = err;
    }
  }
  const error = new Error('Data not found');
  if (lastError) error.cause = lastError;
  throw error;
};
78
+
79
+ fetchFirst(dataPaths).then(data => buildChart(data)).catch(err => {
80
+ container.innerHTML = `<pre style="color:red;padding:12px;">Error loading data: ${err.message}</pre>`;
81
+ });
82
+
83
+ function buildChart(rawData) {
84
+ // Map source dataset strings to display names
85
+ const sourceMap = {
86
+ 'fineweb-edu-hq-20BT': 'FW-Edu HQ',
87
+ 'fineweb-edu-lq-20BT': 'FW-Edu LQ',
88
+ 'dclm-37BT': 'DCLM',
89
+ 'cosmopedia-25BT': 'Cosmopedia',
90
+ };
91
+
92
+ // Map prompt paths to display names and categories
93
+ const promptMap = {
94
+ 'format/tutorial.md': { name: 'Tutorial', cat: 'Format' },
95
+ 'format/faq.md': { name: 'FAQ', cat: 'Format' },
96
+ 'format/math.md': { name: 'Math', cat: 'Format' },
97
+ 'format/table.md': { name: 'Table', cat: 'Format' },
98
+ 'format/commentary.md': { name: 'Commentary', cat: 'Format' },
99
+ 'format/discussion.md': { name: 'Discussion', cat: 'Format' },
100
+ 'format/article.md': { name: 'Article', cat: 'Format' },
101
+ 'nemotron/diverse_qa_pairs.md': { name: 'Diverse QA', cat: 'Nemotron' },
102
+ 'nemotron/knowledge_list.md': { name: 'Knowledge List', cat: 'Nemotron' },
103
+ 'nemotron/wikipedia_style_rephrasing.md': { name: 'Wikipedia Style', cat: 'Nemotron' },
104
+ 'nemotron/extract_knowledge.md': { name: 'Extract Knowledge', cat: 'Nemotron' },
105
+ 'nemotron/distill.md': { name: 'Distill', cat: 'Nemotron' },
106
+ 'rewire/guided_rewrite_original.md': { name: 'Guided Rewrite', cat: 'REWIRE' },
107
+ 'rewire/guided_rewrite_improved.md': { name: 'Guided Rewrite+', cat: 'REWIRE' },
108
+ };
109
+
110
+ // Map model IDs to family names
111
/**
 * Map a model identifier to its display family name.
 *
 * Matching is a case-insensitive substring test, checked in the order listed
 * below; the first hit wins.
 *
 * @param {string} modelId - Model identifier string.
 * @returns {string} The family name, or `modelId` itself when nothing matches.
 */
const modelFamilyMap = (modelId) => {
  const id = modelId.toLowerCase();
  const families = [
    ['gemma', 'Gemma'],
    ['qwen', 'Qwen'],
    ['falcon', 'Falcon'],
    ['granite', 'Granite'],
    ['llama', 'Llama'],
    ['smollm', 'SmolLM2'],
  ];
  const match = families.find(([needle]) => id.includes(needle));
  return match ? match[1] : modelId;
};
120
+
121
+ // Build link counts from data
122
+ const linkCounts = {};
123
+ const key = (a, b) => `${a}|||${b}`;
124
+
125
+ rawData.forEach(exp => {
126
+ const src = sourceMap[exp.source_dataset];
127
+ const promptInfo = promptMap[exp.prompt];
128
+ const family = modelFamilyMap(exp.model);
129
+ if (!src || !promptInfo) return;
130
+
131
+ const spKey = key(src, promptInfo.name);
132
+ linkCounts[spKey] = (linkCounts[spKey] || 0) + 1;
133
+
134
+ const pmKey = key(promptInfo.name, family);
135
+ linkCounts[pmKey] = (linkCounts[pmKey] || 0) + 1;
136
+ });
137
+
138
+ // Collect unique names in order
139
+ const sources = [...new Set(rawData.map(e => sourceMap[e.source_dataset]).filter(Boolean))];
140
+ const prompts = [...new Set(rawData.map(e => promptMap[e.prompt]?.name).filter(Boolean))];
141
+ const models = [...new Set(rawData.map(e => modelFamilyMap(e.model)).filter(Boolean))];
142
+
143
+ // Build node list
144
+ const nodes = [];
145
+ sources.forEach(name => nodes.push({ name, col: 'source' }));
146
+ prompts.forEach(name => {
147
+ const info = Object.values(promptMap).find(p => p.name === name);
148
+ nodes.push({ name, col: 'prompt', cat: info?.cat || 'Other' });
149
+ });
150
+ models.forEach(name => nodes.push({ name, col: 'model' }));
151
+
152
+ const ni = (name) => nodes.findIndex(n => n.name === name);
153
+
154
+ // Build links
155
+ const links = [];
156
+ Object.entries(linkCounts).forEach(([k, value]) => {
157
+ const [from, to] = k.split('|||');
158
+ const s = ni(from), t = ni(to);
159
+ if (s >= 0 && t >= 0) links.push({ source: s, target: t, value });
160
+ });
161
+
162
+ // Colors
163
+ const sourceColors = { 'FW-Edu HQ': '#6B8DB5', 'FW-Edu LQ': '#B58B9B', 'DCLM': '#7B82C8', 'Cosmopedia': '#8BA878' };
164
+ const catColors = { 'Format': '#4EA5B7', 'Nemotron': '#76b900', 'REWIRE': '#1877F2' };
165
+ const familyColors = { 'Gemma': '#5b9bd5', 'Qwen': '#e07b54', 'SmolLM2': '#e06b9e', 'Falcon': '#c9a046', 'Granite': '#9a8ec2', 'Llama': '#8bc474' };
166
+
167
/**
 * Resolve the fill colour for a Sankey node.
 *
 * The node's column decides which palette is consulted: sources are keyed by
 * node name, prompts by their category, and models by node name (the family).
 * Anything without a palette entry falls back to a neutral grey.
 *
 * @param {{col: string, name: string, cat?: string}} d - Node datum.
 * @returns {string} CSS colour string.
 */
const nodeColor = (d) => {
  const fallback = '#888';
  switch (d.col) {
    case 'source':
      return sourceColors[d.name] || fallback;
    case 'prompt':
      return catColors[d.cat] || fallback;
    case 'model':
      return familyColors[d.name] || fallback;
    default:
      return fallback;
  }
};
173
+
174
+ // SVG
175
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
176
+
177
+ const render = () => {
178
+ const width = container.clientWidth || 800;
179
+ const iframeH = window.innerHeight || 540;
180
+ const height = Math.min(iframeH, Math.max(400, width * 9 / 16));
181
+ svg.attr('width', width).attr('height', height);
182
+ svg.selectAll('*').remove();
183
+
184
+ const isDark = document.documentElement.getAttribute('data-theme') === 'dark';
185
+ const textColor = isDark ? 'rgba(255,255,255,0.78)' : 'rgba(0,0,0,0.68)';
186
+ const mutedText = isDark ? 'rgba(255,255,255,0.35)' : 'rgba(0,0,0,0.30)';
187
+ const linkOpacity = isDark ? 0.20 : 0.35;
188
+ const linkHoverOpacity = isDark ? 0.50 : 0.65;
189
+ const fontSize = Math.max(9, Math.min(12, width / 80));
190
+
191
+ const ml = width * 0.005, mr = width * 0.01;
192
+ const mt = 28, mb = height * 0.01;
193
+
194
+ const sankeyGen = d3.sankey()
195
+ .nodeId(d => d.index)
196
+ .nodeWidth(Math.max(8, width * 0.012))
197
+ .nodePadding(Math.max(4, height * 0.014))
198
+ .nodeSort(null)
199
+ .extent([[ml, mt], [width - mr, height - mb]]);
200
+
201
+ const graph = sankeyGen({
202
+ nodes: nodes.map((d, i) => ({ ...d, index: i })),
203
+ links: links.map(d => ({ ...d }))
204
+ });
205
+
206
+ // Column headers
207
+ const modelNodes = graph.nodes.filter(n => n.col === 'model');
208
+ const colLabels = [
209
+ { text: 'Source Dataset', x: graph.nodes.filter(n => n.col === 'source')[0]?.x0 || ml, anchor: 'start' },
210
+ { text: 'Prompt Strategy', x: graph.nodes.filter(n => n.col === 'prompt')[0]?.x1 || width * 0.35, anchor: 'end' },
211
+ { text: 'Model Family', x: (modelNodes[0]?.x1 || width * 0.75), anchor: 'end' },
212
+ ];
213
+ svg.selectAll('text.col-header')
214
+ .data(colLabels).join('text')
215
+ .attr('class', 'col-header')
216
+ .attr('x', d => d.x).attr('y', mt - 8)
217
+ .attr('text-anchor', d => d.anchor)
218
+ .attr('fill', mutedText)
219
+ .attr('font-size', (fontSize * 1.4) + 'px')
220
+ .attr('font-weight', '700')
221
+ .attr('font-family', 'system-ui, -apple-system, sans-serif')
222
+ .attr('letter-spacing', '0.5px')
223
+ .attr('text-transform', 'uppercase')
224
+ .text(d => d.text);
225
+
226
+ // Category brackets for prompts
227
+ const catGroups = {};
228
+ graph.nodes.filter(n => n.col === 'prompt').forEach(n => {
229
+ if (!catGroups[n.cat]) catGroups[n.cat] = { min: Infinity, max: -Infinity };
230
+ catGroups[n.cat].min = Math.min(catGroups[n.cat].min, n.y0);
231
+ catGroups[n.cat].max = Math.max(catGroups[n.cat].max, n.y1);
232
+ });
233
+ const bracketX = (graph.nodes.find(n => n.col === 'prompt')?.x1 || 0) + 5;
234
+ Object.entries(catGroups).forEach(([cat, { min: y0, max: y1 }]) => {
235
+ const midY = (y0 + y1) / 2;
236
+ svg.append('line')
237
+ .attr('x1', bracketX).attr('x2', bracketX)
238
+ .attr('y1', y0 + 2).attr('y2', y1 - 2)
239
+ .attr('stroke', catColors[cat]).attr('stroke-width', 1.5)
240
+ .attr('stroke-opacity', 0.35).attr('stroke-linecap', 'round');
241
+ svg.append('text')
242
+ .attr('x', bracketX + 4).attr('y', midY)
243
+ .attr('dominant-baseline', 'central')
244
+ .attr('fill', catColors[cat]).attr('fill-opacity', 0.45)
245
+ .attr('font-size', (fontSize * 1.3) + 'px')
246
+ .attr('font-weight', '600')
247
+ .attr('font-family', 'system-ui, -apple-system, sans-serif')
248
+ .attr('letter-spacing', '0.3px')
249
+ .text(cat);
250
+ });
251
+
252
+ // Links
253
+ const gLinks = svg.append('g').attr('class', 'links');
254
+ const linkPath = d3.sankeyLinkHorizontal();
255
+ const linkEls = gLinks.selectAll('path')
256
+ .data(graph.links).join('path')
257
+ .attr('d', linkPath)
258
+ .attr('fill', 'none')
259
+ .attr('stroke', d => nodeColor(d.source))
260
+ .attr('stroke-width', d => Math.max(1, d.width))
261
+ .attr('stroke-opacity', linkOpacity)
262
+ .style('mix-blend-mode', isDark ? 'screen' : 'multiply');
263
+
264
+ // Nodes
265
+ const gNodes = svg.append('g').attr('class', 'nodes');
266
+ const nodeEls = gNodes.selectAll('rect')
267
+ .data(graph.nodes).join('rect')
268
+ .attr('x', d => d.x0).attr('y', d => d.y0)
269
+ .attr('width', d => d.x1 - d.x0)
270
+ .attr('height', d => Math.max(1, d.y1 - d.y0))
271
+ .attr('fill', d => nodeColor(d))
272
+ .attr('fill-opacity', 0.85).attr('rx', 2)
273
+ .attr('stroke', d => nodeColor(d))
274
+ .attr('stroke-width', 0.5).attr('stroke-opacity', 0.3);
275
+
276
+ // Node labels (interactive, same hover as node rects)
277
+ const gLabels = svg.append('g').attr('class', 'labels');
278
+ graph.nodes.forEach(d => {
279
+ const midY = (d.y0 + d.y1) / 2;
280
+ const isSource = d.col === 'source';
281
+ let labelX, anchor;
282
+ if (isSource) { labelX = d.x1 + 5; anchor = 'start'; }
283
+ else { labelX = d.x0 - 5; anchor = 'end'; }
284
+
285
+ const totalIn = (d.targetLinks || []).reduce((s, l) => s + l.value, 0);
286
+ const totalOut = (d.sourceLinks || []).reduce((s, l) => s + l.value, 0);
287
+ const total = Math.max(totalIn, totalOut);
288
+
289
+ gLabels.append('text')
290
+ .datum(d)
291
+ .attr('class', 'node-label')
292
+ .attr('x', labelX).attr('y', midY - (total > 1 ? fontSize * 0.3 : 0))
293
+ .attr('text-anchor', anchor).attr('dominant-baseline', 'central')
294
+ .attr('fill', textColor)
295
+ .attr('font-size', fontSize + 'px').attr('font-weight', '600')
296
+ .attr('font-family', 'system-ui, -apple-system, sans-serif')
297
+ .style('cursor', 'pointer')
298
+ .text(d.name);
299
+
300
+ if (total > 1) {
301
+ gLabels.append('text')
302
+ .datum(d)
303
+ .attr('class', 'node-label')
304
+ .attr('x', labelX).attr('y', midY + fontSize * 0.55)
305
+ .attr('text-anchor', anchor).attr('dominant-baseline', 'central')
306
+ .attr('fill', mutedText)
307
+ .attr('font-size', (fontSize * 0.8) + 'px')
308
+ .attr('font-family', 'system-ui, -apple-system, sans-serif')
309
+ .style('cursor', 'pointer')
310
+ .text(total + ' exp.');
311
+ }
312
+ });
313
+
314
+ // Tooltip
315
+ container.style.position = container.style.position || 'relative';
316
+ let tip = container.querySelector('.d3-tooltip');
317
+ let tipInner;
318
+ if (!tip) {
319
+ tip = document.createElement('div');
320
+ tip.className = 'd3-tooltip';
321
+ Object.assign(tip.style, {
322
+ position: 'absolute', top: '0px', left: '0px',
323
+ transform: 'translate(-9999px, -9999px)',
324
+ pointerEvents: 'none', padding: '8px 12px', borderRadius: '10px',
325
+ fontSize: '12px', lineHeight: '1.4',
326
+ border: '1px solid var(--border-color)',
327
+ background: 'var(--surface-bg)', color: 'var(--text-color)',
328
+ boxShadow: '0 6px 24px rgba(0,0,0,.25)',
329
+ opacity: '0', transition: 'opacity .12s ease',
330
+ backdropFilter: 'saturate(1.12) blur(8px)',
331
+ zIndex: '20', maxWidth: '280px'
332
+ });
333
+ tipInner = document.createElement('div');
334
+ tipInner.className = 'd3-tooltip__inner';
335
+ tip.appendChild(tipInner);
336
+ container.appendChild(tip);
337
+ } else {
338
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
339
+ }
340
+
341
+ const positionTip = (ev) => {
342
+ const [mx, my] = d3.pointer(ev, container);
343
+ const bw = tip.offsetWidth || 220, bh = tip.offsetHeight || 60;
344
+ const ox = (mx + bw + 20 > width) ? -(bw + 12) : 12;
345
+ const oy = (my + bh + 20 > height) ? -(bh + 12) : 14;
346
+ tip.style.transform = `translate(${Math.round(mx + ox)}px, ${Math.round(my + oy)}px)`;
347
+ };
348
+ const showTip = (ev, html) => { tipInner.innerHTML = html; tip.style.opacity = '1'; positionTip(ev); };
349
+ const hideTip = () => { tip.style.opacity = '0'; tip.style.transform = 'translate(-9999px, -9999px)'; };
350
+
351
+ // Interaction
352
+ linkEls
353
+ .on('mouseenter', function (ev, d) {
354
+ linkEls.attr('stroke-opacity', l => l === d ? linkHoverOpacity * 1.5 : linkOpacity * 0.3);
355
+ showTip(ev, `<b>${d.source.name}</b> \u2192 <b>${d.target.name}</b><br/><span style="color:var(--muted-color);">${d.value} experiment${d.value > 1 ? 's' : ''}</span>`);
356
+ })
357
+ .on('mousemove', positionTip)
358
+ .on('mouseleave', function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); });
359
+
360
+ // Shared node hover handlers (used by both rects and labels)
361
+ const onNodeEnter = function (ev, d) {
362
+ const connected = new Set();
363
+ (d.sourceLinks || []).forEach(l => connected.add(l.index));
364
+ (d.targetLinks || []).forEach(l => connected.add(l.index));
365
+ linkEls.attr('stroke-opacity', l => connected.has(l.index) ? linkHoverOpacity : linkOpacity * 0.15);
366
+ const totalIn = (d.targetLinks || []).reduce((s, l) => s + l.value, 0);
367
+ const totalOut = (d.sourceLinks || []).reduce((s, l) => s + l.value, 0);
368
+ const total = Math.max(totalIn, totalOut);
369
+ let info = `<b style="font-size:14px;">${d.name}</b>`;
370
+ if (d.cat) info += ` <span style="color:${catColors[d.cat]};font-size:12px;">(${d.cat})</span>`;
371
+ info += `<br/><span style="color:var(--muted-color);">${total} experiment${total > 1 ? 's' : ''}</span>`;
372
+ showTip(ev, info);
373
+ };
374
+ const onNodeLeave = function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); };
375
+
376
+ nodeEls.style('cursor', 'pointer')
377
+ .on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
378
+
379
+ gLabels.selectAll('.node-label')
380
+ .on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
381
+ };
382
+
383
+ if (window.ResizeObserver) new ResizeObserver(() => render()).observe(container);
384
+ else window.addEventListener('resize', render);
385
+ new MutationObserver(() => render()).observe(document.documentElement, { attributes: true, attributeFilter: ['data-theme'] });
386
+ render();
387
+ }
388
+ };
389
+
390
+ if (document.readyState === 'loading') {
391
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
392
+ } else { ensureD3(bootstrap); }
393
+ })();
394
+ </script>
395
+ </body>
396
+ </html>
app/presentation/se2026/charts/pipeline.html ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="dark">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>DataTrove Pipeline</title>
7
+ <style>
8
+ :root {
9
+ --text-color: rgba(255,255,255,0.88);
10
+ --muted-color: rgba(255,255,255,0.45);
11
+ --surface-bg: rgba(30,30,40,0.95);
12
+ --border-color: rgba(255,255,255,0.1);
13
+ --primary-color: #7c6ff7;
14
+ }
15
+ * { box-sizing: border-box; margin: 0; padding: 0; }
16
+ html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
17
+ </style>
18
+ <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
19
+ </head>
20
+ <body>
21
+ <div class="d3-pipeline"></div>
22
+ <style>
23
+ .d3-pipeline {
24
+ position: relative;
25
+ width: 100%;
26
+ margin: 0;
27
+ container-type: inline-size;
28
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
29
+ }
30
+ .d3-pipeline .node-group { cursor: default; }
31
+ .d3-pipeline .node-card { transition: filter .15s ease; }
32
+ .d3-pipeline .node-group:hover .node-card { filter: brightness(1.05); }
33
+ .d3-pipeline .node-title { font-weight: 700; fill: var(--text-color); }
34
+ .d3-pipeline .node-subtitle { fill: var(--muted-color); }
35
+ .d3-pipeline .group-label { font-weight: 700; fill: var(--muted-color); letter-spacing: 0.02em; }
36
+ .d3-pipeline .edge-path { fill: none; stroke-linecap: round; }
37
+ .d3-pipeline .d3-tooltip {
38
+ position: absolute; top: 0; left: 0;
39
+ transform: translate(-9999px, -9999px);
40
+ pointer-events: none; padding: 8px 12px; border-radius: 8px;
41
+ font-size: 12px; line-height: 1.4;
42
+ border: 1px solid var(--border-color); background: var(--surface-bg);
43
+ color: var(--text-color); box-shadow: 0 4px 20px rgba(0,0,0,.15);
44
+ opacity: 0; transition: opacity .12s ease; max-width: 260px; z-index: 100;
45
+ }
46
+ .d3-pipeline .d3-tooltip strong { display: block; margin-bottom: 2px; font-size: 13px; }
47
+ </style>
48
+ <script>
49
+ (() => {
50
/**
 * Run `cb` once D3 is available on `window`.
 *
 * If `window.d3.select` is already a function, `cb` is invoked synchronously
 * and its return value is returned. Otherwise the script element with id
 * `d3-cdn-script` is reused (or created and appended to `<head>`), and `cb`
 * is invoked from that element's `load` event.
 *
 * @param {Function} cb - Callback to run when D3 is ready.
 * @returns {*} The result of `cb()` when D3 was already loaded, else undefined.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3 && typeof window.d3.select === 'function');
  if (hasD3()) return cb();

  const src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = src;
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => { if (hasD3()) cb(); }, { once: true });
  // Without this the chart simply stays blank when the CDN is unreachable
  // (e.g. the deck is opened offline); surface the reason in the console.
  s.addEventListener('error', () => {
    console.error(`Failed to load D3 from ${src}; chart will not render.`);
  }, { once: true });
};
58
+
59
+ const bootstrap = () => {
60
+ const scriptEl = document.currentScript;
61
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
62
+ if (!(container && container.classList && container.classList.contains('d3-pipeline'))) {
63
+ const cs = Array.from(document.querySelectorAll('.d3-pipeline')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
64
+ container = cs[cs.length - 1] || null;
65
+ }
66
+ if (!container) return;
67
+ if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
68
+ container.style.position = container.style.position || 'relative';
69
+
70
+ const tip = document.createElement('div');
71
+ tip.className = 'd3-tooltip';
72
+ const tipInner = document.createElement('div');
73
+ tip.appendChild(tipInner);
74
+ container.appendChild(tip);
75
+
76
/**
 * Show the tooltip next to the pointer with the given HTML content.
 *
 * The tooltip is placed 14px right of and 10px above the pointer, in
 * container coordinates. When that would push it past the container's right
 * edge it is flipped to the left of the pointer, and it is clamped vertically
 * so it stays inside the container. The page sets `overflow: hidden` on
 * html/body, so an unclamped tooltip would be clipped.
 *
 * @param {MouseEvent} ev - Pointer event supplying clientX/clientY.
 * @param {string} html - Markup assigned to the tooltip's inner element.
 */
function showTip(ev, html) {
  tipInner.innerHTML = html;
  tip.style.opacity = '1';

  const r = container.getBoundingClientRect();
  const mx = ev.clientX - r.left;
  const my = ev.clientY - r.top;
  // Measure after the content is set so the box size reflects this tooltip.
  const bw = tip.offsetWidth || 0;
  const bh = tip.offsetHeight || 0;

  let x = mx + 14;
  let y = my - 10;
  if (x + bw > r.width) x = Math.max(0, mx - 14 - bw);
  if (y + bh > r.height) y = r.height - bh;
  if (y < 0) y = 0;

  tip.style.transform = `translate(${x}px, ${y}px)`;
}
83
+ function hideTip() { tip.style.opacity = '0'; tip.style.transform = 'translate(-9999px,-9999px)'; }
84
+
85
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
86
+ const defs = svg.append('defs');
87
+ defs.append('marker').attr('id', 'pl-arrow').attr('viewBox', '0 0 10 8')
88
+ .attr('refX', 9).attr('refY', 4).attr('markerWidth', 7).attr('markerHeight', 5.5)
89
+ .attr('orient', 'auto').append('path').attr('d', 'M0,1 L8,4 L0,7 Z');
90
+
91
+ const gRoot = svg.append('g');
92
+ const gGroups = gRoot.append('g');
93
+ const gEdges = gRoot.append('g');
94
+ const gNodes = gRoot.append('g');
95
+
96
+ const nodes = [
97
+ { id: 'hf_in', label: 'HF Hub Dataset', sub: '', group: 'input', tip: 'Source dataset from the Hugging Face Hub. Any split or config.' },
98
+ { id: 'read', label: 'Read', sub: 'HuggingFaceDatasetReader', group: 'pipeline', tip: 'Reads documents from the Hub and streams them into the pipeline.' },
99
+ { id: 'transform', label: 'Transform', sub: 'InferenceRunner', group: 'pipeline', tip: 'Orchestrates LLM inference: batching, retries, metric logging.' },
100
+ { id: 'write', label: 'Write', sub: 'ParquetWriter', group: 'pipeline', tip: 'Writes generated outputs as Parquet files with checkpointing.' },
101
+ { id: 'local', label: 'Local', sub: 'single node, multi-GPU', group: 'execution', tip: 'Run on a single machine with multiple workers for development.' },
102
+ { id: 'slurm', label: 'Slurm Cluster', sub: 'multi-node, auto-scaling', group: 'execution', tip: 'Distribute across nodes for large-scale production workloads.' },
103
+ { id: 'rollout', label: 'Custom Rollout', sub: 'async callable', group: 'inference', tip: 'Your rollout function: orchestrates one or many generate() calls.' },
104
+ { id: 'vllm', label: 'vLLM / SGLang', sub: 'Server', group: 'inference', tip: 'High-throughput inference engine with prefix caching and batching.' },
105
+ { id: 'hf_out', label: 'HF Hub Dataset', sub: '', group: 'output', tip: 'Generated dataset uploaded continuously to the Hugging Face Hub.' },
106
+ { id: 'card', label: 'Dataset Card', sub: '+ Metrics', group: 'output', tip: 'Auto-generated dataset card with throughput stats.' },
107
+ { id: 'monitor', label: 'Progress Monitor', sub: '', group: 'output', tip: 'Live progress bar and ETA on the dataset card during inference.' },
108
+ ];
109
+
110
+ const groups = [
111
+ { id: 'input', label: 'Input', icon: '📥' },
112
+ { id: 'pipeline', label: 'DataTrove Pipeline', icon: '⚙️' },
113
+ { id: 'execution', label: 'Execution Mode', icon: '🖥️' },
114
+ { id: 'inference', label: 'Inference Engine', icon: '🚀' },
115
+ { id: 'output', label: 'Output', icon: '📤' },
116
+ ];
117
+
118
+ const edges = [
119
+ { from: 'hf_in', to: 'read' },
120
+ { from: 'read', to: 'transform' },
121
+ { from: 'transform', to: 'write' },
122
+ { from: 'transform', to: 'rollout' },
123
+ { from: 'rollout', to: 'vllm' },
124
+ { from: 'write', to: 'hf_out' },
125
+ { from: 'write', to: 'card' },
126
+ { from: 'write', to: 'monitor' },
127
+ ];
128
+
129
/**
 * Report whether the document is in dark mode.
 *
 * @returns {boolean} True when the root element's `data-theme` attribute is 'dark'.
 */
function isDark() {
  const theme = document.documentElement.getAttribute('data-theme');
  return theme === 'dark';
}
130
+
131
/**
 * Build the colour set for the current theme.
 *
 * Each entry has a dark and a light variant chosen via `isDark()`. The primary
 * colour comes from `window.ColorPalettes.getPrimary()` when that global is
 * present, otherwise from a built-in per-theme default.
 *
 * @returns {{nodeBg: string, nodeBd: string, groupBg: string, groupBd: string,
 *   pipeBg: string, pipeBd: string, edge: string, arrow: string, primary: *}}
 */
function colors() {
  const dk = isDark();
  const themed = (darkValue, lightValue) => (dk ? darkValue : lightValue);

  let primary;
  if (window.ColorPalettes) {
    primary = window.ColorPalettes.getPrimary();
  } else {
    primary = themed('#7c6ff7', '#6366f1');
  }

  return {
    nodeBg: themed('rgba(255,255,255,0.055)', 'rgba(255,255,255,0.92)'),
    nodeBd: themed('rgba(255,255,255,0.10)', 'rgba(0,0,0,0.09)'),
    groupBg: themed('rgba(255,255,255,0.025)', 'rgba(0,0,0,0.022)'),
    groupBd: themed('rgba(255,255,255,0.07)', 'rgba(0,0,0,0.055)'),
    pipeBg: themed('rgba(99,102,241,0.055)', 'rgba(99,102,241,0.04)'),
    pipeBd: themed('rgba(99,102,241,0.14)', 'rgba(99,102,241,0.11)'),
    edge: themed('rgba(255,255,255,0.22)', 'rgba(0,0,0,0.18)'),
    arrow: themed('rgba(255,255,255,0.30)', 'rgba(0,0,0,0.25)'),
    primary,
  };
}
146
+
147
+ function computeLayout() {
148
+ const W = container.clientWidth || 820;
149
+ const s = Math.min(1, W / 820);
150
+
151
+ const nw = Math.round(200 * s), nh = Math.round(60 * s);
152
+ const nr = Math.round(10 * s);
153
+ const gp = Math.round(10 * s);
154
+ const gr = Math.round(10 * s);
155
+ const glh = Math.round(22 * s);
156
+ const ng = Math.round(7 * s);
157
+ const cg = Math.round(70 * s);
158
+ const rg = Math.round(14 * s);
159
+
160
+ const leftW = nw + gp * 2;
161
+ const centerW = nw + gp * 2;
162
+ const rightW = nw + gp * 2;
163
+ const totalW = leftW + centerW + rightW + cg * 2;
164
+ const offsetX = Math.max(0, (W - totalW) / 2);
165
+
166
+ const leftX = offsetX;
167
+ const centerX = offsetX + leftW + cg;
168
+ const rightX = offsetX + leftW + cg + centerW + cg;
169
+
170
+ let y = Math.round(4 * s);
171
+ const inputNode = nodes.find(n => n.id === 'hf_in');
172
+ inputNode._x = centerX + gp; inputNode._y = y + glh + gp;
173
+ inputNode._w = nw; inputNode._h = nh; inputNode._r = nr;
174
+ const inputGroup = groups.find(g => g.id === 'input');
175
+ inputGroup._x = centerX; inputGroup._y = y;
176
+ inputGroup._w = centerW; inputGroup._h = glh + gp * 2 + nh; inputGroup._r = gr;
177
+
178
+ y += inputGroup._h + rg;
179
+ const pipeTop = y;
180
+ const pipeNodes = ['read', 'transform', 'write'].map(id => nodes.find(n => n.id === id));
181
+ pipeNodes.forEach((n, i) => {
182
+ n._x = centerX + gp;
183
+ n._y = pipeTop + glh + gp + i * (nh + ng);
184
+ n._w = nw; n._h = nh; n._r = nr;
185
+ });
186
+ const pipeH = glh + gp * 2 + 3 * nh + 2 * ng;
187
+ const pipeGroup = groups.find(g => g.id === 'pipeline');
188
+ pipeGroup._x = centerX; pipeGroup._y = pipeTop;
189
+ pipeGroup._w = centerW; pipeGroup._h = pipeH; pipeGroup._r = gr;
190
+
191
+ const execNodes = ['local', 'slurm'].map(id => nodes.find(n => n.id === id));
192
+ const execH = glh + gp * 2 + execNodes.length * nh + (execNodes.length - 1) * ng;
193
+ const inferNodes = ['rollout', 'vllm'].map(id => nodes.find(n => n.id === id));
194
+ const inferH = glh + gp * 2 + inferNodes.length * nh + (inferNodes.length - 1) * ng;
195
+ const writeNode = nodes.find(n => n.id === 'write');
196
+ const inferBottom = writeNode._y + writeNode._h + gp;
197
+ const inferTop = inferBottom - inferH;
198
+ const execTop = inferTop - rg - execH;
199
+ execNodes.forEach((n, i) => {
200
+ n._x = leftX + gp; n._y = execTop + glh + gp + i * (nh + ng);
201
+ n._w = nw; n._h = nh; n._r = nr;
202
+ });
203
+ const execGroup = groups.find(g => g.id === 'execution');
204
+ execGroup._x = leftX; execGroup._y = execTop;
205
+ execGroup._w = leftW; execGroup._h = execH; execGroup._r = gr;
206
+
207
+ inferNodes.forEach((n, i) => {
208
+ n._x = leftX + gp; n._y = inferTop + glh + gp + i * (nh + ng);
209
+ n._w = nw; n._h = nh; n._r = nr;
210
+ });
211
+ const inferGroup = groups.find(g => g.id === 'inference');
212
+ inferGroup._x = leftX; inferGroup._y = inferTop;
213
+ inferGroup._w = leftW; inferGroup._h = inferH; inferGroup._r = gr;
214
+
215
+ const outNodes = ['hf_out', 'card', 'monitor'].map(id => nodes.find(n => n.id === id));
216
+ const outH = glh + gp * 2 + outNodes.length * nh + (outNodes.length - 1) * ng;
217
+ const outBottom = writeNode._y + writeNode._h + gp;
218
+ const outTop = outBottom - outH;
219
+ outNodes.forEach((n, i) => {
220
+ n._x = rightX + gp; n._y = outTop + glh + gp + i * (nh + ng);
221
+ n._w = nw; n._h = nh; n._r = nr;
222
+ });
223
+ const outGroup = groups.find(g => g.id === 'output');
224
+ outGroup._x = rightX; outGroup._y = outTop;
225
+ outGroup._w = rightW; outGroup._h = outH; outGroup._r = gr;
226
+
227
+ const minY = Math.min(
228
+ ...nodes.map(n => n._y),
229
+ ...groups.map(g => g._y)
230
+ );
231
+ if (minY < 0) {
232
+ const shift = -minY + Math.round(4 * s);
233
+ nodes.forEach(n => { n._y += shift; });
234
+ groups.forEach(g => { g._y += shift; });
235
+ }
236
+ const maxY = Math.max(
237
+ ...nodes.map(n => n._y + n._h + gp),
238
+ ...groups.map(g => g._y + g._h)
239
+ );
240
+ svg.attr('height', maxY + Math.round(4 * s));
241
+
242
+ return s;
243
+ }
244
+
245
/**
 * Compute an anchor point on one side of a laid-out node.
 *
 * For 'top'/'bottom' the point is the horizontal centre of that edge, shifted
 * along x by `offset`. For 'left'/'right' it is the vertical centre of that
 * edge, shifted along y by `offset`.
 *
 * @param {{_x: number, _y: number, _w: number, _h: number}} n - Node with layout fields.
 * @param {string} side - One of 'top', 'bottom', 'left', 'right'.
 * @param {number} [offset] - Shift along the edge; a falsy value means 0.
 * @returns {{x: number, y: number}|undefined} The point, or undefined for any other `side`.
 */
function pt(n, side, offset) {
  const shift = offset ? offset : 0;
  const centreX = n._x + n._w / 2 + shift;
  const centreY = n._y + n._h / 2 + shift;
  switch (side) {
    case 'top':
      return { x: centreX, y: n._y };
    case 'bottom':
      return { x: centreX, y: n._y + n._h };
    case 'left':
      return { x: n._x, y: centreY };
    case 'right':
      return { x: n._x + n._w, y: centreY };
    default:
      return undefined;
  }
}
252
+
253
/**
 * SVG path for a cubic Bezier from `a` to `b` whose two control points sit at
 * the horizontal midpoint, at the y of `a` and the y of `b` respectively.
 *
 * @param {{x: number, y: number}} a - Start point.
 * @param {{x: number, y: number}} b - End point.
 * @returns {string} Path data string ("M… C…").
 */
function hBez(a, b) {
  const { x: x0, y: y0 } = a;
  const { x: x1, y: y1 } = b;
  const midX = (x0 + x1) / 2;
  return `M${x0},${y0} C${midX},${y0} ${midX},${y1} ${x1},${y1}`;
}
257
/**
 * SVG path for a cubic Bezier from `a` to `b` whose two control points sit at
 * the vertical midpoint, at the x of `a` and the x of `b` respectively.
 *
 * @param {{x: number, y: number}} a - Start point.
 * @param {{x: number, y: number}} b - End point.
 * @returns {string} Path data string ("M… C…").
 */
function vBez(a, b) {
  const { x: x0, y: y0 } = a;
  const { x: x1, y: y1 } = b;
  const midY = (y0 + y1) / 2;
  return `M${x0},${y0} C${x0},${midY} ${x1},${midY} ${x1},${y1}`;
}
261
+
262
/**
 * Build the SVG path data for one edge of the pipeline diagram.
 *
 * Routing by endpoint pair:
 *  - hf_in→read, read→transform, transform→write, rollout→vllm:
 *    vertical curve from the source's bottom to the target's top.
 *  - transform→rollout: horizontal curve from the source's left side to the
 *    target's right side.
 *  - everything else: horizontal curve from the source's right side to the
 *    target's left side. The write→hf_out and write→monitor edges start
 *    28% of the node height above / below the centre so the three edges
 *    leaving `write` fan out.
 *
 * @param {{from: string, to: string}} e - Edge referencing node ids.
 * @returns {string} Path data, or '' when either endpoint id is unknown.
 */
function edgePath(e) {
  const findNode = (id) => nodes.find(n => n.id === id);
  const from = findNode(e.from);
  const to = findNode(e.to);
  if (!from || !to) return '';

  switch (`${e.from}>${e.to}`) {
    case 'hf_in>read':
    case 'read>transform':
    case 'transform>write':
    case 'rollout>vllm':
      return vBez(pt(from, 'bottom'), pt(to, 'top'));
    case 'transform>rollout':
      return hBez(pt(from, 'left'), pt(to, 'right'));
    default:
      break;
  }

  const spread = Math.round(from._h * 0.28);
  let offset = 0;
  if (e.from === 'write' && e.to === 'hf_out') {
    offset = -spread;
  } else if (e.from === 'write' && e.to === 'monitor') {
    offset = spread;
  }
  return hBez(pt(from, 'right', offset), pt(to, 'left'));
}
280
+
281
+ function render() {
282
+ const s = computeLayout();
283
+ const c = colors();
284
+
285
+ const fs = Math.max(11, Math.round(13 * s));
286
+ const fsSub = Math.max(10, Math.round(11 * s));
287
+ const fsGrp = Math.max(10, Math.round(11 * s));
288
+ const fsIcon = Math.max(12, Math.round(14 * s));
289
+
290
+ defs.select('#pl-arrow path').attr('fill', c.arrow);
291
+
292
+ const gSel = gGroups.selectAll('g.grp').data(groups, d => d.id);
293
+ const gE = gSel.enter().append('g').attr('class', 'grp');
294
+ gE.append('rect');
295
+ gE.append('text').attr('class', 'grp-icon');
296
+ gE.append('text').attr('class', 'group-label');
297
+ const gM = gE.merge(gSel);
298
+ gM.select('rect')
299
+ .attr('x', d => d._x).attr('y', d => d._y)
300
+ .attr('width', d => d._w).attr('height', d => d._h)
301
+ .attr('rx', d => d._r).attr('ry', d => d._r)
302
+ .attr('fill', d => d.id === 'pipeline' ? c.pipeBg : c.groupBg)
303
+ .attr('stroke', d => d.id === 'pipeline' ? c.pipeBd : c.groupBd)
304
+ .attr('stroke-width', 1);
305
+ gM.select('.grp-icon')
306
+ .attr('x', d => d._x + Math.round(6 * s))
307
+ .attr('y', d => d._y + Math.round(15 * s))
308
+ .style('font-size', fsIcon + 'px')
309
+ .text(d => d.icon);
310
+ gM.select('.group-label')
311
+ .attr('x', d => d._x + Math.round(6 * s) + fsIcon + Math.round(3 * s))
312
+ .attr('y', d => d._y + Math.round(15 * s))
313
+ .style('font-size', fsGrp + 'px')
314
+ .text(d => d.label);
315
+ gSel.exit().remove();
316
+
317
+ const eSel = gEdges.selectAll('path.edge-path').data(edges, d => d.from + d.to);
318
+ eSel.enter().append('path').attr('class', 'edge-path')
319
+ .attr('marker-end', 'url(#pl-arrow)')
320
+ .merge(eSel)
321
+ .attr('d', edgePath)
322
+ .attr('stroke', c.edge)
323
+ .attr('stroke-width', Math.max(1.5, 1.8 * s));
324
+ eSel.exit().remove();
325
+
326
+ const nSel = gNodes.selectAll('g.node-group').data(nodes, d => d.id);
327
+ const nE = nSel.enter().append('g').attr('class', 'node-group');
328
+ nE.append('rect').attr('class', 'node-card');
329
+ nE.append('text').attr('class', 'node-title');
330
+ nE.append('text').attr('class', 'node-subtitle');
331
+ const nM = nE.merge(nSel);
332
+ nM.attr('transform', d => `translate(${d._x},${d._y})`);
333
+ nM.select('.node-card')
334
+ .attr('width', d => d._w).attr('height', d => d._h)
335
+ .attr('rx', d => d._r).attr('ry', d => d._r)
336
+ .attr('fill', c.nodeBg).attr('stroke', c.nodeBd).attr('stroke-width', 1);
337
+ nM.select('.node-title')
338
+ .attr('x', d => d._w / 2).attr('y', d => d.sub ? d._h * 0.38 : d._h / 2)
339
+ .attr('text-anchor', 'middle').attr('dominant-baseline', 'middle')
340
+ .style('font-size', fs + 'px').text(d => d.label);
341
+ nM.select('.node-subtitle')
342
+ .attr('x', d => d._w / 2).attr('y', d => d._h * 0.68)
343
+ .attr('text-anchor', 'middle').attr('dominant-baseline', 'middle')
344
+ .style('font-size', fsSub + 'px').text(d => d.sub || '');
345
+ nM.on('mouseenter', (ev, d) => { if (d.tip) showTip(ev, `<strong>${d.label}</strong>${d.tip}`); })
346
+ .on('mousemove', (ev, d) => { if (d.tip) showTip(ev, `<strong>${d.label}</strong>${d.tip}`); })
347
+ .on('mouseleave', hideTip);
348
+ nSel.exit().remove();
349
+ }
350
+
351
+ render();
352
+ if (window.ResizeObserver) { new ResizeObserver(() => render()).observe(container); }
353
+ else { window.addEventListener('resize', render); }
354
+ new MutationObserver(() => render()).observe(document.documentElement, { attributes: true, attributeFilter: ['data-theme'] });
355
+ };
356
+
357
+ if (document.readyState === 'loading') {
358
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
359
+ } else { ensureD3(bootstrap); }
360
+ })();
361
+ </script>
362
+ </body>
363
+ </html>
app/presentation/se2026/charts/throughput.html ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html data-theme="dark">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Throughput Chart</title>
7
+ <style>
8
+ :root {
9
+ --text-color: rgba(255,255,255,0.88);
10
+ --muted-color: rgba(255,255,255,0.45);
11
+ --surface-bg: rgba(30,30,40,0.95);
12
+ --border-color: rgba(255,255,255,0.1);
13
+ --axis-color: rgba(255,255,255,0.15);
14
+ --tick-color: rgba(255,255,255,0.5);
15
+ --grid-color: rgba(255,255,255,0.06);
16
+ --primary-color: #7c6ff7;
17
+ }
18
+ * { box-sizing: border-box; margin: 0; padding: 0; }
19
+ html, body { width: 100%; height: 100%; background: transparent; overflow: visible; }
20
+ </style>
21
+ <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
22
+ </head>
23
+ <body>
24
+ <div class="d3-optimization-sweep"></div>
25
+ <style>
26
+ .d3-optimization-sweep { position: relative; }
27
+ .d3-optimization-sweep .controls {
28
+ display: flex;
29
+ gap: 16px;
30
+ align-items: center;
31
+ justify-content: flex-start;
32
+ flex-wrap: wrap;
33
+ margin: 10px 0 0 0;
34
+ }
35
+ .d3-optimization-sweep .controls .control-group {
36
+ display: flex;
37
+ flex-direction: column;
38
+ align-items: flex-start;
39
+ gap: 6px;
40
+ }
41
+ .d3-optimization-sweep .controls label {
42
+ font-size: 18px;
43
+ font-weight: 700;
44
+ color: var(--text-color);
45
+ }
46
+ .d3-optimization-sweep .controls select {
47
+ appearance: none;
48
+ -webkit-appearance: none;
49
+ -moz-appearance: none;
50
+ border: 1px solid var(--border-color);
51
+ border-radius: 8px;
52
+ padding: 6px 28px 6px 10px;
53
+ background-color: var(--surface-bg);
54
+ color: var(--text-color);
55
+ font-size: 18px;
56
+ line-height: 1.2;
57
+ background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
58
+ background-repeat: no-repeat;
59
+ background-position: right 8px center;
60
+ }
61
+ .d3-optimization-sweep .controls select:focus-visible {
62
+ outline: 2px solid var(--primary-color);
63
+ outline-offset: 2px;
64
+ }
65
+ .d3-optimization-sweep .legend {
66
+ display: flex;
67
+ align-items: center;
68
+ gap: 14px;
69
+ margin: 0 0 0 auto;
70
+ }
71
+ .d3-optimization-sweep .legend .legend-title {
72
+ font-size: 18px;
73
+ font-weight: 700;
74
+ color: var(--text-color);
75
+ }
76
+ .d3-optimization-sweep .legend .legend-section {
77
+ display: flex;
78
+ flex-wrap: wrap;
79
+ gap: 8px 14px;
80
+ }
81
+ .d3-optimization-sweep .legend .item {
82
+ display: inline-flex;
83
+ align-items: center;
84
+ gap: 6px;
85
+ white-space: nowrap;
86
+ font-size: 18px;
87
+ color: var(--text-color);
88
+ }
89
+ .d3-optimization-sweep .legend .swatch {
90
+ width: 14px;
91
+ height: 14px;
92
+ border-radius: 3px;
93
+ border: 1px solid var(--border-color);
94
+ flex-shrink: 0;
95
+ }
96
+ .d3-optimization-sweep .legend .shape-swatch {
97
+ width: 14px;
98
+ height: 14px;
99
+ flex-shrink: 0;
100
+ }
101
+ .d3-optimization-sweep .d3-tooltip {
102
+ position: absolute;
103
+ top: 0px;
104
+ left: 0px;
105
+ transform: translate(-9999px, -9999px);
106
+ pointer-events: none;
107
+ padding: 8px 10px;
108
+ border-radius: 8px;
109
+ font-size: 18px;
110
+ line-height: 1.35;
111
+ border: 1px solid var(--border-color);
112
+ background: var(--surface-bg);
113
+ color: var(--text-color);
114
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
115
+ opacity: 0;
116
+ transition: opacity .12s ease;
117
+ text-align: left;
118
+ max-width: 320px;
119
+ z-index: 10;
120
+ }
121
+ .d3-optimization-sweep .d3-tooltip .tip-label { color: var(--muted-color); }
122
+ .d3-optimization-sweep .d3-tooltip .tip-val { font-weight: 600; }
123
+ .d3-optimization-sweep .d3-tooltip .tip-regression { color: #e05252; }
124
+ .d3-optimization-sweep .y-label-text {
125
+ font-size: 18px;
126
+ cursor: default;
127
+ }
128
+ .d3-optimization-sweep .speedup-label {
129
+ font-size: 18px;
130
+ font-weight: 600;
131
+ }
132
+ </style>
133
+ <script>
134
+ (() => {
135
+ const bootstrap = () => {
136
+ const container = document.querySelector('.d3-optimization-sweep');
137
+ if (!container) return;
138
+ if (container.dataset && container.dataset.mounted === 'true') return;
139
+ if (container.dataset) container.dataset.mounted = 'true';
140
+ container.style.position = container.style.position || 'relative';
141
+
142
+ // ── Data ──
143
+ const DATA = [
144
+ { model: 'GPT-OSS-120B', family: 'GPT-OSS', baseTp: 1, baseTps: 3138, t0Tps: 6117, t0Speedup: 1.95, t0Params: 'tp=2, mns=1024, mnbt=32768', t1Tps: 5450, t1Speedup: 1.74, t1Params: 'tp=2, mns=1024, mnbt=32768', bestSpeedup: 1.95 },
145
+ { model: 'Qwen3-8B', family: 'Qwen3', baseTp: 1, baseTps: 6338, t0Tps: 6338, t0Speedup: 1.00, t0Params: '(baseline)', t1Tps: 6443, t1Speedup: 1.02, t1Params: 'gmu=95', bestSpeedup: 1.02 },
146
+ { model: 'Gemma-3-4B', family: 'Gemma3', baseTp: 1, baseTps: 8501, t0Tps: 9253, t0Speedup: 1.09, t0Params: 'mns=1024, mnbt=32768', t1Tps: 8361, t1Speedup: 0.98, t1Params: 'mns=1024, mnbt=32768', bestSpeedup: 1.09 },
147
+ { model: 'SmolLM2-1.7B', family: 'SmolLM2', baseTp: 1, baseTps: 5255, t0Tps: 5437, t0Speedup: 1.03, t0Params: 'mns=2048, mnbt=32768', t1Tps: 9220, t1Speedup: 1.75, t1Params: 'mns=2048, mnbt=32768, gmu=95, spec=suffix_32', bestSpeedup: 1.75 },
148
+ ];
149
+
150
+ const FAMILIES = ['Qwen3', 'SmolLM2', 'Gemma3', 'GPT-OSS'];
151
+ const TIERS = ['Baseline', 'Tier 0', 'Tier 1'];
152
+ const SHAPE_SIZE = 42;
153
+ const TIER_Y_OFFSET = { 'Baseline': -0.38, 'Tier 0': 0, 'Tier 1': 0.38 };
154
+ const margin = { top: 30, right: 60, bottom: 50, left: 140 };
155
+
156
+ // ── Colors & shapes ──
157
+ const FAMILY_COLORS = { 'Qwen3': '#e07b54', 'SmolLM2': '#e06b9e', 'Gemma3': '#5b9bd5', 'GPT-OSS': '#8bc474' };
158
+ const familyPalette = FAMILIES.map(f => FAMILY_COLORS[f] || '#999');
159
+ const familyColor = (family) => FAMILY_COLORS[family] || '#999';
160
+
161
+ const shapeGenerators = {
162
+ 'Baseline': d3.symbol().type(d3.symbolCircle),
163
+ 'Tier 0': d3.symbol().type(d3.symbolSquare),
164
+ 'Tier 1': d3.symbol().type(d3.symbolTriangle),
165
+ };
166
+
167
+ // ── Tooltip ──
168
+ let tip = container.querySelector('.d3-tooltip');
169
+ let tipInner;
170
+ if (!tip) {
171
+ tip = document.createElement('div'); tip.className = 'd3-tooltip';
172
+ tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tipInner.style.textAlign = 'left';
173
+ tip.appendChild(tipInner); container.appendChild(tip);
174
+ } else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
175
+
176
+ function showTip(html, mx, my) { // Fill the tooltip with `html` and show it next to the container-relative point (mx, my), kept inside the container.
177
+ tipInner.innerHTML = html;
178
+ const cw = container.clientWidth;
179
+ let tx = mx + 14, ty = my - 10; // default placement: right of and slightly above the pointer
180
+ if (tx + (tip.offsetWidth || 200) > cw - 8) tx = mx - (tip.offsetWidth || 200) - 14; // would cross the right edge: flip to the left of the pointer (200 is the fallback width when offsetWidth is 0)
181
+ if (ty + (tip.offsetHeight || 100) > container.clientHeight) ty = container.clientHeight - (tip.offsetHeight || 100) - 4; // would cross the bottom edge: pin 4px above it
182
+ if (ty < 0) ty = 4; // never start above the container's top edge
183
+ tip.style.transform = `translate(${tx}px, ${ty}px)`;
184
+ tip.style.opacity = '1';
185
+ }
186
+ function hideTip() { // Hide the tooltip again.
187
+ tip.style.opacity = '0'; // fades out via the CSS opacity transition on .d3-tooltip
188
+ tip.style.transform = 'translate(-9999px, -9999px)'; // park it far off-screen, matching the resting transform in the .d3-tooltip CSS rule
189
+ }
190
+
191
+ // ── Shared tooltip event handlers ──
192
+ function attachTipEvents(sel, opacityFn) {
193
+ sel
194
+ .attr('cursor', 'pointer')
195
+ .on('mouseenter', function (event, d) {
196
+ d3.select(this).attr('opacity', 1);
197
+ const [mx, my] = d3.pointer(event, container);
198
+ showTip(buildTooltip(d), mx, my);
199
+ })
200
+ .on('mousemove', function (event) {
201
+ const [mx, my] = d3.pointer(event, container);
202
+ tip.style.transform = `translate(${mx + 14}px, ${my - 10}px)`;
203
+ })
204
+ .on('mouseleave', function (event, d) {
205
+ d3.select(this).attr('opacity', opacityFn(d));
206
+ hideTip();
207
+ });
208
+ }
209
+
210
+ // ── Shared axis styling: restyles whatever lines, paths and tick labels the given group `g` contains ──
211
+ function styleAxis(g) {
212
+ g.selectAll('path, line').attr('stroke', 'var(--axis-color)'); // domain path and tick marks use the theme's axis colour
213
+ g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px'); // tick labels: theme tick colour at 18px
214
+ }
215
+
216
+ // ── SVG ──
217
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
218
+ const gRoot = svg.append('g');
219
+
220
+ // ── State ──
221
+ const state = { metric: 'speedup', sort: 'speedup' };
222
+
223
+ function sortedData() { // Return a new array holding the DATA rows ordered according to state.sort.
224
+ const d = [...DATA]; // copy first: Array.prototype.sort sorts in place and DATA must stay untouched
225
+ if (state.sort === 'speedup') d.sort((a, b) => b.bestSpeedup - a.bestSpeedup); // largest best-case speedup first
226
+ else if (state.sort === 'baseline') d.sort((a, b) => b.baseTps - a.baseTps); // highest baseline throughput first
227
+ else if (state.sort === 'family') {
228
+ d.sort((a, b) => {
229
+ const fi = FAMILIES.indexOf(a.family) - FAMILIES.indexOf(b.family); // families follow their order in FAMILIES
230
+ return fi !== 0 ? fi : b.bestSpeedup - a.bestSpeedup; // within one family: larger best speedup first
231
+ });
232
+ }
233
+ return d; // any other state.sort value keeps DATA's original order
234
+ }
235
+
236
+ // ── X axis tick format helper: returns the tick formatter function matching state.metric ──
237
+ function xTickFormat() {
238
+ return state.metric === 'throughput'
239
+ ? (d => d >= 1000 ? (d / 1000) + 'k' : d) // throughput: 6000 -> "6k"; values below 1000 are returned unchanged
240
+ : (d => d.toFixed(1) + 'x'); // any other metric (speedup): 1.5 -> "1.5x"
241
+ }
242
+
243
+ // ── Render: (re)draw the whole chart for the current `state` and container width; invoked on init, on control changes and on resize ──
244
+ function render() {
245
+ const iw = (container.clientWidth || 800) - margin.left - margin.right; // inner plot width; 800 is the fallback when clientWidth is 0
246
+ const ih = Math.max(150, DATA.length * 60 + margin.top + margin.bottom) - margin.top - margin.bottom; // inner plot height: 60px per model row, total SVG height at least 150px
247
+ svg.attr('width', container.clientWidth || 800).attr('height', ih + margin.top + margin.bottom);
248
+ gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
249
+
250
+ const data = sortedData();
251
+ const yScale = d3.scaleBand().domain(data.map(d => d.model)).range([0, ih]).padding(0.35); // one band per model, in sorted order
252
+ const bandH = yScale.bandwidth();
253
+
254
+ // X scale: throughput starts at 0 with 8% headroom; speedup covers at least 0.85x–2.05x, so the 1.0x reference is always inside the domain
255
+ let xScale;
256
+ if (state.metric === 'throughput') {
257
+ const maxTps = d3.max(data, d => Math.max(d.baseTps, d.t0Tps, d.t1Tps));
258
+ xScale = d3.scaleLinear().domain([0, maxTps * 1.08]).range([0, iw]).nice();
259
+ } else {
260
+ const maxSpd = d3.max(data, d => Math.max(d.t0Speedup, d.t1Speedup));
261
+ const minSpd = d3.min(data, d => Math.min(d.t0Speedup, d.t1Speedup));
262
+ xScale = d3.scaleLinear().domain([Math.min(0.85, minSpd - 0.05), Math.max(2.05, maxSpd + 0.1)]).range([0, iw]).nice();
263
+ }
264
+
265
+ // Grid: one vertical line per x tick, keyed by tick value
266
+ gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid') // single-datum join: the group is created once and reused on later renders
267
+ .call(g => {
268
+ g.selectAll('line').data(xScale.ticks(8), d => d).join('line')
269
+ .attr('x1', d => xScale(d)).attr('x2', d => xScale(d))
270
+ .attr('y1', 0).attr('y2', ih)
271
+ .attr('stroke', 'var(--grid-color)').attr('stroke-width', 1);
272
+ });
273
+
274
+ // X axes (bottom + top), both sharing the same scale and tick formatter
275
+ const fmt = xTickFormat();
276
+ gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axis-x')
277
+ .attr('transform', `translate(0,${ih})`)
278
+ .call(d3.axisBottom(xScale).ticks(8).tickFormat(fmt)).call(styleAxis);
279
+ gRoot.selectAll('.axis-x-top').data([0]).join('g').attr('class', 'axis-x-top')
280
+ .call(d3.axisTop(xScale).ticks(8).tickFormat(fmt)).call(styleAxis);
281
+
282
+ // X axis label, centred under the plot
283
+ const xLabel = state.metric === 'throughput' ? 'Tokens per second per GPU' : 'Speedup vs baseline';
284
+ gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
285
+ .attr('x', iw / 2).attr('y', ih + margin.bottom - 4)
286
+ .attr('text-anchor', 'middle').attr('fill', 'var(--muted-color)')
287
+ .attr('font-size', 18).text(xLabel);
288
+
289
+ // Y axis (model names): hand-built text labels, right-aligned left of the plot and coloured by model family
290
+ gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axis-y')
291
+ .call(g => {
292
+ g.selectAll('text.y-label-text').data(data, d => d.model).join(
293
+ enter => enter.append('text').attr('class', 'y-label-text')
294
+ .attr('x', -8).attr('dy', '0.35em').attr('text-anchor', 'end').style('font-size', '18px').style('font-weight', '600'),
295
+ update => update,
296
+ exit => exit.remove()
297
+ )
298
+ .attr('y', d => yScale(d.model) + bandH / 2) // applied to entering and updating labels alike, so rows move when the sort order changes
299
+ .attr('fill', d => familyColor(d.family))
300
+ .text(d => d.model);
301
+ });
302
+
303
+ // Reference line at 1.0x in speedup mode (an empty data array removes it in throughput mode)
304
+ gRoot.selectAll('.ref-line').data(state.metric === 'speedup' ? [1.0] : []).join('line')
305
+ .attr('class', 'ref-line')
306
+ .attr('x1', d => xScale(d)).attr('x2', d => xScale(d))
307
+ .attr('y1', 0).attr('y2', ih)
308
+ .attr('stroke', 'var(--text-color)').attr('stroke-width', 1.5)
309
+ .attr('stroke-dasharray', '4,3').attr('opacity', 0.5);
310
+
311
+ // View-specific elements: each view only joins its own classes, so the other view's marks are removed explicitly first
312
+ if (state.metric === 'throughput') {
313
+ gRoot.selectAll('.speedup-bar').remove();
314
+ renderThroughput(data, xScale, yScale, bandH);
315
+ } else {
316
+ gRoot.selectAll('.conn-line').remove();
317
+ gRoot.selectAll('.dot').remove();
318
+ renderSpeedup(data, xScale, yScale, bandH);
319
+ }
320
+
321
+ // Speedup annotation (shared between both views): best speedup per model, printed just right of the plot area
322
+ gRoot.selectAll('.speedup-label').data(data, d => d.model).join('text')
323
+ .attr('class', 'speedup-label')
324
+ .attr('x', iw + 6)
325
+ .attr('y', d => yScale(d.model) + bandH / 2)
326
+ .attr('dy', '0.35em')
327
+ .attr('fill', 'var(--muted-color)')
328
+ .text(d => d.bestSpeedup.toFixed(2) + 'x');
329
+ }
330
+
331
+ function renderThroughput(data, xScale, yScale, bandH) { // Throughput view: per model, three tier markers (Baseline, Tier 0, Tier 1) joined by a faint line.
332
+ // Connecting lines between the three staggered dots
333
+ const lineGen = d3.line().x(p => p.x).y(p => p.y);
334
+ const connData = data.map(d => {
335
+ const cy = yScale(d.model) + bandH / 2; // vertical centre of this model's band
336
+ return {
337
+ model: d.model, family: d.family,
338
+ points: TIERS.map((tier, i) => ({
339
+ x: xScale([d.baseTps, d.t0Tps, d.t1Tps][i]), // index i follows TIERS order: Baseline, Tier 0, Tier 1
340
+ y: cy + TIER_Y_OFFSET[tier] * bandH, // per-tier vertical stagger, as a fraction of the band height
341
+ })),
342
+ };
343
+ });
344
+ gRoot.selectAll('.conn-line').data(connData, d => d.model).join('path')
345
+ .attr('class', 'conn-line')
346
+ .attr('d', d => lineGen(d.points))
347
+ .attr('fill', 'none')
348
+ .attr('stroke', d => familyColor(d.family))
349
+ .attr('stroke-width', 1.5).attr('opacity', 0.35);
350
+
351
+ // Dots: 3 per model with vertical stagger
352
+ const dots = [];
353
+ data.forEach(d => {
354
+ const cy = yScale(d.model) + bandH / 2;
355
+ const vals = [d.baseTps, d.t0Tps, d.t1Tps];
356
+ TIERS.forEach((tier, i) => {
357
+ dots.push({ ...d, tier, val: vals[i], cx: xScale(vals[i]), cy: cy + TIER_Y_OFFSET[tier] * bandH }); // spreading d keeps every row field available to buildTooltip
358
+ });
359
+ });
360
+
361
+ const dotSel = gRoot.selectAll('.dot').data(dots, d => d.model + '-' + d.tier).join('path')
362
+ .attr('class', 'dot')
363
+ .attr('d', d => shapeGenerators[d.tier].size(SHAPE_SIZE)()) // marker shape is chosen by tier
364
+ .attr('transform', d => `translate(${d.cx},${d.cy})`)
365
+ .attr('fill', d => familyColor(d.family))
366
+ .attr('stroke', 'none')
367
+ .attr('opacity', 0.9);
368
+ attachTipEvents(dotSel, () => 0.9); // hover raises opacity to 1; leaving restores the resting 0.9
369
+ }
370
+
371
+ function renderSpeedup(data, xScale, yScale, bandH) { // Speedup view: two bars per model (Tier 0 above Tier 1), each spanning from the 1.0x position to that tier's speedup.
372
+ const barH = bandH * 0.38;
373
+ const barData = [];
374
+ data.forEach(d => {
375
+ const baseY = yScale(d.model); // top of this model's band
376
+ barData.push({ ...d, tier: 'Tier 0', val: d.t0Speedup, y: baseY + bandH * 0.12, h: barH });
377
+ barData.push({ ...d, tier: 'Tier 1', val: d.t1Speedup, y: baseY + bandH * 0.5, h: barH });
378
+ });
379
+
380
+ const oneX = xScale(1.0); // x position of the 1.0x baseline
381
+ const barSel = gRoot.selectAll('.speedup-bar').data(barData, d => d.model + '-' + d.tier).join('rect')
382
+ .attr('class', 'speedup-bar')
383
+ .attr('x', d => d.val >= 1.0 ? oneX : xScale(d.val)) // regressions (< 1.0x) extend leftwards from the 1.0x position
384
+ .attr('y', d => d.y)
385
+ .attr('width', d => Math.abs(xScale(d.val) - oneX))
386
+ .attr('height', d => d.h)
387
+ .attr('rx', 2)
388
+ .attr('fill', d => familyColor(d.family))
389
+ .attr('opacity', d => d.tier === 'Tier 0' ? 0.9 : 0.55) // Tier 1 is drawn fainter than Tier 0
390
+ .attr('stroke', d => d.val < 1.0 ? '#e05252' : 'none') // red outline marks a regression
391
+ .attr('stroke-width', d => d.val < 1.0 ? 1 : 0);
392
+ attachTipEvents(barSel, d => d.tier === 'Tier 0' ? 0.9 : 0.55); // after hover, restore the same per-tier resting opacity
393
+ }
394
+
395
+ function buildTooltip(d) { // Build the tooltip HTML for one datum; dot and bar data both spread the full DATA row, so every row field is present.
396
+ const fmt = (v) => v.toLocaleString(); // locale-formatted number for the throughput values
397
+ const spd = (v) => v.toFixed(2) + 'x'; // speedup with two decimals, e.g. "1.95x"
398
+ const cls = (v) => v < 1.0 ? 'tip-regression' : 'tip-val'; // CSS class: speedups below 1.0x get the regression style
399
+ return `<div style="margin-bottom:4px"><strong>${d.model}</strong> <span class="tip-label">(${d.family})</span></div>`
400
+ + `<div><span class="tip-label">Baseline:</span> <span class="tip-val">${fmt(d.baseTps)}</span> tps/gpu <span class="tip-label">(tp=${d.baseTp})</span></div>`
401
+ + `<div><span class="tip-label">Tier 0:</span> <span class="${cls(d.t0Speedup)}">${fmt(d.t0Tps)}</span> tps/gpu <span class="${cls(d.t0Speedup)}">${spd(d.t0Speedup)}</span></div>`
402
+ + `<div style="font-size:10px;color:var(--muted-color);margin-left:8px">${d.t0Params}</div>`
403
+ + `<div><span class="tip-label">Tier 1:</span> <span class="${cls(d.t1Speedup)}">${fmt(d.t1Tps)}</span> tps/gpu <span class="${cls(d.t1Speedup)}">${spd(d.t1Speedup)}</span></div>`
404
+ + `<div style="font-size:10px;color:var(--muted-color);margin-left:8px">${d.t1Params}</div>`;
405
+ }
406
+
407
+ // ── Controls ──
408
+ function makeSelect(id, label, options, initial, onChange) { // Build a labelled <select>; options is a list of [value, text] pairs; returns the wrapping .control-group element.
409
+ const group = document.createElement('div'); group.className = 'control-group';
410
+ const lbl = document.createElement('label'); lbl.textContent = label; lbl.setAttribute('for', id); // `for` ties the label to the select's id
411
+ const sel = document.createElement('select'); sel.id = id;
412
+ options.forEach(([v, t]) => {
413
+ const o = document.createElement('option'); o.value = v; o.textContent = t; sel.appendChild(o);
414
+ });
415
+ sel.value = initial; // set after the options exist so the initial value can match one of them
416
+ sel.addEventListener('change', () => onChange(sel.value)); // onChange receives the newly selected value string
417
+ group.appendChild(lbl); group.appendChild(sel);
418
+ return group;
419
+ }
420
+
421
+ const controls = document.createElement('div'); controls.className = 'controls';
422
+ controls.appendChild(makeSelect('metric-sel-optsweep', 'Metric',
423
+ [['throughput', 'Throughput'], ['speedup', 'Speedup']], state.metric,
424
+ v => { state.metric = v; render(); }));
425
+ controls.appendChild(makeSelect('sort-sel-optsweep', 'Sort',
426
+ [['speedup', 'By Best Speedup'], ['baseline', 'By Baseline Throughput'], ['family', 'By Model Family']], state.sort,
427
+ v => { state.sort = v; render(); }));
428
+
429
+ // Legend inline with controls
430
+ const legend = document.createElement('div'); legend.className = 'legend';
431
+ legend.style.display = 'flex'; legend.style.alignItems = 'center'; legend.style.gap = '14px'; legend.style.marginLeft = 'auto'; legend.style.margin = '0 0 0 auto';
432
+ const svgNS = 'http://www.w3.org/2000/svg';
433
+ TIERS.forEach(tier => {
434
+ const item = document.createElement('span'); item.className = 'item';
435
+ const shapeSvg = document.createElementNS(svgNS, 'svg');
436
+ shapeSvg.setAttribute('width', '14'); shapeSvg.setAttribute('height', '14');
437
+ shapeSvg.setAttribute('viewBox', '-8 -8 16 16'); shapeSvg.style.display = 'block';
438
+ const path = document.createElementNS(svgNS, 'path');
439
+ path.setAttribute('d', shapeGenerators[tier].size(SHAPE_SIZE)());
440
+ path.setAttribute('fill', 'var(--text-color)');
441
+ shapeSvg.appendChild(path);
442
+ const swWrap = document.createElement('span'); swWrap.className = 'shape-swatch'; swWrap.appendChild(shapeSvg);
443
+ const txt = document.createElement('span'); txt.textContent = tier;
444
+ item.appendChild(swWrap); item.appendChild(txt); legend.appendChild(item);
445
+ });
446
+ controls.appendChild(legend);
447
+ container.appendChild(controls);
448
+
449
+ // ── Initial render + resize ──
450
+ render();
451
+ if (window.ResizeObserver) {
452
+ new ResizeObserver(() => render()).observe(container);
453
+ } else {
454
+ window.addEventListener('resize', render);
455
+ }
456
+ };
457
+
458
+ if (document.readyState === 'loading') {
459
+ document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
460
+ } else {
461
+ bootstrap();
462
+ }
463
+ })();
464
+ </script>
465
+ </body>
466
+ </html>
app/presentation/se2026/data/benchmark-results.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0359f44cbbe97ee8f7ea598152a5053a322a81af818de890606e0daa6c15fd3a
3
+ size 1378100
app/presentation/se2026/data/rephrasing_metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cac779aca41bc6f868d99a7c7fcc43343591b40ace727098341d52285c1ff856
3
+ size 152802
app/presentation/se2026/index.html ADDED
@@ -0,0 +1,620 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="dark">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>The Synthetic Data Playbook</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com">
8
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap" rel="stylesheet">
9
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/dist/reveal.css">
10
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/dist/theme/night.css">
11
+ <link rel="stylesheet" href="style.css">
12
+ </head>
13
+ <body>
14
+ <div class="reveal">
15
+ <div class="slides">
16
+
17
+ <!-- ============================================================ -->
18
+ <!-- SECTION 1: MOTIVATION AND RECAP (~40%, slides 1-8, ~9 min) -->
19
+ <!-- ============================================================ -->
20
+
21
+ <!-- SLIDE 1: Title -->
22
+ <section class="center-slide">
23
+ <div style="margin-top:-160px;">
24
+ <h2>The Synthetic Data Playbook</h2>
25
+ <br>
26
+ <h3>How to Cook Better Training Data for LLMs</h3>
27
+ <br>
28
+ <p style="margin-top:20px;font-size:0.8em;color:rgba(255,255,255,1);">
29
+ SE 26
30
+ </p>
31
+ </div>
32
+ <img src="assets/bern-skyline.png" style="position:absolute;bottom:0;left:50%;transform:translateX(-50%);width:100%;height:auto;opacity:0.6;pointer-events:none;">
33
+ <aside class="notes">
34
+ ~30s. Welcome, introduce yourself. "Today I'll show you how we made LLMs better
35
+ by rewriting their training data instead of just filtering it."
36
+ </aside>
37
+ </section>
38
+
39
+ <!-- SLIDE 2: Digital Sovereignty -->
40
+ <section>
41
+ <p class="section-label">Why This Matters</p>
42
+ <h2>The Data Black Box</h2>
43
+ <div style="font-size:0.65em;margin-top:20px;">
44
+ <p>Frontier labs (OpenAI, Google, Anthropic) don't disclose how they build their training data.</p>
45
+ <p class="fragment">Neither do the Chinese labs (DeepSeek or Qwen).</p>
46
+ <p class="fragment" style="margin-top:20px;">
47
+ Training data is the <span class="highlight">most important ingredient</span> in building an LLM,
48
+ yet the recipes are kept secret.
49
+ </p>
50
+ <div class="fragment" style="margin-top:30px;background:rgba(255,255,255,0.04);border:1px solid rgba(255,255,255,0.1);border-radius:16px;padding:24px;">
51
+ <p style="font-weight:700;color:#f0c674;margin-bottom:8px;font-size:1.1em;">Digital Sovereignty</p>
52
+ <p>If you can't build the data, you can't build the model.<br>
53
+ If you can't build the model, you depend on those who can.</p>
54
+ <p style="margin-top:12px;">This work puts the knowledge <span class="accent">out in the open</span> for everyone:
55
+ governments, universities, startups, and individuals.</p>
56
+ </div>
57
+ </div>
58
+ <aside class="notes">
59
+ ~1 min. "Before we dive in, let me explain why this matters beyond the technical.
60
+ None of the frontier labs, not OpenAI, not Google, not Anthropic, and not the Chinese labs either,
61
+ tell you how they build their training data. It's the most important ingredient and it's a black box.
62
+ This is a digital sovereignty issue. If you can't build the data yourself,
63
+ you can't build the model, and you're dependent on whoever can.
64
+ Our work makes this knowledge open and accessible to everyone."
65
+ </aside>
66
+ </section>
67
+
68
+ <!-- SLIDE 3: LLMs and Pretraining Recap -->
69
+ <section>
70
+ <p class="section-label">Quick Recap</p>
71
+ <h2>LLMs: What's Under the Hood</h2>
72
+ <div class="two-col" style="font-size:0.65em;margin-top:20px;">
73
+ <div class="col">
74
+ <p>You use these every day: ChatGPT, Copilot, Claude.</p>
75
+ <p class="fragment">Under the hood: a giant function that takes <span class="accent">tokens in</span> and predicts <span class="accent">tokens out</span>.</p>
76
+ <p class="fragment">Trained by reading <span class="highlight">billions of web pages</span>, learning to predict the next word.</p>
77
+ <p class="fragment" style="margin-top:20px;font-weight:700;color:#f0c674;">
78
+ Data quality defines model quality.
79
+ </p>
80
+ </div>
81
+ <div class="col fragment" style="text-align:center;">
82
+ <div style="background:rgba(255,255,255,0.04);border:1px solid rgba(255,255,255,0.1);border-radius:16px;padding:30px 20px;">
83
+ <div style="font-size:0.9em;color:rgba(255,255,255,0.5);">Input text</div>
84
+ <div style="font-size:2em;margin:10px 0;">↓</div>
85
+ <div style="background:rgba(124,111,247,0.15);border:1px solid rgba(124,111,247,0.3);border-radius:12px;padding:16px;font-weight:700;font-size:1.1em;">
86
+ LLM<br><span style="font-size:0.6em;font-weight:400;color:rgba(255,255,255,0.4);">billions of parameters</span>
87
+ </div>
88
+ <div style="font-size:2em;margin:10px 0;">↓</div>
89
+ <div style="font-size:0.9em;color:rgba(255,255,255,0.5);">Output text</div>
90
+ </div>
91
+ </div>
92
+ </div>
93
+ <aside class="notes">
94
+ ~1.5 min. "Quick recap so we have shared vocabulary." Click through fragments.
95
+ Emphasize: model quality = data quality. Like training a code model on all of GitHub.
96
+ </aside>
97
+ </section>
98
+
99
+ <!-- SLIDE 4: The Data Quality Problem -->
100
+ <section>
101
+ <p class="section-label">The Problem</p>
102
+ <h2>You Start With the Entire Internet...</h2>
103
+ <h3 class="fragment">...and throw away 98.6% of it</h3>
104
+ <div class="fragment">
105
+ <img src="assets/dclm-filtering-pipeline.png" class="img-contain" style="margin-top:10px;max-height:400px;">
106
+ <p style="font-size:0.45em;color:rgba(255,255,255,0.3);margin-top:8px;">
107
+ DCLM: 240T tokens from Common Crawl → 1.4% survives as DCLM-Baseline
108
+ </p>
109
+ </div>
110
+ <aside class="notes">
111
+ ~1 min. "This is the DCLM dataset pipeline. You scrape the whole internet, 240 trillion tokens.
112
+ Then heuristic filters, deduplication, model-based filtering. Only 1.4% of documents survive.
113
+ All this engineering just to clean the data. What if there was a better way?"
114
+ </aside>
115
+ </section>
116
+
117
+ <!-- SLIDE 5: Synthetic Data -->
118
+ <section>
119
+ <p class="section-label">The Idea</p>
120
+ <h2>Rewrite Instead of Filter</h2>
121
+ <div class="before-after">
122
+ <div class="panel bad">
123
+ <div class="panel-title">Raw Web Text</div>
124
+ <p style="margin:0;line-height:1.6;">
125
+ <span style="color:rgba(255,255,255,0.3);">★★★ BeSt DeAls!!!</span><br>
126
+ Photosynthesis is the process by wich plants convert sunlit into energy.
127
+ It occurs in the chloroplasts<br>
128
+ <span style="color:rgba(255,255,255,0.3);">Click here for more → → →</span><br>
129
+ <span style="color:rgba(255,255,255,0.3);">© 2019 AllScienceInfo.biz</span><br>
130
+ Carbon dioxide and water are transformed into glucose and oxygen...
131
+ <span style="color:rgba(255,255,255,0.3);">[AD] [AD] [POPUP]</span>
132
+ </p>
133
+ </div>
134
+ <div class="arrow fragment" data-fragment-index="0">→</div>
135
+ <div class="panel good fragment" data-fragment-index="0">
136
+ <div class="panel-title">LLM-Rewritten FAQ</div>
137
+ <p style="margin:0;line-height:1.6;">
138
+ <strong>Q: What is photosynthesis?</strong><br>
139
+ A: Photosynthesis is the process by which plants convert sunlight into chemical energy.
140
+ It occurs in organelles called chloroplasts.<br><br>
141
+ <strong>Q: What are the inputs and outputs?</strong><br>
142
+ A: Plants take in carbon dioxide (CO₂) and water (H₂O), and using light energy,
143
+ produce glucose (C₆H₁₂O₆) and oxygen (O₂).
144
+ </p>
145
+ </div>
146
+ </div>
147
+ <p class="fragment" style="font-size:0.55em;margin-top:16px;">
148
+ Same knowledge, better packaging.<br>
149
+ You keep <span class="highlight">100%</span> of your data instead of discarding 90%.
150
+ </p>
151
+ <aside class="notes">
152
+ ~1.5 min. Walk through the before/after. Left: messy web text with spam, typos, ads, broken formatting.
153
+ Right: same knowledge, but restructured as a clean FAQ. The LLM acts as a rewriter.
154
+ Key insight: you preserve the knowledge, you just improve the presentation. No data wasted.
155
+ </aside>
156
+ </section>
157
+
158
+ <!-- SLIDE 6: Research Question -->
159
+ <section>
160
+ <p class="section-label">Our Research</p>
161
+ <h2>What's the Best Recipe?</h2>
162
+ <p style="font-size:0.6em;color:rgba(255,255,255,0.6);margin-bottom:12px;">
163
+ Three knobs to tune: <span class="accent">source data</span>, <span class="accent">prompt strategy</span>, and
164
+ <span class="accent">generator model</span>.
165
+ </p>
166
+ <div style="display:flex;align-items:center;gap:24px;">
167
+ <iframe src="charts/experiment-flow.html" style="flex:0 1 75%;height:540px;border:none;border-radius:8px;background:transparent;" loading="lazy"></iframe>
168
+ <div class="fragment" style="display:flex;flex-direction:column;gap:24px;min-width:120px;text-align:center;align-self:flex-start;margin-top:40px;padding-left:40px;">
169
+ <div class="stat-box"><div class="num" style="font-size:1.6em;">70+</div><div class="label">experiments</div></div>
170
+ <div class="stat-box"><div class="num" style="font-size:1.6em;">1T+</div><div class="label">tokens generated</div></div>
171
+ <div class="stat-box"><div class="num" style="font-size:1.6em;">60k+</div><div class="label">GPU hours</div></div>
172
+ </div>
173
+ </div>
174
+ <aside class="notes">
175
+ ~1 min. "We ran a massive ablation study. Three axes: what prompt do you give the rewriter,
176
+ which model does the rewriting, and what source data do you start from.
177
+ This Sankey shows our 70+ experiments flowing from source → prompt → model.
178
+ Over 1 trillion tokens generated, more than 60k GPU hours."
179
+ </aside>
180
+ </section>
181
+
182
+ <!-- SLIDE 7: How We Evaluate -->
183
+ <section>
184
+ <p class="section-label">Methodology</p>
185
+ <h2>Our Integration Test Suite</h2>
186
+ <div style="font-size:0.7em;margin-top:30px;">
187
+ <p style="color:rgba(255,255,255,0.5);">For each experiment, we:</p>
188
+ <ul>
189
+ <li class="fragment">Train a <span class="accent">1.2B parameter</span> model from scratch</li>
190
+ <li class="fragment">Feed it <span class="accent">20B tokens</span> of synthetic and original data</li>
191
+ <li class="fragment">Test on <span class="accent">12 benchmarks</span> (reading, math, reasoning, knowledge...)</li>
192
+ <li class="fragment">Compare against curated web datasets as baselines</li>
193
+ </ul>
194
+ <p class="fragment" style="margin-top:16px;font-size:0.9em;color:rgba(255,255,255,0.6);">
195
+ This is expensive so we tried proxies:
196
+ </p>
197
+ <ul class="fragment" style="font-size:0.85em;margin-top:4px;">
198
+ <li>DCLM/Edu scores (used for filtering pretraining data)</li>
199
+ <li>Smaller training runs</li>
200
+ </ul>
201
+ <p class="fragment" style="margin-top:4px;font-size:0.9em;">
202
+ None correlated well enough.
203
+ </p>
204
+ <p class="fragment" style="margin-top:10px;color:#f0c674;font-weight:600;">
205
+ No shortcuts: you must train and evaluate to know if your data is good.
206
+ </p>
207
+ </div>
208
+ <aside class="notes">
209
+ ~1 min. "Think of it like an integration test suite for data quality.
210
+ We train a model on each dataset variant and see how it scores.
211
+ 12 benchmarks covering reading comprehension, math, general knowledge, reasoning.
212
+ 70+ separate training runs. No proxy metric can replace this."
213
+ </aside>
214
+ </section>
215
+
216
+ <!-- SLIDE 8: Spoiler -->
217
+ <section>
218
+ <p class="section-label">Spoiler</p>
219
+ <h2>FinePhrase Wins</h2>
220
+ <p style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-bottom:10px;">
221
+ Our best synthetic recipe outperforms all tested baselines, including curated web data.
222
+ </p>
223
+ <iframe src="charts/benchmark.html" class="chart-frame" style="height:360px;" loading="lazy"></iframe>
224
+ <p class="fragment" style="font-size:0.6em;margin-top:10px;color:rgba(255,255,255,0.6);">
225
+ Let's unpack <span class="accent">how</span>.
226
+ </p>
227
+ <aside class="notes">
228
+ ~1 min. "Here's the punchline up front. FinePhrase, our best configuration,
229
+ beats all baselines including DCLM, Nemotron, REWIRE, and Cosmopedia.
230
+ Let's unpack the three key findings that got us here."
231
+ Transition to Section 2.
232
+ </aside>
233
+ </section>
234
+
235
+ <!-- ============================================================ -->
236
+ <!-- SECTION 2: EXPERIMENTAL RESULTS (~20%, slides 9-12, ~4 min) -->
237
+ <!-- ============================================================ -->
238
+
239
+ <!-- SLIDE 9: Prompts Matter Most -->
240
+ <section>
241
+ <p class="section-label">Finding #1</p>
242
+ <h2>Prompt Design Is the #1 Lever</h2>
243
+ <div class="two-col" style="font-size:0.6em;grid-template-columns:1.5fr 1fr;gap:20px;">
244
+ <div class="col" style="text-align:center;">
245
+ <iframe src="charts/benchmark-prompts.html" class="chart-frame" loading="lazy"
246
+ style="height:480px;" id="prompts-chart"></iframe>
247
+ </div>
248
+ <div class="col">
249
+ <p>Structured prompts beat everything:</p>
250
+ <ul>
251
+ <li class="fragment"><span class="highlight">Math</span> reformatting</li>
252
+ <li class="fragment"><span class="highlight">Table</span> extraction</li>
253
+ <li class="fragment"><span class="highlight">FAQ</span> generation</li>
254
+ <li class="fragment"><span class="highlight">Tutorial</span> rewriting</li>
255
+ </ul>
256
+ <p class="fragment" style="margin-top:20px;">
257
+ These beat curated web data <em>and</em> all prior synthetic baselines.
258
+ </p>
259
+ <p class="fragment" style="color:#f0c674;font-weight:600;margin-top:10px;">
260
+ The prompt matters more than the model or the source data.
261
+ </p>
262
+ </div>
263
+ </div>
264
+ <aside class="notes">
265
+ ~1 min. "Finding number one, and the most important: prompt design is the biggest lever.
266
+ Structured formats like Math, Table, FAQ, Tutorial consistently outperform
267
+ both curated web data and prior synthetic approaches.
268
+ The prompt matters more than which model you use or what source data you start from."
269
+ </aside>
270
+ </section>
271
+
272
+ <!-- SLIDE 10: Smol Models Are Enough -->
273
+ <section>
274
+ <p class="section-label">Finding #2</p>
275
+ <h2>Smol Models Are Enough</h2>
276
+ <div class="two-col" style="font-size:0.6em;grid-template-columns:1.6fr 1fr;gap:16px;">
277
+ <div class="col" style="text-align:center;">
278
+ <iframe src="charts/benchmark-family.html" class="chart-frame" loading="lazy"
279
+ style="height:440px;"></iframe>
280
+ </div>
281
+ <div class="col">
282
+ <p>1B matches 4B, 12B, and 27B model performance.</p>
283
+ <p class="fragment"><span class="accent">SmolLM2-1.7B</span> beats Qwen, Gemma, Llama, Falcon, and Granite.</p>
284
+ <p class="fragment" style="margin-top:20px;">And it's <em>much</em> faster:</p>
285
+ <ul>
286
+ <li class="fragment"><span class="highlight">3.0x</span> faster than Gemma-3-12B<br><span style="color:rgba(255,255,255,0.4);">(9,220 vs 3,046 tps/gpu)</span></li>
287
+ <li class="fragment"><span class="highlight">5.3x</span> faster than Gemma-3-27B<br><span style="color:rgba(255,255,255,0.4);">(9,220 vs 1,724 tps/gpu)</span></li>
288
+ </ul>
289
+ <p class="fragment" style="color:#f0c674;font-weight:600;margin-top:20px;">
290
+ Better quality <em>and</em> faster inference.
291
+ </p>
292
+ </div>
293
+ </div>
294
+ <aside class="notes">
295
+ ~1 min. "Finding two: you don't need a big model.
296
+ 1B parameters match 4B, 12B, even 27B for rephrasing quality.
297
+ SmolLM2 at 1.7B beats all other model families.
298
+ And it's 3x faster than Gemma-12B, 5.3x faster than Gemma-27B.
299
+ Better quality AND faster inference. You don't need a big model."
300
+ </aside>
301
+ </section>
302
+
303
+ <!-- SLIDE 11: Diversity Paradox -->
304
+ <section>
305
+ <p class="section-label">Finding #3</p>
306
+ <h2>Diversity Beats Consistency</h2>
307
+ <div style="font-size:0.65em;">
308
+ <div class="two-col">
309
+ <div class="col">
310
+ <p><span class="highlight">Messy beats polished.</span></p>
311
+ <p class="fragment" data-fragment-index="1">SmolLM2's varied, inconsistent outputs outperform
312
+ Qwen3's template-locked, clean outputs.</p>
313
+ <p class="fragment" data-fragment-index="3" style="margin-top:20px;">
314
+ <span class="accent">Synthetic-only fails.</span><br>
315
+ You must mix synthetic data with original web data.
316
+ </p>
317
+ <p class="fragment" data-fragment-index="4" style="margin-top:20px;">
318
+ The mix-in dataset matters as much as the synthetic data itself.
319
+ </p>
320
+ </div>
321
+ <div class="col fragment" data-fragment-index="2">
322
+ <div style="background:rgba(255,255,255,0.04);border:1px solid rgba(255,255,255,0.08);border-radius:16px;padding:24px;">
323
+ <div style="font-size:1em;font-weight:700;margin-bottom:12px;">Template Collapse</div>
324
+ <div style="font-size:0.9em;line-height:1.6;">
325
+ <p style="color:rgba(255,255,255,0.5);">Qwen3 Math outputs:</p>
326
+ <p><span class="danger">115 / 1000</span> samples start with the exact same sentence</p>
327
+ <p style="margin-top:12px;color:rgba(255,255,255,0.5);">SmolLM2 Math outputs:</p>
328
+ <p><span class="accent">Highly varied</span> formatting and structure</p>
329
+ <p style="margin-top:16px;color:#f0c674;font-weight:600;">
330
+ Diversity beats consistency for pretraining.
331
+ </p>
332
+ </div>
333
+ </div>
334
+ </div>
335
+ </div>
336
+ </div>
337
+ <aside class="notes">
338
+ ~1 min. "Finding three: diversity beats polish. This was counterintuitive.
339
+ Qwen3's math outputs are very clean and consistent, but 115 out of 1000 start identically.
340
+ SmolLM2's outputs are messier but more varied. The varied outputs win.
341
+ Also: synthetic-only training fails. You need to mix in original data.
342
+ The mix-in dataset's influence is sometimes larger than that of the synthetic data itself."
343
+ </aside>
344
+ </section>
345
+
346
+ <!-- SLIDE 12: Results Summary -->
347
+ <section>
348
+ <p class="section-label">Summary</p>
349
+ <h2>What We Found</h2>
350
+ <ul class="takeaway-list" style="margin-top:30px;">
351
+ <li class="fragment">
352
+ <span class="accent">Prompt design</span> is the #1 lever.
353
+ Structured formats (Math, Table, FAQ, Tutorial) outperform everything.
354
+ </li>
355
+ <li class="fragment">
356
+ <span class="accent">1B models suffice.</span>
357
+ SmolLM2-1.7B is the best rephraser across the board.
358
+ </li>
359
+ <li class="fragment">
360
+ <span class="accent">Mix original data in.</span>
361
+ Synthetic-only fails. The mix-in dataset matters.
362
+ </li>
363
+ <li class="fragment">
364
+ <span class="accent">Diversity wins over polish.</span>
365
+ Varied, messy outputs beat clean, template-locked ones.
366
+ </li>
367
+ </ul>
368
+ <aside class="notes">
369
+ ~30s. Quick recap of findings. Click through each point. These four bullets
370
+ are the core message of the talk. Transition: "Now let's talk about
371
+ the engineering challenge of actually doing this at scale."
372
+ </aside>
373
+ </section>
374
+
375
+ <!-- ============================================================ -->
376
+ <!-- SECTION 3: INFRASTRUCTURE (~20%, slides 13-16, ~4 min) -->
377
+ <!-- ============================================================ -->
378
+
379
+ <!-- SLIDE 13: Engineering Challenge -->
380
+ <section>
381
+ <p class="section-label">Infrastructure</p>
382
+ <h2>How Do You Rephrase 1T Tokens?</h2>
383
+ <div style="font-size:0.65em;margin-top:30px;">
384
+ <p>Each experiment generates ~15B tokens.</p>
385
+ <p class="fragment">70+ experiments = <span class="accent">1T+ tokens</span> of LLM output.</p>
386
+ <p class="fragment" style="margin-top:20px;">
387
+ At ~4,750 tokens/sec/GPU (mean across all experiments):
388
+ </p>
389
+ <div class="fragment stat-row" style="margin-top:20px;">
390
+ <div class="stat-box"><div class="num">~880</div><div class="label">GPU-hours per experiment</div></div>
391
+ <div class="stat-box"><div class="num">~$3k</div><div class="label">cloud cost per experiment</div></div>
392
+ <div class="stat-box"><div class="num">~$215k</div><div class="label">total compute budget</div></div>
393
+ </div>
394
+ <p class="fragment" style="margin-top:20px;color:#f0c674;font-weight:600;">
395
+ You need a scalable, fault-tolerant pipeline.
396
+ </p>
397
+ </div>
398
+ <aside class="notes">
399
+ ~1 min. "Now the engineering side. Each experiment is 15 billion tokens of LLM generation.
400
+ 70+ experiments. That's over a trillion tokens total. At $3.50/GPU-hour,
401
+ each experiment costs about $3,000. You need infrastructure that handles failures,
402
+ checkpoints, and scales across many nodes."
403
+ </aside>
404
+ </section>
405
+
406
+ <!-- SLIDE 14: DataTrove + vLLM -->
407
+ <section>
408
+ <p class="section-label">Infrastructure</p>
409
+ <h2>DataTrove + vLLM</h2>
410
+ <iframe src="charts/pipeline.html" class="chart-frame" loading="lazy" style="height:440px;margin-bottom:0;"></iframe>
411
+ <div style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-top:2px;">
412
+ DataTrove orchestrates the pipeline. vLLM serves the model with optimized batching and prefix caching.
413
+ </div>
414
+ <aside class="notes">
415
+ ~1 min. "We built on DataTrove, our open-source data processing library.
416
+ The pipeline is Read → Transform → Write. The Transform step calls vLLM,
417
+ a high-throughput inference engine with tensor parallelism, chunked prefill, and prefix caching.
418
+ Everything runs on Slurm with checkpointing and auto-recovery.
419
+ Outputs go straight to a Hugging Face dataset with auto-generated cards."
420
+ </aside>
421
+ </section>
422
+
423
+ <!-- SLIDE 15: Throughput Optimization -->
424
+ <section>
425
+ <p class="section-label">Infrastructure</p>
426
+ <h2>Throughput Optimization</h2>
427
+ <p style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-bottom:8px;">
428
+ 18 models benchmarked on H100 GPUs. Two tiers of optimization.
429
+ </p>
430
+ <iframe src="charts/throughput.html" class="chart-frame" loading="lazy" style="height:420px;"></iframe>
431
+ <aside class="notes">
432
+ ~1 min. "We benchmarked 18 models across two tiers of optimization.
433
+ Tier 0: tensor parallelism, batch sizes, sequence lengths. Tier 1: GPU memory utilization, speculative decoding.
434
+ For large MoE models like GPT-OSS-120B, Tier 0 alone gives 1.95x speedup, cutting cost by nearly half.
435
+ Speculative decoding helps small models but can hurt others (Gemma 3 regresses due to vocab size)."
436
+ </aside>
437
+ </section>
438
+
439
+ <!-- SLIDE 16: Cost-Performance -->
440
+ <section>
441
+ <p class="section-label">Infrastructure</p>
442
+ <h2>Cost vs. Performance</h2>
443
+ <p style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-bottom:8px;">
444
+ Small models + good prompts dominate the Pareto frontier.
445
+ </p>
446
+ <iframe src="charts/cost-efficiency.html" class="chart-frame" loading="lazy" style="height:420px;"></iframe>
447
+ <p class="fragment" style="font-size:0.6em;margin-top:4px;color:#f0c674;font-weight:600;">
448
+ Invest in prompt design, not model size.
449
+ </p>
450
+ <aside class="notes">
451
+ ~1 min. "This scatter plot shows GPU time vs downstream performance for all experiments.
452
+ The Pareto frontier is dominated by small models with structured prompts.
453
+ The baselines on the left have zero rephrasing cost. Our best synthetic setups
454
+ beat them while remaining cost-efficient. Key takeaway: optimize throughput first,
455
+ then worry about model size."
456
+ </aside>
457
+ </section>
458
+
459
+ <!-- ============================================================ -->
460
+ <!-- SECTION 4: CONCLUSIONS (~20%, slides 17-21, ~4 min) -->
461
+ <!-- ============================================================ -->
462
+
463
+ <!-- SLIDE 17: The FinePhrase Recipe -->
464
+ <section>
465
+ <p class="section-label">Conclusion</p>
466
+ <h2>The FinePhrase Recipe</h2>
467
+ <div class="recipe-diagram fragment">
468
+ <div class="box">
469
+ <div style="font-size:1.4em;">📄</div>
470
+ <div style="font-weight:700;">Source Data</div>
471
+ <div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Web text<br>(even low-quality)</div>
472
+ </div>
473
+ <div class="plus">+</div>
474
+ <div class="box">
475
+ <div style="font-size:1.4em;">📝</div>
476
+ <div style="font-weight:700;">Structured Prompt</div>
477
+ <div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Math / Table /<br>FAQ / Tutorial</div>
478
+ </div>
479
+ <div class="plus">+</div>
480
+ <div class="box">
481
+ <div style="font-size:1.4em;">🤖</div>
482
+ <div style="font-weight:700;">SmolLM2-1.7B</div>
483
+ <div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Small, fast,<br>diverse outputs</div>
484
+ </div>
485
+ <div class="equals">=</div>
486
+ <div class="box result">
487
+ <div style="font-size:1.4em;">✨</div>
488
+ <div style="font-weight:700;color:#7c6ff7;">FinePhrase</div>
489
+ <div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Best synthetic<br>pretraining data</div>
490
+ </div>
491
+ </div>
492
+ <p class="fragment" style="font-size:0.6em;color:rgba(255,255,255,0.5);margin-top:20px;">
493
+ Mixed with high-quality original data (e.g., FineWeb-Edu) for best results.
494
+ </p>
495
+ <aside class="notes">
496
+ ~1 min. "Here's the recipe in one slide. Take any web text, even low-quality,
497
+ apply a structured prompt (Math, Table, FAQ, Tutorial), run it through SmolLM2-1.7B,
498
+ and mix the output with high-quality original data. That's FinePhrase.
499
+ It outperforms all tested baselines."
500
+ </aside>
501
+ </section>
502
+
503
+ <!-- SLIDE 18: What Surprised Us -->
504
+ <section>
505
+ <p class="section-label">Conclusion</p>
506
+ <h2>What Surprised Us</h2>
507
+ <div class="surprise-grid fragment">
508
+ <div class="surprise-card">
509
+ <div class="icon">🤷</div>
510
+ <h4>Typos Don't Matter</h4>
511
+ <p>REWIRE's original prompt had typos. Fixing them made no measurable difference to downstream performance.</p>
512
+ </div>
513
+ <div class="surprise-card">
514
+ <div class="icon">📊</div>
515
+ <h4>Proxy Scores Lie</h4>
516
+ <p>Edu-score and DCLM-score do not reliably predict downstream performance. You must train and evaluate.</p>
517
+ </div>
518
+ <div class="surprise-card">
519
+ <div class="icon">🎲</div>
520
+ <h4>Messier Is Better</h4>
521
+ <p>Varied, inconsistent outputs from SmolLM2 beat Qwen3's polished, template-locked outputs every time.</p>
522
+ </div>
523
+ </div>
524
+ <aside class="notes">
525
+ ~1 min. "Three things that surprised us. First: typos in prompts don't matter.
526
+ REWIRE's prompt had actual typos and fixing them changed nothing.
527
+ Second: quality proxy scores like edu-score don't predict performance. You must train.
528
+ Third: messy, varied outputs consistently beat clean, polished ones. Diversity is king."
529
+ </aside>
530
+ </section>
531
+
532
+ <!-- SLIDE 19: Everything Is Open -->
533
+ <section>
534
+ <p class="section-label">Open Source</p>
535
+ <h2>Everything Is Open</h2>
536
+ <div style="font-size:0.65em;margin-top:20px;">
537
+ <ul>
538
+ <li class="fragment">All prompts, configs, and pipeline code</li>
539
+ <li class="fragment">Generated datasets on the Hugging Face Hub</li>
540
+ <li class="fragment">Throughput benchmarks for 18 models</li>
541
+ <li class="fragment">Blog post with interactive charts</li>
542
+ </ul>
543
+ <div class="fragment" style="margin-top:30px;">
544
+ <p style="font-weight:700;color:#f0c674;font-size:1.1em;">Future directions:</p>
545
+ <ul style="color:rgba(255,255,255,0.5);">
546
+ <li>Diffusion LMs for faster inference</li>
547
+ <li>Scaling to more data (ablations trained on only 21B tokens)</li>
548
+ <li>Mixing ratio: how little synthetic data can you get away with?</li>
549
+ <li>Best-of-N filtering on synthetic outputs</li>
550
+ </ul>
551
+ </div>
552
+ </div>
553
+ <aside class="notes">
554
+ ~1 min. "We're releasing everything. All prompts, the pipeline code in DataTrove,
555
+ the generated datasets on the Hub, throughput benchmarks.
556
+ The blog post itself has interactive charts you can explore.
557
+ Future work: we're looking at diffusion LMs for faster inference,
558
+ scaling beyond our 21B token ablations, exploring mixing ratios to find how little
559
+ synthetic data you actually need, and using best-of-N filtering on synthetic outputs."
560
+ </aside>
561
+ </section>
562
+
563
+ <!-- SLIDE 20: Academia Hub -->
564
+ <section>
565
+ <img src="assets/academia-hub.png" alt="Hugging Face Academia Hub" class="img-contain" style="max-height:560px;border-radius:12px;box-shadow:0 8px 40px rgba(0,0,0,0.4);"><!-- alt text: this slide has no other visible content, so the image needs a text alternative -->
566
+ <aside class="notes">
567
+ ~30s. "If you're at a university or research lab, check out our Academia Hub:
568
+ institution-wide access to the Hugging Face Hub with priority GPU access,
569
+ inference credits, storage, and enterprise admin."
570
+ </aside>
571
+ </section>
572
+
573
+ <!-- SLIDE 21: Q&A -->
574
+ <section class="center-slide">
575
+ <h2>Thank You</h2>
576
+ <p style="font-size:0.6em;color:rgba(255,255,255,0.5);margin-top:10px;">Questions?</p>
577
+ <div style="display:flex;align-items:center;justify-content:center;gap:28px;margin-top:30px;">
578
+ <img src="assets/profile.jpg" alt="Portrait of Joel Niklaus" style="width:90px;height:90px;border-radius:50%;border:2px solid rgba(255,255,255,0.15);object-fit:cover;"><!-- alt text is required on img; NOTE(review): assumes the photo shows the speaker named beside it — confirm -->
579
+ <div style="text-align:left;font-size:0.55em;">
580
+ <div style="font-weight:700;font-size:1.2em;margin-bottom:8px;">Joel Niklaus</div>
581
+ <div style="display:flex;align-items:center;gap:8px;margin-bottom:6px;">
582
+ <svg width="18" height="18" viewBox="0 0 24 24" fill="rgba(255,255,255,0.7)"><path d="M20.447 20.452h-3.554v-5.569c0-1.328-.027-3.037-1.852-3.037-1.853 0-2.136 1.445-2.136 2.939v5.667H9.351V9h3.414v1.561h.046c.477-.9 1.637-1.85 3.37-1.85 3.601 0 4.267 2.37 4.267 5.455v6.286zM5.337 7.433a2.062 2.062 0 01-2.063-2.065 2.064 2.064 0 112.063 2.065zm1.782 13.019H3.555V9h3.564v11.452zM22.225 0H1.771C.792 0 0 .774 0 1.729v20.542C0 23.227.792 24 1.771 24h20.451C23.2 24 24 23.227 24 22.271V1.729C24 .774 23.2 0 22.222 0h.003z"/></svg>
583
+ <a href="https://linkedin.com/in/joelniklaus" target="_blank" style="color:rgba(255,255,255,0.7);text-decoration:none;">joelniklaus</a>
584
+ </div>
585
+ <div style="display:flex;align-items:center;gap:8px;">
586
+ <svg width="18" height="18" viewBox="0 0 24 24" fill="rgba(255,255,255,0.7)"><path d="M18.244 2.25h3.308l-7.227 8.26 8.502 11.24H16.17l-5.214-6.817L4.99 21.75H1.68l7.73-8.835L1.254 2.25H8.08l4.713 6.231zm-1.161 17.52h1.833L7.084 4.126H5.117z"/></svg>
587
+ <a href="https://x.com/joelniklaus" target="_blank" style="color:rgba(255,255,255,0.7);text-decoration:none;">@joelniklaus</a>
588
+ </div>
589
+ </div>
590
+ </div>
591
+ <p style="margin-top:24px;font-size:0.55em;color:rgba(255,255,255,0.4);">
592
+ Stay tuned for the blog post with many more details.
593
+ </p>
594
+ <aside class="notes">
595
+ Q&A time. Mention they can reach out on LinkedIn or X. Have the blog open in a browser tab
596
+ for live demos if questions come up.
597
+ </aside>
598
+ </section>
599
+
600
+ </div><!-- /slides -->
601
+ </div><!-- /reveal -->
602
+
603
+ <script src="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/dist/reveal.js"></script>
604
+ <script src="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/plugin/notes/notes.js"></script>
605
+ <script>
606
+ Reveal.initialize({ // boot the deck; `Reveal` and `RevealNotes` are globals provided by the two CDN scripts loaded just before this block
607
+ hash: true, // mirror the current slide in the URL hash so a reload or shared link reopens it
608
+ slideNumber: 'c/t', // slide-number format: current / total
609
+ showSlideNumber: 'speaker', // show the slide number only in the speaker view
610
+ transition: 'fade',
611
+ transitionSpeed: 'fast',
612
+ center: false, // no automatic vertical centering of slide content
613
+ width: 1200, // authoring size in px; reveal.js scales the deck from this to fit the viewport
614
+ height: 700,
615
+ margin: 0.06, // fraction of the viewport kept empty around the scaled slide
616
+ plugins: [RevealNotes], // speaker-notes plugin; it displays the <aside class="notes"> blocks
617
+ });
618
+ </script>
619
+ </body>
620
+ </html>
app/presentation/se2026/standalone.html ADDED
The diff for this file is too large to render. See raw diff
 
app/presentation/se2026/style.css ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --text-color: rgba(255,255,255,0.88);
3
+ --muted-color: rgba(255,255,255,0.45);
4
+ --surface-bg: rgba(30,30,40,0.95);
5
+ --border-color: rgba(255,255,255,0.1);
6
+ --axis-color: rgba(255,255,255,0.15);
7
+ --tick-color: rgba(255,255,255,0.5);
8
+ --grid-color: rgba(255,255,255,0.06);
9
+ --primary-color: #7c6ff7;
10
+ --danger: #e05252;
11
+ }
12
+
13
+ /* NOTE(review): `data-theme` is an HTML attribute, not a CSS property, so a `data-theme: dark` declaration is discarded by the parser and has no effect. If anything keys off a dark-theme flag, set data-theme="dark" on the <html> element in the markup instead. */
14
+
15
+ .reveal {
16
+ font-family: 'Inter', system-ui, -apple-system, sans-serif;
17
+ }
18
+
19
+ .reveal h1, .reveal h2, .reveal h3 {
20
+ font-weight: 700;
21
+ text-transform: none;
22
+ letter-spacing: -0.02em;
23
+ }
24
+
25
+ .reveal .slides section {
26
+ top: 0 !important;
27
+ padding-top: 5px;
28
+ }
29
+
30
+ .reveal .slides section.center-slide {
31
+ top: auto !important;
32
+ display: flex !important;
33
+ flex-direction: column;
34
+ justify-content: center;
35
+ height: 100%;
36
+ padding-top: 0;
37
+ }
38
+
39
+ .reveal h2 {
40
+ font-size: 1.6em;
41
+ margin-top: 0;
42
+ margin-bottom: 0.3em;
43
+ }
44
+
45
+ .reveal h3 {
46
+ font-size: 1.15em;
47
+ color: rgba(255,255,255,0.6);
48
+ font-weight: 500;
49
+ margin-bottom: 0.4em;
50
+ }
51
+
52
+ .reveal .subtitle {
53
+ font-size: 0.55em;
54
+ font-weight: 400;
55
+ color: rgba(255,255,255,0.5);
56
+ margin-top: 0.3em;
57
+ }
58
+
59
+ .reveal .section-label {
60
+ font-size: 0.5em;
61
+ font-weight: 700;
62
+ text-transform: uppercase;
63
+ letter-spacing: 0.15em;
64
+ color: #7c6ff7;
65
+ margin-bottom: 0.1em;
66
+ }
67
+
68
+ .reveal ul {
69
+ list-style: none;
70
+ padding-left: 0;
71
+ font-size: 0.75em;
72
+ }
73
+
74
+ .reveal ul li {
75
+ margin-bottom: 0.5em;
76
+ padding-left: 1.2em;
77
+ position: relative;
78
+ }
79
+
80
+ .reveal ul li::before {
81
+ content: '→';
82
+ position: absolute;
83
+ left: 0;
84
+ color: #7c6ff7;
85
+ }
86
+
87
+ .reveal .accent {
88
+ color: #7c6ff7;
89
+ }
90
+
91
+ .reveal .highlight {
92
+ color: #f0c674;
93
+ }
94
+
95
+ .reveal .danger {
96
+ color: #e05252;
97
+ }
98
+
99
+ .reveal .big-number {
100
+ font-size: 3em;
101
+ font-weight: 800;
102
+ line-height: 1.1;
103
+ color: #7c6ff7;
104
+ }
105
+
106
+ .reveal .big-number .unit {
107
+ font-size: 0.35em;
108
+ font-weight: 500;
109
+ color: rgba(255,255,255,0.5);
110
+ }
111
+
112
+ .stat-row {
113
+ display: flex;
114
+ justify-content: center;
115
+ gap: 60px;
116
+ margin: 30px 0;
117
+ }
118
+
119
+ .stat-box {
120
+ text-align: center;
121
+ }
122
+
123
+ .stat-box .num {
124
+ font-size: 2.2em;
125
+ font-weight: 800;
126
+ color: #7c6ff7;
127
+ line-height: 1.1;
128
+ }
129
+
130
+ .stat-box .label {
131
+ font-size: 0.55em;
132
+ color: rgba(255,255,255,0.5);
133
+ margin-top: 4px;
134
+ }
135
+
136
+ .two-col {
137
+ display: grid;
138
+ grid-template-columns: 1fr 1fr;
139
+ gap: 40px;
140
+ text-align: left;
141
+ align-items: start;
142
+ }
143
+
144
+ .two-col .col {
145
+ padding: 0;
146
+ }
147
+
148
+ .before-after {
149
+ display: grid;
150
+ grid-template-columns: 1fr auto 1fr;
151
+ gap: 16px;
152
+ align-items: start;
153
+ font-size: 0.52em;
154
+ text-align: left;
155
+ }
156
+
157
+ .before-after .arrow {
158
+ font-size: 2em;
159
+ color: #7c6ff7;
160
+ align-self: center;
161
+ padding-top: 20px;
162
+ }
163
+
164
+ .before-after .panel {
165
+ background: rgba(255,255,255,0.04);
166
+ border: 1px solid rgba(255,255,255,0.08);
167
+ border-radius: 12px;
168
+ padding: 18px 20px;
169
+ line-height: 1.5;
170
+ }
171
+
172
+ .before-after .panel.bad {
173
+ border-color: rgba(224,82,82,0.3);
174
+ }
175
+
176
+ .before-after .panel.good {
177
+ border-color: rgba(124,111,247,0.3);
178
+ }
179
+
180
+ .before-after .panel-title {
181
+ font-weight: 700;
182
+ font-size: 1.1em;
183
+ margin-bottom: 8px;
184
+ }
185
+
186
+ .before-after .panel.bad .panel-title {
187
+ color: #e05252;
188
+ }
189
+
190
+ .before-after .panel.good .panel-title {
191
+ color: #7c6ff7;
192
+ }
193
+
194
+ .chart-frame {
195
+ width: 100%;
196
+ height: 480px;
197
+ border: none;
198
+ border-radius: 8px;
199
+ background: transparent;
200
+ }
201
+
202
+ .chart-frame.tall {
203
+ height: 540px;
204
+ }
205
+
206
+ .chart-frame.short {
207
+ height: 400px;
208
+ }
209
+
210
+ .img-contain {
211
+ max-width: 100%;
212
+ max-height: 480px;
213
+ border-radius: 8px;
214
+ }
215
+
216
+ .recipe-diagram {
217
+ display: flex;
218
+ align-items: center;
219
+ justify-content: center;
220
+ gap: 20px;
221
+ font-size: 0.75em;
222
+ margin: 30px 0;
223
+ }
224
+
225
+ .recipe-diagram .box {
226
+ background: rgba(255,255,255,0.06);
227
+ border: 1px solid rgba(255,255,255,0.12);
228
+ border-radius: 12px;
229
+ padding: 16px 22px;
230
+ text-align: center;
231
+ min-width: 120px;
232
+ }
233
+
234
+ .recipe-diagram .box.result {
235
+ border-color: rgba(124,111,247,0.4);
236
+ background: rgba(124,111,247,0.08);
237
+ }
238
+
239
+ .recipe-diagram .plus {
240
+ font-size: 1.8em;
241
+ color: rgba(255,255,255,0.3);
242
+ }
243
+
244
+ .recipe-diagram .equals {
245
+ font-size: 1.8em;
246
+ color: #7c6ff7;
247
+ }
248
+
249
+ .takeaway-list {
250
+ font-size: 0.65em;
251
+ max-width: 700px;
252
+ margin: 0 auto;
253
+ }
254
+
255
+ .takeaway-list li {
256
+ margin-bottom: 0.7em;
257
+ line-height: 1.5;
258
+ }
259
+
260
+ .qr-section {
261
+ display: flex;
262
+ align-items: center;
263
+ justify-content: center;
264
+ gap: 40px;
265
+ margin-top: 20px;
266
+ }
267
+
268
+ .surprise-grid {
269
+ display: grid;
270
+ grid-template-columns: 1fr 1fr 1fr;
271
+ gap: 20px;
272
+ font-size: 0.6em;
273
+ text-align: left;
274
+ }
275
+
276
+ .surprise-card {
277
+ background: rgba(255,255,255,0.04);
278
+ border: 1px solid rgba(255,255,255,0.08);
279
+ border-radius: 12px;
280
+ padding: 20px;
281
+ }
282
+
283
+ .surprise-card .icon {
284
+ font-size: 1.6em;
285
+ margin-bottom: 8px;
286
+ }
287
+
288
+ .surprise-card h4 {
289
+ font-size: 1.05em;
290
+ margin: 0 0 8px 0;
291
+ color: #f0c674;
292
+ }
293
+
294
+ .surprise-card p {
295
+ margin: 0;
296
+ color: rgba(255,255,255,0.6);
297
+ line-height: 1.5;
298
+ }