Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
1da160c
1
Parent(s): e575fa7
made presentation together with script to convert to standalone file
Browse files- app/presentation/se2026/assets/academia-hub.png +3 -0
- app/presentation/se2026/assets/bern-skyline.png +3 -0
- app/presentation/se2026/assets/dclm-filtering-pipeline.png +3 -0
- app/presentation/se2026/assets/profile.jpg +3 -0
- app/presentation/se2026/build-standalone.mjs +142 -0
- app/presentation/se2026/charts/benchmark-family.html +837 -0
- app/presentation/se2026/charts/benchmark-prompts.html +837 -0
- app/presentation/se2026/charts/benchmark.html +837 -0
- app/presentation/se2026/charts/cost-efficiency.html +480 -0
- app/presentation/se2026/charts/experiment-flow.html +396 -0
- app/presentation/se2026/charts/pipeline.html +363 -0
- app/presentation/se2026/charts/throughput.html +466 -0
- app/presentation/se2026/data/benchmark-results.csv +3 -0
- app/presentation/se2026/data/rephrasing_metadata.json +3 -0
- app/presentation/se2026/index.html +620 -0
- app/presentation/se2026/standalone.html +0 -0
- app/presentation/se2026/style.css +298 -0
app/presentation/se2026/assets/academia-hub.png
ADDED
|
Git LFS Details
|
app/presentation/se2026/assets/bern-skyline.png
ADDED
|
Git LFS Details
|
app/presentation/se2026/assets/dclm-filtering-pipeline.png
ADDED
|
Git LFS Details
|
app/presentation/se2026/assets/profile.jpg
ADDED
|
Git LFS Details
|
app/presentation/se2026/build-standalone.mjs
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Build helper for the SE2026 one-file presentation.
|
| 3 |
+
*
|
| 4 |
+
* What this script does:
|
| 5 |
+
* - Reads `index.html` as the base reveal deck
|
| 6 |
+
* - Inlines local CSS (`style.css`)
|
| 7 |
+
* - Inlines local image assets as data URIs
|
| 8 |
+
* - Rewrites chart iframe sources to embedded `data:` HTML documents
|
| 9 |
+
* - Injects chart data (`data/benchmark-results.csv`, `data/rephrasing_metadata.json`)
|
| 10 |
+
* directly into chart pages so they no longer fetch local files
|
| 11 |
+
* - Writes the final output to `standalone.html`
|
| 12 |
+
*
|
| 13 |
+
* How to run:
|
| 14 |
+
* - From this folder:
|
| 15 |
+
* `node build-standalone.mjs`
|
| 16 |
+
* - Or from repo root:
|
| 17 |
+
* `node app/presentation/se2026/build-standalone.mjs`
|
| 18 |
+
*/
|
| 19 |
+
import { readFileSync, writeFileSync } from 'fs';
|
| 20 |
+
import { dirname, join } from 'path';
|
| 21 |
+
import { fileURLToPath } from 'url';
|
| 22 |
+
|
| 23 |
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
| 24 |
+
|
| 25 |
+
const readText = (relativePath) => readFileSync(join(__dirname, relativePath), 'utf8');
|
| 26 |
+
const readBinary = (relativePath) => readFileSync(join(__dirname, relativePath));
|
| 27 |
+
|
| 28 |
+
const toDataUri = (relativePath, mimeType) =>
|
| 29 |
+
`data:${mimeType};base64,${readBinary(relativePath).toString('base64')}`;
|
| 30 |
+
|
| 31 |
+
const css = readText('style.css');
|
| 32 |
+
const csvData = readText('data/benchmark-results.csv');
|
| 33 |
+
const jsonData = readText('data/rephrasing_metadata.json');
|
| 34 |
+
|
| 35 |
+
const chartIds = [
|
| 36 |
+
'experiment-flow',
|
| 37 |
+
'benchmark',
|
| 38 |
+
'benchmark-prompts',
|
| 39 |
+
'benchmark-family',
|
| 40 |
+
'throughput',
|
| 41 |
+
'cost-efficiency',
|
| 42 |
+
'pipeline',
|
| 43 |
+
];
|
| 44 |
+
|
| 45 |
+
const chartDataNeeds = {
|
| 46 |
+
'experiment-flow': { csv: false, json: true },
|
| 47 |
+
benchmark: { csv: true, json: false },
|
| 48 |
+
'benchmark-prompts': { csv: true, json: false },
|
| 49 |
+
'benchmark-family': { csv: true, json: false },
|
| 50 |
+
throughput: { csv: false, json: false },
|
| 51 |
+
'cost-efficiency': { csv: true, json: true },
|
| 52 |
+
pipeline: { csv: false, json: false },
|
| 53 |
+
};
|
| 54 |
+
|
| 55 |
+
const imageDataUris = {
|
| 56 |
+
'assets/bern-skyline.png': toDataUri('assets/bern-skyline.png', 'image/png'),
|
| 57 |
+
'assets/dclm-filtering-pipeline.png': toDataUri('assets/dclm-filtering-pipeline.png', 'image/png'),
|
| 58 |
+
'assets/profile.jpg': toDataUri('assets/profile.jpg', 'image/jpeg'),
|
| 59 |
+
'assets/academia-hub.png': toDataUri('assets/academia-hub.png', 'image/png'),
|
| 60 |
+
};
|
| 61 |
+
|
| 62 |
+
/**
 * Embed the data a chart needs as inline globals so the chart no longer has
 * to fetch local files.
 *
 * Depending on `needs`, a `<script>` element is added that defines
 * `window.__INLINE_CSV_DATA__` (the CSV text as a string) and/or
 * `window.__INLINE_JSON_DATA__` (the JSON document as an object literal).
 * The script is placed right before `</head>`; when the chart has no
 * `</head>`, it is prepended to the document instead.
 *
 * @param {string} chartHtml - Source of one chart page.
 * @param {{ csv: boolean, json: boolean }} needs - Which payloads to embed.
 * @returns {string} The chart HTML with the inline data script added, or the
 *   unchanged input when no payload is needed.
 */
function injectInlineData(chartHtml, needs) {
  // A literal `</script` inside the payload would terminate the inline script
  // element early. `<\/script` is an equivalent spelling inside both JSON
  // strings and JS string literals, so the data itself is unchanged.
  const escapeScriptClose = (text) => text.replace(/<\/script/gi, '<\\/script');

  const snippets = [];

  if (needs.csv) {
    snippets.push(`window.__INLINE_CSV_DATA__ = ${escapeScriptClose(JSON.stringify(csvData))};`);
  }

  if (needs.json) {
    snippets.push(`window.__INLINE_JSON_DATA__ = ${escapeScriptClose(jsonData)};`);
  }

  if (snippets.length === 0) {
    return chartHtml;
  }

  const inlineScript = `<script>\n${snippets.join('\n')}\n</script>`;
  if (!chartHtml.includes('</head>')) {
    return `${inlineScript}\n${chartHtml}`;
  }

  // Use a replacer function: with a replacement *string*, sequences such as
  // `$$`, `$&` or `$'` inside the payload would be interpreted as special
  // patterns and corrupt the embedded data.
  return chartHtml.replace('</head>', () => `${inlineScript}\n</head>`);
}
|
| 86 |
+
|
| 87 |
+
/**
 * Rewrite the data-loading calls in a chart page so they read the inline
 * payloads (`window.__INLINE_CSV_DATA__` / `window.__INLINE_JSON_DATA__`)
 * instead of fetching files.
 *
 * Three exact source patterns are recognised (see the regexes below). A
 * pattern that does not occur in `chartHtml` is simply not replaced, so the
 * input is returned unchanged when none of them match.
 *
 * @param {string} chartHtml - Source of one chart page.
 * @returns {string} The chart source with matching fetch calls replaced.
 */
function patchChartFetches(chartHtml) {
|
| 88 |
+
let output = chartHtml;
|
| 89 |
+
|
| 90 |
+
// Benchmark chart family: read CSV from inline payload.
|
| 91 |
+
output = output.replace(
|
| 92 |
+
/const text = await fetchFirstAvailable\(csvPaths\);/g,
|
| 93 |
+
'const text = window.__INLINE_CSV_DATA__;'
|
| 94 |
+
);
|
| 95 |
+
|
| 96 |
+
// Experiment flow chart: read JSON from inline payload.
|
| 97 |
+
output = output.replace(
|
| 98 |
+
/fetchFirst\(dataPaths\)\.then\(data => buildChart\(data\)\)/g,
|
| 99 |
+
'Promise.resolve(window.__INLINE_JSON_DATA__).then(data => buildChart(data))'
|
| 100 |
+
);
|
| 101 |
+
|
| 102 |
+
// Cost efficiency chart: read both JSON and CSV from inline payloads.
|
| 103 |
+
output = output.replace(
|
| 104 |
+
/Promise\.all\(\[\s*fetchFirst\(dataPaths\),\s*fetchFirst\(csvPaths, d3\.csvParse\)\s*\]\)\.then\(\(\[data, csvRows\]\) => buildChart\(data, csvRows\)\)/g,
|
| 105 |
+
'Promise.resolve([window.__INLINE_JSON_DATA__, d3.csvParse(window.__INLINE_CSV_DATA__)]).then(([data, csvRows]) => buildChart(data, csvRows))'
|
| 106 |
+
);
|
| 107 |
+
|
| 108 |
+
return output;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
/**
 * Encode an HTML document as a base64 `data:` URL usable as an iframe `src`.
 *
 * The charset is declared in the URL itself because the payload bytes are
 * UTF-8; without it the browser has to rely on finding a `<meta charset>`
 * early in the document, which is not guaranteed once inline data has been
 * prepended to it.
 *
 * @param {string} html - Complete HTML document source.
 * @returns {string} A `data:text/html;charset=utf-8;base64,...` URL.
 */
function toDataUrl(html) {
  const encoded = Buffer.from(html, 'utf8').toString('base64');
  return `data:text/html;charset=utf-8;base64,${encoded}`;
}
|
| 114 |
+
|
| 115 |
+
const chartDataUrls = {};
|
| 116 |
+
for (const chartId of chartIds) {
|
| 117 |
+
let chartHtml = readText(`charts/${chartId}.html`);
|
| 118 |
+
chartHtml = injectInlineData(chartHtml, chartDataNeeds[chartId]);
|
| 119 |
+
chartHtml = patchChartFetches(chartHtml);
|
| 120 |
+
chartDataUrls[chartId] = toDataUrl(chartHtml);
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
// ─── Assemble the standalone deck ───
let output = readText('index.html');

// Inline the stylesheet. A replacer function is used so that any `$` sequence
// in the CSS is inserted verbatim instead of being treated as a special
// replacement pattern.
const stylesheetLink = '<link rel="stylesheet" href="style.css">';
if (!output.includes(stylesheetLink)) {
  // Without this the build would silently emit a deck that still depends on style.css.
  console.warn(`Warning: ${stylesheetLink} not found in index.html; style.css was not inlined.`);
}
output = output.replace(stylesheetLink, () => `<style>\n${css}\n</style>`);

// Swap every local image reference for its data URI. Matching is done on the
// literal attribute text (split/join), so no regex escaping of the path is
// needed and every occurrence is replaced.
for (const [path, dataUri] of Object.entries(imageDataUris)) {
  output = output.split(`src="${path}"`).join(`src="${dataUri}"`);
}

// Point each chart iframe at its embedded `data:` document.
for (const chartId of chartIds) {
  output = output.split(`src="charts/${chartId}.html"`).join(`src="${chartDataUrls[chartId]}"`);
}

writeFileSync(join(__dirname, 'standalone.html'), output, 'utf8');

console.log('Built standalone.html');
console.log('Size:', (Buffer.byteLength(output) / 1024 / 1024).toFixed(2), 'MB');
|
app/presentation/se2026/charts/benchmark-family.html
ADDED
|
@@ -0,0 +1,837 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" data-theme="dark">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 6 |
+
<title>Benchmark Comparison</title>
|
| 7 |
+
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
| 8 |
+
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--text-color: rgba(255,255,255,0.88);
|
| 11 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 12 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 13 |
+
--border-color: rgba(255,255,255,0.1);
|
| 14 |
+
--axis-color: rgba(255,255,255,0.15);
|
| 15 |
+
--tick-color: rgba(255,255,255,0.5);
|
| 16 |
+
--grid-color: rgba(255,255,255,0.06);
|
| 17 |
+
--primary-color: #7c6ff7;
|
| 18 |
+
}
|
| 19 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 20 |
+
html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
|
| 21 |
+
</style>
|
| 22 |
+
</head>
|
| 23 |
+
<body>
|
| 24 |
+
<div class="d3-benchmark-comparison" data-config='{"defaultSetup":"average","setups":{"Article":{"datasets":{"mix-fw_edu_hq-article_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-article_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-article_granite3_1b_hq":"Granite3","mix-fw_edu_hq-article_1b_hq":"Gemma-3","mix-fw_edu_hq-article_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-article_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Discussion":{"datasets":{"mix-fw_edu_hq-discussion_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-discussion_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-discussion_granite3_1b_hq":"Granite3","mix-fw_edu_hq-discussion_1b_hq":"Gemma-3","mix-fw_edu_hq-discussion_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-discussion_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Tutorial":{"datasets":{"mix-fw_edu_hq-tutorial_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-tutorial_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-tutorial_qwen3_1.7b_hq":"Qwen3","mix-fw_edu_hq-tutorial_1b_hq":"Gemma-3","mix-fw_edu_hq-tutorial_granite3_1b_hq":"Granite3","mix-fw_edu_hq-tutorial_llama3.2_1b_hq":"Llama-3.2","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"FAQ":{"datasets":{"mix-fw_edu_hq-faq_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-faq_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-faq_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-faq_1b_hq":"Gemma-3","mix-fw_edu_hq-faq_granite3_1b_hq":"Granite3","mix-fw_edu_hq-faq_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Table":{"datasets":{"mix-fw_edu_hq-table_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu_hq-table_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-table_granite3_1b_hq":"Granite3","mix-fw_edu_hq-table_qwen3_1.7b_hq":"Qwen3","mix-fw_edu_hq-table_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-table_1b_hq":"Gemma-3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}},"Math":{"datasets":{"mix-fw_edu_hq-math_smollm2_1.7b_hq":"SmolLM2","mix-fw_edu
_hq-math_falcon3_1b_hq":"Falcon3","mix-fw_edu_hq-math_granite3_1b_hq":"Granite3","mix-fw_edu_hq-math_1b_hq":"Gemma-3","mix-fw_edu_hq-math_llama3.2_1b_hq":"Llama-3.2","mix-fw_edu_hq-math_qwen3_1.7b_hq":"Qwen3","dclm":{"display":"DCLM","color":"#8b8b8b","baseline":true}}}}}' data-datafiles="../data/benchmark-results.csv"></div>
|
| 25 |
+
<style>
|
| 26 |
+
.d3-benchmark-comparison { position: relative; }
|
| 27 |
+
.d3-benchmark-comparison .controls {
|
| 28 |
+
display: flex;
|
| 29 |
+
gap: 16px;
|
| 30 |
+
align-items: flex-end;
|
| 31 |
+
justify-content: center;
|
| 32 |
+
margin: 10px 0 0 0;
|
| 33 |
+
}
|
| 34 |
+
.d3-benchmark-comparison .controls .control-group {
|
| 35 |
+
display: flex;
|
| 36 |
+
flex-direction: column;
|
| 37 |
+
align-items: flex-start;
|
| 38 |
+
gap: 6px;
|
| 39 |
+
}
|
| 40 |
+
.d3-benchmark-comparison .controls label {
|
| 41 |
+
font-size: 18px;
|
| 42 |
+
font-weight: 700;
|
| 43 |
+
color: var(--text-color);
|
| 44 |
+
}
|
| 45 |
+
.d3-benchmark-comparison .controls select {
|
| 46 |
+
appearance: none;
|
| 47 |
+
-webkit-appearance: none;
|
| 48 |
+
-moz-appearance: none;
|
| 49 |
+
border: 1px solid var(--border-color);
|
| 50 |
+
border-radius: 8px;
|
| 51 |
+
padding: 6px 28px 6px 10px;
|
| 52 |
+
background-color: var(--surface-bg);
|
| 53 |
+
color: var(--text-color);
|
| 54 |
+
font-size: 18px;
|
| 55 |
+
line-height: 1.2;
|
| 56 |
+
background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
|
| 57 |
+
background-repeat: no-repeat;
|
| 58 |
+
background-position: right 8px center;
|
| 59 |
+
}
|
| 60 |
+
.d3-benchmark-comparison .controls select:focus-visible {
|
| 61 |
+
outline: 2px solid var(--primary-color);
|
| 62 |
+
outline-offset: 2px;
|
| 63 |
+
}
|
| 64 |
+
.d3-benchmark-comparison .legend {
|
| 65 |
+
display: flex;
|
| 66 |
+
flex-direction: column;
|
| 67 |
+
align-items: flex-start;
|
| 68 |
+
gap: 6px;
|
| 69 |
+
margin: 8px 0 0 0;
|
| 70 |
+
padding-bottom: 4px;
|
| 71 |
+
}
|
| 72 |
+
.d3-benchmark-comparison .legend .legend-title {
|
| 73 |
+
font-size: 18px;
|
| 74 |
+
font-weight: 700;
|
| 75 |
+
color: var(--text-color);
|
| 76 |
+
}
|
| 77 |
+
.d3-benchmark-comparison .legend .items {
|
| 78 |
+
display: flex;
|
| 79 |
+
flex-wrap: wrap;
|
| 80 |
+
gap: 8px 14px;
|
| 81 |
+
}
|
| 82 |
+
.d3-benchmark-comparison .legend .item {
|
| 83 |
+
display: inline-flex;
|
| 84 |
+
align-items: center;
|
| 85 |
+
gap: 6px;
|
| 86 |
+
white-space: nowrap;
|
| 87 |
+
font-size: 18px;
|
| 88 |
+
color: var(--text-color);
|
| 89 |
+
cursor: pointer;
|
| 90 |
+
}
|
| 91 |
+
.d3-benchmark-comparison .legend .item.ghost { opacity: .25; }
|
| 92 |
+
.d3-benchmark-comparison .legend .swatch {
|
| 93 |
+
width: 14px;
|
| 94 |
+
height: 14px;
|
| 95 |
+
border-radius: 3px;
|
| 96 |
+
border: 1px solid var(--border-color);
|
| 97 |
+
}
|
| 98 |
+
.d3-benchmark-comparison .bar.ghost { opacity: .25; }
|
| 99 |
+
.d3-benchmark-comparison .value-label.ghost { opacity: .25; }
|
| 100 |
+
.d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; }
|
| 101 |
+
.d3-benchmark-comparison .line-path.ghost { opacity: .15; }
|
| 102 |
+
.d3-benchmark-comparison .line-dot.ghost { opacity: .15; }
|
| 103 |
+
.d3-benchmark-comparison .baseline.ghost { opacity: .1; }
|
| 104 |
+
.d3-benchmark-comparison .axes path { display: none; }
|
| 105 |
+
.d3-benchmark-comparison .axes line { stroke: var(--axis-color); }
|
| 106 |
+
.d3-benchmark-comparison .axes text { fill: var(--tick-color); }
|
| 107 |
+
.d3-benchmark-comparison .grid line { stroke: var(--grid-color); }
|
| 108 |
+
.d3-benchmark-comparison .hover-line {
|
| 109 |
+
stroke: var(--text-color);
|
| 110 |
+
stroke-opacity: 0.25;
|
| 111 |
+
stroke-width: 1;
|
| 112 |
+
pointer-events: none;
|
| 113 |
+
}
|
| 114 |
+
.d3-benchmark-comparison .d3-tooltip {
|
| 115 |
+
position: absolute;
|
| 116 |
+
top: 0px;
|
| 117 |
+
left: 0px;
|
| 118 |
+
transform: translate(-9999px, -9999px);
|
| 119 |
+
pointer-events: none;
|
| 120 |
+
padding: 8px 10px;
|
| 121 |
+
border-radius: 8px;
|
| 122 |
+
font-size: 18px;
|
| 123 |
+
line-height: 1.35;
|
| 124 |
+
border: 1px solid var(--border-color);
|
| 125 |
+
background: var(--surface-bg);
|
| 126 |
+
color: var(--text-color);
|
| 127 |
+
box-shadow: 0 4px 24px rgba(0,0,0,.18);
|
| 128 |
+
opacity: 0;
|
| 129 |
+
transition: opacity .12s ease;
|
| 130 |
+
text-align: left;
|
| 131 |
+
z-index: 10;
|
| 132 |
+
}
|
| 133 |
+
.d3-benchmark-comparison .d3-tooltip .tip-dot {
|
| 134 |
+
display: inline-block;
|
| 135 |
+
width: 10px;
|
| 136 |
+
height: 10px;
|
| 137 |
+
border-radius: 3px;
|
| 138 |
+
border: 1px solid var(--border-color);
|
| 139 |
+
margin-right: 6px;
|
| 140 |
+
vertical-align: middle;
|
| 141 |
+
}
|
| 142 |
+
</style>
|
| 143 |
+
<script>
|
| 144 |
+
(() => {
|
| 145 |
+
const ensureD3 = (cb) => {
|
| 146 |
+
if (window.d3 && typeof window.d3.select === 'function') return cb();
|
| 147 |
+
let s = document.getElementById('d3-cdn-script');
|
| 148 |
+
if (!s) { s = document.createElement('script'); s.id = 'd3-cdn-script'; s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js'; document.head.appendChild(s); }
|
| 149 |
+
const onReady = () => { if (window.d3 && typeof window.d3.select === 'function') cb(); };
|
| 150 |
+
s.addEventListener('load', onReady, { once: true }); if (window.d3) onReady();
|
| 151 |
+
};
|
| 152 |
+
|
| 153 |
+
const bootstrap = () => {
|
| 154 |
+
const scriptEl = document.currentScript;
|
| 155 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 156 |
+
if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) {
|
| 157 |
+
const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
|
| 158 |
+
container = cs[cs.length - 1] || null;
|
| 159 |
+
}
|
| 160 |
+
if (!container) return;
|
| 161 |
+
if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
|
| 162 |
+
|
| 163 |
+
container.style.position = container.style.position || 'relative';
|
| 164 |
+
|
| 165 |
+
// ─── READ CONFIG ───
|
| 166 |
+
let mountEl = container;
|
| 167 |
+
while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
|
| 168 |
+
let cfg = {};
|
| 169 |
+
try {
|
| 170 |
+
const raw = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
|
| 171 |
+
if (raw && raw.trim()) cfg = raw.trim().startsWith('{') ? JSON.parse(raw) : {};
|
| 172 |
+
} catch (_) {}
|
| 173 |
+
|
| 174 |
+
// ─── NORMALIZE DATASETS CONFIG ───
|
| 175 |
+
// Accepts: { "key": "Name" } or { "key": { display, color, shaded, baseline } }
|
| 176 |
+
// Returns: { key: { display, color, shaded, baseline } }
|
| 177 |
+
/**
 * Bring a datasets config into the uniform object form.
 *
 * Each entry may be a plain display name (`"key": "Name"`) or an options
 * object (`"key": { display, color, shaded, baseline }`). Options objects are
 * shallow-copied. An entry without a `display` value falls back to its key,
 * matching how `displayName` falls back to the raw run name for runs that are
 * not configured at all.
 *
 * @param {Object<string, string|Object>|null|undefined} raw - Datasets config.
 * @returns {Object<string, Object>} Map of key to options, each with `display` set.
 */
function normalizeDatasets(raw) {
  const out = {};
  for (const [key, value] of Object.entries(raw || {})) {
    if (typeof value === 'string') {
      out[key] = { display: value };
      continue;
    }
    const opts = { ...value };
    // Never leave the label undefined; it is used for legends and grouping.
    if (opts.display == null) opts.display = key;
    out[key] = opts;
  }
  return out;
}
|
| 184 |
+
|
| 185 |
+
// ─── SETUP SUPPORT ───
|
| 186 |
+
const SETUPS = cfg.setups || null;
|
| 187 |
+
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
|
| 188 |
+
const AVG_SETUP_KEY = 'Average (all setups)';
|
| 189 |
+
const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 ? 'average' : null);
|
| 190 |
+
let currentSetup = SETUPS ? (defaultSetupCfg === 'average' ? AVG_SETUP_KEY : (defaultSetupCfg && setupNames.includes(defaultSetupCfg) ? defaultSetupCfg : setupNames[0])) : null;
|
| 191 |
+
let DATASETS = SETUPS ? (currentSetup === AVG_SETUP_KEY ? {} : normalizeDatasets(SETUPS[currentSetup].datasets)) : normalizeDatasets(cfg.datasets);
|
| 192 |
+
let avgDatasets = {};
|
| 193 |
+
let parsedData = [];
|
| 194 |
+
|
| 195 |
+
const RUN_COL = cfg.runColumn || 'runname';
|
| 196 |
+
const STEP_COL = cfg.stepColumn || 'steps';
|
| 197 |
+
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
|
| 198 |
+
const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
|
| 199 |
+
const defaultView = cfg.defaultView || 'bar';
|
| 200 |
+
const uid = Math.random().toString(36).slice(2, 8);
|
| 201 |
+
|
| 202 |
+
// ─── DATASET ACCESSORS ───
|
| 203 |
+
function displayName(raw) { return DATASETS[raw] ? DATASETS[raw].display : raw; }
|
| 204 |
+
function isBaseline(raw) { return !!(DATASETS[raw] && DATASETS[raw].baseline); }
|
| 205 |
+
function isShaded(raw) { return !!(DATASETS[raw] && DATASETS[raw].shaded); }
|
| 206 |
+
function pinnedColor(raw) { return DATASETS[raw] && DATASETS[raw].color; }
|
| 207 |
+
function stripePatternId(raw) { return 'stripe-' + uid + '-' + raw.replace(/[^a-zA-Z0-9]/g, '_'); }
|
| 208 |
+
|
| 209 |
+
const METRIC_NAMES = {
|
| 210 |
+
'agg_score_macro': 'Aggregate Score (Macro)',
|
| 211 |
+
'agg_score_micro': 'Aggregate Score (Micro)',
|
| 212 |
+
'agg_score_RC': 'Reading Comprehension',
|
| 213 |
+
'agg_score_GK': 'General Knowledge',
|
| 214 |
+
'agg_score_NLU': 'Natural Language Understanding',
|
| 215 |
+
'agg_score_MATH': 'Math',
|
| 216 |
+
'agg_score_TABLE': 'Table Understanding',
|
| 217 |
+
'agg_score_RES': 'Reasoning',
|
| 218 |
+
'lighteval|arc_cf:easy|3/prob_norm_token': 'ARC-Easy',
|
| 219 |
+
'lighteval|drop|3/prob_norm_token': 'DROP',
|
| 220 |
+
'lighteval|gsm8k|3/prob_norm_token': 'GSM8K',
|
| 221 |
+
'lighteval|hellaswag_cf|3/prob_norm_token': 'HellaSwag',
|
| 222 |
+
'lighteval|openbookqa_cf|3/prob_norm_token': 'OpenBookQA',
|
| 223 |
+
'lighteval|piqa_cf|3/prob_norm_token': 'PIQA',
|
| 224 |
+
'lighteval|squad_v2|3/prob_norm_token': 'SQuAD v2',
|
| 225 |
+
'lighteval|treb_qa|3/prob_norm_token': 'TriviaQA',
|
| 226 |
+
'lighteval|wikitablequestions|3/prob_norm_token': 'WikiTableQuestions',
|
| 227 |
+
'lighteval|winogrande_cf|3/prob_norm_token': 'Winogrande',
|
| 228 |
+
'lighteval|xcsqa_cf|3/prob_norm_token': 'XCSQA',
|
| 229 |
+
'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux'
|
| 230 |
+
};
|
| 231 |
+
|
| 232 |
+
// Tooltip
|
| 233 |
+
let tip = container.querySelector('.d3-tooltip'), tipInner;
|
| 234 |
+
if (!tip) {
|
| 235 |
+
tip = document.createElement('div'); tip.className = 'd3-tooltip';
|
| 236 |
+
tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tip.appendChild(tipInner);
|
| 237 |
+
container.appendChild(tip);
|
| 238 |
+
} else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
|
| 239 |
+
|
| 240 |
+
// SVG
|
| 241 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 242 |
+
const gRoot = svg.append('g');
|
| 243 |
+
const defs = svg.append('defs');
|
| 244 |
+
|
| 245 |
+
// State
|
| 246 |
+
let allData = [];
|
| 247 |
+
let metricKeys = [];
|
| 248 |
+
let currentMetric = defaultMetric;
|
| 249 |
+
let currentView = defaultView;
|
| 250 |
+
let colorMap = {};
|
| 251 |
+
let highlight = null;
|
| 252 |
+
|
| 253 |
+
// ─── HELPERS ───
|
| 254 |
+
function metricName(key) { return METRIC_NAMES[key] || key; }
|
| 255 |
+
|
| 256 |
+
function stepsToTokens(step) { return step * TOKENS_PER_STEP; }
|
| 257 |
+
function formatTokens(tokens) {
|
| 258 |
+
if (tokens >= 1e9) return d3.format('.2f')(tokens / 1e9) + 'B';
|
| 259 |
+
if (tokens >= 1e6) return d3.format('.1f')(tokens / 1e6) + 'M';
|
| 260 |
+
return d3.format(',')(tokens);
|
| 261 |
+
}
|
| 262 |
+
function formatStep(step) {
|
| 263 |
+
if (step >= 1000) return d3.format('.0f')(step / 1000) + 'K';
|
| 264 |
+
return String(step);
|
| 265 |
+
}
|
| 266 |
+
function stepLabelShort(step) { return `${formatTokens(stepsToTokens(step))} (${formatStep(step)})`; }
|
| 267 |
+
function stepLabelLong(step) { return `${formatTokens(stepsToTokens(step))} Tokens (${formatStep(step)} Steps)`; }
|
| 268 |
+
|
| 269 |
+
function getCategoricalColors(n) {
|
| 270 |
+
try { if (window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') return window.ColorPalettes.getColors('categorical', n); } catch (_) {}
|
| 271 |
+
return (d3.schemeTableau10 || ['#4e79a7','#f28e2b','#e15759','#76b7b2','#59a14f','#edc948','#b07aa1','#ff9da7','#9c755f','#bab0ac']).slice(0, n);
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
function initColors() {
|
| 275 |
+
if (Object.keys(colorMap).length) return;
|
| 276 |
+
const allRaw = Array.from(d3.group(allData, d => d[RUN_COL]).keys()).sort();
|
| 277 |
+
const unpinned = [];
|
| 278 |
+
allRaw.forEach(raw => {
|
| 279 |
+
const pc = pinnedColor(raw);
|
| 280 |
+
if (pc) { colorMap[raw] = pc; }
|
| 281 |
+
else { unpinned.push(raw); }
|
| 282 |
+
});
|
| 283 |
+
const palette = getCategoricalColors(unpinned.length);
|
| 284 |
+
unpinned.forEach((raw, i) => { colorMap[raw] = palette[i % palette.length]; });
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
// ─── SETUP HELPERS ───
|
| 288 |
+
function filterData() {
|
| 289 |
+
const knownNames = Object.keys(DATASETS);
|
| 290 |
+
allData = knownNames.length ? parsedData.filter(r => knownNames.includes(r[RUN_COL])) : parsedData;
|
| 291 |
+
allData.columns = parsedData.columns;
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
function computeAverageData(rawData) {
|
| 295 |
+
if (!SETUPS || setupNames.length < 2) return { data: [], datasets: {} };
|
| 296 |
+
const displayToRaws = {};
|
| 297 |
+
for (const sName of setupNames) {
|
| 298 |
+
const ds = normalizeDatasets(SETUPS[sName].datasets);
|
| 299 |
+
for (const [raw, opts] of Object.entries(ds)) {
|
| 300 |
+
if (!displayToRaws[opts.display]) displayToRaws[opts.display] = [];
|
| 301 |
+
displayToRaws[opts.display].push(raw);
|
| 302 |
+
}
|
| 303 |
+
}
|
| 304 |
+
const fullDisplay = Object.entries(displayToRaws)
|
| 305 |
+
.filter(([, raws]) => raws.length >= setupNames.length);
|
| 306 |
+
const byRunStep = {};
|
| 307 |
+
for (const row of rawData) byRunStep[row[RUN_COL] + '|' + row[STEP_COL]] = row;
|
| 308 |
+
const steps = Array.from(new Set(rawData.map(r => +r[STEP_COL]))).sort((a, b) => a - b);
|
| 309 |
+
const cols = rawData.columns || Object.keys(rawData[0] || {});
|
| 310 |
+
const result = [];
|
| 311 |
+
const dsMap = {};
|
| 312 |
+
for (const [display, raws] of fullDisplay) {
|
| 313 |
+
const avgRaw = '__avg__' + display.replace(/[^a-zA-Z0-9]/g, '_');
|
| 314 |
+
// Merge options from first setup that has this display name
|
| 315 |
+
const firstOpts = Object.values(normalizeDatasets(SETUPS[setupNames[0]].datasets)).find(o => o.display === display) || {};
|
| 316 |
+
dsMap[avgRaw] = { display, ...firstOpts };
|
| 317 |
+
for (const step of steps) {
|
| 318 |
+
const rows = raws.map(r => byRunStep[r + '|' + step]).filter(Boolean);
|
| 319 |
+
if (!rows.length) continue;
|
| 320 |
+
const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
|
| 321 |
+
for (const col of cols) {
|
| 322 |
+
if (col === RUN_COL || col === STEP_COL) continue;
|
| 323 |
+
const vals = rows.map(r => +r[col]).filter(v => !isNaN(v));
|
| 324 |
+
avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
|
| 325 |
+
}
|
| 326 |
+
result.push(avgRow);
|
| 327 |
+
}
|
| 328 |
+
}
|
| 329 |
+
return { data: result, datasets: dsMap };
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
/**
 * Activates a setup (or the cross-setup average view) and redraws the chart.
 *
 * Rebuilds DATASETS for the requested setup, re-adds every baseline run that
 * any setup declares (as long as the loaded data contains rows for it), resets
 * the colour assignments, then re-filters, re-renders and rebuilds the legend.
 *
 * @param {string} name - A key of SETUPS, or AVG_SETUP_KEY for the averaged view.
 */
function switchSetup(name) {
  currentSetup = name;
  DATASETS = name === AVG_SETUP_KEY
    ? { ...avgDatasets }
    : normalizeDatasets(SETUPS[name].datasets);

  // Index the run names once instead of rescanning every row for each
  // baseline candidate. Tolerates the data not having been loaded yet.
  const runsWithData = new Set((parsedData || []).map(r => r[RUN_COL]));

  // Re-add baselines from any setup
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const [raw, opts] of Object.entries(ds)) {
      if (opts.baseline && !DATASETS[raw] && runsWithData.has(raw)) {
        DATASETS[raw] = { ...opts }; // copy so the setup definition is never mutated
      }
    }
  }

  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
|
| 354 |
+
|
| 355 |
+
/**
 * Shows the tooltip with the given markup next to the pointer.
 *
 * The tooltip sits 12px to the right of the pointer and flips to the left when
 * it would run past the container's right edge. Both coordinates are clamped
 * at zero so the tooltip is never pushed beyond the top or left edge.
 *
 * @param {string} html - Markup written to the tooltip via innerHTML.
 * @param {number} x - Pointer x position (callers pass container-relative coordinates).
 * @param {number} y - Pointer y position (callers pass container-relative coordinates).
 */
function showTip(html, x, y) {
  tipInner.innerHTML = html;
  const tipW = tip.offsetWidth || 180;   // fallback when the width cannot be measured
  const cW = container.clientWidth || 800;
  const overflowsRight = x + tipW + 20 > cW;
  // Without the clamp a wide tooltip near the left edge would get a negative
  // offset when flipped and be cut off; the y coordinate is clamped the same way.
  const px = Math.max(0, overflowsRight ? x - tipW - 12 : x + 12);
  tip.style.transform = `translate(${px}px, ${Math.max(0, y - 20)}px)`;
  tip.style.opacity = '1';
}
|
| 363 |
+
/** Hides the tooltip: makes it fully transparent and moves it far outside the visible area. */
function hideTip() {
  const { style } = tip;
  style.opacity = '0';
  style.transform = 'translate(-9999px, -9999px)';
}
|
| 367 |
+
|
| 368 |
+
/**
 * Applies the current hover highlight to the chart and the legend.
 *
 * While `highlight` holds a series name, every mark and legend item that
 * belongs to a different series receives the `ghost` class; when `highlight`
 * is empty the class is removed everywhere.
 */
function updateHighlight() {
  // Always yields a real boolean: classList.toggle() treats an undefined
  // `force` argument as "flip the class", so a falsy pass-through is fragile.
  const isGhost = (name) => Boolean(highlight) && name !== highlight;

  // Every kind of mark drawn by the bar and line views.
  const markSelectors = [
    'rect.bar',
    'text.value-label',
    '.line-path',
    '.line-dot',
    '.baseline-vline',
    '.baseline-vlabel',
    '.baseline-hline',
    '.baseline-hlabel',
  ];
  for (const selector of markSelectors) {
    gRoot.selectAll(selector).classed('ghost', d => isGhost(d.name));
  }

  container.querySelectorAll('.legend .item').forEach(el => {
    el.classList.toggle('ghost', isGhost(el.getAttribute('data-name')));
  });
}
|
| 381 |
+
|
| 382 |
+
// ─── AUTO-DETECT METRICS from CSV columns ───
|
| 383 |
+
/**
 * Derives the list of selectable metrics from the CSV column names.
 *
 * Known aggregate-score columns come first, in a fixed order; they are followed
 * by every remaining column whose value in the first data row converts to a
 * number. The run, step and `seed` columns are never treated as metrics.
 *
 * @param {string[]} columns - Column names of the loaded CSV.
 * @returns {string[]} Metric keys: aggregates first, then individual benchmarks.
 */
function detectMetrics(columns) {
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter(k => columns.includes(k));
  const aggSet = new Set(agg);

  // Only the first row is sampled to decide whether a column is numeric. With
  // no rows there is nothing to sample, so no individual metric is reported
  // (previously this dereferenced an undefined row and threw).
  const sample = allData[0];
  const ind = sample
    ? columns.filter(k => !skip.has(k) && !aggSet.has(k) && !Number.isNaN(Number(sample[k])))
    : [];
  return [...agg, ...ind];
}
|
| 390 |
+
|
| 391 |
+
// ─── BAR CHART ───
|
| 392 |
+
/**
 * Draws the "Final Score" view: one horizontal bar per non-baseline run showing
 * the value of `currentMetric` at that run's highest step, plus a dashed
 * vertical reference line and label for every baseline run.
 *
 * Reads `allData`, `currentMetric` and `colorMap`; draws into `svg`, `gRoot`
 * and `defs`.
 */
function renderBar() {
  const width = container.clientWidth || 800;
  const height = window.innerHeight || 480;
  const hasBaselines = allData.some(r => isBaseline(r[RUN_COL]));
  // Extra head-room when baseline labels are drawn above the plot area.
  const margin = { top: hasBaselines ? 20 : 12, right: 56, bottom: 32, left: 190 };
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // Reduce every run to the row at its highest step.
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const finalData = [];
  for (const [raw, rows] of grouped) {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const row = rows.find(r => +r[STEP_COL] === maxStep);
    if (row) finalData.push({ name: displayName(raw), rawName: raw, value: +row[currentMetric] });
  }
  finalData.sort((a, b) => b.value - a.value);

  const barData = finalData.filter(d => !isBaseline(d.rawName));
  const baselineData = finalData.filter(d => isBaseline(d.rawName));

  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  // A zero (or missing) maximum would collapse the domain to a single point,
  // which d3 maps to the middle of the range; fall back to 1 so that
  // all-zero metrics draw zero-width bars instead of half-width ones.
  const maxValue = d3.max(finalData, d => d.value);
  const x = d3.scaleLinear().domain([0, (maxValue || 1) * 1.05]).range([0, innerWidth]);
  const y = d3.scaleBand().domain(barData.map(d => d.name)).range([0, innerHeight]).padding(0.2);

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(x.ticks(5)).join('line')
      .attr('x1', d => x(d)).attr('x2', d => x(d)).attr('y1', 0).attr('y2', innerHeight);
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--text-color)').style('font-size', '18px').style('font-weight', '600');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Stripe patterns for shaded bars
  barData.forEach(d => {
    if (!isShaded(d.rawName)) return;
    const c = colorMap[d.rawName] || '#999';
    const pat = defs.append('pattern').attr('id', stripePatternId(d.rawName))
      .attr('width', 6).attr('height', 6).attr('patternUnits', 'userSpaceOnUse').attr('patternTransform', 'rotate(45)');
    pat.append('rect').attr('width', 6).attr('height', 6).attr('fill', c).attr('opacity', 0.35);
    pat.append('line').attr('x1', 0).attr('y1', 0).attr('x2', 0).attr('y2', 6).attr('stroke', c).attr('stroke-width', 2.5);
  });

  // Shaded runs are filled with their stripe pattern, all others with a flat colour.
  const barFill = (d) => (
    isShaded(d.rawName)
      ? `url(#${stripePatternId(d.rawName)})`
      : (colorMap[d.rawName] || 'var(--primary-color)')
  );

  // Bars
  const barTip = (ev, d) => {
    const [mx, my] = d3.pointer(ev, container);
    showTip(`<strong>${d.name}</strong><br/>${metricName(currentMetric)}: <strong>${d.value.toFixed(3)}</strong>`, mx, my);
  };
  // Hover wiring shared by newly created and updated bars.
  const bindHover = (sel) => sel
    .on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
    .on('mousemove', barTip)
    .on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); });

  gRoot.selectAll('rect.bar').data(barData, d => d.name).join(
    enter => bindHover(
      enter.append('rect').attr('class', 'bar')
        .attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
        .attr('fill', d => barFill(d))
        .attr('width', 0)
    ).transition().duration(300).attr('width', d => Math.max(0, x(d.value))),
    update => bindHover(update)
      .transition().duration(300)
      .attr('y', d => y(d.name)).attr('height', y.bandwidth())
      .attr('width', d => Math.max(0, x(d.value)))
      .attr('fill', d => barFill(d)),
    exit => exit.transition().duration(200).attr('width', 0).remove()
  );

  // Value labels
  gRoot.selectAll('text.value-label').data(barData, d => d.name).join(
    enter => enter.append('text').attr('class', 'value-label')
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .attr('dy', '0.35em').attr('fill', 'var(--text-color)').attr('font-size', 18)
      .text(d => d.value.toFixed(3)),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .text(d => d.value.toFixed(3)),
    exit => exit.remove()
  );

  // Baseline vertical reference lines
  gRoot.selectAll('.baseline-vline').data(baselineData, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-vline baseline')
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-vlabel').data(baselineData, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-vlabel baseline')
      .attr('x', d => x(d.value)).attr('y', -4)
      .attr('text-anchor', 'middle').attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value))
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    exit => exit.remove()
  );
}
|
| 521 |
+
|
| 522 |
+
// ─── LINE CHART ───
|
| 523 |
+
/**
 * Draws the "Training Progression" view: one line (with dots) per non-baseline
 * run plotting `currentMetric` against the step column, a dashed horizontal
 * reference line for each baseline run at its last-step value, and a hover
 * overlay that snaps to the nearest step and lists every series in a tooltip.
 *
 * Reads `allData`, `currentMetric` and `colorMap`; draws into `svg` and `gRoot`.
 */
function renderLine() {
  const width = container.clientWidth || 800;
  const margin = { top: 16, right: 50, bottom: 48, left: 60 };
  const height = window.innerHeight || 480;
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // Build series
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const series = [];
  const baselineSeries = [];
  for (const [raw, rows] of grouped) {
    const pts = rows.map(r => ({ step: +r[STEP_COL], value: +r[currentMetric] })).sort((a, b) => a.step - b.step);
    const entry = { name: displayName(raw), rawName: raw, values: pts };
    if (isBaseline(raw)) {
      // Baselines are drawn as a flat reference at their last-step value.
      entry.finalValue = pts[pts.length - 1].value;
      baselineSeries.push(entry);
    } else {
      series.push(entry);
    }
  }

  // The x axis covers the steps of non-baseline runs only.
  const allSteps = Array.from(new Set(allData.filter(r => !isBaseline(r[RUN_COL])).map(r => +r[STEP_COL]))).sort((a, b) => a - b);
  const allValues = [...series, ...baselineSeries].flatMap(s => s.finalValue != null ? [s.finalValue] : s.values.map(v => v.value));

  const x = d3.scaleLinear().domain(d3.extent(allSteps)).range([0, innerWidth]);
  const yMin = d3.min(allValues);
  const yMax = d3.max(allValues);
  const yPad = (yMax - yMin) * 0.08;
  const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(y.ticks(6)).join('line')
      .attr('x1', 0).attr('x2', innerWidth).attr('y1', d => y(d)).attr('y2', d => y(d));
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).ticks(6).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Axis labels
  gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
    .attr('x', innerWidth / 2).attr('y', innerHeight + 38)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text('Tokens (Steps)');

  gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
    .attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text(metricName(currentMetric));

  // Baseline horizontal reference lines
  gRoot.selectAll('.baseline-hline').data(baselineSeries, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-hline baseline')
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-hlabel').data(baselineSeries, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-hlabel baseline')
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .attr('text-anchor', 'start')
      .attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    exit => exit.remove()
  );

  // Lines (non-baseline)
  const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
  gRoot.selectAll('.line-path').data(series, d => d.name).join(
    enter => enter.append('path').attr('class', 'line-path')
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    update => update.transition().duration(300)
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    exit => exit.remove()
  );

  // Dots (non-baseline)
  const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
  gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
    enter => enter.append('circle').attr('class', 'line-dot')
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value)).attr('r', 3)
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('stroke', 'var(--surface-bg)').attr('stroke-width', 1),
    update => update.transition().duration(300)
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value))
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)'),
    exit => exit.remove()
  );

  // Hover overlay
  gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
    .attr('y1', 0).attr('y2', innerHeight).style('display', 'none');

  gRoot.selectAll('.hover-overlay').data([0]).join('rect').attr('class', 'hover-overlay')
    .attr('width', innerWidth).attr('height', innerHeight)
    .attr('fill', 'none').attr('pointer-events', 'all')
    .on('mousemove', (ev) => {
      const [mx] = d3.pointer(ev, gRoot.node());
      // Convert the pointer position to a step once, then snap to the closest known step.
      const target = x.invert(mx);
      const nearest = allSteps.reduce((best, s) => Math.abs(s - target) < Math.abs(best - target) ? s : best, allSteps[0]);
      gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);

      // Series contribute their value at the snapped step; baselines always contribute their final value.
      const entries = series.map(s => {
        const pt = s.values.find(v => v.step === nearest);
        return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
      }).filter(Boolean);
      baselineSeries.forEach(s => {
        entries.push({ name: s.name, rawName: s.rawName, value: s.finalValue });
      });
      entries.sort((a, b) => b.value - a.value);

      let html = `<div style="font-weight:700;margin-bottom:4px;">${stepLabelLong(nearest)}</div>`;
      entries.forEach(e => {
        // Same grey fallback as the reference lines, so a run without an assigned colour still gets a visible dot.
        html += `<div><span class="tip-dot" style="background:${colorMap[e.rawName] || '#999'}"></span>${e.name}: <strong>${e.value.toFixed(3)}</strong></div>`;
      });
      const [cx, cy] = d3.pointer(ev, container);
      showTip(html, cx, cy);
    })
    .on('mouseleave', () => {
      gRoot.select('.hover-line').style('display', 'none');
      hideTip();
    });
}
|
| 674 |
+
|
| 675 |
+
// ─── RENDER ───
|
| 676 |
+
/**
 * Redraws the chart from scratch for the current view.
 *
 * Does nothing while there is no data. Otherwise refreshes the colour
 * assignments, empties both the drawing group and the defs container, and
 * delegates to the bar or line renderer depending on `currentView`.
 */
function render() {
  const hasRows = Boolean(allData.length);
  if (!hasRows) return;

  initColors();
  for (const layer of [gRoot, defs]) {
    layer.selectAll('*').remove();
  }

  const draw = currentView === 'bar' ? renderBar : renderLine;
  draw();
}
|
| 683 |
+
|
| 684 |
+
// ─── UI ───
|
| 685 |
+
/**
 * Builds the chart's controls and an empty legend and appends both to the
 * container.
 *
 * Controls, in order: an optional "Setup" select (one option per setup, plus
 * the averaged view when at least two setups exist), a "View" select that
 * switches between the bar and line charts, and a "Metric" select that is
 * created empty here.
 */
function buildUI() {
  // Creates a labelled <select> wrapped in a control group; ids are suffixed with the chart uid.
  const makeControl = (idPrefix, labelText) => {
    const group = document.createElement('div');
    group.className = 'control-group';
    const label = document.createElement('label');
    label.setAttribute('for', idPrefix + uid);
    label.textContent = labelText;
    const select = document.createElement('select');
    select.id = idPrefix + uid;
    return { group, label, select };
  };
  // Appends one <option>, pre-selecting it when requested.
  const addOption = (select, value, text, isSelected) => {
    const opt = document.createElement('option');
    opt.value = value;
    opt.textContent = text;
    if (isSelected) opt.selected = true;
    select.appendChild(opt);
  };
  // Inserts label and select into their group, then the group into the parent.
  const mount = (parent, { group, label, select }) => {
    group.appendChild(label);
    group.appendChild(select);
    parent.appendChild(group);
  };

  const controls = document.createElement('div');
  controls.className = 'controls';

  if (SETUPS && setupNames.length > 0) {
    const setup = makeControl('setup-', 'Setup');
    for (const name of setupNames) {
      addOption(setup.select, name, name, name === currentSetup);
    }
    if (setupNames.length >= 2) {
      addOption(setup.select, AVG_SETUP_KEY, AVG_SETUP_KEY, currentSetup === AVG_SETUP_KEY);
    }
    setup.select.addEventListener('change', () => { switchSetup(setup.select.value); });
    mount(controls, setup);
  }

  const view = makeControl('view-', 'View');
  const viewChoices = [['bar', 'Final Score'], ['line', 'Training Progression']];
  for (const [val, text] of viewChoices) {
    addOption(view.select, val, text, val === currentView);
  }
  view.select.addEventListener('change', () => { currentView = view.select.value; render(); });
  mount(controls, view);

  // The metric select starts out empty and gets no listener here.
  mount(controls, makeControl('metric-', 'Metric'));

  container.appendChild(controls);

  const legend = document.createElement('div');
  legend.className = 'legend';
  legend.innerHTML = '<div class="legend-title">Legend</div><div class="items"></div>';
  container.appendChild(legend);
}
|
| 731 |
+
|
| 732 |
+
/**
 * Fills the "Metric" select with one option per entry of `metricKeys`.
 *
 * Keys starting with `agg_score` go into an "Aggregate Scores" group, all
 * others into "Individual Benchmarks"; empty groups are omitted. Choosing an
 * option updates `currentMetric` and re-renders the chart. Does nothing when
 * the select element cannot be found inside the container.
 */
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;
  sel.innerHTML = '';
  const aggGroup = document.createElement('optgroup'); aggGroup.label = 'Aggregate Scores';
  const indGroup = document.createElement('optgroup'); indGroup.label = 'Individual Benchmarks';
  metricKeys.forEach(key => {
    const opt = document.createElement('option'); opt.value = key; opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    if (key.startsWith('agg_score')) aggGroup.appendChild(opt); else indGroup.appendChild(opt);
  });
  if (aggGroup.children.length) sel.appendChild(aggGroup);
  if (indGroup.children.length) sel.appendChild(indGroup);
  // Assign the handler instead of adding a listener: the options are rebuilt
  // on every call, and addEventListener would stack one more handler (and one
  // more render per change) each time this function runs.
  sel.onchange = () => { currentMetric = sel.value; render(); };
}
|
| 747 |
+
|
| 748 |
+
/**
 * Rebuilds the legend entries inside the container's `.legend .items` element.
 *
 * Runs are ordered by their `defaultMetric` value at their highest step
 * (descending); baseline runs get no entry. Each entry shows a colour swatch
 * (striped for shaded runs) and the run's display name, and hovering it
 * highlights the matching series.
 */
function buildLegend() {
  const items = container.querySelector('.legend .items');
  if (!items) return;
  items.innerHTML = '';

  // Score each run by its last-step value of the default metric.
  const scored = [];
  for (const [raw, rows] of d3.group(allData, d => d[RUN_COL])) {
    const lastStep = d3.max(rows, r => +r[STEP_COL]);
    const lastRow = rows.find(r => +r[STEP_COL] === lastStep);
    scored.push({ raw, score: lastRow ? +lastRow[defaultMetric] : 0 });
  }
  scored.sort((a, b) => b.score - a.score);

  for (const { raw } of scored) {
    if (isBaseline(raw)) continue;

    const name = displayName(raw);
    const el = document.createElement('span');
    el.className = 'item';
    el.setAttribute('data-name', name);

    const sw = document.createElement('span');
    sw.className = 'swatch';
    sw.style.background = colorMap[raw] || '#999';
    if (isShaded(raw)) {
      // Overlay translucent diagonal stripes on top of the base colour.
      sw.style.backgroundImage = 'repeating-linear-gradient(45deg, transparent, transparent 2px, rgba(255,255,255,0.4) 2px, rgba(255,255,255,0.4) 4px)';
    }

    const txt = document.createElement('span');
    txt.textContent = name;

    el.appendChild(sw);
    el.appendChild(txt);
    items.appendChild(el);
    el.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    el.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  }
}
|
| 778 |
+
|
| 779 |
+
buildUI();
|
| 780 |
+
|
| 781 |
+
// ─── DATA LOADING ───
|
| 782 |
+
/**
 * Fetches the first path that responds successfully and returns its body text.
 *
 * Paths are tried in order; a network error or a non-OK response moves on to
 * the next candidate.
 *
 * @param {string[]} paths - Candidate URLs, in order of preference.
 * @returns {Promise<string>} Body text of the first successful response.
 * @throws {Error} 'CSV not found' when no path succeeds; `cause` carries the
 *   last failure (or null when `paths` was empty).
 */
const fetchFirstAvailable = async (paths) => {
  let lastError = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.text();
      lastError = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Best effort: remember why this candidate failed and try the next one.
      lastError = err;
    }
  }
  const notFound = new Error('CSV not found');
  notFound.cause = lastError; // keep the underlying reason available for debugging
  throw notFound;
};
|
| 788 |
+
|
| 789 |
+
// Find the closest element (the container itself or one of its ancestors)
// that carries a non-empty data-datafiles attribute.
let dataMountEl = container;
while (dataMountEl && !dataMountEl.getAttribute?.('data-datafiles')) { dataMountEl = dataMountEl.parentElement; }

// The attribute is either a single path or a JSON array of candidate paths.
let providedData = null;
try {
  const attr = dataMountEl && dataMountEl.getAttribute ? dataMountEl.getAttribute('data-datafiles') : null;
  if (attr && attr.trim()) providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
} catch (err) {
  // A malformed JSON list still falls back to the default CSV below, but say
  // so instead of silently loading a different file than the page asked for.
  console.warn('Ignoring invalid data-datafiles attribute:', err);
}

// Bare file names (no slash) are resolved against /data/; anything else is used as given.
const ensurePrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
const csvPaths = providedData
  ? (Array.isArray(providedData) ? providedData.map(ensurePrefix) : [ensurePrefix(providedData)])
  : ['../data/benchmark-results.csv'];
|
| 801 |
+
|
| 802 |
+
// Load the CSV, derive the averaged runs when several setups exist, then draw
// the chart and keep it in sync with the container size. Any failure is shown
// as a message inside the container.
(async () => {
  try {
    const csvText = await fetchFirstAvailable(csvPaths);
    const rows = d3.csvParse(csvText);
    parsedData = rows;

    const canAverage = SETUPS && setupNames.length >= 2;
    if (canAverage) {
      const averaged = computeAverageData(rows);
      avgDatasets = averaged.datasets;
      // concat() returns a plain array, so the column list has to be carried over by hand.
      parsedData = rows.concat(averaged.data);
      parsedData.columns = rows.columns;
      if (currentSetup === AVG_SETUP_KEY) DATASETS = { ...avgDatasets };
    }

    filterData();
    metricKeys = detectMetrics(allData.columns);
    if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
    populateMetricSelect();
    render();
    buildLegend();

    // Redraw on size changes; fall back to window resize events without ResizeObserver.
    const redraw = () => render();
    if (window.ResizeObserver) {
      new ResizeObserver(redraw).observe(container);
    } else {
      window.addEventListener('resize', redraw);
    }
  } catch (e) {
    const detail = e && e.message ? e.message : e;
    const pre = document.createElement('pre');
    pre.textContent = 'Data load error: ' + detail;
    pre.style.color = 'var(--danger, #b00020)';
    pre.style.fontSize = '12px';
    container.appendChild(pre);
  }
})();
|
| 830 |
+
};
|
| 831 |
+
|
| 832 |
+
// Start the chart once the document has been parsed; if parsing already
// finished, start immediately.
const startWhenReady = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  startWhenReady();
} else {
  document.addEventListener('DOMContentLoaded', startWhenReady, { once: true });
}
|
| 834 |
+
})();
|
| 835 |
+
</script>
|
| 836 |
+
</body>
|
| 837 |
+
</html>
|
app/presentation/se2026/charts/benchmark-prompts.html
ADDED
|
@@ -0,0 +1,837 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" data-theme="dark">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 6 |
+
<title>Benchmark Comparison</title>
|
| 7 |
+
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
| 8 |
+
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--text-color: rgba(255,255,255,0.88);
|
| 11 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 12 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 13 |
+
--border-color: rgba(255,255,255,0.1);
|
| 14 |
+
--axis-color: rgba(255,255,255,0.15);
|
| 15 |
+
--tick-color: rgba(255,255,255,0.5);
|
| 16 |
+
--grid-color: rgba(255,255,255,0.06);
|
| 17 |
+
--primary-color: #7c6ff7;
|
| 18 |
+
}
|
| 19 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 20 |
+
html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
|
| 21 |
+
</style>
|
| 22 |
+
</head>
|
| 23 |
+
<body>
|
| 24 |
+
<div class="d3-benchmark-comparison" data-config='{"datasets":{"mix-fw_edu_hq-table_1b_hq":{"display":"Table","color":"#EBA937"},"mix-fw_edu_hq-math_1b_hq":{"display":"Math","color":"#e06b9e"},"mix-fw_edu_hq-faq_1b_hq":{"display":"FAQ","color":"#5b9bd5"},"mix-fw_edu_hq-tutorial_1b_hq":{"display":"Tutorial","color":"#8bc474"},"mix-fw_edu_hq-article_1b_hq":{"display":"Article","color":"#9a8ec2"},"mix-fw_edu_hq-commentary_1b_hq":{"display":"Commentary","color":"#c9a046"},"mix-fw_edu_hq-discussion_1b_hq":{"display":"Discussion","color":"#e07b54"},"dclm":{"display":"DCLM","baseline":true}}}' data-datafiles="../data/benchmark-results.csv"></div>
|
| 25 |
+
<style>
|
| 26 |
+
.d3-benchmark-comparison { position: relative; }
|
| 27 |
+
.d3-benchmark-comparison .controls {
|
| 28 |
+
display: flex;
|
| 29 |
+
gap: 16px;
|
| 30 |
+
align-items: flex-end;
|
| 31 |
+
justify-content: center;
|
| 32 |
+
margin: 10px 0 0 0;
|
| 33 |
+
}
|
| 34 |
+
.d3-benchmark-comparison .controls .control-group {
|
| 35 |
+
display: flex;
|
| 36 |
+
flex-direction: column;
|
| 37 |
+
align-items: flex-start;
|
| 38 |
+
gap: 6px;
|
| 39 |
+
}
|
| 40 |
+
.d3-benchmark-comparison .controls label {
|
| 41 |
+
font-size: 18px;
|
| 42 |
+
font-weight: 700;
|
| 43 |
+
color: var(--text-color);
|
| 44 |
+
}
|
| 45 |
+
.d3-benchmark-comparison .controls select {
|
| 46 |
+
appearance: none;
|
| 47 |
+
-webkit-appearance: none;
|
| 48 |
+
-moz-appearance: none;
|
| 49 |
+
border: 1px solid var(--border-color);
|
| 50 |
+
border-radius: 8px;
|
| 51 |
+
padding: 6px 28px 6px 10px;
|
| 52 |
+
background-color: var(--surface-bg);
|
| 53 |
+
color: var(--text-color);
|
| 54 |
+
font-size: 18px;
|
| 55 |
+
line-height: 1.2;
|
| 56 |
+
background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
|
| 57 |
+
background-repeat: no-repeat;
|
| 58 |
+
background-position: right 8px center;
|
| 59 |
+
}
|
| 60 |
+
.d3-benchmark-comparison .controls select:focus-visible {
|
| 61 |
+
outline: 2px solid var(--primary-color);
|
| 62 |
+
outline-offset: 2px;
|
| 63 |
+
}
|
| 64 |
+
.d3-benchmark-comparison .legend {
|
| 65 |
+
display: flex;
|
| 66 |
+
flex-direction: column;
|
| 67 |
+
align-items: flex-start;
|
| 68 |
+
gap: 6px;
|
| 69 |
+
margin: 8px 0 0 0;
|
| 70 |
+
padding-bottom: 4px;
|
| 71 |
+
}
|
| 72 |
+
.d3-benchmark-comparison .legend .legend-title {
|
| 73 |
+
font-size: 18px;
|
| 74 |
+
font-weight: 700;
|
| 75 |
+
color: var(--text-color);
|
| 76 |
+
}
|
| 77 |
+
.d3-benchmark-comparison .legend .items {
|
| 78 |
+
display: flex;
|
| 79 |
+
flex-wrap: wrap;
|
| 80 |
+
gap: 8px 14px;
|
| 81 |
+
}
|
| 82 |
+
.d3-benchmark-comparison .legend .item {
|
| 83 |
+
display: inline-flex;
|
| 84 |
+
align-items: center;
|
| 85 |
+
gap: 6px;
|
| 86 |
+
white-space: nowrap;
|
| 87 |
+
font-size: 18px;
|
| 88 |
+
color: var(--text-color);
|
| 89 |
+
cursor: pointer;
|
| 90 |
+
}
|
| 91 |
+
.d3-benchmark-comparison .legend .item.ghost { opacity: .25; }
|
| 92 |
+
.d3-benchmark-comparison .legend .swatch {
|
| 93 |
+
width: 14px;
|
| 94 |
+
height: 14px;
|
| 95 |
+
border-radius: 3px;
|
| 96 |
+
border: 1px solid var(--border-color);
|
| 97 |
+
}
|
| 98 |
+
.d3-benchmark-comparison .bar.ghost { opacity: .25; }
|
| 99 |
+
.d3-benchmark-comparison .value-label.ghost { opacity: .25; }
|
| 100 |
+
.d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; }
|
| 101 |
+
.d3-benchmark-comparison .line-path.ghost { opacity: .15; }
|
| 102 |
+
.d3-benchmark-comparison .line-dot.ghost { opacity: .15; }
|
| 103 |
+
.d3-benchmark-comparison .baseline.ghost { opacity: .1; }
|
| 104 |
+
.d3-benchmark-comparison .axes path { display: none; }
|
| 105 |
+
.d3-benchmark-comparison .axes line { stroke: var(--axis-color); }
|
| 106 |
+
.d3-benchmark-comparison .axes text { fill: var(--tick-color); }
|
| 107 |
+
.d3-benchmark-comparison .grid line { stroke: var(--grid-color); }
|
| 108 |
+
.d3-benchmark-comparison .hover-line {
|
| 109 |
+
stroke: var(--text-color);
|
| 110 |
+
stroke-opacity: 0.25;
|
| 111 |
+
stroke-width: 1;
|
| 112 |
+
pointer-events: none;
|
| 113 |
+
}
|
| 114 |
+
.d3-benchmark-comparison .d3-tooltip {
|
| 115 |
+
position: absolute;
|
| 116 |
+
top: 0px;
|
| 117 |
+
left: 0px;
|
| 118 |
+
transform: translate(-9999px, -9999px);
|
| 119 |
+
pointer-events: none;
|
| 120 |
+
padding: 8px 10px;
|
| 121 |
+
border-radius: 8px;
|
| 122 |
+
font-size: 18px;
|
| 123 |
+
line-height: 1.35;
|
| 124 |
+
border: 1px solid var(--border-color);
|
| 125 |
+
background: var(--surface-bg);
|
| 126 |
+
color: var(--text-color);
|
| 127 |
+
box-shadow: 0 4px 24px rgba(0,0,0,.18);
|
| 128 |
+
opacity: 0;
|
| 129 |
+
transition: opacity .12s ease;
|
| 130 |
+
text-align: left;
|
| 131 |
+
z-index: 10;
|
| 132 |
+
}
|
| 133 |
+
.d3-benchmark-comparison .d3-tooltip .tip-dot {
|
| 134 |
+
display: inline-block;
|
| 135 |
+
width: 10px;
|
| 136 |
+
height: 10px;
|
| 137 |
+
border-radius: 3px;
|
| 138 |
+
border: 1px solid var(--border-color);
|
| 139 |
+
margin-right: 6px;
|
| 140 |
+
vertical-align: middle;
|
| 141 |
+
}
|
| 142 |
+
</style>
|
| 143 |
+
<script>
|
| 144 |
+
(() => {
|
| 145 |
+
/**
 * Invokes `cb` once a usable global `d3` (one exposing `d3.select`) exists.
 *
 * If D3 is already present the callback runs synchronously and its return
 * value is passed through. Otherwise a shared `<script id="d3-cdn-script">`
 * is reused or injected, and the callback is deferred to its `load` event.
 * A failed download is reported on the console instead of failing silently.
 *
 * @param {Function} cb - Called with no arguments when D3 is ready.
 */
const ensureD3 = (cb) => {
  const isReady = () => Boolean(window.d3 && typeof window.d3.select === 'function');
  if (isReady()) return cb();

  let script = document.getElementById('d3-cdn-script');
  if (!script) {
    script = document.createElement('script');
    script.id = 'd3-cdn-script';
    script.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(script);
  }

  // D3 cannot be ready at this point (checked above), so only the load
  // event can trigger the callback; re-check in case the script was bogus.
  script.addEventListener('load', () => { if (isReady()) cb(); }, { once: true });
  script.addEventListener('error', () => {
    console.error('[d3-benchmark-comparison] Failed to load D3 from ' + script.src);
  }, { once: true });
};
|
| 152 |
+
|
| 153 |
+
const bootstrap = () => {
|
| 154 |
+
const scriptEl = document.currentScript;
|
| 155 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 156 |
+
if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) {
|
| 157 |
+
const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
|
| 158 |
+
container = cs[cs.length - 1] || null;
|
| 159 |
+
}
|
| 160 |
+
if (!container) return;
|
| 161 |
+
if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
|
| 162 |
+
|
| 163 |
+
container.style.position = container.style.position || 'relative';
|
| 164 |
+
|
| 165 |
+
// ─── READ CONFIG ───
|
| 166 |
+
// Walk up from the container to the nearest element carrying `data-config`.
let mountEl = container;
while (mountEl && !mountEl.getAttribute?.('data-config')) {
  mountEl = mountEl.parentElement;
}

// Parse the JSON config. Anything that is not a JSON object literal is
// ignored; malformed JSON falls back to `{}` but is reported so that a typo
// in the attribute does not silently produce an unconfigured chart.
let cfg = {};
try {
  const raw = mountEl?.getAttribute?.('data-config');
  const text = typeof raw === 'string' ? raw.trim() : '';
  if (text.startsWith('{')) cfg = JSON.parse(text);
} catch (err) {
  console.warn('[d3-benchmark-comparison] Ignoring invalid data-config JSON:', err);
}
|
| 173 |
+
|
| 174 |
+
// ─── NORMALIZE DATASETS CONFIG ───
|
| 175 |
+
// Accepts: { "key": "Name" } or { "key": { display, color, shaded, baseline } }
|
| 176 |
+
// Returns: { key: { display, color, shaded, baseline } }
|
| 177 |
+
/**
 * Normalizes the `datasets` config into `{ key: { display, ...options } }`.
 *
 * A string value is shorthand for `{ display: value }`. Object values are
 * shallow-copied so later mutation never touches the parsed config. Entries
 * that omit `display` (or are `null`) fall back to the key itself, so labels
 * never render as "undefined".
 *
 * @param {Object<string, (string|Object)>|null|undefined} raw - Config value.
 * @returns {Object<string, Object>} Normalized copy; `{}` for missing input.
 */
function normalizeDatasets(raw) {
  const out = {};
  for (const [key, value] of Object.entries(raw || {})) {
    const opts = typeof value === 'string' ? { display: value } : { ...value };
    if (opts.display == null || opts.display === '') opts.display = key;
    out[key] = opts;
  }
  return out;
}
|
| 184 |
+
|
| 185 |
+
// ─── SETUP SUPPORT ───
|
| 186 |
+
// Optional multi-setup mode: `cfg.setups` maps a setup name to its own
// `{ datasets }` block. An empty `setups` object is treated as "no setups";
// otherwise the first-setup lookup below would dereference `undefined`.
const SETUPS = cfg.setups && Object.keys(cfg.setups).length ? cfg.setups : null;
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
const AVG_SETUP_KEY = 'Average (all setups)';
// With two or more setups the averaged view is the default unless overridden.
const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 ? 'average' : null);

let currentSetup = null;
if (SETUPS) {
  if (defaultSetupCfg === 'average') {
    currentSetup = AVG_SETUP_KEY;
  } else if (defaultSetupCfg && setupNames.includes(defaultSetupCfg)) {
    currentSetup = defaultSetupCfg;
  } else {
    currentSetup = setupNames[0];
  }
}

// Datasets of the active view. The averaged view starts empty here.
let DATASETS;
if (!SETUPS) {
  DATASETS = normalizeDatasets(cfg.datasets);
} else if (currentSetup === AVG_SETUP_KEY) {
  DATASETS = {};
} else {
  DATASETS = normalizeDatasets(SETUPS[currentSetup].datasets);
}
let avgDatasets = {};
let parsedData = [];

// CSV column names and display defaults, all overridable through the config.
const RUN_COL = cfg.runColumn || 'runname';
const STEP_COL = cfg.stepColumn || 'steps';
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
const defaultView = cfg.defaultView || 'bar';
// Short random suffix used to build per-instance element ids.
const uid = Math.random().toString(36).slice(2, 8);
|
| 201 |
+
|
| 202 |
+
// ─── DATASET ACCESSORS ───
|
| 203 |
+
/** Label shown for a run; falls back to the raw run name when none is configured. */
function displayName(raw) { return DATASETS[raw]?.display ?? raw; }
/** True when the run is configured as a baseline (drawn as a reference line). */
function isBaseline(raw) { return Boolean(DATASETS[raw]?.baseline); }
/** True when the run's bar should use the striped fill. */
function isShaded(raw) { return Boolean(DATASETS[raw]?.shaded); }
/** Colour pinned for the run in the config, or `undefined` when none is set. */
function pinnedColor(raw) { return DATASETS[raw]?.color; }
/** Id of the run's stripe pattern: instance uid plus the run name with non-alphanumerics replaced by `_`. */
function stripePatternId(raw) { return `stripe-${uid}-${raw.replace(/[^a-zA-Z0-9]/g, '_')}`; }
|
| 208 |
+
|
| 209 |
+
// Human-readable labels for metric column names. metricName() looks keys up
// here and shows any column without an entry under its raw name.
const METRIC_NAMES = {
  // Aggregate scores
  'agg_score_macro': 'Aggregate Score (Macro)',
  'agg_score_micro': 'Aggregate Score (Micro)',
  'agg_score_RC': 'Reading Comprehension',
  'agg_score_GK': 'General Knowledge',
  'agg_score_NLU': 'Natural Language Understanding',
  'agg_score_MATH': 'Math',
  'agg_score_TABLE': 'Table Understanding',
  'agg_score_RES': 'Reasoning',
  // Individual benchmark columns
  'lighteval|arc_cf:easy|3/prob_norm_token': 'ARC-Easy',
  'lighteval|drop|3/prob_norm_token': 'DROP',
  'lighteval|gsm8k|3/prob_norm_token': 'GSM8K',
  'lighteval|hellaswag_cf|3/prob_norm_token': 'HellaSwag',
  'lighteval|openbookqa_cf|3/prob_norm_token': 'OpenBookQA',
  'lighteval|piqa_cf|3/prob_norm_token': 'PIQA',
  'lighteval|squad_v2|3/prob_norm_token': 'SQuAD v2',
  // NOTE(review): `treb_qa` is labelled 'TriviaQA', but the key does not
  // obviously abbreviate that name — confirm against the evaluation suite.
  'lighteval|treb_qa|3/prob_norm_token': 'TriviaQA',
  'lighteval|wikitablequestions|3/prob_norm_token': 'WikiTableQuestions',
  'lighteval|winogrande_cf|3/prob_norm_token': 'Winogrande',
  'lighteval|xcsqa_cf|3/prob_norm_token': 'XCSQA',
  'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux'
};
|
| 231 |
+
|
| 232 |
+
// Tooltip
|
| 233 |
+
// Reuse a tooltip already present in the container; otherwise build
// <div class="d3-tooltip"><div class="d3-tooltip__inner"></div></div>.
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (tip) {
  // An existing tooltip without an inner wrapper receives content directly.
  tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
} else {
  tip = document.createElement('div');
  tip.className = 'd3-tooltip';
  tipInner = document.createElement('div');
  tipInner.className = 'd3-tooltip__inner';
  tip.appendChild(tipInner);
  container.appendChild(tip);
}
|
| 239 |
+
|
| 240 |
+
// SVG
|
| 241 |
+
// Root <svg>, the translated drawing group and a <defs> holder for patterns.
const svg = d3.select(container).append('svg');
svg.attr('width', '100%').style('display', 'block');
const gRoot = svg.append('g');
const defs = svg.append('defs');

// State
// Rows currently plotted; reassigned by filterData().
let allData = [];
// Metric columns offered to the user; presumably filled from detectMetrics()
// (the assignment is outside this excerpt).
let metricKeys = [];
// CSV column currently plotted.
let currentMetric = defaultMetric;
// Active chart type; starts from the configured default ('bar' if unset).
let currentView = defaultView;
// Raw run name -> colour; filled lazily by initColors().
let colorMap = {};
// Display name of the hovered series, or null when nothing is highlighted.
let highlight = null;
|
| 252 |
+
|
| 253 |
+
// ─── HELPERS ───
|
| 254 |
+
/** Friendly label for a metric column, or the column name itself when unmapped. */
function metricName(key) {
  const label = METRIC_NAMES[key];
  return label ? label : key;
}
|
| 255 |
+
|
| 256 |
+
/** Converts a training step count into a token count. */
function stepsToTokens(step) {
  return step * TOKENS_PER_STEP;
}

/** Formats a token count as billions ("1.05B"), millions ("2.1M") or a grouped integer. */
function formatTokens(tokens) {
  if (tokens >= 1e9) {
    const billions = tokens / 1e9;
    return `${d3.format('.2f')(billions)}B`;
  }
  if (tokens >= 1e6) {
    const millions = tokens / 1e6;
    return `${d3.format('.1f')(millions)}M`;
  }
  return d3.format(',')(tokens);
}

/** Formats a step count, abbreviating values of 1000 and above to whole thousands ("12K"). */
function formatStep(step) {
  return step >= 1000
    ? `${d3.format('.0f')(step / 1000)}K`
    : String(step);
}

/** Compact axis label: "<tokens> (<steps>)". */
function stepLabelShort(step) {
  const tokens = formatTokens(stepsToTokens(step));
  const steps = formatStep(step);
  return `${tokens} (${steps})`;
}

/** Verbose label: "<tokens> Tokens (<steps> Steps)". */
function stepLabelLong(step) {
  const tokens = formatTokens(stepsToTokens(step));
  const steps = formatStep(step);
  return `${tokens} Tokens (${steps} Steps)`;
}
|
| 268 |
+
|
| 269 |
+
/**
 * Returns up to `n` categorical colours.
 * Uses the host page's `window.ColorPalettes.getColors` when available;
 * otherwise the first `n` entries of D3's Tableau10 scheme (or a literal
 * copy of it when the scheme is missing).
 */
function getCategoricalColors(n) {
  try {
    const palettes = window.ColorPalettes;
    if (palettes && typeof palettes.getColors === 'function') {
      return palettes.getColors('categorical', n);
    }
  } catch (_) {
    // Best effort: any failure in the host palette falls through to the default.
  }
  const fallback = ['#4e79a7','#f28e2b','#e15759','#76b7b2','#59a14f','#edc948','#b07aa1','#ff9da7','#9c755f','#bab0ac'];
  const scheme = d3.schemeTableau10 || fallback;
  return scheme.slice(0, n);
}
|
| 273 |
+
|
| 274 |
+
/**
 * Fills `colorMap` (raw run name -> colour) once per data set.
 * Colours pinned in the config win; remaining runs, in sorted name order,
 * cycle through the categorical palette. Does nothing if already populated.
 */
function initColors() {
  if (Object.keys(colorMap).length > 0) return;

  const runNames = [...d3.group(allData, (row) => row[RUN_COL]).keys()].sort();
  const needsPalette = [];
  for (const runName of runNames) {
    const pinned = pinnedColor(runName);
    if (pinned) {
      colorMap[runName] = pinned;
    } else {
      needsPalette.push(runName);
    }
  }

  const palette = getCategoricalColors(needsPalette.length);
  for (let idx = 0; idx < needsPalette.length; idx += 1) {
    colorMap[needsPalette[idx]] = palette[idx % palette.length];
  }
}
|
| 286 |
+
|
| 287 |
+
// ─── SETUP HELPERS ───
|
| 288 |
+
/**
 * Rebuilds `allData` from `parsedData`, keeping only rows whose run name is
 * a key of `DATASETS`. With no configured datasets every row is kept.
 * The `columns` list of the parsed data is carried over onto the result.
 */
function filterData() {
  const known = new Set(Object.keys(DATASETS));
  if (known.size === 0) {
    allData = parsedData;
  } else {
    allData = parsedData.filter((row) => known.has(row[RUN_COL]));
  }
  allData.columns = parsedData.columns;
}
|
| 293 |
+
|
| 294 |
+
/**
 * Builds the synthetic "average over all setups" series.
 *
 * Runs are matched across setups by display name. Only display names with at
 * least as many runs as there are setups are averaged. For each such name and
 * each step that has data, every non-key column is averaged over the matching
 * rows.
 *
 * Fixes over the previous version:
 *  - rows are indexed by the numeric step, so "1000" and "1000.0" match;
 *  - blank or missing cells are skipped instead of being averaged in as 0;
 *  - the display-name index is a Map, so names such as "constructor" are safe.
 *
 * @param {Array<Object>} rawData - Parsed CSV rows (optionally with `.columns`).
 * @returns {{data: Array<Object>, datasets: Object}} Averaged rows keyed by a
 *   synthetic `__avg__<name>` run name, plus their dataset options. Both are
 *   empty when fewer than two setups are configured.
 */
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasets: {} };

  // display name -> raw run names carrying it, across every setup
  const displayToRaws = new Map();
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const [raw, opts] of Object.entries(ds)) {
      const display = String(opts.display);
      if (!displayToRaws.has(display)) displayToRaws.set(display, []);
      displayToRaws.get(display).push(raw);
    }
  }

  // Options (colour, shading, ...) are inherited from the first setup.
  const firstSetupOpts = Object.values(normalizeDatasets(SETUPS[setupNames[0]].datasets));

  // Index rows by "run|numericStep" and collect the distinct steps in one pass.
  const byRunStep = new Map();
  const stepSet = new Set();
  for (const row of rawData) {
    const step = +row[STEP_COL];
    byRunStep.set(row[RUN_COL] + '|' + step, row);
    stepSet.add(step);
  }
  const steps = Array.from(stepSet).sort((a, b) => a - b);

  const cols = rawData.columns || Object.keys(rawData[0] || {});
  const valueCols = cols.filter((col) => col !== RUN_COL && col !== STEP_COL);

  const result = [];
  const dsMap = {};
  for (const [display, raws] of displayToRaws) {
    if (raws.length < setupNames.length) continue;
    const avgRaw = '__avg__' + display.replace(/[^a-zA-Z0-9]/g, '_');
    const firstOpts = firstSetupOpts.find((o) => o.display === display) || {};
    dsMap[avgRaw] = { display, ...firstOpts };

    for (const step of steps) {
      const rows = raws.map((r) => byRunStep.get(r + '|' + step)).filter(Boolean);
      if (!rows.length) continue;
      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of valueCols) {
        const vals = rows
          .map((r) => r[col])
          .filter((v) => v != null && String(v).trim() !== '')
          .map((v) => Number(v))
          .filter((v) => !Number.isNaN(v));
        // Columns with no numeric values keep the historical 0 placeholder.
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      result.push(avgRow);
    }
  }
  return { data: result, datasets: dsMap };
}
|
| 331 |
+
|
| 332 |
+
/**
 * Activates the setup called `name` (or the averaged view) and redraws.
 * Baseline runs declared in any setup are carried into the active dataset
 * map when they are not already in it and have rows in the parsed data.
 */
function switchSetup(name) {
  currentSetup = name;
  DATASETS = name === AVG_SETUP_KEY
    ? { ...avgDatasets }
    : normalizeDatasets(SETUPS[name].datasets);

  // Re-add baselines from any setup
  setupNames.forEach((setupName) => {
    const candidates = normalizeDatasets(SETUPS[setupName].datasets);
    Object.keys(candidates).forEach((raw) => {
      const opts = candidates[raw];
      if (!opts.baseline) return;
      if (DATASETS[raw]) return;
      const hasRows = parsedData.some((row) => row[RUN_COL] === raw);
      if (hasRows) DATASETS[raw] = { ...opts };
    });
  });

  // Colours are reassigned from scratch for the new set of runs.
  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
|
| 354 |
+
|
| 355 |
+
/**
 * Shows the tooltip with `html` next to container-relative point (x, y).
 * It sits to the right of the point, flipping to the left when it would
 * overflow the container, and never rises above the container's top edge.
 */
function showTip(html, x, y) {
  tipInner.innerHTML = html;

  const tooltipWidth = tip.offsetWidth || 180;
  const containerWidth = container.clientWidth || 800;

  let left;
  if (x + tooltipWidth + 20 > containerWidth) {
    left = x - tooltipWidth - 12;
  } else {
    left = x + 12;
  }
  const top = Math.max(0, y - 20);

  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
}
|
| 363 |
+
/** Fades the tooltip out and parks it far off-screen. */
function hideTip() {
  Object.assign(tip.style, {
    opacity: '0',
    transform: 'translate(-9999px, -9999px)',
  });
}
|
| 367 |
+
|
| 368 |
+
/**
 * Applies the `ghost` class to every chart mark and legend item that does
 * not belong to the highlighted series; clears it everywhere when nothing
 * is highlighted.
 */
function updateHighlight() {
  const isGhost = (d) => highlight && d.name !== highlight;
  const markSelectors = [
    'rect.bar',
    'text.value-label',
    '.line-path',
    '.line-dot',
    '.baseline-vline',
    '.baseline-vlabel',
    '.baseline-hline',
    '.baseline-hlabel',
  ];
  for (const selector of markSelectors) {
    gRoot.selectAll(selector).classed('ghost', isGhost);
  }

  for (const item of container.querySelectorAll('.legend .item')) {
    const dimmed = highlight && item.getAttribute('data-name') !== highlight;
    item.classList.toggle('ghost', dimmed);
  }
}
|
| 381 |
+
|
| 382 |
+
// ─── AUTO-DETECT METRICS from CSV columns ───
|
| 383 |
+
/**
 * Derives the list of plottable metric columns from the CSV header.
 *
 * Known aggregate scores come first, in a fixed order. Every other column
 * follows if it is not the run/step/seed column and its value in the first
 * data row coerces to a number. With no rows loaded, only the aggregate
 * columns are returned (previously this threw on `allData[0]`).
 *
 * @param {string[]} columns - Column names of the loaded CSV.
 * @returns {string[]} Metric column names, aggregates first.
 */
function detectMetrics(columns) {
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter((k) => columns.includes(k));
  const aggSet = new Set(agg);
  // Only the first row is sampled to decide whether a column is numeric.
  const sample = allData[0] || {};
  const ind = columns.filter((k) => !skip.has(k) && !aggSet.has(k) && !Number.isNaN(+sample[k]));
  return [...agg, ...ind];
}
|
| 390 |
+
|
| 391 |
+
// ─── BAR CHART ───
|
| 392 |
+
/**
 * Draws the horizontal bar chart for `currentMetric`.
 *
 * Each run contributes the metric value of its highest-step row. Baseline
 * runs are drawn as dashed vertical reference lines with a label; all other
 * runs become bars sorted by descending value. The chart fills the container
 * width and the window height.
 *
 * Changes over the previous version:
 *  - stripe patterns are data-joined instead of appended on every render, so
 *    re-rendering no longer piles up <pattern> elements with duplicate ids;
 *  - the x domain falls back to [0, 1.05] when there is no positive value,
 *    avoiding a NaN or zero-width scale;
 *  - unused `barHeight`/`barGap` (a division by the bar count) are removed;
 *  - hover handlers are defined once and shared by enter and update.
 */
function renderBar() {
  const width = container.clientWidth || 800;
  const height = window.innerHeight || 480;
  const hasBaselines = allData.some(r => isBaseline(r[RUN_COL]));
  // Extra top margin leaves room for the baseline label above the plot.
  const margin = { top: hasBaselines ? 20 : 12, right: 56, bottom: 32, left: 190 };
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // Final (highest-step) value of every run.
  const finalData = [];
  for (const [raw, rows] of d3.group(allData, d => d[RUN_COL])) {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const row = rows.find(r => +r[STEP_COL] === maxStep);
    if (row) finalData.push({ name: displayName(raw), rawName: raw, value: +row[currentMetric] });
  }
  finalData.sort((a, b) => b.value - a.value);

  const barData = finalData.filter(d => !isBaseline(d.rawName));
  const baselineData = finalData.filter(d => isBaseline(d.rawName));

  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const maxValue = d3.max(finalData, d => d.value);
  const x = d3.scaleLinear().domain([0, (maxValue > 0 ? maxValue : 1) * 1.05]).range([0, innerWidth]);
  const y = d3.scaleBand().domain(barData.map(d => d.name)).range([0, innerHeight]).padding(0.2);

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(x.ticks(5)).join('line')
      .attr('x1', d => x(d)).attr('x2', d => x(d)).attr('y1', 0).attr('y2', innerHeight);
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--text-color)').style('font-size', '18px').style('font-weight', '600');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Stripe patterns for shaded bars: one <pattern> per shaded run, keyed by
  // raw run name so repeated renders update in place and stale ones are removed.
  const stripeColor = d => colorMap[d.rawName] || '#999';
  const stripes = defs.selectAll('pattern.bar-stripe')
    .data(barData.filter(d => isShaded(d.rawName)), d => d.rawName)
    .join(enter => {
      const pat = enter.append('pattern').attr('class', 'bar-stripe')
        .attr('id', d => stripePatternId(d.rawName))
        .attr('width', 6).attr('height', 6)
        .attr('patternUnits', 'userSpaceOnUse').attr('patternTransform', 'rotate(45)');
      pat.append('rect').attr('width', 6).attr('height', 6).attr('opacity', 0.35);
      pat.append('line').attr('x1', 0).attr('y1', 0).attr('x2', 0).attr('y2', 6).attr('stroke-width', 2.5);
      return pat;
    });
  stripes.select('rect').attr('fill', stripeColor);
  stripes.select('line').attr('stroke', stripeColor);

  const barFill = d => (isShaded(d.rawName)
    ? `url(#${stripePatternId(d.rawName)})`
    : (colorMap[d.rawName] || 'var(--primary-color)'));

  // Bars
  const barTip = (ev, d) => {
    const [mx, my] = d3.pointer(ev, container);
    showTip(`<strong>${d.name}</strong><br/>${metricName(currentMetric)}: <strong>${d.value.toFixed(3)}</strong>`, mx, my);
  };
  const attachHover = sel => sel
    .on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
    .on('mousemove', barTip)
    .on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); });

  gRoot.selectAll('rect.bar').data(barData, d => d.name).join(
    enter => enter.append('rect').attr('class', 'bar')
      .attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
      .attr('fill', d => barFill(d))
      .attr('width', 0)
      .call(attachHover)
      .transition().duration(300).attr('width', d => Math.max(0, x(d.value))),
    update => update
      // Handlers are re-bound because barTip closes over this render's state.
      .call(attachHover)
      .transition().duration(300)
      .attr('y', d => y(d.name)).attr('height', y.bandwidth())
      .attr('width', d => Math.max(0, x(d.value)))
      .attr('fill', d => barFill(d)),
    exit => exit.transition().duration(200).attr('width', 0).remove()
  );

  // Value labels
  gRoot.selectAll('text.value-label').data(barData, d => d.name).join(
    enter => enter.append('text').attr('class', 'value-label')
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .attr('dy', '0.35em').attr('fill', 'var(--text-color)').attr('font-size', 18)
      .text(d => d.value.toFixed(3)),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .text(d => d.value.toFixed(3)),
    exit => exit.remove()
  );

  // Baseline vertical reference lines
  gRoot.selectAll('.baseline-vline').data(baselineData, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-vline baseline')
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-vlabel').data(baselineData, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-vlabel baseline')
      .attr('x', d => x(d.value)).attr('y', -4)
      .attr('text-anchor', 'middle').attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value))
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    exit => exit.remove()
  );
}
|
| 521 |
+
|
| 522 |
+
// ─── LINE CHART ───
|
| 523 |
+
function renderLine() {
|
| 524 |
+
const width = container.clientWidth || 800;
|
| 525 |
+
const hasBaselines = allData.some(r => isBaseline(r[RUN_COL]));
|
| 526 |
+
const margin = { top: 16, right: 50, bottom: 48, left: 60 };
|
| 527 |
+
const height = window.innerHeight || 480;
|
| 528 |
+
svg.attr('width', width).attr('height', height);
|
| 529 |
+
gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
|
| 530 |
+
|
| 531 |
+
const innerWidth = width - margin.left - margin.right;
|
| 532 |
+
const innerHeight = height - margin.top - margin.bottom;
|
| 533 |
+
|
| 534 |
+
// Build series
|
| 535 |
+
const grouped = d3.group(allData, d => d[RUN_COL]);
|
| 536 |
+
const series = [];
|
| 537 |
+
const baselineSeries = [];
|
| 538 |
+
for (const [raw, rows] of grouped) {
|
| 539 |
+
const pts = rows.map(r => ({ step: +r[STEP_COL], value: +r[currentMetric] })).sort((a, b) => a.step - b.step);
|
| 540 |
+
const entry = { name: displayName(raw), rawName: raw, values: pts };
|
| 541 |
+
if (isBaseline(raw)) {
|
| 542 |
+
entry.finalValue = pts[pts.length - 1].value;
|
| 543 |
+
baselineSeries.push(entry);
|
| 544 |
+
} else {
|
| 545 |
+
series.push(entry);
|
| 546 |
+
}
|
| 547 |
+
}
|
| 548 |
+
|
| 549 |
+
const allSteps = Array.from(new Set(allData.filter(r => !isBaseline(r[RUN_COL])).map(r => +r[STEP_COL]))).sort((a, b) => a - b);
|
| 550 |
+
const allValues = [...series, ...baselineSeries].flatMap(s => s.finalValue != null ? [s.finalValue] : s.values.map(v => v.value));
|
| 551 |
+
|
| 552 |
+
const x = d3.scaleLinear().domain(d3.extent(allSteps)).range([0, innerWidth]);
|
| 553 |
+
const yMin = d3.min(allValues), yMax = d3.max(allValues), yPad = (yMax - yMin) * 0.08;
|
| 554 |
+
const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();
|
| 555 |
+
|
| 556 |
+
// Grid
|
| 557 |
+
gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
|
| 558 |
+
g.selectAll('line').data(y.ticks(6)).join('line')
|
| 559 |
+
.attr('x1', 0).attr('x2', innerWidth).attr('y1', d => y(d)).attr('y2', d => y(d));
|
| 560 |
+
});
|
| 561 |
+
|
| 562 |
+
// X axis
|
| 563 |
+
gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
|
| 564 |
+
.attr('transform', `translate(0,${innerHeight})`)
|
| 565 |
+
.call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
|
| 566 |
+
.call(g => {
|
| 567 |
+
g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
|
| 568 |
+
g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
|
| 569 |
+
});
|
| 570 |
+
|
| 571 |
+
// Y axis
|
| 572 |
+
gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
|
| 573 |
+
.call(d3.axisLeft(y).ticks(6).tickFormat(d3.format('.3f')).tickSizeOuter(0))
|
| 574 |
+
.call(g => {
|
| 575 |
+
g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
|
| 576 |
+
g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
|
| 577 |
+
});
|
| 578 |
+
|
| 579 |
+
// Axis labels
|
| 580 |
+
gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
|
| 581 |
+
.attr('x', innerWidth / 2).attr('y', innerHeight + 38)
|
| 582 |
+
.attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
|
| 583 |
+
.text('Tokens (Steps)');
|
| 584 |
+
|
| 585 |
+
gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
|
| 586 |
+
.attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
|
| 587 |
+
.attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
|
| 588 |
+
.text(metricName(currentMetric));
|
| 589 |
+
|
| 590 |
+
// Baseline horizontal reference lines
|
| 591 |
+
gRoot.selectAll('.baseline-hline').data(baselineSeries, d => d.name).join(
|
| 592 |
+
enter => enter.append('line').attr('class', 'baseline-hline baseline')
|
| 593 |
+
.attr('x1', 0).attr('x2', innerWidth)
|
| 594 |
+
.attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
|
| 595 |
+
.attr('stroke', d => colorMap[d.rawName] || '#999')
|
| 596 |
+
.attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
|
| 597 |
+
update => update.transition().duration(300)
|
| 598 |
+
.attr('x1', 0).attr('x2', innerWidth)
|
| 599 |
+
.attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
|
| 600 |
+
.attr('stroke', d => colorMap[d.rawName] || '#999'),
|
| 601 |
+
exit => exit.remove()
|
| 602 |
+
);
|
| 603 |
+
gRoot.selectAll('.baseline-hlabel').data(baselineSeries, d => d.name).join(
|
| 604 |
+
enter => enter.append('text').attr('class', 'baseline-hlabel baseline')
|
| 605 |
+
.attr('x', 4).attr('y', d => y(d.finalValue) - 6)
|
| 606 |
+
.attr('text-anchor', 'start')
|
| 607 |
+
.attr('fill', d => colorMap[d.rawName] || '#999')
|
| 608 |
+
.attr('font-size', 18).attr('font-weight', 600)
|
| 609 |
+
.text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
|
| 610 |
+
update => update.transition().duration(300)
|
| 611 |
+
.attr('x', 4).attr('y', d => y(d.finalValue) - 6)
|
| 612 |
+
.text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
|
| 613 |
+
exit => exit.remove()
|
| 614 |
+
);
|
| 615 |
+
|
| 616 |
+
// Lines (non-baseline)
|
| 617 |
+
const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
|
| 618 |
+
gRoot.selectAll('.line-path').data(series, d => d.name).join(
|
| 619 |
+
enter => enter.append('path').attr('class', 'line-path')
|
| 620 |
+
.attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
|
| 621 |
+
.attr('d', d => line(d.values)),
|
| 622 |
+
update => update.transition().duration(300)
|
| 623 |
+
.attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
|
| 624 |
+
.attr('d', d => line(d.values)),
|
| 625 |
+
exit => exit.remove()
|
| 626 |
+
);
|
| 627 |
+
|
| 628 |
+
// Dots (non-baseline)
|
| 629 |
+
const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
|
| 630 |
+
gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
|
| 631 |
+
enter => enter.append('circle').attr('class', 'line-dot')
|
| 632 |
+
.attr('cx', d => x(d.step)).attr('cy', d => y(d.value)).attr('r', 3)
|
| 633 |
+
.attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)')
|
| 634 |
+
.attr('stroke', 'var(--surface-bg)').attr('stroke-width', 1),
|
| 635 |
+
update => update.transition().duration(300)
|
| 636 |
+
.attr('cx', d => x(d.step)).attr('cy', d => y(d.value))
|
| 637 |
+
.attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)'),
|
| 638 |
+
exit => exit.remove()
|
| 639 |
+
);
|
| 640 |
+
|
| 641 |
+
// Hover overlay
|
| 642 |
+
gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
|
| 643 |
+
.attr('y1', 0).attr('y2', innerHeight).style('display', 'none');
|
| 644 |
+
|
| 645 |
+
gRoot.selectAll('.hover-overlay').data([0]).join('rect').attr('class', 'hover-overlay')
|
| 646 |
+
.attr('width', innerWidth).attr('height', innerHeight)
|
| 647 |
+
.attr('fill', 'none').attr('pointer-events', 'all')
|
| 648 |
+
.on('mousemove', (ev) => {
|
| 649 |
+
const [mx] = d3.pointer(ev, gRoot.node());
|
| 650 |
+
const nearest = allSteps.reduce((best, s) => Math.abs(s - x.invert(mx)) < Math.abs(best - x.invert(mx)) ? s : best, allSteps[0]);
|
| 651 |
+
gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);
|
| 652 |
+
|
| 653 |
+
const entries = series.map(s => {
|
| 654 |
+
const pt = s.values.find(v => v.step === nearest);
|
| 655 |
+
return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
|
| 656 |
+
}).filter(Boolean);
|
| 657 |
+
baselineSeries.forEach(s => {
|
| 658 |
+
entries.push({ name: s.name, rawName: s.rawName, value: s.finalValue });
|
| 659 |
+
});
|
| 660 |
+
entries.sort((a, b) => b.value - a.value);
|
| 661 |
+
|
| 662 |
+
let html = `<div style="font-weight:700;margin-bottom:4px;">${stepLabelLong(nearest)}</div>`;
|
| 663 |
+
entries.forEach(e => {
|
| 664 |
+
html += `<div><span class="tip-dot" style="background:${colorMap[e.rawName]}"></span>${e.name}: <strong>${e.value.toFixed(3)}</strong></div>`;
|
| 665 |
+
});
|
| 666 |
+
const [cx, cy] = d3.pointer(ev, container);
|
| 667 |
+
showTip(html, cx, cy);
|
| 668 |
+
})
|
| 669 |
+
.on('mouseleave', () => {
|
| 670 |
+
gRoot.select('.hover-line').style('display', 'none');
|
| 671 |
+
hideTip();
|
| 672 |
+
});
|
| 673 |
+
}
|
| 674 |
+
|
| 675 |
+
// ─── RENDER ───
|
| 676 |
+
/**
 * Redraws the chart from scratch using the currently selected view.
 * Does nothing while no data rows are available.
 */
function render() {
  const hasRows = allData.length > 0;
  if (!hasRows) {
    return;
  }

  initColors();

  // Empty both the drawing group and <defs> so the chosen view starts clean.
  for (const layer of [gRoot, defs]) {
    layer.selectAll('*').remove();
  }

  const draw = currentView === 'bar' ? renderBar : renderLine;
  draw();
}
|
| 683 |
+
|
| 684 |
+
// ─── UI ───
|
| 685 |
+
/**
 * Builds the static chrome of the widget and appends it to `container`:
 * a control bar (optional Setup selector, View selector, and a Metric
 * selector that is left without options here) followed by the legend shell.
 */
function buildUI() {
  // Creates an element and optionally assigns its class name.
  const make = (tag, className) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    return node;
  };

  // Appends a labelled <select> group to `bar` and returns the <select>.
  // Ids are suffixed with `uid`, and the label's `for` points at that id.
  const addSelect = (bar, idPrefix, caption) => {
    const selectId = idPrefix + uid;
    const group = make('div', 'control-group');
    const label = make('label');
    label.setAttribute('for', selectId);
    label.textContent = caption;
    const select = make('select');
    select.id = selectId;
    group.appendChild(label);
    group.appendChild(select);
    bar.appendChild(group);
    return select;
  };

  // Appends one <option>; the same string may serve as value and text.
  const addOption = (select, value, text, selected) => {
    const option = make('option');
    option.value = value;
    option.textContent = text;
    if (selected) option.selected = true;
    select.appendChild(option);
  };

  const controls = make('div', 'controls');

  // Setup selector: only shown when the config declares at least one setup.
  if (SETUPS && setupNames.length > 0) {
    const setupSelect = addSelect(controls, 'setup-', 'Setup');
    for (const name of setupNames) {
      addOption(setupSelect, name, name, name === currentSetup);
    }
    // The synthetic "average" entry is only offered with two or more setups.
    if (setupNames.length >= 2) {
      addOption(setupSelect, AVG_SETUP_KEY, AVG_SETUP_KEY, currentSetup === AVG_SETUP_KEY);
    }
    setupSelect.addEventListener('change', () => { switchSetup(setupSelect.value); });
  }

  // View selector: bar chart of final scores vs. line chart over training.
  const viewSelect = addSelect(controls, 'view-', 'View');
  const viewChoices = [['bar', 'Final Score'], ['line', 'Training Progression']];
  for (const [value, caption] of viewChoices) {
    addOption(viewSelect, value, caption, value === currentView);
  }
  viewSelect.addEventListener('change', () => { currentView = viewSelect.value; render(); });

  // Metric selector: created empty; no options are added in this function.
  addSelect(controls, 'metric-', 'Metric');

  container.appendChild(controls);

  const legend = make('div', 'legend');
  legend.innerHTML = '<div class="legend-title">Legend</div><div class="items"></div>';
  container.appendChild(legend);
}
|
| 731 |
+
|
| 732 |
+
/**
 * (Re)fills the Metric <select> from `metricKeys`.
 *
 * Keys starting with `agg_score` go into the "Aggregate Scores" optgroup,
 * all others into "Individual Benchmarks"; empty groups are omitted. The
 * option matching `currentMetric` is pre-selected.
 *
 * The function clears the select on entry, so it is safe to call again after
 * the metric list changes. The `change` handler is attached only once per
 * element: binding it on every call would stack listeners and trigger one
 * render per previous call on each user change.
 */
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;

  sel.innerHTML = '';

  const aggGroup = document.createElement('optgroup');
  aggGroup.label = 'Aggregate Scores';
  const indGroup = document.createElement('optgroup');
  indGroup.label = 'Individual Benchmarks';

  metricKeys.forEach(key => {
    const opt = document.createElement('option');
    opt.value = key;
    opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    const target = key.startsWith('agg_score') ? aggGroup : indGroup;
    target.appendChild(opt);
  });

  if (aggGroup.children.length) sel.appendChild(aggGroup);
  if (indGroup.children.length) sel.appendChild(indGroup);

  // Same marker pattern as the container's `data-mounted` flag: remember on
  // the element itself that the handler is already wired.
  if (!(sel.dataset && sel.dataset.changeBound === 'true')) {
    sel.addEventListener('change', () => { currentMetric = sel.value; render(); });
    if (sel.dataset) sel.dataset.changeBound = 'true';
  }
}
|
| 747 |
+
|
| 748 |
+
/**
 * Rebuilds the legend entries inside `.legend .items`.
 *
 * Runs are ordered by their `defaultMetric` value at their highest step
 * (descending); baseline runs are left out. Hovering an entry sets the
 * shared `highlight` state and calls `updateHighlight()`.
 */
function buildLegend() {
  const list = container.querySelector('.legend .items');
  if (!list) return;
  list.innerHTML = '';

  // Score of a run = default metric read from the row with the largest step.
  const scoreAtLastStep = (rows) => {
    const lastStep = d3.max(rows, r => +r[STEP_COL]);
    const lastRow = rows.find(r => +r[STEP_COL] === lastStep);
    return lastRow ? +lastRow[defaultMetric] : 0;
  };

  const ranked = [];
  for (const [raw, rows] of d3.group(allData, d => d[RUN_COL]).entries()) {
    ranked.push({ raw, score: scoreAtLastStep(rows) });
  }
  ranked.sort((a, b) => b.score - a.score);

  for (const { raw } of ranked) {
    if (isBaseline(raw)) continue;

    const name = displayName(raw);

    const entry = document.createElement('span');
    entry.className = 'item';
    entry.setAttribute('data-name', name);

    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = colorMap[raw] || '#999';
    if (isShaded(raw)) {
      // Diagonal stripes over the base colour mark "shaded" datasets.
      swatch.style.backgroundImage = 'repeating-linear-gradient(45deg, transparent, transparent 2px, rgba(255,255,255,0.4) 2px, rgba(255,255,255,0.4) 4px)';
    }

    const caption = document.createElement('span');
    caption.textContent = name;

    entry.appendChild(swatch);
    entry.appendChild(caption);
    list.appendChild(entry);

    entry.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    entry.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  }
}
|
| 778 |
+
|
| 779 |
+
buildUI();
|
| 780 |
+
|
| 781 |
+
// ─── DATA LOADING ───
|
| 782 |
+
/**
 * Fetches the candidate URLs in order and resolves with the body text of the
 * first one that answers with an OK status.
 *
 * A candidate that fails (network error, non-OK status, or body read error)
 * is skipped, but the most recent failure is kept and attached as `cause`
 * to the final error so the reason is not lost.
 *
 * @param {string[]} paths - Candidate URLs, tried in the given order.
 * @returns {Promise<string>} Body text of the first successful response.
 * @throws {Error} 'CSV not found' when no candidate succeeds (or the list is
 *   empty or missing); `error.cause` holds the last failure if there was one.
 */
const fetchFirstAvailable = async (paths) => {
  let lastFailure;
  for (const p of paths ?? []) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.text();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Best effort: remember why this candidate failed and try the next one.
      lastFailure = err;
    }
  }
  throw new Error('CSV not found', lastFailure ? { cause: lastFailure } : undefined);
};
|
| 788 |
+
|
| 789 |
+
// Walk up from the chart container to the nearest element that carries a
// non-empty `data-datafiles` attribute (ends as null/undefined if none does).
let dataMountEl = container;
for (; dataMountEl; dataMountEl = dataMountEl.parentElement) {
  if (dataMountEl.getAttribute?.('data-datafiles')) break;
}

// The attribute is either a JSON array of paths or a single path string.
// Unreadable values are ignored on purpose so the default path is used.
let providedData = null;
try {
  const rawAttr = dataMountEl && dataMountEl.getAttribute
    ? dataMountEl.getAttribute('data-datafiles')
    : null;
  const trimmed = rawAttr ? rawAttr.trim() : '';
  if (trimmed) {
    providedData = trimmed.startsWith('[') ? JSON.parse(rawAttr) : trimmed;
  }
} catch (_) {}

// Bare file names (no slash) are resolved under `/data/`; anything else is
// passed through untouched.
const ensurePrefix = (p) => {
  const isBareName = typeof p === 'string' && p && !p.includes('/');
  return isBareName ? `/data/${p}` : p;
};

const csvPaths = (() => {
  if (!providedData) return ['../data/benchmark-results.csv'];
  if (Array.isArray(providedData)) return providedData.map(p => ensurePrefix(p));
  return [ensurePrefix(providedData)];
})();
|
| 801 |
+
|
| 802 |
+
// Load the CSV, derive the averaged setup when applicable, draw the chart and
// keep it in sync with the container size. Any failure is shown inline as a
// <pre> message inside the container.
(async () => {
  try {
    const csvText = await fetchFirstAvailable(csvPaths);
    const rows = d3.csvParse(csvText);
    parsedData = rows;

    const hasMultipleSetups = SETUPS && setupNames.length >= 2;
    if (hasMultipleSetups) {
      const avg = computeAverageData(rows);
      avgDatasets = avg.datasets;
      // `concat` returns a plain array, so the column list has to be copied.
      parsedData = rows.concat(avg.data);
      parsedData.columns = rows.columns;
      if (currentSetup === AVG_SETUP_KEY) {
        DATASETS = { ...avgDatasets };
      }
    }

    filterData();

    metricKeys = detectMetrics(allData.columns);
    const metricIsKnown = metricKeys.includes(currentMetric);
    if (!metricIsKnown) {
      currentMetric = metricKeys[0];
    }

    populateMetricSelect();
    render();
    buildLegend();

    // Redraw on size changes; fall back to window resize without ResizeObserver.
    const redraw = () => render();
    if (window.ResizeObserver) {
      new ResizeObserver(redraw).observe(container);
    } else {
      window.addEventListener('resize', redraw);
    }
  } catch (e) {
    const notice = document.createElement('pre');
    notice.textContent = 'Data load error: ' + (e && e.message ? e.message : e);
    notice.style.color = 'var(--danger, #b00020)';
    notice.style.fontSize = '12px';
    container.appendChild(notice);
  }
})();
|
| 830 |
+
};
|
| 831 |
+
|
| 832 |
+
// Start right away when the document is already parsed; otherwise wait for
// DOMContentLoaded (listener is removed after its first run).
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  document.addEventListener('DOMContentLoaded', () => { ensureD3(bootstrap); }, { once: true });
}
|
| 834 |
+
})();
|
| 835 |
+
</script>
|
| 836 |
+
</body>
|
| 837 |
+
</html>
|
app/presentation/se2026/charts/benchmark.html
ADDED
|
@@ -0,0 +1,837 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" data-theme="dark">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 6 |
+
<title>Benchmark Comparison</title>
|
| 7 |
+
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
| 8 |
+
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--text-color: rgba(255,255,255,0.88);
|
| 11 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 12 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 13 |
+
--border-color: rgba(255,255,255,0.1);
|
| 14 |
+
--axis-color: rgba(255,255,255,0.15);
|
| 15 |
+
--tick-color: rgba(255,255,255,0.5);
|
| 16 |
+
--grid-color: rgba(255,255,255,0.06);
|
| 17 |
+
--primary-color: #7c6ff7;
|
| 18 |
+
}
|
| 19 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 20 |
+
html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
|
| 21 |
+
</style>
|
| 22 |
+
</head>
|
| 23 |
+
<body>
|
| 24 |
+
<div class="d3-benchmark-comparison" data-config='{"datasets":{"mix-fw_edu_hq-table_smollm2_1.7b_hq":{"display":"FinePhrase","color":"#EBA937"},"dclm":{"display":"DCLM","baseline":true},"nemotron_hq_synth":{"display":"Nemotron-HQ-Synth","color":"#76b900","shaded":true},"rewire":{"display":"REWIRE","color":"#1877F2","shaded":true},"cosmopedia":"Cosmopedia","fw_edu_hq":"FW-Edu HQ","synth_query_reasoning_answer":"SYNTH","ultra-fineweb":"Ultra-FineWeb"}}' data-datafiles="../data/benchmark-results.csv"></div>
|
| 25 |
+
<style>
|
| 26 |
+
.d3-benchmark-comparison { position: relative; }
|
| 27 |
+
.d3-benchmark-comparison .controls {
|
| 28 |
+
display: flex;
|
| 29 |
+
gap: 16px;
|
| 30 |
+
align-items: flex-end;
|
| 31 |
+
justify-content: center;
|
| 32 |
+
margin: 10px 0 0 0;
|
| 33 |
+
}
|
| 34 |
+
.d3-benchmark-comparison .controls .control-group {
|
| 35 |
+
display: flex;
|
| 36 |
+
flex-direction: column;
|
| 37 |
+
align-items: flex-start;
|
| 38 |
+
gap: 6px;
|
| 39 |
+
}
|
| 40 |
+
.d3-benchmark-comparison .controls label {
|
| 41 |
+
font-size: 18px;
|
| 42 |
+
font-weight: 700;
|
| 43 |
+
color: var(--text-color);
|
| 44 |
+
}
|
| 45 |
+
.d3-benchmark-comparison .controls select {
|
| 46 |
+
appearance: none;
|
| 47 |
+
-webkit-appearance: none;
|
| 48 |
+
-moz-appearance: none;
|
| 49 |
+
border: 1px solid var(--border-color);
|
| 50 |
+
border-radius: 8px;
|
| 51 |
+
padding: 6px 28px 6px 10px;
|
| 52 |
+
background-color: var(--surface-bg);
|
| 53 |
+
color: var(--text-color);
|
| 54 |
+
font-size: 18px;
|
| 55 |
+
line-height: 1.2;
|
| 56 |
+
background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
|
| 57 |
+
background-repeat: no-repeat;
|
| 58 |
+
background-position: right 8px center;
|
| 59 |
+
}
|
| 60 |
+
.d3-benchmark-comparison .controls select:focus-visible {
|
| 61 |
+
outline: 2px solid var(--primary-color);
|
| 62 |
+
outline-offset: 2px;
|
| 63 |
+
}
|
| 64 |
+
.d3-benchmark-comparison .legend {
|
| 65 |
+
display: flex;
|
| 66 |
+
flex-direction: column;
|
| 67 |
+
align-items: flex-start;
|
| 68 |
+
gap: 6px;
|
| 69 |
+
margin: 8px 0 0 0;
|
| 70 |
+
padding-bottom: 4px;
|
| 71 |
+
}
|
| 72 |
+
.d3-benchmark-comparison .legend .legend-title {
|
| 73 |
+
font-size: 18px;
|
| 74 |
+
font-weight: 700;
|
| 75 |
+
color: var(--text-color);
|
| 76 |
+
}
|
| 77 |
+
.d3-benchmark-comparison .legend .items {
|
| 78 |
+
display: flex;
|
| 79 |
+
flex-wrap: wrap;
|
| 80 |
+
gap: 8px 14px;
|
| 81 |
+
}
|
| 82 |
+
.d3-benchmark-comparison .legend .item {
|
| 83 |
+
display: inline-flex;
|
| 84 |
+
align-items: center;
|
| 85 |
+
gap: 6px;
|
| 86 |
+
white-space: nowrap;
|
| 87 |
+
font-size: 18px;
|
| 88 |
+
color: var(--text-color);
|
| 89 |
+
cursor: pointer;
|
| 90 |
+
}
|
| 91 |
+
.d3-benchmark-comparison .legend .item.ghost { opacity: .25; }
|
| 92 |
+
.d3-benchmark-comparison .legend .swatch {
|
| 93 |
+
width: 14px;
|
| 94 |
+
height: 14px;
|
| 95 |
+
border-radius: 3px;
|
| 96 |
+
border: 1px solid var(--border-color);
|
| 97 |
+
}
|
| 98 |
+
.d3-benchmark-comparison .bar.ghost { opacity: .25; }
|
| 99 |
+
.d3-benchmark-comparison .value-label.ghost { opacity: .25; }
|
| 100 |
+
.d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; }
|
| 101 |
+
.d3-benchmark-comparison .line-path.ghost { opacity: .15; }
|
| 102 |
+
.d3-benchmark-comparison .line-dot.ghost { opacity: .15; }
|
| 103 |
+
.d3-benchmark-comparison .baseline.ghost { opacity: .1; }
|
| 104 |
+
.d3-benchmark-comparison .axes path { display: none; }
|
| 105 |
+
.d3-benchmark-comparison .axes line { stroke: var(--axis-color); }
|
| 106 |
+
.d3-benchmark-comparison .axes text { fill: var(--tick-color); }
|
| 107 |
+
.d3-benchmark-comparison .grid line { stroke: var(--grid-color); }
|
| 108 |
+
.d3-benchmark-comparison .hover-line {
|
| 109 |
+
stroke: var(--text-color);
|
| 110 |
+
stroke-opacity: 0.25;
|
| 111 |
+
stroke-width: 1;
|
| 112 |
+
pointer-events: none;
|
| 113 |
+
}
|
| 114 |
+
.d3-benchmark-comparison .d3-tooltip {
|
| 115 |
+
position: absolute;
|
| 116 |
+
top: 0px;
|
| 117 |
+
left: 0px;
|
| 118 |
+
transform: translate(-9999px, -9999px);
|
| 119 |
+
pointer-events: none;
|
| 120 |
+
padding: 8px 10px;
|
| 121 |
+
border-radius: 8px;
|
| 122 |
+
font-size: 18px;
|
| 123 |
+
line-height: 1.35;
|
| 124 |
+
border: 1px solid var(--border-color);
|
| 125 |
+
background: var(--surface-bg);
|
| 126 |
+
color: var(--text-color);
|
| 127 |
+
box-shadow: 0 4px 24px rgba(0,0,0,.18);
|
| 128 |
+
opacity: 0;
|
| 129 |
+
transition: opacity .12s ease;
|
| 130 |
+
text-align: left;
|
| 131 |
+
z-index: 10;
|
| 132 |
+
}
|
| 133 |
+
.d3-benchmark-comparison .d3-tooltip .tip-dot {
|
| 134 |
+
display: inline-block;
|
| 135 |
+
width: 10px;
|
| 136 |
+
height: 10px;
|
| 137 |
+
border-radius: 3px;
|
| 138 |
+
border: 1px solid var(--border-color);
|
| 139 |
+
margin-right: 6px;
|
| 140 |
+
vertical-align: middle;
|
| 141 |
+
}
|
| 142 |
+
</style>
|
| 143 |
+
<script>
|
| 144 |
+
(() => {
|
| 145 |
+
/**
 * Runs `cb` once a usable D3 global (`window.d3.select`) is available.
 *
 * If D3 is already present, `cb` is invoked synchronously and its result is
 * returned. Otherwise a single shared `<script id="d3-cdn-script">` tag is
 * reused or injected, and `cb` runs when that script fires `load`.
 * A failed script load is reported on the console instead of being silent.
 *
 * @param {Function} cb - Callback to run when D3 is ready.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3 && typeof window.d3.select === 'function');
  if (hasD3()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  const onReady = () => { if (hasD3()) cb(); };
  s.addEventListener('load', onReady, { once: true });
  // Without this, a blocked or offline CDN leaves the chart blank with no hint.
  s.addEventListener('error', () => {
    console.error('[d3-benchmark-comparison] Failed to load D3 from ' + s.src);
  }, { once: true });
  if (window.d3) onReady();
};
|
| 152 |
+
|
| 153 |
+
const bootstrap = () => {
|
| 154 |
+
const scriptEl = document.currentScript;
|
| 155 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 156 |
+
if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) {
|
| 157 |
+
const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
|
| 158 |
+
container = cs[cs.length - 1] || null;
|
| 159 |
+
}
|
| 160 |
+
if (!container) return;
|
| 161 |
+
if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
|
| 162 |
+
|
| 163 |
+
container.style.position = container.style.position || 'relative';
|
| 164 |
+
|
| 165 |
+
// ─── READ CONFIG ───
|
| 166 |
+
let mountEl = container;
|
| 167 |
+
while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
|
| 168 |
+
let cfg = {};
|
| 169 |
+
try {
|
| 170 |
+
const raw = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
|
| 171 |
+
if (raw && raw.trim()) cfg = raw.trim().startsWith('{') ? JSON.parse(raw) : {};
|
| 172 |
+
} catch (_) {}
|
| 173 |
+
|
| 174 |
+
// ─── NORMALIZE DATASETS CONFIG ───
|
| 175 |
+
// Accepts: { "key": "Name" } or { "key": { display, color, shaded, baseline } }
// Returns: { key: { display, color, shaded, baseline } }
// Each entry is a shallow copy, so callers may mutate the result freely.
// An entry without a `display` (or a null entry) uses its key as the display
// name, so labels never read "undefined" and such entries are not lumped
// together when grouped by display name.
function normalizeDatasets(raw) {
  const out = {};
  for (const [k, v] of Object.entries(raw || {})) {
    const opts = typeof v === 'string' ? { display: v } : { ...v };
    if (opts.display == null) opts.display = k;
    out[k] = opts;
  }
  return out;
}
|
| 184 |
+
|
| 185 |
+
// ─── SETUP SUPPORT ───
|
| 186 |
+
// Optional multi-setup config: { setupName: { datasets: {...} }, ... }.
const SETUPS = cfg.setups || null;
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
// Label and value of the synthetic "average over setups" choice.
const AVG_SETUP_KEY = 'Average (all setups)';
// With two or more setups and no explicit default, start on the average.
const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 ? 'average' : null);

// Initial setup: the average, the configured default if it names a known
// setup, or else the first declared setup. Stays null without setups.
let currentSetup = null;
if (SETUPS) {
  if (defaultSetupCfg === 'average') {
    currentSetup = AVG_SETUP_KEY;
  } else if (defaultSetupCfg && setupNames.includes(defaultSetupCfg)) {
    currentSetup = defaultSetupCfg;
  } else {
    currentSetup = setupNames[0];
  }
}

// Active dataset table. For the average setup it starts empty here.
let DATASETS;
if (!SETUPS) {
  DATASETS = normalizeDatasets(cfg.datasets);
} else if (currentSetup === AVG_SETUP_KEY) {
  DATASETS = {};
} else {
  DATASETS = normalizeDatasets(SETUPS[currentSetup].datasets);
}
let avgDatasets = {};
let parsedData = [];

// Column names and defaults, each overridable from the config.
const RUN_COL = cfg.runColumn || 'runname';
const STEP_COL = cfg.stepColumn || 'steps';
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
const defaultView = cfg.defaultView || 'bar';
// Short random suffix used to build per-instance ids.
const uid = Math.random().toString(36).slice(2, 8);
|
| 201 |
+
|
| 202 |
+
// ─── DATASET ACCESSORS ───
|
| 203 |
+
/** Display label for a raw run name; falls back to the raw name itself. */
function displayName(raw) {
  const entry = DATASETS[raw];
  return entry ? entry.display : raw;
}

/** True when the run is configured with a truthy `baseline` flag. */
function isBaseline(raw) {
  const entry = DATASETS[raw];
  return Boolean(entry && entry.baseline);
}

/** True when the run is configured with a truthy `shaded` flag. */
function isShaded(raw) {
  const entry = DATASETS[raw];
  return Boolean(entry && entry.shaded);
}

/** Configured colour for the run, or a falsy value when none is set. */
function pinnedColor(raw) {
  const entry = DATASETS[raw];
  return entry && entry.color;
}

/** Id for the run's stripe pattern: instance uid plus a sanitised run name. */
function stripePatternId(raw) {
  const safeName = raw.replace(/[^a-zA-Z0-9]/g, '_');
  return `stripe-${uid}-${safeName}`;
}
|
| 208 |
+
|
| 209 |
+
// Human-readable labels for metric column names. Keys without an entry are
// shown verbatim by `metricName`.
const METRIC_NAMES = {
  // Aggregate scores
  agg_score_macro: 'Aggregate Score (Macro)',
  agg_score_micro: 'Aggregate Score (Micro)',
  agg_score_RC: 'Reading Comprehension',
  agg_score_GK: 'General Knowledge',
  agg_score_NLU: 'Natural Language Understanding',
  agg_score_MATH: 'Math',
  agg_score_TABLE: 'Table Understanding',
  agg_score_RES: 'Reasoning',

  // Individual benchmarks
  'lighteval|arc_cf:easy|3/prob_norm_token': 'ARC-Easy',
  'lighteval|drop|3/prob_norm_token': 'DROP',
  'lighteval|gsm8k|3/prob_norm_token': 'GSM8K',
  'lighteval|hellaswag_cf|3/prob_norm_token': 'HellaSwag',
  'lighteval|openbookqa_cf|3/prob_norm_token': 'OpenBookQA',
  'lighteval|piqa_cf|3/prob_norm_token': 'PIQA',
  'lighteval|squad_v2|3/prob_norm_token': 'SQuAD v2',
  // NOTE(review): the key says "treb_qa" but the label is "TriviaQA" — confirm.
  'lighteval|treb_qa|3/prob_norm_token': 'TriviaQA',
  'lighteval|wikitablequestions|3/prob_norm_token': 'WikiTableQuestions',
  'lighteval|winogrande_cf|3/prob_norm_token': 'Winogrande',
  'lighteval|xcsqa_cf|3/prob_norm_token': 'XCSQA',
  'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux',
};
|
| 231 |
+
|
| 232 |
+
// Tooltip
|
| 233 |
+
// Reuse a tooltip already present in the container, otherwise create one
// with an inner wrapper element.
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (tip) {
  tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
} else {
  tip = document.createElement('div');
  tip.className = 'd3-tooltip';
  tipInner = document.createElement('div');
  tipInner.className = 'd3-tooltip__inner';
  tip.appendChild(tipInner);
  container.appendChild(tip);
}

// SVG
const svg = d3.select(container).append('svg');
svg.attr('width', '100%').style('display', 'block');
const gRoot = svg.append('g');   // drawing group
const defs = svg.append('defs'); // appended after the drawing group

// State
let allData = [];                 // rows currently shown
let metricKeys = [];              // metric columns available for selection
let currentMetric = defaultMetric;
let currentView = defaultView;
let colorMap = {};                // raw run name -> colour
let highlight = null;             // display name hovered in the legend, if any
|
| 252 |
+
|
| 253 |
+
// ─── HELPERS ───
|
| 254 |
+
/** Friendly label for a metric key, or the key itself when unmapped. */
function metricName(key) {
  return METRIC_NAMES[key] || key;
}

/** Converts a step count into a token count via `TOKENS_PER_STEP`. */
function stepsToTokens(step) {
  return step * TOKENS_PER_STEP;
}

/** Formats a token count as "x.xxB", "x.xM", or a comma-grouped number. */
function formatTokens(tokens) {
  if (tokens >= 1e9) {
    return `${d3.format('.2f')(tokens / 1e9)}B`;
  }
  if (tokens >= 1e6) {
    return `${d3.format('.1f')(tokens / 1e6)}M`;
  }
  return d3.format(',')(tokens);
}

/** Formats a step count: whole thousands with a "K" suffix from 1000 up. */
function formatStep(step) {
  return step >= 1000 ? `${d3.format('.0f')(step / 1000)}K` : String(step);
}

/** Compact label: "<tokens> (<steps>)". */
function stepLabelShort(step) {
  const tokens = formatTokens(stepsToTokens(step));
  return `${tokens} (${formatStep(step)})`;
}

/** Long label: "<tokens> Tokens (<steps> Steps)". */
function stepLabelLong(step) {
  const tokens = formatTokens(stepsToTokens(step));
  return `${tokens} Tokens (${formatStep(step)} Steps)`;
}
|
| 268 |
+
|
| 269 |
+
/**
 * Return up to `n` categorical colors.
 *
 * Prefers `window.ColorPalettes.getColors('categorical', n)` when that hook exists and
 * yields a non-empty list; otherwise falls back to d3's Tableau10 scheme (or a hard-coded
 * copy of it). An empty or missing result from the hook is treated as "no palette" so that
 * callers indexing with `i % palette.length` never end up with undefined colors.
 *
 * @param {number} n - number of colors requested
 * @returns {string[]} at most `n` colors from the fallback, or whatever the hook returned
 */
function getCategoricalColors(n) {
  try {
    const palettes = window.ColorPalettes;
    if (palettes && typeof palettes.getColors === 'function') {
      const colors = palettes.getColors('categorical', n);
      if (colors && colors.length > 0) return colors;
    }
  } catch (_) {
    // Best effort: any failure in the optional palette hook falls through to the default scheme.
  }
  const fallback = d3.schemeTableau10 || ['#4e79a7','#f28e2b','#e15759','#76b7b2','#59a14f','#edc948','#b07aa1','#ff9da7','#9c755f','#bab0ac'];
  return fallback.slice(0, n);
}
|
| 273 |
+
|
| 274 |
+
// Fill colorMap (raw run name -> color) for the runs in allData.
// No-op when colorMap already has entries. Runs with a pinned color keep it; the rest
// share a categorical palette, assigned in sorted-name order and wrapping if it is shorter.
function initColors() {
  if (Object.keys(colorMap).length > 0) return;

  const runNames = [...new Set(allData.map(row => row[RUN_COL]))].sort();
  const needsPalette = [];
  for (const raw of runNames) {
    const pinned = pinnedColor(raw);
    if (pinned) {
      colorMap[raw] = pinned;
    } else {
      needsPalette.push(raw);
    }
  }

  const palette = getCategoricalColors(needsPalette.length);
  needsPalette.forEach((raw, idx) => {
    colorMap[raw] = palette[idx % palette.length];
  });
}
|
| 286 |
+
|
| 287 |
+
// ─── SETUP HELPERS ───
|
| 288 |
+
// Restrict allData to the runs listed in DATASETS. With an empty DATASETS, allData is
// parsedData itself (same array). The `columns` list of parsedData is carried over.
function filterData() {
  const known = new Set(Object.keys(DATASETS));
  if (known.size > 0) {
    allData = parsedData.filter(row => known.has(row[RUN_COL]));
  } else {
    allData = parsedData;
  }
  allData.columns = parsedData.columns;
}
|
| 293 |
+
|
| 294 |
+
/**
 * Build synthetic "average across setups" rows.
 *
 * For every display name that is listed at least `setupNames.length` times across the
 * setups' dataset maps, the runs behind it are averaged per step and per column into a new
 * run named `__avg__<sanitized display name>`.
 *
 * @param {Array<Object>} rawData - parsed CSV rows; an optional `columns` array is used
 *   when present, otherwise the keys of the first row.
 * @returns {{data: Array<Object>, datasets: Object}} averaged rows plus a dataset-options
 *   map keyed by the synthetic run name. Both are empty when fewer than two setups exist.
 */
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasets: {} };

  // display name -> raw run names carrying that display name, across all setups
  const displayToRaws = {};
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const [raw, opts] of Object.entries(ds)) {
      if (!displayToRaws[opts.display]) displayToRaws[opts.display] = [];
      displayToRaws[opts.display].push(raw);
    }
  }
  const fullDisplay = Object.entries(displayToRaws)
    .filter(([, raws]) => raws.length >= setupNames.length);

  // Index rows by run and NUMERIC step. Keying by the raw CSV string would miss rows whose
  // step is spelled differently ("1000.0" vs "1000") when looked up by number below.
  const rowKey = (run, step) => run + '|' + step;
  const byRunStep = new Map();
  for (const row of rawData) byRunStep.set(rowKey(row[RUN_COL], +row[STEP_COL]), row);

  const steps = Array.from(new Set(rawData.map(r => +r[STEP_COL])))
    .filter(s => !Number.isNaN(s))
    .sort((a, b) => a - b);
  const cols = rawData.columns || Object.keys(rawData[0] || {});

  // Display options (baseline, shading, ...) are taken from the first setup only.
  const firstSetupOpts = Object.values(normalizeDatasets(SETUPS[setupNames[0]].datasets));

  const result = [];
  const dsMap = {};
  for (const [display, raws] of fullDisplay) {
    const avgRaw = '__avg__' + display.replace(/[^a-zA-Z0-9]/g, '_');
    const firstOpts = firstSetupOpts.find(o => o.display === display) || {};
    dsMap[avgRaw] = { display, ...firstOpts };

    for (const step of steps) {
      const rows = raws.map(r => byRunStep.get(rowKey(r, step))).filter(Boolean);
      if (!rows.length) continue;

      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of cols) {
        if (col === RUN_COL || col === STEP_COL) continue;
        const vals = [];
        for (const r of rows) {
          const cell = r[col];
          // A blank cell means "no measurement"; `+''` would otherwise count it as 0.
          if (cell == null || String(cell).trim() === '') continue;
          const num = +cell;
          if (!Number.isNaN(num)) vals.push(num);
        }
        // Columns with no numeric value at this step keep the historical 0 placeholder.
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      result.push(avgRow);
    }
  }
  return { data: result, datasets: dsMap };
}
|
| 331 |
+
|
| 332 |
+
/**
 * Switch the chart to another setup and redraw.
 *
 * DATASETS becomes a copy of the averaged datasets (for AVG_SETUP_KEY) or the named setup's
 * normalized datasets. Baseline runs declared in any setup are then added back, provided
 * they have rows in parsedData. Colors are reset so they get reassigned for the new runs.
 *
 * @param {string} name - a key of SETUPS, or AVG_SETUP_KEY
 */
function switchSetup(name) {
  currentSetup = name;
  if (name === AVG_SETUP_KEY) {
    DATASETS = { ...avgDatasets };
  } else {
    DATASETS = normalizeDatasets(SETUPS[name].datasets);
  }

  // Collect the run names once instead of rescanning parsedData for every baseline candidate.
  const runsWithData = new Set(parsedData.map(r => r[RUN_COL]));

  // Re-add baselines from any setup
  for (const sName of setupNames) {
    const ds = normalizeDatasets(SETUPS[sName].datasets);
    for (const [raw, opts] of Object.entries(ds)) {
      if (opts.baseline && !DATASETS[raw] && runsWithData.has(raw)) {
        DATASETS[raw] = { ...opts };
      }
    }
  }

  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
|
| 354 |
+
|
| 355 |
+
// Show the tooltip with the given HTML near container-relative point (x, y).
// The tooltip sits to the right of the pointer unless that would overflow the container,
// in which case it flips to the left. The vertical position is clamped at the top edge.
// NOTE(review): `html` is assigned to innerHTML; callers must pass trusted markup.
function showTip(html, x, y) {
  tipInner.innerHTML = html;
  const tipWidth = tip.offsetWidth || 180;
  const containerWidth = container.clientWidth || 800;
  const overflowsRight = x + tipWidth + 20 > containerWidth;
  const left = overflowsRight ? x - tipWidth - 12 : x + 12;
  const top = Math.max(0, y - 20);
  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
}
|
| 363 |
+
// Hide the tooltip: make it transparent and move it far off-screen.
function hideTip() {
  Object.assign(tip.style, {
    opacity: '0',
    transform: 'translate(-9999px, -9999px)',
  });
}
|
| 367 |
+
|
| 368 |
+
// Apply the current `highlight` (a display name or null): every chart mark and legend item
// that belongs to a different name gets the 'ghost' class; with no highlight it is removed.
function updateHighlight() {
  const isGhost = d => highlight && d.name !== highlight;
  const markSelectors = [
    'rect.bar',
    'text.value-label',
    '.line-path',
    '.line-dot',
    '.baseline-vline',
    '.baseline-vlabel',
    '.baseline-hline',
    '.baseline-hlabel',
  ];
  for (const selector of markSelectors) {
    gRoot.selectAll(selector).classed('ghost', isGhost);
  }

  for (const el of container.querySelectorAll('.legend .item')) {
    el.classList.toggle('ghost', highlight && el.getAttribute('data-name') !== highlight);
  }
}
|
| 381 |
+
|
| 382 |
+
// ─── AUTO-DETECT METRICS from CSV columns ───
|
| 383 |
+
/**
 * Derive the list of plottable metric columns from the CSV header.
 *
 * Known aggregate columns come first, in a fixed order; they are followed by every other
 * column (except the run/step/seed columns) whose value in the first row of allData
 * coerces to a number.
 *
 * @param {string[]} [columns] - CSV column names; a missing list yields no metrics
 * @returns {string[]} aggregate metric keys followed by individual metric keys
 */
function detectMetrics(columns) {
  const names = columns || [];
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter(k => names.includes(k));
  const aggSet = new Set(agg);
  // With no rows there is nothing to sample; an empty object makes every candidate
  // coerce to NaN instead of throwing on allData[0][k].
  const sample = allData[0] || {};
  const ind = names.filter(k => !skip.has(k) && !aggSet.has(k) && !Number.isNaN(+sample[k]));
  return [...agg, ...ind];
}
|
| 390 |
+
|
| 391 |
+
// ─── BAR CHART ───
|
| 392 |
+
/**
 * Draw the "final score" view: one horizontal bar per non-baseline run showing the current
 * metric at that run's largest step, plus a dashed vertical reference line and label for
 * each baseline run. Reads allData / currentMetric / colorMap and draws into gRoot and defs.
 */
function renderBar() {
  const width = container.clientWidth || 800;
  const height = window.innerHeight || 480;
  const hasBaselines = allData.some(r => isBaseline(r[RUN_COL]));
  // Baseline labels are drawn at y = -4, i.e. inside the top margin, hence the larger value.
  const margin = { top: hasBaselines ? 20 : 12, right: 56, bottom: 32, left: 190 };
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // One entry per run: the current metric at the run's largest step, best score first.
  const finalData = [];
  for (const [raw, rows] of d3.group(allData, d => d[RUN_COL])) {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const row = rows.find(r => +r[STEP_COL] === maxStep);
    if (row) finalData.push({ name: displayName(raw), rawName: raw, value: +row[currentMetric] });
  }
  finalData.sort((a, b) => b.value - a.value);

  const barData = finalData.filter(d => !isBaseline(d.rawName));
  const baselineData = finalData.filter(d => isBaseline(d.rawName));

  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  // Fall back to 1 when the maximum is 0 or undefined so the x domain never collapses.
  const maxValue = d3.max(finalData, d => d.value);
  const x = d3.scaleLinear().domain([0, (maxValue || 1) * 1.05]).range([0, innerWidth]);
  const y = d3.scaleBand().domain(barData.map(d => d.name)).range([0, innerHeight]).padding(0.2);

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(x.ticks(5)).join('line')
      .attr('x1', d => x(d)).attr('x2', d => x(d)).attr('y1', 0).attr('y2', innerHeight);
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--text-color)').style('font-size', '18px').style('font-weight', '600');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Stripe patterns for shaded bars
  barData.forEach(d => {
    if (!isShaded(d.rawName)) return;
    const c = colorMap[d.rawName] || '#999';
    const pat = defs.append('pattern').attr('id', stripePatternId(d.rawName))
      .attr('width', 6).attr('height', 6).attr('patternUnits', 'userSpaceOnUse').attr('patternTransform', 'rotate(45)');
    pat.append('rect').attr('width', 6).attr('height', 6).attr('fill', c).attr('opacity', 0.35);
    pat.append('line').attr('x1', 0).attr('y1', 0).attr('x2', 0).attr('y2', 6).attr('stroke', c).attr('stroke-width', 2.5);
  });

  const barFill = d => (isShaded(d.rawName)
    ? `url(#${stripePatternId(d.rawName)})`
    : colorMap[d.rawName] || 'var(--primary-color)');

  // Bars
  const barTip = (ev, d) => {
    const [mx, my] = d3.pointer(ev, container);
    showTip(`<strong>${d.name}</strong><br/>${metricName(currentMetric)}: <strong>${d.value.toFixed(3)}</strong>`, mx, my);
  };
  // Hover wiring shared by entering and updating bars.
  const attachHover = sel => sel
    .on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
    .on('mousemove', barTip)
    .on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); });

  gRoot.selectAll('rect.bar').data(barData, d => d.name).join(
    enter => attachHover(
      enter.append('rect').attr('class', 'bar')
        .attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
        .attr('fill', d => barFill(d))
        .attr('width', 0)
    ).transition().duration(300).attr('width', d => Math.max(0, x(d.value))),
    update => attachHover(update)
      .transition().duration(300)
      .attr('y', d => y(d.name)).attr('height', y.bandwidth())
      .attr('width', d => Math.max(0, x(d.value)))
      .attr('fill', d => barFill(d)),
    exit => exit.transition().duration(200).attr('width', 0).remove()
  );

  // Value labels
  gRoot.selectAll('text.value-label').data(barData, d => d.name).join(
    enter => enter.append('text').attr('class', 'value-label')
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .attr('dy', '0.35em').attr('fill', 'var(--text-color)').attr('font-size', 18)
      .text(d => d.value.toFixed(3)),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .text(d => d.value.toFixed(3)),
    exit => exit.remove()
  );

  // Baseline vertical reference lines
  gRoot.selectAll('.baseline-vline').data(baselineData, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-vline baseline')
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', d => x(d.value)).attr('x2', d => x(d.value))
      .attr('y1', 0).attr('y2', innerHeight)
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-vlabel').data(baselineData, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-vlabel baseline')
      .attr('x', d => x(d.value)).attr('y', -4)
      .attr('text-anchor', 'middle').attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', d => x(d.value))
      .text(d => `${d.name} (${d.value.toFixed(3)})`),
    exit => exit.remove()
  );
}
|
| 521 |
+
|
| 522 |
+
// ─── LINE CHART ───
|
| 523 |
+
/**
 * Draw the "training progression" view: one line (with dots) per non-baseline run showing
 * the current metric over steps, a dashed horizontal reference line per baseline run at its
 * last value, and a hover overlay that snaps to the nearest step and lists all values there.
 * Reads allData / currentMetric / colorMap and draws into gRoot.
 */
function renderLine() {
  const width = container.clientWidth || 800;
  const height = window.innerHeight || 480;
  const margin = { top: 16, right: 50, bottom: 48, left: 60 };
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  // Build series: regular runs keep all points, baselines are reduced to their last value.
  const series = [];
  const baselineSeries = [];
  for (const [raw, rows] of d3.group(allData, d => d[RUN_COL])) {
    const pts = rows.map(r => ({ step: +r[STEP_COL], value: +r[currentMetric] })).sort((a, b) => a.step - b.step);
    const entry = { name: displayName(raw), rawName: raw, values: pts };
    if (isBaseline(raw)) {
      entry.finalValue = pts[pts.length - 1].value;
      baselineSeries.push(entry);
    } else {
      series.push(entry);
    }
  }

  // Steps of the non-baseline runs only; baselines do not stretch the x axis.
  const allSteps = Array.from(new Set(series.flatMap(s => s.values.map(v => v.step)))).sort((a, b) => a - b);
  const allValues = [...series, ...baselineSeries].flatMap(s => s.finalValue != null ? [s.finalValue] : s.values.map(v => v.value));

  // With only baseline runs there are no steps; use a placeholder domain instead of [undefined, undefined].
  const x = d3.scaleLinear().domain(allSteps.length ? d3.extent(allSteps) : [0, 1]).range([0, innerWidth]);
  const yMin = d3.min(allValues);
  const yMax = d3.max(allValues);
  const yPad = (yMax - yMin) * 0.08;
  const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(y.ticks(6)).join('line')
      .attr('x1', 0).attr('x2', innerWidth).attr('y1', d => y(d)).attr('y2', d => y(d));
  });

  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).ticks(6).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '18px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Axis labels
  gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
    .attr('x', innerWidth / 2).attr('y', innerHeight + 38)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text('Tokens (Steps)');

  gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
    .attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 18)
    .text(metricName(currentMetric));

  // Baseline horizontal reference lines
  gRoot.selectAll('.baseline-hline').data(baselineSeries, d => d.name).join(
    enter => enter.append('line').attr('class', 'baseline-hline baseline')
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999')
      .attr('stroke-width', 2).attr('stroke-dasharray', '6,4').attr('opacity', 0.7),
    update => update.transition().duration(300)
      .attr('x1', 0).attr('x2', innerWidth)
      .attr('y1', d => y(d.finalValue)).attr('y2', d => y(d.finalValue))
      .attr('stroke', d => colorMap[d.rawName] || '#999'),
    exit => exit.remove()
  );
  gRoot.selectAll('.baseline-hlabel').data(baselineSeries, d => d.name).join(
    enter => enter.append('text').attr('class', 'baseline-hlabel baseline')
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .attr('text-anchor', 'start')
      .attr('fill', d => colorMap[d.rawName] || '#999')
      .attr('font-size', 18).attr('font-weight', 600)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    update => update.transition().duration(300)
      .attr('x', 4).attr('y', d => y(d.finalValue) - 6)
      .text(d => `${d.name} (${d.finalValue.toFixed(3)})`),
    exit => exit.remove()
  );

  // Lines (non-baseline)
  const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
  gRoot.selectAll('.line-path').data(series, d => d.name).join(
    enter => enter.append('path').attr('class', 'line-path')
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    update => update.transition().duration(300)
      .attr('stroke', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('d', d => line(d.values)),
    exit => exit.remove()
  );

  // Dots (non-baseline)
  const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
  gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
    enter => enter.append('circle').attr('class', 'line-dot')
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value)).attr('r', 3)
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)')
      .attr('stroke', 'var(--surface-bg)').attr('stroke-width', 1),
    update => update.transition().duration(300)
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value))
      .attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)'),
    exit => exit.remove()
  );

  // Hover overlay
  gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
    .attr('y1', 0).attr('y2', innerHeight).style('display', 'none');

  gRoot.selectAll('.hover-overlay').data([0]).join('rect').attr('class', 'hover-overlay')
    .attr('width', innerWidth).attr('height', innerHeight)
    .attr('fill', 'none').attr('pointer-events', 'all')
    .on('mousemove', (ev) => {
      if (!allSteps.length) return; // nothing to snap to
      const [mx] = d3.pointer(ev, gRoot.node());
      // Invert the pointer position once, then pick the closest recorded step.
      const target = x.invert(mx);
      const nearest = allSteps.reduce((best, s) => Math.abs(s - target) < Math.abs(best - target) ? s : best, allSteps[0]);
      gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);

      // Values at the snapped step; baselines always contribute their final value.
      const entries = series.map(s => {
        const pt = s.values.find(v => v.step === nearest);
        return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
      }).filter(Boolean);
      baselineSeries.forEach(s => {
        entries.push({ name: s.name, rawName: s.rawName, value: s.finalValue });
      });
      entries.sort((a, b) => b.value - a.value);

      let html = `<div style="font-weight:700;margin-bottom:4px;">${stepLabelLong(nearest)}</div>`;
      entries.forEach(e => {
        html += `<div><span class="tip-dot" style="background:${colorMap[e.rawName] || '#999'}"></span>${e.name}: <strong>${e.value.toFixed(3)}</strong></div>`;
      });
      const [cx, cy] = d3.pointer(ev, container);
      showTip(html, cx, cy);
    })
    .on('mouseleave', () => {
      gRoot.select('.hover-line').style('display', 'none');
      hideTip();
    });
}
|
| 674 |
+
|
| 675 |
+
// ─── RENDER ───
|
| 676 |
+
// Redraw the chart from scratch for the current view. Does nothing while there is no data.
function render() {
  if (allData.length === 0) return;
  initColors();
  // Clear previous marks and fill patterns before drawing.
  [gRoot, defs].forEach(sel => sel.selectAll('*').remove());
  const draw = currentView === 'bar' ? renderBar : renderLine;
  draw();
}
|
| 683 |
+
|
| 684 |
+
// ─── UI ───
|
| 685 |
+
// Build the control bar (optional Setup selector, View selector, empty Metric selector)
// and the legend skeleton, and append both to the container.
function buildUI() {
  // Creates <div class="control-group"><label for=id>text</label><select id=id></select></div>.
  const makeGroup = (idPrefix, labelText) => {
    const group = document.createElement('div');
    group.className = 'control-group';
    const label = document.createElement('label');
    label.setAttribute('for', idPrefix + uid);
    label.textContent = labelText;
    const select = document.createElement('select');
    select.id = idPrefix + uid;
    group.appendChild(label);
    group.appendChild(select);
    return { group, select };
  };
  const addOption = (select, value, text, isSelected) => {
    const opt = document.createElement('option');
    opt.value = value;
    opt.textContent = text;
    if (isSelected) opt.selected = true;
    select.appendChild(opt);
  };

  const controls = document.createElement('div');
  controls.className = 'controls';

  if (SETUPS && setupNames.length > 0) {
    const { group: setupGroup, select: setupSelect } = makeGroup('setup-', 'Setup');
    for (const name of setupNames) {
      addOption(setupSelect, name, name, name === currentSetup);
    }
    // The averaged pseudo-setup is only offered when there are at least two setups.
    if (setupNames.length >= 2) {
      addOption(setupSelect, AVG_SETUP_KEY, AVG_SETUP_KEY, currentSetup === AVG_SETUP_KEY);
    }
    setupSelect.addEventListener('change', () => { switchSetup(setupSelect.value); });
    controls.appendChild(setupGroup);
  }

  const { group: viewGroup, select: viewSelect } = makeGroup('view-', 'View');
  const viewOptions = [['bar', 'Final Score'], ['line', 'Training Progression']];
  for (const [val, text] of viewOptions) {
    addOption(viewSelect, val, text, val === currentView);
  }
  viewSelect.addEventListener('change', () => { currentView = viewSelect.value; render(); });
  controls.appendChild(viewGroup);

  // The metric selector starts empty; its options are filled in by populateMetricSelect.
  const { group: metricGroup } = makeGroup('metric-', 'Metric');
  controls.appendChild(metricGroup);

  container.appendChild(controls);

  const legend = document.createElement('div');
  legend.className = 'legend';
  legend.innerHTML = '<div class="legend-title">Legend</div><div class="items"></div>';
  container.appendChild(legend);
}
|
| 731 |
+
|
| 732 |
+
/**
 * (Re)fill the metric <select> from metricKeys, grouped into "Aggregate Scores" (keys
 * starting with 'agg_score') and "Individual Benchmarks", preselecting currentMetric.
 * Safe to call repeatedly: options are rebuilt and the change handler is replaced, not
 * stacked, so a single change triggers a single render.
 */
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;
  sel.innerHTML = '';

  const aggGroup = document.createElement('optgroup');
  aggGroup.label = 'Aggregate Scores';
  const indGroup = document.createElement('optgroup');
  indGroup.label = 'Individual Benchmarks';

  metricKeys.forEach(key => {
    const opt = document.createElement('option');
    opt.value = key;
    opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    if (key.startsWith('agg_score')) aggGroup.appendChild(opt); else indGroup.appendChild(opt);
  });

  // Empty groups are left out of the select.
  if (aggGroup.children.length) sel.appendChild(aggGroup);
  if (indGroup.children.length) sel.appendChild(indGroup);

  // Property handler instead of addEventListener: a repeated call overwrites the previous
  // handler rather than adding another listener.
  sel.onchange = () => { currentMetric = sel.value; render(); };
}
|
| 747 |
+
|
| 748 |
+
// Rebuild the legend: one item per non-baseline run in allData, ordered by the run's
// defaultMetric value at its largest step (highest first). Hovering an item highlights
// the matching marks in the chart.
function buildLegend() {
  const items = container.querySelector('.legend .items');
  if (!items) return;
  items.innerHTML = '';

  // Score used for ordering only; runs without a last-step row rank as 0.
  const finalScore = rows => {
    const lastStep = d3.max(rows, r => +r[STEP_COL]);
    const lastRow = rows.find(r => +r[STEP_COL] === lastStep);
    return lastRow ? +lastRow[defaultMetric] : 0;
  };
  const ranked = Array.from(
    d3.group(allData, d => d[RUN_COL]),
    ([raw, rows]) => ({ raw, score: finalScore(rows) })
  );
  ranked.sort((a, b) => b.score - a.score);

  for (const { raw } of ranked) {
    if (isBaseline(raw)) continue;

    const name = displayName(raw);
    const el = document.createElement('span');
    el.className = 'item';
    el.setAttribute('data-name', name);

    const sw = document.createElement('span');
    sw.className = 'swatch';
    sw.style.background = colorMap[raw] || '#999';
    if (isShaded(raw)) {
      // Diagonal stripes over the base color mark shaded runs.
      sw.style.backgroundImage = 'repeating-linear-gradient(45deg, transparent, transparent 2px, rgba(255,255,255,0.4) 2px, rgba(255,255,255,0.4) 4px)';
    }

    const txt = document.createElement('span');
    txt.textContent = name;

    el.appendChild(sw);
    el.appendChild(txt);
    items.appendChild(el);
    el.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    el.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  }
}
|
| 778 |
+
|
| 779 |
+
buildUI();
|
| 780 |
+
|
| 781 |
+
// ─── DATA LOADING ───
|
| 782 |
+
/**
 * Fetch the first path that responds successfully and return its body as text.
 * Paths are tried in order; a network error, a non-OK status or a failed body read moves
 * on to the next path.
 *
 * @param {string[]} paths - candidate URLs, most preferred first
 * @returns {Promise<string>} response text of the first successful fetch
 * @throws {Error} 'CSV not found' when no path succeeds; the last failure, if any, is
 *   attached as `cause` so it can be inspected when debugging.
 */
const fetchFirstAvailable = async (paths) => {
  let lastError = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.text();
      lastError = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Keep going: a later candidate may still be reachable.
      lastError = err;
    }
  }
  throw new Error('CSV not found', lastError ? { cause: lastError } : undefined);
};
|
| 788 |
+
|
| 789 |
+
// Walk up from the container to the nearest element with a non-empty data-datafiles attribute.
let dataMountEl = container;
while (dataMountEl) {
  if (dataMountEl.getAttribute?.('data-datafiles')) break;
  dataMountEl = dataMountEl.parentElement;
}

// The attribute holds either a single path or a JSON array of paths (detected by a leading '[').
let providedData = null;
try {
  const hasGetter = dataMountEl && dataMountEl.getAttribute;
  const attr = hasGetter ? dataMountEl.getAttribute('data-datafiles') : null;
  const trimmed = attr ? attr.trim() : '';
  if (trimmed) {
    providedData = trimmed.startsWith('[') ? JSON.parse(attr) : trimmed;
  }
} catch (_) {
  // Malformed JSON is ignored on purpose; the default CSV path below is used instead.
}

// Bare file names (no slash) are looked up under /data/; anything else is used as given.
const ensurePrefix = (p) => {
  const isBareName = typeof p === 'string' && p && !p.includes('/');
  return isBareName ? `/data/${p}` : p;
};

let csvPaths;
if (!providedData) {
  csvPaths = ['../data/benchmark-results.csv'];
} else if (Array.isArray(providedData)) {
  csvPaths = providedData.map(ensurePrefix);
} else {
  csvPaths = [ensurePrefix(providedData)];
}
|
| 801 |
+
|
| 802 |
+
// Load the CSV, derive the averaged runs and metric list, draw the chart and legend, and
// keep the chart in sync with size changes. Any failure is shown as a message in the container.
(async () => {
  const text = await fetchFirstAvailable(csvPaths);
  const parsed = d3.csvParse(text);
  parsedData = parsed;

  if (SETUPS && setupNames.length >= 2) {
    const avg = computeAverageData(parsed);
    avgDatasets = avg.datasets;
    // concat() returns a plain array, so the column list has to be carried over by hand.
    parsedData = parsed.concat(avg.data);
    parsedData.columns = parsed.columns;
    if (currentSetup === AVG_SETUP_KEY) DATASETS = { ...avgDatasets };
  }

  filterData();
  metricKeys = detectMetrics(allData.columns);
  if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
  populateMetricSelect();
  render();
  buildLegend();

  const rerender = () => render();
  if (window.ResizeObserver) {
    new ResizeObserver(rerender).observe(container);
  } else {
    window.addEventListener('resize', rerender);
  }
})().catch((e) => {
  const pre = document.createElement('pre');
  pre.textContent = 'Data load error: ' + (e && e.message ? e.message : e);
  pre.style.color = 'var(--danger, #b00020)';
  pre.style.fontSize = '12px';
  container.appendChild(pre);
});
|
| 830 |
+
};
|
| 831 |
+
|
| 832 |
+
// Start the chart once the DOM is parsed: right away if parsing has finished, otherwise on DOMContentLoaded.
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
}
|
| 834 |
+
})();
|
| 835 |
+
</script>
|
| 836 |
+
</body>
|
| 837 |
+
</html>
|
app/presentation/se2026/charts/cost-efficiency.html
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html data-theme="dark">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 6 |
+
<title>Cost Efficiency Chart</title>
|
| 7 |
+
<style>
|
| 8 |
+
:root {
|
| 9 |
+
--text-color: rgba(255,255,255,0.88);
|
| 10 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 11 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 12 |
+
--border-color: rgba(255,255,255,0.1);
|
| 13 |
+
--axis-color: rgba(255,255,255,0.15);
|
| 14 |
+
--tick-color: rgba(255,255,255,0.5);
|
| 15 |
+
--grid-color: rgba(255,255,255,0.06);
|
| 16 |
+
--primary-color: #7c6ff7;
|
| 17 |
+
}
|
| 18 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 19 |
+
html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
|
| 20 |
+
</style>
|
| 21 |
+
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
| 22 |
+
</head>
|
| 23 |
+
<body>
|
| 24 |
+
<div class="d3-cost-efficiency" data-datafiles="../data/rephrasing_metadata.json" style="width:100%;margin:10px 0;min-height:400px;"></div>
|
| 25 |
+
<style>
|
| 26 |
+
.d3-cost-efficiency { font-family: system-ui, -apple-system, sans-serif; position: relative; }
|
| 27 |
+
.d3-cost-efficiency .d3-tooltip {
|
| 28 |
+
position: absolute; top: 0; left: 0;
|
| 29 |
+
transform: translate(-9999px, -9999px);
|
| 30 |
+
pointer-events: none;
|
| 31 |
+
padding: 10px 14px; border-radius: 10px;
|
| 32 |
+
font-size: 18px; line-height: 1.4;
|
| 33 |
+
border: 1px solid var(--border-color);
|
| 34 |
+
background: var(--surface-bg); color: var(--text-color);
|
| 35 |
+
box-shadow: 0 6px 24px rgba(0,0,0,.22);
|
| 36 |
+
opacity: 0; transition: opacity .12s ease;
|
| 37 |
+
z-index: 20; max-width: 340px;
|
| 38 |
+
}
|
| 39 |
+
.d3-cost-efficiency .controls {
|
| 40 |
+
display: flex; gap: 16px; align-items: center; justify-content: flex-start; flex-wrap: wrap;
|
| 41 |
+
margin-top: 4px;
|
| 42 |
+
}
|
| 43 |
+
.d3-cost-efficiency .control-group {
|
| 44 |
+
display: flex; flex-direction: column; align-items: flex-start; gap: 4px;
|
| 45 |
+
}
|
| 46 |
+
.d3-cost-efficiency .controls label {
|
| 47 |
+
font-size: 18px; font-weight: 700; color: var(--text-color);
|
| 48 |
+
}
|
| 49 |
+
.d3-cost-efficiency .controls select {
|
| 50 |
+
font-size: 18px; padding: 6px 28px 6px 10px; border: 1px solid var(--border-color);
|
| 51 |
+
border-radius: 8px; background: var(--surface-bg); color: var(--text-color);
|
| 52 |
+
appearance: none; cursor: pointer;
|
| 53 |
+
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath d='M3 5l3 3 3-3' stroke='%23888' stroke-width='1.5' fill='none'/%3E%3C/svg%3E");
|
| 54 |
+
background-repeat: no-repeat; background-position: right 8px center;
|
| 55 |
+
}
|
| 56 |
+
.d3-cost-efficiency .legend {
|
| 57 |
+
display: flex; align-items: center; gap: 10px; margin-top: 0; margin-left: auto;
|
| 58 |
+
}
|
| 59 |
+
.d3-cost-efficiency .legend-title { font-size: 18px; font-weight: 700; color: var(--text-color); }
|
| 60 |
+
.d3-cost-efficiency .legend .items { display: flex; flex-wrap: wrap; gap: 6px 14px; }
|
| 61 |
+
.d3-cost-efficiency .legend .item {
|
| 62 |
+
display: inline-flex; align-items: center; gap: 6px; white-space: nowrap;
|
| 63 |
+
font-size: 18px; color: var(--text-color); cursor: pointer;
|
| 64 |
+
}
|
| 65 |
+
.d3-cost-efficiency .legend .swatch {
|
| 66 |
+
width: 14px; height: 14px; border-radius: 3px; border: 1px solid var(--border-color);
|
| 67 |
+
}
|
| 68 |
+
</style>
|
| 69 |
+
<script>
|
| 70 |
+
(() => {
|
| 71 |
+
const bootstrap = () => {
|
| 72 |
+
const container = document.querySelector('.d3-cost-efficiency');
|
| 73 |
+
if (!container) return;
|
| 74 |
+
if (container.dataset.mounted === 'true') return;
|
| 75 |
+
container.dataset.mounted = 'true';
|
| 76 |
+
|
| 77 |
+
let mountEl = container;
|
| 78 |
+
while (mountEl && !mountEl.getAttribute?.('data-datafiles')) mountEl = mountEl.parentElement;
|
| 79 |
+
const dataAttr = mountEl?.getAttribute?.('data-datafiles');
|
| 80 |
+
const dataPaths = dataAttr
|
| 81 |
+
? [dataAttr.includes('/') ? dataAttr : `../data/${dataAttr}`]
|
| 82 |
+
: ['../data/rephrasing_metadata.json', './assets/data/rephrasing_metadata.json'];
|
| 83 |
+
|
| 84 |
+
/**
 * Fetch the first usable resource from an ordered list of candidate URLs.
 *
 * Each candidate is requested in turn; a candidate is skipped when the request
 * throws, the response is not `ok`, or its body cannot be decoded/parsed.
 *
 * @param {string[]} paths - Candidate URLs, tried in order.
 * @param {(text: string) => any} [parse] - Optional parser applied to the response
 *   text. When omitted, the body is decoded as JSON.
 * @returns {Promise<any>} The parsed payload of the first candidate that succeeds.
 * @throws {Error} 'Data not found' when no candidate yields data; the last
 *   underlying failure (if any) is attached as `cause`.
 */
const fetchFirst = async (paths, parse) => {
  let lastError;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (!r.ok) continue;
      // Await the decode inside the try: returning the bare r.json() promise would let a
      // malformed body reject the whole lookup instead of falling through to the next path.
      return parse ? parse(await r.text()) : await r.json();
    } catch (err) {
      lastError = err;
    }
  }
  throw new Error('Data not found', lastError ? { cause: lastError } : undefined);
};
|
| 90 |
+
|
| 91 |
+
const csvPaths = ['../data/benchmark-results.csv', './assets/data/benchmark-results.csv'];
|
| 92 |
+
|
| 93 |
+
// Load the rephrasing metadata (JSON) and the benchmark results (CSV) in parallel,
// then hand both to buildChart. Any failure replaces the container content with a message.
Promise.all([
  fetchFirst(dataPaths),
  fetchFirst(csvPaths, d3.csvParse),
]).then(([data, csvRows]) => buildChart(data, csvRows)).catch((err) => {
  // Build the error node through the DOM and textContent so the message text
  // is never interpreted as markup.
  const pre = document.createElement('pre');
  pre.style.color = 'red';
  pre.style.padding = '12px';
  pre.textContent = `Error loading data: ${err && err.message ? err.message : err}`;
  container.textContent = '';
  container.appendChild(pre);
});
|
| 99 |
+
|
| 100 |
+
function buildChart(rawData, csvRows) {
|
| 101 |
+
const SOURCE_MAP = {
|
| 102 |
+
'fineweb-edu-hq-20BT': 'FW-Edu HQ', 'fineweb-edu-lq-20BT': 'FW-Edu LQ',
|
| 103 |
+
'dclm-37BT': 'DCLM', 'cosmopedia-25BT': 'Cosmopedia'
|
| 104 |
+
};
|
| 105 |
+
const PROMPT_LABELS = {
|
| 106 |
+
'article': 'Article', 'commentary': 'Commentary', 'discussion': 'Discussion',
|
| 107 |
+
'faq': 'FAQ', 'math': 'Math', 'table': 'Table', 'tutorial': 'Tutorial',
|
| 108 |
+
'distill': 'Distill', 'diverse_qa_pairs': 'Diverse QA',
|
| 109 |
+
'extract_knowledge': 'Extract Knowledge', 'knowledge_list': 'Knowledge List',
|
| 110 |
+
'wikipedia_style_rephrasing': 'Wikipedia Style',
|
| 111 |
+
'guided_rewrite_improved': 'Guided Rewrite+', 'guided_rewrite_original': 'Guided Rewrite'
|
| 112 |
+
};
|
| 113 |
+
const CAT_MAP = { 'format': 'Format', 'nemotron': 'Nemotron', 'rewire': 'REWIRE' };
|
| 114 |
+
/**
 * Map a model identifier to its model-family label.
 *
 * Matching is a case-insensitive substring test against an ordered list; the
 * first matching entry wins.
 *
 * @param {string} m - Model identifier.
 * @returns {string} The family label, or 'Other' when nothing matches or `m`
 *   is not a string.
 */
const getFamily = (m) => {
  // A missing or non-string model id has no family; avoid throwing on toLowerCase().
  if (typeof m !== 'string') return 'Other';
  const familyPatterns = [
    ['smollm', 'SmolLM2'],
    ['gemma', 'Gemma'],
    ['qwen', 'Qwen'],
    ['falcon', 'Falcon'],
    ['granite', 'Granite'],
    ['llama', 'Llama'],
  ];
  const ml = m.toLowerCase();
  const match = familyPatterns.find(([needle]) => ml.includes(needle));
  return match ? match[1] : 'Other';
};
|
| 124 |
+
const familyColors = {
|
| 125 |
+
'Gemma': '#5b9bd5', 'Qwen': '#e07b54', 'SmolLM2': '#e06b9e',
|
| 126 |
+
'Falcon': '#c9a046', 'Granite': '#9a8ec2', 'Llama': '#8bc474'
|
| 127 |
+
};
|
| 128 |
+
const familyOrder = ['Gemma', 'Qwen', 'SmolLM2', 'Falcon', 'Granite', 'Llama'];
|
| 129 |
+
|
| 130 |
+
const METRICS = [
|
| 131 |
+
{ key: 'agg_score_macro', label: 'Aggregate Score (Macro)', group: 'Aggregate' },
|
| 132 |
+
{ key: 'agg_score_micro', label: 'Aggregate Score (Micro)', group: 'Aggregate' },
|
| 133 |
+
{ key: 'agg_score_RC', label: 'Reading Comprehension', group: 'Aggregate' },
|
| 134 |
+
{ key: 'agg_score_GK', label: 'General Knowledge', group: 'Aggregate' },
|
| 135 |
+
{ key: 'agg_score_NLU', label: 'Natural Language Understanding', group: 'Aggregate' },
|
| 136 |
+
{ key: 'agg_score_MATH', label: 'Math', group: 'Aggregate' },
|
| 137 |
+
{ key: 'agg_score_TABLE', label: 'Table Understanding', group: 'Aggregate' },
|
| 138 |
+
{ key: 'agg_score_RES', label: 'Reasoning', group: 'Aggregate' },
|
| 139 |
+
{ key: 'arc_cf:easy', label: 'ARC-Easy', group: 'Individual' },
|
| 140 |
+
{ key: 'drop', label: 'DROP', group: 'Individual' },
|
| 141 |
+
{ key: 'gsm8k', label: 'GSM8K', group: 'Individual' },
|
| 142 |
+
{ key: 'hellaswag_cf', label: 'HellaSwag', group: 'Individual' },
|
| 143 |
+
{ key: 'openbookqa_cf', label: 'OpenBookQA', group: 'Individual' },
|
| 144 |
+
{ key: 'piqa_cf', label: 'PIQA', group: 'Individual' },
|
| 145 |
+
{ key: 'squad_v2', label: 'SQuAD v2', group: 'Individual' },
|
| 146 |
+
{ key: 'treb_qa', label: 'TriviaQA', group: 'Individual' },
|
| 147 |
+
{ key: 'wikitablequestions', label: 'WikiTableQuestions', group: 'Individual' },
|
| 148 |
+
{ key: 'winogrande_cf', label: 'Winogrande', group: 'Individual' },
|
| 149 |
+
{ key: 'xcsqa_cf', label: 'XCSQA', group: 'Individual' },
|
| 150 |
+
{ key: 'mmlu_redux_cf:_average', label: 'MMLU Redux', group: 'Individual' }
|
| 151 |
+
];
|
| 152 |
+
// Translate a metric key into its CSV column name: aggregate scores ('agg_score_*')
// are used as-is, every other key is wrapped in the 'lighteval|<key>|3/prob_norm_token' pattern.
const CSV_COL = (key) => (
  key.startsWith('agg_score_') ? key : `lighteval|${key}|3/prob_norm_token`
);
|
| 156 |
+
|
| 157 |
+
const experiments = rawData.map(d => {
|
| 158 |
+
const [cat, promptFile] = d.prompt.split('/');
|
| 159 |
+
const promptKey = promptFile.replace('.md', '');
|
| 160 |
+
return {
|
| 161 |
+
run: d.run,
|
| 162 |
+
cat: CAT_MAP[cat] || cat,
|
| 163 |
+
prompt: PROMPT_LABELS[promptKey] || promptKey,
|
| 164 |
+
model: d.model.split('/').pop(),
|
| 165 |
+
source: SOURCE_MAP[d.source_dataset] || d.source_dataset,
|
| 166 |
+
family: getFamily(d.model),
|
| 167 |
+
gpuSeconds: d.gpu_time_seconds,
|
| 168 |
+
tpsPerGpu: d.output_tps_per_gpu,
|
| 169 |
+
outputTokens: d.output_tokens,
|
| 170 |
+
numDocs: d.num_documents,
|
| 171 |
+
results: d.results
|
| 172 |
+
};
|
| 173 |
+
});
|
| 174 |
+
|
| 175 |
+
/**
 * Format a duration in seconds as a compact label with at most two units
 * (years/months, months/weeks, weeks/days, or days alone).
 *
 * Uses 365-day years, 30-day months and 7-day weeks. The minor unit is rounded;
 * when rounding reaches a whole major unit it is carried over, so the result
 * is '2w' rather than '1w 7d' and '2y' rather than '1y 12mo'.
 *
 * @param {number} sec - Duration in seconds.
 * @returns {string} Label such as '3d', '1w 3d', '2mo', '1y 4mo'.
 */
const fmtGpuTime = (sec) => {
  const d = sec / 86400;
  // Omit the minor part entirely when it is zero.
  const label = (major, majorUnit, minor, minorUnit) => (
    minor ? `${major}${majorUnit} ${minor}${minorUnit}` : `${major}${majorUnit}`
  );

  if (d >= 365) {
    let y = Math.floor(d / 365);
    let mo = Math.round((d % 365) / 30);
    // The remainder can round up to 12 months: carry into the year count.
    if (mo >= 12) { y += 1; mo = 0; }
    return label(y, 'y', mo, 'mo');
  }
  if (d >= 30) {
    const mo = Math.floor(d / 30);
    const w = Math.round((d % 30) / 7);
    return label(mo, 'mo', w, 'w');
  }
  if (d >= 7) {
    let w = Math.floor(d / 7);
    let dd = Math.round(d % 7);
    // The remainder can round up to 7 days: carry into the week count.
    if (dd >= 7) { w += 1; dd = 0; }
    return label(w, 'w', dd, 'd');
  }
  return `${Math.round(d)}d`;
};
|
| 182 |
+
|
| 183 |
+
/**
 * Compute the cost/score Pareto frontier for one metric.
 *
 * Points are scanned by increasing `gpuSeconds`; a point joins the frontier
 * only if its score is strictly higher than every cheaper point's score.
 * Points whose score for `metricKey` is null, undefined or NaN are ignored.
 * The input array is not mutated.
 *
 * @param {Array<{gpuSeconds: number, results: Object}>} data - Points to examine.
 * @param {string} metricKey - Key into each point's `results`.
 * @returns {Array} Frontier points (the same object references), ordered by cost.
 */
const pareto = (data, metricKey) => {
  const scoreOf = (pt) => pt.results[metricKey];
  const candidates = data.filter((pt) => {
    const score = scoreOf(pt);
    return score != null && !Number.isNaN(score);
  });
  // Among points with equal cost, visit the best score first so that a
  // lower-scoring point at the same cost (which is dominated) is never kept.
  candidates.sort((a, b) => (a.gpuSeconds - b.gpuSeconds) || (scoreOf(b) - scoreOf(a)));

  const frontier = [];
  let bestScore = -Infinity;
  for (const pt of candidates) {
    const score = scoreOf(pt);
    if (score > bestScore) {
      bestScore = score;
      frontier.push(pt);
    }
  }
  return frontier;
};
|
| 197 |
+
|
| 198 |
+
const BASELINE_RUNS = {
|
| 199 |
+
'dclm': { label: 'DCLM', synthetic: false },
|
| 200 |
+
'fw_edu_hq': { label: 'FW-Edu HQ', synthetic: false },
|
| 201 |
+
'fw_edu_lq': { label: 'FW-Edu LQ', synthetic: false },
|
| 202 |
+
'ultra-fineweb': { label: 'Ultra-FineWeb', synthetic: false },
|
| 203 |
+
'cosmopedia': { label: 'Cosmopedia', synthetic: true },
|
| 204 |
+
'nemotron_hq_synth': { label: 'Nemotron-HQ-Synth', synthetic: true },
|
| 205 |
+
'rewire': { label: 'REWIRE', synthetic: true },
|
| 206 |
+
'synth_query_reasoning_answer': { label: 'SYNTH', synthetic: true }
|
| 207 |
+
};
|
| 208 |
+
const BASELINE_COLOR = '#86a1a9';
|
| 209 |
+
const SYNTH_BASELINE_COLOR = '#b07cc8';
|
| 210 |
+
const metricKeys = METRICS.map(m => m.key);
|
| 211 |
+
const baselines = [];
|
| 212 |
+
const bestStep = {};
|
| 213 |
+
for (const row of csvRows) {
|
| 214 |
+
const run = row.runname;
|
| 215 |
+
if (!(run in BASELINE_RUNS)) continue;
|
| 216 |
+
const step = +row.steps;
|
| 217 |
+
if (!(run in bestStep) || step > bestStep[run].step) {
|
| 218 |
+
const results = {};
|
| 219 |
+
for (const k of metricKeys) results[k] = +row[CSV_COL(k)];
|
| 220 |
+
bestStep[run] = { step, results };
|
| 221 |
+
}
|
| 222 |
+
}
|
| 223 |
+
for (const [run, info] of Object.entries(BASELINE_RUNS)) {
|
| 224 |
+
if (run in bestStep) baselines.push({ run, label: info.label, synthetic: info.synthetic, results: bestStep[run].results });
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
let currentMetric = METRICS[0].key;
|
| 228 |
+
|
| 229 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 230 |
+
const gGrid = svg.append('g').attr('class', 'grid');
|
| 231 |
+
const gPareto = svg.append('g').attr('class', 'pareto');
|
| 232 |
+
const gDots = svg.append('g').attr('class', 'dots');
|
| 233 |
+
const gBaselines = svg.append('g').attr('class', 'baselines');
|
| 234 |
+
const gAxes = svg.append('g').attr('class', 'axes');
|
| 235 |
+
|
| 236 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 237 |
+
let tipInner;
|
| 238 |
+
if (!tip) {
|
| 239 |
+
tip = document.createElement('div'); tip.className = 'd3-tooltip';
|
| 240 |
+
tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner';
|
| 241 |
+
tipInner.style.textAlign = 'left';
|
| 242 |
+
tip.appendChild(tipInner); container.appendChild(tip);
|
| 243 |
+
} else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
|
| 244 |
+
|
| 245 |
+
const margin = { top: 12, right: 16, bottom: 56, left: 70 };
|
| 246 |
+
|
| 247 |
+
function render() {
|
| 248 |
+
const width = container.clientWidth || 800;
|
| 249 |
+
const height = Math.min((window.innerHeight || 420) - 120, Math.max(200, Math.round(width / 3.5)));
|
| 250 |
+
svg.attr('width', width).attr('height', height);
|
| 251 |
+
const iw = width - margin.left - margin.right;
|
| 252 |
+
const ih = height - margin.top - margin.bottom;
|
| 253 |
+
|
| 254 |
+
const metricLabel = METRICS.find(m => m.key === currentMetric)?.label || currentMetric;
|
| 255 |
+
|
| 256 |
+
const xScale = d3.scaleLog()
|
| 257 |
+
.domain([5 * 86400, d3.max(experiments, d => d.gpuSeconds) * 1.2])
|
| 258 |
+
.range([margin.left, width - margin.right]);
|
| 259 |
+
|
| 260 |
+
const yVals = experiments.map(d => d.results[currentMetric]).filter(v => v != null)
|
| 261 |
+
.concat(baselines.map(d => d.results[currentMetric]).filter(v => v != null));
|
| 262 |
+
const yPad = (d3.max(yVals) - d3.min(yVals)) * 0.08;
|
| 263 |
+
const yScale = d3.scaleLinear()
|
| 264 |
+
.domain([d3.min(yVals) - yPad, d3.max(yVals) + yPad])
|
| 265 |
+
.range([height - margin.bottom, margin.top]);
|
| 266 |
+
|
| 267 |
+
const yTicks = yScale.ticks(6);
|
| 268 |
+
gGrid.selectAll('line').data(yTicks).join('line')
|
| 269 |
+
.attr('x1', margin.left).attr('x2', width - margin.right)
|
| 270 |
+
.attr('y1', d => yScale(d)).attr('y2', d => yScale(d))
|
| 271 |
+
.attr('stroke', 'var(--grid-color)').attr('stroke-width', 0.5);
|
| 272 |
+
|
| 273 |
+
gAxes.selectAll('*').remove();
|
| 274 |
+
const tickDays = [7, 14, 30, 60, 120, 240, 480];
|
| 275 |
+
const [xMin, xMax] = xScale.domain();
|
| 276 |
+
const tickValues = tickDays.map(d => d * 86400).filter(v => v >= xMin && v <= xMax);
|
| 277 |
+
const xAxis = d3.axisBottom(xScale).tickValues(tickValues).tickFormat(fmtGpuTime);
|
| 278 |
+
gAxes.append('g')
|
| 279 |
+
.attr('transform', `translate(0,${height - margin.bottom})`)
|
| 280 |
+
.call(xAxis)
|
| 281 |
+
.call(g => g.select('.domain').attr('stroke', 'var(--axis-color)'))
|
| 282 |
+
.call(g => g.selectAll('.tick line').attr('stroke', 'var(--tick-color)'))
|
| 283 |
+
.call(g => g.selectAll('.tick text').attr('fill', 'var(--tick-color)').attr('font-size', '18px'));
|
| 284 |
+
|
| 285 |
+
const yAxis = d3.axisLeft(yScale).ticks(6).tickFormat(v => { const s = v.toFixed(3); return s.replace(/0$/, ''); });
|
| 286 |
+
gAxes.append('g')
|
| 287 |
+
.attr('transform', `translate(${margin.left},0)`)
|
| 288 |
+
.call(yAxis)
|
| 289 |
+
.call(g => g.select('.domain').attr('stroke', 'var(--axis-color)'))
|
| 290 |
+
.call(g => g.selectAll('.tick line').attr('stroke', 'var(--tick-color)'))
|
| 291 |
+
.call(g => g.selectAll('.tick text').attr('fill', 'var(--tick-color)').attr('font-size', '18px'));
|
| 292 |
+
|
| 293 |
+
gAxes.append('text')
|
| 294 |
+
.attr('x', margin.left + iw / 2).attr('y', height - 4)
|
| 295 |
+
.attr('text-anchor', 'middle').attr('fill', 'var(--text-color)')
|
| 296 |
+
.attr('font-size', '18px').attr('font-weight', '600')
|
| 297 |
+
.text('GPU time (log scale)');
|
| 298 |
+
|
| 299 |
+
gAxes.append('text')
|
| 300 |
+
.attr('transform', `rotate(-90)`)
|
| 301 |
+
.attr('x', -(margin.top + ih / 2)).attr('y', 14)
|
| 302 |
+
.attr('text-anchor', 'middle').attr('fill', 'var(--text-color)')
|
| 303 |
+
.attr('font-size', '18px').attr('font-weight', '600')
|
| 304 |
+
.text(metricLabel);
|
| 305 |
+
|
| 306 |
+
const frontierPts = pareto(experiments, currentMetric);
|
| 307 |
+
const lineGen = d3.line()
|
| 308 |
+
.x(d => xScale(d.gpuSeconds))
|
| 309 |
+
.y(d => yScale(d.results[currentMetric]));
|
| 310 |
+
|
| 311 |
+
const extendedFrontier = [...frontierPts];
|
| 312 |
+
if (frontierPts.length > 0) {
|
| 313 |
+
const last = frontierPts[frontierPts.length - 1];
|
| 314 |
+
extendedFrontier.push({ gpuSeconds: xScale.domain()[1], results: { [currentMetric]: last.results[currentMetric] } });
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
gPareto.selectAll('path').data([extendedFrontier]).join('path')
|
| 318 |
+
.attr('d', lineGen)
|
| 319 |
+
.attr('fill', 'none')
|
| 320 |
+
.attr('stroke', 'var(--primary-color)')
|
| 321 |
+
.attr('stroke-width', 2)
|
| 322 |
+
.attr('stroke-dasharray', '6,4')
|
| 323 |
+
.attr('opacity', 0.6);
|
| 324 |
+
|
| 325 |
+
const rBase = Math.max(5, Math.min(9, width * 0.008));
|
| 326 |
+
|
| 327 |
+
gDots.selectAll('circle').data(experiments, d => d.run).join('circle')
|
| 328 |
+
.attr('cx', d => xScale(d.gpuSeconds))
|
| 329 |
+
.attr('cy', d => yScale(d.results[currentMetric]))
|
| 330 |
+
.attr('r', rBase)
|
| 331 |
+
.attr('fill', d => familyColors[d.family] || '#999')
|
| 332 |
+
.attr('fill-opacity', 0.8)
|
| 333 |
+
.attr('stroke', d => familyColors[d.family] || '#999')
|
| 334 |
+
.attr('stroke-width', 1.5)
|
| 335 |
+
.attr('stroke-opacity', 0.3)
|
| 336 |
+
.attr('cursor', 'pointer')
|
| 337 |
+
.on('mouseenter', function(ev, d) {
|
| 338 |
+
d3.select(this).attr('r', rBase * 1.6).attr('fill-opacity', 1).attr('stroke-opacity', 0.8);
|
| 339 |
+
gDots.selectAll('circle').filter(c => c !== d)
|
| 340 |
+
.attr('fill-opacity', 0.2).attr('stroke-opacity', 0.1);
|
| 341 |
+
gBaselines.selectAll('circle').attr('fill-opacity', 0.12).attr('stroke-opacity', 0.2);
|
| 342 |
+
const score = d.results[currentMetric];
|
| 343 |
+
tipInner.innerHTML =
|
| 344 |
+
`<div style="font-weight:700;font-size:14px;margin-bottom:4px;">${d.prompt} (${d.cat})</div>` +
|
| 345 |
+
`<div style="font-size:12px;color:var(--muted-color);margin-bottom:6px;">` +
|
| 346 |
+
`<span style="display:inline-block;width:10px;height:10px;border-radius:50%;background:${familyColors[d.family]};margin-right:4px;vertical-align:middle;"></span>` +
|
| 347 |
+
`${d.model} · ${d.source}</div>` +
|
| 348 |
+
`<div style="display:grid;grid-template-columns:auto 1fr;gap:2px 10px;font-size:13px;">` +
|
| 349 |
+
`<span style="color:var(--muted-color);">GPU time</span><span>${fmtGpuTime(d.gpuSeconds)}</span>` +
|
| 350 |
+
`<span style="color:var(--muted-color);">TPS/GPU</span><span>${d.tpsPerGpu.toLocaleString()}</span>` +
|
| 351 |
+
`<span style="color:var(--muted-color);">Output tokens</span><span>${(d.outputTokens / 1e9).toFixed(1)}B</span>` +
|
| 352 |
+
`<span style="color:var(--muted-color);">Documents</span><span>${(d.numDocs / 1e6).toFixed(1)}M</span>` +
|
| 353 |
+
`<span style="color:var(--muted-color);">${metricLabel}</span><span style="font-weight:700;">${score != null ? score.toFixed(4) : 'N/A'}</span>` +
|
| 354 |
+
`</div>`;
|
| 355 |
+
tip.style.opacity = '1';
|
| 356 |
+
})
|
| 357 |
+
.on('mousemove', (ev) => {
|
| 358 |
+
const [mx, my] = d3.pointer(ev, container);
|
| 359 |
+
const bw = tip.offsetWidth || 280;
|
| 360 |
+
const bh = tip.offsetHeight || 160;
|
| 361 |
+
const ox = (mx + bw + 20 > width) ? -(bw + 12) : 14;
|
| 362 |
+
const oy = (my + bh + 20 > (height + 60)) ? -(bh + 12) : 14;
|
| 363 |
+
tip.style.transform = `translate(${Math.round(mx + ox)}px,${Math.round(my + oy)}px)`;
|
| 364 |
+
})
|
| 365 |
+
.on('mouseleave', function() {
|
| 366 |
+
gDots.selectAll('circle').attr('r', rBase).attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
|
| 367 |
+
gBaselines.selectAll('circle').attr('r', rBase * 1.1).attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
|
| 368 |
+
tip.style.opacity = '0';
|
| 369 |
+
tip.style.transform = 'translate(-9999px,-9999px)';
|
| 370 |
+
});
|
| 371 |
+
|
| 372 |
+
const bx = margin.left + rBase + 10;
|
| 373 |
+
const bColor = d => d.synthetic ? SYNTH_BASELINE_COLOR : BASELINE_COLOR;
|
| 374 |
+
gBaselines.selectAll('circle').data(baselines, d => d.run).join('circle')
|
| 375 |
+
.attr('cx', bx)
|
| 376 |
+
.attr('cy', d => yScale(d.results[currentMetric]))
|
| 377 |
+
.attr('r', rBase * 1.1)
|
| 378 |
+
.attr('fill', bColor)
|
| 379 |
+
.attr('fill-opacity', 0.35)
|
| 380 |
+
.attr('stroke', bColor)
|
| 381 |
+
.attr('stroke-width', 2)
|
| 382 |
+
.attr('stroke-opacity', 0.6)
|
| 383 |
+
.attr('cursor', 'pointer')
|
| 384 |
+
.on('mouseenter', function(ev, d) {
|
| 385 |
+
d3.select(this).attr('r', rBase * 1.8).attr('fill-opacity', 0.6).attr('stroke-opacity', 1);
|
| 386 |
+
gDots.selectAll('circle').attr('fill-opacity', 0.15).attr('stroke-opacity', 0.08);
|
| 387 |
+
const score = d.results[currentMetric];
|
| 388 |
+
const tag = d.synthetic ? 'synthetic baseline' : 'baseline';
|
| 389 |
+
tipInner.innerHTML =
|
| 390 |
+
`<div style="font-weight:700;font-size:14px;margin-bottom:4px;">${d.label} <span style="font-weight:400;font-size:12px;color:var(--muted-color);">(${tag})</span></div>` +
|
| 391 |
+
`<div style="display:grid;grid-template-columns:auto 1fr;gap:2px 10px;font-size:13px;">` +
|
| 392 |
+
`<span style="color:var(--muted-color);">GPU time</span><span>0 (no rephrasing)</span>` +
|
| 393 |
+
`<span style="color:var(--muted-color);">${metricLabel}</span><span style="font-weight:700;">${score != null ? score.toFixed(4) : 'N/A'}</span>` +
|
| 394 |
+
`</div>`;
|
| 395 |
+
tip.style.opacity = '1';
|
| 396 |
+
})
|
| 397 |
+
.on('mousemove', (ev) => {
|
| 398 |
+
const [mx, my] = d3.pointer(ev, container);
|
| 399 |
+
const bw = tip.offsetWidth || 280;
|
| 400 |
+
const bh = tip.offsetHeight || 100;
|
| 401 |
+
const ox = (mx + bw + 20 > width) ? -(bw + 12) : 14;
|
| 402 |
+
const oy = (my + bh + 20 > (height + 60)) ? -(bh + 12) : 14;
|
| 403 |
+
tip.style.transform = `translate(${Math.round(mx + ox)}px,${Math.round(my + oy)}px)`;
|
| 404 |
+
})
|
| 405 |
+
.on('mouseleave', function() {
|
| 406 |
+
gBaselines.selectAll('circle').attr('r', rBase * 1.1).attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
|
| 407 |
+
gDots.selectAll('circle').attr('r', rBase).attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
|
| 408 |
+
tip.style.opacity = '0';
|
| 409 |
+
tip.style.transform = 'translate(-9999px,-9999px)';
|
| 410 |
+
});
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
const controls = document.createElement('div'); controls.className = 'controls';
|
| 414 |
+
const cg = document.createElement('div'); cg.className = 'control-group';
|
| 415 |
+
const lbl = document.createElement('label'); lbl.textContent = 'Metric'; lbl.setAttribute('for', 'ce-metric-select');
|
| 416 |
+
const sel = document.createElement('select'); sel.id = 'ce-metric-select';
|
| 417 |
+
const groups = {};
|
| 418 |
+
METRICS.forEach(m => { (groups[m.group] = groups[m.group] || []).push(m); });
|
| 419 |
+
for (const [gName, gMetrics] of Object.entries(groups)) {
|
| 420 |
+
const og = document.createElement('optgroup'); og.label = gName;
|
| 421 |
+
gMetrics.forEach(m => { const o = document.createElement('option'); o.value = m.key; o.textContent = m.label; og.appendChild(o); });
|
| 422 |
+
sel.appendChild(og);
|
| 423 |
+
}
|
| 424 |
+
sel.value = currentMetric;
|
| 425 |
+
sel.addEventListener('change', () => { currentMetric = sel.value; render(); });
|
| 426 |
+
cg.appendChild(lbl); cg.appendChild(sel); controls.appendChild(cg);
|
| 427 |
+
|
| 428 |
+
const legend = document.createElement('div'); legend.className = 'legend';
|
| 429 |
+
const ltitle = document.createElement('div'); ltitle.className = 'legend-title'; ltitle.textContent = 'Legend';
|
| 430 |
+
const items = document.createElement('div'); items.className = 'items';
|
| 431 |
+
familyOrder.forEach(fam => {
|
| 432 |
+
const el = document.createElement('span'); el.className = 'item';
|
| 433 |
+
const sw = document.createElement('span'); sw.className = 'swatch'; sw.style.background = familyColors[fam];
|
| 434 |
+
const txt = document.createElement('span'); txt.textContent = fam;
|
| 435 |
+
el.appendChild(sw); el.appendChild(txt); items.appendChild(el);
|
| 436 |
+
el.addEventListener('mouseenter', () => {
|
| 437 |
+
gDots.selectAll('circle').attr('fill-opacity', d => d.family === fam ? 0.9 : 0.1)
|
| 438 |
+
.attr('stroke-opacity', d => d.family === fam ? 0.6 : 0.05);
|
| 439 |
+
gBaselines.selectAll('circle').attr('fill-opacity', 0.12).attr('stroke-opacity', 0.2);
|
| 440 |
+
});
|
| 441 |
+
el.addEventListener('mouseleave', () => {
|
| 442 |
+
gDots.selectAll('circle').attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
|
| 443 |
+
gBaselines.selectAll('circle').attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
|
| 444 |
+
});
|
| 445 |
+
});
|
| 446 |
+
legend.appendChild(ltitle); legend.appendChild(items); controls.appendChild(legend); container.appendChild(controls);
|
| 447 |
+
|
| 448 |
+
[[false, BASELINE_COLOR, 'Baselines'],
|
| 449 |
+
[true, SYNTH_BASELINE_COLOR, 'Synthetic baselines']].forEach(([isSynth, c, text]) => {
|
| 450 |
+
const el = document.createElement('span'); el.className = 'item';
|
| 451 |
+
el.innerHTML = `<svg width="14" height="14" style="vertical-align:middle;"><circle cx="7" cy="7" r="6" fill="${c}" fill-opacity="0.35" stroke="${c}" stroke-width="2" stroke-opacity="0.6"/></svg><span>${text}</span>`;
|
| 452 |
+
items.appendChild(el);
|
| 453 |
+
el.addEventListener('mouseenter', () => {
|
| 454 |
+
gBaselines.selectAll('circle')
|
| 455 |
+
.attr('fill-opacity', d => d.synthetic === isSynth ? 0.6 : 0.1)
|
| 456 |
+
.attr('stroke-opacity', d => d.synthetic === isSynth ? 1 : 0.15);
|
| 457 |
+
gDots.selectAll('circle').attr('fill-opacity', 0.15).attr('stroke-opacity', 0.08);
|
| 458 |
+
});
|
| 459 |
+
el.addEventListener('mouseleave', () => {
|
| 460 |
+
gBaselines.selectAll('circle').attr('fill-opacity', 0.35).attr('stroke-opacity', 0.6);
|
| 461 |
+
gDots.selectAll('circle').attr('fill-opacity', 0.8).attr('stroke-opacity', 0.3);
|
| 462 |
+
});
|
| 463 |
+
});
|
| 464 |
+
|
| 465 |
+
const paretoItem = document.createElement('span'); paretoItem.className = 'item';
|
| 466 |
+
paretoItem.innerHTML = `<svg width="20" height="14" style="vertical-align:middle;"><line x1="0" y1="7" x2="20" y2="7" stroke="var(--primary-color)" stroke-width="2" stroke-dasharray="4,3" opacity="0.6"/></svg><span>Pareto frontier</span>`;
|
| 467 |
+
items.appendChild(paretoItem);
|
| 468 |
+
|
| 469 |
+
render();
|
| 470 |
+
if (window.ResizeObserver) new ResizeObserver(() => render()).observe(container);
|
| 471 |
+
else window.addEventListener('resize', render);
|
| 472 |
+
}
|
| 473 |
+
};
|
| 474 |
+
|
| 475 |
+
if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
|
| 476 |
+
else bootstrap();
|
| 477 |
+
})();
|
| 478 |
+
</script>
|
| 479 |
+
</body>
|
| 480 |
+
</html>
|
app/presentation/se2026/charts/experiment-flow.html
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html data-theme="dark" lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Experiment Flow</title>
|
| 7 |
+
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
| 8 |
+
<script src="https://cdn.jsdelivr.net/npm/d3-sankey@0.12.3/dist/d3-sankey.min.js"></script>
|
| 9 |
+
<style>
|
| 10 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 11 |
+
html, body { width: 100%; height: 100%; background: transparent; overflow: visible; }
|
| 12 |
+
:root {
|
| 13 |
+
--text-color: rgba(255,255,255,0.88);
|
| 14 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 15 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 16 |
+
--border-color: rgba(255,255,255,0.1);
|
| 17 |
+
--primary-color: #7c6ff7;
|
| 18 |
+
}
|
| 19 |
+
.d3-experiment-overview { position: relative; font-family: system-ui, -apple-system, sans-serif; }
|
| 20 |
+
</style>
|
| 21 |
+
</head>
|
| 22 |
+
<body>
|
| 23 |
+
<div class="d3-experiment-overview" data-datafiles="../data/rephrasing_metadata.json" style="width:100%;height:100%;min-height:300px;"></div>
|
| 24 |
+
<script>
|
| 25 |
+
(() => {
|
| 26 |
+
/**
 * Makes sure both d3 core and the d3-sankey plugin are available, then runs `cb`.
 *
 * Scripts are injected from the jsDelivr CDN only for the part that is actually
 * missing: if d3 core is already usable, only the sankey plugin is requested.
 * Injected tags carry fixed ids so repeated calls reuse the same tag.
 *
 * @param {Function} cb - invoked once `d3.select` and `d3.sankey` are functions.
 * @returns {*} the result of `cb()` when everything was already loaded,
 *   otherwise undefined (the callback then runs from a script `load` event).
 */
const ensureD3 = (cb) => {
  const hasCore = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  const hasSankey = () => hasCore() && typeof window.d3.sankey === 'function';

  if (hasSankey()) return cb();

  // Returns the <script> with the given id, creating and appending it if absent.
  const injectOnce = (id, src) => {
    let tag = document.getElementById(id);
    if (!tag) {
      tag = document.createElement('script');
      tag.id = id;
      tag.src = src;
      document.head.appendChild(tag);
    }
    return tag;
  };

  const loadSankey = () => {
    if (hasSankey()) return cb();
    injectOnce('d3-sankey-cdn', 'https://cdn.jsdelivr.net/npm/d3-sankey@0.12.3/dist/d3-sankey.min.js')
      .addEventListener('load', cb, { once: true });
  };

  // Check for an existing d3 core BEFORE injecting anything, so a page that
  // already loaded d3 does not download and execute a second copy.
  if (hasCore()) { loadSankey(); return; }

  injectOnce('d3-cdn-script', 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js')
    .addEventListener('load', loadSankey, { once: true });
};
|
| 49 |
+
|
| 50 |
+
const bootstrap = () => {
|
| 51 |
+
const scriptEl = document.currentScript;
|
| 52 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 53 |
+
if (!(container && container.classList && container.classList.contains('d3-experiment-overview'))) {
|
| 54 |
+
const cs = Array.from(document.querySelectorAll('.d3-experiment-overview'))
|
| 55 |
+
.filter(el => !(el.dataset && el.dataset.mounted === 'true'));
|
| 56 |
+
container = cs[cs.length - 1] || null;
|
| 57 |
+
}
|
| 58 |
+
if (!container) return;
|
| 59 |
+
if (container.dataset) {
|
| 60 |
+
if (container.dataset.mounted === 'true') return;
|
| 61 |
+
container.dataset.mounted = 'true';
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
// Read data path from HtmlEmbed attribute
|
| 65 |
+
let mountEl = container;
|
| 66 |
+
while (mountEl && !mountEl.getAttribute?.('data-datafiles')) mountEl = mountEl.parentElement;
|
| 67 |
+
const dataAttr = mountEl?.getAttribute?.('data-datafiles');
|
| 68 |
+
const dataPaths = dataAttr
|
| 69 |
+
? [dataAttr.includes('/') ? dataAttr : `/data/${dataAttr}`]
|
| 70 |
+
: ['../data/rephrasing_metadata.json'];
|
| 71 |
+
|
| 72 |
+
/**
 * Tries each candidate URL in order and resolves with the parsed JSON body of
 * the first response whose `ok` flag is set.
 *
 * @param {string[]} paths - candidate URLs, tried sequentially.
 * @returns {Promise<*>} parsed JSON of the first successful response.
 * @throws {Error} 'Data not found' when no path yields an OK response; the
 *   last underlying failure (network error or HTTP status) is attached as
 *   `cause` so it is not lost.
 */
const fetchFirst = async (paths) => {
  let lastFailure = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      // Not awaited on purpose: a JSON parse failure rejects the returned
      // promise directly instead of falling through to the next path.
      if (r.ok) return r.json();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Remember the failure instead of swallowing it, then try the next path.
      lastFailure = err;
    }
  }
  const notFound = new Error('Data not found');
  if (lastFailure) notFound.cause = lastFailure;
  throw notFound;
};
|
| 78 |
+
|
| 79 |
+
fetchFirst(dataPaths).then(data => buildChart(data)).catch(err => {
|
| 80 |
+
container.innerHTML = `<pre style="color:red;padding:12px;">Error loading data: ${err.message}</pre>`;
|
| 81 |
+
});
|
| 82 |
+
|
| 83 |
+
function buildChart(rawData) {
|
| 84 |
+
// Map source dataset strings to display names
|
| 85 |
+
const sourceMap = {
|
| 86 |
+
'fineweb-edu-hq-20BT': 'FW-Edu HQ',
|
| 87 |
+
'fineweb-edu-lq-20BT': 'FW-Edu LQ',
|
| 88 |
+
'dclm-37BT': 'DCLM',
|
| 89 |
+
'cosmopedia-25BT': 'Cosmopedia',
|
| 90 |
+
};
|
| 91 |
+
|
| 92 |
+
// Map prompt paths to display names and categories
|
| 93 |
+
const promptMap = {
|
| 94 |
+
'format/tutorial.md': { name: 'Tutorial', cat: 'Format' },
|
| 95 |
+
'format/faq.md': { name: 'FAQ', cat: 'Format' },
|
| 96 |
+
'format/math.md': { name: 'Math', cat: 'Format' },
|
| 97 |
+
'format/table.md': { name: 'Table', cat: 'Format' },
|
| 98 |
+
'format/commentary.md': { name: 'Commentary', cat: 'Format' },
|
| 99 |
+
'format/discussion.md': { name: 'Discussion', cat: 'Format' },
|
| 100 |
+
'format/article.md': { name: 'Article', cat: 'Format' },
|
| 101 |
+
'nemotron/diverse_qa_pairs.md': { name: 'Diverse QA', cat: 'Nemotron' },
|
| 102 |
+
'nemotron/knowledge_list.md': { name: 'Knowledge List', cat: 'Nemotron' },
|
| 103 |
+
'nemotron/wikipedia_style_rephrasing.md': { name: 'Wikipedia Style', cat: 'Nemotron' },
|
| 104 |
+
'nemotron/extract_knowledge.md': { name: 'Extract Knowledge', cat: 'Nemotron' },
|
| 105 |
+
'nemotron/distill.md': { name: 'Distill', cat: 'Nemotron' },
|
| 106 |
+
'rewire/guided_rewrite_original.md': { name: 'Guided Rewrite', cat: 'REWIRE' },
|
| 107 |
+
'rewire/guided_rewrite_improved.md': { name: 'Guided Rewrite+', cat: 'REWIRE' },
|
| 108 |
+
};
|
| 109 |
+
|
| 110 |
+
// Map model IDs to family names
|
| 111 |
+
/**
 * Maps a model identifier to its display family name.
 *
 * Matching is a case-insensitive substring test, checked in the listed order;
 * the first family whose keyword occurs in the id wins.
 *
 * @param {string} modelId - model identifier taken from an experiment record.
 * @returns {string} the family name, or `modelId` itself when no family
 *   matches. Non-string input (e.g. a record without `model`) is returned
 *   unchanged instead of throwing.
 */
const modelFamilyMap = (modelId) => {
  if (typeof modelId !== 'string') return modelId;
  const families = [
    ['gemma', 'Gemma'],
    ['qwen', 'Qwen'],
    ['falcon', 'Falcon'],
    ['granite', 'Granite'],
    ['llama', 'Llama'],
    ['smollm', 'SmolLM2'],
  ];
  const haystack = modelId.toLowerCase();
  const match = families.find(([keyword]) => haystack.includes(keyword));
  return match ? match[1] : modelId;
};
|
| 120 |
+
|
| 121 |
+
// Build link counts from data
|
| 122 |
+
const linkCounts = {};
|
| 123 |
+
const key = (a, b) => `${a}|||${b}`;
|
| 124 |
+
|
| 125 |
+
rawData.forEach(exp => {
|
| 126 |
+
const src = sourceMap[exp.source_dataset];
|
| 127 |
+
const promptInfo = promptMap[exp.prompt];
|
| 128 |
+
const family = modelFamilyMap(exp.model);
|
| 129 |
+
if (!src || !promptInfo) return;
|
| 130 |
+
|
| 131 |
+
const spKey = key(src, promptInfo.name);
|
| 132 |
+
linkCounts[spKey] = (linkCounts[spKey] || 0) + 1;
|
| 133 |
+
|
| 134 |
+
const pmKey = key(promptInfo.name, family);
|
| 135 |
+
linkCounts[pmKey] = (linkCounts[pmKey] || 0) + 1;
|
| 136 |
+
});
|
| 137 |
+
|
| 138 |
+
// Collect unique names in order
|
| 139 |
+
const sources = [...new Set(rawData.map(e => sourceMap[e.source_dataset]).filter(Boolean))];
|
| 140 |
+
const prompts = [...new Set(rawData.map(e => promptMap[e.prompt]?.name).filter(Boolean))];
|
| 141 |
+
const models = [...new Set(rawData.map(e => modelFamilyMap(e.model)).filter(Boolean))];
|
| 142 |
+
|
| 143 |
+
// Build node list
|
| 144 |
+
const nodes = [];
|
| 145 |
+
sources.forEach(name => nodes.push({ name, col: 'source' }));
|
| 146 |
+
prompts.forEach(name => {
|
| 147 |
+
const info = Object.values(promptMap).find(p => p.name === name);
|
| 148 |
+
nodes.push({ name, col: 'prompt', cat: info?.cat || 'Other' });
|
| 149 |
+
});
|
| 150 |
+
models.forEach(name => nodes.push({ name, col: 'model' }));
|
| 151 |
+
|
| 152 |
+
const ni = (name) => nodes.findIndex(n => n.name === name);
|
| 153 |
+
|
| 154 |
+
// Build links
|
| 155 |
+
const links = [];
|
| 156 |
+
Object.entries(linkCounts).forEach(([k, value]) => {
|
| 157 |
+
const [from, to] = k.split('|||');
|
| 158 |
+
const s = ni(from), t = ni(to);
|
| 159 |
+
if (s >= 0 && t >= 0) links.push({ source: s, target: t, value });
|
| 160 |
+
});
|
| 161 |
+
|
| 162 |
+
// Colors
|
| 163 |
+
const sourceColors = { 'FW-Edu HQ': '#6B8DB5', 'FW-Edu LQ': '#B58B9B', 'DCLM': '#7B82C8', 'Cosmopedia': '#8BA878' };
|
| 164 |
+
const catColors = { 'Format': '#4EA5B7', 'Nemotron': '#76b900', 'REWIRE': '#1877F2' };
|
| 165 |
+
const familyColors = { 'Gemma': '#5b9bd5', 'Qwen': '#e07b54', 'SmolLM2': '#e06b9e', 'Falcon': '#c9a046', 'Granite': '#9a8ec2', 'Llama': '#8bc474' };
|
| 166 |
+
|
| 167 |
+
/**
 * Resolves the fill colour of a Sankey node from the palette of its column:
 * source nodes and model nodes are keyed by name, prompt nodes by category.
 * Anything without a palette entry gets the neutral grey '#888'.
 */
const nodeColor = (d) => {
  let picked;
  switch (d.col) {
    case 'source':
      picked = sourceColors[d.name];
      break;
    case 'prompt':
      picked = catColors[d.cat];
      break;
    case 'model':
      picked = familyColors[d.name];
      break;
    default:
      picked = undefined;
  }
  return picked || '#888';
};
|
| 173 |
+
|
| 174 |
+
// SVG
|
| 175 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 176 |
+
|
| 177 |
+
const render = () => {
|
| 178 |
+
const width = container.clientWidth || 800;
|
| 179 |
+
const iframeH = window.innerHeight || 540;
|
| 180 |
+
const height = Math.min(iframeH, Math.max(400, width * 9 / 16));
|
| 181 |
+
svg.attr('width', width).attr('height', height);
|
| 182 |
+
svg.selectAll('*').remove();
|
| 183 |
+
|
| 184 |
+
const isDark = document.documentElement.getAttribute('data-theme') === 'dark';
|
| 185 |
+
const textColor = isDark ? 'rgba(255,255,255,0.78)' : 'rgba(0,0,0,0.68)';
|
| 186 |
+
const mutedText = isDark ? 'rgba(255,255,255,0.35)' : 'rgba(0,0,0,0.30)';
|
| 187 |
+
const linkOpacity = isDark ? 0.20 : 0.35;
|
| 188 |
+
const linkHoverOpacity = isDark ? 0.50 : 0.65;
|
| 189 |
+
const fontSize = Math.max(9, Math.min(12, width / 80));
|
| 190 |
+
|
| 191 |
+
const ml = width * 0.005, mr = width * 0.01;
|
| 192 |
+
const mt = 28, mb = height * 0.01;
|
| 193 |
+
|
| 194 |
+
const sankeyGen = d3.sankey()
|
| 195 |
+
.nodeId(d => d.index)
|
| 196 |
+
.nodeWidth(Math.max(8, width * 0.012))
|
| 197 |
+
.nodePadding(Math.max(4, height * 0.014))
|
| 198 |
+
.nodeSort(null)
|
| 199 |
+
.extent([[ml, mt], [width - mr, height - mb]]);
|
| 200 |
+
|
| 201 |
+
const graph = sankeyGen({
|
| 202 |
+
nodes: nodes.map((d, i) => ({ ...d, index: i })),
|
| 203 |
+
links: links.map(d => ({ ...d }))
|
| 204 |
+
});
|
| 205 |
+
|
| 206 |
+
// Column headers: one caption per Sankey column, positioned from the first
// laid-out node of that column, with a fixed fallback x when none is found.
const firstInColumn = (col) => graph.nodes.find(n => n.col === col);
const modelNodes = graph.nodes.filter(n => n.col === 'model');
const colLabels = [
  { text: 'Source Dataset', x: firstInColumn('source')?.x0 || ml, anchor: 'start' },
  { text: 'Prompt Strategy', x: firstInColumn('prompt')?.x1 || width * 0.35, anchor: 'end' },
  { text: 'Model Family', x: (modelNodes[0]?.x1 || width * 0.75), anchor: 'end' },
];
svg.selectAll('text.col-header')
  .data(colLabels).join('text')
  .attr('class', 'col-header')
  .attr('x', d => d.x).attr('y', mt - 8)
  .attr('text-anchor', d => d.anchor)
  .attr('fill', mutedText)
  .attr('font-size', (fontSize * 1.4) + 'px')
  .attr('font-weight', '700')
  .attr('font-family', 'system-ui, -apple-system, sans-serif')
  .attr('letter-spacing', '0.5px')
  // text-transform is a CSS property, not an SVG presentation attribute, so it
  // only takes effect when applied through style rather than attr.
  .style('text-transform', 'uppercase')
  .text(d => d.text);
|
| 225 |
+
|
| 226 |
+
// Category brackets for prompts
|
| 227 |
+
const catGroups = {};
|
| 228 |
+
graph.nodes.filter(n => n.col === 'prompt').forEach(n => {
|
| 229 |
+
if (!catGroups[n.cat]) catGroups[n.cat] = { min: Infinity, max: -Infinity };
|
| 230 |
+
catGroups[n.cat].min = Math.min(catGroups[n.cat].min, n.y0);
|
| 231 |
+
catGroups[n.cat].max = Math.max(catGroups[n.cat].max, n.y1);
|
| 232 |
+
});
|
| 233 |
+
const bracketX = (graph.nodes.find(n => n.col === 'prompt')?.x1 || 0) + 5;
|
| 234 |
+
Object.entries(catGroups).forEach(([cat, { min: y0, max: y1 }]) => {
|
| 235 |
+
const midY = (y0 + y1) / 2;
|
| 236 |
+
svg.append('line')
|
| 237 |
+
.attr('x1', bracketX).attr('x2', bracketX)
|
| 238 |
+
.attr('y1', y0 + 2).attr('y2', y1 - 2)
|
| 239 |
+
.attr('stroke', catColors[cat]).attr('stroke-width', 1.5)
|
| 240 |
+
.attr('stroke-opacity', 0.35).attr('stroke-linecap', 'round');
|
| 241 |
+
svg.append('text')
|
| 242 |
+
.attr('x', bracketX + 4).attr('y', midY)
|
| 243 |
+
.attr('dominant-baseline', 'central')
|
| 244 |
+
.attr('fill', catColors[cat]).attr('fill-opacity', 0.45)
|
| 245 |
+
.attr('font-size', (fontSize * 1.3) + 'px')
|
| 246 |
+
.attr('font-weight', '600')
|
| 247 |
+
.attr('font-family', 'system-ui, -apple-system, sans-serif')
|
| 248 |
+
.attr('letter-spacing', '0.3px')
|
| 249 |
+
.text(cat);
|
| 250 |
+
});
|
| 251 |
+
|
| 252 |
+
// Links
|
| 253 |
+
const gLinks = svg.append('g').attr('class', 'links');
|
| 254 |
+
const linkPath = d3.sankeyLinkHorizontal();
|
| 255 |
+
const linkEls = gLinks.selectAll('path')
|
| 256 |
+
.data(graph.links).join('path')
|
| 257 |
+
.attr('d', linkPath)
|
| 258 |
+
.attr('fill', 'none')
|
| 259 |
+
.attr('stroke', d => nodeColor(d.source))
|
| 260 |
+
.attr('stroke-width', d => Math.max(1, d.width))
|
| 261 |
+
.attr('stroke-opacity', linkOpacity)
|
| 262 |
+
.style('mix-blend-mode', isDark ? 'screen' : 'multiply');
|
| 263 |
+
|
| 264 |
+
// Nodes
|
| 265 |
+
const gNodes = svg.append('g').attr('class', 'nodes');
|
| 266 |
+
const nodeEls = gNodes.selectAll('rect')
|
| 267 |
+
.data(graph.nodes).join('rect')
|
| 268 |
+
.attr('x', d => d.x0).attr('y', d => d.y0)
|
| 269 |
+
.attr('width', d => d.x1 - d.x0)
|
| 270 |
+
.attr('height', d => Math.max(1, d.y1 - d.y0))
|
| 271 |
+
.attr('fill', d => nodeColor(d))
|
| 272 |
+
.attr('fill-opacity', 0.85).attr('rx', 2)
|
| 273 |
+
.attr('stroke', d => nodeColor(d))
|
| 274 |
+
.attr('stroke-width', 0.5).attr('stroke-opacity', 0.3);
|
| 275 |
+
|
| 276 |
+
// Node labels (interactive, same hover as node rects)
|
| 277 |
+
const gLabels = svg.append('g').attr('class', 'labels');
|
| 278 |
+
graph.nodes.forEach(d => {
|
| 279 |
+
const midY = (d.y0 + d.y1) / 2;
|
| 280 |
+
const isSource = d.col === 'source';
|
| 281 |
+
let labelX, anchor;
|
| 282 |
+
if (isSource) { labelX = d.x1 + 5; anchor = 'start'; }
|
| 283 |
+
else { labelX = d.x0 - 5; anchor = 'end'; }
|
| 284 |
+
|
| 285 |
+
const totalIn = (d.targetLinks || []).reduce((s, l) => s + l.value, 0);
|
| 286 |
+
const totalOut = (d.sourceLinks || []).reduce((s, l) => s + l.value, 0);
|
| 287 |
+
const total = Math.max(totalIn, totalOut);
|
| 288 |
+
|
| 289 |
+
gLabels.append('text')
|
| 290 |
+
.datum(d)
|
| 291 |
+
.attr('class', 'node-label')
|
| 292 |
+
.attr('x', labelX).attr('y', midY - (total > 1 ? fontSize * 0.3 : 0))
|
| 293 |
+
.attr('text-anchor', anchor).attr('dominant-baseline', 'central')
|
| 294 |
+
.attr('fill', textColor)
|
| 295 |
+
.attr('font-size', fontSize + 'px').attr('font-weight', '600')
|
| 296 |
+
.attr('font-family', 'system-ui, -apple-system, sans-serif')
|
| 297 |
+
.style('cursor', 'pointer')
|
| 298 |
+
.text(d.name);
|
| 299 |
+
|
| 300 |
+
if (total > 1) {
|
| 301 |
+
gLabels.append('text')
|
| 302 |
+
.datum(d)
|
| 303 |
+
.attr('class', 'node-label')
|
| 304 |
+
.attr('x', labelX).attr('y', midY + fontSize * 0.55)
|
| 305 |
+
.attr('text-anchor', anchor).attr('dominant-baseline', 'central')
|
| 306 |
+
.attr('fill', mutedText)
|
| 307 |
+
.attr('font-size', (fontSize * 0.8) + 'px')
|
| 308 |
+
.attr('font-family', 'system-ui, -apple-system, sans-serif')
|
| 309 |
+
.style('cursor', 'pointer')
|
| 310 |
+
.text(total + ' exp.');
|
| 311 |
+
}
|
| 312 |
+
});
|
| 313 |
+
|
| 314 |
+
// Tooltip
|
| 315 |
+
container.style.position = container.style.position || 'relative';
|
| 316 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 317 |
+
let tipInner;
|
| 318 |
+
if (!tip) {
|
| 319 |
+
tip = document.createElement('div');
|
| 320 |
+
tip.className = 'd3-tooltip';
|
| 321 |
+
Object.assign(tip.style, {
|
| 322 |
+
position: 'absolute', top: '0px', left: '0px',
|
| 323 |
+
transform: 'translate(-9999px, -9999px)',
|
| 324 |
+
pointerEvents: 'none', padding: '8px 12px', borderRadius: '10px',
|
| 325 |
+
fontSize: '12px', lineHeight: '1.4',
|
| 326 |
+
border: '1px solid var(--border-color)',
|
| 327 |
+
background: 'var(--surface-bg)', color: 'var(--text-color)',
|
| 328 |
+
boxShadow: '0 6px 24px rgba(0,0,0,.25)',
|
| 329 |
+
opacity: '0', transition: 'opacity .12s ease',
|
| 330 |
+
backdropFilter: 'saturate(1.12) blur(8px)',
|
| 331 |
+
zIndex: '20', maxWidth: '280px'
|
| 332 |
+
});
|
| 333 |
+
tipInner = document.createElement('div');
|
| 334 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 335 |
+
tip.appendChild(tipInner);
|
| 336 |
+
container.appendChild(tip);
|
| 337 |
+
} else {
|
| 338 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
const positionTip = (ev) => {
|
| 342 |
+
const [mx, my] = d3.pointer(ev, container);
|
| 343 |
+
const bw = tip.offsetWidth || 220, bh = tip.offsetHeight || 60;
|
| 344 |
+
const ox = (mx + bw + 20 > width) ? -(bw + 12) : 12;
|
| 345 |
+
const oy = (my + bh + 20 > height) ? -(bh + 12) : 14;
|
| 346 |
+
tip.style.transform = `translate(${Math.round(mx + ox)}px, ${Math.round(my + oy)}px)`;
|
| 347 |
+
};
|
| 348 |
+
const showTip = (ev, html) => { tipInner.innerHTML = html; tip.style.opacity = '1'; positionTip(ev); };
|
| 349 |
+
const hideTip = () => { tip.style.opacity = '0'; tip.style.transform = 'translate(-9999px, -9999px)'; };
|
| 350 |
+
|
| 351 |
+
// Interaction
|
| 352 |
+
linkEls
|
| 353 |
+
.on('mouseenter', function (ev, d) {
|
| 354 |
+
linkEls.attr('stroke-opacity', l => l === d ? linkHoverOpacity * 1.5 : linkOpacity * 0.3);
|
| 355 |
+
showTip(ev, `<b>${d.source.name}</b> \u2192 <b>${d.target.name}</b><br/><span style="color:var(--muted-color);">${d.value} experiment${d.value > 1 ? 's' : ''}</span>`);
|
| 356 |
+
})
|
| 357 |
+
.on('mousemove', positionTip)
|
| 358 |
+
.on('mouseleave', function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); });
|
| 359 |
+
|
| 360 |
+
// Shared node hover handlers (used by both rects and labels)
|
| 361 |
+
const onNodeEnter = function (ev, d) {
|
| 362 |
+
const connected = new Set();
|
| 363 |
+
(d.sourceLinks || []).forEach(l => connected.add(l.index));
|
| 364 |
+
(d.targetLinks || []).forEach(l => connected.add(l.index));
|
| 365 |
+
linkEls.attr('stroke-opacity', l => connected.has(l.index) ? linkHoverOpacity : linkOpacity * 0.15);
|
| 366 |
+
const totalIn = (d.targetLinks || []).reduce((s, l) => s + l.value, 0);
|
| 367 |
+
const totalOut = (d.sourceLinks || []).reduce((s, l) => s + l.value, 0);
|
| 368 |
+
const total = Math.max(totalIn, totalOut);
|
| 369 |
+
let info = `<b style="font-size:14px;">${d.name}</b>`;
|
| 370 |
+
if (d.cat) info += ` <span style="color:${catColors[d.cat]};font-size:12px;">(${d.cat})</span>`;
|
| 371 |
+
info += `<br/><span style="color:var(--muted-color);">${total} experiment${total > 1 ? 's' : ''}</span>`;
|
| 372 |
+
showTip(ev, info);
|
| 373 |
+
};
|
| 374 |
+
const onNodeLeave = function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); };
|
| 375 |
+
|
| 376 |
+
nodeEls.style('cursor', 'pointer')
|
| 377 |
+
.on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
|
| 378 |
+
|
| 379 |
+
gLabels.selectAll('.node-label')
|
| 380 |
+
.on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
|
| 381 |
+
};
|
| 382 |
+
|
| 383 |
+
if (window.ResizeObserver) new ResizeObserver(() => render()).observe(container);
|
| 384 |
+
else window.addEventListener('resize', render);
|
| 385 |
+
new MutationObserver(() => render()).observe(document.documentElement, { attributes: true, attributeFilter: ['data-theme'] });
|
| 386 |
+
render();
|
| 387 |
+
}
|
| 388 |
+
};
|
| 389 |
+
|
| 390 |
+
if (document.readyState === 'loading') {
|
| 391 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 392 |
+
} else { ensureD3(bootstrap); }
|
| 393 |
+
})();
|
| 394 |
+
</script>
|
| 395 |
+
</body>
|
| 396 |
+
</html>
|
app/presentation/se2026/charts/pipeline.html
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" data-theme="dark">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>DataTrove Pipeline</title>
|
| 7 |
+
<style>
|
| 8 |
+
:root {
|
| 9 |
+
--text-color: rgba(255,255,255,0.88);
|
| 10 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 11 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 12 |
+
--border-color: rgba(255,255,255,0.1);
|
| 13 |
+
--primary-color: #7c6ff7;
|
| 14 |
+
}
|
| 15 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 16 |
+
html, body { width: 100%; height: 100%; background: transparent; overflow: hidden; }
|
| 17 |
+
</style>
|
| 18 |
+
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
| 19 |
+
</head>
|
| 20 |
+
<body>
|
| 21 |
+
<div class="d3-pipeline"></div>
|
| 22 |
+
<style>
|
| 23 |
+
.d3-pipeline {
|
| 24 |
+
position: relative;
|
| 25 |
+
width: 100%;
|
| 26 |
+
margin: 0;
|
| 27 |
+
container-type: inline-size;
|
| 28 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
|
| 29 |
+
}
|
| 30 |
+
.d3-pipeline .node-group { cursor: default; }
|
| 31 |
+
.d3-pipeline .node-card { transition: filter .15s ease; }
|
| 32 |
+
.d3-pipeline .node-group:hover .node-card { filter: brightness(1.05); }
|
| 33 |
+
.d3-pipeline .node-title { font-weight: 700; fill: var(--text-color); }
|
| 34 |
+
.d3-pipeline .node-subtitle { fill: var(--muted-color); }
|
| 35 |
+
.d3-pipeline .group-label { font-weight: 700; fill: var(--muted-color); letter-spacing: 0.02em; }
|
| 36 |
+
.d3-pipeline .edge-path { fill: none; stroke-linecap: round; }
|
| 37 |
+
.d3-pipeline .d3-tooltip {
|
| 38 |
+
position: absolute; top: 0; left: 0;
|
| 39 |
+
transform: translate(-9999px, -9999px);
|
| 40 |
+
pointer-events: none; padding: 8px 12px; border-radius: 8px;
|
| 41 |
+
font-size: 12px; line-height: 1.4;
|
| 42 |
+
border: 1px solid var(--border-color); background: var(--surface-bg);
|
| 43 |
+
color: var(--text-color); box-shadow: 0 4px 20px rgba(0,0,0,.15);
|
| 44 |
+
opacity: 0; transition: opacity .12s ease; max-width: 260px; z-index: 100;
|
| 45 |
+
}
|
| 46 |
+
.d3-pipeline .d3-tooltip strong { display: block; margin-bottom: 2px; font-size: 13px; }
|
| 47 |
+
</style>
|
| 48 |
+
<script>
|
| 49 |
+
(() => {
|
| 50 |
+
/**
 * Runs `cb` once a usable d3 (one exposing `d3.select`) is on `window`.
 * When d3 is missing, a CDN <script id="d3-cdn-script"> is reused or injected
 * and `cb` is deferred to that tag's `load` event.
 */
const ensureD3 = (cb) => {
  const d3Usable = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (d3Usable()) {
    return cb();
  }

  const scriptId = 'd3-cdn-script';
  const existing = document.getElementById(scriptId);
  const tag = existing || document.createElement('script');
  if (!existing) {
    tag.id = scriptId;
    tag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(tag);
  }

  const runWhenUsable = () => {
    if (d3Usable()) cb();
  };
  tag.addEventListener('load', runWhenUsable, { once: true });
  if (window.d3) runWhenUsable();
};
|
| 58 |
+
|
| 59 |
+
const bootstrap = () => {
|
| 60 |
+
const scriptEl = document.currentScript;
|
| 61 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 62 |
+
if (!(container && container.classList && container.classList.contains('d3-pipeline'))) {
|
| 63 |
+
const cs = Array.from(document.querySelectorAll('.d3-pipeline')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
|
| 64 |
+
container = cs[cs.length - 1] || null;
|
| 65 |
+
}
|
| 66 |
+
if (!container) return;
|
| 67 |
+
if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; }
|
| 68 |
+
container.style.position = container.style.position || 'relative';
|
| 69 |
+
|
| 70 |
+
const tip = document.createElement('div');
|
| 71 |
+
tip.className = 'd3-tooltip';
|
| 72 |
+
const tipInner = document.createElement('div');
|
| 73 |
+
tip.appendChild(tipInner);
|
| 74 |
+
container.appendChild(tip);
|
| 75 |
+
|
| 76 |
+
/**
 * Fills the tooltip with `html`, makes it visible and moves it next to the
 * pointer: 14px to the right of and 10px above the event position, expressed
 * relative to the container's bounding box.
 */
function showTip(ev, html) {
  tipInner.innerHTML = html;
  tip.style.opacity = '1';
  const { left, top } = container.getBoundingClientRect();
  const offsetX = ev.clientX - left + 14;
  const offsetY = ev.clientY - top - 10;
  tip.style.transform = `translate(${offsetX}px, ${offsetY}px)`;
}
|
| 83 |
+
function hideTip() { tip.style.opacity = '0'; tip.style.transform = 'translate(-9999px,-9999px)'; }
|
| 84 |
+
|
| 85 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 86 |
+
const defs = svg.append('defs');
|
| 87 |
+
defs.append('marker').attr('id', 'pl-arrow').attr('viewBox', '0 0 10 8')
|
| 88 |
+
.attr('refX', 9).attr('refY', 4).attr('markerWidth', 7).attr('markerHeight', 5.5)
|
| 89 |
+
.attr('orient', 'auto').append('path').attr('d', 'M0,1 L8,4 L0,7 Z');
|
| 90 |
+
|
| 91 |
+
const gRoot = svg.append('g');
|
| 92 |
+
const gGroups = gRoot.append('g');
|
| 93 |
+
const gEdges = gRoot.append('g');
|
| 94 |
+
const gNodes = gRoot.append('g');
|
| 95 |
+
|
| 96 |
+
const nodes = [
|
| 97 |
+
{ id: 'hf_in', label: 'HF Hub Dataset', sub: '', group: 'input', tip: 'Source dataset from the Hugging Face Hub. Any split or config.' },
|
| 98 |
+
{ id: 'read', label: 'Read', sub: 'HuggingFaceDatasetReader', group: 'pipeline', tip: 'Reads documents from the Hub and streams them into the pipeline.' },
|
| 99 |
+
{ id: 'transform', label: 'Transform', sub: 'InferenceRunner', group: 'pipeline', tip: 'Orchestrates LLM inference: batching, retries, metric logging.' },
|
| 100 |
+
{ id: 'write', label: 'Write', sub: 'ParquetWriter', group: 'pipeline', tip: 'Writes generated outputs as Parquet files with checkpointing.' },
|
| 101 |
+
{ id: 'local', label: 'Local', sub: 'single node, multi-GPU', group: 'execution', tip: 'Run on a single machine with multiple workers for development.' },
|
| 102 |
+
{ id: 'slurm', label: 'Slurm Cluster', sub: 'multi-node, auto-scaling', group: 'execution', tip: 'Distribute across nodes for large-scale production workloads.' },
|
| 103 |
+
{ id: 'rollout', label: 'Custom Rollout', sub: 'async callable', group: 'inference', tip: 'Your rollout function: orchestrates one or many generate() calls.' },
|
| 104 |
+
{ id: 'vllm', label: 'vLLM / SGLang', sub: 'Server', group: 'inference', tip: 'High-throughput inference engine with prefix caching and batching.' },
|
| 105 |
+
{ id: 'hf_out', label: 'HF Hub Dataset', sub: '', group: 'output', tip: 'Generated dataset uploaded continuously to the Hugging Face Hub.' },
|
| 106 |
+
{ id: 'card', label: 'Dataset Card', sub: '+ Metrics', group: 'output', tip: 'Auto-generated dataset card with throughput stats.' },
|
| 107 |
+
{ id: 'monitor', label: 'Progress Monitor', sub: '', group: 'output', tip: 'Live progress bar and ETA on the dataset card during inference.' },
|
| 108 |
+
];
|
| 109 |
+
|
| 110 |
+
const groups = [
|
| 111 |
+
{ id: 'input', label: 'Input', icon: '📥' },
|
| 112 |
+
{ id: 'pipeline', label: 'DataTrove Pipeline', icon: '⚙️' },
|
| 113 |
+
{ id: 'execution', label: 'Execution Mode', icon: '🖥️' },
|
| 114 |
+
{ id: 'inference', label: 'Inference Engine', icon: '🚀' },
|
| 115 |
+
{ id: 'output', label: 'Output', icon: '📤' },
|
| 116 |
+
];
|
| 117 |
+
|
| 118 |
+
const edges = [
|
| 119 |
+
{ from: 'hf_in', to: 'read' },
|
| 120 |
+
{ from: 'read', to: 'transform' },
|
| 121 |
+
{ from: 'transform', to: 'write' },
|
| 122 |
+
{ from: 'transform', to: 'rollout' },
|
| 123 |
+
{ from: 'rollout', to: 'vllm' },
|
| 124 |
+
{ from: 'write', to: 'hf_out' },
|
| 125 |
+
{ from: 'write', to: 'card' },
|
| 126 |
+
{ from: 'write', to: 'monitor' },
|
| 127 |
+
];
|
| 128 |
+
|
| 129 |
+
// True when the root element's data-theme attribute is exactly 'dark'.
function isDark() {
  const theme = document.documentElement.getAttribute('data-theme');
  return theme === 'dark';
}
|
| 130 |
+
|
| 131 |
+
/**
 * Builds the palette for the current theme: fills and borders for node cards,
 * group boxes and the pipeline box, plus edge and arrow colours.
 * `primary` comes from `window.ColorPalettes.getPrimary()` when that global
 * exists, otherwise from a theme-dependent default.
 */
function colors() {
  const dark = isDark();
  const themed = (darkValue, lightValue) => (dark ? darkValue : lightValue);

  let primary;
  if (window.ColorPalettes) {
    primary = window.ColorPalettes.getPrimary();
  } else {
    primary = themed('#7c6ff7', '#6366f1');
  }

  return {
    nodeBg: themed('rgba(255,255,255,0.055)', 'rgba(255,255,255,0.92)'),
    nodeBd: themed('rgba(255,255,255,0.10)', 'rgba(0,0,0,0.09)'),
    groupBg: themed('rgba(255,255,255,0.025)', 'rgba(0,0,0,0.022)'),
    groupBd: themed('rgba(255,255,255,0.07)', 'rgba(0,0,0,0.055)'),
    pipeBg: themed('rgba(99,102,241,0.055)', 'rgba(99,102,241,0.04)'),
    pipeBd: themed('rgba(99,102,241,0.14)', 'rgba(99,102,241,0.11)'),
    edge: themed('rgba(255,255,255,0.22)', 'rgba(0,0,0,0.18)'),
    arrow: themed('rgba(255,255,255,0.30)', 'rgba(0,0,0,0.25)'),
    primary,
  };
}
|
| 146 |
+
|
| 147 |
+
function computeLayout() {
|
| 148 |
+
const W = container.clientWidth || 820;
|
| 149 |
+
const s = Math.min(1, W / 820);
|
| 150 |
+
|
| 151 |
+
const nw = Math.round(200 * s), nh = Math.round(60 * s);
|
| 152 |
+
const nr = Math.round(10 * s);
|
| 153 |
+
const gp = Math.round(10 * s);
|
| 154 |
+
const gr = Math.round(10 * s);
|
| 155 |
+
const glh = Math.round(22 * s);
|
| 156 |
+
const ng = Math.round(7 * s);
|
| 157 |
+
const cg = Math.round(70 * s);
|
| 158 |
+
const rg = Math.round(14 * s);
|
| 159 |
+
|
| 160 |
+
const leftW = nw + gp * 2;
|
| 161 |
+
const centerW = nw + gp * 2;
|
| 162 |
+
const rightW = nw + gp * 2;
|
| 163 |
+
const totalW = leftW + centerW + rightW + cg * 2;
|
| 164 |
+
const offsetX = Math.max(0, (W - totalW) / 2);
|
| 165 |
+
|
| 166 |
+
const leftX = offsetX;
|
| 167 |
+
const centerX = offsetX + leftW + cg;
|
| 168 |
+
const rightX = offsetX + leftW + cg + centerW + cg;
|
| 169 |
+
|
| 170 |
+
let y = Math.round(4 * s);
|
| 171 |
+
const inputNode = nodes.find(n => n.id === 'hf_in');
|
| 172 |
+
inputNode._x = centerX + gp; inputNode._y = y + glh + gp;
|
| 173 |
+
inputNode._w = nw; inputNode._h = nh; inputNode._r = nr;
|
| 174 |
+
const inputGroup = groups.find(g => g.id === 'input');
|
| 175 |
+
inputGroup._x = centerX; inputGroup._y = y;
|
| 176 |
+
inputGroup._w = centerW; inputGroup._h = glh + gp * 2 + nh; inputGroup._r = gr;
|
| 177 |
+
|
| 178 |
+
y += inputGroup._h + rg;
|
| 179 |
+
const pipeTop = y;
|
| 180 |
+
const pipeNodes = ['read', 'transform', 'write'].map(id => nodes.find(n => n.id === id));
|
| 181 |
+
pipeNodes.forEach((n, i) => {
|
| 182 |
+
n._x = centerX + gp;
|
| 183 |
+
n._y = pipeTop + glh + gp + i * (nh + ng);
|
| 184 |
+
n._w = nw; n._h = nh; n._r = nr;
|
| 185 |
+
});
|
| 186 |
+
const pipeH = glh + gp * 2 + 3 * nh + 2 * ng;
|
| 187 |
+
const pipeGroup = groups.find(g => g.id === 'pipeline');
|
| 188 |
+
pipeGroup._x = centerX; pipeGroup._y = pipeTop;
|
| 189 |
+
pipeGroup._w = centerW; pipeGroup._h = pipeH; pipeGroup._r = gr;
|
| 190 |
+
|
| 191 |
+
const execNodes = ['local', 'slurm'].map(id => nodes.find(n => n.id === id));
|
| 192 |
+
const execH = glh + gp * 2 + execNodes.length * nh + (execNodes.length - 1) * ng;
|
| 193 |
+
const inferNodes = ['rollout', 'vllm'].map(id => nodes.find(n => n.id === id));
|
| 194 |
+
const inferH = glh + gp * 2 + inferNodes.length * nh + (inferNodes.length - 1) * ng;
|
| 195 |
+
const writeNode = nodes.find(n => n.id === 'write');
|
| 196 |
+
const inferBottom = writeNode._y + writeNode._h + gp;
|
| 197 |
+
const inferTop = inferBottom - inferH;
|
| 198 |
+
const execTop = inferTop - rg - execH;
|
| 199 |
+
execNodes.forEach((n, i) => {
|
| 200 |
+
n._x = leftX + gp; n._y = execTop + glh + gp + i * (nh + ng);
|
| 201 |
+
n._w = nw; n._h = nh; n._r = nr;
|
| 202 |
+
});
|
| 203 |
+
const execGroup = groups.find(g => g.id === 'execution');
|
| 204 |
+
execGroup._x = leftX; execGroup._y = execTop;
|
| 205 |
+
execGroup._w = leftW; execGroup._h = execH; execGroup._r = gr;
|
| 206 |
+
|
| 207 |
+
inferNodes.forEach((n, i) => {
|
| 208 |
+
n._x = leftX + gp; n._y = inferTop + glh + gp + i * (nh + ng);
|
| 209 |
+
n._w = nw; n._h = nh; n._r = nr;
|
| 210 |
+
});
|
| 211 |
+
const inferGroup = groups.find(g => g.id === 'inference');
|
| 212 |
+
inferGroup._x = leftX; inferGroup._y = inferTop;
|
| 213 |
+
inferGroup._w = leftW; inferGroup._h = inferH; inferGroup._r = gr;
|
| 214 |
+
|
| 215 |
+
const outNodes = ['hf_out', 'card', 'monitor'].map(id => nodes.find(n => n.id === id));
|
| 216 |
+
const outH = glh + gp * 2 + outNodes.length * nh + (outNodes.length - 1) * ng;
|
| 217 |
+
const outBottom = writeNode._y + writeNode._h + gp;
|
| 218 |
+
const outTop = outBottom - outH;
|
| 219 |
+
outNodes.forEach((n, i) => {
|
| 220 |
+
n._x = rightX + gp; n._y = outTop + glh + gp + i * (nh + ng);
|
| 221 |
+
n._w = nw; n._h = nh; n._r = nr;
|
| 222 |
+
});
|
| 223 |
+
const outGroup = groups.find(g => g.id === 'output');
|
| 224 |
+
outGroup._x = rightX; outGroup._y = outTop;
|
| 225 |
+
outGroup._w = rightW; outGroup._h = outH; outGroup._r = gr;
|
| 226 |
+
|
| 227 |
+
const minY = Math.min(
|
| 228 |
+
...nodes.map(n => n._y),
|
| 229 |
+
...groups.map(g => g._y)
|
| 230 |
+
);
|
| 231 |
+
if (minY < 0) {
|
| 232 |
+
const shift = -minY + Math.round(4 * s);
|
| 233 |
+
nodes.forEach(n => { n._y += shift; });
|
| 234 |
+
groups.forEach(g => { g._y += shift; });
|
| 235 |
+
}
|
| 236 |
+
const maxY = Math.max(
|
| 237 |
+
...nodes.map(n => n._y + n._h + gp),
|
| 238 |
+
...groups.map(g => g._y + g._h)
|
| 239 |
+
);
|
| 240 |
+
svg.attr('height', maxY + Math.round(4 * s));
|
| 241 |
+
|
| 242 |
+
return s;
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
/**
 * Anchor point at the middle of one edge of a laid-out node.
 *
 * @param {{_x: number, _y: number, _w: number, _h: number}} n - node carrying layout fields.
 * @param {'top'|'bottom'|'left'|'right'} side - which edge to anchor on.
 * @param {number} [offset] - shift along the edge (x for top/bottom, y for left/right);
 *   any falsy value is treated as 0.
 * @returns {{x: number, y: number}|undefined} the anchor, or undefined for an unknown side.
 */
function pt(n, side, offset) {
  const shift = offset || 0;
  switch (side) {
    case 'top': {
      const x = n._x + n._w / 2 + shift;
      return { x, y: n._y };
    }
    case 'bottom': {
      const x = n._x + n._w / 2 + shift;
      return { x, y: n._y + n._h };
    }
    case 'left': {
      const y = n._y + n._h / 2 + shift;
      return { x: n._x, y };
    }
    case 'right': {
      const y = n._y + n._h / 2 + shift;
      return { x: n._x + n._w, y };
    }
    default:
      // Unknown side: callers get undefined, exactly as before.
      return undefined;
  }
}
|
| 252 |
+
|
| 253 |
+
/**
 * SVG path string for a cubic Bezier from `a` to `b` whose two control
 * points sit at the horizontal midpoint (one at a's y, one at b's y).
 *
 * @param {{x: number, y: number}} a - start point.
 * @param {{x: number, y: number}} b - end point.
 * @returns {string} path data of the form "Mx,y Cx,y x,y x,y".
 */
function hBez(a, b) {
  const midX = (a.x + b.x) / 2;
  const segments = [
    `M${a.x},${a.y}`,
    `C${midX},${a.y}`,
    `${midX},${b.y}`,
    `${b.x},${b.y}`,
  ];
  return segments.join(' ');
}
|
| 257 |
+
/**
 * SVG path string for a cubic Bezier from `a` to `b` whose two control
 * points sit at the vertical midpoint (one at a's x, one at b's x).
 *
 * @param {{x: number, y: number}} a - start point.
 * @param {{x: number, y: number}} b - end point.
 * @returns {string} path data of the form "Mx,y Cx,y x,y x,y".
 */
function vBez(a, b) {
  const midY = (a.y + b.y) / 2;
  const segments = [
    `M${a.x},${a.y}`,
    `C${a.x},${midY}`,
    `${b.x},${midY}`,
    `${b.x},${b.y}`,
  ];
  return segments.join(' ');
}
|
| 261 |
+
|
| 262 |
+
/**
 * Build the SVG path data for one edge of the pipeline diagram.
 *
 * Both endpoints are looked up in `nodes` by id; if either is missing the
 * edge gets an empty path. Specific (from, to) pairs are routed with a
 * vertical or horizontal Bezier between chosen sides; every other pair is
 * drawn right-to-left-side with a horizontal Bezier.
 *
 * @param {{from: string, to: string}} e - edge descriptor.
 * @returns {string} path data, or '' when an endpoint is unknown.
 */
function edgePath(e) {
  const src = nodes.find(n => n.id === e.from);
  const dst = nodes.find(n => n.id === e.to);
  if (!src || !dst) return '';

  const connects = (fromId, toId) => e.from === fromId && e.to === toId;

  // Edges that run top-to-bottom between stacked nodes.
  const isVertical =
    connects('hf_in', 'read') ||
    connects('read', 'transform') ||
    connects('transform', 'write') ||
    connects('rollout', 'vllm');
  if (isVertical) return vBez(pt(src, 'bottom'), pt(dst, 'top'));

  // The one edge that leaves the centre column towards the left column.
  if (connects('transform', 'rollout')) return hBez(pt(src, 'left'), pt(dst, 'right'));

  // Fan-out from 'write': the outer two edges start slightly above/below
  // the vertical centre of the source node so the three lines do not overlap.
  const spread = Math.round(src._h * 0.28);
  if (connects('write', 'hf_out')) return hBez(pt(src, 'right', -spread), pt(dst, 'left'));
  if (connects('write', 'monitor')) return hBez(pt(src, 'right', spread), pt(dst, 'left'));

  // 'write' -> 'card' and any other edge: centre of right side to left side.
  return hBez(pt(src, 'right'), pt(dst, 'left'));
}
|
| 280 |
+
|
| 281 |
+
/**
 * Redraw the pipeline diagram.
 *
 * Runs computeLayout() (which returns the scale factor used for font and
 * stroke sizes), fetches the current colours, then updates three D3 data
 * joins in order: group boxes, edges, node cards. Hover handlers on the
 * nodes show a tooltip when the node datum has a `tip`.
 */
function render() {
  const scale = computeLayout();
  const palette = colors();
  const px = (base) => Math.round(base * scale);

  // Font sizes follow the layout scale but never drop below a readable floor.
  const titleSize = Math.max(11, px(13));
  const subtitleSize = Math.max(10, px(11));
  const groupSize = Math.max(10, px(11));
  const iconSize = Math.max(12, px(14));

  defs.select('#pl-arrow path').attr('fill', palette.arrow);

  // ── Group boxes ──
  const groupJoin = gGroups.selectAll('g.grp').data(groups, d => d.id);
  const groupEnter = groupJoin.enter().append('g').attr('class', 'grp');
  groupEnter.append('rect');
  groupEnter.append('text').attr('class', 'grp-icon');
  groupEnter.append('text').attr('class', 'group-label');
  const groupAll = groupEnter.merge(groupJoin);

  const isPipeline = d => d.id === 'pipeline';
  groupAll.select('rect')
    .attr('x', d => d._x)
    .attr('y', d => d._y)
    .attr('width', d => d._w)
    .attr('height', d => d._h)
    .attr('rx', d => d._r)
    .attr('ry', d => d._r)
    .attr('fill', d => (isPipeline(d) ? palette.pipeBg : palette.groupBg))
    .attr('stroke', d => (isPipeline(d) ? palette.pipeBd : palette.groupBd))
    .attr('stroke-width', 1);

  groupAll.select('.grp-icon')
    .attr('x', d => d._x + px(6))
    .attr('y', d => d._y + px(15))
    .style('font-size', `${iconSize}px`)
    .text(d => d.icon);

  // The label starts after the icon: left pad + icon width + small gap.
  groupAll.select('.group-label')
    .attr('x', d => d._x + px(6) + iconSize + px(3))
    .attr('y', d => d._y + px(15))
    .style('font-size', `${groupSize}px`)
    .text(d => d.label);
  groupJoin.exit().remove();

  // ── Edges ──
  const edgeJoin = gEdges.selectAll('path.edge-path').data(edges, d => d.from + d.to);
  const edgeEnter = edgeJoin.enter().append('path')
    .attr('class', 'edge-path')
    .attr('marker-end', 'url(#pl-arrow)');
  edgeEnter.merge(edgeJoin)
    .attr('d', edgePath)
    .attr('stroke', palette.edge)
    .attr('stroke-width', Math.max(1.5, 1.8 * scale));
  edgeJoin.exit().remove();

  // ── Node cards ──
  const nodeJoin = gNodes.selectAll('g.node-group').data(nodes, d => d.id);
  const nodeEnter = nodeJoin.enter().append('g').attr('class', 'node-group');
  nodeEnter.append('rect').attr('class', 'node-card');
  nodeEnter.append('text').attr('class', 'node-title');
  nodeEnter.append('text').attr('class', 'node-subtitle');
  const nodeAll = nodeEnter.merge(nodeJoin);

  nodeAll.attr('transform', d => `translate(${d._x},${d._y})`);
  nodeAll.select('.node-card')
    .attr('width', d => d._w)
    .attr('height', d => d._h)
    .attr('rx', d => d._r)
    .attr('ry', d => d._r)
    .attr('fill', palette.nodeBg)
    .attr('stroke', palette.nodeBd)
    .attr('stroke-width', 1);

  // With a subtitle the title moves up to make room; otherwise it is centred.
  nodeAll.select('.node-title')
    .attr('x', d => d._w / 2)
    .attr('y', d => (d.sub ? d._h * 0.38 : d._h / 2))
    .attr('text-anchor', 'middle')
    .attr('dominant-baseline', 'middle')
    .style('font-size', `${titleSize}px`)
    .text(d => d.label);
  nodeAll.select('.node-subtitle')
    .attr('x', d => d._w / 2)
    .attr('y', d => d._h * 0.68)
    .attr('text-anchor', 'middle')
    .attr('dominant-baseline', 'middle')
    .style('font-size', `${subtitleSize}px`)
    .text(d => d.sub || '');

  // Same handler for enter and move so the tooltip follows the pointer.
  const showNodeTip = (ev, d) => {
    if (d.tip) showTip(ev, `<strong>${d.label}</strong>${d.tip}`);
  };
  nodeAll
    .on('mouseenter', showNodeTip)
    .on('mousemove', showNodeTip)
    .on('mouseleave', hideTip);
  nodeJoin.exit().remove();
}
|
| 350 |
+
|
| 351 |
+
render();
|
| 352 |
+
if (window.ResizeObserver) { new ResizeObserver(() => render()).observe(container); }
|
| 353 |
+
else { window.addEventListener('resize', render); }
|
| 354 |
+
new MutationObserver(() => render()).observe(document.documentElement, { attributes: true, attributeFilter: ['data-theme'] });
|
| 355 |
+
};
|
| 356 |
+
|
| 357 |
+
if (document.readyState === 'loading') {
|
| 358 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 359 |
+
} else { ensureD3(bootstrap); }
|
| 360 |
+
})();
|
| 361 |
+
</script>
|
| 362 |
+
</body>
|
| 363 |
+
</html>
|
app/presentation/se2026/charts/throughput.html
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html data-theme="dark">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 6 |
+
<title>Throughput Chart</title>
|
| 7 |
+
<style>
|
| 8 |
+
:root {
|
| 9 |
+
--text-color: rgba(255,255,255,0.88);
|
| 10 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 11 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 12 |
+
--border-color: rgba(255,255,255,0.1);
|
| 13 |
+
--axis-color: rgba(255,255,255,0.15);
|
| 14 |
+
--tick-color: rgba(255,255,255,0.5);
|
| 15 |
+
--grid-color: rgba(255,255,255,0.06);
|
| 16 |
+
--primary-color: #7c6ff7;
|
| 17 |
+
}
|
| 18 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 19 |
+
html, body { width: 100%; height: 100%; background: transparent; overflow: visible; }
|
| 20 |
+
</style>
|
| 21 |
+
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
| 22 |
+
</head>
|
| 23 |
+
<body>
|
| 24 |
+
<div class="d3-optimization-sweep"></div>
|
| 25 |
+
<style>
|
| 26 |
+
.d3-optimization-sweep { position: relative; }
|
| 27 |
+
.d3-optimization-sweep .controls {
|
| 28 |
+
display: flex;
|
| 29 |
+
gap: 16px;
|
| 30 |
+
align-items: center;
|
| 31 |
+
justify-content: flex-start;
|
| 32 |
+
flex-wrap: wrap;
|
| 33 |
+
margin: 10px 0 0 0;
|
| 34 |
+
}
|
| 35 |
+
.d3-optimization-sweep .controls .control-group {
|
| 36 |
+
display: flex;
|
| 37 |
+
flex-direction: column;
|
| 38 |
+
align-items: flex-start;
|
| 39 |
+
gap: 6px;
|
| 40 |
+
}
|
| 41 |
+
.d3-optimization-sweep .controls label {
|
| 42 |
+
font-size: 18px;
|
| 43 |
+
font-weight: 700;
|
| 44 |
+
color: var(--text-color);
|
| 45 |
+
}
|
| 46 |
+
.d3-optimization-sweep .controls select {
|
| 47 |
+
appearance: none;
|
| 48 |
+
-webkit-appearance: none;
|
| 49 |
+
-moz-appearance: none;
|
| 50 |
+
border: 1px solid var(--border-color);
|
| 51 |
+
border-radius: 8px;
|
| 52 |
+
padding: 6px 28px 6px 10px;
|
| 53 |
+
background-color: var(--surface-bg);
|
| 54 |
+
color: var(--text-color);
|
| 55 |
+
font-size: 18px;
|
| 56 |
+
line-height: 1.2;
|
| 57 |
+
background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
|
| 58 |
+
background-repeat: no-repeat;
|
| 59 |
+
background-position: right 8px center;
|
| 60 |
+
}
|
| 61 |
+
.d3-optimization-sweep .controls select:focus-visible {
|
| 62 |
+
outline: 2px solid var(--primary-color);
|
| 63 |
+
outline-offset: 2px;
|
| 64 |
+
}
|
| 65 |
+
.d3-optimization-sweep .legend {
|
| 66 |
+
display: flex;
|
| 67 |
+
align-items: center;
|
| 68 |
+
gap: 14px;
|
| 69 |
+
margin: 0 0 0 auto;
|
| 70 |
+
}
|
| 71 |
+
.d3-optimization-sweep .legend .legend-title {
|
| 72 |
+
font-size: 18px;
|
| 73 |
+
font-weight: 700;
|
| 74 |
+
color: var(--text-color);
|
| 75 |
+
}
|
| 76 |
+
.d3-optimization-sweep .legend .legend-section {
|
| 77 |
+
display: flex;
|
| 78 |
+
flex-wrap: wrap;
|
| 79 |
+
gap: 8px 14px;
|
| 80 |
+
}
|
| 81 |
+
.d3-optimization-sweep .legend .item {
|
| 82 |
+
display: inline-flex;
|
| 83 |
+
align-items: center;
|
| 84 |
+
gap: 6px;
|
| 85 |
+
white-space: nowrap;
|
| 86 |
+
font-size: 18px;
|
| 87 |
+
color: var(--text-color);
|
| 88 |
+
}
|
| 89 |
+
.d3-optimization-sweep .legend .swatch {
|
| 90 |
+
width: 14px;
|
| 91 |
+
height: 14px;
|
| 92 |
+
border-radius: 3px;
|
| 93 |
+
border: 1px solid var(--border-color);
|
| 94 |
+
flex-shrink: 0;
|
| 95 |
+
}
|
| 96 |
+
.d3-optimization-sweep .legend .shape-swatch {
|
| 97 |
+
width: 14px;
|
| 98 |
+
height: 14px;
|
| 99 |
+
flex-shrink: 0;
|
| 100 |
+
}
|
| 101 |
+
.d3-optimization-sweep .d3-tooltip {
|
| 102 |
+
position: absolute;
|
| 103 |
+
top: 0px;
|
| 104 |
+
left: 0px;
|
| 105 |
+
transform: translate(-9999px, -9999px);
|
| 106 |
+
pointer-events: none;
|
| 107 |
+
padding: 8px 10px;
|
| 108 |
+
border-radius: 8px;
|
| 109 |
+
font-size: 18px;
|
| 110 |
+
line-height: 1.35;
|
| 111 |
+
border: 1px solid var(--border-color);
|
| 112 |
+
background: var(--surface-bg);
|
| 113 |
+
color: var(--text-color);
|
| 114 |
+
box-shadow: 0 4px 24px rgba(0,0,0,.18);
|
| 115 |
+
opacity: 0;
|
| 116 |
+
transition: opacity .12s ease;
|
| 117 |
+
text-align: left;
|
| 118 |
+
max-width: 320px;
|
| 119 |
+
z-index: 10;
|
| 120 |
+
}
|
| 121 |
+
.d3-optimization-sweep .d3-tooltip .tip-label { color: var(--muted-color); }
|
| 122 |
+
.d3-optimization-sweep .d3-tooltip .tip-val { font-weight: 600; }
|
| 123 |
+
.d3-optimization-sweep .d3-tooltip .tip-regression { color: #e05252; }
|
| 124 |
+
.d3-optimization-sweep .y-label-text {
|
| 125 |
+
font-size: 18px;
|
| 126 |
+
cursor: default;
|
| 127 |
+
}
|
| 128 |
+
.d3-optimization-sweep .speedup-label {
|
| 129 |
+
font-size: 18px;
|
| 130 |
+
font-weight: 600;
|
| 131 |
+
}
|
| 132 |
+
</style>
|
| 133 |
+
<script>
|
| 134 |
+
(() => {
|
| 135 |
+
const bootstrap = () => {
|
| 136 |
+
const container = document.querySelector('.d3-optimization-sweep');
|
| 137 |
+
if (!container) return;
|
| 138 |
+
if (container.dataset && container.dataset.mounted === 'true') return;
|
| 139 |
+
if (container.dataset) container.dataset.mounted = 'true';
|
| 140 |
+
container.style.position = container.style.position || 'relative';
|
| 141 |
+
|
| 142 |
+
// ── Data ──
|
| 143 |
+
const DATA = [
|
| 144 |
+
{ model: 'GPT-OSS-120B', family: 'GPT-OSS', baseTp: 1, baseTps: 3138, t0Tps: 6117, t0Speedup: 1.95, t0Params: 'tp=2, mns=1024, mnbt=32768', t1Tps: 5450, t1Speedup: 1.74, t1Params: 'tp=2, mns=1024, mnbt=32768', bestSpeedup: 1.95 },
|
| 145 |
+
{ model: 'Qwen3-8B', family: 'Qwen3', baseTp: 1, baseTps: 6338, t0Tps: 6338, t0Speedup: 1.00, t0Params: '(baseline)', t1Tps: 6443, t1Speedup: 1.02, t1Params: 'gmu=95', bestSpeedup: 1.02 },
|
| 146 |
+
{ model: 'Gemma-3-4B', family: 'Gemma3', baseTp: 1, baseTps: 8501, t0Tps: 9253, t0Speedup: 1.09, t0Params: 'mns=1024, mnbt=32768', t1Tps: 8361, t1Speedup: 0.98, t1Params: 'mns=1024, mnbt=32768', bestSpeedup: 1.09 },
|
| 147 |
+
{ model: 'SmolLM2-1.7B', family: 'SmolLM2', baseTp: 1, baseTps: 5255, t0Tps: 5437, t0Speedup: 1.03, t0Params: 'mns=2048, mnbt=32768', t1Tps: 9220, t1Speedup: 1.75, t1Params: 'mns=2048, mnbt=32768, gmu=95, spec=suffix_32', bestSpeedup: 1.75 },
|
| 148 |
+
];
|
| 149 |
+
|
| 150 |
+
const FAMILIES = ['Qwen3', 'SmolLM2', 'Gemma3', 'GPT-OSS'];
|
| 151 |
+
const TIERS = ['Baseline', 'Tier 0', 'Tier 1'];
|
| 152 |
+
const SHAPE_SIZE = 42;
|
| 153 |
+
const TIER_Y_OFFSET = { 'Baseline': -0.38, 'Tier 0': 0, 'Tier 1': 0.38 };
|
| 154 |
+
const margin = { top: 30, right: 60, bottom: 50, left: 140 };
|
| 155 |
+
|
| 156 |
+
// ── Colors & shapes ──
|
| 157 |
+
const FAMILY_COLORS = { 'Qwen3': '#e07b54', 'SmolLM2': '#e06b9e', 'Gemma3': '#5b9bd5', 'GPT-OSS': '#8bc474' };
|
| 158 |
+
const familyPalette = FAMILIES.map(f => FAMILY_COLORS[f] || '#999');
|
| 159 |
+
const familyColor = (family) => FAMILY_COLORS[family] || '#999';
|
| 160 |
+
|
| 161 |
+
const shapeGenerators = {
|
| 162 |
+
'Baseline': d3.symbol().type(d3.symbolCircle),
|
| 163 |
+
'Tier 0': d3.symbol().type(d3.symbolSquare),
|
| 164 |
+
'Tier 1': d3.symbol().type(d3.symbolTriangle),
|
| 165 |
+
};
|
| 166 |
+
|
| 167 |
+
// ── Tooltip ──
|
| 168 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 169 |
+
let tipInner;
|
| 170 |
+
if (!tip) {
|
| 171 |
+
tip = document.createElement('div'); tip.className = 'd3-tooltip';
|
| 172 |
+
tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tipInner.style.textAlign = 'left';
|
| 173 |
+
tip.appendChild(tipInner); container.appendChild(tip);
|
| 174 |
+
} else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
|
| 175 |
+
|
| 176 |
+
/**
 * Fill the tooltip with `html` and place it next to the pointer, keeping it
 * inside the chart container.
 *
 * The tooltip is offset right/up from the pointer. If it would overflow the
 * right edge it flips to the left of the pointer; if it would overflow the
 * bottom it is pulled up. Both axes are finally clamped so the tooltip never
 * starts before the container's top-left corner (the flip to the left can
 * otherwise produce a negative x on narrow containers).
 *
 * @param {string} html - markup written to the tooltip via innerHTML.
 * @param {number} mx - pointer x relative to the container.
 * @param {number} my - pointer y relative to the container.
 */
function showTip(html, mx, my) {
  tipInner.innerHTML = html;

  // Measure after the content is set; fall back to rough sizes when the
  // element reports 0.
  const tipW = tip.offsetWidth || 200;
  const tipH = tip.offsetHeight || 100;

  let tx = mx + 14;
  let ty = my - 10;
  if (tx + tipW > container.clientWidth - 8) tx = mx - tipW - 14;
  if (tx < 0) tx = 4;
  if (ty + tipH > container.clientHeight) ty = container.clientHeight - tipH - 4;
  if (ty < 0) ty = 4;

  tip.style.transform = `translate(${tx}px, ${ty}px)`;
  tip.style.opacity = '1';
}
|
| 186 |
+
/**
 * Hide the tooltip: fade it out and move it far off-screen.
 */
function hideTip() {
  Object.assign(tip.style, {
    opacity: '0',
    transform: 'translate(-9999px, -9999px)',
  });
}
|
| 190 |
+
|
| 191 |
+
// ── Shared tooltip event handlers ──
|
| 192 |
+
/**
 * Wire hover behaviour onto a D3 selection of marks.
 *
 * On enter the mark is set to full opacity and the tooltip is shown; on
 * move the tooltip follows the pointer; on leave the mark's resting opacity
 * (from `opacityFn`) is restored and the tooltip is hidden.
 *
 * Pointer moves go through showTip() rather than writing the transform
 * directly, so the tooltip keeps the same edge clamping while it follows
 * the pointer as it has when it first appears.
 *
 * @param {object} sel - D3 selection whose data are accepted by buildTooltip().
 * @param {(d: object) => number} opacityFn - resting opacity for a datum.
 */
function attachTipEvents(sel, opacityFn) {
  const placeTip = (event, d) => {
    const [mx, my] = d3.pointer(event, container);
    showTip(buildTooltip(d), mx, my);
  };

  sel
    .attr('cursor', 'pointer')
    .on('mouseenter', function (event, d) {
      d3.select(this).attr('opacity', 1);
      placeTip(event, d);
    })
    .on('mousemove', function (event, d) {
      placeTip(event, d);
    })
    .on('mouseleave', function (event, d) {
      d3.select(this).attr('opacity', opacityFn(d));
      hideTip();
    });
}
|
| 209 |
+
|
| 210 |
+
// ── Shared axis styling ──
|
| 211 |
+
/**
 * Apply the chart's axis look to a rendered axis group: themed stroke on
 * the axis path and tick lines, themed fill and 18px size on tick labels.
 *
 * @param {object} g - D3 selection of the axis <g> element.
 */
function styleAxis(g) {
  const strokes = g.selectAll('path, line');
  strokes.attr('stroke', 'var(--axis-color)');

  const tickLabels = g.selectAll('text');
  tickLabels
    .attr('fill', 'var(--tick-color)')
    .style('font-size', '18px');
}
|
| 215 |
+
|
| 216 |
+
// ── SVG ──
|
| 217 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 218 |
+
const gRoot = svg.append('g');
|
| 219 |
+
|
| 220 |
+
// ── State ──
|
| 221 |
+
const state = { metric: 'speedup', sort: 'speedup' };
|
| 222 |
+
|
| 223 |
+
/**
 * Return a sorted copy of DATA according to `state.sort`.
 *
 * - 'speedup':  best speedup, highest first.
 * - 'baseline': baseline throughput, highest first.
 * - 'family':   position of the family in FAMILIES, ties broken by best
 *               speedup (highest first).
 * Any other value leaves the copy in DATA order. DATA itself is not mutated.
 *
 * @returns {object[]} a new array of the DATA rows.
 */
function sortedData() {
  const byBestSpeedup = (a, b) => b.bestSpeedup - a.bestSpeedup;
  const byBaseline = (a, b) => b.baseTps - a.baseTps;
  const byFamily = (a, b) => {
    const rankDiff = FAMILIES.indexOf(a.family) - FAMILIES.indexOf(b.family);
    if (rankDiff !== 0) return rankDiff;
    return byBestSpeedup(a, b);
  };

  const rows = DATA.slice();
  switch (state.sort) {
    case 'speedup':
      rows.sort(byBestSpeedup);
      break;
    case 'baseline':
      rows.sort(byBaseline);
      break;
    case 'family':
      rows.sort(byFamily);
      break;
    default:
      break;
  }
  return rows;
}
|
| 235 |
+
|
| 236 |
+
// ── X axis tick format helper ──
|
| 237 |
+
/**
 * Pick the x-axis tick formatter for the current metric.
 *
 * In 'throughput' mode values of 1000 and above are shown in thousands with
 * a 'k' suffix and smaller values are passed through unchanged; in any
 * other mode values are shown with one decimal and an 'x' suffix.
 *
 * @returns {(d: number) => (string|number)} tick formatter.
 */
function xTickFormat() {
  if (state.metric === 'throughput') {
    return (value) => {
      if (value >= 1000) return (value / 1000) + 'k';
      return value;
    };
  }
  return (value) => value.toFixed(1) + 'x';
}
|
| 242 |
+
|
| 243 |
+
// ── Render ──
|
| 244 |
+
/**
 * Redraw the whole chart for the current container width and `state`.
 *
 * Sizes the SVG, builds the band (models) and linear (metric) scales, then
 * updates, in order: grid lines, bottom and top x axes, x-axis label, model
 * labels, the 1.0x reference line (speedup mode only), the view-specific
 * marks (dots for throughput, bars for speedup), and the best-speedup
 * annotation on the right. Marks belonging to the other view are removed
 * before the active view is drawn.
 */
function render() {
  const outerW = container.clientWidth || 800;
  const iw = outerW - margin.left - margin.right;
  const ih = Math.max(150, DATA.length * 60 + margin.top + margin.bottom) - margin.top - margin.bottom;
  svg.attr('width', outerW).attr('height', ih + margin.top + margin.bottom);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const rows = sortedData();
  const isThroughput = state.metric === 'throughput';
  const isSpeedup = state.metric === 'speedup';

  const yScale = d3.scaleBand()
    .domain(rows.map(r => r.model))
    .range([0, ih])
    .padding(0.35);
  const bandH = yScale.bandwidth();

  // X domain: throughput starts at 0 with 8% headroom; speedup always
  // spans at least 0.85x to 2.05x.
  let xDomain;
  if (isThroughput) {
    const peakTps = d3.max(rows, r => Math.max(r.baseTps, r.t0Tps, r.t1Tps));
    xDomain = [0, peakTps * 1.08];
  } else {
    const hiSpd = d3.max(rows, r => Math.max(r.t0Speedup, r.t1Speedup));
    const loSpd = d3.min(rows, r => Math.min(r.t0Speedup, r.t1Speedup));
    xDomain = [Math.min(0.85, loSpd - 0.05), Math.max(2.05, hiSpd + 0.1)];
  }
  const xScale = d3.scaleLinear().domain(xDomain).range([0, iw]).nice();

  // Grid
  const gridLayer = gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid');
  gridLayer.selectAll('line').data(xScale.ticks(8), t => t).join('line')
    .attr('x1', t => xScale(t))
    .attr('x2', t => xScale(t))
    .attr('y1', 0)
    .attr('y2', ih)
    .attr('stroke', 'var(--grid-color)')
    .attr('stroke-width', 1);

  // X axes (bottom + top)
  const tickFmt = xTickFormat();
  gRoot.selectAll('.axis-x').data([0]).join('g')
    .attr('class', 'axis-x')
    .attr('transform', `translate(0,${ih})`)
    .call(d3.axisBottom(xScale).ticks(8).tickFormat(tickFmt))
    .call(styleAxis);
  gRoot.selectAll('.axis-x-top').data([0]).join('g')
    .attr('class', 'axis-x-top')
    .call(d3.axisTop(xScale).ticks(8).tickFormat(tickFmt))
    .call(styleAxis);

  // X axis label
  const xLabel = isThroughput ? 'Tokens per second per GPU' : 'Speedup vs baseline';
  gRoot.selectAll('.x-label').data([0]).join('text')
    .attr('class', 'x-label')
    .attr('x', iw / 2)
    .attr('y', ih + margin.bottom - 4)
    .attr('text-anchor', 'middle')
    .attr('fill', 'var(--muted-color)')
    .attr('font-size', 18)
    .text(xLabel);

  // Y axis (model names, coloured by family)
  const yAxisLayer = gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axis-y');
  yAxisLayer.selectAll('text.y-label-text').data(rows, r => r.model).join(
    enter => enter.append('text')
      .attr('class', 'y-label-text')
      .attr('x', -8)
      .attr('dy', '0.35em')
      .attr('text-anchor', 'end')
      .style('font-size', '18px')
      .style('font-weight', '600'),
    update => update,
    exit => exit.remove()
  )
    .attr('y', r => yScale(r.model) + bandH / 2)
    .attr('fill', r => familyColor(r.family))
    .text(r => r.model);

  // Reference line at 1.0x in speedup mode
  gRoot.selectAll('.ref-line').data(isSpeedup ? [1.0] : []).join('line')
    .attr('class', 'ref-line')
    .attr('x1', v => xScale(v))
    .attr('x2', v => xScale(v))
    .attr('y1', 0)
    .attr('y2', ih)
    .attr('stroke', 'var(--text-color)')
    .attr('stroke-width', 1.5)
    .attr('stroke-dasharray', '4,3')
    .attr('opacity', 0.5);

  // View-specific elements: clear the other view's marks first.
  if (isThroughput) {
    gRoot.selectAll('.speedup-bar').remove();
    renderThroughput(rows, xScale, yScale, bandH);
  } else {
    gRoot.selectAll('.conn-line').remove();
    gRoot.selectAll('.dot').remove();
    renderSpeedup(rows, xScale, yScale, bandH);
  }

  // Speedup annotation (shared between both views)
  gRoot.selectAll('.speedup-label').data(rows, r => r.model).join('text')
    .attr('class', 'speedup-label')
    .attr('x', iw + 6)
    .attr('y', r => yScale(r.model) + bandH / 2)
    .attr('dy', '0.35em')
    .attr('fill', 'var(--muted-color)')
    .text(r => r.bestSpeedup.toFixed(2) + 'x');
}
|
| 330 |
+
|
| 331 |
+
/**
 * Draw the throughput view: for each model, three markers (one per tier,
 * staggered vertically inside the model's band) joined by a faint line.
 *
 * @param {object[]} data - model rows in display order.
 * @param {Function} xScale - scale mapping tokens/s to x pixels.
 * @param {Function} yScale - band scale mapping model name to band top.
 * @param {number} bandH - height of one band in pixels.
 */
function renderThroughput(data, xScale, yScale, bandH) {
  // Position of every tier marker, computed once per model.
  const placed = data.map(row => {
    const midY = yScale(row.model) + bandH / 2;
    const tps = [row.baseTps, row.t0Tps, row.t1Tps];
    const marks = TIERS.map((tier, i) => ({
      tier,
      val: tps[i],
      x: xScale(tps[i]),
      y: midY + TIER_Y_OFFSET[tier] * bandH,
    }));
    return { row, marks };
  });

  // Connecting lines between the three staggered dots
  const toPath = d3.line().x(p => p.x).y(p => p.y);
  const connData = placed.map(({ row, marks }) => ({
    model: row.model,
    family: row.family,
    points: marks.map(({ x, y }) => ({ x, y })),
  }));
  gRoot.selectAll('.conn-line').data(connData, c => c.model).join('path')
    .attr('class', 'conn-line')
    .attr('d', c => toPath(c.points))
    .attr('fill', 'none')
    .attr('stroke', c => familyColor(c.family))
    .attr('stroke-width', 1.5)
    .attr('opacity', 0.35);

  // Dots: 3 per model; each datum carries the full row for the tooltip.
  const dots = placed.flatMap(({ row, marks }) =>
    marks.map(m => ({ ...row, tier: m.tier, val: m.val, cx: m.x, cy: m.y }))
  );
  const dotSel = gRoot.selectAll('.dot').data(dots, m => m.model + '-' + m.tier).join('path')
    .attr('class', 'dot')
    .attr('d', m => shapeGenerators[m.tier].size(SHAPE_SIZE)())
    .attr('transform', m => `translate(${m.cx},${m.cy})`)
    .attr('fill', m => familyColor(m.family))
    .attr('stroke', 'none')
    .attr('opacity', 0.9);
  attachTipEvents(dotSel, () => 0.9);
}
|
| 370 |
+
|
| 371 |
+
/**
 * Draw the speedup view: two horizontal bars per model (Tier 0 above
 * Tier 1), each growing from the 1.0x position towards its speedup value.
 * Bars below 1.0x extend to the left and get a red outline.
 *
 * @param {object[]} data - model rows in display order.
 * @param {Function} xScale - scale mapping speedup to x pixels.
 * @param {Function} yScale - band scale mapping model name to band top.
 * @param {number} bandH - height of one band in pixels.
 */
function renderSpeedup(data, xScale, yScale, bandH) {
  const barH = bandH * 0.38;
  const bars = data.flatMap(row => {
    const bandTop = yScale(row.model);
    return [
      { ...row, tier: 'Tier 0', val: row.t0Speedup, y: bandTop + bandH * 0.12, h: barH },
      { ...row, tier: 'Tier 1', val: row.t1Speedup, y: bandTop + bandH * 0.5, h: barH },
    ];
  });

  const baselineX = xScale(1.0);
  const isRegression = b => b.val < 1.0;
  // Tier 0 is drawn stronger than Tier 1; also the value restored on mouseleave.
  const restingOpacity = b => (b.tier === 'Tier 0' ? 0.9 : 0.55);

  const barSel = gRoot.selectAll('.speedup-bar').data(bars, b => b.model + '-' + b.tier).join('rect')
    .attr('class', 'speedup-bar')
    .attr('x', b => (b.val >= 1.0 ? baselineX : xScale(b.val)))
    .attr('y', b => b.y)
    .attr('width', b => Math.abs(xScale(b.val) - baselineX))
    .attr('height', b => b.h)
    .attr('rx', 2)
    .attr('fill', b => familyColor(b.family))
    .attr('opacity', b => (b.tier === 'Tier 0' ? 0.9 : 0.55))
    .attr('stroke', b => (isRegression(b) ? '#e05252' : 'none'))
    .attr('stroke-width', b => (isRegression(b) ? 1 : 0));
  attachTipEvents(barSel, restingOpacity);
}
|
| 394 |
+
|
| 395 |
+
/**
 * Build the tooltip markup for one model row.
 *
 * Layout: a header with model and family, a baseline line, then for each
 * tier a line with throughput and speedup followed by a small line with the
 * tier's parameters. Tier values are styled as a regression when the
 * speedup is below 1.0.
 *
 * @param {object} d - row with model, family, baseTp, baseTps and the
 *   t0Tps / t0Speedup / t0Params / t1Tps / t1Speedup / t1Params fields.
 * @returns {string} HTML string for the tooltip body.
 */
function buildTooltip(d) {
  const asCount = (v) => v.toLocaleString();
  const asSpeedup = (v) => v.toFixed(2) + 'x';
  const valueClass = (v) => (v < 1.0 ? 'tip-regression' : 'tip-val');

  // One tier = a value line plus a parameters line.
  const tierRows = (label, tps, speedup, params) => {
    const css = valueClass(speedup);
    return [
      `<div><span class="tip-label">${label}:</span> <span class="${css}">${asCount(tps)}</span> tps/gpu <span class="${css}">${asSpeedup(speedup)}</span></div>`,
      `<div style="font-size:10px;color:var(--muted-color);margin-left:8px">${params}</div>`,
    ];
  };

  const rows = [
    `<div style="margin-bottom:4px"><strong>${d.model}</strong> <span class="tip-label">(${d.family})</span></div>`,
    `<div><span class="tip-label">Baseline:</span> <span class="tip-val">${asCount(d.baseTps)}</span> tps/gpu <span class="tip-label">(tp=${d.baseTp})</span></div>`,
    ...tierRows('Tier 0', d.t0Tps, d.t0Speedup, d.t0Params),
    ...tierRows('Tier 1', d.t1Tps, d.t1Speedup, d.t1Params),
  ];
  return rows.join('');
}
|
| 406 |
+
|
| 407 |
+
// ── Controls ──
|
| 408 |
+
/**
 * Create a labelled <select> wrapped in a `.control-group` div.
 *
 * @param {string} id - id for the select; also the label's `for` target.
 * @param {string} label - visible label text.
 * @param {Array<[string, string]>} options - [value, text] pairs, in order.
 * @param {string} initial - value selected initially.
 * @param {(value: string) => void} onChange - called with the select's
 *   current value on every 'change' event.
 * @returns {HTMLDivElement} the wrapper element (label first, then select).
 */
function makeSelect(id, label, options, initial, onChange) {
  const wrapper = document.createElement('div');
  wrapper.className = 'control-group';

  const caption = document.createElement('label');
  caption.textContent = label;
  caption.setAttribute('for', id);

  const dropdown = document.createElement('select');
  dropdown.id = id;
  for (const [value, text] of options) {
    const choice = document.createElement('option');
    choice.value = value;
    choice.textContent = text;
    dropdown.appendChild(choice);
  }
  // Set after the options exist so the matching option can be selected.
  dropdown.value = initial;
  dropdown.addEventListener('change', () => onChange(dropdown.value));

  wrapper.appendChild(caption);
  wrapper.appendChild(dropdown);
  return wrapper;
}
|
| 420 |
+
|
| 421 |
+
const controls = document.createElement('div'); controls.className = 'controls';
|
| 422 |
+
controls.appendChild(makeSelect('metric-sel-optsweep', 'Metric',
|
| 423 |
+
[['throughput', 'Throughput'], ['speedup', 'Speedup']], state.metric,
|
| 424 |
+
v => { state.metric = v; render(); }));
|
| 425 |
+
controls.appendChild(makeSelect('sort-sel-optsweep', 'Sort',
|
| 426 |
+
[['speedup', 'By Best Speedup'], ['baseline', 'By Baseline Throughput'], ['family', 'By Model Family']], state.sort,
|
| 427 |
+
v => { state.sort = v; render(); }));
|
| 428 |
+
|
| 429 |
+
// Legend inline with controls
|
| 430 |
+
const legend = document.createElement('div'); legend.className = 'legend';
|
| 431 |
+
legend.style.display = 'flex'; legend.style.alignItems = 'center'; legend.style.gap = '14px'; legend.style.marginLeft = 'auto'; legend.style.margin = '0 0 0 auto';
|
| 432 |
+
const svgNS = 'http://www.w3.org/2000/svg';
|
| 433 |
+
TIERS.forEach(tier => {
|
| 434 |
+
const item = document.createElement('span'); item.className = 'item';
|
| 435 |
+
const shapeSvg = document.createElementNS(svgNS, 'svg');
|
| 436 |
+
shapeSvg.setAttribute('width', '14'); shapeSvg.setAttribute('height', '14');
|
| 437 |
+
shapeSvg.setAttribute('viewBox', '-8 -8 16 16'); shapeSvg.style.display = 'block';
|
| 438 |
+
const path = document.createElementNS(svgNS, 'path');
|
| 439 |
+
path.setAttribute('d', shapeGenerators[tier].size(SHAPE_SIZE)());
|
| 440 |
+
path.setAttribute('fill', 'var(--text-color)');
|
| 441 |
+
shapeSvg.appendChild(path);
|
| 442 |
+
const swWrap = document.createElement('span'); swWrap.className = 'shape-swatch'; swWrap.appendChild(shapeSvg);
|
| 443 |
+
const txt = document.createElement('span'); txt.textContent = tier;
|
| 444 |
+
item.appendChild(swWrap); item.appendChild(txt); legend.appendChild(item);
|
| 445 |
+
});
|
| 446 |
+
controls.appendChild(legend);
|
| 447 |
+
container.appendChild(controls);
|
| 448 |
+
|
| 449 |
+
// ── Initial render + resize ──
|
| 450 |
+
render();
|
| 451 |
+
if (window.ResizeObserver) {
|
| 452 |
+
new ResizeObserver(() => render()).observe(container);
|
| 453 |
+
} else {
|
| 454 |
+
window.addEventListener('resize', render);
|
| 455 |
+
}
|
| 456 |
+
};
|
| 457 |
+
|
| 458 |
+
if (document.readyState === 'loading') {
|
| 459 |
+
document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
|
| 460 |
+
} else {
|
| 461 |
+
bootstrap();
|
| 462 |
+
}
|
| 463 |
+
})();
|
| 464 |
+
</script>
|
| 465 |
+
</body>
|
| 466 |
+
</html>
|
app/presentation/se2026/data/benchmark-results.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0359f44cbbe97ee8f7ea598152a5053a322a81af818de890606e0daa6c15fd3a
|
| 3 |
+
size 1378100
|
app/presentation/se2026/data/rephrasing_metadata.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cac779aca41bc6f868d99a7c7fcc43343591b40ace727098341d52285c1ff856
|
| 3 |
+
size 152802
|
app/presentation/se2026/index.html
ADDED
|
@@ -0,0 +1,620 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" data-theme="dark">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>The Synthetic Data Playbook</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap" rel="stylesheet">
|
| 9 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/dist/reveal.css">
|
| 10 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/dist/theme/night.css">
|
| 11 |
+
<link rel="stylesheet" href="style.css">
|
| 12 |
+
</head>
|
| 13 |
+
<body>
|
| 14 |
+
<div class="reveal">
|
| 15 |
+
<div class="slides">
|
| 16 |
+
|
| 17 |
+
<!-- ============================================================ -->
|
| 18 |
+
<!-- SECTION 1: MOTIVATION AND RECAP (~40%, slides 1-8, ~9 min) -->
|
| 19 |
+
<!-- ============================================================ -->
|
| 20 |
+
|
| 21 |
+
<!-- SLIDE 1: Title -->
|
| 22 |
+
<section class="center-slide">
|
| 23 |
+
<div style="margin-top:-160px;">
|
| 24 |
+
<h2>The Synthetic Data Playbook</h2>
|
| 25 |
+
<br>
|
| 26 |
+
<h3>How to Cook Better Training Data for LLMs</h3>
|
| 27 |
+
<br>
|
| 28 |
+
<p style="margin-top:20px;font-size:0.8em;color:rgba(255,255,255,1);">
|
| 29 |
+
SE 26
|
| 30 |
+
</p>
|
| 31 |
+
</div>
|
| 32 |
+
<img src="assets/bern-skyline.png" style="position:absolute;bottom:0;left:50%;transform:translateX(-50%);width:100%;height:auto;opacity:0.6;pointer-events:none;">
|
| 33 |
+
<aside class="notes">
|
| 34 |
+
~30s. Welcome, introduce yourself. "Today I'll show you how we made LLMs better
|
| 35 |
+
by rewriting their training data instead of just filtering it."
|
| 36 |
+
</aside>
|
| 37 |
+
</section>
|
| 38 |
+
|
| 39 |
+
<!-- SLIDE 2: Digital Sovereignty -->
|
| 40 |
+
<section>
|
| 41 |
+
<p class="section-label">Why This Matters</p>
|
| 42 |
+
<h2>The Data Black Box</h2>
|
| 43 |
+
<div style="font-size:0.65em;margin-top:20px;">
|
| 44 |
+
<p>Frontier labs (OpenAI, Google, Anthropic) don't disclose how they build their training data.</p>
|
| 45 |
+
<p class="fragment">Neither do the Chinese labs (DeepSeek or Qwen).</p>
|
| 46 |
+
<p class="fragment" style="margin-top:20px;">
|
| 47 |
+
Training data is the <span class="highlight">most important ingredient</span> in building an LLM,
|
| 48 |
+
yet the recipes are kept secret.
|
| 49 |
+
</p>
|
| 50 |
+
<div class="fragment" style="margin-top:30px;background:rgba(255,255,255,0.04);border:1px solid rgba(255,255,255,0.1);border-radius:16px;padding:24px;">
|
| 51 |
+
<p style="font-weight:700;color:#f0c674;margin-bottom:8px;font-size:1.1em;">Digital Sovereignty</p>
|
| 52 |
+
<p>If you can't build the data, you can't build the model.<br>
|
| 53 |
+
If you can't build the model, you depend on those who can.</p>
|
| 54 |
+
<p style="margin-top:12px;">This work puts the knowledge <span class="accent">out in the open</span> for everyone:
|
| 55 |
+
governments, universities, startups, and individuals.</p>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
<aside class="notes">
|
| 59 |
+
~1 min. "Before we dive in, let me explain why this matters beyond the technical.
|
| 60 |
+
None of the frontier labs, not OpenAI, not Google, not Anthropic, and not the Chinese labs either,
|
| 61 |
+
tell you how they build their training data. It's the most important ingredient and it's a black box.
|
| 62 |
+
This is a digital sovereignty issue. If you can't build the data yourself,
|
| 63 |
+
you can't build the model, and you're dependent on whoever can.
|
| 64 |
+
Our work makes this knowledge open and accessible to everyone."
|
| 65 |
+
</aside>
|
| 66 |
+
</section>
|
| 67 |
+
|
| 68 |
+
<!-- SLIDE 3: LLMs and Pretraining Recap -->
|
| 69 |
+
<section>
|
| 70 |
+
<p class="section-label">Quick Recap</p>
|
| 71 |
+
<h2>LLMs: What's Under the Hood</h2>
|
| 72 |
+
<div class="two-col" style="font-size:0.65em;margin-top:20px;">
|
| 73 |
+
<div class="col">
|
| 74 |
+
<p>You use these every day: ChatGPT, Copilot, Claude.</p>
|
| 75 |
+
<p class="fragment">Under the hood: a giant function that takes <span class="accent">tokens in</span> and predicts <span class="accent">tokens out</span>.</p>
|
| 76 |
+
<p class="fragment">Trained by reading <span class="highlight">billions of web pages</span>, learning to predict the next word.</p>
|
| 77 |
+
<p class="fragment" style="margin-top:20px;font-weight:700;color:#f0c674;">
|
| 78 |
+
Data quality defines model quality.
|
| 79 |
+
</p>
|
| 80 |
+
</div>
|
| 81 |
+
<div class="col fragment" style="text-align:center;">
|
| 82 |
+
<div style="background:rgba(255,255,255,0.04);border:1px solid rgba(255,255,255,0.1);border-radius:16px;padding:30px 20px;">
|
| 83 |
+
<div style="font-size:0.9em;color:rgba(255,255,255,0.5);">Input text</div>
|
| 84 |
+
<div style="font-size:2em;margin:10px 0;">↓</div>
|
| 85 |
+
<div style="background:rgba(124,111,247,0.15);border:1px solid rgba(124,111,247,0.3);border-radius:12px;padding:16px;font-weight:700;font-size:1.1em;">
|
| 86 |
+
LLM<br><span style="font-size:0.6em;font-weight:400;color:rgba(255,255,255,0.4);">billions of parameters</span>
|
| 87 |
+
</div>
|
| 88 |
+
<div style="font-size:2em;margin:10px 0;">↓</div>
|
| 89 |
+
<div style="font-size:0.9em;color:rgba(255,255,255,0.5);">Output text</div>
|
| 90 |
+
</div>
|
| 91 |
+
</div>
|
| 92 |
+
</div>
|
| 93 |
+
<aside class="notes">
|
| 94 |
+
~1.5 min. "Quick recap so we have shared vocabulary." Click through fragments.
|
| 95 |
+
Emphasize: model quality = data quality. Like training a code model on all of GitHub.
|
| 96 |
+
</aside>
|
| 97 |
+
</section>
|
| 98 |
+
|
| 99 |
+
<!-- SLIDE 4: The Data Quality Problem -->
|
| 100 |
+
<section>
|
| 101 |
+
<p class="section-label">The Problem</p>
|
| 102 |
+
<h2>You Start With the Entire Internet...</h2>
|
| 103 |
+
<h3 class="fragment">...and throw away 98.6% of it</h3>
|
| 104 |
+
<div class="fragment">
|
| 105 |
+
<img src="assets/dclm-filtering-pipeline.png" class="img-contain" style="margin-top:10px;max-height:400px;">
|
| 106 |
+
<p style="font-size:0.45em;color:rgba(255,255,255,0.3);margin-top:8px;">
|
| 107 |
+
DCLM: 240T tokens from Common Crawl → 1.4% survives as DCLM-Baseline
|
| 108 |
+
</p>
|
| 109 |
+
</div>
|
| 110 |
+
<aside class="notes">
|
| 111 |
+
~1 min. "This is the DCLM dataset pipeline. You scrape the whole internet, 240 trillion tokens.
|
| 112 |
+
Then heuristic filters, deduplication, model-based filtering. Only 1.4% of documents survive.
|
| 113 |
+
All this engineering just to clean the data. What if there was a better way?"
|
| 114 |
+
</aside>
|
| 115 |
+
</section>
|
| 116 |
+
|
| 117 |
+
<!-- SLIDE 5: Synthetic Data -->
|
| 118 |
+
<section>
|
| 119 |
+
<p class="section-label">The Idea</p>
|
| 120 |
+
<h2>Rewrite Instead of Filter</h2>
|
| 121 |
+
<div class="before-after">
|
| 122 |
+
<div class="panel bad">
|
| 123 |
+
<div class="panel-title">Raw Web Text</div>
|
| 124 |
+
<p style="margin:0;line-height:1.6;">
|
| 125 |
+
<span style="color:rgba(255,255,255,0.3);">★★★ BeSt DeAls!!!</span><br>
|
| 126 |
+
Photosynthesis is the process by wich plants convert sunlit into energy.
|
| 127 |
+
It occurs in the chloroplasts<br>
|
| 128 |
+
<span style="color:rgba(255,255,255,0.3);">Click here for more → → →</span><br>
|
| 129 |
+
<span style="color:rgba(255,255,255,0.3);">© 2019 AllScienceInfo.biz</span><br>
|
| 130 |
+
Carbon dioxide and water are transformed into glucose and oxygen...
|
| 131 |
+
<span style="color:rgba(255,255,255,0.3);">[AD] [AD] [POPUP]</span>
|
| 132 |
+
</p>
|
| 133 |
+
</div>
|
| 134 |
+
<div class="arrow fragment" data-fragment-index="0">→</div>
|
| 135 |
+
<div class="panel good fragment" data-fragment-index="0">
|
| 136 |
+
<div class="panel-title">LLM-Rewritten FAQ</div>
|
| 137 |
+
<p style="margin:0;line-height:1.6;">
|
| 138 |
+
<strong>Q: What is photosynthesis?</strong><br>
|
| 139 |
+
A: Photosynthesis is the process by which plants convert sunlight into chemical energy.
|
| 140 |
+
It occurs in organelles called chloroplasts.<br><br>
|
| 141 |
+
<strong>Q: What are the inputs and outputs?</strong><br>
|
| 142 |
+
A: Plants take in carbon dioxide (CO₂) and water (H₂O), and using light energy,
|
| 143 |
+
produce glucose (C₆H₁₂O₆) and oxygen (O₂).
|
| 144 |
+
</p>
|
| 145 |
+
</div>
|
| 146 |
+
</div>
|
| 147 |
+
<p class="fragment" style="font-size:0.55em;margin-top:16px;">
|
| 148 |
+
Same knowledge, better packaging.<br>
|
| 149 |
+
You keep <span class="highlight">100%</span> of your data instead of discarding 90%.
|
| 150 |
+
</p>
|
| 151 |
+
<aside class="notes">
|
| 152 |
+
~1.5 min. Walk through the before/after. Left: messy web text with spam, typos, ads, broken formatting.
|
| 153 |
+
Right: same knowledge, but restructured as a clean FAQ. The LLM acts as a rewriter.
|
| 154 |
+
Key insight: you preserve the knowledge, you just improve the presentation. No data wasted.
|
| 155 |
+
</aside>
|
| 156 |
+
</section>
|
| 157 |
+
|
| 158 |
+
<!-- SLIDE 6: Research Question -->
|
| 159 |
+
<section>
|
| 160 |
+
<p class="section-label">Our Research</p>
|
| 161 |
+
<h2>What's the Best Recipe?</h2>
|
| 162 |
+
<p style="font-size:0.6em;color:rgba(255,255,255,0.6);margin-bottom:12px;">
|
| 163 |
+
Three knobs to tune: <span class="accent">source data</span>, <span class="accent">prompt strategy</span>, and
|
| 164 |
+
<span class="accent">generator model</span>.
|
| 165 |
+
</p>
|
| 166 |
+
<div style="display:flex;align-items:center;gap:24px;">
|
| 167 |
+
<iframe src="charts/experiment-flow.html" style="flex:0 1 75%;height:540px;border:none;border-radius:8px;background:transparent;" loading="lazy"></iframe>
|
| 168 |
+
<div class="fragment" style="display:flex;flex-direction:column;gap:24px;min-width:120px;text-align:center;align-self:flex-start;margin-top:40px;padding-left:40px;">
|
| 169 |
+
<div class="stat-box"><div class="num" style="font-size:1.6em;">70+</div><div class="label">experiments</div></div>
|
| 170 |
+
<div class="stat-box"><div class="num" style="font-size:1.6em;">1T+</div><div class="label">tokens generated</div></div>
|
| 171 |
+
<div class="stat-box"><div class="num" style="font-size:1.6em;">60k+</div><div class="label">GPU hours</div></div>
|
| 172 |
+
</div>
|
| 173 |
+
</div>
|
| 174 |
+
<aside class="notes">
|
| 175 |
+
~1 min. "We ran a massive ablation study. Three axes: what prompt do you give the rewriter,
|
| 176 |
+
which model does the rewriting, and what source data do you start from.
|
| 177 |
+
This Sankey shows our 70+ experiments flowing from source → prompt → model.
|
| 178 |
+
Over 1 trillion tokens generated, more than 60k GPU hours."
|
| 179 |
+
</aside>
|
| 180 |
+
</section>
|
| 181 |
+
|
| 182 |
+
<!-- SLIDE 7: How We Evaluate -->
|
| 183 |
+
<section>
|
| 184 |
+
<p class="section-label">Methodology</p>
|
| 185 |
+
<h2>Our Integration Test Suite</h2>
|
| 186 |
+
<div style="font-size:0.7em;margin-top:30px;">
|
| 187 |
+
<p style="color:rgba(255,255,255,0.5);">For each experiment, we:</p>
|
| 188 |
+
<ul>
|
| 189 |
+
<li class="fragment">Train a <span class="accent">1.2B parameter</span> model from scratch</li>
|
| 190 |
+
<li class="fragment">Feed it <span class="accent">20B tokens</span> of synthetic and original data</li>
|
| 191 |
+
<li class="fragment">Test on <span class="accent">12 benchmarks</span> (reading, math, reasoning, knowledge...)</li>
|
| 192 |
+
<li class="fragment">Compare against curated web datasets as baselines</li>
|
| 193 |
+
</ul>
|
| 194 |
+
<p class="fragment" style="margin-top:16px;font-size:0.9em;color:rgba(255,255,255,0.6);">
|
| 195 |
+
This is expensive so we tried proxies:
|
| 196 |
+
</p>
|
| 197 |
+
<ul class="fragment" style="font-size:0.85em;margin-top:4px;">
|
| 198 |
+
<li>DCLM/Edu scores (used for filtering pretraining data)</li>
|
| 199 |
+
<li>Smaller training runs</li>
|
| 200 |
+
</ul>
|
| 201 |
+
<p class="fragment" style="margin-top:4px;font-size:0.9em;">
|
| 202 |
+
None correlated well enough.
|
| 203 |
+
</p>
|
| 204 |
+
<p class="fragment" style="margin-top:10px;color:#f0c674;font-weight:600;">
|
| 205 |
+
No shortcuts: you must train and evaluate to know if your data is good.
|
| 206 |
+
</p>
|
| 207 |
+
</div>
|
| 208 |
+
<aside class="notes">
|
| 209 |
+
~1 min. "Think of it like an integration test suite for data quality.
|
| 210 |
+
We train a model on each dataset variant and see how it scores.
|
| 211 |
+
12 benchmarks covering reading comprehension, math, general knowledge, reasoning.
|
| 212 |
+
70+ separate training runs. No proxy metric can replace this."
|
| 213 |
+
</aside>
|
| 214 |
+
</section>
|
| 215 |
+
|
| 216 |
+
<!-- SLIDE 8: Spoiler -->
|
| 217 |
+
<section>
|
| 218 |
+
<p class="section-label">Spoiler</p>
|
| 219 |
+
<h2>FinePhrase Wins</h2>
|
| 220 |
+
<p style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-bottom:10px;">
|
| 221 |
+
Our best synthetic recipe outperforms all tested baselines, including curated web data.
|
| 222 |
+
</p>
|
| 223 |
+
<iframe src="charts/benchmark.html" class="chart-frame" style="height:360px;" loading="lazy"></iframe>
|
| 224 |
+
<p class="fragment" style="font-size:0.6em;margin-top:10px;color:rgba(255,255,255,0.6);">
|
| 225 |
+
Let's unpack <span class="accent">how</span>.
|
| 226 |
+
</p>
|
| 227 |
+
<aside class="notes">
|
| 228 |
+
~1 min. "Here's the punchline up front. FinePhrase, our best configuration,
|
| 229 |
+
beats all baselines including DCLM, Nemotron, REWIRE, and Cosmopedia.
|
| 230 |
+
Let's unpack the three key findings that got us here."
|
| 231 |
+
Transition to Section 2.
|
| 232 |
+
</aside>
|
| 233 |
+
</section>
|
| 234 |
+
|
| 235 |
+
<!-- ============================================================ -->
|
| 236 |
+
<!-- SECTION 2: EXPERIMENTAL RESULTS (~20%, slides 9-12, ~4 min) -->
|
| 237 |
+
<!-- ============================================================ -->
|
| 238 |
+
|
| 239 |
+
<!-- SLIDE 9: Prompts Matter Most -->
|
| 240 |
+
<section>
|
| 241 |
+
<p class="section-label">Finding #1</p>
|
| 242 |
+
<h2>Prompt Design Is the #1 Lever</h2>
|
| 243 |
+
<div class="two-col" style="font-size:0.6em;grid-template-columns:1.5fr 1fr;gap:20px;">
|
| 244 |
+
<div class="col" style="text-align:center;">
|
| 245 |
+
<iframe src="charts/benchmark-prompts.html" class="chart-frame" loading="lazy"
|
| 246 |
+
style="height:480px;" id="prompts-chart"></iframe>
|
| 247 |
+
</div>
|
| 248 |
+
<div class="col">
|
| 249 |
+
<p>Structured prompts beat everything:</p>
|
| 250 |
+
<ul>
|
| 251 |
+
<li class="fragment"><span class="highlight">Math</span> reformatting</li>
|
| 252 |
+
<li class="fragment"><span class="highlight">Table</span> extraction</li>
|
| 253 |
+
<li class="fragment"><span class="highlight">FAQ</span> generation</li>
|
| 254 |
+
<li class="fragment"><span class="highlight">Tutorial</span> rewriting</li>
|
| 255 |
+
</ul>
|
| 256 |
+
<p class="fragment" style="margin-top:20px;">
|
| 257 |
+
These beat curated web data <em>and</em> all prior synthetic baselines.
|
| 258 |
+
</p>
|
| 259 |
+
<p class="fragment" style="color:#f0c674;font-weight:600;margin-top:10px;">
|
| 260 |
+
The prompt matters more than the model or the source data.
|
| 261 |
+
</p>
|
| 262 |
+
</div>
|
| 263 |
+
</div>
|
| 264 |
+
<aside class="notes">
|
| 265 |
+
~1 min. "Finding number one, and the most important: prompt design is the biggest lever.
|
| 266 |
+
Structured formats like Math, Table, FAQ, Tutorial consistently outperform
|
| 267 |
+
both curated web data and prior synthetic approaches.
|
| 268 |
+
The prompt matters more than which model you use or what source data you start from."
|
| 269 |
+
</aside>
|
| 270 |
+
</section>
|
| 271 |
+
|
| 272 |
+
<!-- SLIDE 10: Smol Models Are Enough -->
|
| 273 |
+
<section>
|
| 274 |
+
<p class="section-label">Finding #2</p>
|
| 275 |
+
<h2>Smol Models Are Enough</h2>
|
| 276 |
+
<div class="two-col" style="font-size:0.6em;grid-template-columns:1.6fr 1fr;gap:16px;">
|
| 277 |
+
<div class="col" style="text-align:center;">
|
| 278 |
+
<iframe src="charts/benchmark-family.html" class="chart-frame" loading="lazy"
|
| 279 |
+
style="height:440px;"></iframe>
|
| 280 |
+
</div>
|
| 281 |
+
<div class="col">
|
| 282 |
+
<p>1B matches 4B, 12B, and 27B model performance.</p>
|
| 283 |
+
<p class="fragment"><span class="accent">SmolLM2-1.7B</span> beats Qwen, Gemma, Llama, Falcon, and Granite.</p>
|
| 284 |
+
<p class="fragment" style="margin-top:20px;">And it's <em>much</em> faster:</p>
|
| 285 |
+
<ul>
|
| 286 |
+
<li class="fragment"><span class="highlight">3.0x</span> faster than Gemma-3-12B<br><span style="color:rgba(255,255,255,0.4);">(9,220 vs 3,046 tps/gpu)</span></li>
|
| 287 |
+
<li class="fragment"><span class="highlight">5.3x</span> faster than Gemma-3-27B<br><span style="color:rgba(255,255,255,0.4);">(9,220 vs 1,724 tps/gpu)</span></li>
|
| 288 |
+
</ul>
|
| 289 |
+
<p class="fragment" style="color:#f0c674;font-weight:600;margin-top:20px;">
|
| 290 |
+
Better quality <em>and</em> faster inference.
|
| 291 |
+
</p>
|
| 292 |
+
</div>
|
| 293 |
+
</div>
|
| 294 |
+
<aside class="notes">
|
| 295 |
+
~1 min. "Finding two: you don't need a big model.
|
| 296 |
+
1B parameters match 4B, 12B, even 27B for rephrasing quality.
|
| 297 |
+
SmolLM2 at 1.7B beats all other model families.
|
| 298 |
+
And it's 3x faster than Gemma-12B, 5.3x faster than Gemma-27B.
|
| 299 |
+
Better quality AND faster inference. You don't need a big model."
|
| 300 |
+
</aside>
|
| 301 |
+
</section>
|
| 302 |
+
|
| 303 |
+
<!-- SLIDE 11: Diversity Paradox -->
|
| 304 |
+
<section>
|
| 305 |
+
<p class="section-label">Finding #3</p>
|
| 306 |
+
<h2>Diversity Beats Consistency</h2>
|
| 307 |
+
<div style="font-size:0.65em;">
|
| 308 |
+
<div class="two-col">
|
| 309 |
+
<div class="col">
|
| 310 |
+
<p><span class="highlight">Messy beats polished.</span></p>
|
| 311 |
+
<p class="fragment" data-fragment-index="1">SmolLM2's varied, inconsistent outputs outperform
|
| 312 |
+
Qwen3's template-locked, clean outputs.</p>
|
| 313 |
+
<p class="fragment" data-fragment-index="3" style="margin-top:20px;">
|
| 314 |
+
<span class="accent">Synthetic-only fails.</span><br>
|
| 315 |
+
You must mix synthetic data with original web data.
|
| 316 |
+
</p>
|
| 317 |
+
<p class="fragment" data-fragment-index="4" style="margin-top:20px;">
|
| 318 |
+
The mix-in dataset matters as much as the synthetic data itself.
|
| 319 |
+
</p>
|
| 320 |
+
</div>
|
| 321 |
+
<div class="col fragment" data-fragment-index="2">
|
| 322 |
+
<div style="background:rgba(255,255,255,0.04);border:1px solid rgba(255,255,255,0.08);border-radius:16px;padding:24px;">
|
| 323 |
+
<div style="font-size:1em;font-weight:700;margin-bottom:12px;">Template Collapse</div>
|
| 324 |
+
<div style="font-size:0.9em;line-height:1.6;">
|
| 325 |
+
<p style="color:rgba(255,255,255,0.5);">Qwen3 Math outputs:</p>
|
| 326 |
+
<p><span class="danger">115 / 1000</span> samples start with the exact same sentence</p>
|
| 327 |
+
<p style="margin-top:12px;color:rgba(255,255,255,0.5);">SmolLM2 Math outputs:</p>
|
| 328 |
+
<p><span class="accent">Highly varied</span> formatting and structure</p>
|
| 329 |
+
<p style="margin-top:16px;color:#f0c674;font-weight:600;">
|
| 330 |
+
Diversity beats consistency for pretraining.
|
| 331 |
+
</p>
|
| 332 |
+
</div>
|
| 333 |
+
</div>
|
| 334 |
+
</div>
|
| 335 |
+
</div>
|
| 336 |
+
</div>
|
| 337 |
+
<aside class="notes">
|
| 338 |
+
~1 min. "Finding three: diversity beats polish. This was counterintuitive.
|
| 339 |
+
Qwen3's math outputs are very clean and consistent, but 115 out of 1000 start identically.
|
| 340 |
+
SmolLM2's outputs are messier but more varied. The varied outputs win.
|
| 341 |
+
Also: synthetic-only training fails. You need to mix in original data.
|
| 342 |
+
The mix-in dataset influence is sometimes larger than the synthetic data itself."
|
| 343 |
+
</aside>
|
| 344 |
+
</section>
|
| 345 |
+
|
| 346 |
+
<!-- SLIDE 12: Results Summary -->
|
| 347 |
+
<section>
|
| 348 |
+
<p class="section-label">Summary</p>
|
| 349 |
+
<h2>What We Found</h2>
|
| 350 |
+
<ul class="takeaway-list" style="margin-top:30px;">
|
| 351 |
+
<li class="fragment">
|
| 352 |
+
<span class="accent">Prompt design</span> is the #1 lever.
|
| 353 |
+
Structured formats (Math, Table, FAQ, Tutorial) outperform everything.
|
| 354 |
+
</li>
|
| 355 |
+
<li class="fragment">
|
| 356 |
+
<span class="accent">1B models suffice.</span>
|
| 357 |
+
SmolLM2-1.7B is the best rephraser across the board.
|
| 358 |
+
</li>
|
| 359 |
+
<li class="fragment">
|
| 360 |
+
<span class="accent">Mix original data in.</span>
|
| 361 |
+
Synthetic-only fails. The mix-in dataset matters.
|
| 362 |
+
</li>
|
| 363 |
+
<li class="fragment">
|
| 364 |
+
<span class="accent">Diversity wins over polish.</span>
|
| 365 |
+
Varied, messy outputs beat clean, template-locked ones.
|
| 366 |
+
</li>
|
| 367 |
+
</ul>
|
| 368 |
+
<aside class="notes">
|
| 369 |
+
~30s. Quick recap of findings. Click through each point. These four bullets
|
| 370 |
+
are the core message of the talk. Transition: "Now let's talk about
|
| 371 |
+
the engineering challenge of actually doing this at scale."
|
| 372 |
+
</aside>
|
| 373 |
+
</section>
|
| 374 |
+
|
| 375 |
+
<!-- ============================================================ -->
|
| 376 |
+
<!-- SECTION 3: INFRASTRUCTURE (~20%, slides 13-16, ~4 min) -->
|
| 377 |
+
<!-- ============================================================ -->
|
| 378 |
+
|
| 379 |
+
<!-- SLIDE 13: Engineering Challenge -->
|
| 380 |
+
<section>
|
| 381 |
+
<p class="section-label">Infrastructure</p>
|
| 382 |
+
<h2>How Do You Rephrase 1T Tokens?</h2>
|
| 383 |
+
<div style="font-size:0.65em;margin-top:30px;">
|
| 384 |
+
<p>Each experiment generates ~15B tokens.</p>
|
| 385 |
+
<p class="fragment">70+ experiments = <span class="accent">1T+ tokens</span> of LLM output.</p>
|
| 386 |
+
<p class="fragment" style="margin-top:20px;">
|
| 387 |
+
At ~4,750 tokens/sec/GPU (mean across all experiments):
|
| 388 |
+
</p>
|
| 389 |
+
<div class="fragment stat-row" style="margin-top:20px;">
|
| 390 |
+
<div class="stat-box"><div class="num">~880</div><div class="label">GPU-hours per experiment</div></div>
|
| 391 |
+
<div class="stat-box"><div class="num">~$3k</div><div class="label">cloud cost per experiment</div></div>
|
| 392 |
+
<div class="stat-box"><div class="num">~$215k</div><div class="label">total compute budget</div></div>
|
| 393 |
+
</div>
|
| 394 |
+
<p class="fragment" style="margin-top:20px;color:#f0c674;font-weight:600;">
|
| 395 |
+
You need a scalable, fault-tolerant pipeline.
|
| 396 |
+
</p>
|
| 397 |
+
</div>
|
| 398 |
+
<aside class="notes">
|
| 399 |
+
~1 min. "Now the engineering side. Each experiment is 15 billion tokens of LLM generation.
|
| 400 |
+
70+ experiments. That's over a trillion tokens total. At $3.50/GPU-hour,
|
| 401 |
+
each experiment costs about $3,000. You need infrastructure that handles failures,
|
| 402 |
+
checkpoints, and scales across many nodes."
|
| 403 |
+
</aside>
|
| 404 |
+
</section>
|
| 405 |
+
|
| 406 |
+
<!-- SLIDE 14: DataTrove + vLLM -->
|
| 407 |
+
<section>
|
| 408 |
+
<p class="section-label">Infrastructure</p>
|
| 409 |
+
<h2>DataTrove + vLLM</h2>
|
| 410 |
+
<iframe src="charts/pipeline.html" class="chart-frame" loading="lazy" style="height:440px;margin-bottom:0;"></iframe>
|
| 411 |
+
<div style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-top:2px;">
|
| 412 |
+
DataTrove orchestrates the pipeline. vLLM serves the model with optimized batching and prefix caching.
|
| 413 |
+
</div>
|
| 414 |
+
<aside class="notes">
|
| 415 |
+
~1 min. "We built on DataTrove, our open-source data processing library.
|
| 416 |
+
The pipeline is Read → Transform → Write. The Transform step calls vLLM,
|
| 417 |
+
a high-throughput inference engine with tensor parallelism, chunked prefill, and prefix caching.
|
| 418 |
+
Everything runs on Slurm with checkpointing and auto-recovery.
|
| 419 |
+
Outputs go straight to a Hugging Face dataset with auto-generated cards."
|
| 420 |
+
</aside>
|
| 421 |
+
</section>
|
| 422 |
+
|
| 423 |
+
<!-- SLIDE 15: Throughput Optimization -->
|
| 424 |
+
<section>
|
| 425 |
+
<p class="section-label">Infrastructure</p>
|
| 426 |
+
<h2>Throughput Optimization</h2>
|
| 427 |
+
<p style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-bottom:8px;">
|
| 428 |
+
18 models benchmarked on H100 GPUs. Two tiers of optimization.
|
| 429 |
+
</p>
|
| 430 |
+
<iframe src="charts/throughput.html" class="chart-frame" loading="lazy" style="height:420px;"></iframe>
|
| 431 |
+
<aside class="notes">
|
| 432 |
+
~1 min. "We benchmarked 18 models across two tiers of optimization.
|
| 433 |
+
Tier 0: tensor parallelism, batch sizes, sequence lengths. Tier 1: GPU memory utilization, speculative decoding.
|
| 434 |
+
For large MoE models like GPT-OSS-120B, Tier 0 alone gives 1.95x speedup, cutting cost by nearly half.
|
| 435 |
+
Speculative decoding helps small models but can hurt others (Gemma 3 regresses due to vocab size)."
|
| 436 |
+
</aside>
|
| 437 |
+
</section>
|
| 438 |
+
|
| 439 |
+
<!-- SLIDE 16: Cost-Performance -->
|
| 440 |
+
<section>
|
| 441 |
+
<p class="section-label">Infrastructure</p>
|
| 442 |
+
<h2>Cost vs. Performance</h2>
|
| 443 |
+
<p style="font-size:0.55em;color:rgba(255,255,255,0.5);margin-bottom:8px;">
|
| 444 |
+
Small models + good prompts dominate the Pareto frontier.
|
| 445 |
+
</p>
|
| 446 |
+
<iframe src="charts/cost-efficiency.html" class="chart-frame" loading="lazy" style="height:420px;"></iframe>
|
| 447 |
+
<p class="fragment" style="font-size:0.6em;margin-top:4px;color:#f0c674;font-weight:600;">
|
| 448 |
+
Invest in prompt design, not model size.
|
| 449 |
+
</p>
|
| 450 |
+
<aside class="notes">
|
| 451 |
+
~1 min. "This scatter plot shows GPU time vs downstream performance for all experiments.
|
| 452 |
+
The Pareto frontier is dominated by small models with structured prompts.
|
| 453 |
+
The baselines on the left have zero rephrasing cost. Our best synthetic setups
|
| 454 |
+
beat them while remaining cost-efficient. Key takeaway: optimize throughput first,
|
| 455 |
+
then worry about model size."
|
| 456 |
+
</aside>
|
| 457 |
+
</section>
|
| 458 |
+
|
| 459 |
+
<!-- ============================================================ -->
|
| 460 |
+
<!-- SECTION 4: CONCLUSIONS (~20%, slides 17-21, ~4 min) -->
|
| 461 |
+
<!-- ============================================================ -->
|
| 462 |
+
|
| 463 |
+
<!-- SLIDE 17: The FinePhrase Recipe -->
|
| 464 |
+
<section>
|
| 465 |
+
<p class="section-label">Conclusion</p>
|
| 466 |
+
<h2>The FinePhrase Recipe</h2>
|
| 467 |
+
<div class="recipe-diagram fragment">
|
| 468 |
+
<div class="box">
|
| 469 |
+
<div style="font-size:1.4em;">📄</div>
|
| 470 |
+
<div style="font-weight:700;">Source Data</div>
|
| 471 |
+
<div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Web text<br>(even low-quality)</div>
|
| 472 |
+
</div>
|
| 473 |
+
<div class="plus">+</div>
|
| 474 |
+
<div class="box">
|
| 475 |
+
<div style="font-size:1.4em;">📝</div>
|
| 476 |
+
<div style="font-weight:700;">Structured Prompt</div>
|
| 477 |
+
<div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Math / Table /<br>FAQ / Tutorial</div>
|
| 478 |
+
</div>
|
| 479 |
+
<div class="plus">+</div>
|
| 480 |
+
<div class="box">
|
| 481 |
+
<div style="font-size:1.4em;">🤖</div>
|
| 482 |
+
<div style="font-weight:700;">SmolLM2-1.7B</div>
|
| 483 |
+
<div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Small, fast,<br>diverse outputs</div>
|
| 484 |
+
</div>
|
| 485 |
+
<div class="equals">=</div>
|
| 486 |
+
<div class="box result">
|
| 487 |
+
<div style="font-size:1.4em;">✨</div>
|
| 488 |
+
<div style="font-weight:700;color:#7c6ff7;">FinePhrase</div>
|
| 489 |
+
<div style="font-size:0.85em;color:rgba(255,255,255,0.4);">Best synthetic<br>pretraining data</div>
|
| 490 |
+
</div>
|
| 491 |
+
</div>
|
| 492 |
+
<p class="fragment" style="font-size:0.6em;color:rgba(255,255,255,0.5);margin-top:20px;">
|
| 493 |
+
Mixed with high-quality original data (e.g., FineWeb-Edu) for best results.
|
| 494 |
+
</p>
|
| 495 |
+
<aside class="notes">
|
| 496 |
+
~1 min. "Here's the recipe in one slide. Take any web text, even low-quality,
|
| 497 |
+
apply a structured prompt (Math, Table, FAQ, Tutorial), run it through SmolLM2-1.7B,
|
| 498 |
+
and mix the output with high-quality original data. That's FinePhrase.
|
| 499 |
+
It outperforms all tested baselines."
|
| 500 |
+
</aside>
|
| 501 |
+
</section>
|
| 502 |
+
|
| 503 |
+
<!-- SLIDE 18: What Surprised Us -->
|
| 504 |
+
<section>
|
| 505 |
+
<p class="section-label">Conclusion</p>
|
| 506 |
+
<h2>What Surprised Us</h2>
|
| 507 |
+
<div class="surprise-grid fragment">
|
| 508 |
+
<div class="surprise-card">
|
| 509 |
+
<div class="icon">🤷</div>
|
| 510 |
+
<h4>Typos Don't Matter</h4>
|
| 511 |
+
<p>REWIRE's original prompt had typos. Fixing them made no measurable difference to downstream performance.</p>
|
| 512 |
+
</div>
|
| 513 |
+
<div class="surprise-card">
|
| 514 |
+
<div class="icon">📊</div>
|
| 515 |
+
<h4>Proxy Scores Lie</h4>
|
| 516 |
+
<p>Edu-score and DCLM-score do not reliably predict downstream performance. You must train and evaluate.</p>
|
| 517 |
+
</div>
|
| 518 |
+
<div class="surprise-card">
|
| 519 |
+
<div class="icon">🎲</div>
|
| 520 |
+
<h4>Messier Is Better</h4>
|
| 521 |
+
<p>Varied, inconsistent outputs from SmolLM2 beat Qwen3's polished, template-locked outputs every time.</p>
|
| 522 |
+
</div>
|
| 523 |
+
</div>
|
| 524 |
+
<aside class="notes">
|
| 525 |
+
~1 min. "Three things that surprised us. First: typos in prompts don't matter.
|
| 526 |
+
REWIRE's prompt had actual typos and fixing them changed nothing.
|
| 527 |
+
Second: quality proxy scores like edu-score don't predict performance. You must train.
|
| 528 |
+
Third: messy, varied outputs consistently beat clean, polished ones. Diversity is king."
|
| 529 |
+
</aside>
|
| 530 |
+
</section>
|
| 531 |
+
|
| 532 |
+
<!-- SLIDE 19: Everything Is Open -->
|
| 533 |
+
<section>
|
| 534 |
+
<p class="section-label">Open Source</p>
|
| 535 |
+
<h2>Everything Is Open</h2>
|
| 536 |
+
<div style="font-size:0.65em;margin-top:20px;">
|
| 537 |
+
<ul>
|
| 538 |
+
<li class="fragment">All prompts, configs, and pipeline code</li>
|
| 539 |
+
<li class="fragment">Generated datasets on the Hugging Face Hub</li>
|
| 540 |
+
<li class="fragment">Throughput benchmarks for 18 models</li>
|
| 541 |
+
<li class="fragment">Blog post with interactive charts</li>
|
| 542 |
+
</ul>
|
| 543 |
+
<div class="fragment" style="margin-top:30px;">
|
| 544 |
+
<p style="font-weight:700;color:#f0c674;font-size:1.1em;">Future directions:</p>
|
| 545 |
+
<ul style="color:rgba(255,255,255,0.5);">
|
| 546 |
+
<li>Diffusion LMs for faster inference</li>
|
| 547 |
+
<li>Scaling to more data (ablations trained on only 21B tokens)</li>
|
| 548 |
+
<li>Mixing ratio: how little synthetic data can you get away with?</li>
|
| 549 |
+
<li>Best-of-N filtering on synthetic outputs</li>
|
| 550 |
+
</ul>
|
| 551 |
+
</div>
|
| 552 |
+
</div>
|
| 553 |
+
<aside class="notes">
|
| 554 |
+
~1 min. "We're releasing everything. All prompts, the pipeline code in DataTrove,
|
| 555 |
+
the generated datasets on the Hub, throughput benchmarks.
|
| 556 |
+
The blog post itself has interactive charts you can explore.
|
| 557 |
+
Future work: we're looking at diffusion LMs for faster inference,
|
| 558 |
+
scaling beyond our 21B token ablations, exploring mixing ratios to find how little
|
| 559 |
+
synthetic data you actually need, and using best-of-N filtering on synthetic outputs."
|
| 560 |
+
</aside>
|
| 561 |
+
</section>
|
| 562 |
+
|
| 563 |
+
<!-- SLIDE 20: Academia Hub -->
|
| 564 |
+
<section>
|
| 565 |
+
<img src="assets/academia-hub.png" class="img-contain" style="max-height:560px;border-radius:12px;box-shadow:0 8px 40px rgba(0,0,0,0.4);" alt="Hugging Face Academia Hub">
|
| 566 |
+
<aside class="notes">
|
| 567 |
+
~30s. "If you're at a university or research lab, check out our Academia Hub:
|
| 568 |
+
institution-wide access to the Hugging Face Hub with priority GPU access,
|
| 569 |
+
inference credits, storage, and enterprise admin."
|
| 570 |
+
</aside>
|
| 571 |
+
</section>
|
| 572 |
+
|
| 573 |
+
<!-- SLIDE 21: Q&A -->
|
| 574 |
+
<section class="center-slide">
|
| 575 |
+
<h2>Thank You</h2>
|
| 576 |
+
<p style="font-size:0.6em;color:rgba(255,255,255,0.5);margin-top:10px;">Questions?</p>
|
| 577 |
+
<div style="display:flex;align-items:center;justify-content:center;gap:28px;margin-top:30px;">
|
| 578 |
+
<img src="assets/profile.jpg" style="width:90px;height:90px;border-radius:50%;border:2px solid rgba(255,255,255,0.15);object-fit:cover;" alt="Joel Niklaus">
|
| 579 |
+
<div style="text-align:left;font-size:0.55em;">
|
| 580 |
+
<div style="font-weight:700;font-size:1.2em;margin-bottom:8px;">Joel Niklaus</div>
|
| 581 |
+
<div style="display:flex;align-items:center;gap:8px;margin-bottom:6px;">
|
| 582 |
+
<svg width="18" height="18" viewBox="0 0 24 24" fill="rgba(255,255,255,0.7)"><path d="M20.447 20.452h-3.554v-5.569c0-1.328-.027-3.037-1.852-3.037-1.853 0-2.136 1.445-2.136 2.939v5.667H9.351V9h3.414v1.561h.046c.477-.9 1.637-1.85 3.37-1.85 3.601 0 4.267 2.37 4.267 5.455v6.286zM5.337 7.433a2.062 2.062 0 01-2.063-2.065 2.064 2.064 0 112.063 2.065zm1.782 13.019H3.555V9h3.564v11.452zM22.225 0H1.771C.792 0 0 .774 0 1.729v20.542C0 23.227.792 24 1.771 24h20.451C23.2 24 24 23.227 24 22.271V1.729C24 .774 23.2 0 22.222 0h.003z"/></svg>
|
| 583 |
+
<a href="https://linkedin.com/in/joelniklaus" target="_blank" style="color:rgba(255,255,255,0.7);text-decoration:none;" rel="noopener noreferrer">joelniklaus</a>
|
| 584 |
+
</div>
|
| 585 |
+
<div style="display:flex;align-items:center;gap:8px;">
|
| 586 |
+
<svg width="18" height="18" viewBox="0 0 24 24" fill="rgba(255,255,255,0.7)"><path d="M18.244 2.25h3.308l-7.227 8.26 8.502 11.24H16.17l-5.214-6.817L4.99 21.75H1.68l7.73-8.835L1.254 2.25H8.08l4.713 6.231zm-1.161 17.52h1.833L7.084 4.126H5.117z"/></svg>
|
| 587 |
+
<a href="https://x.com/joelniklaus" target="_blank" style="color:rgba(255,255,255,0.7);text-decoration:none;" rel="noopener noreferrer">@joelniklaus</a>
|
| 588 |
+
</div>
|
| 589 |
+
</div>
|
| 590 |
+
</div>
|
| 591 |
+
<p style="margin-top:24px;font-size:0.55em;color:rgba(255,255,255,0.4);">
|
| 592 |
+
Stay tuned for the blog post with many more details.
|
| 593 |
+
</p>
|
| 594 |
+
<aside class="notes">
|
| 595 |
+
Q&A time. Mention they can reach out on LinkedIn or X. Have the blog open in a browser tab
|
| 596 |
+
for live demos if questions come up.
|
| 597 |
+
</aside>
|
| 598 |
+
</section>
|
| 599 |
+
|
| 600 |
+
</div><!-- /slides -->
|
| 601 |
+
</div><!-- /reveal -->
|
| 602 |
+
|
| 603 |
+
<script src="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/dist/reveal.js"></script>
|
| 604 |
+
<script src="https://cdn.jsdelivr.net/npm/reveal.js@5.1.0/plugin/notes/notes.js"></script>
|
| 605 |
+
<script>
|
| 606 |
+
Reveal.initialize({ // Boot the reveal.js deck with the options below.
|
| 607 |
+
hash: true, // Mirror the current slide in the URL hash so a reload or shared link returns to it.
|
| 608 |
+
slideNumber: 'c/t', // Slide number format: current slide / total slides.
|
| 609 |
+
showSlideNumber: 'speaker', // Show the slide number only in the speaker view, not to the audience.
|
| 610 |
+
transition: 'fade', // Cross-fade between slides.
|
| 611 |
+
transitionSpeed: 'fast',
|
| 612 |
+
center: false, // No vertical centering; slides are laid out from the top.
|
| 613 |
+
width: 1200, // Authoring size in px; reveal.js scales the deck to fit the viewport.
|
| 614 |
+
height: 700,
|
| 615 |
+
margin: 0.06, // Fraction of the display kept empty around the slide content.
|
| 616 |
+
plugins: [RevealNotes], // Speaker-notes plugin, provided by the notes.js script loaded above.
|
| 617 |
+
});
|
| 618 |
+
</script>
|
| 619 |
+
</body>
|
| 620 |
+
</html>
|
app/presentation/se2026/standalone.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app/presentation/se2026/style.css
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
:root {
|
| 2 |
+
--text-color: rgba(255,255,255,0.88);
|
| 3 |
+
--muted-color: rgba(255,255,255,0.45);
|
| 4 |
+
--surface-bg: rgba(30,30,40,0.95);
|
| 5 |
+
--border-color: rgba(255,255,255,0.1);
|
| 6 |
+
--axis-color: rgba(255,255,255,0.15);
|
| 7 |
+
--tick-color: rgba(255,255,255,0.5);
|
| 8 |
+
--grid-color: rgba(255,255,255,0.06);
|
| 9 |
+
--primary-color: #7c6ff7;
|
| 10 |
+
--danger: #e05252;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
html { data-theme: dark; } /* NOTE(review): "data-theme" is not a CSS property, so browsers discard this declaration and the rule has no effect. If a dark-theme hook is intended, it presumably belongs in the markup as <html data-theme="dark"> -- confirm. */
|
| 14 |
+
|
| 15 |
+
.reveal {
|
| 16 |
+
font-family: 'Inter', system-ui, -apple-system, sans-serif;
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
.reveal h1, .reveal h2, .reveal h3 {
|
| 20 |
+
font-weight: 700;
|
| 21 |
+
text-transform: none;
|
| 22 |
+
letter-spacing: -0.02em;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
.reveal .slides section {
|
| 26 |
+
top: 0 !important;
|
| 27 |
+
padding-top: 5px;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
.reveal .slides section.center-slide {
|
| 31 |
+
top: auto !important;
|
| 32 |
+
display: flex !important;
|
| 33 |
+
flex-direction: column;
|
| 34 |
+
justify-content: center;
|
| 35 |
+
height: 100%;
|
| 36 |
+
padding-top: 0;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
.reveal h2 {
|
| 40 |
+
font-size: 1.6em;
|
| 41 |
+
margin-top: 0;
|
| 42 |
+
margin-bottom: 0.3em;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.reveal h3 {
|
| 46 |
+
font-size: 1.15em;
|
| 47 |
+
color: rgba(255,255,255,0.6);
|
| 48 |
+
font-weight: 500;
|
| 49 |
+
margin-bottom: 0.4em;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.reveal .subtitle {
|
| 53 |
+
font-size: 0.55em;
|
| 54 |
+
font-weight: 400;
|
| 55 |
+
color: rgba(255,255,255,0.5);
|
| 56 |
+
margin-top: 0.3em;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.reveal .section-label {
|
| 60 |
+
font-size: 0.5em;
|
| 61 |
+
font-weight: 700;
|
| 62 |
+
text-transform: uppercase;
|
| 63 |
+
letter-spacing: 0.15em;
|
| 64 |
+
color: #7c6ff7;
|
| 65 |
+
margin-bottom: 0.1em;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
.reveal ul {
|
| 69 |
+
list-style: none;
|
| 70 |
+
padding-left: 0;
|
| 71 |
+
font-size: 0.75em;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.reveal ul li {
|
| 75 |
+
margin-bottom: 0.5em;
|
| 76 |
+
padding-left: 1.2em;
|
| 77 |
+
position: relative;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
.reveal ul li::before {
|
| 81 |
+
content: '→';
|
| 82 |
+
position: absolute;
|
| 83 |
+
left: 0;
|
| 84 |
+
color: #7c6ff7;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.reveal .accent {
|
| 88 |
+
color: #7c6ff7;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.reveal .highlight {
|
| 92 |
+
color: #f0c674;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
.reveal .danger {
|
| 96 |
+
color: #e05252;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
.reveal .big-number {
|
| 100 |
+
font-size: 3em;
|
| 101 |
+
font-weight: 800;
|
| 102 |
+
line-height: 1.1;
|
| 103 |
+
color: #7c6ff7;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.reveal .big-number .unit {
|
| 107 |
+
font-size: 0.35em;
|
| 108 |
+
font-weight: 500;
|
| 109 |
+
color: rgba(255,255,255,0.5);
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.stat-row {
|
| 113 |
+
display: flex;
|
| 114 |
+
justify-content: center;
|
| 115 |
+
gap: 60px;
|
| 116 |
+
margin: 30px 0;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.stat-box {
|
| 120 |
+
text-align: center;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
.stat-box .num {
|
| 124 |
+
font-size: 2.2em;
|
| 125 |
+
font-weight: 800;
|
| 126 |
+
color: #7c6ff7;
|
| 127 |
+
line-height: 1.1;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
.stat-box .label {
|
| 131 |
+
font-size: 0.55em;
|
| 132 |
+
color: rgba(255,255,255,0.5);
|
| 133 |
+
margin-top: 4px;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
.two-col {
|
| 137 |
+
display: grid;
|
| 138 |
+
grid-template-columns: 1fr 1fr;
|
| 139 |
+
gap: 40px;
|
| 140 |
+
text-align: left;
|
| 141 |
+
align-items: start;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.two-col .col {
|
| 145 |
+
padding: 0;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.before-after {
|
| 149 |
+
display: grid;
|
| 150 |
+
grid-template-columns: 1fr auto 1fr;
|
| 151 |
+
gap: 16px;
|
| 152 |
+
align-items: start;
|
| 153 |
+
font-size: 0.52em;
|
| 154 |
+
text-align: left;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.before-after .arrow {
|
| 158 |
+
font-size: 2em;
|
| 159 |
+
color: #7c6ff7;
|
| 160 |
+
align-self: center;
|
| 161 |
+
padding-top: 20px;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.before-after .panel {
|
| 165 |
+
background: rgba(255,255,255,0.04);
|
| 166 |
+
border: 1px solid rgba(255,255,255,0.08);
|
| 167 |
+
border-radius: 12px;
|
| 168 |
+
padding: 18px 20px;
|
| 169 |
+
line-height: 1.5;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.before-after .panel.bad {
|
| 173 |
+
border-color: rgba(224,82,82,0.3);
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.before-after .panel.good {
|
| 177 |
+
border-color: rgba(124,111,247,0.3);
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
.before-after .panel-title {
|
| 181 |
+
font-weight: 700;
|
| 182 |
+
font-size: 1.1em;
|
| 183 |
+
margin-bottom: 8px;
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
.before-after .panel.bad .panel-title {
|
| 187 |
+
color: #e05252;
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
.before-after .panel.good .panel-title {
|
| 191 |
+
color: #7c6ff7;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
.chart-frame {
|
| 195 |
+
width: 100%;
|
| 196 |
+
height: 480px;
|
| 197 |
+
border: none;
|
| 198 |
+
border-radius: 8px;
|
| 199 |
+
background: transparent;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
.chart-frame.tall {
|
| 203 |
+
height: 540px;
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
.chart-frame.short {
|
| 207 |
+
height: 400px;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
.img-contain {
|
| 211 |
+
max-width: 100%;
|
| 212 |
+
max-height: 480px;
|
| 213 |
+
border-radius: 8px;
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
.recipe-diagram {
|
| 217 |
+
display: flex;
|
| 218 |
+
align-items: center;
|
| 219 |
+
justify-content: center;
|
| 220 |
+
gap: 20px;
|
| 221 |
+
font-size: 0.75em;
|
| 222 |
+
margin: 30px 0;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.recipe-diagram .box {
|
| 226 |
+
background: rgba(255,255,255,0.06);
|
| 227 |
+
border: 1px solid rgba(255,255,255,0.12);
|
| 228 |
+
border-radius: 12px;
|
| 229 |
+
padding: 16px 22px;
|
| 230 |
+
text-align: center;
|
| 231 |
+
min-width: 120px;
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
.recipe-diagram .box.result {
|
| 235 |
+
border-color: rgba(124,111,247,0.4);
|
| 236 |
+
background: rgba(124,111,247,0.08);
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
.recipe-diagram .plus {
|
| 240 |
+
font-size: 1.8em;
|
| 241 |
+
color: rgba(255,255,255,0.3);
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
.recipe-diagram .equals {
|
| 245 |
+
font-size: 1.8em;
|
| 246 |
+
color: #7c6ff7;
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
.takeaway-list {
|
| 250 |
+
font-size: 0.65em;
|
| 251 |
+
max-width: 700px;
|
| 252 |
+
margin: 0 auto;
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
.takeaway-list li {
|
| 256 |
+
margin-bottom: 0.7em;
|
| 257 |
+
line-height: 1.5;
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
.qr-section {
|
| 261 |
+
display: flex;
|
| 262 |
+
align-items: center;
|
| 263 |
+
justify-content: center;
|
| 264 |
+
gap: 40px;
|
| 265 |
+
margin-top: 20px;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.surprise-grid {
|
| 269 |
+
display: grid;
|
| 270 |
+
grid-template-columns: 1fr 1fr 1fr;
|
| 271 |
+
gap: 20px;
|
| 272 |
+
font-size: 0.6em;
|
| 273 |
+
text-align: left;
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
.surprise-card {
|
| 277 |
+
background: rgba(255,255,255,0.04);
|
| 278 |
+
border: 1px solid rgba(255,255,255,0.08);
|
| 279 |
+
border-radius: 12px;
|
| 280 |
+
padding: 20px;
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
.surprise-card .icon {
|
| 284 |
+
font-size: 1.6em;
|
| 285 |
+
margin-bottom: 8px;
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
.surprise-card h4 {
|
| 289 |
+
font-size: 1.05em;
|
| 290 |
+
margin: 0 0 8px 0;
|
| 291 |
+
color: #f0c674;
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
.surprise-card p {
|
| 295 |
+
margin: 0;
|
| 296 |
+
color: rgba(255,255,255,0.6);
|
| 297 |
+
line-height: 1.5;
|
| 298 |
+
}
|