finephrase

Running on CPU Upgrade

App Files Files Community

joelniklaus HF Staff committed on Feb 17

Commit

05b639b

1 Parent(s): 17a96eb

add difference in macro average on benchmarks and add percentage difference for dclm and edu scores

Browse files

Files changed (3) hide show

app/src/content/assets/data/rephrasing_metadata.json +2 -2
app/src/content/chapters/experiments.mdx +1 -1
app/src/content/embeds/banner.html +62 -8

app/src/content/assets/data/rephrasing_metadata.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ead55bd8a7db04855376728b9f0bfea398e445c9bad179fcb027dceddcff8a1
-size 69832

 version https://git-lfs.github.com/spec/v1
+oid sha256:65df95a2f19779d4958b9e68b7959deec89eb5da2daedc2a956b8b1863e42160
+size 128848

app/src/content/chapters/experiments.mdx CHANGED Viewed

@@ -8,7 +8,7 @@ import FigRef from "../../components/FigRef.astro";
 {/* TODO: shorten the vllm inference benchmark or put stuff into the appendix */}
 {/* TODO: potentially make a widget for data exploration: look at the same few samples generated by different models or transformed with different prompts */}
 {/* TODO: add a plot for the table with the benchmark results */}
-{/* TODO: Analyze if certain models are more verbose than others (how many tokens did they produce per prompt?) (wait for last rephrasing job to be done) */}
 {/* TODO: Run dclm and edu score impact analysis on model verbosity data (wait for last rephrasing job to be done) */}
 {/* TODO: Add appendix section of weird unexplainable results? */}

 {/* TODO: shorten the vllm inference benchmark or put stuff into the appendix */}
 {/* TODO: potentially make a widget for data exploration: look at the same few samples generated by different models or transformed with different prompts */}
 {/* TODO: add a plot for the table with the benchmark results */}
+{/* TODO: Analyze if certain models are more verbose than others (how many tokens did they produce per prompt?) */}
 {/* TODO: Run dclm and edu score impact analysis on model verbosity data (wait for last rephrasing job to be done) */}
 {/* TODO: Add appendix section of weird unexplainable results? */}

app/src/content/embeds/banner.html CHANGED Viewed

@@ -22,12 +22,13 @@
             container.style.position = 'relative';
             const JSON_PATHS = ['/data/rephrasing_metadata.json', './assets/data/rephrasing_metadata.json'];
-            const fetchFirstAvailable = async (paths) => {
                 for (const p of paths) {
-                    try { const r = await fetch(p, { cache: 'no-cache' }); if (r.ok) return r.json(); } catch(_) {}
                 }
-                throw new Error('rephrasing_metadata.json not found');
             };
             // Derive display fields from a JSON entry
@@ -58,12 +59,41 @@
                 return d.toLocaleString() + ' days';
             }
-            function parseEntry(d, i) {
                 const [cat, promptFile] = d.prompt.split('/');
                 const promptKey = promptFile.replace('.md', '');
                 const modelShort = d.model.split('/').pop();
                 const modelLower = d.model.toLowerCase();
                 const family = Object.entries(FAMILY_MAP).find(([k]) => modelLower.includes(k))?.[1] || 'Other';
                 return {
                     id: i,
                     prompt: PROMPT_LABELS[promptKey] || promptKey,
@@ -78,7 +108,11 @@
                     gpuTime: gpuDays(d.gpu_time_seconds),
                     docsM: d.num_documents / 1e6,
                     dclm: d.dclm_score_difference,
                     edu: d.edu_score_difference,
                     phase: (i * 2.399) % (Math.PI * 2)
                 };
             }
@@ -88,8 +122,23 @@
                 return n.toFixed(1) + 'B';
             }
-            fetchFirstAvailable(JSON_PATHS).then(raw => {
-                const data = raw.map(parseEntry);
                 const totalOutputB = data.reduce((s, d) => s + d.compB, 0);
                 const totalDocsM = data.reduce((s, d) => s + d.docsM, 0);
                 const numExperiments = data.length;
@@ -299,6 +348,10 @@
                             const c = col[d.family];
                             const dc = d.dclm >= 0 ? '#16a34a' : '#dc2626';
                             const ec = d.edu >= 0 ? '#16a34a' : '#dc2626';
                             tip.html(
                                 `<div style="display:flex;align-items:center;gap:6px;margin-bottom:4px">` +
                                 `<span style="width:8px;height:8px;border-radius:50%;background:${c};opacity:.6;display:inline-block"></span>` +
@@ -309,8 +362,9 @@
                                 `<span style="opacity:.35">Input</span><span>${d.inputHuman}</span>` +
                                 `<span style="opacity:.35">GPU time</span><span>${d.gpuTime}</span>` +
                                 `<span style="opacity:.35">Docs</span><span>${d.docsM.toFixed(1)}M</span>` +
-                                `<span style="opacity:.35">DCLM</span><span style="color:${dc}">${d.dclm >= 0 ? '+' : ''}${d.dclm.toFixed(3)}</span>` +
-                                `<span style="opacity:.35">Edu</span><span style="color:${ec}">${d.edu >= 0 ? '+' : ''}${d.edu.toFixed(3)}</span></div>`
                             ).style('opacity', 1);
                         })
                         .on('mousemove', function(event) {

             container.style.position = 'relative';
             const JSON_PATHS = ['/data/rephrasing_metadata.json', './assets/data/rephrasing_metadata.json'];
+            const CSV_PATHS = ['/data/benchmark-results.csv', './assets/data/benchmark-results.csv'];
+            const fetchFirstAvailable = async (paths, parse) => {
                 for (const p of paths) {
+                    try { const r = await fetch(p, { cache: 'no-cache' }); if (r.ok) return parse ? parse(await r.text()) : r.json(); } catch(_) {}
                 }
+                throw new Error('Data not found: ' + paths.join(', '));
             };
             // Derive display fields from a JSON entry
                 return d.toLocaleString() + ' days';
             }
+            // Map source_dataset names to baseline run names in the CSV
+            const SOURCE_TO_BASELINE_RUN = {
+                'fineweb-edu-hq-20BT': 'fw_edu_hq',
+                'fineweb-edu-lq-20BT': 'fw_edu_lq',
+                'dclm-37BT': 'dclm',
+                'cosmopedia-25BT': 'cosmopedia'
+            };
+            // Extract max-step agg_score_macro per baseline run from CSV rows
+            function buildBaselineMacro(csvRows) {
+                const baselineRuns = new Set(Object.values(SOURCE_TO_BASELINE_RUN));
+                const best = {};
+                for (const row of csvRows) {
+                    if (!baselineRuns.has(row.runname)) continue;
+                    const step = +row.steps;
+                    const score = +row.agg_score_macro;
+                    if (!(row.runname in best) || step > best[row.runname][0]) {
+                        best[row.runname] = [step, score];
+                    }
+                }
+                const out = {};
+                for (const [src, run] of Object.entries(SOURCE_TO_BASELINE_RUN)) {
+                    if (run in best) out[src] = best[run][1];
+                }
+                return out;
+            }
+            function parseEntry(d, i, baselineMacro) {
                 const [cat, promptFile] = d.prompt.split('/');
                 const promptKey = promptFile.replace('.md', '');
                 const modelShort = d.model.split('/').pop();
                 const modelLower = d.model.toLowerCase();
                 const family = Object.entries(FAMILY_MAP).find(([k]) => modelLower.includes(k))?.[1] || 'Other';
+                const aggMacro = d.results?.agg_score_macro;
+                const baseline = baselineMacro[d.source_dataset];
                 return {
                     id: i,
                     prompt: PROMPT_LABELS[promptKey] || promptKey,
                     gpuTime: gpuDays(d.gpu_time_seconds),
                     docsM: d.num_documents / 1e6,
                     dclm: d.dclm_score_difference,
+                    dclmBase: d.input_dclm_score,
                     edu: d.edu_score_difference,
+                    eduBase: d.input_edu_score,
+                    aggDiff: (aggMacro != null && baseline != null) ? aggMacro - baseline : null,
+                    aggBase: baseline,
                     phase: (i * 2.399) % (Math.PI * 2)
                 };
             }
                 return n.toFixed(1) + 'B';
             }
+            // Format absolute diff + relative % in brackets, e.g. "+0.018 (+12.3%)"
+            function fmtDelta(diff, base) {
+                const sign = diff >= 0 ? '+' : '';
+                const abs = `${sign}${diff.toFixed(3)}`;
+                if (base != null && base !== 0) {
+                    const pct = (diff / base) * 100;
+                    return `${abs} <span style="opacity:.5">(${pct >= 0 ? '+' : ''}${pct.toFixed(1)}%)</span>`;
+                }
+                return abs;
+            }
+            Promise.all([
+                fetchFirstAvailable(JSON_PATHS),
+                fetchFirstAvailable(CSV_PATHS, d3.csvParse)
+            ]).then(([raw, csvRows]) => {
+                const baselineMacro = buildBaselineMacro(csvRows);
+                const data = raw.map((d, i) => parseEntry(d, i, baselineMacro));
                 const totalOutputB = data.reduce((s, d) => s + d.compB, 0);
                 const totalDocsM = data.reduce((s, d) => s + d.docsM, 0);
                 const numExperiments = data.length;
                             const c = col[d.family];
                             const dc = d.dclm >= 0 ? '#16a34a' : '#dc2626';
                             const ec = d.edu >= 0 ? '#16a34a' : '#dc2626';
+                            const ac = d.aggDiff != null ? (d.aggDiff >= 0 ? '#16a34a' : '#dc2626') : null;
+                            const aggRow = d.aggDiff != null
+                                ? `<span style="opacity:.35">Δ Macro</span><span style="color:${ac}">${fmtDelta(d.aggDiff, d.aggBase)}</span>`
+                                : '';
                             tip.html(
                                 `<div style="display:flex;align-items:center;gap:6px;margin-bottom:4px">` +
                                 `<span style="width:8px;height:8px;border-radius:50%;background:${c};opacity:.6;display:inline-block"></span>` +
                                 `<span style="opacity:.35">Input</span><span>${d.inputHuman}</span>` +
                                 `<span style="opacity:.35">GPU time</span><span>${d.gpuTime}</span>` +
                                 `<span style="opacity:.35">Docs</span><span>${d.docsM.toFixed(1)}M</span>` +
+                                `<span style="opacity:.35">DCLM</span><span style="color:${dc}">${fmtDelta(d.dclm, d.dclmBase)}</span>` +
+                                `<span style="opacity:.35">Edu</span><span style="color:${ec}">${fmtDelta(d.edu, d.eduBase)}</span>` +
+                                aggRow + `</div>`
                             ).style('opacity', 1);
                         })
                         .on('mousemove', function(event) {