finephrase

Running on CPU Upgrade

App Files Files Community

joelniklaus HF Staff committed on Mar 4

Commit

e575fa7

1 Parent(s): ae34010

updated synthetic only results

Browse files

Files changed (3) hide show

app/src/content/assets/data/benchmark-results.csv +2 -2
app/src/content/chapters/3-experiments.mdx +24 -3
app/src/content/embeds/d3-benchmark-comparison.html +4 -3

app/src/content/assets/data/benchmark-results.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19eac7b4c7d51ef51fde0893bd2e5f646c501eafb4da20cff189bad2e2d45262
-size 1513555

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc7f86e2cd5b311eb1fec66972254890034acd866037b5096f1551ef877fe72e
+size 1598658

app/src/content/chapters/3-experiments.mdx CHANGED Viewed

@@ -8,9 +8,9 @@ import ReadingTime from "../../components/ReadingTime.astro";
 {/* TODO: read through entire blog post and make improvements */}
 {/* TODO: Integrate decay experiment as another analysis for proxy */}
 {/* TODO: share on a bunch of discords/slacks/hackernews/locallama */}
-{/* TODO: brainstorm better banner, be artsy */}
 {/* TODO: run variance experiments with pretraining from scratch */}
 {/* TODO: go through the blog post and update the scale numbers for finephrase dataset */}
 {/* TODO: banner idea: 1T tokens = 8M books
 5cm pro buech = 400km
@@ -385,29 +385,50 @@ So far we've always mixed synthetic data with a <Glossary term="source dataset"
 #### Is synthetic data enough?
-The dream scenario would be generating all your training data synthetically, no curation needed. We test this by comparing synthetic-only training vs mixed training (synthetic + source) for [faq](#faq) and [tutorial](#tutorial) prompts on DCLM and FineWeb-Edu-HQ sources. Unfortunately, synthetic-only training falls short of both DCLM and mixed training (see <FigRef target="synthetic-only" />). Mixing consistently improves over both the synthetic-only and original-data-only baselines.
 <HtmlEmbed
   id="synthetic-only"
   src="d3-benchmark-comparison.html"
   desc="Synthetic-only vs mixed training. Use the Setup dropdown to compare across source datasets."
   config={{
     setups: {
       "DCLM Source": {
         datasets: {
           "mix-dclm-faq_1b_dclm": "Mix: FAQ + DCLM",
-          dclm: { display: "Baseline (DCLM)", color: "#8b8b8b", baseline: true },
           "mix-dclm-tutorial_1b_dclm": "Mix: Tutorial + DCLM",
           faq_1b_dclm: "FAQ Only",
           tutorial_1b_dclm: "Tutorial Only"
         }
       },
       "FineWeb-Edu-HQ Source": {
         datasets: {
           "mix-fw_edu_hq-faq_1b_hq": "Mix: FAQ + FineWeb-Edu-HQ",
           "mix-fw_edu_hq-tutorial_1b_hq": "Mix: Tutorial + FineWeb-Edu-HQ",
           dclm: { display: "Baseline (DCLM)", color: "#8b8b8b", baseline: true },
           faq_1b_hq: "FAQ Only",
           tutorial_1b_hq: "Tutorial Only"
         }
       }

 {/* TODO: read through entire blog post and make improvements */}
 {/* TODO: Integrate decay experiment as another analysis for proxy */}
 {/* TODO: share on a bunch of discords/slacks/hackernews/locallama */}
 {/* TODO: run variance experiments with pretraining from scratch */}
 {/* TODO: go through the blog post and update the scale numbers for finephrase dataset */}
+{/* TODO: brainstorm better banner, be artsy */}
 {/* TODO: banner idea: 1T tokens = 8M books
 5cm pro buech = 400km
 #### Is synthetic data enough?
+The dream scenario would be generating all your training data synthetically, no curation needed. We test this by comparing synthetic-only training vs mixed training (synthetic + source) across all our prompts on DCLM and FineWeb-Edu-HQ sources. Unfortunately, synthetic-only training falls short of both DCLM and mixed training (see <FigRef target="synthetic-only" />). Mixing consistently improves over both the synthetic-only and original-data-only baselines, regardless of prompt type.
 <HtmlEmbed
   id="synthetic-only"
   src="d3-benchmark-comparison.html"
   desc="Synthetic-only vs mixed training. Use the Setup dropdown to compare across source datasets."
   config={{
+    hideAverage: true,
     setups: {
       "DCLM Source": {
         datasets: {
+          "mix-dclm-article_1b_dclm": "Mix: Article + DCLM",
+          "mix-dclm-commentary_1b_dclm": "Mix: Commentary + DCLM",
+          "mix-dclm-discussion_1b_dclm": "Mix: Discussion + DCLM",
           "mix-dclm-faq_1b_dclm": "Mix: FAQ + DCLM",
+          "mix-dclm-math_1b_dclm": "Mix: Math + DCLM",
+          "mix-dclm-table_1b_dclm": "Mix: Table + DCLM",
           "mix-dclm-tutorial_1b_dclm": "Mix: Tutorial + DCLM",
+          dclm: { display: "Baseline (DCLM)", color: "#8b8b8b", baseline: true },
+          article_1b_dclm: "Article Only",
+          commentary_1b_dclm: "Commentary Only",
+          discussion_1b_dclm: "Discussion Only",
           faq_1b_dclm: "FAQ Only",
+          math_1b_dclm: "Math Only",
+          table_1b_dclm: "Table Only",
           tutorial_1b_dclm: "Tutorial Only"
         }
       },
       "FineWeb-Edu-HQ Source": {
         datasets: {
+          "mix-fw_edu_hq-article_1b_hq": "Mix: Article + FineWeb-Edu-HQ",
+          "mix-fw_edu_hq-commentary_1b_hq": "Mix: Commentary + FineWeb-Edu-HQ",
+          "mix-fw_edu_hq-discussion_1b_hq": "Mix: Discussion + FineWeb-Edu-HQ",
           "mix-fw_edu_hq-faq_1b_hq": "Mix: FAQ + FineWeb-Edu-HQ",
+          "mix-fw_edu_hq-math_1b_hq": "Mix: Math + FineWeb-Edu-HQ",
+          "mix-fw_edu_hq-table_1b_hq": "Mix: Table + FineWeb-Edu-HQ",
           "mix-fw_edu_hq-tutorial_1b_hq": "Mix: Tutorial + FineWeb-Edu-HQ",
           dclm: { display: "Baseline (DCLM)", color: "#8b8b8b", baseline: true },
+          article_1b_hq: "Article Only",
+          commentary_1b_hq: "Commentary Only",
+          discussion_1b_hq: "Discussion Only",
           faq_1b_hq: "FAQ Only",
+          math_1b_hq: "Math Only",
+          table_1b_hq: "Table Only",
           tutorial_1b_hq: "Tutorial Only"
         }
       }

app/src/content/embeds/d3-benchmark-comparison.html CHANGED Viewed

@@ -205,7 +205,8 @@
       const SETUPS = cfg.setups || null;
       const setupNames = SETUPS ? Object.keys(SETUPS) : [];
       const AVG_SETUP_KEY = 'Average (all setups)';
-      const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 ? 'average' : null);
       let currentSetup = SETUPS ? (defaultSetupCfg === 'average' ? AVG_SETUP_KEY : (defaultSetupCfg && setupNames.includes(defaultSetupCfg) ? defaultSetupCfg : setupNames[0])) : null;
       let DATASETS = SETUPS ? (currentSetup === AVG_SETUP_KEY ? {} : normalizeDatasets(SETUPS[currentSetup].datasets)) : normalizeDatasets(cfg.datasets);
       let avgDatasets = {};
@@ -712,7 +713,7 @@
             if (name === currentSetup) opt.selected = true;
             setupSelect.appendChild(opt);
           });
-          if (setupNames.length >= 2) {
             const avgOpt = document.createElement('option'); avgOpt.value = AVG_SETUP_KEY; avgOpt.textContent = AVG_SETUP_KEY;
             if (currentSetup === AVG_SETUP_KEY) avgOpt.selected = true;
             setupSelect.appendChild(avgOpt);
@@ -822,7 +823,7 @@
           const text = await fetchFirstAvailable(csvPaths);
           const parsed = d3.csvParse(text);
           parsedData = parsed;
-          if (SETUPS && setupNames.length >= 2) {
             const avg = computeAverageData(parsed);
             avgDatasets = avg.datasets;
             const hasAvgData = Object.values(avgDatasets).some(o => !o.baseline);

       const SETUPS = cfg.setups || null;
       const setupNames = SETUPS ? Object.keys(SETUPS) : [];
       const AVG_SETUP_KEY = 'Average (all setups)';
+      const HIDE_AVERAGE = !!cfg.hideAverage;
+      const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 && !HIDE_AVERAGE ? 'average' : null);
       let currentSetup = SETUPS ? (defaultSetupCfg === 'average' ? AVG_SETUP_KEY : (defaultSetupCfg && setupNames.includes(defaultSetupCfg) ? defaultSetupCfg : setupNames[0])) : null;
       let DATASETS = SETUPS ? (currentSetup === AVG_SETUP_KEY ? {} : normalizeDatasets(SETUPS[currentSetup].datasets)) : normalizeDatasets(cfg.datasets);
       let avgDatasets = {};
             if (name === currentSetup) opt.selected = true;
             setupSelect.appendChild(opt);
           });
+          if (setupNames.length >= 2 && !HIDE_AVERAGE) {
             const avgOpt = document.createElement('option'); avgOpt.value = AVG_SETUP_KEY; avgOpt.textContent = AVG_SETUP_KEY;
             if (currentSetup === AVG_SETUP_KEY) avgOpt.selected = true;
             setupSelect.appendChild(avgOpt);
           const text = await fetchFirstAvailable(csvPaths);
           const parsed = d3.csvParse(text);
           parsedData = parsed;
+          if (SETUPS && setupNames.length >= 2 && !HIDE_AVERAGE) {
             const avg = computeAverageData(parsed);
             avgDatasets = avg.datasets;
             const hasAvgData = Object.values(avgDatasets).some(o => !o.baseline);