Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
e575fa7
1
Parent(s): ae34010
updated synthetic only results
Browse files
app/src/content/assets/data/benchmark-results.csv
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc7f86e2cd5b311eb1fec66972254890034acd866037b5096f1551ef877fe72e
|
| 3 |
+
size 1598658
|
app/src/content/chapters/3-experiments.mdx
CHANGED
|
@@ -8,9 +8,9 @@ import ReadingTime from "../../components/ReadingTime.astro";
|
|
| 8 |
{/* TODO: read through entire blog post and make improvements */}
|
| 9 |
{/* TODO: Integrate decay experiment as another analysis for proxy */}
|
| 10 |
{/* TODO: share on a bunch of discords/slacks/hackernews/locallama */}
|
| 11 |
-
{/* TODO: brainstorm better banner, be artsy */}
|
| 12 |
{/* TODO: run variance experiments with pretraining from scratch */}
|
| 13 |
{/* TODO: go through the blog post and update the scale numbers for finephrase dataset */}
|
|
|
|
| 14 |
{/* TODO: banner idea: 1T tokens = 8M books
|
| 15 |
5cm per book = 400km
|
| 16 |
|
|
@@ -385,29 +385,50 @@ So far we've always mixed synthetic data with a <Glossary term="source dataset"
|
|
| 385 |
|
| 386 |
#### Is synthetic data enough?
|
| 387 |
|
| 388 |
-
The dream scenario would be generating all your training data synthetically, no curation needed. We test this by comparing synthetic-only training vs mixed training (synthetic + source)
|
| 389 |
|
| 390 |
<HtmlEmbed
|
| 391 |
id="synthetic-only"
|
| 392 |
src="d3-benchmark-comparison.html"
|
| 393 |
desc="Synthetic-only vs mixed training. Use the Setup dropdown to compare across source datasets."
|
| 394 |
config={{
|
|
|
|
| 395 |
setups: {
|
| 396 |
"DCLM Source": {
|
| 397 |
datasets: {
|
|
|
|
|
|
|
|
|
|
| 398 |
"mix-dclm-faq_1b_dclm": "Mix: FAQ + DCLM",
|
| 399 |
-
dclm:
|
|
|
|
| 400 |
"mix-dclm-tutorial_1b_dclm": "Mix: Tutorial + DCLM",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
faq_1b_dclm: "FAQ Only",
|
|
|
|
|
|
|
| 402 |
tutorial_1b_dclm: "Tutorial Only"
|
| 403 |
}
|
| 404 |
},
|
| 405 |
"FineWeb-Edu-HQ Source": {
|
| 406 |
datasets: {
|
|
|
|
|
|
|
|
|
|
| 407 |
"mix-fw_edu_hq-faq_1b_hq": "Mix: FAQ + FineWeb-Edu-HQ",
|
|
|
|
|
|
|
| 408 |
"mix-fw_edu_hq-tutorial_1b_hq": "Mix: Tutorial + FineWeb-Edu-HQ",
|
| 409 |
dclm: { display: "Baseline (DCLM)", color: "#8b8b8b", baseline: true },
|
|
|
|
|
|
|
|
|
|
| 410 |
faq_1b_hq: "FAQ Only",
|
|
|
|
|
|
|
| 411 |
tutorial_1b_hq: "Tutorial Only"
|
| 412 |
}
|
| 413 |
}
|
|
|
|
| 8 |
{/* TODO: read through entire blog post and make improvements */}
|
| 9 |
{/* TODO: Integrate decay experiment as another analysis for proxy */}
|
| 10 |
{/* TODO: share on a bunch of discords/slacks/hackernews/locallama */}
|
|
|
|
| 11 |
{/* TODO: run variance experiments with pretraining from scratch */}
|
| 12 |
{/* TODO: go through the blog post and update the scale numbers for finephrase dataset */}
|
| 13 |
+
{/* TODO: brainstorm better banner, be artsy */}
|
| 14 |
{/* TODO: banner idea: 1T tokens = 8M books
|
| 15 |
5cm per book = 400km
|
| 16 |
|
|
|
|
| 385 |
|
| 386 |
#### Is synthetic data enough?
|
| 387 |
|
| 388 |
+
The dream scenario would be generating all your training data synthetically, no curation needed. We test this by comparing synthetic-only training vs mixed training (synthetic + source) across all our prompts on DCLM and FineWeb-Edu-HQ sources. Unfortunately, synthetic-only training falls short of both the DCLM baseline and mixed training (see <FigRef target="synthetic-only" />). Mixing consistently improves over both the synthetic-only and original-data-only baselines, regardless of prompt type.
|
| 389 |
|
| 390 |
<HtmlEmbed
|
| 391 |
id="synthetic-only"
|
| 392 |
src="d3-benchmark-comparison.html"
|
| 393 |
desc="Synthetic-only vs mixed training. Use the Setup dropdown to compare across source datasets."
|
| 394 |
config={{
|
| 395 |
+
hideAverage: true,
|
| 396 |
setups: {
|
| 397 |
"DCLM Source": {
|
| 398 |
datasets: {
|
| 399 |
+
"mix-dclm-article_1b_dclm": "Mix: Article + DCLM",
|
| 400 |
+
"mix-dclm-commentary_1b_dclm": "Mix: Commentary + DCLM",
|
| 401 |
+
"mix-dclm-discussion_1b_dclm": "Mix: Discussion + DCLM",
|
| 402 |
"mix-dclm-faq_1b_dclm": "Mix: FAQ + DCLM",
|
| 403 |
+
"mix-dclm-math_1b_dclm": "Mix: Math + DCLM",
|
| 404 |
+
"mix-dclm-table_1b_dclm": "Mix: Table + DCLM",
|
| 405 |
"mix-dclm-tutorial_1b_dclm": "Mix: Tutorial + DCLM",
|
| 406 |
+
dclm: { display: "Baseline (DCLM)", color: "#8b8b8b", baseline: true },
|
| 407 |
+
article_1b_dclm: "Article Only",
|
| 408 |
+
commentary_1b_dclm: "Commentary Only",
|
| 409 |
+
discussion_1b_dclm: "Discussion Only",
|
| 410 |
faq_1b_dclm: "FAQ Only",
|
| 411 |
+
math_1b_dclm: "Math Only",
|
| 412 |
+
table_1b_dclm: "Table Only",
|
| 413 |
tutorial_1b_dclm: "Tutorial Only"
|
| 414 |
}
|
| 415 |
},
|
| 416 |
"FineWeb-Edu-HQ Source": {
|
| 417 |
datasets: {
|
| 418 |
+
"mix-fw_edu_hq-article_1b_hq": "Mix: Article + FineWeb-Edu-HQ",
|
| 419 |
+
"mix-fw_edu_hq-commentary_1b_hq": "Mix: Commentary + FineWeb-Edu-HQ",
|
| 420 |
+
"mix-fw_edu_hq-discussion_1b_hq": "Mix: Discussion + FineWeb-Edu-HQ",
|
| 421 |
"mix-fw_edu_hq-faq_1b_hq": "Mix: FAQ + FineWeb-Edu-HQ",
|
| 422 |
+
"mix-fw_edu_hq-math_1b_hq": "Mix: Math + FineWeb-Edu-HQ",
|
| 423 |
+
"mix-fw_edu_hq-table_1b_hq": "Mix: Table + FineWeb-Edu-HQ",
|
| 424 |
"mix-fw_edu_hq-tutorial_1b_hq": "Mix: Tutorial + FineWeb-Edu-HQ",
|
| 425 |
dclm: { display: "Baseline (DCLM)", color: "#8b8b8b", baseline: true },
|
| 426 |
+
article_1b_hq: "Article Only",
|
| 427 |
+
commentary_1b_hq: "Commentary Only",
|
| 428 |
+
discussion_1b_hq: "Discussion Only",
|
| 429 |
faq_1b_hq: "FAQ Only",
|
| 430 |
+
math_1b_hq: "Math Only",
|
| 431 |
+
table_1b_hq: "Table Only",
|
| 432 |
tutorial_1b_hq: "Tutorial Only"
|
| 433 |
}
|
| 434 |
}
|
app/src/content/embeds/d3-benchmark-comparison.html
CHANGED
|
@@ -205,7 +205,8 @@
|
|
| 205 |
const SETUPS = cfg.setups || null;
|
| 206 |
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
|
| 207 |
const AVG_SETUP_KEY = 'Average (all setups)';
|
| 208 |
-
const
|
|
|
|
| 209 |
let currentSetup = SETUPS ? (defaultSetupCfg === 'average' ? AVG_SETUP_KEY : (defaultSetupCfg && setupNames.includes(defaultSetupCfg) ? defaultSetupCfg : setupNames[0])) : null;
|
| 210 |
let DATASETS = SETUPS ? (currentSetup === AVG_SETUP_KEY ? {} : normalizeDatasets(SETUPS[currentSetup].datasets)) : normalizeDatasets(cfg.datasets);
|
| 211 |
let avgDatasets = {};
|
|
@@ -712,7 +713,7 @@
|
|
| 712 |
if (name === currentSetup) opt.selected = true;
|
| 713 |
setupSelect.appendChild(opt);
|
| 714 |
});
|
| 715 |
-
if (setupNames.length >= 2) {
|
| 716 |
const avgOpt = document.createElement('option'); avgOpt.value = AVG_SETUP_KEY; avgOpt.textContent = AVG_SETUP_KEY;
|
| 717 |
if (currentSetup === AVG_SETUP_KEY) avgOpt.selected = true;
|
| 718 |
setupSelect.appendChild(avgOpt);
|
|
@@ -822,7 +823,7 @@
|
|
| 822 |
const text = await fetchFirstAvailable(csvPaths);
|
| 823 |
const parsed = d3.csvParse(text);
|
| 824 |
parsedData = parsed;
|
| 825 |
-
if (SETUPS && setupNames.length >= 2) {
|
| 826 |
const avg = computeAverageData(parsed);
|
| 827 |
avgDatasets = avg.datasets;
|
| 828 |
const hasAvgData = Object.values(avgDatasets).some(o => !o.baseline);
|
|
|
|
| 205 |
const SETUPS = cfg.setups || null;
|
| 206 |
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
|
| 207 |
const AVG_SETUP_KEY = 'Average (all setups)';
|
| 208 |
+
const HIDE_AVERAGE = !!cfg.hideAverage;
|
| 209 |
+
const defaultSetupCfg = cfg.defaultSetup || (setupNames.length >= 2 && !HIDE_AVERAGE ? 'average' : null);
|
| 210 |
let currentSetup = SETUPS ? (defaultSetupCfg === 'average' ? AVG_SETUP_KEY : (defaultSetupCfg && setupNames.includes(defaultSetupCfg) ? defaultSetupCfg : setupNames[0])) : null;
|
| 211 |
let DATASETS = SETUPS ? (currentSetup === AVG_SETUP_KEY ? {} : normalizeDatasets(SETUPS[currentSetup].datasets)) : normalizeDatasets(cfg.datasets);
|
| 212 |
let avgDatasets = {};
|
|
|
|
| 713 |
if (name === currentSetup) opt.selected = true;
|
| 714 |
setupSelect.appendChild(opt);
|
| 715 |
});
|
| 716 |
+
if (setupNames.length >= 2 && !HIDE_AVERAGE) {
|
| 717 |
const avgOpt = document.createElement('option'); avgOpt.value = AVG_SETUP_KEY; avgOpt.textContent = AVG_SETUP_KEY;
|
| 718 |
if (currentSetup === AVG_SETUP_KEY) avgOpt.selected = true;
|
| 719 |
setupSelect.appendChild(avgOpt);
|
|
|
|
| 823 |
const text = await fetchFirstAvailable(csvPaths);
|
| 824 |
const parsed = d3.csvParse(text);
|
| 825 |
parsedData = parsed;
|
| 826 |
+
if (SETUPS && setupNames.length >= 2 && !HIDE_AVERAGE) {
|
| 827 |
const avg = computeAverageData(parsed);
|
| 828 |
avgDatasets = avg.datasets;
|
| 829 |
const hasAvgData = Object.values(avgDatasets).some(o => !o.baseline);
|