Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
cfb5e0c
1
Parent(s): 7aaccb8
improve coloring
Browse files
app/src/content/chapters/experiments.mdx
CHANGED
|
@@ -57,7 +57,7 @@ Prior synthetic datasets bundle multiple prompts together. We want to understand
|
|
| 57 |
We isolate each prompt from Nemotron-HQ-Synth ([diverse_qa_pairs](#diverse_qa_pairs), [extract_knowledge](#extract_knowledge), [distill](#distill), [wikipedia_style_rephrasing](#wikipedia_style_rephrasing), [knowledge_list](#knowledge_list)), the REWIRE [guided_rewrite](#guided_rewrite_original) prompt, and the two prompts from BeyondWeb [@beyondweb] ([continue](#continue), [summarize](#summarize)), all using Gemma-3-1B on FineWeb-Edu-HQ as source.
|
| 58 |
|
| 59 |
<Sidenote>
|
| 60 |
-
|
| 61 |
</Sidenote>
|
| 62 |
|
| 63 |
Only [diverse_qa_pairs](#diverse_qa_pairs) (driven by very strong SQuAD performance) and REWIRE's [guided_rewrite](#guided_rewrite_original) match DCLM (see [Dissecting Synthetic Baselines](#dissecting-baselines)). The BeyondWeb-inspired [continue](#continue) and [summarize](#summarize) prompts do not reach DCLM level. <mark>TLDR: Apart from two prompts, no existing synthetic method outperforms the DCLM baseline.</mark>
|
|
@@ -73,14 +73,26 @@ Only [diverse_qa_pairs](#diverse_qa_pairs) (driven by very strong SQuAD performa
|
|
| 73 |
"mix-fw_edu_hq-diverse_qa_pairs_1b_hq": "Diverse QA Pairs",
|
| 74 |
dclm: "DCLM",
|
| 75 |
"mix-fw_edu_hq-extract_knowledge_1b_hq": "Extract Knowledge",
|
| 76 |
-
"mix-fw_edu_hq-guided_rewrite_original_1b_hq": "Guided Rewrite
|
| 77 |
nemotron_hq_synth: "Nemotron-HQ-Synth",
|
| 78 |
rewire: "REWIRE",
|
| 79 |
"mix-fw_edu_hq-distill_1b_hq": "Distill",
|
| 80 |
"mix-fw_edu_hq-wikipedia_style_rephrasing_1b_hq": "Wikipedia Rephrasing",
|
| 81 |
"mix-fw_edu_hq-knowledge_list_1b_hq": "Knowledge List",
|
| 82 |
-
"mix-fw_edu_hq-continue_1b_hq": "Continue
|
| 83 |
-
"mix-fw_edu_hq-summarize_1b_hq": "Summarize
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
}
|
| 85 |
}}
|
| 86 |
/>
|
|
|
|
| 57 |
We isolate each prompt from Nemotron-HQ-Synth ([diverse_qa_pairs](#diverse_qa_pairs), [extract_knowledge](#extract_knowledge), [distill](#distill), [wikipedia_style_rephrasing](#wikipedia_style_rephrasing), [knowledge_list](#knowledge_list)), the REWIRE [guided_rewrite](#guided_rewrite_original) prompt, and the two prompts from BeyondWeb [@beyondweb] ([continue](#continue), [summarize](#summarize)), all using Gemma-3-1B on FineWeb-Edu-HQ as source.
|
| 58 |
|
| 59 |
<Sidenote>
|
| 60 |
+
The BeyondWeb dataset was never released, and the paper omits key details while claiming strong performance. We tested its [continue](#continue) and [summarize](#summarize) prompts to verify those claims and to make the findings publicly available.
|
| 61 |
</Sidenote>
|
| 62 |
|
| 63 |
Only [diverse_qa_pairs](#diverse_qa_pairs) (driven by very strong SQuAD performance) and REWIRE's [guided_rewrite](#guided_rewrite_original) match DCLM (see [Dissecting Synthetic Baselines](#dissecting-baselines)). The BeyondWeb-inspired [continue](#continue) and [summarize](#summarize) prompts do not reach DCLM level. <mark>TL;DR: Apart from these two prompts, no existing synthetic method matches the DCLM baseline.</mark>
|
|
|
|
| 73 |
"mix-fw_edu_hq-diverse_qa_pairs_1b_hq": "Diverse QA Pairs",
|
| 74 |
dclm: "DCLM",
|
| 75 |
"mix-fw_edu_hq-extract_knowledge_1b_hq": "Extract Knowledge",
|
| 76 |
+
"mix-fw_edu_hq-guided_rewrite_original_1b_hq": "Guided Rewrite",
|
| 77 |
nemotron_hq_synth: "Nemotron-HQ-Synth",
|
| 78 |
rewire: "REWIRE",
|
| 79 |
"mix-fw_edu_hq-distill_1b_hq": "Distill",
|
| 80 |
"mix-fw_edu_hq-wikipedia_style_rephrasing_1b_hq": "Wikipedia Rephrasing",
|
| 81 |
"mix-fw_edu_hq-knowledge_list_1b_hq": "Knowledge List",
|
| 82 |
+
"mix-fw_edu_hq-continue_1b_hq": "Continue",
|
| 83 |
+
"mix-fw_edu_hq-summarize_1b_hq": "Summarize"
|
| 84 |
+
},
|
| 85 |
+
pinnedColors: {
|
| 86 |
+
"Nemotron-HQ-Synth": "#76b900",
|
| 87 |
+
"Diverse QA Pairs": "#c5e384",
|
| 88 |
+
"Distill": "#a0c95c",
|
| 89 |
+
"Wikipedia Rephrasing": "#7fb034",
|
| 90 |
+
"Knowledge List": "#5e960e",
|
| 91 |
+
"Extract Knowledge": "#3d6b00",
|
| 92 |
+
"REWIRE": "#1877F2",
|
| 93 |
+
"Guided Rewrite": "#6aabff",
|
| 94 |
+
"Continue (BeyondWeb)": "#e8713a",
|
| 95 |
+
"Summarize (BeyondWeb)": "#c4451c"
|
| 96 |
}
|
| 97 |
}}
|
| 98 |
/>
|
app/src/content/chapters/introduction.mdx
CHANGED
|
@@ -40,7 +40,7 @@ Here's a preview of where we end up: FinePhrase, our best configuration, clearly
|
|
| 40 |
desc="Figure: FinePhrase compared against synthetic data baselines across evaluation metrics."
|
| 41 |
config={{
|
| 42 |
defaultView: "line",
|
| 43 |
-
pinnedColors: { "FinePhrase": "#
|
| 44 |
baselines: ["cosmopedia", "nemotron_hq_synth", "rewire", "synth_query_reasoning_answer"],
|
| 45 |
datasetNames: {
|
| 46 |
cosmopedia: "Cosmopedia",
|
|
|
|
| 40 |
desc="Figure: FinePhrase compared against synthetic data baselines across evaluation metrics."
|
| 41 |
config={{
|
| 42 |
defaultView: "line",
|
| 43 |
+
pinnedColors: { "FinePhrase": "#EBA937" },
|
| 44 |
baselines: ["cosmopedia", "nemotron_hq_synth", "rewire", "synth_query_reasoning_answer"],
|
| 45 |
datasetNames: {
|
| 46 |
cosmopedia: "Cosmopedia",
|