joelniklaus HF Staff committed on
Commit
bc1432f
·
1 Parent(s): 6554803

Add auto-numbering for figures

Browse files
app/src/content/chapters/experiments.mdx CHANGED
@@ -29,7 +29,7 @@ DCLM, Nemotron-HQ-Synth, and REWIRE lead by a significant margin (see [Baseline
29
  id="baselines-comparison"
30
  src="d3-benchmark-comparison.html"
31
  title="Baseline Comparison"
32
- desc="Figure: Comparison of baseline datasets across different evaluation metrics. Use the dropdown to switch metrics."
33
  config={{
34
  baselines: [],
35
  datasetNames: {
@@ -65,7 +65,7 @@ Only [diverse_qa_pairs](#diverse_qa_pairs) (driven by very strong SQuAD performa
65
  id="dissecting-baselines"
66
  src="d3-benchmark-comparison.html"
67
  title="Dissecting Synthetic Baselines"
68
- desc="Figure: Individual prompt performance from existing synthetic datasets compared to DCLM and FineWeb-Edu-HQ."
69
  config={{
70
  baselines: ["dclm", "nemotron_hq_synth", "rewire"],
71
  datasetNames: {
@@ -110,7 +110,7 @@ Four prompts ([math](#math), [table](#table), [faq](#faq), [tutorial](#tutorial)
110
  id="new-prompts"
111
  src="d3-benchmark-comparison.html"
112
  title="New Prompt Performance"
113
- desc="Figure: Seven new prompts compared against DCLM and FineWeb-Edu-HQ."
114
  config={{
115
  datasetNames: {
116
  "mix-fw_edu_hq-math_1b_hq": "Math",
@@ -148,7 +148,7 @@ It is possible that larger models produce richer or more nuanced rephrasings tha
148
  id="model-size"
149
  src="d3-benchmark-comparison.html"
150
  title="Model Size"
151
- desc="Figure: Gemma-3 model sizes (270M to 27B). Use the Setup dropdown to compare across prompts."
152
  config={{
153
  setups: {
154
  "Tutorial Prompt": {
@@ -191,7 +191,7 @@ The results are mixed: for some prompts 12B helps slightly with LQ data, but for
191
  id="size-quality"
192
  src="d3-benchmark-comparison.html"
193
  title="Model Size vs Data Quality"
194
- desc="Figure: 1B vs 12B model on HQ vs LQ data. Use the Setup dropdown to compare across prompts."
195
  config={{
196
  setups: {
197
  "Continue Prompt": {
@@ -256,7 +256,7 @@ We hypothesize that SmolLM2's consistently strong rephrasing performance origina
256
  id="model-family"
257
  src="d3-benchmark-comparison.html"
258
  title="Model Family"
259
- desc="Figure: Model families compared at ~1B scale. Use the Setup dropdown to compare across prompts."
260
  config={{
261
  setups: {
262
  "Tutorial Prompt": {
@@ -325,7 +325,7 @@ While the differences are small, we find a consistent trend: newer versions lead
325
  id="model-generation"
326
  src="d3-benchmark-comparison.html"
327
  title="Model Generation: Qwen Tutorial"
328
- desc="Figure: Qwen model generations (1.5 to 3) on the tutorial prompt."
329
  config={{
330
  datasetNames: {
331
  "mix-fw_edu_hq-tutorial_qwen3_1.7b_hq": "Qwen3 (1.7B)",
@@ -363,7 +363,7 @@ Synthetic-only training beats FineWeb-Edu-HQ but falls short of both DCLM and mi
363
  id="synthetic-only"
364
  src="d3-benchmark-comparison.html"
365
  title="Is Synthetic Data Enough?"
366
- desc="Figure: Synthetic-only vs mixed training. Use the Setup dropdown to compare across source datasets."
367
  config={{
368
  setups: {
369
  "DCLM Source": {
@@ -404,7 +404,7 @@ DCLM and FineWeb-Edu-HQ outperform Cosmopedia and FineWeb-Edu-LQ as mix-in datas
404
  id="mixin-dataset"
405
  src="d3-benchmark-comparison.html"
406
  title="Mix-in Dataset Effect"
407
- desc="Figure: Effect of different mix-in datasets. Use the Setup dropdown to compare HQ vs LQ source data."
408
  config={{
409
  setups: {
410
  "HQ Source": {
@@ -450,7 +450,7 @@ When mix-in varies with source, source quality appears to matter: FineWeb-Edu-HQ
450
  id="source-dataset-mixin-source"
451
  src="d3-benchmark-comparison.html"
452
  title="Source Dataset (Mix-in = Source)"
453
- desc="Figure: Effect of source dataset when mix-in equals source. Use the Setup dropdown to compare prompts."
454
  config={{
455
  setups: {
456
  "Tutorial Prompt": {
@@ -481,7 +481,7 @@ When mix-in varies with source, source quality appears to matter: FineWeb-Edu-HQ
481
  id="source-dataset-fixed-mixin"
482
  src="d3-benchmark-comparison.html"
483
  title="Source Dataset (Fixed Mix-in: FineWeb-Edu-HQ)"
484
- desc="Figure: Effect of source dataset with FineWeb-Edu-HQ as fixed mix-in. Use the Setup dropdown to compare prompts."
485
  config={{
486
  setups: {
487
  "Tutorial Prompt": {
@@ -528,7 +528,7 @@ Interestingly, when mixing enough different prompts together, we don't seem to n
528
  id="diversity"
529
  src="d3-benchmark-comparison.html"
530
  title="Diversity"
531
- desc="Figure: Different diversity strategies. Use the Setup dropdown to compare approaches."
532
  config={{
533
  setups: {
534
  "Mixing Prompts": {
@@ -584,7 +584,7 @@ Surprisingly, typos don't have a negative effect on downstream model performance
584
  id="typos-effect"
585
  src="d3-benchmark-comparison.html"
586
  title="Effect of Typos in Prompt"
587
- desc="Figure: REWIRE prompt with original typos vs improved version at 1B and 12B scale."
588
  config={{
589
  datasetNames: {
590
  "mix-fw_edu_hq-guided_rewrite_original_12b_hq": "Original (12B)",
 
29
  id="baselines-comparison"
30
  src="d3-benchmark-comparison.html"
31
  title="Baseline Comparison"
32
+ desc="Comparison of baseline datasets across different evaluation metrics. Use the dropdown to switch metrics."
33
  config={{
34
  baselines: [],
35
  datasetNames: {
 
65
  id="dissecting-baselines"
66
  src="d3-benchmark-comparison.html"
67
  title="Dissecting Synthetic Baselines"
68
+ desc="Individual prompt performance from existing synthetic datasets compared to DCLM and FineWeb-Edu-HQ."
69
  config={{
70
  baselines: ["dclm", "nemotron_hq_synth", "rewire"],
71
  datasetNames: {
 
110
  id="new-prompts"
111
  src="d3-benchmark-comparison.html"
112
  title="New Prompt Performance"
113
+ desc="Seven new prompts compared against DCLM and FineWeb-Edu-HQ."
114
  config={{
115
  datasetNames: {
116
  "mix-fw_edu_hq-math_1b_hq": "Math",
 
148
  id="model-size"
149
  src="d3-benchmark-comparison.html"
150
  title="Model Size"
151
+ desc="Gemma-3 model sizes (270M to 27B). Use the Setup dropdown to compare across prompts."
152
  config={{
153
  setups: {
154
  "Tutorial Prompt": {
 
191
  id="size-quality"
192
  src="d3-benchmark-comparison.html"
193
  title="Model Size vs Data Quality"
194
+ desc="1B vs 12B model on HQ vs LQ data. Use the Setup dropdown to compare across prompts."
195
  config={{
196
  setups: {
197
  "Continue Prompt": {
 
256
  id="model-family"
257
  src="d3-benchmark-comparison.html"
258
  title="Model Family"
259
+ desc="Model families compared at ~1B scale. Use the Setup dropdown to compare across prompts."
260
  config={{
261
  setups: {
262
  "Tutorial Prompt": {
 
325
  id="model-generation"
326
  src="d3-benchmark-comparison.html"
327
  title="Model Generation: Qwen Tutorial"
328
+ desc="Qwen model generations (1.5 to 3) on the tutorial prompt."
329
  config={{
330
  datasetNames: {
331
  "mix-fw_edu_hq-tutorial_qwen3_1.7b_hq": "Qwen3 (1.7B)",
 
363
  id="synthetic-only"
364
  src="d3-benchmark-comparison.html"
365
  title="Is Synthetic Data Enough?"
366
+ desc="Synthetic-only vs mixed training. Use the Setup dropdown to compare across source datasets."
367
  config={{
368
  setups: {
369
  "DCLM Source": {
 
404
  id="mixin-dataset"
405
  src="d3-benchmark-comparison.html"
406
  title="Mix-in Dataset Effect"
407
+ desc="Effect of different mix-in datasets. Use the Setup dropdown to compare HQ vs LQ source data."
408
  config={{
409
  setups: {
410
  "HQ Source": {
 
450
  id="source-dataset-mixin-source"
451
  src="d3-benchmark-comparison.html"
452
  title="Source Dataset (Mix-in = Source)"
453
+ desc="Effect of source dataset when mix-in equals source. Use the Setup dropdown to compare prompts."
454
  config={{
455
  setups: {
456
  "Tutorial Prompt": {
 
481
  id="source-dataset-fixed-mixin"
482
  src="d3-benchmark-comparison.html"
483
  title="Source Dataset (Fixed Mix-in: FineWeb-Edu-HQ)"
484
+ desc="Effect of source dataset with FineWeb-Edu-HQ as fixed mix-in. Use the Setup dropdown to compare prompts."
485
  config={{
486
  setups: {
487
  "Tutorial Prompt": {
 
528
  id="diversity"
529
  src="d3-benchmark-comparison.html"
530
  title="Diversity"
531
+ desc="Different diversity strategies. Use the Setup dropdown to compare approaches."
532
  config={{
533
  setups: {
534
  "Mixing Prompts": {
 
584
  id="typos-effect"
585
  src="d3-benchmark-comparison.html"
586
  title="Effect of Typos in Prompt"
587
+ desc="REWIRE prompt with original typos vs improved version at 1B and 12B scale."
588
  config={{
589
  datasetNames: {
590
  "mix-fw_edu_hq-guided_rewrite_original_12b_hq": "Original (12B)",
app/src/content/chapters/introduction.mdx CHANGED
@@ -37,7 +37,7 @@ Here's a preview of where we end up: FinePhrase, our best configuration, clearly
37
  id="finephrase-vs-baselines"
38
  src="d3-benchmark-comparison.html"
39
  title="FinePhrase vs Synthetic Baselines"
40
- desc="Figure: FinePhrase compared against synthetic data baselines across evaluation metrics."
41
  config={{
42
  defaultView: "line",
43
  pinnedColors: { "FinePhrase": "#EBA937" },
 
37
  id="finephrase-vs-baselines"
38
  src="d3-benchmark-comparison.html"
39
  title="FinePhrase vs Synthetic Baselines"
40
+ desc="FinePhrase compared against synthetic data baselines across evaluation metrics."
41
  config={{
42
  defaultView: "line",
43
  pinnedColors: { "FinePhrase": "#EBA937" },
app/src/styles/_base.css CHANGED
@@ -178,6 +178,22 @@ html {
178
  opacity: 1;
179
  }
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  .katex .tag {
182
  background: none;
183
  border: none;
 
178
  opacity: 1;
179
  }
180
 
181
+ /* ===== Auto-numbering for figures: a single CSS counter named "figure", scoped to .content-grid main ===== */
182
+ .content-grid main {
183
+ counter-reset: figure; /* create/reset the "figure" counter on this element */
184
+ }
185
+
186
+ .content-grid main figure:not(.table-figure) {
187
+ counter-increment: figure; /* every figure except .table-figure advances the counter by one */
188
+ }
189
+
190
+ /* Prepend "Figure N: " to description figcaptions: either a direct figcaption.html-embed__desc child, or any direct figcaption of a figure that is not .html-embed (so title-only figcaptions inside .html-embed figures are skipped). NOTE(review): the counter advances for every non-table figure, including ones with no matching figcaption, so the visible numbers may skip; confirm this is intended. */
191
+ .content-grid main figure:not(.table-figure) > figcaption.html-embed__desc::before,
192
+ .content-grid main figure:not(.table-figure):not(.html-embed) > figcaption::before {
193
+ content: "Figure " counter(figure) ": "; /* generated label, e.g. "Figure 3: " */
194
+ font-weight: 600;
195
+ }
196
+
197
  .katex .tag {
198
  background: none;
199
  border: none;