joelniklaus HF Staff committed on
Commit
5bb39a7
·
1 Parent(s): fb9415e

improved tab styling

Browse files
app/src/components/Tabs.astro CHANGED
@@ -1,9 +1,16 @@
1
  ---
2
- const { class: className, ...props } = Astro.props;
 
 
 
 
 
3
  const wrapperClass = ["tabs", className].filter(Boolean).join(" ");
4
  ---
5
  <div class={wrapperClass} {...props}>
6
- <div class="tabs__nav" role="tablist"></div>
 
 
7
  <div class="tabs__panels">
8
  <slot />
9
  </div>
@@ -82,6 +89,20 @@ const wrapperClass = ["tabs", className].filter(Boolean).join(" ");
82
  display: none;
83
  }
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  .tabs__nav :global(.tabs__btn) {
86
  flex: 1 1 0;
87
  padding: var(--spacing-2) var(--spacing-3);
 
1
  ---
2
+ interface Props {
3
+ title?: string;
4
+ class?: string;
5
+ [key: string]: any;
6
+ }
7
+ const { title, class: className, ...props } = Astro.props;
8
  const wrapperClass = ["tabs", className].filter(Boolean).join(" ");
9
  ---
10
  <div class={wrapperClass} {...props}>
11
+ <div class="tabs__nav" role="tablist">
12
+ {title && <span class="tabs__title">{title}</span>}
13
+ </div>
14
  <div class="tabs__panels">
15
  <slot />
16
  </div>
 
89
  display: none;
90
  }
91
 
92
+ .tabs__title {
93
+ padding: var(--spacing-2) var(--spacing-3);
94
+ border-right: 1px solid var(--border-color);
95
+ font-size: 0.8em;
96
+ font-weight: 700;
97
+ color: var(--text-muted);
98
+ text-transform: uppercase;
99
+ letter-spacing: 0.05em;
100
+ white-space: nowrap;
101
+ display: flex;
102
+ align-items: center;
103
+ user-select: none;
104
+ }
105
+
106
  .tabs__nav :global(.tabs__btn) {
107
  flex: 1 1 0;
108
  padding: var(--spacing-2) var(--spacing-3);
app/src/content/chapters/2-setup.mdx CHANGED
@@ -31,7 +31,7 @@ For inference we use vLLM [@vllm] with tensor parallelism, chunked prefill, and
31
 
32
  Before diving into experiments, here's a quick overview of the datasets we compare against. We use "source data" and "seed data" interchangeably throughout.
33
 
34
- <Tabs>
35
  <Tab title="DCLM">
36
  A standardized benchmark providing a 240T token corpus from Common Crawl with model-based filtering as a key curation strategy. DCLM (DataComp-LM) enables training a 7B parameter model to 64% accuracy on MMLU with 2.6T tokens [@datacomp].
37
  </Tab>
@@ -41,6 +41,9 @@ Before diving into experiments, here's a quick overview of the datasets we compa
41
  <Tab title="Ultra-FineWeb">
42
  A 1T English token and 120B Chinese token dataset created by applying efficient verification-based filtering to FineWeb. Uses a lightweight fastText classifier and optimized seed data selection to improve data quality [@ultrafineweb].
43
  </Tab>
 
 
 
44
  <Tab title="Nemotron-HQ-Synth">
45
  Part of Nemotron-CC, a 6.3T token dataset using classifier ensembling and synthetic data rephrasing. The High-Quality-Synthetic subset contains synthetically rephrased data using Qwen3-30B-A3B [@qwen3] [@nemotroncc].
46
  </Tab>
 
31
 
32
  Before diving into experiments, here's a quick overview of the datasets we compare against. We use "source data" and "seed data" interchangeably throughout.
33
 
34
+ <Tabs title="Curated">
35
  <Tab title="DCLM">
36
  A standardized benchmark providing a 240T token corpus from Common Crawl with model-based filtering as a key curation strategy. DCLM (DataComp-LM) enables training a 7B parameter model to 64% accuracy on MMLU with 2.6T tokens [@datacomp].
37
  </Tab>
 
41
  <Tab title="Ultra-FineWeb">
42
  A 1T English token and 120B Chinese token dataset created by applying efficient verification-based filtering to FineWeb. Uses a lightweight fastText classifier and optimized seed data selection to improve data quality [@ultrafineweb].
43
  </Tab>
44
+ </Tabs>
45
+
46
+ <Tabs title="Synthetic">
47
  <Tab title="Nemotron-HQ-Synth">
48
  Part of Nemotron-CC, a 6.3T token dataset using classifier ensembling and synthetic data rephrasing. The High-Quality-Synthetic subset contains synthetically rephrased data using Qwen3-30B-A3B [@qwen3] [@nemotroncc].
49
  </Tab>