File size: 23,422 Bytes
30fab61
09b5abf
6d148a9
09b5abf
 
 
 
 
 
 
 
 
 
6d148a9
09b5abf
 
6d148a9
 
 
09b5abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d148a9
 
09b5abf
6d148a9
09b5abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d148a9
 
 
 
09b5abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661b5bb
 
 
 
09b5abf
 
 
 
 
 
6d148a9
09b5abf
 
 
6d148a9
09b5abf
 
 
6d148a9
09b5abf
 
 
6d148a9
 
 
 
09b5abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661b5bb
 
 
09b5abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661b5bb
 
 
09b5abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661b5bb
 
 
09b5abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d148a9
661b5bb
 
09b5abf
 
 
 
 
6d148a9
09b5abf
 
 
6d148a9
 
 
09b5abf
661b5bb
 
6d148a9
30fab61
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265

<div style="max-width:860px;margin:0 auto;padding:2rem 1rem 3rem;font-family:var(--font-sans);color:var(--color-text-primary);">

  <div style="text-align:center;padding:3rem 1rem 2.5rem;">
    <img src="https://i.ibb.co/rGS6dBcf/logo-Astro-X.png" alt="AstroX AI" style="height:70px;object-fit:contain;display:block;margin:0 auto 1.75rem;">
    <p style="font-size:15px;color:var(--color-text-secondary);max-width:560px;margin:0 auto 1.75rem;line-height:1.7;">A frontier-class Mixture-of-Experts language model — competitive with leading closed-source models at a fraction of the training cost. Fully open-source, with commercial use permitted.</p>
    <div style="display:flex;gap:8px;justify-content:center;flex-wrap:wrap;margin-bottom:1.75rem;">
      <span style="font-size:11px;padding:4px 11px;border-radius:99px;background:#EEEDFE;color:#3C3489;border:0.5px solid #AFA9EC;">Mixture-of-Experts</span>
      <span style="font-size:11px;padding:4px 11px;border-radius:99px;border:0.5px solid var(--color-border-secondary);color:var(--color-text-secondary);">671B total params</span>
      <span style="font-size:11px;padding:4px 11px;border-radius:99px;border:0.5px solid var(--color-border-secondary);color:var(--color-text-secondary);">37B activated per token</span>
      <span style="font-size:11px;padding:4px 11px;border-radius:99px;border:0.5px solid var(--color-border-secondary);color:var(--color-text-secondary);">128K context</span>
      <span style="font-size:11px;padding:4px 11px;border-radius:99px;border:0.5px solid var(--color-border-secondary);color:var(--color-text-secondary);">FP8 training</span>
      <span style="font-size:11px;padding:4px 11px;border-radius:99px;border:0.5px solid var(--color-border-secondary);color:var(--color-text-secondary);">MIT-licensed code</span>
    </div>
    <div style="display:inline-flex;align-items:center;gap:8px;font-size:13px;padding:9px 20px;border-radius:var(--border-radius-md);border:0.5px solid var(--color-border-secondary);color:var(--color-text-secondary);background:var(--color-background-secondary);">
      <span>huggingface.co/</span><code style="font-family:var(--font-mono);font-size:12px;">teamzero/astrox</code>
    </div>
  </div>

  <div style="display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:12px;margin-bottom:2.5rem;">
    <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem;text-align:center;">
      <div style="font-size:22px;font-weight:500;margin-bottom:3px;">671B</div>
      <div style="font-size:12px;color:var(--color-text-secondary);">Total params</div>
    </div>
    <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem;text-align:center;">
      <div style="font-size:22px;font-weight:500;margin-bottom:3px;">37B</div>
      <div style="font-size:12px;color:var(--color-text-secondary);">Active per token</div>
    </div>
    <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem;text-align:center;">
      <div style="font-size:22px;font-weight:500;margin-bottom:3px;">128K</div>
      <div style="font-size:12px;color:var(--color-text-secondary);">Context window</div>
    </div>
    <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem;text-align:center;">
      <div style="font-size:22px;font-weight:500;margin-bottom:3px;">2.79M</div>
      <div style="font-size:12px;color:var(--color-text-secondary);">H800 GPU hours</div>
    </div>
  </div>

  <hr style="border:none;border-top:0.5px solid var(--color-border-tertiary);margin:2.5rem 0;">

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Model</div>
    <div style="background:var(--color-background-primary);border:0.5px solid var(--color-border-secondary);border-radius:var(--border-radius-lg);padding:1.5rem;">
      <div style="font-size:22px;font-weight:500;margin-bottom:4px;">AstroX</div>
      <div style="font-size:14px;color:var(--color-text-secondary);margin-bottom:1rem;line-height:1.6;">Instruction-tuned chat model with reinforcement learning and advanced long-chain-of-thought reasoning distillation. The only available model in the AstroX family.</div>
      <div style="display:flex;gap:6px;flex-wrap:wrap;margin-bottom:1.25rem;">
        <span style="font-size:11px;padding:3px 10px;border-radius:99px;background:#EEEDFE;color:#3C3489;">MoE</span>
        <span style="font-size:11px;padding:3px 10px;border-radius:99px;background:#E6F1FB;color:#0C447C;">671B / 37B active</span>
        <span style="font-size:11px;padding:3px 10px;border-radius:99px;background:#E1F5EE;color:#085041;">128K context</span>
        <span style="font-size:11px;padding:3px 10px;border-radius:99px;background:#FAEEDA;color:#633806;">FP8 weights</span>
      </div>
      <div style="border-top:0.5px solid var(--color-border-tertiary);padding-top:1rem;display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:12px;">
        <div><div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">Architecture</div><div style="font-size:13px;font-weight:500;">MoE + Multi-head Latent Attention</div></div>
        <div><div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">Experts</div><div style="font-size:13px;font-weight:500;">256 total · 8 active</div></div>
        <div><div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">Pre-training data</div><div style="font-size:13px;font-weight:500;">14.8T tokens</div></div>
        <div><div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">License</div><div style="font-size:13px;font-weight:500;">MIT (code) + Model Agreement (model)</div></div>
      </div>
    </div>
  </div>

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Architecture highlights</div>
    <div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;">
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">Attention</div>
        <div style="font-size:14px;font-weight:500;margin-bottom:4px;">Multi-head Latent Attention (MLA)</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">Reduces KV cache memory footprint significantly vs. standard MHA, enabling practical long-context inference.</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">Load balancing</div>
        <div style="font-size:14px;font-weight:500;margin-bottom:4px;">Auxiliary-loss-free strategy</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">Balances expert load without the performance penalty of traditional auxiliary loss terms.</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">Training objective</div>
        <div style="font-size:14px;font-weight:500;margin-bottom:4px;">Multi-Token Prediction (MTP)</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">Predicts multiple future tokens simultaneously, boosting performance and enabling speculative decoding.</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:11px;color:var(--color-text-secondary);margin-bottom:3px;">Post-training</div>
        <div style="font-size:14px;font-weight:500;margin-bottom:4px;">Reasoning distillation</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">Verification and reflection patterns distilled from a long-CoT model, keeping output style and length controlled.</div>
      </div>
    </div>
  </div>

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Key innovations</div>
    <div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(190px,1fr));gap:12px;">
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:14px;font-weight:500;margin-bottom:5px;">FP8 mixed precision</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">First validated large-scale FP8 training. Cuts compute cost without quality loss.</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:14px;font-weight:500;margin-bottom:5px;">Zero training instability</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">No irrecoverable loss spikes and no rollbacks throughout the entire pre-training run.</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:14px;font-weight:500;margin-bottom:5px;">Near-full comm/compute overlap</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">Co-designed algorithms and hardware nearly eliminate cross-node MoE communication bottlenecks.</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.1rem;">
        <div style="font-size:14px;font-weight:500;margin-bottom:5px;">Speculative decoding ready</div>
        <div style="font-size:13px;color:var(--color-text-secondary);line-height:1.55;">The MTP module doubles as a draft head for inference acceleration out of the box.</div>
      </div>
    </div>
  </div>

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Benchmark performance — math &amp; reasoning</div>
    <table style="width:100%;border-collapse:collapse;font-size:13px;table-layout:fixed;">
      <tr>
        <th style="text-align:left;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);width:44%;">Benchmark</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">GPT-4o</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">Claude 3.5 Sonnet</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">AstroX</th>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">AIME 2024 (Pass@1)</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">9.3</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">16.0</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">39.2</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">MATH-500 (EM)</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">74.6</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">78.3</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">90.2</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">CNMO 2024 (Pass@1)</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">10.8</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">13.1</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">43.2</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;">GSM8K (EM)</td>
        <td style="padding:7px 10px;text-align:right;">—</td>
        <td style="padding:7px 10px;text-align:right;">—</td>
        <td style="padding:7px 10px;text-align:right;font-weight:500;color:#1D9E75;">89.3</td>
      </tr>
    </table>
  </div>

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Benchmark performance — code</div>
    <table style="width:100%;border-collapse:collapse;font-size:13px;table-layout:fixed;">
      <tr>
        <th style="text-align:left;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);width:44%;">Benchmark</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">GPT-4o</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">Claude 3.5 Sonnet</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">AstroX</th>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">LiveCodeBench (Pass@1)</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">34.2</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">32.8</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">37.6</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">Codeforces (Percentile)</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">23.6</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">20.3</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">51.6</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">Aider-Polyglot (Acc.)</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">16.0</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">45.3</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">49.6</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;">HumanEval-Mul (Pass@1)</td>
        <td style="padding:7px 10px;text-align:right;">80.5</td>
        <td style="padding:7px 10px;text-align:right;">81.7</td>
        <td style="padding:7px 10px;text-align:right;font-weight:500;color:#1D9E75;">82.6</td>
      </tr>
    </table>
  </div>

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Benchmark performance — general</div>
    <table style="width:100%;border-collapse:collapse;font-size:13px;table-layout:fixed;">
      <tr>
        <th style="text-align:left;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);width:44%;">Benchmark</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">GPT-4o</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">Claude 3.5 Sonnet</th>
        <th style="text-align:right;padding:7px 10px;color:var(--color-text-secondary);font-weight:500;border-bottom:0.5px solid var(--color-border-tertiary);">AstroX</th>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">MMLU (EM)</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">87.2</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">88.3</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">88.5</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">Arena-Hard</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">80.4</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">85.2</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">85.5</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);">AlpacaEval 2.0</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">51.1</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;">52.0</td>
        <td style="padding:7px 10px;border-bottom:0.5px solid var(--color-border-tertiary);text-align:right;font-weight:500;color:#1D9E75;">70.0</td>
      </tr>
      <tr>
        <td style="padding:7px 10px;">DROP (3-shot F1)</td>
        <td style="padding:7px 10px;text-align:right;">83.7</td>
        <td style="padding:7px 10px;text-align:right;">88.3</td>
        <td style="padding:7px 10px;text-align:right;font-weight:500;color:#1D9E75;">91.6</td>
      </tr>
    </table>
  </div>

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Supported inference frameworks</div>
    <div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(140px,1fr));gap:8px;">
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:10px 14px;">
        <div style="font-size:13px;font-weight:500;margin-bottom:2px;">SGLang</div>
        <div style="font-size:11px;color:var(--color-text-secondary);">Recommended · FP8 + BF16 · NVIDIA + AMD</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:10px 14px;">
        <div style="font-size:13px;font-weight:500;margin-bottom:2px;">vLLM</div>
        <div style="font-size:11px;color:var(--color-text-secondary);">FP8 + BF16 · pipeline parallelism</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:10px 14px;">
        <div style="font-size:13px;font-weight:500;margin-bottom:2px;">LMDeploy</div>
        <div style="font-size:11px;color:var(--color-text-secondary);">Offline + online · PyTorch-native</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:10px 14px;">
        <div style="font-size:13px;font-weight:500;margin-bottom:2px;">TensorRT-LLM</div>
        <div style="font-size:11px;color:var(--color-text-secondary);">BF16 · INT4/INT8 quant</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:10px 14px;">
        <div style="font-size:13px;font-weight:500;margin-bottom:2px;">AMD GPU</div>
        <div style="font-size:11px;color:var(--color-text-secondary);">via SGLang · FP8 + BF16</div>
      </div>
      <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:10px 14px;">
        <div style="font-size:13px;font-weight:500;margin-bottom:2px;">Huawei Ascend</div>
        <div style="font-size:11px;color:var(--color-text-secondary);">via MindIE · BF16</div>
      </div>
    </div>
  </div>

  <div style="margin-bottom:2.5rem;">
    <div style="font-size:11px;font-weight:500;color:var(--color-text-secondary);text-transform:uppercase;letter-spacing:0.09em;margin-bottom:1rem;">Quick start</div>
    <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.25rem;margin-bottom:8px;">
      <div style="font-size:12px;color:var(--color-text-secondary);margin-bottom:8px;">Convert FP8 weights to BF16</div>
      <code style="font-family:var(--font-mono);font-size:12px;color:var(--color-text-primary);display:block;line-height:1.8;white-space:pre-wrap;">python fp8_cast_bf16.py \<br>  --input-fp8-hf-path /path/to/fp8_weights \<br>  --output-bf16-hf-path /path/to/bf16_weights</code>
    </div>
    <div style="background:var(--color-background-secondary);border-radius:var(--border-radius-md);padding:1rem 1.25rem;">
      <div style="font-size:12px;color:var(--color-text-secondary);margin-bottom:8px;">Run interactive inference (2 nodes · 8 GPUs each)</div>
      <code style="font-family:var(--font-mono);font-size:12px;color:var(--color-text-primary);display:block;line-height:1.8;white-space:pre-wrap;">torchrun --nnodes 2 --nproc-per-node 8 generate.py \<br>  --node-rank $RANK --master-addr $ADDR \<br>  --ckpt-path /path/to/AstroX \<br>  --config configs/config_671B.json \<br>  --interactive --temperature 0.7 --max-new-tokens 200</code>
    </div>
  </div>

  <div style="text-align:center;padding-top:2rem;font-size:12px;color:var(--color-text-secondary);line-height:2;">
    Code license: MIT &nbsp;·&nbsp; Model license: Model Agreement &nbsp;·&nbsp; Commercial use supported<br>
    huggingface.co/teamzero/astrox
  </div>

</div>