teamzero committed on
Commit
661b5bb
·
verified ·
1 Parent(s): 6d148a9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +171 -119
README.md CHANGED
@@ -1,75 +1,82 @@
1
 
2
  <style>
3
- * { box-sizing: border-box; margin: 0; padding: 0; }
4
- .page { max-width: 860px; margin: 0 auto; padding: 2rem 1rem; font-family: var(--font-sans); color: var(--color-text-primary); }
5
- .hero { text-align: center; padding: 3rem 1rem 2.5rem; }
6
- .hero img { height: 68px; object-fit: contain; margin-bottom: 1.5rem; }
7
- .hero h1 { font-size: 30px; font-weight: 500; letter-spacing: -0.5px; margin-bottom: 0.5rem; }
8
- .hero p { font-size: 15px; color: var(--color-text-secondary); max-width: 560px; margin: 0 auto 1.75rem; line-height: 1.7; }
9
- .badges { display: flex; gap: 8px; justify-content: center; flex-wrap: wrap; margin-bottom: 1.5rem; }
10
- .badge { font-size: 11px; padding: 4px 10px; border-radius: 99px; border: 0.5px solid var(--color-border-secondary); color: var(--color-text-secondary); }
11
- .badge.accent { background: #0d0d1e; color: #a8b0ff; border-color: #3a3d88; }
12
- @media (prefers-color-scheme: dark) { .badge.accent { background: #1a1a3a; color: #c0c8ff; border-color: #4a4d98; } }
13
- .links { display: flex; gap: 10px; justify-content: center; flex-wrap: wrap; }
14
- .link-btn { font-size: 13px; padding: 8px 18px; border-radius: var(--border-radius-md); border: 0.5px solid var(--color-border-secondary); color: var(--color-text-primary); text-decoration: none; cursor: pointer; background: var(--color-background-secondary); }
15
- .link-btn.primary { background: #0d0d1e; color: #c0c8ff; border-color: #3a3d88; }
16
- @media (prefers-color-scheme: dark) { .link-btn.primary { background: #1a1a3a; } }
17
- .stat-row { display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin: 2rem 0; }
18
- .stat { background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 1rem; text-align: center; }
19
- .stat .val { font-size: 22px; font-weight: 500; margin-bottom: 4px; }
20
- .stat .lbl { font-size: 12px; color: var(--color-text-secondary); }
21
- .divider { border: none; border-top: 0.5px solid var(--color-border-tertiary); margin: 2rem 0; }
22
- .section-title { font-size: 12px; font-weight: 500; color: var(--color-text-secondary); text-transform: uppercase; letter-spacing: 0.08em; margin-bottom: 1rem; }
23
- .model-card { background: var(--color-background-primary); border: 0.5px solid var(--color-border-secondary); border-radius: var(--border-radius-lg); padding: 1.5rem; display: flex; align-items: flex-start; justify-content: space-between; gap: 1.5rem; flex-wrap: wrap; }
24
- .model-info .name { font-size: 20px; font-weight: 500; margin-bottom: 4px; }
25
- .model-info .sub { font-size: 14px; color: var(--color-text-secondary); margin-bottom: 14px; }
26
- .pill-row { display: flex; gap: 6px; flex-wrap: wrap; }
27
- .pill { font-size: 11px; padding: 3px 9px; border-radius: 99px; }
28
- .pill.blue { background: #e6f1fb; color: #0c447c; }
29
- .pill.teal { background: #e1f5ee; color: #085041; }
30
- .pill.amber { background: #faeeda; color: #633806; }
31
- .pill.purple { background: #EEEDFE; color: #3C3489; }
32
- @media (prefers-color-scheme: dark) {
33
- .pill.blue { background: #0c447c; color: #b5d4f4; }
34
- .pill.teal { background: #085041; color: #9fe1cb; }
35
- .pill.amber { background: #633806; color: #fac775; }
36
- .pill.purple { background: #3C3489; color: #CECBF6; }
37
- }
38
- .dl-btn { display: inline-block; font-size: 13px; padding: 8px 18px; border-radius: var(--border-radius-md); border: 0.5px solid var(--color-border-secondary); color: var(--color-text-primary); cursor: pointer; background: var(--color-background-secondary); text-decoration: none; white-space: nowrap; }
39
- .feature-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(190px, 1fr)); gap: 12px; }
40
- .feature-card { background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 1rem; }
41
- .feature-card .ft { font-size: 14px; font-weight: 500; margin-bottom: 4px; }
42
- .feature-card .fd { font-size: 13px; color: var(--color-text-secondary); line-height: 1.5; }
43
- .tabs { display: flex; gap: 0; border-bottom: 0.5px solid var(--color-border-tertiary); margin-bottom: 1.5rem; }
44
- .tab { font-size: 13px; padding: 8px 16px; cursor: pointer; color: var(--color-text-secondary); border-bottom: 2px solid transparent; margin-bottom: -1px; }
45
- .tab.active { color: var(--color-text-primary); border-bottom-color: var(--color-text-primary); }
46
- .tab-content { display: none; }
47
- .tab-content.active { display: block; }
48
- .benchmark-table { width: 100%; border-collapse: collapse; font-size: 13px; }
49
- .benchmark-table th { text-align: left; padding: 8px 12px; color: var(--color-text-secondary); font-weight: 500; border-bottom: 0.5px solid var(--color-border-tertiary); }
50
- .benchmark-table td { padding: 8px 12px; border-bottom: 0.5px solid var(--color-border-tertiary); }
51
- .benchmark-table tr:last-child td { border-bottom: none; }
52
- .best { font-weight: 500; color: #1D9E75; }
53
- .footer { text-align: center; padding: 2rem 0 1rem; font-size: 12px; color: var(--color-text-secondary); }
 
 
 
 
 
 
 
 
 
54
  </style>
55
 
56
  <div class="page">
57
 
58
  <div class="hero">
59
  <img src="https://i.ibb.co/rGS6dBcf/logo-Astro-X.png" alt="AstroX AI">
 
60
  <div class="badges">
61
- <span class="badge accent">MoE Architecture</span>
62
  <span class="badge">671B total params</span>
63
  <span class="badge">37B activated per token</span>
64
  <span class="badge">128K context</span>
 
65
  <span class="badge">MIT License</span>
66
  </div>
67
- <p>A frontier-class Mixture-of-Experts language model. Competitive with leading closed-source models — efficient, powerful, and fully open-source.</p>
68
- <div class="links">
69
- <a class="link-btn primary" href="https://huggingface.co/teamzero/astrox">HuggingFace</a>
70
- <a class="link-btn" href="https://chat.deepseek.com/">Chat Demo</a>
71
- <a class="link-btn" href="https://platform.deepseek.com/">API Platform</a>
72
- <a class="link-btn" href="https://arxiv.org/abs/2412.19437">Paper</a>
73
  </div>
74
  </div>
75
 
@@ -77,94 +84,139 @@
77
  <div class="stat"><div class="val">671B</div><div class="lbl">Total params</div></div>
78
  <div class="stat"><div class="val">37B</div><div class="lbl">Active per token</div></div>
79
  <div class="stat"><div class="val">128K</div><div class="lbl">Context window</div></div>
80
- <div class="stat"><div class="val">2.79M</div><div class="lbl">GPU hours</div></div>
81
  </div>
82
 
83
  <hr class="divider">
84
 
85
- <div style="margin-bottom: 2.5rem;">
86
- <div class="section-title">Model</div>
87
  <div class="model-card">
88
- <div class="model-info">
89
- <div class="name">AstroX</div>
90
- <div class="sub">Instruction-tuned · Reinforcement Learning · R1 reasoning distillation</div>
91
- <div class="pill-row">
92
- <span class="pill blue">671B / 37B active</span>
93
- <span class="pill teal">128K context</span>
94
- <span class="pill amber">FP8</span>
95
- <span class="pill purple">MoE</span>
96
- </div>
 
 
 
 
97
  </div>
98
- <a class="dl-btn" href="https://huggingface.co/teamzero/astrox">Download on HuggingFace</a>
99
  </div>
100
  </div>
101
 
102
- <div style="margin-bottom: 2.5rem;">
103
- <div class="section-title">Key innovations</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  <div class="feature-grid">
105
  <div class="feature-card">
106
- <div class="ft">Auxiliary-loss-free balancing</div>
107
- <div class="fd">Novel MoE load balancing that minimizes performance degradation during routing.</div>
108
  </div>
109
  <div class="feature-card">
110
- <div class="ft">Multi-token prediction</div>
111
- <div class="fd">MTP training objective boosts performance and enables speculative decoding.</div>
112
  </div>
113
  <div class="feature-card">
114
- <div class="ft">FP8 mixed precision</div>
115
- <div class="fd">First validated large-scale FP8 training — lower cost, no quality loss.</div>
116
  </div>
117
  <div class="feature-card">
118
- <div class="ft">R1 reasoning distillation</div>
119
- <div class="fd">Distills long-chain-of-thought reasoning from DeepSeek-R1 into a standard LLM.</div>
120
  </div>
121
  </div>
122
  </div>
123
 
124
- <div style="margin-bottom: 2.5rem;">
125
- <div class="section-title">Benchmark highlights</div>
126
- <div class="tabs">
127
- <div class="tab active" onclick="switchTab(this,'math')">Math & Reasoning</div>
128
- <div class="tab" onclick="switchTab(this,'code')">Code</div>
129
- <div class="tab" onclick="switchTab(this,'general')">General</div>
130
- </div>
131
- <div id="math" class="tab-content active">
132
- <table class="benchmark-table">
133
- <tr><th>Benchmark</th><th>GPT-4o</th><th>Claude 3.5 Sonnet</th><th>AstroX</th></tr>
134
- <tr><td>AIME 2024 (Pass@1)</td><td>9.3</td><td>16.0</td><td class="best">39.2</td></tr>
135
- <tr><td>MATH-500 (EM)</td><td>74.6</td><td>78.3</td><td class="best">90.2</td></tr>
136
- <tr><td>CNMO 2024 (Pass@1)</td><td>10.8</td><td>13.1</td><td class="best">43.2</td></tr>
137
- </table>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  </div>
139
- <div id="code" class="tab-content">
140
- <table class="benchmark-table">
141
- <tr><th>Benchmark</th><th>GPT-4o</th><th>Claude 3.5 Sonnet</th><th>AstroX</th></tr>
142
- <tr><td>LiveCodeBench (Pass@1)</td><td>34.2</td><td>32.8</td><td class="best">37.6</td></tr>
143
- <tr><td>Codeforces (Percentile)</td><td>23.6</td><td>20.3</td><td class="best">51.6</td></tr>
144
- <tr><td>Aider-Polyglot (Acc.)</td><td>16.0</td><td>45.3</td><td class="best">49.6</td></tr>
145
- </table>
146
  </div>
147
- <div id="general" class="tab-content">
148
- <table class="benchmark-table">
149
- <tr><th>Benchmark</th><th>GPT-4o</th><th>Claude 3.5 Sonnet</th><th>AstroX</th></tr>
150
- <tr><td>MMLU (EM)</td><td>87.2</td><td>88.3</td><td class="best">88.5</td></tr>
151
- <tr><td>Arena-Hard</td><td>80.4</td><td>85.2</td><td class="best">85.5</td></tr>
152
- <tr><td>AlpacaEval 2.0</td><td>51.1</td><td>52.0</td><td class="best">70.0</td></tr>
153
- </table>
154
  </div>
155
  </div>
156
 
157
  <div class="footer">
158
- Code: MIT License &nbsp;·&nbsp; Model: Model Agreement &nbsp;·&nbsp; Commercial use supported
 
159
  </div>
160
 
161
  </div>
162
-
163
- <script>
164
- function switchTab(el, id) {
165
- document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
166
- document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));
167
- el.classList.add('active');
168
- document.getElementById(id).classList.add('active');
169
- }
170
- </script>
 
1
 
2
  <style>
3
+ * { box-sizing: border-box; margin: 0; padding: 0; }
4
+ body { font-family: var(--font-sans); color: var(--color-text-primary); }
5
+ .page { max-width: 860px; margin: 0 auto; padding: 2rem 1rem 3rem; }
6
+ .hero { text-align: center; padding: 3rem 1rem 2.5rem; }
7
+ .hero img { height: 70px; object-fit: contain; margin-bottom: 1.75rem; display: block; margin-left: auto; margin-right: auto; }
8
+ .hero h1 { font-size: 32px; font-weight: 500; letter-spacing: -0.5px; margin-bottom: 0.5rem; }
9
+ .hero p { font-size: 15px; color: var(--color-text-secondary); max-width: 560px; margin: 0 auto 1.75rem; line-height: 1.7; }
10
+ .badges { display: flex; gap: 8px; justify-content: center; flex-wrap: wrap; margin-bottom: 1.75rem; }
11
+ .badge { font-size: 11px; padding: 4px 11px; border-radius: 99px; border: 0.5px solid var(--color-border-secondary); color: var(--color-text-secondary); }
12
+ .badge.purple { background: #EEEDFE; color: #3C3489; border-color: #AFA9EC; }
13
+ @media (prefers-color-scheme: dark) { .badge.purple { background: #3C3489; color: #CECBF6; border-color: #534AB7; } }
14
+ .hugging { display: inline-flex; align-items: center; gap: 8px; font-size: 13px; padding: 9px 20px; border-radius: var(--border-radius-md); border: 0.5px solid var(--color-border-secondary); color: var(--color-text-secondary); background: var(--color-background-secondary); }
15
+ .hugging code { font-family: var(--font-mono); font-size: 12px; color: var(--color-text-secondary); }
16
+ .divider { border: none; border-top: 0.5px solid var(--color-border-tertiary); margin: 2.5rem 0; }
17
+ .section { margin-bottom: 2.5rem; }
18
+ .section-label { font-size: 11px; font-weight: 500; color: var(--color-text-secondary); text-transform: uppercase; letter-spacing: 0.09em; margin-bottom: 1rem; }
19
+ .stat-row { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 12px; margin-bottom: 2.5rem; }
20
+ .stat { background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 1rem; text-align: center; }
21
+ .stat .val { font-size: 22px; font-weight: 500; margin-bottom: 3px; }
22
+ .stat .lbl { font-size: 12px; color: var(--color-text-secondary); }
23
+ .model-card { background: var(--color-background-primary); border: 0.5px solid var(--color-border-secondary); border-radius: var(--border-radius-lg); padding: 1.5rem; }
24
+ .model-name { font-size: 22px; font-weight: 500; margin-bottom: 4px; }
25
+ .model-sub { font-size: 14px; color: var(--color-text-secondary); margin-bottom: 1rem; line-height: 1.6; }
26
+ .pill-row { display: flex; gap: 6px; flex-wrap: wrap; margin-bottom: 1.25rem; }
27
+ .pill { font-size: 11px; padding: 3px 10px; border-radius: 99px; }
28
+ .pill.blue { background: #E6F1FB; color: #0C447C; }
29
+ .pill.teal { background: #E1F5EE; color: #085041; }
30
+ .pill.amber { background: #FAEEDA; color: #633806; }
31
+ .pill.purple { background: #EEEDFE; color: #3C3489; }
32
+ @media (prefers-color-scheme: dark) {
33
+ .pill.blue { background: #0C447C; color: #B5D4F4; }
34
+ .pill.teal { background: #085041; color: #9FE1CB; }
35
+ .pill.amber { background: #633806; color: #FAC775; }
36
+ .pill.purple { background: #3C3489; color: #CECBF6; }
37
+ }
38
+ .model-meta { border-top: 0.5px solid var(--color-border-tertiary); padding-top: 1rem; display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 12px; }
39
+ .meta-item .mk { font-size: 11px; color: var(--color-text-secondary); margin-bottom: 3px; }
40
+ .meta-item .mv { font-size: 13px; font-weight: 500; }
41
+ .feature-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(190px, 1fr)); gap: 12px; }
42
+ .feature-card { background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 1rem 1.1rem; }
43
+ .feature-card .ft { font-size: 14px; font-weight: 500; margin-bottom: 5px; }
44
+ .feature-card .fd { font-size: 13px; color: var(--color-text-secondary); line-height: 1.55; }
45
+ .bench-section { margin-bottom: 1.5rem; }
46
+ .bench-title { font-size: 13px; font-weight: 500; margin-bottom: 10px; padding-bottom: 6px; border-bottom: 0.5px solid var(--color-border-tertiary); }
47
+ table { width: 100%; border-collapse: collapse; font-size: 13px; table-layout: fixed; }
48
+ th { text-align: left; padding: 7px 10px; color: var(--color-text-secondary); font-weight: 500; border-bottom: 0.5px solid var(--color-border-tertiary); }
49
+ td { padding: 7px 10px; border-bottom: 0.5px solid var(--color-border-tertiary); }
50
+ tr:last-child td { border-bottom: none; }
51
+ td:not(:first-child), th:not(:first-child) { text-align: right; }
52
+ .best { font-weight: 500; color: #1D9E75; }
53
+ .framework-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(140px, 1fr)); gap: 8px; }
54
+ .fw { background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 10px 14px; font-size: 13px; }
55
+ .fw .fwn { font-weight: 500; margin-bottom: 2px; }
56
+ .fw .fwd { font-size: 11px; color: var(--color-text-secondary); }
57
+ .arch-row { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
58
+ .arch-card { background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 1rem 1.1rem; }
59
+ .arch-card .ak { font-size: 11px; color: var(--color-text-secondary); margin-bottom: 3px; }
60
+ .arch-card .av { font-size: 14px; font-weight: 500; }
61
+ .arch-card .ad { font-size: 12px; color: var(--color-text-secondary); margin-top: 4px; line-height: 1.5; }
62
+ .footer { text-align: center; padding-top: 2rem; font-size: 12px; color: var(--color-text-secondary); line-height: 2; }
63
  </style>
64
 
65
  <div class="page">
66
 
67
  <div class="hero">
68
  <img src="https://i.ibb.co/rGS6dBcf/logo-Astro-X.png" alt="AstroX AI">
69
+ <p>A frontier-class Mixture-of-Experts language model — competitive with leading closed-source models at a fraction of the training cost. Fully open-source and commercially licensed.</p>
70
  <div class="badges">
71
+ <span class="badge purple">Mixture-of-Experts</span>
72
  <span class="badge">671B total params</span>
73
  <span class="badge">37B activated per token</span>
74
  <span class="badge">128K context</span>
75
+ <span class="badge">FP8 training</span>
76
  <span class="badge">MIT License</span>
77
  </div>
78
+ <div class="hugging">
79
+ <span style="font-size:13px; color: var(--color-text-secondary);">huggingface.co/</span><code>teamzero/astrox</code>
 
 
 
 
80
  </div>
81
  </div>
82
 
 
84
  <div class="stat"><div class="val">671B</div><div class="lbl">Total params</div></div>
85
  <div class="stat"><div class="val">37B</div><div class="lbl">Active per token</div></div>
86
  <div class="stat"><div class="val">128K</div><div class="lbl">Context window</div></div>
87
+ <div class="stat"><div class="val">2.79M</div><div class="lbl">H800 GPU hours</div></div>
88
  </div>
89
 
90
  <hr class="divider">
91
 
92
+ <div class="section">
93
+ <div class="section-label">Model</div>
94
  <div class="model-card">
95
+ <div class="model-name">AstroX</div>
96
+ <div class="model-sub">Instruction-tuned chat model with reinforcement learning and R1 long-chain-of-thought reasoning distillation. The only available model in the AstroX family.</div>
97
+ <div class="pill-row">
98
+ <span class="pill purple">MoE</span>
99
+ <span class="pill blue">671B / 37B active</span>
100
+ <span class="pill teal">128K context</span>
101
+ <span class="pill amber">FP8 weights</span>
102
+ </div>
103
+ <div class="model-meta">
104
+ <div class="meta-item"><div class="mk">Architecture</div><div class="mv">DeepSeekMoE + MLA</div></div>
105
+ <div class="meta-item"><div class="mk">Experts</div><div class="mv">256 total · 8 active</div></div>
106
+ <div class="meta-item"><div class="mk">Pre-training data</div><div class="mv">14.8T tokens</div></div>
107
+ <div class="meta-item"><div class="mk">License</div><div class="mv">MIT + Model Agreement</div></div>
108
  </div>
 
109
  </div>
110
  </div>
111
 
112
+ <div class="section">
113
+ <div class="section-label">Architecture highlights</div>
114
+ <div class="arch-row">
115
+ <div class="arch-card">
116
+ <div class="ak">Attention</div>
117
+ <div class="av">Multi-head Latent Attention (MLA)</div>
118
+ <div class="ad">Reduces KV cache memory footprint significantly vs. standard MHA, enabling practical long-context inference.</div>
119
+ </div>
120
+ <div class="arch-card">
121
+ <div class="ak">Load balancing</div>
122
+ <div class="av">Auxiliary-loss-free strategy</div>
123
+ <div class="ad">Balances expert load without the performance penalty of traditional auxiliary loss terms.</div>
124
+ </div>
125
+ <div class="arch-card">
126
+ <div class="ak">Training objective</div>
127
+ <div class="av">Multi-Token Prediction (MTP)</div>
128
+ <div class="ad">Predicts multiple future tokens simultaneously, improving performance and enabling speculative decoding.</div>
129
+ </div>
130
+ <div class="arch-card">
131
+ <div class="ak">Post-training</div>
132
+ <div class="av">R1 reasoning distillation</div>
133
+ <div class="ad">Verification and reflection patterns from DeepSeek-R1 are distilled into the model while keeping output style controlled.</div>
134
+ </div>
135
+ </div>
136
+ </div>
137
+
138
+ <div class="section">
139
+ <div class="section-label">Key innovations</div>
140
  <div class="feature-grid">
141
  <div class="feature-card">
142
+ <div class="ft">FP8 mixed precision</div>
143
+ <div class="fd">First large-scale FP8 training validated on a 671B model. Cuts compute cost without quality loss.</div>
144
  </div>
145
  <div class="feature-card">
146
+ <div class="ft">Zero training instability</div>
147
+ <div class="fd">No irrecoverable loss spikes and no rollbacks throughout the entire pre-training run.</div>
148
  </div>
149
  <div class="feature-card">
150
+ <div class="ft">Full comm/compute overlap</div>
151
+ <div class="fd">Co-designed algorithms and hardware nearly eliminate the communication bottleneck in cross-node MoE training.</div>
152
  </div>
153
  <div class="feature-card">
154
+ <div class="ft">Speculative decoding ready</div>
155
+ <div class="fd">The MTP module can be repurposed as a draft head for inference acceleration out of the box.</div>
156
  </div>
157
  </div>
158
  </div>
159
 
160
+ <div class="section">
161
+ <div class="section-label">Benchmark performance — math &amp; reasoning</div>
162
+ <table>
163
+ <tr><th style="width:44%">Benchmark</th><th>GPT-4o</th><th>Claude 3.5 Sonnet</th><th>AstroX</th></tr>
164
+ <tr><td>AIME 2024 (Pass@1)</td><td>9.3</td><td>16.0</td><td class="best">39.2</td></tr>
165
+ <tr><td>MATH-500 (EM)</td><td>74.6</td><td>78.3</td><td class="best">90.2</td></tr>
166
+ <tr><td>CNMO 2024 (Pass@1)</td><td>10.8</td><td>13.1</td><td class="best">43.2</td></tr>
167
+ <tr><td>GSM8K (EM)</td><td>—</td><td>—</td><td class="best">89.3</td></tr>
168
+ </table>
169
+ </div>
170
+
171
+ <div class="section">
172
+ <div class="section-label">Benchmark performance — code</div>
173
+ <table>
174
+ <tr><th style="width:44%">Benchmark</th><th>GPT-4o</th><th>Claude 3.5 Sonnet</th><th>AstroX</th></tr>
175
+ <tr><td>LiveCodeBench (Pass@1)</td><td>34.2</td><td>32.8</td><td class="best">37.6</td></tr>
176
+ <tr><td>Codeforces (Percentile)</td><td>23.6</td><td>20.3</td><td class="best">51.6</td></tr>
177
+ <tr><td>Aider-Polyglot (Acc.)</td><td>16.0</td><td>45.3</td><td class="best">49.6</td></tr>
178
+ <tr><td>HumanEval-Mul (Pass@1)</td><td>80.5</td><td>81.7</td><td class="best">82.6</td></tr>
179
+ </table>
180
+ </div>
181
+
182
+ <div class="section">
183
+ <div class="section-label">Benchmark performance — general</div>
184
+ <table>
185
+ <tr><th style="width:44%">Benchmark</th><th>GPT-4o</th><th>Claude 3.5 Sonnet</th><th>AstroX</th></tr>
186
+ <tr><td>MMLU (EM)</td><td>87.2</td><td>88.3</td><td class="best">88.5</td></tr>
187
+ <tr><td>Arena-Hard</td><td>80.4</td><td>85.2</td><td class="best">85.5</td></tr>
188
+ <tr><td>AlpacaEval 2.0</td><td>51.1</td><td>52.0</td><td class="best">70.0</td></tr>
189
+ <tr><td>DROP (3-shot F1)</td><td>83.7</td><td>88.3</td><td class="best">91.6</td></tr>
190
+ </table>
191
+ </div>
192
+
193
+ <div class="section">
194
+ <div class="section-label">Supported inference frameworks</div>
195
+ <div class="framework-grid">
196
+ <div class="fw"><div class="fwn">SGLang</div><div class="fwd">Recommended · FP8 + BF16 · NVIDIA + AMD</div></div>
197
+ <div class="fw"><div class="fwn">vLLM</div><div class="fwd">FP8 + BF16 · pipeline parallelism</div></div>
198
+ <div class="fw"><div class="fwn">LMDeploy</div><div class="fwd">Offline + online · PyTorch-native</div></div>
199
+ <div class="fw"><div class="fwn">TensorRT-LLM</div><div class="fwd">BF16 · INT4/INT8 quant</div></div>
200
+ <div class="fw"><div class="fwn">AMD GPU</div><div class="fwd">via SGLang · FP8 + BF16</div></div>
201
+ <div class="fw"><div class="fwn">Huawei Ascend</div><div class="fwd">via MindIE · BF16</div></div>
202
  </div>
203
+ </div>
204
+
205
+ <div class="section">
206
+ <div class="section-label">Quick start</div>
207
+ <div style="background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 1rem 1.25rem;">
208
+ <div style="font-size: 12px; color: var(--color-text-secondary); margin-bottom: 8px;">Convert FP8 weights to BF16</div>
209
+ <code style="font-family: var(--font-mono); font-size: 12px; color: var(--color-text-primary); display: block; line-height: 1.8;">python fp8_cast_bf16.py \<br>&nbsp;&nbsp;--input-fp8-hf-path /path/to/fp8_weights \<br>&nbsp;&nbsp;--output-bf16-hf-path /path/to/bf16_weights</code>
210
  </div>
211
+ <div style="background: var(--color-background-secondary); border-radius: var(--border-radius-md); padding: 1rem 1.25rem; margin-top: 8px;">
212
+ <div style="font-size: 12px; color: var(--color-text-secondary); margin-bottom: 8px;">Run interactive inference (2 nodes · 8 GPUs each)</div>
213
+ <code style="font-family: var(--font-mono); font-size: 12px; color: var(--color-text-primary); display: block; line-height: 1.8;">torchrun --nnodes 2 --nproc-per-node 8 generate.py \<br>&nbsp;&nbsp;--node-rank $RANK --master-addr $ADDR \<br>&nbsp;&nbsp;--ckpt-path /path/to/AstroX \<br>&nbsp;&nbsp;--config configs/config_671B.json \<br>&nbsp;&nbsp;--interactive --temperature 0.7 --max-new-tokens 200</code>
 
 
 
 
214
  </div>
215
  </div>
216
 
217
  <div class="footer">
218
+ Code license: MIT &nbsp;·&nbsp; Model license: Model Agreement &nbsp;·&nbsp; Commercial use supported<br>
219
+ huggingface.co/teamzero/astrox
220
  </div>
221
 
222
  </div>