OrlandoHugBot committed on
Commit
5d15719
·
verified ·
1 Parent(s): 95ea503

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +148 -77
README.md CHANGED
@@ -55,134 +55,205 @@ license: mit
55
  <table>
56
  <thead>
57
  <tr>
58
- <th></th>
59
- <th align="center"><strong>Vision</strong></th>
60
- <th align="center" colspan="3"><strong>Reasoning</strong></th>
61
- <th align="center" colspan="3"><strong>Vision</strong></th>
62
  </tr>
63
  <tr>
64
  <th></th>
65
  <th></th>
66
- <th align="center"><strong>MATH-500</strong></th>
67
- <th align="center"><strong>AIME 2024</strong></th>
68
- <th align="center"><strong>GPQA</strong></th>
69
- <th align="center"><strong>MathVista(mini)</strong></th>
70
- <th align="center"><strong>MMMU(Val)</strong></th>
 
 
 
 
 
 
 
 
71
  </tr>
72
  <tr>
73
  <th></th>
74
  <th></th>
 
 
 
 
 
75
  <th align="center">pass@1</th>
76
  <th align="center">pass@1</th>
77
  <th align="center">pass@1</th>
78
- <th align="center">pass@1</th>
79
- <th align="center">pass@1</th>
 
 
 
80
  </tr>
81
  </thead>
82
  <tbody>
83
  <tr>
84
- <td>Qwen2.5-72B-Instruct</td>
85
- <td align="center">❌</td>
86
- <td align="center">80.0</td>
87
- <td align="center">23.3</td>
 
 
 
 
 
 
 
 
88
  <td align="center">49.0</td>
89
- <td align="center">-</td>
90
- <td align="center">-</td>
91
  </tr>
92
  <tr>
93
- <td>Deepseek V3</td>
94
- <td align="center">❌</td>
95
- <td align="center">90.2</td>
96
- <td align="center">39.2</td>
97
- <td align="center">59.1</td>
98
- <td align="center">-</td>
99
- <td align="center">-</td>
 
 
 
 
 
 
 
 
100
  </tr>
101
  <tr>
102
- <td>Deepseek R1</td>
103
  <td align="center">❌</td>
 
 
 
 
 
104
  <td align="center">97.3</td>
105
  <td align="center">79.8</td>
106
  <td align="center">71.5</td>
107
- <td align="center">-</td>
108
- <td align="center">-</td>
 
 
 
109
  </tr>
110
  <tr>
111
- <td>Claude 3.5 Sonnet</td>
112
- <td align="center">✅</td>
113
- <td align="center">78.3</td>
114
- <td align="center">16.0</td>
115
- <td align="center">65.0</td>
116
- <td align="center">65.3</td>
117
- <td align="center">66.4</td>
 
 
 
 
 
 
 
 
118
  </tr>
119
  <tr>
120
- <td>GPT-4o</td>
121
  <td align="center">✅</td>
 
 
 
 
 
122
  <td align="center">74.6</td>
123
  <td align="center">9.3</td>
124
  <td align="center">49.9</td>
125
- <td align="center">63.8</td>
126
- <td align="center">69.1</td>
 
 
 
127
  </tr>
128
  <tr>
129
- <td>Kimi k1.5</td>
130
  <td align="center">✅</td>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  <td align="center">96.2</td>
132
  <td align="center">77.5</td>
133
- <td align="center">-</td>
134
- <td align="center">74.9</td>
135
  <td align="center">70.0</td>
 
 
 
 
136
  </tr>
137
  <tr>
138
- <td>Qwen2.5-VL-72B-Instruct</td>
139
  <td align="center">✅</td>
140
- <td align="center">-</td>
141
- <td align="center">-</td>
142
- <td align="center">-</td>
143
- <td align="center">74.8</td>
 
 
 
 
144
  <td align="center">70.2</td>
 
 
 
 
145
  </tr>
146
  <tr>
147
- <td>LLaVA-Onevision-72B</td>
148
- <td align="center">✅</td>
149
- <td align="center">-</td>
150
- <td align="center">-</td>
151
- <td align="center">-</td>
152
- <td align="center">67.5</td>
153
- <td align="center">56.8</td>
154
- </tr>
155
- <tr>
156
- <td>InternVL2-Llama3-76B</td>
157
- <td align="center">✅</td>
158
- <td align="center">-</td>
159
- <td align="center">-</td>
160
- <td align="center">-</td>
161
- <td align="center">65.5</td>
162
- <td align="center">62.7</td>
163
- </tr>
164
- <tr>
165
- <td>InternVL2.5-78B</td>
166
  <td align="center">✅</td>
167
- <td align="center">-</td>
168
- <td align="center">-</td>
169
- <td align="center">-</td>
170
- <td align="center">72.3</td>
 
 
 
 
171
  <td align="center">70.1</td>
172
- </tr>
173
- <tr>
174
- <td>Skywork-R1V-38B</td>
175
- <td align="center">✅</td>
176
- <td align="center">94.0</td>
177
- <td align="center">72.0</td>
178
- <td align="center">61.6</td>
179
- <td align="center">67.5</td>
180
- <td align="center">69.0</td>
181
  </tr>
182
  </tbody>
183
  </table>
184
 
185
 
 
186
  ---
187
 
188
 
 
55
  <table>
56
  <thead>
57
  <tr>
58
+ <th>Model</th>
59
+ <th align="center"><strong>Supports Vision</strong></th>
60
+ <th align="center" colspan="8"><strong>Text&nbsp;Reasoning&nbsp;(%)</strong></th>
61
+ <th align="center" colspan="5"><strong>Multimodal&nbsp;Reasoning&nbsp;(%)</strong></th>
62
  </tr>
63
  <tr>
64
  <th></th>
65
  <th></th>
66
+ <th align="center">AIME24</th>
67
+ <th align="center">LiveCodebench</th>
68
+ <th align="center">liveBench</th>
69
+ <th align="center">IFEVAL</th>
70
+ <th align="center">BFCL</th>
71
+ <th align="center">MATH‑500</th>
72
+ <th align="center">AIME&nbsp;2024</th>
73
+ <th align="center">GPQA</th>
74
+ <th align="center">MMMU(val)</th>
75
+ <th align="center">MathVista(mini)</th>
76
+ <th align="center">MathVision(mini)</th>
77
+ <th align="center">OlympiadBench</th>
78
+ <th align="center">mmmu‑pro</th>
79
  </tr>
80
  <tr>
81
  <th></th>
82
  <th></th>
83
+ <th align="center">%</th>
84
+ <th align="center">%</th>
85
+ <th align="center">%</th>
86
+ <th align="center">%</th>
87
+ <th align="center">%</th>
88
  <th align="center">pass@1</th>
89
  <th align="center">pass@1</th>
90
  <th align="center">pass@1</th>
91
+ <th align="center">%</th>
92
+ <th align="center">%</th>
93
+ <th align="center">%</th>
94
+ <th align="center">%</th>
95
+ <th align="center">%</th>
96
  </tr>
97
  </thead>
98
  <tbody>
99
  <tr>
100
+ <td>R1V2‑38B</td>
101
+ <td align="center">✅</td>
102
+ <td align="center">78.9</td>
103
+ <td align="center">63.6</td>
104
+ <td align="center">73.2</td>
105
+ <td align="center">82.9</td>
106
+ <td align="center">66.3</td>
107
+ <td align="center">94.0</td>
108
+ <td align="center">72.0</td>
109
+ <td align="center">61.6</td>
110
+ <td align="center">73.6</td>
111
+ <td align="center">74.0</td>
112
  <td align="center">49.0</td>
113
+ <td align="center">62.6</td>
114
+ <td align="center">52.0</td>
115
  </tr>
116
  <tr>
117
+ <td>R1V1‑38B</td>
118
+ <td align="center">✅</td>
119
+ <td align="center">72.0</td>
120
+ <td align="center">57.2</td>
121
+ <td align="center">54.6</td>
122
+ <td align="center">72.5</td>
123
+ <td align="center">53.5</td>
124
+ <td align="center">–</td>
125
+ <td align="center">–</td>
126
+ <td align="center">–</td>
127
+ <td align="center">68.0</td>
128
+ <td align="center">67.0</td>
129
+ <td align="center">–</td>
130
+ <td align="center">40.4</td>
131
+ <td align="center">–</td>
132
  </tr>
133
  <tr>
134
+ <td>Deepseek‑R1‑671B</td>
135
  <td align="center">❌</td>
136
+ <td align="center">74.3</td>
137
+ <td align="center">65.9</td>
138
+ <td align="center">71.6</td>
139
+ <td align="center">83.3</td>
140
+ <td align="center">60.3</td>
141
  <td align="center">97.3</td>
142
  <td align="center">79.8</td>
143
  <td align="center">71.5</td>
144
+ <td align="center">–</td>
145
+ <td align="center">–</td>
146
+ <td align="center">–</td>
147
+ <td align="center">–</td>
148
+ <td align="center">–</td>
149
  </tr>
150
  <tr>
151
+ <td>GPT‑o1</td>
152
+ <td align="center">❌</td>
153
+ <td align="center">79.8</td>
154
+ <td align="center">63.4</td>
155
+ <td align="center">72.2</td>
156
+ <td align="center">–</td>
157
+ <td align="center">–</td>
158
+ <td align="center">–</td>
159
+ <td align="center">–</td>
160
+ <td align="center">–</td>
161
+ <td align="center">–</td>
162
+ <td align="center">–</td>
163
+ <td align="center">–</td>
164
+ <td align="center">–</td>
165
+ <td align="center">–</td>
166
  </tr>
167
  <tr>
168
+ <td>GPT‑o4‑mini</td>
169
  <td align="center">✅</td>
170
+ <td align="center">93.4</td>
171
+ <td align="center">74.6</td>
172
+ <td align="center">78.1</td>
173
+ <td align="center">–</td>
174
+ <td align="center">–</td>
175
  <td align="center">74.6</td>
176
  <td align="center">9.3</td>
177
  <td align="center">49.9</td>
178
+ <td align="center">81.6</td>
179
+ <td align="center">84.3</td>
180
+ <td align="center">58.0</td>
181
+ <td align="center">–</td>
182
+ <td align="center">–</td>
183
  </tr>
184
  <tr>
185
+ <td>Claude&nbsp;3.5&nbsp;Sonnet</td>
186
  <td align="center">✅</td>
187
+ <td align="center">–</td>
188
+ <td align="center">–</td>
189
+ <td align="center">–</td>
190
+ <td align="center">–</td>
191
+ <td align="center">–</td>
192
+ <td align="center">78.3</td>
193
+ <td align="center">16.0</td>
194
+ <td align="center">65.0</td>
195
+ <td align="center">66.4</td>
196
+ <td align="center">65.3</td>
197
+ <td align="center">–</td>
198
+ <td align="center">–</td>
199
+ <td align="center">–</td>
200
+ </tr>
201
+ <tr>
202
+ <td>Kimi&nbsp;k1.5&nbsp;longcot</td>
203
+ <td align="center">✅</td>
204
+ <td align="center">–</td>
205
+ <td align="center">–</td>
206
+ <td align="center">–</td>
207
+ <td align="center">–</td>
208
+ <td align="center">–</td>
209
  <td align="center">96.2</td>
210
  <td align="center">77.5</td>
211
+ <td align="center">–</td>
 
212
  <td align="center">70.0</td>
213
+ <td align="center">74.9</td>
214
+ <td align="center">–</td>
215
+ <td align="center">–</td>
216
+ <td align="center">–</td>
217
  </tr>
218
  <tr>
219
+ <td>Qwen2.5‑VL‑72B‑Instruct</td>
220
  <td align="center">✅</td>
221
+ <td align="center">–</td>
222
+ <td align="center">–</td>
223
+ <td align="center">–</td>
224
+ <td align="center">–</td>
225
+ <td align="center">–</td>
226
+ <td align="center">–</td>
227
+ <td align="center">–</td>
228
+ <td align="center">–</td>
229
  <td align="center">70.2</td>
230
+ <td align="center">74.8</td>
231
+ <td align="center">–</td>
232
+ <td align="center">–</td>
233
+ <td align="center">–</td>
234
  </tr>
235
  <tr>
236
+ <td>InternVL2.5‑78B</td>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  <td align="center">✅</td>
238
+ <td align="center">–</td>
239
+ <td align="center">–</td>
240
+ <td align="center">–</td>
241
+ <td align="center">–</td>
242
+ <td align="center">–</td>
243
+ <td align="center">–</td>
244
+ <td align="center">–</td>
245
+ <td align="center">–</td>
246
  <td align="center">70.1</td>
247
+ <td align="center">72.3</td>
248
+ <td align="center">–</td>
249
+ <td align="center">33.2</td>
250
+ <td align="center">–</td>
 
 
 
 
 
251
  </tr>
252
  </tbody>
253
  </table>
254
 
255
 
256
+
257
  ---
258
 
259