krishnateja95 committed on
Commit
11b59d3
·
verified ·
1 Parent(s): 9614a0b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +102 -1
README.md CHANGED
@@ -213,5 +213,106 @@ license: apache-2.0
213
  </table>
214
 
215
 
216
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
 
213
  </table>
214
 
215
 
216
+ ### Accuracy
217
+ <table>
218
+ <thead>
219
+ <tr>
220
+ <th>Category</th>
221
+ <th>Metric</th>
222
+ <th>ibm-granite/granite-4.0-h-small</th>
223
+ <th>RedHatAI/granite-4.0-h-small-FP8-dynamic</th>
224
+ <th>Recovery (%)</th>
225
+ </tr>
226
+ </thead>
227
+ <tbody>
228
+ <!-- OpenLLM Leaderboard V1 -->
229
+ <tr>
230
+ <td rowspan="7"><b>OpenLLM V1</b></td>
231
+ <td>ARC-Challenge (Acc-Norm, 25-shot)</td>
232
+ <td>72.27</td>
233
+ <td>72.10</td>
234
+ <td>99.76</td>
235
+ </tr>
236
+ <tr>
237
+ <td>GSM8K (Strict-Match, 5-shot)</td>
238
+ <td>85.22</td>
239
+ <td>84.84</td>
240
+ <td>99.56</td>
241
+ </tr>
242
+ <tr>
243
+ <td>HellaSwag (Acc-Norm, 10-shot)</td>
244
+ <td>86.08</td>
245
+ <td>85.88</td>
246
+ <td>99.77</td>
247
+ </tr>
248
+ <tr>
249
+ <td>MMLU (Acc, 5-shot)</td>
250
+ <td>77.15</td>
251
+ <td>77.18</td>
252
+ <td>100.03</td>
253
+ </tr>
254
+ <tr>
255
+ <td>TruthfulQA (MC2, 0-shot)</td>
256
+ <td>57.64</td>
257
+ <td>57.63</td>
258
+ <td>100.00</td>
259
+ </tr>
260
+ <tr>
261
+ <td>Winogrande (Acc, 5-shot)</td>
262
+ <td>81.37</td>
263
+ <td>81.45</td>
264
+ <td>100.10</td>
265
+ </tr>
266
+ <tr>
267
+ <td><b>Average Score</b></td>
268
+ <td><b>76.62</b></td>
269
+ <td><b>76.51</b></td>
270
+ <td><b>99.86</b></td>
271
+ </tr>
272
+ <!-- OpenLLM Leaderboard V2 -->
273
+ <tr>
274
+ <td rowspan="7"><b>OpenLLM V2</b></td>
275
+ <td>IFEval (Inst Level Strict Acc, 0-shot)</td>
276
+ <td>87.53</td>
277
+ <td>87.41</td>
278
+ <td>99.86</td>
279
+ </tr>
280
+ <tr>
281
+ <td>BBH (Acc-Norm, 3-shot)</td>
282
+ <td>61.52</td>
283
+ <td>61.19</td>
284
+ <td>99.46</td>
285
+ </tr>
286
+ <tr>
287
+ <td>Math-Hard (Exact-Match, 4-shot)</td>
288
+ <td>46.22</td>
289
+ <td>41.77</td>
290
+ <td>90.36</td>
291
+ </tr>
292
+ <tr>
293
+ <td>GPQA (Acc-Norm, 0-shot)</td>
294
+ <td>35.23</td>
295
+ <td>34.23</td>
296
+ <td>97.14</td>
297
+ </tr>
298
+ <tr>
299
+ <td>MuSR (Acc-Norm, 0-shot)</td>
300
+ <td>46.69</td>
301
+ <td>45.77</td>
302
+ <td>98.02</td>
303
+ </tr>
304
+ <tr>
305
+ <td>MMLU-Pro (Acc, 5-shot)</td>
306
+ <td>47.99</td>
307
+ <td>47.58</td>
308
+ <td>99.15</td>
309
+ </tr>
310
+ <tr>
311
+ <td><b>Average Score</b></td>
312
+ <td><b>54.20</b></td>
313
+ <td><b>52.99</b></td>
314
+ <td><b>97.77</b></td>
315
+ </tr>
316
+ </tbody>
317
+ </table>
318