krishnateja95 committed on
Commit
5fdd752
·
verified ·
1 Parent(s): eec5871

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +107 -3
README.md CHANGED
@@ -1,3 +1,107 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+
6
+ ### Accuracy
7
+ <table>
8
+ <thead>
9
+ <tr>
10
+ <th>Category</th>
11
+ <th>Metric</th>
12
+ <th>ibm-granite/granite-4.0-h-small</th>
13
+ <th>ibm-granite/granite-4.0-h-small-FP8</th>
14
+ <th>Recovery (%)</th>
15
+ </tr>
16
+ </thead>
17
+ <tbody>
18
+ <!-- OpenLLM Leaderboard V1 -->
19
+ <tr>
20
+ <td rowspan="7"><b>OpenLLM V1</b></td>
21
+ <td>ARC-Challenge (Acc-Norm, 25-shot)</td>
22
+ <td>72.27</td>
23
+ <td>72.10</td>
24
+ <td>99.76</td>
25
+ </tr>
26
+ <tr>
27
+ <td>GSM8K (Strict-Match, 5-shot)</td>
28
+ <td>85.22</td>
29
+ <td>85.29</td>
30
+ <td>100.09</td>
31
+ </tr>
32
+ <tr>
33
+ <td>HellaSwag (Acc-Norm, 10-shot)</td>
34
+ <td>86.08</td>
35
+ <td>85.88</td>
36
+ <td>99.77</td>
37
+ </tr>
38
+ <tr>
39
+ <td>MMLU (Acc, 5-shot)</td>
40
+ <td>77.15</td>
41
+ <td>77.18</td>
42
+ <td>100.03</td>
43
+ </tr>
44
+ <tr>
45
+ <td>TruthfulQA (MC2, 0-shot)</td>
46
+ <td>57.64</td>
47
+ <td>57.63</td>
48
+ <td>99.99</td>
49
+ </tr>
50
+ <tr>
51
+ <td>WinoGrande (Acc, 5-shot)</td>
52
+ <td>81.37</td>
53
+ <td>81.45</td>
54
+ <td>100.10</td>
55
+ </tr>
56
+ <tr>
57
+ <td><b>Average Score</b></td>
58
+ <td><b>76.62</b></td>
59
+ <td><b>76.59</b></td>
60
+ <td><b>99.96</b></td>
61
+ </tr>
62
+ <!-- OpenLLM Leaderboard V2 -->
63
+ <tr>
64
+ <td rowspan="7"><b>OpenLLM V2</b></td>
65
+ <td>IFEval (Inst Level Strict Acc, 0-shot)</td>
66
+ <td>87.53</td>
67
+ <td>87.17</td>
68
+ <td>99.59</td>
69
+ </tr>
70
+ <tr>
71
+ <td>BBH (Acc-Norm, 3-shot)</td>
72
+ <td>61.52</td>
73
+ <td>61.31</td>
74
+ <td>99.66</td>
75
+ </tr>
76
+ <tr>
77
+ <td>Math-Hard (Exact-Match, 4-shot)</td>
78
+ <td>46.22</td>
79
+ <td>43.73</td>
80
+ <td>94.61</td>
81
+ </tr>
82
+ <tr>
83
+ <td>GPQA (Acc-Norm, 0-shot)</td>
84
+ <td>35.23</td>
85
+ <td>34.98</td>
86
+ <td>99.29</td>
87
+ </tr>
88
+ <tr>
89
+ <td>MuSR (Acc-Norm, 0-shot)</td>
90
+ <td>46.69</td>
91
+ <td>46.56</td>
92
+ <td>99.72</td>
93
+ </tr>
94
+ <tr>
95
+ <td>MMLU-Pro (Acc, 5-shot)</td>
96
+ <td>47.99</td>
97
+ <td>47.63</td>
98
+ <td>99.26</td>
99
+ </tr>
100
+ <tr>
101
+ <td><b>Average Score</b></td>
102
+ <td><b>54.20</b></td>
103
+ <td><b>53.56</b></td>
104
+ <td><b>98.82</b></td>
105
+ </tr>
106
+ </tbody>
107
+ </table>