Update README.md
Browse files
README.md
CHANGED
|
@@ -114,24 +114,24 @@ We report in the following table our internal pipeline benchmarks:
|
|
| 114 |
<tbody>
|
| 115 |
<tr>
|
| 116 |
<td>MMLU</td>
|
| 117 |
-
<td>67.
|
| 118 |
-
<td>65.
|
| 119 |
-
<td>67.
|
| 120 |
-
<td>66.
|
| 121 |
</tr>
|
| 122 |
<tr>
|
| 123 |
<td>MMLU-PRO</td>
|
| 124 |
-
<td>40.
|
| 125 |
-
<td>39.
|
| 126 |
-
<td>40.
|
| 127 |
-
<td>39.
|
| 128 |
</tr>
|
| 129 |
<tr>
|
| 130 |
<td>IFEval</td>
|
| 131 |
-
<td>75.
|
| 132 |
-
<td>72.
|
| 133 |
-
<td>
|
| 134 |
-
<td>74.
|
| 135 |
</tr>
|
| 136 |
</tbody>
|
| 137 |
</table>
|
|
|
|
| 114 |
<tbody>
|
| 115 |
<tr>
|
| 116 |
<td>MMLU</td>
|
| 117 |
+
<td>67.7</td>
|
| 118 |
+
<td>65.6</td>
|
| 119 |
+
<td>67.6</td>
|
| 120 |
+
<td>66.4</td>
|
| 121 |
</tr>
|
| 122 |
<tr>
|
| 123 |
<td>MMLU-PRO</td>
|
| 124 |
+
<td>40.9</td>
|
| 125 |
+
<td>39.1</td>
|
| 126 |
+
<td>40.9</td>
|
| 127 |
+
<td>39.9</td>
|
| 128 |
</tr>
|
| 129 |
<tr>
|
| 130 |
<td>IFEval</td>
|
| 131 |
+
<td>75.1</td>
|
| 132 |
+
<td>72.2</td>
|
| 133 |
+
<td>77.0</td>
|
| 134 |
+
<td>74.8</td>
|
| 135 |
</tr>
|
| 136 |
</tbody>
|
| 137 |
</table>
|