Remove CrowsPairs French metrics
Browse files
README.md
CHANGED
|
@@ -160,9 +160,6 @@ We advise adding the `rope_scaling` configuration only when processing long cont
|
|
| 160 |
<th style="text-align:center; padding:12px 16px; background:linear-gradient(90deg,#f6f8fb,#eef3f9); color:#0b1220; font-weight:700; border-bottom:1px solid rgba(15,23,42,0.06);">
|
| 161 |
CrowsPairs (English)
|
| 162 |
</th>
|
| 163 |
-
<th style="text-align:center; padding:12px 16px; background:linear-gradient(90deg,#f6f8fb,#eef3f9); color:#0b1220; font-weight:700; border-bottom:1px solid rgba(15,23,42,0.06);">
|
| 164 |
-
CrowsPairs (French)
|
| 165 |
-
</th>
|
| 166 |
<th style="text-align:center; padding:12px 16px; background:linear-gradient(90deg,#f6f8fb,#eef3f9); color:#0b1220; font-weight:700; border-bottom:1px solid rgba(15,23,42,0.06);">
|
| 167 |
TruthfulQA (MC1 acc)
|
| 168 |
</th>
|
|
@@ -180,7 +177,6 @@ We advise adding the `rope_scaling` configuration only when processing long cont
|
|
| 180 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">77.60</td>
|
| 181 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">49.64</td>
|
| 182 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">65.18</td>
|
| 183 |
-
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">54.32</td>
|
| 184 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">37.82</td>
|
| 185 |
</tr>
|
| 186 |
<tr style="background:#ffffff;">
|
|
@@ -194,7 +190,6 @@ We advise adding the `rope_scaling` configuration only when processing long cont
|
|
| 194 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">63.40</td>
|
| 195 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">31.66</td>
|
| 196 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">67.56</td>
|
| 197 |
-
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">66.07</td>
|
| 198 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">42.84</td>
|
| 199 |
</tr>
|
| 200 |
<tr style="background:#ffffff;">
|
|
@@ -208,7 +203,6 @@ We advise adding the `rope_scaling` configuration only when processing long cont
|
|
| 208 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">52.40</td>
|
| 209 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">59.71</td>
|
| 210 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">59.75</td>
|
| 211 |
-
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">50.15</td>
|
| 212 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">39.05</td>
|
| 213 |
</tr>
|
| 214 |
<tr style="background:#fbfdff;">
|
|
@@ -222,12 +216,11 @@ We advise adding the `rope_scaling` configuration only when processing long cont
|
|
| 222 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">51.80</td>
|
| 223 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">60.79</td>
|
| 224 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">66.79</td>
|
| 225 |
-
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">58.68</td>
|
| 226 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">40.51</td>
|
| 227 |
</tr>
|
| 228 |
</tbody>
|
| 229 |
</table>
|
| 230 |
-
|
| 231 |
*Note: All models listed in the benchmark table were evaluated using their respective system prompts, as defined for each model on Hugging Face.*
|
| 232 |
|
| 233 |
|
|
|
|
| 160 |
<th style="text-align:center; padding:12px 16px; background:linear-gradient(90deg,#f6f8fb,#eef3f9); color:#0b1220; font-weight:700; border-bottom:1px solid rgba(15,23,42,0.06);">
|
| 161 |
CrowsPairs (English)
|
| 162 |
</th>
|
|
|
|
|
|
|
|
|
|
| 163 |
<th style="text-align:center; padding:12px 16px; background:linear-gradient(90deg,#f6f8fb,#eef3f9); color:#0b1220; font-weight:700; border-bottom:1px solid rgba(15,23,42,0.06);">
|
| 164 |
TruthfulQA (MC1 acc)
|
| 165 |
</th>
|
|
|
|
| 177 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">77.60</td>
|
| 178 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">49.64</td>
|
| 179 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">65.18</td>
|
|
|
|
| 180 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">37.82</td>
|
| 181 |
</tr>
|
| 182 |
<tr style="background:#ffffff;">
|
|
|
|
| 190 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">63.40</td>
|
| 191 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">31.66</td>
|
| 192 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">67.56</td>
|
|
|
|
| 193 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">42.84</td>
|
| 194 |
</tr>
|
| 195 |
<tr style="background:#ffffff;">
|
|
|
|
| 203 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">52.40</td>
|
| 204 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">59.71</td>
|
| 205 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">59.75</td>
|
|
|
|
| 206 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">39.05</td>
|
| 207 |
</tr>
|
| 208 |
<tr style="background:#fbfdff;">
|
|
|
|
| 216 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">51.80</td>
|
| 217 |
<td style="padding:12px 16px; text-align:center; font-weight:700; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">60.79</td>
|
| 218 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">66.79</td>
|
|
|
|
| 219 |
<td style="padding:12px 16px; text-align:center; font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;">40.51</td>
|
| 220 |
</tr>
|
| 221 |
</tbody>
|
| 222 |
</table>
|
| 223 |
+
|
| 224 |
*Note: All models listed in the benchmark table were evaluated using their respective system prompts, as defined for each model on Hugging Face.*
|
| 225 |
|
| 226 |
|