Update README.md
Browse files
README.md
CHANGED
|
@@ -269,6 +269,83 @@ library_name: transformers
|
|
| 269 |
</table>
|
| 270 |
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
## Citation
|
| 273 |
|
| 274 |
```bibtex
|
|
|
|
| 269 |
</table>
|
| 270 |
|
| 271 |
|
| 272 |
+
### Optimizers
|
| 273 |
+
|
| 274 |
+
<table border="1" cellpadding="10" cellspacing="0" style="margin: 0 auto; border-collapse: collapse; text-align: center;">
|
| 275 |
+
<tr>
|
| 276 |
+
<th colspan="2">Batch Size</th>
|
| 277 |
+
<th>1</th>
|
| 278 |
+
<th>2</th>
|
| 279 |
+
<th>4</th>
|
| 280 |
+
<th>8</th>
|
| 281 |
+
<th>16</th>
|
| 282 |
+
<th>32</th>
|
| 283 |
+
<th>64</th>
|
| 284 |
+
<th>128</th>
|
| 285 |
+
<th>256</th>
|
| 286 |
+
<th>512</th>
|
| 287 |
+
<th>1024</th>
|
| 288 |
+
</tr>
|
| 289 |
+
<tr>
|
| 290 |
+
<td rowspan="4">Peak Mem (MB)</td>
|
| 291 |
+
<td>adamw_torch</td>
|
| 292 |
+
<td>601</td>
|
| 293 |
+
<td>605</td>
|
| 294 |
+
<td>633</td>
|
| 295 |
+
<td>707</td>
|
| 296 |
+
<td>857</td>
|
| 297 |
+
<td>1255</td>
|
| 298 |
+
<td>1637</td>
|
| 299 |
+
<td>2201</td>
|
| 300 |
+
<td>3787</td>
|
| 301 |
+
<td>6945</td>
|
| 302 |
+
<td>13293</td>
|
| 303 |
+
</tr>
|
| 304 |
+
<tr>
|
| 305 |
+
<td>adamw_bnb_8bit</td>
|
| 306 |
+
<td>589</td>
|
| 307 |
+
<td>595</td>
|
| 308 |
+
<td>625</td>
|
| 309 |
+
<td>699</td>
|
| 310 |
+
<td>849</td>
|
| 311 |
+
<td>1241</td>
|
| 312 |
+
<td>1625</td>
|
| 313 |
+
<td>2187</td>
|
| 314 |
+
<td>3773</td>
|
| 315 |
+
<td>6935</td>
|
| 316 |
+
<td>13283</td>
|
| 317 |
+
</tr>
|
| 318 |
+
<tr>
|
| 319 |
+
<td>adamw_hf</td>
|
| 320 |
+
<td>597</td>
|
| 321 |
+
<td>603</td>
|
| 322 |
+
<td>633</td>
|
| 323 |
+
<td>707</td>
|
| 324 |
+
<td>857</td>
|
| 325 |
+
<td>1251</td>
|
| 326 |
+
<td>1635</td>
|
| 327 |
+
<td>2197</td>
|
| 328 |
+
<td>3783</td>
|
| 329 |
+
<td>6941</td>
|
| 330 |
+
<td>13293</td>
|
| 331 |
+
</tr>
|
| 332 |
+
<tr>
|
| 333 |
+
<td>lion_32bit</td>
|
| 334 |
+
<td>591</td>
|
| 335 |
+
<td>597</td>
|
| 336 |
+
<td>627</td>
|
| 337 |
+
<td>701</td>
|
| 338 |
+
<td>851</td>
|
| 339 |
+
<td>1243</td>
|
| 340 |
+
<td>1627</td>
|
| 341 |
+
<td>2191</td>
|
| 342 |
+
<td>3777</td>
|
| 343 |
+
<td>6937</td>
|
| 344 |
+
<td>13285</td>
|
| 345 |
+
</tr>
|
| 346 |
+
</table>
|
| 347 |
+
|
| 348 |
+
|
| 349 |
## Citation
|
| 350 |
|
| 351 |
```bibtex
|