Training in progress, step 2232
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +220 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 541712
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:055632dea116e80c74f84447cc738eb310ec26ca797e8d97a341e79c76372814
|
| 3 |
size 541712
|
trainer_log.jsonl
CHANGED
|
@@ -231,3 +231,223 @@
|
|
| 231 |
{"current_steps": 1150, "total_steps": 22320, "loss": 0.3765, "lr": 2.5739247311827957e-05, "epoch": 1.0304659498207884, "percentage": 5.15, "elapsed_time": "0:02:48", "remaining_time": "0:51:42", "throughput": 2602.88, "total_tokens": 438616}
|
| 232 |
{"current_steps": 1155, "total_steps": 22320, "loss": 0.3124, "lr": 2.585125448028674e-05, "epoch": 1.0349462365591398, "percentage": 5.17, "elapsed_time": "0:02:49", "remaining_time": "0:51:40", "throughput": 2603.27, "total_tokens": 440536}
|
| 233 |
{"current_steps": 1160, "total_steps": 22320, "loss": 0.2793, "lr": 2.596326164874552e-05, "epoch": 1.039426523297491, "percentage": 5.2, "elapsed_time": "0:02:49", "remaining_time": "0:51:38", "throughput": 2603.95, "total_tokens": 442360}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
{"current_steps": 1150, "total_steps": 22320, "loss": 0.3765, "lr": 2.5739247311827957e-05, "epoch": 1.0304659498207884, "percentage": 5.15, "elapsed_time": "0:02:48", "remaining_time": "0:51:42", "throughput": 2602.88, "total_tokens": 438616}
|
| 232 |
{"current_steps": 1155, "total_steps": 22320, "loss": 0.3124, "lr": 2.585125448028674e-05, "epoch": 1.0349462365591398, "percentage": 5.17, "elapsed_time": "0:02:49", "remaining_time": "0:51:40", "throughput": 2603.27, "total_tokens": 440536}
|
| 233 |
{"current_steps": 1160, "total_steps": 22320, "loss": 0.2793, "lr": 2.596326164874552e-05, "epoch": 1.039426523297491, "percentage": 5.2, "elapsed_time": "0:02:49", "remaining_time": "0:51:38", "throughput": 2603.95, "total_tokens": 442360}
|
| 234 |
+
{"current_steps": 1165, "total_steps": 22320, "loss": 0.5798, "lr": 2.6075268817204303e-05, "epoch": 1.0439068100358422, "percentage": 5.22, "elapsed_time": "0:02:50", "remaining_time": "0:51:37", "throughput": 2604.52, "total_tokens": 444344}
|
| 235 |
+
{"current_steps": 1170, "total_steps": 22320, "loss": 0.4841, "lr": 2.6187275985663083e-05, "epoch": 1.0483870967741935, "percentage": 5.24, "elapsed_time": "0:02:51", "remaining_time": "0:51:37", "throughput": 2604.69, "total_tokens": 446328}
|
| 236 |
+
{"current_steps": 1175, "total_steps": 22320, "loss": 0.625, "lr": 2.6299283154121867e-05, "epoch": 1.0528673835125448, "percentage": 5.26, "elapsed_time": "0:02:52", "remaining_time": "0:51:37", "throughput": 2605.5, "total_tokens": 448504}
|
| 237 |
+
{"current_steps": 1180, "total_steps": 22320, "loss": 0.4226, "lr": 2.6411290322580645e-05, "epoch": 1.0573476702508962, "percentage": 5.29, "elapsed_time": "0:02:52", "remaining_time": "0:51:36", "throughput": 2605.61, "total_tokens": 450392}
|
| 238 |
+
{"current_steps": 1185, "total_steps": 22320, "loss": 0.5521, "lr": 2.652329749103943e-05, "epoch": 1.0618279569892473, "percentage": 5.31, "elapsed_time": "0:02:53", "remaining_time": "0:51:35", "throughput": 2606.17, "total_tokens": 452376}
|
| 239 |
+
{"current_steps": 1190, "total_steps": 22320, "loss": 0.656, "lr": 2.6635304659498213e-05, "epoch": 1.0663082437275986, "percentage": 5.33, "elapsed_time": "0:02:54", "remaining_time": "0:51:33", "throughput": 2606.35, "total_tokens": 454072}
|
| 240 |
+
{"current_steps": 1195, "total_steps": 22320, "loss": 0.6033, "lr": 2.674731182795699e-05, "epoch": 1.07078853046595, "percentage": 5.35, "elapsed_time": "0:02:54", "remaining_time": "0:51:31", "throughput": 2607.25, "total_tokens": 455928}
|
| 241 |
+
{"current_steps": 1200, "total_steps": 22320, "loss": 0.948, "lr": 2.685931899641577e-05, "epoch": 1.075268817204301, "percentage": 5.38, "elapsed_time": "0:02:55", "remaining_time": "0:51:29", "throughput": 2608.01, "total_tokens": 457848}
|
| 242 |
+
{"current_steps": 1205, "total_steps": 22320, "loss": 0.7082, "lr": 2.6971326164874555e-05, "epoch": 1.0797491039426523, "percentage": 5.4, "elapsed_time": "0:02:56", "remaining_time": "0:51:28", "throughput": 2608.5, "total_tokens": 459800}
|
| 243 |
+
{"current_steps": 1210, "total_steps": 22320, "loss": 1.005, "lr": 2.7083333333333332e-05, "epoch": 1.0842293906810037, "percentage": 5.42, "elapsed_time": "0:02:56", "remaining_time": "0:51:26", "throughput": 2609.14, "total_tokens": 461592}
|
| 244 |
+
{"current_steps": 1215, "total_steps": 22320, "loss": 0.5246, "lr": 2.7195340501792116e-05, "epoch": 1.0887096774193548, "percentage": 5.44, "elapsed_time": "0:02:57", "remaining_time": "0:51:24", "throughput": 2609.61, "total_tokens": 463448}
|
| 245 |
+
{"current_steps": 1220, "total_steps": 22320, "loss": 0.5735, "lr": 2.73073476702509e-05, "epoch": 1.093189964157706, "percentage": 5.47, "elapsed_time": "0:02:58", "remaining_time": "0:51:23", "throughput": 2609.58, "total_tokens": 465240}
|
| 246 |
+
{"current_steps": 1225, "total_steps": 22320, "loss": 0.873, "lr": 2.7419354838709678e-05, "epoch": 1.0976702508960574, "percentage": 5.49, "elapsed_time": "0:02:58", "remaining_time": "0:51:21", "throughput": 2610.5, "total_tokens": 467096}
|
| 247 |
+
{"current_steps": 1230, "total_steps": 22320, "loss": 0.4226, "lr": 2.7531362007168458e-05, "epoch": 1.1021505376344085, "percentage": 5.51, "elapsed_time": "0:02:59", "remaining_time": "0:51:19", "throughput": 2611.25, "total_tokens": 468920}
|
| 248 |
+
{"current_steps": 1235, "total_steps": 22320, "loss": 0.2786, "lr": 2.7643369175627242e-05, "epoch": 1.1066308243727598, "percentage": 5.53, "elapsed_time": "0:03:00", "remaining_time": "0:51:19", "throughput": 2611.27, "total_tokens": 471064}
|
| 249 |
+
{"current_steps": 1240, "total_steps": 22320, "loss": 0.5786, "lr": 2.775537634408602e-05, "epoch": 1.1111111111111112, "percentage": 5.56, "elapsed_time": "0:03:01", "remaining_time": "0:51:18", "throughput": 2612.11, "total_tokens": 473048}
|
| 250 |
+
{"current_steps": 1245, "total_steps": 22320, "loss": 0.4425, "lr": 2.7867383512544804e-05, "epoch": 1.1155913978494623, "percentage": 5.58, "elapsed_time": "0:03:01", "remaining_time": "0:51:16", "throughput": 2612.59, "total_tokens": 474808}
|
| 251 |
+
{"current_steps": 1250, "total_steps": 22320, "loss": 0.6601, "lr": 2.7979390681003588e-05, "epoch": 1.1200716845878136, "percentage": 5.6, "elapsed_time": "0:03:02", "remaining_time": "0:51:14", "throughput": 2613.15, "total_tokens": 476696}
|
| 252 |
+
{"current_steps": 1255, "total_steps": 22320, "loss": 0.6954, "lr": 2.8091397849462365e-05, "epoch": 1.124551971326165, "percentage": 5.62, "elapsed_time": "0:03:03", "remaining_time": "0:51:12", "throughput": 2614.08, "total_tokens": 478552}
|
| 253 |
+
{"current_steps": 1260, "total_steps": 22320, "loss": 0.3118, "lr": 2.8203405017921146e-05, "epoch": 1.129032258064516, "percentage": 5.65, "elapsed_time": "0:03:03", "remaining_time": "0:51:11", "throughput": 2614.83, "total_tokens": 480568}
|
| 254 |
+
{"current_steps": 1265, "total_steps": 22320, "loss": 0.8858, "lr": 2.831541218637993e-05, "epoch": 1.1335125448028673, "percentage": 5.67, "elapsed_time": "0:03:04", "remaining_time": "0:51:10", "throughput": 2615.03, "total_tokens": 482392}
|
| 255 |
+
{"current_steps": 1270, "total_steps": 22320, "loss": 0.3803, "lr": 2.8427419354838714e-05, "epoch": 1.1379928315412187, "percentage": 5.69, "elapsed_time": "0:03:05", "remaining_time": "0:51:08", "throughput": 2615.56, "total_tokens": 484280}
|
| 256 |
+
{"current_steps": 1275, "total_steps": 22320, "loss": 0.7615, "lr": 2.853942652329749e-05, "epoch": 1.14247311827957, "percentage": 5.71, "elapsed_time": "0:03:05", "remaining_time": "0:51:06", "throughput": 2615.92, "total_tokens": 486040}
|
| 257 |
+
{"current_steps": 1280, "total_steps": 22320, "loss": 0.4346, "lr": 2.8651433691756275e-05, "epoch": 1.146953405017921, "percentage": 5.73, "elapsed_time": "0:03:06", "remaining_time": "0:51:06", "throughput": 2616.06, "total_tokens": 488024}
|
| 258 |
+
{"current_steps": 1285, "total_steps": 22320, "loss": 0.4882, "lr": 2.8763440860215056e-05, "epoch": 1.1514336917562724, "percentage": 5.76, "elapsed_time": "0:03:07", "remaining_time": "0:51:04", "throughput": 2616.44, "total_tokens": 489784}
|
| 259 |
+
{"current_steps": 1290, "total_steps": 22320, "loss": 0.3166, "lr": 2.8875448028673837e-05, "epoch": 1.1559139784946237, "percentage": 5.78, "elapsed_time": "0:03:07", "remaining_time": "0:51:02", "throughput": 2617.19, "total_tokens": 491704}
|
| 260 |
+
{"current_steps": 1295, "total_steps": 22320, "loss": 0.2114, "lr": 2.8987455197132617e-05, "epoch": 1.1603942652329748, "percentage": 5.8, "elapsed_time": "0:03:08", "remaining_time": "0:51:01", "throughput": 2617.18, "total_tokens": 493560}
|
| 261 |
+
{"current_steps": 1300, "total_steps": 22320, "loss": 0.6222, "lr": 2.90994623655914e-05, "epoch": 1.1648745519713262, "percentage": 5.82, "elapsed_time": "0:03:09", "remaining_time": "0:51:00", "throughput": 2617.6, "total_tokens": 495512}
|
| 262 |
+
{"current_steps": 1305, "total_steps": 22320, "loss": 0.8893, "lr": 2.921146953405018e-05, "epoch": 1.1693548387096775, "percentage": 5.85, "elapsed_time": "0:03:10", "remaining_time": "0:50:59", "throughput": 2617.89, "total_tokens": 497432}
|
| 263 |
+
{"current_steps": 1310, "total_steps": 22320, "loss": 0.2537, "lr": 2.9323476702508963e-05, "epoch": 1.1738351254480286, "percentage": 5.87, "elapsed_time": "0:03:10", "remaining_time": "0:50:58", "throughput": 2618.92, "total_tokens": 499480}
|
| 264 |
+
{"current_steps": 1315, "total_steps": 22320, "loss": 0.6866, "lr": 2.9435483870967743e-05, "epoch": 1.17831541218638, "percentage": 5.89, "elapsed_time": "0:03:11", "remaining_time": "0:50:57", "throughput": 2619.44, "total_tokens": 501368}
|
| 265 |
+
{"current_steps": 1320, "total_steps": 22320, "loss": 0.1585, "lr": 2.9547491039426524e-05, "epoch": 1.1827956989247312, "percentage": 5.91, "elapsed_time": "0:03:12", "remaining_time": "0:50:55", "throughput": 2619.65, "total_tokens": 503192}
|
| 266 |
+
{"current_steps": 1325, "total_steps": 22320, "loss": 0.3224, "lr": 2.9659498207885305e-05, "epoch": 1.1872759856630823, "percentage": 5.94, "elapsed_time": "0:03:12", "remaining_time": "0:50:53", "throughput": 2619.9, "total_tokens": 504920}
|
| 267 |
+
{"current_steps": 1330, "total_steps": 22320, "loss": 0.2023, "lr": 2.977150537634409e-05, "epoch": 1.1917562724014337, "percentage": 5.96, "elapsed_time": "0:03:13", "remaining_time": "0:50:54", "throughput": 2620.4, "total_tokens": 507096}
|
| 268 |
+
{"current_steps": 1335, "total_steps": 22320, "loss": 0.5803, "lr": 2.9883512544802866e-05, "epoch": 1.196236559139785, "percentage": 5.98, "elapsed_time": "0:03:14", "remaining_time": "0:50:52", "throughput": 2621.33, "total_tokens": 508984}
|
| 269 |
+
{"current_steps": 1340, "total_steps": 22320, "loss": 0.443, "lr": 2.999551971326165e-05, "epoch": 1.2007168458781363, "percentage": 6.0, "elapsed_time": "0:03:14", "remaining_time": "0:50:50", "throughput": 2621.43, "total_tokens": 510776}
|
| 270 |
+
{"current_steps": 1345, "total_steps": 22320, "loss": 0.406, "lr": 3.010752688172043e-05, "epoch": 1.2051971326164874, "percentage": 6.03, "elapsed_time": "0:03:15", "remaining_time": "0:50:49", "throughput": 2622.08, "total_tokens": 512792}
|
| 271 |
+
{"current_steps": 1350, "total_steps": 22320, "loss": 0.3191, "lr": 3.0219534050179215e-05, "epoch": 1.2096774193548387, "percentage": 6.05, "elapsed_time": "0:03:16", "remaining_time": "0:50:48", "throughput": 2622.3, "total_tokens": 514616}
|
| 272 |
+
{"current_steps": 1355, "total_steps": 22320, "loss": 0.5769, "lr": 3.0331541218637992e-05, "epoch": 1.2141577060931898, "percentage": 6.07, "elapsed_time": "0:03:16", "remaining_time": "0:50:46", "throughput": 2622.69, "total_tokens": 516376}
|
| 273 |
+
{"current_steps": 1360, "total_steps": 22320, "loss": 0.5258, "lr": 3.0443548387096776e-05, "epoch": 1.2186379928315412, "percentage": 6.09, "elapsed_time": "0:03:17", "remaining_time": "0:50:44", "throughput": 2622.88, "total_tokens": 518200}
|
| 274 |
+
{"current_steps": 1365, "total_steps": 22320, "loss": 0.6723, "lr": 3.055555555555556e-05, "epoch": 1.2231182795698925, "percentage": 6.12, "elapsed_time": "0:03:18", "remaining_time": "0:50:43", "throughput": 2622.92, "total_tokens": 519992}
|
| 275 |
+
{"current_steps": 1370, "total_steps": 22320, "loss": 0.7991, "lr": 3.0667562724014334e-05, "epoch": 1.2275985663082438, "percentage": 6.14, "elapsed_time": "0:03:18", "remaining_time": "0:50:41", "throughput": 2623.31, "total_tokens": 521848}
|
| 276 |
+
{"current_steps": 1375, "total_steps": 22320, "loss": 0.1403, "lr": 3.077956989247312e-05, "epoch": 1.232078853046595, "percentage": 6.16, "elapsed_time": "0:03:19", "remaining_time": "0:50:41", "throughput": 2623.39, "total_tokens": 523832}
|
| 277 |
+
{"current_steps": 1380, "total_steps": 22320, "loss": 0.4548, "lr": 3.08915770609319e-05, "epoch": 1.2365591397849462, "percentage": 6.18, "elapsed_time": "0:03:20", "remaining_time": "0:50:40", "throughput": 2624.11, "total_tokens": 525784}
|
| 278 |
+
{"current_steps": 1385, "total_steps": 22320, "loss": 0.4657, "lr": 3.100358422939068e-05, "epoch": 1.2410394265232976, "percentage": 6.21, "elapsed_time": "0:03:21", "remaining_time": "0:50:38", "throughput": 2624.59, "total_tokens": 527672}
|
| 279 |
+
{"current_steps": 1390, "total_steps": 22320, "loss": 0.6072, "lr": 3.1115591397849464e-05, "epoch": 1.2455197132616487, "percentage": 6.23, "elapsed_time": "0:03:21", "remaining_time": "0:50:37", "throughput": 2624.97, "total_tokens": 529528}
|
| 280 |
+
{"current_steps": 1395, "total_steps": 22320, "loss": 0.5609, "lr": 3.1227598566308245e-05, "epoch": 1.25, "percentage": 6.25, "elapsed_time": "0:03:22", "remaining_time": "0:50:36", "throughput": 2625.17, "total_tokens": 531448}
|
| 281 |
+
{"current_steps": 1400, "total_steps": 22320, "loss": 0.7081, "lr": 3.1339605734767025e-05, "epoch": 1.2544802867383513, "percentage": 6.27, "elapsed_time": "0:03:23", "remaining_time": "0:50:35", "throughput": 2625.26, "total_tokens": 533336}
|
| 282 |
+
{"current_steps": 1405, "total_steps": 22320, "loss": 0.1401, "lr": 3.1451612903225806e-05, "epoch": 1.2589605734767024, "percentage": 6.29, "elapsed_time": "0:03:23", "remaining_time": "0:50:34", "throughput": 2625.7, "total_tokens": 535320}
|
| 283 |
+
{"current_steps": 1410, "total_steps": 22320, "loss": 0.3777, "lr": 3.156362007168459e-05, "epoch": 1.2634408602150538, "percentage": 6.32, "elapsed_time": "0:03:24", "remaining_time": "0:50:33", "throughput": 2626.39, "total_tokens": 537240}
|
| 284 |
+
{"current_steps": 1415, "total_steps": 22320, "loss": 0.3182, "lr": 3.167562724014337e-05, "epoch": 1.267921146953405, "percentage": 6.34, "elapsed_time": "0:03:25", "remaining_time": "0:50:33", "throughput": 2626.38, "total_tokens": 539192}
|
| 285 |
+
{"current_steps": 1420, "total_steps": 22320, "loss": 0.4603, "lr": 3.1787634408602155e-05, "epoch": 1.2724014336917562, "percentage": 6.36, "elapsed_time": "0:03:25", "remaining_time": "0:50:31", "throughput": 2626.58, "total_tokens": 541016}
|
| 286 |
+
{"current_steps": 1425, "total_steps": 22320, "loss": 0.2895, "lr": 3.1899641577060935e-05, "epoch": 1.2768817204301075, "percentage": 6.38, "elapsed_time": "0:03:26", "remaining_time": "0:50:30", "throughput": 2626.83, "total_tokens": 542936}
|
| 287 |
+
{"current_steps": 1430, "total_steps": 22320, "loss": 0.2146, "lr": 3.201164874551971e-05, "epoch": 1.2813620071684588, "percentage": 6.41, "elapsed_time": "0:03:27", "remaining_time": "0:50:29", "throughput": 2627.56, "total_tokens": 544888}
|
| 288 |
+
{"current_steps": 1435, "total_steps": 22320, "loss": 0.1976, "lr": 3.21236559139785e-05, "epoch": 1.2858422939068102, "percentage": 6.43, "elapsed_time": "0:03:28", "remaining_time": "0:50:28", "throughput": 2628.19, "total_tokens": 546904}
|
| 289 |
+
{"current_steps": 1440, "total_steps": 22320, "loss": 0.5713, "lr": 3.223566308243728e-05, "epoch": 1.2903225806451613, "percentage": 6.45, "elapsed_time": "0:03:28", "remaining_time": "0:50:26", "throughput": 2628.9, "total_tokens": 548760}
|
| 290 |
+
{"current_steps": 1445, "total_steps": 22320, "loss": 0.5514, "lr": 3.2347670250896065e-05, "epoch": 1.2948028673835126, "percentage": 6.47, "elapsed_time": "0:03:29", "remaining_time": "0:50:25", "throughput": 2629.04, "total_tokens": 550584}
|
| 291 |
+
{"current_steps": 1450, "total_steps": 22320, "loss": 0.8791, "lr": 3.245967741935484e-05, "epoch": 1.2992831541218637, "percentage": 6.5, "elapsed_time": "0:03:30", "remaining_time": "0:50:24", "throughput": 2629.01, "total_tokens": 552376}
|
| 292 |
+
{"current_steps": 1455, "total_steps": 22320, "loss": 0.2635, "lr": 3.257168458781362e-05, "epoch": 1.303763440860215, "percentage": 6.52, "elapsed_time": "0:03:30", "remaining_time": "0:50:22", "throughput": 2629.64, "total_tokens": 554328}
|
| 293 |
+
{"current_steps": 1460, "total_steps": 22320, "loss": 1.0019, "lr": 3.268369175627241e-05, "epoch": 1.3082437275985663, "percentage": 6.54, "elapsed_time": "0:03:31", "remaining_time": "0:50:21", "throughput": 2629.62, "total_tokens": 556120}
|
| 294 |
+
{"current_steps": 1465, "total_steps": 22320, "loss": 0.7127, "lr": 3.279569892473118e-05, "epoch": 1.3127240143369177, "percentage": 6.56, "elapsed_time": "0:03:32", "remaining_time": "0:50:21", "throughput": 2630.03, "total_tokens": 558232}
|
| 295 |
+
{"current_steps": 1470, "total_steps": 22320, "loss": 0.3093, "lr": 3.290770609318997e-05, "epoch": 1.3172043010752688, "percentage": 6.59, "elapsed_time": "0:03:32", "remaining_time": "0:50:19", "throughput": 2630.52, "total_tokens": 560056}
|
| 296 |
+
{"current_steps": 1475, "total_steps": 22320, "loss": 0.2366, "lr": 3.301971326164875e-05, "epoch": 1.32168458781362, "percentage": 6.61, "elapsed_time": "0:03:33", "remaining_time": "0:50:18", "throughput": 2631.23, "total_tokens": 562072}
|
| 297 |
+
{"current_steps": 1480, "total_steps": 22320, "loss": 0.3804, "lr": 3.313172043010753e-05, "epoch": 1.3261648745519714, "percentage": 6.63, "elapsed_time": "0:03:34", "remaining_time": "0:50:18", "throughput": 2631.53, "total_tokens": 564184}
|
| 298 |
+
{"current_steps": 1485, "total_steps": 22320, "loss": 0.7936, "lr": 3.324372759856631e-05, "epoch": 1.3306451612903225, "percentage": 6.65, "elapsed_time": "0:03:35", "remaining_time": "0:50:18", "throughput": 2631.76, "total_tokens": 566232}
|
| 299 |
+
{"current_steps": 1490, "total_steps": 22320, "loss": 0.8837, "lr": 3.335573476702509e-05, "epoch": 1.3351254480286738, "percentage": 6.68, "elapsed_time": "0:03:35", "remaining_time": "0:50:17", "throughput": 2632.21, "total_tokens": 568152}
|
| 300 |
+
{"current_steps": 1495, "total_steps": 22320, "loss": 0.2791, "lr": 3.346774193548387e-05, "epoch": 1.3396057347670252, "percentage": 6.7, "elapsed_time": "0:03:36", "remaining_time": "0:50:16", "throughput": 2632.68, "total_tokens": 570072}
|
| 301 |
+
{"current_steps": 1500, "total_steps": 22320, "loss": 0.8537, "lr": 3.357974910394265e-05, "epoch": 1.3440860215053765, "percentage": 6.72, "elapsed_time": "0:03:37", "remaining_time": "0:50:14", "throughput": 2633.33, "total_tokens": 571928}
|
| 302 |
+
{"current_steps": 1505, "total_steps": 22320, "loss": 0.955, "lr": 3.369175627240144e-05, "epoch": 1.3485663082437276, "percentage": 6.74, "elapsed_time": "0:03:37", "remaining_time": "0:50:14", "throughput": 2634.17, "total_tokens": 574136}
|
| 303 |
+
{"current_steps": 1510, "total_steps": 22320, "loss": 0.511, "lr": 3.3803763440860214e-05, "epoch": 1.353046594982079, "percentage": 6.77, "elapsed_time": "0:03:38", "remaining_time": "0:50:14", "throughput": 2634.35, "total_tokens": 576152}
|
| 304 |
+
{"current_steps": 1515, "total_steps": 22320, "loss": 0.4348, "lr": 3.3915770609318994e-05, "epoch": 1.35752688172043, "percentage": 6.79, "elapsed_time": "0:03:39", "remaining_time": "0:50:12", "throughput": 2634.29, "total_tokens": 577944}
|
| 305 |
+
{"current_steps": 1520, "total_steps": 22320, "loss": 0.8977, "lr": 3.402777777777778e-05, "epoch": 1.3620071684587813, "percentage": 6.81, "elapsed_time": "0:03:40", "remaining_time": "0:50:12", "throughput": 2634.41, "total_tokens": 579864}
|
| 306 |
+
{"current_steps": 1525, "total_steps": 22320, "loss": 0.33, "lr": 3.4139784946236556e-05, "epoch": 1.3664874551971327, "percentage": 6.83, "elapsed_time": "0:03:40", "remaining_time": "0:50:10", "throughput": 2634.59, "total_tokens": 581720}
|
| 307 |
+
{"current_steps": 1530, "total_steps": 22320, "loss": 0.689, "lr": 3.425179211469534e-05, "epoch": 1.370967741935484, "percentage": 6.85, "elapsed_time": "0:03:41", "remaining_time": "0:50:09", "throughput": 2634.8, "total_tokens": 583576}
|
| 308 |
+
{"current_steps": 1535, "total_steps": 22320, "loss": 1.1558, "lr": 3.4363799283154124e-05, "epoch": 1.375448028673835, "percentage": 6.88, "elapsed_time": "0:03:42", "remaining_time": "0:50:08", "throughput": 2634.91, "total_tokens": 585496}
|
| 309 |
+
{"current_steps": 1540, "total_steps": 22320, "loss": 0.3346, "lr": 3.4475806451612905e-05, "epoch": 1.3799283154121864, "percentage": 6.9, "elapsed_time": "0:03:42", "remaining_time": "0:50:07", "throughput": 2635.44, "total_tokens": 587416}
|
| 310 |
+
{"current_steps": 1545, "total_steps": 22320, "loss": 0.7698, "lr": 3.4587813620071685e-05, "epoch": 1.3844086021505375, "percentage": 6.92, "elapsed_time": "0:03:43", "remaining_time": "0:50:06", "throughput": 2635.59, "total_tokens": 589336}
|
| 311 |
+
{"current_steps": 1550, "total_steps": 22320, "loss": 0.135, "lr": 3.4699820788530466e-05, "epoch": 1.3888888888888888, "percentage": 6.94, "elapsed_time": "0:03:44", "remaining_time": "0:50:05", "throughput": 2635.66, "total_tokens": 591224}
|
| 312 |
+
{"current_steps": 1555, "total_steps": 22320, "loss": 0.5093, "lr": 3.4811827956989254e-05, "epoch": 1.3933691756272402, "percentage": 6.97, "elapsed_time": "0:03:45", "remaining_time": "0:50:04", "throughput": 2636.18, "total_tokens": 593144}
|
| 313 |
+
{"current_steps": 1560, "total_steps": 22320, "loss": 0.0921, "lr": 3.492383512544803e-05, "epoch": 1.3978494623655915, "percentage": 6.99, "elapsed_time": "0:03:45", "remaining_time": "0:50:04", "throughput": 2636.28, "total_tokens": 595160}
|
| 314 |
+
{"current_steps": 1565, "total_steps": 22320, "loss": 0.4888, "lr": 3.5035842293906815e-05, "epoch": 1.4023297491039426, "percentage": 7.01, "elapsed_time": "0:03:46", "remaining_time": "0:50:03", "throughput": 2636.56, "total_tokens": 597016}
|
| 315 |
+
{"current_steps": 1570, "total_steps": 22320, "loss": 0.5408, "lr": 3.5147849462365596e-05, "epoch": 1.406810035842294, "percentage": 7.03, "elapsed_time": "0:03:47", "remaining_time": "0:50:01", "throughput": 2637.1, "total_tokens": 598936}
|
| 316 |
+
{"current_steps": 1575, "total_steps": 22320, "loss": 0.1601, "lr": 3.525985663082437e-05, "epoch": 1.4112903225806452, "percentage": 7.06, "elapsed_time": "0:03:47", "remaining_time": "0:49:59", "throughput": 2637.27, "total_tokens": 600664}
|
| 317 |
+
{"current_steps": 1580, "total_steps": 22320, "loss": 0.2508, "lr": 3.537186379928316e-05, "epoch": 1.4157706093189963, "percentage": 7.08, "elapsed_time": "0:03:48", "remaining_time": "0:49:59", "throughput": 2637.47, "total_tokens": 602584}
|
| 318 |
+
{"current_steps": 1585, "total_steps": 22320, "loss": 0.5581, "lr": 3.548387096774194e-05, "epoch": 1.4202508960573477, "percentage": 7.1, "elapsed_time": "0:03:49", "remaining_time": "0:49:57", "throughput": 2637.36, "total_tokens": 604344}
|
| 319 |
+
{"current_steps": 1590, "total_steps": 22320, "loss": 0.5012, "lr": 3.559587813620072e-05, "epoch": 1.424731182795699, "percentage": 7.12, "elapsed_time": "0:03:49", "remaining_time": "0:49:56", "throughput": 2637.69, "total_tokens": 606200}
|
| 320 |
+
{"current_steps": 1595, "total_steps": 22320, "loss": 0.1476, "lr": 3.57078853046595e-05, "epoch": 1.4292114695340503, "percentage": 7.15, "elapsed_time": "0:03:50", "remaining_time": "0:49:55", "throughput": 2637.74, "total_tokens": 607992}
|
| 321 |
+
{"current_steps": 1600, "total_steps": 22320, "loss": 0.7313, "lr": 3.581989247311828e-05, "epoch": 1.4336917562724014, "percentage": 7.17, "elapsed_time": "0:03:51", "remaining_time": "0:49:53", "throughput": 2638.01, "total_tokens": 609752}
|
| 322 |
+
{"current_steps": 1605, "total_steps": 22320, "loss": 0.5344, "lr": 3.593189964157706e-05, "epoch": 1.4381720430107527, "percentage": 7.19, "elapsed_time": "0:03:51", "remaining_time": "0:49:52", "throughput": 2638.55, "total_tokens": 611768}
|
| 323 |
+
{"current_steps": 1610, "total_steps": 22320, "loss": 0.3533, "lr": 3.604390681003584e-05, "epoch": 1.4426523297491038, "percentage": 7.21, "elapsed_time": "0:03:52", "remaining_time": "0:49:51", "throughput": 2638.86, "total_tokens": 613624}
|
| 324 |
+
{"current_steps": 1615, "total_steps": 22320, "loss": 0.5671, "lr": 3.615591397849463e-05, "epoch": 1.4471326164874552, "percentage": 7.24, "elapsed_time": "0:03:53", "remaining_time": "0:49:49", "throughput": 2639.16, "total_tokens": 615384}
|
| 325 |
+
{"current_steps": 1620, "total_steps": 22320, "loss": 0.7964, "lr": 3.62679211469534e-05, "epoch": 1.4516129032258065, "percentage": 7.26, "elapsed_time": "0:03:53", "remaining_time": "0:49:48", "throughput": 2639.92, "total_tokens": 617368}
|
| 326 |
+
{"current_steps": 1625, "total_steps": 22320, "loss": 0.6377, "lr": 3.637992831541219e-05, "epoch": 1.4560931899641578, "percentage": 7.28, "elapsed_time": "0:03:54", "remaining_time": "0:49:47", "throughput": 2640.19, "total_tokens": 619416}
|
| 327 |
+
{"current_steps": 1630, "total_steps": 22320, "loss": 0.6647, "lr": 3.649193548387097e-05, "epoch": 1.460573476702509, "percentage": 7.3, "elapsed_time": "0:03:55", "remaining_time": "0:49:47", "throughput": 2640.47, "total_tokens": 621368}
|
| 328 |
+
{"current_steps": 1635, "total_steps": 22320, "loss": 0.6258, "lr": 3.660394265232975e-05, "epoch": 1.4650537634408602, "percentage": 7.33, "elapsed_time": "0:03:55", "remaining_time": "0:49:45", "throughput": 2640.98, "total_tokens": 623192}
|
| 329 |
+
{"current_steps": 1640, "total_steps": 22320, "loss": 1.0314, "lr": 3.671594982078853e-05, "epoch": 1.4695340501792113, "percentage": 7.35, "elapsed_time": "0:03:56", "remaining_time": "0:49:44", "throughput": 2641.52, "total_tokens": 625112}
|
| 330 |
+
{"current_steps": 1645, "total_steps": 22320, "loss": 0.7081, "lr": 3.682795698924731e-05, "epoch": 1.4740143369175627, "percentage": 7.37, "elapsed_time": "0:03:57", "remaining_time": "0:49:42", "throughput": 2641.54, "total_tokens": 626808}
|
| 331 |
+
{"current_steps": 1650, "total_steps": 22320, "loss": 0.4759, "lr": 3.69399641577061e-05, "epoch": 1.478494623655914, "percentage": 7.39, "elapsed_time": "0:03:57", "remaining_time": "0:49:40", "throughput": 2641.82, "total_tokens": 628568}
|
| 332 |
+
{"current_steps": 1655, "total_steps": 22320, "loss": 0.1828, "lr": 3.7051971326164874e-05, "epoch": 1.4829749103942653, "percentage": 7.41, "elapsed_time": "0:03:58", "remaining_time": "0:49:39", "throughput": 2642.32, "total_tokens": 630488}
|
| 333 |
+
{"current_steps": 1660, "total_steps": 22320, "loss": 0.2194, "lr": 3.7163978494623655e-05, "epoch": 1.4874551971326164, "percentage": 7.44, "elapsed_time": "0:03:59", "remaining_time": "0:49:37", "throughput": 2642.85, "total_tokens": 632312}
|
| 334 |
+
{"current_steps": 1665, "total_steps": 22320, "loss": 0.4595, "lr": 3.727598566308244e-05, "epoch": 1.4919354838709677, "percentage": 7.46, "elapsed_time": "0:03:59", "remaining_time": "0:49:36", "throughput": 2643.35, "total_tokens": 634232}
|
| 335 |
+
{"current_steps": 1670, "total_steps": 22320, "loss": 0.6745, "lr": 3.7387992831541216e-05, "epoch": 1.496415770609319, "percentage": 7.48, "elapsed_time": "0:04:00", "remaining_time": "0:49:35", "throughput": 2643.61, "total_tokens": 636088}
|
| 336 |
+
{"current_steps": 1675, "total_steps": 22320, "loss": 0.4876, "lr": 3.7500000000000003e-05, "epoch": 1.5008960573476702, "percentage": 7.5, "elapsed_time": "0:04:01", "remaining_time": "0:49:33", "throughput": 2644.42, "total_tokens": 638008}
|
| 337 |
+
{"current_steps": 1680, "total_steps": 22320, "loss": 0.369, "lr": 3.7612007168458784e-05, "epoch": 1.5053763440860215, "percentage": 7.53, "elapsed_time": "0:04:01", "remaining_time": "0:49:32", "throughput": 2644.68, "total_tokens": 639864}
|
| 338 |
+
{"current_steps": 1685, "total_steps": 22320, "loss": 0.3625, "lr": 3.7724014336917565e-05, "epoch": 1.5098566308243728, "percentage": 7.55, "elapsed_time": "0:04:02", "remaining_time": "0:49:30", "throughput": 2645.16, "total_tokens": 641688}
|
| 339 |
+
{"current_steps": 1690, "total_steps": 22320, "loss": 0.0851, "lr": 3.7836021505376346e-05, "epoch": 1.5143369175627241, "percentage": 7.57, "elapsed_time": "0:04:03", "remaining_time": "0:49:29", "throughput": 2644.99, "total_tokens": 643512}
|
| 340 |
+
{"current_steps": 1695, "total_steps": 22320, "loss": 0.4448, "lr": 3.7948028673835126e-05, "epoch": 1.5188172043010753, "percentage": 7.59, "elapsed_time": "0:04:04", "remaining_time": "0:49:29", "throughput": 2645.21, "total_tokens": 645464}
|
| 341 |
+
{"current_steps": 1700, "total_steps": 22320, "loss": 0.3854, "lr": 3.806003584229391e-05, "epoch": 1.5232974910394266, "percentage": 7.62, "elapsed_time": "0:04:04", "remaining_time": "0:49:27", "throughput": 2645.49, "total_tokens": 647320}
|
| 342 |
+
{"current_steps": 1705, "total_steps": 22320, "loss": 0.3601, "lr": 3.817204301075269e-05, "epoch": 1.5277777777777777, "percentage": 7.64, "elapsed_time": "0:04:05", "remaining_time": "0:49:27", "throughput": 2645.87, "total_tokens": 649304}
|
| 343 |
+
{"current_steps": 1710, "total_steps": 22320, "loss": 0.593, "lr": 3.8284050179211475e-05, "epoch": 1.532258064516129, "percentage": 7.66, "elapsed_time": "0:04:06", "remaining_time": "0:49:26", "throughput": 2645.91, "total_tokens": 651192}
|
| 344 |
+
{"current_steps": 1715, "total_steps": 22320, "loss": 0.4324, "lr": 3.839605734767025e-05, "epoch": 1.5367383512544803, "percentage": 7.68, "elapsed_time": "0:04:06", "remaining_time": "0:49:25", "throughput": 2645.95, "total_tokens": 653176}
|
| 345 |
+
{"current_steps": 1720, "total_steps": 22320, "loss": 0.0685, "lr": 3.8508064516129036e-05, "epoch": 1.5412186379928317, "percentage": 7.71, "elapsed_time": "0:04:07", "remaining_time": "0:49:24", "throughput": 2646.35, "total_tokens": 655064}
|
| 346 |
+
{"current_steps": 1725, "total_steps": 22320, "loss": 0.3368, "lr": 3.862007168458782e-05, "epoch": 1.5456989247311828, "percentage": 7.73, "elapsed_time": "0:04:08", "remaining_time": "0:49:23", "throughput": 2646.8, "total_tokens": 657080}
|
| 347 |
+
{"current_steps": 1730, "total_steps": 22320, "loss": 0.9095, "lr": 3.87320788530466e-05, "epoch": 1.550179211469534, "percentage": 7.75, "elapsed_time": "0:04:08", "remaining_time": "0:49:22", "throughput": 2647.08, "total_tokens": 658936}
|
| 348 |
+
{"current_steps": 1735, "total_steps": 22320, "loss": 0.7467, "lr": 3.884408602150538e-05, "epoch": 1.5546594982078852, "percentage": 7.77, "elapsed_time": "0:04:09", "remaining_time": "0:49:21", "throughput": 2647.11, "total_tokens": 660824}
|
| 349 |
+
{"current_steps": 1740, "total_steps": 22320, "loss": 0.6617, "lr": 3.895609318996416e-05, "epoch": 1.5591397849462365, "percentage": 7.8, "elapsed_time": "0:04:10", "remaining_time": "0:49:20", "throughput": 2647.25, "total_tokens": 662648}
|
| 350 |
+
{"current_steps": 1745, "total_steps": 22320, "loss": 1.0862, "lr": 3.906810035842295e-05, "epoch": 1.5636200716845878, "percentage": 7.82, "elapsed_time": "0:04:11", "remaining_time": "0:49:19", "throughput": 2647.17, "total_tokens": 664504}
|
| 351 |
+
{"current_steps": 1750, "total_steps": 22320, "loss": 0.2704, "lr": 3.918010752688172e-05, "epoch": 1.5681003584229392, "percentage": 7.84, "elapsed_time": "0:04:11", "remaining_time": "0:49:19", "throughput": 2647.41, "total_tokens": 666456}
|
| 352 |
+
{"current_steps": 1755, "total_steps": 22320, "loss": 0.2169, "lr": 3.92921146953405e-05, "epoch": 1.5725806451612905, "percentage": 7.86, "elapsed_time": "0:04:12", "remaining_time": "0:49:17", "throughput": 2647.56, "total_tokens": 668280}
|
| 353 |
+
{"current_steps": 1760, "total_steps": 22320, "loss": 0.5688, "lr": 3.940412186379929e-05, "epoch": 1.5770609318996416, "percentage": 7.89, "elapsed_time": "0:04:13", "remaining_time": "0:49:16", "throughput": 2647.71, "total_tokens": 670200}
|
| 354 |
+
{"current_steps": 1765, "total_steps": 22320, "loss": 0.2908, "lr": 3.951612903225806e-05, "epoch": 1.5815412186379927, "percentage": 7.91, "elapsed_time": "0:04:13", "remaining_time": "0:49:15", "throughput": 2648.18, "total_tokens": 672120}
|
| 355 |
+
{"current_steps": 1770, "total_steps": 22320, "loss": 0.4195, "lr": 3.962813620071685e-05, "epoch": 1.586021505376344, "percentage": 7.93, "elapsed_time": "0:04:14", "remaining_time": "0:49:15", "throughput": 2648.52, "total_tokens": 674104}
|
| 356 |
+
{"current_steps": 1775, "total_steps": 22320, "loss": 0.7232, "lr": 3.974014336917563e-05, "epoch": 1.5905017921146953, "percentage": 7.95, "elapsed_time": "0:04:15", "remaining_time": "0:49:14", "throughput": 2648.94, "total_tokens": 676216}
|
| 357 |
+
{"current_steps": 1780, "total_steps": 22320, "loss": 0.5179, "lr": 3.985215053763441e-05, "epoch": 1.5949820788530467, "percentage": 7.97, "elapsed_time": "0:04:16", "remaining_time": "0:49:14", "throughput": 2649.59, "total_tokens": 678296}
|
| 358 |
+
{"current_steps": 1785, "total_steps": 22320, "loss": 0.4092, "lr": 3.996415770609319e-05, "epoch": 1.599462365591398, "percentage": 8.0, "elapsed_time": "0:04:16", "remaining_time": "0:49:12", "throughput": 2650.14, "total_tokens": 680248}
|
| 359 |
+
{"current_steps": 1790, "total_steps": 22320, "loss": 0.7696, "lr": 4.007616487455197e-05, "epoch": 1.603942652329749, "percentage": 8.02, "elapsed_time": "0:04:17", "remaining_time": "0:49:11", "throughput": 2650.31, "total_tokens": 681976}
|
| 360 |
+
{"current_steps": 1795, "total_steps": 22320, "loss": 0.4309, "lr": 4.0188172043010753e-05, "epoch": 1.6084229390681004, "percentage": 8.04, "elapsed_time": "0:04:17", "remaining_time": "0:49:10", "throughput": 2650.65, "total_tokens": 683864}
|
| 361 |
+
{"current_steps": 1800, "total_steps": 22320, "loss": 0.2732, "lr": 4.0300179211469534e-05, "epoch": 1.6129032258064515, "percentage": 8.06, "elapsed_time": "0:04:18", "remaining_time": "0:49:09", "throughput": 2651.09, "total_tokens": 685880}
|
| 362 |
+
{"current_steps": 1805, "total_steps": 22320, "loss": 0.2479, "lr": 4.041218637992832e-05, "epoch": 1.6173835125448028, "percentage": 8.09, "elapsed_time": "0:04:19", "remaining_time": "0:49:08", "throughput": 2651.43, "total_tokens": 687768}
|
| 363 |
+
{"current_steps": 1810, "total_steps": 22320, "loss": 0.5145, "lr": 4.0524193548387096e-05, "epoch": 1.6218637992831542, "percentage": 8.11, "elapsed_time": "0:04:20", "remaining_time": "0:49:06", "throughput": 2651.35, "total_tokens": 689528}
|
| 364 |
+
{"current_steps": 1815, "total_steps": 22320, "loss": 0.739, "lr": 4.0636200716845876e-05, "epoch": 1.6263440860215055, "percentage": 8.13, "elapsed_time": "0:04:20", "remaining_time": "0:49:05", "throughput": 2651.58, "total_tokens": 691288}
|
| 365 |
+
{"current_steps": 1820, "total_steps": 22320, "loss": 0.5283, "lr": 4.0748207885304664e-05, "epoch": 1.6308243727598566, "percentage": 8.15, "elapsed_time": "0:04:21", "remaining_time": "0:49:03", "throughput": 2652.04, "total_tokens": 693112}
|
| 366 |
+
{"current_steps": 1825, "total_steps": 22320, "loss": 0.1456, "lr": 4.0860215053763444e-05, "epoch": 1.635304659498208, "percentage": 8.18, "elapsed_time": "0:04:22", "remaining_time": "0:49:03", "throughput": 2652.38, "total_tokens": 695096}
|
| 367 |
+
{"current_steps": 1830, "total_steps": 22320, "loss": 1.2179, "lr": 4.0972222222222225e-05, "epoch": 1.639784946236559, "percentage": 8.2, "elapsed_time": "0:04:22", "remaining_time": "0:49:02", "throughput": 2652.64, "total_tokens": 697112}
|
| 368 |
+
{"current_steps": 1835, "total_steps": 22320, "loss": 0.3008, "lr": 4.1084229390681006e-05, "epoch": 1.6442652329749103, "percentage": 8.22, "elapsed_time": "0:04:23", "remaining_time": "0:49:02", "throughput": 2652.74, "total_tokens": 699128}
|
| 369 |
+
{"current_steps": 1840, "total_steps": 22320, "loss": 0.2564, "lr": 4.1196236559139786e-05, "epoch": 1.6487455197132617, "percentage": 8.24, "elapsed_time": "0:04:24", "remaining_time": "0:49:00", "throughput": 2653.21, "total_tokens": 700952}
|
| 370 |
+
{"current_steps": 1845, "total_steps": 22320, "loss": 0.3717, "lr": 4.130824372759857e-05, "epoch": 1.653225806451613, "percentage": 8.27, "elapsed_time": "0:04:24", "remaining_time": "0:48:59", "throughput": 2653.61, "total_tokens": 702968}
|
| 371 |
+
{"current_steps": 1850, "total_steps": 22320, "loss": 0.6873, "lr": 4.142025089605735e-05, "epoch": 1.6577060931899643, "percentage": 8.29, "elapsed_time": "0:04:25", "remaining_time": "0:48:58", "throughput": 2654.05, "total_tokens": 704792}
|
| 372 |
+
{"current_steps": 1855, "total_steps": 22320, "loss": 0.3627, "lr": 4.1532258064516135e-05, "epoch": 1.6621863799283154, "percentage": 8.31, "elapsed_time": "0:04:26", "remaining_time": "0:48:57", "throughput": 2654.05, "total_tokens": 706584}
|
| 373 |
+
{"current_steps": 1860, "total_steps": 22320, "loss": 0.6303, "lr": 4.164426523297491e-05, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:04:26", "remaining_time": "0:48:56", "throughput": 2654.47, "total_tokens": 708504}
|
| 374 |
+
{"current_steps": 1865, "total_steps": 22320, "loss": 0.3955, "lr": 4.1756272401433697e-05, "epoch": 1.6711469534050178, "percentage": 8.36, "elapsed_time": "0:04:27", "remaining_time": "0:48:55", "throughput": 2654.91, "total_tokens": 710584}
|
| 375 |
+
{"current_steps": 1870, "total_steps": 22320, "loss": 0.3869, "lr": 4.186827956989248e-05, "epoch": 1.6756272401433692, "percentage": 8.38, "elapsed_time": "0:04:28", "remaining_time": "0:48:55", "throughput": 2655.23, "total_tokens": 712664}
|
| 376 |
+
{"current_steps": 1875, "total_steps": 22320, "loss": 0.33, "lr": 4.198028673835125e-05, "epoch": 1.6801075268817205, "percentage": 8.4, "elapsed_time": "0:04:29", "remaining_time": "0:48:54", "throughput": 2655.57, "total_tokens": 714552}
|
| 377 |
+
{"current_steps": 1880, "total_steps": 22320, "loss": 0.4614, "lr": 4.209229390681004e-05, "epoch": 1.6845878136200718, "percentage": 8.42, "elapsed_time": "0:04:29", "remaining_time": "0:48:53", "throughput": 2655.93, "total_tokens": 716568}
|
| 378 |
+
{"current_steps": 1885, "total_steps": 22320, "loss": 0.3558, "lr": 4.220430107526882e-05, "epoch": 1.689068100358423, "percentage": 8.45, "elapsed_time": "0:04:30", "remaining_time": "0:48:53", "throughput": 2655.78, "total_tokens": 718616}
|
| 379 |
+
{"current_steps": 1890, "total_steps": 22320, "loss": 0.5895, "lr": 4.23163082437276e-05, "epoch": 1.6935483870967742, "percentage": 8.47, "elapsed_time": "0:04:31", "remaining_time": "0:48:53", "throughput": 2655.77, "total_tokens": 720696}
|
| 380 |
+
{"current_steps": 1895, "total_steps": 22320, "loss": 0.8262, "lr": 4.242831541218638e-05, "epoch": 1.6980286738351253, "percentage": 8.49, "elapsed_time": "0:04:32", "remaining_time": "0:48:52", "throughput": 2656.08, "total_tokens": 722584}
|
| 381 |
+
{"current_steps": 1900, "total_steps": 22320, "loss": 0.6338, "lr": 4.254032258064516e-05, "epoch": 1.7025089605734767, "percentage": 8.51, "elapsed_time": "0:04:32", "remaining_time": "0:48:51", "throughput": 2656.29, "total_tokens": 724440}
|
| 382 |
+
{"current_steps": 1905, "total_steps": 22320, "loss": 0.4039, "lr": 4.265232974910394e-05, "epoch": 1.706989247311828, "percentage": 8.53, "elapsed_time": "0:04:33", "remaining_time": "0:48:50", "throughput": 2656.41, "total_tokens": 726360}
|
| 383 |
+
{"current_steps": 1910, "total_steps": 22320, "loss": 0.7557, "lr": 4.276433691756272e-05, "epoch": 1.7114695340501793, "percentage": 8.56, "elapsed_time": "0:04:34", "remaining_time": "0:48:49", "throughput": 2656.89, "total_tokens": 728312}
|
| 384 |
+
{"current_steps": 1915, "total_steps": 22320, "loss": 0.9621, "lr": 4.287634408602151e-05, "epoch": 1.7159498207885304, "percentage": 8.58, "elapsed_time": "0:04:34", "remaining_time": "0:48:48", "throughput": 2656.96, "total_tokens": 730232}
|
| 385 |
+
{"current_steps": 1920, "total_steps": 22320, "loss": 0.9593, "lr": 4.298835125448029e-05, "epoch": 1.7204301075268817, "percentage": 8.6, "elapsed_time": "0:04:35", "remaining_time": "0:48:47", "throughput": 2657.17, "total_tokens": 732216}
|
| 386 |
+
{"current_steps": 1925, "total_steps": 22320, "loss": 0.4535, "lr": 4.310035842293907e-05, "epoch": 1.7249103942652328, "percentage": 8.62, "elapsed_time": "0:04:36", "remaining_time": "0:48:46", "throughput": 2657.26, "total_tokens": 734040}
|
| 387 |
+
{"current_steps": 1930, "total_steps": 22320, "loss": 0.9617, "lr": 4.321236559139785e-05, "epoch": 1.7293906810035842, "percentage": 8.65, "elapsed_time": "0:04:36", "remaining_time": "0:48:45", "throughput": 2657.52, "total_tokens": 735928}
|
| 388 |
+
{"current_steps": 1935, "total_steps": 22320, "loss": 0.4373, "lr": 4.332437275985663e-05, "epoch": 1.7338709677419355, "percentage": 8.67, "elapsed_time": "0:04:37", "remaining_time": "0:48:44", "throughput": 2657.57, "total_tokens": 737752}
|
| 389 |
+
{"current_steps": 1940, "total_steps": 22320, "loss": 0.9003, "lr": 4.3436379928315414e-05, "epoch": 1.7383512544802868, "percentage": 8.69, "elapsed_time": "0:04:38", "remaining_time": "0:48:43", "throughput": 2657.95, "total_tokens": 739576}
|
| 390 |
+
{"current_steps": 1945, "total_steps": 22320, "loss": 0.5133, "lr": 4.3548387096774194e-05, "epoch": 1.7428315412186381, "percentage": 8.71, "elapsed_time": "0:04:39", "remaining_time": "0:48:42", "throughput": 2658.22, "total_tokens": 741656}
|
| 391 |
+
{"current_steps": 1950, "total_steps": 22320, "loss": 1.0662, "lr": 4.366039426523298e-05, "epoch": 1.7473118279569892, "percentage": 8.74, "elapsed_time": "0:04:39", "remaining_time": "0:48:41", "throughput": 2658.42, "total_tokens": 743608}
|
| 392 |
+
{"current_steps": 1955, "total_steps": 22320, "loss": 0.6417, "lr": 4.3772401433691756e-05, "epoch": 1.7517921146953404, "percentage": 8.76, "elapsed_time": "0:04:40", "remaining_time": "0:48:41", "throughput": 2658.33, "total_tokens": 745560}
|
| 393 |
+
{"current_steps": 1960, "total_steps": 22320, "loss": 0.4121, "lr": 4.3884408602150536e-05, "epoch": 1.7562724014336917, "percentage": 8.78, "elapsed_time": "0:04:41", "remaining_time": "0:48:40", "throughput": 2658.65, "total_tokens": 747480}
|
| 394 |
+
{"current_steps": 1965, "total_steps": 22320, "loss": 0.4192, "lr": 4.3996415770609324e-05, "epoch": 1.760752688172043, "percentage": 8.8, "elapsed_time": "0:04:41", "remaining_time": "0:48:39", "throughput": 2658.7, "total_tokens": 749304}
|
| 395 |
+
{"current_steps": 1970, "total_steps": 22320, "loss": 0.9134, "lr": 4.41084229390681e-05, "epoch": 1.7652329749103943, "percentage": 8.83, "elapsed_time": "0:04:42", "remaining_time": "0:48:38", "throughput": 2658.78, "total_tokens": 751224}
|
| 396 |
+
{"current_steps": 1975, "total_steps": 22320, "loss": 1.0941, "lr": 4.4220430107526885e-05, "epoch": 1.7697132616487457, "percentage": 8.85, "elapsed_time": "0:04:43", "remaining_time": "0:48:37", "throughput": 2659.46, "total_tokens": 753304}
|
| 397 |
+
{"current_steps": 1980, "total_steps": 22320, "loss": 0.8294, "lr": 4.4332437275985666e-05, "epoch": 1.7741935483870968, "percentage": 8.87, "elapsed_time": "0:04:43", "remaining_time": "0:48:36", "throughput": 2659.65, "total_tokens": 755064}
|
| 398 |
+
{"current_steps": 1985, "total_steps": 22320, "loss": 0.2903, "lr": 4.4444444444444447e-05, "epoch": 1.778673835125448, "percentage": 8.89, "elapsed_time": "0:04:44", "remaining_time": "0:48:35", "throughput": 2659.67, "total_tokens": 756984}
|
| 399 |
+
{"current_steps": 1990, "total_steps": 22320, "loss": 0.4299, "lr": 4.455645161290323e-05, "epoch": 1.7831541218637992, "percentage": 8.92, "elapsed_time": "0:04:45", "remaining_time": "0:48:34", "throughput": 2660.1, "total_tokens": 758840}
|
| 400 |
+
{"current_steps": 1995, "total_steps": 22320, "loss": 1.0325, "lr": 4.466845878136201e-05, "epoch": 1.7876344086021505, "percentage": 8.94, "elapsed_time": "0:04:46", "remaining_time": "0:48:33", "throughput": 2660.25, "total_tokens": 760888}
|
| 401 |
+
{"current_steps": 2000, "total_steps": 22320, "loss": 0.2514, "lr": 4.478046594982079e-05, "epoch": 1.7921146953405018, "percentage": 8.96, "elapsed_time": "0:04:46", "remaining_time": "0:48:32", "throughput": 2660.74, "total_tokens": 762744}
|
| 402 |
+
{"current_steps": 2005, "total_steps": 22320, "loss": 0.2518, "lr": 4.489247311827957e-05, "epoch": 1.7965949820788532, "percentage": 8.98, "elapsed_time": "0:04:47", "remaining_time": "0:48:31", "throughput": 2660.92, "total_tokens": 764504}
|
| 403 |
+
{"current_steps": 2010, "total_steps": 22320, "loss": 0.3431, "lr": 4.500448028673836e-05, "epoch": 1.8010752688172043, "percentage": 9.01, "elapsed_time": "0:04:47", "remaining_time": "0:48:29", "throughput": 2660.78, "total_tokens": 766264}
|
| 404 |
+
{"current_steps": 2015, "total_steps": 22320, "loss": 0.326, "lr": 4.511648745519714e-05, "epoch": 1.8055555555555556, "percentage": 9.03, "elapsed_time": "0:04:48", "remaining_time": "0:48:28", "throughput": 2661.05, "total_tokens": 768152}
|
| 405 |
+
{"current_steps": 2020, "total_steps": 22320, "loss": 0.3632, "lr": 4.522849462365592e-05, "epoch": 1.8100358422939067, "percentage": 9.05, "elapsed_time": "0:04:49", "remaining_time": "0:48:27", "throughput": 2661.22, "total_tokens": 770008}
|
| 406 |
+
{"current_steps": 2025, "total_steps": 22320, "loss": 0.0898, "lr": 4.53405017921147e-05, "epoch": 1.814516129032258, "percentage": 9.07, "elapsed_time": "0:04:50", "remaining_time": "0:48:26", "throughput": 2661.32, "total_tokens": 771832}
|
| 407 |
+
{"current_steps": 2030, "total_steps": 22320, "loss": 0.1708, "lr": 4.545250896057348e-05, "epoch": 1.8189964157706093, "percentage": 9.09, "elapsed_time": "0:04:50", "remaining_time": "0:48:26", "throughput": 2661.84, "total_tokens": 773912}
|
| 408 |
+
{"current_steps": 2035, "total_steps": 22320, "loss": 0.7898, "lr": 4.556451612903226e-05, "epoch": 1.8234767025089607, "percentage": 9.12, "elapsed_time": "0:04:51", "remaining_time": "0:48:25", "throughput": 2662.1, "total_tokens": 775896}
|
| 409 |
+
{"current_steps": 2040, "total_steps": 22320, "loss": 1.3234, "lr": 4.567652329749104e-05, "epoch": 1.827956989247312, "percentage": 9.14, "elapsed_time": "0:04:52", "remaining_time": "0:48:24", "throughput": 2662.1, "total_tokens": 777880}
|
| 410 |
+
{"current_steps": 2045, "total_steps": 22320, "loss": 0.1481, "lr": 4.578853046594982e-05, "epoch": 1.832437275985663, "percentage": 9.16, "elapsed_time": "0:04:52", "remaining_time": "0:48:24", "throughput": 2662.18, "total_tokens": 779896}
|
| 411 |
+
{"current_steps": 2050, "total_steps": 22320, "loss": 0.3665, "lr": 4.59005376344086e-05, "epoch": 1.8369175627240142, "percentage": 9.18, "elapsed_time": "0:04:53", "remaining_time": "0:48:24", "throughput": 2662.51, "total_tokens": 782008}
|
| 412 |
+
{"current_steps": 2055, "total_steps": 22320, "loss": 0.2286, "lr": 4.601254480286738e-05, "epoch": 1.8413978494623655, "percentage": 9.21, "elapsed_time": "0:04:54", "remaining_time": "0:48:23", "throughput": 2662.69, "total_tokens": 783960}
|
| 413 |
+
{"current_steps": 2060, "total_steps": 22320, "loss": 0.4771, "lr": 4.612455197132617e-05, "epoch": 1.8458781362007168, "percentage": 9.23, "elapsed_time": "0:04:55", "remaining_time": "0:48:22", "throughput": 2663.23, "total_tokens": 785848}
|
| 414 |
+
{"current_steps": 2065, "total_steps": 22320, "loss": 0.0626, "lr": 4.6236559139784944e-05, "epoch": 1.8503584229390682, "percentage": 9.25, "elapsed_time": "0:04:55", "remaining_time": "0:48:21", "throughput": 2663.31, "total_tokens": 787768}
|
| 415 |
+
{"current_steps": 2070, "total_steps": 22320, "loss": 0.4259, "lr": 4.634856630824373e-05, "epoch": 1.8548387096774195, "percentage": 9.27, "elapsed_time": "0:04:56", "remaining_time": "0:48:20", "throughput": 2663.28, "total_tokens": 789656}
|
| 416 |
+
{"current_steps": 2075, "total_steps": 22320, "loss": 0.4289, "lr": 4.646057347670251e-05, "epoch": 1.8593189964157706, "percentage": 9.3, "elapsed_time": "0:04:57", "remaining_time": "0:48:20", "throughput": 2663.65, "total_tokens": 791736}
|
| 417 |
+
{"current_steps": 2080, "total_steps": 22320, "loss": 0.1376, "lr": 4.657258064516129e-05, "epoch": 1.863799283154122, "percentage": 9.32, "elapsed_time": "0:04:57", "remaining_time": "0:48:18", "throughput": 2663.91, "total_tokens": 793624}
|
| 418 |
+
{"current_steps": 2085, "total_steps": 22320, "loss": 0.6138, "lr": 4.6684587813620074e-05, "epoch": 1.868279569892473, "percentage": 9.34, "elapsed_time": "0:04:58", "remaining_time": "0:48:17", "throughput": 2663.99, "total_tokens": 795352}
|
| 419 |
+
{"current_steps": 2090, "total_steps": 22320, "loss": 1.3904, "lr": 4.6796594982078854e-05, "epoch": 1.8727598566308243, "percentage": 9.36, "elapsed_time": "0:04:59", "remaining_time": "0:48:17", "throughput": 2664.32, "total_tokens": 797464}
|
| 420 |
+
{"current_steps": 2095, "total_steps": 22320, "loss": 0.6889, "lr": 4.690860215053764e-05, "epoch": 1.8772401433691757, "percentage": 9.39, "elapsed_time": "0:05:00", "remaining_time": "0:48:16", "throughput": 2664.21, "total_tokens": 799320}
|
| 421 |
+
{"current_steps": 2100, "total_steps": 22320, "loss": 0.3782, "lr": 4.7020609318996416e-05, "epoch": 1.881720430107527, "percentage": 9.41, "elapsed_time": "0:05:00", "remaining_time": "0:48:15", "throughput": 2664.53, "total_tokens": 801240}
|
| 422 |
+
{"current_steps": 2105, "total_steps": 22320, "loss": 0.7333, "lr": 4.71326164874552e-05, "epoch": 1.886200716845878, "percentage": 9.43, "elapsed_time": "0:05:01", "remaining_time": "0:48:14", "throughput": 2664.7, "total_tokens": 803096}
|
| 423 |
+
{"current_steps": 2110, "total_steps": 22320, "loss": 1.0869, "lr": 4.7244623655913984e-05, "epoch": 1.8906810035842294, "percentage": 9.45, "elapsed_time": "0:05:02", "remaining_time": "0:48:13", "throughput": 2664.86, "total_tokens": 804952}
|
| 424 |
+
{"current_steps": 2115, "total_steps": 22320, "loss": 0.4463, "lr": 4.735663082437276e-05, "epoch": 1.8951612903225805, "percentage": 9.48, "elapsed_time": "0:05:02", "remaining_time": "0:48:12", "throughput": 2665.01, "total_tokens": 807000}
|
| 425 |
+
{"current_steps": 2120, "total_steps": 22320, "loss": 0.506, "lr": 4.7468637992831545e-05, "epoch": 1.8996415770609318, "percentage": 9.5, "elapsed_time": "0:05:03", "remaining_time": "0:48:12", "throughput": 2665.0, "total_tokens": 808984}
|
| 426 |
+
{"current_steps": 2125, "total_steps": 22320, "loss": 0.4275, "lr": 4.7580645161290326e-05, "epoch": 1.9041218637992832, "percentage": 9.52, "elapsed_time": "0:05:04", "remaining_time": "0:48:11", "throughput": 2665.24, "total_tokens": 810872}
|
| 427 |
+
{"current_steps": 2130, "total_steps": 22320, "loss": 0.5939, "lr": 4.769265232974911e-05, "epoch": 1.9086021505376345, "percentage": 9.54, "elapsed_time": "0:05:04", "remaining_time": "0:48:09", "throughput": 2665.59, "total_tokens": 812696}
|
| 428 |
+
{"current_steps": 2135, "total_steps": 22320, "loss": 0.5367, "lr": 4.780465949820789e-05, "epoch": 1.9130824372759858, "percentage": 9.57, "elapsed_time": "0:05:05", "remaining_time": "0:48:08", "throughput": 2665.83, "total_tokens": 814584}
|
| 429 |
+
{"current_steps": 2140, "total_steps": 22320, "loss": 0.5979, "lr": 4.791666666666667e-05, "epoch": 1.917562724014337, "percentage": 9.59, "elapsed_time": "0:05:06", "remaining_time": "0:48:07", "throughput": 2665.99, "total_tokens": 816440}
|
| 430 |
+
{"current_steps": 2145, "total_steps": 22320, "loss": 0.3276, "lr": 4.802867383512545e-05, "epoch": 1.922043010752688, "percentage": 9.61, "elapsed_time": "0:05:06", "remaining_time": "0:48:07", "throughput": 2665.93, "total_tokens": 818328}
|
| 431 |
+
{"current_steps": 2150, "total_steps": 22320, "loss": 0.4902, "lr": 4.814068100358423e-05, "epoch": 1.9265232974910393, "percentage": 9.63, "elapsed_time": "0:05:07", "remaining_time": "0:48:06", "throughput": 2665.9, "total_tokens": 820312}
|
| 432 |
+
{"current_steps": 2155, "total_steps": 22320, "loss": 0.3896, "lr": 4.825268817204302e-05, "epoch": 1.9310035842293907, "percentage": 9.66, "elapsed_time": "0:05:08", "remaining_time": "0:48:05", "throughput": 2666.16, "total_tokens": 822104}
|
| 433 |
+
{"current_steps": 2160, "total_steps": 22320, "loss": 0.6773, "lr": 4.836469534050179e-05, "epoch": 1.935483870967742, "percentage": 9.68, "elapsed_time": "0:05:09", "remaining_time": "0:48:04", "throughput": 2666.31, "total_tokens": 823960}
|
| 434 |
+
{"current_steps": 2165, "total_steps": 22320, "loss": 0.7326, "lr": 4.847670250896058e-05, "epoch": 1.9399641577060933, "percentage": 9.7, "elapsed_time": "0:05:09", "remaining_time": "0:48:03", "throughput": 2666.81, "total_tokens": 825944}
|
| 435 |
+
{"current_steps": 2170, "total_steps": 22320, "loss": 0.4513, "lr": 4.858870967741936e-05, "epoch": 1.9444444444444444, "percentage": 9.72, "elapsed_time": "0:05:10", "remaining_time": "0:48:02", "throughput": 2666.89, "total_tokens": 827768}
|
| 436 |
+
{"current_steps": 2175, "total_steps": 22320, "loss": 1.405, "lr": 4.870071684587813e-05, "epoch": 1.9489247311827957, "percentage": 9.74, "elapsed_time": "0:05:11", "remaining_time": "0:48:01", "throughput": 2667.12, "total_tokens": 829752}
|
| 437 |
+
{"current_steps": 2180, "total_steps": 22320, "loss": 0.7005, "lr": 4.881272401433692e-05, "epoch": 1.9534050179211468, "percentage": 9.77, "elapsed_time": "0:05:11", "remaining_time": "0:48:01", "throughput": 2667.35, "total_tokens": 831832}
|
| 438 |
+
{"current_steps": 2185, "total_steps": 22320, "loss": 0.2731, "lr": 4.89247311827957e-05, "epoch": 1.9578853046594982, "percentage": 9.79, "elapsed_time": "0:05:12", "remaining_time": "0:47:59", "throughput": 2667.7, "total_tokens": 833656}
|
| 439 |
+
{"current_steps": 2190, "total_steps": 22320, "loss": 0.3239, "lr": 4.903673835125449e-05, "epoch": 1.9623655913978495, "percentage": 9.81, "elapsed_time": "0:05:13", "remaining_time": "0:47:59", "throughput": 2667.64, "total_tokens": 835544}
|
| 440 |
+
{"current_steps": 2195, "total_steps": 22320, "loss": 0.4362, "lr": 4.914874551971326e-05, "epoch": 1.9668458781362008, "percentage": 9.83, "elapsed_time": "0:05:13", "remaining_time": "0:47:57", "throughput": 2667.72, "total_tokens": 837272}
|
| 441 |
+
{"current_steps": 2200, "total_steps": 22320, "loss": 0.2661, "lr": 4.926075268817204e-05, "epoch": 1.971326164874552, "percentage": 9.86, "elapsed_time": "0:05:14", "remaining_time": "0:47:56", "throughput": 2667.95, "total_tokens": 839256}
|
| 442 |
+
{"current_steps": 2205, "total_steps": 22320, "loss": 0.8405, "lr": 4.937275985663083e-05, "epoch": 1.9758064516129032, "percentage": 9.88, "elapsed_time": "0:05:15", "remaining_time": "0:47:55", "throughput": 2668.44, "total_tokens": 841240}
|
| 443 |
+
{"current_steps": 2210, "total_steps": 22320, "loss": 1.0773, "lr": 4.9484767025089604e-05, "epoch": 1.9802867383512543, "percentage": 9.9, "elapsed_time": "0:05:15", "remaining_time": "0:47:54", "throughput": 2668.32, "total_tokens": 843000}
|
| 444 |
+
{"current_steps": 2215, "total_steps": 22320, "loss": 0.6677, "lr": 4.959677419354839e-05, "epoch": 1.9847670250896057, "percentage": 9.92, "elapsed_time": "0:05:16", "remaining_time": "0:47:53", "throughput": 2668.47, "total_tokens": 844856}
|
| 445 |
+
{"current_steps": 2220, "total_steps": 22320, "loss": 0.4077, "lr": 4.970878136200717e-05, "epoch": 1.989247311827957, "percentage": 9.95, "elapsed_time": "0:05:17", "remaining_time": "0:47:53", "throughput": 2668.87, "total_tokens": 846904}
|
| 446 |
+
{"current_steps": 2225, "total_steps": 22320, "loss": 0.5756, "lr": 4.982078853046595e-05, "epoch": 1.9937275985663083, "percentage": 9.97, "elapsed_time": "0:05:17", "remaining_time": "0:47:51", "throughput": 2668.59, "total_tokens": 848600}
|
| 447 |
+
{"current_steps": 2230, "total_steps": 22320, "loss": 0.2729, "lr": 4.9932795698924734e-05, "epoch": 1.9982078853046596, "percentage": 9.99, "elapsed_time": "0:05:18", "remaining_time": "0:47:51", "throughput": 2668.81, "total_tokens": 850680}
|
| 448 |
+
{"current_steps": 2232, "total_steps": 22320, "eval_loss": 0.5427400469779968, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:05:24", "remaining_time": "0:48:41", "throughput": 2622.04, "total_tokens": 851112}
|
| 449 |
+
{"current_steps": 2235, "total_steps": 22320, "loss": 0.2538, "lr": 4.999999877708479e-05, "epoch": 2.002688172043011, "percentage": 10.01, "elapsed_time": "0:05:25", "remaining_time": "0:48:49", "throughput": 2614.76, "total_tokens": 852232}
|
| 450 |
+
{"current_steps": 2240, "total_steps": 22320, "loss": 0.4123, "lr": 4.9999985019290045e-05, "epoch": 2.007168458781362, "percentage": 10.04, "elapsed_time": "0:05:26", "remaining_time": "0:48:48", "throughput": 2615.03, "total_tokens": 854312}
|
| 451 |
+
{"current_steps": 2245, "total_steps": 22320, "loss": 0.2382, "lr": 4.9999955975064985e-05, "epoch": 2.011648745519713, "percentage": 10.06, "elapsed_time": "0:05:27", "remaining_time": "0:48:47", "throughput": 2615.44, "total_tokens": 856296}
|
| 452 |
+
{"current_steps": 2250, "total_steps": 22320, "loss": 0.4184, "lr": 4.999991164442736e-05, "epoch": 2.0161290322580645, "percentage": 10.08, "elapsed_time": "0:05:28", "remaining_time": "0:48:46", "throughput": 2615.5, "total_tokens": 858184}
|
| 453 |
+
{"current_steps": 2255, "total_steps": 22320, "loss": 0.5826, "lr": 4.9999852027404285e-05, "epoch": 2.020609318996416, "percentage": 10.1, "elapsed_time": "0:05:28", "remaining_time": "0:48:45", "throughput": 2615.82, "total_tokens": 860072}
|