rbelanec commited on
Commit
fad3eba
·
verified ·
1 Parent(s): 47225ed

Training in progress, step 38480

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +384 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f286bc20b0a2f3cac4ca38d29132f2df82c7414ef69ef0d9d8c41de048aa91b
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb2f450589f18ae853df5e13798110ca6089bdef29f3a72e23efdc578221cdda
3
  size 798032
trainer_log.jsonl CHANGED
@@ -7331,3 +7331,387 @@
7331
  {"current_steps": 36560, "total_steps": 38480, "loss": 0.1782, "lr": 3.7862566783146147e-07, "epoch": 19.002079002079004, "percentage": 95.01, "elapsed_time": "1:28:07", "remaining_time": "0:04:37", "throughput": 1318.02, "total_tokens": 6968832}
7332
  {"current_steps": 36565, "total_steps": 38480, "loss": 0.0913, "lr": 3.766622085984661e-07, "epoch": 19.004677754677754, "percentage": 95.02, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6969760}
7333
  {"current_steps": 36570, "total_steps": 38480, "loss": 0.1051, "lr": 3.747038149728266e-07, "epoch": 19.007276507276508, "percentage": 95.04, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6970688}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7331
  {"current_steps": 36560, "total_steps": 38480, "loss": 0.1782, "lr": 3.7862566783146147e-07, "epoch": 19.002079002079004, "percentage": 95.01, "elapsed_time": "1:28:07", "remaining_time": "0:04:37", "throughput": 1318.02, "total_tokens": 6968832}
7332
  {"current_steps": 36565, "total_steps": 38480, "loss": 0.0913, "lr": 3.766622085984661e-07, "epoch": 19.004677754677754, "percentage": 95.02, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6969760}
7333
  {"current_steps": 36570, "total_steps": 38480, "loss": 0.1051, "lr": 3.747038149728266e-07, "epoch": 19.007276507276508, "percentage": 95.04, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6970688}
7334
+ {"current_steps": 36575, "total_steps": 38480, "loss": 0.0362, "lr": 3.7275048735743167e-07, "epoch": 19.00987525987526, "percentage": 95.05, "elapsed_time": "1:28:09", "remaining_time": "0:04:35", "throughput": 1317.99, "total_tokens": 6971648}
7335
+ {"current_steps": 36580, "total_steps": 38480, "loss": 0.3581, "lr": 3.708022261541294e-07, "epoch": 19.012474012474012, "percentage": 95.06, "elapsed_time": "1:28:10", "remaining_time": "0:04:34", "throughput": 1318.01, "total_tokens": 6972704}
7336
+ {"current_steps": 36585, "total_steps": 38480, "loss": 0.2977, "lr": 3.688590317637214e-07, "epoch": 19.015072765072766, "percentage": 95.08, "elapsed_time": "1:28:11", "remaining_time": "0:04:34", "throughput": 1318.02, "total_tokens": 6973696}
7337
+ {"current_steps": 36590, "total_steps": 38480, "loss": 0.0497, "lr": 3.6692090458597115e-07, "epoch": 19.017671517671516, "percentage": 95.09, "elapsed_time": "1:28:11", "remaining_time": "0:04:33", "throughput": 1318.02, "total_tokens": 6974624}
7338
+ {"current_steps": 36595, "total_steps": 38480, "loss": 0.1458, "lr": 3.6498784501959595e-07, "epoch": 19.02027027027027, "percentage": 95.1, "elapsed_time": "1:28:12", "remaining_time": "0:04:32", "throughput": 1318.02, "total_tokens": 6975552}
7339
+ {"current_steps": 36600, "total_steps": 38480, "loss": 0.0932, "lr": 3.63059853462272e-07, "epoch": 19.022869022869024, "percentage": 95.11, "elapsed_time": "1:28:13", "remaining_time": "0:04:31", "throughput": 1318.03, "total_tokens": 6976512}
7340
+ {"current_steps": 36605, "total_steps": 38480, "loss": 0.0794, "lr": 3.611369303106349e-07, "epoch": 19.025467775467774, "percentage": 95.13, "elapsed_time": "1:28:13", "remaining_time": "0:04:31", "throughput": 1318.05, "total_tokens": 6977536}
7341
+ {"current_steps": 36610, "total_steps": 38480, "loss": 0.1062, "lr": 3.592190759602709e-07, "epoch": 19.028066528066528, "percentage": 95.14, "elapsed_time": "1:28:14", "remaining_time": "0:04:30", "throughput": 1318.06, "total_tokens": 6978528}
7342
+ {"current_steps": 36615, "total_steps": 38480, "loss": 0.0892, "lr": 3.5730629080573676e-07, "epoch": 19.030665280665282, "percentage": 95.15, "elapsed_time": "1:28:15", "remaining_time": "0:04:29", "throughput": 1318.07, "total_tokens": 6979488}
7343
+ {"current_steps": 36620, "total_steps": 38480, "loss": 0.1147, "lr": 3.5539857524053145e-07, "epoch": 19.033264033264032, "percentage": 95.17, "elapsed_time": "1:28:15", "remaining_time": "0:04:28", "throughput": 1318.09, "total_tokens": 6980512}
7344
+ {"current_steps": 36625, "total_steps": 38480, "loss": 0.1604, "lr": 3.534959296571189e-07, "epoch": 19.035862785862786, "percentage": 95.18, "elapsed_time": "1:28:16", "remaining_time": "0:04:28", "throughput": 1318.09, "total_tokens": 6981472}
7345
+ {"current_steps": 36630, "total_steps": 38480, "loss": 0.0576, "lr": 3.5159835444691926e-07, "epoch": 19.03846153846154, "percentage": 95.19, "elapsed_time": "1:28:17", "remaining_time": "0:04:27", "throughput": 1318.09, "total_tokens": 6982400}
7346
+ {"current_steps": 36635, "total_steps": 38480, "loss": 0.2829, "lr": 3.497058500003092e-07, "epoch": 19.04106029106029, "percentage": 95.21, "elapsed_time": "1:28:18", "remaining_time": "0:04:26", "throughput": 1318.12, "total_tokens": 6983456}
7347
+ {"current_steps": 36640, "total_steps": 38480, "loss": 0.1148, "lr": 3.4781841670662175e-07, "epoch": 19.043659043659044, "percentage": 95.22, "elapsed_time": "1:28:18", "remaining_time": "0:04:26", "throughput": 1318.12, "total_tokens": 6984416}
7348
+ {"current_steps": 36645, "total_steps": 38480, "loss": 0.1326, "lr": 3.4593605495414626e-07, "epoch": 19.046257796257795, "percentage": 95.23, "elapsed_time": "1:28:19", "remaining_time": "0:04:25", "throughput": 1318.15, "total_tokens": 6985472}
7349
+ {"current_steps": 36650, "total_steps": 38480, "loss": 0.1154, "lr": 3.440587651301286e-07, "epoch": 19.04885654885655, "percentage": 95.24, "elapsed_time": "1:28:20", "remaining_time": "0:04:24", "throughput": 1318.15, "total_tokens": 6986400}
7350
+ {"current_steps": 36655, "total_steps": 38480, "loss": 0.1229, "lr": 3.421865476207764e-07, "epoch": 19.051455301455302, "percentage": 95.26, "elapsed_time": "1:28:20", "remaining_time": "0:04:23", "throughput": 1318.14, "total_tokens": 6987296}
7351
+ {"current_steps": 36660, "total_steps": 38480, "loss": 0.133, "lr": 3.403194028112455e-07, "epoch": 19.054054054054053, "percentage": 95.27, "elapsed_time": "1:28:21", "remaining_time": "0:04:23", "throughput": 1318.14, "total_tokens": 6988224}
7352
+ {"current_steps": 36665, "total_steps": 38480, "loss": 0.0856, "lr": 3.3845733108564814e-07, "epoch": 19.056652806652806, "percentage": 95.28, "elapsed_time": "1:28:22", "remaining_time": "0:04:22", "throughput": 1318.14, "total_tokens": 6989120}
7353
+ {"current_steps": 36670, "total_steps": 38480, "loss": 0.0569, "lr": 3.366003328270667e-07, "epoch": 19.05925155925156, "percentage": 95.3, "elapsed_time": "1:28:22", "remaining_time": "0:04:21", "throughput": 1318.14, "total_tokens": 6990048}
7354
+ {"current_steps": 36675, "total_steps": 38480, "loss": 0.1763, "lr": 3.347484084175234e-07, "epoch": 19.06185031185031, "percentage": 95.31, "elapsed_time": "1:28:23", "remaining_time": "0:04:21", "throughput": 1318.15, "total_tokens": 6991040}
7355
+ {"current_steps": 36680, "total_steps": 38480, "loss": 0.0938, "lr": 3.3290155823800515e-07, "epoch": 19.064449064449065, "percentage": 95.32, "elapsed_time": "1:28:24", "remaining_time": "0:04:20", "throughput": 1318.15, "total_tokens": 6991968}
7356
+ {"current_steps": 36685, "total_steps": 38480, "loss": 0.0474, "lr": 3.310597826684525e-07, "epoch": 19.06704781704782, "percentage": 95.34, "elapsed_time": "1:28:25", "remaining_time": "0:04:19", "throughput": 1318.14, "total_tokens": 6992864}
7357
+ {"current_steps": 36690, "total_steps": 38480, "loss": 0.1205, "lr": 3.292230820877623e-07, "epoch": 19.06964656964657, "percentage": 95.35, "elapsed_time": "1:28:25", "remaining_time": "0:04:18", "throughput": 1318.16, "total_tokens": 6993856}
7358
+ {"current_steps": 36695, "total_steps": 38480, "loss": 0.1137, "lr": 3.273914568737879e-07, "epoch": 19.072245322245323, "percentage": 95.36, "elapsed_time": "1:28:26", "remaining_time": "0:04:18", "throughput": 1318.16, "total_tokens": 6994816}
7359
+ {"current_steps": 36700, "total_steps": 38480, "loss": 0.0563, "lr": 3.25564907403339e-07, "epoch": 19.074844074844076, "percentage": 95.37, "elapsed_time": "1:28:27", "remaining_time": "0:04:17", "throughput": 1318.19, "total_tokens": 6995872}
7360
+ {"current_steps": 36705, "total_steps": 38480, "loss": 0.1539, "lr": 3.237434340521789e-07, "epoch": 19.077442827442827, "percentage": 95.39, "elapsed_time": "1:28:27", "remaining_time": "0:04:16", "throughput": 1318.19, "total_tokens": 6996800}
7361
+ {"current_steps": 36710, "total_steps": 38480, "loss": 0.2104, "lr": 3.2192703719503005e-07, "epoch": 19.08004158004158, "percentage": 95.4, "elapsed_time": "1:28:28", "remaining_time": "0:04:15", "throughput": 1318.2, "total_tokens": 6997792}
7362
+ {"current_steps": 36715, "total_steps": 38480, "loss": 0.046, "lr": 3.2011571720556575e-07, "epoch": 19.08264033264033, "percentage": 95.41, "elapsed_time": "1:28:29", "remaining_time": "0:04:15", "throughput": 1318.19, "total_tokens": 6998688}
7363
+ {"current_steps": 36720, "total_steps": 38480, "loss": 0.1702, "lr": 3.183094744564213e-07, "epoch": 19.085239085239085, "percentage": 95.43, "elapsed_time": "1:28:30", "remaining_time": "0:04:14", "throughput": 1318.21, "total_tokens": 6999680}
7364
+ {"current_steps": 36725, "total_steps": 38480, "loss": 0.0578, "lr": 3.165083093191801e-07, "epoch": 19.08783783783784, "percentage": 95.44, "elapsed_time": "1:28:30", "remaining_time": "0:04:13", "throughput": 1318.21, "total_tokens": 7000640}
7365
+ {"current_steps": 36730, "total_steps": 38480, "loss": 0.028, "lr": 3.1471222216438723e-07, "epoch": 19.09043659043659, "percentage": 95.45, "elapsed_time": "1:28:31", "remaining_time": "0:04:13", "throughput": 1318.21, "total_tokens": 7001568}
7366
+ {"current_steps": 36735, "total_steps": 38480, "loss": 0.1083, "lr": 3.129212133615389e-07, "epoch": 19.093035343035343, "percentage": 95.47, "elapsed_time": "1:28:32", "remaining_time": "0:04:12", "throughput": 1318.21, "total_tokens": 7002496}
7367
+ {"current_steps": 36740, "total_steps": 38480, "loss": 0.114, "lr": 3.111352832790904e-07, "epoch": 19.095634095634097, "percentage": 95.48, "elapsed_time": "1:28:32", "remaining_time": "0:04:11", "throughput": 1318.22, "total_tokens": 7003456}
7368
+ {"current_steps": 36745, "total_steps": 38480, "loss": 0.1547, "lr": 3.093544322844505e-07, "epoch": 19.098232848232847, "percentage": 95.49, "elapsed_time": "1:28:33", "remaining_time": "0:04:10", "throughput": 1318.23, "total_tokens": 7004416}
7369
+ {"current_steps": 36750, "total_steps": 38480, "loss": 0.0858, "lr": 3.07578660743979e-07, "epoch": 19.1008316008316, "percentage": 95.5, "elapsed_time": "1:28:34", "remaining_time": "0:04:10", "throughput": 1318.25, "total_tokens": 7005440}
7370
+ {"current_steps": 36755, "total_steps": 38480, "loss": 0.0649, "lr": 3.058079690229948e-07, "epoch": 19.103430353430355, "percentage": 95.52, "elapsed_time": "1:28:34", "remaining_time": "0:04:09", "throughput": 1318.24, "total_tokens": 7006336}
7371
+ {"current_steps": 36760, "total_steps": 38480, "loss": 0.0862, "lr": 3.040423574857759e-07, "epoch": 19.106029106029105, "percentage": 95.53, "elapsed_time": "1:28:35", "remaining_time": "0:04:08", "throughput": 1318.24, "total_tokens": 7007264}
7372
+ {"current_steps": 36765, "total_steps": 38480, "loss": 0.069, "lr": 3.0228182649555126e-07, "epoch": 19.10862785862786, "percentage": 95.54, "elapsed_time": "1:28:36", "remaining_time": "0:04:07", "throughput": 1318.25, "total_tokens": 7008224}
7373
+ {"current_steps": 36770, "total_steps": 38480, "loss": 0.1026, "lr": 3.00526376414495e-07, "epoch": 19.111226611226613, "percentage": 95.56, "elapsed_time": "1:28:36", "remaining_time": "0:04:07", "throughput": 1318.25, "total_tokens": 7009152}
7374
+ {"current_steps": 36775, "total_steps": 38480, "loss": 0.1017, "lr": 2.9877600760375434e-07, "epoch": 19.113825363825363, "percentage": 95.57, "elapsed_time": "1:28:37", "remaining_time": "0:04:06", "throughput": 1318.26, "total_tokens": 7010112}
7375
+ {"current_steps": 36780, "total_steps": 38480, "loss": 0.1637, "lr": 2.970307204234163e-07, "epoch": 19.116424116424117, "percentage": 95.58, "elapsed_time": "1:28:38", "remaining_time": "0:04:05", "throughput": 1318.26, "total_tokens": 7011040}
7376
+ {"current_steps": 36785, "total_steps": 38480, "loss": 0.0493, "lr": 2.9529051523253247e-07, "epoch": 19.11902286902287, "percentage": 95.6, "elapsed_time": "1:28:39", "remaining_time": "0:04:05", "throughput": 1318.25, "total_tokens": 7011936}
7377
+ {"current_steps": 36790, "total_steps": 38480, "loss": 0.0768, "lr": 2.935553923890999e-07, "epoch": 19.12162162162162, "percentage": 95.61, "elapsed_time": "1:28:39", "remaining_time": "0:04:04", "throughput": 1318.26, "total_tokens": 7012928}
7378
+ {"current_steps": 36795, "total_steps": 38480, "loss": 0.1658, "lr": 2.9182535225007744e-07, "epoch": 19.124220374220375, "percentage": 95.62, "elapsed_time": "1:28:40", "remaining_time": "0:04:03", "throughput": 1318.27, "total_tokens": 7013888}
7379
+ {"current_steps": 36800, "total_steps": 38480, "loss": 0.0857, "lr": 2.9010039517137487e-07, "epoch": 19.126819126819125, "percentage": 95.63, "elapsed_time": "1:28:41", "remaining_time": "0:04:02", "throughput": 1318.28, "total_tokens": 7014880}
7380
+ {"current_steps": 36805, "total_steps": 38480, "loss": 0.151, "lr": 2.883805215078583e-07, "epoch": 19.12941787941788, "percentage": 95.65, "elapsed_time": "1:28:41", "remaining_time": "0:04:02", "throughput": 1318.3, "total_tokens": 7015872}
7381
+ {"current_steps": 36810, "total_steps": 38480, "loss": 0.1465, "lr": 2.8666573161334475e-07, "epoch": 19.132016632016633, "percentage": 95.66, "elapsed_time": "1:28:42", "remaining_time": "0:04:01", "throughput": 1318.3, "total_tokens": 7016832}
7382
+ {"current_steps": 36815, "total_steps": 38480, "loss": 0.0488, "lr": 2.849560258406075e-07, "epoch": 19.134615384615383, "percentage": 95.67, "elapsed_time": "1:28:43", "remaining_time": "0:04:00", "throughput": 1318.31, "total_tokens": 7017792}
7383
+ {"current_steps": 36820, "total_steps": 38480, "loss": 0.1574, "lr": 2.8325140454137646e-07, "epoch": 19.137214137214137, "percentage": 95.69, "elapsed_time": "1:28:44", "remaining_time": "0:04:00", "throughput": 1318.33, "total_tokens": 7018816}
7384
+ {"current_steps": 36825, "total_steps": 38480, "loss": 0.0932, "lr": 2.815518680663293e-07, "epoch": 19.13981288981289, "percentage": 95.7, "elapsed_time": "1:28:44", "remaining_time": "0:03:59", "throughput": 1318.32, "total_tokens": 7019712}
7385
+ {"current_steps": 36830, "total_steps": 38480, "loss": 0.0936, "lr": 2.798574167651058e-07, "epoch": 19.14241164241164, "percentage": 95.71, "elapsed_time": "1:28:45", "remaining_time": "0:03:58", "throughput": 1318.32, "total_tokens": 7020640}
7386
+ {"current_steps": 36835, "total_steps": 38480, "loss": 0.2183, "lr": 2.7816805098628826e-07, "epoch": 19.145010395010395, "percentage": 95.73, "elapsed_time": "1:28:46", "remaining_time": "0:03:57", "throughput": 1318.34, "total_tokens": 7021632}
7387
+ {"current_steps": 36840, "total_steps": 38480, "loss": 0.122, "lr": 2.7648377107742375e-07, "epoch": 19.14760914760915, "percentage": 95.74, "elapsed_time": "1:28:46", "remaining_time": "0:03:57", "throughput": 1318.36, "total_tokens": 7022688}
7388
+ {"current_steps": 36845, "total_steps": 38480, "loss": 0.1587, "lr": 2.748045773850072e-07, "epoch": 19.1502079002079, "percentage": 95.75, "elapsed_time": "1:28:47", "remaining_time": "0:03:56", "throughput": 1318.37, "total_tokens": 7023648}
7389
+ {"current_steps": 36850, "total_steps": 38480, "loss": 0.0872, "lr": 2.731304702544929e-07, "epoch": 19.152806652806653, "percentage": 95.76, "elapsed_time": "1:28:48", "remaining_time": "0:03:55", "throughput": 1318.37, "total_tokens": 7024576}
7390
+ {"current_steps": 36855, "total_steps": 38480, "loss": 0.204, "lr": 2.7146145003028036e-07, "epoch": 19.155405405405407, "percentage": 95.78, "elapsed_time": "1:28:48", "remaining_time": "0:03:54", "throughput": 1318.38, "total_tokens": 7025536}
7391
+ {"current_steps": 36860, "total_steps": 38480, "loss": 0.0779, "lr": 2.6979751705572544e-07, "epoch": 19.158004158004157, "percentage": 95.79, "elapsed_time": "1:28:49", "remaining_time": "0:03:54", "throughput": 1318.38, "total_tokens": 7026496}
7392
+ {"current_steps": 36865, "total_steps": 38480, "loss": 0.1728, "lr": 2.6813867167314324e-07, "epoch": 19.16060291060291, "percentage": 95.8, "elapsed_time": "1:28:50", "remaining_time": "0:03:53", "throughput": 1318.39, "total_tokens": 7027456}
7393
+ {"current_steps": 36870, "total_steps": 38480, "loss": 0.1078, "lr": 2.664849142237968e-07, "epoch": 19.16320166320166, "percentage": 95.82, "elapsed_time": "1:28:51", "remaining_time": "0:03:52", "throughput": 1318.38, "total_tokens": 7028320}
7394
+ {"current_steps": 36875, "total_steps": 38480, "loss": 0.3906, "lr": 2.6483624504790026e-07, "epoch": 19.165800415800415, "percentage": 95.83, "elapsed_time": "1:28:51", "remaining_time": "0:03:52", "throughput": 1318.39, "total_tokens": 7029280}
7395
+ {"current_steps": 36880, "total_steps": 38480, "loss": 0.1687, "lr": 2.6319266448462666e-07, "epoch": 19.16839916839917, "percentage": 95.84, "elapsed_time": "1:28:52", "remaining_time": "0:03:51", "throughput": 1318.41, "total_tokens": 7030304}
7396
+ {"current_steps": 36885, "total_steps": 38480, "loss": 0.1239, "lr": 2.6155417287209994e-07, "epoch": 19.17099792099792, "percentage": 95.85, "elapsed_time": "1:28:53", "remaining_time": "0:03:50", "throughput": 1318.4, "total_tokens": 7031200}
7397
+ {"current_steps": 36890, "total_steps": 38480, "loss": 0.195, "lr": 2.5992077054739774e-07, "epoch": 19.173596673596673, "percentage": 95.87, "elapsed_time": "1:28:53", "remaining_time": "0:03:49", "throughput": 1318.41, "total_tokens": 7032192}
7398
+ {"current_steps": 36895, "total_steps": 38480, "loss": 0.1017, "lr": 2.5829245784654843e-07, "epoch": 19.176195426195427, "percentage": 95.88, "elapsed_time": "1:28:54", "remaining_time": "0:03:49", "throughput": 1318.41, "total_tokens": 7033088}
7399
+ {"current_steps": 36900, "total_steps": 38480, "loss": 0.114, "lr": 2.566692351045341e-07, "epoch": 19.178794178794178, "percentage": 95.89, "elapsed_time": "1:28:55", "remaining_time": "0:03:48", "throughput": 1318.43, "total_tokens": 7034144}
7400
+ {"current_steps": 36905, "total_steps": 38480, "loss": 0.165, "lr": 2.5505110265529044e-07, "epoch": 19.18139293139293, "percentage": 95.91, "elapsed_time": "1:28:55", "remaining_time": "0:03:47", "throughput": 1318.45, "total_tokens": 7035136}
7401
+ {"current_steps": 36910, "total_steps": 38480, "loss": 0.0703, "lr": 2.534380608317066e-07, "epoch": 19.183991683991685, "percentage": 95.92, "elapsed_time": "1:28:56", "remaining_time": "0:03:46", "throughput": 1318.46, "total_tokens": 7036128}
7402
+ {"current_steps": 36915, "total_steps": 38480, "loss": 0.1035, "lr": 2.5183010996562007e-07, "epoch": 19.186590436590436, "percentage": 95.93, "elapsed_time": "1:28:57", "remaining_time": "0:03:46", "throughput": 1318.46, "total_tokens": 7037088}
7403
+ {"current_steps": 36920, "total_steps": 38480, "loss": 0.1722, "lr": 2.502272503878328e-07, "epoch": 19.18918918918919, "percentage": 95.95, "elapsed_time": "1:28:58", "remaining_time": "0:03:45", "throughput": 1318.46, "total_tokens": 7037984}
7404
+ {"current_steps": 36925, "total_steps": 38480, "loss": 0.1954, "lr": 2.4862948242808383e-07, "epoch": 19.191787941787943, "percentage": 95.96, "elapsed_time": "1:28:58", "remaining_time": "0:03:44", "throughput": 1318.47, "total_tokens": 7038944}
7405
+ {"current_steps": 36930, "total_steps": 38480, "loss": 0.2661, "lr": 2.4703680641507407e-07, "epoch": 19.194386694386694, "percentage": 95.97, "elapsed_time": "1:28:59", "remaining_time": "0:03:44", "throughput": 1318.47, "total_tokens": 7039872}
7406
+ {"current_steps": 36935, "total_steps": 38480, "loss": 0.1487, "lr": 2.454492226764582e-07, "epoch": 19.196985446985448, "percentage": 95.98, "elapsed_time": "1:29:00", "remaining_time": "0:03:43", "throughput": 1318.46, "total_tokens": 7040768}
7407
+ {"current_steps": 36940, "total_steps": 38480, "loss": 0.1226, "lr": 2.43866731538836e-07, "epoch": 19.1995841995842, "percentage": 96.0, "elapsed_time": "1:29:00", "remaining_time": "0:03:42", "throughput": 1318.47, "total_tokens": 7041760}
7408
+ {"current_steps": 36945, "total_steps": 38480, "loss": 0.1808, "lr": 2.422893333277665e-07, "epoch": 19.20218295218295, "percentage": 96.01, "elapsed_time": "1:29:01", "remaining_time": "0:03:41", "throughput": 1318.49, "total_tokens": 7042752}
7409
+ {"current_steps": 36950, "total_steps": 38480, "loss": 0.1086, "lr": 2.40717028367754e-07, "epoch": 19.204781704781706, "percentage": 96.02, "elapsed_time": "1:29:02", "remaining_time": "0:03:41", "throughput": 1318.49, "total_tokens": 7043680}
7410
+ {"current_steps": 36955, "total_steps": 38480, "loss": 0.1171, "lr": 2.39149816982262e-07, "epoch": 19.207380457380456, "percentage": 96.04, "elapsed_time": "1:29:02", "remaining_time": "0:03:40", "throughput": 1318.5, "total_tokens": 7044704}
7411
+ {"current_steps": 36960, "total_steps": 38480, "loss": 0.2331, "lr": 2.3758769949370762e-07, "epoch": 19.20997920997921, "percentage": 96.05, "elapsed_time": "1:29:03", "remaining_time": "0:03:39", "throughput": 1318.5, "total_tokens": 7045600}
7412
+ {"current_steps": 36965, "total_steps": 38480, "loss": 0.1054, "lr": 2.3603067622344488e-07, "epoch": 19.212577962577964, "percentage": 96.06, "elapsed_time": "1:29:04", "remaining_time": "0:03:39", "throughput": 1318.51, "total_tokens": 7046592}
7413
+ {"current_steps": 36970, "total_steps": 38480, "loss": 0.1332, "lr": 2.3447874749180088e-07, "epoch": 19.215176715176714, "percentage": 96.08, "elapsed_time": "1:29:05", "remaining_time": "0:03:38", "throughput": 1318.52, "total_tokens": 7047552}
7414
+ {"current_steps": 36975, "total_steps": 38480, "loss": 0.1319, "lr": 2.3293191361803967e-07, "epoch": 19.217775467775468, "percentage": 96.09, "elapsed_time": "1:29:05", "remaining_time": "0:03:37", "throughput": 1318.52, "total_tokens": 7048480}
7415
+ {"current_steps": 36980, "total_steps": 38480, "loss": 0.0791, "lr": 2.3139017492038172e-07, "epoch": 19.22037422037422, "percentage": 96.1, "elapsed_time": "1:29:06", "remaining_time": "0:03:36", "throughput": 1318.52, "total_tokens": 7049408}
7416
+ {"current_steps": 36985, "total_steps": 38480, "loss": 0.0711, "lr": 2.2985353171599833e-07, "epoch": 19.222972972972972, "percentage": 96.11, "elapsed_time": "1:29:07", "remaining_time": "0:03:36", "throughput": 1318.52, "total_tokens": 7050336}
7417
+ {"current_steps": 36990, "total_steps": 38480, "loss": 0.1553, "lr": 2.2832198432101714e-07, "epoch": 19.225571725571726, "percentage": 96.13, "elapsed_time": "1:29:07", "remaining_time": "0:03:35", "throughput": 1318.54, "total_tokens": 7051360}
7418
+ {"current_steps": 36995, "total_steps": 38480, "loss": 0.1143, "lr": 2.2679553305051116e-07, "epoch": 19.22817047817048, "percentage": 96.14, "elapsed_time": "1:29:08", "remaining_time": "0:03:34", "throughput": 1318.54, "total_tokens": 7052288}
7419
+ {"current_steps": 37000, "total_steps": 38480, "loss": 0.1458, "lr": 2.2527417821850972e-07, "epoch": 19.23076923076923, "percentage": 96.15, "elapsed_time": "1:29:09", "remaining_time": "0:03:33", "throughput": 1318.55, "total_tokens": 7053248}
7420
+ {"current_steps": 37005, "total_steps": 38480, "loss": 0.0819, "lr": 2.2375792013798748e-07, "epoch": 19.233367983367984, "percentage": 96.17, "elapsed_time": "1:29:09", "remaining_time": "0:03:33", "throughput": 1318.56, "total_tokens": 7054240}
7421
+ {"current_steps": 37010, "total_steps": 38480, "loss": 0.0846, "lr": 2.22246759120881e-07, "epoch": 19.235966735966738, "percentage": 96.18, "elapsed_time": "1:29:10", "remaining_time": "0:03:32", "throughput": 1318.59, "total_tokens": 7055328}
7422
+ {"current_steps": 37015, "total_steps": 38480, "loss": 0.1237, "lr": 2.207406954780694e-07, "epoch": 19.238565488565488, "percentage": 96.19, "elapsed_time": "1:29:11", "remaining_time": "0:03:31", "throughput": 1318.6, "total_tokens": 7056288}
7423
+ {"current_steps": 37020, "total_steps": 38480, "loss": 0.1336, "lr": 2.1923972951938543e-07, "epoch": 19.241164241164242, "percentage": 96.21, "elapsed_time": "1:29:12", "remaining_time": "0:03:31", "throughput": 1318.61, "total_tokens": 7057280}
7424
+ {"current_steps": 37025, "total_steps": 38480, "loss": 0.0573, "lr": 2.1774386155361538e-07, "epoch": 19.243762993762992, "percentage": 96.22, "elapsed_time": "1:29:12", "remaining_time": "0:03:30", "throughput": 1318.61, "total_tokens": 7058240}
7425
+ {"current_steps": 37030, "total_steps": 38480, "loss": 0.1071, "lr": 2.1625309188849363e-07, "epoch": 19.246361746361746, "percentage": 96.23, "elapsed_time": "1:29:13", "remaining_time": "0:03:29", "throughput": 1318.63, "total_tokens": 7059264}
7426
+ {"current_steps": 37035, "total_steps": 38480, "loss": 0.0936, "lr": 2.1476742083070546e-07, "epoch": 19.2489604989605, "percentage": 96.24, "elapsed_time": "1:29:14", "remaining_time": "0:03:28", "throughput": 1318.64, "total_tokens": 7060224}
7427
+ {"current_steps": 37040, "total_steps": 38480, "loss": 0.1143, "lr": 2.132868486858869e-07, "epoch": 19.25155925155925, "percentage": 96.26, "elapsed_time": "1:29:14", "remaining_time": "0:03:28", "throughput": 1318.64, "total_tokens": 7061152}
7428
+ {"current_steps": 37045, "total_steps": 38480, "loss": 0.1498, "lr": 2.118113757586332e-07, "epoch": 19.254158004158004, "percentage": 96.27, "elapsed_time": "1:29:15", "remaining_time": "0:03:27", "throughput": 1318.65, "total_tokens": 7062112}
7429
+ {"current_steps": 37050, "total_steps": 38480, "loss": 0.0238, "lr": 2.1034100235248212e-07, "epoch": 19.256756756756758, "percentage": 96.28, "elapsed_time": "1:29:16", "remaining_time": "0:03:26", "throughput": 1318.65, "total_tokens": 7063040}
7430
+ {"current_steps": 37055, "total_steps": 38480, "loss": 0.1291, "lr": 2.0887572876992223e-07, "epoch": 19.259355509355508, "percentage": 96.3, "elapsed_time": "1:29:16", "remaining_time": "0:03:26", "throughput": 1318.65, "total_tokens": 7063968}
7431
+ {"current_steps": 37060, "total_steps": 38480, "loss": 0.087, "lr": 2.0741555531239855e-07, "epoch": 19.261954261954262, "percentage": 96.31, "elapsed_time": "1:29:17", "remaining_time": "0:03:25", "throughput": 1318.67, "total_tokens": 7064992}
7432
+ {"current_steps": 37065, "total_steps": 38480, "loss": 0.1066, "lr": 2.0596048228030128e-07, "epoch": 19.264553014553016, "percentage": 96.32, "elapsed_time": "1:29:18", "remaining_time": "0:03:24", "throughput": 1318.68, "total_tokens": 7065984}
7433
+ {"current_steps": 37070, "total_steps": 38480, "loss": 0.2216, "lr": 2.0451050997297162e-07, "epoch": 19.267151767151766, "percentage": 96.34, "elapsed_time": "1:29:19", "remaining_time": "0:03:23", "throughput": 1318.7, "total_tokens": 7067008}
7434
+ {"current_steps": 37075, "total_steps": 38480, "loss": 0.1244, "lr": 2.03065638688707e-07, "epoch": 19.26975051975052, "percentage": 96.35, "elapsed_time": "1:29:19", "remaining_time": "0:03:23", "throughput": 1318.72, "total_tokens": 7068032}
7435
+ {"current_steps": 37080, "total_steps": 38480, "loss": 0.0852, "lr": 2.016258687247502e-07, "epoch": 19.272349272349274, "percentage": 96.36, "elapsed_time": "1:29:20", "remaining_time": "0:03:22", "throughput": 1318.72, "total_tokens": 7068960}
7436
+ {"current_steps": 37085, "total_steps": 38480, "loss": 0.0781, "lr": 2.0019120037730043e-07, "epoch": 19.274948024948024, "percentage": 96.37, "elapsed_time": "1:29:21", "remaining_time": "0:03:21", "throughput": 1318.71, "total_tokens": 7069856}
7437
+ {"current_steps": 37090, "total_steps": 38480, "loss": 0.1291, "lr": 1.987616339414966e-07, "epoch": 19.277546777546778, "percentage": 96.39, "elapsed_time": "1:29:21", "remaining_time": "0:03:20", "throughput": 1318.72, "total_tokens": 7070816}
7438
+ {"current_steps": 37095, "total_steps": 38480, "loss": 0.1157, "lr": 1.9733716971143678e-07, "epoch": 19.28014553014553, "percentage": 96.4, "elapsed_time": "1:29:22", "remaining_time": "0:03:20", "throughput": 1318.72, "total_tokens": 7071744}
7439
+ {"current_steps": 37100, "total_steps": 38480, "loss": 0.1562, "lr": 1.9591780798016989e-07, "epoch": 19.282744282744282, "percentage": 96.41, "elapsed_time": "1:29:23", "remaining_time": "0:03:19", "throughput": 1318.73, "total_tokens": 7072736}
7440
+ {"current_steps": 37105, "total_steps": 38480, "loss": 0.0848, "lr": 1.9450354903968737e-07, "epoch": 19.285343035343036, "percentage": 96.43, "elapsed_time": "1:29:23", "remaining_time": "0:03:18", "throughput": 1318.72, "total_tokens": 7073600}
7441
+ {"current_steps": 37110, "total_steps": 38480, "loss": 0.1953, "lr": 1.930943931809398e-07, "epoch": 19.287941787941786, "percentage": 96.44, "elapsed_time": "1:29:24", "remaining_time": "0:03:18", "throughput": 1318.72, "total_tokens": 7074528}
7442
+ {"current_steps": 37115, "total_steps": 38480, "loss": 0.0734, "lr": 1.9169034069382584e-07, "epoch": 19.29054054054054, "percentage": 96.45, "elapsed_time": "1:29:25", "remaining_time": "0:03:17", "throughput": 1318.73, "total_tokens": 7075488}
7443
+ {"current_steps": 37120, "total_steps": 38480, "loss": 0.1451, "lr": 1.9029139186718948e-07, "epoch": 19.293139293139294, "percentage": 96.47, "elapsed_time": "1:29:26", "remaining_time": "0:03:16", "throughput": 1318.73, "total_tokens": 7076448}
7444
+ {"current_steps": 37125, "total_steps": 38480, "loss": 0.187, "lr": 1.8889754698882822e-07, "epoch": 19.295738045738045, "percentage": 96.48, "elapsed_time": "1:29:26", "remaining_time": "0:03:15", "throughput": 1318.73, "total_tokens": 7077344}
7445
+ {"current_steps": 37130, "total_steps": 38480, "loss": 0.1052, "lr": 1.8750880634548772e-07, "epoch": 19.2983367983368, "percentage": 96.49, "elapsed_time": "1:29:27", "remaining_time": "0:03:15", "throughput": 1318.73, "total_tokens": 7078272}
7446
+ {"current_steps": 37135, "total_steps": 38480, "loss": 0.074, "lr": 1.8612517022286725e-07, "epoch": 19.300935550935552, "percentage": 96.5, "elapsed_time": "1:29:28", "remaining_time": "0:03:14", "throughput": 1318.72, "total_tokens": 7079168}
7447
+ {"current_steps": 37140, "total_steps": 38480, "loss": 0.1099, "lr": 1.8474663890561405e-07, "epoch": 19.303534303534303, "percentage": 96.52, "elapsed_time": "1:29:28", "remaining_time": "0:03:13", "throughput": 1318.73, "total_tokens": 7080128}
7448
+ {"current_steps": 37145, "total_steps": 38480, "loss": 0.2409, "lr": 1.8337321267732356e-07, "epoch": 19.306133056133056, "percentage": 96.53, "elapsed_time": "1:29:29", "remaining_time": "0:03:12", "throughput": 1318.74, "total_tokens": 7081120}
7449
+ {"current_steps": 37150, "total_steps": 38480, "loss": 0.2103, "lr": 1.8200489182054193e-07, "epoch": 19.30873180873181, "percentage": 96.54, "elapsed_time": "1:29:30", "remaining_time": "0:03:12", "throughput": 1318.75, "total_tokens": 7082080}
7450
+ {"current_steps": 37155, "total_steps": 38480, "loss": 0.059, "lr": 1.80641676616769e-07, "epoch": 19.31133056133056, "percentage": 96.56, "elapsed_time": "1:29:30", "remaining_time": "0:03:11", "throughput": 1318.77, "total_tokens": 7083104}
7451
+ {"current_steps": 37160, "total_steps": 38480, "loss": 0.0991, "lr": 1.7928356734644436e-07, "epoch": 19.313929313929314, "percentage": 96.57, "elapsed_time": "1:29:31", "remaining_time": "0:03:10", "throughput": 1318.78, "total_tokens": 7084096}
7452
+ {"current_steps": 37165, "total_steps": 38480, "loss": 0.1022, "lr": 1.779305642889667e-07, "epoch": 19.316528066528065, "percentage": 96.58, "elapsed_time": "1:29:32", "remaining_time": "0:03:10", "throughput": 1318.8, "total_tokens": 7085120}
7453
+ {"current_steps": 37170, "total_steps": 38480, "loss": 0.1285, "lr": 1.7658266772268284e-07, "epoch": 19.31912681912682, "percentage": 96.6, "elapsed_time": "1:29:33", "remaining_time": "0:03:09", "throughput": 1318.81, "total_tokens": 7086112}
7454
+ {"current_steps": 37175, "total_steps": 38480, "loss": 0.1901, "lr": 1.7523987792488483e-07, "epoch": 19.321725571725572, "percentage": 96.61, "elapsed_time": "1:29:33", "remaining_time": "0:03:08", "throughput": 1318.81, "total_tokens": 7087040}
7455
+ {"current_steps": 37180, "total_steps": 38480, "loss": 0.1788, "lr": 1.739021951718184e-07, "epoch": 19.324324324324323, "percentage": 96.62, "elapsed_time": "1:29:34", "remaining_time": "0:03:07", "throughput": 1318.83, "total_tokens": 7088064}
7456
+ {"current_steps": 37185, "total_steps": 38480, "loss": 0.2472, "lr": 1.7256961973867447e-07, "epoch": 19.326923076923077, "percentage": 96.63, "elapsed_time": "1:29:35", "remaining_time": "0:03:07", "throughput": 1318.83, "total_tokens": 7088960}
7457
+ {"current_steps": 37190, "total_steps": 38480, "loss": 0.1178, "lr": 1.7124215189959768e-07, "epoch": 19.32952182952183, "percentage": 96.65, "elapsed_time": "1:29:35", "remaining_time": "0:03:06", "throughput": 1318.83, "total_tokens": 7089888}
7458
+ {"current_steps": 37195, "total_steps": 38480, "loss": 0.0835, "lr": 1.6991979192767793e-07, "epoch": 19.33212058212058, "percentage": 96.66, "elapsed_time": "1:29:36", "remaining_time": "0:03:05", "throughput": 1318.84, "total_tokens": 7090880}
7459
+ {"current_steps": 37200, "total_steps": 38480, "loss": 0.1826, "lr": 1.686025400949559e-07, "epoch": 19.334719334719335, "percentage": 96.67, "elapsed_time": "1:29:37", "remaining_time": "0:03:05", "throughput": 1318.85, "total_tokens": 7091872}
7460
+ {"current_steps": 37205, "total_steps": 38480, "loss": 0.0383, "lr": 1.6729039667242596e-07, "epoch": 19.33731808731809, "percentage": 96.69, "elapsed_time": "1:29:37", "remaining_time": "0:03:04", "throughput": 1318.85, "total_tokens": 7092768}
7461
+ {"current_steps": 37210, "total_steps": 38480, "loss": 0.0807, "lr": 1.6598336193002216e-07, "epoch": 19.33991683991684, "percentage": 96.7, "elapsed_time": "1:29:38", "remaining_time": "0:03:03", "throughput": 1318.86, "total_tokens": 7093728}
7462
+ {"current_steps": 37215, "total_steps": 38480, "loss": 0.1097, "lr": 1.64681436136635e-07, "epoch": 19.342515592515593, "percentage": 96.71, "elapsed_time": "1:29:39", "remaining_time": "0:03:02", "throughput": 1318.86, "total_tokens": 7094656}
7463
+ {"current_steps": 37220, "total_steps": 38480, "loss": 0.1048, "lr": 1.6338461956010022e-07, "epoch": 19.345114345114347, "percentage": 96.73, "elapsed_time": "1:29:40", "remaining_time": "0:03:02", "throughput": 1318.86, "total_tokens": 7095616}
7464
+ {"current_steps": 37225, "total_steps": 38480, "loss": 0.0584, "lr": 1.6209291246720714e-07, "epoch": 19.347713097713097, "percentage": 96.74, "elapsed_time": "1:29:40", "remaining_time": "0:03:01", "throughput": 1318.86, "total_tokens": 7096512}
7465
+ {"current_steps": 37230, "total_steps": 38480, "loss": 0.1753, "lr": 1.6080631512368772e-07, "epoch": 19.35031185031185, "percentage": 96.75, "elapsed_time": "1:29:41", "remaining_time": "0:03:00", "throughput": 1318.86, "total_tokens": 7097440}
7466
+ {"current_steps": 37235, "total_steps": 38480, "loss": 0.0747, "lr": 1.5952482779422463e-07, "epoch": 19.352910602910605, "percentage": 96.76, "elapsed_time": "1:29:42", "remaining_time": "0:02:59", "throughput": 1318.88, "total_tokens": 7098464}
7467
+ {"current_steps": 37240, "total_steps": 38480, "loss": 0.1681, "lr": 1.582484507424542e-07, "epoch": 19.355509355509355, "percentage": 96.78, "elapsed_time": "1:29:42", "remaining_time": "0:02:59", "throughput": 1318.89, "total_tokens": 7099456}
7468
+ {"current_steps": 37245, "total_steps": 38480, "loss": 0.1771, "lr": 1.569771842309553e-07, "epoch": 19.35810810810811, "percentage": 96.79, "elapsed_time": "1:29:43", "remaining_time": "0:02:58", "throughput": 1318.89, "total_tokens": 7100384}
7469
+ {"current_steps": 37250, "total_steps": 38480, "loss": 0.1031, "lr": 1.5571102852125762e-07, "epoch": 19.36070686070686, "percentage": 96.8, "elapsed_time": "1:29:44", "remaining_time": "0:02:57", "throughput": 1318.91, "total_tokens": 7101408}
7470
+ {"current_steps": 37255, "total_steps": 38480, "loss": 0.1251, "lr": 1.5444998387383892e-07, "epoch": 19.363305613305613, "percentage": 96.82, "elapsed_time": "1:29:45", "remaining_time": "0:02:57", "throughput": 1318.91, "total_tokens": 7102336}
7471
+ {"current_steps": 37260, "total_steps": 38480, "loss": 0.1147, "lr": 1.5319405054813052e-07, "epoch": 19.365904365904367, "percentage": 96.83, "elapsed_time": "1:29:45", "remaining_time": "0:02:56", "throughput": 1318.92, "total_tokens": 7103360}
7472
+ {"current_steps": 37265, "total_steps": 38480, "loss": 0.0945, "lr": 1.5194322880250356e-07, "epoch": 19.368503118503117, "percentage": 96.84, "elapsed_time": "1:29:46", "remaining_time": "0:02:55", "throughput": 1318.91, "total_tokens": 7104224}
7473
+ {"current_steps": 37270, "total_steps": 38480, "loss": 0.139, "lr": 1.5069751889428274e-07, "epoch": 19.37110187110187, "percentage": 96.86, "elapsed_time": "1:29:47", "remaining_time": "0:02:54", "throughput": 1318.86, "total_tokens": 7105152}
7474
+ {"current_steps": 37275, "total_steps": 38480, "loss": 0.0815, "lr": 1.4945692107973808e-07, "epoch": 19.373700623700625, "percentage": 96.87, "elapsed_time": "1:29:48", "remaining_time": "0:02:54", "throughput": 1318.88, "total_tokens": 7106208}
7475
+ {"current_steps": 37280, "total_steps": 38480, "loss": 0.124, "lr": 1.4822143561409317e-07, "epoch": 19.376299376299375, "percentage": 96.88, "elapsed_time": "1:29:48", "remaining_time": "0:02:53", "throughput": 1318.88, "total_tokens": 7107136}
7476
+ {"current_steps": 37285, "total_steps": 38480, "loss": 0.1934, "lr": 1.4699106275151975e-07, "epoch": 19.37889812889813, "percentage": 96.89, "elapsed_time": "1:29:49", "remaining_time": "0:02:52", "throughput": 1318.89, "total_tokens": 7108128}
7477
+ {"current_steps": 37290, "total_steps": 38480, "loss": 0.2367, "lr": 1.4576580274512642e-07, "epoch": 19.381496881496883, "percentage": 96.91, "elapsed_time": "1:29:50", "remaining_time": "0:02:52", "throughput": 1318.9, "total_tokens": 7109088}
7478
+ {"current_steps": 37295, "total_steps": 38480, "loss": 0.0762, "lr": 1.4454565584698654e-07, "epoch": 19.384095634095633, "percentage": 96.92, "elapsed_time": "1:29:50", "remaining_time": "0:02:51", "throughput": 1318.9, "total_tokens": 7109984}
7479
+ {"current_steps": 37300, "total_steps": 38480, "loss": 0.1094, "lr": 1.4333062230810769e-07, "epoch": 19.386694386694387, "percentage": 96.93, "elapsed_time": "1:29:51", "remaining_time": "0:02:50", "throughput": 1318.9, "total_tokens": 7110912}
7480
+ {"current_steps": 37305, "total_steps": 38480, "loss": 0.1125, "lr": 1.4212070237845378e-07, "epoch": 19.38929313929314, "percentage": 96.95, "elapsed_time": "1:29:52", "remaining_time": "0:02:49", "throughput": 1318.9, "total_tokens": 7111872}
7481
+ {"current_steps": 37310, "total_steps": 38480, "loss": 0.0794, "lr": 1.409158963069368e-07, "epoch": 19.39189189189189, "percentage": 96.96, "elapsed_time": "1:29:52", "remaining_time": "0:02:49", "throughput": 1318.91, "total_tokens": 7112832}
7482
+ {"current_steps": 37315, "total_steps": 38480, "loss": 0.1149, "lr": 1.3971620434140852e-07, "epoch": 19.394490644490645, "percentage": 96.97, "elapsed_time": "1:29:53", "remaining_time": "0:02:48", "throughput": 1318.91, "total_tokens": 7113728}
7483
+ {"current_steps": 37320, "total_steps": 38480, "loss": 0.0851, "lr": 1.3852162672867708e-07, "epoch": 19.397089397089395, "percentage": 96.99, "elapsed_time": "1:29:54", "remaining_time": "0:02:47", "throughput": 1318.91, "total_tokens": 7114656}
7484
+ {"current_steps": 37325, "total_steps": 38480, "loss": 0.113, "lr": 1.3733216371449586e-07, "epoch": 19.39968814968815, "percentage": 97.0, "elapsed_time": "1:29:55", "remaining_time": "0:02:46", "throughput": 1318.91, "total_tokens": 7115616}
7485
+ {"current_steps": 37330, "total_steps": 38480, "loss": 0.2061, "lr": 1.361478155435636e-07, "epoch": 19.402286902286903, "percentage": 97.01, "elapsed_time": "1:29:55", "remaining_time": "0:02:46", "throughput": 1318.93, "total_tokens": 7116640}
7486
+ {"current_steps": 37335, "total_steps": 38480, "loss": 0.0904, "lr": 1.349685824595326e-07, "epoch": 19.404885654885653, "percentage": 97.02, "elapsed_time": "1:29:56", "remaining_time": "0:02:45", "throughput": 1318.94, "total_tokens": 7117600}
7487
+ {"current_steps": 37340, "total_steps": 38480, "loss": 0.1449, "lr": 1.3379446470499767e-07, "epoch": 19.407484407484407, "percentage": 97.04, "elapsed_time": "1:29:57", "remaining_time": "0:02:44", "throughput": 1318.96, "total_tokens": 7118624}
7488
+ {"current_steps": 37345, "total_steps": 38480, "loss": 0.1969, "lr": 1.32625462521499e-07, "epoch": 19.41008316008316, "percentage": 97.05, "elapsed_time": "1:29:57", "remaining_time": "0:02:44", "throughput": 1318.95, "total_tokens": 7119520}
7489
+ {"current_steps": 37350, "total_steps": 38480, "loss": 0.1411, "lr": 1.314615761495358e-07, "epoch": 19.41268191268191, "percentage": 97.06, "elapsed_time": "1:29:58", "remaining_time": "0:02:43", "throughput": 1318.96, "total_tokens": 7120480}
7490
+ {"current_steps": 37355, "total_steps": 38480, "loss": 0.188, "lr": 1.303028058285388e-07, "epoch": 19.415280665280665, "percentage": 97.08, "elapsed_time": "1:29:59", "remaining_time": "0:02:42", "throughput": 1318.97, "total_tokens": 7121440}
7491
+ {"current_steps": 37360, "total_steps": 38480, "loss": 0.1467, "lr": 1.2914915179690067e-07, "epoch": 19.41787941787942, "percentage": 97.09, "elapsed_time": "1:29:59", "remaining_time": "0:02:41", "throughput": 1318.96, "total_tokens": 7122336}
7492
+ {"current_steps": 37365, "total_steps": 38480, "loss": 0.3333, "lr": 1.2800061429195377e-07, "epoch": 19.42047817047817, "percentage": 97.1, "elapsed_time": "1:30:00", "remaining_time": "0:02:41", "throughput": 1318.96, "total_tokens": 7123264}
7493
+ {"current_steps": 37370, "total_steps": 38480, "loss": 0.0825, "lr": 1.268571935499785e-07, "epoch": 19.423076923076923, "percentage": 97.12, "elapsed_time": "1:30:01", "remaining_time": "0:02:40", "throughput": 1318.97, "total_tokens": 7124224}
7494
+ {"current_steps": 37375, "total_steps": 38480, "loss": 0.1454, "lr": 1.2571888980620615e-07, "epoch": 19.425675675675677, "percentage": 97.13, "elapsed_time": "1:30:02", "remaining_time": "0:02:39", "throughput": 1318.97, "total_tokens": 7125120}
7495
+ {"current_steps": 37380, "total_steps": 38480, "loss": 0.1334, "lr": 1.2458570329481057e-07, "epoch": 19.428274428274428, "percentage": 97.14, "elapsed_time": "1:30:02", "remaining_time": "0:02:38", "throughput": 1318.97, "total_tokens": 7126048}
7496
+ {"current_steps": 37385, "total_steps": 38480, "loss": 0.2864, "lr": 1.2345763424891632e-07, "epoch": 19.43087318087318, "percentage": 97.15, "elapsed_time": "1:30:03", "remaining_time": "0:02:38", "throughput": 1318.98, "total_tokens": 7127040}
7497
+ {"current_steps": 37390, "total_steps": 38480, "loss": 0.1467, "lr": 1.2233468290059614e-07, "epoch": 19.433471933471935, "percentage": 97.17, "elapsed_time": "1:30:04", "remaining_time": "0:02:37", "throughput": 1319.0, "total_tokens": 7128064}
7498
+ {"current_steps": 37395, "total_steps": 38480, "loss": 0.0583, "lr": 1.2121684948086243e-07, "epoch": 19.436070686070686, "percentage": 97.18, "elapsed_time": "1:30:04", "remaining_time": "0:02:36", "throughput": 1319.01, "total_tokens": 7129024}
7499
+ {"current_steps": 37400, "total_steps": 38480, "loss": 0.1799, "lr": 1.2010413421968404e-07, "epoch": 19.43866943866944, "percentage": 97.19, "elapsed_time": "1:30:05", "remaining_time": "0:02:36", "throughput": 1319.01, "total_tokens": 7129952}
7500
+ {"current_steps": 37405, "total_steps": 38480, "loss": 0.1727, "lr": 1.1899653734597505e-07, "epoch": 19.44126819126819, "percentage": 97.21, "elapsed_time": "1:30:06", "remaining_time": "0:02:35", "throughput": 1319.01, "total_tokens": 7130880}
7501
+ {"current_steps": 37410, "total_steps": 38480, "loss": 0.0662, "lr": 1.1789405908758932e-07, "epoch": 19.443866943866944, "percentage": 97.22, "elapsed_time": "1:30:06", "remaining_time": "0:02:34", "throughput": 1319.0, "total_tokens": 7131744}
7502
+ {"current_steps": 37415, "total_steps": 38480, "loss": 0.0779, "lr": 1.1679669967133434e-07, "epoch": 19.446465696465697, "percentage": 97.23, "elapsed_time": "1:30:07", "remaining_time": "0:02:33", "throughput": 1319.01, "total_tokens": 7132736}
7503
+ {"current_steps": 37420, "total_steps": 38480, "loss": 0.0444, "lr": 1.1570445932296837e-07, "epoch": 19.449064449064448, "percentage": 97.25, "elapsed_time": "1:30:08", "remaining_time": "0:02:33", "throughput": 1319.0, "total_tokens": 7133600}
7504
+ {"current_steps": 37425, "total_steps": 38480, "loss": 0.2544, "lr": 1.146173382671839e-07, "epoch": 19.4516632016632, "percentage": 97.26, "elapsed_time": "1:30:09", "remaining_time": "0:02:32", "throughput": 1319.0, "total_tokens": 7134528}
7505
+ {"current_steps": 37430, "total_steps": 38480, "loss": 0.0716, "lr": 1.1353533672763261e-07, "epoch": 19.454261954261955, "percentage": 97.27, "elapsed_time": "1:30:09", "remaining_time": "0:02:31", "throughput": 1319.01, "total_tokens": 7135520}
7506
+ {"current_steps": 37435, "total_steps": 38480, "loss": 0.1168, "lr": 1.124584549269031e-07, "epoch": 19.456860706860706, "percentage": 97.28, "elapsed_time": "1:30:10", "remaining_time": "0:02:31", "throughput": 1319.03, "total_tokens": 7136544}
7507
+ {"current_steps": 37440, "total_steps": 38480, "loss": 0.103, "lr": 1.1138669308654314e-07, "epoch": 19.45945945945946, "percentage": 97.3, "elapsed_time": "1:30:11", "remaining_time": "0:02:30", "throughput": 1319.02, "total_tokens": 7137376}
7508
+ {"current_steps": 37445, "total_steps": 38480, "loss": 0.0263, "lr": 1.1032005142703195e-07, "epoch": 19.462058212058214, "percentage": 97.31, "elapsed_time": "1:30:11", "remaining_time": "0:02:29", "throughput": 1319.0, "total_tokens": 7138208}
7509
+ {"current_steps": 37450, "total_steps": 38480, "loss": 0.2115, "lr": 1.0925853016780784e-07, "epoch": 19.464656964656964, "percentage": 97.32, "elapsed_time": "1:30:12", "remaining_time": "0:02:28", "throughput": 1319.02, "total_tokens": 7139232}
7510
+ {"current_steps": 37455, "total_steps": 38480, "loss": 0.1168, "lr": 1.0820212952724896e-07, "epoch": 19.467255717255718, "percentage": 97.34, "elapsed_time": "1:30:13", "remaining_time": "0:02:28", "throughput": 1319.03, "total_tokens": 7140192}
7511
+ {"current_steps": 37460, "total_steps": 38480, "loss": 0.1024, "lr": 1.0715084972268418e-07, "epoch": 19.46985446985447, "percentage": 97.35, "elapsed_time": "1:30:13", "remaining_time": "0:02:27", "throughput": 1319.04, "total_tokens": 7141152}
7512
+ {"current_steps": 37465, "total_steps": 38480, "loss": 0.1656, "lr": 1.06104690970385e-07, "epoch": 19.472453222453222, "percentage": 97.36, "elapsed_time": "1:30:14", "remaining_time": "0:02:26", "throughput": 1319.06, "total_tokens": 7142208}
7513
+ {"current_steps": 37470, "total_steps": 38480, "loss": 0.1165, "lr": 1.0506365348557091e-07, "epoch": 19.475051975051976, "percentage": 97.38, "elapsed_time": "1:30:15", "remaining_time": "0:02:25", "throughput": 1319.06, "total_tokens": 7143136}
7514
+ {"current_steps": 37475, "total_steps": 38480, "loss": 0.1917, "lr": 1.0402773748240947e-07, "epoch": 19.477650727650726, "percentage": 97.39, "elapsed_time": "1:30:16", "remaining_time": "0:02:25", "throughput": 1319.07, "total_tokens": 7144128}
7515
+ {"current_steps": 37480, "total_steps": 38480, "loss": 0.0816, "lr": 1.0299694317401353e-07, "epoch": 19.48024948024948, "percentage": 97.4, "elapsed_time": "1:30:16", "remaining_time": "0:02:24", "throughput": 1319.08, "total_tokens": 7145120}
7516
+ {"current_steps": 37485, "total_steps": 38480, "loss": 0.0851, "lr": 1.0197127077244129e-07, "epoch": 19.482848232848234, "percentage": 97.41, "elapsed_time": "1:30:17", "remaining_time": "0:02:23", "throughput": 1319.08, "total_tokens": 7146048}
7517
+ {"current_steps": 37490, "total_steps": 38480, "loss": 0.1408, "lr": 1.0095072048869614e-07, "epoch": 19.485446985446984, "percentage": 97.43, "elapsed_time": "1:30:18", "remaining_time": "0:02:23", "throughput": 1319.08, "total_tokens": 7146944}
7518
+ {"current_steps": 37495, "total_steps": 38480, "loss": 0.116, "lr": 9.993529253273237e-08, "epoch": 19.488045738045738, "percentage": 97.44, "elapsed_time": "1:30:18", "remaining_time": "0:02:22", "throughput": 1319.09, "total_tokens": 7147904}
7519
+ {"current_steps": 37500, "total_steps": 38480, "loss": 0.1881, "lr": 9.892498711344678e-08, "epoch": 19.490644490644492, "percentage": 97.45, "elapsed_time": "1:30:19", "remaining_time": "0:02:21", "throughput": 1319.11, "total_tokens": 7148960}
7520
+ {"current_steps": 37505, "total_steps": 38480, "loss": 0.099, "lr": 9.791980443868421e-08, "epoch": 19.493243243243242, "percentage": 97.47, "elapsed_time": "1:30:20", "remaining_time": "0:02:20", "throughput": 1319.12, "total_tokens": 7149920}
7521
+ {"current_steps": 37510, "total_steps": 38480, "loss": 0.0691, "lr": 9.691974471523202e-08, "epoch": 19.495841995841996, "percentage": 97.48, "elapsed_time": "1:30:20", "remaining_time": "0:02:20", "throughput": 1319.12, "total_tokens": 7150880}
7522
+ {"current_steps": 37515, "total_steps": 38480, "loss": 0.1603, "lr": 9.592480814882843e-08, "epoch": 19.49844074844075, "percentage": 97.49, "elapsed_time": "1:30:21", "remaining_time": "0:02:19", "throughput": 1319.11, "total_tokens": 7151744}
7523
+ {"current_steps": 37520, "total_steps": 38480, "loss": 0.1808, "lr": 9.493499494415414e-08, "epoch": 19.5010395010395, "percentage": 97.51, "elapsed_time": "1:30:22", "remaining_time": "0:02:18", "throughput": 1319.13, "total_tokens": 7152768}
7524
+ {"current_steps": 37525, "total_steps": 38480, "loss": 0.1729, "lr": 9.395030530484072e-08, "epoch": 19.503638253638254, "percentage": 97.52, "elapsed_time": "1:30:23", "remaining_time": "0:02:18", "throughput": 1319.16, "total_tokens": 7153856}
7525
+ {"current_steps": 37530, "total_steps": 38480, "loss": 0.091, "lr": 9.297073943345946e-08, "epoch": 19.506237006237008, "percentage": 97.53, "elapsed_time": "1:30:23", "remaining_time": "0:02:17", "throughput": 1319.16, "total_tokens": 7154784}
7526
+ {"current_steps": 37535, "total_steps": 38480, "loss": 0.158, "lr": 9.199629753152972e-08, "epoch": 19.508835758835758, "percentage": 97.54, "elapsed_time": "1:30:24", "remaining_time": "0:02:16", "throughput": 1319.19, "total_tokens": 7155840}
7527
+ {"current_steps": 37540, "total_steps": 38480, "loss": 0.226, "lr": 9.102697979951613e-08, "epoch": 19.511434511434512, "percentage": 97.56, "elapsed_time": "1:30:25", "remaining_time": "0:02:15", "throughput": 1319.21, "total_tokens": 7156864}
7528
+ {"current_steps": 37545, "total_steps": 38480, "loss": 0.132, "lr": 9.006278643683696e-08, "epoch": 19.514033264033262, "percentage": 97.57, "elapsed_time": "1:30:25", "remaining_time": "0:02:15", "throughput": 1319.21, "total_tokens": 7157824}
7529
+ {"current_steps": 37550, "total_steps": 38480, "loss": 0.1228, "lr": 8.910371764184466e-08, "epoch": 19.516632016632016, "percentage": 97.58, "elapsed_time": "1:30:26", "remaining_time": "0:02:14", "throughput": 1319.21, "total_tokens": 7158752}
7530
+ {"current_steps": 37555, "total_steps": 38480, "loss": 0.1842, "lr": 8.814977361184251e-08, "epoch": 19.51923076923077, "percentage": 97.6, "elapsed_time": "1:30:27", "remaining_time": "0:02:13", "throughput": 1319.21, "total_tokens": 7159680}
7531
+ {"current_steps": 37560, "total_steps": 38480, "loss": 0.1641, "lr": 8.720095454308186e-08, "epoch": 19.52182952182952, "percentage": 97.61, "elapsed_time": "1:30:27", "remaining_time": "0:02:12", "throughput": 1319.2, "total_tokens": 7160544}
7532
+ {"current_steps": 37565, "total_steps": 38480, "loss": 0.1159, "lr": 8.625726063075656e-08, "epoch": 19.524428274428274, "percentage": 97.62, "elapsed_time": "1:30:28", "remaining_time": "0:02:12", "throughput": 1319.2, "total_tokens": 7161472}
7533
+ {"current_steps": 37570, "total_steps": 38480, "loss": 0.116, "lr": 8.531869206900578e-08, "epoch": 19.527027027027028, "percentage": 97.64, "elapsed_time": "1:30:29", "remaining_time": "0:02:11", "throughput": 1319.2, "total_tokens": 7162400}
7534
+ {"current_steps": 37575, "total_steps": 38480, "loss": 0.1517, "lr": 8.438524905091393e-08, "epoch": 19.52962577962578, "percentage": 97.65, "elapsed_time": "1:30:30", "remaining_time": "0:02:10", "throughput": 1319.21, "total_tokens": 7163360}
7535
+ {"current_steps": 37580, "total_steps": 38480, "loss": 0.254, "lr": 8.34569317685191e-08, "epoch": 19.532224532224532, "percentage": 97.66, "elapsed_time": "1:30:30", "remaining_time": "0:02:10", "throughput": 1319.2, "total_tokens": 7164256}
7536
+ {"current_steps": 37585, "total_steps": 38480, "loss": 0.2045, "lr": 8.253374041279072e-08, "epoch": 19.534823284823286, "percentage": 97.67, "elapsed_time": "1:30:31", "remaining_time": "0:02:09", "throughput": 1319.21, "total_tokens": 7165184}
7537
+ {"current_steps": 37590, "total_steps": 38480, "loss": 0.2791, "lr": 8.161567517365465e-08, "epoch": 19.537422037422036, "percentage": 97.69, "elapsed_time": "1:30:32", "remaining_time": "0:02:08", "throughput": 1319.22, "total_tokens": 7166176}
7538
+ {"current_steps": 37595, "total_steps": 38480, "loss": 0.0602, "lr": 8.070273623997926e-08, "epoch": 19.54002079002079, "percentage": 97.7, "elapsed_time": "1:30:32", "remaining_time": "0:02:07", "throughput": 1319.22, "total_tokens": 7167136}
7539
+ {"current_steps": 37600, "total_steps": 38480, "loss": 0.1672, "lr": 7.979492379957543e-08, "epoch": 19.542619542619544, "percentage": 97.71, "elapsed_time": "1:30:33", "remaining_time": "0:02:07", "throughput": 1319.22, "total_tokens": 7168032}
7540
+ {"current_steps": 37605, "total_steps": 38480, "loss": 0.1591, "lr": 7.889223803920764e-08, "epoch": 19.545218295218294, "percentage": 97.73, "elapsed_time": "1:30:34", "remaining_time": "0:02:06", "throughput": 1319.22, "total_tokens": 7168960}
7541
+ {"current_steps": 37610, "total_steps": 38480, "loss": 0.1352, "lr": 7.799467914457181e-08, "epoch": 19.54781704781705, "percentage": 97.74, "elapsed_time": "1:30:34", "remaining_time": "0:02:05", "throughput": 1319.21, "total_tokens": 7169824}
7542
+ {"current_steps": 37615, "total_steps": 38480, "loss": 0.0904, "lr": 7.7102247300323e-08, "epoch": 19.5504158004158, "percentage": 97.75, "elapsed_time": "1:30:35", "remaining_time": "0:02:04", "throughput": 1319.22, "total_tokens": 7170784}
7543
+ {"current_steps": 37620, "total_steps": 38480, "loss": 0.1026, "lr": 7.621494269005324e-08, "epoch": 19.553014553014552, "percentage": 97.77, "elapsed_time": "1:30:36", "remaining_time": "0:02:04", "throughput": 1319.23, "total_tokens": 7171808}
7544
+ {"current_steps": 37625, "total_steps": 38480, "loss": 0.0925, "lr": 7.533276549630264e-08, "epoch": 19.555613305613306, "percentage": 97.78, "elapsed_time": "1:30:37", "remaining_time": "0:02:03", "throughput": 1319.24, "total_tokens": 7172768}
7545
+ {"current_steps": 37630, "total_steps": 38480, "loss": 0.145, "lr": 7.445571590055655e-08, "epoch": 19.558212058212057, "percentage": 97.79, "elapsed_time": "1:30:37", "remaining_time": "0:02:02", "throughput": 1319.24, "total_tokens": 7173696}
7546
+ {"current_steps": 37635, "total_steps": 38480, "loss": 0.1111, "lr": 7.358379408324289e-08, "epoch": 19.56081081081081, "percentage": 97.8, "elapsed_time": "1:30:38", "remaining_time": "0:02:02", "throughput": 1319.25, "total_tokens": 7174688}
7547
+ {"current_steps": 37640, "total_steps": 38480, "loss": 0.1121, "lr": 7.271700022374039e-08, "epoch": 19.563409563409564, "percentage": 97.82, "elapsed_time": "1:30:39", "remaining_time": "0:02:01", "throughput": 1319.27, "total_tokens": 7175680}
7548
+ {"current_steps": 37645, "total_steps": 38480, "loss": 0.0447, "lr": 7.185533450036752e-08, "epoch": 19.566008316008315, "percentage": 97.83, "elapsed_time": "1:30:39", "remaining_time": "0:02:00", "throughput": 1319.26, "total_tokens": 7176576}
7549
+ {"current_steps": 37650, "total_steps": 38480, "loss": 0.1834, "lr": 7.099879709038804e-08, "epoch": 19.56860706860707, "percentage": 97.84, "elapsed_time": "1:30:40", "remaining_time": "0:01:59", "throughput": 1319.26, "total_tokens": 7177504}
7550
+ {"current_steps": 37655, "total_steps": 38480, "loss": 0.1028, "lr": 7.014738817001654e-08, "epoch": 19.571205821205822, "percentage": 97.86, "elapsed_time": "1:30:41", "remaining_time": "0:01:59", "throughput": 1319.27, "total_tokens": 7178496}
7551
+ {"current_steps": 37660, "total_steps": 38480, "loss": 0.1187, "lr": 6.93011079144018e-08, "epoch": 19.573804573804573, "percentage": 97.87, "elapsed_time": "1:30:41", "remaining_time": "0:01:58", "throughput": 1319.28, "total_tokens": 7179424}
7552
+ {"current_steps": 37665, "total_steps": 38480, "loss": 0.1855, "lr": 6.845995649764625e-08, "epoch": 19.576403326403327, "percentage": 97.88, "elapsed_time": "1:30:42", "remaining_time": "0:01:57", "throughput": 1319.29, "total_tokens": 7180448}
7553
+ {"current_steps": 37670, "total_steps": 38480, "loss": 0.0782, "lr": 6.762393409279755e-08, "epoch": 19.57900207900208, "percentage": 97.9, "elapsed_time": "1:30:43", "remaining_time": "0:01:57", "throughput": 1319.3, "total_tokens": 7181376}
7554
+ {"current_steps": 37675, "total_steps": 38480, "loss": 0.0625, "lr": 6.679304087184313e-08, "epoch": 19.58160083160083, "percentage": 97.91, "elapsed_time": "1:30:44", "remaining_time": "0:01:56", "throughput": 1319.3, "total_tokens": 7182304}
7555
+ {"current_steps": 37680, "total_steps": 38480, "loss": 0.0705, "lr": 6.596727700571847e-08, "epoch": 19.584199584199585, "percentage": 97.92, "elapsed_time": "1:30:44", "remaining_time": "0:01:55", "throughput": 1319.32, "total_tokens": 7183328}
7556
+ {"current_steps": 37685, "total_steps": 38480, "loss": 0.2967, "lr": 6.51466426643016e-08, "epoch": 19.58679833679834, "percentage": 97.93, "elapsed_time": "1:30:45", "remaining_time": "0:01:54", "throughput": 1319.31, "total_tokens": 7184224}
7557
+ {"current_steps": 37690, "total_steps": 38480, "loss": 0.0542, "lr": 6.433113801641577e-08, "epoch": 19.58939708939709, "percentage": 97.95, "elapsed_time": "1:30:46", "remaining_time": "0:01:54", "throughput": 1319.3, "total_tokens": 7185120}
7558
+ {"current_steps": 37695, "total_steps": 38480, "loss": 0.1538, "lr": 6.352076322983236e-08, "epoch": 19.591995841995843, "percentage": 97.96, "elapsed_time": "1:30:46", "remaining_time": "0:01:53", "throughput": 1319.33, "total_tokens": 7186176}
7559
+ {"current_steps": 37700, "total_steps": 38480, "loss": 0.0815, "lr": 6.271551847126245e-08, "epoch": 19.594594594594593, "percentage": 97.97, "elapsed_time": "1:30:47", "remaining_time": "0:01:52", "throughput": 1319.35, "total_tokens": 7187200}
7560
+ {"current_steps": 37705, "total_steps": 38480, "loss": 0.1119, "lr": 6.191540390636796e-08, "epoch": 19.597193347193347, "percentage": 97.99, "elapsed_time": "1:30:48", "remaining_time": "0:01:51", "throughput": 1319.35, "total_tokens": 7188128}
7561
+ {"current_steps": 37710, "total_steps": 38480, "loss": 0.1103, "lr": 6.112041969974503e-08, "epoch": 19.5997920997921, "percentage": 98.0, "elapsed_time": "1:30:48", "remaining_time": "0:01:51", "throughput": 1319.35, "total_tokens": 7189056}
7562
+ {"current_steps": 37715, "total_steps": 38480, "loss": 0.2608, "lr": 6.033056601494613e-08, "epoch": 19.60239085239085, "percentage": 98.01, "elapsed_time": "1:30:49", "remaining_time": "0:01:50", "throughput": 1319.35, "total_tokens": 7189984}
7563
+ {"current_steps": 37720, "total_steps": 38480, "loss": 0.1168, "lr": 5.9545843014460754e-08, "epoch": 19.604989604989605, "percentage": 98.02, "elapsed_time": "1:30:50", "remaining_time": "0:01:49", "throughput": 1319.36, "total_tokens": 7190976}
7564
+ {"current_steps": 37725, "total_steps": 38480, "loss": 0.2052, "lr": 5.876625085972642e-08, "epoch": 19.60758835758836, "percentage": 98.04, "elapsed_time": "1:30:51", "remaining_time": "0:01:49", "throughput": 1319.36, "total_tokens": 7191904}
7565
+ {"current_steps": 37730, "total_steps": 38480, "loss": 0.112, "lr": 5.799178971112318e-08, "epoch": 19.61018711018711, "percentage": 98.05, "elapsed_time": "1:30:51", "remaining_time": "0:01:48", "throughput": 1319.36, "total_tokens": 7192800}
7566
+ {"current_steps": 37735, "total_steps": 38480, "loss": 0.2336, "lr": 5.722245972797635e-08, "epoch": 19.612785862785863, "percentage": 98.06, "elapsed_time": "1:30:52", "remaining_time": "0:01:47", "throughput": 1319.35, "total_tokens": 7193696}
7567
+ {"current_steps": 37740, "total_steps": 38480, "loss": 0.1431, "lr": 5.6458261068553787e-08, "epoch": 19.615384615384617, "percentage": 98.08, "elapsed_time": "1:30:53", "remaining_time": "0:01:46", "throughput": 1319.36, "total_tokens": 7194656}
7568
+ {"current_steps": 37745, "total_steps": 38480, "loss": 0.0485, "lr": 5.5699193890074165e-08, "epoch": 19.617983367983367, "percentage": 98.09, "elapsed_time": "1:30:53", "remaining_time": "0:01:46", "throughput": 1319.36, "total_tokens": 7195584}
7569
+ {"current_steps": 37750, "total_steps": 38480, "loss": 0.0551, "lr": 5.4945258348690354e-08, "epoch": 19.62058212058212, "percentage": 98.1, "elapsed_time": "1:30:54", "remaining_time": "0:01:45", "throughput": 1319.36, "total_tokens": 7196512}
7570
+ {"current_steps": 37755, "total_steps": 38480, "loss": 0.1734, "lr": 5.419645459950606e-08, "epoch": 19.623180873180875, "percentage": 98.12, "elapsed_time": "1:30:55", "remaining_time": "0:01:44", "throughput": 1319.37, "total_tokens": 7197440}
7571
+ {"current_steps": 37760, "total_steps": 38480, "loss": 0.0855, "lr": 5.345278279656751e-08, "epoch": 19.625779625779625, "percentage": 98.13, "elapsed_time": "1:30:55", "remaining_time": "0:01:44", "throughput": 1319.37, "total_tokens": 7198400}
7572
+ {"current_steps": 37765, "total_steps": 38480, "loss": 0.1552, "lr": 5.271424309286899e-08, "epoch": 19.62837837837838, "percentage": 98.14, "elapsed_time": "1:30:56", "remaining_time": "0:01:43", "throughput": 1319.39, "total_tokens": 7199424}
7573
+ {"current_steps": 37770, "total_steps": 38480, "loss": 0.1076, "lr": 5.1980835640344505e-08, "epoch": 19.63097713097713, "percentage": 98.15, "elapsed_time": "1:30:57", "remaining_time": "0:01:42", "throughput": 1319.4, "total_tokens": 7200384}
7574
+ {"current_steps": 37775, "total_steps": 38480, "loss": 0.1272, "lr": 5.12525605898706e-08, "epoch": 19.633575883575883, "percentage": 98.17, "elapsed_time": "1:30:58", "remaining_time": "0:01:41", "throughput": 1319.42, "total_tokens": 7201440}
7575
+ {"current_steps": 37780, "total_steps": 38480, "loss": 0.1969, "lr": 5.052941809127465e-08, "epoch": 19.636174636174637, "percentage": 98.18, "elapsed_time": "1:30:58", "remaining_time": "0:01:41", "throughput": 1319.44, "total_tokens": 7202432}
7576
+ {"current_steps": 37785, "total_steps": 38480, "loss": 0.0665, "lr": 4.9811408293320984e-08, "epoch": 19.638773388773387, "percentage": 98.19, "elapsed_time": "1:30:59", "remaining_time": "0:01:40", "throughput": 1319.44, "total_tokens": 7203392}
7577
+ {"current_steps": 37790, "total_steps": 38480, "loss": 0.0928, "lr": 4.9098531343724773e-08, "epoch": 19.64137214137214, "percentage": 98.21, "elapsed_time": "1:31:00", "remaining_time": "0:01:39", "throughput": 1319.45, "total_tokens": 7204352}
7578
+ {"current_steps": 37795, "total_steps": 38480, "loss": 0.0956, "lr": 4.839078738913816e-08, "epoch": 19.643970893970895, "percentage": 98.22, "elapsed_time": "1:31:00", "remaining_time": "0:01:38", "throughput": 1319.45, "total_tokens": 7205280}
7579
+ {"current_steps": 37800, "total_steps": 38480, "loss": 0.1632, "lr": 4.768817657516411e-08, "epoch": 19.646569646569645, "percentage": 98.23, "elapsed_time": "1:31:01", "remaining_time": "0:01:38", "throughput": 1319.46, "total_tokens": 7206240}
7580
+ {"current_steps": 37805, "total_steps": 38480, "loss": 0.1256, "lr": 4.699069904634257e-08, "epoch": 19.6491683991684, "percentage": 98.25, "elapsed_time": "1:31:02", "remaining_time": "0:01:37", "throughput": 1319.47, "total_tokens": 7207200}
7581
+ {"current_steps": 37810, "total_steps": 38480, "loss": 0.1054, "lr": 4.629835494616708e-08, "epoch": 19.651767151767153, "percentage": 98.26, "elapsed_time": "1:31:02", "remaining_time": "0:01:36", "throughput": 1319.48, "total_tokens": 7208192}
7582
+ {"current_steps": 37815, "total_steps": 38480, "loss": 0.0788, "lr": 4.56111444170626e-08, "epoch": 19.654365904365903, "percentage": 98.27, "elapsed_time": "1:31:03", "remaining_time": "0:01:36", "throughput": 1319.49, "total_tokens": 7209152}
7583
+ {"current_steps": 37820, "total_steps": 38480, "loss": 0.1755, "lr": 4.492906760041049e-08, "epoch": 19.656964656964657, "percentage": 98.28, "elapsed_time": "1:31:04", "remaining_time": "0:01:35", "throughput": 1319.49, "total_tokens": 7210080}
7584
+ {"current_steps": 37825, "total_steps": 38480, "loss": 0.1456, "lr": 4.425212463652628e-08, "epoch": 19.65956340956341, "percentage": 98.3, "elapsed_time": "1:31:05", "remaining_time": "0:01:34", "throughput": 1319.5, "total_tokens": 7211072}
7585
+ {"current_steps": 37830, "total_steps": 38480, "loss": 0.0778, "lr": 4.358031566467635e-08, "epoch": 19.66216216216216, "percentage": 98.31, "elapsed_time": "1:31:05", "remaining_time": "0:01:33", "throughput": 1319.52, "total_tokens": 7212096}
7586
+ {"current_steps": 37835, "total_steps": 38480, "loss": 0.1097, "lr": 4.291364082306404e-08, "epoch": 19.664760914760915, "percentage": 98.32, "elapsed_time": "1:31:06", "remaining_time": "0:01:33", "throughput": 1319.53, "total_tokens": 7213088}
7587
+ {"current_steps": 37840, "total_steps": 38480, "loss": 0.0985, "lr": 4.2252100248843515e-08, "epoch": 19.66735966735967, "percentage": 98.34, "elapsed_time": "1:31:07", "remaining_time": "0:01:32", "throughput": 1319.53, "total_tokens": 7214016}
7588
+ {"current_steps": 37845, "total_steps": 38480, "loss": 0.1575, "lr": 4.1595694078108706e-08, "epoch": 19.66995841995842, "percentage": 98.35, "elapsed_time": "1:31:07", "remaining_time": "0:01:31", "throughput": 1319.52, "total_tokens": 7214880}
7589
+ {"current_steps": 37850, "total_steps": 38480, "loss": 0.0932, "lr": 4.09444224458988e-08, "epoch": 19.672557172557173, "percentage": 98.36, "elapsed_time": "1:31:08", "remaining_time": "0:01:31", "throughput": 1319.53, "total_tokens": 7215840}
7590
+ {"current_steps": 37855, "total_steps": 38480, "loss": 0.1637, "lr": 4.0298285486192746e-08, "epoch": 19.675155925155924, "percentage": 98.38, "elapsed_time": "1:31:09", "remaining_time": "0:01:30", "throughput": 1319.53, "total_tokens": 7216736}
7591
+ {"current_steps": 37860, "total_steps": 38480, "loss": 0.1308, "lr": 3.965728333192031e-08, "epoch": 19.677754677754677, "percentage": 98.39, "elapsed_time": "1:31:09", "remaining_time": "0:01:29", "throughput": 1319.53, "total_tokens": 7217664}
7592
+ {"current_steps": 37865, "total_steps": 38480, "loss": 0.0599, "lr": 3.902141611494548e-08, "epoch": 19.68035343035343, "percentage": 98.4, "elapsed_time": "1:31:10", "remaining_time": "0:01:28", "throughput": 1319.52, "total_tokens": 7218528}
7593
+ {"current_steps": 37870, "total_steps": 38480, "loss": 0.1197, "lr": 3.839068396608858e-08, "epoch": 19.68295218295218, "percentage": 98.41, "elapsed_time": "1:31:11", "remaining_time": "0:01:28", "throughput": 1319.53, "total_tokens": 7219520}
7594
+ {"current_steps": 37875, "total_steps": 38480, "loss": 0.2563, "lr": 3.776508701509862e-08, "epoch": 19.685550935550935, "percentage": 98.43, "elapsed_time": "1:31:11", "remaining_time": "0:01:27", "throughput": 1319.54, "total_tokens": 7220480}
7595
+ {"current_steps": 37880, "total_steps": 38480, "loss": 0.0439, "lr": 3.714462539068375e-08, "epoch": 19.68814968814969, "percentage": 98.44, "elapsed_time": "1:31:12", "remaining_time": "0:01:26", "throughput": 1319.54, "total_tokens": 7221408}
7596
+ {"current_steps": 37885, "total_steps": 38480, "loss": 0.1911, "lr": 3.652929922048076e-08, "epoch": 19.69074844074844, "percentage": 98.45, "elapsed_time": "1:31:13", "remaining_time": "0:01:25", "throughput": 1319.55, "total_tokens": 7222400}
7597
+ {"current_steps": 37890, "total_steps": 38480, "loss": 0.073, "lr": 3.591910863108006e-08, "epoch": 19.693347193347194, "percentage": 98.47, "elapsed_time": "1:31:14", "remaining_time": "0:01:25", "throughput": 1319.55, "total_tokens": 7223296}
7598
+ {"current_steps": 37895, "total_steps": 38480, "loss": 0.2109, "lr": 3.531405374801178e-08, "epoch": 19.695945945945947, "percentage": 98.48, "elapsed_time": "1:31:14", "remaining_time": "0:01:24", "throughput": 1319.55, "total_tokens": 7224192}
7599
+ {"current_steps": 37900, "total_steps": 38480, "loss": 0.1157, "lr": 3.471413469575135e-08, "epoch": 19.698544698544698, "percentage": 98.49, "elapsed_time": "1:31:15", "remaining_time": "0:01:23", "throughput": 1319.55, "total_tokens": 7225152}
7600
+ {"current_steps": 37905, "total_steps": 38480, "loss": 0.0884, "lr": 3.411935159771395e-08, "epoch": 19.70114345114345, "percentage": 98.51, "elapsed_time": "1:31:16", "remaining_time": "0:01:23", "throughput": 1319.56, "total_tokens": 7226144}
7601
+ {"current_steps": 37910, "total_steps": 38480, "loss": 0.2628, "lr": 3.352970457626281e-08, "epoch": 19.703742203742205, "percentage": 98.52, "elapsed_time": "1:31:16", "remaining_time": "0:01:22", "throughput": 1319.58, "total_tokens": 7227168}
7602
+ {"current_steps": 37915, "total_steps": 38480, "loss": 0.1184, "lr": 3.2945193752700887e-08, "epoch": 19.706340956340956, "percentage": 98.53, "elapsed_time": "1:31:17", "remaining_time": "0:01:21", "throughput": 1319.59, "total_tokens": 7228128}
7603
+ {"current_steps": 37920, "total_steps": 38480, "loss": 0.1395, "lr": 3.236581924727922e-08, "epoch": 19.70893970893971, "percentage": 98.54, "elapsed_time": "1:31:18", "remaining_time": "0:01:20", "throughput": 1319.6, "total_tokens": 7229088}
7604
+ {"current_steps": 37925, "total_steps": 38480, "loss": 0.096, "lr": 3.179158117918579e-08, "epoch": 19.71153846153846, "percentage": 98.56, "elapsed_time": "1:31:18", "remaining_time": "0:01:20", "throughput": 1319.59, "total_tokens": 7229984}
7605
+ {"current_steps": 37930, "total_steps": 38480, "loss": 0.0732, "lr": 3.1222479666553894e-08, "epoch": 19.714137214137214, "percentage": 98.57, "elapsed_time": "1:31:19", "remaining_time": "0:01:19", "throughput": 1319.6, "total_tokens": 7230944}
7606
+ {"current_steps": 37935, "total_steps": 38480, "loss": 0.2012, "lr": 3.065851482646487e-08, "epoch": 19.716735966735968, "percentage": 98.58, "elapsed_time": "1:31:20", "remaining_time": "0:01:18", "throughput": 1319.59, "total_tokens": 7231840}
7607
+ {"current_steps": 37940, "total_steps": 38480, "loss": 0.0818, "lr": 3.009968677493702e-08, "epoch": 19.719334719334718, "percentage": 98.6, "elapsed_time": "1:31:21", "remaining_time": "0:01:18", "throughput": 1319.6, "total_tokens": 7232800}
7608
+ {"current_steps": 37945, "total_steps": 38480, "loss": 0.0957, "lr": 2.9545995626936717e-08, "epoch": 19.721933471933472, "percentage": 98.61, "elapsed_time": "1:31:21", "remaining_time": "0:01:17", "throughput": 1319.6, "total_tokens": 7233728}
7609
+ {"current_steps": 37950, "total_steps": 38480, "loss": 0.0829, "lr": 2.8997441496370072e-08, "epoch": 19.724532224532226, "percentage": 98.62, "elapsed_time": "1:31:22", "remaining_time": "0:01:16", "throughput": 1319.59, "total_tokens": 7234624}
7610
+ {"current_steps": 37955, "total_steps": 38480, "loss": 0.0894, "lr": 2.8454024496085696e-08, "epoch": 19.727130977130976, "percentage": 98.64, "elapsed_time": "1:31:23", "remaining_time": "0:01:15", "throughput": 1319.6, "total_tokens": 7235584}
7611
+ {"current_steps": 37960, "total_steps": 38480, "loss": 0.068, "lr": 2.7915744737883055e-08, "epoch": 19.72972972972973, "percentage": 98.65, "elapsed_time": "1:31:23", "remaining_time": "0:01:15", "throughput": 1319.6, "total_tokens": 7236480}
7612
+ {"current_steps": 37965, "total_steps": 38480, "loss": 0.1573, "lr": 2.7382602332493013e-08, "epoch": 19.732328482328484, "percentage": 98.66, "elapsed_time": "1:31:24", "remaining_time": "0:01:14", "throughput": 1319.59, "total_tokens": 7237376}
7613
+ {"current_steps": 37970, "total_steps": 38480, "loss": 0.0566, "lr": 2.6854597389600055e-08, "epoch": 19.734927234927234, "percentage": 98.67, "elapsed_time": "1:31:25", "remaining_time": "0:01:13", "throughput": 1319.6, "total_tokens": 7238368}
7614
+ {"current_steps": 37975, "total_steps": 38480, "loss": 0.0858, "lr": 2.633173001782563e-08, "epoch": 19.737525987525988, "percentage": 98.69, "elapsed_time": "1:31:25", "remaining_time": "0:01:12", "throughput": 1319.61, "total_tokens": 7239328}
7615
+ {"current_steps": 37980, "total_steps": 38480, "loss": 0.0606, "lr": 2.581400032473369e-08, "epoch": 19.74012474012474, "percentage": 98.7, "elapsed_time": "1:31:26", "remaining_time": "0:01:12", "throughput": 1319.62, "total_tokens": 7240288}
7616
+ {"current_steps": 37985, "total_steps": 38480, "loss": 0.2136, "lr": 2.5301408416839035e-08, "epoch": 19.742723492723492, "percentage": 98.71, "elapsed_time": "1:31:27", "remaining_time": "0:01:11", "throughput": 1319.62, "total_tokens": 7241216}
7617
+ {"current_steps": 37990, "total_steps": 38480, "loss": 0.1287, "lr": 2.4793954399590647e-08, "epoch": 19.745322245322246, "percentage": 98.73, "elapsed_time": "1:31:28", "remaining_time": "0:01:10", "throughput": 1319.63, "total_tokens": 7242176}
7618
+ {"current_steps": 37995, "total_steps": 38480, "loss": 0.0474, "lr": 2.4291638377382798e-08, "epoch": 19.747920997921, "percentage": 98.74, "elapsed_time": "1:31:28", "remaining_time": "0:01:10", "throughput": 1319.63, "total_tokens": 7243104}
7619
+ {"current_steps": 38000, "total_steps": 38480, "loss": 0.0366, "lr": 2.3794460453555047e-08, "epoch": 19.75051975051975, "percentage": 98.75, "elapsed_time": "1:31:29", "remaining_time": "0:01:09", "throughput": 1319.62, "total_tokens": 7244000}
7620
+ {"current_steps": 38005, "total_steps": 38480, "loss": 0.0836, "lr": 2.330242073039224e-08, "epoch": 19.753118503118504, "percentage": 98.77, "elapsed_time": "1:31:30", "remaining_time": "0:01:08", "throughput": 1319.62, "total_tokens": 7244896}
7621
+ {"current_steps": 38010, "total_steps": 38480, "loss": 0.17, "lr": 2.2815519309113407e-08, "epoch": 19.755717255717254, "percentage": 98.78, "elapsed_time": "1:31:30", "remaining_time": "0:01:07", "throughput": 1319.61, "total_tokens": 7245792}
7622
+ {"current_steps": 38015, "total_steps": 38480, "loss": 0.0571, "lr": 2.2333756289885656e-08, "epoch": 19.758316008316008, "percentage": 98.79, "elapsed_time": "1:31:31", "remaining_time": "0:01:07", "throughput": 1319.62, "total_tokens": 7246784}
7623
+ {"current_steps": 38020, "total_steps": 38480, "loss": 0.1419, "lr": 2.1857131771824135e-08, "epoch": 19.760914760914762, "percentage": 98.8, "elapsed_time": "1:31:32", "remaining_time": "0:01:06", "throughput": 1319.63, "total_tokens": 7247744}
7624
+ {"current_steps": 38025, "total_steps": 38480, "loss": 0.0665, "lr": 2.13856458529782e-08, "epoch": 19.763513513513512, "percentage": 98.82, "elapsed_time": "1:31:32", "remaining_time": "0:01:05", "throughput": 1319.63, "total_tokens": 7248672}
7625
+ {"current_steps": 38030, "total_steps": 38480, "loss": 0.1946, "lr": 2.0919298630342477e-08, "epoch": 19.766112266112266, "percentage": 98.83, "elapsed_time": "1:31:33", "remaining_time": "0:01:05", "throughput": 1319.65, "total_tokens": 7249664}
7626
+ {"current_steps": 38035, "total_steps": 38480, "loss": 0.0527, "lr": 2.0458090199862446e-08, "epoch": 19.76871101871102, "percentage": 98.84, "elapsed_time": "1:31:34", "remaining_time": "0:01:04", "throughput": 1319.65, "total_tokens": 7250624}
7627
+ {"current_steps": 38040, "total_steps": 38480, "loss": 0.1543, "lr": 2.0002020656409436e-08, "epoch": 19.77130977130977, "percentage": 98.86, "elapsed_time": "1:31:35", "remaining_time": "0:01:03", "throughput": 1319.65, "total_tokens": 7251520}
7628
+ {"current_steps": 38045, "total_steps": 38480, "loss": 0.1873, "lr": 1.9551090093816727e-08, "epoch": 19.773908523908524, "percentage": 98.87, "elapsed_time": "1:31:35", "remaining_time": "0:01:02", "throughput": 1319.66, "total_tokens": 7252512}
7629
+ {"current_steps": 38050, "total_steps": 38480, "loss": 0.1015, "lr": 1.9105298604849e-08, "epoch": 19.776507276507278, "percentage": 98.88, "elapsed_time": "1:31:36", "remaining_time": "0:01:02", "throughput": 1319.7, "total_tokens": 7253632}
7630
+ {"current_steps": 38055, "total_steps": 38480, "loss": 0.1094, "lr": 1.8664646281213448e-08, "epoch": 19.77910602910603, "percentage": 98.9, "elapsed_time": "1:31:37", "remaining_time": "0:01:01", "throughput": 1319.71, "total_tokens": 7254624}
7631
+ {"current_steps": 38060, "total_steps": 38480, "loss": 0.1117, "lr": 1.8229133213565342e-08, "epoch": 19.781704781704782, "percentage": 98.91, "elapsed_time": "1:31:37", "remaining_time": "0:01:00", "throughput": 1319.72, "total_tokens": 7255616}
7632
+ {"current_steps": 38065, "total_steps": 38480, "loss": 0.2112, "lr": 1.779875949149967e-08, "epoch": 19.784303534303533, "percentage": 98.92, "elapsed_time": "1:31:38", "remaining_time": "0:00:59", "throughput": 1319.71, "total_tokens": 7256480}
7633
+ {"current_steps": 38070, "total_steps": 38480, "loss": 0.3019, "lr": 1.7373525203553953e-08, "epoch": 19.786902286902286, "percentage": 98.93, "elapsed_time": "1:31:39", "remaining_time": "0:00:59", "throughput": 1319.72, "total_tokens": 7257472}
7634
+ {"current_steps": 38075, "total_steps": 38480, "loss": 0.1467, "lr": 1.6953430437208206e-08, "epoch": 19.78950103950104, "percentage": 98.95, "elapsed_time": "1:31:39", "remaining_time": "0:00:58", "throughput": 1319.74, "total_tokens": 7258464}
7635
+ {"current_steps": 38080, "total_steps": 38480, "loss": 0.0945, "lr": 1.6538475278887743e-08, "epoch": 19.79209979209979, "percentage": 98.96, "elapsed_time": "1:31:40", "remaining_time": "0:00:57", "throughput": 1319.74, "total_tokens": 7259424}
7636
+ {"current_steps": 38085, "total_steps": 38480, "loss": 0.1508, "lr": 1.6128659813957613e-08, "epoch": 19.794698544698544, "percentage": 98.97, "elapsed_time": "1:31:41", "remaining_time": "0:00:57", "throughput": 1319.74, "total_tokens": 7260320}
7637
+ {"current_steps": 38090, "total_steps": 38480, "loss": 0.108, "lr": 1.5723984126728153e-08, "epoch": 19.7972972972973, "percentage": 98.99, "elapsed_time": "1:31:42", "remaining_time": "0:00:56", "throughput": 1319.75, "total_tokens": 7261312}
7638
+ {"current_steps": 38095, "total_steps": 38480, "loss": 0.1875, "lr": 1.5324448300449435e-08, "epoch": 19.79989604989605, "percentage": 99.0, "elapsed_time": "1:31:42", "remaining_time": "0:00:55", "throughput": 1319.74, "total_tokens": 7262208}
7639
+ {"current_steps": 38100, "total_steps": 38480, "loss": 0.1587, "lr": 1.493005241731682e-08, "epoch": 19.802494802494802, "percentage": 99.01, "elapsed_time": "1:31:43", "remaining_time": "0:00:54", "throughput": 1319.77, "total_tokens": 7263264}
7640
+ {"current_steps": 38105, "total_steps": 38480, "loss": 0.121, "lr": 1.4540796558465408e-08, "epoch": 19.805093555093556, "percentage": 99.03, "elapsed_time": "1:31:44", "remaining_time": "0:00:54", "throughput": 1319.76, "total_tokens": 7264128}
7641
+ {"current_steps": 38110, "total_steps": 38480, "loss": 0.1074, "lr": 1.4156680803972811e-08, "epoch": 19.807692307692307, "percentage": 99.04, "elapsed_time": "1:31:44", "remaining_time": "0:00:53", "throughput": 1319.77, "total_tokens": 7265120}
7642
+ {"current_steps": 38115, "total_steps": 38480, "loss": 0.1722, "lr": 1.3777705232864701e-08, "epoch": 19.81029106029106, "percentage": 99.05, "elapsed_time": "1:31:45", "remaining_time": "0:00:52", "throughput": 1319.79, "total_tokens": 7266144}
7643
+ {"current_steps": 38120, "total_steps": 38480, "loss": 0.0447, "lr": 1.3403869923103717e-08, "epoch": 19.812889812889814, "percentage": 99.06, "elapsed_time": "1:31:46", "remaining_time": "0:00:52", "throughput": 1319.79, "total_tokens": 7267072}
7644
+ {"current_steps": 38125, "total_steps": 38480, "loss": 0.342, "lr": 1.3035174951595009e-08, "epoch": 19.815488565488565, "percentage": 99.08, "elapsed_time": "1:31:46", "remaining_time": "0:00:51", "throughput": 1319.78, "total_tokens": 7267968}
7645
+ {"current_steps": 38130, "total_steps": 38480, "loss": 0.1225, "lr": 1.267162039418901e-08, "epoch": 19.81808731808732, "percentage": 99.09, "elapsed_time": "1:31:47", "remaining_time": "0:00:50", "throughput": 1319.77, "total_tokens": 7268832}
7646
+ {"current_steps": 38135, "total_steps": 38480, "loss": 0.2885, "lr": 1.2313206325681447e-08, "epoch": 19.820686070686072, "percentage": 99.1, "elapsed_time": "1:31:48", "remaining_time": "0:00:49", "throughput": 1319.77, "total_tokens": 7269760}
7647
+ {"current_steps": 38140, "total_steps": 38480, "loss": 0.1288, "lr": 1.1959932819799457e-08, "epoch": 19.823284823284823, "percentage": 99.12, "elapsed_time": "1:31:49", "remaining_time": "0:00:49", "throughput": 1319.78, "total_tokens": 7270720}
7648
+ {"current_steps": 38145, "total_steps": 38480, "loss": 0.2469, "lr": 1.1611799949226565e-08, "epoch": 19.825883575883577, "percentage": 99.13, "elapsed_time": "1:31:49", "remaining_time": "0:00:48", "throughput": 1319.79, "total_tokens": 7271712}
7649
+ {"current_steps": 38150, "total_steps": 38480, "loss": 0.1151, "lr": 1.1268807785577707e-08, "epoch": 19.828482328482327, "percentage": 99.14, "elapsed_time": "1:31:50", "remaining_time": "0:00:47", "throughput": 1319.79, "total_tokens": 7272608}
7650
+ {"current_steps": 38155, "total_steps": 38480, "loss": 0.1232, "lr": 1.0930956399415881e-08, "epoch": 19.83108108108108, "percentage": 99.16, "elapsed_time": "1:31:51", "remaining_time": "0:00:46", "throughput": 1319.79, "total_tokens": 7273536}
7651
+ {"current_steps": 38160, "total_steps": 38480, "loss": 0.1073, "lr": 1.0598245860243828e-08, "epoch": 19.833679833679835, "percentage": 99.17, "elapsed_time": "1:31:51", "remaining_time": "0:00:46", "throughput": 1319.78, "total_tokens": 7274432}
7652
+ {"current_steps": 38165, "total_steps": 38480, "loss": 0.154, "lr": 1.027067623650957e-08, "epoch": 19.836278586278585, "percentage": 99.18, "elapsed_time": "1:31:52", "remaining_time": "0:00:45", "throughput": 1319.79, "total_tokens": 7275424}
7653
+ {"current_steps": 38170, "total_steps": 38480, "loss": 0.1013, "lr": 9.948247595603643e-09, "epoch": 19.83887733887734, "percentage": 99.19, "elapsed_time": "1:31:53", "remaining_time": "0:00:44", "throughput": 1319.8, "total_tokens": 7276384}
7654
+ {"current_steps": 38175, "total_steps": 38480, "loss": 0.0745, "lr": 9.63096000385355e-09, "epoch": 19.841476091476093, "percentage": 99.21, "elapsed_time": "1:31:53", "remaining_time": "0:00:44", "throughput": 1319.8, "total_tokens": 7277312}
7655
+ {"current_steps": 38180, "total_steps": 38480, "loss": 0.1995, "lr": 9.318813526534853e-09, "epoch": 19.844074844074843, "percentage": 99.22, "elapsed_time": "1:31:54", "remaining_time": "0:00:43", "throughput": 1319.8, "total_tokens": 7278208}
7656
+ {"current_steps": 38185, "total_steps": 38480, "loss": 0.0667, "lr": 9.011808227865625e-09, "epoch": 19.846673596673597, "percentage": 99.23, "elapsed_time": "1:31:55", "remaining_time": "0:00:42", "throughput": 1319.79, "total_tokens": 7279104}
7657
+ {"current_steps": 38190, "total_steps": 38480, "loss": 0.1553, "lr": 8.70994417099813e-09, "epoch": 19.84927234927235, "percentage": 99.25, "elapsed_time": "1:31:56", "remaining_time": "0:00:41", "throughput": 1319.79, "total_tokens": 7280000}
7658
+ {"current_steps": 38195, "total_steps": 38480, "loss": 0.0845, "lr": 8.413221418041018e-09, "epoch": 19.8518711018711, "percentage": 99.26, "elapsed_time": "1:31:56", "remaining_time": "0:00:41", "throughput": 1319.79, "total_tokens": 7280928}
7659
+ {"current_steps": 38200, "total_steps": 38480, "loss": 0.2997, "lr": 8.121640030028798e-09, "epoch": 19.854469854469855, "percentage": 99.27, "elapsed_time": "1:31:57", "remaining_time": "0:00:40", "throughput": 1319.8, "total_tokens": 7281920}
7660
+ {"current_steps": 38205, "total_steps": 38480, "loss": 0.0814, "lr": 7.83520006695515e-09, "epoch": 19.85706860706861, "percentage": 99.29, "elapsed_time": "1:31:58", "remaining_time": "0:00:39", "throughput": 1319.8, "total_tokens": 7282848}
7661
+ {"current_steps": 38210, "total_steps": 38480, "loss": 0.1091, "lr": 7.55390158773961e-09, "epoch": 19.85966735966736, "percentage": 99.3, "elapsed_time": "1:31:58", "remaining_time": "0:00:38", "throughput": 1319.81, "total_tokens": 7283840}
7662
+ {"current_steps": 38215, "total_steps": 38480, "loss": 0.1994, "lr": 7.2777446502581006e-09, "epoch": 19.862266112266113, "percentage": 99.31, "elapsed_time": "1:31:59", "remaining_time": "0:00:38", "throughput": 1319.81, "total_tokens": 7284736}
7663
+ {"current_steps": 38220, "total_steps": 38480, "loss": 0.134, "lr": 7.006729311320737e-09, "epoch": 19.864864864864863, "percentage": 99.32, "elapsed_time": "1:32:00", "remaining_time": "0:00:37", "throughput": 1319.8, "total_tokens": 7285632}
7664
+ {"current_steps": 38225, "total_steps": 38480, "loss": 0.0445, "lr": 6.7408556266801425e-09, "epoch": 19.867463617463617, "percentage": 99.34, "elapsed_time": "1:32:00", "remaining_time": "0:00:36", "throughput": 1319.79, "total_tokens": 7286496}
7665
+ {"current_steps": 38230, "total_steps": 38480, "loss": 0.2176, "lr": 6.480123651034231e-09, "epoch": 19.87006237006237, "percentage": 99.35, "elapsed_time": "1:32:01", "remaining_time": "0:00:36", "throughput": 1319.79, "total_tokens": 7287424}
7666
+ {"current_steps": 38235, "total_steps": 38480, "loss": 0.0772, "lr": 6.224533438020652e-09, "epoch": 19.87266112266112, "percentage": 99.36, "elapsed_time": "1:32:02", "remaining_time": "0:00:35", "throughput": 1319.8, "total_tokens": 7288384}
7667
+ {"current_steps": 38240, "total_steps": 38480, "loss": 0.1487, "lr": 5.974085040222344e-09, "epoch": 19.875259875259875, "percentage": 99.38, "elapsed_time": "1:32:03", "remaining_time": "0:00:34", "throughput": 1319.79, "total_tokens": 7289280}
7668
+ {"current_steps": 38245, "total_steps": 38480, "loss": 0.0849, "lr": 5.728778509161981e-09, "epoch": 19.87785862785863, "percentage": 99.39, "elapsed_time": "1:32:03", "remaining_time": "0:00:33", "throughput": 1319.8, "total_tokens": 7290240}
7669
+ {"current_steps": 38250, "total_steps": 38480, "loss": 0.0841, "lr": 5.4886138953047505e-09, "epoch": 19.88045738045738, "percentage": 99.4, "elapsed_time": "1:32:04", "remaining_time": "0:00:33", "throughput": 1319.79, "total_tokens": 7291136}
7670
+ {"current_steps": 38255, "total_steps": 38480, "loss": 0.093, "lr": 5.253591248058354e-09, "epoch": 19.883056133056133, "percentage": 99.42, "elapsed_time": "1:32:05", "remaining_time": "0:00:32", "throughput": 1319.81, "total_tokens": 7292128}
7671
+ {"current_steps": 38260, "total_steps": 38480, "loss": 0.1742, "lr": 5.023710615773003e-09, "epoch": 19.885654885654887, "percentage": 99.43, "elapsed_time": "1:32:05", "remaining_time": "0:00:31", "throughput": 1319.8, "total_tokens": 7292992}
7672
+ {"current_steps": 38265, "total_steps": 38480, "loss": 0.1874, "lr": 4.798972045738647e-09, "epoch": 19.888253638253637, "percentage": 99.44, "elapsed_time": "1:32:06", "remaining_time": "0:00:31", "throughput": 1319.8, "total_tokens": 7293952}
7673
+ {"current_steps": 38270, "total_steps": 38480, "loss": 0.1791, "lr": 4.579375584190526e-09, "epoch": 19.89085239085239, "percentage": 99.45, "elapsed_time": "1:32:07", "remaining_time": "0:00:30", "throughput": 1319.81, "total_tokens": 7294912}
7674
+ {"current_steps": 38275, "total_steps": 38480, "loss": 0.0784, "lr": 4.3649212763036125e-09, "epoch": 19.893451143451145, "percentage": 99.47, "elapsed_time": "1:32:07", "remaining_time": "0:00:29", "throughput": 1319.8, "total_tokens": 7295808}
7675
+ {"current_steps": 38280, "total_steps": 38480, "loss": 0.1782, "lr": 4.1556091662009465e-09, "epoch": 19.896049896049895, "percentage": 99.48, "elapsed_time": "1:32:08", "remaining_time": "0:00:28", "throughput": 1319.81, "total_tokens": 7296768}
7676
+ {"current_steps": 38285, "total_steps": 38480, "loss": 0.1605, "lr": 3.951439296936976e-09, "epoch": 19.89864864864865, "percentage": 99.49, "elapsed_time": "1:32:09", "remaining_time": "0:00:28", "throughput": 1319.82, "total_tokens": 7297728}
7677
+ {"current_steps": 38290, "total_steps": 38480, "loss": 0.1042, "lr": 3.75241171051699e-09, "epoch": 19.901247401247403, "percentage": 99.51, "elapsed_time": "1:32:10", "remaining_time": "0:00:27", "throughput": 1319.82, "total_tokens": 7298656}
7678
+ {"current_steps": 38295, "total_steps": 38480, "loss": 0.0758, "lr": 3.558526447888788e-09, "epoch": 19.903846153846153, "percentage": 99.52, "elapsed_time": "1:32:10", "remaining_time": "0:00:26", "throughput": 1319.81, "total_tokens": 7299520}
7679
+ {"current_steps": 38300, "total_steps": 38480, "loss": 0.0772, "lr": 3.3697835489343575e-09, "epoch": 19.906444906444907, "percentage": 99.53, "elapsed_time": "1:32:11", "remaining_time": "0:00:25", "throughput": 1319.81, "total_tokens": 7300448}
7680
+ {"current_steps": 38305, "total_steps": 38480, "loss": 0.1193, "lr": 3.1861830524837487e-09, "epoch": 19.909043659043657, "percentage": 99.55, "elapsed_time": "1:32:12", "remaining_time": "0:00:25", "throughput": 1319.81, "total_tokens": 7301376}
7681
+ {"current_steps": 38310, "total_steps": 38480, "loss": 0.1364, "lr": 3.0077249963095246e-09, "epoch": 19.91164241164241, "percentage": 99.56, "elapsed_time": "1:32:12", "remaining_time": "0:00:24", "throughput": 1319.83, "total_tokens": 7302400}
7682
+ {"current_steps": 38315, "total_steps": 38480, "loss": 0.0549, "lr": 2.8344094171212086e-09, "epoch": 19.914241164241165, "percentage": 99.57, "elapsed_time": "1:32:13", "remaining_time": "0:00:23", "throughput": 1319.83, "total_tokens": 7303296}
7683
+ {"current_steps": 38320, "total_steps": 38480, "loss": 0.1727, "lr": 2.6662363505791656e-09, "epoch": 19.916839916839916, "percentage": 99.58, "elapsed_time": "1:32:14", "remaining_time": "0:00:23", "throughput": 1319.83, "total_tokens": 7304256}
7684
+ {"current_steps": 38325, "total_steps": 38480, "loss": 0.1189, "lr": 2.503205831277944e-09, "epoch": 19.91943866943867, "percentage": 99.6, "elapsed_time": "1:32:14", "remaining_time": "0:00:22", "throughput": 1319.83, "total_tokens": 7305184}
7685
+ {"current_steps": 38330, "total_steps": 38480, "loss": 0.1511, "lr": 2.3453178927573816e-09, "epoch": 19.922037422037423, "percentage": 99.61, "elapsed_time": "1:32:15", "remaining_time": "0:00:21", "throughput": 1319.84, "total_tokens": 7306144}
7686
+ {"current_steps": 38335, "total_steps": 38480, "loss": 0.1277, "lr": 2.1925725674970533e-09, "epoch": 19.924636174636174, "percentage": 99.62, "elapsed_time": "1:32:16", "remaining_time": "0:00:20", "throughput": 1319.85, "total_tokens": 7307104}
7687
+ {"current_steps": 38340, "total_steps": 38480, "loss": 0.0621, "lr": 2.0449698869218214e-09, "epoch": 19.927234927234927, "percentage": 99.64, "elapsed_time": "1:32:17", "remaining_time": "0:00:20", "throughput": 1319.86, "total_tokens": 7308096}
7688
+ {"current_steps": 38345, "total_steps": 38480, "loss": 0.1874, "lr": 1.902509881396286e-09, "epoch": 19.92983367983368, "percentage": 99.65, "elapsed_time": "1:32:17", "remaining_time": "0:00:19", "throughput": 1319.86, "total_tokens": 7309024}
7689
+ {"current_steps": 38350, "total_steps": 38480, "loss": 0.1682, "lr": 1.7651925802303347e-09, "epoch": 19.93243243243243, "percentage": 99.66, "elapsed_time": "1:32:18", "remaining_time": "0:00:18", "throughput": 1319.86, "total_tokens": 7309952}
7690
+ {"current_steps": 38355, "total_steps": 38480, "loss": 0.1273, "lr": 1.6330180116708172e-09, "epoch": 19.935031185031185, "percentage": 99.68, "elapsed_time": "1:32:19", "remaining_time": "0:00:18", "throughput": 1319.87, "total_tokens": 7310912}
7691
+ {"current_steps": 38360, "total_steps": 38480, "loss": 0.1036, "lr": 1.5059862029070947e-09, "epoch": 19.93762993762994, "percentage": 99.69, "elapsed_time": "1:32:19", "remaining_time": "0:00:17", "throughput": 1319.87, "total_tokens": 7311840}
7692
+ {"current_steps": 38365, "total_steps": 38480, "loss": 0.0845, "lr": 1.3840971800793689e-09, "epoch": 19.94022869022869, "percentage": 99.7, "elapsed_time": "1:32:20", "remaining_time": "0:00:16", "throughput": 1319.88, "total_tokens": 7312800}
7693
+ {"current_steps": 38370, "total_steps": 38480, "loss": 0.0855, "lr": 1.2673509682564755e-09, "epoch": 19.942827442827443, "percentage": 99.71, "elapsed_time": "1:32:21", "remaining_time": "0:00:15", "throughput": 1319.88, "total_tokens": 7313760}
7694
+ {"current_steps": 38375, "total_steps": 38480, "loss": 0.1138, "lr": 1.1557475914580895e-09, "epoch": 19.945426195426194, "percentage": 99.73, "elapsed_time": "1:32:21", "remaining_time": "0:00:15", "throughput": 1319.9, "total_tokens": 7314784}
7695
+ {"current_steps": 38380, "total_steps": 38480, "loss": 0.1106, "lr": 1.0492870726463987e-09, "epoch": 19.948024948024948, "percentage": 99.74, "elapsed_time": "1:32:22", "remaining_time": "0:00:14", "throughput": 1319.9, "total_tokens": 7315680}
7696
+ {"current_steps": 38385, "total_steps": 38480, "loss": 0.1334, "lr": 9.479694337205524e-10, "epoch": 19.9506237006237, "percentage": 99.75, "elapsed_time": "1:32:23", "remaining_time": "0:00:13", "throughput": 1319.91, "total_tokens": 7316672}
7697
+ {"current_steps": 38390, "total_steps": 38480, "loss": 0.0361, "lr": 8.517946955222123e-10, "epoch": 19.953222453222452, "percentage": 99.77, "elapsed_time": "1:32:24", "remaining_time": "0:00:12", "throughput": 1319.9, "total_tokens": 7317568}
7698
+ {"current_steps": 38395, "total_steps": 38480, "loss": 0.1069, "lr": 7.607628778383281e-10, "epoch": 19.955821205821206, "percentage": 99.78, "elapsed_time": "1:32:24", "remaining_time": "0:00:12", "throughput": 1319.89, "total_tokens": 7318400}
7699
+ {"current_steps": 38400, "total_steps": 38480, "loss": 0.0753, "lr": 6.748739993983622e-10, "epoch": 19.95841995841996, "percentage": 99.79, "elapsed_time": "1:32:25", "remaining_time": "0:00:11", "throughput": 1319.89, "total_tokens": 7319360}
7700
+ {"current_steps": 38405, "total_steps": 38480, "loss": 0.1131, "lr": 5.941280778687386e-10, "epoch": 19.96101871101871, "percentage": 99.81, "elapsed_time": "1:32:26", "remaining_time": "0:00:10", "throughput": 1319.89, "total_tokens": 7320288}
7701
+ {"current_steps": 38410, "total_steps": 38480, "loss": 0.0505, "lr": 5.185251298639449e-10, "epoch": 19.963617463617464, "percentage": 99.82, "elapsed_time": "1:32:26", "remaining_time": "0:00:10", "throughput": 1319.89, "total_tokens": 7321216}
7702
+ {"current_steps": 38415, "total_steps": 38480, "loss": 0.1433, "lr": 4.4806517093265444e-10, "epoch": 19.966216216216218, "percentage": 99.83, "elapsed_time": "1:32:27", "remaining_time": "0:00:09", "throughput": 1319.9, "total_tokens": 7322176}
7703
+ {"current_steps": 38420, "total_steps": 38480, "loss": 0.0375, "lr": 3.8274821557438e-10, "epoch": 19.968814968814968, "percentage": 99.84, "elapsed_time": "1:32:28", "remaining_time": "0:00:08", "throughput": 1319.91, "total_tokens": 7323136}
7704
+ {"current_steps": 38425, "total_steps": 38480, "loss": 0.1064, "lr": 3.2257427722559574e-10, "epoch": 19.97141372141372, "percentage": 99.86, "elapsed_time": "1:32:28", "remaining_time": "0:00:07", "throughput": 1319.91, "total_tokens": 7324064}
7705
+ {"current_steps": 38430, "total_steps": 38480, "loss": 0.0495, "lr": 2.6754336826528837e-10, "epoch": 19.974012474012476, "percentage": 99.87, "elapsed_time": "1:32:29", "remaining_time": "0:00:07", "throughput": 1319.9, "total_tokens": 7324928}
7706
+ {"current_steps": 38435, "total_steps": 38480, "loss": 0.1053, "lr": 2.176555000121816e-10, "epoch": 19.976611226611226, "percentage": 99.88, "elapsed_time": "1:32:30", "remaining_time": "0:00:06", "throughput": 1319.9, "total_tokens": 7325888}
7707
+ {"current_steps": 38440, "total_steps": 38480, "loss": 0.2411, "lr": 1.7291068273306288e-10, "epoch": 19.97920997920998, "percentage": 99.9, "elapsed_time": "1:32:31", "remaining_time": "0:00:05", "throughput": 1319.9, "total_tokens": 7326784}
7708
+ {"current_steps": 38445, "total_steps": 38480, "loss": 0.0481, "lr": 1.3330892563168107e-10, "epoch": 19.981808731808734, "percentage": 99.91, "elapsed_time": "1:32:31", "remaining_time": "0:00:05", "throughput": 1319.89, "total_tokens": 7327680}
7709
+ {"current_steps": 38450, "total_steps": 38480, "loss": 0.0448, "lr": 9.885023685152206e-11, "epoch": 19.984407484407484, "percentage": 99.92, "elapsed_time": "1:32:32", "remaining_time": "0:00:04", "throughput": 1319.89, "total_tokens": 7328608}
7710
+ {"current_steps": 38455, "total_steps": 38480, "loss": 0.1303, "lr": 6.953462348691098e-11, "epoch": 19.987006237006238, "percentage": 99.94, "elapsed_time": "1:32:33", "remaining_time": "0:00:03", "throughput": 1319.9, "total_tokens": 7329600}
7711
+ {"current_steps": 38460, "total_steps": 38480, "loss": 0.0776, "lr": 4.53620915691344e-11, "epoch": 19.989604989604988, "percentage": 99.95, "elapsed_time": "1:32:33", "remaining_time": "0:00:02", "throughput": 1319.9, "total_tokens": 7330496}
7712
+ {"current_steps": 38465, "total_steps": 38480, "loss": 0.1508, "lr": 2.6332646063664812e-11, "epoch": 19.992203742203742, "percentage": 99.96, "elapsed_time": "1:32:34", "remaining_time": "0:00:02", "throughput": 1319.91, "total_tokens": 7331456}
7713
+ {"current_steps": 38470, "total_steps": 38480, "loss": 0.1028, "lr": 1.24462908951406e-11, "epoch": 19.994802494802496, "percentage": 99.97, "elapsed_time": "1:32:35", "remaining_time": "0:00:01", "throughput": 1319.91, "total_tokens": 7332384}
7714
+ {"current_steps": 38475, "total_steps": 38480, "loss": 0.1778, "lr": 3.703028911283823e-12, "epoch": 19.997401247401246, "percentage": 99.99, "elapsed_time": "1:32:35", "remaining_time": "0:00:00", "throughput": 1319.91, "total_tokens": 7333344}
7715
+ {"current_steps": 38480, "total_steps": 38480, "loss": 0.1629, "lr": 1.0286191620689778e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:32:36", "remaining_time": "0:00:00", "throughput": 1319.92, "total_tokens": 7334376}
7716
+ {"current_steps": 38480, "total_steps": 38480, "eval_loss": 0.14645375311374664, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:32:44", "remaining_time": "0:00:00", "throughput": 1318.02, "total_tokens": 7334376}
7717
+ {"current_steps": 38480, "total_steps": 38480, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:32:46", "remaining_time": "0:00:00", "throughput": 1317.71, "total_tokens": 7334376}