Training in progress, step 38480
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +384 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb2f450589f18ae853df5e13798110ca6089bdef29f3a72e23efdc578221cdda
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -7331,3 +7331,387 @@
|
|
| 7331 |
{"current_steps": 36560, "total_steps": 38480, "loss": 0.1782, "lr": 3.7862566783146147e-07, "epoch": 19.002079002079004, "percentage": 95.01, "elapsed_time": "1:28:07", "remaining_time": "0:04:37", "throughput": 1318.02, "total_tokens": 6968832}
|
| 7332 |
{"current_steps": 36565, "total_steps": 38480, "loss": 0.0913, "lr": 3.766622085984661e-07, "epoch": 19.004677754677754, "percentage": 95.02, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6969760}
|
| 7333 |
{"current_steps": 36570, "total_steps": 38480, "loss": 0.1051, "lr": 3.747038149728266e-07, "epoch": 19.007276507276508, "percentage": 95.04, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6970688}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7331 |
{"current_steps": 36560, "total_steps": 38480, "loss": 0.1782, "lr": 3.7862566783146147e-07, "epoch": 19.002079002079004, "percentage": 95.01, "elapsed_time": "1:28:07", "remaining_time": "0:04:37", "throughput": 1318.02, "total_tokens": 6968832}
|
| 7332 |
{"current_steps": 36565, "total_steps": 38480, "loss": 0.0913, "lr": 3.766622085984661e-07, "epoch": 19.004677754677754, "percentage": 95.02, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6969760}
|
| 7333 |
{"current_steps": 36570, "total_steps": 38480, "loss": 0.1051, "lr": 3.747038149728266e-07, "epoch": 19.007276507276508, "percentage": 95.04, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6970688}
|
| 7334 |
+
{"current_steps": 36575, "total_steps": 38480, "loss": 0.0362, "lr": 3.7275048735743167e-07, "epoch": 19.00987525987526, "percentage": 95.05, "elapsed_time": "1:28:09", "remaining_time": "0:04:35", "throughput": 1317.99, "total_tokens": 6971648}
|
| 7335 |
+
{"current_steps": 36580, "total_steps": 38480, "loss": 0.3581, "lr": 3.708022261541294e-07, "epoch": 19.012474012474012, "percentage": 95.06, "elapsed_time": "1:28:10", "remaining_time": "0:04:34", "throughput": 1318.01, "total_tokens": 6972704}
|
| 7336 |
+
{"current_steps": 36585, "total_steps": 38480, "loss": 0.2977, "lr": 3.688590317637214e-07, "epoch": 19.015072765072766, "percentage": 95.08, "elapsed_time": "1:28:11", "remaining_time": "0:04:34", "throughput": 1318.02, "total_tokens": 6973696}
|
| 7337 |
+
{"current_steps": 36590, "total_steps": 38480, "loss": 0.0497, "lr": 3.6692090458597115e-07, "epoch": 19.017671517671516, "percentage": 95.09, "elapsed_time": "1:28:11", "remaining_time": "0:04:33", "throughput": 1318.02, "total_tokens": 6974624}
|
| 7338 |
+
{"current_steps": 36595, "total_steps": 38480, "loss": 0.1458, "lr": 3.6498784501959595e-07, "epoch": 19.02027027027027, "percentage": 95.1, "elapsed_time": "1:28:12", "remaining_time": "0:04:32", "throughput": 1318.02, "total_tokens": 6975552}
|
| 7339 |
+
{"current_steps": 36600, "total_steps": 38480, "loss": 0.0932, "lr": 3.63059853462272e-07, "epoch": 19.022869022869024, "percentage": 95.11, "elapsed_time": "1:28:13", "remaining_time": "0:04:31", "throughput": 1318.03, "total_tokens": 6976512}
|
| 7340 |
+
{"current_steps": 36605, "total_steps": 38480, "loss": 0.0794, "lr": 3.611369303106349e-07, "epoch": 19.025467775467774, "percentage": 95.13, "elapsed_time": "1:28:13", "remaining_time": "0:04:31", "throughput": 1318.05, "total_tokens": 6977536}
|
| 7341 |
+
{"current_steps": 36610, "total_steps": 38480, "loss": 0.1062, "lr": 3.592190759602709e-07, "epoch": 19.028066528066528, "percentage": 95.14, "elapsed_time": "1:28:14", "remaining_time": "0:04:30", "throughput": 1318.06, "total_tokens": 6978528}
|
| 7342 |
+
{"current_steps": 36615, "total_steps": 38480, "loss": 0.0892, "lr": 3.5730629080573676e-07, "epoch": 19.030665280665282, "percentage": 95.15, "elapsed_time": "1:28:15", "remaining_time": "0:04:29", "throughput": 1318.07, "total_tokens": 6979488}
|
| 7343 |
+
{"current_steps": 36620, "total_steps": 38480, "loss": 0.1147, "lr": 3.5539857524053145e-07, "epoch": 19.033264033264032, "percentage": 95.17, "elapsed_time": "1:28:15", "remaining_time": "0:04:28", "throughput": 1318.09, "total_tokens": 6980512}
|
| 7344 |
+
{"current_steps": 36625, "total_steps": 38480, "loss": 0.1604, "lr": 3.534959296571189e-07, "epoch": 19.035862785862786, "percentage": 95.18, "elapsed_time": "1:28:16", "remaining_time": "0:04:28", "throughput": 1318.09, "total_tokens": 6981472}
|
| 7345 |
+
{"current_steps": 36630, "total_steps": 38480, "loss": 0.0576, "lr": 3.5159835444691926e-07, "epoch": 19.03846153846154, "percentage": 95.19, "elapsed_time": "1:28:17", "remaining_time": "0:04:27", "throughput": 1318.09, "total_tokens": 6982400}
|
| 7346 |
+
{"current_steps": 36635, "total_steps": 38480, "loss": 0.2829, "lr": 3.497058500003092e-07, "epoch": 19.04106029106029, "percentage": 95.21, "elapsed_time": "1:28:18", "remaining_time": "0:04:26", "throughput": 1318.12, "total_tokens": 6983456}
|
| 7347 |
+
{"current_steps": 36640, "total_steps": 38480, "loss": 0.1148, "lr": 3.4781841670662175e-07, "epoch": 19.043659043659044, "percentage": 95.22, "elapsed_time": "1:28:18", "remaining_time": "0:04:26", "throughput": 1318.12, "total_tokens": 6984416}
|
| 7348 |
+
{"current_steps": 36645, "total_steps": 38480, "loss": 0.1326, "lr": 3.4593605495414626e-07, "epoch": 19.046257796257795, "percentage": 95.23, "elapsed_time": "1:28:19", "remaining_time": "0:04:25", "throughput": 1318.15, "total_tokens": 6985472}
|
| 7349 |
+
{"current_steps": 36650, "total_steps": 38480, "loss": 0.1154, "lr": 3.440587651301286e-07, "epoch": 19.04885654885655, "percentage": 95.24, "elapsed_time": "1:28:20", "remaining_time": "0:04:24", "throughput": 1318.15, "total_tokens": 6986400}
|
| 7350 |
+
{"current_steps": 36655, "total_steps": 38480, "loss": 0.1229, "lr": 3.421865476207764e-07, "epoch": 19.051455301455302, "percentage": 95.26, "elapsed_time": "1:28:20", "remaining_time": "0:04:23", "throughput": 1318.14, "total_tokens": 6987296}
|
| 7351 |
+
{"current_steps": 36660, "total_steps": 38480, "loss": 0.133, "lr": 3.403194028112455e-07, "epoch": 19.054054054054053, "percentage": 95.27, "elapsed_time": "1:28:21", "remaining_time": "0:04:23", "throughput": 1318.14, "total_tokens": 6988224}
|
| 7352 |
+
{"current_steps": 36665, "total_steps": 38480, "loss": 0.0856, "lr": 3.3845733108564814e-07, "epoch": 19.056652806652806, "percentage": 95.28, "elapsed_time": "1:28:22", "remaining_time": "0:04:22", "throughput": 1318.14, "total_tokens": 6989120}
|
| 7353 |
+
{"current_steps": 36670, "total_steps": 38480, "loss": 0.0569, "lr": 3.366003328270667e-07, "epoch": 19.05925155925156, "percentage": 95.3, "elapsed_time": "1:28:22", "remaining_time": "0:04:21", "throughput": 1318.14, "total_tokens": 6990048}
|
| 7354 |
+
{"current_steps": 36675, "total_steps": 38480, "loss": 0.1763, "lr": 3.347484084175234e-07, "epoch": 19.06185031185031, "percentage": 95.31, "elapsed_time": "1:28:23", "remaining_time": "0:04:21", "throughput": 1318.15, "total_tokens": 6991040}
|
| 7355 |
+
{"current_steps": 36680, "total_steps": 38480, "loss": 0.0938, "lr": 3.3290155823800515e-07, "epoch": 19.064449064449065, "percentage": 95.32, "elapsed_time": "1:28:24", "remaining_time": "0:04:20", "throughput": 1318.15, "total_tokens": 6991968}
|
| 7356 |
+
{"current_steps": 36685, "total_steps": 38480, "loss": 0.0474, "lr": 3.310597826684525e-07, "epoch": 19.06704781704782, "percentage": 95.34, "elapsed_time": "1:28:25", "remaining_time": "0:04:19", "throughput": 1318.14, "total_tokens": 6992864}
|
| 7357 |
+
{"current_steps": 36690, "total_steps": 38480, "loss": 0.1205, "lr": 3.292230820877623e-07, "epoch": 19.06964656964657, "percentage": 95.35, "elapsed_time": "1:28:25", "remaining_time": "0:04:18", "throughput": 1318.16, "total_tokens": 6993856}
|
| 7358 |
+
{"current_steps": 36695, "total_steps": 38480, "loss": 0.1137, "lr": 3.273914568737879e-07, "epoch": 19.072245322245323, "percentage": 95.36, "elapsed_time": "1:28:26", "remaining_time": "0:04:18", "throughput": 1318.16, "total_tokens": 6994816}
|
| 7359 |
+
{"current_steps": 36700, "total_steps": 38480, "loss": 0.0563, "lr": 3.25564907403339e-07, "epoch": 19.074844074844076, "percentage": 95.37, "elapsed_time": "1:28:27", "remaining_time": "0:04:17", "throughput": 1318.19, "total_tokens": 6995872}
|
| 7360 |
+
{"current_steps": 36705, "total_steps": 38480, "loss": 0.1539, "lr": 3.237434340521789e-07, "epoch": 19.077442827442827, "percentage": 95.39, "elapsed_time": "1:28:27", "remaining_time": "0:04:16", "throughput": 1318.19, "total_tokens": 6996800}
|
| 7361 |
+
{"current_steps": 36710, "total_steps": 38480, "loss": 0.2104, "lr": 3.2192703719503005e-07, "epoch": 19.08004158004158, "percentage": 95.4, "elapsed_time": "1:28:28", "remaining_time": "0:04:15", "throughput": 1318.2, "total_tokens": 6997792}
|
| 7362 |
+
{"current_steps": 36715, "total_steps": 38480, "loss": 0.046, "lr": 3.2011571720556575e-07, "epoch": 19.08264033264033, "percentage": 95.41, "elapsed_time": "1:28:29", "remaining_time": "0:04:15", "throughput": 1318.19, "total_tokens": 6998688}
|
| 7363 |
+
{"current_steps": 36720, "total_steps": 38480, "loss": 0.1702, "lr": 3.183094744564213e-07, "epoch": 19.085239085239085, "percentage": 95.43, "elapsed_time": "1:28:30", "remaining_time": "0:04:14", "throughput": 1318.21, "total_tokens": 6999680}
|
| 7364 |
+
{"current_steps": 36725, "total_steps": 38480, "loss": 0.0578, "lr": 3.165083093191801e-07, "epoch": 19.08783783783784, "percentage": 95.44, "elapsed_time": "1:28:30", "remaining_time": "0:04:13", "throughput": 1318.21, "total_tokens": 7000640}
|
| 7365 |
+
{"current_steps": 36730, "total_steps": 38480, "loss": 0.028, "lr": 3.1471222216438723e-07, "epoch": 19.09043659043659, "percentage": 95.45, "elapsed_time": "1:28:31", "remaining_time": "0:04:13", "throughput": 1318.21, "total_tokens": 7001568}
|
| 7366 |
+
{"current_steps": 36735, "total_steps": 38480, "loss": 0.1083, "lr": 3.129212133615389e-07, "epoch": 19.093035343035343, "percentage": 95.47, "elapsed_time": "1:28:32", "remaining_time": "0:04:12", "throughput": 1318.21, "total_tokens": 7002496}
|
| 7367 |
+
{"current_steps": 36740, "total_steps": 38480, "loss": 0.114, "lr": 3.111352832790904e-07, "epoch": 19.095634095634097, "percentage": 95.48, "elapsed_time": "1:28:32", "remaining_time": "0:04:11", "throughput": 1318.22, "total_tokens": 7003456}
|
| 7368 |
+
{"current_steps": 36745, "total_steps": 38480, "loss": 0.1547, "lr": 3.093544322844505e-07, "epoch": 19.098232848232847, "percentage": 95.49, "elapsed_time": "1:28:33", "remaining_time": "0:04:10", "throughput": 1318.23, "total_tokens": 7004416}
|
| 7369 |
+
{"current_steps": 36750, "total_steps": 38480, "loss": 0.0858, "lr": 3.07578660743979e-07, "epoch": 19.1008316008316, "percentage": 95.5, "elapsed_time": "1:28:34", "remaining_time": "0:04:10", "throughput": 1318.25, "total_tokens": 7005440}
|
| 7370 |
+
{"current_steps": 36755, "total_steps": 38480, "loss": 0.0649, "lr": 3.058079690229948e-07, "epoch": 19.103430353430355, "percentage": 95.52, "elapsed_time": "1:28:34", "remaining_time": "0:04:09", "throughput": 1318.24, "total_tokens": 7006336}
|
| 7371 |
+
{"current_steps": 36760, "total_steps": 38480, "loss": 0.0862, "lr": 3.040423574857759e-07, "epoch": 19.106029106029105, "percentage": 95.53, "elapsed_time": "1:28:35", "remaining_time": "0:04:08", "throughput": 1318.24, "total_tokens": 7007264}
|
| 7372 |
+
{"current_steps": 36765, "total_steps": 38480, "loss": 0.069, "lr": 3.0228182649555126e-07, "epoch": 19.10862785862786, "percentage": 95.54, "elapsed_time": "1:28:36", "remaining_time": "0:04:07", "throughput": 1318.25, "total_tokens": 7008224}
|
| 7373 |
+
{"current_steps": 36770, "total_steps": 38480, "loss": 0.1026, "lr": 3.00526376414495e-07, "epoch": 19.111226611226613, "percentage": 95.56, "elapsed_time": "1:28:36", "remaining_time": "0:04:07", "throughput": 1318.25, "total_tokens": 7009152}
|
| 7374 |
+
{"current_steps": 36775, "total_steps": 38480, "loss": 0.1017, "lr": 2.9877600760375434e-07, "epoch": 19.113825363825363, "percentage": 95.57, "elapsed_time": "1:28:37", "remaining_time": "0:04:06", "throughput": 1318.26, "total_tokens": 7010112}
|
| 7375 |
+
{"current_steps": 36780, "total_steps": 38480, "loss": 0.1637, "lr": 2.970307204234163e-07, "epoch": 19.116424116424117, "percentage": 95.58, "elapsed_time": "1:28:38", "remaining_time": "0:04:05", "throughput": 1318.26, "total_tokens": 7011040}
|
| 7376 |
+
{"current_steps": 36785, "total_steps": 38480, "loss": 0.0493, "lr": 2.9529051523253247e-07, "epoch": 19.11902286902287, "percentage": 95.6, "elapsed_time": "1:28:39", "remaining_time": "0:04:05", "throughput": 1318.25, "total_tokens": 7011936}
|
| 7377 |
+
{"current_steps": 36790, "total_steps": 38480, "loss": 0.0768, "lr": 2.935553923890999e-07, "epoch": 19.12162162162162, "percentage": 95.61, "elapsed_time": "1:28:39", "remaining_time": "0:04:04", "throughput": 1318.26, "total_tokens": 7012928}
|
| 7378 |
+
{"current_steps": 36795, "total_steps": 38480, "loss": 0.1658, "lr": 2.9182535225007744e-07, "epoch": 19.124220374220375, "percentage": 95.62, "elapsed_time": "1:28:40", "remaining_time": "0:04:03", "throughput": 1318.27, "total_tokens": 7013888}
|
| 7379 |
+
{"current_steps": 36800, "total_steps": 38480, "loss": 0.0857, "lr": 2.9010039517137487e-07, "epoch": 19.126819126819125, "percentage": 95.63, "elapsed_time": "1:28:41", "remaining_time": "0:04:02", "throughput": 1318.28, "total_tokens": 7014880}
|
| 7380 |
+
{"current_steps": 36805, "total_steps": 38480, "loss": 0.151, "lr": 2.883805215078583e-07, "epoch": 19.12941787941788, "percentage": 95.65, "elapsed_time": "1:28:41", "remaining_time": "0:04:02", "throughput": 1318.3, "total_tokens": 7015872}
|
| 7381 |
+
{"current_steps": 36810, "total_steps": 38480, "loss": 0.1465, "lr": 2.8666573161334475e-07, "epoch": 19.132016632016633, "percentage": 95.66, "elapsed_time": "1:28:42", "remaining_time": "0:04:01", "throughput": 1318.3, "total_tokens": 7016832}
|
| 7382 |
+
{"current_steps": 36815, "total_steps": 38480, "loss": 0.0488, "lr": 2.849560258406075e-07, "epoch": 19.134615384615383, "percentage": 95.67, "elapsed_time": "1:28:43", "remaining_time": "0:04:00", "throughput": 1318.31, "total_tokens": 7017792}
|
| 7383 |
+
{"current_steps": 36820, "total_steps": 38480, "loss": 0.1574, "lr": 2.8325140454137646e-07, "epoch": 19.137214137214137, "percentage": 95.69, "elapsed_time": "1:28:44", "remaining_time": "0:04:00", "throughput": 1318.33, "total_tokens": 7018816}
|
| 7384 |
+
{"current_steps": 36825, "total_steps": 38480, "loss": 0.0932, "lr": 2.815518680663293e-07, "epoch": 19.13981288981289, "percentage": 95.7, "elapsed_time": "1:28:44", "remaining_time": "0:03:59", "throughput": 1318.32, "total_tokens": 7019712}
|
| 7385 |
+
{"current_steps": 36830, "total_steps": 38480, "loss": 0.0936, "lr": 2.798574167651058e-07, "epoch": 19.14241164241164, "percentage": 95.71, "elapsed_time": "1:28:45", "remaining_time": "0:03:58", "throughput": 1318.32, "total_tokens": 7020640}
|
| 7386 |
+
{"current_steps": 36835, "total_steps": 38480, "loss": 0.2183, "lr": 2.7816805098628826e-07, "epoch": 19.145010395010395, "percentage": 95.73, "elapsed_time": "1:28:46", "remaining_time": "0:03:57", "throughput": 1318.34, "total_tokens": 7021632}
|
| 7387 |
+
{"current_steps": 36840, "total_steps": 38480, "loss": 0.122, "lr": 2.7648377107742375e-07, "epoch": 19.14760914760915, "percentage": 95.74, "elapsed_time": "1:28:46", "remaining_time": "0:03:57", "throughput": 1318.36, "total_tokens": 7022688}
|
| 7388 |
+
{"current_steps": 36845, "total_steps": 38480, "loss": 0.1587, "lr": 2.748045773850072e-07, "epoch": 19.1502079002079, "percentage": 95.75, "elapsed_time": "1:28:47", "remaining_time": "0:03:56", "throughput": 1318.37, "total_tokens": 7023648}
|
| 7389 |
+
{"current_steps": 36850, "total_steps": 38480, "loss": 0.0872, "lr": 2.731304702544929e-07, "epoch": 19.152806652806653, "percentage": 95.76, "elapsed_time": "1:28:48", "remaining_time": "0:03:55", "throughput": 1318.37, "total_tokens": 7024576}
|
| 7390 |
+
{"current_steps": 36855, "total_steps": 38480, "loss": 0.204, "lr": 2.7146145003028036e-07, "epoch": 19.155405405405407, "percentage": 95.78, "elapsed_time": "1:28:48", "remaining_time": "0:03:54", "throughput": 1318.38, "total_tokens": 7025536}
|
| 7391 |
+
{"current_steps": 36860, "total_steps": 38480, "loss": 0.0779, "lr": 2.6979751705572544e-07, "epoch": 19.158004158004157, "percentage": 95.79, "elapsed_time": "1:28:49", "remaining_time": "0:03:54", "throughput": 1318.38, "total_tokens": 7026496}
|
| 7392 |
+
{"current_steps": 36865, "total_steps": 38480, "loss": 0.1728, "lr": 2.6813867167314324e-07, "epoch": 19.16060291060291, "percentage": 95.8, "elapsed_time": "1:28:50", "remaining_time": "0:03:53", "throughput": 1318.39, "total_tokens": 7027456}
|
| 7393 |
+
{"current_steps": 36870, "total_steps": 38480, "loss": 0.1078, "lr": 2.664849142237968e-07, "epoch": 19.16320166320166, "percentage": 95.82, "elapsed_time": "1:28:51", "remaining_time": "0:03:52", "throughput": 1318.38, "total_tokens": 7028320}
|
| 7394 |
+
{"current_steps": 36875, "total_steps": 38480, "loss": 0.3906, "lr": 2.6483624504790026e-07, "epoch": 19.165800415800415, "percentage": 95.83, "elapsed_time": "1:28:51", "remaining_time": "0:03:52", "throughput": 1318.39, "total_tokens": 7029280}
|
| 7395 |
+
{"current_steps": 36880, "total_steps": 38480, "loss": 0.1687, "lr": 2.6319266448462666e-07, "epoch": 19.16839916839917, "percentage": 95.84, "elapsed_time": "1:28:52", "remaining_time": "0:03:51", "throughput": 1318.41, "total_tokens": 7030304}
|
| 7396 |
+
{"current_steps": 36885, "total_steps": 38480, "loss": 0.1239, "lr": 2.6155417287209994e-07, "epoch": 19.17099792099792, "percentage": 95.85, "elapsed_time": "1:28:53", "remaining_time": "0:03:50", "throughput": 1318.4, "total_tokens": 7031200}
|
| 7397 |
+
{"current_steps": 36890, "total_steps": 38480, "loss": 0.195, "lr": 2.5992077054739774e-07, "epoch": 19.173596673596673, "percentage": 95.87, "elapsed_time": "1:28:53", "remaining_time": "0:03:49", "throughput": 1318.41, "total_tokens": 7032192}
|
| 7398 |
+
{"current_steps": 36895, "total_steps": 38480, "loss": 0.1017, "lr": 2.5829245784654843e-07, "epoch": 19.176195426195427, "percentage": 95.88, "elapsed_time": "1:28:54", "remaining_time": "0:03:49", "throughput": 1318.41, "total_tokens": 7033088}
|
| 7399 |
+
{"current_steps": 36900, "total_steps": 38480, "loss": 0.114, "lr": 2.566692351045341e-07, "epoch": 19.178794178794178, "percentage": 95.89, "elapsed_time": "1:28:55", "remaining_time": "0:03:48", "throughput": 1318.43, "total_tokens": 7034144}
|
| 7400 |
+
{"current_steps": 36905, "total_steps": 38480, "loss": 0.165, "lr": 2.5505110265529044e-07, "epoch": 19.18139293139293, "percentage": 95.91, "elapsed_time": "1:28:55", "remaining_time": "0:03:47", "throughput": 1318.45, "total_tokens": 7035136}
|
| 7401 |
+
{"current_steps": 36910, "total_steps": 38480, "loss": 0.0703, "lr": 2.534380608317066e-07, "epoch": 19.183991683991685, "percentage": 95.92, "elapsed_time": "1:28:56", "remaining_time": "0:03:46", "throughput": 1318.46, "total_tokens": 7036128}
|
| 7402 |
+
{"current_steps": 36915, "total_steps": 38480, "loss": 0.1035, "lr": 2.5183010996562007e-07, "epoch": 19.186590436590436, "percentage": 95.93, "elapsed_time": "1:28:57", "remaining_time": "0:03:46", "throughput": 1318.46, "total_tokens": 7037088}
|
| 7403 |
+
{"current_steps": 36920, "total_steps": 38480, "loss": 0.1722, "lr": 2.502272503878328e-07, "epoch": 19.18918918918919, "percentage": 95.95, "elapsed_time": "1:28:58", "remaining_time": "0:03:45", "throughput": 1318.46, "total_tokens": 7037984}
|
| 7404 |
+
{"current_steps": 36925, "total_steps": 38480, "loss": 0.1954, "lr": 2.4862948242808383e-07, "epoch": 19.191787941787943, "percentage": 95.96, "elapsed_time": "1:28:58", "remaining_time": "0:03:44", "throughput": 1318.47, "total_tokens": 7038944}
|
| 7405 |
+
{"current_steps": 36930, "total_steps": 38480, "loss": 0.2661, "lr": 2.4703680641507407e-07, "epoch": 19.194386694386694, "percentage": 95.97, "elapsed_time": "1:28:59", "remaining_time": "0:03:44", "throughput": 1318.47, "total_tokens": 7039872}
|
| 7406 |
+
{"current_steps": 36935, "total_steps": 38480, "loss": 0.1487, "lr": 2.454492226764582e-07, "epoch": 19.196985446985448, "percentage": 95.98, "elapsed_time": "1:29:00", "remaining_time": "0:03:43", "throughput": 1318.46, "total_tokens": 7040768}
|
| 7407 |
+
{"current_steps": 36940, "total_steps": 38480, "loss": 0.1226, "lr": 2.43866731538836e-07, "epoch": 19.1995841995842, "percentage": 96.0, "elapsed_time": "1:29:00", "remaining_time": "0:03:42", "throughput": 1318.47, "total_tokens": 7041760}
|
| 7408 |
+
{"current_steps": 36945, "total_steps": 38480, "loss": 0.1808, "lr": 2.422893333277665e-07, "epoch": 19.20218295218295, "percentage": 96.01, "elapsed_time": "1:29:01", "remaining_time": "0:03:41", "throughput": 1318.49, "total_tokens": 7042752}
|
| 7409 |
+
{"current_steps": 36950, "total_steps": 38480, "loss": 0.1086, "lr": 2.40717028367754e-07, "epoch": 19.204781704781706, "percentage": 96.02, "elapsed_time": "1:29:02", "remaining_time": "0:03:41", "throughput": 1318.49, "total_tokens": 7043680}
|
| 7410 |
+
{"current_steps": 36955, "total_steps": 38480, "loss": 0.1171, "lr": 2.39149816982262e-07, "epoch": 19.207380457380456, "percentage": 96.04, "elapsed_time": "1:29:02", "remaining_time": "0:03:40", "throughput": 1318.5, "total_tokens": 7044704}
|
| 7411 |
+
{"current_steps": 36960, "total_steps": 38480, "loss": 0.2331, "lr": 2.3758769949370762e-07, "epoch": 19.20997920997921, "percentage": 96.05, "elapsed_time": "1:29:03", "remaining_time": "0:03:39", "throughput": 1318.5, "total_tokens": 7045600}
|
| 7412 |
+
{"current_steps": 36965, "total_steps": 38480, "loss": 0.1054, "lr": 2.3603067622344488e-07, "epoch": 19.212577962577964, "percentage": 96.06, "elapsed_time": "1:29:04", "remaining_time": "0:03:39", "throughput": 1318.51, "total_tokens": 7046592}
|
| 7413 |
+
{"current_steps": 36970, "total_steps": 38480, "loss": 0.1332, "lr": 2.3447874749180088e-07, "epoch": 19.215176715176714, "percentage": 96.08, "elapsed_time": "1:29:05", "remaining_time": "0:03:38", "throughput": 1318.52, "total_tokens": 7047552}
|
| 7414 |
+
{"current_steps": 36975, "total_steps": 38480, "loss": 0.1319, "lr": 2.3293191361803967e-07, "epoch": 19.217775467775468, "percentage": 96.09, "elapsed_time": "1:29:05", "remaining_time": "0:03:37", "throughput": 1318.52, "total_tokens": 7048480}
|
| 7415 |
+
{"current_steps": 36980, "total_steps": 38480, "loss": 0.0791, "lr": 2.3139017492038172e-07, "epoch": 19.22037422037422, "percentage": 96.1, "elapsed_time": "1:29:06", "remaining_time": "0:03:36", "throughput": 1318.52, "total_tokens": 7049408}
|
| 7416 |
+
{"current_steps": 36985, "total_steps": 38480, "loss": 0.0711, "lr": 2.2985353171599833e-07, "epoch": 19.222972972972972, "percentage": 96.11, "elapsed_time": "1:29:07", "remaining_time": "0:03:36", "throughput": 1318.52, "total_tokens": 7050336}
|
| 7417 |
+
{"current_steps": 36990, "total_steps": 38480, "loss": 0.1553, "lr": 2.2832198432101714e-07, "epoch": 19.225571725571726, "percentage": 96.13, "elapsed_time": "1:29:07", "remaining_time": "0:03:35", "throughput": 1318.54, "total_tokens": 7051360}
|
| 7418 |
+
{"current_steps": 36995, "total_steps": 38480, "loss": 0.1143, "lr": 2.2679553305051116e-07, "epoch": 19.22817047817048, "percentage": 96.14, "elapsed_time": "1:29:08", "remaining_time": "0:03:34", "throughput": 1318.54, "total_tokens": 7052288}
|
| 7419 |
+
{"current_steps": 37000, "total_steps": 38480, "loss": 0.1458, "lr": 2.2527417821850972e-07, "epoch": 19.23076923076923, "percentage": 96.15, "elapsed_time": "1:29:09", "remaining_time": "0:03:33", "throughput": 1318.55, "total_tokens": 7053248}
|
| 7420 |
+
{"current_steps": 37005, "total_steps": 38480, "loss": 0.0819, "lr": 2.2375792013798748e-07, "epoch": 19.233367983367984, "percentage": 96.17, "elapsed_time": "1:29:09", "remaining_time": "0:03:33", "throughput": 1318.56, "total_tokens": 7054240}
|
| 7421 |
+
{"current_steps": 37010, "total_steps": 38480, "loss": 0.0846, "lr": 2.22246759120881e-07, "epoch": 19.235966735966738, "percentage": 96.18, "elapsed_time": "1:29:10", "remaining_time": "0:03:32", "throughput": 1318.59, "total_tokens": 7055328}
|
| 7422 |
+
{"current_steps": 37015, "total_steps": 38480, "loss": 0.1237, "lr": 2.207406954780694e-07, "epoch": 19.238565488565488, "percentage": 96.19, "elapsed_time": "1:29:11", "remaining_time": "0:03:31", "throughput": 1318.6, "total_tokens": 7056288}
|
| 7423 |
+
{"current_steps": 37020, "total_steps": 38480, "loss": 0.1336, "lr": 2.1923972951938543e-07, "epoch": 19.241164241164242, "percentage": 96.21, "elapsed_time": "1:29:12", "remaining_time": "0:03:31", "throughput": 1318.61, "total_tokens": 7057280}
|
| 7424 |
+
{"current_steps": 37025, "total_steps": 38480, "loss": 0.0573, "lr": 2.1774386155361538e-07, "epoch": 19.243762993762992, "percentage": 96.22, "elapsed_time": "1:29:12", "remaining_time": "0:03:30", "throughput": 1318.61, "total_tokens": 7058240}
|
| 7425 |
+
{"current_steps": 37030, "total_steps": 38480, "loss": 0.1071, "lr": 2.1625309188849363e-07, "epoch": 19.246361746361746, "percentage": 96.23, "elapsed_time": "1:29:13", "remaining_time": "0:03:29", "throughput": 1318.63, "total_tokens": 7059264}
|
| 7426 |
+
{"current_steps": 37035, "total_steps": 38480, "loss": 0.0936, "lr": 2.1476742083070546e-07, "epoch": 19.2489604989605, "percentage": 96.24, "elapsed_time": "1:29:14", "remaining_time": "0:03:28", "throughput": 1318.64, "total_tokens": 7060224}
|
| 7427 |
+
{"current_steps": 37040, "total_steps": 38480, "loss": 0.1143, "lr": 2.132868486858869e-07, "epoch": 19.25155925155925, "percentage": 96.26, "elapsed_time": "1:29:14", "remaining_time": "0:03:28", "throughput": 1318.64, "total_tokens": 7061152}
|
| 7428 |
+
{"current_steps": 37045, "total_steps": 38480, "loss": 0.1498, "lr": 2.118113757586332e-07, "epoch": 19.254158004158004, "percentage": 96.27, "elapsed_time": "1:29:15", "remaining_time": "0:03:27", "throughput": 1318.65, "total_tokens": 7062112}
|
| 7429 |
+
{"current_steps": 37050, "total_steps": 38480, "loss": 0.0238, "lr": 2.1034100235248212e-07, "epoch": 19.256756756756758, "percentage": 96.28, "elapsed_time": "1:29:16", "remaining_time": "0:03:26", "throughput": 1318.65, "total_tokens": 7063040}
|
| 7430 |
+
{"current_steps": 37055, "total_steps": 38480, "loss": 0.1291, "lr": 2.0887572876992223e-07, "epoch": 19.259355509355508, "percentage": 96.3, "elapsed_time": "1:29:16", "remaining_time": "0:03:26", "throughput": 1318.65, "total_tokens": 7063968}
|
| 7431 |
+
{"current_steps": 37060, "total_steps": 38480, "loss": 0.087, "lr": 2.0741555531239855e-07, "epoch": 19.261954261954262, "percentage": 96.31, "elapsed_time": "1:29:17", "remaining_time": "0:03:25", "throughput": 1318.67, "total_tokens": 7064992}
|
| 7432 |
+
{"current_steps": 37065, "total_steps": 38480, "loss": 0.1066, "lr": 2.0596048228030128e-07, "epoch": 19.264553014553016, "percentage": 96.32, "elapsed_time": "1:29:18", "remaining_time": "0:03:24", "throughput": 1318.68, "total_tokens": 7065984}
|
| 7433 |
+
{"current_steps": 37070, "total_steps": 38480, "loss": 0.2216, "lr": 2.0451050997297162e-07, "epoch": 19.267151767151766, "percentage": 96.34, "elapsed_time": "1:29:19", "remaining_time": "0:03:23", "throughput": 1318.7, "total_tokens": 7067008}
|
| 7434 |
+
{"current_steps": 37075, "total_steps": 38480, "loss": 0.1244, "lr": 2.03065638688707e-07, "epoch": 19.26975051975052, "percentage": 96.35, "elapsed_time": "1:29:19", "remaining_time": "0:03:23", "throughput": 1318.72, "total_tokens": 7068032}
|
| 7435 |
+
{"current_steps": 37080, "total_steps": 38480, "loss": 0.0852, "lr": 2.016258687247502e-07, "epoch": 19.272349272349274, "percentage": 96.36, "elapsed_time": "1:29:20", "remaining_time": "0:03:22", "throughput": 1318.72, "total_tokens": 7068960}
|
| 7436 |
+
{"current_steps": 37085, "total_steps": 38480, "loss": 0.0781, "lr": 2.0019120037730043e-07, "epoch": 19.274948024948024, "percentage": 96.37, "elapsed_time": "1:29:21", "remaining_time": "0:03:21", "throughput": 1318.71, "total_tokens": 7069856}
|
| 7437 |
+
{"current_steps": 37090, "total_steps": 38480, "loss": 0.1291, "lr": 1.987616339414966e-07, "epoch": 19.277546777546778, "percentage": 96.39, "elapsed_time": "1:29:21", "remaining_time": "0:03:20", "throughput": 1318.72, "total_tokens": 7070816}
|
| 7438 |
+
{"current_steps": 37095, "total_steps": 38480, "loss": 0.1157, "lr": 1.9733716971143678e-07, "epoch": 19.28014553014553, "percentage": 96.4, "elapsed_time": "1:29:22", "remaining_time": "0:03:20", "throughput": 1318.72, "total_tokens": 7071744}
|
| 7439 |
+
{"current_steps": 37100, "total_steps": 38480, "loss": 0.1562, "lr": 1.9591780798016989e-07, "epoch": 19.282744282744282, "percentage": 96.41, "elapsed_time": "1:29:23", "remaining_time": "0:03:19", "throughput": 1318.73, "total_tokens": 7072736}
|
| 7440 |
+
{"current_steps": 37105, "total_steps": 38480, "loss": 0.0848, "lr": 1.9450354903968737e-07, "epoch": 19.285343035343036, "percentage": 96.43, "elapsed_time": "1:29:23", "remaining_time": "0:03:18", "throughput": 1318.72, "total_tokens": 7073600}
|
| 7441 |
+
{"current_steps": 37110, "total_steps": 38480, "loss": 0.1953, "lr": 1.930943931809398e-07, "epoch": 19.287941787941786, "percentage": 96.44, "elapsed_time": "1:29:24", "remaining_time": "0:03:18", "throughput": 1318.72, "total_tokens": 7074528}
|
| 7442 |
+
{"current_steps": 37115, "total_steps": 38480, "loss": 0.0734, "lr": 1.9169034069382584e-07, "epoch": 19.29054054054054, "percentage": 96.45, "elapsed_time": "1:29:25", "remaining_time": "0:03:17", "throughput": 1318.73, "total_tokens": 7075488}
|
| 7443 |
+
{"current_steps": 37120, "total_steps": 38480, "loss": 0.1451, "lr": 1.9029139186718948e-07, "epoch": 19.293139293139294, "percentage": 96.47, "elapsed_time": "1:29:26", "remaining_time": "0:03:16", "throughput": 1318.73, "total_tokens": 7076448}
|
| 7444 |
+
{"current_steps": 37125, "total_steps": 38480, "loss": 0.187, "lr": 1.8889754698882822e-07, "epoch": 19.295738045738045, "percentage": 96.48, "elapsed_time": "1:29:26", "remaining_time": "0:03:15", "throughput": 1318.73, "total_tokens": 7077344}
|
| 7445 |
+
{"current_steps": 37130, "total_steps": 38480, "loss": 0.1052, "lr": 1.8750880634548772e-07, "epoch": 19.2983367983368, "percentage": 96.49, "elapsed_time": "1:29:27", "remaining_time": "0:03:15", "throughput": 1318.73, "total_tokens": 7078272}
|
| 7446 |
+
{"current_steps": 37135, "total_steps": 38480, "loss": 0.074, "lr": 1.8612517022286725e-07, "epoch": 19.300935550935552, "percentage": 96.5, "elapsed_time": "1:29:28", "remaining_time": "0:03:14", "throughput": 1318.72, "total_tokens": 7079168}
|
| 7447 |
+
{"current_steps": 37140, "total_steps": 38480, "loss": 0.1099, "lr": 1.8474663890561405e-07, "epoch": 19.303534303534303, "percentage": 96.52, "elapsed_time": "1:29:28", "remaining_time": "0:03:13", "throughput": 1318.73, "total_tokens": 7080128}
|
| 7448 |
+
{"current_steps": 37145, "total_steps": 38480, "loss": 0.2409, "lr": 1.8337321267732356e-07, "epoch": 19.306133056133056, "percentage": 96.53, "elapsed_time": "1:29:29", "remaining_time": "0:03:12", "throughput": 1318.74, "total_tokens": 7081120}
|
| 7449 |
+
{"current_steps": 37150, "total_steps": 38480, "loss": 0.2103, "lr": 1.8200489182054193e-07, "epoch": 19.30873180873181, "percentage": 96.54, "elapsed_time": "1:29:30", "remaining_time": "0:03:12", "throughput": 1318.75, "total_tokens": 7082080}
|
| 7450 |
+
{"current_steps": 37155, "total_steps": 38480, "loss": 0.059, "lr": 1.80641676616769e-07, "epoch": 19.31133056133056, "percentage": 96.56, "elapsed_time": "1:29:30", "remaining_time": "0:03:11", "throughput": 1318.77, "total_tokens": 7083104}
|
| 7451 |
+
{"current_steps": 37160, "total_steps": 38480, "loss": 0.0991, "lr": 1.7928356734644436e-07, "epoch": 19.313929313929314, "percentage": 96.57, "elapsed_time": "1:29:31", "remaining_time": "0:03:10", "throughput": 1318.78, "total_tokens": 7084096}
|
| 7452 |
+
{"current_steps": 37165, "total_steps": 38480, "loss": 0.1022, "lr": 1.779305642889667e-07, "epoch": 19.316528066528065, "percentage": 96.58, "elapsed_time": "1:29:32", "remaining_time": "0:03:10", "throughput": 1318.8, "total_tokens": 7085120}
|
| 7453 |
+
{"current_steps": 37170, "total_steps": 38480, "loss": 0.1285, "lr": 1.7658266772268284e-07, "epoch": 19.31912681912682, "percentage": 96.6, "elapsed_time": "1:29:33", "remaining_time": "0:03:09", "throughput": 1318.81, "total_tokens": 7086112}
|
| 7454 |
+
{"current_steps": 37175, "total_steps": 38480, "loss": 0.1901, "lr": 1.7523987792488483e-07, "epoch": 19.321725571725572, "percentage": 96.61, "elapsed_time": "1:29:33", "remaining_time": "0:03:08", "throughput": 1318.81, "total_tokens": 7087040}
|
| 7455 |
+
{"current_steps": 37180, "total_steps": 38480, "loss": 0.1788, "lr": 1.739021951718184e-07, "epoch": 19.324324324324323, "percentage": 96.62, "elapsed_time": "1:29:34", "remaining_time": "0:03:07", "throughput": 1318.83, "total_tokens": 7088064}
|
| 7456 |
+
{"current_steps": 37185, "total_steps": 38480, "loss": 0.2472, "lr": 1.7256961973867447e-07, "epoch": 19.326923076923077, "percentage": 96.63, "elapsed_time": "1:29:35", "remaining_time": "0:03:07", "throughput": 1318.83, "total_tokens": 7088960}
|
| 7457 |
+
{"current_steps": 37190, "total_steps": 38480, "loss": 0.1178, "lr": 1.7124215189959768e-07, "epoch": 19.32952182952183, "percentage": 96.65, "elapsed_time": "1:29:35", "remaining_time": "0:03:06", "throughput": 1318.83, "total_tokens": 7089888}
|
| 7458 |
+
{"current_steps": 37195, "total_steps": 38480, "loss": 0.0835, "lr": 1.6991979192767793e-07, "epoch": 19.33212058212058, "percentage": 96.66, "elapsed_time": "1:29:36", "remaining_time": "0:03:05", "throughput": 1318.84, "total_tokens": 7090880}
|
| 7459 |
+
{"current_steps": 37200, "total_steps": 38480, "loss": 0.1826, "lr": 1.686025400949559e-07, "epoch": 19.334719334719335, "percentage": 96.67, "elapsed_time": "1:29:37", "remaining_time": "0:03:05", "throughput": 1318.85, "total_tokens": 7091872}
|
| 7460 |
+
{"current_steps": 37205, "total_steps": 38480, "loss": 0.0383, "lr": 1.6729039667242596e-07, "epoch": 19.33731808731809, "percentage": 96.69, "elapsed_time": "1:29:37", "remaining_time": "0:03:04", "throughput": 1318.85, "total_tokens": 7092768}
|
| 7461 |
+
{"current_steps": 37210, "total_steps": 38480, "loss": 0.0807, "lr": 1.6598336193002216e-07, "epoch": 19.33991683991684, "percentage": 96.7, "elapsed_time": "1:29:38", "remaining_time": "0:03:03", "throughput": 1318.86, "total_tokens": 7093728}
|
| 7462 |
+
{"current_steps": 37215, "total_steps": 38480, "loss": 0.1097, "lr": 1.64681436136635e-07, "epoch": 19.342515592515593, "percentage": 96.71, "elapsed_time": "1:29:39", "remaining_time": "0:03:02", "throughput": 1318.86, "total_tokens": 7094656}
|
| 7463 |
+
{"current_steps": 37220, "total_steps": 38480, "loss": 0.1048, "lr": 1.6338461956010022e-07, "epoch": 19.345114345114347, "percentage": 96.73, "elapsed_time": "1:29:40", "remaining_time": "0:03:02", "throughput": 1318.86, "total_tokens": 7095616}
|
| 7464 |
+
{"current_steps": 37225, "total_steps": 38480, "loss": 0.0584, "lr": 1.6209291246720714e-07, "epoch": 19.347713097713097, "percentage": 96.74, "elapsed_time": "1:29:40", "remaining_time": "0:03:01", "throughput": 1318.86, "total_tokens": 7096512}
|
| 7465 |
+
{"current_steps": 37230, "total_steps": 38480, "loss": 0.1753, "lr": 1.6080631512368772e-07, "epoch": 19.35031185031185, "percentage": 96.75, "elapsed_time": "1:29:41", "remaining_time": "0:03:00", "throughput": 1318.86, "total_tokens": 7097440}
|
| 7466 |
+
{"current_steps": 37235, "total_steps": 38480, "loss": 0.0747, "lr": 1.5952482779422463e-07, "epoch": 19.352910602910605, "percentage": 96.76, "elapsed_time": "1:29:42", "remaining_time": "0:02:59", "throughput": 1318.88, "total_tokens": 7098464}
|
| 7467 |
+
{"current_steps": 37240, "total_steps": 38480, "loss": 0.1681, "lr": 1.582484507424542e-07, "epoch": 19.355509355509355, "percentage": 96.78, "elapsed_time": "1:29:42", "remaining_time": "0:02:59", "throughput": 1318.89, "total_tokens": 7099456}
|
| 7468 |
+
{"current_steps": 37245, "total_steps": 38480, "loss": 0.1771, "lr": 1.569771842309553e-07, "epoch": 19.35810810810811, "percentage": 96.79, "elapsed_time": "1:29:43", "remaining_time": "0:02:58", "throughput": 1318.89, "total_tokens": 7100384}
|
| 7469 |
+
{"current_steps": 37250, "total_steps": 38480, "loss": 0.1031, "lr": 1.5571102852125762e-07, "epoch": 19.36070686070686, "percentage": 96.8, "elapsed_time": "1:29:44", "remaining_time": "0:02:57", "throughput": 1318.91, "total_tokens": 7101408}
|
| 7470 |
+
{"current_steps": 37255, "total_steps": 38480, "loss": 0.1251, "lr": 1.5444998387383892e-07, "epoch": 19.363305613305613, "percentage": 96.82, "elapsed_time": "1:29:45", "remaining_time": "0:02:57", "throughput": 1318.91, "total_tokens": 7102336}
|
| 7471 |
+
{"current_steps": 37260, "total_steps": 38480, "loss": 0.1147, "lr": 1.5319405054813052e-07, "epoch": 19.365904365904367, "percentage": 96.83, "elapsed_time": "1:29:45", "remaining_time": "0:02:56", "throughput": 1318.92, "total_tokens": 7103360}
|
| 7472 |
+
{"current_steps": 37265, "total_steps": 38480, "loss": 0.0945, "lr": 1.5194322880250356e-07, "epoch": 19.368503118503117, "percentage": 96.84, "elapsed_time": "1:29:46", "remaining_time": "0:02:55", "throughput": 1318.91, "total_tokens": 7104224}
|
| 7473 |
+
{"current_steps": 37270, "total_steps": 38480, "loss": 0.139, "lr": 1.5069751889428274e-07, "epoch": 19.37110187110187, "percentage": 96.86, "elapsed_time": "1:29:47", "remaining_time": "0:02:54", "throughput": 1318.86, "total_tokens": 7105152}
|
| 7474 |
+
{"current_steps": 37275, "total_steps": 38480, "loss": 0.0815, "lr": 1.4945692107973808e-07, "epoch": 19.373700623700625, "percentage": 96.87, "elapsed_time": "1:29:48", "remaining_time": "0:02:54", "throughput": 1318.88, "total_tokens": 7106208}
|
| 7475 |
+
{"current_steps": 37280, "total_steps": 38480, "loss": 0.124, "lr": 1.4822143561409317e-07, "epoch": 19.376299376299375, "percentage": 96.88, "elapsed_time": "1:29:48", "remaining_time": "0:02:53", "throughput": 1318.88, "total_tokens": 7107136}
|
| 7476 |
+
{"current_steps": 37285, "total_steps": 38480, "loss": 0.1934, "lr": 1.4699106275151975e-07, "epoch": 19.37889812889813, "percentage": 96.89, "elapsed_time": "1:29:49", "remaining_time": "0:02:52", "throughput": 1318.89, "total_tokens": 7108128}
|
| 7477 |
+
{"current_steps": 37290, "total_steps": 38480, "loss": 0.2367, "lr": 1.4576580274512642e-07, "epoch": 19.381496881496883, "percentage": 96.91, "elapsed_time": "1:29:50", "remaining_time": "0:02:52", "throughput": 1318.9, "total_tokens": 7109088}
|
| 7478 |
+
{"current_steps": 37295, "total_steps": 38480, "loss": 0.0762, "lr": 1.4454565584698654e-07, "epoch": 19.384095634095633, "percentage": 96.92, "elapsed_time": "1:29:50", "remaining_time": "0:02:51", "throughput": 1318.9, "total_tokens": 7109984}
|
| 7479 |
+
{"current_steps": 37300, "total_steps": 38480, "loss": 0.1094, "lr": 1.4333062230810769e-07, "epoch": 19.386694386694387, "percentage": 96.93, "elapsed_time": "1:29:51", "remaining_time": "0:02:50", "throughput": 1318.9, "total_tokens": 7110912}
|
| 7480 |
+
{"current_steps": 37305, "total_steps": 38480, "loss": 0.1125, "lr": 1.4212070237845378e-07, "epoch": 19.38929313929314, "percentage": 96.95, "elapsed_time": "1:29:52", "remaining_time": "0:02:49", "throughput": 1318.9, "total_tokens": 7111872}
|
| 7481 |
+
{"current_steps": 37310, "total_steps": 38480, "loss": 0.0794, "lr": 1.409158963069368e-07, "epoch": 19.39189189189189, "percentage": 96.96, "elapsed_time": "1:29:52", "remaining_time": "0:02:49", "throughput": 1318.91, "total_tokens": 7112832}
|
| 7482 |
+
{"current_steps": 37315, "total_steps": 38480, "loss": 0.1149, "lr": 1.3971620434140852e-07, "epoch": 19.394490644490645, "percentage": 96.97, "elapsed_time": "1:29:53", "remaining_time": "0:02:48", "throughput": 1318.91, "total_tokens": 7113728}
|
| 7483 |
+
{"current_steps": 37320, "total_steps": 38480, "loss": 0.0851, "lr": 1.3852162672867708e-07, "epoch": 19.397089397089395, "percentage": 96.99, "elapsed_time": "1:29:54", "remaining_time": "0:02:47", "throughput": 1318.91, "total_tokens": 7114656}
|
| 7484 |
+
{"current_steps": 37325, "total_steps": 38480, "loss": 0.113, "lr": 1.3733216371449586e-07, "epoch": 19.39968814968815, "percentage": 97.0, "elapsed_time": "1:29:55", "remaining_time": "0:02:46", "throughput": 1318.91, "total_tokens": 7115616}
|
| 7485 |
+
{"current_steps": 37330, "total_steps": 38480, "loss": 0.2061, "lr": 1.361478155435636e-07, "epoch": 19.402286902286903, "percentage": 97.01, "elapsed_time": "1:29:55", "remaining_time": "0:02:46", "throughput": 1318.93, "total_tokens": 7116640}
|
| 7486 |
+
{"current_steps": 37335, "total_steps": 38480, "loss": 0.0904, "lr": 1.349685824595326e-07, "epoch": 19.404885654885653, "percentage": 97.02, "elapsed_time": "1:29:56", "remaining_time": "0:02:45", "throughput": 1318.94, "total_tokens": 7117600}
|
| 7487 |
+
{"current_steps": 37340, "total_steps": 38480, "loss": 0.1449, "lr": 1.3379446470499767e-07, "epoch": 19.407484407484407, "percentage": 97.04, "elapsed_time": "1:29:57", "remaining_time": "0:02:44", "throughput": 1318.96, "total_tokens": 7118624}
|
| 7488 |
+
{"current_steps": 37345, "total_steps": 38480, "loss": 0.1969, "lr": 1.32625462521499e-07, "epoch": 19.41008316008316, "percentage": 97.05, "elapsed_time": "1:29:57", "remaining_time": "0:02:44", "throughput": 1318.95, "total_tokens": 7119520}
|
| 7489 |
+
{"current_steps": 37350, "total_steps": 38480, "loss": 0.1411, "lr": 1.314615761495358e-07, "epoch": 19.41268191268191, "percentage": 97.06, "elapsed_time": "1:29:58", "remaining_time": "0:02:43", "throughput": 1318.96, "total_tokens": 7120480}
|
| 7490 |
+
{"current_steps": 37355, "total_steps": 38480, "loss": 0.188, "lr": 1.303028058285388e-07, "epoch": 19.415280665280665, "percentage": 97.08, "elapsed_time": "1:29:59", "remaining_time": "0:02:42", "throughput": 1318.97, "total_tokens": 7121440}
|
| 7491 |
+
{"current_steps": 37360, "total_steps": 38480, "loss": 0.1467, "lr": 1.2914915179690067e-07, "epoch": 19.41787941787942, "percentage": 97.09, "elapsed_time": "1:29:59", "remaining_time": "0:02:41", "throughput": 1318.96, "total_tokens": 7122336}
|
| 7492 |
+
{"current_steps": 37365, "total_steps": 38480, "loss": 0.3333, "lr": 1.2800061429195377e-07, "epoch": 19.42047817047817, "percentage": 97.1, "elapsed_time": "1:30:00", "remaining_time": "0:02:41", "throughput": 1318.96, "total_tokens": 7123264}
|
| 7493 |
+
{"current_steps": 37370, "total_steps": 38480, "loss": 0.0825, "lr": 1.268571935499785e-07, "epoch": 19.423076923076923, "percentage": 97.12, "elapsed_time": "1:30:01", "remaining_time": "0:02:40", "throughput": 1318.97, "total_tokens": 7124224}
|
| 7494 |
+
{"current_steps": 37375, "total_steps": 38480, "loss": 0.1454, "lr": 1.2571888980620615e-07, "epoch": 19.425675675675677, "percentage": 97.13, "elapsed_time": "1:30:02", "remaining_time": "0:02:39", "throughput": 1318.97, "total_tokens": 7125120}
|
| 7495 |
+
{"current_steps": 37380, "total_steps": 38480, "loss": 0.1334, "lr": 1.2458570329481057e-07, "epoch": 19.428274428274428, "percentage": 97.14, "elapsed_time": "1:30:02", "remaining_time": "0:02:38", "throughput": 1318.97, "total_tokens": 7126048}
|
| 7496 |
+
{"current_steps": 37385, "total_steps": 38480, "loss": 0.2864, "lr": 1.2345763424891632e-07, "epoch": 19.43087318087318, "percentage": 97.15, "elapsed_time": "1:30:03", "remaining_time": "0:02:38", "throughput": 1318.98, "total_tokens": 7127040}
|
| 7497 |
+
{"current_steps": 37390, "total_steps": 38480, "loss": 0.1467, "lr": 1.2233468290059614e-07, "epoch": 19.433471933471935, "percentage": 97.17, "elapsed_time": "1:30:04", "remaining_time": "0:02:37", "throughput": 1319.0, "total_tokens": 7128064}
|
| 7498 |
+
{"current_steps": 37395, "total_steps": 38480, "loss": 0.0583, "lr": 1.2121684948086243e-07, "epoch": 19.436070686070686, "percentage": 97.18, "elapsed_time": "1:30:04", "remaining_time": "0:02:36", "throughput": 1319.01, "total_tokens": 7129024}
|
| 7499 |
+
{"current_steps": 37400, "total_steps": 38480, "loss": 0.1799, "lr": 1.2010413421968404e-07, "epoch": 19.43866943866944, "percentage": 97.19, "elapsed_time": "1:30:05", "remaining_time": "0:02:36", "throughput": 1319.01, "total_tokens": 7129952}
|
| 7500 |
+
{"current_steps": 37405, "total_steps": 38480, "loss": 0.1727, "lr": 1.1899653734597505e-07, "epoch": 19.44126819126819, "percentage": 97.21, "elapsed_time": "1:30:06", "remaining_time": "0:02:35", "throughput": 1319.01, "total_tokens": 7130880}
|
| 7501 |
+
{"current_steps": 37410, "total_steps": 38480, "loss": 0.0662, "lr": 1.1789405908758932e-07, "epoch": 19.443866943866944, "percentage": 97.22, "elapsed_time": "1:30:06", "remaining_time": "0:02:34", "throughput": 1319.0, "total_tokens": 7131744}
|
| 7502 |
+
{"current_steps": 37415, "total_steps": 38480, "loss": 0.0779, "lr": 1.1679669967133434e-07, "epoch": 19.446465696465697, "percentage": 97.23, "elapsed_time": "1:30:07", "remaining_time": "0:02:33", "throughput": 1319.01, "total_tokens": 7132736}
|
| 7503 |
+
{"current_steps": 37420, "total_steps": 38480, "loss": 0.0444, "lr": 1.1570445932296837e-07, "epoch": 19.449064449064448, "percentage": 97.25, "elapsed_time": "1:30:08", "remaining_time": "0:02:33", "throughput": 1319.0, "total_tokens": 7133600}
|
| 7504 |
+
{"current_steps": 37425, "total_steps": 38480, "loss": 0.2544, "lr": 1.146173382671839e-07, "epoch": 19.4516632016632, "percentage": 97.26, "elapsed_time": "1:30:09", "remaining_time": "0:02:32", "throughput": 1319.0, "total_tokens": 7134528}
|
| 7505 |
+
{"current_steps": 37430, "total_steps": 38480, "loss": 0.0716, "lr": 1.1353533672763261e-07, "epoch": 19.454261954261955, "percentage": 97.27, "elapsed_time": "1:30:09", "remaining_time": "0:02:31", "throughput": 1319.01, "total_tokens": 7135520}
|
| 7506 |
+
{"current_steps": 37435, "total_steps": 38480, "loss": 0.1168, "lr": 1.124584549269031e-07, "epoch": 19.456860706860706, "percentage": 97.28, "elapsed_time": "1:30:10", "remaining_time": "0:02:31", "throughput": 1319.03, "total_tokens": 7136544}
|
| 7507 |
+
{"current_steps": 37440, "total_steps": 38480, "loss": 0.103, "lr": 1.1138669308654314e-07, "epoch": 19.45945945945946, "percentage": 97.3, "elapsed_time": "1:30:11", "remaining_time": "0:02:30", "throughput": 1319.02, "total_tokens": 7137376}
|
| 7508 |
+
{"current_steps": 37445, "total_steps": 38480, "loss": 0.0263, "lr": 1.1032005142703195e-07, "epoch": 19.462058212058214, "percentage": 97.31, "elapsed_time": "1:30:11", "remaining_time": "0:02:29", "throughput": 1319.0, "total_tokens": 7138208}
|
| 7509 |
+
{"current_steps": 37450, "total_steps": 38480, "loss": 0.2115, "lr": 1.0925853016780784e-07, "epoch": 19.464656964656964, "percentage": 97.32, "elapsed_time": "1:30:12", "remaining_time": "0:02:28", "throughput": 1319.02, "total_tokens": 7139232}
|
| 7510 |
+
{"current_steps": 37455, "total_steps": 38480, "loss": 0.1168, "lr": 1.0820212952724896e-07, "epoch": 19.467255717255718, "percentage": 97.34, "elapsed_time": "1:30:13", "remaining_time": "0:02:28", "throughput": 1319.03, "total_tokens": 7140192}
|
| 7511 |
+
{"current_steps": 37460, "total_steps": 38480, "loss": 0.1024, "lr": 1.0715084972268418e-07, "epoch": 19.46985446985447, "percentage": 97.35, "elapsed_time": "1:30:13", "remaining_time": "0:02:27", "throughput": 1319.04, "total_tokens": 7141152}
|
| 7512 |
+
{"current_steps": 37465, "total_steps": 38480, "loss": 0.1656, "lr": 1.06104690970385e-07, "epoch": 19.472453222453222, "percentage": 97.36, "elapsed_time": "1:30:14", "remaining_time": "0:02:26", "throughput": 1319.06, "total_tokens": 7142208}
|
| 7513 |
+
{"current_steps": 37470, "total_steps": 38480, "loss": 0.1165, "lr": 1.0506365348557091e-07, "epoch": 19.475051975051976, "percentage": 97.38, "elapsed_time": "1:30:15", "remaining_time": "0:02:25", "throughput": 1319.06, "total_tokens": 7143136}
|
| 7514 |
+
{"current_steps": 37475, "total_steps": 38480, "loss": 0.1917, "lr": 1.0402773748240947e-07, "epoch": 19.477650727650726, "percentage": 97.39, "elapsed_time": "1:30:16", "remaining_time": "0:02:25", "throughput": 1319.07, "total_tokens": 7144128}
|
| 7515 |
+
{"current_steps": 37480, "total_steps": 38480, "loss": 0.0816, "lr": 1.0299694317401353e-07, "epoch": 19.48024948024948, "percentage": 97.4, "elapsed_time": "1:30:16", "remaining_time": "0:02:24", "throughput": 1319.08, "total_tokens": 7145120}
|
| 7516 |
+
{"current_steps": 37485, "total_steps": 38480, "loss": 0.0851, "lr": 1.0197127077244129e-07, "epoch": 19.482848232848234, "percentage": 97.41, "elapsed_time": "1:30:17", "remaining_time": "0:02:23", "throughput": 1319.08, "total_tokens": 7146048}
|
| 7517 |
+
{"current_steps": 37490, "total_steps": 38480, "loss": 0.1408, "lr": 1.0095072048869614e-07, "epoch": 19.485446985446984, "percentage": 97.43, "elapsed_time": "1:30:18", "remaining_time": "0:02:23", "throughput": 1319.08, "total_tokens": 7146944}
|
| 7518 |
+
{"current_steps": 37495, "total_steps": 38480, "loss": 0.116, "lr": 9.993529253273237e-08, "epoch": 19.488045738045738, "percentage": 97.44, "elapsed_time": "1:30:18", "remaining_time": "0:02:22", "throughput": 1319.09, "total_tokens": 7147904}
|
| 7519 |
+
{"current_steps": 37500, "total_steps": 38480, "loss": 0.1881, "lr": 9.892498711344678e-08, "epoch": 19.490644490644492, "percentage": 97.45, "elapsed_time": "1:30:19", "remaining_time": "0:02:21", "throughput": 1319.11, "total_tokens": 7148960}
|
| 7520 |
+
{"current_steps": 37505, "total_steps": 38480, "loss": 0.099, "lr": 9.791980443868421e-08, "epoch": 19.493243243243242, "percentage": 97.47, "elapsed_time": "1:30:20", "remaining_time": "0:02:20", "throughput": 1319.12, "total_tokens": 7149920}
|
| 7521 |
+
{"current_steps": 37510, "total_steps": 38480, "loss": 0.0691, "lr": 9.691974471523202e-08, "epoch": 19.495841995841996, "percentage": 97.48, "elapsed_time": "1:30:20", "remaining_time": "0:02:20", "throughput": 1319.12, "total_tokens": 7150880}
|
| 7522 |
+
{"current_steps": 37515, "total_steps": 38480, "loss": 0.1603, "lr": 9.592480814882843e-08, "epoch": 19.49844074844075, "percentage": 97.49, "elapsed_time": "1:30:21", "remaining_time": "0:02:19", "throughput": 1319.11, "total_tokens": 7151744}
|
| 7523 |
+
{"current_steps": 37520, "total_steps": 38480, "loss": 0.1808, "lr": 9.493499494415414e-08, "epoch": 19.5010395010395, "percentage": 97.51, "elapsed_time": "1:30:22", "remaining_time": "0:02:18", "throughput": 1319.13, "total_tokens": 7152768}
|
| 7524 |
+
{"current_steps": 37525, "total_steps": 38480, "loss": 0.1729, "lr": 9.395030530484072e-08, "epoch": 19.503638253638254, "percentage": 97.52, "elapsed_time": "1:30:23", "remaining_time": "0:02:18", "throughput": 1319.16, "total_tokens": 7153856}
|
| 7525 |
+
{"current_steps": 37530, "total_steps": 38480, "loss": 0.091, "lr": 9.297073943345946e-08, "epoch": 19.506237006237008, "percentage": 97.53, "elapsed_time": "1:30:23", "remaining_time": "0:02:17", "throughput": 1319.16, "total_tokens": 7154784}
|
| 7526 |
+
{"current_steps": 37535, "total_steps": 38480, "loss": 0.158, "lr": 9.199629753152972e-08, "epoch": 19.508835758835758, "percentage": 97.54, "elapsed_time": "1:30:24", "remaining_time": "0:02:16", "throughput": 1319.19, "total_tokens": 7155840}
|
| 7527 |
+
{"current_steps": 37540, "total_steps": 38480, "loss": 0.226, "lr": 9.102697979951613e-08, "epoch": 19.511434511434512, "percentage": 97.56, "elapsed_time": "1:30:25", "remaining_time": "0:02:15", "throughput": 1319.21, "total_tokens": 7156864}
|
| 7528 |
+
{"current_steps": 37545, "total_steps": 38480, "loss": 0.132, "lr": 9.006278643683696e-08, "epoch": 19.514033264033262, "percentage": 97.57, "elapsed_time": "1:30:25", "remaining_time": "0:02:15", "throughput": 1319.21, "total_tokens": 7157824}
|
| 7529 |
+
{"current_steps": 37550, "total_steps": 38480, "loss": 0.1228, "lr": 8.910371764184466e-08, "epoch": 19.516632016632016, "percentage": 97.58, "elapsed_time": "1:30:26", "remaining_time": "0:02:14", "throughput": 1319.21, "total_tokens": 7158752}
|
| 7530 |
+
{"current_steps": 37555, "total_steps": 38480, "loss": 0.1842, "lr": 8.814977361184251e-08, "epoch": 19.51923076923077, "percentage": 97.6, "elapsed_time": "1:30:27", "remaining_time": "0:02:13", "throughput": 1319.21, "total_tokens": 7159680}
|
| 7531 |
+
{"current_steps": 37560, "total_steps": 38480, "loss": 0.1641, "lr": 8.720095454308186e-08, "epoch": 19.52182952182952, "percentage": 97.61, "elapsed_time": "1:30:27", "remaining_time": "0:02:12", "throughput": 1319.2, "total_tokens": 7160544}
|
| 7532 |
+
{"current_steps": 37565, "total_steps": 38480, "loss": 0.1159, "lr": 8.625726063075656e-08, "epoch": 19.524428274428274, "percentage": 97.62, "elapsed_time": "1:30:28", "remaining_time": "0:02:12", "throughput": 1319.2, "total_tokens": 7161472}
|
| 7533 |
+
{"current_steps": 37570, "total_steps": 38480, "loss": 0.116, "lr": 8.531869206900578e-08, "epoch": 19.527027027027028, "percentage": 97.64, "elapsed_time": "1:30:29", "remaining_time": "0:02:11", "throughput": 1319.2, "total_tokens": 7162400}
|
| 7534 |
+
{"current_steps": 37575, "total_steps": 38480, "loss": 0.1517, "lr": 8.438524905091393e-08, "epoch": 19.52962577962578, "percentage": 97.65, "elapsed_time": "1:30:30", "remaining_time": "0:02:10", "throughput": 1319.21, "total_tokens": 7163360}
|
| 7535 |
+
{"current_steps": 37580, "total_steps": 38480, "loss": 0.254, "lr": 8.34569317685191e-08, "epoch": 19.532224532224532, "percentage": 97.66, "elapsed_time": "1:30:30", "remaining_time": "0:02:10", "throughput": 1319.2, "total_tokens": 7164256}
|
| 7536 |
+
{"current_steps": 37585, "total_steps": 38480, "loss": 0.2045, "lr": 8.253374041279072e-08, "epoch": 19.534823284823286, "percentage": 97.67, "elapsed_time": "1:30:31", "remaining_time": "0:02:09", "throughput": 1319.21, "total_tokens": 7165184}
|
| 7537 |
+
{"current_steps": 37590, "total_steps": 38480, "loss": 0.2791, "lr": 8.161567517365465e-08, "epoch": 19.537422037422036, "percentage": 97.69, "elapsed_time": "1:30:32", "remaining_time": "0:02:08", "throughput": 1319.22, "total_tokens": 7166176}
|
| 7538 |
+
{"current_steps": 37595, "total_steps": 38480, "loss": 0.0602, "lr": 8.070273623997926e-08, "epoch": 19.54002079002079, "percentage": 97.7, "elapsed_time": "1:30:32", "remaining_time": "0:02:07", "throughput": 1319.22, "total_tokens": 7167136}
|
| 7539 |
+
{"current_steps": 37600, "total_steps": 38480, "loss": 0.1672, "lr": 7.979492379957543e-08, "epoch": 19.542619542619544, "percentage": 97.71, "elapsed_time": "1:30:33", "remaining_time": "0:02:07", "throughput": 1319.22, "total_tokens": 7168032}
|
| 7540 |
+
{"current_steps": 37605, "total_steps": 38480, "loss": 0.1591, "lr": 7.889223803920764e-08, "epoch": 19.545218295218294, "percentage": 97.73, "elapsed_time": "1:30:34", "remaining_time": "0:02:06", "throughput": 1319.22, "total_tokens": 7168960}
|
| 7541 |
+
{"current_steps": 37610, "total_steps": 38480, "loss": 0.1352, "lr": 7.799467914457181e-08, "epoch": 19.54781704781705, "percentage": 97.74, "elapsed_time": "1:30:34", "remaining_time": "0:02:05", "throughput": 1319.21, "total_tokens": 7169824}
|
| 7542 |
+
{"current_steps": 37615, "total_steps": 38480, "loss": 0.0904, "lr": 7.7102247300323e-08, "epoch": 19.5504158004158, "percentage": 97.75, "elapsed_time": "1:30:35", "remaining_time": "0:02:04", "throughput": 1319.22, "total_tokens": 7170784}
|
| 7543 |
+
{"current_steps": 37620, "total_steps": 38480, "loss": 0.1026, "lr": 7.621494269005324e-08, "epoch": 19.553014553014552, "percentage": 97.77, "elapsed_time": "1:30:36", "remaining_time": "0:02:04", "throughput": 1319.23, "total_tokens": 7171808}
|
| 7544 |
+
{"current_steps": 37625, "total_steps": 38480, "loss": 0.0925, "lr": 7.533276549630264e-08, "epoch": 19.555613305613306, "percentage": 97.78, "elapsed_time": "1:30:37", "remaining_time": "0:02:03", "throughput": 1319.24, "total_tokens": 7172768}
|
| 7545 |
+
{"current_steps": 37630, "total_steps": 38480, "loss": 0.145, "lr": 7.445571590055655e-08, "epoch": 19.558212058212057, "percentage": 97.79, "elapsed_time": "1:30:37", "remaining_time": "0:02:02", "throughput": 1319.24, "total_tokens": 7173696}
|
| 7546 |
+
{"current_steps": 37635, "total_steps": 38480, "loss": 0.1111, "lr": 7.358379408324289e-08, "epoch": 19.56081081081081, "percentage": 97.8, "elapsed_time": "1:30:38", "remaining_time": "0:02:02", "throughput": 1319.25, "total_tokens": 7174688}
|
| 7547 |
+
{"current_steps": 37640, "total_steps": 38480, "loss": 0.1121, "lr": 7.271700022374039e-08, "epoch": 19.563409563409564, "percentage": 97.82, "elapsed_time": "1:30:39", "remaining_time": "0:02:01", "throughput": 1319.27, "total_tokens": 7175680}
|
| 7548 |
+
{"current_steps": 37645, "total_steps": 38480, "loss": 0.0447, "lr": 7.185533450036752e-08, "epoch": 19.566008316008315, "percentage": 97.83, "elapsed_time": "1:30:39", "remaining_time": "0:02:00", "throughput": 1319.26, "total_tokens": 7176576}
|
| 7549 |
+
{"current_steps": 37650, "total_steps": 38480, "loss": 0.1834, "lr": 7.099879709038804e-08, "epoch": 19.56860706860707, "percentage": 97.84, "elapsed_time": "1:30:40", "remaining_time": "0:01:59", "throughput": 1319.26, "total_tokens": 7177504}
|
| 7550 |
+
{"current_steps": 37655, "total_steps": 38480, "loss": 0.1028, "lr": 7.014738817001654e-08, "epoch": 19.571205821205822, "percentage": 97.86, "elapsed_time": "1:30:41", "remaining_time": "0:01:59", "throughput": 1319.27, "total_tokens": 7178496}
|
| 7551 |
+
{"current_steps": 37660, "total_steps": 38480, "loss": 0.1187, "lr": 6.93011079144018e-08, "epoch": 19.573804573804573, "percentage": 97.87, "elapsed_time": "1:30:41", "remaining_time": "0:01:58", "throughput": 1319.28, "total_tokens": 7179424}
|
| 7552 |
+
{"current_steps": 37665, "total_steps": 38480, "loss": 0.1855, "lr": 6.845995649764625e-08, "epoch": 19.576403326403327, "percentage": 97.88, "elapsed_time": "1:30:42", "remaining_time": "0:01:57", "throughput": 1319.29, "total_tokens": 7180448}
|
| 7553 |
+
{"current_steps": 37670, "total_steps": 38480, "loss": 0.0782, "lr": 6.762393409279755e-08, "epoch": 19.57900207900208, "percentage": 97.9, "elapsed_time": "1:30:43", "remaining_time": "0:01:57", "throughput": 1319.3, "total_tokens": 7181376}
|
| 7554 |
+
{"current_steps": 37675, "total_steps": 38480, "loss": 0.0625, "lr": 6.679304087184313e-08, "epoch": 19.58160083160083, "percentage": 97.91, "elapsed_time": "1:30:44", "remaining_time": "0:01:56", "throughput": 1319.3, "total_tokens": 7182304}
|
| 7555 |
+
{"current_steps": 37680, "total_steps": 38480, "loss": 0.0705, "lr": 6.596727700571847e-08, "epoch": 19.584199584199585, "percentage": 97.92, "elapsed_time": "1:30:44", "remaining_time": "0:01:55", "throughput": 1319.32, "total_tokens": 7183328}
|
| 7556 |
+
{"current_steps": 37685, "total_steps": 38480, "loss": 0.2967, "lr": 6.51466426643016e-08, "epoch": 19.58679833679834, "percentage": 97.93, "elapsed_time": "1:30:45", "remaining_time": "0:01:54", "throughput": 1319.31, "total_tokens": 7184224}
|
| 7557 |
+
{"current_steps": 37690, "total_steps": 38480, "loss": 0.0542, "lr": 6.433113801641577e-08, "epoch": 19.58939708939709, "percentage": 97.95, "elapsed_time": "1:30:46", "remaining_time": "0:01:54", "throughput": 1319.3, "total_tokens": 7185120}
|
| 7558 |
+
{"current_steps": 37695, "total_steps": 38480, "loss": 0.1538, "lr": 6.352076322983236e-08, "epoch": 19.591995841995843, "percentage": 97.96, "elapsed_time": "1:30:46", "remaining_time": "0:01:53", "throughput": 1319.33, "total_tokens": 7186176}
|
| 7559 |
+
{"current_steps": 37700, "total_steps": 38480, "loss": 0.0815, "lr": 6.271551847126245e-08, "epoch": 19.594594594594593, "percentage": 97.97, "elapsed_time": "1:30:47", "remaining_time": "0:01:52", "throughput": 1319.35, "total_tokens": 7187200}
|
| 7560 |
+
{"current_steps": 37705, "total_steps": 38480, "loss": 0.1119, "lr": 6.191540390636796e-08, "epoch": 19.597193347193347, "percentage": 97.99, "elapsed_time": "1:30:48", "remaining_time": "0:01:51", "throughput": 1319.35, "total_tokens": 7188128}
|
| 7561 |
+
{"current_steps": 37710, "total_steps": 38480, "loss": 0.1103, "lr": 6.112041969974503e-08, "epoch": 19.5997920997921, "percentage": 98.0, "elapsed_time": "1:30:48", "remaining_time": "0:01:51", "throughput": 1319.35, "total_tokens": 7189056}
|
| 7562 |
+
{"current_steps": 37715, "total_steps": 38480, "loss": 0.2608, "lr": 6.033056601494613e-08, "epoch": 19.60239085239085, "percentage": 98.01, "elapsed_time": "1:30:49", "remaining_time": "0:01:50", "throughput": 1319.35, "total_tokens": 7189984}
|
| 7563 |
+
{"current_steps": 37720, "total_steps": 38480, "loss": 0.1168, "lr": 5.9545843014460754e-08, "epoch": 19.604989604989605, "percentage": 98.02, "elapsed_time": "1:30:50", "remaining_time": "0:01:49", "throughput": 1319.36, "total_tokens": 7190976}
|
| 7564 |
+
{"current_steps": 37725, "total_steps": 38480, "loss": 0.2052, "lr": 5.876625085972642e-08, "epoch": 19.60758835758836, "percentage": 98.04, "elapsed_time": "1:30:51", "remaining_time": "0:01:49", "throughput": 1319.36, "total_tokens": 7191904}
|
| 7565 |
+
{"current_steps": 37730, "total_steps": 38480, "loss": 0.112, "lr": 5.799178971112318e-08, "epoch": 19.61018711018711, "percentage": 98.05, "elapsed_time": "1:30:51", "remaining_time": "0:01:48", "throughput": 1319.36, "total_tokens": 7192800}
|
| 7566 |
+
{"current_steps": 37735, "total_steps": 38480, "loss": 0.2336, "lr": 5.722245972797635e-08, "epoch": 19.612785862785863, "percentage": 98.06, "elapsed_time": "1:30:52", "remaining_time": "0:01:47", "throughput": 1319.35, "total_tokens": 7193696}
|
| 7567 |
+
{"current_steps": 37740, "total_steps": 38480, "loss": 0.1431, "lr": 5.6458261068553787e-08, "epoch": 19.615384615384617, "percentage": 98.08, "elapsed_time": "1:30:53", "remaining_time": "0:01:46", "throughput": 1319.36, "total_tokens": 7194656}
|
| 7568 |
+
{"current_steps": 37745, "total_steps": 38480, "loss": 0.0485, "lr": 5.5699193890074165e-08, "epoch": 19.617983367983367, "percentage": 98.09, "elapsed_time": "1:30:53", "remaining_time": "0:01:46", "throughput": 1319.36, "total_tokens": 7195584}
|
| 7569 |
+
{"current_steps": 37750, "total_steps": 38480, "loss": 0.0551, "lr": 5.4945258348690354e-08, "epoch": 19.62058212058212, "percentage": 98.1, "elapsed_time": "1:30:54", "remaining_time": "0:01:45", "throughput": 1319.36, "total_tokens": 7196512}
|
| 7570 |
+
{"current_steps": 37755, "total_steps": 38480, "loss": 0.1734, "lr": 5.419645459950606e-08, "epoch": 19.623180873180875, "percentage": 98.12, "elapsed_time": "1:30:55", "remaining_time": "0:01:44", "throughput": 1319.37, "total_tokens": 7197440}
|
| 7571 |
+
{"current_steps": 37760, "total_steps": 38480, "loss": 0.0855, "lr": 5.345278279656751e-08, "epoch": 19.625779625779625, "percentage": 98.13, "elapsed_time": "1:30:55", "remaining_time": "0:01:44", "throughput": 1319.37, "total_tokens": 7198400}
|
| 7572 |
+
{"current_steps": 37765, "total_steps": 38480, "loss": 0.1552, "lr": 5.271424309286899e-08, "epoch": 19.62837837837838, "percentage": 98.14, "elapsed_time": "1:30:56", "remaining_time": "0:01:43", "throughput": 1319.39, "total_tokens": 7199424}
|
| 7573 |
+
{"current_steps": 37770, "total_steps": 38480, "loss": 0.1076, "lr": 5.1980835640344505e-08, "epoch": 19.63097713097713, "percentage": 98.15, "elapsed_time": "1:30:57", "remaining_time": "0:01:42", "throughput": 1319.4, "total_tokens": 7200384}
|
| 7574 |
+
{"current_steps": 37775, "total_steps": 38480, "loss": 0.1272, "lr": 5.12525605898706e-08, "epoch": 19.633575883575883, "percentage": 98.17, "elapsed_time": "1:30:58", "remaining_time": "0:01:41", "throughput": 1319.42, "total_tokens": 7201440}
|
| 7575 |
+
{"current_steps": 37780, "total_steps": 38480, "loss": 0.1969, "lr": 5.052941809127465e-08, "epoch": 19.636174636174637, "percentage": 98.18, "elapsed_time": "1:30:58", "remaining_time": "0:01:41", "throughput": 1319.44, "total_tokens": 7202432}
|
| 7576 |
+
{"current_steps": 37785, "total_steps": 38480, "loss": 0.0665, "lr": 4.9811408293320984e-08, "epoch": 19.638773388773387, "percentage": 98.19, "elapsed_time": "1:30:59", "remaining_time": "0:01:40", "throughput": 1319.44, "total_tokens": 7203392}
|
| 7577 |
+
{"current_steps": 37790, "total_steps": 38480, "loss": 0.0928, "lr": 4.9098531343724773e-08, "epoch": 19.64137214137214, "percentage": 98.21, "elapsed_time": "1:31:00", "remaining_time": "0:01:39", "throughput": 1319.45, "total_tokens": 7204352}
|
| 7578 |
+
{"current_steps": 37795, "total_steps": 38480, "loss": 0.0956, "lr": 4.839078738913816e-08, "epoch": 19.643970893970895, "percentage": 98.22, "elapsed_time": "1:31:00", "remaining_time": "0:01:38", "throughput": 1319.45, "total_tokens": 7205280}
|
| 7579 |
+
{"current_steps": 37800, "total_steps": 38480, "loss": 0.1632, "lr": 4.768817657516411e-08, "epoch": 19.646569646569645, "percentage": 98.23, "elapsed_time": "1:31:01", "remaining_time": "0:01:38", "throughput": 1319.46, "total_tokens": 7206240}
|
| 7580 |
+
{"current_steps": 37805, "total_steps": 38480, "loss": 0.1256, "lr": 4.699069904634257e-08, "epoch": 19.6491683991684, "percentage": 98.25, "elapsed_time": "1:31:02", "remaining_time": "0:01:37", "throughput": 1319.47, "total_tokens": 7207200}
|
| 7581 |
+
{"current_steps": 37810, "total_steps": 38480, "loss": 0.1054, "lr": 4.629835494616708e-08, "epoch": 19.651767151767153, "percentage": 98.26, "elapsed_time": "1:31:02", "remaining_time": "0:01:36", "throughput": 1319.48, "total_tokens": 7208192}
|
| 7582 |
+
{"current_steps": 37815, "total_steps": 38480, "loss": 0.0788, "lr": 4.56111444170626e-08, "epoch": 19.654365904365903, "percentage": 98.27, "elapsed_time": "1:31:03", "remaining_time": "0:01:36", "throughput": 1319.49, "total_tokens": 7209152}
|
| 7583 |
+
{"current_steps": 37820, "total_steps": 38480, "loss": 0.1755, "lr": 4.492906760041049e-08, "epoch": 19.656964656964657, "percentage": 98.28, "elapsed_time": "1:31:04", "remaining_time": "0:01:35", "throughput": 1319.49, "total_tokens": 7210080}
|
| 7584 |
+
{"current_steps": 37825, "total_steps": 38480, "loss": 0.1456, "lr": 4.425212463652628e-08, "epoch": 19.65956340956341, "percentage": 98.3, "elapsed_time": "1:31:05", "remaining_time": "0:01:34", "throughput": 1319.5, "total_tokens": 7211072}
|
| 7585 |
+
{"current_steps": 37830, "total_steps": 38480, "loss": 0.0778, "lr": 4.358031566467635e-08, "epoch": 19.66216216216216, "percentage": 98.31, "elapsed_time": "1:31:05", "remaining_time": "0:01:33", "throughput": 1319.52, "total_tokens": 7212096}
|
| 7586 |
+
{"current_steps": 37835, "total_steps": 38480, "loss": 0.1097, "lr": 4.291364082306404e-08, "epoch": 19.664760914760915, "percentage": 98.32, "elapsed_time": "1:31:06", "remaining_time": "0:01:33", "throughput": 1319.53, "total_tokens": 7213088}
|
| 7587 |
+
{"current_steps": 37840, "total_steps": 38480, "loss": 0.0985, "lr": 4.2252100248843515e-08, "epoch": 19.66735966735967, "percentage": 98.34, "elapsed_time": "1:31:07", "remaining_time": "0:01:32", "throughput": 1319.53, "total_tokens": 7214016}
|
| 7588 |
+
{"current_steps": 37845, "total_steps": 38480, "loss": 0.1575, "lr": 4.1595694078108706e-08, "epoch": 19.66995841995842, "percentage": 98.35, "elapsed_time": "1:31:07", "remaining_time": "0:01:31", "throughput": 1319.52, "total_tokens": 7214880}
|
| 7589 |
+
{"current_steps": 37850, "total_steps": 38480, "loss": 0.0932, "lr": 4.09444224458988e-08, "epoch": 19.672557172557173, "percentage": 98.36, "elapsed_time": "1:31:08", "remaining_time": "0:01:31", "throughput": 1319.53, "total_tokens": 7215840}
|
| 7590 |
+
{"current_steps": 37855, "total_steps": 38480, "loss": 0.1637, "lr": 4.0298285486192746e-08, "epoch": 19.675155925155924, "percentage": 98.38, "elapsed_time": "1:31:09", "remaining_time": "0:01:30", "throughput": 1319.53, "total_tokens": 7216736}
|
| 7591 |
+
{"current_steps": 37860, "total_steps": 38480, "loss": 0.1308, "lr": 3.965728333192031e-08, "epoch": 19.677754677754677, "percentage": 98.39, "elapsed_time": "1:31:09", "remaining_time": "0:01:29", "throughput": 1319.53, "total_tokens": 7217664}
|
| 7592 |
+
{"current_steps": 37865, "total_steps": 38480, "loss": 0.0599, "lr": 3.902141611494548e-08, "epoch": 19.68035343035343, "percentage": 98.4, "elapsed_time": "1:31:10", "remaining_time": "0:01:28", "throughput": 1319.52, "total_tokens": 7218528}
|
| 7593 |
+
{"current_steps": 37870, "total_steps": 38480, "loss": 0.1197, "lr": 3.839068396608858e-08, "epoch": 19.68295218295218, "percentage": 98.41, "elapsed_time": "1:31:11", "remaining_time": "0:01:28", "throughput": 1319.53, "total_tokens": 7219520}
|
| 7594 |
+
{"current_steps": 37875, "total_steps": 38480, "loss": 0.2563, "lr": 3.776508701509862e-08, "epoch": 19.685550935550935, "percentage": 98.43, "elapsed_time": "1:31:11", "remaining_time": "0:01:27", "throughput": 1319.54, "total_tokens": 7220480}
|
| 7595 |
+
{"current_steps": 37880, "total_steps": 38480, "loss": 0.0439, "lr": 3.714462539068375e-08, "epoch": 19.68814968814969, "percentage": 98.44, "elapsed_time": "1:31:12", "remaining_time": "0:01:26", "throughput": 1319.54, "total_tokens": 7221408}
|
| 7596 |
+
{"current_steps": 37885, "total_steps": 38480, "loss": 0.1911, "lr": 3.652929922048076e-08, "epoch": 19.69074844074844, "percentage": 98.45, "elapsed_time": "1:31:13", "remaining_time": "0:01:25", "throughput": 1319.55, "total_tokens": 7222400}
|
| 7597 |
+
{"current_steps": 37890, "total_steps": 38480, "loss": 0.073, "lr": 3.591910863108006e-08, "epoch": 19.693347193347194, "percentage": 98.47, "elapsed_time": "1:31:14", "remaining_time": "0:01:25", "throughput": 1319.55, "total_tokens": 7223296}
|
| 7598 |
+
{"current_steps": 37895, "total_steps": 38480, "loss": 0.2109, "lr": 3.531405374801178e-08, "epoch": 19.695945945945947, "percentage": 98.48, "elapsed_time": "1:31:14", "remaining_time": "0:01:24", "throughput": 1319.55, "total_tokens": 7224192}
|
| 7599 |
+
{"current_steps": 37900, "total_steps": 38480, "loss": 0.1157, "lr": 3.471413469575135e-08, "epoch": 19.698544698544698, "percentage": 98.49, "elapsed_time": "1:31:15", "remaining_time": "0:01:23", "throughput": 1319.55, "total_tokens": 7225152}
|
| 7600 |
+
{"current_steps": 37905, "total_steps": 38480, "loss": 0.0884, "lr": 3.411935159771395e-08, "epoch": 19.70114345114345, "percentage": 98.51, "elapsed_time": "1:31:16", "remaining_time": "0:01:23", "throughput": 1319.56, "total_tokens": 7226144}
|
| 7601 |
+
{"current_steps": 37910, "total_steps": 38480, "loss": 0.2628, "lr": 3.352970457626281e-08, "epoch": 19.703742203742205, "percentage": 98.52, "elapsed_time": "1:31:16", "remaining_time": "0:01:22", "throughput": 1319.58, "total_tokens": 7227168}
|
| 7602 |
+
{"current_steps": 37915, "total_steps": 38480, "loss": 0.1184, "lr": 3.2945193752700887e-08, "epoch": 19.706340956340956, "percentage": 98.53, "elapsed_time": "1:31:17", "remaining_time": "0:01:21", "throughput": 1319.59, "total_tokens": 7228128}
|
| 7603 |
+
{"current_steps": 37920, "total_steps": 38480, "loss": 0.1395, "lr": 3.236581924727922e-08, "epoch": 19.70893970893971, "percentage": 98.54, "elapsed_time": "1:31:18", "remaining_time": "0:01:20", "throughput": 1319.6, "total_tokens": 7229088}
|
| 7604 |
+
{"current_steps": 37925, "total_steps": 38480, "loss": 0.096, "lr": 3.179158117918579e-08, "epoch": 19.71153846153846, "percentage": 98.56, "elapsed_time": "1:31:18", "remaining_time": "0:01:20", "throughput": 1319.59, "total_tokens": 7229984}
|
| 7605 |
+
{"current_steps": 37930, "total_steps": 38480, "loss": 0.0732, "lr": 3.1222479666553894e-08, "epoch": 19.714137214137214, "percentage": 98.57, "elapsed_time": "1:31:19", "remaining_time": "0:01:19", "throughput": 1319.6, "total_tokens": 7230944}
|
| 7606 |
+
{"current_steps": 37935, "total_steps": 38480, "loss": 0.2012, "lr": 3.065851482646487e-08, "epoch": 19.716735966735968, "percentage": 98.58, "elapsed_time": "1:31:20", "remaining_time": "0:01:18", "throughput": 1319.59, "total_tokens": 7231840}
|
| 7607 |
+
{"current_steps": 37940, "total_steps": 38480, "loss": 0.0818, "lr": 3.009968677493702e-08, "epoch": 19.719334719334718, "percentage": 98.6, "elapsed_time": "1:31:21", "remaining_time": "0:01:18", "throughput": 1319.6, "total_tokens": 7232800}
|
| 7608 |
+
{"current_steps": 37945, "total_steps": 38480, "loss": 0.0957, "lr": 2.9545995626936717e-08, "epoch": 19.721933471933472, "percentage": 98.61, "elapsed_time": "1:31:21", "remaining_time": "0:01:17", "throughput": 1319.6, "total_tokens": 7233728}
|
| 7609 |
+
{"current_steps": 37950, "total_steps": 38480, "loss": 0.0829, "lr": 2.8997441496370072e-08, "epoch": 19.724532224532226, "percentage": 98.62, "elapsed_time": "1:31:22", "remaining_time": "0:01:16", "throughput": 1319.59, "total_tokens": 7234624}
|
| 7610 |
+
{"current_steps": 37955, "total_steps": 38480, "loss": 0.0894, "lr": 2.8454024496085696e-08, "epoch": 19.727130977130976, "percentage": 98.64, "elapsed_time": "1:31:23", "remaining_time": "0:01:15", "throughput": 1319.6, "total_tokens": 7235584}
|
| 7611 |
+
{"current_steps": 37960, "total_steps": 38480, "loss": 0.068, "lr": 2.7915744737883055e-08, "epoch": 19.72972972972973, "percentage": 98.65, "elapsed_time": "1:31:23", "remaining_time": "0:01:15", "throughput": 1319.6, "total_tokens": 7236480}
|
| 7612 |
+
{"current_steps": 37965, "total_steps": 38480, "loss": 0.1573, "lr": 2.7382602332493013e-08, "epoch": 19.732328482328484, "percentage": 98.66, "elapsed_time": "1:31:24", "remaining_time": "0:01:14", "throughput": 1319.59, "total_tokens": 7237376}
|
| 7613 |
+
{"current_steps": 37970, "total_steps": 38480, "loss": 0.0566, "lr": 2.6854597389600055e-08, "epoch": 19.734927234927234, "percentage": 98.67, "elapsed_time": "1:31:25", "remaining_time": "0:01:13", "throughput": 1319.6, "total_tokens": 7238368}
|
| 7614 |
+
{"current_steps": 37975, "total_steps": 38480, "loss": 0.0858, "lr": 2.633173001782563e-08, "epoch": 19.737525987525988, "percentage": 98.69, "elapsed_time": "1:31:25", "remaining_time": "0:01:12", "throughput": 1319.61, "total_tokens": 7239328}
|
| 7615 |
+
{"current_steps": 37980, "total_steps": 38480, "loss": 0.0606, "lr": 2.581400032473369e-08, "epoch": 19.74012474012474, "percentage": 98.7, "elapsed_time": "1:31:26", "remaining_time": "0:01:12", "throughput": 1319.62, "total_tokens": 7240288}
|
| 7616 |
+
{"current_steps": 37985, "total_steps": 38480, "loss": 0.2136, "lr": 2.5301408416839035e-08, "epoch": 19.742723492723492, "percentage": 98.71, "elapsed_time": "1:31:27", "remaining_time": "0:01:11", "throughput": 1319.62, "total_tokens": 7241216}
|
| 7617 |
+
{"current_steps": 37990, "total_steps": 38480, "loss": 0.1287, "lr": 2.4793954399590647e-08, "epoch": 19.745322245322246, "percentage": 98.73, "elapsed_time": "1:31:28", "remaining_time": "0:01:10", "throughput": 1319.63, "total_tokens": 7242176}
|
| 7618 |
+
{"current_steps": 37995, "total_steps": 38480, "loss": 0.0474, "lr": 2.4291638377382798e-08, "epoch": 19.747920997921, "percentage": 98.74, "elapsed_time": "1:31:28", "remaining_time": "0:01:10", "throughput": 1319.63, "total_tokens": 7243104}
|
| 7619 |
+
{"current_steps": 38000, "total_steps": 38480, "loss": 0.0366, "lr": 2.3794460453555047e-08, "epoch": 19.75051975051975, "percentage": 98.75, "elapsed_time": "1:31:29", "remaining_time": "0:01:09", "throughput": 1319.62, "total_tokens": 7244000}
|
| 7620 |
+
{"current_steps": 38005, "total_steps": 38480, "loss": 0.0836, "lr": 2.330242073039224e-08, "epoch": 19.753118503118504, "percentage": 98.77, "elapsed_time": "1:31:30", "remaining_time": "0:01:08", "throughput": 1319.62, "total_tokens": 7244896}
|
| 7621 |
+
{"current_steps": 38010, "total_steps": 38480, "loss": 0.17, "lr": 2.2815519309113407e-08, "epoch": 19.755717255717254, "percentage": 98.78, "elapsed_time": "1:31:30", "remaining_time": "0:01:07", "throughput": 1319.61, "total_tokens": 7245792}
|
| 7622 |
+
{"current_steps": 38015, "total_steps": 38480, "loss": 0.0571, "lr": 2.2333756289885656e-08, "epoch": 19.758316008316008, "percentage": 98.79, "elapsed_time": "1:31:31", "remaining_time": "0:01:07", "throughput": 1319.62, "total_tokens": 7246784}
|
| 7623 |
+
{"current_steps": 38020, "total_steps": 38480, "loss": 0.1419, "lr": 2.1857131771824135e-08, "epoch": 19.760914760914762, "percentage": 98.8, "elapsed_time": "1:31:32", "remaining_time": "0:01:06", "throughput": 1319.63, "total_tokens": 7247744}
|
| 7624 |
+
{"current_steps": 38025, "total_steps": 38480, "loss": 0.0665, "lr": 2.13856458529782e-08, "epoch": 19.763513513513512, "percentage": 98.82, "elapsed_time": "1:31:32", "remaining_time": "0:01:05", "throughput": 1319.63, "total_tokens": 7248672}
|
| 7625 |
+
{"current_steps": 38030, "total_steps": 38480, "loss": 0.1946, "lr": 2.0919298630342477e-08, "epoch": 19.766112266112266, "percentage": 98.83, "elapsed_time": "1:31:33", "remaining_time": "0:01:05", "throughput": 1319.65, "total_tokens": 7249664}
|
| 7626 |
+
{"current_steps": 38035, "total_steps": 38480, "loss": 0.0527, "lr": 2.0458090199862446e-08, "epoch": 19.76871101871102, "percentage": 98.84, "elapsed_time": "1:31:34", "remaining_time": "0:01:04", "throughput": 1319.65, "total_tokens": 7250624}
|
| 7627 |
+
{"current_steps": 38040, "total_steps": 38480, "loss": 0.1543, "lr": 2.0002020656409436e-08, "epoch": 19.77130977130977, "percentage": 98.86, "elapsed_time": "1:31:35", "remaining_time": "0:01:03", "throughput": 1319.65, "total_tokens": 7251520}
|
| 7628 |
+
{"current_steps": 38045, "total_steps": 38480, "loss": 0.1873, "lr": 1.9551090093816727e-08, "epoch": 19.773908523908524, "percentage": 98.87, "elapsed_time": "1:31:35", "remaining_time": "0:01:02", "throughput": 1319.66, "total_tokens": 7252512}
|
| 7629 |
+
{"current_steps": 38050, "total_steps": 38480, "loss": 0.1015, "lr": 1.9105298604849e-08, "epoch": 19.776507276507278, "percentage": 98.88, "elapsed_time": "1:31:36", "remaining_time": "0:01:02", "throughput": 1319.7, "total_tokens": 7253632}
|
| 7630 |
+
{"current_steps": 38055, "total_steps": 38480, "loss": 0.1094, "lr": 1.8664646281213448e-08, "epoch": 19.77910602910603, "percentage": 98.9, "elapsed_time": "1:31:37", "remaining_time": "0:01:01", "throughput": 1319.71, "total_tokens": 7254624}
|
| 7631 |
+
{"current_steps": 38060, "total_steps": 38480, "loss": 0.1117, "lr": 1.8229133213565342e-08, "epoch": 19.781704781704782, "percentage": 98.91, "elapsed_time": "1:31:37", "remaining_time": "0:01:00", "throughput": 1319.72, "total_tokens": 7255616}
|
| 7632 |
+
{"current_steps": 38065, "total_steps": 38480, "loss": 0.2112, "lr": 1.779875949149967e-08, "epoch": 19.784303534303533, "percentage": 98.92, "elapsed_time": "1:31:38", "remaining_time": "0:00:59", "throughput": 1319.71, "total_tokens": 7256480}
|
| 7633 |
+
{"current_steps": 38070, "total_steps": 38480, "loss": 0.3019, "lr": 1.7373525203553953e-08, "epoch": 19.786902286902286, "percentage": 98.93, "elapsed_time": "1:31:39", "remaining_time": "0:00:59", "throughput": 1319.72, "total_tokens": 7257472}
|
| 7634 |
+
{"current_steps": 38075, "total_steps": 38480, "loss": 0.1467, "lr": 1.6953430437208206e-08, "epoch": 19.78950103950104, "percentage": 98.95, "elapsed_time": "1:31:39", "remaining_time": "0:00:58", "throughput": 1319.74, "total_tokens": 7258464}
|
| 7635 |
+
{"current_steps": 38080, "total_steps": 38480, "loss": 0.0945, "lr": 1.6538475278887743e-08, "epoch": 19.79209979209979, "percentage": 98.96, "elapsed_time": "1:31:40", "remaining_time": "0:00:57", "throughput": 1319.74, "total_tokens": 7259424}
|
| 7636 |
+
{"current_steps": 38085, "total_steps": 38480, "loss": 0.1508, "lr": 1.6128659813957613e-08, "epoch": 19.794698544698544, "percentage": 98.97, "elapsed_time": "1:31:41", "remaining_time": "0:00:57", "throughput": 1319.74, "total_tokens": 7260320}
|
| 7637 |
+
{"current_steps": 38090, "total_steps": 38480, "loss": 0.108, "lr": 1.5723984126728153e-08, "epoch": 19.7972972972973, "percentage": 98.99, "elapsed_time": "1:31:42", "remaining_time": "0:00:56", "throughput": 1319.75, "total_tokens": 7261312}
|
| 7638 |
+
{"current_steps": 38095, "total_steps": 38480, "loss": 0.1875, "lr": 1.5324448300449435e-08, "epoch": 19.79989604989605, "percentage": 99.0, "elapsed_time": "1:31:42", "remaining_time": "0:00:55", "throughput": 1319.74, "total_tokens": 7262208}
|
| 7639 |
+
{"current_steps": 38100, "total_steps": 38480, "loss": 0.1587, "lr": 1.493005241731682e-08, "epoch": 19.802494802494802, "percentage": 99.01, "elapsed_time": "1:31:43", "remaining_time": "0:00:54", "throughput": 1319.77, "total_tokens": 7263264}
|
| 7640 |
+
{"current_steps": 38105, "total_steps": 38480, "loss": 0.121, "lr": 1.4540796558465408e-08, "epoch": 19.805093555093556, "percentage": 99.03, "elapsed_time": "1:31:44", "remaining_time": "0:00:54", "throughput": 1319.76, "total_tokens": 7264128}
|
| 7641 |
+
{"current_steps": 38110, "total_steps": 38480, "loss": 0.1074, "lr": 1.4156680803972811e-08, "epoch": 19.807692307692307, "percentage": 99.04, "elapsed_time": "1:31:44", "remaining_time": "0:00:53", "throughput": 1319.77, "total_tokens": 7265120}
|
| 7642 |
+
{"current_steps": 38115, "total_steps": 38480, "loss": 0.1722, "lr": 1.3777705232864701e-08, "epoch": 19.81029106029106, "percentage": 99.05, "elapsed_time": "1:31:45", "remaining_time": "0:00:52", "throughput": 1319.79, "total_tokens": 7266144}
|
| 7643 |
+
{"current_steps": 38120, "total_steps": 38480, "loss": 0.0447, "lr": 1.3403869923103717e-08, "epoch": 19.812889812889814, "percentage": 99.06, "elapsed_time": "1:31:46", "remaining_time": "0:00:52", "throughput": 1319.79, "total_tokens": 7267072}
|
| 7644 |
+
{"current_steps": 38125, "total_steps": 38480, "loss": 0.342, "lr": 1.3035174951595009e-08, "epoch": 19.815488565488565, "percentage": 99.08, "elapsed_time": "1:31:46", "remaining_time": "0:00:51", "throughput": 1319.78, "total_tokens": 7267968}
|
| 7645 |
+
{"current_steps": 38130, "total_steps": 38480, "loss": 0.1225, "lr": 1.267162039418901e-08, "epoch": 19.81808731808732, "percentage": 99.09, "elapsed_time": "1:31:47", "remaining_time": "0:00:50", "throughput": 1319.77, "total_tokens": 7268832}
|
| 7646 |
+
{"current_steps": 38135, "total_steps": 38480, "loss": 0.2885, "lr": 1.2313206325681447e-08, "epoch": 19.820686070686072, "percentage": 99.1, "elapsed_time": "1:31:48", "remaining_time": "0:00:49", "throughput": 1319.77, "total_tokens": 7269760}
|
| 7647 |
+
{"current_steps": 38140, "total_steps": 38480, "loss": 0.1288, "lr": 1.1959932819799457e-08, "epoch": 19.823284823284823, "percentage": 99.12, "elapsed_time": "1:31:49", "remaining_time": "0:00:49", "throughput": 1319.78, "total_tokens": 7270720}
|
| 7648 |
+
{"current_steps": 38145, "total_steps": 38480, "loss": 0.2469, "lr": 1.1611799949226565e-08, "epoch": 19.825883575883577, "percentage": 99.13, "elapsed_time": "1:31:49", "remaining_time": "0:00:48", "throughput": 1319.79, "total_tokens": 7271712}
|
| 7649 |
+
{"current_steps": 38150, "total_steps": 38480, "loss": 0.1151, "lr": 1.1268807785577707e-08, "epoch": 19.828482328482327, "percentage": 99.14, "elapsed_time": "1:31:50", "remaining_time": "0:00:47", "throughput": 1319.79, "total_tokens": 7272608}
|
| 7650 |
+
{"current_steps": 38155, "total_steps": 38480, "loss": 0.1232, "lr": 1.0930956399415881e-08, "epoch": 19.83108108108108, "percentage": 99.16, "elapsed_time": "1:31:51", "remaining_time": "0:00:46", "throughput": 1319.79, "total_tokens": 7273536}
|
| 7651 |
+
{"current_steps": 38160, "total_steps": 38480, "loss": 0.1073, "lr": 1.0598245860243828e-08, "epoch": 19.833679833679835, "percentage": 99.17, "elapsed_time": "1:31:51", "remaining_time": "0:00:46", "throughput": 1319.78, "total_tokens": 7274432}
|
| 7652 |
+
{"current_steps": 38165, "total_steps": 38480, "loss": 0.154, "lr": 1.027067623650957e-08, "epoch": 19.836278586278585, "percentage": 99.18, "elapsed_time": "1:31:52", "remaining_time": "0:00:45", "throughput": 1319.79, "total_tokens": 7275424}
|
| 7653 |
+
{"current_steps": 38170, "total_steps": 38480, "loss": 0.1013, "lr": 9.948247595603643e-09, "epoch": 19.83887733887734, "percentage": 99.19, "elapsed_time": "1:31:53", "remaining_time": "0:00:44", "throughput": 1319.8, "total_tokens": 7276384}
|
| 7654 |
+
{"current_steps": 38175, "total_steps": 38480, "loss": 0.0745, "lr": 9.63096000385355e-09, "epoch": 19.841476091476093, "percentage": 99.21, "elapsed_time": "1:31:53", "remaining_time": "0:00:44", "throughput": 1319.8, "total_tokens": 7277312}
|
| 7655 |
+
{"current_steps": 38180, "total_steps": 38480, "loss": 0.1995, "lr": 9.318813526534853e-09, "epoch": 19.844074844074843, "percentage": 99.22, "elapsed_time": "1:31:54", "remaining_time": "0:00:43", "throughput": 1319.8, "total_tokens": 7278208}
|
| 7656 |
+
{"current_steps": 38185, "total_steps": 38480, "loss": 0.0667, "lr": 9.011808227865625e-09, "epoch": 19.846673596673597, "percentage": 99.23, "elapsed_time": "1:31:55", "remaining_time": "0:00:42", "throughput": 1319.79, "total_tokens": 7279104}
|
| 7657 |
+
{"current_steps": 38190, "total_steps": 38480, "loss": 0.1553, "lr": 8.70994417099813e-09, "epoch": 19.84927234927235, "percentage": 99.25, "elapsed_time": "1:31:56", "remaining_time": "0:00:41", "throughput": 1319.79, "total_tokens": 7280000}
|
| 7658 |
+
{"current_steps": 38195, "total_steps": 38480, "loss": 0.0845, "lr": 8.413221418041018e-09, "epoch": 19.8518711018711, "percentage": 99.26, "elapsed_time": "1:31:56", "remaining_time": "0:00:41", "throughput": 1319.79, "total_tokens": 7280928}
|
| 7659 |
+
{"current_steps": 38200, "total_steps": 38480, "loss": 0.2997, "lr": 8.121640030028798e-09, "epoch": 19.854469854469855, "percentage": 99.27, "elapsed_time": "1:31:57", "remaining_time": "0:00:40", "throughput": 1319.8, "total_tokens": 7281920}
|
| 7660 |
+
{"current_steps": 38205, "total_steps": 38480, "loss": 0.0814, "lr": 7.83520006695515e-09, "epoch": 19.85706860706861, "percentage": 99.29, "elapsed_time": "1:31:58", "remaining_time": "0:00:39", "throughput": 1319.8, "total_tokens": 7282848}
|
| 7661 |
+
{"current_steps": 38210, "total_steps": 38480, "loss": 0.1091, "lr": 7.55390158773961e-09, "epoch": 19.85966735966736, "percentage": 99.3, "elapsed_time": "1:31:58", "remaining_time": "0:00:38", "throughput": 1319.81, "total_tokens": 7283840}
|
| 7662 |
+
{"current_steps": 38215, "total_steps": 38480, "loss": 0.1994, "lr": 7.2777446502581006e-09, "epoch": 19.862266112266113, "percentage": 99.31, "elapsed_time": "1:31:59", "remaining_time": "0:00:38", "throughput": 1319.81, "total_tokens": 7284736}
|
| 7663 |
+
{"current_steps": 38220, "total_steps": 38480, "loss": 0.134, "lr": 7.006729311320737e-09, "epoch": 19.864864864864863, "percentage": 99.32, "elapsed_time": "1:32:00", "remaining_time": "0:00:37", "throughput": 1319.8, "total_tokens": 7285632}
|
| 7664 |
+
{"current_steps": 38225, "total_steps": 38480, "loss": 0.0445, "lr": 6.7408556266801425e-09, "epoch": 19.867463617463617, "percentage": 99.34, "elapsed_time": "1:32:00", "remaining_time": "0:00:36", "throughput": 1319.79, "total_tokens": 7286496}
|
| 7665 |
+
{"current_steps": 38230, "total_steps": 38480, "loss": 0.2176, "lr": 6.480123651034231e-09, "epoch": 19.87006237006237, "percentage": 99.35, "elapsed_time": "1:32:01", "remaining_time": "0:00:36", "throughput": 1319.79, "total_tokens": 7287424}
|
| 7666 |
+
{"current_steps": 38235, "total_steps": 38480, "loss": 0.0772, "lr": 6.224533438020652e-09, "epoch": 19.87266112266112, "percentage": 99.36, "elapsed_time": "1:32:02", "remaining_time": "0:00:35", "throughput": 1319.8, "total_tokens": 7288384}
|
| 7667 |
+
{"current_steps": 38240, "total_steps": 38480, "loss": 0.1487, "lr": 5.974085040222344e-09, "epoch": 19.875259875259875, "percentage": 99.38, "elapsed_time": "1:32:03", "remaining_time": "0:00:34", "throughput": 1319.79, "total_tokens": 7289280}
|
| 7668 |
+
{"current_steps": 38245, "total_steps": 38480, "loss": 0.0849, "lr": 5.728778509161981e-09, "epoch": 19.87785862785863, "percentage": 99.39, "elapsed_time": "1:32:03", "remaining_time": "0:00:33", "throughput": 1319.8, "total_tokens": 7290240}
|
| 7669 |
+
{"current_steps": 38250, "total_steps": 38480, "loss": 0.0841, "lr": 5.4886138953047505e-09, "epoch": 19.88045738045738, "percentage": 99.4, "elapsed_time": "1:32:04", "remaining_time": "0:00:33", "throughput": 1319.79, "total_tokens": 7291136}
|
| 7670 |
+
{"current_steps": 38255, "total_steps": 38480, "loss": 0.093, "lr": 5.253591248058354e-09, "epoch": 19.883056133056133, "percentage": 99.42, "elapsed_time": "1:32:05", "remaining_time": "0:00:32", "throughput": 1319.81, "total_tokens": 7292128}
|
| 7671 |
+
{"current_steps": 38260, "total_steps": 38480, "loss": 0.1742, "lr": 5.023710615773003e-09, "epoch": 19.885654885654887, "percentage": 99.43, "elapsed_time": "1:32:05", "remaining_time": "0:00:31", "throughput": 1319.8, "total_tokens": 7292992}
|
| 7672 |
+
{"current_steps": 38265, "total_steps": 38480, "loss": 0.1874, "lr": 4.798972045738647e-09, "epoch": 19.888253638253637, "percentage": 99.44, "elapsed_time": "1:32:06", "remaining_time": "0:00:31", "throughput": 1319.8, "total_tokens": 7293952}
|
| 7673 |
+
{"current_steps": 38270, "total_steps": 38480, "loss": 0.1791, "lr": 4.579375584190526e-09, "epoch": 19.89085239085239, "percentage": 99.45, "elapsed_time": "1:32:07", "remaining_time": "0:00:30", "throughput": 1319.81, "total_tokens": 7294912}
|
| 7674 |
+
{"current_steps": 38275, "total_steps": 38480, "loss": 0.0784, "lr": 4.3649212763036125e-09, "epoch": 19.893451143451145, "percentage": 99.47, "elapsed_time": "1:32:07", "remaining_time": "0:00:29", "throughput": 1319.8, "total_tokens": 7295808}
|
| 7675 |
+
{"current_steps": 38280, "total_steps": 38480, "loss": 0.1782, "lr": 4.1556091662009465e-09, "epoch": 19.896049896049895, "percentage": 99.48, "elapsed_time": "1:32:08", "remaining_time": "0:00:28", "throughput": 1319.81, "total_tokens": 7296768}
|
| 7676 |
+
{"current_steps": 38285, "total_steps": 38480, "loss": 0.1605, "lr": 3.951439296936976e-09, "epoch": 19.89864864864865, "percentage": 99.49, "elapsed_time": "1:32:09", "remaining_time": "0:00:28", "throughput": 1319.82, "total_tokens": 7297728}
|
| 7677 |
+
{"current_steps": 38290, "total_steps": 38480, "loss": 0.1042, "lr": 3.75241171051699e-09, "epoch": 19.901247401247403, "percentage": 99.51, "elapsed_time": "1:32:10", "remaining_time": "0:00:27", "throughput": 1319.82, "total_tokens": 7298656}
|
| 7678 |
+
{"current_steps": 38295, "total_steps": 38480, "loss": 0.0758, "lr": 3.558526447888788e-09, "epoch": 19.903846153846153, "percentage": 99.52, "elapsed_time": "1:32:10", "remaining_time": "0:00:26", "throughput": 1319.81, "total_tokens": 7299520}
|
| 7679 |
+
{"current_steps": 38300, "total_steps": 38480, "loss": 0.0772, "lr": 3.3697835489343575e-09, "epoch": 19.906444906444907, "percentage": 99.53, "elapsed_time": "1:32:11", "remaining_time": "0:00:25", "throughput": 1319.81, "total_tokens": 7300448}
|
| 7680 |
+
{"current_steps": 38305, "total_steps": 38480, "loss": 0.1193, "lr": 3.1861830524837487e-09, "epoch": 19.909043659043657, "percentage": 99.55, "elapsed_time": "1:32:12", "remaining_time": "0:00:25", "throughput": 1319.81, "total_tokens": 7301376}
|
| 7681 |
+
{"current_steps": 38310, "total_steps": 38480, "loss": 0.1364, "lr": 3.0077249963095246e-09, "epoch": 19.91164241164241, "percentage": 99.56, "elapsed_time": "1:32:12", "remaining_time": "0:00:24", "throughput": 1319.83, "total_tokens": 7302400}
|
| 7682 |
+
{"current_steps": 38315, "total_steps": 38480, "loss": 0.0549, "lr": 2.8344094171212086e-09, "epoch": 19.914241164241165, "percentage": 99.57, "elapsed_time": "1:32:13", "remaining_time": "0:00:23", "throughput": 1319.83, "total_tokens": 7303296}
|
| 7683 |
+
{"current_steps": 38320, "total_steps": 38480, "loss": 0.1727, "lr": 2.6662363505791656e-09, "epoch": 19.916839916839916, "percentage": 99.58, "elapsed_time": "1:32:14", "remaining_time": "0:00:23", "throughput": 1319.83, "total_tokens": 7304256}
|
| 7684 |
+
{"current_steps": 38325, "total_steps": 38480, "loss": 0.1189, "lr": 2.503205831277944e-09, "epoch": 19.91943866943867, "percentage": 99.6, "elapsed_time": "1:32:14", "remaining_time": "0:00:22", "throughput": 1319.83, "total_tokens": 7305184}
|
| 7685 |
+
{"current_steps": 38330, "total_steps": 38480, "loss": 0.1511, "lr": 2.3453178927573816e-09, "epoch": 19.922037422037423, "percentage": 99.61, "elapsed_time": "1:32:15", "remaining_time": "0:00:21", "throughput": 1319.84, "total_tokens": 7306144}
|
| 7686 |
+
{"current_steps": 38335, "total_steps": 38480, "loss": 0.1277, "lr": 2.1925725674970533e-09, "epoch": 19.924636174636174, "percentage": 99.62, "elapsed_time": "1:32:16", "remaining_time": "0:00:20", "throughput": 1319.85, "total_tokens": 7307104}
|
| 7687 |
+
{"current_steps": 38340, "total_steps": 38480, "loss": 0.0621, "lr": 2.0449698869218214e-09, "epoch": 19.927234927234927, "percentage": 99.64, "elapsed_time": "1:32:17", "remaining_time": "0:00:20", "throughput": 1319.86, "total_tokens": 7308096}
|
| 7688 |
+
{"current_steps": 38345, "total_steps": 38480, "loss": 0.1874, "lr": 1.902509881396286e-09, "epoch": 19.92983367983368, "percentage": 99.65, "elapsed_time": "1:32:17", "remaining_time": "0:00:19", "throughput": 1319.86, "total_tokens": 7309024}
|
| 7689 |
+
{"current_steps": 38350, "total_steps": 38480, "loss": 0.1682, "lr": 1.7651925802303347e-09, "epoch": 19.93243243243243, "percentage": 99.66, "elapsed_time": "1:32:18", "remaining_time": "0:00:18", "throughput": 1319.86, "total_tokens": 7309952}
|
| 7690 |
+
{"current_steps": 38355, "total_steps": 38480, "loss": 0.1273, "lr": 1.6330180116708172e-09, "epoch": 19.935031185031185, "percentage": 99.68, "elapsed_time": "1:32:19", "remaining_time": "0:00:18", "throughput": 1319.87, "total_tokens": 7310912}
|
| 7691 |
+
{"current_steps": 38360, "total_steps": 38480, "loss": 0.1036, "lr": 1.5059862029070947e-09, "epoch": 19.93762993762994, "percentage": 99.69, "elapsed_time": "1:32:19", "remaining_time": "0:00:17", "throughput": 1319.87, "total_tokens": 7311840}
|
| 7692 |
+
{"current_steps": 38365, "total_steps": 38480, "loss": 0.0845, "lr": 1.3840971800793689e-09, "epoch": 19.94022869022869, "percentage": 99.7, "elapsed_time": "1:32:20", "remaining_time": "0:00:16", "throughput": 1319.88, "total_tokens": 7312800}
|
| 7693 |
+
{"current_steps": 38370, "total_steps": 38480, "loss": 0.0855, "lr": 1.2673509682564755e-09, "epoch": 19.942827442827443, "percentage": 99.71, "elapsed_time": "1:32:21", "remaining_time": "0:00:15", "throughput": 1319.88, "total_tokens": 7313760}
|
| 7694 |
+
{"current_steps": 38375, "total_steps": 38480, "loss": 0.1138, "lr": 1.1557475914580895e-09, "epoch": 19.945426195426194, "percentage": 99.73, "elapsed_time": "1:32:21", "remaining_time": "0:00:15", "throughput": 1319.9, "total_tokens": 7314784}
|
| 7695 |
+
{"current_steps": 38380, "total_steps": 38480, "loss": 0.1106, "lr": 1.0492870726463987e-09, "epoch": 19.948024948024948, "percentage": 99.74, "elapsed_time": "1:32:22", "remaining_time": "0:00:14", "throughput": 1319.9, "total_tokens": 7315680}
|
| 7696 |
+
{"current_steps": 38385, "total_steps": 38480, "loss": 0.1334, "lr": 9.479694337205524e-10, "epoch": 19.9506237006237, "percentage": 99.75, "elapsed_time": "1:32:23", "remaining_time": "0:00:13", "throughput": 1319.91, "total_tokens": 7316672}
|
| 7697 |
+
{"current_steps": 38390, "total_steps": 38480, "loss": 0.0361, "lr": 8.517946955222123e-10, "epoch": 19.953222453222452, "percentage": 99.77, "elapsed_time": "1:32:24", "remaining_time": "0:00:12", "throughput": 1319.9, "total_tokens": 7317568}
|
| 7698 |
+
{"current_steps": 38395, "total_steps": 38480, "loss": 0.1069, "lr": 7.607628778383281e-10, "epoch": 19.955821205821206, "percentage": 99.78, "elapsed_time": "1:32:24", "remaining_time": "0:00:12", "throughput": 1319.89, "total_tokens": 7318400}
|
| 7699 |
+
{"current_steps": 38400, "total_steps": 38480, "loss": 0.0753, "lr": 6.748739993983622e-10, "epoch": 19.95841995841996, "percentage": 99.79, "elapsed_time": "1:32:25", "remaining_time": "0:00:11", "throughput": 1319.89, "total_tokens": 7319360}
|
| 7700 |
+
{"current_steps": 38405, "total_steps": 38480, "loss": 0.1131, "lr": 5.941280778687386e-10, "epoch": 19.96101871101871, "percentage": 99.81, "elapsed_time": "1:32:26", "remaining_time": "0:00:10", "throughput": 1319.89, "total_tokens": 7320288}
|
| 7701 |
+
{"current_steps": 38410, "total_steps": 38480, "loss": 0.0505, "lr": 5.185251298639449e-10, "epoch": 19.963617463617464, "percentage": 99.82, "elapsed_time": "1:32:26", "remaining_time": "0:00:10", "throughput": 1319.89, "total_tokens": 7321216}
|
| 7702 |
+
{"current_steps": 38415, "total_steps": 38480, "loss": 0.1433, "lr": 4.4806517093265444e-10, "epoch": 19.966216216216218, "percentage": 99.83, "elapsed_time": "1:32:27", "remaining_time": "0:00:09", "throughput": 1319.9, "total_tokens": 7322176}
|
| 7703 |
+
{"current_steps": 38420, "total_steps": 38480, "loss": 0.0375, "lr": 3.8274821557438e-10, "epoch": 19.968814968814968, "percentage": 99.84, "elapsed_time": "1:32:28", "remaining_time": "0:00:08", "throughput": 1319.91, "total_tokens": 7323136}
|
| 7704 |
+
{"current_steps": 38425, "total_steps": 38480, "loss": 0.1064, "lr": 3.2257427722559574e-10, "epoch": 19.97141372141372, "percentage": 99.86, "elapsed_time": "1:32:28", "remaining_time": "0:00:07", "throughput": 1319.91, "total_tokens": 7324064}
|
| 7705 |
+
{"current_steps": 38430, "total_steps": 38480, "loss": 0.0495, "lr": 2.6754336826528837e-10, "epoch": 19.974012474012476, "percentage": 99.87, "elapsed_time": "1:32:29", "remaining_time": "0:00:07", "throughput": 1319.9, "total_tokens": 7324928}
|
| 7706 |
+
{"current_steps": 38435, "total_steps": 38480, "loss": 0.1053, "lr": 2.176555000121816e-10, "epoch": 19.976611226611226, "percentage": 99.88, "elapsed_time": "1:32:30", "remaining_time": "0:00:06", "throughput": 1319.9, "total_tokens": 7325888}
|
| 7707 |
+
{"current_steps": 38440, "total_steps": 38480, "loss": 0.2411, "lr": 1.7291068273306288e-10, "epoch": 19.97920997920998, "percentage": 99.9, "elapsed_time": "1:32:31", "remaining_time": "0:00:05", "throughput": 1319.9, "total_tokens": 7326784}
|
| 7708 |
+
{"current_steps": 38445, "total_steps": 38480, "loss": 0.0481, "lr": 1.3330892563168107e-10, "epoch": 19.981808731808734, "percentage": 99.91, "elapsed_time": "1:32:31", "remaining_time": "0:00:05", "throughput": 1319.89, "total_tokens": 7327680}
|
| 7709 |
+
{"current_steps": 38450, "total_steps": 38480, "loss": 0.0448, "lr": 9.885023685152206e-11, "epoch": 19.984407484407484, "percentage": 99.92, "elapsed_time": "1:32:32", "remaining_time": "0:00:04", "throughput": 1319.89, "total_tokens": 7328608}
|
| 7710 |
+
{"current_steps": 38455, "total_steps": 38480, "loss": 0.1303, "lr": 6.953462348691098e-11, "epoch": 19.987006237006238, "percentage": 99.94, "elapsed_time": "1:32:33", "remaining_time": "0:00:03", "throughput": 1319.9, "total_tokens": 7329600}
|
| 7711 |
+
{"current_steps": 38460, "total_steps": 38480, "loss": 0.0776, "lr": 4.53620915691344e-11, "epoch": 19.989604989604988, "percentage": 99.95, "elapsed_time": "1:32:33", "remaining_time": "0:00:02", "throughput": 1319.9, "total_tokens": 7330496}
|
| 7712 |
+
{"current_steps": 38465, "total_steps": 38480, "loss": 0.1508, "lr": 2.6332646063664812e-11, "epoch": 19.992203742203742, "percentage": 99.96, "elapsed_time": "1:32:34", "remaining_time": "0:00:02", "throughput": 1319.91, "total_tokens": 7331456}
|
| 7713 |
+
{"current_steps": 38470, "total_steps": 38480, "loss": 0.1028, "lr": 1.24462908951406e-11, "epoch": 19.994802494802496, "percentage": 99.97, "elapsed_time": "1:32:35", "remaining_time": "0:00:01", "throughput": 1319.91, "total_tokens": 7332384}
|
| 7714 |
+
{"current_steps": 38475, "total_steps": 38480, "loss": 0.1778, "lr": 3.703028911283823e-12, "epoch": 19.997401247401246, "percentage": 99.99, "elapsed_time": "1:32:35", "remaining_time": "0:00:00", "throughput": 1319.91, "total_tokens": 7333344}
|
| 7715 |
+
{"current_steps": 38480, "total_steps": 38480, "loss": 0.1629, "lr": 1.0286191620689778e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:32:36", "remaining_time": "0:00:00", "throughput": 1319.92, "total_tokens": 7334376}
|
| 7716 |
+
{"current_steps": 38480, "total_steps": 38480, "eval_loss": 0.14645375311374664, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:32:44", "remaining_time": "0:00:00", "throughput": 1318.02, "total_tokens": 7334376}
|
| 7717 |
+
{"current_steps": 38480, "total_steps": 38480, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:32:46", "remaining_time": "0:00:00", "throughput": 1317.71, "total_tokens": 7334376}
|