train_cb_789_1760637868 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 1140
10d2af2 verified
{"current_steps": 5, "total_steps": 1140, "loss": 4.9733, "lr": 3.508771929824561e-05, "epoch": 0.08771929824561403, "percentage": 0.44, "elapsed_time": "0:00:01", "remaining_time": "0:05:58", "throughput": 1985.57, "total_tokens": 3136}
{"current_steps": 10, "total_steps": 1140, "loss": 1.344, "lr": 7.894736842105263e-05, "epoch": 0.17543859649122806, "percentage": 0.88, "elapsed_time": "0:00:02", "remaining_time": "0:05:01", "throughput": 2289.49, "total_tokens": 6112}
{"current_steps": 15, "total_steps": 1140, "loss": 0.7777, "lr": 0.00012280701754385965, "epoch": 0.2631578947368421, "percentage": 1.32, "elapsed_time": "0:00:03", "remaining_time": "0:04:53", "throughput": 2584.42, "total_tokens": 10112}
{"current_steps": 20, "total_steps": 1140, "loss": 0.5275, "lr": 0.00016666666666666666, "epoch": 0.3508771929824561, "percentage": 1.75, "elapsed_time": "0:00:05", "remaining_time": "0:04:41", "throughput": 2643.63, "total_tokens": 13280}
{"current_steps": 25, "total_steps": 1140, "loss": 0.4235, "lr": 0.00021052631578947367, "epoch": 0.43859649122807015, "percentage": 2.19, "elapsed_time": "0:00:06", "remaining_time": "0:04:32", "throughput": 2667.67, "total_tokens": 16288}
{"current_steps": 30, "total_steps": 1140, "loss": 0.5063, "lr": 0.0002543859649122807, "epoch": 0.5263157894736842, "percentage": 2.63, "elapsed_time": "0:00:07", "remaining_time": "0:04:25", "throughput": 2667.0, "total_tokens": 19104}
{"current_steps": 35, "total_steps": 1140, "loss": 0.4182, "lr": 0.0002982456140350877, "epoch": 0.6140350877192983, "percentage": 3.07, "elapsed_time": "0:00:08", "remaining_time": "0:04:20", "throughput": 2683.91, "total_tokens": 22144}
{"current_steps": 40, "total_steps": 1140, "loss": 0.4154, "lr": 0.00034210526315789477, "epoch": 0.7017543859649122, "percentage": 3.51, "elapsed_time": "0:00:09", "remaining_time": "0:04:19", "throughput": 2729.61, "total_tokens": 25792}
{"current_steps": 45, "total_steps": 1140, "loss": 0.3517, "lr": 0.00038596491228070175, "epoch": 0.7894736842105263, "percentage": 3.95, "elapsed_time": "0:00:10", "remaining_time": "0:04:15", "throughput": 2720.58, "total_tokens": 28576}
{"current_steps": 50, "total_steps": 1140, "loss": 0.252, "lr": 0.0004298245614035088, "epoch": 0.8771929824561403, "percentage": 4.39, "elapsed_time": "0:00:11", "remaining_time": "0:04:12", "throughput": 2714.23, "total_tokens": 31424}
{"current_steps": 55, "total_steps": 1140, "loss": 0.4751, "lr": 0.00047368421052631577, "epoch": 0.9649122807017544, "percentage": 4.82, "elapsed_time": "0:00:12", "remaining_time": "0:04:10", "throughput": 2733.54, "total_tokens": 34720}
{"current_steps": 57, "total_steps": 1140, "eval_loss": 1.2149380445480347, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:13", "remaining_time": "0:04:23", "throughput": 2555.33, "total_tokens": 35448}
{"current_steps": 60, "total_steps": 1140, "loss": 0.9889, "lr": 0.0005175438596491229, "epoch": 1.0526315789473684, "percentage": 5.26, "elapsed_time": "0:00:15", "remaining_time": "0:04:39", "throughput": 2429.21, "total_tokens": 37688}
{"current_steps": 65, "total_steps": 1140, "loss": 0.5786, "lr": 0.0005614035087719298, "epoch": 1.1403508771929824, "percentage": 5.7, "elapsed_time": "0:00:16", "remaining_time": "0:04:34", "throughput": 2453.73, "total_tokens": 40792}
{"current_steps": 70, "total_steps": 1140, "loss": 0.2984, "lr": 0.0006052631578947369, "epoch": 1.2280701754385965, "percentage": 6.14, "elapsed_time": "0:00:17", "remaining_time": "0:04:30", "throughput": 2473.35, "total_tokens": 43832}
{"current_steps": 75, "total_steps": 1140, "loss": 0.3784, "lr": 0.0006491228070175439, "epoch": 1.3157894736842106, "percentage": 6.58, "elapsed_time": "0:00:18", "remaining_time": "0:04:26", "throughput": 2482.4, "total_tokens": 46648}
{"current_steps": 80, "total_steps": 1140, "loss": 0.3617, "lr": 0.0006929824561403509, "epoch": 1.4035087719298245, "percentage": 7.02, "elapsed_time": "0:00:19", "remaining_time": "0:04:23", "throughput": 2501.99, "total_tokens": 49848}
{"current_steps": 85, "total_steps": 1140, "loss": 0.5845, "lr": 0.0007368421052631579, "epoch": 1.4912280701754386, "percentage": 7.46, "elapsed_time": "0:00:20", "remaining_time": "0:04:20", "throughput": 2502.34, "total_tokens": 52504}
{"current_steps": 90, "total_steps": 1140, "loss": 0.3749, "lr": 0.0007807017543859649, "epoch": 1.5789473684210527, "percentage": 7.89, "elapsed_time": "0:00:22", "remaining_time": "0:04:17", "throughput": 2513.41, "total_tokens": 55448}
{"current_steps": 95, "total_steps": 1140, "loss": 0.2262, "lr": 0.000824561403508772, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:23", "remaining_time": "0:04:15", "throughput": 2533.23, "total_tokens": 58776}
{"current_steps": 100, "total_steps": 1140, "loss": 0.2606, "lr": 0.000868421052631579, "epoch": 1.7543859649122808, "percentage": 8.77, "elapsed_time": "0:00:24", "remaining_time": "0:04:12", "throughput": 2539.69, "total_tokens": 61624}
{"current_steps": 105, "total_steps": 1140, "loss": 0.3744, "lr": 0.000912280701754386, "epoch": 1.8421052631578947, "percentage": 9.21, "elapsed_time": "0:00:25", "remaining_time": "0:04:10", "throughput": 2565.88, "total_tokens": 65336}
{"current_steps": 110, "total_steps": 1140, "loss": 0.3845, "lr": 0.0009561403508771929, "epoch": 1.9298245614035088, "percentage": 9.65, "elapsed_time": "0:00:26", "remaining_time": "0:04:08", "throughput": 2574.13, "total_tokens": 68280}
{"current_steps": 114, "total_steps": 1140, "eval_loss": 1.1474884748458862, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:28", "remaining_time": "0:04:13", "throughput": 2499.82, "total_tokens": 70496}
{"current_steps": 115, "total_steps": 1140, "loss": 0.4412, "lr": 0.001, "epoch": 2.017543859649123, "percentage": 10.09, "elapsed_time": "0:00:29", "remaining_time": "0:04:21", "throughput": 2422.08, "total_tokens": 71008}
{"current_steps": 120, "total_steps": 1140, "loss": 0.7242, "lr": 0.000999941402841295, "epoch": 2.1052631578947367, "percentage": 10.53, "elapsed_time": "0:00:30", "remaining_time": "0:04:19", "throughput": 2433.87, "total_tokens": 74176}
{"current_steps": 125, "total_steps": 1140, "loss": 0.2506, "lr": 0.0009997656250996883, "epoch": 2.192982456140351, "percentage": 10.96, "elapsed_time": "0:00:31", "remaining_time": "0:04:16", "throughput": 2451.74, "total_tokens": 77472}
{"current_steps": 130, "total_steps": 1140, "loss": 0.4359, "lr": 0.0009994727079754844, "epoch": 2.280701754385965, "percentage": 11.4, "elapsed_time": "0:00:32", "remaining_time": "0:04:13", "throughput": 2456.06, "total_tokens": 80192}
{"current_steps": 135, "total_steps": 1140, "loss": 0.3637, "lr": 0.0009990627201251284, "epoch": 2.3684210526315788, "percentage": 11.84, "elapsed_time": "0:00:33", "remaining_time": "0:04:11", "throughput": 2470.39, "total_tokens": 83424}
{"current_steps": 140, "total_steps": 1140, "loss": 0.2151, "lr": 0.0009985357576451127, "epoch": 2.456140350877193, "percentage": 12.28, "elapsed_time": "0:00:34", "remaining_time": "0:04:08", "throughput": 2472.96, "total_tokens": 86080}
{"current_steps": 145, "total_steps": 1140, "loss": 0.3518, "lr": 0.0009978919440494537, "epoch": 2.543859649122807, "percentage": 12.72, "elapsed_time": "0:00:35", "remaining_time": "0:04:06", "throughput": 2490.34, "total_tokens": 89600}
{"current_steps": 150, "total_steps": 1140, "loss": 0.4052, "lr": 0.0009971314302407413, "epoch": 2.6315789473684212, "percentage": 13.16, "elapsed_time": "0:00:37", "remaining_time": "0:04:04", "throughput": 2499.09, "total_tokens": 92704}
{"current_steps": 155, "total_steps": 1140, "loss": 0.2407, "lr": 0.0009962543944747686, "epoch": 2.719298245614035, "percentage": 13.6, "elapsed_time": "0:00:38", "remaining_time": "0:04:03", "throughput": 2512.91, "total_tokens": 96160}
{"current_steps": 160, "total_steps": 1140, "loss": 0.3001, "lr": 0.0009952610423187517, "epoch": 2.807017543859649, "percentage": 14.04, "elapsed_time": "0:00:39", "remaining_time": "0:04:01", "throughput": 2525.05, "total_tokens": 99520}
{"current_steps": 165, "total_steps": 1140, "loss": 0.2196, "lr": 0.0009941516066031462, "epoch": 2.8947368421052633, "percentage": 14.47, "elapsed_time": "0:00:40", "remaining_time": "0:03:59", "throughput": 2532.4, "total_tokens": 102528}
{"current_steps": 170, "total_steps": 1140, "loss": 0.2278, "lr": 0.0009929263473670749, "epoch": 2.982456140350877, "percentage": 14.91, "elapsed_time": "0:00:41", "remaining_time": "0:03:57", "throughput": 2550.38, "total_tokens": 106304}
{"current_steps": 171, "total_steps": 1140, "eval_loss": 0.19849923253059387, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:42", "remaining_time": "0:04:01", "throughput": 2494.88, "total_tokens": 106416}
{"current_steps": 175, "total_steps": 1140, "loss": 0.2223, "lr": 0.0009915855517973774, "epoch": 3.0701754385964914, "percentage": 15.35, "elapsed_time": "0:00:44", "remaining_time": "0:04:04", "throughput": 2454.16, "total_tokens": 108880}
{"current_steps": 180, "total_steps": 1140, "loss": 0.4317, "lr": 0.0009901295341612972, "epoch": 3.1578947368421053, "percentage": 15.79, "elapsed_time": "0:00:45", "remaining_time": "0:04:02", "throughput": 2463.04, "total_tokens": 111984}
{"current_steps": 185, "total_steps": 1140, "loss": 0.391, "lr": 0.00098855863573282, "epoch": 3.245614035087719, "percentage": 16.23, "elapsed_time": "0:00:46", "remaining_time": "0:04:00", "throughput": 2473.06, "total_tokens": 115248}
{"current_steps": 190, "total_steps": 1140, "loss": 0.0946, "lr": 0.0009868732247126839, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:47", "remaining_time": "0:03:58", "throughput": 2480.83, "total_tokens": 118384}
{"current_steps": 195, "total_steps": 1140, "loss": 0.1712, "lr": 0.000985073696142077, "epoch": 3.4210526315789473, "percentage": 17.11, "elapsed_time": "0:00:48", "remaining_time": "0:03:57", "throughput": 2496.67, "total_tokens": 122224}
{"current_steps": 200, "total_steps": 1140, "loss": 0.1537, "lr": 0.0009831604718100442, "epoch": 3.5087719298245617, "percentage": 17.54, "elapsed_time": "0:00:50", "remaining_time": "0:03:55", "throughput": 2506.96, "total_tokens": 125584}
{"current_steps": 205, "total_steps": 1140, "loss": 0.1084, "lr": 0.0009811340001546253, "epoch": 3.5964912280701755, "percentage": 17.98, "elapsed_time": "0:00:51", "remaining_time": "0:03:53", "throughput": 2514.4, "total_tokens": 128816}
{"current_steps": 210, "total_steps": 1140, "loss": 0.4722, "lr": 0.0009789947561577445, "epoch": 3.6842105263157894, "percentage": 18.42, "elapsed_time": "0:00:52", "remaining_time": "0:03:51", "throughput": 2521.17, "total_tokens": 131952}
{"current_steps": 215, "total_steps": 1140, "loss": 0.1285, "lr": 0.000976743241233882, "epoch": 3.7719298245614032, "percentage": 18.86, "elapsed_time": "0:00:53", "remaining_time": "0:03:50", "throughput": 2529.26, "total_tokens": 135248}
{"current_steps": 220, "total_steps": 1140, "loss": 0.087, "lr": 0.0009743799831125471, "epoch": 3.8596491228070176, "percentage": 19.3, "elapsed_time": "0:00:54", "remaining_time": "0:03:48", "throughput": 2535.33, "total_tokens": 138384}
{"current_steps": 225, "total_steps": 1140, "loss": 0.2558, "lr": 0.0009719055357145847, "epoch": 3.9473684210526314, "percentage": 19.74, "elapsed_time": "0:00:55", "remaining_time": "0:03:46", "throughput": 2538.52, "total_tokens": 141264}
{"current_steps": 228, "total_steps": 1140, "eval_loss": 0.09314573556184769, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:00:57", "remaining_time": "0:03:48", "throughput": 2497.78, "total_tokens": 142480}
{"current_steps": 230, "total_steps": 1140, "loss": 0.2478, "lr": 0.0009693204790223423, "epoch": 4.035087719298246, "percentage": 20.18, "elapsed_time": "0:00:58", "remaining_time": "0:03:50", "throughput": 2464.51, "total_tokens": 143664}
{"current_steps": 235, "total_steps": 1140, "loss": 0.0571, "lr": 0.0009666254189437286, "epoch": 4.12280701754386, "percentage": 20.61, "elapsed_time": "0:00:59", "remaining_time": "0:03:48", "throughput": 2470.2, "total_tokens": 146800}
{"current_steps": 240, "total_steps": 1140, "loss": 0.0745, "lr": 0.0009638209871701966, "epoch": 4.2105263157894735, "percentage": 21.05, "elapsed_time": "0:01:00", "remaining_time": "0:03:46", "throughput": 2472.17, "total_tokens": 149456}
{"current_steps": 245, "total_steps": 1140, "loss": 0.1353, "lr": 0.0009609078410286809, "epoch": 4.298245614035087, "percentage": 21.49, "elapsed_time": "0:01:01", "remaining_time": "0:03:45", "throughput": 2481.33, "total_tokens": 152880}
{"current_steps": 250, "total_steps": 1140, "loss": 0.0391, "lr": 0.0009578866633275287, "epoch": 4.385964912280702, "percentage": 21.93, "elapsed_time": "0:01:02", "remaining_time": "0:03:43", "throughput": 2494.03, "total_tokens": 156720}
{"current_steps": 255, "total_steps": 1140, "loss": 0.1566, "lr": 0.0009547581621964571, "epoch": 4.473684210526316, "percentage": 22.37, "elapsed_time": "0:01:03", "remaining_time": "0:03:41", "throughput": 2497.21, "total_tokens": 159600}
{"current_steps": 260, "total_steps": 1140, "loss": 0.1274, "lr": 0.0009515230709205749, "epoch": 4.56140350877193, "percentage": 22.81, "elapsed_time": "0:01:04", "remaining_time": "0:03:39", "throughput": 2497.44, "total_tokens": 162224}
{"current_steps": 265, "total_steps": 1140, "loss": 0.0674, "lr": 0.0009481821477685101, "epoch": 4.649122807017544, "percentage": 23.25, "elapsed_time": "0:01:06", "remaining_time": "0:03:38", "throughput": 2502.81, "total_tokens": 165328}
{"current_steps": 270, "total_steps": 1140, "loss": 0.1621, "lr": 0.0009447361758146791, "epoch": 4.7368421052631575, "percentage": 23.68, "elapsed_time": "0:01:07", "remaining_time": "0:03:36", "throughput": 2509.63, "total_tokens": 168592}
{"current_steps": 275, "total_steps": 1140, "loss": 0.148, "lr": 0.0009411859627557439, "epoch": 4.824561403508772, "percentage": 24.12, "elapsed_time": "0:01:08", "remaining_time": "0:03:34", "throughput": 2514.69, "total_tokens": 171696}
{"current_steps": 280, "total_steps": 1140, "loss": 0.216, "lr": 0.0009375323407212969, "epoch": 4.912280701754386, "percentage": 24.56, "elapsed_time": "0:01:09", "remaining_time": "0:03:33", "throughput": 2518.54, "total_tokens": 174672}
{"current_steps": 285, "total_steps": 1140, "loss": 0.1601, "lr": 0.0009337761660788185, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:10", "remaining_time": "0:03:31", "throughput": 2519.03, "total_tokens": 177224}
{"current_steps": 285, "total_steps": 1140, "eval_loss": 0.1481173187494278, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:11", "remaining_time": "0:03:33", "throughput": 2488.95, "total_tokens": 177224}
{"current_steps": 290, "total_steps": 1140, "loss": 0.0475, "lr": 0.0009299183192329556, "epoch": 5.087719298245614, "percentage": 25.44, "elapsed_time": "0:01:13", "remaining_time": "0:03:34", "throughput": 2468.42, "total_tokens": 180840}
{"current_steps": 295, "total_steps": 1140, "loss": 0.0633, "lr": 0.0009259597044191636, "epoch": 5.175438596491228, "percentage": 25.88, "elapsed_time": "0:01:14", "remaining_time": "0:03:33", "throughput": 2473.77, "total_tokens": 183976}
{"current_steps": 300, "total_steps": 1140, "loss": 0.1973, "lr": 0.0009219012494917644, "epoch": 5.2631578947368425, "percentage": 26.32, "elapsed_time": "0:01:15", "remaining_time": "0:03:31", "throughput": 2478.11, "total_tokens": 187016}
{"current_steps": 305, "total_steps": 1140, "loss": 0.0991, "lr": 0.0009177439057064682, "epoch": 5.350877192982456, "percentage": 26.75, "elapsed_time": "0:01:16", "remaining_time": "0:03:29", "throughput": 2477.81, "total_tokens": 189480}
{"current_steps": 310, "total_steps": 1140, "loss": 0.132, "lr": 0.0009134886474974092, "epoch": 5.43859649122807, "percentage": 27.19, "elapsed_time": "0:01:17", "remaining_time": "0:03:27", "throughput": 2483.75, "total_tokens": 192712}
{"current_steps": 315, "total_steps": 1140, "loss": 0.1531, "lr": 0.0009091364722487496, "epoch": 5.526315789473684, "percentage": 27.63, "elapsed_time": "0:01:18", "remaining_time": "0:03:25", "throughput": 2486.1, "total_tokens": 195496}
{"current_steps": 320, "total_steps": 1140, "loss": 0.0416, "lr": 0.0009046884000609047, "epoch": 5.614035087719298, "percentage": 28.07, "elapsed_time": "0:01:19", "remaining_time": "0:03:24", "throughput": 2495.24, "total_tokens": 199240}
{"current_steps": 325, "total_steps": 1140, "loss": 0.1052, "lr": 0.0009001454735114421, "epoch": 5.701754385964913, "percentage": 28.51, "elapsed_time": "0:01:20", "remaining_time": "0:03:22", "throughput": 2495.77, "total_tokens": 201832}
{"current_steps": 330, "total_steps": 1140, "loss": 0.0604, "lr": 0.0008955087574107137, "epoch": 5.7894736842105265, "percentage": 28.95, "elapsed_time": "0:01:21", "remaining_time": "0:03:21", "throughput": 2500.08, "total_tokens": 204968}
{"current_steps": 335, "total_steps": 1140, "loss": 0.1132, "lr": 0.0008907793385522767, "epoch": 5.87719298245614, "percentage": 29.39, "elapsed_time": "0:01:23", "remaining_time": "0:03:19", "throughput": 2506.25, "total_tokens": 208360}
{"current_steps": 340, "total_steps": 1140, "loss": 0.0544, "lr": 0.0008859583254581605, "epoch": 5.964912280701754, "percentage": 29.82, "elapsed_time": "0:01:24", "remaining_time": "0:03:18", "throughput": 2507.81, "total_tokens": 211080}
{"current_steps": 342, "total_steps": 1140, "eval_loss": 0.09723968058824539, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:25", "remaining_time": "0:03:19", "throughput": 2482.97, "total_tokens": 212000}
{"current_steps": 345, "total_steps": 1140, "loss": 0.0889, "lr": 0.0008810468481190428, "epoch": 6.052631578947368, "percentage": 30.26, "elapsed_time": "0:01:27", "remaining_time": "0:03:20", "throughput": 2459.91, "total_tokens": 214176}
{"current_steps": 350, "total_steps": 1140, "loss": 0.0667, "lr": 0.000876046057729392, "epoch": 6.140350877192983, "percentage": 30.7, "elapsed_time": "0:01:28", "remaining_time": "0:03:18", "throughput": 2459.65, "total_tokens": 216736}
{"current_steps": 355, "total_steps": 1140, "loss": 0.0484, "lr": 0.0008709571264176408, "epoch": 6.228070175438597, "percentage": 31.14, "elapsed_time": "0:01:29", "remaining_time": "0:03:17", "throughput": 2466.25, "total_tokens": 220224}
{"current_steps": 360, "total_steps": 1140, "loss": 0.0542, "lr": 0.0008657812469714519, "epoch": 6.315789473684211, "percentage": 31.58, "elapsed_time": "0:01:30", "remaining_time": "0:03:15", "throughput": 2469.13, "total_tokens": 223136}
{"current_steps": 365, "total_steps": 1140, "loss": 0.1475, "lr": 0.0008605196325581425, "epoch": 6.4035087719298245, "percentage": 32.02, "elapsed_time": "0:01:31", "remaining_time": "0:03:14", "throughput": 2477.99, "total_tokens": 226944}
{"current_steps": 370, "total_steps": 1140, "loss": 0.0126, "lr": 0.000855173516440332, "epoch": 6.491228070175438, "percentage": 32.46, "elapsed_time": "0:01:32", "remaining_time": "0:03:12", "throughput": 2484.39, "total_tokens": 230368}
{"current_steps": 375, "total_steps": 1140, "loss": 0.1144, "lr": 0.000849744151686879, "epoch": 6.578947368421053, "percentage": 32.89, "elapsed_time": "0:01:33", "remaining_time": "0:03:11", "throughput": 2487.85, "total_tokens": 233376}
{"current_steps": 380, "total_steps": 1140, "loss": 0.0617, "lr": 0.000844232810879176, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:34", "remaining_time": "0:03:09", "throughput": 2490.87, "total_tokens": 236384}
{"current_steps": 385, "total_steps": 1140, "loss": 0.0186, "lr": 0.0008386407858128706, "epoch": 6.754385964912281, "percentage": 33.77, "elapsed_time": "0:01:36", "remaining_time": "0:03:08", "throughput": 2495.21, "total_tokens": 239584}
{"current_steps": 390, "total_steps": 1140, "loss": 0.168, "lr": 0.0008329693871950843, "epoch": 6.842105263157895, "percentage": 34.21, "elapsed_time": "0:01:37", "remaining_time": "0:03:06", "throughput": 2498.37, "total_tokens": 242624}
{"current_steps": 395, "total_steps": 1140, "loss": 0.0212, "lr": 0.0008272199443371966, "epoch": 6.9298245614035086, "percentage": 34.65, "elapsed_time": "0:01:38", "remaining_time": "0:03:05", "throughput": 2505.09, "total_tokens": 246304}
{"current_steps": 399, "total_steps": 1140, "eval_loss": 0.0559244342148304, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:39", "remaining_time": "0:03:05", "throughput": 2484.14, "total_tokens": 248272}
{"current_steps": 400, "total_steps": 1140, "loss": 0.0662, "lr": 0.0008213938048432696, "epoch": 7.017543859649122, "percentage": 35.09, "elapsed_time": "0:01:41", "remaining_time": "0:03:06", "throughput": 2463.77, "total_tokens": 248912}
{"current_steps": 405, "total_steps": 1140, "loss": 0.0653, "lr": 0.0008154923342941862, "epoch": 7.105263157894737, "percentage": 35.53, "elapsed_time": "0:01:42", "remaining_time": "0:03:05", "throughput": 2467.4, "total_tokens": 252016}
{"current_steps": 410, "total_steps": 1140, "loss": 0.0297, "lr": 0.0008095169159275712, "epoch": 7.192982456140351, "percentage": 35.96, "elapsed_time": "0:01:43", "remaining_time": "0:03:04", "throughput": 2475.42, "total_tokens": 255856}
{"current_steps": 415, "total_steps": 1140, "loss": 0.065, "lr": 0.0008034689503135784, "epoch": 7.280701754385965, "percentage": 36.4, "elapsed_time": "0:01:44", "remaining_time": "0:03:02", "throughput": 2477.84, "total_tokens": 258800}
{"current_steps": 420, "total_steps": 1140, "loss": 0.0156, "lr": 0.0007973498550266114, "epoch": 7.368421052631579, "percentage": 36.84, "elapsed_time": "0:01:45", "remaining_time": "0:03:00", "throughput": 2479.0, "total_tokens": 261584}
{"current_steps": 425, "total_steps": 1140, "loss": 0.0847, "lr": 0.0007911610643130608, "epoch": 7.456140350877193, "percentage": 37.28, "elapsed_time": "0:01:46", "remaining_time": "0:02:59", "throughput": 2485.06, "total_tokens": 265168}
{"current_steps": 430, "total_steps": 1140, "loss": 0.0425, "lr": 0.0007849040287551332, "epoch": 7.543859649122807, "percentage": 37.72, "elapsed_time": "0:01:47", "remaining_time": "0:02:57", "throughput": 2488.59, "total_tokens": 268240}
{"current_steps": 435, "total_steps": 1140, "loss": 0.0193, "lr": 0.000778580214930851, "epoch": 7.631578947368421, "percentage": 38.16, "elapsed_time": "0:01:48", "remaining_time": "0:02:56", "throughput": 2494.06, "total_tokens": 271728}
{"current_steps": 440, "total_steps": 1140, "loss": 0.0406, "lr": 0.0007721911050703032, "epoch": 7.719298245614035, "percentage": 38.6, "elapsed_time": "0:01:50", "remaining_time": "0:02:55", "throughput": 2496.61, "total_tokens": 274736}
{"current_steps": 445, "total_steps": 1140, "loss": 0.0237, "lr": 0.000765738196708228, "epoch": 7.807017543859649, "percentage": 39.04, "elapsed_time": "0:01:51", "remaining_time": "0:02:53", "throughput": 2501.02, "total_tokens": 278096}
{"current_steps": 450, "total_steps": 1140, "loss": 0.0181, "lr": 0.0007592230023330069, "epoch": 7.894736842105263, "percentage": 39.47, "elapsed_time": "0:01:52", "remaining_time": "0:02:52", "throughput": 2500.85, "total_tokens": 280592}
{"current_steps": 455, "total_steps": 1140, "loss": 0.0503, "lr": 0.000752647049032155, "epoch": 7.982456140350877, "percentage": 39.91, "elapsed_time": "0:01:53", "remaining_time": "0:02:50", "throughput": 2506.49, "total_tokens": 284144}
{"current_steps": 456, "total_steps": 1140, "eval_loss": 0.0554032064974308, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:01:54", "remaining_time": "0:02:51", "throughput": 2486.09, "total_tokens": 284248}
{"current_steps": 460, "total_steps": 1140, "loss": 0.0392, "lr": 0.0007460118781343892, "epoch": 8.070175438596491, "percentage": 40.35, "elapsed_time": "0:01:56", "remaining_time": "0:02:51", "throughput": 2471.5, "total_tokens": 287128}
{"current_steps": 465, "total_steps": 1140, "loss": 0.003, "lr": 0.000739319044848358, "epoch": 8.157894736842104, "percentage": 40.79, "elapsed_time": "0:01:57", "remaining_time": "0:02:50", "throughput": 2477.34, "total_tokens": 290744}
{"current_steps": 470, "total_steps": 1140, "loss": 0.038, "lr": 0.0007325701178981183, "epoch": 8.24561403508772, "percentage": 41.23, "elapsed_time": "0:01:58", "remaining_time": "0:02:48", "throughput": 2480.53, "total_tokens": 293816}
{"current_steps": 475, "total_steps": 1140, "loss": 0.1005, "lr": 0.0007257666791554447, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:01:59", "remaining_time": "0:02:47", "throughput": 2484.29, "total_tokens": 297080}
{"current_steps": 480, "total_steps": 1140, "loss": 0.0303, "lr": 0.0007189103232690561, "epoch": 8.421052631578947, "percentage": 42.11, "elapsed_time": "0:02:00", "remaining_time": "0:02:45", "throughput": 2488.49, "total_tokens": 300408}
{"current_steps": 485, "total_steps": 1140, "loss": 0.0492, "lr": 0.0007120026572908484, "epoch": 8.508771929824562, "percentage": 42.54, "elapsed_time": "0:02:01", "remaining_time": "0:02:44", "throughput": 2490.81, "total_tokens": 303384}
{"current_steps": 490, "total_steps": 1140, "loss": 0.0229, "lr": 0.0007050453002992201, "epoch": 8.596491228070175, "percentage": 42.98, "elapsed_time": "0:02:02", "remaining_time": "0:02:42", "throughput": 2492.5, "total_tokens": 306232}
{"current_steps": 495, "total_steps": 1140, "loss": 0.0107, "lr": 0.0006980398830195785, "epoch": 8.68421052631579, "percentage": 43.42, "elapsed_time": "0:02:04", "remaining_time": "0:02:41", "throughput": 2497.74, "total_tokens": 309816}
{"current_steps": 500, "total_steps": 1140, "loss": 0.0063, "lr": 0.000690988047442116, "epoch": 8.771929824561404, "percentage": 43.86, "elapsed_time": "0:02:05", "remaining_time": "0:02:40", "throughput": 2498.24, "total_tokens": 312408}
{"current_steps": 505, "total_steps": 1140, "loss": 0.0072, "lr": 0.0006838914464369467, "epoch": 8.859649122807017, "percentage": 44.3, "elapsed_time": "0:02:06", "remaining_time": "0:02:38", "throughput": 2500.19, "total_tokens": 315416}
{"current_steps": 510, "total_steps": 1140, "loss": 0.0561, "lr": 0.0006767517433666918, "epoch": 8.947368421052632, "percentage": 44.74, "elapsed_time": "0:02:07", "remaining_time": "0:02:37", "throughput": 2502.51, "total_tokens": 318392}
{"current_steps": 513, "total_steps": 1140, "eval_loss": 0.09938608109951019, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:08", "remaining_time": "0:02:37", "throughput": 2484.34, "total_tokens": 319488}
{"current_steps": 515, "total_steps": 1140, "loss": 0.0023, "lr": 0.0006695706116966074, "epoch": 9.035087719298245, "percentage": 45.18, "elapsed_time": "0:02:09", "remaining_time": "0:02:37", "throughput": 2470.21, "total_tokens": 320832}
{"current_steps": 520, "total_steps": 1140, "loss": 0.0047, "lr": 0.0006623497346023419, "epoch": 9.12280701754386, "percentage": 45.61, "elapsed_time": "0:02:10", "remaining_time": "0:02:36", "throughput": 2472.28, "total_tokens": 323840}
{"current_steps": 525, "total_steps": 1140, "loss": 0.0119, "lr": 0.0006550908045754194, "epoch": 9.210526315789474, "percentage": 46.05, "elapsed_time": "0:02:12", "remaining_time": "0:02:34", "throughput": 2476.35, "total_tokens": 327200}
{"current_steps": 530, "total_steps": 1140, "loss": 0.0102, "lr": 0.0006477955230265393, "epoch": 9.298245614035087, "percentage": 46.49, "elapsed_time": "0:02:13", "remaining_time": "0:02:33", "throughput": 2478.55, "total_tokens": 330208}
{"current_steps": 535, "total_steps": 1140, "loss": 0.0008, "lr": 0.0006404655998867848, "epoch": 9.385964912280702, "percentage": 46.93, "elapsed_time": "0:02:14", "remaining_time": "0:02:31", "throughput": 2479.2, "total_tokens": 332864}
{"current_steps": 540, "total_steps": 1140, "loss": 0.0952, "lr": 0.0006331027532068335, "epoch": 9.473684210526315, "percentage": 47.37, "elapsed_time": "0:02:15", "remaining_time": "0:02:30", "throughput": 2482.94, "total_tokens": 336224}
{"current_steps": 545, "total_steps": 1140, "loss": 0.004, "lr": 0.0006257087087542672, "epoch": 9.56140350877193, "percentage": 47.81, "elapsed_time": "0:02:16", "remaining_time": "0:02:29", "throughput": 2485.82, "total_tokens": 339392}
{"current_steps": 550, "total_steps": 1140, "loss": 0.0089, "lr": 0.0006182851996090712, "epoch": 9.649122807017545, "percentage": 48.25, "elapsed_time": "0:02:17", "remaining_time": "0:02:27", "throughput": 2488.59, "total_tokens": 342624}
{"current_steps": 555, "total_steps": 1140, "loss": 0.0381, "lr": 0.0006108339657574193, "epoch": 9.736842105263158, "percentage": 48.68, "elapsed_time": "0:02:18", "remaining_time": "0:02:26", "throughput": 2489.87, "total_tokens": 345472}
{"current_steps": 560, "total_steps": 1140, "loss": 0.0288, "lr": 0.000603356753683842, "epoch": 9.824561403508772, "percentage": 49.12, "elapsed_time": "0:02:19", "remaining_time": "0:02:24", "throughput": 2490.61, "total_tokens": 348160}
{"current_steps": 565, "total_steps": 1140, "loss": 0.003, "lr": 0.0005958553159618693, "epoch": 9.912280701754385, "percentage": 49.56, "elapsed_time": "0:02:20", "remaining_time": "0:02:23", "throughput": 2494.13, "total_tokens": 351456}
{"current_steps": 570, "total_steps": 1140, "loss": 0.0031, "lr": 0.0005883314108432481, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:21", "remaining_time": "0:02:21", "throughput": 2496.64, "total_tokens": 354472}
{"current_steps": 570, "total_steps": 1140, "eval_loss": 0.07178983837366104, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:22", "remaining_time": "0:02:22", "throughput": 2481.81, "total_tokens": 354472}
{"current_steps": 575, "total_steps": 1140, "loss": 0.0038, "lr": 0.0005807868018458274, "epoch": 10.087719298245615, "percentage": 50.44, "elapsed_time": "0:02:24", "remaining_time": "0:02:22", "throughput": 2471.55, "total_tokens": 358024}
{"current_steps": 580, "total_steps": 1140, "loss": 0.0058, "lr": 0.0005732232573402109, "epoch": 10.175438596491228, "percentage": 50.88, "elapsed_time": "0:02:25", "remaining_time": "0:02:20", "throughput": 2474.38, "total_tokens": 361192}
{"current_steps": 585, "total_steps": 1140, "loss": 0.0044, "lr": 0.0005656425501352691, "epoch": 10.263157894736842, "percentage": 51.32, "elapsed_time": "0:02:27", "remaining_time": "0:02:19", "throughput": 2474.82, "total_tokens": 363816}
{"current_steps": 590, "total_steps": 1140, "loss": 0.001, "lr": 0.0005580464570626152, "epoch": 10.350877192982455, "percentage": 51.75, "elapsed_time": "0:02:28", "remaining_time": "0:02:17", "throughput": 2475.41, "total_tokens": 366440}
{"current_steps": 595, "total_steps": 1140, "loss": 0.0009, "lr": 0.0005504367585601342, "epoch": 10.43859649122807, "percentage": 52.19, "elapsed_time": "0:02:29", "remaining_time": "0:02:16", "throughput": 2478.48, "total_tokens": 369672}
{"current_steps": 600, "total_steps": 1140, "loss": 0.0006, "lr": 0.0005428152382546695, "epoch": 10.526315789473685, "percentage": 52.63, "elapsed_time": "0:02:30", "remaining_time": "0:02:15", "throughput": 2480.98, "total_tokens": 372808}
{"current_steps": 605, "total_steps": 1140, "loss": 0.0027, "lr": 0.0005351836825439609, "epoch": 10.614035087719298, "percentage": 53.07, "elapsed_time": "0:02:31", "remaining_time": "0:02:13", "throughput": 2482.07, "total_tokens": 375560}
{"current_steps": 610, "total_steps": 1140, "loss": 0.0009, "lr": 0.0005275438801779327, "epoch": 10.701754385964913, "percentage": 53.51, "elapsed_time": "0:02:32", "remaining_time": "0:02:12", "throughput": 2484.97, "total_tokens": 378792}
{"current_steps": 615, "total_steps": 1140, "loss": 0.0006, "lr": 0.0005198976218394321, "epoch": 10.789473684210526, "percentage": 53.95, "elapsed_time": "0:02:33", "remaining_time": "0:02:11", "throughput": 2489.1, "total_tokens": 382312}
{"current_steps": 620, "total_steps": 1140, "loss": 0.0082, "lr": 0.0005122466997245124, "epoch": 10.87719298245614, "percentage": 54.39, "elapsed_time": "0:02:34", "remaining_time": "0:02:09", "throughput": 2493.89, "total_tokens": 386088}
{"current_steps": 625, "total_steps": 1140, "loss": 0.0013, "lr": 0.0005045929071223632, "epoch": 10.964912280701755, "percentage": 54.82, "elapsed_time": "0:02:35", "remaining_time": "0:02:08", "throughput": 2494.78, "total_tokens": 388840}
{"current_steps": 627, "total_steps": 1140, "eval_loss": 0.15399591624736786, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:37", "remaining_time": "0:02:08", "throughput": 2479.9, "total_tokens": 389408}
{"current_steps": 630, "total_steps": 1140, "loss": 0.0424, "lr": 0.0004969380379949836, "epoch": 11.052631578947368, "percentage": 55.26, "elapsed_time": "0:02:38", "remaining_time": "0:02:08", "throughput": 2467.42, "total_tokens": 391200}
{"current_steps": 635, "total_steps": 1140, "loss": 0.0008, "lr": 0.0004892838865566986, "epoch": 11.140350877192983, "percentage": 55.7, "elapsed_time": "0:02:39", "remaining_time": "0:02:07", "throughput": 2471.64, "total_tokens": 394880}
{"current_steps": 640, "total_steps": 1140, "loss": 0.0008, "lr": 0.00048163224685361384, "epoch": 11.228070175438596, "percentage": 56.14, "elapsed_time": "0:02:40", "remaining_time": "0:02:05", "throughput": 2475.3, "total_tokens": 398336}
{"current_steps": 645, "total_steps": 1140, "loss": 0.0006, "lr": 0.0004739849123431138, "epoch": 11.31578947368421, "percentage": 56.58, "elapsed_time": "0:02:42", "remaining_time": "0:02:04", "throughput": 2478.28, "total_tokens": 401632}
{"current_steps": 650, "total_steps": 1140, "loss": 0.001, "lr": 0.00046634367547349433, "epoch": 11.403508771929825, "percentage": 57.02, "elapsed_time": "0:02:43", "remaining_time": "0:02:03", "throughput": 2482.05, "total_tokens": 405120}
{"current_steps": 655, "total_steps": 1140, "loss": 0.0081, "lr": 0.0004587103272638339, "epoch": 11.491228070175438, "percentage": 57.46, "elapsed_time": "0:02:44", "remaining_time": "0:02:01", "throughput": 2484.14, "total_tokens": 408192}
{"current_steps": 660, "total_steps": 1140, "loss": 0.0022, "lr": 0.0004510866568841981, "epoch": 11.578947368421053, "percentage": 57.89, "elapsed_time": "0:02:45", "remaining_time": "0:02:00", "throughput": 2486.7, "total_tokens": 411360}
{"current_steps": 665, "total_steps": 1140, "loss": 0.0017, "lr": 0.0004434744512362797, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:02:46", "remaining_time": "0:01:58", "throughput": 2487.81, "total_tokens": 414144}
{"current_steps": 670, "total_steps": 1140, "loss": 0.0019, "lr": 0.00043587549453456836, "epoch": 11.75438596491228, "percentage": 58.77, "elapsed_time": "0:02:47", "remaining_time": "0:01:57", "throughput": 2491.86, "total_tokens": 417760}
{"current_steps": 675, "total_steps": 1140, "loss": 0.0157, "lr": 0.00042829156788815195, "epoch": 11.842105263157894, "percentage": 59.21, "elapsed_time": "0:02:48", "remaining_time": "0:01:56", "throughput": 2493.08, "total_tokens": 420640}
{"current_steps": 680, "total_steps": 1140, "loss": 0.0013, "lr": 0.0004207244488832429, "epoch": 11.929824561403509, "percentage": 59.65, "elapsed_time": "0:02:49", "remaining_time": "0:01:54", "throughput": 2493.8, "total_tokens": 423360}
{"current_steps": 684, "total_steps": 1140, "eval_loss": 0.13970787823200226, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:02:51", "remaining_time": "0:01:54", "throughput": 2481.48, "total_tokens": 425328}
{"current_steps": 685, "total_steps": 1140, "loss": 0.001, "lr": 0.00041317591116653486, "epoch": 12.017543859649123, "percentage": 60.09, "elapsed_time": "0:02:52", "remaining_time": "0:01:54", "throughput": 2470.77, "total_tokens": 426288}
{"current_steps": 690, "total_steps": 1140, "loss": 0.0019, "lr": 0.00040564772402947784, "epoch": 12.105263157894736, "percentage": 60.53, "elapsed_time": "0:02:53", "remaining_time": "0:01:53", "throughput": 2472.14, "total_tokens": 429136}
{"current_steps": 695, "total_steps": 1140, "loss": 0.0006, "lr": 0.00039814165199357807, "epoch": 12.192982456140351, "percentage": 60.96, "elapsed_time": "0:02:54", "remaining_time": "0:01:51", "throughput": 2474.43, "total_tokens": 432272}
{"current_steps": 700, "total_steps": 1140, "loss": 0.0016, "lr": 0.00039065945439681213, "epoch": 12.280701754385966, "percentage": 61.4, "elapsed_time": "0:02:55", "remaining_time": "0:01:50", "throughput": 2477.63, "total_tokens": 435760}
{"current_steps": 705, "total_steps": 1140, "loss": 0.0008, "lr": 0.0003832028849812607, "epoch": 12.368421052631579, "percentage": 61.84, "elapsed_time": "0:02:57", "remaining_time": "0:01:49", "throughput": 2481.39, "total_tokens": 439312}
{"current_steps": 710, "total_steps": 1140, "loss": 0.0023, "lr": 0.00037577369148204934, "epoch": 12.456140350877194, "percentage": 62.28, "elapsed_time": "0:02:58", "remaining_time": "0:01:47", "throughput": 2484.17, "total_tokens": 442640}
{"current_steps": 715, "total_steps": 1140, "loss": 0.0014, "lr": 0.00036837361521770053, "epoch": 12.543859649122806, "percentage": 62.72, "elapsed_time": "0:02:59", "remaining_time": "0:01:46", "throughput": 2485.98, "total_tokens": 445648}
{"current_steps": 720, "total_steps": 1140, "loss": 0.0011, "lr": 0.00036100439068198676, "epoch": 12.631578947368421, "percentage": 63.16, "elapsed_time": "0:03:00", "remaining_time": "0:01:45", "throughput": 2485.97, "total_tokens": 448208}
{"current_steps": 725, "total_steps": 1140, "loss": 0.0009, "lr": 0.00035366774513738707, "epoch": 12.719298245614034, "percentage": 63.6, "elapsed_time": "0:03:01", "remaining_time": "0:01:43", "throughput": 2488.85, "total_tokens": 451600}
{"current_steps": 730, "total_steps": 1140, "loss": 0.0005, "lr": 0.0003463653982102347, "epoch": 12.807017543859649, "percentage": 64.04, "elapsed_time": "0:03:02", "remaining_time": "0:01:42", "throughput": 2491.88, "total_tokens": 454992}
{"current_steps": 735, "total_steps": 1140, "loss": 0.0003, "lr": 0.00033909906148765724, "epoch": 12.894736842105264, "percentage": 64.47, "elapsed_time": "0:03:03", "remaining_time": "0:01:41", "throughput": 2493.34, "total_tokens": 458032}
{"current_steps": 740, "total_steps": 1140, "loss": 0.0005, "lr": 0.00033187043811639863, "epoch": 12.982456140350877, "percentage": 64.91, "elapsed_time": "0:03:04", "remaining_time": "0:01:39", "throughput": 2495.11, "total_tokens": 461104}
{"current_steps": 741, "total_steps": 1140, "eval_loss": 0.11178340017795563, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:05", "remaining_time": "0:01:40", "throughput": 2482.62, "total_tokens": 461216}
{"current_steps": 745, "total_steps": 1140, "loss": 0.0002, "lr": 0.00032468122240362287, "epoch": 13.070175438596491, "percentage": 65.35, "elapsed_time": "0:03:07", "remaining_time": "0:01:39", "throughput": 2472.4, "total_tokens": 463904}
{"current_steps": 750, "total_steps": 1140, "loss": 0.0003, "lr": 0.00031753309941978615, "epoch": 13.157894736842104, "percentage": 65.79, "elapsed_time": "0:03:08", "remaining_time": "0:01:38", "throughput": 2475.35, "total_tokens": 467328}
{"current_steps": 755, "total_steps": 1140, "loss": 0.0004, "lr": 0.0003104277446036764, "epoch": 13.24561403508772, "percentage": 66.23, "elapsed_time": "0:03:09", "remaining_time": "0:01:36", "throughput": 2475.24, "total_tokens": 469792}
{"current_steps": 760, "total_steps": 1140, "loss": 0.0003, "lr": 0.00030336682336970847, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:10", "remaining_time": "0:01:35", "throughput": 2477.09, "total_tokens": 472896}
{"current_steps": 765, "total_steps": 1140, "loss": 0.0006, "lr": 0.0002963519907175713, "epoch": 13.421052631578947, "percentage": 67.11, "elapsed_time": "0:03:11", "remaining_time": "0:01:34", "throughput": 2478.76, "total_tokens": 475904}
{"current_steps": 770, "total_steps": 1140, "loss": 0.0007, "lr": 0.00028938489084431363, "epoch": 13.508771929824562, "percentage": 67.54, "elapsed_time": "0:03:13", "remaining_time": "0:01:32", "throughput": 2480.32, "total_tokens": 478848}
{"current_steps": 775, "total_steps": 1140, "loss": 0.0003, "lr": 0.0002824671567589635, "epoch": 13.596491228070175, "percentage": 67.98, "elapsed_time": "0:03:14", "remaining_time": "0:01:31", "throughput": 2482.64, "total_tokens": 482080}
{"current_steps": 780, "total_steps": 1140, "loss": 0.0003, "lr": 0.00027560040989976894, "epoch": 13.68421052631579, "percentage": 68.42, "elapsed_time": "0:03:15", "remaining_time": "0:01:30", "throughput": 2485.21, "total_tokens": 485440}
{"current_steps": 785, "total_steps": 1140, "loss": 0.0004, "lr": 0.0002687862597541523, "epoch": 13.771929824561404, "percentage": 68.86, "elapsed_time": "0:03:16", "remaining_time": "0:01:28", "throughput": 2487.51, "total_tokens": 488640}
{"current_steps": 790, "total_steps": 1140, "loss": 0.0003, "lr": 0.0002620263034814632, "epoch": 13.859649122807017, "percentage": 69.3, "elapsed_time": "0:03:17", "remaining_time": "0:01:27", "throughput": 2490.14, "total_tokens": 492032}
{"current_steps": 795, "total_steps": 1140, "loss": 0.0003, "lr": 0.00025532212553862446, "epoch": 13.947368421052632, "percentage": 69.74, "elapsed_time": "0:03:18", "remaining_time": "0:01:26", "throughput": 2492.48, "total_tokens": 495264}
{"current_steps": 798, "total_steps": 1140, "eval_loss": 0.11026651412248611, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:20", "remaining_time": "0:01:25", "throughput": 2481.99, "total_tokens": 496704}
{"current_steps": 800, "total_steps": 1140, "loss": 0.0002, "lr": 0.000248675297308751, "epoch": 14.035087719298245, "percentage": 70.18, "elapsed_time": "0:03:21", "remaining_time": "0:01:25", "throughput": 2473.92, "total_tokens": 498464}
{"current_steps": 805, "total_steps": 1140, "loss": 0.0002, "lr": 0.00024208737673283814, "epoch": 14.12280701754386, "percentage": 70.61, "elapsed_time": "0:03:22", "remaining_time": "0:01:24", "throughput": 2475.66, "total_tokens": 501632}
{"current_steps": 810, "total_steps": 1140, "loss": 0.0003, "lr": 0.00023555990794459542, "epoch": 14.210526315789474, "percentage": 71.05, "elapsed_time": "0:03:23", "remaining_time": "0:01:22", "throughput": 2476.88, "total_tokens": 504544}
{"current_steps": 815, "total_steps": 1140, "loss": 0.0002, "lr": 0.00022909442090852144, "epoch": 14.298245614035087, "percentage": 71.49, "elapsed_time": "0:03:24", "remaining_time": "0:01:21", "throughput": 2478.04, "total_tokens": 507456}
{"current_steps": 820, "total_steps": 1140, "loss": 0.0003, "lr": 0.0002226924310612956, "epoch": 14.385964912280702, "percentage": 71.93, "elapsed_time": "0:03:25", "remaining_time": "0:01:20", "throughput": 2481.49, "total_tokens": 511136}
{"current_steps": 825, "total_steps": 1140, "loss": 0.0003, "lr": 0.00021635543895657866, "epoch": 14.473684210526315, "percentage": 72.37, "elapsed_time": "0:03:27", "remaining_time": "0:01:19", "throughput": 2483.52, "total_tokens": 514368}
{"current_steps": 830, "total_steps": 1140, "loss": 0.0004, "lr": 0.00021008492991329863, "epoch": 14.56140350877193, "percentage": 72.81, "elapsed_time": "0:03:28", "remaining_time": "0:01:17", "throughput": 2484.7, "total_tokens": 517248}
{"current_steps": 835, "total_steps": 1140, "loss": 0.0005, "lr": 0.00020388237366751006, "epoch": 14.649122807017545, "percentage": 73.25, "elapsed_time": "0:03:29", "remaining_time": "0:01:16", "throughput": 2485.7, "total_tokens": 520160}
{"current_steps": 840, "total_steps": 1140, "loss": 0.0003, "lr": 0.0001977492240279035, "epoch": 14.736842105263158, "percentage": 73.68, "elapsed_time": "0:03:30", "remaining_time": "0:01:15", "throughput": 2487.81, "total_tokens": 523424}
{"current_steps": 845, "total_steps": 1140, "loss": 0.0005, "lr": 0.0001916869185350505, "epoch": 14.824561403508772, "percentage": 74.12, "elapsed_time": "0:03:31", "remaining_time": "0:01:13", "throughput": 2488.71, "total_tokens": 526240}
{"current_steps": 850, "total_steps": 1140, "loss": 0.0003, "lr": 0.00018569687812445895, "epoch": 14.912280701754385, "percentage": 74.56, "elapsed_time": "0:03:32", "remaining_time": "0:01:12", "throughput": 2490.6, "total_tokens": 529440}
{"current_steps": 855, "total_steps": 1140, "loss": 0.0002, "lr": 0.00017978050679352359, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:33", "remaining_time": "0:01:11", "throughput": 2492.52, "total_tokens": 532504}
{"current_steps": 855, "total_steps": 1140, "eval_loss": 0.1186794564127922, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:34", "remaining_time": "0:01:11", "throughput": 2482.64, "total_tokens": 532504}
{"current_steps": 860, "total_steps": 1140, "loss": 0.0002, "lr": 0.00017393919127244346, "epoch": 15.087719298245615, "percentage": 75.44, "elapsed_time": "0:03:36", "remaining_time": "0:01:10", "throughput": 2473.91, "total_tokens": 535544}
{"current_steps": 865, "total_steps": 1140, "loss": 0.0003, "lr": 0.00016817430069918936, "epoch": 15.175438596491228, "percentage": 75.88, "elapsed_time": "0:03:37", "remaining_time": "0:01:09", "throughput": 2475.91, "total_tokens": 538744}
{"current_steps": 870, "total_steps": 1140, "loss": 0.0002, "lr": 0.00016248718629859244, "epoch": 15.263157894736842, "percentage": 76.32, "elapsed_time": "0:03:38", "remaining_time": "0:01:07", "throughput": 2478.2, "total_tokens": 542040}
{"current_steps": 875, "total_steps": 1140, "loss": 0.0002, "lr": 0.00015687918106563326, "epoch": 15.350877192982455, "percentage": 76.75, "elapsed_time": "0:03:39", "remaining_time": "0:01:06", "throughput": 2479.26, "total_tokens": 544888}
{"current_steps": 880, "total_steps": 1140, "loss": 0.0004, "lr": 0.0001513515994530023, "epoch": 15.43859649122807, "percentage": 77.19, "elapsed_time": "0:03:40", "remaining_time": "0:01:05", "throughput": 2479.83, "total_tokens": 547640}
{"current_steps": 885, "total_steps": 1140, "loss": 0.0002, "lr": 0.00014590573706300782, "epoch": 15.526315789473685, "percentage": 77.63, "elapsed_time": "0:03:42", "remaining_time": "0:01:03", "throughput": 2482.82, "total_tokens": 551224}
{"current_steps": 890, "total_steps": 1140, "loss": 0.0004, "lr": 0.00014054287034390045, "epoch": 15.614035087719298, "percentage": 78.07, "elapsed_time": "0:03:43", "remaining_time": "0:01:02", "throughput": 2484.21, "total_tokens": 554264}
{"current_steps": 895, "total_steps": 1140, "loss": 0.0002, "lr": 0.00013526425629068966, "epoch": 15.701754385964913, "percentage": 78.51, "elapsed_time": "0:03:44", "remaining_time": "0:01:01", "throughput": 2487.27, "total_tokens": 557880}
{"current_steps": 900, "total_steps": 1140, "loss": 0.0003, "lr": 0.00013007113215051673, "epoch": 15.789473684210526, "percentage": 78.95, "elapsed_time": "0:03:45", "remaining_time": "0:01:00", "throughput": 2487.24, "total_tokens": 560376}
{"current_steps": 905, "total_steps": 1140, "loss": 0.0002, "lr": 0.00012496471513265967, "epoch": 15.87719298245614, "percentage": 79.39, "elapsed_time": "0:03:46", "remaining_time": "0:00:58", "throughput": 2490.0, "total_tokens": 563864}
{"current_steps": 910, "total_steps": 1140, "loss": 0.0003, "lr": 0.00011994620212323176, "epoch": 15.964912280701755, "percentage": 79.82, "elapsed_time": "0:03:47", "remaining_time": "0:00:57", "throughput": 2492.68, "total_tokens": 567352}
{"current_steps": 912, "total_steps": 1140, "eval_loss": 0.11453289538621902, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:03:48", "remaining_time": "0:00:57", "throughput": 2482.46, "total_tokens": 567952}
{"current_steps": 915, "total_steps": 1140, "loss": 0.0004, "lr": 0.00011501676940464645, "epoch": 16.05263157894737, "percentage": 80.26, "elapsed_time": "0:03:50", "remaining_time": "0:00:56", "throughput": 2473.65, "total_tokens": 570448}
{"current_steps": 920, "total_steps": 1140, "loss": 0.0002, "lr": 0.00011017757237990877, "epoch": 16.140350877192983, "percentage": 80.7, "elapsed_time": "0:03:51", "remaining_time": "0:00:55", "throughput": 2475.93, "total_tokens": 573808}
{"current_steps": 925, "total_steps": 1140, "loss": 0.0002, "lr": 0.00010542974530180327, "epoch": 16.228070175438596, "percentage": 81.14, "elapsed_time": "0:03:52", "remaining_time": "0:00:54", "throughput": 2478.21, "total_tokens": 577200}
{"current_steps": 930, "total_steps": 1140, "loss": 0.0004, "lr": 0.00010077440100703683, "epoch": 16.31578947368421, "percentage": 81.58, "elapsed_time": "0:03:54", "remaining_time": "0:00:52", "throughput": 2479.45, "total_tokens": 580240}
{"current_steps": 935, "total_steps": 1140, "loss": 0.0003, "lr": 9.621263065540364e-05, "epoch": 16.403508771929825, "percentage": 82.02, "elapsed_time": "0:03:55", "remaining_time": "0:00:51", "throughput": 2480.49, "total_tokens": 583120}
{"current_steps": 940, "total_steps": 1140, "loss": 0.0002, "lr": 9.174550347402855e-05, "epoch": 16.49122807017544, "percentage": 82.46, "elapsed_time": "0:03:56", "remaining_time": "0:00:50", "throughput": 2481.53, "total_tokens": 586032}
{"current_steps": 945, "total_steps": 1140, "loss": 0.0003, "lr": 8.737406650675333e-05, "epoch": 16.57894736842105, "percentage": 82.89, "elapsed_time": "0:03:57", "remaining_time": "0:00:48", "throughput": 2483.77, "total_tokens": 589392}
{"current_steps": 950, "total_steps": 1140, "loss": 0.0003, "lr": 8.309934436872074e-05, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:03:58", "remaining_time": "0:00:47", "throughput": 2485.66, "total_tokens": 592624}
{"current_steps": 955, "total_steps": 1140, "loss": 0.0002, "lr": 7.89223390062172e-05, "epoch": 16.75438596491228, "percentage": 83.77, "elapsed_time": "0:03:59", "remaining_time": "0:00:46", "throughput": 2486.44, "total_tokens": 595440}
{"current_steps": 960, "total_steps": 1140, "loss": 0.0003, "lr": 7.4844029461827e-05, "epoch": 16.842105263157894, "percentage": 84.21, "elapsed_time": "0:04:00", "remaining_time": "0:00:45", "throughput": 2488.18, "total_tokens": 598640}
{"current_steps": 965, "total_steps": 1140, "loss": 0.0002, "lr": 7.086537164495688e-05, "epoch": 16.92982456140351, "percentage": 84.65, "elapsed_time": "0:04:01", "remaining_time": "0:00:43", "throughput": 2489.15, "total_tokens": 601584}
{"current_steps": 969, "total_steps": 1140, "eval_loss": 0.10751347243785858, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:03", "remaining_time": "0:00:42", "throughput": 2481.21, "total_tokens": 603760}
{"current_steps": 970, "total_steps": 1140, "loss": 0.0002, "lr": 6.698729810778065e-05, "epoch": 17.017543859649123, "percentage": 85.09, "elapsed_time": "0:04:04", "remaining_time": "0:00:42", "throughput": 2473.2, "total_tokens": 604464}
{"current_steps": 975, "total_steps": 1140, "loss": 0.0002, "lr": 6.321071782666077e-05, "epoch": 17.105263157894736, "percentage": 85.53, "elapsed_time": "0:04:05", "remaining_time": "0:00:41", "throughput": 2474.6, "total_tokens": 607600}
{"current_steps": 980, "total_steps": 1140, "loss": 0.0002, "lr": 5.953651598909332e-05, "epoch": 17.19298245614035, "percentage": 85.96, "elapsed_time": "0:04:06", "remaining_time": "0:00:40", "throughput": 2477.29, "total_tokens": 611152}
{"current_steps": 985, "total_steps": 1140, "loss": 0.0002, "lr": 5.596555378623125e-05, "epoch": 17.280701754385966, "percentage": 86.4, "elapsed_time": "0:04:07", "remaining_time": "0:00:39", "throughput": 2479.91, "total_tokens": 614768}
{"current_steps": 990, "total_steps": 1140, "loss": 0.0001, "lr": 5.2498668211030166e-05, "epoch": 17.36842105263158, "percentage": 86.84, "elapsed_time": "0:04:09", "remaining_time": "0:00:37", "throughput": 2482.54, "total_tokens": 618384}
{"current_steps": 995, "total_steps": 1140, "loss": 0.0003, "lr": 4.913667186206722e-05, "epoch": 17.45614035087719, "percentage": 87.28, "elapsed_time": "0:04:10", "remaining_time": "0:00:36", "throughput": 2483.81, "total_tokens": 621424}
{"current_steps": 1000, "total_steps": 1140, "loss": 0.0004, "lr": 4.588035275307689e-05, "epoch": 17.54385964912281, "percentage": 87.72, "elapsed_time": "0:04:11", "remaining_time": "0:00:35", "throughput": 2485.16, "total_tokens": 624464}
{"current_steps": 1005, "total_steps": 1140, "loss": 0.0003, "lr": 4.273047412824954e-05, "epoch": 17.63157894736842, "percentage": 88.16, "elapsed_time": "0:04:12", "remaining_time": "0:00:33", "throughput": 2487.22, "total_tokens": 627824}
{"current_steps": 1010, "total_steps": 1140, "loss": 0.0002, "lr": 3.9687774283335975e-05, "epoch": 17.719298245614034, "percentage": 88.6, "elapsed_time": "0:04:13", "remaining_time": "0:00:32", "throughput": 2488.95, "total_tokens": 631056}
{"current_steps": 1015, "total_steps": 1140, "loss": 0.0002, "lr": 3.675296639259912e-05, "epoch": 17.80701754385965, "percentage": 89.04, "elapsed_time": "0:04:14", "remaining_time": "0:00:31", "throughput": 2490.41, "total_tokens": 634128}
{"current_steps": 1020, "total_steps": 1140, "loss": 0.0002, "lr": 3.392673834165388e-05, "epoch": 17.894736842105264, "percentage": 89.47, "elapsed_time": "0:04:15", "remaining_time": "0:00:30", "throughput": 2490.38, "total_tokens": 636656}
{"current_steps": 1025, "total_steps": 1140, "loss": 0.0004, "lr": 3.120975256623465e-05, "epoch": 17.982456140350877, "percentage": 89.91, "elapsed_time": "0:04:16", "remaining_time": "0:00:28", "throughput": 2491.71, "total_tokens": 639696}
{"current_steps": 1026, "total_steps": 1140, "eval_loss": 0.11331921070814133, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:17", "remaining_time": "0:00:28", "throughput": 2482.64, "total_tokens": 639784}
{"current_steps": 1030, "total_steps": 1140, "loss": 0.0002, "lr": 2.8602645896928293e-05, "epoch": 18.07017543859649, "percentage": 90.35, "elapsed_time": "0:04:19", "remaining_time": "0:00:27", "throughput": 2475.58, "total_tokens": 642280}
{"current_steps": 1035, "total_steps": 1140, "loss": 0.0002, "lr": 2.610602940990797e-05, "epoch": 18.157894736842106, "percentage": 90.79, "elapsed_time": "0:04:20", "remaining_time": "0:00:26", "throughput": 2476.43, "total_tokens": 645128}
{"current_steps": 1040, "total_steps": 1140, "loss": 0.0002, "lr": 2.3720488283703547e-05, "epoch": 18.24561403508772, "percentage": 91.23, "elapsed_time": "0:04:21", "remaining_time": "0:00:25", "throughput": 2476.24, "total_tokens": 647528}
{"current_steps": 1045, "total_steps": 1140, "loss": 0.0003, "lr": 2.144658166204294e-05, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:04:22", "remaining_time": "0:00:23", "throughput": 2479.72, "total_tokens": 651560}
{"current_steps": 1050, "total_steps": 1140, "loss": 0.0003, "lr": 1.9284842522794942e-05, "epoch": 18.42105263157895, "percentage": 92.11, "elapsed_time": "0:04:23", "remaining_time": "0:00:22", "throughput": 2480.86, "total_tokens": 654568}
{"current_steps": 1055, "total_steps": 1140, "loss": 0.0002, "lr": 1.7235777553045283e-05, "epoch": 18.50877192982456, "percentage": 92.54, "elapsed_time": "0:04:24", "remaining_time": "0:00:21", "throughput": 2481.55, "total_tokens": 657448}
{"current_steps": 1060, "total_steps": 1140, "loss": 0.0003, "lr": 1.5299867030334813e-05, "epoch": 18.596491228070175, "percentage": 92.98, "elapsed_time": "0:04:26", "remaining_time": "0:00:20", "throughput": 2482.67, "total_tokens": 660392}
{"current_steps": 1065, "total_steps": 1140, "loss": 0.0002, "lr": 1.3477564710088097e-05, "epoch": 18.68421052631579, "percentage": 93.42, "elapsed_time": "0:04:27", "remaining_time": "0:00:18", "throughput": 2484.35, "total_tokens": 663656}
{"current_steps": 1070, "total_steps": 1140, "loss": 0.0002, "lr": 1.1769297719258221e-05, "epoch": 18.771929824561404, "percentage": 93.86, "elapsed_time": "0:04:28", "remaining_time": "0:00:17", "throughput": 2486.7, "total_tokens": 667208}
{"current_steps": 1075, "total_steps": 1140, "loss": 0.0002, "lr": 1.0175466456213034e-05, "epoch": 18.859649122807017, "percentage": 94.3, "elapsed_time": "0:04:29", "remaining_time": "0:00:16", "throughput": 2488.6, "total_tokens": 670568}
{"current_steps": 1080, "total_steps": 1140, "loss": 0.0002, "lr": 8.696444496886502e-06, "epoch": 18.94736842105263, "percentage": 94.74, "elapsed_time": "0:04:30", "remaining_time": "0:00:15", "throughput": 2490.78, "total_tokens": 674024}
{"current_steps": 1083, "total_steps": 1140, "eval_loss": 0.11408393085002899, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:32", "remaining_time": "0:00:14", "throughput": 2483.79, "total_tokens": 675800}
{"current_steps": 1085, "total_steps": 1140, "loss": 0.0003, "lr": 7.332578507216469e-06, "epoch": 19.035087719298247, "percentage": 95.18, "elapsed_time": "0:04:33", "remaining_time": "0:00:13", "throughput": 2476.18, "total_tokens": 676920}
{"current_steps": 1090, "total_steps": 1140, "loss": 0.0003, "lr": 6.084188161890325e-06, "epoch": 19.12280701754386, "percentage": 95.61, "elapsed_time": "0:04:34", "remaining_time": "0:00:12", "throughput": 2476.8, "total_tokens": 679896}
{"current_steps": 1095, "total_steps": 1140, "loss": 0.0002, "lr": 4.95156606941688e-06, "epoch": 19.210526315789473, "percentage": 96.05, "elapsed_time": "0:04:35", "remaining_time": "0:00:11", "throughput": 2478.68, "total_tokens": 683288}
{"current_steps": 1100, "total_steps": 1140, "loss": 0.0003, "lr": 3.9349777035421194e-06, "epoch": 19.29824561403509, "percentage": 96.49, "elapsed_time": "0:04:36", "remaining_time": "0:00:10", "throughput": 2479.57, "total_tokens": 686168}
{"current_steps": 1105, "total_steps": 1140, "loss": 0.0001, "lr": 3.034661341025258e-06, "epoch": 19.385964912280702, "percentage": 96.93, "elapsed_time": "0:04:37", "remaining_time": "0:00:08", "throughput": 2480.32, "total_tokens": 689016}
{"current_steps": 1110, "total_steps": 1140, "loss": 0.0003, "lr": 2.250828005789518e-06, "epoch": 19.473684210526315, "percentage": 97.37, "elapsed_time": "0:04:38", "remaining_time": "0:00:07", "throughput": 2482.23, "total_tokens": 692376}
{"current_steps": 1115, "total_steps": 1140, "loss": 0.0003, "lr": 1.5836614194602028e-06, "epoch": 19.56140350877193, "percentage": 97.81, "elapsed_time": "0:04:40", "remaining_time": "0:00:06", "throughput": 2483.59, "total_tokens": 695480}
{"current_steps": 1120, "total_steps": 1140, "loss": 0.0002, "lr": 1.033317958302693e-06, "epoch": 19.649122807017545, "percentage": 98.25, "elapsed_time": "0:04:41", "remaining_time": "0:00:05", "throughput": 2484.61, "total_tokens": 698424}
{"current_steps": 1125, "total_steps": 1140, "loss": 0.0002, "lr": 5.999266165694906e-07, "epoch": 19.736842105263158, "percentage": 98.68, "elapsed_time": "0:04:42", "remaining_time": "0:00:03", "throughput": 2486.02, "total_tokens": 701592}
{"current_steps": 1130, "total_steps": 1140, "loss": 0.0002, "lr": 2.8358897626556966e-07, "epoch": 19.82456140350877, "percentage": 99.12, "elapsed_time": "0:04:43", "remaining_time": "0:00:02", "throughput": 2486.92, "total_tokens": 704504}
{"current_steps": 1135, "total_steps": 1140, "loss": 0.0004, "lr": 8.437918333864537e-08, "epoch": 19.912280701754387, "percentage": 99.56, "elapsed_time": "0:04:44", "remaining_time": "0:00:01", "throughput": 2488.92, "total_tokens": 707992}
{"current_steps": 1140, "total_steps": 1140, "loss": 0.0002, "lr": 2.343930299963937e-09, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:45", "remaining_time": "0:00:00", "throughput": 2490.38, "total_tokens": 711112}
{"current_steps": 1140, "total_steps": 1140, "eval_loss": 0.11126314103603363, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:46", "remaining_time": "0:00:00", "throughput": 2482.95, "total_tokens": 711112}
{"current_steps": 1140, "total_steps": 1140, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:47", "remaining_time": "0:00:00", "throughput": 2475.79, "total_tokens": 711112}