| {"current_steps": 5, "total_steps": 1140, "loss": 0.4708, "lr": 0.0010526315789473684, "epoch": 0.08771929824561403, "percentage": 0.44, "elapsed_time": "0:00:01", "remaining_time": "0:05:59", "throughput": 2305.31, "total_tokens": 3648} |
| {"current_steps": 10, "total_steps": 1140, "loss": 0.7899, "lr": 0.0023684210526315787, "epoch": 0.17543859649122806, "percentage": 0.88, "elapsed_time": "0:00:02", "remaining_time": "0:05:04", "throughput": 2542.52, "total_tokens": 6848} |
| {"current_steps": 15, "total_steps": 1140, "loss": 3.7407, "lr": 0.003684210526315789, "epoch": 0.2631578947368421, "percentage": 1.32, "elapsed_time": "0:00:03", "remaining_time": "0:04:50", "throughput": 2738.75, "total_tokens": 10592} |
| {"current_steps": 20, "total_steps": 1140, "loss": 6.4113, "lr": 0.004999999999999999, "epoch": 0.3508771929824561, "percentage": 1.75, "elapsed_time": "0:00:04", "remaining_time": "0:04:39", "throughput": 2780.15, "total_tokens": 13856} |
| {"current_steps": 25, "total_steps": 1140, "loss": 4.8737, "lr": 0.0063157894736842095, "epoch": 0.43859649122807015, "percentage": 2.19, "elapsed_time": "0:00:06", "remaining_time": "0:04:37", "throughput": 2858.82, "total_tokens": 17760} |
| {"current_steps": 30, "total_steps": 1140, "loss": 1.8232, "lr": 0.0076315789473684215, "epoch": 0.5263157894736842, "percentage": 2.63, "elapsed_time": "0:00:07", "remaining_time": "0:04:29", "throughput": 2846.52, "total_tokens": 20704} |
| {"current_steps": 35, "total_steps": 1140, "loss": 1.3582, "lr": 0.008947368421052631, "epoch": 0.6140350877192983, "percentage": 3.07, "elapsed_time": "0:00:08", "remaining_time": "0:04:21", "throughput": 2819.83, "total_tokens": 23392} |
| {"current_steps": 40, "total_steps": 1140, "loss": 0.9052, "lr": 0.010263157894736842, "epoch": 0.7017543859649122, "percentage": 3.51, "elapsed_time": "0:00:09", "remaining_time": "0:04:16", "throughput": 2804.85, "total_tokens": 26176} |
| {"current_steps": 45, "total_steps": 1140, "loss": 0.9899, "lr": 0.011578947368421052, "epoch": 0.7894736842105263, "percentage": 3.95, "elapsed_time": "0:00:10", "remaining_time": "0:04:14", "throughput": 2822.3, "total_tokens": 29536} |
| {"current_steps": 50, "total_steps": 1140, "loss": 0.9198, "lr": 0.012894736842105263, "epoch": 0.8771929824561403, "percentage": 4.39, "elapsed_time": "0:00:11", "remaining_time": "0:04:11", "throughput": 2829.35, "total_tokens": 32704} |
| {"current_steps": 55, "total_steps": 1140, "loss": 0.4459, "lr": 0.014210526315789472, "epoch": 0.9649122807017544, "percentage": 4.82, "elapsed_time": "0:00:12", "remaining_time": "0:04:08", "throughput": 2812.76, "total_tokens": 35392} |
| {"current_steps": 57, "total_steps": 1140, "eval_loss": 0.285372257232666, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:13", "remaining_time": "0:04:19", "throughput": 2641.61, "total_tokens": 36112} |
| {"current_steps": 60, "total_steps": 1140, "loss": 0.3165, "lr": 0.015526315789473685, "epoch": 1.0526315789473684, "percentage": 5.26, "elapsed_time": "0:00:15", "remaining_time": "0:04:32", "throughput": 2520.66, "total_tokens": 38192} |
| {"current_steps": 65, "total_steps": 1140, "loss": 0.2346, "lr": 0.016842105263157894, "epoch": 1.1403508771929824, "percentage": 5.7, "elapsed_time": "0:00:16", "remaining_time": "0:04:29", "throughput": 2545.96, "total_tokens": 41424} |
| {"current_steps": 70, "total_steps": 1140, "loss": 0.4517, "lr": 0.018157894736842106, "epoch": 1.2280701754385965, "percentage": 6.14, "elapsed_time": "0:00:17", "remaining_time": "0:04:25", "throughput": 2565.39, "total_tokens": 44496} |
| {"current_steps": 75, "total_steps": 1140, "loss": 1.1483, "lr": 0.019473684210526317, "epoch": 1.3157894736842106, "percentage": 6.58, "elapsed_time": "0:00:18", "remaining_time": "0:04:21", "throughput": 2575.78, "total_tokens": 47408} |
| {"current_steps": 80, "total_steps": 1140, "loss": 0.7027, "lr": 0.020789473684210528, "epoch": 1.4035087719298245, "percentage": 7.02, "elapsed_time": "0:00:19", "remaining_time": "0:04:17", "throughput": 2582.01, "total_tokens": 50256} |
| {"current_steps": 85, "total_steps": 1140, "loss": 0.262, "lr": 0.022105263157894735, "epoch": 1.4912280701754386, "percentage": 7.46, "elapsed_time": "0:00:20", "remaining_time": "0:04:14", "throughput": 2579.18, "total_tokens": 52816} |
| {"current_steps": 90, "total_steps": 1140, "loss": 0.5782, "lr": 0.023421052631578947, "epoch": 1.5789473684210527, "percentage": 7.89, "elapsed_time": "0:00:21", "remaining_time": "0:04:11", "throughput": 2591.22, "total_tokens": 55856} |
| {"current_steps": 95, "total_steps": 1140, "loss": 0.2276, "lr": 0.024736842105263158, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:22", "remaining_time": "0:04:10", "throughput": 2627.87, "total_tokens": 59920} |
| {"current_steps": 100, "total_steps": 1140, "loss": 0.2737, "lr": 0.02605263157894737, "epoch": 1.7543859649122808, "percentage": 8.77, "elapsed_time": "0:00:23", "remaining_time": "0:04:07", "throughput": 2625.7, "total_tokens": 62512} |
| {"current_steps": 105, "total_steps": 1140, "loss": 0.4566, "lr": 0.027368421052631577, "epoch": 1.8421052631578947, "percentage": 9.21, "elapsed_time": "0:00:25", "remaining_time": "0:04:06", "throughput": 2651.08, "total_tokens": 66288} |
| {"current_steps": 110, "total_steps": 1140, "loss": 0.4375, "lr": 0.028684210526315788, "epoch": 1.9298245614035088, "percentage": 9.65, "elapsed_time": "0:00:26", "remaining_time": "0:04:04", "throughput": 2659.05, "total_tokens": 69328} |
| {"current_steps": 114, "total_steps": 1140, "eval_loss": 0.22248466312885284, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:27", "remaining_time": "0:04:08", "throughput": 2587.06, "total_tokens": 71552} |
| {"current_steps": 115, "total_steps": 1140, "loss": 0.3079, "lr": 0.03, "epoch": 2.017543859649123, "percentage": 10.09, "elapsed_time": "0:00:28", "remaining_time": "0:04:13", "throughput": 2529.4, "total_tokens": 71968} |
| {"current_steps": 120, "total_steps": 1140, "loss": 0.256, "lr": 0.02999824208523885, "epoch": 2.1052631578947367, "percentage": 10.53, "elapsed_time": "0:00:29", "remaining_time": "0:04:11", "throughput": 2539.72, "total_tokens": 75040} |
| {"current_steps": 125, "total_steps": 1140, "loss": 0.3571, "lr": 0.029992968752990647, "epoch": 2.192982456140351, "percentage": 10.96, "elapsed_time": "0:00:30", "remaining_time": "0:04:09", "throughput": 2557.81, "total_tokens": 78624} |
| {"current_steps": 130, "total_steps": 1140, "loss": 0.2677, "lr": 0.02998418123926453, "epoch": 2.280701754385965, "percentage": 11.4, "elapsed_time": "0:00:31", "remaining_time": "0:04:07", "throughput": 2570.55, "total_tokens": 81824} |
| {"current_steps": 135, "total_steps": 1140, "loss": 0.213, "lr": 0.029971881603753848, "epoch": 2.3684210526315788, "percentage": 11.84, "elapsed_time": "0:00:32", "remaining_time": "0:04:05", "throughput": 2581.34, "total_tokens": 84960} |
| {"current_steps": 140, "total_steps": 1140, "loss": 0.5717, "lr": 0.02995607272935338, "epoch": 2.456140350877193, "percentage": 12.28, "elapsed_time": "0:00:34", "remaining_time": "0:04:04", "throughput": 2606.07, "total_tokens": 89024} |
| {"current_steps": 145, "total_steps": 1140, "loss": 0.9055, "lr": 0.02993675832148361, "epoch": 2.543859649122807, "percentage": 12.72, "elapsed_time": "0:00:35", "remaining_time": "0:04:02", "throughput": 2619.22, "total_tokens": 92480} |
| {"current_steps": 150, "total_steps": 1140, "loss": 0.2402, "lr": 0.029913942907222237, "epoch": 2.6315789473684212, "percentage": 13.16, "elapsed_time": "0:00:36", "remaining_time": "0:04:00", "throughput": 2624.94, "total_tokens": 95520} |
| {"current_steps": 155, "total_steps": 1140, "loss": 0.5185, "lr": 0.029887631834243058, "epoch": 2.719298245614035, "percentage": 13.6, "elapsed_time": "0:00:37", "remaining_time": "0:03:58", "throughput": 2635.88, "total_tokens": 98976} |
| {"current_steps": 160, "total_steps": 1140, "loss": 0.4228, "lr": 0.029857831269562547, "epoch": 2.807017543859649, "percentage": 14.04, "elapsed_time": "0:00:38", "remaining_time": "0:03:56", "throughput": 2634.33, "total_tokens": 101600} |
| {"current_steps": 165, "total_steps": 1140, "loss": 0.2408, "lr": 0.029824548198094384, "epoch": 2.8947368421052633, "percentage": 14.47, "elapsed_time": "0:00:39", "remaining_time": "0:03:54", "throughput": 2637.87, "total_tokens": 104544} |
| {"current_steps": 170, "total_steps": 1140, "loss": 0.3917, "lr": 0.029787790421012244, "epoch": 2.982456140350877, "percentage": 14.91, "elapsed_time": "0:00:40", "remaining_time": "0:03:52", "throughput": 2647.36, "total_tokens": 107936} |
| {"current_steps": 171, "total_steps": 1140, "eval_loss": 0.31632205843925476, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:41", "remaining_time": "0:03:56", "throughput": 2594.6, "total_tokens": 108088} |
| {"current_steps": 175, "total_steps": 1140, "loss": 0.3128, "lr": 0.029747566553921325, "epoch": 3.0701754385964914, "percentage": 15.35, "elapsed_time": "0:00:43", "remaining_time": "0:03:59", "throughput": 2542.79, "total_tokens": 110360} |
| {"current_steps": 180, "total_steps": 1140, "loss": 0.2461, "lr": 0.029703886024838914, "epoch": 3.1578947368421053, "percentage": 15.79, "elapsed_time": "0:00:44", "remaining_time": "0:03:57", "throughput": 2550.73, "total_tokens": 113656} |
| {"current_steps": 185, "total_steps": 1140, "loss": 0.2173, "lr": 0.0296567590719846, "epoch": 3.245614035087719, "percentage": 16.23, "elapsed_time": "0:00:45", "remaining_time": "0:03:55", "throughput": 2558.62, "total_tokens": 116792} |
| {"current_steps": 190, "total_steps": 1140, "loss": 0.2442, "lr": 0.029606196741380517, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:46", "remaining_time": "0:03:53", "throughput": 2564.55, "total_tokens": 119864} |
| {"current_steps": 195, "total_steps": 1140, "loss": 0.2336, "lr": 0.029552210884262308, "epoch": 3.4210526315789473, "percentage": 17.11, "elapsed_time": "0:00:47", "remaining_time": "0:03:52", "throughput": 2577.42, "total_tokens": 123512} |
| {"current_steps": 200, "total_steps": 1140, "loss": 0.34, "lr": 0.029494814154301326, "epoch": 3.5087719298245617, "percentage": 17.54, "elapsed_time": "0:00:49", "remaining_time": "0:03:50", "throughput": 2588.84, "total_tokens": 127064} |
| {"current_steps": 205, "total_steps": 1140, "loss": 0.2974, "lr": 0.029434020004638753, "epoch": 3.5964912280701755, "percentage": 17.98, "elapsed_time": "0:00:50", "remaining_time": "0:03:48", "throughput": 2592.89, "total_tokens": 130008} |
| {"current_steps": 210, "total_steps": 1140, "loss": 0.2723, "lr": 0.029369842684732334, "epoch": 3.6842105263157894, "percentage": 18.42, "elapsed_time": "0:00:51", "remaining_time": "0:03:46", "throughput": 2594.19, "total_tokens": 132760} |
| {"current_steps": 215, "total_steps": 1140, "loss": 0.1512, "lr": 0.02930229723701646, "epoch": 3.7719298245614032, "percentage": 18.86, "elapsed_time": "0:00:52", "remaining_time": "0:03:45", "throughput": 2609.05, "total_tokens": 136728} |
| {"current_steps": 220, "total_steps": 1140, "loss": 0.195, "lr": 0.029231399493376414, "epoch": 3.8596491228070176, "percentage": 19.3, "elapsed_time": "0:00:53", "remaining_time": "0:03:43", "throughput": 2612.82, "total_tokens": 139800} |
| {"current_steps": 225, "total_steps": 1140, "loss": 0.4173, "lr": 0.02915716607143754, "epoch": 3.9473684210526314, "percentage": 19.74, "elapsed_time": "0:00:54", "remaining_time": "0:03:42", "throughput": 2620.11, "total_tokens": 143128} |
| {"current_steps": 228, "total_steps": 1140, "eval_loss": 0.19939158856868744, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:00:55", "remaining_time": "0:03:43", "throughput": 2585.1, "total_tokens": 144720} |
| {"current_steps": 230, "total_steps": 1140, "loss": 0.1771, "lr": 0.029079614370670265, "epoch": 4.035087719298246, "percentage": 20.18, "elapsed_time": "0:00:57", "remaining_time": "0:03:46", "throughput": 2548.63, "total_tokens": 145904} |
| {"current_steps": 235, "total_steps": 1140, "loss": 0.3489, "lr": 0.028998762568311857, "epoch": 4.12280701754386, "percentage": 20.61, "elapsed_time": "0:00:58", "remaining_time": "0:03:44", "throughput": 2559.55, "total_tokens": 149520} |
| {"current_steps": 240, "total_steps": 1140, "loss": 0.2159, "lr": 0.028914629615105897, "epoch": 4.2105263157894735, "percentage": 21.05, "elapsed_time": "0:00:59", "remaining_time": "0:03:43", "throughput": 2567.46, "total_tokens": 152912} |
| {"current_steps": 245, "total_steps": 1140, "loss": 0.2099, "lr": 0.028827235230860424, "epoch": 4.298245614035087, "percentage": 21.49, "elapsed_time": "0:01:00", "remaining_time": "0:03:41", "throughput": 2574.35, "total_tokens": 156176} |
| {"current_steps": 250, "total_steps": 1140, "loss": 0.191, "lr": 0.02873659989982586, "epoch": 4.385964912280702, "percentage": 21.93, "elapsed_time": "0:01:01", "remaining_time": "0:03:39", "throughput": 2579.8, "total_tokens": 159376} |
| {"current_steps": 255, "total_steps": 1140, "loss": 0.2055, "lr": 0.02864274486589371, "epoch": 4.473684210526316, "percentage": 22.37, "elapsed_time": "0:01:02", "remaining_time": "0:03:37", "throughput": 2581.47, "total_tokens": 162128} |
| {"current_steps": 260, "total_steps": 1140, "loss": 0.1851, "lr": 0.028545692127617244, "epoch": 4.56140350877193, "percentage": 22.81, "elapsed_time": "0:01:03", "remaining_time": "0:03:36", "throughput": 2588.12, "total_tokens": 165456} |
| {"current_steps": 265, "total_steps": 1140, "loss": 0.2107, "lr": 0.0284454644330553, "epoch": 4.649122807017544, "percentage": 23.25, "elapsed_time": "0:01:04", "remaining_time": "0:03:34", "throughput": 2590.45, "total_tokens": 168272} |
| {"current_steps": 270, "total_steps": 1140, "loss": 0.2177, "lr": 0.02834208527444037, "epoch": 4.7368421052631575, "percentage": 23.68, "elapsed_time": "0:01:06", "remaining_time": "0:03:32", "throughput": 2593.05, "total_tokens": 171216} |
| {"current_steps": 275, "total_steps": 1140, "loss": 0.2189, "lr": 0.028235578882672318, "epoch": 4.824561403508772, "percentage": 24.12, "elapsed_time": "0:01:07", "remaining_time": "0:03:31", "throughput": 2596.25, "total_tokens": 174192} |
| {"current_steps": 280, "total_steps": 1140, "loss": 0.2274, "lr": 0.028125970221638905, "epoch": 4.912280701754386, "percentage": 24.56, "elapsed_time": "0:01:08", "remaining_time": "0:03:29", "throughput": 2608.98, "total_tokens": 178320} |
| {"current_steps": 285, "total_steps": 1140, "loss": 0.1829, "lr": 0.028013284982364554, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:09", "remaining_time": "0:03:28", "throughput": 2610.93, "total_tokens": 181120} |
| {"current_steps": 285, "total_steps": 1140, "eval_loss": 0.19711753726005554, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:10", "remaining_time": "0:03:30", "throughput": 2582.45, "total_tokens": 181120} |
| {"current_steps": 290, "total_steps": 1140, "loss": 0.1876, "lr": 0.027897549576988666, "epoch": 5.087719298245614, "percentage": 25.44, "elapsed_time": "0:01:12", "remaining_time": "0:03:32", "throughput": 2544.91, "total_tokens": 184224} |
| {"current_steps": 295, "total_steps": 1140, "loss": 0.2358, "lr": 0.027778791132574907, "epoch": 5.175438596491228, "percentage": 25.88, "elapsed_time": "0:01:13", "remaining_time": "0:03:30", "throughput": 2549.01, "total_tokens": 187296} |
| {"current_steps": 300, "total_steps": 1140, "loss": 0.1797, "lr": 0.02765703748475293, "epoch": 5.2631578947368425, "percentage": 26.32, "elapsed_time": "0:01:14", "remaining_time": "0:03:29", "throughput": 2557.83, "total_tokens": 190944} |
| {"current_steps": 305, "total_steps": 1140, "loss": 0.2184, "lr": 0.027532317171194046, "epoch": 5.350877192982456, "percentage": 26.75, "elapsed_time": "0:01:15", "remaining_time": "0:03:27", "throughput": 2562.53, "total_tokens": 194080} |
| {"current_steps": 310, "total_steps": 1140, "loss": 0.1933, "lr": 0.027404659424922272, "epoch": 5.43859649122807, "percentage": 27.19, "elapsed_time": "0:01:16", "remaining_time": "0:03:25", "throughput": 2567.39, "total_tokens": 197312} |
| {"current_steps": 315, "total_steps": 1140, "loss": 0.1737, "lr": 0.027274094167462487, "epoch": 5.526315789473684, "percentage": 27.63, "elapsed_time": "0:01:18", "remaining_time": "0:03:24", "throughput": 2577.78, "total_tokens": 201280} |
| {"current_steps": 320, "total_steps": 1140, "loss": 0.2966, "lr": 0.02714065200182714, "epoch": 5.614035087719298, "percentage": 28.07, "elapsed_time": "0:01:19", "remaining_time": "0:03:22", "throughput": 2580.81, "total_tokens": 204256} |
| {"current_steps": 325, "total_steps": 1140, "loss": 0.2029, "lr": 0.02700436420534326, "epoch": 5.701754385964913, "percentage": 28.51, "elapsed_time": "0:01:20", "remaining_time": "0:03:21", "throughput": 2585.94, "total_tokens": 207488} |
| {"current_steps": 330, "total_steps": 1140, "loss": 0.1852, "lr": 0.02686526272232141, "epoch": 5.7894736842105265, "percentage": 28.95, "elapsed_time": "0:01:21", "remaining_time": "0:03:19", "throughput": 2587.55, "total_tokens": 210240} |
| {"current_steps": 335, "total_steps": 1140, "loss": 0.2156, "lr": 0.026723380156568298, "epoch": 5.87719298245614, "percentage": 29.39, "elapsed_time": "0:01:22", "remaining_time": "0:03:18", "throughput": 2594.86, "total_tokens": 213952} |
| {"current_steps": 340, "total_steps": 1140, "loss": 0.158, "lr": 0.026578749763744813, "epoch": 5.964912280701754, "percentage": 29.82, "elapsed_time": "0:01:23", "remaining_time": "0:03:16", "throughput": 2593.38, "total_tokens": 216352} |
| {"current_steps": 342, "total_steps": 1140, "eval_loss": 0.22433365881443024, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:24", "remaining_time": "0:03:17", "throughput": 2568.49, "total_tokens": 217128} |
| {"current_steps": 345, "total_steps": 1140, "loss": 0.32, "lr": 0.026431405443571282, "epoch": 6.052631578947368, "percentage": 30.26, "elapsed_time": "0:01:26", "remaining_time": "0:03:18", "throughput": 2549.45, "total_tokens": 219528} |
| {"current_steps": 350, "total_steps": 1140, "loss": 0.1625, "lr": 0.02628138173188176, "epoch": 6.140350877192983, "percentage": 30.7, "elapsed_time": "0:01:27", "remaining_time": "0:03:16", "throughput": 2552.17, "total_tokens": 222472} |
| {"current_steps": 355, "total_steps": 1140, "loss": 0.2369, "lr": 0.026128713792529224, "epoch": 6.228070175438597, "percentage": 31.14, "elapsed_time": "0:01:28", "remaining_time": "0:03:15", "throughput": 2554.79, "total_tokens": 225448} |
| {"current_steps": 360, "total_steps": 1140, "loss": 0.2825, "lr": 0.025973437409143554, "epoch": 6.315789473684211, "percentage": 31.58, "elapsed_time": "0:01:29", "remaining_time": "0:03:13", "throughput": 2558.38, "total_tokens": 228584} |
| {"current_steps": 365, "total_steps": 1140, "loss": 0.1558, "lr": 0.025815588976744273, "epoch": 6.4035087719298245, "percentage": 32.02, "elapsed_time": "0:01:30", "remaining_time": "0:03:12", "throughput": 2561.39, "total_tokens": 231624} |
| {"current_steps": 370, "total_steps": 1140, "loss": 0.1761, "lr": 0.02565520549320996, "epoch": 6.491228070175438, "percentage": 32.46, "elapsed_time": "0:01:31", "remaining_time": "0:03:10", "throughput": 2565.48, "total_tokens": 234856} |
| {"current_steps": 375, "total_steps": 1140, "loss": 0.2545, "lr": 0.02549232455060637, "epoch": 6.578947368421053, "percentage": 32.89, "elapsed_time": "0:01:32", "remaining_time": "0:03:08", "throughput": 2569.84, "total_tokens": 238024} |
| {"current_steps": 380, "total_steps": 1140, "loss": 0.1092, "lr": 0.025326984326375274, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:33", "remaining_time": "0:03:07", "throughput": 2575.83, "total_tokens": 241576} |
| {"current_steps": 385, "total_steps": 1140, "loss": 0.1499, "lr": 0.025159223574386116, "epoch": 6.754385964912281, "percentage": 33.77, "elapsed_time": "0:01:34", "remaining_time": "0:03:06", "throughput": 2579.06, "total_tokens": 244680} |
| {"current_steps": 390, "total_steps": 1140, "loss": 0.3659, "lr": 0.02498908161585253, "epoch": 6.842105263157895, "percentage": 34.21, "elapsed_time": "0:01:35", "remaining_time": "0:03:04", "throughput": 2582.73, "total_tokens": 247880} |
| {"current_steps": 395, "total_steps": 1140, "loss": 0.2394, "lr": 0.024816598330115895, "epoch": 6.9298245614035086, "percentage": 34.65, "elapsed_time": "0:01:37", "remaining_time": "0:03:03", "throughput": 2588.82, "total_tokens": 251496} |
| {"current_steps": 399, "total_steps": 1140, "eval_loss": 0.1963070183992386, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:38", "remaining_time": "0:03:03", "throughput": 2568.88, "total_tokens": 253536} |
| {"current_steps": 400, "total_steps": 1140, "loss": 0.2116, "lr": 0.024641814145298088, "epoch": 7.017543859649122, "percentage": 35.09, "elapsed_time": "0:01:39", "remaining_time": "0:03:04", "throughput": 2543.74, "total_tokens": 254016} |
| {"current_steps": 405, "total_steps": 1140, "loss": 0.1818, "lr": 0.024464770028825585, "epoch": 7.105263157894737, "percentage": 35.53, "elapsed_time": "0:01:40", "remaining_time": "0:03:03", "throughput": 2546.24, "total_tokens": 257024} |
| {"current_steps": 410, "total_steps": 1140, "loss": 0.1939, "lr": 0.024285507477827137, "epoch": 7.192982456140351, "percentage": 35.96, "elapsed_time": "0:01:42", "remaining_time": "0:03:01", "throughput": 2549.6, "total_tokens": 260096} |
| {"current_steps": 415, "total_steps": 1140, "loss": 0.1352, "lr": 0.02410406850940735, "epoch": 7.280701754385965, "percentage": 36.4, "elapsed_time": "0:01:43", "remaining_time": "0:03:00", "throughput": 2555.99, "total_tokens": 263776} |
| {"current_steps": 420, "total_steps": 1140, "loss": 0.2401, "lr": 0.02392049565079834, "epoch": 7.368421052631579, "percentage": 36.84, "elapsed_time": "0:01:44", "remaining_time": "0:02:58", "throughput": 2560.16, "total_tokens": 267008} |
| {"current_steps": 425, "total_steps": 1140, "loss": 0.2132, "lr": 0.023734831929391822, "epoch": 7.456140350877193, "percentage": 37.28, "elapsed_time": "0:01:45", "remaining_time": "0:02:57", "throughput": 2564.47, "total_tokens": 270368} |
| {"current_steps": 430, "total_steps": 1140, "loss": 0.2127, "lr": 0.02354712086265399, "epoch": 7.543859649122807, "percentage": 37.72, "elapsed_time": "0:01:46", "remaining_time": "0:02:55", "throughput": 2567.64, "total_tokens": 273568} |
| {"current_steps": 435, "total_steps": 1140, "loss": 0.2377, "lr": 0.023357406447925527, "epoch": 7.631578947368421, "percentage": 38.16, "elapsed_time": "0:01:47", "remaining_time": "0:02:54", "throughput": 2573.14, "total_tokens": 277152} |
| {"current_steps": 440, "total_steps": 1140, "loss": 0.1392, "lr": 0.023165733152109094, "epoch": 7.719298245614035, "percentage": 38.6, "elapsed_time": "0:01:48", "remaining_time": "0:02:53", "throughput": 2577.82, "total_tokens": 280640} |
| {"current_steps": 445, "total_steps": 1140, "loss": 0.2816, "lr": 0.022972145901246837, "epoch": 7.807017543859649, "percentage": 39.04, "elapsed_time": "0:01:49", "remaining_time": "0:02:51", "throughput": 2578.51, "total_tokens": 283392} |
| {"current_steps": 450, "total_steps": 1140, "loss": 0.2469, "lr": 0.022776690069990207, "epoch": 7.894736842105263, "percentage": 39.47, "elapsed_time": "0:01:50", "remaining_time": "0:02:50", "throughput": 2581.23, "total_tokens": 286496} |
| {"current_steps": 455, "total_steps": 1140, "loss": 0.2078, "lr": 0.022579411470964646, "epoch": 7.982456140350877, "percentage": 39.91, "elapsed_time": "0:01:52", "remaining_time": "0:02:48", "throughput": 2585.48, "total_tokens": 289952} |
| {"current_steps": 456, "total_steps": 1140, "eval_loss": 0.18284009397029877, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:01:53", "remaining_time": "0:02:49", "throughput": 2566.45, "total_tokens": 290112} |
| {"current_steps": 460, "total_steps": 1140, "loss": 0.2866, "lr": 0.022380356344031675, "epoch": 8.070175438596491, "percentage": 40.35, "elapsed_time": "0:01:54", "remaining_time": "0:02:49", "throughput": 2548.26, "total_tokens": 292512} |
| {"current_steps": 465, "total_steps": 1140, "loss": 0.3007, "lr": 0.02217957134545074, "epoch": 8.157894736842104, "percentage": 40.79, "elapsed_time": "0:01:55", "remaining_time": "0:02:48", "throughput": 2549.9, "total_tokens": 295456} |
| {"current_steps": 470, "total_steps": 1140, "loss": 0.1807, "lr": 0.02197710353694355, "epoch": 8.24561403508772, "percentage": 41.23, "elapsed_time": "0:01:57", "remaining_time": "0:02:46", "throughput": 2557.04, "total_tokens": 299424} |
| {"current_steps": 475, "total_steps": 1140, "loss": 0.1915, "lr": 0.02177300037466334, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:01:58", "remaining_time": "0:02:45", "throughput": 2560.16, "total_tokens": 302656} |
| {"current_steps": 480, "total_steps": 1140, "loss": 0.2368, "lr": 0.02156730969807168, "epoch": 8.421052631578947, "percentage": 42.11, "elapsed_time": "0:01:59", "remaining_time": "0:02:44", "throughput": 2563.63, "total_tokens": 305952} |
| {"current_steps": 485, "total_steps": 1140, "loss": 0.1138, "lr": 0.021360079718725448, "epoch": 8.508771929824562, "percentage": 42.54, "elapsed_time": "0:02:00", "remaining_time": "0:02:42", "throughput": 2567.08, "total_tokens": 309248} |
| {"current_steps": 490, "total_steps": 1140, "loss": 0.2241, "lr": 0.021151359008976602, "epoch": 8.596491228070175, "percentage": 42.98, "elapsed_time": "0:02:01", "remaining_time": "0:02:41", "throughput": 2568.79, "total_tokens": 312224} |
| {"current_steps": 495, "total_steps": 1140, "loss": 0.1382, "lr": 0.02094119649058735, "epoch": 8.68421052631579, "percentage": 43.42, "elapsed_time": "0:02:02", "remaining_time": "0:02:39", "throughput": 2570.97, "total_tokens": 315200} |
| {"current_steps": 500, "total_steps": 1140, "loss": 0.2488, "lr": 0.020729641423263476, "epoch": 8.771929824561404, "percentage": 43.86, "elapsed_time": "0:02:03", "remaining_time": "0:02:38", "throughput": 2573.36, "total_tokens": 318240} |
| {"current_steps": 505, "total_steps": 1140, "loss": 0.2099, "lr": 0.0205167433931084, "epoch": 8.859649122807017, "percentage": 44.3, "elapsed_time": "0:02:04", "remaining_time": "0:02:36", "throughput": 2576.04, "total_tokens": 321408} |
| {"current_steps": 510, "total_steps": 1140, "loss": 0.1586, "lr": 0.020302552301000754, "epoch": 8.947368421052632, "percentage": 44.74, "elapsed_time": "0:02:05", "remaining_time": "0:02:35", "throughput": 2579.62, "total_tokens": 324800} |
| {"current_steps": 513, "total_steps": 1140, "eval_loss": 0.17818011343479156, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:07", "remaining_time": "0:02:35", "throughput": 2562.0, "total_tokens": 325872} |
| {"current_steps": 515, "total_steps": 1140, "loss": 0.1178, "lr": 0.02008711835089822, "epoch": 9.035087719298245, "percentage": 45.18, "elapsed_time": "0:02:08", "remaining_time": "0:02:35", "throughput": 2547.17, "total_tokens": 327184} |
| {"current_steps": 520, "total_steps": 1140, "loss": 0.1038, "lr": 0.019870492038070255, "epoch": 9.12280701754386, "percentage": 45.61, "elapsed_time": "0:02:09", "remaining_time": "0:02:34", "throughput": 2547.19, "total_tokens": 329744} |
| {"current_steps": 525, "total_steps": 1140, "loss": 0.0967, "lr": 0.01965272413726258, "epoch": 9.210526315789474, "percentage": 46.05, "elapsed_time": "0:02:10", "remaining_time": "0:02:32", "throughput": 2550.28, "total_tokens": 332976} |
| {"current_steps": 530, "total_steps": 1140, "loss": 0.2715, "lr": 0.01943386569079618, "epoch": 9.298245614035087, "percentage": 46.49, "elapsed_time": "0:02:11", "remaining_time": "0:02:31", "throughput": 2551.31, "total_tokens": 335760} |
| {"current_steps": 535, "total_steps": 1140, "loss": 0.3602, "lr": 0.01921396799660354, "epoch": 9.385964912280702, "percentage": 46.93, "elapsed_time": "0:02:12", "remaining_time": "0:02:29", "throughput": 2550.51, "total_tokens": 338128} |
| {"current_steps": 540, "total_steps": 1140, "loss": 0.1625, "lr": 0.018993082596205, "epoch": 9.473684210526315, "percentage": 47.37, "elapsed_time": "0:02:13", "remaining_time": "0:02:28", "throughput": 2557.12, "total_tokens": 342160} |
| {"current_steps": 545, "total_steps": 1140, "loss": 0.2243, "lr": 0.018771261262628014, "epoch": 9.56140350877193, "percentage": 47.81, "elapsed_time": "0:02:14", "remaining_time": "0:02:27", "throughput": 2558.19, "total_tokens": 344976} |
| {"current_steps": 550, "total_steps": 1140, "loss": 0.2079, "lr": 0.018548555988272136, "epoch": 9.649122807017545, "percentage": 48.25, "elapsed_time": "0:02:15", "remaining_time": "0:02:25", "throughput": 2561.15, "total_tokens": 348208} |
| {"current_steps": 555, "total_steps": 1140, "loss": 0.1697, "lr": 0.018325018972722578, "epoch": 9.736842105263158, "percentage": 48.68, "elapsed_time": "0:02:17", "remaining_time": "0:02:24", "throughput": 2564.12, "total_tokens": 351472} |
| {"current_steps": 560, "total_steps": 1140, "loss": 0.2251, "lr": 0.01810070261051526, "epoch": 9.824561403508772, "percentage": 49.12, "elapsed_time": "0:02:18", "remaining_time": "0:02:23", "throughput": 2568.81, "total_tokens": 355152} |
| {"current_steps": 565, "total_steps": 1140, "loss": 0.1747, "lr": 0.01787565947885608, "epoch": 9.912280701754385, "percentage": 49.56, "elapsed_time": "0:02:19", "remaining_time": "0:02:21", "throughput": 2574.52, "total_tokens": 359184} |
| {"current_steps": 570, "total_steps": 1140, "loss": 0.1667, "lr": 0.01764994232529744, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:20", "remaining_time": "0:02:20", "throughput": 2575.15, "total_tokens": 361920} |
| {"current_steps": 570, "total_steps": 1140, "eval_loss": 0.189362034201622, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:21", "remaining_time": "0:02:21", "throughput": 2561.18, "total_tokens": 361920} |
| {"current_steps": 575, "total_steps": 1140, "loss": 0.2448, "lr": 0.01742360405537482, "epoch": 10.087719298245615, "percentage": 50.44, "elapsed_time": "0:02:23", "remaining_time": "0:02:20", "throughput": 2550.84, "total_tokens": 365216} |
| {"current_steps": 580, "total_steps": 1140, "loss": 0.1898, "lr": 0.017196697720206326, "epoch": 10.175438596491228, "percentage": 50.88, "elapsed_time": "0:02:24", "remaining_time": "0:02:19", "throughput": 2553.82, "total_tokens": 368512} |
| {"current_steps": 585, "total_steps": 1140, "loss": 0.2437, "lr": 0.01696927650405807, "epoch": 10.263157894736842, "percentage": 51.32, "elapsed_time": "0:02:25", "remaining_time": "0:02:17", "throughput": 2557.82, "total_tokens": 372032} |
| {"current_steps": 590, "total_steps": 1140, "loss": 0.106, "lr": 0.016741393711878452, "epoch": 10.350877192982455, "percentage": 51.75, "elapsed_time": "0:02:26", "remaining_time": "0:02:16", "throughput": 2559.82, "total_tokens": 375136} |
| {"current_steps": 595, "total_steps": 1140, "loss": 0.1765, "lr": 0.016513102756804024, "epoch": 10.43859649122807, "percentage": 52.19, "elapsed_time": "0:02:27", "remaining_time": "0:02:15", "throughput": 2561.89, "total_tokens": 378208} |
| {"current_steps": 600, "total_steps": 1140, "loss": 0.1756, "lr": 0.016284457147640083, "epoch": 10.526315789473685, "percentage": 52.63, "elapsed_time": "0:02:28", "remaining_time": "0:02:13", "throughput": 2562.37, "total_tokens": 380864} |
| {"current_steps": 605, "total_steps": 1140, "loss": 0.1515, "lr": 0.016055510476318827, "epoch": 10.614035087719298, "percentage": 53.07, "elapsed_time": "0:02:29", "remaining_time": "0:02:12", "throughput": 2565.22, "total_tokens": 384160} |
| {"current_steps": 610, "total_steps": 1140, "loss": 0.1735, "lr": 0.015826316405337982, "epoch": 10.701754385964913, "percentage": 53.51, "elapsed_time": "0:02:30", "remaining_time": "0:02:11", "throughput": 2568.5, "total_tokens": 387584} |
| {"current_steps": 615, "total_steps": 1140, "loss": 0.2056, "lr": 0.015596928655182963, "epoch": 10.789473684210526, "percentage": 53.95, "elapsed_time": "0:02:32", "remaining_time": "0:02:09", "throughput": 2573.9, "total_tokens": 391584} |
| {"current_steps": 620, "total_steps": 1140, "loss": 0.0977, "lr": 0.015367400991735372, "epoch": 10.87719298245614, "percentage": 54.39, "elapsed_time": "0:02:33", "remaining_time": "0:02:08", "throughput": 2575.65, "total_tokens": 394592} |
| {"current_steps": 625, "total_steps": 1140, "loss": 0.2652, "lr": 0.015137787213670897, "epoch": 10.964912280701755, "percentage": 54.82, "elapsed_time": "0:02:34", "remaining_time": "0:02:07", "throughput": 2577.69, "total_tokens": 397696} |
| {"current_steps": 627, "total_steps": 1140, "eval_loss": 0.17897310853004456, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:35", "remaining_time": "0:02:07", "throughput": 2564.0, "total_tokens": 398432} |
| {"current_steps": 630, "total_steps": 1140, "loss": 0.1901, "lr": 0.014908141139849508, "epoch": 11.052631578947368, "percentage": 55.26, "elapsed_time": "0:02:36", "remaining_time": "0:02:07", "throughput": 2553.31, "total_tokens": 400736} |
| {"current_steps": 635, "total_steps": 1140, "loss": 0.1596, "lr": 0.014678516596700955, "epoch": 11.140350877192983, "percentage": 55.7, "elapsed_time": "0:02:38", "remaining_time": "0:02:05", "throughput": 2554.54, "total_tokens": 403680} |
| {"current_steps": 640, "total_steps": 1140, "loss": 0.149, "lr": 0.014448967405608415, "epoch": 11.228070175438596, "percentage": 56.14, "elapsed_time": "0:02:39", "remaining_time": "0:02:04", "throughput": 2556.42, "total_tokens": 406784} |
| {"current_steps": 645, "total_steps": 1140, "loss": 0.2002, "lr": 0.014219547370293413, "epoch": 11.31578947368421, "percentage": 56.58, "elapsed_time": "0:02:40", "remaining_time": "0:02:02", "throughput": 2559.62, "total_tokens": 410208} |
| {"current_steps": 650, "total_steps": 1140, "loss": 0.2033, "lr": 0.013990310264204829, "epoch": 11.403508771929825, "percentage": 57.02, "elapsed_time": "0:02:41", "remaining_time": "0:02:01", "throughput": 2563.97, "total_tokens": 414016} |
| {"current_steps": 655, "total_steps": 1140, "loss": 0.143, "lr": 0.013761309817915016, "epoch": 11.491228070175438, "percentage": 57.46, "elapsed_time": "0:02:42", "remaining_time": "0:02:00", "throughput": 2565.51, "total_tokens": 416992} |
| {"current_steps": 660, "total_steps": 1140, "loss": 0.1098, "lr": 0.013532599706525941, "epoch": 11.578947368421053, "percentage": 57.89, "elapsed_time": "0:02:43", "remaining_time": "0:01:59", "throughput": 2567.95, "total_tokens": 420224} |
| {"current_steps": 665, "total_steps": 1140, "loss": 0.1196, "lr": 0.013304233537088392, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:02:44", "remaining_time": "0:01:57", "throughput": 2570.32, "total_tokens": 423456} |
| {"current_steps": 670, "total_steps": 1140, "loss": 0.2291, "lr": 0.013076264836037051, "epoch": 11.75438596491228, "percentage": 58.77, "elapsed_time": "0:02:45", "remaining_time": "0:01:56", "throughput": 2572.19, "total_tokens": 426560} |
| {"current_steps": 675, "total_steps": 1140, "loss": 0.2267, "lr": 0.012848747036644558, "epoch": 11.842105263157894, "percentage": 59.21, "elapsed_time": "0:02:46", "remaining_time": "0:01:55", "throughput": 2574.82, "total_tokens": 429920} |
| {"current_steps": 680, "total_steps": 1140, "loss": 0.2114, "lr": 0.012621733466497287, "epoch": 11.929824561403509, "percentage": 59.65, "elapsed_time": "0:02:48", "remaining_time": "0:01:53", "throughput": 2576.67, "total_tokens": 433088} |
| {"current_steps": 684, "total_steps": 1140, "eval_loss": 0.18509027361869812, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:02:49", "remaining_time": "0:01:53", "throughput": 2566.54, "total_tokens": 435536} |
| {"current_steps": 685, "total_steps": 1140, "loss": 0.2499, "lr": 0.012395277334996044, "epoch": 12.017543859649123, "percentage": 60.09, "elapsed_time": "0:02:50", "remaining_time": "0:01:53", "throughput": 2554.99, "total_tokens": 436304} |
| {"current_steps": 690, "total_steps": 1140, "loss": 0.1833, "lr": 0.012169431720884335, "epoch": 12.105263157894736, "percentage": 60.53, "elapsed_time": "0:02:51", "remaining_time": "0:01:52", "throughput": 2556.82, "total_tokens": 439472} |
| {"current_steps": 695, "total_steps": 1140, "loss": 0.1495, "lr": 0.01194424955980734, "epoch": 12.192982456140351, "percentage": 60.96, "elapsed_time": "0:02:52", "remaining_time": "0:01:50", "throughput": 2557.09, "total_tokens": 442160} |
| {"current_steps": 700, "total_steps": 1140, "loss": 0.1731, "lr": 0.011719783631904363, "epoch": 12.280701754385966, "percentage": 61.4, "elapsed_time": "0:02:54", "remaining_time": "0:01:49", "throughput": 2559.7, "total_tokens": 445488} |
| {"current_steps": 705, "total_steps": 1140, "loss": 0.1407, "lr": 0.01149608654943782, "epoch": 12.368421052631579, "percentage": 61.84, "elapsed_time": "0:02:55", "remaining_time": "0:01:48", "throughput": 2562.07, "total_tokens": 448784} |
| {"current_steps": 710, "total_steps": 1140, "loss": 0.1313, "lr": 0.011273210744461479, "epoch": 12.456140350877194, "percentage": 62.28, "elapsed_time": "0:02:56", "remaining_time": "0:01:46", "throughput": 2566.36, "total_tokens": 452656} |
| {"current_steps": 715, "total_steps": 1140, "loss": 0.1748, "lr": 0.011051208456531014, "epoch": 12.543859649122806, "percentage": 62.72, "elapsed_time": "0:02:57", "remaining_time": "0:01:45", "throughput": 2568.39, "total_tokens": 455888} |
| {"current_steps": 720, "total_steps": 1140, "loss": 0.1033, "lr": 0.010830131720459601, "epoch": 12.631578947368421, "percentage": 63.16, "elapsed_time": "0:02:58", "remaining_time": "0:01:44", "throughput": 2569.85, "total_tokens": 458896} |
| {"current_steps": 725, "total_steps": 1140, "loss": 0.1099, "lr": 0.010610032354121612, "epoch": 12.719298245614034, "percentage": 63.6, "elapsed_time": "0:02:59", "remaining_time": "0:01:42", "throughput": 2570.49, "total_tokens": 461648} |
| {"current_steps": 730, "total_steps": 1140, "loss": 0.3051, "lr": 0.01039096194630704, "epoch": 12.807017543859649, "percentage": 64.04, "elapsed_time": "0:03:00", "remaining_time": "0:01:41", "throughput": 2572.59, "total_tokens": 464912} |
| {"current_steps": 735, "total_steps": 1140, "loss": 0.2459, "lr": 0.010172971844629716, "epoch": 12.894736842105264, "percentage": 64.47, "elapsed_time": "0:03:01", "remaining_time": "0:01:40", "throughput": 2573.89, "total_tokens": 467888} |
| {"current_steps": 740, "total_steps": 1140, "loss": 0.1488, "lr": 0.009956113143491957, "epoch": 12.982456140350877, "percentage": 64.91, "elapsed_time": "0:03:02", "remaining_time": "0:01:38", "throughput": 2576.98, "total_tokens": 471440} |
| {"current_steps": 741, "total_steps": 1140, "eval_loss": 0.19752609729766846, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:03", "remaining_time": "0:01:38", "throughput": 2564.95, "total_tokens": 471520} |
| {"current_steps": 745, "total_steps": 1140, "loss": 0.173, "lr": 0.009740436672108685, "epoch": 13.070175438596491, "percentage": 65.35, "elapsed_time": "0:03:05", "remaining_time": "0:01:38", "throughput": 2553.57, "total_tokens": 473600} |
| {"current_steps": 750, "total_steps": 1140, "loss": 0.1818, "lr": 0.009525992982593583, "epoch": 13.157894736842104, "percentage": 65.79, "elapsed_time": "0:03:06", "remaining_time": "0:01:36", "throughput": 2554.87, "total_tokens": 476576} |
| {"current_steps": 755, "total_steps": 1140, "loss": 0.1059, "lr": 0.009312832338110292, "epoch": 13.24561403508772, "percentage": 66.23, "elapsed_time": "0:03:07", "remaining_time": "0:01:35", "throughput": 2557.05, "total_tokens": 479776} |
| {"current_steps": 760, "total_steps": 1140, "loss": 0.2284, "lr": 0.009101004701091252, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:08", "remaining_time": "0:01:34", "throughput": 2558.37, "total_tokens": 482784} |
| {"current_steps": 765, "total_steps": 1140, "loss": 0.1312, "lr": 0.008890559721527138, "epoch": 13.421052631578947, "percentage": 67.11, "elapsed_time": "0:03:09", "remaining_time": "0:01:33", "throughput": 2559.43, "total_tokens": 485696} |
| {"current_steps": 770, "total_steps": 1140, "loss": 0.1054, "lr": 0.008681546725329408, "epoch": 13.508771929824562, "percentage": 67.54, "elapsed_time": "0:03:10", "remaining_time": "0:01:31", "throughput": 2561.28, "total_tokens": 488832} |
| {"current_steps": 775, "total_steps": 1140, "loss": 0.172, "lr": 0.008474014702768904, "epoch": 13.596491228070175, "percentage": 67.98, "elapsed_time": "0:03:12", "remaining_time": "0:01:30", "throughput": 2565.75, "total_tokens": 492896} |
| {"current_steps": 780, "total_steps": 1140, "loss": 0.1209, "lr": 0.008268012296993067, "epoch": 13.68421052631579, "percentage": 68.42, "elapsed_time": "0:03:13", "remaining_time": "0:01:29", "throughput": 2568.08, "total_tokens": 496224} |
| {"current_steps": 785, "total_steps": 1140, "loss": 0.1832, "lr": 0.008063587792624567, "epoch": 13.771929824561404, "percentage": 68.86, "elapsed_time": "0:03:14", "remaining_time": "0:01:27", "throughput": 2568.81, "total_tokens": 499040} |
| {"current_steps": 790, "total_steps": 1140, "loss": 0.1872, "lr": 0.007860789104443896, "epoch": 13.859649122807017, "percentage": 69.3, "elapsed_time": "0:03:15", "remaining_time": "0:01:26", "throughput": 2571.49, "total_tokens": 502560} |
| {"current_steps": 795, "total_steps": 1140, "loss": 0.2387, "lr": 0.0076596637661587325, "epoch": 13.947368421052632, "percentage": 69.74, "elapsed_time": "0:03:16", "remaining_time": "0:01:25", "throughput": 2571.52, "total_tokens": 505152} |
| {"current_steps": 798, "total_steps": 1140, "eval_loss": 0.21113429963588715, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:17", "remaining_time": "0:01:24", "throughput": 2563.21, "total_tokens": 507256} |
| {"current_steps": 800, "total_steps": 1140, "loss": 0.1353, "lr": 0.007460258919262529, "epoch": 14.035087719298245, "percentage": 70.18, "elapsed_time": "0:03:19", "remaining_time": "0:01:24", "throughput": 2555.1, "total_tokens": 508888} |
| {"current_steps": 805, "total_steps": 1140, "loss": 0.1836, "lr": 0.007262621301985144, "epoch": 14.12280701754386, "percentage": 70.61, "elapsed_time": "0:03:20", "remaining_time": "0:01:23", "throughput": 2555.39, "total_tokens": 511576} |
| {"current_steps": 810, "total_steps": 1140, "loss": 0.0974, "lr": 0.007066797238337862, "epoch": 14.210526315789474, "percentage": 71.05, "elapsed_time": "0:03:21", "remaining_time": "0:01:22", "throughput": 2557.96, "total_tokens": 514968} |
| {"current_steps": 815, "total_steps": 1140, "loss": 0.1565, "lr": 0.006872832627255643, "epoch": 14.298245614035087, "percentage": 71.49, "elapsed_time": "0:03:22", "remaining_time": "0:01:20", "throughput": 2559.05, "total_tokens": 517912} |
| {"current_steps": 820, "total_steps": 1140, "loss": 0.1398, "lr": 0.006680772931838868, "epoch": 14.385964912280702, "percentage": 71.93, "elapsed_time": "0:03:23", "remaining_time": "0:01:19", "throughput": 2561.04, "total_tokens": 521144} |
| {"current_steps": 825, "total_steps": 1140, "loss": 0.1252, "lr": 0.00649066316869736, "epoch": 14.473684210526315, "percentage": 72.37, "elapsed_time": "0:03:24", "remaining_time": "0:01:18", "throughput": 2562.24, "total_tokens": 524152} |
| {"current_steps": 830, "total_steps": 1140, "loss": 0.1669, "lr": 0.0063025478973989585, "epoch": 14.56140350877193, "percentage": 72.81, "elapsed_time": "0:03:25", "remaining_time": "0:01:16", "throughput": 2562.6, "total_tokens": 526808} |
| {"current_steps": 835, "total_steps": 1140, "loss": 0.1768, "lr": 0.006116471210025301, "epoch": 14.649122807017545, "percentage": 73.25, "elapsed_time": "0:03:26", "remaining_time": "0:01:15", "throughput": 2564.52, "total_tokens": 530072} |
| {"current_steps": 840, "total_steps": 1140, "loss": 0.1716, "lr": 0.005932476720837105, "epoch": 14.736842105263158, "percentage": 73.68, "elapsed_time": "0:03:27", "remaining_time": "0:01:14", "throughput": 2568.5, "total_tokens": 534104} |
| {"current_steps": 845, "total_steps": 1140, "loss": 0.1353, "lr": 0.005750607556051514, "epoch": 14.824561403508772, "percentage": 74.12, "elapsed_time": "0:03:29", "remaining_time": "0:01:13", "throughput": 2571.45, "total_tokens": 537752} |
| {"current_steps": 850, "total_steps": 1140, "loss": 0.1462, "lr": 0.0055709063437337685, "epoch": 14.912280701754385, "percentage": 74.56, "elapsed_time": "0:03:30", "remaining_time": "0:01:11", "throughput": 2572.24, "total_tokens": 540696} |
| {"current_steps": 855, "total_steps": 1140, "loss": 0.157, "lr": 0.005393415203805707, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:31", "remaining_time": "0:01:10", "throughput": 2571.79, "total_tokens": 543064} |
| {"current_steps": 855, "total_steps": 1140, "eval_loss": 0.2199254035949707, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:31", "remaining_time": "0:01:10", "throughput": 2562.49, "total_tokens": 543064} |
| {"current_steps": 860, "total_steps": 1140, "loss": 0.1325, "lr": 0.005218175738173303, "epoch": 15.087719298245615, "percentage": 75.44, "elapsed_time": "0:03:33", "remaining_time": "0:01:09", "throughput": 2554.38, "total_tokens": 546104} |
| {"current_steps": 865, "total_steps": 1140, "loss": 0.1853, "lr": 0.005045229020975681, "epoch": 15.175438596491228, "percentage": 75.88, "elapsed_time": "0:03:34", "remaining_time": "0:01:08", "throughput": 2555.53, "total_tokens": 549112} |
| {"current_steps": 870, "total_steps": 1140, "loss": 0.1294, "lr": 0.004874615588957773, "epoch": 15.263157894736842, "percentage": 76.32, "elapsed_time": "0:03:36", "remaining_time": "0:01:07", "throughput": 2558.35, "total_tokens": 552696} |
| {"current_steps": 875, "total_steps": 1140, "loss": 0.1524, "lr": 0.0047063754319689975, "epoch": 15.350877192982455, "percentage": 76.75, "elapsed_time": "0:03:37", "remaining_time": "0:01:05", "throughput": 2560.67, "total_tokens": 556152} |
| {"current_steps": 880, "total_steps": 1140, "loss": 0.0872, "lr": 0.004540547983590069, "epoch": 15.43859649122807, "percentage": 77.19, "elapsed_time": "0:03:38", "remaining_time": "0:01:04", "throughput": 2561.24, "total_tokens": 558936} |
| {"current_steps": 885, "total_steps": 1140, "loss": 0.127, "lr": 0.0043771721118902335, "epoch": 15.526315789473685, "percentage": 77.63, "elapsed_time": "0:03:39", "remaining_time": "0:01:03", "throughput": 2564.17, "total_tokens": 562584} |
| {"current_steps": 890, "total_steps": 1140, "loss": 0.1282, "lr": 0.004216286110317013, "epoch": 15.614035087719298, "percentage": 78.07, "elapsed_time": "0:03:40", "remaining_time": "0:01:01", "throughput": 2567.81, "total_tokens": 566456} |
| {"current_steps": 895, "total_steps": 1140, "loss": 0.118, "lr": 0.00405792768872069, "epoch": 15.701754385964913, "percentage": 78.51, "elapsed_time": "0:03:41", "remaining_time": "0:01:00", "throughput": 2568.2, "total_tokens": 569176} |
| {"current_steps": 900, "total_steps": 1140, "loss": 0.0947, "lr": 0.003902133964515502, "epoch": 15.789473684210526, "percentage": 78.95, "elapsed_time": "0:03:42", "remaining_time": "0:00:59", "throughput": 2570.63, "total_tokens": 572696} |
| {"current_steps": 905, "total_steps": 1140, "loss": 0.1558, "lr": 0.00374894145397979, "epoch": 15.87719298245614, "percentage": 79.39, "elapsed_time": "0:03:43", "remaining_time": "0:00:58", "throughput": 2572.05, "total_tokens": 575832} |
| {"current_steps": 910, "total_steps": 1140, "loss": 0.1676, "lr": 0.0035983860636969525, "epoch": 15.964912280701755, "percentage": 79.82, "elapsed_time": "0:03:44", "remaining_time": "0:00:56", "throughput": 2572.98, "total_tokens": 578808} |
| {"current_steps": 912, "total_steps": 1140, "eval_loss": 0.20820914208889008, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:03:46", "remaining_time": "0:00:56", "throughput": 2564.02, "total_tokens": 579704} |
| {"current_steps": 915, "total_steps": 1140, "loss": 0.0726, "lr": 0.003450503082139393, "epoch": 16.05263157894737, "percentage": 80.26, "elapsed_time": "0:03:47", "remaining_time": "0:00:55", "throughput": 2556.38, "total_tokens": 581720} |
| {"current_steps": 920, "total_steps": 1140, "loss": 0.1352, "lr": 0.003305327171397263, "epoch": 16.140350877192983, "percentage": 80.7, "elapsed_time": "0:03:48", "remaining_time": "0:00:54", "throughput": 2559.87, "total_tokens": 585624} |
| {"current_steps": 925, "total_steps": 1140, "loss": 0.0811, "lr": 0.003162892359054098, "epoch": 16.228070175438596, "percentage": 81.14, "elapsed_time": "0:03:49", "remaining_time": "0:00:53", "throughput": 2560.4, "total_tokens": 588408} |
| {"current_steps": 930, "total_steps": 1140, "loss": 0.1149, "lr": 0.0030232320302111047, "epoch": 16.31578947368421, "percentage": 81.58, "elapsed_time": "0:03:50", "remaining_time": "0:00:52", "throughput": 2561.79, "total_tokens": 591512} |
| {"current_steps": 935, "total_steps": 1140, "loss": 0.1354, "lr": 0.0028863789196621093, "epoch": 16.403508771929825, "percentage": 82.02, "elapsed_time": "0:03:52", "remaining_time": "0:00:50", "throughput": 2564.94, "total_tokens": 595288} |
| {"current_steps": 940, "total_steps": 1140, "loss": 0.1228, "lr": 0.0027523651042208564, "epoch": 16.49122807017544, "percentage": 82.46, "elapsed_time": "0:03:53", "remaining_time": "0:00:49", "throughput": 2565.98, "total_tokens": 598360} |
| {"current_steps": 945, "total_steps": 1140, "loss": 0.1227, "lr": 0.0026212219952026, "epoch": 16.57894736842105, "percentage": 82.89, "elapsed_time": "0:03:54", "remaining_time": "0:00:48", "throughput": 2568.58, "total_tokens": 601944} |
| {"current_steps": 950, "total_steps": 1140, "loss": 0.1163, "lr": 0.0024929803310616222, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:03:55", "remaining_time": "0:00:47", "throughput": 2569.41, "total_tokens": 604824} |
| {"current_steps": 955, "total_steps": 1140, "loss": 0.0896, "lr": 0.002367670170186516, "epoch": 16.75438596491228, "percentage": 83.77, "elapsed_time": "0:03:56", "remaining_time": "0:00:45", "throughput": 2570.0, "total_tokens": 607608} |
| {"current_steps": 960, "total_steps": 1140, "loss": 0.1448, "lr": 0.00224532088385481, "epoch": 16.842105263157894, "percentage": 84.21, "elapsed_time": "0:03:57", "remaining_time": "0:00:44", "throughput": 2572.27, "total_tokens": 611128} |
| {"current_steps": 965, "total_steps": 1140, "loss": 0.1222, "lr": 0.002125961149348706, "epoch": 16.92982456140351, "percentage": 84.65, "elapsed_time": "0:03:58", "remaining_time": "0:00:43", "throughput": 2572.91, "total_tokens": 614040} |
| {"current_steps": 969, "total_steps": 1140, "eval_loss": 0.20686452090740204, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:00", "remaining_time": "0:00:42", "throughput": 2564.27, "total_tokens": 615960} |
| {"current_steps": 970, "total_steps": 1140, "loss": 0.1845, "lr": 0.0020096189432334192, "epoch": 17.017543859649123, "percentage": 85.09, "elapsed_time": "0:04:01", "remaining_time": "0:00:42", "throughput": 2556.24, "total_tokens": 616600} |
| {"current_steps": 975, "total_steps": 1140, "loss": 0.1548, "lr": 0.001896321534799823, "epoch": 17.105263157894736, "percentage": 85.53, "elapsed_time": "0:04:02", "remaining_time": "0:00:41", "throughput": 2557.25, "total_tokens": 619736} |
| {"current_steps": 980, "total_steps": 1140, "loss": 0.1217, "lr": 0.0017860954796727994, "epoch": 17.19298245614035, "percentage": 85.96, "elapsed_time": "0:04:03", "remaining_time": "0:00:39", "throughput": 2558.19, "total_tokens": 622680} |
| {"current_steps": 985, "total_steps": 1140, "loss": 0.1027, "lr": 0.0016789666135869374, "epoch": 17.280701754385966, "percentage": 86.4, "elapsed_time": "0:04:04", "remaining_time": "0:00:38", "throughput": 2560.44, "total_tokens": 626168} |
| {"current_steps": 990, "total_steps": 1140, "loss": 0.1495, "lr": 0.0015749600463309049, "epoch": 17.36842105263158, "percentage": 86.84, "elapsed_time": "0:04:05", "remaining_time": "0:00:37", "throughput": 2561.03, "total_tokens": 628952} |
| {"current_steps": 995, "total_steps": 1140, "loss": 0.092, "lr": 0.0014741001558620163, "epoch": 17.45614035087719, "percentage": 87.28, "elapsed_time": "0:04:06", "remaining_time": "0:00:35", "throughput": 2562.77, "total_tokens": 632184} |
| {"current_steps": 1000, "total_steps": 1140, "loss": 0.1115, "lr": 0.0013764105825923066, "epoch": 17.54385964912281, "percentage": 87.72, "elapsed_time": "0:04:07", "remaining_time": "0:00:34", "throughput": 2564.36, "total_tokens": 635448} |
| {"current_steps": 1005, "total_steps": 1140, "loss": 0.1116, "lr": 0.0012819142238474862, "epoch": 17.63157894736842, "percentage": 88.16, "elapsed_time": "0:04:08", "remaining_time": "0:00:33", "throughput": 2565.75, "total_tokens": 638616} |
| {"current_steps": 1010, "total_steps": 1140, "loss": 0.0696, "lr": 0.0011906332285000793, "epoch": 17.719298245614034, "percentage": 88.6, "elapsed_time": "0:04:09", "remaining_time": "0:00:32", "throughput": 2566.92, "total_tokens": 641688} |
| {"current_steps": 1015, "total_steps": 1140, "loss": 0.1389, "lr": 0.0011025889917779736, "epoch": 17.80701754385965, "percentage": 89.04, "elapsed_time": "0:04:11", "remaining_time": "0:00:30", "throughput": 2567.39, "total_tokens": 644472} |
| {"current_steps": 1020, "total_steps": 1140, "loss": 0.09, "lr": 0.0010178021502496165, "epoch": 17.894736842105264, "percentage": 89.47, "elapsed_time": "0:04:12", "remaining_time": "0:00:29", "throughput": 2570.19, "total_tokens": 648312} |
| {"current_steps": 1025, "total_steps": 1140, "loss": 0.0839, "lr": 0.0009362925769870394, "epoch": 17.982456140350877, "percentage": 89.91, "elapsed_time": "0:04:13", "remaining_time": "0:00:28", "throughput": 2573.12, "total_tokens": 652184} |
| {"current_steps": 1026, "total_steps": 1140, "eval_loss": 0.21928760409355164, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:14", "remaining_time": "0:00:28", "throughput": 2564.72, "total_tokens": 652368} |
| {"current_steps": 1030, "total_steps": 1140, "loss": 0.152, "lr": 0.0008580793769078487, "epoch": 18.07017543859649, "percentage": 90.35, "elapsed_time": "0:04:16", "remaining_time": "0:00:27", "throughput": 2558.89, "total_tokens": 655312} |
| {"current_steps": 1035, "total_steps": 1140, "loss": 0.0724, "lr": 0.0007831808822972391, "epoch": 18.157894736842106, "percentage": 90.79, "elapsed_time": "0:04:17", "remaining_time": "0:00:26", "throughput": 2561.08, "total_tokens": 658896} |
| {"current_steps": 1040, "total_steps": 1140, "loss": 0.0858, "lr": 0.0007116146485111063, "epoch": 18.24561403508772, "percentage": 91.23, "elapsed_time": "0:04:18", "remaining_time": "0:00:24", "throughput": 2563.01, "total_tokens": 662288} |
| {"current_steps": 1045, "total_steps": 1140, "loss": 0.1156, "lr": 0.0006433974498612882, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:04:19", "remaining_time": "0:00:23", "throughput": 2563.45, "total_tokens": 665104} |
| {"current_steps": 1050, "total_steps": 1140, "loss": 0.1102, "lr": 0.0005785452756838482, "epoch": 18.42105263157895, "percentage": 92.11, "elapsed_time": "0:04:20", "remaining_time": "0:00:22", "throughput": 2563.11, "total_tokens": 667536} |
| {"current_steps": 1055, "total_steps": 1140, "loss": 0.0977, "lr": 0.0005170733265913585, "epoch": 18.50877192982456, "percentage": 92.54, "elapsed_time": "0:04:21", "remaining_time": "0:00:21", "throughput": 2563.75, "total_tokens": 670448} |
| {"current_steps": 1060, "total_steps": 1140, "loss": 0.1035, "lr": 0.0004589960109100444, "epoch": 18.596491228070175, "percentage": 92.98, "elapsed_time": "0:04:22", "remaining_time": "0:00:19", "throughput": 2566.42, "total_tokens": 674192} |
| {"current_steps": 1065, "total_steps": 1140, "loss": 0.1124, "lr": 0.00040432694130264286, "epoch": 18.68421052631579, "percentage": 93.42, "elapsed_time": "0:04:23", "remaining_time": "0:00:18", "throughput": 2566.46, "total_tokens": 676784} |
| {"current_steps": 1070, "total_steps": 1140, "loss": 0.0942, "lr": 0.0003530789315777466, "epoch": 18.771929824561404, "percentage": 93.86, "elapsed_time": "0:04:24", "remaining_time": "0:00:17", "throughput": 2566.3, "total_tokens": 679280} |
| {"current_steps": 1075, "total_steps": 1140, "loss": 0.0708, "lr": 0.000305263993686391, "epoch": 18.859649122807017, "percentage": 94.3, "elapsed_time": "0:04:25", "remaining_time": "0:00:16", "throughput": 2566.93, "total_tokens": 682160} |
| {"current_steps": 1080, "total_steps": 1140, "loss": 0.112, "lr": 0.000260893334906595, "epoch": 18.94736842105263, "percentage": 94.74, "elapsed_time": "0:04:26", "remaining_time": "0:00:14", "throughput": 2569.7, "total_tokens": 685968} |
| {"current_steps": 1083, "total_steps": 1140, "eval_loss": 0.22394225001335144, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:28", "remaining_time": "0:00:14", "throughput": 2563.5, "total_tokens": 687976} |
| {"current_steps": 1085, "total_steps": 1140, "loss": 0.0573, "lr": 0.00021997735521649408, "epoch": 19.035087719298247, "percentage": 95.18, "elapsed_time": "0:04:29", "remaining_time": "0:00:13", "throughput": 2555.83, "total_tokens": 689128} |
| {"current_steps": 1090, "total_steps": 1140, "loss": 0.1072, "lr": 0.00018252564485670973, "epoch": 19.12280701754386, "percentage": 95.61, "elapsed_time": "0:04:30", "remaining_time": "0:00:12", "throughput": 2556.57, "total_tokens": 692136} |
| {"current_steps": 1095, "total_steps": 1140, "loss": 0.0749, "lr": 0.00014854698208250638, "epoch": 19.210526315789473, "percentage": 96.05, "elapsed_time": "0:04:31", "remaining_time": "0:00:11", "throughput": 2557.38, "total_tokens": 695112} |
| {"current_steps": 1100, "total_steps": 1140, "loss": 0.1104, "lr": 0.00011804933110626359, "epoch": 19.29824561403509, "percentage": 96.49, "elapsed_time": "0:04:32", "remaining_time": "0:00:09", "throughput": 2559.31, "total_tokens": 698600} |
| {"current_steps": 1105, "total_steps": 1140, "loss": 0.1751, "lr": 9.103984023075773e-05, "epoch": 19.385964912280702, "percentage": 96.93, "elapsed_time": "0:04:34", "remaining_time": "0:00:08", "throughput": 2560.38, "total_tokens": 701640} |
| {"current_steps": 1110, "total_steps": 1140, "loss": 0.1089, "lr": 6.752484017368553e-05, "epoch": 19.473684210526315, "percentage": 97.37, "elapsed_time": "0:04:35", "remaining_time": "0:00:07", "throughput": 2561.0, "total_tokens": 704488} |
| {"current_steps": 1115, "total_steps": 1140, "loss": 0.0643, "lr": 4.750984258380608e-05, "epoch": 19.56140350877193, "percentage": 97.81, "elapsed_time": "0:04:36", "remaining_time": "0:00:06", "throughput": 2562.63, "total_tokens": 707848} |
| {"current_steps": 1120, "total_steps": 1140, "loss": 0.0751, "lr": 3.099953874908079e-05, "epoch": 19.649122807017545, "percentage": 98.25, "elapsed_time": "0:04:37", "remaining_time": "0:00:04", "throughput": 2564.15, "total_tokens": 711112} |
| {"current_steps": 1125, "total_steps": 1140, "loss": 0.1224, "lr": 1.7997798497084714e-05, "epoch": 19.736842105263158, "percentage": 98.68, "elapsed_time": "0:04:38", "remaining_time": "0:00:03", "throughput": 2565.51, "total_tokens": 714344} |
| {"current_steps": 1130, "total_steps": 1140, "loss": 0.0569, "lr": 8.50766928796709e-06, "epoch": 19.82456140350877, "percentage": 99.12, "elapsed_time": "0:04:39", "remaining_time": "0:00:02", "throughput": 2568.33, "total_tokens": 718280} |
| {"current_steps": 1135, "total_steps": 1140, "loss": 0.1197, "lr": 2.5313755001593604e-06, "epoch": 19.912280701754387, "percentage": 99.56, "elapsed_time": "0:04:40", "remaining_time": "0:00:01", "throughput": 2568.64, "total_tokens": 720968} |
| {"current_steps": 1140, "total_steps": 1140, "loss": 0.0968, "lr": 7.03179089989181e-08, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:41", "remaining_time": "0:00:00", "throughput": 2568.9, "total_tokens": 723584} |
| {"current_steps": 1140, "total_steps": 1140, "eval_loss": 0.22509132325649261, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:42", "remaining_time": "0:00:00", "throughput": 2561.93, "total_tokens": 723584} |
| {"current_steps": 1140, "total_steps": 1140, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:43", "remaining_time": "0:00:00", "throughput": 2555.44, "total_tokens": 723584} |
|
|