| {"current_steps": 5, "total_steps": 1140, "loss": 0.6016, "lr": 0.0010526315789473684, "epoch": 0.08771929824561403, "percentage": 0.44, "elapsed_time": "0:00:01", "remaining_time": "0:05:40", "throughput": 2092.28, "total_tokens": 3136} |
| {"current_steps": 10, "total_steps": 1140, "loss": 0.7503, "lr": 0.0023684210526315787, "epoch": 0.17543859649122806, "percentage": 0.88, "elapsed_time": "0:00:02", "remaining_time": "0:04:51", "throughput": 2373.29, "total_tokens": 6112} |
| {"current_steps": 15, "total_steps": 1140, "loss": 2.8137, "lr": 0.003684210526315789, "epoch": 0.2631578947368421, "percentage": 1.32, "elapsed_time": "0:00:03", "remaining_time": "0:04:45", "throughput": 2659.03, "total_tokens": 10112} |
| {"current_steps": 20, "total_steps": 1140, "loss": 0.6094, "lr": 0.004999999999999999, "epoch": 0.3508771929824561, "percentage": 1.75, "elapsed_time": "0:00:04", "remaining_time": "0:04:34", "throughput": 2710.75, "total_tokens": 13280} |
| {"current_steps": 25, "total_steps": 1140, "loss": 0.7539, "lr": 0.0063157894736842095, "epoch": 0.43859649122807015, "percentage": 2.19, "elapsed_time": "0:00:05", "remaining_time": "0:04:26", "throughput": 2729.98, "total_tokens": 16288} |
| {"current_steps": 30, "total_steps": 1140, "loss": 1.6404, "lr": 0.0076315789473684215, "epoch": 0.5263157894736842, "percentage": 2.63, "elapsed_time": "0:00:07", "remaining_time": "0:04:19", "throughput": 2725.72, "total_tokens": 19104} |
| {"current_steps": 35, "total_steps": 1140, "loss": 5.4717, "lr": 0.008947368421052631, "epoch": 0.6140350877192983, "percentage": 3.07, "elapsed_time": "0:00:08", "remaining_time": "0:04:15", "throughput": 2740.48, "total_tokens": 22144} |
| {"current_steps": 40, "total_steps": 1140, "loss": 4.8847, "lr": 0.010263157894736842, "epoch": 0.7017543859649122, "percentage": 3.51, "elapsed_time": "0:00:09", "remaining_time": "0:04:14", "throughput": 2784.15, "total_tokens": 25792} |
| {"current_steps": 45, "total_steps": 1140, "loss": 2.5551, "lr": 0.011578947368421052, "epoch": 0.7894736842105263, "percentage": 3.95, "elapsed_time": "0:00:10", "remaining_time": "0:04:10", "throughput": 2773.56, "total_tokens": 28576} |
| {"current_steps": 50, "total_steps": 1140, "loss": 0.7962, "lr": 0.012894736842105263, "epoch": 0.8771929824561403, "percentage": 4.39, "elapsed_time": "0:00:11", "remaining_time": "0:04:07", "throughput": 2766.1, "total_tokens": 31424} |
| {"current_steps": 55, "total_steps": 1140, "loss": 1.353, "lr": 0.014210526315789472, "epoch": 0.9649122807017544, "percentage": 4.82, "elapsed_time": "0:00:12", "remaining_time": "0:04:06", "throughput": 2782.22, "total_tokens": 34720} |
| {"current_steps": 57, "total_steps": 1140, "eval_loss": 1.0482252836227417, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:13", "remaining_time": "0:04:19", "throughput": 2596.91, "total_tokens": 35448} |
| {"current_steps": 60, "total_steps": 1140, "loss": 0.6558, "lr": 0.015526315789473685, "epoch": 1.0526315789473684, "percentage": 5.26, "elapsed_time": "0:00:15", "remaining_time": "0:04:33", "throughput": 2483.9, "total_tokens": 37688} |
| {"current_steps": 65, "total_steps": 1140, "loss": 0.7116, "lr": 0.016842105263157894, "epoch": 1.1403508771929824, "percentage": 5.7, "elapsed_time": "0:00:16", "remaining_time": "0:04:28", "throughput": 2508.84, "total_tokens": 40792} |
| {"current_steps": 70, "total_steps": 1140, "loss": 0.4407, "lr": 0.018157894736842106, "epoch": 1.2280701754385965, "percentage": 6.14, "elapsed_time": "0:00:17", "remaining_time": "0:04:24", "throughput": 2529.39, "total_tokens": 43832} |
| {"current_steps": 75, "total_steps": 1140, "loss": 0.4844, "lr": 0.019473684210526317, "epoch": 1.3157894736842106, "percentage": 6.58, "elapsed_time": "0:00:18", "remaining_time": "0:04:20", "throughput": 2539.12, "total_tokens": 46648} |
| {"current_steps": 80, "total_steps": 1140, "loss": 0.3423, "lr": 0.020789473684210528, "epoch": 1.4035087719298245, "percentage": 7.02, "elapsed_time": "0:00:19", "remaining_time": "0:04:18", "throughput": 2559.83, "total_tokens": 49848} |
| {"current_steps": 85, "total_steps": 1140, "loss": 0.3679, "lr": 0.022105263157894735, "epoch": 1.4912280701754386, "percentage": 7.46, "elapsed_time": "0:00:20", "remaining_time": "0:04:14", "throughput": 2561.79, "total_tokens": 52504} |
| {"current_steps": 90, "total_steps": 1140, "loss": 0.4392, "lr": 0.023421052631578947, "epoch": 1.5789473684210527, "percentage": 7.89, "elapsed_time": "0:00:21", "remaining_time": "0:04:11", "throughput": 2571.9, "total_tokens": 55448} |
| {"current_steps": 95, "total_steps": 1140, "loss": 0.1719, "lr": 0.024736842105263158, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:22", "remaining_time": "0:04:09", "throughput": 2591.15, "total_tokens": 58776} |
| {"current_steps": 100, "total_steps": 1140, "loss": 0.2579, "lr": 0.02605263157894737, "epoch": 1.7543859649122808, "percentage": 8.77, "elapsed_time": "0:00:23", "remaining_time": "0:04:06", "throughput": 2596.69, "total_tokens": 61624} |
| {"current_steps": 105, "total_steps": 1140, "loss": 0.2097, "lr": 0.027368421052631577, "epoch": 1.8421052631578947, "percentage": 9.21, "elapsed_time": "0:00:24", "remaining_time": "0:04:05", "throughput": 2621.98, "total_tokens": 65336} |
| {"current_steps": 110, "total_steps": 1140, "loss": 0.4513, "lr": 0.028684210526315788, "epoch": 1.9298245614035088, "percentage": 9.65, "elapsed_time": "0:00:25", "remaining_time": "0:04:03", "throughput": 2629.09, "total_tokens": 68280} |
| {"current_steps": 114, "total_steps": 1140, "eval_loss": 0.22055481374263763, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:27", "remaining_time": "0:04:08", "throughput": 2549.52, "total_tokens": 70496} |
| {"current_steps": 115, "total_steps": 1140, "loss": 0.2656, "lr": 0.03, "epoch": 2.017543859649123, "percentage": 10.09, "elapsed_time": "0:00:28", "remaining_time": "0:04:15", "throughput": 2477.2, "total_tokens": 71008} |
| {"current_steps": 120, "total_steps": 1140, "loss": 0.2642, "lr": 0.02999824208523885, "epoch": 2.1052631578947367, "percentage": 10.53, "elapsed_time": "0:00:29", "remaining_time": "0:04:12", "throughput": 2492.65, "total_tokens": 74176} |
| {"current_steps": 125, "total_steps": 1140, "loss": 0.2855, "lr": 0.029992968752990647, "epoch": 2.192982456140351, "percentage": 10.96, "elapsed_time": "0:00:30", "remaining_time": "0:04:10", "throughput": 2509.95, "total_tokens": 77472} |
| {"current_steps": 130, "total_steps": 1140, "loss": 0.3213, "lr": 0.02998418123926453, "epoch": 2.280701754385965, "percentage": 11.4, "elapsed_time": "0:00:31", "remaining_time": "0:04:07", "throughput": 2513.85, "total_tokens": 80192} |
| {"current_steps": 135, "total_steps": 1140, "loss": 0.4367, "lr": 0.029971881603753848, "epoch": 2.3684210526315788, "percentage": 11.84, "elapsed_time": "0:00:33", "remaining_time": "0:04:05", "throughput": 2527.66, "total_tokens": 83424} |
| {"current_steps": 140, "total_steps": 1140, "loss": 0.4123, "lr": 0.02995607272935338, "epoch": 2.456140350877193, "percentage": 12.28, "elapsed_time": "0:00:34", "remaining_time": "0:04:03", "throughput": 2529.8, "total_tokens": 86080} |
| {"current_steps": 145, "total_steps": 1140, "loss": 0.3135, "lr": 0.02993675832148361, "epoch": 2.543859649122807, "percentage": 12.72, "elapsed_time": "0:00:35", "remaining_time": "0:04:01", "throughput": 2546.64, "total_tokens": 89600} |
| {"current_steps": 150, "total_steps": 1140, "loss": 0.2866, "lr": 0.029913942907222237, "epoch": 2.6315789473684212, "percentage": 13.16, "elapsed_time": "0:00:36", "remaining_time": "0:03:59", "throughput": 2555.35, "total_tokens": 92704} |
| {"current_steps": 155, "total_steps": 1140, "loss": 0.2538, "lr": 0.029887631834243058, "epoch": 2.719298245614035, "percentage": 13.6, "elapsed_time": "0:00:37", "remaining_time": "0:03:57", "throughput": 2568.76, "total_tokens": 96160} |
| {"current_steps": 160, "total_steps": 1140, "loss": 0.3449, "lr": 0.029857831269562547, "epoch": 2.807017543859649, "percentage": 14.04, "elapsed_time": "0:00:38", "remaining_time": "0:03:56", "throughput": 2580.42, "total_tokens": 99520} |
| {"current_steps": 165, "total_steps": 1140, "loss": 0.2438, "lr": 0.029824548198094384, "epoch": 2.8947368421052633, "percentage": 14.47, "elapsed_time": "0:00:39", "remaining_time": "0:03:54", "throughput": 2587.41, "total_tokens": 102528} |
| {"current_steps": 170, "total_steps": 1140, "loss": 0.2683, "lr": 0.029787790421012244, "epoch": 2.982456140350877, "percentage": 14.91, "elapsed_time": "0:00:40", "remaining_time": "0:03:52", "throughput": 2604.29, "total_tokens": 106304} |
| {"current_steps": 171, "total_steps": 1140, "eval_loss": 0.18028998374938965, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:41", "remaining_time": "0:03:56", "throughput": 2546.93, "total_tokens": 106416} |
| {"current_steps": 175, "total_steps": 1140, "loss": 0.2154, "lr": 0.029747566553921325, "epoch": 3.0701754385964914, "percentage": 15.35, "elapsed_time": "0:00:43", "remaining_time": "0:03:59", "throughput": 2507.02, "total_tokens": 108880} |
| {"current_steps": 180, "total_steps": 1140, "loss": 0.2558, "lr": 0.029703886024838914, "epoch": 3.1578947368421053, "percentage": 15.79, "elapsed_time": "0:00:44", "remaining_time": "0:03:57", "throughput": 2515.56, "total_tokens": 111984} |
| {"current_steps": 185, "total_steps": 1140, "loss": 0.2182, "lr": 0.0296567590719846, "epoch": 3.245614035087719, "percentage": 16.23, "elapsed_time": "0:00:45", "remaining_time": "0:03:55", "throughput": 2525.39, "total_tokens": 115248} |
| {"current_steps": 190, "total_steps": 1140, "loss": 0.1636, "lr": 0.029606196741380517, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:46", "remaining_time": "0:03:53", "throughput": 2532.7, "total_tokens": 118384} |
| {"current_steps": 195, "total_steps": 1140, "loss": 0.2113, "lr": 0.029552210884262308, "epoch": 3.4210526315789473, "percentage": 17.11, "elapsed_time": "0:00:47", "remaining_time": "0:03:52", "throughput": 2548.33, "total_tokens": 122224} |
| {"current_steps": 200, "total_steps": 1140, "loss": 0.2613, "lr": 0.029494814154301326, "epoch": 3.5087719298245617, "percentage": 17.54, "elapsed_time": "0:00:49", "remaining_time": "0:03:50", "throughput": 2558.4, "total_tokens": 125584} |
| {"current_steps": 205, "total_steps": 1140, "loss": 0.1587, "lr": 0.029434020004638753, "epoch": 3.5964912280701755, "percentage": 17.98, "elapsed_time": "0:00:50", "remaining_time": "0:03:48", "throughput": 2565.79, "total_tokens": 128816} |
| {"current_steps": 210, "total_steps": 1140, "loss": 0.4129, "lr": 0.029369842684732334, "epoch": 3.6842105263157894, "percentage": 18.42, "elapsed_time": "0:00:51", "remaining_time": "0:03:47", "throughput": 2572.33, "total_tokens": 131952} |
| {"current_steps": 215, "total_steps": 1140, "loss": 0.2719, "lr": 0.02930229723701646, "epoch": 3.7719298245614032, "percentage": 18.86, "elapsed_time": "0:00:52", "remaining_time": "0:03:45", "throughput": 2580.31, "total_tokens": 135248} |
| {"current_steps": 220, "total_steps": 1140, "loss": 0.2218, "lr": 0.029231399493376414, "epoch": 3.8596491228070176, "percentage": 19.3, "elapsed_time": "0:00:53", "remaining_time": "0:03:43", "throughput": 2586.06, "total_tokens": 138384} |
| {"current_steps": 225, "total_steps": 1140, "loss": 0.2404, "lr": 0.02915716607143754, "epoch": 3.9473684210526314, "percentage": 19.74, "elapsed_time": "0:00:54", "remaining_time": "0:03:41", "throughput": 2588.75, "total_tokens": 141264} |
| {"current_steps": 228, "total_steps": 1140, "eval_loss": 0.16730651259422302, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:00:55", "remaining_time": "0:03:43", "throughput": 2546.81, "total_tokens": 142480} |
| {"current_steps": 230, "total_steps": 1140, "loss": 0.2303, "lr": 0.029079614370670265, "epoch": 4.035087719298246, "percentage": 20.18, "elapsed_time": "0:00:57", "remaining_time": "0:03:46", "throughput": 2513.47, "total_tokens": 143664} |
| {"current_steps": 235, "total_steps": 1140, "loss": 0.1414, "lr": 0.028998762568311857, "epoch": 4.12280701754386, "percentage": 20.61, "elapsed_time": "0:00:58", "remaining_time": "0:03:44", "throughput": 2519.7, "total_tokens": 146800} |
| {"current_steps": 240, "total_steps": 1140, "loss": 0.1369, "lr": 0.028914629615105897, "epoch": 4.2105263157894735, "percentage": 21.05, "elapsed_time": "0:00:59", "remaining_time": "0:03:42", "throughput": 2521.61, "total_tokens": 149456} |
| {"current_steps": 245, "total_steps": 1140, "loss": 0.196, "lr": 0.028827235230860424, "epoch": 4.298245614035087, "percentage": 21.49, "elapsed_time": "0:01:00", "remaining_time": "0:03:40", "throughput": 2530.59, "total_tokens": 152880} |
| {"current_steps": 250, "total_steps": 1140, "loss": 0.2025, "lr": 0.02873659989982586, "epoch": 4.385964912280702, "percentage": 21.93, "elapsed_time": "0:01:01", "remaining_time": "0:03:39", "throughput": 2543.09, "total_tokens": 156720} |
| {"current_steps": 255, "total_steps": 1140, "loss": 0.311, "lr": 0.02864274486589371, "epoch": 4.473684210526316, "percentage": 22.37, "elapsed_time": "0:01:02", "remaining_time": "0:03:37", "throughput": 2546.09, "total_tokens": 159600} |
| {"current_steps": 260, "total_steps": 1140, "loss": 0.2458, "lr": 0.028545692127617244, "epoch": 4.56140350877193, "percentage": 22.81, "elapsed_time": "0:01:03", "remaining_time": "0:03:35", "throughput": 2546.24, "total_tokens": 162224} |
| {"current_steps": 265, "total_steps": 1140, "loss": 0.1756, "lr": 0.0284454644330553, "epoch": 4.649122807017544, "percentage": 23.25, "elapsed_time": "0:01:04", "remaining_time": "0:03:33", "throughput": 2551.39, "total_tokens": 165328} |
| {"current_steps": 270, "total_steps": 1140, "loss": 0.2166, "lr": 0.02834208527444037, "epoch": 4.7368421052631575, "percentage": 23.68, "elapsed_time": "0:01:05", "remaining_time": "0:03:32", "throughput": 2558.14, "total_tokens": 168592} |
| {"current_steps": 275, "total_steps": 1140, "loss": 0.2856, "lr": 0.028235578882672318, "epoch": 4.824561403508772, "percentage": 24.12, "elapsed_time": "0:01:06", "remaining_time": "0:03:30", "throughput": 2563.09, "total_tokens": 171696} |
| {"current_steps": 280, "total_steps": 1140, "loss": 0.2382, "lr": 0.028125970221638905, "epoch": 4.912280701754386, "percentage": 24.56, "elapsed_time": "0:01:08", "remaining_time": "0:03:29", "throughput": 2566.8, "total_tokens": 174672} |
| {"current_steps": 285, "total_steps": 1140, "loss": 0.2199, "lr": 0.028013284982364554, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:09", "remaining_time": "0:03:27", "throughput": 2566.81, "total_tokens": 177224} |
| {"current_steps": 285, "total_steps": 1140, "eval_loss": 0.252529114484787, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:09", "remaining_time": "0:03:29", "throughput": 2535.75, "total_tokens": 177224} |
| {"current_steps": 290, "total_steps": 1140, "loss": 0.16, "lr": 0.027897549576988666, "epoch": 5.087719298245614, "percentage": 25.44, "elapsed_time": "0:01:11", "remaining_time": "0:03:30", "throughput": 2516.55, "total_tokens": 180840} |
| {"current_steps": 295, "total_steps": 1140, "loss": 0.1788, "lr": 0.027778791132574907, "epoch": 5.175438596491228, "percentage": 25.88, "elapsed_time": "0:01:12", "remaining_time": "0:03:28", "throughput": 2521.79, "total_tokens": 183976} |
| {"current_steps": 300, "total_steps": 1140, "loss": 0.3165, "lr": 0.02765703748475293, "epoch": 5.2631578947368425, "percentage": 26.32, "elapsed_time": "0:01:14", "remaining_time": "0:03:27", "throughput": 2526.08, "total_tokens": 187016} |
| {"current_steps": 305, "total_steps": 1140, "loss": 0.2463, "lr": 0.027532317171194046, "epoch": 5.350877192982456, "percentage": 26.75, "elapsed_time": "0:01:15", "remaining_time": "0:03:25", "throughput": 2525.6, "total_tokens": 189480} |
| {"current_steps": 310, "total_steps": 1140, "loss": 0.1892, "lr": 0.027404659424922272, "epoch": 5.43859649122807, "percentage": 27.19, "elapsed_time": "0:01:16", "remaining_time": "0:03:23", "throughput": 2531.38, "total_tokens": 192712} |
| {"current_steps": 315, "total_steps": 1140, "loss": 0.3672, "lr": 0.027274094167462487, "epoch": 5.526315789473684, "percentage": 27.63, "elapsed_time": "0:01:17", "remaining_time": "0:03:22", "throughput": 2533.59, "total_tokens": 195496} |
| {"current_steps": 320, "total_steps": 1140, "loss": 0.1932, "lr": 0.02714065200182714, "epoch": 5.614035087719298, "percentage": 28.07, "elapsed_time": "0:01:18", "remaining_time": "0:03:20", "throughput": 2542.57, "total_tokens": 199240} |
| {"current_steps": 325, "total_steps": 1140, "loss": 0.2354, "lr": 0.02700436420534326, "epoch": 5.701754385964913, "percentage": 28.51, "elapsed_time": "0:01:19", "remaining_time": "0:03:19", "throughput": 2543.03, "total_tokens": 201832} |
| {"current_steps": 330, "total_steps": 1140, "loss": 0.1387, "lr": 0.02686526272232141, "epoch": 5.7894736842105265, "percentage": 28.95, "elapsed_time": "0:01:20", "remaining_time": "0:03:17", "throughput": 2547.29, "total_tokens": 204968} |
| {"current_steps": 335, "total_steps": 1140, "loss": 0.3058, "lr": 0.026723380156568298, "epoch": 5.87719298245614, "percentage": 29.39, "elapsed_time": "0:01:21", "remaining_time": "0:03:16", "throughput": 2553.36, "total_tokens": 208360} |
| {"current_steps": 340, "total_steps": 1140, "loss": 0.226, "lr": 0.026578749763744813, "epoch": 5.964912280701754, "percentage": 29.82, "elapsed_time": "0:01:22", "remaining_time": "0:03:14", "throughput": 2554.64, "total_tokens": 211080} |
| {"current_steps": 342, "total_steps": 1140, "eval_loss": 0.20502713322639465, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:23", "remaining_time": "0:03:15", "throughput": 2528.63, "total_tokens": 212000} |
| {"current_steps": 345, "total_steps": 1140, "loss": 0.18, "lr": 0.026431405443571282, "epoch": 6.052631578947368, "percentage": 30.26, "elapsed_time": "0:01:25", "remaining_time": "0:03:16", "throughput": 2510.4, "total_tokens": 214176} |
| {"current_steps": 350, "total_steps": 1140, "loss": 0.1775, "lr": 0.02628138173188176, "epoch": 6.140350877192983, "percentage": 30.7, "elapsed_time": "0:01:26", "remaining_time": "0:03:14", "throughput": 2510.72, "total_tokens": 216736} |
| {"current_steps": 355, "total_steps": 1140, "loss": 0.2343, "lr": 0.026128713792529224, "epoch": 6.228070175438597, "percentage": 31.14, "elapsed_time": "0:01:27", "remaining_time": "0:03:13", "throughput": 2517.18, "total_tokens": 220224} |
| {"current_steps": 360, "total_steps": 1140, "loss": 0.1887, "lr": 0.025973437409143554, "epoch": 6.315789473684211, "percentage": 31.58, "elapsed_time": "0:01:28", "remaining_time": "0:03:11", "throughput": 2519.99, "total_tokens": 223136} |
| {"current_steps": 365, "total_steps": 1140, "loss": 0.1615, "lr": 0.025815588976744273, "epoch": 6.4035087719298245, "percentage": 32.02, "elapsed_time": "0:01:29", "remaining_time": "0:03:10", "throughput": 2528.72, "total_tokens": 226944} |
| {"current_steps": 370, "total_steps": 1140, "loss": 0.1746, "lr": 0.02565520549320996, "epoch": 6.491228070175438, "percentage": 32.46, "elapsed_time": "0:01:30", "remaining_time": "0:03:09", "throughput": 2534.99, "total_tokens": 230368} |
| {"current_steps": 375, "total_steps": 1140, "loss": 0.2084, "lr": 0.02549232455060637, "epoch": 6.578947368421053, "percentage": 32.89, "elapsed_time": "0:01:31", "remaining_time": "0:03:07", "throughput": 2538.35, "total_tokens": 233376} |
| {"current_steps": 380, "total_steps": 1140, "loss": 0.1668, "lr": 0.025326984326375274, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:33", "remaining_time": "0:03:06", "throughput": 2541.23, "total_tokens": 236384} |
| {"current_steps": 385, "total_steps": 1140, "loss": 0.1415, "lr": 0.025159223574386116, "epoch": 6.754385964912281, "percentage": 33.77, "elapsed_time": "0:01:34", "remaining_time": "0:03:04", "throughput": 2545.46, "total_tokens": 239584} |
| {"current_steps": 390, "total_steps": 1140, "loss": 0.3343, "lr": 0.02498908161585253, "epoch": 6.842105263157895, "percentage": 34.21, "elapsed_time": "0:01:35", "remaining_time": "0:03:03", "throughput": 2548.51, "total_tokens": 242624} |
| {"current_steps": 395, "total_steps": 1140, "loss": 0.0928, "lr": 0.024816598330115895, "epoch": 6.9298245614035086, "percentage": 34.65, "elapsed_time": "0:01:36", "remaining_time": "0:03:01", "throughput": 2555.03, "total_tokens": 246304} |
| {"current_steps": 399, "total_steps": 1140, "eval_loss": 0.2429497092962265, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:38", "remaining_time": "0:03:02", "throughput": 2532.99, "total_tokens": 248272} |
| {"current_steps": 400, "total_steps": 1140, "loss": 0.3043, "lr": 0.024641814145298088, "epoch": 7.017543859649122, "percentage": 35.09, "elapsed_time": "0:01:39", "remaining_time": "0:03:03", "throughput": 2514.01, "total_tokens": 248912} |
| {"current_steps": 405, "total_steps": 1140, "loss": 0.2289, "lr": 0.024464770028825585, "epoch": 7.105263157894737, "percentage": 35.53, "elapsed_time": "0:01:40", "remaining_time": "0:03:01", "throughput": 2517.76, "total_tokens": 252016} |
| {"current_steps": 410, "total_steps": 1140, "loss": 0.2158, "lr": 0.024285507477827137, "epoch": 7.192982456140351, "percentage": 35.96, "elapsed_time": "0:01:41", "remaining_time": "0:03:00", "throughput": 2525.7, "total_tokens": 255856} |
| {"current_steps": 415, "total_steps": 1140, "loss": 0.2326, "lr": 0.02410406850940735, "epoch": 7.280701754385965, "percentage": 36.4, "elapsed_time": "0:01:42", "remaining_time": "0:02:58", "throughput": 2528.05, "total_tokens": 258800} |
| {"current_steps": 420, "total_steps": 1140, "loss": 0.1642, "lr": 0.02392049565079834, "epoch": 7.368421052631579, "percentage": 36.84, "elapsed_time": "0:01:43", "remaining_time": "0:02:57", "throughput": 2529.25, "total_tokens": 261584} |
| {"current_steps": 425, "total_steps": 1140, "loss": 0.1351, "lr": 0.023734831929391822, "epoch": 7.456140350877193, "percentage": 37.28, "elapsed_time": "0:01:44", "remaining_time": "0:02:55", "throughput": 2535.25, "total_tokens": 265168} |
| {"current_steps": 430, "total_steps": 1140, "loss": 0.2195, "lr": 0.02354712086265399, "epoch": 7.543859649122807, "percentage": 37.72, "elapsed_time": "0:01:45", "remaining_time": "0:02:54", "throughput": 2538.67, "total_tokens": 268240} |
| {"current_steps": 435, "total_steps": 1140, "loss": 0.2902, "lr": 0.023357406447925527, "epoch": 7.631578947368421, "percentage": 38.16, "elapsed_time": "0:01:46", "remaining_time": "0:02:53", "throughput": 2544.08, "total_tokens": 271728} |
| {"current_steps": 440, "total_steps": 1140, "loss": 0.129, "lr": 0.023165733152109094, "epoch": 7.719298245614035, "percentage": 38.6, "elapsed_time": "0:01:47", "remaining_time": "0:02:51", "throughput": 2546.52, "total_tokens": 274736} |
| {"current_steps": 445, "total_steps": 1140, "loss": 0.1276, "lr": 0.022972145901246837, "epoch": 7.807017543859649, "percentage": 39.04, "elapsed_time": "0:01:49", "remaining_time": "0:02:50", "throughput": 2550.82, "total_tokens": 278096} |
| {"current_steps": 450, "total_steps": 1140, "loss": 0.0985, "lr": 0.022776690069990207, "epoch": 7.894736842105263, "percentage": 39.47, "elapsed_time": "0:01:50", "remaining_time": "0:02:48", "throughput": 2550.63, "total_tokens": 280592} |
| {"current_steps": 455, "total_steps": 1140, "loss": 0.1848, "lr": 0.022579411470964646, "epoch": 7.982456140350877, "percentage": 39.91, "elapsed_time": "0:01:51", "remaining_time": "0:02:47", "throughput": 2555.94, "total_tokens": 284144} |
| {"current_steps": 456, "total_steps": 1140, "eval_loss": 0.17890170216560364, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:01:52", "remaining_time": "0:02:48", "throughput": 2534.76, "total_tokens": 284248} |
| {"current_steps": 460, "total_steps": 1140, "loss": 0.2103, "lr": 0.022380356344031675, "epoch": 8.070175438596491, "percentage": 40.35, "elapsed_time": "0:01:53", "remaining_time": "0:02:48", "throughput": 2522.13, "total_tokens": 287128} |
| {"current_steps": 465, "total_steps": 1140, "loss": 0.1396, "lr": 0.02217957134545074, "epoch": 8.157894736842104, "percentage": 40.79, "elapsed_time": "0:01:55", "remaining_time": "0:02:46", "throughput": 2527.87, "total_tokens": 290744} |
| {"current_steps": 470, "total_steps": 1140, "loss": 0.1304, "lr": 0.02197710353694355, "epoch": 8.24561403508772, "percentage": 41.23, "elapsed_time": "0:01:56", "remaining_time": "0:02:45", "throughput": 2530.96, "total_tokens": 293816} |
| {"current_steps": 475, "total_steps": 1140, "loss": 0.1891, "lr": 0.02177300037466334, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:01:57", "remaining_time": "0:02:44", "throughput": 2534.61, "total_tokens": 297080} |
| {"current_steps": 480, "total_steps": 1140, "loss": 0.2151, "lr": 0.02156730969807168, "epoch": 8.421052631578947, "percentage": 42.11, "elapsed_time": "0:01:58", "remaining_time": "0:02:42", "throughput": 2538.7, "total_tokens": 300408} |
| {"current_steps": 485, "total_steps": 1140, "loss": 0.1712, "lr": 0.021360079718725448, "epoch": 8.508771929824562, "percentage": 42.54, "elapsed_time": "0:01:59", "remaining_time": "0:02:41", "throughput": 2540.89, "total_tokens": 303384} |
| {"current_steps": 490, "total_steps": 1140, "loss": 0.1259, "lr": 0.021151359008976602, "epoch": 8.596491228070175, "percentage": 42.98, "elapsed_time": "0:02:00", "remaining_time": "0:02:39", "throughput": 2542.54, "total_tokens": 306232} |
| {"current_steps": 495, "total_steps": 1140, "loss": 0.189, "lr": 0.02094119649058735, "epoch": 8.68421052631579, "percentage": 43.42, "elapsed_time": "0:02:01", "remaining_time": "0:02:38", "throughput": 2547.69, "total_tokens": 309816} |
| {"current_steps": 500, "total_steps": 1140, "loss": 0.1627, "lr": 0.020729641423263476, "epoch": 8.771929824561404, "percentage": 43.86, "elapsed_time": "0:02:02", "remaining_time": "0:02:36", "throughput": 2548.13, "total_tokens": 312408} |
| {"current_steps": 505, "total_steps": 1140, "loss": 0.1801, "lr": 0.0205167433931084, "epoch": 8.859649122807017, "percentage": 44.3, "elapsed_time": "0:02:03", "remaining_time": "0:02:35", "throughput": 2550.12, "total_tokens": 315416} |
| {"current_steps": 510, "total_steps": 1140, "loss": 0.271, "lr": 0.020302552301000754, "epoch": 8.947368421052632, "percentage": 44.74, "elapsed_time": "0:02:04", "remaining_time": "0:02:34", "throughput": 2552.24, "total_tokens": 318392} |
| {"current_steps": 513, "total_steps": 1140, "eval_loss": 0.19974899291992188, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:06", "remaining_time": "0:02:34", "throughput": 2533.31, "total_tokens": 319488} |
| {"current_steps": 515, "total_steps": 1140, "loss": 0.14, "lr": 0.02008711835089822, "epoch": 9.035087719298245, "percentage": 45.18, "elapsed_time": "0:02:07", "remaining_time": "0:02:34", "throughput": 2519.16, "total_tokens": 320832} |
| {"current_steps": 520, "total_steps": 1140, "loss": 0.1427, "lr": 0.019870492038070255, "epoch": 9.12280701754386, "percentage": 45.61, "elapsed_time": "0:02:08", "remaining_time": "0:02:33", "throughput": 2521.79, "total_tokens": 323840} |
| {"current_steps": 525, "total_steps": 1140, "loss": 0.0586, "lr": 0.01965272413726258, "epoch": 9.210526315789474, "percentage": 46.05, "elapsed_time": "0:02:09", "remaining_time": "0:02:31", "throughput": 2525.79, "total_tokens": 327200} |
| {"current_steps": 530, "total_steps": 1140, "loss": 0.1931, "lr": 0.01943386569079618, "epoch": 9.298245614035087, "percentage": 46.49, "elapsed_time": "0:02:10", "remaining_time": "0:02:30", "throughput": 2527.95, "total_tokens": 330208} |
| {"current_steps": 535, "total_steps": 1140, "loss": 0.0365, "lr": 0.01921396799660354, "epoch": 9.385964912280702, "percentage": 46.93, "elapsed_time": "0:02:11", "remaining_time": "0:02:28", "throughput": 2528.58, "total_tokens": 332864} |
| {"current_steps": 540, "total_steps": 1140, "loss": 0.305, "lr": 0.018993082596205, "epoch": 9.473684210526315, "percentage": 47.37, "elapsed_time": "0:02:12", "remaining_time": "0:02:27", "throughput": 2532.28, "total_tokens": 336224} |
| {"current_steps": 545, "total_steps": 1140, "loss": 0.1647, "lr": 0.018771261262628014, "epoch": 9.56140350877193, "percentage": 47.81, "elapsed_time": "0:02:13", "remaining_time": "0:02:26", "throughput": 2535.09, "total_tokens": 339392} |
| {"current_steps": 550, "total_steps": 1140, "loss": 0.1295, "lr": 0.018548555988272136, "epoch": 9.649122807017545, "percentage": 48.25, "elapsed_time": "0:02:15", "remaining_time": "0:02:24", "throughput": 2537.78, "total_tokens": 342624} |
| {"current_steps": 555, "total_steps": 1140, "loss": 0.1854, "lr": 0.018325018972722578, "epoch": 9.736842105263158, "percentage": 48.68, "elapsed_time": "0:02:16", "remaining_time": "0:02:23", "throughput": 2539.01, "total_tokens": 345472} |
| {"current_steps": 560, "total_steps": 1140, "loss": 0.1956, "lr": 0.01810070261051526, "epoch": 9.824561403508772, "percentage": 49.12, "elapsed_time": "0:02:17", "remaining_time": "0:02:21", "throughput": 2539.68, "total_tokens": 348160} |
| {"current_steps": 565, "total_steps": 1140, "loss": 0.1919, "lr": 0.01787565947885608, "epoch": 9.912280701754385, "percentage": 49.56, "elapsed_time": "0:02:18", "remaining_time": "0:02:20", "throughput": 2543.13, "total_tokens": 351456} |
| {"current_steps": 570, "total_steps": 1140, "loss": 0.1533, "lr": 0.01764994232529744, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:19", "remaining_time": "0:02:19", "throughput": 2545.43, "total_tokens": 354472} |
| {"current_steps": 570, "total_steps": 1140, "eval_loss": 0.24398671090602875, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:20", "remaining_time": "0:02:20", "throughput": 2530.02, "total_tokens": 354472} |
| {"current_steps": 575, "total_steps": 1140, "loss": 0.1188, "lr": 0.01742360405537482, "epoch": 10.087719298245615, "percentage": 50.44, "elapsed_time": "0:02:22", "remaining_time": "0:02:19", "throughput": 2520.35, "total_tokens": 358024} |
| {"current_steps": 580, "total_steps": 1140, "loss": 0.1679, "lr": 0.017196697720206326, "epoch": 10.175438596491228, "percentage": 50.88, "elapsed_time": "0:02:23", "remaining_time": "0:02:18", "throughput": 2523.11, "total_tokens": 361192} |
| {"current_steps": 585, "total_steps": 1140, "loss": 0.119, "lr": 0.01696927650405807, "epoch": 10.263157894736842, "percentage": 51.32, "elapsed_time": "0:02:24", "remaining_time": "0:02:16", "throughput": 2523.52, "total_tokens": 363816} |
| {"current_steps": 590, "total_steps": 1140, "loss": 0.1062, "lr": 0.016741393711878452, "epoch": 10.350877192982455, "percentage": 51.75, "elapsed_time": "0:02:25", "remaining_time": "0:02:15", "throughput": 2524.08, "total_tokens": 366440} |
| {"current_steps": 595, "total_steps": 1140, "loss": 0.1102, "lr": 0.016513102756804024, "epoch": 10.43859649122807, "percentage": 52.19, "elapsed_time": "0:02:26", "remaining_time": "0:02:13", "throughput": 2527.11, "total_tokens": 369672} |
| {"current_steps": 600, "total_steps": 1140, "loss": 0.1657, "lr": 0.016284457147640083, "epoch": 10.526315789473685, "percentage": 52.63, "elapsed_time": "0:02:27", "remaining_time": "0:02:12", "throughput": 2529.55, "total_tokens": 372808} |
| {"current_steps": 605, "total_steps": 1140, "loss": 0.2843, "lr": 0.016055510476318827, "epoch": 10.614035087719298, "percentage": 53.07, "elapsed_time": "0:02:28", "remaining_time": "0:02:11", "throughput": 2530.59, "total_tokens": 375560} |
| {"current_steps": 610, "total_steps": 1140, "loss": 0.1846, "lr": 0.015826316405337982, "epoch": 10.701754385964913, "percentage": 53.51, "elapsed_time": "0:02:29", "remaining_time": "0:02:09", "throughput": 2533.46, "total_tokens": 378792} |
| {"current_steps": 615, "total_steps": 1140, "loss": 0.1775, "lr": 0.015596928655182963, "epoch": 10.789473684210526, "percentage": 53.95, "elapsed_time": "0:02:30", "remaining_time": "0:02:08", "throughput": 2537.52, "total_tokens": 382312} |
| {"current_steps": 620, "total_steps": 1140, "loss": 0.1141, "lr": 0.015367400991735372, "epoch": 10.87719298245614, "percentage": 54.39, "elapsed_time": "0:02:31", "remaining_time": "0:02:07", "throughput": 2542.26, "total_tokens": 386088} |
| {"current_steps": 625, "total_steps": 1140, "loss": 0.0974, "lr": 0.015137787213670897, "epoch": 10.964912280701755, "percentage": 54.82, "elapsed_time": "0:02:32", "remaining_time": "0:02:05", "throughput": 2542.99, "total_tokens": 388840} |
| {"current_steps": 627, "total_steps": 1140, "eval_loss": 0.2553347647190094, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:34", "remaining_time": "0:02:06", "throughput": 2527.48, "total_tokens": 389408} |
| {"current_steps": 630, "total_steps": 1140, "loss": 0.127, "lr": 0.014908141139849508, "epoch": 11.052631578947368, "percentage": 55.26, "elapsed_time": "0:02:35", "remaining_time": "0:02:05", "throughput": 2514.61, "total_tokens": 391200} |
| {"current_steps": 635, "total_steps": 1140, "loss": 0.222, "lr": 0.014678516596700955, "epoch": 11.140350877192983, "percentage": 55.7, "elapsed_time": "0:02:36", "remaining_time": "0:02:04", "throughput": 2519.05, "total_tokens": 394880} |
| {"current_steps": 640, "total_steps": 1140, "loss": 0.0825, "lr": 0.014448967405608415, "epoch": 11.228070175438596, "percentage": 56.14, "elapsed_time": "0:02:37", "remaining_time": "0:02:03", "throughput": 2522.66, "total_tokens": 398336} |
| {"current_steps": 645, "total_steps": 1140, "loss": 0.0889, "lr": 0.014219547370293413, "epoch": 11.31578947368421, "percentage": 56.58, "elapsed_time": "0:02:39", "remaining_time": "0:02:02", "throughput": 2525.65, "total_tokens": 401632} |
| {"current_steps": 650, "total_steps": 1140, "loss": 0.0741, "lr": 0.013990310264204829, "epoch": 11.403508771929825, "percentage": 57.02, "elapsed_time": "0:02:40", "remaining_time": "0:02:00", "throughput": 2529.4, "total_tokens": 405120} |
| {"current_steps": 655, "total_steps": 1140, "loss": 0.1272, "lr": 0.013761309817915016, "epoch": 11.491228070175438, "percentage": 57.46, "elapsed_time": "0:02:41", "remaining_time": "0:01:59", "throughput": 2531.44, "total_tokens": 408192} |
| {"current_steps": 660, "total_steps": 1140, "loss": 0.1003, "lr": 0.013532599706525941, "epoch": 11.578947368421053, "percentage": 57.89, "elapsed_time": "0:02:42", "remaining_time": "0:01:58", "throughput": 2533.94, "total_tokens": 411360} |
| {"current_steps": 665, "total_steps": 1140, "loss": 0.1163, "lr": 0.013304233537088392, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:02:43", "remaining_time": "0:01:56", "throughput": 2535.0, "total_tokens": 414144} |
| {"current_steps": 670, "total_steps": 1140, "loss": 0.0978, "lr": 0.013076264836037051, "epoch": 11.75438596491228, "percentage": 58.77, "elapsed_time": "0:02:44", "remaining_time": "0:01:55", "throughput": 2539.02, "total_tokens": 417760} |
| {"current_steps": 675, "total_steps": 1140, "loss": 0.1583, "lr": 0.012848747036644558, "epoch": 11.842105263157894, "percentage": 59.21, "elapsed_time": "0:02:45", "remaining_time": "0:01:54", "throughput": 2540.19, "total_tokens": 420640} |
| {"current_steps": 680, "total_steps": 1140, "loss": 0.1862, "lr": 0.012621733466497287, "epoch": 11.929824561403509, "percentage": 59.65, "elapsed_time": "0:02:46", "remaining_time": "0:01:52", "throughput": 2540.82, "total_tokens": 423360} |
| {"current_steps": 684, "total_steps": 1140, "eval_loss": 0.1732322871685028, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:02:48", "remaining_time": "0:01:52", "throughput": 2527.93, "total_tokens": 425328} |
| {"current_steps": 685, "total_steps": 1140, "loss": 0.078, "lr": 0.012395277334996044, "epoch": 12.017543859649123, "percentage": 60.09, "elapsed_time": "0:02:49", "remaining_time": "0:01:52", "throughput": 2517.42, "total_tokens": 426288} |
| {"current_steps": 690, "total_steps": 1140, "loss": 0.2284, "lr": 0.012169431720884335, "epoch": 12.105263157894736, "percentage": 60.53, "elapsed_time": "0:02:50", "remaining_time": "0:01:51", "throughput": 2518.82, "total_tokens": 429136} |
| {"current_steps": 695, "total_steps": 1140, "loss": 0.1374, "lr": 0.01194424955980734, "epoch": 12.192982456140351, "percentage": 60.96, "elapsed_time": "0:02:51", "remaining_time": "0:01:49", "throughput": 2521.1, "total_tokens": 432272} |
| {"current_steps": 700, "total_steps": 1140, "loss": 0.1079, "lr": 0.011719783631904363, "epoch": 12.280701754385966, "percentage": 61.4, "elapsed_time": "0:02:52", "remaining_time": "0:01:48", "throughput": 2524.26, "total_tokens": 435760} |
| {"current_steps": 705, "total_steps": 1140, "loss": 0.1092, "lr": 0.01149608654943782, "epoch": 12.368421052631579, "percentage": 61.84, "elapsed_time": "0:02:53", "remaining_time": "0:01:47", "throughput": 2528.03, "total_tokens": 439312} |
| {"current_steps": 710, "total_steps": 1140, "loss": 0.065, "lr": 0.011273210744461479, "epoch": 12.456140350877194, "percentage": 62.28, "elapsed_time": "0:02:54", "remaining_time": "0:01:45", "throughput": 2530.75, "total_tokens": 442640} |
| {"current_steps": 715, "total_steps": 1140, "loss": 0.0943, "lr": 0.011051208456531014, "epoch": 12.543859649122806, "percentage": 62.72, "elapsed_time": "0:02:55", "remaining_time": "0:01:44", "throughput": 2532.54, "total_tokens": 445648} |
| {"current_steps": 720, "total_steps": 1140, "loss": 0.0867, "lr": 0.010830131720459601, "epoch": 12.631578947368421, "percentage": 63.16, "elapsed_time": "0:02:56", "remaining_time": "0:01:43", "throughput": 2532.51, "total_tokens": 448208} |
| {"current_steps": 725, "total_steps": 1140, "loss": 0.0837, "lr": 0.010610032354121612, "epoch": 12.719298245614034, "percentage": 63.6, "elapsed_time": "0:02:58", "remaining_time": "0:01:41", "throughput": 2535.31, "total_tokens": 451600} |
| {"current_steps": 730, "total_steps": 1140, "loss": 0.0844, "lr": 0.01039096194630704, "epoch": 12.807017543859649, "percentage": 64.04, "elapsed_time": "0:02:59", "remaining_time": "0:01:40", "throughput": 2538.28, "total_tokens": 454992} |
| {"current_steps": 735, "total_steps": 1140, "loss": 0.0553, "lr": 0.010172971844629716, "epoch": 12.894736842105264, "percentage": 64.47, "elapsed_time": "0:03:00", "remaining_time": "0:01:39", "throughput": 2539.69, "total_tokens": 458032} |
| {"current_steps": 740, "total_steps": 1140, "loss": 0.0931, "lr": 0.009956113143491957, "epoch": 12.982456140350877, "percentage": 64.91, "elapsed_time": "0:03:01", "remaining_time": "0:01:38", "throughput": 2541.29, "total_tokens": 461104} |
| {"current_steps": 741, "total_steps": 1140, "eval_loss": 0.23760755360126495, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:02", "remaining_time": "0:01:38", "throughput": 2528.4, "total_tokens": 461216} |
| {"current_steps": 745, "total_steps": 1140, "loss": 0.043, "lr": 0.009740436672108685, "epoch": 13.070175438596491, "percentage": 65.35, "elapsed_time": "0:03:04", "remaining_time": "0:01:37", "throughput": 2519.65, "total_tokens": 463904} |
| {"current_steps": 750, "total_steps": 1140, "loss": 0.1013, "lr": 0.009525992982593583, "epoch": 13.157894736842104, "percentage": 65.79, "elapsed_time": "0:03:05", "remaining_time": "0:01:36", "throughput": 2522.56, "total_tokens": 467328} |
| {"current_steps": 755, "total_steps": 1140, "loss": 0.1884, "lr": 0.009312832338110292, "epoch": 13.24561403508772, "percentage": 66.23, "elapsed_time": "0:03:06", "remaining_time": "0:01:34", "throughput": 2522.43, "total_tokens": 469792} |
| {"current_steps": 760, "total_steps": 1140, "loss": 0.0553, "lr": 0.009101004701091252, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:07", "remaining_time": "0:01:33", "throughput": 2524.27, "total_tokens": 472896} |
| {"current_steps": 765, "total_steps": 1140, "loss": 0.0876, "lr": 0.008890559721527138, "epoch": 13.421052631578947, "percentage": 67.11, "elapsed_time": "0:03:08", "remaining_time": "0:01:32", "throughput": 2525.89, "total_tokens": 475904} |
| {"current_steps": 770, "total_steps": 1140, "loss": 0.1052, "lr": 0.008681546725329408, "epoch": 13.508771929824562, "percentage": 67.54, "elapsed_time": "0:03:09", "remaining_time": "0:01:31", "throughput": 2527.4, "total_tokens": 478848} |
| {"current_steps": 775, "total_steps": 1140, "loss": 0.065, "lr": 0.008474014702768904, "epoch": 13.596491228070175, "percentage": 67.98, "elapsed_time": "0:03:10", "remaining_time": "0:01:29", "throughput": 2529.66, "total_tokens": 482080} |
| {"current_steps": 780, "total_steps": 1140, "loss": 0.0326, "lr": 0.008268012296993067, "epoch": 13.68421052631579, "percentage": 68.42, "elapsed_time": "0:03:11", "remaining_time": "0:01:28", "throughput": 2532.18, "total_tokens": 485440} |
| {"current_steps": 785, "total_steps": 1140, "loss": 0.0849, "lr": 0.008063587792624567, "epoch": 13.771929824561404, "percentage": 68.86, "elapsed_time": "0:03:12", "remaining_time": "0:01:27", "throughput": 2534.46, "total_tokens": 488640} |
| {"current_steps": 790, "total_steps": 1140, "loss": 0.1001, "lr": 0.007860789104443896, "epoch": 13.859649122807017, "percentage": 69.3, "elapsed_time": "0:03:13", "remaining_time": "0:01:25", "throughput": 2537.05, "total_tokens": 492032} |
| {"current_steps": 795, "total_steps": 1140, "loss": 0.0754, "lr": 0.0076596637661587325, "epoch": 13.947368421052632, "percentage": 69.74, "elapsed_time": "0:03:15", "remaining_time": "0:01:24", "throughput": 2539.3, "total_tokens": 495264} |
| {"current_steps": 798, "total_steps": 1140, "eval_loss": 0.21240630745887756, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:16", "remaining_time": "0:01:24", "throughput": 2528.37, "total_tokens": 496704} |
| {"current_steps": 800, "total_steps": 1140, "loss": 0.0359, "lr": 0.007460258919262529, "epoch": 14.035087719298245, "percentage": 70.18, "elapsed_time": "0:03:17", "remaining_time": "0:01:24", "throughput": 2520.65, "total_tokens": 498464} |
| {"current_steps": 805, "total_steps": 1140, "loss": 0.0507, "lr": 0.007262621301985144, "epoch": 14.12280701754386, "percentage": 70.61, "elapsed_time": "0:03:18", "remaining_time": "0:01:22", "throughput": 2522.12, "total_tokens": 501632} |
| {"current_steps": 810, "total_steps": 1140, "loss": 0.0363, "lr": 0.007066797238337862, "epoch": 14.210526315789474, "percentage": 71.05, "elapsed_time": "0:03:19", "remaining_time": "0:01:21", "throughput": 2523.35, "total_tokens": 504544} |
| {"current_steps": 815, "total_steps": 1140, "loss": 0.065, "lr": 0.006872832627255643, "epoch": 14.298245614035087, "percentage": 71.49, "elapsed_time": "0:03:21", "remaining_time": "0:01:20", "throughput": 2524.52, "total_tokens": 507456} |
| {"current_steps": 820, "total_steps": 1140, "loss": 0.0311, "lr": 0.006680772931838868, "epoch": 14.385964912280702, "percentage": 71.93, "elapsed_time": "0:03:22", "remaining_time": "0:01:18", "throughput": 2527.96, "total_tokens": 511136} |
| {"current_steps": 825, "total_steps": 1140, "loss": 0.0323, "lr": 0.00649066316869736, "epoch": 14.473684210526315, "percentage": 72.37, "elapsed_time": "0:03:23", "remaining_time": "0:01:17", "throughput": 2529.98, "total_tokens": 514368} |
| {"current_steps": 830, "total_steps": 1140, "loss": 0.084, "lr": 0.0063025478973989585, "epoch": 14.56140350877193, "percentage": 72.81, "elapsed_time": "0:03:24", "remaining_time": "0:01:16", "throughput": 2531.12, "total_tokens": 517248} |
| {"current_steps": 835, "total_steps": 1140, "loss": 0.0384, "lr": 0.006116471210025301, "epoch": 14.649122807017545, "percentage": 73.25, "elapsed_time": "0:03:25", "remaining_time": "0:01:15", "throughput": 2532.08, "total_tokens": 520160} |
| {"current_steps": 840, "total_steps": 1140, "loss": 0.0341, "lr": 0.005932476720837105, "epoch": 14.736842105263158, "percentage": 73.68, "elapsed_time": "0:03:26", "remaining_time": "0:01:13", "throughput": 2534.14, "total_tokens": 523424} |
| {"current_steps": 845, "total_steps": 1140, "loss": 0.1026, "lr": 0.005750607556051514, "epoch": 14.824561403508772, "percentage": 74.12, "elapsed_time": "0:03:27", "remaining_time": "0:01:12", "throughput": 2534.99, "total_tokens": 526240} |
| {"current_steps": 850, "total_steps": 1140, "loss": 0.0325, "lr": 0.0055709063437337685, "epoch": 14.912280701754385, "percentage": 74.56, "elapsed_time": "0:03:28", "remaining_time": "0:01:11", "throughput": 2536.85, "total_tokens": 529440} |
| {"current_steps": 855, "total_steps": 1140, "loss": 0.0246, "lr": 0.005393415203805707, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:29", "remaining_time": "0:01:09", "throughput": 2538.6, "total_tokens": 532504} |
| {"current_steps": 855, "total_steps": 1140, "eval_loss": 0.2732408344745636, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:30", "remaining_time": "0:01:10", "throughput": 2528.39, "total_tokens": 532504} |
| {"current_steps": 860, "total_steps": 1140, "loss": 0.0257, "lr": 0.005218175738173303, "epoch": 15.087719298245615, "percentage": 75.44, "elapsed_time": "0:03:32", "remaining_time": "0:01:09", "throughput": 2520.22, "total_tokens": 535544} |
| {"current_steps": 865, "total_steps": 1140, "loss": 0.0155, "lr": 0.005045229020975681, "epoch": 15.175438596491228, "percentage": 75.88, "elapsed_time": "0:03:33", "remaining_time": "0:01:07", "throughput": 2522.21, "total_tokens": 538744} |
| {"current_steps": 870, "total_steps": 1140, "loss": 0.0222, "lr": 0.004874615588957773, "epoch": 15.263157894736842, "percentage": 76.32, "elapsed_time": "0:03:34", "remaining_time": "0:01:06", "throughput": 2524.53, "total_tokens": 542040} |
| {"current_steps": 875, "total_steps": 1140, "loss": 0.0084, "lr": 0.0047063754319689975, "epoch": 15.350877192982455, "percentage": 76.75, "elapsed_time": "0:03:35", "remaining_time": "0:01:05", "throughput": 2525.61, "total_tokens": 544888} |
| {"current_steps": 880, "total_steps": 1140, "loss": 0.0189, "lr": 0.004540547983590069, "epoch": 15.43859649122807, "percentage": 77.19, "elapsed_time": "0:03:36", "remaining_time": "0:01:04", "throughput": 2526.23, "total_tokens": 547640} |
| {"current_steps": 885, "total_steps": 1140, "loss": 0.0149, "lr": 0.0043771721118902335, "epoch": 15.526315789473685, "percentage": 77.63, "elapsed_time": "0:03:37", "remaining_time": "0:01:02", "throughput": 2529.23, "total_tokens": 551224} |
| {"current_steps": 890, "total_steps": 1140, "loss": 0.0107, "lr": 0.004216286110317013, "epoch": 15.614035087719298, "percentage": 78.07, "elapsed_time": "0:03:39", "remaining_time": "0:01:01", "throughput": 2530.6, "total_tokens": 554264} |
| {"current_steps": 895, "total_steps": 1140, "loss": 0.0101, "lr": 0.00405792768872069, "epoch": 15.701754385964913, "percentage": 78.51, "elapsed_time": "0:03:40", "remaining_time": "0:01:00", "throughput": 2533.63, "total_tokens": 557880} |
| {"current_steps": 900, "total_steps": 1140, "loss": 0.041, "lr": 0.003902133964515502, "epoch": 15.789473684210526, "percentage": 78.95, "elapsed_time": "0:03:41", "remaining_time": "0:00:58", "throughput": 2533.61, "total_tokens": 560376} |
| {"current_steps": 905, "total_steps": 1140, "loss": 0.019, "lr": 0.00374894145397979, "epoch": 15.87719298245614, "percentage": 79.39, "elapsed_time": "0:03:42", "remaining_time": "0:00:57", "throughput": 2536.35, "total_tokens": 563864} |
| {"current_steps": 910, "total_steps": 1140, "loss": 0.0136, "lr": 0.0035983860636969525, "epoch": 15.964912280701755, "percentage": 79.82, "elapsed_time": "0:03:43", "remaining_time": "0:00:56", "throughput": 2538.93, "total_tokens": 567352} |
| {"current_steps": 912, "total_steps": 1140, "eval_loss": 0.29675254225730896, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:03:44", "remaining_time": "0:00:56", "throughput": 2528.36, "total_tokens": 567952} |
| {"current_steps": 915, "total_steps": 1140, "loss": 0.0154, "lr": 0.003450503082139393, "epoch": 16.05263157894737, "percentage": 80.26, "elapsed_time": "0:03:46", "remaining_time": "0:00:55", "throughput": 2522.25, "total_tokens": 570448} |
| {"current_steps": 920, "total_steps": 1140, "loss": 0.0102, "lr": 0.003305327171397263, "epoch": 16.140350877192983, "percentage": 80.7, "elapsed_time": "0:03:47", "remaining_time": "0:00:54", "throughput": 2524.49, "total_tokens": 573808} |
| {"current_steps": 925, "total_steps": 1140, "loss": 0.0104, "lr": 0.003162892359054098, "epoch": 16.228070175438596, "percentage": 81.14, "elapsed_time": "0:03:48", "remaining_time": "0:00:53", "throughput": 2526.74, "total_tokens": 577200} |
| {"current_steps": 930, "total_steps": 1140, "loss": 0.0101, "lr": 0.0030232320302111047, "epoch": 16.31578947368421, "percentage": 81.58, "elapsed_time": "0:03:49", "remaining_time": "0:00:51", "throughput": 2527.94, "total_tokens": 580240} |
| {"current_steps": 935, "total_steps": 1140, "loss": 0.0098, "lr": 0.0028863789196621093, "epoch": 16.403508771929825, "percentage": 82.02, "elapsed_time": "0:03:50", "remaining_time": "0:00:50", "throughput": 2528.95, "total_tokens": 583120} |
| {"current_steps": 940, "total_steps": 1140, "loss": 0.0053, "lr": 0.0027523651042208564, "epoch": 16.49122807017544, "percentage": 82.46, "elapsed_time": "0:03:51", "remaining_time": "0:00:49", "throughput": 2529.99, "total_tokens": 586032} |
| {"current_steps": 945, "total_steps": 1140, "loss": 0.0064, "lr": 0.0026212219952026, "epoch": 16.57894736842105, "percentage": 82.89, "elapsed_time": "0:03:52", "remaining_time": "0:00:48", "throughput": 2532.18, "total_tokens": 589392} |
| {"current_steps": 950, "total_steps": 1140, "loss": 0.0072, "lr": 0.0024929803310616222, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:03:53", "remaining_time": "0:00:46", "throughput": 2534.03, "total_tokens": 592624} |
| {"current_steps": 955, "total_steps": 1140, "loss": 0.0064, "lr": 0.002367670170186516, "epoch": 16.75438596491228, "percentage": 83.77, "elapsed_time": "0:03:54", "remaining_time": "0:00:45", "throughput": 2534.8, "total_tokens": 595440} |
| {"current_steps": 960, "total_steps": 1140, "loss": 0.0094, "lr": 0.00224532088385481, "epoch": 16.842105263157894, "percentage": 84.21, "elapsed_time": "0:03:56", "remaining_time": "0:00:44", "throughput": 2536.47, "total_tokens": 598640} |
| {"current_steps": 965, "total_steps": 1140, "loss": 0.0147, "lr": 0.002125961149348706, "epoch": 16.92982456140351, "percentage": 84.65, "elapsed_time": "0:03:57", "remaining_time": "0:00:42", "throughput": 2537.41, "total_tokens": 601584} |
| {"current_steps": 969, "total_steps": 1140, "eval_loss": 0.2885914742946625, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:03:58", "remaining_time": "0:00:42", "throughput": 2529.06, "total_tokens": 603760} |
| {"current_steps": 970, "total_steps": 1140, "loss": 0.0054, "lr": 0.0020096189432334192, "epoch": 17.017543859649123, "percentage": 85.09, "elapsed_time": "0:03:59", "remaining_time": "0:00:42", "throughput": 2519.95, "total_tokens": 604464} |
| {"current_steps": 975, "total_steps": 1140, "loss": 0.0125, "lr": 0.001896321534799823, "epoch": 17.105263157894736, "percentage": 85.53, "elapsed_time": "0:04:00", "remaining_time": "0:00:40", "throughput": 2521.43, "total_tokens": 607600} |
| {"current_steps": 980, "total_steps": 1140, "loss": 0.009, "lr": 0.0017860954796727994, "epoch": 17.19298245614035, "percentage": 85.96, "elapsed_time": "0:04:02", "remaining_time": "0:00:39", "throughput": 2524.11, "total_tokens": 611152} |
| {"current_steps": 985, "total_steps": 1140, "loss": 0.0081, "lr": 0.0016789666135869374, "epoch": 17.280701754385966, "percentage": 86.4, "elapsed_time": "0:04:03", "remaining_time": "0:00:38", "throughput": 2526.72, "total_tokens": 614768} |
| {"current_steps": 990, "total_steps": 1140, "loss": 0.0075, "lr": 0.0015749600463309049, "epoch": 17.36842105263158, "percentage": 86.84, "elapsed_time": "0:04:04", "remaining_time": "0:00:37", "throughput": 2529.32, "total_tokens": 618384} |
| {"current_steps": 995, "total_steps": 1140, "loss": 0.0035, "lr": 0.0014741001558620163, "epoch": 17.45614035087719, "percentage": 87.28, "elapsed_time": "0:04:05", "remaining_time": "0:00:35", "throughput": 2530.57, "total_tokens": 621424} |
| {"current_steps": 1000, "total_steps": 1140, "loss": 0.0093, "lr": 0.0013764105825923066, "epoch": 17.54385964912281, "percentage": 87.72, "elapsed_time": "0:04:06", "remaining_time": "0:00:34", "throughput": 2531.92, "total_tokens": 624464} |
| {"current_steps": 1005, "total_steps": 1140, "loss": 0.0061, "lr": 0.0012819142238474862, "epoch": 17.63157894736842, "percentage": 88.16, "elapsed_time": "0:04:07", "remaining_time": "0:00:33", "throughput": 2533.97, "total_tokens": 627824} |
| {"current_steps": 1010, "total_steps": 1140, "loss": 0.0059, "lr": 0.0011906332285000793, "epoch": 17.719298245614034, "percentage": 88.6, "elapsed_time": "0:04:08", "remaining_time": "0:00:32", "throughput": 2535.68, "total_tokens": 631056} |
| {"current_steps": 1015, "total_steps": 1140, "loss": 0.0057, "lr": 0.0011025889917779736, "epoch": 17.80701754385965, "percentage": 89.04, "elapsed_time": "0:04:09", "remaining_time": "0:00:30", "throughput": 2537.13, "total_tokens": 634128} |
| {"current_steps": 1020, "total_steps": 1140, "loss": 0.0066, "lr": 0.0010178021502496165, "epoch": 17.894736842105264, "percentage": 89.47, "elapsed_time": "0:04:10", "remaining_time": "0:00:29", "throughput": 2537.09, "total_tokens": 636656} |
| {"current_steps": 1025, "total_steps": 1140, "loss": 0.0111, "lr": 0.0009362925769870394, "epoch": 17.982456140350877, "percentage": 89.91, "elapsed_time": "0:04:12", "remaining_time": "0:00:28", "throughput": 2538.31, "total_tokens": 639696} |
| {"current_steps": 1026, "total_steps": 1140, "eval_loss": 0.286989688873291, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:12", "remaining_time": "0:00:28", "throughput": 2528.95, "total_tokens": 639784} |
| {"current_steps": 1030, "total_steps": 1140, "loss": 0.008, "lr": 0.0008580793769078487, "epoch": 18.07017543859649, "percentage": 90.35, "elapsed_time": "0:04:14", "remaining_time": "0:00:27", "throughput": 2522.21, "total_tokens": 642280} |
| {"current_steps": 1035, "total_steps": 1140, "loss": 0.0065, "lr": 0.0007831808822972391, "epoch": 18.157894736842106, "percentage": 90.79, "elapsed_time": "0:04:15", "remaining_time": "0:00:25", "throughput": 2522.96, "total_tokens": 645128} |
| {"current_steps": 1040, "total_steps": 1140, "loss": 0.0066, "lr": 0.0007116146485111063, "epoch": 18.24561403508772, "percentage": 91.23, "elapsed_time": "0:04:16", "remaining_time": "0:00:24", "throughput": 2522.75, "total_tokens": 647528} |
| {"current_steps": 1045, "total_steps": 1140, "loss": 0.004, "lr": 0.0006433974498612882, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:04:17", "remaining_time": "0:00:23", "throughput": 2526.21, "total_tokens": 651560} |
| {"current_steps": 1050, "total_steps": 1140, "loss": 0.0122, "lr": 0.0005785452756838482, "epoch": 18.42105263157895, "percentage": 92.11, "elapsed_time": "0:04:18", "remaining_time": "0:00:22", "throughput": 2527.34, "total_tokens": 654568} |
| {"current_steps": 1055, "total_steps": 1140, "loss": 0.0072, "lr": 0.0005170733265913585, "epoch": 18.50877192982456, "percentage": 92.54, "elapsed_time": "0:04:20", "remaining_time": "0:00:20", "throughput": 2528.01, "total_tokens": 657448} |
| {"current_steps": 1060, "total_steps": 1140, "loss": 0.0065, "lr": 0.0004589960109100444, "epoch": 18.596491228070175, "percentage": 92.98, "elapsed_time": "0:04:21", "remaining_time": "0:00:19", "throughput": 2529.15, "total_tokens": 660392} |
| {"current_steps": 1065, "total_steps": 1140, "loss": 0.0098, "lr": 0.00040432694130264286, "epoch": 18.68421052631579, "percentage": 93.42, "elapsed_time": "0:04:22", "remaining_time": "0:00:18", "throughput": 2530.78, "total_tokens": 663656} |
| {"current_steps": 1070, "total_steps": 1140, "loss": 0.0055, "lr": 0.0003530789315777466, "epoch": 18.771929824561404, "percentage": 93.86, "elapsed_time": "0:04:23", "remaining_time": "0:00:17", "throughput": 2533.11, "total_tokens": 667208} |
| {"current_steps": 1075, "total_steps": 1140, "loss": 0.0049, "lr": 0.000305263993686391, "epoch": 18.859649122807017, "percentage": 94.3, "elapsed_time": "0:04:24", "remaining_time": "0:00:15", "throughput": 2534.98, "total_tokens": 670568} |
| {"current_steps": 1080, "total_steps": 1140, "loss": 0.0129, "lr": 0.000260893334906595, "epoch": 18.94736842105263, "percentage": 94.74, "elapsed_time": "0:04:25", "remaining_time": "0:00:14", "throughput": 2537.08, "total_tokens": 674024} |
| {"current_steps": 1083, "total_steps": 1140, "eval_loss": 0.28917577862739563, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:27", "remaining_time": "0:00:14", "throughput": 2529.78, "total_tokens": 675800} |
| {"current_steps": 1085, "total_steps": 1140, "loss": 0.0046, "lr": 0.00021997735521649408, "epoch": 19.035087719298247, "percentage": 95.18, "elapsed_time": "0:04:28", "remaining_time": "0:00:13", "throughput": 2522.87, "total_tokens": 676920} |
| {"current_steps": 1090, "total_steps": 1140, "loss": 0.0069, "lr": 0.00018252564485670973, "epoch": 19.12280701754386, "percentage": 95.61, "elapsed_time": "0:04:29", "remaining_time": "0:00:12", "throughput": 2523.63, "total_tokens": 679896} |
| {"current_steps": 1095, "total_steps": 1140, "loss": 0.0062, "lr": 0.00014854698208250638, "epoch": 19.210526315789473, "percentage": 96.05, "elapsed_time": "0:04:30", "remaining_time": "0:00:11", "throughput": 2525.5, "total_tokens": 683288} |
| {"current_steps": 1100, "total_steps": 1140, "loss": 0.0067, "lr": 0.00011804933110626359, "epoch": 19.29824561403509, "percentage": 96.49, "elapsed_time": "0:04:31", "remaining_time": "0:00:09", "throughput": 2526.37, "total_tokens": 686168} |
| {"current_steps": 1105, "total_steps": 1140, "loss": 0.0107, "lr": 9.103984023075773e-05, "epoch": 19.385964912280702, "percentage": 96.93, "elapsed_time": "0:04:32", "remaining_time": "0:00:08", "throughput": 2527.11, "total_tokens": 689016} |
| {"current_steps": 1110, "total_steps": 1140, "loss": 0.0062, "lr": 6.752484017368553e-05, "epoch": 19.473684210526315, "percentage": 97.37, "elapsed_time": "0:04:33", "remaining_time": "0:00:07", "throughput": 2528.99, "total_tokens": 692376} |
| {"current_steps": 1115, "total_steps": 1140, "loss": 0.0084, "lr": 4.750984258380608e-05, "epoch": 19.56140350877193, "percentage": 97.81, "elapsed_time": "0:04:34", "remaining_time": "0:00:06", "throughput": 2530.34, "total_tokens": 695480} |
| {"current_steps": 1120, "total_steps": 1140, "loss": 0.0044, "lr": 3.099953874908079e-05, "epoch": 19.649122807017545, "percentage": 98.25, "elapsed_time": "0:04:35", "remaining_time": "0:00:04", "throughput": 2531.34, "total_tokens": 698424} |
| {"current_steps": 1125, "total_steps": 1140, "loss": 0.0061, "lr": 1.7997798497084714e-05, "epoch": 19.736842105263158, "percentage": 98.68, "elapsed_time": "0:04:37", "remaining_time": "0:00:03", "throughput": 2532.72, "total_tokens": 701592} |
| {"current_steps": 1130, "total_steps": 1140, "loss": 0.0073, "lr": 8.50766928796709e-06, "epoch": 19.82456140350877, "percentage": 99.12, "elapsed_time": "0:04:38", "remaining_time": "0:00:02", "throughput": 2533.63, "total_tokens": 704504} |
| {"current_steps": 1135, "total_steps": 1140, "loss": 0.009, "lr": 2.5313755001593604e-06, "epoch": 19.912280701754387, "percentage": 99.56, "elapsed_time": "0:04:39", "remaining_time": "0:00:01", "throughput": 2535.61, "total_tokens": 707992} |
| {"current_steps": 1140, "total_steps": 1140, "loss": 0.0085, "lr": 7.03179089989181e-08, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:40", "remaining_time": "0:00:00", "throughput": 2537.03, "total_tokens": 711112} |
| {"current_steps": 1140, "total_steps": 1140, "eval_loss": 0.28609830141067505, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:41", "remaining_time": "0:00:00", "throughput": 2529.38, "total_tokens": 711112} |
| {"current_steps": 1140, "total_steps": 1140, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:41", "remaining_time": "0:00:00", "throughput": 2522.69, "total_tokens": 711112} |
|
|