| {"current_steps": 5, "total_steps": 1600, "loss": 8.3885, "lr": 2.5000000000000004e-07, "epoch": 0.0625, "percentage": 0.31, "elapsed_time": "0:00:00", "remaining_time": "0:04:52", "throughput": 1743.21, "total_tokens": 1600} |
| {"current_steps": 10, "total_steps": 1600, "loss": 8.1973, "lr": 5.625e-07, "epoch": 0.125, "percentage": 0.62, "elapsed_time": "0:00:01", "remaining_time": "0:03:35", "throughput": 2291.18, "total_tokens": 3104} |
| {"current_steps": 15, "total_steps": 1600, "loss": 7.5725, "lr": 8.75e-07, "epoch": 0.1875, "percentage": 0.94, "elapsed_time": "0:00:01", "remaining_time": "0:03:09", "throughput": 2601.42, "total_tokens": 4672} |
| {"current_steps": 20, "total_steps": 1600, "loss": 6.9352, "lr": 1.1875e-06, "epoch": 0.25, "percentage": 1.25, "elapsed_time": "0:00:02", "remaining_time": "0:02:56", "throughput": 2769.62, "total_tokens": 6176} |
| {"current_steps": 25, "total_steps": 1600, "loss": 5.878, "lr": 1.5e-06, "epoch": 0.3125, "percentage": 1.56, "elapsed_time": "0:00:02", "remaining_time": "0:02:48", "throughput": 2902.49, "total_tokens": 7744} |
| {"current_steps": 30, "total_steps": 1600, "loss": 4.7349, "lr": 1.8125e-06, "epoch": 0.375, "percentage": 1.88, "elapsed_time": "0:00:03", "remaining_time": "0:02:42", "throughput": 3016.31, "total_tokens": 9376} |
| {"current_steps": 35, "total_steps": 1600, "loss": 3.6716, "lr": 2.125e-06, "epoch": 0.4375, "percentage": 2.19, "elapsed_time": "0:00:03", "remaining_time": "0:02:38", "throughput": 3076.33, "total_tokens": 10912} |
| {"current_steps": 40, "total_steps": 1600, "loss": 2.6478, "lr": 2.4375e-06, "epoch": 0.5, "percentage": 2.5, "elapsed_time": "0:00:03", "remaining_time": "0:02:35", "throughput": 3131.86, "total_tokens": 12480} |
| {"current_steps": 45, "total_steps": 1600, "loss": 1.7471, "lr": 2.7500000000000004e-06, "epoch": 0.5625, "percentage": 2.81, "elapsed_time": "0:00:04", "remaining_time": "0:02:32", "throughput": 3176.41, "total_tokens": 14048} |
| {"current_steps": 50, "total_steps": 1600, "loss": 0.9994, "lr": 3.0625000000000003e-06, "epoch": 0.625, "percentage": 3.12, "elapsed_time": "0:00:04", "remaining_time": "0:02:30", "throughput": 3211.9, "total_tokens": 15616} |
| {"current_steps": 55, "total_steps": 1600, "loss": 0.6295, "lr": 3.3750000000000003e-06, "epoch": 0.6875, "percentage": 3.44, "elapsed_time": "0:00:05", "remaining_time": "0:02:28", "throughput": 3247.97, "total_tokens": 17216} |
| {"current_steps": 60, "total_steps": 1600, "loss": 0.357, "lr": 3.6875000000000007e-06, "epoch": 0.75, "percentage": 3.75, "elapsed_time": "0:00:05", "remaining_time": "0:02:27", "throughput": 3274.86, "total_tokens": 18784} |
| {"current_steps": 65, "total_steps": 1600, "loss": 0.3752, "lr": 4.000000000000001e-06, "epoch": 0.8125, "percentage": 4.06, "elapsed_time": "0:00:06", "remaining_time": "0:02:25", "throughput": 3296.73, "total_tokens": 20352} |
| {"current_steps": 70, "total_steps": 1600, "loss": 0.2522, "lr": 4.312500000000001e-06, "epoch": 0.875, "percentage": 4.38, "elapsed_time": "0:00:06", "remaining_time": "0:02:24", "throughput": 3316.39, "total_tokens": 21920} |
| {"current_steps": 75, "total_steps": 1600, "loss": 0.2684, "lr": 4.625000000000001e-06, "epoch": 0.9375, "percentage": 4.69, "elapsed_time": "0:00:07", "remaining_time": "0:02:23", "throughput": 3327.11, "total_tokens": 23456} |
| {"current_steps": 80, "total_steps": 1600, "loss": 0.2501, "lr": 4.937500000000001e-06, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:07", "remaining_time": "0:02:22", "throughput": 3332.71, "total_tokens": 25056} |
| {"current_steps": 85, "total_steps": 1600, "loss": 0.2607, "lr": 5.2500000000000006e-06, "epoch": 1.0625, "percentage": 5.31, "elapsed_time": "0:00:08", "remaining_time": "0:02:23", "throughput": 3321.74, "total_tokens": 26656} |
| {"current_steps": 90, "total_steps": 1600, "loss": 0.2992, "lr": 5.5625000000000005e-06, "epoch": 1.125, "percentage": 5.62, "elapsed_time": "0:00:08", "remaining_time": "0:02:21", "throughput": 3334.99, "total_tokens": 28224} |
| {"current_steps": 95, "total_steps": 1600, "loss": 0.2638, "lr": 5.8750000000000005e-06, "epoch": 1.1875, "percentage": 5.94, "elapsed_time": "0:00:08", "remaining_time": "0:02:20", "throughput": 3351.11, "total_tokens": 29824} |
| {"current_steps": 100, "total_steps": 1600, "loss": 0.2491, "lr": 6.1875000000000005e-06, "epoch": 1.25, "percentage": 6.25, "elapsed_time": "0:00:09", "remaining_time": "0:02:20", "throughput": 3361.82, "total_tokens": 31392} |
| {"current_steps": 105, "total_steps": 1600, "loss": 0.2659, "lr": 6.5000000000000004e-06, "epoch": 1.3125, "percentage": 6.56, "elapsed_time": "0:00:09", "remaining_time": "0:02:19", "throughput": 3374.84, "total_tokens": 32992} |
| {"current_steps": 110, "total_steps": 1600, "loss": 0.2585, "lr": 6.8125e-06, "epoch": 1.375, "percentage": 6.88, "elapsed_time": "0:00:10", "remaining_time": "0:02:18", "throughput": 3384.43, "total_tokens": 34560} |
| {"current_steps": 115, "total_steps": 1600, "loss": 0.2493, "lr": 7.125e-06, "epoch": 1.4375, "percentage": 7.19, "elapsed_time": "0:00:10", "remaining_time": "0:02:17", "throughput": 3390.01, "total_tokens": 36096} |
| {"current_steps": 120, "total_steps": 1600, "loss": 0.2597, "lr": 7.437500000000001e-06, "epoch": 1.5, "percentage": 7.5, "elapsed_time": "0:00:11", "remaining_time": "0:02:16", "throughput": 3397.85, "total_tokens": 37664} |
| {"current_steps": 125, "total_steps": 1600, "loss": 0.2387, "lr": 7.75e-06, "epoch": 1.5625, "percentage": 7.81, "elapsed_time": "0:00:11", "remaining_time": "0:02:15", "throughput": 3405.05, "total_tokens": 39232} |
| {"current_steps": 130, "total_steps": 1600, "loss": 0.2295, "lr": 8.062500000000001e-06, "epoch": 1.625, "percentage": 8.12, "elapsed_time": "0:00:11", "remaining_time": "0:02:15", "throughput": 3413.79, "total_tokens": 40832} |
| {"current_steps": 135, "total_steps": 1600, "loss": 0.2646, "lr": 8.375e-06, "epoch": 1.6875, "percentage": 8.44, "elapsed_time": "0:00:12", "remaining_time": "0:02:14", "throughput": 3421.68, "total_tokens": 42432} |
| {"current_steps": 140, "total_steps": 1600, "loss": 0.2548, "lr": 8.687500000000001e-06, "epoch": 1.75, "percentage": 8.75, "elapsed_time": "0:00:12", "remaining_time": "0:02:13", "throughput": 3424.55, "total_tokens": 43968} |
| {"current_steps": 145, "total_steps": 1600, "loss": 0.276, "lr": 9e-06, "epoch": 1.8125, "percentage": 9.06, "elapsed_time": "0:00:13", "remaining_time": "0:02:13", "throughput": 3429.44, "total_tokens": 45536} |
| {"current_steps": 150, "total_steps": 1600, "loss": 0.2343, "lr": 9.312500000000001e-06, "epoch": 1.875, "percentage": 9.38, "elapsed_time": "0:00:13", "remaining_time": "0:02:12", "throughput": 3434.81, "total_tokens": 47104} |
| {"current_steps": 155, "total_steps": 1600, "loss": 0.2473, "lr": 9.625e-06, "epoch": 1.9375, "percentage": 9.69, "elapsed_time": "0:00:14", "remaining_time": "0:02:11", "throughput": 3437.73, "total_tokens": 48640} |
| {"current_steps": 160, "total_steps": 1600, "loss": 0.2428, "lr": 9.937500000000001e-06, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:14", "remaining_time": "0:02:11", "throughput": 3432.89, "total_tokens": 50208} |
| {"current_steps": 160, "total_steps": 1600, "eval_loss": 0.23889970779418945, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:15", "remaining_time": "0:02:19", "throughput": 3233.26, "total_tokens": 50208} |
| {"current_steps": 165, "total_steps": 1600, "loss": 0.2291, "lr": 9.999809615320857e-06, "epoch": 2.0625, "percentage": 10.31, "elapsed_time": "0:00:17", "remaining_time": "0:02:29", "throughput": 3000.36, "total_tokens": 51744} |
| {"current_steps": 170, "total_steps": 1600, "loss": 0.2506, "lr": 9.999036202410324e-06, "epoch": 2.125, "percentage": 10.62, "elapsed_time": "0:00:17", "remaining_time": "0:02:28", "throughput": 3014.62, "total_tokens": 53312} |
| {"current_steps": 175, "total_steps": 1600, "loss": 0.2366, "lr": 9.997667954183566e-06, "epoch": 2.1875, "percentage": 10.94, "elapsed_time": "0:00:18", "remaining_time": "0:02:27", "throughput": 3027.6, "total_tokens": 54880} |
| {"current_steps": 180, "total_steps": 1600, "loss": 0.2356, "lr": 9.995705033448435e-06, "epoch": 2.25, "percentage": 11.25, "elapsed_time": "0:00:18", "remaining_time": "0:02:26", "throughput": 3043.97, "total_tokens": 56512} |
| {"current_steps": 185, "total_steps": 1600, "loss": 0.2501, "lr": 9.993147673772869e-06, "epoch": 2.3125, "percentage": 11.56, "elapsed_time": "0:00:19", "remaining_time": "0:02:25", "throughput": 3058.02, "total_tokens": 58112} |
| {"current_steps": 190, "total_steps": 1600, "loss": 0.2397, "lr": 9.9899961794571e-06, "epoch": 2.375, "percentage": 11.88, "elapsed_time": "0:00:19", "remaining_time": "0:02:24", "throughput": 3067.85, "total_tokens": 59648} |
| {"current_steps": 195, "total_steps": 1600, "loss": 0.2334, "lr": 9.986250925497429e-06, "epoch": 2.4375, "percentage": 12.19, "elapsed_time": "0:00:19", "remaining_time": "0:02:23", "throughput": 3080.2, "total_tokens": 61248} |
| {"current_steps": 200, "total_steps": 1600, "loss": 0.2319, "lr": 9.981912357541628e-06, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:20", "remaining_time": "0:02:22", "throughput": 3090.74, "total_tokens": 62816} |
| {"current_steps": 205, "total_steps": 1600, "loss": 0.2413, "lr": 9.976980991835896e-06, "epoch": 2.5625, "percentage": 12.81, "elapsed_time": "0:00:20", "remaining_time": "0:02:21", "throughput": 3100.89, "total_tokens": 64384} |
| {"current_steps": 210, "total_steps": 1600, "loss": 0.2264, "lr": 9.971457415163435e-06, "epoch": 2.625, "percentage": 13.12, "elapsed_time": "0:00:21", "remaining_time": "0:02:20", "throughput": 3109.49, "total_tokens": 65920} |
| {"current_steps": 215, "total_steps": 1600, "loss": 0.2709, "lr": 9.965342284774633e-06, "epoch": 2.6875, "percentage": 13.44, "elapsed_time": "0:00:21", "remaining_time": "0:02:19", "throughput": 3117.53, "total_tokens": 67456} |
| {"current_steps": 220, "total_steps": 1600, "loss": 0.2399, "lr": 9.958636328308852e-06, "epoch": 2.75, "percentage": 13.75, "elapsed_time": "0:00:22", "remaining_time": "0:02:18", "throughput": 3127.78, "total_tokens": 69056} |
| {"current_steps": 225, "total_steps": 1600, "loss": 0.2463, "lr": 9.951340343707852e-06, "epoch": 2.8125, "percentage": 14.06, "elapsed_time": "0:00:22", "remaining_time": "0:02:17", "throughput": 3135.99, "total_tokens": 70624} |
| {"current_steps": 230, "total_steps": 1600, "loss": 0.2089, "lr": 9.943455199120836e-06, "epoch": 2.875, "percentage": 14.37, "elapsed_time": "0:00:22", "remaining_time": "0:02:16", "throughput": 3142.91, "total_tokens": 72160} |
| {"current_steps": 235, "total_steps": 1600, "loss": 0.3221, "lr": 9.934981832801161e-06, "epoch": 2.9375, "percentage": 14.69, "elapsed_time": "0:00:23", "remaining_time": "0:02:15", "throughput": 3149.68, "total_tokens": 73696} |
| {"current_steps": 240, "total_steps": 1600, "loss": 0.2441, "lr": 9.925921252994677e-06, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:23", "remaining_time": "0:02:15", "throughput": 3154.44, "total_tokens": 75328} |
| {"current_steps": 245, "total_steps": 1600, "loss": 0.2538, "lr": 9.916274537819774e-06, "epoch": 3.0625, "percentage": 15.31, "elapsed_time": "0:00:24", "remaining_time": "0:02:14", "throughput": 3151.7, "total_tokens": 76864} |
| {"current_steps": 250, "total_steps": 1600, "loss": 0.2207, "lr": 9.90604283513909e-06, "epoch": 3.125, "percentage": 15.62, "elapsed_time": "0:00:24", "remaining_time": "0:02:14", "throughput": 3160.27, "total_tokens": 78464} |
| {"current_steps": 255, "total_steps": 1600, "loss": 0.2518, "lr": 9.89522736242292e-06, "epoch": 3.1875, "percentage": 15.94, "elapsed_time": "0:00:25", "remaining_time": "0:02:13", "throughput": 3168.15, "total_tokens": 80064} |
| {"current_steps": 260, "total_steps": 1600, "loss": 0.291, "lr": 9.883829406604363e-06, "epoch": 3.25, "percentage": 16.25, "elapsed_time": "0:00:25", "remaining_time": "0:02:12", "throughput": 3176.43, "total_tokens": 81664} |
| {"current_steps": 265, "total_steps": 1600, "loss": 0.2894, "lr": 9.871850323926178e-06, "epoch": 3.3125, "percentage": 16.56, "elapsed_time": "0:00:26", "remaining_time": "0:02:11", "throughput": 3182.24, "total_tokens": 83200} |
| {"current_steps": 270, "total_steps": 1600, "loss": 0.2541, "lr": 9.859291539779407e-06, "epoch": 3.375, "percentage": 16.88, "elapsed_time": "0:00:26", "remaining_time": "0:02:10", "throughput": 3184.83, "total_tokens": 84640} |
| {"current_steps": 275, "total_steps": 1600, "loss": 0.2269, "lr": 9.846154548533773e-06, "epoch": 3.4375, "percentage": 17.19, "elapsed_time": "0:00:27", "remaining_time": "0:02:10", "throughput": 3187.15, "total_tokens": 86080} |
| {"current_steps": 280, "total_steps": 1600, "loss": 0.2597, "lr": 9.83244091335986e-06, "epoch": 3.5, "percentage": 17.5, "elapsed_time": "0:00:27", "remaining_time": "0:02:09", "throughput": 3191.51, "total_tokens": 87584} |
| {"current_steps": 285, "total_steps": 1600, "loss": 0.2236, "lr": 9.818152266043115e-06, "epoch": 3.5625, "percentage": 17.81, "elapsed_time": "0:00:27", "remaining_time": "0:02:08", "throughput": 3198.53, "total_tokens": 89184} |
| {"current_steps": 290, "total_steps": 1600, "loss": 0.2761, "lr": 9.803290306789676e-06, "epoch": 3.625, "percentage": 18.12, "elapsed_time": "0:00:28", "remaining_time": "0:02:07", "throughput": 3204.18, "total_tokens": 90752} |
| {"current_steps": 295, "total_steps": 1600, "loss": 0.2338, "lr": 9.787856804024073e-06, "epoch": 3.6875, "percentage": 18.44, "elapsed_time": "0:00:28", "remaining_time": "0:02:07", "throughput": 3211.37, "total_tokens": 92384} |
| {"current_steps": 300, "total_steps": 1600, "loss": 0.215, "lr": 9.771853594178791e-06, "epoch": 3.75, "percentage": 18.75, "elapsed_time": "0:00:29", "remaining_time": "0:02:06", "throughput": 3217.5, "total_tokens": 93984} |
| {"current_steps": 305, "total_steps": 1600, "loss": 0.2437, "lr": 9.755282581475769e-06, "epoch": 3.8125, "percentage": 19.06, "elapsed_time": "0:00:29", "remaining_time": "0:02:05", "throughput": 3222.55, "total_tokens": 95552} |
| {"current_steps": 310, "total_steps": 1600, "loss": 0.2329, "lr": 9.7381457376998e-06, "epoch": 3.875, "percentage": 19.38, "elapsed_time": "0:00:30", "remaining_time": "0:02:05", "throughput": 3228.11, "total_tokens": 97152} |
| {"current_steps": 315, "total_steps": 1600, "loss": 0.2376, "lr": 9.720445101963923e-06, "epoch": 3.9375, "percentage": 19.69, "elapsed_time": "0:00:30", "remaining_time": "0:02:04", "throughput": 3233.88, "total_tokens": 98752} |
| {"current_steps": 320, "total_steps": 1600, "loss": 0.2334, "lr": 9.702182780466775e-06, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:00:31", "remaining_time": "0:02:04", "throughput": 3236.44, "total_tokens": 100384} |
| {"current_steps": 320, "total_steps": 1600, "eval_loss": 0.2316662073135376, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:00:31", "remaining_time": "0:02:07", "throughput": 3145.67, "total_tokens": 100384} |
| {"current_steps": 325, "total_steps": 1600, "loss": 0.2092, "lr": 9.683360946241988e-06, "epoch": 4.0625, "percentage": 20.31, "elapsed_time": "0:00:33", "remaining_time": "0:02:11", "throughput": 3051.45, "total_tokens": 101984} |
| {"current_steps": 330, "total_steps": 1600, "loss": 0.2534, "lr": 9.663981838899612e-06, "epoch": 4.125, "percentage": 20.62, "elapsed_time": "0:00:33", "remaining_time": "0:02:10", "throughput": 3056.67, "total_tokens": 103488} |
| {"current_steps": 335, "total_steps": 1600, "loss": 0.2347, "lr": 9.644047764359623e-06, "epoch": 4.1875, "percentage": 20.94, "elapsed_time": "0:00:34", "remaining_time": "0:02:09", "throughput": 3063.3, "total_tokens": 105056} |
| {"current_steps": 340, "total_steps": 1600, "loss": 0.2503, "lr": 9.623561094577541e-06, "epoch": 4.25, "percentage": 21.25, "elapsed_time": "0:00:34", "remaining_time": "0:02:08", "throughput": 3068.68, "total_tokens": 106592} |
| {"current_steps": 345, "total_steps": 1600, "loss": 0.2324, "lr": 9.602524267262202e-06, "epoch": 4.3125, "percentage": 21.56, "elapsed_time": "0:00:35", "remaining_time": "0:02:07", "throughput": 3072.54, "total_tokens": 108064} |
| {"current_steps": 350, "total_steps": 1600, "loss": 0.226, "lr": 9.58093978558568e-06, "epoch": 4.375, "percentage": 21.88, "elapsed_time": "0:00:35", "remaining_time": "0:02:07", "throughput": 3080.2, "total_tokens": 109696} |
| {"current_steps": 355, "total_steps": 1600, "loss": 0.2392, "lr": 9.558810217885444e-06, "epoch": 4.4375, "percentage": 22.19, "elapsed_time": "0:00:36", "remaining_time": "0:02:06", "throughput": 3086.91, "total_tokens": 111296} |
| {"current_steps": 360, "total_steps": 1600, "loss": 0.2292, "lr": 9.536138197358747e-06, "epoch": 4.5, "percentage": 22.5, "elapsed_time": "0:00:36", "remaining_time": "0:02:05", "throughput": 3091.08, "total_tokens": 112800} |
| {"current_steps": 365, "total_steps": 1600, "loss": 0.2321, "lr": 9.512926421749305e-06, "epoch": 4.5625, "percentage": 22.81, "elapsed_time": "0:00:36", "remaining_time": "0:02:04", "throughput": 3096.76, "total_tokens": 114368} |
| {"current_steps": 370, "total_steps": 1600, "loss": 0.2441, "lr": 9.48917765302629e-06, "epoch": 4.625, "percentage": 23.12, "elapsed_time": "0:00:37", "remaining_time": "0:02:04", "throughput": 3103.81, "total_tokens": 116000} |
| {"current_steps": 375, "total_steps": 1600, "loss": 0.2302, "lr": 9.464894717055686e-06, "epoch": 4.6875, "percentage": 23.44, "elapsed_time": "0:00:37", "remaining_time": "0:02:03", "throughput": 3110.8, "total_tokens": 117632} |
| {"current_steps": 380, "total_steps": 1600, "loss": 0.2346, "lr": 9.440080503264038e-06, "epoch": 4.75, "percentage": 23.75, "elapsed_time": "0:00:38", "remaining_time": "0:02:02", "throughput": 3116.2, "total_tokens": 119200} |
| {"current_steps": 385, "total_steps": 1600, "loss": 0.2274, "lr": 9.414737964294636e-06, "epoch": 4.8125, "percentage": 24.06, "elapsed_time": "0:00:38", "remaining_time": "0:02:02", "throughput": 3120.03, "total_tokens": 120704} |
| {"current_steps": 390, "total_steps": 1600, "loss": 0.2325, "lr": 9.388870115656185e-06, "epoch": 4.875, "percentage": 24.38, "elapsed_time": "0:00:39", "remaining_time": "0:02:01", "throughput": 3122.85, "total_tokens": 122176} |
| {"current_steps": 395, "total_steps": 1600, "loss": 0.2297, "lr": 9.362480035363987e-06, "epoch": 4.9375, "percentage": 24.69, "elapsed_time": "0:00:39", "remaining_time": "0:02:00", "throughput": 3128.44, "total_tokens": 123776} |
| {"current_steps": 400, "total_steps": 1600, "loss": 0.2315, "lr": 9.335570863573687e-06, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:00:40", "remaining_time": "0:02:00", "throughput": 3130.89, "total_tokens": 125376} |
| {"current_steps": 405, "total_steps": 1600, "loss": 0.219, "lr": 9.30814580220763e-06, "epoch": 5.0625, "percentage": 25.31, "elapsed_time": "0:00:40", "remaining_time": "0:01:59", "throughput": 3128.72, "total_tokens": 126880} |
| {"current_steps": 410, "total_steps": 1600, "loss": 0.2396, "lr": 9.280208114573859e-06, "epoch": 5.125, "percentage": 25.62, "elapsed_time": "0:00:40", "remaining_time": "0:01:58", "throughput": 3132.6, "total_tokens": 128416} |
| {"current_steps": 415, "total_steps": 1600, "loss": 0.2385, "lr": 9.251761124977816e-06, "epoch": 5.1875, "percentage": 25.94, "elapsed_time": "0:00:41", "remaining_time": "0:01:58", "throughput": 3136.83, "total_tokens": 129984} |
| {"current_steps": 420, "total_steps": 1600, "loss": 0.2163, "lr": 9.222808218326784e-06, "epoch": 5.25, "percentage": 26.25, "elapsed_time": "0:00:41", "remaining_time": "0:01:57", "throughput": 3140.5, "total_tokens": 131520} |
| {"current_steps": 425, "total_steps": 1600, "loss": 0.2819, "lr": 9.193352839727122e-06, "epoch": 5.3125, "percentage": 26.56, "elapsed_time": "0:00:42", "remaining_time": "0:01:56", "throughput": 3145.04, "total_tokens": 133088} |
| {"current_steps": 430, "total_steps": 1600, "loss": 0.2174, "lr": 9.163398494074314e-06, "epoch": 5.375, "percentage": 26.88, "elapsed_time": "0:00:42", "remaining_time": "0:01:56", "throughput": 3150.03, "total_tokens": 134688} |
| {"current_steps": 435, "total_steps": 1600, "loss": 0.2249, "lr": 9.132948745635943e-06, "epoch": 5.4375, "percentage": 27.19, "elapsed_time": "0:00:43", "remaining_time": "0:01:55", "throughput": 3154.97, "total_tokens": 136288} |
| {"current_steps": 440, "total_steps": 1600, "loss": 0.2384, "lr": 9.102007217627568e-06, "epoch": 5.5, "percentage": 27.5, "elapsed_time": "0:00:43", "remaining_time": "0:01:55", "throughput": 3158.91, "total_tokens": 137856} |
| {"current_steps": 445, "total_steps": 1600, "loss": 0.2301, "lr": 9.070577591781598e-06, "epoch": 5.5625, "percentage": 27.81, "elapsed_time": "0:00:44", "remaining_time": "0:01:54", "throughput": 3162.97, "total_tokens": 139424} |
| {"current_steps": 450, "total_steps": 1600, "loss": 0.2504, "lr": 9.038663607909198e-06, "epoch": 5.625, "percentage": 28.12, "elapsed_time": "0:00:44", "remaining_time": "0:01:53", "throughput": 3166.97, "total_tokens": 140992} |
| {"current_steps": 455, "total_steps": 1600, "loss": 0.2212, "lr": 9.006269063455305e-06, "epoch": 5.6875, "percentage": 28.44, "elapsed_time": "0:00:44", "remaining_time": "0:01:53", "throughput": 3171.03, "total_tokens": 142560} |
| {"current_steps": 460, "total_steps": 1600, "loss": 0.2353, "lr": 8.97339781304675e-06, "epoch": 5.75, "percentage": 28.75, "elapsed_time": "0:00:45", "remaining_time": "0:01:52", "throughput": 3174.42, "total_tokens": 144096} |
| {"current_steps": 465, "total_steps": 1600, "loss": 0.2219, "lr": 8.94005376803361e-06, "epoch": 5.8125, "percentage": 29.06, "elapsed_time": "0:00:45", "remaining_time": "0:01:51", "throughput": 3177.81, "total_tokens": 145632} |
| {"current_steps": 470, "total_steps": 1600, "loss": 0.2298, "lr": 8.906240896023794e-06, "epoch": 5.875, "percentage": 29.38, "elapsed_time": "0:00:46", "remaining_time": "0:01:51", "throughput": 3182.29, "total_tokens": 147232} |
| {"current_steps": 475, "total_steps": 1600, "loss": 0.2236, "lr": 8.871963220410929e-06, "epoch": 5.9375, "percentage": 29.69, "elapsed_time": "0:00:46", "remaining_time": "0:01:50", "throughput": 3185.92, "total_tokens": 148800} |
| {"current_steps": 480, "total_steps": 1600, "loss": 0.2085, "lr": 8.837224819895627e-06, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:00:47", "remaining_time": "0:01:50", "throughput": 3187.37, "total_tokens": 150400} |
| {"current_steps": 480, "total_steps": 1600, "eval_loss": 0.25171852111816406, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:00:48", "remaining_time": "0:01:52", "throughput": 3127.45, "total_tokens": 150400} |
| {"current_steps": 485, "total_steps": 1600, "loss": 0.2806, "lr": 8.802029828000157e-06, "epoch": 6.0625, "percentage": 30.31, "elapsed_time": "0:00:49", "remaining_time": "0:01:53", "throughput": 3065.03, "total_tokens": 151968} |
| {"current_steps": 490, "total_steps": 1600, "loss": 0.2683, "lr": 8.766382432576589e-06, "epoch": 6.125, "percentage": 30.63, "elapsed_time": "0:00:50", "remaining_time": "0:01:53", "throughput": 3069.93, "total_tokens": 153568} |
| {"current_steps": 495, "total_steps": 1600, "loss": 0.2261, "lr": 8.730286875308498e-06, "epoch": 6.1875, "percentage": 30.94, "elapsed_time": "0:00:50", "remaining_time": "0:01:52", "throughput": 3074.8, "total_tokens": 155168} |
| {"current_steps": 500, "total_steps": 1600, "loss": 0.2222, "lr": 8.693747451206231e-06, "epoch": 6.25, "percentage": 31.25, "elapsed_time": "0:00:50", "remaining_time": "0:01:51", "throughput": 3078.14, "total_tokens": 156704} |
| {"current_steps": 505, "total_steps": 1600, "loss": 0.2282, "lr": 8.656768508095853e-06, "epoch": 6.3125, "percentage": 31.56, "elapsed_time": "0:00:51", "remaining_time": "0:01:51", "throughput": 3082.76, "total_tokens": 158304} |
| {"current_steps": 510, "total_steps": 1600, "loss": 0.2207, "lr": 8.61935444610179e-06, "epoch": 6.375, "percentage": 31.87, "elapsed_time": "0:00:51", "remaining_time": "0:01:50", "throughput": 3087.44, "total_tokens": 159904} |
| {"current_steps": 515, "total_steps": 1600, "loss": 0.2399, "lr": 8.581509717123272e-06, "epoch": 6.4375, "percentage": 32.19, "elapsed_time": "0:00:52", "remaining_time": "0:01:50", "throughput": 3091.38, "total_tokens": 161472} |
| {"current_steps": 520, "total_steps": 1600, "loss": 0.2293, "lr": 8.543238824304585e-06, "epoch": 6.5, "percentage": 32.5, "elapsed_time": "0:00:52", "remaining_time": "0:01:49", "throughput": 3094.56, "total_tokens": 163008} |
| {"current_steps": 525, "total_steps": 1600, "loss": 0.247, "lr": 8.504546321499255e-06, "epoch": 6.5625, "percentage": 32.81, "elapsed_time": "0:00:53", "remaining_time": "0:01:48", "throughput": 3099.13, "total_tokens": 164608} |
| {"current_steps": 530, "total_steps": 1600, "loss": 0.2267, "lr": 8.465436812728181e-06, "epoch": 6.625, "percentage": 33.12, "elapsed_time": "0:00:53", "remaining_time": "0:01:48", "throughput": 3103.03, "total_tokens": 166176} |
| {"current_steps": 535, "total_steps": 1600, "loss": 0.2289, "lr": 8.425914951631796e-06, "epoch": 6.6875, "percentage": 33.44, "elapsed_time": "0:00:53", "remaining_time": "0:01:47", "throughput": 3107.04, "total_tokens": 167744} |
| {"current_steps": 540, "total_steps": 1600, "loss": 0.2345, "lr": 8.385985440916344e-06, "epoch": 6.75, "percentage": 33.75, "elapsed_time": "0:00:54", "remaining_time": "0:01:46", "throughput": 3110.01, "total_tokens": 169280} |
| {"current_steps": 545, "total_steps": 1600, "loss": 0.2241, "lr": 8.345653031794292e-06, "epoch": 6.8125, "percentage": 34.06, "elapsed_time": "0:00:54", "remaining_time": "0:01:46", "throughput": 3114.42, "total_tokens": 170880} |
| {"current_steps": 550, "total_steps": 1600, "loss": 0.2303, "lr": 8.304922523418988e-06, "epoch": 6.875, "percentage": 34.38, "elapsed_time": "0:00:55", "remaining_time": "0:01:45", "throughput": 3117.48, "total_tokens": 172416} |
| {"current_steps": 555, "total_steps": 1600, "loss": 0.2208, "lr": 8.263798762313613e-06, "epoch": 6.9375, "percentage": 34.69, "elapsed_time": "0:00:55", "remaining_time": "0:01:44", "throughput": 3120.2, "total_tokens": 173920} |
| {"current_steps": 560, "total_steps": 1600, "loss": 0.2466, "lr": 8.222286641794488e-06, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:00:56", "remaining_time": "0:01:44", "throughput": 3121.04, "total_tokens": 175456} |
| {"current_steps": 565, "total_steps": 1600, "loss": 0.2099, "lr": 8.18039110138882e-06, "epoch": 7.0625, "percentage": 35.31, "elapsed_time": "0:00:56", "remaining_time": "0:01:43", "throughput": 3120.85, "total_tokens": 177024} |
| {"current_steps": 570, "total_steps": 1600, "loss": 0.2225, "lr": 8.138117126246951e-06, "epoch": 7.125, "percentage": 35.62, "elapsed_time": "0:00:57", "remaining_time": "0:01:43", "throughput": 3125.7, "total_tokens": 178688} |
| {"current_steps": 575, "total_steps": 1600, "loss": 0.2322, "lr": 8.095469746549172e-06, "epoch": 7.1875, "percentage": 35.94, "elapsed_time": "0:00:57", "remaining_time": "0:01:42", "throughput": 3129.5, "total_tokens": 180288} |
| {"current_steps": 580, "total_steps": 1600, "loss": 0.182, "lr": 8.052454036907174e-06, "epoch": 7.25, "percentage": 36.25, "elapsed_time": "0:00:58", "remaining_time": "0:01:42", "throughput": 3132.22, "total_tokens": 181824} |
| {"current_steps": 585, "total_steps": 1600, "loss": 0.2071, "lr": 8.009075115760243e-06, "epoch": 7.3125, "percentage": 36.56, "elapsed_time": "0:00:58", "remaining_time": "0:01:41", "throughput": 3136.01, "total_tokens": 183424} |
| {"current_steps": 590, "total_steps": 1600, "loss": 0.2756, "lr": 7.965338144766186e-06, "epoch": 7.375, "percentage": 36.88, "elapsed_time": "0:00:58", "remaining_time": "0:01:40", "throughput": 3139.24, "total_tokens": 184992} |
| {"current_steps": 595, "total_steps": 1600, "loss": 0.2801, "lr": 7.921248328187174e-06, "epoch": 7.4375, "percentage": 37.19, "elapsed_time": "0:00:59", "remaining_time": "0:01:40", "throughput": 3142.32, "total_tokens": 186560} |
| {"current_steps": 600, "total_steps": 1600, "loss": 0.2051, "lr": 7.876810912270462e-06, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:00:59", "remaining_time": "0:01:39", "throughput": 3145.58, "total_tokens": 188128} |
| {"current_steps": 605, "total_steps": 1600, "loss": 0.1911, "lr": 7.832031184624165e-06, "epoch": 7.5625, "percentage": 37.81, "elapsed_time": "0:01:00", "remaining_time": "0:01:39", "throughput": 3148.08, "total_tokens": 189664} |
| {"current_steps": 610, "total_steps": 1600, "loss": 0.2261, "lr": 7.786914473588057e-06, "epoch": 7.625, "percentage": 38.12, "elapsed_time": "0:01:00", "remaining_time": "0:01:38", "throughput": 3151.47, "total_tokens": 191264} |
| {"current_steps": 615, "total_steps": 1600, "loss": 0.2238, "lr": 7.74146614759957e-06, "epoch": 7.6875, "percentage": 38.44, "elapsed_time": "0:01:01", "remaining_time": "0:01:37", "throughput": 3154.51, "total_tokens": 192832} |
| {"current_steps": 620, "total_steps": 1600, "loss": 0.2306, "lr": 7.695691614555002e-06, "epoch": 7.75, "percentage": 38.75, "elapsed_time": "0:01:01", "remaining_time": "0:01:37", "throughput": 3157.91, "total_tokens": 194432} |
| {"current_steps": 625, "total_steps": 1600, "loss": 0.1993, "lr": 7.649596321166024e-06, "epoch": 7.8125, "percentage": 39.06, "elapsed_time": "0:01:02", "remaining_time": "0:01:36", "throughput": 3160.87, "total_tokens": 196000} |
| {"current_steps": 630, "total_steps": 1600, "loss": 0.2073, "lr": 7.603185752311587e-06, "epoch": 7.875, "percentage": 39.38, "elapsed_time": "0:01:02", "remaining_time": "0:01:36", "throughput": 3163.23, "total_tokens": 197536} |
| {"current_steps": 635, "total_steps": 1600, "loss": 0.2468, "lr": 7.55646543038526e-06, "epoch": 7.9375, "percentage": 39.69, "elapsed_time": "0:01:02", "remaining_time": "0:01:35", "throughput": 3165.93, "total_tokens": 199104} |
| {"current_steps": 640, "total_steps": 1600, "loss": 0.2214, "lr": 7.50944091463814e-06, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:01:03", "remaining_time": "0:01:35", "throughput": 3166.38, "total_tokens": 200640} |
| {"current_steps": 640, "total_steps": 1600, "eval_loss": 0.2479616105556488, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:01:04", "remaining_time": "0:01:36", "throughput": 3122.18, "total_tokens": 200640} |
| {"current_steps": 645, "total_steps": 1600, "loss": 0.244, "lr": 7.462117800517337e-06, "epoch": 8.0625, "percentage": 40.31, "elapsed_time": "0:01:05", "remaining_time": "0:01:37", "throughput": 3075.38, "total_tokens": 202208} |
| {"current_steps": 650, "total_steps": 1600, "loss": 0.1986, "lr": 7.414501719000187e-06, "epoch": 8.125, "percentage": 40.62, "elapsed_time": "0:01:06", "remaining_time": "0:01:36", "throughput": 3078.23, "total_tokens": 203744} |
| {"current_steps": 655, "total_steps": 1600, "loss": 0.2042, "lr": 7.3665983359242175e-06, "epoch": 8.1875, "percentage": 40.94, "elapsed_time": "0:01:06", "remaining_time": "0:01:36", "throughput": 3081.54, "total_tokens": 205312} |
| {"current_steps": 660, "total_steps": 1600, "loss": 0.1991, "lr": 7.318413351312965e-06, "epoch": 8.25, "percentage": 41.25, "elapsed_time": "0:01:07", "remaining_time": "0:01:35", "throughput": 3084.27, "total_tokens": 206848} |
| {"current_steps": 665, "total_steps": 1600, "loss": 0.2579, "lr": 7.269952498697734e-06, "epoch": 8.3125, "percentage": 41.56, "elapsed_time": "0:01:07", "remaining_time": "0:01:34", "throughput": 3086.9, "total_tokens": 208384} |
| {"current_steps": 670, "total_steps": 1600, "loss": 0.2221, "lr": 7.221221544435364e-06, "epoch": 8.375, "percentage": 41.88, "elapsed_time": "0:01:07", "remaining_time": "0:01:34", "throughput": 3089.68, "total_tokens": 209920} |
| {"current_steps": 675, "total_steps": 1600, "loss": 0.2432, "lr": 7.172226287022086e-06, "epoch": 8.4375, "percentage": 42.19, "elapsed_time": "0:01:08", "remaining_time": "0:01:33", "throughput": 3092.71, "total_tokens": 211488} |
| {"current_steps": 680, "total_steps": 1600, "loss": 0.2378, "lr": 7.1229725564035665e-06, "epoch": 8.5, "percentage": 42.5, "elapsed_time": "0:01:08", "remaining_time": "0:01:33", "throughput": 3095.57, "total_tokens": 213024} |
| {"current_steps": 685, "total_steps": 1600, "loss": 0.2169, "lr": 7.073466213281196e-06, "epoch": 8.5625, "percentage": 42.81, "elapsed_time": "0:01:09", "remaining_time": "0:01:32", "throughput": 3099.28, "total_tokens": 214656} |
| {"current_steps": 690, "total_steps": 1600, "loss": 0.2207, "lr": 7.023713148414728e-06, "epoch": 8.625, "percentage": 43.12, "elapsed_time": "0:01:09", "remaining_time": "0:01:31", "throughput": 3102.67, "total_tokens": 216256} |
| {"current_steps": 695, "total_steps": 1600, "loss": 0.2896, "lr": 6.973719281921336e-06, "epoch": 8.6875, "percentage": 43.44, "elapsed_time": "0:01:10", "remaining_time": "0:01:31", "throughput": 3105.93, "total_tokens": 217856} |
| {"current_steps": 700, "total_steps": 1600, "loss": 0.212, "lr": 6.9234905625711816e-06, "epoch": 8.75, "percentage": 43.75, "elapsed_time": "0:01:10", "remaining_time": "0:01:30", "throughput": 3108.78, "total_tokens": 219424} |
| {"current_steps": 705, "total_steps": 1600, "loss": 0.2271, "lr": 6.873032967079562e-06, "epoch": 8.8125, "percentage": 44.06, "elapsed_time": "0:01:11", "remaining_time": "0:01:30", "throughput": 3111.66, "total_tokens": 220992} |
| {"current_steps": 710, "total_steps": 1600, "loss": 0.2088, "lr": 6.822352499395751e-06, "epoch": 8.875, "percentage": 44.38, "elapsed_time": "0:01:11", "remaining_time": "0:01:29", "throughput": 3114.44, "total_tokens": 222560} |
| {"current_steps": 715, "total_steps": 1600, "loss": 0.2192, "lr": 6.771455189988579e-06, "epoch": 8.9375, "percentage": 44.69, "elapsed_time": "0:01:11", "remaining_time": "0:01:28", "throughput": 3118.1, "total_tokens": 224192} |
| {"current_steps": 720, "total_steps": 1600, "loss": 0.2221, "lr": 6.720347095128884e-06, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:01:12", "remaining_time": "0:01:28", "throughput": 3118.81, "total_tokens": 225728} |
| {"current_steps": 725, "total_steps": 1600, "loss": 0.1935, "lr": 6.669034296168855e-06, "epoch": 9.0625, "percentage": 45.31, "elapsed_time": "0:01:12", "remaining_time": "0:01:27", "throughput": 3118.64, "total_tokens": 227296} |
| {"current_steps": 730, "total_steps": 1600, "loss": 0.2205, "lr": 6.617522898818426e-06, "epoch": 9.125, "percentage": 45.62, "elapsed_time": "0:01:13", "remaining_time": "0:01:27", "throughput": 3120.9, "total_tokens": 228832} |
| {"current_steps": 735, "total_steps": 1600, "loss": 0.1967, "lr": 6.565819032418748e-06, "epoch": 9.1875, "percentage": 45.94, "elapsed_time": "0:01:13", "remaining_time": "0:01:26", "throughput": 3123.47, "total_tokens": 230400} |
| {"current_steps": 740, "total_steps": 1600, "loss": 0.1839, "lr": 6.513928849212874e-06, "epoch": 9.25, "percentage": 46.25, "elapsed_time": "0:01:14", "remaining_time": "0:01:26", "throughput": 3126.88, "total_tokens": 232032} |
| {"current_steps": 745, "total_steps": 1600, "loss": 0.2249, "lr": 6.461858523613684e-06, "epoch": 9.3125, "percentage": 46.56, "elapsed_time": "0:01:14", "remaining_time": "0:01:25", "throughput": 3128.69, "total_tokens": 233536} |
| {"current_steps": 750, "total_steps": 1600, "loss": 0.2029, "lr": 6.4096142514692085e-06, "epoch": 9.375, "percentage": 46.88, "elapsed_time": "0:01:15", "remaining_time": "0:01:25", "throughput": 3130.96, "total_tokens": 235072} |
| {"current_steps": 755, "total_steps": 1600, "loss": 0.1957, "lr": 6.3572022493253715e-06, "epoch": 9.4375, "percentage": 47.19, "elapsed_time": "0:01:15", "remaining_time": "0:01:24", "throughput": 3133.17, "total_tokens": 236608} |
| {"current_steps": 760, "total_steps": 1600, "loss": 0.1997, "lr": 6.304628753686295e-06, "epoch": 9.5, "percentage": 47.5, "elapsed_time": "0:01:15", "remaining_time": "0:01:23", "throughput": 3135.68, "total_tokens": 238176} |
| {"current_steps": 765, "total_steps": 1600, "loss": 0.2213, "lr": 6.251900020272208e-06, "epoch": 9.5625, "percentage": 47.81, "elapsed_time": "0:01:16", "remaining_time": "0:01:23", "throughput": 3137.86, "total_tokens": 239712} |
| {"current_steps": 770, "total_steps": 1600, "loss": 0.1946, "lr": 6.199022323275083e-06, "epoch": 9.625, "percentage": 48.12, "elapsed_time": "0:01:16", "remaining_time": "0:01:22", "throughput": 3140.41, "total_tokens": 241280} |
| {"current_steps": 775, "total_steps": 1600, "loss": 0.2131, "lr": 6.146001954612072e-06, "epoch": 9.6875, "percentage": 48.44, "elapsed_time": "0:01:17", "remaining_time": "0:01:22", "throughput": 3142.63, "total_tokens": 242816} |
| {"current_steps": 780, "total_steps": 1600, "loss": 0.1925, "lr": 6.092845223176823e-06, "epoch": 9.75, "percentage": 48.75, "elapsed_time": "0:01:17", "remaining_time": "0:01:21", "throughput": 3145.71, "total_tokens": 244448} |
| {"current_steps": 785, "total_steps": 1600, "loss": 0.2008, "lr": 6.039558454088796e-06, "epoch": 9.8125, "percentage": 49.06, "elapsed_time": "0:01:18", "remaining_time": "0:01:21", "throughput": 3148.65, "total_tokens": 246080} |
| {"current_steps": 790, "total_steps": 1600, "loss": 0.1932, "lr": 5.986147987940632e-06, "epoch": 9.875, "percentage": 49.38, "elapsed_time": "0:01:18", "remaining_time": "0:01:20", "throughput": 3151.26, "total_tokens": 247680} |
| {"current_steps": 795, "total_steps": 1600, "loss": 0.1862, "lr": 5.932620180043674e-06, "epoch": 9.9375, "percentage": 49.69, "elapsed_time": "0:01:19", "remaining_time": "0:01:20", "throughput": 3153.67, "total_tokens": 249248} |
| {"current_steps": 800, "total_steps": 1600, "loss": 0.2534, "lr": 5.878981399671774e-06, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:01:19", "remaining_time": "0:01:19", "throughput": 3154.8, "total_tokens": 250848} |
| {"current_steps": 800, "total_steps": 1600, "eval_loss": 0.25115442276000977, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:01:20", "remaining_time": "0:01:20", "throughput": 3119.55, "total_tokens": 250848} |
| {"current_steps": 805, "total_steps": 1600, "loss": 0.1779, "lr": 5.825238029303388e-06, "epoch": 10.0625, "percentage": 50.31, "elapsed_time": "0:01:21", "remaining_time": "0:01:20", "throughput": 3078.85, "total_tokens": 252416} |
| {"current_steps": 810, "total_steps": 1600, "loss": 0.1778, "lr": 5.771396463862145e-06, "epoch": 10.125, "percentage": 50.62, "elapsed_time": "0:01:22", "remaining_time": "0:01:20", "throughput": 3081.43, "total_tokens": 253984} |
| {"current_steps": 815, "total_steps": 1600, "loss": 0.1989, "lr": 5.717463109955896e-06, "epoch": 10.1875, "percentage": 50.94, "elapsed_time": "0:01:22", "remaining_time": "0:01:19", "throughput": 3083.95, "total_tokens": 255552} |
| {"current_steps": 820, "total_steps": 1600, "loss": 0.2326, "lr": 5.6634443851144115e-06, "epoch": 10.25, "percentage": 51.25, "elapsed_time": "0:01:23", "remaining_time": "0:01:19", "throughput": 3086.79, "total_tokens": 257152} |
| {"current_steps": 825, "total_steps": 1600, "loss": 0.1754, "lr": 5.609346717025738e-06, "epoch": 10.3125, "percentage": 51.56, "elapsed_time": "0:01:23", "remaining_time": "0:01:18", "throughput": 3089.44, "total_tokens": 258720} |
| {"current_steps": 830, "total_steps": 1600, "loss": 0.1945, "lr": 5.555176542771389e-06, "epoch": 10.375, "percentage": 51.88, "elapsed_time": "0:01:24", "remaining_time": "0:01:18", "throughput": 3091.53, "total_tokens": 260256} |
| {"current_steps": 835, "total_steps": 1600, "loss": 0.1826, "lr": 5.500940308060382e-06, "epoch": 10.4375, "percentage": 52.19, "elapsed_time": "0:01:24", "remaining_time": "0:01:17", "throughput": 3093.71, "total_tokens": 261792} |
| {"current_steps": 840, "total_steps": 1600, "loss": 0.16, "lr": 5.446644466462269e-06, "epoch": 10.5, "percentage": 52.5, "elapsed_time": "0:01:25", "remaining_time": "0:01:16", "throughput": 3096.17, "total_tokens": 263360} |
| {"current_steps": 845, "total_steps": 1600, "loss": 0.2006, "lr": 5.392295478639226e-06, "epoch": 10.5625, "percentage": 52.81, "elapsed_time": "0:01:25", "remaining_time": "0:01:16", "throughput": 3098.51, "total_tokens": 264928} |
| {"current_steps": 850, "total_steps": 1600, "loss": 0.1788, "lr": 5.337899811577297e-06, "epoch": 10.625, "percentage": 53.12, "elapsed_time": "0:01:25", "remaining_time": "0:01:15", "throughput": 3100.95, "total_tokens": 266496} |
| {"current_steps": 855, "total_steps": 1600, "loss": 0.1676, "lr": 5.283463937816888e-06, "epoch": 10.6875, "percentage": 53.44, "elapsed_time": "0:01:26", "remaining_time": "0:01:15", "throughput": 3103.35, "total_tokens": 268064} |
| {"current_steps": 860, "total_steps": 1600, "loss": 0.2262, "lr": 5.228994334682605e-06, "epoch": 10.75, "percentage": 53.75, "elapsed_time": "0:01:26", "remaining_time": "0:01:14", "throughput": 3106.27, "total_tokens": 269696} |
| {"current_steps": 865, "total_steps": 1600, "loss": 0.1796, "lr": 5.174497483512506e-06, "epoch": 10.8125, "percentage": 54.06, "elapsed_time": "0:01:27", "remaining_time": "0:01:14", "throughput": 3108.55, "total_tokens": 271264} |
| {"current_steps": 870, "total_steps": 1600, "loss": 0.1535, "lr": 5.1199798688868955e-06, "epoch": 10.875, "percentage": 54.37, "elapsed_time": "0:01:27", "remaining_time": "0:01:13", "throughput": 3110.79, "total_tokens": 272832} |
| {"current_steps": 875, "total_steps": 1600, "loss": 0.135, "lr": 5.065447977856723e-06, "epoch": 10.9375, "percentage": 54.69, "elapsed_time": "0:01:28", "remaining_time": "0:01:13", "throughput": 3113.07, "total_tokens": 274400} |
| {"current_steps": 880, "total_steps": 1600, "loss": 0.1739, "lr": 5.010908299171685e-06, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:01:28", "remaining_time": "0:01:12", "throughput": 3114.25, "total_tokens": 276000} |
| {"current_steps": 885, "total_steps": 1600, "loss": 0.1623, "lr": 4.956367322508131e-06, "epoch": 11.0625, "percentage": 55.31, "elapsed_time": "0:01:29", "remaining_time": "0:01:12", "throughput": 3114.41, "total_tokens": 277600} |
| {"current_steps": 890, "total_steps": 1600, "loss": 0.1268, "lr": 4.90183153769686e-06, "epoch": 11.125, "percentage": 55.62, "elapsed_time": "0:01:29", "remaining_time": "0:01:11", "throughput": 3116.34, "total_tokens": 279136} |
| {"current_steps": 895, "total_steps": 1600, "loss": 0.1662, "lr": 4.847307433950888e-06, "epoch": 11.1875, "percentage": 55.94, "elapsed_time": "0:01:30", "remaining_time": "0:01:10", "throughput": 3119.07, "total_tokens": 280768} |
| {"current_steps": 900, "total_steps": 1600, "loss": 0.1407, "lr": 4.792801499093305e-06, "epoch": 11.25, "percentage": 56.25, "elapsed_time": "0:01:30", "remaining_time": "0:01:10", "throughput": 3120.44, "total_tokens": 282304} |
| {"current_steps": 905, "total_steps": 1600, "loss": 0.1264, "lr": 4.738320218785281e-06, "epoch": 11.3125, "percentage": 56.56, "elapsed_time": "0:01:30", "remaining_time": "0:01:09", "throughput": 3122.51, "total_tokens": 283872} |
| {"current_steps": 910, "total_steps": 1600, "loss": 0.1338, "lr": 4.683870075754347e-06, "epoch": 11.375, "percentage": 56.88, "elapsed_time": "0:01:31", "remaining_time": "0:01:09", "throughput": 3124.95, "total_tokens": 285472} |
| {"current_steps": 915, "total_steps": 1600, "loss": 0.1449, "lr": 4.629457549023004e-06, "epoch": 11.4375, "percentage": 57.19, "elapsed_time": "0:01:31", "remaining_time": "0:01:08", "throughput": 3126.7, "total_tokens": 287008} |
| {"current_steps": 920, "total_steps": 1600, "loss": 0.1748, "lr": 4.575089113137792e-06, "epoch": 11.5, "percentage": 57.5, "elapsed_time": "0:01:32", "remaining_time": "0:01:08", "throughput": 3128.51, "total_tokens": 288544} |
| {"current_steps": 925, "total_steps": 1600, "loss": 0.1324, "lr": 4.52077123739888e-06, "epoch": 11.5625, "percentage": 57.81, "elapsed_time": "0:01:32", "remaining_time": "0:01:07", "throughput": 3130.92, "total_tokens": 290144} |
| {"current_steps": 930, "total_steps": 1600, "loss": 0.1208, "lr": 4.466510385090287e-06, "epoch": 11.625, "percentage": 58.13, "elapsed_time": "0:01:33", "remaining_time": "0:01:07", "throughput": 3133.25, "total_tokens": 291744} |
| {"current_steps": 935, "total_steps": 1600, "loss": 0.1986, "lr": 4.4123130127108125e-06, "epoch": 11.6875, "percentage": 58.44, "elapsed_time": "0:01:33", "remaining_time": "0:01:06", "throughput": 3135.0, "total_tokens": 293280} |
| {"current_steps": 940, "total_steps": 1600, "loss": 0.114, "lr": 4.358185569205779e-06, "epoch": 11.75, "percentage": 58.75, "elapsed_time": "0:01:33", "remaining_time": "0:01:05", "throughput": 3136.81, "total_tokens": 294816} |
| {"current_steps": 945, "total_steps": 1600, "loss": 0.1739, "lr": 4.304134495199675e-06, "epoch": 11.8125, "percentage": 59.06, "elapsed_time": "0:01:34", "remaining_time": "0:01:05", "throughput": 3139.06, "total_tokens": 296416} |
| {"current_steps": 950, "total_steps": 1600, "loss": 0.1151, "lr": 4.250166222229775e-06, "epoch": 11.875, "percentage": 59.38, "elapsed_time": "0:01:34", "remaining_time": "0:01:04", "throughput": 3141.04, "total_tokens": 297984} |
| {"current_steps": 955, "total_steps": 1600, "loss": 0.137, "lr": 4.196287171980869e-06, "epoch": 11.9375, "percentage": 59.69, "elapsed_time": "0:01:35", "remaining_time": "0:01:04", "throughput": 3143.14, "total_tokens": 299552} |
| {"current_steps": 960, "total_steps": 1600, "loss": 0.1347, "lr": 4.142503755521129e-06, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:01:35", "remaining_time": "0:01:03", "throughput": 3143.53, "total_tokens": 301088} |
| {"current_steps": 960, "total_steps": 1600, "eval_loss": 0.30825209617614746, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:01:36", "remaining_time": "0:01:04", "throughput": 3114.13, "total_tokens": 301088} |
| {"current_steps": 965, "total_steps": 1600, "loss": 0.0946, "lr": 4.088822372539263e-06, "epoch": 12.0625, "percentage": 60.31, "elapsed_time": "0:01:38", "remaining_time": "0:01:04", "throughput": 3080.24, "total_tokens": 302656} |
| {"current_steps": 970, "total_steps": 1600, "loss": 0.1192, "lr": 4.0352494105830155e-06, "epoch": 12.125, "percentage": 60.62, "elapsed_time": "0:01:38", "remaining_time": "0:01:04", "throughput": 3082.7, "total_tokens": 304256} |
| {"current_steps": 975, "total_steps": 1600, "loss": 0.0872, "lr": 3.981791244299113e-06, "epoch": 12.1875, "percentage": 60.94, "elapsed_time": "0:01:39", "remaining_time": "0:01:03", "throughput": 3084.6, "total_tokens": 305792} |
| {"current_steps": 980, "total_steps": 1600, "loss": 0.1315, "lr": 3.928454234674748e-06, "epoch": 12.25, "percentage": 61.25, "elapsed_time": "0:01:39", "remaining_time": "0:01:02", "throughput": 3086.94, "total_tokens": 307392} |
| {"current_steps": 985, "total_steps": 1600, "loss": 0.1385, "lr": 3.875244728280676e-06, "epoch": 12.3125, "percentage": 61.56, "elapsed_time": "0:01:40", "remaining_time": "0:01:02", "throughput": 3088.75, "total_tokens": 308928} |
| {"current_steps": 990, "total_steps": 1600, "loss": 0.0713, "lr": 3.822169056516051e-06, "epoch": 12.375, "percentage": 61.88, "elapsed_time": "0:01:40", "remaining_time": "0:01:01", "throughput": 3090.56, "total_tokens": 310464} |
| {"current_steps": 995, "total_steps": 1600, "loss": 0.0956, "lr": 3.769233534855035e-06, "epoch": 12.4375, "percentage": 62.19, "elapsed_time": "0:01:40", "remaining_time": "0:01:01", "throughput": 3092.63, "total_tokens": 312032} |
| {"current_steps": 1000, "total_steps": 1600, "loss": 0.0841, "lr": 3.7164444620953397e-06, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:01:41", "remaining_time": "0:01:00", "throughput": 3095.07, "total_tokens": 313632} |
| {"current_steps": 1005, "total_steps": 1600, "loss": 0.1103, "lr": 3.663808119608716e-06, "epoch": 12.5625, "percentage": 62.81, "elapsed_time": "0:01:41", "remaining_time": "0:01:00", "throughput": 3097.17, "total_tokens": 315200} |
| {"current_steps": 1010, "total_steps": 1600, "loss": 0.0866, "lr": 3.6113307705935398e-06, "epoch": 12.625, "percentage": 63.12, "elapsed_time": "0:01:42", "remaining_time": "0:00:59", "throughput": 3099.71, "total_tokens": 316832} |
| {"current_steps": 1015, "total_steps": 1600, "loss": 0.1049, "lr": 3.559018659329554e-06, "epoch": 12.6875, "percentage": 63.44, "elapsed_time": "0:01:42", "remaining_time": "0:00:59", "throughput": 3101.17, "total_tokens": 318336} |
| {"current_steps": 1020, "total_steps": 1600, "loss": 0.0818, "lr": 3.5068780104348632e-06, "epoch": 12.75, "percentage": 63.75, "elapsed_time": "0:01:43", "remaining_time": "0:00:58", "throughput": 3103.46, "total_tokens": 319936} |
| {"current_steps": 1025, "total_steps": 1600, "loss": 0.1129, "lr": 3.4549150281252635e-06, "epoch": 12.8125, "percentage": 64.06, "elapsed_time": "0:01:43", "remaining_time": "0:00:58", "throughput": 3105.37, "total_tokens": 321504} |
| {"current_steps": 1030, "total_steps": 1600, "loss": 0.1367, "lr": 3.403135895476004e-06, "epoch": 12.875, "percentage": 64.38, "elapsed_time": "0:01:43", "remaining_time": "0:00:57", "throughput": 3107.93, "total_tokens": 323136} |
| {"current_steps": 1035, "total_steps": 1600, "loss": 0.157, "lr": 3.351546773686065e-06, "epoch": 12.9375, "percentage": 64.69, "elapsed_time": "0:01:44", "remaining_time": "0:00:56", "throughput": 3110.13, "total_tokens": 324736} |
| {"current_steps": 1040, "total_steps": 1600, "loss": 0.109, "lr": 3.3001538013450285e-06, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:01:44", "remaining_time": "0:00:56", "throughput": 3110.3, "total_tokens": 326240} |
| {"current_steps": 1045, "total_steps": 1600, "loss": 0.0474, "lr": 3.248963093702663e-06, "epoch": 13.0625, "percentage": 65.31, "elapsed_time": "0:01:45", "remaining_time": "0:00:55", "throughput": 3109.35, "total_tokens": 327744} |
| {"current_steps": 1050, "total_steps": 1600, "loss": 0.0886, "lr": 3.1979807419412523e-06, "epoch": 13.125, "percentage": 65.62, "elapsed_time": "0:01:45", "remaining_time": "0:00:55", "throughput": 3110.95, "total_tokens": 329280} |
| {"current_steps": 1055, "total_steps": 1600, "loss": 0.0442, "lr": 3.147212812450819e-06, "epoch": 13.1875, "percentage": 65.94, "elapsed_time": "0:01:46", "remaining_time": "0:00:54", "throughput": 3113.1, "total_tokens": 330880} |
| {"current_steps": 1060, "total_steps": 1600, "loss": 0.052, "lr": 3.0966653461072778e-06, "epoch": 13.25, "percentage": 66.25, "elapsed_time": "0:01:46", "remaining_time": "0:00:54", "throughput": 3115.25, "total_tokens": 332480} |
| {"current_steps": 1065, "total_steps": 1600, "loss": 0.0648, "lr": 3.0463443575536324e-06, "epoch": 13.3125, "percentage": 66.56, "elapsed_time": "0:01:47", "remaining_time": "0:00:53", "throughput": 3117.41, "total_tokens": 334080} |
| {"current_steps": 1070, "total_steps": 1600, "loss": 0.047, "lr": 2.9962558344842963e-06, "epoch": 13.375, "percentage": 66.88, "elapsed_time": "0:01:47", "remaining_time": "0:00:53", "throughput": 3119.21, "total_tokens": 335648} |
| {"current_steps": 1075, "total_steps": 1600, "loss": 0.093, "lr": 2.946405736932615e-06, "epoch": 13.4375, "percentage": 67.19, "elapsed_time": "0:01:48", "remaining_time": "0:00:52", "throughput": 3120.85, "total_tokens": 337184} |
| {"current_steps": 1080, "total_steps": 1600, "loss": 0.0829, "lr": 2.8967999965616815e-06, "epoch": 13.5, "percentage": 67.5, "elapsed_time": "0:01:48", "remaining_time": "0:00:52", "throughput": 3122.7, "total_tokens": 338752} |
| {"current_steps": 1085, "total_steps": 1600, "loss": 0.0851, "lr": 2.8474445159585235e-06, "epoch": 13.5625, "percentage": 67.81, "elapsed_time": "0:01:48", "remaining_time": "0:00:51", "throughput": 3124.84, "total_tokens": 340352} |
| {"current_steps": 1090, "total_steps": 1600, "loss": 0.1282, "lr": 2.798345167931771e-06, "epoch": 13.625, "percentage": 68.12, "elapsed_time": "0:01:49", "remaining_time": "0:00:51", "throughput": 3126.58, "total_tokens": 341920} |
| {"current_steps": 1095, "total_steps": 1600, "loss": 0.0588, "lr": 2.7495077948128245e-06, "epoch": 13.6875, "percentage": 68.44, "elapsed_time": "0:01:49", "remaining_time": "0:00:50", "throughput": 3128.92, "total_tokens": 343552} |
| {"current_steps": 1100, "total_steps": 1600, "loss": 0.1282, "lr": 2.700938207760701e-06, "epoch": 13.75, "percentage": 68.75, "elapsed_time": "0:01:50", "remaining_time": "0:00:50", "throughput": 3130.74, "total_tokens": 345120} |
| {"current_steps": 1105, "total_steps": 1600, "loss": 0.0794, "lr": 2.6526421860705474e-06, "epoch": 13.8125, "percentage": 69.06, "elapsed_time": "0:01:50", "remaining_time": "0:00:49", "throughput": 3132.68, "total_tokens": 346720} |
| {"current_steps": 1110, "total_steps": 1600, "loss": 0.0996, "lr": 2.6046254764859687e-06, "epoch": 13.875, "percentage": 69.38, "elapsed_time": "0:01:51", "remaining_time": "0:00:49", "throughput": 3134.18, "total_tokens": 348256} |
| {"current_steps": 1115, "total_steps": 1600, "loss": 0.0473, "lr": 2.5568937925152272e-06, "epoch": 13.9375, "percentage": 69.69, "elapsed_time": "0:01:51", "remaining_time": "0:00:48", "throughput": 3135.86, "total_tokens": 349824} |
| {"current_steps": 1120, "total_steps": 1600, "loss": 0.0685, "lr": 2.5094528137513797e-06, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:01:52", "remaining_time": "0:00:48", "throughput": 3136.7, "total_tokens": 351424} |
| {"current_steps": 1120, "total_steps": 1600, "eval_loss": 0.4576021730899811, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:01:52", "remaining_time": "0:00:48", "throughput": 3111.4, "total_tokens": 351424} |
| {"current_steps": 1125, "total_steps": 1600, "loss": 0.0726, "lr": 2.462308185196481e-06, "epoch": 14.0625, "percentage": 70.31, "elapsed_time": "0:01:54", "remaining_time": "0:00:48", "throughput": 3082.68, "total_tokens": 352992} |
| {"current_steps": 1130, "total_steps": 1600, "loss": 0.0306, "lr": 2.4154655165898626e-06, "epoch": 14.125, "percentage": 70.62, "elapsed_time": "0:01:54", "remaining_time": "0:00:47", "throughput": 3084.49, "total_tokens": 354560} |
| {"current_steps": 1135, "total_steps": 1600, "loss": 0.0468, "lr": 2.3689303817406523e-06, "epoch": 14.1875, "percentage": 70.94, "elapsed_time": "0:01:55", "remaining_time": "0:00:47", "throughput": 3086.39, "total_tokens": 356128} |
| {"current_steps": 1140, "total_steps": 1600, "loss": 0.0455, "lr": 2.3227083178645316e-06, "epoch": 14.25, "percentage": 71.25, "elapsed_time": "0:01:55", "remaining_time": "0:00:46", "throughput": 3088.09, "total_tokens": 357696} |
| {"current_steps": 1145, "total_steps": 1600, "loss": 0.0545, "lr": 2.2768048249248648e-06, "epoch": 14.3125, "percentage": 71.56, "elapsed_time": "0:01:56", "remaining_time": "0:00:46", "throughput": 3090.49, "total_tokens": 359328} |
| {"current_steps": 1150, "total_steps": 1600, "loss": 0.0488, "lr": 2.2312253649782655e-06, "epoch": 14.375, "percentage": 71.88, "elapsed_time": "0:01:56", "remaining_time": "0:00:45", "throughput": 3091.6, "total_tokens": 360800} |
| {"current_steps": 1155, "total_steps": 1600, "loss": 0.083, "lr": 2.185975361524657e-06, "epoch": 14.4375, "percentage": 72.19, "elapsed_time": "0:01:57", "remaining_time": "0:00:45", "throughput": 3093.38, "total_tokens": 362368} |
| {"current_steps": 1160, "total_steps": 1600, "loss": 0.0194, "lr": 2.1410601988619394e-06, "epoch": 14.5, "percentage": 72.5, "elapsed_time": "0:01:57", "remaining_time": "0:00:44", "throughput": 3095.13, "total_tokens": 363936} |
| {"current_steps": 1165, "total_steps": 1600, "loss": 0.0569, "lr": 2.096485221445301e-06, "epoch": 14.5625, "percentage": 72.81, "elapsed_time": "0:01:58", "remaining_time": "0:00:44", "throughput": 3096.68, "total_tokens": 365472} |
| {"current_steps": 1170, "total_steps": 1600, "loss": 0.0576, "lr": 2.0522557332512953e-06, "epoch": 14.625, "percentage": 73.12, "elapsed_time": "0:01:58", "remaining_time": "0:00:43", "throughput": 3098.71, "total_tokens": 367072} |
| {"current_steps": 1175, "total_steps": 1600, "loss": 0.067, "lr": 2.008376997146705e-06, "epoch": 14.6875, "percentage": 73.44, "elapsed_time": "0:01:58", "remaining_time": "0:00:43", "throughput": 3100.44, "total_tokens": 368640} |
| {"current_steps": 1180, "total_steps": 1600, "loss": 0.04, "lr": 1.9648542342623276e-06, "epoch": 14.75, "percentage": 73.75, "elapsed_time": "0:01:59", "remaining_time": "0:00:42", "throughput": 3102.24, "total_tokens": 370208} |
| {"current_steps": 1185, "total_steps": 1600, "loss": 0.0493, "lr": 1.9216926233717087e-06, "epoch": 14.8125, "percentage": 74.06, "elapsed_time": "0:01:59", "remaining_time": "0:00:41", "throughput": 3103.73, "total_tokens": 371744} |
| {"current_steps": 1190, "total_steps": 1600, "loss": 0.0513, "lr": 1.8788973002749112e-06, "epoch": 14.875, "percentage": 74.38, "elapsed_time": "0:02:00", "remaining_time": "0:00:41", "throughput": 3105.52, "total_tokens": 373344} |
| {"current_steps": 1195, "total_steps": 1600, "loss": 0.0918, "lr": 1.83647335718742e-06, "epoch": 14.9375, "percentage": 74.69, "elapsed_time": "0:02:00", "remaining_time": "0:00:40", "throughput": 3107.04, "total_tokens": 374880} |
| {"current_steps": 1200, "total_steps": 1600, "loss": 0.0846, "lr": 1.7944258421342097e-06, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:02:01", "remaining_time": "0:00:40", "throughput": 3107.92, "total_tokens": 376480} |
| {"current_steps": 1205, "total_steps": 1600, "loss": 0.0102, "lr": 1.7527597583490825e-06, "epoch": 15.0625, "percentage": 75.31, "elapsed_time": "0:02:01", "remaining_time": "0:00:39", "throughput": 3107.36, "total_tokens": 378016} |
| {"current_steps": 1210, "total_steps": 1600, "loss": 0.015, "lr": 1.7114800636793378e-06, "epoch": 15.125, "percentage": 75.62, "elapsed_time": "0:02:02", "remaining_time": "0:00:39", "throughput": 3108.52, "total_tokens": 379520} |
| {"current_steps": 1215, "total_steps": 1600, "loss": 0.0231, "lr": 1.6705916699958292e-06, "epoch": 15.1875, "percentage": 75.94, "elapsed_time": "0:02:02", "remaining_time": "0:00:38", "throughput": 3110.34, "total_tokens": 381120} |
| {"current_steps": 1220, "total_steps": 1600, "loss": 0.0137, "lr": 1.6300994426085103e-06, "epoch": 15.25, "percentage": 76.25, "elapsed_time": "0:02:02", "remaining_time": "0:00:38", "throughput": 3111.97, "total_tokens": 382688} |
| {"current_steps": 1225, "total_steps": 1600, "loss": 0.0512, "lr": 1.5900081996875083e-06, "epoch": 15.3125, "percentage": 76.56, "elapsed_time": "0:02:03", "remaining_time": "0:00:37", "throughput": 3113.81, "total_tokens": 384288} |
| {"current_steps": 1230, "total_steps": 1600, "loss": 0.0156, "lr": 1.5503227116898017e-06, "epoch": 15.375, "percentage": 76.88, "elapsed_time": "0:02:03", "remaining_time": "0:00:37", "throughput": 3115.32, "total_tokens": 385856} |
| {"current_steps": 1235, "total_steps": 1600, "loss": 0.0159, "lr": 1.5110477007916002e-06, "epoch": 15.4375, "percentage": 77.19, "elapsed_time": "0:02:04", "remaining_time": "0:00:36", "throughput": 3116.97, "total_tokens": 387424} |
| {"current_steps": 1240, "total_steps": 1600, "loss": 0.0558, "lr": 1.4721878403264344e-06, "epoch": 15.5, "percentage": 77.5, "elapsed_time": "0:02:04", "remaining_time": "0:00:36", "throughput": 3118.32, "total_tokens": 388960} |
| {"current_steps": 1245, "total_steps": 1600, "loss": 0.0458, "lr": 1.433747754229093e-06, "epoch": 15.5625, "percentage": 77.81, "elapsed_time": "0:02:05", "remaining_time": "0:00:35", "throughput": 3119.61, "total_tokens": 390496} |
| {"current_steps": 1250, "total_steps": 1600, "loss": 0.0871, "lr": 1.395732016485406e-06, "epoch": 15.625, "percentage": 78.12, "elapsed_time": "0:02:05", "remaining_time": "0:00:35", "throughput": 3121.2, "total_tokens": 392064} |
| {"current_steps": 1255, "total_steps": 1600, "loss": 0.0288, "lr": 1.3581451505879995e-06, "epoch": 15.6875, "percentage": 78.44, "elapsed_time": "0:02:06", "remaining_time": "0:00:34", "throughput": 3120.73, "total_tokens": 393632} |
| {"current_steps": 1260, "total_steps": 1600, "loss": 0.0395, "lr": 1.3209916289980336e-06, "epoch": 15.75, "percentage": 78.75, "elapsed_time": "0:02:06", "remaining_time": "0:00:34", "throughput": 3122.21, "total_tokens": 395200} |
| {"current_steps": 1265, "total_steps": 1600, "loss": 0.0766, "lr": 1.2842758726130283e-06, "epoch": 15.8125, "percentage": 79.06, "elapsed_time": "0:02:07", "remaining_time": "0:00:33", "throughput": 3123.52, "total_tokens": 396736} |
| {"current_steps": 1270, "total_steps": 1600, "loss": 0.0273, "lr": 1.2480022502408306e-06, "epoch": 15.875, "percentage": 79.38, "elapsed_time": "0:02:07", "remaining_time": "0:00:33", "throughput": 3125.13, "total_tokens": 398304} |
| {"current_steps": 1275, "total_steps": 1600, "loss": 0.0453, "lr": 1.2121750780797514e-06, "epoch": 15.9375, "percentage": 79.69, "elapsed_time": "0:02:07", "remaining_time": "0:00:32", "throughput": 3127.01, "total_tokens": 399936} |
| {"current_steps": 1280, "total_steps": 1600, "loss": 0.0203, "lr": 1.1767986192049986e-06, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:02:08", "remaining_time": "0:00:32", "throughput": 3127.49, "total_tokens": 401504} |
| {"current_steps": 1280, "total_steps": 1600, "eval_loss": 0.5269851684570312, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:02:09", "remaining_time": "0:00:32", "throughput": 3105.58, "total_tokens": 401504} |
| {"current_steps": 1285, "total_steps": 1600, "loss": 0.0167, "lr": 1.1418770830614012e-06, "epoch": 16.0625, "percentage": 80.31, "elapsed_time": "0:02:11", "remaining_time": "0:00:32", "throughput": 3072.02, "total_tokens": 403104} |
| {"current_steps": 1290, "total_steps": 1600, "loss": 0.0167, "lr": 1.1074146249625334e-06, "epoch": 16.125, "percentage": 80.62, "elapsed_time": "0:02:11", "remaining_time": "0:00:31", "throughput": 3073.49, "total_tokens": 404640} |
| {"current_steps": 1295, "total_steps": 1600, "loss": 0.0518, "lr": 1.0734153455962765e-06, "epoch": 16.1875, "percentage": 80.94, "elapsed_time": "0:02:12", "remaining_time": "0:00:31", "throughput": 3074.91, "total_tokens": 406176} |
| {"current_steps": 1300, "total_steps": 1600, "loss": 0.0211, "lr": 1.0398832905368693e-06, "epoch": 16.25, "percentage": 81.25, "elapsed_time": "0:02:12", "remaining_time": "0:00:30", "throughput": 3076.72, "total_tokens": 407776} |
| {"current_steps": 1305, "total_steps": 1600, "loss": 0.0067, "lr": 1.006822449763537e-06, "epoch": 16.3125, "percentage": 81.56, "elapsed_time": "0:02:12", "remaining_time": "0:00:30", "throughput": 3078.78, "total_tokens": 409408} |
| {"current_steps": 1310, "total_steps": 1600, "loss": 0.0449, "lr": 9.742367571857092e-07, "epoch": 16.375, "percentage": 81.88, "elapsed_time": "0:02:13", "remaining_time": "0:00:29", "throughput": 3080.63, "total_tokens": 411008} |
| {"current_steps": 1315, "total_steps": 1600, "loss": 0.009, "lr": 9.421300901749386e-07, "epoch": 16.4375, "percentage": 82.19, "elapsed_time": "0:02:13", "remaining_time": "0:00:29", "throughput": 3081.99, "total_tokens": 412544} |
| {"current_steps": 1320, "total_steps": 1600, "loss": 0.0235, "lr": 9.105062691035233e-07, "epoch": 16.5, "percentage": 82.5, "elapsed_time": "0:02:14", "remaining_time": "0:00:28", "throughput": 3083.39, "total_tokens": 414080} |
| {"current_steps": 1325, "total_steps": 1600, "loss": 0.0089, "lr": 8.793690568899216e-07, "epoch": 16.5625, "percentage": 82.81, "elapsed_time": "0:02:14", "remaining_time": "0:00:27", "throughput": 3085.15, "total_tokens": 415680} |
| {"current_steps": 1330, "total_steps": 1600, "loss": 0.0432, "lr": 8.487221585510075e-07, "epoch": 16.625, "percentage": 83.12, "elapsed_time": "0:02:15", "remaining_time": "0:00:27", "throughput": 3085.95, "total_tokens": 417216} |
| {"current_steps": 1335, "total_steps": 1600, "loss": 0.0995, "lr": 8.185692207612023e-07, "epoch": 16.6875, "percentage": 83.44, "elapsed_time": "0:02:15", "remaining_time": "0:00:26", "throughput": 3084.87, "total_tokens": 418784} |
| {"current_steps": 1340, "total_steps": 1600, "loss": 0.0101, "lr": 7.88913831418568e-07, "epoch": 16.75, "percentage": 83.75, "elapsed_time": "0:02:16", "remaining_time": "0:00:26", "throughput": 3085.78, "total_tokens": 420256} |
| {"current_steps": 1345, "total_steps": 1600, "loss": 0.0033, "lr": 7.597595192178702e-07, "epoch": 16.8125, "percentage": 84.06, "elapsed_time": "0:02:16", "remaining_time": "0:00:25", "throughput": 3087.31, "total_tokens": 421824} |
| {"current_steps": 1350, "total_steps": 1600, "loss": 0.0187, "lr": 7.311097532307121e-07, "epoch": 16.875, "percentage": 84.38, "elapsed_time": "0:02:17", "remaining_time": "0:00:25", "throughput": 3088.69, "total_tokens": 423360} |
| {"current_steps": 1355, "total_steps": 1600, "loss": 0.0522, "lr": 7.029679424927366e-07, "epoch": 16.9375, "percentage": 84.69, "elapsed_time": "0:02:17", "remaining_time": "0:00:24", "throughput": 3090.49, "total_tokens": 424960} |
| {"current_steps": 1360, "total_steps": 1600, "loss": 0.0077, "lr": 6.753374355979975e-07, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:02:17", "remaining_time": "0:00:24", "throughput": 3091.49, "total_tokens": 426592} |
| {"current_steps": 1365, "total_steps": 1600, "loss": 0.0797, "lr": 6.482215203005016e-07, "epoch": 17.0625, "percentage": 85.31, "elapsed_time": "0:02:18", "remaining_time": "0:00:23", "throughput": 3091.38, "total_tokens": 428160} |
| {"current_steps": 1370, "total_steps": 1600, "loss": 0.0114, "lr": 6.216234231230012e-07, "epoch": 17.125, "percentage": 85.62, "elapsed_time": "0:02:18", "remaining_time": "0:00:23", "throughput": 3093.06, "total_tokens": 429760} |
| {"current_steps": 1375, "total_steps": 1600, "loss": 0.0061, "lr": 5.955463089730723e-07, "epoch": 17.1875, "percentage": 85.94, "elapsed_time": "0:02:19", "remaining_time": "0:00:22", "throughput": 3094.58, "total_tokens": 431328} |
| {"current_steps": 1380, "total_steps": 1600, "loss": 0.0068, "lr": 5.699932807665198e-07, "epoch": 17.25, "percentage": 86.25, "elapsed_time": "0:02:19", "remaining_time": "0:00:22", "throughput": 3096.26, "total_tokens": 432928} |
| {"current_steps": 1385, "total_steps": 1600, "loss": 0.0294, "lr": 5.449673790581611e-07, "epoch": 17.3125, "percentage": 86.56, "elapsed_time": "0:02:20", "remaining_time": "0:00:21", "throughput": 3097.92, "total_tokens": 434528} |
| {"current_steps": 1390, "total_steps": 1600, "loss": 0.0136, "lr": 5.204715816800343e-07, "epoch": 17.375, "percentage": 86.88, "elapsed_time": "0:02:20", "remaining_time": "0:00:21", "throughput": 3099.69, "total_tokens": 436128} |
| {"current_steps": 1395, "total_steps": 1600, "loss": 0.0213, "lr": 4.965088033870608e-07, "epoch": 17.4375, "percentage": 87.19, "elapsed_time": "0:02:21", "remaining_time": "0:00:20", "throughput": 3101.21, "total_tokens": 437696} |
| {"current_steps": 1400, "total_steps": 1600, "loss": 0.037, "lr": 4.730818955102234e-07, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:02:21", "remaining_time": "0:00:20", "throughput": 3102.73, "total_tokens": 439264} |
| {"current_steps": 1405, "total_steps": 1600, "loss": 0.0086, "lr": 4.501936456172845e-07, "epoch": 17.5625, "percentage": 87.81, "elapsed_time": "0:02:22", "remaining_time": "0:00:19", "throughput": 3104.0, "total_tokens": 440800} |
| {"current_steps": 1410, "total_steps": 1600, "loss": 0.0036, "lr": 4.278467771810896e-07, "epoch": 17.625, "percentage": 88.12, "elapsed_time": "0:02:22", "remaining_time": "0:00:19", "throughput": 3105.31, "total_tokens": 442336} |
| {"current_steps": 1415, "total_steps": 1600, "loss": 0.0042, "lr": 4.0604394925550906e-07, "epoch": 17.6875, "percentage": 88.44, "elapsed_time": "0:02:22", "remaining_time": "0:00:18", "throughput": 3106.56, "total_tokens": 443872} |
| {"current_steps": 1420, "total_steps": 1600, "loss": 0.0334, "lr": 3.8478775615902965e-07, "epoch": 17.75, "percentage": 88.75, "elapsed_time": "0:02:23", "remaining_time": "0:00:18", "throughput": 3107.83, "total_tokens": 445408} |
| {"current_steps": 1425, "total_steps": 1600, "loss": 0.0149, "lr": 3.6408072716606346e-07, "epoch": 17.8125, "percentage": 89.06, "elapsed_time": "0:02:23", "remaining_time": "0:00:17", "throughput": 3109.3, "total_tokens": 446976} |
| {"current_steps": 1430, "total_steps": 1600, "loss": 0.0137, "lr": 3.439253262059822e-07, "epoch": 17.875, "percentage": 89.38, "elapsed_time": "0:02:24", "remaining_time": "0:00:17", "throughput": 3110.52, "total_tokens": 448512} |
| {"current_steps": 1435, "total_steps": 1600, "loss": 0.0428, "lr": 3.24323951569942e-07, "epoch": 17.9375, "percentage": 89.69, "elapsed_time": "0:02:24", "remaining_time": "0:00:16", "throughput": 3112.19, "total_tokens": 450112} |
| {"current_steps": 1440, "total_steps": 1600, "loss": 0.0132, "lr": 3.052789356255037e-07, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:02:25", "remaining_time": "0:00:16", "throughput": 3112.52, "total_tokens": 451648} |
| {"current_steps": 1440, "total_steps": 1600, "eval_loss": 0.6123794317245483, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:02:26", "remaining_time": "0:00:16", "throughput": 3093.28, "total_tokens": 451648} |
| {"current_steps": 1445, "total_steps": 1600, "loss": 0.0838, "lr": 2.867925445391079e-07, "epoch": 18.0625, "percentage": 90.31, "elapsed_time": "0:02:27", "remaining_time": "0:00:15", "throughput": 3066.69, "total_tokens": 453248} |
| {"current_steps": 1450, "total_steps": 1600, "loss": 0.0035, "lr": 2.688669780064268e-07, "epoch": 18.125, "percentage": 90.62, "elapsed_time": "0:02:28", "remaining_time": "0:00:15", "throughput": 3068.11, "total_tokens": 454816} |
| {"current_steps": 1455, "total_steps": 1600, "loss": 0.0467, "lr": 2.5150436899061494e-07, "epoch": 18.1875, "percentage": 90.94, "elapsed_time": "0:02:28", "remaining_time": "0:00:14", "throughput": 3069.36, "total_tokens": 456352} |
| {"current_steps": 1460, "total_steps": 1600, "loss": 0.0122, "lr": 2.3470678346851517e-07, "epoch": 18.25, "percentage": 91.25, "elapsed_time": "0:02:29", "remaining_time": "0:00:14", "throughput": 3070.57, "total_tokens": 457888} |
| {"current_steps": 1465, "total_steps": 1600, "loss": 0.0499, "lr": 2.1847622018482283e-07, "epoch": 18.3125, "percentage": 91.56, "elapsed_time": "0:02:29", "remaining_time": "0:00:13", "throughput": 3072.2, "total_tokens": 459488} |
| {"current_steps": 1470, "total_steps": 1600, "loss": 0.0182, "lr": 2.028146104142581e-07, "epoch": 18.375, "percentage": 91.88, "elapsed_time": "0:02:30", "remaining_time": "0:00:13", "throughput": 3071.04, "total_tokens": 461088} |
| {"current_steps": 1475, "total_steps": 1600, "loss": 0.0051, "lr": 1.8772381773176417e-07, "epoch": 18.4375, "percentage": 92.19, "elapsed_time": "0:02:30", "remaining_time": "0:00:12", "throughput": 3072.44, "total_tokens": 462656} |
| {"current_steps": 1480, "total_steps": 1600, "loss": 0.0041, "lr": 1.7320563779075595e-07, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:02:31", "remaining_time": "0:00:12", "throughput": 3073.92, "total_tokens": 464224} |
| {"current_steps": 1485, "total_steps": 1600, "loss": 0.0054, "lr": 1.5926179810946185e-07, "epoch": 18.5625, "percentage": 92.81, "elapsed_time": "0:02:31", "remaining_time": "0:00:11", "throughput": 3075.4, "total_tokens": 465792} |
| {"current_steps": 1490, "total_steps": 1600, "loss": 0.032, "lr": 1.4589395786535954e-07, "epoch": 18.625, "percentage": 93.12, "elapsed_time": "0:02:31", "remaining_time": "0:00:11", "throughput": 3076.67, "total_tokens": 467328} |
| {"current_steps": 1495, "total_steps": 1600, "loss": 0.0031, "lr": 1.331037076977576e-07, "epoch": 18.6875, "percentage": 93.44, "elapsed_time": "0:02:32", "remaining_time": "0:00:10", "throughput": 3078.1, "total_tokens": 468896} |
| {"current_steps": 1500, "total_steps": 1600, "loss": 0.0074, "lr": 1.2089256951851923e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "0:02:32", "remaining_time": "0:00:10", "throughput": 3079.78, "total_tokens": 470496} |
| {"current_steps": 1505, "total_steps": 1600, "loss": 0.0019, "lr": 1.0926199633097156e-07, "epoch": 18.8125, "percentage": 94.06, "elapsed_time": "0:02:33", "remaining_time": "0:00:09", "throughput": 3081.43, "total_tokens": 472096} |
| {"current_steps": 1510, "total_steps": 1600, "loss": 0.0075, "lr": 9.821337205701664e-08, "epoch": 18.875, "percentage": 94.38, "elapsed_time": "0:02:33", "remaining_time": "0:00:09", "throughput": 3082.89, "total_tokens": 473664} |
| {"current_steps": 1515, "total_steps": 1600, "loss": 0.0024, "lr": 8.77480113724516e-08, "epoch": 18.9375, "percentage": 94.69, "elapsed_time": "0:02:34", "remaining_time": "0:00:08", "throughput": 3084.44, "total_tokens": 475264} |
| {"current_steps": 1520, "total_steps": 1600, "loss": 0.0312, "lr": 7.786715955054202e-08, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:02:34", "remaining_time": "0:00:08", "throughput": 3084.98, "total_tokens": 476832} |
| {"current_steps": 1525, "total_steps": 1600, "loss": 0.0024, "lr": 6.857199231384282e-08, "epoch": 19.0625, "percentage": 95.31, "elapsed_time": "0:02:35", "remaining_time": "0:00:07", "throughput": 3085.23, "total_tokens": 478432} |
| {"current_steps": 1530, "total_steps": 1600, "loss": 0.0098, "lr": 5.986361569430166e-08, "epoch": 19.125, "percentage": 95.62, "elapsed_time": "0:02:35", "remaining_time": "0:00:07", "throughput": 3087.0, "total_tokens": 480064} |
| {"current_steps": 1535, "total_steps": 1600, "loss": 0.0843, "lr": 5.174306590164879e-08, "epoch": 19.1875, "percentage": 95.94, "elapsed_time": "0:02:35", "remaining_time": "0:00:06", "throughput": 3088.22, "total_tokens": 481600} |
| {"current_steps": 1540, "total_steps": 1600, "loss": 0.0128, "lr": 4.42113092001023e-08, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:02:36", "remaining_time": "0:00:06", "throughput": 3089.42, "total_tokens": 483136} |
| {"current_steps": 1545, "total_steps": 1600, "loss": 0.0229, "lr": 3.726924179339009e-08, "epoch": 19.3125, "percentage": 96.56, "elapsed_time": "0:02:36", "remaining_time": "0:00:05", "throughput": 3090.79, "total_tokens": 484704} |
| {"current_steps": 1550, "total_steps": 1600, "loss": 0.0379, "lr": 3.09176897181096e-08, "epoch": 19.375, "percentage": 96.88, "elapsed_time": "0:02:37", "remaining_time": "0:00:05", "throughput": 3092.34, "total_tokens": 486304} |
| {"current_steps": 1555, "total_steps": 1600, "loss": 0.0026, "lr": 2.515740874544148e-08, "epoch": 19.4375, "percentage": 97.19, "elapsed_time": "0:02:37", "remaining_time": "0:00:04", "throughput": 3093.56, "total_tokens": 487840} |
| {"current_steps": 1560, "total_steps": 1600, "loss": 0.0207, "lr": 1.9989084291216487e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:02:38", "remaining_time": "0:00:04", "throughput": 3094.76, "total_tokens": 489376} |
| {"current_steps": 1565, "total_steps": 1600, "loss": 0.0229, "lr": 1.541333133436018e-08, "epoch": 19.5625, "percentage": 97.81, "elapsed_time": "0:02:38", "remaining_time": "0:00:03", "throughput": 3095.89, "total_tokens": 490912} |
| {"current_steps": 1570, "total_steps": 1600, "loss": 0.017, "lr": 1.1430694343715354e-08, "epoch": 19.625, "percentage": 98.12, "elapsed_time": "0:02:39", "remaining_time": "0:00:03", "throughput": 3097.09, "total_tokens": 492448} |
| {"current_steps": 1575, "total_steps": 1600, "loss": 0.0022, "lr": 8.041647213256066e-09, "epoch": 19.6875, "percentage": 98.44, "elapsed_time": "0:02:39", "remaining_time": "0:00:02", "throughput": 3098.46, "total_tokens": 494016} |
| {"current_steps": 1580, "total_steps": 1600, "loss": 0.0131, "lr": 5.246593205699424e-09, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:02:39", "remaining_time": "0:00:02", "throughput": 3099.75, "total_tokens": 495584} |
| {"current_steps": 1585, "total_steps": 1600, "loss": 0.0048, "lr": 3.0458649045211897e-09, "epoch": 19.8125, "percentage": 99.06, "elapsed_time": "0:02:40", "remaining_time": "0:00:01", "throughput": 3101.08, "total_tokens": 497152} |
| {"current_steps": 1590, "total_steps": 1600, "loss": 0.003, "lr": 1.4397241743813185e-09, "epoch": 19.875, "percentage": 99.38, "elapsed_time": "0:02:40", "remaining_time": "0:00:01", "throughput": 3102.39, "total_tokens": 498720} |
| {"current_steps": 1595, "total_steps": 1600, "loss": 0.0033, "lr": 4.283621299649987e-10, "epoch": 19.9375, "percentage": 99.69, "elapsed_time": "0:02:41", "remaining_time": "0:00:00", "throughput": 3103.74, "total_tokens": 500288} |
| {"current_steps": 1600, "total_steps": 1600, "loss": 0.0032, "lr": 1.189911324084303e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:41", "remaining_time": "0:00:00", "throughput": 3104.4, "total_tokens": 501888} |
| {"current_steps": 1600, "total_steps": 1600, "eval_loss": 0.6324604749679565, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:42", "remaining_time": "0:00:00", "throughput": 3087.19, "total_tokens": 501888} |
| {"current_steps": 1600, "total_steps": 1600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:44", "remaining_time": "0:00:00", "throughput": 3059.75, "total_tokens": 501888} |
|
|