train_wsc_1754652157 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 1250
98f4125 verified
{"current_steps": 5, "total_steps": 1250, "loss": 15.2081, "lr": 1.6000000000000001e-06, "epoch": 0.04, "percentage": 0.4, "elapsed_time": "0:00:01", "remaining_time": "0:04:21", "throughput": 2039.14, "total_tokens": 2144}
{"current_steps": 10, "total_steps": 1250, "loss": 15.3148, "lr": 3.6e-06, "epoch": 0.08, "percentage": 0.8, "elapsed_time": "0:00:01", "remaining_time": "0:03:18", "throughput": 2584.13, "total_tokens": 4128}
{"current_steps": 15, "total_steps": 1250, "loss": 15.3875, "lr": 5.600000000000001e-06, "epoch": 0.12, "percentage": 1.2, "elapsed_time": "0:00:02", "remaining_time": "0:02:57", "throughput": 2899.98, "total_tokens": 6240}
{"current_steps": 20, "total_steps": 1250, "loss": 15.1604, "lr": 7.6e-06, "epoch": 0.16, "percentage": 1.6, "elapsed_time": "0:00:02", "remaining_time": "0:02:44", "throughput": 3027.95, "total_tokens": 8096}
{"current_steps": 25, "total_steps": 1250, "loss": 15.358, "lr": 9.600000000000001e-06, "epoch": 0.2, "percentage": 2.0, "elapsed_time": "0:00:03", "remaining_time": "0:02:36", "throughput": 3160.08, "total_tokens": 10112}
{"current_steps": 30, "total_steps": 1250, "loss": 14.8947, "lr": 1.16e-05, "epoch": 0.24, "percentage": 2.4, "elapsed_time": "0:00:03", "remaining_time": "0:02:31", "throughput": 3231.44, "total_tokens": 12032}
{"current_steps": 35, "total_steps": 1250, "loss": 14.9376, "lr": 1.3600000000000002e-05, "epoch": 0.28, "percentage": 2.8, "elapsed_time": "0:00:04", "remaining_time": "0:02:26", "throughput": 3277.81, "total_tokens": 13824}
{"current_steps": 40, "total_steps": 1250, "loss": 15.0763, "lr": 1.56e-05, "epoch": 0.32, "percentage": 3.2, "elapsed_time": "0:00:04", "remaining_time": "0:02:24", "throughput": 3323.85, "total_tokens": 15840}
{"current_steps": 45, "total_steps": 1250, "loss": 14.7281, "lr": 1.76e-05, "epoch": 0.36, "percentage": 3.6, "elapsed_time": "0:00:05", "remaining_time": "0:02:22", "throughput": 3356.58, "total_tokens": 17920}
{"current_steps": 50, "total_steps": 1250, "loss": 14.723, "lr": 1.9600000000000002e-05, "epoch": 0.4, "percentage": 4.0, "elapsed_time": "0:00:05", "remaining_time": "0:02:20", "throughput": 3367.2, "total_tokens": 19712}
{"current_steps": 55, "total_steps": 1250, "loss": 14.5953, "lr": 2.16e-05, "epoch": 0.44, "percentage": 4.4, "elapsed_time": "0:00:06", "remaining_time": "0:02:19", "throughput": 3414.49, "total_tokens": 21952}
{"current_steps": 60, "total_steps": 1250, "loss": 13.9838, "lr": 2.36e-05, "epoch": 0.48, "percentage": 4.8, "elapsed_time": "0:00:07", "remaining_time": "0:02:18", "throughput": 3450.59, "total_tokens": 24160}
{"current_steps": 63, "total_steps": 1250, "eval_loss": 13.871087074279785, "epoch": 0.504, "percentage": 5.04, "elapsed_time": "0:00:08", "remaining_time": "0:02:34", "throughput": 3109.82, "total_tokens": 25504}
{"current_steps": 65, "total_steps": 1250, "loss": 13.8098, "lr": 2.5600000000000002e-05, "epoch": 0.52, "percentage": 5.2, "elapsed_time": "0:00:09", "remaining_time": "0:02:48", "throughput": 2826.31, "total_tokens": 26112}
{"current_steps": 70, "total_steps": 1250, "loss": 13.7342, "lr": 2.7600000000000003e-05, "epoch": 0.56, "percentage": 5.6, "elapsed_time": "0:00:09", "remaining_time": "0:02:44", "throughput": 2869.83, "total_tokens": 28064}
{"current_steps": 75, "total_steps": 1250, "loss": 13.545, "lr": 2.96e-05, "epoch": 0.6, "percentage": 6.0, "elapsed_time": "0:00:10", "remaining_time": "0:02:40", "throughput": 2904.04, "total_tokens": 29824}
{"current_steps": 80, "total_steps": 1250, "loss": 13.168, "lr": 3.16e-05, "epoch": 0.64, "percentage": 6.4, "elapsed_time": "0:00:10", "remaining_time": "0:02:38", "throughput": 2947.91, "total_tokens": 31904}
{"current_steps": 85, "total_steps": 1250, "loss": 13.0662, "lr": 3.3600000000000004e-05, "epoch": 0.68, "percentage": 6.8, "elapsed_time": "0:00:11", "remaining_time": "0:02:35", "throughput": 2986.13, "total_tokens": 33984}
{"current_steps": 90, "total_steps": 1250, "loss": 12.3617, "lr": 3.56e-05, "epoch": 0.72, "percentage": 7.2, "elapsed_time": "0:00:11", "remaining_time": "0:02:33", "throughput": 3011.28, "total_tokens": 35776}
{"current_steps": 95, "total_steps": 1250, "loss": 12.2163, "lr": 3.76e-05, "epoch": 0.76, "percentage": 7.6, "elapsed_time": "0:00:12", "remaining_time": "0:02:30", "throughput": 3027.62, "total_tokens": 37472}
{"current_steps": 100, "total_steps": 1250, "loss": 12.0549, "lr": 3.960000000000001e-05, "epoch": 0.8, "percentage": 8.0, "elapsed_time": "0:00:12", "remaining_time": "0:02:28", "throughput": 3049.51, "total_tokens": 39328}
{"current_steps": 105, "total_steps": 1250, "loss": 11.9157, "lr": 4.16e-05, "epoch": 0.84, "percentage": 8.4, "elapsed_time": "0:00:13", "remaining_time": "0:02:26", "throughput": 3075.56, "total_tokens": 41280}
{"current_steps": 110, "total_steps": 1250, "loss": 11.5939, "lr": 4.36e-05, "epoch": 0.88, "percentage": 8.8, "elapsed_time": "0:00:13", "remaining_time": "0:02:25", "throughput": 3111.97, "total_tokens": 43552}
{"current_steps": 115, "total_steps": 1250, "loss": 10.6461, "lr": 4.5600000000000004e-05, "epoch": 0.92, "percentage": 9.2, "elapsed_time": "0:00:14", "remaining_time": "0:02:22", "throughput": 3121.95, "total_tokens": 45216}
{"current_steps": 120, "total_steps": 1250, "loss": 10.384, "lr": 4.76e-05, "epoch": 0.96, "percentage": 9.6, "elapsed_time": "0:00:15", "remaining_time": "0:02:21", "throughput": 3147.27, "total_tokens": 47360}
{"current_steps": 125, "total_steps": 1250, "loss": 9.9251, "lr": 4.96e-05, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:00:15", "remaining_time": "0:02:20", "throughput": 3153.6, "total_tokens": 49376}
{"current_steps": 126, "total_steps": 1250, "eval_loss": 9.737223625183105, "epoch": 1.008, "percentage": 10.08, "elapsed_time": "0:00:16", "remaining_time": "0:02:29", "throughput": 2970.69, "total_tokens": 49696}
{"current_steps": 130, "total_steps": 1250, "loss": 9.5781, "lr": 4.9998440375027166e-05, "epoch": 1.04, "percentage": 10.4, "elapsed_time": "0:00:18", "remaining_time": "0:02:35", "throughput": 2832.05, "total_tokens": 51200}
{"current_steps": 135, "total_steps": 1250, "loss": 8.9157, "lr": 4.99921047320825e-05, "epoch": 1.08, "percentage": 10.8, "elapsed_time": "0:00:18", "remaining_time": "0:02:33", "throughput": 2861.7, "total_tokens": 53216}
{"current_steps": 140, "total_steps": 1250, "loss": 8.0389, "lr": 4.998089682880117e-05, "epoch": 1.12, "percentage": 11.2, "elapsed_time": "0:00:19", "remaining_time": "0:02:31", "throughput": 2882.83, "total_tokens": 55168}
{"current_steps": 145, "total_steps": 1250, "loss": 8.6052, "lr": 4.9964818850186135e-05, "epoch": 1.16, "percentage": 11.6, "elapsed_time": "0:00:19", "remaining_time": "0:02:29", "throughput": 2897.72, "total_tokens": 56960}
{"current_steps": 150, "total_steps": 1250, "loss": 7.8179, "lr": 4.994387393067117e-05, "epoch": 1.2, "percentage": 12.0, "elapsed_time": "0:00:20", "remaining_time": "0:02:27", "throughput": 2918.51, "total_tokens": 58880}
{"current_steps": 155, "total_steps": 1250, "loss": 7.3245, "lr": 4.9918066153509834e-05, "epoch": 1.24, "percentage": 12.4, "elapsed_time": "0:00:20", "remaining_time": "0:02:26", "throughput": 2935.02, "total_tokens": 60672}
{"current_steps": 160, "total_steps": 1250, "loss": 7.6592, "lr": 4.988740054997943e-05, "epoch": 1.28, "percentage": 12.8, "elapsed_time": "0:00:21", "remaining_time": "0:02:24", "throughput": 2958.95, "total_tokens": 62848}
{"current_steps": 165, "total_steps": 1250, "loss": 6.5144, "lr": 4.985188309840012e-05, "epoch": 1.32, "percentage": 13.2, "elapsed_time": "0:00:21", "remaining_time": "0:02:22", "throughput": 2966.97, "total_tokens": 64448}
{"current_steps": 170, "total_steps": 1250, "loss": 6.9579, "lr": 4.9811520722969465e-05, "epoch": 1.3599999999999999, "percentage": 13.6, "elapsed_time": "0:00:22", "remaining_time": "0:02:21", "throughput": 2983.41, "total_tokens": 66368}
{"current_steps": 175, "total_steps": 1250, "loss": 5.7934, "lr": 4.976632129241252e-05, "epoch": 1.4, "percentage": 14.0, "elapsed_time": "0:00:22", "remaining_time": "0:02:19", "throughput": 2993.26, "total_tokens": 68128}
{"current_steps": 180, "total_steps": 1250, "loss": 6.0098, "lr": 4.971629361844785e-05, "epoch": 1.44, "percentage": 14.4, "elapsed_time": "0:00:23", "remaining_time": "0:02:18", "throughput": 3008.5, "total_tokens": 70112}
{"current_steps": 185, "total_steps": 1250, "loss": 6.0466, "lr": 4.966144745406961e-05, "epoch": 1.48, "percentage": 14.8, "elapsed_time": "0:00:23", "remaining_time": "0:02:17", "throughput": 3032.11, "total_tokens": 72384}
{"current_steps": 189, "total_steps": 1250, "eval_loss": 5.308993816375732, "epoch": 1.512, "percentage": 15.12, "elapsed_time": "0:00:25", "remaining_time": "0:02:21", "throughput": 2942.82, "total_tokens": 74112}
{"current_steps": 190, "total_steps": 1250, "loss": 5.7175, "lr": 4.960179349164621e-05, "epoch": 1.52, "percentage": 15.2, "elapsed_time": "0:00:26", "remaining_time": "0:02:26", "throughput": 2845.26, "total_tokens": 74752}
{"current_steps": 195, "total_steps": 1250, "loss": 5.0706, "lr": 4.953734336083583e-05, "epoch": 1.56, "percentage": 15.6, "elapsed_time": "0:00:26", "remaining_time": "0:02:24", "throughput": 2859.88, "total_tokens": 76640}
{"current_steps": 200, "total_steps": 1250, "loss": 5.3219, "lr": 4.946810962631916e-05, "epoch": 1.6, "percentage": 16.0, "elapsed_time": "0:00:27", "remaining_time": "0:02:23", "throughput": 2879.73, "total_tokens": 78784}
{"current_steps": 205, "total_steps": 1250, "loss": 4.6854, "lr": 4.9394105785349944e-05, "epoch": 1.6400000000000001, "percentage": 16.4, "elapsed_time": "0:00:27", "remaining_time": "0:02:22", "throughput": 2894.26, "total_tokens": 80768}
{"current_steps": 210, "total_steps": 1250, "loss": 4.0843, "lr": 4.9315346265123594e-05, "epoch": 1.6800000000000002, "percentage": 16.8, "elapsed_time": "0:00:28", "remaining_time": "0:02:20", "throughput": 2911.88, "total_tokens": 82848}
{"current_steps": 215, "total_steps": 1250, "loss": 4.0373, "lr": 4.923184641996463e-05, "epoch": 1.72, "percentage": 17.2, "elapsed_time": "0:00:28", "remaining_time": "0:02:19", "throughput": 2924.95, "total_tokens": 84768}
{"current_steps": 220, "total_steps": 1250, "loss": 3.6419, "lr": 4.914362252833332e-05, "epoch": 1.76, "percentage": 17.6, "elapsed_time": "0:00:29", "remaining_time": "0:02:18", "throughput": 2941.37, "total_tokens": 86848}
{"current_steps": 225, "total_steps": 1250, "loss": 3.3145, "lr": 4.905069178965215e-05, "epoch": 1.8, "percentage": 18.0, "elapsed_time": "0:00:30", "remaining_time": "0:02:16", "throughput": 2955.27, "total_tokens": 88736}
{"current_steps": 230, "total_steps": 1250, "loss": 3.4327, "lr": 4.8953072320952745e-05, "epoch": 1.8399999999999999, "percentage": 18.4, "elapsed_time": "0:00:30", "remaining_time": "0:02:15", "throughput": 2970.38, "total_tokens": 90848}
{"current_steps": 235, "total_steps": 1250, "loss": 3.2526, "lr": 4.885078315334395e-05, "epoch": 1.88, "percentage": 18.8, "elapsed_time": "0:00:31", "remaining_time": "0:02:14", "throughput": 2982.41, "total_tokens": 92992}
{"current_steps": 240, "total_steps": 1250, "loss": 2.791, "lr": 4.874384422830167e-05, "epoch": 1.92, "percentage": 19.2, "elapsed_time": "0:00:31", "remaining_time": "0:02:13", "throughput": 2991.26, "total_tokens": 94880}
{"current_steps": 245, "total_steps": 1250, "loss": 2.2217, "lr": 4.863227639378124e-05, "epoch": 1.96, "percentage": 19.6, "elapsed_time": "0:00:32", "remaining_time": "0:02:12", "throughput": 3001.99, "total_tokens": 96704}
{"current_steps": 250, "total_steps": 1250, "loss": 1.6788, "lr": 4.851610140015304e-05, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:00:32", "remaining_time": "0:02:11", "throughput": 2998.8, "total_tokens": 98240}
{"current_steps": 252, "total_steps": 1250, "eval_loss": 1.980229139328003, "epoch": 2.016, "percentage": 20.16, "elapsed_time": "0:00:33", "remaining_time": "0:02:14", "throughput": 2918.17, "total_tokens": 99136}
{"current_steps": 255, "total_steps": 1250, "loss": 2.0365, "lr": 4.839534189596228e-05, "epoch": 2.04, "percentage": 20.4, "elapsed_time": "0:00:35", "remaining_time": "0:02:18", "throughput": 2831.91, "total_tokens": 100224}
{"current_steps": 260, "total_steps": 1250, "loss": 1.4581, "lr": 4.8270021423513554e-05, "epoch": 2.08, "percentage": 20.8, "elapsed_time": "0:00:35", "remaining_time": "0:02:16", "throughput": 2839.98, "total_tokens": 101920}
{"current_steps": 265, "total_steps": 1250, "loss": 1.2941, "lr": 4.8140164414281306e-05, "epoch": 2.12, "percentage": 21.2, "elapsed_time": "0:00:36", "remaining_time": "0:02:15", "throughput": 2850.81, "total_tokens": 103808}
{"current_steps": 270, "total_steps": 1250, "loss": 1.4016, "lr": 4.800579618414676e-05, "epoch": 2.16, "percentage": 21.6, "elapsed_time": "0:00:36", "remaining_time": "0:02:14", "throughput": 2864.93, "total_tokens": 105920}
{"current_steps": 275, "total_steps": 1250, "loss": 1.5631, "lr": 4.7866942928462625e-05, "epoch": 2.2, "percentage": 22.0, "elapsed_time": "0:00:37", "remaining_time": "0:02:13", "throughput": 2878.32, "total_tokens": 108160}
{"current_steps": 280, "total_steps": 1250, "loss": 0.856, "lr": 4.772363171694622e-05, "epoch": 2.24, "percentage": 22.4, "elapsed_time": "0:00:38", "remaining_time": "0:02:11", "throughput": 2887.32, "total_tokens": 109920}
{"current_steps": 285, "total_steps": 1250, "loss": 1.1216, "lr": 4.7575890488402185e-05, "epoch": 2.2800000000000002, "percentage": 22.8, "elapsed_time": "0:00:38", "remaining_time": "0:02:10", "throughput": 2899.9, "total_tokens": 111904}
{"current_steps": 290, "total_steps": 1250, "loss": 0.776, "lr": 4.742374804527575e-05, "epoch": 2.32, "percentage": 23.2, "elapsed_time": "0:00:39", "remaining_time": "0:02:09", "throughput": 2907.68, "total_tokens": 113632}
{"current_steps": 295, "total_steps": 1250, "loss": 1.0421, "lr": 4.7267234048037664e-05, "epoch": 2.36, "percentage": 23.6, "elapsed_time": "0:00:39", "remaining_time": "0:02:08", "throughput": 2919.24, "total_tokens": 115616}
{"current_steps": 300, "total_steps": 1250, "loss": 0.7994, "lr": 4.710637900940181e-05, "epoch": 2.4, "percentage": 24.0, "elapsed_time": "0:00:40", "remaining_time": "0:02:07", "throughput": 2927.63, "total_tokens": 117472}
{"current_steps": 305, "total_steps": 1250, "loss": 1.0609, "lr": 4.694121428837668e-05, "epoch": 2.44, "percentage": 24.4, "elapsed_time": "0:00:40", "remaining_time": "0:02:06", "throughput": 2940.93, "total_tokens": 119616}
{"current_steps": 310, "total_steps": 1250, "loss": 0.8039, "lr": 4.6771772084151885e-05, "epoch": 2.48, "percentage": 24.8, "elapsed_time": "0:00:41", "remaining_time": "0:02:04", "throughput": 2949.49, "total_tokens": 121568}
{"current_steps": 315, "total_steps": 1250, "loss": 1.0818, "lr": 4.659808542982088e-05, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:00:41", "remaining_time": "0:02:04", "throughput": 2964.72, "total_tokens": 123904}
{"current_steps": 315, "total_steps": 1250, "eval_loss": 0.7967647314071655, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:00:42", "remaining_time": "0:02:06", "throughput": 2905.39, "total_tokens": 123904}
{"current_steps": 320, "total_steps": 1250, "loss": 0.5327, "lr": 4.642018818594107e-05, "epoch": 2.56, "percentage": 25.6, "elapsed_time": "0:00:44", "remaining_time": "0:02:08", "throughput": 2853.1, "total_tokens": 125696}
{"current_steps": 325, "total_steps": 1250, "loss": 0.6088, "lr": 4.6238115033932636e-05, "epoch": 2.6, "percentage": 26.0, "elapsed_time": "0:00:44", "remaining_time": "0:02:06", "throughput": 2861.32, "total_tokens": 127488}
{"current_steps": 330, "total_steps": 1250, "loss": 0.7891, "lr": 4.605190146931731e-05, "epoch": 2.64, "percentage": 26.4, "elapsed_time": "0:00:45", "remaining_time": "0:02:05", "throughput": 2873.96, "total_tokens": 129632}
{"current_steps": 335, "total_steps": 1250, "loss": 0.7432, "lr": 4.586158379479848e-05, "epoch": 2.68, "percentage": 26.8, "elapsed_time": "0:00:45", "remaining_time": "0:02:04", "throughput": 2885.65, "total_tokens": 131680}
{"current_steps": 340, "total_steps": 1250, "loss": 0.5091, "lr": 4.566719911318389e-05, "epoch": 2.7199999999999998, "percentage": 27.2, "elapsed_time": "0:00:46", "remaining_time": "0:02:03", "throughput": 2893.4, "total_tokens": 133472}
{"current_steps": 345, "total_steps": 1250, "loss": 0.5338, "lr": 4.5468785320152365e-05, "epoch": 2.76, "percentage": 27.6, "elapsed_time": "0:00:46", "remaining_time": "0:02:02", "throughput": 2898.31, "total_tokens": 135200}
{"current_steps": 350, "total_steps": 1250, "loss": 0.9588, "lr": 4.5266381096866e-05, "epoch": 2.8, "percentage": 28.0, "elapsed_time": "0:00:47", "remaining_time": "0:02:01", "throughput": 2910.95, "total_tokens": 137536}
{"current_steps": 355, "total_steps": 1250, "loss": 0.7235, "lr": 4.5060025902429174e-05, "epoch": 2.84, "percentage": 28.4, "elapsed_time": "0:00:47", "remaining_time": "0:02:00", "throughput": 2922.42, "total_tokens": 139744}
{"current_steps": 360, "total_steps": 1250, "loss": 0.703, "lr": 4.484975996619589e-05, "epoch": 2.88, "percentage": 28.8, "elapsed_time": "0:00:48", "remaining_time": "0:01:59", "throughput": 2931.34, "total_tokens": 141760}
{"current_steps": 365, "total_steps": 1250, "loss": 0.6324, "lr": 4.4635624279927044e-05, "epoch": 2.92, "percentage": 29.2, "elapsed_time": "0:00:48", "remaining_time": "0:01:58", "throughput": 2941.49, "total_tokens": 143872}
{"current_steps": 370, "total_steps": 1250, "loss": 0.5377, "lr": 4.441766058979898e-05, "epoch": 2.96, "percentage": 29.6, "elapsed_time": "0:00:49", "remaining_time": "0:01:57", "throughput": 2949.34, "total_tokens": 145856}
{"current_steps": 375, "total_steps": 1250, "loss": 0.8394, "lr": 4.4195911388264946e-05, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:00:50", "remaining_time": "0:01:56", "throughput": 2952.08, "total_tokens": 147648}
{"current_steps": 378, "total_steps": 1250, "eval_loss": 0.5601035952568054, "epoch": 3.024, "percentage": 30.24, "elapsed_time": "0:00:51", "remaining_time": "0:01:58", "throughput": 2899.04, "total_tokens": 148736}
{"current_steps": 380, "total_steps": 1250, "loss": 0.4311, "lr": 4.3970419905771145e-05, "epoch": 3.04, "percentage": 30.4, "elapsed_time": "0:00:52", "remaining_time": "0:02:00", "throughput": 2849.13, "total_tokens": 149472}
{"current_steps": 385, "total_steps": 1250, "loss": 0.6238, "lr": 4.374123010232888e-05, "epoch": 3.08, "percentage": 30.8, "elapsed_time": "0:00:53", "remaining_time": "0:01:59", "throughput": 2858.97, "total_tokens": 151552}
{"current_steps": 390, "total_steps": 1250, "loss": 0.5635, "lr": 4.350838665894446e-05, "epoch": 3.12, "percentage": 31.2, "elapsed_time": "0:00:53", "remaining_time": "0:01:58", "throughput": 2867.25, "total_tokens": 153568}
{"current_steps": 395, "total_steps": 1250, "loss": 0.4754, "lr": 4.3271934968908514e-05, "epoch": 3.16, "percentage": 31.6, "elapsed_time": "0:00:54", "remaining_time": "0:01:57", "throughput": 2875.99, "total_tokens": 155616}
{"current_steps": 400, "total_steps": 1250, "loss": 0.6315, "lr": 4.303192112894652e-05, "epoch": 3.2, "percentage": 32.0, "elapsed_time": "0:00:54", "remaining_time": "0:01:56", "throughput": 2884.77, "total_tokens": 157728}
{"current_steps": 405, "total_steps": 1250, "loss": 0.4821, "lr": 4.278839193023214e-05, "epoch": 3.24, "percentage": 32.4, "elapsed_time": "0:00:55", "remaining_time": "0:01:55", "throughput": 2890.84, "total_tokens": 159488}
{"current_steps": 410, "total_steps": 1250, "loss": 0.5577, "lr": 4.254139484926519e-05, "epoch": 3.2800000000000002, "percentage": 32.8, "elapsed_time": "0:00:55", "remaining_time": "0:01:54", "throughput": 2900.49, "total_tokens": 161600}
{"current_steps": 415, "total_steps": 1250, "loss": 0.5065, "lr": 4.2290978038616e-05, "epoch": 3.32, "percentage": 33.2, "elapsed_time": "0:00:56", "remaining_time": "0:01:53", "throughput": 2909.79, "total_tokens": 163712}
{"current_steps": 420, "total_steps": 1250, "loss": 0.4285, "lr": 4.2037190317538e-05, "epoch": 3.36, "percentage": 33.6, "elapsed_time": "0:00:56", "remaining_time": "0:01:52", "throughput": 2916.59, "total_tokens": 165536}
{"current_steps": 425, "total_steps": 1250, "loss": 0.5842, "lr": 4.178008116245024e-05, "epoch": 3.4, "percentage": 34.0, "elapsed_time": "0:00:57", "remaining_time": "0:01:51", "throughput": 2926.64, "total_tokens": 167872}
{"current_steps": 430, "total_steps": 1250, "loss": 0.6883, "lr": 4.1519700697291944e-05, "epoch": 3.44, "percentage": 34.4, "elapsed_time": "0:00:57", "remaining_time": "0:01:50", "throughput": 2936.22, "total_tokens": 170112}
{"current_steps": 435, "total_steps": 1250, "loss": 0.4686, "lr": 4.125609968375072e-05, "epoch": 3.48, "percentage": 34.8, "elapsed_time": "0:00:58", "remaining_time": "0:01:49", "throughput": 2942.28, "total_tokens": 172000}
{"current_steps": 440, "total_steps": 1250, "loss": 0.5184, "lr": 4.098932951136645e-05, "epoch": 3.52, "percentage": 35.2, "elapsed_time": "0:00:59", "remaining_time": "0:01:48", "throughput": 2949.28, "total_tokens": 174016}
{"current_steps": 441, "total_steps": 1250, "eval_loss": 0.4781542420387268, "epoch": 3.528, "percentage": 35.28, "elapsed_time": "0:00:59", "remaining_time": "0:01:50", "throughput": 2908.13, "total_tokens": 174432}
{"current_steps": 445, "total_steps": 1250, "loss": 0.4466, "lr": 4.071944218751282e-05, "epoch": 3.56, "percentage": 35.6, "elapsed_time": "0:01:01", "remaining_time": "0:01:50", "throughput": 2867.46, "total_tokens": 175776}
{"current_steps": 450, "total_steps": 1250, "loss": 0.5084, "lr": 4.044649032725836e-05, "epoch": 3.6, "percentage": 36.0, "elapsed_time": "0:01:01", "remaining_time": "0:01:49", "throughput": 2876.17, "total_tokens": 177952}
{"current_steps": 455, "total_steps": 1250, "loss": 0.3983, "lr": 4.017052714310906e-05, "epoch": 3.64, "percentage": 36.4, "elapsed_time": "0:01:02", "remaining_time": "0:01:49", "throughput": 2882.15, "total_tokens": 179968}
{"current_steps": 460, "total_steps": 1250, "loss": 0.4557, "lr": 3.989160643463445e-05, "epoch": 3.68, "percentage": 36.8, "elapsed_time": "0:01:02", "remaining_time": "0:01:48", "throughput": 2889.31, "total_tokens": 181952}
{"current_steps": 465, "total_steps": 1250, "loss": 0.3364, "lr": 3.960978257797931e-05, "epoch": 3.7199999999999998, "percentage": 37.2, "elapsed_time": "0:01:03", "remaining_time": "0:01:47", "throughput": 2893.06, "total_tokens": 183680}
{"current_steps": 470, "total_steps": 1250, "loss": 0.4238, "lr": 3.932511051526289e-05, "epoch": 3.76, "percentage": 37.6, "elapsed_time": "0:01:04", "remaining_time": "0:01:46", "throughput": 2899.78, "total_tokens": 185632}
{"current_steps": 475, "total_steps": 1250, "loss": 0.46, "lr": 3.903764574386786e-05, "epoch": 3.8, "percentage": 38.0, "elapsed_time": "0:01:04", "remaining_time": "0:01:45", "throughput": 2906.03, "total_tokens": 187552}
{"current_steps": 480, "total_steps": 1250, "loss": 0.4533, "lr": 3.8747444305621e-05, "epoch": 3.84, "percentage": 38.4, "elapsed_time": "0:01:05", "remaining_time": "0:01:44", "throughput": 2911.35, "total_tokens": 189408}
{"current_steps": 485, "total_steps": 1250, "loss": 0.4832, "lr": 3.8454562775867684e-05, "epoch": 3.88, "percentage": 38.8, "elapsed_time": "0:01:05", "remaining_time": "0:01:43", "throughput": 2918.66, "total_tokens": 191488}
{"current_steps": 490, "total_steps": 1250, "loss": 0.4162, "lr": 3.8159058252442446e-05, "epoch": 3.92, "percentage": 39.2, "elapsed_time": "0:01:06", "remaining_time": "0:01:42", "throughput": 2923.48, "total_tokens": 193312}
{"current_steps": 495, "total_steps": 1250, "loss": 0.4683, "lr": 3.786098834453766e-05, "epoch": 3.96, "percentage": 39.6, "elapsed_time": "0:01:06", "remaining_time": "0:01:41", "throughput": 2930.42, "total_tokens": 195424}
{"current_steps": 500, "total_steps": 1250, "loss": 0.3853, "lr": 3.7560411161472456e-05, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:01:07", "remaining_time": "0:01:40", "throughput": 2929.35, "total_tokens": 197024}
{"current_steps": 504, "total_steps": 1250, "eval_loss": 0.46134254336357117, "epoch": 4.032, "percentage": 40.32, "elapsed_time": "0:01:08", "remaining_time": "0:01:41", "throughput": 2893.46, "total_tokens": 198656}
{"current_steps": 505, "total_steps": 1250, "loss": 0.5412, "lr": 3.725738530136422e-05, "epoch": 4.04, "percentage": 40.4, "elapsed_time": "0:01:09", "remaining_time": "0:01:42", "throughput": 2856.82, "total_tokens": 199040}
{"current_steps": 510, "total_steps": 1250, "loss": 0.4867, "lr": 3.695196983970481e-05, "epoch": 4.08, "percentage": 40.8, "elapsed_time": "0:01:10", "remaining_time": "0:01:41", "throughput": 2863.24, "total_tokens": 200960}
{"current_steps": 515, "total_steps": 1250, "loss": 0.3728, "lr": 3.664422431784361e-05, "epoch": 4.12, "percentage": 41.2, "elapsed_time": "0:01:10", "remaining_time": "0:01:40", "throughput": 2870.89, "total_tokens": 203008}
{"current_steps": 520, "total_steps": 1250, "loss": 0.378, "lr": 3.633420873137988e-05, "epoch": 4.16, "percentage": 41.6, "elapsed_time": "0:01:11", "remaining_time": "0:01:39", "throughput": 2874.49, "total_tokens": 204672}
{"current_steps": 525, "total_steps": 1250, "loss": 0.4497, "lr": 3.602198351846647e-05, "epoch": 4.2, "percentage": 42.0, "elapsed_time": "0:01:11", "remaining_time": "0:01:39", "throughput": 2881.08, "total_tokens": 206784}
{"current_steps": 530, "total_steps": 1250, "loss": 0.3772, "lr": 3.570760954802726e-05, "epoch": 4.24, "percentage": 42.4, "elapsed_time": "0:01:12", "remaining_time": "0:01:38", "throughput": 2885.66, "total_tokens": 208672}
{"current_steps": 535, "total_steps": 1250, "loss": 0.4375, "lr": 3.53911481078907e-05, "epoch": 4.28, "percentage": 42.8, "elapsed_time": "0:01:12", "remaining_time": "0:01:37", "throughput": 2892.59, "total_tokens": 210752}
{"current_steps": 540, "total_steps": 1250, "loss": 0.5944, "lr": 3.507266089284157e-05, "epoch": 4.32, "percentage": 43.2, "elapsed_time": "0:01:13", "remaining_time": "0:01:36", "throughput": 2904.21, "total_tokens": 213472}
{"current_steps": 545, "total_steps": 1250, "loss": 0.4546, "lr": 3.475220999259349e-05, "epoch": 4.36, "percentage": 43.6, "elapsed_time": "0:01:14", "remaining_time": "0:01:35", "throughput": 2910.56, "total_tokens": 215616}
{"current_steps": 550, "total_steps": 1250, "loss": 0.4207, "lr": 3.442985787968442e-05, "epoch": 4.4, "percentage": 44.0, "elapsed_time": "0:01:14", "remaining_time": "0:01:34", "throughput": 2917.37, "total_tokens": 217664}
{"current_steps": 555, "total_steps": 1250, "loss": 0.4204, "lr": 3.410566739729746e-05, "epoch": 4.44, "percentage": 44.4, "elapsed_time": "0:01:15", "remaining_time": "0:01:34", "throughput": 2922.68, "total_tokens": 219584}
{"current_steps": 560, "total_steps": 1250, "loss": 0.4381, "lr": 3.3779701747009504e-05, "epoch": 4.48, "percentage": 44.8, "elapsed_time": "0:01:15", "remaining_time": "0:01:33", "throughput": 2927.04, "total_tokens": 221504}
{"current_steps": 565, "total_steps": 1250, "loss": 0.4549, "lr": 3.3452024476469934e-05, "epoch": 4.52, "percentage": 45.2, "elapsed_time": "0:01:16", "remaining_time": "0:01:32", "throughput": 2932.18, "total_tokens": 223424}
{"current_steps": 567, "total_steps": 1250, "eval_loss": 0.43884095549583435, "epoch": 4.536, "percentage": 45.36, "elapsed_time": "0:01:17", "remaining_time": "0:01:33", "throughput": 2900.47, "total_tokens": 224032}
{"current_steps": 570, "total_steps": 1250, "loss": 0.4388, "lr": 3.312269946701191e-05, "epoch": 4.5600000000000005, "percentage": 45.6, "elapsed_time": "0:01:18", "remaining_time": "0:01:33", "throughput": 2870.47, "total_tokens": 225216}
{"current_steps": 575, "total_steps": 1250, "loss": 0.3681, "lr": 3.279179092119855e-05, "epoch": 4.6, "percentage": 46.0, "elapsed_time": "0:01:18", "remaining_time": "0:01:32", "throughput": 2875.01, "total_tokens": 227008}
{"current_steps": 580, "total_steps": 1250, "loss": 0.4424, "lr": 3.245936335030651e-05, "epoch": 4.64, "percentage": 46.4, "elapsed_time": "0:01:19", "remaining_time": "0:01:31", "throughput": 2879.09, "total_tokens": 228736}
{"current_steps": 585, "total_steps": 1250, "loss": 0.3985, "lr": 3.21254815617494e-05, "epoch": 4.68, "percentage": 46.8, "elapsed_time": "0:01:19", "remaining_time": "0:01:30", "throughput": 2881.34, "total_tokens": 230240}
{"current_steps": 590, "total_steps": 1250, "loss": 0.428, "lr": 3.179021064644347e-05, "epoch": 4.72, "percentage": 47.2, "elapsed_time": "0:01:20", "remaining_time": "0:01:29", "throughput": 2886.19, "total_tokens": 232192}
{"current_steps": 595, "total_steps": 1250, "loss": 0.416, "lr": 3.145361596611795e-05, "epoch": 4.76, "percentage": 47.6, "elapsed_time": "0:01:21", "remaining_time": "0:01:29", "throughput": 2892.83, "total_tokens": 234368}
{"current_steps": 600, "total_steps": 1250, "loss": 0.395, "lr": 3.111576314057268e-05, "epoch": 4.8, "percentage": 48.0, "elapsed_time": "0:01:21", "remaining_time": "0:01:28", "throughput": 2896.66, "total_tokens": 236032}
{"current_steps": 605, "total_steps": 1250, "loss": 0.3691, "lr": 3.0776718034885454e-05, "epoch": 4.84, "percentage": 48.4, "elapsed_time": "0:01:22", "remaining_time": "0:01:27", "throughput": 2901.38, "total_tokens": 237920}
{"current_steps": 610, "total_steps": 1250, "loss": 0.3724, "lr": 3.0436546746571372e-05, "epoch": 4.88, "percentage": 48.8, "elapsed_time": "0:01:22", "remaining_time": "0:01:26", "throughput": 2904.62, "total_tokens": 239680}
{"current_steps": 615, "total_steps": 1250, "loss": 0.3814, "lr": 3.0095315592697126e-05, "epoch": 4.92, "percentage": 49.2, "elapsed_time": "0:01:23", "remaining_time": "0:01:25", "throughput": 2908.58, "total_tokens": 241504}
{"current_steps": 620, "total_steps": 1250, "loss": 0.4676, "lr": 2.9753091096952255e-05, "epoch": 4.96, "percentage": 49.6, "elapsed_time": "0:01:23", "remaining_time": "0:01:24", "throughput": 2914.68, "total_tokens": 243584}
{"current_steps": 625, "total_steps": 1250, "loss": 0.4232, "lr": 2.9409939976680313e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:01:24", "remaining_time": "0:01:24", "throughput": 2916.13, "total_tokens": 245472}
{"current_steps": 630, "total_steps": 1250, "loss": 0.4193, "lr": 2.9065929129872094e-05, "epoch": 5.04, "percentage": 50.4, "elapsed_time": "0:01:24", "remaining_time": "0:01:23", "throughput": 2916.28, "total_tokens": 247424}
{"current_steps": 630, "total_steps": 1250, "eval_loss": 0.42154452204704285, "epoch": 5.04, "percentage": 50.4, "elapsed_time": "0:01:25", "remaining_time": "0:01:24", "throughput": 2887.21, "total_tokens": 247424}
{"current_steps": 635, "total_steps": 1250, "loss": 0.3778, "lr": 2.8721125622123806e-05, "epoch": 5.08, "percentage": 50.8, "elapsed_time": "0:01:27", "remaining_time": "0:01:24", "throughput": 2857.35, "total_tokens": 249472}
{"current_steps": 640, "total_steps": 1250, "loss": 0.3189, "lr": 2.8375596673562482e-05, "epoch": 5.12, "percentage": 51.2, "elapsed_time": "0:01:27", "remaining_time": "0:01:23", "throughput": 2861.88, "total_tokens": 251296}
{"current_steps": 645, "total_steps": 1250, "loss": 0.3988, "lr": 2.8029409645741267e-05, "epoch": 5.16, "percentage": 51.6, "elapsed_time": "0:01:28", "remaining_time": "0:01:22", "throughput": 2867.19, "total_tokens": 253344}
{"current_steps": 650, "total_steps": 1250, "loss": 0.3687, "lr": 2.7682632028507167e-05, "epoch": 5.2, "percentage": 52.0, "elapsed_time": "0:01:28", "remaining_time": "0:01:22", "throughput": 2870.84, "total_tokens": 255104}
{"current_steps": 655, "total_steps": 1250, "loss": 0.3907, "lr": 2.733533142684377e-05, "epoch": 5.24, "percentage": 52.4, "elapsed_time": "0:01:29", "remaining_time": "0:01:21", "throughput": 2874.49, "total_tokens": 256832}
{"current_steps": 660, "total_steps": 1250, "loss": 0.4176, "lr": 2.6987575547691497e-05, "epoch": 5.28, "percentage": 52.8, "elapsed_time": "0:01:29", "remaining_time": "0:01:20", "throughput": 2878.8, "total_tokens": 258720}
{"current_steps": 665, "total_steps": 1250, "loss": 0.4127, "lr": 2.6639432186748043e-05, "epoch": 5.32, "percentage": 53.2, "elapsed_time": "0:01:30", "remaining_time": "0:01:19", "throughput": 2883.49, "total_tokens": 260576}
{"current_steps": 670, "total_steps": 1250, "loss": 0.3817, "lr": 2.6290969215251416e-05, "epoch": 5.36, "percentage": 53.6, "elapsed_time": "0:01:30", "remaining_time": "0:01:18", "throughput": 2887.55, "total_tokens": 262368}
{"current_steps": 675, "total_steps": 1250, "loss": 0.3826, "lr": 2.594225456674837e-05, "epoch": 5.4, "percentage": 54.0, "elapsed_time": "0:01:31", "remaining_time": "0:01:17", "throughput": 2892.29, "total_tokens": 264320}
{"current_steps": 680, "total_steps": 1250, "loss": 0.3825, "lr": 2.559335622385055e-05, "epoch": 5.44, "percentage": 54.4, "elapsed_time": "0:01:31", "remaining_time": "0:01:17", "throughput": 2896.74, "total_tokens": 266304}
{"current_steps": 685, "total_steps": 1250, "loss": 0.3944, "lr": 2.524434220498123e-05, "epoch": 5.48, "percentage": 54.8, "elapsed_time": "0:01:32", "remaining_time": "0:01:16", "throughput": 2901.94, "total_tokens": 268384}
{"current_steps": 690, "total_steps": 1250, "loss": 0.3691, "lr": 2.4895280551114907e-05, "epoch": 5.52, "percentage": 55.2, "elapsed_time": "0:01:32", "remaining_time": "0:01:15", "throughput": 2905.49, "total_tokens": 270208}
{"current_steps": 693, "total_steps": 1250, "eval_loss": 0.4073176383972168, "epoch": 5.5440000000000005, "percentage": 55.44, "elapsed_time": "0:01:34", "remaining_time": "0:01:15", "throughput": 2880.79, "total_tokens": 271232}
{"current_steps": 695, "total_steps": 1250, "loss": 0.4443, "lr": 2.4546239312512635e-05, "epoch": 5.5600000000000005, "percentage": 55.6, "elapsed_time": "0:01:35", "remaining_time": "0:01:16", "throughput": 2851.61, "total_tokens": 271840}
{"current_steps": 700, "total_steps": 1250, "loss": 0.502, "lr": 2.4197286535455464e-05, "epoch": 5.6, "percentage": 56.0, "elapsed_time": "0:01:35", "remaining_time": "0:01:15", "throughput": 2856.66, "total_tokens": 273888}
{"current_steps": 705, "total_steps": 1250, "loss": 0.4579, "lr": 2.384849024897869e-05, "epoch": 5.64, "percentage": 56.4, "elapsed_time": "0:01:36", "remaining_time": "0:01:14", "throughput": 2861.57, "total_tokens": 275904}
{"current_steps": 710, "total_steps": 1250, "loss": 0.4459, "lr": 2.349991845160949e-05, "epoch": 5.68, "percentage": 56.8, "elapsed_time": "0:01:36", "remaining_time": "0:01:13", "throughput": 2865.95, "total_tokens": 277888}
{"current_steps": 715, "total_steps": 1250, "loss": 0.4206, "lr": 2.3151639098110377e-05, "epoch": 5.72, "percentage": 57.2, "elapsed_time": "0:01:37", "remaining_time": "0:01:12", "throughput": 2871.43, "total_tokens": 279872}
{"current_steps": 720, "total_steps": 1250, "loss": 0.3705, "lr": 2.280372008623142e-05, "epoch": 5.76, "percentage": 57.6, "elapsed_time": "0:01:37", "remaining_time": "0:01:12", "throughput": 2874.54, "total_tokens": 281664}
{"current_steps": 725, "total_steps": 1250, "loss": 0.3991, "lr": 2.2456229243473345e-05, "epoch": 5.8, "percentage": 58.0, "elapsed_time": "0:01:38", "remaining_time": "0:01:11", "throughput": 2880.15, "total_tokens": 283776}
{"current_steps": 730, "total_steps": 1250, "loss": 0.3788, "lr": 2.2109234313864465e-05, "epoch": 5.84, "percentage": 58.4, "elapsed_time": "0:01:39", "remaining_time": "0:01:10", "throughput": 2883.3, "total_tokens": 285568}
{"current_steps": 735, "total_steps": 1250, "loss": 0.3795, "lr": 2.176280294475383e-05, "epoch": 5.88, "percentage": 58.8, "elapsed_time": "0:01:39", "remaining_time": "0:01:09", "throughput": 2886.9, "total_tokens": 287360}
{"current_steps": 740, "total_steps": 1250, "loss": 0.4557, "lr": 2.1417002673623264e-05, "epoch": 5.92, "percentage": 59.2, "elapsed_time": "0:01:40", "remaining_time": "0:01:09", "throughput": 2892.48, "total_tokens": 289632}
{"current_steps": 745, "total_steps": 1250, "loss": 0.4056, "lr": 2.1071900914920816e-05, "epoch": 5.96, "percentage": 59.6, "elapsed_time": "0:01:40", "remaining_time": "0:01:08", "throughput": 2896.69, "total_tokens": 291552}
{"current_steps": 750, "total_steps": 1250, "loss": 0.4079, "lr": 2.0727564946918087e-05, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:01:41", "remaining_time": "0:01:07", "throughput": 2899.0, "total_tokens": 293616}
{"current_steps": 755, "total_steps": 1250, "loss": 0.3746, "lr": 2.038406189859433e-05, "epoch": 6.04, "percentage": 60.4, "elapsed_time": "0:01:41", "remaining_time": "0:01:06", "throughput": 2899.02, "total_tokens": 295440}
{"current_steps": 756, "total_steps": 1250, "eval_loss": 0.40050727128982544, "epoch": 6.048, "percentage": 60.48, "elapsed_time": "0:01:42", "remaining_time": "0:01:07", "throughput": 2874.92, "total_tokens": 295728}
{"current_steps": 760, "total_steps": 1250, "loss": 0.4006, "lr": 2.004145873654942e-05, "epoch": 6.08, "percentage": 60.8, "elapsed_time": "0:01:44", "remaining_time": "0:01:07", "throughput": 2853.09, "total_tokens": 297360}
{"current_steps": 765, "total_steps": 1250, "loss": 0.3928, "lr": 1.969982225194864e-05, "epoch": 6.12, "percentage": 61.2, "elapsed_time": "0:01:44", "remaining_time": "0:01:06", "throughput": 2856.98, "total_tokens": 299312}
{"current_steps": 770, "total_steps": 1250, "loss": 0.3839, "lr": 1.9359219047501565e-05, "epoch": 6.16, "percentage": 61.6, "elapsed_time": "0:01:45", "remaining_time": "0:01:05", "throughput": 2862.05, "total_tokens": 301488}
{"current_steps": 775, "total_steps": 1250, "loss": 0.4211, "lr": 1.9019715524477767e-05, "epoch": 6.2, "percentage": 62.0, "elapsed_time": "0:01:45", "remaining_time": "0:01:04", "throughput": 2867.46, "total_tokens": 303696}
{"current_steps": 780, "total_steps": 1250, "loss": 0.3858, "lr": 1.868137786976177e-05, "epoch": 6.24, "percentage": 62.4, "elapsed_time": "0:01:46", "remaining_time": "0:01:04", "throughput": 2869.92, "total_tokens": 305360}
{"current_steps": 785, "total_steps": 1250, "loss": 0.3779, "lr": 1.8344272042949724e-05, "epoch": 6.28, "percentage": 62.8, "elapsed_time": "0:01:46", "remaining_time": "0:01:03", "throughput": 2874.81, "total_tokens": 307408}
{"current_steps": 790, "total_steps": 1250, "loss": 0.4222, "lr": 1.800846376349051e-05, "epoch": 6.32, "percentage": 63.2, "elapsed_time": "0:01:47", "remaining_time": "0:01:02", "throughput": 2878.5, "total_tokens": 309232}
{"current_steps": 795, "total_steps": 1250, "loss": 0.3675, "lr": 1.767401849787357e-05, "epoch": 6.36, "percentage": 63.6, "elapsed_time": "0:01:47", "remaining_time": "0:01:01", "throughput": 2882.48, "total_tokens": 311184}
{"current_steps": 800, "total_steps": 1250, "loss": 0.3727, "lr": 1.73410014468661e-05, "epoch": 6.4, "percentage": 64.0, "elapsed_time": "0:01:48", "remaining_time": "0:01:01", "throughput": 2886.06, "total_tokens": 313072}
{"current_steps": 805, "total_steps": 1250, "loss": 0.3823, "lr": 1.7009477532802054e-05, "epoch": 6.44, "percentage": 64.4, "elapsed_time": "0:01:48", "remaining_time": "0:01:00", "throughput": 2889.67, "total_tokens": 314832}
{"current_steps": 810, "total_steps": 1250, "loss": 0.3356, "lr": 1.6679511386925337e-05, "epoch": 6.48, "percentage": 64.8, "elapsed_time": "0:01:49", "remaining_time": "0:00:59", "throughput": 2892.47, "total_tokens": 316560}
{"current_steps": 815, "total_steps": 1250, "loss": 0.427, "lr": 1.635116733678988e-05, "epoch": 6.52, "percentage": 65.2, "elapsed_time": "0:01:50", "remaining_time": "0:00:58", "throughput": 2898.63, "total_tokens": 318960}
{"current_steps": 819, "total_steps": 1250, "eval_loss": 0.40054336190223694, "epoch": 6.552, "percentage": 65.52, "elapsed_time": "0:01:51", "remaining_time": "0:00:58", "throughput": 2878.9, "total_tokens": 320464}
{"current_steps": 820, "total_steps": 1250, "loss": 0.3747, "lr": 1.6024509393718844e-05, "epoch": 6.5600000000000005, "percentage": 65.6, "elapsed_time": "0:01:52", "remaining_time": "0:00:58", "throughput": 2855.53, "total_tokens": 320880}
{"current_steps": 825, "total_steps": 1250, "loss": 0.3936, "lr": 1.5699601240325474e-05, "epoch": 6.6, "percentage": 66.0, "elapsed_time": "0:01:52", "remaining_time": "0:00:58", "throughput": 2860.64, "total_tokens": 323184}
{"current_steps": 830, "total_steps": 1250, "loss": 0.371, "lr": 1.5376506218098015e-05, "epoch": 6.64, "percentage": 66.4, "elapsed_time": "0:01:53", "remaining_time": "0:00:57", "throughput": 2864.22, "total_tokens": 325168}
{"current_steps": 835, "total_steps": 1250, "loss": 0.3795, "lr": 1.505528731505126e-05, "epoch": 6.68, "percentage": 66.8, "elapsed_time": "0:01:54", "remaining_time": "0:00:56", "throughput": 2867.14, "total_tokens": 326992}
{"current_steps": 840, "total_steps": 1250, "loss": 0.4107, "lr": 1.4736007153446801e-05, "epoch": 6.72, "percentage": 67.2, "elapsed_time": "0:01:54", "remaining_time": "0:00:55", "throughput": 2871.75, "total_tokens": 329104}
{"current_steps": 845, "total_steps": 1250, "loss": 0.3653, "lr": 1.4418727977584774e-05, "epoch": 6.76, "percentage": 67.6, "elapsed_time": "0:01:55", "remaining_time": "0:00:55", "throughput": 2875.9, "total_tokens": 331088}
{"current_steps": 850, "total_steps": 1250, "loss": 0.3939, "lr": 1.4103511641669152e-05, "epoch": 6.8, "percentage": 68.0, "elapsed_time": "0:01:55", "remaining_time": "0:00:54", "throughput": 2879.02, "total_tokens": 333008}
{"current_steps": 855, "total_steps": 1250, "loss": 0.3725, "lr": 1.3790419597749199e-05, "epoch": 6.84, "percentage": 68.4, "elapsed_time": "0:01:56", "remaining_time": "0:00:53", "throughput": 2882.81, "total_tokens": 335024}
{"current_steps": 860, "total_steps": 1250, "loss": 0.4179, "lr": 1.3479512883739232e-05, "epoch": 6.88, "percentage": 68.8, "elapsed_time": "0:01:56", "remaining_time": "0:00:52", "throughput": 2886.64, "total_tokens": 337104}
{"current_steps": 865, "total_steps": 1250, "loss": 0.3773, "lr": 1.3170852111519175e-05, "epoch": 6.92, "percentage": 69.2, "elapsed_time": "0:01:57", "remaining_time": "0:00:52", "throughput": 2890.18, "total_tokens": 338960}
{"current_steps": 870, "total_steps": 1250, "loss": 0.3702, "lr": 1.2864497455118152e-05, "epoch": 6.96, "percentage": 69.6, "elapsed_time": "0:01:57", "remaining_time": "0:00:51", "throughput": 2893.01, "total_tokens": 340848}
{"current_steps": 875, "total_steps": 1250, "loss": 0.3594, "lr": 1.2560508638983437e-05, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:01:58", "remaining_time": "0:00:50", "throughput": 2895.45, "total_tokens": 343040}
{"current_steps": 880, "total_steps": 1250, "loss": 0.3347, "lr": 1.2258944926337057e-05, "epoch": 7.04, "percentage": 70.4, "elapsed_time": "0:01:59", "remaining_time": "0:00:50", "throughput": 2896.4, "total_tokens": 345056}
{"current_steps": 882, "total_steps": 1250, "eval_loss": 0.41065576672554016, "epoch": 7.056, "percentage": 70.56, "elapsed_time": "0:02:00", "remaining_time": "0:00:50", "throughput": 2877.26, "total_tokens": 345856}
{"current_steps": 885, "total_steps": 1250, "loss": 0.3597, "lr": 1.1959865107622307e-05, "epoch": 7.08, "percentage": 70.8, "elapsed_time": "0:02:01", "remaining_time": "0:00:50", "throughput": 2857.85, "total_tokens": 347232}
{"current_steps": 890, "total_steps": 1250, "loss": 0.4383, "lr": 1.1663327489042435e-05, "epoch": 7.12, "percentage": 71.2, "elapsed_time": "0:02:02", "remaining_time": "0:00:49", "throughput": 2862.98, "total_tokens": 349504}
{"current_steps": 895, "total_steps": 1250, "loss": 0.4164, "lr": 1.1369389881193749e-05, "epoch": 7.16, "percentage": 71.6, "elapsed_time": "0:02:02", "remaining_time": "0:00:48", "throughput": 2865.97, "total_tokens": 351296}
{"current_steps": 900, "total_steps": 1250, "loss": 0.3858, "lr": 1.107810958779531e-05, "epoch": 7.2, "percentage": 72.0, "elapsed_time": "0:02:03", "remaining_time": "0:00:47", "throughput": 2869.77, "total_tokens": 353248}
{"current_steps": 905, "total_steps": 1250, "loss": 0.4069, "lr": 1.0789543394517435e-05, "epoch": 7.24, "percentage": 72.4, "elapsed_time": "0:02:03", "remaining_time": "0:00:47", "throughput": 2872.99, "total_tokens": 355232}
{"current_steps": 910, "total_steps": 1250, "loss": 0.3591, "lr": 1.050374755791127e-05, "epoch": 7.28, "percentage": 72.8, "elapsed_time": "0:02:04", "remaining_time": "0:00:46", "throughput": 2877.65, "total_tokens": 357376}
{"current_steps": 915, "total_steps": 1250, "loss": 0.3539, "lr": 1.022077779444145e-05, "epoch": 7.32, "percentage": 73.2, "elapsed_time": "0:02:04", "remaining_time": "0:00:45", "throughput": 2881.02, "total_tokens": 359232}
{"current_steps": 920, "total_steps": 1250, "loss": 0.3527, "lr": 9.94068926962404e-06, "epoch": 7.36, "percentage": 73.6, "elapsed_time": "0:02:05", "remaining_time": "0:00:44", "throughput": 2883.96, "total_tokens": 361024}
{"current_steps": 925, "total_steps": 1250, "loss": 0.376, "lr": 9.663536587271902e-06, "epoch": 7.4, "percentage": 74.0, "elapsed_time": "0:02:05", "remaining_time": "0:00:44", "throughput": 2888.59, "total_tokens": 363200}
{"current_steps": 930, "total_steps": 1250, "loss": 0.3517, "lr": 9.389373778849612e-06, "epoch": 7.44, "percentage": 74.4, "elapsed_time": "0:02:06", "remaining_time": "0:00:43", "throughput": 2892.11, "total_tokens": 365152}
{"current_steps": 935, "total_steps": 1250, "loss": 0.4175, "lr": 9.11825429293989e-06, "epoch": 7.48, "percentage": 74.8, "elapsed_time": "0:02:06", "remaining_time": "0:00:42", "throughput": 2896.26, "total_tokens": 367328}
{"current_steps": 940, "total_steps": 1250, "loss": 0.358, "lr": 8.850230984823735e-06, "epoch": 7.52, "percentage": 75.2, "elapsed_time": "0:02:07", "remaining_time": "0:00:41", "throughput": 2899.43, "total_tokens": 369248}
{"current_steps": 945, "total_steps": 1250, "loss": 0.331, "lr": 8.585356106176094e-06, "epoch": 7.5600000000000005, "percentage": 75.6, "elapsed_time": "0:02:07", "remaining_time": "0:00:41", "throughput": 2901.79, "total_tokens": 371040}
{"current_steps": 945, "total_steps": 1250, "eval_loss": 0.4088888466358185, "epoch": 7.5600000000000005, "percentage": 75.6, "elapsed_time": "0:02:08", "remaining_time": "0:00:41", "throughput": 2882.64, "total_tokens": 371040}
{"current_steps": 950, "total_steps": 1250, "loss": 0.4139, "lr": 8.323681294879394e-06, "epoch": 7.6, "percentage": 76.0, "elapsed_time": "0:02:10", "remaining_time": "0:00:41", "throughput": 2865.23, "total_tokens": 372928}
{"current_steps": 955, "total_steps": 1250, "loss": 0.4047, "lr": 8.06525756495657e-06, "epoch": 7.64, "percentage": 76.4, "elapsed_time": "0:02:10", "remaining_time": "0:00:40", "throughput": 2868.19, "total_tokens": 374816}
{"current_steps": 960, "total_steps": 1250, "loss": 0.3748, "lr": 7.810135296625818e-06, "epoch": 7.68, "percentage": 76.8, "elapsed_time": "0:02:11", "remaining_time": "0:00:39", "throughput": 2871.17, "total_tokens": 376704}
{"current_steps": 965, "total_steps": 1250, "loss": 0.3622, "lr": 7.558364226478842e-06, "epoch": 7.72, "percentage": 77.2, "elapsed_time": "0:02:11", "remaining_time": "0:00:38", "throughput": 2874.27, "total_tokens": 378624}
{"current_steps": 970, "total_steps": 1250, "loss": 0.3991, "lr": 7.309993437784624e-06, "epoch": 7.76, "percentage": 77.6, "elapsed_time": "0:02:12", "remaining_time": "0:00:38", "throughput": 2877.49, "total_tokens": 380480}
{"current_steps": 975, "total_steps": 1250, "loss": 0.3731, "lr": 7.065071350920538e-06, "epoch": 7.8, "percentage": 78.0, "elapsed_time": "0:02:12", "remaining_time": "0:00:37", "throughput": 2879.83, "total_tokens": 382144}
{"current_steps": 980, "total_steps": 1250, "loss": 0.4033, "lr": 6.823645713932708e-06, "epoch": 7.84, "percentage": 78.4, "elapsed_time": "0:02:13", "remaining_time": "0:00:36", "throughput": 2883.67, "total_tokens": 384320}
{"current_steps": 985, "total_steps": 1250, "loss": 0.4272, "lr": 6.58576359322742e-06, "epoch": 7.88, "percentage": 78.8, "elapsed_time": "0:02:13", "remaining_time": "0:00:36", "throughput": 2888.62, "total_tokens": 386656}
{"current_steps": 990, "total_steps": 1250, "loss": 0.3612, "lr": 6.3514713643954475e-06, "epoch": 7.92, "percentage": 79.2, "elapsed_time": "0:02:14", "remaining_time": "0:00:35", "throughput": 2891.4, "total_tokens": 388672}
{"current_steps": 995, "total_steps": 1250, "loss": 0.3805, "lr": 6.120814703171024e-06, "epoch": 7.96, "percentage": 79.6, "elapsed_time": "0:02:14", "remaining_time": "0:00:34", "throughput": 2893.76, "total_tokens": 390400}
{"current_steps": 1000, "total_steps": 1250, "loss": 0.3604, "lr": 5.893838576527275e-06, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:02:15", "remaining_time": "0:00:33", "throughput": 2893.9, "total_tokens": 392080}
{"current_steps": 1005, "total_steps": 1250, "loss": 0.4144, "lr": 5.6705872339098186e-06, "epoch": 8.04, "percentage": 80.4, "elapsed_time": "0:02:16", "remaining_time": "0:00:33", "throughput": 2895.11, "total_tokens": 394160}
{"current_steps": 1008, "total_steps": 1250, "eval_loss": 0.38490504026412964, "epoch": 8.064, "percentage": 80.64, "elapsed_time": "0:02:17", "remaining_time": "0:00:32", "throughput": 2878.79, "total_tokens": 395216}
{"current_steps": 1010, "total_steps": 1250, "loss": 0.3568, "lr": 5.451104198610249e-06, "epoch": 8.08, "percentage": 80.8, "elapsed_time": "0:02:18", "remaining_time": "0:00:32", "throughput": 2856.65, "total_tokens": 395888}
{"current_steps": 1015, "total_steps": 1250, "loss": 0.3882, "lr": 5.235432259281175e-06, "epoch": 8.12, "percentage": 81.2, "elapsed_time": "0:02:19", "remaining_time": "0:00:32", "throughput": 2860.7, "total_tokens": 398032}
{"current_steps": 1020, "total_steps": 1250, "loss": 0.3798, "lr": 5.023613461594512e-06, "epoch": 8.16, "percentage": 81.6, "elapsed_time": "0:02:19", "remaining_time": "0:00:31", "throughput": 2863.5, "total_tokens": 399856}
{"current_steps": 1025, "total_steps": 1250, "loss": 0.3718, "lr": 4.8156891000445406e-06, "epoch": 8.2, "percentage": 82.0, "elapsed_time": "0:02:20", "remaining_time": "0:00:30", "throughput": 2865.97, "total_tokens": 401616}
{"current_steps": 1030, "total_steps": 1250, "loss": 0.3458, "lr": 4.6116997098975465e-06, "epoch": 8.24, "percentage": 82.4, "elapsed_time": "0:02:20", "remaining_time": "0:00:30", "throughput": 2868.94, "total_tokens": 403568}
{"current_steps": 1035, "total_steps": 1250, "loss": 0.3725, "lr": 4.411685059289314e-06, "epoch": 8.28, "percentage": 82.8, "elapsed_time": "0:02:21", "remaining_time": "0:00:29", "throughput": 2872.57, "total_tokens": 405712}
{"current_steps": 1040, "total_steps": 1250, "loss": 0.3745, "lr": 4.215684141472292e-06, "epoch": 8.32, "percentage": 83.2, "elapsed_time": "0:02:21", "remaining_time": "0:00:28", "throughput": 2875.79, "total_tokens": 407888}
{"current_steps": 1045, "total_steps": 1250, "loss": 0.3587, "lr": 4.023735167213752e-06, "epoch": 8.36, "percentage": 83.6, "elapsed_time": "0:02:22", "remaining_time": "0:00:27", "throughput": 2878.58, "total_tokens": 409712}
{"current_steps": 1050, "total_steps": 1250, "loss": 0.3633, "lr": 3.835875557346552e-06, "epoch": 8.4, "percentage": 84.0, "elapsed_time": "0:02:22", "remaining_time": "0:00:27", "throughput": 2880.73, "total_tokens": 411504}
{"current_steps": 1055, "total_steps": 1250, "loss": 0.368, "lr": 3.6521419354738738e-06, "epoch": 8.44, "percentage": 84.4, "elapsed_time": "0:02:23", "remaining_time": "0:00:26", "throughput": 2882.93, "total_tokens": 413168}
{"current_steps": 1060, "total_steps": 1250, "loss": 0.3561, "lr": 3.4725701208293435e-06, "epoch": 8.48, "percentage": 84.8, "elapsed_time": "0:02:23", "remaining_time": "0:00:25", "throughput": 2885.43, "total_tokens": 414960}
{"current_steps": 1065, "total_steps": 1250, "loss": 0.3644, "lr": 3.297195121294022e-06, "epoch": 8.52, "percentage": 85.2, "elapsed_time": "0:02:24", "remaining_time": "0:00:25", "throughput": 2887.85, "total_tokens": 416880}
{"current_steps": 1070, "total_steps": 1250, "loss": 0.3779, "lr": 3.126051126571561e-06, "epoch": 8.56, "percentage": 85.6, "elapsed_time": "0:02:24", "remaining_time": "0:00:24", "throughput": 2890.52, "total_tokens": 418768}
{"current_steps": 1071, "total_steps": 1250, "eval_loss": 0.38693496584892273, "epoch": 8.568, "percentage": 85.68, "elapsed_time": "0:02:25", "remaining_time": "0:00:24", "throughput": 2874.15, "total_tokens": 419184}
{"current_steps": 1075, "total_steps": 1250, "loss": 0.4053, "lr": 2.9591715015228284e-06, "epoch": 8.6, "percentage": 86.0, "elapsed_time": "0:02:27", "remaining_time": "0:00:23", "throughput": 2860.68, "total_tokens": 421008}
{"current_steps": 1080, "total_steps": 1250, "loss": 0.3589, "lr": 2.7965887796613884e-06, "epoch": 8.64, "percentage": 86.4, "elapsed_time": "0:02:27", "remaining_time": "0:00:23", "throughput": 2863.16, "total_tokens": 422864}
{"current_steps": 1085, "total_steps": 1250, "loss": 0.3792, "lr": 2.6383346568110062e-06, "epoch": 8.68, "percentage": 86.8, "elapsed_time": "0:02:28", "remaining_time": "0:00:22", "throughput": 2866.23, "total_tokens": 424976}
{"current_steps": 1090, "total_steps": 1250, "loss": 0.3899, "lr": 2.4844399849264928e-06, "epoch": 8.72, "percentage": 87.2, "elapsed_time": "0:02:28", "remaining_time": "0:00:21", "throughput": 2870.07, "total_tokens": 427120}
{"current_steps": 1095, "total_steps": 1250, "loss": 0.4017, "lr": 2.3349347660790582e-06, "epoch": 8.76, "percentage": 87.6, "elapsed_time": "0:02:29", "remaining_time": "0:00:21", "throughput": 2873.52, "total_tokens": 429264}
{"current_steps": 1100, "total_steps": 1250, "loss": 0.3718, "lr": 2.189848146607348e-06, "epoch": 8.8, "percentage": 88.0, "elapsed_time": "0:02:29", "remaining_time": "0:00:20", "throughput": 2875.61, "total_tokens": 431056}
{"current_steps": 1105, "total_steps": 1250, "loss": 0.3644, "lr": 2.0492084114352965e-06, "epoch": 8.84, "percentage": 88.4, "elapsed_time": "0:02:30", "remaining_time": "0:00:19", "throughput": 2878.92, "total_tokens": 433136}
{"current_steps": 1110, "total_steps": 1250, "loss": 0.3859, "lr": 1.913042978557944e-06, "epoch": 8.88, "percentage": 88.8, "elapsed_time": "0:02:31", "remaining_time": "0:00:19", "throughput": 2882.19, "total_tokens": 435216}
{"current_steps": 1115, "total_steps": 1250, "loss": 0.3521, "lr": 1.7813783936962258e-06, "epoch": 8.92, "percentage": 89.2, "elapsed_time": "0:02:31", "remaining_time": "0:00:18", "throughput": 2884.77, "total_tokens": 437040}
{"current_steps": 1120, "total_steps": 1250, "loss": 0.3873, "lr": 1.654240325121831e-06, "epoch": 8.96, "percentage": 89.6, "elapsed_time": "0:02:32", "remaining_time": "0:00:17", "throughput": 2888.0, "total_tokens": 439088}
{"current_steps": 1125, "total_steps": 1250, "loss": 0.4015, "lr": 1.5316535586531483e-06, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:02:32", "remaining_time": "0:00:16", "throughput": 2888.56, "total_tokens": 440848}
{"current_steps": 1130, "total_steps": 1250, "loss": 0.3714, "lr": 1.4136419928231892e-06, "epoch": 9.04, "percentage": 90.4, "elapsed_time": "0:02:33", "remaining_time": "0:00:16", "throughput": 2889.1, "total_tokens": 442864}
{"current_steps": 1134, "total_steps": 1250, "eval_loss": 0.38989585638046265, "epoch": 9.072, "percentage": 90.72, "elapsed_time": "0:02:34", "remaining_time": "0:00:15", "throughput": 2875.94, "total_tokens": 444560}
{"current_steps": 1135, "total_steps": 1250, "loss": 0.3985, "lr": 1.3002286342205462e-06, "epoch": 9.08, "percentage": 90.8, "elapsed_time": "0:02:35", "remaining_time": "0:00:15", "throughput": 2860.11, "total_tokens": 445040}
{"current_steps": 1140, "total_steps": 1250, "loss": 0.3583, "lr": 1.1914355930041837e-06, "epoch": 9.12, "percentage": 91.2, "elapsed_time": "0:02:36", "remaining_time": "0:00:15", "throughput": 2861.84, "total_tokens": 446864}
{"current_steps": 1145, "total_steps": 1250, "loss": 0.3824, "lr": 1.087284078593051e-06, "epoch": 9.16, "percentage": 91.6, "elapsed_time": "0:02:36", "remaining_time": "0:00:14", "throughput": 2865.53, "total_tokens": 449008}
{"current_steps": 1150, "total_steps": 1250, "loss": 0.3471, "lr": 9.877943955312552e-07, "epoch": 9.2, "percentage": 92.0, "elapsed_time": "0:02:37", "remaining_time": "0:00:13", "throughput": 2867.57, "total_tokens": 450832}
{"current_steps": 1155, "total_steps": 1250, "loss": 0.3737, "lr": 8.929859395296364e-07, "epoch": 9.24, "percentage": 92.4, "elapsed_time": "0:02:37", "remaining_time": "0:00:12", "throughput": 2870.51, "total_tokens": 452784}
{"current_steps": 1160, "total_steps": 1250, "loss": 0.3719, "lr": 8.028771936845342e-07, "epoch": 9.28, "percentage": 92.8, "elapsed_time": "0:02:38", "remaining_time": "0:00:12", "throughput": 2873.7, "total_tokens": 454992}
{"current_steps": 1165, "total_steps": 1250, "loss": 0.3571, "lr": 7.174857248745004e-07, "epoch": 9.32, "percentage": 93.2, "elapsed_time": "0:02:38", "remaining_time": "0:00:11", "throughput": 2874.98, "total_tokens": 456528}
{"current_steps": 1170, "total_steps": 1250, "loss": 0.3921, "lr": 6.368281803355691e-07, "epoch": 9.36, "percentage": 93.6, "elapsed_time": "0:02:39", "remaining_time": "0:00:10", "throughput": 2878.32, "total_tokens": 458768}
{"current_steps": 1175, "total_steps": 1250, "loss": 0.3935, "lr": 5.609202844158723e-07, "epoch": 9.4, "percentage": 94.0, "elapsed_time": "0:02:39", "remaining_time": "0:00:10", "throughput": 2881.16, "total_tokens": 460816}
{"current_steps": 1180, "total_steps": 1250, "loss": 0.3657, "lr": 4.897768355101084e-07, "epoch": 9.44, "percentage": 94.4, "elapsed_time": "0:02:40", "remaining_time": "0:00:09", "throughput": 2882.63, "total_tokens": 462384}
{"current_steps": 1185, "total_steps": 1250, "loss": 0.3923, "lr": 4.234117031746143e-07, "epoch": 9.48, "percentage": 94.8, "elapsed_time": "0:02:40", "remaining_time": "0:00:08", "throughput": 2885.15, "total_tokens": 464304}
{"current_steps": 1190, "total_steps": 1250, "loss": 0.3341, "lr": 3.6183782542343057e-07, "epoch": 9.52, "percentage": 95.2, "elapsed_time": "0:02:41", "remaining_time": "0:00:08", "throughput": 2888.15, "total_tokens": 466384}
{"current_steps": 1195, "total_steps": 1250, "loss": 0.3858, "lr": 3.050672062060278e-07, "epoch": 9.56, "percentage": 95.6, "elapsed_time": "0:02:42", "remaining_time": "0:00:07", "throughput": 2890.93, "total_tokens": 468368}
{"current_steps": 1197, "total_steps": 1250, "eval_loss": 0.3861676752567291, "epoch": 9.576, "percentage": 95.76, "elapsed_time": "0:02:43", "remaining_time": "0:00:07", "throughput": 2876.62, "total_tokens": 469104}
{"current_steps": 1200, "total_steps": 1250, "loss": 0.3761, "lr": 2.531109130671061e-07, "epoch": 9.6, "percentage": 96.0, "elapsed_time": "0:02:44", "remaining_time": "0:00:06", "throughput": 2862.31, "total_tokens": 470192}
{"current_steps": 1205, "total_steps": 1250, "loss": 0.3878, "lr": 2.0597907498896007e-07, "epoch": 9.64, "percentage": 96.4, "elapsed_time": "0:02:44", "remaining_time": "0:00:06", "throughput": 2865.12, "total_tokens": 472240}
{"current_steps": 1210, "total_steps": 1250, "loss": 0.3748, "lr": 1.6368088041681108e-07, "epoch": 9.68, "percentage": 96.8, "elapsed_time": "0:02:45", "remaining_time": "0:00:05", "throughput": 2867.56, "total_tokens": 474160}
{"current_steps": 1215, "total_steps": 1250, "loss": 0.4067, "lr": 1.2622457546749567e-07, "epoch": 9.72, "percentage": 97.2, "elapsed_time": "0:02:45", "remaining_time": "0:00:04", "throughput": 2872.32, "total_tokens": 476784}
{"current_steps": 1220, "total_steps": 1250, "loss": 0.3679, "lr": 9.361746232188495e-08, "epoch": 9.76, "percentage": 97.6, "elapsed_time": "0:02:46", "remaining_time": "0:00:04", "throughput": 2875.26, "total_tokens": 478864}
{"current_steps": 1225, "total_steps": 1250, "loss": 0.3603, "lr": 6.586589780128716e-08, "epoch": 9.8, "percentage": 98.0, "elapsed_time": "0:02:47", "remaining_time": "0:00:03", "throughput": 2877.86, "total_tokens": 480816}
{"current_steps": 1230, "total_steps": 1250, "loss": 0.384, "lr": 4.2975292128200064e-08, "epoch": 9.84, "percentage": 98.4, "elapsed_time": "0:02:47", "remaining_time": "0:00:02", "throughput": 2880.62, "total_tokens": 482832}
{"current_steps": 1235, "total_steps": 1250, "loss": 0.3827, "lr": 2.4950107871549167e-08, "epoch": 9.88, "percentage": 98.8, "elapsed_time": "0:02:48", "remaining_time": "0:00:02", "throughput": 2883.55, "total_tokens": 484912}
{"current_steps": 1240, "total_steps": 1250, "loss": 0.3863, "lr": 1.179385907672248e-08, "epoch": 9.92, "percentage": 99.2, "elapsed_time": "0:02:48", "remaining_time": "0:00:01", "throughput": 2885.39, "total_tokens": 486640}
{"current_steps": 1245, "total_steps": 1250, "loss": 0.3537, "lr": 3.5091105804907487e-09, "epoch": 9.96, "percentage": 99.6, "elapsed_time": "0:02:49", "remaining_time": "0:00:00", "throughput": 2887.09, "total_tokens": 488336}
{"current_steps": 1250, "total_steps": 1250, "loss": 0.3453, "lr": 9.747751098521107e-11, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:02:49", "remaining_time": "0:00:00", "throughput": 2887.13, "total_tokens": 490000}
{"current_steps": 1250, "total_steps": 1250, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:02:50", "remaining_time": "0:00:00", "throughput": 2871.92, "total_tokens": 490000}