train_wsc_1754652156 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 1250
991f18a verified
{"current_steps": 5, "total_steps": 1250, "loss": 10.265, "lr": 1.6000000000000001e-06, "epoch": 0.04, "percentage": 0.4, "elapsed_time": "0:00:01", "remaining_time": "0:06:49", "throughput": 1304.06, "total_tokens": 2144}
{"current_steps": 10, "total_steps": 1250, "loss": 6.1163, "lr": 3.6e-06, "epoch": 0.08, "percentage": 0.8, "elapsed_time": "0:00:02", "remaining_time": "0:05:23", "throughput": 1583.99, "total_tokens": 4128}
{"current_steps": 15, "total_steps": 1250, "loss": 2.9543, "lr": 5.600000000000001e-06, "epoch": 0.12, "percentage": 1.2, "elapsed_time": "0:00:03", "remaining_time": "0:04:55", "throughput": 1740.74, "total_tokens": 6240}
{"current_steps": 20, "total_steps": 1250, "loss": 1.7128, "lr": 7.6e-06, "epoch": 0.16, "percentage": 1.6, "elapsed_time": "0:00:04", "remaining_time": "0:04:38", "throughput": 1785.79, "total_tokens": 8096}
{"current_steps": 25, "total_steps": 1250, "loss": 0.6984, "lr": 9.600000000000001e-06, "epoch": 0.2, "percentage": 2.0, "elapsed_time": "0:00:05", "remaining_time": "0:04:28", "throughput": 1842.15, "total_tokens": 10112}
{"current_steps": 30, "total_steps": 1250, "loss": 0.4829, "lr": 1.16e-05, "epoch": 0.24, "percentage": 2.4, "elapsed_time": "0:00:06", "remaining_time": "0:04:22", "throughput": 1860.8, "total_tokens": 12032}
{"current_steps": 35, "total_steps": 1250, "loss": 0.4149, "lr": 1.3600000000000002e-05, "epoch": 0.28, "percentage": 2.8, "elapsed_time": "0:00:07", "remaining_time": "0:04:16", "throughput": 1871.52, "total_tokens": 13824}
{"current_steps": 40, "total_steps": 1250, "loss": 0.5009, "lr": 1.56e-05, "epoch": 0.32, "percentage": 3.2, "elapsed_time": "0:00:08", "remaining_time": "0:04:13", "throughput": 1892.79, "total_tokens": 15840}
{"current_steps": 45, "total_steps": 1250, "loss": 0.477, "lr": 1.76e-05, "epoch": 0.36, "percentage": 3.6, "elapsed_time": "0:00:09", "remaining_time": "0:04:10", "throughput": 1914.81, "total_tokens": 17920}
{"current_steps": 50, "total_steps": 1250, "loss": 0.3261, "lr": 1.9600000000000002e-05, "epoch": 0.4, "percentage": 4.0, "elapsed_time": "0:00:10", "remaining_time": "0:04:07", "throughput": 1912.82, "total_tokens": 19712}
{"current_steps": 55, "total_steps": 1250, "loss": 0.7469, "lr": 2.16e-05, "epoch": 0.44, "percentage": 4.4, "elapsed_time": "0:00:11", "remaining_time": "0:04:05", "throughput": 1939.6, "total_tokens": 21952}
{"current_steps": 60, "total_steps": 1250, "loss": 2.4086, "lr": 2.36e-05, "epoch": 0.48, "percentage": 4.8, "elapsed_time": "0:00:12", "remaining_time": "0:04:04", "throughput": 1962.11, "total_tokens": 24160}
{"current_steps": 63, "total_steps": 1250, "eval_loss": 0.3987593650817871, "epoch": 0.504, "percentage": 5.04, "elapsed_time": "0:00:14", "remaining_time": "0:04:29", "throughput": 1784.21, "total_tokens": 25504}
{"current_steps": 65, "total_steps": 1250, "loss": 0.9856, "lr": 2.5600000000000002e-05, "epoch": 0.52, "percentage": 5.2, "elapsed_time": "0:00:16", "remaining_time": "0:04:55", "throughput": 1611.6, "total_tokens": 26112}
{"current_steps": 70, "total_steps": 1250, "loss": 1.5292, "lr": 2.7600000000000003e-05, "epoch": 0.56, "percentage": 5.6, "elapsed_time": "0:00:17", "remaining_time": "0:04:48", "throughput": 1638.23, "total_tokens": 28064}
{"current_steps": 75, "total_steps": 1250, "loss": 9.9799, "lr": 2.96e-05, "epoch": 0.6, "percentage": 6.0, "elapsed_time": "0:00:18", "remaining_time": "0:04:42", "throughput": 1654.56, "total_tokens": 29824}
{"current_steps": 80, "total_steps": 1250, "loss": 6.4338, "lr": 3.16e-05, "epoch": 0.64, "percentage": 6.4, "elapsed_time": "0:00:19", "remaining_time": "0:04:38", "throughput": 1677.52, "total_tokens": 31904}
{"current_steps": 85, "total_steps": 1250, "loss": 3.3911, "lr": 3.3600000000000004e-05, "epoch": 0.68, "percentage": 6.8, "elapsed_time": "0:00:19", "remaining_time": "0:04:33", "throughput": 1700.33, "total_tokens": 33984}
{"current_steps": 90, "total_steps": 1250, "loss": 1.4546, "lr": 3.56e-05, "epoch": 0.72, "percentage": 7.2, "elapsed_time": "0:00:20", "remaining_time": "0:04:29", "throughput": 1711.15, "total_tokens": 35776}
{"current_steps": 95, "total_steps": 1250, "loss": 1.0812, "lr": 3.76e-05, "epoch": 0.76, "percentage": 7.6, "elapsed_time": "0:00:21", "remaining_time": "0:04:24", "throughput": 1719.42, "total_tokens": 37472}
{"current_steps": 100, "total_steps": 1250, "loss": 0.8397, "lr": 3.960000000000001e-05, "epoch": 0.8, "percentage": 8.0, "elapsed_time": "0:00:22", "remaining_time": "0:04:21", "throughput": 1729.22, "total_tokens": 39328}
{"current_steps": 105, "total_steps": 1250, "loss": 0.6836, "lr": 4.16e-05, "epoch": 0.84, "percentage": 8.4, "elapsed_time": "0:00:23", "remaining_time": "0:04:18", "throughput": 1742.17, "total_tokens": 41280}
{"current_steps": 110, "total_steps": 1250, "loss": 0.4958, "lr": 4.36e-05, "epoch": 0.88, "percentage": 8.8, "elapsed_time": "0:00:24", "remaining_time": "0:04:16", "throughput": 1761.96, "total_tokens": 43552}
{"current_steps": 115, "total_steps": 1250, "loss": 0.4681, "lr": 4.5600000000000004e-05, "epoch": 0.92, "percentage": 9.2, "elapsed_time": "0:00:25", "remaining_time": "0:04:12", "throughput": 1766.23, "total_tokens": 45216}
{"current_steps": 120, "total_steps": 1250, "loss": 0.3213, "lr": 4.76e-05, "epoch": 0.96, "percentage": 9.6, "elapsed_time": "0:00:26", "remaining_time": "0:04:10", "throughput": 1781.41, "total_tokens": 47360}
{"current_steps": 125, "total_steps": 1250, "loss": 0.5346, "lr": 4.96e-05, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:00:27", "remaining_time": "0:04:08", "throughput": 1791.59, "total_tokens": 49376}
{"current_steps": 126, "total_steps": 1250, "eval_loss": 0.5207234621047974, "epoch": 1.008, "percentage": 10.08, "elapsed_time": "0:00:29", "remaining_time": "0:04:20", "throughput": 1700.11, "total_tokens": 49696}
{"current_steps": 130, "total_steps": 1250, "loss": 0.4081, "lr": 4.9998440375027166e-05, "epoch": 1.04, "percentage": 10.4, "elapsed_time": "0:00:32", "remaining_time": "0:04:39", "throughput": 1579.64, "total_tokens": 51200}
{"current_steps": 135, "total_steps": 1250, "loss": 1.1051, "lr": 4.99921047320825e-05, "epoch": 1.08, "percentage": 10.8, "elapsed_time": "0:00:33", "remaining_time": "0:04:35", "throughput": 1594.8, "total_tokens": 53216}
{"current_steps": 140, "total_steps": 1250, "loss": 0.3755, "lr": 4.998089682880117e-05, "epoch": 1.12, "percentage": 11.2, "elapsed_time": "0:00:34", "remaining_time": "0:04:32", "throughput": 1607.57, "total_tokens": 55168}
{"current_steps": 145, "total_steps": 1250, "loss": 0.4421, "lr": 4.9964818850186135e-05, "epoch": 1.16, "percentage": 11.6, "elapsed_time": "0:00:35", "remaining_time": "0:04:28", "throughput": 1616.53, "total_tokens": 56960}
{"current_steps": 150, "total_steps": 1250, "loss": 0.5853, "lr": 4.994387393067117e-05, "epoch": 1.2, "percentage": 12.0, "elapsed_time": "0:00:36", "remaining_time": "0:04:25", "throughput": 1628.24, "total_tokens": 58880}
{"current_steps": 155, "total_steps": 1250, "loss": 0.3914, "lr": 4.9918066153509834e-05, "epoch": 1.24, "percentage": 12.4, "elapsed_time": "0:00:37", "remaining_time": "0:04:21", "throughput": 1636.36, "total_tokens": 60672}
{"current_steps": 160, "total_steps": 1250, "loss": 0.4006, "lr": 4.988740054997943e-05, "epoch": 1.28, "percentage": 12.8, "elapsed_time": "0:00:38", "remaining_time": "0:04:19", "throughput": 1650.91, "total_tokens": 62848}
{"current_steps": 165, "total_steps": 1250, "loss": 0.3925, "lr": 4.985188309840012e-05, "epoch": 1.32, "percentage": 13.2, "elapsed_time": "0:00:38", "remaining_time": "0:04:16", "throughput": 1654.69, "total_tokens": 64448}
{"current_steps": 170, "total_steps": 1250, "loss": 0.3813, "lr": 4.9811520722969465e-05, "epoch": 1.3599999999999999, "percentage": 13.6, "elapsed_time": "0:00:39", "remaining_time": "0:04:13", "throughput": 1663.05, "total_tokens": 66368}
{"current_steps": 175, "total_steps": 1250, "loss": 0.3874, "lr": 4.976632129241252e-05, "epoch": 1.4, "percentage": 14.0, "elapsed_time": "0:00:40", "remaining_time": "0:04:10", "throughput": 1668.88, "total_tokens": 68128}
{"current_steps": 180, "total_steps": 1250, "loss": 0.3944, "lr": 4.971629361844785e-05, "epoch": 1.44, "percentage": 14.4, "elapsed_time": "0:00:41", "remaining_time": "0:04:08", "throughput": 1678.23, "total_tokens": 70112}
{"current_steps": 185, "total_steps": 1250, "loss": 0.3725, "lr": 4.966144745406961e-05, "epoch": 1.48, "percentage": 14.8, "elapsed_time": "0:00:42", "remaining_time": "0:04:06", "throughput": 1692.33, "total_tokens": 72384}
{"current_steps": 189, "total_steps": 1250, "eval_loss": 0.4399993121623993, "epoch": 1.512, "percentage": 15.12, "elapsed_time": "0:00:44", "remaining_time": "0:04:12", "throughput": 1649.23, "total_tokens": 74112}
{"current_steps": 190, "total_steps": 1250, "loss": 0.5141, "lr": 4.960179349164621e-05, "epoch": 1.52, "percentage": 15.2, "elapsed_time": "0:00:46", "remaining_time": "0:04:20", "throughput": 1602.38, "total_tokens": 74752}
{"current_steps": 195, "total_steps": 1250, "loss": 0.3729, "lr": 4.953734336083583e-05, "epoch": 1.56, "percentage": 15.6, "elapsed_time": "0:00:47", "remaining_time": "0:04:17", "throughput": 1610.04, "total_tokens": 76640}
{"current_steps": 200, "total_steps": 1250, "loss": 0.4037, "lr": 4.946810962631916e-05, "epoch": 1.6, "percentage": 16.0, "elapsed_time": "0:00:48", "remaining_time": "0:04:15", "throughput": 1619.98, "total_tokens": 78784}
{"current_steps": 205, "total_steps": 1250, "loss": 0.3983, "lr": 4.9394105785349944e-05, "epoch": 1.6400000000000001, "percentage": 16.4, "elapsed_time": "0:00:49", "remaining_time": "0:04:12", "throughput": 1628.73, "total_tokens": 80768}
{"current_steps": 210, "total_steps": 1250, "loss": 0.3622, "lr": 4.9315346265123594e-05, "epoch": 1.6800000000000002, "percentage": 16.8, "elapsed_time": "0:00:50", "remaining_time": "0:04:10", "throughput": 1639.01, "total_tokens": 82848}
{"current_steps": 215, "total_steps": 1250, "loss": 0.3583, "lr": 4.923184641996463e-05, "epoch": 1.72, "percentage": 17.2, "elapsed_time": "0:00:51", "remaining_time": "0:04:07", "throughput": 1645.72, "total_tokens": 84768}
{"current_steps": 220, "total_steps": 1250, "loss": 0.3574, "lr": 4.914362252833332e-05, "epoch": 1.76, "percentage": 17.6, "elapsed_time": "0:00:52", "remaining_time": "0:04:05", "throughput": 1654.43, "total_tokens": 86848}
{"current_steps": 225, "total_steps": 1250, "loss": 0.3539, "lr": 4.905069178965215e-05, "epoch": 1.8, "percentage": 18.0, "elapsed_time": "0:00:53", "remaining_time": "0:04:03", "throughput": 1659.56, "total_tokens": 88736}
{"current_steps": 230, "total_steps": 1250, "loss": 0.3823, "lr": 4.8953072320952745e-05, "epoch": 1.8399999999999999, "percentage": 18.4, "elapsed_time": "0:00:54", "remaining_time": "0:04:01", "throughput": 1668.12, "total_tokens": 90848}
{"current_steps": 235, "total_steps": 1250, "loss": 0.3564, "lr": 4.885078315334395e-05, "epoch": 1.88, "percentage": 18.8, "elapsed_time": "0:00:55", "remaining_time": "0:03:59", "throughput": 1676.11, "total_tokens": 92992}
{"current_steps": 240, "total_steps": 1250, "loss": 0.3827, "lr": 4.874384422830167e-05, "epoch": 1.92, "percentage": 19.2, "elapsed_time": "0:00:56", "remaining_time": "0:03:57", "throughput": 1681.29, "total_tokens": 94880}
{"current_steps": 245, "total_steps": 1250, "loss": 0.3443, "lr": 4.863227639378124e-05, "epoch": 1.96, "percentage": 19.6, "elapsed_time": "0:00:57", "remaining_time": "0:03:55", "throughput": 1686.11, "total_tokens": 96704}
{"current_steps": 250, "total_steps": 1250, "loss": 0.343, "lr": 4.851610140015304e-05, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:00:58", "remaining_time": "0:03:52", "throughput": 1686.61, "total_tokens": 98240}
{"current_steps": 252, "total_steps": 1250, "eval_loss": 0.3816390037536621, "epoch": 2.016, "percentage": 20.16, "elapsed_time": "0:01:00", "remaining_time": "0:03:58", "throughput": 1648.16, "total_tokens": 99136}
{"current_steps": 255, "total_steps": 1250, "loss": 0.3878, "lr": 4.839534189596228e-05, "epoch": 2.04, "percentage": 20.4, "elapsed_time": "0:01:02", "remaining_time": "0:04:02", "throughput": 1613.11, "total_tokens": 100224}
{"current_steps": 260, "total_steps": 1250, "loss": 0.3688, "lr": 4.8270021423513554e-05, "epoch": 2.08, "percentage": 20.8, "elapsed_time": "0:01:03", "remaining_time": "0:03:59", "throughput": 1617.35, "total_tokens": 101920}
{"current_steps": 265, "total_steps": 1250, "loss": 0.3242, "lr": 4.8140164414281306e-05, "epoch": 2.12, "percentage": 21.2, "elapsed_time": "0:01:03", "remaining_time": "0:03:57", "throughput": 1623.52, "total_tokens": 103808}
{"current_steps": 270, "total_steps": 1250, "loss": 0.357, "lr": 4.800579618414676e-05, "epoch": 2.16, "percentage": 21.6, "elapsed_time": "0:01:04", "remaining_time": "0:03:55", "throughput": 1631.32, "total_tokens": 105920}
{"current_steps": 275, "total_steps": 1250, "loss": 0.4048, "lr": 4.7866942928462625e-05, "epoch": 2.2, "percentage": 22.0, "elapsed_time": "0:01:05", "remaining_time": "0:03:53", "throughput": 1639.65, "total_tokens": 108160}
{"current_steps": 280, "total_steps": 1250, "loss": 0.3777, "lr": 4.772363171694622e-05, "epoch": 2.24, "percentage": 22.4, "elapsed_time": "0:01:06", "remaining_time": "0:03:51", "throughput": 1643.47, "total_tokens": 109920}
{"current_steps": 285, "total_steps": 1250, "loss": 0.3481, "lr": 4.7575890488402185e-05, "epoch": 2.2800000000000002, "percentage": 22.8, "elapsed_time": "0:01:07", "remaining_time": "0:03:49", "throughput": 1649.52, "total_tokens": 111904}
{"current_steps": 290, "total_steps": 1250, "loss": 0.3395, "lr": 4.742374804527575e-05, "epoch": 2.32, "percentage": 23.2, "elapsed_time": "0:01:08", "remaining_time": "0:03:47", "throughput": 1653.25, "total_tokens": 113632}
{"current_steps": 295, "total_steps": 1250, "loss": 0.4056, "lr": 4.7267234048037664e-05, "epoch": 2.36, "percentage": 23.6, "elapsed_time": "0:01:09", "remaining_time": "0:03:45", "throughput": 1658.33, "total_tokens": 115616}
{"current_steps": 300, "total_steps": 1250, "loss": 0.3559, "lr": 4.710637900940181e-05, "epoch": 2.4, "percentage": 24.0, "elapsed_time": "0:01:10", "remaining_time": "0:03:43", "throughput": 1663.0, "total_tokens": 117472}
{"current_steps": 305, "total_steps": 1250, "loss": 0.3324, "lr": 4.694121428837668e-05, "epoch": 2.44, "percentage": 24.4, "elapsed_time": "0:01:11", "remaining_time": "0:03:41", "throughput": 1670.62, "total_tokens": 119616}
{"current_steps": 310, "total_steps": 1250, "loss": 0.3677, "lr": 4.6771772084151885e-05, "epoch": 2.48, "percentage": 24.8, "elapsed_time": "0:01:12", "remaining_time": "0:03:39", "throughput": 1675.58, "total_tokens": 121568}
{"current_steps": 315, "total_steps": 1250, "loss": 0.3615, "lr": 4.659808542982088e-05, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:01:13", "remaining_time": "0:03:38", "throughput": 1683.04, "total_tokens": 123904}
{"current_steps": 315, "total_steps": 1250, "eval_loss": 0.3500639498233795, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:01:14", "remaining_time": "0:03:42", "throughput": 1652.17, "total_tokens": 123904}
{"current_steps": 320, "total_steps": 1250, "loss": 0.3502, "lr": 4.642018818594107e-05, "epoch": 2.56, "percentage": 25.6, "elapsed_time": "0:01:17", "remaining_time": "0:03:45", "throughput": 1622.81, "total_tokens": 125696}
{"current_steps": 325, "total_steps": 1250, "loss": 0.3558, "lr": 4.6238115033932636e-05, "epoch": 2.6, "percentage": 26.0, "elapsed_time": "0:01:18", "remaining_time": "0:03:43", "throughput": 1626.77, "total_tokens": 127488}
{"current_steps": 330, "total_steps": 1250, "loss": 0.4027, "lr": 4.605190146931731e-05, "epoch": 2.64, "percentage": 26.4, "elapsed_time": "0:01:19", "remaining_time": "0:03:41", "throughput": 1632.96, "total_tokens": 129632}
{"current_steps": 335, "total_steps": 1250, "loss": 0.3702, "lr": 4.586158379479848e-05, "epoch": 2.68, "percentage": 26.8, "elapsed_time": "0:01:20", "remaining_time": "0:03:39", "throughput": 1639.0, "total_tokens": 131680}
{"current_steps": 340, "total_steps": 1250, "loss": 0.3521, "lr": 4.566719911318389e-05, "epoch": 2.7199999999999998, "percentage": 27.2, "elapsed_time": "0:01:21", "remaining_time": "0:03:37", "throughput": 1642.62, "total_tokens": 133472}
{"current_steps": 345, "total_steps": 1250, "loss": 0.3733, "lr": 4.5468785320152365e-05, "epoch": 2.76, "percentage": 27.6, "elapsed_time": "0:01:22", "remaining_time": "0:03:35", "throughput": 1645.38, "total_tokens": 135200}
{"current_steps": 350, "total_steps": 1250, "loss": 0.3613, "lr": 4.5266381096866e-05, "epoch": 2.8, "percentage": 28.0, "elapsed_time": "0:01:23", "remaining_time": "0:03:33", "throughput": 1653.15, "total_tokens": 137536}
{"current_steps": 355, "total_steps": 1250, "loss": 0.4393, "lr": 4.5060025902429174e-05, "epoch": 2.84, "percentage": 28.4, "elapsed_time": "0:01:24", "remaining_time": "0:03:32", "throughput": 1659.91, "total_tokens": 139744}
{"current_steps": 360, "total_steps": 1250, "loss": 0.3412, "lr": 4.484975996619589e-05, "epoch": 2.88, "percentage": 28.8, "elapsed_time": "0:01:25", "remaining_time": "0:03:30", "throughput": 1664.49, "total_tokens": 141760}
{"current_steps": 365, "total_steps": 1250, "loss": 0.3622, "lr": 4.4635624279927044e-05, "epoch": 2.92, "percentage": 29.2, "elapsed_time": "0:01:26", "remaining_time": "0:03:28", "throughput": 1669.59, "total_tokens": 143872}
{"current_steps": 370, "total_steps": 1250, "loss": 0.3659, "lr": 4.441766058979898e-05, "epoch": 2.96, "percentage": 29.6, "elapsed_time": "0:01:27", "remaining_time": "0:03:27", "throughput": 1673.64, "total_tokens": 145856}
{"current_steps": 375, "total_steps": 1250, "loss": 0.5066, "lr": 4.4195911388264946e-05, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:01:28", "remaining_time": "0:03:25", "throughput": 1675.63, "total_tokens": 147648}
{"current_steps": 378, "total_steps": 1250, "eval_loss": 0.34830421209335327, "epoch": 3.024, "percentage": 30.24, "elapsed_time": "0:01:30", "remaining_time": "0:03:28", "throughput": 1649.21, "total_tokens": 148736}
{"current_steps": 380, "total_steps": 1250, "loss": 0.3732, "lr": 4.3970419905771145e-05, "epoch": 3.04, "percentage": 30.4, "elapsed_time": "0:01:32", "remaining_time": "0:03:30", "throughput": 1622.15, "total_tokens": 149472}
{"current_steps": 385, "total_steps": 1250, "loss": 0.3537, "lr": 4.374123010232888e-05, "epoch": 3.08, "percentage": 30.8, "elapsed_time": "0:01:33", "remaining_time": "0:03:29", "throughput": 1627.79, "total_tokens": 151552}
{"current_steps": 390, "total_steps": 1250, "loss": 0.3358, "lr": 4.350838665894446e-05, "epoch": 3.12, "percentage": 31.2, "elapsed_time": "0:01:34", "remaining_time": "0:03:27", "throughput": 1632.19, "total_tokens": 153568}
{"current_steps": 395, "total_steps": 1250, "loss": 0.3739, "lr": 4.3271934968908514e-05, "epoch": 3.16, "percentage": 31.6, "elapsed_time": "0:01:35", "remaining_time": "0:03:25", "throughput": 1636.57, "total_tokens": 155616}
{"current_steps": 400, "total_steps": 1250, "loss": 0.3455, "lr": 4.303192112894652e-05, "epoch": 3.2, "percentage": 32.0, "elapsed_time": "0:01:36", "remaining_time": "0:03:24", "throughput": 1641.69, "total_tokens": 157728}
{"current_steps": 405, "total_steps": 1250, "loss": 0.3485, "lr": 4.278839193023214e-05, "epoch": 3.24, "percentage": 32.4, "elapsed_time": "0:01:36", "remaining_time": "0:03:22", "throughput": 1644.28, "total_tokens": 159488}
{"current_steps": 410, "total_steps": 1250, "loss": 0.3622, "lr": 4.254139484926519e-05, "epoch": 3.2800000000000002, "percentage": 32.8, "elapsed_time": "0:01:37", "remaining_time": "0:03:20", "throughput": 1649.26, "total_tokens": 161600}
{"current_steps": 415, "total_steps": 1250, "loss": 0.3496, "lr": 4.2290978038616e-05, "epoch": 3.32, "percentage": 33.2, "elapsed_time": "0:01:38", "remaining_time": "0:03:19", "throughput": 1654.2, "total_tokens": 163712}
{"current_steps": 420, "total_steps": 1250, "loss": 0.3299, "lr": 4.2037190317538e-05, "epoch": 3.36, "percentage": 33.6, "elapsed_time": "0:01:39", "remaining_time": "0:03:17", "throughput": 1656.81, "total_tokens": 165536}
{"current_steps": 425, "total_steps": 1250, "loss": 0.3543, "lr": 4.178008116245024e-05, "epoch": 3.4, "percentage": 34.0, "elapsed_time": "0:01:40", "remaining_time": "0:03:15", "throughput": 1662.99, "total_tokens": 167872}
{"current_steps": 430, "total_steps": 1250, "loss": 0.4246, "lr": 4.1519700697291944e-05, "epoch": 3.44, "percentage": 34.4, "elapsed_time": "0:01:41", "remaining_time": "0:03:14", "throughput": 1668.62, "total_tokens": 170112}
{"current_steps": 435, "total_steps": 1250, "loss": 0.3865, "lr": 4.125609968375072e-05, "epoch": 3.48, "percentage": 34.8, "elapsed_time": "0:01:42", "remaining_time": "0:03:12", "throughput": 1671.95, "total_tokens": 172000}
{"current_steps": 440, "total_steps": 1250, "loss": 0.3459, "lr": 4.098932951136645e-05, "epoch": 3.52, "percentage": 35.2, "elapsed_time": "0:01:43", "remaining_time": "0:03:11", "throughput": 1675.96, "total_tokens": 174016}
{"current_steps": 441, "total_steps": 1250, "eval_loss": 0.36038732528686523, "epoch": 3.528, "percentage": 35.28, "elapsed_time": "0:01:45", "remaining_time": "0:03:13", "throughput": 1654.87, "total_tokens": 174432}
{"current_steps": 445, "total_steps": 1250, "loss": 0.4164, "lr": 4.071944218751282e-05, "epoch": 3.56, "percentage": 35.6, "elapsed_time": "0:01:47", "remaining_time": "0:03:14", "throughput": 1634.21, "total_tokens": 175776}
{"current_steps": 450, "total_steps": 1250, "loss": 0.3624, "lr": 4.044649032725836e-05, "epoch": 3.6, "percentage": 36.0, "elapsed_time": "0:01:48", "remaining_time": "0:03:12", "throughput": 1639.33, "total_tokens": 177952}
{"current_steps": 455, "total_steps": 1250, "loss": 0.3218, "lr": 4.017052714310906e-05, "epoch": 3.64, "percentage": 36.4, "elapsed_time": "0:01:49", "remaining_time": "0:03:11", "throughput": 1643.06, "total_tokens": 179968}
{"current_steps": 460, "total_steps": 1250, "loss": 0.3796, "lr": 3.989160643463445e-05, "epoch": 3.68, "percentage": 36.8, "elapsed_time": "0:01:50", "remaining_time": "0:03:09", "throughput": 1647.19, "total_tokens": 181952}
{"current_steps": 465, "total_steps": 1250, "loss": 0.324, "lr": 3.960978257797931e-05, "epoch": 3.7199999999999998, "percentage": 37.2, "elapsed_time": "0:01:51", "remaining_time": "0:03:08", "throughput": 1649.25, "total_tokens": 183680}
{"current_steps": 470, "total_steps": 1250, "loss": 0.3607, "lr": 3.932511051526289e-05, "epoch": 3.76, "percentage": 37.6, "elapsed_time": "0:01:52", "remaining_time": "0:03:06", "throughput": 1652.22, "total_tokens": 185632}
{"current_steps": 475, "total_steps": 1250, "loss": 0.3215, "lr": 3.903764574386786e-05, "epoch": 3.8, "percentage": 38.0, "elapsed_time": "0:01:53", "remaining_time": "0:03:04", "throughput": 1655.54, "total_tokens": 187552}
{"current_steps": 480, "total_steps": 1250, "loss": 0.3776, "lr": 3.8747444305621e-05, "epoch": 3.84, "percentage": 38.4, "elapsed_time": "0:01:54", "remaining_time": "0:03:03", "throughput": 1658.3, "total_tokens": 189408}
{"current_steps": 485, "total_steps": 1250, "loss": 0.3683, "lr": 3.8454562775867684e-05, "epoch": 3.88, "percentage": 38.8, "elapsed_time": "0:01:55", "remaining_time": "0:03:01", "throughput": 1662.5, "total_tokens": 191488}
{"current_steps": 490, "total_steps": 1250, "loss": 0.3428, "lr": 3.8159058252442446e-05, "epoch": 3.92, "percentage": 39.2, "elapsed_time": "0:01:56", "remaining_time": "0:03:00", "throughput": 1664.7, "total_tokens": 193312}
{"current_steps": 495, "total_steps": 1250, "loss": 0.3494, "lr": 3.786098834453766e-05, "epoch": 3.96, "percentage": 39.6, "elapsed_time": "0:01:57", "remaining_time": "0:02:58", "throughput": 1668.78, "total_tokens": 195424}
{"current_steps": 500, "total_steps": 1250, "loss": 0.3261, "lr": 3.7560411161472456e-05, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:01:58", "remaining_time": "0:02:57", "throughput": 1668.96, "total_tokens": 197024}
{"current_steps": 504, "total_steps": 1250, "eval_loss": 0.3559495508670807, "epoch": 4.032, "percentage": 40.32, "elapsed_time": "0:02:00", "remaining_time": "0:02:58", "throughput": 1651.29, "total_tokens": 198656}
{"current_steps": 505, "total_steps": 1250, "loss": 0.4099, "lr": 3.725738530136422e-05, "epoch": 4.04, "percentage": 40.4, "elapsed_time": "0:02:01", "remaining_time": "0:02:59", "throughput": 1632.86, "total_tokens": 199040}
{"current_steps": 510, "total_steps": 1250, "loss": 0.3695, "lr": 3.695196983970481e-05, "epoch": 4.08, "percentage": 40.8, "elapsed_time": "0:02:02", "remaining_time": "0:02:58", "throughput": 1635.61, "total_tokens": 200960}
{"current_steps": 515, "total_steps": 1250, "loss": 0.3457, "lr": 3.664422431784361e-05, "epoch": 4.12, "percentage": 41.2, "elapsed_time": "0:02:03", "remaining_time": "0:02:56", "throughput": 1638.78, "total_tokens": 203008}
{"current_steps": 520, "total_steps": 1250, "loss": 0.3686, "lr": 3.633420873137988e-05, "epoch": 4.16, "percentage": 41.6, "elapsed_time": "0:02:04", "remaining_time": "0:02:55", "throughput": 1640.51, "total_tokens": 204672}
{"current_steps": 525, "total_steps": 1250, "loss": 0.3581, "lr": 3.602198351846647e-05, "epoch": 4.2, "percentage": 42.0, "elapsed_time": "0:02:05", "remaining_time": "0:02:53", "throughput": 1644.41, "total_tokens": 206784}
{"current_steps": 530, "total_steps": 1250, "loss": 0.3498, "lr": 3.570760954802726e-05, "epoch": 4.24, "percentage": 42.4, "elapsed_time": "0:02:06", "remaining_time": "0:02:52", "throughput": 1646.95, "total_tokens": 208672}
{"current_steps": 535, "total_steps": 1250, "loss": 0.3468, "lr": 3.53911481078907e-05, "epoch": 4.28, "percentage": 42.8, "elapsed_time": "0:02:07", "remaining_time": "0:02:50", "throughput": 1650.89, "total_tokens": 210752}
{"current_steps": 540, "total_steps": 1250, "loss": 0.3455, "lr": 3.507266089284157e-05, "epoch": 4.32, "percentage": 43.2, "elapsed_time": "0:02:08", "remaining_time": "0:02:49", "throughput": 1657.96, "total_tokens": 213472}
{"current_steps": 545, "total_steps": 1250, "loss": 0.369, "lr": 3.475220999259349e-05, "epoch": 4.36, "percentage": 43.6, "elapsed_time": "0:02:09", "remaining_time": "0:02:47", "throughput": 1661.77, "total_tokens": 215616}
{"current_steps": 550, "total_steps": 1250, "loss": 0.3598, "lr": 3.442985787968442e-05, "epoch": 4.4, "percentage": 44.0, "elapsed_time": "0:02:10", "remaining_time": "0:02:46", "throughput": 1664.88, "total_tokens": 217664}
{"current_steps": 555, "total_steps": 1250, "loss": 0.3577, "lr": 3.410566739729746e-05, "epoch": 4.44, "percentage": 44.4, "elapsed_time": "0:02:11", "remaining_time": "0:02:44", "throughput": 1667.75, "total_tokens": 219584}
{"current_steps": 560, "total_steps": 1250, "loss": 0.3532, "lr": 3.3779701747009504e-05, "epoch": 4.48, "percentage": 44.8, "elapsed_time": "0:02:12", "remaining_time": "0:02:43", "throughput": 1670.2, "total_tokens": 221504}
{"current_steps": 565, "total_steps": 1250, "loss": 0.3529, "lr": 3.3452024476469934e-05, "epoch": 4.52, "percentage": 45.2, "elapsed_time": "0:02:13", "remaining_time": "0:02:41", "throughput": 1672.82, "total_tokens": 223424}
{"current_steps": 567, "total_steps": 1250, "eval_loss": 0.3537788391113281, "epoch": 4.536, "percentage": 45.36, "elapsed_time": "0:02:15", "remaining_time": "0:02:42", "throughput": 1656.1, "total_tokens": 224032}
{"current_steps": 570, "total_steps": 1250, "loss": 0.3412, "lr": 3.312269946701191e-05, "epoch": 4.5600000000000005, "percentage": 45.6, "elapsed_time": "0:02:17", "remaining_time": "0:02:43", "throughput": 1640.43, "total_tokens": 225216}
{"current_steps": 575, "total_steps": 1250, "loss": 0.3478, "lr": 3.279179092119855e-05, "epoch": 4.6, "percentage": 46.0, "elapsed_time": "0:02:18", "remaining_time": "0:02:42", "throughput": 1642.55, "total_tokens": 227008}
{"current_steps": 580, "total_steps": 1250, "loss": 0.3649, "lr": 3.245936335030651e-05, "epoch": 4.64, "percentage": 46.4, "elapsed_time": "0:02:19", "remaining_time": "0:02:40", "throughput": 1644.17, "total_tokens": 228736}
{"current_steps": 585, "total_steps": 1250, "loss": 0.3492, "lr": 3.21254815617494e-05, "epoch": 4.68, "percentage": 46.8, "elapsed_time": "0:02:19", "remaining_time": "0:02:39", "throughput": 1644.93, "total_tokens": 230240}
{"current_steps": 590, "total_steps": 1250, "loss": 0.3521, "lr": 3.179021064644347e-05, "epoch": 4.72, "percentage": 47.2, "elapsed_time": "0:02:20", "remaining_time": "0:02:37", "throughput": 1647.3, "total_tokens": 232192}
{"current_steps": 595, "total_steps": 1250, "loss": 0.3422, "lr": 3.145361596611795e-05, "epoch": 4.76, "percentage": 47.6, "elapsed_time": "0:02:21", "remaining_time": "0:02:36", "throughput": 1651.12, "total_tokens": 234368}
{"current_steps": 600, "total_steps": 1250, "loss": 0.3486, "lr": 3.111576314057268e-05, "epoch": 4.8, "percentage": 48.0, "elapsed_time": "0:02:22", "remaining_time": "0:02:34", "throughput": 1652.57, "total_tokens": 236032}
{"current_steps": 605, "total_steps": 1250, "loss": 0.356, "lr": 3.0776718034885454e-05, "epoch": 4.84, "percentage": 48.4, "elapsed_time": "0:02:23", "remaining_time": "0:02:33", "throughput": 1654.77, "total_tokens": 237920}
{"current_steps": 610, "total_steps": 1250, "loss": 0.3493, "lr": 3.0436546746571372e-05, "epoch": 4.88, "percentage": 48.8, "elapsed_time": "0:02:24", "remaining_time": "0:02:31", "throughput": 1656.47, "total_tokens": 239680}
{"current_steps": 615, "total_steps": 1250, "loss": 0.3546, "lr": 3.0095315592697126e-05, "epoch": 4.92, "percentage": 49.2, "elapsed_time": "0:02:25", "remaining_time": "0:02:30", "throughput": 1658.27, "total_tokens": 241504}
{"current_steps": 620, "total_steps": 1250, "loss": 0.3511, "lr": 2.9753091096952255e-05, "epoch": 4.96, "percentage": 49.6, "elapsed_time": "0:02:26", "remaining_time": "0:02:28", "throughput": 1661.63, "total_tokens": 243584}
{"current_steps": 625, "total_steps": 1250, "loss": 0.3463, "lr": 2.9409939976680313e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:02:27", "remaining_time": "0:02:27", "throughput": 1663.23, "total_tokens": 245472}
{"current_steps": 630, "total_steps": 1250, "loss": 0.3441, "lr": 2.9065929129872094e-05, "epoch": 5.04, "percentage": 50.4, "elapsed_time": "0:02:28", "remaining_time": "0:02:26", "throughput": 1664.12, "total_tokens": 247424}
{"current_steps": 630, "total_steps": 1250, "eval_loss": 0.3519797921180725, "epoch": 5.04, "percentage": 50.4, "elapsed_time": "0:02:30", "remaining_time": "0:02:27", "throughput": 1648.86, "total_tokens": 247424}
{"current_steps": 635, "total_steps": 1250, "loss": 0.3477, "lr": 2.8721125622123806e-05, "epoch": 5.08, "percentage": 50.8, "elapsed_time": "0:02:32", "remaining_time": "0:02:27", "throughput": 1634.82, "total_tokens": 249472}
{"current_steps": 640, "total_steps": 1250, "loss": 0.2918, "lr": 2.8375596673562482e-05, "epoch": 5.12, "percentage": 51.2, "elapsed_time": "0:02:33", "remaining_time": "0:02:26", "throughput": 1636.91, "total_tokens": 251296}
{"current_steps": 645, "total_steps": 1250, "loss": 0.3665, "lr": 2.8029409645741267e-05, "epoch": 5.16, "percentage": 51.6, "elapsed_time": "0:02:34", "remaining_time": "0:02:24", "throughput": 1639.68, "total_tokens": 253344}
{"current_steps": 650, "total_steps": 1250, "loss": 0.3434, "lr": 2.7682632028507167e-05, "epoch": 5.2, "percentage": 52.0, "elapsed_time": "0:02:35", "remaining_time": "0:02:23", "throughput": 1641.62, "total_tokens": 255104}
{"current_steps": 655, "total_steps": 1250, "loss": 0.3284, "lr": 2.733533142684377e-05, "epoch": 5.24, "percentage": 52.4, "elapsed_time": "0:02:36", "remaining_time": "0:02:21", "throughput": 1643.1, "total_tokens": 256832}
{"current_steps": 660, "total_steps": 1250, "loss": 0.3699, "lr": 2.6987575547691497e-05, "epoch": 5.28, "percentage": 52.8, "elapsed_time": "0:02:37", "remaining_time": "0:02:20", "throughput": 1645.46, "total_tokens": 258720}
{"current_steps": 665, "total_steps": 1250, "loss": 0.3559, "lr": 2.6639432186748043e-05, "epoch": 5.32, "percentage": 53.2, "elapsed_time": "0:02:38", "remaining_time": "0:02:19", "throughput": 1647.63, "total_tokens": 260576}
{"current_steps": 670, "total_steps": 1250, "loss": 0.351, "lr": 2.6290969215251416e-05, "epoch": 5.36, "percentage": 53.6, "elapsed_time": "0:02:39", "remaining_time": "0:02:17", "throughput": 1649.31, "total_tokens": 262368}
{"current_steps": 675, "total_steps": 1250, "loss": 0.3639, "lr": 2.594225456674837e-05, "epoch": 5.4, "percentage": 54.0, "elapsed_time": "0:02:40", "remaining_time": "0:02:16", "throughput": 1651.66, "total_tokens": 264320}
{"current_steps": 680, "total_steps": 1250, "loss": 0.3574, "lr": 2.559335622385055e-05, "epoch": 5.44, "percentage": 54.4, "elapsed_time": "0:02:40", "remaining_time": "0:02:14", "throughput": 1654.16, "total_tokens": 266304}
{"current_steps": 685, "total_steps": 1250, "loss": 0.3347, "lr": 2.524434220498123e-05, "epoch": 5.48, "percentage": 54.8, "elapsed_time": "0:02:41", "remaining_time": "0:02:13", "throughput": 1656.91, "total_tokens": 268384}
{"current_steps": 690, "total_steps": 1250, "loss": 0.3572, "lr": 2.4895280551114907e-05, "epoch": 5.52, "percentage": 55.2, "elapsed_time": "0:02:42", "remaining_time": "0:02:12", "throughput": 1658.49, "total_tokens": 270208}
{"current_steps": 693, "total_steps": 1250, "eval_loss": 0.3479459583759308, "epoch": 5.5440000000000005, "percentage": 55.44, "elapsed_time": "0:02:44", "remaining_time": "0:02:12", "throughput": 1645.46, "total_tokens": 271232}
{"current_steps": 695, "total_steps": 1250, "loss": 0.3879, "lr": 2.4546239312512635e-05, "epoch": 5.5600000000000005, "percentage": 55.6, "elapsed_time": "0:02:46", "remaining_time": "0:02:13", "throughput": 1631.69, "total_tokens": 271840}
{"current_steps": 700, "total_steps": 1250, "loss": 0.3405, "lr": 2.4197286535455464e-05, "epoch": 5.6, "percentage": 56.0, "elapsed_time": "0:02:47", "remaining_time": "0:02:11", "throughput": 1634.58, "total_tokens": 273888}
{"current_steps": 705, "total_steps": 1250, "loss": 0.3476, "lr": 2.384849024897869e-05, "epoch": 5.64, "percentage": 56.4, "elapsed_time": "0:02:48", "remaining_time": "0:02:10", "throughput": 1637.26, "total_tokens": 275904}
{"current_steps": 710, "total_steps": 1250, "loss": 0.347, "lr": 2.349991845160949e-05, "epoch": 5.68, "percentage": 56.8, "elapsed_time": "0:02:49", "remaining_time": "0:02:08", "throughput": 1639.75, "total_tokens": 277888}
{"current_steps": 715, "total_steps": 1250, "loss": 0.3426, "lr": 2.3151639098110377e-05, "epoch": 5.72, "percentage": 57.2, "elapsed_time": "0:02:50", "remaining_time": "0:02:07", "throughput": 1641.87, "total_tokens": 279872}
{"current_steps": 720, "total_steps": 1250, "loss": 0.3529, "lr": 2.280372008623142e-05, "epoch": 5.76, "percentage": 57.6, "elapsed_time": "0:02:51", "remaining_time": "0:02:06", "throughput": 1643.51, "total_tokens": 281664}
{"current_steps": 725, "total_steps": 1250, "loss": 0.3444, "lr": 2.2456229243473345e-05, "epoch": 5.8, "percentage": 58.0, "elapsed_time": "0:02:52", "remaining_time": "0:02:04", "throughput": 1646.38, "total_tokens": 283776}
{"current_steps": 730, "total_steps": 1250, "loss": 0.3635, "lr": 2.2109234313864465e-05, "epoch": 5.84, "percentage": 58.4, "elapsed_time": "0:02:53", "remaining_time": "0:02:03", "throughput": 1648.0, "total_tokens": 285568}
{"current_steps": 735, "total_steps": 1250, "loss": 0.3615, "lr": 2.176280294475383e-05, "epoch": 5.88, "percentage": 58.8, "elapsed_time": "0:02:54", "remaining_time": "0:02:02", "throughput": 1649.39, "total_tokens": 287360}
{"current_steps": 740, "total_steps": 1250, "loss": 0.351, "lr": 2.1417002673623264e-05, "epoch": 5.92, "percentage": 59.2, "elapsed_time": "0:02:55", "remaining_time": "0:02:00", "throughput": 1652.7, "total_tokens": 289632}
{"current_steps": 745, "total_steps": 1250, "loss": 0.3513, "lr": 2.1071900914920816e-05, "epoch": 5.96, "percentage": 59.6, "elapsed_time": "0:02:56", "remaining_time": "0:01:59", "throughput": 1654.97, "total_tokens": 291552}
{"current_steps": 750, "total_steps": 1250, "loss": 0.3481, "lr": 2.0727564946918087e-05, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:02:57", "remaining_time": "0:01:58", "throughput": 1657.25, "total_tokens": 293616}
{"current_steps": 755, "total_steps": 1250, "loss": 0.3479, "lr": 2.038406189859433e-05, "epoch": 6.04, "percentage": 60.4, "elapsed_time": "0:02:58", "remaining_time": "0:01:56", "throughput": 1657.43, "total_tokens": 295440}
{"current_steps": 756, "total_steps": 1250, "eval_loss": 0.3478395938873291, "epoch": 6.048, "percentage": 60.48, "elapsed_time": "0:02:59", "remaining_time": "0:01:57", "throughput": 1644.8, "total_tokens": 295728}
{"current_steps": 760, "total_steps": 1250, "loss": 0.3478, "lr": 2.004145873654942e-05, "epoch": 6.08, "percentage": 60.8, "elapsed_time": "0:03:01", "remaining_time": "0:01:57", "throughput": 1634.39, "total_tokens": 297360}
{"current_steps": 765, "total_steps": 1250, "loss": 0.3491, "lr": 1.969982225194864e-05, "epoch": 6.12, "percentage": 61.2, "elapsed_time": "0:03:02", "remaining_time": "0:01:55", "throughput": 1636.55, "total_tokens": 299312}
{"current_steps": 770, "total_steps": 1250, "loss": 0.3461, "lr": 1.9359219047501565e-05, "epoch": 6.16, "percentage": 61.6, "elapsed_time": "0:03:03", "remaining_time": "0:01:54", "throughput": 1639.34, "total_tokens": 301488}
{"current_steps": 775, "total_steps": 1250, "loss": 0.3509, "lr": 1.9019715524477767e-05, "epoch": 6.2, "percentage": 62.0, "elapsed_time": "0:03:04", "remaining_time": "0:01:53", "throughput": 1642.47, "total_tokens": 303696}
{"current_steps": 780, "total_steps": 1250, "loss": 0.3371, "lr": 1.868137786976177e-05, "epoch": 6.24, "percentage": 62.4, "elapsed_time": "0:03:05", "remaining_time": "0:01:51", "throughput": 1643.64, "total_tokens": 305360}
{"current_steps": 785, "total_steps": 1250, "loss": 0.3487, "lr": 1.8344272042949724e-05, "epoch": 6.28, "percentage": 62.8, "elapsed_time": "0:03:06", "remaining_time": "0:01:50", "throughput": 1646.13, "total_tokens": 307408}
{"current_steps": 790, "total_steps": 1250, "loss": 0.3404, "lr": 1.800846376349051e-05, "epoch": 6.32, "percentage": 63.2, "elapsed_time": "0:03:07", "remaining_time": "0:01:49", "throughput": 1647.91, "total_tokens": 309232}
{"current_steps": 795, "total_steps": 1250, "loss": 0.3456, "lr": 1.767401849787357e-05, "epoch": 6.36, "percentage": 63.6, "elapsed_time": "0:03:08", "remaining_time": "0:01:47", "throughput": 1650.12, "total_tokens": 311184}
{"current_steps": 800, "total_steps": 1250, "loss": 0.3432, "lr": 1.73410014468661e-05, "epoch": 6.4, "percentage": 64.0, "elapsed_time": "0:03:09", "remaining_time": "0:01:46", "throughput": 1651.82, "total_tokens": 313072}
{"current_steps": 805, "total_steps": 1250, "loss": 0.3648, "lr": 1.7009477532802054e-05, "epoch": 6.44, "percentage": 64.4, "elapsed_time": "0:03:10", "remaining_time": "0:01:45", "throughput": 1652.88, "total_tokens": 314832}
{"current_steps": 810, "total_steps": 1250, "loss": 0.3358, "lr": 1.6679511386925337e-05, "epoch": 6.48, "percentage": 64.8, "elapsed_time": "0:03:11", "remaining_time": "0:01:43", "throughput": 1654.25, "total_tokens": 316560}
{"current_steps": 815, "total_steps": 1250, "loss": 0.3533, "lr": 1.635116733678988e-05, "epoch": 6.52, "percentage": 65.2, "elapsed_time": "0:03:12", "remaining_time": "0:01:42", "throughput": 1657.88, "total_tokens": 318960}
{"current_steps": 819, "total_steps": 1250, "eval_loss": 0.35259565711021423, "epoch": 6.552, "percentage": 65.52, "elapsed_time": "0:03:14", "remaining_time": "0:01:42", "throughput": 1647.61, "total_tokens": 320464}
{"current_steps": 820, "total_steps": 1250, "loss": 0.3534, "lr": 1.6024509393718844e-05, "epoch": 6.5600000000000005, "percentage": 65.6, "elapsed_time": "0:03:16", "remaining_time": "0:01:42", "throughput": 1636.76, "total_tokens": 320880}
{"current_steps": 825, "total_steps": 1250, "loss": 0.3546, "lr": 1.5699601240325474e-05, "epoch": 6.6, "percentage": 66.0, "elapsed_time": "0:03:17", "remaining_time": "0:01:41", "throughput": 1639.88, "total_tokens": 323184}
{"current_steps": 830, "total_steps": 1250, "loss": 0.3466, "lr": 1.5376506218098015e-05, "epoch": 6.64, "percentage": 66.4, "elapsed_time": "0:03:18", "remaining_time": "0:01:40", "throughput": 1641.73, "total_tokens": 325168}
{"current_steps": 835, "total_steps": 1250, "loss": 0.353, "lr": 1.505528731505126e-05, "epoch": 6.68, "percentage": 66.8, "elapsed_time": "0:03:18", "remaining_time": "0:01:38", "throughput": 1643.32, "total_tokens": 326992}
{"current_steps": 840, "total_steps": 1250, "loss": 0.354, "lr": 1.4736007153446801e-05, "epoch": 6.72, "percentage": 67.2, "elapsed_time": "0:03:20", "remaining_time": "0:01:37", "throughput": 1645.41, "total_tokens": 329104}
{"current_steps": 845, "total_steps": 1250, "loss": 0.3446, "lr": 1.4418727977584774e-05, "epoch": 6.76, "percentage": 67.6, "elapsed_time": "0:03:20", "remaining_time": "0:01:36", "throughput": 1647.45, "total_tokens": 331088}
{"current_steps": 850, "total_steps": 1250, "loss": 0.342, "lr": 1.4103511641669152e-05, "epoch": 6.8, "percentage": 68.0, "elapsed_time": "0:03:21", "remaining_time": "0:01:35", "throughput": 1649.18, "total_tokens": 333008}
{"current_steps": 855, "total_steps": 1250, "loss": 0.3301, "lr": 1.3790419597749199e-05, "epoch": 6.84, "percentage": 68.4, "elapsed_time": "0:03:22", "remaining_time": "0:01:33", "throughput": 1651.12, "total_tokens": 335024}
{"current_steps": 860, "total_steps": 1250, "loss": 0.3654, "lr": 1.3479512883739232e-05, "epoch": 6.88, "percentage": 68.8, "elapsed_time": "0:03:23", "remaining_time": "0:01:32", "throughput": 1653.31, "total_tokens": 337104}
{"current_steps": 865, "total_steps": 1250, "loss": 0.3532, "lr": 1.3170852111519175e-05, "epoch": 6.92, "percentage": 69.2, "elapsed_time": "0:03:24", "remaining_time": "0:01:31", "throughput": 1654.91, "total_tokens": 338960}
{"current_steps": 870, "total_steps": 1250, "loss": 0.3362, "lr": 1.2864497455118152e-05, "epoch": 6.96, "percentage": 69.6, "elapsed_time": "0:03:25", "remaining_time": "0:01:29", "throughput": 1656.48, "total_tokens": 340848}
{"current_steps": 875, "total_steps": 1250, "loss": 0.3201, "lr": 1.2560508638983437e-05, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:03:26", "remaining_time": "0:01:28", "throughput": 1658.74, "total_tokens": 343040}
{"current_steps": 880, "total_steps": 1250, "loss": 0.3186, "lr": 1.2258944926337057e-05, "epoch": 7.04, "percentage": 70.4, "elapsed_time": "0:03:27", "remaining_time": "0:01:27", "throughput": 1659.85, "total_tokens": 345056}
{"current_steps": 882, "total_steps": 1250, "eval_loss": 0.3628290593624115, "epoch": 7.056, "percentage": 70.56, "elapsed_time": "0:03:29", "remaining_time": "0:01:27", "throughput": 1649.63, "total_tokens": 345856}
{"current_steps": 885, "total_steps": 1250, "loss": 0.3208, "lr": 1.1959865107622307e-05, "epoch": 7.08, "percentage": 70.8, "elapsed_time": "0:03:31", "remaining_time": "0:01:27", "throughput": 1640.59, "total_tokens": 347232}
{"current_steps": 890, "total_steps": 1250, "loss": 0.3678, "lr": 1.1663327489042435e-05, "epoch": 7.12, "percentage": 71.2, "elapsed_time": "0:03:32", "remaining_time": "0:01:26", "throughput": 1643.38, "total_tokens": 349504}
{"current_steps": 895, "total_steps": 1250, "loss": 0.386, "lr": 1.1369389881193749e-05, "epoch": 7.16, "percentage": 71.6, "elapsed_time": "0:03:33", "remaining_time": "0:01:24", "throughput": 1644.47, "total_tokens": 351296}
{"current_steps": 900, "total_steps": 1250, "loss": 0.3546, "lr": 1.107810958779531e-05, "epoch": 7.2, "percentage": 72.0, "elapsed_time": "0:03:34", "remaining_time": "0:01:23", "throughput": 1646.28, "total_tokens": 353248}
{"current_steps": 905, "total_steps": 1250, "loss": 0.3651, "lr": 1.0789543394517435e-05, "epoch": 7.24, "percentage": 72.4, "elapsed_time": "0:03:35", "remaining_time": "0:01:22", "throughput": 1648.12, "total_tokens": 355232}
{"current_steps": 910, "total_steps": 1250, "loss": 0.3332, "lr": 1.050374755791127e-05, "epoch": 7.28, "percentage": 72.8, "elapsed_time": "0:03:36", "remaining_time": "0:01:20", "throughput": 1650.3, "total_tokens": 357376}
{"current_steps": 915, "total_steps": 1250, "loss": 0.3174, "lr": 1.022077779444145e-05, "epoch": 7.32, "percentage": 73.2, "elapsed_time": "0:03:37", "remaining_time": "0:01:19", "throughput": 1651.65, "total_tokens": 359232}
{"current_steps": 920, "total_steps": 1250, "loss": 0.3461, "lr": 9.94068926962404e-06, "epoch": 7.36, "percentage": 73.6, "elapsed_time": "0:03:38", "remaining_time": "0:01:18", "throughput": 1653.13, "total_tokens": 361024}
{"current_steps": 925, "total_steps": 1250, "loss": 0.322, "lr": 9.663536587271902e-06, "epoch": 7.4, "percentage": 74.0, "elapsed_time": "0:03:39", "remaining_time": "0:01:17", "throughput": 1655.59, "total_tokens": 363200}
{"current_steps": 930, "total_steps": 1250, "loss": 0.3463, "lr": 9.389373778849612e-06, "epoch": 7.44, "percentage": 74.4, "elapsed_time": "0:03:40", "remaining_time": "0:01:15", "throughput": 1657.29, "total_tokens": 365152}
{"current_steps": 935, "total_steps": 1250, "loss": 0.348, "lr": 9.11825429293989e-06, "epoch": 7.48, "percentage": 74.8, "elapsed_time": "0:03:41", "remaining_time": "0:01:14", "throughput": 1659.73, "total_tokens": 367328}
{"current_steps": 940, "total_steps": 1250, "loss": 0.348, "lr": 8.850230984823735e-06, "epoch": 7.52, "percentage": 75.2, "elapsed_time": "0:03:42", "remaining_time": "0:01:13", "throughput": 1661.25, "total_tokens": 369248}
{"current_steps": 945, "total_steps": 1250, "loss": 0.3306, "lr": 8.585356106176094e-06, "epoch": 7.5600000000000005, "percentage": 75.6, "elapsed_time": "0:03:43", "remaining_time": "0:01:12", "throughput": 1662.43, "total_tokens": 371040}
{"current_steps": 945, "total_steps": 1250, "eval_loss": 0.3621916174888611, "epoch": 7.5600000000000005, "percentage": 75.6, "elapsed_time": "0:03:44", "remaining_time": "0:01:12", "throughput": 1652.23, "total_tokens": 371040}
{"current_steps": 950, "total_steps": 1250, "loss": 0.373, "lr": 8.323681294879394e-06, "epoch": 7.6, "percentage": 76.0, "elapsed_time": "0:03:46", "remaining_time": "0:01:11", "throughput": 1644.01, "total_tokens": 372928}
{"current_steps": 955, "total_steps": 1250, "loss": 0.3755, "lr": 8.06525756495657e-06, "epoch": 7.64, "percentage": 76.4, "elapsed_time": "0:03:47", "remaining_time": "0:01:10", "throughput": 1645.64, "total_tokens": 374816}
{"current_steps": 960, "total_steps": 1250, "loss": 0.3481, "lr": 7.810135296625818e-06, "epoch": 7.68, "percentage": 76.8, "elapsed_time": "0:03:48", "remaining_time": "0:01:09", "throughput": 1647.26, "total_tokens": 376704}
{"current_steps": 965, "total_steps": 1250, "loss": 0.352, "lr": 7.558364226478842e-06, "epoch": 7.72, "percentage": 77.2, "elapsed_time": "0:03:49", "remaining_time": "0:01:07", "throughput": 1648.98, "total_tokens": 378624}
{"current_steps": 970, "total_steps": 1250, "loss": 0.3693, "lr": 7.309993437784624e-06, "epoch": 7.76, "percentage": 77.6, "elapsed_time": "0:03:50", "remaining_time": "0:01:06", "throughput": 1650.22, "total_tokens": 380480}
{"current_steps": 975, "total_steps": 1250, "loss": 0.3531, "lr": 7.065071350920538e-06, "epoch": 7.8, "percentage": 78.0, "elapsed_time": "0:03:51", "remaining_time": "0:01:05", "throughput": 1651.3, "total_tokens": 382144}
{"current_steps": 980, "total_steps": 1250, "loss": 0.3492, "lr": 6.823645713932708e-06, "epoch": 7.84, "percentage": 78.4, "elapsed_time": "0:03:52", "remaining_time": "0:01:04", "throughput": 1653.34, "total_tokens": 384320}
{"current_steps": 985, "total_steps": 1250, "loss": 0.343, "lr": 6.58576359322742e-06, "epoch": 7.88, "percentage": 78.8, "elapsed_time": "0:03:53", "remaining_time": "0:01:02", "throughput": 1655.99, "total_tokens": 386656}
{"current_steps": 990, "total_steps": 1250, "loss": 0.3463, "lr": 6.3514713643954475e-06, "epoch": 7.92, "percentage": 79.2, "elapsed_time": "0:03:54", "remaining_time": "0:01:01", "throughput": 1657.58, "total_tokens": 388672}
{"current_steps": 995, "total_steps": 1250, "loss": 0.3604, "lr": 6.120814703171024e-06, "epoch": 7.96, "percentage": 79.6, "elapsed_time": "0:03:55", "remaining_time": "0:01:00", "throughput": 1658.69, "total_tokens": 390400}
{"current_steps": 1000, "total_steps": 1250, "loss": 0.3433, "lr": 5.893838576527275e-06, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:03:56", "remaining_time": "0:00:59", "throughput": 1659.14, "total_tokens": 392080}
{"current_steps": 1005, "total_steps": 1250, "loss": 0.3524, "lr": 5.6705872339098186e-06, "epoch": 8.04, "percentage": 80.4, "elapsed_time": "0:03:57", "remaining_time": "0:00:57", "throughput": 1660.23, "total_tokens": 394160}
{"current_steps": 1008, "total_steps": 1250, "eval_loss": 0.3505736291408539, "epoch": 8.064, "percentage": 80.64, "elapsed_time": "0:03:59", "remaining_time": "0:00:57", "throughput": 1651.17, "total_tokens": 395216}
{"current_steps": 1010, "total_steps": 1250, "loss": 0.3508, "lr": 5.451104198610249e-06, "epoch": 8.08, "percentage": 80.8, "elapsed_time": "0:04:01", "remaining_time": "0:00:57", "throughput": 1640.64, "total_tokens": 395888}
{"current_steps": 1015, "total_steps": 1250, "loss": 0.3383, "lr": 5.235432259281175e-06, "epoch": 8.12, "percentage": 81.2, "elapsed_time": "0:04:02", "remaining_time": "0:00:56", "throughput": 1642.77, "total_tokens": 398032}
{"current_steps": 1020, "total_steps": 1250, "loss": 0.348, "lr": 5.023613461594512e-06, "epoch": 8.16, "percentage": 81.6, "elapsed_time": "0:04:03", "remaining_time": "0:00:54", "throughput": 1644.23, "total_tokens": 399856}
{"current_steps": 1025, "total_steps": 1250, "loss": 0.3465, "lr": 4.8156891000445406e-06, "epoch": 8.2, "percentage": 82.0, "elapsed_time": "0:04:04", "remaining_time": "0:00:53", "throughput": 1645.41, "total_tokens": 401616}
{"current_steps": 1030, "total_steps": 1250, "loss": 0.3419, "lr": 4.6116997098975465e-06, "epoch": 8.24, "percentage": 82.4, "elapsed_time": "0:04:05", "remaining_time": "0:00:52", "throughput": 1646.99, "total_tokens": 403568}
{"current_steps": 1035, "total_steps": 1250, "loss": 0.3511, "lr": 4.411685059289314e-06, "epoch": 8.28, "percentage": 82.8, "elapsed_time": "0:04:06", "remaining_time": "0:00:51", "throughput": 1649.11, "total_tokens": 405712}
{"current_steps": 1040, "total_steps": 1250, "loss": 0.3479, "lr": 4.215684141472292e-06, "epoch": 8.32, "percentage": 83.2, "elapsed_time": "0:04:07", "remaining_time": "0:00:49", "throughput": 1651.08, "total_tokens": 407888}
{"current_steps": 1045, "total_steps": 1250, "loss": 0.3542, "lr": 4.023735167213752e-06, "epoch": 8.36, "percentage": 83.6, "elapsed_time": "0:04:07", "remaining_time": "0:00:48", "throughput": 1652.33, "total_tokens": 409712}
{"current_steps": 1050, "total_steps": 1250, "loss": 0.351, "lr": 3.835875557346552e-06, "epoch": 8.4, "percentage": 84.0, "elapsed_time": "0:04:08", "remaining_time": "0:00:47", "throughput": 1653.42, "total_tokens": 411504}
{"current_steps": 1055, "total_steps": 1250, "loss": 0.3478, "lr": 3.6521419354738738e-06, "epoch": 8.44, "percentage": 84.4, "elapsed_time": "0:04:09", "remaining_time": "0:00:46", "throughput": 1654.22, "total_tokens": 413168}
{"current_steps": 1060, "total_steps": 1250, "loss": 0.3417, "lr": 3.4725701208293435e-06, "epoch": 8.48, "percentage": 84.8, "elapsed_time": "0:04:10", "remaining_time": "0:00:44", "throughput": 1655.3, "total_tokens": 414960}
{"current_steps": 1065, "total_steps": 1250, "loss": 0.3463, "lr": 3.297195121294022e-06, "epoch": 8.52, "percentage": 85.2, "elapsed_time": "0:04:11", "remaining_time": "0:00:43", "throughput": 1656.66, "total_tokens": 416880}
{"current_steps": 1070, "total_steps": 1250, "loss": 0.3447, "lr": 3.126051126571561e-06, "epoch": 8.56, "percentage": 85.6, "elapsed_time": "0:04:12", "remaining_time": "0:00:42", "throughput": 1658.08, "total_tokens": 418768}
{"current_steps": 1071, "total_steps": 1250, "eval_loss": 0.34890231490135193, "epoch": 8.568, "percentage": 85.68, "elapsed_time": "0:04:14", "remaining_time": "0:00:42", "throughput": 1649.47, "total_tokens": 419184}
{"current_steps": 1075, "total_steps": 1250, "loss": 0.3385, "lr": 2.9591715015228284e-06, "epoch": 8.6, "percentage": 86.0, "elapsed_time": "0:04:16", "remaining_time": "0:00:41", "throughput": 1641.14, "total_tokens": 421008}
{"current_steps": 1080, "total_steps": 1250, "loss": 0.3496, "lr": 2.7965887796613884e-06, "epoch": 8.64, "percentage": 86.4, "elapsed_time": "0:04:17", "remaining_time": "0:00:40", "throughput": 1642.32, "total_tokens": 422864}
{"current_steps": 1085, "total_steps": 1250, "loss": 0.3493, "lr": 2.6383346568110062e-06, "epoch": 8.68, "percentage": 86.8, "elapsed_time": "0:04:18", "remaining_time": "0:00:39", "throughput": 1644.06, "total_tokens": 424976}
{"current_steps": 1090, "total_steps": 1250, "loss": 0.3417, "lr": 2.4844399849264928e-06, "epoch": 8.72, "percentage": 87.2, "elapsed_time": "0:04:19", "remaining_time": "0:00:38", "throughput": 1646.14, "total_tokens": 427120}
{"current_steps": 1095, "total_steps": 1250, "loss": 0.3324, "lr": 2.3349347660790582e-06, "epoch": 8.76, "percentage": 87.6, "elapsed_time": "0:04:20", "remaining_time": "0:00:36", "throughput": 1647.99, "total_tokens": 429264}
{"current_steps": 1100, "total_steps": 1250, "loss": 0.345, "lr": 2.189848146607348e-06, "epoch": 8.8, "percentage": 88.0, "elapsed_time": "0:04:21", "remaining_time": "0:00:35", "throughput": 1649.05, "total_tokens": 431056}
{"current_steps": 1105, "total_steps": 1250, "loss": 0.3543, "lr": 2.0492084114352965e-06, "epoch": 8.84, "percentage": 88.4, "elapsed_time": "0:04:22", "remaining_time": "0:00:34", "throughput": 1650.75, "total_tokens": 433136}
{"current_steps": 1110, "total_steps": 1250, "loss": 0.3449, "lr": 1.913042978557944e-06, "epoch": 8.88, "percentage": 88.8, "elapsed_time": "0:04:23", "remaining_time": "0:00:33", "throughput": 1652.46, "total_tokens": 435216}
{"current_steps": 1115, "total_steps": 1250, "loss": 0.3466, "lr": 1.7813783936962258e-06, "epoch": 8.92, "percentage": 89.2, "elapsed_time": "0:04:24", "remaining_time": "0:00:32", "throughput": 1653.44, "total_tokens": 437040}
{"current_steps": 1120, "total_steps": 1250, "loss": 0.3561, "lr": 1.654240325121831e-06, "epoch": 8.96, "percentage": 89.6, "elapsed_time": "0:04:25", "remaining_time": "0:00:30", "throughput": 1655.21, "total_tokens": 439088}
{"current_steps": 1125, "total_steps": 1250, "loss": 0.3325, "lr": 1.5316535586531483e-06, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:04:26", "remaining_time": "0:00:29", "throughput": 1655.81, "total_tokens": 440848}
{"current_steps": 1130, "total_steps": 1250, "loss": 0.3639, "lr": 1.4136419928231892e-06, "epoch": 9.04, "percentage": 90.4, "elapsed_time": "0:04:27", "remaining_time": "0:00:28", "throughput": 1656.6, "total_tokens": 442864}
{"current_steps": 1134, "total_steps": 1250, "eval_loss": 0.3548278212547302, "epoch": 9.072, "percentage": 90.72, "elapsed_time": "0:04:29", "remaining_time": "0:00:27", "throughput": 1649.39, "total_tokens": 444560}
{"current_steps": 1135, "total_steps": 1250, "loss": 0.3545, "lr": 1.3002286342205462e-06, "epoch": 9.08, "percentage": 90.8, "elapsed_time": "0:04:31", "remaining_time": "0:00:27", "throughput": 1640.92, "total_tokens": 445040}
{"current_steps": 1140, "total_steps": 1250, "loss": 0.3514, "lr": 1.1914355930041837e-06, "epoch": 9.12, "percentage": 91.2, "elapsed_time": "0:04:32", "remaining_time": "0:00:26", "throughput": 1641.93, "total_tokens": 446864}
{"current_steps": 1145, "total_steps": 1250, "loss": 0.3528, "lr": 1.087284078593051e-06, "epoch": 9.16, "percentage": 91.6, "elapsed_time": "0:04:33", "remaining_time": "0:00:25", "throughput": 1643.69, "total_tokens": 449008}
{"current_steps": 1150, "total_steps": 1250, "loss": 0.3419, "lr": 9.877943955312552e-07, "epoch": 9.2, "percentage": 92.0, "elapsed_time": "0:04:34", "remaining_time": "0:00:23", "throughput": 1644.81, "total_tokens": 450832}
{"current_steps": 1155, "total_steps": 1250, "loss": 0.3448, "lr": 8.929859395296364e-07, "epoch": 9.24, "percentage": 92.4, "elapsed_time": "0:04:35", "remaining_time": "0:00:22", "throughput": 1646.22, "total_tokens": 452784}
{"current_steps": 1160, "total_steps": 1250, "loss": 0.3387, "lr": 8.028771936845342e-07, "epoch": 9.28, "percentage": 92.8, "elapsed_time": "0:04:36", "remaining_time": "0:00:21", "throughput": 1648.16, "total_tokens": 454992}
{"current_steps": 1165, "total_steps": 1250, "loss": 0.3482, "lr": 7.174857248745004e-07, "epoch": 9.32, "percentage": 93.2, "elapsed_time": "0:04:36", "remaining_time": "0:00:20", "throughput": 1648.64, "total_tokens": 456528}
{"current_steps": 1170, "total_steps": 1250, "loss": 0.3419, "lr": 6.368281803355691e-07, "epoch": 9.36, "percentage": 93.6, "elapsed_time": "0:04:37", "remaining_time": "0:00:19", "throughput": 1650.67, "total_tokens": 458768}
{"current_steps": 1175, "total_steps": 1250, "loss": 0.3528, "lr": 5.609202844158723e-07, "epoch": 9.4, "percentage": 94.0, "elapsed_time": "0:04:38", "remaining_time": "0:00:17", "throughput": 1652.33, "total_tokens": 460816}
{"current_steps": 1180, "total_steps": 1250, "loss": 0.3434, "lr": 4.897768355101084e-07, "epoch": 9.44, "percentage": 94.4, "elapsed_time": "0:04:39", "remaining_time": "0:00:16", "throughput": 1652.88, "total_tokens": 462384}
{"current_steps": 1185, "total_steps": 1250, "loss": 0.3511, "lr": 4.234117031746143e-07, "epoch": 9.48, "percentage": 94.8, "elapsed_time": "0:04:40", "remaining_time": "0:00:15", "throughput": 1654.26, "total_tokens": 464304}
{"current_steps": 1190, "total_steps": 1250, "loss": 0.3511, "lr": 3.6183782542343057e-07, "epoch": 9.52, "percentage": 95.2, "elapsed_time": "0:04:41", "remaining_time": "0:00:14", "throughput": 1655.82, "total_tokens": 466384}
{"current_steps": 1195, "total_steps": 1250, "loss": 0.3419, "lr": 3.050672062060278e-07, "epoch": 9.56, "percentage": 95.6, "elapsed_time": "0:04:42", "remaining_time": "0:00:13", "throughput": 1657.32, "total_tokens": 468368}
{"current_steps": 1197, "total_steps": 1250, "eval_loss": 0.3463664650917053, "epoch": 9.576, "percentage": 95.76, "elapsed_time": "0:04:44", "remaining_time": "0:00:12", "throughput": 1649.71, "total_tokens": 469104}
{"current_steps": 1200, "total_steps": 1250, "loss": 0.342, "lr": 2.531109130671061e-07, "epoch": 9.6, "percentage": 96.0, "elapsed_time": "0:04:46", "remaining_time": "0:00:11", "throughput": 1639.22, "total_tokens": 470192}
{"current_steps": 1205, "total_steps": 1250, "loss": 0.3418, "lr": 2.0597907498896007e-07, "epoch": 9.64, "percentage": 96.4, "elapsed_time": "0:04:47", "remaining_time": "0:00:10", "throughput": 1640.9, "total_tokens": 472240}
{"current_steps": 1210, "total_steps": 1250, "loss": 0.3514, "lr": 1.6368088041681108e-07, "epoch": 9.68, "percentage": 96.8, "elapsed_time": "0:04:48", "remaining_time": "0:00:09", "throughput": 1642.28, "total_tokens": 474160}
{"current_steps": 1215, "total_steps": 1250, "loss": 0.3498, "lr": 1.2622457546749567e-07, "epoch": 9.72, "percentage": 97.2, "elapsed_time": "0:04:49", "remaining_time": "0:00:08", "throughput": 1645.12, "total_tokens": 476784}
{"current_steps": 1220, "total_steps": 1250, "loss": 0.3311, "lr": 9.361746232188495e-08, "epoch": 9.76, "percentage": 97.6, "elapsed_time": "0:04:50", "remaining_time": "0:00:07", "throughput": 1646.73, "total_tokens": 478864}
{"current_steps": 1225, "total_steps": 1250, "loss": 0.3449, "lr": 6.586589780128716e-08, "epoch": 9.8, "percentage": 98.0, "elapsed_time": "0:04:51", "remaining_time": "0:00:05", "throughput": 1647.89, "total_tokens": 480816}
{"current_steps": 1230, "total_steps": 1250, "loss": 0.3449, "lr": 4.2975292128200064e-08, "epoch": 9.84, "percentage": 98.4, "elapsed_time": "0:04:52", "remaining_time": "0:00:04", "throughput": 1649.43, "total_tokens": 482832}
{"current_steps": 1235, "total_steps": 1250, "loss": 0.3434, "lr": 2.4950107871549167e-08, "epoch": 9.88, "percentage": 98.8, "elapsed_time": "0:04:53", "remaining_time": "0:00:03", "throughput": 1650.94, "total_tokens": 484912}
{"current_steps": 1240, "total_steps": 1250, "loss": 0.3434, "lr": 1.179385907672248e-08, "epoch": 9.92, "percentage": 99.2, "elapsed_time": "0:04:54", "remaining_time": "0:00:02", "throughput": 1651.68, "total_tokens": 486640}
{"current_steps": 1245, "total_steps": 1250, "loss": 0.3543, "lr": 3.5091105804907487e-09, "epoch": 9.96, "percentage": 99.6, "elapsed_time": "0:04:55", "remaining_time": "0:00:01", "throughput": 1652.49, "total_tokens": 488336}
{"current_steps": 1250, "total_steps": 1250, "loss": 0.3374, "lr": 9.747751098521107e-11, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:04:56", "remaining_time": "0:00:00", "throughput": 1652.94, "total_tokens": 490000}
{"current_steps": 1250, "total_steps": 1250, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:04:57", "remaining_time": "0:00:00", "throughput": 1645.64, "total_tokens": 490000}