train_copa_123_1760637644 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 1800
6079892 verified
{"current_steps": 5, "total_steps": 1800, "loss": 0.2968, "lr": 0.0006666666666666666, "epoch": 0.05555555555555555, "percentage": 0.28, "elapsed_time": "0:00:01", "remaining_time": "0:07:08", "throughput": 1287.27, "total_tokens": 1536}
{"current_steps": 10, "total_steps": 1800, "loss": 0.2567, "lr": 0.0015, "epoch": 0.1111111111111111, "percentage": 0.56, "elapsed_time": "0:00:01", "remaining_time": "0:05:53", "throughput": 1603.65, "total_tokens": 3168}
{"current_steps": 15, "total_steps": 1800, "loss": 1.6776, "lr": 0.002333333333333333, "epoch": 0.16666666666666666, "percentage": 0.83, "elapsed_time": "0:00:02", "remaining_time": "0:05:27", "throughput": 1718.86, "total_tokens": 4736}
{"current_steps": 20, "total_steps": 1800, "loss": 1.3771, "lr": 0.0031666666666666666, "epoch": 0.2222222222222222, "percentage": 1.11, "elapsed_time": "0:00:03", "remaining_time": "0:05:14", "throughput": 1782.61, "total_tokens": 6304}
{"current_steps": 25, "total_steps": 1800, "loss": 1.7454, "lr": 0.004, "epoch": 0.2777777777777778, "percentage": 1.39, "elapsed_time": "0:00:04", "remaining_time": "0:05:06", "throughput": 1817.5, "total_tokens": 7840}
{"current_steps": 30, "total_steps": 1800, "loss": 1.5788, "lr": 0.004833333333333334, "epoch": 0.3333333333333333, "percentage": 1.67, "elapsed_time": "0:00:05", "remaining_time": "0:05:00", "throughput": 1848.2, "total_tokens": 9408}
{"current_steps": 35, "total_steps": 1800, "loss": 0.7688, "lr": 0.005666666666666666, "epoch": 0.3888888888888889, "percentage": 1.94, "elapsed_time": "0:00:05", "remaining_time": "0:04:55", "throughput": 1860.54, "total_tokens": 10912}
{"current_steps": 40, "total_steps": 1800, "loss": 0.8306, "lr": 0.0065, "epoch": 0.4444444444444444, "percentage": 2.22, "elapsed_time": "0:00:06", "remaining_time": "0:04:52", "throughput": 1874.22, "total_tokens": 12448}
{"current_steps": 45, "total_steps": 1800, "loss": 2.603, "lr": 0.007333333333333333, "epoch": 0.5, "percentage": 2.5, "elapsed_time": "0:00:07", "remaining_time": "0:04:49", "throughput": 1888.7, "total_tokens": 14016}
{"current_steps": 50, "total_steps": 1800, "loss": 1.2938, "lr": 0.008166666666666666, "epoch": 0.5555555555555556, "percentage": 2.78, "elapsed_time": "0:00:08", "remaining_time": "0:04:47", "throughput": 1900.45, "total_tokens": 15584}
{"current_steps": 55, "total_steps": 1800, "loss": 3.0021, "lr": 0.009, "epoch": 0.6111111111111112, "percentage": 3.06, "elapsed_time": "0:00:08", "remaining_time": "0:04:44", "throughput": 1913.15, "total_tokens": 17184}
{"current_steps": 60, "total_steps": 1800, "loss": 0.7343, "lr": 0.009833333333333333, "epoch": 0.6666666666666666, "percentage": 3.33, "elapsed_time": "0:00:09", "remaining_time": "0:04:43", "throughput": 1921.53, "total_tokens": 18752}
{"current_steps": 65, "total_steps": 1800, "loss": 1.0839, "lr": 0.010666666666666666, "epoch": 0.7222222222222222, "percentage": 3.61, "elapsed_time": "0:00:10", "remaining_time": "0:04:41", "throughput": 1930.99, "total_tokens": 20352}
{"current_steps": 70, "total_steps": 1800, "loss": 0.3506, "lr": 0.0115, "epoch": 0.7777777777777778, "percentage": 3.89, "elapsed_time": "0:00:11", "remaining_time": "0:04:39", "throughput": 1939.23, "total_tokens": 21952}
{"current_steps": 75, "total_steps": 1800, "loss": 1.6048, "lr": 0.012333333333333332, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:00:12", "remaining_time": "0:04:38", "throughput": 1939.33, "total_tokens": 23456}
{"current_steps": 80, "total_steps": 1800, "loss": 0.8232, "lr": 0.013166666666666667, "epoch": 0.8888888888888888, "percentage": 4.44, "elapsed_time": "0:00:12", "remaining_time": "0:04:36", "throughput": 1946.08, "total_tokens": 25056}
{"current_steps": 85, "total_steps": 1800, "loss": 0.2744, "lr": 0.014, "epoch": 0.9444444444444444, "percentage": 4.72, "elapsed_time": "0:00:13", "remaining_time": "0:04:35", "throughput": 1945.94, "total_tokens": 26560}
{"current_steps": 90, "total_steps": 1800, "loss": 0.3653, "lr": 0.014833333333333334, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:14", "remaining_time": "0:04:34", "throughput": 1943.29, "total_tokens": 28096}
{"current_steps": 90, "total_steps": 1800, "eval_loss": 0.2384539097547531, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:15", "remaining_time": "0:04:50", "throughput": 1840.44, "total_tokens": 28096}
{"current_steps": 95, "total_steps": 1800, "loss": 0.2454, "lr": 0.015666666666666666, "epoch": 1.0555555555555556, "percentage": 5.28, "elapsed_time": "0:00:16", "remaining_time": "0:05:02", "throughput": 1761.44, "total_tokens": 29696}
{"current_steps": 100, "total_steps": 1800, "loss": 0.2655, "lr": 0.0165, "epoch": 1.1111111111111112, "percentage": 5.56, "elapsed_time": "0:00:17", "remaining_time": "0:04:59", "throughput": 1770.64, "total_tokens": 31232}
{"current_steps": 105, "total_steps": 1800, "loss": 0.2555, "lr": 0.017333333333333333, "epoch": 1.1666666666666667, "percentage": 5.83, "elapsed_time": "0:00:18", "remaining_time": "0:04:57", "throughput": 1779.25, "total_tokens": 32768}
{"current_steps": 110, "total_steps": 1800, "loss": 0.2362, "lr": 0.018166666666666664, "epoch": 1.2222222222222223, "percentage": 6.11, "elapsed_time": "0:00:19", "remaining_time": "0:04:54", "throughput": 1787.22, "total_tokens": 34304}
{"current_steps": 115, "total_steps": 1800, "loss": 0.2449, "lr": 0.019, "epoch": 1.2777777777777777, "percentage": 6.39, "elapsed_time": "0:00:19", "remaining_time": "0:04:52", "throughput": 1795.67, "total_tokens": 35872}
{"current_steps": 120, "total_steps": 1800, "loss": 0.2431, "lr": 0.01983333333333333, "epoch": 1.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:20", "remaining_time": "0:04:50", "throughput": 1802.17, "total_tokens": 37408}
{"current_steps": 125, "total_steps": 1800, "loss": 0.2233, "lr": 0.020666666666666667, "epoch": 1.3888888888888888, "percentage": 6.94, "elapsed_time": "0:00:21", "remaining_time": "0:04:48", "throughput": 1809.7, "total_tokens": 38976}
{"current_steps": 130, "total_steps": 1800, "loss": 0.2557, "lr": 0.0215, "epoch": 1.4444444444444444, "percentage": 7.22, "elapsed_time": "0:00:22", "remaining_time": "0:04:46", "throughput": 1817.7, "total_tokens": 40576}
{"current_steps": 135, "total_steps": 1800, "loss": 0.2458, "lr": 0.022333333333333334, "epoch": 1.5, "percentage": 7.5, "elapsed_time": "0:00:23", "remaining_time": "0:04:44", "throughput": 1824.21, "total_tokens": 42144}
{"current_steps": 140, "total_steps": 1800, "loss": 0.255, "lr": 0.023166666666666665, "epoch": 1.5555555555555556, "percentage": 7.78, "elapsed_time": "0:00:23", "remaining_time": "0:04:43", "throughput": 1828.89, "total_tokens": 43680}
{"current_steps": 145, "total_steps": 1800, "loss": 0.2535, "lr": 0.024, "epoch": 1.6111111111111112, "percentage": 8.06, "elapsed_time": "0:00:24", "remaining_time": "0:04:41", "throughput": 1834.64, "total_tokens": 45248}
{"current_steps": 150, "total_steps": 1800, "loss": 0.229, "lr": 0.024833333333333332, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:25", "remaining_time": "0:04:39", "throughput": 1839.91, "total_tokens": 46816}
{"current_steps": 155, "total_steps": 1800, "loss": 0.2385, "lr": 0.025666666666666664, "epoch": 1.7222222222222223, "percentage": 8.61, "elapsed_time": "0:00:26", "remaining_time": "0:04:38", "throughput": 1845.13, "total_tokens": 48384}
{"current_steps": 160, "total_steps": 1800, "loss": 0.2328, "lr": 0.0265, "epoch": 1.7777777777777777, "percentage": 8.89, "elapsed_time": "0:00:27", "remaining_time": "0:04:36", "throughput": 1849.94, "total_tokens": 49952}
{"current_steps": 165, "total_steps": 1800, "loss": 0.2373, "lr": 0.02733333333333333, "epoch": 1.8333333333333335, "percentage": 9.17, "elapsed_time": "0:00:27", "remaining_time": "0:04:35", "throughput": 1854.61, "total_tokens": 51520}
{"current_steps": 170, "total_steps": 1800, "loss": 0.2276, "lr": 0.028166666666666666, "epoch": 1.8888888888888888, "percentage": 9.44, "elapsed_time": "0:00:28", "remaining_time": "0:04:33", "throughput": 1857.07, "total_tokens": 53024}
{"current_steps": 175, "total_steps": 1800, "loss": 0.2308, "lr": 0.028999999999999998, "epoch": 1.9444444444444444, "percentage": 9.72, "elapsed_time": "0:00:29", "remaining_time": "0:04:32", "throughput": 1861.24, "total_tokens": 54592}
{"current_steps": 180, "total_steps": 1800, "loss": 0.2361, "lr": 0.029833333333333333, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:30", "remaining_time": "0:04:31", "throughput": 1861.18, "total_tokens": 56128}
{"current_steps": 180, "total_steps": 1800, "eval_loss": 0.23441705107688904, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:30", "remaining_time": "0:04:38", "throughput": 1812.01, "total_tokens": 56128}
{"current_steps": 185, "total_steps": 1800, "loss": 0.2325, "lr": 0.02999954871719651, "epoch": 2.0555555555555554, "percentage": 10.28, "elapsed_time": "0:00:32", "remaining_time": "0:04:44", "throughput": 1769.65, "total_tokens": 57696}
{"current_steps": 190, "total_steps": 1800, "loss": 0.2306, "lr": 0.029997715427345868, "epoch": 2.111111111111111, "percentage": 10.56, "elapsed_time": "0:00:33", "remaining_time": "0:04:42", "throughput": 1775.15, "total_tokens": 59264}
{"current_steps": 195, "total_steps": 1800, "loss": 0.2315, "lr": 0.02999447209750064, "epoch": 2.1666666666666665, "percentage": 10.83, "elapsed_time": "0:00:34", "remaining_time": "0:04:41", "throughput": 1781.24, "total_tokens": 60864}
{"current_steps": 200, "total_steps": 1800, "loss": 0.2266, "lr": 0.02998981903258893, "epoch": 2.2222222222222223, "percentage": 11.11, "elapsed_time": "0:00:34", "remaining_time": "0:04:39", "throughput": 1787.0, "total_tokens": 62464}
{"current_steps": 205, "total_steps": 1800, "loss": 0.238, "lr": 0.02998375667007787, "epoch": 2.2777777777777777, "percentage": 11.39, "elapsed_time": "0:00:35", "remaining_time": "0:04:38", "throughput": 1791.76, "total_tokens": 64032}
{"current_steps": 210, "total_steps": 1800, "loss": 0.2356, "lr": 0.029976285579932503, "epoch": 2.3333333333333335, "percentage": 11.67, "elapsed_time": "0:00:36", "remaining_time": "0:04:36", "throughput": 1795.57, "total_tokens": 65568}
{"current_steps": 215, "total_steps": 1800, "loss": 0.2319, "lr": 0.029967406464562214, "epoch": 2.388888888888889, "percentage": 11.94, "elapsed_time": "0:00:37", "remaining_time": "0:04:34", "throughput": 1799.05, "total_tokens": 67104}
{"current_steps": 220, "total_steps": 1800, "loss": 0.2457, "lr": 0.02995712015875466, "epoch": 2.4444444444444446, "percentage": 12.22, "elapsed_time": "0:00:38", "remaining_time": "0:04:33", "throughput": 1804.03, "total_tokens": 68704}
{"current_steps": 225, "total_steps": 1800, "loss": 0.2266, "lr": 0.029945427629597305, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:38", "remaining_time": "0:04:32", "throughput": 1808.05, "total_tokens": 70272}
{"current_steps": 230, "total_steps": 1800, "loss": 0.2348, "lr": 0.029932329976386493, "epoch": 2.5555555555555554, "percentage": 12.78, "elapsed_time": "0:00:39", "remaining_time": "0:04:30", "throughput": 1811.13, "total_tokens": 71808}
{"current_steps": 235, "total_steps": 1800, "loss": 0.2186, "lr": 0.0299178284305241, "epoch": 2.611111111111111, "percentage": 13.06, "elapsed_time": "0:00:40", "remaining_time": "0:04:29", "throughput": 1815.78, "total_tokens": 73408}
{"current_steps": 240, "total_steps": 1800, "loss": 0.2477, "lr": 0.02990192435540175, "epoch": 2.6666666666666665, "percentage": 13.33, "elapsed_time": "0:00:41", "remaining_time": "0:04:27", "throughput": 1818.22, "total_tokens": 74912}
{"current_steps": 245, "total_steps": 1800, "loss": 0.2585, "lr": 0.029884619246272646, "epoch": 2.7222222222222223, "percentage": 13.61, "elapsed_time": "0:00:41", "remaining_time": "0:04:26", "throughput": 1823.26, "total_tokens": 76544}
{"current_steps": 250, "total_steps": 1800, "loss": 0.2395, "lr": 0.02986591473011098, "epoch": 2.7777777777777777, "percentage": 13.89, "elapsed_time": "0:00:42", "remaining_time": "0:04:25", "throughput": 1826.84, "total_tokens": 78112}
{"current_steps": 255, "total_steps": 1800, "loss": 0.2366, "lr": 0.02984581256545898, "epoch": 2.8333333333333335, "percentage": 14.17, "elapsed_time": "0:00:43", "remaining_time": "0:04:23", "throughput": 1830.9, "total_tokens": 79712}
{"current_steps": 260, "total_steps": 1800, "loss": 0.2284, "lr": 0.02982431464226157, "epoch": 2.888888888888889, "percentage": 14.44, "elapsed_time": "0:00:44", "remaining_time": "0:04:22", "throughput": 1834.2, "total_tokens": 81280}
{"current_steps": 265, "total_steps": 1800, "loss": 0.2426, "lr": 0.02980142298168869, "epoch": 2.9444444444444446, "percentage": 14.72, "elapsed_time": "0:00:45", "remaining_time": "0:04:21", "throughput": 1837.37, "total_tokens": 82848}
{"current_steps": 270, "total_steps": 1800, "loss": 0.244, "lr": 0.029777139735945243, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:45", "remaining_time": "0:04:20", "throughput": 1837.39, "total_tokens": 84352}
{"current_steps": 270, "total_steps": 1800, "eval_loss": 0.44520169496536255, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:46", "remaining_time": "0:04:25", "throughput": 1799.47, "total_tokens": 84352}
{"current_steps": 275, "total_steps": 1800, "loss": 6.4641, "lr": 0.029751467188068818, "epoch": 3.0555555555555554, "percentage": 15.28, "elapsed_time": "0:00:49", "remaining_time": "0:04:32", "throughput": 1748.18, "total_tokens": 85920}
{"current_steps": 280, "total_steps": 1800, "loss": 7.4452, "lr": 0.02972440775171496, "epoch": 3.111111111111111, "percentage": 15.56, "elapsed_time": "0:00:49", "remaining_time": "0:04:31", "throughput": 1751.98, "total_tokens": 87520}
{"current_steps": 285, "total_steps": 1800, "loss": 4.3594, "lr": 0.029695963970930307, "epoch": 3.1666666666666665, "percentage": 15.83, "elapsed_time": "0:00:50", "remaining_time": "0:04:29", "throughput": 1755.98, "total_tokens": 89088}
{"current_steps": 290, "total_steps": 1800, "loss": 3.2776, "lr": 0.029666138519913395, "epoch": 3.2222222222222223, "percentage": 16.11, "elapsed_time": "0:00:51", "remaining_time": "0:04:28", "throughput": 1760.52, "total_tokens": 90688}
{"current_steps": 295, "total_steps": 1800, "loss": 2.6029, "lr": 0.029634934202763214, "epoch": 3.2777777777777777, "percentage": 16.39, "elapsed_time": "0:00:52", "remaining_time": "0:04:26", "throughput": 1762.61, "total_tokens": 92160}
{"current_steps": 300, "total_steps": 1800, "loss": 1.9547, "lr": 0.0296023539532156, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:53", "remaining_time": "0:04:25", "throughput": 1766.72, "total_tokens": 93760}
{"current_steps": 305, "total_steps": 1800, "loss": 1.4217, "lr": 0.029568400834367403, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:00:53", "remaining_time": "0:04:23", "throughput": 1770.65, "total_tokens": 95360}
{"current_steps": 310, "total_steps": 1800, "loss": 0.9245, "lr": 0.02953307803838851, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:00:54", "remaining_time": "0:04:22", "throughput": 1774.01, "total_tokens": 96928}
{"current_steps": 315, "total_steps": 1800, "loss": 0.559, "lr": 0.02949638888622172, "epoch": 3.5, "percentage": 17.5, "elapsed_time": "0:00:55", "remaining_time": "0:04:21", "throughput": 1776.83, "total_tokens": 98464}
{"current_steps": 320, "total_steps": 1800, "loss": 0.3566, "lr": 0.029458336827270518, "epoch": 3.5555555555555554, "percentage": 17.78, "elapsed_time": "0:00:56", "remaining_time": "0:04:19", "throughput": 1780.64, "total_tokens": 100064}
{"current_steps": 325, "total_steps": 1800, "loss": 0.2966, "lr": 0.029418925439074782, "epoch": 3.611111111111111, "percentage": 18.06, "elapsed_time": "0:00:56", "remaining_time": "0:04:18", "throughput": 1783.29, "total_tokens": 101600}
{"current_steps": 330, "total_steps": 1800, "loss": 0.2705, "lr": 0.029378158426974426, "epoch": 3.6666666666666665, "percentage": 18.33, "elapsed_time": "0:00:57", "remaining_time": "0:04:17", "throughput": 1786.92, "total_tokens": 103200}
{"current_steps": 335, "total_steps": 1800, "loss": 0.235, "lr": 0.029336039623761044, "epoch": 3.7222222222222223, "percentage": 18.61, "elapsed_time": "0:00:58", "remaining_time": "0:04:15", "throughput": 1789.91, "total_tokens": 104768}
{"current_steps": 340, "total_steps": 1800, "loss": 0.2792, "lr": 0.02929257298931754, "epoch": 3.7777777777777777, "percentage": 18.89, "elapsed_time": "0:00:59", "remaining_time": "0:04:14", "throughput": 1792.38, "total_tokens": 106304}
{"current_steps": 345, "total_steps": 1800, "loss": 0.2916, "lr": 0.02924776261024586, "epoch": 3.8333333333333335, "percentage": 19.17, "elapsed_time": "0:01:00", "remaining_time": "0:04:13", "throughput": 1795.54, "total_tokens": 107904}
{"current_steps": 350, "total_steps": 1800, "loss": 0.2437, "lr": 0.02920161269948277, "epoch": 3.888888888888889, "percentage": 19.44, "elapsed_time": "0:01:00", "remaining_time": "0:04:12", "throughput": 1797.48, "total_tokens": 109408}
{"current_steps": 355, "total_steps": 1800, "loss": 0.2606, "lr": 0.029154127595903752, "epoch": 3.9444444444444446, "percentage": 19.72, "elapsed_time": "0:01:01", "remaining_time": "0:04:10", "throughput": 1800.8, "total_tokens": 111008}
{"current_steps": 360, "total_steps": 1800, "loss": 0.2377, "lr": 0.029105311763915113, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:02", "remaining_time": "0:04:09", "throughput": 1802.2, "total_tokens": 112576}
{"current_steps": 360, "total_steps": 1800, "eval_loss": 0.23659464716911316, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:03", "remaining_time": "0:04:13", "throughput": 1779.09, "total_tokens": 112576}
{"current_steps": 365, "total_steps": 1800, "loss": 0.2349, "lr": 0.029055169793034224, "epoch": 4.055555555555555, "percentage": 20.28, "elapsed_time": "0:01:04", "remaining_time": "0:04:15", "throughput": 1759.55, "total_tokens": 114144}
{"current_steps": 370, "total_steps": 1800, "loss": 0.2377, "lr": 0.029003706397458022, "epoch": 4.111111111111111, "percentage": 20.56, "elapsed_time": "0:01:05", "remaining_time": "0:04:13", "throughput": 1761.93, "total_tokens": 115712}
{"current_steps": 375, "total_steps": 1800, "loss": 0.2551, "lr": 0.028950926415619846, "epoch": 4.166666666666667, "percentage": 20.83, "elapsed_time": "0:01:06", "remaining_time": "0:04:12", "throughput": 1763.95, "total_tokens": 117216}
{"current_steps": 380, "total_steps": 1800, "loss": 0.2669, "lr": 0.028896834809734474, "epoch": 4.222222222222222, "percentage": 21.11, "elapsed_time": "0:01:07", "remaining_time": "0:04:11", "throughput": 1767.31, "total_tokens": 118816}
{"current_steps": 385, "total_steps": 1800, "loss": 0.2195, "lr": 0.028841436665331635, "epoch": 4.277777777777778, "percentage": 21.39, "elapsed_time": "0:01:08", "remaining_time": "0:04:09", "throughput": 1769.68, "total_tokens": 120352}
{"current_steps": 390, "total_steps": 1800, "loss": 0.3196, "lr": 0.02878473719077787, "epoch": 4.333333333333333, "percentage": 21.67, "elapsed_time": "0:01:08", "remaining_time": "0:04:08", "throughput": 1772.45, "total_tokens": 121920}
{"current_steps": 395, "total_steps": 1800, "loss": 0.2416, "lr": 0.028726741716786866, "epoch": 4.388888888888889, "percentage": 21.94, "elapsed_time": "0:01:09", "remaining_time": "0:04:07", "throughput": 1774.72, "total_tokens": 123456}
{"current_steps": 400, "total_steps": 1800, "loss": 0.2265, "lr": 0.02866745569591825, "epoch": 4.444444444444445, "percentage": 22.22, "elapsed_time": "0:01:10", "remaining_time": "0:04:06", "throughput": 1777.75, "total_tokens": 125056}
{"current_steps": 405, "total_steps": 1800, "loss": 0.2598, "lr": 0.028606884702065006, "epoch": 4.5, "percentage": 22.5, "elapsed_time": "0:01:11", "remaining_time": "0:04:04", "throughput": 1780.34, "total_tokens": 126624}
{"current_steps": 410, "total_steps": 1800, "loss": 0.31, "lr": 0.028545034429929377, "epoch": 4.555555555555555, "percentage": 22.78, "elapsed_time": "0:01:11", "remaining_time": "0:04:03", "throughput": 1783.26, "total_tokens": 128224}
{"current_steps": 415, "total_steps": 1800, "loss": 0.2659, "lr": 0.028481910694487505, "epoch": 4.611111111111111, "percentage": 23.06, "elapsed_time": "0:01:12", "remaining_time": "0:04:02", "throughput": 1784.94, "total_tokens": 129728}
{"current_steps": 420, "total_steps": 1800, "loss": 0.2464, "lr": 0.02841751943044271, "epoch": 4.666666666666667, "percentage": 23.33, "elapsed_time": "0:01:13", "remaining_time": "0:04:01", "throughput": 1787.74, "total_tokens": 131328}
{"current_steps": 425, "total_steps": 1800, "loss": 0.2335, "lr": 0.028351866691667543, "epoch": 4.722222222222222, "percentage": 23.61, "elapsed_time": "0:01:14", "remaining_time": "0:04:00", "throughput": 1790.15, "total_tokens": 132896}
{"current_steps": 430, "total_steps": 1800, "loss": 0.2379, "lr": 0.02828495865063459, "epoch": 4.777777777777778, "percentage": 23.89, "elapsed_time": "0:01:15", "remaining_time": "0:03:59", "throughput": 1792.85, "total_tokens": 134496}
{"current_steps": 435, "total_steps": 1800, "loss": 0.2369, "lr": 0.028216801597836176, "epoch": 4.833333333333333, "percentage": 24.17, "elapsed_time": "0:01:15", "remaining_time": "0:03:57", "throughput": 1795.21, "total_tokens": 136064}
{"current_steps": 440, "total_steps": 1800, "loss": 0.2991, "lr": 0.028147401941192952, "epoch": 4.888888888888889, "percentage": 24.44, "elapsed_time": "0:01:16", "remaining_time": "0:03:56", "throughput": 1797.83, "total_tokens": 137664}
{"current_steps": 445, "total_steps": 1800, "loss": 0.2247, "lr": 0.028076766205451433, "epoch": 4.944444444444445, "percentage": 24.72, "elapsed_time": "0:01:17", "remaining_time": "0:03:55", "throughput": 1800.03, "total_tokens": 139232}
{"current_steps": 450, "total_steps": 1800, "loss": 0.2286, "lr": 0.028004901031570568, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:18", "remaining_time": "0:03:54", "throughput": 1801.38, "total_tokens": 140832}
{"current_steps": 450, "total_steps": 1800, "eval_loss": 0.2611430585384369, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:18", "remaining_time": "0:03:56", "throughput": 1782.8, "total_tokens": 140832}
{"current_steps": 455, "total_steps": 1800, "loss": 0.2677, "lr": 0.027931813176097366, "epoch": 5.055555555555555, "percentage": 25.28, "elapsed_time": "0:01:20", "remaining_time": "0:03:58", "throughput": 1766.4, "total_tokens": 142368}
{"current_steps": 460, "total_steps": 1800, "loss": 0.2321, "lr": 0.027857509510531685, "epoch": 5.111111111111111, "percentage": 25.56, "elapsed_time": "0:01:21", "remaining_time": "0:03:57", "throughput": 1768.76, "total_tokens": 144000}
{"current_steps": 465, "total_steps": 1800, "loss": 0.2587, "lr": 0.02778199702068017, "epoch": 5.166666666666667, "percentage": 25.83, "elapsed_time": "0:01:22", "remaining_time": "0:03:55", "throughput": 1771.67, "total_tokens": 145632}
{"current_steps": 470, "total_steps": 1800, "loss": 0.2351, "lr": 0.02770528280599949, "epoch": 5.222222222222222, "percentage": 26.11, "elapsed_time": "0:01:22", "remaining_time": "0:03:54", "throughput": 1773.45, "total_tokens": 147168}
{"current_steps": 475, "total_steps": 1800, "loss": 0.2364, "lr": 0.02762737407892886, "epoch": 5.277777777777778, "percentage": 26.39, "elapsed_time": "0:01:23", "remaining_time": "0:03:53", "throughput": 1775.6, "total_tokens": 148736}
{"current_steps": 480, "total_steps": 1800, "loss": 0.2289, "lr": 0.02754827816421195, "epoch": 5.333333333333333, "percentage": 26.67, "elapsed_time": "0:01:24", "remaining_time": "0:03:52", "throughput": 1777.75, "total_tokens": 150304}
{"current_steps": 485, "total_steps": 1800, "loss": 0.2796, "lr": 0.02746800249820822, "epoch": 5.388888888888889, "percentage": 26.94, "elapsed_time": "0:01:25", "remaining_time": "0:03:51", "throughput": 1779.78, "total_tokens": 151872}
{"current_steps": 490, "total_steps": 1800, "loss": 0.2706, "lr": 0.027386554628193813, "epoch": 5.444444444444445, "percentage": 27.22, "elapsed_time": "0:01:26", "remaining_time": "0:03:50", "throughput": 1782.16, "total_tokens": 153472}
{"current_steps": 495, "total_steps": 1800, "loss": 0.2109, "lr": 0.027303942211651937, "epoch": 5.5, "percentage": 27.5, "elapsed_time": "0:01:26", "remaining_time": "0:03:49", "throughput": 1783.52, "total_tokens": 154976}
{"current_steps": 500, "total_steps": 1800, "loss": 0.2339, "lr": 0.02722017301555297, "epoch": 5.555555555555555, "percentage": 27.78, "elapsed_time": "0:01:27", "remaining_time": "0:03:47", "throughput": 1785.55, "total_tokens": 156544}
{"current_steps": 505, "total_steps": 1800, "loss": 0.242, "lr": 0.02713525491562421, "epoch": 5.611111111111111, "percentage": 28.06, "elapsed_time": "0:01:28", "remaining_time": "0:03:46", "throughput": 1787.56, "total_tokens": 158112}
{"current_steps": 510, "total_steps": 1800, "loss": 0.227, "lr": 0.027049195895609432, "epoch": 5.666666666666667, "percentage": 28.33, "elapsed_time": "0:01:29", "remaining_time": "0:03:45", "throughput": 1789.54, "total_tokens": 159680}
{"current_steps": 515, "total_steps": 1800, "loss": 0.2644, "lr": 0.026962004046518273, "epoch": 5.722222222222222, "percentage": 28.61, "elapsed_time": "0:01:30", "remaining_time": "0:03:44", "throughput": 1792.04, "total_tokens": 161312}
{"current_steps": 520, "total_steps": 1800, "loss": 0.2543, "lr": 0.02687368756586555, "epoch": 5.777777777777778, "percentage": 28.89, "elapsed_time": "0:01:30", "remaining_time": "0:03:43", "throughput": 1793.93, "total_tokens": 162880}
{"current_steps": 525, "total_steps": 1800, "loss": 0.2596, "lr": 0.02678425475690055, "epoch": 5.833333333333333, "percentage": 29.17, "elapsed_time": "0:01:31", "remaining_time": "0:03:42", "throughput": 1795.85, "total_tokens": 164448}
{"current_steps": 530, "total_steps": 1800, "loss": 0.2473, "lr": 0.02669371402782638, "epoch": 5.888888888888889, "percentage": 29.44, "elapsed_time": "0:01:32", "remaining_time": "0:03:41", "throughput": 1797.7, "total_tokens": 166016}
{"current_steps": 535, "total_steps": 1800, "loss": 0.2558, "lr": 0.026602073891009458, "epoch": 5.944444444444445, "percentage": 29.72, "elapsed_time": "0:01:33", "remaining_time": "0:03:40", "throughput": 1799.24, "total_tokens": 167552}
{"current_steps": 540, "total_steps": 1800, "loss": 0.2333, "lr": 0.0265093429621792, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:33", "remaining_time": "0:03:39", "throughput": 1799.57, "total_tokens": 169056}
{"current_steps": 540, "total_steps": 1800, "eval_loss": 0.2368834763765335, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:34", "remaining_time": "0:03:41", "throughput": 1784.2, "total_tokens": 169056}
{"current_steps": 545, "total_steps": 1800, "loss": 0.2444, "lr": 0.026415529959618007, "epoch": 6.055555555555555, "percentage": 30.28, "elapsed_time": "0:01:36", "remaining_time": "0:03:42", "throughput": 1768.12, "total_tokens": 170592}
{"current_steps": 550, "total_steps": 1800, "loss": 0.2385, "lr": 0.02632064370334158, "epoch": 6.111111111111111, "percentage": 30.56, "elapsed_time": "0:01:37", "remaining_time": "0:03:41", "throughput": 1770.11, "total_tokens": 172192}
{"current_steps": 555, "total_steps": 1800, "loss": 0.2371, "lr": 0.026224693114269705, "epoch": 6.166666666666667, "percentage": 30.83, "elapsed_time": "0:01:38", "remaining_time": "0:03:39", "throughput": 1772.39, "total_tokens": 173792}
{"current_steps": 560, "total_steps": 1800, "loss": 0.2344, "lr": 0.02612768721338753, "epoch": 6.222222222222222, "percentage": 31.11, "elapsed_time": "0:01:38", "remaining_time": "0:03:38", "throughput": 1774.34, "total_tokens": 175360}
{"current_steps": 565, "total_steps": 1800, "loss": 0.2442, "lr": 0.02602963512089743, "epoch": 6.277777777777778, "percentage": 31.39, "elapsed_time": "0:01:39", "remaining_time": "0:03:37", "throughput": 1776.81, "total_tokens": 176992}
{"current_steps": 570, "total_steps": 1800, "loss": 0.2366, "lr": 0.025930546055361575, "epoch": 6.333333333333333, "percentage": 31.67, "elapsed_time": "0:01:40", "remaining_time": "0:03:36", "throughput": 1778.98, "total_tokens": 178592}
{"current_steps": 575, "total_steps": 1800, "loss": 0.2222, "lr": 0.025830429332835202, "epoch": 6.388888888888889, "percentage": 31.94, "elapsed_time": "0:01:41", "remaining_time": "0:03:35", "throughput": 1780.53, "total_tokens": 180128}
{"current_steps": 580, "total_steps": 1800, "loss": 0.2531, "lr": 0.025729294365990772, "epoch": 6.444444444444445, "percentage": 32.22, "elapsed_time": "0:01:41", "remaining_time": "0:03:34", "throughput": 1781.81, "total_tokens": 181632}
{"current_steps": 585, "total_steps": 1800, "loss": 0.2647, "lr": 0.025627150663232998, "epoch": 6.5, "percentage": 32.5, "elapsed_time": "0:01:42", "remaining_time": "0:03:33", "throughput": 1783.54, "total_tokens": 183200}
{"current_steps": 590, "total_steps": 1800, "loss": 0.255, "lr": 0.025524007827804902, "epoch": 6.555555555555555, "percentage": 32.78, "elapsed_time": "0:01:43", "remaining_time": "0:03:32", "throughput": 1785.59, "total_tokens": 184800}
{"current_steps": 595, "total_steps": 1800, "loss": 0.2067, "lr": 0.025419875556884956, "epoch": 6.611111111111111, "percentage": 33.06, "elapsed_time": "0:01:44", "remaining_time": "0:03:31", "throughput": 1787.01, "total_tokens": 186336}
{"current_steps": 600, "total_steps": 1800, "loss": 0.2494, "lr": 0.025314763640675374, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:45", "remaining_time": "0:03:30", "throughput": 1788.67, "total_tokens": 187904}
{"current_steps": 605, "total_steps": 1800, "loss": 0.2507, "lr": 0.025208681961481655, "epoch": 6.722222222222222, "percentage": 33.61, "elapsed_time": "0:01:45", "remaining_time": "0:03:29", "throughput": 1790.31, "total_tokens": 189472}
{"current_steps": 610, "total_steps": 1800, "loss": 0.2357, "lr": 0.025101640492783503, "epoch": 6.777777777777778, "percentage": 33.89, "elapsed_time": "0:01:46", "remaining_time": "0:03:27", "throughput": 1792.19, "total_tokens": 191072}
{"current_steps": 615, "total_steps": 1800, "loss": 0.2406, "lr": 0.024993649298297137, "epoch": 6.833333333333333, "percentage": 34.17, "elapsed_time": "0:01:47", "remaining_time": "0:03:26", "throughput": 1793.58, "total_tokens": 192608}
{"current_steps": 620, "total_steps": 1800, "loss": 0.2365, "lr": 0.02488471853102912, "epoch": 6.888888888888889, "percentage": 34.44, "elapsed_time": "0:01:48", "remaining_time": "0:03:25", "throughput": 1795.43, "total_tokens": 194208}
{"current_steps": 625, "total_steps": 1800, "loss": 0.2368, "lr": 0.024774858432321828, "epoch": 6.944444444444445, "percentage": 34.72, "elapsed_time": "0:01:48", "remaining_time": "0:03:24", "throughput": 1796.97, "total_tokens": 195776}
{"current_steps": 630, "total_steps": 1800, "loss": 0.208, "lr": 0.024664079330890574, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:49", "remaining_time": "0:03:23", "throughput": 1797.77, "total_tokens": 197344}
{"current_steps": 630, "total_steps": 1800, "eval_loss": 0.2651023268699646, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:50", "remaining_time": "0:03:25", "throughput": 1784.58, "total_tokens": 197344}
{"current_steps": 635, "total_steps": 1800, "loss": 0.2352, "lr": 0.02455239164185254, "epoch": 7.055555555555555, "percentage": 35.28, "elapsed_time": "0:01:52", "remaining_time": "0:03:25", "throughput": 1773.73, "total_tokens": 198944}
{"current_steps": 640, "total_steps": 1800, "loss": 0.2671, "lr": 0.024439805865747562, "epoch": 7.111111111111111, "percentage": 35.56, "elapsed_time": "0:01:52", "remaining_time": "0:03:24", "throughput": 1774.74, "total_tokens": 200512}
{"current_steps": 645, "total_steps": 1800, "loss": 0.2712, "lr": 0.02432633258755093, "epoch": 7.166666666666667, "percentage": 35.83, "elapsed_time": "0:01:53", "remaining_time": "0:03:23", "throughput": 1775.92, "total_tokens": 202016}
{"current_steps": 650, "total_steps": 1800, "loss": 0.2298, "lr": 0.024211982475678205, "epoch": 7.222222222222222, "percentage": 36.11, "elapsed_time": "0:01:54", "remaining_time": "0:03:22", "throughput": 1778.04, "total_tokens": 203648}
{"current_steps": 655, "total_steps": 1800, "loss": 0.2127, "lr": 0.024096766280982205, "epoch": 7.277777777777778, "percentage": 36.39, "elapsed_time": "0:01:55", "remaining_time": "0:03:21", "throughput": 1779.4, "total_tokens": 205184}
{"current_steps": 660, "total_steps": 1800, "loss": 0.2597, "lr": 0.023980694835742226, "epoch": 7.333333333333333, "percentage": 36.67, "elapsed_time": "0:01:56", "remaining_time": "0:03:20", "throughput": 1780.76, "total_tokens": 206720}
{"current_steps": 665, "total_steps": 1800, "loss": 0.2177, "lr": 0.023863779052645667, "epoch": 7.388888888888889, "percentage": 36.94, "elapsed_time": "0:01:56", "remaining_time": "0:03:19", "throughput": 1782.58, "total_tokens": 208320}
{"current_steps": 670, "total_steps": 1800, "loss": 0.2513, "lr": 0.02374602992376202, "epoch": 7.444444444444445, "percentage": 37.22, "elapsed_time": "0:01:57", "remaining_time": "0:03:18", "throughput": 1783.88, "total_tokens": 209856}
{"current_steps": 675, "total_steps": 1800, "loss": 0.2315, "lr": 0.023627458519509432, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:01:58", "remaining_time": "0:03:17", "throughput": 1785.19, "total_tokens": 211392}
{"current_steps": 680, "total_steps": 1800, "loss": 0.2372, "lr": 0.023508075987613904, "epoch": 7.555555555555555, "percentage": 37.78, "elapsed_time": "0:01:59", "remaining_time": "0:03:16", "throughput": 1786.69, "total_tokens": 212960}
{"current_steps": 685, "total_steps": 1800, "loss": 0.2477, "lr": 0.023387893552061202, "epoch": 7.611111111111111, "percentage": 38.06, "elapsed_time": "0:01:59", "remaining_time": "0:03:15", "throughput": 1788.16, "total_tokens": 214528}
{"current_steps": 690, "total_steps": 1800, "loss": 0.229, "lr": 0.023266922512041644, "epoch": 7.666666666666667, "percentage": 38.33, "elapsed_time": "0:02:00", "remaining_time": "0:03:14", "throughput": 1788.91, "total_tokens": 216000}
{"current_steps": 695, "total_steps": 1800, "loss": 0.2372, "lr": 0.023145174240887748, "epoch": 7.722222222222222, "percentage": 38.61, "elapsed_time": "0:02:01", "remaining_time": "0:03:13", "throughput": 1790.78, "total_tokens": 217632}
{"current_steps": 700, "total_steps": 1800, "loss": 0.2365, "lr": 0.023022660185004967, "epoch": 7.777777777777778, "percentage": 38.89, "elapsed_time": "0:02:02", "remaining_time": "0:03:12", "throughput": 1792.4, "total_tokens": 219232}
{"current_steps": 705, "total_steps": 1800, "loss": 0.2369, "lr": 0.02289939186279551, "epoch": 7.833333333333333, "percentage": 39.17, "elapsed_time": "0:02:03", "remaining_time": "0:03:11", "throughput": 1793.79, "total_tokens": 220800}
{"current_steps": 710, "total_steps": 1800, "loss": 0.23, "lr": 0.022775380863575456, "epoch": 7.888888888888889, "percentage": 39.44, "elapsed_time": "0:02:03", "remaining_time": "0:03:10", "throughput": 1795.14, "total_tokens": 222368}
{"current_steps": 715, "total_steps": 1800, "loss": 0.2314, "lr": 0.02265063884648513, "epoch": 7.944444444444445, "percentage": 39.72, "elapsed_time": "0:02:04", "remaining_time": "0:03:09", "throughput": 1796.74, "total_tokens": 223968}
{"current_steps": 720, "total_steps": 1800, "loss": 0.2633, "lr": 0.022525177539392937, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:05", "remaining_time": "0:03:08", "throughput": 1797.45, "total_tokens": 225536}
{"current_steps": 720, "total_steps": 1800, "eval_loss": 0.2546592950820923, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:06", "remaining_time": "0:03:09", "throughput": 1785.89, "total_tokens": 225536}
{"current_steps": 725, "total_steps": 1800, "loss": 0.2551, "lr": 0.02239900873779278, "epoch": 8.055555555555555, "percentage": 40.28, "elapsed_time": "0:02:07", "remaining_time": "0:03:09", "throughput": 1776.7, "total_tokens": 227168}
{"current_steps": 730, "total_steps": 1800, "loss": 0.2374, "lr": 0.022272144303695056, "epoch": 8.11111111111111, "percentage": 40.56, "elapsed_time": "0:02:08", "remaining_time": "0:03:08", "throughput": 1777.61, "total_tokens": 228704}
{"current_steps": 735, "total_steps": 1800, "loss": 0.2338, "lr": 0.02214459616451143, "epoch": 8.166666666666666, "percentage": 40.83, "elapsed_time": "0:02:09", "remaining_time": "0:03:07", "throughput": 1779.47, "total_tokens": 230336}
{"current_steps": 740, "total_steps": 1800, "loss": 0.2364, "lr": 0.02201637631193346, "epoch": 8.222222222222221, "percentage": 41.11, "elapsed_time": "0:02:10", "remaining_time": "0:03:06", "throughput": 1781.12, "total_tokens": 231936}
{"current_steps": 745, "total_steps": 1800, "loss": 0.2331, "lr": 0.021887496800805175, "epoch": 8.277777777777779, "percentage": 41.39, "elapsed_time": "0:02:10", "remaining_time": "0:03:05", "throughput": 1782.32, "total_tokens": 233472}
{"current_steps": 750, "total_steps": 1800, "loss": 0.2333, "lr": 0.021757969747989707, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:02:11", "remaining_time": "0:03:04", "throughput": 1783.72, "total_tokens": 235040}
{"current_steps": 755, "total_steps": 1800, "loss": 0.2476, "lr": 0.02162780733123012, "epoch": 8.38888888888889, "percentage": 41.94, "elapsed_time": "0:02:12", "remaining_time": "0:03:03", "throughput": 1785.07, "total_tokens": 236608}
{"current_steps": 760, "total_steps": 1800, "loss": 0.2377, "lr": 0.021497021788004445, "epoch": 8.444444444444445, "percentage": 42.22, "elapsed_time": "0:02:13", "remaining_time": "0:03:02", "throughput": 1786.21, "total_tokens": 238144}
{"current_steps": 765, "total_steps": 1800, "loss": 0.2348, "lr": 0.021365625414375228, "epoch": 8.5, "percentage": 42.5, "elapsed_time": "0:02:14", "remaining_time": "0:03:01", "throughput": 1787.33, "total_tokens": 239680}
{"current_steps": 770, "total_steps": 1800, "loss": 0.2233, "lr": 0.021233630563833435, "epoch": 8.555555555555555, "percentage": 42.78, "elapsed_time": "0:02:14", "remaining_time": "0:03:00", "throughput": 1788.85, "total_tokens": 241280}
{"current_steps": 775, "total_steps": 1800, "loss": 0.2365, "lr": 0.021101049646137005, "epoch": 8.61111111111111, "percentage": 43.06, "elapsed_time": "0:02:15", "remaining_time": "0:02:59", "throughput": 1789.94, "total_tokens": 242816}
{"current_steps": 780, "total_steps": 1800, "loss": 0.2505, "lr": 0.02096789512614417, "epoch": 8.666666666666666, "percentage": 43.33, "elapsed_time": "0:02:16", "remaining_time": "0:02:58", "throughput": 1791.0, "total_tokens": 244352}
{"current_steps": 785, "total_steps": 1800, "loss": 0.2283, "lr": 0.020834179522641504, "epoch": 8.722222222222221, "percentage": 43.61, "elapsed_time": "0:02:17", "remaining_time": "0:02:57", "throughput": 1791.86, "total_tokens": 245856}
{"current_steps": 790, "total_steps": 1800, "loss": 0.2357, "lr": 0.020699915407166987, "epoch": 8.777777777777779, "percentage": 43.89, "elapsed_time": "0:02:17", "remaining_time": "0:02:56", "throughput": 1793.28, "total_tokens": 247456}
{"current_steps": 795, "total_steps": 1800, "loss": 0.2294, "lr": 0.020565115402828002, "epoch": 8.833333333333334, "percentage": 44.17, "elapsed_time": "0:02:18", "remaining_time": "0:02:55", "throughput": 1794.3, "total_tokens": 248992}
{"current_steps": 800, "total_steps": 1800, "loss": 0.2302, "lr": 0.02042979218311462, "epoch": 8.88888888888889, "percentage": 44.44, "elapsed_time": "0:02:19", "remaining_time": "0:02:54", "throughput": 1795.33, "total_tokens": 250528}
{"current_steps": 805, "total_steps": 1800, "loss": 0.2433, "lr": 0.02029395847070803, "epoch": 8.944444444444445, "percentage": 44.72, "elapsed_time": "0:02:20", "remaining_time": "0:02:53", "throughput": 1796.76, "total_tokens": 252128}
{"current_steps": 810, "total_steps": 1800, "loss": 0.2563, "lr": 0.020157627036284417, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:21", "remaining_time": "0:02:52", "throughput": 1797.38, "total_tokens": 253696}
{"current_steps": 810, "total_steps": 1800, "eval_loss": 0.23346838355064392, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:21", "remaining_time": "0:02:53", "throughput": 1787.09, "total_tokens": 253696}
{"current_steps": 815, "total_steps": 1800, "loss": 0.2294, "lr": 0.02002081069731427, "epoch": 9.055555555555555, "percentage": 45.28, "elapsed_time": "0:02:24", "remaining_time": "0:02:54", "throughput": 1771.02, "total_tokens": 255296}
{"current_steps": 820, "total_steps": 1800, "loss": 0.2305, "lr": 0.01988352231685735, "epoch": 9.11111111111111, "percentage": 45.56, "elapsed_time": "0:02:24", "remaining_time": "0:02:53", "throughput": 1772.0, "total_tokens": 256896}
{"current_steps": 825, "total_steps": 1800, "loss": 0.2297, "lr": 0.019745774802353344, "epoch": 9.166666666666666, "percentage": 45.83, "elapsed_time": "0:02:25", "remaining_time": "0:02:52", "throughput": 1773.07, "total_tokens": 258432}
{"current_steps": 830, "total_steps": 1800, "loss": 0.2386, "lr": 0.019607581104408342, "epoch": 9.222222222222221, "percentage": 46.11, "elapsed_time": "0:02:26", "remaining_time": "0:02:51", "throughput": 1774.32, "total_tokens": 260000}
{"current_steps": 835, "total_steps": 1800, "loss": 0.229, "lr": 0.019468954215577226, "epoch": 9.277777777777779, "percentage": 46.39, "elapsed_time": "0:02:27", "remaining_time": "0:02:50", "throughput": 1775.59, "total_tokens": 261568}
{"current_steps": 840, "total_steps": 1800, "loss": 0.2104, "lr": 0.01932990716914222, "epoch": 9.333333333333334, "percentage": 46.67, "elapsed_time": "0:02:28", "remaining_time": "0:02:49", "throughput": 1777.2, "total_tokens": 263200}
{"current_steps": 845, "total_steps": 1800, "loss": 0.3207, "lr": 0.019190453037887464, "epoch": 9.38888888888889, "percentage": 46.94, "elapsed_time": "0:02:28", "remaining_time": "0:02:48", "throughput": 1778.25, "total_tokens": 264736}
{"current_steps": 850, "total_steps": 1800, "loss": 0.2552, "lr": 0.019050604932870013, "epoch": 9.444444444444445, "percentage": 47.22, "elapsed_time": "0:02:29", "remaining_time": "0:02:47", "throughput": 1779.5, "total_tokens": 266304}
{"current_steps": 855, "total_steps": 1800, "loss": 0.2376, "lr": 0.01891037600218712, "epoch": 9.5, "percentage": 47.5, "elapsed_time": "0:02:30", "remaining_time": "0:02:46", "throughput": 1780.53, "total_tokens": 267840}
{"current_steps": 860, "total_steps": 1800, "loss": 0.2519, "lr": 0.018769779429740154, "epoch": 9.555555555555555, "percentage": 47.78, "elapsed_time": "0:02:31", "remaining_time": "0:02:45", "throughput": 1781.56, "total_tokens": 269376}
{"current_steps": 865, "total_steps": 1800, "loss": 0.2375, "lr": 0.018628828433995014, "epoch": 9.61111111111111, "percentage": 48.06, "elapsed_time": "0:02:31", "remaining_time": "0:02:44", "throughput": 1782.74, "total_tokens": 270944}
{"current_steps": 870, "total_steps": 1800, "loss": 0.2336, "lr": 0.018487536266739445, "epoch": 9.666666666666666, "percentage": 48.33, "elapsed_time": "0:02:32", "remaining_time": "0:02:43", "throughput": 1783.56, "total_tokens": 272448}
{"current_steps": 875, "total_steps": 1800, "loss": 0.2384, "lr": 0.01834591621183709, "epoch": 9.722222222222221, "percentage": 48.61, "elapsed_time": "0:02:33", "remaining_time": "0:02:42", "throughput": 1784.52, "total_tokens": 273984}
{"current_steps": 880, "total_steps": 1800, "loss": 0.2333, "lr": 0.018203981583978603, "epoch": 9.777777777777779, "percentage": 48.89, "elapsed_time": "0:02:34", "remaining_time": "0:02:41", "throughput": 1785.46, "total_tokens": 275520}
{"current_steps": 885, "total_steps": 1800, "loss": 0.2297, "lr": 0.018061745727429836, "epoch": 9.833333333333334, "percentage": 49.17, "elapsed_time": "0:02:35", "remaining_time": "0:02:40", "throughput": 1786.97, "total_tokens": 277152}
{"current_steps": 890, "total_steps": 1800, "loss": 0.2373, "lr": 0.017919222014777265, "epoch": 9.88888888888889, "percentage": 49.44, "elapsed_time": "0:02:35", "remaining_time": "0:02:39", "throughput": 1787.93, "total_tokens": 278688}
{"current_steps": 895, "total_steps": 1800, "loss": 0.2415, "lr": 0.017776423845670717, "epoch": 9.944444444444445, "percentage": 49.72, "elapsed_time": "0:02:36", "remaining_time": "0:02:38", "throughput": 1789.05, "total_tokens": 280256}
{"current_steps": 900, "total_steps": 1800, "loss": 0.2474, "lr": 0.0176333646455636, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:37", "remaining_time": "0:02:37", "throughput": 1789.82, "total_tokens": 281856}
{"current_steps": 900, "total_steps": 1800, "eval_loss": 0.23486152291297913, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:38", "remaining_time": "0:02:38", "throughput": 1780.65, "total_tokens": 281856}
{"current_steps": 905, "total_steps": 1800, "loss": 0.231, "lr": 0.017490057864450664, "epoch": 10.055555555555555, "percentage": 50.28, "elapsed_time": "0:02:39", "remaining_time": "0:02:38", "throughput": 1772.63, "total_tokens": 283424}
{"current_steps": 910, "total_steps": 1800, "loss": 0.2475, "lr": 0.017346516975603462, "epoch": 10.11111111111111, "percentage": 50.56, "elapsed_time": "0:02:40", "remaining_time": "0:02:37", "throughput": 1773.27, "total_tokens": 284960}
{"current_steps": 915, "total_steps": 1800, "loss": 0.2488, "lr": 0.017202755474303683, "epoch": 10.166666666666666, "percentage": 50.83, "elapsed_time": "0:02:41", "remaining_time": "0:02:36", "throughput": 1774.24, "total_tokens": 286496}
{"current_steps": 920, "total_steps": 1800, "loss": 0.2386, "lr": 0.017058786876574313, "epoch": 10.222222222222221, "percentage": 51.11, "elapsed_time": "0:02:42", "remaining_time": "0:02:35", "throughput": 1775.4, "total_tokens": 288064}
{"current_steps": 925, "total_steps": 1800, "loss": 0.2246, "lr": 0.016914624717908923, "epoch": 10.277777777777779, "percentage": 51.39, "elapsed_time": "0:02:43", "remaining_time": "0:02:34", "throughput": 1776.7, "total_tokens": 289664}
{"current_steps": 930, "total_steps": 1800, "loss": 0.2356, "lr": 0.016770282551999093, "epoch": 10.333333333333334, "percentage": 51.67, "elapsed_time": "0:02:43", "remaining_time": "0:02:33", "throughput": 1777.99, "total_tokens": 291264}
{"current_steps": 935, "total_steps": 1800, "loss": 0.2215, "lr": 0.01662577394946016, "epoch": 10.38888888888889, "percentage": 51.94, "elapsed_time": "0:02:44", "remaining_time": "0:02:32", "throughput": 1779.44, "total_tokens": 292896}
{"current_steps": 940, "total_steps": 1800, "loss": 0.2397, "lr": 0.016481112496555317, "epoch": 10.444444444444445, "percentage": 52.22, "elapsed_time": "0:02:45", "remaining_time": "0:02:31", "throughput": 1780.53, "total_tokens": 294464}
{"current_steps": 945, "total_steps": 1800, "loss": 0.2423, "lr": 0.016336311793918295, "epoch": 10.5, "percentage": 52.5, "elapsed_time": "0:02:46", "remaining_time": "0:02:30", "throughput": 1781.78, "total_tokens": 296064}
{"current_steps": 950, "total_steps": 1800, "loss": 0.2337, "lr": 0.016191385455274654, "epoch": 10.555555555555555, "percentage": 52.78, "elapsed_time": "0:02:46", "remaining_time": "0:02:29", "throughput": 1782.68, "total_tokens": 297600}
{"current_steps": 955, "total_steps": 1800, "loss": 0.234, "lr": 0.016046347106161877, "epoch": 10.61111111111111, "percentage": 53.06, "elapsed_time": "0:02:47", "remaining_time": "0:02:28", "throughput": 1783.89, "total_tokens": 299200}
{"current_steps": 960, "total_steps": 1800, "loss": 0.2301, "lr": 0.01590121038264835, "epoch": 10.666666666666666, "percentage": 53.33, "elapsed_time": "0:02:48", "remaining_time": "0:02:27", "throughput": 1784.59, "total_tokens": 300704}
{"current_steps": 965, "total_steps": 1800, "loss": 0.2289, "lr": 0.015755988930051302, "epoch": 10.722222222222221, "percentage": 53.61, "elapsed_time": "0:02:49", "remaining_time": "0:02:26", "throughput": 1785.43, "total_tokens": 302240}
{"current_steps": 970, "total_steps": 1800, "loss": 0.2461, "lr": 0.01561069640165394, "epoch": 10.777777777777779, "percentage": 53.89, "elapsed_time": "0:02:50", "remaining_time": "0:02:25", "throughput": 1786.25, "total_tokens": 303776}
{"current_steps": 975, "total_steps": 1800, "loss": 0.2352, "lr": 0.015465346457421807, "epoch": 10.833333333333334, "percentage": 54.17, "elapsed_time": "0:02:50", "remaining_time": "0:02:24", "throughput": 1787.27, "total_tokens": 305344}
{"current_steps": 980, "total_steps": 1800, "loss": 0.2332, "lr": 0.015319952762718515, "epoch": 10.88888888888889, "percentage": 54.44, "elapsed_time": "0:02:51", "remaining_time": "0:02:23", "throughput": 1788.28, "total_tokens": 306912}
{"current_steps": 985, "total_steps": 1800, "loss": 0.2354, "lr": 0.015174528987020958, "epoch": 10.944444444444445, "percentage": 54.72, "elapsed_time": "0:02:52", "remaining_time": "0:02:22", "throughput": 1789.45, "total_tokens": 308512}
{"current_steps": 990, "total_steps": 1800, "loss": 0.2115, "lr": 0.015029088802634146, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:53", "remaining_time": "0:02:21", "throughput": 1789.93, "total_tokens": 310080}
{"current_steps": 990, "total_steps": 1800, "eval_loss": 0.24229076504707336, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:54", "remaining_time": "0:02:22", "throughput": 1781.54, "total_tokens": 310080}
{"current_steps": 995, "total_steps": 1800, "loss": 0.2362, "lr": 0.014883645883405797, "epoch": 11.055555555555555, "percentage": 55.28, "elapsed_time": "0:02:55", "remaining_time": "0:02:22", "throughput": 1774.11, "total_tokens": 311616}
{"current_steps": 1000, "total_steps": 1800, "loss": 0.2236, "lr": 0.014738213903440746, "epoch": 11.11111111111111, "percentage": 55.56, "elapsed_time": "0:02:56", "remaining_time": "0:02:21", "throughput": 1774.76, "total_tokens": 313184}
{"current_steps": 1005, "total_steps": 1800, "loss": 0.2486, "lr": 0.014592806535815357, "epoch": 11.166666666666666, "percentage": 55.83, "elapsed_time": "0:02:57", "remaining_time": "0:02:20", "throughput": 1776.12, "total_tokens": 314816}
{"current_steps": 1010, "total_steps": 1800, "loss": 0.2368, "lr": 0.014447437451291999, "epoch": 11.222222222222221, "percentage": 56.11, "elapsed_time": "0:02:58", "remaining_time": "0:02:19", "throughput": 1777.16, "total_tokens": 316384}
{"current_steps": 1015, "total_steps": 1800, "loss": 0.2343, "lr": 0.014302120317033798, "epoch": 11.277777777777779, "percentage": 56.39, "elapsed_time": "0:02:58", "remaining_time": "0:02:18", "throughput": 1778.19, "total_tokens": 317952}
{"current_steps": 1020, "total_steps": 1800, "loss": 0.2317, "lr": 0.014156868795319669, "epoch": 11.333333333333334, "percentage": 56.67, "elapsed_time": "0:02:59", "remaining_time": "0:02:17", "throughput": 1779.08, "total_tokens": 319488}
{"current_steps": 1025, "total_steps": 1800, "loss": 0.2203, "lr": 0.014011696542259821, "epoch": 11.38888888888889, "percentage": 56.94, "elapsed_time": "0:03:00", "remaining_time": "0:02:16", "throughput": 1779.94, "total_tokens": 321024}
{"current_steps": 1030, "total_steps": 1800, "loss": 0.247, "lr": 0.013866617206511882, "epoch": 11.444444444444445, "percentage": 57.22, "elapsed_time": "0:03:01", "remaining_time": "0:02:15", "throughput": 1780.8, "total_tokens": 322560}
{"current_steps": 1035, "total_steps": 1800, "loss": 0.2376, "lr": 0.013721644427997651, "epoch": 11.5, "percentage": 57.5, "elapsed_time": "0:03:01", "remaining_time": "0:02:14", "throughput": 1781.81, "total_tokens": 324128}
{"current_steps": 1040, "total_steps": 1800, "loss": 0.2345, "lr": 0.01357679183662076, "epoch": 11.555555555555555, "percentage": 57.78, "elapsed_time": "0:03:02", "remaining_time": "0:02:13", "throughput": 1782.79, "total_tokens": 325696}
{"current_steps": 1045, "total_steps": 1800, "loss": 0.2312, "lr": 0.0134320730509852, "epoch": 11.61111111111111, "percentage": 58.06, "elapsed_time": "0:03:03", "remaining_time": "0:02:12", "throughput": 1783.62, "total_tokens": 327232}
{"current_steps": 1050, "total_steps": 1800, "loss": 0.2357, "lr": 0.01328750167711494, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:03:04", "remaining_time": "0:02:11", "throughput": 1784.43, "total_tokens": 328768}
{"current_steps": 1055, "total_steps": 1800, "loss": 0.23, "lr": 0.013143091307174755, "epoch": 11.722222222222221, "percentage": 58.61, "elapsed_time": "0:03:05", "remaining_time": "0:02:10", "throughput": 1785.41, "total_tokens": 330336}
{"current_steps": 1060, "total_steps": 1800, "loss": 0.2274, "lr": 0.012998855518192309, "epoch": 11.777777777777779, "percentage": 58.89, "elapsed_time": "0:03:05", "remaining_time": "0:02:09", "throughput": 1786.34, "total_tokens": 331904}
{"current_steps": 1065, "total_steps": 1800, "loss": 0.2256, "lr": 0.012854807870781686, "epoch": 11.833333333333334, "percentage": 59.17, "elapsed_time": "0:03:06", "remaining_time": "0:02:08", "throughput": 1787.29, "total_tokens": 333472}
{"current_steps": 1070, "total_steps": 1800, "loss": 0.2434, "lr": 0.012710961907868478, "epoch": 11.88888888888889, "percentage": 59.44, "elapsed_time": "0:03:07", "remaining_time": "0:02:07", "throughput": 1788.54, "total_tokens": 335104}
{"current_steps": 1075, "total_steps": 1800, "loss": 0.2332, "lr": 0.012567331153416489, "epoch": 11.944444444444445, "percentage": 59.72, "elapsed_time": "0:03:08", "remaining_time": "0:02:06", "throughput": 1789.18, "total_tokens": 336608}
{"current_steps": 1080, "total_steps": 1800, "loss": 0.2385, "lr": 0.012423929111156296, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:08", "remaining_time": "0:02:05", "throughput": 1789.51, "total_tokens": 338144}
{"current_steps": 1080, "total_steps": 1800, "eval_loss": 0.23256495594978333, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:09", "remaining_time": "0:02:06", "throughput": 1781.84, "total_tokens": 338144}
{"current_steps": 1085, "total_steps": 1800, "loss": 0.2299, "lr": 0.012280769263315627, "epoch": 12.055555555555555, "percentage": 60.28, "elapsed_time": "0:03:11", "remaining_time": "0:02:06", "throughput": 1773.74, "total_tokens": 339712}
{"current_steps": 1090, "total_steps": 1800, "loss": 0.231, "lr": 0.012137865069351828, "epoch": 12.11111111111111, "percentage": 60.56, "elapsed_time": "0:03:12", "remaining_time": "0:02:05", "throughput": 1774.11, "total_tokens": 341184}
{"current_steps": 1095, "total_steps": 1800, "loss": 0.2314, "lr": 0.01199522996468644, "epoch": 12.166666666666666, "percentage": 60.83, "elapsed_time": "0:03:13", "remaining_time": "0:02:04", "throughput": 1775.09, "total_tokens": 342752}
{"current_steps": 1100, "total_steps": 1800, "loss": 0.2372, "lr": 0.01185287735944204, "epoch": 12.222222222222221, "percentage": 61.11, "elapsed_time": "0:03:13", "remaining_time": "0:02:03", "throughput": 1776.21, "total_tokens": 344352}
{"current_steps": 1105, "total_steps": 1800, "loss": 0.2363, "lr": 0.011710820637181448, "epoch": 12.277777777777779, "percentage": 61.39, "elapsed_time": "0:03:14", "remaining_time": "0:02:02", "throughput": 1777.32, "total_tokens": 345952}
{"current_steps": 1110, "total_steps": 1800, "loss": 0.228, "lr": 0.011569073153649483, "epoch": 12.333333333333334, "percentage": 61.67, "elapsed_time": "0:03:15", "remaining_time": "0:02:01", "throughput": 1778.28, "total_tokens": 347520}
{"current_steps": 1115, "total_steps": 1800, "loss": 0.2392, "lr": 0.01142764823551724, "epoch": 12.38888888888889, "percentage": 61.94, "elapsed_time": "0:03:16", "remaining_time": "0:02:00", "throughput": 1779.09, "total_tokens": 349056}
{"current_steps": 1120, "total_steps": 1800, "loss": 0.2533, "lr": 0.011286559179129213, "epoch": 12.444444444444445, "percentage": 62.22, "elapsed_time": "0:03:16", "remaining_time": "0:01:59", "throughput": 1780.16, "total_tokens": 350656}
{"current_steps": 1125, "total_steps": 1800, "loss": 0.2331, "lr": 0.01114581924925317, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:03:17", "remaining_time": "0:01:58", "throughput": 1780.94, "total_tokens": 352192}
{"current_steps": 1130, "total_steps": 1800, "loss": 0.231, "lr": 0.011005441677833067, "epoch": 12.555555555555555, "percentage": 62.78, "elapsed_time": "0:03:18", "remaining_time": "0:01:57", "throughput": 1782.01, "total_tokens": 353792}
{"current_steps": 1135, "total_steps": 1800, "loss": 0.2222, "lr": 0.010865439662745013, "epoch": 12.61111111111111, "percentage": 63.06, "elapsed_time": "0:03:19", "remaining_time": "0:01:56", "throughput": 1782.92, "total_tokens": 355360}
{"current_steps": 1140, "total_steps": 1800, "loss": 0.2356, "lr": 0.01072582636655643, "epoch": 12.666666666666666, "percentage": 63.33, "elapsed_time": "0:03:20", "remaining_time": "0:01:55", "throughput": 1783.67, "total_tokens": 356896}
{"current_steps": 1145, "total_steps": 1800, "loss": 0.2386, "lr": 0.010586614915288572, "epoch": 12.722222222222221, "percentage": 63.61, "elapsed_time": "0:03:20", "remaining_time": "0:01:54", "throughput": 1784.43, "total_tokens": 358432}
{"current_steps": 1150, "total_steps": 1800, "loss": 0.22, "lr": 0.010447818397182444, "epoch": 12.777777777777779, "percentage": 63.89, "elapsed_time": "0:03:21", "remaining_time": "0:01:53", "throughput": 1785.43, "total_tokens": 360032}
{"current_steps": 1155, "total_steps": 1800, "loss": 0.2287, "lr": 0.010309449861468272, "epoch": 12.833333333333334, "percentage": 64.17, "elapsed_time": "0:03:22", "remaining_time": "0:01:53", "throughput": 1786.29, "total_tokens": 361600}
{"current_steps": 1160, "total_steps": 1800, "loss": 0.2368, "lr": 0.010171522317138689, "epoch": 12.88888888888889, "percentage": 64.44, "elapsed_time": "0:03:23", "remaining_time": "0:01:52", "throughput": 1787.3, "total_tokens": 363200}
{"current_steps": 1165, "total_steps": 1800, "loss": 0.2434, "lr": 0.01003404873172563, "epoch": 12.944444444444445, "percentage": 64.72, "elapsed_time": "0:03:23", "remaining_time": "0:01:51", "throughput": 1788.16, "total_tokens": 364768}
{"current_steps": 1170, "total_steps": 1800, "loss": 0.2391, "lr": 0.009897042030081191, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:24", "remaining_time": "0:01:50", "throughput": 1788.6, "total_tokens": 366336}
{"current_steps": 1170, "total_steps": 1800, "eval_loss": 0.2346113622188568, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:25", "remaining_time": "0:01:50", "throughput": 1781.51, "total_tokens": 366336}
{"current_steps": 1175, "total_steps": 1800, "loss": 0.2277, "lr": 0.009760515093162463, "epoch": 13.055555555555555, "percentage": 65.28, "elapsed_time": "0:03:27", "remaining_time": "0:01:50", "throughput": 1775.78, "total_tokens": 367936}
{"current_steps": 1180, "total_steps": 1800, "loss": 0.2392, "lr": 0.009624480756820496, "epoch": 13.11111111111111, "percentage": 65.56, "elapsed_time": "0:03:27", "remaining_time": "0:01:49", "throughput": 1776.48, "total_tokens": 369504}
{"current_steps": 1185, "total_steps": 1800, "loss": 0.2439, "lr": 0.009488951810593525, "epoch": 13.166666666666666, "percentage": 65.83, "elapsed_time": "0:03:28", "remaining_time": "0:01:48", "throughput": 1777.36, "total_tokens": 371072}
{"current_steps": 1190, "total_steps": 1800, "loss": 0.2261, "lr": 0.009353940996504537, "epoch": 13.222222222222221, "percentage": 66.11, "elapsed_time": "0:03:29", "remaining_time": "0:01:47", "throughput": 1777.83, "total_tokens": 372544}
{"current_steps": 1195, "total_steps": 1800, "loss": 0.2352, "lr": 0.009219461007863278, "epoch": 13.277777777777779, "percentage": 66.39, "elapsed_time": "0:03:30", "remaining_time": "0:01:46", "throughput": 1778.84, "total_tokens": 374144}
{"current_steps": 1200, "total_steps": 1800, "loss": 0.2361, "lr": 0.009085524488072901, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:31", "remaining_time": "0:01:45", "throughput": 1779.7, "total_tokens": 375712}
{"current_steps": 1205, "total_steps": 1800, "loss": 0.2362, "lr": 0.008952144029441248, "epoch": 13.38888888888889, "percentage": 66.94, "elapsed_time": "0:03:31", "remaining_time": "0:01:44", "throughput": 1780.56, "total_tokens": 377280}
{"current_steps": 1210, "total_steps": 1800, "loss": 0.2278, "lr": 0.008819332171996975, "epoch": 13.444444444444445, "percentage": 67.22, "elapsed_time": "0:03:32", "remaining_time": "0:01:43", "throughput": 1781.41, "total_tokens": 378848}
{"current_steps": 1215, "total_steps": 1800, "loss": 0.2282, "lr": 0.008687101402310564, "epoch": 13.5, "percentage": 67.5, "elapsed_time": "0:03:33", "remaining_time": "0:01:42", "throughput": 1782.25, "total_tokens": 380416}
{"current_steps": 1220, "total_steps": 1800, "loss": 0.2518, "lr": 0.008555464152320372, "epoch": 13.555555555555555, "percentage": 67.78, "elapsed_time": "0:03:34", "remaining_time": "0:01:41", "throughput": 1783.1, "total_tokens": 381984}
{"current_steps": 1225, "total_steps": 1800, "loss": 0.2296, "lr": 0.008424432798163836, "epoch": 13.61111111111111, "percentage": 68.06, "elapsed_time": "0:03:35", "remaining_time": "0:01:40", "throughput": 1783.93, "total_tokens": 383552}
{"current_steps": 1230, "total_steps": 1800, "loss": 0.2338, "lr": 0.008294019659013892, "epoch": 13.666666666666666, "percentage": 68.33, "elapsed_time": "0:03:35", "remaining_time": "0:01:39", "throughput": 1784.76, "total_tokens": 385120}
{"current_steps": 1235, "total_steps": 1800, "loss": 0.2524, "lr": 0.008164236995920735, "epoch": 13.722222222222221, "percentage": 68.61, "elapsed_time": "0:03:36", "remaining_time": "0:01:39", "throughput": 1785.7, "total_tokens": 386720}
{"current_steps": 1240, "total_steps": 1800, "loss": 0.2396, "lr": 0.008035097010659147, "epoch": 13.777777777777779, "percentage": 68.89, "elapsed_time": "0:03:37", "remaining_time": "0:01:38", "throughput": 1786.76, "total_tokens": 388352}
{"current_steps": 1245, "total_steps": 1800, "loss": 0.2316, "lr": 0.00790661184458125, "epoch": 13.833333333333334, "percentage": 69.17, "elapsed_time": "0:03:38", "remaining_time": "0:01:37", "throughput": 1787.68, "total_tokens": 389952}
{"current_steps": 1250, "total_steps": 1800, "loss": 0.2413, "lr": 0.007778793577475039, "epoch": 13.88888888888889, "percentage": 69.44, "elapsed_time": "0:03:38", "remaining_time": "0:01:36", "throughput": 1788.35, "total_tokens": 391488}
{"current_steps": 1255, "total_steps": 1800, "loss": 0.2449, "lr": 0.007651654226428696, "epoch": 13.944444444444445, "percentage": 69.72, "elapsed_time": "0:03:39", "remaining_time": "0:01:35", "throughput": 1788.88, "total_tokens": 392992}
{"current_steps": 1260, "total_steps": 1800, "loss": 0.2267, "lr": 0.0075252057447007465, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:40", "remaining_time": "0:01:34", "throughput": 1788.91, "total_tokens": 394464}
{"current_steps": 1260, "total_steps": 1800, "eval_loss": 0.23414771258831024, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:41", "remaining_time": "0:01:34", "throughput": 1782.33, "total_tokens": 394464}
{"current_steps": 1265, "total_steps": 1800, "loss": 0.2356, "lr": 0.007399460020596265, "epoch": 14.055555555555555, "percentage": 70.28, "elapsed_time": "0:03:42", "remaining_time": "0:01:34", "throughput": 1776.95, "total_tokens": 396064}
{"current_steps": 1270, "total_steps": 1800, "loss": 0.2322, "lr": 0.007274428876349185, "epoch": 14.11111111111111, "percentage": 70.56, "elapsed_time": "0:03:43", "remaining_time": "0:01:33", "throughput": 1777.71, "total_tokens": 397664}
{"current_steps": 1275, "total_steps": 1800, "loss": 0.2244, "lr": 0.007150124067010788, "epoch": 14.166666666666666, "percentage": 70.83, "elapsed_time": "0:03:44", "remaining_time": "0:01:32", "throughput": 1778.41, "total_tokens": 399200}
{"current_steps": 1280, "total_steps": 1800, "loss": 0.228, "lr": 0.007026557279344533, "epoch": 14.222222222222221, "percentage": 71.11, "elapsed_time": "0:03:45", "remaining_time": "0:01:31", "throughput": 1779.23, "total_tokens": 400768}
{"current_steps": 1285, "total_steps": 1800, "loss": 0.2321, "lr": 0.006903740130727311, "epoch": 14.277777777777779, "percentage": 71.39, "elapsed_time": "0:03:46", "remaining_time": "0:01:30", "throughput": 1780.15, "total_tokens": 402368}
{"current_steps": 1290, "total_steps": 1800, "loss": 0.2431, "lr": 0.0067816841680572015, "epoch": 14.333333333333334, "percentage": 71.67, "elapsed_time": "0:03:46", "remaining_time": "0:01:29", "throughput": 1781.07, "total_tokens": 403968}
{"current_steps": 1295, "total_steps": 1800, "loss": 0.2376, "lr": 0.006660400866667899, "epoch": 14.38888888888889, "percentage": 71.94, "elapsed_time": "0:03:47", "remaining_time": "0:01:28", "throughput": 1782.24, "total_tokens": 405632}
{"current_steps": 1300, "total_steps": 1800, "loss": 0.2374, "lr": 0.006539901629249787, "epoch": 14.444444444444445, "percentage": 72.22, "elapsed_time": "0:03:48", "remaining_time": "0:01:27", "throughput": 1783.02, "total_tokens": 407200}
{"current_steps": 1305, "total_steps": 1800, "loss": 0.2259, "lr": 0.006420197784777924, "epoch": 14.5, "percentage": 72.5, "elapsed_time": "0:03:49", "remaining_time": "0:01:26", "throughput": 1783.8, "total_tokens": 408768}
{"current_steps": 1310, "total_steps": 1800, "loss": 0.2171, "lr": 0.006301300587446937, "epoch": 14.555555555555555, "percentage": 72.78, "elapsed_time": "0:03:49", "remaining_time": "0:01:26", "throughput": 1784.45, "total_tokens": 410304}
{"current_steps": 1315, "total_steps": 1800, "loss": 0.2372, "lr": 0.006183221215612904, "epoch": 14.61111111111111, "percentage": 73.06, "elapsed_time": "0:03:50", "remaining_time": "0:01:25", "throughput": 1784.97, "total_tokens": 411808}
{"current_steps": 1320, "total_steps": 1800, "loss": 0.2446, "lr": 0.00606597077074242, "epoch": 14.666666666666666, "percentage": 73.33, "elapsed_time": "0:03:51", "remaining_time": "0:01:24", "throughput": 1785.47, "total_tokens": 413312}
{"current_steps": 1325, "total_steps": 1800, "loss": 0.2308, "lr": 0.005949560276368865, "epoch": 14.722222222222221, "percentage": 73.61, "elapsed_time": "0:03:52", "remaining_time": "0:01:23", "throughput": 1786.22, "total_tokens": 414880}
{"current_steps": 1330, "total_steps": 1800, "loss": 0.2429, "lr": 0.005834000677056003, "epoch": 14.777777777777779, "percentage": 73.89, "elapsed_time": "0:03:53", "remaining_time": "0:01:22", "throughput": 1786.96, "total_tokens": 416448}
{"current_steps": 1335, "total_steps": 1800, "loss": 0.2318, "lr": 0.005719302837369021, "epoch": 14.833333333333334, "percentage": 74.17, "elapsed_time": "0:03:53", "remaining_time": "0:01:21", "throughput": 1787.44, "total_tokens": 417952}
{"current_steps": 1340, "total_steps": 1800, "loss": 0.2337, "lr": 0.00560547754085305, "epoch": 14.88888888888889, "percentage": 74.44, "elapsed_time": "0:03:54", "remaining_time": "0:01:20", "throughput": 1788.02, "total_tokens": 419488}
{"current_steps": 1345, "total_steps": 1800, "loss": 0.226, "lr": 0.005492535489019344, "epoch": 14.944444444444445, "percentage": 74.72, "elapsed_time": "0:03:55", "remaining_time": "0:01:19", "throughput": 1788.64, "total_tokens": 421024}
{"current_steps": 1350, "total_steps": 1800, "loss": 0.2321, "lr": 0.005380487300339167, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:56", "remaining_time": "0:01:18", "throughput": 1789.02, "total_tokens": 422592}
{"current_steps": 1350, "total_steps": 1800, "eval_loss": 0.23522897064685822, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:57", "remaining_time": "0:01:19", "throughput": 1782.86, "total_tokens": 422592}
{"current_steps": 1355, "total_steps": 1800, "loss": 0.2289, "lr": 0.005269343509245449, "epoch": 15.055555555555555, "percentage": 75.28, "elapsed_time": "0:03:58", "remaining_time": "0:01:18", "throughput": 1777.34, "total_tokens": 424128}
{"current_steps": 1360, "total_steps": 1800, "loss": 0.2301, "lr": 0.005159114565142392, "epoch": 15.11111111111111, "percentage": 75.56, "elapsed_time": "0:03:59", "remaining_time": "0:01:17", "throughput": 1778.24, "total_tokens": 425728}
{"current_steps": 1365, "total_steps": 1800, "loss": 0.2342, "lr": 0.0050498108314230425, "epoch": 15.166666666666666, "percentage": 75.83, "elapsed_time": "0:04:00", "remaining_time": "0:01:16", "throughput": 1778.76, "total_tokens": 427232}
{"current_steps": 1370, "total_steps": 1800, "loss": 0.2362, "lr": 0.0049414425844949445, "epoch": 15.222222222222221, "percentage": 76.11, "elapsed_time": "0:04:00", "remaining_time": "0:01:15", "throughput": 1779.51, "total_tokens": 428800}
{"current_steps": 1375, "total_steps": 1800, "loss": 0.2333, "lr": 0.004834020012814016, "epoch": 15.277777777777779, "percentage": 76.39, "elapsed_time": "0:04:01", "remaining_time": "0:01:14", "throughput": 1780.03, "total_tokens": 430304}
{"current_steps": 1380, "total_steps": 1800, "loss": 0.2262, "lr": 0.004727553215926623, "epoch": 15.333333333333334, "percentage": 76.67, "elapsed_time": "0:04:02", "remaining_time": "0:01:13", "throughput": 1780.76, "total_tokens": 431872}
{"current_steps": 1385, "total_steps": 1800, "loss": 0.2338, "lr": 0.004622052203520061, "epoch": 15.38888888888889, "percentage": 76.94, "elapsed_time": "0:04:03", "remaining_time": "0:01:12", "throughput": 1781.61, "total_tokens": 433472}
{"current_steps": 1390, "total_steps": 1800, "loss": 0.2347, "lr": 0.004517526894481498, "epoch": 15.444444444444445, "percentage": 77.22, "elapsed_time": "0:04:04", "remaining_time": "0:01:11", "throughput": 1782.33, "total_tokens": 435040}
{"current_steps": 1395, "total_steps": 1800, "loss": 0.2291, "lr": 0.004413987115965404, "epoch": 15.5, "percentage": 77.5, "elapsed_time": "0:04:04", "remaining_time": "0:01:11", "throughput": 1783.17, "total_tokens": 436640}
{"current_steps": 1400, "total_steps": 1800, "loss": 0.24, "lr": 0.004311442602469636, "epoch": 15.555555555555555, "percentage": 77.78, "elapsed_time": "0:04:05", "remaining_time": "0:01:10", "throughput": 1783.89, "total_tokens": 438208}
{"current_steps": 1405, "total_steps": 1800, "loss": 0.2299, "lr": 0.004209902994920235, "epoch": 15.61111111111111, "percentage": 78.06, "elapsed_time": "0:04:06", "remaining_time": "0:01:09", "throughput": 1784.62, "total_tokens": 439776}
{"current_steps": 1410, "total_steps": 1800, "loss": 0.2309, "lr": 0.004109377839765016, "epoch": 15.666666666666666, "percentage": 78.33, "elapsed_time": "0:04:07", "remaining_time": "0:01:08", "throughput": 1785.43, "total_tokens": 441376}
{"current_steps": 1415, "total_steps": 1800, "loss": 0.2309, "lr": 0.004009876588076046, "epoch": 15.722222222222221, "percentage": 78.61, "elapsed_time": "0:04:07", "remaining_time": "0:01:07", "throughput": 1786.09, "total_tokens": 442944}
{"current_steps": 1420, "total_steps": 1800, "loss": 0.233, "lr": 0.003911408594661061, "epoch": 15.777777777777779, "percentage": 78.89, "elapsed_time": "0:04:08", "remaining_time": "0:01:06", "throughput": 1786.56, "total_tokens": 444448}
{"current_steps": 1425, "total_steps": 1800, "loss": 0.2299, "lr": 0.0038139831171839726, "epoch": 15.833333333333334, "percentage": 79.17, "elapsed_time": "0:04:09", "remaining_time": "0:01:05", "throughput": 1787.35, "total_tokens": 446048}
{"current_steps": 1430, "total_steps": 1800, "loss": 0.233, "lr": 0.0037176093152944947, "epoch": 15.88888888888889, "percentage": 79.44, "elapsed_time": "0:04:10", "remaining_time": "0:01:04", "throughput": 1787.92, "total_tokens": 447584}
{"current_steps": 1435, "total_steps": 1800, "loss": 0.2297, "lr": 0.0036222962497669668, "epoch": 15.944444444444445, "percentage": 79.72, "elapsed_time": "0:04:11", "remaining_time": "0:01:03", "throughput": 1788.38, "total_tokens": 449088}
{"current_steps": 1440, "total_steps": 1800, "loss": 0.23, "lr": 0.003528052881648488, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:11", "remaining_time": "0:01:02", "throughput": 1788.62, "total_tokens": 450624}
{"current_steps": 1440, "total_steps": 1800, "eval_loss": 0.2345980703830719, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:12", "remaining_time": "0:01:03", "throughput": 1782.85, "total_tokens": 450624}
{"current_steps": 1445, "total_steps": 1800, "loss": 0.2278, "lr": 0.0034348880714164414, "epoch": 16.055555555555557, "percentage": 80.28, "elapsed_time": "0:04:14", "remaining_time": "0:01:02", "throughput": 1777.89, "total_tokens": 452224}
{"current_steps": 1450, "total_steps": 1800, "loss": 0.2288, "lr": 0.0033428105781454364, "epoch": 16.11111111111111, "percentage": 80.56, "elapsed_time": "0:04:15", "remaining_time": "0:01:01", "throughput": 1778.5, "total_tokens": 453760}
{"current_steps": 1455, "total_steps": 1800, "loss": 0.2299, "lr": 0.0032518290586838377, "epoch": 16.166666666666668, "percentage": 80.83, "elapsed_time": "0:04:15", "remaining_time": "0:01:00", "throughput": 1779.09, "total_tokens": 455296}
{"current_steps": 1460, "total_steps": 1800, "loss": 0.2268, "lr": 0.0031619520668398388, "epoch": 16.22222222222222, "percentage": 81.11, "elapsed_time": "0:04:16", "remaining_time": "0:00:59", "throughput": 1779.69, "total_tokens": 456832}
{"current_steps": 1465, "total_steps": 1800, "loss": 0.2329, "lr": 0.003073188052577281, "epoch": 16.27777777777778, "percentage": 81.39, "elapsed_time": "0:04:17", "remaining_time": "0:00:58", "throughput": 1780.29, "total_tokens": 458368}
{"current_steps": 1470, "total_steps": 1800, "loss": 0.2311, "lr": 0.00298554536122122, "epoch": 16.333333333333332, "percentage": 81.67, "elapsed_time": "0:04:18", "remaining_time": "0:00:57", "throughput": 1781.1, "total_tokens": 459968}
{"current_steps": 1475, "total_steps": 1800, "loss": 0.2319, "lr": 0.0028990322326732957, "epoch": 16.38888888888889, "percentage": 81.94, "elapsed_time": "0:04:19", "remaining_time": "0:00:57", "throughput": 1781.8, "total_tokens": 461536}
{"current_steps": 1480, "total_steps": 1800, "loss": 0.2288, "lr": 0.0028136568006370643, "epoch": 16.444444444444443, "percentage": 82.22, "elapsed_time": "0:04:19", "remaining_time": "0:00:56", "throughput": 1782.6, "total_tokens": 463136}
{"current_steps": 1485, "total_steps": 1800, "loss": 0.2298, "lr": 0.0027294270918532875, "epoch": 16.5, "percentage": 82.5, "elapsed_time": "0:04:20", "remaining_time": "0:00:55", "throughput": 1783.29, "total_tokens": 464704}
{"current_steps": 1490, "total_steps": 1800, "loss": 0.2288, "lr": 0.0026463510253452744, "epoch": 16.555555555555557, "percentage": 82.78, "elapsed_time": "0:04:21", "remaining_time": "0:00:54", "throughput": 1783.88, "total_tokens": 466240}
{"current_steps": 1495, "total_steps": 1800, "loss": 0.2331, "lr": 0.0025644364116743754, "epoch": 16.61111111111111, "percentage": 83.06, "elapsed_time": "0:04:22", "remaining_time": "0:00:53", "throughput": 1784.35, "total_tokens": 467744}
{"current_steps": 1500, "total_steps": 1800, "loss": 0.2308, "lr": 0.002483690952205637, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:04:22", "remaining_time": "0:00:52", "throughput": 1785.15, "total_tokens": 469344}
{"current_steps": 1505, "total_steps": 1800, "loss": 0.2309, "lr": 0.0024041222383837536, "epoch": 16.72222222222222, "percentage": 83.61, "elapsed_time": "0:04:23", "remaining_time": "0:00:51", "throughput": 1785.92, "total_tokens": 470944}
{"current_steps": 1510, "total_steps": 1800, "loss": 0.2278, "lr": 0.002325737751019347, "epoch": 16.77777777777778, "percentage": 83.89, "elapsed_time": "0:04:24", "remaining_time": "0:00:50", "throughput": 1786.37, "total_tokens": 472448}
{"current_steps": 1515, "total_steps": 1800, "loss": 0.2278, "lr": 0.00224854485958563, "epoch": 16.833333333333332, "percentage": 84.17, "elapsed_time": "0:04:25", "remaining_time": "0:00:49", "throughput": 1787.12, "total_tokens": 474048}
{"current_steps": 1520, "total_steps": 1800, "loss": 0.2322, "lr": 0.0021725508215255634, "epoch": 16.88888888888889, "percentage": 84.44, "elapsed_time": "0:04:26", "remaining_time": "0:00:49", "throughput": 1787.76, "total_tokens": 475616}
{"current_steps": 1525, "total_steps": 1800, "loss": 0.2339, "lr": 0.0020977627815695213, "epoch": 16.944444444444443, "percentage": 84.72, "elapsed_time": "0:04:26", "remaining_time": "0:00:48", "throughput": 1788.3, "total_tokens": 477152}
{"current_steps": 1530, "total_steps": 1800, "loss": 0.232, "lr": 0.0020241877710635747, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:27", "remaining_time": "0:00:47", "throughput": 1788.63, "total_tokens": 478720}
{"current_steps": 1530, "total_steps": 1800, "eval_loss": 0.23464064300060272, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:28", "remaining_time": "0:00:47", "throughput": 1783.19, "total_tokens": 478720}
{"current_steps": 1535, "total_steps": 1800, "loss": 0.2278, "lr": 0.0019518327073084285, "epoch": 17.055555555555557, "percentage": 85.28, "elapsed_time": "0:04:30", "remaining_time": "0:00:46", "throughput": 1778.35, "total_tokens": 480288}
{"current_steps": 1540, "total_steps": 1800, "loss": 0.2299, "lr": 0.0018807043929090638, "epoch": 17.11111111111111, "percentage": 85.56, "elapsed_time": "0:04:30", "remaining_time": "0:00:45", "throughput": 1779.27, "total_tokens": 481952}
{"current_steps": 1545, "total_steps": 1800, "loss": 0.2393, "lr": 0.0018108095151351837, "epoch": 17.166666666666668, "percentage": 85.83, "elapsed_time": "0:04:31", "remaining_time": "0:00:44", "throughput": 1779.94, "total_tokens": 483520}
{"current_steps": 1550, "total_steps": 1800, "loss": 0.2289, "lr": 0.001742154645292508, "epoch": 17.22222222222222, "percentage": 86.11, "elapsed_time": "0:04:32", "remaining_time": "0:00:43", "throughput": 1780.71, "total_tokens": 485120}
{"current_steps": 1555, "total_steps": 1800, "loss": 0.2277, "lr": 0.0016747462381049415, "epoch": 17.27777777777778, "percentage": 86.39, "elapsed_time": "0:04:33", "remaining_time": "0:00:43", "throughput": 1781.36, "total_tokens": 486688}
{"current_steps": 1560, "total_steps": 1800, "loss": 0.2227, "lr": 0.0016085906311077212, "epoch": 17.333333333333332, "percentage": 86.67, "elapsed_time": "0:04:33", "remaining_time": "0:00:42", "throughput": 1782.12, "total_tokens": 488288}
{"current_steps": 1565, "total_steps": 1800, "loss": 0.2289, "lr": 0.0015436940440516017, "epoch": 17.38888888888889, "percentage": 86.94, "elapsed_time": "0:04:34", "remaining_time": "0:00:41", "throughput": 1782.77, "total_tokens": 489856}
{"current_steps": 1570, "total_steps": 1800, "loss": 0.2332, "lr": 0.0014800625783180658, "epoch": 17.444444444444443, "percentage": 87.22, "elapsed_time": "0:04:35", "remaining_time": "0:00:40", "throughput": 1783.51, "total_tokens": 491456}
{"current_steps": 1575, "total_steps": 1800, "loss": 0.2268, "lr": 0.0014177022163457135, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:04:36", "remaining_time": "0:00:39", "throughput": 1784.16, "total_tokens": 493024}
{"current_steps": 1580, "total_steps": 1800, "loss": 0.231, "lr": 0.0013566188210677903, "epoch": 17.555555555555557, "percentage": 87.78, "elapsed_time": "0:04:37", "remaining_time": "0:00:38", "throughput": 1784.9, "total_tokens": 494624}
{"current_steps": 1585, "total_steps": 1800, "loss": 0.232, "lr": 0.0012968181353609854, "epoch": 17.61111111111111, "percentage": 88.06, "elapsed_time": "0:04:37", "remaining_time": "0:00:37", "throughput": 1785.52, "total_tokens": 496192}
{"current_steps": 1590, "total_steps": 1800, "loss": 0.234, "lr": 0.0012383057815055082, "epoch": 17.666666666666668, "percentage": 88.33, "elapsed_time": "0:04:38", "remaining_time": "0:00:36", "throughput": 1786.26, "total_tokens": 497792}
{"current_steps": 1595, "total_steps": 1800, "loss": 0.2343, "lr": 0.001181087260656487, "epoch": 17.72222222222222, "percentage": 88.61, "elapsed_time": "0:04:39", "remaining_time": "0:00:35", "throughput": 1786.89, "total_tokens": 499360}
{"current_steps": 1600, "total_steps": 1800, "loss": 0.23, "lr": 0.0011251679523267587, "epoch": 17.77777777777778, "percentage": 88.89, "elapsed_time": "0:04:40", "remaining_time": "0:00:35", "throughput": 1787.4, "total_tokens": 500896}
{"current_steps": 1605, "total_steps": 1800, "loss": 0.2249, "lr": 0.0010705531138811369, "epoch": 17.833333333333332, "percentage": 89.17, "elapsed_time": "0:04:41", "remaining_time": "0:00:34", "throughput": 1787.99, "total_tokens": 502464}
{"current_steps": 1610, "total_steps": 1800, "loss": 0.2353, "lr": 0.0010172478800420954, "epoch": 17.88888888888889, "percentage": 89.44, "elapsed_time": "0:04:41", "remaining_time": "0:00:33", "throughput": 1788.49, "total_tokens": 504000}
{"current_steps": 1615, "total_steps": 1800, "loss": 0.2322, "lr": 0.0009652572624070293, "epoch": 17.944444444444443, "percentage": 89.72, "elapsed_time": "0:04:42", "remaining_time": "0:00:32", "throughput": 1788.79, "total_tokens": 505472}
{"current_steps": 1620, "total_steps": 1800, "loss": 0.2373, "lr": 0.0009145861489770912, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:43", "remaining_time": "0:00:31", "throughput": 1788.64, "total_tokens": 507008}
{"current_steps": 1620, "total_steps": 1800, "eval_loss": 0.23521371185779572, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:44", "remaining_time": "0:00:31", "throughput": 1783.53, "total_tokens": 507008}
{"current_steps": 1625, "total_steps": 1800, "loss": 0.23, "lr": 0.0008652393036976157, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "0:04:45", "remaining_time": "0:00:30", "throughput": 1779.2, "total_tokens": 508544}
{"current_steps": 1630, "total_steps": 1800, "loss": 0.2333, "lr": 0.0008172213660102473, "epoch": 18.11111111111111, "percentage": 90.56, "elapsed_time": "0:04:46", "remaining_time": "0:00:29", "throughput": 1779.66, "total_tokens": 510112}
{"current_steps": 1635, "total_steps": 1800, "loss": 0.2259, "lr": 0.0007705368504167398, "epoch": 18.166666666666668, "percentage": 90.83, "elapsed_time": "0:04:47", "remaining_time": "0:00:29", "throughput": 1780.28, "total_tokens": 511680}
{"current_steps": 1640, "total_steps": 1800, "loss": 0.2302, "lr": 0.0007251901460545118, "epoch": 18.22222222222222, "percentage": 91.11, "elapsed_time": "0:04:48", "remaining_time": "0:00:28", "throughput": 1780.6, "total_tokens": 513152}
{"current_steps": 1645, "total_steps": 1800, "loss": 0.231, "lr": 0.0006811855162840213, "epoch": 18.27777777777778, "percentage": 91.39, "elapsed_time": "0:04:48", "remaining_time": "0:00:27", "throughput": 1781.33, "total_tokens": 514752}
{"current_steps": 1650, "total_steps": 1800, "loss": 0.2309, "lr": 0.0006385270982879065, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:04:49", "remaining_time": "0:00:26", "throughput": 1781.95, "total_tokens": 516320}
{"current_steps": 1655, "total_steps": 1800, "loss": 0.2332, "lr": 0.0005972189026820351, "epoch": 18.38888888888889, "percentage": 91.94, "elapsed_time": "0:04:50", "remaining_time": "0:00:25", "throughput": 1782.56, "total_tokens": 517888}
{"current_steps": 1660, "total_steps": 1800, "loss": 0.2259, "lr": 0.0005572648131384361, "epoch": 18.444444444444443, "percentage": 92.22, "elapsed_time": "0:04:51", "remaining_time": "0:00:24", "throughput": 1783.18, "total_tokens": 519456}
{"current_steps": 1665, "total_steps": 1800, "loss": 0.227, "lr": 0.0005186685860201717, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:04:52", "remaining_time": "0:00:23", "throughput": 1783.88, "total_tokens": 521056}
{"current_steps": 1670, "total_steps": 1800, "loss": 0.2311, "lr": 0.0004814338500281634, "epoch": 18.555555555555557, "percentage": 92.78, "elapsed_time": "0:04:52", "remaining_time": "0:00:22", "throughput": 1784.2, "total_tokens": 522528}
{"current_steps": 1675, "total_steps": 1800, "loss": 0.232, "lr": 0.0004455641058600529, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "0:04:53", "remaining_time": "0:00:21", "throughput": 1784.71, "total_tokens": 524064}
{"current_steps": 1680, "total_steps": 1800, "loss": 0.232, "lr": 0.00041106272588105564, "epoch": 18.666666666666668, "percentage": 93.33, "elapsed_time": "0:04:54", "remaining_time": "0:00:21", "throughput": 1785.4, "total_tokens": 525664}
{"current_steps": 1685, "total_steps": 1800, "loss": 0.2311, "lr": 0.0003779329538069159, "epoch": 18.72222222222222, "percentage": 93.61, "elapsed_time": "0:04:55", "remaining_time": "0:00:20", "throughput": 1785.89, "total_tokens": 527200}
{"current_steps": 1690, "total_steps": 1800, "loss": 0.2311, "lr": 0.00034617790439893603, "epoch": 18.77777777777778, "percentage": 93.89, "elapsed_time": "0:04:55", "remaining_time": "0:00:19", "throughput": 1786.57, "total_tokens": 528800}
{"current_steps": 1695, "total_steps": 1800, "loss": 0.233, "lr": 0.00031580056317113525, "epoch": 18.833333333333332, "percentage": 94.17, "elapsed_time": "0:04:56", "remaining_time": "0:00:18", "throughput": 1787.35, "total_tokens": 530432}
{"current_steps": 1700, "total_steps": 1800, "loss": 0.229, "lr": 0.00028680378610956793, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "0:04:57", "remaining_time": "0:00:17", "throughput": 1788.01, "total_tokens": 532032}
{"current_steps": 1705, "total_steps": 1800, "loss": 0.2332, "lr": 0.00025919029940380146, "epoch": 18.944444444444443, "percentage": 94.72, "elapsed_time": "0:04:58", "remaining_time": "0:00:16", "throughput": 1788.49, "total_tokens": 533568}
{"current_steps": 1710, "total_steps": 1800, "loss": 0.229, "lr": 0.0002329626991906164, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:59", "remaining_time": "0:00:15", "throughput": 1788.79, "total_tokens": 535136}
{"current_steps": 1710, "total_steps": 1800, "eval_loss": 0.23268699645996094, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:59", "remaining_time": "0:00:15", "throughput": 1783.93, "total_tokens": 535136}
{"current_steps": 1715, "total_steps": 1800, "loss": 0.2301, "lr": 0.00020812345130992503, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:01", "remaining_time": "0:00:14", "throughput": 1779.93, "total_tokens": 536768}
{"current_steps": 1720, "total_steps": 1800, "loss": 0.2362, "lr": 0.0001846748910729351, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:05:02", "remaining_time": "0:00:14", "throughput": 1780.5, "total_tokens": 538336}
{"current_steps": 1725, "total_steps": 1800, "loss": 0.228, "lr": 0.0001626192230425938, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:05:03", "remaining_time": "0:00:13", "throughput": 1781.01, "total_tokens": 539872}
{"current_steps": 1730, "total_steps": 1800, "loss": 0.229, "lr": 0.00014195852082632686, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:05:03", "remaining_time": "0:00:12", "throughput": 1781.7, "total_tokens": 541472}
{"current_steps": 1735, "total_steps": 1800, "loss": 0.23, "lr": 0.00012269472688107463, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:05:04", "remaining_time": "0:00:11", "throughput": 1782.21, "total_tokens": 543008}
{"current_steps": 1740, "total_steps": 1800, "loss": 0.23, "lr": 0.00010482965233067298, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:05:05", "remaining_time": "0:00:10", "throughput": 1782.89, "total_tokens": 544608}
{"current_steps": 1745, "total_steps": 1800, "loss": 0.2282, "lr": 8.836497679557964e-05, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:05:06", "remaining_time": "0:00:09", "throughput": 1783.28, "total_tokens": 546112}
{"current_steps": 1750, "total_steps": 1800, "loss": 0.2353, "lr": 7.330224823495379e-05, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:05:07", "remaining_time": "0:00:08", "throughput": 1783.68, "total_tokens": 547616}
{"current_steps": 1755, "total_steps": 1800, "loss": 0.228, "lr": 5.96428828011325e-05, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:05:07", "remaining_time": "0:00:07", "throughput": 1784.36, "total_tokens": 549216}
{"current_steps": 1760, "total_steps": 1800, "loss": 0.2332, "lr": 4.738816470647389e-05, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:05:08", "remaining_time": "0:00:07", "throughput": 1784.84, "total_tokens": 550752}
{"current_steps": 1765, "total_steps": 1800, "loss": 0.23, "lr": 3.653924610263703e-05, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:05:09", "remaining_time": "0:00:06", "throughput": 1785.49, "total_tokens": 552352}
{"current_steps": 1770, "total_steps": 1800, "loss": 0.228, "lr": 2.7097146972240305e-05, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:05:10", "remaining_time": "0:00:05", "throughput": 1786.05, "total_tokens": 553920}
{"current_steps": 1775, "total_steps": 1800, "loss": 0.2331, "lr": 1.9062755032984713e-05, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:05:10", "remaining_time": "0:00:04", "throughput": 1786.79, "total_tokens": 555552}
{"current_steps": 1780, "total_steps": 1800, "loss": 0.2269, "lr": 1.2436825654180693e-05, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:05:11", "remaining_time": "0:00:03", "throughput": 1787.16, "total_tokens": 557056}
{"current_steps": 1785, "total_steps": 1800, "loss": 0.228, "lr": 7.219981785733242e-06, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:05:12", "remaining_time": "0:00:02", "throughput": 1787.89, "total_tokens": 558688}
{"current_steps": 1790, "total_steps": 1800, "loss": 0.2281, "lr": 3.4127138995787565e-06, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:05:13", "remaining_time": "0:00:01", "throughput": 1788.32, "total_tokens": 560224}
{"current_steps": 1795, "total_steps": 1800, "loss": 0.2375, "lr": 1.0153799435669298e-06, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:05:14", "remaining_time": "0:00:00", "throughput": 1788.68, "total_tokens": 561728}
{"current_steps": 1800, "total_steps": 1800, "loss": 0.2311, "lr": 2.820530780767161e-08, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:14", "remaining_time": "0:00:00", "throughput": 1789.04, "total_tokens": 563328}
{"current_steps": 1800, "total_steps": 1800, "eval_loss": 0.2325301468372345, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:15", "remaining_time": "0:00:00", "throughput": 1784.42, "total_tokens": 563328}
{"current_steps": 1800, "total_steps": 1800, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:16", "remaining_time": "0:00:00", "throughput": 1780.17, "total_tokens": 563328}