train_svamp_42_1760623621 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 3160
fbce9cb verified
{"current_steps": 5, "total_steps": 3160, "loss": 2.2551, "lr": 0.00037974683544303797, "epoch": 0.03164556962025317, "percentage": 0.16, "elapsed_time": "0:00:01", "remaining_time": "0:15:21", "throughput": 1599.63, "total_tokens": 2336}
{"current_steps": 10, "total_steps": 3160, "loss": 1.4629, "lr": 0.0008544303797468354, "epoch": 0.06329113924050633, "percentage": 0.32, "elapsed_time": "0:00:02", "remaining_time": "0:12:51", "throughput": 1908.63, "total_tokens": 4672}
{"current_steps": 15, "total_steps": 3160, "loss": 0.621, "lr": 0.001329113924050633, "epoch": 0.0949367088607595, "percentage": 0.47, "elapsed_time": "0:00:03", "remaining_time": "0:11:59", "throughput": 2013.07, "total_tokens": 6912}
{"current_steps": 20, "total_steps": 3160, "loss": 0.1941, "lr": 0.0018037974683544303, "epoch": 0.12658227848101267, "percentage": 0.63, "elapsed_time": "0:00:04", "remaining_time": "0:11:31", "throughput": 2077.63, "total_tokens": 9152}
{"current_steps": 25, "total_steps": 3160, "loss": 0.2347, "lr": 0.002278481012658228, "epoch": 0.15822784810126583, "percentage": 0.79, "elapsed_time": "0:00:05", "remaining_time": "0:11:16", "throughput": 2124.9, "total_tokens": 11456}
{"current_steps": 30, "total_steps": 3160, "loss": 0.1998, "lr": 0.0027531645569620253, "epoch": 0.189873417721519, "percentage": 0.95, "elapsed_time": "0:00:06", "remaining_time": "0:11:05", "throughput": 2161.05, "total_tokens": 13792}
{"current_steps": 35, "total_steps": 3160, "loss": 0.2293, "lr": 0.0032278481012658227, "epoch": 0.22151898734177214, "percentage": 1.11, "elapsed_time": "0:00:07", "remaining_time": "0:10:57", "throughput": 2182.21, "total_tokens": 16064}
{"current_steps": 40, "total_steps": 3160, "loss": 0.3135, "lr": 0.00370253164556962, "epoch": 0.25316455696202533, "percentage": 1.27, "elapsed_time": "0:00:08", "remaining_time": "0:10:50", "throughput": 2199.13, "total_tokens": 18336}
{"current_steps": 45, "total_steps": 3160, "loss": 1.6481, "lr": 0.004177215189873417, "epoch": 0.2848101265822785, "percentage": 1.42, "elapsed_time": "0:00:09", "remaining_time": "0:10:45", "throughput": 2215.02, "total_tokens": 20640}
{"current_steps": 50, "total_steps": 3160, "loss": 1.5985, "lr": 0.0046518987341772145, "epoch": 0.31645569620253167, "percentage": 1.58, "elapsed_time": "0:00:10", "remaining_time": "0:10:40", "throughput": 2222.54, "total_tokens": 22880}
{"current_steps": 55, "total_steps": 3160, "loss": 0.3414, "lr": 0.005126582278481013, "epoch": 0.34810126582278483, "percentage": 1.74, "elapsed_time": "0:00:11", "remaining_time": "0:10:36", "throughput": 2226.84, "total_tokens": 25088}
{"current_steps": 60, "total_steps": 3160, "loss": 0.4014, "lr": 0.00560126582278481, "epoch": 0.379746835443038, "percentage": 1.9, "elapsed_time": "0:00:12", "remaining_time": "0:10:33", "throughput": 2234.92, "total_tokens": 27392}
{"current_steps": 65, "total_steps": 3160, "loss": 0.7651, "lr": 0.0060759493670886075, "epoch": 0.41139240506329117, "percentage": 2.06, "elapsed_time": "0:00:13", "remaining_time": "0:10:31", "throughput": 2242.56, "total_tokens": 29728}
{"current_steps": 70, "total_steps": 3160, "loss": 1.8753, "lr": 0.006550632911392405, "epoch": 0.4430379746835443, "percentage": 2.22, "elapsed_time": "0:00:14", "remaining_time": "0:10:28", "throughput": 2250.36, "total_tokens": 32064}
{"current_steps": 75, "total_steps": 3160, "loss": 0.1454, "lr": 0.007025316455696202, "epoch": 0.47468354430379744, "percentage": 2.37, "elapsed_time": "0:00:15", "remaining_time": "0:10:26", "throughput": 2257.22, "total_tokens": 34400}
{"current_steps": 80, "total_steps": 3160, "loss": 0.2766, "lr": 0.0075, "epoch": 0.5063291139240507, "percentage": 2.53, "elapsed_time": "0:00:16", "remaining_time": "0:10:24", "throughput": 2261.7, "total_tokens": 36704}
{"current_steps": 85, "total_steps": 3160, "loss": 0.5439, "lr": 0.007974683544303796, "epoch": 0.5379746835443038, "percentage": 2.69, "elapsed_time": "0:00:17", "remaining_time": "0:10:22", "throughput": 2261.16, "total_tokens": 38880}
{"current_steps": 90, "total_steps": 3160, "loss": 1.2058, "lr": 0.008449367088607595, "epoch": 0.569620253164557, "percentage": 2.85, "elapsed_time": "0:00:18", "remaining_time": "0:10:19", "throughput": 2263.38, "total_tokens": 41120}
{"current_steps": 95, "total_steps": 3160, "loss": 0.4753, "lr": 0.008924050632911391, "epoch": 0.6012658227848101, "percentage": 3.01, "elapsed_time": "0:00:19", "remaining_time": "0:10:17", "throughput": 2259.86, "total_tokens": 43232}
{"current_steps": 100, "total_steps": 3160, "loss": 5.5594, "lr": 0.00939873417721519, "epoch": 0.6329113924050633, "percentage": 3.16, "elapsed_time": "0:00:20", "remaining_time": "0:10:15", "throughput": 2261.73, "total_tokens": 45504}
{"current_steps": 105, "total_steps": 3160, "loss": 3.1616, "lr": 0.009873417721518986, "epoch": 0.6645569620253164, "percentage": 3.32, "elapsed_time": "0:00:21", "remaining_time": "0:10:14", "throughput": 2267.84, "total_tokens": 47872}
{"current_steps": 110, "total_steps": 3160, "loss": 2.1289, "lr": 0.010348101265822784, "epoch": 0.6962025316455697, "percentage": 3.48, "elapsed_time": "0:00:22", "remaining_time": "0:10:11", "throughput": 2263.46, "total_tokens": 49952}
{"current_steps": 115, "total_steps": 3160, "loss": 1.2601, "lr": 0.01082278481012658, "epoch": 0.7278481012658228, "percentage": 3.64, "elapsed_time": "0:00:23", "remaining_time": "0:10:10", "throughput": 2268.81, "total_tokens": 52288}
{"current_steps": 120, "total_steps": 3160, "loss": 0.433, "lr": 0.011297468354430379, "epoch": 0.759493670886076, "percentage": 3.8, "elapsed_time": "0:00:24", "remaining_time": "0:10:08", "throughput": 2271.32, "total_tokens": 54592}
{"current_steps": 125, "total_steps": 3160, "loss": 0.1428, "lr": 0.011772151898734175, "epoch": 0.7911392405063291, "percentage": 3.96, "elapsed_time": "0:00:25", "remaining_time": "0:10:07", "throughput": 2273.3, "total_tokens": 56864}
{"current_steps": 130, "total_steps": 3160, "loss": 0.263, "lr": 0.012246835443037974, "epoch": 0.8227848101265823, "percentage": 4.11, "elapsed_time": "0:00:25", "remaining_time": "0:10:05", "throughput": 2274.19, "total_tokens": 59104}
{"current_steps": 135, "total_steps": 3160, "loss": 0.6412, "lr": 0.012721518987341772, "epoch": 0.8544303797468354, "percentage": 4.27, "elapsed_time": "0:00:26", "remaining_time": "0:10:04", "throughput": 2276.1, "total_tokens": 61408}
{"current_steps": 140, "total_steps": 3160, "loss": 0.3362, "lr": 0.01319620253164557, "epoch": 0.8860759493670886, "percentage": 4.43, "elapsed_time": "0:00:27", "remaining_time": "0:10:02", "throughput": 2275.8, "total_tokens": 63616}
{"current_steps": 145, "total_steps": 3160, "loss": 0.3041, "lr": 0.013670886075949367, "epoch": 0.9177215189873418, "percentage": 4.59, "elapsed_time": "0:00:28", "remaining_time": "0:10:01", "throughput": 2275.64, "total_tokens": 65824}
{"current_steps": 150, "total_steps": 3160, "loss": 0.1369, "lr": 0.014145569620253165, "epoch": 0.9493670886075949, "percentage": 4.75, "elapsed_time": "0:00:29", "remaining_time": "0:09:59", "throughput": 2276.45, "total_tokens": 68064}
{"current_steps": 155, "total_steps": 3160, "loss": 0.1167, "lr": 0.014620253164556962, "epoch": 0.9810126582278481, "percentage": 4.91, "elapsed_time": "0:00:30", "remaining_time": "0:09:58", "throughput": 2279.88, "total_tokens": 70432}
{"current_steps": 158, "total_steps": 3160, "eval_loss": 0.09709509462118149, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:33", "remaining_time": "0:10:28", "throughput": 2162.12, "total_tokens": 71568}
{"current_steps": 160, "total_steps": 3160, "loss": 0.168, "lr": 0.01509493670886076, "epoch": 1.0126582278481013, "percentage": 5.06, "elapsed_time": "0:00:34", "remaining_time": "0:10:45", "throughput": 2104.42, "total_tokens": 72400}
{"current_steps": 165, "total_steps": 3160, "loss": 0.1485, "lr": 0.015569620253164556, "epoch": 1.0443037974683544, "percentage": 5.22, "elapsed_time": "0:00:35", "remaining_time": "0:10:42", "throughput": 2108.07, "total_tokens": 74576}
{"current_steps": 170, "total_steps": 3160, "loss": 0.1079, "lr": 0.01604430379746835, "epoch": 1.0759493670886076, "percentage": 5.38, "elapsed_time": "0:00:36", "remaining_time": "0:10:39", "throughput": 2115.05, "total_tokens": 76944}
{"current_steps": 175, "total_steps": 3160, "loss": 0.1488, "lr": 0.01651898734177215, "epoch": 1.1075949367088607, "percentage": 5.54, "elapsed_time": "0:00:37", "remaining_time": "0:10:37", "throughput": 2119.97, "total_tokens": 79184}
{"current_steps": 180, "total_steps": 3160, "loss": 0.0581, "lr": 0.016993670886075948, "epoch": 1.139240506329114, "percentage": 5.7, "elapsed_time": "0:00:38", "remaining_time": "0:10:34", "throughput": 2124.65, "total_tokens": 81424}
{"current_steps": 185, "total_steps": 3160, "loss": 0.1631, "lr": 0.017468354430379748, "epoch": 1.1708860759493671, "percentage": 5.85, "elapsed_time": "0:00:39", "remaining_time": "0:10:32", "throughput": 2131.05, "total_tokens": 83760}
{"current_steps": 190, "total_steps": 3160, "loss": 0.0797, "lr": 0.017943037974683544, "epoch": 1.2025316455696202, "percentage": 6.01, "elapsed_time": "0:00:40", "remaining_time": "0:10:29", "throughput": 2135.28, "total_tokens": 86032}
{"current_steps": 195, "total_steps": 3160, "loss": 0.0969, "lr": 0.018417721518987344, "epoch": 1.2341772151898733, "percentage": 6.17, "elapsed_time": "0:00:41", "remaining_time": "0:10:27", "throughput": 2140.38, "total_tokens": 88368}
{"current_steps": 200, "total_steps": 3160, "loss": 0.0895, "lr": 0.01889240506329114, "epoch": 1.2658227848101267, "percentage": 6.33, "elapsed_time": "0:00:42", "remaining_time": "0:10:25", "throughput": 2143.87, "total_tokens": 90608}
{"current_steps": 205, "total_steps": 3160, "loss": 0.1284, "lr": 0.019367088607594937, "epoch": 1.2974683544303798, "percentage": 6.49, "elapsed_time": "0:00:43", "remaining_time": "0:10:23", "throughput": 2150.12, "total_tokens": 93040}
{"current_steps": 210, "total_steps": 3160, "loss": 0.1272, "lr": 0.019841772151898734, "epoch": 1.3291139240506329, "percentage": 6.65, "elapsed_time": "0:00:44", "remaining_time": "0:10:21", "throughput": 2154.38, "total_tokens": 95376}
{"current_steps": 215, "total_steps": 3160, "loss": 0.0786, "lr": 0.02031645569620253, "epoch": 1.360759493670886, "percentage": 6.8, "elapsed_time": "0:00:45", "remaining_time": "0:10:19", "throughput": 2157.1, "total_tokens": 97616}
{"current_steps": 220, "total_steps": 3160, "loss": 0.0547, "lr": 0.02079113924050633, "epoch": 1.3924050632911391, "percentage": 6.96, "elapsed_time": "0:00:46", "remaining_time": "0:10:17", "throughput": 2158.22, "total_tokens": 99760}
{"current_steps": 225, "total_steps": 3160, "loss": 0.164, "lr": 0.021265822784810127, "epoch": 1.4240506329113924, "percentage": 7.12, "elapsed_time": "0:00:47", "remaining_time": "0:10:15", "throughput": 2162.09, "total_tokens": 102064}
{"current_steps": 230, "total_steps": 3160, "loss": 0.0752, "lr": 0.021740506329113923, "epoch": 1.4556962025316456, "percentage": 7.28, "elapsed_time": "0:00:48", "remaining_time": "0:10:13", "throughput": 2164.57, "total_tokens": 104304}
{"current_steps": 235, "total_steps": 3160, "loss": 0.0558, "lr": 0.02221518987341772, "epoch": 1.4873417721518987, "percentage": 7.44, "elapsed_time": "0:00:49", "remaining_time": "0:10:11", "throughput": 2168.43, "total_tokens": 106608}
{"current_steps": 240, "total_steps": 3160, "loss": 0.0738, "lr": 0.02268987341772152, "epoch": 1.518987341772152, "percentage": 7.59, "elapsed_time": "0:00:50", "remaining_time": "0:10:09", "throughput": 2169.45, "total_tokens": 108752}
{"current_steps": 245, "total_steps": 3160, "loss": 0.1385, "lr": 0.023164556962025316, "epoch": 1.5506329113924051, "percentage": 7.75, "elapsed_time": "0:00:51", "remaining_time": "0:10:08", "throughput": 2173.07, "total_tokens": 111088}
{"current_steps": 250, "total_steps": 3160, "loss": 0.0286, "lr": 0.023639240506329113, "epoch": 1.5822784810126582, "percentage": 7.91, "elapsed_time": "0:00:52", "remaining_time": "0:10:06", "throughput": 2175.48, "total_tokens": 113328}
{"current_steps": 255, "total_steps": 3160, "loss": 0.2212, "lr": 0.02411392405063291, "epoch": 1.6139240506329116, "percentage": 8.07, "elapsed_time": "0:00:53", "remaining_time": "0:10:04", "throughput": 2177.88, "total_tokens": 115600}
{"current_steps": 260, "total_steps": 3160, "loss": 0.1101, "lr": 0.02458860759493671, "epoch": 1.6455696202531644, "percentage": 8.23, "elapsed_time": "0:00:54", "remaining_time": "0:10:03", "throughput": 2181.25, "total_tokens": 117936}
{"current_steps": 265, "total_steps": 3160, "loss": 0.1231, "lr": 0.025063291139240506, "epoch": 1.6772151898734178, "percentage": 8.39, "elapsed_time": "0:00:55", "remaining_time": "0:10:01", "throughput": 2183.21, "total_tokens": 120176}
{"current_steps": 270, "total_steps": 3160, "loss": 0.0565, "lr": 0.025537974683544303, "epoch": 1.7088607594936709, "percentage": 8.54, "elapsed_time": "0:00:56", "remaining_time": "0:09:59", "throughput": 2183.68, "total_tokens": 122320}
{"current_steps": 275, "total_steps": 3160, "loss": 0.0567, "lr": 0.0260126582278481, "epoch": 1.740506329113924, "percentage": 8.7, "elapsed_time": "0:00:57", "remaining_time": "0:09:58", "throughput": 2188.1, "total_tokens": 124752}
{"current_steps": 280, "total_steps": 3160, "loss": 0.1192, "lr": 0.0264873417721519, "epoch": 1.7721518987341773, "percentage": 8.86, "elapsed_time": "0:00:58", "remaining_time": "0:09:56", "throughput": 2192.0, "total_tokens": 127216}
{"current_steps": 285, "total_steps": 3160, "loss": 0.1226, "lr": 0.026962025316455696, "epoch": 1.8037974683544302, "percentage": 9.02, "elapsed_time": "0:00:59", "remaining_time": "0:09:55", "throughput": 2193.24, "total_tokens": 129424}
{"current_steps": 290, "total_steps": 3160, "loss": 0.0977, "lr": 0.027436708860759492, "epoch": 1.8354430379746836, "percentage": 9.18, "elapsed_time": "0:00:59", "remaining_time": "0:09:53", "throughput": 2194.38, "total_tokens": 131632}
{"current_steps": 295, "total_steps": 3160, "loss": 0.1471, "lr": 0.02791139240506329, "epoch": 1.8670886075949367, "percentage": 9.34, "elapsed_time": "0:01:00", "remaining_time": "0:09:52", "throughput": 2195.07, "total_tokens": 133808}
{"current_steps": 300, "total_steps": 3160, "loss": 0.1052, "lr": 0.02838607594936709, "epoch": 1.8987341772151898, "percentage": 9.49, "elapsed_time": "0:01:01", "remaining_time": "0:09:50", "throughput": 2197.62, "total_tokens": 136144}
{"current_steps": 305, "total_steps": 3160, "loss": 0.0865, "lr": 0.028860759493670885, "epoch": 1.9303797468354431, "percentage": 9.65, "elapsed_time": "0:01:02", "remaining_time": "0:09:49", "throughput": 2199.17, "total_tokens": 138384}
{"current_steps": 310, "total_steps": 3160, "loss": 0.1386, "lr": 0.02933544303797468, "epoch": 1.9620253164556962, "percentage": 9.81, "elapsed_time": "0:01:03", "remaining_time": "0:09:47", "throughput": 2201.14, "total_tokens": 140656}
{"current_steps": 315, "total_steps": 3160, "loss": 0.0618, "lr": 0.029810126582278478, "epoch": 1.9936708860759493, "percentage": 9.97, "elapsed_time": "0:01:04", "remaining_time": "0:09:46", "throughput": 2201.92, "total_tokens": 142992}
{"current_steps": 316, "total_steps": 3160, "eval_loss": 0.07142843306064606, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:01:06", "remaining_time": "0:10:01", "throughput": 2142.99, "total_tokens": 143232}
{"current_steps": 320, "total_steps": 3160, "loss": 0.0334, "lr": 0.02999991763476599, "epoch": 2.0253164556962027, "percentage": 10.13, "elapsed_time": "0:01:08", "remaining_time": "0:10:08", "throughput": 2114.53, "total_tokens": 145056}
{"current_steps": 325, "total_steps": 3160, "loss": 0.0793, "lr": 0.02999941429494495, "epoch": 2.0569620253164556, "percentage": 10.28, "elapsed_time": "0:01:09", "remaining_time": "0:10:06", "throughput": 2117.86, "total_tokens": 147360}
{"current_steps": 330, "total_steps": 3160, "loss": 0.0319, "lr": 0.02999845338910228, "epoch": 2.088607594936709, "percentage": 10.44, "elapsed_time": "0:01:10", "remaining_time": "0:10:05", "throughput": 2119.88, "total_tokens": 149568}
{"current_steps": 335, "total_steps": 3160, "loss": 0.1134, "lr": 0.029997034946550982, "epoch": 2.1202531645569622, "percentage": 10.6, "elapsed_time": "0:01:11", "remaining_time": "0:10:03", "throughput": 2123.07, "total_tokens": 151872}
{"current_steps": 340, "total_steps": 3160, "loss": 0.044, "lr": 0.029995159010561483, "epoch": 2.151898734177215, "percentage": 10.76, "elapsed_time": "0:01:12", "remaining_time": "0:10:01", "throughput": 2124.97, "total_tokens": 154080}
{"current_steps": 345, "total_steps": 3160, "loss": 0.047, "lr": 0.029992825638360327, "epoch": 2.1835443037974684, "percentage": 10.92, "elapsed_time": "0:01:13", "remaining_time": "0:09:59", "throughput": 2126.95, "total_tokens": 156288}
{"current_steps": 350, "total_steps": 3160, "loss": 0.0458, "lr": 0.02999003490112841, "epoch": 2.2151898734177213, "percentage": 11.08, "elapsed_time": "0:01:14", "remaining_time": "0:09:57", "throughput": 2129.94, "total_tokens": 158624}
{"current_steps": 355, "total_steps": 3160, "loss": 0.019, "lr": 0.029986786883998827, "epoch": 2.2468354430379747, "percentage": 11.23, "elapsed_time": "0:01:15", "remaining_time": "0:09:56", "throughput": 2130.95, "total_tokens": 160864}
{"current_steps": 360, "total_steps": 3160, "loss": 0.1553, "lr": 0.029983081686054267, "epoch": 2.278481012658228, "percentage": 11.39, "elapsed_time": "0:01:16", "remaining_time": "0:09:55", "throughput": 2130.77, "total_tokens": 163136}
{"current_steps": 365, "total_steps": 3160, "loss": 0.0626, "lr": 0.02997891942032399, "epoch": 2.310126582278481, "percentage": 11.55, "elapsed_time": "0:01:17", "remaining_time": "0:09:54", "throughput": 2130.82, "total_tokens": 165408}
{"current_steps": 370, "total_steps": 3160, "loss": 0.0852, "lr": 0.029974300213780378, "epoch": 2.3417721518987342, "percentage": 11.71, "elapsed_time": "0:01:18", "remaining_time": "0:09:52", "throughput": 2133.3, "total_tokens": 167712}
{"current_steps": 375, "total_steps": 3160, "loss": 0.1164, "lr": 0.02996922420733506, "epoch": 2.3734177215189876, "percentage": 11.87, "elapsed_time": "0:01:19", "remaining_time": "0:09:51", "throughput": 2134.03, "total_tokens": 169824}
{"current_steps": 380, "total_steps": 3160, "loss": 0.153, "lr": 0.029963691555834625, "epoch": 2.4050632911392404, "percentage": 12.03, "elapsed_time": "0:01:20", "remaining_time": "0:09:49", "throughput": 2135.02, "total_tokens": 171968}
{"current_steps": 385, "total_steps": 3160, "loss": 0.0872, "lr": 0.02995770242805588, "epoch": 2.4367088607594938, "percentage": 12.18, "elapsed_time": "0:01:21", "remaining_time": "0:09:47", "throughput": 2137.42, "total_tokens": 174272}
{"current_steps": 390, "total_steps": 3160, "loss": 0.1133, "lr": 0.029951257006700725, "epoch": 2.4683544303797467, "percentage": 12.34, "elapsed_time": "0:01:22", "remaining_time": "0:09:46", "throughput": 2139.92, "total_tokens": 176576}
{"current_steps": 395, "total_steps": 3160, "loss": 1.7205, "lr": 0.029944355488390553, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:01:23", "remaining_time": "0:09:44", "throughput": 2141.83, "total_tokens": 178848}
{"current_steps": 400, "total_steps": 3160, "loss": 1.1129, "lr": 0.029936998083660273, "epoch": 2.5316455696202533, "percentage": 12.66, "elapsed_time": "0:01:24", "remaining_time": "0:09:42", "throughput": 2143.68, "total_tokens": 181120}
{"current_steps": 405, "total_steps": 3160, "loss": 0.174, "lr": 0.029929185016951868, "epoch": 2.5632911392405062, "percentage": 12.82, "elapsed_time": "0:01:25", "remaining_time": "0:09:41", "throughput": 2144.16, "total_tokens": 183232}
{"current_steps": 410, "total_steps": 3160, "loss": 0.1194, "lr": 0.02992091652660758, "epoch": 2.5949367088607596, "percentage": 12.97, "elapsed_time": "0:01:26", "remaining_time": "0:09:39", "throughput": 2145.96, "total_tokens": 185504}
{"current_steps": 415, "total_steps": 3160, "loss": 0.1515, "lr": 0.029912192864862595, "epoch": 2.6265822784810124, "percentage": 13.13, "elapsed_time": "0:01:27", "remaining_time": "0:09:38", "throughput": 2148.28, "total_tokens": 187808}
{"current_steps": 420, "total_steps": 3160, "loss": 0.1169, "lr": 0.029903014297837396, "epoch": 2.6582278481012658, "percentage": 13.29, "elapsed_time": "0:01:28", "remaining_time": "0:09:36", "throughput": 2149.53, "total_tokens": 190016}
{"current_steps": 425, "total_steps": 3160, "loss": 0.1, "lr": 0.0298933811055296, "epoch": 2.689873417721519, "percentage": 13.45, "elapsed_time": "0:01:29", "remaining_time": "0:09:35", "throughput": 2152.53, "total_tokens": 192480}
{"current_steps": 430, "total_steps": 3160, "loss": 0.1351, "lr": 0.029883293581805453, "epoch": 2.721518987341772, "percentage": 13.61, "elapsed_time": "0:01:30", "remaining_time": "0:09:34", "throughput": 2155.09, "total_tokens": 194880}
{"current_steps": 435, "total_steps": 3160, "loss": 0.128, "lr": 0.029872752034390833, "epoch": 2.7531645569620253, "percentage": 13.77, "elapsed_time": "0:01:31", "remaining_time": "0:09:32", "throughput": 2156.87, "total_tokens": 197152}
{"current_steps": 440, "total_steps": 3160, "loss": 0.0751, "lr": 0.029861756784861908, "epoch": 2.7848101265822782, "percentage": 13.92, "elapsed_time": "0:01:32", "remaining_time": "0:09:31", "throughput": 2157.72, "total_tokens": 199328}
{"current_steps": 445, "total_steps": 3160, "loss": 0.1457, "lr": 0.029850308168635264, "epoch": 2.8164556962025316, "percentage": 14.08, "elapsed_time": "0:01:33", "remaining_time": "0:09:29", "throughput": 2159.98, "total_tokens": 201664}
{"current_steps": 450, "total_steps": 3160, "loss": 0.0665, "lr": 0.02983840653495774, "epoch": 2.848101265822785, "percentage": 14.24, "elapsed_time": "0:01:34", "remaining_time": "0:09:28", "throughput": 2162.04, "total_tokens": 204000}
{"current_steps": 455, "total_steps": 3160, "loss": 0.0807, "lr": 0.029826052246895707, "epoch": 2.879746835443038, "percentage": 14.4, "elapsed_time": "0:01:35", "remaining_time": "0:09:27", "throughput": 2164.45, "total_tokens": 206432}
{"current_steps": 460, "total_steps": 3160, "loss": 0.0481, "lr": 0.029813245681324055, "epoch": 2.911392405063291, "percentage": 14.56, "elapsed_time": "0:01:36", "remaining_time": "0:09:25", "throughput": 2165.83, "total_tokens": 208672}
{"current_steps": 465, "total_steps": 3160, "loss": 0.0907, "lr": 0.02979998722891465, "epoch": 2.9430379746835444, "percentage": 14.72, "elapsed_time": "0:01:37", "remaining_time": "0:09:24", "throughput": 2167.68, "total_tokens": 210976}
{"current_steps": 470, "total_steps": 3160, "loss": 0.0498, "lr": 0.029786277294124443, "epoch": 2.9746835443037973, "percentage": 14.87, "elapsed_time": "0:01:38", "remaining_time": "0:09:22", "throughput": 2169.2, "total_tokens": 213248}
{"current_steps": 474, "total_steps": 3160, "eval_loss": 0.07707643508911133, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:01:40", "remaining_time": "0:09:30", "throughput": 2132.93, "total_tokens": 214912}
{"current_steps": 475, "total_steps": 3160, "loss": 0.029, "lr": 0.029772116295183122, "epoch": 3.0063291139240507, "percentage": 15.03, "elapsed_time": "0:01:41", "remaining_time": "0:09:35", "throughput": 2114.77, "total_tokens": 215360}
{"current_steps": 480, "total_steps": 3160, "loss": 0.0487, "lr": 0.02975750466408034, "epoch": 3.037974683544304, "percentage": 15.19, "elapsed_time": "0:01:42", "remaining_time": "0:09:34", "throughput": 2116.77, "total_tokens": 217632}
{"current_steps": 485, "total_steps": 3160, "loss": 0.0696, "lr": 0.029742442846552575, "epoch": 3.069620253164557, "percentage": 15.35, "elapsed_time": "0:01:43", "remaining_time": "0:09:32", "throughput": 2118.62, "total_tokens": 219936}
{"current_steps": 490, "total_steps": 3160, "loss": 0.0665, "lr": 0.029726931302069493, "epoch": 3.1012658227848102, "percentage": 15.51, "elapsed_time": "0:01:44", "remaining_time": "0:09:31", "throughput": 2121.13, "total_tokens": 222304}
{"current_steps": 495, "total_steps": 3160, "loss": 0.0542, "lr": 0.029710970503819947, "epoch": 3.132911392405063, "percentage": 15.66, "elapsed_time": "0:01:45", "remaining_time": "0:09:29", "throughput": 2121.95, "total_tokens": 224448}
{"current_steps": 500, "total_steps": 3160, "loss": 0.0892, "lr": 0.029694560938697545, "epoch": 3.1645569620253164, "percentage": 15.82, "elapsed_time": "0:01:46", "remaining_time": "0:09:27", "throughput": 2124.02, "total_tokens": 226752}
{"current_steps": 505, "total_steps": 3160, "loss": 0.1341, "lr": 0.029677703107285798, "epoch": 3.1962025316455698, "percentage": 15.98, "elapsed_time": "0:01:47", "remaining_time": "0:09:26", "throughput": 2126.31, "total_tokens": 229088}
{"current_steps": 510, "total_steps": 3160, "loss": 0.026, "lr": 0.029660397523842846, "epoch": 3.2278481012658227, "percentage": 16.14, "elapsed_time": "0:01:48", "remaining_time": "0:09:24", "throughput": 2126.87, "total_tokens": 231232}
{"current_steps": 515, "total_steps": 3160, "loss": 0.0589, "lr": 0.029642644716285763, "epoch": 3.259493670886076, "percentage": 16.3, "elapsed_time": "0:01:49", "remaining_time": "0:09:23", "throughput": 2129.11, "total_tokens": 233568}
{"current_steps": 520, "total_steps": 3160, "loss": 0.0482, "lr": 0.02962444522617446, "epoch": 3.291139240506329, "percentage": 16.46, "elapsed_time": "0:01:50", "remaining_time": "0:09:21", "throughput": 2130.46, "total_tokens": 235808}
{"current_steps": 525, "total_steps": 3160, "loss": 0.1296, "lr": 0.02960579960869518, "epoch": 3.3227848101265822, "percentage": 16.61, "elapsed_time": "0:01:51", "remaining_time": "0:09:20", "throughput": 2131.98, "total_tokens": 238080}
{"current_steps": 530, "total_steps": 3160, "loss": 0.0445, "lr": 0.029586708432643525, "epoch": 3.3544303797468356, "percentage": 16.77, "elapsed_time": "0:01:52", "remaining_time": "0:09:18", "throughput": 2133.09, "total_tokens": 240288}
{"current_steps": 535, "total_steps": 3160, "loss": 0.0441, "lr": 0.029567172280407134, "epoch": 3.3860759493670884, "percentage": 16.93, "elapsed_time": "0:01:53", "remaining_time": "0:09:17", "throughput": 2133.94, "total_tokens": 242464}
{"current_steps": 540, "total_steps": 3160, "loss": 0.0436, "lr": 0.02954719174794791, "epoch": 3.4177215189873418, "percentage": 17.09, "elapsed_time": "0:01:54", "remaining_time": "0:09:16", "throughput": 2135.4, "total_tokens": 244736}
{"current_steps": 545, "total_steps": 3160, "loss": 0.0769, "lr": 0.02952676744478383, "epoch": 3.449367088607595, "percentage": 17.25, "elapsed_time": "0:01:55", "remaining_time": "0:09:14", "throughput": 2137.29, "total_tokens": 247072}
{"current_steps": 550, "total_steps": 3160, "loss": 0.037, "lr": 0.029505899993970373, "epoch": 3.481012658227848, "percentage": 17.41, "elapsed_time": "0:01:56", "remaining_time": "0:09:13", "throughput": 2139.1, "total_tokens": 249376}
{"current_steps": 555, "total_steps": 3160, "loss": 0.0548, "lr": 0.029484590032081488, "epoch": 3.5126582278481013, "percentage": 17.56, "elapsed_time": "0:01:57", "remaining_time": "0:09:11", "throughput": 2140.7, "total_tokens": 251680}
{"current_steps": 560, "total_steps": 3160, "loss": 0.0635, "lr": 0.029462838209190195, "epoch": 3.5443037974683547, "percentage": 17.72, "elapsed_time": "0:01:58", "remaining_time": "0:09:10", "throughput": 2142.72, "total_tokens": 254048}
{"current_steps": 565, "total_steps": 3160, "loss": 0.0936, "lr": 0.029440645188848733, "epoch": 3.5759493670886076, "percentage": 17.88, "elapsed_time": "0:01:59", "remaining_time": "0:09:09", "throughput": 2143.77, "total_tokens": 256256}
{"current_steps": 570, "total_steps": 3160, "loss": 0.0574, "lr": 0.029418011648068353, "epoch": 3.607594936708861, "percentage": 18.04, "elapsed_time": "0:02:00", "remaining_time": "0:09:07", "throughput": 2145.5, "total_tokens": 258592}
{"current_steps": 575, "total_steps": 3160, "loss": 0.0827, "lr": 0.029394938277298614, "epoch": 3.6392405063291138, "percentage": 18.2, "elapsed_time": "0:02:01", "remaining_time": "0:09:06", "throughput": 2146.7, "total_tokens": 260832}
{"current_steps": 580, "total_steps": 3160, "loss": 0.0578, "lr": 0.029371425780406368, "epoch": 3.670886075949367, "percentage": 18.35, "elapsed_time": "0:02:02", "remaining_time": "0:09:04", "throughput": 2148.35, "total_tokens": 263168}
{"current_steps": 585, "total_steps": 3160, "loss": 0.0399, "lr": 0.029347474874654274, "epoch": 3.7025316455696204, "percentage": 18.51, "elapsed_time": "0:02:03", "remaining_time": "0:09:03", "throughput": 2148.99, "total_tokens": 265344}
{"current_steps": 590, "total_steps": 3160, "loss": 0.0807, "lr": 0.029323086290678896, "epoch": 3.7341772151898733, "percentage": 18.67, "elapsed_time": "0:02:04", "remaining_time": "0:09:02", "throughput": 2149.92, "total_tokens": 267552}
{"current_steps": 595, "total_steps": 3160, "loss": 0.0741, "lr": 0.02929826077246845, "epoch": 3.7658227848101267, "percentage": 18.83, "elapsed_time": "0:02:05", "remaining_time": "0:09:00", "throughput": 2150.82, "total_tokens": 269760}
{"current_steps": 600, "total_steps": 3160, "loss": 0.0445, "lr": 0.029272999077340066, "epoch": 3.7974683544303796, "percentage": 18.99, "elapsed_time": "0:02:06", "remaining_time": "0:08:59", "throughput": 2151.91, "total_tokens": 272000}
{"current_steps": 605, "total_steps": 3160, "loss": 0.05, "lr": 0.02924730197591674, "epoch": 3.829113924050633, "percentage": 19.15, "elapsed_time": "0:02:07", "remaining_time": "0:08:57", "throughput": 2153.24, "total_tokens": 274304}
{"current_steps": 610, "total_steps": 3160, "loss": 0.0443, "lr": 0.029221170252103766, "epoch": 3.8607594936708862, "percentage": 19.3, "elapsed_time": "0:02:08", "remaining_time": "0:08:56", "throughput": 2154.56, "total_tokens": 276608}
{"current_steps": 615, "total_steps": 3160, "loss": 0.0408, "lr": 0.029194604703064876, "epoch": 3.892405063291139, "percentage": 19.46, "elapsed_time": "0:02:09", "remaining_time": "0:08:55", "throughput": 2156.03, "total_tokens": 278912}
{"current_steps": 620, "total_steps": 3160, "loss": 0.078, "lr": 0.029167606139197878, "epoch": 3.9240506329113924, "percentage": 19.62, "elapsed_time": "0:02:10", "remaining_time": "0:08:53", "throughput": 2157.11, "total_tokens": 281152}
{"current_steps": 625, "total_steps": 3160, "loss": 0.0847, "lr": 0.029140175384109963, "epoch": 3.9556962025316453, "percentage": 19.78, "elapsed_time": "0:02:11", "remaining_time": "0:08:52", "throughput": 2158.37, "total_tokens": 283424}
{"current_steps": 630, "total_steps": 3160, "loss": 0.0981, "lr": 0.02911231327459257, "epoch": 3.9873417721518987, "percentage": 19.94, "elapsed_time": "0:02:12", "remaining_time": "0:08:51", "throughput": 2159.89, "total_tokens": 285792}
{"current_steps": 632, "total_steps": 3160, "eval_loss": 0.057963330298662186, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:02:14", "remaining_time": "0:08:57", "throughput": 2132.16, "total_tokens": 286448}
{"current_steps": 635, "total_steps": 3160, "loss": 0.0247, "lr": 0.029084020660595865, "epoch": 4.018987341772152, "percentage": 20.09, "elapsed_time": "0:02:15", "remaining_time": "0:09:00", "throughput": 2118.95, "total_tokens": 287824}
{"current_steps": 640, "total_steps": 3160, "loss": 0.0395, "lr": 0.0290552984052028, "epoch": 4.050632911392405, "percentage": 20.25, "elapsed_time": "0:02:16", "remaining_time": "0:08:58", "throughput": 2120.12, "total_tokens": 290128}
{"current_steps": 645, "total_steps": 3160, "loss": 0.018, "lr": 0.029026147384602796, "epoch": 4.082278481012658, "percentage": 20.41, "elapsed_time": "0:02:17", "remaining_time": "0:08:57", "throughput": 2121.16, "total_tokens": 292336}
{"current_steps": 650, "total_steps": 3160, "loss": 0.0408, "lr": 0.028996568488065012, "epoch": 4.113924050632911, "percentage": 20.57, "elapsed_time": "0:02:18", "remaining_time": "0:08:55", "throughput": 2122.12, "total_tokens": 294544}
{"current_steps": 655, "total_steps": 3160, "loss": 0.0666, "lr": 0.02896656261791122, "epoch": 4.1455696202531644, "percentage": 20.73, "elapsed_time": "0:02:19", "remaining_time": "0:08:54", "throughput": 2123.32, "total_tokens": 296784}
{"current_steps": 660, "total_steps": 3160, "loss": 0.0343, "lr": 0.028936130689488263, "epoch": 4.177215189873418, "percentage": 20.89, "elapsed_time": "0:02:20", "remaining_time": "0:08:53", "throughput": 2124.67, "total_tokens": 299056}
{"current_steps": 665, "total_steps": 3160, "loss": 0.0286, "lr": 0.028905273631140153, "epoch": 4.208860759493671, "percentage": 21.04, "elapsed_time": "0:02:21", "remaining_time": "0:08:51", "throughput": 2125.59, "total_tokens": 301264}
{"current_steps": 670, "total_steps": 3160, "loss": 0.0207, "lr": 0.02887399238417974, "epoch": 4.2405063291139244, "percentage": 21.2, "elapsed_time": "0:02:22", "remaining_time": "0:08:50", "throughput": 2126.54, "total_tokens": 303472}
{"current_steps": 675, "total_steps": 3160, "loss": 0.0509, "lr": 0.02884228790286001, "epoch": 4.272151898734177, "percentage": 21.36, "elapsed_time": "0:02:23", "remaining_time": "0:08:49", "throughput": 2127.75, "total_tokens": 305744}
{"current_steps": 680, "total_steps": 3160, "loss": 0.0467, "lr": 0.02881016115434494, "epoch": 4.30379746835443, "percentage": 21.52, "elapsed_time": "0:02:24", "remaining_time": "0:08:47", "throughput": 2129.08, "total_tokens": 308016}
{"current_steps": 685, "total_steps": 3160, "loss": 0.0321, "lr": 0.028777613118680035, "epoch": 4.3354430379746836, "percentage": 21.68, "elapsed_time": "0:02:25", "remaining_time": "0:08:46", "throughput": 2130.41, "total_tokens": 310320}
{"current_steps": 690, "total_steps": 3160, "loss": 0.0559, "lr": 0.028744644788762413, "epoch": 4.367088607594937, "percentage": 21.84, "elapsed_time": "0:02:26", "remaining_time": "0:08:44", "throughput": 2131.89, "total_tokens": 312624}
{"current_steps": 695, "total_steps": 3160, "loss": 0.0241, "lr": 0.02871125717031052, "epoch": 4.39873417721519, "percentage": 21.99, "elapsed_time": "0:02:27", "remaining_time": "0:08:43", "throughput": 2133.39, "total_tokens": 314960}
{"current_steps": 700, "total_steps": 3160, "loss": 0.0067, "lr": 0.028677451281833435, "epoch": 4.430379746835443, "percentage": 22.15, "elapsed_time": "0:02:28", "remaining_time": "0:08:42", "throughput": 2134.66, "total_tokens": 317232}
{"current_steps": 705, "total_steps": 3160, "loss": 0.0212, "lr": 0.028643228154599815, "epoch": 4.462025316455696, "percentage": 22.31, "elapsed_time": "0:02:29", "remaining_time": "0:08:40", "throughput": 2136.04, "total_tokens": 319536}
{"current_steps": 710, "total_steps": 3160, "loss": 0.0317, "lr": 0.028608588832606446, "epoch": 4.493670886075949, "percentage": 22.47, "elapsed_time": "0:02:30", "remaining_time": "0:08:39", "throughput": 2137.7, "total_tokens": 321936}
{"current_steps": 715, "total_steps": 3160, "loss": 0.036, "lr": 0.02857353437254637, "epoch": 4.525316455696203, "percentage": 22.63, "elapsed_time": "0:02:31", "remaining_time": "0:08:38", "throughput": 2138.76, "total_tokens": 324208}
{"current_steps": 720, "total_steps": 3160, "loss": 0.0478, "lr": 0.028538065843776658, "epoch": 4.556962025316456, "percentage": 22.78, "elapsed_time": "0:02:32", "remaining_time": "0:08:37", "throughput": 2139.41, "total_tokens": 326384}
{"current_steps": 725, "total_steps": 3160, "loss": 0.0082, "lr": 0.028502184328285808, "epoch": 4.588607594936709, "percentage": 22.94, "elapsed_time": "0:02:33", "remaining_time": "0:08:35", "throughput": 2140.23, "total_tokens": 328592}
{"current_steps": 730, "total_steps": 3160, "loss": 0.0182, "lr": 0.02846589092066071, "epoch": 4.620253164556962, "percentage": 23.1, "elapsed_time": "0:02:34", "remaining_time": "0:08:34", "throughput": 2141.51, "total_tokens": 330896}
{"current_steps": 735, "total_steps": 3160, "loss": 0.0399, "lr": 0.02842918672805327, "epoch": 4.651898734177215, "percentage": 23.26, "elapsed_time": "0:02:35", "remaining_time": "0:08:33", "throughput": 2143.2, "total_tokens": 333360}
{"current_steps": 740, "total_steps": 3160, "loss": 0.0917, "lr": 0.028392072870146633, "epoch": 4.6835443037974684, "percentage": 23.42, "elapsed_time": "0:02:36", "remaining_time": "0:08:31", "throughput": 2143.97, "total_tokens": 335568}
{"current_steps": 745, "total_steps": 3160, "loss": 0.0462, "lr": 0.028354550479121027, "epoch": 4.715189873417722, "percentage": 23.58, "elapsed_time": "0:02:37", "remaining_time": "0:08:30", "throughput": 2144.89, "total_tokens": 337808}
{"current_steps": 750, "total_steps": 3160, "loss": 0.0275, "lr": 0.028316620699619228, "epoch": 4.746835443037975, "percentage": 23.73, "elapsed_time": "0:02:38", "remaining_time": "0:08:29", "throughput": 2145.58, "total_tokens": 340016}
{"current_steps": 755, "total_steps": 3160, "loss": 0.0641, "lr": 0.028278284688711637, "epoch": 4.7784810126582276, "percentage": 23.89, "elapsed_time": "0:02:39", "remaining_time": "0:08:27", "throughput": 2146.82, "total_tokens": 342320}
{"current_steps": 760, "total_steps": 3160, "loss": 0.0479, "lr": 0.028239543615860983, "epoch": 4.810126582278481, "percentage": 24.05, "elapsed_time": "0:02:40", "remaining_time": "0:08:26", "throughput": 2148.0, "total_tokens": 344656}
{"current_steps": 765, "total_steps": 3160, "loss": 0.0593, "lr": 0.028200398662886653, "epoch": 4.841772151898734, "percentage": 24.21, "elapsed_time": "0:02:41", "remaining_time": "0:08:25", "throughput": 2149.06, "total_tokens": 346928}
{"current_steps": 770, "total_steps": 3160, "loss": 0.1044, "lr": 0.028160851023928634, "epoch": 4.8734177215189876, "percentage": 24.37, "elapsed_time": "0:02:42", "remaining_time": "0:08:24", "throughput": 2149.95, "total_tokens": 349168}
{"current_steps": 775, "total_steps": 3160, "loss": 0.0527, "lr": 0.02812090190541108, "epoch": 4.905063291139241, "percentage": 24.53, "elapsed_time": "0:02:43", "remaining_time": "0:08:22", "throughput": 2151.23, "total_tokens": 351536}
{"current_steps": 780, "total_steps": 3160, "loss": 0.03, "lr": 0.028080552526005543, "epoch": 4.936708860759493, "percentage": 24.68, "elapsed_time": "0:02:44", "remaining_time": "0:08:21", "throughput": 2152.42, "total_tokens": 353840}
{"current_steps": 785, "total_steps": 3160, "loss": 0.0797, "lr": 0.02803980411659374, "epoch": 4.968354430379747, "percentage": 24.84, "elapsed_time": "0:02:45", "remaining_time": "0:08:20", "throughput": 2153.32, "total_tokens": 356112}
{"current_steps": 790, "total_steps": 3160, "loss": 0.021, "lr": 0.02799865792023004, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:02:46", "remaining_time": "0:08:18", "throughput": 2153.37, "total_tokens": 358176}
{"current_steps": 790, "total_steps": 3160, "eval_loss": 0.057497963309288025, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:02:48", "remaining_time": "0:08:24", "throughput": 2131.83, "total_tokens": 358176}
{"current_steps": 795, "total_steps": 3160, "loss": 0.021, "lr": 0.027957115192103567, "epoch": 5.031645569620253, "percentage": 25.16, "elapsed_time": "0:02:50", "remaining_time": "0:08:25", "throughput": 2119.86, "total_tokens": 360512}
{"current_steps": 800, "total_steps": 3160, "loss": 0.0336, "lr": 0.027915177199499843, "epoch": 5.063291139240507, "percentage": 25.32, "elapsed_time": "0:02:51", "remaining_time": "0:08:24", "throughput": 2120.86, "total_tokens": 362752}
{"current_steps": 805, "total_steps": 3160, "loss": 0.0355, "lr": 0.027872845221762192, "epoch": 5.094936708860759, "percentage": 25.47, "elapsed_time": "0:02:52", "remaining_time": "0:08:23", "throughput": 2121.99, "total_tokens": 365056}
{"current_steps": 810, "total_steps": 3160, "loss": 0.0592, "lr": 0.02783012055025268, "epoch": 5.1265822784810124, "percentage": 25.63, "elapsed_time": "0:02:53", "remaining_time": "0:08:21", "throughput": 2122.8, "total_tokens": 367264}
{"current_steps": 815, "total_steps": 3160, "loss": 0.0095, "lr": 0.027787004488312724, "epoch": 5.158227848101266, "percentage": 25.79, "elapsed_time": "0:02:53", "remaining_time": "0:08:20", "throughput": 2123.87, "total_tokens": 369536}
{"current_steps": 820, "total_steps": 3160, "loss": 0.0159, "lr": 0.027743498351223354, "epoch": 5.189873417721519, "percentage": 25.95, "elapsed_time": "0:02:54", "remaining_time": "0:08:19", "throughput": 2124.63, "total_tokens": 371744}
{"current_steps": 825, "total_steps": 3160, "loss": 0.0128, "lr": 0.027699603466165058, "epoch": 5.2215189873417724, "percentage": 26.11, "elapsed_time": "0:02:55", "remaining_time": "0:08:17", "throughput": 2125.51, "total_tokens": 373984}
{"current_steps": 830, "total_steps": 3160, "loss": 0.0304, "lr": 0.027655321172177314, "epoch": 5.253164556962025, "percentage": 26.27, "elapsed_time": "0:02:56", "remaining_time": "0:08:16", "throughput": 2126.77, "total_tokens": 376288}
{"current_steps": 835, "total_steps": 3160, "loss": 0.0402, "lr": 0.027610652820117747, "epoch": 5.284810126582278, "percentage": 26.42, "elapsed_time": "0:02:57", "remaining_time": "0:08:15", "throughput": 2127.6, "total_tokens": 378528}
{"current_steps": 840, "total_steps": 3160, "loss": 0.0245, "lr": 0.0275655997726209, "epoch": 5.3164556962025316, "percentage": 26.58, "elapsed_time": "0:02:58", "remaining_time": "0:08:14", "throughput": 2129.01, "total_tokens": 380896}
{"current_steps": 845, "total_steps": 3160, "loss": 0.0517, "lr": 0.02752016340405669, "epoch": 5.348101265822785, "percentage": 26.74, "elapsed_time": "0:02:59", "remaining_time": "0:08:12", "throughput": 2130.05, "total_tokens": 383168}
{"current_steps": 850, "total_steps": 3160, "loss": 0.0426, "lr": 0.027474345100488465, "epoch": 5.379746835443038, "percentage": 26.9, "elapsed_time": "0:03:00", "remaining_time": "0:08:11", "throughput": 2131.47, "total_tokens": 385568}
{"current_steps": 855, "total_steps": 3160, "loss": 0.0426, "lr": 0.027428146259630727, "epoch": 5.4113924050632916, "percentage": 27.06, "elapsed_time": "0:03:01", "remaining_time": "0:08:10", "throughput": 2132.41, "total_tokens": 387872}
{"current_steps": 860, "total_steps": 3160, "loss": 0.0082, "lr": 0.027381568290806495, "epoch": 5.443037974683544, "percentage": 27.22, "elapsed_time": "0:03:02", "remaining_time": "0:08:09", "throughput": 2133.25, "total_tokens": 390112}
{"current_steps": 865, "total_steps": 3160, "loss": 0.032, "lr": 0.027334612614904306, "epoch": 5.474683544303797, "percentage": 27.37, "elapsed_time": "0:03:03", "remaining_time": "0:08:07", "throughput": 2134.57, "total_tokens": 392512}
{"current_steps": 870, "total_steps": 3160, "loss": 0.0408, "lr": 0.02728728066433488, "epoch": 5.506329113924051, "percentage": 27.53, "elapsed_time": "0:03:04", "remaining_time": "0:08:06", "throughput": 2135.4, "total_tokens": 394752}
{"current_steps": 875, "total_steps": 3160, "loss": 0.0327, "lr": 0.027239573882987415, "epoch": 5.537974683544304, "percentage": 27.69, "elapsed_time": "0:03:05", "remaining_time": "0:08:05", "throughput": 2136.23, "total_tokens": 396992}
{"current_steps": 880, "total_steps": 3160, "loss": 0.0248, "lr": 0.02719149372618555, "epoch": 5.569620253164557, "percentage": 27.85, "elapsed_time": "0:03:06", "remaining_time": "0:08:04", "throughput": 2136.92, "total_tokens": 399232}
{"current_steps": 885, "total_steps": 3160, "loss": 0.0276, "lr": 0.027143041660642967, "epoch": 5.60126582278481, "percentage": 28.01, "elapsed_time": "0:03:07", "remaining_time": "0:08:02", "throughput": 2137.58, "total_tokens": 401440}
{"current_steps": 890, "total_steps": 3160, "loss": 0.016, "lr": 0.027094219164418627, "epoch": 5.632911392405063, "percentage": 28.16, "elapsed_time": "0:03:08", "remaining_time": "0:08:01", "throughput": 2138.1, "total_tokens": 403616}
{"current_steps": 895, "total_steps": 3160, "loss": 0.0218, "lr": 0.02704502772687172, "epoch": 5.6645569620253164, "percentage": 28.32, "elapsed_time": "0:03:09", "remaining_time": "0:08:00", "throughput": 2138.47, "total_tokens": 405760}
{"current_steps": 900, "total_steps": 3160, "loss": 0.0419, "lr": 0.026995468848616182, "epoch": 5.69620253164557, "percentage": 28.48, "elapsed_time": "0:03:10", "remaining_time": "0:07:58", "throughput": 2139.39, "total_tokens": 408064}
{"current_steps": 905, "total_steps": 3160, "loss": 0.0553, "lr": 0.026945544041474978, "epoch": 5.727848101265823, "percentage": 28.64, "elapsed_time": "0:03:11", "remaining_time": "0:07:57", "throughput": 2140.48, "total_tokens": 410400}
{"current_steps": 910, "total_steps": 3160, "loss": 0.032, "lr": 0.02689525482843393, "epoch": 5.759493670886076, "percentage": 28.8, "elapsed_time": "0:03:12", "remaining_time": "0:07:56", "throughput": 2141.56, "total_tokens": 412736}
{"current_steps": 915, "total_steps": 3160, "loss": 0.0392, "lr": 0.02684460274359528, "epoch": 5.791139240506329, "percentage": 28.96, "elapsed_time": "0:03:13", "remaining_time": "0:07:55", "throughput": 2143.06, "total_tokens": 415168}
{"current_steps": 920, "total_steps": 3160, "loss": 0.0468, "lr": 0.026793589332130902, "epoch": 5.822784810126582, "percentage": 29.11, "elapsed_time": "0:03:14", "remaining_time": "0:07:54", "throughput": 2143.56, "total_tokens": 417344}
{"current_steps": 925, "total_steps": 3160, "loss": 0.0345, "lr": 0.02674221615023513, "epoch": 5.8544303797468356, "percentage": 29.27, "elapsed_time": "0:03:15", "remaining_time": "0:07:52", "throughput": 2143.9, "total_tokens": 419488}
{"current_steps": 930, "total_steps": 3160, "loss": 0.0553, "lr": 0.026690484765077332, "epoch": 5.886075949367089, "percentage": 29.43, "elapsed_time": "0:03:16", "remaining_time": "0:07:51", "throughput": 2144.03, "total_tokens": 421600}
{"current_steps": 935, "total_steps": 3160, "loss": 0.0148, "lr": 0.026638396754754056, "epoch": 5.917721518987342, "percentage": 29.59, "elapsed_time": "0:03:17", "remaining_time": "0:07:50", "throughput": 2145.02, "total_tokens": 423904}
{"current_steps": 940, "total_steps": 3160, "loss": 0.0247, "lr": 0.026585953708240937, "epoch": 5.949367088607595, "percentage": 29.75, "elapsed_time": "0:03:18", "remaining_time": "0:07:49", "throughput": 2146.04, "total_tokens": 426240}
{"current_steps": 945, "total_steps": 3160, "loss": 0.0334, "lr": 0.02653315722534418, "epoch": 5.981012658227848, "percentage": 29.91, "elapsed_time": "0:03:19", "remaining_time": "0:07:47", "throughput": 2147.3, "total_tokens": 428608}
{"current_steps": 948, "total_steps": 3160, "eval_loss": 0.057629313319921494, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:03:21", "remaining_time": "0:07:50", "throughput": 2129.04, "total_tokens": 429728}
{"current_steps": 950, "total_steps": 3160, "loss": 0.0224, "lr": 0.026480008916651778, "epoch": 6.012658227848101, "percentage": 30.06, "elapsed_time": "0:03:23", "remaining_time": "0:07:52", "throughput": 2120.38, "total_tokens": 430624}
{"current_steps": 955, "total_steps": 3160, "loss": 0.0104, "lr": 0.02642651040348439, "epoch": 6.044303797468355, "percentage": 30.22, "elapsed_time": "0:03:24", "remaining_time": "0:07:51", "throughput": 2121.2, "total_tokens": 432960}
{"current_steps": 960, "total_steps": 3160, "loss": 0.0298, "lr": 0.026372663317845862, "epoch": 6.075949367088608, "percentage": 30.38, "elapsed_time": "0:03:25", "remaining_time": "0:07:49", "throughput": 2121.62, "total_tokens": 435104}
{"current_steps": 965, "total_steps": 3160, "loss": 0.0218, "lr": 0.02631846930237345, "epoch": 6.1075949367088604, "percentage": 30.54, "elapsed_time": "0:03:26", "remaining_time": "0:07:48", "throughput": 2122.26, "total_tokens": 437312}
{"current_steps": 970, "total_steps": 3160, "loss": 0.0266, "lr": 0.026263930010287713, "epoch": 6.139240506329114, "percentage": 30.7, "elapsed_time": "0:03:27", "remaining_time": "0:07:47", "throughput": 2123.17, "total_tokens": 439584}
{"current_steps": 975, "total_steps": 3160, "loss": 0.0454, "lr": 0.02620904710534207, "epoch": 6.170886075949367, "percentage": 30.85, "elapsed_time": "0:03:28", "remaining_time": "0:07:46", "throughput": 2124.09, "total_tokens": 441856}
{"current_steps": 980, "total_steps": 3160, "loss": 0.0287, "lr": 0.026153822261772066, "epoch": 6.2025316455696204, "percentage": 31.01, "elapsed_time": "0:03:28", "remaining_time": "0:07:44", "throughput": 2125.01, "total_tokens": 444128}
{"current_steps": 985, "total_steps": 3160, "loss": 0.0146, "lr": 0.026098257164244274, "epoch": 6.234177215189874, "percentage": 31.17, "elapsed_time": "0:03:29", "remaining_time": "0:07:43", "throughput": 2125.73, "total_tokens": 446368}
{"current_steps": 990, "total_steps": 3160, "loss": 0.004, "lr": 0.02604235350780493, "epoch": 6.265822784810126, "percentage": 31.33, "elapsed_time": "0:03:30", "remaining_time": "0:07:42", "throughput": 2126.58, "total_tokens": 448640}
{"current_steps": 995, "total_steps": 3160, "loss": 0.0223, "lr": 0.025986112997828197, "epoch": 6.2974683544303796, "percentage": 31.49, "elapsed_time": "0:03:31", "remaining_time": "0:07:41", "throughput": 2127.19, "total_tokens": 450848}
{"current_steps": 1000, "total_steps": 3160, "loss": 0.0108, "lr": 0.025929537349964157, "epoch": 6.329113924050633, "percentage": 31.65, "elapsed_time": "0:03:32", "remaining_time": "0:07:39", "throughput": 2127.8, "total_tokens": 453056}
{"current_steps": 1005, "total_steps": 3160, "loss": 0.0305, "lr": 0.025872628290086477, "epoch": 6.360759493670886, "percentage": 31.8, "elapsed_time": "0:03:33", "remaining_time": "0:07:38", "throughput": 2128.96, "total_tokens": 455424}
{"current_steps": 1010, "total_steps": 3160, "loss": 0.0084, "lr": 0.025815387554239753, "epoch": 6.3924050632911396, "percentage": 31.96, "elapsed_time": "0:03:34", "remaining_time": "0:07:37", "throughput": 2129.57, "total_tokens": 457632}
{"current_steps": 1015, "total_steps": 3160, "loss": 0.0425, "lr": 0.025757816888586547, "epoch": 6.424050632911392, "percentage": 32.12, "elapsed_time": "0:03:35", "remaining_time": "0:07:36", "throughput": 2130.59, "total_tokens": 459968}
{"current_steps": 1020, "total_steps": 3160, "loss": 0.0221, "lr": 0.025699918049354144, "epoch": 6.455696202531645, "percentage": 32.28, "elapsed_time": "0:03:36", "remaining_time": "0:07:34", "throughput": 2131.44, "total_tokens": 462240}
{"current_steps": 1025, "total_steps": 3160, "loss": 0.0021, "lr": 0.025641692802780933, "epoch": 6.487341772151899, "percentage": 32.44, "elapsed_time": "0:03:37", "remaining_time": "0:07:33", "throughput": 2131.91, "total_tokens": 464416}
{"current_steps": 1030, "total_steps": 3160, "loss": 0.0216, "lr": 0.02558314292506257, "epoch": 6.518987341772152, "percentage": 32.59, "elapsed_time": "0:03:38", "remaining_time": "0:07:32", "throughput": 2132.78, "total_tokens": 466752}
{"current_steps": 1035, "total_steps": 3160, "loss": 0.027, "lr": 0.025524270202297767, "epoch": 6.550632911392405, "percentage": 32.75, "elapsed_time": "0:03:39", "remaining_time": "0:07:31", "throughput": 2133.67, "total_tokens": 469088}
{"current_steps": 1040, "total_steps": 3160, "loss": 0.0288, "lr": 0.025465076430433827, "epoch": 6.582278481012658, "percentage": 32.91, "elapsed_time": "0:03:40", "remaining_time": "0:07:30", "throughput": 2134.37, "total_tokens": 471328}
{"current_steps": 1045, "total_steps": 3160, "loss": 0.0325, "lr": 0.025405563415211833, "epoch": 6.613924050632911, "percentage": 33.07, "elapsed_time": "0:03:41", "remaining_time": "0:07:28", "throughput": 2135.48, "total_tokens": 473728}
{"current_steps": 1050, "total_steps": 3160, "loss": 0.0163, "lr": 0.025345732972111585, "epoch": 6.6455696202531644, "percentage": 33.23, "elapsed_time": "0:03:42", "remaining_time": "0:07:27", "throughput": 2135.69, "total_tokens": 475840}
{"current_steps": 1055, "total_steps": 3160, "loss": 0.0182, "lr": 0.025285586926296195, "epoch": 6.677215189873418, "percentage": 33.39, "elapsed_time": "0:03:43", "remaining_time": "0:07:26", "throughput": 2136.22, "total_tokens": 478048}
{"current_steps": 1060, "total_steps": 3160, "loss": 0.038, "lr": 0.025225127112556447, "epoch": 6.708860759493671, "percentage": 33.54, "elapsed_time": "0:03:44", "remaining_time": "0:07:25", "throughput": 2137.04, "total_tokens": 480352}
{"current_steps": 1065, "total_steps": 3160, "loss": 0.0626, "lr": 0.025164355375254775, "epoch": 6.740506329113924, "percentage": 33.7, "elapsed_time": "0:03:45", "remaining_time": "0:07:24", "throughput": 2138.1, "total_tokens": 482752}
{"current_steps": 1070, "total_steps": 3160, "loss": 0.0365, "lr": 0.02510327356826905, "epoch": 6.772151898734177, "percentage": 33.86, "elapsed_time": "0:03:46", "remaining_time": "0:07:22", "throughput": 2138.91, "total_tokens": 485056}
{"current_steps": 1075, "total_steps": 3160, "loss": 0.0396, "lr": 0.02504188355493598, "epoch": 6.80379746835443, "percentage": 34.02, "elapsed_time": "0:03:47", "remaining_time": "0:07:21", "throughput": 2139.69, "total_tokens": 487360}
{"current_steps": 1080, "total_steps": 3160, "loss": 0.0469, "lr": 0.024980187207994307, "epoch": 6.8354430379746836, "percentage": 34.18, "elapsed_time": "0:03:48", "remaining_time": "0:07:20", "throughput": 2140.72, "total_tokens": 489728}
{"current_steps": 1085, "total_steps": 3160, "loss": 0.0226, "lr": 0.024918186409527657, "epoch": 6.867088607594937, "percentage": 34.34, "elapsed_time": "0:03:49", "remaining_time": "0:07:19", "throughput": 2141.72, "total_tokens": 492064}
{"current_steps": 1090, "total_steps": 3160, "loss": 0.0439, "lr": 0.024855883050907124, "epoch": 6.89873417721519, "percentage": 34.49, "elapsed_time": "0:03:50", "remaining_time": "0:07:18", "throughput": 2142.35, "total_tokens": 494304}
{"current_steps": 1095, "total_steps": 3160, "loss": 0.0403, "lr": 0.024793279032733578, "epoch": 6.930379746835443, "percentage": 34.65, "elapsed_time": "0:03:51", "remaining_time": "0:07:17", "throughput": 2143.65, "total_tokens": 496800}
{"current_steps": 1100, "total_steps": 3160, "loss": 0.0124, "lr": 0.024730376264779707, "epoch": 6.962025316455696, "percentage": 34.81, "elapsed_time": "0:03:52", "remaining_time": "0:07:15", "throughput": 2144.28, "total_tokens": 499040}
{"current_steps": 1105, "total_steps": 3160, "loss": 0.0169, "lr": 0.02466717666593172, "epoch": 6.993670886075949, "percentage": 34.97, "elapsed_time": "0:03:53", "remaining_time": "0:07:14", "throughput": 2144.88, "total_tokens": 501280}
{"current_steps": 1106, "total_steps": 3160, "eval_loss": 0.04775509238243103, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:03:55", "remaining_time": "0:07:17", "throughput": 2128.89, "total_tokens": 501504}
{"current_steps": 1110, "total_steps": 3160, "loss": 0.0175, "lr": 0.02460368216413082, "epoch": 7.025316455696203, "percentage": 35.13, "elapsed_time": "0:03:57", "remaining_time": "0:07:18", "throughput": 2121.46, "total_tokens": 503328}
{"current_steps": 1115, "total_steps": 3160, "loss": 0.0033, "lr": 0.024539894696314412, "epoch": 7.056962025316456, "percentage": 35.28, "elapsed_time": "0:03:58", "remaining_time": "0:07:16", "throughput": 2122.08, "total_tokens": 505568}
{"current_steps": 1120, "total_steps": 3160, "loss": 0.0229, "lr": 0.024475816208357017, "epoch": 7.0886075949367084, "percentage": 35.44, "elapsed_time": "0:03:59", "remaining_time": "0:07:15", "throughput": 2123.1, "total_tokens": 507904}
{"current_steps": 1125, "total_steps": 3160, "loss": 0.0058, "lr": 0.024411448655010867, "epoch": 7.120253164556962, "percentage": 35.6, "elapsed_time": "0:04:00", "remaining_time": "0:07:14", "throughput": 2123.44, "total_tokens": 510048}
{"current_steps": 1130, "total_steps": 3160, "loss": 0.0131, "lr": 0.024346793999846333, "epoch": 7.151898734177215, "percentage": 35.76, "elapsed_time": "0:04:01", "remaining_time": "0:07:13", "throughput": 2124.01, "total_tokens": 512256}
{"current_steps": 1135, "total_steps": 3160, "loss": 0.0039, "lr": 0.02428185421519197, "epoch": 7.1835443037974684, "percentage": 35.92, "elapsed_time": "0:04:02", "remaining_time": "0:07:12", "throughput": 2124.87, "total_tokens": 514592}
{"current_steps": 1140, "total_steps": 3160, "loss": 0.0084, "lr": 0.02421663128207441, "epoch": 7.215189873417722, "percentage": 36.08, "elapsed_time": "0:04:03", "remaining_time": "0:07:10", "throughput": 2126.03, "total_tokens": 517024}
{"current_steps": 1145, "total_steps": 3160, "loss": 0.0167, "lr": 0.024151127190157863, "epoch": 7.246835443037975, "percentage": 36.23, "elapsed_time": "0:04:04", "remaining_time": "0:07:09", "throughput": 2126.85, "total_tokens": 519360}
{"current_steps": 1150, "total_steps": 3160, "loss": 0.0082, "lr": 0.02408534393768348, "epoch": 7.2784810126582276, "percentage": 36.39, "elapsed_time": "0:04:05", "remaining_time": "0:07:08", "throughput": 2127.76, "total_tokens": 521664}
{"current_steps": 1155, "total_steps": 3160, "loss": 0.0026, "lr": 0.024019283531408357, "epoch": 7.310126582278481, "percentage": 36.55, "elapsed_time": "0:04:06", "remaining_time": "0:07:07", "throughput": 2128.35, "total_tokens": 523872}
{"current_steps": 1160, "total_steps": 3160, "loss": 0.0037, "lr": 0.02395294798654433, "epoch": 7.341772151898734, "percentage": 36.71, "elapsed_time": "0:04:07", "remaining_time": "0:07:06", "throughput": 2129.09, "total_tokens": 526144}
{"current_steps": 1165, "total_steps": 3160, "loss": 0.0101, "lr": 0.023886339326696513, "epoch": 7.3734177215189876, "percentage": 36.87, "elapsed_time": "0:04:08", "remaining_time": "0:07:04", "throughput": 2129.71, "total_tokens": 528416}
{"current_steps": 1170, "total_steps": 3160, "loss": 0.0379, "lr": 0.023819459583801543, "epoch": 7.405063291139241, "percentage": 37.03, "elapsed_time": "0:04:09", "remaining_time": "0:07:03", "throughput": 2130.53, "total_tokens": 530720}
{"current_steps": 1175, "total_steps": 3160, "loss": 0.0242, "lr": 0.023752310798065612, "epoch": 7.436708860759493, "percentage": 37.18, "elapsed_time": "0:04:10", "remaining_time": "0:07:02", "throughput": 2130.94, "total_tokens": 532896}
{"current_steps": 1180, "total_steps": 3160, "loss": 0.0015, "lr": 0.023684895017902212, "epoch": 7.468354430379747, "percentage": 37.34, "elapsed_time": "0:04:11", "remaining_time": "0:07:01", "throughput": 2131.19, "total_tokens": 535040}
{"current_steps": 1185, "total_steps": 3160, "loss": 0.0016, "lr": 0.02361721429986967, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:04:12", "remaining_time": "0:07:00", "throughput": 2131.7, "total_tokens": 537248}
{"current_steps": 1190, "total_steps": 3160, "loss": 0.0056, "lr": 0.02354927070860841, "epoch": 7.531645569620253, "percentage": 37.66, "elapsed_time": "0:04:13", "remaining_time": "0:06:58", "throughput": 2132.66, "total_tokens": 539616}
{"current_steps": 1195, "total_steps": 3160, "loss": 0.0166, "lr": 0.023481066316777932, "epoch": 7.563291139240507, "percentage": 37.82, "elapsed_time": "0:04:14", "remaining_time": "0:06:57", "throughput": 2133.13, "total_tokens": 541824}
{"current_steps": 1200, "total_steps": 3160, "loss": 0.007, "lr": 0.023412603204993634, "epoch": 7.594936708860759, "percentage": 37.97, "elapsed_time": "0:04:14", "remaining_time": "0:06:56", "throughput": 2133.59, "total_tokens": 544064}
{"current_steps": 1205, "total_steps": 3160, "loss": 0.0038, "lr": 0.023343883461763304, "epoch": 7.6265822784810124, "percentage": 38.13, "elapsed_time": "0:04:15", "remaining_time": "0:06:55", "throughput": 2134.28, "total_tokens": 546336}
{"current_steps": 1210, "total_steps": 3160, "loss": 0.0311, "lr": 0.023274909183423443, "epoch": 7.658227848101266, "percentage": 38.29, "elapsed_time": "0:04:16", "remaining_time": "0:06:54", "throughput": 2134.97, "total_tokens": 548608}
{"current_steps": 1215, "total_steps": 3160, "loss": 0.0167, "lr": 0.023205682474075274, "epoch": 7.689873417721519, "percentage": 38.45, "elapsed_time": "0:04:17", "remaining_time": "0:06:52", "throughput": 2135.74, "total_tokens": 550944}
{"current_steps": 1220, "total_steps": 3160, "loss": 0.0156, "lr": 0.023136205445520596, "epoch": 7.7215189873417724, "percentage": 38.61, "elapsed_time": "0:04:18", "remaining_time": "0:06:51", "throughput": 2136.7, "total_tokens": 553344}
{"current_steps": 1225, "total_steps": 3160, "loss": 0.0211, "lr": 0.02306648021719733, "epoch": 7.753164556962025, "percentage": 38.77, "elapsed_time": "0:04:19", "remaining_time": "0:06:50", "throughput": 2137.17, "total_tokens": 555552}
{"current_steps": 1230, "total_steps": 3160, "loss": 0.0031, "lr": 0.022996508916114898, "epoch": 7.784810126582278, "percentage": 38.92, "elapsed_time": "0:04:20", "remaining_time": "0:06:49", "throughput": 2137.72, "total_tokens": 557792}
{"current_steps": 1235, "total_steps": 3160, "loss": 0.0079, "lr": 0.02292629367678929, "epoch": 7.8164556962025316, "percentage": 39.08, "elapsed_time": "0:04:21", "remaining_time": "0:06:48", "throughput": 2138.08, "total_tokens": 559968}
{"current_steps": 1240, "total_steps": 3160, "loss": 0.0098, "lr": 0.022855836641178, "epoch": 7.848101265822785, "percentage": 39.24, "elapsed_time": "0:04:22", "remaining_time": "0:06:47", "throughput": 2138.76, "total_tokens": 562272}
{"current_steps": 1245, "total_steps": 3160, "loss": 0.0238, "lr": 0.022785139958614652, "epoch": 7.879746835443038, "percentage": 39.4, "elapsed_time": "0:04:23", "remaining_time": "0:06:45", "throughput": 2139.51, "total_tokens": 564608}
{"current_steps": 1250, "total_steps": 3160, "loss": 0.0284, "lr": 0.02271420578574343, "epoch": 7.911392405063291, "percentage": 39.56, "elapsed_time": "0:04:24", "remaining_time": "0:06:44", "throughput": 2140.27, "total_tokens": 566976}
{"current_steps": 1255, "total_steps": 3160, "loss": 0.0153, "lr": 0.022643036286453325, "epoch": 7.943037974683544, "percentage": 39.72, "elapsed_time": "0:04:25", "remaining_time": "0:06:43", "throughput": 2140.92, "total_tokens": 569248}
{"current_steps": 1260, "total_steps": 3160, "loss": 0.0271, "lr": 0.022571633631812082, "epoch": 7.974683544303797, "percentage": 39.87, "elapsed_time": "0:04:26", "remaining_time": "0:06:42", "throughput": 2141.45, "total_tokens": 571488}
{"current_steps": 1264, "total_steps": 3160, "eval_loss": 0.06543910503387451, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:04:29", "remaining_time": "0:06:43", "throughput": 2127.94, "total_tokens": 573120}
{"current_steps": 1265, "total_steps": 3160, "loss": 0.0486, "lr": 0.0225, "epoch": 8.00632911392405, "percentage": 40.03, "elapsed_time": "0:04:30", "remaining_time": "0:06:45", "throughput": 2121.05, "total_tokens": 573568}
{"current_steps": 1270, "total_steps": 3160, "loss": 0.0053, "lr": 0.022428137576243456, "epoch": 8.037974683544304, "percentage": 40.19, "elapsed_time": "0:04:31", "remaining_time": "0:06:43", "throughput": 2121.57, "total_tokens": 575808}
{"current_steps": 1275, "total_steps": 3160, "loss": 0.0142, "lr": 0.022356048552748285, "epoch": 8.069620253164556, "percentage": 40.35, "elapsed_time": "0:04:32", "remaining_time": "0:06:42", "throughput": 2122.45, "total_tokens": 578208}
{"current_steps": 1280, "total_steps": 3160, "loss": 0.002, "lr": 0.02228373512863286, "epoch": 8.10126582278481, "percentage": 40.51, "elapsed_time": "0:04:33", "remaining_time": "0:06:41", "throughput": 2122.92, "total_tokens": 580416}
{"current_steps": 1285, "total_steps": 3160, "loss": 0.0041, "lr": 0.022211199509861033, "epoch": 8.132911392405063, "percentage": 40.66, "elapsed_time": "0:04:34", "remaining_time": "0:06:40", "throughput": 2123.63, "total_tokens": 582720}
{"current_steps": 1290, "total_steps": 3160, "loss": 0.0066, "lr": 0.022138443909174844, "epoch": 8.164556962025316, "percentage": 40.82, "elapsed_time": "0:04:35", "remaining_time": "0:06:39", "throughput": 2124.11, "total_tokens": 584928}
{"current_steps": 1295, "total_steps": 3160, "loss": 0.0191, "lr": 0.02206547054602701, "epoch": 8.19620253164557, "percentage": 40.98, "elapsed_time": "0:04:36", "remaining_time": "0:06:38", "throughput": 2124.76, "total_tokens": 587232}
{"current_steps": 1300, "total_steps": 3160, "loss": 0.0067, "lr": 0.021992281646513213, "epoch": 8.227848101265822, "percentage": 41.14, "elapsed_time": "0:04:37", "remaining_time": "0:06:36", "throughput": 2125.42, "total_tokens": 589504}
{"current_steps": 1305, "total_steps": 3160, "loss": 0.0072, "lr": 0.0219188794433042, "epoch": 8.259493670886076, "percentage": 41.3, "elapsed_time": "0:04:38", "remaining_time": "0:06:35", "throughput": 2125.8, "total_tokens": 591680}
{"current_steps": 1310, "total_steps": 3160, "loss": 0.0619, "lr": 0.021845266175577683, "epoch": 8.291139240506329, "percentage": 41.46, "elapsed_time": "0:04:39", "remaining_time": "0:06:34", "throughput": 2126.56, "total_tokens": 594016}
{"current_steps": 1315, "total_steps": 3160, "loss": 0.0096, "lr": 0.02177144408895002, "epoch": 8.322784810126583, "percentage": 41.61, "elapsed_time": "0:04:40", "remaining_time": "0:06:33", "throughput": 2127.38, "total_tokens": 596416}
{"current_steps": 1320, "total_steps": 3160, "loss": 0.0368, "lr": 0.0216974154354077, "epoch": 8.354430379746836, "percentage": 41.77, "elapsed_time": "0:04:41", "remaining_time": "0:06:32", "throughput": 2128.25, "total_tokens": 598752}
{"current_steps": 1325, "total_steps": 3160, "loss": 0.0353, "lr": 0.02162318247323868, "epoch": 8.386075949367088, "percentage": 41.93, "elapsed_time": "0:04:42", "remaining_time": "0:06:30", "throughput": 2128.71, "total_tokens": 600960}
{"current_steps": 1330, "total_steps": 3160, "loss": 0.0221, "lr": 0.021548747466963447, "epoch": 8.417721518987342, "percentage": 42.09, "elapsed_time": "0:04:43", "remaining_time": "0:06:29", "throughput": 2129.4, "total_tokens": 603264}
{"current_steps": 1335, "total_steps": 3160, "loss": 0.0107, "lr": 0.02147411268726599, "epoch": 8.449367088607595, "percentage": 42.25, "elapsed_time": "0:04:44", "remaining_time": "0:06:28", "throughput": 2130.04, "total_tokens": 605536}
{"current_steps": 1340, "total_steps": 3160, "loss": 0.0387, "lr": 0.021399280410924492, "epoch": 8.481012658227849, "percentage": 42.41, "elapsed_time": "0:04:45", "remaining_time": "0:06:27", "throughput": 2130.73, "total_tokens": 607840}
{"current_steps": 1345, "total_steps": 3160, "loss": 0.0014, "lr": 0.021324252920741877, "epoch": 8.512658227848101, "percentage": 42.56, "elapsed_time": "0:04:46", "remaining_time": "0:06:26", "throughput": 2131.5, "total_tokens": 610144}
{"current_steps": 1350, "total_steps": 3160, "loss": 0.0213, "lr": 0.021249032505476193, "epoch": 8.544303797468354, "percentage": 42.72, "elapsed_time": "0:04:47", "remaining_time": "0:06:25", "throughput": 2132.16, "total_tokens": 612448}
{"current_steps": 1355, "total_steps": 3160, "loss": 0.0532, "lr": 0.0211736214597708, "epoch": 8.575949367088608, "percentage": 42.88, "elapsed_time": "0:04:48", "remaining_time": "0:06:23", "throughput": 2133.01, "total_tokens": 614816}
{"current_steps": 1360, "total_steps": 3160, "loss": 0.0263, "lr": 0.021098022084084324, "epoch": 8.60759493670886, "percentage": 43.04, "elapsed_time": "0:04:49", "remaining_time": "0:06:22", "throughput": 2133.58, "total_tokens": 617088}
{"current_steps": 1365, "total_steps": 3160, "loss": 0.0137, "lr": 0.02102223668462052, "epoch": 8.639240506329115, "percentage": 43.2, "elapsed_time": "0:04:50", "remaining_time": "0:06:21", "throughput": 2134.11, "total_tokens": 619328}
{"current_steps": 1370, "total_steps": 3160, "loss": 0.024, "lr": 0.02094626757325791, "epoch": 8.670886075949367, "percentage": 43.35, "elapsed_time": "0:04:51", "remaining_time": "0:06:20", "throughput": 2134.74, "total_tokens": 621600}
{"current_steps": 1375, "total_steps": 3160, "loss": 0.0252, "lr": 0.020870117067479252, "epoch": 8.70253164556962, "percentage": 43.51, "elapsed_time": "0:04:52", "remaining_time": "0:06:19", "throughput": 2135.23, "total_tokens": 623840}
{"current_steps": 1380, "total_steps": 3160, "loss": 0.0102, "lr": 0.02079378749030086, "epoch": 8.734177215189874, "percentage": 43.67, "elapsed_time": "0:04:53", "remaining_time": "0:06:18", "throughput": 2135.84, "total_tokens": 626112}
{"current_steps": 1385, "total_steps": 3160, "loss": 0.0083, "lr": 0.020717281170201704, "epoch": 8.765822784810126, "percentage": 43.83, "elapsed_time": "0:04:54", "remaining_time": "0:06:16", "throughput": 2136.56, "total_tokens": 628448}
{"current_steps": 1390, "total_steps": 3160, "loss": 0.0515, "lr": 0.02064060044105243, "epoch": 8.79746835443038, "percentage": 43.99, "elapsed_time": "0:04:55", "remaining_time": "0:06:15", "throughput": 2137.23, "total_tokens": 630752}
{"current_steps": 1395, "total_steps": 3160, "loss": 0.0056, "lr": 0.02056374764204411, "epoch": 8.829113924050633, "percentage": 44.15, "elapsed_time": "0:04:56", "remaining_time": "0:06:14", "throughput": 2137.86, "total_tokens": 633024}
{"current_steps": 1400, "total_steps": 3160, "loss": 0.0236, "lr": 0.02048672511761693, "epoch": 8.860759493670885, "percentage": 44.3, "elapsed_time": "0:04:57", "remaining_time": "0:06:13", "throughput": 2138.36, "total_tokens": 635264}
{"current_steps": 1405, "total_steps": 3160, "loss": 0.0061, "lr": 0.020409535217388638, "epoch": 8.89240506329114, "percentage": 44.46, "elapsed_time": "0:04:58", "remaining_time": "0:06:12", "throughput": 2138.78, "total_tokens": 637472}
{"current_steps": 1410, "total_steps": 3160, "loss": 0.0166, "lr": 0.020332180296082875, "epoch": 8.924050632911392, "percentage": 44.62, "elapsed_time": "0:04:59", "remaining_time": "0:06:11", "throughput": 2139.38, "total_tokens": 639744}
{"current_steps": 1415, "total_steps": 3160, "loss": 0.0118, "lr": 0.020254662713457366, "epoch": 8.955696202531646, "percentage": 44.78, "elapsed_time": "0:05:00", "remaining_time": "0:06:09", "throughput": 2139.97, "total_tokens": 642016}
{"current_steps": 1420, "total_steps": 3160, "loss": 0.0045, "lr": 0.020176984834231897, "epoch": 8.987341772151899, "percentage": 44.94, "elapsed_time": "0:05:00", "remaining_time": "0:06:08", "throughput": 2140.53, "total_tokens": 644288}
{"current_steps": 1422, "total_steps": 3160, "eval_loss": 0.06644842028617859, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:05:03", "remaining_time": "0:06:10", "throughput": 2128.26, "total_tokens": 644944}
{"current_steps": 1425, "total_steps": 3160, "loss": 0.0113, "lr": 0.02009914902801621, "epoch": 9.018987341772151, "percentage": 45.09, "elapsed_time": "0:05:04", "remaining_time": "0:06:10", "throughput": 2121.98, "total_tokens": 646320}
{"current_steps": 1430, "total_steps": 3160, "loss": 0.0084, "lr": 0.020021157669237698, "epoch": 9.050632911392405, "percentage": 45.25, "elapsed_time": "0:05:05", "remaining_time": "0:06:09", "throughput": 2122.31, "total_tokens": 648560}
{"current_steps": 1435, "total_steps": 3160, "loss": 0.0014, "lr": 0.01994301313706898, "epoch": 9.082278481012658, "percentage": 45.41, "elapsed_time": "0:05:06", "remaining_time": "0:06:08", "throughput": 2122.86, "total_tokens": 650832}
{"current_steps": 1440, "total_steps": 3160, "loss": 0.0214, "lr": 0.01986471781535531, "epoch": 9.113924050632912, "percentage": 45.57, "elapsed_time": "0:05:07", "remaining_time": "0:06:07", "throughput": 2123.39, "total_tokens": 653072}
{"current_steps": 1445, "total_steps": 3160, "loss": 0.0146, "lr": 0.019786274092541887, "epoch": 9.145569620253164, "percentage": 45.73, "elapsed_time": "0:05:08", "remaining_time": "0:06:06", "throughput": 2123.92, "total_tokens": 655344}
{"current_steps": 1450, "total_steps": 3160, "loss": 0.0028, "lr": 0.01970768436160095, "epoch": 9.177215189873417, "percentage": 45.89, "elapsed_time": "0:05:09", "remaining_time": "0:06:05", "throughput": 2124.25, "total_tokens": 657520}
{"current_steps": 1455, "total_steps": 3160, "loss": 0.0143, "lr": 0.019628951019958815, "epoch": 9.208860759493671, "percentage": 46.04, "elapsed_time": "0:05:10", "remaining_time": "0:06:03", "throughput": 2124.97, "total_tokens": 659856}
{"current_steps": 1460, "total_steps": 3160, "loss": 0.0047, "lr": 0.01955007646942273, "epoch": 9.240506329113924, "percentage": 46.2, "elapsed_time": "0:05:11", "remaining_time": "0:06:02", "throughput": 2125.5, "total_tokens": 662096}
{"current_steps": 1465, "total_steps": 3160, "loss": 0.0064, "lr": 0.019471063116107593, "epoch": 9.272151898734178, "percentage": 46.36, "elapsed_time": "0:05:12", "remaining_time": "0:06:01", "throughput": 2126.09, "total_tokens": 664368}
{"current_steps": 1470, "total_steps": 3160, "loss": 0.0027, "lr": 0.01939191337036257, "epoch": 9.30379746835443, "percentage": 46.52, "elapsed_time": "0:05:13", "remaining_time": "0:06:00", "throughput": 2126.59, "total_tokens": 666608}
{"current_steps": 1475, "total_steps": 3160, "loss": 0.0119, "lr": 0.019312629646697572, "epoch": 9.335443037974684, "percentage": 46.68, "elapsed_time": "0:05:14", "remaining_time": "0:05:59", "throughput": 2127.37, "total_tokens": 668976}
{"current_steps": 1480, "total_steps": 3160, "loss": 0.0048, "lr": 0.019233214363709557, "epoch": 9.367088607594937, "percentage": 46.84, "elapsed_time": "0:05:15", "remaining_time": "0:05:58", "throughput": 2128.09, "total_tokens": 671312}
{"current_steps": 1485, "total_steps": 3160, "loss": 0.0121, "lr": 0.0191536699440088, "epoch": 9.39873417721519, "percentage": 46.99, "elapsed_time": "0:05:16", "remaining_time": "0:05:56", "throughput": 2128.81, "total_tokens": 673648}
{"current_steps": 1490, "total_steps": 3160, "loss": 0.0044, "lr": 0.019073998814144958, "epoch": 9.430379746835444, "percentage": 47.15, "elapsed_time": "0:05:17", "remaining_time": "0:05:55", "throughput": 2129.5, "total_tokens": 675952}
{"current_steps": 1495, "total_steps": 3160, "loss": 0.0122, "lr": 0.018994203404533068, "epoch": 9.462025316455696, "percentage": 47.31, "elapsed_time": "0:05:18", "remaining_time": "0:05:54", "throughput": 2129.85, "total_tokens": 678128}
{"current_steps": 1500, "total_steps": 3160, "loss": 0.008, "lr": 0.01891428614937938, "epoch": 9.49367088607595, "percentage": 47.47, "elapsed_time": "0:05:19", "remaining_time": "0:05:53", "throughput": 2130.45, "total_tokens": 680432}
{"current_steps": 1505, "total_steps": 3160, "loss": 0.0093, "lr": 0.01883424948660712, "epoch": 9.525316455696203, "percentage": 47.63, "elapsed_time": "0:05:20", "remaining_time": "0:05:52", "throughput": 2130.82, "total_tokens": 682608}
{"current_steps": 1510, "total_steps": 3160, "loss": 0.0179, "lr": 0.018754095857782118, "epoch": 9.556962025316455, "percentage": 47.78, "elapsed_time": "0:05:21", "remaining_time": "0:05:51", "throughput": 2131.45, "total_tokens": 684912}
{"current_steps": 1515, "total_steps": 3160, "loss": 0.0253, "lr": 0.01867382770803832, "epoch": 9.58860759493671, "percentage": 47.94, "elapsed_time": "0:05:22", "remaining_time": "0:05:49", "throughput": 2132.08, "total_tokens": 687216}
{"current_steps": 1520, "total_steps": 3160, "loss": 0.0127, "lr": 0.018593447486003202, "epoch": 9.620253164556962, "percentage": 48.1, "elapsed_time": "0:05:23", "remaining_time": "0:05:48", "throughput": 2132.63, "total_tokens": 689488}
{"current_steps": 1525, "total_steps": 3160, "loss": 0.0011, "lr": 0.018512957643723064, "epoch": 9.651898734177216, "percentage": 48.26, "elapsed_time": "0:05:24", "remaining_time": "0:05:47", "throughput": 2133.24, "total_tokens": 691760}
{"current_steps": 1530, "total_steps": 3160, "loss": 0.0106, "lr": 0.01843236063658825, "epoch": 9.683544303797468, "percentage": 48.42, "elapsed_time": "0:05:25", "remaining_time": "0:05:46", "throughput": 2133.7, "total_tokens": 694000}
{"current_steps": 1535, "total_steps": 3160, "loss": 0.0134, "lr": 0.018351658923258213, "epoch": 9.715189873417721, "percentage": 48.58, "elapsed_time": "0:05:26", "remaining_time": "0:05:45", "throughput": 2134.51, "total_tokens": 696400}
{"current_steps": 1540, "total_steps": 3160, "loss": 0.0175, "lr": 0.018270854965586555, "epoch": 9.746835443037975, "percentage": 48.73, "elapsed_time": "0:05:27", "remaining_time": "0:05:44", "throughput": 2134.96, "total_tokens": 698640}
{"current_steps": 1545, "total_steps": 3160, "loss": 0.0058, "lr": 0.018189951228545883, "epoch": 9.778481012658228, "percentage": 48.89, "elapsed_time": "0:05:28", "remaining_time": "0:05:43", "throughput": 2135.39, "total_tokens": 700848}
{"current_steps": 1550, "total_steps": 3160, "loss": 0.0138, "lr": 0.018108950180152635, "epoch": 9.810126582278482, "percentage": 49.05, "elapsed_time": "0:05:29", "remaining_time": "0:05:41", "throughput": 2136.2, "total_tokens": 703248}
{"current_steps": 1555, "total_steps": 3160, "loss": 0.0099, "lr": 0.018027854291391796, "epoch": 9.841772151898734, "percentage": 49.21, "elapsed_time": "0:05:30", "remaining_time": "0:05:40", "throughput": 2136.7, "total_tokens": 705488}
{"current_steps": 1560, "total_steps": 3160, "loss": 0.0111, "lr": 0.017946666036141513, "epoch": 9.873417721518987, "percentage": 49.37, "elapsed_time": "0:05:31", "remaining_time": "0:05:39", "throughput": 2137.2, "total_tokens": 707728}
{"current_steps": 1565, "total_steps": 3160, "loss": 0.002, "lr": 0.017865387891097616, "epoch": 9.905063291139241, "percentage": 49.53, "elapsed_time": "0:05:32", "remaining_time": "0:05:38", "throughput": 2137.53, "total_tokens": 709904}
{"current_steps": 1570, "total_steps": 3160, "loss": 0.0029, "lr": 0.017784022335698094, "epoch": 9.936708860759493, "percentage": 49.68, "elapsed_time": "0:05:33", "remaining_time": "0:05:37", "throughput": 2138.12, "total_tokens": 712208}
{"current_steps": 1575, "total_steps": 3160, "loss": 0.0091, "lr": 0.01770257185204742, "epoch": 9.968354430379748, "percentage": 49.84, "elapsed_time": "0:05:34", "remaining_time": "0:05:36", "throughput": 2138.6, "total_tokens": 714448}
{"current_steps": 1580, "total_steps": 3160, "loss": 0.013, "lr": 0.017621038924840873, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:05:35", "remaining_time": "0:05:35", "throughput": 2138.58, "total_tokens": 716448}
{"current_steps": 1580, "total_steps": 3160, "eval_loss": 0.06166619062423706, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:05:36", "remaining_time": "0:05:36", "throughput": 2127.97, "total_tokens": 716448}
{"current_steps": 1585, "total_steps": 3160, "loss": 0.0014, "lr": 0.017539426041288716, "epoch": 10.031645569620252, "percentage": 50.16, "elapsed_time": "0:05:38", "remaining_time": "0:05:36", "throughput": 2121.7, "total_tokens": 718880}
{"current_steps": 1590, "total_steps": 3160, "loss": 0.0051, "lr": 0.017457735691040317, "epoch": 10.063291139240507, "percentage": 50.32, "elapsed_time": "0:05:39", "remaining_time": "0:05:35", "throughput": 2122.46, "total_tokens": 721248}
{"current_steps": 1595, "total_steps": 3160, "loss": 0.0169, "lr": 0.017375970366108225, "epoch": 10.094936708860759, "percentage": 50.47, "elapsed_time": "0:05:40", "remaining_time": "0:05:34", "throughput": 2122.97, "total_tokens": 723520}
{"current_steps": 1600, "total_steps": 3160, "loss": 0.0039, "lr": 0.017294132560792125, "epoch": 10.126582278481013, "percentage": 50.63, "elapsed_time": "0:05:41", "remaining_time": "0:05:33", "throughput": 2123.32, "total_tokens": 725696}
{"current_steps": 1605, "total_steps": 3160, "loss": 0.0018, "lr": 0.017212224771602776, "epoch": 10.158227848101266, "percentage": 50.79, "elapsed_time": "0:05:42", "remaining_time": "0:05:32", "throughput": 2123.9, "total_tokens": 727968}
{"current_steps": 1610, "total_steps": 3160, "loss": 0.0087, "lr": 0.01713024949718581, "epoch": 10.189873417721518, "percentage": 50.95, "elapsed_time": "0:05:43", "remaining_time": "0:05:30", "throughput": 2124.57, "total_tokens": 730304}
{"current_steps": 1615, "total_steps": 3160, "loss": 0.002, "lr": 0.01704820923824556, "epoch": 10.221518987341772, "percentage": 51.11, "elapsed_time": "0:05:44", "remaining_time": "0:05:29", "throughput": 2125.27, "total_tokens": 732672}
{"current_steps": 1620, "total_steps": 3160, "loss": 0.0026, "lr": 0.01696610649746875, "epoch": 10.253164556962025, "percentage": 51.27, "elapsed_time": "0:05:45", "remaining_time": "0:05:28", "throughput": 2125.68, "total_tokens": 734880}
{"current_steps": 1625, "total_steps": 3160, "loss": 0.0036, "lr": 0.016883943779448123, "epoch": 10.284810126582279, "percentage": 51.42, "elapsed_time": "0:05:46", "remaining_time": "0:05:27", "throughput": 2126.18, "total_tokens": 737120}
{"current_steps": 1630, "total_steps": 3160, "loss": 0.0004, "lr": 0.016801723590606086, "epoch": 10.316455696202532, "percentage": 51.58, "elapsed_time": "0:05:47", "remaining_time": "0:05:26", "throughput": 2126.66, "total_tokens": 739392}
{"current_steps": 1635, "total_steps": 3160, "loss": 0.0019, "lr": 0.016719448439118236, "epoch": 10.348101265822784, "percentage": 51.74, "elapsed_time": "0:05:48", "remaining_time": "0:05:25", "throughput": 2127.15, "total_tokens": 741632}
{"current_steps": 1640, "total_steps": 3160, "loss": 0.0012, "lr": 0.016637120834836816, "epoch": 10.379746835443038, "percentage": 51.9, "elapsed_time": "0:05:49", "remaining_time": "0:05:24", "throughput": 2127.9, "total_tokens": 744000}
{"current_steps": 1645, "total_steps": 3160, "loss": 0.0019, "lr": 0.016554743289214174, "epoch": 10.41139240506329, "percentage": 52.06, "elapsed_time": "0:05:50", "remaining_time": "0:05:22", "throughput": 2128.46, "total_tokens": 746272}
{"current_steps": 1650, "total_steps": 3160, "loss": 0.0011, "lr": 0.016472318315226164, "epoch": 10.443037974683545, "percentage": 52.22, "elapsed_time": "0:05:51", "remaining_time": "0:05:21", "throughput": 2129.04, "total_tokens": 748576}
{"current_steps": 1655, "total_steps": 3160, "loss": 0.0022, "lr": 0.016389848427295465, "epoch": 10.474683544303797, "percentage": 52.37, "elapsed_time": "0:05:52", "remaining_time": "0:05:20", "throughput": 2129.82, "total_tokens": 750944}
{"current_steps": 1660, "total_steps": 3160, "loss": 0.0007, "lr": 0.016307336141214875, "epoch": 10.50632911392405, "percentage": 52.53, "elapsed_time": "0:05:53", "remaining_time": "0:05:19", "throughput": 2129.86, "total_tokens": 753184}
{"current_steps": 1665, "total_steps": 3160, "loss": 0.0005, "lr": 0.016224783974070574, "epoch": 10.537974683544304, "percentage": 52.69, "elapsed_time": "0:05:54", "remaining_time": "0:05:18", "throughput": 2130.19, "total_tokens": 755360}
{"current_steps": 1670, "total_steps": 3160, "loss": 0.001, "lr": 0.016142194444165342, "epoch": 10.569620253164556, "percentage": 52.85, "elapsed_time": "0:05:55", "remaining_time": "0:05:17", "throughput": 2130.5, "total_tokens": 757536}
{"current_steps": 1675, "total_steps": 3160, "loss": 0.0046, "lr": 0.01605957007094174, "epoch": 10.60126582278481, "percentage": 53.01, "elapsed_time": "0:05:56", "remaining_time": "0:05:16", "throughput": 2131.05, "total_tokens": 759808}
{"current_steps": 1680, "total_steps": 3160, "loss": 0.0007, "lr": 0.015976913374905227, "epoch": 10.632911392405063, "percentage": 53.16, "elapsed_time": "0:05:57", "remaining_time": "0:05:14", "throughput": 2131.82, "total_tokens": 762208}
{"current_steps": 1685, "total_steps": 3160, "loss": 0.001, "lr": 0.015894226877547296, "epoch": 10.664556962025316, "percentage": 53.32, "elapsed_time": "0:05:58", "remaining_time": "0:05:13", "throughput": 2132.31, "total_tokens": 764480}
{"current_steps": 1690, "total_steps": 3160, "loss": 0.0004, "lr": 0.015811513101268555, "epoch": 10.69620253164557, "percentage": 53.48, "elapsed_time": "0:05:59", "remaining_time": "0:05:12", "throughput": 2132.76, "total_tokens": 766720}
{"current_steps": 1695, "total_steps": 3160, "loss": 0.0018, "lr": 0.015728774569301763, "epoch": 10.727848101265822, "percentage": 53.64, "elapsed_time": "0:06:00", "remaining_time": "0:05:11", "throughput": 2133.06, "total_tokens": 768896}
{"current_steps": 1700, "total_steps": 3160, "loss": 0.002, "lr": 0.015646013805634868, "epoch": 10.759493670886076, "percentage": 53.8, "elapsed_time": "0:06:01", "remaining_time": "0:05:10", "throughput": 2133.62, "total_tokens": 771200}
{"current_steps": 1705, "total_steps": 3160, "loss": 0.0029, "lr": 0.015563233334934002, "epoch": 10.791139240506329, "percentage": 53.96, "elapsed_time": "0:06:02", "remaining_time": "0:05:09", "throughput": 2134.01, "total_tokens": 773440}
{"current_steps": 1710, "total_steps": 3160, "loss": 0.002, "lr": 0.01548043568246649, "epoch": 10.822784810126583, "percentage": 54.11, "elapsed_time": "0:06:03", "remaining_time": "0:05:08", "throughput": 2134.24, "total_tokens": 775584}
{"current_steps": 1715, "total_steps": 3160, "loss": 0.0053, "lr": 0.01539762337402378, "epoch": 10.854430379746836, "percentage": 54.27, "elapsed_time": "0:06:04", "remaining_time": "0:05:07", "throughput": 2134.68, "total_tokens": 777824}
{"current_steps": 1720, "total_steps": 3160, "loss": 0.0175, "lr": 0.015314798935844417, "epoch": 10.886075949367088, "percentage": 54.43, "elapsed_time": "0:06:05", "remaining_time": "0:05:05", "throughput": 2135.34, "total_tokens": 780160}
{"current_steps": 1725, "total_steps": 3160, "loss": 0.0025, "lr": 0.015231964894536964, "epoch": 10.917721518987342, "percentage": 54.59, "elapsed_time": "0:06:06", "remaining_time": "0:05:04", "throughput": 2136.03, "total_tokens": 782528}
{"current_steps": 1730, "total_steps": 3160, "loss": 0.0008, "lr": 0.015149123777002947, "epoch": 10.949367088607595, "percentage": 54.75, "elapsed_time": "0:06:07", "remaining_time": "0:05:03", "throughput": 2136.56, "total_tokens": 784832}
{"current_steps": 1735, "total_steps": 3160, "loss": 0.0002, "lr": 0.015066278110359738, "epoch": 10.981012658227849, "percentage": 54.91, "elapsed_time": "0:06:08", "remaining_time": "0:05:02", "throughput": 2137.08, "total_tokens": 787136}
{"current_steps": 1738, "total_steps": 3160, "eval_loss": 0.08956073224544525, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:06:10", "remaining_time": "0:05:03", "throughput": 2127.02, "total_tokens": 788256}
{"current_steps": 1740, "total_steps": 3160, "loss": 0.0011, "lr": 0.014983430421863501, "epoch": 11.012658227848101, "percentage": 55.06, "elapsed_time": "0:06:11", "remaining_time": "0:05:03", "throughput": 2122.24, "total_tokens": 789184}
{"current_steps": 1745, "total_steps": 3160, "loss": 0.0008, "lr": 0.014900583238832062, "epoch": 11.044303797468354, "percentage": 55.22, "elapsed_time": "0:06:12", "remaining_time": "0:05:02", "throughput": 2122.5, "total_tokens": 791328}
{"current_steps": 1750, "total_steps": 3160, "loss": 0.0026, "lr": 0.014817739088567832, "epoch": 11.075949367088608, "percentage": 55.38, "elapsed_time": "0:06:13", "remaining_time": "0:05:01", "throughput": 2122.88, "total_tokens": 793536}
{"current_steps": 1755, "total_steps": 3160, "loss": 0.0001, "lr": 0.014734900498280717, "epoch": 11.10759493670886, "percentage": 55.54, "elapsed_time": "0:06:14", "remaining_time": "0:05:00", "throughput": 2123.26, "total_tokens": 795744}
{"current_steps": 1760, "total_steps": 3160, "loss": 0.0002, "lr": 0.014652069995011003, "epoch": 11.139240506329115, "percentage": 55.7, "elapsed_time": "0:06:15", "remaining_time": "0:04:58", "throughput": 2123.72, "total_tokens": 797984}
{"current_steps": 1765, "total_steps": 3160, "loss": 0.0006, "lr": 0.014569250105552262, "epoch": 11.170886075949367, "percentage": 55.85, "elapsed_time": "0:06:16", "remaining_time": "0:04:57", "throughput": 2124.4, "total_tokens": 800352}
{"current_steps": 1770, "total_steps": 3160, "loss": 0.0008, "lr": 0.014486443356374317, "epoch": 11.20253164556962, "percentage": 56.01, "elapsed_time": "0:06:17", "remaining_time": "0:04:56", "throughput": 2124.83, "total_tokens": 802592}
{"current_steps": 1775, "total_steps": 3160, "loss": 0.0013, "lr": 0.014403652273546117, "epoch": 11.234177215189874, "percentage": 56.17, "elapsed_time": "0:06:18", "remaining_time": "0:04:55", "throughput": 2125.42, "total_tokens": 804896}
{"current_steps": 1780, "total_steps": 3160, "loss": 0.0004, "lr": 0.014320879382658702, "epoch": 11.265822784810126, "percentage": 56.33, "elapsed_time": "0:06:19", "remaining_time": "0:04:54", "throughput": 2125.89, "total_tokens": 807168}
{"current_steps": 1785, "total_steps": 3160, "loss": 0.0001, "lr": 0.014238127208748164, "epoch": 11.29746835443038, "percentage": 56.49, "elapsed_time": "0:06:20", "remaining_time": "0:04:53", "throughput": 2126.34, "total_tokens": 809408}
{"current_steps": 1790, "total_steps": 3160, "loss": 0.0007, "lr": 0.014155398276218605, "epoch": 11.329113924050633, "percentage": 56.65, "elapsed_time": "0:06:21", "remaining_time": "0:04:52", "throughput": 2126.79, "total_tokens": 811680}
{"current_steps": 1795, "total_steps": 3160, "loss": 0.0003, "lr": 0.014072695108765128, "epoch": 11.360759493670885, "percentage": 56.8, "elapsed_time": "0:06:22", "remaining_time": "0:04:50", "throughput": 2127.02, "total_tokens": 813824}
{"current_steps": 1800, "total_steps": 3160, "loss": 0.0028, "lr": 0.013990020229296886, "epoch": 11.39240506329114, "percentage": 56.96, "elapsed_time": "0:06:23", "remaining_time": "0:04:49", "throughput": 2127.53, "total_tokens": 816096}
{"current_steps": 1805, "total_steps": 3160, "loss": 0.003, "lr": 0.013907376159860046, "epoch": 11.424050632911392, "percentage": 57.12, "elapsed_time": "0:06:24", "remaining_time": "0:04:48", "throughput": 2128.21, "total_tokens": 818496}
{"current_steps": 1810, "total_steps": 3160, "loss": 0.0005, "lr": 0.013824765421560938, "epoch": 11.455696202531646, "percentage": 57.28, "elapsed_time": "0:06:25", "remaining_time": "0:04:47", "throughput": 2128.58, "total_tokens": 820704}
{"current_steps": 1815, "total_steps": 3160, "loss": 0.0008, "lr": 0.013742190534489085, "epoch": 11.487341772151899, "percentage": 57.44, "elapsed_time": "0:06:26", "remaining_time": "0:04:46", "throughput": 2129.16, "total_tokens": 823040}
{"current_steps": 1820, "total_steps": 3160, "loss": 0.0001, "lr": 0.013659654017640343, "epoch": 11.518987341772151, "percentage": 57.59, "elapsed_time": "0:06:27", "remaining_time": "0:04:45", "throughput": 2129.52, "total_tokens": 825248}
{"current_steps": 1825, "total_steps": 3160, "loss": 0.0007, "lr": 0.013577158388840075, "epoch": 11.550632911392405, "percentage": 57.75, "elapsed_time": "0:06:28", "remaining_time": "0:04:44", "throughput": 2130.03, "total_tokens": 827552}
{"current_steps": 1830, "total_steps": 3160, "loss": 0.0013, "lr": 0.013494706164666324, "epoch": 11.582278481012658, "percentage": 57.91, "elapsed_time": "0:06:29", "remaining_time": "0:04:43", "throughput": 2130.5, "total_tokens": 829856}
{"current_steps": 1835, "total_steps": 3160, "loss": 0.0006, "lr": 0.013412299860373046, "epoch": 11.613924050632912, "percentage": 58.07, "elapsed_time": "0:06:30", "remaining_time": "0:04:41", "throughput": 2131.01, "total_tokens": 832192}
{"current_steps": 1840, "total_steps": 3160, "loss": 0.0002, "lr": 0.013329941989813392, "epoch": 11.645569620253164, "percentage": 58.23, "elapsed_time": "0:06:31", "remaining_time": "0:04:40", "throughput": 2131.3, "total_tokens": 834368}
{"current_steps": 1845, "total_steps": 3160, "loss": 0.0007, "lr": 0.013247635065363007, "epoch": 11.677215189873417, "percentage": 58.39, "elapsed_time": "0:06:32", "remaining_time": "0:04:39", "throughput": 2131.8, "total_tokens": 836672}
{"current_steps": 1850, "total_steps": 3160, "loss": 0.0004, "lr": 0.013165381597843384, "epoch": 11.708860759493671, "percentage": 58.54, "elapsed_time": "0:06:33", "remaining_time": "0:04:38", "throughput": 2132.21, "total_tokens": 838912}
{"current_steps": 1855, "total_steps": 3160, "loss": 0.0003, "lr": 0.013083184096445313, "epoch": 11.740506329113924, "percentage": 58.7, "elapsed_time": "0:06:34", "remaining_time": "0:04:37", "throughput": 2132.72, "total_tokens": 841216}
{"current_steps": 1860, "total_steps": 3160, "loss": 0.0009, "lr": 0.013001045068652269, "epoch": 11.772151898734178, "percentage": 58.86, "elapsed_time": "0:06:35", "remaining_time": "0:04:36", "throughput": 2133.22, "total_tokens": 843520}
{"current_steps": 1865, "total_steps": 3160, "loss": 0.0003, "lr": 0.012918967020163976, "epoch": 11.80379746835443, "percentage": 59.02, "elapsed_time": "0:06:36", "remaining_time": "0:04:35", "throughput": 2133.75, "total_tokens": 845824}
{"current_steps": 1870, "total_steps": 3160, "loss": 0.0006, "lr": 0.012836952454819943, "epoch": 11.835443037974684, "percentage": 59.18, "elapsed_time": "0:06:37", "remaining_time": "0:04:34", "throughput": 2134.31, "total_tokens": 848160}
{"current_steps": 1875, "total_steps": 3160, "loss": 0.0003, "lr": 0.012755003874523082, "epoch": 11.867088607594937, "percentage": 59.34, "elapsed_time": "0:06:38", "remaining_time": "0:04:33", "throughput": 2134.71, "total_tokens": 850400}
{"current_steps": 1880, "total_steps": 3160, "loss": 0.0009, "lr": 0.012673123779163402, "epoch": 11.89873417721519, "percentage": 59.49, "elapsed_time": "0:06:39", "remaining_time": "0:04:31", "throughput": 2135.29, "total_tokens": 852736}
{"current_steps": 1885, "total_steps": 3160, "loss": 0.0001, "lr": 0.01259131466654173, "epoch": 11.930379746835444, "percentage": 59.65, "elapsed_time": "0:06:40", "remaining_time": "0:04:30", "throughput": 2135.83, "total_tokens": 855072}
{"current_steps": 1890, "total_steps": 3160, "loss": 0.0005, "lr": 0.012509579032293525, "epoch": 11.962025316455696, "percentage": 59.81, "elapsed_time": "0:06:41", "remaining_time": "0:04:29", "throughput": 2136.21, "total_tokens": 857312}
{"current_steps": 1895, "total_steps": 3160, "loss": 0.0001, "lr": 0.012427919369812754, "epoch": 11.99367088607595, "percentage": 59.97, "elapsed_time": "0:06:42", "remaining_time": "0:04:28", "throughput": 2136.67, "total_tokens": 859584}
{"current_steps": 1896, "total_steps": 3160, "eval_loss": 0.09028957784175873, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:06:44", "remaining_time": "0:04:29", "throughput": 2127.54, "total_tokens": 859808}
{"current_steps": 1900, "total_steps": 3160, "loss": 0.0001, "lr": 0.012346338170175808, "epoch": 12.025316455696203, "percentage": 60.13, "elapsed_time": "0:06:45", "remaining_time": "0:04:29", "throughput": 2123.17, "total_tokens": 861632}
{"current_steps": 1905, "total_steps": 3160, "loss": 0.0002, "lr": 0.012264837922065518, "epoch": 12.056962025316455, "percentage": 60.28, "elapsed_time": "0:06:46", "remaining_time": "0:04:27", "throughput": 2123.43, "total_tokens": 863808}
{"current_steps": 1910, "total_steps": 3160, "loss": 0.0004, "lr": 0.012183421111695262, "epoch": 12.08860759493671, "percentage": 60.44, "elapsed_time": "0:06:47", "remaining_time": "0:04:26", "throughput": 2123.79, "total_tokens": 866048}
{"current_steps": 1915, "total_steps": 3160, "loss": 0.0006, "lr": 0.012102090222733081, "epoch": 12.120253164556962, "percentage": 60.6, "elapsed_time": "0:06:48", "remaining_time": "0:04:25", "throughput": 2124.25, "total_tokens": 868352}
{"current_steps": 1920, "total_steps": 3160, "loss": 0.0005, "lr": 0.012020847736225939, "epoch": 12.151898734177216, "percentage": 60.76, "elapsed_time": "0:06:49", "remaining_time": "0:04:24", "throughput": 2124.59, "total_tokens": 870560}
{"current_steps": 1925, "total_steps": 3160, "loss": 0.0005, "lr": 0.011939696130524032, "epoch": 12.183544303797468, "percentage": 60.92, "elapsed_time": "0:06:50", "remaining_time": "0:04:23", "throughput": 2124.93, "total_tokens": 872768}
{"current_steps": 1930, "total_steps": 3160, "loss": 0.0, "lr": 0.011858637881205177, "epoch": 12.215189873417721, "percentage": 61.08, "elapsed_time": "0:06:51", "remaining_time": "0:04:22", "throughput": 2125.28, "total_tokens": 874976}
{"current_steps": 1935, "total_steps": 3160, "loss": 0.0003, "lr": 0.011777675460999311, "epoch": 12.246835443037975, "percentage": 61.23, "elapsed_time": "0:06:52", "remaining_time": "0:04:21", "throughput": 2125.9, "total_tokens": 877344}
{"current_steps": 1940, "total_steps": 3160, "loss": 0.0005, "lr": 0.01169681133971304, "epoch": 12.278481012658228, "percentage": 61.39, "elapsed_time": "0:06:53", "remaining_time": "0:04:20", "throughput": 2126.31, "total_tokens": 879584}
{"current_steps": 1945, "total_steps": 3160, "loss": 0.0002, "lr": 0.011616047984154299, "epoch": 12.310126582278482, "percentage": 61.55, "elapsed_time": "0:06:54", "remaining_time": "0:04:19", "throughput": 2126.59, "total_tokens": 881760}
{"current_steps": 1950, "total_steps": 3160, "loss": 0.0004, "lr": 0.011535387858057114, "epoch": 12.341772151898734, "percentage": 61.71, "elapsed_time": "0:06:55", "remaining_time": "0:04:17", "throughput": 2126.93, "total_tokens": 883968}
{"current_steps": 1955, "total_steps": 3160, "loss": 0.0005, "lr": 0.011454833422006427, "epoch": 12.373417721518987, "percentage": 61.87, "elapsed_time": "0:06:56", "remaining_time": "0:04:16", "throughput": 2127.2, "total_tokens": 886144}
{"current_steps": 1960, "total_steps": 3160, "loss": 0.0004, "lr": 0.011374387133363046, "epoch": 12.405063291139241, "percentage": 62.03, "elapsed_time": "0:06:57", "remaining_time": "0:04:15", "throughput": 2127.71, "total_tokens": 888448}
{"current_steps": 1965, "total_steps": 3160, "loss": 0.0, "lr": 0.01129405144618868, "epoch": 12.436708860759493, "percentage": 62.18, "elapsed_time": "0:06:58", "remaining_time": "0:04:14", "throughput": 2128.16, "total_tokens": 890720}
{"current_steps": 1970, "total_steps": 3160, "loss": 0.0012, "lr": 0.01121382881117107, "epoch": 12.468354430379748, "percentage": 62.34, "elapsed_time": "0:06:59", "remaining_time": "0:04:13", "throughput": 2128.62, "total_tokens": 892992}
{"current_steps": 1975, "total_steps": 3160, "loss": 0.0009, "lr": 0.011133721675549232, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:07:00", "remaining_time": "0:04:12", "throughput": 2129.2, "total_tokens": 895360}
{"current_steps": 1980, "total_steps": 3160, "loss": 0.0003, "lr": 0.011053732483038824, "epoch": 12.531645569620252, "percentage": 62.66, "elapsed_time": "0:07:01", "remaining_time": "0:04:11", "throughput": 2129.69, "total_tokens": 897696}
{"current_steps": 1985, "total_steps": 3160, "loss": 0.0003, "lr": 0.010973863673757548, "epoch": 12.563291139240507, "percentage": 62.82, "elapsed_time": "0:07:02", "remaining_time": "0:04:10", "throughput": 2130.23, "total_tokens": 900032}
{"current_steps": 1990, "total_steps": 3160, "loss": 0.0001, "lr": 0.010894117684150773, "epoch": 12.594936708860759, "percentage": 62.97, "elapsed_time": "0:07:03", "remaining_time": "0:04:08", "throughput": 2130.74, "total_tokens": 902336}
{"current_steps": 1995, "total_steps": 3160, "loss": 0.0002, "lr": 0.010814496946917168, "epoch": 12.626582278481013, "percentage": 63.13, "elapsed_time": "0:07:04", "remaining_time": "0:04:07", "throughput": 2131.0, "total_tokens": 904512}
{"current_steps": 2000, "total_steps": 3160, "loss": 0.0005, "lr": 0.010735003890934494, "epoch": 12.658227848101266, "percentage": 63.29, "elapsed_time": "0:07:05", "remaining_time": "0:04:06", "throughput": 2131.67, "total_tokens": 906944}
{"current_steps": 2005, "total_steps": 3160, "loss": 0.0001, "lr": 0.010655640941185544, "epoch": 12.689873417721518, "percentage": 63.45, "elapsed_time": "0:07:06", "remaining_time": "0:04:05", "throughput": 2132.24, "total_tokens": 909280}
{"current_steps": 2010, "total_steps": 3160, "loss": 0.0005, "lr": 0.010576410518684127, "epoch": 12.721518987341772, "percentage": 63.61, "elapsed_time": "0:07:07", "remaining_time": "0:04:04", "throughput": 2132.43, "total_tokens": 911424}
{"current_steps": 2015, "total_steps": 3160, "loss": 0.0006, "lr": 0.01049731504040122, "epoch": 12.753164556962025, "percentage": 63.77, "elapsed_time": "0:07:08", "remaining_time": "0:04:03", "throughput": 2132.99, "total_tokens": 913760}
{"current_steps": 2020, "total_steps": 3160, "loss": 0.0012, "lr": 0.010418356919191284, "epoch": 12.784810126582279, "percentage": 63.92, "elapsed_time": "0:07:09", "remaining_time": "0:04:02", "throughput": 2133.51, "total_tokens": 916096}
{"current_steps": 2025, "total_steps": 3160, "loss": 0.0002, "lr": 0.010339538563718576, "epoch": 12.816455696202532, "percentage": 64.08, "elapsed_time": "0:07:10", "remaining_time": "0:04:01", "throughput": 2133.88, "total_tokens": 918336}
{"current_steps": 2030, "total_steps": 3160, "loss": 0.0008, "lr": 0.010260862378383738, "epoch": 12.848101265822784, "percentage": 64.24, "elapsed_time": "0:07:11", "remaining_time": "0:04:00", "throughput": 2134.39, "total_tokens": 920672}
{"current_steps": 2035, "total_steps": 3160, "loss": 0.0003, "lr": 0.01018233076325044, "epoch": 12.879746835443038, "percentage": 64.4, "elapsed_time": "0:07:12", "remaining_time": "0:03:59", "throughput": 2135.03, "total_tokens": 923072}
{"current_steps": 2040, "total_steps": 3160, "loss": 0.0006, "lr": 0.01010394611397213, "epoch": 12.91139240506329, "percentage": 64.56, "elapsed_time": "0:07:13", "remaining_time": "0:03:57", "throughput": 2135.55, "total_tokens": 925440}
{"current_steps": 2045, "total_steps": 3160, "loss": 0.0002, "lr": 0.010025710821718983, "epoch": 12.943037974683545, "percentage": 64.72, "elapsed_time": "0:07:14", "remaining_time": "0:03:56", "throughput": 2136.03, "total_tokens": 927744}
{"current_steps": 2050, "total_steps": 3160, "loss": 0.0005, "lr": 0.009947627273104958, "epoch": 12.974683544303797, "percentage": 64.87, "elapsed_time": "0:07:15", "remaining_time": "0:03:55", "throughput": 2136.33, "total_tokens": 929952}
{"current_steps": 2054, "total_steps": 3160, "eval_loss": 0.09263655543327332, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:07:17", "remaining_time": "0:03:55", "throughput": 2128.08, "total_tokens": 931472}
{"current_steps": 2055, "total_steps": 3160, "loss": 0.0004, "lr": 0.00986969785011497, "epoch": 13.00632911392405, "percentage": 65.03, "elapsed_time": "0:07:18", "remaining_time": "0:03:55", "throughput": 2123.9, "total_tokens": 931952}
{"current_steps": 2060, "total_steps": 3160, "loss": 0.0007, "lr": 0.009791924930032251, "epoch": 13.037974683544304, "percentage": 65.19, "elapsed_time": "0:07:19", "remaining_time": "0:03:54", "throughput": 2124.43, "total_tokens": 934288}
{"current_steps": 2065, "total_steps": 3160, "loss": 0.0002, "lr": 0.00971431088536582, "epoch": 13.069620253164556, "percentage": 65.35, "elapsed_time": "0:07:20", "remaining_time": "0:03:53", "throughput": 2124.95, "total_tokens": 936624}
{"current_steps": 2070, "total_steps": 3160, "loss": 0.0004, "lr": 0.009636858083778092, "epoch": 13.10126582278481, "percentage": 65.51, "elapsed_time": "0:07:21", "remaining_time": "0:03:52", "throughput": 2125.44, "total_tokens": 938928}
{"current_steps": 2075, "total_steps": 3160, "loss": 0.0002, "lr": 0.00955956888801269, "epoch": 13.132911392405063, "percentage": 65.66, "elapsed_time": "0:07:22", "remaining_time": "0:03:51", "throughput": 2125.89, "total_tokens": 941232}
{"current_steps": 2080, "total_steps": 3160, "loss": 0.0002, "lr": 0.009482445655822326, "epoch": 13.164556962025316, "percentage": 65.82, "elapsed_time": "0:07:23", "remaining_time": "0:03:50", "throughput": 2126.19, "total_tokens": 943440}
{"current_steps": 2085, "total_steps": 3160, "loss": 0.0001, "lr": 0.009405490739896898, "epoch": 13.19620253164557, "percentage": 65.98, "elapsed_time": "0:07:24", "remaining_time": "0:03:49", "throughput": 2126.5, "total_tokens": 945648}
{"current_steps": 2090, "total_steps": 3160, "loss": 0.0, "lr": 0.009328706487791726, "epoch": 13.227848101265822, "percentage": 66.14, "elapsed_time": "0:07:25", "remaining_time": "0:03:48", "throughput": 2126.94, "total_tokens": 947920}
{"current_steps": 2095, "total_steps": 3160, "loss": 0.0001, "lr": 0.009252095241855923, "epoch": 13.259493670886076, "percentage": 66.3, "elapsed_time": "0:07:26", "remaining_time": "0:03:47", "throughput": 2127.47, "total_tokens": 950288}
{"current_steps": 2100, "total_steps": 3160, "loss": 0.0002, "lr": 0.009175659339160935, "epoch": 13.291139240506329, "percentage": 66.46, "elapsed_time": "0:07:27", "remaining_time": "0:03:45", "throughput": 2127.9, "total_tokens": 952560}
{"current_steps": 2105, "total_steps": 3160, "loss": 0.0004, "lr": 0.009099401111429277, "epoch": 13.322784810126583, "percentage": 66.61, "elapsed_time": "0:07:28", "remaining_time": "0:03:44", "throughput": 2128.35, "total_tokens": 954864}
{"current_steps": 2110, "total_steps": 3160, "loss": 0.0007, "lr": 0.009023322884963372, "epoch": 13.354430379746836, "percentage": 66.77, "elapsed_time": "0:07:29", "remaining_time": "0:03:43", "throughput": 2128.84, "total_tokens": 957168}
{"current_steps": 2115, "total_steps": 3160, "loss": 0.0002, "lr": 0.008947426980574607, "epoch": 13.386075949367088, "percentage": 66.93, "elapsed_time": "0:07:30", "remaining_time": "0:03:42", "throughput": 2129.2, "total_tokens": 959408}
{"current_steps": 2120, "total_steps": 3160, "loss": 0.0002, "lr": 0.008871715713512522, "epoch": 13.417721518987342, "percentage": 67.09, "elapsed_time": "0:07:31", "remaining_time": "0:03:41", "throughput": 2129.53, "total_tokens": 961648}
{"current_steps": 2125, "total_steps": 3160, "loss": 0.0003, "lr": 0.008796191393394177, "epoch": 13.449367088607595, "percentage": 67.25, "elapsed_time": "0:07:32", "remaining_time": "0:03:40", "throughput": 2130.07, "total_tokens": 964016}
{"current_steps": 2130, "total_steps": 3160, "loss": 0.0003, "lr": 0.00872085632413372, "epoch": 13.481012658227849, "percentage": 67.41, "elapsed_time": "0:07:33", "remaining_time": "0:03:39", "throughput": 2130.39, "total_tokens": 966256}
{"current_steps": 2135, "total_steps": 3160, "loss": 0.0005, "lr": 0.008645712803872083, "epoch": 13.512658227848101, "percentage": 67.56, "elapsed_time": "0:07:34", "remaining_time": "0:03:38", "throughput": 2130.64, "total_tokens": 968432}
{"current_steps": 2140, "total_steps": 3160, "loss": 0.0004, "lr": 0.008570763124906865, "epoch": 13.544303797468354, "percentage": 67.72, "elapsed_time": "0:07:35", "remaining_time": "0:03:37", "throughput": 2131.0, "total_tokens": 970672}
{"current_steps": 2145, "total_steps": 3160, "loss": 0.0012, "lr": 0.00849600957362246, "epoch": 13.575949367088608, "percentage": 67.88, "elapsed_time": "0:07:36", "remaining_time": "0:03:36", "throughput": 2131.35, "total_tokens": 972912}
{"current_steps": 2150, "total_steps": 3160, "loss": 0.0012, "lr": 0.008421454430420234, "epoch": 13.60759493670886, "percentage": 68.04, "elapsed_time": "0:07:37", "remaining_time": "0:03:34", "throughput": 2131.67, "total_tokens": 975152}
{"current_steps": 2155, "total_steps": 3160, "loss": 0.0001, "lr": 0.008347099969649014, "epoch": 13.639240506329115, "percentage": 68.2, "elapsed_time": "0:07:38", "remaining_time": "0:03:33", "throughput": 2132.32, "total_tokens": 977584}
{"current_steps": 2160, "total_steps": 3160, "loss": 0.0001, "lr": 0.008272948459535695, "epoch": 13.670886075949367, "percentage": 68.35, "elapsed_time": "0:07:39", "remaining_time": "0:03:32", "throughput": 2132.69, "total_tokens": 979856}
{"current_steps": 2165, "total_steps": 3160, "loss": 0.0002, "lr": 0.008199002162116022, "epoch": 13.70253164556962, "percentage": 68.51, "elapsed_time": "0:07:40", "remaining_time": "0:03:31", "throughput": 2133.23, "total_tokens": 982256}
{"current_steps": 2170, "total_steps": 3160, "loss": 0.0001, "lr": 0.008125263333165628, "epoch": 13.734177215189874, "percentage": 68.67, "elapsed_time": "0:07:41", "remaining_time": "0:03:30", "throughput": 2133.71, "total_tokens": 984592}
{"current_steps": 2175, "total_steps": 3160, "loss": 0.0001, "lr": 0.008051734222131186, "epoch": 13.765822784810126, "percentage": 68.83, "elapsed_time": "0:07:42", "remaining_time": "0:03:29", "throughput": 2134.08, "total_tokens": 986864}
{"current_steps": 2180, "total_steps": 3160, "loss": 0.0004, "lr": 0.00797841707206179, "epoch": 13.79746835443038, "percentage": 68.99, "elapsed_time": "0:07:43", "remaining_time": "0:03:28", "throughput": 2134.45, "total_tokens": 989136}
{"current_steps": 2185, "total_steps": 3160, "loss": 0.0007, "lr": 0.00790531411954057, "epoch": 13.829113924050633, "percentage": 69.15, "elapsed_time": "0:07:44", "remaining_time": "0:03:27", "throughput": 2134.85, "total_tokens": 991440}
{"current_steps": 2190, "total_steps": 3160, "loss": 0.0003, "lr": 0.007832427594616397, "epoch": 13.860759493670885, "percentage": 69.3, "elapsed_time": "0:07:45", "remaining_time": "0:03:26", "throughput": 2135.25, "total_tokens": 993712}
{"current_steps": 2195, "total_steps": 3160, "loss": 0.0007, "lr": 0.0077597597207359125, "epoch": 13.89240506329114, "percentage": 69.46, "elapsed_time": "0:07:46", "remaining_time": "0:03:25", "throughput": 2135.54, "total_tokens": 995920}
{"current_steps": 2200, "total_steps": 3160, "loss": 0.0004, "lr": 0.007687312714675674, "epoch": 13.924050632911392, "percentage": 69.62, "elapsed_time": "0:07:47", "remaining_time": "0:03:23", "throughput": 2135.99, "total_tokens": 998224}
{"current_steps": 2205, "total_steps": 3160, "loss": 0.0007, "lr": 0.007615088786474526, "epoch": 13.955696202531646, "percentage": 69.78, "elapsed_time": "0:07:48", "remaining_time": "0:03:22", "throughput": 2136.21, "total_tokens": 1000400}
{"current_steps": 2210, "total_steps": 3160, "loss": 0.0003, "lr": 0.0075430901393662, "epoch": 13.987341772151899, "percentage": 69.94, "elapsed_time": "0:07:49", "remaining_time": "0:03:21", "throughput": 2136.55, "total_tokens": 1002640}
{"current_steps": 2212, "total_steps": 3160, "eval_loss": 0.09213147312402725, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:07:51", "remaining_time": "0:03:21", "throughput": 2128.8, "total_tokens": 1003376}
{"current_steps": 2215, "total_steps": 3160, "loss": 0.0002, "lr": 0.007471318969712099, "epoch": 14.018987341772151, "percentage": 70.09, "elapsed_time": "0:07:52", "remaining_time": "0:03:21", "throughput": 2125.0, "total_tokens": 1004752}
{"current_steps": 2220, "total_steps": 3160, "loss": 0.0002, "lr": 0.007399777466934275, "epoch": 14.050632911392405, "percentage": 70.25, "elapsed_time": "0:07:53", "remaining_time": "0:03:20", "throughput": 2125.39, "total_tokens": 1007120}
{"current_steps": 2225, "total_steps": 3160, "loss": 0.0006, "lr": 0.007328467813448668, "epoch": 14.082278481012658, "percentage": 70.41, "elapsed_time": "0:07:54", "remaining_time": "0:03:19", "throughput": 2125.93, "total_tokens": 1009520}
{"current_steps": 2230, "total_steps": 3160, "loss": 0.0003, "lr": 0.007257392184598517, "epoch": 14.113924050632912, "percentage": 70.57, "elapsed_time": "0:07:55", "remaining_time": "0:03:18", "throughput": 2126.34, "total_tokens": 1011792}
{"current_steps": 2235, "total_steps": 3160, "loss": 0.0002, "lr": 0.007186552748587997, "epoch": 14.145569620253164, "percentage": 70.73, "elapsed_time": "0:07:56", "remaining_time": "0:03:17", "throughput": 2126.64, "total_tokens": 1014032}
{"current_steps": 2240, "total_steps": 3160, "loss": 0.0008, "lr": 0.00711595166641609, "epoch": 14.177215189873417, "percentage": 70.89, "elapsed_time": "0:07:57", "remaining_time": "0:03:16", "throughput": 2127.1, "total_tokens": 1016336}
{"current_steps": 2245, "total_steps": 3160, "loss": 0.0005, "lr": 0.007045591091810634, "epoch": 14.208860759493671, "percentage": 71.04, "elapsed_time": "0:07:58", "remaining_time": "0:03:15", "throughput": 2127.57, "total_tokens": 1018672}
{"current_steps": 2250, "total_steps": 3160, "loss": 0.0, "lr": 0.006975473171162659, "epoch": 14.240506329113924, "percentage": 71.2, "elapsed_time": "0:07:59", "remaining_time": "0:03:14", "throughput": 2127.98, "total_tokens": 1020976}
{"current_steps": 2255, "total_steps": 3160, "loss": 0.0005, "lr": 0.006905600043460891, "epoch": 14.272151898734178, "percentage": 71.36, "elapsed_time": "0:08:00", "remaining_time": "0:03:12", "throughput": 2128.14, "total_tokens": 1023120}
{"current_steps": 2260, "total_steps": 3160, "loss": 0.0002, "lr": 0.006835973840226484, "epoch": 14.30379746835443, "percentage": 71.52, "elapsed_time": "0:08:01", "remaining_time": "0:03:11", "throughput": 2128.48, "total_tokens": 1025360}
{"current_steps": 2265, "total_steps": 3160, "loss": 0.0003, "lr": 0.006766596685448035, "epoch": 14.335443037974684, "percentage": 71.68, "elapsed_time": "0:08:02", "remaining_time": "0:03:10", "throughput": 2128.9, "total_tokens": 1027664}
{"current_steps": 2270, "total_steps": 3160, "loss": 0.0002, "lr": 0.006697470695516768, "epoch": 14.367088607594937, "percentage": 71.84, "elapsed_time": "0:08:03", "remaining_time": "0:03:09", "throughput": 2129.28, "total_tokens": 1029936}
{"current_steps": 2275, "total_steps": 3160, "loss": 0.0002, "lr": 0.006628597979161958, "epoch": 14.39873417721519, "percentage": 71.99, "elapsed_time": "0:08:04", "remaining_time": "0:03:08", "throughput": 2129.63, "total_tokens": 1032208}
{"current_steps": 2280, "total_steps": 3160, "loss": 0.0004, "lr": 0.006559980637386639, "epoch": 14.430379746835444, "percentage": 72.15, "elapsed_time": "0:08:05", "remaining_time": "0:03:07", "throughput": 2129.92, "total_tokens": 1034416}
{"current_steps": 2285, "total_steps": 3160, "loss": 0.0005, "lr": 0.00649162076340348, "epoch": 14.462025316455696, "percentage": 72.31, "elapsed_time": "0:08:06", "remaining_time": "0:03:06", "throughput": 2130.31, "total_tokens": 1036688}
{"current_steps": 2290, "total_steps": 3160, "loss": 0.0005, "lr": 0.006423520442570956, "epoch": 14.49367088607595, "percentage": 72.47, "elapsed_time": "0:08:07", "remaining_time": "0:03:05", "throughput": 2130.64, "total_tokens": 1038960}
{"current_steps": 2295, "total_steps": 3160, "loss": 0.0004, "lr": 0.006355681752329696, "epoch": 14.525316455696203, "percentage": 72.63, "elapsed_time": "0:08:08", "remaining_time": "0:03:04", "throughput": 2130.76, "total_tokens": 1041072}
{"current_steps": 2300, "total_steps": 3160, "loss": 0.0002, "lr": 0.006288106762139153, "epoch": 14.556962025316455, "percentage": 72.78, "elapsed_time": "0:08:09", "remaining_time": "0:03:03", "throughput": 2131.09, "total_tokens": 1043312}
{"current_steps": 2305, "total_steps": 3160, "loss": 0.0008, "lr": 0.006220797533414447, "epoch": 14.58860759493671, "percentage": 72.94, "elapsed_time": "0:08:10", "remaining_time": "0:03:01", "throughput": 2131.42, "total_tokens": 1045552}
{"current_steps": 2310, "total_steps": 3160, "loss": 0.0003, "lr": 0.0061537561194634945, "epoch": 14.620253164556962, "percentage": 73.1, "elapsed_time": "0:08:11", "remaining_time": "0:03:00", "throughput": 2132.11, "total_tokens": 1048048}
{"current_steps": 2315, "total_steps": 3160, "loss": 0.0003, "lr": 0.006086984565424345, "epoch": 14.651898734177216, "percentage": 73.26, "elapsed_time": "0:08:12", "remaining_time": "0:02:59", "throughput": 2132.56, "total_tokens": 1050384}
{"current_steps": 2320, "total_steps": 3160, "loss": 0.0001, "lr": 0.006020484908202826, "epoch": 14.683544303797468, "percentage": 73.42, "elapsed_time": "0:08:13", "remaining_time": "0:02:58", "throughput": 2133.01, "total_tokens": 1052720}
{"current_steps": 2325, "total_steps": 3160, "loss": 0.0001, "lr": 0.00595425917641039, "epoch": 14.715189873417721, "percentage": 73.58, "elapsed_time": "0:08:14", "remaining_time": "0:02:57", "throughput": 2133.17, "total_tokens": 1054864}
{"current_steps": 2330, "total_steps": 3160, "loss": 0.0002, "lr": 0.005888309390302235, "epoch": 14.746835443037975, "percentage": 73.73, "elapsed_time": "0:08:15", "remaining_time": "0:02:56", "throughput": 2133.6, "total_tokens": 1057168}
{"current_steps": 2335, "total_steps": 3160, "loss": 0.0003, "lr": 0.005822637561715658, "epoch": 14.778481012658228, "percentage": 73.89, "elapsed_time": "0:08:16", "remaining_time": "0:02:55", "throughput": 2133.87, "total_tokens": 1059376}
{"current_steps": 2340, "total_steps": 3160, "loss": 0.0004, "lr": 0.005757245694008714, "epoch": 14.810126582278482, "percentage": 74.05, "elapsed_time": "0:08:17", "remaining_time": "0:02:54", "throughput": 2134.3, "total_tokens": 1061680}
{"current_steps": 2345, "total_steps": 3160, "loss": 0.0004, "lr": 0.005692135781999078, "epoch": 14.841772151898734, "percentage": 74.21, "elapsed_time": "0:08:18", "remaining_time": "0:02:53", "throughput": 2134.72, "total_tokens": 1063984}
{"current_steps": 2350, "total_steps": 3160, "loss": 0.0001, "lr": 0.005627309811903193, "epoch": 14.873417721518987, "percentage": 74.37, "elapsed_time": "0:08:19", "remaining_time": "0:02:52", "throughput": 2135.25, "total_tokens": 1066352}
{"current_steps": 2355, "total_steps": 3160, "loss": 0.0004, "lr": 0.005562769761275697, "epoch": 14.905063291139241, "percentage": 74.53, "elapsed_time": "0:08:20", "remaining_time": "0:02:51", "throughput": 2135.43, "total_tokens": 1068528}
{"current_steps": 2360, "total_steps": 3160, "loss": 0.0004, "lr": 0.005498517598949082, "epoch": 14.936708860759493, "percentage": 74.68, "elapsed_time": "0:08:21", "remaining_time": "0:02:49", "throughput": 2135.86, "total_tokens": 1070864}
{"current_steps": 2365, "total_steps": 3160, "loss": 0.0006, "lr": 0.005434555284973631, "epoch": 14.968354430379748, "percentage": 74.84, "elapsed_time": "0:08:22", "remaining_time": "0:02:48", "throughput": 2136.11, "total_tokens": 1073072}
{"current_steps": 2370, "total_steps": 3160, "loss": 0.0011, "lr": 0.005370884770557645, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:08:23", "remaining_time": "0:02:47", "throughput": 2136.11, "total_tokens": 1075088}
{"current_steps": 2370, "total_steps": 3160, "eval_loss": 0.09199367463588715, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:08:24", "remaining_time": "0:02:48", "throughput": 2129.01, "total_tokens": 1075088}
{"current_steps": 2375, "total_steps": 3160, "loss": 0.0008, "lr": 0.0053075079980078824, "epoch": 15.031645569620252, "percentage": 75.16, "elapsed_time": "0:08:27", "remaining_time": "0:02:47", "throughput": 2124.63, "total_tokens": 1077296}
{"current_steps": 2380, "total_steps": 3160, "loss": 0.0001, "lr": 0.005244426900670356, "epoch": 15.063291139240507, "percentage": 75.32, "elapsed_time": "0:08:28", "remaining_time": "0:02:46", "throughput": 2124.77, "total_tokens": 1079440}
{"current_steps": 2385, "total_steps": 3160, "loss": 0.0002, "lr": 0.0051816434028713245, "epoch": 15.094936708860759, "percentage": 75.47, "elapsed_time": "0:08:29", "remaining_time": "0:02:45", "throughput": 2125.04, "total_tokens": 1081648}
{"current_steps": 2390, "total_steps": 3160, "loss": 0.0003, "lr": 0.005119159419858583, "epoch": 15.126582278481013, "percentage": 75.63, "elapsed_time": "0:08:29", "remaining_time": "0:02:44", "throughput": 2125.48, "total_tokens": 1083952}
{"current_steps": 2395, "total_steps": 3160, "loss": 0.0001, "lr": 0.005056976857743068, "epoch": 15.158227848101266, "percentage": 75.79, "elapsed_time": "0:08:30", "remaining_time": "0:02:43", "throughput": 2125.82, "total_tokens": 1086224}
{"current_steps": 2400, "total_steps": 3160, "loss": 0.0003, "lr": 0.004995097613440688, "epoch": 15.189873417721518, "percentage": 75.95, "elapsed_time": "0:08:31", "remaining_time": "0:02:42", "throughput": 2126.09, "total_tokens": 1088432}
{"current_steps": 2405, "total_steps": 3160, "loss": 0.0011, "lr": 0.004933523574614447, "epoch": 15.221518987341772, "percentage": 76.11, "elapsed_time": "0:08:32", "remaining_time": "0:02:41", "throughput": 2126.49, "total_tokens": 1090736}
{"current_steps": 2410, "total_steps": 3160, "loss": 0.0002, "lr": 0.004872256619616906, "epoch": 15.253164556962025, "percentage": 76.27, "elapsed_time": "0:08:33", "remaining_time": "0:02:39", "throughput": 2126.71, "total_tokens": 1092912}
{"current_steps": 2415, "total_steps": 3160, "loss": 0.0002, "lr": 0.004811298617432824, "epoch": 15.284810126582279, "percentage": 76.42, "elapsed_time": "0:08:34", "remaining_time": "0:02:38", "throughput": 2127.21, "total_tokens": 1095280}
{"current_steps": 2420, "total_steps": 3160, "loss": 0.001, "lr": 0.004750651427622173, "epoch": 15.316455696202532, "percentage": 76.58, "elapsed_time": "0:08:35", "remaining_time": "0:02:37", "throughput": 2127.54, "total_tokens": 1097552}
{"current_steps": 2425, "total_steps": 3160, "loss": 0.0001, "lr": 0.004690316900263435, "epoch": 15.348101265822784, "percentage": 76.74, "elapsed_time": "0:08:36", "remaining_time": "0:02:36", "throughput": 2127.8, "total_tokens": 1099760}
{"current_steps": 2430, "total_steps": 3160, "loss": 0.0005, "lr": 0.0046302968758971065, "epoch": 15.379746835443038, "percentage": 76.9, "elapsed_time": "0:08:37", "remaining_time": "0:02:35", "throughput": 2128.24, "total_tokens": 1102096}
{"current_steps": 2435, "total_steps": 3160, "loss": 0.0009, "lr": 0.004570593185469605, "epoch": 15.41139240506329, "percentage": 77.06, "elapsed_time": "0:08:38", "remaining_time": "0:02:34", "throughput": 2128.56, "total_tokens": 1104336}
{"current_steps": 2440, "total_steps": 3160, "loss": 0.0002, "lr": 0.004511207650277389, "epoch": 15.443037974683545, "percentage": 77.22, "elapsed_time": "0:08:39", "remaining_time": "0:02:33", "throughput": 2128.73, "total_tokens": 1106480}
{"current_steps": 2445, "total_steps": 3160, "loss": 0.0005, "lr": 0.004452142081911388, "epoch": 15.474683544303797, "percentage": 77.37, "elapsed_time": "0:08:40", "remaining_time": "0:02:32", "throughput": 2129.05, "total_tokens": 1108752}
{"current_steps": 2450, "total_steps": 3160, "loss": 0.0009, "lr": 0.004393398282201788, "epoch": 15.50632911392405, "percentage": 77.53, "elapsed_time": "0:08:41", "remaining_time": "0:02:31", "throughput": 2129.31, "total_tokens": 1110960}
{"current_steps": 2455, "total_steps": 3160, "loss": 0.0002, "lr": 0.004334978043162998, "epoch": 15.537974683544304, "percentage": 77.69, "elapsed_time": "0:08:42", "remaining_time": "0:02:30", "throughput": 2129.56, "total_tokens": 1113168}
{"current_steps": 2460, "total_steps": 3160, "loss": 0.0002, "lr": 0.004276883146939021, "epoch": 15.569620253164556, "percentage": 77.85, "elapsed_time": "0:08:43", "remaining_time": "0:02:29", "throughput": 2129.87, "total_tokens": 1115408}
{"current_steps": 2465, "total_steps": 3160, "loss": 0.0002, "lr": 0.004219115365749112, "epoch": 15.60126582278481, "percentage": 78.01, "elapsed_time": "0:08:44", "remaining_time": "0:02:27", "throughput": 2130.18, "total_tokens": 1117648}
{"current_steps": 2470, "total_steps": 3160, "loss": 0.0005, "lr": 0.004161676461833653, "epoch": 15.632911392405063, "percentage": 78.16, "elapsed_time": "0:08:45", "remaining_time": "0:02:26", "throughput": 2130.61, "total_tokens": 1119984}
{"current_steps": 2475, "total_steps": 3160, "loss": 0.0, "lr": 0.004104568187400455, "epoch": 15.664556962025316, "percentage": 78.32, "elapsed_time": "0:08:46", "remaining_time": "0:02:25", "throughput": 2130.97, "total_tokens": 1122256}
{"current_steps": 2480, "total_steps": 3160, "loss": 0.0002, "lr": 0.004047792284571272, "epoch": 15.69620253164557, "percentage": 78.48, "elapsed_time": "0:08:47", "remaining_time": "0:02:24", "throughput": 2131.36, "total_tokens": 1124560}
{"current_steps": 2485, "total_steps": 3160, "loss": 0.0005, "lr": 0.0039913504853286525, "epoch": 15.727848101265822, "percentage": 78.64, "elapsed_time": "0:08:48", "remaining_time": "0:02:23", "throughput": 2131.84, "total_tokens": 1126960}
{"current_steps": 2490, "total_steps": 3160, "loss": 0.0001, "lr": 0.00393524451146315, "epoch": 15.759493670886076, "percentage": 78.8, "elapsed_time": "0:08:49", "remaining_time": "0:02:22", "throughput": 2132.33, "total_tokens": 1129360}
{"current_steps": 2495, "total_steps": 3160, "loss": 0.0001, "lr": 0.0038794760745207314, "epoch": 15.791139240506329, "percentage": 78.96, "elapsed_time": "0:08:50", "remaining_time": "0:02:21", "throughput": 2132.58, "total_tokens": 1131568}
{"current_steps": 2500, "total_steps": 3160, "loss": 0.0001, "lr": 0.0038240468757506077, "epoch": 15.822784810126583, "percentage": 79.11, "elapsed_time": "0:08:51", "remaining_time": "0:02:20", "throughput": 2132.95, "total_tokens": 1133872}
{"current_steps": 2505, "total_steps": 3160, "loss": 0.0002, "lr": 0.0037689586060533522, "epoch": 15.854430379746836, "percentage": 79.27, "elapsed_time": "0:08:52", "remaining_time": "0:02:19", "throughput": 2133.44, "total_tokens": 1136240}
{"current_steps": 2510, "total_steps": 3160, "loss": 0.0002, "lr": 0.003714212945929265, "epoch": 15.886075949367088, "percentage": 79.43, "elapsed_time": "0:08:53", "remaining_time": "0:02:18", "throughput": 2133.69, "total_tokens": 1138448}
{"current_steps": 2515, "total_steps": 3160, "loss": 0.0006, "lr": 0.003659811565427151, "epoch": 15.917721518987342, "percentage": 79.59, "elapsed_time": "0:08:54", "remaining_time": "0:02:17", "throughput": 2134.04, "total_tokens": 1140752}
{"current_steps": 2520, "total_steps": 3160, "loss": 0.0001, "lr": 0.0036057561240933683, "epoch": 15.949367088607595, "percentage": 79.75, "elapsed_time": "0:08:55", "remaining_time": "0:02:16", "throughput": 2134.43, "total_tokens": 1143056}
{"current_steps": 2525, "total_steps": 3160, "loss": 0.0002, "lr": 0.003552048270921177, "epoch": 15.981012658227849, "percentage": 79.91, "elapsed_time": "0:08:56", "remaining_time": "0:02:14", "throughput": 2134.83, "total_tokens": 1145392}
{"current_steps": 2528, "total_steps": 3160, "eval_loss": 0.09226036071777344, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:08:58", "remaining_time": "0:02:14", "throughput": 2128.19, "total_tokens": 1146608}
{"current_steps": 2530, "total_steps": 3160, "loss": 0.0003, "lr": 0.0034986896443004695, "epoch": 16.0126582278481, "percentage": 80.06, "elapsed_time": "0:09:00", "remaining_time": "0:02:14", "throughput": 2124.81, "total_tokens": 1147536}
{"current_steps": 2535, "total_steps": 3160, "loss": 0.0002, "lr": 0.003445681871967776, "epoch": 16.044303797468356, "percentage": 80.22, "elapsed_time": "0:09:01", "remaining_time": "0:02:13", "throughput": 2125.05, "total_tokens": 1149776}
{"current_steps": 2540, "total_steps": 3160, "loss": 0.0004, "lr": 0.003393026570956594, "epoch": 16.075949367088608, "percentage": 80.38, "elapsed_time": "0:09:02", "remaining_time": "0:02:12", "throughput": 2125.24, "total_tokens": 1151952}
{"current_steps": 2545, "total_steps": 3160, "loss": 0.0003, "lr": 0.0033407253475480903, "epoch": 16.10759493670886, "percentage": 80.54, "elapsed_time": "0:09:03", "remaining_time": "0:02:11", "throughput": 2125.54, "total_tokens": 1154192}
{"current_steps": 2550, "total_steps": 3160, "loss": 0.0007, "lr": 0.0032887797972220756, "epoch": 16.139240506329113, "percentage": 80.7, "elapsed_time": "0:09:04", "remaining_time": "0:02:10", "throughput": 2125.96, "total_tokens": 1156528}
{"current_steps": 2555, "total_steps": 3160, "loss": 0.0002, "lr": 0.003237191504608346, "epoch": 16.170886075949365, "percentage": 80.85, "elapsed_time": "0:09:04", "remaining_time": "0:02:09", "throughput": 2126.27, "total_tokens": 1158768}
{"current_steps": 2560, "total_steps": 3160, "loss": 0.0002, "lr": 0.003185962043438345, "epoch": 16.20253164556962, "percentage": 81.01, "elapsed_time": "0:09:05", "remaining_time": "0:02:07", "throughput": 2126.44, "total_tokens": 1160912}
{"current_steps": 2565, "total_steps": 3160, "loss": 0.0005, "lr": 0.003135092976497134, "epoch": 16.234177215189874, "percentage": 81.17, "elapsed_time": "0:09:06", "remaining_time": "0:02:06", "throughput": 2126.7, "total_tokens": 1163120}
{"current_steps": 2570, "total_steps": 3160, "loss": 0.0002, "lr": 0.003084585855575747, "epoch": 16.265822784810126, "percentage": 81.33, "elapsed_time": "0:09:07", "remaining_time": "0:02:05", "throughput": 2126.85, "total_tokens": 1165264}
{"current_steps": 2575, "total_steps": 3160, "loss": 0.0003, "lr": 0.0030344422214238454, "epoch": 16.29746835443038, "percentage": 81.49, "elapsed_time": "0:09:08", "remaining_time": "0:02:04", "throughput": 2127.2, "total_tokens": 1167536}
{"current_steps": 2580, "total_steps": 3160, "loss": 0.0004, "lr": 0.002984663603702693, "epoch": 16.32911392405063, "percentage": 81.65, "elapsed_time": "0:09:09", "remaining_time": "0:02:03", "throughput": 2127.5, "total_tokens": 1169776}
{"current_steps": 2585, "total_steps": 3160, "loss": 0.0003, "lr": 0.0029352515209385283, "epoch": 16.360759493670887, "percentage": 81.8, "elapsed_time": "0:09:10", "remaining_time": "0:02:02", "throughput": 2128.01, "total_tokens": 1172176}
{"current_steps": 2590, "total_steps": 3160, "loss": 0.0004, "lr": 0.002886207480476215, "epoch": 16.39240506329114, "percentage": 81.96, "elapsed_time": "0:09:11", "remaining_time": "0:02:01", "throughput": 2128.25, "total_tokens": 1174384}
{"current_steps": 2595, "total_steps": 3160, "loss": 0.0003, "lr": 0.0028375329784332765, "epoch": 16.424050632911392, "percentage": 82.12, "elapsed_time": "0:09:12", "remaining_time": "0:02:00", "throughput": 2128.69, "total_tokens": 1176752}
{"current_steps": 2600, "total_steps": 3160, "loss": 0.0003, "lr": 0.002789229499654233, "epoch": 16.455696202531644, "percentage": 82.28, "elapsed_time": "0:09:13", "remaining_time": "0:01:59", "throughput": 2129.03, "total_tokens": 1179024}
{"current_steps": 2605, "total_steps": 3160, "loss": 0.0003, "lr": 0.002741298517665333, "epoch": 16.4873417721519, "percentage": 82.44, "elapsed_time": "0:09:14", "remaining_time": "0:01:58", "throughput": 2129.39, "total_tokens": 1181328}
{"current_steps": 2610, "total_steps": 3160, "loss": 0.0002, "lr": 0.002693741494629585, "epoch": 16.518987341772153, "percentage": 82.59, "elapsed_time": "0:09:15", "remaining_time": "0:01:57", "throughput": 2129.84, "total_tokens": 1183696}
{"current_steps": 2615, "total_steps": 3160, "loss": 0.0004, "lr": 0.002646559881302165, "epoch": 16.550632911392405, "percentage": 82.75, "elapsed_time": "0:09:16", "remaining_time": "0:01:56", "throughput": 2130.22, "total_tokens": 1186000}
{"current_steps": 2620, "total_steps": 3160, "loss": 0.0004, "lr": 0.0025997551169861365, "epoch": 16.582278481012658, "percentage": 82.91, "elapsed_time": "0:09:17", "remaining_time": "0:01:54", "throughput": 2130.69, "total_tokens": 1188400}
{"current_steps": 2625, "total_steps": 3160, "loss": 0.0003, "lr": 0.002553328629488577, "epoch": 16.61392405063291, "percentage": 83.07, "elapsed_time": "0:09:18", "remaining_time": "0:01:53", "throughput": 2130.97, "total_tokens": 1190640}
{"current_steps": 2630, "total_steps": 3160, "loss": 0.0002, "lr": 0.002507281835076998, "epoch": 16.645569620253166, "percentage": 83.23, "elapsed_time": "0:09:19", "remaining_time": "0:01:52", "throughput": 2131.12, "total_tokens": 1192784}
{"current_steps": 2635, "total_steps": 3160, "loss": 0.0005, "lr": 0.002461616138436155, "epoch": 16.67721518987342, "percentage": 83.39, "elapsed_time": "0:09:20", "remaining_time": "0:01:51", "throughput": 2131.4, "total_tokens": 1195024}
{"current_steps": 2640, "total_steps": 3160, "loss": 0.0001, "lr": 0.0024163329326251774, "epoch": 16.70886075949367, "percentage": 83.54, "elapsed_time": "0:09:21", "remaining_time": "0:01:50", "throughput": 2131.68, "total_tokens": 1197264}
{"current_steps": 2645, "total_steps": 3160, "loss": 0.0004, "lr": 0.002371433599035097, "epoch": 16.740506329113924, "percentage": 83.7, "elapsed_time": "0:09:22", "remaining_time": "0:01:49", "throughput": 2132.22, "total_tokens": 1199760}
{"current_steps": 2650, "total_steps": 3160, "loss": 0.0004, "lr": 0.0023269195073466957, "epoch": 16.772151898734176, "percentage": 83.86, "elapsed_time": "0:09:23", "remaining_time": "0:01:48", "throughput": 2132.61, "total_tokens": 1202096}
{"current_steps": 2655, "total_steps": 3160, "loss": 0.0002, "lr": 0.0022827920154887132, "epoch": 16.803797468354432, "percentage": 84.02, "elapsed_time": "0:09:24", "remaining_time": "0:01:47", "throughput": 2132.95, "total_tokens": 1204400}
{"current_steps": 2660, "total_steps": 3160, "loss": 0.0005, "lr": 0.002239052469596439, "epoch": 16.835443037974684, "percentage": 84.18, "elapsed_time": "0:09:25", "remaining_time": "0:01:46", "throughput": 2133.54, "total_tokens": 1206896}
{"current_steps": 2665, "total_steps": 3160, "loss": 0.0006, "lr": 0.0021957022039706454, "epoch": 16.867088607594937, "percentage": 84.34, "elapsed_time": "0:09:26", "remaining_time": "0:01:45", "throughput": 2134.01, "total_tokens": 1209264}
{"current_steps": 2670, "total_steps": 3160, "loss": 0.0003, "lr": 0.002152742541036869, "epoch": 16.89873417721519, "percentage": 84.49, "elapsed_time": "0:09:27", "remaining_time": "0:01:44", "throughput": 2134.28, "total_tokens": 1211504}
{"current_steps": 2675, "total_steps": 3160, "loss": 0.0011, "lr": 0.0021101747913050855, "epoch": 16.930379746835442, "percentage": 84.65, "elapsed_time": "0:09:28", "remaining_time": "0:01:43", "throughput": 2134.63, "total_tokens": 1213808}
{"current_steps": 2680, "total_steps": 3160, "loss": 0.0004, "lr": 0.0020680002533297274, "epoch": 16.962025316455698, "percentage": 84.81, "elapsed_time": "0:09:29", "remaining_time": "0:01:42", "throughput": 2134.77, "total_tokens": 1215952}
{"current_steps": 2685, "total_steps": 3160, "loss": 0.0005, "lr": 0.002026220213670069, "epoch": 16.99367088607595, "percentage": 84.97, "elapsed_time": "0:09:30", "remaining_time": "0:01:40", "throughput": 2134.96, "total_tokens": 1218160}
{"current_steps": 2686, "total_steps": 3160, "eval_loss": 0.09145190566778183, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:09:32", "remaining_time": "0:01:41", "throughput": 2128.44, "total_tokens": 1218368}
{"current_steps": 2690, "total_steps": 3160, "loss": 0.0001, "lr": 0.0019848359468509825, "epoch": 17.025316455696203, "percentage": 85.13, "elapsed_time": "0:09:34", "remaining_time": "0:01:40", "throughput": 2125.18, "total_tokens": 1220256}
{"current_steps": 2695, "total_steps": 3160, "loss": 0.001, "lr": 0.0019438487153240424, "epoch": 17.056962025316455, "percentage": 85.28, "elapsed_time": "0:09:35", "remaining_time": "0:01:39", "throughput": 2125.49, "total_tokens": 1222528}
{"current_steps": 2700, "total_steps": 3160, "loss": 0.0001, "lr": 0.0019032597694290392, "epoch": 17.088607594936708, "percentage": 85.44, "elapsed_time": "0:09:36", "remaining_time": "0:01:38", "throughput": 2125.77, "total_tokens": 1224768}
{"current_steps": 2705, "total_steps": 3160, "loss": 0.0002, "lr": 0.0018630703473558234, "epoch": 17.120253164556964, "percentage": 85.6, "elapsed_time": "0:09:37", "remaining_time": "0:01:37", "throughput": 2126.06, "total_tokens": 1227008}
{"current_steps": 2710, "total_steps": 3160, "loss": 0.0006, "lr": 0.0018232816751065249, "epoch": 17.151898734177216, "percentage": 85.76, "elapsed_time": "0:09:38", "remaining_time": "0:01:35", "throughput": 2126.41, "total_tokens": 1229344}
{"current_steps": 2715, "total_steps": 3160, "loss": 0.0007, "lr": 0.0017838949664581742, "epoch": 17.18354430379747, "percentage": 85.92, "elapsed_time": "0:09:39", "remaining_time": "0:01:34", "throughput": 2126.84, "total_tokens": 1231712}
{"current_steps": 2720, "total_steps": 3160, "loss": 0.0005, "lr": 0.0017449114229256607, "epoch": 17.21518987341772, "percentage": 86.08, "elapsed_time": "0:09:40", "remaining_time": "0:01:33", "throughput": 2127.16, "total_tokens": 1233984}
{"current_steps": 2725, "total_steps": 3160, "loss": 0.0002, "lr": 0.0017063322337250713, "epoch": 17.246835443037973, "percentage": 86.23, "elapsed_time": "0:09:41", "remaining_time": "0:01:32", "throughput": 2127.4, "total_tokens": 1236224}
{"current_steps": 2730, "total_steps": 3160, "loss": 0.0001, "lr": 0.0016681585757374472, "epoch": 17.27848101265823, "percentage": 86.39, "elapsed_time": "0:09:42", "remaining_time": "0:01:31", "throughput": 2127.71, "total_tokens": 1238528}
{"current_steps": 2735, "total_steps": 3160, "loss": 0.0002, "lr": 0.001630391613472837, "epoch": 17.310126582278482, "percentage": 86.55, "elapsed_time": "0:09:43", "remaining_time": "0:01:30", "throughput": 2128.05, "total_tokens": 1240832}
{"current_steps": 2740, "total_steps": 3160, "loss": 0.0002, "lr": 0.001593032499034811, "epoch": 17.341772151898734, "percentage": 86.71, "elapsed_time": "0:09:44", "remaining_time": "0:01:29", "throughput": 2128.37, "total_tokens": 1243104}
{"current_steps": 2745, "total_steps": 3160, "loss": 0.0002, "lr": 0.0015560823720852928, "epoch": 17.373417721518987, "percentage": 86.87, "elapsed_time": "0:09:45", "remaining_time": "0:01:28", "throughput": 2128.69, "total_tokens": 1245376}
{"current_steps": 2750, "total_steps": 3160, "loss": 0.0004, "lr": 0.0015195423598097972, "epoch": 17.40506329113924, "percentage": 87.03, "elapsed_time": "0:09:46", "remaining_time": "0:01:27", "throughput": 2129.01, "total_tokens": 1247648}
{"current_steps": 2755, "total_steps": 3160, "loss": 0.0006, "lr": 0.001483413576883057, "epoch": 17.436708860759495, "percentage": 87.18, "elapsed_time": "0:09:47", "remaining_time": "0:01:26", "throughput": 2129.47, "total_tokens": 1250048}
{"current_steps": 2760, "total_steps": 3160, "loss": 0.0006, "lr": 0.001447697125435004, "epoch": 17.468354430379748, "percentage": 87.34, "elapsed_time": "0:09:48", "remaining_time": "0:01:25", "throughput": 2129.84, "total_tokens": 1252352}
{"current_steps": 2765, "total_steps": 3160, "loss": 0.0002, "lr": 0.0014123940950171508, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:09:48", "remaining_time": "0:01:24", "throughput": 2130.16, "total_tokens": 1254624}
{"current_steps": 2770, "total_steps": 3160, "loss": 0.0003, "lr": 0.0013775055625693683, "epoch": 17.531645569620252, "percentage": 87.66, "elapsed_time": "0:09:49", "remaining_time": "0:01:23", "throughput": 2130.42, "total_tokens": 1256864}
{"current_steps": 2775, "total_steps": 3160, "loss": 0.0002, "lr": 0.0013430325923870095, "epoch": 17.563291139240505, "percentage": 87.82, "elapsed_time": "0:09:50", "remaining_time": "0:01:21", "throughput": 2130.65, "total_tokens": 1259072}
{"current_steps": 2780, "total_steps": 3160, "loss": 0.0004, "lr": 0.0013089762360884538, "epoch": 17.59493670886076, "percentage": 87.97, "elapsed_time": "0:09:51", "remaining_time": "0:01:20", "throughput": 2130.98, "total_tokens": 1261376}
{"current_steps": 2785, "total_steps": 3160, "loss": 0.0001, "lr": 0.0012753375325830413, "epoch": 17.626582278481013, "percentage": 88.13, "elapsed_time": "0:09:52", "remaining_time": "0:01:19", "throughput": 2131.07, "total_tokens": 1263488}
{"current_steps": 2790, "total_steps": 3160, "loss": 0.0008, "lr": 0.001242117508039347, "epoch": 17.658227848101266, "percentage": 88.29, "elapsed_time": "0:09:53", "remaining_time": "0:01:18", "throughput": 2131.44, "total_tokens": 1265824}
{"current_steps": 2795, "total_steps": 3160, "loss": 0.0001, "lr": 0.0012093171758539112, "epoch": 17.689873417721518, "percentage": 88.45, "elapsed_time": "0:09:54", "remaining_time": "0:01:17", "throughput": 2131.67, "total_tokens": 1268032}
{"current_steps": 2800, "total_steps": 3160, "loss": 0.0006, "lr": 0.0011769375366203066, "epoch": 17.72151898734177, "percentage": 88.61, "elapsed_time": "0:09:55", "remaining_time": "0:01:16", "throughput": 2131.93, "total_tokens": 1270272}
{"current_steps": 2805, "total_steps": 3160, "loss": 0.0002, "lr": 0.0011449795780986071, "epoch": 17.753164556962027, "percentage": 88.77, "elapsed_time": "0:09:56", "remaining_time": "0:01:15", "throughput": 2132.24, "total_tokens": 1272544}
{"current_steps": 2810, "total_steps": 3160, "loss": 0.0003, "lr": 0.0011134442751852846, "epoch": 17.78481012658228, "percentage": 88.92, "elapsed_time": "0:09:57", "remaining_time": "0:01:14", "throughput": 2132.64, "total_tokens": 1274880}
{"current_steps": 2815, "total_steps": 3160, "loss": 0.0, "lr": 0.0010823325898834395, "epoch": 17.81645569620253, "percentage": 89.08, "elapsed_time": "0:09:58", "remaining_time": "0:01:13", "throughput": 2132.91, "total_tokens": 1277120}
{"current_steps": 2820, "total_steps": 3160, "loss": 0.0003, "lr": 0.0010516454712734629, "epoch": 17.848101265822784, "percentage": 89.24, "elapsed_time": "0:09:59", "remaining_time": "0:01:12", "throughput": 2133.2, "total_tokens": 1279424}
{"current_steps": 2825, "total_steps": 3160, "loss": 0.0003, "lr": 0.0010213838554841027, "epoch": 17.879746835443036, "percentage": 89.4, "elapsed_time": "0:10:00", "remaining_time": "0:01:11", "throughput": 2133.52, "total_tokens": 1281728}
{"current_steps": 2830, "total_steps": 3160, "loss": 0.0002, "lr": 0.0009915486656638728, "epoch": 17.911392405063292, "percentage": 89.56, "elapsed_time": "0:10:01", "remaining_time": "0:01:10", "throughput": 2133.76, "total_tokens": 1283968}
{"current_steps": 2835, "total_steps": 3160, "loss": 0.0007, "lr": 0.0009621408119529234, "epoch": 17.943037974683545, "percentage": 89.72, "elapsed_time": "0:10:02", "remaining_time": "0:01:09", "throughput": 2134.1, "total_tokens": 1286272}
{"current_steps": 2840, "total_steps": 3160, "loss": 0.0007, "lr": 0.0009331611914552607, "epoch": 17.974683544303797, "percentage": 89.87, "elapsed_time": "0:10:03", "remaining_time": "0:01:08", "throughput": 2134.4, "total_tokens": 1288544}
{"current_steps": 2844, "total_steps": 3160, "eval_loss": 0.0919828787446022, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:10:06", "remaining_time": "0:01:07", "throughput": 2128.48, "total_tokens": 1290144}
{"current_steps": 2845, "total_steps": 3160, "loss": 0.0004, "lr": 0.0009046106882113752, "epoch": 18.00632911392405, "percentage": 90.03, "elapsed_time": "0:10:07", "remaining_time": "0:01:07", "throughput": 2125.35, "total_tokens": 1290624}
{"current_steps": 2850, "total_steps": 3160, "loss": 0.0003, "lr": 0.000876490173171291, "epoch": 18.037974683544302, "percentage": 90.19, "elapsed_time": "0:10:08", "remaining_time": "0:01:06", "throughput": 2125.59, "total_tokens": 1292992}
{"current_steps": 2855, "total_steps": 3160, "loss": 0.0003, "lr": 0.0008488005041679841, "epoch": 18.069620253164558, "percentage": 90.35, "elapsed_time": "0:10:09", "remaining_time": "0:01:05", "throughput": 2125.94, "total_tokens": 1295328}
{"current_steps": 2860, "total_steps": 3160, "loss": 0.0008, "lr": 0.0008215425258912096, "epoch": 18.10126582278481, "percentage": 90.51, "elapsed_time": "0:10:10", "remaining_time": "0:01:04", "throughput": 2126.21, "total_tokens": 1297568}
{"current_steps": 2865, "total_steps": 3160, "loss": 0.0002, "lr": 0.0007947170698617595, "epoch": 18.132911392405063, "percentage": 90.66, "elapsed_time": "0:10:11", "remaining_time": "0:01:02", "throughput": 2126.53, "total_tokens": 1299840}
{"current_steps": 2870, "total_steps": 3160, "loss": 0.0001, "lr": 0.0007683249544060571, "epoch": 18.164556962025316, "percentage": 90.82, "elapsed_time": "0:10:12", "remaining_time": "0:01:01", "throughput": 2126.81, "total_tokens": 1302080}
{"current_steps": 2875, "total_steps": 3160, "loss": 0.0005, "lr": 0.000742366984631227, "epoch": 18.196202531645568, "percentage": 90.98, "elapsed_time": "0:10:13", "remaining_time": "0:01:00", "throughput": 2127.03, "total_tokens": 1304288}
{"current_steps": 2880, "total_steps": 3160, "loss": 0.0003, "lr": 0.000716843952400522, "epoch": 18.227848101265824, "percentage": 91.14, "elapsed_time": "0:10:14", "remaining_time": "0:00:59", "throughput": 2127.55, "total_tokens": 1306784}
{"current_steps": 2885, "total_steps": 3160, "loss": 0.0004, "lr": 0.0006917566363091609, "epoch": 18.259493670886076, "percentage": 91.3, "elapsed_time": "0:10:15", "remaining_time": "0:00:58", "throughput": 2127.92, "total_tokens": 1309120}
{"current_steps": 2890, "total_steps": 3160, "loss": 0.0004, "lr": 0.000667105801660589, "epoch": 18.29113924050633, "percentage": 91.46, "elapsed_time": "0:10:16", "remaining_time": "0:00:57", "throughput": 2128.09, "total_tokens": 1311296}
{"current_steps": 2895, "total_steps": 3160, "loss": 0.0001, "lr": 0.0006428922004431298, "epoch": 18.32278481012658, "percentage": 91.61, "elapsed_time": "0:10:17", "remaining_time": "0:00:56", "throughput": 2128.4, "total_tokens": 1313568}
{"current_steps": 2900, "total_steps": 3160, "loss": 0.0001, "lr": 0.000619116571307029, "epoch": 18.354430379746834, "percentage": 91.77, "elapsed_time": "0:10:18", "remaining_time": "0:00:55", "throughput": 2128.6, "total_tokens": 1315776}
{"current_steps": 2905, "total_steps": 3160, "loss": 0.0006, "lr": 0.0005957796395419484, "epoch": 18.38607594936709, "percentage": 91.93, "elapsed_time": "0:10:19", "remaining_time": "0:00:54", "throughput": 2128.92, "total_tokens": 1318080}
{"current_steps": 2910, "total_steps": 3160, "loss": 0.0002, "lr": 0.0005728821170548199, "epoch": 18.417721518987342, "percentage": 92.09, "elapsed_time": "0:10:20", "remaining_time": "0:00:53", "throughput": 2129.15, "total_tokens": 1320320}
{"current_steps": 2915, "total_steps": 3160, "loss": 0.0002, "lr": 0.0005504247023481373, "epoch": 18.449367088607595, "percentage": 92.25, "elapsed_time": "0:10:21", "remaining_time": "0:00:52", "throughput": 2129.35, "total_tokens": 1322528}
{"current_steps": 2920, "total_steps": 3160, "loss": 0.0012, "lr": 0.0005284080804986412, "epoch": 18.481012658227847, "percentage": 92.41, "elapsed_time": "0:10:22", "remaining_time": "0:00:51", "throughput": 2129.8, "total_tokens": 1324928}
{"current_steps": 2925, "total_steps": 3160, "loss": 0.0001, "lr": 0.0005068329231364282, "epoch": 18.5126582278481, "percentage": 92.56, "elapsed_time": "0:10:23", "remaining_time": "0:00:50", "throughput": 2130.01, "total_tokens": 1327136}
{"current_steps": 2930, "total_steps": 3160, "loss": 0.0006, "lr": 0.00048569988842446065, "epoch": 18.544303797468356, "percentage": 92.72, "elapsed_time": "0:10:24", "remaining_time": "0:00:48", "throughput": 2130.32, "total_tokens": 1329408}
{"current_steps": 2935, "total_steps": 3160, "loss": 0.0005, "lr": 0.00046500962103848795, "epoch": 18.575949367088608, "percentage": 92.88, "elapsed_time": "0:10:25", "remaining_time": "0:00:47", "throughput": 2130.62, "total_tokens": 1331680}
{"current_steps": 2940, "total_steps": 3160, "loss": 0.0001, "lr": 0.00044476275214737235, "epoch": 18.60759493670886, "percentage": 93.04, "elapsed_time": "0:10:25", "remaining_time": "0:00:46", "throughput": 2130.8, "total_tokens": 1333856}
{"current_steps": 2945, "total_steps": 3160, "loss": 0.0004, "lr": 0.00042495989939384915, "epoch": 18.639240506329113, "percentage": 93.2, "elapsed_time": "0:10:26", "remaining_time": "0:00:45", "throughput": 2131.11, "total_tokens": 1336160}
{"current_steps": 2950, "total_steps": 3160, "loss": 0.0002, "lr": 0.0004056016668756801, "epoch": 18.67088607594937, "percentage": 93.35, "elapsed_time": "0:10:27", "remaining_time": "0:00:44", "throughput": 2131.49, "total_tokens": 1338496}
{"current_steps": 2955, "total_steps": 3160, "loss": 0.0003, "lr": 0.00038668864512721667, "epoch": 18.70253164556962, "percentage": 93.51, "elapsed_time": "0:10:28", "remaining_time": "0:00:43", "throughput": 2131.75, "total_tokens": 1340736}
{"current_steps": 2960, "total_steps": 3160, "loss": 0.0004, "lr": 0.00036822141110139594, "epoch": 18.734177215189874, "percentage": 93.67, "elapsed_time": "0:10:29", "remaining_time": "0:00:42", "throughput": 2131.98, "total_tokens": 1342976}
{"current_steps": 2965, "total_steps": 3160, "loss": 0.0003, "lr": 0.00035020052815213477, "epoch": 18.765822784810126, "percentage": 93.83, "elapsed_time": "0:10:30", "remaining_time": "0:00:41", "throughput": 2132.35, "total_tokens": 1345312}
{"current_steps": 2970, "total_steps": 3160, "loss": 0.0003, "lr": 0.0003326265460171468, "epoch": 18.79746835443038, "percentage": 93.99, "elapsed_time": "0:10:31", "remaining_time": "0:00:40", "throughput": 2132.57, "total_tokens": 1347552}
{"current_steps": 2975, "total_steps": 3160, "loss": 0.0003, "lr": 0.0003155000008011727, "epoch": 18.82911392405063, "percentage": 94.15, "elapsed_time": "0:10:32", "remaining_time": "0:00:39", "throughput": 2132.8, "total_tokens": 1349792}
{"current_steps": 2980, "total_steps": 3160, "loss": 0.0006, "lr": 0.0002988214149596197, "epoch": 18.860759493670887, "percentage": 94.3, "elapsed_time": "0:10:33", "remaining_time": "0:00:38", "throughput": 2133.11, "total_tokens": 1352096}
{"current_steps": 2985, "total_steps": 3160, "loss": 0.0006, "lr": 0.00028259129728263607, "epoch": 18.89240506329114, "percentage": 94.46, "elapsed_time": "0:10:34", "remaining_time": "0:00:37", "throughput": 2133.44, "total_tokens": 1354400}
{"current_steps": 2990, "total_steps": 3160, "loss": 0.0005, "lr": 0.0002668101428795788, "epoch": 18.924050632911392, "percentage": 94.62, "elapsed_time": "0:10:35", "remaining_time": "0:00:36", "throughput": 2133.71, "total_tokens": 1356672}
{"current_steps": 2995, "total_steps": 3160, "loss": 0.0001, "lr": 0.00025147843316391524, "epoch": 18.955696202531644, "percentage": 94.78, "elapsed_time": "0:10:36", "remaining_time": "0:00:35", "throughput": 2134.0, "total_tokens": 1358944}
{"current_steps": 3000, "total_steps": 3160, "loss": 0.0004, "lr": 0.0002365966358385335, "epoch": 18.9873417721519, "percentage": 94.94, "elapsed_time": "0:10:37", "remaining_time": "0:00:34", "throughput": 2134.38, "total_tokens": 1361312}
{"current_steps": 3002, "total_steps": 3160, "eval_loss": 0.0921000987291336, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:10:39", "remaining_time": "0:00:33", "throughput": 2128.62, "total_tokens": 1361984}
{"current_steps": 3005, "total_steps": 3160, "loss": 0.0001, "lr": 0.00022216520488148206, "epoch": 19.018987341772153, "percentage": 95.09, "elapsed_time": "0:10:41", "remaining_time": "0:00:33", "throughput": 2125.79, "total_tokens": 1363328}
{"current_steps": 3010, "total_steps": 3160, "loss": 0.0008, "lr": 0.00020818458053211252, "epoch": 19.050632911392405, "percentage": 95.25, "elapsed_time": "0:10:42", "remaining_time": "0:00:32", "throughput": 2125.96, "total_tokens": 1365600}
{"current_steps": 3015, "total_steps": 3160, "loss": 0.0008, "lr": 0.00019465518927765712, "epoch": 19.082278481012658, "percentage": 95.41, "elapsed_time": "0:10:43", "remaining_time": "0:00:30", "throughput": 2126.29, "total_tokens": 1368032}
{"current_steps": 3020, "total_steps": 3160, "loss": 0.0001, "lr": 0.00018157744384021234, "epoch": 19.11392405063291, "percentage": 95.57, "elapsed_time": "0:10:44", "remaining_time": "0:00:29", "throughput": 2126.51, "total_tokens": 1370272}
{"current_steps": 3025, "total_steps": 3160, "loss": 0.0004, "lr": 0.00016895174316415405, "epoch": 19.145569620253166, "percentage": 95.73, "elapsed_time": "0:10:45", "remaining_time": "0:00:28", "throughput": 2126.73, "total_tokens": 1372480}
{"current_steps": 3030, "total_steps": 3160, "loss": 0.0002, "lr": 0.0001567784724039589, "epoch": 19.17721518987342, "percentage": 95.89, "elapsed_time": "0:10:46", "remaining_time": "0:00:27", "throughput": 2126.94, "total_tokens": 1374688}
{"current_steps": 3035, "total_steps": 3160, "loss": 0.0005, "lr": 0.00014505800291247207, "epoch": 19.20886075949367, "percentage": 96.04, "elapsed_time": "0:10:47", "remaining_time": "0:00:26", "throughput": 2127.23, "total_tokens": 1376960}
{"current_steps": 3040, "total_steps": 3160, "loss": 0.0008, "lr": 0.00013379069222955618, "epoch": 19.240506329113924, "percentage": 96.2, "elapsed_time": "0:10:48", "remaining_time": "0:00:25", "throughput": 2127.53, "total_tokens": 1379232}
{"current_steps": 3045, "total_steps": 3160, "loss": 0.0011, "lr": 0.00012297688407120032, "epoch": 19.272151898734176, "percentage": 96.36, "elapsed_time": "0:10:49", "remaining_time": "0:00:24", "throughput": 2127.96, "total_tokens": 1381632}
{"current_steps": 3050, "total_steps": 3160, "loss": 0.0003, "lr": 0.00011261690831903481, "epoch": 19.303797468354432, "percentage": 96.52, "elapsed_time": "0:10:50", "remaining_time": "0:00:23", "throughput": 2128.13, "total_tokens": 1383808}
{"current_steps": 3055, "total_steps": 3160, "loss": 0.0001, "lr": 0.00010271108101025439, "epoch": 19.335443037974684, "percentage": 96.68, "elapsed_time": "0:10:51", "remaining_time": "0:00:22", "throughput": 2128.39, "total_tokens": 1386080}
{"current_steps": 3060, "total_steps": 3160, "loss": 0.0004, "lr": 9.325970432799424e-05, "epoch": 19.367088607594937, "percentage": 96.84, "elapsed_time": "0:10:52", "remaining_time": "0:00:21", "throughput": 2128.6, "total_tokens": 1388288}
{"current_steps": 3065, "total_steps": 3160, "loss": 0.0009, "lr": 8.426306659209903e-05, "epoch": 19.39873417721519, "percentage": 96.99, "elapsed_time": "0:10:53", "remaining_time": "0:00:20", "throughput": 2128.87, "total_tokens": 1390560}
{"current_steps": 3070, "total_steps": 3160, "loss": 0.0004, "lr": 7.572144225033495e-05, "epoch": 19.430379746835442, "percentage": 97.15, "elapsed_time": "0:10:54", "remaining_time": "0:00:19", "throughput": 2129.17, "total_tokens": 1392864}
{"current_steps": 3075, "total_steps": 3160, "loss": 0.0003, "lr": 6.76350918700147e-05, "epoch": 19.462025316455698, "percentage": 97.31, "elapsed_time": "0:10:55", "remaining_time": "0:00:18", "throughput": 2129.34, "total_tokens": 1395040}
{"current_steps": 3080, "total_steps": 3160, "loss": 0.0001, "lr": 6.0004262130048946e-05, "epoch": 19.49367088607595, "percentage": 97.47, "elapsed_time": "0:10:56", "remaining_time": "0:00:17", "throughput": 2129.67, "total_tokens": 1397344}
{"current_steps": 3085, "total_steps": 3160, "loss": 0.0002, "lr": 5.282918581341889e-05, "epoch": 19.525316455696203, "percentage": 97.63, "elapsed_time": "0:10:57", "remaining_time": "0:00:15", "throughput": 2129.83, "total_tokens": 1399520}
{"current_steps": 3090, "total_steps": 3160, "loss": 0.0001, "lr": 4.6110081800082025e-05, "epoch": 19.556962025316455, "percentage": 97.78, "elapsed_time": "0:10:58", "remaining_time": "0:00:14", "throughput": 2130.02, "total_tokens": 1401728}
{"current_steps": 3095, "total_steps": 3160, "loss": 0.0002, "lr": 3.98471550602858e-05, "epoch": 19.588607594936708, "percentage": 97.94, "elapsed_time": "0:10:59", "remaining_time": "0:00:13", "throughput": 2130.32, "total_tokens": 1404032}
{"current_steps": 3100, "total_steps": 3160, "loss": 0.0001, "lr": 3.404059664832259e-05, "epoch": 19.620253164556964, "percentage": 98.1, "elapsed_time": "0:11:00", "remaining_time": "0:00:12", "throughput": 2130.53, "total_tokens": 1406240}
{"current_steps": 3105, "total_steps": 3160, "loss": 0.0005, "lr": 2.869058369669941e-05, "epoch": 19.651898734177216, "percentage": 98.26, "elapsed_time": "0:11:01", "remaining_time": "0:00:11", "throughput": 2130.92, "total_tokens": 1408640}
{"current_steps": 3110, "total_steps": 3160, "loss": 0.0004, "lr": 2.3797279410728844e-05, "epoch": 19.68354430379747, "percentage": 98.42, "elapsed_time": "0:11:02", "remaining_time": "0:00:10", "throughput": 2131.24, "total_tokens": 1410944}
{"current_steps": 3115, "total_steps": 3160, "loss": 0.0003, "lr": 1.9360833063559732e-05, "epoch": 19.71518987341772, "percentage": 98.58, "elapsed_time": "0:11:02", "remaining_time": "0:00:09", "throughput": 2131.33, "total_tokens": 1413056}
{"current_steps": 3120, "total_steps": 3160, "loss": 0.0002, "lr": 1.5381379991615817e-05, "epoch": 19.746835443037973, "percentage": 98.73, "elapsed_time": "0:11:03", "remaining_time": "0:00:08", "throughput": 2131.65, "total_tokens": 1415360}
{"current_steps": 3125, "total_steps": 3160, "loss": 0.0003, "lr": 1.1859041590472351e-05, "epoch": 19.77848101265823, "percentage": 98.89, "elapsed_time": "0:11:04", "remaining_time": "0:00:07", "throughput": 2131.89, "total_tokens": 1417600}
{"current_steps": 3130, "total_steps": 3160, "loss": 0.0005, "lr": 8.793925311149087e-06, "epoch": 19.810126582278482, "percentage": 99.05, "elapsed_time": "0:11:05", "remaining_time": "0:00:06", "throughput": 2132.21, "total_tokens": 1419904}
{"current_steps": 3135, "total_steps": 3160, "loss": 0.0001, "lr": 6.18612465683288e-06, "epoch": 19.841772151898734, "percentage": 99.21, "elapsed_time": "0:11:06", "remaining_time": "0:00:05", "throughput": 2132.46, "total_tokens": 1422176}
{"current_steps": 3140, "total_steps": 3160, "loss": 0.0003, "lr": 4.035719180031649e-06, "epoch": 19.873417721518987, "percentage": 99.37, "elapsed_time": "0:11:07", "remaining_time": "0:00:04", "throughput": 2132.88, "total_tokens": 1424608}
{"current_steps": 3145, "total_steps": 3160, "loss": 0.0001, "lr": 2.3427744801363113e-06, "epoch": 19.90506329113924, "percentage": 99.53, "elapsed_time": "0:11:08", "remaining_time": "0:00:03", "throughput": 2133.2, "total_tokens": 1426912}
{"current_steps": 3150, "total_steps": 3160, "loss": 0.0003, "lr": 1.107342201427386e-06, "epoch": 19.936708860759495, "percentage": 99.68, "elapsed_time": "0:11:09", "remaining_time": "0:00:02", "throughput": 2133.54, "total_tokens": 1429248}
{"current_steps": 3155, "total_steps": 3160, "loss": 0.0004, "lr": 3.294600315012497e-07, "epoch": 19.968354430379748, "percentage": 99.84, "elapsed_time": "0:11:10", "remaining_time": "0:00:01", "throughput": 2133.82, "total_tokens": 1431552}
{"current_steps": 3160, "total_steps": 3160, "loss": 0.0003, "lr": 9.151700112730588e-09, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:11:12", "remaining_time": "0:00:00", "throughput": 2133.14, "total_tokens": 1433520}
{"current_steps": 3160, "total_steps": 3160, "eval_loss": 0.09257736802101135, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:11:13", "remaining_time": "0:00:00", "throughput": 2127.81, "total_tokens": 1433520}
{"current_steps": 3160, "total_steps": 3160, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:11:14", "remaining_time": "0:00:00", "throughput": 2125.12, "total_tokens": 1433520}