train_record_1745936337 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 800
4efcd48 verified
{"current_steps": 5, "total_steps": 40000, "loss": 13.7251, "lr": 0.29999999259779675, "epoch": 0.0006401638819537802, "percentage": 0.01, "elapsed_time": "0:00:05", "remaining_time": "11:42:32", "throughput": 5762.67, "total_tokens": 30368}
{"current_steps": 10, "total_steps": 40000, "loss": 9.6828, "lr": 0.29999996252634736, "epoch": 0.0012803277639075604, "percentage": 0.03, "elapsed_time": "0:00:08", "remaining_time": "9:14:51", "throughput": 7341.8, "total_tokens": 61120}
{"current_steps": 15, "total_steps": 40000, "loss": 8.4003, "lr": 0.2999999093230187, "epoch": 0.0019204916458613404, "percentage": 0.04, "elapsed_time": "0:00:11", "remaining_time": "8:23:15", "throughput": 8003.1, "total_tokens": 90656}
{"current_steps": 20, "total_steps": 40000, "loss": 7.7231, "lr": 0.299999832987819, "epoch": 0.002560655527815121, "percentage": 0.05, "elapsed_time": "0:00:14", "remaining_time": "7:57:21", "throughput": 8399.73, "total_tokens": 120352}
{"current_steps": 25, "total_steps": 40000, "loss": 6.9893, "lr": 0.29999973352076004, "epoch": 0.003200819409768901, "percentage": 0.06, "elapsed_time": "0:00:17", "remaining_time": "7:43:02", "throughput": 8670.75, "total_tokens": 150656}
{"current_steps": 30, "total_steps": 40000, "loss": 6.863, "lr": 0.2999996109218572, "epoch": 0.003840983291722681, "percentage": 0.07, "elapsed_time": "0:00:20", "remaining_time": "7:32:22", "throughput": 8870.25, "total_tokens": 180704}
{"current_steps": 35, "total_steps": 40000, "loss": 6.2616, "lr": 0.2999994651911293, "epoch": 0.004481147173676461, "percentage": 0.09, "elapsed_time": "0:00:23", "remaining_time": "7:24:00", "throughput": 8985.2, "total_tokens": 209632}
{"current_steps": 40, "total_steps": 40000, "loss": 6.008, "lr": 0.2999992963285989, "epoch": 0.005121311055630242, "percentage": 0.1, "elapsed_time": "0:00:26", "remaining_time": "7:16:52", "throughput": 9052.77, "total_tokens": 237536}
{"current_steps": 45, "total_steps": 40000, "loss": 5.2564, "lr": 0.29999910433429194, "epoch": 0.005761474937584022, "percentage": 0.11, "elapsed_time": "0:00:29", "remaining_time": "7:12:46", "throughput": 9146.47, "total_tokens": 267488}
{"current_steps": 50, "total_steps": 40000, "loss": 5.7238, "lr": 0.29999888920823814, "epoch": 0.006401638819537802, "percentage": 0.12, "elapsed_time": "0:00:32", "remaining_time": "7:09:51", "throughput": 9232.19, "total_tokens": 298016}
{"current_steps": 55, "total_steps": 40000, "loss": 5.1487, "lr": 0.29999865095047057, "epoch": 0.007041802701491582, "percentage": 0.14, "elapsed_time": "0:00:35", "remaining_time": "7:06:49", "throughput": 9284.75, "total_tokens": 327392}
{"current_steps": 60, "total_steps": 40000, "loss": 4.8912, "lr": 0.29999838956102604, "epoch": 0.007681966583445362, "percentage": 0.15, "elapsed_time": "0:00:38", "remaining_time": "7:04:02", "throughput": 9325.23, "total_tokens": 356416}
{"current_steps": 65, "total_steps": 40000, "loss": 4.3542, "lr": 0.29999810503994484, "epoch": 0.008322130465399142, "percentage": 0.16, "elapsed_time": "0:00:41", "remaining_time": "7:01:39", "throughput": 9354.03, "total_tokens": 385184}
{"current_steps": 70, "total_steps": 40000, "loss": 4.0692, "lr": 0.29999779738727084, "epoch": 0.008962294347352922, "percentage": 0.18, "elapsed_time": "0:00:44", "remaining_time": "6:59:29", "throughput": 9384.37, "total_tokens": 414080}
{"current_steps": 75, "total_steps": 40000, "loss": 4.0438, "lr": 0.29999746660305154, "epoch": 0.009602458229306703, "percentage": 0.19, "elapsed_time": "0:00:47", "remaining_time": "6:57:25", "throughput": 9402.93, "total_tokens": 442400}
{"current_steps": 80, "total_steps": 40000, "loss": 3.8262, "lr": 0.2999971126873379, "epoch": 0.010242622111260483, "percentage": 0.2, "elapsed_time": "0:00:49", "remaining_time": "6:55:32", "throughput": 9418.34, "total_tokens": 470592}
{"current_steps": 85, "total_steps": 40000, "loss": 3.0453, "lr": 0.2999967356401845, "epoch": 0.010882785993214263, "percentage": 0.21, "elapsed_time": "0:00:52", "remaining_time": "6:53:51", "throughput": 9434.32, "total_tokens": 498880}
{"current_steps": 90, "total_steps": 40000, "loss": 2.4601, "lr": 0.29999633546164944, "epoch": 0.011522949875168043, "percentage": 0.22, "elapsed_time": "0:00:55", "remaining_time": "6:53:13", "throughput": 9460.07, "total_tokens": 528928}
{"current_steps": 95, "total_steps": 40000, "loss": 2.1518, "lr": 0.29999591215179444, "epoch": 0.012163113757121823, "percentage": 0.24, "elapsed_time": "0:00:58", "remaining_time": "6:52:05", "throughput": 9473.55, "total_tokens": 557632}
{"current_steps": 100, "total_steps": 40000, "loss": 2.1624, "lr": 0.2999954657106849, "epoch": 0.012803277639075603, "percentage": 0.25, "elapsed_time": "0:01:01", "remaining_time": "6:51:28", "throughput": 9494.04, "total_tokens": 587456}
{"current_steps": 105, "total_steps": 40000, "loss": 1.6386, "lr": 0.2999949961383896, "epoch": 0.013443441521029383, "percentage": 0.26, "elapsed_time": "0:01:04", "remaining_time": "6:50:30", "throughput": 9508.38, "total_tokens": 616384}
{"current_steps": 110, "total_steps": 40000, "loss": 1.6483, "lr": 0.2999945034349809, "epoch": 0.014083605402983163, "percentage": 0.27, "elapsed_time": "0:01:07", "remaining_time": "6:49:29", "throughput": 9518.57, "total_tokens": 644896}
{"current_steps": 115, "total_steps": 40000, "loss": 2.1854, "lr": 0.2999939876005348, "epoch": 0.014723769284936943, "percentage": 0.29, "elapsed_time": "0:01:10", "remaining_time": "6:48:51", "throughput": 9535.02, "total_tokens": 674432}
{"current_steps": 120, "total_steps": 40000, "loss": 1.8906, "lr": 0.29999344863513094, "epoch": 0.015363933166890723, "percentage": 0.3, "elapsed_time": "0:01:13", "remaining_time": "6:48:29", "throughput": 9552.78, "total_tokens": 704512}
{"current_steps": 125, "total_steps": 40000, "loss": 1.949, "lr": 0.2999928865388523, "epoch": 0.016004097048844503, "percentage": 0.31, "elapsed_time": "0:01:16", "remaining_time": "6:47:41", "throughput": 9555.69, "total_tokens": 732736}
{"current_steps": 130, "total_steps": 40000, "loss": 1.8998, "lr": 0.29999230131178567, "epoch": 0.016644260930798283, "percentage": 0.33, "elapsed_time": "0:01:19", "remaining_time": "6:48:25", "throughput": 9561.39, "total_tokens": 763968}
{"current_steps": 135, "total_steps": 40000, "loss": 1.7515, "lr": 0.2999916929540212, "epoch": 0.017284424812752063, "percentage": 0.34, "elapsed_time": "0:01:22", "remaining_time": "6:47:49", "throughput": 9572.79, "total_tokens": 793248}
{"current_steps": 140, "total_steps": 40000, "loss": 1.7399, "lr": 0.29999106146565285, "epoch": 0.017924588694705843, "percentage": 0.35, "elapsed_time": "0:01:25", "remaining_time": "6:47:19", "throughput": 9586.94, "total_tokens": 822944}
{"current_steps": 145, "total_steps": 40000, "loss": 1.4607, "lr": 0.29999040684677786, "epoch": 0.018564752576659623, "percentage": 0.36, "elapsed_time": "0:01:28", "remaining_time": "6:46:53", "throughput": 9597.9, "total_tokens": 852512}
{"current_steps": 150, "total_steps": 40000, "loss": 1.5738, "lr": 0.2999897290974972, "epoch": 0.019204916458613407, "percentage": 0.38, "elapsed_time": "0:01:31", "remaining_time": "6:46:33", "throughput": 9603.79, "total_tokens": 881824}
{"current_steps": 155, "total_steps": 40000, "loss": 1.4388, "lr": 0.2999890282179155, "epoch": 0.019845080340567187, "percentage": 0.39, "elapsed_time": "0:01:34", "remaining_time": "6:46:06", "throughput": 9610.6, "total_tokens": 910976}
{"current_steps": 160, "total_steps": 40000, "loss": 1.4048, "lr": 0.29998830420814077, "epoch": 0.020485244222520967, "percentage": 0.4, "elapsed_time": "0:01:37", "remaining_time": "6:45:41", "throughput": 9616.7, "total_tokens": 940096}
{"current_steps": 165, "total_steps": 40000, "loss": 1.455, "lr": 0.2999875570682846, "epoch": 0.021125408104474747, "percentage": 0.41, "elapsed_time": "0:01:40", "remaining_time": "6:45:22", "throughput": 9625.7, "total_tokens": 969760}
{"current_steps": 170, "total_steps": 40000, "loss": 1.4873, "lr": 0.2999867867984623, "epoch": 0.021765571986428527, "percentage": 0.43, "elapsed_time": "0:01:43", "remaining_time": "6:45:14", "throughput": 9635.75, "total_tokens": 999968}
{"current_steps": 175, "total_steps": 40000, "loss": 1.5199, "lr": 0.29998599339879267, "epoch": 0.022405735868382307, "percentage": 0.44, "elapsed_time": "0:01:46", "remaining_time": "6:44:51", "throughput": 9639.26, "total_tokens": 1028928}
{"current_steps": 180, "total_steps": 40000, "loss": 1.4215, "lr": 0.29998517686939796, "epoch": 0.023045899750336087, "percentage": 0.45, "elapsed_time": "0:01:49", "remaining_time": "6:44:30", "throughput": 9645.59, "total_tokens": 1058240}
{"current_steps": 185, "total_steps": 40000, "loss": 1.4071, "lr": 0.29998433721040413, "epoch": 0.023686063632289867, "percentage": 0.46, "elapsed_time": "0:01:52", "remaining_time": "6:44:11", "throughput": 9650.48, "total_tokens": 1087456}
{"current_steps": 190, "total_steps": 40000, "loss": 1.4328, "lr": 0.29998347442194073, "epoch": 0.024326227514243647, "percentage": 0.47, "elapsed_time": "0:01:55", "remaining_time": "6:43:47", "throughput": 9654.27, "total_tokens": 1116320}
{"current_steps": 195, "total_steps": 40000, "loss": 1.4792, "lr": 0.2999825885041407, "epoch": 0.024966391396197427, "percentage": 0.49, "elapsed_time": "0:01:58", "remaining_time": "6:43:18", "throughput": 9656.43, "total_tokens": 1144736}
{"current_steps": 200, "total_steps": 40000, "loss": 1.3854, "lr": 0.29998167945714077, "epoch": 0.025606555278151207, "percentage": 0.5, "elapsed_time": "0:02:01", "remaining_time": "6:43:02", "throughput": 9660.77, "total_tokens": 1173984}
{"current_steps": 200, "total_steps": 40000, "eval_loss": 1.3866865634918213, "epoch": 0.025606555278151207, "percentage": 0.5, "elapsed_time": "0:05:23", "remaining_time": "17:51:51", "throughput": 3632.7, "total_tokens": 1173984}
{"current_steps": 205, "total_steps": 40000, "loss": 1.4504, "lr": 0.2999807472810811, "epoch": 0.026246719160104987, "percentage": 0.51, "elapsed_time": "0:05:27", "remaining_time": "17:40:02", "throughput": 3670.85, "total_tokens": 1202720}
{"current_steps": 210, "total_steps": 40000, "loss": 1.3349, "lr": 0.29997979197610536, "epoch": 0.026886883042058767, "percentage": 0.53, "elapsed_time": "0:05:30", "remaining_time": "17:24:18", "throughput": 3729.37, "total_tokens": 1233280}
{"current_steps": 215, "total_steps": 40000, "loss": 1.1925, "lr": 0.299978813542361, "epoch": 0.027527046924012546, "percentage": 0.54, "elapsed_time": "0:05:33", "remaining_time": "17:08:51", "throughput": 3780.61, "total_tokens": 1261216}
{"current_steps": 220, "total_steps": 40000, "loss": 1.3447, "lr": 0.2999778119799988, "epoch": 0.028167210805966326, "percentage": 0.55, "elapsed_time": "0:05:36", "remaining_time": "16:54:17", "throughput": 3833.6, "total_tokens": 1290272}
{"current_steps": 225, "total_steps": 40000, "loss": 1.09, "lr": 0.29997678728917326, "epoch": 0.028807374687920106, "percentage": 0.56, "elapsed_time": "0:05:39", "remaining_time": "16:40:13", "throughput": 3883.54, "total_tokens": 1318400}
{"current_steps": 230, "total_steps": 40000, "loss": 1.2867, "lr": 0.2999757394700424, "epoch": 0.029447538569873886, "percentage": 0.57, "elapsed_time": "0:05:42", "remaining_time": "16:26:53", "throughput": 3934.39, "total_tokens": 1347328}
{"current_steps": 235, "total_steps": 40000, "loss": 1.2724, "lr": 0.29997466852276783, "epoch": 0.030087702451827666, "percentage": 0.59, "elapsed_time": "0:05:45", "remaining_time": "16:14:10", "throughput": 3985.23, "total_tokens": 1376608}
{"current_steps": 240, "total_steps": 40000, "loss": 1.1477, "lr": 0.29997357444751466, "epoch": 0.030727866333781446, "percentage": 0.6, "elapsed_time": "0:05:48", "remaining_time": "16:02:16", "throughput": 4037.96, "total_tokens": 1407264}
{"current_steps": 245, "total_steps": 40000, "loss": 1.2046, "lr": 0.2999724572444516, "epoch": 0.031368030215735226, "percentage": 0.61, "elapsed_time": "0:05:51", "remaining_time": "15:50:38", "throughput": 4087.74, "total_tokens": 1436896}
{"current_steps": 250, "total_steps": 40000, "loss": 1.261, "lr": 0.29997131691375095, "epoch": 0.032008194097689006, "percentage": 0.62, "elapsed_time": "0:05:54", "remaining_time": "15:39:27", "throughput": 4137.08, "total_tokens": 1466656}
{"current_steps": 255, "total_steps": 40000, "loss": 1.1593, "lr": 0.2999701534555886, "epoch": 0.032648357979642786, "percentage": 0.64, "elapsed_time": "0:05:57", "remaining_time": "15:28:35", "throughput": 4183.43, "total_tokens": 1495424}
{"current_steps": 260, "total_steps": 40000, "loss": 1.2118, "lr": 0.2999689668701439, "epoch": 0.033288521861596566, "percentage": 0.65, "elapsed_time": "0:06:00", "remaining_time": "15:18:14", "throughput": 4230.75, "total_tokens": 1524992}
{"current_steps": 265, "total_steps": 40000, "loss": 1.1233, "lr": 0.29996775715759993, "epoch": 0.033928685743550346, "percentage": 0.66, "elapsed_time": "0:06:03", "remaining_time": "15:08:07", "throughput": 4275.52, "total_tokens": 1553664}
{"current_steps": 270, "total_steps": 40000, "loss": 1.1762, "lr": 0.2999665243181432, "epoch": 0.034568849625504126, "percentage": 0.68, "elapsed_time": "0:06:06", "remaining_time": "14:58:26", "throughput": 4319.94, "total_tokens": 1582560}
{"current_steps": 275, "total_steps": 40000, "loss": 1.0844, "lr": 0.2999652683519638, "epoch": 0.035209013507457906, "percentage": 0.69, "elapsed_time": "0:06:09", "remaining_time": "14:49:11", "throughput": 4365.1, "total_tokens": 1612160}
{"current_steps": 280, "total_steps": 40000, "loss": 1.2859, "lr": 0.29996398925925544, "epoch": 0.035849177389411686, "percentage": 0.7, "elapsed_time": "0:06:12", "remaining_time": "14:40:10", "throughput": 4407.94, "total_tokens": 1640992}
{"current_steps": 285, "total_steps": 40000, "loss": 1.0789, "lr": 0.2999626870402154, "epoch": 0.036489341271365466, "percentage": 0.71, "elapsed_time": "0:06:15", "remaining_time": "14:31:27", "throughput": 4449.39, "total_tokens": 1669504}
{"current_steps": 290, "total_steps": 40000, "loss": 1.356, "lr": 0.29996136169504445, "epoch": 0.037129505153319246, "percentage": 0.73, "elapsed_time": "0:06:18", "remaining_time": "14:23:01", "throughput": 4490.91, "total_tokens": 1698272}
{"current_steps": 295, "total_steps": 40000, "loss": 1.1975, "lr": 0.29996001322394694, "epoch": 0.03776966903527303, "percentage": 0.74, "elapsed_time": "0:06:21", "remaining_time": "14:14:54", "throughput": 4531.39, "total_tokens": 1726944}
{"current_steps": 300, "total_steps": 40000, "loss": 1.0574, "lr": 0.29995864162713093, "epoch": 0.03840983291722681, "percentage": 0.75, "elapsed_time": "0:06:24", "remaining_time": "14:07:03", "throughput": 4571.59, "total_tokens": 1755744}
{"current_steps": 305, "total_steps": 40000, "loss": 1.2354, "lr": 0.2999572469048079, "epoch": 0.03904999679918059, "percentage": 0.76, "elapsed_time": "0:06:27", "remaining_time": "13:59:29", "throughput": 4611.64, "total_tokens": 1784800}
{"current_steps": 310, "total_steps": 40000, "loss": 0.9705, "lr": 0.29995582905719287, "epoch": 0.03969016068113437, "percentage": 0.78, "elapsed_time": "0:06:29", "remaining_time": "13:52:08", "throughput": 4651.09, "total_tokens": 1813792}
{"current_steps": 315, "total_steps": 40000, "loss": 1.0894, "lr": 0.2999543880845046, "epoch": 0.04033032456308815, "percentage": 0.79, "elapsed_time": "0:06:32", "remaining_time": "13:45:03", "throughput": 4689.55, "total_tokens": 1842688}
{"current_steps": 320, "total_steps": 40000, "loss": 1.0823, "lr": 0.2999529239869652, "epoch": 0.04097048844504193, "percentage": 0.8, "elapsed_time": "0:06:35", "remaining_time": "13:38:07", "throughput": 4727.16, "total_tokens": 1871328}
{"current_steps": 325, "total_steps": 40000, "loss": 1.0011, "lr": 0.2999514367648005, "epoch": 0.04161065232699571, "percentage": 0.81, "elapsed_time": "0:06:38", "remaining_time": "13:31:32", "throughput": 4765.09, "total_tokens": 1900640}
{"current_steps": 330, "total_steps": 40000, "loss": 1.133, "lr": 0.29994992641823987, "epoch": 0.04225081620894949, "percentage": 0.83, "elapsed_time": "0:06:41", "remaining_time": "13:25:07", "throughput": 4803.27, "total_tokens": 1930208}
{"current_steps": 335, "total_steps": 40000, "loss": 1.1052, "lr": 0.29994839294751613, "epoch": 0.04289098009090327, "percentage": 0.84, "elapsed_time": "0:06:44", "remaining_time": "13:18:50", "throughput": 4839.95, "total_tokens": 1959264}
{"current_steps": 340, "total_steps": 40000, "loss": 0.998, "lr": 0.29994683635286584, "epoch": 0.04353114397285705, "percentage": 0.85, "elapsed_time": "0:06:47", "remaining_time": "13:12:45", "throughput": 4876.16, "total_tokens": 1988384}
{"current_steps": 345, "total_steps": 40000, "loss": 1.0925, "lr": 0.2999452566345291, "epoch": 0.04417130785481083, "percentage": 0.86, "elapsed_time": "0:06:50", "remaining_time": "13:06:51", "throughput": 4911.53, "total_tokens": 2017376}
{"current_steps": 350, "total_steps": 40000, "loss": 1.0456, "lr": 0.2999436537927494, "epoch": 0.04481147173676461, "percentage": 0.88, "elapsed_time": "0:06:53", "remaining_time": "13:01:10", "throughput": 4946.8, "total_tokens": 2046656}
{"current_steps": 355, "total_steps": 40000, "loss": 0.9938, "lr": 0.299942027827774, "epoch": 0.04545163561871839, "percentage": 0.89, "elapsed_time": "0:06:56", "remaining_time": "12:55:29", "throughput": 4980.23, "total_tokens": 2075008}
{"current_steps": 360, "total_steps": 40000, "loss": 1.1275, "lr": 0.29994037873985363, "epoch": 0.04609179950067217, "percentage": 0.9, "elapsed_time": "0:06:59", "remaining_time": "12:49:56", "throughput": 5011.89, "total_tokens": 2102688}
{"current_steps": 365, "total_steps": 40000, "loss": 1.0646, "lr": 0.29993870652924254, "epoch": 0.04673196338262595, "percentage": 0.91, "elapsed_time": "0:07:02", "remaining_time": "12:44:41", "throughput": 5046.58, "total_tokens": 2132288}
{"current_steps": 370, "total_steps": 40000, "loss": 1.0621, "lr": 0.29993701119619876, "epoch": 0.04737212726457973, "percentage": 0.92, "elapsed_time": "0:07:05", "remaining_time": "12:39:33", "throughput": 5079.68, "total_tokens": 2161376}
{"current_steps": 375, "total_steps": 40000, "loss": 1.0224, "lr": 0.2999352927409835, "epoch": 0.04801229114653351, "percentage": 0.94, "elapsed_time": "0:07:08", "remaining_time": "12:34:28", "throughput": 5111.08, "total_tokens": 2189632}
{"current_steps": 380, "total_steps": 40000, "loss": 1.167, "lr": 0.29993355116386194, "epoch": 0.04865245502848729, "percentage": 0.95, "elapsed_time": "0:07:11", "remaining_time": "12:29:30", "throughput": 5141.94, "total_tokens": 2217792}
{"current_steps": 385, "total_steps": 40000, "loss": 1.1689, "lr": 0.29993178646510266, "epoch": 0.04929261891044107, "percentage": 0.96, "elapsed_time": "0:07:14", "remaining_time": "12:24:43", "throughput": 5173.71, "total_tokens": 2246720}
{"current_steps": 390, "total_steps": 40000, "loss": 1.1936, "lr": 0.2999299986449777, "epoch": 0.04993278279239485, "percentage": 0.97, "elapsed_time": "0:07:17", "remaining_time": "12:20:05", "throughput": 5205.2, "total_tokens": 2275776}
{"current_steps": 395, "total_steps": 40000, "loss": 1.0139, "lr": 0.29992818770376284, "epoch": 0.05057294667434863, "percentage": 0.99, "elapsed_time": "0:07:20", "remaining_time": "12:15:35", "throughput": 5236.82, "total_tokens": 2305152}
{"current_steps": 400, "total_steps": 40000, "loss": 1.0969, "lr": 0.29992635364173725, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:07:23", "remaining_time": "12:11:14", "throughput": 5269.11, "total_tokens": 2335136}
{"current_steps": 400, "total_steps": 40000, "eval_loss": 1.0986460447311401, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:10:44", "remaining_time": "17:42:45", "throughput": 3625.42, "total_tokens": 2335136}
{"current_steps": 405, "total_steps": 40000, "loss": 1.2408, "lr": 0.2999244964591839, "epoch": 0.05185327443825619, "percentage": 1.01, "elapsed_time": "0:10:48", "remaining_time": "17:37:22", "throughput": 3643.65, "total_tokens": 2364448}
{"current_steps": 410, "total_steps": 40000, "loss": 1.1477, "lr": 0.2999226161563891, "epoch": 0.05249343832020997, "percentage": 1.03, "elapsed_time": "0:10:51", "remaining_time": "17:29:08", "throughput": 3672.09, "total_tokens": 2393856}
{"current_steps": 415, "total_steps": 40000, "loss": 1.0423, "lr": 0.2999207127336429, "epoch": 0.05313360220216375, "percentage": 1.04, "elapsed_time": "0:10:54", "remaining_time": "17:21:05", "throughput": 3699.84, "total_tokens": 2422912}
{"current_steps": 420, "total_steps": 40000, "loss": 1.0272, "lr": 0.2999187861912387, "epoch": 0.05377376608411753, "percentage": 1.05, "elapsed_time": "0:10:57", "remaining_time": "17:13:14", "throughput": 3728.01, "total_tokens": 2452480}
{"current_steps": 425, "total_steps": 40000, "loss": 1.1012, "lr": 0.2999168365294737, "epoch": 0.05441392996607131, "percentage": 1.06, "elapsed_time": "0:11:00", "remaining_time": "17:05:46", "throughput": 3757.38, "total_tokens": 2483456}
{"current_steps": 430, "total_steps": 40000, "loss": 1.2242, "lr": 0.29991486374864856, "epoch": 0.05505409384802509, "percentage": 1.07, "elapsed_time": "0:11:04", "remaining_time": "16:58:25", "throughput": 3786.74, "total_tokens": 2514464}
{"current_steps": 435, "total_steps": 40000, "loss": 1.0773, "lr": 0.29991286784906745, "epoch": 0.05569425772997887, "percentage": 1.09, "elapsed_time": "0:11:07", "remaining_time": "16:51:37", "throughput": 3812.5, "total_tokens": 2544224}
{"current_steps": 440, "total_steps": 40000, "loss": 1.1898, "lr": 0.2999108488310382, "epoch": 0.05633442161193265, "percentage": 1.1, "elapsed_time": "0:11:10", "remaining_time": "16:44:29", "throughput": 3839.28, "total_tokens": 2573600}
{"current_steps": 445, "total_steps": 40000, "loss": 1.1474, "lr": 0.29990880669487213, "epoch": 0.05697458549388643, "percentage": 1.11, "elapsed_time": "0:11:13", "remaining_time": "16:37:29", "throughput": 3866.02, "total_tokens": 2603040}
{"current_steps": 450, "total_steps": 40000, "loss": 1.0324, "lr": 0.29990674144088425, "epoch": 0.05761474937584021, "percentage": 1.12, "elapsed_time": "0:11:16", "remaining_time": "16:30:33", "throughput": 3891.31, "total_tokens": 2631456}
{"current_steps": 455, "total_steps": 40000, "loss": 1.0619, "lr": 0.299904653069393, "epoch": 0.05825491325779399, "percentage": 1.14, "elapsed_time": "0:11:19", "remaining_time": "16:23:48", "throughput": 3917.24, "total_tokens": 2660512}
{"current_steps": 460, "total_steps": 40000, "loss": 0.9969, "lr": 0.29990254158072044, "epoch": 0.05889507713974777, "percentage": 1.15, "elapsed_time": "0:11:22", "remaining_time": "16:17:14", "throughput": 3942.63, "total_tokens": 2689440}
{"current_steps": 465, "total_steps": 40000, "loss": 1.0444, "lr": 0.2999004069751921, "epoch": 0.05953524102170155, "percentage": 1.16, "elapsed_time": "0:11:25", "remaining_time": "16:10:48", "throughput": 3968.19, "total_tokens": 2718624}
{"current_steps": 470, "total_steps": 40000, "loss": 1.0846, "lr": 0.2998982492531373, "epoch": 0.06017540490365533, "percentage": 1.18, "elapsed_time": "0:11:28", "remaining_time": "16:04:31", "throughput": 3993.58, "total_tokens": 2747872}
{"current_steps": 475, "total_steps": 40000, "loss": 1.1216, "lr": 0.2998960684148887, "epoch": 0.06081556878560911, "percentage": 1.19, "elapsed_time": "0:11:31", "remaining_time": "15:58:19", "throughput": 4017.94, "total_tokens": 2776448}
{"current_steps": 480, "total_steps": 40000, "loss": 1.0923, "lr": 0.29989386446078264, "epoch": 0.06145573266756289, "percentage": 1.2, "elapsed_time": "0:11:33", "remaining_time": "15:52:17", "throughput": 4042.63, "total_tokens": 2805504}
{"current_steps": 485, "total_steps": 40000, "loss": 1.0878, "lr": 0.299891637391159, "epoch": 0.06209589654951667, "percentage": 1.21, "elapsed_time": "0:11:36", "remaining_time": "15:46:25", "throughput": 4067.6, "total_tokens": 2835008}
{"current_steps": 490, "total_steps": 40000, "loss": 0.9946, "lr": 0.2998893872063612, "epoch": 0.06273606043147045, "percentage": 1.23, "elapsed_time": "0:11:39", "remaining_time": "15:40:39", "throughput": 4092.4, "total_tokens": 2864512}
{"current_steps": 495, "total_steps": 40000, "loss": 1.0689, "lr": 0.2998871139067363, "epoch": 0.06337622431342424, "percentage": 1.24, "elapsed_time": "0:11:43", "remaining_time": "15:35:12", "throughput": 4118.65, "total_tokens": 2895776}
{"current_steps": 500, "total_steps": 40000, "loss": 1.1296, "lr": 0.2998848174926348, "epoch": 0.06401638819537801, "percentage": 1.25, "elapsed_time": "0:11:46", "remaining_time": "15:29:40", "throughput": 4143.03, "total_tokens": 2925344}
{"current_steps": 505, "total_steps": 40000, "loss": 0.8825, "lr": 0.2998824979644109, "epoch": 0.0646565520773318, "percentage": 1.26, "elapsed_time": "0:11:48", "remaining_time": "15:24:09", "throughput": 4165.76, "total_tokens": 2953504}
{"current_steps": 510, "total_steps": 40000, "loss": 1.1476, "lr": 0.29988015532242224, "epoch": 0.06529671595928557, "percentage": 1.27, "elapsed_time": "0:11:51", "remaining_time": "15:18:46", "throughput": 4189.1, "total_tokens": 2982400}
{"current_steps": 515, "total_steps": 40000, "loss": 1.1222, "lr": 0.29987778956703015, "epoch": 0.06593687984123936, "percentage": 1.29, "elapsed_time": "0:11:54", "remaining_time": "15:13:37", "throughput": 4213.68, "total_tokens": 3012704}
{"current_steps": 520, "total_steps": 40000, "loss": 1.0441, "lr": 0.2998754006985994, "epoch": 0.06657704372319313, "percentage": 1.3, "elapsed_time": "0:11:57", "remaining_time": "15:08:27", "throughput": 4236.69, "total_tokens": 3041664}
{"current_steps": 525, "total_steps": 40000, "loss": 0.9971, "lr": 0.29987298871749846, "epoch": 0.06721720760514692, "percentage": 1.31, "elapsed_time": "0:12:00", "remaining_time": "15:03:25", "throughput": 4259.56, "total_tokens": 3070752}
{"current_steps": 530, "total_steps": 40000, "loss": 1.068, "lr": 0.2998705536240992, "epoch": 0.06785737148710069, "percentage": 1.32, "elapsed_time": "0:12:03", "remaining_time": "14:58:31", "throughput": 4283.13, "total_tokens": 3100640}
{"current_steps": 535, "total_steps": 40000, "loss": 1.1588, "lr": 0.2998680954187772, "epoch": 0.06849753536905448, "percentage": 1.34, "elapsed_time": "0:12:06", "remaining_time": "14:53:41", "throughput": 4306.31, "total_tokens": 3130304}
{"current_steps": 540, "total_steps": 40000, "loss": 0.9421, "lr": 0.2998656141019115, "epoch": 0.06913769925100825, "percentage": 1.35, "elapsed_time": "0:12:09", "remaining_time": "14:48:51", "throughput": 4327.74, "total_tokens": 3158496}
{"current_steps": 545, "total_steps": 40000, "loss": 1.0863, "lr": 0.2998631096738848, "epoch": 0.06977786313296204, "percentage": 1.36, "elapsed_time": "0:12:12", "remaining_time": "14:44:09", "throughput": 4349.66, "total_tokens": 3187328}
{"current_steps": 550, "total_steps": 40000, "loss": 1.2508, "lr": 0.29986058213508326, "epoch": 0.07041802701491581, "percentage": 1.38, "elapsed_time": "0:12:15", "remaining_time": "14:39:31", "throughput": 4371.02, "total_tokens": 3215840}
{"current_steps": 555, "total_steps": 40000, "loss": 0.9364, "lr": 0.29985803148589674, "epoch": 0.0710581908968696, "percentage": 1.39, "elapsed_time": "0:12:18", "remaining_time": "14:35:00", "throughput": 4393.33, "total_tokens": 3245344}
{"current_steps": 560, "total_steps": 40000, "loss": 1.0526, "lr": 0.2998554577267185, "epoch": 0.07169835477882337, "percentage": 1.4, "elapsed_time": "0:12:21", "remaining_time": "14:30:33", "throughput": 4415.24, "total_tokens": 3274592}
{"current_steps": 565, "total_steps": 40000, "loss": 0.9364, "lr": 0.2998528608579455, "epoch": 0.07233851866077716, "percentage": 1.41, "elapsed_time": "0:12:24", "remaining_time": "14:26:09", "throughput": 4436.22, "total_tokens": 3303136}
{"current_steps": 570, "total_steps": 40000, "loss": 1.0474, "lr": 0.2998502408799781, "epoch": 0.07297868254273093, "percentage": 1.43, "elapsed_time": "0:12:27", "remaining_time": "14:21:53", "throughput": 4458.06, "total_tokens": 3332704}
{"current_steps": 575, "total_steps": 40000, "loss": 1.0375, "lr": 0.2998475977932205, "epoch": 0.07361884642468472, "percentage": 1.44, "elapsed_time": "0:12:30", "remaining_time": "14:17:43", "throughput": 4480.39, "total_tokens": 3362880}
{"current_steps": 580, "total_steps": 40000, "loss": 1.0328, "lr": 0.29984493159808023, "epoch": 0.07425901030663849, "percentage": 1.45, "elapsed_time": "0:12:33", "remaining_time": "14:13:38", "throughput": 4502.63, "total_tokens": 3393184}
{"current_steps": 585, "total_steps": 40000, "loss": 0.9672, "lr": 0.29984224229496836, "epoch": 0.07489917418859228, "percentage": 1.46, "elapsed_time": "0:12:36", "remaining_time": "14:09:31", "throughput": 4522.85, "total_tokens": 3421664}
{"current_steps": 590, "total_steps": 40000, "loss": 0.9533, "lr": 0.2998395298842998, "epoch": 0.07553933807054607, "percentage": 1.47, "elapsed_time": "0:12:39", "remaining_time": "14:05:31", "throughput": 4543.44, "total_tokens": 3450688}
{"current_steps": 595, "total_steps": 40000, "loss": 1.0442, "lr": 0.29983679436649263, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:12:42", "remaining_time": "14:01:45", "throughput": 4565.78, "total_tokens": 3481952}
{"current_steps": 600, "total_steps": 40000, "loss": 1.1743, "lr": 0.2998340357419689, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:12:45", "remaining_time": "13:57:57", "throughput": 4586.59, "total_tokens": 3511712}
{"current_steps": 600, "total_steps": 40000, "eval_loss": 1.051778793334961, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:06", "remaining_time": "17:37:35", "throughput": 3634.09, "total_tokens": 3511712}
{"current_steps": 605, "total_steps": 40000, "loss": 1.0642, "lr": 0.29983125401115385, "epoch": 0.0774598297164074, "percentage": 1.51, "elapsed_time": "0:16:10", "remaining_time": "17:33:30", "throughput": 3647.02, "total_tokens": 3540288}
{"current_steps": 610, "total_steps": 40000, "loss": 0.933, "lr": 0.29982844917447654, "epoch": 0.07809999359836119, "percentage": 1.52, "elapsed_time": "0:16:13", "remaining_time": "17:27:57", "throughput": 3666.17, "total_tokens": 3569856}
{"current_steps": 615, "total_steps": 40000, "loss": 0.991, "lr": 0.2998256212323695, "epoch": 0.07874015748031496, "percentage": 1.54, "elapsed_time": "0:16:16", "remaining_time": "17:22:27", "throughput": 3684.64, "total_tokens": 3598720}
{"current_steps": 620, "total_steps": 40000, "loss": 1.0314, "lr": 0.29982277018526887, "epoch": 0.07938032136226875, "percentage": 1.55, "elapsed_time": "0:16:19", "remaining_time": "17:17:02", "throughput": 3703.33, "total_tokens": 3627936}
{"current_steps": 625, "total_steps": 40000, "loss": 1.0803, "lr": 0.2998198960336143, "epoch": 0.08002048524422252, "percentage": 1.56, "elapsed_time": "0:16:22", "remaining_time": "17:11:45", "throughput": 3722.2, "total_tokens": 3657536}
{"current_steps": 630, "total_steps": 40000, "loss": 0.9903, "lr": 0.299816998777849, "epoch": 0.0806606491261763, "percentage": 1.57, "elapsed_time": "0:16:25", "remaining_time": "17:06:29", "throughput": 3740.22, "total_tokens": 3686176}
{"current_steps": 635, "total_steps": 40000, "loss": 0.9363, "lr": 0.2998140784184197, "epoch": 0.08130081300813008, "percentage": 1.59, "elapsed_time": "0:16:28", "remaining_time": "17:01:25", "throughput": 3759.08, "total_tokens": 3716224}
{"current_steps": 640, "total_steps": 40000, "loss": 1.1208, "lr": 0.2998111349557769, "epoch": 0.08194097689008387, "percentage": 1.6, "elapsed_time": "0:16:31", "remaining_time": "16:56:19", "throughput": 3776.63, "total_tokens": 3744672}
{"current_steps": 645, "total_steps": 40000, "loss": 0.978, "lr": 0.29980816839037444, "epoch": 0.08258114077203764, "percentage": 1.61, "elapsed_time": "0:16:34", "remaining_time": "16:51:26", "throughput": 3795.67, "total_tokens": 3775232}
{"current_steps": 650, "total_steps": 40000, "loss": 1.035, "lr": 0.2998051787226698, "epoch": 0.08322130465399143, "percentage": 1.62, "elapsed_time": "0:16:37", "remaining_time": "16:46:32", "throughput": 3813.82, "total_tokens": 3804608}
{"current_steps": 655, "total_steps": 40000, "loss": 0.9583, "lr": 0.29980216595312403, "epoch": 0.0838614685359452, "percentage": 1.64, "elapsed_time": "0:16:40", "remaining_time": "16:41:46", "throughput": 3832.89, "total_tokens": 3835296}
{"current_steps": 660, "total_steps": 40000, "loss": 0.9998, "lr": 0.29979913008220177, "epoch": 0.08450163241789899, "percentage": 1.65, "elapsed_time": "0:16:43", "remaining_time": "16:37:01", "throughput": 3851.27, "total_tokens": 3865184}
{"current_steps": 665, "total_steps": 40000, "loss": 0.9844, "lr": 0.2997960711103711, "epoch": 0.08514179629985276, "percentage": 1.66, "elapsed_time": "0:16:46", "remaining_time": "16:32:22", "throughput": 3869.27, "total_tokens": 3894912}
{"current_steps": 670, "total_steps": 40000, "loss": 0.9351, "lr": 0.29979298903810386, "epoch": 0.08578196018180655, "percentage": 1.68, "elapsed_time": "0:16:49", "remaining_time": "16:27:43", "throughput": 3886.63, "total_tokens": 3923840}
{"current_steps": 675, "total_steps": 40000, "loss": 1.1098, "lr": 0.29978988386587524, "epoch": 0.08642212406376032, "percentage": 1.69, "elapsed_time": "0:16:52", "remaining_time": "16:23:10", "throughput": 3904.02, "total_tokens": 3952992}
{"current_steps": 680, "total_steps": 40000, "loss": 0.8682, "lr": 0.2997867555941642, "epoch": 0.0870622879457141, "percentage": 1.7, "elapsed_time": "0:16:55", "remaining_time": "16:18:40", "throughput": 3921.49, "total_tokens": 3982336}
{"current_steps": 685, "total_steps": 40000, "loss": 0.944, "lr": 0.299783604223453, "epoch": 0.08770245182766788, "percentage": 1.71, "elapsed_time": "0:16:58", "remaining_time": "16:14:18", "throughput": 3939.37, "total_tokens": 4012416}
{"current_steps": 690, "total_steps": 40000, "loss": 0.8365, "lr": 0.29978042975422786, "epoch": 0.08834261570962167, "percentage": 1.73, "elapsed_time": "0:17:01", "remaining_time": "16:09:54", "throughput": 3955.9, "total_tokens": 4040864}
{"current_steps": 695, "total_steps": 40000, "loss": 0.8845, "lr": 0.29977723218697816, "epoch": 0.08898277959157544, "percentage": 1.74, "elapsed_time": "0:17:04", "remaining_time": "16:05:38", "throughput": 3972.96, "total_tokens": 4070208}
{"current_steps": 700, "total_steps": 40000, "loss": 1.1288, "lr": 0.299774011522197, "epoch": 0.08962294347352923, "percentage": 1.75, "elapsed_time": "0:17:07", "remaining_time": "16:01:26", "throughput": 3990.5, "total_tokens": 4100224}
{"current_steps": 705, "total_steps": 40000, "loss": 0.9551, "lr": 0.29977076776038114, "epoch": 0.090263107355483, "percentage": 1.76, "elapsed_time": "0:17:10", "remaining_time": "15:57:13", "throughput": 4006.74, "total_tokens": 4128640}
{"current_steps": 710, "total_steps": 40000, "loss": 1.0232, "lr": 0.2997675009020307, "epoch": 0.09090327123743679, "percentage": 1.77, "elapsed_time": "0:17:13", "remaining_time": "15:53:06", "throughput": 4023.48, "total_tokens": 4157888}
{"current_steps": 715, "total_steps": 40000, "loss": 0.883, "lr": 0.2997642109476496, "epoch": 0.09154343511939056, "percentage": 1.79, "elapsed_time": "0:17:16", "remaining_time": "15:49:02", "throughput": 4039.69, "total_tokens": 4186592}
{"current_steps": 720, "total_steps": 40000, "loss": 0.9275, "lr": 0.299760897897745, "epoch": 0.09218359900134435, "percentage": 1.8, "elapsed_time": "0:17:19", "remaining_time": "15:45:00", "throughput": 4056.21, "total_tokens": 4215712}
{"current_steps": 725, "total_steps": 40000, "loss": 0.971, "lr": 0.29975756175282803, "epoch": 0.09282376288329812, "percentage": 1.81, "elapsed_time": "0:17:22", "remaining_time": "15:41:02", "throughput": 4072.57, "total_tokens": 4244736}
{"current_steps": 730, "total_steps": 40000, "loss": 0.8922, "lr": 0.29975420251341306, "epoch": 0.0934639267652519, "percentage": 1.82, "elapsed_time": "0:17:25", "remaining_time": "15:37:09", "throughput": 4088.96, "total_tokens": 4274048}
{"current_steps": 735, "total_steps": 40000, "loss": 0.9909, "lr": 0.29975082018001814, "epoch": 0.09410409064720568, "percentage": 1.84, "elapsed_time": "0:17:28", "remaining_time": "15:33:15", "throughput": 4104.51, "total_tokens": 4302272}
{"current_steps": 740, "total_steps": 40000, "loss": 0.8478, "lr": 0.2997474147531648, "epoch": 0.09474425452915947, "percentage": 1.85, "elapsed_time": "0:17:31", "remaining_time": "15:29:27", "throughput": 4120.53, "total_tokens": 4331264}
{"current_steps": 745, "total_steps": 40000, "loss": 0.8995, "lr": 0.29974398623337833, "epoch": 0.09538441841111324, "percentage": 1.86, "elapsed_time": "0:17:34", "remaining_time": "15:25:45", "throughput": 4137.06, "total_tokens": 4361120}
{"current_steps": 750, "total_steps": 40000, "loss": 1.0509, "lr": 0.2997405346211873, "epoch": 0.09602458229306703, "percentage": 1.88, "elapsed_time": "0:17:37", "remaining_time": "15:22:05", "throughput": 4153.8, "total_tokens": 4391264}
{"current_steps": 755, "total_steps": 40000, "loss": 0.8757, "lr": 0.2997370599171241, "epoch": 0.0966647461750208, "percentage": 1.89, "elapsed_time": "0:17:40", "remaining_time": "15:18:27", "throughput": 4169.86, "total_tokens": 4420704}
{"current_steps": 760, "total_steps": 40000, "loss": 0.9997, "lr": 0.2997335621217246, "epoch": 0.09730491005697459, "percentage": 1.9, "elapsed_time": "0:17:43", "remaining_time": "15:14:50", "throughput": 4185.52, "total_tokens": 4449696}
{"current_steps": 765, "total_steps": 40000, "loss": 0.9914, "lr": 0.29973004123552816, "epoch": 0.09794507393892836, "percentage": 1.91, "elapsed_time": "0:17:46", "remaining_time": "15:11:16", "throughput": 4201.1, "total_tokens": 4478688}
{"current_steps": 770, "total_steps": 40000, "loss": 0.9542, "lr": 0.2997264972590777, "epoch": 0.09858523782088215, "percentage": 1.93, "elapsed_time": "0:17:49", "remaining_time": "15:07:44", "throughput": 4216.26, "total_tokens": 4507264}
{"current_steps": 775, "total_steps": 40000, "loss": 0.8846, "lr": 0.29972293019291973, "epoch": 0.09922540170283592, "percentage": 1.94, "elapsed_time": "0:17:51", "remaining_time": "15:04:14", "throughput": 4231.51, "total_tokens": 4536000}
{"current_steps": 780, "total_steps": 40000, "loss": 0.9268, "lr": 0.2997193400376045, "epoch": 0.0998655655847897, "percentage": 1.95, "elapsed_time": "0:17:54", "remaining_time": "15:00:51", "throughput": 4247.62, "total_tokens": 4566016}
{"current_steps": 785, "total_steps": 40000, "loss": 1.0586, "lr": 0.2997157267936854, "epoch": 0.1005057294667435, "percentage": 1.96, "elapsed_time": "0:17:57", "remaining_time": "14:57:29", "throughput": 4263.28, "total_tokens": 4595584}
{"current_steps": 790, "total_steps": 40000, "loss": 0.9325, "lr": 0.2997120904617199, "epoch": 0.10114589334869727, "percentage": 1.98, "elapsed_time": "0:18:01", "remaining_time": "14:54:14", "throughput": 4279.87, "total_tokens": 4626624}
{"current_steps": 795, "total_steps": 40000, "loss": 0.8653, "lr": 0.29970843104226863, "epoch": 0.10178605723065105, "percentage": 1.99, "elapsed_time": "0:18:04", "remaining_time": "14:50:59", "throughput": 4295.65, "total_tokens": 4656704}
{"current_steps": 800, "total_steps": 40000, "loss": 0.9377, "lr": 0.2997047485358959, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:18:07", "remaining_time": "14:47:45", "throughput": 4311.27, "total_tokens": 4686560}
{"current_steps": 800, "total_steps": 40000, "eval_loss": 0.9072315096855164, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:21:27", "remaining_time": "17:31:46", "throughput": 3638.95, "total_tokens": 4686560}