train_cb_1757340192 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 570
875b7b6 verified
{"current_steps": 5, "total_steps": 570, "loss": 3.7311, "lr": 3.5087719298245615e-06, "epoch": 0.08771929824561403, "percentage": 0.88, "elapsed_time": "0:00:02", "remaining_time": "0:04:07", "throughput": 1619.79, "total_tokens": 3552}
{"current_steps": 10, "total_steps": 570, "loss": 3.2007, "lr": 7.894736842105263e-06, "epoch": 0.17543859649122806, "percentage": 1.75, "elapsed_time": "0:00:03", "remaining_time": "0:03:15", "throughput": 2080.8, "total_tokens": 7264}
{"current_steps": 15, "total_steps": 570, "loss": 2.1438, "lr": 1.2280701754385964e-05, "epoch": 0.2631578947368421, "percentage": 2.63, "elapsed_time": "0:00:04", "remaining_time": "0:02:54", "throughput": 2226.21, "total_tokens": 10528}
{"current_steps": 20, "total_steps": 570, "loss": 0.8229, "lr": 1.6666666666666667e-05, "epoch": 0.3508771929824561, "percentage": 3.51, "elapsed_time": "0:00:06", "remaining_time": "0:02:46", "throughput": 2427.01, "total_tokens": 14720}
{"current_steps": 25, "total_steps": 570, "loss": 0.7984, "lr": 2.105263157894737e-05, "epoch": 0.43859649122807015, "percentage": 4.39, "elapsed_time": "0:00:07", "remaining_time": "0:02:38", "throughput": 2483.56, "total_tokens": 18016}
{"current_steps": 29, "total_steps": 570, "eval_loss": 0.7324875593185425, "epoch": 0.5087719298245614, "percentage": 5.09, "elapsed_time": "0:00:08", "remaining_time": "0:02:47", "throughput": 2238.44, "total_tokens": 20064}
{"current_steps": 30, "total_steps": 570, "loss": 1.0221, "lr": 2.5438596491228074e-05, "epoch": 0.5263157894736842, "percentage": 5.26, "elapsed_time": "0:00:10", "remaining_time": "0:03:10", "throughput": 1945.18, "total_tokens": 20640}
{"current_steps": 35, "total_steps": 570, "loss": 0.4402, "lr": 2.9824561403508772e-05, "epoch": 0.6140350877192983, "percentage": 6.14, "elapsed_time": "0:00:11", "remaining_time": "0:03:02", "throughput": 2076.1, "total_tokens": 24800}
{"current_steps": 40, "total_steps": 570, "loss": 0.3711, "lr": 3.421052631578947e-05, "epoch": 0.7017543859649122, "percentage": 7.02, "elapsed_time": "0:00:13", "remaining_time": "0:02:54", "throughput": 2134.66, "total_tokens": 28064}
{"current_steps": 45, "total_steps": 570, "loss": 0.3899, "lr": 3.859649122807018e-05, "epoch": 0.7894736842105263, "percentage": 7.89, "elapsed_time": "0:00:14", "remaining_time": "0:02:46", "throughput": 2165.93, "total_tokens": 30944}
{"current_steps": 50, "total_steps": 570, "loss": 0.5316, "lr": 4.298245614035088e-05, "epoch": 0.8771929824561403, "percentage": 8.77, "elapsed_time": "0:00:15", "remaining_time": "0:02:40", "throughput": 2188.05, "total_tokens": 33664}
{"current_steps": 55, "total_steps": 570, "loss": 1.5006, "lr": 4.736842105263158e-05, "epoch": 0.9649122807017544, "percentage": 9.65, "elapsed_time": "0:00:16", "remaining_time": "0:02:34", "throughput": 2203.78, "total_tokens": 36320}
{"current_steps": 58, "total_steps": 570, "eval_loss": 0.5227728486061096, "epoch": 1.0175438596491229, "percentage": 10.18, "elapsed_time": "0:00:18", "remaining_time": "0:02:39", "throughput": 2088.53, "total_tokens": 37832}
{"current_steps": 60, "total_steps": 570, "loss": 0.349, "lr": 4.999812487773597e-05, "epoch": 1.0526315789473684, "percentage": 10.53, "elapsed_time": "0:00:20", "remaining_time": "0:02:50", "throughput": 1943.13, "total_tokens": 39080}
{"current_steps": 65, "total_steps": 570, "loss": 0.9668, "lr": 4.997703298253406e-05, "epoch": 1.1403508771929824, "percentage": 11.4, "elapsed_time": "0:00:21", "remaining_time": "0:02:45", "throughput": 1994.72, "total_tokens": 42536}
{"current_steps": 70, "total_steps": 570, "loss": 0.7583, "lr": 4.993252512887069e-05, "epoch": 1.2280701754385965, "percentage": 12.28, "elapsed_time": "0:00:22", "remaining_time": "0:02:40", "throughput": 2028.63, "total_tokens": 45608}
{"current_steps": 75, "total_steps": 570, "loss": 1.4897, "lr": 4.986464304284091e-05, "epoch": 1.3157894736842106, "percentage": 13.16, "elapsed_time": "0:00:23", "remaining_time": "0:02:36", "throughput": 2077.73, "total_tokens": 49352}
{"current_steps": 80, "total_steps": 570, "loss": 0.4684, "lr": 4.977345036387331e-05, "epoch": 1.4035087719298245, "percentage": 14.04, "elapsed_time": "0:00:24", "remaining_time": "0:02:32", "throughput": 2100.83, "total_tokens": 52328}
{"current_steps": 85, "total_steps": 570, "loss": 0.3837, "lr": 4.965903258506806e-05, "epoch": 1.4912280701754386, "percentage": 14.91, "elapsed_time": "0:00:26", "remaining_time": "0:02:29", "throughput": 2148.22, "total_tokens": 56328}
{"current_steps": 87, "total_steps": 570, "eval_loss": 0.5208507776260376, "epoch": 1.526315789473684, "percentage": 15.26, "elapsed_time": "0:00:27", "remaining_time": "0:02:32", "throughput": 2083.02, "total_tokens": 57288}
{"current_steps": 90, "total_steps": 570, "loss": 0.634, "lr": 4.952149697304716e-05, "epoch": 1.5789473684210527, "percentage": 15.79, "elapsed_time": "0:00:29", "remaining_time": "0:02:38", "throughput": 1982.73, "total_tokens": 59048}
{"current_steps": 95, "total_steps": 570, "loss": 0.2586, "lr": 4.9360972467392056e-05, "epoch": 1.6666666666666665, "percentage": 16.67, "elapsed_time": "0:00:31", "remaining_time": "0:02:35", "throughput": 2015.74, "total_tokens": 62504}
{"current_steps": 100, "total_steps": 570, "loss": 0.2382, "lr": 4.917760955976277e-05, "epoch": 1.7543859649122808, "percentage": 17.54, "elapsed_time": "0:00:32", "remaining_time": "0:02:31", "throughput": 2043.13, "total_tokens": 65832}
{"current_steps": 105, "total_steps": 570, "loss": 0.2652, "lr": 4.897158015281209e-05, "epoch": 1.8421052631578947, "percentage": 18.42, "elapsed_time": "0:00:33", "remaining_time": "0:02:27", "throughput": 2061.14, "total_tokens": 68808}
{"current_steps": 110, "total_steps": 570, "loss": 0.233, "lr": 4.874307739902689e-05, "epoch": 1.9298245614035088, "percentage": 19.3, "elapsed_time": "0:00:34", "remaining_time": "0:02:24", "throughput": 2081.02, "total_tokens": 71848}
{"current_steps": 115, "total_steps": 570, "loss": 0.2138, "lr": 4.849231551964771e-05, "epoch": 2.017543859649123, "percentage": 20.18, "elapsed_time": "0:00:35", "remaining_time": "0:02:21", "throughput": 2073.79, "total_tokens": 74040}
{"current_steps": 116, "total_steps": 570, "eval_loss": 0.23195452988147736, "epoch": 2.0350877192982457, "percentage": 20.35, "elapsed_time": "0:00:36", "remaining_time": "0:02:23", "throughput": 2026.69, "total_tokens": 74520}
{"current_steps": 120, "total_steps": 570, "loss": 0.1899, "lr": 4.821952960383649e-05, "epoch": 2.1052631578947367, "percentage": 21.05, "elapsed_time": "0:00:39", "remaining_time": "0:02:27", "throughput": 1966.78, "total_tokens": 77272}
{"current_steps": 125, "total_steps": 570, "loss": 0.2362, "lr": 4.7924975388280524e-05, "epoch": 2.192982456140351, "percentage": 21.93, "elapsed_time": "0:00:40", "remaining_time": "0:02:23", "throughput": 1984.99, "total_tokens": 80280}
{"current_steps": 130, "total_steps": 570, "loss": 0.3055, "lr": 4.760892901743944e-05, "epoch": 2.280701754385965, "percentage": 22.81, "elapsed_time": "0:00:41", "remaining_time": "0:02:20", "throughput": 2005.43, "total_tokens": 83480}
{"current_steps": 135, "total_steps": 570, "loss": 0.4557, "lr": 4.727168678465988e-05, "epoch": 2.3684210526315788, "percentage": 23.68, "elapsed_time": "0:00:42", "remaining_time": "0:02:17", "throughput": 2017.47, "total_tokens": 86232}
{"current_steps": 140, "total_steps": 570, "loss": 0.2885, "lr": 4.6913564854400595e-05, "epoch": 2.456140350877193, "percentage": 24.56, "elapsed_time": "0:00:43", "remaining_time": "0:02:15", "throughput": 2039.58, "total_tokens": 89656}
{"current_steps": 145, "total_steps": 570, "loss": 0.342, "lr": 4.6534898965828405e-05, "epoch": 2.543859649122807, "percentage": 25.44, "elapsed_time": "0:00:45", "remaining_time": "0:02:12", "throughput": 2060.52, "total_tokens": 93080}
{"current_steps": 145, "total_steps": 570, "eval_loss": 0.24903666973114014, "epoch": 2.543859649122807, "percentage": 25.44, "elapsed_time": "0:00:46", "remaining_time": "0:02:14", "throughput": 2021.96, "total_tokens": 93080}
{"current_steps": 150, "total_steps": 570, "loss": 0.2368, "lr": 4.613604411806285e-05, "epoch": 2.6315789473684212, "percentage": 26.32, "elapsed_time": "0:00:48", "remaining_time": "0:02:16", "throughput": 1980.34, "total_tokens": 96440}
{"current_steps": 155, "total_steps": 570, "loss": 0.2185, "lr": 4.5717374237364665e-05, "epoch": 2.719298245614035, "percentage": 27.19, "elapsed_time": "0:00:49", "remaining_time": "0:02:13", "throughput": 2005.94, "total_tokens": 100280}
{"current_steps": 160, "total_steps": 570, "loss": 0.4247, "lr": 4.5279281826580056e-05, "epoch": 2.807017543859649, "percentage": 28.07, "elapsed_time": "0:00:51", "remaining_time": "0:02:11", "throughput": 2022.79, "total_tokens": 103512}
{"current_steps": 165, "total_steps": 570, "loss": 0.3187, "lr": 4.482217759716946e-05, "epoch": 2.8947368421052633, "percentage": 28.95, "elapsed_time": "0:00:52", "remaining_time": "0:02:08", "throughput": 2031.53, "total_tokens": 106168}
{"current_steps": 170, "total_steps": 570, "loss": 0.231, "lr": 4.434649008416565e-05, "epoch": 2.982456140350877, "percentage": 29.82, "elapsed_time": "0:00:53", "remaining_time": "0:02:05", "throughput": 2050.15, "total_tokens": 109624}
{"current_steps": 174, "total_steps": 570, "eval_loss": 0.16980019211769104, "epoch": 3.0526315789473686, "percentage": 30.53, "elapsed_time": "0:00:55", "remaining_time": "0:02:06", "throughput": 2019.87, "total_tokens": 111928}
{"current_steps": 175, "total_steps": 570, "loss": 0.1977, "lr": 4.385266524442241e-05, "epoch": 3.0701754385964914, "percentage": 30.7, "elapsed_time": "0:00:57", "remaining_time": "0:02:09", "throughput": 1963.29, "total_tokens": 112472}
{"current_steps": 180, "total_steps": 570, "loss": 0.31, "lr": 4.334116603853007e-05, "epoch": 3.1578947368421053, "percentage": 31.58, "elapsed_time": "0:00:58", "remaining_time": "0:02:06", "throughput": 1977.39, "total_tokens": 115576}
{"current_steps": 185, "total_steps": 570, "loss": 0.3518, "lr": 4.2812471996790206e-05, "epoch": 3.245614035087719, "percentage": 32.46, "elapsed_time": "0:00:59", "remaining_time": "0:02:04", "throughput": 1993.95, "total_tokens": 119000}
{"current_steps": 190, "total_steps": 570, "loss": 0.2729, "lr": 4.226707876965611e-05, "epoch": 3.3333333333333335, "percentage": 33.33, "elapsed_time": "0:01:00", "remaining_time": "0:02:01", "throughput": 2009.74, "total_tokens": 122360}
{"current_steps": 195, "total_steps": 570, "loss": 0.2196, "lr": 4.1705497663060767e-05, "epoch": 3.4210526315789473, "percentage": 34.21, "elapsed_time": "0:01:02", "remaining_time": "0:01:59", "throughput": 2022.45, "total_tokens": 125496}
{"current_steps": 200, "total_steps": 570, "loss": 0.3391, "lr": 4.1128255159067665e-05, "epoch": 3.5087719298245617, "percentage": 35.09, "elapsed_time": "0:01:03", "remaining_time": "0:01:56", "throughput": 2036.01, "total_tokens": 128760}
{"current_steps": 203, "total_steps": 570, "eval_loss": 0.2744066119194031, "epoch": 3.56140350877193, "percentage": 35.61, "elapsed_time": "0:01:04", "remaining_time": "0:01:57", "throughput": 2021.34, "total_tokens": 131160}
{"current_steps": 205, "total_steps": 570, "loss": 0.2077, "lr": 4.053589242229412e-05, "epoch": 3.5964912280701755, "percentage": 35.96, "elapsed_time": "0:01:06", "remaining_time": "0:01:59", "throughput": 1974.07, "total_tokens": 132248}
{"current_steps": 210, "total_steps": 570, "loss": 0.2695, "lr": 3.9928964792569655e-05, "epoch": 3.6842105263157894, "percentage": 36.84, "elapsed_time": "0:01:08", "remaining_time": "0:01:56", "throughput": 1983.9, "total_tokens": 135160}
{"current_steps": 215, "total_steps": 570, "loss": 0.2965, "lr": 3.930804126430513e-05, "epoch": 3.7719298245614032, "percentage": 37.72, "elapsed_time": "0:01:09", "remaining_time": "0:01:54", "throughput": 1994.45, "total_tokens": 138200}
{"current_steps": 220, "total_steps": 570, "loss": 0.1714, "lr": 3.867370395306068e-05, "epoch": 3.8596491228070176, "percentage": 38.6, "elapsed_time": "0:01:10", "remaining_time": "0:01:52", "throughput": 2009.66, "total_tokens": 141752}
{"current_steps": 225, "total_steps": 570, "loss": 0.3448, "lr": 3.8026547549812665e-05, "epoch": 3.9473684210526314, "percentage": 39.47, "elapsed_time": "0:01:11", "remaining_time": "0:01:50", "throughput": 2031.12, "total_tokens": 146008}
{"current_steps": 230, "total_steps": 570, "loss": 0.2601, "lr": 3.736717876343106e-05, "epoch": 4.035087719298246, "percentage": 40.35, "elapsed_time": "0:01:13", "remaining_time": "0:01:48", "throughput": 2031.79, "total_tokens": 148648}
{"current_steps": 232, "total_steps": 570, "eval_loss": 0.16180922091007233, "epoch": 4.0701754385964914, "percentage": 40.7, "elapsed_time": "0:01:14", "remaining_time": "0:01:48", "throughput": 2013.99, "total_tokens": 150056}
{"current_steps": 235, "total_steps": 570, "loss": 0.2069, "lr": 3.66962157518902e-05, "epoch": 4.12280701754386, "percentage": 41.23, "elapsed_time": "0:01:16", "remaining_time": "0:01:49", "throughput": 1975.84, "total_tokens": 151656}
{"current_steps": 240, "total_steps": 570, "loss": 0.1738, "lr": 3.601428754274584e-05, "epoch": 4.2105263157894735, "percentage": 42.11, "elapsed_time": "0:01:17", "remaining_time": "0:01:47", "throughput": 1982.09, "total_tokens": 154280}
{"current_steps": 245, "total_steps": 570, "loss": 0.2579, "lr": 3.532203344342212e-05, "epoch": 4.298245614035087, "percentage": 42.98, "elapsed_time": "0:01:18", "remaining_time": "0:01:44", "throughput": 1990.28, "total_tokens": 157160}
{"current_steps": 250, "total_steps": 570, "loss": 0.227, "lr": 3.4620102441861143e-05, "epoch": 4.385964912280702, "percentage": 43.86, "elapsed_time": "0:01:20", "remaining_time": "0:01:42", "throughput": 1998.75, "total_tokens": 160072}
{"current_steps": 255, "total_steps": 570, "loss": 0.1874, "lr": 3.390915259809696e-05, "epoch": 4.473684210526316, "percentage": 44.74, "elapsed_time": "0:01:21", "remaining_time": "0:01:40", "throughput": 2013.32, "total_tokens": 163752}
{"current_steps": 260, "total_steps": 570, "loss": 0.1828, "lr": 3.318985042732461e-05, "epoch": 4.56140350877193, "percentage": 45.61, "elapsed_time": "0:01:22", "remaining_time": "0:01:38", "throughput": 2020.27, "total_tokens": 166600}
{"current_steps": 261, "total_steps": 570, "eval_loss": 0.25386178493499756, "epoch": 4.578947368421053, "percentage": 45.79, "elapsed_time": "0:01:23", "remaining_time": "0:01:38", "throughput": 2001.3, "total_tokens": 167208}
{"current_steps": 265, "total_steps": 570, "loss": 0.2042, "lr": 3.246287027504237e-05, "epoch": 4.649122807017544, "percentage": 46.49, "elapsed_time": "0:01:25", "remaining_time": "0:01:38", "throughput": 1974.85, "total_tokens": 169704}
{"current_steps": 270, "total_steps": 570, "loss": 0.1841, "lr": 3.172889368485311e-05, "epoch": 4.7368421052631575, "percentage": 47.37, "elapsed_time": "0:01:27", "remaining_time": "0:01:36", "throughput": 1985.32, "total_tokens": 173000}
{"current_steps": 275, "total_steps": 570, "loss": 0.2559, "lr": 3.0988608759517475e-05, "epoch": 4.824561403508772, "percentage": 48.25, "elapsed_time": "0:01:28", "remaining_time": "0:01:34", "throughput": 2002.05, "total_tokens": 177128}
{"current_steps": 280, "total_steps": 570, "loss": 0.2189, "lr": 3.0242709515857758e-05, "epoch": 4.912280701754386, "percentage": 49.12, "elapsed_time": "0:01:29", "remaining_time": "0:01:32", "throughput": 2013.95, "total_tokens": 180680}
{"current_steps": 285, "total_steps": 570, "loss": 0.1958, "lr": 2.949189523411747e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:01:30", "remaining_time": "0:01:30", "throughput": 2017.91, "total_tokens": 183280}
{"current_steps": 290, "total_steps": 570, "loss": 0.3148, "lr": 2.8736869802386364e-05, "epoch": 5.087719298245614, "percentage": 50.88, "elapsed_time": "0:01:32", "remaining_time": "0:01:28", "throughput": 2021.65, "total_tokens": 186160}
{"current_steps": 290, "total_steps": 570, "eval_loss": 0.25446251034736633, "epoch": 5.087719298245614, "percentage": 50.88, "elapsed_time": "0:01:32", "remaining_time": "0:01:29", "throughput": 2002.99, "total_tokens": 186160}
{"current_steps": 295, "total_steps": 570, "loss": 0.203, "lr": 2.797834105670559e-05, "epoch": 5.175438596491228, "percentage": 51.75, "elapsed_time": "0:01:35", "remaining_time": "0:01:29", "throughput": 1980.88, "total_tokens": 189584}
{"current_steps": 300, "total_steps": 570, "loss": 0.2338, "lr": 2.7217020117471793e-05, "epoch": 5.2631578947368425, "percentage": 52.63, "elapsed_time": "0:01:36", "remaining_time": "0:01:27", "throughput": 1993.39, "total_tokens": 193296}
{"current_steps": 305, "total_steps": 570, "loss": 0.2074, "lr": 2.6453620722761896e-05, "epoch": 5.350877192982456, "percentage": 53.51, "elapsed_time": "0:01:38", "remaining_time": "0:01:25", "throughput": 2007.47, "total_tokens": 197328}
{"current_steps": 310, "total_steps": 570, "loss": 0.1303, "lr": 2.5688858559204053e-05, "epoch": 5.43859649122807, "percentage": 54.39, "elapsed_time": "0:01:39", "remaining_time": "0:01:23", "throughput": 2015.81, "total_tokens": 200560}
{"current_steps": 315, "total_steps": 570, "loss": 0.1062, "lr": 2.492345059102164e-05, "epoch": 5.526315789473684, "percentage": 55.26, "elapsed_time": "0:01:40", "remaining_time": "0:01:21", "throughput": 2023.03, "total_tokens": 203632}
{"current_steps": 319, "total_steps": 570, "eval_loss": 0.21558518707752228, "epoch": 5.5964912280701755, "percentage": 55.96, "elapsed_time": "0:01:42", "remaining_time": "0:01:20", "throughput": 2011.26, "total_tokens": 206000}
{"current_steps": 320, "total_steps": 570, "loss": 0.1738, "lr": 2.4158114387879616e-05, "epoch": 5.614035087719298, "percentage": 56.14, "elapsed_time": "0:01:44", "remaining_time": "0:01:21", "throughput": 1982.05, "total_tokens": 206480}
{"current_steps": 325, "total_steps": 570, "loss": 0.2403, "lr": 2.3393567452163252e-05, "epoch": 5.701754385964913, "percentage": 57.02, "elapsed_time": "0:01:45", "remaining_time": "0:01:19", "throughput": 1987.2, "total_tokens": 209232}
{"current_steps": 330, "total_steps": 570, "loss": 0.3923, "lr": 2.2630526546319914e-05, "epoch": 5.7894736842105265, "percentage": 57.89, "elapsed_time": "0:01:46", "remaining_time": "0:01:17", "throughput": 1999.97, "total_tokens": 213168}
{"current_steps": 335, "total_steps": 570, "loss": 0.159, "lr": 2.186970702089457e-05, "epoch": 5.87719298245614, "percentage": 58.77, "elapsed_time": "0:01:47", "remaining_time": "0:01:15", "throughput": 2010.18, "total_tokens": 216752}
{"current_steps": 340, "total_steps": 570, "loss": 0.1847, "lr": 2.111182214388893e-05, "epoch": 5.964912280701754, "percentage": 59.65, "elapsed_time": "0:01:48", "remaining_time": "0:01:13", "throughput": 2015.18, "total_tokens": 219536}
{"current_steps": 345, "total_steps": 570, "loss": 0.0974, "lr": 2.0357582432072957e-05, "epoch": 6.052631578947368, "percentage": 60.53, "elapsed_time": "0:01:50", "remaining_time": "0:01:11", "throughput": 2016.87, "total_tokens": 222272}
{"current_steps": 348, "total_steps": 570, "eval_loss": 0.2205836921930313, "epoch": 6.105263157894737, "percentage": 61.05, "elapsed_time": "0:01:51", "remaining_time": "0:01:11", "throughput": 2005.06, "total_tokens": 224064}
{"current_steps": 350, "total_steps": 570, "loss": 0.3393, "lr": 1.9607694984875754e-05, "epoch": 6.140350877192983, "percentage": 61.4, "elapsed_time": "0:01:53", "remaining_time": "0:01:11", "throughput": 1982.33, "total_tokens": 225664}
{"current_steps": 355, "total_steps": 570, "loss": 0.1941, "lr": 1.8862862821480025e-05, "epoch": 6.228070175438597, "percentage": 62.28, "elapsed_time": "0:01:55", "remaining_time": "0:01:09", "throughput": 1992.74, "total_tokens": 229408}
{"current_steps": 360, "total_steps": 570, "loss": 0.3113, "lr": 1.8123784221741964e-05, "epoch": 6.315789473684211, "percentage": 63.16, "elapsed_time": "0:01:56", "remaining_time": "0:01:07", "throughput": 2001.38, "total_tokens": 232832}
{"current_steps": 365, "total_steps": 570, "loss": 0.0671, "lr": 1.73911520715541e-05, "epoch": 6.4035087719298245, "percentage": 64.04, "elapsed_time": "0:01:57", "remaining_time": "0:01:06", "throughput": 2009.67, "total_tokens": 236288}
{"current_steps": 370, "total_steps": 570, "loss": 0.1522, "lr": 1.666565321326512e-05, "epoch": 6.491228070175438, "percentage": 64.91, "elapsed_time": "0:01:58", "remaining_time": "0:01:04", "throughput": 2015.46, "total_tokens": 239296}
{"current_steps": 375, "total_steps": 570, "loss": 0.1758, "lr": 1.5947967801765345e-05, "epoch": 6.578947368421053, "percentage": 65.79, "elapsed_time": "0:01:59", "remaining_time": "0:01:02", "throughput": 2024.19, "total_tokens": 242848}
{"current_steps": 377, "total_steps": 570, "eval_loss": 0.15596222877502441, "epoch": 6.614035087719298, "percentage": 66.14, "elapsed_time": "0:02:01", "remaining_time": "0:01:02", "throughput": 2010.86, "total_tokens": 243840}
{"current_steps": 380, "total_steps": 570, "loss": 0.1513, "lr": 1.5238768666841907e-05, "epoch": 6.666666666666667, "percentage": 66.67, "elapsed_time": "0:02:03", "remaining_time": "0:01:01", "throughput": 1986.68, "total_tokens": 245344}
{"current_steps": 385, "total_steps": 570, "loss": 0.2004, "lr": 1.4538720682400969e-05, "epoch": 6.754385964912281, "percentage": 67.54, "elapsed_time": "0:02:04", "remaining_time": "0:00:59", "throughput": 1991.66, "total_tokens": 248256}
{"current_steps": 390, "total_steps": 570, "loss": 0.2411, "lr": 1.3848480143148839e-05, "epoch": 6.842105263157895, "percentage": 68.42, "elapsed_time": "0:02:05", "remaining_time": "0:00:58", "throughput": 1995.55, "total_tokens": 250976}
{"current_steps": 395, "total_steps": 570, "loss": 0.075, "lr": 1.3168694149315796e-05, "epoch": 6.9298245614035086, "percentage": 69.3, "elapsed_time": "0:02:07", "remaining_time": "0:00:56", "throughput": 2006.27, "total_tokens": 254944}
{"current_steps": 400, "total_steps": 570, "loss": 0.0674, "lr": 1.2500000000000006e-05, "epoch": 7.017543859649122, "percentage": 70.18, "elapsed_time": "0:02:08", "remaining_time": "0:00:54", "throughput": 2008.91, "total_tokens": 257888}
{"current_steps": 405, "total_steps": 570, "loss": 0.0921, "lr": 1.1843024595699805e-05, "epoch": 7.105263157894737, "percentage": 71.05, "elapsed_time": "0:02:09", "remaining_time": "0:00:52", "throughput": 2014.98, "total_tokens": 261088}
{"current_steps": 406, "total_steps": 570, "eval_loss": 0.19613634049892426, "epoch": 7.12280701754386, "percentage": 71.23, "elapsed_time": "0:02:10", "remaining_time": "0:00:52", "throughput": 2001.76, "total_tokens": 261504}
{"current_steps": 410, "total_steps": 570, "loss": 0.0531, "lr": 1.1198383850594758e-05, "epoch": 7.192982456140351, "percentage": 71.93, "elapsed_time": "0:02:13", "remaining_time": "0:00:51", "throughput": 1982.86, "total_tokens": 263840}
{"current_steps": 415, "total_steps": 570, "loss": 0.2284, "lr": 1.0566682115126344e-05, "epoch": 7.280701754385965, "percentage": 72.81, "elapsed_time": "0:02:14", "remaining_time": "0:00:50", "throughput": 1991.22, "total_tokens": 267424}
{"current_steps": 420, "total_steps": 570, "loss": 0.3437, "lr": 9.948511609419675e-06, "epoch": 7.368421052631579, "percentage": 73.68, "elapsed_time": "0:02:15", "remaining_time": "0:00:48", "throughput": 1998.04, "total_tokens": 270720}
{"current_steps": 425, "total_steps": 570, "loss": 0.0494, "lr": 9.344451868077353e-06, "epoch": 7.456140350877193, "percentage": 74.56, "elapsed_time": "0:02:16", "remaining_time": "0:00:46", "throughput": 2002.07, "total_tokens": 273504}
{"current_steps": 430, "total_steps": 570, "loss": 0.1256, "lr": 8.755069196866014e-06, "epoch": 7.543859649122807, "percentage": 75.44, "elapsed_time": "0:02:17", "remaining_time": "0:00:44", "throughput": 2007.62, "total_tokens": 276608}
{"current_steps": 435, "total_steps": 570, "loss": 0.2705, "lr": 8.180916141804906e-06, "epoch": 7.631578947368421, "percentage": 76.32, "elapsed_time": "0:02:19", "remaining_time": "0:00:43", "throughput": 2016.43, "total_tokens": 280352}
{"current_steps": 435, "total_steps": 570, "eval_loss": 0.23887619376182556, "epoch": 7.631578947368421, "percentage": 76.32, "elapsed_time": "0:02:19", "remaining_time": "0:00:43", "throughput": 2003.97, "total_tokens": 280352}
{"current_steps": 440, "total_steps": 570, "loss": 0.2732, "lr": 7.622530971154199e-06, "epoch": 7.719298245614035, "percentage": 77.19, "elapsed_time": "0:02:22", "remaining_time": "0:00:42", "throughput": 1988.7, "total_tokens": 283808}
{"current_steps": 445, "total_steps": 570, "loss": 0.1001, "lr": 7.080437170788723e-06, "epoch": 7.807017543859649, "percentage": 78.07, "elapsed_time": "0:02:23", "remaining_time": "0:00:40", "throughput": 1993.0, "total_tokens": 286688}
{"current_steps": 450, "total_steps": 570, "loss": 0.1136, "lr": 6.555142953430158e-06, "epoch": 7.894736842105263, "percentage": 78.95, "elapsed_time": "0:02:25", "remaining_time": "0:00:38", "throughput": 2000.44, "total_tokens": 290240}
{"current_steps": 455, "total_steps": 570, "loss": 0.1173, "lr": 6.0471407821978135e-06, "epoch": 7.982456140350877, "percentage": 79.82, "elapsed_time": "0:02:26", "remaining_time": "0:00:36", "throughput": 2006.78, "total_tokens": 293568}
{"current_steps": 460, "total_steps": 570, "loss": 0.0502, "lr": 5.556906908924655e-06, "epoch": 8.070175438596491, "percentage": 80.7, "elapsed_time": "0:02:27", "remaining_time": "0:00:35", "throughput": 2009.93, "total_tokens": 296720}
{"current_steps": 464, "total_steps": 570, "eval_loss": 0.16461919248104095, "epoch": 8.140350877192983, "percentage": 81.4, "elapsed_time": "0:02:29", "remaining_time": "0:00:34", "throughput": 2002.99, "total_tokens": 299344}
{"current_steps": 465, "total_steps": 570, "loss": 0.3081, "lr": 5.084900927671393e-06, "epoch": 8.157894736842104, "percentage": 81.58, "elapsed_time": "0:02:31", "remaining_time": "0:00:34", "throughput": 1982.72, "total_tokens": 300112}
{"current_steps": 470, "total_steps": 570, "loss": 0.0428, "lr": 4.631565343857239e-06, "epoch": 8.24561403508772, "percentage": 82.46, "elapsed_time": "0:02:32", "remaining_time": "0:00:32", "throughput": 1990.06, "total_tokens": 303664}
{"current_steps": 475, "total_steps": 570, "loss": 0.0843, "lr": 4.19732515941125e-06, "epoch": 8.333333333333334, "percentage": 83.33, "elapsed_time": "0:02:33", "remaining_time": "0:00:30", "throughput": 1993.77, "total_tokens": 306480}
{"current_steps": 480, "total_steps": 570, "loss": 0.1734, "lr": 3.7825874743331907e-06, "epoch": 8.421052631578947, "percentage": 84.21, "elapsed_time": "0:02:34", "remaining_time": "0:00:29", "throughput": 1999.39, "total_tokens": 309744}
{"current_steps": 485, "total_steps": 570, "loss": 0.1019, "lr": 3.3877411050374424e-06, "epoch": 8.508771929824562, "percentage": 85.09, "elapsed_time": "0:02:36", "remaining_time": "0:00:27", "throughput": 2006.79, "total_tokens": 313360}
{"current_steps": 490, "total_steps": 570, "loss": 0.1646, "lr": 3.013156219837776e-06, "epoch": 8.596491228070175, "percentage": 85.96, "elapsed_time": "0:02:37", "remaining_time": "0:00:25", "throughput": 2013.11, "total_tokens": 316784}
{"current_steps": 493, "total_steps": 570, "eval_loss": 0.2208017259836197, "epoch": 8.649122807017545, "percentage": 86.49, "elapsed_time": "0:02:38", "remaining_time": "0:00:24", "throughput": 2005.37, "total_tokens": 318672}
{"current_steps": 495, "total_steps": 570, "loss": 0.0134, "lr": 2.659183991914696e-06, "epoch": 8.68421052631579, "percentage": 86.84, "elapsed_time": "0:02:40", "remaining_time": "0:00:24", "throughput": 1987.0, "total_tokens": 319760}
{"current_steps": 500, "total_steps": 570, "loss": 0.0183, "lr": 2.326156270090735e-06, "epoch": 8.771929824561404, "percentage": 87.72, "elapsed_time": "0:02:42", "remaining_time": "0:00:22", "throughput": 1990.25, "total_tokens": 322512}
{"current_steps": 505, "total_steps": 570, "loss": 0.163, "lr": 2.0143852677223075e-06, "epoch": 8.859649122807017, "percentage": 88.6, "elapsed_time": "0:02:43", "remaining_time": "0:00:21", "throughput": 1996.41, "total_tokens": 325936}
{"current_steps": 510, "total_steps": 570, "loss": 0.234, "lr": 1.7241632699998123e-06, "epoch": 8.947368421052632, "percentage": 89.47, "elapsed_time": "0:02:44", "remaining_time": "0:00:19", "throughput": 2002.26, "total_tokens": 329360}
{"current_steps": 515, "total_steps": 570, "loss": 0.1047, "lr": 1.4557623599303903e-06, "epoch": 9.035087719298245, "percentage": 90.35, "elapsed_time": "0:02:45", "remaining_time": "0:00:17", "throughput": 2003.95, "total_tokens": 332168}
{"current_steps": 520, "total_steps": 570, "loss": 0.1549, "lr": 1.2094341632602064e-06, "epoch": 9.12280701754386, "percentage": 91.23, "elapsed_time": "0:02:47", "remaining_time": "0:00:16", "throughput": 2012.49, "total_tokens": 336296}
{"current_steps": 522, "total_steps": 570, "eval_loss": 0.19831164181232452, "epoch": 9.157894736842104, "percentage": 91.58, "elapsed_time": "0:02:48", "remaining_time": "0:00:15", "throughput": 2003.9, "total_tokens": 337480}
{"current_steps": 525, "total_steps": 570, "loss": 0.1365, "lr": 9.85409612575411e-07, "epoch": 9.210526315789474, "percentage": 92.11, "elapsed_time": "0:02:50", "remaining_time": "0:00:14", "throughput": 1984.36, "total_tokens": 339080}
{"current_steps": 530, "total_steps": 570, "loss": 0.0859, "lr": 7.838987308029427e-07, "epoch": 9.298245614035087, "percentage": 92.98, "elapsed_time": "0:02:52", "remaining_time": "0:00:12", "throughput": 1990.48, "total_tokens": 342568}
{"current_steps": 535, "total_steps": 570, "loss": 0.067, "lr": 6.050904343141095e-07, "epoch": 9.385964912280702, "percentage": 93.86, "elapsed_time": "0:02:53", "remaining_time": "0:00:11", "throughput": 1994.59, "total_tokens": 345576}
{"current_steps": 540, "total_steps": 570, "loss": 0.1141, "lr": 4.491523558155714e-07, "epoch": 9.473684210526315, "percentage": 94.74, "elapsed_time": "0:02:54", "remaining_time": "0:00:09", "throughput": 1997.77, "total_tokens": 348392}
{"current_steps": 545, "total_steps": 570, "loss": 0.0688, "lr": 3.162306871937387e-07, "epoch": 9.56140350877193, "percentage": 95.61, "elapsed_time": "0:02:55", "remaining_time": "0:00:08", "throughput": 2003.85, "total_tokens": 351912}
{"current_steps": 550, "total_steps": 570, "loss": 0.072, "lr": 2.064500424599436e-07, "epoch": 9.649122807017545, "percentage": 96.49, "elapsed_time": "0:02:56", "remaining_time": "0:00:06", "throughput": 2009.85, "total_tokens": 355432}
{"current_steps": 551, "total_steps": 570, "eval_loss": 0.2247486412525177, "epoch": 9.666666666666666, "percentage": 96.67, "elapsed_time": "0:02:58", "remaining_time": "0:00:06", "throughput": 2002.54, "total_tokens": 356456}
{"current_steps": 555, "total_steps": 570, "loss": 0.0824, "lr": 1.1991334092484318e-07, "epoch": 9.736842105263158, "percentage": 97.37, "elapsed_time": "0:03:00", "remaining_time": "0:00:04", "throughput": 1987.95, "total_tokens": 358760}
{"current_steps": 560, "total_steps": 570, "loss": 0.141, "lr": 5.6701710711626334e-08, "epoch": 9.824561403508772, "percentage": 98.25, "elapsed_time": "0:03:01", "remaining_time": "0:00:03", "throughput": 1993.13, "total_tokens": 362088}
{"current_steps": 565, "total_steps": 570, "loss": 0.0604, "lr": 1.6874412698408836e-08, "epoch": 9.912280701754385, "percentage": 99.12, "elapsed_time": "0:03:02", "remaining_time": "0:00:01", "throughput": 1998.68, "total_tokens": 365544}
{"current_steps": 570, "total_steps": 570, "loss": 0.3128, "lr": 4.687849611939576e-10, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:03:03", "remaining_time": "0:00:00", "throughput": 1999.52, "total_tokens": 367864}
{"current_steps": 570, "total_steps": 570, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:03:05", "remaining_time": "0:00:00", "throughput": 1982.29, "total_tokens": 367864}