train_record_1745940980 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 200
6ee3cad verified
{"current_steps": 5, "total_steps": 40000, "loss": 9.9037, "lr": 0.29999999259779675, "epoch": 0.0012803277639075604, "percentage": 0.01, "elapsed_time": "0:00:07", "remaining_time": "15:46:51", "throughput": 8605.66, "total_tokens": 61120}
{"current_steps": 10, "total_steps": 40000, "loss": 6.8778, "lr": 0.29999996252634736, "epoch": 0.002560655527815121, "percentage": 0.03, "elapsed_time": "0:00:11", "remaining_time": "13:09:18", "throughput": 10162.68, "total_tokens": 120352}
{"current_steps": 15, "total_steps": 40000, "loss": 4.2195, "lr": 0.2999999093230187, "epoch": 0.003840983291722681, "percentage": 0.04, "elapsed_time": "0:00:16", "remaining_time": "12:18:30", "throughput": 10871.03, "total_tokens": 180704}
{"current_steps": 20, "total_steps": 40000, "loss": 3.1909, "lr": 0.299999832987819, "epoch": 0.005121311055630242, "percentage": 0.05, "elapsed_time": "0:00:21", "remaining_time": "11:51:03", "throughput": 11129.74, "total_tokens": 237536}
{"current_steps": 25, "total_steps": 40000, "loss": 2.3957, "lr": 0.29999973352076004, "epoch": 0.006401638819537802, "percentage": 0.06, "elapsed_time": "0:00:26", "remaining_time": "11:34:49", "throughput": 11430.31, "total_tokens": 298016}
{"current_steps": 30, "total_steps": 40000, "loss": 1.946, "lr": 0.2999996109218572, "epoch": 0.007681966583445362, "percentage": 0.07, "elapsed_time": "0:00:30", "remaining_time": "11:23:58", "throughput": 11571.09, "total_tokens": 356416}
{"current_steps": 35, "total_steps": 40000, "loss": 1.7394, "lr": 0.2999994651911293, "epoch": 0.008962294347352922, "percentage": 0.09, "elapsed_time": "0:00:35", "remaining_time": "11:15:32", "throughput": 11665.2, "total_tokens": 414080}
{"current_steps": 40, "total_steps": 40000, "loss": 1.643, "lr": 0.2999992963285989, "epoch": 0.010242622111260483, "percentage": 0.1, "elapsed_time": "0:00:40", "remaining_time": "11:10:41", "throughput": 11682.36, "total_tokens": 470592}
{"current_steps": 45, "total_steps": 40000, "loss": 1.5276, "lr": 0.29999910433429194, "epoch": 0.011522949875168043, "percentage": 0.11, "elapsed_time": "0:00:45", "remaining_time": "11:06:23", "throughput": 11745.49, "total_tokens": 528928}
{"current_steps": 50, "total_steps": 40000, "loss": 1.4031, "lr": 0.29999888920823814, "epoch": 0.012803277639075603, "percentage": 0.12, "elapsed_time": "0:00:49", "remaining_time": "11:02:58", "throughput": 11799.86, "total_tokens": 587456}
{"current_steps": 55, "total_steps": 40000, "loss": 1.1168, "lr": 0.29999865095047057, "epoch": 0.014083605402983163, "percentage": 0.14, "elapsed_time": "0:00:54", "remaining_time": "10:59:35", "throughput": 11834.79, "total_tokens": 644896}
{"current_steps": 60, "total_steps": 40000, "loss": 1.2793, "lr": 0.29999838956102604, "epoch": 0.015363933166890723, "percentage": 0.15, "elapsed_time": "0:00:59", "remaining_time": "10:57:15", "throughput": 11892.12, "total_tokens": 704512}
{"current_steps": 65, "total_steps": 40000, "loss": 1.2666, "lr": 0.29999810503994484, "epoch": 0.016644260930798283, "percentage": 0.16, "elapsed_time": "0:01:04", "remaining_time": "10:56:31", "throughput": 11915.53, "total_tokens": 763968}
{"current_steps": 70, "total_steps": 40000, "loss": 1.1931, "lr": 0.29999779738727084, "epoch": 0.017924588694705843, "percentage": 0.18, "elapsed_time": "0:01:08", "remaining_time": "10:54:29", "throughput": 11954.1, "total_tokens": 822944}
{"current_steps": 75, "total_steps": 40000, "loss": 1.1646, "lr": 0.29999746660305154, "epoch": 0.019204916458613407, "percentage": 0.19, "elapsed_time": "0:01:13", "remaining_time": "10:52:36", "throughput": 11988.45, "total_tokens": 881824}
{"current_steps": 80, "total_steps": 40000, "loss": 1.1098, "lr": 0.2999971126873379, "epoch": 0.020485244222520967, "percentage": 0.2, "elapsed_time": "0:01:18", "remaining_time": "10:51:19", "throughput": 12003.79, "total_tokens": 940096}
{"current_steps": 85, "total_steps": 40000, "loss": 1.1292, "lr": 0.2999967356401845, "epoch": 0.021765571986428527, "percentage": 0.21, "elapsed_time": "0:01:23", "remaining_time": "10:50:37", "throughput": 12028.68, "total_tokens": 999968}
{"current_steps": 90, "total_steps": 40000, "loss": 1.0479, "lr": 0.29999633546164944, "epoch": 0.023045899750336087, "percentage": 0.22, "elapsed_time": "0:01:27", "remaining_time": "10:49:39", "throughput": 12038.79, "total_tokens": 1058240}
{"current_steps": 95, "total_steps": 40000, "loss": 1.0844, "lr": 0.29999591215179444, "epoch": 0.024326227514243647, "percentage": 0.24, "elapsed_time": "0:01:32", "remaining_time": "10:48:55", "throughput": 12042.12, "total_tokens": 1116320}
{"current_steps": 100, "total_steps": 40000, "loss": 1.0662, "lr": 0.2999954657106849, "epoch": 0.025606555278151207, "percentage": 0.25, "elapsed_time": "0:01:37", "remaining_time": "10:48:31", "throughput": 12038.15, "total_tokens": 1173984}
{"current_steps": 105, "total_steps": 40000, "loss": 1.0343, "lr": 0.2999949961383896, "epoch": 0.026886883042058767, "percentage": 0.26, "elapsed_time": "0:01:42", "remaining_time": "10:47:19", "throughput": 12064.68, "total_tokens": 1233280}
{"current_steps": 110, "total_steps": 40000, "loss": 1.0429, "lr": 0.2999945034349809, "epoch": 0.028167210805966326, "percentage": 0.27, "elapsed_time": "0:01:46", "remaining_time": "10:46:32", "throughput": 12061.69, "total_tokens": 1290272}
{"current_steps": 115, "total_steps": 40000, "loss": 0.9369, "lr": 0.2999939876005348, "epoch": 0.029447538569873886, "percentage": 0.29, "elapsed_time": "0:01:51", "remaining_time": "10:45:37", "throughput": 12062.88, "total_tokens": 1347328}
{"current_steps": 120, "total_steps": 40000, "loss": 0.9907, "lr": 0.29999344863513094, "epoch": 0.030727866333781446, "percentage": 0.3, "elapsed_time": "0:01:56", "remaining_time": "10:45:44", "throughput": 12070.88, "total_tokens": 1407264}
{"current_steps": 125, "total_steps": 40000, "loss": 0.9726, "lr": 0.2999928865388523, "epoch": 0.032008194097689006, "percentage": 0.31, "elapsed_time": "0:02:01", "remaining_time": "10:45:13", "throughput": 12084.69, "total_tokens": 1466656}
{"current_steps": 130, "total_steps": 40000, "loss": 1.0113, "lr": 0.29999230131178567, "epoch": 0.033288521861596566, "percentage": 0.33, "elapsed_time": "0:02:06", "remaining_time": "10:44:31", "throughput": 12094.24, "total_tokens": 1524992}
{"current_steps": 135, "total_steps": 40000, "loss": 0.9509, "lr": 0.2999916929540212, "epoch": 0.034568849625504126, "percentage": 0.34, "elapsed_time": "0:02:10", "remaining_time": "10:43:36", "throughput": 12101.66, "total_tokens": 1582560}
{"current_steps": 140, "total_steps": 40000, "loss": 1.0079, "lr": 0.29999106146565285, "epoch": 0.035849177389411686, "percentage": 0.35, "elapsed_time": "0:02:15", "remaining_time": "10:43:16", "throughput": 12105.21, "total_tokens": 1640992}
{"current_steps": 145, "total_steps": 40000, "loss": 0.9191, "lr": 0.29999040684677786, "epoch": 0.037129505153319246, "percentage": 0.36, "elapsed_time": "0:02:20", "remaining_time": "10:42:41", "throughput": 12105.19, "total_tokens": 1698272}
{"current_steps": 150, "total_steps": 40000, "loss": 0.9585, "lr": 0.2999897290974972, "epoch": 0.03840983291722681, "percentage": 0.38, "elapsed_time": "0:02:25", "remaining_time": "10:42:04", "throughput": 12107.57, "total_tokens": 1755744}
{"current_steps": 155, "total_steps": 40000, "loss": 0.8734, "lr": 0.2999890282179155, "epoch": 0.03969016068113437, "percentage": 0.39, "elapsed_time": "0:02:29", "remaining_time": "10:41:25", "throughput": 12115.25, "total_tokens": 1813792}
{"current_steps": 160, "total_steps": 40000, "loss": 0.905, "lr": 0.29998830420814077, "epoch": 0.04097048844504193, "percentage": 0.4, "elapsed_time": "0:02:34", "remaining_time": "10:40:46", "throughput": 12119.56, "total_tokens": 1871328}
{"current_steps": 165, "total_steps": 40000, "loss": 0.9716, "lr": 0.2999875570682846, "epoch": 0.04225081620894949, "percentage": 0.41, "elapsed_time": "0:02:39", "remaining_time": "10:40:25", "throughput": 12127.32, "total_tokens": 1930208}
{"current_steps": 170, "total_steps": 40000, "loss": 0.8542, "lr": 0.2999867867984623, "epoch": 0.04353114397285705, "percentage": 0.43, "elapsed_time": "0:02:43", "remaining_time": "10:39:47", "throughput": 12135.99, "total_tokens": 1988384}
{"current_steps": 175, "total_steps": 40000, "loss": 0.9401, "lr": 0.29998599339879267, "epoch": 0.04481147173676461, "percentage": 0.44, "elapsed_time": "0:02:48", "remaining_time": "10:39:19", "throughput": 12141.83, "total_tokens": 2046656}
{"current_steps": 180, "total_steps": 40000, "loss": 0.8166, "lr": 0.29998517686939796, "epoch": 0.04609179950067217, "percentage": 0.45, "elapsed_time": "0:02:53", "remaining_time": "10:38:56", "throughput": 12133.76, "total_tokens": 2102688}
{"current_steps": 185, "total_steps": 40000, "loss": 0.9075, "lr": 0.29998433721040413, "epoch": 0.04737212726457973, "percentage": 0.46, "elapsed_time": "0:02:57", "remaining_time": "10:38:26", "throughput": 12143.09, "total_tokens": 2161376}
{"current_steps": 190, "total_steps": 40000, "loss": 0.8987, "lr": 0.29998347442194073, "epoch": 0.04865245502848729, "percentage": 0.47, "elapsed_time": "0:03:02", "remaining_time": "10:38:15", "throughput": 12134.22, "total_tokens": 2217792}
{"current_steps": 195, "total_steps": 40000, "loss": 0.8744, "lr": 0.2999825885041407, "epoch": 0.04993278279239485, "percentage": 0.49, "elapsed_time": "0:03:07", "remaining_time": "10:37:48", "throughput": 12139.31, "total_tokens": 2275776}
{"current_steps": 200, "total_steps": 40000, "loss": 0.8854, "lr": 0.29998167945714077, "epoch": 0.05121311055630241, "percentage": 0.5, "elapsed_time": "0:03:12", "remaining_time": "10:37:24", "throughput": 12150.63, "total_tokens": 2335136}
{"current_steps": 200, "total_steps": 40000, "eval_loss": 0.8948695659637451, "epoch": 0.05121311055630241, "percentage": 0.5, "elapsed_time": "0:06:38", "remaining_time": "22:00:15", "throughput": 5866.18, "total_tokens": 2335136}