| {"current_steps": 5, "total_steps": 2220, "loss": 10.8712, "lr": 1.801801801801802e-07, "epoch": 0.04504504504504504, "percentage": 0.23, "elapsed_time": "0:00:01", "remaining_time": "0:07:25", "throughput": 1907.67, "total_tokens": 1920} |
| {"current_steps": 10, "total_steps": 2220, "loss": 10.7996, "lr": 4.0540540540540546e-07, "epoch": 0.09009009009009009, "percentage": 0.45, "elapsed_time": "0:00:01", "remaining_time": "0:05:54", "throughput": 2773.08, "total_tokens": 4448} |
| {"current_steps": 15, "total_steps": 2220, "loss": 10.1949, "lr": 6.306306306306306e-07, "epoch": 0.13513513513513514, "percentage": 0.68, "elapsed_time": "0:00:02", "remaining_time": "0:05:12", "throughput": 3087.55, "total_tokens": 6560} |
| {"current_steps": 20, "total_steps": 2220, "loss": 9.1379, "lr": 8.55855855855856e-07, "epoch": 0.18018018018018017, "percentage": 0.9, "elapsed_time": "0:00:02", "remaining_time": "0:04:45", "throughput": 3216.79, "total_tokens": 8352} |
| {"current_steps": 25, "total_steps": 2220, "loss": 8.4407, "lr": 1.0810810810810812e-06, "epoch": 0.22522522522522523, "percentage": 1.13, "elapsed_time": "0:00:03", "remaining_time": "0:04:33", "throughput": 3351.65, "total_tokens": 10432} |
| {"current_steps": 30, "total_steps": 2220, "loss": 6.8938, "lr": 1.3063063063063065e-06, "epoch": 0.2702702702702703, "percentage": 1.35, "elapsed_time": "0:00:03", "remaining_time": "0:04:23", "throughput": 3398.66, "total_tokens": 12256} |
| {"current_steps": 35, "total_steps": 2220, "loss": 5.9679, "lr": 1.5315315315315316e-06, "epoch": 0.3153153153153153, "percentage": 1.58, "elapsed_time": "0:00:04", "remaining_time": "0:04:17", "throughput": 3480.85, "total_tokens": 14336} |
| {"current_steps": 40, "total_steps": 2220, "loss": 4.8585, "lr": 1.756756756756757e-06, "epoch": 0.36036036036036034, "percentage": 1.8, "elapsed_time": "0:00:04", "remaining_time": "0:04:14", "throughput": 3508.47, "total_tokens": 16384} |
| {"current_steps": 45, "total_steps": 2220, "loss": 3.5684, "lr": 1.9819819819819822e-06, "epoch": 0.40540540540540543, "percentage": 2.03, "elapsed_time": "0:00:05", "remaining_time": "0:04:10", "throughput": 3544.13, "total_tokens": 18368} |
| {"current_steps": 50, "total_steps": 2220, "loss": 2.7918, "lr": 2.2072072072072073e-06, "epoch": 0.45045045045045046, "percentage": 2.25, "elapsed_time": "0:00:05", "remaining_time": "0:04:05", "throughput": 3562.99, "total_tokens": 20128} |
| {"current_steps": 55, "total_steps": 2220, "loss": 2.0791, "lr": 2.432432432432433e-06, "epoch": 0.4954954954954955, "percentage": 2.48, "elapsed_time": "0:00:06", "remaining_time": "0:04:00", "throughput": 3591.34, "total_tokens": 21984} |
| {"current_steps": 60, "total_steps": 2220, "loss": 1.3666, "lr": 2.657657657657658e-06, "epoch": 0.5405405405405406, "percentage": 2.7, "elapsed_time": "0:00:06", "remaining_time": "0:03:58", "throughput": 3603.29, "total_tokens": 23840} |
| {"current_steps": 65, "total_steps": 2220, "loss": 1.1674, "lr": 2.882882882882883e-06, "epoch": 0.5855855855855856, "percentage": 2.93, "elapsed_time": "0:00:07", "remaining_time": "0:03:57", "throughput": 3627.89, "total_tokens": 25984} |
| {"current_steps": 70, "total_steps": 2220, "loss": 0.7208, "lr": 3.1081081081081082e-06, "epoch": 0.6306306306306306, "percentage": 3.15, "elapsed_time": "0:00:07", "remaining_time": "0:03:55", "throughput": 3628.16, "total_tokens": 27872} |
| {"current_steps": 75, "total_steps": 2220, "loss": 0.4869, "lr": 3.3333333333333333e-06, "epoch": 0.6756756756756757, "percentage": 3.38, "elapsed_time": "0:00:08", "remaining_time": "0:03:54", "throughput": 3632.04, "total_tokens": 29824} |
| {"current_steps": 80, "total_steps": 2220, "loss": 0.3862, "lr": 3.5585585585585584e-06, "epoch": 0.7207207207207207, "percentage": 3.6, "elapsed_time": "0:00:08", "remaining_time": "0:03:52", "throughput": 3638.88, "total_tokens": 31680} |
| {"current_steps": 85, "total_steps": 2220, "loss": 0.6045, "lr": 3.7837837837837844e-06, "epoch": 0.7657657657657657, "percentage": 3.83, "elapsed_time": "0:00:09", "remaining_time": "0:03:51", "throughput": 3643.22, "total_tokens": 33632} |
| {"current_steps": 90, "total_steps": 2220, "loss": 0.4795, "lr": 4.009009009009009e-06, "epoch": 0.8108108108108109, "percentage": 4.05, "elapsed_time": "0:00:09", "remaining_time": "0:03:49", "throughput": 3659.18, "total_tokens": 35520} |
| {"current_steps": 95, "total_steps": 2220, "loss": 0.4883, "lr": 4.234234234234235e-06, "epoch": 0.8558558558558559, "percentage": 4.28, "elapsed_time": "0:00:10", "remaining_time": "0:03:49", "throughput": 3678.65, "total_tokens": 37824} |
| {"current_steps": 100, "total_steps": 2220, "loss": 0.349, "lr": 4.45945945945946e-06, "epoch": 0.9009009009009009, "percentage": 4.5, "elapsed_time": "0:00:10", "remaining_time": "0:03:48", "throughput": 3676.54, "total_tokens": 39616} |
| {"current_steps": 105, "total_steps": 2220, "loss": 0.4046, "lr": 4.684684684684685e-06, "epoch": 0.9459459459459459, "percentage": 4.73, "elapsed_time": "0:00:11", "remaining_time": "0:03:47", "throughput": 3676.76, "total_tokens": 41440} |
| {"current_steps": 110, "total_steps": 2220, "loss": 0.4068, "lr": 4.90990990990991e-06, "epoch": 0.990990990990991, "percentage": 4.95, "elapsed_time": "0:00:11", "remaining_time": "0:03:45", "throughput": 3675.28, "total_tokens": 43232} |
| {"current_steps": 115, "total_steps": 2220, "loss": 0.4102, "lr": 5.135135135135135e-06, "epoch": 1.0360360360360361, "percentage": 5.18, "elapsed_time": "0:00:12", "remaining_time": "0:03:47", "throughput": 3661.91, "total_tokens": 45440} |
| {"current_steps": 120, "total_steps": 2220, "loss": 0.3998, "lr": 5.360360360360361e-06, "epoch": 1.0810810810810811, "percentage": 5.41, "elapsed_time": "0:00:12", "remaining_time": "0:03:46", "throughput": 3667.78, "total_tokens": 47456} |
| {"current_steps": 125, "total_steps": 2220, "loss": 0.3688, "lr": 5.585585585585585e-06, "epoch": 1.1261261261261262, "percentage": 5.63, "elapsed_time": "0:00:13", "remaining_time": "0:03:45", "throughput": 3670.74, "total_tokens": 49440} |
| {"current_steps": 130, "total_steps": 2220, "loss": 0.4314, "lr": 5.810810810810811e-06, "epoch": 1.1711711711711712, "percentage": 5.86, "elapsed_time": "0:00:13", "remaining_time": "0:03:44", "throughput": 3671.53, "total_tokens": 51264} |
| {"current_steps": 135, "total_steps": 2220, "loss": 0.4353, "lr": 6.036036036036037e-06, "epoch": 1.2162162162162162, "percentage": 6.08, "elapsed_time": "0:00:14", "remaining_time": "0:03:44", "throughput": 3681.49, "total_tokens": 53408} |
| {"current_steps": 140, "total_steps": 2220, "loss": 0.4889, "lr": 6.261261261261262e-06, "epoch": 1.2612612612612613, "percentage": 6.31, "elapsed_time": "0:00:14", "remaining_time": "0:03:42", "throughput": 3681.61, "total_tokens": 55104} |
| {"current_steps": 145, "total_steps": 2220, "loss": 0.6041, "lr": 6.486486486486487e-06, "epoch": 1.3063063063063063, "percentage": 6.53, "elapsed_time": "0:00:15", "remaining_time": "0:03:41", "throughput": 3697.25, "total_tokens": 57280} |
| {"current_steps": 150, "total_steps": 2220, "loss": 0.4361, "lr": 6.711711711711713e-06, "epoch": 1.3513513513513513, "percentage": 6.76, "elapsed_time": "0:00:16", "remaining_time": "0:03:41", "throughput": 3699.59, "total_tokens": 59264} |
| {"current_steps": 155, "total_steps": 2220, "loss": 0.3774, "lr": 6.936936936936938e-06, "epoch": 1.3963963963963963, "percentage": 6.98, "elapsed_time": "0:00:16", "remaining_time": "0:03:39", "throughput": 3705.2, "total_tokens": 60992} |
| {"current_steps": 160, "total_steps": 2220, "loss": 0.323, "lr": 7.162162162162163e-06, "epoch": 1.4414414414414414, "percentage": 7.21, "elapsed_time": "0:00:16", "remaining_time": "0:03:38", "throughput": 3705.55, "total_tokens": 62944} |
| {"current_steps": 165, "total_steps": 2220, "loss": 0.3688, "lr": 7.387387387387388e-06, "epoch": 1.4864864864864864, "percentage": 7.43, "elapsed_time": "0:00:17", "remaining_time": "0:03:38", "throughput": 3715.81, "total_tokens": 65152} |
| {"current_steps": 170, "total_steps": 2220, "loss": 0.4719, "lr": 7.612612612612613e-06, "epoch": 1.5315315315315314, "percentage": 7.66, "elapsed_time": "0:00:18", "remaining_time": "0:03:37", "throughput": 3715.2, "total_tokens": 67104} |
| {"current_steps": 175, "total_steps": 2220, "loss": 0.4431, "lr": 7.837837837837838e-06, "epoch": 1.5765765765765765, "percentage": 7.88, "elapsed_time": "0:00:18", "remaining_time": "0:03:36", "throughput": 3713.14, "total_tokens": 68800} |
| {"current_steps": 180, "total_steps": 2220, "loss": 0.4052, "lr": 8.063063063063063e-06, "epoch": 1.6216216216216215, "percentage": 8.11, "elapsed_time": "0:00:18", "remaining_time": "0:03:35", "throughput": 3715.72, "total_tokens": 70592} |
| {"current_steps": 185, "total_steps": 2220, "loss": 0.5416, "lr": 8.288288288288289e-06, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:19", "remaining_time": "0:03:34", "throughput": 3722.15, "total_tokens": 72736} |
| {"current_steps": 190, "total_steps": 2220, "loss": 0.4054, "lr": 8.513513513513514e-06, "epoch": 1.7117117117117115, "percentage": 8.56, "elapsed_time": "0:00:19", "remaining_time": "0:03:33", "throughput": 3723.49, "total_tokens": 74400} |
| {"current_steps": 195, "total_steps": 2220, "loss": 0.4809, "lr": 8.738738738738739e-06, "epoch": 1.7567567567567568, "percentage": 8.78, "elapsed_time": "0:00:20", "remaining_time": "0:03:32", "throughput": 3725.18, "total_tokens": 76192} |
| {"current_steps": 200, "total_steps": 2220, "loss": 0.3069, "lr": 8.963963963963965e-06, "epoch": 1.8018018018018018, "percentage": 9.01, "elapsed_time": "0:00:21", "remaining_time": "0:03:32", "throughput": 3733.32, "total_tokens": 78400} |
| {"current_steps": 205, "total_steps": 2220, "loss": 0.442, "lr": 9.189189189189191e-06, "epoch": 1.8468468468468469, "percentage": 9.23, "elapsed_time": "0:00:21", "remaining_time": "0:03:30", "throughput": 3731.66, "total_tokens": 80096} |
| {"current_steps": 210, "total_steps": 2220, "loss": 0.3877, "lr": 9.414414414414416e-06, "epoch": 1.8918918918918919, "percentage": 9.46, "elapsed_time": "0:00:22", "remaining_time": "0:03:30", "throughput": 3736.64, "total_tokens": 82336} |
| {"current_steps": 215, "total_steps": 2220, "loss": 0.5344, "lr": 9.63963963963964e-06, "epoch": 1.936936936936937, "percentage": 9.68, "elapsed_time": "0:00:22", "remaining_time": "0:03:30", "throughput": 3742.82, "total_tokens": 84352} |
| {"current_steps": 220, "total_steps": 2220, "loss": 0.3783, "lr": 9.864864864864865e-06, "epoch": 1.981981981981982, "percentage": 9.91, "elapsed_time": "0:00:23", "remaining_time": "0:03:29", "throughput": 3747.53, "total_tokens": 86496} |
| {"current_steps": 222, "total_steps": 2220, "eval_loss": 0.7572298049926758, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:24", "remaining_time": "0:03:42", "throughput": 3528.48, "total_tokens": 87128} |
| {"current_steps": 225, "total_steps": 2220, "loss": 0.3575, "lr": 9.99997527658723e-06, "epoch": 2.027027027027027, "percentage": 10.14, "elapsed_time": "0:00:26", "remaining_time": "0:03:52", "throughput": 3359.82, "total_tokens": 88088} |
| {"current_steps": 230, "total_steps": 2220, "loss": 0.5042, "lr": 9.99969714100147e-06, "epoch": 2.0720720720720722, "percentage": 10.36, "elapsed_time": "0:00:26", "remaining_time": "0:03:51", "throughput": 3374.02, "total_tokens": 90232} |
| {"current_steps": 235, "total_steps": 2220, "loss": 0.394, "lr": 9.999109982812368e-06, "epoch": 2.1171171171171173, "percentage": 10.59, "elapsed_time": "0:00:27", "remaining_time": "0:03:49", "throughput": 3378.68, "total_tokens": 91864} |
| {"current_steps": 240, "total_steps": 2220, "loss": 0.3417, "lr": 9.99821383831115e-06, "epoch": 2.1621621621621623, "percentage": 10.81, "elapsed_time": "0:00:27", "remaining_time": "0:03:49", "throughput": 3394.31, "total_tokens": 94232} |
| {"current_steps": 245, "total_steps": 2220, "loss": 0.4322, "lr": 9.997008762886957e-06, "epoch": 2.2072072072072073, "percentage": 11.04, "elapsed_time": "0:00:28", "remaining_time": "0:03:48", "throughput": 3409.29, "total_tokens": 96536} |
| {"current_steps": 250, "total_steps": 2220, "loss": 0.4454, "lr": 9.99549483102341e-06, "epoch": 2.2522522522522523, "percentage": 11.26, "elapsed_time": "0:00:28", "remaining_time": "0:03:46", "throughput": 3416.19, "total_tokens": 98360} |
| {"current_steps": 255, "total_steps": 2220, "loss": 0.3688, "lr": 9.993672136294004e-06, "epoch": 2.2972972972972974, "percentage": 11.49, "elapsed_time": "0:00:29", "remaining_time": "0:03:45", "throughput": 3419.57, "total_tokens": 100152} |
| {"current_steps": 260, "total_steps": 2220, "loss": 0.3615, "lr": 9.991540791356342e-06, "epoch": 2.3423423423423424, "percentage": 11.71, "elapsed_time": "0:00:29", "remaining_time": "0:03:44", "throughput": 3428.84, "total_tokens": 102296} |
| {"current_steps": 265, "total_steps": 2220, "loss": 0.3533, "lr": 9.989100927945155e-06, "epoch": 2.3873873873873874, "percentage": 11.94, "elapsed_time": "0:00:30", "remaining_time": "0:03:43", "throughput": 3435.58, "total_tokens": 104024} |
| {"current_steps": 270, "total_steps": 2220, "loss": 0.3618, "lr": 9.986352696864165e-06, "epoch": 2.4324324324324325, "percentage": 12.16, "elapsed_time": "0:00:30", "remaining_time": "0:03:42", "throughput": 3442.17, "total_tokens": 105944} |
| {"current_steps": 275, "total_steps": 2220, "loss": 0.4132, "lr": 9.983296267976766e-06, "epoch": 2.4774774774774775, "percentage": 12.39, "elapsed_time": "0:00:31", "remaining_time": "0:03:42", "throughput": 3434.38, "total_tokens": 107928} |
| {"current_steps": 280, "total_steps": 2220, "loss": 0.4237, "lr": 9.979931830195524e-06, "epoch": 2.5225225225225225, "percentage": 12.61, "elapsed_time": "0:00:31", "remaining_time": "0:03:41", "throughput": 3440.29, "total_tokens": 109976} |
| {"current_steps": 285, "total_steps": 2220, "loss": 0.368, "lr": 9.976259591470496e-06, "epoch": 2.5675675675675675, "percentage": 12.84, "elapsed_time": "0:00:32", "remaining_time": "0:03:40", "throughput": 3447.21, "total_tokens": 112024} |
| {"current_steps": 290, "total_steps": 2220, "loss": 0.3599, "lr": 9.972279778776388e-06, "epoch": 2.6126126126126126, "percentage": 13.06, "elapsed_time": "0:00:33", "remaining_time": "0:03:40", "throughput": 3462.26, "total_tokens": 114584} |
| {"current_steps": 295, "total_steps": 2220, "loss": 0.2899, "lr": 9.967992638098517e-06, "epoch": 2.6576576576576576, "percentage": 13.29, "elapsed_time": "0:00:33", "remaining_time": "0:03:39", "throughput": 3464.43, "total_tokens": 116472} |
|
|