| {"current_steps": 5, "total_steps": 1800, "loss": 0.4642, "lr": 1.1111111111111112e-06, "epoch": 0.05555555555555555, "percentage": 0.28, "elapsed_time": "0:00:01", "remaining_time": "0:08:44", "throughput": 1094.08, "total_tokens": 1600} |
| {"current_steps": 10, "total_steps": 1800, "loss": 0.5471, "lr": 2.5e-06, "epoch": 0.1111111111111111, "percentage": 0.56, "elapsed_time": "0:00:02", "remaining_time": "0:07:06", "throughput": 1343.56, "total_tokens": 3200} |
| {"current_steps": 15, "total_steps": 1800, "loss": 0.5976, "lr": 3.888888888888889e-06, "epoch": 0.16666666666666666, "percentage": 0.83, "elapsed_time": "0:00:03", "remaining_time": "0:06:32", "throughput": 1445.55, "total_tokens": 4768} |
| {"current_steps": 20, "total_steps": 1800, "loss": 0.7557, "lr": 5.277777777777778e-06, "epoch": 0.2222222222222222, "percentage": 1.11, "elapsed_time": "0:00:04", "remaining_time": "0:06:15", "throughput": 1502.25, "total_tokens": 6336} |
| {"current_steps": 25, "total_steps": 1800, "loss": 0.5813, "lr": 6.666666666666667e-06, "epoch": 0.2777777777777778, "percentage": 1.39, "elapsed_time": "0:00:05", "remaining_time": "0:06:04", "throughput": 1538.58, "total_tokens": 7904} |
| {"current_steps": 30, "total_steps": 1800, "loss": 0.45, "lr": 8.055555555555557e-06, "epoch": 0.3333333333333333, "percentage": 1.67, "elapsed_time": "0:00:06", "remaining_time": "0:05:57", "throughput": 1569.46, "total_tokens": 9504} |
| {"current_steps": 35, "total_steps": 1800, "loss": 0.1443, "lr": 9.444444444444445e-06, "epoch": 0.3888888888888889, "percentage": 1.94, "elapsed_time": "0:00:06", "remaining_time": "0:05:51", "throughput": 1588.18, "total_tokens": 11072} |
| {"current_steps": 40, "total_steps": 1800, "loss": 0.1337, "lr": 1.0833333333333334e-05, "epoch": 0.4444444444444444, "percentage": 2.22, "elapsed_time": "0:00:07", "remaining_time": "0:05:47", "throughput": 1606.46, "total_tokens": 12672} |
| {"current_steps": 45, "total_steps": 1800, "loss": 0.0038, "lr": 1.2222222222222222e-05, "epoch": 0.5, "percentage": 2.5, "elapsed_time": "0:00:08", "remaining_time": "0:05:43", "throughput": 1609.62, "total_tokens": 14176} |
| {"current_steps": 50, "total_steps": 1800, "loss": 0.1347, "lr": 1.3611111111111111e-05, "epoch": 0.5555555555555556, "percentage": 2.78, "elapsed_time": "0:00:09", "remaining_time": "0:05:40", "throughput": 1622.39, "total_tokens": 15776} |
| {"current_steps": 55, "total_steps": 1800, "loss": 0.2198, "lr": 1.5e-05, "epoch": 0.6111111111111112, "percentage": 3.06, "elapsed_time": "0:00:10", "remaining_time": "0:05:37", "throughput": 1626.72, "total_tokens": 17312} |
| {"current_steps": 60, "total_steps": 1800, "loss": 0.1586, "lr": 1.638888888888889e-05, "epoch": 0.6666666666666666, "percentage": 3.33, "elapsed_time": "0:00:11", "remaining_time": "0:05:35", "throughput": 1630.36, "total_tokens": 18848} |
| {"current_steps": 65, "total_steps": 1800, "loss": 0.0848, "lr": 1.777777777777778e-05, "epoch": 0.7222222222222222, "percentage": 3.61, "elapsed_time": "0:00:12", "remaining_time": "0:05:33", "throughput": 1637.74, "total_tokens": 20448} |
| {"current_steps": 70, "total_steps": 1800, "loss": 0.0214, "lr": 1.9166666666666667e-05, "epoch": 0.7777777777777778, "percentage": 3.89, "elapsed_time": "0:00:13", "remaining_time": "0:05:31", "throughput": 1640.32, "total_tokens": 22016} |
| {"current_steps": 75, "total_steps": 1800, "loss": 0.1651, "lr": 2.0555555555555555e-05, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:00:14", "remaining_time": "0:05:29", "throughput": 1645.99, "total_tokens": 23616} |
| {"current_steps": 80, "total_steps": 1800, "loss": 0.1222, "lr": 2.1944444444444445e-05, "epoch": 0.8888888888888888, "percentage": 4.44, "elapsed_time": "0:00:15", "remaining_time": "0:05:28", "throughput": 1646.86, "total_tokens": 25152} |
| {"current_steps": 85, "total_steps": 1800, "loss": 0.0104, "lr": 2.3333333333333336e-05, "epoch": 0.9444444444444444, "percentage": 4.72, "elapsed_time": "0:00:16", "remaining_time": "0:05:26", "throughput": 1648.27, "total_tokens": 26688} |
| {"current_steps": 90, "total_steps": 1800, "loss": 0.0823, "lr": 2.4722222222222223e-05, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:17", "remaining_time": "0:05:25", "throughput": 1649.11, "total_tokens": 28256} |
| {"current_steps": 90, "total_steps": 1800, "eval_loss": 0.0761384591460228, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:17", "remaining_time": "0:05:36", "throughput": 1597.54, "total_tokens": 28256} |
| {"current_steps": 95, "total_steps": 1800, "loss": 0.0515, "lr": 2.6111111111111114e-05, "epoch": 1.0555555555555556, "percentage": 5.28, "elapsed_time": "0:00:19", "remaining_time": "0:05:57", "throughput": 1498.62, "total_tokens": 29824} |
| {"current_steps": 100, "total_steps": 1800, "loss": 0.1462, "lr": 2.7500000000000004e-05, "epoch": 1.1111111111111112, "percentage": 5.56, "elapsed_time": "0:00:20", "remaining_time": "0:05:54", "throughput": 1505.15, "total_tokens": 31360} |
| {"current_steps": 105, "total_steps": 1800, "loss": 0.0746, "lr": 2.8888888888888888e-05, "epoch": 1.1666666666666667, "percentage": 5.83, "elapsed_time": "0:00:21", "remaining_time": "0:05:51", "throughput": 1514.58, "total_tokens": 32960} |
| {"current_steps": 110, "total_steps": 1800, "loss": 0.132, "lr": 3.0277777777777776e-05, "epoch": 1.2222222222222223, "percentage": 6.11, "elapsed_time": "0:00:22", "remaining_time": "0:05:48", "throughput": 1519.03, "total_tokens": 34464} |
| {"current_steps": 115, "total_steps": 1800, "loss": 0.0438, "lr": 3.1666666666666666e-05, "epoch": 1.2777777777777777, "percentage": 6.39, "elapsed_time": "0:00:23", "remaining_time": "0:05:45", "throughput": 1525.96, "total_tokens": 36032} |
| {"current_steps": 120, "total_steps": 1800, "loss": 0.0689, "lr": 3.3055555555555553e-05, "epoch": 1.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:24", "remaining_time": "0:05:43", "throughput": 1531.81, "total_tokens": 37600} |
| {"current_steps": 125, "total_steps": 1800, "loss": 0.0283, "lr": 3.444444444444445e-05, "epoch": 1.3888888888888888, "percentage": 6.94, "elapsed_time": "0:00:25", "remaining_time": "0:05:41", "throughput": 1537.2, "total_tokens": 39168} |
| {"current_steps": 130, "total_steps": 1800, "loss": 0.1178, "lr": 3.5833333333333335e-05, "epoch": 1.4444444444444444, "percentage": 7.22, "elapsed_time": "0:00:26", "remaining_time": "0:05:39", "throughput": 1541.87, "total_tokens": 40736} |
| {"current_steps": 135, "total_steps": 1800, "loss": 0.0915, "lr": 3.722222222222222e-05, "epoch": 1.5, "percentage": 7.5, "elapsed_time": "0:00:27", "remaining_time": "0:05:37", "throughput": 1543.95, "total_tokens": 42240} |
| {"current_steps": 140, "total_steps": 1800, "loss": 0.0715, "lr": 3.8611111111111116e-05, "epoch": 1.5555555555555556, "percentage": 7.78, "elapsed_time": "0:00:28", "remaining_time": "0:05:35", "throughput": 1549.78, "total_tokens": 43840} |
| {"current_steps": 145, "total_steps": 1800, "loss": 0.0238, "lr": 4e-05, "epoch": 1.6111111111111112, "percentage": 8.06, "elapsed_time": "0:00:29", "remaining_time": "0:05:33", "throughput": 1554.24, "total_tokens": 45408} |
| {"current_steps": 150, "total_steps": 1800, "loss": 0.1054, "lr": 4.138888888888889e-05, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:30", "remaining_time": "0:05:31", "throughput": 1558.11, "total_tokens": 46976} |
| {"current_steps": 155, "total_steps": 1800, "loss": 0.0814, "lr": 4.277777777777778e-05, "epoch": 1.7222222222222223, "percentage": 8.61, "elapsed_time": "0:00:31", "remaining_time": "0:05:29", "throughput": 1560.82, "total_tokens": 48512} |
| {"current_steps": 160, "total_steps": 1800, "loss": 0.0153, "lr": 4.4166666666666665e-05, "epoch": 1.7777777777777777, "percentage": 8.89, "elapsed_time": "0:00:32", "remaining_time": "0:05:28", "throughput": 1564.17, "total_tokens": 50112} |
| {"current_steps": 165, "total_steps": 1800, "loss": 0.0289, "lr": 4.555555555555556e-05, "epoch": 1.8333333333333335, "percentage": 9.17, "elapsed_time": "0:00:32", "remaining_time": "0:05:26", "throughput": 1568.71, "total_tokens": 51712} |
| {"current_steps": 170, "total_steps": 1800, "loss": 0.1408, "lr": 4.6944444444444446e-05, "epoch": 1.8888888888888888, "percentage": 9.44, "elapsed_time": "0:00:33", "remaining_time": "0:05:24", "throughput": 1572.02, "total_tokens": 53280} |
| {"current_steps": 175, "total_steps": 1800, "loss": 0.0322, "lr": 4.8333333333333334e-05, "epoch": 1.9444444444444444, "percentage": 9.72, "elapsed_time": "0:00:34", "remaining_time": "0:05:23", "throughput": 1576.11, "total_tokens": 54880} |
| {"current_steps": 180, "total_steps": 1800, "loss": 0.0632, "lr": 4.972222222222223e-05, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:35", "remaining_time": "0:05:22", "throughput": 1578.44, "total_tokens": 56480} |
| {"current_steps": 180, "total_steps": 1800, "eval_loss": 0.04248045012354851, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:36", "remaining_time": "0:05:27", "throughput": 1553.99, "total_tokens": 56480} |
|
|