| {"current_steps": 5, "total_steps": 3600, "loss": 9.072, "lr": 5.555555555555556e-07, "epoch": 0.027777777777777776, "percentage": 0.14, "elapsed_time": "0:00:02", "remaining_time": "0:28:03", "throughput": 327.93, "total_tokens": 768} |
| {"current_steps": 10, "total_steps": 3600, "loss": 8.3464, "lr": 1.25e-06, "epoch": 0.05555555555555555, "percentage": 0.28, "elapsed_time": "0:00:02", "remaining_time": "0:16:26", "throughput": 552.95, "total_tokens": 1520} |
| {"current_steps": 15, "total_steps": 3600, "loss": 6.6805, "lr": 1.9444444444444444e-06, "epoch": 0.08333333333333333, "percentage": 0.42, "elapsed_time": "0:00:03", "remaining_time": "0:12:34", "throughput": 719.63, "total_tokens": 2272} |
| {"current_steps": 20, "total_steps": 3600, "loss": 4.92, "lr": 2.638888888888889e-06, "epoch": 0.1111111111111111, "percentage": 0.56, "elapsed_time": "0:00:03", "remaining_time": "0:10:37", "throughput": 844.36, "total_tokens": 3008} |
| {"current_steps": 25, "total_steps": 3600, "loss": 3.0464, "lr": 3.3333333333333333e-06, "epoch": 0.1388888888888889, "percentage": 0.69, "elapsed_time": "0:00:03", "remaining_time": "0:09:27", "throughput": 951.01, "total_tokens": 3776} |
| {"current_steps": 30, "total_steps": 3600, "loss": 1.6061, "lr": 4.027777777777779e-06, "epoch": 0.16666666666666666, "percentage": 0.83, "elapsed_time": "0:00:04", "remaining_time": "0:08:40", "throughput": 1038.37, "total_tokens": 4544} |
| {"current_steps": 35, "total_steps": 3600, "loss": 0.6809, "lr": 4.722222222222222e-06, "epoch": 0.19444444444444445, "percentage": 0.97, "elapsed_time": "0:00:04", "remaining_time": "0:08:07", "throughput": 1110.7, "total_tokens": 5312} |
| {"current_steps": 40, "total_steps": 3600, "loss": 0.3506, "lr": 5.416666666666667e-06, "epoch": 0.2222222222222222, "percentage": 1.11, "elapsed_time": "0:00:05", "remaining_time": "0:07:41", "throughput": 1178.05, "total_tokens": 6112} |
| {"current_steps": 45, "total_steps": 3600, "loss": 0.2268, "lr": 6.111111111111111e-06, "epoch": 0.25, "percentage": 1.25, "elapsed_time": "0:00:05", "remaining_time": "0:07:22", "throughput": 1223.91, "total_tokens": 6848} |
| {"current_steps": 50, "total_steps": 3600, "loss": 0.5681, "lr": 6.805555555555556e-06, "epoch": 0.2777777777777778, "percentage": 1.39, "elapsed_time": "0:00:06", "remaining_time": "0:07:06", "throughput": 1263.79, "total_tokens": 7584} |
| {"current_steps": 55, "total_steps": 3600, "loss": 0.4959, "lr": 7.5e-06, "epoch": 0.3055555555555556, "percentage": 1.53, "elapsed_time": "0:00:06", "remaining_time": "0:06:52", "throughput": 1306.14, "total_tokens": 8368} |
| {"current_steps": 60, "total_steps": 3600, "loss": 0.2822, "lr": 8.194444444444445e-06, "epoch": 0.3333333333333333, "percentage": 1.67, "elapsed_time": "0:00:06", "remaining_time": "0:06:41", "throughput": 1336.35, "total_tokens": 9104} |
| {"current_steps": 65, "total_steps": 3600, "loss": 0.2802, "lr": 8.88888888888889e-06, "epoch": 0.3611111111111111, "percentage": 1.81, "elapsed_time": "0:00:07", "remaining_time": "0:06:32", "throughput": 1365.27, "total_tokens": 9856} |
| {"current_steps": 70, "total_steps": 3600, "loss": 0.3114, "lr": 9.583333333333334e-06, "epoch": 0.3888888888888889, "percentage": 1.94, "elapsed_time": "0:00:07", "remaining_time": "0:06:24", "throughput": 1393.27, "total_tokens": 10624} |
| {"current_steps": 75, "total_steps": 3600, "loss": 0.2552, "lr": 1.0277777777777777e-05, "epoch": 0.4166666666666667, "percentage": 2.08, "elapsed_time": "0:00:08", "remaining_time": "0:06:17", "throughput": 1418.32, "total_tokens": 11392} |
| {"current_steps": 80, "total_steps": 3600, "loss": 0.2433, "lr": 1.0972222222222223e-05, "epoch": 0.4444444444444444, "percentage": 2.22, "elapsed_time": "0:00:08", "remaining_time": "0:06:11", "throughput": 1441.1, "total_tokens": 12160} |
| {"current_steps": 85, "total_steps": 3600, "loss": 0.2441, "lr": 1.1666666666666668e-05, "epoch": 0.4722222222222222, "percentage": 2.36, "elapsed_time": "0:00:08", "remaining_time": "0:06:05", "throughput": 1458.19, "total_tokens": 12896} |
| {"current_steps": 90, "total_steps": 3600, "loss": 0.3188, "lr": 1.2361111111111112e-05, "epoch": 0.5, "percentage": 2.5, "elapsed_time": "0:00:09", "remaining_time": "0:06:01", "throughput": 1472.34, "total_tokens": 13632} |
| {"current_steps": 95, "total_steps": 3600, "loss": 0.2929, "lr": 1.3055555555555557e-05, "epoch": 0.5277777777777778, "percentage": 2.64, "elapsed_time": "0:00:09", "remaining_time": "0:05:56", "throughput": 1487.89, "total_tokens": 14384} |
| {"current_steps": 100, "total_steps": 3600, "loss": 0.2516, "lr": 1.3750000000000002e-05, "epoch": 0.5555555555555556, "percentage": 2.78, "elapsed_time": "0:00:10", "remaining_time": "0:05:52", "throughput": 1501.27, "total_tokens": 15136} |
| {"current_steps": 105, "total_steps": 3600, "loss": 0.2697, "lr": 1.4444444444444444e-05, "epoch": 0.5833333333333334, "percentage": 2.92, "elapsed_time": "0:00:10", "remaining_time": "0:05:49", "throughput": 1516.73, "total_tokens": 15920} |
| {"current_steps": 110, "total_steps": 3600, "loss": 0.2693, "lr": 1.5138888888888888e-05, "epoch": 0.6111111111111112, "percentage": 3.06, "elapsed_time": "0:00:10", "remaining_time": "0:05:46", "throughput": 1526.1, "total_tokens": 16656} |
| {"current_steps": 115, "total_steps": 3600, "loss": 0.2373, "lr": 1.5833333333333333e-05, "epoch": 0.6388888888888888, "percentage": 3.19, "elapsed_time": "0:00:11", "remaining_time": "0:05:43", "throughput": 1538.53, "total_tokens": 17440} |
| {"current_steps": 120, "total_steps": 3600, "loss": 0.2968, "lr": 1.6527777777777777e-05, "epoch": 0.6666666666666666, "percentage": 3.33, "elapsed_time": "0:00:11", "remaining_time": "0:05:40", "throughput": 1551.3, "total_tokens": 18224} |
| {"current_steps": 125, "total_steps": 3600, "loss": 0.2625, "lr": 1.7222222222222224e-05, "epoch": 0.6944444444444444, "percentage": 3.47, "elapsed_time": "0:00:12", "remaining_time": "0:05:37", "throughput": 1563.42, "total_tokens": 19008} |
| {"current_steps": 130, "total_steps": 3600, "loss": 0.2268, "lr": 1.7916666666666667e-05, "epoch": 0.7222222222222222, "percentage": 3.61, "elapsed_time": "0:00:12", "remaining_time": "0:05:35", "throughput": 1574.0, "total_tokens": 19776} |
| {"current_steps": 135, "total_steps": 3600, "loss": 0.265, "lr": 1.861111111111111e-05, "epoch": 0.75, "percentage": 3.75, "elapsed_time": "0:00:12", "remaining_time": "0:05:32", "throughput": 1586.36, "total_tokens": 20576} |
| {"current_steps": 140, "total_steps": 3600, "loss": 0.2585, "lr": 1.9305555555555558e-05, "epoch": 0.7777777777777778, "percentage": 3.89, "elapsed_time": "0:00:13", "remaining_time": "0:05:30", "throughput": 1593.14, "total_tokens": 21312} |
| {"current_steps": 145, "total_steps": 3600, "loss": 0.2338, "lr": 2e-05, "epoch": 0.8055555555555556, "percentage": 4.03, "elapsed_time": "0:00:13", "remaining_time": "0:05:28", "throughput": 1601.86, "total_tokens": 22080} |
| {"current_steps": 150, "total_steps": 3600, "loss": 0.4694, "lr": 2.0694444444444445e-05, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:00:14", "remaining_time": "0:05:26", "throughput": 1611.07, "total_tokens": 22864} |
| {"current_steps": 155, "total_steps": 3600, "loss": 0.2769, "lr": 2.138888888888889e-05, "epoch": 0.8611111111111112, "percentage": 4.31, "elapsed_time": "0:00:14", "remaining_time": "0:05:24", "throughput": 1617.79, "total_tokens": 23616} |
| {"current_steps": 160, "total_steps": 3600, "loss": 0.2471, "lr": 2.2083333333333333e-05, "epoch": 0.8888888888888888, "percentage": 4.44, "elapsed_time": "0:00:15", "remaining_time": "0:05:22", "throughput": 1623.06, "total_tokens": 24352} |
| {"current_steps": 165, "total_steps": 3600, "loss": 0.2499, "lr": 2.277777777777778e-05, "epoch": 0.9166666666666666, "percentage": 4.58, "elapsed_time": "0:00:15", "remaining_time": "0:05:20", "throughput": 1627.78, "total_tokens": 25088} |
| {"current_steps": 170, "total_steps": 3600, "loss": 0.2382, "lr": 2.3472222222222223e-05, "epoch": 0.9444444444444444, "percentage": 4.72, "elapsed_time": "0:00:15", "remaining_time": "0:05:19", "throughput": 1634.06, "total_tokens": 25856} |
| {"current_steps": 175, "total_steps": 3600, "loss": 0.2486, "lr": 2.4166666666666667e-05, "epoch": 0.9722222222222222, "percentage": 4.86, "elapsed_time": "0:00:16", "remaining_time": "0:05:17", "throughput": 1641.25, "total_tokens": 26624} |
| {"current_steps": 180, "total_steps": 3600, "loss": 0.2953, "lr": 2.4861111111111114e-05, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:16", "remaining_time": "0:05:16", "throughput": 1642.81, "total_tokens": 27376} |
| {"current_steps": 180, "total_steps": 3600, "eval_loss": 0.2306298464536667, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:17", "remaining_time": "0:05:32", "throughput": 1563.82, "total_tokens": 27376} |
| {"current_steps": 185, "total_steps": 3600, "loss": 0.2508, "lr": 2.5555555555555554e-05, "epoch": 1.0277777777777777, "percentage": 5.14, "elapsed_time": "0:00:19", "remaining_time": "0:05:56", "throughput": 1456.69, "total_tokens": 28128} |
| {"current_steps": 190, "total_steps": 3600, "loss": 0.2485, "lr": 2.625e-05, "epoch": 1.0555555555555556, "percentage": 5.28, "elapsed_time": "0:00:19", "remaining_time": "0:05:55", "throughput": 1459.31, "total_tokens": 28896} |
| {"current_steps": 195, "total_steps": 3600, "loss": 0.2437, "lr": 2.6944444444444445e-05, "epoch": 1.0833333333333333, "percentage": 5.42, "elapsed_time": "0:00:20", "remaining_time": "0:05:53", "throughput": 1465.06, "total_tokens": 29648} |
| {"current_steps": 200, "total_steps": 3600, "loss": 0.2415, "lr": 2.7638888888888892e-05, "epoch": 1.1111111111111112, "percentage": 5.56, "elapsed_time": "0:00:20", "remaining_time": "0:05:51", "throughput": 1470.88, "total_tokens": 30416} |
| {"current_steps": 205, "total_steps": 3600, "loss": 0.2363, "lr": 2.8333333333333335e-05, "epoch": 1.1388888888888888, "percentage": 5.69, "elapsed_time": "0:00:21", "remaining_time": "0:05:49", "throughput": 1478.78, "total_tokens": 31200} |
| {"current_steps": 210, "total_steps": 3600, "loss": 0.232, "lr": 2.9027777777777782e-05, "epoch": 1.1666666666666667, "percentage": 5.83, "elapsed_time": "0:00:21", "remaining_time": "0:05:48", "throughput": 1482.56, "total_tokens": 31968} |
| {"current_steps": 215, "total_steps": 3600, "loss": 0.2271, "lr": 2.9722222222222223e-05, "epoch": 1.1944444444444444, "percentage": 5.97, "elapsed_time": "0:00:21", "remaining_time": "0:05:46", "throughput": 1488.24, "total_tokens": 32720} |
| {"current_steps": 220, "total_steps": 3600, "loss": 0.2513, "lr": 3.0416666666666666e-05, "epoch": 1.2222222222222223, "percentage": 6.11, "elapsed_time": "0:00:22", "remaining_time": "0:05:44", "throughput": 1494.14, "total_tokens": 33472} |
| {"current_steps": 225, "total_steps": 3600, "loss": 0.4205, "lr": 3.111111111111111e-05, "epoch": 1.25, "percentage": 6.25, "elapsed_time": "0:00:22", "remaining_time": "0:05:42", "throughput": 1499.61, "total_tokens": 34208} |
| {"current_steps": 230, "total_steps": 3600, "loss": 0.1853, "lr": 3.180555555555556e-05, "epoch": 1.2777777777777777, "percentage": 6.39, "elapsed_time": "0:00:23", "remaining_time": "0:05:40", "throughput": 1505.72, "total_tokens": 34960} |
| {"current_steps": 235, "total_steps": 3600, "loss": 0.6516, "lr": 3.2500000000000004e-05, "epoch": 1.3055555555555556, "percentage": 6.53, "elapsed_time": "0:00:23", "remaining_time": "0:05:38", "throughput": 1512.31, "total_tokens": 35728} |
| {"current_steps": 240, "total_steps": 3600, "loss": 0.518, "lr": 3.3194444444444444e-05, "epoch": 1.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:24", "remaining_time": "0:05:36", "throughput": 1517.31, "total_tokens": 36464} |
| {"current_steps": 245, "total_steps": 3600, "loss": 0.2392, "lr": 3.388888888888889e-05, "epoch": 1.3611111111111112, "percentage": 6.81, "elapsed_time": "0:00:24", "remaining_time": "0:05:34", "throughput": 1522.05, "total_tokens": 37200} |
| {"current_steps": 250, "total_steps": 3600, "loss": 0.2328, "lr": 3.458333333333333e-05, "epoch": 1.3888888888888888, "percentage": 6.94, "elapsed_time": "0:00:24", "remaining_time": "0:05:32", "throughput": 1527.41, "total_tokens": 37952} |
| {"current_steps": 255, "total_steps": 3600, "loss": 0.2219, "lr": 3.527777777777778e-05, "epoch": 1.4166666666666667, "percentage": 7.08, "elapsed_time": "0:00:25", "remaining_time": "0:05:31", "throughput": 1531.95, "total_tokens": 38688} |
| {"current_steps": 260, "total_steps": 3600, "loss": 0.2434, "lr": 3.5972222222222225e-05, "epoch": 1.4444444444444444, "percentage": 7.22, "elapsed_time": "0:00:25", "remaining_time": "0:05:29", "throughput": 1538.2, "total_tokens": 39472} |
| {"current_steps": 265, "total_steps": 3600, "loss": 0.2581, "lr": 3.6666666666666666e-05, "epoch": 1.4722222222222223, "percentage": 7.36, "elapsed_time": "0:00:26", "remaining_time": "0:05:28", "throughput": 1543.54, "total_tokens": 40240} |
| {"current_steps": 270, "total_steps": 3600, "loss": 0.2469, "lr": 3.736111111111111e-05, "epoch": 1.5, "percentage": 7.5, "elapsed_time": "0:00:26", "remaining_time": "0:05:26", "throughput": 1548.83, "total_tokens": 41008} |
| {"current_steps": 275, "total_steps": 3600, "loss": 0.2577, "lr": 3.805555555555555e-05, "epoch": 1.5277777777777777, "percentage": 7.64, "elapsed_time": "0:00:26", "remaining_time": "0:05:25", "throughput": 1552.8, "total_tokens": 41744} |
| {"current_steps": 280, "total_steps": 3600, "loss": 0.2458, "lr": 3.875e-05, "epoch": 1.5555555555555556, "percentage": 7.78, "elapsed_time": "0:00:27", "remaining_time": "0:05:23", "throughput": 1558.18, "total_tokens": 42528} |
| {"current_steps": 285, "total_steps": 3600, "loss": 0.2288, "lr": 3.944444444444445e-05, "epoch": 1.5833333333333335, "percentage": 7.92, "elapsed_time": "0:00:27", "remaining_time": "0:05:22", "throughput": 1562.35, "total_tokens": 43280} |
| {"current_steps": 290, "total_steps": 3600, "loss": 0.1993, "lr": 4.0138888888888894e-05, "epoch": 1.6111111111111112, "percentage": 8.06, "elapsed_time": "0:00:28", "remaining_time": "0:05:20", "throughput": 1565.8, "total_tokens": 44016} |
| {"current_steps": 295, "total_steps": 3600, "loss": 0.3448, "lr": 4.0833333333333334e-05, "epoch": 1.6388888888888888, "percentage": 8.19, "elapsed_time": "0:00:28", "remaining_time": "0:05:19", "throughput": 1570.78, "total_tokens": 44800} |
| {"current_steps": 300, "total_steps": 3600, "loss": 0.223, "lr": 4.152777777777778e-05, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:28", "remaining_time": "0:05:18", "throughput": 1575.35, "total_tokens": 45584} |
| {"current_steps": 305, "total_steps": 3600, "loss": 0.2213, "lr": 4.222222222222222e-05, "epoch": 1.6944444444444444, "percentage": 8.47, "elapsed_time": "0:00:29", "remaining_time": "0:05:17", "throughput": 1579.64, "total_tokens": 46352} |
| {"current_steps": 310, "total_steps": 3600, "loss": 0.2503, "lr": 4.291666666666667e-05, "epoch": 1.7222222222222223, "percentage": 8.61, "elapsed_time": "0:00:29", "remaining_time": "0:05:15", "throughput": 1584.43, "total_tokens": 47136} |
| {"current_steps": 315, "total_steps": 3600, "loss": 0.244, "lr": 4.3611111111111116e-05, "epoch": 1.75, "percentage": 8.75, "elapsed_time": "0:00:30", "remaining_time": "0:05:14", "throughput": 1588.18, "total_tokens": 47904} |
| {"current_steps": 320, "total_steps": 3600, "loss": 0.2277, "lr": 4.4305555555555556e-05, "epoch": 1.7777777777777777, "percentage": 8.89, "elapsed_time": "0:00:30", "remaining_time": "0:05:13", "throughput": 1593.18, "total_tokens": 48704} |
| {"current_steps": 325, "total_steps": 3600, "loss": 0.2204, "lr": 4.5e-05, "epoch": 1.8055555555555556, "percentage": 9.03, "elapsed_time": "0:00:30", "remaining_time": "0:05:12", "throughput": 1595.77, "total_tokens": 49440} |
| {"current_steps": 330, "total_steps": 3600, "loss": 0.2366, "lr": 4.569444444444444e-05, "epoch": 1.8333333333333335, "percentage": 9.17, "elapsed_time": "0:00:31", "remaining_time": "0:05:11", "throughput": 1599.67, "total_tokens": 50224} |
| {"current_steps": 335, "total_steps": 3600, "loss": 0.2329, "lr": 4.638888888888889e-05, "epoch": 1.8611111111111112, "percentage": 9.31, "elapsed_time": "0:00:31", "remaining_time": "0:05:10", "throughput": 1602.05, "total_tokens": 50976} |
| {"current_steps": 340, "total_steps": 3600, "loss": 0.27, "lr": 4.708333333333334e-05, "epoch": 1.8888888888888888, "percentage": 9.44, "elapsed_time": "0:00:32", "remaining_time": "0:05:09", "throughput": 1605.87, "total_tokens": 51760} |
| {"current_steps": 345, "total_steps": 3600, "loss": 0.25, "lr": 4.7777777777777784e-05, "epoch": 1.9166666666666665, "percentage": 9.58, "elapsed_time": "0:00:32", "remaining_time": "0:05:07", "throughput": 1608.11, "total_tokens": 52496} |
| {"current_steps": 350, "total_steps": 3600, "loss": 0.2663, "lr": 4.8472222222222224e-05, "epoch": 1.9444444444444444, "percentage": 9.72, "elapsed_time": "0:00:33", "remaining_time": "0:05:06", "throughput": 1611.56, "total_tokens": 53264} |
| {"current_steps": 355, "total_steps": 3600, "loss": 0.2305, "lr": 4.9166666666666665e-05, "epoch": 1.9722222222222223, "percentage": 9.86, "elapsed_time": "0:00:33", "remaining_time": "0:05:05", "throughput": 1615.02, "total_tokens": 54032} |
| {"current_steps": 360, "total_steps": 3600, "loss": 0.2406, "lr": 4.986111111111111e-05, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:33", "remaining_time": "0:05:05", "throughput": 1615.66, "total_tokens": 54800} |
| {"current_steps": 360, "total_steps": 3600, "eval_loss": 0.23383498191833496, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:34", "remaining_time": "0:05:12", "throughput": 1576.61, "total_tokens": 54800} |
| {"current_steps": 365, "total_steps": 3600, "loss": 0.2153, "lr": 4.99998119647914e-05, "epoch": 2.0277777777777777, "percentage": 10.14, "elapsed_time": "0:00:36", "remaining_time": "0:05:25", "throughput": 1511.62, "total_tokens": 55536} |
| {"current_steps": 370, "total_steps": 3600, "loss": 0.2895, "lr": 4.999904807660428e-05, "epoch": 2.0555555555555554, "percentage": 10.28, "elapsed_time": "0:00:37", "remaining_time": "0:05:24", "throughput": 1513.95, "total_tokens": 56288} |
| {"current_steps": 375, "total_steps": 3600, "loss": 0.2023, "lr": 4.999769660117901e-05, "epoch": 2.0833333333333335, "percentage": 10.42, "elapsed_time": "0:00:37", "remaining_time": "0:05:23", "throughput": 1517.63, "total_tokens": 57072} |
| {"current_steps": 380, "total_steps": 3600, "loss": 0.2709, "lr": 4.999575757028119e-05, "epoch": 2.111111111111111, "percentage": 10.56, "elapsed_time": "0:00:38", "remaining_time": "0:05:22", "throughput": 1521.36, "total_tokens": 57840} |
| {"current_steps": 385, "total_steps": 3600, "loss": 0.2089, "lr": 4.9993231029486544e-05, "epoch": 2.138888888888889, "percentage": 10.69, "elapsed_time": "0:00:38", "remaining_time": "0:05:20", "throughput": 1524.71, "total_tokens": 58592} |
| {"current_steps": 390, "total_steps": 3600, "loss": 0.3449, "lr": 4.999011703817986e-05, "epoch": 2.1666666666666665, "percentage": 10.83, "elapsed_time": "0:00:38", "remaining_time": "0:05:19", "throughput": 1527.54, "total_tokens": 59328} |
| {"current_steps": 395, "total_steps": 3600, "loss": 0.2439, "lr": 4.9986415669553586e-05, "epoch": 2.1944444444444446, "percentage": 10.97, "elapsed_time": "0:00:39", "remaining_time": "0:05:18", "throughput": 1530.81, "total_tokens": 60080} |
| {"current_steps": 400, "total_steps": 3600, "loss": 0.2512, "lr": 4.998212701060612e-05, "epoch": 2.2222222222222223, "percentage": 11.11, "elapsed_time": "0:00:39", "remaining_time": "0:05:17", "throughput": 1534.05, "total_tokens": 60832} |
| {"current_steps": 405, "total_steps": 3600, "loss": 0.2341, "lr": 4.997725116213973e-05, "epoch": 2.25, "percentage": 11.25, "elapsed_time": "0:00:40", "remaining_time": "0:05:16", "throughput": 1536.71, "total_tokens": 61568} |
| {"current_steps": 410, "total_steps": 3600, "loss": 0.232, "lr": 4.997178823875826e-05, "epoch": 2.2777777777777777, "percentage": 11.39, "elapsed_time": "0:00:40", "remaining_time": "0:05:14", "throughput": 1539.99, "total_tokens": 62336} |
| {"current_steps": 415, "total_steps": 3600, "loss": 0.2359, "lr": 4.996573836886435e-05, "epoch": 2.3055555555555554, "percentage": 11.53, "elapsed_time": "0:00:40", "remaining_time": "0:05:13", "throughput": 1543.03, "total_tokens": 63088} |
| {"current_steps": 420, "total_steps": 3600, "loss": 0.238, "lr": 4.995910169465646e-05, "epoch": 2.3333333333333335, "percentage": 11.67, "elapsed_time": "0:00:41", "remaining_time": "0:05:12", "throughput": 1547.03, "total_tokens": 63888} |
| {"current_steps": 425, "total_steps": 3600, "loss": 0.2284, "lr": 4.9951878372125547e-05, "epoch": 2.361111111111111, "percentage": 11.81, "elapsed_time": "0:00:41", "remaining_time": "0:05:11", "throughput": 1549.82, "total_tokens": 64640} |
| {"current_steps": 430, "total_steps": 3600, "loss": 0.24, "lr": 4.994406857105136e-05, "epoch": 2.388888888888889, "percentage": 11.94, "elapsed_time": "0:00:42", "remaining_time": "0:05:10", "throughput": 1552.25, "total_tokens": 65392} |
| {"current_steps": 435, "total_steps": 3600, "loss": 0.2399, "lr": 4.993567247499845e-05, "epoch": 2.4166666666666665, "percentage": 12.08, "elapsed_time": "0:00:42", "remaining_time": "0:05:09", "throughput": 1555.61, "total_tokens": 66176} |
| {"current_steps": 440, "total_steps": 3600, "loss": 0.2394, "lr": 4.9926690281311904e-05, "epoch": 2.4444444444444446, "percentage": 12.22, "elapsed_time": "0:00:42", "remaining_time": "0:05:08", "throughput": 1558.98, "total_tokens": 66960} |
| {"current_steps": 445, "total_steps": 3600, "loss": 0.2217, "lr": 4.9917122201112656e-05, "epoch": 2.4722222222222223, "percentage": 12.36, "elapsed_time": "0:00:43", "remaining_time": "0:05:07", "throughput": 1561.74, "total_tokens": 67712} |
| {"current_steps": 450, "total_steps": 3600, "loss": 0.2221, "lr": 4.9906968459292524e-05, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:43", "remaining_time": "0:05:06", "throughput": 1564.72, "total_tokens": 68480} |
| {"current_steps": 455, "total_steps": 3600, "loss": 0.2479, "lr": 4.9896229294508976e-05, "epoch": 2.5277777777777777, "percentage": 12.64, "elapsed_time": "0:00:44", "remaining_time": "0:05:05", "throughput": 1567.71, "total_tokens": 69248} |
| {"current_steps": 460, "total_steps": 3600, "loss": 0.2514, "lr": 4.988490495917947e-05, "epoch": 2.5555555555555554, "percentage": 12.78, "elapsed_time": "0:00:44", "remaining_time": "0:05:04", "throughput": 1570.61, "total_tokens": 70016} |
| {"current_steps": 465, "total_steps": 3600, "loss": 0.2126, "lr": 4.987299571947553e-05, "epoch": 2.5833333333333335, "percentage": 12.92, "elapsed_time": "0:00:44", "remaining_time": "0:05:03", "throughput": 1573.28, "total_tokens": 70784} |
| {"current_steps": 470, "total_steps": 3600, "loss": 0.2394, "lr": 4.9860501855316514e-05, "epoch": 2.611111111111111, "percentage": 13.06, "elapsed_time": "0:00:45", "remaining_time": "0:05:02", "throughput": 1575.85, "total_tokens": 71552} |
| {"current_steps": 475, "total_steps": 3600, "loss": 0.2393, "lr": 4.9847423660363e-05, "epoch": 2.638888888888889, "percentage": 13.19, "elapsed_time": "0:00:45", "remaining_time": "0:05:01", "throughput": 1577.79, "total_tokens": 72288} |
| {"current_steps": 480, "total_steps": 3600, "loss": 0.2498, "lr": 4.983376144200992e-05, "epoch": 2.6666666666666665, "percentage": 13.33, "elapsed_time": "0:00:46", "remaining_time": "0:05:00", "throughput": 1580.66, "total_tokens": 73072} |
| {"current_steps": 485, "total_steps": 3600, "loss": 0.2338, "lr": 4.981951552137929e-05, "epoch": 2.6944444444444446, "percentage": 13.47, "elapsed_time": "0:00:46", "remaining_time": "0:04:59", "throughput": 1583.42, "total_tokens": 73856} |
| {"current_steps": 490, "total_steps": 3600, "loss": 0.2277, "lr": 4.980468623331273e-05, "epoch": 2.7222222222222223, "percentage": 13.61, "elapsed_time": "0:00:47", "remaining_time": "0:04:58", "throughput": 1585.97, "total_tokens": 74624} |
| {"current_steps": 495, "total_steps": 3600, "loss": 0.2319, "lr": 4.978927392636351e-05, "epoch": 2.75, "percentage": 13.75, "elapsed_time": "0:00:47", "remaining_time": "0:04:57", "throughput": 1588.45, "total_tokens": 75392} |
| {"current_steps": 500, "total_steps": 3600, "loss": 0.2254, "lr": 4.9773278962788436e-05, "epoch": 2.7777777777777777, "percentage": 13.89, "elapsed_time": "0:00:47", "remaining_time": "0:04:56", "throughput": 1591.01, "total_tokens": 76160} |
| {"current_steps": 505, "total_steps": 3600, "loss": 0.2294, "lr": 4.975670171853926e-05, "epoch": 2.8055555555555554, "percentage": 14.03, "elapsed_time": "0:00:48", "remaining_time": "0:04:55", "throughput": 1593.07, "total_tokens": 76912} |
| {"current_steps": 510, "total_steps": 3600, "loss": 0.2466, "lr": 4.973954258325392e-05, "epoch": 2.8333333333333335, "percentage": 14.17, "elapsed_time": "0:00:48", "remaining_time": "0:04:54", "throughput": 1595.51, "total_tokens": 77680} |
| {"current_steps": 515, "total_steps": 3600, "loss": 0.2302, "lr": 4.972180196024733e-05, "epoch": 2.861111111111111, "percentage": 14.31, "elapsed_time": "0:00:49", "remaining_time": "0:04:54", "throughput": 1597.94, "total_tokens": 78464} |
| {"current_steps": 520, "total_steps": 3600, "loss": 0.2322, "lr": 4.97034802665019e-05, "epoch": 2.888888888888889, "percentage": 14.44, "elapsed_time": "0:00:49", "remaining_time": "0:04:53", "throughput": 1600.24, "total_tokens": 79232} |
| {"current_steps": 525, "total_steps": 3600, "loss": 0.2139, "lr": 4.9684577932657786e-05, "epoch": 2.9166666666666665, "percentage": 14.58, "elapsed_time": "0:00:49", "remaining_time": "0:04:52", "throughput": 1601.98, "total_tokens": 79968} |
| {"current_steps": 530, "total_steps": 3600, "loss": 0.2551, "lr": 4.966509540300269e-05, "epoch": 2.9444444444444446, "percentage": 14.72, "elapsed_time": "0:00:50", "remaining_time": "0:04:51", "throughput": 1604.21, "total_tokens": 80736} |
| {"current_steps": 535, "total_steps": 3600, "loss": 0.224, "lr": 4.9645033135461494e-05, "epoch": 2.9722222222222223, "percentage": 14.86, "elapsed_time": "0:00:50", "remaining_time": "0:04:50", "throughput": 1606.33, "total_tokens": 81488} |
| {"current_steps": 540, "total_steps": 3600, "loss": 0.254, "lr": 4.962439160158544e-05, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:51", "remaining_time": "0:04:50", "throughput": 1606.89, "total_tokens": 82256} |
| {"current_steps": 540, "total_steps": 3600, "eval_loss": 0.24405327439308167, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:52", "remaining_time": "0:04:54", "throughput": 1580.69, "total_tokens": 82256} |
| {"current_steps": 545, "total_steps": 3600, "loss": 0.2308, "lr": 4.960317128654108e-05, "epoch": 3.0277777777777777, "percentage": 15.14, "elapsed_time": "0:00:53", "remaining_time": "0:05:01", "throughput": 1541.02, "total_tokens": 83008} |
| {"current_steps": 550, "total_steps": 3600, "loss": 0.2345, "lr": 4.958137268909887e-05, "epoch": 3.0555555555555554, "percentage": 15.28, "elapsed_time": "0:00:54", "remaining_time": "0:05:01", "throughput": 1542.96, "total_tokens": 83760} |
| {"current_steps": 555, "total_steps": 3600, "loss": 0.2254, "lr": 4.9558996321621405e-05, "epoch": 3.0833333333333335, "percentage": 15.42, "elapsed_time": "0:00:54", "remaining_time": "0:05:00", "throughput": 1545.05, "total_tokens": 84512} |
| {"current_steps": 560, "total_steps": 3600, "loss": 0.2316, "lr": 4.953604271005144e-05, "epoch": 3.111111111111111, "percentage": 15.56, "elapsed_time": "0:00:55", "remaining_time": "0:04:59", "throughput": 1547.06, "total_tokens": 85264} |
| {"current_steps": 565, "total_steps": 3600, "loss": 0.2319, "lr": 4.951251239389948e-05, "epoch": 3.138888888888889, "percentage": 15.69, "elapsed_time": "0:00:55", "remaining_time": "0:04:58", "throughput": 1549.4, "total_tokens": 86048} |
| {"current_steps": 570, "total_steps": 3600, "loss": 0.2404, "lr": 4.9488405926231144e-05, "epoch": 3.1666666666666665, "percentage": 15.83, "elapsed_time": "0:00:55", "remaining_time": "0:04:57", "throughput": 1551.2, "total_tokens": 86784} |
| {"current_steps": 575, "total_steps": 3600, "loss": 0.2339, "lr": 4.946372387365409e-05, "epoch": 3.1944444444444446, "percentage": 15.97, "elapsed_time": "0:00:56", "remaining_time": "0:04:56", "throughput": 1553.88, "total_tokens": 87568} |
| {"current_steps": 580, "total_steps": 3600, "loss": 0.2317, "lr": 4.943846681630479e-05, "epoch": 3.2222222222222223, "percentage": 16.11, "elapsed_time": "0:00:56", "remaining_time": "0:04:55", "throughput": 1555.98, "total_tokens": 88320} |
| {"current_steps": 585, "total_steps": 3600, "loss": 0.23, "lr": 4.941263534783482e-05, "epoch": 3.25, "percentage": 16.25, "elapsed_time": "0:00:57", "remaining_time": "0:04:54", "throughput": 1557.99, "total_tokens": 89072} |
| {"current_steps": 590, "total_steps": 3600, "loss": 0.2316, "lr": 4.9386230075396964e-05, "epoch": 3.2777777777777777, "percentage": 16.39, "elapsed_time": "0:00:57", "remaining_time": "0:04:53", "throughput": 1560.87, "total_tokens": 89872} |
| {"current_steps": 595, "total_steps": 3600, "loss": 0.2297, "lr": 4.9359251619630886e-05, "epoch": 3.3055555555555554, "percentage": 16.53, "elapsed_time": "0:00:57", "remaining_time": "0:04:52", "throughput": 1563.15, "total_tokens": 90640} |
| {"current_steps": 600, "total_steps": 3600, "loss": 0.2317, "lr": 4.933170061464858e-05, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:58", "remaining_time": "0:04:51", "throughput": 1564.87, "total_tokens": 91376} |
| {"current_steps": 605, "total_steps": 3600, "loss": 0.2411, "lr": 4.930357770801947e-05, "epoch": 3.361111111111111, "percentage": 16.81, "elapsed_time": "0:00:58", "remaining_time": "0:04:51", "throughput": 1566.83, "total_tokens": 92128} |
| {"current_steps": 610, "total_steps": 3600, "loss": 0.2321, "lr": 4.9274883560755156e-05, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:00:59", "remaining_time": "0:04:50", "throughput": 1569.31, "total_tokens": 92912} |
| {"current_steps": 615, "total_steps": 3600, "loss": 0.2217, "lr": 4.924561884729391e-05, "epoch": 3.4166666666666665, "percentage": 17.08, "elapsed_time": "0:00:59", "remaining_time": "0:04:49", "throughput": 1570.52, "total_tokens": 93632} |
| {"current_steps": 620, "total_steps": 3600, "loss": 0.2442, "lr": 4.921578425548482e-05, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:01:00", "remaining_time": "0:04:48", "throughput": 1572.39, "total_tokens": 94384} |
| {"current_steps": 625, "total_steps": 3600, "loss": 0.2176, "lr": 4.9185380486571595e-05, "epoch": 3.4722222222222223, "percentage": 17.36, "elapsed_time": "0:01:00", "remaining_time": "0:04:47", "throughput": 1574.22, "total_tokens": 95152} |
| {"current_steps": 630, "total_steps": 3600, "loss": 0.2196, "lr": 4.915440825517612e-05, "epoch": 3.5, "percentage": 17.5, "elapsed_time": "0:01:00", "remaining_time": "0:04:46", "throughput": 1576.32, "total_tokens": 95920} |
| {"current_steps": 635, "total_steps": 3600, "loss": 0.2427, "lr": 4.912286828928162e-05, "epoch": 3.5277777777777777, "percentage": 17.64, "elapsed_time": "0:01:01", "remaining_time": "0:04:46", "throughput": 1578.59, "total_tokens": 96704} |
| {"current_steps": 640, "total_steps": 3600, "loss": 0.2455, "lr": 4.909076133021557e-05, "epoch": 3.5555555555555554, "percentage": 17.78, "elapsed_time": "0:01:01", "remaining_time": "0:04:45", "throughput": 1578.51, "total_tokens": 97456} |
| {"current_steps": 645, "total_steps": 3600, "loss": 0.2193, "lr": 4.9058088132632306e-05, "epoch": 3.5833333333333335, "percentage": 17.92, "elapsed_time": "0:01:02", "remaining_time": "0:04:44", "throughput": 1580.04, "total_tokens": 98192} |
| {"current_steps": 650, "total_steps": 3600, "loss": 0.2242, "lr": 4.9024849464495215e-05, "epoch": 3.611111111111111, "percentage": 18.06, "elapsed_time": "0:01:02", "remaining_time": "0:04:43", "throughput": 1581.59, "total_tokens": 98944} |
| {"current_steps": 655, "total_steps": 3600, "loss": 0.2392, "lr": 4.8991046107058735e-05, "epoch": 3.638888888888889, "percentage": 18.19, "elapsed_time": "0:01:02", "remaining_time": "0:04:43", "throughput": 1583.16, "total_tokens": 99712} |
| {"current_steps": 660, "total_steps": 3600, "loss": 0.2488, "lr": 4.895667885484997e-05, "epoch": 3.6666666666666665, "percentage": 18.33, "elapsed_time": "0:01:03", "remaining_time": "0:04:42", "throughput": 1585.23, "total_tokens": 100496} |
| {"current_steps": 665, "total_steps": 3600, "loss": 0.2423, "lr": 4.892174851565004e-05, "epoch": 3.6944444444444446, "percentage": 18.47, "elapsed_time": "0:01:03", "remaining_time": "0:04:41", "throughput": 1586.39, "total_tokens": 101232} |
| {"current_steps": 670, "total_steps": 3600, "loss": 0.2215, "lr": 4.8886255910475054e-05, "epoch": 3.7222222222222223, "percentage": 18.61, "elapsed_time": "0:01:04", "remaining_time": "0:04:40", "throughput": 1588.06, "total_tokens": 102000} |
| {"current_steps": 675, "total_steps": 3600, "loss": 0.2264, "lr": 4.885020187355687e-05, "epoch": 3.75, "percentage": 18.75, "elapsed_time": "0:01:04", "remaining_time": "0:04:40", "throughput": 1589.88, "total_tokens": 102784} |
| {"current_steps": 680, "total_steps": 3600, "loss": 0.2685, "lr": 4.881358725232342e-05, "epoch": 3.7777777777777777, "percentage": 18.89, "elapsed_time": "0:01:05", "remaining_time": "0:04:39", "throughput": 1592.09, "total_tokens": 103584} |
| {"current_steps": 685, "total_steps": 3600, "loss": 0.247, "lr": 4.877641290737884e-05, "epoch": 3.8055555555555554, "percentage": 19.03, "elapsed_time": "0:01:05", "remaining_time": "0:04:38", "throughput": 1594.2, "total_tokens": 104368} |
| {"current_steps": 690, "total_steps": 3600, "loss": 0.2391, "lr": 4.873867971248324e-05, "epoch": 3.8333333333333335, "percentage": 19.17, "elapsed_time": "0:01:05", "remaining_time": "0:04:37", "throughput": 1595.56, "total_tokens": 105104} |
| {"current_steps": 695, "total_steps": 3600, "loss": 0.2354, "lr": 4.870038855453213e-05, "epoch": 3.861111111111111, "percentage": 19.31, "elapsed_time": "0:01:06", "remaining_time": "0:04:37", "throughput": 1597.16, "total_tokens": 105856} |
| {"current_steps": 700, "total_steps": 3600, "loss": 0.2259, "lr": 4.866154033353561e-05, "epoch": 3.888888888888889, "percentage": 19.44, "elapsed_time": "0:01:06", "remaining_time": "0:04:36", "throughput": 1598.98, "total_tokens": 106624} |
| {"current_steps": 705, "total_steps": 3600, "loss": 0.2426, "lr": 4.86221359625972e-05, "epoch": 3.9166666666666665, "percentage": 19.58, "elapsed_time": "0:01:07", "remaining_time": "0:04:35", "throughput": 1601.06, "total_tokens": 107408} |
| {"current_steps": 710, "total_steps": 3600, "loss": 0.2138, "lr": 4.858217636789241e-05, "epoch": 3.9444444444444446, "percentage": 19.72, "elapsed_time": "0:01:07", "remaining_time": "0:04:34", "throughput": 1602.66, "total_tokens": 108160} |
| {"current_steps": 715, "total_steps": 3600, "loss": 0.2231, "lr": 4.854166248864689e-05, "epoch": 3.9722222222222223, "percentage": 19.86, "elapsed_time": "0:01:07", "remaining_time": "0:04:33", "throughput": 1604.66, "total_tokens": 108928} |
| {"current_steps": 720, "total_steps": 3600, "loss": 0.2126, "lr": 4.850059527711444e-05, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:08", "remaining_time": "0:04:33", "throughput": 1604.89, "total_tokens": 109680} |
| {"current_steps": 720, "total_steps": 3600, "eval_loss": 0.24558468163013458, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:09", "remaining_time": "0:04:36", "throughput": 1585.38, "total_tokens": 109680} |
| {"current_steps": 725, "total_steps": 3600, "loss": 0.2617, "lr": 4.84589756985546e-05, "epoch": 4.027777777777778, "percentage": 20.14, "elapsed_time": "0:01:11", "remaining_time": "0:04:42", "throughput": 1552.71, "total_tokens": 110448} |
| {"current_steps": 730, "total_steps": 3600, "loss": 0.2522, "lr": 4.8416804731209945e-05, "epoch": 4.055555555555555, "percentage": 20.28, "elapsed_time": "0:01:11", "remaining_time": "0:04:41", "throughput": 1554.1, "total_tokens": 111216} |
| {"current_steps": 735, "total_steps": 3600, "loss": 0.2237, "lr": 4.8374083366283096e-05, "epoch": 4.083333333333333, "percentage": 20.42, "elapsed_time": "0:01:11", "remaining_time": "0:04:40", "throughput": 1555.73, "total_tokens": 111968} |
| {"current_steps": 740, "total_steps": 3600, "loss": 0.2299, "lr": 4.833081260791345e-05, "epoch": 4.111111111111111, "percentage": 20.56, "elapsed_time": "0:01:12", "remaining_time": "0:04:39", "throughput": 1557.45, "total_tokens": 112736} |
| {"current_steps": 745, "total_steps": 3600, "loss": 0.2297, "lr": 4.828699347315356e-05, "epoch": 4.138888888888889, "percentage": 20.69, "elapsed_time": "0:01:12", "remaining_time": "0:04:38", "throughput": 1558.9, "total_tokens": 113488} |
| {"current_steps": 750, "total_steps": 3600, "loss": 0.24, "lr": 4.82426269919452e-05, "epoch": 4.166666666666667, "percentage": 20.83, "elapsed_time": "0:01:13", "remaining_time": "0:04:38", "throughput": 1560.19, "total_tokens": 114256} |
| {"current_steps": 755, "total_steps": 3600, "loss": 0.2413, "lr": 4.8197714207095205e-05, "epoch": 4.194444444444445, "percentage": 20.97, "elapsed_time": "0:01:13", "remaining_time": "0:04:37", "throughput": 1561.83, "total_tokens": 115024} |
| {"current_steps": 760, "total_steps": 3600, "loss": 0.216, "lr": 4.815225617425095e-05, "epoch": 4.222222222222222, "percentage": 21.11, "elapsed_time": "0:01:14", "remaining_time": "0:04:36", "throughput": 1563.33, "total_tokens": 115776} |
| {"current_steps": 765, "total_steps": 3600, "loss": 0.2355, "lr": 4.8106253961875506e-05, "epoch": 4.25, "percentage": 21.25, "elapsed_time": "0:01:14", "remaining_time": "0:04:35", "throughput": 1565.31, "total_tokens": 116560} |
| {"current_steps": 770, "total_steps": 3600, "loss": 0.2378, "lr": 4.805970865122257e-05, "epoch": 4.277777777777778, "percentage": 21.39, "elapsed_time": "0:01:14", "remaining_time": "0:04:35", "throughput": 1567.31, "total_tokens": 117344} |
| {"current_steps": 775, "total_steps": 3600, "loss": 0.2219, "lr": 4.8012621336311016e-05, "epoch": 4.305555555555555, "percentage": 21.53, "elapsed_time": "0:01:15", "remaining_time": "0:04:34", "throughput": 1568.44, "total_tokens": 118080} |
| {"current_steps": 780, "total_steps": 3600, "loss": 0.2239, "lr": 4.7964993123899195e-05, "epoch": 4.333333333333333, "percentage": 21.67, "elapsed_time": "0:01:15", "remaining_time": "0:04:33", "throughput": 1570.13, "total_tokens": 118848} |
| {"current_steps": 785, "total_steps": 3600, "loss": 0.256, "lr": 4.791682513345892e-05, "epoch": 4.361111111111111, "percentage": 21.81, "elapsed_time": "0:01:16", "remaining_time": "0:04:32", "throughput": 1572.04, "total_tokens": 119632} |
| {"current_steps": 790, "total_steps": 3600, "loss": 0.2624, "lr": 4.786811849714918e-05, "epoch": 4.388888888888889, "percentage": 21.94, "elapsed_time": "0:01:16", "remaining_time": "0:04:32", "throughput": 1573.31, "total_tokens": 120368} |
| {"current_steps": 795, "total_steps": 3600, "loss": 0.2302, "lr": 4.781887435978947e-05, "epoch": 4.416666666666667, "percentage": 22.08, "elapsed_time": "0:01:16", "remaining_time": "0:04:31", "throughput": 1574.95, "total_tokens": 121136} |
| {"current_steps": 800, "total_steps": 3600, "loss": 0.221, "lr": 4.776909387883292e-05, "epoch": 4.444444444444445, "percentage": 22.22, "elapsed_time": "0:01:17", "remaining_time": "0:04:30", "throughput": 1576.37, "total_tokens": 121888} |
| {"current_steps": 805, "total_steps": 3600, "loss": 0.2565, "lr": 4.771877822433911e-05, "epoch": 4.472222222222222, "percentage": 22.36, "elapsed_time": "0:01:17", "remaining_time": "0:04:29", "throughput": 1577.88, "total_tokens": 122656} |
| {"current_steps": 810, "total_steps": 3600, "loss": 0.2315, "lr": 4.766792857894652e-05, "epoch": 4.5, "percentage": 22.5, "elapsed_time": "0:01:18", "remaining_time": "0:04:29", "throughput": 1579.27, "total_tokens": 123408} |
| {"current_steps": 815, "total_steps": 3600, "loss": 0.2369, "lr": 4.761654613784477e-05, "epoch": 4.527777777777778, "percentage": 22.64, "elapsed_time": "0:01:18", "remaining_time": "0:04:28", "throughput": 1580.56, "total_tokens": 124160} |
| {"current_steps": 820, "total_steps": 3600, "loss": 0.2443, "lr": 4.756463210874652e-05, "epoch": 4.555555555555555, "percentage": 22.78, "elapsed_time": "0:01:18", "remaining_time": "0:04:27", "throughput": 1582.11, "total_tokens": 124928} |
| {"current_steps": 825, "total_steps": 3600, "loss": 0.2224, "lr": 4.751218771185906e-05, "epoch": 4.583333333333333, "percentage": 22.92, "elapsed_time": "0:01:19", "remaining_time": "0:04:26", "throughput": 1583.61, "total_tokens": 125696} |
| {"current_steps": 830, "total_steps": 3600, "loss": 0.2312, "lr": 4.745921417985566e-05, "epoch": 4.611111111111111, "percentage": 23.06, "elapsed_time": "0:01:19", "remaining_time": "0:04:26", "throughput": 1584.9, "total_tokens": 126448} |
| {"current_steps": 835, "total_steps": 3600, "loss": 0.2216, "lr": 4.740571275784659e-05, "epoch": 4.638888888888889, "percentage": 23.19, "elapsed_time": "0:01:20", "remaining_time": "0:04:25", "throughput": 1585.81, "total_tokens": 127184} |
| {"current_steps": 840, "total_steps": 3600, "loss": 0.2436, "lr": 4.735168470334984e-05, "epoch": 4.666666666666667, "percentage": 23.33, "elapsed_time": "0:01:20", "remaining_time": "0:04:24", "throughput": 1587.0, "total_tokens": 127936} |
| {"current_steps": 845, "total_steps": 3600, "loss": 0.2327, "lr": 4.729713128626158e-05, "epoch": 4.694444444444445, "percentage": 23.47, "elapsed_time": "0:01:21", "remaining_time": "0:04:24", "throughput": 1588.27, "total_tokens": 128688} |
| {"current_steps": 850, "total_steps": 3600, "loss": 0.2496, "lr": 4.72420537888263e-05, "epoch": 4.722222222222222, "percentage": 23.61, "elapsed_time": "0:01:21", "remaining_time": "0:04:23", "throughput": 1589.54, "total_tokens": 129456} |
| {"current_steps": 855, "total_steps": 3600, "loss": 0.2261, "lr": 4.7186453505606676e-05, "epoch": 4.75, "percentage": 23.75, "elapsed_time": "0:01:21", "remaining_time": "0:04:22", "throughput": 1590.34, "total_tokens": 130192} |
| {"current_steps": 860, "total_steps": 3600, "loss": 0.2407, "lr": 4.713033174345314e-05, "epoch": 4.777777777777778, "percentage": 23.89, "elapsed_time": "0:01:22", "remaining_time": "0:04:22", "throughput": 1591.91, "total_tokens": 130976} |
| {"current_steps": 865, "total_steps": 3600, "loss": 0.2289, "lr": 4.707368982147318e-05, "epoch": 4.805555555555555, "percentage": 24.03, "elapsed_time": "0:01:22", "remaining_time": "0:04:21", "throughput": 1593.14, "total_tokens": 131728} |
| {"current_steps": 870, "total_steps": 3600, "loss": 0.2216, "lr": 4.701652907100029e-05, "epoch": 4.833333333333333, "percentage": 24.17, "elapsed_time": "0:01:23", "remaining_time": "0:04:20", "throughput": 1594.1, "total_tokens": 132480} |
| {"current_steps": 875, "total_steps": 3600, "loss": 0.2387, "lr": 4.695885083556275e-05, "epoch": 4.861111111111111, "percentage": 24.31, "elapsed_time": "0:01:23", "remaining_time": "0:04:20", "throughput": 1595.57, "total_tokens": 133264} |
| {"current_steps": 880, "total_steps": 3600, "loss": 0.2355, "lr": 4.6900656470851964e-05, "epoch": 4.888888888888889, "percentage": 24.44, "elapsed_time": "0:01:23", "remaining_time": "0:04:19", "throughput": 1596.78, "total_tokens": 134032} |
| {"current_steps": 885, "total_steps": 3600, "loss": 0.2281, "lr": 4.684194734469067e-05, "epoch": 4.916666666666667, "percentage": 24.58, "elapsed_time": "0:01:24", "remaining_time": "0:04:18", "throughput": 1598.17, "total_tokens": 134800} |
| {"current_steps": 890, "total_steps": 3600, "loss": 0.2264, "lr": 4.678272483700074e-05, "epoch": 4.944444444444445, "percentage": 24.72, "elapsed_time": "0:01:24", "remaining_time": "0:04:18", "throughput": 1599.16, "total_tokens": 135536} |
| {"current_steps": 895, "total_steps": 3600, "loss": 0.2126, "lr": 4.672299033977076e-05, "epoch": 4.972222222222222, "percentage": 24.86, "elapsed_time": "0:01:25", "remaining_time": "0:04:17", "throughput": 1600.32, "total_tokens": 136272} |
| {"current_steps": 900, "total_steps": 3600, "loss": 0.2399, "lr": 4.6662745257023325e-05, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:25", "remaining_time": "0:04:16", "throughput": 1600.6, "total_tokens": 137040} |
| {"current_steps": 900, "total_steps": 3600, "eval_loss": 0.24199585616588593, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:26", "remaining_time": "0:04:19", "throughput": 1584.94, "total_tokens": 137040} |
| {"current_steps": 905, "total_steps": 3600, "loss": 0.2633, "lr": 4.660199100478202e-05, "epoch": 5.027777777777778, "percentage": 25.14, "elapsed_time": "0:01:28", "remaining_time": "0:04:23", "throughput": 1557.88, "total_tokens": 137808} |
| {"current_steps": 910, "total_steps": 3600, "loss": 0.2342, "lr": 4.6540729011038146e-05, "epoch": 5.055555555555555, "percentage": 25.28, "elapsed_time": "0:01:28", "remaining_time": "0:04:22", "throughput": 1558.94, "total_tokens": 138560} |
| {"current_steps": 915, "total_steps": 3600, "loss": 0.2193, "lr": 4.6478960715717176e-05, "epoch": 5.083333333333333, "percentage": 25.42, "elapsed_time": "0:01:29", "remaining_time": "0:04:22", "throughput": 1560.1, "total_tokens": 139312} |
| {"current_steps": 920, "total_steps": 3600, "loss": 0.2387, "lr": 4.641668757064486e-05, "epoch": 5.111111111111111, "percentage": 25.56, "elapsed_time": "0:01:29", "remaining_time": "0:04:21", "throughput": 1560.97, "total_tokens": 140032} |
| {"current_steps": 925, "total_steps": 3600, "loss": 0.2351, "lr": 4.6353911039513145e-05, "epoch": 5.138888888888889, "percentage": 25.69, "elapsed_time": "0:01:30", "remaining_time": "0:04:20", "throughput": 1562.0, "total_tokens": 140768} |
| {"current_steps": 930, "total_steps": 3600, "loss": 0.2339, "lr": 4.6290632597845755e-05, "epoch": 5.166666666666667, "percentage": 25.83, "elapsed_time": "0:01:30", "remaining_time": "0:04:19", "throughput": 1563.41, "total_tokens": 141536} |
| {"current_steps": 935, "total_steps": 3600, "loss": 0.2303, "lr": 4.622685373296353e-05, "epoch": 5.194444444444445, "percentage": 25.97, "elapsed_time": "0:01:30", "remaining_time": "0:04:19", "throughput": 1565.0, "total_tokens": 142320} |
| {"current_steps": 940, "total_steps": 3600, "loss": 0.2256, "lr": 4.61625759439494e-05, "epoch": 5.222222222222222, "percentage": 26.11, "elapsed_time": "0:01:31", "remaining_time": "0:04:18", "throughput": 1566.24, "total_tokens": 143088} |
| {"current_steps": 945, "total_steps": 3600, "loss": 0.2297, "lr": 4.609780074161327e-05, "epoch": 5.25, "percentage": 26.25, "elapsed_time": "0:01:31", "remaining_time": "0:04:17", "throughput": 1567.32, "total_tokens": 143840} |
| {"current_steps": 950, "total_steps": 3600, "loss": 0.2214, "lr": 4.603252964845638e-05, "epoch": 5.277777777777778, "percentage": 26.39, "elapsed_time": "0:01:32", "remaining_time": "0:04:17", "throughput": 1566.92, "total_tokens": 144592} |
| {"current_steps": 955, "total_steps": 3600, "loss": 0.2151, "lr": 4.5966764198635606e-05, "epoch": 5.305555555555555, "percentage": 26.53, "elapsed_time": "0:01:32", "remaining_time": "0:04:16", "throughput": 1567.92, "total_tokens": 145328} |
| {"current_steps": 960, "total_steps": 3600, "loss": 0.185, "lr": 4.590050593792736e-05, "epoch": 5.333333333333333, "percentage": 26.67, "elapsed_time": "0:01:33", "remaining_time": "0:04:16", "throughput": 1569.0, "total_tokens": 146080} |
| {"current_steps": 965, "total_steps": 3600, "loss": 0.2611, "lr": 4.583375642369129e-05, "epoch": 5.361111111111111, "percentage": 26.81, "elapsed_time": "0:01:33", "remaining_time": "0:04:15", "throughput": 1570.31, "total_tokens": 146864} |
| {"current_steps": 970, "total_steps": 3600, "loss": 0.274, "lr": 4.5766517224833637e-05, "epoch": 5.388888888888889, "percentage": 26.94, "elapsed_time": "0:01:33", "remaining_time": "0:04:14", "throughput": 1571.54, "total_tokens": 147632} |
| {"current_steps": 975, "total_steps": 3600, "loss": 0.2228, "lr": 4.569878992177039e-05, "epoch": 5.416666666666667, "percentage": 27.08, "elapsed_time": "0:01:34", "remaining_time": "0:04:14", "throughput": 1573.07, "total_tokens": 148416} |
| {"current_steps": 980, "total_steps": 3600, "loss": 0.2225, "lr": 4.5630576106390114e-05, "epoch": 5.444444444444445, "percentage": 27.22, "elapsed_time": "0:01:34", "remaining_time": "0:04:13", "throughput": 1574.02, "total_tokens": 149152} |
| {"current_steps": 985, "total_steps": 3600, "loss": 0.2229, "lr": 4.556187738201656e-05, "epoch": 5.472222222222222, "percentage": 27.36, "elapsed_time": "0:01:35", "remaining_time": "0:04:12", "throughput": 1575.3, "total_tokens": 149920} |
| {"current_steps": 990, "total_steps": 3600, "loss": 0.2373, "lr": 4.549269536337095e-05, "epoch": 5.5, "percentage": 27.5, "elapsed_time": "0:01:35", "remaining_time": "0:04:12", "throughput": 1576.27, "total_tokens": 150672} |
| {"current_steps": 995, "total_steps": 3600, "loss": 0.2267, "lr": 4.5423031676534065e-05, "epoch": 5.527777777777778, "percentage": 27.64, "elapsed_time": "0:01:35", "remaining_time": "0:04:11", "throughput": 1577.38, "total_tokens": 151424} |
| {"current_steps": 1000, "total_steps": 3600, "loss": 0.2341, "lr": 4.535288795890798e-05, "epoch": 5.555555555555555, "percentage": 27.78, "elapsed_time": "0:01:36", "remaining_time": "0:04:10", "throughput": 1578.36, "total_tokens": 152160} |
| {"current_steps": 1005, "total_steps": 3600, "loss": 0.2499, "lr": 4.528226585917761e-05, "epoch": 5.583333333333333, "percentage": 27.92, "elapsed_time": "0:01:36", "remaining_time": "0:04:09", "throughput": 1580.02, "total_tokens": 152960} |
| {"current_steps": 1010, "total_steps": 3600, "loss": 0.2403, "lr": 4.521116703727193e-05, "epoch": 5.611111111111111, "percentage": 28.06, "elapsed_time": "0:01:37", "remaining_time": "0:04:09", "throughput": 1581.34, "total_tokens": 153728} |
| {"current_steps": 1015, "total_steps": 3600, "loss": 0.2364, "lr": 4.5139593164324986e-05, "epoch": 5.638888888888889, "percentage": 28.19, "elapsed_time": "0:01:37", "remaining_time": "0:04:08", "throughput": 1582.64, "total_tokens": 154496} |
| {"current_steps": 1020, "total_steps": 3600, "loss": 0.2248, "lr": 4.506754592263662e-05, "epoch": 5.666666666666667, "percentage": 28.33, "elapsed_time": "0:01:38", "remaining_time": "0:04:07", "throughput": 1584.07, "total_tokens": 155280} |
| {"current_steps": 1025, "total_steps": 3600, "loss": 0.2413, "lr": 4.49950270056329e-05, "epoch": 5.694444444444445, "percentage": 28.47, "elapsed_time": "0:01:38", "remaining_time": "0:04:07", "throughput": 1585.48, "total_tokens": 156064} |
| {"current_steps": 1030, "total_steps": 3600, "loss": 0.2304, "lr": 4.4922038117826334e-05, "epoch": 5.722222222222222, "percentage": 28.61, "elapsed_time": "0:01:38", "remaining_time": "0:04:06", "throughput": 1586.48, "total_tokens": 156816} |
| {"current_steps": 1035, "total_steps": 3600, "loss": 0.2227, "lr": 4.48485809747758e-05, "epoch": 5.75, "percentage": 28.75, "elapsed_time": "0:01:39", "remaining_time": "0:04:05", "throughput": 1587.57, "total_tokens": 157568} |
| {"current_steps": 1040, "total_steps": 3600, "loss": 0.2518, "lr": 4.477465730304624e-05, "epoch": 5.777777777777778, "percentage": 28.89, "elapsed_time": "0:01:39", "remaining_time": "0:04:05", "throughput": 1588.56, "total_tokens": 158320} |
| {"current_steps": 1045, "total_steps": 3600, "loss": 0.2322, "lr": 4.4700268840168045e-05, "epoch": 5.805555555555555, "percentage": 29.03, "elapsed_time": "0:01:40", "remaining_time": "0:04:04", "throughput": 1589.35, "total_tokens": 159056} |
| {"current_steps": 1050, "total_steps": 3600, "loss": 0.2339, "lr": 4.462541733459628e-05, "epoch": 5.833333333333333, "percentage": 29.17, "elapsed_time": "0:01:40", "remaining_time": "0:04:04", "throughput": 1590.12, "total_tokens": 159792} |
| {"current_steps": 1055, "total_steps": 3600, "loss": 0.2318, "lr": 4.455010454566947e-05, "epoch": 5.861111111111111, "percentage": 29.31, "elapsed_time": "0:01:40", "remaining_time": "0:04:03", "throughput": 1591.12, "total_tokens": 160544} |
| {"current_steps": 1060, "total_steps": 3600, "loss": 0.2359, "lr": 4.447433224356839e-05, "epoch": 5.888888888888889, "percentage": 29.44, "elapsed_time": "0:01:41", "remaining_time": "0:04:02", "throughput": 1592.11, "total_tokens": 161296} |
| {"current_steps": 1065, "total_steps": 3600, "loss": 0.2316, "lr": 4.439810220927436e-05, "epoch": 5.916666666666667, "percentage": 29.58, "elapsed_time": "0:01:41", "remaining_time": "0:04:02", "throughput": 1593.12, "total_tokens": 162048} |
| {"current_steps": 1070, "total_steps": 3600, "loss": 0.2215, "lr": 4.432141623452743e-05, "epoch": 5.944444444444445, "percentage": 29.72, "elapsed_time": "0:01:42", "remaining_time": "0:04:01", "throughput": 1594.22, "total_tokens": 162816} |
| {"current_steps": 1075, "total_steps": 3600, "loss": 0.2298, "lr": 4.4244276121784195e-05, "epoch": 5.972222222222222, "percentage": 29.86, "elapsed_time": "0:01:42", "remaining_time": "0:04:00", "throughput": 1595.46, "total_tokens": 163584} |
| {"current_steps": 1080, "total_steps": 3600, "loss": 0.2275, "lr": 4.416668368417556e-05, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:42", "remaining_time": "0:04:00", "throughput": 1595.58, "total_tokens": 164336} |
| {"current_steps": 1080, "total_steps": 3600, "eval_loss": 0.2374120056629181, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:43", "remaining_time": "0:04:02", "throughput": 1582.58, "total_tokens": 164336} |
| {"current_steps": 1085, "total_steps": 3600, "loss": 0.2321, "lr": 4.408864074546401e-05, "epoch": 6.027777777777778, "percentage": 30.14, "elapsed_time": "0:01:45", "remaining_time": "0:04:04", "throughput": 1563.36, "total_tokens": 165088} |
| {"current_steps": 1090, "total_steps": 3600, "loss": 0.2316, "lr": 4.401014914000078e-05, "epoch": 6.055555555555555, "percentage": 30.28, "elapsed_time": "0:01:46", "remaining_time": "0:04:04", "throughput": 1564.35, "total_tokens": 165856} |
| {"current_steps": 1095, "total_steps": 3600, "loss": 0.2222, "lr": 4.393121071268274e-05, "epoch": 6.083333333333333, "percentage": 30.42, "elapsed_time": "0:01:46", "remaining_time": "0:04:03", "throughput": 1565.56, "total_tokens": 166624} |
| {"current_steps": 1100, "total_steps": 3600, "loss": 0.2295, "lr": 4.3851827318909036e-05, "epoch": 6.111111111111111, "percentage": 30.56, "elapsed_time": "0:01:46", "remaining_time": "0:04:02", "throughput": 1566.42, "total_tokens": 167360} |
| {"current_steps": 1105, "total_steps": 3600, "loss": 0.233, "lr": 4.377200082453749e-05, "epoch": 6.138888888888889, "percentage": 30.69, "elapsed_time": "0:01:47", "remaining_time": "0:04:02", "throughput": 1567.43, "total_tokens": 168112} |
| {"current_steps": 1110, "total_steps": 3600, "loss": 0.2293, "lr": 4.36917331058407e-05, "epoch": 6.166666666666667, "percentage": 30.83, "elapsed_time": "0:01:47", "remaining_time": "0:04:01", "throughput": 1568.39, "total_tokens": 168864} |
| {"current_steps": 1115, "total_steps": 3600, "loss": 0.2155, "lr": 4.361102604946201e-05, "epoch": 6.194444444444445, "percentage": 30.97, "elapsed_time": "0:01:48", "remaining_time": "0:04:00", "throughput": 1569.43, "total_tokens": 169616} |
| {"current_steps": 1120, "total_steps": 3600, "loss": 0.2293, "lr": 4.3529881552371096e-05, "epoch": 6.222222222222222, "percentage": 31.11, "elapsed_time": "0:01:48", "remaining_time": "0:04:00", "throughput": 1570.87, "total_tokens": 170416} |
| {"current_steps": 1125, "total_steps": 3600, "loss": 0.2436, "lr": 4.344830152181941e-05, "epoch": 6.25, "percentage": 31.25, "elapsed_time": "0:01:48", "remaining_time": "0:03:59", "throughput": 1571.94, "total_tokens": 171184} |
| {"current_steps": 1130, "total_steps": 3600, "loss": 0.2155, "lr": 4.336628787529538e-05, "epoch": 6.277777777777778, "percentage": 31.39, "elapsed_time": "0:01:49", "remaining_time": "0:03:58", "throughput": 1572.78, "total_tokens": 171936} |
| {"current_steps": 1135, "total_steps": 3600, "loss": 0.2173, "lr": 4.3283842540479264e-05, "epoch": 6.305555555555555, "percentage": 31.53, "elapsed_time": "0:01:49", "remaining_time": "0:03:58", "throughput": 1573.76, "total_tokens": 172688} |
| {"current_steps": 1140, "total_steps": 3600, "loss": 0.2273, "lr": 4.320096745519793e-05, "epoch": 6.333333333333333, "percentage": 31.67, "elapsed_time": "0:01:50", "remaining_time": "0:03:57", "throughput": 1574.9, "total_tokens": 173456} |
| {"current_steps": 1145, "total_steps": 3600, "loss": 0.2307, "lr": 4.3117664567379237e-05, "epoch": 6.361111111111111, "percentage": 31.81, "elapsed_time": "0:01:50", "remaining_time": "0:03:57", "throughput": 1575.53, "total_tokens": 174176} |
| {"current_steps": 1150, "total_steps": 3600, "loss": 0.2242, "lr": 4.303393583500628e-05, "epoch": 6.388888888888889, "percentage": 31.94, "elapsed_time": "0:01:50", "remaining_time": "0:03:56", "throughput": 1576.26, "total_tokens": 174912} |
| {"current_steps": 1155, "total_steps": 3600, "loss": 0.2307, "lr": 4.2949783226071406e-05, "epoch": 6.416666666666667, "percentage": 32.08, "elapsed_time": "0:01:51", "remaining_time": "0:03:55", "throughput": 1577.32, "total_tokens": 175680} |
| {"current_steps": 1160, "total_steps": 3600, "loss": 0.2188, "lr": 4.286520871852987e-05, "epoch": 6.444444444444445, "percentage": 32.22, "elapsed_time": "0:01:51", "remaining_time": "0:03:55", "throughput": 1578.43, "total_tokens": 176448} |
| {"current_steps": 1165, "total_steps": 3600, "loss": 0.2196, "lr": 4.278021430025343e-05, "epoch": 6.472222222222222, "percentage": 32.36, "elapsed_time": "0:01:52", "remaining_time": "0:03:54", "throughput": 1579.54, "total_tokens": 177216} |
| {"current_steps": 1170, "total_steps": 3600, "loss": 0.2769, "lr": 4.2694801968983566e-05, "epoch": 6.5, "percentage": 32.5, "elapsed_time": "0:01:52", "remaining_time": "0:03:53", "throughput": 1580.43, "total_tokens": 177968} |
| {"current_steps": 1175, "total_steps": 3600, "loss": 0.215, "lr": 4.260897373228456e-05, "epoch": 6.527777777777778, "percentage": 32.64, "elapsed_time": "0:01:53", "remaining_time": "0:03:53", "throughput": 1581.83, "total_tokens": 178768} |
| {"current_steps": 1180, "total_steps": 3600, "loss": 0.2254, "lr": 4.2522731607496275e-05, "epoch": 6.555555555555555, "percentage": 32.78, "elapsed_time": "0:01:53", "remaining_time": "0:03:52", "throughput": 1582.45, "total_tokens": 179488} |
| {"current_steps": 1185, "total_steps": 3600, "loss": 0.2411, "lr": 4.2436077621686786e-05, "epoch": 6.583333333333333, "percentage": 32.92, "elapsed_time": "0:01:53", "remaining_time": "0:03:51", "throughput": 1583.34, "total_tokens": 180240} |
| {"current_steps": 1190, "total_steps": 3600, "loss": 0.2452, "lr": 4.234901381160469e-05, "epoch": 6.611111111111111, "percentage": 33.06, "elapsed_time": "0:01:54", "remaining_time": "0:03:51", "throughput": 1584.08, "total_tokens": 180992} |
| {"current_steps": 1195, "total_steps": 3600, "loss": 0.2326, "lr": 4.226154222363124e-05, "epoch": 6.638888888888889, "percentage": 33.19, "elapsed_time": "0:01:54", "remaining_time": "0:03:50", "throughput": 1585.07, "total_tokens": 181760} |
| {"current_steps": 1200, "total_steps": 3600, "loss": 0.2267, "lr": 4.21736649137323e-05, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:55", "remaining_time": "0:03:50", "throughput": 1586.12, "total_tokens": 182528} |
| {"current_steps": 1205, "total_steps": 3600, "loss": 0.2166, "lr": 4.208538394740993e-05, "epoch": 6.694444444444445, "percentage": 33.47, "elapsed_time": "0:01:55", "remaining_time": "0:03:49", "throughput": 1587.58, "total_tokens": 183344} |
| {"current_steps": 1210, "total_steps": 3600, "loss": 0.2184, "lr": 4.199670139965393e-05, "epoch": 6.722222222222222, "percentage": 33.61, "elapsed_time": "0:01:55", "remaining_time": "0:03:48", "throughput": 1588.48, "total_tokens": 184096} |
| {"current_steps": 1215, "total_steps": 3600, "loss": 0.2318, "lr": 4.1907619354892965e-05, "epoch": 6.75, "percentage": 33.75, "elapsed_time": "0:01:56", "remaining_time": "0:03:48", "throughput": 1589.19, "total_tokens": 184832} |
| {"current_steps": 1220, "total_steps": 3600, "loss": 0.2453, "lr": 4.1818139906945694e-05, "epoch": 6.777777777777778, "percentage": 33.89, "elapsed_time": "0:01:56", "remaining_time": "0:03:47", "throughput": 1590.2, "total_tokens": 185600} |
| {"current_steps": 1225, "total_steps": 3600, "loss": 0.225, "lr": 4.172826515897146e-05, "epoch": 6.805555555555555, "percentage": 34.03, "elapsed_time": "0:01:57", "remaining_time": "0:03:47", "throughput": 1590.9, "total_tokens": 186336} |
| {"current_steps": 1230, "total_steps": 3600, "loss": 0.2404, "lr": 4.163799722342089e-05, "epoch": 6.833333333333333, "percentage": 34.17, "elapsed_time": "0:01:57", "remaining_time": "0:03:46", "throughput": 1592.03, "total_tokens": 187120} |
| {"current_steps": 1235, "total_steps": 3600, "loss": 0.2164, "lr": 4.1547338221986266e-05, "epoch": 6.861111111111111, "percentage": 34.31, "elapsed_time": "0:01:57", "remaining_time": "0:03:45", "throughput": 1592.77, "total_tokens": 187872} |
| {"current_steps": 1240, "total_steps": 3600, "loss": 0.2112, "lr": 4.1456290285551596e-05, "epoch": 6.888888888888889, "percentage": 34.44, "elapsed_time": "0:01:58", "remaining_time": "0:03:45", "throughput": 1593.77, "total_tokens": 188640} |
| {"current_steps": 1245, "total_steps": 3600, "loss": 0.2412, "lr": 4.13648555541426e-05, "epoch": 6.916666666666667, "percentage": 34.58, "elapsed_time": "0:01:58", "remaining_time": "0:03:44", "throughput": 1594.91, "total_tokens": 189424} |
| {"current_steps": 1250, "total_steps": 3600, "loss": 0.205, "lr": 4.127303617687636e-05, "epoch": 6.944444444444445, "percentage": 34.72, "elapsed_time": "0:01:59", "remaining_time": "0:03:44", "throughput": 1596.19, "total_tokens": 190224} |
| {"current_steps": 1255, "total_steps": 3600, "loss": 0.2604, "lr": 4.118083431191081e-05, "epoch": 6.972222222222222, "percentage": 34.86, "elapsed_time": "0:01:59", "remaining_time": "0:03:43", "throughput": 1597.32, "total_tokens": 190992} |
| {"current_steps": 1260, "total_steps": 3600, "loss": 0.2029, "lr": 4.108825212639405e-05, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:02:00", "remaining_time": "0:03:42", "throughput": 1597.59, "total_tokens": 191760} |
| {"current_steps": 1260, "total_steps": 3600, "eval_loss": 0.24735479056835175, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:02:00", "remaining_time": "0:03:44", "throughput": 1586.39, "total_tokens": 191760} |
| {"current_steps": 1265, "total_steps": 3600, "loss": 0.2576, "lr": 4.099529179641337e-05, "epoch": 7.027777777777778, "percentage": 35.14, "elapsed_time": "0:02:02", "remaining_time": "0:03:46", "throughput": 1568.78, "total_tokens": 192528} |
| {"current_steps": 1270, "total_steps": 3600, "loss": 0.2318, "lr": 4.09019555069441e-05, "epoch": 7.055555555555555, "percentage": 35.28, "elapsed_time": "0:02:03", "remaining_time": "0:03:45", "throughput": 1569.8, "total_tokens": 193312} |
| {"current_steps": 1275, "total_steps": 3600, "loss": 0.2521, "lr": 4.080824545179828e-05, "epoch": 7.083333333333333, "percentage": 35.42, "elapsed_time": "0:02:03", "remaining_time": "0:03:45", "throughput": 1570.64, "total_tokens": 194064} |
| {"current_steps": 1280, "total_steps": 3600, "loss": 0.2324, "lr": 4.071416383357307e-05, "epoch": 7.111111111111111, "percentage": 35.56, "elapsed_time": "0:02:03", "remaining_time": "0:03:44", "throughput": 1571.62, "total_tokens": 194832} |
| {"current_steps": 1285, "total_steps": 3600, "loss": 0.2214, "lr": 4.0619712863599e-05, "epoch": 7.138888888888889, "percentage": 35.69, "elapsed_time": "0:02:04", "remaining_time": "0:03:44", "throughput": 1572.48, "total_tokens": 195600} |
| {"current_steps": 1290, "total_steps": 3600, "loss": 0.2462, "lr": 4.0524894761888e-05, "epoch": 7.166666666666667, "percentage": 35.83, "elapsed_time": "0:02:04", "remaining_time": "0:03:43", "throughput": 1573.19, "total_tokens": 196352} |
| {"current_steps": 1295, "total_steps": 3600, "loss": 0.2168, "lr": 4.042971175708118e-05, "epoch": 7.194444444444445, "percentage": 35.97, "elapsed_time": "0:02:05", "remaining_time": "0:03:42", "throughput": 1573.89, "total_tokens": 197104} |
| {"current_steps": 1300, "total_steps": 3600, "loss": 0.229, "lr": 4.0334166086396484e-05, "epoch": 7.222222222222222, "percentage": 36.11, "elapsed_time": "0:02:05", "remaining_time": "0:03:42", "throughput": 1574.74, "total_tokens": 197856} |
| {"current_steps": 1305, "total_steps": 3600, "loss": 0.232, "lr": 4.0238259995576084e-05, "epoch": 7.25, "percentage": 36.25, "elapsed_time": "0:02:06", "remaining_time": "0:03:41", "throughput": 1575.42, "total_tokens": 198592} |
| {"current_steps": 1310, "total_steps": 3600, "loss": 0.2334, "lr": 4.0141995738833625e-05, "epoch": 7.277777777777778, "percentage": 36.39, "elapsed_time": "0:02:06", "remaining_time": "0:03:41", "throughput": 1576.24, "total_tokens": 199360} |
| {"current_steps": 1315, "total_steps": 3600, "loss": 0.2656, "lr": 4.0045375578801214e-05, "epoch": 7.305555555555555, "percentage": 36.53, "elapsed_time": "0:02:06", "remaining_time": "0:03:40", "throughput": 1577.07, "total_tokens": 200128} |
| {"current_steps": 1320, "total_steps": 3600, "loss": 0.2359, "lr": 3.994840178647623e-05, "epoch": 7.333333333333333, "percentage": 36.67, "elapsed_time": "0:02:07", "remaining_time": "0:03:39", "throughput": 1578.17, "total_tokens": 200912} |
| {"current_steps": 1325, "total_steps": 3600, "loss": 0.2327, "lr": 3.985107664116798e-05, "epoch": 7.361111111111111, "percentage": 36.81, "elapsed_time": "0:02:07", "remaining_time": "0:03:39", "throughput": 1579.02, "total_tokens": 201664} |
| {"current_steps": 1330, "total_steps": 3600, "loss": 0.2248, "lr": 3.9753402430444116e-05, "epoch": 7.388888888888889, "percentage": 36.94, "elapsed_time": "0:02:08", "remaining_time": "0:03:38", "throughput": 1579.86, "total_tokens": 202416} |
| {"current_steps": 1335, "total_steps": 3600, "loss": 0.2142, "lr": 3.9655381450076826e-05, "epoch": 7.416666666666667, "percentage": 37.08, "elapsed_time": "0:02:08", "remaining_time": "0:03:38", "throughput": 1580.84, "total_tokens": 203184} |
| {"current_steps": 1340, "total_steps": 3600, "loss": 0.228, "lr": 3.955701600398892e-05, "epoch": 7.444444444444445, "percentage": 37.22, "elapsed_time": "0:02:08", "remaining_time": "0:03:37", "throughput": 1581.94, "total_tokens": 203968} |
| {"current_steps": 1345, "total_steps": 3600, "loss": 0.207, "lr": 3.945830840419966e-05, "epoch": 7.472222222222222, "percentage": 37.36, "elapsed_time": "0:02:09", "remaining_time": "0:03:36", "throughput": 1582.9, "total_tokens": 204736} |
| {"current_steps": 1350, "total_steps": 3600, "loss": 0.2658, "lr": 3.935926097077045e-05, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:02:09", "remaining_time": "0:03:36", "throughput": 1583.85, "total_tokens": 205504} |
| {"current_steps": 1355, "total_steps": 3600, "loss": 0.2248, "lr": 3.925987603175023e-05, "epoch": 7.527777777777778, "percentage": 37.64, "elapsed_time": "0:02:10", "remaining_time": "0:03:35", "throughput": 1584.93, "total_tokens": 206288} |
| {"current_steps": 1360, "total_steps": 3600, "loss": 0.2358, "lr": 3.916015592312082e-05, "epoch": 7.555555555555555, "percentage": 37.78, "elapsed_time": "0:02:10", "remaining_time": "0:03:35", "throughput": 1585.5, "total_tokens": 207008} |
| {"current_steps": 1365, "total_steps": 3600, "loss": 0.2418, "lr": 3.9060102988742e-05, "epoch": 7.583333333333333, "percentage": 37.92, "elapsed_time": "0:02:10", "remaining_time": "0:03:34", "throughput": 1586.38, "total_tokens": 207776} |
| {"current_steps": 1370, "total_steps": 3600, "loss": 0.2493, "lr": 3.8959719580296415e-05, "epoch": 7.611111111111111, "percentage": 38.06, "elapsed_time": "0:02:11", "remaining_time": "0:03:33", "throughput": 1587.16, "total_tokens": 208528} |
| {"current_steps": 1375, "total_steps": 3600, "loss": 0.233, "lr": 3.885900805723429e-05, "epoch": 7.638888888888889, "percentage": 38.19, "elapsed_time": "0:02:11", "remaining_time": "0:03:33", "throughput": 1587.96, "total_tokens": 209280} |
| {"current_steps": 1380, "total_steps": 3600, "loss": 0.2279, "lr": 3.875797078671798e-05, "epoch": 7.666666666666667, "percentage": 38.33, "elapsed_time": "0:02:12", "remaining_time": "0:03:32", "throughput": 1589.12, "total_tokens": 210080} |
| {"current_steps": 1385, "total_steps": 3600, "loss": 0.22, "lr": 3.865661014356635e-05, "epoch": 7.694444444444445, "percentage": 38.47, "elapsed_time": "0:02:12", "remaining_time": "0:03:32", "throughput": 1589.92, "total_tokens": 210832} |
| {"current_steps": 1390, "total_steps": 3600, "loss": 0.2279, "lr": 3.855492851019893e-05, "epoch": 7.722222222222222, "percentage": 38.61, "elapsed_time": "0:02:13", "remaining_time": "0:03:31", "throughput": 1590.52, "total_tokens": 211568} |
| {"current_steps": 1395, "total_steps": 3600, "loss": 0.2206, "lr": 3.8452928276579916e-05, "epoch": 7.75, "percentage": 38.75, "elapsed_time": "0:02:13", "remaining_time": "0:03:30", "throughput": 1591.28, "total_tokens": 212320} |
| {"current_steps": 1400, "total_steps": 3600, "loss": 0.247, "lr": 3.835061184016203e-05, "epoch": 7.777777777777778, "percentage": 38.89, "elapsed_time": "0:02:13", "remaining_time": "0:03:30", "throughput": 1592.12, "total_tokens": 213088} |
| {"current_steps": 1405, "total_steps": 3600, "loss": 0.2373, "lr": 3.824798160583012e-05, "epoch": 7.805555555555555, "percentage": 39.03, "elapsed_time": "0:02:14", "remaining_time": "0:03:29", "throughput": 1592.59, "total_tokens": 213808} |
| {"current_steps": 1410, "total_steps": 3600, "loss": 0.224, "lr": 3.814503998584471e-05, "epoch": 7.833333333333333, "percentage": 39.17, "elapsed_time": "0:02:14", "remaining_time": "0:03:29", "throughput": 1593.07, "total_tokens": 214544} |
| {"current_steps": 1415, "total_steps": 3600, "loss": 0.2179, "lr": 3.804178939978517e-05, "epoch": 7.861111111111111, "percentage": 39.31, "elapsed_time": "0:02:15", "remaining_time": "0:03:28", "throughput": 1593.99, "total_tokens": 215328} |
| {"current_steps": 1420, "total_steps": 3600, "loss": 0.2281, "lr": 3.7938232274493e-05, "epoch": 7.888888888888889, "percentage": 39.44, "elapsed_time": "0:02:15", "remaining_time": "0:03:28", "throughput": 1594.54, "total_tokens": 216080} |
| {"current_steps": 1425, "total_steps": 3600, "loss": 0.2248, "lr": 3.783437104401469e-05, "epoch": 7.916666666666667, "percentage": 39.58, "elapsed_time": "0:02:15", "remaining_time": "0:03:27", "throughput": 1595.41, "total_tokens": 216848} |
| {"current_steps": 1430, "total_steps": 3600, "loss": 0.2172, "lr": 3.773020814954453e-05, "epoch": 7.944444444444445, "percentage": 39.72, "elapsed_time": "0:02:16", "remaining_time": "0:03:26", "throughput": 1596.42, "total_tokens": 217632} |
| {"current_steps": 1435, "total_steps": 3600, "loss": 0.2379, "lr": 3.762574603936725e-05, "epoch": 7.972222222222222, "percentage": 39.86, "elapsed_time": "0:02:16", "remaining_time": "0:03:26", "throughput": 1597.38, "total_tokens": 218400} |
| {"current_steps": 1440, "total_steps": 3600, "loss": 0.2183, "lr": 3.752098716880045e-05, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:17", "remaining_time": "0:03:25", "throughput": 1597.84, "total_tokens": 219200} |
| {"current_steps": 1440, "total_steps": 3600, "eval_loss": 0.24541738629341125, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:18", "remaining_time": "0:03:27", "throughput": 1588.09, "total_tokens": 219200} |
| {"current_steps": 1445, "total_steps": 3600, "loss": 0.2364, "lr": 3.74159340001369e-05, "epoch": 8.027777777777779, "percentage": 40.14, "elapsed_time": "0:02:19", "remaining_time": "0:03:28", "throughput": 1571.84, "total_tokens": 219984} |
| {"current_steps": 1450, "total_steps": 3600, "loss": 0.2245, "lr": 3.731058900258668e-05, "epoch": 8.055555555555555, "percentage": 40.28, "elapsed_time": "0:02:20", "remaining_time": "0:03:28", "throughput": 1572.8, "total_tokens": 220784} |
| {"current_steps": 1455, "total_steps": 3600, "loss": 0.2267, "lr": 3.7204954652219104e-05, "epoch": 8.083333333333334, "percentage": 40.42, "elapsed_time": "0:02:20", "remaining_time": "0:03:27", "throughput": 1573.55, "total_tokens": 221536} |
| {"current_steps": 1460, "total_steps": 3600, "loss": 0.2384, "lr": 3.7099033431904575e-05, "epoch": 8.11111111111111, "percentage": 40.56, "elapsed_time": "0:02:21", "remaining_time": "0:03:26", "throughput": 1574.05, "total_tokens": 222256} |
| {"current_steps": 1465, "total_steps": 3600, "loss": 0.2212, "lr": 3.699282783125616e-05, "epoch": 8.13888888888889, "percentage": 40.69, "elapsed_time": "0:02:21", "remaining_time": "0:03:26", "throughput": 1574.88, "total_tokens": 223024} |
| {"current_steps": 1470, "total_steps": 3600, "loss": 0.2254, "lr": 3.688634034657115e-05, "epoch": 8.166666666666666, "percentage": 40.83, "elapsed_time": "0:02:22", "remaining_time": "0:03:25", "throughput": 1575.47, "total_tokens": 223760} |
| {"current_steps": 1475, "total_steps": 3600, "loss": 0.2231, "lr": 3.6779573480772325e-05, "epoch": 8.194444444444445, "percentage": 40.97, "elapsed_time": "0:02:22", "remaining_time": "0:03:25", "throughput": 1576.26, "total_tokens": 224528} |
| {"current_steps": 1480, "total_steps": 3600, "loss": 0.2373, "lr": 3.6672529743349146e-05, "epoch": 8.222222222222221, "percentage": 41.11, "elapsed_time": "0:02:22", "remaining_time": "0:03:24", "throughput": 1577.03, "total_tokens": 225296} |
| {"current_steps": 1485, "total_steps": 3600, "loss": 0.2342, "lr": 3.656521165029879e-05, "epoch": 8.25, "percentage": 41.25, "elapsed_time": "0:02:23", "remaining_time": "0:03:24", "throughput": 1577.6, "total_tokens": 226032} |
| {"current_steps": 1490, "total_steps": 3600, "loss": 0.2319, "lr": 3.6457621724066964e-05, "epoch": 8.277777777777779, "percentage": 41.39, "elapsed_time": "0:02:23", "remaining_time": "0:03:23", "throughput": 1578.2, "total_tokens": 226768} |
| {"current_steps": 1495, "total_steps": 3600, "loss": 0.2223, "lr": 3.634976249348867e-05, "epoch": 8.305555555555555, "percentage": 41.53, "elapsed_time": "0:02:24", "remaining_time": "0:03:22", "throughput": 1579.14, "total_tokens": 227552} |
| {"current_steps": 1500, "total_steps": 3600, "loss": 0.2277, "lr": 3.6241636493728736e-05, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:02:24", "remaining_time": "0:03:22", "throughput": 1579.96, "total_tokens": 228336} |
| {"current_steps": 1505, "total_steps": 3600, "loss": 0.298, "lr": 3.613324626622224e-05, "epoch": 8.36111111111111, "percentage": 41.81, "elapsed_time": "0:02:24", "remaining_time": "0:03:21", "throughput": 1580.53, "total_tokens": 229088} |
| {"current_steps": 1510, "total_steps": 3600, "loss": 0.2486, "lr": 3.602459435861475e-05, "epoch": 8.38888888888889, "percentage": 41.94, "elapsed_time": "0:02:25", "remaining_time": "0:03:21", "throughput": 1581.21, "total_tokens": 229840} |
| {"current_steps": 1515, "total_steps": 3600, "loss": 0.2359, "lr": 3.591568332470249e-05, "epoch": 8.416666666666666, "percentage": 42.08, "elapsed_time": "0:02:25", "remaining_time": "0:03:20", "throughput": 1581.99, "total_tokens": 230608} |
| {"current_steps": 1520, "total_steps": 3600, "loss": 0.2202, "lr": 3.5806515724372274e-05, "epoch": 8.444444444444445, "percentage": 42.22, "elapsed_time": "0:02:26", "remaining_time": "0:03:20", "throughput": 1582.68, "total_tokens": 231360} |
| {"current_steps": 1525, "total_steps": 3600, "loss": 0.1993, "lr": 3.569709412354136e-05, "epoch": 8.472222222222221, "percentage": 42.36, "elapsed_time": "0:02:26", "remaining_time": "0:03:19", "throughput": 1583.73, "total_tokens": 232160} |
| {"current_steps": 1530, "total_steps": 3600, "loss": 0.252, "lr": 3.5587421094097115e-05, "epoch": 8.5, "percentage": 42.5, "elapsed_time": "0:02:26", "remaining_time": "0:03:18", "throughput": 1584.35, "total_tokens": 232896} |
| {"current_steps": 1535, "total_steps": 3600, "loss": 0.2555, "lr": 3.5477499213836616e-05, "epoch": 8.527777777777779, "percentage": 42.64, "elapsed_time": "0:02:27", "remaining_time": "0:03:18", "throughput": 1585.28, "total_tokens": 233680} |
| {"current_steps": 1540, "total_steps": 3600, "loss": 0.2291, "lr": 3.536733106640598e-05, "epoch": 8.555555555555555, "percentage": 42.78, "elapsed_time": "0:02:27", "remaining_time": "0:03:17", "throughput": 1586.21, "total_tokens": 234464} |
| {"current_steps": 1545, "total_steps": 3600, "loss": 0.2182, "lr": 3.525691924123971e-05, "epoch": 8.583333333333334, "percentage": 42.92, "elapsed_time": "0:02:28", "remaining_time": "0:03:17", "throughput": 1586.69, "total_tokens": 235184} |
| {"current_steps": 1550, "total_steps": 3600, "loss": 0.2249, "lr": 3.5146266333499795e-05, "epoch": 8.61111111111111, "percentage": 43.06, "elapsed_time": "0:02:28", "remaining_time": "0:03:16", "throughput": 1587.72, "total_tokens": 235984} |
| {"current_steps": 1555, "total_steps": 3600, "loss": 0.236, "lr": 3.503537494401473e-05, "epoch": 8.63888888888889, "percentage": 43.19, "elapsed_time": "0:02:29", "remaining_time": "0:03:16", "throughput": 1588.32, "total_tokens": 236720} |
| {"current_steps": 1560, "total_steps": 3600, "loss": 0.2444, "lr": 3.4924247679218375e-05, "epoch": 8.666666666666666, "percentage": 43.33, "elapsed_time": "0:02:29", "remaining_time": "0:03:15", "throughput": 1588.9, "total_tokens": 237456} |
| {"current_steps": 1565, "total_steps": 3600, "loss": 0.2458, "lr": 3.481288715108868e-05, "epoch": 8.694444444444445, "percentage": 43.47, "elapsed_time": "0:02:29", "remaining_time": "0:03:14", "throughput": 1589.81, "total_tokens": 238240} |
| {"current_steps": 1570, "total_steps": 3600, "loss": 0.232, "lr": 3.4701295977086324e-05, "epoch": 8.722222222222221, "percentage": 43.61, "elapsed_time": "0:02:30", "remaining_time": "0:03:14", "throughput": 1590.46, "total_tokens": 238992} |
| {"current_steps": 1575, "total_steps": 3600, "loss": 0.236, "lr": 3.4589476780093166e-05, "epoch": 8.75, "percentage": 43.75, "elapsed_time": "0:02:30", "remaining_time": "0:03:13", "throughput": 1591.34, "total_tokens": 239776} |
| {"current_steps": 1580, "total_steps": 3600, "loss": 0.2369, "lr": 3.44774321883506e-05, "epoch": 8.777777777777779, "percentage": 43.89, "elapsed_time": "0:02:31", "remaining_time": "0:03:13", "throughput": 1592.03, "total_tokens": 240528} |
| {"current_steps": 1585, "total_steps": 3600, "loss": 0.2217, "lr": 3.436516483539781e-05, "epoch": 8.805555555555555, "percentage": 44.03, "elapsed_time": "0:02:31", "remaining_time": "0:03:12", "throughput": 1592.65, "total_tokens": 241280} |
| {"current_steps": 1590, "total_steps": 3600, "loss": 0.2349, "lr": 3.42526773600098e-05, "epoch": 8.833333333333334, "percentage": 44.17, "elapsed_time": "0:02:31", "remaining_time": "0:03:12", "throughput": 1593.37, "total_tokens": 242048} |
| {"current_steps": 1595, "total_steps": 3600, "loss": 0.2119, "lr": 3.4139972406135464e-05, "epoch": 8.86111111111111, "percentage": 44.31, "elapsed_time": "0:02:32", "remaining_time": "0:03:11", "throughput": 1594.14, "total_tokens": 242816} |
| {"current_steps": 1600, "total_steps": 3600, "loss": 0.2258, "lr": 3.402705262283537e-05, "epoch": 8.88888888888889, "percentage": 44.44, "elapsed_time": "0:02:32", "remaining_time": "0:03:10", "throughput": 1594.89, "total_tokens": 243584} |
| {"current_steps": 1605, "total_steps": 3600, "loss": 0.2121, "lr": 3.39139206642195e-05, "epoch": 8.916666666666666, "percentage": 44.58, "elapsed_time": "0:02:33", "remaining_time": "0:03:10", "throughput": 1595.51, "total_tokens": 244336} |
| {"current_steps": 1610, "total_steps": 3600, "loss": 0.225, "lr": 3.3800579189384944e-05, "epoch": 8.944444444444445, "percentage": 44.72, "elapsed_time": "0:02:33", "remaining_time": "0:03:09", "throughput": 1595.98, "total_tokens": 245072} |
| {"current_steps": 1615, "total_steps": 3600, "loss": 0.1928, "lr": 3.3687030862353286e-05, "epoch": 8.972222222222221, "percentage": 44.86, "elapsed_time": "0:02:33", "remaining_time": "0:03:09", "throughput": 1596.71, "total_tokens": 245824} |
| {"current_steps": 1620, "total_steps": 3600, "loss": 0.2397, "lr": 3.357327835200807e-05, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:34", "remaining_time": "0:03:08", "throughput": 1596.91, "total_tokens": 246592} |
| {"current_steps": 1620, "total_steps": 3600, "eval_loss": 0.2567974030971527, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:35", "remaining_time": "0:03:09", "throughput": 1588.16, "total_tokens": 246592} |
| {"current_steps": 1625, "total_steps": 3600, "loss": 0.2117, "lr": 3.3459324332032035e-05, "epoch": 9.027777777777779, "percentage": 45.14, "elapsed_time": "0:02:37", "remaining_time": "0:03:10", "throughput": 1574.46, "total_tokens": 247360} |
| {"current_steps": 1630, "total_steps": 3600, "loss": 0.2455, "lr": 3.3345171480844275e-05, "epoch": 9.055555555555555, "percentage": 45.28, "elapsed_time": "0:02:37", "remaining_time": "0:03:10", "throughput": 1575.25, "total_tokens": 248160} |
| {"current_steps": 1635, "total_steps": 3600, "loss": 0.2504, "lr": 3.32308224815373e-05, "epoch": 9.083333333333334, "percentage": 45.42, "elapsed_time": "0:02:37", "remaining_time": "0:03:09", "throughput": 1575.87, "total_tokens": 248912} |
| {"current_steps": 1640, "total_steps": 3600, "loss": 0.2186, "lr": 3.311628002181398e-05, "epoch": 9.11111111111111, "percentage": 45.56, "elapsed_time": "0:02:38", "remaining_time": "0:03:09", "throughput": 1576.63, "total_tokens": 249680} |
| {"current_steps": 1645, "total_steps": 3600, "loss": 0.2096, "lr": 3.3001546793924285e-05, "epoch": 9.13888888888889, "percentage": 45.69, "elapsed_time": "0:02:38", "remaining_time": "0:03:08", "throughput": 1577.12, "total_tokens": 250400} |
| {"current_steps": 1650, "total_steps": 3600, "loss": 0.2111, "lr": 3.288662549460216e-05, "epoch": 9.166666666666666, "percentage": 45.83, "elapsed_time": "0:02:39", "remaining_time": "0:03:08", "throughput": 1577.7, "total_tokens": 251136} |
| {"current_steps": 1655, "total_steps": 3600, "loss": 0.2268, "lr": 3.277151882500199e-05, "epoch": 9.194444444444445, "percentage": 45.97, "elapsed_time": "0:02:39", "remaining_time": "0:03:07", "throughput": 1578.59, "total_tokens": 251920} |
| {"current_steps": 1660, "total_steps": 3600, "loss": 0.2172, "lr": 3.26562294906352e-05, "epoch": 9.222222222222221, "percentage": 46.11, "elapsed_time": "0:02:39", "remaining_time": "0:03:06", "throughput": 1579.17, "total_tokens": 252656} |
| {"current_steps": 1665, "total_steps": 3600, "loss": 0.2162, "lr": 3.254076020130664e-05, "epoch": 9.25, "percentage": 46.25, "elapsed_time": "0:02:40", "remaining_time": "0:03:06", "throughput": 1580.11, "total_tokens": 253456} |
| {"current_steps": 1670, "total_steps": 3600, "loss": 0.2062, "lr": 3.242511367105087e-05, "epoch": 9.277777777777779, "percentage": 46.39, "elapsed_time": "0:02:40", "remaining_time": "0:03:05", "throughput": 1580.71, "total_tokens": 254208} |
| {"current_steps": 1675, "total_steps": 3600, "loss": 0.2281, "lr": 3.230929261806842e-05, "epoch": 9.305555555555555, "percentage": 46.53, "elapsed_time": "0:02:41", "remaining_time": "0:03:05", "throughput": 1581.46, "total_tokens": 254976} |
| {"current_steps": 1680, "total_steps": 3600, "loss": 0.2273, "lr": 3.2193299764661845e-05, "epoch": 9.333333333333334, "percentage": 46.67, "elapsed_time": "0:02:41", "remaining_time": "0:03:04", "throughput": 1582.09, "total_tokens": 255728} |
| {"current_steps": 1685, "total_steps": 3600, "loss": 0.2392, "lr": 3.207713783717176e-05, "epoch": 9.36111111111111, "percentage": 46.81, "elapsed_time": "0:02:42", "remaining_time": "0:03:04", "throughput": 1582.62, "total_tokens": 256464} |
| {"current_steps": 1690, "total_steps": 3600, "loss": 0.2392, "lr": 3.1960809565912794e-05, "epoch": 9.38888888888889, "percentage": 46.94, "elapsed_time": "0:02:42", "remaining_time": "0:03:03", "throughput": 1583.15, "total_tokens": 257216} |
| {"current_steps": 1695, "total_steps": 3600, "loss": 0.2011, "lr": 3.1844317685109354e-05, "epoch": 9.416666666666666, "percentage": 47.08, "elapsed_time": "0:02:42", "remaining_time": "0:03:03", "throughput": 1583.99, "total_tokens": 258000} |
| {"current_steps": 1700, "total_steps": 3600, "loss": 0.5171, "lr": 3.1727664932831394e-05, "epoch": 9.444444444444445, "percentage": 47.22, "elapsed_time": "0:02:43", "remaining_time": "0:03:02", "throughput": 1584.66, "total_tokens": 258768} |
| {"current_steps": 1705, "total_steps": 3600, "loss": 0.2537, "lr": 3.161085405093006e-05, "epoch": 9.472222222222221, "percentage": 47.36, "elapsed_time": "0:02:43", "remaining_time": "0:03:01", "throughput": 1585.38, "total_tokens": 259536} |
| {"current_steps": 1710, "total_steps": 3600, "loss": 0.2248, "lr": 3.149388778497323e-05, "epoch": 9.5, "percentage": 47.5, "elapsed_time": "0:02:44", "remaining_time": "0:03:01", "throughput": 1586.19, "total_tokens": 260320} |
| {"current_steps": 1715, "total_steps": 3600, "loss": 0.2264, "lr": 3.137676888418099e-05, "epoch": 9.527777777777779, "percentage": 47.64, "elapsed_time": "0:02:44", "remaining_time": "0:03:00", "throughput": 1586.72, "total_tokens": 261056} |
| {"current_steps": 1720, "total_steps": 3600, "loss": 0.226, "lr": 3.125950010136104e-05, "epoch": 9.555555555555555, "percentage": 47.78, "elapsed_time": "0:02:44", "remaining_time": "0:03:00", "throughput": 1587.19, "total_tokens": 261808} |
| {"current_steps": 1725, "total_steps": 3600, "loss": 0.2238, "lr": 3.114208419284391e-05, "epoch": 9.583333333333334, "percentage": 47.92, "elapsed_time": "0:02:45", "remaining_time": "0:02:59", "throughput": 1587.81, "total_tokens": 262576} |
| {"current_steps": 1730, "total_steps": 3600, "loss": 0.216, "lr": 3.102452391841828e-05, "epoch": 9.61111111111111, "percentage": 48.06, "elapsed_time": "0:02:45", "remaining_time": "0:02:59", "throughput": 1588.25, "total_tokens": 263312} |
| {"current_steps": 1735, "total_steps": 3600, "loss": 0.2407, "lr": 3.090682204126604e-05, "epoch": 9.63888888888889, "percentage": 48.19, "elapsed_time": "0:02:46", "remaining_time": "0:02:58", "throughput": 1588.86, "total_tokens": 264064} |
| {"current_steps": 1740, "total_steps": 3600, "loss": 0.2153, "lr": 3.078898132789735e-05, "epoch": 9.666666666666666, "percentage": 48.33, "elapsed_time": "0:02:46", "remaining_time": "0:02:58", "throughput": 1589.49, "total_tokens": 264816} |
| {"current_steps": 1745, "total_steps": 3600, "loss": 0.2314, "lr": 3.0671004548085675e-05, "epoch": 9.694444444444445, "percentage": 48.47, "elapsed_time": "0:02:47", "remaining_time": "0:02:57", "throughput": 1590.21, "total_tokens": 265584} |
| {"current_steps": 1750, "total_steps": 3600, "loss": 0.1869, "lr": 3.0552894474802584e-05, "epoch": 9.722222222222221, "percentage": 48.61, "elapsed_time": "0:02:47", "remaining_time": "0:02:56", "throughput": 1590.75, "total_tokens": 266320} |
| {"current_steps": 1755, "total_steps": 3600, "loss": 0.219, "lr": 3.043465388415267e-05, "epoch": 9.75, "percentage": 48.75, "elapsed_time": "0:02:47", "remaining_time": "0:02:56", "throughput": 1591.37, "total_tokens": 267072} |
| {"current_steps": 1760, "total_steps": 3600, "loss": 0.2994, "lr": 3.0316285555308233e-05, "epoch": 9.777777777777779, "percentage": 48.89, "elapsed_time": "0:02:48", "remaining_time": "0:02:55", "throughput": 1592.08, "total_tokens": 267840} |
| {"current_steps": 1765, "total_steps": 3600, "loss": 0.2388, "lr": 3.0197792270443982e-05, "epoch": 9.805555555555555, "percentage": 49.03, "elapsed_time": "0:02:48", "remaining_time": "0:02:55", "throughput": 1592.69, "total_tokens": 268592} |
| {"current_steps": 1770, "total_steps": 3600, "loss": 0.2112, "lr": 3.0079176814671656e-05, "epoch": 9.833333333333334, "percentage": 49.17, "elapsed_time": "0:02:49", "remaining_time": "0:02:54", "throughput": 1593.3, "total_tokens": 269344} |
| {"current_steps": 1775, "total_steps": 3600, "loss": 0.217, "lr": 2.9960441975974534e-05, "epoch": 9.86111111111111, "percentage": 49.31, "elapsed_time": "0:02:49", "remaining_time": "0:02:54", "throughput": 1594.01, "total_tokens": 270112} |
| {"current_steps": 1780, "total_steps": 3600, "loss": 0.2832, "lr": 2.9841590545141906e-05, "epoch": 9.88888888888889, "percentage": 49.44, "elapsed_time": "0:02:49", "remaining_time": "0:02:53", "throughput": 1594.66, "total_tokens": 270880} |
| {"current_steps": 1785, "total_steps": 3600, "loss": 0.2252, "lr": 2.9722625315703512e-05, "epoch": 9.916666666666666, "percentage": 49.58, "elapsed_time": "0:02:50", "remaining_time": "0:02:53", "throughput": 1595.46, "total_tokens": 271664} |
| {"current_steps": 1790, "total_steps": 3600, "loss": 0.2218, "lr": 2.9603549083863847e-05, "epoch": 9.944444444444445, "percentage": 49.72, "elapsed_time": "0:02:50", "remaining_time": "0:02:52", "throughput": 1595.83, "total_tokens": 272384} |
| {"current_steps": 1795, "total_steps": 3600, "loss": 0.2465, "lr": 2.9484364648436437e-05, "epoch": 9.972222222222221, "percentage": 49.86, "elapsed_time": "0:02:51", "remaining_time": "0:02:52", "throughput": 1596.64, "total_tokens": 273168} |
| {"current_steps": 1800, "total_steps": 3600, "loss": 0.2287, "lr": 2.9365074810778094e-05, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:51", "remaining_time": "0:02:51", "throughput": 1596.76, "total_tokens": 273936} |
| {"current_steps": 1800, "total_steps": 3600, "eval_loss": 0.24837365746498108, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:52", "remaining_time": "0:02:52", "throughput": 1588.91, "total_tokens": 273936} |
| {"current_steps": 1805, "total_steps": 3600, "loss": 0.2373, "lr": 2.9245682374723016e-05, "epoch": 10.027777777777779, "percentage": 50.14, "elapsed_time": "0:02:54", "remaining_time": "0:02:53", "throughput": 1577.11, "total_tokens": 274704} |
| {"current_steps": 1810, "total_steps": 3600, "loss": 0.2047, "lr": 2.9126190146516942e-05, "epoch": 10.055555555555555, "percentage": 50.28, "elapsed_time": "0:02:54", "remaining_time": "0:02:52", "throughput": 1577.49, "total_tokens": 275440} |
| {"current_steps": 1815, "total_steps": 3600, "loss": 0.2402, "lr": 2.9006600934751145e-05, "epoch": 10.083333333333334, "percentage": 50.42, "elapsed_time": "0:02:55", "remaining_time": "0:02:52", "throughput": 1577.99, "total_tokens": 276192} |
| {"current_steps": 1820, "total_steps": 3600, "loss": 0.2147, "lr": 2.888691755029642e-05, "epoch": 10.11111111111111, "percentage": 50.56, "elapsed_time": "0:02:55", "remaining_time": "0:02:51", "throughput": 1578.49, "total_tokens": 276944} |
| {"current_steps": 1825, "total_steps": 3600, "loss": 0.2124, "lr": 2.876714280623708e-05, "epoch": 10.13888888888889, "percentage": 50.69, "elapsed_time": "0:02:55", "remaining_time": "0:02:51", "throughput": 1579.13, "total_tokens": 277712} |
| {"current_steps": 1830, "total_steps": 3600, "loss": 0.2234, "lr": 2.8647279517804754e-05, "epoch": 10.166666666666666, "percentage": 50.83, "elapsed_time": "0:02:56", "remaining_time": "0:02:50", "throughput": 1579.82, "total_tokens": 278480} |
| {"current_steps": 1835, "total_steps": 3600, "loss": 0.2255, "lr": 2.8527330502312248e-05, "epoch": 10.194444444444445, "percentage": 50.97, "elapsed_time": "0:02:56", "remaining_time": "0:02:49", "throughput": 1580.61, "total_tokens": 279264} |
| {"current_steps": 1840, "total_steps": 3600, "loss": 0.2156, "lr": 2.8407298579087365e-05, "epoch": 10.222222222222221, "percentage": 51.11, "elapsed_time": "0:02:57", "remaining_time": "0:02:49", "throughput": 1581.39, "total_tokens": 280048} |
| {"current_steps": 1845, "total_steps": 3600, "loss": 0.244, "lr": 2.8287186569406566e-05, "epoch": 10.25, "percentage": 51.25, "elapsed_time": "0:02:57", "remaining_time": "0:02:48", "throughput": 1581.91, "total_tokens": 280784} |
| {"current_steps": 1850, "total_steps": 3600, "loss": 0.2199, "lr": 2.816699729642871e-05, "epoch": 10.277777777777779, "percentage": 51.39, "elapsed_time": "0:02:57", "remaining_time": "0:02:48", "throughput": 1582.51, "total_tokens": 281536} |
| {"current_steps": 1855, "total_steps": 3600, "loss": 0.1986, "lr": 2.8046733585128687e-05, "epoch": 10.305555555555555, "percentage": 51.53, "elapsed_time": "0:02:58", "remaining_time": "0:02:47", "throughput": 1583.07, "total_tokens": 282288} |
| {"current_steps": 1860, "total_steps": 3600, "loss": 0.2194, "lr": 2.792639826223101e-05, "epoch": 10.333333333333334, "percentage": 51.67, "elapsed_time": "0:02:58", "remaining_time": "0:02:47", "throughput": 1583.87, "total_tokens": 283088} |
| {"current_steps": 1865, "total_steps": 3600, "loss": 0.2323, "lr": 2.7805994156143376e-05, "epoch": 10.36111111111111, "percentage": 51.81, "elapsed_time": "0:02:59", "remaining_time": "0:02:46", "throughput": 1584.45, "total_tokens": 283840} |
| {"current_steps": 1870, "total_steps": 3600, "loss": 0.183, "lr": 2.7685524096890185e-05, "epoch": 10.38888888888889, "percentage": 51.94, "elapsed_time": "0:02:59", "remaining_time": "0:02:46", "throughput": 1585.29, "total_tokens": 284640} |
| {"current_steps": 1875, "total_steps": 3600, "loss": 0.1878, "lr": 2.756499091604603e-05, "epoch": 10.416666666666666, "percentage": 52.08, "elapsed_time": "0:02:59", "remaining_time": "0:02:45", "throughput": 1585.85, "total_tokens": 285408} |
| {"current_steps": 1880, "total_steps": 3600, "loss": 0.1459, "lr": 2.744439744666915e-05, "epoch": 10.444444444444445, "percentage": 52.22, "elapsed_time": "0:03:00", "remaining_time": "0:02:45", "throughput": 1586.43, "total_tokens": 286176} |
| {"current_steps": 1885, "total_steps": 3600, "loss": 0.2942, "lr": 2.732374652323481e-05, "epoch": 10.472222222222221, "percentage": 52.36, "elapsed_time": "0:03:00", "remaining_time": "0:02:44", "throughput": 1586.93, "total_tokens": 286912} |
| {"current_steps": 1890, "total_steps": 3600, "loss": 0.2165, "lr": 2.72030409815687e-05, "epoch": 10.5, "percentage": 52.5, "elapsed_time": "0:03:01", "remaining_time": "0:02:43", "throughput": 1587.51, "total_tokens": 287664} |
| {"current_steps": 1895, "total_steps": 3600, "loss": 0.2449, "lr": 2.7082283658780288e-05, "epoch": 10.527777777777779, "percentage": 52.64, "elapsed_time": "0:03:01", "remaining_time": "0:02:43", "throughput": 1588.18, "total_tokens": 288432} |
| {"current_steps": 1900, "total_steps": 3600, "loss": 0.1857, "lr": 2.6961477393196126e-05, "epoch": 10.555555555555555, "percentage": 52.78, "elapsed_time": "0:03:02", "remaining_time": "0:02:42", "throughput": 1588.93, "total_tokens": 289216} |
| {"current_steps": 1905, "total_steps": 3600, "loss": 0.1868, "lr": 2.684062502429312e-05, "epoch": 10.583333333333334, "percentage": 52.92, "elapsed_time": "0:03:02", "remaining_time": "0:02:42", "throughput": 1589.69, "total_tokens": 290000} |
| {"current_steps": 1910, "total_steps": 3600, "loss": 0.1619, "lr": 2.6719729392631826e-05, "epoch": 10.61111111111111, "percentage": 53.06, "elapsed_time": "0:03:02", "remaining_time": "0:02:41", "throughput": 1590.38, "total_tokens": 290784} |
| {"current_steps": 1915, "total_steps": 3600, "loss": 0.2002, "lr": 2.659879333978964e-05, "epoch": 10.63888888888889, "percentage": 53.19, "elapsed_time": "0:03:03", "remaining_time": "0:02:41", "throughput": 1590.87, "total_tokens": 291536} |
| {"current_steps": 1920, "total_steps": 3600, "loss": 0.1753, "lr": 2.6477819708294064e-05, "epoch": 10.666666666666666, "percentage": 53.33, "elapsed_time": "0:03:03", "remaining_time": "0:02:40", "throughput": 1591.29, "total_tokens": 292272} |
| {"current_steps": 1925, "total_steps": 3600, "loss": 0.3134, "lr": 2.635681134155585e-05, "epoch": 10.694444444444445, "percentage": 53.47, "elapsed_time": "0:03:04", "remaining_time": "0:02:40", "throughput": 1591.85, "total_tokens": 293024} |
| {"current_steps": 1930, "total_steps": 3600, "loss": 0.3154, "lr": 2.623577108380215e-05, "epoch": 10.722222222222221, "percentage": 53.61, "elapsed_time": "0:03:04", "remaining_time": "0:02:39", "throughput": 1592.19, "total_tokens": 293744} |
| {"current_steps": 1935, "total_steps": 3600, "loss": 0.252, "lr": 2.6114701780009753e-05, "epoch": 10.75, "percentage": 53.75, "elapsed_time": "0:03:04", "remaining_time": "0:02:39", "throughput": 1592.66, "total_tokens": 294480} |
| {"current_steps": 1940, "total_steps": 3600, "loss": 0.3258, "lr": 2.5993606275838117e-05, "epoch": 10.777777777777779, "percentage": 53.89, "elapsed_time": "0:03:05", "remaining_time": "0:02:38", "throughput": 1593.28, "total_tokens": 295264} |
| {"current_steps": 1945, "total_steps": 3600, "loss": 0.2102, "lr": 2.587248741756253e-05, "epoch": 10.805555555555555, "percentage": 54.03, "elapsed_time": "0:03:05", "remaining_time": "0:02:38", "throughput": 1593.83, "total_tokens": 296032} |
| {"current_steps": 1950, "total_steps": 3600, "loss": 0.2204, "lr": 2.5751348052007206e-05, "epoch": 10.833333333333334, "percentage": 54.17, "elapsed_time": "0:03:06", "remaining_time": "0:02:37", "throughput": 1594.42, "total_tokens": 296800} |
| {"current_steps": 1955, "total_steps": 3600, "loss": 0.2397, "lr": 2.5630191026478368e-05, "epoch": 10.86111111111111, "percentage": 54.31, "elapsed_time": "0:03:06", "remaining_time": "0:02:36", "throughput": 1594.69, "total_tokens": 297520} |
| {"current_steps": 1960, "total_steps": 3600, "loss": 0.255, "lr": 2.5509019188697343e-05, "epoch": 10.88888888888889, "percentage": 54.44, "elapsed_time": "0:03:06", "remaining_time": "0:02:36", "throughput": 1595.09, "total_tokens": 298256} |
| {"current_steps": 1965, "total_steps": 3600, "loss": 0.24, "lr": 2.5387835386733584e-05, "epoch": 10.916666666666666, "percentage": 54.58, "elapsed_time": "0:03:07", "remaining_time": "0:02:35", "throughput": 1595.72, "total_tokens": 299024} |
| {"current_steps": 1970, "total_steps": 3600, "loss": 0.2221, "lr": 2.5266642468937766e-05, "epoch": 10.944444444444445, "percentage": 54.72, "elapsed_time": "0:03:07", "remaining_time": "0:02:35", "throughput": 1596.33, "total_tokens": 299792} |
| {"current_steps": 1975, "total_steps": 3600, "loss": 0.2857, "lr": 2.5145443283874848e-05, "epoch": 10.972222222222221, "percentage": 54.86, "elapsed_time": "0:03:08", "remaining_time": "0:02:34", "throughput": 1597.03, "total_tokens": 300560} |
| {"current_steps": 1980, "total_steps": 3600, "loss": 0.2819, "lr": 2.5024240680257055e-05, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:03:08", "remaining_time": "0:02:34", "throughput": 1597.1, "total_tokens": 301328} |
| {"current_steps": 1980, "total_steps": 3600, "eval_loss": 0.24904008209705353, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:03:09", "remaining_time": "0:02:35", "throughput": 1589.97, "total_tokens": 301328} |
| {"current_steps": 1985, "total_steps": 3600, "loss": 0.2303, "lr": 2.4903037506876997e-05, "epoch": 11.027777777777779, "percentage": 55.14, "elapsed_time": "0:03:11", "remaining_time": "0:02:35", "throughput": 1578.5, "total_tokens": 302080} |
| {"current_steps": 1990, "total_steps": 3600, "loss": 0.206, "lr": 2.4781836612540657e-05, "epoch": 11.055555555555555, "percentage": 55.28, "elapsed_time": "0:03:11", "remaining_time": "0:02:35", "throughput": 1578.87, "total_tokens": 302832} |
| {"current_steps": 1995, "total_steps": 3600, "loss": 0.2193, "lr": 2.4660640846000453e-05, "epoch": 11.083333333333334, "percentage": 55.42, "elapsed_time": "0:03:12", "remaining_time": "0:02:34", "throughput": 1579.39, "total_tokens": 303584} |
| {"current_steps": 2000, "total_steps": 3600, "loss": 0.2213, "lr": 2.4539453055888297e-05, "epoch": 11.11111111111111, "percentage": 55.56, "elapsed_time": "0:03:12", "remaining_time": "0:02:34", "throughput": 1579.6, "total_tokens": 304352} |
| {"current_steps": 2005, "total_steps": 3600, "loss": 0.1996, "lr": 2.4418276090648596e-05, "epoch": 11.13888888888889, "percentage": 55.69, "elapsed_time": "0:03:13", "remaining_time": "0:02:33", "throughput": 1580.13, "total_tokens": 305120} |
| {"current_steps": 2010, "total_steps": 3600, "loss": 0.224, "lr": 2.4297112798471326e-05, "epoch": 11.166666666666666, "percentage": 55.83, "elapsed_time": "0:03:13", "remaining_time": "0:02:33", "throughput": 1580.57, "total_tokens": 305856} |
| {"current_steps": 2015, "total_steps": 3600, "loss": 0.1909, "lr": 2.4175966027225107e-05, "epoch": 11.194444444444445, "percentage": 55.97, "elapsed_time": "0:03:13", "remaining_time": "0:02:32", "throughput": 1581.27, "total_tokens": 306640} |
| {"current_steps": 2020, "total_steps": 3600, "loss": 0.2037, "lr": 2.405483862439023e-05, "epoch": 11.222222222222221, "percentage": 56.11, "elapsed_time": "0:03:14", "remaining_time": "0:02:32", "throughput": 1581.93, "total_tokens": 307424} |
| {"current_steps": 2025, "total_steps": 3600, "loss": 0.1915, "lr": 2.3933733436991732e-05, "epoch": 11.25, "percentage": 56.25, "elapsed_time": "0:03:14", "remaining_time": "0:02:31", "throughput": 1582.37, "total_tokens": 308160} |
| {"current_steps": 2030, "total_steps": 3600, "loss": 0.2475, "lr": 2.381265331153252e-05, "epoch": 11.277777777777779, "percentage": 56.39, "elapsed_time": "0:03:15", "remaining_time": "0:02:30", "throughput": 1582.9, "total_tokens": 308928} |
| {"current_steps": 2035, "total_steps": 3600, "loss": 0.1856, "lr": 2.3691601093926404e-05, "epoch": 11.305555555555555, "percentage": 56.53, "elapsed_time": "0:03:15", "remaining_time": "0:02:30", "throughput": 1583.38, "total_tokens": 309680} |
| {"current_steps": 2040, "total_steps": 3600, "loss": 0.2362, "lr": 2.3570579629431267e-05, "epoch": 11.333333333333334, "percentage": 56.67, "elapsed_time": "0:03:16", "remaining_time": "0:02:29", "throughput": 1583.7, "total_tokens": 310416} |
| {"current_steps": 2045, "total_steps": 3600, "loss": 0.2011, "lr": 2.344959176258212e-05, "epoch": 11.36111111111111, "percentage": 56.81, "elapsed_time": "0:03:16", "remaining_time": "0:02:29", "throughput": 1584.22, "total_tokens": 311168} |
| {"current_steps": 2050, "total_steps": 3600, "loss": 0.1962, "lr": 2.3328640337124326e-05, "epoch": 11.38888888888889, "percentage": 56.94, "elapsed_time": "0:03:16", "remaining_time": "0:02:28", "throughput": 1584.79, "total_tokens": 311936} |
| {"current_steps": 2055, "total_steps": 3600, "loss": 0.1854, "lr": 2.3207728195946688e-05, "epoch": 11.416666666666666, "percentage": 57.08, "elapsed_time": "0:03:17", "remaining_time": "0:02:28", "throughput": 1585.47, "total_tokens": 312720} |
| {"current_steps": 2060, "total_steps": 3600, "loss": 0.2253, "lr": 2.3086858181014653e-05, "epoch": 11.444444444444445, "percentage": 57.22, "elapsed_time": "0:03:17", "remaining_time": "0:02:27", "throughput": 1585.98, "total_tokens": 313472} |
| {"current_steps": 2065, "total_steps": 3600, "loss": 0.1852, "lr": 2.2966033133303545e-05, "epoch": 11.472222222222221, "percentage": 57.36, "elapsed_time": "0:03:18", "remaining_time": "0:02:27", "throughput": 1586.54, "total_tokens": 314240} |
| {"current_steps": 2070, "total_steps": 3600, "loss": 0.2133, "lr": 2.2845255892731733e-05, "epoch": 11.5, "percentage": 57.5, "elapsed_time": "0:03:18", "remaining_time": "0:02:26", "throughput": 1587.04, "total_tokens": 314992} |
| {"current_steps": 2075, "total_steps": 3600, "loss": 0.1762, "lr": 2.2724529298093915e-05, "epoch": 11.527777777777779, "percentage": 57.64, "elapsed_time": "0:03:18", "remaining_time": "0:02:26", "throughput": 1587.53, "total_tokens": 315744} |
| {"current_steps": 2080, "total_steps": 3600, "loss": 0.1942, "lr": 2.26038561869944e-05, "epoch": 11.555555555555555, "percentage": 57.78, "elapsed_time": "0:03:19", "remaining_time": "0:02:25", "throughput": 1587.93, "total_tokens": 316480} |
| {"current_steps": 2085, "total_steps": 3600, "loss": 0.2263, "lr": 2.248323939578039e-05, "epoch": 11.583333333333334, "percentage": 57.92, "elapsed_time": "0:03:19", "remaining_time": "0:02:25", "throughput": 1588.52, "total_tokens": 317248} |
| {"current_steps": 2090, "total_steps": 3600, "loss": 0.2019, "lr": 2.2362681759475307e-05, "epoch": 11.61111111111111, "percentage": 58.06, "elapsed_time": "0:03:20", "remaining_time": "0:02:24", "throughput": 1589.17, "total_tokens": 318032} |
| {"current_steps": 2095, "total_steps": 3600, "loss": 0.1717, "lr": 2.2242186111712208e-05, "epoch": 11.63888888888889, "percentage": 58.19, "elapsed_time": "0:03:20", "remaining_time": "0:02:24", "throughput": 1589.64, "total_tokens": 318784} |
| {"current_steps": 2100, "total_steps": 3600, "loss": 0.1956, "lr": 2.212175528466712e-05, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:03:20", "remaining_time": "0:02:23", "throughput": 1590.17, "total_tokens": 319552} |
| {"current_steps": 2105, "total_steps": 3600, "loss": 0.211, "lr": 2.2001392108992504e-05, "epoch": 11.694444444444445, "percentage": 58.47, "elapsed_time": "0:03:21", "remaining_time": "0:02:23", "throughput": 1590.83, "total_tokens": 320336} |
| {"current_steps": 2110, "total_steps": 3600, "loss": 0.2588, "lr": 2.1881099413750733e-05, "epoch": 11.722222222222221, "percentage": 58.61, "elapsed_time": "0:03:21", "remaining_time": "0:02:22", "throughput": 1591.33, "total_tokens": 321088} |
| {"current_steps": 2115, "total_steps": 3600, "loss": 0.2254, "lr": 2.1760880026347562e-05, "epoch": 11.75, "percentage": 58.75, "elapsed_time": "0:03:22", "remaining_time": "0:02:21", "throughput": 1591.88, "total_tokens": 321856} |
| {"current_steps": 2120, "total_steps": 3600, "loss": 0.2133, "lr": 2.16407367724657e-05, "epoch": 11.777777777777779, "percentage": 58.89, "elapsed_time": "0:03:22", "remaining_time": "0:02:21", "throughput": 1592.31, "total_tokens": 322608} |
| {"current_steps": 2125, "total_steps": 3600, "loss": 0.1197, "lr": 2.1520672475998373e-05, "epoch": 11.805555555555555, "percentage": 59.03, "elapsed_time": "0:03:23", "remaining_time": "0:02:20", "throughput": 1592.89, "total_tokens": 323376} |
| {"current_steps": 2130, "total_steps": 3600, "loss": 0.1585, "lr": 2.140068995898297e-05, "epoch": 11.833333333333334, "percentage": 59.17, "elapsed_time": "0:03:23", "remaining_time": "0:02:20", "throughput": 1593.51, "total_tokens": 324160} |
| {"current_steps": 2135, "total_steps": 3600, "loss": 0.2245, "lr": 2.1280792041534714e-05, "epoch": 11.86111111111111, "percentage": 59.31, "elapsed_time": "0:03:23", "remaining_time": "0:02:19", "throughput": 1594.13, "total_tokens": 324944} |
| {"current_steps": 2140, "total_steps": 3600, "loss": 0.2306, "lr": 2.116098154178035e-05, "epoch": 11.88888888888889, "percentage": 59.44, "elapsed_time": "0:03:24", "remaining_time": "0:02:19", "throughput": 1594.46, "total_tokens": 325680} |
| {"current_steps": 2145, "total_steps": 3600, "loss": 0.2284, "lr": 2.1041261275791933e-05, "epoch": 11.916666666666666, "percentage": 59.58, "elapsed_time": "0:03:24", "remaining_time": "0:02:18", "throughput": 1594.82, "total_tokens": 326416} |
| {"current_steps": 2150, "total_steps": 3600, "loss": 0.1881, "lr": 2.092163405752063e-05, "epoch": 11.944444444444445, "percentage": 59.72, "elapsed_time": "0:03:25", "remaining_time": "0:02:18", "throughput": 1595.2, "total_tokens": 327152} |
| {"current_steps": 2155, "total_steps": 3600, "loss": 0.2431, "lr": 2.0802102698730574e-05, "epoch": 11.972222222222221, "percentage": 59.86, "elapsed_time": "0:03:25", "remaining_time": "0:02:17", "throughput": 1595.88, "total_tokens": 327936} |
| {"current_steps": 2160, "total_steps": 3600, "loss": 0.1011, "lr": 2.0682670008932785e-05, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:25", "remaining_time": "0:02:17", "throughput": 1595.95, "total_tokens": 328704} |
| {"current_steps": 2160, "total_steps": 3600, "eval_loss": 0.35741549730300903, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:26", "remaining_time": "0:02:17", "throughput": 1589.35, "total_tokens": 328704} |
| {"current_steps": 2165, "total_steps": 3600, "loss": 0.2562, "lr": 2.0563338795319123e-05, "epoch": 12.027777777777779, "percentage": 60.14, "elapsed_time": "0:03:28", "remaining_time": "0:02:18", "throughput": 1578.59, "total_tokens": 329456} |
| {"current_steps": 2170, "total_steps": 3600, "loss": 0.1975, "lr": 2.0444111862696314e-05, "epoch": 12.055555555555555, "percentage": 60.28, "elapsed_time": "0:03:29", "remaining_time": "0:02:17", "throughput": 1579.03, "total_tokens": 330240} |
| {"current_steps": 2175, "total_steps": 3600, "loss": 0.2241, "lr": 2.032499201342003e-05, "epoch": 12.083333333333334, "percentage": 60.42, "elapsed_time": "0:03:29", "remaining_time": "0:02:17", "throughput": 1579.62, "total_tokens": 331024} |
| {"current_steps": 2180, "total_steps": 3600, "loss": 0.1629, "lr": 2.020598204732901e-05, "epoch": 12.11111111111111, "percentage": 60.56, "elapsed_time": "0:03:29", "remaining_time": "0:02:16", "throughput": 1580.17, "total_tokens": 331792} |
| {"current_steps": 2185, "total_steps": 3600, "loss": 0.1661, "lr": 2.0087084761679245e-05, "epoch": 12.13888888888889, "percentage": 60.69, "elapsed_time": "0:03:30", "remaining_time": "0:02:16", "throughput": 1580.68, "total_tokens": 332560} |
| {"current_steps": 2190, "total_steps": 3600, "loss": 0.0971, "lr": 1.996830295107827e-05, "epoch": 12.166666666666666, "percentage": 60.83, "elapsed_time": "0:03:30", "remaining_time": "0:02:15", "throughput": 1581.06, "total_tokens": 333296} |
| {"current_steps": 2195, "total_steps": 3600, "loss": 0.1303, "lr": 1.9849639407419423e-05, "epoch": 12.194444444444445, "percentage": 60.97, "elapsed_time": "0:03:31", "remaining_time": "0:02:15", "throughput": 1581.49, "total_tokens": 334048} |
| {"current_steps": 2200, "total_steps": 3600, "loss": 0.1409, "lr": 1.973109691981627e-05, "epoch": 12.222222222222221, "percentage": 61.11, "elapsed_time": "0:03:31", "remaining_time": "0:02:14", "throughput": 1582.03, "total_tokens": 334816} |
| {"current_steps": 2205, "total_steps": 3600, "loss": 0.1, "lr": 1.9612678274537005e-05, "epoch": 12.25, "percentage": 61.25, "elapsed_time": "0:03:32", "remaining_time": "0:02:14", "throughput": 1582.43, "total_tokens": 335552} |
| {"current_steps": 2210, "total_steps": 3600, "loss": 0.1573, "lr": 1.9494386254939e-05, "epoch": 12.277777777777779, "percentage": 61.39, "elapsed_time": "0:03:32", "remaining_time": "0:02:13", "throughput": 1583.0, "total_tokens": 336320} |
| {"current_steps": 2215, "total_steps": 3600, "loss": 0.1766, "lr": 1.937622364140338e-05, "epoch": 12.305555555555555, "percentage": 61.53, "elapsed_time": "0:03:32", "remaining_time": "0:02:13", "throughput": 1583.65, "total_tokens": 337104} |
| {"current_steps": 2220, "total_steps": 3600, "loss": 0.2172, "lr": 1.925819321126964e-05, "epoch": 12.333333333333334, "percentage": 61.67, "elapsed_time": "0:03:33", "remaining_time": "0:02:12", "throughput": 1583.91, "total_tokens": 337824} |
| {"current_steps": 2225, "total_steps": 3600, "loss": 0.1574, "lr": 1.9140297738770385e-05, "epoch": 12.36111111111111, "percentage": 61.81, "elapsed_time": "0:03:33", "remaining_time": "0:02:12", "throughput": 1584.45, "total_tokens": 338592} |
| {"current_steps": 2230, "total_steps": 3600, "loss": 0.0875, "lr": 1.9022539994966147e-05, "epoch": 12.38888888888889, "percentage": 61.94, "elapsed_time": "0:03:34", "remaining_time": "0:02:11", "throughput": 1584.8, "total_tokens": 339328} |
| {"current_steps": 2235, "total_steps": 3600, "loss": 0.4871, "lr": 1.8904922747680204e-05, "epoch": 12.416666666666666, "percentage": 62.08, "elapsed_time": "0:03:34", "remaining_time": "0:02:11", "throughput": 1585.21, "total_tokens": 340064} |
| {"current_steps": 2240, "total_steps": 3600, "loss": 0.7559, "lr": 1.8787448761433556e-05, "epoch": 12.444444444444445, "percentage": 62.22, "elapsed_time": "0:03:34", "remaining_time": "0:02:10", "throughput": 1585.59, "total_tokens": 340800} |
| {"current_steps": 2245, "total_steps": 3600, "loss": 0.4803, "lr": 1.8670120797379958e-05, "epoch": 12.472222222222221, "percentage": 62.36, "elapsed_time": "0:03:35", "remaining_time": "0:02:09", "throughput": 1585.93, "total_tokens": 341536} |
| {"current_steps": 2250, "total_steps": 3600, "loss": 0.1932, "lr": 1.8552941613240983e-05, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:03:35", "remaining_time": "0:02:09", "throughput": 1586.47, "total_tokens": 342304} |
| {"current_steps": 2255, "total_steps": 3600, "loss": 0.1003, "lr": 1.8435913963241226e-05, "epoch": 12.527777777777779, "percentage": 62.64, "elapsed_time": "0:03:36", "remaining_time": "0:02:08", "throughput": 1586.88, "total_tokens": 343040} |
| {"current_steps": 2260, "total_steps": 3600, "loss": 0.2555, "lr": 1.831904059804358e-05, "epoch": 12.555555555555555, "percentage": 62.78, "elapsed_time": "0:03:36", "remaining_time": "0:02:08", "throughput": 1587.45, "total_tokens": 343824} |
| {"current_steps": 2265, "total_steps": 3600, "loss": 0.0737, "lr": 1.8202324264684544e-05, "epoch": 12.583333333333334, "percentage": 62.92, "elapsed_time": "0:03:36", "remaining_time": "0:02:07", "throughput": 1588.0, "total_tokens": 344592} |
| {"current_steps": 2270, "total_steps": 3600, "loss": 0.3693, "lr": 1.8085767706509712e-05, "epoch": 12.61111111111111, "percentage": 63.06, "elapsed_time": "0:03:37", "remaining_time": "0:02:07", "throughput": 1588.43, "total_tokens": 345328} |
| {"current_steps": 2275, "total_steps": 3600, "loss": 0.0706, "lr": 1.7969373663109234e-05, "epoch": 12.63888888888889, "percentage": 63.19, "elapsed_time": "0:03:37", "remaining_time": "0:02:06", "throughput": 1588.82, "total_tokens": 346080} |
| {"current_steps": 2280, "total_steps": 3600, "loss": 0.2483, "lr": 1.7853144870253458e-05, "epoch": 12.666666666666666, "percentage": 63.33, "elapsed_time": "0:03:38", "remaining_time": "0:02:06", "throughput": 1589.3, "total_tokens": 346864} |
| {"current_steps": 2285, "total_steps": 3600, "loss": 0.1894, "lr": 1.7737084059828637e-05, "epoch": 12.694444444444445, "percentage": 63.47, "elapsed_time": "0:03:38", "remaining_time": "0:02:05", "throughput": 1589.56, "total_tokens": 347600} |
| {"current_steps": 2290, "total_steps": 3600, "loss": 0.2874, "lr": 1.7621193959772657e-05, "epoch": 12.722222222222221, "percentage": 63.61, "elapsed_time": "0:03:39", "remaining_time": "0:02:05", "throughput": 1589.96, "total_tokens": 348368} |
| {"current_steps": 2295, "total_steps": 3600, "loss": 0.1799, "lr": 1.750547729401101e-05, "epoch": 12.75, "percentage": 63.75, "elapsed_time": "0:03:39", "remaining_time": "0:02:04", "throughput": 1590.52, "total_tokens": 349168} |
| {"current_steps": 2300, "total_steps": 3600, "loss": 0.3814, "lr": 1.7389936782392695e-05, "epoch": 12.777777777777779, "percentage": 63.89, "elapsed_time": "0:03:39", "remaining_time": "0:02:04", "throughput": 1590.96, "total_tokens": 349936} |
| {"current_steps": 2305, "total_steps": 3600, "loss": 0.3553, "lr": 1.7274575140626318e-05, "epoch": 12.805555555555555, "percentage": 64.03, "elapsed_time": "0:03:40", "remaining_time": "0:02:03", "throughput": 1591.44, "total_tokens": 350704} |
| {"current_steps": 2310, "total_steps": 3600, "loss": 0.1558, "lr": 1.7159395080216273e-05, "epoch": 12.833333333333334, "percentage": 64.17, "elapsed_time": "0:03:40", "remaining_time": "0:02:03", "throughput": 1592.02, "total_tokens": 351488} |
| {"current_steps": 2315, "total_steps": 3600, "loss": 0.1948, "lr": 1.7044399308398983e-05, "epoch": 12.86111111111111, "percentage": 64.31, "elapsed_time": "0:03:41", "remaining_time": "0:02:02", "throughput": 1592.43, "total_tokens": 352240} |
| {"current_steps": 2320, "total_steps": 3600, "loss": 0.0667, "lr": 1.692959052807928e-05, "epoch": 12.88888888888889, "percentage": 64.44, "elapsed_time": "0:03:41", "remaining_time": "0:02:02", "throughput": 1592.82, "total_tokens": 352992} |
| {"current_steps": 2325, "total_steps": 3600, "loss": 0.2195, "lr": 1.681497143776689e-05, "epoch": 12.916666666666666, "percentage": 64.58, "elapsed_time": "0:03:42", "remaining_time": "0:02:01", "throughput": 1593.32, "total_tokens": 353760} |
| {"current_steps": 2330, "total_steps": 3600, "loss": 0.1702, "lr": 1.670054473151298e-05, "epoch": 12.944444444444445, "percentage": 64.72, "elapsed_time": "0:03:42", "remaining_time": "0:02:01", "throughput": 1593.95, "total_tokens": 354560} |
| {"current_steps": 2335, "total_steps": 3600, "loss": 0.0926, "lr": 1.658631309884684e-05, "epoch": 12.972222222222221, "percentage": 64.86, "elapsed_time": "0:03:42", "remaining_time": "0:02:00", "throughput": 1594.52, "total_tokens": 355328} |
| {"current_steps": 2340, "total_steps": 3600, "loss": 0.1685, "lr": 1.6472279224712702e-05, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:43", "remaining_time": "0:02:00", "throughput": 1594.54, "total_tokens": 356080} |
| {"current_steps": 2340, "total_steps": 3600, "eval_loss": 0.36974024772644043, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:44", "remaining_time": "0:02:00", "throughput": 1588.43, "total_tokens": 356080} |
| {"current_steps": 2345, "total_steps": 3600, "loss": 0.1054, "lr": 1.6358445789406584e-05, "epoch": 13.027777777777779, "percentage": 65.14, "elapsed_time": "0:03:45", "remaining_time": "0:02:00", "throughput": 1579.29, "total_tokens": 356864} |
| {"current_steps": 2350, "total_steps": 3600, "loss": 0.0633, "lr": 1.6244815468513315e-05, "epoch": 13.055555555555555, "percentage": 65.28, "elapsed_time": "0:03:46", "remaining_time": "0:02:00", "throughput": 1579.8, "total_tokens": 357648} |
| {"current_steps": 2355, "total_steps": 3600, "loss": 0.0846, "lr": 1.6131390932843648e-05, "epoch": 13.083333333333334, "percentage": 65.42, "elapsed_time": "0:03:46", "remaining_time": "0:01:59", "throughput": 1580.34, "total_tokens": 358432} |
| {"current_steps": 2360, "total_steps": 3600, "loss": 0.0326, "lr": 1.6018174848371494e-05, "epoch": 13.11111111111111, "percentage": 65.56, "elapsed_time": "0:03:47", "remaining_time": "0:01:59", "throughput": 1580.69, "total_tokens": 359184} |
| {"current_steps": 2365, "total_steps": 3600, "loss": 0.0906, "lr": 1.5905169876171223e-05, "epoch": 13.13888888888889, "percentage": 65.69, "elapsed_time": "0:03:47", "remaining_time": "0:01:58", "throughput": 1581.13, "total_tokens": 359936} |
| {"current_steps": 2370, "total_steps": 3600, "loss": 0.0947, "lr": 1.579237867235514e-05, "epoch": 13.166666666666666, "percentage": 65.83, "elapsed_time": "0:03:48", "remaining_time": "0:01:58", "throughput": 1581.48, "total_tokens": 360672} |
| {"current_steps": 2375, "total_steps": 3600, "loss": 0.1612, "lr": 1.567980388801109e-05, "epoch": 13.194444444444445, "percentage": 65.97, "elapsed_time": "0:03:48", "remaining_time": "0:01:57", "throughput": 1581.86, "total_tokens": 361424} |
| {"current_steps": 2380, "total_steps": 3600, "loss": 0.1787, "lr": 1.556744816914008e-05, "epoch": 13.222222222222221, "percentage": 66.11, "elapsed_time": "0:03:48", "remaining_time": "0:01:57", "throughput": 1582.44, "total_tokens": 362208} |
| {"current_steps": 2385, "total_steps": 3600, "loss": 0.007, "lr": 1.5455314156594124e-05, "epoch": 13.25, "percentage": 66.25, "elapsed_time": "0:03:49", "remaining_time": "0:01:56", "throughput": 1582.96, "total_tokens": 362976} |
| {"current_steps": 2390, "total_steps": 3600, "loss": 0.0959, "lr": 1.534340448601418e-05, "epoch": 13.277777777777779, "percentage": 66.39, "elapsed_time": "0:03:49", "remaining_time": "0:01:56", "throughput": 1583.55, "total_tokens": 363760} |
| {"current_steps": 2395, "total_steps": 3600, "loss": 0.3737, "lr": 1.523172178776816e-05, "epoch": 13.305555555555555, "percentage": 66.53, "elapsed_time": "0:03:50", "remaining_time": "0:01:55", "throughput": 1584.14, "total_tokens": 364544} |
| {"current_steps": 2400, "total_steps": 3600, "loss": 0.269, "lr": 1.512026868688915e-05, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:50", "remaining_time": "0:01:55", "throughput": 1584.71, "total_tokens": 365328} |
| {"current_steps": 2405, "total_steps": 3600, "loss": 0.1292, "lr": 1.5009047803013699e-05, "epoch": 13.36111111111111, "percentage": 66.81, "elapsed_time": "0:03:50", "remaining_time": "0:01:54", "throughput": 1585.21, "total_tokens": 366112} |
| {"current_steps": 2410, "total_steps": 3600, "loss": 0.1784, "lr": 1.4898061750320212e-05, "epoch": 13.38888888888889, "percentage": 66.94, "elapsed_time": "0:03:51", "remaining_time": "0:01:54", "throughput": 1585.47, "total_tokens": 366848} |
| {"current_steps": 2415, "total_steps": 3600, "loss": 0.1364, "lr": 1.4787313137467546e-05, "epoch": 13.416666666666666, "percentage": 67.08, "elapsed_time": "0:03:51", "remaining_time": "0:01:53", "throughput": 1585.79, "total_tokens": 367600} |
| {"current_steps": 2420, "total_steps": 3600, "loss": 0.1387, "lr": 1.4676804567533687e-05, "epoch": 13.444444444444445, "percentage": 67.22, "elapsed_time": "0:03:52", "remaining_time": "0:01:53", "throughput": 1586.23, "total_tokens": 368368} |
| {"current_steps": 2425, "total_steps": 3600, "loss": 0.1387, "lr": 1.4566538637954554e-05, "epoch": 13.472222222222221, "percentage": 67.36, "elapsed_time": "0:03:52", "remaining_time": "0:01:52", "throughput": 1586.87, "total_tokens": 369168} |
| {"current_steps": 2430, "total_steps": 3600, "loss": 0.1741, "lr": 1.4456517940462949e-05, "epoch": 13.5, "percentage": 67.5, "elapsed_time": "0:03:53", "remaining_time": "0:01:52", "throughput": 1587.43, "total_tokens": 369952} |
| {"current_steps": 2435, "total_steps": 3600, "loss": 0.1012, "lr": 1.4346745061027644e-05, "epoch": 13.527777777777779, "percentage": 67.64, "elapsed_time": "0:03:53", "remaining_time": "0:01:51", "throughput": 1587.7, "total_tokens": 370688} |
| {"current_steps": 2440, "total_steps": 3600, "loss": 0.0562, "lr": 1.4237222579792618e-05, "epoch": 13.555555555555555, "percentage": 67.78, "elapsed_time": "0:03:53", "remaining_time": "0:01:51", "throughput": 1588.0, "total_tokens": 371424} |
| {"current_steps": 2445, "total_steps": 3600, "loss": 0.2235, "lr": 1.4127953071016383e-05, "epoch": 13.583333333333334, "percentage": 67.92, "elapsed_time": "0:03:54", "remaining_time": "0:01:50", "throughput": 1588.37, "total_tokens": 372176} |
| {"current_steps": 2450, "total_steps": 3600, "loss": 0.1715, "lr": 1.4018939103011472e-05, "epoch": 13.61111111111111, "percentage": 68.06, "elapsed_time": "0:03:54", "remaining_time": "0:01:50", "throughput": 1588.87, "total_tokens": 372944} |
| {"current_steps": 2455, "total_steps": 3600, "loss": 0.0483, "lr": 1.3910183238084112e-05, "epoch": 13.63888888888889, "percentage": 68.19, "elapsed_time": "0:03:55", "remaining_time": "0:01:49", "throughput": 1589.2, "total_tokens": 373696} |
| {"current_steps": 2460, "total_steps": 3600, "loss": 0.0139, "lr": 1.3801688032473958e-05, "epoch": 13.666666666666666, "percentage": 68.33, "elapsed_time": "0:03:55", "remaining_time": "0:01:49", "throughput": 1589.58, "total_tokens": 374448} |
| {"current_steps": 2465, "total_steps": 3600, "loss": 0.0093, "lr": 1.369345603629406e-05, "epoch": 13.694444444444445, "percentage": 68.47, "elapsed_time": "0:03:55", "remaining_time": "0:01:48", "throughput": 1590.03, "total_tokens": 375216} |
| {"current_steps": 2470, "total_steps": 3600, "loss": 0.0276, "lr": 1.3585489793470862e-05, "epoch": 13.722222222222221, "percentage": 68.61, "elapsed_time": "0:03:56", "remaining_time": "0:01:48", "throughput": 1590.38, "total_tokens": 375952} |
| {"current_steps": 2475, "total_steps": 3600, "loss": 0.202, "lr": 1.3477791841684451e-05, "epoch": 13.75, "percentage": 68.75, "elapsed_time": "0:03:56", "remaining_time": "0:01:47", "throughput": 1590.74, "total_tokens": 376688} |
| {"current_steps": 2480, "total_steps": 3600, "loss": 0.0211, "lr": 1.337036471230889e-05, "epoch": 13.777777777777779, "percentage": 68.89, "elapsed_time": "0:03:57", "remaining_time": "0:01:47", "throughput": 1591.03, "total_tokens": 377424} |
| {"current_steps": 2485, "total_steps": 3600, "loss": 0.0028, "lr": 1.3263210930352737e-05, "epoch": 13.805555555555555, "percentage": 69.03, "elapsed_time": "0:03:57", "remaining_time": "0:01:46", "throughput": 1591.43, "total_tokens": 378176} |
| {"current_steps": 2490, "total_steps": 3600, "loss": 0.0341, "lr": 1.3156333014399674e-05, "epoch": 13.833333333333334, "percentage": 69.17, "elapsed_time": "0:03:58", "remaining_time": "0:01:46", "throughput": 1591.92, "total_tokens": 378944} |
| {"current_steps": 2495, "total_steps": 3600, "loss": 0.0087, "lr": 1.3049733476549352e-05, "epoch": 13.86111111111111, "percentage": 69.31, "elapsed_time": "0:03:58", "remaining_time": "0:01:45", "throughput": 1592.24, "total_tokens": 379680} |
| {"current_steps": 2500, "total_steps": 3600, "loss": 0.0161, "lr": 1.2943414822358285e-05, "epoch": 13.88888888888889, "percentage": 69.44, "elapsed_time": "0:03:58", "remaining_time": "0:01:45", "throughput": 1592.6, "total_tokens": 380432} |
| {"current_steps": 2505, "total_steps": 3600, "loss": 0.1716, "lr": 1.2837379550781003e-05, "epoch": 13.916666666666666, "percentage": 69.58, "elapsed_time": "0:03:59", "remaining_time": "0:01:44", "throughput": 1592.98, "total_tokens": 381200} |
| {"current_steps": 2510, "total_steps": 3600, "loss": 0.575, "lr": 1.2731630154111296e-05, "epoch": 13.944444444444445, "percentage": 69.72, "elapsed_time": "0:03:59", "remaining_time": "0:01:44", "throughput": 1593.42, "total_tokens": 381984} |
| {"current_steps": 2515, "total_steps": 3600, "loss": 0.1063, "lr": 1.262616911792365e-05, "epoch": 13.972222222222221, "percentage": 69.86, "elapsed_time": "0:04:00", "remaining_time": "0:01:43", "throughput": 1593.83, "total_tokens": 382736} |
| {"current_steps": 2520, "total_steps": 3600, "loss": 0.3555, "lr": 1.2520998921014792e-05, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:04:00", "remaining_time": "0:01:43", "throughput": 1593.84, "total_tokens": 383488} |
| {"current_steps": 2520, "total_steps": 3600, "eval_loss": 0.977617621421814, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:04:01", "remaining_time": "0:01:43", "throughput": 1588.14, "total_tokens": 383488} |
| {"current_steps": 2525, "total_steps": 3600, "loss": 0.1161, "lr": 1.2416122035345507e-05, "epoch": 14.027777777777779, "percentage": 70.14, "elapsed_time": "0:04:03", "remaining_time": "0:01:43", "throughput": 1579.33, "total_tokens": 384224} |
| {"current_steps": 2530, "total_steps": 3600, "loss": 0.0061, "lr": 1.2311540925982403e-05, "epoch": 14.055555555555555, "percentage": 70.28, "elapsed_time": "0:04:03", "remaining_time": "0:01:43", "throughput": 1579.58, "total_tokens": 384976} |
| {"current_steps": 2535, "total_steps": 3600, "loss": 0.0266, "lr": 1.2207258051040099e-05, "epoch": 14.083333333333334, "percentage": 70.42, "elapsed_time": "0:04:04", "remaining_time": "0:01:42", "throughput": 1579.96, "total_tokens": 385744} |
| {"current_steps": 2540, "total_steps": 3600, "loss": 0.0029, "lr": 1.2103275861623378e-05, "epoch": 14.11111111111111, "percentage": 70.56, "elapsed_time": "0:04:04", "remaining_time": "0:01:42", "throughput": 1580.33, "total_tokens": 386512} |
| {"current_steps": 2545, "total_steps": 3600, "loss": 0.0036, "lr": 1.1999596801769616e-05, "epoch": 14.13888888888889, "percentage": 70.69, "elapsed_time": "0:04:05", "remaining_time": "0:01:41", "throughput": 1580.72, "total_tokens": 387280} |
| {"current_steps": 2550, "total_steps": 3600, "loss": 0.0014, "lr": 1.189622330839129e-05, "epoch": 14.166666666666666, "percentage": 70.83, "elapsed_time": "0:04:05", "remaining_time": "0:01:41", "throughput": 1580.93, "total_tokens": 388000} |
| {"current_steps": 2555, "total_steps": 3600, "loss": 0.0216, "lr": 1.179315781121874e-05, "epoch": 14.194444444444445, "percentage": 70.97, "elapsed_time": "0:04:05", "remaining_time": "0:01:40", "throughput": 1581.26, "total_tokens": 388752} |
| {"current_steps": 2560, "total_steps": 3600, "loss": 0.0307, "lr": 1.1690402732743042e-05, "epoch": 14.222222222222221, "percentage": 71.11, "elapsed_time": "0:04:06", "remaining_time": "0:01:40", "throughput": 1581.59, "total_tokens": 389504} |
| {"current_steps": 2565, "total_steps": 3600, "loss": 0.0913, "lr": 1.158796048815906e-05, "epoch": 14.25, "percentage": 71.25, "elapsed_time": "0:04:06", "remaining_time": "0:01:39", "throughput": 1581.86, "total_tokens": 390240} |
| {"current_steps": 2570, "total_steps": 3600, "loss": 0.0011, "lr": 1.1485833485308702e-05, "epoch": 14.277777777777779, "percentage": 71.39, "elapsed_time": "0:04:07", "remaining_time": "0:01:39", "throughput": 1582.17, "total_tokens": 390992} |
| {"current_steps": 2575, "total_steps": 3600, "loss": 0.0004, "lr": 1.1384024124624324e-05, "epoch": 14.305555555555555, "percentage": 71.53, "elapsed_time": "0:04:07", "remaining_time": "0:01:38", "throughput": 1582.44, "total_tokens": 391728} |
| {"current_steps": 2580, "total_steps": 3600, "loss": 0.0038, "lr": 1.1282534799072272e-05, "epoch": 14.333333333333334, "percentage": 71.67, "elapsed_time": "0:04:07", "remaining_time": "0:01:38", "throughput": 1582.7, "total_tokens": 392464} |
| {"current_steps": 2585, "total_steps": 3600, "loss": 0.0006, "lr": 1.1181367894096684e-05, "epoch": 14.36111111111111, "percentage": 71.81, "elapsed_time": "0:04:08", "remaining_time": "0:01:37", "throughput": 1582.97, "total_tokens": 393200} |
| {"current_steps": 2590, "total_steps": 3600, "loss": 0.2819, "lr": 1.1080525787563393e-05, "epoch": 14.38888888888889, "percentage": 71.94, "elapsed_time": "0:04:08", "remaining_time": "0:01:37", "throughput": 1583.33, "total_tokens": 393968} |
| {"current_steps": 2595, "total_steps": 3600, "loss": 0.0384, "lr": 1.0980010849704036e-05, "epoch": 14.416666666666666, "percentage": 72.08, "elapsed_time": "0:04:09", "remaining_time": "0:01:36", "throughput": 1583.76, "total_tokens": 394752} |
| {"current_steps": 2600, "total_steps": 3600, "loss": 0.0004, "lr": 1.0879825443060362e-05, "epoch": 14.444444444444445, "percentage": 72.22, "elapsed_time": "0:04:09", "remaining_time": "0:01:36", "throughput": 1584.1, "total_tokens": 395504} |
| {"current_steps": 2605, "total_steps": 3600, "loss": 0.1231, "lr": 1.0779971922428711e-05, "epoch": 14.472222222222221, "percentage": 72.36, "elapsed_time": "0:04:10", "remaining_time": "0:01:35", "throughput": 1584.41, "total_tokens": 396256} |
| {"current_steps": 2610, "total_steps": 3600, "loss": 0.0713, "lr": 1.0680452634804603e-05, "epoch": 14.5, "percentage": 72.5, "elapsed_time": "0:04:10", "remaining_time": "0:01:35", "throughput": 1584.85, "total_tokens": 397040} |
| {"current_steps": 2615, "total_steps": 3600, "loss": 0.004, "lr": 1.0581269919327643e-05, "epoch": 14.527777777777779, "percentage": 72.64, "elapsed_time": "0:04:10", "remaining_time": "0:01:34", "throughput": 1585.33, "total_tokens": 397840} |
| {"current_steps": 2620, "total_steps": 3600, "loss": 0.0394, "lr": 1.0482426107226507e-05, "epoch": 14.555555555555555, "percentage": 72.78, "elapsed_time": "0:04:11", "remaining_time": "0:01:34", "throughput": 1585.84, "total_tokens": 398640} |
| {"current_steps": 2625, "total_steps": 3600, "loss": 0.0008, "lr": 1.0383923521764174e-05, "epoch": 14.583333333333334, "percentage": 72.92, "elapsed_time": "0:04:11", "remaining_time": "0:01:33", "throughput": 1586.09, "total_tokens": 399376} |
| {"current_steps": 2630, "total_steps": 3600, "loss": 0.0194, "lr": 1.0285764478183284e-05, "epoch": 14.61111111111111, "percentage": 73.06, "elapsed_time": "0:04:12", "remaining_time": "0:01:33", "throughput": 1586.52, "total_tokens": 400160} |
| {"current_steps": 2635, "total_steps": 3600, "loss": 0.0296, "lr": 1.0187951283651736e-05, "epoch": 14.63888888888889, "percentage": 73.19, "elapsed_time": "0:04:12", "remaining_time": "0:01:32", "throughput": 1586.83, "total_tokens": 400912} |
| {"current_steps": 2640, "total_steps": 3600, "loss": 0.0018, "lr": 1.0090486237208463e-05, "epoch": 14.666666666666666, "percentage": 73.33, "elapsed_time": "0:04:13", "remaining_time": "0:01:32", "throughput": 1587.27, "total_tokens": 401696} |
| {"current_steps": 2645, "total_steps": 3600, "loss": 0.0009, "lr": 9.993371629709391e-06, "epoch": 14.694444444444445, "percentage": 73.47, "elapsed_time": "0:04:13", "remaining_time": "0:01:31", "throughput": 1587.71, "total_tokens": 402480} |
| {"current_steps": 2650, "total_steps": 3600, "loss": 0.0096, "lr": 9.89660974377359e-06, "epoch": 14.722222222222221, "percentage": 73.61, "elapsed_time": "0:04:13", "remaining_time": "0:01:31", "throughput": 1588.09, "total_tokens": 403248} |
| {"current_steps": 2655, "total_steps": 3600, "loss": 0.0244, "lr": 9.800202853729651e-06, "epoch": 14.75, "percentage": 73.75, "elapsed_time": "0:04:14", "remaining_time": "0:01:30", "throughput": 1588.4, "total_tokens": 404000} |
| {"current_steps": 2660, "total_steps": 3600, "loss": 0.003, "lr": 9.704153225562171e-06, "epoch": 14.777777777777779, "percentage": 73.89, "elapsed_time": "0:04:14", "remaining_time": "0:01:30", "throughput": 1588.74, "total_tokens": 404768} |
| {"current_steps": 2665, "total_steps": 3600, "loss": 0.0665, "lr": 9.608463116858542e-06, "epoch": 14.805555555555555, "percentage": 74.03, "elapsed_time": "0:04:15", "remaining_time": "0:01:29", "throughput": 1588.97, "total_tokens": 405504} |
| {"current_steps": 2670, "total_steps": 3600, "loss": 0.0002, "lr": 9.51313477675588e-06, "epoch": 14.833333333333334, "percentage": 74.17, "elapsed_time": "0:04:15", "remaining_time": "0:01:29", "throughput": 1589.37, "total_tokens": 406272} |
| {"current_steps": 2675, "total_steps": 3600, "loss": 0.0093, "lr": 9.418170445888139e-06, "epoch": 14.86111111111111, "percentage": 74.31, "elapsed_time": "0:04:16", "remaining_time": "0:01:28", "throughput": 1589.8, "total_tokens": 407056} |
| {"current_steps": 2680, "total_steps": 3600, "loss": 0.0205, "lr": 9.323572356333454e-06, "epoch": 14.88888888888889, "percentage": 74.44, "elapsed_time": "0:04:16", "remaining_time": "0:01:28", "throughput": 1590.17, "total_tokens": 407808} |
| {"current_steps": 2685, "total_steps": 3600, "loss": 0.0055, "lr": 9.22934273156172e-06, "epoch": 14.916666666666666, "percentage": 74.58, "elapsed_time": "0:04:16", "remaining_time": "0:01:27", "throughput": 1590.45, "total_tokens": 408560} |
| {"current_steps": 2690, "total_steps": 3600, "loss": 0.0282, "lr": 9.135483786382262e-06, "epoch": 14.944444444444445, "percentage": 74.72, "elapsed_time": "0:04:17", "remaining_time": "0:01:27", "throughput": 1590.78, "total_tokens": 409328} |
| {"current_steps": 2695, "total_steps": 3600, "loss": 0.0044, "lr": 9.0419977268918e-06, "epoch": 14.972222222222221, "percentage": 74.86, "elapsed_time": "0:04:17", "remaining_time": "0:01:26", "throughput": 1591.19, "total_tokens": 410096} |
| {"current_steps": 2700, "total_steps": 3600, "loss": 0.004, "lr": 8.948886750422636e-06, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:04:18", "remaining_time": "0:01:26", "throughput": 1591.08, "total_tokens": 410832} |
| {"current_steps": 2700, "total_steps": 3600, "eval_loss": 0.9717798233032227, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:04:19", "remaining_time": "0:01:26", "throughput": 1585.78, "total_tokens": 410832} |
| {"current_steps": 2705, "total_steps": 3600, "loss": 0.0003, "lr": 8.856153045490948e-06, "epoch": 15.027777777777779, "percentage": 75.14, "elapsed_time": "0:04:20", "remaining_time": "0:01:26", "throughput": 1577.41, "total_tokens": 411600} |
| {"current_steps": 2710, "total_steps": 3600, "loss": 0.0013, "lr": 8.763798791745411e-06, "epoch": 15.055555555555555, "percentage": 75.28, "elapsed_time": "0:04:21", "remaining_time": "0:01:25", "throughput": 1577.89, "total_tokens": 412384} |
| {"current_steps": 2715, "total_steps": 3600, "loss": 0.0006, "lr": 8.671826159915907e-06, "epoch": 15.083333333333334, "percentage": 75.42, "elapsed_time": "0:04:21", "remaining_time": "0:01:25", "throughput": 1578.33, "total_tokens": 413168} |
| {"current_steps": 2720, "total_steps": 3600, "loss": 0.0001, "lr": 8.58023731176254e-06, "epoch": 15.11111111111111, "percentage": 75.56, "elapsed_time": "0:04:22", "remaining_time": "0:01:24", "throughput": 1578.66, "total_tokens": 413936} |
| {"current_steps": 2725, "total_steps": 3600, "loss": 0.0002, "lr": 8.489034400024812e-06, "epoch": 15.13888888888889, "percentage": 75.69, "elapsed_time": "0:04:22", "remaining_time": "0:01:24", "throughput": 1579.0, "total_tokens": 414704} |
| {"current_steps": 2730, "total_steps": 3600, "loss": 0.0001, "lr": 8.39821956837102e-06, "epoch": 15.166666666666666, "percentage": 75.83, "elapsed_time": "0:04:23", "remaining_time": "0:01:23", "throughput": 1579.44, "total_tokens": 415488} |
| {"current_steps": 2735, "total_steps": 3600, "loss": 0.0001, "lr": 8.3077949513479e-06, "epoch": 15.194444444444445, "percentage": 75.97, "elapsed_time": "0:04:23", "remaining_time": "0:01:23", "throughput": 1579.78, "total_tokens": 416240} |
| {"current_steps": 2740, "total_steps": 3600, "loss": 0.0002, "lr": 8.217762674330413e-06, "epoch": 15.222222222222221, "percentage": 76.11, "elapsed_time": "0:04:23", "remaining_time": "0:01:22", "throughput": 1580.16, "total_tokens": 417008} |
| {"current_steps": 2745, "total_steps": 3600, "loss": 0.0054, "lr": 8.128124853471814e-06, "epoch": 15.25, "percentage": 76.25, "elapsed_time": "0:04:24", "remaining_time": "0:01:22", "throughput": 1580.36, "total_tokens": 417728} |
| {"current_steps": 2750, "total_steps": 3600, "loss": 0.0009, "lr": 8.03888359565391e-06, "epoch": 15.277777777777779, "percentage": 76.39, "elapsed_time": "0:04:24", "remaining_time": "0:01:21", "throughput": 1580.57, "total_tokens": 418448} |
| {"current_steps": 2755, "total_steps": 3600, "loss": 0.0022, "lr": 7.950040998437542e-06, "epoch": 15.305555555555555, "percentage": 76.53, "elapsed_time": "0:04:25", "remaining_time": "0:01:21", "throughput": 1580.87, "total_tokens": 419184} |
| {"current_steps": 2760, "total_steps": 3600, "loss": 0.0001, "lr": 7.86159915001326e-06, "epoch": 15.333333333333334, "percentage": 76.67, "elapsed_time": "0:04:25", "remaining_time": "0:01:20", "throughput": 1581.36, "total_tokens": 419984} |
| {"current_steps": 2765, "total_steps": 3600, "loss": 0.0005, "lr": 7.7735601291523e-06, "epoch": 15.36111111111111, "percentage": 76.81, "elapsed_time": "0:04:26", "remaining_time": "0:01:20", "throughput": 1581.72, "total_tokens": 420752} |
| {"current_steps": 2770, "total_steps": 3600, "loss": 0.0001, "lr": 7.685926005157651e-06, "epoch": 15.38888888888889, "percentage": 76.94, "elapsed_time": "0:04:26", "remaining_time": "0:01:19", "throughput": 1582.08, "total_tokens": 421520} |
| {"current_steps": 2775, "total_steps": 3600, "loss": 0.0001, "lr": 7.598698837815449e-06, "epoch": 15.416666666666666, "percentage": 77.08, "elapsed_time": "0:04:26", "remaining_time": "0:01:19", "throughput": 1582.32, "total_tokens": 422256} |
| {"current_steps": 2780, "total_steps": 3600, "loss": 0.0003, "lr": 7.511880677346578e-06, "epoch": 15.444444444444445, "percentage": 77.22, "elapsed_time": "0:04:27", "remaining_time": "0:01:18", "throughput": 1582.63, "total_tokens": 423008} |
| {"current_steps": 2785, "total_steps": 3600, "loss": 0.0004, "lr": 7.4254735643584564e-06, "epoch": 15.472222222222221, "percentage": 77.36, "elapsed_time": "0:04:27", "remaining_time": "0:01:18", "throughput": 1582.94, "total_tokens": 423760} |
| {"current_steps": 2790, "total_steps": 3600, "loss": 0.0022, "lr": 7.339479529797111e-06, "epoch": 15.5, "percentage": 77.5, "elapsed_time": "0:04:28", "remaining_time": "0:01:17", "throughput": 1583.25, "total_tokens": 424512} |
| {"current_steps": 2795, "total_steps": 3600, "loss": 0.0004, "lr": 7.2539005948993825e-06, "epoch": 15.527777777777779, "percentage": 77.64, "elapsed_time": "0:04:28", "remaining_time": "0:01:17", "throughput": 1583.44, "total_tokens": 425232} |
| {"current_steps": 2800, "total_steps": 3600, "loss": 0.0007, "lr": 7.168738771145464e-06, "epoch": 15.555555555555555, "percentage": 77.78, "elapsed_time": "0:04:28", "remaining_time": "0:01:16", "throughput": 1583.75, "total_tokens": 425984} |
| {"current_steps": 2805, "total_steps": 3600, "loss": 0.0001, "lr": 7.083996060211607e-06, "epoch": 15.583333333333334, "percentage": 77.92, "elapsed_time": "0:04:29", "remaining_time": "0:01:16", "throughput": 1584.12, "total_tokens": 426752} |
| {"current_steps": 2810, "total_steps": 3600, "loss": 0.0004, "lr": 6.9996744539230665e-06, "epoch": 15.61111111111111, "percentage": 78.06, "elapsed_time": "0:04:29", "remaining_time": "0:01:15", "throughput": 1584.53, "total_tokens": 427536} |
| {"current_steps": 2815, "total_steps": 3600, "loss": 0.0001, "lr": 6.9157759342072995e-06, "epoch": 15.63888888888889, "percentage": 78.19, "elapsed_time": "0:04:30", "remaining_time": "0:01:15", "throughput": 1584.79, "total_tokens": 428288} |
| {"current_steps": 2820, "total_steps": 3600, "loss": 0.0002, "lr": 6.832302473047384e-06, "epoch": 15.666666666666666, "percentage": 78.33, "elapsed_time": "0:04:30", "remaining_time": "0:01:14", "throughput": 1585.08, "total_tokens": 429040} |
| {"current_steps": 2825, "total_steps": 3600, "loss": 0.0001, "lr": 6.7492560324356355e-06, "epoch": 15.694444444444445, "percentage": 78.47, "elapsed_time": "0:04:31", "remaining_time": "0:01:14", "throughput": 1585.37, "total_tokens": 429792} |
| {"current_steps": 2830, "total_steps": 3600, "loss": 0.0002, "lr": 6.666638564327532e-06, "epoch": 15.722222222222221, "percentage": 78.61, "elapsed_time": "0:04:31", "remaining_time": "0:01:13", "throughput": 1585.64, "total_tokens": 430544} |
| {"current_steps": 2835, "total_steps": 3600, "loss": 0.0001, "lr": 6.584452010595807e-06, "epoch": 15.75, "percentage": 78.75, "elapsed_time": "0:04:31", "remaining_time": "0:01:13", "throughput": 1585.98, "total_tokens": 431312} |
| {"current_steps": 2840, "total_steps": 3600, "loss": 0.0004, "lr": 6.502698302984811e-06, "epoch": 15.777777777777779, "percentage": 78.89, "elapsed_time": "0:04:32", "remaining_time": "0:01:12", "throughput": 1586.22, "total_tokens": 432048} |
| {"current_steps": 2845, "total_steps": 3600, "loss": 0.0001, "lr": 6.421379363065142e-06, "epoch": 15.805555555555555, "percentage": 79.03, "elapsed_time": "0:04:32", "remaining_time": "0:01:12", "throughput": 1586.57, "total_tokens": 432816} |
| {"current_steps": 2850, "total_steps": 3600, "loss": 0.0001, "lr": 6.340497102188425e-06, "epoch": 15.833333333333334, "percentage": 79.17, "elapsed_time": "0:04:33", "remaining_time": "0:01:11", "throughput": 1587.05, "total_tokens": 433616} |
| {"current_steps": 2855, "total_steps": 3600, "loss": 0.0001, "lr": 6.26005342144241e-06, "epoch": 15.86111111111111, "percentage": 79.31, "elapsed_time": "0:04:33", "remaining_time": "0:01:11", "throughput": 1587.47, "total_tokens": 434400} |
| {"current_steps": 2860, "total_steps": 3600, "loss": 0.0001, "lr": 6.180050211606303e-06, "epoch": 15.88888888888889, "percentage": 79.44, "elapsed_time": "0:04:34", "remaining_time": "0:01:10", "throughput": 1587.81, "total_tokens": 435168} |
| {"current_steps": 2865, "total_steps": 3600, "loss": 0.0002, "lr": 6.100489353106304e-06, "epoch": 15.916666666666666, "percentage": 79.58, "elapsed_time": "0:04:34", "remaining_time": "0:01:10", "throughput": 1588.12, "total_tokens": 435920} |
| {"current_steps": 2870, "total_steps": 3600, "loss": 0.0002, "lr": 6.021372715971437e-06, "epoch": 15.944444444444445, "percentage": 79.72, "elapsed_time": "0:04:34", "remaining_time": "0:01:09", "throughput": 1588.48, "total_tokens": 436688} |
| {"current_steps": 2875, "total_steps": 3600, "loss": 0.0001, "lr": 5.942702159789554e-06, "epoch": 15.972222222222221, "percentage": 79.86, "elapsed_time": "0:04:35", "remaining_time": "0:01:09", "throughput": 1588.9, "total_tokens": 437456} |
| {"current_steps": 2880, "total_steps": 3600, "loss": 0.0001, "lr": 5.864479533663655e-06, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:35", "remaining_time": "0:01:08", "throughput": 1588.76, "total_tokens": 438176} |
| {"current_steps": 2880, "total_steps": 3600, "eval_loss": 1.0572586059570312, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:36", "remaining_time": "0:01:09", "throughput": 1583.72, "total_tokens": 438176} |
| {"current_steps": 2885, "total_steps": 3600, "loss": 0.0001, "lr": 5.786706676168424e-06, "epoch": 16.02777777777778, "percentage": 80.14, "elapsed_time": "0:04:38", "remaining_time": "0:01:09", "throughput": 1576.3, "total_tokens": 438944} |
| {"current_steps": 2890, "total_steps": 3600, "loss": 0.0001, "lr": 5.709385415307006e-06, "epoch": 16.055555555555557, "percentage": 80.28, "elapsed_time": "0:04:38", "remaining_time": "0:01:08", "throughput": 1576.51, "total_tokens": 439696} |
| {"current_steps": 2895, "total_steps": 3600, "loss": 0.0001, "lr": 5.6325175684680374e-06, "epoch": 16.083333333333332, "percentage": 80.42, "elapsed_time": "0:04:39", "remaining_time": "0:01:08", "throughput": 1576.83, "total_tokens": 440464} |
| {"current_steps": 2900, "total_steps": 3600, "loss": 0.0001, "lr": 5.556104942382964e-06, "epoch": 16.11111111111111, "percentage": 80.56, "elapsed_time": "0:04:39", "remaining_time": "0:01:07", "throughput": 1577.19, "total_tokens": 441248} |
| {"current_steps": 2905, "total_steps": 3600, "loss": 0.0001, "lr": 5.48014933308352e-06, "epoch": 16.13888888888889, "percentage": 80.69, "elapsed_time": "0:04:40", "remaining_time": "0:01:07", "throughput": 1577.51, "total_tokens": 442016} |
| {"current_steps": 2910, "total_steps": 3600, "loss": 0.0001, "lr": 5.404652525859552e-06, "epoch": 16.166666666666668, "percentage": 80.83, "elapsed_time": "0:04:40", "remaining_time": "0:01:06", "throughput": 1577.82, "total_tokens": 442784} |
| {"current_steps": 2915, "total_steps": 3600, "loss": 0.0001, "lr": 5.329616295217046e-06, "epoch": 16.194444444444443, "percentage": 80.97, "elapsed_time": "0:04:41", "remaining_time": "0:01:06", "throughput": 1578.18, "total_tokens": 443536} |
| {"current_steps": 2920, "total_steps": 3600, "loss": 0.0001, "lr": 5.2550424048364185e-06, "epoch": 16.22222222222222, "percentage": 81.11, "elapsed_time": "0:04:41", "remaining_time": "0:01:05", "throughput": 1578.44, "total_tokens": 444272} |
| {"current_steps": 2925, "total_steps": 3600, "loss": 0.0001, "lr": 5.180932607531056e-06, "epoch": 16.25, "percentage": 81.25, "elapsed_time": "0:04:41", "remaining_time": "0:01:05", "throughput": 1578.93, "total_tokens": 445056} |
| {"current_steps": 2930, "total_steps": 3600, "loss": 0.0001, "lr": 5.107288645206149e-06, "epoch": 16.27777777777778, "percentage": 81.39, "elapsed_time": "0:04:42", "remaining_time": "0:01:04", "throughput": 1579.31, "total_tokens": 445824} |
| {"current_steps": 2935, "total_steps": 3600, "loss": 0.0001, "lr": 5.034112248817685e-06, "epoch": 16.305555555555557, "percentage": 81.53, "elapsed_time": "0:04:42", "remaining_time": "0:01:04", "throughput": 1579.73, "total_tokens": 446592} |
| {"current_steps": 2940, "total_steps": 3600, "loss": 0.0001, "lr": 4.961405138331826e-06, "epoch": 16.333333333333332, "percentage": 81.67, "elapsed_time": "0:04:43", "remaining_time": "0:01:03", "throughput": 1579.99, "total_tokens": 447312} |
| {"current_steps": 2945, "total_steps": 3600, "loss": 0.0002, "lr": 4.88916902268445e-06, "epoch": 16.36111111111111, "percentage": 81.81, "elapsed_time": "0:04:43", "remaining_time": "0:01:03", "throughput": 1580.28, "total_tokens": 448064} |
| {"current_steps": 2950, "total_steps": 3600, "loss": 0.0001, "lr": 4.817405599741004e-06, "epoch": 16.38888888888889, "percentage": 81.94, "elapsed_time": "0:04:43", "remaining_time": "0:01:02", "throughput": 1580.72, "total_tokens": 448848} |
| {"current_steps": 2955, "total_steps": 3600, "loss": 0.0001, "lr": 4.746116556256569e-06, "epoch": 16.416666666666668, "percentage": 82.08, "elapsed_time": "0:04:44", "remaining_time": "0:01:02", "throughput": 1581.1, "total_tokens": 449600} |
| {"current_steps": 2960, "total_steps": 3600, "loss": 0.0001, "lr": 4.6753035678362314e-06, "epoch": 16.444444444444443, "percentage": 82.22, "elapsed_time": "0:04:44", "remaining_time": "0:01:01", "throughput": 1581.47, "total_tokens": 450352} |
| {"current_steps": 2965, "total_steps": 3600, "loss": 0.0001, "lr": 4.604968298895703e-06, "epoch": 16.47222222222222, "percentage": 82.36, "elapsed_time": "0:04:45", "remaining_time": "0:01:01", "throughput": 1581.86, "total_tokens": 451120} |
| {"current_steps": 2970, "total_steps": 3600, "loss": 0.0001, "lr": 4.535112402622185e-06, "epoch": 16.5, "percentage": 82.5, "elapsed_time": "0:04:45", "remaining_time": "0:01:00", "throughput": 1582.29, "total_tokens": 451888} |
| {"current_steps": 2975, "total_steps": 3600, "loss": 0.0001, "lr": 4.465737520935517e-06, "epoch": 16.52777777777778, "percentage": 82.64, "elapsed_time": "0:04:45", "remaining_time": "0:01:00", "throughput": 1582.66, "total_tokens": 452640} |
| {"current_steps": 2980, "total_steps": 3600, "loss": 0.0, "lr": 4.396845284449608e-06, "epoch": 16.555555555555557, "percentage": 82.78, "elapsed_time": "0:04:46", "remaining_time": "0:00:59", "throughput": 1583.16, "total_tokens": 453424} |
| {"current_steps": 2985, "total_steps": 3600, "loss": 0.0001, "lr": 4.328437312434067e-06, "epoch": 16.583333333333332, "percentage": 82.92, "elapsed_time": "0:04:46", "remaining_time": "0:00:59", "throughput": 1583.61, "total_tokens": 454192} |
| {"current_steps": 2990, "total_steps": 3600, "loss": 0.0001, "lr": 4.2605152127761675e-06, "epoch": 16.61111111111111, "percentage": 83.06, "elapsed_time": "0:04:47", "remaining_time": "0:00:58", "throughput": 1583.99, "total_tokens": 454944} |
| {"current_steps": 2995, "total_steps": 3600, "loss": 0.0001, "lr": 4.19308058194306e-06, "epoch": 16.63888888888889, "percentage": 83.19, "elapsed_time": "0:04:47", "remaining_time": "0:00:58", "throughput": 1584.32, "total_tokens": 455680} |
| {"current_steps": 3000, "total_steps": 3600, "loss": 0.0, "lr": 4.126135004944231e-06, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:04:48", "remaining_time": "0:00:57", "throughput": 1584.72, "total_tokens": 456448} |
| {"current_steps": 3005, "total_steps": 3600, "loss": 0.0001, "lr": 4.059680055294266e-06, "epoch": 16.694444444444443, "percentage": 83.47, "elapsed_time": "0:04:48", "remaining_time": "0:00:57", "throughput": 1585.04, "total_tokens": 457184} |
| {"current_steps": 3010, "total_steps": 3600, "loss": 0.0001, "lr": 3.993717294975863e-06, "epoch": 16.72222222222222, "percentage": 83.61, "elapsed_time": "0:04:48", "remaining_time": "0:00:56", "throughput": 1585.5, "total_tokens": 457968} |
| {"current_steps": 3015, "total_steps": 3600, "loss": 0.0001, "lr": 3.92824827440309e-06, "epoch": 16.75, "percentage": 83.75, "elapsed_time": "0:04:49", "remaining_time": "0:00:56", "throughput": 1585.91, "total_tokens": 458736} |
| {"current_steps": 3020, "total_steps": 3600, "loss": 0.0, "lr": 3.863274532384981e-06, "epoch": 16.77777777777778, "percentage": 83.89, "elapsed_time": "0:04:49", "remaining_time": "0:00:55", "throughput": 1586.25, "total_tokens": 459504} |
| {"current_steps": 3025, "total_steps": 3600, "loss": 0.0001, "lr": 3.798797596089351e-06, "epoch": 16.805555555555557, "percentage": 84.03, "elapsed_time": "0:04:50", "remaining_time": "0:00:55", "throughput": 1586.69, "total_tokens": 460288} |
| {"current_steps": 3030, "total_steps": 3600, "loss": 0.0, "lr": 3.73481898100691e-06, "epoch": 16.833333333333332, "percentage": 84.17, "elapsed_time": "0:04:50", "remaining_time": "0:00:54", "throughput": 1587.03, "total_tokens": 461040} |
| {"current_steps": 3035, "total_steps": 3600, "loss": 0.0001, "lr": 3.6713401909156204e-06, "epoch": 16.86111111111111, "percentage": 84.31, "elapsed_time": "0:04:50", "remaining_time": "0:00:54", "throughput": 1587.32, "total_tokens": 461776} |
| {"current_steps": 3040, "total_steps": 3600, "loss": 0.0, "lr": 3.608362717845376e-06, "epoch": 16.88888888888889, "percentage": 84.44, "elapsed_time": "0:04:51", "remaining_time": "0:00:53", "throughput": 1587.7, "total_tokens": 462544} |
| {"current_steps": 3045, "total_steps": 3600, "loss": 0.0, "lr": 3.5458880420429135e-06, "epoch": 16.916666666666668, "percentage": 84.58, "elapsed_time": "0:04:51", "remaining_time": "0:00:53", "throughput": 1588.05, "total_tokens": 463296} |
| {"current_steps": 3050, "total_steps": 3600, "loss": 0.0001, "lr": 3.4839176319370394e-06, "epoch": 16.944444444444443, "percentage": 84.72, "elapsed_time": "0:04:52", "remaining_time": "0:00:52", "throughput": 1588.53, "total_tokens": 464080} |
| {"current_steps": 3055, "total_steps": 3600, "loss": 0.0, "lr": 3.4224529441040904e-06, "epoch": 16.97222222222222, "percentage": 84.86, "elapsed_time": "0:04:52", "remaining_time": "0:00:52", "throughput": 1589.05, "total_tokens": 464864} |
| {"current_steps": 3060, "total_steps": 3600, "loss": 0.0001, "lr": 3.3614954232337374e-06, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:53", "remaining_time": "0:00:51", "throughput": 1589.17, "total_tokens": 465632} |
| {"current_steps": 3060, "total_steps": 3600, "eval_loss": 1.0890483856201172, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:53", "remaining_time": "0:00:51", "throughput": 1584.59, "total_tokens": 465632} |
| {"current_steps": 3065, "total_steps": 3600, "loss": 0.0001, "lr": 3.3010465020949818e-06, "epoch": 17.02777777777778, "percentage": 85.14, "elapsed_time": "0:04:55", "remaining_time": "0:00:51", "throughput": 1577.38, "total_tokens": 466416} |
| {"current_steps": 3070, "total_steps": 3600, "loss": 0.0001, "lr": 3.2411076015025075e-06, "epoch": 17.055555555555557, "percentage": 85.28, "elapsed_time": "0:04:56", "remaining_time": "0:00:51", "throughput": 1577.76, "total_tokens": 467200} |
| {"current_steps": 3075, "total_steps": 3600, "loss": 0.0001, "lr": 3.1816801302832848e-06, "epoch": 17.083333333333332, "percentage": 85.42, "elapsed_time": "0:04:56", "remaining_time": "0:00:50", "throughput": 1578.15, "total_tokens": 467968} |
| {"current_steps": 3080, "total_steps": 3600, "loss": 0.0, "lr": 3.1227654852434454e-06, "epoch": 17.11111111111111, "percentage": 85.56, "elapsed_time": "0:04:56", "remaining_time": "0:00:50", "throughput": 1578.39, "total_tokens": 468704} |
| {"current_steps": 3085, "total_steps": 3600, "loss": 0.0001, "lr": 3.0643650511354484e-06, "epoch": 17.13888888888889, "percentage": 85.69, "elapsed_time": "0:04:57", "remaining_time": "0:00:49", "throughput": 1578.65, "total_tokens": 469440} |
| {"current_steps": 3090, "total_steps": 3600, "loss": 0.0001, "lr": 3.006480200625572e-06, "epoch": 17.166666666666668, "percentage": 85.83, "elapsed_time": "0:04:57", "remaining_time": "0:00:49", "throughput": 1578.88, "total_tokens": 470176} |
| {"current_steps": 3095, "total_steps": 3600, "loss": 0.0001, "lr": 2.949112294261591e-06, "epoch": 17.194444444444443, "percentage": 85.97, "elapsed_time": "0:04:58", "remaining_time": "0:00:48", "throughput": 1579.17, "total_tokens": 470912} |
| {"current_steps": 3100, "total_steps": 3600, "loss": 0.0, "lr": 2.89226268044083e-06, "epoch": 17.22222222222222, "percentage": 86.11, "elapsed_time": "0:04:58", "remaining_time": "0:00:48", "throughput": 1579.62, "total_tokens": 471696} |
| {"current_steps": 3105, "total_steps": 3600, "loss": 0.0, "lr": 2.8359326953784737e-06, "epoch": 17.25, "percentage": 86.25, "elapsed_time": "0:04:59", "remaining_time": "0:00:47", "throughput": 1579.96, "total_tokens": 472464} |
| {"current_steps": 3110, "total_steps": 3600, "loss": 0.0001, "lr": 2.780123663076142e-06, "epoch": 17.27777777777778, "percentage": 86.39, "elapsed_time": "0:04:59", "remaining_time": "0:00:47", "throughput": 1580.42, "total_tokens": 473264} |
| {"current_steps": 3115, "total_steps": 3600, "loss": 0.0001, "lr": 2.7248368952908053e-06, "epoch": 17.305555555555557, "percentage": 86.53, "elapsed_time": "0:04:59", "remaining_time": "0:00:46", "throughput": 1580.75, "total_tokens": 474016} |
| {"current_steps": 3120, "total_steps": 3600, "loss": 0.0001, "lr": 2.670073691503902e-06, "epoch": 17.333333333333332, "percentage": 86.67, "elapsed_time": "0:05:00", "remaining_time": "0:00:46", "throughput": 1580.98, "total_tokens": 474736} |
| {"current_steps": 3125, "total_steps": 3600, "loss": 0.0, "lr": 2.6158353388908293e-06, "epoch": 17.36111111111111, "percentage": 86.81, "elapsed_time": "0:05:00", "remaining_time": "0:00:45", "throughput": 1581.34, "total_tokens": 475504} |
| {"current_steps": 3130, "total_steps": 3600, "loss": 0.0001, "lr": 2.5621231122906873e-06, "epoch": 17.38888888888889, "percentage": 86.94, "elapsed_time": "0:05:01", "remaining_time": "0:00:45", "throughput": 1581.7, "total_tokens": 476256} |
| {"current_steps": 3135, "total_steps": 3600, "loss": 0.0001, "lr": 2.5089382741762925e-06, "epoch": 17.416666666666668, "percentage": 87.08, "elapsed_time": "0:05:01", "remaining_time": "0:00:44", "throughput": 1582.0, "total_tokens": 476992} |
| {"current_steps": 3140, "total_steps": 3600, "loss": 0.0001, "lr": 2.4562820746245386e-06, "epoch": 17.444444444444443, "percentage": 87.22, "elapsed_time": "0:05:01", "remaining_time": "0:00:44", "throughput": 1582.31, "total_tokens": 477728} |
| {"current_steps": 3145, "total_steps": 3600, "loss": 0.0, "lr": 2.4041557512869878e-06, "epoch": 17.47222222222222, "percentage": 87.36, "elapsed_time": "0:05:02", "remaining_time": "0:00:43", "throughput": 1582.67, "total_tokens": 478480} |
| {"current_steps": 3150, "total_steps": 3600, "loss": 0.0, "lr": 2.3525605293607784e-06, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:05:02", "remaining_time": "0:00:43", "throughput": 1583.18, "total_tokens": 479280} |
| {"current_steps": 3155, "total_steps": 3600, "loss": 0.0, "lr": 2.3014976215598503e-06, "epoch": 17.52777777777778, "percentage": 87.64, "elapsed_time": "0:05:03", "remaining_time": "0:00:42", "throughput": 1583.54, "total_tokens": 480032} |
| {"current_steps": 3160, "total_steps": 3600, "loss": 0.0, "lr": 2.2509682280864224e-06, "epoch": 17.555555555555557, "percentage": 87.78, "elapsed_time": "0:05:03", "remaining_time": "0:00:42", "throughput": 1583.9, "total_tokens": 480784} |
| {"current_steps": 3165, "total_steps": 3600, "loss": 0.0001, "lr": 2.2009735366027795e-06, "epoch": 17.583333333333332, "percentage": 87.92, "elapsed_time": "0:05:03", "remaining_time": "0:00:41", "throughput": 1584.25, "total_tokens": 481536} |
| {"current_steps": 3170, "total_steps": 3600, "loss": 0.0001, "lr": 2.151514722203385e-06, "epoch": 17.61111111111111, "percentage": 88.06, "elapsed_time": "0:05:04", "remaining_time": "0:00:41", "throughput": 1584.66, "total_tokens": 482304} |
| {"current_steps": 3175, "total_steps": 3600, "loss": 0.0001, "lr": 2.1025929473872274e-06, "epoch": 17.63888888888889, "percentage": 88.19, "elapsed_time": "0:05:04", "remaining_time": "0:00:40", "throughput": 1585.01, "total_tokens": 483056} |
| {"current_steps": 3180, "total_steps": 3600, "loss": 0.0, "lr": 2.0542093620305042e-06, "epoch": 17.666666666666668, "percentage": 88.33, "elapsed_time": "0:05:05", "remaining_time": "0:00:40", "throughput": 1585.42, "total_tokens": 483824} |
| {"current_steps": 3185, "total_steps": 3600, "loss": 0.0001, "lr": 2.0063651033596143e-06, "epoch": 17.694444444444443, "percentage": 88.47, "elapsed_time": "0:05:05", "remaining_time": "0:00:39", "throughput": 1585.69, "total_tokens": 484560} |
| {"current_steps": 3190, "total_steps": 3600, "loss": 0.0, "lr": 1.9590612959244055e-06, "epoch": 17.72222222222222, "percentage": 88.61, "elapsed_time": "0:05:05", "remaining_time": "0:00:39", "throughput": 1586.08, "total_tokens": 485328} |
| {"current_steps": 3195, "total_steps": 3600, "loss": 0.0, "lr": 1.912299051571764e-06, "epoch": 17.75, "percentage": 88.75, "elapsed_time": "0:05:06", "remaining_time": "0:00:38", "throughput": 1586.42, "total_tokens": 486080} |
| {"current_steps": 3200, "total_steps": 3600, "loss": 0.0001, "lr": 1.8660794694194573e-06, "epoch": 17.77777777777778, "percentage": 88.89, "elapsed_time": "0:05:06", "remaining_time": "0:00:38", "throughput": 1586.7, "total_tokens": 486832} |
| {"current_steps": 3205, "total_steps": 3600, "loss": 0.0, "lr": 1.8204036358303173e-06, "epoch": 17.805555555555557, "percentage": 89.03, "elapsed_time": "0:05:07", "remaining_time": "0:00:37", "throughput": 1587.08, "total_tokens": 487600} |
| {"current_steps": 3210, "total_steps": 3600, "loss": 0.0001, "lr": 1.775272624386695e-06, "epoch": 17.833333333333332, "percentage": 89.17, "elapsed_time": "0:05:07", "remaining_time": "0:00:37", "throughput": 1587.45, "total_tokens": 488368} |
| {"current_steps": 3215, "total_steps": 3600, "loss": 0.0, "lr": 1.7306874958652408e-06, "epoch": 17.86111111111111, "percentage": 89.31, "elapsed_time": "0:05:08", "remaining_time": "0:00:36", "throughput": 1587.94, "total_tokens": 489168} |
| {"current_steps": 3220, "total_steps": 3600, "loss": 0.0, "lr": 1.686649298211951e-06, "epoch": 17.88888888888889, "percentage": 89.44, "elapsed_time": "0:05:08", "remaining_time": "0:00:36", "throughput": 1588.2, "total_tokens": 489920} |
| {"current_steps": 3225, "total_steps": 3600, "loss": 0.0, "lr": 1.643159066517566e-06, "epoch": 17.916666666666668, "percentage": 89.58, "elapsed_time": "0:05:08", "remaining_time": "0:00:35", "throughput": 1588.58, "total_tokens": 490688} |
| {"current_steps": 3230, "total_steps": 3600, "loss": 0.0, "lr": 1.6002178229932107e-06, "epoch": 17.944444444444443, "percentage": 89.72, "elapsed_time": "0:05:09", "remaining_time": "0:00:35", "throughput": 1589.01, "total_tokens": 491472} |
| {"current_steps": 3235, "total_steps": 3600, "loss": 0.0, "lr": 1.5578265769463806e-06, "epoch": 17.97222222222222, "percentage": 89.86, "elapsed_time": "0:05:09", "remaining_time": "0:00:34", "throughput": 1589.3, "total_tokens": 492192} |
| {"current_steps": 3240, "total_steps": 3600, "loss": 0.0, "lr": 1.5159863247572236e-06, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:05:10", "remaining_time": "0:00:34", "throughput": 1589.48, "total_tokens": 492976} |
| {"current_steps": 3240, "total_steps": 3600, "eval_loss": 1.0990222692489624, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:05:10", "remaining_time": "0:00:34", "throughput": 1585.15, "total_tokens": 492976} |
| {"current_steps": 3245, "total_steps": 3600, "loss": 0.0, "lr": 1.4746980498551112e-06, "epoch": 18.02777777777778, "percentage": 90.14, "elapsed_time": "0:05:12", "remaining_time": "0:00:34", "throughput": 1578.68, "total_tokens": 493728} |
| {"current_steps": 3250, "total_steps": 3600, "loss": 0.0001, "lr": 1.4339627226955392e-06, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "0:05:13", "remaining_time": "0:00:33", "throughput": 1578.95, "total_tokens": 494496} |
| {"current_steps": 3255, "total_steps": 3600, "loss": 0.0, "lr": 1.3937813007373013e-06, "epoch": 18.083333333333332, "percentage": 90.42, "elapsed_time": "0:05:13", "remaining_time": "0:00:33", "throughput": 1579.36, "total_tokens": 495280} |
| {"current_steps": 3260, "total_steps": 3600, "loss": 0.0, "lr": 1.354154728419979e-06, "epoch": 18.11111111111111, "percentage": 90.56, "elapsed_time": "0:05:14", "remaining_time": "0:00:32", "throughput": 1579.71, "total_tokens": 496048} |
| {"current_steps": 3265, "total_steps": 3600, "loss": 0.0, "lr": 1.31508393714177e-06, "epoch": 18.13888888888889, "percentage": 90.69, "elapsed_time": "0:05:14", "remaining_time": "0:00:32", "throughput": 1580.05, "total_tokens": 496800} |
| {"current_steps": 3270, "total_steps": 3600, "loss": 0.0001, "lr": 1.276569845237574e-06, "epoch": 18.166666666666668, "percentage": 90.83, "elapsed_time": "0:05:14", "remaining_time": "0:00:31", "throughput": 1580.44, "total_tokens": 497584} |
| {"current_steps": 3275, "total_steps": 3600, "loss": 0.0, "lr": 1.2386133579574189e-06, "epoch": 18.194444444444443, "percentage": 90.97, "elapsed_time": "0:05:15", "remaining_time": "0:00:31", "throughput": 1580.77, "total_tokens": 498352} |
| {"current_steps": 3280, "total_steps": 3600, "loss": 0.0, "lr": 1.2012153674451715e-06, "epoch": 18.22222222222222, "percentage": 91.11, "elapsed_time": "0:05:15", "remaining_time": "0:00:30", "throughput": 1581.08, "total_tokens": 499104} |
| {"current_steps": 3285, "total_steps": 3600, "loss": 0.0, "lr": 1.1643767527175857e-06, "epoch": 18.25, "percentage": 91.25, "elapsed_time": "0:05:16", "remaining_time": "0:00:30", "throughput": 1581.39, "total_tokens": 499856} |
| {"current_steps": 3290, "total_steps": 3600, "loss": 0.0, "lr": 1.1280983796436245e-06, "epoch": 18.27777777777778, "percentage": 91.39, "elapsed_time": "0:05:16", "remaining_time": "0:00:29", "throughput": 1581.68, "total_tokens": 500608} |
| {"current_steps": 3295, "total_steps": 3600, "loss": 0.0001, "lr": 1.0923811009241142e-06, "epoch": 18.305555555555557, "percentage": 91.53, "elapsed_time": "0:05:16", "remaining_time": "0:00:29", "throughput": 1581.97, "total_tokens": 501344} |
| {"current_steps": 3300, "total_steps": 3600, "loss": 0.0001, "lr": 1.0572257560717086e-06, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:05:17", "remaining_time": "0:00:28", "throughput": 1582.29, "total_tokens": 502096} |
| {"current_steps": 3305, "total_steps": 3600, "loss": 0.0001, "lr": 1.0226331713911546e-06, "epoch": 18.36111111111111, "percentage": 91.81, "elapsed_time": "0:05:17", "remaining_time": "0:00:28", "throughput": 1582.63, "total_tokens": 502864} |
| {"current_steps": 3310, "total_steps": 3600, "loss": 0.0, "lr": 9.886041599598606e-07, "epoch": 18.38888888888889, "percentage": 91.94, "elapsed_time": "0:05:18", "remaining_time": "0:00:27", "throughput": 1583.07, "total_tokens": 503664} |
| {"current_steps": 3315, "total_steps": 3600, "loss": 0.0, "lr": 9.551395216087944e-07, "epoch": 18.416666666666668, "percentage": 92.08, "elapsed_time": "0:05:18", "remaining_time": "0:00:27", "throughput": 1583.41, "total_tokens": 504432} |
| {"current_steps": 3320, "total_steps": 3600, "loss": 0.0, "lr": 9.222400429036854e-07, "epoch": 18.444444444444443, "percentage": 92.22, "elapsed_time": "0:05:18", "remaining_time": "0:00:26", "throughput": 1583.69, "total_tokens": 505168} |
| {"current_steps": 3325, "total_steps": 3600, "loss": 0.0, "lr": 8.899064971265276e-07, "epoch": 18.47222222222222, "percentage": 92.36, "elapsed_time": "0:05:19", "remaining_time": "0:00:26", "throughput": 1584.12, "total_tokens": 505952} |
| {"current_steps": 3330, "total_steps": 3600, "loss": 0.0, "lr": 8.581396442574135e-07, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:05:19", "remaining_time": "0:00:25", "throughput": 1584.51, "total_tokens": 506720} |
| {"current_steps": 3335, "total_steps": 3600, "loss": 0.0, "lr": 8.269402309566743e-07, "epoch": 18.52777777777778, "percentage": 92.64, "elapsed_time": "0:05:20", "remaining_time": "0:00:25", "throughput": 1584.82, "total_tokens": 507472} |
| {"current_steps": 3340, "total_steps": 3600, "loss": 0.0, "lr": 7.963089905473092e-07, "epoch": 18.555555555555557, "percentage": 92.78, "elapsed_time": "0:05:20", "remaining_time": "0:00:24", "throughput": 1585.16, "total_tokens": 508224} |
| {"current_steps": 3345, "total_steps": 3600, "loss": 0.0001, "lr": 7.662466429977699e-07, "epoch": 18.583333333333332, "percentage": 92.92, "elapsed_time": "0:05:21", "remaining_time": "0:00:24", "throughput": 1585.41, "total_tokens": 508960} |
| {"current_steps": 3350, "total_steps": 3600, "loss": 0.0, "lr": 7.367538949050345e-07, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "0:05:21", "remaining_time": "0:00:23", "throughput": 1585.69, "total_tokens": 509696} |
| {"current_steps": 3355, "total_steps": 3600, "loss": 0.0001, "lr": 7.078314394779961e-07, "epoch": 18.63888888888889, "percentage": 93.19, "elapsed_time": "0:05:21", "remaining_time": "0:00:23", "throughput": 1586.01, "total_tokens": 510448} |
| {"current_steps": 3360, "total_steps": 3600, "loss": 0.0001, "lr": 6.794799565211646e-07, "epoch": 18.666666666666668, "percentage": 93.33, "elapsed_time": "0:05:22", "remaining_time": "0:00:23", "throughput": 1586.33, "total_tokens": 511216} |
| {"current_steps": 3365, "total_steps": 3600, "loss": 0.0001, "lr": 6.517001124186989e-07, "epoch": 18.694444444444443, "percentage": 93.47, "elapsed_time": "0:05:22", "remaining_time": "0:00:22", "throughput": 1586.7, "total_tokens": 511984} |
| {"current_steps": 3370, "total_steps": 3600, "loss": 0.0, "lr": 6.244925601187363e-07, "epoch": 18.72222222222222, "percentage": 93.61, "elapsed_time": "0:05:23", "remaining_time": "0:00:22", "throughput": 1587.18, "total_tokens": 512784} |
| {"current_steps": 3375, "total_steps": 3600, "loss": 0.0001, "lr": 5.978579391180461e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "0:05:23", "remaining_time": "0:00:21", "throughput": 1587.49, "total_tokens": 513536} |
| {"current_steps": 3380, "total_steps": 3600, "loss": 0.0, "lr": 5.717968754469977e-07, "epoch": 18.77777777777778, "percentage": 93.89, "elapsed_time": "0:05:23", "remaining_time": "0:00:21", "throughput": 1587.78, "total_tokens": 514304} |
| {"current_steps": 3385, "total_steps": 3600, "loss": 0.0, "lr": 5.463099816548579e-07, "epoch": 18.805555555555557, "percentage": 94.03, "elapsed_time": "0:05:24", "remaining_time": "0:00:20", "throughput": 1587.96, "total_tokens": 515040} |
| {"current_steps": 3390, "total_steps": 3600, "loss": 0.0001, "lr": 5.213978567953775e-07, "epoch": 18.833333333333332, "percentage": 94.17, "elapsed_time": "0:05:24", "remaining_time": "0:00:20", "throughput": 1588.23, "total_tokens": 515792} |
| {"current_steps": 3395, "total_steps": 3600, "loss": 0.0, "lr": 4.970610864127173e-07, "epoch": 18.86111111111111, "percentage": 94.31, "elapsed_time": "0:05:25", "remaining_time": "0:00:19", "throughput": 1588.53, "total_tokens": 516544} |
| {"current_steps": 3400, "total_steps": 3600, "loss": 0.0, "lr": 4.7330024252768555e-07, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "0:05:25", "remaining_time": "0:00:19", "throughput": 1588.9, "total_tokens": 517328} |
| {"current_steps": 3405, "total_steps": 3600, "loss": 0.0, "lr": 4.5011588362429134e-07, "epoch": 18.916666666666668, "percentage": 94.58, "elapsed_time": "0:05:25", "remaining_time": "0:00:18", "throughput": 1589.31, "total_tokens": 518112} |
| {"current_steps": 3410, "total_steps": 3600, "loss": 0.0, "lr": 4.2750855463662143e-07, "epoch": 18.944444444444443, "percentage": 94.72, "elapsed_time": "0:05:26", "remaining_time": "0:00:18", "throughput": 1589.61, "total_tokens": 518864} |
| {"current_steps": 3415, "total_steps": 3600, "loss": 0.0001, "lr": 4.05478786936031e-07, "epoch": 18.97222222222222, "percentage": 94.86, "elapsed_time": "0:05:26", "remaining_time": "0:00:17", "throughput": 1589.95, "total_tokens": 519616} |
| {"current_steps": 3420, "total_steps": 3600, "loss": 0.0, "lr": 3.8402709831865113e-07, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:05:27", "remaining_time": "0:00:17", "throughput": 1589.97, "total_tokens": 520384} |
| {"current_steps": 3420, "total_steps": 3600, "eval_loss": 1.0897626876831055, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:05:28", "remaining_time": "0:00:17", "throughput": 1585.88, "total_tokens": 520384} |
| {"current_steps": 3425, "total_steps": 3600, "loss": 0.0, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:29", "remaining_time": "0:00:16", "throughput": 1579.91, "total_tokens": 521168} |
| {"current_steps": 3430, "total_steps": 3600, "loss": 0.0, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:30", "remaining_time": "0:00:16", "throughput": 1580.0, "total_tokens": 521888} |
| {"current_steps": 3435, "total_steps": 3600, "loss": 0.0, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:05:30", "remaining_time": "0:00:15", "throughput": 1580.15, "total_tokens": 522608} |
| {"current_steps": 3440, "total_steps": 3600, "loss": 0.0, "lr": 3.040110147984221e-07, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:05:31", "remaining_time": "0:00:15", "throughput": 1580.5, "total_tokens": 523392} |
| {"current_steps": 3445, "total_steps": 3600, "loss": 0.0, "lr": 2.8545701257221e-07, "epoch": 19.13888888888889, "percentage": 95.69, "elapsed_time": "0:05:31", "remaining_time": "0:00:14", "throughput": 1580.82, "total_tokens": 524144} |
| {"current_steps": 3450, "total_steps": 3600, "loss": 0.0001, "lr": 2.674839104671367e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:05:31", "remaining_time": "0:00:14", "throughput": 1581.13, "total_tokens": 524896} |
| {"current_steps": 3455, "total_steps": 3600, "loss": 0.0001, "lr": 2.5009213092991034e-07, "epoch": 19.194444444444443, "percentage": 95.97, "elapsed_time": "0:05:32", "remaining_time": "0:00:13", "throughput": 1581.41, "total_tokens": 525648} |
| {"current_steps": 3460, "total_steps": 3600, "loss": 0.0001, "lr": 2.3328208274359942e-07, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:05:32", "remaining_time": "0:00:13", "throughput": 1581.74, "total_tokens": 526400} |
| {"current_steps": 3465, "total_steps": 3600, "loss": 0.0, "lr": 2.170541610180432e-07, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:05:33", "remaining_time": "0:00:12", "throughput": 1582.05, "total_tokens": 527152} |
| {"current_steps": 3470, "total_steps": 3600, "loss": 0.0, "lr": 2.014087471805509e-07, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:05:33", "remaining_time": "0:00:12", "throughput": 1582.4, "total_tokens": 527920} |
| {"current_steps": 3475, "total_steps": 3600, "loss": 0.0, "lr": 1.8634620896695043e-07, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "0:05:34", "remaining_time": "0:00:12", "throughput": 1582.74, "total_tokens": 528688} |
| {"current_steps": 3480, "total_steps": 3600, "loss": 0.0, "lr": 1.7186690041292586e-07, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:05:34", "remaining_time": "0:00:11", "throughput": 1583.1, "total_tokens": 529456} |
| {"current_steps": 3485, "total_steps": 3600, "loss": 0.0, "lr": 1.5797116184571304e-07, "epoch": 19.36111111111111, "percentage": 96.81, "elapsed_time": "0:05:34", "remaining_time": "0:00:11", "throughput": 1583.38, "total_tokens": 530208} |
| {"current_steps": 3490, "total_steps": 3600, "loss": 0.0001, "lr": 1.4465931987609482e-07, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:05:35", "remaining_time": "0:00:10", "throughput": 1583.72, "total_tokens": 530976} |
| {"current_steps": 3495, "total_steps": 3600, "loss": 0.0, "lr": 1.319316873907267e-07, "epoch": 19.416666666666668, "percentage": 97.08, "elapsed_time": "0:05:35", "remaining_time": "0:00:10", "throughput": 1584.01, "total_tokens": 531728} |
| {"current_steps": 3500, "total_steps": 3600, "loss": 0.0001, "lr": 1.1978856354477595e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:05:36", "remaining_time": "0:00:09", "throughput": 1584.41, "total_tokens": 532512} |
| {"current_steps": 3505, "total_steps": 3600, "loss": 0.0, "lr": 1.0823023375489127e-07, "epoch": 19.47222222222222, "percentage": 97.36, "elapsed_time": "0:05:36", "remaining_time": "0:00:09", "throughput": 1584.64, "total_tokens": 533264} |
| {"current_steps": 3510, "total_steps": 3600, "loss": 0.0, "lr": 9.725696969249965e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:05:36", "remaining_time": "0:00:08", "throughput": 1584.92, "total_tokens": 534016} |
| {"current_steps": 3515, "total_steps": 3600, "loss": 0.0001, "lr": 8.686902927741991e-08, "epoch": 19.52777777777778, "percentage": 97.64, "elapsed_time": "0:05:37", "remaining_time": "0:00:08", "throughput": 1585.21, "total_tokens": 534768} |
| {"current_steps": 3520, "total_steps": 3600, "loss": 0.0, "lr": 7.706665667180091e-08, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:05:37", "remaining_time": "0:00:07", "throughput": 1585.5, "total_tokens": 535520} |
| {"current_steps": 3525, "total_steps": 3600, "loss": 0.0001, "lr": 6.785008227437329e-08, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "0:05:38", "remaining_time": "0:00:07", "throughput": 1585.81, "total_tokens": 536304} |
| {"current_steps": 3530, "total_steps": 3600, "loss": 0.0001, "lr": 5.921952271504827e-08, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:05:38", "remaining_time": "0:00:06", "throughput": 1586.14, "total_tokens": 537088} |
| {"current_steps": 3535, "total_steps": 3600, "loss": 0.0, "lr": 5.117518084981621e-08, "epoch": 19.63888888888889, "percentage": 98.19, "elapsed_time": "0:05:39", "remaining_time": "0:00:06", "throughput": 1586.47, "total_tokens": 537872} |
| {"current_steps": 3540, "total_steps": 3600, "loss": 0.0, "lr": 4.371724575597535e-08, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:05:39", "remaining_time": "0:00:05", "throughput": 1586.82, "total_tokens": 538640} |
| {"current_steps": 3545, "total_steps": 3600, "loss": 0.0, "lr": 3.684589272771044e-08, "epoch": 19.694444444444443, "percentage": 98.47, "elapsed_time": "0:05:39", "remaining_time": "0:00:05", "throughput": 1587.12, "total_tokens": 539392} |
| {"current_steps": 3550, "total_steps": 3600, "loss": 0.0, "lr": 3.056128327193486e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:05:40", "remaining_time": "0:00:04", "throughput": 1587.36, "total_tokens": 540128} |
| {"current_steps": 3555, "total_steps": 3600, "loss": 0.0001, "lr": 2.486356510453258e-08, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:05:40", "remaining_time": "0:00:04", "throughput": 1587.57, "total_tokens": 540864} |
| {"current_steps": 3560, "total_steps": 3600, "loss": 0.0, "lr": 1.975287214685817e-08, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:05:41", "remaining_time": "0:00:03", "throughput": 1587.94, "total_tokens": 541648} |
| {"current_steps": 3565, "total_steps": 3600, "loss": 0.0001, "lr": 1.522932452260595e-08, "epoch": 19.805555555555557, "percentage": 99.03, "elapsed_time": "0:05:41", "remaining_time": "0:00:03", "throughput": 1588.22, "total_tokens": 542400} |
| {"current_steps": 3570, "total_steps": 3600, "loss": 0.0001, "lr": 1.1293028554978935e-08, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:05:41", "remaining_time": "0:00:02", "throughput": 1588.58, "total_tokens": 543168} |
| {"current_steps": 3575, "total_steps": 3600, "loss": 0.0001, "lr": 7.944076764190845e-09, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "0:05:42", "remaining_time": "0:00:02", "throughput": 1588.95, "total_tokens": 543952} |
| {"current_steps": 3580, "total_steps": 3600, "loss": 0.0001, "lr": 5.182547865290044e-09, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:05:42", "remaining_time": "0:00:01", "throughput": 1589.34, "total_tokens": 544736} |
| {"current_steps": 3585, "total_steps": 3600, "loss": 0.0, "lr": 3.008506766313812e-09, "epoch": 19.916666666666668, "percentage": 99.58, "elapsed_time": "0:05:43", "remaining_time": "0:00:01", "throughput": 1589.61, "total_tokens": 545488} |
| {"current_steps": 3590, "total_steps": 3600, "loss": 0.0, "lr": 1.4220045667645566e-09, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:05:43", "remaining_time": "0:00:00", "throughput": 1589.99, "total_tokens": 546272} |
| {"current_steps": 3595, "total_steps": 3600, "loss": 0.0001, "lr": 4.2307855639411865e-10, "epoch": 19.97222222222222, "percentage": 99.86, "elapsed_time": "0:05:43", "remaining_time": "0:00:00", "throughput": 1590.31, "total_tokens": 547024} |
| {"current_steps": 3600, "total_steps": 3600, "loss": 0.0, "lr": 1.1752214348903501e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:44", "remaining_time": "0:00:00", "throughput": 1590.37, "total_tokens": 547792} |
| {"current_steps": 3600, "total_steps": 3600, "eval_loss": 1.1001733541488647, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:45", "remaining_time": "0:00:00", "throughput": 1586.44, "total_tokens": 547792} |
| {"current_steps": 3600, "total_steps": 3600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:46", "remaining_time": "0:00:00", "throughput": 1580.48, "total_tokens": 547792} |
|
|