| {"current_steps": 5, "total_steps": 900, "loss": 0.5525, "lr": 2.2222222222222225e-06, "epoch": 0.05555555555555555, "percentage": 0.56, "elapsed_time": "0:00:01", "remaining_time": "0:04:25", "throughput": 1036.05, "total_tokens": 1536} | |
| {"current_steps": 10, "total_steps": 900, "loss": 0.6403, "lr": 5e-06, "epoch": 0.1111111111111111, "percentage": 1.11, "elapsed_time": "0:00:02", "remaining_time": "0:03:37", "throughput": 1296.6, "total_tokens": 3168} | |
| {"current_steps": 15, "total_steps": 900, "loss": 0.6012, "lr": 7.777777777777777e-06, "epoch": 0.16666666666666666, "percentage": 1.67, "elapsed_time": "0:00:03", "remaining_time": "0:03:20", "throughput": 1392.12, "total_tokens": 4736} | |
| {"current_steps": 20, "total_steps": 900, "loss": 0.4236, "lr": 1.0555555555555555e-05, "epoch": 0.2222222222222222, "percentage": 2.22, "elapsed_time": "0:00:04", "remaining_time": "0:03:11", "throughput": 1445.99, "total_tokens": 6304} | |
| {"current_steps": 25, "total_steps": 900, "loss": 0.147, "lr": 1.3333333333333333e-05, "epoch": 0.2777777777777778, "percentage": 2.78, "elapsed_time": "0:00:05", "remaining_time": "0:03:06", "throughput": 1471.82, "total_tokens": 7840} | |
| {"current_steps": 30, "total_steps": 900, "loss": 0.0189, "lr": 1.6111111111111115e-05, "epoch": 0.3333333333333333, "percentage": 3.33, "elapsed_time": "0:00:06", "remaining_time": "0:03:02", "throughput": 1493.81, "total_tokens": 9408} | |
| {"current_steps": 35, "total_steps": 900, "loss": 0.1336, "lr": 1.888888888888889e-05, "epoch": 0.3888888888888889, "percentage": 3.89, "elapsed_time": "0:00:07", "remaining_time": "0:02:59", "throughput": 1503.3, "total_tokens": 10912} | |
| {"current_steps": 40, "total_steps": 900, "loss": 0.0597, "lr": 2.1666666666666667e-05, "epoch": 0.4444444444444444, "percentage": 4.44, "elapsed_time": "0:00:08", "remaining_time": "0:02:56", "throughput": 1515.51, "total_tokens": 12448} | |
| {"current_steps": 45, "total_steps": 900, "loss": 0.0489, "lr": 2.4444444444444445e-05, "epoch": 0.5, "percentage": 5.0, "elapsed_time": "0:00:09", "remaining_time": "0:02:54", "throughput": 1528.37, "total_tokens": 14016} | |
| {"current_steps": 45, "total_steps": 900, "eval_loss": 0.15566711127758026, "epoch": 0.5, "percentage": 5.0, "elapsed_time": "0:00:09", "remaining_time": "0:03:06", "throughput": 1430.39, "total_tokens": 14016} | |
| {"current_steps": 50, "total_steps": 900, "loss": 0.1599, "lr": 2.7222222222222223e-05, "epoch": 0.5555555555555556, "percentage": 5.56, "elapsed_time": "0:00:12", "remaining_time": "0:03:24", "throughput": 1298.08, "total_tokens": 15584} | |
| {"current_steps": 55, "total_steps": 900, "loss": 0.1532, "lr": 3e-05, "epoch": 0.6111111111111112, "percentage": 6.11, "elapsed_time": "0:00:12", "remaining_time": "0:03:19", "throughput": 1322.05, "total_tokens": 17184} | |
| {"current_steps": 60, "total_steps": 900, "loss": 0.0488, "lr": 3.277777777777778e-05, "epoch": 0.6666666666666666, "percentage": 6.67, "elapsed_time": "0:00:13", "remaining_time": "0:03:15", "throughput": 1339.6, "total_tokens": 18752} | |
| {"current_steps": 65, "total_steps": 900, "loss": 0.1306, "lr": 3.555555555555556e-05, "epoch": 0.7222222222222222, "percentage": 7.22, "elapsed_time": "0:00:14", "remaining_time": "0:03:12", "throughput": 1357.45, "total_tokens": 20352} | |
| {"current_steps": 70, "total_steps": 900, "loss": 0.0936, "lr": 3.8333333333333334e-05, "epoch": 0.7777777777777778, "percentage": 7.78, "elapsed_time": "0:00:15", "remaining_time": "0:03:09", "throughput": 1374.1, "total_tokens": 21952} | |
| {"current_steps": 75, "total_steps": 900, "loss": 0.0789, "lr": 4.111111111111111e-05, "epoch": 0.8333333333333334, "percentage": 8.33, "elapsed_time": "0:00:16", "remaining_time": "0:03:06", "throughput": 1384.93, "total_tokens": 23456} | |
| {"current_steps": 80, "total_steps": 900, "loss": 0.1988, "lr": 4.388888888888889e-05, "epoch": 0.8888888888888888, "percentage": 8.89, "elapsed_time": "0:00:17", "remaining_time": "0:03:03", "throughput": 1399.19, "total_tokens": 25056} | |
| {"current_steps": 85, "total_steps": 900, "loss": 0.0413, "lr": 4.666666666666667e-05, "epoch": 0.9444444444444444, "percentage": 9.44, "elapsed_time": "0:00:18", "remaining_time": "0:03:00", "throughput": 1408.61, "total_tokens": 26560} | |
| {"current_steps": 90, "total_steps": 900, "loss": 0.2106, "lr": 4.9444444444444446e-05, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:00:19", "remaining_time": "0:02:58", "throughput": 1414.29, "total_tokens": 28096} | |
| {"current_steps": 90, "total_steps": 900, "eval_loss": 0.08737044036388397, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:00:20", "remaining_time": "0:03:04", "throughput": 1370.95, "total_tokens": 28096} | |
| {"current_steps": 95, "total_steps": 900, "loss": 0.1241, "lr": 4.9996991493233693e-05, "epoch": 1.0555555555555556, "percentage": 10.56, "elapsed_time": "0:00:22", "remaining_time": "0:03:10", "throughput": 1319.53, "total_tokens": 29696} | |
| {"current_steps": 100, "total_steps": 900, "loss": 0.0412, "lr": 4.99847706754774e-05, "epoch": 1.1111111111111112, "percentage": 11.11, "elapsed_time": "0:00:23", "remaining_time": "0:03:07", "throughput": 1330.68, "total_tokens": 31232} | |
| {"current_steps": 105, "total_steps": 900, "loss": 0.025, "lr": 4.9963154107272295e-05, "epoch": 1.1666666666666667, "percentage": 11.67, "elapsed_time": "0:00:24", "remaining_time": "0:03:04", "throughput": 1341.45, "total_tokens": 32768} | |
| {"current_steps": 110, "total_steps": 900, "loss": 0.0024, "lr": 4.993214991772563e-05, "epoch": 1.2222222222222223, "percentage": 12.22, "elapsed_time": "0:00:25", "remaining_time": "0:03:02", "throughput": 1351.15, "total_tokens": 34304} | |
| {"current_steps": 115, "total_steps": 900, "loss": 0.0087, "lr": 4.989176976624511e-05, "epoch": 1.2777777777777777, "percentage": 12.78, "elapsed_time": "0:00:26", "remaining_time": "0:02:59", "throughput": 1361.44, "total_tokens": 35872} | |
| {"current_steps": 120, "total_steps": 900, "loss": 0.029, "lr": 4.9842028838154285e-05, "epoch": 1.3333333333333333, "percentage": 13.33, "elapsed_time": "0:00:27", "remaining_time": "0:02:57", "throughput": 1369.82, "total_tokens": 37408} | |
| {"current_steps": 125, "total_steps": 900, "loss": 0.064, "lr": 4.978294583898196e-05, "epoch": 1.3888888888888888, "percentage": 13.89, "elapsed_time": "0:00:28", "remaining_time": "0:02:55", "throughput": 1378.81, "total_tokens": 38976} | |
| {"current_steps": 130, "total_steps": 900, "loss": 0.0551, "lr": 4.971454298742779e-05, "epoch": 1.4444444444444444, "percentage": 14.44, "elapsed_time": "0:00:29", "remaining_time": "0:02:53", "throughput": 1387.94, "total_tokens": 40576} | |
| {"current_steps": 135, "total_steps": 900, "loss": 0.0645, "lr": 4.963684600700679e-05, "epoch": 1.5, "percentage": 15.0, "elapsed_time": "0:00:30", "remaining_time": "0:02:51", "throughput": 1395.19, "total_tokens": 42144} | |
| {"current_steps": 135, "total_steps": 900, "eval_loss": 0.10639457404613495, "epoch": 1.5, "percentage": 15.0, "elapsed_time": "0:00:30", "remaining_time": "0:02:54", "throughput": 1365.92, "total_tokens": 42144} | |
| {"current_steps": 140, "total_steps": 900, "loss": 0.013, "lr": 4.9549884116375714e-05, "epoch": 1.5555555555555556, "percentage": 15.56, "elapsed_time": "0:00:33", "remaining_time": "0:02:59", "throughput": 1319.91, "total_tokens": 43680} | |
| {"current_steps": 145, "total_steps": 900, "loss": 0.1366, "lr": 4.9453690018345144e-05, "epoch": 1.6111111111111112, "percentage": 16.11, "elapsed_time": "0:00:34", "remaining_time": "0:02:57", "throughput": 1328.24, "total_tokens": 45248} | |
| {"current_steps": 150, "total_steps": 900, "loss": 0.0008, "lr": 4.934829988758131e-05, "epoch": 1.6666666666666665, "percentage": 16.67, "elapsed_time": "0:00:35", "remaining_time": "0:02:55", "throughput": 1335.96, "total_tokens": 46816} | |
| {"current_steps": 155, "total_steps": 900, "loss": 0.1196, "lr": 4.923375335700223e-05, "epoch": 1.7222222222222223, "percentage": 17.22, "elapsed_time": "0:00:36", "remaining_time": "0:02:53", "throughput": 1343.46, "total_tokens": 48384} | |
| {"current_steps": 160, "total_steps": 900, "loss": 0.0026, "lr": 4.9110093502873476e-05, "epoch": 1.7777777777777777, "percentage": 17.78, "elapsed_time": "0:00:37", "remaining_time": "0:02:51", "throughput": 1349.95, "total_tokens": 49952} | |
| {"current_steps": 165, "total_steps": 900, "loss": 0.0112, "lr": 4.897736682860885e-05, "epoch": 1.8333333333333335, "percentage": 18.33, "elapsed_time": "0:00:37", "remaining_time": "0:02:49", "throughput": 1356.3, "total_tokens": 51520} | |
| {"current_steps": 170, "total_steps": 900, "loss": 0.0257, "lr": 4.883562324728241e-05, "epoch": 1.8888888888888888, "percentage": 18.89, "elapsed_time": "0:00:38", "remaining_time": "0:02:47", "throughput": 1361.0, "total_tokens": 53024} | |
| {"current_steps": 175, "total_steps": 900, "loss": 0.0341, "lr": 4.868491606285823e-05, "epoch": 1.9444444444444444, "percentage": 19.44, "elapsed_time": "0:00:39", "remaining_time": "0:02:45", "throughput": 1368.01, "total_tokens": 54592} | |
| {"current_steps": 180, "total_steps": 900, "loss": 0.1351, "lr": 4.8525301950144894e-05, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:00:40", "remaining_time": "0:02:43", "throughput": 1371.57, "total_tokens": 56128} | |
| {"current_steps": 180, "total_steps": 900, "eval_loss": 0.04244411736726761, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:00:41", "remaining_time": "0:02:46", "throughput": 1350.28, "total_tokens": 56128} | |
| {"current_steps": 185, "total_steps": 900, "loss": 0.0006, "lr": 4.835684093348244e-05, "epoch": 2.0555555555555554, "percentage": 20.56, "elapsed_time": "0:00:43", "remaining_time": "0:02:49", "throughput": 1312.15, "total_tokens": 57696} | |
| {"current_steps": 190, "total_steps": 900, "loss": 0.0004, "lr": 4.817959636416969e-05, "epoch": 2.111111111111111, "percentage": 21.11, "elapsed_time": "0:00:44", "remaining_time": "0:02:47", "throughput": 1318.57, "total_tokens": 59264} | |
| {"current_steps": 195, "total_steps": 900, "loss": 0.0005, "lr": 4.7993634896640394e-05, "epoch": 2.1666666666666665, "percentage": 21.67, "elapsed_time": "0:00:45", "remaining_time": "0:02:46", "throughput": 1324.82, "total_tokens": 60864} | |
| {"current_steps": 200, "total_steps": 900, "loss": 0.0011, "lr": 4.779902646339722e-05, "epoch": 2.2222222222222223, "percentage": 22.22, "elapsed_time": "0:00:46", "remaining_time": "0:02:44", "throughput": 1331.04, "total_tokens": 62464} | |
| {"current_steps": 205, "total_steps": 900, "loss": 0.0133, "lr": 4.759584424871302e-05, "epoch": 2.2777777777777777, "percentage": 22.78, "elapsed_time": "0:00:47", "remaining_time": "0:02:42", "throughput": 1336.81, "total_tokens": 64032} | |
| {"current_steps": 210, "total_steps": 900, "loss": 0.0005, "lr": 4.7384164661109176e-05, "epoch": 2.3333333333333335, "percentage": 23.33, "elapsed_time": "0:00:48", "remaining_time": "0:02:40", "throughput": 1341.9, "total_tokens": 65568} | |
| {"current_steps": 215, "total_steps": 900, "loss": 0.0009, "lr": 4.7164067304621536e-05, "epoch": 2.388888888888889, "percentage": 23.89, "elapsed_time": "0:00:49", "remaining_time": "0:02:38", "throughput": 1346.91, "total_tokens": 67104} | |
| {"current_steps": 220, "total_steps": 900, "loss": 0.0001, "lr": 4.693563494886455e-05, "epoch": 2.4444444444444446, "percentage": 24.44, "elapsed_time": "0:00:50", "remaining_time": "0:02:36", "throughput": 1352.95, "total_tokens": 68704} | |
| {"current_steps": 225, "total_steps": 900, "loss": 0.0005, "lr": 4.669895349790502e-05, "epoch": 2.5, "percentage": 25.0, "elapsed_time": "0:00:51", "remaining_time": "0:02:35", "throughput": 1358.05, "total_tokens": 70272} | |
| {"current_steps": 225, "total_steps": 900, "eval_loss": 0.060432981699705124, "epoch": 2.5, "percentage": 25.0, "elapsed_time": "0:00:52", "remaining_time": "0:02:37", "throughput": 1341.38, "total_tokens": 70272} | |
| {"current_steps": 230, "total_steps": 900, "loss": 0.0001, "lr": 4.645411195795709e-05, "epoch": 2.5555555555555554, "percentage": 25.56, "elapsed_time": "0:00:54", "remaining_time": "0:02:39", "throughput": 1313.71, "total_tokens": 71808} | |
| {"current_steps": 235, "total_steps": 900, "loss": 0.0398, "lr": 4.620120240391065e-05, "epoch": 2.611111111111111, "percentage": 26.11, "elapsed_time": "0:00:55", "remaining_time": "0:02:37", "throughput": 1319.38, "total_tokens": 73408} | |
| {"current_steps": 240, "total_steps": 900, "loss": 0.0031, "lr": 4.5940319944705736e-05, "epoch": 2.6666666666666665, "percentage": 26.67, "elapsed_time": "0:00:56", "remaining_time": "0:02:35", "throughput": 1322.91, "total_tokens": 74912} | |
| {"current_steps": 245, "total_steps": 900, "loss": 0.0007, "lr": 4.567156268756594e-05, "epoch": 2.7222222222222223, "percentage": 27.22, "elapsed_time": "0:00:57", "remaining_time": "0:02:34", "throughput": 1328.37, "total_tokens": 76544} | |
| {"current_steps": 250, "total_steps": 900, "loss": 0.0001, "lr": 4.539503170110431e-05, "epoch": 2.7777777777777777, "percentage": 27.78, "elapsed_time": "0:00:58", "remaining_time": "0:02:32", "throughput": 1332.91, "total_tokens": 78112} | |
| {"current_steps": 255, "total_steps": 900, "loss": 0.0, "lr": 4.5110830977315556e-05, "epoch": 2.8333333333333335, "percentage": 28.33, "elapsed_time": "0:00:59", "remaining_time": "0:02:30", "throughput": 1338.24, "total_tokens": 79712} | |
| {"current_steps": 260, "total_steps": 900, "loss": 0.0001, "lr": 4.4819067392468944e-05, "epoch": 2.888888888888889, "percentage": 28.89, "elapsed_time": "0:01:00", "remaining_time": "0:02:29", "throughput": 1342.67, "total_tokens": 81280} | |
| {"current_steps": 265, "total_steps": 900, "loss": 0.047, "lr": 4.4519850666916484e-05, "epoch": 2.9444444444444446, "percentage": 29.44, "elapsed_time": "0:01:01", "remaining_time": "0:02:27", "throughput": 1347.37, "total_tokens": 82848} | |
| {"current_steps": 270, "total_steps": 900, "loss": 0.0005, "lr": 4.4213293323831585e-05, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:01:02", "remaining_time": "0:02:25", "throughput": 1349.24, "total_tokens": 84352} | |
| {"current_steps": 270, "total_steps": 900, "eval_loss": 0.047240711748600006, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:01:03", "remaining_time": "0:02:27", "throughput": 1335.36, "total_tokens": 84352} | |
| {"current_steps": 275, "total_steps": 900, "loss": 0.0, "lr": 4.38995106468937e-05, "epoch": 3.0555555555555554, "percentage": 30.56, "elapsed_time": "0:01:05", "remaining_time": "0:02:29", "throughput": 1310.53, "total_tokens": 85920} | |
| {"current_steps": 280, "total_steps": 900, "loss": 0.0, "lr": 4.357862063693486e-05, "epoch": 3.111111111111111, "percentage": 31.11, "elapsed_time": "0:01:06", "remaining_time": "0:02:27", "throughput": 1314.61, "total_tokens": 87520} | |
| {"current_steps": 285, "total_steps": 900, "loss": 0.0, "lr": 4.325074396756437e-05, "epoch": 3.1666666666666665, "percentage": 31.67, "elapsed_time": "0:01:07", "remaining_time": "0:02:25", "throughput": 1318.69, "total_tokens": 89088} | |
| {"current_steps": 290, "total_steps": 900, "loss": 0.0, "lr": 4.2916003939788403e-05, "epoch": 3.2222222222222223, "percentage": 32.22, "elapsed_time": "0:01:08", "remaining_time": "0:02:24", "throughput": 1323.53, "total_tokens": 90688} | |
| {"current_steps": 295, "total_steps": 900, "loss": 0.0, "lr": 4.257452643564155e-05, "epoch": 3.2777777777777777, "percentage": 32.78, "elapsed_time": "0:01:09", "remaining_time": "0:02:22", "throughput": 1326.34, "total_tokens": 92160} | |
| {"current_steps": 300, "total_steps": 900, "loss": 0.0001, "lr": 4.22264398708477e-05, "epoch": 3.3333333333333335, "percentage": 33.33, "elapsed_time": "0:01:10", "remaining_time": "0:02:20", "throughput": 1330.89, "total_tokens": 93760} | |
| {"current_steps": 305, "total_steps": 900, "loss": 0.0, "lr": 4.1871875146528195e-05, "epoch": 3.388888888888889, "percentage": 33.89, "elapsed_time": "0:01:11", "remaining_time": "0:02:19", "throughput": 1335.4, "total_tokens": 95360} | |
| {"current_steps": 310, "total_steps": 900, "loss": 0.0, "lr": 4.1510965599975196e-05, "epoch": 3.4444444444444446, "percentage": 34.44, "elapsed_time": "0:01:12", "remaining_time": "0:02:17", "throughput": 1339.33, "total_tokens": 96928} | |
| {"current_steps": 315, "total_steps": 900, "loss": 0.0, "lr": 4.114384695450906e-05, "epoch": 3.5, "percentage": 35.0, "elapsed_time": "0:01:13", "remaining_time": "0:02:16", "throughput": 1342.5, "total_tokens": 98464} | |
| {"current_steps": 315, "total_steps": 900, "eval_loss": 0.04881405085325241, "epoch": 3.5, "percentage": 35.0, "elapsed_time": "0:01:13", "remaining_time": "0:02:17", "throughput": 1330.63, "total_tokens": 98464} | |
| {"current_steps": 320, "total_steps": 900, "loss": 0.0, "lr": 4.077065726843828e-05, "epoch": 3.5555555555555554, "percentage": 35.56, "elapsed_time": "0:01:16", "remaining_time": "0:02:18", "throughput": 1311.65, "total_tokens": 100064} | |
| {"current_steps": 325, "total_steps": 900, "loss": 0.0, "lr": 4.039153688314145e-05, "epoch": 3.611111111111111, "percentage": 36.11, "elapsed_time": "0:01:17", "remaining_time": "0:02:16", "throughput": 1314.51, "total_tokens": 101600} | |
| {"current_steps": 330, "total_steps": 900, "loss": 0.0, "lr": 4.000662837029062e-05, "epoch": 3.6666666666666665, "percentage": 36.67, "elapsed_time": "0:01:18", "remaining_time": "0:02:15", "throughput": 1318.18, "total_tokens": 103200} | |
| {"current_steps": 335, "total_steps": 900, "loss": 0.0, "lr": 3.961607647823583e-05, "epoch": 3.7222222222222223, "percentage": 37.22, "elapsed_time": "0:01:19", "remaining_time": "0:02:13", "throughput": 1321.77, "total_tokens": 104768} | |
| {"current_steps": 340, "total_steps": 900, "loss": 0.0, "lr": 3.9220028077571295e-05, "epoch": 3.7777777777777777, "percentage": 37.78, "elapsed_time": "0:01:20", "remaining_time": "0:02:12", "throughput": 1325.06, "total_tokens": 106304} | |
| {"current_steps": 345, "total_steps": 900, "loss": 0.0, "lr": 3.881863210590332e-05, "epoch": 3.8333333333333335, "percentage": 38.33, "elapsed_time": "0:01:21", "remaining_time": "0:02:10", "throughput": 1328.94, "total_tokens": 107904} | |
| {"current_steps": 350, "total_steps": 900, "loss": 0.0, "lr": 3.841203951184095e-05, "epoch": 3.888888888888889, "percentage": 38.89, "elapsed_time": "0:01:22", "remaining_time": "0:02:09", "throughput": 1331.66, "total_tokens": 109408} | |
| {"current_steps": 355, "total_steps": 900, "loss": 0.0, "lr": 3.8000403198230387e-05, "epoch": 3.9444444444444446, "percentage": 39.44, "elapsed_time": "0:01:23", "remaining_time": "0:02:07", "throughput": 1335.63, "total_tokens": 111008} | |
| {"current_steps": 360, "total_steps": 900, "loss": 0.0, "lr": 3.75838779646545e-05, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:01:24", "remaining_time": "0:02:06", "throughput": 1337.83, "total_tokens": 112576} | |
| {"current_steps": 360, "total_steps": 900, "eval_loss": 0.05044294521212578, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:01:24", "remaining_time": "0:02:07", "throughput": 1327.57, "total_tokens": 112576} | |
| {"current_steps": 365, "total_steps": 900, "loss": 0.0, "lr": 3.7162620449219e-05, "epoch": 4.055555555555555, "percentage": 40.56, "elapsed_time": "0:01:27", "remaining_time": "0:02:08", "throughput": 1306.27, "total_tokens": 114144} | |
| {"current_steps": 370, "total_steps": 900, "loss": 0.0, "lr": 3.673678906964727e-05, "epoch": 4.111111111111111, "percentage": 41.11, "elapsed_time": "0:01:28", "remaining_time": "0:02:06", "throughput": 1309.11, "total_tokens": 115712} | |
| {"current_steps": 375, "total_steps": 900, "loss": 0.0, "lr": 3.630654396370594e-05, "epoch": 4.166666666666667, "percentage": 41.67, "elapsed_time": "0:01:29", "remaining_time": "0:02:05", "throughput": 1311.33, "total_tokens": 117216} | |
| {"current_steps": 380, "total_steps": 900, "loss": 0.0, "lr": 3.5872046928983626e-05, "epoch": 4.222222222222222, "percentage": 42.22, "elapsed_time": "0:01:30", "remaining_time": "0:02:03", "throughput": 1314.82, "total_tokens": 118816} | |
| {"current_steps": 385, "total_steps": 900, "loss": 0.0, "lr": 3.543346136204545e-05, "epoch": 4.277777777777778, "percentage": 42.78, "elapsed_time": "0:01:31", "remaining_time": "0:02:02", "throughput": 1317.73, "total_tokens": 120352} | |
| {"current_steps": 390, "total_steps": 900, "loss": 0.0, "lr": 3.499095219698631e-05, "epoch": 4.333333333333333, "percentage": 43.33, "elapsed_time": "0:01:32", "remaining_time": "0:02:00", "throughput": 1320.98, "total_tokens": 121920} | |
| {"current_steps": 395, "total_steps": 900, "loss": 0.0, "lr": 3.454468584340588e-05, "epoch": 4.388888888888889, "percentage": 43.89, "elapsed_time": "0:01:33", "remaining_time": "0:01:59", "throughput": 1323.77, "total_tokens": 123456} | |
| {"current_steps": 400, "total_steps": 900, "loss": 0.0, "lr": 3.409483012382879e-05, "epoch": 4.444444444444445, "percentage": 44.44, "elapsed_time": "0:01:34", "remaining_time": "0:01:57", "throughput": 1327.0, "total_tokens": 125056} | |
| {"current_steps": 405, "total_steps": 900, "loss": 0.0, "lr": 3.364155421059342e-05, "epoch": 4.5, "percentage": 45.0, "elapsed_time": "0:01:35", "remaining_time": "0:01:56", "throughput": 1329.77, "total_tokens": 126624} | |
| {"current_steps": 405, "total_steps": 900, "eval_loss": 0.05202624946832657, "epoch": 4.5, "percentage": 45.0, "elapsed_time": "0:01:35", "remaining_time": "0:01:57", "throughput": 1320.8, "total_tokens": 126624} | |
| {"current_steps": 410, "total_steps": 900, "loss": 0.0, "lr": 3.318502856223311e-05, "epoch": 4.555555555555555, "percentage": 45.56, "elapsed_time": "0:01:38", "remaining_time": "0:01:57", "throughput": 1306.52, "total_tokens": 128224} | |
| {"current_steps": 415, "total_steps": 900, "loss": 0.0, "lr": 3.272542485937369e-05, "epoch": 4.611111111111111, "percentage": 46.11, "elapsed_time": "0:01:39", "remaining_time": "0:01:55", "throughput": 1308.19, "total_tokens": 129728} | |
| {"current_steps": 420, "total_steps": 900, "loss": 0.0, "lr": 3.2262915940171376e-05, "epoch": 4.666666666666667, "percentage": 46.67, "elapsed_time": "0:01:40", "remaining_time": "0:01:54", "throughput": 1310.84, "total_tokens": 131328} | |
| {"current_steps": 425, "total_steps": 900, "loss": 0.0, "lr": 3.1797675735315455e-05, "epoch": 4.722222222222222, "percentage": 47.22, "elapsed_time": "0:01:41", "remaining_time": "0:01:53", "throughput": 1313.53, "total_tokens": 132896} | |
| {"current_steps": 430, "total_steps": 900, "loss": 0.0, "lr": 3.132987920262005e-05, "epoch": 4.777777777777778, "percentage": 47.78, "elapsed_time": "0:01:42", "remaining_time": "0:01:51", "throughput": 1316.54, "total_tokens": 134496} | |
| {"current_steps": 435, "total_steps": 900, "loss": 0.0, "lr": 3.085970226122962e-05, "epoch": 4.833333333333333, "percentage": 48.33, "elapsed_time": "0:01:43", "remaining_time": "0:01:50", "throughput": 1319.18, "total_tokens": 136064} | |
| {"current_steps": 440, "total_steps": 900, "loss": 0.0, "lr": 3.0387321725463e-05, "epoch": 4.888888888888889, "percentage": 48.89, "elapsed_time": "0:01:44", "remaining_time": "0:01:48", "throughput": 1322.09, "total_tokens": 137664} | |
| {"current_steps": 445, "total_steps": 900, "loss": 0.0, "lr": 2.9912915238320754e-05, "epoch": 4.944444444444445, "percentage": 49.44, "elapsed_time": "0:01:45", "remaining_time": "0:01:47", "throughput": 1324.75, "total_tokens": 139232} | |
| {"current_steps": 450, "total_steps": 900, "loss": 0.0, "lr": 2.9436661204680882e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:01:46", "remaining_time": "0:01:46", "throughput": 1326.68, "total_tokens": 140832} | |
| {"current_steps": 450, "total_steps": 900, "eval_loss": 0.050029732286930084, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:01:46", "remaining_time": "0:01:46", "throughput": 1318.67, "total_tokens": 140832} | |
| {"current_steps": 455, "total_steps": 900, "loss": 0.0, "lr": 2.8958738724208072e-05, "epoch": 5.055555555555555, "percentage": 50.56, "elapsed_time": "0:01:49", "remaining_time": "0:01:46", "throughput": 1302.58, "total_tokens": 142368} | |
| {"current_steps": 460, "total_steps": 900, "loss": 0.0, "lr": 2.8479327524001636e-05, "epoch": 5.111111111111111, "percentage": 51.11, "elapsed_time": "0:01:50", "remaining_time": "0:01:45", "throughput": 1305.35, "total_tokens": 144000} | |
| {"current_steps": 465, "total_steps": 900, "loss": 0.0, "lr": 2.7998607891007495e-05, "epoch": 5.166666666666667, "percentage": 51.67, "elapsed_time": "0:01:51", "remaining_time": "0:01:44", "throughput": 1308.52, "total_tokens": 145632} | |
| {"current_steps": 470, "total_steps": 900, "loss": 0.0, "lr": 2.7516760604219617e-05, "epoch": 5.222222222222222, "percentage": 52.22, "elapsed_time": "0:01:52", "remaining_time": "0:01:42", "throughput": 1310.86, "total_tokens": 147168} | |
| {"current_steps": 475, "total_steps": 900, "loss": 0.0, "lr": 2.7033966866696457e-05, "epoch": 5.277777777777778, "percentage": 52.78, "elapsed_time": "0:01:53", "remaining_time": "0:01:41", "throughput": 1313.52, "total_tokens": 148736} | |
| {"current_steps": 480, "total_steps": 900, "loss": 0.0, "lr": 2.6550408237417885e-05, "epoch": 5.333333333333333, "percentage": 53.33, "elapsed_time": "0:01:54", "remaining_time": "0:01:39", "throughput": 1315.92, "total_tokens": 150304} | |
| {"current_steps": 485, "total_steps": 900, "loss": 0.0, "lr": 2.6066266563008267e-05, "epoch": 5.388888888888889, "percentage": 53.89, "elapsed_time": "0:01:55", "remaining_time": "0:01:38", "throughput": 1318.55, "total_tokens": 151872} | |
| {"current_steps": 490, "total_steps": 900, "loss": 0.0, "lr": 2.5581723909351406e-05, "epoch": 5.444444444444445, "percentage": 54.44, "elapsed_time": "0:01:56", "remaining_time": "0:01:37", "throughput": 1321.42, "total_tokens": 153472} | |
| {"current_steps": 495, "total_steps": 900, "loss": 0.0, "lr": 2.5096962493123012e-05, "epoch": 5.5, "percentage": 55.0, "elapsed_time": "0:01:57", "remaining_time": "0:01:35", "throughput": 1323.44, "total_tokens": 154976} | |
| {"current_steps": 495, "total_steps": 900, "eval_loss": 0.04972325265407562, "epoch": 5.5, "percentage": 55.0, "elapsed_time": "0:01:57", "remaining_time": "0:01:36", "throughput": 1316.25, "total_tokens": 154976} | |
| {"current_steps": 500, "total_steps": 900, "loss": 0.0, "lr": 2.461216461326642e-05, "epoch": 5.555555555555555, "percentage": 55.56, "elapsed_time": "0:02:00", "remaining_time": "0:01:36", "throughput": 1302.62, "total_tokens": 156544} | |
| {"current_steps": 505, "total_steps": 900, "loss": 0.0, "lr": 2.4127512582437485e-05, "epoch": 5.611111111111111, "percentage": 56.11, "elapsed_time": "0:02:01", "remaining_time": "0:01:34", "throughput": 1304.75, "total_tokens": 158112} | |
| {"current_steps": 510, "total_steps": 900, "loss": 0.0, "lr": 2.364318865844416e-05, "epoch": 5.666666666666667, "percentage": 56.67, "elapsed_time": "0:02:02", "remaining_time": "0:01:33", "throughput": 1306.86, "total_tokens": 159680} | |
| {"current_steps": 515, "total_steps": 900, "loss": 0.0, "lr": 2.3159374975706884e-05, "epoch": 5.722222222222222, "percentage": 57.22, "elapsed_time": "0:02:03", "remaining_time": "0:01:32", "throughput": 1309.84, "total_tokens": 161312} | |
| {"current_steps": 520, "total_steps": 900, "loss": 0.0, "lr": 2.2676253476765196e-05, "epoch": 5.777777777777778, "percentage": 57.78, "elapsed_time": "0:02:04", "remaining_time": "0:01:30", "throughput": 1312.35, "total_tokens": 162880} | |
| {"current_steps": 525, "total_steps": 900, "loss": 0.0, "lr": 2.2194005843856636e-05, "epoch": 5.833333333333333, "percentage": 58.33, "elapsed_time": "0:02:05", "remaining_time": "0:01:29", "throughput": 1314.82, "total_tokens": 164448} | |
| {"current_steps": 530, "total_steps": 900, "loss": 0.0, "lr": 2.1712813430593436e-05, "epoch": 5.888888888888889, "percentage": 58.89, "elapsed_time": "0:02:06", "remaining_time": "0:01:27", "throughput": 1317.25, "total_tokens": 166016} | |
| {"current_steps": 535, "total_steps": 900, "loss": 0.0, "lr": 2.1232857193762924e-05, "epoch": 5.944444444444445, "percentage": 59.44, "elapsed_time": "0:02:06", "remaining_time": "0:01:26", "throughput": 1319.5, "total_tokens": 167552} | |
| {"current_steps": 540, "total_steps": 900, "loss": 0.0, "lr": 2.0754317625276983e-05, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:02:08", "remaining_time": "0:01:25", "throughput": 1320.65, "total_tokens": 169056} | |
| {"current_steps": 540, "total_steps": 900, "eval_loss": 0.050714749842882156, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:02:08", "remaining_time": "0:01:25", "throughput": 1314.07, "total_tokens": 169056} | |
| {"current_steps": 545, "total_steps": 900, "loss": 0.0, "lr": 2.02773746842965e-05, "epoch": 6.055555555555555, "percentage": 60.56, "elapsed_time": "0:02:11", "remaining_time": "0:01:25", "throughput": 1301.81, "total_tokens": 170592} | |
| {"current_steps": 550, "total_steps": 900, "loss": 0.0, "lr": 1.980220772955602e-05, "epoch": 6.111111111111111, "percentage": 61.11, "elapsed_time": "0:02:12", "remaining_time": "0:01:24", "throughput": 1304.05, "total_tokens": 172192} | |
| {"current_steps": 555, "total_steps": 900, "loss": 0.0, "lr": 1.932899545191433e-05, "epoch": 6.166666666666667, "percentage": 61.67, "elapsed_time": "0:02:13", "remaining_time": "0:01:22", "throughput": 1306.36, "total_tokens": 173792} | |
| {"current_steps": 560, "total_steps": 900, "loss": 0.0, "lr": 1.8857915807156092e-05, "epoch": 6.222222222222222, "percentage": 62.22, "elapsed_time": "0:02:14", "remaining_time": "0:01:21", "throughput": 1308.6, "total_tokens": 175360} | |
| {"current_steps": 565, "total_steps": 900, "loss": 0.0, "lr": 1.838914594906995e-05, "epoch": 6.277777777777778, "percentage": 62.78, "elapsed_time": "0:02:14", "remaining_time": "0:01:20", "throughput": 1311.29, "total_tokens": 176992} | |
| {"current_steps": 570, "total_steps": 900, "loss": 0.0, "lr": 1.792286216282824e-05, "epoch": 6.333333333333333, "percentage": 63.33, "elapsed_time": "0:02:15", "remaining_time": "0:01:18", "throughput": 1313.64, "total_tokens": 178592} | |
| {"current_steps": 575, "total_steps": 900, "loss": 0.0, "lr": 1.7459239798693364e-05, "epoch": 6.388888888888889, "percentage": 63.89, "elapsed_time": "0:02:16", "remaining_time": "0:01:17", "throughput": 1315.57, "total_tokens": 180128} | |
| {"current_steps": 580, "total_steps": 900, "loss": 0.0, "lr": 1.699845320607571e-05, "epoch": 6.444444444444445, "percentage": 64.44, "elapsed_time": "0:02:17", "remaining_time": "0:01:16", "throughput": 1317.32, "total_tokens": 181632} | |
| {"current_steps": 585, "total_steps": 900, "loss": 0.0, "lr": 1.6540675667967974e-05, "epoch": 6.5, "percentage": 65.0, "elapsed_time": "0:02:18", "remaining_time": "0:01:14", "throughput": 1319.47, "total_tokens": 183200} | |
| {"current_steps": 585, "total_steps": 900, "eval_loss": 0.050469737499952316, "epoch": 6.5, "percentage": 65.0, "elapsed_time": "0:02:19", "remaining_time": "0:01:15", "throughput": 1313.37, "total_tokens": 183200} | |
| {"current_steps": 590, "total_steps": 900, "loss": 0.0, "lr": 1.60860793357805e-05, "epoch": 6.555555555555555, "percentage": 65.56, "elapsed_time": "0:02:21", "remaining_time": "0:01:14", "throughput": 1303.71, "total_tokens": 184800} | |
| {"current_steps": 595, "total_steps": 900, "loss": 0.0, "lr": 1.56348351646022e-05, "epoch": 6.611111111111111, "percentage": 66.11, "elapsed_time": "0:02:22", "remaining_time": "0:01:13", "throughput": 1305.35, "total_tokens": 186336} | |
| {"current_steps": 600, "total_steps": 900, "loss": 0.0, "lr": 1.5187112848911323e-05, "epoch": 6.666666666666667, "percentage": 66.67, "elapsed_time": "0:02:23", "remaining_time": "0:01:11", "throughput": 1307.13, "total_tokens": 187904} | |
| {"current_steps": 605, "total_steps": 900, "loss": 0.0, "lr": 1.47430807587603e-05, "epoch": 6.722222222222222, "percentage": 67.22, "elapsed_time": "0:02:24", "remaining_time": "0:01:10", "throughput": 1309.11, "total_tokens": 189472} | |
| {"current_steps": 610, "total_steps": 900, "loss": 0.0, "lr": 1.430290587645865e-05, "epoch": 6.777777777777778, "percentage": 67.78, "elapsed_time": "0:02:25", "remaining_time": "0:01:09", "throughput": 1311.45, "total_tokens": 191072} | |
| {"current_steps": 615, "total_steps": 900, "loss": 0.0, "lr": 1.3866753733777765e-05, "epoch": 6.833333333333333, "percentage": 68.33, "elapsed_time": "0:02:26", "remaining_time": "0:01:07", "throughput": 1313.35, "total_tokens": 192608} | |
| {"current_steps": 620, "total_steps": 900, "loss": 0.0, "lr": 1.343478834970121e-05, "epoch": 6.888888888888889, "percentage": 68.89, "elapsed_time": "0:02:27", "remaining_time": "0:01:06", "throughput": 1315.66, "total_tokens": 194208} | |
| {"current_steps": 625, "total_steps": 900, "loss": 0.0, "lr": 1.3007172168743854e-05, "epoch": 6.944444444444445, "percentage": 69.44, "elapsed_time": "0:02:28", "remaining_time": "0:01:05", "throughput": 1317.81, "total_tokens": 195776} | |
| {"current_steps": 630, "total_steps": 900, "loss": 0.0, "lr": 1.2584065999863102e-05, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:02:29", "remaining_time": "0:01:04", "throughput": 1319.25, "total_tokens": 197344} | |
| {"current_steps": 630, "total_steps": 900, "eval_loss": 0.05317758396267891, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:02:30", "remaining_time": "0:01:04", "throughput": 1313.63, "total_tokens": 197344} | |
| {"current_steps": 635, "total_steps": 900, "loss": 0.0, "lr": 1.2165628955985314e-05, "epoch": 7.055555555555555, "percentage": 70.56, "elapsed_time": "0:02:33", "remaining_time": "0:01:03", "throughput": 1299.21, "total_tokens": 198944} | |
| {"current_steps": 640, "total_steps": 900, "loss": 0.0, "lr": 1.175201839416988e-05, "epoch": 7.111111111111111, "percentage": 71.11, "elapsed_time": "0:02:34", "remaining_time": "0:01:02", "throughput": 1300.83, "total_tokens": 200512} | |
| {"current_steps": 645, "total_steps": 900, "loss": 0.0, "lr": 1.1343389856433658e-05, "epoch": 7.166666666666667, "percentage": 71.67, "elapsed_time": "0:02:35", "remaining_time": "0:01:01", "throughput": 1302.32, "total_tokens": 202016} | |
| {"current_steps": 650, "total_steps": 900, "loss": 0.0, "lr": 1.0939897011258001e-05, "epoch": 7.222222222222222, "percentage": 72.22, "elapsed_time": "0:02:36", "remaining_time": "0:01:00", "throughput": 1304.73, "total_tokens": 203648} | |
| {"current_steps": 655, "total_steps": 900, "loss": 0.0, "lr": 1.0541691595800337e-05, "epoch": 7.277777777777778, "percentage": 72.78, "elapsed_time": "0:02:37", "remaining_time": "0:00:58", "throughput": 1306.51, "total_tokens": 205184} | |
| {"current_steps": 660, "total_steps": 900, "loss": 0.0, "lr": 1.0148923358832022e-05, "epoch": 7.333333333333333, "percentage": 73.33, "elapsed_time": "0:02:38", "remaining_time": "0:00:57", "throughput": 1308.28, "total_tokens": 206720} | |
| {"current_steps": 665, "total_steps": 900, "loss": 0.0, "lr": 9.761740004423927e-06, "epoch": 7.388888888888889, "percentage": 73.89, "elapsed_time": "0:02:38", "remaining_time": "0:00:56", "throughput": 1310.36, "total_tokens": 208320} | |
| {"current_steps": 670, "total_steps": 900, "loss": 0.0, "lr": 9.380287136401e-06, "epoch": 7.444444444444445, "percentage": 74.44, "elapsed_time": "0:02:39", "remaining_time": "0:00:54", "throughput": 1311.97, "total_tokens": 209856} | |
| {"current_steps": 675, "total_steps": 900, "loss": 0.0, "lr": 9.00470820358663e-06, "epoch": 7.5, "percentage": 75.0, "elapsed_time": "0:02:40", "remaining_time": "0:00:53", "throughput": 1313.63, "total_tokens": 211392} | |
| {"current_steps": 675, "total_steps": 900, "eval_loss": 0.04903189465403557, "epoch": 7.5, "percentage": 75.0, "elapsed_time": "0:02:41", "remaining_time": "0:00:53", "throughput": 1308.42, "total_tokens": 211392} | |
| {"current_steps": 680, "total_steps": 900, "loss": 0.0, "lr": 8.635144445857406e-06, "epoch": 7.555555555555555, "percentage": 75.56, "elapsed_time": "0:02:43", "remaining_time": "0:00:53", "throughput": 1299.79, "total_tokens": 212960} | |
| {"current_steps": 685, "total_steps": 900, "loss": 0.0, "lr": 8.271734841028553e-06, "epoch": 7.611111111111111, "percentage": 76.11, "elapsed_time": "0:02:44", "remaining_time": "0:00:51", "throughput": 1301.4, "total_tokens": 214528} | |
| {"current_steps": 690, "total_steps": 900, "loss": 0.0, "lr": 7.914616052590071e-06, "epoch": 7.666666666666667, "percentage": 76.67, "elapsed_time": "0:02:45", "remaining_time": "0:00:50", "throughput": 1302.45, "total_tokens": 216000} | |
| {"current_steps": 695, "total_steps": 900, "loss": 0.0, "lr": 7.563922378313218e-06, "epoch": 7.722222222222222, "percentage": 77.22, "elapsed_time": "0:02:46", "remaining_time": "0:00:49", "throughput": 1304.64, "total_tokens": 217632} | |
| {"current_steps": 700, "total_steps": 900, "loss": 0.0, "lr": 7.219785699746573e-06, "epoch": 7.777777777777778, "percentage": 77.78, "elapsed_time": "0:02:47", "remaining_time": "0:00:47", "throughput": 1306.61, "total_tokens": 219232} | |
| {"current_steps": 705, "total_steps": 900, "loss": 0.0, "lr": 6.882335432620779e-06, "epoch": 7.833333333333333, "percentage": 78.33, "elapsed_time": "0:02:48", "remaining_time": "0:00:46", "throughput": 1308.45, "total_tokens": 220800} | |
| {"current_steps": 710, "total_steps": 900, "loss": 0.0, "lr": 6.55169847818059e-06, "epoch": 7.888888888888889, "percentage": 78.89, "elapsed_time": "0:02:49", "remaining_time": "0:00:45", "throughput": 1310.28, "total_tokens": 222368} | |
| {"current_steps": 715, "total_steps": 900, "loss": 0.0, "lr": 6.22799917546252e-06, "epoch": 7.944444444444445, "percentage": 79.44, "elapsed_time": "0:02:50", "remaining_time": "0:00:44", "throughput": 1312.34, "total_tokens": 223968} | |
| {"current_steps": 720, "total_steps": 900, "loss": 0.0, "lr": 5.9113592545359945e-06, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:02:51", "remaining_time": "0:00:42", "throughput": 1313.63, "total_tokens": 225536} | |
| {"current_steps": 720, "total_steps": 900, "eval_loss": 0.04664245992898941, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:02:52", "remaining_time": "0:00:43", "throughput": 1308.75, "total_tokens": 225536} | |
| {"current_steps": 725, "total_steps": 900, "loss": 0.0, "lr": 5.601897790725643e-06, "epoch": 8.055555555555555, "percentage": 80.56, "elapsed_time": "0:02:54", "remaining_time": "0:00:42", "throughput": 1299.85, "total_tokens": 227168} | |
| {"current_steps": 730, "total_steps": 900, "loss": 0.0, "lr": 5.299731159831953e-06, "epoch": 8.11111111111111, "percentage": 81.11, "elapsed_time": "0:02:55", "remaining_time": "0:00:40", "throughput": 1301.13, "total_tokens": 228704} | |
| {"current_steps": 735, "total_steps": 900, "loss": 0.0, "lr": 5.004972994367102e-06, "epoch": 8.166666666666666, "percentage": 81.67, "elapsed_time": "0:02:56", "remaining_time": "0:00:39", "throughput": 1303.15, "total_tokens": 230336} | |
| {"current_steps": 740, "total_steps": 900, "loss": 0.0, "lr": 4.7177341408224e-06, "epoch": 8.222222222222221, "percentage": 82.22, "elapsed_time": "0:02:57", "remaining_time": "0:00:38", "throughput": 1305.14, "total_tokens": 231936} | |
| {"current_steps": 745, "total_steps": 900, "loss": 0.0, "lr": 4.438122617983443e-06, "epoch": 8.277777777777779, "percentage": 82.78, "elapsed_time": "0:02:58", "remaining_time": "0:00:37", "throughput": 1306.7, "total_tokens": 233472} | |
| {"current_steps": 750, "total_steps": 900, "loss": 0.0, "lr": 4.166243576308712e-06, "epoch": 8.333333333333334, "percentage": 83.33, "elapsed_time": "0:02:59", "remaining_time": "0:00:35", "throughput": 1308.42, "total_tokens": 235040} | |
| {"current_steps": 755, "total_steps": 900, "loss": 0.0, "lr": 3.9021992583867325e-06, "epoch": 8.38888888888889, "percentage": 83.89, "elapsed_time": "0:03:00", "remaining_time": "0:00:34", "throughput": 1310.1, "total_tokens": 236608} | |
| {"current_steps": 760, "total_steps": 900, "loss": 0.0, "lr": 3.6460889604868626e-06, "epoch": 8.444444444444445, "percentage": 84.44, "elapsed_time": "0:03:01", "remaining_time": "0:00:33", "throughput": 1311.63, "total_tokens": 238144} | |
| {"current_steps": 765, "total_steps": 900, "loss": 0.0, "lr": 3.398008995217988e-06, "epoch": 8.5, "percentage": 85.0, "elapsed_time": "0:03:02", "remaining_time": "0:00:32", "throughput": 1313.08, "total_tokens": 239680} | |
| {"current_steps": 765, "total_steps": 900, "eval_loss": 0.052941907197237015, "epoch": 8.5, "percentage": 85.0, "elapsed_time": "0:03:03", "remaining_time": "0:00:32", "throughput": 1308.46, "total_tokens": 239680} | |
| {"current_steps": 770, "total_steps": 900, "loss": 0.0, "lr": 3.158052655309332e-06, "epoch": 8.555555555555555, "percentage": 85.56, "elapsed_time": "0:03:05", "remaining_time": "0:00:31", "throughput": 1301.14, "total_tokens": 241280} | |
| {"current_steps": 775, "total_steps": 900, "loss": 0.0, "lr": 2.9263101785268254e-06, "epoch": 8.61111111111111, "percentage": 86.11, "elapsed_time": "0:03:06", "remaining_time": "0:00:30", "throughput": 1302.37, "total_tokens": 242816} | |
| {"current_steps": 780, "total_steps": 900, "loss": 0.0, "lr": 2.7028687137384267e-06, "epoch": 8.666666666666666, "percentage": 86.67, "elapsed_time": "0:03:07", "remaining_time": "0:00:28", "throughput": 1303.64, "total_tokens": 244352} | |
| {"current_steps": 785, "total_steps": 900, "loss": 0.0, "lr": 2.487812288140945e-06, "epoch": 8.722222222222221, "percentage": 87.22, "elapsed_time": "0:03:08", "remaining_time": "0:00:27", "throughput": 1304.93, "total_tokens": 245856} | |
| {"current_steps": 790, "total_steps": 900, "loss": 0.0, "lr": 2.281221775660894e-06, "epoch": 8.777777777777779, "percentage": 87.78, "elapsed_time": "0:03:09", "remaining_time": "0:00:26", "throughput": 1306.75, "total_tokens": 247456} | |
| {"current_steps": 795, "total_steps": 900, "loss": 0.0, "lr": 2.0831748665410765e-06, "epoch": 8.833333333333334, "percentage": 88.33, "elapsed_time": "0:03:10", "remaining_time": "0:00:25", "throughput": 1308.17, "total_tokens": 248992} | |
| {"current_steps": 800, "total_steps": 900, "loss": 0.0, "lr": 1.893746038124497e-06, "epoch": 8.88888888888889, "percentage": 88.89, "elapsed_time": "0:03:11", "remaining_time": "0:00:23", "throughput": 1309.56, "total_tokens": 250528} | |
| {"current_steps": 805, "total_steps": 900, "loss": 0.0, "lr": 1.713006526846439e-06, "epoch": 8.944444444444445, "percentage": 89.44, "elapsed_time": "0:03:12", "remaining_time": "0:00:22", "throughput": 1311.41, "total_tokens": 252128} | |
| {"current_steps": 810, "total_steps": 900, "loss": 0.0, "lr": 1.541024301445404e-06, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:03:13", "remaining_time": "0:00:21", "throughput": 1312.56, "total_tokens": 253696} | |
| {"current_steps": 810, "total_steps": 900, "eval_loss": 0.05138952657580376, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:03:13", "remaining_time": "0:00:21", "throughput": 1308.29, "total_tokens": 253696} | |
| {"current_steps": 815, "total_steps": 900, "loss": 0.0, "lr": 1.3778640374027985e-06, "epoch": 9.055555555555555, "percentage": 90.56, "elapsed_time": "0:03:16", "remaining_time": "0:00:20", "throughput": 1300.36, "total_tokens": 255296} | |
| {"current_steps": 820, "total_steps": 900, "loss": 0.0, "lr": 1.2235870926211619e-06, "epoch": 9.11111111111111, "percentage": 91.11, "elapsed_time": "0:03:17", "remaining_time": "0:00:19", "throughput": 1301.79, "total_tokens": 256896} | |
| {"current_steps": 825, "total_steps": 900, "loss": 0.0, "lr": 1.0782514843499653e-06, "epoch": 9.166666666666666, "percentage": 91.67, "elapsed_time": "0:03:18", "remaining_time": "0:00:18", "throughput": 1303.04, "total_tokens": 258432} | |
| {"current_steps": 830, "total_steps": 900, "loss": 0.0, "lr": 9.419118673676924e-07, "epoch": 9.222222222222221, "percentage": 92.22, "elapsed_time": "0:03:19", "remaining_time": "0:00:16", "throughput": 1304.54, "total_tokens": 260000} | |
| {"current_steps": 835, "total_steps": 900, "loss": 0.0, "lr": 8.146195134284052e-07, "epoch": 9.277777777777779, "percentage": 92.78, "elapsed_time": "0:03:20", "remaining_time": "0:00:15", "throughput": 1306.1, "total_tokens": 261568} | |
| {"current_steps": 840, "total_steps": 900, "loss": 0.0, "lr": 6.964222919805391e-07, "epoch": 9.333333333333334, "percentage": 93.33, "elapsed_time": "0:03:21", "remaining_time": "0:00:14", "throughput": 1307.97, "total_tokens": 263200} | |
| {"current_steps": 845, "total_steps": 900, "loss": 0.0, "lr": 5.87364652165176e-07, "epoch": 9.38888888888889, "percentage": 93.89, "elapsed_time": "0:03:22", "remaining_time": "0:00:13", "throughput": 1309.36, "total_tokens": 264736} | |
| {"current_steps": 850, "total_steps": 900, "loss": 0.0, "lr": 4.874876061005173e-07, "epoch": 9.444444444444445, "percentage": 94.44, "elapsed_time": "0:03:23", "remaining_time": "0:00:11", "throughput": 1310.87, "total_tokens": 266304} | |
| {"current_steps": 855, "total_steps": 900, "loss": 0.0, "lr": 3.9682871345891883e-07, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:03:24", "remaining_time": "0:00:10", "throughput": 1312.23, "total_tokens": 267840} | |
| {"current_steps": 855, "total_steps": 900, "eval_loss": 0.05005338042974472, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:03:24", "remaining_time": "0:00:10", "throughput": 1308.11, "total_tokens": 267840} | |
| {"current_steps": 860, "total_steps": 900, "loss": 0.0, "lr": 3.1542206734221924e-07, "epoch": 9.555555555555555, "percentage": 95.56, "elapsed_time": "0:03:27", "remaining_time": "0:00:09", "throughput": 1296.31, "total_tokens": 269376} | |
| {"current_steps": 865, "total_steps": 900, "loss": 0.0, "lr": 2.4329828146074095e-07, "epoch": 9.61111111111111, "percentage": 96.11, "elapsed_time": "0:03:28", "remaining_time": "0:00:08", "throughput": 1297.59, "total_tokens": 270944} | |
| {"current_steps": 870, "total_steps": 900, "loss": 0.0, "lr": 1.8048447862070718e-07, "epoch": 9.666666666666666, "percentage": 96.67, "elapsed_time": "0:03:29", "remaining_time": "0:00:07", "throughput": 1298.7, "total_tokens": 272448} | |
| {"current_steps": 875, "total_steps": 900, "loss": 0.0, "lr": 1.2700428052447033e-07, "epoch": 9.722222222222221, "percentage": 97.22, "elapsed_time": "0:03:30", "remaining_time": "0:00:06", "throughput": 1299.94, "total_tokens": 273984} | |
| {"current_steps": 880, "total_steps": 900, "loss": 0.0, "lr": 8.28777988873486e-08, "epoch": 9.777777777777779, "percentage": 97.78, "elapsed_time": "0:03:31", "remaining_time": "0:00:04", "throughput": 1301.27, "total_tokens": 275520} | |
| {"current_steps": 885, "total_steps": 900, "loss": 0.0, "lr": 4.8121627874450625e-08, "epoch": 9.833333333333334, "percentage": 98.33, "elapsed_time": "0:03:32", "remaining_time": "0:00:03", "throughput": 1303.08, "total_tokens": 277152} | |
| {"current_steps": 890, "total_steps": 900, "loss": 0.0, "lr": 2.2748837860270267e-08, "epoch": 9.88888888888889, "percentage": 98.89, "elapsed_time": "0:03:33", "remaining_time": "0:00:02", "throughput": 1304.39, "total_tokens": 278688} | |
| {"current_steps": 895, "total_steps": 900, "loss": 0.0, "lr": 6.768970513457151e-09, "epoch": 9.944444444444445, "percentage": 99.44, "elapsed_time": "0:03:34", "remaining_time": "0:00:01", "throughput": 1305.89, "total_tokens": 280256} | |
| {"current_steps": 900, "total_steps": 900, "loss": 0.0, "lr": 1.8803520859811406e-10, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:03:35", "remaining_time": "0:00:00", "throughput": 1307.09, "total_tokens": 281856} | |
| {"current_steps": 900, "total_steps": 900, "eval_loss": 0.05062691122293472, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:03:36", "remaining_time": "0:00:00", "throughput": 1303.22, "total_tokens": 281856} | |
| {"current_steps": 900, "total_steps": 900, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:03:37", "remaining_time": "0:00:00", "throughput": 1295.51, "total_tokens": 281856} | |