perc_240814 / trainer_log.jsonl
ba144220's picture
Model save
d14b405 verified
{"current_steps": 10, "total_steps": 450, "loss": 1.4571, "learning_rate": 2.222222222222222e-06, "epoch": 0.022222222222222223, "percentage": 2.22, "elapsed_time": "0:00:37", "remaining_time": "0:27:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 450, "loss": 1.1268, "learning_rate": 4.444444444444444e-06, "epoch": 0.044444444444444446, "percentage": 4.44, "elapsed_time": "0:01:12", "remaining_time": "0:26:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 450, "loss": 0.9902, "learning_rate": 6.666666666666667e-06, "epoch": 0.06666666666666667, "percentage": 6.67, "elapsed_time": "0:01:48", "remaining_time": "0:25:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 450, "loss": 1.0981, "learning_rate": 8.888888888888888e-06, "epoch": 0.08888888888888889, "percentage": 8.89, "elapsed_time": "0:02:24", "remaining_time": "0:24:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 450, "loss": 1.1456, "learning_rate": 9.996239762521152e-06, "epoch": 0.1111111111111111, "percentage": 11.11, "elapsed_time": "0:03:00", "remaining_time": "0:24:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 450, "loss": 1.2093, "learning_rate": 9.966191788709716e-06, "epoch": 0.13333333333333333, "percentage": 13.33, "elapsed_time": "0:03:36", "remaining_time": "0:23:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 450, "loss": 1.1195, "learning_rate": 9.906276553136924e-06, "epoch": 0.15555555555555556, "percentage": 15.56, "elapsed_time": "0:04:12", "remaining_time": "0:22:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 450, "loss": 1.2135, "learning_rate": 9.816854393079402e-06, "epoch": 0.17777777777777778, "percentage": 17.78, "elapsed_time": "0:04:47", "remaining_time": "0:22:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 450, "loss": 1.112, "learning_rate": 9.698463103929542e-06, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:05:23", "remaining_time": "0:21:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 450, "loss": 1.1888, "learning_rate": 9.551814704830734e-06, "epoch": 0.2222222222222222, "percentage": 22.22, "elapsed_time": "0:05:59", "remaining_time": "0:20:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 450, "eval_loss": 1.1302675008773804, "epoch": 0.2222222222222222, "percentage": 22.22, "elapsed_time": "0:07:02", "remaining_time": "0:24:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 450, "loss": 1.1125, "learning_rate": 9.377791156510456e-06, "epoch": 0.24444444444444444, "percentage": 24.44, "elapsed_time": "0:07:38", "remaining_time": "0:23:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 450, "loss": 1.177, "learning_rate": 9.177439057064684e-06, "epoch": 0.26666666666666666, "percentage": 26.67, "elapsed_time": "0:08:13", "remaining_time": "0:22:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 450, "loss": 1.1346, "learning_rate": 8.951963347593797e-06, "epoch": 0.28888888888888886, "percentage": 28.89, "elapsed_time": "0:08:49", "remaining_time": "0:21:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 450, "loss": 1.0721, "learning_rate": 8.702720065545024e-06, "epoch": 0.3111111111111111, "percentage": 31.11, "elapsed_time": "0:09:25", "remaining_time": "0:20:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 150, "total_steps": 450, "loss": 1.093, "learning_rate": 8.43120818934367e-06, "epoch": 0.3333333333333333, "percentage": 33.33, "elapsed_time": "0:10:01", "remaining_time": "0:20:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 450, "loss": 1.0672, "learning_rate": 8.139060623360494e-06, "epoch": 0.35555555555555557, "percentage": 35.56, "elapsed_time": "0:10:37", "remaining_time": "0:19:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 170, "total_steps": 450, "loss": 1.0878, "learning_rate": 7.828034377432694e-06, "epoch": 0.37777777777777777, "percentage": 37.78, "elapsed_time": "0:11:13", "remaining_time": "0:18:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 450, "loss": 1.1554, "learning_rate": 7.500000000000001e-06, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:11:49", "remaining_time": "0:17:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 190, "total_steps": 450, "loss": 1.0949, "learning_rate": 7.156930328406268e-06, "epoch": 0.4222222222222222, "percentage": 42.22, "elapsed_time": "0:12:25", "remaining_time": "0:16:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 450, "loss": 1.1374, "learning_rate": 6.800888624023552e-06, "epoch": 0.4444444444444444, "percentage": 44.44, "elapsed_time": "0:13:00", "remaining_time": "0:16:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 450, "eval_loss": 1.0855395793914795, "epoch": 0.4444444444444444, "percentage": 44.44, "elapsed_time": "0:14:03", "remaining_time": "0:17:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 450, "loss": 1.0843, "learning_rate": 6.434016163555452e-06, "epoch": 0.4666666666666667, "percentage": 46.67, "elapsed_time": "0:14:39", "remaining_time": "0:16:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 220, "total_steps": 450, "loss": 0.9452, "learning_rate": 6.058519361147055e-06, "epoch": 0.4888888888888889, "percentage": 48.89, "elapsed_time": "0:15:15", "remaining_time": "0:15:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 230, "total_steps": 450, "loss": 1.1135, "learning_rate": 5.6766564987506564e-06, "epoch": 0.5111111111111111, "percentage": 51.11, "elapsed_time": "0:15:51", "remaining_time": "0:15:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 240, "total_steps": 450, "loss": 1.0685, "learning_rate": 5.290724144552379e-06, "epoch": 0.5333333333333333, "percentage": 53.33, "elapsed_time": "0:16:27", "remaining_time": "0:14:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 250, "total_steps": 450, "loss": 1.0343, "learning_rate": 4.903043341140879e-06, "epoch": 0.5555555555555556, "percentage": 55.56, "elapsed_time": "0:17:02", "remaining_time": "0:13:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 260, "total_steps": 450, "loss": 1.0521, "learning_rate": 4.515945646484105e-06, "epoch": 0.5777777777777777, "percentage": 57.78, "elapsed_time": "0:17:38", "remaining_time": "0:12:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 270, "total_steps": 450, "loss": 1.033, "learning_rate": 4.131759111665349e-06, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:18:14", "remaining_time": "0:12:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 280, "total_steps": 450, "loss": 1.0148, "learning_rate": 3.752794279710094e-06, "epoch": 0.6222222222222222, "percentage": 62.22, "elapsed_time": "0:18:50", "remaining_time": "0:11:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 290, "total_steps": 450, "loss": 1.0207, "learning_rate": 3.3813302897083955e-06, "epoch": 0.6444444444444445, "percentage": 64.44, "elapsed_time": "0:19:26", "remaining_time": "0:10:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 300, "total_steps": 450, "loss": 0.9925, "learning_rate": 3.019601169804216e-06, "epoch": 0.6666666666666666, "percentage": 66.67, "elapsed_time": "0:20:02", "remaining_time": "0:10:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 300, "total_steps": 450, "eval_loss": 1.0038442611694336, "epoch": 0.6666666666666666, "percentage": 66.67, "elapsed_time": "0:21:05", "remaining_time": "0:10:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 310, "total_steps": 450, "loss": 1.0074, "learning_rate": 2.6697824014873076e-06, "epoch": 0.6888888888888889, "percentage": 68.89, "elapsed_time": "0:21:40", "remaining_time": "0:09:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 320, "total_steps": 450, "loss": 0.9849, "learning_rate": 2.333977835991545e-06, "epoch": 0.7111111111111111, "percentage": 71.11, "elapsed_time": "0:22:16", "remaining_time": "0:09:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 330, "total_steps": 450, "loss": 0.8921, "learning_rate": 2.0142070414860704e-06, "epoch": 0.7333333333333333, "percentage": 73.33, "elapsed_time": "0:22:52", "remaining_time": "0:08:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 340, "total_steps": 450, "loss": 0.9733, "learning_rate": 1.7123931571546826e-06, "epoch": 0.7555555555555555, "percentage": 75.56, "elapsed_time": "0:23:28", "remaining_time": "0:07:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 350, "total_steps": 450, "loss": 0.958, "learning_rate": 1.4303513272105057e-06, "epoch": 0.7777777777777778, "percentage": 77.78, "elapsed_time": "0:24:04", "remaining_time": "0:06:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 360, "total_steps": 450, "loss": 0.9337, "learning_rate": 1.1697777844051105e-06, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:24:39", "remaining_time": "0:06:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 370, "total_steps": 450, "loss": 0.9707, "learning_rate": 9.322396486851626e-07, "epoch": 0.8222222222222222, "percentage": 82.22, "elapsed_time": "0:25:15", "remaining_time": "0:05:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 380, "total_steps": 450, "loss": 0.9158, "learning_rate": 7.191655023486682e-07, "epoch": 0.8444444444444444, "percentage": 84.44, "elapsed_time": "0:25:51", "remaining_time": "0:04:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 390, "total_steps": 450, "loss": 0.9862, "learning_rate": 5.318367983829393e-07, "epoch": 0.8666666666666667, "percentage": 86.67, "elapsed_time": "0:26:27", "remaining_time": "0:04:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 400, "total_steps": 450, "loss": 0.9863, "learning_rate": 3.7138015365554834e-07, "epoch": 0.8888888888888888, "percentage": 88.89, "elapsed_time": "0:27:03", "remaining_time": "0:03:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 400, "total_steps": 450, "eval_loss": 0.9608938694000244, "epoch": 0.8888888888888888, "percentage": 88.89, "elapsed_time": "0:28:06", "remaining_time": "0:03:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 410, "total_steps": 450, "loss": 0.9407, "learning_rate": 2.3876057330792344e-07, "epoch": 0.9111111111111111, "percentage": 91.11, "elapsed_time": "0:28:42", "remaining_time": "0:02:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 420, "total_steps": 450, "loss": 1.0322, "learning_rate": 1.3477564710088097e-07, "epoch": 0.9333333333333333, "percentage": 93.33, "elapsed_time": "0:29:17", "remaining_time": "0:02:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 430, "total_steps": 450, "loss": 0.9011, "learning_rate": 6.005075261595495e-08, "epoch": 0.9555555555555556, "percentage": 95.56, "elapsed_time": "0:29:53", "remaining_time": "0:01:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 440, "total_steps": 450, "loss": 0.9997, "learning_rate": 1.5035294161039882e-08, "epoch": 0.9777777777777777, "percentage": 97.78, "elapsed_time": "0:30:29", "remaining_time": "0:00:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 450, "total_steps": 450, "loss": 1.0307, "learning_rate": 0.0, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:31:05", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 450, "total_steps": 450, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:31:05", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}