train_cb_42_1760637405 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 114
55749a9 verified
{"current_steps": 5, "total_steps": 1140, "loss": 1.0538, "lr": 1.7543859649122807e-06, "epoch": 0.08771929824561403, "percentage": 0.44, "elapsed_time": "0:00:01", "remaining_time": "0:06:01", "throughput": 2032.24, "total_tokens": 3232}
{"current_steps": 10, "total_steps": 1140, "loss": 1.0309, "lr": 3.9473684210526315e-06, "epoch": 0.17543859649122806, "percentage": 0.88, "elapsed_time": "0:00:02", "remaining_time": "0:05:00", "throughput": 2414.96, "total_tokens": 6432}
{"current_steps": 15, "total_steps": 1140, "loss": 1.0359, "lr": 6.140350877192982e-06, "epoch": 0.2631578947368421, "percentage": 1.32, "elapsed_time": "0:00:03", "remaining_time": "0:04:33", "throughput": 2567.77, "total_tokens": 9376}
{"current_steps": 20, "total_steps": 1140, "loss": 1.0625, "lr": 8.333333333333334e-06, "epoch": 0.3508771929824561, "percentage": 1.75, "elapsed_time": "0:00:04", "remaining_time": "0:04:23", "throughput": 2649.02, "total_tokens": 12448}
{"current_steps": 25, "total_steps": 1140, "loss": 1.1393, "lr": 1.0526315789473684e-05, "epoch": 0.43859649122807015, "percentage": 2.19, "elapsed_time": "0:00:05", "remaining_time": "0:04:13", "throughput": 2716.2, "total_tokens": 15456}
{"current_steps": 30, "total_steps": 1140, "loss": 1.0549, "lr": 1.2719298245614037e-05, "epoch": 0.5263157894736842, "percentage": 2.63, "elapsed_time": "0:00:06", "remaining_time": "0:04:09", "throughput": 2759.14, "total_tokens": 18592}
{"current_steps": 35, "total_steps": 1140, "loss": 1.1235, "lr": 1.4912280701754386e-05, "epoch": 0.6140350877192983, "percentage": 3.07, "elapsed_time": "0:00:08", "remaining_time": "0:04:16", "throughput": 2843.36, "total_tokens": 23136}
{"current_steps": 40, "total_steps": 1140, "loss": 1.1976, "lr": 1.7105263157894737e-05, "epoch": 0.7017543859649122, "percentage": 3.51, "elapsed_time": "0:00:09", "remaining_time": "0:04:14", "throughput": 2867.76, "total_tokens": 26496}
{"current_steps": 45, "total_steps": 1140, "loss": 1.1655, "lr": 1.929824561403509e-05, "epoch": 0.7894736842105263, "percentage": 3.95, "elapsed_time": "0:00:10", "remaining_time": "0:04:10", "throughput": 2882.27, "total_tokens": 29728}
{"current_steps": 50, "total_steps": 1140, "loss": 1.184, "lr": 2.149122807017544e-05, "epoch": 0.8771929824561403, "percentage": 4.39, "elapsed_time": "0:00:11", "remaining_time": "0:04:08", "throughput": 2902.63, "total_tokens": 33024}
{"current_steps": 55, "total_steps": 1140, "loss": 1.0884, "lr": 2.368421052631579e-05, "epoch": 0.9649122807017544, "percentage": 4.82, "elapsed_time": "0:00:12", "remaining_time": "0:04:04", "throughput": 2899.38, "total_tokens": 35904}
{"current_steps": 57, "total_steps": 1140, "eval_loss": 1.0677411556243896, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:13", "remaining_time": "0:04:12", "throughput": 2741.69, "total_tokens": 36480}
{"current_steps": 60, "total_steps": 1140, "loss": 0.9972, "lr": 2.5877192982456143e-05, "epoch": 1.0526315789473684, "percentage": 5.26, "elapsed_time": "0:00:14", "remaining_time": "0:04:25", "throughput": 2580.61, "total_tokens": 38016}
{"current_steps": 65, "total_steps": 1140, "loss": 1.1312, "lr": 2.8070175438596492e-05, "epoch": 1.1403508771929824, "percentage": 5.7, "elapsed_time": "0:00:15", "remaining_time": "0:04:19", "throughput": 2598.6, "total_tokens": 40800}
{"current_steps": 70, "total_steps": 1140, "loss": 1.1464, "lr": 3.0263157894736844e-05, "epoch": 1.2280701754385965, "percentage": 6.14, "elapsed_time": "0:00:16", "remaining_time": "0:04:14", "throughput": 2616.65, "total_tokens": 43648}
{"current_steps": 75, "total_steps": 1140, "loss": 1.1615, "lr": 3.24561403508772e-05, "epoch": 1.3157894736842106, "percentage": 6.58, "elapsed_time": "0:00:17", "remaining_time": "0:04:09", "throughput": 2624.29, "total_tokens": 46112}
{"current_steps": 80, "total_steps": 1140, "loss": 1.1548, "lr": 3.4649122807017546e-05, "epoch": 1.4035087719298245, "percentage": 7.02, "elapsed_time": "0:00:18", "remaining_time": "0:04:08", "throughput": 2651.59, "total_tokens": 49632}
{"current_steps": 85, "total_steps": 1140, "loss": 1.0904, "lr": 3.6842105263157895e-05, "epoch": 1.4912280701754386, "percentage": 7.46, "elapsed_time": "0:00:19", "remaining_time": "0:04:06", "throughput": 2672.14, "total_tokens": 53024}
{"current_steps": 90, "total_steps": 1140, "loss": 1.0917, "lr": 3.9035087719298244e-05, "epoch": 1.5789473684210527, "percentage": 7.89, "elapsed_time": "0:00:21", "remaining_time": "0:04:05", "throughput": 2696.78, "total_tokens": 56672}
{"current_steps": 95, "total_steps": 1140, "loss": 1.1794, "lr": 4.12280701754386e-05, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:21", "remaining_time": "0:04:01", "throughput": 2705.05, "total_tokens": 59296}
{"current_steps": 100, "total_steps": 1140, "loss": 1.098, "lr": 4.342105263157895e-05, "epoch": 1.7543859649122808, "percentage": 8.77, "elapsed_time": "0:00:23", "remaining_time": "0:03:59", "throughput": 2715.98, "total_tokens": 62528}
{"current_steps": 105, "total_steps": 1140, "loss": 0.6296, "lr": 4.56140350877193e-05, "epoch": 1.8421052631578947, "percentage": 9.21, "elapsed_time": "0:00:24", "remaining_time": "0:03:57", "throughput": 2731.14, "total_tokens": 65856}
{"current_steps": 110, "total_steps": 1140, "loss": 0.7728, "lr": 4.780701754385965e-05, "epoch": 1.9298245614035088, "percentage": 9.65, "elapsed_time": "0:00:25", "remaining_time": "0:03:57", "throughput": 2747.93, "total_tokens": 69664}
{"current_steps": 114, "total_steps": 1140, "eval_loss": 0.47614240646362305, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:26", "remaining_time": "0:04:01", "throughput": 2684.58, "total_tokens": 72112}
{"current_steps": 115, "total_steps": 1140, "loss": 0.7205, "lr": 5e-05, "epoch": 2.017543859649123, "percentage": 10.09, "elapsed_time": "0:00:28", "remaining_time": "0:04:09", "throughput": 2599.59, "total_tokens": 72848}
{"current_steps": 120, "total_steps": 1140, "loss": 0.4737, "lr": 4.999707014206475e-05, "epoch": 2.1052631578947367, "percentage": 10.53, "elapsed_time": "0:00:29", "remaining_time": "0:04:06", "throughput": 2613.07, "total_tokens": 75888}
{"current_steps": 125, "total_steps": 1140, "loss": 0.2419, "lr": 4.9988281254984414e-05, "epoch": 2.192982456140351, "percentage": 10.96, "elapsed_time": "0:00:30", "remaining_time": "0:04:04", "throughput": 2624.32, "total_tokens": 78896}