v115rc_A3 / trainer_log.jsonl
Linksome's picture
Upload folder using huggingface_hub
e50ccb7 verified
{"current_steps": 1, "total_steps": 27, "loss": 0.1529, "lr": 9.966191788709716e-06, "epoch": 0.10126582278481013, "percentage": 3.7, "elapsed_time": "0:00:27", "remaining_time": "0:11:57", "throughput": 9494.68, "total_tokens": 262144}
{"current_steps": 2, "total_steps": 27, "loss": 0.2089, "lr": 9.86522435289912e-06, "epoch": 0.20253164556962025, "percentage": 7.41, "elapsed_time": "0:00:53", "remaining_time": "0:11:09", "throughput": 9788.18, "total_tokens": 524288}
{"current_steps": 3, "total_steps": 27, "loss": 0.2318, "lr": 9.698463103929542e-06, "epoch": 0.3037974683544304, "percentage": 11.11, "elapsed_time": "0:01:18", "remaining_time": "0:10:25", "throughput": 10057.98, "total_tokens": 786432}
{"current_steps": 4, "total_steps": 27, "loss": 0.2081, "lr": 9.468163201617063e-06, "epoch": 0.4050632911392405, "percentage": 14.81, "elapsed_time": "0:01:42", "remaining_time": "0:09:51", "throughput": 10194.47, "total_tokens": 1048576}
{"current_steps": 5, "total_steps": 27, "loss": 0.2143, "lr": 9.177439057064684e-06, "epoch": 0.5063291139240507, "percentage": 18.52, "elapsed_time": "0:02:07", "remaining_time": "0:09:22", "throughput": 10256.36, "total_tokens": 1310720}
{"current_steps": 6, "total_steps": 27, "loss": 0.2281, "lr": 8.83022221559489e-06, "epoch": 0.6075949367088608, "percentage": 22.22, "elapsed_time": "0:02:32", "remaining_time": "0:08:53", "throughput": 10314.29, "total_tokens": 1572864}
{"current_steps": 7, "total_steps": 27, "loss": 0.2212, "lr": 8.43120818934367e-06, "epoch": 0.7088607594936709, "percentage": 25.93, "elapsed_time": "0:02:56", "remaining_time": "0:08:25", "throughput": 10376.24, "total_tokens": 1835008}
{"current_steps": 8, "total_steps": 27, "loss": 0.216, "lr": 7.985792958513932e-06, "epoch": 0.810126582278481, "percentage": 29.63, "elapsed_time": "0:03:21", "remaining_time": "0:07:57", "throughput": 10421.67, "total_tokens": 2097152}
{"current_steps": 9, "total_steps": 27, "loss": 0.2183, "lr": 7.500000000000001e-06, "epoch": 0.9113924050632911, "percentage": 33.33, "elapsed_time": "0:03:45", "remaining_time": "0:07:31", "throughput": 10460.04, "total_tokens": 2359296}
{"current_steps": 10, "total_steps": 27, "loss": 0.1919, "lr": 6.980398830195785e-06, "epoch": 1.0, "percentage": 37.04, "elapsed_time": "0:04:06", "remaining_time": "0:06:59", "throughput": 10495.38, "total_tokens": 2588672}
{"current_steps": 11, "total_steps": 27, "loss": 0.1179, "lr": 6.434016163555452e-06, "epoch": 1.1012658227848102, "percentage": 40.74, "elapsed_time": "0:04:30", "remaining_time": "0:06:34", "throughput": 10519.75, "total_tokens": 2850816}
{"current_steps": 12, "total_steps": 27, "loss": 0.1105, "lr": 5.8682408883346535e-06, "epoch": 1.2025316455696202, "percentage": 44.44, "elapsed_time": "0:04:55", "remaining_time": "0:06:09", "throughput": 10541.45, "total_tokens": 3112960}
{"current_steps": 13, "total_steps": 27, "loss": 0.0932, "lr": 5.290724144552379e-06, "epoch": 1.3037974683544304, "percentage": 48.15, "elapsed_time": "0:05:19", "remaining_time": "0:05:44", "throughput": 10558.24, "total_tokens": 3375104}
{"current_steps": 14, "total_steps": 27, "loss": 0.0897, "lr": 4.7092758554476215e-06, "epoch": 1.4050632911392404, "percentage": 51.85, "elapsed_time": "0:05:43", "remaining_time": "0:05:19", "throughput": 10575.05, "total_tokens": 3637248}
{"current_steps": 15, "total_steps": 27, "loss": 0.08, "lr": 4.131759111665349e-06, "epoch": 1.5063291139240507, "percentage": 55.56, "elapsed_time": "0:06:08", "remaining_time": "0:04:54", "throughput": 10589.27, "total_tokens": 3899392}
{"current_steps": 16, "total_steps": 27, "loss": 0.0836, "lr": 3.5659838364445505e-06, "epoch": 1.6075949367088609, "percentage": 59.26, "elapsed_time": "0:06:32", "remaining_time": "0:04:29", "throughput": 10600.26, "total_tokens": 4161536}
{"current_steps": 17, "total_steps": 27, "loss": 0.0823, "lr": 3.019601169804216e-06, "epoch": 1.7088607594936709, "percentage": 62.96, "elapsed_time": "0:06:56", "remaining_time": "0:04:05", "throughput": 10611.04, "total_tokens": 4423680}
{"current_steps": 18, "total_steps": 27, "loss": 0.0881, "lr": 2.5000000000000015e-06, "epoch": 1.810126582278481, "percentage": 66.67, "elapsed_time": "0:07:21", "remaining_time": "0:03:40", "throughput": 10620.01, "total_tokens": 4685824}
{"current_steps": 19, "total_steps": 27, "loss": 0.0879, "lr": 2.0142070414860704e-06, "epoch": 1.9113924050632911, "percentage": 70.37, "elapsed_time": "0:07:45", "remaining_time": "0:03:16", "throughput": 10629.1, "total_tokens": 4947968}
{"current_steps": 20, "total_steps": 27, "loss": 0.0762, "lr": 1.5687918106563326e-06, "epoch": 2.0, "percentage": 74.07, "elapsed_time": "0:08:06", "remaining_time": "0:02:50", "throughput": 10634.06, "total_tokens": 5177344}
{"current_steps": 21, "total_steps": 27, "loss": 0.0424, "lr": 1.1697777844051105e-06, "epoch": 2.1012658227848102, "percentage": 77.78, "elapsed_time": "0:08:31", "remaining_time": "0:02:26", "throughput": 10639.93, "total_tokens": 5439488}
{"current_steps": 22, "total_steps": 27, "loss": 0.0358, "lr": 8.225609429353187e-07, "epoch": 2.2025316455696204, "percentage": 81.48, "elapsed_time": "0:08:55", "remaining_time": "0:02:01", "throughput": 10646.06, "total_tokens": 5701632}
{"current_steps": 23, "total_steps": 27, "loss": 0.0365, "lr": 5.318367983829393e-07, "epoch": 2.3037974683544302, "percentage": 85.19, "elapsed_time": "0:09:19", "remaining_time": "0:01:37", "throughput": 10652.13, "total_tokens": 5963776}
{"current_steps": 24, "total_steps": 27, "loss": 0.0331, "lr": 3.015368960704584e-07, "epoch": 2.4050632911392404, "percentage": 88.89, "elapsed_time": "0:09:44", "remaining_time": "0:01:13", "throughput": 10658.36, "total_tokens": 6225920}
{"current_steps": 25, "total_steps": 27, "loss": 0.0371, "lr": 1.3477564710088097e-07, "epoch": 2.5063291139240507, "percentage": 92.59, "elapsed_time": "0:10:08", "remaining_time": "0:00:48", "throughput": 10663.07, "total_tokens": 6488064}
{"current_steps": 26, "total_steps": 27, "loss": 0.0393, "lr": 3.3808211290284886e-08, "epoch": 2.607594936708861, "percentage": 96.3, "elapsed_time": "0:10:32", "remaining_time": "0:00:24", "throughput": 10668.33, "total_tokens": 6750208}
{"current_steps": 27, "total_steps": 27, "loss": 0.0391, "lr": 0.0, "epoch": 2.708860759493671, "percentage": 100.0, "elapsed_time": "0:10:57", "remaining_time": "0:00:00", "throughput": 10672.35, "total_tokens": 7012352}