v115rc_C / trainer_log.jsonl
Linksome's picture
Upload folder using huggingface_hub
72a8945 verified
{"current_steps": 1, "total_steps": 15, "loss": 2.0062, "lr": 1.9781476007338058e-05, "epoch": 0.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:25", "remaining_time": "0:06:01", "throughput": 10151.22, "total_tokens": 262144}
{"current_steps": 2, "total_steps": 15, "loss": 1.978, "lr": 1.913545457642601e-05, "epoch": 0.6666666666666666, "percentage": 13.33, "elapsed_time": "0:00:50", "remaining_time": "0:05:25", "throughput": 10474.24, "total_tokens": 524288}
{"current_steps": 3, "total_steps": 15, "loss": 1.6415, "lr": 1.8090169943749477e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:01:14", "remaining_time": "0:04:56", "throughput": 10614.63, "total_tokens": 786432}
{"current_steps": 4, "total_steps": 15, "loss": 1.494, "lr": 1.6691306063588583e-05, "epoch": 1.3333333333333333, "percentage": 26.67, "elapsed_time": "0:01:38", "remaining_time": "0:04:30", "throughput": 10674.26, "total_tokens": 1048576}
{"current_steps": 5, "total_steps": 15, "loss": 1.3698, "lr": 1.5000000000000002e-05, "epoch": 1.6666666666666665, "percentage": 33.33, "elapsed_time": "0:02:02", "remaining_time": "0:04:04", "throughput": 10711.3, "total_tokens": 1310720}
{"current_steps": 6, "total_steps": 15, "loss": 1.2727, "lr": 1.3090169943749475e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:02:26", "remaining_time": "0:03:39", "throughput": 10730.28, "total_tokens": 1572864}
{"current_steps": 7, "total_steps": 15, "loss": 1.1431, "lr": 1.1045284632676535e-05, "epoch": 2.3333333333333335, "percentage": 46.67, "elapsed_time": "0:02:50", "remaining_time": "0:03:15", "throughput": 10748.64, "total_tokens": 1835008}
{"current_steps": 8, "total_steps": 15, "loss": 1.0617, "lr": 8.954715367323468e-06, "epoch": 2.6666666666666665, "percentage": 53.33, "elapsed_time": "0:03:14", "remaining_time": "0:02:50", "throughput": 10760.44, "total_tokens": 2097152}
{"current_steps": 9, "total_steps": 15, "loss": 0.9622, "lr": 6.909830056250527e-06, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:03:39", "remaining_time": "0:02:26", "throughput": 10771.83, "total_tokens": 2359296}
{"current_steps": 10, "total_steps": 15, "loss": 0.9027, "lr": 5.000000000000003e-06, "epoch": 3.3333333333333335, "percentage": 66.67, "elapsed_time": "0:04:03", "remaining_time": "0:02:01", "throughput": 10776.8, "total_tokens": 2621440}
{"current_steps": 11, "total_steps": 15, "loss": 0.8435, "lr": 3.308693936411421e-06, "epoch": 3.6666666666666665, "percentage": 73.33, "elapsed_time": "0:04:27", "remaining_time": "0:01:37", "throughput": 10783.27, "total_tokens": 2883584}
{"current_steps": 12, "total_steps": 15, "loss": 0.8264, "lr": 1.9098300562505266e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:04:51", "remaining_time": "0:01:12", "throughput": 10788.73, "total_tokens": 3145728}
{"current_steps": 13, "total_steps": 15, "loss": 0.7904, "lr": 8.645454235739903e-07, "epoch": 4.333333333333333, "percentage": 86.67, "elapsed_time": "0:05:15", "remaining_time": "0:00:48", "throughput": 10792.34, "total_tokens": 3407872}
{"current_steps": 14, "total_steps": 15, "loss": 0.7624, "lr": 2.1852399266194312e-07, "epoch": 4.666666666666667, "percentage": 93.33, "elapsed_time": "0:05:39", "remaining_time": "0:00:24", "throughput": 10794.95, "total_tokens": 3670016}
{"current_steps": 1, "total_steps": 15, "loss": 2.0062, "lr": 4.9453690018345144e-05, "epoch": 0.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:25", "remaining_time": "0:06:03", "throughput": 10103.24, "total_tokens": 262144}
{"current_steps": 2, "total_steps": 15, "loss": 3.254, "lr": 4.783863644106502e-05, "epoch": 0.6666666666666666, "percentage": 13.33, "elapsed_time": "0:00:50", "remaining_time": "0:05:26", "throughput": 10441.0, "total_tokens": 524288}
{"current_steps": 1, "total_steps": 30, "loss": 2.0062, "lr": 1.9945218953682736e-05, "epoch": 0.3333333333333333, "percentage": 3.33, "elapsed_time": "0:00:26", "remaining_time": "0:12:46", "throughput": 9919.14, "total_tokens": 262144}
{"current_steps": 2, "total_steps": 30, "loss": 1.9779, "lr": 1.9781476007338058e-05, "epoch": 0.6666666666666666, "percentage": 6.67, "elapsed_time": "0:00:50", "remaining_time": "0:11:50", "throughput": 10327.2, "total_tokens": 524288}
{"current_steps": 3, "total_steps": 30, "loss": 1.6416, "lr": 1.9510565162951538e-05, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:01:14", "remaining_time": "0:11:14", "throughput": 10488.58, "total_tokens": 786432}
{"current_steps": 4, "total_steps": 30, "loss": 1.4959, "lr": 1.913545457642601e-05, "epoch": 1.3333333333333333, "percentage": 13.33, "elapsed_time": "0:01:39", "remaining_time": "0:10:44", "throughput": 10569.92, "total_tokens": 1048576}
{"current_steps": 5, "total_steps": 30, "loss": 1.3738, "lr": 1.866025403784439e-05, "epoch": 1.6666666666666665, "percentage": 16.67, "elapsed_time": "0:02:03", "remaining_time": "0:10:16", "throughput": 10621.85, "total_tokens": 1310720}
{"current_steps": 6, "total_steps": 30, "loss": 1.2725, "lr": 1.8090169943749477e-05, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:02:27", "remaining_time": "0:09:50", "throughput": 10657.95, "total_tokens": 1572864}
{"current_steps": 7, "total_steps": 30, "loss": 1.115, "lr": 1.7431448254773943e-05, "epoch": 2.3333333333333335, "percentage": 23.33, "elapsed_time": "0:02:51", "remaining_time": "0:09:24", "throughput": 10681.97, "total_tokens": 1835008}
{"current_steps": 8, "total_steps": 30, "loss": 1.0322, "lr": 1.6691306063588583e-05, "epoch": 2.6666666666666665, "percentage": 26.67, "elapsed_time": "0:03:16", "remaining_time": "0:08:59", "throughput": 10699.75, "total_tokens": 2097152}
{"current_steps": 9, "total_steps": 30, "loss": 0.9744, "lr": 1.5877852522924733e-05, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:03:40", "remaining_time": "0:08:33", "throughput": 10713.27, "total_tokens": 2359296}
{"current_steps": 10, "total_steps": 30, "loss": 0.8432, "lr": 1.5000000000000002e-05, "epoch": 3.3333333333333335, "percentage": 33.33, "elapsed_time": "0:04:04", "remaining_time": "0:08:08", "throughput": 10723.92, "total_tokens": 2621440}
{"current_steps": 11, "total_steps": 30, "loss": 0.7944, "lr": 1.4067366430758004e-05, "epoch": 3.6666666666666665, "percentage": 36.67, "elapsed_time": "0:04:28", "remaining_time": "0:07:43", "throughput": 10734.43, "total_tokens": 2883584}
{"current_steps": 12, "total_steps": 30, "loss": 0.7473, "lr": 1.3090169943749475e-05, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:04:52", "remaining_time": "0:07:19", "throughput": 10745.5, "total_tokens": 3145728}
{"current_steps": 13, "total_steps": 30, "loss": 0.6582, "lr": 1.2079116908177592e-05, "epoch": 4.333333333333333, "percentage": 43.33, "elapsed_time": "0:05:16", "remaining_time": "0:06:54", "throughput": 10751.12, "total_tokens": 3407872}
{"current_steps": 14, "total_steps": 30, "loss": 0.604, "lr": 1.1045284632676535e-05, "epoch": 4.666666666666667, "percentage": 46.67, "elapsed_time": "0:05:41", "remaining_time": "0:06:29", "throughput": 10755.41, "total_tokens": 3670016}
{"current_steps": 15, "total_steps": 30, "loss": 0.5651, "lr": 1e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:06:05", "remaining_time": "0:06:05", "throughput": 10760.6, "total_tokens": 3932160}
{"current_steps": 16, "total_steps": 30, "loss": 0.4855, "lr": 8.954715367323468e-06, "epoch": 5.333333333333333, "percentage": 53.33, "elapsed_time": "0:06:29", "remaining_time": "0:05:40", "throughput": 10764.18, "total_tokens": 4194304}
{"current_steps": 17, "total_steps": 30, "loss": 0.4643, "lr": 7.92088309182241e-06, "epoch": 5.666666666666667, "percentage": 56.67, "elapsed_time": "0:06:53", "remaining_time": "0:05:16", "throughput": 10766.49, "total_tokens": 4456448}
{"current_steps": 18, "total_steps": 30, "loss": 0.4215, "lr": 6.909830056250527e-06, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:07:18", "remaining_time": "0:04:52", "throughput": 10772.2, "total_tokens": 4718592}
{"current_steps": 19, "total_steps": 30, "loss": 0.3629, "lr": 5.932633569242e-06, "epoch": 6.333333333333333, "percentage": 63.33, "elapsed_time": "0:07:42", "remaining_time": "0:04:27", "throughput": 10774.29, "total_tokens": 4980736}
{"current_steps": 20, "total_steps": 30, "loss": 0.3389, "lr": 5.000000000000003e-06, "epoch": 6.666666666666667, "percentage": 66.67, "elapsed_time": "0:08:06", "remaining_time": "0:04:03", "throughput": 10776.94, "total_tokens": 5242880}
{"current_steps": 21, "total_steps": 30, "loss": 0.3203, "lr": 4.12214747707527e-06, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:08:30", "remaining_time": "0:03:38", "throughput": 10779.6, "total_tokens": 5505024}
{"current_steps": 22, "total_steps": 30, "loss": 0.2775, "lr": 3.308693936411421e-06, "epoch": 7.333333333333333, "percentage": 73.33, "elapsed_time": "0:08:54", "remaining_time": "0:03:14", "throughput": 10780.81, "total_tokens": 5767168}
{"current_steps": 23, "total_steps": 30, "loss": 0.2538, "lr": 2.5685517452260566e-06, "epoch": 7.666666666666667, "percentage": 76.67, "elapsed_time": "0:09:19", "remaining_time": "0:02:50", "throughput": 10782.4, "total_tokens": 6029312}
{"current_steps": 24, "total_steps": 30, "loss": 0.2569, "lr": 1.9098300562505266e-06, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:09:43", "remaining_time": "0:02:25", "throughput": 10785.72, "total_tokens": 6291456}
{"current_steps": 25, "total_steps": 30, "loss": 0.2212, "lr": 1.339745962155613e-06, "epoch": 8.333333333333334, "percentage": 83.33, "elapsed_time": "0:10:07", "remaining_time": "0:02:01", "throughput": 10786.5, "total_tokens": 6553600}
{"current_steps": 26, "total_steps": 30, "loss": 0.2083, "lr": 8.645454235739903e-07, "epoch": 8.666666666666666, "percentage": 86.67, "elapsed_time": "0:10:31", "remaining_time": "0:01:37", "throughput": 10788.26, "total_tokens": 6815744}
{"current_steps": 27, "total_steps": 30, "loss": 0.2053, "lr": 4.894348370484648e-07, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:10:55", "remaining_time": "0:01:12", "throughput": 10790.52, "total_tokens": 7077888}
{"current_steps": 28, "total_steps": 30, "loss": 0.1922, "lr": 2.1852399266194312e-07, "epoch": 9.333333333333334, "percentage": 93.33, "elapsed_time": "0:11:20", "remaining_time": "0:00:48", "throughput": 10791.8, "total_tokens": 7340032}
{"current_steps": 29, "total_steps": 30, "loss": 0.1886, "lr": 5.4781046317267103e-08, "epoch": 9.666666666666666, "percentage": 96.67, "elapsed_time": "0:11:44", "remaining_time": "0:00:24", "throughput": 10793.83, "total_tokens": 7602176}
{"current_steps": 30, "total_steps": 30, "loss": 0.1913, "lr": 0.0, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:12:08", "remaining_time": "0:00:00", "throughput": 10795.45, "total_tokens": 7864320}