C_L / trainer_log.jsonl
Markmochi's picture
Upload folder using huggingface_hub
c3eaf66 verified
{"current_steps": 1, "total_steps": 50, "loss": 2.2667, "lr": 1.9980267284282718e-05, "epoch": 0.09523809523809523, "percentage": 2.0, "elapsed_time": "0:00:55", "remaining_time": "0:45:23", "throughput": 9434.18, "total_tokens": 524288}
{"current_steps": 2, "total_steps": 50, "loss": 2.1611, "lr": 1.9921147013144782e-05, "epoch": 0.19047619047619047, "percentage": 4.0, "elapsed_time": "0:01:48", "remaining_time": "0:43:33", "throughput": 9628.82, "total_tokens": 1048576}
{"current_steps": 3, "total_steps": 50, "loss": 1.9961, "lr": 1.982287250728689e-05, "epoch": 0.2857142857142857, "percentage": 6.0, "elapsed_time": "0:02:41", "remaining_time": "0:42:13", "throughput": 9726.07, "total_tokens": 1572864}
{"current_steps": 4, "total_steps": 50, "loss": 2.0384, "lr": 1.9685831611286312e-05, "epoch": 0.38095238095238093, "percentage": 8.0, "elapsed_time": "0:03:34", "remaining_time": "0:41:04", "throughput": 9787.6, "total_tokens": 2097152}
{"current_steps": 5, "total_steps": 50, "loss": 1.8353, "lr": 1.9510565162951538e-05, "epoch": 0.47619047619047616, "percentage": 10.0, "elapsed_time": "0:04:26", "remaining_time": "0:40:01", "throughput": 9825.77, "total_tokens": 2621440}
{"current_steps": 6, "total_steps": 50, "loss": 1.8176, "lr": 1.9297764858882516e-05, "epoch": 0.5714285714285714, "percentage": 12.0, "elapsed_time": "0:05:19", "remaining_time": "0:39:02", "throughput": 9848.55, "total_tokens": 3145728}
{"current_steps": 7, "total_steps": 50, "loss": 1.7109, "lr": 1.9048270524660197e-05, "epoch": 0.6666666666666666, "percentage": 14.0, "elapsed_time": "0:06:12", "remaining_time": "0:38:05", "throughput": 9864.52, "total_tokens": 3670016}
{"current_steps": 8, "total_steps": 50, "loss": 1.656, "lr": 1.8763066800438638e-05, "epoch": 0.7619047619047619, "percentage": 16.0, "elapsed_time": "0:07:04", "remaining_time": "0:37:09", "throughput": 9876.55, "total_tokens": 4194304}
{"current_steps": 9, "total_steps": 50, "loss": 1.5958, "lr": 1.8443279255020153e-05, "epoch": 0.8571428571428571, "percentage": 18.0, "elapsed_time": "0:07:57", "remaining_time": "0:36:14", "throughput": 9885.93, "total_tokens": 4718592}
{"current_steps": 10, "total_steps": 50, "loss": 1.5651, "lr": 1.8090169943749477e-05, "epoch": 0.9523809523809523, "percentage": 20.0, "elapsed_time": "0:08:49", "remaining_time": "0:35:19", "throughput": 9893.73, "total_tokens": 5242880}
{"current_steps": 11, "total_steps": 50, "loss": 2.9817, "lr": 1.7705132427757895e-05, "epoch": 1.0952380952380953, "percentage": 22.0, "elapsed_time": "0:09:49", "remaining_time": "0:34:49", "throughput": 9897.68, "total_tokens": 5832704}
{"current_steps": 12, "total_steps": 50, "loss": 1.3902, "lr": 1.7289686274214116e-05, "epoch": 1.1904761904761905, "percentage": 24.0, "elapsed_time": "0:10:41", "remaining_time": "0:33:52", "throughput": 9903.7, "total_tokens": 6356992}
{"current_steps": 13, "total_steps": 50, "loss": 1.3736, "lr": 1.684547105928689e-05, "epoch": 1.2857142857142856, "percentage": 26.0, "elapsed_time": "0:11:34", "remaining_time": "0:32:57", "throughput": 9906.48, "total_tokens": 6881280}
{"current_steps": 14, "total_steps": 50, "loss": 1.3114, "lr": 1.63742398974869e-05, "epoch": 1.380952380952381, "percentage": 28.0, "elapsed_time": "0:12:27", "remaining_time": "0:32:01", "throughput": 9909.69, "total_tokens": 7405568}
{"current_steps": 15, "total_steps": 50, "loss": 1.2873, "lr": 1.5877852522924733e-05, "epoch": 1.4761904761904763, "percentage": 30.0, "elapsed_time": "0:13:20", "remaining_time": "0:31:06", "throughput": 9910.65, "total_tokens": 7929856}
{"current_steps": 16, "total_steps": 50, "loss": 1.2836, "lr": 1.5358267949789968e-05, "epoch": 1.5714285714285714, "percentage": 32.0, "elapsed_time": "0:14:12", "remaining_time": "0:30:12", "throughput": 9912.55, "total_tokens": 8454144}
{"current_steps": 17, "total_steps": 50, "loss": 1.2841, "lr": 1.4817536741017153e-05, "epoch": 1.6666666666666665, "percentage": 34.0, "elapsed_time": "0:15:05", "remaining_time": "0:29:17", "throughput": 9914.73, "total_tokens": 8978432}
{"current_steps": 18, "total_steps": 50, "loss": 1.2425, "lr": 1.4257792915650728e-05, "epoch": 1.7619047619047619, "percentage": 36.0, "elapsed_time": "0:15:58", "remaining_time": "0:28:23", "throughput": 9915.73, "total_tokens": 9502720}
{"current_steps": 19, "total_steps": 50, "loss": 1.1957, "lr": 1.3681245526846782e-05, "epoch": 1.8571428571428572, "percentage": 38.0, "elapsed_time": "0:16:51", "remaining_time": "0:27:29", "throughput": 9916.55, "total_tokens": 10027008}
{"current_steps": 20, "total_steps": 50, "loss": 1.1952, "lr": 1.3090169943749475e-05, "epoch": 1.9523809523809523, "percentage": 40.0, "elapsed_time": "0:17:43", "remaining_time": "0:26:35", "throughput": 9916.89, "total_tokens": 10551296}
{"current_steps": 21, "total_steps": 50, "loss": 2.2353, "lr": 1.2486898871648552e-05, "epoch": 2.0952380952380953, "percentage": 42.0, "elapsed_time": "0:18:43", "remaining_time": "0:25:51", "throughput": 9917.73, "total_tokens": 11141120}
{"current_steps": 22, "total_steps": 50, "loss": 1.029, "lr": 1.187381314585725e-05, "epoch": 2.1904761904761907, "percentage": 44.0, "elapsed_time": "0:19:36", "remaining_time": "0:24:56", "throughput": 9919.33, "total_tokens": 11665408}
{"current_steps": 23, "total_steps": 50, "loss": 1.0192, "lr": 1.1253332335643043e-05, "epoch": 2.2857142857142856, "percentage": 46.0, "elapsed_time": "0:20:28", "remaining_time": "0:24:02", "throughput": 9920.22, "total_tokens": 12189696}
{"current_steps": 24, "total_steps": 50, "loss": 0.9982, "lr": 1.0627905195293135e-05, "epoch": 2.380952380952381, "percentage": 48.0, "elapsed_time": "0:21:21", "remaining_time": "0:23:08", "throughput": 9921.68, "total_tokens": 12713984}
{"current_steps": 25, "total_steps": 50, "loss": 0.9373, "lr": 1e-05, "epoch": 2.4761904761904763, "percentage": 50.0, "elapsed_time": "0:22:14", "remaining_time": "0:22:14", "throughput": 9923.08, "total_tokens": 13238272}
{"current_steps": 26, "total_steps": 50, "loss": 0.9552, "lr": 9.372094804706867e-06, "epoch": 2.571428571428571, "percentage": 52.0, "elapsed_time": "0:23:06", "remaining_time": "0:21:20", "throughput": 9923.81, "total_tokens": 13762560}
{"current_steps": 27, "total_steps": 50, "loss": 0.9351, "lr": 8.746667664356957e-06, "epoch": 2.6666666666666665, "percentage": 54.0, "elapsed_time": "0:23:59", "remaining_time": "0:20:26", "throughput": 9924.65, "total_tokens": 14286848}
{"current_steps": 28, "total_steps": 50, "loss": 0.927, "lr": 8.126186854142752e-06, "epoch": 2.761904761904762, "percentage": 56.0, "elapsed_time": "0:24:52", "remaining_time": "0:19:32", "throughput": 9925.08, "total_tokens": 14811136}
{"current_steps": 29, "total_steps": 50, "loss": 0.9246, "lr": 7.513101128351454e-06, "epoch": 2.857142857142857, "percentage": 58.0, "elapsed_time": "0:25:44", "remaining_time": "0:18:38", "throughput": 9925.87, "total_tokens": 15335424}
{"current_steps": 30, "total_steps": 50, "loss": 0.9258, "lr": 6.909830056250527e-06, "epoch": 2.9523809523809526, "percentage": 60.0, "elapsed_time": "0:26:37", "remaining_time": "0:17:45", "throughput": 9926.36, "total_tokens": 15859712}
{"current_steps": 31, "total_steps": 50, "loss": 1.6889, "lr": 6.318754473153221e-06, "epoch": 3.0952380952380953, "percentage": 62.0, "elapsed_time": "0:27:37", "remaining_time": "0:16:55", "throughput": 9926.03, "total_tokens": 16449536}
{"current_steps": 32, "total_steps": 50, "loss": 0.8019, "lr": 5.742207084349274e-06, "epoch": 3.1904761904761907, "percentage": 64.0, "elapsed_time": "0:28:29", "remaining_time": "0:16:01", "throughput": 9926.22, "total_tokens": 16973824}
{"current_steps": 33, "total_steps": 50, "loss": 0.7929, "lr": 5.1824632589828465e-06, "epoch": 3.2857142857142856, "percentage": 66.0, "elapsed_time": "0:29:22", "remaining_time": "0:15:08", "throughput": 9926.7, "total_tokens": 17498112}
{"current_steps": 34, "total_steps": 50, "loss": 0.7776, "lr": 4.641732050210032e-06, "epoch": 3.380952380952381, "percentage": 68.0, "elapsed_time": "0:30:15", "remaining_time": "0:14:14", "throughput": 9927.33, "total_tokens": 18022400}
{"current_steps": 35, "total_steps": 50, "loss": 0.7658, "lr": 4.12214747707527e-06, "epoch": 3.4761904761904763, "percentage": 70.0, "elapsed_time": "0:31:08", "remaining_time": "0:13:20", "throughput": 9928.15, "total_tokens": 18546688}
{"current_steps": 36, "total_steps": 50, "loss": 0.7474, "lr": 3.625760102513103e-06, "epoch": 3.571428571428571, "percentage": 72.0, "elapsed_time": "0:32:00", "remaining_time": "0:12:26", "throughput": 9928.84, "total_tokens": 19070976}
{"current_steps": 37, "total_steps": 50, "loss": 0.7614, "lr": 3.1545289407131128e-06, "epoch": 3.6666666666666665, "percentage": 74.0, "elapsed_time": "0:32:53", "remaining_time": "0:11:33", "throughput": 9928.83, "total_tokens": 19595264}
{"current_steps": 38, "total_steps": 50, "loss": 0.744, "lr": 2.7103137257858867e-06, "epoch": 3.761904761904762, "percentage": 76.0, "elapsed_time": "0:33:46", "remaining_time": "0:10:39", "throughput": 9929.39, "total_tokens": 20119552}
{"current_steps": 39, "total_steps": 50, "loss": 0.7534, "lr": 2.2948675722421086e-06, "epoch": 3.857142857142857, "percentage": 78.0, "elapsed_time": "0:34:38", "remaining_time": "0:09:46", "throughput": 9929.88, "total_tokens": 20643840}
{"current_steps": 40, "total_steps": 50, "loss": 0.735, "lr": 1.9098300562505266e-06, "epoch": 3.9523809523809526, "percentage": 80.0, "elapsed_time": "0:35:31", "remaining_time": "0:08:52", "throughput": 9930.09, "total_tokens": 21168128}
{"current_steps": 41, "total_steps": 50, "loss": 1.4372, "lr": 1.5567207449798517e-06, "epoch": 4.095238095238095, "percentage": 82.0, "elapsed_time": "0:36:31", "remaining_time": "0:08:01", "throughput": 9929.19, "total_tokens": 21757952}
{"current_steps": 42, "total_steps": 50, "loss": 0.6911, "lr": 1.2369331995613664e-06, "epoch": 4.190476190476191, "percentage": 84.0, "elapsed_time": "0:37:23", "remaining_time": "0:07:07", "throughput": 9929.76, "total_tokens": 22282240}
{"current_steps": 43, "total_steps": 50, "loss": 0.6881, "lr": 9.517294753398066e-07, "epoch": 4.285714285714286, "percentage": 86.0, "elapsed_time": "0:38:16", "remaining_time": "0:06:13", "throughput": 9929.69, "total_tokens": 22806528}
{"current_steps": 44, "total_steps": 50, "loss": 0.6862, "lr": 7.022351411174866e-07, "epoch": 4.380952380952381, "percentage": 88.0, "elapsed_time": "0:39:09", "remaining_time": "0:05:20", "throughput": 9929.82, "total_tokens": 23330816}
{"current_steps": 45, "total_steps": 50, "loss": 0.6777, "lr": 4.894348370484648e-07, "epoch": 4.476190476190476, "percentage": 90.0, "elapsed_time": "0:40:02", "remaining_time": "0:04:26", "throughput": 9929.9, "total_tokens": 23855104}
{"current_steps": 46, "total_steps": 50, "loss": 0.6719, "lr": 3.1416838871368925e-07, "epoch": 4.571428571428571, "percentage": 92.0, "elapsed_time": "0:40:55", "remaining_time": "0:03:33", "throughput": 9930.17, "total_tokens": 24379392}
{"current_steps": 47, "total_steps": 50, "loss": 0.6615, "lr": 1.7712749271311392e-07, "epoch": 4.666666666666667, "percentage": 94.0, "elapsed_time": "0:41:47", "remaining_time": "0:02:40", "throughput": 9930.34, "total_tokens": 24903680}
{"current_steps": 48, "total_steps": 50, "loss": 0.6738, "lr": 7.885298685522235e-08, "epoch": 4.761904761904762, "percentage": 96.0, "elapsed_time": "0:42:40", "remaining_time": "0:01:46", "throughput": 9930.15, "total_tokens": 25427968}
{"current_steps": 49, "total_steps": 50, "loss": 0.679, "lr": 1.973271571728441e-08, "epoch": 4.857142857142857, "percentage": 98.0, "elapsed_time": "0:43:33", "remaining_time": "0:00:53", "throughput": 9930.42, "total_tokens": 25952256}
{"current_steps": 50, "total_steps": 50, "loss": 0.6713, "lr": 0.0, "epoch": 4.9523809523809526, "percentage": 100.0, "elapsed_time": "0:44:26", "remaining_time": "0:00:00", "throughput": 9930.42, "total_tokens": 26476544}