C / trainer_log.jsonl
Linksome's picture
Upload folder using huggingface_hub
72d1a32 verified
{"current_steps": 1, "total_steps": 105, "loss": 2.2948, "lr": 4.9988810807087584e-05, "epoch": 0.047619047619047616, "percentage": 0.95, "elapsed_time": "0:00:26", "remaining_time": "0:45:28", "throughput": 9990.19, "total_tokens": 262144}
{"current_steps": 2, "total_steps": 105, "loss": 3.146, "lr": 4.9955253244193375e-05, "epoch": 0.09523809523809523, "percentage": 1.9, "elapsed_time": "0:00:50", "remaining_time": "0:43:34", "throughput": 10327.0, "total_tokens": 524288}
{"current_steps": 3, "total_steps": 105, "loss": 2.5121, "lr": 4.989935734988098e-05, "epoch": 0.14285714285714285, "percentage": 2.86, "elapsed_time": "0:01:14", "remaining_time": "0:42:29", "throughput": 10489.15, "total_tokens": 786432}
{"current_steps": 4, "total_steps": 105, "loss": 2.4086, "lr": 4.9821173158545936e-05, "epoch": 0.19047619047619047, "percentage": 3.81, "elapsed_time": "0:01:39", "remaining_time": "0:41:43", "throughput": 10575.69, "total_tokens": 1048576}
{"current_steps": 5, "total_steps": 105, "loss": 2.5585, "lr": 4.972077065562821e-05, "epoch": 0.23809523809523808, "percentage": 4.76, "elapsed_time": "0:02:03", "remaining_time": "0:41:06", "throughput": 10628.3, "total_tokens": 1310720}
{"current_steps": 6, "total_steps": 105, "loss": 2.2115, "lr": 4.959823971496574e-05, "epoch": 0.2857142857142857, "percentage": 5.71, "elapsed_time": "0:02:27", "remaining_time": "0:40:33", "throughput": 10662.74, "total_tokens": 1572864}
{"current_steps": 7, "total_steps": 105, "loss": 2.0896, "lr": 4.9453690018345144e-05, "epoch": 0.3333333333333333, "percentage": 6.67, "elapsed_time": "0:02:51", "remaining_time": "0:40:03", "throughput": 10686.43, "total_tokens": 1835008}
{"current_steps": 8, "total_steps": 105, "loss": 1.8717, "lr": 4.928725095732169e-05, "epoch": 0.38095238095238093, "percentage": 7.62, "elapsed_time": "0:03:15", "remaining_time": "0:39:35", "throughput": 10704.14, "total_tokens": 2097152}
{"current_steps": 9, "total_steps": 105, "loss": 1.892, "lr": 4.909907151739633e-05, "epoch": 0.42857142857142855, "percentage": 8.57, "elapsed_time": "0:03:40", "remaining_time": "0:39:07", "throughput": 10719.55, "total_tokens": 2359296}
{"current_steps": 10, "total_steps": 105, "loss": 1.6835, "lr": 4.888932014465352e-05, "epoch": 0.47619047619047616, "percentage": 9.52, "elapsed_time": "0:04:04", "remaining_time": "0:38:41", "throughput": 10729.46, "total_tokens": 2621440}
{"current_steps": 11, "total_steps": 105, "loss": 1.771, "lr": 4.865818459497911e-05, "epoch": 0.5238095238095238, "percentage": 10.48, "elapsed_time": "0:04:28", "remaining_time": "0:38:14", "throughput": 10737.59, "total_tokens": 2883584}
{"current_steps": 12, "total_steps": 105, "loss": 1.7042, "lr": 4.8405871765993433e-05, "epoch": 0.5714285714285714, "percentage": 11.43, "elapsed_time": "0:04:52", "remaining_time": "0:37:48", "throughput": 10745.28, "total_tokens": 3145728}
{"current_steps": 13, "total_steps": 105, "loss": 1.6366, "lr": 4.813260751184992e-05, "epoch": 0.6190476190476191, "percentage": 12.38, "elapsed_time": "0:05:16", "remaining_time": "0:37:23", "throughput": 10751.74, "total_tokens": 3407872}
{"current_steps": 14, "total_steps": 105, "loss": 1.6423, "lr": 4.783863644106502e-05, "epoch": 0.6666666666666666, "percentage": 13.33, "elapsed_time": "0:05:41", "remaining_time": "0:36:57", "throughput": 10757.44, "total_tokens": 3670016}
{"current_steps": 15, "total_steps": 105, "loss": 1.5964, "lr": 4.752422169756048e-05, "epoch": 0.7142857142857143, "percentage": 14.29, "elapsed_time": "0:06:05", "remaining_time": "0:36:32", "throughput": 10762.36, "total_tokens": 3932160}
{"current_steps": 16, "total_steps": 105, "loss": 1.4749, "lr": 4.718964472511386e-05, "epoch": 0.7619047619047619, "percentage": 15.24, "elapsed_time": "0:06:29", "remaining_time": "0:36:06", "throughput": 10766.52, "total_tokens": 4194304}
{"current_steps": 17, "total_steps": 105, "loss": 1.4656, "lr": 4.6835205015428246e-05, "epoch": 0.8095238095238095, "percentage": 16.19, "elapsed_time": "0:06:53", "remaining_time": "0:35:42", "throughput": 10769.31, "total_tokens": 4456448}
{"current_steps": 18, "total_steps": 105, "loss": 1.4089, "lr": 4.6461219840046654e-05, "epoch": 0.8571428571428571, "percentage": 17.14, "elapsed_time": "0:07:17", "remaining_time": "0:35:16", "throughput": 10773.31, "total_tokens": 4718592}
{"current_steps": 19, "total_steps": 105, "loss": 1.3976, "lr": 4.606802396635098e-05, "epoch": 0.9047619047619048, "percentage": 18.1, "elapsed_time": "0:07:42", "remaining_time": "0:34:51", "throughput": 10776.83, "total_tokens": 4980736}
{"current_steps": 20, "total_steps": 105, "loss": 1.3894, "lr": 4.5655969357899874e-05, "epoch": 0.9523809523809523, "percentage": 19.05, "elapsed_time": "0:08:06", "remaining_time": "0:34:27", "throughput": 10779.82, "total_tokens": 5242880}
{"current_steps": 21, "total_steps": 105, "loss": 1.3524, "lr": 4.522542485937369e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:08:30", "remaining_time": "0:34:02", "throughput": 10783.48, "total_tokens": 5505024}
{"current_steps": 22, "total_steps": 105, "loss": 1.0828, "lr": 4.477677586640854e-05, "epoch": 1.0476190476190477, "percentage": 20.95, "elapsed_time": "0:08:54", "remaining_time": "0:33:37", "throughput": 10784.76, "total_tokens": 5767168}
{"current_steps": 23, "total_steps": 105, "loss": 1.0325, "lr": 4.431042398061499e-05, "epoch": 1.0952380952380953, "percentage": 21.9, "elapsed_time": "0:09:18", "remaining_time": "0:33:12", "throughput": 10785.98, "total_tokens": 6029312}
{"current_steps": 24, "total_steps": 105, "loss": 1.0385, "lr": 4.382678665009028e-05, "epoch": 1.1428571428571428, "percentage": 22.86, "elapsed_time": "0:09:43", "remaining_time": "0:32:48", "throughput": 10788.6, "total_tokens": 6291456}
{"current_steps": 25, "total_steps": 105, "loss": 0.9834, "lr": 4.332629679574566e-05, "epoch": 1.1904761904761905, "percentage": 23.81, "elapsed_time": "0:10:07", "remaining_time": "0:32:23", "throughput": 10790.06, "total_tokens": 6553600}
{"current_steps": 26, "total_steps": 105, "loss": 0.9752, "lr": 4.2809402423783624e-05, "epoch": 1.2380952380952381, "percentage": 24.76, "elapsed_time": "0:10:31", "remaining_time": "0:31:58", "throughput": 10791.98, "total_tokens": 6815744}
{"current_steps": 27, "total_steps": 105, "loss": 0.9711, "lr": 4.227656622467162e-05, "epoch": 1.2857142857142856, "percentage": 25.71, "elapsed_time": "0:10:55", "remaining_time": "0:31:34", "throughput": 10793.05, "total_tokens": 7077888}
{"current_steps": 28, "total_steps": 105, "loss": 0.9585, "lr": 4.172826515897146e-05, "epoch": 1.3333333333333333, "percentage": 26.67, "elapsed_time": "0:11:20", "remaining_time": "0:31:10", "throughput": 10794.08, "total_tokens": 7340032}
{"current_steps": 29, "total_steps": 105, "loss": 0.9722, "lr": 4.116499003039499e-05, "epoch": 1.380952380952381, "percentage": 27.62, "elapsed_time": "0:11:44", "remaining_time": "0:30:45", "throughput": 10795.86, "total_tokens": 7602176}
{"current_steps": 30, "total_steps": 105, "loss": 0.9078, "lr": 4.058724504646834e-05, "epoch": 1.4285714285714286, "percentage": 28.57, "elapsed_time": "0:12:08", "remaining_time": "0:30:21", "throughput": 10796.64, "total_tokens": 7864320}
{"current_steps": 31, "total_steps": 105, "loss": 0.9536, "lr": 3.9995547367197845e-05, "epoch": 1.4761904761904763, "percentage": 29.52, "elapsed_time": "0:12:32", "remaining_time": "0:29:56", "throughput": 10797.69, "total_tokens": 8126464}
{"current_steps": 32, "total_steps": 105, "loss": 0.9287, "lr": 3.939042664214184e-05, "epoch": 1.5238095238095237, "percentage": 30.48, "elapsed_time": "0:12:56", "remaining_time": "0:29:32", "throughput": 10798.84, "total_tokens": 8388608}
{"current_steps": 33, "total_steps": 105, "loss": 0.8881, "lr": 3.8772424536302564e-05, "epoch": 1.5714285714285714, "percentage": 31.43, "elapsed_time": "0:13:20", "remaining_time": "0:29:07", "throughput": 10799.95, "total_tokens": 8650752}
{"current_steps": 34, "total_steps": 105, "loss": 0.9404, "lr": 3.814209424526262e-05, "epoch": 1.619047619047619, "percentage": 32.38, "elapsed_time": "0:13:45", "remaining_time": "0:28:43", "throughput": 10801.03, "total_tokens": 8912896}
{"current_steps": 35, "total_steps": 105, "loss": 0.9223, "lr": 3.7500000000000003e-05, "epoch": 1.6666666666666665, "percentage": 33.33, "elapsed_time": "0:14:09", "remaining_time": "0:28:18", "throughput": 10801.76, "total_tokens": 9175040}
{"current_steps": 36, "total_steps": 105, "loss": 0.8941, "lr": 3.6846716561824965e-05, "epoch": 1.7142857142857144, "percentage": 34.29, "elapsed_time": "0:14:33", "remaining_time": "0:27:54", "throughput": 10802.6, "total_tokens": 9437184}
{"current_steps": 37, "total_steps": 105, "loss": 0.8731, "lr": 3.6182828707890816e-05, "epoch": 1.7619047619047619, "percentage": 35.24, "elapsed_time": "0:14:57", "remaining_time": "0:27:30", "throughput": 10803.33, "total_tokens": 9699328}
{"current_steps": 38, "total_steps": 105, "loss": 0.9095, "lr": 3.550893070773914e-05, "epoch": 1.8095238095238095, "percentage": 36.19, "elapsed_time": "0:15:22", "remaining_time": "0:27:05", "throughput": 10803.94, "total_tokens": 9961472}
{"current_steps": 39, "total_steps": 105, "loss": 0.8725, "lr": 3.4825625791348096e-05, "epoch": 1.8571428571428572, "percentage": 37.14, "elapsed_time": "0:15:46", "remaining_time": "0:26:41", "throughput": 10804.67, "total_tokens": 10223616}
{"current_steps": 40, "total_steps": 105, "loss": 0.8955, "lr": 3.413352560915988e-05, "epoch": 1.9047619047619047, "percentage": 38.1, "elapsed_time": "0:16:10", "remaining_time": "0:26:16", "throughput": 10805.13, "total_tokens": 10485760}
{"current_steps": 41, "total_steps": 105, "loss": 0.9127, "lr": 3.343324968457076e-05, "epoch": 1.9523809523809523, "percentage": 39.05, "elapsed_time": "0:16:34", "remaining_time": "0:25:52", "throughput": 10805.71, "total_tokens": 10747904}
{"current_steps": 42, "total_steps": 105, "loss": 0.8675, "lr": 3.272542485937369e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:16:58", "remaining_time": "0:25:28", "throughput": 10806.66, "total_tokens": 11010048}
{"current_steps": 43, "total_steps": 105, "loss": 0.4855, "lr": 3.201068473265007e-05, "epoch": 2.0476190476190474, "percentage": 40.95, "elapsed_time": "0:17:23", "remaining_time": "0:25:03", "throughput": 10807.26, "total_tokens": 11272192}
{"current_steps": 44, "total_steps": 105, "loss": 0.4813, "lr": 3.1289669093612714e-05, "epoch": 2.0952380952380953, "percentage": 41.9, "elapsed_time": "0:17:47", "remaining_time": "0:24:39", "throughput": 10807.95, "total_tokens": 11534336}
{"current_steps": 45, "total_steps": 105, "loss": 0.4473, "lr": 3.056302334890786e-05, "epoch": 2.142857142857143, "percentage": 42.86, "elapsed_time": "0:18:11", "remaining_time": "0:24:15", "throughput": 10808.57, "total_tokens": 11796480}
{"current_steps": 46, "total_steps": 105, "loss": 0.4253, "lr": 2.9831397944888833e-05, "epoch": 2.1904761904761907, "percentage": 43.81, "elapsed_time": "0:18:35", "remaining_time": "0:23:50", "throughput": 10809.38, "total_tokens": 12058624}
{"current_steps": 47, "total_steps": 105, "loss": 0.4185, "lr": 2.9095447785378443e-05, "epoch": 2.238095238095238, "percentage": 44.76, "elapsed_time": "0:18:59", "remaining_time": "0:23:26", "throughput": 10810.41, "total_tokens": 12320768}
{"current_steps": 48, "total_steps": 105, "loss": 0.4263, "lr": 2.8355831645441388e-05, "epoch": 2.2857142857142856, "percentage": 45.71, "elapsed_time": "0:19:23", "remaining_time": "0:23:02", "throughput": 10810.95, "total_tokens": 12582912}
{"current_steps": 49, "total_steps": 105, "loss": 0.3913, "lr": 2.761321158169134e-05, "epoch": 2.3333333333333335, "percentage": 46.67, "elapsed_time": "0:19:48", "remaining_time": "0:22:37", "throughput": 10811.6, "total_tokens": 12845056}
{"current_steps": 50, "total_steps": 105, "loss": 0.3806, "lr": 2.686825233966061e-05, "epoch": 2.380952380952381, "percentage": 47.62, "elapsed_time": "0:20:12", "remaining_time": "0:22:13", "throughput": 10812.0, "total_tokens": 13107200}
{"current_steps": 51, "total_steps": 105, "loss": 0.361, "lr": 2.6121620758762877e-05, "epoch": 2.4285714285714284, "percentage": 48.57, "elapsed_time": "0:20:36", "remaining_time": "0:21:49", "throughput": 10812.56, "total_tokens": 13369344}
{"current_steps": 52, "total_steps": 105, "loss": 0.3718, "lr": 2.5373985175381594e-05, "epoch": 2.4761904761904763, "percentage": 49.52, "elapsed_time": "0:21:00", "remaining_time": "0:21:24", "throughput": 10812.83, "total_tokens": 13631488}
{"current_steps": 53, "total_steps": 105, "loss": 0.3773, "lr": 2.4626014824618415e-05, "epoch": 2.5238095238095237, "percentage": 50.48, "elapsed_time": "0:21:24", "remaining_time": "0:21:00", "throughput": 10812.68, "total_tokens": 13893632}
{"current_steps": 54, "total_steps": 105, "loss": 0.3633, "lr": 2.3878379241237136e-05, "epoch": 2.571428571428571, "percentage": 51.43, "elapsed_time": "0:21:49", "remaining_time": "0:20:36", "throughput": 10812.39, "total_tokens": 14155776}
{"current_steps": 55, "total_steps": 105, "loss": 0.3578, "lr": 2.3131747660339394e-05, "epoch": 2.619047619047619, "percentage": 52.38, "elapsed_time": "0:22:13", "remaining_time": "0:20:12", "throughput": 10812.69, "total_tokens": 14417920}
{"current_steps": 56, "total_steps": 105, "loss": 0.3634, "lr": 2.238678841830867e-05, "epoch": 2.6666666666666665, "percentage": 53.33, "elapsed_time": "0:22:37", "remaining_time": "0:19:47", "throughput": 10812.59, "total_tokens": 14680064}
{"current_steps": 57, "total_steps": 105, "loss": 0.3431, "lr": 2.164416835455862e-05, "epoch": 2.7142857142857144, "percentage": 54.29, "elapsed_time": "0:23:01", "remaining_time": "0:19:23", "throughput": 10812.89, "total_tokens": 14942208}
{"current_steps": 58, "total_steps": 105, "loss": 0.3352, "lr": 2.090455221462156e-05, "epoch": 2.761904761904762, "percentage": 55.24, "elapsed_time": "0:23:26", "remaining_time": "0:18:59", "throughput": 10813.11, "total_tokens": 15204352}
{"current_steps": 59, "total_steps": 105, "loss": 0.3508, "lr": 2.0168602055111173e-05, "epoch": 2.8095238095238093, "percentage": 56.19, "elapsed_time": "0:23:50", "remaining_time": "0:18:35", "throughput": 10813.06, "total_tokens": 15466496}
{"current_steps": 60, "total_steps": 105, "loss": 0.365, "lr": 1.9436976651092144e-05, "epoch": 2.857142857142857, "percentage": 57.14, "elapsed_time": "0:24:14", "remaining_time": "0:18:10", "throughput": 10813.34, "total_tokens": 15728640}
{"current_steps": 61, "total_steps": 105, "loss": 0.336, "lr": 1.871033090638729e-05, "epoch": 2.9047619047619047, "percentage": 58.1, "elapsed_time": "0:24:38", "remaining_time": "0:17:46", "throughput": 10813.19, "total_tokens": 15990784}
{"current_steps": 62, "total_steps": 105, "loss": 0.3493, "lr": 1.7989315267349936e-05, "epoch": 2.9523809523809526, "percentage": 59.05, "elapsed_time": "0:25:03", "remaining_time": "0:17:22", "throughput": 10813.32, "total_tokens": 16252928}
{"current_steps": 63, "total_steps": 105, "loss": 0.3339, "lr": 1.7274575140626318e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:25:27", "remaining_time": "0:16:58", "throughput": 10813.91, "total_tokens": 16515072}
{"current_steps": 64, "total_steps": 105, "loss": 0.1579, "lr": 1.6566750315429254e-05, "epoch": 3.0476190476190474, "percentage": 60.95, "elapsed_time": "0:25:51", "remaining_time": "0:16:33", "throughput": 10814.13, "total_tokens": 16777216}
{"current_steps": 65, "total_steps": 105, "loss": 0.1513, "lr": 1.5866474390840125e-05, "epoch": 3.0952380952380953, "percentage": 61.9, "elapsed_time": "0:26:15", "remaining_time": "0:16:09", "throughput": 10814.57, "total_tokens": 17039360}
{"current_steps": 66, "total_steps": 105, "loss": 0.131, "lr": 1.5174374208651912e-05, "epoch": 3.142857142857143, "percentage": 62.86, "elapsed_time": "0:26:39", "remaining_time": "0:15:45", "throughput": 10814.62, "total_tokens": 17301504}
{"current_steps": 67, "total_steps": 105, "loss": 0.1294, "lr": 1.4491069292260868e-05, "epoch": 3.1904761904761907, "percentage": 63.81, "elapsed_time": "0:27:04", "remaining_time": "0:15:21", "throughput": 10814.98, "total_tokens": 17563648}
{"current_steps": 68, "total_steps": 105, "loss": 0.1271, "lr": 1.3817171292109183e-05, "epoch": 3.238095238095238, "percentage": 64.76, "elapsed_time": "0:27:28", "remaining_time": "0:14:56", "throughput": 10815.25, "total_tokens": 17825792}
{"current_steps": 69, "total_steps": 105, "loss": 0.1239, "lr": 1.3153283438175034e-05, "epoch": 3.2857142857142856, "percentage": 65.71, "elapsed_time": "0:27:52", "remaining_time": "0:14:32", "throughput": 10815.38, "total_tokens": 18087936}
{"current_steps": 70, "total_steps": 105, "loss": 0.1269, "lr": 1.2500000000000006e-05, "epoch": 3.3333333333333335, "percentage": 66.67, "elapsed_time": "0:28:16", "remaining_time": "0:14:08", "throughput": 10815.51, "total_tokens": 18350080}
{"current_steps": 71, "total_steps": 105, "loss": 0.1169, "lr": 1.185790575473738e-05, "epoch": 3.380952380952381, "percentage": 67.62, "elapsed_time": "0:28:40", "remaining_time": "0:13:44", "throughput": 10815.89, "total_tokens": 18612224}
{"current_steps": 72, "total_steps": 105, "loss": 0.1165, "lr": 1.122757546369744e-05, "epoch": 3.4285714285714284, "percentage": 68.57, "elapsed_time": "0:29:05", "remaining_time": "0:13:19", "throughput": 10816.12, "total_tokens": 18874368}
{"current_steps": 73, "total_steps": 105, "loss": 0.1121, "lr": 1.0609573357858166e-05, "epoch": 3.4761904761904763, "percentage": 69.52, "elapsed_time": "0:29:29", "remaining_time": "0:12:55", "throughput": 10816.28, "total_tokens": 19136512}
{"current_steps": 74, "total_steps": 105, "loss": 0.1151, "lr": 1.0004452632802158e-05, "epoch": 3.5238095238095237, "percentage": 70.48, "elapsed_time": "0:29:53", "remaining_time": "0:12:31", "throughput": 10816.42, "total_tokens": 19398656}
{"current_steps": 75, "total_steps": 105, "loss": 0.1094, "lr": 9.412754953531663e-06, "epoch": 3.571428571428571, "percentage": 71.43, "elapsed_time": "0:30:17", "remaining_time": "0:12:07", "throughput": 10816.53, "total_tokens": 19660800}
{"current_steps": 76, "total_steps": 105, "loss": 0.1049, "lr": 8.835009969605012e-06, "epoch": 3.619047619047619, "percentage": 72.38, "elapsed_time": "0:30:41", "remaining_time": "0:11:42", "throughput": 10816.55, "total_tokens": 19922944}
{"current_steps": 77, "total_steps": 105, "loss": 0.1052, "lr": 8.271734841028553e-06, "epoch": 3.6666666666666665, "percentage": 73.33, "elapsed_time": "0:31:06", "remaining_time": "0:11:18", "throughput": 10816.86, "total_tokens": 20185088}
{"current_steps": 78, "total_steps": 105, "loss": 0.1161, "lr": 7.723433775328384e-06, "epoch": 3.7142857142857144, "percentage": 74.29, "elapsed_time": "0:31:30", "remaining_time": "0:10:54", "throughput": 10817.11, "total_tokens": 20447232}
{"current_steps": 79, "total_steps": 105, "loss": 0.1089, "lr": 7.190597576216385e-06, "epoch": 3.761904761904762, "percentage": 75.24, "elapsed_time": "0:31:54", "remaining_time": "0:10:30", "throughput": 10817.28, "total_tokens": 20709376}
{"current_steps": 80, "total_steps": 105, "loss": 0.1117, "lr": 6.673703204254347e-06, "epoch": 3.8095238095238093, "percentage": 76.19, "elapsed_time": "0:32:18", "remaining_time": "0:10:05", "throughput": 10817.64, "total_tokens": 20971520}
{"current_steps": 81, "total_steps": 105, "loss": 0.1093, "lr": 6.173213349909729e-06, "epoch": 3.857142857142857, "percentage": 77.14, "elapsed_time": "0:32:42", "remaining_time": "0:09:41", "throughput": 10817.84, "total_tokens": 21233664}
{"current_steps": 82, "total_steps": 105, "loss": 0.1013, "lr": 5.689576019385015e-06, "epoch": 3.9047619047619047, "percentage": 78.1, "elapsed_time": "0:33:07", "remaining_time": "0:09:17", "throughput": 10818.05, "total_tokens": 21495808}
{"current_steps": 83, "total_steps": 105, "loss": 0.0984, "lr": 5.223224133591476e-06, "epoch": 3.9523809523809526, "percentage": 79.05, "elapsed_time": "0:33:31", "remaining_time": "0:08:53", "throughput": 10818.12, "total_tokens": 21757952}
{"current_steps": 84, "total_steps": 105, "loss": 0.0965, "lr": 4.7745751406263165e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:33:55", "remaining_time": "0:08:28", "throughput": 10818.48, "total_tokens": 22020096}
{"current_steps": 85, "total_steps": 105, "loss": 0.0413, "lr": 4.344030642100133e-06, "epoch": 4.0476190476190474, "percentage": 80.95, "elapsed_time": "0:34:19", "remaining_time": "0:08:04", "throughput": 10818.43, "total_tokens": 22282240}
{"current_steps": 86, "total_steps": 105, "loss": 0.036, "lr": 3.931976033649021e-06, "epoch": 4.095238095238095, "percentage": 81.9, "elapsed_time": "0:34:43", "remaining_time": "0:07:40", "throughput": 10818.49, "total_tokens": 22544384}
{"current_steps": 87, "total_steps": 105, "loss": 0.0384, "lr": 3.5387801599533475e-06, "epoch": 4.142857142857143, "percentage": 82.86, "elapsed_time": "0:35:08", "remaining_time": "0:07:16", "throughput": 10818.62, "total_tokens": 22806528}
{"current_steps": 88, "total_steps": 105, "loss": 0.0341, "lr": 3.164794984571759e-06, "epoch": 4.190476190476191, "percentage": 83.81, "elapsed_time": "0:35:32", "remaining_time": "0:06:51", "throughput": 10818.5, "total_tokens": 23068672}
{"current_steps": 89, "total_steps": 105, "loss": 0.0361, "lr": 2.8103552748861476e-06, "epoch": 4.238095238095238, "percentage": 84.76, "elapsed_time": "0:35:56", "remaining_time": "0:06:27", "throughput": 10818.56, "total_tokens": 23330816}
{"current_steps": 90, "total_steps": 105, "loss": 0.0299, "lr": 2.475778302439524e-06, "epoch": 4.285714285714286, "percentage": 85.71, "elapsed_time": "0:36:20", "remaining_time": "0:06:03", "throughput": 10818.51, "total_tokens": 23592960}
{"current_steps": 91, "total_steps": 105, "loss": 0.0316, "lr": 2.1613635589349756e-06, "epoch": 4.333333333333333, "percentage": 86.67, "elapsed_time": "0:36:44", "remaining_time": "0:05:39", "throughput": 10818.73, "total_tokens": 23855104}
{"current_steps": 92, "total_steps": 105, "loss": 0.0284, "lr": 1.8673924881500826e-06, "epoch": 4.380952380952381, "percentage": 87.62, "elapsed_time": "0:37:09", "remaining_time": "0:05:14", "throughput": 10818.71, "total_tokens": 24117248}
{"current_steps": 93, "total_steps": 105, "loss": 0.0308, "lr": 1.59412823400657e-06, "epoch": 4.428571428571429, "percentage": 88.57, "elapsed_time": "0:37:33", "remaining_time": "0:04:50", "throughput": 10818.73, "total_tokens": 24379392}
{"current_steps": 94, "total_steps": 105, "loss": 0.0293, "lr": 1.3418154050208936e-06, "epoch": 4.476190476190476, "percentage": 89.52, "elapsed_time": "0:37:57", "remaining_time": "0:04:26", "throughput": 10818.89, "total_tokens": 24641536}
{"current_steps": 95, "total_steps": 105, "loss": 0.0305, "lr": 1.1106798553464804e-06, "epoch": 4.523809523809524, "percentage": 90.48, "elapsed_time": "0:38:21", "remaining_time": "0:04:02", "throughput": 10819.12, "total_tokens": 24903680}
{"current_steps": 96, "total_steps": 105, "loss": 0.0272, "lr": 9.009284826036691e-07, "epoch": 4.571428571428571, "percentage": 91.43, "elapsed_time": "0:38:46", "remaining_time": "0:03:38", "throughput": 10819.26, "total_tokens": 25165824}
{"current_steps": 97, "total_steps": 105, "loss": 0.0275, "lr": 7.127490426783123e-07, "epoch": 4.619047619047619, "percentage": 92.38, "elapsed_time": "0:39:10", "remaining_time": "0:03:13", "throughput": 10819.44, "total_tokens": 25427968}
{"current_steps": 98, "total_steps": 105, "loss": 0.0312, "lr": 5.463099816548579e-07, "epoch": 4.666666666666667, "percentage": 93.33, "elapsed_time": "0:39:34", "remaining_time": "0:02:49", "throughput": 10819.57, "total_tokens": 25690112}
{"current_steps": 99, "total_steps": 105, "loss": 0.03, "lr": 4.0176028503425835e-07, "epoch": 4.714285714285714, "percentage": 94.29, "elapsed_time": "0:39:58", "remaining_time": "0:02:25", "throughput": 10819.71, "total_tokens": 25952256}
{"current_steps": 100, "total_steps": 105, "loss": 0.0276, "lr": 2.7922934437178695e-07, "epoch": 4.761904761904762, "percentage": 95.24, "elapsed_time": "0:40:22", "remaining_time": "0:02:01", "throughput": 10819.75, "total_tokens": 26214400}
{"current_steps": 101, "total_steps": 105, "loss": 0.0309, "lr": 1.7882684145406614e-07, "epoch": 4.809523809523809, "percentage": 96.19, "elapsed_time": "0:40:47", "remaining_time": "0:01:36", "throughput": 10819.9, "total_tokens": 26476544}
{"current_steps": 102, "total_steps": 105, "loss": 0.0266, "lr": 1.006426501190233e-07, "epoch": 4.857142857142857, "percentage": 97.14, "elapsed_time": "0:41:11", "remaining_time": "0:01:12", "throughput": 10820.0, "total_tokens": 26738688}
{"current_steps": 103, "total_steps": 105, "loss": 0.0282, "lr": 4.474675580662113e-08, "epoch": 4.904761904761905, "percentage": 98.1, "elapsed_time": "0:41:35", "remaining_time": "0:00:48", "throughput": 10820.03, "total_tokens": 27000832}
{"current_steps": 104, "total_steps": 105, "loss": 0.0274, "lr": 1.1189192912416934e-08, "epoch": 4.9523809523809526, "percentage": 99.05, "elapsed_time": "0:41:59", "remaining_time": "0:00:24", "throughput": 10820.13, "total_tokens": 27262976}
{"current_steps": 105, "total_steps": 105, "loss": 0.0261, "lr": 0.0, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:42:23", "remaining_time": "0:00:00", "throughput": 10820.27, "total_tokens": 27525120}