File size: 25,354 Bytes
1125f64 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | {"current_steps": 1, "total_steps": 105, "loss": 2.5468, "lr": 4.9988810807087584e-05, "epoch": 0.047619047619047616, "percentage": 0.95, "elapsed_time": "0:00:25", "remaining_time": "0:45:02", "throughput": 10089.28, "total_tokens": 262144}
{"current_steps": 2, "total_steps": 105, "loss": 2.2211, "lr": 4.9955253244193375e-05, "epoch": 0.09523809523809523, "percentage": 1.9, "elapsed_time": "0:00:50", "remaining_time": "0:43:28", "throughput": 10351.56, "total_tokens": 524288}
{"current_steps": 3, "total_steps": 105, "loss": 2.9797, "lr": 4.989935734988098e-05, "epoch": 0.14285714285714285, "percentage": 2.86, "elapsed_time": "0:01:15", "remaining_time": "0:42:35", "throughput": 10464.11, "total_tokens": 786432}
{"current_steps": 4, "total_steps": 105, "loss": 1.8267, "lr": 4.9821173158545936e-05, "epoch": 0.19047619047619047, "percentage": 3.81, "elapsed_time": "0:01:39", "remaining_time": "0:41:57", "throughput": 10518.44, "total_tokens": 1048576}
{"current_steps": 5, "total_steps": 105, "loss": 1.5966, "lr": 4.972077065562821e-05, "epoch": 0.23809523809523808, "percentage": 4.76, "elapsed_time": "0:02:04", "remaining_time": "0:41:25", "throughput": 10548.74, "total_tokens": 1310720}
{"current_steps": 6, "total_steps": 105, "loss": 1.642, "lr": 4.959823971496574e-05, "epoch": 0.2857142857142857, "percentage": 5.71, "elapsed_time": "0:02:28", "remaining_time": "0:40:56", "throughput": 10564.97, "total_tokens": 1572864}
{"current_steps": 7, "total_steps": 105, "loss": 1.4425, "lr": 4.9453690018345144e-05, "epoch": 0.3333333333333333, "percentage": 6.67, "elapsed_time": "0:02:53", "remaining_time": "0:40:27", "throughput": 10584.64, "total_tokens": 1835008}
{"current_steps": 8, "total_steps": 105, "loss": 1.3136, "lr": 4.928725095732169e-05, "epoch": 0.38095238095238093, "percentage": 7.62, "elapsed_time": "0:03:17", "remaining_time": "0:39:59", "throughput": 10597.7, "total_tokens": 2097152}
{"current_steps": 9, "total_steps": 105, "loss": 1.2788, "lr": 4.909907151739633e-05, "epoch": 0.42857142857142855, "percentage": 8.57, "elapsed_time": "0:03:42", "remaining_time": "0:39:32", "throughput": 10607.98, "total_tokens": 2359296}
{"current_steps": 10, "total_steps": 105, "loss": 1.1824, "lr": 4.888932014465352e-05, "epoch": 0.47619047619047616, "percentage": 9.52, "elapsed_time": "0:04:06", "remaining_time": "0:39:05", "throughput": 10617.33, "total_tokens": 2621440}
{"current_steps": 11, "total_steps": 105, "loss": 1.1745, "lr": 4.865818459497911e-05, "epoch": 0.5238095238095238, "percentage": 10.48, "elapsed_time": "0:04:31", "remaining_time": "0:38:39", "throughput": 10625.92, "total_tokens": 2883584}
{"current_steps": 12, "total_steps": 105, "loss": 1.1362, "lr": 4.8405871765993433e-05, "epoch": 0.5714285714285714, "percentage": 11.43, "elapsed_time": "0:04:55", "remaining_time": "0:38:12", "throughput": 10633.79, "total_tokens": 3145728}
{"current_steps": 13, "total_steps": 105, "loss": 1.0672, "lr": 4.813260751184992e-05, "epoch": 0.6190476190476191, "percentage": 12.38, "elapsed_time": "0:05:20", "remaining_time": "0:37:47", "throughput": 10636.89, "total_tokens": 3407872}
{"current_steps": 14, "total_steps": 105, "loss": 1.1173, "lr": 4.783863644106502e-05, "epoch": 0.6666666666666666, "percentage": 13.33, "elapsed_time": "0:05:44", "remaining_time": "0:37:21", "throughput": 10642.07, "total_tokens": 3670016}
{"current_steps": 15, "total_steps": 105, "loss": 1.1138, "lr": 4.752422169756048e-05, "epoch": 0.7142857142857143, "percentage": 14.29, "elapsed_time": "0:06:09", "remaining_time": "0:36:56", "throughput": 10644.01, "total_tokens": 3932160}
{"current_steps": 16, "total_steps": 105, "loss": 1.0602, "lr": 4.718964472511386e-05, "epoch": 0.7619047619047619, "percentage": 15.24, "elapsed_time": "0:06:33", "remaining_time": "0:36:31", "throughput": 10647.1, "total_tokens": 4194304}
{"current_steps": 17, "total_steps": 105, "loss": 1.0543, "lr": 4.6835205015428246e-05, "epoch": 0.8095238095238095, "percentage": 16.19, "elapsed_time": "0:06:58", "remaining_time": "0:36:06", "throughput": 10649.94, "total_tokens": 4456448}
{"current_steps": 18, "total_steps": 105, "loss": 1.0356, "lr": 4.6461219840046654e-05, "epoch": 0.8571428571428571, "percentage": 17.14, "elapsed_time": "0:07:22", "remaining_time": "0:35:40", "throughput": 10653.69, "total_tokens": 4718592}
{"current_steps": 19, "total_steps": 105, "loss": 1.0432, "lr": 4.606802396635098e-05, "epoch": 0.9047619047619048, "percentage": 18.1, "elapsed_time": "0:07:47", "remaining_time": "0:35:15", "throughput": 10658.09, "total_tokens": 4980736}
{"current_steps": 20, "total_steps": 105, "loss": 1.0264, "lr": 4.5655969357899874e-05, "epoch": 0.9523809523809523, "percentage": 19.05, "elapsed_time": "0:08:11", "remaining_time": "0:34:50", "throughput": 10660.13, "total_tokens": 5242880}
{"current_steps": 21, "total_steps": 105, "loss": 1.0231, "lr": 4.522542485937369e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:08:36", "remaining_time": "0:34:25", "throughput": 10662.73, "total_tokens": 5505024}
{"current_steps": 22, "total_steps": 105, "loss": 0.7101, "lr": 4.477677586640854e-05, "epoch": 1.0476190476190477, "percentage": 20.95, "elapsed_time": "0:09:00", "remaining_time": "0:34:00", "throughput": 10664.53, "total_tokens": 5767168}
{"current_steps": 23, "total_steps": 105, "loss": 0.6925, "lr": 4.431042398061499e-05, "epoch": 1.0952380952380953, "percentage": 21.9, "elapsed_time": "0:09:25", "remaining_time": "0:33:35", "throughput": 10667.36, "total_tokens": 6029312}
{"current_steps": 24, "total_steps": 105, "loss": 0.6875, "lr": 4.382678665009028e-05, "epoch": 1.1428571428571428, "percentage": 22.86, "elapsed_time": "0:09:49", "remaining_time": "0:33:10", "throughput": 10668.82, "total_tokens": 6291456}
{"current_steps": 25, "total_steps": 105, "loss": 0.6625, "lr": 4.332629679574566e-05, "epoch": 1.1904761904761905, "percentage": 23.81, "elapsed_time": "0:10:14", "remaining_time": "0:32:45", "throughput": 10669.75, "total_tokens": 6553600}
{"current_steps": 26, "total_steps": 105, "loss": 0.657, "lr": 4.2809402423783624e-05, "epoch": 1.2380952380952381, "percentage": 24.76, "elapsed_time": "0:10:38", "remaining_time": "0:32:20", "throughput": 10671.73, "total_tokens": 6815744}
{"current_steps": 27, "total_steps": 105, "loss": 0.6656, "lr": 4.227656622467162e-05, "epoch": 1.2857142857142856, "percentage": 25.71, "elapsed_time": "0:11:03", "remaining_time": "0:31:55", "throughput": 10672.57, "total_tokens": 7077888}
{"current_steps": 28, "total_steps": 105, "loss": 0.6416, "lr": 4.172826515897146e-05, "epoch": 1.3333333333333333, "percentage": 26.67, "elapsed_time": "0:11:27", "remaining_time": "0:31:31", "throughput": 10673.98, "total_tokens": 7340032}
{"current_steps": 29, "total_steps": 105, "loss": 0.6325, "lr": 4.116499003039499e-05, "epoch": 1.380952380952381, "percentage": 27.62, "elapsed_time": "0:11:52", "remaining_time": "0:31:06", "throughput": 10675.12, "total_tokens": 7602176}
{"current_steps": 30, "total_steps": 105, "loss": 0.6267, "lr": 4.058724504646834e-05, "epoch": 1.4285714285714286, "percentage": 28.57, "elapsed_time": "0:12:16", "remaining_time": "0:30:41", "throughput": 10676.02, "total_tokens": 7864320}
{"current_steps": 31, "total_steps": 105, "loss": 0.6483, "lr": 3.9995547367197845e-05, "epoch": 1.4761904761904763, "percentage": 29.52, "elapsed_time": "0:12:41", "remaining_time": "0:30:16", "throughput": 10676.69, "total_tokens": 8126464}
{"current_steps": 32, "total_steps": 105, "loss": 0.6241, "lr": 3.939042664214184e-05, "epoch": 1.5238095238095237, "percentage": 30.48, "elapsed_time": "0:13:05", "remaining_time": "0:29:52", "throughput": 10677.92, "total_tokens": 8388608}
{"current_steps": 33, "total_steps": 105, "loss": 0.5846, "lr": 3.8772424536302564e-05, "epoch": 1.5714285714285714, "percentage": 31.43, "elapsed_time": "0:13:30", "remaining_time": "0:29:27", "throughput": 10679.02, "total_tokens": 8650752}
{"current_steps": 34, "total_steps": 105, "loss": 0.65, "lr": 3.814209424526262e-05, "epoch": 1.619047619047619, "percentage": 32.38, "elapsed_time": "0:13:54", "remaining_time": "0:29:02", "throughput": 10680.24, "total_tokens": 8912896}
{"current_steps": 35, "total_steps": 105, "loss": 0.6269, "lr": 3.7500000000000003e-05, "epoch": 1.6666666666666665, "percentage": 33.33, "elapsed_time": "0:14:19", "remaining_time": "0:28:38", "throughput": 10680.15, "total_tokens": 9175040}
{"current_steps": 36, "total_steps": 105, "loss": 0.6308, "lr": 3.6846716561824965e-05, "epoch": 1.7142857142857144, "percentage": 34.29, "elapsed_time": "0:14:43", "remaining_time": "0:28:13", "throughput": 10680.81, "total_tokens": 9437184}
{"current_steps": 37, "total_steps": 105, "loss": 0.5917, "lr": 3.6182828707890816e-05, "epoch": 1.7619047619047619, "percentage": 35.24, "elapsed_time": "0:15:08", "remaining_time": "0:27:48", "throughput": 10681.23, "total_tokens": 9699328}
{"current_steps": 38, "total_steps": 105, "loss": 0.6359, "lr": 3.550893070773914e-05, "epoch": 1.8095238095238095, "percentage": 36.19, "elapsed_time": "0:15:32", "remaining_time": "0:27:24", "throughput": 10680.98, "total_tokens": 9961472}
{"current_steps": 39, "total_steps": 105, "loss": 0.615, "lr": 3.4825625791348096e-05, "epoch": 1.8571428571428572, "percentage": 37.14, "elapsed_time": "0:15:57", "remaining_time": "0:26:59", "throughput": 10681.64, "total_tokens": 10223616}
{"current_steps": 40, "total_steps": 105, "loss": 0.636, "lr": 3.413352560915988e-05, "epoch": 1.9047619047619047, "percentage": 38.1, "elapsed_time": "0:16:21", "remaining_time": "0:26:35", "throughput": 10682.13, "total_tokens": 10485760}
{"current_steps": 41, "total_steps": 105, "loss": 0.6496, "lr": 3.343324968457076e-05, "epoch": 1.9523809523809523, "percentage": 39.05, "elapsed_time": "0:16:46", "remaining_time": "0:26:10", "throughput": 10682.11, "total_tokens": 10747904}
{"current_steps": 42, "total_steps": 105, "loss": 0.6261, "lr": 3.272542485937369e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:17:10", "remaining_time": "0:25:45", "throughput": 10682.97, "total_tokens": 11010048}
{"current_steps": 43, "total_steps": 105, "loss": 0.3074, "lr": 3.201068473265007e-05, "epoch": 2.0476190476190474, "percentage": 40.95, "elapsed_time": "0:17:35", "remaining_time": "0:25:21", "throughput": 10683.29, "total_tokens": 11272192}
{"current_steps": 44, "total_steps": 105, "loss": 0.298, "lr": 3.1289669093612714e-05, "epoch": 2.0952380952380953, "percentage": 41.9, "elapsed_time": "0:17:59", "remaining_time": "0:24:56", "throughput": 10683.38, "total_tokens": 11534336}
{"current_steps": 45, "total_steps": 105, "loss": 0.2688, "lr": 3.056302334890786e-05, "epoch": 2.142857142857143, "percentage": 42.86, "elapsed_time": "0:18:24", "remaining_time": "0:24:32", "throughput": 10683.75, "total_tokens": 11796480}
{"current_steps": 46, "total_steps": 105, "loss": 0.2642, "lr": 2.9831397944888833e-05, "epoch": 2.1904761904761907, "percentage": 43.81, "elapsed_time": "0:18:48", "remaining_time": "0:24:07", "throughput": 10684.41, "total_tokens": 12058624}
{"current_steps": 47, "total_steps": 105, "loss": 0.2657, "lr": 2.9095447785378443e-05, "epoch": 2.238095238095238, "percentage": 44.76, "elapsed_time": "0:19:13", "remaining_time": "0:23:42", "throughput": 10684.84, "total_tokens": 12320768}
{"current_steps": 48, "total_steps": 105, "loss": 0.2681, "lr": 2.8355831645441388e-05, "epoch": 2.2857142857142856, "percentage": 45.71, "elapsed_time": "0:19:37", "remaining_time": "0:23:18", "throughput": 10685.28, "total_tokens": 12582912}
{"current_steps": 49, "total_steps": 105, "loss": 0.2442, "lr": 2.761321158169134e-05, "epoch": 2.3333333333333335, "percentage": 46.67, "elapsed_time": "0:20:02", "remaining_time": "0:22:53", "throughput": 10685.19, "total_tokens": 12845056}
{"current_steps": 50, "total_steps": 105, "loss": 0.235, "lr": 2.686825233966061e-05, "epoch": 2.380952380952381, "percentage": 47.62, "elapsed_time": "0:20:26", "remaining_time": "0:22:29", "throughput": 10685.74, "total_tokens": 13107200}
{"current_steps": 51, "total_steps": 105, "loss": 0.222, "lr": 2.6121620758762877e-05, "epoch": 2.4285714285714284, "percentage": 48.57, "elapsed_time": "0:20:51", "remaining_time": "0:22:04", "throughput": 10685.44, "total_tokens": 13369344}
{"current_steps": 52, "total_steps": 105, "loss": 0.2386, "lr": 2.5373985175381594e-05, "epoch": 2.4761904761904763, "percentage": 49.52, "elapsed_time": "0:21:15", "remaining_time": "0:21:40", "throughput": 10685.66, "total_tokens": 13631488}
{"current_steps": 53, "total_steps": 105, "loss": 0.2479, "lr": 2.4626014824618415e-05, "epoch": 2.5238095238095237, "percentage": 50.48, "elapsed_time": "0:21:40", "remaining_time": "0:21:15", "throughput": 10685.77, "total_tokens": 13893632}
{"current_steps": 54, "total_steps": 105, "loss": 0.2339, "lr": 2.3878379241237136e-05, "epoch": 2.571428571428571, "percentage": 51.43, "elapsed_time": "0:22:04", "remaining_time": "0:20:51", "throughput": 10685.93, "total_tokens": 14155776}
{"current_steps": 55, "total_steps": 105, "loss": 0.2275, "lr": 2.3131747660339394e-05, "epoch": 2.619047619047619, "percentage": 52.38, "elapsed_time": "0:22:29", "remaining_time": "0:20:26", "throughput": 10686.07, "total_tokens": 14417920}
{"current_steps": 56, "total_steps": 105, "loss": 0.2382, "lr": 2.238678841830867e-05, "epoch": 2.6666666666666665, "percentage": 53.33, "elapsed_time": "0:22:53", "remaining_time": "0:20:02", "throughput": 10685.79, "total_tokens": 14680064}
{"current_steps": 57, "total_steps": 105, "loss": 0.2157, "lr": 2.164416835455862e-05, "epoch": 2.7142857142857144, "percentage": 54.29, "elapsed_time": "0:23:18", "remaining_time": "0:19:37", "throughput": 10685.6, "total_tokens": 14942208}
{"current_steps": 58, "total_steps": 105, "loss": 0.2196, "lr": 2.090455221462156e-05, "epoch": 2.761904761904762, "percentage": 55.24, "elapsed_time": "0:23:42", "remaining_time": "0:19:12", "throughput": 10685.93, "total_tokens": 15204352}
{"current_steps": 59, "total_steps": 105, "loss": 0.226, "lr": 2.0168602055111173e-05, "epoch": 2.8095238095238093, "percentage": 56.19, "elapsed_time": "0:24:07", "remaining_time": "0:18:48", "throughput": 10686.23, "total_tokens": 15466496}
{"current_steps": 60, "total_steps": 105, "loss": 0.2385, "lr": 1.9436976651092144e-05, "epoch": 2.857142857142857, "percentage": 57.14, "elapsed_time": "0:24:31", "remaining_time": "0:18:23", "throughput": 10686.44, "total_tokens": 15728640}
{"current_steps": 61, "total_steps": 105, "loss": 0.2162, "lr": 1.871033090638729e-05, "epoch": 2.9047619047619047, "percentage": 58.1, "elapsed_time": "0:24:56", "remaining_time": "0:17:59", "throughput": 10686.61, "total_tokens": 15990784}
{"current_steps": 62, "total_steps": 105, "loss": 0.2283, "lr": 1.7989315267349936e-05, "epoch": 2.9523809523809526, "percentage": 59.05, "elapsed_time": "0:25:20", "remaining_time": "0:17:34", "throughput": 10686.59, "total_tokens": 16252928}
{"current_steps": 63, "total_steps": 105, "loss": 0.216, "lr": 1.7274575140626318e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:25:45", "remaining_time": "0:17:10", "throughput": 10686.85, "total_tokens": 16515072}
{"current_steps": 64, "total_steps": 105, "loss": 0.0882, "lr": 1.6566750315429254e-05, "epoch": 3.0476190476190474, "percentage": 60.95, "elapsed_time": "0:26:09", "remaining_time": "0:16:45", "throughput": 10687.19, "total_tokens": 16777216}
{"current_steps": 65, "total_steps": 105, "loss": 0.0808, "lr": 1.5866474390840125e-05, "epoch": 3.0952380952380953, "percentage": 61.9, "elapsed_time": "0:26:34", "remaining_time": "0:16:21", "throughput": 10687.86, "total_tokens": 17039360}
{"current_steps": 66, "total_steps": 105, "loss": 0.0732, "lr": 1.5174374208651912e-05, "epoch": 3.142857142857143, "percentage": 62.86, "elapsed_time": "0:26:58", "remaining_time": "0:15:56", "throughput": 10688.15, "total_tokens": 17301504}
{"current_steps": 67, "total_steps": 105, "loss": 0.0729, "lr": 1.4491069292260868e-05, "epoch": 3.1904761904761907, "percentage": 63.81, "elapsed_time": "0:27:23", "remaining_time": "0:15:31", "throughput": 10688.63, "total_tokens": 17563648}
{"current_steps": 68, "total_steps": 105, "loss": 0.0752, "lr": 1.3817171292109183e-05, "epoch": 3.238095238095238, "percentage": 64.76, "elapsed_time": "0:27:47", "remaining_time": "0:15:07", "throughput": 10689.35, "total_tokens": 17825792}
{"current_steps": 69, "total_steps": 105, "loss": 0.0721, "lr": 1.3153283438175034e-05, "epoch": 3.2857142857142856, "percentage": 65.71, "elapsed_time": "0:28:12", "remaining_time": "0:14:42", "throughput": 10689.73, "total_tokens": 18087936}
{"current_steps": 70, "total_steps": 105, "loss": 0.0711, "lr": 1.2500000000000006e-05, "epoch": 3.3333333333333335, "percentage": 66.67, "elapsed_time": "0:28:36", "remaining_time": "0:14:18", "throughput": 10690.23, "total_tokens": 18350080}
{"current_steps": 71, "total_steps": 105, "loss": 0.0685, "lr": 1.185790575473738e-05, "epoch": 3.380952380952381, "percentage": 67.62, "elapsed_time": "0:29:00", "remaining_time": "0:13:53", "throughput": 10690.62, "total_tokens": 18612224}
{"current_steps": 72, "total_steps": 105, "loss": 0.0705, "lr": 1.122757546369744e-05, "epoch": 3.4285714285714284, "percentage": 68.57, "elapsed_time": "0:29:25", "remaining_time": "0:13:29", "throughput": 10691.03, "total_tokens": 18874368}
{"current_steps": 73, "total_steps": 105, "loss": 0.0687, "lr": 1.0609573357858166e-05, "epoch": 3.4761904761904763, "percentage": 69.52, "elapsed_time": "0:29:49", "remaining_time": "0:13:04", "throughput": 10691.23, "total_tokens": 19136512}
{"current_steps": 74, "total_steps": 105, "loss": 0.068, "lr": 1.0004452632802158e-05, "epoch": 3.5238095238095237, "percentage": 70.48, "elapsed_time": "0:30:14", "remaining_time": "0:12:40", "throughput": 10691.4, "total_tokens": 19398656}
{"current_steps": 75, "total_steps": 105, "loss": 0.0641, "lr": 9.412754953531663e-06, "epoch": 3.571428571428571, "percentage": 71.43, "elapsed_time": "0:30:38", "remaining_time": "0:12:15", "throughput": 10691.69, "total_tokens": 19660800}
{"current_steps": 76, "total_steps": 105, "loss": 0.0653, "lr": 8.835009969605012e-06, "epoch": 3.619047619047619, "percentage": 72.38, "elapsed_time": "0:31:03", "remaining_time": "0:11:51", "throughput": 10692.0, "total_tokens": 19922944}
{"current_steps": 77, "total_steps": 105, "loss": 0.0636, "lr": 8.271734841028553e-06, "epoch": 3.6666666666666665, "percentage": 73.33, "elapsed_time": "0:31:27", "remaining_time": "0:11:26", "throughput": 10692.26, "total_tokens": 20185088}
{"current_steps": 78, "total_steps": 105, "loss": 0.0677, "lr": 7.723433775328384e-06, "epoch": 3.7142857142857144, "percentage": 74.29, "elapsed_time": "0:31:52", "remaining_time": "0:11:01", "throughput": 10692.3, "total_tokens": 20447232}
{"current_steps": 79, "total_steps": 105, "loss": 0.0653, "lr": 7.190597576216385e-06, "epoch": 3.761904761904762, "percentage": 75.24, "elapsed_time": "0:32:16", "remaining_time": "0:10:37", "throughput": 10692.79, "total_tokens": 20709376}
{"current_steps": 80, "total_steps": 105, "loss": 0.0686, "lr": 6.673703204254347e-06, "epoch": 3.8095238095238093, "percentage": 76.19, "elapsed_time": "0:32:41", "remaining_time": "0:10:12", "throughput": 10693.08, "total_tokens": 20971520}
{"current_steps": 81, "total_steps": 105, "loss": 0.0613, "lr": 6.173213349909729e-06, "epoch": 3.857142857142857, "percentage": 77.14, "elapsed_time": "0:33:05", "remaining_time": "0:09:48", "throughput": 10693.29, "total_tokens": 21233664}
{"current_steps": 82, "total_steps": 105, "loss": 0.063, "lr": 5.689576019385015e-06, "epoch": 3.9047619047619047, "percentage": 78.1, "elapsed_time": "0:33:30", "remaining_time": "0:09:23", "throughput": 10693.49, "total_tokens": 21495808}
{"current_steps": 83, "total_steps": 105, "loss": 0.0587, "lr": 5.223224133591476e-06, "epoch": 3.9523809523809526, "percentage": 79.05, "elapsed_time": "0:33:54", "remaining_time": "0:08:59", "throughput": 10693.75, "total_tokens": 21757952}
{"current_steps": 84, "total_steps": 105, "loss": 0.0567, "lr": 4.7745751406263165e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:34:19", "remaining_time": "0:08:34", "throughput": 10694.44, "total_tokens": 22020096}
{"current_steps": 85, "total_steps": 105, "loss": 0.0234, "lr": 4.344030642100133e-06, "epoch": 4.0476190476190474, "percentage": 80.95, "elapsed_time": "0:34:43", "remaining_time": "0:08:10", "throughput": 10694.68, "total_tokens": 22282240}
{"current_steps": 86, "total_steps": 105, "loss": 0.0209, "lr": 3.931976033649021e-06, "epoch": 4.095238095238095, "percentage": 81.9, "elapsed_time": "0:35:07", "remaining_time": "0:07:45", "throughput": 10694.85, "total_tokens": 22544384}
{"current_steps": 87, "total_steps": 105, "loss": 0.0223, "lr": 3.5387801599533475e-06, "epoch": 4.142857142857143, "percentage": 82.86, "elapsed_time": "0:35:32", "remaining_time": "0:07:21", "throughput": 10695.0, "total_tokens": 22806528}
{"current_steps": 88, "total_steps": 105, "loss": 0.0194, "lr": 3.164794984571759e-06, "epoch": 4.190476190476191, "percentage": 83.81, "elapsed_time": "0:35:56", "remaining_time": "0:06:56", "throughput": 10695.51, "total_tokens": 23068672}
{"current_steps": 89, "total_steps": 105, "loss": 0.0208, "lr": 2.8103552748861476e-06, "epoch": 4.238095238095238, "percentage": 84.76, "elapsed_time": "0:36:21", "remaining_time": "0:06:32", "throughput": 10695.95, "total_tokens": 23330816}
{"current_steps": 90, "total_steps": 105, "loss": 0.0172, "lr": 2.475778302439524e-06, "epoch": 4.285714285714286, "percentage": 85.71, "elapsed_time": "0:36:45", "remaining_time": "0:06:07", "throughput": 10696.33, "total_tokens": 23592960}
{"current_steps": 91, "total_steps": 105, "loss": 0.0178, "lr": 2.1613635589349756e-06, "epoch": 4.333333333333333, "percentage": 86.67, "elapsed_time": "0:37:10", "remaining_time": "0:05:43", "throughput": 10696.25, "total_tokens": 23855104}
{"current_steps": 92, "total_steps": 105, "loss": 0.0168, "lr": 1.8673924881500826e-06, "epoch": 4.380952380952381, "percentage": 87.62, "elapsed_time": "0:37:34", "remaining_time": "0:05:18", "throughput": 10696.35, "total_tokens": 24117248}
{"current_steps": 93, "total_steps": 105, "loss": 0.0179, "lr": 1.59412823400657e-06, "epoch": 4.428571428571429, "percentage": 88.57, "elapsed_time": "0:37:59", "remaining_time": "0:04:54", "throughput": 10696.59, "total_tokens": 24379392}
{"current_steps": 94, "total_steps": 105, "loss": 0.0172, "lr": 1.3418154050208936e-06, "epoch": 4.476190476190476, "percentage": 89.52, "elapsed_time": "0:38:23", "remaining_time": "0:04:29", "throughput": 10696.73, "total_tokens": 24641536}
{"current_steps": 95, "total_steps": 105, "loss": 0.0167, "lr": 1.1106798553464804e-06, "epoch": 4.523809523809524, "percentage": 90.48, "elapsed_time": "0:38:48", "remaining_time": "0:04:05", "throughput": 10696.86, "total_tokens": 24903680}
{"current_steps": 96, "total_steps": 105, "loss": 0.0149, "lr": 9.009284826036691e-07, "epoch": 4.571428571428571, "percentage": 91.43, "elapsed_time": "0:39:12", "remaining_time": "0:03:40", "throughput": 10696.93, "total_tokens": 25165824}
{"current_steps": 97, "total_steps": 105, "loss": 0.015, "lr": 7.127490426783123e-07, "epoch": 4.619047619047619, "percentage": 92.38, "elapsed_time": "0:39:37", "remaining_time": "0:03:16", "throughput": 10697.18, "total_tokens": 25427968}
{"current_steps": 98, "total_steps": 105, "loss": 0.0177, "lr": 5.463099816548579e-07, "epoch": 4.666666666666667, "percentage": 93.33, "elapsed_time": "0:40:01", "remaining_time": "0:02:51", "throughput": 10697.45, "total_tokens": 25690112}
{"current_steps": 99, "total_steps": 105, "loss": 0.0159, "lr": 4.0176028503425835e-07, "epoch": 4.714285714285714, "percentage": 94.29, "elapsed_time": "0:40:26", "remaining_time": "0:02:27", "throughput": 10697.21, "total_tokens": 25952256}
{"current_steps": 100, "total_steps": 105, "loss": 0.0152, "lr": 2.7922934437178695e-07, "epoch": 4.761904761904762, "percentage": 95.24, "elapsed_time": "0:40:50", "remaining_time": "0:02:02", "throughput": 10697.3, "total_tokens": 26214400}
{"current_steps": 101, "total_steps": 105, "loss": 0.0183, "lr": 1.7882684145406614e-07, "epoch": 4.809523809523809, "percentage": 96.19, "elapsed_time": "0:41:15", "remaining_time": "0:01:38", "throughput": 10697.29, "total_tokens": 26476544}
{"current_steps": 102, "total_steps": 105, "loss": 0.0134, "lr": 1.006426501190233e-07, "epoch": 4.857142857142857, "percentage": 97.14, "elapsed_time": "0:41:39", "remaining_time": "0:01:13", "throughput": 10697.08, "total_tokens": 26738688}
{"current_steps": 103, "total_steps": 105, "loss": 0.0161, "lr": 4.474675580662113e-08, "epoch": 4.904761904761905, "percentage": 98.1, "elapsed_time": "0:42:04", "remaining_time": "0:00:49", "throughput": 10697.03, "total_tokens": 27000832}
{"current_steps": 104, "total_steps": 105, "loss": 0.0158, "lr": 1.1189192912416934e-08, "epoch": 4.9523809523809526, "percentage": 99.05, "elapsed_time": "0:42:28", "remaining_time": "0:00:24", "throughput": 10697.03, "total_tokens": 27262976}
{"current_steps": 105, "total_steps": 105, "loss": 0.0141, "lr": 0.0, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:42:53", "remaining_time": "0:00:00", "throughput": 10697.39, "total_tokens": 27525120}
|