| {"current_steps": 1, "total_steps": 315, "loss": 2.3218, "lr": 4.999875667389858e-05, "epoch": 0.015841584158415842, "percentage": 0.32, "elapsed_time": "0:00:23", "remaining_time": "2:03:29", "throughput": 11109.01, "total_tokens": 262144} | |
| {"current_steps": 2, "total_steps": 315, "loss": 3.4992, "lr": 4.999502681926309e-05, "epoch": 0.031683168316831684, "percentage": 0.63, "elapsed_time": "0:00:46", "remaining_time": "2:00:41", "throughput": 11330.4, "total_tokens": 524288} | |
| {"current_steps": 3, "total_steps": 315, "loss": 2.6702, "lr": 4.9988810807087584e-05, "epoch": 0.047524752475247525, "percentage": 0.95, "elapsed_time": "0:01:08", "remaining_time": "1:59:21", "throughput": 11420.79, "total_tokens": 786432} | |
| {"current_steps": 4, "total_steps": 315, "loss": 2.3942, "lr": 4.998010925565448e-05, "epoch": 0.06336633663366337, "percentage": 1.27, "elapsed_time": "0:01:31", "remaining_time": "1:58:29", "throughput": 11466.64, "total_tokens": 1048576} | |
| {"current_steps": 5, "total_steps": 315, "loss": 2.4923, "lr": 4.996892303047306e-05, "epoch": 0.07920792079207921, "percentage": 1.59, "elapsed_time": "0:01:54", "remaining_time": "1:57:53", "throughput": 11488.84, "total_tokens": 1310720} | |
| {"current_steps": 6, "total_steps": 315, "loss": 2.2646, "lr": 4.9955253244193375e-05, "epoch": 0.09504950495049505, "percentage": 1.9, "elapsed_time": "0:02:16", "remaining_time": "1:57:17", "throughput": 11510.89, "total_tokens": 1572864} | |
| {"current_steps": 7, "total_steps": 315, "loss": 2.0938, "lr": 4.993910125649561e-05, "epoch": 0.11089108910891089, "percentage": 2.22, "elapsed_time": "0:02:39", "remaining_time": "1:56:44", "throughput": 11527.0, "total_tokens": 1835008} | |
| {"current_steps": 8, "total_steps": 315, "loss": 2.0191, "lr": 4.992046867395478e-05, "epoch": 0.12673267326732673, "percentage": 2.54, "elapsed_time": "0:03:01", "remaining_time": "1:56:14", "throughput": 11538.25, "total_tokens": 2097152} | |
| {"current_steps": 9, "total_steps": 315, "loss": 2.0952, "lr": 4.989935734988098e-05, "epoch": 0.14257425742574256, "percentage": 2.86, "elapsed_time": "0:03:24", "remaining_time": "1:55:47", "throughput": 11545.75, "total_tokens": 2359296} | |
| {"current_steps": 10, "total_steps": 315, "loss": 1.9112, "lr": 4.987576938413504e-05, "epoch": 0.15841584158415842, "percentage": 3.17, "elapsed_time": "0:03:46", "remaining_time": "1:55:19", "throughput": 11554.39, "total_tokens": 2621440} | |
| {"current_steps": 11, "total_steps": 315, "loss": 1.9916, "lr": 4.984970712291963e-05, "epoch": 0.17425742574257425, "percentage": 3.49, "elapsed_time": "0:04:09", "remaining_time": "1:54:52", "throughput": 11561.66, "total_tokens": 2883584} | |
| {"current_steps": 12, "total_steps": 315, "loss": 1.9434, "lr": 4.9821173158545936e-05, "epoch": 0.1900990099009901, "percentage": 3.81, "elapsed_time": "0:04:31", "remaining_time": "1:54:26", "throughput": 11567.18, "total_tokens": 3145728} | |
| {"current_steps": 13, "total_steps": 315, "loss": 1.9084, "lr": 4.9790170329175754e-05, "epoch": 0.20594059405940593, "percentage": 4.13, "elapsed_time": "0:04:54", "remaining_time": "1:54:01", "throughput": 11572.39, "total_tokens": 3407872} | |
| {"current_steps": 14, "total_steps": 315, "loss": 1.887, "lr": 4.975670171853926e-05, "epoch": 0.22178217821782178, "percentage": 4.44, "elapsed_time": "0:05:17", "remaining_time": "1:53:36", "throughput": 11576.46, "total_tokens": 3670016} | |
| {"current_steps": 15, "total_steps": 315, "loss": 1.8368, "lr": 4.972077065562821e-05, "epoch": 0.2376237623762376, "percentage": 4.76, "elapsed_time": "0:05:39", "remaining_time": "1:53:11", "throughput": 11579.52, "total_tokens": 3932160} | |
| {"current_steps": 16, "total_steps": 315, "loss": 1.7729, "lr": 4.9682380714364897e-05, "epoch": 0.25346534653465347, "percentage": 5.08, "elapsed_time": "0:06:02", "remaining_time": "1:52:46", "throughput": 11583.22, "total_tokens": 4194304} | |
| {"current_steps": 17, "total_steps": 315, "loss": 1.8139, "lr": 4.964153571324658e-05, "epoch": 0.2693069306930693, "percentage": 5.4, "elapsed_time": "0:06:24", "remaining_time": "1:52:22", "throughput": 11586.04, "total_tokens": 4456448} | |
| {"current_steps": 18, "total_steps": 315, "loss": 1.7543, "lr": 4.959823971496574e-05, "epoch": 0.2851485148514851, "percentage": 5.71, "elapsed_time": "0:06:47", "remaining_time": "1:51:58", "throughput": 11588.15, "total_tokens": 4718592} | |
| {"current_steps": 19, "total_steps": 315, "loss": 1.7287, "lr": 4.9552497026005974e-05, "epoch": 0.300990099009901, "percentage": 6.03, "elapsed_time": "0:07:09", "remaining_time": "1:51:35", "throughput": 11589.87, "total_tokens": 4980736} | |
| {"current_steps": 20, "total_steps": 315, "loss": 1.7471, "lr": 4.9504312196213596e-05, "epoch": 0.31683168316831684, "percentage": 6.35, "elapsed_time": "0:07:32", "remaining_time": "1:51:10", "throughput": 11592.36, "total_tokens": 5242880} | |
| {"current_steps": 21, "total_steps": 315, "loss": 1.6805, "lr": 4.9453690018345144e-05, "epoch": 0.3326732673267327, "percentage": 6.67, "elapsed_time": "0:07:54", "remaining_time": "1:50:47", "throughput": 11593.94, "total_tokens": 5505024} | |
| {"current_steps": 22, "total_steps": 315, "loss": 1.6845, "lr": 4.940063552759061e-05, "epoch": 0.3485148514851485, "percentage": 6.98, "elapsed_time": "0:08:17", "remaining_time": "1:50:23", "throughput": 11595.65, "total_tokens": 5767168} | |
| {"current_steps": 23, "total_steps": 315, "loss": 1.6713, "lr": 4.934515400107266e-05, "epoch": 0.36435643564356435, "percentage": 7.3, "elapsed_time": "0:08:39", "remaining_time": "1:50:00", "throughput": 11597.22, "total_tokens": 6029312} | |
| {"current_steps": 24, "total_steps": 315, "loss": 1.681, "lr": 4.928725095732169e-05, "epoch": 0.3801980198019802, "percentage": 7.62, "elapsed_time": "0:09:02", "remaining_time": "1:49:37", "throughput": 11597.51, "total_tokens": 6291456} | |
| {"current_steps": 25, "total_steps": 315, "loss": 1.6459, "lr": 4.922693215572695e-05, "epoch": 0.39603960396039606, "percentage": 7.94, "elapsed_time": "0:09:25", "remaining_time": "1:49:14", "throughput": 11597.84, "total_tokens": 6553600} | |
| {"current_steps": 26, "total_steps": 315, "loss": 1.6754, "lr": 4.916420359596368e-05, "epoch": 0.41188118811881186, "percentage": 8.25, "elapsed_time": "0:09:47", "remaining_time": "1:48:51", "throughput": 11598.56, "total_tokens": 6815744} | |
| {"current_steps": 27, "total_steps": 315, "loss": 1.6168, "lr": 4.909907151739633e-05, "epoch": 0.4277227722772277, "percentage": 8.57, "elapsed_time": "0:10:10", "remaining_time": "1:48:28", "throughput": 11599.44, "total_tokens": 7077888} | |
| {"current_steps": 28, "total_steps": 315, "loss": 1.6399, "lr": 4.9031542398457974e-05, "epoch": 0.44356435643564357, "percentage": 8.89, "elapsed_time": "0:10:32", "remaining_time": "1:48:05", "throughput": 11600.39, "total_tokens": 7340032} | |
| {"current_steps": 29, "total_steps": 315, "loss": 1.6318, "lr": 4.896162295600589e-05, "epoch": 0.4594059405940594, "percentage": 9.21, "elapsed_time": "0:10:55", "remaining_time": "1:47:42", "throughput": 11601.42, "total_tokens": 7602176} | |
| {"current_steps": 30, "total_steps": 315, "loss": 1.6046, "lr": 4.888932014465352e-05, "epoch": 0.4752475247524752, "percentage": 9.52, "elapsed_time": "0:11:17", "remaining_time": "1:47:19", "throughput": 11602.41, "total_tokens": 7864320} | |
| {"current_steps": 31, "total_steps": 315, "loss": 1.6143, "lr": 4.881464115607865e-05, "epoch": 0.4910891089108911, "percentage": 9.84, "elapsed_time": "0:11:40", "remaining_time": "1:46:56", "throughput": 11603.45, "total_tokens": 8126464} | |
| {"current_steps": 32, "total_steps": 315, "loss": 1.6118, "lr": 4.8737593418308156e-05, "epoch": 0.5069306930693069, "percentage": 10.16, "elapsed_time": "0:12:02", "remaining_time": "1:46:33", "throughput": 11603.87, "total_tokens": 8388608} | |
| {"current_steps": 33, "total_steps": 315, "loss": 1.6007, "lr": 4.865818459497911e-05, "epoch": 0.5227722772277228, "percentage": 10.48, "elapsed_time": "0:12:25", "remaining_time": "1:46:10", "throughput": 11604.71, "total_tokens": 8650752} | |
| {"current_steps": 34, "total_steps": 315, "loss": 1.5592, "lr": 4.8576422584576514e-05, "epoch": 0.5386138613861386, "percentage": 10.79, "elapsed_time": "0:12:47", "remaining_time": "1:45:47", "throughput": 11605.45, "total_tokens": 8912896} | |
| {"current_steps": 35, "total_steps": 315, "loss": 1.5396, "lr": 4.849231551964771e-05, "epoch": 0.5544554455445545, "percentage": 11.11, "elapsed_time": "0:13:10", "remaining_time": "1:45:24", "throughput": 11606.01, "total_tokens": 9175040} | |
| {"current_steps": 36, "total_steps": 315, "loss": 1.5477, "lr": 4.8405871765993433e-05, "epoch": 0.5702970297029702, "percentage": 11.43, "elapsed_time": "0:13:33", "remaining_time": "1:45:01", "throughput": 11606.54, "total_tokens": 9437184} | |
| {"current_steps": 37, "total_steps": 315, "loss": 1.5415, "lr": 4.8317099921835697e-05, "epoch": 0.5861386138613861, "percentage": 11.75, "elapsed_time": "0:13:55", "remaining_time": "1:44:38", "throughput": 11606.88, "total_tokens": 9699328} | |
| {"current_steps": 38, "total_steps": 315, "loss": 1.5087, "lr": 4.822600881696256e-05, "epoch": 0.601980198019802, "percentage": 12.06, "elapsed_time": "0:14:18", "remaining_time": "1:44:15", "throughput": 11607.26, "total_tokens": 9961472} | |
| {"current_steps": 39, "total_steps": 315, "loss": 1.5329, "lr": 4.813260751184992e-05, "epoch": 0.6178217821782178, "percentage": 12.38, "elapsed_time": "0:14:40", "remaining_time": "1:43:53", "throughput": 11607.48, "total_tokens": 10223616} | |
| {"current_steps": 40, "total_steps": 315, "loss": 1.5234, "lr": 4.803690529676019e-05, "epoch": 0.6336633663366337, "percentage": 12.7, "elapsed_time": "0:15:03", "remaining_time": "1:43:30", "throughput": 11607.68, "total_tokens": 10485760} | |
| {"current_steps": 41, "total_steps": 315, "loss": 1.5128, "lr": 4.7938911690818347e-05, "epoch": 0.6495049504950495, "percentage": 13.02, "elapsed_time": "0:15:25", "remaining_time": "1:43:07", "throughput": 11608.27, "total_tokens": 10747904} | |
| {"current_steps": 42, "total_steps": 315, "loss": 1.5069, "lr": 4.783863644106502e-05, "epoch": 0.6653465346534654, "percentage": 13.33, "elapsed_time": "0:15:48", "remaining_time": "1:42:44", "throughput": 11608.47, "total_tokens": 11010048} | |
| {"current_steps": 43, "total_steps": 315, "loss": 1.5214, "lr": 4.773608952148706e-05, "epoch": 0.6811881188118812, "percentage": 13.65, "elapsed_time": "0:16:11", "remaining_time": "1:42:22", "throughput": 11608.72, "total_tokens": 11272192} | |
| {"current_steps": 44, "total_steps": 315, "loss": 1.503, "lr": 4.763128113202537e-05, "epoch": 0.697029702970297, "percentage": 13.97, "elapsed_time": "0:16:33", "remaining_time": "1:41:59", "throughput": 11609.25, "total_tokens": 11534336} | |
| {"current_steps": 45, "total_steps": 315, "loss": 1.48, "lr": 4.752422169756048e-05, "epoch": 0.7128712871287128, "percentage": 14.29, "elapsed_time": "0:16:56", "remaining_time": "1:41:36", "throughput": 11609.48, "total_tokens": 11796480} | |
| {"current_steps": 46, "total_steps": 315, "loss": 1.5072, "lr": 4.7414921866875524e-05, "epoch": 0.7287128712871287, "percentage": 14.6, "elapsed_time": "0:17:18", "remaining_time": "1:41:14", "throughput": 11609.5, "total_tokens": 12058624} | |
| {"current_steps": 47, "total_steps": 315, "loss": 1.517, "lr": 4.730339251159709e-05, "epoch": 0.7445544554455445, "percentage": 14.92, "elapsed_time": "0:17:41", "remaining_time": "1:40:51", "throughput": 11609.45, "total_tokens": 12320768} | |
| {"current_steps": 48, "total_steps": 315, "loss": 1.5103, "lr": 4.718964472511386e-05, "epoch": 0.7603960396039604, "percentage": 15.24, "elapsed_time": "0:18:03", "remaining_time": "1:40:28", "throughput": 11609.73, "total_tokens": 12582912} | |
| {"current_steps": 49, "total_steps": 315, "loss": 1.4858, "lr": 4.707368982147318e-05, "epoch": 0.7762376237623763, "percentage": 15.56, "elapsed_time": "0:18:26", "remaining_time": "1:40:06", "throughput": 11609.6, "total_tokens": 12845056} | |
| {"current_steps": 50, "total_steps": 315, "loss": 1.4908, "lr": 4.6955539334255716e-05, "epoch": 0.7920792079207921, "percentage": 15.87, "elapsed_time": "0:18:48", "remaining_time": "1:39:43", "throughput": 11609.72, "total_tokens": 13107200} | |
| {"current_steps": 51, "total_steps": 315, "loss": 1.4893, "lr": 4.6835205015428246e-05, "epoch": 0.807920792079208, "percentage": 16.19, "elapsed_time": "0:19:11", "remaining_time": "1:39:20", "throughput": 11609.98, "total_tokens": 13369344} | |
| {"current_steps": 52, "total_steps": 315, "loss": 1.4719, "lr": 4.671269883417473e-05, "epoch": 0.8237623762376237, "percentage": 16.51, "elapsed_time": "0:19:34", "remaining_time": "1:38:58", "throughput": 11610.22, "total_tokens": 13631488} | |
| {"current_steps": 53, "total_steps": 315, "loss": 1.4455, "lr": 4.658803297570577e-05, "epoch": 0.8396039603960396, "percentage": 16.83, "elapsed_time": "0:19:56", "remaining_time": "1:38:35", "throughput": 11610.54, "total_tokens": 13893632} | |
| {"current_steps": 54, "total_steps": 315, "loss": 1.4374, "lr": 4.6461219840046654e-05, "epoch": 0.8554455445544554, "percentage": 17.14, "elapsed_time": "0:20:19", "remaining_time": "1:38:12", "throughput": 11610.92, "total_tokens": 14155776} | |
| {"current_steps": 55, "total_steps": 315, "loss": 1.4569, "lr": 4.6332272040803895e-05, "epoch": 0.8712871287128713, "percentage": 17.46, "elapsed_time": "0:20:41", "remaining_time": "1:37:49", "throughput": 11611.25, "total_tokens": 14417920} | |
| {"current_steps": 56, "total_steps": 315, "loss": 1.461, "lr": 4.620120240391065e-05, "epoch": 0.8871287128712871, "percentage": 17.78, "elapsed_time": "0:21:04", "remaining_time": "1:37:27", "throughput": 11611.59, "total_tokens": 14680064} | |
| {"current_steps": 57, "total_steps": 315, "loss": 1.4401, "lr": 4.606802396635098e-05, "epoch": 0.902970297029703, "percentage": 18.1, "elapsed_time": "0:21:26", "remaining_time": "1:37:04", "throughput": 11611.77, "total_tokens": 14942208} | |
| {"current_steps": 58, "total_steps": 315, "loss": 1.4271, "lr": 4.593274997486309e-05, "epoch": 0.9188118811881189, "percentage": 18.41, "elapsed_time": "0:21:49", "remaining_time": "1:36:41", "throughput": 11612.03, "total_tokens": 15204352} | |
| {"current_steps": 59, "total_steps": 315, "loss": 1.4444, "lr": 4.579539388462173e-05, "epoch": 0.9346534653465347, "percentage": 18.73, "elapsed_time": "0:22:11", "remaining_time": "1:36:19", "throughput": 11612.17, "total_tokens": 15466496} | |
| {"current_steps": 60, "total_steps": 315, "loss": 1.4349, "lr": 4.5655969357899874e-05, "epoch": 0.9504950495049505, "percentage": 19.05, "elapsed_time": "0:22:34", "remaining_time": "1:35:56", "throughput": 11612.35, "total_tokens": 15728640} | |
| {"current_steps": 61, "total_steps": 315, "loss": 1.4081, "lr": 4.551449026270979e-05, "epoch": 0.9663366336633663, "percentage": 19.37, "elapsed_time": "0:22:57", "remaining_time": "1:35:33", "throughput": 11612.37, "total_tokens": 15990784} | |
| {"current_steps": 62, "total_steps": 315, "loss": 1.401, "lr": 4.537097067142363e-05, "epoch": 0.9821782178217822, "percentage": 19.68, "elapsed_time": "0:23:19", "remaining_time": "1:35:11", "throughput": 11612.47, "total_tokens": 16252928} | |
| {"current_steps": 63, "total_steps": 315, "loss": 1.4383, "lr": 4.522542485937369e-05, "epoch": 0.998019801980198, "percentage": 20.0, "elapsed_time": "0:23:42", "remaining_time": "1:34:48", "throughput": 11612.6, "total_tokens": 16515072} | |
| {"current_steps": 64, "total_steps": 315, "loss": 1.3665, "lr": 4.5077867303432546e-05, "epoch": 1.0, "percentage": 20.32, "elapsed_time": "0:23:44", "remaining_time": "1:33:08", "throughput": 11612.91, "total_tokens": 16547840} | |
| {"current_steps": 65, "total_steps": 315, "loss": 1.0116, "lr": 4.4928312680573064e-05, "epoch": 1.0158415841584159, "percentage": 20.63, "elapsed_time": "0:24:07", "remaining_time": "1:32:47", "throughput": 11612.55, "total_tokens": 16809984} | |
| {"current_steps": 66, "total_steps": 315, "loss": 0.9952, "lr": 4.477677586640854e-05, "epoch": 1.0316831683168317, "percentage": 20.95, "elapsed_time": "0:24:30", "remaining_time": "1:32:26", "throughput": 11612.69, "total_tokens": 17072128} | |
| {"current_steps": 67, "total_steps": 315, "loss": 0.9662, "lr": 4.4623271933713065e-05, "epoch": 1.0475247524752476, "percentage": 21.27, "elapsed_time": "0:24:52", "remaining_time": "1:32:05", "throughput": 11612.89, "total_tokens": 17334272} | |
| {"current_steps": 68, "total_steps": 315, "loss": 0.9799, "lr": 4.446781615092235e-05, "epoch": 1.0633663366336634, "percentage": 21.59, "elapsed_time": "0:25:15", "remaining_time": "1:31:43", "throughput": 11613.01, "total_tokens": 17596416} | |
| {"current_steps": 69, "total_steps": 315, "loss": 0.9848, "lr": 4.431042398061499e-05, "epoch": 1.0792079207920793, "percentage": 21.9, "elapsed_time": "0:25:37", "remaining_time": "1:31:22", "throughput": 11613.05, "total_tokens": 17858560} | |
| {"current_steps": 70, "total_steps": 315, "loss": 0.9054, "lr": 4.415111107797445e-05, "epoch": 1.0950495049504951, "percentage": 22.22, "elapsed_time": "0:26:00", "remaining_time": "1:31:01", "throughput": 11613.0, "total_tokens": 18120704} | |
| {"current_steps": 71, "total_steps": 315, "loss": 0.9266, "lr": 4.3989893289231954e-05, "epoch": 1.110891089108911, "percentage": 22.54, "elapsed_time": "0:26:22", "remaining_time": "1:30:39", "throughput": 11613.18, "total_tokens": 18382848} | |
| {"current_steps": 72, "total_steps": 315, "loss": 0.923, "lr": 4.382678665009028e-05, "epoch": 1.1267326732673268, "percentage": 22.86, "elapsed_time": "0:26:45", "remaining_time": "1:30:18", "throughput": 11613.34, "total_tokens": 18644992} | |
| {"current_steps": 73, "total_steps": 315, "loss": 0.9083, "lr": 4.366180738412876e-05, "epoch": 1.1425742574257425, "percentage": 23.17, "elapsed_time": "0:27:08", "remaining_time": "1:29:57", "throughput": 11613.22, "total_tokens": 18907136} | |
| {"current_steps": 74, "total_steps": 315, "loss": 0.9211, "lr": 4.34949719011896e-05, "epoch": 1.1584158415841583, "percentage": 23.49, "elapsed_time": "0:27:30", "remaining_time": "1:29:35", "throughput": 11613.17, "total_tokens": 19169280} | |
| {"current_steps": 75, "total_steps": 315, "loss": 0.9228, "lr": 4.332629679574566e-05, "epoch": 1.1742574257425742, "percentage": 23.81, "elapsed_time": "0:27:53", "remaining_time": "1:29:14", "throughput": 11613.14, "total_tokens": 19431424} | |
| {"current_steps": 76, "total_steps": 315, "loss": 0.8703, "lr": 4.3155798845249827e-05, "epoch": 1.19009900990099, "percentage": 24.13, "elapsed_time": "0:28:15", "remaining_time": "1:28:52", "throughput": 11612.93, "total_tokens": 19693568} | |
| {"current_steps": 77, "total_steps": 315, "loss": 0.8964, "lr": 4.2983495008466276e-05, "epoch": 1.205940594059406, "percentage": 24.44, "elapsed_time": "0:28:38", "remaining_time": "1:28:31", "throughput": 11612.67, "total_tokens": 19955712} | |
| {"current_steps": 78, "total_steps": 315, "loss": 0.9076, "lr": 4.2809402423783624e-05, "epoch": 1.2217821782178218, "percentage": 24.76, "elapsed_time": "0:29:01", "remaining_time": "1:28:09", "throughput": 11612.7, "total_tokens": 20217856} | |
| {"current_steps": 79, "total_steps": 315, "loss": 0.9015, "lr": 4.263353840751022e-05, "epoch": 1.2376237623762376, "percentage": 25.08, "elapsed_time": "0:29:23", "remaining_time": "1:27:48", "throughput": 11612.78, "total_tokens": 20480000} | |
| {"current_steps": 80, "total_steps": 315, "loss": 0.8966, "lr": 4.245592045215182e-05, "epoch": 1.2534653465346535, "percentage": 25.4, "elapsed_time": "0:29:46", "remaining_time": "1:27:26", "throughput": 11612.79, "total_tokens": 20742144} | |
| {"current_steps": 81, "total_steps": 315, "loss": 0.8833, "lr": 4.227656622467162e-05, "epoch": 1.2693069306930693, "percentage": 25.71, "elapsed_time": "0:30:08", "remaining_time": "1:27:05", "throughput": 11613.0, "total_tokens": 21004288} | |
| {"current_steps": 82, "total_steps": 315, "loss": 0.8531, "lr": 4.2095493564733005e-05, "epoch": 1.2851485148514852, "percentage": 26.03, "elapsed_time": "0:30:31", "remaining_time": "1:26:43", "throughput": 11613.01, "total_tokens": 21266432} | |
| {"current_steps": 83, "total_steps": 315, "loss": 0.8951, "lr": 4.191272048292513e-05, "epoch": 1.300990099009901, "percentage": 26.35, "elapsed_time": "0:30:53", "remaining_time": "1:26:21", "throughput": 11613.01, "total_tokens": 21528576} | |
| {"current_steps": 84, "total_steps": 315, "loss": 0.909, "lr": 4.172826515897146e-05, "epoch": 1.316831683168317, "percentage": 26.67, "elapsed_time": "0:31:16", "remaining_time": "1:26:00", "throughput": 11612.9, "total_tokens": 21790720} | |
| {"current_steps": 85, "total_steps": 315, "loss": 0.8722, "lr": 4.154214593992149e-05, "epoch": 1.3326732673267327, "percentage": 26.98, "elapsed_time": "0:31:39", "remaining_time": "1:25:38", "throughput": 11612.82, "total_tokens": 22052864} | |
| {"current_steps": 86, "total_steps": 315, "loss": 0.8759, "lr": 4.1354381338325864e-05, "epoch": 1.3485148514851484, "percentage": 27.3, "elapsed_time": "0:32:01", "remaining_time": "1:25:16", "throughput": 11612.86, "total_tokens": 22315008} | |
| {"current_steps": 87, "total_steps": 315, "loss": 0.9013, "lr": 4.116499003039499e-05, "epoch": 1.3643564356435642, "percentage": 27.62, "elapsed_time": "0:32:24", "remaining_time": "1:24:55", "throughput": 11612.83, "total_tokens": 22577152} | |
| {"current_steps": 88, "total_steps": 315, "loss": 0.8782, "lr": 4.09739908541414e-05, "epoch": 1.38019801980198, "percentage": 27.94, "elapsed_time": "0:32:46", "remaining_time": "1:24:33", "throughput": 11612.89, "total_tokens": 22839296} | |
| {"current_steps": 89, "total_steps": 315, "loss": 0.8791, "lr": 4.078140280750597e-05, "epoch": 1.396039603960396, "percentage": 28.25, "elapsed_time": "0:33:09", "remaining_time": "1:24:11", "throughput": 11612.95, "total_tokens": 23101440} | |
| {"current_steps": 90, "total_steps": 315, "loss": 0.8914, "lr": 4.058724504646834e-05, "epoch": 1.4118811881188118, "percentage": 28.57, "elapsed_time": "0:33:31", "remaining_time": "1:23:49", "throughput": 11613.05, "total_tokens": 23363584} | |
| {"current_steps": 91, "total_steps": 315, "loss": 0.8827, "lr": 4.039153688314145e-05, "epoch": 1.4277227722772277, "percentage": 28.89, "elapsed_time": "0:33:54", "remaining_time": "1:23:27", "throughput": 11613.18, "total_tokens": 23625728} | |
| {"current_steps": 92, "total_steps": 315, "loss": 0.9108, "lr": 4.0194297783850755e-05, "epoch": 1.4435643564356435, "percentage": 29.21, "elapsed_time": "0:34:16", "remaining_time": "1:23:05", "throughput": 11613.25, "total_tokens": 23887872} | |
| {"current_steps": 93, "total_steps": 315, "loss": 0.8941, "lr": 3.9995547367197845e-05, "epoch": 1.4594059405940594, "percentage": 29.52, "elapsed_time": "0:34:39", "remaining_time": "1:22:43", "throughput": 11613.31, "total_tokens": 24150016} | |
| {"current_steps": 94, "total_steps": 315, "loss": 0.8923, "lr": 3.9795305402109195e-05, "epoch": 1.4752475247524752, "percentage": 29.84, "elapsed_time": "0:35:02", "remaining_time": "1:22:22", "throughput": 11613.44, "total_tokens": 24412160} | |
| {"current_steps": 95, "total_steps": 315, "loss": 0.8704, "lr": 3.959359180586975e-05, "epoch": 1.491089108910891, "percentage": 30.16, "elapsed_time": "0:35:24", "remaining_time": "1:22:00", "throughput": 11613.43, "total_tokens": 24674304} | |
| {"current_steps": 96, "total_steps": 315, "loss": 0.8993, "lr": 3.939042664214184e-05, "epoch": 1.506930693069307, "percentage": 30.48, "elapsed_time": "0:35:47", "remaining_time": "1:21:38", "throughput": 11613.5, "total_tokens": 24936448} | |
| {"current_steps": 97, "total_steps": 315, "loss": 0.9145, "lr": 3.918583011896955e-05, "epoch": 1.5227722772277228, "percentage": 30.79, "elapsed_time": "0:36:09", "remaining_time": "1:21:16", "throughput": 11613.54, "total_tokens": 25198592} | |
| {"current_steps": 98, "total_steps": 315, "loss": 0.8707, "lr": 3.897982258676867e-05, "epoch": 1.5386138613861386, "percentage": 31.11, "elapsed_time": "0:36:32", "remaining_time": "1:20:54", "throughput": 11613.56, "total_tokens": 25460736} | |
| {"current_steps": 99, "total_steps": 315, "loss": 0.8842, "lr": 3.8772424536302564e-05, "epoch": 1.5544554455445545, "percentage": 31.43, "elapsed_time": "0:36:54", "remaining_time": "1:20:32", "throughput": 11613.63, "total_tokens": 25722880} | |
| {"current_steps": 100, "total_steps": 315, "loss": 0.9139, "lr": 3.856365659664399e-05, "epoch": 1.5702970297029704, "percentage": 31.75, "elapsed_time": "0:37:17", "remaining_time": "1:20:10", "throughput": 11613.41, "total_tokens": 25985024} | |
| {"current_steps": 101, "total_steps": 315, "loss": 0.8868, "lr": 3.835353953312322e-05, "epoch": 1.5861386138613862, "percentage": 32.06, "elapsed_time": "0:37:40", "remaining_time": "1:19:48", "throughput": 11613.3, "total_tokens": 26247168} | |
| {"current_steps": 102, "total_steps": 315, "loss": 0.8928, "lr": 3.814209424526262e-05, "epoch": 1.601980198019802, "percentage": 32.38, "elapsed_time": "0:38:02", "remaining_time": "1:19:26", "throughput": 11613.02, "total_tokens": 26509312} | |
| {"current_steps": 103, "total_steps": 315, "loss": 0.8843, "lr": 3.7929341764697816e-05, "epoch": 1.617821782178218, "percentage": 32.7, "elapsed_time": "0:38:25", "remaining_time": "1:19:04", "throughput": 11612.89, "total_tokens": 26771456} | |
| {"current_steps": 104, "total_steps": 315, "loss": 0.9118, "lr": 3.771530325308579e-05, "epoch": 1.6336633663366338, "percentage": 33.02, "elapsed_time": "0:38:47", "remaining_time": "1:18:42", "throughput": 11612.94, "total_tokens": 27033600} | |
| {"current_steps": 105, "total_steps": 315, "loss": 0.9178, "lr": 3.7500000000000003e-05, "epoch": 1.6495049504950496, "percentage": 33.33, "elapsed_time": "0:39:10", "remaining_time": "1:18:20", "throughput": 11612.98, "total_tokens": 27295744} | |
| {"current_steps": 106, "total_steps": 315, "loss": 0.9019, "lr": 3.7283453420812786e-05, "epoch": 1.6653465346534655, "percentage": 33.65, "elapsed_time": "0:39:33", "remaining_time": "1:17:58", "throughput": 11613.04, "total_tokens": 27557888} | |
| {"current_steps": 107, "total_steps": 315, "loss": 0.9223, "lr": 3.706568505456527e-05, "epoch": 1.6811881188118813, "percentage": 33.97, "elapsed_time": "0:39:55", "remaining_time": "1:17:36", "throughput": 11613.15, "total_tokens": 27820032} | |
| {"current_steps": 108, "total_steps": 315, "loss": 0.9232, "lr": 3.6846716561824965e-05, "epoch": 1.697029702970297, "percentage": 34.29, "elapsed_time": "0:40:18", "remaining_time": "1:17:14", "throughput": 11612.91, "total_tokens": 28082176} | |
| {"current_steps": 109, "total_steps": 315, "loss": 0.8993, "lr": 3.662656972253127e-05, "epoch": 1.7128712871287128, "percentage": 34.6, "elapsed_time": "0:40:40", "remaining_time": "1:16:52", "throughput": 11612.77, "total_tokens": 28344320} | |
| {"current_steps": 110, "total_steps": 315, "loss": 0.8772, "lr": 3.6405266433829075e-05, "epoch": 1.7287128712871287, "percentage": 34.92, "elapsed_time": "0:41:03", "remaining_time": "1:16:30", "throughput": 11612.65, "total_tokens": 28606464} | |
| {"current_steps": 111, "total_steps": 315, "loss": 0.8864, "lr": 3.6182828707890816e-05, "epoch": 1.7445544554455445, "percentage": 35.24, "elapsed_time": "0:41:25", "remaining_time": "1:16:08", "throughput": 11612.67, "total_tokens": 28868608} | |
| {"current_steps": 112, "total_steps": 315, "loss": 0.9053, "lr": 3.5959278669726935e-05, "epoch": 1.7603960396039604, "percentage": 35.56, "elapsed_time": "0:41:48", "remaining_time": "1:15:46", "throughput": 11612.67, "total_tokens": 29130752} | |
| {"current_steps": 113, "total_steps": 315, "loss": 0.8868, "lr": 3.5734638554985236e-05, "epoch": 1.7762376237623763, "percentage": 35.87, "elapsed_time": "0:42:11", "remaining_time": "1:15:24", "throughput": 11612.57, "total_tokens": 29392896} | |
| {"current_steps": 114, "total_steps": 315, "loss": 0.9201, "lr": 3.550893070773914e-05, "epoch": 1.7920792079207921, "percentage": 36.19, "elapsed_time": "0:42:33", "remaining_time": "1:15:02", "throughput": 11612.49, "total_tokens": 29655040} | |
| {"current_steps": 115, "total_steps": 315, "loss": 0.8993, "lr": 3.5282177578265296e-05, "epoch": 1.807920792079208, "percentage": 36.51, "elapsed_time": "0:42:56", "remaining_time": "1:14:40", "throughput": 11612.44, "total_tokens": 29917184} | |
| {"current_steps": 116, "total_steps": 315, "loss": 0.8602, "lr": 3.505440172081044e-05, "epoch": 1.8237623762376236, "percentage": 36.83, "elapsed_time": "0:43:18", "remaining_time": "1:14:18", "throughput": 11612.48, "total_tokens": 30179328} | |
| {"current_steps": 117, "total_steps": 315, "loss": 0.8879, "lr": 3.4825625791348096e-05, "epoch": 1.8396039603960395, "percentage": 37.14, "elapsed_time": "0:43:41", "remaining_time": "1:13:56", "throughput": 11612.61, "total_tokens": 30441472} | |
| {"current_steps": 118, "total_steps": 315, "loss": 0.8879, "lr": 3.459587254532502e-05, "epoch": 1.8554455445544553, "percentage": 37.46, "elapsed_time": "0:44:03", "remaining_time": "1:13:34", "throughput": 11612.58, "total_tokens": 30703616} | |
| {"current_steps": 119, "total_steps": 315, "loss": 0.9136, "lr": 3.436516483539781e-05, "epoch": 1.8712871287128712, "percentage": 37.78, "elapsed_time": "0:44:26", "remaining_time": "1:13:11", "throughput": 11612.74, "total_tokens": 30965760} | |
| {"current_steps": 120, "total_steps": 315, "loss": 0.8795, "lr": 3.413352560915988e-05, "epoch": 1.887128712871287, "percentage": 38.1, "elapsed_time": "0:44:49", "remaining_time": "1:12:49", "throughput": 11612.83, "total_tokens": 31227904} | |
| {"current_steps": 121, "total_steps": 315, "loss": 0.8771, "lr": 3.390097790685892e-05, "epoch": 1.9029702970297029, "percentage": 38.41, "elapsed_time": "0:45:11", "remaining_time": "1:12:27", "throughput": 11612.88, "total_tokens": 31490048} | |
| {"current_steps": 122, "total_steps": 315, "loss": 0.8743, "lr": 3.366754485910518e-05, "epoch": 1.9188118811881187, "percentage": 38.73, "elapsed_time": "0:45:34", "remaining_time": "1:12:05", "throughput": 11612.98, "total_tokens": 31752192} | |
| {"current_steps": 123, "total_steps": 315, "loss": 0.8785, "lr": 3.343324968457076e-05, "epoch": 1.9346534653465346, "percentage": 39.05, "elapsed_time": "0:45:56", "remaining_time": "1:11:43", "throughput": 11612.99, "total_tokens": 32014336} | |
| {"current_steps": 124, "total_steps": 315, "loss": 0.8941, "lr": 3.3198115687680115e-05, "epoch": 1.9504950495049505, "percentage": 39.37, "elapsed_time": "0:46:19", "remaining_time": "1:11:21", "throughput": 11613.12, "total_tokens": 32276480} | |
| {"current_steps": 125, "total_steps": 315, "loss": 0.9004, "lr": 3.2962166256292113e-05, "epoch": 1.9663366336633663, "percentage": 39.68, "elapsed_time": "0:46:41", "remaining_time": "1:10:58", "throughput": 11613.22, "total_tokens": 32538624} | |
| {"current_steps": 126, "total_steps": 315, "loss": 0.8775, "lr": 3.272542485937369e-05, "epoch": 1.9821782178217822, "percentage": 40.0, "elapsed_time": "0:47:04", "remaining_time": "1:10:36", "throughput": 11613.3, "total_tokens": 32800768} | |
| {"current_steps": 127, "total_steps": 315, "loss": 0.9204, "lr": 3.248791504466548e-05, "epoch": 1.998019801980198, "percentage": 40.32, "elapsed_time": "0:47:26", "remaining_time": "1:10:14", "throughput": 11613.44, "total_tokens": 33062912} | |
| {"current_steps": 128, "total_steps": 315, "loss": 0.8618, "lr": 3.224966043633966e-05, "epoch": 2.0, "percentage": 40.63, "elapsed_time": "0:47:29", "remaining_time": "1:09:23", "throughput": 11613.62, "total_tokens": 33095680} | |
| {"current_steps": 129, "total_steps": 315, "loss": 0.4193, "lr": 3.201068473265007e-05, "epoch": 2.015841584158416, "percentage": 40.95, "elapsed_time": "0:47:52", "remaining_time": "1:09:01", "throughput": 11613.54, "total_tokens": 33357824} | |
| {"current_steps": 130, "total_steps": 315, "loss": 0.3973, "lr": 3.177101170357513e-05, "epoch": 2.0316831683168317, "percentage": 41.27, "elapsed_time": "0:48:14", "remaining_time": "1:08:39", "throughput": 11613.63, "total_tokens": 33619968} | |
| {"current_steps": 131, "total_steps": 315, "loss": 0.3753, "lr": 3.1530665188453464e-05, "epoch": 2.0475247524752476, "percentage": 41.59, "elapsed_time": "0:48:37", "remaining_time": "1:08:17", "throughput": 11613.72, "total_tokens": 33882112} | |
| {"current_steps": 132, "total_steps": 315, "loss": 0.3861, "lr": 3.1289669093612714e-05, "epoch": 2.0633663366336634, "percentage": 41.9, "elapsed_time": "0:48:59", "remaining_time": "1:07:55", "throughput": 11613.71, "total_tokens": 34144256} | |
| {"current_steps": 133, "total_steps": 315, "loss": 0.3843, "lr": 3.104804738999169e-05, "epoch": 2.0792079207920793, "percentage": 42.22, "elapsed_time": "0:49:22", "remaining_time": "1:07:34", "throughput": 11613.79, "total_tokens": 34406400} | |
| {"current_steps": 134, "total_steps": 315, "loss": 0.3644, "lr": 3.0805824110756064e-05, "epoch": 2.095049504950495, "percentage": 42.54, "elapsed_time": "0:49:45", "remaining_time": "1:07:12", "throughput": 11613.84, "total_tokens": 34668544} | |
| {"current_steps": 135, "total_steps": 315, "loss": 0.3452, "lr": 3.056302334890786e-05, "epoch": 2.110891089108911, "percentage": 42.86, "elapsed_time": "0:50:07", "remaining_time": "1:06:50", "throughput": 11613.91, "total_tokens": 34930688} | |
| {"current_steps": 136, "total_steps": 315, "loss": 0.3311, "lr": 3.0319669254889055e-05, "epoch": 2.126732673267327, "percentage": 43.17, "elapsed_time": "0:50:30", "remaining_time": "1:06:28", "throughput": 11613.97, "total_tokens": 35192832} | |
| {"current_steps": 137, "total_steps": 315, "loss": 0.3337, "lr": 3.0075786034179405e-05, "epoch": 2.1425742574257427, "percentage": 43.49, "elapsed_time": "0:50:52", "remaining_time": "1:06:06", "throughput": 11614.08, "total_tokens": 35454976} | |
| {"current_steps": 138, "total_steps": 315, "loss": 0.3356, "lr": 2.9831397944888833e-05, "epoch": 2.1584158415841586, "percentage": 43.81, "elapsed_time": "0:51:15", "remaining_time": "1:05:44", "throughput": 11614.1, "total_tokens": 35717120} | |
| {"current_steps": 139, "total_steps": 315, "loss": 0.338, "lr": 2.958652929534456e-05, "epoch": 2.1742574257425744, "percentage": 44.13, "elapsed_time": "0:51:37", "remaining_time": "1:05:22", "throughput": 11614.11, "total_tokens": 35979264} | |
| {"current_steps": 140, "total_steps": 315, "loss": 0.3364, "lr": 2.9341204441673266e-05, "epoch": 2.1900990099009903, "percentage": 44.44, "elapsed_time": "0:52:00", "remaining_time": "1:05:00", "throughput": 11614.07, "total_tokens": 36241408} | |
| {"current_steps": 141, "total_steps": 315, "loss": 0.3109, "lr": 2.9095447785378443e-05, "epoch": 2.205940594059406, "percentage": 44.76, "elapsed_time": "0:52:23", "remaining_time": "1:04:38", "throughput": 11614.11, "total_tokens": 36503552} | |
| {"current_steps": 142, "total_steps": 315, "loss": 0.3099, "lr": 2.8849283770913337e-05, "epoch": 2.221782178217822, "percentage": 45.08, "elapsed_time": "0:52:45", "remaining_time": "1:04:16", "throughput": 11614.24, "total_tokens": 36765696} | |
| {"current_steps": 143, "total_steps": 315, "loss": 0.312, "lr": 2.8602736883249503e-05, "epoch": 2.237623762376238, "percentage": 45.4, "elapsed_time": "0:53:08", "remaining_time": "1:03:54", "throughput": 11614.31, "total_tokens": 37027840} | |
| {"current_steps": 144, "total_steps": 315, "loss": 0.3212, "lr": 2.8355831645441388e-05, "epoch": 2.2534653465346537, "percentage": 45.71, "elapsed_time": "0:53:30", "remaining_time": "1:03:32", "throughput": 11614.37, "total_tokens": 37289984} | |
| {"current_steps": 145, "total_steps": 315, "loss": 0.3264, "lr": 2.8108592616187133e-05, "epoch": 2.2693069306930695, "percentage": 46.03, "elapsed_time": "0:53:53", "remaining_time": "1:03:10", "throughput": 11614.4, "total_tokens": 37552128} | |
| {"current_steps": 146, "total_steps": 315, "loss": 0.2986, "lr": 2.78610443873858e-05, "epoch": 2.285148514851485, "percentage": 46.35, "elapsed_time": "0:54:15", "remaining_time": "1:02:48", "throughput": 11614.43, "total_tokens": 37814272} | |
| {"current_steps": 147, "total_steps": 315, "loss": 0.3132, "lr": 2.761321158169134e-05, "epoch": 2.300990099009901, "percentage": 46.67, "elapsed_time": "0:54:38", "remaining_time": "1:02:26", "throughput": 11614.45, "total_tokens": 38076416} | |
| {"current_steps": 148, "total_steps": 315, "loss": 0.3184, "lr": 2.736511885006343e-05, "epoch": 2.3168316831683167, "percentage": 46.98, "elapsed_time": "0:55:00", "remaining_time": "1:02:04", "throughput": 11614.47, "total_tokens": 38338560} | |
| {"current_steps": 149, "total_steps": 315, "loss": 0.295, "lr": 2.7116790869315582e-05, "epoch": 2.3326732673267325, "percentage": 47.3, "elapsed_time": "0:55:23", "remaining_time": "1:01:42", "throughput": 11614.54, "total_tokens": 38600704} | |
| {"current_steps": 150, "total_steps": 315, "loss": 0.3218, "lr": 2.686825233966061e-05, "epoch": 2.3485148514851484, "percentage": 47.62, "elapsed_time": "0:55:46", "remaining_time": "1:01:20", "throughput": 11614.61, "total_tokens": 38862848} | |
| {"current_steps": 151, "total_steps": 315, "loss": 0.3033, "lr": 2.6619527982253794e-05, "epoch": 2.3643564356435642, "percentage": 47.94, "elapsed_time": "0:56:08", "remaining_time": "1:00:58", "throughput": 11614.58, "total_tokens": 39124992} | |
| {"current_steps": 152, "total_steps": 315, "loss": 0.31, "lr": 2.6370642536734004e-05, "epoch": 2.38019801980198, "percentage": 48.25, "elapsed_time": "0:56:31", "remaining_time": "1:00:36", "throughput": 11614.62, "total_tokens": 39387136} | |
| {"current_steps": 153, "total_steps": 315, "loss": 0.2966, "lr": 2.6121620758762877e-05, "epoch": 2.396039603960396, "percentage": 48.57, "elapsed_time": "0:56:53", "remaining_time": "1:00:14", "throughput": 11614.59, "total_tokens": 39649280} | |
| {"current_steps": 154, "total_steps": 315, "loss": 0.3146, "lr": 2.587248741756253e-05, "epoch": 2.411881188118812, "percentage": 48.89, "elapsed_time": "0:57:16", "remaining_time": "0:59:52", "throughput": 11614.65, "total_tokens": 39911424} | |
| {"current_steps": 155, "total_steps": 315, "loss": 0.3063, "lr": 2.5623267293451826e-05, "epoch": 2.4277227722772277, "percentage": 49.21, "elapsed_time": "0:57:38", "remaining_time": "0:59:30", "throughput": 11614.67, "total_tokens": 40173568} | |
| {"current_steps": 156, "total_steps": 315, "loss": 0.2858, "lr": 2.5373985175381594e-05, "epoch": 2.4435643564356435, "percentage": 49.52, "elapsed_time": "0:58:01", "remaining_time": "0:59:08", "throughput": 11614.63, "total_tokens": 40435712} | |
| {"current_steps": 157, "total_steps": 315, "loss": 0.3047, "lr": 2.5124665858468954e-05, "epoch": 2.4594059405940594, "percentage": 49.84, "elapsed_time": "0:58:24", "remaining_time": "0:58:46", "throughput": 11614.56, "total_tokens": 40697856} | |
| {"current_steps": 158, "total_steps": 315, "loss": 0.2995, "lr": 2.4875334141531052e-05, "epoch": 2.4752475247524752, "percentage": 50.16, "elapsed_time": "0:58:46", "remaining_time": "0:58:24", "throughput": 11614.58, "total_tokens": 40960000} | |
| {"current_steps": 159, "total_steps": 315, "loss": 0.3007, "lr": 2.4626014824618415e-05, "epoch": 2.491089108910891, "percentage": 50.48, "elapsed_time": "0:59:09", "remaining_time": "0:58:02", "throughput": 11614.6, "total_tokens": 41222144} | |
| {"current_steps": 160, "total_steps": 315, "loss": 0.3126, "lr": 2.4376732706548183e-05, "epoch": 2.506930693069307, "percentage": 50.79, "elapsed_time": "0:59:31", "remaining_time": "0:57:40", "throughput": 11614.68, "total_tokens": 41484288} | |
| {"current_steps": 161, "total_steps": 315, "loss": 0.3149, "lr": 2.4127512582437485e-05, "epoch": 2.522772277227723, "percentage": 51.11, "elapsed_time": "0:59:54", "remaining_time": "0:57:18", "throughput": 11614.63, "total_tokens": 41746432} | |
| {"current_steps": 162, "total_steps": 315, "loss": 0.3178, "lr": 2.3878379241237136e-05, "epoch": 2.5386138613861386, "percentage": 51.43, "elapsed_time": "1:00:16", "remaining_time": "0:56:55", "throughput": 11614.72, "total_tokens": 42008576} | |
| {"current_steps": 163, "total_steps": 315, "loss": 0.3153, "lr": 2.3629357463265995e-05, "epoch": 2.5544554455445545, "percentage": 51.75, "elapsed_time": "1:00:39", "remaining_time": "0:56:33", "throughput": 11614.75, "total_tokens": 42270720} | |
| {"current_steps": 164, "total_steps": 315, "loss": 0.314, "lr": 2.3380472017746202e-05, "epoch": 2.5702970297029704, "percentage": 52.06, "elapsed_time": "1:01:01", "remaining_time": "0:56:11", "throughput": 11614.73, "total_tokens": 42532864} | |
| {"current_steps": 165, "total_steps": 315, "loss": 0.3121, "lr": 2.3131747660339394e-05, "epoch": 2.586138613861386, "percentage": 52.38, "elapsed_time": "1:01:24", "remaining_time": "0:55:49", "throughput": 11614.7, "total_tokens": 42795008} | |
| {"current_steps": 166, "total_steps": 315, "loss": 0.3112, "lr": 2.288320913068442e-05, "epoch": 2.601980198019802, "percentage": 52.7, "elapsed_time": "1:01:47", "remaining_time": "0:55:27", "throughput": 11614.66, "total_tokens": 43057152} | |
| {"current_steps": 167, "total_steps": 315, "loss": 0.3114, "lr": 2.2634881149936575e-05, "epoch": 2.617821782178218, "percentage": 53.02, "elapsed_time": "1:02:09", "remaining_time": "0:55:05", "throughput": 11614.6, "total_tokens": 43319296} | |
| {"current_steps": 168, "total_steps": 315, "loss": 0.3092, "lr": 2.238678841830867e-05, "epoch": 2.633663366336634, "percentage": 53.33, "elapsed_time": "1:02:32", "remaining_time": "0:54:43", "throughput": 11614.62, "total_tokens": 43581440} | |
| {"current_steps": 169, "total_steps": 315, "loss": 0.3114, "lr": 2.2138955612614207e-05, "epoch": 2.6495049504950496, "percentage": 53.65, "elapsed_time": "1:02:54", "remaining_time": "0:54:21", "throughput": 11614.76, "total_tokens": 43843584} | |
| {"current_steps": 170, "total_steps": 315, "loss": 0.3053, "lr": 2.189140738381288e-05, "epoch": 2.6653465346534655, "percentage": 53.97, "elapsed_time": "1:03:17", "remaining_time": "0:53:58", "throughput": 11614.85, "total_tokens": 44105728} | |
| {"current_steps": 171, "total_steps": 315, "loss": 0.2993, "lr": 2.164416835455862e-05, "epoch": 2.6811881188118813, "percentage": 54.29, "elapsed_time": "1:03:39", "remaining_time": "0:53:36", "throughput": 11614.96, "total_tokens": 44367872} | |
| {"current_steps": 172, "total_steps": 315, "loss": 0.3082, "lr": 2.1397263116750503e-05, "epoch": 2.6970297029702968, "percentage": 54.6, "elapsed_time": "1:04:02", "remaining_time": "0:53:14", "throughput": 11615.02, "total_tokens": 44630016} | |
| {"current_steps": 173, "total_steps": 315, "loss": 0.3042, "lr": 2.115071622908666e-05, "epoch": 2.7128712871287126, "percentage": 54.92, "elapsed_time": "1:04:24", "remaining_time": "0:52:52", "throughput": 11615.1, "total_tokens": 44892160} | |
| {"current_steps": 174, "total_steps": 315, "loss": 0.3083, "lr": 2.090455221462156e-05, "epoch": 2.7287128712871285, "percentage": 55.24, "elapsed_time": "1:04:47", "remaining_time": "0:52:30", "throughput": 11615.1, "total_tokens": 45154304} | |
| {"current_steps": 175, "total_steps": 315, "loss": 0.3202, "lr": 2.0658795558326743e-05, "epoch": 2.7445544554455443, "percentage": 55.56, "elapsed_time": "1:05:10", "remaining_time": "0:52:08", "throughput": 11615.14, "total_tokens": 45416448} | |
| {"current_steps": 176, "total_steps": 315, "loss": 0.3099, "lr": 2.0413470704655445e-05, "epoch": 2.76039603960396, "percentage": 55.87, "elapsed_time": "1:05:32", "remaining_time": "0:51:45", "throughput": 11615.12, "total_tokens": 45678592} | |
| {"current_steps": 177, "total_steps": 315, "loss": 0.3225, "lr": 2.0168602055111173e-05, "epoch": 2.776237623762376, "percentage": 56.19, "elapsed_time": "1:05:55", "remaining_time": "0:51:23", "throughput": 11615.1, "total_tokens": 45940736} | |
| {"current_steps": 178, "total_steps": 315, "loss": 0.3149, "lr": 1.99242139658206e-05, "epoch": 2.792079207920792, "percentage": 56.51, "elapsed_time": "1:06:17", "remaining_time": "0:51:01", "throughput": 11615.15, "total_tokens": 46202880} | |
| {"current_steps": 179, "total_steps": 315, "loss": 0.2952, "lr": 1.9680330745110954e-05, "epoch": 2.8079207920792078, "percentage": 56.83, "elapsed_time": "1:06:40", "remaining_time": "0:50:39", "throughput": 11615.21, "total_tokens": 46465024} | |
| {"current_steps": 180, "total_steps": 315, "loss": 0.3035, "lr": 1.9436976651092144e-05, "epoch": 2.8237623762376236, "percentage": 57.14, "elapsed_time": "1:07:02", "remaining_time": "0:50:17", "throughput": 11615.27, "total_tokens": 46727168} | |
| {"current_steps": 181, "total_steps": 315, "loss": 0.303, "lr": 1.919417588924394e-05, "epoch": 2.8396039603960395, "percentage": 57.46, "elapsed_time": "1:07:25", "remaining_time": "0:49:54", "throughput": 11615.3, "total_tokens": 46989312} | |
| {"current_steps": 182, "total_steps": 315, "loss": 0.3057, "lr": 1.895195261000831e-05, "epoch": 2.8554455445544553, "percentage": 57.78, "elapsed_time": "1:07:48", "remaining_time": "0:49:32", "throughput": 11615.31, "total_tokens": 47251456} | |
| {"current_steps": 183, "total_steps": 315, "loss": 0.3138, "lr": 1.871033090638729e-05, "epoch": 2.871287128712871, "percentage": 58.1, "elapsed_time": "1:08:10", "remaining_time": "0:49:10", "throughput": 11615.17, "total_tokens": 47513600} | |
| {"current_steps": 184, "total_steps": 315, "loss": 0.3164, "lr": 1.8469334811546542e-05, "epoch": 2.887128712871287, "percentage": 58.41, "elapsed_time": "1:08:33", "remaining_time": "0:48:48", "throughput": 11615.2, "total_tokens": 47775744} | |
| {"current_steps": 185, "total_steps": 315, "loss": 0.3178, "lr": 1.8228988296424877e-05, "epoch": 2.902970297029703, "percentage": 58.73, "elapsed_time": "1:08:55", "remaining_time": "0:48:26", "throughput": 11615.19, "total_tokens": 48037888} | |
| {"current_steps": 186, "total_steps": 315, "loss": 0.3135, "lr": 1.7989315267349936e-05, "epoch": 2.9188118811881187, "percentage": 59.05, "elapsed_time": "1:09:18", "remaining_time": "0:48:04", "throughput": 11615.12, "total_tokens": 48300032} | |
| {"current_steps": 187, "total_steps": 315, "loss": 0.3068, "lr": 1.7750339563660347e-05, "epoch": 2.9346534653465346, "percentage": 59.37, "elapsed_time": "1:09:40", "remaining_time": "0:47:41", "throughput": 11615.13, "total_tokens": 48562176} | |
| {"current_steps": 188, "total_steps": 315, "loss": 0.3052, "lr": 1.751208495533452e-05, "epoch": 2.9504950495049505, "percentage": 59.68, "elapsed_time": "1:10:03", "remaining_time": "0:47:19", "throughput": 11615.19, "total_tokens": 48824320} | |
| {"current_steps": 189, "total_steps": 315, "loss": 0.3192, "lr": 1.7274575140626318e-05, "epoch": 2.9663366336633663, "percentage": 60.0, "elapsed_time": "1:10:26", "remaining_time": "0:46:57", "throughput": 11615.23, "total_tokens": 49086464} | |
| {"current_steps": 190, "total_steps": 315, "loss": 0.313, "lr": 1.7037833743707892e-05, "epoch": 2.982178217821782, "percentage": 60.32, "elapsed_time": "1:10:48", "remaining_time": "0:46:35", "throughput": 11615.23, "total_tokens": 49348608} | |
| {"current_steps": 191, "total_steps": 315, "loss": 0.31, "lr": 1.6801884312319895e-05, "epoch": 2.998019801980198, "percentage": 60.63, "elapsed_time": "1:11:11", "remaining_time": "0:46:12", "throughput": 11615.25, "total_tokens": 49610752} | |
| {"current_steps": 192, "total_steps": 315, "loss": 0.2615, "lr": 1.6566750315429254e-05, "epoch": 3.0, "percentage": 60.95, "elapsed_time": "1:11:13", "remaining_time": "0:45:37", "throughput": 11615.38, "total_tokens": 49643520} | |
| {"current_steps": 193, "total_steps": 315, "loss": 0.0973, "lr": 1.633245514089482e-05, "epoch": 3.015841584158416, "percentage": 61.27, "elapsed_time": "1:11:36", "remaining_time": "0:45:15", "throughput": 11615.37, "total_tokens": 49905664} | |
| {"current_steps": 194, "total_steps": 315, "loss": 0.0962, "lr": 1.609902209314108e-05, "epoch": 3.0316831683168317, "percentage": 61.59, "elapsed_time": "1:11:59", "remaining_time": "0:44:53", "throughput": 11615.41, "total_tokens": 50167808} | |
| {"current_steps": 195, "total_steps": 315, "loss": 0.0855, "lr": 1.5866474390840125e-05, "epoch": 3.0475247524752476, "percentage": 61.9, "elapsed_time": "1:12:21", "remaining_time": "0:44:31", "throughput": 11615.43, "total_tokens": 50429952} | |
| {"current_steps": 196, "total_steps": 315, "loss": 0.077, "lr": 1.56348351646022e-05, "epoch": 3.0633663366336634, "percentage": 62.22, "elapsed_time": "1:12:44", "remaining_time": "0:44:09", "throughput": 11615.5, "total_tokens": 50692096} | |
| {"current_steps": 197, "total_steps": 315, "loss": 0.0784, "lr": 1.5404127454674995e-05, "epoch": 3.0792079207920793, "percentage": 62.54, "elapsed_time": "1:13:06", "remaining_time": "0:43:47", "throughput": 11615.61, "total_tokens": 50954240} | |
| {"current_steps": 198, "total_steps": 315, "loss": 0.0791, "lr": 1.5174374208651912e-05, "epoch": 3.095049504950495, "percentage": 62.86, "elapsed_time": "1:13:29", "remaining_time": "0:43:25", "throughput": 11615.64, "total_tokens": 51216384} | |
| {"current_steps": 199, "total_steps": 315, "loss": 0.0757, "lr": 1.4945598279189565e-05, "epoch": 3.110891089108911, "percentage": 63.17, "elapsed_time": "1:13:51", "remaining_time": "0:43:03", "throughput": 11615.65, "total_tokens": 51478528} | |
| {"current_steps": 200, "total_steps": 315, "loss": 0.0871, "lr": 1.4717822421734718e-05, "epoch": 3.126732673267327, "percentage": 63.49, "elapsed_time": "1:14:14", "remaining_time": "0:42:41", "throughput": 11615.65, "total_tokens": 51740672} | |
| {"current_steps": 201, "total_steps": 315, "loss": 0.0765, "lr": 1.4491069292260868e-05, "epoch": 3.1425742574257427, "percentage": 63.81, "elapsed_time": "1:14:36", "remaining_time": "0:42:19", "throughput": 11615.59, "total_tokens": 52002816} | |
| {"current_steps": 202, "total_steps": 315, "loss": 0.0735, "lr": 1.4265361445014768e-05, "epoch": 3.1584158415841586, "percentage": 64.13, "elapsed_time": "1:14:59", "remaining_time": "0:41:57", "throughput": 11615.58, "total_tokens": 52264960} | |
| {"current_steps": 203, "total_steps": 315, "loss": 0.0745, "lr": 1.4040721330273062e-05, "epoch": 3.1742574257425744, "percentage": 64.44, "elapsed_time": "1:15:22", "remaining_time": "0:41:35", "throughput": 11614.87, "total_tokens": 52527104} | |
| {"current_steps": 204, "total_steps": 315, "loss": 0.0821, "lr": 1.3817171292109183e-05, "epoch": 3.1900990099009903, "percentage": 64.76, "elapsed_time": "1:15:45", "remaining_time": "0:41:13", "throughput": 11614.19, "total_tokens": 52789248} | |
| {"current_steps": 205, "total_steps": 315, "loss": 0.0754, "lr": 1.3594733566170926e-05, "epoch": 3.205940594059406, "percentage": 65.08, "elapsed_time": "1:16:07", "remaining_time": "0:40:51", "throughput": 11614.17, "total_tokens": 53051392} | |
| {"current_steps": 206, "total_steps": 315, "loss": 0.0709, "lr": 1.337343027746874e-05, "epoch": 3.221782178217822, "percentage": 65.4, "elapsed_time": "1:16:30", "remaining_time": "0:40:28", "throughput": 11614.2, "total_tokens": 53313536} | |
| {"current_steps": 207, "total_steps": 315, "loss": 0.0681, "lr": 1.3153283438175034e-05, "epoch": 3.237623762376238, "percentage": 65.71, "elapsed_time": "1:16:53", "remaining_time": "0:40:06", "throughput": 11613.11, "total_tokens": 53575680} | |
| {"current_steps": 208, "total_steps": 315, "loss": 0.0754, "lr": 1.2934314945434734e-05, "epoch": 3.2534653465346537, "percentage": 66.03, "elapsed_time": "1:17:15", "remaining_time": "0:39:44", "throughput": 11613.2, "total_tokens": 53837824} | |
| {"current_steps": 209, "total_steps": 315, "loss": 0.0675, "lr": 1.271654657918722e-05, "epoch": 3.2693069306930695, "percentage": 66.35, "elapsed_time": "1:17:38", "remaining_time": "0:39:22", "throughput": 11613.21, "total_tokens": 54099968} | |
| {"current_steps": 210, "total_steps": 315, "loss": 0.0683, "lr": 1.2500000000000006e-05, "epoch": 3.285148514851485, "percentage": 66.67, "elapsed_time": "1:18:01", "remaining_time": "0:39:00", "throughput": 11613.26, "total_tokens": 54362112} | |
| {"current_steps": 211, "total_steps": 315, "loss": 0.0734, "lr": 1.2284696746914216e-05, "epoch": 3.300990099009901, "percentage": 66.98, "elapsed_time": "1:18:23", "remaining_time": "0:38:38", "throughput": 11613.31, "total_tokens": 54624256} | |
| {"current_steps": 212, "total_steps": 315, "loss": 0.0678, "lr": 1.2070658235302181e-05, "epoch": 3.3168316831683167, "percentage": 67.3, "elapsed_time": "1:18:46", "remaining_time": "0:38:16", "throughput": 11613.37, "total_tokens": 54886400} | |
| {"current_steps": 213, "total_steps": 315, "loss": 0.0662, "lr": 1.185790575473738e-05, "epoch": 3.3326732673267325, "percentage": 67.62, "elapsed_time": "1:19:08", "remaining_time": "0:37:54", "throughput": 11613.47, "total_tokens": 55148544} | |
| {"current_steps": 214, "total_steps": 315, "loss": 0.0746, "lr": 1.1646460466876783e-05, "epoch": 3.3485148514851484, "percentage": 67.94, "elapsed_time": "1:19:31", "remaining_time": "0:37:31", "throughput": 11613.55, "total_tokens": 55410688} | |
| {"current_steps": 215, "total_steps": 315, "loss": 0.0747, "lr": 1.1436343403356017e-05, "epoch": 3.3643564356435642, "percentage": 68.25, "elapsed_time": "1:19:53", "remaining_time": "0:37:09", "throughput": 11613.6, "total_tokens": 55672832} | |
| {"current_steps": 216, "total_steps": 315, "loss": 0.0647, "lr": 1.122757546369744e-05, "epoch": 3.38019801980198, "percentage": 68.57, "elapsed_time": "1:20:16", "remaining_time": "0:36:47", "throughput": 11613.59, "total_tokens": 55934976} | |
| {"current_steps": 217, "total_steps": 315, "loss": 0.0666, "lr": 1.1020177413231334e-05, "epoch": 3.396039603960396, "percentage": 68.89, "elapsed_time": "1:20:38", "remaining_time": "0:36:25", "throughput": 11613.63, "total_tokens": 56197120} | |
| {"current_steps": 218, "total_steps": 315, "loss": 0.0678, "lr": 1.0814169881030459e-05, "epoch": 3.411881188118812, "percentage": 69.21, "elapsed_time": "1:21:01", "remaining_time": "0:36:03", "throughput": 11613.67, "total_tokens": 56459264} | |
| {"current_steps": 219, "total_steps": 315, "loss": 0.068, "lr": 1.0609573357858166e-05, "epoch": 3.4277227722772277, "percentage": 69.52, "elapsed_time": "1:21:24", "remaining_time": "0:35:40", "throughput": 11613.68, "total_tokens": 56721408} | |
| {"current_steps": 220, "total_steps": 315, "loss": 0.069, "lr": 1.0406408194130259e-05, "epoch": 3.4435643564356435, "percentage": 69.84, "elapsed_time": "1:21:46", "remaining_time": "0:35:18", "throughput": 11613.59, "total_tokens": 56983552} | |
| {"current_steps": 221, "total_steps": 315, "loss": 0.0602, "lr": 1.0204694597890812e-05, "epoch": 3.4594059405940594, "percentage": 70.16, "elapsed_time": "1:22:09", "remaining_time": "0:34:56", "throughput": 11613.6, "total_tokens": 57245696} | |
| {"current_steps": 222, "total_steps": 315, "loss": 0.0698, "lr": 1.0004452632802158e-05, "epoch": 3.4752475247524752, "percentage": 70.48, "elapsed_time": "1:22:31", "remaining_time": "0:34:34", "throughput": 11613.54, "total_tokens": 57507840} | |
| {"current_steps": 223, "total_steps": 315, "loss": 0.065, "lr": 9.805702216149251e-06, "epoch": 3.491089108910891, "percentage": 70.79, "elapsed_time": "1:22:54", "remaining_time": "0:34:12", "throughput": 11613.51, "total_tokens": 57769984} | |
| {"current_steps": 224, "total_steps": 315, "loss": 0.0582, "lr": 9.608463116858542e-06, "epoch": 3.506930693069307, "percentage": 71.11, "elapsed_time": "1:23:16", "remaining_time": "0:33:50", "throughput": 11613.52, "total_tokens": 58032128} | |
| {"current_steps": 225, "total_steps": 315, "loss": 0.063, "lr": 9.412754953531663e-06, "epoch": 3.522772277227723, "percentage": 71.43, "elapsed_time": "1:23:39", "remaining_time": "0:33:27", "throughput": 11613.54, "total_tokens": 58294272} | |
| {"current_steps": 226, "total_steps": 315, "loss": 0.0686, "lr": 9.21859719249403e-06, "epoch": 3.5386138613861386, "percentage": 71.75, "elapsed_time": "1:24:02", "remaining_time": "0:33:05", "throughput": 11613.56, "total_tokens": 58556416} | |
| {"current_steps": 227, "total_steps": 315, "loss": 0.0616, "lr": 9.026009145858607e-06, "epoch": 3.5544554455445545, "percentage": 72.06, "elapsed_time": "1:24:24", "remaining_time": "0:32:43", "throughput": 11613.53, "total_tokens": 58818560} | |
| {"current_steps": 228, "total_steps": 315, "loss": 0.06, "lr": 8.835009969605012e-06, "epoch": 3.5702970297029704, "percentage": 72.38, "elapsed_time": "1:24:47", "remaining_time": "0:32:21", "throughput": 11613.55, "total_tokens": 59080704} | |
| {"current_steps": 229, "total_steps": 315, "loss": 0.0613, "lr": 8.645618661674142e-06, "epoch": 3.586138613861386, "percentage": 72.7, "elapsed_time": "1:25:09", "remaining_time": "0:31:58", "throughput": 11613.55, "total_tokens": 59342848} | |
| {"current_steps": 230, "total_steps": 315, "loss": 0.0616, "lr": 8.45785406007852e-06, "epoch": 3.601980198019802, "percentage": 73.02, "elapsed_time": "1:25:32", "remaining_time": "0:31:36", "throughput": 11613.49, "total_tokens": 59604992} | |
| {"current_steps": 231, "total_steps": 315, "loss": 0.0644, "lr": 8.271734841028553e-06, "epoch": 3.617821782178218, "percentage": 73.33, "elapsed_time": "1:25:54", "remaining_time": "0:31:14", "throughput": 11613.45, "total_tokens": 59867136} | |
| {"current_steps": 232, "total_steps": 315, "loss": 0.0637, "lr": 8.08727951707487e-06, "epoch": 3.633663366336634, "percentage": 73.65, "elapsed_time": "1:26:17", "remaining_time": "0:30:52", "throughput": 11613.45, "total_tokens": 60129280} | |
| {"current_steps": 233, "total_steps": 315, "loss": 0.0604, "lr": 7.904506435266998e-06, "epoch": 3.6495049504950496, "percentage": 73.97, "elapsed_time": "1:26:40", "remaining_time": "0:30:30", "throughput": 11613.36, "total_tokens": 60391424} | |
| {"current_steps": 234, "total_steps": 315, "loss": 0.0623, "lr": 7.723433775328384e-06, "epoch": 3.6653465346534655, "percentage": 74.29, "elapsed_time": "1:27:02", "remaining_time": "0:30:07", "throughput": 11613.39, "total_tokens": 60653568} | |
| {"current_steps": 235, "total_steps": 315, "loss": 0.0621, "lr": 7.5440795478481815e-06, "epoch": 3.6811881188118813, "percentage": 74.6, "elapsed_time": "1:27:25", "remaining_time": "0:29:45", "throughput": 11613.34, "total_tokens": 60915712} | |
| {"current_steps": 236, "total_steps": 315, "loss": 0.0599, "lr": 7.366461592489782e-06, "epoch": 3.6970297029702968, "percentage": 74.92, "elapsed_time": "1:27:47", "remaining_time": "0:29:23", "throughput": 11613.37, "total_tokens": 61177856} | |
| {"current_steps": 237, "total_steps": 315, "loss": 0.0616, "lr": 7.190597576216385e-06, "epoch": 3.7128712871287126, "percentage": 75.24, "elapsed_time": "1:28:10", "remaining_time": "0:29:01", "throughput": 11613.4, "total_tokens": 61440000} | |
| {"current_steps": 238, "total_steps": 315, "loss": 0.0569, "lr": 7.016504991533726e-06, "epoch": 3.7287128712871285, "percentage": 75.56, "elapsed_time": "1:28:32", "remaining_time": "0:28:38", "throughput": 11613.44, "total_tokens": 61702144} | |
| {"current_steps": 239, "total_steps": 315, "loss": 0.0601, "lr": 6.844201154750177e-06, "epoch": 3.7445544554455443, "percentage": 75.87, "elapsed_time": "1:28:55", "remaining_time": "0:28:16", "throughput": 11613.48, "total_tokens": 61964288} | |
| {"current_steps": 240, "total_steps": 315, "loss": 0.0524, "lr": 6.673703204254347e-06, "epoch": 3.76039603960396, "percentage": 76.19, "elapsed_time": "1:29:18", "remaining_time": "0:27:54", "throughput": 11613.35, "total_tokens": 62226432} | |
| {"current_steps": 241, "total_steps": 315, "loss": 0.0583, "lr": 6.505028098810406e-06, "epoch": 3.776237623762376, "percentage": 76.51, "elapsed_time": "1:29:40", "remaining_time": "0:27:32", "throughput": 11613.39, "total_tokens": 62488576} | |
| {"current_steps": 242, "total_steps": 315, "loss": 0.0553, "lr": 6.338192615871247e-06, "epoch": 3.792079207920792, "percentage": 76.83, "elapsed_time": "1:30:03", "remaining_time": "0:27:09", "throughput": 11613.35, "total_tokens": 62750720} | |
| {"current_steps": 243, "total_steps": 315, "loss": 0.0599, "lr": 6.173213349909729e-06, "epoch": 3.8079207920792078, "percentage": 77.14, "elapsed_time": "1:30:25", "remaining_time": "0:26:47", "throughput": 11613.38, "total_tokens": 63012864} | |
| {"current_steps": 244, "total_steps": 315, "loss": 0.0614, "lr": 6.010106710768052e-06, "epoch": 3.8237623762376236, "percentage": 77.46, "elapsed_time": "1:30:48", "remaining_time": "0:26:25", "throughput": 11613.31, "total_tokens": 63275008} | |
| {"current_steps": 245, "total_steps": 315, "loss": 0.0562, "lr": 5.848888922025553e-06, "epoch": 3.8396039603960395, "percentage": 77.78, "elapsed_time": "1:31:11", "remaining_time": "0:26:03", "throughput": 11613.3, "total_tokens": 63537152} | |
| {"current_steps": 246, "total_steps": 315, "loss": 0.0565, "lr": 5.689576019385015e-06, "epoch": 3.8554455445544553, "percentage": 78.1, "elapsed_time": "1:31:33", "remaining_time": "0:25:40", "throughput": 11613.35, "total_tokens": 63799296} | |
| {"current_steps": 247, "total_steps": 315, "loss": 0.0671, "lr": 5.532183849077652e-06, "epoch": 3.871287128712871, "percentage": 78.41, "elapsed_time": "1:31:56", "remaining_time": "0:25:18", "throughput": 11613.4, "total_tokens": 64061440} | |
| {"current_steps": 248, "total_steps": 315, "loss": 0.0581, "lr": 5.376728066286943e-06, "epoch": 3.887128712871287, "percentage": 78.73, "elapsed_time": "1:32:18", "remaining_time": "0:24:56", "throughput": 11613.42, "total_tokens": 64323584} | |
| {"current_steps": 249, "total_steps": 315, "loss": 0.0584, "lr": 5.223224133591476e-06, "epoch": 3.902970297029703, "percentage": 79.05, "elapsed_time": "1:32:41", "remaining_time": "0:24:34", "throughput": 11613.45, "total_tokens": 64585728} | |
| {"current_steps": 250, "total_steps": 315, "loss": 0.0563, "lr": 5.071687319426946e-06, "epoch": 3.9188118811881187, "percentage": 79.37, "elapsed_time": "1:33:03", "remaining_time": "0:24:11", "throughput": 11613.48, "total_tokens": 64847872} | |
| {"current_steps": 251, "total_steps": 315, "loss": 0.0601, "lr": 4.922132696567464e-06, "epoch": 3.9346534653465346, "percentage": 79.68, "elapsed_time": "1:33:26", "remaining_time": "0:23:49", "throughput": 11613.44, "total_tokens": 65110016} | |
| {"current_steps": 252, "total_steps": 315, "loss": 0.0538, "lr": 4.7745751406263165e-06, "epoch": 3.9504950495049505, "percentage": 80.0, "elapsed_time": "1:33:49", "remaining_time": "0:23:27", "throughput": 11613.43, "total_tokens": 65372160} | |
| {"current_steps": 253, "total_steps": 315, "loss": 0.0587, "lr": 4.629029328576381e-06, "epoch": 3.9663366336633663, "percentage": 80.32, "elapsed_time": "1:34:11", "remaining_time": "0:23:04", "throughput": 11613.45, "total_tokens": 65634304} | |
| {"current_steps": 254, "total_steps": 315, "loss": 0.0531, "lr": 4.4855097372902135e-06, "epoch": 3.982178217821782, "percentage": 80.63, "elapsed_time": "1:34:34", "remaining_time": "0:22:42", "throughput": 11613.47, "total_tokens": 65896448} | |
| {"current_steps": 255, "total_steps": 315, "loss": 0.0605, "lr": 4.344030642100133e-06, "epoch": 3.998019801980198, "percentage": 80.95, "elapsed_time": "1:34:56", "remaining_time": "0:22:20", "throughput": 11613.46, "total_tokens": 66158592} | |
| {"current_steps": 256, "total_steps": 315, "loss": 0.0431, "lr": 4.204606115378282e-06, "epoch": 4.0, "percentage": 81.27, "elapsed_time": "1:34:59", "remaining_time": "0:21:53", "throughput": 11613.55, "total_tokens": 66191360} | |
| {"current_steps": 257, "total_steps": 315, "loss": 0.0167, "lr": 4.067250025136921e-06, "epoch": 4.015841584158416, "percentage": 81.59, "elapsed_time": "1:35:22", "remaining_time": "0:21:31", "throughput": 11613.52, "total_tokens": 66453504} | |
| {"current_steps": 258, "total_steps": 315, "loss": 0.0163, "lr": 3.931976033649021e-06, "epoch": 4.031683168316832, "percentage": 81.9, "elapsed_time": "1:35:44", "remaining_time": "0:21:09", "throughput": 11613.45, "total_tokens": 66715648} | |
| {"current_steps": 259, "total_steps": 315, "loss": 0.0142, "lr": 3.798797596089351e-06, "epoch": 4.047524752475248, "percentage": 82.22, "elapsed_time": "1:36:07", "remaining_time": "0:20:46", "throughput": 11613.44, "total_tokens": 66977792} | |
| {"current_steps": 260, "total_steps": 315, "loss": 0.0156, "lr": 3.66772795919611e-06, "epoch": 4.063366336633663, "percentage": 82.54, "elapsed_time": "1:36:29", "remaining_time": "0:20:24", "throughput": 11613.39, "total_tokens": 67239936} | |
| {"current_steps": 261, "total_steps": 315, "loss": 0.0143, "lr": 3.5387801599533475e-06, "epoch": 4.079207920792079, "percentage": 82.86, "elapsed_time": "1:36:52", "remaining_time": "0:20:02", "throughput": 11613.37, "total_tokens": 67502080} | |
| {"current_steps": 262, "total_steps": 315, "loss": 0.014, "lr": 3.41196702429423e-06, "epoch": 4.095049504950495, "percentage": 83.17, "elapsed_time": "1:37:15", "remaining_time": "0:19:40", "throughput": 11613.34, "total_tokens": 67764224} | |
| {"current_steps": 263, "total_steps": 315, "loss": 0.0131, "lr": 3.2873011658252796e-06, "epoch": 4.110891089108911, "percentage": 83.49, "elapsed_time": "1:37:37", "remaining_time": "0:19:18", "throughput": 11613.3, "total_tokens": 68026368} | |
| {"current_steps": 264, "total_steps": 315, "loss": 0.012, "lr": 3.164794984571759e-06, "epoch": 4.126732673267327, "percentage": 83.81, "elapsed_time": "1:38:00", "remaining_time": "0:18:55", "throughput": 11613.3, "total_tokens": 68288512} | |
| {"current_steps": 265, "total_steps": 315, "loss": 0.0126, "lr": 3.044460665744284e-06, "epoch": 4.142574257425743, "percentage": 84.13, "elapsed_time": "1:38:22", "remaining_time": "0:18:33", "throughput": 11613.29, "total_tokens": 68550656} | |
| {"current_steps": 266, "total_steps": 315, "loss": 0.0129, "lr": 2.9263101785268254e-06, "epoch": 4.158415841584159, "percentage": 84.44, "elapsed_time": "1:38:45", "remaining_time": "0:18:11", "throughput": 11613.31, "total_tokens": 68812800} | |
| {"current_steps": 267, "total_steps": 315, "loss": 0.0132, "lr": 2.8103552748861476e-06, "epoch": 4.174257425742574, "percentage": 84.76, "elapsed_time": "1:39:07", "remaining_time": "0:17:49", "throughput": 11613.35, "total_tokens": 69074944} | |
| {"current_steps": 268, "total_steps": 315, "loss": 0.0107, "lr": 2.6966074884029164e-06, "epoch": 4.19009900990099, "percentage": 85.08, "elapsed_time": "1:39:30", "remaining_time": "0:17:27", "throughput": 11613.38, "total_tokens": 69337088} | |
| {"current_steps": 269, "total_steps": 315, "loss": 0.0113, "lr": 2.58507813312448e-06, "epoch": 4.205940594059406, "percentage": 85.4, "elapsed_time": "1:39:52", "remaining_time": "0:17:04", "throughput": 11613.42, "total_tokens": 69599232} | |
| {"current_steps": 270, "total_steps": 315, "loss": 0.0109, "lr": 2.475778302439524e-06, "epoch": 4.221782178217822, "percentage": 85.71, "elapsed_time": "1:40:15", "remaining_time": "0:16:42", "throughput": 11613.47, "total_tokens": 69861376} | |
| {"current_steps": 271, "total_steps": 315, "loss": 0.0112, "lr": 2.3687188679746315e-06, "epoch": 4.237623762376238, "percentage": 86.03, "elapsed_time": "1:40:38", "remaining_time": "0:16:20", "throughput": 11613.49, "total_tokens": 70123520} | |
| {"current_steps": 272, "total_steps": 315, "loss": 0.0113, "lr": 2.2639104785129455e-06, "epoch": 4.253465346534654, "percentage": 86.35, "elapsed_time": "1:41:00", "remaining_time": "0:15:58", "throughput": 11613.4, "total_tokens": 70385664} | |
| {"current_steps": 273, "total_steps": 315, "loss": 0.0108, "lr": 2.1613635589349756e-06, "epoch": 4.2693069306930695, "percentage": 86.67, "elapsed_time": "1:41:23", "remaining_time": "0:15:35", "throughput": 11613.42, "total_tokens": 70647808} | |
| {"current_steps": 274, "total_steps": 315, "loss": 0.0103, "lr": 2.0610883091816525e-06, "epoch": 4.285148514851485, "percentage": 86.98, "elapsed_time": "1:41:45", "remaining_time": "0:15:13", "throughput": 11613.46, "total_tokens": 70909952} | |
| {"current_steps": 275, "total_steps": 315, "loss": 0.0115, "lr": 1.9630947032398067e-06, "epoch": 4.300990099009901, "percentage": 87.3, "elapsed_time": "1:42:08", "remaining_time": "0:14:51", "throughput": 11613.46, "total_tokens": 71172096} | |
| {"current_steps": 276, "total_steps": 315, "loss": 0.012, "lr": 1.8673924881500826e-06, "epoch": 4.316831683168317, "percentage": 87.62, "elapsed_time": "1:42:31", "remaining_time": "0:14:29", "throughput": 11613.42, "total_tokens": 71434240} | |
| {"current_steps": 277, "total_steps": 315, "loss": 0.0106, "lr": 1.773991183037435e-06, "epoch": 4.332673267326733, "percentage": 87.94, "elapsed_time": "1:42:53", "remaining_time": "0:14:06", "throughput": 11613.45, "total_tokens": 71696384} | |
| {"current_steps": 278, "total_steps": 315, "loss": 0.0125, "lr": 1.6829000781643094e-06, "epoch": 4.348514851485149, "percentage": 88.25, "elapsed_time": "1:43:16", "remaining_time": "0:13:44", "throughput": 11613.48, "total_tokens": 71958528} | |
| {"current_steps": 279, "total_steps": 315, "loss": 0.0109, "lr": 1.59412823400657e-06, "epoch": 4.364356435643565, "percentage": 88.57, "elapsed_time": "1:43:38", "remaining_time": "0:13:22", "throughput": 11613.48, "total_tokens": 72220672} | |
| {"current_steps": 280, "total_steps": 315, "loss": 0.0114, "lr": 1.5076844803522922e-06, "epoch": 4.3801980198019805, "percentage": 88.89, "elapsed_time": "1:44:01", "remaining_time": "0:13:00", "throughput": 11613.57, "total_tokens": 72482816} | |
| {"current_steps": 281, "total_steps": 315, "loss": 0.0104, "lr": 1.4235774154234855e-06, "epoch": 4.396039603960396, "percentage": 89.21, "elapsed_time": "1:44:23", "remaining_time": "0:12:37", "throughput": 11613.59, "total_tokens": 72744960} | |
| {"current_steps": 282, "total_steps": 315, "loss": 0.0109, "lr": 1.3418154050208936e-06, "epoch": 4.411881188118812, "percentage": 89.52, "elapsed_time": "1:44:46", "remaining_time": "0:12:15", "throughput": 11613.66, "total_tokens": 73007104} | |
| {"current_steps": 283, "total_steps": 315, "loss": 0.0115, "lr": 1.2624065816918413e-06, "epoch": 4.427722772277228, "percentage": 89.84, "elapsed_time": "1:45:08", "remaining_time": "0:11:53", "throughput": 11613.72, "total_tokens": 73269248} | |
| {"current_steps": 284, "total_steps": 315, "loss": 0.0108, "lr": 1.1853588439213442e-06, "epoch": 4.443564356435644, "percentage": 90.16, "elapsed_time": "1:45:31", "remaining_time": "0:11:31", "throughput": 11613.78, "total_tokens": 73531392} | |
| {"current_steps": 285, "total_steps": 315, "loss": 0.0103, "lr": 1.1106798553464804e-06, "epoch": 4.45940594059406, "percentage": 90.48, "elapsed_time": "1:45:53", "remaining_time": "0:11:08", "throughput": 11613.81, "total_tokens": 73793536} | |
| {"current_steps": 286, "total_steps": 315, "loss": 0.0107, "lr": 1.038377043994107e-06, "epoch": 4.475247524752476, "percentage": 90.79, "elapsed_time": "1:46:16", "remaining_time": "0:10:46", "throughput": 11613.82, "total_tokens": 74055680} | |
| {"current_steps": 287, "total_steps": 315, "loss": 0.0122, "lr": 9.684576015420278e-07, "epoch": 4.4910891089108915, "percentage": 91.11, "elapsed_time": "1:46:39", "remaining_time": "0:10:24", "throughput": 11613.87, "total_tokens": 74317824} | |
| {"current_steps": 288, "total_steps": 315, "loss": 0.0109, "lr": 9.009284826036691e-07, "epoch": 4.506930693069307, "percentage": 91.43, "elapsed_time": "1:47:01", "remaining_time": "0:10:02", "throughput": 11613.88, "total_tokens": 74579968} | |
| {"current_steps": 289, "total_steps": 315, "loss": 0.0103, "lr": 8.357964040363209e-07, "epoch": 4.522772277227723, "percentage": 91.75, "elapsed_time": "1:47:24", "remaining_time": "0:09:39", "throughput": 11613.87, "total_tokens": 74842112} | |
| {"current_steps": 290, "total_steps": 315, "loss": 0.011, "lr": 7.730678442730538e-07, "epoch": 4.538613861386139, "percentage": 92.06, "elapsed_time": "1:47:46", "remaining_time": "0:09:17", "throughput": 11613.88, "total_tokens": 75104256} | |
| {"current_steps": 291, "total_steps": 315, "loss": 0.0103, "lr": 7.127490426783123e-07, "epoch": 4.554455445544555, "percentage": 92.38, "elapsed_time": "1:48:09", "remaining_time": "0:08:55", "throughput": 11613.92, "total_tokens": 75366400} | |
| {"current_steps": 292, "total_steps": 315, "loss": 0.0104, "lr": 6.54845998927342e-07, "epoch": 4.57029702970297, "percentage": 92.7, "elapsed_time": "1:48:31", "remaining_time": "0:08:32", "throughput": 11613.91, "total_tokens": 75628544} | |
| {"current_steps": 293, "total_steps": 315, "loss": 0.0097, "lr": 5.993644724093888e-07, "epoch": 4.586138613861386, "percentage": 93.02, "elapsed_time": "1:48:54", "remaining_time": "0:08:10", "throughput": 11613.94, "total_tokens": 75890688} | |
| {"current_steps": 294, "total_steps": 315, "loss": 0.0105, "lr": 5.463099816548579e-07, "epoch": 4.601980198019802, "percentage": 93.33, "elapsed_time": "1:49:17", "remaining_time": "0:07:48", "throughput": 11613.94, "total_tokens": 76152832} | |
| {"current_steps": 295, "total_steps": 315, "loss": 0.0096, "lr": 4.956878037864043e-07, "epoch": 4.6178217821782175, "percentage": 93.65, "elapsed_time": "1:49:39", "remaining_time": "0:07:26", "throughput": 11613.96, "total_tokens": 76414976} | |
| {"current_steps": 296, "total_steps": 315, "loss": 0.0113, "lr": 4.475029739940295e-07, "epoch": 4.633663366336633, "percentage": 93.97, "elapsed_time": "1:50:02", "remaining_time": "0:07:03", "throughput": 11613.91, "total_tokens": 76677120} | |
| {"current_steps": 297, "total_steps": 315, "loss": 0.0113, "lr": 4.0176028503425835e-07, "epoch": 4.649504950495049, "percentage": 94.29, "elapsed_time": "1:50:24", "remaining_time": "0:06:41", "throughput": 11613.86, "total_tokens": 76939264} | |
| {"current_steps": 298, "total_steps": 315, "loss": 0.0115, "lr": 3.5846428675342657e-07, "epoch": 4.665346534653465, "percentage": 94.6, "elapsed_time": "1:50:47", "remaining_time": "0:06:19", "throughput": 11613.84, "total_tokens": 77201408} | |
| {"current_steps": 299, "total_steps": 315, "loss": 0.0097, "lr": 3.1761928563510955e-07, "epoch": 4.681188118811881, "percentage": 94.92, "elapsed_time": "1:51:09", "remaining_time": "0:05:56", "throughput": 11613.83, "total_tokens": 77463552} | |
| {"current_steps": 300, "total_steps": 315, "loss": 0.0104, "lr": 2.7922934437178695e-07, "epoch": 4.697029702970297, "percentage": 95.24, "elapsed_time": "1:51:32", "remaining_time": "0:05:34", "throughput": 11613.83, "total_tokens": 77725696} | |
| {"current_steps": 301, "total_steps": 315, "loss": 0.011, "lr": 2.4329828146074095e-07, "epoch": 4.712871287128713, "percentage": 95.56, "elapsed_time": "1:51:55", "remaining_time": "0:05:12", "throughput": 11613.83, "total_tokens": 77987840} | |
| {"current_steps": 302, "total_steps": 315, "loss": 0.0095, "lr": 2.0982967082424365e-07, "epoch": 4.7287128712871285, "percentage": 95.87, "elapsed_time": "1:52:17", "remaining_time": "0:04:50", "throughput": 11613.83, "total_tokens": 78249984} | |
| {"current_steps": 303, "total_steps": 315, "loss": 0.0105, "lr": 1.7882684145406614e-07, "epoch": 4.744554455445544, "percentage": 96.19, "elapsed_time": "1:52:40", "remaining_time": "0:04:27", "throughput": 11613.89, "total_tokens": 78512128} | |
| {"current_steps": 304, "total_steps": 315, "loss": 0.0099, "lr": 1.5029287708036854e-07, "epoch": 4.76039603960396, "percentage": 96.51, "elapsed_time": "1:53:02", "remaining_time": "0:04:05", "throughput": 11613.93, "total_tokens": 78774272} | |
| {"current_steps": 305, "total_steps": 315, "loss": 0.0106, "lr": 1.2423061586496477e-07, "epoch": 4.776237623762376, "percentage": 96.83, "elapsed_time": "1:53:25", "remaining_time": "0:03:43", "throughput": 11613.96, "total_tokens": 79036416} | |
| {"current_steps": 306, "total_steps": 315, "loss": 0.0103, "lr": 1.006426501190233e-07, "epoch": 4.792079207920792, "percentage": 97.14, "elapsed_time": "1:53:47", "remaining_time": "0:03:20", "throughput": 11613.99, "total_tokens": 79298560} | |
| {"current_steps": 307, "total_steps": 315, "loss": 0.0107, "lr": 7.953132604522628e-08, "epoch": 4.807920792079208, "percentage": 97.46, "elapsed_time": "1:54:10", "remaining_time": "0:02:58", "throughput": 11614.02, "total_tokens": 79560704} | |
| {"current_steps": 308, "total_steps": 315, "loss": 0.0098, "lr": 6.089874350439506e-08, "epoch": 4.823762376237624, "percentage": 97.78, "elapsed_time": "1:54:32", "remaining_time": "0:02:36", "throughput": 11614.02, "total_tokens": 79822848} | |
| {"current_steps": 309, "total_steps": 315, "loss": 0.0101, "lr": 4.474675580662113e-08, "epoch": 4.8396039603960395, "percentage": 98.1, "elapsed_time": "1:54:55", "remaining_time": "0:02:13", "throughput": 11614.02, "total_tokens": 80084992} | |
| {"current_steps": 310, "total_steps": 315, "loss": 0.0102, "lr": 3.107696952694139e-08, "epoch": 4.855445544554455, "percentage": 98.41, "elapsed_time": "1:55:18", "remaining_time": "0:01:51", "throughput": 11614.04, "total_tokens": 80347136} | |
| {"current_steps": 311, "total_steps": 315, "loss": 0.0102, "lr": 1.9890744345518742e-08, "epoch": 4.871287128712871, "percentage": 98.73, "elapsed_time": "1:55:40", "remaining_time": "0:01:29", "throughput": 11614.08, "total_tokens": 80609280} | |
| {"current_steps": 312, "total_steps": 315, "loss": 0.01, "lr": 1.1189192912416934e-08, "epoch": 4.887128712871287, "percentage": 99.05, "elapsed_time": "1:56:03", "remaining_time": "0:01:06", "throughput": 11614.12, "total_tokens": 80871424} | |
| {"current_steps": 313, "total_steps": 315, "loss": 0.0102, "lr": 4.9731807369113316e-09, "epoch": 4.902970297029703, "percentage": 99.37, "elapsed_time": "1:56:25", "remaining_time": "0:00:44", "throughput": 11614.13, "total_tokens": 81133568} | |
| {"current_steps": 314, "total_steps": 315, "loss": 0.0101, "lr": 1.2433261014244136e-09, "epoch": 4.918811881188119, "percentage": 99.68, "elapsed_time": "1:56:48", "remaining_time": "0:00:22", "throughput": 11614.14, "total_tokens": 81395712} | |
| {"current_steps": 315, "total_steps": 315, "loss": 0.01, "lr": 0.0, "epoch": 4.934653465346535, "percentage": 100.0, "elapsed_time": "1:57:10", "remaining_time": "0:00:00", "throughput": 11614.12, "total_tokens": 81657856} | |