| {"current_steps": 5, "total_steps": 9960, "loss": 8.9772, "lr": 4.016064257028112e-08, "epoch": 0.010040160642570281, "percentage": 0.05, "elapsed_time": "0:00:01", "remaining_time": "0:35:12", "throughput": 2051.03, "total_tokens": 2176} |
| {"current_steps": 10, "total_steps": 9960, "loss": 9.1361, "lr": 9.036144578313253e-08, "epoch": 0.020080321285140562, "percentage": 0.1, "elapsed_time": "0:00:01", "remaining_time": "0:32:07", "throughput": 3172.26, "total_tokens": 6144} |
| {"current_steps": 15, "total_steps": 9960, "loss": 8.8855, "lr": 1.4056224899598394e-07, "epoch": 0.030120481927710843, "percentage": 0.15, "elapsed_time": "0:00:02", "remaining_time": "0:30:00", "throughput": 3463.43, "total_tokens": 9408} |
| {"current_steps": 20, "total_steps": 9960, "loss": 8.6766, "lr": 1.9076305220883537e-07, "epoch": 0.040160642570281124, "percentage": 0.2, "elapsed_time": "0:00:03", "remaining_time": "0:27:34", "throughput": 3556.15, "total_tokens": 11840} |
| {"current_steps": 25, "total_steps": 9960, "loss": 8.3983, "lr": 2.409638554216868e-07, "epoch": 0.050200803212851405, "percentage": 0.25, "elapsed_time": "0:00:04", "remaining_time": "0:26:44", "throughput": 3693.6, "total_tokens": 14912} |
| {"current_steps": 30, "total_steps": 9960, "loss": 8.1478, "lr": 2.911646586345382e-07, "epoch": 0.060240963855421686, "percentage": 0.3, "elapsed_time": "0:00:04", "remaining_time": "0:26:37", "throughput": 3792.49, "total_tokens": 18304} |
| {"current_steps": 35, "total_steps": 9960, "loss": 7.7226, "lr": 3.413654618473896e-07, "epoch": 0.07028112449799197, "percentage": 0.35, "elapsed_time": "0:00:05", "remaining_time": "0:26:04", "throughput": 3869.64, "total_tokens": 21344} |
| {"current_steps": 40, "total_steps": 9960, "loss": 7.4309, "lr": 3.91566265060241e-07, "epoch": 0.08032128514056225, "percentage": 0.4, "elapsed_time": "0:00:06", "remaining_time": "0:26:17", "throughput": 3943.39, "total_tokens": 25088} |
| {"current_steps": 45, "total_steps": 9960, "loss": 7.098, "lr": 4.417670682730924e-07, "epoch": 0.09036144578313253, "percentage": 0.45, "elapsed_time": "0:00:07", "remaining_time": "0:26:06", "throughput": 3970.92, "total_tokens": 28224} |
| {"current_steps": 50, "total_steps": 9960, "loss": 6.5589, "lr": 4.919678714859438e-07, "epoch": 0.10040160642570281, "percentage": 0.5, "elapsed_time": "0:00:07", "remaining_time": "0:26:16", "throughput": 4018.49, "total_tokens": 31968} |
| {"current_steps": 55, "total_steps": 9960, "loss": 6.3171, "lr": 5.421686746987952e-07, "epoch": 0.11044176706827309, "percentage": 0.55, "elapsed_time": "0:00:08", "remaining_time": "0:26:05", "throughput": 4038.52, "total_tokens": 35104} |
| {"current_steps": 60, "total_steps": 9960, "loss": 5.7753, "lr": 5.923694779116467e-07, "epoch": 0.12048192771084337, "percentage": 0.6, "elapsed_time": "0:00:09", "remaining_time": "0:25:57", "throughput": 4051.96, "total_tokens": 38240} |
| {"current_steps": 65, "total_steps": 9960, "loss": 5.6056, "lr": 6.425702811244979e-07, "epoch": 0.13052208835341367, "percentage": 0.65, "elapsed_time": "0:00:10", "remaining_time": "0:25:40", "throughput": 4051.63, "total_tokens": 40992} |
| {"current_steps": 70, "total_steps": 9960, "loss": 5.2274, "lr": 6.927710843373495e-07, "epoch": 0.14056224899598393, "percentage": 0.7, "elapsed_time": "0:00:10", "remaining_time": "0:25:42", "throughput": 4079.8, "total_tokens": 44544} |
| {"current_steps": 75, "total_steps": 9960, "loss": 4.6893, "lr": 7.429718875502008e-07, "epoch": 0.15060240963855423, "percentage": 0.75, "elapsed_time": "0:00:11", "remaining_time": "0:25:43", "throughput": 4098.14, "total_tokens": 48000} |
| {"current_steps": 80, "total_steps": 9960, "loss": 3.8478, "lr": 7.931726907630523e-07, "epoch": 0.1606425702811245, "percentage": 0.8, "elapsed_time": "0:00:12", "remaining_time": "0:25:52", "throughput": 4107.55, "total_tokens": 51648} |
| {"current_steps": 85, "total_steps": 9960, "loss": 3.5727, "lr": 8.433734939759036e-07, "epoch": 0.1706827309236948, "percentage": 0.85, "elapsed_time": "0:00:13", "remaining_time": "0:25:40", "throughput": 4120.7, "total_tokens": 54656} |
| {"current_steps": 90, "total_steps": 9960, "loss": 3.1493, "lr": 8.935742971887551e-07, "epoch": 0.18072289156626506, "percentage": 0.9, "elapsed_time": "0:00:13", "remaining_time": "0:25:20", "throughput": 4117.41, "total_tokens": 57088} |
| {"current_steps": 95, "total_steps": 9960, "loss": 2.8682, "lr": 9.437751004016064e-07, "epoch": 0.19076305220883535, "percentage": 0.95, "elapsed_time": "0:00:14", "remaining_time": "0:25:15", "throughput": 4129.75, "total_tokens": 60256} |
| {"current_steps": 100, "total_steps": 9960, "loss": 2.6081, "lr": 9.93975903614458e-07, "epoch": 0.20080321285140562, "percentage": 1.0, "elapsed_time": "0:00:15", "remaining_time": "0:25:02", "throughput": 4138.1, "total_tokens": 63072} |
| {"current_steps": 105, "total_steps": 9960, "loss": 1.6849, "lr": 1.0441767068273092e-06, "epoch": 0.21084337349397592, "percentage": 1.05, "elapsed_time": "0:00:15", "remaining_time": "0:24:43", "throughput": 4131.03, "total_tokens": 65312} |
| {"current_steps": 110, "total_steps": 9960, "loss": 1.5056, "lr": 1.0943775100401608e-06, "epoch": 0.22088353413654618, "percentage": 1.1, "elapsed_time": "0:00:16", "remaining_time": "0:24:40", "throughput": 4134.06, "total_tokens": 68352} |
| {"current_steps": 115, "total_steps": 9960, "loss": 1.2485, "lr": 1.1445783132530121e-06, "epoch": 0.23092369477911648, "percentage": 1.15, "elapsed_time": "0:00:17", "remaining_time": "0:24:31", "throughput": 4137.93, "total_tokens": 71104} |
| {"current_steps": 120, "total_steps": 9960, "loss": 1.0266, "lr": 1.1947791164658635e-06, "epoch": 0.24096385542168675, "percentage": 1.2, "elapsed_time": "0:00:18", "remaining_time": "0:24:40", "throughput": 4156.94, "total_tokens": 75040} |
| {"current_steps": 125, "total_steps": 9960, "loss": 0.8694, "lr": 1.2449799196787148e-06, "epoch": 0.25100401606425704, "percentage": 1.26, "elapsed_time": "0:00:18", "remaining_time": "0:24:34", "throughput": 4157.9, "total_tokens": 77920} |
| {"current_steps": 130, "total_steps": 9960, "loss": 0.5457, "lr": 1.2951807228915664e-06, "epoch": 0.26104417670682734, "percentage": 1.31, "elapsed_time": "0:00:19", "remaining_time": "0:24:20", "throughput": 4143.2, "total_tokens": 80000} |
| {"current_steps": 135, "total_steps": 9960, "loss": 0.6048, "lr": 1.345381526104418e-06, "epoch": 0.2710843373493976, "percentage": 1.36, "elapsed_time": "0:00:20", "remaining_time": "0:24:16", "throughput": 4144.64, "total_tokens": 82944} |
| {"current_steps": 140, "total_steps": 9960, "loss": 0.3919, "lr": 1.395582329317269e-06, "epoch": 0.28112449799196787, "percentage": 1.41, "elapsed_time": "0:00:20", "remaining_time": "0:24:21", "throughput": 4144.65, "total_tokens": 86368} |
| {"current_steps": 145, "total_steps": 9960, "loss": 0.312, "lr": 1.4457831325301204e-06, "epoch": 0.29116465863453816, "percentage": 1.46, "elapsed_time": "0:00:21", "remaining_time": "0:24:17", "throughput": 4148.07, "total_tokens": 89344} |
| {"current_steps": 150, "total_steps": 9960, "loss": 0.2272, "lr": 1.495983935742972e-06, "epoch": 0.30120481927710846, "percentage": 1.51, "elapsed_time": "0:00:22", "remaining_time": "0:24:12", "throughput": 4148.96, "total_tokens": 92128} |
| {"current_steps": 155, "total_steps": 9960, "loss": 0.2378, "lr": 1.5461847389558236e-06, "epoch": 0.3112449799196787, "percentage": 1.56, "elapsed_time": "0:00:22", "remaining_time": "0:24:12", "throughput": 4150.85, "total_tokens": 95296} |
| {"current_steps": 160, "total_steps": 9960, "loss": 0.1967, "lr": 1.5963855421686747e-06, "epoch": 0.321285140562249, "percentage": 1.61, "elapsed_time": "0:00:23", "remaining_time": "0:24:10", "throughput": 4150.06, "total_tokens": 98272} |
| {"current_steps": 165, "total_steps": 9960, "loss": 0.2241, "lr": 1.6465863453815263e-06, "epoch": 0.3313253012048193, "percentage": 1.66, "elapsed_time": "0:00:24", "remaining_time": "0:24:08", "throughput": 4154.51, "total_tokens": 101344} |
| {"current_steps": 170, "total_steps": 9960, "loss": 0.1959, "lr": 1.6967871485943776e-06, "epoch": 0.3413654618473896, "percentage": 1.71, "elapsed_time": "0:00:25", "remaining_time": "0:24:00", "throughput": 4151.04, "total_tokens": 103808} |
| {"current_steps": 175, "total_steps": 9960, "loss": 0.1855, "lr": 1.7469879518072292e-06, "epoch": 0.3514056224899598, "percentage": 1.76, "elapsed_time": "0:00:25", "remaining_time": "0:23:58", "throughput": 4153.56, "total_tokens": 106880} |
| {"current_steps": 180, "total_steps": 9960, "loss": 0.1717, "lr": 1.7971887550200803e-06, "epoch": 0.3614457831325301, "percentage": 1.81, "elapsed_time": "0:00:26", "remaining_time": "0:23:57", "throughput": 4158.31, "total_tokens": 110016} |
| {"current_steps": 185, "total_steps": 9960, "loss": 0.1764, "lr": 1.8473895582329318e-06, "epoch": 0.3714859437751004, "percentage": 1.86, "elapsed_time": "0:00:27", "remaining_time": "0:23:54", "throughput": 4158.74, "total_tokens": 112896} |
| {"current_steps": 190, "total_steps": 9960, "loss": 0.2006, "lr": 1.8975903614457832e-06, "epoch": 0.3815261044176707, "percentage": 1.91, "elapsed_time": "0:00:27", "remaining_time": "0:23:53", "throughput": 4155.36, "total_tokens": 115808} |
| {"current_steps": 195, "total_steps": 9960, "loss": 0.2027, "lr": 1.947791164658635e-06, "epoch": 0.39156626506024095, "percentage": 1.96, "elapsed_time": "0:00:28", "remaining_time": "0:23:49", "throughput": 4157.23, "total_tokens": 118656} |
| {"current_steps": 200, "total_steps": 9960, "loss": 0.2035, "lr": 1.997991967871486e-06, "epoch": 0.40160642570281124, "percentage": 2.01, "elapsed_time": "0:00:29", "remaining_time": "0:23:46", "throughput": 4164.09, "total_tokens": 121728} |
| {"current_steps": 205, "total_steps": 9960, "loss": 0.335, "lr": 2.0481927710843377e-06, "epoch": 0.41164658634538154, "percentage": 2.06, "elapsed_time": "0:00:29", "remaining_time": "0:23:39", "throughput": 4160.11, "total_tokens": 124128} |
| {"current_steps": 210, "total_steps": 9960, "loss": 0.1571, "lr": 2.098393574297189e-06, "epoch": 0.42168674698795183, "percentage": 2.11, "elapsed_time": "0:00:30", "remaining_time": "0:23:38", "throughput": 4159.68, "total_tokens": 127072} |
| {"current_steps": 215, "total_steps": 9960, "loss": 0.1704, "lr": 2.1485943775100404e-06, "epoch": 0.43172690763052207, "percentage": 2.16, "elapsed_time": "0:00:31", "remaining_time": "0:23:34", "throughput": 4161.75, "total_tokens": 129888} |
| {"current_steps": 220, "total_steps": 9960, "loss": 0.1659, "lr": 2.1987951807228917e-06, "epoch": 0.44176706827309237, "percentage": 2.21, "elapsed_time": "0:00:31", "remaining_time": "0:23:32", "throughput": 4162.09, "total_tokens": 132800} |
| {"current_steps": 225, "total_steps": 9960, "loss": 0.1919, "lr": 2.248995983935743e-06, "epoch": 0.45180722891566266, "percentage": 2.26, "elapsed_time": "0:00:32", "remaining_time": "0:23:36", "throughput": 4168.68, "total_tokens": 136480} |
| {"current_steps": 230, "total_steps": 9960, "loss": 0.1908, "lr": 2.2991967871485944e-06, "epoch": 0.46184738955823296, "percentage": 2.31, "elapsed_time": "0:00:33", "remaining_time": "0:23:41", "throughput": 4176.07, "total_tokens": 140352} |
| {"current_steps": 235, "total_steps": 9960, "loss": 0.1697, "lr": 2.349397590361446e-06, "epoch": 0.4718875502008032, "percentage": 2.36, "elapsed_time": "0:00:34", "remaining_time": "0:23:43", "throughput": 4176.53, "total_tokens": 143616} |
| {"current_steps": 240, "total_steps": 9960, "loss": 0.1851, "lr": 2.399598393574297e-06, "epoch": 0.4819277108433735, "percentage": 2.41, "elapsed_time": "0:00:35", "remaining_time": "0:23:42", "throughput": 4175.18, "total_tokens": 146656} |
| {"current_steps": 245, "total_steps": 9960, "loss": 0.143, "lr": 2.449799196787149e-06, "epoch": 0.4919678714859438, "percentage": 2.46, "elapsed_time": "0:00:35", "remaining_time": "0:23:41", "throughput": 4174.09, "total_tokens": 149664} |
| {"current_steps": 250, "total_steps": 9960, "loss": 0.2288, "lr": 2.5e-06, "epoch": 0.5020080321285141, "percentage": 2.51, "elapsed_time": "0:00:36", "remaining_time": "0:23:46", "throughput": 4176.47, "total_tokens": 153344} |
| {"current_steps": 255, "total_steps": 9960, "loss": 0.2078, "lr": 2.5502008032128516e-06, "epoch": 0.5120481927710844, "percentage": 2.56, "elapsed_time": "0:00:37", "remaining_time": "0:23:42", "throughput": 4178.53, "total_tokens": 156224} |
| {"current_steps": 260, "total_steps": 9960, "loss": 0.1754, "lr": 2.6004016064257033e-06, "epoch": 0.5220883534136547, "percentage": 2.61, "elapsed_time": "0:00:38", "remaining_time": "0:23:45", "throughput": 4179.52, "total_tokens": 159648} |
| {"current_steps": 265, "total_steps": 9960, "loss": 0.1866, "lr": 2.6506024096385547e-06, "epoch": 0.5321285140562249, "percentage": 2.66, "elapsed_time": "0:00:38", "remaining_time": "0:23:40", "throughput": 4179.64, "total_tokens": 162304} |
| {"current_steps": 270, "total_steps": 9960, "loss": 0.1895, "lr": 2.700803212851406e-06, "epoch": 0.5421686746987951, "percentage": 2.71, "elapsed_time": "0:00:39", "remaining_time": "0:23:41", "throughput": 4182.06, "total_tokens": 165632} |
| {"current_steps": 275, "total_steps": 9960, "loss": 0.1845, "lr": 2.751004016064257e-06, "epoch": 0.5522088353413654, "percentage": 2.76, "elapsed_time": "0:00:40", "remaining_time": "0:23:41", "throughput": 4186.61, "total_tokens": 169024} |
| {"current_steps": 280, "total_steps": 9960, "loss": 0.1919, "lr": 2.8012048192771087e-06, "epoch": 0.5622489959839357, "percentage": 2.81, "elapsed_time": "0:00:41", "remaining_time": "0:23:47", "throughput": 4191.87, "total_tokens": 173056} |
| {"current_steps": 285, "total_steps": 9960, "loss": 0.186, "lr": 2.85140562248996e-06, "epoch": 0.572289156626506, "percentage": 2.86, "elapsed_time": "0:00:41", "remaining_time": "0:23:43", "throughput": 4194.46, "total_tokens": 175904} |
| {"current_steps": 290, "total_steps": 9960, "loss": 0.1724, "lr": 2.9016064257028114e-06, "epoch": 0.5823293172690763, "percentage": 2.91, "elapsed_time": "0:00:42", "remaining_time": "0:23:42", "throughput": 4191.44, "total_tokens": 178816} |
| {"current_steps": 295, "total_steps": 9960, "loss": 0.1727, "lr": 2.9518072289156627e-06, "epoch": 0.5923694779116466, "percentage": 2.96, "elapsed_time": "0:00:43", "remaining_time": "0:23:44", "throughput": 4197.8, "total_tokens": 182464} |
| {"current_steps": 300, "total_steps": 9960, "loss": 0.1692, "lr": 3.0020080321285145e-06, "epoch": 0.6024096385542169, "percentage": 3.01, "elapsed_time": "0:00:44", "remaining_time": "0:23:48", "throughput": 4201.05, "total_tokens": 186368} |
| {"current_steps": 305, "total_steps": 9960, "loss": 0.2044, "lr": 3.052208835341366e-06, "epoch": 0.6124497991967871, "percentage": 3.06, "elapsed_time": "0:00:45", "remaining_time": "0:23:44", "throughput": 4202.7, "total_tokens": 189184} |
| {"current_steps": 310, "total_steps": 9960, "loss": 0.1926, "lr": 3.1024096385542172e-06, "epoch": 0.6224899598393574, "percentage": 3.11, "elapsed_time": "0:00:45", "remaining_time": "0:23:46", "throughput": 4203.91, "total_tokens": 192640} |
| {"current_steps": 315, "total_steps": 9960, "loss": 0.1875, "lr": 3.152610441767068e-06, "epoch": 0.6325301204819277, "percentage": 3.16, "elapsed_time": "0:00:46", "remaining_time": "0:23:47", "throughput": 4204.43, "total_tokens": 195968} |
| {"current_steps": 320, "total_steps": 9960, "loss": 0.1517, "lr": 3.20281124497992e-06, "epoch": 0.642570281124498, "percentage": 3.21, "elapsed_time": "0:00:47", "remaining_time": "0:23:45", "throughput": 4207.9, "total_tokens": 199168} |
| {"current_steps": 325, "total_steps": 9960, "loss": 0.1917, "lr": 3.2530120481927713e-06, "epoch": 0.6526104417670683, "percentage": 3.26, "elapsed_time": "0:00:47", "remaining_time": "0:23:41", "throughput": 4206.03, "total_tokens": 201728} |
| {"current_steps": 330, "total_steps": 9960, "loss": 0.1671, "lr": 3.3032128514056226e-06, "epoch": 0.6626506024096386, "percentage": 3.31, "elapsed_time": "0:00:48", "remaining_time": "0:23:40", "throughput": 4205.94, "total_tokens": 204704} |
| {"current_steps": 335, "total_steps": 9960, "loss": 0.1743, "lr": 3.3534136546184744e-06, "epoch": 0.6726907630522089, "percentage": 3.36, "elapsed_time": "0:00:49", "remaining_time": "0:23:38", "throughput": 4206.01, "total_tokens": 207616} |
| {"current_steps": 340, "total_steps": 9960, "loss": 0.1898, "lr": 3.4036144578313257e-06, "epoch": 0.6827309236947792, "percentage": 3.41, "elapsed_time": "0:00:50", "remaining_time": "0:23:36", "throughput": 4206.08, "total_tokens": 210528} |
| {"current_steps": 345, "total_steps": 9960, "loss": 0.2041, "lr": 3.453815261044177e-06, "epoch": 0.6927710843373494, "percentage": 3.46, "elapsed_time": "0:00:50", "remaining_time": "0:23:40", "throughput": 4212.65, "total_tokens": 214752} |
| {"current_steps": 350, "total_steps": 9960, "loss": 0.2241, "lr": 3.504016064257029e-06, "epoch": 0.7028112449799196, "percentage": 3.51, "elapsed_time": "0:00:51", "remaining_time": "0:23:38", "throughput": 4208.21, "total_tokens": 217408} |
| {"current_steps": 355, "total_steps": 9960, "loss": 0.2229, "lr": 3.5542168674698798e-06, "epoch": 0.7128514056224899, "percentage": 3.56, "elapsed_time": "0:00:52", "remaining_time": "0:23:37", "throughput": 4209.69, "total_tokens": 220480} |
| {"current_steps": 360, "total_steps": 9960, "loss": 0.1506, "lr": 3.604417670682731e-06, "epoch": 0.7228915662650602, "percentage": 3.61, "elapsed_time": "0:00:53", "remaining_time": "0:23:35", "throughput": 4210.08, "total_tokens": 223520} |
| {"current_steps": 365, "total_steps": 9960, "loss": 0.153, "lr": 3.6546184738955825e-06, "epoch": 0.7329317269076305, "percentage": 3.66, "elapsed_time": "0:00:53", "remaining_time": "0:23:34", "throughput": 4213.54, "total_tokens": 226752} |
| {"current_steps": 370, "total_steps": 9960, "loss": 0.1937, "lr": 3.7048192771084342e-06, "epoch": 0.7429718875502008, "percentage": 3.71, "elapsed_time": "0:00:54", "remaining_time": "0:23:34", "throughput": 4214.95, "total_tokens": 230080} |
| {"current_steps": 375, "total_steps": 9960, "loss": 0.1582, "lr": 3.7550200803212856e-06, "epoch": 0.7530120481927711, "percentage": 3.77, "elapsed_time": "0:00:55", "remaining_time": "0:23:34", "throughput": 4215.16, "total_tokens": 233280} |
| {"current_steps": 380, "total_steps": 9960, "loss": 0.1743, "lr": 3.805220883534137e-06, "epoch": 0.7630522088353414, "percentage": 3.82, "elapsed_time": "0:00:56", "remaining_time": "0:23:33", "throughput": 4215.15, "total_tokens": 236416} |
| {"current_steps": 385, "total_steps": 9960, "loss": 0.1722, "lr": 3.855421686746989e-06, "epoch": 0.7730923694779116, "percentage": 3.87, "elapsed_time": "0:00:56", "remaining_time": "0:23:36", "throughput": 4218.84, "total_tokens": 240352} |
| {"current_steps": 390, "total_steps": 9960, "loss": 0.1773, "lr": 3.90562248995984e-06, "epoch": 0.7831325301204819, "percentage": 3.92, "elapsed_time": "0:00:57", "remaining_time": "0:23:35", "throughput": 4218.55, "total_tokens": 243296} |
| {"current_steps": 395, "total_steps": 9960, "loss": 0.1752, "lr": 3.9558232931726905e-06, "epoch": 0.7931726907630522, "percentage": 3.97, "elapsed_time": "0:00:58", "remaining_time": "0:23:35", "throughput": 4219.46, "total_tokens": 246656} |
| {"current_steps": 400, "total_steps": 9960, "loss": 0.166, "lr": 4.006024096385543e-06, "epoch": 0.8032128514056225, "percentage": 4.02, "elapsed_time": "0:00:59", "remaining_time": "0:23:35", "throughput": 4222.3, "total_tokens": 250112} |
| {"current_steps": 405, "total_steps": 9960, "loss": 0.1758, "lr": 4.056224899598394e-06, "epoch": 0.8132530120481928, "percentage": 4.07, "elapsed_time": "0:01:00", "remaining_time": "0:23:36", "throughput": 4223.7, "total_tokens": 253632} |
| {"current_steps": 410, "total_steps": 9960, "loss": 0.1631, "lr": 4.106425702811245e-06, "epoch": 0.8232931726907631, "percentage": 4.12, "elapsed_time": "0:01:00", "remaining_time": "0:23:35", "throughput": 4225.56, "total_tokens": 256864} |
| {"current_steps": 415, "total_steps": 9960, "loss": 0.1934, "lr": 4.156626506024097e-06, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:01:01", "remaining_time": "0:23:33", "throughput": 4223.47, "total_tokens": 259584} |
| {"current_steps": 420, "total_steps": 9960, "loss": 0.1684, "lr": 4.206827309236948e-06, "epoch": 0.8433734939759037, "percentage": 4.22, "elapsed_time": "0:01:02", "remaining_time": "0:23:32", "throughput": 4221.95, "total_tokens": 262560} |
| {"current_steps": 425, "total_steps": 9960, "loss": 0.26, "lr": 4.2570281124497995e-06, "epoch": 0.8534136546184738, "percentage": 4.27, "elapsed_time": "0:01:02", "remaining_time": "0:23:32", "throughput": 4222.64, "total_tokens": 265760} |
| {"current_steps": 430, "total_steps": 9960, "loss": 0.1703, "lr": 4.307228915662651e-06, "epoch": 0.8634538152610441, "percentage": 4.32, "elapsed_time": "0:01:03", "remaining_time": "0:23:27", "throughput": 4218.33, "total_tokens": 267904} |
| {"current_steps": 435, "total_steps": 9960, "loss": 0.1868, "lr": 4.357429718875502e-06, "epoch": 0.8734939759036144, "percentage": 4.37, "elapsed_time": "0:01:04", "remaining_time": "0:23:32", "throughput": 4220.89, "total_tokens": 272288} |
| {"current_steps": 440, "total_steps": 9960, "loss": 0.1644, "lr": 4.4076305220883535e-06, "epoch": 0.8835341365461847, "percentage": 4.42, "elapsed_time": "0:01:05", "remaining_time": "0:23:30", "throughput": 4222.77, "total_tokens": 275232} |
| {"current_steps": 445, "total_steps": 9960, "loss": 0.177, "lr": 4.457831325301205e-06, "epoch": 0.893574297188755, "percentage": 4.47, "elapsed_time": "0:01:05", "remaining_time": "0:23:29", "throughput": 4226.14, "total_tokens": 278624} |
| {"current_steps": 450, "total_steps": 9960, "loss": 0.1735, "lr": 4.508032128514056e-06, "epoch": 0.9036144578313253, "percentage": 4.52, "elapsed_time": "0:01:06", "remaining_time": "0:23:28", "throughput": 4227.49, "total_tokens": 281696} |
| {"current_steps": 455, "total_steps": 9960, "loss": 0.1862, "lr": 4.558232931726908e-06, "epoch": 0.9136546184738956, "percentage": 4.57, "elapsed_time": "0:01:07", "remaining_time": "0:23:27", "throughput": 4228.33, "total_tokens": 284832} |
| {"current_steps": 460, "total_steps": 9960, "loss": 0.1664, "lr": 4.60843373493976e-06, "epoch": 0.9236947791164659, "percentage": 4.62, "elapsed_time": "0:01:07", "remaining_time": "0:23:24", "throughput": 4227.58, "total_tokens": 287424} |
| {"current_steps": 465, "total_steps": 9960, "loss": 0.1646, "lr": 4.658634538152611e-06, "epoch": 0.9337349397590361, "percentage": 4.67, "elapsed_time": "0:01:08", "remaining_time": "0:23:20", "throughput": 4225.15, "total_tokens": 289792} |
| {"current_steps": 470, "total_steps": 9960, "loss": 0.1703, "lr": 4.7088353413654624e-06, "epoch": 0.9437751004016064, "percentage": 4.72, "elapsed_time": "0:01:09", "remaining_time": "0:23:20", "throughput": 4225.39, "total_tokens": 293152} |
| {"current_steps": 475, "total_steps": 9960, "loss": 0.1596, "lr": 4.759036144578314e-06, "epoch": 0.9538152610441767, "percentage": 4.77, "elapsed_time": "0:01:10", "remaining_time": "0:23:20", "throughput": 4225.36, "total_tokens": 296320} |
| {"current_steps": 480, "total_steps": 9960, "loss": 0.1694, "lr": 4.809236947791165e-06, "epoch": 0.963855421686747, "percentage": 4.82, "elapsed_time": "0:01:10", "remaining_time": "0:23:19", "throughput": 4227.38, "total_tokens": 299616} |
| {"current_steps": 485, "total_steps": 9960, "loss": 0.1726, "lr": 4.8594377510040165e-06, "epoch": 0.9738955823293173, "percentage": 4.87, "elapsed_time": "0:01:11", "remaining_time": "0:23:17", "throughput": 4228.59, "total_tokens": 302528} |
| {"current_steps": 490, "total_steps": 9960, "loss": 0.1914, "lr": 4.909638554216868e-06, "epoch": 0.9839357429718876, "percentage": 4.92, "elapsed_time": "0:01:12", "remaining_time": "0:23:15", "throughput": 4226.91, "total_tokens": 305120} |
| {"current_steps": 495, "total_steps": 9960, "loss": 0.1552, "lr": 4.959839357429719e-06, "epoch": 0.9939759036144579, "percentage": 4.97, "elapsed_time": "0:01:13", "remaining_time": "0:23:17", "throughput": 4228.18, "total_tokens": 308928} |
| {"current_steps": 500, "total_steps": 9960, "loss": 0.1611, "lr": 5.0100401606425705e-06, "epoch": 1.0040160642570282, "percentage": 5.02, "elapsed_time": "0:01:13", "remaining_time": "0:23:20", "throughput": 4223.28, "total_tokens": 312512} |
| {"current_steps": 505, "total_steps": 9960, "loss": 0.182, "lr": 5.060240963855422e-06, "epoch": 1.0140562248995983, "percentage": 5.07, "elapsed_time": "0:01:14", "remaining_time": "0:23:17", "throughput": 4222.51, "total_tokens": 315232} |
| {"current_steps": 510, "total_steps": 9960, "loss": 0.217, "lr": 5.110441767068274e-06, "epoch": 1.0240963855421688, "percentage": 5.12, "elapsed_time": "0:01:15", "remaining_time": "0:23:15", "throughput": 4222.24, "total_tokens": 317952} |
| {"current_steps": 515, "total_steps": 9960, "loss": 0.1535, "lr": 5.1606425702811245e-06, "epoch": 1.034136546184739, "percentage": 5.17, "elapsed_time": "0:01:16", "remaining_time": "0:23:16", "throughput": 4224.31, "total_tokens": 321696} |
| {"current_steps": 520, "total_steps": 9960, "loss": 0.1515, "lr": 5.210843373493977e-06, "epoch": 1.0441767068273093, "percentage": 5.22, "elapsed_time": "0:01:16", "remaining_time": "0:23:16", "throughput": 4225.93, "total_tokens": 325184} |
| {"current_steps": 525, "total_steps": 9960, "loss": 0.1761, "lr": 5.261044176706827e-06, "epoch": 1.0542168674698795, "percentage": 5.27, "elapsed_time": "0:01:17", "remaining_time": "0:23:17", "throughput": 4227.78, "total_tokens": 328832} |
| {"current_steps": 530, "total_steps": 9960, "loss": 0.1718, "lr": 5.3112449799196794e-06, "epoch": 1.0642570281124497, "percentage": 5.32, "elapsed_time": "0:01:18", "remaining_time": "0:23:18", "throughput": 4226.99, "total_tokens": 332320} |
| {"current_steps": 535, "total_steps": 9960, "loss": 0.1717, "lr": 5.361445783132531e-06, "epoch": 1.0742971887550201, "percentage": 5.37, "elapsed_time": "0:01:19", "remaining_time": "0:23:18", "throughput": 4226.39, "total_tokens": 335584} |
| {"current_steps": 540, "total_steps": 9960, "loss": 0.1412, "lr": 5.411646586345381e-06, "epoch": 1.0843373493975903, "percentage": 5.42, "elapsed_time": "0:01:20", "remaining_time": "0:23:17", "throughput": 4224.78, "total_tokens": 338464} |
| {"current_steps": 545, "total_steps": 9960, "loss": 0.192, "lr": 5.4618473895582335e-06, "epoch": 1.0943775100401607, "percentage": 5.47, "elapsed_time": "0:01:20", "remaining_time": "0:23:15", "throughput": 4224.43, "total_tokens": 341248} |
| {"current_steps": 550, "total_steps": 9960, "loss": 0.1577, "lr": 5.512048192771085e-06, "epoch": 1.104417670682731, "percentage": 5.52, "elapsed_time": "0:01:21", "remaining_time": "0:23:14", "throughput": 4223.07, "total_tokens": 344288} |
| {"current_steps": 555, "total_steps": 9960, "loss": 0.1656, "lr": 5.562248995983936e-06, "epoch": 1.1144578313253013, "percentage": 5.57, "elapsed_time": "0:01:22", "remaining_time": "0:23:14", "throughput": 4223.18, "total_tokens": 347456} |
| {"current_steps": 560, "total_steps": 9960, "loss": 0.144, "lr": 5.6124497991967875e-06, "epoch": 1.1244979919678715, "percentage": 5.62, "elapsed_time": "0:01:22", "remaining_time": "0:23:12", "throughput": 4223.52, "total_tokens": 350400} |
| {"current_steps": 565, "total_steps": 9960, "loss": 0.1922, "lr": 5.66265060240964e-06, "epoch": 1.1345381526104417, "percentage": 5.67, "elapsed_time": "0:01:23", "remaining_time": "0:23:09", "throughput": 4221.83, "total_tokens": 352896} |
| {"current_steps": 570, "total_steps": 9960, "loss": 0.1733, "lr": 5.71285140562249e-06, "epoch": 1.144578313253012, "percentage": 5.72, "elapsed_time": "0:01:24", "remaining_time": "0:23:06", "throughput": 4221.18, "total_tokens": 355328} |
| {"current_steps": 575, "total_steps": 9960, "loss": 0.1604, "lr": 5.7630522088353416e-06, "epoch": 1.1546184738955823, "percentage": 5.77, "elapsed_time": "0:01:24", "remaining_time": "0:23:05", "throughput": 4221.63, "total_tokens": 358240} |
| {"current_steps": 580, "total_steps": 9960, "loss": 0.2166, "lr": 5.813253012048194e-06, "epoch": 1.1646586345381527, "percentage": 5.82, "elapsed_time": "0:01:25", "remaining_time": "0:23:05", "throughput": 4222.97, "total_tokens": 361792} |
| {"current_steps": 585, "total_steps": 9960, "loss": 0.1522, "lr": 5.863453815261044e-06, "epoch": 1.1746987951807228, "percentage": 5.87, "elapsed_time": "0:01:26", "remaining_time": "0:23:04", "throughput": 4223.84, "total_tokens": 364960} |
| {"current_steps": 590, "total_steps": 9960, "loss": 0.1469, "lr": 5.9136546184738964e-06, "epoch": 1.1847389558232932, "percentage": 5.92, "elapsed_time": "0:01:27", "remaining_time": "0:23:05", "throughput": 4224.4, "total_tokens": 368416} |
| {"current_steps": 595, "total_steps": 9960, "loss": 0.1788, "lr": 5.963855421686747e-06, "epoch": 1.1947791164658634, "percentage": 5.97, "elapsed_time": "0:01:28", "remaining_time": "0:23:05", "throughput": 4224.13, "total_tokens": 371872} |
| {"current_steps": 600, "total_steps": 9960, "loss": 0.1649, "lr": 6.014056224899599e-06, "epoch": 1.2048192771084336, "percentage": 6.02, "elapsed_time": "0:01:28", "remaining_time": "0:23:03", "throughput": 4223.03, "total_tokens": 374432} |
| {"current_steps": 605, "total_steps": 9960, "loss": 0.1436, "lr": 6.0642570281124505e-06, "epoch": 1.214859437751004, "percentage": 6.07, "elapsed_time": "0:01:29", "remaining_time": "0:23:03", "throughput": 4224.7, "total_tokens": 378080} |
| {"current_steps": 610, "total_steps": 9960, "loss": 0.2001, "lr": 6.114457831325302e-06, "epoch": 1.2248995983935742, "percentage": 6.12, "elapsed_time": "0:01:30", "remaining_time": "0:23:02", "throughput": 4224.04, "total_tokens": 380928} |
| {"current_steps": 615, "total_steps": 9960, "loss": 0.1959, "lr": 6.164658634538153e-06, "epoch": 1.2349397590361446, "percentage": 6.17, "elapsed_time": "0:01:30", "remaining_time": "0:23:01", "throughput": 4221.82, "total_tokens": 383872} |
| {"current_steps": 620, "total_steps": 9960, "loss": 0.1745, "lr": 6.214859437751004e-06, "epoch": 1.2449799196787148, "percentage": 6.22, "elapsed_time": "0:01:31", "remaining_time": "0:22:59", "throughput": 4222.36, "total_tokens": 386752} |
| {"current_steps": 625, "total_steps": 9960, "loss": 0.1546, "lr": 6.265060240963856e-06, "epoch": 1.2550200803212852, "percentage": 6.28, "elapsed_time": "0:01:32", "remaining_time": "0:22:58", "throughput": 4221.79, "total_tokens": 389600} |
| {"current_steps": 630, "total_steps": 9960, "loss": 0.1835, "lr": 6.315261044176707e-06, "epoch": 1.2650602409638554, "percentage": 6.33, "elapsed_time": "0:01:33", "remaining_time": "0:22:57", "throughput": 4221.27, "total_tokens": 392672} |
| {"current_steps": 635, "total_steps": 9960, "loss": 0.2297, "lr": 6.365461847389559e-06, "epoch": 1.2751004016064256, "percentage": 6.38, "elapsed_time": "0:01:33", "remaining_time": "0:22:56", "throughput": 4220.32, "total_tokens": 395520} |
| {"current_steps": 640, "total_steps": 9960, "loss": 0.2924, "lr": 6.41566265060241e-06, "epoch": 1.285140562248996, "percentage": 6.43, "elapsed_time": "0:01:34", "remaining_time": "0:22:55", "throughput": 4219.73, "total_tokens": 398496} |
| {"current_steps": 645, "total_steps": 9960, "loss": 0.1721, "lr": 6.465863453815262e-06, "epoch": 1.2951807228915664, "percentage": 6.48, "elapsed_time": "0:01:35", "remaining_time": "0:22:53", "throughput": 4218.48, "total_tokens": 401312} |
| {"current_steps": 650, "total_steps": 9960, "loss": 0.187, "lr": 6.516064257028113e-06, "epoch": 1.3052208835341366, "percentage": 6.53, "elapsed_time": "0:01:35", "remaining_time": "0:22:54", "throughput": 4220.19, "total_tokens": 404960} |
| {"current_steps": 655, "total_steps": 9960, "loss": 0.1458, "lr": 6.566265060240964e-06, "epoch": 1.3152610441767068, "percentage": 6.58, "elapsed_time": "0:01:36", "remaining_time": "0:22:53", "throughput": 4220.04, "total_tokens": 408032} |
| {"current_steps": 660, "total_steps": 9960, "loss": 0.165, "lr": 6.616465863453816e-06, "epoch": 1.3253012048192772, "percentage": 6.63, "elapsed_time": "0:01:37", "remaining_time": "0:22:54", "throughput": 4222.16, "total_tokens": 411968} |
| {"current_steps": 665, "total_steps": 9960, "loss": 0.1554, "lr": 6.666666666666667e-06, "epoch": 1.3353413654618473, "percentage": 6.68, "elapsed_time": "0:01:38", "remaining_time": "0:22:54", "throughput": 4222.59, "total_tokens": 415168} |
| {"current_steps": 670, "total_steps": 9960, "loss": 0.1623, "lr": 6.716867469879519e-06, "epoch": 1.3453815261044177, "percentage": 6.73, "elapsed_time": "0:01:39", "remaining_time": "0:22:53", "throughput": 4221.36, "total_tokens": 418112} |
| {"current_steps": 675, "total_steps": 9960, "loss": 0.1602, "lr": 6.76706827309237e-06, "epoch": 1.355421686746988, "percentage": 6.78, "elapsed_time": "0:01:39", "remaining_time": "0:22:51", "throughput": 4219.48, "total_tokens": 420608} |
| {"current_steps": 680, "total_steps": 9960, "loss": 0.1651, "lr": 6.8172690763052215e-06, "epoch": 1.3654618473895583, "percentage": 6.83, "elapsed_time": "0:01:40", "remaining_time": "0:22:50", "throughput": 4220.01, "total_tokens": 423680} |
| {"current_steps": 685, "total_steps": 9960, "loss": 0.15, "lr": 6.867469879518073e-06, "epoch": 1.3755020080321285, "percentage": 6.88, "elapsed_time": "0:01:41", "remaining_time": "0:22:47", "throughput": 4220.15, "total_tokens": 426368} |
| {"current_steps": 690, "total_steps": 9960, "loss": 0.1684, "lr": 6.917670682730925e-06, "epoch": 1.3855421686746987, "percentage": 6.93, "elapsed_time": "0:01:41", "remaining_time": "0:22:47", "throughput": 4219.85, "total_tokens": 429568} |
| {"current_steps": 695, "total_steps": 9960, "loss": 0.1996, "lr": 6.9678714859437756e-06, "epoch": 1.395582329317269, "percentage": 6.98, "elapsed_time": "0:01:42", "remaining_time": "0:22:46", "throughput": 4218.99, "total_tokens": 432320} |
| {"current_steps": 700, "total_steps": 9960, "loss": 0.2936, "lr": 7.018072289156627e-06, "epoch": 1.4056224899598393, "percentage": 7.03, "elapsed_time": "0:01:43", "remaining_time": "0:22:44", "throughput": 4219.42, "total_tokens": 435360} |
| {"current_steps": 705, "total_steps": 9960, "loss": 0.1966, "lr": 7.068273092369478e-06, "epoch": 1.4156626506024097, "percentage": 7.08, "elapsed_time": "0:01:43", "remaining_time": "0:22:43", "throughput": 4217.95, "total_tokens": 438112} |
| {"current_steps": 710, "total_steps": 9960, "loss": 0.2228, "lr": 7.11847389558233e-06, "epoch": 1.4257028112449799, "percentage": 7.13, "elapsed_time": "0:01:44", "remaining_time": "0:22:41", "throughput": 4217.27, "total_tokens": 440672} |
| {"current_steps": 715, "total_steps": 9960, "loss": 0.1611, "lr": 7.168674698795182e-06, "epoch": 1.4357429718875503, "percentage": 7.18, "elapsed_time": "0:01:45", "remaining_time": "0:22:39", "throughput": 4217.94, "total_tokens": 443424} |
| {"current_steps": 720, "total_steps": 9960, "loss": 0.1893, "lr": 7.218875502008032e-06, "epoch": 1.4457831325301205, "percentage": 7.23, "elapsed_time": "0:01:45", "remaining_time": "0:22:38", "throughput": 4218.13, "total_tokens": 446528} |
| {"current_steps": 725, "total_steps": 9960, "loss": 0.164, "lr": 7.2690763052208845e-06, "epoch": 1.4558232931726907, "percentage": 7.28, "elapsed_time": "0:01:46", "remaining_time": "0:22:38", "throughput": 4220.48, "total_tokens": 450016} |
| {"current_steps": 730, "total_steps": 9960, "loss": 0.1482, "lr": 7.319277108433736e-06, "epoch": 1.465863453815261, "percentage": 7.33, "elapsed_time": "0:01:47", "remaining_time": "0:22:38", "throughput": 4222.82, "total_tokens": 453664} |
| {"current_steps": 735, "total_steps": 9960, "loss": 0.1891, "lr": 7.369477911646586e-06, "epoch": 1.4759036144578312, "percentage": 7.38, "elapsed_time": "0:01:48", "remaining_time": "0:22:38", "throughput": 4223.49, "total_tokens": 457216} |
| {"current_steps": 740, "total_steps": 9960, "loss": 0.1748, "lr": 7.4196787148594385e-06, "epoch": 1.4859437751004017, "percentage": 7.43, "elapsed_time": "0:01:49", "remaining_time": "0:22:38", "throughput": 4223.38, "total_tokens": 460416} |
| {"current_steps": 745, "total_steps": 9960, "loss": 0.1654, "lr": 7.469879518072289e-06, "epoch": 1.4959839357429718, "percentage": 7.48, "elapsed_time": "0:01:49", "remaining_time": "0:22:38", "throughput": 4223.13, "total_tokens": 463776} |
| {"current_steps": 750, "total_steps": 9960, "loss": 0.1918, "lr": 7.520080321285141e-06, "epoch": 1.5060240963855422, "percentage": 7.53, "elapsed_time": "0:01:50", "remaining_time": "0:22:38", "throughput": 4223.86, "total_tokens": 467200} |
| {"current_steps": 755, "total_steps": 9960, "loss": 0.1481, "lr": 7.570281124497993e-06, "epoch": 1.5160642570281124, "percentage": 7.58, "elapsed_time": "0:01:51", "remaining_time": "0:22:36", "throughput": 4222.75, "total_tokens": 469728} |
| {"current_steps": 760, "total_steps": 9960, "loss": 0.1537, "lr": 7.620481927710845e-06, "epoch": 1.5261044176706826, "percentage": 7.63, "elapsed_time": "0:01:51", "remaining_time": "0:22:35", "throughput": 4223.01, "total_tokens": 472832} |
| {"current_steps": 765, "total_steps": 9960, "loss": 0.1617, "lr": 7.670682730923695e-06, "epoch": 1.536144578313253, "percentage": 7.68, "elapsed_time": "0:01:52", "remaining_time": "0:22:35", "throughput": 4224.0, "total_tokens": 476256} |
| {"current_steps": 770, "total_steps": 9960, "loss": 0.1653, "lr": 7.720883534136547e-06, "epoch": 1.5461847389558234, "percentage": 7.73, "elapsed_time": "0:01:53", "remaining_time": "0:22:33", "throughput": 4222.78, "total_tokens": 478784} |
| {"current_steps": 775, "total_steps": 9960, "loss": 0.1399, "lr": 7.771084337349398e-06, "epoch": 1.5562248995983936, "percentage": 7.78, "elapsed_time": "0:01:54", "remaining_time": "0:22:32", "throughput": 4222.82, "total_tokens": 481920} |
| {"current_steps": 780, "total_steps": 9960, "loss": 0.1345, "lr": 7.82128514056225e-06, "epoch": 1.5662650602409638, "percentage": 7.83, "elapsed_time": "0:01:54", "remaining_time": "0:22:32", "throughput": 4223.61, "total_tokens": 485344} |
| {"current_steps": 785, "total_steps": 9960, "loss": 0.1843, "lr": 7.8714859437751e-06, "epoch": 1.5763052208835342, "percentage": 7.88, "elapsed_time": "0:01:55", "remaining_time": "0:22:30", "throughput": 4221.57, "total_tokens": 487968} |
| {"current_steps": 790, "total_steps": 9960, "loss": 0.1692, "lr": 7.921686746987952e-06, "epoch": 1.5863453815261044, "percentage": 7.93, "elapsed_time": "0:01:56", "remaining_time": "0:22:30", "throughput": 4220.56, "total_tokens": 490880} |
| {"current_steps": 795, "total_steps": 9960, "loss": 0.1647, "lr": 7.971887550200803e-06, "epoch": 1.5963855421686746, "percentage": 7.98, "elapsed_time": "0:01:56", "remaining_time": "0:22:28", "throughput": 4219.82, "total_tokens": 493568} |
| {"current_steps": 800, "total_steps": 9960, "loss": 0.1725, "lr": 8.022088353413655e-06, "epoch": 1.606425702811245, "percentage": 8.03, "elapsed_time": "0:01:57", "remaining_time": "0:22:28", "throughput": 4220.72, "total_tokens": 497184} |
| {"current_steps": 805, "total_steps": 9960, "loss": 0.1703, "lr": 8.072289156626508e-06, "epoch": 1.6164658634538154, "percentage": 8.08, "elapsed_time": "0:01:58", "remaining_time": "0:22:27", "throughput": 4218.72, "total_tokens": 499680} |
| {"current_steps": 810, "total_steps": 9960, "loss": 0.1438, "lr": 8.122489959839357e-06, "epoch": 1.6265060240963856, "percentage": 8.13, "elapsed_time": "0:01:59", "remaining_time": "0:22:25", "throughput": 4218.85, "total_tokens": 502656} |
| {"current_steps": 815, "total_steps": 9960, "loss": 0.1673, "lr": 8.172690763052209e-06, "epoch": 1.6365461847389557, "percentage": 8.18, "elapsed_time": "0:01:59", "remaining_time": "0:22:23", "throughput": 4218.61, "total_tokens": 505056} |
| {"current_steps": 820, "total_steps": 9960, "loss": 0.1565, "lr": 8.222891566265062e-06, "epoch": 1.6465863453815262, "percentage": 8.23, "elapsed_time": "0:02:00", "remaining_time": "0:22:22", "throughput": 4218.86, "total_tokens": 508320} |
| {"current_steps": 825, "total_steps": 9960, "loss": 0.1429, "lr": 8.273092369477911e-06, "epoch": 1.6566265060240963, "percentage": 8.28, "elapsed_time": "0:02:01", "remaining_time": "0:22:23", "throughput": 4219.96, "total_tokens": 512032} |
| {"current_steps": 830, "total_steps": 9960, "loss": 0.2157, "lr": 8.323293172690764e-06, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:02:01", "remaining_time": "0:22:21", "throughput": 4220.12, "total_tokens": 514752} |
| {"current_steps": 835, "total_steps": 9960, "loss": 0.1982, "lr": 8.373493975903614e-06, "epoch": 1.676706827309237, "percentage": 8.38, "elapsed_time": "0:02:02", "remaining_time": "0:22:21", "throughput": 4221.13, "total_tokens": 518176} |
| {"current_steps": 840, "total_steps": 9960, "loss": 0.1626, "lr": 8.423694779116467e-06, "epoch": 1.6867469879518073, "percentage": 8.43, "elapsed_time": "0:02:03", "remaining_time": "0:22:20", "throughput": 4221.45, "total_tokens": 521216} |
| {"current_steps": 845, "total_steps": 9960, "loss": 0.1674, "lr": 8.473895582329319e-06, "epoch": 1.6967871485943775, "percentage": 8.48, "elapsed_time": "0:02:04", "remaining_time": "0:22:21", "throughput": 4222.95, "total_tokens": 525216} |
| {"current_steps": 850, "total_steps": 9960, "loss": 0.1472, "lr": 8.52409638554217e-06, "epoch": 1.7068273092369477, "percentage": 8.53, "elapsed_time": "0:02:05", "remaining_time": "0:22:21", "throughput": 4224.73, "total_tokens": 528896} |
| {"current_steps": 855, "total_steps": 9960, "loss": 0.2251, "lr": 8.574297188755021e-06, "epoch": 1.716867469879518, "percentage": 8.58, "elapsed_time": "0:02:05", "remaining_time": "0:22:20", "throughput": 4224.0, "total_tokens": 531744} |
| {"current_steps": 860, "total_steps": 9960, "loss": 0.1385, "lr": 8.624497991967873e-06, "epoch": 1.7269076305220885, "percentage": 8.63, "elapsed_time": "0:02:06", "remaining_time": "0:22:19", "throughput": 4222.27, "total_tokens": 534464} |
| {"current_steps": 865, "total_steps": 9960, "loss": 0.1555, "lr": 8.674698795180724e-06, "epoch": 1.7369477911646585, "percentage": 8.68, "elapsed_time": "0:02:07", "remaining_time": "0:22:19", "throughput": 4223.33, "total_tokens": 538208} |
| {"current_steps": 870, "total_steps": 9960, "loss": 0.1201, "lr": 8.724899598393575e-06, "epoch": 1.7469879518072289, "percentage": 8.73, "elapsed_time": "0:02:08", "remaining_time": "0:22:19", "throughput": 4223.13, "total_tokens": 541440} |
| {"current_steps": 875, "total_steps": 9960, "loss": 0.2551, "lr": 8.775100401606427e-06, "epoch": 1.7570281124497993, "percentage": 8.79, "elapsed_time": "0:02:08", "remaining_time": "0:22:17", "throughput": 4221.73, "total_tokens": 543968} |
| {"current_steps": 880, "total_steps": 9960, "loss": 0.1633, "lr": 8.825301204819278e-06, "epoch": 1.7670682730923695, "percentage": 8.84, "elapsed_time": "0:02:09", "remaining_time": "0:22:16", "throughput": 4220.45, "total_tokens": 546752} |
| {"current_steps": 885, "total_steps": 9960, "loss": 0.1441, "lr": 8.87550200803213e-06, "epoch": 1.7771084337349397, "percentage": 8.89, "elapsed_time": "0:02:10", "remaining_time": "0:22:15", "throughput": 4220.22, "total_tokens": 549632} |
| {"current_steps": 890, "total_steps": 9960, "loss": 0.1765, "lr": 8.92570281124498e-06, "epoch": 1.78714859437751, "percentage": 8.94, "elapsed_time": "0:02:10", "remaining_time": "0:22:14", "throughput": 4220.81, "total_tokens": 552736} |
| {"current_steps": 895, "total_steps": 9960, "loss": 0.1459, "lr": 8.975903614457832e-06, "epoch": 1.7971887550200805, "percentage": 8.99, "elapsed_time": "0:02:11", "remaining_time": "0:22:13", "throughput": 4220.89, "total_tokens": 555808} |
| {"current_steps": 900, "total_steps": 9960, "loss": 0.1618, "lr": 9.026104417670683e-06, "epoch": 1.8072289156626506, "percentage": 9.04, "elapsed_time": "0:02:12", "remaining_time": "0:22:12", "throughput": 4219.98, "total_tokens": 558624} |
| {"current_steps": 905, "total_steps": 9960, "loss": 0.1617, "lr": 9.076305220883535e-06, "epoch": 1.8172690763052208, "percentage": 9.09, "elapsed_time": "0:02:13", "remaining_time": "0:22:13", "throughput": 4220.57, "total_tokens": 562368} |
| {"current_steps": 910, "total_steps": 9960, "loss": 0.1613, "lr": 9.126506024096386e-06, "epoch": 1.8273092369477912, "percentage": 9.14, "elapsed_time": "0:02:14", "remaining_time": "0:22:13", "throughput": 4221.07, "total_tokens": 565824} |
| {"current_steps": 915, "total_steps": 9960, "loss": 0.1839, "lr": 9.176706827309237e-06, "epoch": 1.8373493975903614, "percentage": 9.19, "elapsed_time": "0:02:14", "remaining_time": "0:22:13", "throughput": 4223.7, "total_tokens": 569888} |
| {"current_steps": 920, "total_steps": 9960, "loss": 0.216, "lr": 9.226907630522089e-06, "epoch": 1.8473895582329316, "percentage": 9.24, "elapsed_time": "0:02:15", "remaining_time": "0:22:11", "throughput": 4222.33, "total_tokens": 572160} |
| {"current_steps": 925, "total_steps": 9960, "loss": 0.1682, "lr": 9.27710843373494e-06, "epoch": 1.857429718875502, "percentage": 9.29, "elapsed_time": "0:02:16", "remaining_time": "0:22:10", "throughput": 4222.78, "total_tokens": 575072} |
| {"current_steps": 930, "total_steps": 9960, "loss": 0.1811, "lr": 9.327309236947793e-06, "epoch": 1.8674698795180724, "percentage": 9.34, "elapsed_time": "0:02:16", "remaining_time": "0:22:09", "throughput": 4223.2, "total_tokens": 578112} |
| {"current_steps": 935, "total_steps": 9960, "loss": 0.1639, "lr": 9.377510040160643e-06, "epoch": 1.8775100401606426, "percentage": 9.39, "elapsed_time": "0:02:17", "remaining_time": "0:22:08", "throughput": 4223.41, "total_tokens": 581248} |
| {"current_steps": 940, "total_steps": 9960, "loss": 0.1592, "lr": 9.427710843373494e-06, "epoch": 1.8875502008032128, "percentage": 9.44, "elapsed_time": "0:02:18", "remaining_time": "0:22:08", "throughput": 4223.74, "total_tokens": 584608} |
| {"current_steps": 945, "total_steps": 9960, "loss": 0.1492, "lr": 9.477911646586347e-06, "epoch": 1.8975903614457832, "percentage": 9.49, "elapsed_time": "0:02:19", "remaining_time": "0:22:07", "throughput": 4225.36, "total_tokens": 588192} |
| {"current_steps": 950, "total_steps": 9960, "loss": 0.1581, "lr": 9.528112449799197e-06, "epoch": 1.9076305220883534, "percentage": 9.54, "elapsed_time": "0:02:19", "remaining_time": "0:22:07", "throughput": 4225.47, "total_tokens": 591552} |
| {"current_steps": 955, "total_steps": 9960, "loss": 0.1515, "lr": 9.57831325301205e-06, "epoch": 1.9176706827309236, "percentage": 9.59, "elapsed_time": "0:02:20", "remaining_time": "0:22:07", "throughput": 4224.39, "total_tokens": 594784} |
| {"current_steps": 960, "total_steps": 9960, "loss": 0.1574, "lr": 9.6285140562249e-06, "epoch": 1.927710843373494, "percentage": 9.64, "elapsed_time": "0:02:21", "remaining_time": "0:22:06", "throughput": 4225.14, "total_tokens": 598016} |
| {"current_steps": 965, "total_steps": 9960, "loss": 0.166, "lr": 9.678714859437753e-06, "epoch": 1.9377510040160644, "percentage": 9.69, "elapsed_time": "0:02:22", "remaining_time": "0:22:06", "throughput": 4225.06, "total_tokens": 601216} |
| {"current_steps": 970, "total_steps": 9960, "loss": 0.1514, "lr": 9.728915662650604e-06, "epoch": 1.9477911646586346, "percentage": 9.74, "elapsed_time": "0:02:23", "remaining_time": "0:22:06", "throughput": 4225.89, "total_tokens": 604640} |
| {"current_steps": 975, "total_steps": 9960, "loss": 0.1461, "lr": 9.779116465863454e-06, "epoch": 1.9578313253012047, "percentage": 9.79, "elapsed_time": "0:02:23", "remaining_time": "0:22:05", "throughput": 4226.25, "total_tokens": 607680} |
| {"current_steps": 980, "total_steps": 9960, "loss": 0.1787, "lr": 9.829317269076307e-06, "epoch": 1.9678714859437751, "percentage": 9.84, "elapsed_time": "0:02:24", "remaining_time": "0:22:05", "throughput": 4226.88, "total_tokens": 611296} |
| {"current_steps": 985, "total_steps": 9960, "loss": 0.152, "lr": 9.879518072289156e-06, "epoch": 1.9779116465863453, "percentage": 9.89, "elapsed_time": "0:02:25", "remaining_time": "0:22:05", "throughput": 4227.31, "total_tokens": 614912} |
| {"current_steps": 990, "total_steps": 9960, "loss": 0.2004, "lr": 9.92971887550201e-06, "epoch": 1.9879518072289155, "percentage": 9.94, "elapsed_time": "0:02:26", "remaining_time": "0:22:04", "throughput": 4227.91, "total_tokens": 618272} |
| {"current_steps": 995, "total_steps": 9960, "loss": 0.1312, "lr": 9.97991967871486e-06, "epoch": 1.997991967871486, "percentage": 9.99, "elapsed_time": "0:02:27", "remaining_time": "0:22:04", "throughput": 4229.08, "total_tokens": 621824} |
| {"current_steps": 996, "total_steps": 9960, "eval_loss": 0.16479995846748352, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:02:35", "remaining_time": "0:23:18", "throughput": 4008.72, "total_tokens": 622720} |
| {"current_steps": 1000, "total_steps": 9960, "loss": 0.1477, "lr": 9.999997236378723e-06, "epoch": 2.0080321285140563, "percentage": 10.04, "elapsed_time": "0:02:37", "remaining_time": "0:23:28", "throughput": 3979.28, "total_tokens": 625344} |
| {"current_steps": 1005, "total_steps": 9960, "loss": 0.1797, "lr": 9.999980347593088e-06, "epoch": 2.0180722891566263, "percentage": 10.09, "elapsed_time": "0:02:38", "remaining_time": "0:23:27", "throughput": 3981.36, "total_tokens": 629056} |
| {"current_steps": 1010, "total_steps": 9960, "loss": 0.1524, "lr": 9.999948105418771e-06, "epoch": 2.0281124497991967, "percentage": 10.14, "elapsed_time": "0:02:38", "remaining_time": "0:23:27", "throughput": 3982.75, "total_tokens": 632640} |
| {"current_steps": 1015, "total_steps": 9960, "loss": 0.1683, "lr": 9.999900509954779e-06, "epoch": 2.038152610441767, "percentage": 10.19, "elapsed_time": "0:02:39", "remaining_time": "0:23:27", "throughput": 3983.19, "total_tokens": 636032} |
| {"current_steps": 1020, "total_steps": 9960, "loss": 0.1756, "lr": 9.999837561347259e-06, "epoch": 2.0481927710843375, "percentage": 10.24, "elapsed_time": "0:02:40", "remaining_time": "0:23:25", "throughput": 3983.65, "total_tokens": 639008} |
| {"current_steps": 1025, "total_steps": 9960, "loss": 0.1659, "lr": 9.99975925978951e-06, "epoch": 2.0582329317269075, "percentage": 10.29, "elapsed_time": "0:02:41", "remaining_time": "0:23:24", "throughput": 3984.62, "total_tokens": 642048} |
| {"current_steps": 1030, "total_steps": 9960, "loss": 0.1371, "lr": 9.99966560552197e-06, "epoch": 2.068273092369478, "percentage": 10.34, "elapsed_time": "0:02:41", "remaining_time": "0:23:23", "throughput": 3985.86, "total_tokens": 645312} |
| {"current_steps": 1035, "total_steps": 9960, "loss": 0.1625, "lr": 9.999556598832224e-06, "epoch": 2.0783132530120483, "percentage": 10.39, "elapsed_time": "0:02:42", "remaining_time": "0:23:23", "throughput": 3988.03, "total_tokens": 648864} |
| {"current_steps": 1040, "total_steps": 9960, "loss": 0.1701, "lr": 9.999432240054994e-06, "epoch": 2.0883534136546187, "percentage": 10.44, "elapsed_time": "0:02:43", "remaining_time": "0:23:20", "throughput": 3989.22, "total_tokens": 651616} |
| {"current_steps": 1045, "total_steps": 9960, "loss": 0.1761, "lr": 9.999292529572152e-06, "epoch": 2.0983935742971886, "percentage": 10.49, "elapsed_time": "0:02:44", "remaining_time": "0:23:19", "throughput": 3990.99, "total_tokens": 654912} |
| {"current_steps": 1050, "total_steps": 9960, "loss": 0.1769, "lr": 9.9991374678127e-06, "epoch": 2.108433734939759, "percentage": 10.54, "elapsed_time": "0:02:44", "remaining_time": "0:23:19", "throughput": 3992.58, "total_tokens": 658400} |
| {"current_steps": 1055, "total_steps": 9960, "loss": 0.1754, "lr": 9.998967055252791e-06, "epoch": 2.1184738955823295, "percentage": 10.59, "elapsed_time": "0:02:45", "remaining_time": "0:23:18", "throughput": 3993.36, "total_tokens": 661536} |
| {"current_steps": 1060, "total_steps": 9960, "loss": 0.1485, "lr": 9.998781292415705e-06, "epoch": 2.1285140562248994, "percentage": 10.64, "elapsed_time": "0:02:46", "remaining_time": "0:23:17", "throughput": 3994.56, "total_tokens": 664736} |
| {"current_steps": 1065, "total_steps": 9960, "loss": 0.1628, "lr": 9.998580179871864e-06, "epoch": 2.13855421686747, "percentage": 10.69, "elapsed_time": "0:02:47", "remaining_time": "0:23:15", "throughput": 3995.49, "total_tokens": 667520} |
| {"current_steps": 1070, "total_steps": 9960, "loss": 0.1636, "lr": 9.998363718238819e-06, "epoch": 2.1485943775100402, "percentage": 10.74, "elapsed_time": "0:02:47", "remaining_time": "0:23:14", "throughput": 3997.15, "total_tokens": 670976} |
| {"current_steps": 1075, "total_steps": 9960, "loss": 0.1487, "lr": 9.998131908181262e-06, "epoch": 2.1586345381526106, "percentage": 10.79, "elapsed_time": "0:02:48", "remaining_time": "0:23:14", "throughput": 3999.2, "total_tokens": 674816} |
| {"current_steps": 1080, "total_steps": 9960, "loss": 0.153, "lr": 9.997884750411004e-06, "epoch": 2.1686746987951806, "percentage": 10.84, "elapsed_time": "0:02:49", "remaining_time": "0:23:13", "throughput": 4000.07, "total_tokens": 678080} |
| {"current_steps": 1085, "total_steps": 9960, "loss": 0.1557, "lr": 9.997622245686993e-06, "epoch": 2.178714859437751, "percentage": 10.89, "elapsed_time": "0:02:50", "remaining_time": "0:23:11", "throughput": 4000.81, "total_tokens": 680736} |
| {"current_steps": 1090, "total_steps": 9960, "loss": 0.1665, "lr": 9.997344394815298e-06, "epoch": 2.1887550200803214, "percentage": 10.94, "elapsed_time": "0:02:50", "remaining_time": "0:23:10", "throughput": 4002.0, "total_tokens": 684064} |
| {"current_steps": 1095, "total_steps": 9960, "loss": 0.1743, "lr": 9.997051198649117e-06, "epoch": 2.1987951807228914, "percentage": 10.99, "elapsed_time": "0:02:51", "remaining_time": "0:23:08", "throughput": 4002.3, "total_tokens": 686560} |
| {"current_steps": 1100, "total_steps": 9960, "loss": 0.183, "lr": 9.996742658088759e-06, "epoch": 2.208835341365462, "percentage": 11.04, "elapsed_time": "0:02:52", "remaining_time": "0:23:06", "throughput": 4002.95, "total_tokens": 689312} |
| {"current_steps": 1105, "total_steps": 9960, "loss": 0.1928, "lr": 9.996418774081658e-06, "epoch": 2.218875502008032, "percentage": 11.09, "elapsed_time": "0:02:52", "remaining_time": "0:23:05", "throughput": 4004.23, "total_tokens": 692320} |
| {"current_steps": 1110, "total_steps": 9960, "loss": 0.1581, "lr": 9.996079547622362e-06, "epoch": 2.2289156626506026, "percentage": 11.14, "elapsed_time": "0:02:53", "remaining_time": "0:23:03", "throughput": 4005.1, "total_tokens": 695040} |
| {"current_steps": 1115, "total_steps": 9960, "loss": 0.1677, "lr": 9.995724979752533e-06, "epoch": 2.2389558232931726, "percentage": 11.19, "elapsed_time": "0:02:54", "remaining_time": "0:23:02", "throughput": 4005.7, "total_tokens": 697952} |
| {"current_steps": 1120, "total_steps": 9960, "loss": 0.1657, "lr": 9.995355071560933e-06, "epoch": 2.248995983935743, "percentage": 11.24, "elapsed_time": "0:02:54", "remaining_time": "0:23:00", "throughput": 4006.53, "total_tokens": 700992} |
| {"current_steps": 1125, "total_steps": 9960, "loss": 0.157, "lr": 9.994969824183441e-06, "epoch": 2.2590361445783134, "percentage": 11.3, "elapsed_time": "0:02:55", "remaining_time": "0:22:59", "throughput": 4007.24, "total_tokens": 703936} |
| {"current_steps": 1130, "total_steps": 9960, "loss": 0.1444, "lr": 9.994569238803027e-06, "epoch": 2.2690763052208833, "percentage": 11.35, "elapsed_time": "0:02:56", "remaining_time": "0:22:58", "throughput": 4009.31, "total_tokens": 707424} |
| {"current_steps": 1135, "total_steps": 9960, "loss": 0.1327, "lr": 9.994153316649769e-06, "epoch": 2.2791164658634537, "percentage": 11.4, "elapsed_time": "0:02:57", "remaining_time": "0:22:57", "throughput": 4010.0, "total_tokens": 710592} |
| {"current_steps": 1140, "total_steps": 9960, "loss": 0.2246, "lr": 9.993722059000833e-06, "epoch": 2.289156626506024, "percentage": 11.45, "elapsed_time": "0:02:58", "remaining_time": "0:22:57", "throughput": 4011.86, "total_tokens": 714368} |
| {"current_steps": 1145, "total_steps": 9960, "loss": 0.1575, "lr": 9.993275467180476e-06, "epoch": 2.2991967871485945, "percentage": 11.5, "elapsed_time": "0:02:58", "remaining_time": "0:22:56", "throughput": 4012.16, "total_tokens": 717344} |
| {"current_steps": 1150, "total_steps": 9960, "loss": 0.1554, "lr": 9.992813542560045e-06, "epoch": 2.3092369477911645, "percentage": 11.55, "elapsed_time": "0:02:59", "remaining_time": "0:22:55", "throughput": 4013.03, "total_tokens": 720576} |
| {"current_steps": 1155, "total_steps": 9960, "loss": 0.1799, "lr": 9.992336286557967e-06, "epoch": 2.319277108433735, "percentage": 11.6, "elapsed_time": "0:03:00", "remaining_time": "0:22:53", "throughput": 4013.51, "total_tokens": 723296} |
| {"current_steps": 1160, "total_steps": 9960, "loss": 0.1408, "lr": 9.991843700639747e-06, "epoch": 2.3293172690763053, "percentage": 11.65, "elapsed_time": "0:03:01", "remaining_time": "0:22:53", "throughput": 4015.02, "total_tokens": 726720} |
| {"current_steps": 1165, "total_steps": 9960, "loss": 0.1704, "lr": 9.991335786317964e-06, "epoch": 2.3393574297188753, "percentage": 11.7, "elapsed_time": "0:03:01", "remaining_time": "0:22:52", "throughput": 4017.19, "total_tokens": 730240} |
| {"current_steps": 1170, "total_steps": 9960, "loss": 0.1711, "lr": 9.990812545152264e-06, "epoch": 2.3493975903614457, "percentage": 11.75, "elapsed_time": "0:03:02", "remaining_time": "0:22:52", "throughput": 4018.77, "total_tokens": 733984} |
| {"current_steps": 1175, "total_steps": 9960, "loss": 0.1465, "lr": 9.990273978749358e-06, "epoch": 2.359437751004016, "percentage": 11.8, "elapsed_time": "0:03:03", "remaining_time": "0:22:51", "throughput": 4019.13, "total_tokens": 737056} |
| {"current_steps": 1180, "total_steps": 9960, "loss": 0.1695, "lr": 9.98972008876302e-06, "epoch": 2.3694779116465865, "percentage": 11.85, "elapsed_time": "0:03:04", "remaining_time": "0:22:49", "throughput": 4019.63, "total_tokens": 739744} |
| {"current_steps": 1185, "total_steps": 9960, "loss": 0.169, "lr": 9.98915087689407e-06, "epoch": 2.3795180722891565, "percentage": 11.9, "elapsed_time": "0:03:04", "remaining_time": "0:22:48", "throughput": 4020.51, "total_tokens": 742912} |
| {"current_steps": 1190, "total_steps": 9960, "loss": 0.1525, "lr": 9.988566344890383e-06, "epoch": 2.389558232931727, "percentage": 11.95, "elapsed_time": "0:03:05", "remaining_time": "0:22:47", "throughput": 4022.22, "total_tokens": 746400} |
| {"current_steps": 1195, "total_steps": 9960, "loss": 0.1502, "lr": 9.987966494546873e-06, "epoch": 2.3995983935742973, "percentage": 12.0, "elapsed_time": "0:03:06", "remaining_time": "0:22:47", "throughput": 4024.0, "total_tokens": 750144} |
| {"current_steps": 1200, "total_steps": 9960, "loss": 0.1395, "lr": 9.987351327705498e-06, "epoch": 2.4096385542168672, "percentage": 12.05, "elapsed_time": "0:03:07", "remaining_time": "0:22:45", "throughput": 4024.08, "total_tokens": 752640} |
| {"current_steps": 1205, "total_steps": 9960, "loss": 0.1586, "lr": 9.986720846255244e-06, "epoch": 2.4196787148594376, "percentage": 12.1, "elapsed_time": "0:03:07", "remaining_time": "0:22:43", "throughput": 4024.82, "total_tokens": 755584} |
| {"current_steps": 1210, "total_steps": 9960, "loss": 0.1645, "lr": 9.986075052132124e-06, "epoch": 2.429718875502008, "percentage": 12.15, "elapsed_time": "0:03:08", "remaining_time": "0:22:42", "throughput": 4025.94, "total_tokens": 758656} |
| {"current_steps": 1215, "total_steps": 9960, "loss": 0.149, "lr": 9.98541394731917e-06, "epoch": 2.4397590361445785, "percentage": 12.2, "elapsed_time": "0:03:09", "remaining_time": "0:22:40", "throughput": 4026.83, "total_tokens": 761280} |
| {"current_steps": 1220, "total_steps": 9960, "loss": 0.1714, "lr": 9.984737533846429e-06, "epoch": 2.4497991967871484, "percentage": 12.25, "elapsed_time": "0:03:09", "remaining_time": "0:22:40", "throughput": 4029.45, "total_tokens": 765440} |
| {"current_steps": 1225, "total_steps": 9960, "loss": 0.1557, "lr": 9.984045813790959e-06, "epoch": 2.459839357429719, "percentage": 12.3, "elapsed_time": "0:03:10", "remaining_time": "0:22:40", "throughput": 4031.75, "total_tokens": 769184} |
| {"current_steps": 1230, "total_steps": 9960, "loss": 0.1521, "lr": 9.983338789276817e-06, "epoch": 2.4698795180722892, "percentage": 12.35, "elapsed_time": "0:03:11", "remaining_time": "0:22:38", "throughput": 4031.55, "total_tokens": 771584} |
| {"current_steps": 1235, "total_steps": 9960, "loss": 0.1492, "lr": 9.982616462475055e-06, "epoch": 2.479919678714859, "percentage": 12.4, "elapsed_time": "0:03:12", "remaining_time": "0:22:37", "throughput": 4031.74, "total_tokens": 774464} |
| {"current_steps": 1240, "total_steps": 9960, "loss": 0.171, "lr": 9.981878835603718e-06, "epoch": 2.4899598393574296, "percentage": 12.45, "elapsed_time": "0:03:12", "remaining_time": "0:22:36", "throughput": 4032.18, "total_tokens": 777568} |
| {"current_steps": 1245, "total_steps": 9960, "loss": 0.1484, "lr": 9.981125910927824e-06, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:03:13", "remaining_time": "0:22:33", "throughput": 4032.21, "total_tokens": 779936} |
| {"current_steps": 1250, "total_steps": 9960, "loss": 0.1717, "lr": 9.980357690759376e-06, "epoch": 2.5100401606425704, "percentage": 12.55, "elapsed_time": "0:03:14", "remaining_time": "0:22:32", "throughput": 4033.52, "total_tokens": 782752} |
| {"current_steps": 1255, "total_steps": 9960, "loss": 0.1508, "lr": 9.979574177457337e-06, "epoch": 2.520080321285141, "percentage": 12.6, "elapsed_time": "0:03:14", "remaining_time": "0:22:31", "throughput": 4035.0, "total_tokens": 785984} |
| {"current_steps": 1260, "total_steps": 9960, "loss": 0.1551, "lr": 9.978775373427634e-06, "epoch": 2.5301204819277108, "percentage": 12.65, "elapsed_time": "0:03:15", "remaining_time": "0:22:30", "throughput": 4036.13, "total_tokens": 789280} |
| {"current_steps": 1265, "total_steps": 9960, "loss": 0.1623, "lr": 9.977961281123146e-06, "epoch": 2.540160642570281, "percentage": 12.7, "elapsed_time": "0:03:16", "remaining_time": "0:22:29", "throughput": 4036.65, "total_tokens": 792384} |
| {"current_steps": 1270, "total_steps": 9960, "loss": 0.1581, "lr": 9.9771319030437e-06, "epoch": 2.550200803212851, "percentage": 12.75, "elapsed_time": "0:03:16", "remaining_time": "0:22:27", "throughput": 4036.93, "total_tokens": 795200} |
| {"current_steps": 1275, "total_steps": 9960, "loss": 0.1561, "lr": 9.976287241736055e-06, "epoch": 2.5602409638554215, "percentage": 12.8, "elapsed_time": "0:03:17", "remaining_time": "0:22:26", "throughput": 4037.16, "total_tokens": 798144} |
| {"current_steps": 1280, "total_steps": 9960, "loss": 0.1669, "lr": 9.975427299793908e-06, "epoch": 2.570281124497992, "percentage": 12.85, "elapsed_time": "0:03:18", "remaining_time": "0:22:25", "throughput": 4037.91, "total_tokens": 800992} |
| {"current_steps": 1285, "total_steps": 9960, "loss": 0.1416, "lr": 9.974552079857873e-06, "epoch": 2.5803212851405624, "percentage": 12.9, "elapsed_time": "0:03:19", "remaining_time": "0:22:23", "throughput": 4038.26, "total_tokens": 803872} |
| {"current_steps": 1290, "total_steps": 9960, "loss": 0.1636, "lr": 9.973661584615476e-06, "epoch": 2.5903614457831328, "percentage": 12.95, "elapsed_time": "0:03:19", "remaining_time": "0:22:23", "throughput": 4039.28, "total_tokens": 807616} |
| {"current_steps": 1295, "total_steps": 9960, "loss": 0.1555, "lr": 9.972755816801155e-06, "epoch": 2.6004016064257027, "percentage": 13.0, "elapsed_time": "0:03:20", "remaining_time": "0:22:22", "throughput": 4039.82, "total_tokens": 810592} |
| {"current_steps": 1300, "total_steps": 9960, "loss": 0.1738, "lr": 9.971834779196238e-06, "epoch": 2.610441767068273, "percentage": 13.05, "elapsed_time": "0:03:21", "remaining_time": "0:22:20", "throughput": 4040.03, "total_tokens": 812992} |
| {"current_steps": 1305, "total_steps": 9960, "loss": 0.1559, "lr": 9.970898474628951e-06, "epoch": 2.6204819277108435, "percentage": 13.1, "elapsed_time": "0:03:22", "remaining_time": "0:22:19", "throughput": 4041.42, "total_tokens": 816544} |
| {"current_steps": 1310, "total_steps": 9960, "loss": 0.1491, "lr": 9.969946905974392e-06, "epoch": 2.6305220883534135, "percentage": 13.15, "elapsed_time": "0:03:22", "remaining_time": "0:22:19", "throughput": 4043.05, "total_tokens": 819904} |
| {"current_steps": 1315, "total_steps": 9960, "loss": 0.1472, "lr": 9.968980076154533e-06, "epoch": 2.640562248995984, "percentage": 13.2, "elapsed_time": "0:03:23", "remaining_time": "0:22:17", "throughput": 4043.72, "total_tokens": 822848} |
| {"current_steps": 1320, "total_steps": 9960, "loss": 0.1526, "lr": 9.96799798813821e-06, "epoch": 2.6506024096385543, "percentage": 13.25, "elapsed_time": "0:03:24", "remaining_time": "0:22:16", "throughput": 4044.61, "total_tokens": 825696} |
| {"current_steps": 1325, "total_steps": 9960, "loss": 0.138, "lr": 9.96700064494111e-06, "epoch": 2.6606425702811247, "percentage": 13.3, "elapsed_time": "0:03:24", "remaining_time": "0:22:15", "throughput": 4044.89, "total_tokens": 828704} |
| {"current_steps": 1330, "total_steps": 9960, "loss": 0.1962, "lr": 9.965988049625763e-06, "epoch": 2.6706827309236947, "percentage": 13.35, "elapsed_time": "0:03:25", "remaining_time": "0:22:14", "throughput": 4045.17, "total_tokens": 831744} |
| {"current_steps": 1335, "total_steps": 9960, "loss": 0.1459, "lr": 9.964960205301534e-06, "epoch": 2.680722891566265, "percentage": 13.4, "elapsed_time": "0:03:26", "remaining_time": "0:22:12", "throughput": 4045.76, "total_tokens": 834720} |
| {"current_steps": 1340, "total_steps": 9960, "loss": 0.1723, "lr": 9.963917115124621e-06, "epoch": 2.6907630522088355, "percentage": 13.45, "elapsed_time": "0:03:27", "remaining_time": "0:22:12", "throughput": 4047.18, "total_tokens": 838048} |
| {"current_steps": 1345, "total_steps": 9960, "loss": 0.1566, "lr": 9.962858782298023e-06, "epoch": 2.7008032128514055, "percentage": 13.5, "elapsed_time": "0:03:27", "remaining_time": "0:22:11", "throughput": 4047.48, "total_tokens": 841216} |
| {"current_steps": 1350, "total_steps": 9960, "loss": 0.1866, "lr": 9.961785210071554e-06, "epoch": 2.710843373493976, "percentage": 13.55, "elapsed_time": "0:03:28", "remaining_time": "0:22:10", "throughput": 4048.93, "total_tokens": 844576} |
| {"current_steps": 1355, "total_steps": 9960, "loss": 0.1571, "lr": 9.960696401741825e-06, "epoch": 2.7208835341365463, "percentage": 13.6, "elapsed_time": "0:03:29", "remaining_time": "0:22:09", "throughput": 4049.85, "total_tokens": 847872} |
| {"current_steps": 1360, "total_steps": 9960, "loss": 0.1448, "lr": 9.959592360652224e-06, "epoch": 2.7309236947791167, "percentage": 13.65, "elapsed_time": "0:03:30", "remaining_time": "0:22:08", "throughput": 4049.87, "total_tokens": 850848} |
| {"current_steps": 1365, "total_steps": 9960, "loss": 0.1496, "lr": 9.95847309019292e-06, "epoch": 2.7409638554216866, "percentage": 13.7, "elapsed_time": "0:03:30", "remaining_time": "0:22:07", "throughput": 4050.12, "total_tokens": 853664} |
| {"current_steps": 1370, "total_steps": 9960, "loss": 0.1483, "lr": 9.957338593800844e-06, "epoch": 2.751004016064257, "percentage": 13.76, "elapsed_time": "0:03:31", "remaining_time": "0:22:06", "throughput": 4051.26, "total_tokens": 856928} |
| {"current_steps": 1375, "total_steps": 9960, "loss": 0.1877, "lr": 9.956188874959686e-06, "epoch": 2.7610441767068274, "percentage": 13.81, "elapsed_time": "0:03:32", "remaining_time": "0:22:05", "throughput": 4051.65, "total_tokens": 860192} |
| {"current_steps": 1380, "total_steps": 9960, "loss": 0.1748, "lr": 9.955023937199876e-06, "epoch": 2.7710843373493974, "percentage": 13.86, "elapsed_time": "0:03:33", "remaining_time": "0:22:04", "throughput": 4052.91, "total_tokens": 863616} |
| {"current_steps": 1385, "total_steps": 9960, "loss": 0.1268, "lr": 9.953843784098573e-06, "epoch": 2.781124497991968, "percentage": 13.91, "elapsed_time": "0:03:33", "remaining_time": "0:22:04", "throughput": 4054.06, "total_tokens": 867296} |
| {"current_steps": 1390, "total_steps": 9960, "loss": 0.1956, "lr": 9.952648419279662e-06, "epoch": 2.791164658634538, "percentage": 13.96, "elapsed_time": "0:03:34", "remaining_time": "0:22:03", "throughput": 4054.6, "total_tokens": 870368} |
| {"current_steps": 1395, "total_steps": 9960, "loss": 0.2096, "lr": 9.951437846413738e-06, "epoch": 2.8012048192771086, "percentage": 14.01, "elapsed_time": "0:03:35", "remaining_time": "0:22:02", "throughput": 4055.23, "total_tokens": 873472} |
| {"current_steps": 1400, "total_steps": 9960, "loss": 0.1534, "lr": 9.950212069218095e-06, "epoch": 2.8112449799196786, "percentage": 14.06, "elapsed_time": "0:03:36", "remaining_time": "0:22:00", "throughput": 4055.67, "total_tokens": 876224} |
| {"current_steps": 1405, "total_steps": 9960, "loss": 0.1606, "lr": 9.948971091456715e-06, "epoch": 2.821285140562249, "percentage": 14.11, "elapsed_time": "0:03:36", "remaining_time": "0:21:59", "throughput": 4056.56, "total_tokens": 879392} |
| {"current_steps": 1410, "total_steps": 9960, "loss": 0.1265, "lr": 9.947714916940257e-06, "epoch": 2.8313253012048194, "percentage": 14.16, "elapsed_time": "0:03:37", "remaining_time": "0:21:59", "throughput": 4057.16, "total_tokens": 882656} |
| {"current_steps": 1415, "total_steps": 9960, "loss": 0.1417, "lr": 9.946443549526041e-06, "epoch": 2.8413654618473894, "percentage": 14.21, "elapsed_time": "0:03:38", "remaining_time": "0:21:58", "throughput": 4057.48, "total_tokens": 885696} |
| {"current_steps": 1420, "total_steps": 9960, "loss": 0.1702, "lr": 9.945156993118042e-06, "epoch": 2.8514056224899598, "percentage": 14.26, "elapsed_time": "0:03:38", "remaining_time": "0:21:56", "throughput": 4058.18, "total_tokens": 888640} |
| {"current_steps": 1425, "total_steps": 9960, "loss": 0.1291, "lr": 9.943855251666873e-06, "epoch": 2.86144578313253, "percentage": 14.31, "elapsed_time": "0:03:39", "remaining_time": "0:21:56", "throughput": 4060.06, "total_tokens": 892384} |
| {"current_steps": 1430, "total_steps": 9960, "loss": 0.1453, "lr": 9.942538329169786e-06, "epoch": 2.8714859437751006, "percentage": 14.36, "elapsed_time": "0:03:40", "remaining_time": "0:21:55", "throughput": 4061.09, "total_tokens": 895328} |
| {"current_steps": 1435, "total_steps": 9960, "loss": 0.181, "lr": 9.941206229670634e-06, "epoch": 2.8815261044176705, "percentage": 14.41, "elapsed_time": "0:03:41", "remaining_time": "0:21:53", "throughput": 4061.03, "total_tokens": 897952} |
| {"current_steps": 1440, "total_steps": 9960, "loss": 0.1356, "lr": 9.939858957259887e-06, "epoch": 2.891566265060241, "percentage": 14.46, "elapsed_time": "0:03:41", "remaining_time": "0:21:53", "throughput": 4062.42, "total_tokens": 901792} |
| {"current_steps": 1445, "total_steps": 9960, "loss": 0.1256, "lr": 9.938496516074597e-06, "epoch": 2.9016064257028114, "percentage": 14.51, "elapsed_time": "0:03:42", "remaining_time": "0:21:53", "throughput": 4063.6, "total_tokens": 905664} |
| {"current_steps": 1450, "total_steps": 9960, "loss": 0.1685, "lr": 9.937118910298398e-06, "epoch": 2.9116465863453813, "percentage": 14.56, "elapsed_time": "0:03:43", "remaining_time": "0:21:51", "throughput": 4063.8, "total_tokens": 907904} |
| {"current_steps": 1455, "total_steps": 9960, "loss": 0.1314, "lr": 9.935726144161492e-06, "epoch": 2.9216867469879517, "percentage": 14.61, "elapsed_time": "0:03:44", "remaining_time": "0:21:49", "throughput": 4064.41, "total_tokens": 910816} |
| {"current_steps": 1460, "total_steps": 9960, "loss": 0.1309, "lr": 9.934318221940632e-06, "epoch": 2.931726907630522, "percentage": 14.66, "elapsed_time": "0:03:44", "remaining_time": "0:21:48", "throughput": 4064.13, "total_tokens": 913568} |
| {"current_steps": 1465, "total_steps": 9960, "loss": 0.3052, "lr": 9.932895147959106e-06, "epoch": 2.9417670682730925, "percentage": 14.71, "elapsed_time": "0:03:45", "remaining_time": "0:21:47", "throughput": 4064.61, "total_tokens": 916320} |
| {"current_steps": 1470, "total_steps": 9960, "loss": 0.1818, "lr": 9.931456926586738e-06, "epoch": 2.9518072289156625, "percentage": 14.76, "elapsed_time": "0:03:46", "remaining_time": "0:21:45", "throughput": 4065.01, "total_tokens": 919136} |
| {"current_steps": 1475, "total_steps": 9960, "loss": 0.1883, "lr": 9.930003562239858e-06, "epoch": 2.961847389558233, "percentage": 14.81, "elapsed_time": "0:03:46", "remaining_time": "0:21:44", "throughput": 4065.28, "total_tokens": 922080} |
| {"current_steps": 1480, "total_steps": 9960, "loss": 0.1681, "lr": 9.928535059381298e-06, "epoch": 2.9718875502008033, "percentage": 14.86, "elapsed_time": "0:03:47", "remaining_time": "0:21:43", "throughput": 4065.77, "total_tokens": 925088} |
| {"current_steps": 1485, "total_steps": 9960, "loss": 0.1436, "lr": 9.927051422520373e-06, "epoch": 2.9819277108433733, "percentage": 14.91, "elapsed_time": "0:03:48", "remaining_time": "0:21:42", "throughput": 4066.76, "total_tokens": 928160} |
| {"current_steps": 1490, "total_steps": 9960, "loss": 0.1555, "lr": 9.925552656212871e-06, "epoch": 2.9919678714859437, "percentage": 14.96, "elapsed_time": "0:03:48", "remaining_time": "0:21:40", "throughput": 4067.11, "total_tokens": 930688} |
| {"current_steps": 1495, "total_steps": 9960, "loss": 0.1497, "lr": 9.924038765061042e-06, "epoch": 3.002008032128514, "percentage": 15.01, "elapsed_time": "0:03:49", "remaining_time": "0:21:40", "throughput": 4065.49, "total_tokens": 933504} |
| {"current_steps": 1500, "total_steps": 9960, "loss": 0.1453, "lr": 9.922509753713572e-06, "epoch": 3.0120481927710845, "percentage": 15.06, "elapsed_time": "0:03:50", "remaining_time": "0:21:39", "throughput": 4065.44, "total_tokens": 936448} |
| {"current_steps": 1505, "total_steps": 9960, "loss": 0.1549, "lr": 9.920965626865582e-06, "epoch": 3.0220883534136544, "percentage": 15.11, "elapsed_time": "0:03:51", "remaining_time": "0:21:38", "throughput": 4065.87, "total_tokens": 939488} |
| {"current_steps": 1510, "total_steps": 9960, "loss": 0.145, "lr": 9.919406389258607e-06, "epoch": 3.032128514056225, "percentage": 15.16, "elapsed_time": "0:03:51", "remaining_time": "0:21:36", "throughput": 4066.45, "total_tokens": 942240} |
| {"current_steps": 1515, "total_steps": 9960, "loss": 0.1603, "lr": 9.917832045680584e-06, "epoch": 3.0421686746987953, "percentage": 15.21, "elapsed_time": "0:03:52", "remaining_time": "0:21:36", "throughput": 4067.86, "total_tokens": 946048} |
| {"current_steps": 1520, "total_steps": 9960, "loss": 0.1407, "lr": 9.91624260096583e-06, "epoch": 3.0522088353413657, "percentage": 15.26, "elapsed_time": "0:03:53", "remaining_time": "0:21:35", "throughput": 4067.66, "total_tokens": 948672} |
| {"current_steps": 1525, "total_steps": 9960, "loss": 0.1642, "lr": 9.91463805999504e-06, "epoch": 3.0622489959839356, "percentage": 15.31, "elapsed_time": "0:03:53", "remaining_time": "0:21:34", "throughput": 4067.92, "total_tokens": 951744} |
| {"current_steps": 1530, "total_steps": 9960, "loss": 0.1516, "lr": 9.913018427695257e-06, "epoch": 3.072289156626506, "percentage": 15.36, "elapsed_time": "0:03:54", "remaining_time": "0:21:33", "throughput": 4068.61, "total_tokens": 955136} |
| {"current_steps": 1535, "total_steps": 9960, "loss": 0.1336, "lr": 9.911383709039876e-06, "epoch": 3.0823293172690764, "percentage": 15.41, "elapsed_time": "0:03:55", "remaining_time": "0:21:32", "throughput": 4069.25, "total_tokens": 958240} |
| {"current_steps": 1540, "total_steps": 9960, "loss": 0.1601, "lr": 9.909733909048606e-06, "epoch": 3.0923694779116464, "percentage": 15.46, "elapsed_time": "0:03:56", "remaining_time": "0:21:31", "throughput": 4069.91, "total_tokens": 961056} |
| {"current_steps": 1545, "total_steps": 9960, "loss": 0.1588, "lr": 9.908069032787473e-06, "epoch": 3.102409638554217, "percentage": 15.51, "elapsed_time": "0:03:56", "remaining_time": "0:21:29", "throughput": 4069.79, "total_tokens": 963808} |
| {"current_steps": 1550, "total_steps": 9960, "loss": 0.1487, "lr": 9.906389085368792e-06, "epoch": 3.112449799196787, "percentage": 15.56, "elapsed_time": "0:03:57", "remaining_time": "0:21:29", "throughput": 4070.09, "total_tokens": 967168} |
| {"current_steps": 1555, "total_steps": 9960, "loss": 0.1448, "lr": 9.904694071951167e-06, "epoch": 3.1224899598393576, "percentage": 15.61, "elapsed_time": "0:03:58", "remaining_time": "0:21:28", "throughput": 4071.07, "total_tokens": 970272} |
| {"current_steps": 1560, "total_steps": 9960, "loss": 0.1227, "lr": 9.902983997739453e-06, "epoch": 3.1325301204819276, "percentage": 15.66, "elapsed_time": "0:03:58", "remaining_time": "0:21:26", "throughput": 4071.31, "total_tokens": 972960} |
| {"current_steps": 1565, "total_steps": 9960, "loss": 0.1283, "lr": 9.90125886798476e-06, "epoch": 3.142570281124498, "percentage": 15.71, "elapsed_time": "0:03:59", "remaining_time": "0:21:25", "throughput": 4072.02, "total_tokens": 976064} |
| {"current_steps": 1570, "total_steps": 9960, "loss": 0.1485, "lr": 9.899518687984424e-06, "epoch": 3.1526104417670684, "percentage": 15.76, "elapsed_time": "0:04:00", "remaining_time": "0:21:24", "throughput": 4072.29, "total_tokens": 979168} |
| {"current_steps": 1575, "total_steps": 9960, "loss": 0.1622, "lr": 9.897763463082e-06, "epoch": 3.1626506024096384, "percentage": 15.81, "elapsed_time": "0:04:01", "remaining_time": "0:21:24", "throughput": 4072.44, "total_tokens": 982528} |
| {"current_steps": 1580, "total_steps": 9960, "loss": 0.1166, "lr": 9.89599319866724e-06, "epoch": 3.1726907630522088, "percentage": 15.86, "elapsed_time": "0:04:01", "remaining_time": "0:21:23", "throughput": 4072.69, "total_tokens": 985472} |
| {"current_steps": 1585, "total_steps": 9960, "loss": 0.1433, "lr": 9.894207900176074e-06, "epoch": 3.182730923694779, "percentage": 15.91, "elapsed_time": "0:04:02", "remaining_time": "0:21:22", "throughput": 4073.57, "total_tokens": 988448} |
| {"current_steps": 1590, "total_steps": 9960, "loss": 0.1531, "lr": 9.892407573090603e-06, "epoch": 3.1927710843373496, "percentage": 15.96, "elapsed_time": "0:04:03", "remaining_time": "0:21:21", "throughput": 4073.91, "total_tokens": 991392} |
| {"current_steps": 1595, "total_steps": 9960, "loss": 0.191, "lr": 9.890592222939071e-06, "epoch": 3.2028112449799195, "percentage": 16.01, "elapsed_time": "0:04:03", "remaining_time": "0:21:19", "throughput": 4073.75, "total_tokens": 993760} |
| {"current_steps": 1600, "total_steps": 9960, "loss": 0.1723, "lr": 9.888761855295855e-06, "epoch": 3.21285140562249, "percentage": 16.06, "elapsed_time": "0:04:04", "remaining_time": "0:21:18", "throughput": 4074.44, "total_tokens": 997216} |
| {"current_steps": 1605, "total_steps": 9960, "loss": 0.1387, "lr": 9.886916475781448e-06, "epoch": 3.2228915662650603, "percentage": 16.11, "elapsed_time": "0:04:05", "remaining_time": "0:21:17", "throughput": 4074.51, "total_tokens": 1000160} |
| {"current_steps": 1610, "total_steps": 9960, "loss": 0.1349, "lr": 9.885056090062436e-06, "epoch": 3.2329317269076308, "percentage": 16.16, "elapsed_time": "0:04:06", "remaining_time": "0:21:17", "throughput": 4074.87, "total_tokens": 1003424} |
| {"current_steps": 1615, "total_steps": 9960, "loss": 0.1236, "lr": 9.883180703851488e-06, "epoch": 3.2429718875502007, "percentage": 16.21, "elapsed_time": "0:04:06", "remaining_time": "0:21:15", "throughput": 4074.88, "total_tokens": 1006080} |
| {"current_steps": 1620, "total_steps": 9960, "loss": 0.1659, "lr": 9.881290322907332e-06, "epoch": 3.253012048192771, "percentage": 16.27, "elapsed_time": "0:04:07", "remaining_time": "0:21:15", "throughput": 4075.58, "total_tokens": 1009472} |
| {"current_steps": 1625, "total_steps": 9960, "loss": 0.1175, "lr": 9.879384953034745e-06, "epoch": 3.2630522088353415, "percentage": 16.32, "elapsed_time": "0:04:08", "remaining_time": "0:21:14", "throughput": 4076.43, "total_tokens": 1012576} |
| {"current_steps": 1630, "total_steps": 9960, "loss": 0.1796, "lr": 9.877464600084521e-06, "epoch": 3.2730923694779115, "percentage": 16.37, "elapsed_time": "0:04:09", "remaining_time": "0:21:13", "throughput": 4077.08, "total_tokens": 1015744} |
| {"current_steps": 1635, "total_steps": 9960, "loss": 0.1309, "lr": 9.875529269953474e-06, "epoch": 3.283132530120482, "percentage": 16.42, "elapsed_time": "0:04:09", "remaining_time": "0:21:11", "throughput": 4077.15, "total_tokens": 1018336} |
| {"current_steps": 1640, "total_steps": 9960, "loss": 0.152, "lr": 9.873578968584399e-06, "epoch": 3.2931726907630523, "percentage": 16.47, "elapsed_time": "0:04:10", "remaining_time": "0:21:10", "throughput": 4077.21, "total_tokens": 1021056} |
| {"current_steps": 1645, "total_steps": 9960, "loss": 0.1473, "lr": 9.871613701966067e-06, "epoch": 3.3032128514056227, "percentage": 16.52, "elapsed_time": "0:04:11", "remaining_time": "0:21:09", "throughput": 4078.04, "total_tokens": 1024576} |
| {"current_steps": 1650, "total_steps": 9960, "loss": 0.1158, "lr": 9.869633476133205e-06, "epoch": 3.3132530120481927, "percentage": 16.57, "elapsed_time": "0:04:11", "remaining_time": "0:21:09", "throughput": 4078.74, "total_tokens": 1027840} |
| {"current_steps": 1655, "total_steps": 9960, "loss": 0.1114, "lr": 9.867638297166467e-06, "epoch": 3.323293172690763, "percentage": 16.62, "elapsed_time": "0:04:12", "remaining_time": "0:21:08", "throughput": 4079.55, "total_tokens": 1031232} |
| {"current_steps": 1660, "total_steps": 9960, "loss": 0.1627, "lr": 9.865628171192432e-06, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:04:13", "remaining_time": "0:21:07", "throughput": 4079.91, "total_tokens": 1034624} |
| {"current_steps": 1665, "total_steps": 9960, "loss": 0.1279, "lr": 9.863603104383575e-06, "epoch": 3.3433734939759034, "percentage": 16.72, "elapsed_time": "0:04:14", "remaining_time": "0:21:07", "throughput": 4080.37, "total_tokens": 1037792} |
| {"current_steps": 1670, "total_steps": 9960, "loss": 0.1248, "lr": 9.861563102958243e-06, "epoch": 3.353413654618474, "percentage": 16.77, "elapsed_time": "0:04:14", "remaining_time": "0:21:05", "throughput": 4080.04, "total_tokens": 1040352} |
| {"current_steps": 1675, "total_steps": 9960, "loss": 0.1567, "lr": 9.859508173180653e-06, "epoch": 3.3634538152610443, "percentage": 16.82, "elapsed_time": "0:04:15", "remaining_time": "0:21:04", "throughput": 4080.16, "total_tokens": 1043328} |
| {"current_steps": 1680, "total_steps": 9960, "loss": 0.1332, "lr": 9.857438321360853e-06, "epoch": 3.3734939759036147, "percentage": 16.87, "elapsed_time": "0:04:16", "remaining_time": "0:21:04", "throughput": 4080.7, "total_tokens": 1046912} |
| {"current_steps": 1685, "total_steps": 9960, "loss": 0.1333, "lr": 9.855353553854719e-06, "epoch": 3.3835341365461846, "percentage": 16.92, "elapsed_time": "0:04:17", "remaining_time": "0:21:03", "throughput": 4081.65, "total_tokens": 1050272} |
| {"current_steps": 1690, "total_steps": 9960, "loss": 0.1552, "lr": 9.853253877063922e-06, "epoch": 3.393574297188755, "percentage": 16.97, "elapsed_time": "0:04:17", "remaining_time": "0:21:02", "throughput": 4081.02, "total_tokens": 1052512} |
| {"current_steps": 1695, "total_steps": 9960, "loss": 0.1335, "lr": 9.85113929743592e-06, "epoch": 3.4036144578313254, "percentage": 17.02, "elapsed_time": "0:04:18", "remaining_time": "0:21:01", "throughput": 4081.87, "total_tokens": 1056000} |
| {"current_steps": 1700, "total_steps": 9960, "loss": 0.179, "lr": 9.849009821463931e-06, "epoch": 3.4136546184738954, "percentage": 17.07, "elapsed_time": "0:04:19", "remaining_time": "0:21:00", "throughput": 4081.8, "total_tokens": 1058624} |
| {"current_steps": 1705, "total_steps": 9960, "loss": 0.1076, "lr": 9.846865455686915e-06, "epoch": 3.423694779116466, "percentage": 17.12, "elapsed_time": "0:04:20", "remaining_time": "0:20:58", "throughput": 4081.67, "total_tokens": 1061280} |
| {"current_steps": 1710, "total_steps": 9960, "loss": 0.1559, "lr": 9.844706206689557e-06, "epoch": 3.433734939759036, "percentage": 17.17, "elapsed_time": "0:04:20", "remaining_time": "0:20:58", "throughput": 4082.2, "total_tokens": 1064576} |
| {"current_steps": 1715, "total_steps": 9960, "loss": 0.1566, "lr": 9.842532081102234e-06, "epoch": 3.4437751004016066, "percentage": 17.22, "elapsed_time": "0:04:21", "remaining_time": "0:20:56", "throughput": 4082.39, "total_tokens": 1067232} |
| {"current_steps": 1720, "total_steps": 9960, "loss": 0.1286, "lr": 9.840343085601018e-06, "epoch": 3.4538152610441766, "percentage": 17.27, "elapsed_time": "0:04:22", "remaining_time": "0:20:56", "throughput": 4082.98, "total_tokens": 1070624} |
| {"current_steps": 1725, "total_steps": 9960, "loss": 0.171, "lr": 9.838139226907631e-06, "epoch": 3.463855421686747, "percentage": 17.32, "elapsed_time": "0:04:23", "remaining_time": "0:20:55", "throughput": 4084.0, "total_tokens": 1074208} |
| {"current_steps": 1730, "total_steps": 9960, "loss": 0.1862, "lr": 9.835920511789441e-06, "epoch": 3.4738955823293174, "percentage": 17.37, "elapsed_time": "0:04:23", "remaining_time": "0:20:55", "throughput": 4085.36, "total_tokens": 1078144} |
| {"current_steps": 1735, "total_steps": 9960, "loss": 0.1129, "lr": 9.833686947059436e-06, "epoch": 3.4839357429718874, "percentage": 17.42, "elapsed_time": "0:04:24", "remaining_time": "0:20:54", "throughput": 4086.52, "total_tokens": 1081728} |
| {"current_steps": 1740, "total_steps": 9960, "loss": 0.1192, "lr": 9.831438539576194e-06, "epoch": 3.4939759036144578, "percentage": 17.47, "elapsed_time": "0:04:25", "remaining_time": "0:20:53", "throughput": 4086.34, "total_tokens": 1084320} |
| {"current_steps": 1745, "total_steps": 9960, "loss": 0.1328, "lr": 9.829175296243885e-06, "epoch": 3.504016064257028, "percentage": 17.52, "elapsed_time": "0:04:26", "remaining_time": "0:20:52", "throughput": 4086.56, "total_tokens": 1087168} |
| {"current_steps": 1750, "total_steps": 9960, "loss": 0.1197, "lr": 9.826897224012221e-06, "epoch": 3.5140562248995986, "percentage": 17.57, "elapsed_time": "0:04:26", "remaining_time": "0:20:51", "throughput": 4087.08, "total_tokens": 1090304} |
| {"current_steps": 1755, "total_steps": 9960, "loss": 0.1532, "lr": 9.82460432987646e-06, "epoch": 3.5240963855421685, "percentage": 17.62, "elapsed_time": "0:04:27", "remaining_time": "0:20:50", "throughput": 4087.58, "total_tokens": 1093248} |
| {"current_steps": 1760, "total_steps": 9960, "loss": 0.1532, "lr": 9.822296620877364e-06, "epoch": 3.534136546184739, "percentage": 17.67, "elapsed_time": "0:04:28", "remaining_time": "0:20:49", "throughput": 4088.14, "total_tokens": 1096160} |
| {"current_steps": 1765, "total_steps": 9960, "loss": 0.1268, "lr": 9.819974104101198e-06, "epoch": 3.5441767068273093, "percentage": 17.72, "elapsed_time": "0:04:28", "remaining_time": "0:20:48", "throughput": 4088.3, "total_tokens": 1099712} |
| {"current_steps": 1770, "total_steps": 9960, "loss": 0.1301, "lr": 9.817636786679682e-06, "epoch": 3.5542168674698793, "percentage": 17.77, "elapsed_time": "0:04:29", "remaining_time": "0:20:47", "throughput": 4088.19, "total_tokens": 1102528} |
| {"current_steps": 1775, "total_steps": 9960, "loss": 0.1342, "lr": 9.815284675789999e-06, "epoch": 3.5642570281124497, "percentage": 17.82, "elapsed_time": "0:04:30", "remaining_time": "0:20:47", "throughput": 4089.23, "total_tokens": 1106368} |
| {"current_steps": 1780, "total_steps": 9960, "loss": 0.1162, "lr": 9.81291777865475e-06, "epoch": 3.57429718875502, "percentage": 17.87, "elapsed_time": "0:04:31", "remaining_time": "0:20:46", "throughput": 4089.35, "total_tokens": 1109344} |
| {"current_steps": 1785, "total_steps": 9960, "loss": 0.0825, "lr": 9.810536102541941e-06, "epoch": 3.5843373493975905, "percentage": 17.92, "elapsed_time": "0:04:32", "remaining_time": "0:20:45", "throughput": 4089.53, "total_tokens": 1112480} |
| {"current_steps": 1790, "total_steps": 9960, "loss": 0.2169, "lr": 9.808139654764962e-06, "epoch": 3.5943775100401605, "percentage": 17.97, "elapsed_time": "0:04:32", "remaining_time": "0:20:44", "throughput": 4089.66, "total_tokens": 1115104} |
| {"current_steps": 1795, "total_steps": 9960, "loss": 0.21, "lr": 9.80572844268256e-06, "epoch": 3.604417670682731, "percentage": 18.02, "elapsed_time": "0:04:33", "remaining_time": "0:20:42", "throughput": 4089.63, "total_tokens": 1117376} |
| {"current_steps": 1800, "total_steps": 9960, "loss": 0.1643, "lr": 9.80330247369882e-06, "epoch": 3.6144578313253013, "percentage": 18.07, "elapsed_time": "0:04:33", "remaining_time": "0:20:41", "throughput": 4090.51, "total_tokens": 1120576} |
| {"current_steps": 1805, "total_steps": 9960, "loss": 0.1449, "lr": 9.800861755263141e-06, "epoch": 3.6244979919678713, "percentage": 18.12, "elapsed_time": "0:04:34", "remaining_time": "0:20:40", "throughput": 4091.04, "total_tokens": 1123712} |
| {"current_steps": 1810, "total_steps": 9960, "loss": 0.1334, "lr": 9.79840629487021e-06, "epoch": 3.6345381526104417, "percentage": 18.17, "elapsed_time": "0:04:35", "remaining_time": "0:20:40", "throughput": 4091.56, "total_tokens": 1127232} |
| {"current_steps": 1815, "total_steps": 9960, "loss": 0.1875, "lr": 9.795936100059986e-06, "epoch": 3.644578313253012, "percentage": 18.22, "elapsed_time": "0:04:36", "remaining_time": "0:20:39", "throughput": 4091.5, "total_tokens": 1130016} |
| {"current_steps": 1820, "total_steps": 9960, "loss": 0.1633, "lr": 9.79345117841767e-06, "epoch": 3.6546184738955825, "percentage": 18.27, "elapsed_time": "0:04:37", "remaining_time": "0:20:38", "throughput": 4092.27, "total_tokens": 1133632} |
| {"current_steps": 1825, "total_steps": 9960, "loss": 0.1679, "lr": 9.790951537573686e-06, "epoch": 3.664658634538153, "percentage": 18.32, "elapsed_time": "0:04:37", "remaining_time": "0:20:37", "throughput": 4092.31, "total_tokens": 1136512} |
| {"current_steps": 1830, "total_steps": 9960, "loss": 0.152, "lr": 9.788437185203655e-06, "epoch": 3.674698795180723, "percentage": 18.37, "elapsed_time": "0:04:38", "remaining_time": "0:20:36", "throughput": 4092.32, "total_tokens": 1139424} |
| {"current_steps": 1835, "total_steps": 9960, "loss": 0.1428, "lr": 9.785908129028374e-06, "epoch": 3.6847389558232932, "percentage": 18.42, "elapsed_time": "0:04:39", "remaining_time": "0:20:36", "throughput": 4093.39, "total_tokens": 1142976} |
| {"current_steps": 1840, "total_steps": 9960, "loss": 0.1709, "lr": 9.78336437681379e-06, "epoch": 3.694779116465863, "percentage": 18.47, "elapsed_time": "0:04:40", "remaining_time": "0:20:35", "throughput": 4094.3, "total_tokens": 1146624} |
| {"current_steps": 1845, "total_steps": 9960, "loss": 0.1462, "lr": 9.780805936370976e-06, "epoch": 3.7048192771084336, "percentage": 18.52, "elapsed_time": "0:04:40", "remaining_time": "0:20:34", "throughput": 4095.19, "total_tokens": 1149632} |
| {"current_steps": 1850, "total_steps": 9960, "loss": 0.1669, "lr": 9.77823281555611e-06, "epoch": 3.714859437751004, "percentage": 18.57, "elapsed_time": "0:04:41", "remaining_time": "0:20:34", "throughput": 4096.63, "total_tokens": 1153760} |
| {"current_steps": 1855, "total_steps": 9960, "loss": 0.1465, "lr": 9.775645022270448e-06, "epoch": 3.7248995983935744, "percentage": 18.62, "elapsed_time": "0:04:42", "remaining_time": "0:20:33", "throughput": 4097.37, "total_tokens": 1156992} |
| {"current_steps": 1860, "total_steps": 9960, "loss": 0.1401, "lr": 9.773042564460299e-06, "epoch": 3.734939759036145, "percentage": 18.67, "elapsed_time": "0:04:43", "remaining_time": "0:20:32", "throughput": 4097.61, "total_tokens": 1160032} |
| {"current_steps": 1865, "total_steps": 9960, "loss": 0.1428, "lr": 9.770425450117005e-06, "epoch": 3.744979919678715, "percentage": 18.72, "elapsed_time": "0:04:44", "remaining_time": "0:20:32", "throughput": 4098.44, "total_tokens": 1164128} |
| {"current_steps": 1870, "total_steps": 9960, "loss": 0.123, "lr": 9.767793687276913e-06, "epoch": 3.755020080321285, "percentage": 18.78, "elapsed_time": "0:04:44", "remaining_time": "0:20:31", "throughput": 4098.88, "total_tokens": 1167264} |
| {"current_steps": 1875, "total_steps": 9960, "loss": 0.1539, "lr": 9.76514728402135e-06, "epoch": 3.765060240963855, "percentage": 18.83, "elapsed_time": "0:04:45", "remaining_time": "0:20:30", "throughput": 4098.73, "total_tokens": 1169920} |
| {"current_steps": 1880, "total_steps": 9960, "loss": 0.1462, "lr": 9.762486248476597e-06, "epoch": 3.7751004016064256, "percentage": 18.88, "elapsed_time": "0:04:46", "remaining_time": "0:20:29", "throughput": 4099.1, "total_tokens": 1172640} |
| {"current_steps": 1885, "total_steps": 9960, "loss": 0.1893, "lr": 9.759810588813872e-06, "epoch": 3.785140562248996, "percentage": 18.93, "elapsed_time": "0:04:46", "remaining_time": "0:20:27", "throughput": 4098.79, "total_tokens": 1174816} |
| {"current_steps": 1890, "total_steps": 9960, "loss": 0.1554, "lr": 9.757120313249292e-06, "epoch": 3.7951807228915664, "percentage": 18.98, "elapsed_time": "0:04:47", "remaining_time": "0:20:26", "throughput": 4098.69, "total_tokens": 1177568} |
| {"current_steps": 1895, "total_steps": 9960, "loss": 0.1431, "lr": 9.754415430043864e-06, "epoch": 3.805220883534137, "percentage": 19.03, "elapsed_time": "0:04:48", "remaining_time": "0:20:26", "throughput": 4099.69, "total_tokens": 1181472} |
| {"current_steps": 1900, "total_steps": 9960, "loss": 0.1324, "lr": 9.751695947503442e-06, "epoch": 3.8152610441767068, "percentage": 19.08, "elapsed_time": "0:04:48", "remaining_time": "0:20:25", "throughput": 4099.44, "total_tokens": 1184064} |
| {"current_steps": 1905, "total_steps": 9960, "loss": 0.1494, "lr": 9.748961873978713e-06, "epoch": 3.825301204819277, "percentage": 19.13, "elapsed_time": "0:04:49", "remaining_time": "0:20:24", "throughput": 4099.24, "total_tokens": 1186976} |
| {"current_steps": 1910, "total_steps": 9960, "loss": 0.1644, "lr": 9.74621321786517e-06, "epoch": 3.835341365461847, "percentage": 19.18, "elapsed_time": "0:04:50", "remaining_time": "0:20:23", "throughput": 4099.41, "total_tokens": 1190080} |
| {"current_steps": 1915, "total_steps": 9960, "loss": 0.1484, "lr": 9.743449987603082e-06, "epoch": 3.8453815261044175, "percentage": 19.23, "elapsed_time": "0:04:50", "remaining_time": "0:20:22", "throughput": 4099.73, "total_tokens": 1192800} |
| {"current_steps": 1920, "total_steps": 9960, "loss": 0.1237, "lr": 9.740672191677474e-06, "epoch": 3.855421686746988, "percentage": 19.28, "elapsed_time": "0:04:51", "remaining_time": "0:20:21", "throughput": 4100.23, "total_tokens": 1195936} |
| {"current_steps": 1925, "total_steps": 9960, "loss": 0.1634, "lr": 9.737879838618095e-06, "epoch": 3.8654618473895583, "percentage": 19.33, "elapsed_time": "0:04:52", "remaining_time": "0:20:20", "throughput": 4100.42, "total_tokens": 1199232} |
| {"current_steps": 1930, "total_steps": 9960, "loss": 0.1529, "lr": 9.735072936999392e-06, "epoch": 3.8755020080321287, "percentage": 19.38, "elapsed_time": "0:04:53", "remaining_time": "0:20:19", "throughput": 4101.02, "total_tokens": 1202464} |
| {"current_steps": 1935, "total_steps": 9960, "loss": 0.1659, "lr": 9.732251495440495e-06, "epoch": 3.8855421686746987, "percentage": 19.43, "elapsed_time": "0:04:53", "remaining_time": "0:20:18", "throughput": 4101.83, "total_tokens": 1205632} |
| {"current_steps": 1940, "total_steps": 9960, "loss": 0.1869, "lr": 9.729415522605171e-06, "epoch": 3.895582329317269, "percentage": 19.48, "elapsed_time": "0:04:54", "remaining_time": "0:20:18", "throughput": 4102.57, "total_tokens": 1208768} |
| {"current_steps": 1945, "total_steps": 9960, "loss": 0.14, "lr": 9.726565027201813e-06, "epoch": 3.9056224899598395, "percentage": 19.53, "elapsed_time": "0:04:55", "remaining_time": "0:20:17", "throughput": 4102.61, "total_tokens": 1211872} |
| {"current_steps": 1950, "total_steps": 9960, "loss": 0.1424, "lr": 9.72370001798341e-06, "epoch": 3.9156626506024095, "percentage": 19.58, "elapsed_time": "0:04:56", "remaining_time": "0:20:16", "throughput": 4103.42, "total_tokens": 1215360} |
| {"current_steps": 1955, "total_steps": 9960, "loss": 0.1403, "lr": 9.720820503747517e-06, "epoch": 3.92570281124498, "percentage": 19.63, "elapsed_time": "0:04:56", "remaining_time": "0:20:15", "throughput": 4103.5, "total_tokens": 1218080} |
| {"current_steps": 1960, "total_steps": 9960, "loss": 0.167, "lr": 9.717926493336227e-06, "epoch": 3.9357429718875503, "percentage": 19.68, "elapsed_time": "0:04:57", "remaining_time": "0:20:14", "throughput": 4104.05, "total_tokens": 1221216} |
| {"current_steps": 1965, "total_steps": 9960, "loss": 0.1711, "lr": 9.715017995636151e-06, "epoch": 3.9457831325301207, "percentage": 19.73, "elapsed_time": "0:04:58", "remaining_time": "0:20:13", "throughput": 4104.2, "total_tokens": 1224096} |
| {"current_steps": 1970, "total_steps": 9960, "loss": 0.1591, "lr": 9.712095019578382e-06, "epoch": 3.9558232931726907, "percentage": 19.78, "elapsed_time": "0:04:59", "remaining_time": "0:20:12", "throughput": 4105.13, "total_tokens": 1227584} |
| {"current_steps": 1975, "total_steps": 9960, "loss": 0.1023, "lr": 9.70915757413847e-06, "epoch": 3.965863453815261, "percentage": 19.83, "elapsed_time": "0:04:59", "remaining_time": "0:20:12", "throughput": 4105.02, "total_tokens": 1230592} |
| {"current_steps": 1980, "total_steps": 9960, "loss": 0.1923, "lr": 9.706205668336404e-06, "epoch": 3.9759036144578315, "percentage": 19.88, "elapsed_time": "0:05:00", "remaining_time": "0:20:11", "throughput": 4105.98, "total_tokens": 1234592} |
| {"current_steps": 1985, "total_steps": 9960, "loss": 0.1158, "lr": 9.703239311236567e-06, "epoch": 3.9859437751004014, "percentage": 19.93, "elapsed_time": "0:05:01", "remaining_time": "0:20:11", "throughput": 4107.34, "total_tokens": 1238464} |
| {"current_steps": 1990, "total_steps": 9960, "loss": 0.1786, "lr": 9.700258511947722e-06, "epoch": 3.995983935742972, "percentage": 19.98, "elapsed_time": "0:05:02", "remaining_time": "0:20:10", "throughput": 4108.06, "total_tokens": 1241760} |
| {"current_steps": 1992, "total_steps": 9960, "eval_loss": 0.16210927069187164, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:05:10", "remaining_time": "0:20:42", "throughput": 4001.22, "total_tokens": 1242912} |
| {"current_steps": 1995, "total_steps": 9960, "loss": 0.1517, "lr": 9.697263279622982e-06, "epoch": 4.006024096385542, "percentage": 20.03, "elapsed_time": "0:05:12", "remaining_time": "0:20:47", "throughput": 3986.07, "total_tokens": 1245120} |
| {"current_steps": 2000, "total_steps": 9960, "loss": 0.1196, "lr": 9.694253623459773e-06, "epoch": 4.016064257028113, "percentage": 20.08, "elapsed_time": "0:05:13", "remaining_time": "0:20:46", "throughput": 3984.98, "total_tokens": 1247680} |
| {"current_steps": 2005, "total_steps": 9960, "loss": 0.1008, "lr": 9.691229552699817e-06, "epoch": 4.026104417670683, "percentage": 20.13, "elapsed_time": "0:05:13", "remaining_time": "0:20:45", "throughput": 3985.58, "total_tokens": 1250944} |
| {"current_steps": 2010, "total_steps": 9960, "loss": 0.0652, "lr": 9.688191076629096e-06, "epoch": 4.036144578313253, "percentage": 20.18, "elapsed_time": "0:05:14", "remaining_time": "0:20:44", "throughput": 3986.34, "total_tokens": 1253888} |
| {"current_steps": 2015, "total_steps": 9960, "loss": 0.1276, "lr": 9.685138204577829e-06, "epoch": 4.046184738955823, "percentage": 20.23, "elapsed_time": "0:05:15", "remaining_time": "0:20:43", "throughput": 3987.19, "total_tokens": 1257312} |
| {"current_steps": 2020, "total_steps": 9960, "loss": 0.1594, "lr": 9.682070945920437e-06, "epoch": 4.056224899598393, "percentage": 20.28, "elapsed_time": "0:05:16", "remaining_time": "0:20:42", "throughput": 3988.22, "total_tokens": 1260320} |
| {"current_steps": 2025, "total_steps": 9960, "loss": 0.1536, "lr": 9.678989310075524e-06, "epoch": 4.066265060240964, "percentage": 20.33, "elapsed_time": "0:05:16", "remaining_time": "0:20:41", "throughput": 3989.12, "total_tokens": 1263968} |
| {"current_steps": 2030, "total_steps": 9960, "loss": 0.2476, "lr": 9.675893306505834e-06, "epoch": 4.076305220883534, "percentage": 20.38, "elapsed_time": "0:05:17", "remaining_time": "0:20:40", "throughput": 3989.54, "total_tokens": 1266912} |
| {"current_steps": 2035, "total_steps": 9960, "loss": 0.1311, "lr": 9.672782944718234e-06, "epoch": 4.086345381526105, "percentage": 20.43, "elapsed_time": "0:05:18", "remaining_time": "0:20:39", "throughput": 3990.05, "total_tokens": 1270016} |
| {"current_steps": 2040, "total_steps": 9960, "loss": 0.1124, "lr": 9.669658234263682e-06, "epoch": 4.096385542168675, "percentage": 20.48, "elapsed_time": "0:05:19", "remaining_time": "0:20:39", "throughput": 3991.13, "total_tokens": 1273984} |
| {"current_steps": 2045, "total_steps": 9960, "loss": 0.1372, "lr": 9.666519184737193e-06, "epoch": 4.106425702811245, "percentage": 20.53, "elapsed_time": "0:05:19", "remaining_time": "0:20:38", "throughput": 3991.48, "total_tokens": 1276992} |
| {"current_steps": 2050, "total_steps": 9960, "loss": 0.1124, "lr": 9.663365805777815e-06, "epoch": 4.116465863453815, "percentage": 20.58, "elapsed_time": "0:05:20", "remaining_time": "0:20:36", "throughput": 3991.83, "total_tokens": 1279520} |
| {"current_steps": 2055, "total_steps": 9960, "loss": 0.1087, "lr": 9.660198107068597e-06, "epoch": 4.126506024096385, "percentage": 20.63, "elapsed_time": "0:05:21", "remaining_time": "0:20:35", "throughput": 3992.36, "total_tokens": 1282496} |
| {"current_steps": 2060, "total_steps": 9960, "loss": 0.1004, "lr": 9.657016098336557e-06, "epoch": 4.136546184738956, "percentage": 20.68, "elapsed_time": "0:05:21", "remaining_time": "0:20:34", "throughput": 3992.42, "total_tokens": 1284960} |
| {"current_steps": 2065, "total_steps": 9960, "loss": 0.1622, "lr": 9.65381978935266e-06, "epoch": 4.146586345381526, "percentage": 20.73, "elapsed_time": "0:05:22", "remaining_time": "0:20:33", "throughput": 3993.42, "total_tokens": 1288544} |
| {"current_steps": 2070, "total_steps": 9960, "loss": 0.1515, "lr": 9.650609189931778e-06, "epoch": 4.156626506024097, "percentage": 20.78, "elapsed_time": "0:05:23", "remaining_time": "0:20:32", "throughput": 3994.26, "total_tokens": 1291904} |
| {"current_steps": 2075, "total_steps": 9960, "loss": 0.1402, "lr": 9.647384309932665e-06, "epoch": 4.166666666666667, "percentage": 20.83, "elapsed_time": "0:05:24", "remaining_time": "0:20:31", "throughput": 3994.33, "total_tokens": 1294880} |
| {"current_steps": 2080, "total_steps": 9960, "loss": 0.194, "lr": 9.644145159257928e-06, "epoch": 4.176706827309237, "percentage": 20.88, "elapsed_time": "0:05:24", "remaining_time": "0:20:31", "throughput": 3995.39, "total_tokens": 1298432} |
| {"current_steps": 2085, "total_steps": 9960, "loss": 0.1261, "lr": 9.640891747853995e-06, "epoch": 4.186746987951807, "percentage": 20.93, "elapsed_time": "0:05:25", "remaining_time": "0:20:30", "throughput": 3996.17, "total_tokens": 1301568} |
| {"current_steps": 2090, "total_steps": 9960, "loss": 0.1632, "lr": 9.63762408571108e-06, "epoch": 4.196787148594377, "percentage": 20.98, "elapsed_time": "0:05:26", "remaining_time": "0:20:28", "throughput": 3996.56, "total_tokens": 1304288} |
| {"current_steps": 2095, "total_steps": 9960, "loss": 0.1517, "lr": 9.634342182863163e-06, "epoch": 4.206827309236948, "percentage": 21.03, "elapsed_time": "0:05:26", "remaining_time": "0:20:27", "throughput": 3996.65, "total_tokens": 1306784} |
| {"current_steps": 2100, "total_steps": 9960, "loss": 0.1451, "lr": 9.63104604938795e-06, "epoch": 4.216867469879518, "percentage": 21.08, "elapsed_time": "0:05:27", "remaining_time": "0:20:26", "throughput": 3997.29, "total_tokens": 1309760} |
| {"current_steps": 2105, "total_steps": 9960, "loss": 0.1158, "lr": 9.627735695406842e-06, "epoch": 4.2269076305220885, "percentage": 21.13, "elapsed_time": "0:05:28", "remaining_time": "0:20:25", "throughput": 3997.8, "total_tokens": 1312928} |
| {"current_steps": 2110, "total_steps": 9960, "loss": 0.1374, "lr": 9.62441113108491e-06, "epoch": 4.236947791164659, "percentage": 21.18, "elapsed_time": "0:05:29", "remaining_time": "0:20:24", "throughput": 3998.58, "total_tokens": 1316192} |
| {"current_steps": 2115, "total_steps": 9960, "loss": 0.1737, "lr": 9.621072366630859e-06, "epoch": 4.246987951807229, "percentage": 21.23, "elapsed_time": "0:05:29", "remaining_time": "0:20:23", "throughput": 3999.16, "total_tokens": 1319072} |
| {"current_steps": 2120, "total_steps": 9960, "loss": 0.1038, "lr": 9.617719412297002e-06, "epoch": 4.257028112449799, "percentage": 21.29, "elapsed_time": "0:05:30", "remaining_time": "0:20:22", "throughput": 3999.15, "total_tokens": 1321760} |
| {"current_steps": 2125, "total_steps": 9960, "loss": 0.1736, "lr": 9.614352278379217e-06, "epoch": 4.267068273092369, "percentage": 21.34, "elapsed_time": "0:05:31", "remaining_time": "0:20:21", "throughput": 4000.15, "total_tokens": 1325600} |
| {"current_steps": 2130, "total_steps": 9960, "loss": 0.1458, "lr": 9.610970975216933e-06, "epoch": 4.27710843373494, "percentage": 21.39, "elapsed_time": "0:05:32", "remaining_time": "0:20:21", "throughput": 4000.98, "total_tokens": 1328992} |
| {"current_steps": 2135, "total_steps": 9960, "loss": 0.1654, "lr": 9.60757551319308e-06, "epoch": 4.28714859437751, "percentage": 21.44, "elapsed_time": "0:05:33", "remaining_time": "0:20:20", "throughput": 4002.36, "total_tokens": 1333152} |
| {"current_steps": 2140, "total_steps": 9960, "loss": 0.1271, "lr": 9.604165902734069e-06, "epoch": 4.2971887550200805, "percentage": 21.49, "elapsed_time": "0:05:33", "remaining_time": "0:20:19", "throughput": 4002.06, "total_tokens": 1335488} |
| {"current_steps": 2145, "total_steps": 9960, "loss": 0.1365, "lr": 9.600742154309756e-06, "epoch": 4.307228915662651, "percentage": 21.54, "elapsed_time": "0:05:34", "remaining_time": "0:20:18", "throughput": 4002.52, "total_tokens": 1338720} |
| {"current_steps": 2150, "total_steps": 9960, "loss": 0.1103, "lr": 9.59730427843341e-06, "epoch": 4.317269076305221, "percentage": 21.59, "elapsed_time": "0:05:35", "remaining_time": "0:20:17", "throughput": 4003.23, "total_tokens": 1342272} |
| {"current_steps": 2155, "total_steps": 9960, "loss": 0.1479, "lr": 9.593852285661684e-06, "epoch": 4.327309236947791, "percentage": 21.64, "elapsed_time": "0:05:35", "remaining_time": "0:20:16", "throughput": 4003.23, "total_tokens": 1344704} |
| {"current_steps": 2160, "total_steps": 9960, "loss": 0.1618, "lr": 9.590386186594574e-06, "epoch": 4.337349397590361, "percentage": 21.69, "elapsed_time": "0:05:36", "remaining_time": "0:20:15", "throughput": 4003.61, "total_tokens": 1347392} |
| {"current_steps": 2165, "total_steps": 9960, "loss": 0.1114, "lr": 9.586905991875397e-06, "epoch": 4.347389558232932, "percentage": 21.74, "elapsed_time": "0:05:37", "remaining_time": "0:20:14", "throughput": 4004.56, "total_tokens": 1350912} |
| {"current_steps": 2170, "total_steps": 9960, "loss": 0.1447, "lr": 9.583411712190749e-06, "epoch": 4.357429718875502, "percentage": 21.79, "elapsed_time": "0:05:38", "remaining_time": "0:20:13", "throughput": 4004.6, "total_tokens": 1353824} |
| {"current_steps": 2175, "total_steps": 9960, "loss": 0.1457, "lr": 9.579903358270482e-06, "epoch": 4.367469879518072, "percentage": 21.84, "elapsed_time": "0:05:38", "remaining_time": "0:20:12", "throughput": 4004.5, "total_tokens": 1356416} |
| {"current_steps": 2180, "total_steps": 9960, "loss": 0.1266, "lr": 9.576380940887661e-06, "epoch": 4.377510040160643, "percentage": 21.89, "elapsed_time": "0:05:39", "remaining_time": "0:20:11", "throughput": 4005.17, "total_tokens": 1359712} |
| {"current_steps": 2185, "total_steps": 9960, "loss": 0.1224, "lr": 9.572844470858537e-06, "epoch": 4.387550200803213, "percentage": 21.94, "elapsed_time": "0:05:40", "remaining_time": "0:20:10", "throughput": 4005.5, "total_tokens": 1362368} |
| {"current_steps": 2190, "total_steps": 9960, "loss": 0.1564, "lr": 9.569293959042513e-06, "epoch": 4.397590361445783, "percentage": 21.99, "elapsed_time": "0:05:40", "remaining_time": "0:20:09", "throughput": 4005.92, "total_tokens": 1365312} |
| {"current_steps": 2195, "total_steps": 9960, "loss": 0.2695, "lr": 9.56572941634211e-06, "epoch": 4.407630522088353, "percentage": 22.04, "elapsed_time": "0:05:41", "remaining_time": "0:20:08", "throughput": 4006.06, "total_tokens": 1368416} |
| {"current_steps": 2200, "total_steps": 9960, "loss": 0.1535, "lr": 9.562150853702931e-06, "epoch": 4.417670682730924, "percentage": 22.09, "elapsed_time": "0:05:42", "remaining_time": "0:20:07", "throughput": 4006.37, "total_tokens": 1371456} |
| {"current_steps": 2205, "total_steps": 9960, "loss": 0.1436, "lr": 9.558558282113634e-06, "epoch": 4.427710843373494, "percentage": 22.14, "elapsed_time": "0:05:43", "remaining_time": "0:20:06", "throughput": 4007.2, "total_tokens": 1375040} |
| {"current_steps": 2210, "total_steps": 9960, "loss": 0.1319, "lr": 9.554951712605891e-06, "epoch": 4.437751004016064, "percentage": 22.19, "elapsed_time": "0:05:44", "remaining_time": "0:20:06", "throughput": 4008.63, "total_tokens": 1379104} |
| {"current_steps": 2215, "total_steps": 9960, "loss": 0.1159, "lr": 9.551331156254358e-06, "epoch": 4.447791164658635, "percentage": 22.24, "elapsed_time": "0:05:44", "remaining_time": "0:20:05", "throughput": 4009.68, "total_tokens": 1382848} |
| {"current_steps": 2220, "total_steps": 9960, "loss": 0.1147, "lr": 9.547696624176642e-06, "epoch": 4.457831325301205, "percentage": 22.29, "elapsed_time": "0:05:45", "remaining_time": "0:20:05", "throughput": 4010.43, "total_tokens": 1386432} |
| {"current_steps": 2225, "total_steps": 9960, "loss": 0.1554, "lr": 9.544048127533262e-06, "epoch": 4.467871485943775, "percentage": 22.34, "elapsed_time": "0:05:46", "remaining_time": "0:20:04", "throughput": 4011.22, "total_tokens": 1390048} |
| {"current_steps": 2230, "total_steps": 9960, "loss": 0.2124, "lr": 9.540385677527617e-06, "epoch": 4.477911646586345, "percentage": 22.39, "elapsed_time": "0:05:47", "remaining_time": "0:20:03", "throughput": 4011.95, "total_tokens": 1393344} |
| {"current_steps": 2235, "total_steps": 9960, "loss": 0.1014, "lr": 9.53670928540596e-06, "epoch": 4.4879518072289155, "percentage": 22.44, "elapsed_time": "0:05:47", "remaining_time": "0:20:02", "throughput": 4012.25, "total_tokens": 1396096} |
| {"current_steps": 2240, "total_steps": 9960, "loss": 0.0831, "lr": 9.533018962457347e-06, "epoch": 4.497991967871486, "percentage": 22.49, "elapsed_time": "0:05:48", "remaining_time": "0:20:01", "throughput": 4012.78, "total_tokens": 1399168} |
| {"current_steps": 2245, "total_steps": 9960, "loss": 0.1206, "lr": 9.529314720013618e-06, "epoch": 4.508032128514056, "percentage": 22.54, "elapsed_time": "0:05:49", "remaining_time": "0:20:01", "throughput": 4013.72, "total_tokens": 1402976} |
| {"current_steps": 2250, "total_steps": 9960, "loss": 0.1574, "lr": 9.52559656944935e-06, "epoch": 4.518072289156627, "percentage": 22.59, "elapsed_time": "0:05:50", "remaining_time": "0:20:00", "throughput": 4014.43, "total_tokens": 1406496} |
| {"current_steps": 2255, "total_steps": 9960, "loss": 0.1341, "lr": 9.521864522181834e-06, "epoch": 4.528112449799197, "percentage": 22.64, "elapsed_time": "0:05:51", "remaining_time": "0:19:59", "throughput": 4014.74, "total_tokens": 1409344} |
| {"current_steps": 2260, "total_steps": 9960, "loss": 0.1485, "lr": 9.518118589671025e-06, "epoch": 4.538152610441767, "percentage": 22.69, "elapsed_time": "0:05:51", "remaining_time": "0:19:58", "throughput": 4015.19, "total_tokens": 1412544} |
| {"current_steps": 2265, "total_steps": 9960, "loss": 0.1299, "lr": 9.514358783419518e-06, "epoch": 4.548192771084337, "percentage": 22.74, "elapsed_time": "0:05:52", "remaining_time": "0:19:57", "throughput": 4015.74, "total_tokens": 1415680} |
| {"current_steps": 2270, "total_steps": 9960, "loss": 0.2371, "lr": 9.510585114972518e-06, "epoch": 4.5582329317269075, "percentage": 22.79, "elapsed_time": "0:05:53", "remaining_time": "0:19:56", "throughput": 4016.07, "total_tokens": 1419040} |
| {"current_steps": 2275, "total_steps": 9960, "loss": 0.1112, "lr": 9.506797595917787e-06, "epoch": 4.568273092369478, "percentage": 22.84, "elapsed_time": "0:05:54", "remaining_time": "0:19:55", "throughput": 4016.67, "total_tokens": 1422048} |
| {"current_steps": 2280, "total_steps": 9960, "loss": 0.1132, "lr": 9.502996237885623e-06, "epoch": 4.578313253012048, "percentage": 22.89, "elapsed_time": "0:05:54", "remaining_time": "0:19:55", "throughput": 4017.47, "total_tokens": 1425504} |
| {"current_steps": 2285, "total_steps": 9960, "loss": 0.1357, "lr": 9.499181052548813e-06, "epoch": 4.588353413654619, "percentage": 22.94, "elapsed_time": "0:05:55", "remaining_time": "0:19:54", "throughput": 4017.82, "total_tokens": 1428608} |
| {"current_steps": 2290, "total_steps": 9960, "loss": 0.1227, "lr": 9.495352051622612e-06, "epoch": 4.598393574297189, "percentage": 22.99, "elapsed_time": "0:05:56", "remaining_time": "0:19:52", "throughput": 4017.38, "total_tokens": 1430752} |
| {"current_steps": 2295, "total_steps": 9960, "loss": 0.1814, "lr": 9.491509246864691e-06, "epoch": 4.608433734939759, "percentage": 23.04, "elapsed_time": "0:05:56", "remaining_time": "0:19:51", "throughput": 4017.8, "total_tokens": 1433600} |
| {"current_steps": 2300, "total_steps": 9960, "loss": 0.1479, "lr": 9.487652650075116e-06, "epoch": 4.618473895582329, "percentage": 23.09, "elapsed_time": "0:05:57", "remaining_time": "0:19:50", "throughput": 4018.3, "total_tokens": 1436352} |
| {"current_steps": 2305, "total_steps": 9960, "loss": 0.1756, "lr": 9.483782273096295e-06, "epoch": 4.628514056224899, "percentage": 23.14, "elapsed_time": "0:05:58", "remaining_time": "0:19:49", "throughput": 4018.66, "total_tokens": 1439296} |
| {"current_steps": 2310, "total_steps": 9960, "loss": 0.1802, "lr": 9.479898127812957e-06, "epoch": 4.63855421686747, "percentage": 23.19, "elapsed_time": "0:05:59", "remaining_time": "0:19:49", "throughput": 4019.9, "total_tokens": 1443456} |
| {"current_steps": 2315, "total_steps": 9960, "loss": 0.1391, "lr": 9.476000226152107e-06, "epoch": 4.64859437751004, "percentage": 23.24, "elapsed_time": "0:05:59", "remaining_time": "0:19:48", "throughput": 4020.26, "total_tokens": 1446624} |
| {"current_steps": 2320, "total_steps": 9960, "loss": 0.1071, "lr": 9.472088580082991e-06, "epoch": 4.658634538152611, "percentage": 23.29, "elapsed_time": "0:06:00", "remaining_time": "0:19:47", "throughput": 4020.62, "total_tokens": 1450016} |
| {"current_steps": 2325, "total_steps": 9960, "loss": 0.1438, "lr": 9.468163201617063e-06, "epoch": 4.668674698795181, "percentage": 23.34, "elapsed_time": "0:06:01", "remaining_time": "0:19:47", "throughput": 4021.37, "total_tokens": 1453856} |
| {"current_steps": 2330, "total_steps": 9960, "loss": 0.0999, "lr": 9.46422410280794e-06, "epoch": 4.678714859437751, "percentage": 23.39, "elapsed_time": "0:06:02", "remaining_time": "0:19:46", "throughput": 4021.78, "total_tokens": 1456832} |
| {"current_steps": 2335, "total_steps": 9960, "loss": 0.2223, "lr": 9.460271295751373e-06, "epoch": 4.688755020080321, "percentage": 23.44, "elapsed_time": "0:06:02", "remaining_time": "0:19:44", "throughput": 4021.96, "total_tokens": 1459488} |
| {"current_steps": 2340, "total_steps": 9960, "loss": 0.1433, "lr": 9.456304792585207e-06, "epoch": 4.698795180722891, "percentage": 23.49, "elapsed_time": "0:06:03", "remaining_time": "0:19:43", "throughput": 4022.13, "total_tokens": 1462400} |
| {"current_steps": 2345, "total_steps": 9960, "loss": 0.1415, "lr": 9.452324605489344e-06, "epoch": 4.708835341365462, "percentage": 23.54, "elapsed_time": "0:06:04", "remaining_time": "0:19:42", "throughput": 4022.16, "total_tokens": 1465248} |
| {"current_steps": 2350, "total_steps": 9960, "loss": 0.1313, "lr": 9.448330746685704e-06, "epoch": 4.718875502008032, "percentage": 23.59, "elapsed_time": "0:06:05", "remaining_time": "0:19:41", "throughput": 4022.25, "total_tokens": 1468128} |
| {"current_steps": 2355, "total_steps": 9960, "loss": 0.2186, "lr": 9.444323228438186e-06, "epoch": 4.728915662650603, "percentage": 23.64, "elapsed_time": "0:06:05", "remaining_time": "0:19:40", "throughput": 4022.44, "total_tokens": 1471040} |
| {"current_steps": 2360, "total_steps": 9960, "loss": 0.1963, "lr": 9.440302063052638e-06, "epoch": 4.738955823293173, "percentage": 23.69, "elapsed_time": "0:06:06", "remaining_time": "0:19:39", "throughput": 4022.8, "total_tokens": 1473568} |
| {"current_steps": 2365, "total_steps": 9960, "loss": 0.1266, "lr": 9.436267262876808e-06, "epoch": 4.7489959839357425, "percentage": 23.74, "elapsed_time": "0:06:07", "remaining_time": "0:19:38", "throughput": 4023.71, "total_tokens": 1477184} |
| {"current_steps": 2370, "total_steps": 9960, "loss": 0.1391, "lr": 9.43221884030032e-06, "epoch": 4.759036144578313, "percentage": 23.8, "elapsed_time": "0:06:07", "remaining_time": "0:19:38", "throughput": 4024.38, "total_tokens": 1480512} |
| {"current_steps": 2375, "total_steps": 9960, "loss": 0.1569, "lr": 9.428156807754622e-06, "epoch": 4.769076305220883, "percentage": 23.85, "elapsed_time": "0:06:08", "remaining_time": "0:19:37", "throughput": 4024.91, "total_tokens": 1483776} |
| {"current_steps": 2380, "total_steps": 9960, "loss": 0.1241, "lr": 9.424081177712955e-06, "epoch": 4.779116465863454, "percentage": 23.9, "elapsed_time": "0:06:09", "remaining_time": "0:19:36", "throughput": 4025.14, "total_tokens": 1486464} |
| {"current_steps": 2385, "total_steps": 9960, "loss": 0.1112, "lr": 9.419991962690317e-06, "epoch": 4.789156626506024, "percentage": 23.95, "elapsed_time": "0:06:09", "remaining_time": "0:19:34", "throughput": 4025.28, "total_tokens": 1489056} |
| {"current_steps": 2390, "total_steps": 9960, "loss": 0.1215, "lr": 9.415889175243416e-06, "epoch": 4.7991967871485945, "percentage": 24.0, "elapsed_time": "0:06:10", "remaining_time": "0:19:33", "throughput": 4025.47, "total_tokens": 1491808} |
| {"current_steps": 2395, "total_steps": 9960, "loss": 0.1055, "lr": 9.411772827970642e-06, "epoch": 4.809236947791165, "percentage": 24.05, "elapsed_time": "0:06:11", "remaining_time": "0:19:32", "throughput": 4025.84, "total_tokens": 1495008} |
| {"current_steps": 2400, "total_steps": 9960, "loss": 0.1365, "lr": 9.40764293351202e-06, "epoch": 4.8192771084337345, "percentage": 24.1, "elapsed_time": "0:06:12", "remaining_time": "0:19:31", "throughput": 4026.22, "total_tokens": 1497760} |
| {"current_steps": 2405, "total_steps": 9960, "loss": 0.175, "lr": 9.403499504549174e-06, "epoch": 4.829317269076305, "percentage": 24.15, "elapsed_time": "0:06:12", "remaining_time": "0:19:30", "throughput": 4026.39, "total_tokens": 1500544} |
| {"current_steps": 2410, "total_steps": 9960, "loss": 0.1112, "lr": 9.399342553805289e-06, "epoch": 4.839357429718875, "percentage": 24.2, "elapsed_time": "0:06:13", "remaining_time": "0:19:29", "throughput": 4026.3, "total_tokens": 1503232} |
| {"current_steps": 2415, "total_steps": 9960, "loss": 0.1045, "lr": 9.395172094045073e-06, "epoch": 4.849397590361446, "percentage": 24.25, "elapsed_time": "0:06:14", "remaining_time": "0:19:28", "throughput": 4027.12, "total_tokens": 1506432} |
| {"current_steps": 2420, "total_steps": 9960, "loss": 0.1387, "lr": 9.390988138074713e-06, "epoch": 4.859437751004016, "percentage": 24.3, "elapsed_time": "0:06:14", "remaining_time": "0:19:28", "throughput": 4027.94, "total_tokens": 1510336} |
| {"current_steps": 2425, "total_steps": 9960, "loss": 0.2045, "lr": 9.38679069874184e-06, "epoch": 4.8694779116465865, "percentage": 24.35, "elapsed_time": "0:06:15", "remaining_time": "0:19:27", "throughput": 4027.96, "total_tokens": 1512928} |
| {"current_steps": 2430, "total_steps": 9960, "loss": 0.1364, "lr": 9.382579788935487e-06, "epoch": 4.879518072289157, "percentage": 24.4, "elapsed_time": "0:06:16", "remaining_time": "0:19:26", "throughput": 4028.19, "total_tokens": 1515968} |
| {"current_steps": 2435, "total_steps": 9960, "loss": 0.1537, "lr": 9.378355421586053e-06, "epoch": 4.889558232931726, "percentage": 24.45, "elapsed_time": "0:06:17", "remaining_time": "0:19:25", "throughput": 4028.59, "total_tokens": 1519168} |
| {"current_steps": 2440, "total_steps": 9960, "loss": 0.1241, "lr": 9.374117609665263e-06, "epoch": 4.899598393574297, "percentage": 24.5, "elapsed_time": "0:06:17", "remaining_time": "0:19:24", "throughput": 4029.21, "total_tokens": 1522432} |
| {"current_steps": 2445, "total_steps": 9960, "loss": 0.1524, "lr": 9.369866366186116e-06, "epoch": 4.909638554216867, "percentage": 24.55, "elapsed_time": "0:06:18", "remaining_time": "0:19:23", "throughput": 4030.05, "total_tokens": 1525696} |
| {"current_steps": 2450, "total_steps": 9960, "loss": 0.1753, "lr": 9.365601704202869e-06, "epoch": 4.919678714859438, "percentage": 24.6, "elapsed_time": "0:06:19", "remaining_time": "0:19:22", "throughput": 4030.35, "total_tokens": 1528736} |
| {"current_steps": 2455, "total_steps": 9960, "loss": 0.1714, "lr": 9.36132363681097e-06, "epoch": 4.929718875502008, "percentage": 24.65, "elapsed_time": "0:06:20", "remaining_time": "0:19:21", "throughput": 4030.55, "total_tokens": 1531648} |
| {"current_steps": 2460, "total_steps": 9960, "loss": 0.1605, "lr": 9.35703217714704e-06, "epoch": 4.9397590361445785, "percentage": 24.7, "elapsed_time": "0:06:20", "remaining_time": "0:19:20", "throughput": 4030.98, "total_tokens": 1534720} |
| {"current_steps": 2465, "total_steps": 9960, "loss": 0.195, "lr": 9.35272733838882e-06, "epoch": 4.949799196787149, "percentage": 24.75, "elapsed_time": "0:06:21", "remaining_time": "0:19:19", "throughput": 4030.82, "total_tokens": 1536928} |
| {"current_steps": 2470, "total_steps": 9960, "loss": 0.1326, "lr": 9.348409133755137e-06, "epoch": 4.959839357429718, "percentage": 24.8, "elapsed_time": "0:06:21", "remaining_time": "0:19:18", "throughput": 4031.1, "total_tokens": 1539648} |
| {"current_steps": 2475, "total_steps": 9960, "loss": 0.1515, "lr": 9.344077576505853e-06, "epoch": 4.969879518072289, "percentage": 24.85, "elapsed_time": "0:06:22", "remaining_time": "0:19:17", "throughput": 4031.93, "total_tokens": 1543552} |
| {"current_steps": 2480, "total_steps": 9960, "loss": 0.1143, "lr": 9.339732679941842e-06, "epoch": 4.979919678714859, "percentage": 24.9, "elapsed_time": "0:06:23", "remaining_time": "0:19:17", "throughput": 4032.35, "total_tokens": 1546912} |
| {"current_steps": 2485, "total_steps": 9960, "loss": 0.1388, "lr": 9.335374457404928e-06, "epoch": 4.98995983935743, "percentage": 24.95, "elapsed_time": "0:06:24", "remaining_time": "0:19:16", "throughput": 4033.04, "total_tokens": 1550688} |
| {"current_steps": 2490, "total_steps": 9960, "loss": 0.1338, "lr": 9.331002922277865e-06, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:06:25", "remaining_time": "0:19:15", "throughput": 4033.05, "total_tokens": 1553472} |
| {"current_steps": 2495, "total_steps": 9960, "loss": 0.0774, "lr": 9.326618087984278e-06, "epoch": 5.01004016064257, "percentage": 25.05, "elapsed_time": "0:06:26", "remaining_time": "0:19:15", "throughput": 4033.08, "total_tokens": 1557056} |
| {"current_steps": 2500, "total_steps": 9960, "loss": 0.1358, "lr": 9.322219967988638e-06, "epoch": 5.020080321285141, "percentage": 25.1, "elapsed_time": "0:06:26", "remaining_time": "0:19:14", "throughput": 4033.47, "total_tokens": 1559968} |
| {"current_steps": 2505, "total_steps": 9960, "loss": 0.1477, "lr": 9.317808575796202e-06, "epoch": 5.030120481927711, "percentage": 25.15, "elapsed_time": "0:06:27", "remaining_time": "0:19:13", "throughput": 4033.94, "total_tokens": 1563040} |
| {"current_steps": 2510, "total_steps": 9960, "loss": 0.1027, "lr": 9.313383924952988e-06, "epoch": 5.040160642570281, "percentage": 25.2, "elapsed_time": "0:06:28", "remaining_time": "0:19:12", "throughput": 4034.06, "total_tokens": 1565760} |
| {"current_steps": 2515, "total_steps": 9960, "loss": 0.0806, "lr": 9.308946029045726e-06, "epoch": 5.050200803212851, "percentage": 25.25, "elapsed_time": "0:06:28", "remaining_time": "0:19:11", "throughput": 4034.38, "total_tokens": 1568928} |
| {"current_steps": 2520, "total_steps": 9960, "loss": 0.1202, "lr": 9.304494901701821e-06, "epoch": 5.0602409638554215, "percentage": 25.3, "elapsed_time": "0:06:29", "remaining_time": "0:19:10", "throughput": 4034.62, "total_tokens": 1571808} |
| {"current_steps": 2525, "total_steps": 9960, "loss": 0.1632, "lr": 9.300030556589303e-06, "epoch": 5.070281124497992, "percentage": 25.35, "elapsed_time": "0:06:30", "remaining_time": "0:19:09", "throughput": 4034.93, "total_tokens": 1575200} |
| {"current_steps": 2530, "total_steps": 9960, "loss": 0.1406, "lr": 9.29555300741679e-06, "epoch": 5.080321285140562, "percentage": 25.4, "elapsed_time": "0:06:31", "remaining_time": "0:19:08", "throughput": 4035.76, "total_tokens": 1578880} |
| {"current_steps": 2535, "total_steps": 9960, "loss": 0.1248, "lr": 9.291062267933446e-06, "epoch": 5.090361445783133, "percentage": 25.45, "elapsed_time": "0:06:31", "remaining_time": "0:19:07", "throughput": 4035.91, "total_tokens": 1581120} |
| {"current_steps": 2540, "total_steps": 9960, "loss": 0.1795, "lr": 9.28655835192894e-06, "epoch": 5.100401606425703, "percentage": 25.5, "elapsed_time": "0:06:32", "remaining_time": "0:19:06", "throughput": 4036.47, "total_tokens": 1584064} |
| {"current_steps": 2545, "total_steps": 9960, "loss": 0.1542, "lr": 9.282041273233402e-06, "epoch": 5.110441767068273, "percentage": 25.55, "elapsed_time": "0:06:33", "remaining_time": "0:19:05", "throughput": 4037.27, "total_tokens": 1587744} |
| {"current_steps": 2550, "total_steps": 9960, "loss": 0.1454, "lr": 9.277511045717377e-06, "epoch": 5.120481927710843, "percentage": 25.6, "elapsed_time": "0:06:33", "remaining_time": "0:19:04", "throughput": 4037.55, "total_tokens": 1590624} |
| {"current_steps": 2555, "total_steps": 9960, "loss": 0.1246, "lr": 9.27296768329179e-06, "epoch": 5.1305220883534135, "percentage": 25.65, "elapsed_time": "0:06:34", "remaining_time": "0:19:04", "throughput": 4038.27, "total_tokens": 1594016} |
| {"current_steps": 2560, "total_steps": 9960, "loss": 0.1345, "lr": 9.268411199907898e-06, "epoch": 5.140562248995984, "percentage": 25.7, "elapsed_time": "0:06:35", "remaining_time": "0:19:02", "throughput": 4038.43, "total_tokens": 1596640} |
| {"current_steps": 2565, "total_steps": 9960, "loss": 0.1773, "lr": 9.263841609557247e-06, "epoch": 5.150602409638554, "percentage": 25.75, "elapsed_time": "0:06:36", "remaining_time": "0:19:01", "throughput": 4039.09, "total_tokens": 1599840} |
| {"current_steps": 2570, "total_steps": 9960, "loss": 0.1523, "lr": 9.259258926271632e-06, "epoch": 5.160642570281125, "percentage": 25.8, "elapsed_time": "0:06:36", "remaining_time": "0:19:00", "throughput": 4039.21, "total_tokens": 1602656} |
| {"current_steps": 2575, "total_steps": 9960, "loss": 0.119, "lr": 9.254663164123052e-06, "epoch": 5.170682730923695, "percentage": 25.85, "elapsed_time": "0:06:37", "remaining_time": "0:19:00", "throughput": 4040.03, "total_tokens": 1606176} |
| {"current_steps": 2580, "total_steps": 9960, "loss": 0.1502, "lr": 9.250054337223666e-06, "epoch": 5.180722891566265, "percentage": 25.9, "elapsed_time": "0:06:38", "remaining_time": "0:18:58", "throughput": 4040.28, "total_tokens": 1608768} |
| {"current_steps": 2585, "total_steps": 9960, "loss": 0.1165, "lr": 9.245432459725754e-06, "epoch": 5.190763052208835, "percentage": 25.95, "elapsed_time": "0:06:38", "remaining_time": "0:18:57", "throughput": 4040.49, "total_tokens": 1611168} |
| {"current_steps": 2590, "total_steps": 9960, "loss": 0.1484, "lr": 9.240797545821666e-06, "epoch": 5.2008032128514055, "percentage": 26.0, "elapsed_time": "0:06:39", "remaining_time": "0:18:57", "throughput": 4041.06, "total_tokens": 1614720} |
| {"current_steps": 2595, "total_steps": 9960, "loss": 0.1225, "lr": 9.236149609743786e-06, "epoch": 5.210843373493976, "percentage": 26.05, "elapsed_time": "0:06:40", "remaining_time": "0:18:55", "throughput": 4041.27, "total_tokens": 1617504} |
| {"current_steps": 2600, "total_steps": 9960, "loss": 0.0974, "lr": 9.231488665764485e-06, "epoch": 5.220883534136546, "percentage": 26.1, "elapsed_time": "0:06:40", "remaining_time": "0:18:55", "throughput": 4041.82, "total_tokens": 1620672} |
| {"current_steps": 2605, "total_steps": 9960, "loss": 0.1373, "lr": 9.226814728196072e-06, "epoch": 5.230923694779117, "percentage": 26.15, "elapsed_time": "0:06:41", "remaining_time": "0:18:54", "throughput": 4041.82, "total_tokens": 1623488} |
| {"current_steps": 2610, "total_steps": 9960, "loss": 0.1718, "lr": 9.222127811390765e-06, "epoch": 5.240963855421687, "percentage": 26.2, "elapsed_time": "0:06:42", "remaining_time": "0:18:52", "throughput": 4042.0, "total_tokens": 1626080} |
| {"current_steps": 2615, "total_steps": 9960, "loss": 0.0963, "lr": 9.217427929740625e-06, "epoch": 5.2510040160642575, "percentage": 26.26, "elapsed_time": "0:06:43", "remaining_time": "0:18:52", "throughput": 4042.48, "total_tokens": 1629536} |
| {"current_steps": 2620, "total_steps": 9960, "loss": 0.1076, "lr": 9.212715097677537e-06, "epoch": 5.261044176706827, "percentage": 26.31, "elapsed_time": "0:06:43", "remaining_time": "0:18:51", "throughput": 4043.08, "total_tokens": 1632768} |
| {"current_steps": 2625, "total_steps": 9960, "loss": 0.1039, "lr": 9.207989329673143e-06, "epoch": 5.271084337349397, "percentage": 26.36, "elapsed_time": "0:06:44", "remaining_time": "0:18:50", "throughput": 4043.7, "total_tokens": 1636256} |
| {"current_steps": 2630, "total_steps": 9960, "loss": 0.1435, "lr": 9.203250640238813e-06, "epoch": 5.281124497991968, "percentage": 26.41, "elapsed_time": "0:06:45", "remaining_time": "0:18:49", "throughput": 4044.06, "total_tokens": 1639264} |
| {"current_steps": 2635, "total_steps": 9960, "loss": 0.1152, "lr": 9.198499043925591e-06, "epoch": 5.291164658634538, "percentage": 26.46, "elapsed_time": "0:06:46", "remaining_time": "0:18:48", "throughput": 4044.74, "total_tokens": 1642432} |
| {"current_steps": 2640, "total_steps": 9960, "loss": 0.1229, "lr": 9.193734555324154e-06, "epoch": 5.301204819277109, "percentage": 26.51, "elapsed_time": "0:06:46", "remaining_time": "0:18:47", "throughput": 4045.07, "total_tokens": 1645600} |
| {"current_steps": 2645, "total_steps": 9960, "loss": 0.1259, "lr": 9.18895718906477e-06, "epoch": 5.311244979919679, "percentage": 26.56, "elapsed_time": "0:06:47", "remaining_time": "0:18:46", "throughput": 4045.21, "total_tokens": 1648192} |
| {"current_steps": 2650, "total_steps": 9960, "loss": 0.1592, "lr": 9.184166959817247e-06, "epoch": 5.321285140562249, "percentage": 26.61, "elapsed_time": "0:06:48", "remaining_time": "0:18:46", "throughput": 4045.84, "total_tokens": 1651776} |
| {"current_steps": 2655, "total_steps": 9960, "loss": 0.0776, "lr": 9.179363882290896e-06, "epoch": 5.331325301204819, "percentage": 26.66, "elapsed_time": "0:06:49", "remaining_time": "0:18:45", "throughput": 4046.25, "total_tokens": 1654944} |
| {"current_steps": 2660, "total_steps": 9960, "loss": 0.1496, "lr": 9.17454797123448e-06, "epoch": 5.341365461847389, "percentage": 26.71, "elapsed_time": "0:06:49", "remaining_time": "0:18:44", "throughput": 4046.22, "total_tokens": 1657344} |
| {"current_steps": 2665, "total_steps": 9960, "loss": 0.0683, "lr": 9.169719241436162e-06, "epoch": 5.35140562248996, "percentage": 26.76, "elapsed_time": "0:06:50", "remaining_time": "0:18:42", "throughput": 4046.16, "total_tokens": 1659680} |
| {"current_steps": 2670, "total_steps": 9960, "loss": 0.2095, "lr": 9.164877707723476e-06, "epoch": 5.36144578313253, "percentage": 26.81, "elapsed_time": "0:06:50", "remaining_time": "0:18:41", "throughput": 4046.23, "total_tokens": 1662560} |
| {"current_steps": 2675, "total_steps": 9960, "loss": 0.114, "lr": 9.160023384963271e-06, "epoch": 5.371485943775101, "percentage": 26.86, "elapsed_time": "0:06:51", "remaining_time": "0:18:41", "throughput": 4046.64, "total_tokens": 1665728} |
| {"current_steps": 2680, "total_steps": 9960, "loss": 0.1034, "lr": 9.155156288061666e-06, "epoch": 5.381526104417671, "percentage": 26.91, "elapsed_time": "0:06:52", "remaining_time": "0:18:40", "throughput": 4047.18, "total_tokens": 1669216} |
| {"current_steps": 2685, "total_steps": 9960, "loss": 0.154, "lr": 9.150276431964007e-06, "epoch": 5.391566265060241, "percentage": 26.96, "elapsed_time": "0:06:53", "remaining_time": "0:18:39", "throughput": 4047.73, "total_tokens": 1672768} |
| {"current_steps": 2690, "total_steps": 9960, "loss": 0.1459, "lr": 9.145383831654814e-06, "epoch": 5.401606425702811, "percentage": 27.01, "elapsed_time": "0:06:53", "remaining_time": "0:18:38", "throughput": 4047.62, "total_tokens": 1675520} |
| {"current_steps": 2695, "total_steps": 9960, "loss": 0.0966, "lr": 9.14047850215775e-06, "epoch": 5.411646586345381, "percentage": 27.06, "elapsed_time": "0:06:54", "remaining_time": "0:18:38", "throughput": 4047.88, "total_tokens": 1678784} |
| {"current_steps": 2700, "total_steps": 9960, "loss": 0.1964, "lr": 9.13556045853556e-06, "epoch": 5.421686746987952, "percentage": 27.11, "elapsed_time": "0:06:55", "remaining_time": "0:18:36", "throughput": 4047.97, "total_tokens": 1681376} |
| {"current_steps": 2705, "total_steps": 9960, "loss": 0.1234, "lr": 9.130629715890027e-06, "epoch": 5.431726907630522, "percentage": 27.16, "elapsed_time": "0:06:56", "remaining_time": "0:18:36", "throughput": 4048.59, "total_tokens": 1684864} |
| {"current_steps": 2710, "total_steps": 9960, "loss": 0.1196, "lr": 9.125686289361935e-06, "epoch": 5.4417670682730925, "percentage": 27.21, "elapsed_time": "0:06:57", "remaining_time": "0:18:35", "throughput": 4049.42, "total_tokens": 1688896} |
| {"current_steps": 2715, "total_steps": 9960, "loss": 0.0986, "lr": 9.120730194131011e-06, "epoch": 5.451807228915663, "percentage": 27.26, "elapsed_time": "0:06:57", "remaining_time": "0:18:35", "throughput": 4049.86, "total_tokens": 1692288} |
| {"current_steps": 2720, "total_steps": 9960, "loss": 0.1174, "lr": 9.115761445415887e-06, "epoch": 5.461847389558233, "percentage": 27.31, "elapsed_time": "0:06:58", "remaining_time": "0:18:34", "throughput": 4050.18, "total_tokens": 1695200} |
| {"current_steps": 2725, "total_steps": 9960, "loss": 0.1247, "lr": 9.110780058474052e-06, "epoch": 5.471887550200803, "percentage": 27.36, "elapsed_time": "0:06:59", "remaining_time": "0:18:33", "throughput": 4050.6, "total_tokens": 1698720} |
| {"current_steps": 2730, "total_steps": 9960, "loss": 0.1565, "lr": 9.105786048601795e-06, "epoch": 5.481927710843373, "percentage": 27.41, "elapsed_time": "0:07:00", "remaining_time": "0:18:32", "throughput": 4050.73, "total_tokens": 1701536} |
| {"current_steps": 2735, "total_steps": 9960, "loss": 0.1237, "lr": 9.100779431134175e-06, "epoch": 5.491967871485944, "percentage": 27.46, "elapsed_time": "0:07:00", "remaining_time": "0:18:31", "throughput": 4051.05, "total_tokens": 1704864} |
| {"current_steps": 2740, "total_steps": 9960, "loss": 0.1348, "lr": 9.09576022144496e-06, "epoch": 5.502008032128514, "percentage": 27.51, "elapsed_time": "0:07:01", "remaining_time": "0:18:30", "throughput": 4051.31, "total_tokens": 1708000} |
| {"current_steps": 2745, "total_steps": 9960, "loss": 0.1286, "lr": 9.090728434946584e-06, "epoch": 5.5120481927710845, "percentage": 27.56, "elapsed_time": "0:07:02", "remaining_time": "0:18:30", "throughput": 4051.79, "total_tokens": 1711296} |
| {"current_steps": 2750, "total_steps": 9960, "loss": 0.1311, "lr": 9.085684087090108e-06, "epoch": 5.522088353413655, "percentage": 27.61, "elapsed_time": "0:07:03", "remaining_time": "0:18:29", "throughput": 4052.39, "total_tokens": 1714880} |
| {"current_steps": 2755, "total_steps": 9960, "loss": 0.1346, "lr": 9.080627193365155e-06, "epoch": 5.532128514056225, "percentage": 27.66, "elapsed_time": "0:07:03", "remaining_time": "0:18:28", "throughput": 4052.6, "total_tokens": 1717728} |
| {"current_steps": 2760, "total_steps": 9960, "loss": 0.1556, "lr": 9.075557769299877e-06, "epoch": 5.542168674698795, "percentage": 27.71, "elapsed_time": "0:07:04", "remaining_time": "0:18:27", "throughput": 4053.2, "total_tokens": 1721280} |
| {"current_steps": 2765, "total_steps": 9960, "loss": 0.1214, "lr": 9.070475830460906e-06, "epoch": 5.552208835341365, "percentage": 27.76, "elapsed_time": "0:07:05", "remaining_time": "0:18:26", "throughput": 4053.27, "total_tokens": 1723968} |
| {"current_steps": 2770, "total_steps": 9960, "loss": 0.1406, "lr": 9.065381392453296e-06, "epoch": 5.562248995983936, "percentage": 27.81, "elapsed_time": "0:07:06", "remaining_time": "0:18:26", "throughput": 4053.72, "total_tokens": 1727424} |
| {"current_steps": 2775, "total_steps": 9960, "loss": 0.1231, "lr": 9.060274470920487e-06, "epoch": 5.572289156626506, "percentage": 27.86, "elapsed_time": "0:07:06", "remaining_time": "0:18:25", "throughput": 4054.4, "total_tokens": 1730528} |
| {"current_steps": 2780, "total_steps": 9960, "loss": 0.1405, "lr": 9.055155081544253e-06, "epoch": 5.582329317269076, "percentage": 27.91, "elapsed_time": "0:07:07", "remaining_time": "0:18:24", "throughput": 4055.01, "total_tokens": 1734208} |
| {"current_steps": 2785, "total_steps": 9960, "loss": 0.1144, "lr": 9.050023240044649e-06, "epoch": 5.592369477911647, "percentage": 27.96, "elapsed_time": "0:07:08", "remaining_time": "0:18:23", "throughput": 4055.71, "total_tokens": 1737728} |
| {"current_steps": 2790, "total_steps": 9960, "loss": 0.1405, "lr": 9.044878962179968e-06, "epoch": 5.602409638554217, "percentage": 28.01, "elapsed_time": "0:07:09", "remaining_time": "0:18:22", "throughput": 4056.0, "total_tokens": 1740800} |
| {"current_steps": 2795, "total_steps": 9960, "loss": 0.1596, "lr": 9.039722263746693e-06, "epoch": 5.612449799196787, "percentage": 28.06, "elapsed_time": "0:07:09", "remaining_time": "0:18:22", "throughput": 4056.45, "total_tokens": 1744096} |
| {"current_steps": 2800, "total_steps": 9960, "loss": 0.0979, "lr": 9.034553160579444e-06, "epoch": 5.622489959839357, "percentage": 28.11, "elapsed_time": "0:07:10", "remaining_time": "0:18:21", "throughput": 4056.58, "total_tokens": 1746720} |
| {"current_steps": 2805, "total_steps": 9960, "loss": 0.1587, "lr": 9.029371668550933e-06, "epoch": 5.632530120481928, "percentage": 28.16, "elapsed_time": "0:07:11", "remaining_time": "0:18:20", "throughput": 4057.4, "total_tokens": 1750304} |
| {"current_steps": 2810, "total_steps": 9960, "loss": 0.13, "lr": 9.024177803571917e-06, "epoch": 5.642570281124498, "percentage": 28.21, "elapsed_time": "0:07:12", "remaining_time": "0:18:19", "throughput": 4057.63, "total_tokens": 1753600} |
| {"current_steps": 2815, "total_steps": 9960, "loss": 0.1681, "lr": 9.018971581591141e-06, "epoch": 5.652610441767068, "percentage": 28.26, "elapsed_time": "0:07:12", "remaining_time": "0:18:18", "throughput": 4057.74, "total_tokens": 1756096} |
| {"current_steps": 2820, "total_steps": 9960, "loss": 0.1039, "lr": 9.013753018595302e-06, "epoch": 5.662650602409639, "percentage": 28.31, "elapsed_time": "0:07:13", "remaining_time": "0:18:17", "throughput": 4057.78, "total_tokens": 1759072} |
| {"current_steps": 2825, "total_steps": 9960, "loss": 0.0958, "lr": 9.008522130608984e-06, "epoch": 5.672690763052209, "percentage": 28.36, "elapsed_time": "0:07:14", "remaining_time": "0:18:16", "throughput": 4058.44, "total_tokens": 1762720} |
| {"current_steps": 2830, "total_steps": 9960, "loss": 0.1527, "lr": 9.003278933694625e-06, "epoch": 5.682730923694779, "percentage": 28.41, "elapsed_time": "0:07:15", "remaining_time": "0:18:15", "throughput": 4058.55, "total_tokens": 1765472} |
| {"current_steps": 2835, "total_steps": 9960, "loss": 0.0948, "lr": 8.998023443952453e-06, "epoch": 5.692771084337349, "percentage": 28.46, "elapsed_time": "0:07:15", "remaining_time": "0:18:15", "throughput": 4059.37, "total_tokens": 1769472} |
| {"current_steps": 2840, "total_steps": 9960, "loss": 0.1371, "lr": 8.992755677520448e-06, "epoch": 5.7028112449799195, "percentage": 28.51, "elapsed_time": "0:07:16", "remaining_time": "0:18:14", "throughput": 4059.74, "total_tokens": 1772640} |
| {"current_steps": 2845, "total_steps": 9960, "loss": 0.1788, "lr": 8.987475650574289e-06, "epoch": 5.71285140562249, "percentage": 28.56, "elapsed_time": "0:07:17", "remaining_time": "0:18:13", "throughput": 4060.08, "total_tokens": 1775744} |
| {"current_steps": 2850, "total_steps": 9960, "loss": 0.1061, "lr": 8.982183379327299e-06, "epoch": 5.72289156626506, "percentage": 28.61, "elapsed_time": "0:07:18", "remaining_time": "0:18:12", "throughput": 4060.44, "total_tokens": 1778944} |
| {"current_steps": 2855, "total_steps": 9960, "loss": 0.1245, "lr": 8.9768788800304e-06, "epoch": 5.732931726907631, "percentage": 28.66, "elapsed_time": "0:07:18", "remaining_time": "0:18:12", "throughput": 4060.84, "total_tokens": 1782400} |
| {"current_steps": 2860, "total_steps": 9960, "loss": 0.144, "lr": 8.971562168972065e-06, "epoch": 5.742971887550201, "percentage": 28.71, "elapsed_time": "0:07:19", "remaining_time": "0:18:10", "throughput": 4060.43, "total_tokens": 1784416} |
| {"current_steps": 2865, "total_steps": 9960, "loss": 0.1747, "lr": 8.966233262478266e-06, "epoch": 5.753012048192771, "percentage": 28.77, "elapsed_time": "0:07:20", "remaining_time": "0:18:10", "throughput": 4060.55, "total_tokens": 1787392} |
| {"current_steps": 2870, "total_steps": 9960, "loss": 0.1084, "lr": 8.960892176912418e-06, "epoch": 5.763052208835341, "percentage": 28.82, "elapsed_time": "0:07:20", "remaining_time": "0:18:09", "throughput": 4061.21, "total_tokens": 1790976} |
| {"current_steps": 2875, "total_steps": 9960, "loss": 0.1494, "lr": 8.955538928675343e-06, "epoch": 5.7730923694779115, "percentage": 28.87, "elapsed_time": "0:07:21", "remaining_time": "0:18:08", "throughput": 4061.45, "total_tokens": 1793952} |
| {"current_steps": 2880, "total_steps": 9960, "loss": 0.1379, "lr": 8.950173534205202e-06, "epoch": 5.783132530120482, "percentage": 28.92, "elapsed_time": "0:07:22", "remaining_time": "0:18:07", "throughput": 4061.93, "total_tokens": 1797568} |
| {"current_steps": 2885, "total_steps": 9960, "loss": 0.1645, "lr": 8.944796009977459e-06, "epoch": 5.793172690763052, "percentage": 28.97, "elapsed_time": "0:07:23", "remaining_time": "0:18:06", "throughput": 4061.78, "total_tokens": 1800128} |
| {"current_steps": 2890, "total_steps": 9960, "loss": 0.1543, "lr": 8.939406372504823e-06, "epoch": 5.803212851405623, "percentage": 29.02, "elapsed_time": "0:07:24", "remaining_time": "0:18:06", "throughput": 4062.21, "total_tokens": 1803712} |
| {"current_steps": 2895, "total_steps": 9960, "loss": 0.0882, "lr": 8.934004638337197e-06, "epoch": 5.813253012048193, "percentage": 29.07, "elapsed_time": "0:07:24", "remaining_time": "0:18:05", "throughput": 4062.65, "total_tokens": 1806784} |
| {"current_steps": 2900, "total_steps": 9960, "loss": 0.0888, "lr": 8.928590824061633e-06, "epoch": 5.823293172690763, "percentage": 29.12, "elapsed_time": "0:07:25", "remaining_time": "0:18:04", "throughput": 4062.44, "total_tokens": 1809312} |
| {"current_steps": 2905, "total_steps": 9960, "loss": 0.1286, "lr": 8.923164946302274e-06, "epoch": 5.833333333333333, "percentage": 29.17, "elapsed_time": "0:07:26", "remaining_time": "0:18:03", "throughput": 4062.64, "total_tokens": 1812192} |
| {"current_steps": 2910, "total_steps": 9960, "loss": 0.1309, "lr": 8.917727021720308e-06, "epoch": 5.843373493975903, "percentage": 29.22, "elapsed_time": "0:07:26", "remaining_time": "0:18:02", "throughput": 4062.52, "total_tokens": 1815168} |
| {"current_steps": 2915, "total_steps": 9960, "loss": 0.123, "lr": 8.912277067013914e-06, "epoch": 5.853413654618474, "percentage": 29.27, "elapsed_time": "0:07:27", "remaining_time": "0:18:01", "throughput": 4063.0, "total_tokens": 1818176} |
| {"current_steps": 2920, "total_steps": 9960, "loss": 0.1411, "lr": 8.906815098918214e-06, "epoch": 5.863453815261044, "percentage": 29.32, "elapsed_time": "0:07:28", "remaining_time": "0:18:00", "throughput": 4062.98, "total_tokens": 1821120} |
| {"current_steps": 2925, "total_steps": 9960, "loss": 0.1104, "lr": 8.901341134205214e-06, "epoch": 5.873493975903615, "percentage": 29.37, "elapsed_time": "0:07:28", "remaining_time": "0:17:59", "throughput": 4063.02, "total_tokens": 1823840} |
| {"current_steps": 2930, "total_steps": 9960, "loss": 0.1315, "lr": 8.895855189683768e-06, "epoch": 5.883534136546185, "percentage": 29.42, "elapsed_time": "0:07:29", "remaining_time": "0:17:58", "throughput": 4063.62, "total_tokens": 1827168} |
| {"current_steps": 2935, "total_steps": 9960, "loss": 0.1622, "lr": 8.890357282199504e-06, "epoch": 5.893574297188755, "percentage": 29.47, "elapsed_time": "0:07:30", "remaining_time": "0:17:57", "throughput": 4063.67, "total_tokens": 1829504} |
| {"current_steps": 2940, "total_steps": 9960, "loss": 0.156, "lr": 8.884847428634792e-06, "epoch": 5.903614457831325, "percentage": 29.52, "elapsed_time": "0:07:30", "remaining_time": "0:17:56", "throughput": 4063.6, "total_tokens": 1832640} |
| {"current_steps": 2945, "total_steps": 9960, "loss": 0.1578, "lr": 8.879325645908686e-06, "epoch": 5.913654618473895, "percentage": 29.57, "elapsed_time": "0:07:31", "remaining_time": "0:17:56", "throughput": 4064.1, "total_tokens": 1836448} |
| {"current_steps": 2950, "total_steps": 9960, "loss": 0.1497, "lr": 8.873791950976865e-06, "epoch": 5.923694779116466, "percentage": 29.62, "elapsed_time": "0:07:32", "remaining_time": "0:17:55", "throughput": 4064.05, "total_tokens": 1839104} |
| {"current_steps": 2955, "total_steps": 9960, "loss": 0.1957, "lr": 8.868246360831589e-06, "epoch": 5.933734939759036, "percentage": 29.67, "elapsed_time": "0:07:33", "remaining_time": "0:17:54", "throughput": 4064.32, "total_tokens": 1841952} |
| {"current_steps": 2960, "total_steps": 9960, "loss": 0.1177, "lr": 8.862688892501648e-06, "epoch": 5.943775100401607, "percentage": 29.72, "elapsed_time": "0:07:34", "remaining_time": "0:17:54", "throughput": 4065.12, "total_tokens": 1846272} |
| {"current_steps": 2965, "total_steps": 9960, "loss": 0.1179, "lr": 8.857119563052301e-06, "epoch": 5.953815261044177, "percentage": 29.77, "elapsed_time": "0:07:35", "remaining_time": "0:17:53", "throughput": 4065.45, "total_tokens": 1849888} |
| {"current_steps": 2970, "total_steps": 9960, "loss": 0.1344, "lr": 8.851538389585234e-06, "epoch": 5.9638554216867465, "percentage": 29.82, "elapsed_time": "0:07:35", "remaining_time": "0:17:52", "throughput": 4065.83, "total_tokens": 1853152} |
| {"current_steps": 2975, "total_steps": 9960, "loss": 0.0961, "lr": 8.845945389238496e-06, "epoch": 5.973895582329317, "percentage": 29.87, "elapsed_time": "0:07:36", "remaining_time": "0:17:51", "throughput": 4065.87, "total_tokens": 1856128} |
| {"current_steps": 2980, "total_steps": 9960, "loss": 0.1184, "lr": 8.840340579186457e-06, "epoch": 5.983935742971887, "percentage": 29.92, "elapsed_time": "0:07:37", "remaining_time": "0:17:50", "throughput": 4065.76, "total_tokens": 1858496} |
| {"current_steps": 2985, "total_steps": 9960, "loss": 0.1139, "lr": 8.834723976639752e-06, "epoch": 5.993975903614458, "percentage": 29.97, "elapsed_time": "0:07:37", "remaining_time": "0:17:49", "throughput": 4065.72, "total_tokens": 1860928} |
| {"current_steps": 2988, "total_steps": 9960, "eval_loss": 0.19226641952991486, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:07:46", "remaining_time": "0:18:07", "throughput": 3995.17, "total_tokens": 1862848} |
| {"current_steps": 2990, "total_steps": 9960, "loss": 0.1233, "lr": 8.829095598845224e-06, "epoch": 6.004016064257028, "percentage": 30.02, "elapsed_time": "0:07:47", "remaining_time": "0:18:10", "throughput": 3985.11, "total_tokens": 1864064} |
| {"current_steps": 2995, "total_steps": 9960, "loss": 0.0896, "lr": 8.823455463085873e-06, "epoch": 6.014056224899599, "percentage": 30.07, "elapsed_time": "0:07:48", "remaining_time": "0:18:09", "throughput": 3985.1, "total_tokens": 1867360} |
| {"current_steps": 3000, "total_steps": 9960, "loss": 0.114, "lr": 8.81780358668081e-06, "epoch": 6.024096385542169, "percentage": 30.12, "elapsed_time": "0:07:49", "remaining_time": "0:18:08", "throughput": 3985.15, "total_tokens": 1870112} |
| {"current_steps": 3005, "total_steps": 9960, "loss": 0.0927, "lr": 8.812139986985194e-06, "epoch": 6.034136546184739, "percentage": 30.17, "elapsed_time": "0:07:50", "remaining_time": "0:18:08", "throughput": 3985.51, "total_tokens": 1873632} |
| {"current_steps": 3010, "total_steps": 9960, "loss": 0.1233, "lr": 8.806464681390182e-06, "epoch": 6.044176706827309, "percentage": 30.22, "elapsed_time": "0:07:50", "remaining_time": "0:18:07", "throughput": 3985.78, "total_tokens": 1876480} |
| {"current_steps": 3015, "total_steps": 9960, "loss": 0.1091, "lr": 8.800777687322875e-06, "epoch": 6.054216867469879, "percentage": 30.27, "elapsed_time": "0:07:51", "remaining_time": "0:18:06", "throughput": 3986.45, "total_tokens": 1880032} |
| {"current_steps": 3020, "total_steps": 9960, "loss": 0.1362, "lr": 8.795079022246269e-06, "epoch": 6.06425702811245, "percentage": 30.32, "elapsed_time": "0:07:52", "remaining_time": "0:18:05", "throughput": 3986.56, "total_tokens": 1882400} |
| {"current_steps": 3025, "total_steps": 9960, "loss": 0.1909, "lr": 8.789368703659199e-06, "epoch": 6.07429718875502, "percentage": 30.37, "elapsed_time": "0:07:52", "remaining_time": "0:18:04", "throughput": 3987.11, "total_tokens": 1885632} |
| {"current_steps": 3030, "total_steps": 9960, "loss": 0.1584, "lr": 8.78364674909628e-06, "epoch": 6.0843373493975905, "percentage": 30.42, "elapsed_time": "0:07:53", "remaining_time": "0:18:03", "throughput": 3987.8, "total_tokens": 1889088} |
| {"current_steps": 3035, "total_steps": 9960, "loss": 0.1345, "lr": 8.777913176127859e-06, "epoch": 6.094377510040161, "percentage": 30.47, "elapsed_time": "0:07:54", "remaining_time": "0:18:03", "throughput": 3988.34, "total_tokens": 1893056} |
| {"current_steps": 3040, "total_steps": 9960, "loss": 0.1093, "lr": 8.772168002359962e-06, "epoch": 6.104417670682731, "percentage": 30.52, "elapsed_time": "0:07:55", "remaining_time": "0:18:02", "throughput": 3989.34, "total_tokens": 1896896} |
| {"current_steps": 3045, "total_steps": 9960, "loss": 0.081, "lr": 8.766411245434234e-06, "epoch": 6.114457831325301, "percentage": 30.57, "elapsed_time": "0:07:56", "remaining_time": "0:18:01", "throughput": 3989.71, "total_tokens": 1899968} |
| {"current_steps": 3050, "total_steps": 9960, "loss": 0.0967, "lr": 8.760642923027888e-06, "epoch": 6.124497991967871, "percentage": 30.62, "elapsed_time": "0:07:56", "remaining_time": "0:18:00", "throughput": 3989.81, "total_tokens": 1902944} |
| {"current_steps": 3055, "total_steps": 9960, "loss": 0.1337, "lr": 8.754863052853658e-06, "epoch": 6.134538152610442, "percentage": 30.67, "elapsed_time": "0:07:57", "remaining_time": "0:17:59", "throughput": 3990.58, "total_tokens": 1906368} |
| {"current_steps": 3060, "total_steps": 9960, "loss": 0.1273, "lr": 8.74907165265973e-06, "epoch": 6.144578313253012, "percentage": 30.72, "elapsed_time": "0:07:58", "remaining_time": "0:17:58", "throughput": 3990.53, "total_tokens": 1908832} |
| {"current_steps": 3065, "total_steps": 9960, "loss": 0.1598, "lr": 8.743268740229693e-06, "epoch": 6.1546184738955825, "percentage": 30.77, "elapsed_time": "0:07:58", "remaining_time": "0:17:57", "throughput": 3990.41, "total_tokens": 1911360} |
| {"current_steps": 3070, "total_steps": 9960, "loss": 0.1322, "lr": 8.7374543333825e-06, "epoch": 6.164658634538153, "percentage": 30.82, "elapsed_time": "0:07:59", "remaining_time": "0:17:56", "throughput": 3990.78, "total_tokens": 1914816} |
| {"current_steps": 3075, "total_steps": 9960, "loss": 0.0712, "lr": 8.731628449972382e-06, "epoch": 6.174698795180723, "percentage": 30.87, "elapsed_time": "0:08:00", "remaining_time": "0:17:55", "throughput": 3990.94, "total_tokens": 1917728} |
| {"current_steps": 3080, "total_steps": 9960, "loss": 0.0885, "lr": 8.725791107888825e-06, "epoch": 6.184738955823293, "percentage": 30.92, "elapsed_time": "0:08:01", "remaining_time": "0:17:54", "throughput": 3991.25, "total_tokens": 1920672} |
| {"current_steps": 3085, "total_steps": 9960, "loss": 0.1114, "lr": 8.719942325056496e-06, "epoch": 6.194779116465863, "percentage": 30.97, "elapsed_time": "0:08:01", "remaining_time": "0:17:54", "throughput": 3991.6, "total_tokens": 1923776} |
| {"current_steps": 3090, "total_steps": 9960, "loss": 0.1036, "lr": 8.71408211943519e-06, "epoch": 6.204819277108434, "percentage": 31.02, "elapsed_time": "0:08:02", "remaining_time": "0:17:52", "throughput": 3991.85, "total_tokens": 1926464} |
| {"current_steps": 3095, "total_steps": 9960, "loss": 0.1345, "lr": 8.70821050901978e-06, "epoch": 6.214859437751004, "percentage": 31.07, "elapsed_time": "0:08:03", "remaining_time": "0:17:52", "throughput": 3992.45, "total_tokens": 1929792} |
| {"current_steps": 3100, "total_steps": 9960, "loss": 0.0932, "lr": 8.702327511840165e-06, "epoch": 6.224899598393574, "percentage": 31.12, "elapsed_time": "0:08:04", "remaining_time": "0:17:51", "throughput": 3993.27, "total_tokens": 1933664} |
| {"current_steps": 3105, "total_steps": 9960, "loss": 0.1173, "lr": 8.6964331459612e-06, "epoch": 6.234939759036145, "percentage": 31.17, "elapsed_time": "0:08:04", "remaining_time": "0:17:50", "throughput": 3993.66, "total_tokens": 1936704} |
| {"current_steps": 3110, "total_steps": 9960, "loss": 0.1965, "lr": 8.690527429482658e-06, "epoch": 6.244979919678715, "percentage": 31.22, "elapsed_time": "0:08:05", "remaining_time": "0:17:49", "throughput": 3993.79, "total_tokens": 1939552} |
| {"current_steps": 3115, "total_steps": 9960, "loss": 0.1401, "lr": 8.68461038053916e-06, "epoch": 6.255020080321285, "percentage": 31.28, "elapsed_time": "0:08:06", "remaining_time": "0:17:48", "throughput": 3994.35, "total_tokens": 1942944} |
| {"current_steps": 3120, "total_steps": 9960, "loss": 0.0998, "lr": 8.678682017300126e-06, "epoch": 6.265060240963855, "percentage": 31.33, "elapsed_time": "0:08:07", "remaining_time": "0:17:47", "throughput": 3994.47, "total_tokens": 1945600} |
| {"current_steps": 3125, "total_steps": 9960, "loss": 0.1296, "lr": 8.672742357969724e-06, "epoch": 6.275100401606426, "percentage": 31.38, "elapsed_time": "0:08:07", "remaining_time": "0:17:46", "throughput": 3994.82, "total_tokens": 1948416} |
| {"current_steps": 3130, "total_steps": 9960, "loss": 0.1204, "lr": 8.666791420786805e-06, "epoch": 6.285140562248996, "percentage": 31.43, "elapsed_time": "0:08:08", "remaining_time": "0:17:45", "throughput": 3994.93, "total_tokens": 1951296} |
| {"current_steps": 3135, "total_steps": 9960, "loss": 0.1233, "lr": 8.660829224024849e-06, "epoch": 6.295180722891566, "percentage": 31.48, "elapsed_time": "0:08:09", "remaining_time": "0:17:45", "throughput": 3995.17, "total_tokens": 1954784} |
| {"current_steps": 3140, "total_steps": 9960, "loss": 0.134, "lr": 8.654855785991915e-06, "epoch": 6.305220883534137, "percentage": 31.53, "elapsed_time": "0:08:09", "remaining_time": "0:17:44", "throughput": 3995.61, "total_tokens": 1957664} |
| {"current_steps": 3145, "total_steps": 9960, "loss": 0.078, "lr": 8.648871125030576e-06, "epoch": 6.315261044176707, "percentage": 31.58, "elapsed_time": "0:08:10", "remaining_time": "0:17:43", "throughput": 3996.1, "total_tokens": 1960736} |
| {"current_steps": 3150, "total_steps": 9960, "loss": 0.0517, "lr": 8.642875259517871e-06, "epoch": 6.325301204819277, "percentage": 31.63, "elapsed_time": "0:08:11", "remaining_time": "0:17:42", "throughput": 3996.99, "total_tokens": 1964448} |
| {"current_steps": 3155, "total_steps": 9960, "loss": 0.1463, "lr": 8.636868207865244e-06, "epoch": 6.335341365461847, "percentage": 31.68, "elapsed_time": "0:08:12", "remaining_time": "0:17:41", "throughput": 3997.49, "total_tokens": 1967808} |
| {"current_steps": 3160, "total_steps": 9960, "loss": 0.0814, "lr": 8.630849988518486e-06, "epoch": 6.3453815261044175, "percentage": 31.73, "elapsed_time": "0:08:12", "remaining_time": "0:17:40", "throughput": 3997.74, "total_tokens": 1970592} |
| {"current_steps": 3165, "total_steps": 9960, "loss": 0.0911, "lr": 8.62482061995768e-06, "epoch": 6.355421686746988, "percentage": 31.78, "elapsed_time": "0:08:13", "remaining_time": "0:17:39", "throughput": 3998.27, "total_tokens": 1973856} |
| {"current_steps": 3170, "total_steps": 9960, "loss": 0.1716, "lr": 8.618780120697152e-06, "epoch": 6.365461847389558, "percentage": 31.83, "elapsed_time": "0:08:14", "remaining_time": "0:17:39", "throughput": 3999.05, "total_tokens": 1977760} |
| {"current_steps": 3175, "total_steps": 9960, "loss": 0.1568, "lr": 8.612728509285395e-06, "epoch": 6.375502008032129, "percentage": 31.88, "elapsed_time": "0:08:15", "remaining_time": "0:17:38", "throughput": 3999.67, "total_tokens": 1981408} |
| {"current_steps": 3180, "total_steps": 9960, "loss": 0.0847, "lr": 8.606665804305034e-06, "epoch": 6.385542168674699, "percentage": 31.93, "elapsed_time": "0:08:16", "remaining_time": "0:17:37", "throughput": 4000.45, "total_tokens": 1985056} |
| {"current_steps": 3185, "total_steps": 9960, "loss": 0.1526, "lr": 8.600592024372756e-06, "epoch": 6.395582329317269, "percentage": 31.98, "elapsed_time": "0:08:16", "remaining_time": "0:17:37", "throughput": 4000.48, "total_tokens": 1988000} |
| {"current_steps": 3190, "total_steps": 9960, "loss": 0.1492, "lr": 8.594507188139251e-06, "epoch": 6.405622489959839, "percentage": 32.03, "elapsed_time": "0:08:17", "remaining_time": "0:17:36", "throughput": 4000.8, "total_tokens": 1991168} |
| {"current_steps": 3195, "total_steps": 9960, "loss": 0.0747, "lr": 8.588411314289169e-06, "epoch": 6.4156626506024095, "percentage": 32.08, "elapsed_time": "0:08:18", "remaining_time": "0:17:35", "throughput": 4001.16, "total_tokens": 1994560} |
| {"current_steps": 3200, "total_steps": 9960, "loss": 0.1097, "lr": 8.582304421541045e-06, "epoch": 6.42570281124498, "percentage": 32.13, "elapsed_time": "0:08:19", "remaining_time": "0:17:34", "throughput": 4001.56, "total_tokens": 1997248} |
| {"current_steps": 3205, "total_steps": 9960, "loss": 0.1648, "lr": 8.576186528647253e-06, "epoch": 6.43574297188755, "percentage": 32.18, "elapsed_time": "0:08:19", "remaining_time": "0:17:33", "throughput": 4002.21, "total_tokens": 2000736} |
| {"current_steps": 3210, "total_steps": 9960, "loss": 0.0742, "lr": 8.570057654393943e-06, "epoch": 6.445783132530121, "percentage": 32.23, "elapsed_time": "0:08:20", "remaining_time": "0:17:32", "throughput": 4002.77, "total_tokens": 2004192} |
| {"current_steps": 3215, "total_steps": 9960, "loss": 0.1466, "lr": 8.563917817600988e-06, "epoch": 6.455823293172691, "percentage": 32.28, "elapsed_time": "0:08:21", "remaining_time": "0:17:32", "throughput": 4003.53, "total_tokens": 2007616} |
| {"current_steps": 3220, "total_steps": 9960, "loss": 0.1116, "lr": 8.557767037121923e-06, "epoch": 6.4658634538152615, "percentage": 32.33, "elapsed_time": "0:08:22", "remaining_time": "0:17:31", "throughput": 4003.75, "total_tokens": 2010720} |
| {"current_steps": 3225, "total_steps": 9960, "loss": 0.1186, "lr": 8.551605331843885e-06, "epoch": 6.475903614457831, "percentage": 32.38, "elapsed_time": "0:08:23", "remaining_time": "0:17:30", "throughput": 4004.47, "total_tokens": 2014368} |
| {"current_steps": 3230, "total_steps": 9960, "loss": 0.1393, "lr": 8.545432720687558e-06, "epoch": 6.485943775100401, "percentage": 32.43, "elapsed_time": "0:08:23", "remaining_time": "0:17:29", "throughput": 4004.85, "total_tokens": 2017280} |
| {"current_steps": 3235, "total_steps": 9960, "loss": 0.2241, "lr": 8.53924922260712e-06, "epoch": 6.495983935742972, "percentage": 32.48, "elapsed_time": "0:08:24", "remaining_time": "0:17:28", "throughput": 4005.1, "total_tokens": 2020256} |
| {"current_steps": 3240, "total_steps": 9960, "loss": 0.1532, "lr": 8.533054856590175e-06, "epoch": 6.506024096385542, "percentage": 32.53, "elapsed_time": "0:08:25", "remaining_time": "0:17:28", "throughput": 4005.47, "total_tokens": 2023968} |
| {"current_steps": 3245, "total_steps": 9960, "loss": 0.1268, "lr": 8.526849641657697e-06, "epoch": 6.516064257028113, "percentage": 32.58, "elapsed_time": "0:08:25", "remaining_time": "0:17:26", "throughput": 4005.26, "total_tokens": 2026208} |
| {"current_steps": 3250, "total_steps": 9960, "loss": 0.1392, "lr": 8.520633596863978e-06, "epoch": 6.526104417670683, "percentage": 32.63, "elapsed_time": "0:08:26", "remaining_time": "0:17:25", "throughput": 4005.13, "total_tokens": 2028512} |
| {"current_steps": 3255, "total_steps": 9960, "loss": 0.161, "lr": 8.514406741296565e-06, "epoch": 6.5361445783132535, "percentage": 32.68, "elapsed_time": "0:08:27", "remaining_time": "0:17:24", "throughput": 4005.61, "total_tokens": 2031456} |
| {"current_steps": 3260, "total_steps": 9960, "loss": 0.108, "lr": 8.508169094076197e-06, "epoch": 6.546184738955823, "percentage": 32.73, "elapsed_time": "0:08:27", "remaining_time": "0:17:23", "throughput": 4005.74, "total_tokens": 2033888} |
| {"current_steps": 3265, "total_steps": 9960, "loss": 0.1776, "lr": 8.501920674356755e-06, "epoch": 6.556224899598393, "percentage": 32.78, "elapsed_time": "0:08:28", "remaining_time": "0:17:22", "throughput": 4006.26, "total_tokens": 2037312} |
| {"current_steps": 3270, "total_steps": 9960, "loss": 0.1337, "lr": 8.495661501325197e-06, "epoch": 6.566265060240964, "percentage": 32.83, "elapsed_time": "0:08:29", "remaining_time": "0:17:21", "throughput": 4006.47, "total_tokens": 2040448} |
| {"current_steps": 3275, "total_steps": 9960, "loss": 0.1077, "lr": 8.489391594201503e-06, "epoch": 6.576305220883534, "percentage": 32.88, "elapsed_time": "0:08:30", "remaining_time": "0:17:21", "throughput": 4007.16, "total_tokens": 2043968} |
| {"current_steps": 3280, "total_steps": 9960, "loss": 0.1124, "lr": 8.483110972238612e-06, "epoch": 6.586345381526105, "percentage": 32.93, "elapsed_time": "0:08:30", "remaining_time": "0:17:20", "throughput": 4007.86, "total_tokens": 2047584} |
| {"current_steps": 3285, "total_steps": 9960, "loss": 0.0998, "lr": 8.476819654722365e-06, "epoch": 6.596385542168675, "percentage": 32.98, "elapsed_time": "0:08:31", "remaining_time": "0:17:19", "throughput": 4008.56, "total_tokens": 2051136} |
| {"current_steps": 3290, "total_steps": 9960, "loss": 0.1312, "lr": 8.47051766097145e-06, "epoch": 6.606425702811245, "percentage": 33.03, "elapsed_time": "0:08:32", "remaining_time": "0:17:19", "throughput": 4009.3, "total_tokens": 2054784} |
| {"current_steps": 3295, "total_steps": 9960, "loss": 0.1411, "lr": 8.46420501033733e-06, "epoch": 6.616465863453815, "percentage": 33.08, "elapsed_time": "0:08:33", "remaining_time": "0:17:18", "throughput": 4009.84, "total_tokens": 2058176} |
| {"current_steps": 3300, "total_steps": 9960, "loss": 0.1068, "lr": 8.457881722204201e-06, "epoch": 6.626506024096385, "percentage": 33.13, "elapsed_time": "0:08:34", "remaining_time": "0:17:17", "throughput": 4010.29, "total_tokens": 2061472} |
| {"current_steps": 3305, "total_steps": 9960, "loss": 0.1221, "lr": 8.45154781598892e-06, "epoch": 6.636546184738956, "percentage": 33.18, "elapsed_time": "0:08:34", "remaining_time": "0:17:16", "throughput": 4010.46, "total_tokens": 2064288} |
| {"current_steps": 3310, "total_steps": 9960, "loss": 0.0784, "lr": 8.445203311140944e-06, "epoch": 6.646586345381526, "percentage": 33.23, "elapsed_time": "0:08:35", "remaining_time": "0:17:15", "throughput": 4011.08, "total_tokens": 2067936} |
| {"current_steps": 3315, "total_steps": 9960, "loss": 0.1184, "lr": 8.438848227142282e-06, "epoch": 6.656626506024097, "percentage": 33.28, "elapsed_time": "0:08:36", "remaining_time": "0:17:14", "throughput": 4011.21, "total_tokens": 2070752} |
| {"current_steps": 3320, "total_steps": 9960, "loss": 0.0321, "lr": 8.432482583507425e-06, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:08:36", "remaining_time": "0:17:13", "throughput": 4011.61, "total_tokens": 2073664} |
| {"current_steps": 3325, "total_steps": 9960, "loss": 0.1295, "lr": 8.42610639978329e-06, "epoch": 6.676706827309237, "percentage": 33.38, "elapsed_time": "0:08:37", "remaining_time": "0:17:13", "throughput": 4012.32, "total_tokens": 2077376} |
| {"current_steps": 3330, "total_steps": 9960, "loss": 0.074, "lr": 8.41971969554916e-06, "epoch": 6.686746987951807, "percentage": 33.43, "elapsed_time": "0:08:38", "remaining_time": "0:17:12", "throughput": 4012.62, "total_tokens": 2080608} |
| {"current_steps": 3335, "total_steps": 9960, "loss": 0.1991, "lr": 8.413322490416623e-06, "epoch": 6.696787148594377, "percentage": 33.48, "elapsed_time": "0:08:39", "remaining_time": "0:17:11", "throughput": 4012.89, "total_tokens": 2083104} |
| {"current_steps": 3340, "total_steps": 9960, "loss": 0.1012, "lr": 8.40691480402951e-06, "epoch": 6.706827309236948, "percentage": 33.53, "elapsed_time": "0:08:39", "remaining_time": "0:17:10", "throughput": 4012.94, "total_tokens": 2085856} |
| {"current_steps": 3345, "total_steps": 9960, "loss": 0.1205, "lr": 8.40049665606384e-06, "epoch": 6.716867469879518, "percentage": 33.58, "elapsed_time": "0:08:40", "remaining_time": "0:17:09", "throughput": 4013.24, "total_tokens": 2088928} |
| {"current_steps": 3350, "total_steps": 9960, "loss": 0.2032, "lr": 8.394068066227752e-06, "epoch": 6.7269076305220885, "percentage": 33.63, "elapsed_time": "0:08:41", "remaining_time": "0:17:08", "throughput": 4013.46, "total_tokens": 2091616} |
| {"current_steps": 3355, "total_steps": 9960, "loss": 0.1548, "lr": 8.387629054261454e-06, "epoch": 6.736947791164659, "percentage": 33.68, "elapsed_time": "0:08:41", "remaining_time": "0:17:07", "throughput": 4013.45, "total_tokens": 2094272} |
| {"current_steps": 3360, "total_steps": 9960, "loss": 0.1488, "lr": 8.381179639937152e-06, "epoch": 6.746987951807229, "percentage": 33.73, "elapsed_time": "0:08:42", "remaining_time": "0:17:06", "throughput": 4013.48, "total_tokens": 2097152} |
| {"current_steps": 3365, "total_steps": 9960, "loss": 0.1051, "lr": 8.374719843059e-06, "epoch": 6.757028112449799, "percentage": 33.79, "elapsed_time": "0:08:43", "remaining_time": "0:17:05", "throughput": 4013.97, "total_tokens": 2100480} |
| {"current_steps": 3370, "total_steps": 9960, "loss": 0.1419, "lr": 8.368249683463028e-06, "epoch": 6.767068273092369, "percentage": 33.84, "elapsed_time": "0:08:44", "remaining_time": "0:17:04", "throughput": 4014.28, "total_tokens": 2103552} |
| {"current_steps": 3375, "total_steps": 9960, "loss": 0.1153, "lr": 8.361769181017089e-06, "epoch": 6.77710843373494, "percentage": 33.89, "elapsed_time": "0:08:44", "remaining_time": "0:17:03", "throughput": 4014.8, "total_tokens": 2106848} |
| {"current_steps": 3380, "total_steps": 9960, "loss": 0.1921, "lr": 8.355278355620795e-06, "epoch": 6.78714859437751, "percentage": 33.94, "elapsed_time": "0:08:45", "remaining_time": "0:17:02", "throughput": 4014.68, "total_tokens": 2109632} |
| {"current_steps": 3385, "total_steps": 9960, "loss": 0.1737, "lr": 8.348777227205462e-06, "epoch": 6.7971887550200805, "percentage": 33.99, "elapsed_time": "0:08:46", "remaining_time": "0:17:01", "throughput": 4014.58, "total_tokens": 2112128} |
| {"current_steps": 3390, "total_steps": 9960, "loss": 0.0897, "lr": 8.342265815734034e-06, "epoch": 6.807228915662651, "percentage": 34.04, "elapsed_time": "0:08:46", "remaining_time": "0:17:00", "throughput": 4014.54, "total_tokens": 2114592} |
| {"current_steps": 3395, "total_steps": 9960, "loss": 0.1537, "lr": 8.335744141201037e-06, "epoch": 6.817269076305221, "percentage": 34.09, "elapsed_time": "0:08:47", "remaining_time": "0:16:59", "throughput": 4014.97, "total_tokens": 2117728} |
| {"current_steps": 3400, "total_steps": 9960, "loss": 0.1561, "lr": 8.329212223632511e-06, "epoch": 6.827309236947791, "percentage": 34.14, "elapsed_time": "0:08:48", "remaining_time": "0:16:59", "throughput": 4015.54, "total_tokens": 2121792} |
| {"current_steps": 3405, "total_steps": 9960, "loss": 0.1087, "lr": 8.32267008308595e-06, "epoch": 6.837349397590361, "percentage": 34.19, "elapsed_time": "0:08:49", "remaining_time": "0:16:58", "throughput": 4015.9, "total_tokens": 2124736} |
| {"current_steps": 3410, "total_steps": 9960, "loss": 0.133, "lr": 8.316117739650235e-06, "epoch": 6.847389558232932, "percentage": 34.24, "elapsed_time": "0:08:49", "remaining_time": "0:16:57", "throughput": 4016.1, "total_tokens": 2127456} |
| {"current_steps": 3415, "total_steps": 9960, "loss": 0.1316, "lr": 8.309555213445583e-06, "epoch": 6.857429718875502, "percentage": 34.29, "elapsed_time": "0:08:50", "remaining_time": "0:16:56", "throughput": 4016.64, "total_tokens": 2130720} |
| {"current_steps": 3420, "total_steps": 9960, "loss": 0.0751, "lr": 8.302982524623475e-06, "epoch": 6.867469879518072, "percentage": 34.34, "elapsed_time": "0:08:51", "remaining_time": "0:16:55", "throughput": 4016.76, "total_tokens": 2133376} |
| {"current_steps": 3425, "total_steps": 9960, "loss": 0.1011, "lr": 8.296399693366601e-06, "epoch": 6.877510040160643, "percentage": 34.39, "elapsed_time": "0:08:51", "remaining_time": "0:16:54", "throughput": 4016.7, "total_tokens": 2135872} |
| {"current_steps": 3430, "total_steps": 9960, "loss": 0.1031, "lr": 8.289806739888791e-06, "epoch": 6.887550200803213, "percentage": 34.44, "elapsed_time": "0:08:52", "remaining_time": "0:16:53", "throughput": 4016.71, "total_tokens": 2138592} |
| {"current_steps": 3435, "total_steps": 9960, "loss": 0.1485, "lr": 8.283203684434963e-06, "epoch": 6.897590361445783, "percentage": 34.49, "elapsed_time": "0:08:53", "remaining_time": "0:16:52", "throughput": 4016.77, "total_tokens": 2141312} |
| {"current_steps": 3440, "total_steps": 9960, "loss": 0.1108, "lr": 8.27659054728105e-06, "epoch": 6.907630522088353, "percentage": 34.54, "elapsed_time": "0:08:53", "remaining_time": "0:16:51", "throughput": 4016.63, "total_tokens": 2143936} |
| {"current_steps": 3445, "total_steps": 9960, "loss": 0.1032, "lr": 8.269967348733947e-06, "epoch": 6.917670682730924, "percentage": 34.59, "elapsed_time": "0:08:54", "remaining_time": "0:16:50", "throughput": 4017.17, "total_tokens": 2147456} |
| {"current_steps": 3450, "total_steps": 9960, "loss": 0.1131, "lr": 8.26333410913144e-06, "epoch": 6.927710843373494, "percentage": 34.64, "elapsed_time": "0:08:55", "remaining_time": "0:16:50", "throughput": 4017.46, "total_tokens": 2150624} |
| {"current_steps": 3455, "total_steps": 9960, "loss": 0.124, "lr": 8.256690848842153e-06, "epoch": 6.937751004016064, "percentage": 34.69, "elapsed_time": "0:08:56", "remaining_time": "0:16:49", "throughput": 4017.95, "total_tokens": 2154176} |
| {"current_steps": 3460, "total_steps": 9960, "loss": 0.1661, "lr": 8.250037588265473e-06, "epoch": 6.947791164658635, "percentage": 34.74, "elapsed_time": "0:08:56", "remaining_time": "0:16:48", "throughput": 4018.22, "total_tokens": 2157056} |
| {"current_steps": 3465, "total_steps": 9960, "loss": 0.0956, "lr": 8.243374347831505e-06, "epoch": 6.957831325301205, "percentage": 34.79, "elapsed_time": "0:08:57", "remaining_time": "0:16:47", "throughput": 4018.53, "total_tokens": 2160480} |
| {"current_steps": 3470, "total_steps": 9960, "loss": 0.1597, "lr": 8.236701148000989e-06, "epoch": 6.967871485943775, "percentage": 34.84, "elapsed_time": "0:08:58", "remaining_time": "0:16:46", "throughput": 4019.05, "total_tokens": 2163840} |
| {"current_steps": 3475, "total_steps": 9960, "loss": 0.1122, "lr": 8.230018009265255e-06, "epoch": 6.977911646586345, "percentage": 34.89, "elapsed_time": "0:08:59", "remaining_time": "0:16:46", "throughput": 4019.32, "total_tokens": 2166848} |
| {"current_steps": 3480, "total_steps": 9960, "loss": 0.1319, "lr": 8.223324952146145e-06, "epoch": 6.9879518072289155, "percentage": 34.94, "elapsed_time": "0:08:59", "remaining_time": "0:16:45", "throughput": 4019.6, "total_tokens": 2169568} |
| {"current_steps": 3485, "total_steps": 9960, "loss": 0.0853, "lr": 8.216621997195966e-06, "epoch": 6.997991967871486, "percentage": 34.99, "elapsed_time": "0:09:00", "remaining_time": "0:16:44", "throughput": 4019.95, "total_tokens": 2172288} |
| {"current_steps": 3490, "total_steps": 9960, "loss": 0.1287, "lr": 8.209909164997409e-06, "epoch": 7.008032128514056, "percentage": 35.04, "elapsed_time": "0:09:01", "remaining_time": "0:16:43", "throughput": 4019.35, "total_tokens": 2175136} |
| {"current_steps": 3495, "total_steps": 9960, "loss": 0.0723, "lr": 8.203186476163503e-06, "epoch": 7.018072289156627, "percentage": 35.09, "elapsed_time": "0:09:02", "remaining_time": "0:16:42", "throughput": 4019.94, "total_tokens": 2178848} |
| {"current_steps": 3500, "total_steps": 9960, "loss": 0.0719, "lr": 8.196453951337538e-06, "epoch": 7.028112449799197, "percentage": 35.14, "elapsed_time": "0:09:02", "remaining_time": "0:16:41", "throughput": 4020.1, "total_tokens": 2181568} |
| {"current_steps": 3505, "total_steps": 9960, "loss": 0.1081, "lr": 8.189711611193012e-06, "epoch": 7.038152610441767, "percentage": 35.19, "elapsed_time": "0:09:03", "remaining_time": "0:16:41", "throughput": 4020.77, "total_tokens": 2185664} |
| {"current_steps": 3510, "total_steps": 9960, "loss": 0.1156, "lr": 8.182959476433555e-06, "epoch": 7.048192771084337, "percentage": 35.24, "elapsed_time": "0:09:04", "remaining_time": "0:16:40", "throughput": 4021.41, "total_tokens": 2189536} |
| {"current_steps": 3515, "total_steps": 9960, "loss": 0.0488, "lr": 8.176197567792883e-06, "epoch": 7.0582329317269075, "percentage": 35.29, "elapsed_time": "0:09:05", "remaining_time": "0:16:39", "throughput": 4021.72, "total_tokens": 2192672} |
| {"current_steps": 3520, "total_steps": 9960, "loss": 0.1427, "lr": 8.169425906034718e-06, "epoch": 7.068273092369478, "percentage": 35.34, "elapsed_time": "0:09:05", "remaining_time": "0:16:38", "throughput": 4021.91, "total_tokens": 2195136} |
| {"current_steps": 3525, "total_steps": 9960, "loss": 0.1883, "lr": 8.162644511952735e-06, "epoch": 7.078313253012048, "percentage": 35.39, "elapsed_time": "0:09:06", "remaining_time": "0:16:37", "throughput": 4022.18, "total_tokens": 2198368} |
| {"current_steps": 3530, "total_steps": 9960, "loss": 0.1243, "lr": 8.155853406370488e-06, "epoch": 7.088353413654619, "percentage": 35.44, "elapsed_time": "0:09:07", "remaining_time": "0:16:36", "throughput": 4022.44, "total_tokens": 2201376} |
| {"current_steps": 3535, "total_steps": 9960, "loss": 0.1082, "lr": 8.149052610141357e-06, "epoch": 7.098393574297189, "percentage": 35.49, "elapsed_time": "0:09:07", "remaining_time": "0:16:35", "throughput": 4022.57, "total_tokens": 2204160} |
| {"current_steps": 3540, "total_steps": 9960, "loss": 0.1217, "lr": 8.142242144148478e-06, "epoch": 7.108433734939759, "percentage": 35.54, "elapsed_time": "0:09:08", "remaining_time": "0:16:35", "throughput": 4023.07, "total_tokens": 2207296} |
| {"current_steps": 3545, "total_steps": 9960, "loss": 0.0637, "lr": 8.135422029304682e-06, "epoch": 7.118473895582329, "percentage": 35.59, "elapsed_time": "0:09:09", "remaining_time": "0:16:34", "throughput": 4023.44, "total_tokens": 2210528} |
| {"current_steps": 3550, "total_steps": 9960, "loss": 0.101, "lr": 8.128592286552422e-06, "epoch": 7.128514056224899, "percentage": 35.64, "elapsed_time": "0:09:10", "remaining_time": "0:16:33", "throughput": 4023.38, "total_tokens": 2212960} |
| {"current_steps": 3555, "total_steps": 9960, "loss": 0.114, "lr": 8.12175293686372e-06, "epoch": 7.13855421686747, "percentage": 35.69, "elapsed_time": "0:09:10", "remaining_time": "0:16:32", "throughput": 4023.48, "total_tokens": 2216032} |
| {"current_steps": 3560, "total_steps": 9960, "loss": 0.1794, "lr": 8.1149040012401e-06, "epoch": 7.14859437751004, "percentage": 35.74, "elapsed_time": "0:09:11", "remaining_time": "0:16:31", "throughput": 4023.7, "total_tokens": 2218944} |
| {"current_steps": 3565, "total_steps": 9960, "loss": 0.1161, "lr": 8.108045500712518e-06, "epoch": 7.158634538152611, "percentage": 35.79, "elapsed_time": "0:09:12", "remaining_time": "0:16:30", "throughput": 4024.06, "total_tokens": 2222336} |
| {"current_steps": 3570, "total_steps": 9960, "loss": 0.1088, "lr": 8.101177456341301e-06, "epoch": 7.168674698795181, "percentage": 35.84, "elapsed_time": "0:09:12", "remaining_time": "0:16:29", "throughput": 4024.46, "total_tokens": 2225472} |
| {"current_steps": 3575, "total_steps": 9960, "loss": 0.1078, "lr": 8.094299889216081e-06, "epoch": 7.178714859437751, "percentage": 35.89, "elapsed_time": "0:09:13", "remaining_time": "0:16:28", "throughput": 4024.59, "total_tokens": 2228320} |
| {"current_steps": 3580, "total_steps": 9960, "loss": 0.1111, "lr": 8.087412820455738e-06, "epoch": 7.188755020080321, "percentage": 35.94, "elapsed_time": "0:09:14", "remaining_time": "0:16:28", "throughput": 4025.14, "total_tokens": 2231648} |
| {"current_steps": 3585, "total_steps": 9960, "loss": 0.0929, "lr": 8.080516271208319e-06, "epoch": 7.198795180722891, "percentage": 35.99, "elapsed_time": "0:09:15", "remaining_time": "0:16:27", "throughput": 4025.48, "total_tokens": 2234560} |
| {"current_steps": 3590, "total_steps": 9960, "loss": 0.1326, "lr": 8.07361026265099e-06, "epoch": 7.208835341365462, "percentage": 36.04, "elapsed_time": "0:09:15", "remaining_time": "0:16:26", "throughput": 4025.85, "total_tokens": 2237728} |
| {"current_steps": 3595, "total_steps": 9960, "loss": 0.0802, "lr": 8.066694815989961e-06, "epoch": 7.218875502008032, "percentage": 36.09, "elapsed_time": "0:09:16", "remaining_time": "0:16:25", "throughput": 4026.08, "total_tokens": 2240992} |
| {"current_steps": 3600, "total_steps": 9960, "loss": 0.1238, "lr": 8.059769952460423e-06, "epoch": 7.228915662650603, "percentage": 36.14, "elapsed_time": "0:09:17", "remaining_time": "0:16:24", "throughput": 4026.69, "total_tokens": 2244608} |
| {"current_steps": 3605, "total_steps": 9960, "loss": 0.1064, "lr": 8.052835693326484e-06, "epoch": 7.238955823293173, "percentage": 36.19, "elapsed_time": "0:09:18", "remaining_time": "0:16:23", "throughput": 4027.14, "total_tokens": 2247840} |
| {"current_steps": 3610, "total_steps": 9960, "loss": 0.2156, "lr": 8.045892059881101e-06, "epoch": 7.2489959839357425, "percentage": 36.24, "elapsed_time": "0:09:18", "remaining_time": "0:16:23", "throughput": 4027.43, "total_tokens": 2251104} |
| {"current_steps": 3615, "total_steps": 9960, "loss": 0.136, "lr": 8.038939073446022e-06, "epoch": 7.259036144578313, "percentage": 36.3, "elapsed_time": "0:09:19", "remaining_time": "0:16:22", "throughput": 4027.78, "total_tokens": 2254240} |
| {"current_steps": 3620, "total_steps": 9960, "loss": 0.119, "lr": 8.031976755371709e-06, "epoch": 7.269076305220883, "percentage": 36.35, "elapsed_time": "0:09:20", "remaining_time": "0:16:21", "throughput": 4027.91, "total_tokens": 2257472} |
| {"current_steps": 3625, "total_steps": 9960, "loss": 0.0584, "lr": 8.025005127037282e-06, "epoch": 7.279116465863454, "percentage": 36.4, "elapsed_time": "0:09:21", "remaining_time": "0:16:20", "throughput": 4028.19, "total_tokens": 2260640} |
| {"current_steps": 3630, "total_steps": 9960, "loss": 0.1104, "lr": 8.018024209850448e-06, "epoch": 7.289156626506024, "percentage": 36.45, "elapsed_time": "0:09:22", "remaining_time": "0:16:20", "throughput": 4028.82, "total_tokens": 2264544} |
| {"current_steps": 3635, "total_steps": 9960, "loss": 0.1133, "lr": 8.01103402524744e-06, "epoch": 7.2991967871485945, "percentage": 36.5, "elapsed_time": "0:09:22", "remaining_time": "0:16:19", "throughput": 4029.27, "total_tokens": 2268064} |
| {"current_steps": 3640, "total_steps": 9960, "loss": 0.098, "lr": 8.004034594692946e-06, "epoch": 7.309236947791165, "percentage": 36.55, "elapsed_time": "0:09:23", "remaining_time": "0:16:18", "throughput": 4029.53, "total_tokens": 2271136} |
| {"current_steps": 3645, "total_steps": 9960, "loss": 0.0922, "lr": 7.997025939680047e-06, "epoch": 7.3192771084337345, "percentage": 36.6, "elapsed_time": "0:09:24", "remaining_time": "0:16:17", "throughput": 4029.76, "total_tokens": 2274016} |
| {"current_steps": 3650, "total_steps": 9960, "loss": 0.1477, "lr": 7.990008081730145e-06, "epoch": 7.329317269076305, "percentage": 36.65, "elapsed_time": "0:09:25", "remaining_time": "0:16:16", "throughput": 4030.25, "total_tokens": 2277344} |
| {"current_steps": 3655, "total_steps": 9960, "loss": 0.0949, "lr": 7.982981042392907e-06, "epoch": 7.339357429718875, "percentage": 36.7, "elapsed_time": "0:09:25", "remaining_time": "0:16:16", "throughput": 4030.42, "total_tokens": 2280480} |
| {"current_steps": 3660, "total_steps": 9960, "loss": 0.1056, "lr": 7.975944843246195e-06, "epoch": 7.349397590361446, "percentage": 36.75, "elapsed_time": "0:09:26", "remaining_time": "0:16:15", "throughput": 4030.57, "total_tokens": 2283616} |
| {"current_steps": 3665, "total_steps": 9960, "loss": 0.0823, "lr": 7.968899505895987e-06, "epoch": 7.359437751004016, "percentage": 36.8, "elapsed_time": "0:09:27", "remaining_time": "0:16:14", "throughput": 4030.45, "total_tokens": 2285888} |
| {"current_steps": 3670, "total_steps": 9960, "loss": 0.0945, "lr": 7.961845051976334e-06, "epoch": 7.3694779116465865, "percentage": 36.85, "elapsed_time": "0:09:28", "remaining_time": "0:16:13", "throughput": 4031.1, "total_tokens": 2289920} |
| {"current_steps": 3675, "total_steps": 9960, "loss": 0.1121, "lr": 7.954781503149272e-06, "epoch": 7.379518072289157, "percentage": 36.9, "elapsed_time": "0:09:28", "remaining_time": "0:16:12", "throughput": 4031.35, "total_tokens": 2293152} |
| {"current_steps": 3680, "total_steps": 9960, "loss": 0.1065, "lr": 7.94770888110477e-06, "epoch": 7.389558232931727, "percentage": 36.95, "elapsed_time": "0:09:29", "remaining_time": "0:16:11", "throughput": 4031.46, "total_tokens": 2295680} |
| {"current_steps": 3685, "total_steps": 9960, "loss": 0.1099, "lr": 7.940627207560655e-06, "epoch": 7.399598393574297, "percentage": 37.0, "elapsed_time": "0:09:30", "remaining_time": "0:16:11", "throughput": 4031.82, "total_tokens": 2299264} |
| {"current_steps": 3690, "total_steps": 9960, "loss": 0.1326, "lr": 7.933536504262554e-06, "epoch": 7.409638554216867, "percentage": 37.05, "elapsed_time": "0:09:31", "remaining_time": "0:16:10", "throughput": 4032.2, "total_tokens": 2302528} |
| {"current_steps": 3695, "total_steps": 9960, "loss": 0.2, "lr": 7.926436792983813e-06, "epoch": 7.419678714859438, "percentage": 37.1, "elapsed_time": "0:09:31", "remaining_time": "0:16:09", "throughput": 4032.58, "total_tokens": 2305344} |
| {"current_steps": 3700, "total_steps": 9960, "loss": 0.1095, "lr": 7.919328095525446e-06, "epoch": 7.429718875502008, "percentage": 37.15, "elapsed_time": "0:09:32", "remaining_time": "0:16:08", "throughput": 4032.91, "total_tokens": 2308480} |
| {"current_steps": 3705, "total_steps": 9960, "loss": 0.0761, "lr": 7.912210433716054e-06, "epoch": 7.4397590361445785, "percentage": 37.2, "elapsed_time": "0:09:33", "remaining_time": "0:16:07", "throughput": 4033.27, "total_tokens": 2311712} |
| {"current_steps": 3710, "total_steps": 9960, "loss": 0.1312, "lr": 7.90508382941177e-06, "epoch": 7.449799196787149, "percentage": 37.25, "elapsed_time": "0:09:33", "remaining_time": "0:16:06", "throughput": 4033.7, "total_tokens": 2314816} |
| {"current_steps": 3715, "total_steps": 9960, "loss": 0.1492, "lr": 7.897948304496189e-06, "epoch": 7.459839357429719, "percentage": 37.3, "elapsed_time": "0:09:34", "remaining_time": "0:16:05", "throughput": 4033.52, "total_tokens": 2317088} |
| {"current_steps": 3720, "total_steps": 9960, "loss": 0.0939, "lr": 7.890803880880291e-06, "epoch": 7.469879518072289, "percentage": 37.35, "elapsed_time": "0:09:35", "remaining_time": "0:16:04", "throughput": 4033.94, "total_tokens": 2320192} |
| {"current_steps": 3725, "total_steps": 9960, "loss": 0.1041, "lr": 7.883650580502384e-06, "epoch": 7.479919678714859, "percentage": 37.4, "elapsed_time": "0:09:35", "remaining_time": "0:16:03", "throughput": 4034.17, "total_tokens": 2323328} |
| {"current_steps": 3730, "total_steps": 9960, "loss": 0.1267, "lr": 7.876488425328037e-06, "epoch": 7.48995983935743, "percentage": 37.45, "elapsed_time": "0:09:36", "remaining_time": "0:16:02", "throughput": 4034.09, "total_tokens": 2325760} |
| {"current_steps": 3735, "total_steps": 9960, "loss": 0.0458, "lr": 7.869317437350007e-06, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:09:37", "remaining_time": "0:16:02", "throughput": 4034.61, "total_tokens": 2329280} |
| {"current_steps": 3740, "total_steps": 9960, "loss": 0.1071, "lr": 7.862137638588171e-06, "epoch": 7.51004016064257, "percentage": 37.55, "elapsed_time": "0:09:38", "remaining_time": "0:16:01", "throughput": 4034.88, "total_tokens": 2332544} |
| {"current_steps": 3745, "total_steps": 9960, "loss": 0.1379, "lr": 7.854949051089467e-06, "epoch": 7.520080321285141, "percentage": 37.6, "elapsed_time": "0:09:38", "remaining_time": "0:16:00", "throughput": 4035.42, "total_tokens": 2336352} |
| {"current_steps": 3750, "total_steps": 9960, "loss": 0.0351, "lr": 7.847751696927813e-06, "epoch": 7.530120481927711, "percentage": 37.65, "elapsed_time": "0:09:39", "remaining_time": "0:16:00", "throughput": 4035.77, "total_tokens": 2339840} |
| {"current_steps": 3755, "total_steps": 9960, "loss": 0.1339, "lr": 7.840545598204056e-06, "epoch": 7.540160642570282, "percentage": 37.7, "elapsed_time": "0:09:40", "remaining_time": "0:15:59", "throughput": 4036.55, "total_tokens": 2343808} |
| {"current_steps": 3760, "total_steps": 9960, "loss": 0.1692, "lr": 7.833330777045886e-06, "epoch": 7.550200803212851, "percentage": 37.75, "elapsed_time": "0:09:41", "remaining_time": "0:15:58", "throughput": 4036.78, "total_tokens": 2346816} |
| {"current_steps": 3765, "total_steps": 9960, "loss": 0.1368, "lr": 7.826107255607784e-06, "epoch": 7.5602409638554215, "percentage": 37.8, "elapsed_time": "0:09:42", "remaining_time": "0:15:57", "throughput": 4037.17, "total_tokens": 2349888} |
| {"current_steps": 3770, "total_steps": 9960, "loss": 0.0795, "lr": 7.818875056070944e-06, "epoch": 7.570281124497992, "percentage": 37.85, "elapsed_time": "0:09:42", "remaining_time": "0:15:56", "throughput": 4037.42, "total_tokens": 2352960} |
| {"current_steps": 3775, "total_steps": 9960, "loss": 0.1426, "lr": 7.811634200643202e-06, "epoch": 7.580321285140562, "percentage": 37.9, "elapsed_time": "0:09:43", "remaining_time": "0:15:56", "throughput": 4037.66, "total_tokens": 2356672} |
| {"current_steps": 3780, "total_steps": 9960, "loss": 0.1388, "lr": 7.804384711558983e-06, "epoch": 7.590361445783133, "percentage": 37.95, "elapsed_time": "0:09:44", "remaining_time": "0:15:55", "throughput": 4038.04, "total_tokens": 2360032} |
| {"current_steps": 3785, "total_steps": 9960, "loss": 0.0769, "lr": 7.797126611079219e-06, "epoch": 7.600401606425703, "percentage": 38.0, "elapsed_time": "0:09:45", "remaining_time": "0:15:54", "throughput": 4038.64, "total_tokens": 2363456} |
| {"current_steps": 3790, "total_steps": 9960, "loss": 0.1245, "lr": 7.789859921491288e-06, "epoch": 7.610441767068274, "percentage": 38.05, "elapsed_time": "0:09:46", "remaining_time": "0:15:54", "throughput": 4039.05, "total_tokens": 2366912} |
| {"current_steps": 3795, "total_steps": 9960, "loss": 0.1209, "lr": 7.782584665108934e-06, "epoch": 7.620481927710843, "percentage": 38.1, "elapsed_time": "0:09:46", "remaining_time": "0:15:53", "throughput": 4039.62, "total_tokens": 2371008} |
| {"current_steps": 3800, "total_steps": 9960, "loss": 0.0855, "lr": 7.775300864272214e-06, "epoch": 7.6305220883534135, "percentage": 38.15, "elapsed_time": "0:09:47", "remaining_time": "0:15:52", "throughput": 4039.8, "total_tokens": 2374016} |
| {"current_steps": 3805, "total_steps": 9960, "loss": 0.166, "lr": 7.768008541347423e-06, "epoch": 7.640562248995984, "percentage": 38.2, "elapsed_time": "0:09:48", "remaining_time": "0:15:51", "throughput": 4040.05, "total_tokens": 2377408} |
| {"current_steps": 3810, "total_steps": 9960, "loss": 0.2516, "lr": 7.760707718727023e-06, "epoch": 7.650602409638554, "percentage": 38.25, "elapsed_time": "0:09:49", "remaining_time": "0:15:50", "throughput": 4039.8, "total_tokens": 2379680} |
| {"current_steps": 3815, "total_steps": 9960, "loss": 0.1049, "lr": 7.753398418829572e-06, "epoch": 7.660642570281125, "percentage": 38.3, "elapsed_time": "0:09:49", "remaining_time": "0:15:50", "throughput": 4040.19, "total_tokens": 2383200} |
| {"current_steps": 3820, "total_steps": 9960, "loss": 0.0645, "lr": 7.746080664099667e-06, "epoch": 7.670682730923695, "percentage": 38.35, "elapsed_time": "0:09:50", "remaining_time": "0:15:49", "throughput": 4040.24, "total_tokens": 2386048} |
| {"current_steps": 3825, "total_steps": 9960, "loss": 0.0484, "lr": 7.73875447700786e-06, "epoch": 7.6807228915662655, "percentage": 38.4, "elapsed_time": "0:09:51", "remaining_time": "0:15:48", "throughput": 4040.33, "total_tokens": 2389152} |
| {"current_steps": 3830, "total_steps": 9960, "loss": 0.198, "lr": 7.731419880050599e-06, "epoch": 7.690763052208835, "percentage": 38.45, "elapsed_time": "0:09:52", "remaining_time": "0:15:47", "throughput": 4040.47, "total_tokens": 2392064} |
| {"current_steps": 3835, "total_steps": 9960, "loss": 0.1013, "lr": 7.72407689575016e-06, "epoch": 7.7008032128514055, "percentage": 38.5, "elapsed_time": "0:09:52", "remaining_time": "0:15:46", "throughput": 4040.95, "total_tokens": 2395488} |
| {"current_steps": 3840, "total_steps": 9960, "loss": 0.1659, "lr": 7.716725546654564e-06, "epoch": 7.710843373493976, "percentage": 38.55, "elapsed_time": "0:09:53", "remaining_time": "0:15:45", "throughput": 4041.17, "total_tokens": 2398496} |
| {"current_steps": 3845, "total_steps": 9960, "loss": 0.0891, "lr": 7.709365855337528e-06, "epoch": 7.720883534136546, "percentage": 38.6, "elapsed_time": "0:09:54", "remaining_time": "0:15:45", "throughput": 4041.49, "total_tokens": 2401728} |
| {"current_steps": 3850, "total_steps": 9960, "loss": 0.1085, "lr": 7.701997844398379e-06, "epoch": 7.730923694779117, "percentage": 38.65, "elapsed_time": "0:09:54", "remaining_time": "0:15:44", "throughput": 4041.59, "total_tokens": 2404320} |
| {"current_steps": 3855, "total_steps": 9960, "loss": 0.1266, "lr": 7.694621536461995e-06, "epoch": 7.740963855421687, "percentage": 38.7, "elapsed_time": "0:09:55", "remaining_time": "0:15:43", "throughput": 4041.96, "total_tokens": 2407424} |
| {"current_steps": 3860, "total_steps": 9960, "loss": 0.0699, "lr": 7.687236954178729e-06, "epoch": 7.7510040160642575, "percentage": 38.76, "elapsed_time": "0:09:56", "remaining_time": "0:15:42", "throughput": 4042.52, "total_tokens": 2411136} |
| {"current_steps": 3865, "total_steps": 9960, "loss": 0.099, "lr": 7.67984412022434e-06, "epoch": 7.761044176706827, "percentage": 38.81, "elapsed_time": "0:09:57", "remaining_time": "0:15:41", "throughput": 4042.91, "total_tokens": 2414080} |
| {"current_steps": 3870, "total_steps": 9960, "loss": 0.1714, "lr": 7.672443057299931e-06, "epoch": 7.771084337349397, "percentage": 38.86, "elapsed_time": "0:09:57", "remaining_time": "0:15:40", "throughput": 4043.02, "total_tokens": 2416832} |
| {"current_steps": 3875, "total_steps": 9960, "loss": 0.0734, "lr": 7.665033788131869e-06, "epoch": 7.781124497991968, "percentage": 38.91, "elapsed_time": "0:09:58", "remaining_time": "0:15:39", "throughput": 4043.3, "total_tokens": 2419680} |
| {"current_steps": 3880, "total_steps": 9960, "loss": 0.1235, "lr": 7.657616335471723e-06, "epoch": 7.791164658634538, "percentage": 38.96, "elapsed_time": "0:09:59", "remaining_time": "0:15:38", "throughput": 4043.47, "total_tokens": 2422848} |
| {"current_steps": 3885, "total_steps": 9960, "loss": 0.1255, "lr": 7.650190722096188e-06, "epoch": 7.801204819277109, "percentage": 39.01, "elapsed_time": "0:10:00", "remaining_time": "0:15:38", "throughput": 4043.9, "total_tokens": 2426816} |
| {"current_steps": 3890, "total_steps": 9960, "loss": 0.1027, "lr": 7.64275697080702e-06, "epoch": 7.811244979919679, "percentage": 39.06, "elapsed_time": "0:10:00", "remaining_time": "0:15:37", "throughput": 4044.22, "total_tokens": 2429440} |
| {"current_steps": 3895, "total_steps": 9960, "loss": 0.0784, "lr": 7.635315104430959e-06, "epoch": 7.821285140562249, "percentage": 39.11, "elapsed_time": "0:10:01", "remaining_time": "0:15:36", "throughput": 4044.38, "total_tokens": 2432064} |
| {"current_steps": 3900, "total_steps": 9960, "loss": 0.0596, "lr": 7.6278651458196724e-06, "epoch": 7.831325301204819, "percentage": 39.16, "elapsed_time": "0:10:02", "remaining_time": "0:15:35", "throughput": 4044.64, "total_tokens": 2435328} |
| {"current_steps": 3905, "total_steps": 9960, "loss": 0.1102, "lr": 7.620407117849674e-06, "epoch": 7.841365461847389, "percentage": 39.21, "elapsed_time": "0:10:02", "remaining_time": "0:15:34", "throughput": 4044.88, "total_tokens": 2438240} |
| {"current_steps": 3910, "total_steps": 9960, "loss": 0.036, "lr": 7.6129410434222505e-06, "epoch": 7.85140562248996, "percentage": 39.26, "elapsed_time": "0:10:03", "remaining_time": "0:15:33", "throughput": 4045.14, "total_tokens": 2441312} |
| {"current_steps": 3915, "total_steps": 9960, "loss": 0.0824, "lr": 7.6054669454634025e-06, "epoch": 7.86144578313253, "percentage": 39.31, "elapsed_time": "0:10:04", "remaining_time": "0:15:32", "throughput": 4045.16, "total_tokens": 2444288} |
| {"current_steps": 3920, "total_steps": 9960, "loss": 0.0896, "lr": 7.597984846923765e-06, "epoch": 7.871485943775101, "percentage": 39.36, "elapsed_time": "0:10:04", "remaining_time": "0:15:32", "throughput": 4045.55, "total_tokens": 2447360} |
| {"current_steps": 3925, "total_steps": 9960, "loss": 0.1538, "lr": 7.5904947707785434e-06, "epoch": 7.881526104417671, "percentage": 39.41, "elapsed_time": "0:10:05", "remaining_time": "0:15:31", "throughput": 4045.57, "total_tokens": 2449920} |
| {"current_steps": 3930, "total_steps": 9960, "loss": 0.2447, "lr": 7.582996740027438e-06, "epoch": 7.891566265060241, "percentage": 39.46, "elapsed_time": "0:10:06", "remaining_time": "0:15:30", "throughput": 4045.9, "total_tokens": 2453120} |
| {"current_steps": 3935, "total_steps": 9960, "loss": 0.1763, "lr": 7.575490777694572e-06, "epoch": 7.901606425702811, "percentage": 39.51, "elapsed_time": "0:10:07", "remaining_time": "0:15:29", "throughput": 4046.13, "total_tokens": 2456512} |
| {"current_steps": 3940, "total_steps": 9960, "loss": 0.1581, "lr": 7.567976906828431e-06, "epoch": 7.911646586345381, "percentage": 39.56, "elapsed_time": "0:10:07", "remaining_time": "0:15:28", "throughput": 4046.39, "total_tokens": 2459488} |
| {"current_steps": 3945, "total_steps": 9960, "loss": 0.1783, "lr": 7.560455150501781e-06, "epoch": 7.921686746987952, "percentage": 39.61, "elapsed_time": "0:10:08", "remaining_time": "0:15:27", "throughput": 4046.75, "total_tokens": 2462880} |
| {"current_steps": 3950, "total_steps": 9960, "loss": 0.1394, "lr": 7.552925531811601e-06, "epoch": 7.931726907630522, "percentage": 39.66, "elapsed_time": "0:10:09", "remaining_time": "0:15:27", "throughput": 4047.21, "total_tokens": 2466432} |
| {"current_steps": 3955, "total_steps": 9960, "loss": 0.125, "lr": 7.545388073879018e-06, "epoch": 7.9417670682730925, "percentage": 39.71, "elapsed_time": "0:10:10", "remaining_time": "0:15:26", "throughput": 4047.68, "total_tokens": 2470048} |
| {"current_steps": 3960, "total_steps": 9960, "loss": 0.1295, "lr": 7.537842799849223e-06, "epoch": 7.951807228915663, "percentage": 39.76, "elapsed_time": "0:10:11", "remaining_time": "0:15:25", "throughput": 4047.87, "total_tokens": 2473344} |
| {"current_steps": 3965, "total_steps": 9960, "loss": 0.09, "lr": 7.530289732891415e-06, "epoch": 7.961847389558233, "percentage": 39.81, "elapsed_time": "0:10:11", "remaining_time": "0:15:24", "throughput": 4047.77, "total_tokens": 2475904} |
| {"current_steps": 3970, "total_steps": 9960, "loss": 0.1257, "lr": 7.522728896198718e-06, "epoch": 7.971887550200803, "percentage": 39.86, "elapsed_time": "0:10:12", "remaining_time": "0:15:24", "throughput": 4048.31, "total_tokens": 2479584} |
| {"current_steps": 3975, "total_steps": 9960, "loss": 0.0629, "lr": 7.515160312988117e-06, "epoch": 7.981927710843373, "percentage": 39.91, "elapsed_time": "0:10:13", "remaining_time": "0:15:23", "throughput": 4048.4, "total_tokens": 2482208} |
| {"current_steps": 3980, "total_steps": 9960, "loss": 0.0842, "lr": 7.507584006500381e-06, "epoch": 7.991967871485944, "percentage": 39.96, "elapsed_time": "0:10:13", "remaining_time": "0:15:22", "throughput": 4048.75, "total_tokens": 2485760} |
| {"current_steps": 3984, "total_steps": 9960, "eval_loss": 0.1914680153131485, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:10:22", "remaining_time": "0:15:33", "throughput": 3996.07, "total_tokens": 2487712} |
| {"current_steps": 3985, "total_steps": 9960, "loss": 0.128, "lr": 7.500000000000001e-06, "epoch": 8.002008032128513, "percentage": 40.01, "elapsed_time": "0:10:23", "remaining_time": "0:15:35", "throughput": 3988.53, "total_tokens": 2488608} |
| {"current_steps": 3990, "total_steps": 9960, "loss": 0.1054, "lr": 7.492408316775105e-06, "epoch": 8.012048192771084, "percentage": 40.06, "elapsed_time": "0:10:24", "remaining_time": "0:15:34", "throughput": 3988.03, "total_tokens": 2491424} |
| {"current_steps": 3995, "total_steps": 9960, "loss": 0.1303, "lr": 7.4848089801374005e-06, "epoch": 8.022088353413654, "percentage": 40.11, "elapsed_time": "0:10:25", "remaining_time": "0:15:34", "throughput": 3988.52, "total_tokens": 2495136} |
| {"current_steps": 4000, "total_steps": 9960, "loss": 0.0562, "lr": 7.47720201342209e-06, "epoch": 8.032128514056225, "percentage": 40.16, "elapsed_time": "0:10:26", "remaining_time": "0:15:33", "throughput": 3988.44, "total_tokens": 2497504} |
| {"current_steps": 4005, "total_steps": 9960, "loss": 0.0594, "lr": 7.469587439987811e-06, "epoch": 8.042168674698795, "percentage": 40.21, "elapsed_time": "0:10:26", "remaining_time": "0:15:32", "throughput": 3989.05, "total_tokens": 2500928} |
| {"current_steps": 4010, "total_steps": 9960, "loss": 0.0109, "lr": 7.461965283216557e-06, "epoch": 8.052208835341366, "percentage": 40.26, "elapsed_time": "0:10:27", "remaining_time": "0:15:31", "throughput": 3989.42, "total_tokens": 2504288} |
| {"current_steps": 4015, "total_steps": 9960, "loss": 0.1269, "lr": 7.454335566513603e-06, "epoch": 8.062248995983936, "percentage": 40.31, "elapsed_time": "0:10:28", "remaining_time": "0:15:30", "throughput": 3989.5, "total_tokens": 2507072} |
| {"current_steps": 4020, "total_steps": 9960, "loss": 0.0798, "lr": 7.446698313307445e-06, "epoch": 8.072289156626505, "percentage": 40.36, "elapsed_time": "0:10:29", "remaining_time": "0:15:29", "throughput": 3989.82, "total_tokens": 2510176} |
| {"current_steps": 4025, "total_steps": 9960, "loss": 0.0449, "lr": 7.43905354704972e-06, "epoch": 8.082329317269076, "percentage": 40.41, "elapsed_time": "0:10:29", "remaining_time": "0:15:28", "throughput": 3989.84, "total_tokens": 2512576} |
| {"current_steps": 4030, "total_steps": 9960, "loss": 0.1062, "lr": 7.431401291215131e-06, "epoch": 8.092369477911646, "percentage": 40.46, "elapsed_time": "0:10:30", "remaining_time": "0:15:27", "throughput": 3990.13, "total_tokens": 2515744} |
| {"current_steps": 4035, "total_steps": 9960, "loss": 0.1092, "lr": 7.4237415693013846e-06, "epoch": 8.102409638554217, "percentage": 40.51, "elapsed_time": "0:10:31", "remaining_time": "0:15:26", "throughput": 3990.42, "total_tokens": 2519136} |
| {"current_steps": 4040, "total_steps": 9960, "loss": 0.1781, "lr": 7.416074404829108e-06, "epoch": 8.112449799196787, "percentage": 40.56, "elapsed_time": "0:10:32", "remaining_time": "0:15:26", "throughput": 3990.87, "total_tokens": 2522432} |
| {"current_steps": 4045, "total_steps": 9960, "loss": 0.0839, "lr": 7.408399821341787e-06, "epoch": 8.122489959839358, "percentage": 40.61, "elapsed_time": "0:10:32", "remaining_time": "0:15:25", "throughput": 3991.3, "total_tokens": 2525856} |
| {"current_steps": 4050, "total_steps": 9960, "loss": 0.0912, "lr": 7.400717842405688e-06, "epoch": 8.132530120481928, "percentage": 40.66, "elapsed_time": "0:10:33", "remaining_time": "0:15:24", "throughput": 3991.58, "total_tokens": 2528736} |
| {"current_steps": 4055, "total_steps": 9960, "loss": 0.1502, "lr": 7.393028491609782e-06, "epoch": 8.142570281124499, "percentage": 40.71, "elapsed_time": "0:10:34", "remaining_time": "0:15:23", "throughput": 3992.17, "total_tokens": 2532448} |
| {"current_steps": 4060, "total_steps": 9960, "loss": 0.1389, "lr": 7.385331792565682e-06, "epoch": 8.152610441767068, "percentage": 40.76, "elapsed_time": "0:10:35", "remaining_time": "0:15:23", "throughput": 3992.53, "total_tokens": 2535872} |
| {"current_steps": 4065, "total_steps": 9960, "loss": 0.1405, "lr": 7.377627768907563e-06, "epoch": 8.162650602409638, "percentage": 40.81, "elapsed_time": "0:10:35", "remaining_time": "0:15:22", "throughput": 3992.87, "total_tokens": 2538880} |
| {"current_steps": 4070, "total_steps": 9960, "loss": 0.0228, "lr": 7.369916444292092e-06, "epoch": 8.17269076305221, "percentage": 40.86, "elapsed_time": "0:10:36", "remaining_time": "0:15:21", "throughput": 3993.05, "total_tokens": 2541888} |
| {"current_steps": 4075, "total_steps": 9960, "loss": 0.0577, "lr": 7.362197842398355e-06, "epoch": 8.182730923694779, "percentage": 40.91, "elapsed_time": "0:10:37", "remaining_time": "0:15:20", "throughput": 3993.5, "total_tokens": 2545216} |
| {"current_steps": 4080, "total_steps": 9960, "loss": 0.107, "lr": 7.354471986927785e-06, "epoch": 8.19277108433735, "percentage": 40.96, "elapsed_time": "0:10:38", "remaining_time": "0:15:19", "throughput": 3994.06, "total_tokens": 2548768} |
| {"current_steps": 4085, "total_steps": 9960, "loss": 0.0757, "lr": 7.346738901604086e-06, "epoch": 8.20281124497992, "percentage": 41.01, "elapsed_time": "0:10:38", "remaining_time": "0:15:18", "throughput": 3994.25, "total_tokens": 2551776} |
| {"current_steps": 4090, "total_steps": 9960, "loss": 0.1362, "lr": 7.338998610173166e-06, "epoch": 8.21285140562249, "percentage": 41.06, "elapsed_time": "0:10:39", "remaining_time": "0:15:18", "throughput": 3994.75, "total_tokens": 2555872} |
| {"current_steps": 4095, "total_steps": 9960, "loss": 0.1629, "lr": 7.331251136403057e-06, "epoch": 8.22289156626506, "percentage": 41.11, "elapsed_time": "0:10:40", "remaining_time": "0:15:17", "throughput": 3995.19, "total_tokens": 2559040} |
| {"current_steps": 4100, "total_steps": 9960, "loss": 0.1447, "lr": 7.323496504083849e-06, "epoch": 8.23293172690763, "percentage": 41.16, "elapsed_time": "0:10:41", "remaining_time": "0:15:16", "throughput": 3995.73, "total_tokens": 2562560} |
| {"current_steps": 4105, "total_steps": 9960, "loss": 0.11, "lr": 7.315734737027612e-06, "epoch": 8.242971887550201, "percentage": 41.21, "elapsed_time": "0:10:42", "remaining_time": "0:15:15", "throughput": 3995.96, "total_tokens": 2565504} |
| {"current_steps": 4110, "total_steps": 9960, "loss": 0.0784, "lr": 7.307965859068324e-06, "epoch": 8.25301204819277, "percentage": 41.27, "elapsed_time": "0:10:42", "remaining_time": "0:15:14", "throughput": 3996.02, "total_tokens": 2568256} |
| {"current_steps": 4115, "total_steps": 9960, "loss": 0.085, "lr": 7.300189894061802e-06, "epoch": 8.263052208835342, "percentage": 41.32, "elapsed_time": "0:10:43", "remaining_time": "0:15:13", "throughput": 3996.64, "total_tokens": 2571648} |
| {"current_steps": 4120, "total_steps": 9960, "loss": 0.0661, "lr": 7.292406865885619e-06, "epoch": 8.273092369477911, "percentage": 41.37, "elapsed_time": "0:10:44", "remaining_time": "0:15:13", "throughput": 3997.09, "total_tokens": 2575104} |
| {"current_steps": 4125, "total_steps": 9960, "loss": 0.1056, "lr": 7.284616798439045e-06, "epoch": 8.283132530120483, "percentage": 41.42, "elapsed_time": "0:10:44", "remaining_time": "0:15:12", "throughput": 3997.56, "total_tokens": 2578400} |
| {"current_steps": 4130, "total_steps": 9960, "loss": 0.1329, "lr": 7.2768197156429564e-06, "epoch": 8.293172690763052, "percentage": 41.47, "elapsed_time": "0:10:45", "remaining_time": "0:15:11", "throughput": 3997.82, "total_tokens": 2581376} |
| {"current_steps": 4135, "total_steps": 9960, "loss": 0.0798, "lr": 7.2690156414397775e-06, "epoch": 8.303212851405622, "percentage": 41.52, "elapsed_time": "0:10:46", "remaining_time": "0:15:10", "throughput": 3998.12, "total_tokens": 2584192} |
| {"current_steps": 4140, "total_steps": 9960, "loss": 0.1572, "lr": 7.261204599793399e-06, "epoch": 8.313253012048193, "percentage": 41.57, "elapsed_time": "0:10:47", "remaining_time": "0:15:09", "throughput": 3998.4, "total_tokens": 2587040} |
| {"current_steps": 4145, "total_steps": 9960, "loss": 0.0726, "lr": 7.2533866146891085e-06, "epoch": 8.323293172690763, "percentage": 41.62, "elapsed_time": "0:10:47", "remaining_time": "0:15:08", "throughput": 3998.52, "total_tokens": 2590112} |
| {"current_steps": 4150, "total_steps": 9960, "loss": 0.1047, "lr": 7.245561710133511e-06, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:10:48", "remaining_time": "0:15:07", "throughput": 3998.42, "total_tokens": 2592640} |
| {"current_steps": 4155, "total_steps": 9960, "loss": 0.0888, "lr": 7.23772991015446e-06, "epoch": 8.343373493975903, "percentage": 41.72, "elapsed_time": "0:10:49", "remaining_time": "0:15:06", "throughput": 3998.66, "total_tokens": 2595616} |
| {"current_steps": 4160, "total_steps": 9960, "loss": 0.182, "lr": 7.229891238800988e-06, "epoch": 8.353413654618475, "percentage": 41.77, "elapsed_time": "0:10:50", "remaining_time": "0:15:06", "throughput": 3999.39, "total_tokens": 2599936} |
| {"current_steps": 4165, "total_steps": 9960, "loss": 0.1266, "lr": 7.22204572014322e-06, "epoch": 8.363453815261044, "percentage": 41.82, "elapsed_time": "0:10:50", "remaining_time": "0:15:05", "throughput": 3999.49, "total_tokens": 2602912} |
| {"current_steps": 4170, "total_steps": 9960, "loss": 0.0899, "lr": 7.214193378272312e-06, "epoch": 8.373493975903614, "percentage": 41.87, "elapsed_time": "0:10:51", "remaining_time": "0:15:04", "throughput": 3999.63, "total_tokens": 2605536} |
| {"current_steps": 4175, "total_steps": 9960, "loss": 0.1795, "lr": 7.2063342373003676e-06, "epoch": 8.383534136546185, "percentage": 41.92, "elapsed_time": "0:10:52", "remaining_time": "0:15:03", "throughput": 3999.56, "total_tokens": 2607936} |
| {"current_steps": 4180, "total_steps": 9960, "loss": 0.0467, "lr": 7.198468321360376e-06, "epoch": 8.393574297188755, "percentage": 41.97, "elapsed_time": "0:10:52", "remaining_time": "0:15:02", "throughput": 3999.56, "total_tokens": 2610656} |
| {"current_steps": 4185, "total_steps": 9960, "loss": 0.1586, "lr": 7.190595654606118e-06, "epoch": 8.403614457831326, "percentage": 42.02, "elapsed_time": "0:10:53", "remaining_time": "0:15:01", "throughput": 3999.89, "total_tokens": 2613952} |
| {"current_steps": 4190, "total_steps": 9960, "loss": 0.1357, "lr": 7.182716261212116e-06, "epoch": 8.413654618473895, "percentage": 42.07, "elapsed_time": "0:10:54", "remaining_time": "0:15:00", "throughput": 3999.91, "total_tokens": 2616864} |
| {"current_steps": 4195, "total_steps": 9960, "loss": 0.1129, "lr": 7.174830165373542e-06, "epoch": 8.423694779116467, "percentage": 42.12, "elapsed_time": "0:10:55", "remaining_time": "0:15:00", "throughput": 4000.29, "total_tokens": 2620480} |
| {"current_steps": 4200, "total_steps": 9960, "loss": 0.081, "lr": 7.1669373913061505e-06, "epoch": 8.433734939759036, "percentage": 42.17, "elapsed_time": "0:10:55", "remaining_time": "0:14:59", "throughput": 4000.49, "total_tokens": 2623392} |
| {"current_steps": 4205, "total_steps": 9960, "loss": 0.0565, "lr": 7.1590379632462004e-06, "epoch": 8.443775100401606, "percentage": 42.22, "elapsed_time": "0:10:56", "remaining_time": "0:14:58", "throughput": 4000.76, "total_tokens": 2626432} |
| {"current_steps": 4210, "total_steps": 9960, "loss": 0.0768, "lr": 7.151131905450386e-06, "epoch": 8.453815261044177, "percentage": 42.27, "elapsed_time": "0:10:57", "remaining_time": "0:14:57", "throughput": 4001.15, "total_tokens": 2629888} |
| {"current_steps": 4215, "total_steps": 9960, "loss": 0.1255, "lr": 7.14321924219576e-06, "epoch": 8.463855421686747, "percentage": 42.32, "elapsed_time": "0:10:57", "remaining_time": "0:14:56", "throughput": 4001.44, "total_tokens": 2632864} |
| {"current_steps": 4220, "total_steps": 9960, "loss": 0.1375, "lr": 7.1352999977796565e-06, "epoch": 8.473895582329318, "percentage": 42.37, "elapsed_time": "0:10:58", "remaining_time": "0:14:56", "throughput": 4001.59, "total_tokens": 2636096} |
| {"current_steps": 4225, "total_steps": 9960, "loss": 0.0704, "lr": 7.127374196519616e-06, "epoch": 8.483935742971887, "percentage": 42.42, "elapsed_time": "0:10:59", "remaining_time": "0:14:54", "throughput": 4001.6, "total_tokens": 2638368} |
| {"current_steps": 4230, "total_steps": 9960, "loss": 0.059, "lr": 7.119441862753316e-06, "epoch": 8.493975903614459, "percentage": 42.47, "elapsed_time": "0:11:00", "remaining_time": "0:14:54", "throughput": 4002.25, "total_tokens": 2642080} |
| {"current_steps": 4235, "total_steps": 9960, "loss": 0.1087, "lr": 7.111503020838495e-06, "epoch": 8.504016064257028, "percentage": 42.52, "elapsed_time": "0:11:00", "remaining_time": "0:14:53", "throughput": 4002.32, "total_tokens": 2644640} |
| {"current_steps": 4240, "total_steps": 9960, "loss": 0.0577, "lr": 7.103557695152874e-06, "epoch": 8.514056224899598, "percentage": 42.57, "elapsed_time": "0:11:01", "remaining_time": "0:14:52", "throughput": 4002.55, "total_tokens": 2647616} |
| {"current_steps": 4245, "total_steps": 9960, "loss": 0.0759, "lr": 7.095605910094081e-06, "epoch": 8.524096385542169, "percentage": 42.62, "elapsed_time": "0:11:02", "remaining_time": "0:14:51", "throughput": 4002.61, "total_tokens": 2650560} |
| {"current_steps": 4250, "total_steps": 9960, "loss": 0.2256, "lr": 7.087647690079584e-06, "epoch": 8.534136546184738, "percentage": 42.67, "elapsed_time": "0:11:02", "remaining_time": "0:14:50", "throughput": 4002.81, "total_tokens": 2653216} |
| {"current_steps": 4255, "total_steps": 9960, "loss": 0.0457, "lr": 7.079683059546607e-06, "epoch": 8.54417670682731, "percentage": 42.72, "elapsed_time": "0:11:03", "remaining_time": "0:14:49", "throughput": 4002.95, "total_tokens": 2656256} |
| {"current_steps": 4260, "total_steps": 9960, "loss": 0.0834, "lr": 7.071712042952061e-06, "epoch": 8.55421686746988, "percentage": 42.77, "elapsed_time": "0:11:04", "remaining_time": "0:14:48", "throughput": 4003.09, "total_tokens": 2659040} |
| {"current_steps": 4265, "total_steps": 9960, "loss": 0.1075, "lr": 7.063734664772461e-06, "epoch": 8.56425702811245, "percentage": 42.82, "elapsed_time": "0:11:04", "remaining_time": "0:14:47", "throughput": 4003.17, "total_tokens": 2661824} |
| {"current_steps": 4270, "total_steps": 9960, "loss": 0.0369, "lr": 7.055750949503867e-06, "epoch": 8.57429718875502, "percentage": 42.87, "elapsed_time": "0:11:05", "remaining_time": "0:14:46", "throughput": 4003.4, "total_tokens": 2664576} |
| {"current_steps": 4275, "total_steps": 9960, "loss": 0.0959, "lr": 7.047760921661788e-06, "epoch": 8.58433734939759, "percentage": 42.92, "elapsed_time": "0:11:06", "remaining_time": "0:14:46", "throughput": 4003.72, "total_tokens": 2667712} |
| {"current_steps": 4280, "total_steps": 9960, "loss": 0.1003, "lr": 7.039764605781121e-06, "epoch": 8.594377510040161, "percentage": 42.97, "elapsed_time": "0:11:07", "remaining_time": "0:14:45", "throughput": 4003.97, "total_tokens": 2670944} |
| {"current_steps": 4285, "total_steps": 9960, "loss": 0.0871, "lr": 7.031762026416074e-06, "epoch": 8.60441767068273, "percentage": 43.02, "elapsed_time": "0:11:07", "remaining_time": "0:14:44", "throughput": 4004.23, "total_tokens": 2674336} |
| {"current_steps": 4290, "total_steps": 9960, "loss": 0.0914, "lr": 7.023753208140084e-06, "epoch": 8.614457831325302, "percentage": 43.07, "elapsed_time": "0:11:08", "remaining_time": "0:14:43", "throughput": 4004.6, "total_tokens": 2677824} |
| {"current_steps": 4295, "total_steps": 9960, "loss": 0.068, "lr": 7.01573817554575e-06, "epoch": 8.624497991967871, "percentage": 43.12, "elapsed_time": "0:11:09", "remaining_time": "0:14:42", "throughput": 4004.78, "total_tokens": 2680544} |
| {"current_steps": 4300, "total_steps": 9960, "loss": 0.022, "lr": 7.0077169532447474e-06, "epoch": 8.634538152610443, "percentage": 43.17, "elapsed_time": "0:11:10", "remaining_time": "0:14:42", "throughput": 4005.22, "total_tokens": 2683904} |
| {"current_steps": 4305, "total_steps": 9960, "loss": 0.0531, "lr": 6.999689565867764e-06, "epoch": 8.644578313253012, "percentage": 43.22, "elapsed_time": "0:11:10", "remaining_time": "0:14:41", "throughput": 4005.46, "total_tokens": 2687200} |
| {"current_steps": 4310, "total_steps": 9960, "loss": 0.1906, "lr": 6.991656038064416e-06, "epoch": 8.654618473895582, "percentage": 43.27, "elapsed_time": "0:11:11", "remaining_time": "0:14:40", "throughput": 4005.84, "total_tokens": 2690560} |
| {"current_steps": 4315, "total_steps": 9960, "loss": 0.0982, "lr": 6.983616394503177e-06, "epoch": 8.664658634538153, "percentage": 43.32, "elapsed_time": "0:11:12", "remaining_time": "0:14:39", "throughput": 4006.3, "total_tokens": 2693728} |
| {"current_steps": 4320, "total_steps": 9960, "loss": 0.1921, "lr": 6.975570659871295e-06, "epoch": 8.674698795180722, "percentage": 43.37, "elapsed_time": "0:11:13", "remaining_time": "0:14:38", "throughput": 4006.68, "total_tokens": 2697312} |
| {"current_steps": 4325, "total_steps": 9960, "loss": 0.0666, "lr": 6.967518858874727e-06, "epoch": 8.684738955823294, "percentage": 43.42, "elapsed_time": "0:11:13", "remaining_time": "0:14:38", "throughput": 4006.9, "total_tokens": 2700480} |
| {"current_steps": 4330, "total_steps": 9960, "loss": 0.1458, "lr": 6.959461016238056e-06, "epoch": 8.694779116465863, "percentage": 43.47, "elapsed_time": "0:11:14", "remaining_time": "0:14:37", "throughput": 4007.21, "total_tokens": 2703520} |
| {"current_steps": 4335, "total_steps": 9960, "loss": 0.0863, "lr": 6.951397156704418e-06, "epoch": 8.704819277108435, "percentage": 43.52, "elapsed_time": "0:11:15", "remaining_time": "0:14:36", "throughput": 4007.53, "total_tokens": 2706688} |
| {"current_steps": 4340, "total_steps": 9960, "loss": 0.1132, "lr": 6.943327305035424e-06, "epoch": 8.714859437751004, "percentage": 43.57, "elapsed_time": "0:11:16", "remaining_time": "0:14:35", "throughput": 4007.69, "total_tokens": 2709440} |
| {"current_steps": 4345, "total_steps": 9960, "loss": 0.0675, "lr": 6.9352514860110876e-06, "epoch": 8.724899598393574, "percentage": 43.62, "elapsed_time": "0:11:16", "remaining_time": "0:14:34", "throughput": 4007.98, "total_tokens": 2712512} |
| {"current_steps": 4350, "total_steps": 9960, "loss": 0.0307, "lr": 6.927169724429737e-06, "epoch": 8.734939759036145, "percentage": 43.67, "elapsed_time": "0:11:17", "remaining_time": "0:14:33", "throughput": 4008.04, "total_tokens": 2715296} |
| {"current_steps": 4355, "total_steps": 9960, "loss": 0.0683, "lr": 6.919082045107963e-06, "epoch": 8.744979919678714, "percentage": 43.72, "elapsed_time": "0:11:18", "remaining_time": "0:14:32", "throughput": 4008.42, "total_tokens": 2718720} |
| {"current_steps": 4360, "total_steps": 9960, "loss": 0.0932, "lr": 6.910988472880515e-06, "epoch": 8.755020080321286, "percentage": 43.78, "elapsed_time": "0:11:18", "remaining_time": "0:14:31", "throughput": 4008.67, "total_tokens": 2721536} |
| {"current_steps": 4365, "total_steps": 9960, "loss": 0.0802, "lr": 6.902889032600245e-06, "epoch": 8.765060240963855, "percentage": 43.83, "elapsed_time": "0:11:19", "remaining_time": "0:14:31", "throughput": 4009.14, "total_tokens": 2725024} |
| {"current_steps": 4370, "total_steps": 9960, "loss": 0.1271, "lr": 6.894783749138021e-06, "epoch": 8.775100401606426, "percentage": 43.88, "elapsed_time": "0:11:20", "remaining_time": "0:14:30", "throughput": 4009.52, "total_tokens": 2728288} |
| {"current_steps": 4375, "total_steps": 9960, "loss": 0.1137, "lr": 6.886672647382653e-06, "epoch": 8.785140562248996, "percentage": 43.93, "elapsed_time": "0:11:21", "remaining_time": "0:14:29", "throughput": 4009.59, "total_tokens": 2731424} |
| {"current_steps": 4380, "total_steps": 9960, "loss": 0.1214, "lr": 6.878555752240821e-06, "epoch": 8.795180722891565, "percentage": 43.98, "elapsed_time": "0:11:22", "remaining_time": "0:14:28", "throughput": 4009.96, "total_tokens": 2735008} |
| {"current_steps": 4385, "total_steps": 9960, "loss": 0.0553, "lr": 6.870433088636992e-06, "epoch": 8.805220883534137, "percentage": 44.03, "elapsed_time": "0:11:22", "remaining_time": "0:14:27", "throughput": 4010.1, "total_tokens": 2737728} |
| {"current_steps": 4390, "total_steps": 9960, "loss": 0.2023, "lr": 6.862304681513344e-06, "epoch": 8.815261044176706, "percentage": 44.08, "elapsed_time": "0:11:23", "remaining_time": "0:14:27", "throughput": 4010.5, "total_tokens": 2741120} |
| {"current_steps": 4395, "total_steps": 9960, "loss": 0.1059, "lr": 6.8541705558296954e-06, "epoch": 8.825301204819278, "percentage": 44.13, "elapsed_time": "0:11:24", "remaining_time": "0:14:26", "throughput": 4010.86, "total_tokens": 2744384} |
| {"current_steps": 4400, "total_steps": 9960, "loss": 0.0959, "lr": 6.8460307365634225e-06, "epoch": 8.835341365461847, "percentage": 44.18, "elapsed_time": "0:11:24", "remaining_time": "0:14:25", "throughput": 4010.89, "total_tokens": 2747296} |
| {"current_steps": 4405, "total_steps": 9960, "loss": 0.0289, "lr": 6.837885248709386e-06, "epoch": 8.845381526104418, "percentage": 44.23, "elapsed_time": "0:11:25", "remaining_time": "0:14:24", "throughput": 4011.46, "total_tokens": 2750880} |
| {"current_steps": 4410, "total_steps": 9960, "loss": 0.121, "lr": 6.829734117279853e-06, "epoch": 8.855421686746988, "percentage": 44.28, "elapsed_time": "0:11:26", "remaining_time": "0:14:23", "throughput": 4011.7, "total_tokens": 2753696} |
| {"current_steps": 4415, "total_steps": 9960, "loss": 0.1068, "lr": 6.8215773673044175e-06, "epoch": 8.865461847389557, "percentage": 44.33, "elapsed_time": "0:11:27", "remaining_time": "0:14:22", "throughput": 4011.91, "total_tokens": 2756544} |
| {"current_steps": 4420, "total_steps": 9960, "loss": 0.1459, "lr": 6.81341502382993e-06, "epoch": 8.875502008032129, "percentage": 44.38, "elapsed_time": "0:11:27", "remaining_time": "0:14:21", "throughput": 4011.86, "total_tokens": 2759008} |
| {"current_steps": 4425, "total_steps": 9960, "loss": 0.1331, "lr": 6.805247111920416e-06, "epoch": 8.885542168674698, "percentage": 44.43, "elapsed_time": "0:11:28", "remaining_time": "0:14:21", "throughput": 4012.09, "total_tokens": 2762112} |
| {"current_steps": 4430, "total_steps": 9960, "loss": 0.0897, "lr": 6.797073656656998e-06, "epoch": 8.89558232931727, "percentage": 44.48, "elapsed_time": "0:11:29", "remaining_time": "0:14:20", "throughput": 4012.33, "total_tokens": 2765216} |
| {"current_steps": 4435, "total_steps": 9960, "loss": 0.0792, "lr": 6.788894683137822e-06, "epoch": 8.905622489959839, "percentage": 44.53, "elapsed_time": "0:11:29", "remaining_time": "0:14:19", "throughput": 4012.43, "total_tokens": 2767744} |
| {"current_steps": 4440, "total_steps": 9960, "loss": 0.0705, "lr": 6.780710216477979e-06, "epoch": 8.91566265060241, "percentage": 44.58, "elapsed_time": "0:11:30", "remaining_time": "0:14:18", "throughput": 4012.81, "total_tokens": 2770976} |
| {"current_steps": 4445, "total_steps": 9960, "loss": 0.1015, "lr": 6.772520281809426e-06, "epoch": 8.92570281124498, "percentage": 44.63, "elapsed_time": "0:11:31", "remaining_time": "0:14:17", "throughput": 4013.01, "total_tokens": 2774016} |
| {"current_steps": 4450, "total_steps": 9960, "loss": 0.153, "lr": 6.7643249042809146e-06, "epoch": 8.93574297188755, "percentage": 44.68, "elapsed_time": "0:11:31", "remaining_time": "0:14:16", "throughput": 4012.89, "total_tokens": 2776768} |
| {"current_steps": 4455, "total_steps": 9960, "loss": 0.0843, "lr": 6.7561241090579045e-06, "epoch": 8.94578313253012, "percentage": 44.73, "elapsed_time": "0:11:32", "remaining_time": "0:14:15", "throughput": 4013.01, "total_tokens": 2779520} |
| {"current_steps": 4460, "total_steps": 9960, "loss": 0.0886, "lr": 6.747917921322496e-06, "epoch": 8.95582329317269, "percentage": 44.78, "elapsed_time": "0:11:33", "remaining_time": "0:14:15", "throughput": 4013.41, "total_tokens": 2783136} |
| {"current_steps": 4465, "total_steps": 9960, "loss": 0.0707, "lr": 6.739706366273346e-06, "epoch": 8.965863453815262, "percentage": 44.83, "elapsed_time": "0:11:34", "remaining_time": "0:14:14", "throughput": 4013.7, "total_tokens": 2786688} |
| {"current_steps": 4470, "total_steps": 9960, "loss": 0.0703, "lr": 6.731489469125591e-06, "epoch": 8.975903614457831, "percentage": 44.88, "elapsed_time": "0:11:35", "remaining_time": "0:14:13", "throughput": 4014.34, "total_tokens": 2790432} |
| {"current_steps": 4475, "total_steps": 9960, "loss": 0.1706, "lr": 6.723267255110773e-06, "epoch": 8.985943775100402, "percentage": 44.93, "elapsed_time": "0:11:35", "remaining_time": "0:14:12", "throughput": 4014.67, "total_tokens": 2793696} |
| {"current_steps": 4480, "total_steps": 9960, "loss": 0.1357, "lr": 6.715039749476764e-06, "epoch": 8.995983935742972, "percentage": 44.98, "elapsed_time": "0:11:36", "remaining_time": "0:14:12", "throughput": 4014.79, "total_tokens": 2796512} |
| {"current_steps": 4485, "total_steps": 9960, "loss": 0.1072, "lr": 6.7068069774876785e-06, "epoch": 9.006024096385541, "percentage": 45.03, "elapsed_time": "0:11:37", "remaining_time": "0:14:11", "throughput": 4014.19, "total_tokens": 2799168} |
| {"current_steps": 4490, "total_steps": 9960, "loss": 0.0907, "lr": 6.698568964423808e-06, "epoch": 9.016064257028113, "percentage": 45.08, "elapsed_time": "0:11:38", "remaining_time": "0:14:10", "throughput": 4014.43, "total_tokens": 2802304} |
| {"current_steps": 4495, "total_steps": 9960, "loss": 0.0329, "lr": 6.690325735581532e-06, "epoch": 9.026104417670682, "percentage": 45.13, "elapsed_time": "0:11:38", "remaining_time": "0:14:09", "throughput": 4014.83, "total_tokens": 2805952} |
| {"current_steps": 4500, "total_steps": 9960, "loss": 0.1234, "lr": 6.682077316273252e-06, "epoch": 9.036144578313253, "percentage": 45.18, "elapsed_time": "0:11:39", "remaining_time": "0:14:08", "throughput": 4015.24, "total_tokens": 2809440} |
| {"current_steps": 4505, "total_steps": 9960, "loss": 0.1117, "lr": 6.673823731827306e-06, "epoch": 9.046184738955823, "percentage": 45.23, "elapsed_time": "0:11:40", "remaining_time": "0:14:08", "throughput": 4015.52, "total_tokens": 2812576} |
| {"current_steps": 4510, "total_steps": 9960, "loss": 0.0835, "lr": 6.665565007587888e-06, "epoch": 9.056224899598394, "percentage": 45.28, "elapsed_time": "0:11:41", "remaining_time": "0:14:07", "throughput": 4015.65, "total_tokens": 2815552} |
| {"current_steps": 4515, "total_steps": 9960, "loss": 0.0963, "lr": 6.657301168914983e-06, "epoch": 9.066265060240964, "percentage": 45.33, "elapsed_time": "0:11:41", "remaining_time": "0:14:06", "throughput": 4015.87, "total_tokens": 2818304} |
| {"current_steps": 4520, "total_steps": 9960, "loss": 0.068, "lr": 6.649032241184271e-06, "epoch": 9.076305220883533, "percentage": 45.38, "elapsed_time": "0:11:42", "remaining_time": "0:14:05", "throughput": 4016.33, "total_tokens": 2822016} |
| {"current_steps": 4525, "total_steps": 9960, "loss": 0.0016, "lr": 6.640758249787067e-06, "epoch": 9.086345381526105, "percentage": 45.43, "elapsed_time": "0:11:43", "remaining_time": "0:14:04", "throughput": 4016.41, "total_tokens": 2824992} |
| {"current_steps": 4530, "total_steps": 9960, "loss": 0.2172, "lr": 6.632479220130232e-06, "epoch": 9.096385542168674, "percentage": 45.48, "elapsed_time": "0:11:44", "remaining_time": "0:14:04", "throughput": 4016.8, "total_tokens": 2828384} |
| {"current_steps": 4535, "total_steps": 9960, "loss": 0.0183, "lr": 6.624195177636098e-06, "epoch": 9.106425702811245, "percentage": 45.53, "elapsed_time": "0:11:44", "remaining_time": "0:14:03", "throughput": 4017.1, "total_tokens": 2831616} |
| {"current_steps": 4540, "total_steps": 9960, "loss": 0.0771, "lr": 6.615906147742389e-06, "epoch": 9.116465863453815, "percentage": 45.58, "elapsed_time": "0:11:45", "remaining_time": "0:14:02", "throughput": 4017.45, "total_tokens": 2834912} |
| {"current_steps": 4545, "total_steps": 9960, "loss": 0.0624, "lr": 6.6076121559021445e-06, "epoch": 9.126506024096386, "percentage": 45.63, "elapsed_time": "0:11:46", "remaining_time": "0:14:01", "throughput": 4017.87, "total_tokens": 2838080} |
| {"current_steps": 4550, "total_steps": 9960, "loss": 0.0352, "lr": 6.599313227583642e-06, "epoch": 9.136546184738956, "percentage": 45.68, "elapsed_time": "0:11:47", "remaining_time": "0:14:00", "throughput": 4018.08, "total_tokens": 2841056} |
| {"current_steps": 4555, "total_steps": 9960, "loss": 0.084, "lr": 6.591009388270315e-06, "epoch": 9.146586345381525, "percentage": 45.73, "elapsed_time": "0:11:47", "remaining_time": "0:13:59", "throughput": 4018.27, "total_tokens": 2844192} |
| {"current_steps": 4560, "total_steps": 9960, "loss": 0.0175, "lr": 6.582700663460679e-06, "epoch": 9.156626506024097, "percentage": 45.78, "elapsed_time": "0:11:48", "remaining_time": "0:13:59", "throughput": 4018.47, "total_tokens": 2847296} |
| {"current_steps": 4565, "total_steps": 9960, "loss": 0.0691, "lr": 6.57438707866825e-06, "epoch": 9.166666666666666, "percentage": 45.83, "elapsed_time": "0:11:49", "remaining_time": "0:13:58", "throughput": 4018.75, "total_tokens": 2850656} |
| {"current_steps": 4570, "total_steps": 9960, "loss": 0.1824, "lr": 6.566068659421467e-06, "epoch": 9.176706827309237, "percentage": 45.88, "elapsed_time": "0:11:50", "remaining_time": "0:13:57", "throughput": 4019.14, "total_tokens": 2854272} |
| {"current_steps": 4575, "total_steps": 9960, "loss": 0.105, "lr": 6.557745431263617e-06, "epoch": 9.186746987951807, "percentage": 45.93, "elapsed_time": "0:11:50", "remaining_time": "0:13:56", "throughput": 4019.29, "total_tokens": 2857248} |
| {"current_steps": 4580, "total_steps": 9960, "loss": 0.1361, "lr": 6.5494174197527515e-06, "epoch": 9.196787148594378, "percentage": 45.98, "elapsed_time": "0:11:51", "remaining_time": "0:13:55", "throughput": 4019.07, "total_tokens": 2859392} |
| {"current_steps": 4585, "total_steps": 9960, "loss": 0.0357, "lr": 6.54108465046161e-06, "epoch": 9.206827309236948, "percentage": 46.03, "elapsed_time": "0:11:52", "remaining_time": "0:13:54", "throughput": 4019.27, "total_tokens": 2862432} |
| {"current_steps": 4590, "total_steps": 9960, "loss": 0.1851, "lr": 6.532747148977543e-06, "epoch": 9.216867469879517, "percentage": 46.08, "elapsed_time": "0:11:52", "remaining_time": "0:13:54", "throughput": 4019.53, "total_tokens": 2865728} |
| {"current_steps": 4595, "total_steps": 9960, "loss": 0.0511, "lr": 6.52440494090243e-06, "epoch": 9.226907630522089, "percentage": 46.13, "elapsed_time": "0:11:53", "remaining_time": "0:13:53", "throughput": 4019.52, "total_tokens": 2868448} |
| {"current_steps": 4600, "total_steps": 9960, "loss": 0.1878, "lr": 6.516058051852605e-06, "epoch": 9.236947791164658, "percentage": 46.18, "elapsed_time": "0:11:54", "remaining_time": "0:13:52", "throughput": 4019.63, "total_tokens": 2871168} |
| {"current_steps": 4605, "total_steps": 9960, "loss": 0.0551, "lr": 6.507706507458776e-06, "epoch": 9.24698795180723, "percentage": 46.23, "elapsed_time": "0:11:54", "remaining_time": "0:13:51", "throughput": 4019.61, "total_tokens": 2873728} |
| {"current_steps": 4610, "total_steps": 9960, "loss": 0.0495, "lr": 6.499350333365945e-06, "epoch": 9.257028112449799, "percentage": 46.29, "elapsed_time": "0:11:55", "remaining_time": "0:13:50", "throughput": 4020.07, "total_tokens": 2877376} |
| {"current_steps": 4615, "total_steps": 9960, "loss": 0.0838, "lr": 6.490989555233328e-06, "epoch": 9.26706827309237, "percentage": 46.34, "elapsed_time": "0:11:56", "remaining_time": "0:13:49", "throughput": 4020.33, "total_tokens": 2880864} |
| {"current_steps": 4620, "total_steps": 9960, "loss": 0.0913, "lr": 6.482624198734284e-06, "epoch": 9.27710843373494, "percentage": 46.39, "elapsed_time": "0:11:57", "remaining_time": "0:13:49", "throughput": 4020.39, "total_tokens": 2883584} |
| {"current_steps": 4625, "total_steps": 9960, "loss": 0.0817, "lr": 6.4742542895562276e-06, "epoch": 9.28714859437751, "percentage": 46.44, "elapsed_time": "0:11:57", "remaining_time": "0:13:48", "throughput": 4020.59, "total_tokens": 2886272} |
| {"current_steps": 4630, "total_steps": 9960, "loss": 0.1111, "lr": 6.465879853400553e-06, "epoch": 9.29718875502008, "percentage": 46.49, "elapsed_time": "0:11:58", "remaining_time": "0:13:47", "throughput": 4020.85, "total_tokens": 2889216} |
| {"current_steps": 4635, "total_steps": 9960, "loss": 0.1591, "lr": 6.457500915982555e-06, "epoch": 9.30722891566265, "percentage": 46.54, "elapsed_time": "0:11:59", "remaining_time": "0:13:46", "throughput": 4021.08, "total_tokens": 2892224} |
| {"current_steps": 4640, "total_steps": 9960, "loss": 0.1163, "lr": 6.449117503031355e-06, "epoch": 9.317269076305221, "percentage": 46.59, "elapsed_time": "0:12:00", "remaining_time": "0:13:45", "throughput": 4021.43, "total_tokens": 2895584} |
| {"current_steps": 4645, "total_steps": 9960, "loss": 0.126, "lr": 6.440729640289809e-06, "epoch": 9.32730923694779, "percentage": 46.64, "elapsed_time": "0:12:00", "remaining_time": "0:13:44", "throughput": 4021.27, "total_tokens": 2897920} |
| {"current_steps": 4650, "total_steps": 9960, "loss": 0.111, "lr": 6.432337353514444e-06, "epoch": 9.337349397590362, "percentage": 46.69, "elapsed_time": "0:12:01", "remaining_time": "0:13:43", "throughput": 4021.22, "total_tokens": 2900224} |
| {"current_steps": 4655, "total_steps": 9960, "loss": 0.1038, "lr": 6.4239406684753695e-06, "epoch": 9.347389558232932, "percentage": 46.74, "elapsed_time": "0:12:01", "remaining_time": "0:13:42", "throughput": 4021.41, "total_tokens": 2903200} |
| {"current_steps": 4660, "total_steps": 9960, "loss": 0.1554, "lr": 6.4155396109561995e-06, "epoch": 9.357429718875501, "percentage": 46.79, "elapsed_time": "0:12:02", "remaining_time": "0:13:41", "throughput": 4021.52, "total_tokens": 2905824} |
| {"current_steps": 4665, "total_steps": 9960, "loss": 0.0318, "lr": 6.407134206753977e-06, "epoch": 9.367469879518072, "percentage": 46.84, "elapsed_time": "0:12:03", "remaining_time": "0:13:41", "throughput": 4021.89, "total_tokens": 2909312} |
| {"current_steps": 4670, "total_steps": 9960, "loss": 0.067, "lr": 6.39872448167909e-06, "epoch": 9.377510040160642, "percentage": 46.89, "elapsed_time": "0:12:04", "remaining_time": "0:13:40", "throughput": 4022.13, "total_tokens": 2912416} |
| {"current_steps": 4675, "total_steps": 9960, "loss": 0.075, "lr": 6.3903104615551956e-06, "epoch": 9.387550200803213, "percentage": 46.94, "elapsed_time": "0:12:04", "remaining_time": "0:13:39", "throughput": 4022.26, "total_tokens": 2915232} |
| {"current_steps": 4680, "total_steps": 9960, "loss": 0.0786, "lr": 6.381892172219142e-06, "epoch": 9.397590361445783, "percentage": 46.99, "elapsed_time": "0:12:05", "remaining_time": "0:13:38", "throughput": 4022.73, "total_tokens": 2918624} |
| {"current_steps": 4685, "total_steps": 9960, "loss": 0.0335, "lr": 6.373469639520881e-06, "epoch": 9.407630522088354, "percentage": 47.04, "elapsed_time": "0:12:06", "remaining_time": "0:13:37", "throughput": 4022.96, "total_tokens": 2922112} |
| {"current_steps": 4690, "total_steps": 9960, "loss": 0.0375, "lr": 6.3650428893234e-06, "epoch": 9.417670682730924, "percentage": 47.09, "elapsed_time": "0:12:07", "remaining_time": "0:13:36", "throughput": 4022.98, "total_tokens": 2924800} |
| {"current_steps": 4695, "total_steps": 9960, "loss": 0.0801, "lr": 6.356611947502633e-06, "epoch": 9.427710843373493, "percentage": 47.14, "elapsed_time": "0:12:07", "remaining_time": "0:13:36", "throughput": 4023.34, "total_tokens": 2928064} |
| {"current_steps": 4700, "total_steps": 9960, "loss": 0.1439, "lr": 6.348176839947389e-06, "epoch": 9.437751004016064, "percentage": 47.19, "elapsed_time": "0:12:08", "remaining_time": "0:13:35", "throughput": 4023.43, "total_tokens": 2931168} |
| {"current_steps": 4705, "total_steps": 9960, "loss": 0.223, "lr": 6.3397375925592675e-06, "epoch": 9.447791164658634, "percentage": 47.24, "elapsed_time": "0:12:09", "remaining_time": "0:13:34", "throughput": 4023.84, "total_tokens": 2934496} |
| {"current_steps": 4710, "total_steps": 9960, "loss": 0.0278, "lr": 6.331294231252576e-06, "epoch": 9.457831325301205, "percentage": 47.29, "elapsed_time": "0:12:10", "remaining_time": "0:13:33", "throughput": 4024.22, "total_tokens": 2937984} |
| {"current_steps": 4715, "total_steps": 9960, "loss": 0.0346, "lr": 6.3228467819542606e-06, "epoch": 9.467871485943775, "percentage": 47.34, "elapsed_time": "0:12:10", "remaining_time": "0:13:32", "throughput": 4024.43, "total_tokens": 2940928} |
| {"current_steps": 4720, "total_steps": 9960, "loss": 0.0529, "lr": 6.314395270603819e-06, "epoch": 9.477911646586346, "percentage": 47.39, "elapsed_time": "0:12:11", "remaining_time": "0:13:32", "throughput": 4024.7, "total_tokens": 2944448} |
| {"current_steps": 4725, "total_steps": 9960, "loss": 0.052, "lr": 6.305939723153218e-06, "epoch": 9.487951807228916, "percentage": 47.44, "elapsed_time": "0:12:12", "remaining_time": "0:13:31", "throughput": 4024.84, "total_tokens": 2947584} |
| {"current_steps": 4730, "total_steps": 9960, "loss": 0.0856, "lr": 6.297480165566823e-06, "epoch": 9.497991967871485, "percentage": 47.49, "elapsed_time": "0:12:13", "remaining_time": "0:13:30", "throughput": 4025.2, "total_tokens": 2951136} |
| {"current_steps": 4735, "total_steps": 9960, "loss": 0.1398, "lr": 6.289016623821308e-06, "epoch": 9.508032128514056, "percentage": 47.54, "elapsed_time": "0:12:13", "remaining_time": "0:13:29", "throughput": 4025.27, "total_tokens": 2953760} |
| {"current_steps": 4740, "total_steps": 9960, "loss": 0.0166, "lr": 6.280549123905588e-06, "epoch": 9.518072289156626, "percentage": 47.59, "elapsed_time": "0:12:14", "remaining_time": "0:13:28", "throughput": 4025.61, "total_tokens": 2956736} |
| {"current_steps": 4745, "total_steps": 9960, "loss": 0.094, "lr": 6.2720776918207285e-06, "epoch": 9.528112449799197, "percentage": 47.64, "elapsed_time": "0:12:15", "remaining_time": "0:13:28", "throughput": 4025.99, "total_tokens": 2960224} |
| {"current_steps": 4750, "total_steps": 9960, "loss": 0.1783, "lr": 6.263602353579868e-06, "epoch": 9.538152610441767, "percentage": 47.69, "elapsed_time": "0:12:16", "remaining_time": "0:13:27", "throughput": 4026.45, "total_tokens": 2963616} |
| {"current_steps": 4755, "total_steps": 9960, "loss": 0.0599, "lr": 6.255123135208141e-06, "epoch": 9.548192771084338, "percentage": 47.74, "elapsed_time": "0:12:16", "remaining_time": "0:13:26", "throughput": 4026.7, "total_tokens": 2966848} |
| {"current_steps": 4760, "total_steps": 9960, "loss": 0.1511, "lr": 6.246640062742598e-06, "epoch": 9.558232931726907, "percentage": 47.79, "elapsed_time": "0:12:17", "remaining_time": "0:13:25", "throughput": 4026.82, "total_tokens": 2969568} |
| {"current_steps": 4765, "total_steps": 9960, "loss": 0.1446, "lr": 6.2381531622321234e-06, "epoch": 9.568273092369477, "percentage": 47.84, "elapsed_time": "0:12:17", "remaining_time": "0:13:24", "throughput": 4026.62, "total_tokens": 2971488} |
| {"current_steps": 4770, "total_steps": 9960, "loss": 0.1338, "lr": 6.229662459737354e-06, "epoch": 9.578313253012048, "percentage": 47.89, "elapsed_time": "0:12:18", "remaining_time": "0:13:23", "throughput": 4026.8, "total_tokens": 2974656} |
| {"current_steps": 4775, "total_steps": 9960, "loss": 0.1145, "lr": 6.221167981330607e-06, "epoch": 9.588353413654618, "percentage": 47.94, "elapsed_time": "0:12:19", "remaining_time": "0:13:22", "throughput": 4027.01, "total_tokens": 2977536} |
| {"current_steps": 4780, "total_steps": 9960, "loss": 0.1066, "lr": 6.212669753095788e-06, "epoch": 9.598393574297189, "percentage": 47.99, "elapsed_time": "0:12:20", "remaining_time": "0:13:22", "throughput": 4027.43, "total_tokens": 2981632} |
| {"current_steps": 4785, "total_steps": 9960, "loss": 0.109, "lr": 6.204167801128319e-06, "epoch": 9.608433734939759, "percentage": 48.04, "elapsed_time": "0:12:21", "remaining_time": "0:13:21", "throughput": 4027.68, "total_tokens": 2985184} |
| {"current_steps": 4790, "total_steps": 9960, "loss": 0.1409, "lr": 6.19566215153506e-06, "epoch": 9.61847389558233, "percentage": 48.09, "elapsed_time": "0:12:21", "remaining_time": "0:13:20", "throughput": 4028.08, "total_tokens": 2988352} |
| {"current_steps": 4795, "total_steps": 9960, "loss": 0.0658, "lr": 6.18715283043422e-06, "epoch": 9.6285140562249, "percentage": 48.14, "elapsed_time": "0:12:22", "remaining_time": "0:13:19", "throughput": 4028.41, "total_tokens": 2991808} |
| {"current_steps": 4800, "total_steps": 9960, "loss": 0.1023, "lr": 6.178639863955287e-06, "epoch": 9.638554216867469, "percentage": 48.19, "elapsed_time": "0:12:23", "remaining_time": "0:13:19", "throughput": 4028.46, "total_tokens": 2994688} |
| {"current_steps": 4805, "total_steps": 9960, "loss": 0.0813, "lr": 6.170123278238939e-06, "epoch": 9.64859437751004, "percentage": 48.24, "elapsed_time": "0:12:24", "remaining_time": "0:13:18", "throughput": 4028.89, "total_tokens": 2998304} |
| {"current_steps": 4810, "total_steps": 9960, "loss": 0.0725, "lr": 6.161603099436968e-06, "epoch": 9.65863453815261, "percentage": 48.29, "elapsed_time": "0:12:24", "remaining_time": "0:13:17", "throughput": 4028.98, "total_tokens": 3000928} |
| {"current_steps": 4815, "total_steps": 9960, "loss": 0.0623, "lr": 6.153079353712201e-06, "epoch": 9.668674698795181, "percentage": 48.34, "elapsed_time": "0:12:25", "remaining_time": "0:13:16", "throughput": 4029.26, "total_tokens": 3004224} |
| {"current_steps": 4820, "total_steps": 9960, "loss": 0.1374, "lr": 6.144552067238418e-06, "epoch": 9.67871485943775, "percentage": 48.39, "elapsed_time": "0:12:26", "remaining_time": "0:13:15", "throughput": 4029.34, "total_tokens": 3007200} |
| {"current_steps": 4825, "total_steps": 9960, "loss": 0.0868, "lr": 6.136021266200271e-06, "epoch": 9.688755020080322, "percentage": 48.44, "elapsed_time": "0:12:27", "remaining_time": "0:13:15", "throughput": 4029.31, "total_tokens": 3009920} |
| {"current_steps": 4830, "total_steps": 9960, "loss": 0.073, "lr": 6.1274869767932e-06, "epoch": 9.698795180722891, "percentage": 48.49, "elapsed_time": "0:12:27", "remaining_time": "0:13:14", "throughput": 4029.61, "total_tokens": 3013152} |
| {"current_steps": 4835, "total_steps": 9960, "loss": 0.0502, "lr": 6.118949225223365e-06, "epoch": 9.708835341365463, "percentage": 48.54, "elapsed_time": "0:12:28", "remaining_time": "0:13:13", "throughput": 4029.74, "total_tokens": 3015936} |
| {"current_steps": 4840, "total_steps": 9960, "loss": 0.0402, "lr": 6.110408037707551e-06, "epoch": 9.718875502008032, "percentage": 48.59, "elapsed_time": "0:12:29", "remaining_time": "0:13:12", "throughput": 4030.09, "total_tokens": 3019424} |
| {"current_steps": 4845, "total_steps": 9960, "loss": 0.207, "lr": 6.1018634404730945e-06, "epoch": 9.728915662650602, "percentage": 48.64, "elapsed_time": "0:12:30", "remaining_time": "0:13:11", "throughput": 4030.29, "total_tokens": 3023040} |
| {"current_steps": 4850, "total_steps": 9960, "loss": 0.0914, "lr": 6.093315459757807e-06, "epoch": 9.738955823293173, "percentage": 48.69, "elapsed_time": "0:12:30", "remaining_time": "0:13:11", "throughput": 4030.18, "total_tokens": 3025728} |
| {"current_steps": 4855, "total_steps": 9960, "loss": 0.0947, "lr": 6.084764121809878e-06, "epoch": 9.748995983935743, "percentage": 48.74, "elapsed_time": "0:12:31", "remaining_time": "0:13:10", "throughput": 4030.27, "total_tokens": 3028352} |
| {"current_steps": 4860, "total_steps": 9960, "loss": 0.0236, "lr": 6.076209452887821e-06, "epoch": 9.759036144578314, "percentage": 48.8, "elapsed_time": "0:12:32", "remaining_time": "0:13:09", "throughput": 4030.6, "total_tokens": 3031968} |
| {"current_steps": 4865, "total_steps": 9960, "loss": 0.0284, "lr": 6.067651479260368e-06, "epoch": 9.769076305220883, "percentage": 48.85, "elapsed_time": "0:12:33", "remaining_time": "0:13:08", "throughput": 4030.57, "total_tokens": 3035072} |
| {"current_steps": 4870, "total_steps": 9960, "loss": 0.0481, "lr": 6.059090227206402e-06, "epoch": 9.779116465863455, "percentage": 48.9, "elapsed_time": "0:12:33", "remaining_time": "0:13:07", "throughput": 4030.68, "total_tokens": 3037568} |
| {"current_steps": 4875, "total_steps": 9960, "loss": 0.0885, "lr": 6.0505257230148715e-06, "epoch": 9.789156626506024, "percentage": 48.95, "elapsed_time": "0:12:34", "remaining_time": "0:13:06", "throughput": 4030.84, "total_tokens": 3040384} |
| {"current_steps": 4880, "total_steps": 9960, "loss": 0.0831, "lr": 6.041957992984711e-06, "epoch": 9.799196787148594, "percentage": 49.0, "elapsed_time": "0:12:34", "remaining_time": "0:13:05", "throughput": 4031.08, "total_tokens": 3043104} |
| {"current_steps": 4885, "total_steps": 9960, "loss": 0.1508, "lr": 6.033387063424765e-06, "epoch": 9.809236947791165, "percentage": 49.05, "elapsed_time": "0:12:35", "remaining_time": "0:13:05", "throughput": 4031.3, "total_tokens": 3046240} |
| {"current_steps": 4890, "total_steps": 9960, "loss": 0.0875, "lr": 6.0248129606536984e-06, "epoch": 9.819277108433734, "percentage": 49.1, "elapsed_time": "0:12:36", "remaining_time": "0:13:04", "throughput": 4031.64, "total_tokens": 3049792} |
| {"current_steps": 4895, "total_steps": 9960, "loss": 0.1742, "lr": 6.01623571099992e-06, "epoch": 9.829317269076306, "percentage": 49.15, "elapsed_time": "0:12:37", "remaining_time": "0:13:03", "throughput": 4031.97, "total_tokens": 3052928} |
| {"current_steps": 4900, "total_steps": 9960, "loss": 0.0964, "lr": 6.0076553408015035e-06, "epoch": 9.839357429718875, "percentage": 49.2, "elapsed_time": "0:12:37", "remaining_time": "0:13:02", "throughput": 4032.28, "total_tokens": 3056416} |
| {"current_steps": 4905, "total_steps": 9960, "loss": 0.1073, "lr": 5.999071876406104e-06, "epoch": 9.849397590361447, "percentage": 49.25, "elapsed_time": "0:12:38", "remaining_time": "0:13:01", "throughput": 4032.53, "total_tokens": 3059456} |
| {"current_steps": 4910, "total_steps": 9960, "loss": 0.2183, "lr": 5.990485344170879e-06, "epoch": 9.859437751004016, "percentage": 49.3, "elapsed_time": "0:12:39", "remaining_time": "0:13:01", "throughput": 4032.87, "total_tokens": 3062816} |
| {"current_steps": 4915, "total_steps": 9960, "loss": 0.1465, "lr": 5.9818957704624046e-06, "epoch": 9.869477911646586, "percentage": 49.35, "elapsed_time": "0:12:40", "remaining_time": "0:13:00", "throughput": 4032.95, "total_tokens": 3065472} |
| {"current_steps": 4920, "total_steps": 9960, "loss": 0.1317, "lr": 5.973303181656597e-06, "epoch": 9.879518072289157, "percentage": 49.4, "elapsed_time": "0:12:40", "remaining_time": "0:12:59", "throughput": 4033.13, "total_tokens": 3068480} |
| {"current_steps": 4925, "total_steps": 9960, "loss": 0.1163, "lr": 5.964707604138632e-06, "epoch": 9.889558232931726, "percentage": 49.45, "elapsed_time": "0:12:41", "remaining_time": "0:12:58", "throughput": 4033.48, "total_tokens": 3072032} |
| {"current_steps": 4930, "total_steps": 9960, "loss": 0.0549, "lr": 5.956109064302862e-06, "epoch": 9.899598393574298, "percentage": 49.5, "elapsed_time": "0:12:42", "remaining_time": "0:12:57", "throughput": 4033.37, "total_tokens": 3074336} |
| {"current_steps": 4935, "total_steps": 9960, "loss": 0.0499, "lr": 5.947507588552734e-06, "epoch": 9.909638554216867, "percentage": 49.55, "elapsed_time": "0:12:43", "remaining_time": "0:12:56", "throughput": 4033.65, "total_tokens": 3077728} |
| {"current_steps": 4940, "total_steps": 9960, "loss": 0.0673, "lr": 5.9389032033007135e-06, "epoch": 9.919678714859439, "percentage": 49.6, "elapsed_time": "0:12:43", "remaining_time": "0:12:56", "throughput": 4033.86, "total_tokens": 3080992} |
| {"current_steps": 4945, "total_steps": 9960, "loss": 0.0601, "lr": 5.930295934968197e-06, "epoch": 9.929718875502008, "percentage": 49.65, "elapsed_time": "0:12:44", "remaining_time": "0:12:55", "throughput": 4034.33, "total_tokens": 3084768} |
| {"current_steps": 4950, "total_steps": 9960, "loss": 0.036, "lr": 5.9216858099854365e-06, "epoch": 9.939759036144578, "percentage": 49.7, "elapsed_time": "0:12:45", "remaining_time": "0:12:54", "throughput": 4034.67, "total_tokens": 3088160} |
| {"current_steps": 4955, "total_steps": 9960, "loss": 0.0386, "lr": 5.913072854791458e-06, "epoch": 9.949799196787149, "percentage": 49.75, "elapsed_time": "0:12:46", "remaining_time": "0:12:53", "throughput": 4034.68, "total_tokens": 3091104} |
| {"current_steps": 4960, "total_steps": 9960, "loss": 0.2039, "lr": 5.90445709583397e-06, "epoch": 9.959839357429718, "percentage": 49.8, "elapsed_time": "0:12:46", "remaining_time": "0:12:53", "throughput": 4034.89, "total_tokens": 3094272} |
| {"current_steps": 4965, "total_steps": 9960, "loss": 0.0434, "lr": 5.895838559569298e-06, "epoch": 9.96987951807229, "percentage": 49.85, "elapsed_time": "0:12:47", "remaining_time": "0:12:52", "throughput": 4035.3, "total_tokens": 3098240} |
| {"current_steps": 4970, "total_steps": 9960, "loss": 0.1129, "lr": 5.887217272462295e-06, "epoch": 9.97991967871486, "percentage": 49.9, "elapsed_time": "0:12:48", "remaining_time": "0:12:51", "throughput": 4035.56, "total_tokens": 3101056} |
| {"current_steps": 4975, "total_steps": 9960, "loss": 0.0883, "lr": 5.878593260986256e-06, "epoch": 9.98995983935743, "percentage": 49.95, "elapsed_time": "0:12:49", "remaining_time": "0:12:50", "throughput": 4035.94, "total_tokens": 3104576} |
| {"current_steps": 4980, "total_steps": 9960, "loss": 0.0943, "lr": 5.869966551622848e-06, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:12:50", "remaining_time": "0:12:50", "throughput": 4036.2, "total_tokens": 3108288} |
| {"current_steps": 4980, "total_steps": 9960, "eval_loss": 0.29868796467781067, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:12:58", "remaining_time": "0:12:58", "throughput": 3994.32, "total_tokens": 3108288} |
| {"current_steps": 4985, "total_steps": 9960, "loss": 0.0089, "lr": 5.861337170862018e-06, "epoch": 10.01004016064257, "percentage": 50.05, "elapsed_time": "0:13:00", "remaining_time": "0:12:58", "throughput": 3987.73, "total_tokens": 3111360} |
| {"current_steps": 4990, "total_steps": 9960, "loss": 0.0079, "lr": 5.852705145201919e-06, "epoch": 10.02008032128514, "percentage": 50.1, "elapsed_time": "0:13:00", "remaining_time": "0:12:57", "throughput": 3987.9, "total_tokens": 3114496} |
| {"current_steps": 4995, "total_steps": 9960, "loss": 0.1431, "lr": 5.844070501148823e-06, "epoch": 10.03012048192771, "percentage": 50.15, "elapsed_time": "0:13:01", "remaining_time": "0:12:56", "throughput": 3987.89, "total_tokens": 3117120} |
| {"current_steps": 5000, "total_steps": 9960, "loss": 0.0972, "lr": 5.835433265217043e-06, "epoch": 10.040160642570282, "percentage": 50.2, "elapsed_time": "0:13:02", "remaining_time": "0:12:56", "throughput": 3988.58, "total_tokens": 3121376} |
| {"current_steps": 5005, "total_steps": 9960, "loss": 0.1294, "lr": 5.8267934639288525e-06, "epoch": 10.050200803212851, "percentage": 50.25, "elapsed_time": "0:13:03", "remaining_time": "0:12:55", "throughput": 3988.92, "total_tokens": 3124352} |
| {"current_steps": 5010, "total_steps": 9960, "loss": 0.1062, "lr": 5.818151123814401e-06, "epoch": 10.060240963855422, "percentage": 50.3, "elapsed_time": "0:13:03", "remaining_time": "0:12:54", "throughput": 3989.08, "total_tokens": 3127264} |
| {"current_steps": 5015, "total_steps": 9960, "loss": 0.0603, "lr": 5.809506271411635e-06, "epoch": 10.070281124497992, "percentage": 50.35, "elapsed_time": "0:13:04", "remaining_time": "0:12:53", "throughput": 3989.34, "total_tokens": 3130368} |
| {"current_steps": 5020, "total_steps": 9960, "loss": 0.044, "lr": 5.800858933266214e-06, "epoch": 10.080321285140561, "percentage": 50.4, "elapsed_time": "0:13:05", "remaining_time": "0:12:53", "throughput": 3989.69, "total_tokens": 3134016} |
| {"current_steps": 5025, "total_steps": 9960, "loss": 0.0111, "lr": 5.792209135931428e-06, "epoch": 10.090361445783133, "percentage": 50.45, "elapsed_time": "0:13:06", "remaining_time": "0:12:52", "throughput": 3989.9, "total_tokens": 3137120} |
| {"current_steps": 5030, "total_steps": 9960, "loss": 0.0852, "lr": 5.7835569059681255e-06, "epoch": 10.100401606425702, "percentage": 50.5, "elapsed_time": "0:13:07", "remaining_time": "0:12:51", "throughput": 3990.13, "total_tokens": 3140288} |
| {"current_steps": 5035, "total_steps": 9960, "loss": 0.1236, "lr": 5.77490226994462e-06, "epoch": 10.110441767068274, "percentage": 50.55, "elapsed_time": "0:13:07", "remaining_time": "0:12:50", "throughput": 3990.24, "total_tokens": 3142912} |
| {"current_steps": 5040, "total_steps": 9960, "loss": 0.0678, "lr": 5.766245254436613e-06, "epoch": 10.120481927710843, "percentage": 50.6, "elapsed_time": "0:13:08", "remaining_time": "0:12:49", "throughput": 3990.42, "total_tokens": 3146112} |
| {"current_steps": 5045, "total_steps": 9960, "loss": 0.0703, "lr": 5.757585886027114e-06, "epoch": 10.130522088353414, "percentage": 50.65, "elapsed_time": "0:13:09", "remaining_time": "0:12:48", "throughput": 3990.68, "total_tokens": 3148928} |
| {"current_steps": 5050, "total_steps": 9960, "loss": 0.021, "lr": 5.748924191306359e-06, "epoch": 10.140562248995984, "percentage": 50.7, "elapsed_time": "0:13:09", "remaining_time": "0:12:47", "throughput": 3990.96, "total_tokens": 3152256} |
| {"current_steps": 5055, "total_steps": 9960, "loss": 0.1104, "lr": 5.740260196871726e-06, "epoch": 10.150602409638553, "percentage": 50.75, "elapsed_time": "0:13:10", "remaining_time": "0:12:47", "throughput": 3991.29, "total_tokens": 3155360} |
| {"current_steps": 5060, "total_steps": 9960, "loss": 0.0009, "lr": 5.73159392932765e-06, "epoch": 10.160642570281125, "percentage": 50.8, "elapsed_time": "0:13:11", "remaining_time": "0:12:46", "throughput": 3991.62, "total_tokens": 3158784} |
| {"current_steps": 5065, "total_steps": 9960, "loss": 0.1475, "lr": 5.722925415285555e-06, "epoch": 10.170682730923694, "percentage": 50.85, "elapsed_time": "0:13:12", "remaining_time": "0:12:45", "throughput": 3992.02, "total_tokens": 3161952} |
| {"current_steps": 5070, "total_steps": 9960, "loss": 0.0859, "lr": 5.714254681363756e-06, "epoch": 10.180722891566266, "percentage": 50.9, "elapsed_time": "0:13:12", "remaining_time": "0:12:44", "throughput": 3992.46, "total_tokens": 3165536} |
| {"current_steps": 5075, "total_steps": 9960, "loss": 0.0771, "lr": 5.705581754187387e-06, "epoch": 10.190763052208835, "percentage": 50.95, "elapsed_time": "0:13:13", "remaining_time": "0:12:43", "throughput": 3992.48, "total_tokens": 3168064} |
| {"current_steps": 5080, "total_steps": 9960, "loss": 0.0067, "lr": 5.69690666038832e-06, "epoch": 10.200803212851406, "percentage": 51.0, "elapsed_time": "0:13:14", "remaining_time": "0:12:42", "throughput": 3992.79, "total_tokens": 3171232} |
| {"current_steps": 5085, "total_steps": 9960, "loss": 0.089, "lr": 5.688229426605075e-06, "epoch": 10.210843373493976, "percentage": 51.05, "elapsed_time": "0:13:14", "remaining_time": "0:12:42", "throughput": 3993.1, "total_tokens": 3174368} |
| {"current_steps": 5090, "total_steps": 9960, "loss": 0.0532, "lr": 5.679550079482747e-06, "epoch": 10.220883534136545, "percentage": 51.1, "elapsed_time": "0:13:15", "remaining_time": "0:12:41", "throughput": 3993.46, "total_tokens": 3177792} |
| {"current_steps": 5095, "total_steps": 9960, "loss": 0.0561, "lr": 5.670868645672916e-06, "epoch": 10.230923694779117, "percentage": 51.15, "elapsed_time": "0:13:16", "remaining_time": "0:12:40", "throughput": 3993.57, "total_tokens": 3180704} |
| {"current_steps": 5100, "total_steps": 9960, "loss": 0.085, "lr": 5.6621851518335725e-06, "epoch": 10.240963855421686, "percentage": 51.2, "elapsed_time": "0:13:17", "remaining_time": "0:12:39", "throughput": 3993.8, "total_tokens": 3184320} |
| {"current_steps": 5105, "total_steps": 9960, "loss": 0.0311, "lr": 5.653499624629035e-06, "epoch": 10.251004016064257, "percentage": 51.26, "elapsed_time": "0:13:18", "remaining_time": "0:12:38", "throughput": 3994.06, "total_tokens": 3187552} |
| {"current_steps": 5110, "total_steps": 9960, "loss": 0.0029, "lr": 5.644812090729863e-06, "epoch": 10.261044176706827, "percentage": 51.31, "elapsed_time": "0:13:18", "remaining_time": "0:12:38", "throughput": 3994.27, "total_tokens": 3190496} |
| {"current_steps": 5115, "total_steps": 9960, "loss": 0.0672, "lr": 5.636122576812776e-06, "epoch": 10.271084337349398, "percentage": 51.36, "elapsed_time": "0:13:19", "remaining_time": "0:12:37", "throughput": 3994.58, "total_tokens": 3193760} |
| {"current_steps": 5120, "total_steps": 9960, "loss": 0.0477, "lr": 5.627431109560577e-06, "epoch": 10.281124497991968, "percentage": 51.41, "elapsed_time": "0:13:20", "remaining_time": "0:12:36", "throughput": 3995.06, "total_tokens": 3197536} |
| {"current_steps": 5125, "total_steps": 9960, "loss": 0.0529, "lr": 5.618737715662067e-06, "epoch": 10.291164658634537, "percentage": 51.46, "elapsed_time": "0:13:21", "remaining_time": "0:12:35", "throughput": 3995.5, "total_tokens": 3201536} |
| {"current_steps": 5130, "total_steps": 9960, "loss": 0.1767, "lr": 5.61004242181196e-06, "epoch": 10.301204819277109, "percentage": 51.51, "elapsed_time": "0:13:22", "remaining_time": "0:12:35", "throughput": 3995.93, "total_tokens": 3205056} |
| {"current_steps": 5135, "total_steps": 9960, "loss": 0.0998, "lr": 5.601345254710808e-06, "epoch": 10.311244979919678, "percentage": 51.56, "elapsed_time": "0:13:22", "remaining_time": "0:12:34", "throughput": 3996.32, "total_tokens": 3208608} |
| {"current_steps": 5140, "total_steps": 9960, "loss": 0.1761, "lr": 5.592646241064913e-06, "epoch": 10.32128514056225, "percentage": 51.61, "elapsed_time": "0:13:23", "remaining_time": "0:12:33", "throughput": 3996.58, "total_tokens": 3211648} |
| {"current_steps": 5145, "total_steps": 9960, "loss": 0.0906, "lr": 5.583945407586247e-06, "epoch": 10.331325301204819, "percentage": 51.66, "elapsed_time": "0:13:24", "remaining_time": "0:12:32", "throughput": 3996.77, "total_tokens": 3214560} |
| {"current_steps": 5150, "total_steps": 9960, "loss": 0.0525, "lr": 5.5752427809923704e-06, "epoch": 10.34136546184739, "percentage": 51.71, "elapsed_time": "0:13:25", "remaining_time": "0:12:31", "throughput": 3997.08, "total_tokens": 3218112} |
| {"current_steps": 5155, "total_steps": 9960, "loss": 0.1533, "lr": 5.566538388006351e-06, "epoch": 10.35140562248996, "percentage": 51.76, "elapsed_time": "0:13:25", "remaining_time": "0:12:31", "throughput": 3997.32, "total_tokens": 3220992} |
| {"current_steps": 5160, "total_steps": 9960, "loss": 0.0048, "lr": 5.557832255356677e-06, "epoch": 10.36144578313253, "percentage": 51.81, "elapsed_time": "0:13:26", "remaining_time": "0:12:30", "throughput": 3997.65, "total_tokens": 3224128} |
| {"current_steps": 5165, "total_steps": 9960, "loss": 0.1247, "lr": 5.549124409777185e-06, "epoch": 10.3714859437751, "percentage": 51.86, "elapsed_time": "0:13:27", "remaining_time": "0:12:29", "throughput": 3998.11, "total_tokens": 3227648} |
| {"current_steps": 5170, "total_steps": 9960, "loss": 0.0086, "lr": 5.540414878006965e-06, "epoch": 10.38152610441767, "percentage": 51.91, "elapsed_time": "0:13:28", "remaining_time": "0:12:28", "throughput": 3998.42, "total_tokens": 3230848} |
| {"current_steps": 5175, "total_steps": 9960, "loss": 0.0039, "lr": 5.5317036867902885e-06, "epoch": 10.391566265060241, "percentage": 51.96, "elapsed_time": "0:13:28", "remaining_time": "0:12:27", "throughput": 3998.91, "total_tokens": 3234656} |
| {"current_steps": 5180, "total_steps": 9960, "loss": 0.0651, "lr": 5.52299086287652e-06, "epoch": 10.401606425702811, "percentage": 52.01, "elapsed_time": "0:13:29", "remaining_time": "0:12:27", "throughput": 3999.08, "total_tokens": 3237760} |
| {"current_steps": 5185, "total_steps": 9960, "loss": 0.0379, "lr": 5.514276433020044e-06, "epoch": 10.411646586345382, "percentage": 52.06, "elapsed_time": "0:13:30", "remaining_time": "0:12:26", "throughput": 3999.26, "total_tokens": 3240928} |
| {"current_steps": 5190, "total_steps": 9960, "loss": 0.0041, "lr": 5.505560423980164e-06, "epoch": 10.421686746987952, "percentage": 52.11, "elapsed_time": "0:13:31", "remaining_time": "0:12:25", "throughput": 3999.59, "total_tokens": 3244512} |
| {"current_steps": 5195, "total_steps": 9960, "loss": 0.1364, "lr": 5.496842862521046e-06, "epoch": 10.431726907630521, "percentage": 52.16, "elapsed_time": "0:13:31", "remaining_time": "0:12:24", "throughput": 3999.75, "total_tokens": 3247488} |
| {"current_steps": 5200, "total_steps": 9960, "loss": 0.1852, "lr": 5.4881237754116135e-06, "epoch": 10.441767068273093, "percentage": 52.21, "elapsed_time": "0:13:32", "remaining_time": "0:12:23", "throughput": 3999.86, "total_tokens": 3249952} |
| {"current_steps": 5205, "total_steps": 9960, "loss": 0.2229, "lr": 5.479403189425481e-06, "epoch": 10.451807228915662, "percentage": 52.26, "elapsed_time": "0:13:33", "remaining_time": "0:12:22", "throughput": 4000.1, "total_tokens": 3253248} |
| {"current_steps": 5210, "total_steps": 9960, "loss": 0.0127, "lr": 5.4706811313408616e-06, "epoch": 10.461847389558233, "percentage": 52.31, "elapsed_time": "0:13:33", "remaining_time": "0:12:22", "throughput": 4000.18, "total_tokens": 3255808} |
| {"current_steps": 5215, "total_steps": 9960, "loss": 0.0817, "lr": 5.461957627940489e-06, "epoch": 10.471887550200803, "percentage": 52.36, "elapsed_time": "0:13:34", "remaining_time": "0:12:21", "throughput": 4000.38, "total_tokens": 3259008} |
| {"current_steps": 5220, "total_steps": 9960, "loss": 0.1075, "lr": 5.453232706011539e-06, "epoch": 10.481927710843374, "percentage": 52.41, "elapsed_time": "0:13:35", "remaining_time": "0:12:20", "throughput": 4000.68, "total_tokens": 3262208} |
| {"current_steps": 5225, "total_steps": 9960, "loss": 0.0785, "lr": 5.44450639234554e-06, "epoch": 10.491967871485944, "percentage": 52.46, "elapsed_time": "0:13:36", "remaining_time": "0:12:19", "throughput": 4000.9, "total_tokens": 3265216} |
| {"current_steps": 5230, "total_steps": 9960, "loss": 0.0788, "lr": 5.435778713738292e-06, "epoch": 10.502008032128515, "percentage": 52.51, "elapsed_time": "0:13:36", "remaining_time": "0:12:18", "throughput": 4000.94, "total_tokens": 3267936} |
| {"current_steps": 5235, "total_steps": 9960, "loss": 0.0193, "lr": 5.427049696989792e-06, "epoch": 10.512048192771084, "percentage": 52.56, "elapsed_time": "0:13:37", "remaining_time": "0:12:17", "throughput": 4001.31, "total_tokens": 3271552} |
| {"current_steps": 5240, "total_steps": 9960, "loss": 0.1446, "lr": 5.418319368904137e-06, "epoch": 10.522088353413654, "percentage": 52.61, "elapsed_time": "0:13:38", "remaining_time": "0:12:17", "throughput": 4001.54, "total_tokens": 3274304} |
| {"current_steps": 5245, "total_steps": 9960, "loss": 0.1745, "lr": 5.409587756289462e-06, "epoch": 10.532128514056225, "percentage": 52.66, "elapsed_time": "0:13:38", "remaining_time": "0:12:16", "throughput": 4001.73, "total_tokens": 3277056} |
| {"current_steps": 5250, "total_steps": 9960, "loss": 0.025, "lr": 5.40085488595784e-06, "epoch": 10.542168674698795, "percentage": 52.71, "elapsed_time": "0:13:39", "remaining_time": "0:12:15", "throughput": 4001.98, "total_tokens": 3280448} |
| {"current_steps": 5255, "total_steps": 9960, "loss": 0.068, "lr": 5.392120784725206e-06, "epoch": 10.552208835341366, "percentage": 52.76, "elapsed_time": "0:13:40", "remaining_time": "0:12:14", "throughput": 4002.37, "total_tokens": 3284672} |
| {"current_steps": 5260, "total_steps": 9960, "loss": 0.055, "lr": 5.383385479411276e-06, "epoch": 10.562248995983936, "percentage": 52.81, "elapsed_time": "0:13:41", "remaining_time": "0:12:13", "throughput": 4002.44, "total_tokens": 3287648} |
| {"current_steps": 5265, "total_steps": 9960, "loss": 0.0597, "lr": 5.374648996839462e-06, "epoch": 10.572289156626507, "percentage": 52.86, "elapsed_time": "0:13:42", "remaining_time": "0:12:13", "throughput": 4002.71, "total_tokens": 3291040} |
| {"current_steps": 5270, "total_steps": 9960, "loss": 0.0567, "lr": 5.3659113638367936e-06, "epoch": 10.582329317269076, "percentage": 52.91, "elapsed_time": "0:13:42", "remaining_time": "0:12:12", "throughput": 4002.69, "total_tokens": 3293536} |
| {"current_steps": 5275, "total_steps": 9960, "loss": 0.0485, "lr": 5.357172607233831e-06, "epoch": 10.592369477911646, "percentage": 52.96, "elapsed_time": "0:13:43", "remaining_time": "0:12:11", "throughput": 4002.93, "total_tokens": 3296704} |
| {"current_steps": 5280, "total_steps": 9960, "loss": 0.0804, "lr": 5.348432753864582e-06, "epoch": 10.602409638554217, "percentage": 53.01, "elapsed_time": "0:13:44", "remaining_time": "0:12:10", "throughput": 4003.1, "total_tokens": 3299744} |
| {"current_steps": 5285, "total_steps": 9960, "loss": 0.2024, "lr": 5.339691830566428e-06, "epoch": 10.612449799196787, "percentage": 53.06, "elapsed_time": "0:13:44", "remaining_time": "0:12:09", "throughput": 4003.3, "total_tokens": 3302432} |
| {"current_steps": 5290, "total_steps": 9960, "loss": 0.0596, "lr": 5.330949864180034e-06, "epoch": 10.622489959839358, "percentage": 53.11, "elapsed_time": "0:13:45", "remaining_time": "0:12:08", "throughput": 4003.37, "total_tokens": 3305760} |
| {"current_steps": 5295, "total_steps": 9960, "loss": 0.0486, "lr": 5.322206881549266e-06, "epoch": 10.632530120481928, "percentage": 53.16, "elapsed_time": "0:13:46", "remaining_time": "0:12:08", "throughput": 4003.72, "total_tokens": 3309312} |
| {"current_steps": 5300, "total_steps": 9960, "loss": 0.0613, "lr": 5.313462909521111e-06, "epoch": 10.642570281124499, "percentage": 53.21, "elapsed_time": "0:13:47", "remaining_time": "0:12:07", "throughput": 4003.81, "total_tokens": 3312224} |
| {"current_steps": 5305, "total_steps": 9960, "loss": 0.1876, "lr": 5.304717974945596e-06, "epoch": 10.652610441767068, "percentage": 53.26, "elapsed_time": "0:13:47", "remaining_time": "0:12:06", "throughput": 4004.01, "total_tokens": 3314912} |
| {"current_steps": 5310, "total_steps": 9960, "loss": 0.1077, "lr": 5.2959721046757004e-06, "epoch": 10.662650602409638, "percentage": 53.31, "elapsed_time": "0:13:48", "remaining_time": "0:12:05", "throughput": 4004.2, "total_tokens": 3317824} |
| {"current_steps": 5315, "total_steps": 9960, "loss": 0.0748, "lr": 5.287225325567281e-06, "epoch": 10.67269076305221, "percentage": 53.36, "elapsed_time": "0:13:49", "remaining_time": "0:12:04", "throughput": 4004.45, "total_tokens": 3321216} |
| {"current_steps": 5320, "total_steps": 9960, "loss": 0.0298, "lr": 5.2784776644789825e-06, "epoch": 10.682730923694779, "percentage": 53.41, "elapsed_time": "0:13:50", "remaining_time": "0:12:04", "throughput": 4004.76, "total_tokens": 3324640} |
| {"current_steps": 5325, "total_steps": 9960, "loss": 0.1266, "lr": 5.269729148272158e-06, "epoch": 10.69277108433735, "percentage": 53.46, "elapsed_time": "0:13:50", "remaining_time": "0:12:03", "throughput": 4004.72, "total_tokens": 3327232} |
| {"current_steps": 5330, "total_steps": 9960, "loss": 0.0633, "lr": 5.260979803810787e-06, "epoch": 10.70281124497992, "percentage": 53.51, "elapsed_time": "0:13:51", "remaining_time": "0:12:02", "throughput": 4004.86, "total_tokens": 3330304} |
| {"current_steps": 5335, "total_steps": 9960, "loss": 0.0565, "lr": 5.252229657961394e-06, "epoch": 10.71285140562249, "percentage": 53.56, "elapsed_time": "0:13:52", "remaining_time": "0:12:01", "throughput": 4005.13, "total_tokens": 3333472} |
| {"current_steps": 5340, "total_steps": 9960, "loss": 0.0313, "lr": 5.2434787375929605e-06, "epoch": 10.72289156626506, "percentage": 53.61, "elapsed_time": "0:13:53", "remaining_time": "0:12:00", "throughput": 4005.32, "total_tokens": 3336704} |
| {"current_steps": 5345, "total_steps": 9960, "loss": 0.0202, "lr": 5.2347270695768505e-06, "epoch": 10.73293172690763, "percentage": 53.66, "elapsed_time": "0:13:53", "remaining_time": "0:11:59", "throughput": 4005.44, "total_tokens": 3339392} |
| {"current_steps": 5350, "total_steps": 9960, "loss": 0.1127, "lr": 5.225974680786721e-06, "epoch": 10.742971887550201, "percentage": 53.71, "elapsed_time": "0:13:54", "remaining_time": "0:11:59", "throughput": 4005.62, "total_tokens": 3342400} |
| {"current_steps": 5355, "total_steps": 9960, "loss": 0.1213, "lr": 5.217221598098444e-06, "epoch": 10.75301204819277, "percentage": 53.77, "elapsed_time": "0:13:55", "remaining_time": "0:11:58", "throughput": 4005.89, "total_tokens": 3345792} |
| {"current_steps": 5360, "total_steps": 9960, "loss": 0.1532, "lr": 5.208467848390018e-06, "epoch": 10.763052208835342, "percentage": 53.82, "elapsed_time": "0:13:56", "remaining_time": "0:11:57", "throughput": 4006.15, "total_tokens": 3349248} |
| {"current_steps": 5365, "total_steps": 9960, "loss": 0.0453, "lr": 5.199713458541495e-06, "epoch": 10.773092369477911, "percentage": 53.87, "elapsed_time": "0:13:56", "remaining_time": "0:11:56", "throughput": 4006.49, "total_tokens": 3352384} |
| {"current_steps": 5370, "total_steps": 9960, "loss": 0.0667, "lr": 5.190958455434891e-06, "epoch": 10.783132530120483, "percentage": 53.92, "elapsed_time": "0:13:57", "remaining_time": "0:11:55", "throughput": 4006.75, "total_tokens": 3355648} |
| {"current_steps": 5375, "total_steps": 9960, "loss": 0.1253, "lr": 5.182202865954105e-06, "epoch": 10.793172690763052, "percentage": 53.97, "elapsed_time": "0:13:58", "remaining_time": "0:11:54", "throughput": 4006.84, "total_tokens": 3358400} |
| {"current_steps": 5380, "total_steps": 9960, "loss": 0.0201, "lr": 5.173446716984837e-06, "epoch": 10.803212851405622, "percentage": 54.02, "elapsed_time": "0:13:58", "remaining_time": "0:11:54", "throughput": 4006.83, "total_tokens": 3361408} |
| {"current_steps": 5385, "total_steps": 9960, "loss": 0.0566, "lr": 5.164690035414501e-06, "epoch": 10.813253012048193, "percentage": 54.07, "elapsed_time": "0:13:59", "remaining_time": "0:11:53", "throughput": 4007.23, "total_tokens": 3365216} |
| {"current_steps": 5390, "total_steps": 9960, "loss": 0.0725, "lr": 5.155932848132155e-06, "epoch": 10.823293172690763, "percentage": 54.12, "elapsed_time": "0:14:00", "remaining_time": "0:11:52", "throughput": 4007.48, "total_tokens": 3368736} |
| {"current_steps": 5395, "total_steps": 9960, "loss": 0.0465, "lr": 5.1471751820284e-06, "epoch": 10.833333333333334, "percentage": 54.17, "elapsed_time": "0:14:01", "remaining_time": "0:11:51", "throughput": 4007.78, "total_tokens": 3372096} |
| {"current_steps": 5400, "total_steps": 9960, "loss": 0.0601, "lr": 5.138417063995315e-06, "epoch": 10.843373493975903, "percentage": 54.22, "elapsed_time": "0:14:02", "remaining_time": "0:11:51", "throughput": 4007.94, "total_tokens": 3375296} |
| {"current_steps": 5405, "total_steps": 9960, "loss": 0.0839, "lr": 5.129658520926361e-06, "epoch": 10.853413654618475, "percentage": 54.27, "elapsed_time": "0:14:02", "remaining_time": "0:11:50", "throughput": 4008.25, "total_tokens": 3378880} |
| {"current_steps": 5410, "total_steps": 9960, "loss": 0.162, "lr": 5.1208995797163085e-06, "epoch": 10.863453815261044, "percentage": 54.32, "elapsed_time": "0:14:03", "remaining_time": "0:11:49", "throughput": 4008.48, "total_tokens": 3381600} |
| {"current_steps": 5415, "total_steps": 9960, "loss": 0.0322, "lr": 5.112140267261151e-06, "epoch": 10.873493975903614, "percentage": 54.37, "elapsed_time": "0:14:04", "remaining_time": "0:11:48", "throughput": 4008.68, "total_tokens": 3385024} |
| {"current_steps": 5420, "total_steps": 9960, "loss": 0.1112, "lr": 5.103380610458016e-06, "epoch": 10.883534136546185, "percentage": 54.42, "elapsed_time": "0:14:05", "remaining_time": "0:11:47", "throughput": 4008.82, "total_tokens": 3387744} |
| {"current_steps": 5425, "total_steps": 9960, "loss": 0.1092, "lr": 5.094620636205096e-06, "epoch": 10.893574297188755, "percentage": 54.47, "elapsed_time": "0:14:05", "remaining_time": "0:11:46", "throughput": 4008.86, "total_tokens": 3390464} |
| {"current_steps": 5430, "total_steps": 9960, "loss": 0.1259, "lr": 5.085860371401552e-06, "epoch": 10.903614457831326, "percentage": 54.52, "elapsed_time": "0:14:06", "remaining_time": "0:11:46", "throughput": 4009.05, "total_tokens": 3393312} |
| {"current_steps": 5435, "total_steps": 9960, "loss": 0.1288, "lr": 5.077099842947441e-06, "epoch": 10.913654618473895, "percentage": 54.57, "elapsed_time": "0:14:07", "remaining_time": "0:11:45", "throughput": 4009.31, "total_tokens": 3396704} |
| {"current_steps": 5440, "total_steps": 9960, "loss": 0.0167, "lr": 5.068339077743629e-06, "epoch": 10.923694779116467, "percentage": 54.62, "elapsed_time": "0:14:07", "remaining_time": "0:11:44", "throughput": 4009.35, "total_tokens": 3399264} |
| {"current_steps": 5445, "total_steps": 9960, "loss": 0.0114, "lr": 5.059578102691707e-06, "epoch": 10.933734939759036, "percentage": 54.67, "elapsed_time": "0:14:08", "remaining_time": "0:11:43", "throughput": 4009.4, "total_tokens": 3402144} |
| {"current_steps": 5450, "total_steps": 9960, "loss": 0.002, "lr": 5.050816944693913e-06, "epoch": 10.943775100401606, "percentage": 54.72, "elapsed_time": "0:14:09", "remaining_time": "0:11:42", "throughput": 4009.43, "total_tokens": 3404608} |
| {"current_steps": 5455, "total_steps": 9960, "loss": 0.0584, "lr": 5.042055630653042e-06, "epoch": 10.953815261044177, "percentage": 54.77, "elapsed_time": "0:14:09", "remaining_time": "0:11:41", "throughput": 4009.63, "total_tokens": 3407584} |
| {"current_steps": 5460, "total_steps": 9960, "loss": 0.0499, "lr": 5.0332941874723775e-06, "epoch": 10.963855421686747, "percentage": 54.82, "elapsed_time": "0:14:10", "remaining_time": "0:11:41", "throughput": 4009.96, "total_tokens": 3410848} |
| {"current_steps": 5465, "total_steps": 9960, "loss": 0.0993, "lr": 5.02453264205559e-06, "epoch": 10.973895582329318, "percentage": 54.87, "elapsed_time": "0:14:11", "remaining_time": "0:11:40", "throughput": 4010.15, "total_tokens": 3413792} |
| {"current_steps": 5470, "total_steps": 9960, "loss": 0.091, "lr": 5.01577102130667e-06, "epoch": 10.983935742971887, "percentage": 54.92, "elapsed_time": "0:14:12", "remaining_time": "0:11:39", "throughput": 4010.33, "total_tokens": 3416896} |
| {"current_steps": 5475, "total_steps": 9960, "loss": 0.0081, "lr": 5.007009352129835e-06, "epoch": 10.993975903614459, "percentage": 54.97, "elapsed_time": "0:14:12", "remaining_time": "0:11:38", "throughput": 4010.47, "total_tokens": 3419712} |
| {"current_steps": 5480, "total_steps": 9960, "loss": 0.0095, "lr": 4.998247661429453e-06, "epoch": 11.004016064257028, "percentage": 55.02, "elapsed_time": "0:14:13", "remaining_time": "0:11:37", "throughput": 4010.34, "total_tokens": 3423168} |
| {"current_steps": 5485, "total_steps": 9960, "loss": 0.0066, "lr": 4.98948597610996e-06, "epoch": 11.014056224899598, "percentage": 55.07, "elapsed_time": "0:14:14", "remaining_time": "0:11:37", "throughput": 4010.71, "total_tokens": 3426688} |
| {"current_steps": 5490, "total_steps": 9960, "loss": 0.1703, "lr": 4.980724323075772e-06, "epoch": 11.024096385542169, "percentage": 55.12, "elapsed_time": "0:14:15", "remaining_time": "0:11:36", "throughput": 4010.91, "total_tokens": 3429952} |
| {"current_steps": 5495, "total_steps": 9960, "loss": 0.006, "lr": 4.971962729231211e-06, "epoch": 11.034136546184738, "percentage": 55.17, "elapsed_time": "0:14:15", "remaining_time": "0:11:35", "throughput": 4011.21, "total_tokens": 3433088} |
| {"current_steps": 5500, "total_steps": 9960, "loss": 0.0025, "lr": 4.9632012214804086e-06, "epoch": 11.04417670682731, "percentage": 55.22, "elapsed_time": "0:14:16", "remaining_time": "0:11:34", "throughput": 4011.27, "total_tokens": 3435840} |
| {"current_steps": 5505, "total_steps": 9960, "loss": 0.0105, "lr": 4.954439826727243e-06, "epoch": 11.05421686746988, "percentage": 55.27, "elapsed_time": "0:14:17", "remaining_time": "0:11:33", "throughput": 4011.49, "total_tokens": 3438976} |
| {"current_steps": 5510, "total_steps": 9960, "loss": 0.0452, "lr": 4.945678571875234e-06, "epoch": 11.06425702811245, "percentage": 55.32, "elapsed_time": "0:14:18", "remaining_time": "0:11:32", "throughput": 4011.67, "total_tokens": 3442208} |
| {"current_steps": 5515, "total_steps": 9960, "loss": 0.007, "lr": 4.936917483827483e-06, "epoch": 11.07429718875502, "percentage": 55.37, "elapsed_time": "0:14:18", "remaining_time": "0:11:32", "throughput": 4011.98, "total_tokens": 3445632} |
| {"current_steps": 5520, "total_steps": 9960, "loss": 0.1426, "lr": 4.928156589486571e-06, "epoch": 11.08433734939759, "percentage": 55.42, "elapsed_time": "0:14:19", "remaining_time": "0:11:31", "throughput": 4012.24, "total_tokens": 3448608} |
| {"current_steps": 5525, "total_steps": 9960, "loss": 0.0008, "lr": 4.919395915754486e-06, "epoch": 11.094377510040161, "percentage": 55.47, "elapsed_time": "0:14:20", "remaining_time": "0:11:30", "throughput": 4012.26, "total_tokens": 3451264} |
| {"current_steps": 5530, "total_steps": 9960, "loss": 0.0699, "lr": 4.910635489532543e-06, "epoch": 11.10441767068273, "percentage": 55.52, "elapsed_time": "0:14:20", "remaining_time": "0:11:29", "throughput": 4012.55, "total_tokens": 3454496} |
| {"current_steps": 5535, "total_steps": 9960, "loss": 0.1167, "lr": 4.901875337721289e-06, "epoch": 11.114457831325302, "percentage": 55.57, "elapsed_time": "0:14:21", "remaining_time": "0:11:28", "throughput": 4012.82, "total_tokens": 3458016} |
| {"current_steps": 5540, "total_steps": 9960, "loss": 0.0807, "lr": 4.893115487220434e-06, "epoch": 11.124497991967871, "percentage": 55.62, "elapsed_time": "0:14:22", "remaining_time": "0:11:28", "throughput": 4013.13, "total_tokens": 3461344} |
| {"current_steps": 5545, "total_steps": 9960, "loss": 0.1003, "lr": 4.884355964928767e-06, "epoch": 11.134538152610443, "percentage": 55.67, "elapsed_time": "0:14:23", "remaining_time": "0:11:27", "throughput": 4013.0, "total_tokens": 3463424} |
| {"current_steps": 5550, "total_steps": 9960, "loss": 0.0127, "lr": 4.875596797744056e-06, "epoch": 11.144578313253012, "percentage": 55.72, "elapsed_time": "0:14:23", "remaining_time": "0:11:26", "throughput": 4013.21, "total_tokens": 3466560} |
| {"current_steps": 5555, "total_steps": 9960, "loss": 0.1129, "lr": 4.866838012562993e-06, "epoch": 11.154618473895582, "percentage": 55.77, "elapsed_time": "0:14:24", "remaining_time": "0:11:25", "throughput": 4013.31, "total_tokens": 3469664} |
| {"current_steps": 5560, "total_steps": 9960, "loss": 0.0025, "lr": 4.858079636281086e-06, "epoch": 11.164658634538153, "percentage": 55.82, "elapsed_time": "0:14:25", "remaining_time": "0:11:24", "throughput": 4013.44, "total_tokens": 3472544} |
| {"current_steps": 5565, "total_steps": 9960, "loss": 0.0965, "lr": 4.8493216957925915e-06, "epoch": 11.174698795180722, "percentage": 55.87, "elapsed_time": "0:14:25", "remaining_time": "0:11:23", "throughput": 4013.43, "total_tokens": 3475072} |
| {"current_steps": 5570, "total_steps": 9960, "loss": 0.0605, "lr": 4.840564217990432e-06, "epoch": 11.184738955823294, "percentage": 55.92, "elapsed_time": "0:14:26", "remaining_time": "0:11:22", "throughput": 4013.59, "total_tokens": 3477984} |
| {"current_steps": 5575, "total_steps": 9960, "loss": 0.0005, "lr": 4.831807229766101e-06, "epoch": 11.194779116465863, "percentage": 55.97, "elapsed_time": "0:14:27", "remaining_time": "0:11:22", "throughput": 4013.93, "total_tokens": 3481152} |
| {"current_steps": 5580, "total_steps": 9960, "loss": 0.0041, "lr": 4.823050758009597e-06, "epoch": 11.204819277108435, "percentage": 56.02, "elapsed_time": "0:14:28", "remaining_time": "0:11:21", "throughput": 4014.37, "total_tokens": 3484800} |
| {"current_steps": 5585, "total_steps": 9960, "loss": 0.0005, "lr": 4.814294829609325e-06, "epoch": 11.214859437751004, "percentage": 56.07, "elapsed_time": "0:14:28", "remaining_time": "0:11:20", "throughput": 4014.53, "total_tokens": 3487776} |
| {"current_steps": 5590, "total_steps": 9960, "loss": 0.1039, "lr": 4.805539471452026e-06, "epoch": 11.224899598393574, "percentage": 56.12, "elapsed_time": "0:14:29", "remaining_time": "0:11:19", "throughput": 4014.92, "total_tokens": 3491552} |
| {"current_steps": 5595, "total_steps": 9960, "loss": 0.0078, "lr": 4.796784710422692e-06, "epoch": 11.234939759036145, "percentage": 56.17, "elapsed_time": "0:14:30", "remaining_time": "0:11:19", "throughput": 4015.31, "total_tokens": 3495296} |
| {"current_steps": 5600, "total_steps": 9960, "loss": 0.0828, "lr": 4.788030573404475e-06, "epoch": 11.244979919678714, "percentage": 56.22, "elapsed_time": "0:14:31", "remaining_time": "0:11:18", "throughput": 4015.49, "total_tokens": 3498208} |
| {"current_steps": 5605, "total_steps": 9960, "loss": 0.0301, "lr": 4.779277087278615e-06, "epoch": 11.255020080321286, "percentage": 56.28, "elapsed_time": "0:14:31", "remaining_time": "0:11:17", "throughput": 4015.66, "total_tokens": 3501472} |
| {"current_steps": 5610, "total_steps": 9960, "loss": 0.0149, "lr": 4.770524278924353e-06, "epoch": 11.265060240963855, "percentage": 56.33, "elapsed_time": "0:14:32", "remaining_time": "0:11:16", "throughput": 4015.83, "total_tokens": 3504352} |
| {"current_steps": 5615, "total_steps": 9960, "loss": 0.061, "lr": 4.761772175218848e-06, "epoch": 11.275100401606426, "percentage": 56.38, "elapsed_time": "0:14:33", "remaining_time": "0:11:15", "throughput": 4016.18, "total_tokens": 3507904} |
| {"current_steps": 5620, "total_steps": 9960, "loss": 0.0376, "lr": 4.753020803037098e-06, "epoch": 11.285140562248996, "percentage": 56.43, "elapsed_time": "0:14:34", "remaining_time": "0:11:15", "throughput": 4016.52, "total_tokens": 3511328} |
| {"current_steps": 5625, "total_steps": 9960, "loss": 0.0009, "lr": 4.744270189251848e-06, "epoch": 11.295180722891565, "percentage": 56.48, "elapsed_time": "0:14:34", "remaining_time": "0:11:14", "throughput": 4016.77, "total_tokens": 3514432} |
| {"current_steps": 5630, "total_steps": 9960, "loss": 0.073, "lr": 4.735520360733523e-06, "epoch": 11.305220883534137, "percentage": 56.53, "elapsed_time": "0:14:35", "remaining_time": "0:11:13", "throughput": 4016.96, "total_tokens": 3517824} |
| {"current_steps": 5635, "total_steps": 9960, "loss": 0.0801, "lr": 4.7267713443501274e-06, "epoch": 11.315261044176706, "percentage": 56.58, "elapsed_time": "0:14:36", "remaining_time": "0:11:12", "throughput": 4016.98, "total_tokens": 3520416} |
| {"current_steps": 5640, "total_steps": 9960, "loss": 0.1055, "lr": 4.718023166967181e-06, "epoch": 11.325301204819278, "percentage": 56.63, "elapsed_time": "0:14:37", "remaining_time": "0:11:11", "throughput": 4017.25, "total_tokens": 3523648} |
| {"current_steps": 5645, "total_steps": 9960, "loss": 0.0693, "lr": 4.7092758554476215e-06, "epoch": 11.335341365461847, "percentage": 56.68, "elapsed_time": "0:14:37", "remaining_time": "0:11:11", "throughput": 4017.42, "total_tokens": 3526624} |
| {"current_steps": 5650, "total_steps": 9960, "loss": 0.1391, "lr": 4.700529436651729e-06, "epoch": 11.345381526104418, "percentage": 56.73, "elapsed_time": "0:14:38", "remaining_time": "0:11:10", "throughput": 4017.74, "total_tokens": 3530080} |
| {"current_steps": 5655, "total_steps": 9960, "loss": 0.0008, "lr": 4.691783937437043e-06, "epoch": 11.355421686746988, "percentage": 56.78, "elapsed_time": "0:14:39", "remaining_time": "0:11:09", "throughput": 4017.9, "total_tokens": 3533184} |
| {"current_steps": 5660, "total_steps": 9960, "loss": 0.0281, "lr": 4.683039384658276e-06, "epoch": 11.365461847389557, "percentage": 56.83, "elapsed_time": "0:14:40", "remaining_time": "0:11:08", "throughput": 4018.12, "total_tokens": 3536608} |
| {"current_steps": 5665, "total_steps": 9960, "loss": 0.105, "lr": 4.67429580516724e-06, "epoch": 11.375502008032129, "percentage": 56.88, "elapsed_time": "0:14:40", "remaining_time": "0:11:07", "throughput": 4018.4, "total_tokens": 3539840} |
| {"current_steps": 5670, "total_steps": 9960, "loss": 0.0015, "lr": 4.665553225812758e-06, "epoch": 11.385542168674698, "percentage": 56.93, "elapsed_time": "0:14:41", "remaining_time": "0:11:06", "throughput": 4018.19, "total_tokens": 3541952} |
| {"current_steps": 5675, "total_steps": 9960, "loss": 0.1544, "lr": 4.656811673440572e-06, "epoch": 11.39558232931727, "percentage": 56.98, "elapsed_time": "0:14:42", "remaining_time": "0:11:06", "throughput": 4018.42, "total_tokens": 3544992} |
| {"current_steps": 5680, "total_steps": 9960, "loss": 0.0318, "lr": 4.648071174893285e-06, "epoch": 11.405622489959839, "percentage": 57.03, "elapsed_time": "0:14:42", "remaining_time": "0:11:05", "throughput": 4018.6, "total_tokens": 3547872} |
| {"current_steps": 5685, "total_steps": 9960, "loss": 0.0131, "lr": 4.6393317570102505e-06, "epoch": 11.41566265060241, "percentage": 57.08, "elapsed_time": "0:14:43", "remaining_time": "0:11:04", "throughput": 4018.85, "total_tokens": 3550880} |
| {"current_steps": 5690, "total_steps": 9960, "loss": 0.0499, "lr": 4.6305934466275145e-06, "epoch": 11.42570281124498, "percentage": 57.13, "elapsed_time": "0:14:44", "remaining_time": "0:11:03", "throughput": 4019.21, "total_tokens": 3554464} |
| {"current_steps": 5695, "total_steps": 9960, "loss": 0.0011, "lr": 4.6218562705777185e-06, "epoch": 11.43574297188755, "percentage": 57.18, "elapsed_time": "0:14:45", "remaining_time": "0:11:02", "throughput": 4019.43, "total_tokens": 3557344} |
| {"current_steps": 5700, "total_steps": 9960, "loss": 0.1489, "lr": 4.613120255690014e-06, "epoch": 11.44578313253012, "percentage": 57.23, "elapsed_time": "0:14:45", "remaining_time": "0:11:01", "throughput": 4019.53, "total_tokens": 3560096} |
| {"current_steps": 5705, "total_steps": 9960, "loss": 0.0229, "lr": 4.604385428789997e-06, "epoch": 11.45582329317269, "percentage": 57.28, "elapsed_time": "0:14:46", "remaining_time": "0:11:01", "throughput": 4019.54, "total_tokens": 3562560} |
| {"current_steps": 5710, "total_steps": 9960, "loss": 0.0591, "lr": 4.595651816699612e-06, "epoch": 11.465863453815262, "percentage": 57.33, "elapsed_time": "0:14:47", "remaining_time": "0:11:00", "throughput": 4019.69, "total_tokens": 3565472} |
| {"current_steps": 5715, "total_steps": 9960, "loss": 0.0946, "lr": 4.586919446237071e-06, "epoch": 11.475903614457831, "percentage": 57.38, "elapsed_time": "0:14:47", "remaining_time": "0:10:59", "throughput": 4019.87, "total_tokens": 3568288} |
| {"current_steps": 5720, "total_steps": 9960, "loss": 0.0236, "lr": 4.578188344216777e-06, "epoch": 11.485943775100402, "percentage": 57.43, "elapsed_time": "0:14:48", "remaining_time": "0:10:58", "throughput": 4020.08, "total_tokens": 3571712} |
| {"current_steps": 5725, "total_steps": 9960, "loss": 0.0604, "lr": 4.5694585374492314e-06, "epoch": 11.495983935742972, "percentage": 57.48, "elapsed_time": "0:14:49", "remaining_time": "0:10:57", "throughput": 4020.22, "total_tokens": 3574528} |
| {"current_steps": 5730, "total_steps": 9960, "loss": 0.0027, "lr": 4.560730052740967e-06, "epoch": 11.506024096385541, "percentage": 57.53, "elapsed_time": "0:14:49", "remaining_time": "0:10:56", "throughput": 4020.38, "total_tokens": 3577504} |
| {"current_steps": 5735, "total_steps": 9960, "loss": 0.0029, "lr": 4.552002916894454e-06, "epoch": 11.516064257028113, "percentage": 57.58, "elapsed_time": "0:14:50", "remaining_time": "0:10:56", "throughput": 4020.71, "total_tokens": 3581024} |
| {"current_steps": 5740, "total_steps": 9960, "loss": 0.0853, "lr": 4.543277156708013e-06, "epoch": 11.526104417670682, "percentage": 57.63, "elapsed_time": "0:14:51", "remaining_time": "0:10:55", "throughput": 4020.71, "total_tokens": 3583552} |
| {"current_steps": 5745, "total_steps": 9960, "loss": 0.0414, "lr": 4.534552798975755e-06, "epoch": 11.536144578313253, "percentage": 57.68, "elapsed_time": "0:14:52", "remaining_time": "0:10:54", "throughput": 4021.04, "total_tokens": 3587136} |
| {"current_steps": 5750, "total_steps": 9960, "loss": 0.0038, "lr": 4.525829870487468e-06, "epoch": 11.546184738955823, "percentage": 57.73, "elapsed_time": "0:14:52", "remaining_time": "0:10:53", "throughput": 4021.18, "total_tokens": 3590368} |
| {"current_steps": 5755, "total_steps": 9960, "loss": 0.0486, "lr": 4.517108398028566e-06, "epoch": 11.556224899598394, "percentage": 57.78, "elapsed_time": "0:14:53", "remaining_time": "0:10:52", "throughput": 4021.22, "total_tokens": 3592896} |
| {"current_steps": 5760, "total_steps": 9960, "loss": 0.0376, "lr": 4.508388408379985e-06, "epoch": 11.566265060240964, "percentage": 57.83, "elapsed_time": "0:14:54", "remaining_time": "0:10:51", "throughput": 4021.22, "total_tokens": 3595424} |
| {"current_steps": 5765, "total_steps": 9960, "loss": 0.0384, "lr": 4.499669928318105e-06, "epoch": 11.576305220883533, "percentage": 57.88, "elapsed_time": "0:14:54", "remaining_time": "0:10:51", "throughput": 4021.5, "total_tokens": 3599136} |
| {"current_steps": 5770, "total_steps": 9960, "loss": 0.0472, "lr": 4.490952984614676e-06, "epoch": 11.586345381526105, "percentage": 57.93, "elapsed_time": "0:14:55", "remaining_time": "0:10:50", "throughput": 4021.77, "total_tokens": 3602496} |
| {"current_steps": 5775, "total_steps": 9960, "loss": 0.0978, "lr": 4.482237604036729e-06, "epoch": 11.596385542168674, "percentage": 57.98, "elapsed_time": "0:14:56", "remaining_time": "0:10:49", "throughput": 4022.0, "total_tokens": 3605824} |
| {"current_steps": 5780, "total_steps": 9960, "loss": 0.1101, "lr": 4.473523813346491e-06, "epoch": 11.606425702811245, "percentage": 58.03, "elapsed_time": "0:14:57", "remaining_time": "0:10:48", "throughput": 4022.1, "total_tokens": 3608544} |
| {"current_steps": 5785, "total_steps": 9960, "loss": 0.0407, "lr": 4.464811639301314e-06, "epoch": 11.616465863453815, "percentage": 58.08, "elapsed_time": "0:14:57", "remaining_time": "0:10:47", "throughput": 4022.2, "total_tokens": 3611328} |
| {"current_steps": 5790, "total_steps": 9960, "loss": 0.065, "lr": 4.456101108653579e-06, "epoch": 11.626506024096386, "percentage": 58.13, "elapsed_time": "0:14:58", "remaining_time": "0:10:47", "throughput": 4022.15, "total_tokens": 3613376} |
| {"current_steps": 5795, "total_steps": 9960, "loss": 0.1865, "lr": 4.447392248150627e-06, "epoch": 11.636546184738956, "percentage": 58.18, "elapsed_time": "0:14:59", "remaining_time": "0:10:46", "throughput": 4022.16, "total_tokens": 3616032} |
| {"current_steps": 5800, "total_steps": 9960, "loss": 0.022, "lr": 4.438685084534663e-06, "epoch": 11.646586345381525, "percentage": 58.23, "elapsed_time": "0:14:59", "remaining_time": "0:10:45", "throughput": 4022.61, "total_tokens": 3619552} |
| {"current_steps": 5805, "total_steps": 9960, "loss": 0.0459, "lr": 4.429979644542689e-06, "epoch": 11.656626506024097, "percentage": 58.28, "elapsed_time": "0:15:00", "remaining_time": "0:10:44", "throughput": 4022.85, "total_tokens": 3623200} |
| {"current_steps": 5810, "total_steps": 9960, "loss": 0.0709, "lr": 4.421275954906409e-06, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:15:01", "remaining_time": "0:10:43", "throughput": 4023.08, "total_tokens": 3626208} |
| {"current_steps": 5815, "total_steps": 9960, "loss": 0.0649, "lr": 4.412574042352156e-06, "epoch": 11.676706827309237, "percentage": 58.38, "elapsed_time": "0:15:02", "remaining_time": "0:10:43", "throughput": 4023.33, "total_tokens": 3629632} |
| {"current_steps": 5820, "total_steps": 9960, "loss": 0.0237, "lr": 4.403873933600803e-06, "epoch": 11.686746987951807, "percentage": 58.43, "elapsed_time": "0:15:02", "remaining_time": "0:10:42", "throughput": 4023.46, "total_tokens": 3632224} |
| {"current_steps": 5825, "total_steps": 9960, "loss": 0.0045, "lr": 4.395175655367682e-06, "epoch": 11.696787148594378, "percentage": 58.48, "elapsed_time": "0:15:03", "remaining_time": "0:10:41", "throughput": 4023.71, "total_tokens": 3635424} |
| {"current_steps": 5830, "total_steps": 9960, "loss": 0.1638, "lr": 4.386479234362512e-06, "epoch": 11.706827309236948, "percentage": 58.53, "elapsed_time": "0:15:04", "remaining_time": "0:10:40", "throughput": 4023.86, "total_tokens": 3638560} |
| {"current_steps": 5835, "total_steps": 9960, "loss": 0.0549, "lr": 4.377784697289304e-06, "epoch": 11.716867469879517, "percentage": 58.58, "elapsed_time": "0:15:05", "remaining_time": "0:10:39", "throughput": 4024.34, "total_tokens": 3642560} |
| {"current_steps": 5840, "total_steps": 9960, "loss": 0.0198, "lr": 4.36909207084628e-06, "epoch": 11.726907630522089, "percentage": 58.63, "elapsed_time": "0:15:05", "remaining_time": "0:10:39", "throughput": 4024.72, "total_tokens": 3646016} |
| {"current_steps": 5845, "total_steps": 9960, "loss": 0.0741, "lr": 4.360401381725806e-06, "epoch": 11.736947791164658, "percentage": 58.68, "elapsed_time": "0:15:06", "remaining_time": "0:10:38", "throughput": 4024.79, "total_tokens": 3649152} |
| {"current_steps": 5850, "total_steps": 9960, "loss": 0.0736, "lr": 4.3517126566142864e-06, "epoch": 11.74698795180723, "percentage": 58.73, "elapsed_time": "0:15:07", "remaining_time": "0:10:37", "throughput": 4024.93, "total_tokens": 3652096} |
| {"current_steps": 5855, "total_steps": 9960, "loss": 0.2828, "lr": 4.343025922192104e-06, "epoch": 11.757028112449799, "percentage": 58.79, "elapsed_time": "0:15:08", "remaining_time": "0:10:36", "throughput": 4025.22, "total_tokens": 3655776} |
| {"current_steps": 5860, "total_steps": 9960, "loss": 0.0433, "lr": 4.334341205133527e-06, "epoch": 11.76706827309237, "percentage": 58.84, "elapsed_time": "0:15:08", "remaining_time": "0:10:35", "throughput": 4025.3, "total_tokens": 3658656} |
| {"current_steps": 5865, "total_steps": 9960, "loss": 0.0372, "lr": 4.325658532106623e-06, "epoch": 11.77710843373494, "percentage": 58.89, "elapsed_time": "0:15:09", "remaining_time": "0:10:35", "throughput": 4025.38, "total_tokens": 3661440} |
| {"current_steps": 5870, "total_steps": 9960, "loss": 0.0765, "lr": 4.316977929773191e-06, "epoch": 11.78714859437751, "percentage": 58.94, "elapsed_time": "0:15:10", "remaining_time": "0:10:34", "throughput": 4025.54, "total_tokens": 3664288} |
| {"current_steps": 5875, "total_steps": 9960, "loss": 0.0367, "lr": 4.308299424788667e-06, "epoch": 11.79718875502008, "percentage": 58.99, "elapsed_time": "0:15:11", "remaining_time": "0:10:33", "throughput": 4025.85, "total_tokens": 3667744} |
| {"current_steps": 5880, "total_steps": 9960, "loss": 0.0453, "lr": 4.299623043802046e-06, "epoch": 11.80722891566265, "percentage": 59.04, "elapsed_time": "0:15:11", "remaining_time": "0:10:32", "throughput": 4025.96, "total_tokens": 3670624} |
| {"current_steps": 5885, "total_steps": 9960, "loss": 0.0436, "lr": 4.2909488134558086e-06, "epoch": 11.817269076305221, "percentage": 59.09, "elapsed_time": "0:15:12", "remaining_time": "0:10:31", "throughput": 4026.1, "total_tokens": 3673600} |
| {"current_steps": 5890, "total_steps": 9960, "loss": 0.0026, "lr": 4.2822767603858185e-06, "epoch": 11.82730923694779, "percentage": 59.14, "elapsed_time": "0:15:13", "remaining_time": "0:10:31", "throughput": 4026.49, "total_tokens": 3676928} |
| {"current_steps": 5895, "total_steps": 9960, "loss": 0.1138, "lr": 4.2736069112212656e-06, "epoch": 11.837349397590362, "percentage": 59.19, "elapsed_time": "0:15:13", "remaining_time": "0:10:30", "throughput": 4026.53, "total_tokens": 3680064} |
| {"current_steps": 5900, "total_steps": 9960, "loss": 0.0151, "lr": 4.264939292584565e-06, "epoch": 11.847389558232932, "percentage": 59.24, "elapsed_time": "0:15:14", "remaining_time": "0:10:29", "throughput": 4026.64, "total_tokens": 3683040} |
| {"current_steps": 5905, "total_steps": 9960, "loss": 0.0137, "lr": 4.256273931091284e-06, "epoch": 11.857429718875501, "percentage": 59.29, "elapsed_time": "0:15:15", "remaining_time": "0:10:28", "throughput": 4026.9, "total_tokens": 3686400} |
| {"current_steps": 5910, "total_steps": 9960, "loss": 0.0368, "lr": 4.247610853350063e-06, "epoch": 11.867469879518072, "percentage": 59.34, "elapsed_time": "0:15:16", "remaining_time": "0:10:27", "throughput": 4026.95, "total_tokens": 3689216} |
| {"current_steps": 5915, "total_steps": 9960, "loss": 0.0593, "lr": 4.238950085962522e-06, "epoch": 11.877510040160642, "percentage": 59.39, "elapsed_time": "0:15:16", "remaining_time": "0:10:26", "throughput": 4027.1, "total_tokens": 3692288} |
| {"current_steps": 5920, "total_steps": 9960, "loss": 0.028, "lr": 4.230291655523197e-06, "epoch": 11.887550200803213, "percentage": 59.44, "elapsed_time": "0:15:17", "remaining_time": "0:10:26", "throughput": 4027.57, "total_tokens": 3696288} |
| {"current_steps": 5925, "total_steps": 9960, "loss": 0.0461, "lr": 4.2216355886194355e-06, "epoch": 11.897590361445783, "percentage": 59.49, "elapsed_time": "0:15:18", "remaining_time": "0:10:25", "throughput": 4027.71, "total_tokens": 3699456} |
| {"current_steps": 5930, "total_steps": 9960, "loss": 0.0741, "lr": 4.212981911831338e-06, "epoch": 11.907630522088354, "percentage": 59.54, "elapsed_time": "0:15:19", "remaining_time": "0:10:24", "throughput": 4027.97, "total_tokens": 3703232} |
| {"current_steps": 5935, "total_steps": 9960, "loss": 0.0208, "lr": 4.204330651731662e-06, "epoch": 11.917670682730924, "percentage": 59.59, "elapsed_time": "0:15:19", "remaining_time": "0:10:23", "throughput": 4027.8, "total_tokens": 3705568} |
| {"current_steps": 5940, "total_steps": 9960, "loss": 0.0302, "lr": 4.195681834885743e-06, "epoch": 11.927710843373493, "percentage": 59.64, "elapsed_time": "0:15:20", "remaining_time": "0:10:23", "throughput": 4028.13, "total_tokens": 3709152} |
| {"current_steps": 5945, "total_steps": 9960, "loss": 0.0003, "lr": 4.187035487851412e-06, "epoch": 11.937751004016064, "percentage": 59.69, "elapsed_time": "0:15:21", "remaining_time": "0:10:22", "throughput": 4028.56, "total_tokens": 3713056} |
| {"current_steps": 5950, "total_steps": 9960, "loss": 0.0046, "lr": 4.178391637178923e-06, "epoch": 11.947791164658634, "percentage": 59.74, "elapsed_time": "0:15:22", "remaining_time": "0:10:21", "throughput": 4028.66, "total_tokens": 3715744} |
| {"current_steps": 5955, "total_steps": 9960, "loss": 0.0004, "lr": 4.169750309410856e-06, "epoch": 11.957831325301205, "percentage": 59.79, "elapsed_time": "0:15:23", "remaining_time": "0:10:20", "throughput": 4028.82, "total_tokens": 3718912} |
| {"current_steps": 5960, "total_steps": 9960, "loss": 0.3039, "lr": 4.161111531082052e-06, "epoch": 11.967871485943775, "percentage": 59.84, "elapsed_time": "0:15:23", "remaining_time": "0:10:19", "throughput": 4028.8, "total_tokens": 3721504} |
| {"current_steps": 5965, "total_steps": 9960, "loss": 0.0095, "lr": 4.152475328719517e-06, "epoch": 11.977911646586346, "percentage": 59.89, "elapsed_time": "0:15:24", "remaining_time": "0:10:19", "throughput": 4029.01, "total_tokens": 3724960} |
| {"current_steps": 5970, "total_steps": 9960, "loss": 0.0668, "lr": 4.14384172884235e-06, "epoch": 11.987951807228916, "percentage": 59.94, "elapsed_time": "0:15:25", "remaining_time": "0:10:18", "throughput": 4029.31, "total_tokens": 3728160} |
| {"current_steps": 5975, "total_steps": 9960, "loss": 0.1454, "lr": 4.13521075796166e-06, "epoch": 11.997991967871485, "percentage": 59.99, "elapsed_time": "0:15:26", "remaining_time": "0:10:17", "throughput": 4029.74, "total_tokens": 3732000} |
| {"current_steps": 5976, "total_steps": 9960, "eval_loss": 0.5769983530044556, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:15:34", "remaining_time": "0:10:22", "throughput": 3994.8, "total_tokens": 3732864} |
| {"current_steps": 5980, "total_steps": 9960, "loss": 0.0756, "lr": 4.126582442580478e-06, "epoch": 12.008032128514056, "percentage": 60.04, "elapsed_time": "0:15:36", "remaining_time": "0:10:23", "throughput": 3989.66, "total_tokens": 3735424} |
| {"current_steps": 5985, "total_steps": 9960, "loss": 0.0007, "lr": 4.117956809193687e-06, "epoch": 12.018072289156626, "percentage": 60.09, "elapsed_time": "0:15:37", "remaining_time": "0:10:22", "throughput": 3989.73, "total_tokens": 3738816} |
| {"current_steps": 5990, "total_steps": 9960, "loss": 0.0439, "lr": 4.109333884287929e-06, "epoch": 12.028112449799197, "percentage": 60.14, "elapsed_time": "0:15:37", "remaining_time": "0:10:21", "throughput": 3989.9, "total_tokens": 3742176} |
| {"current_steps": 5995, "total_steps": 9960, "loss": 0.067, "lr": 4.1007136943415325e-06, "epoch": 12.038152610441767, "percentage": 60.19, "elapsed_time": "0:15:38", "remaining_time": "0:10:20", "throughput": 3990.06, "total_tokens": 3744928} |
| {"current_steps": 6000, "total_steps": 9960, "loss": 0.0044, "lr": 4.092096265824429e-06, "epoch": 12.048192771084338, "percentage": 60.24, "elapsed_time": "0:15:39", "remaining_time": "0:10:19", "throughput": 3990.42, "total_tokens": 3748288} |
| {"current_steps": 6005, "total_steps": 9960, "loss": 0.0213, "lr": 4.083481625198065e-06, "epoch": 12.058232931726907, "percentage": 60.29, "elapsed_time": "0:15:40", "remaining_time": "0:10:19", "throughput": 3990.64, "total_tokens": 3751744} |
| {"current_steps": 6010, "total_steps": 9960, "loss": 0.0013, "lr": 4.074869798915333e-06, "epoch": 12.068273092369479, "percentage": 60.34, "elapsed_time": "0:15:40", "remaining_time": "0:10:18", "throughput": 3990.81, "total_tokens": 3754624} |
| {"current_steps": 6015, "total_steps": 9960, "loss": 0.0007, "lr": 4.066260813420477e-06, "epoch": 12.078313253012048, "percentage": 60.39, "elapsed_time": "0:15:41", "remaining_time": "0:10:17", "throughput": 3990.87, "total_tokens": 3757120} |
| {"current_steps": 6020, "total_steps": 9960, "loss": 0.0007, "lr": 4.0576546951490225e-06, "epoch": 12.088353413654618, "percentage": 60.44, "elapsed_time": "0:15:42", "remaining_time": "0:10:16", "throughput": 3991.04, "total_tokens": 3759872} |
| {"current_steps": 6025, "total_steps": 9960, "loss": 0.0741, "lr": 4.049051470527692e-06, "epoch": 12.098393574297189, "percentage": 60.49, "elapsed_time": "0:15:42", "remaining_time": "0:10:15", "throughput": 3991.21, "total_tokens": 3762848} |
| {"current_steps": 6030, "total_steps": 9960, "loss": 0.0594, "lr": 4.040451165974313e-06, "epoch": 12.108433734939759, "percentage": 60.54, "elapsed_time": "0:15:43", "remaining_time": "0:10:14", "throughput": 3991.48, "total_tokens": 3766080} |
| {"current_steps": 6035, "total_steps": 9960, "loss": 0.0397, "lr": 4.031853807897759e-06, "epoch": 12.11847389558233, "percentage": 60.59, "elapsed_time": "0:15:44", "remaining_time": "0:10:14", "throughput": 3991.72, "total_tokens": 3769216} |
| {"current_steps": 6040, "total_steps": 9960, "loss": 0.0017, "lr": 4.023259422697846e-06, "epoch": 12.1285140562249, "percentage": 60.64, "elapsed_time": "0:15:45", "remaining_time": "0:10:13", "throughput": 3991.97, "total_tokens": 3772480} |
| {"current_steps": 6045, "total_steps": 9960, "loss": 0.0134, "lr": 4.014668036765267e-06, "epoch": 12.13855421686747, "percentage": 60.69, "elapsed_time": "0:15:45", "remaining_time": "0:10:12", "throughput": 3992.32, "total_tokens": 3776096} |
| {"current_steps": 6050, "total_steps": 9960, "loss": 0.0005, "lr": 4.006079676481504e-06, "epoch": 12.14859437751004, "percentage": 60.74, "elapsed_time": "0:15:46", "remaining_time": "0:10:11", "throughput": 3992.52, "total_tokens": 3779520} |
| {"current_steps": 6055, "total_steps": 9960, "loss": 0.0451, "lr": 3.997494368218745e-06, "epoch": 12.15863453815261, "percentage": 60.79, "elapsed_time": "0:15:47", "remaining_time": "0:10:10", "throughput": 3992.72, "total_tokens": 3782560} |
| {"current_steps": 6060, "total_steps": 9960, "loss": 0.0011, "lr": 3.988912138339812e-06, "epoch": 12.168674698795181, "percentage": 60.84, "elapsed_time": "0:15:48", "remaining_time": "0:10:10", "throughput": 3992.8, "total_tokens": 3785216} |
| {"current_steps": 6065, "total_steps": 9960, "loss": 0.0215, "lr": 3.980333013198067e-06, "epoch": 12.17871485943775, "percentage": 60.89, "elapsed_time": "0:15:48", "remaining_time": "0:10:09", "throughput": 3992.88, "total_tokens": 3788256} |
| {"current_steps": 6070, "total_steps": 9960, "loss": 0.0067, "lr": 3.971757019137342e-06, "epoch": 12.188755020080322, "percentage": 60.94, "elapsed_time": "0:15:49", "remaining_time": "0:10:08", "throughput": 3993.1, "total_tokens": 3791552} |
| {"current_steps": 6075, "total_steps": 9960, "loss": 0.0042, "lr": 3.9631841824918585e-06, "epoch": 12.198795180722891, "percentage": 60.99, "elapsed_time": "0:15:50", "remaining_time": "0:10:07", "throughput": 3993.39, "total_tokens": 3795008} |
| {"current_steps": 6080, "total_steps": 9960, "loss": 0.0195, "lr": 3.954614529586135e-06, "epoch": 12.208835341365463, "percentage": 61.04, "elapsed_time": "0:15:50", "remaining_time": "0:10:06", "throughput": 3993.41, "total_tokens": 3797504} |
| {"current_steps": 6085, "total_steps": 9960, "loss": 0.0001, "lr": 3.946048086734921e-06, "epoch": 12.218875502008032, "percentage": 61.09, "elapsed_time": "0:15:51", "remaining_time": "0:10:06", "throughput": 3993.77, "total_tokens": 3800768} |
| {"current_steps": 6090, "total_steps": 9960, "loss": 0.0444, "lr": 3.9374848802430995e-06, "epoch": 12.228915662650602, "percentage": 61.14, "elapsed_time": "0:15:52", "remaining_time": "0:10:05", "throughput": 3994.12, "total_tokens": 3804032} |
| {"current_steps": 6095, "total_steps": 9960, "loss": 0.0084, "lr": 3.928924936405625e-06, "epoch": 12.238955823293173, "percentage": 61.19, "elapsed_time": "0:15:53", "remaining_time": "0:10:04", "throughput": 3994.33, "total_tokens": 3807360} |
| {"current_steps": 6100, "total_steps": 9960, "loss": 0.0319, "lr": 3.920368281507431e-06, "epoch": 12.248995983935743, "percentage": 61.24, "elapsed_time": "0:15:53", "remaining_time": "0:10:03", "throughput": 3994.43, "total_tokens": 3810304} |
| {"current_steps": 6105, "total_steps": 9960, "loss": 0.0386, "lr": 3.911814941823349e-06, "epoch": 12.259036144578314, "percentage": 61.3, "elapsed_time": "0:15:54", "remaining_time": "0:10:02", "throughput": 3994.64, "total_tokens": 3813504} |
| {"current_steps": 6110, "total_steps": 9960, "loss": 0.0416, "lr": 3.9032649436180325e-06, "epoch": 12.269076305220883, "percentage": 61.35, "elapsed_time": "0:15:55", "remaining_time": "0:10:01", "throughput": 3994.43, "total_tokens": 3815584} |
| {"current_steps": 6115, "total_steps": 9960, "loss": 0.0038, "lr": 3.894718313145873e-06, "epoch": 12.279116465863455, "percentage": 61.4, "elapsed_time": "0:15:56", "remaining_time": "0:10:01", "throughput": 3994.74, "total_tokens": 3819360} |
| {"current_steps": 6120, "total_steps": 9960, "loss": 0.0426, "lr": 3.88617507665092e-06, "epoch": 12.289156626506024, "percentage": 61.45, "elapsed_time": "0:15:56", "remaining_time": "0:10:00", "throughput": 3994.87, "total_tokens": 3822336} |
| {"current_steps": 6125, "total_steps": 9960, "loss": 0.0295, "lr": 3.877635260366807e-06, "epoch": 12.299196787148594, "percentage": 61.5, "elapsed_time": "0:15:57", "remaining_time": "0:09:59", "throughput": 3995.06, "total_tokens": 3825184} |
| {"current_steps": 6130, "total_steps": 9960, "loss": 0.0001, "lr": 3.869098890516656e-06, "epoch": 12.309236947791165, "percentage": 61.55, "elapsed_time": "0:15:58", "remaining_time": "0:09:58", "throughput": 3995.33, "total_tokens": 3828864} |
| {"current_steps": 6135, "total_steps": 9960, "loss": 0.0386, "lr": 3.8605659933130165e-06, "epoch": 12.319277108433734, "percentage": 61.6, "elapsed_time": "0:15:58", "remaining_time": "0:09:57", "throughput": 3995.32, "total_tokens": 3831168} |
| {"current_steps": 6140, "total_steps": 9960, "loss": 0.0117, "lr": 3.852036594957762e-06, "epoch": 12.329317269076306, "percentage": 61.65, "elapsed_time": "0:15:59", "remaining_time": "0:09:57", "throughput": 3995.58, "total_tokens": 3834304} |
| {"current_steps": 6145, "total_steps": 9960, "loss": 0.0005, "lr": 3.843510721642036e-06, "epoch": 12.339357429718875, "percentage": 61.7, "elapsed_time": "0:16:00", "remaining_time": "0:09:56", "throughput": 3995.86, "total_tokens": 3837792} |
| {"current_steps": 6150, "total_steps": 9960, "loss": 0.0596, "lr": 3.834988399546145e-06, "epoch": 12.349397590361447, "percentage": 61.75, "elapsed_time": "0:16:01", "remaining_time": "0:09:55", "throughput": 3996.01, "total_tokens": 3840736} |
| {"current_steps": 6155, "total_steps": 9960, "loss": 0.0105, "lr": 3.826469654839501e-06, "epoch": 12.359437751004016, "percentage": 61.8, "elapsed_time": "0:16:01", "remaining_time": "0:09:54", "throughput": 3996.24, "total_tokens": 3843968} |
| {"current_steps": 6160, "total_steps": 9960, "loss": 0.0002, "lr": 3.817954513680524e-06, "epoch": 12.369477911646586, "percentage": 61.85, "elapsed_time": "0:16:02", "remaining_time": "0:09:53", "throughput": 3996.21, "total_tokens": 3846560} |
| {"current_steps": 6165, "total_steps": 9960, "loss": 0.0571, "lr": 3.8094430022165713e-06, "epoch": 12.379518072289157, "percentage": 61.9, "elapsed_time": "0:16:03", "remaining_time": "0:09:52", "throughput": 3996.47, "total_tokens": 3849728} |
| {"current_steps": 6170, "total_steps": 9960, "loss": 0.0173, "lr": 3.800935146583854e-06, "epoch": 12.389558232931726, "percentage": 61.95, "elapsed_time": "0:16:03", "remaining_time": "0:09:52", "throughput": 3996.55, "total_tokens": 3852416} |
| {"current_steps": 6175, "total_steps": 9960, "loss": 0.0484, "lr": 3.7924309729073616e-06, "epoch": 12.399598393574298, "percentage": 62.0, "elapsed_time": "0:16:04", "remaining_time": "0:09:51", "throughput": 3996.69, "total_tokens": 3855968} |
| {"current_steps": 6180, "total_steps": 9960, "loss": 0.0015, "lr": 3.7839305073007675e-06, "epoch": 12.409638554216867, "percentage": 62.05, "elapsed_time": "0:16:05", "remaining_time": "0:09:50", "throughput": 3997.03, "total_tokens": 3859552} |
| {"current_steps": 6185, "total_steps": 9960, "loss": 0.0115, "lr": 3.775433775866369e-06, "epoch": 12.419678714859439, "percentage": 62.1, "elapsed_time": "0:16:06", "remaining_time": "0:09:49", "throughput": 3997.11, "total_tokens": 3862112} |
| {"current_steps": 6190, "total_steps": 9960, "loss": 0.0004, "lr": 3.766940804694992e-06, "epoch": 12.429718875502008, "percentage": 62.15, "elapsed_time": "0:16:06", "remaining_time": "0:09:48", "throughput": 3997.47, "total_tokens": 3865536} |
| {"current_steps": 6195, "total_steps": 9960, "loss": 0.0134, "lr": 3.758451619865915e-06, "epoch": 12.439759036144578, "percentage": 62.2, "elapsed_time": "0:16:07", "remaining_time": "0:09:48", "throughput": 3997.69, "total_tokens": 3868512} |
| {"current_steps": 6200, "total_steps": 9960, "loss": 0.0032, "lr": 3.749966247446794e-06, "epoch": 12.449799196787149, "percentage": 62.25, "elapsed_time": "0:16:08", "remaining_time": "0:09:47", "throughput": 3997.66, "total_tokens": 3870912} |
| {"current_steps": 6205, "total_steps": 9960, "loss": 0.1196, "lr": 3.7414847134935716e-06, "epoch": 12.459839357429718, "percentage": 62.3, "elapsed_time": "0:16:08", "remaining_time": "0:09:46", "throughput": 3997.77, "total_tokens": 3873568} |
| {"current_steps": 6210, "total_steps": 9960, "loss": 0.0025, "lr": 3.7330070440504097e-06, "epoch": 12.46987951807229, "percentage": 62.35, "elapsed_time": "0:16:09", "remaining_time": "0:09:45", "throughput": 3997.92, "total_tokens": 3876608} |
| {"current_steps": 6215, "total_steps": 9960, "loss": 0.0249, "lr": 3.7245332651496038e-06, "epoch": 12.47991967871486, "percentage": 62.4, "elapsed_time": "0:16:10", "remaining_time": "0:09:44", "throughput": 3998.01, "total_tokens": 3879232} |
| {"current_steps": 6220, "total_steps": 9960, "loss": 0.0179, "lr": 3.716063402811496e-06, "epoch": 12.48995983935743, "percentage": 62.45, "elapsed_time": "0:16:11", "remaining_time": "0:09:43", "throughput": 3998.23, "total_tokens": 3882752} |
| {"current_steps": 6225, "total_steps": 9960, "loss": 0.0003, "lr": 3.707597483044411e-06, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:16:11", "remaining_time": "0:09:43", "throughput": 3998.31, "total_tokens": 3885344} |
| {"current_steps": 6230, "total_steps": 9960, "loss": 0.0002, "lr": 3.699135531844559e-06, "epoch": 12.51004016064257, "percentage": 62.55, "elapsed_time": "0:16:12", "remaining_time": "0:09:42", "throughput": 3998.25, "total_tokens": 3887648} |
| {"current_steps": 6235, "total_steps": 9960, "loss": 0.001, "lr": 3.6906775751959667e-06, "epoch": 12.52008032128514, "percentage": 62.6, "elapsed_time": "0:16:13", "remaining_time": "0:09:41", "throughput": 3998.5, "total_tokens": 3891008} |
| {"current_steps": 6240, "total_steps": 9960, "loss": 0.0003, "lr": 3.682223639070398e-06, "epoch": 12.53012048192771, "percentage": 62.65, "elapsed_time": "0:16:13", "remaining_time": "0:09:40", "throughput": 3998.68, "total_tokens": 3894016} |
| {"current_steps": 6245, "total_steps": 9960, "loss": 0.0022, "lr": 3.673773749427266e-06, "epoch": 12.540160642570282, "percentage": 62.7, "elapsed_time": "0:16:14", "remaining_time": "0:09:39", "throughput": 3998.93, "total_tokens": 3897056} |
| {"current_steps": 6250, "total_steps": 9960, "loss": 0.0298, "lr": 3.6653279322135637e-06, "epoch": 12.550200803212851, "percentage": 62.75, "elapsed_time": "0:16:15", "remaining_time": "0:09:38", "throughput": 3999.14, "total_tokens": 3900064} |
| {"current_steps": 6255, "total_steps": 9960, "loss": 0.0707, "lr": 3.656886213363772e-06, "epoch": 12.560240963855422, "percentage": 62.8, "elapsed_time": "0:16:15", "remaining_time": "0:09:38", "throughput": 3999.43, "total_tokens": 3903424} |
| {"current_steps": 6260, "total_steps": 9960, "loss": 0.0367, "lr": 3.6484486187997927e-06, "epoch": 12.570281124497992, "percentage": 62.85, "elapsed_time": "0:16:16", "remaining_time": "0:09:37", "throughput": 3999.57, "total_tokens": 3906528} |
| {"current_steps": 6265, "total_steps": 9960, "loss": 0.0005, "lr": 3.640015174430864e-06, "epoch": 12.580321285140561, "percentage": 62.9, "elapsed_time": "0:16:17", "remaining_time": "0:09:36", "throughput": 3999.72, "total_tokens": 3909728} |
| {"current_steps": 6270, "total_steps": 9960, "loss": 0.1657, "lr": 3.6315859061534743e-06, "epoch": 12.590361445783133, "percentage": 62.95, "elapsed_time": "0:16:18", "remaining_time": "0:09:35", "throughput": 3999.91, "total_tokens": 3913056} |
| {"current_steps": 6275, "total_steps": 9960, "loss": 0.1218, "lr": 3.623160839851292e-06, "epoch": 12.600401606425702, "percentage": 63.0, "elapsed_time": "0:16:18", "remaining_time": "0:09:34", "throughput": 4000.1, "total_tokens": 3916032} |
| {"current_steps": 6280, "total_steps": 9960, "loss": 0.0096, "lr": 3.6147400013950833e-06, "epoch": 12.610441767068274, "percentage": 63.05, "elapsed_time": "0:16:19", "remaining_time": "0:09:34", "throughput": 4000.38, "total_tokens": 3919200} |
| {"current_steps": 6285, "total_steps": 9960, "loss": 0.0043, "lr": 3.60632341664263e-06, "epoch": 12.620481927710843, "percentage": 63.1, "elapsed_time": "0:16:20", "remaining_time": "0:09:33", "throughput": 4000.46, "total_tokens": 3922048} |
| {"current_steps": 6290, "total_steps": 9960, "loss": 0.0002, "lr": 3.5979111114386556e-06, "epoch": 12.630522088353414, "percentage": 63.15, "elapsed_time": "0:16:21", "remaining_time": "0:09:32", "throughput": 4000.89, "total_tokens": 3926208} |
| {"current_steps": 6295, "total_steps": 9960, "loss": 0.038, "lr": 3.5895031116147355e-06, "epoch": 12.640562248995984, "percentage": 63.2, "elapsed_time": "0:16:22", "remaining_time": "0:09:31", "throughput": 4001.19, "total_tokens": 3929792} |
| {"current_steps": 6300, "total_steps": 9960, "loss": 0.0355, "lr": 3.5810994429892343e-06, "epoch": 12.650602409638553, "percentage": 63.25, "elapsed_time": "0:16:22", "remaining_time": "0:09:30", "throughput": 4001.32, "total_tokens": 3932768} |
| {"current_steps": 6305, "total_steps": 9960, "loss": 0.1505, "lr": 3.5727001313672073e-06, "epoch": 12.660642570281125, "percentage": 63.3, "elapsed_time": "0:16:23", "remaining_time": "0:09:30", "throughput": 4001.63, "total_tokens": 3936032} |
| {"current_steps": 6310, "total_steps": 9960, "loss": 0.0018, "lr": 3.5643052025403366e-06, "epoch": 12.670682730923694, "percentage": 63.35, "elapsed_time": "0:16:24", "remaining_time": "0:09:29", "throughput": 4001.82, "total_tokens": 3939136} |
| {"current_steps": 6315, "total_steps": 9960, "loss": 0.0002, "lr": 3.555914682286845e-06, "epoch": 12.680722891566266, "percentage": 63.4, "elapsed_time": "0:16:25", "remaining_time": "0:09:28", "throughput": 4002.16, "total_tokens": 3942688} |
| {"current_steps": 6320, "total_steps": 9960, "loss": 0.0031, "lr": 3.547528596371418e-06, "epoch": 12.690763052208835, "percentage": 63.45, "elapsed_time": "0:16:25", "remaining_time": "0:09:27", "throughput": 4002.18, "total_tokens": 3945472} |
| {"current_steps": 6325, "total_steps": 9960, "loss": 0.0006, "lr": 3.539146970545124e-06, "epoch": 12.700803212851406, "percentage": 63.5, "elapsed_time": "0:16:26", "remaining_time": "0:09:26", "throughput": 4002.29, "total_tokens": 3948224} |
| {"current_steps": 6330, "total_steps": 9960, "loss": 0.0669, "lr": 3.530769830545333e-06, "epoch": 12.710843373493976, "percentage": 63.55, "elapsed_time": "0:16:27", "remaining_time": "0:09:26", "throughput": 4002.68, "total_tokens": 3951840} |
| {"current_steps": 6335, "total_steps": 9960, "loss": 0.1137, "lr": 3.5223972020956454e-06, "epoch": 12.720883534136545, "percentage": 63.6, "elapsed_time": "0:16:28", "remaining_time": "0:09:25", "throughput": 4003.02, "total_tokens": 3955424} |
| {"current_steps": 6340, "total_steps": 9960, "loss": 0.0008, "lr": 3.514029110905809e-06, "epoch": 12.730923694779117, "percentage": 63.65, "elapsed_time": "0:16:28", "remaining_time": "0:09:24", "throughput": 4003.1, "total_tokens": 3957952} |
| {"current_steps": 6345, "total_steps": 9960, "loss": 0.0704, "lr": 3.505665582671631e-06, "epoch": 12.740963855421686, "percentage": 63.7, "elapsed_time": "0:16:29", "remaining_time": "0:09:23", "throughput": 4003.36, "total_tokens": 3961152} |
| {"current_steps": 6350, "total_steps": 9960, "loss": 0.0007, "lr": 3.4973066430749175e-06, "epoch": 12.751004016064257, "percentage": 63.76, "elapsed_time": "0:16:30", "remaining_time": "0:09:22", "throughput": 4003.61, "total_tokens": 3964480} |
| {"current_steps": 6355, "total_steps": 9960, "loss": 0.024, "lr": 3.488952317783374e-06, "epoch": 12.761044176706827, "percentage": 63.81, "elapsed_time": "0:16:30", "remaining_time": "0:09:22", "throughput": 4003.6, "total_tokens": 3966912} |
| {"current_steps": 6360, "total_steps": 9960, "loss": 0.0229, "lr": 3.480602632450545e-06, "epoch": 12.771084337349398, "percentage": 63.86, "elapsed_time": "0:16:31", "remaining_time": "0:09:21", "throughput": 4003.54, "total_tokens": 3969152} |
| {"current_steps": 6365, "total_steps": 9960, "loss": 0.0341, "lr": 3.4722576127157244e-06, "epoch": 12.781124497991968, "percentage": 63.91, "elapsed_time": "0:16:32", "remaining_time": "0:09:20", "throughput": 4003.7, "total_tokens": 3972160} |
| {"current_steps": 6370, "total_steps": 9960, "loss": 0.0002, "lr": 3.4639172842038766e-06, "epoch": 12.791164658634537, "percentage": 63.96, "elapsed_time": "0:16:32", "remaining_time": "0:09:19", "throughput": 4003.74, "total_tokens": 3974784} |
| {"current_steps": 6375, "total_steps": 9960, "loss": 0.0721, "lr": 3.4555816725255666e-06, "epoch": 12.801204819277109, "percentage": 64.01, "elapsed_time": "0:16:33", "remaining_time": "0:09:18", "throughput": 4004.08, "total_tokens": 3978592} |
| {"current_steps": 6380, "total_steps": 9960, "loss": 0.076, "lr": 3.447250803276869e-06, "epoch": 12.811244979919678, "percentage": 64.06, "elapsed_time": "0:16:34", "remaining_time": "0:09:18", "throughput": 4004.47, "total_tokens": 3982272} |
| {"current_steps": 6385, "total_steps": 9960, "loss": 0.0026, "lr": 3.438924702039301e-06, "epoch": 12.82128514056225, "percentage": 64.11, "elapsed_time": "0:16:35", "remaining_time": "0:09:17", "throughput": 4004.64, "total_tokens": 3985344} |
| {"current_steps": 6390, "total_steps": 9960, "loss": 0.003, "lr": 3.430603394379738e-06, "epoch": 12.831325301204819, "percentage": 64.16, "elapsed_time": "0:16:35", "remaining_time": "0:09:16", "throughput": 4004.74, "total_tokens": 3988064} |
| {"current_steps": 6395, "total_steps": 9960, "loss": 0.014, "lr": 3.422286905850332e-06, "epoch": 12.84136546184739, "percentage": 64.21, "elapsed_time": "0:16:36", "remaining_time": "0:09:15", "throughput": 4004.84, "total_tokens": 3990976} |
| {"current_steps": 6400, "total_steps": 9960, "loss": 0.1316, "lr": 3.4139752619884415e-06, "epoch": 12.85140562248996, "percentage": 64.26, "elapsed_time": "0:16:37", "remaining_time": "0:09:14", "throughput": 4005.3, "total_tokens": 3994848} |
| {"current_steps": 6405, "total_steps": 9960, "loss": 0.0486, "lr": 3.4056684883165454e-06, "epoch": 12.861445783132531, "percentage": 64.31, "elapsed_time": "0:16:38", "remaining_time": "0:09:13", "throughput": 4005.54, "total_tokens": 3997984} |
| {"current_steps": 6410, "total_steps": 9960, "loss": 0.0143, "lr": 3.3973666103421675e-06, "epoch": 12.8714859437751, "percentage": 64.36, "elapsed_time": "0:16:38", "remaining_time": "0:09:13", "throughput": 4005.66, "total_tokens": 4000896} |
| {"current_steps": 6415, "total_steps": 9960, "loss": 0.0106, "lr": 3.389069653557805e-06, "epoch": 12.88152610441767, "percentage": 64.41, "elapsed_time": "0:16:39", "remaining_time": "0:09:12", "throughput": 4005.77, "total_tokens": 4003776} |
| {"current_steps": 6420, "total_steps": 9960, "loss": 0.0806, "lr": 3.3807776434408326e-06, "epoch": 12.891566265060241, "percentage": 64.46, "elapsed_time": "0:16:40", "remaining_time": "0:09:11", "throughput": 4005.81, "total_tokens": 4006656} |
| {"current_steps": 6425, "total_steps": 9960, "loss": 0.0295, "lr": 3.3724906054534434e-06, "epoch": 12.901606425702811, "percentage": 64.51, "elapsed_time": "0:16:41", "remaining_time": "0:09:10", "throughput": 4006.09, "total_tokens": 4010432} |
| {"current_steps": 6430, "total_steps": 9960, "loss": 0.0012, "lr": 3.3642085650425625e-06, "epoch": 12.911646586345382, "percentage": 64.56, "elapsed_time": "0:16:41", "remaining_time": "0:09:09", "throughput": 4006.14, "total_tokens": 4013312} |
| {"current_steps": 6435, "total_steps": 9960, "loss": 0.0029, "lr": 3.355931547639764e-06, "epoch": 12.921686746987952, "percentage": 64.61, "elapsed_time": "0:16:42", "remaining_time": "0:09:09", "throughput": 4006.14, "total_tokens": 4016256} |
| {"current_steps": 6440, "total_steps": 9960, "loss": 0.006, "lr": 3.3476595786612044e-06, "epoch": 12.931726907630523, "percentage": 64.66, "elapsed_time": "0:16:43", "remaining_time": "0:09:08", "throughput": 4006.32, "total_tokens": 4019264} |
| {"current_steps": 6445, "total_steps": 9960, "loss": 0.0607, "lr": 3.3393926835075307e-06, "epoch": 12.941767068273093, "percentage": 64.71, "elapsed_time": "0:16:43", "remaining_time": "0:09:07", "throughput": 4006.55, "total_tokens": 4022496} |
| {"current_steps": 6450, "total_steps": 9960, "loss": 0.0022, "lr": 3.331130887563815e-06, "epoch": 12.951807228915662, "percentage": 64.76, "elapsed_time": "0:16:44", "remaining_time": "0:09:06", "throughput": 4006.56, "total_tokens": 4025504} |
| {"current_steps": 6455, "total_steps": 9960, "loss": 0.0381, "lr": 3.322874216199471e-06, "epoch": 12.961847389558233, "percentage": 64.81, "elapsed_time": "0:16:45", "remaining_time": "0:09:05", "throughput": 4006.77, "total_tokens": 4028672} |
| {"current_steps": 6460, "total_steps": 9960, "loss": 0.152, "lr": 3.3146226947681724e-06, "epoch": 12.971887550200803, "percentage": 64.86, "elapsed_time": "0:16:46", "remaining_time": "0:09:05", "throughput": 4007.18, "total_tokens": 4032672} |
| {"current_steps": 6465, "total_steps": 9960, "loss": 0.0037, "lr": 3.306376348607787e-06, "epoch": 12.981927710843374, "percentage": 64.91, "elapsed_time": "0:16:47", "remaining_time": "0:09:04", "throughput": 4007.28, "total_tokens": 4035968} |
| {"current_steps": 6470, "total_steps": 9960, "loss": 0.0083, "lr": 3.2981352030402795e-06, "epoch": 12.991967871485944, "percentage": 64.96, "elapsed_time": "0:16:47", "remaining_time": "0:09:03", "throughput": 4007.58, "total_tokens": 4039200} |
| {"current_steps": 6475, "total_steps": 9960, "loss": 0.0037, "lr": 3.289899283371657e-06, "epoch": 13.002008032128513, "percentage": 65.01, "elapsed_time": "0:16:48", "remaining_time": "0:09:02", "throughput": 4007.25, "total_tokens": 4042080} |
| {"current_steps": 6480, "total_steps": 9960, "loss": 0.0004, "lr": 3.2816686148918708e-06, "epoch": 13.012048192771084, "percentage": 65.06, "elapsed_time": "0:16:49", "remaining_time": "0:09:02", "throughput": 4007.35, "total_tokens": 4045088} |
| {"current_steps": 6485, "total_steps": 9960, "loss": 0.0001, "lr": 3.2734432228747527e-06, "epoch": 13.022088353413654, "percentage": 65.11, "elapsed_time": "0:16:50", "remaining_time": "0:09:01", "throughput": 4007.6, "total_tokens": 4048736} |
| {"current_steps": 6490, "total_steps": 9960, "loss": 0.0145, "lr": 3.26522313257793e-06, "epoch": 13.032128514056225, "percentage": 65.16, "elapsed_time": "0:16:51", "remaining_time": "0:09:00", "throughput": 4007.96, "total_tokens": 4052416} |
| {"current_steps": 6495, "total_steps": 9960, "loss": 0.0338, "lr": 3.2570083692427474e-06, "epoch": 13.042168674698795, "percentage": 65.21, "elapsed_time": "0:16:51", "remaining_time": "0:08:59", "throughput": 4008.07, "total_tokens": 4055328} |
| {"current_steps": 6500, "total_steps": 9960, "loss": 0.0124, "lr": 3.248798958094197e-06, "epoch": 13.052208835341366, "percentage": 65.26, "elapsed_time": "0:16:52", "remaining_time": "0:08:58", "throughput": 4008.23, "total_tokens": 4058496} |
| {"current_steps": 6505, "total_steps": 9960, "loss": 0.018, "lr": 3.240594924340835e-06, "epoch": 13.062248995983936, "percentage": 65.31, "elapsed_time": "0:16:53", "remaining_time": "0:08:58", "throughput": 4008.2, "total_tokens": 4060832} |
| {"current_steps": 6510, "total_steps": 9960, "loss": 0.0559, "lr": 3.232396293174702e-06, "epoch": 13.072289156626505, "percentage": 65.36, "elapsed_time": "0:16:53", "remaining_time": "0:08:57", "throughput": 4008.18, "total_tokens": 4063584} |
| {"current_steps": 6515, "total_steps": 9960, "loss": 0.0004, "lr": 3.224203089771254e-06, "epoch": 13.082329317269076, "percentage": 65.41, "elapsed_time": "0:16:54", "remaining_time": "0:08:56", "throughput": 4008.29, "total_tokens": 4066368} |
| {"current_steps": 6520, "total_steps": 9960, "loss": 0.0001, "lr": 3.2160153392892737e-06, "epoch": 13.092369477911646, "percentage": 65.46, "elapsed_time": "0:16:55", "remaining_time": "0:08:55", "throughput": 4008.45, "total_tokens": 4069312} |
| {"current_steps": 6525, "total_steps": 9960, "loss": 0.0104, "lr": 3.2078330668708057e-06, "epoch": 13.102409638554217, "percentage": 65.51, "elapsed_time": "0:16:55", "remaining_time": "0:08:54", "throughput": 4008.59, "total_tokens": 4072416} |
| {"current_steps": 6530, "total_steps": 9960, "loss": 0.0019, "lr": 3.19965629764107e-06, "epoch": 13.112449799196787, "percentage": 65.56, "elapsed_time": "0:16:56", "remaining_time": "0:08:54", "throughput": 4008.67, "total_tokens": 4075424} |
| {"current_steps": 6535, "total_steps": 9960, "loss": 0.028, "lr": 3.1914850567083866e-06, "epoch": 13.122489959839358, "percentage": 65.61, "elapsed_time": "0:16:57", "remaining_time": "0:08:53", "throughput": 4008.91, "total_tokens": 4078656} |
| {"current_steps": 6540, "total_steps": 9960, "loss": 0.061, "lr": 3.1833193691641045e-06, "epoch": 13.132530120481928, "percentage": 65.66, "elapsed_time": "0:16:58", "remaining_time": "0:08:52", "throughput": 4008.82, "total_tokens": 4081216} |
| {"current_steps": 6545, "total_steps": 9960, "loss": 0.0281, "lr": 3.1751592600825143e-06, "epoch": 13.142570281124499, "percentage": 65.71, "elapsed_time": "0:16:58", "remaining_time": "0:08:51", "throughput": 4008.95, "total_tokens": 4084256} |
| {"current_steps": 6550, "total_steps": 9960, "loss": 0.0015, "lr": 3.1670047545207817e-06, "epoch": 13.152610441767068, "percentage": 65.76, "elapsed_time": "0:16:59", "remaining_time": "0:08:50", "throughput": 4009.22, "total_tokens": 4087712} |
| {"current_steps": 6555, "total_steps": 9960, "loss": 0.0005, "lr": 3.1588558775188647e-06, "epoch": 13.162650602409638, "percentage": 65.81, "elapsed_time": "0:17:00", "remaining_time": "0:08:49", "throughput": 4009.33, "total_tokens": 4090464} |
| {"current_steps": 6560, "total_steps": 9960, "loss": 0.0249, "lr": 3.1507126540994337e-06, "epoch": 13.17269076305221, "percentage": 65.86, "elapsed_time": "0:17:00", "remaining_time": "0:08:49", "throughput": 4009.51, "total_tokens": 4093600} |
| {"current_steps": 6565, "total_steps": 9960, "loss": 0.019, "lr": 3.1425751092678064e-06, "epoch": 13.182730923694779, "percentage": 65.91, "elapsed_time": "0:17:01", "remaining_time": "0:08:48", "throughput": 4009.59, "total_tokens": 4096864} |
| {"current_steps": 6570, "total_steps": 9960, "loss": 0.0003, "lr": 3.134443268011855e-06, "epoch": 13.19277108433735, "percentage": 65.96, "elapsed_time": "0:17:02", "remaining_time": "0:08:47", "throughput": 4009.82, "total_tokens": 4100480} |
| {"current_steps": 6575, "total_steps": 9960, "loss": 0.1201, "lr": 3.126317155301941e-06, "epoch": 13.20281124497992, "percentage": 66.01, "elapsed_time": "0:17:03", "remaining_time": "0:08:46", "throughput": 4009.93, "total_tokens": 4103712} |
| {"current_steps": 6580, "total_steps": 9960, "loss": 0.0016, "lr": 3.11819679609084e-06, "epoch": 13.21285140562249, "percentage": 66.06, "elapsed_time": "0:17:04", "remaining_time": "0:08:46", "throughput": 4010.1, "total_tokens": 4106976} |
| {"current_steps": 6585, "total_steps": 9960, "loss": 0.0047, "lr": 3.1100822153136513e-06, "epoch": 13.22289156626506, "percentage": 66.11, "elapsed_time": "0:17:04", "remaining_time": "0:08:45", "throughput": 4010.34, "total_tokens": 4110464} |
| {"current_steps": 6590, "total_steps": 9960, "loss": 0.012, "lr": 3.1019734378877403e-06, "epoch": 13.23293172690763, "percentage": 66.16, "elapsed_time": "0:17:05", "remaining_time": "0:08:44", "throughput": 4010.6, "total_tokens": 4113600} |
| {"current_steps": 6595, "total_steps": 9960, "loss": 0.0006, "lr": 3.0938704887126425e-06, "epoch": 13.242971887550201, "percentage": 66.21, "elapsed_time": "0:17:06", "remaining_time": "0:08:43", "throughput": 4010.7, "total_tokens": 4116800} |
| {"current_steps": 6600, "total_steps": 9960, "loss": 0.0002, "lr": 3.0857733926700033e-06, "epoch": 13.25301204819277, "percentage": 66.27, "elapsed_time": "0:17:07", "remaining_time": "0:08:42", "throughput": 4011.01, "total_tokens": 4120256} |
| {"current_steps": 6605, "total_steps": 9960, "loss": 0.0218, "lr": 3.077682174623495e-06, "epoch": 13.263052208835342, "percentage": 66.32, "elapsed_time": "0:17:07", "remaining_time": "0:08:42", "throughput": 4011.15, "total_tokens": 4123136} |
| {"current_steps": 6610, "total_steps": 9960, "loss": 0.0001, "lr": 3.0695968594187366e-06, "epoch": 13.273092369477911, "percentage": 66.37, "elapsed_time": "0:17:08", "remaining_time": "0:08:41", "throughput": 4011.43, "total_tokens": 4126752} |
| {"current_steps": 6615, "total_steps": 9960, "loss": 0.0001, "lr": 3.0615174718832218e-06, "epoch": 13.283132530120483, "percentage": 66.42, "elapsed_time": "0:17:09", "remaining_time": "0:08:40", "throughput": 4011.54, "total_tokens": 4130080} |
| {"current_steps": 6620, "total_steps": 9960, "loss": 0.0001, "lr": 3.053444036826246e-06, "epoch": 13.293172690763052, "percentage": 66.47, "elapsed_time": "0:17:10", "remaining_time": "0:08:39", "throughput": 4011.85, "total_tokens": 4133184} |
| {"current_steps": 6625, "total_steps": 9960, "loss": 0.0007, "lr": 3.045376579038821e-06, "epoch": 13.303212851405622, "percentage": 66.52, "elapsed_time": "0:17:10", "remaining_time": "0:08:38", "throughput": 4011.99, "total_tokens": 4136192} |
| {"current_steps": 6630, "total_steps": 9960, "loss": 0.1104, "lr": 3.037315123293611e-06, "epoch": 13.313253012048193, "percentage": 66.57, "elapsed_time": "0:17:11", "remaining_time": "0:08:38", "throughput": 4012.19, "total_tokens": 4139552} |
| {"current_steps": 6635, "total_steps": 9960, "loss": 0.0125, "lr": 3.0292596943448416e-06, "epoch": 13.323293172690763, "percentage": 66.62, "elapsed_time": "0:17:12", "remaining_time": "0:08:37", "throughput": 4012.37, "total_tokens": 4143040} |
| {"current_steps": 6640, "total_steps": 9960, "loss": 0.0001, "lr": 3.0212103169282415e-06, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:17:13", "remaining_time": "0:08:36", "throughput": 4012.55, "total_tokens": 4146240} |
| {"current_steps": 6645, "total_steps": 9960, "loss": 0.0003, "lr": 3.013167015760946e-06, "epoch": 13.343373493975903, "percentage": 66.72, "elapsed_time": "0:17:14", "remaining_time": "0:08:35", "throughput": 4012.93, "total_tokens": 4150272} |
| {"current_steps": 6650, "total_steps": 9960, "loss": 0.0003, "lr": 3.0051298155414426e-06, "epoch": 13.353413654618475, "percentage": 66.77, "elapsed_time": "0:17:15", "remaining_time": "0:08:35", "throughput": 4013.38, "total_tokens": 4154624} |
| {"current_steps": 6655, "total_steps": 9960, "loss": 0.0158, "lr": 2.9970987409494784e-06, "epoch": 13.363453815261044, "percentage": 66.82, "elapsed_time": "0:17:15", "remaining_time": "0:08:34", "throughput": 4013.45, "total_tokens": 4157152} |
| {"current_steps": 6660, "total_steps": 9960, "loss": 0.0005, "lr": 2.989073816645992e-06, "epoch": 13.373493975903614, "percentage": 66.87, "elapsed_time": "0:17:16", "remaining_time": "0:08:33", "throughput": 4013.42, "total_tokens": 4159552} |
| {"current_steps": 6665, "total_steps": 9960, "loss": 0.0344, "lr": 2.9810550672730367e-06, "epoch": 13.383534136546185, "percentage": 66.92, "elapsed_time": "0:17:17", "remaining_time": "0:08:32", "throughput": 4013.66, "total_tokens": 4163008} |
| {"current_steps": 6670, "total_steps": 9960, "loss": 0.0005, "lr": 2.9730425174537057e-06, "epoch": 13.393574297188755, "percentage": 66.97, "elapsed_time": "0:17:17", "remaining_time": "0:08:31", "throughput": 4013.96, "total_tokens": 4166432} |
| {"current_steps": 6675, "total_steps": 9960, "loss": 0.0, "lr": 2.965036191792052e-06, "epoch": 13.403614457831326, "percentage": 67.02, "elapsed_time": "0:17:18", "remaining_time": "0:08:31", "throughput": 4014.19, "total_tokens": 4169472} |
| {"current_steps": 6680, "total_steps": 9960, "loss": 0.0002, "lr": 2.9570361148730213e-06, "epoch": 13.413654618473895, "percentage": 67.07, "elapsed_time": "0:17:19", "remaining_time": "0:08:30", "throughput": 4014.39, "total_tokens": 4172704} |
| {"current_steps": 6685, "total_steps": 9960, "loss": 0.0648, "lr": 2.9490423112623646e-06, "epoch": 13.423694779116467, "percentage": 67.12, "elapsed_time": "0:17:20", "remaining_time": "0:08:29", "throughput": 4014.71, "total_tokens": 4176000} |
| {"current_steps": 6690, "total_steps": 9960, "loss": 0.0001, "lr": 2.9410548055065748e-06, "epoch": 13.433734939759036, "percentage": 67.17, "elapsed_time": "0:17:20", "remaining_time": "0:08:28", "throughput": 4014.8, "total_tokens": 4178720} |
| {"current_steps": 6695, "total_steps": 9960, "loss": 0.0001, "lr": 2.933073622132806e-06, "epoch": 13.443775100401606, "percentage": 67.22, "elapsed_time": "0:17:21", "remaining_time": "0:08:27", "throughput": 4014.93, "total_tokens": 4181760} |
| {"current_steps": 6700, "total_steps": 9960, "loss": 0.0604, "lr": 2.9250987856487932e-06, "epoch": 13.453815261044177, "percentage": 67.27, "elapsed_time": "0:17:22", "remaining_time": "0:08:27", "throughput": 4015.18, "total_tokens": 4185152} |
| {"current_steps": 6705, "total_steps": 9960, "loss": 0.0001, "lr": 2.9171303205427883e-06, "epoch": 13.463855421686747, "percentage": 67.32, "elapsed_time": "0:17:23", "remaining_time": "0:08:26", "throughput": 4015.42, "total_tokens": 4188320} |
| {"current_steps": 6710, "total_steps": 9960, "loss": 0.0507, "lr": 2.909168251283474e-06, "epoch": 13.473895582329318, "percentage": 67.37, "elapsed_time": "0:17:23", "remaining_time": "0:08:25", "throughput": 4015.67, "total_tokens": 4191776} |
| {"current_steps": 6715, "total_steps": 9960, "loss": 0.0001, "lr": 2.9012126023198973e-06, "epoch": 13.483935742971887, "percentage": 67.42, "elapsed_time": "0:17:24", "remaining_time": "0:08:24", "throughput": 4015.74, "total_tokens": 4194752} |
| {"current_steps": 6720, "total_steps": 9960, "loss": 0.0001, "lr": 2.893263398081386e-06, "epoch": 13.493975903614459, "percentage": 67.47, "elapsed_time": "0:17:25", "remaining_time": "0:08:23", "throughput": 4015.71, "total_tokens": 4197280} |
| {"current_steps": 6725, "total_steps": 9960, "loss": 0.0579, "lr": 2.8853206629774823e-06, "epoch": 13.504016064257028, "percentage": 67.52, "elapsed_time": "0:17:26", "remaining_time": "0:08:23", "throughput": 4015.9, "total_tokens": 4200736} |
| {"current_steps": 6730, "total_steps": 9960, "loss": 0.0004, "lr": 2.877384421397862e-06, "epoch": 13.514056224899598, "percentage": 67.57, "elapsed_time": "0:17:26", "remaining_time": "0:08:22", "throughput": 4016.04, "total_tokens": 4203968} |
| {"current_steps": 6735, "total_steps": 9960, "loss": 0.0273, "lr": 2.8694546977122595e-06, "epoch": 13.524096385542169, "percentage": 67.62, "elapsed_time": "0:17:27", "remaining_time": "0:08:21", "throughput": 4016.07, "total_tokens": 4206528} |
| {"current_steps": 6740, "total_steps": 9960, "loss": 0.0669, "lr": 2.8615315162703962e-06, "epoch": 13.534136546184738, "percentage": 67.67, "elapsed_time": "0:17:28", "remaining_time": "0:08:20", "throughput": 4016.22, "total_tokens": 4209472} |
| {"current_steps": 6745, "total_steps": 9960, "loss": 0.0193, "lr": 2.853614901401909e-06, "epoch": 13.54417670682731, "percentage": 67.72, "elapsed_time": "0:17:28", "remaining_time": "0:08:19", "throughput": 4016.39, "total_tokens": 4212960} |
| {"current_steps": 6750, "total_steps": 9960, "loss": 0.0445, "lr": 2.84570487741626e-06, "epoch": 13.55421686746988, "percentage": 67.77, "elapsed_time": "0:17:29", "remaining_time": "0:08:19", "throughput": 4016.55, "total_tokens": 4216160} |
| {"current_steps": 6755, "total_steps": 9960, "loss": 0.0006, "lr": 2.837801468602687e-06, "epoch": 13.56425702811245, "percentage": 67.82, "elapsed_time": "0:17:30", "remaining_time": "0:08:18", "throughput": 4016.76, "total_tokens": 4219232} |
| {"current_steps": 6760, "total_steps": 9960, "loss": 0.1348, "lr": 2.8299046992300995e-06, "epoch": 13.57429718875502, "percentage": 67.87, "elapsed_time": "0:17:31", "remaining_time": "0:08:17", "throughput": 4016.8, "total_tokens": 4221920} |
| {"current_steps": 6765, "total_steps": 9960, "loss": 0.0464, "lr": 2.8220145935470276e-06, "epoch": 13.58433734939759, "percentage": 67.92, "elapsed_time": "0:17:31", "remaining_time": "0:08:16", "throughput": 4016.97, "total_tokens": 4225152} |
| {"current_steps": 6770, "total_steps": 9960, "loss": 0.0004, "lr": 2.8141311757815454e-06, "epoch": 13.594377510040161, "percentage": 67.97, "elapsed_time": "0:17:32", "remaining_time": "0:08:16", "throughput": 4017.2, "total_tokens": 4228736} |
| {"current_steps": 6775, "total_steps": 9960, "loss": 0.0001, "lr": 2.806254470141174e-06, "epoch": 13.60441767068273, "percentage": 68.02, "elapsed_time": "0:17:33", "remaining_time": "0:08:15", "throughput": 4017.47, "total_tokens": 4231872} |
| {"current_steps": 6780, "total_steps": 9960, "loss": 0.028, "lr": 2.798384500812842e-06, "epoch": 13.614457831325302, "percentage": 68.07, "elapsed_time": "0:17:34", "remaining_time": "0:08:14", "throughput": 4017.57, "total_tokens": 4234784} |
| {"current_steps": 6785, "total_steps": 9960, "loss": 0.0424, "lr": 2.790521291962775e-06, "epoch": 13.624497991967871, "percentage": 68.12, "elapsed_time": "0:17:34", "remaining_time": "0:08:13", "throughput": 4017.61, "total_tokens": 4237696} |
| {"current_steps": 6790, "total_steps": 9960, "loss": 0.0216, "lr": 2.7826648677364555e-06, "epoch": 13.634538152610443, "percentage": 68.17, "elapsed_time": "0:17:35", "remaining_time": "0:08:12", "throughput": 4017.79, "total_tokens": 4240928} |
| {"current_steps": 6795, "total_steps": 9960, "loss": 0.0002, "lr": 2.774815252258522e-06, "epoch": 13.644578313253012, "percentage": 68.22, "elapsed_time": "0:17:36", "remaining_time": "0:08:12", "throughput": 4018.0, "total_tokens": 4244480} |
| {"current_steps": 6800, "total_steps": 9960, "loss": 0.0276, "lr": 2.7669724696327094e-06, "epoch": 13.654618473895582, "percentage": 68.27, "elapsed_time": "0:17:37", "remaining_time": "0:08:11", "throughput": 4018.21, "total_tokens": 4247552} |
| {"current_steps": 6805, "total_steps": 9960, "loss": 0.0004, "lr": 2.759136543941773e-06, "epoch": 13.664658634538153, "percentage": 68.32, "elapsed_time": "0:17:37", "remaining_time": "0:08:10", "throughput": 4018.28, "total_tokens": 4250304} |
| {"current_steps": 6810, "total_steps": 9960, "loss": 0.0005, "lr": 2.751307499247403e-06, "epoch": 13.674698795180722, "percentage": 68.37, "elapsed_time": "0:17:38", "remaining_time": "0:08:09", "throughput": 4018.67, "total_tokens": 4254016} |
| {"current_steps": 6815, "total_steps": 9960, "loss": 0.011, "lr": 2.743485359590173e-06, "epoch": 13.684738955823294, "percentage": 68.42, "elapsed_time": "0:17:39", "remaining_time": "0:08:08", "throughput": 4018.7, "total_tokens": 4256704} |
| {"current_steps": 6820, "total_steps": 9960, "loss": 0.0004, "lr": 2.7356701489894468e-06, "epoch": 13.694779116465863, "percentage": 68.47, "elapsed_time": "0:17:39", "remaining_time": "0:08:07", "throughput": 4018.82, "total_tokens": 4259584} |
| {"current_steps": 6825, "total_steps": 9960, "loss": 0.0145, "lr": 2.7278618914433105e-06, "epoch": 13.704819277108435, "percentage": 68.52, "elapsed_time": "0:17:40", "remaining_time": "0:08:07", "throughput": 4018.91, "total_tokens": 4262368} |
| {"current_steps": 6830, "total_steps": 9960, "loss": 0.0001, "lr": 2.720060610928501e-06, "epoch": 13.714859437751004, "percentage": 68.57, "elapsed_time": "0:17:41", "remaining_time": "0:08:06", "throughput": 4019.26, "total_tokens": 4265792} |
| {"current_steps": 6835, "total_steps": 9960, "loss": 0.0004, "lr": 2.712266331400332e-06, "epoch": 13.724899598393574, "percentage": 68.62, "elapsed_time": "0:17:41", "remaining_time": "0:08:05", "throughput": 4019.39, "total_tokens": 4268448} |
| {"current_steps": 6840, "total_steps": 9960, "loss": 0.0001, "lr": 2.704479076792618e-06, "epoch": 13.734939759036145, "percentage": 68.67, "elapsed_time": "0:17:42", "remaining_time": "0:08:04", "throughput": 4019.56, "total_tokens": 4272192} |
| {"current_steps": 6845, "total_steps": 9960, "loss": 0.0001, "lr": 2.696698871017601e-06, "epoch": 13.744979919678714, "percentage": 68.72, "elapsed_time": "0:17:43", "remaining_time": "0:08:03", "throughput": 4019.63, "total_tokens": 4275040} |
| {"current_steps": 6850, "total_steps": 9960, "loss": 0.0166, "lr": 2.6889257379658804e-06, "epoch": 13.755020080321286, "percentage": 68.78, "elapsed_time": "0:17:44", "remaining_time": "0:08:03", "throughput": 4019.74, "total_tokens": 4278144} |
| {"current_steps": 6855, "total_steps": 9960, "loss": 0.0872, "lr": 2.6811597015063373e-06, "epoch": 13.765060240963855, "percentage": 68.83, "elapsed_time": "0:17:45", "remaining_time": "0:08:02", "throughput": 4019.91, "total_tokens": 4281344} |
| {"current_steps": 6860, "total_steps": 9960, "loss": 0.034, "lr": 2.6734007854860596e-06, "epoch": 13.775100401606426, "percentage": 68.88, "elapsed_time": "0:17:45", "remaining_time": "0:08:01", "throughput": 4019.95, "total_tokens": 4284032} |
| {"current_steps": 6865, "total_steps": 9960, "loss": 0.0428, "lr": 2.66564901373027e-06, "epoch": 13.785140562248996, "percentage": 68.93, "elapsed_time": "0:17:46", "remaining_time": "0:08:00", "throughput": 4020.04, "total_tokens": 4286848} |
| {"current_steps": 6870, "total_steps": 9960, "loss": 0.048, "lr": 2.657904410042261e-06, "epoch": 13.795180722891565, "percentage": 68.98, "elapsed_time": "0:17:47", "remaining_time": "0:07:59", "throughput": 4020.16, "total_tokens": 4289536} |
| {"current_steps": 6875, "total_steps": 9960, "loss": 0.0023, "lr": 2.6501669982033006e-06, "epoch": 13.805220883534137, "percentage": 69.03, "elapsed_time": "0:17:47", "remaining_time": "0:07:59", "throughput": 4020.37, "total_tokens": 4292960} |
| {"current_steps": 6880, "total_steps": 9960, "loss": 0.0009, "lr": 2.6424368019725877e-06, "epoch": 13.815261044176706, "percentage": 69.08, "elapsed_time": "0:17:48", "remaining_time": "0:07:58", "throughput": 4020.58, "total_tokens": 4296064} |
| {"current_steps": 6885, "total_steps": 9960, "loss": 0.0001, "lr": 2.634713845087152e-06, "epoch": 13.825301204819278, "percentage": 69.13, "elapsed_time": "0:17:49", "remaining_time": "0:07:57", "throughput": 4020.81, "total_tokens": 4299744} |
| {"current_steps": 6890, "total_steps": 9960, "loss": 0.0173, "lr": 2.626998151261798e-06, "epoch": 13.835341365461847, "percentage": 69.18, "elapsed_time": "0:17:50", "remaining_time": "0:07:56", "throughput": 4021.05, "total_tokens": 4302912} |
| {"current_steps": 6895, "total_steps": 9960, "loss": 0.0001, "lr": 2.6192897441890337e-06, "epoch": 13.845381526104418, "percentage": 69.23, "elapsed_time": "0:17:50", "remaining_time": "0:07:56", "throughput": 4021.24, "total_tokens": 4306464} |
| {"current_steps": 6900, "total_steps": 9960, "loss": 0.0425, "lr": 2.6115886475389786e-06, "epoch": 13.855421686746988, "percentage": 69.28, "elapsed_time": "0:17:51", "remaining_time": "0:07:55", "throughput": 4021.53, "total_tokens": 4310240} |
| {"current_steps": 6905, "total_steps": 9960, "loss": 0.1078, "lr": 2.603894884959317e-06, "epoch": 13.865461847389557, "percentage": 69.33, "elapsed_time": "0:17:52", "remaining_time": "0:07:54", "throughput": 4021.73, "total_tokens": 4313568} |
| {"current_steps": 6910, "total_steps": 9960, "loss": 0.0262, "lr": 2.5962084800752064e-06, "epoch": 13.875502008032129, "percentage": 69.38, "elapsed_time": "0:17:53", "remaining_time": "0:07:53", "throughput": 4021.89, "total_tokens": 4316832} |
| {"current_steps": 6915, "total_steps": 9960, "loss": 0.002, "lr": 2.588529456489211e-06, "epoch": 13.885542168674698, "percentage": 69.43, "elapsed_time": "0:17:54", "remaining_time": "0:07:52", "throughput": 4022.04, "total_tokens": 4319904} |
| {"current_steps": 6920, "total_steps": 9960, "loss": 0.0003, "lr": 2.580857837781231e-06, "epoch": 13.89558232931727, "percentage": 69.48, "elapsed_time": "0:17:54", "remaining_time": "0:07:52", "throughput": 4022.0, "total_tokens": 4322496} |
| {"current_steps": 6925, "total_steps": 9960, "loss": 0.0508, "lr": 2.573193647508426e-06, "epoch": 13.905622489959839, "percentage": 69.53, "elapsed_time": "0:17:55", "remaining_time": "0:07:51", "throughput": 4022.18, "total_tokens": 4325696} |
| {"current_steps": 6930, "total_steps": 9960, "loss": 0.0536, "lr": 2.5655369092051495e-06, "epoch": 13.91566265060241, "percentage": 69.58, "elapsed_time": "0:17:56", "remaining_time": "0:07:50", "throughput": 4022.33, "total_tokens": 4328672} |
| {"current_steps": 6935, "total_steps": 9960, "loss": 0.03, "lr": 2.557887646382868e-06, "epoch": 13.92570281124498, "percentage": 69.63, "elapsed_time": "0:17:56", "remaining_time": "0:07:49", "throughput": 4022.39, "total_tokens": 4331680} |
| {"current_steps": 6940, "total_steps": 9960, "loss": 0.0061, "lr": 2.5502458825300956e-06, "epoch": 13.93574297188755, "percentage": 69.68, "elapsed_time": "0:17:57", "remaining_time": "0:07:48", "throughput": 4022.59, "total_tokens": 4334688} |
| {"current_steps": 6945, "total_steps": 9960, "loss": 0.0367, "lr": 2.542611641112318e-06, "epoch": 13.94578313253012, "percentage": 69.73, "elapsed_time": "0:17:58", "remaining_time": "0:07:48", "throughput": 4022.95, "total_tokens": 4338240} |
| {"current_steps": 6950, "total_steps": 9960, "loss": 0.0002, "lr": 2.534984945571923e-06, "epoch": 13.95582329317269, "percentage": 69.78, "elapsed_time": "0:17:59", "remaining_time": "0:07:47", "throughput": 4023.25, "total_tokens": 4341824} |
| {"current_steps": 6955, "total_steps": 9960, "loss": 0.0002, "lr": 2.5273658193281252e-06, "epoch": 13.965863453815262, "percentage": 69.83, "elapsed_time": "0:17:59", "remaining_time": "0:07:46", "throughput": 4023.33, "total_tokens": 4344800} |
| {"current_steps": 6960, "total_steps": 9960, "loss": 0.0111, "lr": 2.519754285776903e-06, "epoch": 13.975903614457831, "percentage": 69.88, "elapsed_time": "0:18:00", "remaining_time": "0:07:45", "throughput": 4023.46, "total_tokens": 4347936} |
| {"current_steps": 6965, "total_steps": 9960, "loss": 0.0019, "lr": 2.5121503682909095e-06, "epoch": 13.985943775100402, "percentage": 69.93, "elapsed_time": "0:18:01", "remaining_time": "0:07:44", "throughput": 4023.63, "total_tokens": 4350976} |
| {"current_steps": 6970, "total_steps": 9960, "loss": 0.0009, "lr": 2.504554090219418e-06, "epoch": 13.995983935742972, "percentage": 69.98, "elapsed_time": "0:18:02", "remaining_time": "0:07:44", "throughput": 4023.8, "total_tokens": 4354016} |
| {"current_steps": 6972, "total_steps": 9960, "eval_loss": 0.8435496687889099, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:18:10", "remaining_time": "0:07:47", "throughput": 3993.89, "total_tokens": 4355328} |
| {"current_steps": 6975, "total_steps": 9960, "loss": 0.0001, "lr": 2.496965474888243e-06, "epoch": 14.006024096385541, "percentage": 70.03, "elapsed_time": "0:18:12", "remaining_time": "0:07:47", "throughput": 3989.15, "total_tokens": 4356832} |
| {"current_steps": 6980, "total_steps": 9960, "loss": 0.0001, "lr": 2.489384545599666e-06, "epoch": 14.016064257028113, "percentage": 70.08, "elapsed_time": "0:18:13", "remaining_time": "0:07:46", "throughput": 3989.28, "total_tokens": 4360320} |
| {"current_steps": 6985, "total_steps": 9960, "loss": 0.0025, "lr": 2.4818113256323745e-06, "epoch": 14.026104417670682, "percentage": 70.13, "elapsed_time": "0:18:13", "remaining_time": "0:07:45", "throughput": 3989.42, "total_tokens": 4363424} |
| {"current_steps": 6990, "total_steps": 9960, "loss": 0.0003, "lr": 2.474245838241371e-06, "epoch": 14.036144578313253, "percentage": 70.18, "elapsed_time": "0:18:14", "remaining_time": "0:07:45", "throughput": 3989.6, "total_tokens": 4366240} |
| {"current_steps": 6995, "total_steps": 9960, "loss": 0.0, "lr": 2.466688106657927e-06, "epoch": 14.046184738955823, "percentage": 70.23, "elapsed_time": "0:18:15", "remaining_time": "0:07:44", "throughput": 3989.57, "total_tokens": 4368704} |
| {"current_steps": 7000, "total_steps": 9960, "loss": 0.0157, "lr": 2.459138154089486e-06, "epoch": 14.056224899598394, "percentage": 70.28, "elapsed_time": "0:18:15", "remaining_time": "0:07:43", "throughput": 3989.79, "total_tokens": 4372320} |
| {"current_steps": 7005, "total_steps": 9960, "loss": 0.0001, "lr": 2.4515960037196146e-06, "epoch": 14.066265060240964, "percentage": 70.33, "elapsed_time": "0:18:16", "remaining_time": "0:07:42", "throughput": 3989.95, "total_tokens": 4375104} |
| {"current_steps": 7010, "total_steps": 9960, "loss": 0.017, "lr": 2.444061678707915e-06, "epoch": 14.076305220883533, "percentage": 70.38, "elapsed_time": "0:18:17", "remaining_time": "0:07:41", "throughput": 3990.04, "total_tokens": 4377888} |
| {"current_steps": 7015, "total_steps": 9960, "loss": 0.0001, "lr": 2.4365352021899635e-06, "epoch": 14.086345381526105, "percentage": 70.43, "elapsed_time": "0:18:18", "remaining_time": "0:07:40", "throughput": 3990.3, "total_tokens": 4381536} |
| {"current_steps": 7020, "total_steps": 9960, "loss": 0.0002, "lr": 2.4290165972772363e-06, "epoch": 14.096385542168674, "percentage": 70.48, "elapsed_time": "0:18:18", "remaining_time": "0:07:40", "throughput": 3990.31, "total_tokens": 4384096} |
| {"current_steps": 7025, "total_steps": 9960, "loss": 0.0072, "lr": 2.42150588705703e-06, "epoch": 14.106425702811245, "percentage": 70.53, "elapsed_time": "0:18:19", "remaining_time": "0:07:39", "throughput": 3990.48, "total_tokens": 4387168} |
| {"current_steps": 7030, "total_steps": 9960, "loss": 0.0612, "lr": 2.4140030945924137e-06, "epoch": 14.116465863453815, "percentage": 70.58, "elapsed_time": "0:18:20", "remaining_time": "0:07:38", "throughput": 3990.48, "total_tokens": 4389728} |
| {"current_steps": 7035, "total_steps": 9960, "loss": 0.0002, "lr": 2.4065082429221315e-06, "epoch": 14.126506024096386, "percentage": 70.63, "elapsed_time": "0:18:20", "remaining_time": "0:07:37", "throughput": 3990.69, "total_tokens": 4393184} |
| {"current_steps": 7040, "total_steps": 9960, "loss": 0.0024, "lr": 2.3990213550605496e-06, "epoch": 14.136546184738956, "percentage": 70.68, "elapsed_time": "0:18:21", "remaining_time": "0:07:36", "throughput": 3990.92, "total_tokens": 4396608} |
| {"current_steps": 7045, "total_steps": 9960, "loss": 0.0001, "lr": 2.391542453997578e-06, "epoch": 14.146586345381525, "percentage": 70.73, "elapsed_time": "0:18:22", "remaining_time": "0:07:36", "throughput": 3991.03, "total_tokens": 4399520} |
| {"current_steps": 7050, "total_steps": 9960, "loss": 0.0004, "lr": 2.3840715626986016e-06, "epoch": 14.156626506024097, "percentage": 70.78, "elapsed_time": "0:18:23", "remaining_time": "0:07:35", "throughput": 3991.3, "total_tokens": 4402784} |
| {"current_steps": 7055, "total_steps": 9960, "loss": 0.0016, "lr": 2.37660870410441e-06, "epoch": 14.166666666666666, "percentage": 70.83, "elapsed_time": "0:18:23", "remaining_time": "0:07:34", "throughput": 3991.48, "total_tokens": 4406016} |
| {"current_steps": 7060, "total_steps": 9960, "loss": 0.0001, "lr": 2.3691539011311276e-06, "epoch": 14.176706827309237, "percentage": 70.88, "elapsed_time": "0:18:24", "remaining_time": "0:07:33", "throughput": 3991.75, "total_tokens": 4409600} |
| {"current_steps": 7065, "total_steps": 9960, "loss": 0.0001, "lr": 2.3617071766701415e-06, "epoch": 14.186746987951807, "percentage": 70.93, "elapsed_time": "0:18:25", "remaining_time": "0:07:32", "throughput": 3991.68, "total_tokens": 4412352} |
| {"current_steps": 7070, "total_steps": 9960, "loss": 0.0, "lr": 2.354268553588033e-06, "epoch": 14.196787148594378, "percentage": 70.98, "elapsed_time": "0:18:26", "remaining_time": "0:07:32", "throughput": 3991.65, "total_tokens": 4415072} |
| {"current_steps": 7075, "total_steps": 9960, "loss": 0.0006, "lr": 2.346838054726505e-06, "epoch": 14.206827309236948, "percentage": 71.03, "elapsed_time": "0:18:26", "remaining_time": "0:07:31", "throughput": 3991.94, "total_tokens": 4418848} |
| {"current_steps": 7080, "total_steps": 9960, "loss": 0.0002, "lr": 2.3394157029023145e-06, "epoch": 14.216867469879517, "percentage": 71.08, "elapsed_time": "0:18:27", "remaining_time": "0:07:30", "throughput": 3992.12, "total_tokens": 4421664} |
| {"current_steps": 7085, "total_steps": 9960, "loss": 0.0001, "lr": 2.3320015209072056e-06, "epoch": 14.226907630522089, "percentage": 71.13, "elapsed_time": "0:18:28", "remaining_time": "0:07:29", "throughput": 3992.28, "total_tokens": 4424736} |
| {"current_steps": 7090, "total_steps": 9960, "loss": 0.0074, "lr": 2.324595531507827e-06, "epoch": 14.236947791164658, "percentage": 71.18, "elapsed_time": "0:18:28", "remaining_time": "0:07:28", "throughput": 3992.32, "total_tokens": 4427296} |
| {"current_steps": 7095, "total_steps": 9960, "loss": 0.0001, "lr": 2.317197757445676e-06, "epoch": 14.24698795180723, "percentage": 71.23, "elapsed_time": "0:18:29", "remaining_time": "0:07:28", "throughput": 3992.58, "total_tokens": 4430848} |
| {"current_steps": 7100, "total_steps": 9960, "loss": 0.0001, "lr": 2.309808221437022e-06, "epoch": 14.257028112449799, "percentage": 71.29, "elapsed_time": "0:18:30", "remaining_time": "0:07:27", "throughput": 3992.62, "total_tokens": 4433536} |
| {"current_steps": 7105, "total_steps": 9960, "loss": 0.0003, "lr": 2.302426946172836e-06, "epoch": 14.26706827309237, "percentage": 71.34, "elapsed_time": "0:18:31", "remaining_time": "0:07:26", "throughput": 3992.81, "total_tokens": 4436544} |
| {"current_steps": 7110, "total_steps": 9960, "loss": 0.0113, "lr": 2.295053954318731e-06, "epoch": 14.27710843373494, "percentage": 71.39, "elapsed_time": "0:18:31", "remaining_time": "0:07:25", "throughput": 3992.83, "total_tokens": 4439424} |
| {"current_steps": 7115, "total_steps": 9960, "loss": 0.0001, "lr": 2.2876892685148696e-06, "epoch": 14.28714859437751, "percentage": 71.44, "elapsed_time": "0:18:32", "remaining_time": "0:07:24", "throughput": 3992.98, "total_tokens": 4442400} |
| {"current_steps": 7120, "total_steps": 9960, "loss": 0.0001, "lr": 2.2803329113759256e-06, "epoch": 14.29718875502008, "percentage": 71.49, "elapsed_time": "0:18:33", "remaining_time": "0:07:24", "throughput": 3993.08, "total_tokens": 4445408} |
| {"current_steps": 7125, "total_steps": 9960, "loss": 0.0087, "lr": 2.2729849054909812e-06, "epoch": 14.30722891566265, "percentage": 71.54, "elapsed_time": "0:18:34", "remaining_time": "0:07:23", "throughput": 3993.39, "total_tokens": 4448928} |
| {"current_steps": 7130, "total_steps": 9960, "loss": 0.1558, "lr": 2.26564527342349e-06, "epoch": 14.317269076305221, "percentage": 71.59, "elapsed_time": "0:18:34", "remaining_time": "0:07:22", "throughput": 3993.59, "total_tokens": 4452416} |
| {"current_steps": 7135, "total_steps": 9960, "loss": 0.0001, "lr": 2.258314037711184e-06, "epoch": 14.32730923694779, "percentage": 71.64, "elapsed_time": "0:18:35", "remaining_time": "0:07:21", "throughput": 3993.63, "total_tokens": 4454976} |
| {"current_steps": 7140, "total_steps": 9960, "loss": 0.0016, "lr": 2.2509912208660125e-06, "epoch": 14.337349397590362, "percentage": 71.69, "elapsed_time": "0:18:36", "remaining_time": "0:07:20", "throughput": 3993.81, "total_tokens": 4457984} |
| {"current_steps": 7145, "total_steps": 9960, "loss": 0.0348, "lr": 2.2436768453740743e-06, "epoch": 14.347389558232932, "percentage": 71.74, "elapsed_time": "0:18:36", "remaining_time": "0:07:20", "throughput": 3994.02, "total_tokens": 4460992} |
| {"current_steps": 7150, "total_steps": 9960, "loss": 0.0001, "lr": 2.236370933695549e-06, "epoch": 14.357429718875501, "percentage": 71.79, "elapsed_time": "0:18:37", "remaining_time": "0:07:19", "throughput": 3994.22, "total_tokens": 4463904} |
| {"current_steps": 7155, "total_steps": 9960, "loss": 0.0001, "lr": 2.2290735082646254e-06, "epoch": 14.367469879518072, "percentage": 71.84, "elapsed_time": "0:18:38", "remaining_time": "0:07:18", "throughput": 3994.39, "total_tokens": 4466656} |
| {"current_steps": 7160, "total_steps": 9960, "loss": 0.0789, "lr": 2.2217845914894315e-06, "epoch": 14.377510040160642, "percentage": 71.89, "elapsed_time": "0:18:39", "remaining_time": "0:07:17", "throughput": 3994.61, "total_tokens": 4470208} |
| {"current_steps": 7165, "total_steps": 9960, "loss": 0.0002, "lr": 2.214504205751971e-06, "epoch": 14.387550200803213, "percentage": 71.94, "elapsed_time": "0:18:39", "remaining_time": "0:07:16", "throughput": 3994.94, "total_tokens": 4474144} |
| {"current_steps": 7170, "total_steps": 9960, "loss": 0.0002, "lr": 2.2072323734080503e-06, "epoch": 14.397590361445783, "percentage": 71.99, "elapsed_time": "0:18:40", "remaining_time": "0:07:16", "throughput": 3995.13, "total_tokens": 4477184} |
| {"current_steps": 7175, "total_steps": 9960, "loss": 0.0005, "lr": 2.1999691167872107e-06, "epoch": 14.407630522088354, "percentage": 72.04, "elapsed_time": "0:18:41", "remaining_time": "0:07:15", "throughput": 3995.16, "total_tokens": 4480064} |
| {"current_steps": 7180, "total_steps": 9960, "loss": 0.034, "lr": 2.1927144581926597e-06, "epoch": 14.417670682730924, "percentage": 72.09, "elapsed_time": "0:18:42", "remaining_time": "0:07:14", "throughput": 3995.39, "total_tokens": 4483616} |
| {"current_steps": 7185, "total_steps": 9960, "loss": 0.0182, "lr": 2.1854684199012036e-06, "epoch": 14.427710843373493, "percentage": 72.14, "elapsed_time": "0:18:43", "remaining_time": "0:07:13", "throughput": 3995.75, "total_tokens": 4487488} |
| {"current_steps": 7190, "total_steps": 9960, "loss": 0.0001, "lr": 2.178231024163179e-06, "epoch": 14.437751004016064, "percentage": 72.19, "elapsed_time": "0:18:43", "remaining_time": "0:07:12", "throughput": 3995.63, "total_tokens": 4489696} |
| {"current_steps": 7195, "total_steps": 9960, "loss": 0.0001, "lr": 2.1710022932023805e-06, "epoch": 14.447791164658634, "percentage": 72.24, "elapsed_time": "0:18:44", "remaining_time": "0:07:12", "throughput": 3995.87, "total_tokens": 4493088} |
| {"current_steps": 7200, "total_steps": 9960, "loss": 0.0001, "lr": 2.163782249216005e-06, "epoch": 14.457831325301205, "percentage": 72.29, "elapsed_time": "0:18:45", "remaining_time": "0:07:11", "throughput": 3996.17, "total_tokens": 4497024} |
| {"current_steps": 7205, "total_steps": 9960, "loss": 0.0022, "lr": 2.15657091437456e-06, "epoch": 14.467871485943775, "percentage": 72.34, "elapsed_time": "0:18:45", "remaining_time": "0:07:10", "throughput": 3996.27, "total_tokens": 4499712} |
| {"current_steps": 7210, "total_steps": 9960, "loss": 0.0003, "lr": 2.1493683108218254e-06, "epoch": 14.477911646586346, "percentage": 72.39, "elapsed_time": "0:18:46", "remaining_time": "0:07:09", "throughput": 3996.39, "total_tokens": 4502400} |
| {"current_steps": 7215, "total_steps": 9960, "loss": 0.0001, "lr": 2.142174460674755e-06, "epoch": 14.487951807228916, "percentage": 72.44, "elapsed_time": "0:18:47", "remaining_time": "0:07:08", "throughput": 3996.48, "total_tokens": 4505088} |
| {"current_steps": 7220, "total_steps": 9960, "loss": 0.0474, "lr": 2.134989386023437e-06, "epoch": 14.497991967871485, "percentage": 72.49, "elapsed_time": "0:18:48", "remaining_time": "0:07:08", "throughput": 3996.69, "total_tokens": 4508384} |
| {"current_steps": 7225, "total_steps": 9960, "loss": 0.0001, "lr": 2.127813108931007e-06, "epoch": 14.508032128514056, "percentage": 72.54, "elapsed_time": "0:18:48", "remaining_time": "0:07:07", "throughput": 3996.88, "total_tokens": 4511584} |
| {"current_steps": 7230, "total_steps": 9960, "loss": 0.0, "lr": 2.1206456514335794e-06, "epoch": 14.518072289156626, "percentage": 72.59, "elapsed_time": "0:18:49", "remaining_time": "0:07:06", "throughput": 3997.08, "total_tokens": 4514816} |
| {"current_steps": 7235, "total_steps": 9960, "loss": 0.0004, "lr": 2.113487035540201e-06, "epoch": 14.528112449799197, "percentage": 72.64, "elapsed_time": "0:18:50", "remaining_time": "0:07:05", "throughput": 3997.3, "total_tokens": 4517824} |
| {"current_steps": 7240, "total_steps": 9960, "loss": 0.0001, "lr": 2.1063372832327535e-06, "epoch": 14.538152610441767, "percentage": 72.69, "elapsed_time": "0:18:50", "remaining_time": "0:07:04", "throughput": 3997.5, "total_tokens": 4521088} |
| {"current_steps": 7245, "total_steps": 9960, "loss": 0.0092, "lr": 2.099196416465913e-06, "epoch": 14.548192771084338, "percentage": 72.74, "elapsed_time": "0:18:51", "remaining_time": "0:07:04", "throughput": 3997.79, "total_tokens": 4524416} |
| {"current_steps": 7250, "total_steps": 9960, "loss": 0.0919, "lr": 2.092064457167066e-06, "epoch": 14.558232931726907, "percentage": 72.79, "elapsed_time": "0:18:52", "remaining_time": "0:07:03", "throughput": 3997.96, "total_tokens": 4527520} |
| {"current_steps": 7255, "total_steps": 9960, "loss": 0.0014, "lr": 2.084941427236245e-06, "epoch": 14.568273092369477, "percentage": 72.84, "elapsed_time": "0:18:53", "remaining_time": "0:07:02", "throughput": 3998.16, "total_tokens": 4530976} |
| {"current_steps": 7260, "total_steps": 9960, "loss": 0.0001, "lr": 2.0778273485460677e-06, "epoch": 14.578313253012048, "percentage": 72.89, "elapsed_time": "0:18:53", "remaining_time": "0:07:01", "throughput": 3998.36, "total_tokens": 4534048} |
| {"current_steps": 7265, "total_steps": 9960, "loss": 0.0001, "lr": 2.0707222429416613e-06, "epoch": 14.588353413654618, "percentage": 72.94, "elapsed_time": "0:18:54", "remaining_time": "0:07:00", "throughput": 3998.56, "total_tokens": 4537536} |
| {"current_steps": 7270, "total_steps": 9960, "loss": 0.0002, "lr": 2.063626132240602e-06, "epoch": 14.598393574297189, "percentage": 72.99, "elapsed_time": "0:18:55", "remaining_time": "0:07:00", "throughput": 3998.55, "total_tokens": 4540256} |
| {"current_steps": 7275, "total_steps": 9960, "loss": 0.0001, "lr": 2.0565390382328448e-06, "epoch": 14.608433734939759, "percentage": 73.04, "elapsed_time": "0:18:56", "remaining_time": "0:06:59", "throughput": 3998.8, "total_tokens": 4543552} |
| {"current_steps": 7280, "total_steps": 9960, "loss": 0.0001, "lr": 2.049460982680656e-06, "epoch": 14.61847389558233, "percentage": 73.09, "elapsed_time": "0:18:56", "remaining_time": "0:06:58", "throughput": 3998.9, "total_tokens": 4546304} |
| {"current_steps": 7285, "total_steps": 9960, "loss": 0.0, "lr": 2.04239198731855e-06, "epoch": 14.6285140562249, "percentage": 73.14, "elapsed_time": "0:18:57", "remaining_time": "0:06:57", "throughput": 3998.98, "total_tokens": 4549312} |
| {"current_steps": 7290, "total_steps": 9960, "loss": 0.0001, "lr": 2.035332073853217e-06, "epoch": 14.638554216867469, "percentage": 73.19, "elapsed_time": "0:18:58", "remaining_time": "0:06:56", "throughput": 3999.44, "total_tokens": 4553152} |
| {"current_steps": 7295, "total_steps": 9960, "loss": 0.0692, "lr": 2.0282812639634636e-06, "epoch": 14.64859437751004, "percentage": 73.24, "elapsed_time": "0:18:59", "remaining_time": "0:06:56", "throughput": 3999.52, "total_tokens": 4555712} |
| {"current_steps": 7300, "total_steps": 9960, "loss": 0.0002, "lr": 2.0212395793001384e-06, "epoch": 14.65863453815261, "percentage": 73.29, "elapsed_time": "0:18:59", "remaining_time": "0:06:55", "throughput": 3999.58, "total_tokens": 4558304} |
| {"current_steps": 7305, "total_steps": 9960, "loss": 0.0001, "lr": 2.0142070414860704e-06, "epoch": 14.668674698795181, "percentage": 73.34, "elapsed_time": "0:19:00", "remaining_time": "0:06:54", "throughput": 3999.66, "total_tokens": 4560992} |
| {"current_steps": 7310, "total_steps": 9960, "loss": 0.0026, "lr": 2.007183672116002e-06, "epoch": 14.67871485943775, "percentage": 73.39, "elapsed_time": "0:19:01", "remaining_time": "0:06:53", "throughput": 3999.79, "total_tokens": 4564384} |
| {"current_steps": 7315, "total_steps": 9960, "loss": 0.0001, "lr": 2.000169492756523e-06, "epoch": 14.688755020080322, "percentage": 73.44, "elapsed_time": "0:19:01", "remaining_time": "0:06:52", "throughput": 4000.0, "total_tokens": 4567936} |
| {"current_steps": 7320, "total_steps": 9960, "loss": 0.0002, "lr": 1.9931645249459997e-06, "epoch": 14.698795180722891, "percentage": 73.49, "elapsed_time": "0:19:02", "remaining_time": "0:06:52", "throughput": 4000.11, "total_tokens": 4571072} |
| {"current_steps": 7325, "total_steps": 9960, "loss": 0.0013, "lr": 1.986168790194521e-06, "epoch": 14.708835341365463, "percentage": 73.54, "elapsed_time": "0:19:03", "remaining_time": "0:06:51", "throughput": 4000.37, "total_tokens": 4574496} |
| {"current_steps": 7330, "total_steps": 9960, "loss": 0.0039, "lr": 1.9791823099838107e-06, "epoch": 14.718875502008032, "percentage": 73.59, "elapsed_time": "0:19:04", "remaining_time": "0:06:50", "throughput": 4000.5, "total_tokens": 4577440} |
| {"current_steps": 7335, "total_steps": 9960, "loss": 0.0001, "lr": 1.9722051057671896e-06, "epoch": 14.728915662650602, "percentage": 73.64, "elapsed_time": "0:19:04", "remaining_time": "0:06:49", "throughput": 4000.71, "total_tokens": 4580608} |
| {"current_steps": 7340, "total_steps": 9960, "loss": 0.0201, "lr": 1.965237198969481e-06, "epoch": 14.738955823293173, "percentage": 73.69, "elapsed_time": "0:19:05", "remaining_time": "0:06:48", "throughput": 4000.88, "total_tokens": 4584160} |
| {"current_steps": 7345, "total_steps": 9960, "loss": 0.0036, "lr": 1.9582786109869713e-06, "epoch": 14.748995983935743, "percentage": 73.74, "elapsed_time": "0:19:06", "remaining_time": "0:06:48", "throughput": 4000.97, "total_tokens": 4587072} |
| {"current_steps": 7350, "total_steps": 9960, "loss": 0.0585, "lr": 1.951329363187323e-06, "epoch": 14.759036144578314, "percentage": 73.8, "elapsed_time": "0:19:07", "remaining_time": "0:06:47", "throughput": 4001.14, "total_tokens": 4590272} |
| {"current_steps": 7355, "total_steps": 9960, "loss": 0.0, "lr": 1.944389476909518e-06, "epoch": 14.769076305220883, "percentage": 73.85, "elapsed_time": "0:19:08", "remaining_time": "0:06:46", "throughput": 4001.4, "total_tokens": 4593824} |
| {"current_steps": 7360, "total_steps": 9960, "loss": 0.0, "lr": 1.9374589734638e-06, "epoch": 14.779116465863455, "percentage": 73.9, "elapsed_time": "0:19:08", "remaining_time": "0:06:45", "throughput": 4001.46, "total_tokens": 4596352} |
| {"current_steps": 7365, "total_steps": 9960, "loss": 0.0116, "lr": 1.930537874131588e-06, "epoch": 14.789156626506024, "percentage": 73.95, "elapsed_time": "0:19:09", "remaining_time": "0:06:44", "throughput": 4001.67, "total_tokens": 4599616} |
| {"current_steps": 7370, "total_steps": 9960, "loss": 0.0001, "lr": 1.9236262001654372e-06, "epoch": 14.799196787148594, "percentage": 74.0, "elapsed_time": "0:19:10", "remaining_time": "0:06:44", "throughput": 4001.92, "total_tokens": 4603584} |
| {"current_steps": 7375, "total_steps": 9960, "loss": 0.0001, "lr": 1.9167239727889527e-06, "epoch": 14.809236947791165, "percentage": 74.05, "elapsed_time": "0:19:11", "remaining_time": "0:06:43", "throughput": 4002.19, "total_tokens": 4607136} |
| {"current_steps": 7380, "total_steps": 9960, "loss": 0.0001, "lr": 1.9098312131967327e-06, "epoch": 14.819277108433734, "percentage": 74.1, "elapsed_time": "0:19:11", "remaining_time": "0:06:42", "throughput": 4002.3, "total_tokens": 4609888} |
| {"current_steps": 7385, "total_steps": 9960, "loss": 0.0, "lr": 1.9029479425543052e-06, "epoch": 14.829317269076306, "percentage": 74.15, "elapsed_time": "0:19:12", "remaining_time": "0:06:41", "throughput": 4002.25, "total_tokens": 4612384} |
| {"current_steps": 7390, "total_steps": 9960, "loss": 0.0029, "lr": 1.8960741819980576e-06, "epoch": 14.839357429718875, "percentage": 74.2, "elapsed_time": "0:19:13", "remaining_time": "0:06:41", "throughput": 4002.37, "total_tokens": 4615424} |
| {"current_steps": 7395, "total_steps": 9960, "loss": 0.0015, "lr": 1.889209952635178e-06, "epoch": 14.849397590361447, "percentage": 74.25, "elapsed_time": "0:19:13", "remaining_time": "0:06:40", "throughput": 4002.6, "total_tokens": 4618848} |
| {"current_steps": 7400, "total_steps": 9960, "loss": 0.0001, "lr": 1.8823552755435847e-06, "epoch": 14.859437751004016, "percentage": 74.3, "elapsed_time": "0:19:14", "remaining_time": "0:06:39", "throughput": 4002.86, "total_tokens": 4622176} |
| {"current_steps": 7405, "total_steps": 9960, "loss": 0.0002, "lr": 1.875510171771865e-06, "epoch": 14.869477911646586, "percentage": 74.35, "elapsed_time": "0:19:15", "remaining_time": "0:06:38", "throughput": 4002.92, "total_tokens": 4625088} |
| {"current_steps": 7410, "total_steps": 9960, "loss": 0.0001, "lr": 1.868674662339207e-06, "epoch": 14.879518072289157, "percentage": 74.4, "elapsed_time": "0:19:16", "remaining_time": "0:06:37", "throughput": 4003.24, "total_tokens": 4628640} |
| {"current_steps": 7415, "total_steps": 9960, "loss": 0.0, "lr": 1.8618487682353453e-06, "epoch": 14.889558232931726, "percentage": 74.45, "elapsed_time": "0:19:16", "remaining_time": "0:06:37", "throughput": 4003.36, "total_tokens": 4631808} |
| {"current_steps": 7420, "total_steps": 9960, "loss": 0.0001, "lr": 1.855032510420477e-06, "epoch": 14.899598393574298, "percentage": 74.5, "elapsed_time": "0:19:17", "remaining_time": "0:06:36", "throughput": 4003.35, "total_tokens": 4634176} |
| {"current_steps": 7425, "total_steps": 9960, "loss": 0.0002, "lr": 1.848225909825222e-06, "epoch": 14.909638554216867, "percentage": 74.55, "elapsed_time": "0:19:18", "remaining_time": "0:06:35", "throughput": 4003.33, "total_tokens": 4636704} |
| {"current_steps": 7430, "total_steps": 9960, "loss": 0.0001, "lr": 1.8414289873505337e-06, "epoch": 14.919678714859439, "percentage": 74.6, "elapsed_time": "0:19:18", "remaining_time": "0:06:34", "throughput": 4003.55, "total_tokens": 4639776} |
| {"current_steps": 7435, "total_steps": 9960, "loss": 0.0, "lr": 1.8346417638676533e-06, "epoch": 14.929718875502008, "percentage": 74.65, "elapsed_time": "0:19:19", "remaining_time": "0:06:33", "throughput": 4003.69, "total_tokens": 4642848} |
| {"current_steps": 7440, "total_steps": 9960, "loss": 0.0051, "lr": 1.8278642602180435e-06, "epoch": 14.939759036144578, "percentage": 74.7, "elapsed_time": "0:19:20", "remaining_time": "0:06:33", "throughput": 4003.96, "total_tokens": 4646400} |
| {"current_steps": 7445, "total_steps": 9960, "loss": 0.0857, "lr": 1.8210964972133095e-06, "epoch": 14.949799196787149, "percentage": 74.75, "elapsed_time": "0:19:21", "remaining_time": "0:06:32", "throughput": 4004.11, "total_tokens": 4649088} |
| {"current_steps": 7450, "total_steps": 9960, "loss": 0.0001, "lr": 1.814338495635158e-06, "epoch": 14.959839357429718, "percentage": 74.8, "elapsed_time": "0:19:21", "remaining_time": "0:06:31", "throughput": 4004.33, "total_tokens": 4652512} |
| {"current_steps": 7455, "total_steps": 9960, "loss": 0.0551, "lr": 1.8075902762353093e-06, "epoch": 14.96987951807229, "percentage": 74.85, "elapsed_time": "0:19:22", "remaining_time": "0:06:30", "throughput": 4004.4, "total_tokens": 4655584} |
| {"current_steps": 7460, "total_steps": 9960, "loss": 0.0, "lr": 1.8008518597354575e-06, "epoch": 14.97991967871486, "percentage": 74.9, "elapsed_time": "0:19:23", "remaining_time": "0:06:29", "throughput": 4004.46, "total_tokens": 4658272} |
| {"current_steps": 7465, "total_steps": 9960, "loss": 0.053, "lr": 1.7941232668271863e-06, "epoch": 14.98995983935743, "percentage": 74.95, "elapsed_time": "0:19:24", "remaining_time": "0:06:29", "throughput": 4004.85, "total_tokens": 4662432} |
| {"current_steps": 7470, "total_steps": 9960, "loss": 0.1219, "lr": 1.787404518171919e-06, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:19:24", "remaining_time": "0:06:28", "throughput": 4004.9, "total_tokens": 4665120} |
| {"current_steps": 7475, "total_steps": 9960, "loss": 0.0, "lr": 1.7806956344008475e-06, "epoch": 15.01004016064257, "percentage": 75.05, "elapsed_time": "0:19:25", "remaining_time": "0:06:27", "throughput": 4004.9, "total_tokens": 4668640} |
| {"current_steps": 7480, "total_steps": 9960, "loss": 0.0002, "lr": 1.773996636114873e-06, "epoch": 15.02008032128514, "percentage": 75.1, "elapsed_time": "0:19:26", "remaining_time": "0:06:26", "throughput": 4005.06, "total_tokens": 4672064} |
| {"current_steps": 7485, "total_steps": 9960, "loss": 0.0001, "lr": 1.7673075438845423e-06, "epoch": 15.03012048192771, "percentage": 75.15, "elapsed_time": "0:19:27", "remaining_time": "0:06:25", "throughput": 4005.26, "total_tokens": 4675264} |
| {"current_steps": 7490, "total_steps": 9960, "loss": 0.0004, "lr": 1.7606283782499812e-06, "epoch": 15.040160642570282, "percentage": 75.2, "elapsed_time": "0:19:27", "remaining_time": "0:06:25", "throughput": 4005.24, "total_tokens": 4677728} |
| {"current_steps": 7495, "total_steps": 9960, "loss": 0.0001, "lr": 1.753959159720836e-06, "epoch": 15.050200803212851, "percentage": 75.25, "elapsed_time": "0:19:28", "remaining_time": "0:06:24", "throughput": 4005.36, "total_tokens": 4680608} |
| {"current_steps": 7500, "total_steps": 9960, "loss": 0.0488, "lr": 1.7472999087762081e-06, "epoch": 15.060240963855422, "percentage": 75.3, "elapsed_time": "0:19:29", "remaining_time": "0:06:23", "throughput": 4005.51, "total_tokens": 4683712} |
| {"current_steps": 7505, "total_steps": 9960, "loss": 0.0001, "lr": 1.7406506458645923e-06, "epoch": 15.070281124497992, "percentage": 75.35, "elapsed_time": "0:19:30", "remaining_time": "0:06:22", "throughput": 4005.84, "total_tokens": 4687520} |
| {"current_steps": 7510, "total_steps": 9960, "loss": 0.0001, "lr": 1.7340113914038115e-06, "epoch": 15.080321285140561, "percentage": 75.4, "elapsed_time": "0:19:30", "remaining_time": "0:06:21", "throughput": 4005.93, "total_tokens": 4690560} |
| {"current_steps": 7515, "total_steps": 9960, "loss": 0.0018, "lr": 1.727382165780957e-06, "epoch": 15.090361445783133, "percentage": 75.45, "elapsed_time": "0:19:31", "remaining_time": "0:06:21", "throughput": 4006.08, "total_tokens": 4693696} |
| {"current_steps": 7520, "total_steps": 9960, "loss": 0.0672, "lr": 1.7207629893523236e-06, "epoch": 15.100401606425702, "percentage": 75.5, "elapsed_time": "0:19:32", "remaining_time": "0:06:20", "throughput": 4006.23, "total_tokens": 4696800} |
| {"current_steps": 7525, "total_steps": 9960, "loss": 0.0001, "lr": 1.7141538824433506e-06, "epoch": 15.110441767068274, "percentage": 75.55, "elapsed_time": "0:19:33", "remaining_time": "0:06:19", "throughput": 4006.39, "total_tokens": 4699776} |
| {"current_steps": 7530, "total_steps": 9960, "loss": 0.0001, "lr": 1.7075548653485535e-06, "epoch": 15.120481927710843, "percentage": 75.6, "elapsed_time": "0:19:33", "remaining_time": "0:06:18", "throughput": 4006.55, "total_tokens": 4702528} |
| {"current_steps": 7535, "total_steps": 9960, "loss": 0.0001, "lr": 1.7009659583314659e-06, "epoch": 15.130522088353414, "percentage": 75.65, "elapsed_time": "0:19:34", "remaining_time": "0:06:17", "throughput": 4006.61, "total_tokens": 4705152} |
| {"current_steps": 7540, "total_steps": 9960, "loss": 0.0005, "lr": 1.6943871816245826e-06, "epoch": 15.140562248995984, "percentage": 75.7, "elapsed_time": "0:19:34", "remaining_time": "0:06:17", "throughput": 4006.67, "total_tokens": 4707776} |
| {"current_steps": 7545, "total_steps": 9960, "loss": 0.0003, "lr": 1.6878185554292787e-06, "epoch": 15.150602409638553, "percentage": 75.75, "elapsed_time": "0:19:35", "remaining_time": "0:06:16", "throughput": 4006.58, "total_tokens": 4710368} |
| {"current_steps": 7550, "total_steps": 9960, "loss": 0.0005, "lr": 1.6812600999157753e-06, "epoch": 15.160642570281125, "percentage": 75.8, "elapsed_time": "0:19:36", "remaining_time": "0:06:15", "throughput": 4006.66, "total_tokens": 4713536} |
| {"current_steps": 7555, "total_steps": 9960, "loss": 0.0, "lr": 1.6747118352230495e-06, "epoch": 15.170682730923694, "percentage": 75.85, "elapsed_time": "0:19:37", "remaining_time": "0:06:14", "throughput": 4006.82, "total_tokens": 4716672} |
| {"current_steps": 7560, "total_steps": 9960, "loss": 0.0001, "lr": 1.6681737814587912e-06, "epoch": 15.180722891566266, "percentage": 75.9, "elapsed_time": "0:19:37", "remaining_time": "0:06:13", "throughput": 4007.09, "total_tokens": 4719872} |
| {"current_steps": 7565, "total_steps": 9960, "loss": 0.0, "lr": 1.6616459586993394e-06, "epoch": 15.190763052208835, "percentage": 75.95, "elapsed_time": "0:19:38", "remaining_time": "0:06:13", "throughput": 4007.43, "total_tokens": 4723776} |
| {"current_steps": 7570, "total_steps": 9960, "loss": 0.0001, "lr": 1.6551283869896073e-06, "epoch": 15.200803212851406, "percentage": 76.0, "elapsed_time": "0:19:39", "remaining_time": "0:06:12", "throughput": 4007.69, "total_tokens": 4726976} |
| {"current_steps": 7575, "total_steps": 9960, "loss": 0.0002, "lr": 1.6486210863430424e-06, "epoch": 15.210843373493976, "percentage": 76.05, "elapsed_time": "0:19:40", "remaining_time": "0:06:11", "throughput": 4007.8, "total_tokens": 4730176} |
| {"current_steps": 7580, "total_steps": 9960, "loss": 0.0, "lr": 1.6421240767415397e-06, "epoch": 15.220883534136545, "percentage": 76.1, "elapsed_time": "0:19:40", "remaining_time": "0:06:10", "throughput": 4007.97, "total_tokens": 4733152} |
| {"current_steps": 7585, "total_steps": 9960, "loss": 0.0, "lr": 1.6356373781354058e-06, "epoch": 15.230923694779117, "percentage": 76.15, "elapsed_time": "0:19:41", "remaining_time": "0:06:09", "throughput": 4007.81, "total_tokens": 4735648} |
| {"current_steps": 7590, "total_steps": 9960, "loss": 0.0004, "lr": 1.629161010443277e-06, "epoch": 15.240963855421686, "percentage": 76.2, "elapsed_time": "0:19:42", "remaining_time": "0:06:09", "throughput": 4007.96, "total_tokens": 4739136} |
| {"current_steps": 7595, "total_steps": 9960, "loss": 0.0001, "lr": 1.6226949935520708e-06, "epoch": 15.251004016064257, "percentage": 76.26, "elapsed_time": "0:19:43", "remaining_time": "0:06:08", "throughput": 4008.14, "total_tokens": 4742432} |
| {"current_steps": 7600, "total_steps": 9960, "loss": 0.0, "lr": 1.6162393473169186e-06, "epoch": 15.261044176706827, "percentage": 76.31, "elapsed_time": "0:19:44", "remaining_time": "0:06:07", "throughput": 4008.43, "total_tokens": 4746304} |
| {"current_steps": 7605, "total_steps": 9960, "loss": 0.0001, "lr": 1.6097940915611082e-06, "epoch": 15.271084337349398, "percentage": 76.36, "elapsed_time": "0:19:44", "remaining_time": "0:06:06", "throughput": 4008.61, "total_tokens": 4749536} |
| {"current_steps": 7610, "total_steps": 9960, "loss": 0.0001, "lr": 1.60335924607602e-06, "epoch": 15.281124497991968, "percentage": 76.41, "elapsed_time": "0:19:45", "remaining_time": "0:06:06", "throughput": 4008.83, "total_tokens": 4753120} |
| {"current_steps": 7615, "total_steps": 9960, "loss": 0.0, "lr": 1.5969348306210692e-06, "epoch": 15.291164658634537, "percentage": 76.46, "elapsed_time": "0:19:46", "remaining_time": "0:06:05", "throughput": 4008.91, "total_tokens": 4755968} |
| {"current_steps": 7620, "total_steps": 9960, "loss": 0.0001, "lr": 1.5905208649236426e-06, "epoch": 15.301204819277109, "percentage": 76.51, "elapsed_time": "0:19:46", "remaining_time": "0:06:04", "throughput": 4008.96, "total_tokens": 4758560} |
| {"current_steps": 7625, "total_steps": 9960, "loss": 0.0139, "lr": 1.5841173686790368e-06, "epoch": 15.311244979919678, "percentage": 76.56, "elapsed_time": "0:19:47", "remaining_time": "0:06:03", "throughput": 4009.24, "total_tokens": 4762368} |
| {"current_steps": 7630, "total_steps": 9960, "loss": 0.0001, "lr": 1.5777243615504085e-06, "epoch": 15.32128514056225, "percentage": 76.61, "elapsed_time": "0:19:48", "remaining_time": "0:06:02", "throughput": 4009.45, "total_tokens": 4765888} |
| {"current_steps": 7635, "total_steps": 9960, "loss": 0.0022, "lr": 1.5713418631686938e-06, "epoch": 15.331325301204819, "percentage": 76.66, "elapsed_time": "0:19:49", "remaining_time": "0:06:02", "throughput": 4009.52, "total_tokens": 4768928} |
| {"current_steps": 7640, "total_steps": 9960, "loss": 0.0003, "lr": 1.564969893132568e-06, "epoch": 15.34136546184739, "percentage": 76.71, "elapsed_time": "0:19:50", "remaining_time": "0:06:01", "throughput": 4009.62, "total_tokens": 4771904} |
| {"current_steps": 7645, "total_steps": 9960, "loss": 0.0, "lr": 1.5586084710083737e-06, "epoch": 15.35140562248996, "percentage": 76.76, "elapsed_time": "0:19:50", "remaining_time": "0:06:00", "throughput": 4009.81, "total_tokens": 4775104} |
| {"current_steps": 7650, "total_steps": 9960, "loss": 0.0, "lr": 1.5522576163300635e-06, "epoch": 15.36144578313253, "percentage": 76.81, "elapsed_time": "0:19:51", "remaining_time": "0:05:59", "throughput": 4009.98, "total_tokens": 4778496} |
| {"current_steps": 7655, "total_steps": 9960, "loss": 0.0001, "lr": 1.545917348599147e-06, "epoch": 15.3714859437751, "percentage": 76.86, "elapsed_time": "0:19:52", "remaining_time": "0:05:59", "throughput": 4010.05, "total_tokens": 4781344} |
| {"current_steps": 7660, "total_steps": 9960, "loss": 0.0213, "lr": 1.5395876872846132e-06, "epoch": 15.38152610441767, "percentage": 76.91, "elapsed_time": "0:19:53", "remaining_time": "0:05:58", "throughput": 4010.25, "total_tokens": 4784352} |
| {"current_steps": 7665, "total_steps": 9960, "loss": 0.0, "lr": 1.5332686518228951e-06, "epoch": 15.391566265060241, "percentage": 76.96, "elapsed_time": "0:19:53", "remaining_time": "0:05:57", "throughput": 4010.42, "total_tokens": 4787424} |
| {"current_steps": 7670, "total_steps": 9960, "loss": 0.0, "lr": 1.5269602616177842e-06, "epoch": 15.401606425702811, "percentage": 77.01, "elapsed_time": "0:19:54", "remaining_time": "0:05:56", "throughput": 4010.55, "total_tokens": 4790656} |
| {"current_steps": 7675, "total_steps": 9960, "loss": 0.0001, "lr": 1.5206625360403943e-06, "epoch": 15.411646586345382, "percentage": 77.06, "elapsed_time": "0:19:55", "remaining_time": "0:05:55", "throughput": 4010.56, "total_tokens": 4793536} |
| {"current_steps": 7680, "total_steps": 9960, "loss": 0.0, "lr": 1.5143754944290862e-06, "epoch": 15.421686746987952, "percentage": 77.11, "elapsed_time": "0:19:55", "remaining_time": "0:05:55", "throughput": 4010.71, "total_tokens": 4796704} |
| {"current_steps": 7685, "total_steps": 9960, "loss": 0.0001, "lr": 1.5080991560894142e-06, "epoch": 15.431726907630521, "percentage": 77.16, "elapsed_time": "0:19:56", "remaining_time": "0:05:54", "throughput": 4010.9, "total_tokens": 4800032} |
| {"current_steps": 7690, "total_steps": 9960, "loss": 0.0001, "lr": 1.5018335402940681e-06, "epoch": 15.441767068273093, "percentage": 77.21, "elapsed_time": "0:19:57", "remaining_time": "0:05:53", "throughput": 4011.18, "total_tokens": 4803552} |
| {"current_steps": 7695, "total_steps": 9960, "loss": 0.0, "lr": 1.4955786662828053e-06, "epoch": 15.451807228915662, "percentage": 77.26, "elapsed_time": "0:19:58", "remaining_time": "0:05:52", "throughput": 4011.42, "total_tokens": 4806848} |
| {"current_steps": 7700, "total_steps": 9960, "loss": 0.0002, "lr": 1.4893345532624086e-06, "epoch": 15.461847389558233, "percentage": 77.31, "elapsed_time": "0:19:58", "remaining_time": "0:05:51", "throughput": 4011.47, "total_tokens": 4809152} |
| {"current_steps": 7705, "total_steps": 9960, "loss": 0.0, "lr": 1.4831012204066114e-06, "epoch": 15.471887550200803, "percentage": 77.36, "elapsed_time": "0:19:59", "remaining_time": "0:05:51", "throughput": 4011.56, "total_tokens": 4812064} |
| {"current_steps": 7710, "total_steps": 9960, "loss": 0.0001, "lr": 1.4768786868560443e-06, "epoch": 15.481927710843374, "percentage": 77.41, "elapsed_time": "0:20:00", "remaining_time": "0:05:50", "throughput": 4011.58, "total_tokens": 4815040} |
| {"current_steps": 7715, "total_steps": 9960, "loss": 0.0002, "lr": 1.4706669717181782e-06, "epoch": 15.491967871485944, "percentage": 77.46, "elapsed_time": "0:20:01", "remaining_time": "0:05:49", "throughput": 4011.97, "total_tokens": 4818880} |
| {"current_steps": 7720, "total_steps": 9960, "loss": 0.0345, "lr": 1.4644660940672628e-06, "epoch": 15.502008032128515, "percentage": 77.51, "elapsed_time": "0:20:01", "remaining_time": "0:05:48", "throughput": 4012.0, "total_tokens": 4821696} |
| {"current_steps": 7725, "total_steps": 9960, "loss": 0.0, "lr": 1.4582760729442707e-06, "epoch": 15.512048192771084, "percentage": 77.56, "elapsed_time": "0:20:02", "remaining_time": "0:05:47", "throughput": 4012.04, "total_tokens": 4824608} |
| {"current_steps": 7730, "total_steps": 9960, "loss": 0.0004, "lr": 1.4520969273568364e-06, "epoch": 15.522088353413654, "percentage": 77.61, "elapsed_time": "0:20:03", "remaining_time": "0:05:47", "throughput": 4012.14, "total_tokens": 4827360} |
| {"current_steps": 7735, "total_steps": 9960, "loss": 0.0, "lr": 1.445928676279199e-06, "epoch": 15.532128514056225, "percentage": 77.66, "elapsed_time": "0:20:03", "remaining_time": "0:05:46", "throughput": 4012.32, "total_tokens": 4830496} |
| {"current_steps": 7740, "total_steps": 9960, "loss": 0.0001, "lr": 1.4397713386521444e-06, "epoch": 15.542168674698795, "percentage": 77.71, "elapsed_time": "0:20:04", "remaining_time": "0:05:45", "throughput": 4012.54, "total_tokens": 4833536} |
| {"current_steps": 7745, "total_steps": 9960, "loss": 0.0001, "lr": 1.4336249333829466e-06, "epoch": 15.552208835341366, "percentage": 77.76, "elapsed_time": "0:20:05", "remaining_time": "0:05:44", "throughput": 4012.59, "total_tokens": 4836192} |
| {"current_steps": 7750, "total_steps": 9960, "loss": 0.0002, "lr": 1.4274894793453075e-06, "epoch": 15.562248995983936, "percentage": 77.81, "elapsed_time": "0:20:06", "remaining_time": "0:05:43", "throughput": 4012.94, "total_tokens": 4840320} |
| {"current_steps": 7755, "total_steps": 9960, "loss": 0.0766, "lr": 1.421364995379309e-06, "epoch": 15.572289156626507, "percentage": 77.86, "elapsed_time": "0:20:06", "remaining_time": "0:05:43", "throughput": 4013.11, "total_tokens": 4843744} |
| {"current_steps": 7760, "total_steps": 9960, "loss": 0.0003, "lr": 1.4152515002913358e-06, "epoch": 15.582329317269076, "percentage": 77.91, "elapsed_time": "0:20:07", "remaining_time": "0:05:42", "throughput": 4013.26, "total_tokens": 4846464} |
| {"current_steps": 7765, "total_steps": 9960, "loss": 0.0, "lr": 1.4091490128540374e-06, "epoch": 15.592369477911646, "percentage": 77.96, "elapsed_time": "0:20:08", "remaining_time": "0:05:41", "throughput": 4013.31, "total_tokens": 4849184} |
| {"current_steps": 7770, "total_steps": 9960, "loss": 0.0025, "lr": 1.403057551806259e-06, "epoch": 15.602409638554217, "percentage": 78.01, "elapsed_time": "0:20:08", "remaining_time": "0:05:40", "throughput": 4013.43, "total_tokens": 4851936} |
| {"current_steps": 7775, "total_steps": 9960, "loss": 0.0003, "lr": 1.3969771358529866e-06, "epoch": 15.612449799196787, "percentage": 78.06, "elapsed_time": "0:20:09", "remaining_time": "0:05:39", "throughput": 4013.5, "total_tokens": 4855040} |
| {"current_steps": 7780, "total_steps": 9960, "loss": 0.0002, "lr": 1.3909077836652968e-06, "epoch": 15.622489959839358, "percentage": 78.11, "elapsed_time": "0:20:10", "remaining_time": "0:05:39", "throughput": 4013.55, "total_tokens": 4857952} |
| {"current_steps": 7785, "total_steps": 9960, "loss": 0.0001, "lr": 1.3848495138802803e-06, "epoch": 15.632530120481928, "percentage": 78.16, "elapsed_time": "0:20:11", "remaining_time": "0:05:38", "throughput": 4013.73, "total_tokens": 4860960} |
| {"current_steps": 7790, "total_steps": 9960, "loss": 0.0, "lr": 1.3788023451010114e-06, "epoch": 15.642570281124499, "percentage": 78.21, "elapsed_time": "0:20:11", "remaining_time": "0:05:37", "throughput": 4013.93, "total_tokens": 4864544} |
| {"current_steps": 7795, "total_steps": 9960, "loss": 0.0002, "lr": 1.3727662958964627e-06, "epoch": 15.652610441767068, "percentage": 78.26, "elapsed_time": "0:20:12", "remaining_time": "0:05:36", "throughput": 4014.06, "total_tokens": 4867616} |
| {"current_steps": 7800, "total_steps": 9960, "loss": 0.0001, "lr": 1.3667413848014738e-06, "epoch": 15.662650602409638, "percentage": 78.31, "elapsed_time": "0:20:13", "remaining_time": "0:05:35", "throughput": 4014.11, "total_tokens": 4870304} |
| {"current_steps": 7805, "total_steps": 9960, "loss": 0.0001, "lr": 1.3607276303166766e-06, "epoch": 15.67269076305221, "percentage": 78.36, "elapsed_time": "0:20:14", "remaining_time": "0:05:35", "throughput": 4014.45, "total_tokens": 4874240} |
| {"current_steps": 7810, "total_steps": 9960, "loss": 0.0249, "lr": 1.3547250509084453e-06, "epoch": 15.682730923694779, "percentage": 78.41, "elapsed_time": "0:20:14", "remaining_time": "0:05:34", "throughput": 4014.5, "total_tokens": 4876960} |
| {"current_steps": 7815, "total_steps": 9960, "loss": 0.0, "lr": 1.3487336650088417e-06, "epoch": 15.69277108433735, "percentage": 78.46, "elapsed_time": "0:20:15", "remaining_time": "0:05:33", "throughput": 4014.61, "total_tokens": 4879872} |
| {"current_steps": 7820, "total_steps": 9960, "loss": 0.0001, "lr": 1.3427534910155475e-06, "epoch": 15.70281124497992, "percentage": 78.51, "elapsed_time": "0:20:16", "remaining_time": "0:05:32", "throughput": 4014.78, "total_tokens": 4883424} |
| {"current_steps": 7825, "total_steps": 9960, "loss": 0.0061, "lr": 1.3367845472918272e-06, "epoch": 15.71285140562249, "percentage": 78.56, "elapsed_time": "0:20:17", "remaining_time": "0:05:32", "throughput": 4014.95, "total_tokens": 4886912} |
| {"current_steps": 7830, "total_steps": 9960, "loss": 0.0611, "lr": 1.330826852166454e-06, "epoch": 15.72289156626506, "percentage": 78.61, "elapsed_time": "0:20:17", "remaining_time": "0:05:31", "throughput": 4015.24, "total_tokens": 4890336} |
| {"current_steps": 7835, "total_steps": 9960, "loss": 0.0278, "lr": 1.3248804239336616e-06, "epoch": 15.73293172690763, "percentage": 78.66, "elapsed_time": "0:20:18", "remaining_time": "0:05:30", "throughput": 4015.45, "total_tokens": 4894144} |
| {"current_steps": 7840, "total_steps": 9960, "loss": 0.0006, "lr": 1.3189452808530866e-06, "epoch": 15.742971887550201, "percentage": 78.71, "elapsed_time": "0:20:19", "remaining_time": "0:05:29", "throughput": 4015.73, "total_tokens": 4897536} |
| {"current_steps": 7845, "total_steps": 9960, "loss": 0.0001, "lr": 1.3130214411497121e-06, "epoch": 15.75301204819277, "percentage": 78.77, "elapsed_time": "0:20:20", "remaining_time": "0:05:28", "throughput": 4015.8, "total_tokens": 4900544} |
| {"current_steps": 7850, "total_steps": 9960, "loss": 0.0001, "lr": 1.3071089230138124e-06, "epoch": 15.763052208835342, "percentage": 78.82, "elapsed_time": "0:20:21", "remaining_time": "0:05:28", "throughput": 4015.99, "total_tokens": 4903680} |
| {"current_steps": 7855, "total_steps": 9960, "loss": 0.0004, "lr": 1.3012077446008969e-06, "epoch": 15.773092369477911, "percentage": 78.87, "elapsed_time": "0:20:21", "remaining_time": "0:05:27", "throughput": 4016.03, "total_tokens": 4906528} |
| {"current_steps": 7860, "total_steps": 9960, "loss": 0.0001, "lr": 1.2953179240316533e-06, "epoch": 15.783132530120483, "percentage": 78.92, "elapsed_time": "0:20:22", "remaining_time": "0:05:26", "throughput": 4016.24, "total_tokens": 4910176} |
| {"current_steps": 7865, "total_steps": 9960, "loss": 0.0001, "lr": 1.289439479391893e-06, "epoch": 15.793172690763052, "percentage": 78.97, "elapsed_time": "0:20:23", "remaining_time": "0:05:25", "throughput": 4016.35, "total_tokens": 4913184} |
| {"current_steps": 7870, "total_steps": 9960, "loss": 0.0001, "lr": 1.2835724287325001e-06, "epoch": 15.803212851405622, "percentage": 79.02, "elapsed_time": "0:20:24", "remaining_time": "0:05:25", "throughput": 4016.49, "total_tokens": 4916320} |
| {"current_steps": 7875, "total_steps": 9960, "loss": 0.0001, "lr": 1.277716790069361e-06, "epoch": 15.813253012048193, "percentage": 79.07, "elapsed_time": "0:20:24", "remaining_time": "0:05:24", "throughput": 4016.46, "total_tokens": 4919360} |
| {"current_steps": 7880, "total_steps": 9960, "loss": 0.0001, "lr": 1.2718725813833322e-06, "epoch": 15.823293172690763, "percentage": 79.12, "elapsed_time": "0:20:25", "remaining_time": "0:05:23", "throughput": 4016.68, "total_tokens": 4922880} |
| {"current_steps": 7885, "total_steps": 9960, "loss": 0.0001, "lr": 1.266039820620159e-06, "epoch": 15.833333333333334, "percentage": 79.17, "elapsed_time": "0:20:26", "remaining_time": "0:05:22", "throughput": 4016.78, "total_tokens": 4925408} |
| {"current_steps": 7890, "total_steps": 9960, "loss": 0.0, "lr": 1.2602185256904453e-06, "epoch": 15.843373493975903, "percentage": 79.22, "elapsed_time": "0:20:27", "remaining_time": "0:05:21", "throughput": 4017.01, "total_tokens": 4928896} |
| {"current_steps": 7895, "total_steps": 9960, "loss": 0.0, "lr": 1.2544087144695826e-06, "epoch": 15.853413654618475, "percentage": 79.27, "elapsed_time": "0:20:27", "remaining_time": "0:05:21", "throughput": 4017.1, "total_tokens": 4931872} |
| {"current_steps": 7900, "total_steps": 9960, "loss": 0.0, "lr": 1.2486104047976937e-06, "epoch": 15.863453815261044, "percentage": 79.32, "elapsed_time": "0:20:28", "remaining_time": "0:05:20", "throughput": 4017.28, "total_tokens": 4935136} |
| {"current_steps": 7905, "total_steps": 9960, "loss": 0.0, "lr": 1.2428236144795959e-06, "epoch": 15.873493975903614, "percentage": 79.37, "elapsed_time": "0:20:29", "remaining_time": "0:05:19", "throughput": 4017.5, "total_tokens": 4938176} |
| {"current_steps": 7910, "total_steps": 9960, "loss": 0.0, "lr": 1.2370483612847201e-06, "epoch": 15.883534136546185, "percentage": 79.42, "elapsed_time": "0:20:29", "remaining_time": "0:05:18", "throughput": 4017.35, "total_tokens": 4940672} |
| {"current_steps": 7915, "total_steps": 9960, "loss": 0.0, "lr": 1.2312846629470826e-06, "epoch": 15.893574297188755, "percentage": 79.47, "elapsed_time": "0:20:30", "remaining_time": "0:05:17", "throughput": 4017.58, "total_tokens": 4944192} |
| {"current_steps": 7920, "total_steps": 9960, "loss": 0.0, "lr": 1.225532537165211e-06, "epoch": 15.903614457831326, "percentage": 79.52, "elapsed_time": "0:20:31", "remaining_time": "0:05:17", "throughput": 4017.74, "total_tokens": 4947488} |
| {"current_steps": 7925, "total_steps": 9960, "loss": 0.0, "lr": 1.219792001602101e-06, "epoch": 15.913654618473895, "percentage": 79.57, "elapsed_time": "0:20:32", "remaining_time": "0:05:16", "throughput": 4017.7, "total_tokens": 4949824} |
| {"current_steps": 7930, "total_steps": 9960, "loss": 0.0001, "lr": 1.2140630738851544e-06, "epoch": 15.923694779116467, "percentage": 79.62, "elapsed_time": "0:20:32", "remaining_time": "0:05:15", "throughput": 4017.79, "total_tokens": 4952768} |
| {"current_steps": 7935, "total_steps": 9960, "loss": 0.0001, "lr": 1.2083457716061326e-06, "epoch": 15.933734939759036, "percentage": 79.67, "elapsed_time": "0:20:33", "remaining_time": "0:05:14", "throughput": 4018.12, "total_tokens": 4956544} |
| {"current_steps": 7940, "total_steps": 9960, "loss": 0.0005, "lr": 1.2026401123210968e-06, "epoch": 15.943775100401606, "percentage": 79.72, "elapsed_time": "0:20:34", "remaining_time": "0:05:14", "throughput": 4018.19, "total_tokens": 4959648} |
| {"current_steps": 7945, "total_steps": 9960, "loss": 0.0104, "lr": 1.1969461135503573e-06, "epoch": 15.953815261044177, "percentage": 79.77, "elapsed_time": "0:20:34", "remaining_time": "0:05:13", "throughput": 4018.2, "total_tokens": 4961888} |
| {"current_steps": 7950, "total_steps": 9960, "loss": 0.0, "lr": 1.1912637927784176e-06, "epoch": 15.963855421686747, "percentage": 79.82, "elapsed_time": "0:20:35", "remaining_time": "0:05:12", "throughput": 4018.4, "total_tokens": 4965216} |
| {"current_steps": 7955, "total_steps": 9960, "loss": 0.0001, "lr": 1.1855931674539222e-06, "epoch": 15.973895582329318, "percentage": 79.87, "elapsed_time": "0:20:36", "remaining_time": "0:05:11", "throughput": 4018.55, "total_tokens": 4968608} |
| {"current_steps": 7960, "total_steps": 9960, "loss": 0.0, "lr": 1.1799342549896027e-06, "epoch": 15.983935742971887, "percentage": 79.92, "elapsed_time": "0:20:37", "remaining_time": "0:05:10", "throughput": 4018.55, "total_tokens": 4971456} |
| {"current_steps": 7965, "total_steps": 9960, "loss": 0.0001, "lr": 1.174287072762224e-06, "epoch": 15.993975903614459, "percentage": 79.97, "elapsed_time": "0:20:37", "remaining_time": "0:05:10", "throughput": 4018.66, "total_tokens": 4974112} |
| {"current_steps": 7968, "total_steps": 9960, "eval_loss": 1.0401936769485474, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:20:46", "remaining_time": "0:05:11", "throughput": 3992.52, "total_tokens": 4976032} |
| {"current_steps": 7970, "total_steps": 9960, "loss": 0.0017, "lr": 1.1686516381125307e-06, "epoch": 16.004016064257026, "percentage": 80.02, "elapsed_time": "0:20:47", "remaining_time": "0:05:11", "throughput": 3988.6, "total_tokens": 4977152} |
| {"current_steps": 7975, "total_steps": 9960, "loss": 0.0001, "lr": 1.163027968345195e-06, "epoch": 16.014056224899598, "percentage": 80.07, "elapsed_time": "0:20:48", "remaining_time": "0:05:10", "throughput": 3988.78, "total_tokens": 4981088} |
| {"current_steps": 7980, "total_steps": 9960, "loss": 0.0, "lr": 1.1574160807287615e-06, "epoch": 16.02409638554217, "percentage": 80.12, "elapsed_time": "0:20:49", "remaining_time": "0:05:10", "throughput": 3988.8, "total_tokens": 4984064} |
| {"current_steps": 7985, "total_steps": 9960, "loss": 0.0001, "lr": 1.1518159924955974e-06, "epoch": 16.03413654618474, "percentage": 80.17, "elapsed_time": "0:20:50", "remaining_time": "0:05:09", "throughput": 3988.95, "total_tokens": 4987424} |
| {"current_steps": 7990, "total_steps": 9960, "loss": 0.0001, "lr": 1.1462277208418338e-06, "epoch": 16.044176706827308, "percentage": 80.22, "elapsed_time": "0:20:50", "remaining_time": "0:05:08", "throughput": 3989.03, "total_tokens": 4990112} |
| {"current_steps": 7995, "total_steps": 9960, "loss": 0.0, "lr": 1.1406512829273253e-06, "epoch": 16.05421686746988, "percentage": 80.27, "elapsed_time": "0:20:51", "remaining_time": "0:05:07", "throughput": 3989.33, "total_tokens": 4993600} |
| {"current_steps": 8000, "total_steps": 9960, "loss": 0.0, "lr": 1.1350866958755757e-06, "epoch": 16.06425702811245, "percentage": 80.32, "elapsed_time": "0:20:52", "remaining_time": "0:05:06", "throughput": 3989.44, "total_tokens": 4996512} |
| {"current_steps": 8005, "total_steps": 9960, "loss": 0.0002, "lr": 1.1295339767737125e-06, "epoch": 16.07429718875502, "percentage": 80.37, "elapsed_time": "0:20:53", "remaining_time": "0:05:06", "throughput": 3989.48, "total_tokens": 4999168} |
| {"current_steps": 8010, "total_steps": 9960, "loss": 0.0, "lr": 1.1239931426724076e-06, "epoch": 16.08433734939759, "percentage": 80.42, "elapsed_time": "0:20:53", "remaining_time": "0:05:05", "throughput": 3989.59, "total_tokens": 5002336} |
| {"current_steps": 8015, "total_steps": 9960, "loss": 0.0001, "lr": 1.1184642105858484e-06, "epoch": 16.09437751004016, "percentage": 80.47, "elapsed_time": "0:20:54", "remaining_time": "0:05:04", "throughput": 3989.79, "total_tokens": 5005536} |
| {"current_steps": 8020, "total_steps": 9960, "loss": 0.0001, "lr": 1.1129471974916696e-06, "epoch": 16.104417670682732, "percentage": 80.52, "elapsed_time": "0:20:55", "remaining_time": "0:05:03", "throughput": 3989.81, "total_tokens": 5008192} |
| {"current_steps": 8025, "total_steps": 9960, "loss": 0.0001, "lr": 1.1074421203309033e-06, "epoch": 16.1144578313253, "percentage": 80.57, "elapsed_time": "0:20:55", "remaining_time": "0:05:02", "throughput": 3989.93, "total_tokens": 5010944} |
| {"current_steps": 8030, "total_steps": 9960, "loss": 0.0, "lr": 1.1019489960079389e-06, "epoch": 16.12449799196787, "percentage": 80.62, "elapsed_time": "0:20:56", "remaining_time": "0:05:02", "throughput": 3990.05, "total_tokens": 5013888} |
| {"current_steps": 8035, "total_steps": 9960, "loss": 0.0001, "lr": 1.0964678413904529e-06, "epoch": 16.134538152610443, "percentage": 80.67, "elapsed_time": "0:20:57", "remaining_time": "0:05:01", "throughput": 3990.25, "total_tokens": 5017184} |
| {"current_steps": 8040, "total_steps": 9960, "loss": 0.0002, "lr": 1.0909986733093737e-06, "epoch": 16.14457831325301, "percentage": 80.72, "elapsed_time": "0:20:58", "remaining_time": "0:05:00", "throughput": 3990.39, "total_tokens": 5020256} |
| {"current_steps": 8045, "total_steps": 9960, "loss": 0.0, "lr": 1.0855415085588194e-06, "epoch": 16.15461847389558, "percentage": 80.77, "elapsed_time": "0:20:58", "remaining_time": "0:04:59", "throughput": 3990.49, "total_tokens": 5023040} |
| {"current_steps": 8050, "total_steps": 9960, "loss": 0.0, "lr": 1.08009636389605e-06, "epoch": 16.164658634538153, "percentage": 80.82, "elapsed_time": "0:20:59", "remaining_time": "0:04:58", "throughput": 3990.82, "total_tokens": 5026752} |
| {"current_steps": 8055, "total_steps": 9960, "loss": 0.0004, "lr": 1.0746632560414154e-06, "epoch": 16.174698795180724, "percentage": 80.87, "elapsed_time": "0:21:00", "remaining_time": "0:04:58", "throughput": 3990.79, "total_tokens": 5029536} |
| {"current_steps": 8060, "total_steps": 9960, "loss": 0.0001, "lr": 1.069242201678305e-06, "epoch": 16.184738955823292, "percentage": 80.92, "elapsed_time": "0:21:01", "remaining_time": "0:04:57", "throughput": 3991.01, "total_tokens": 5032832} |
| {"current_steps": 8065, "total_steps": 9960, "loss": 0.0, "lr": 1.0638332174530953e-06, "epoch": 16.194779116465863, "percentage": 80.97, "elapsed_time": "0:21:01", "remaining_time": "0:04:56", "throughput": 3991.17, "total_tokens": 5036416} |
| {"current_steps": 8070, "total_steps": 9960, "loss": 0.0, "lr": 1.058436319975098e-06, "epoch": 16.204819277108435, "percentage": 81.02, "elapsed_time": "0:21:02", "remaining_time": "0:04:55", "throughput": 3991.31, "total_tokens": 5039392} |
| {"current_steps": 8075, "total_steps": 9960, "loss": 0.0, "lr": 1.053051525816512e-06, "epoch": 16.214859437751002, "percentage": 81.07, "elapsed_time": "0:21:03", "remaining_time": "0:04:54", "throughput": 3991.49, "total_tokens": 5042720} |
| {"current_steps": 8080, "total_steps": 9960, "loss": 0.0, "lr": 1.0476788515123687e-06, "epoch": 16.224899598393574, "percentage": 81.12, "elapsed_time": "0:21:04", "remaining_time": "0:04:54", "throughput": 3991.58, "total_tokens": 5045760} |
| {"current_steps": 8085, "total_steps": 9960, "loss": 0.0, "lr": 1.0423183135604874e-06, "epoch": 16.234939759036145, "percentage": 81.17, "elapsed_time": "0:21:04", "remaining_time": "0:04:53", "throughput": 3991.53, "total_tokens": 5048032} |
| {"current_steps": 8090, "total_steps": 9960, "loss": 0.0001, "lr": 1.036969928421413e-06, "epoch": 16.244979919678716, "percentage": 81.22, "elapsed_time": "0:21:05", "remaining_time": "0:04:52", "throughput": 3991.68, "total_tokens": 5051040} |
| {"current_steps": 8095, "total_steps": 9960, "loss": 0.0, "lr": 1.0316337125183817e-06, "epoch": 16.255020080321284, "percentage": 81.28, "elapsed_time": "0:21:06", "remaining_time": "0:04:51", "throughput": 3991.86, "total_tokens": 5054080} |
| {"current_steps": 8100, "total_steps": 9960, "loss": 0.0, "lr": 1.0263096822372537e-06, "epoch": 16.265060240963855, "percentage": 81.33, "elapsed_time": "0:21:06", "remaining_time": "0:04:50", "throughput": 3991.9, "total_tokens": 5057088} |
| {"current_steps": 8105, "total_steps": 9960, "loss": 0.0008, "lr": 1.0209978539264747e-06, "epoch": 16.275100401606426, "percentage": 81.38, "elapsed_time": "0:21:07", "remaining_time": "0:04:50", "throughput": 3991.98, "total_tokens": 5059904} |
| {"current_steps": 8110, "total_steps": 9960, "loss": 0.0, "lr": 1.0156982438970254e-06, "epoch": 16.285140562248998, "percentage": 81.43, "elapsed_time": "0:21:08", "remaining_time": "0:04:49", "throughput": 3992.07, "total_tokens": 5062656} |
| {"current_steps": 8115, "total_steps": 9960, "loss": 0.0, "lr": 1.010410868422359e-06, "epoch": 16.295180722891565, "percentage": 81.48, "elapsed_time": "0:21:09", "remaining_time": "0:04:48", "throughput": 3992.3, "total_tokens": 5066240} |
| {"current_steps": 8120, "total_steps": 9960, "loss": 0.0018, "lr": 1.0051357437383708e-06, "epoch": 16.305220883534137, "percentage": 81.53, "elapsed_time": "0:21:09", "remaining_time": "0:04:47", "throughput": 3992.39, "total_tokens": 5069600} |
| {"current_steps": 8125, "total_steps": 9960, "loss": 0.0, "lr": 9.998728860433277e-07, "epoch": 16.315261044176708, "percentage": 81.58, "elapsed_time": "0:21:10", "remaining_time": "0:04:46", "throughput": 3992.71, "total_tokens": 5073280} |
| {"current_steps": 8130, "total_steps": 9960, "loss": 0.0, "lr": 9.94622311497836e-07, "epoch": 16.325301204819276, "percentage": 81.63, "elapsed_time": "0:21:11", "remaining_time": "0:04:46", "throughput": 3992.79, "total_tokens": 5076128} |
| {"current_steps": 8135, "total_steps": 9960, "loss": 0.0, "lr": 9.893840362247809e-07, "epoch": 16.335341365461847, "percentage": 81.68, "elapsed_time": "0:21:12", "remaining_time": "0:04:45", "throughput": 3993.03, "total_tokens": 5079776} |
| {"current_steps": 8140, "total_steps": 9960, "loss": 0.0139, "lr": 9.841580763092812e-07, "epoch": 16.34538152610442, "percentage": 81.73, "elapsed_time": "0:21:12", "remaining_time": "0:04:44", "throughput": 3993.14, "total_tokens": 5083168} |
| {"current_steps": 8145, "total_steps": 9960, "loss": 0.0023, "lr": 9.789444477986375e-07, "epoch": 16.355421686746986, "percentage": 81.78, "elapsed_time": "0:21:13", "remaining_time": "0:04:43", "throughput": 3993.23, "total_tokens": 5085792} |
| {"current_steps": 8150, "total_steps": 9960, "loss": 0.0, "lr": 9.737431667022866e-07, "epoch": 16.365461847389557, "percentage": 81.83, "elapsed_time": "0:21:14", "remaining_time": "0:04:43", "throughput": 3993.5, "total_tokens": 5089632} |
| {"current_steps": 8155, "total_steps": 9960, "loss": 0.0, "lr": 9.685542489917494e-07, "epoch": 16.37550200803213, "percentage": 81.88, "elapsed_time": "0:21:15", "remaining_time": "0:04:42", "throughput": 3993.5, "total_tokens": 5092064} |
| {"current_steps": 8160, "total_steps": 9960, "loss": 0.0003, "lr": 9.633777106005826e-07, "epoch": 16.3855421686747, "percentage": 81.93, "elapsed_time": "0:21:15", "remaining_time": "0:04:41", "throughput": 3993.79, "total_tokens": 5095488} |
| {"current_steps": 8165, "total_steps": 9960, "loss": 0.0, "lr": 9.582135674243292e-07, "epoch": 16.395582329317268, "percentage": 81.98, "elapsed_time": "0:21:16", "remaining_time": "0:04:40", "throughput": 3994.05, "total_tokens": 5098944} |
| {"current_steps": 8170, "total_steps": 9960, "loss": 0.0, "lr": 9.530618353204718e-07, "epoch": 16.40562248995984, "percentage": 82.03, "elapsed_time": "0:21:17", "remaining_time": "0:04:39", "throughput": 3994.07, "total_tokens": 5101600} |
| {"current_steps": 8175, "total_steps": 9960, "loss": 0.0001, "lr": 9.479225301083811e-07, "epoch": 16.41566265060241, "percentage": 82.08, "elapsed_time": "0:21:17", "remaining_time": "0:04:39", "throughput": 3994.09, "total_tokens": 5103904} |
| {"current_steps": 8180, "total_steps": 9960, "loss": 0.0, "lr": 9.427956675692695e-07, "epoch": 16.42570281124498, "percentage": 82.13, "elapsed_time": "0:21:18", "remaining_time": "0:04:38", "throughput": 3994.32, "total_tokens": 5107616} |
| {"current_steps": 8185, "total_steps": 9960, "loss": 0.0, "lr": 9.376812634461418e-07, "epoch": 16.43574297188755, "percentage": 82.18, "elapsed_time": "0:21:19", "remaining_time": "0:04:37", "throughput": 3994.28, "total_tokens": 5110400} |
| {"current_steps": 8190, "total_steps": 9960, "loss": 0.0033, "lr": 9.32579333443746e-07, "epoch": 16.44578313253012, "percentage": 82.23, "elapsed_time": "0:21:20", "remaining_time": "0:04:36", "throughput": 3994.48, "total_tokens": 5113504} |
| {"current_steps": 8195, "total_steps": 9960, "loss": 0.0, "lr": 9.27489893228527e-07, "epoch": 16.455823293172692, "percentage": 82.28, "elapsed_time": "0:21:20", "remaining_time": "0:04:35", "throughput": 3994.62, "total_tokens": 5116768} |
| {"current_steps": 8200, "total_steps": 9960, "loss": 0.0013, "lr": 9.224129584285768e-07, "epoch": 16.46586345381526, "percentage": 82.33, "elapsed_time": "0:21:21", "remaining_time": "0:04:35", "throughput": 3994.9, "total_tokens": 5120224} |
| {"current_steps": 8205, "total_steps": 9960, "loss": 0.0001, "lr": 9.173485446335862e-07, "epoch": 16.47590361445783, "percentage": 82.38, "elapsed_time": "0:21:22", "remaining_time": "0:04:34", "throughput": 3995.13, "total_tokens": 5123584} |
| {"current_steps": 8210, "total_steps": 9960, "loss": 0.0, "lr": 9.122966673948025e-07, "epoch": 16.485943775100402, "percentage": 82.43, "elapsed_time": "0:21:23", "remaining_time": "0:04:33", "throughput": 3995.36, "total_tokens": 5126752} |
| {"current_steps": 8215, "total_steps": 9960, "loss": 0.0, "lr": 9.072573422249692e-07, "epoch": 16.495983935742974, "percentage": 82.48, "elapsed_time": "0:21:23", "remaining_time": "0:04:32", "throughput": 3995.44, "total_tokens": 5129312} |
| {"current_steps": 8220, "total_steps": 9960, "loss": 0.0584, "lr": 9.022305845982948e-07, "epoch": 16.50602409638554, "percentage": 82.53, "elapsed_time": "0:21:24", "remaining_time": "0:04:31", "throughput": 3995.51, "total_tokens": 5132192} |
| {"current_steps": 8225, "total_steps": 9960, "loss": 0.0002, "lr": 8.972164099503899e-07, "epoch": 16.516064257028113, "percentage": 82.58, "elapsed_time": "0:21:25", "remaining_time": "0:04:31", "throughput": 3995.73, "total_tokens": 5135520} |
| {"current_steps": 8230, "total_steps": 9960, "loss": 0.0001, "lr": 8.922148336782288e-07, "epoch": 16.526104417670684, "percentage": 82.63, "elapsed_time": "0:21:25", "remaining_time": "0:04:30", "throughput": 3995.96, "total_tokens": 5138432} |
| {"current_steps": 8235, "total_steps": 9960, "loss": 0.004, "lr": 8.87225871140105e-07, "epoch": 16.53614457831325, "percentage": 82.68, "elapsed_time": "0:21:26", "remaining_time": "0:04:29", "throughput": 3996.19, "total_tokens": 5141952} |
| {"current_steps": 8240, "total_steps": 9960, "loss": 0.0, "lr": 8.822495376555695e-07, "epoch": 16.546184738955823, "percentage": 82.73, "elapsed_time": "0:21:27", "remaining_time": "0:04:28", "throughput": 3996.4, "total_tokens": 5145344} |
| {"current_steps": 8245, "total_steps": 9960, "loss": 0.0003, "lr": 8.772858485054042e-07, "epoch": 16.556224899598394, "percentage": 82.78, "elapsed_time": "0:21:28", "remaining_time": "0:04:27", "throughput": 3996.5, "total_tokens": 5148096} |
| {"current_steps": 8250, "total_steps": 9960, "loss": 0.0003, "lr": 8.723348189315534e-07, "epoch": 16.566265060240966, "percentage": 82.83, "elapsed_time": "0:21:28", "remaining_time": "0:04:27", "throughput": 3996.49, "total_tokens": 5150784} |
| {"current_steps": 8255, "total_steps": 9960, "loss": 0.0, "lr": 8.673964641370974e-07, "epoch": 16.576305220883533, "percentage": 82.88, "elapsed_time": "0:21:29", "remaining_time": "0:04:26", "throughput": 3996.5, "total_tokens": 5153056} |
| {"current_steps": 8260, "total_steps": 9960, "loss": 0.0, "lr": 8.624707992861897e-07, "epoch": 16.586345381526105, "percentage": 82.93, "elapsed_time": "0:21:30", "remaining_time": "0:04:25", "throughput": 3996.73, "total_tokens": 5156448} |
| {"current_steps": 8265, "total_steps": 9960, "loss": 0.0, "lr": 8.575578395040202e-07, "epoch": 16.596385542168676, "percentage": 82.98, "elapsed_time": "0:21:31", "remaining_time": "0:04:24", "throughput": 3997.03, "total_tokens": 5160672} |
| {"current_steps": 8270, "total_steps": 9960, "loss": 0.0, "lr": 8.526575998767638e-07, "epoch": 16.606425702811244, "percentage": 83.03, "elapsed_time": "0:21:31", "remaining_time": "0:04:23", "throughput": 3997.19, "total_tokens": 5163840} |
| {"current_steps": 8275, "total_steps": 9960, "loss": 0.0, "lr": 8.477700954515372e-07, "epoch": 16.616465863453815, "percentage": 83.08, "elapsed_time": "0:21:32", "remaining_time": "0:04:23", "throughput": 3997.43, "total_tokens": 5167552} |
| {"current_steps": 8280, "total_steps": 9960, "loss": 0.0, "lr": 8.428953412363495e-07, "epoch": 16.626506024096386, "percentage": 83.13, "elapsed_time": "0:21:33", "remaining_time": "0:04:22", "throughput": 3997.49, "total_tokens": 5170496} |
| {"current_steps": 8285, "total_steps": 9960, "loss": 0.0, "lr": 8.380333522000588e-07, "epoch": 16.636546184738958, "percentage": 83.18, "elapsed_time": "0:21:34", "remaining_time": "0:04:21", "throughput": 3997.63, "total_tokens": 5173504} |
| {"current_steps": 8290, "total_steps": 9960, "loss": 0.0, "lr": 8.331841432723253e-07, "epoch": 16.646586345381525, "percentage": 83.23, "elapsed_time": "0:21:34", "remaining_time": "0:04:20", "throughput": 3997.73, "total_tokens": 5176640} |
| {"current_steps": 8295, "total_steps": 9960, "loss": 0.0, "lr": 8.28347729343566e-07, "epoch": 16.656626506024097, "percentage": 83.28, "elapsed_time": "0:21:35", "remaining_time": "0:04:20", "throughput": 3997.94, "total_tokens": 5180096} |
| {"current_steps": 8300, "total_steps": 9960, "loss": 0.0001, "lr": 8.235241252649073e-07, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:21:36", "remaining_time": "0:04:19", "throughput": 3998.2, "total_tokens": 5183616} |
| {"current_steps": 8305, "total_steps": 9960, "loss": 0.0765, "lr": 8.187133458481416e-07, "epoch": 16.676706827309236, "percentage": 83.38, "elapsed_time": "0:21:37", "remaining_time": "0:04:18", "throughput": 3998.35, "total_tokens": 5186720} |
| {"current_steps": 8310, "total_steps": 9960, "loss": 0.0, "lr": 8.139154058656801e-07, "epoch": 16.686746987951807, "percentage": 83.43, "elapsed_time": "0:21:38", "remaining_time": "0:04:17", "throughput": 3998.6, "total_tokens": 5190560} |
| {"current_steps": 8315, "total_steps": 9960, "loss": 0.0, "lr": 8.091303200505074e-07, "epoch": 16.696787148594378, "percentage": 83.48, "elapsed_time": "0:21:38", "remaining_time": "0:04:16", "throughput": 3998.83, "total_tokens": 5194304} |
| {"current_steps": 8320, "total_steps": 9960, "loss": 0.0, "lr": 8.043581030961372e-07, "epoch": 16.70682730923695, "percentage": 83.53, "elapsed_time": "0:21:39", "remaining_time": "0:04:16", "throughput": 3999.11, "total_tokens": 5197792} |
| {"current_steps": 8325, "total_steps": 9960, "loss": 0.0155, "lr": 7.99598769656571e-07, "epoch": 16.716867469879517, "percentage": 83.58, "elapsed_time": "0:21:40", "remaining_time": "0:04:15", "throughput": 3999.26, "total_tokens": 5201280} |
| {"current_steps": 8330, "total_steps": 9960, "loss": 0.0, "lr": 7.948523343462411e-07, "epoch": 16.72690763052209, "percentage": 83.63, "elapsed_time": "0:21:41", "remaining_time": "0:04:14", "throughput": 3999.47, "total_tokens": 5204704} |
| {"current_steps": 8335, "total_steps": 9960, "loss": 0.0, "lr": 7.901188117399817e-07, "epoch": 16.73694779116466, "percentage": 83.68, "elapsed_time": "0:21:42", "remaining_time": "0:04:13", "throughput": 3999.75, "total_tokens": 5208320} |
| {"current_steps": 8340, "total_steps": 9960, "loss": 0.0, "lr": 7.853982163729684e-07, "epoch": 16.746987951807228, "percentage": 83.73, "elapsed_time": "0:21:42", "remaining_time": "0:04:13", "throughput": 3999.86, "total_tokens": 5211136} |
| {"current_steps": 8345, "total_steps": 9960, "loss": 0.0, "lr": 7.806905627406891e-07, "epoch": 16.7570281124498, "percentage": 83.79, "elapsed_time": "0:21:43", "remaining_time": "0:04:12", "throughput": 4000.06, "total_tokens": 5214528} |
| {"current_steps": 8350, "total_steps": 9960, "loss": 0.0001, "lr": 7.759958652988858e-07, "epoch": 16.76706827309237, "percentage": 83.84, "elapsed_time": "0:21:44", "remaining_time": "0:04:11", "throughput": 4000.14, "total_tokens": 5218048} |
| {"current_steps": 8355, "total_steps": 9960, "loss": 0.0, "lr": 7.713141384635186e-07, "epoch": 16.77710843373494, "percentage": 83.89, "elapsed_time": "0:21:45", "remaining_time": "0:04:10", "throughput": 4000.32, "total_tokens": 5221248} |
| {"current_steps": 8360, "total_steps": 9960, "loss": 0.0, "lr": 7.666453966107201e-07, "epoch": 16.78714859437751, "percentage": 83.94, "elapsed_time": "0:21:45", "remaining_time": "0:04:09", "throughput": 4000.33, "total_tokens": 5223776} |
| {"current_steps": 8365, "total_steps": 9960, "loss": 0.0, "lr": 7.619896540767435e-07, "epoch": 16.79718875502008, "percentage": 83.99, "elapsed_time": "0:21:46", "remaining_time": "0:04:09", "throughput": 4000.31, "total_tokens": 5226176} |
| {"current_steps": 8370, "total_steps": 9960, "loss": 0.0, "lr": 7.573469251579346e-07, "epoch": 16.80722891566265, "percentage": 84.04, "elapsed_time": "0:21:47", "remaining_time": "0:04:08", "throughput": 4000.48, "total_tokens": 5229312} |
| {"current_steps": 8375, "total_steps": 9960, "loss": 0.0067, "lr": 7.527172241106718e-07, "epoch": 16.81726907630522, "percentage": 84.09, "elapsed_time": "0:21:47", "remaining_time": "0:04:07", "throughput": 4000.48, "total_tokens": 5231744} |
| {"current_steps": 8380, "total_steps": 9960, "loss": 0.0, "lr": 7.481005651513312e-07, "epoch": 16.82730923694779, "percentage": 84.14, "elapsed_time": "0:21:48", "remaining_time": "0:04:06", "throughput": 4000.56, "total_tokens": 5234464} |
| {"current_steps": 8385, "total_steps": 9960, "loss": 0.0, "lr": 7.434969624562405e-07, "epoch": 16.837349397590362, "percentage": 84.19, "elapsed_time": "0:21:49", "remaining_time": "0:04:05", "throughput": 4000.92, "total_tokens": 5238368} |
| {"current_steps": 8390, "total_steps": 9960, "loss": 0.0, "lr": 7.389064301616355e-07, "epoch": 16.847389558232933, "percentage": 84.24, "elapsed_time": "0:21:50", "remaining_time": "0:04:05", "throughput": 4001.11, "total_tokens": 5241792} |
| {"current_steps": 8395, "total_steps": 9960, "loss": 0.0, "lr": 7.343289823636168e-07, "epoch": 16.8574297188755, "percentage": 84.29, "elapsed_time": "0:21:50", "remaining_time": "0:04:04", "throughput": 4001.23, "total_tokens": 5244960} |
| {"current_steps": 8400, "total_steps": 9960, "loss": 0.0001, "lr": 7.297646331181069e-07, "epoch": 16.867469879518072, "percentage": 84.34, "elapsed_time": "0:21:51", "remaining_time": "0:04:03", "throughput": 4001.18, "total_tokens": 5247520} |
| {"current_steps": 8405, "total_steps": 9960, "loss": 0.0, "lr": 7.252133964408065e-07, "epoch": 16.877510040160644, "percentage": 84.39, "elapsed_time": "0:21:52", "remaining_time": "0:04:02", "throughput": 4001.3, "total_tokens": 5250272} |
| {"current_steps": 8410, "total_steps": 9960, "loss": 0.0, "lr": 7.206752863071515e-07, "epoch": 16.88755020080321, "percentage": 84.44, "elapsed_time": "0:21:52", "remaining_time": "0:04:01", "throughput": 4001.32, "total_tokens": 5252864} |
| {"current_steps": 8415, "total_steps": 9960, "loss": 0.0002, "lr": 7.161503166522704e-07, "epoch": 16.897590361445783, "percentage": 84.49, "elapsed_time": "0:21:53", "remaining_time": "0:04:01", "throughput": 4001.37, "total_tokens": 5255840} |
| {"current_steps": 8420, "total_steps": 9960, "loss": 0.0001, "lr": 7.116385013709404e-07, "epoch": 16.907630522088354, "percentage": 84.54, "elapsed_time": "0:21:54", "remaining_time": "0:04:00", "throughput": 4001.33, "total_tokens": 5258400} |
| {"current_steps": 8425, "total_steps": 9960, "loss": 0.0, "lr": 7.0713985431755e-07, "epoch": 16.917670682730925, "percentage": 84.59, "elapsed_time": "0:21:54", "remaining_time": "0:03:59", "throughput": 4001.4, "total_tokens": 5261376} |
| {"current_steps": 8430, "total_steps": 9960, "loss": 0.0001, "lr": 7.026543893060456e-07, "epoch": 16.927710843373493, "percentage": 84.64, "elapsed_time": "0:21:55", "remaining_time": "0:03:58", "throughput": 4001.37, "total_tokens": 5263968} |
| {"current_steps": 8435, "total_steps": 9960, "loss": 0.0001, "lr": 6.981821201098999e-07, "epoch": 16.937751004016064, "percentage": 84.69, "elapsed_time": "0:21:56", "remaining_time": "0:03:57", "throughput": 4001.64, "total_tokens": 5267488} |
| {"current_steps": 8440, "total_steps": 9960, "loss": 0.0, "lr": 6.937230604620642e-07, "epoch": 16.947791164658636, "percentage": 84.74, "elapsed_time": "0:21:57", "remaining_time": "0:03:57", "throughput": 4001.78, "total_tokens": 5270528} |
| {"current_steps": 8445, "total_steps": 9960, "loss": 0.0, "lr": 6.892772240549267e-07, "epoch": 16.957831325301203, "percentage": 84.79, "elapsed_time": "0:21:57", "remaining_time": "0:03:56", "throughput": 4002.07, "total_tokens": 5274048} |
| {"current_steps": 8450, "total_steps": 9960, "loss": 0.0, "lr": 6.848446245402751e-07, "epoch": 16.967871485943775, "percentage": 84.84, "elapsed_time": "0:21:58", "remaining_time": "0:03:55", "throughput": 4002.07, "total_tokens": 5276320} |
| {"current_steps": 8455, "total_steps": 9960, "loss": 0.0, "lr": 6.804252755292429e-07, "epoch": 16.977911646586346, "percentage": 84.89, "elapsed_time": "0:21:58", "remaining_time": "0:03:54", "throughput": 4002.05, "total_tokens": 5278688} |
| {"current_steps": 8460, "total_steps": 9960, "loss": 0.0002, "lr": 6.760191905922847e-07, "epoch": 16.987951807228917, "percentage": 84.94, "elapsed_time": "0:21:59", "remaining_time": "0:03:53", "throughput": 4002.1, "total_tokens": 5281120} |
| {"current_steps": 8465, "total_steps": 9960, "loss": 0.0, "lr": 6.716263832591163e-07, "epoch": 16.997991967871485, "percentage": 84.99, "elapsed_time": "0:22:00", "remaining_time": "0:03:53", "throughput": 4002.22, "total_tokens": 5284064} |
| {"current_steps": 8470, "total_steps": 9960, "loss": 0.0, "lr": 6.672468670186899e-07, "epoch": 17.008032128514056, "percentage": 85.04, "elapsed_time": "0:22:01", "remaining_time": "0:03:52", "throughput": 4002.14, "total_tokens": 5287968} |
| {"current_steps": 8475, "total_steps": 9960, "loss": 0.0, "lr": 6.628806553191397e-07, "epoch": 17.018072289156628, "percentage": 85.09, "elapsed_time": "0:22:02", "remaining_time": "0:03:51", "throughput": 4002.4, "total_tokens": 5291744} |
| {"current_steps": 8480, "total_steps": 9960, "loss": 0.0, "lr": 6.585277615677472e-07, "epoch": 17.028112449799195, "percentage": 85.14, "elapsed_time": "0:22:02", "remaining_time": "0:03:50", "throughput": 4002.46, "total_tokens": 5293984} |
| {"current_steps": 8485, "total_steps": 9960, "loss": 0.0, "lr": 6.541881991309013e-07, "epoch": 17.038152610441767, "percentage": 85.19, "elapsed_time": "0:22:03", "remaining_time": "0:03:50", "throughput": 4002.45, "total_tokens": 5296704} |
| {"current_steps": 8490, "total_steps": 9960, "loss": 0.0, "lr": 6.498619813340473e-07, "epoch": 17.048192771084338, "percentage": 85.24, "elapsed_time": "0:22:04", "remaining_time": "0:03:49", "throughput": 4002.56, "total_tokens": 5299872} |
| {"current_steps": 8495, "total_steps": 9960, "loss": 0.0, "lr": 6.455491214616622e-07, "epoch": 17.05823293172691, "percentage": 85.29, "elapsed_time": "0:22:04", "remaining_time": "0:03:48", "throughput": 4002.74, "total_tokens": 5303584} |
| {"current_steps": 8500, "total_steps": 9960, "loss": 0.0, "lr": 6.412496327571999e-07, "epoch": 17.068273092369477, "percentage": 85.34, "elapsed_time": "0:22:05", "remaining_time": "0:03:47", "throughput": 4003.04, "total_tokens": 5307488} |
| {"current_steps": 8505, "total_steps": 9960, "loss": 0.0, "lr": 6.369635284230563e-07, "epoch": 17.07831325301205, "percentage": 85.39, "elapsed_time": "0:22:06", "remaining_time": "0:03:46", "throughput": 4003.26, "total_tokens": 5311328} |
| {"current_steps": 8510, "total_steps": 9960, "loss": 0.0, "lr": 6.32690821620528e-07, "epoch": 17.08835341365462, "percentage": 85.44, "elapsed_time": "0:22:07", "remaining_time": "0:03:46", "throughput": 4003.39, "total_tokens": 5314720} |
| {"current_steps": 8515, "total_steps": 9960, "loss": 0.0, "lr": 6.284315254697726e-07, "epoch": 17.098393574297187, "percentage": 85.49, "elapsed_time": "0:22:08", "remaining_time": "0:03:45", "throughput": 4003.74, "total_tokens": 5318752} |
| {"current_steps": 8520, "total_steps": 9960, "loss": 0.0, "lr": 6.241856530497669e-07, "epoch": 17.10843373493976, "percentage": 85.54, "elapsed_time": "0:22:09", "remaining_time": "0:03:44", "throughput": 4003.96, "total_tokens": 5321952} |
| {"current_steps": 8525, "total_steps": 9960, "loss": 0.0, "lr": 6.199532173982692e-07, "epoch": 17.11847389558233, "percentage": 85.59, "elapsed_time": "0:22:09", "remaining_time": "0:03:43", "throughput": 4004.08, "total_tokens": 5325056} |
| {"current_steps": 8530, "total_steps": 9960, "loss": 0.0, "lr": 6.157342315117754e-07, "epoch": 17.1285140562249, "percentage": 85.64, "elapsed_time": "0:22:10", "remaining_time": "0:03:43", "throughput": 4004.2, "total_tokens": 5327936} |
| {"current_steps": 8535, "total_steps": 9960, "loss": 0.0, "lr": 6.115287083454823e-07, "epoch": 17.13855421686747, "percentage": 85.69, "elapsed_time": "0:22:11", "remaining_time": "0:03:42", "throughput": 4004.54, "total_tokens": 5331968} |
| {"current_steps": 8540, "total_steps": 9960, "loss": 0.0, "lr": 6.073366608132481e-07, "epoch": 17.14859437751004, "percentage": 85.74, "elapsed_time": "0:22:12", "remaining_time": "0:03:41", "throughput": 4004.45, "total_tokens": 5334144} |
| {"current_steps": 8545, "total_steps": 9960, "loss": 0.0, "lr": 6.031581017875482e-07, "epoch": 17.15863453815261, "percentage": 85.79, "elapsed_time": "0:22:12", "remaining_time": "0:03:40", "throughput": 4004.6, "total_tokens": 5336928} |
| {"current_steps": 8550, "total_steps": 9960, "loss": 0.0, "lr": 5.989930440994451e-07, "epoch": 17.16867469879518, "percentage": 85.84, "elapsed_time": "0:22:13", "remaining_time": "0:03:39", "throughput": 4004.64, "total_tokens": 5339904} |
| {"current_steps": 8555, "total_steps": 9960, "loss": 0.0, "lr": 5.948415005385344e-07, "epoch": 17.17871485943775, "percentage": 85.89, "elapsed_time": "0:22:14", "remaining_time": "0:03:39", "throughput": 4004.87, "total_tokens": 5343552} |
| {"current_steps": 8560, "total_steps": 9960, "loss": 0.0001, "lr": 5.907034838529224e-07, "epoch": 17.188755020080322, "percentage": 85.94, "elapsed_time": "0:22:15", "remaining_time": "0:03:38", "throughput": 4005.0, "total_tokens": 5346752} |
| {"current_steps": 8565, "total_steps": 9960, "loss": 0.0, "lr": 5.865790067491739e-07, "epoch": 17.198795180722893, "percentage": 85.99, "elapsed_time": "0:22:15", "remaining_time": "0:03:37", "throughput": 4005.17, "total_tokens": 5349952} |
| {"current_steps": 8570, "total_steps": 9960, "loss": 0.0002, "lr": 5.824680818922762e-07, "epoch": 17.20883534136546, "percentage": 86.04, "elapsed_time": "0:22:16", "remaining_time": "0:03:36", "throughput": 4005.18, "total_tokens": 5352448} |
| {"current_steps": 8575, "total_steps": 9960, "loss": 0.0, "lr": 5.783707219056078e-07, "epoch": 17.218875502008032, "percentage": 86.09, "elapsed_time": "0:22:17", "remaining_time": "0:03:35", "throughput": 4005.43, "total_tokens": 5356032} |
| {"current_steps": 8580, "total_steps": 9960, "loss": 0.0, "lr": 5.742869393708872e-07, "epoch": 17.228915662650603, "percentage": 86.14, "elapsed_time": "0:22:17", "remaining_time": "0:03:35", "throughput": 4005.44, "total_tokens": 5358368} |
| {"current_steps": 8585, "total_steps": 9960, "loss": 0.0001, "lr": 5.702167468281461e-07, "epoch": 17.23895582329317, "percentage": 86.19, "elapsed_time": "0:22:18", "remaining_time": "0:03:34", "throughput": 4005.48, "total_tokens": 5361216} |
| {"current_steps": 8590, "total_steps": 9960, "loss": 0.0, "lr": 5.661601567756819e-07, "epoch": 17.248995983935743, "percentage": 86.24, "elapsed_time": "0:22:19", "remaining_time": "0:03:33", "throughput": 4005.53, "total_tokens": 5364128} |
| {"current_steps": 8595, "total_steps": 9960, "loss": 0.0, "lr": 5.621171816700249e-07, "epoch": 17.259036144578314, "percentage": 86.3, "elapsed_time": "0:22:19", "remaining_time": "0:03:32", "throughput": 4005.58, "total_tokens": 5367200} |
| {"current_steps": 8600, "total_steps": 9960, "loss": 0.0001, "lr": 5.580878339258978e-07, "epoch": 17.269076305220885, "percentage": 86.35, "elapsed_time": "0:22:20", "remaining_time": "0:03:32", "throughput": 4005.72, "total_tokens": 5370144} |
| {"current_steps": 8605, "total_steps": 9960, "loss": 0.0, "lr": 5.540721259161774e-07, "epoch": 17.279116465863453, "percentage": 86.4, "elapsed_time": "0:22:21", "remaining_time": "0:03:31", "throughput": 4005.77, "total_tokens": 5373024} |
| {"current_steps": 8610, "total_steps": 9960, "loss": 0.0, "lr": 5.500700699718564e-07, "epoch": 17.289156626506024, "percentage": 86.45, "elapsed_time": "0:22:22", "remaining_time": "0:03:30", "throughput": 4005.79, "total_tokens": 5375904} |
| {"current_steps": 8615, "total_steps": 9960, "loss": 0.0, "lr": 5.460816783820089e-07, "epoch": 17.299196787148595, "percentage": 86.5, "elapsed_time": "0:22:22", "remaining_time": "0:03:29", "throughput": 4005.92, "total_tokens": 5379264} |
| {"current_steps": 8620, "total_steps": 9960, "loss": 0.0, "lr": 5.42106963393747e-07, "epoch": 17.309236947791163, "percentage": 86.55, "elapsed_time": "0:22:23", "remaining_time": "0:03:28", "throughput": 4006.04, "total_tokens": 5382208} |
| {"current_steps": 8625, "total_steps": 9960, "loss": 0.0, "lr": 5.381459372121878e-07, "epoch": 17.319277108433734, "percentage": 86.6, "elapsed_time": "0:22:24", "remaining_time": "0:03:28", "throughput": 4006.22, "total_tokens": 5385568} |
| {"current_steps": 8630, "total_steps": 9960, "loss": 0.0, "lr": 5.341986120004145e-07, "epoch": 17.329317269076306, "percentage": 86.65, "elapsed_time": "0:22:25", "remaining_time": "0:03:27", "throughput": 4006.41, "total_tokens": 5389056} |
| {"current_steps": 8635, "total_steps": 9960, "loss": 0.0, "lr": 5.302649998794368e-07, "epoch": 17.339357429718877, "percentage": 86.7, "elapsed_time": "0:22:25", "remaining_time": "0:03:26", "throughput": 4006.49, "total_tokens": 5391840} |
| {"current_steps": 8640, "total_steps": 9960, "loss": 0.0, "lr": 5.263451129281605e-07, "epoch": 17.349397590361445, "percentage": 86.75, "elapsed_time": "0:22:26", "remaining_time": "0:03:25", "throughput": 4006.62, "total_tokens": 5395008} |
| {"current_steps": 8645, "total_steps": 9960, "loss": 0.0001, "lr": 5.224389631833393e-07, "epoch": 17.359437751004016, "percentage": 86.8, "elapsed_time": "0:22:27", "remaining_time": "0:03:24", "throughput": 4006.63, "total_tokens": 5397728} |
| {"current_steps": 8650, "total_steps": 9960, "loss": 0.0, "lr": 5.185465626395486e-07, "epoch": 17.369477911646587, "percentage": 86.85, "elapsed_time": "0:22:28", "remaining_time": "0:03:24", "throughput": 4006.82, "total_tokens": 5401248} |
| {"current_steps": 8655, "total_steps": 9960, "loss": 0.0, "lr": 5.146679232491436e-07, "epoch": 17.379518072289155, "percentage": 86.9, "elapsed_time": "0:22:28", "remaining_time": "0:03:23", "throughput": 4006.99, "total_tokens": 5405024} |
| {"current_steps": 8660, "total_steps": 9960, "loss": 0.0, "lr": 5.108030569222211e-07, "epoch": 17.389558232931726, "percentage": 86.95, "elapsed_time": "0:22:29", "remaining_time": "0:03:22", "throughput": 4007.1, "total_tokens": 5407968} |
| {"current_steps": 8665, "total_steps": 9960, "loss": 0.0, "lr": 5.0695197552659e-07, "epoch": 17.399598393574298, "percentage": 87.0, "elapsed_time": "0:22:30", "remaining_time": "0:03:21", "throughput": 4007.16, "total_tokens": 5410944} |
| {"current_steps": 8670, "total_steps": 9960, "loss": 0.0, "lr": 5.031146908877221e-07, "epoch": 17.40963855421687, "percentage": 87.05, "elapsed_time": "0:22:31", "remaining_time": "0:03:21", "throughput": 4007.34, "total_tokens": 5414240} |
| {"current_steps": 8675, "total_steps": 9960, "loss": 0.0, "lr": 4.99291214788733e-07, "epoch": 17.419678714859437, "percentage": 87.1, "elapsed_time": "0:22:31", "remaining_time": "0:03:20", "throughput": 4007.36, "total_tokens": 5416896} |
| {"current_steps": 8680, "total_steps": 9960, "loss": 0.0, "lr": 4.954815589703277e-07, "epoch": 17.429718875502008, "percentage": 87.15, "elapsed_time": "0:22:32", "remaining_time": "0:03:19", "throughput": 4007.52, "total_tokens": 5419744} |
| {"current_steps": 8685, "total_steps": 9960, "loss": 0.0, "lr": 4.916857351307802e-07, "epoch": 17.43975903614458, "percentage": 87.2, "elapsed_time": "0:22:33", "remaining_time": "0:03:18", "throughput": 4007.6, "total_tokens": 5422560} |
| {"current_steps": 8690, "total_steps": 9960, "loss": 0.0, "lr": 4.879037549258875e-07, "epoch": 17.449799196787147, "percentage": 87.25, "elapsed_time": "0:22:33", "remaining_time": "0:03:17", "throughput": 4007.81, "total_tokens": 5426016} |
| {"current_steps": 8695, "total_steps": 9960, "loss": 0.0, "lr": 4.841356299689359e-07, "epoch": 17.45983935742972, "percentage": 87.3, "elapsed_time": "0:22:34", "remaining_time": "0:03:17", "throughput": 4007.99, "total_tokens": 5429280} |
| {"current_steps": 8700, "total_steps": 9960, "loss": 0.0, "lr": 4.803813718306716e-07, "epoch": 17.46987951807229, "percentage": 87.35, "elapsed_time": "0:22:35", "remaining_time": "0:03:16", "throughput": 4008.11, "total_tokens": 5432576} |
| {"current_steps": 8705, "total_steps": 9960, "loss": 0.0, "lr": 4.7664099203925284e-07, "epoch": 17.47991967871486, "percentage": 87.4, "elapsed_time": "0:22:36", "remaining_time": "0:03:15", "throughput": 4008.25, "total_tokens": 5436064} |
| {"current_steps": 8710, "total_steps": 9960, "loss": 0.0, "lr": 4.7291450208022836e-07, "epoch": 17.48995983935743, "percentage": 87.45, "elapsed_time": "0:22:36", "remaining_time": "0:03:14", "throughput": 4008.39, "total_tokens": 5438880} |
| {"current_steps": 8715, "total_steps": 9960, "loss": 0.0, "lr": 4.692019133964931e-07, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:22:37", "remaining_time": "0:03:13", "throughput": 4008.46, "total_tokens": 5441696} |
| {"current_steps": 8720, "total_steps": 9960, "loss": 0.0009, "lr": 4.65503237388254e-07, "epoch": 17.51004016064257, "percentage": 87.55, "elapsed_time": "0:22:38", "remaining_time": "0:03:13", "throughput": 4008.53, "total_tokens": 5444448} |
| {"current_steps": 8725, "total_steps": 9960, "loss": 0.0, "lr": 4.618184854129981e-07, "epoch": 17.52008032128514, "percentage": 87.6, "elapsed_time": "0:22:38", "remaining_time": "0:03:12", "throughput": 4008.61, "total_tokens": 5447424} |
| {"current_steps": 8730, "total_steps": 9960, "loss": 0.0619, "lr": 4.581476687854558e-07, "epoch": 17.53012048192771, "percentage": 87.65, "elapsed_time": "0:22:39", "remaining_time": "0:03:11", "throughput": 4008.78, "total_tokens": 5450688} |
| {"current_steps": 8735, "total_steps": 9960, "loss": 0.0, "lr": 4.5449079877756653e-07, "epoch": 17.54016064257028, "percentage": 87.7, "elapsed_time": "0:22:40", "remaining_time": "0:03:10", "throughput": 4008.84, "total_tokens": 5453472} |
| {"current_steps": 8740, "total_steps": 9960, "loss": 0.0, "lr": 4.508478866184435e-07, "epoch": 17.550200803212853, "percentage": 87.75, "elapsed_time": "0:22:41", "remaining_time": "0:03:10", "throughput": 4008.96, "total_tokens": 5456800} |
| {"current_steps": 8745, "total_steps": 9960, "loss": 0.0, "lr": 4.4721894349434027e-07, "epoch": 17.56024096385542, "percentage": 87.8, "elapsed_time": "0:22:41", "remaining_time": "0:03:09", "throughput": 4009.12, "total_tokens": 5460256} |
| {"current_steps": 8750, "total_steps": 9960, "loss": 0.0, "lr": 4.4360398054861473e-07, "epoch": 17.570281124497992, "percentage": 87.85, "elapsed_time": "0:22:42", "remaining_time": "0:03:08", "throughput": 4009.33, "total_tokens": 5463712} |
| {"current_steps": 8755, "total_steps": 9960, "loss": 0.0, "lr": 4.4000300888169753e-07, "epoch": 17.580321285140563, "percentage": 87.9, "elapsed_time": "0:22:43", "remaining_time": "0:03:07", "throughput": 4009.49, "total_tokens": 5467104} |
| {"current_steps": 8760, "total_steps": 9960, "loss": 0.0, "lr": 4.364160395510547e-07, "epoch": 17.59036144578313, "percentage": 87.95, "elapsed_time": "0:22:44", "remaining_time": "0:03:06", "throughput": 4009.6, "total_tokens": 5469888} |
| {"current_steps": 8765, "total_steps": 9960, "loss": 0.0, "lr": 4.328430835711589e-07, "epoch": 17.600401606425702, "percentage": 88.0, "elapsed_time": "0:22:44", "remaining_time": "0:03:06", "throughput": 4009.78, "total_tokens": 5473216} |
| {"current_steps": 8770, "total_steps": 9960, "loss": 0.0, "lr": 4.2928415191344664e-07, "epoch": 17.610441767068274, "percentage": 88.05, "elapsed_time": "0:22:45", "remaining_time": "0:03:05", "throughput": 4010.03, "total_tokens": 5476768} |
| {"current_steps": 8775, "total_steps": 9960, "loss": 0.0002, "lr": 4.2573925550629393e-07, "epoch": 17.620481927710845, "percentage": 88.1, "elapsed_time": "0:22:46", "remaining_time": "0:03:04", "throughput": 4010.13, "total_tokens": 5479648} |
| {"current_steps": 8780, "total_steps": 9960, "loss": 0.0178, "lr": 4.2220840523497896e-07, "epoch": 17.630522088353413, "percentage": 88.15, "elapsed_time": "0:22:47", "remaining_time": "0:03:03", "throughput": 4010.34, "total_tokens": 5483360} |
| {"current_steps": 8785, "total_steps": 9960, "loss": 0.0, "lr": 4.1869161194164565e-07, "epoch": 17.640562248995984, "percentage": 88.2, "elapsed_time": "0:22:48", "remaining_time": "0:03:02", "throughput": 4010.51, "total_tokens": 5486528} |
| {"current_steps": 8790, "total_steps": 9960, "loss": 0.0, "lr": 4.15188886425279e-07, "epoch": 17.650602409638555, "percentage": 88.25, "elapsed_time": "0:22:48", "remaining_time": "0:03:02", "throughput": 4010.73, "total_tokens": 5489728} |
| {"current_steps": 8795, "total_steps": 9960, "loss": 0.0, "lr": 4.117002394416586e-07, "epoch": 17.660642570281123, "percentage": 88.3, "elapsed_time": "0:22:49", "remaining_time": "0:03:01", "throughput": 4010.79, "total_tokens": 5492320} |
| {"current_steps": 8800, "total_steps": 9960, "loss": 0.0, "lr": 4.082256817033392e-07, "epoch": 17.670682730923694, "percentage": 88.35, "elapsed_time": "0:22:50", "remaining_time": "0:03:00", "throughput": 4010.98, "total_tokens": 5495840} |
| {"current_steps": 8805, "total_steps": 9960, "loss": 0.0002, "lr": 4.047652238796096e-07, "epoch": 17.680722891566266, "percentage": 88.4, "elapsed_time": "0:22:50", "remaining_time": "0:02:59", "throughput": 4011.06, "total_tokens": 5498784} |
| {"current_steps": 8810, "total_steps": 9960, "loss": 0.0, "lr": 4.0131887659646265e-07, "epoch": 17.690763052208837, "percentage": 88.45, "elapsed_time": "0:22:51", "remaining_time": "0:02:59", "throughput": 4011.05, "total_tokens": 5501088} |
| {"current_steps": 8815, "total_steps": 9960, "loss": 0.0, "lr": 3.9788665043656083e-07, "epoch": 17.700803212851405, "percentage": 88.5, "elapsed_time": "0:22:52", "remaining_time": "0:02:58", "throughput": 4011.25, "total_tokens": 5504512} |
| {"current_steps": 8820, "total_steps": 9960, "loss": 0.0, "lr": 3.94468555939207e-07, "epoch": 17.710843373493976, "percentage": 88.55, "elapsed_time": "0:22:52", "remaining_time": "0:02:57", "throughput": 4011.39, "total_tokens": 5507616} |
| {"current_steps": 8825, "total_steps": 9960, "loss": 0.0, "lr": 3.9106460360030853e-07, "epoch": 17.720883534136547, "percentage": 88.6, "elapsed_time": "0:22:53", "remaining_time": "0:02:56", "throughput": 4011.49, "total_tokens": 5510624} |
| {"current_steps": 8830, "total_steps": 9960, "loss": 0.0, "lr": 3.8767480387234714e-07, "epoch": 17.730923694779115, "percentage": 88.65, "elapsed_time": "0:22:54", "remaining_time": "0:02:55", "throughput": 4011.62, "total_tokens": 5513952} |
| {"current_steps": 8835, "total_steps": 9960, "loss": 0.0001, "lr": 3.84299167164347e-07, "epoch": 17.740963855421686, "percentage": 88.7, "elapsed_time": "0:22:55", "remaining_time": "0:02:55", "throughput": 4011.72, "total_tokens": 5516704} |
| {"current_steps": 8840, "total_steps": 9960, "loss": 0.0, "lr": 3.809377038418405e-07, "epoch": 17.751004016064257, "percentage": 88.76, "elapsed_time": "0:22:55", "remaining_time": "0:02:54", "throughput": 4011.78, "total_tokens": 5519328} |
| {"current_steps": 8845, "total_steps": 9960, "loss": 0.0, "lr": 3.775904242268391e-07, "epoch": 17.76104417670683, "percentage": 88.81, "elapsed_time": "0:22:56", "remaining_time": "0:02:53", "throughput": 4011.87, "total_tokens": 5522688} |
| {"current_steps": 8850, "total_steps": 9960, "loss": 0.0001, "lr": 3.742573385977999e-07, "epoch": 17.771084337349397, "percentage": 88.86, "elapsed_time": "0:22:57", "remaining_time": "0:02:52", "throughput": 4011.83, "total_tokens": 5525024} |
| {"current_steps": 8855, "total_steps": 9960, "loss": 0.0, "lr": 3.7093845718959575e-07, "epoch": 17.781124497991968, "percentage": 88.91, "elapsed_time": "0:22:57", "remaining_time": "0:02:51", "throughput": 4011.87, "total_tokens": 5527808} |
| {"current_steps": 8860, "total_steps": 9960, "loss": 0.0003, "lr": 3.676337901934812e-07, "epoch": 17.79116465863454, "percentage": 88.96, "elapsed_time": "0:22:58", "remaining_time": "0:02:51", "throughput": 4011.97, "total_tokens": 5530688} |
| {"current_steps": 8865, "total_steps": 9960, "loss": 0.0, "lr": 3.6434334775706403e-07, "epoch": 17.801204819277107, "percentage": 89.01, "elapsed_time": "0:22:59", "remaining_time": "0:02:50", "throughput": 4012.02, "total_tokens": 5533696} |
| {"current_steps": 8870, "total_steps": 9960, "loss": 0.0, "lr": 3.610671399842719e-07, "epoch": 17.811244979919678, "percentage": 89.06, "elapsed_time": "0:22:59", "remaining_time": "0:02:49", "throughput": 4012.09, "total_tokens": 5536448} |
| {"current_steps": 8875, "total_steps": 9960, "loss": 0.0, "lr": 3.578051769353219e-07, "epoch": 17.82128514056225, "percentage": 89.11, "elapsed_time": "0:23:00", "remaining_time": "0:02:48", "throughput": 4012.26, "total_tokens": 5539808} |
| {"current_steps": 8880, "total_steps": 9960, "loss": 0.0, "lr": 3.5455746862669336e-07, "epoch": 17.83132530120482, "percentage": 89.16, "elapsed_time": "0:23:01", "remaining_time": "0:02:48", "throughput": 4012.4, "total_tokens": 5542848} |
| {"current_steps": 8885, "total_steps": 9960, "loss": 0.0, "lr": 3.513240250310873e-07, "epoch": 17.84136546184739, "percentage": 89.21, "elapsed_time": "0:23:02", "remaining_time": "0:02:47", "throughput": 4012.36, "total_tokens": 5545376} |
| {"current_steps": 8890, "total_steps": 9960, "loss": 0.0411, "lr": 3.4810485607740975e-07, "epoch": 17.85140562248996, "percentage": 89.26, "elapsed_time": "0:23:02", "remaining_time": "0:02:46", "throughput": 4012.55, "total_tokens": 5549088} |
| {"current_steps": 8895, "total_steps": 9960, "loss": 0.0003, "lr": 3.4489997165072785e-07, "epoch": 17.86144578313253, "percentage": 89.31, "elapsed_time": "0:23:03", "remaining_time": "0:02:45", "throughput": 4012.62, "total_tokens": 5551712} |
| {"current_steps": 8900, "total_steps": 9960, "loss": 0.0001, "lr": 3.4170938159224675e-07, "epoch": 17.8714859437751, "percentage": 89.36, "elapsed_time": "0:23:04", "remaining_time": "0:02:44", "throughput": 4012.69, "total_tokens": 5554432} |
| {"current_steps": 8905, "total_steps": 9960, "loss": 0.0, "lr": 3.385330956992816e-07, "epoch": 17.88152610441767, "percentage": 89.41, "elapsed_time": "0:23:04", "remaining_time": "0:02:44", "throughput": 4012.79, "total_tokens": 5557504} |
| {"current_steps": 8910, "total_steps": 9960, "loss": 0.0, "lr": 3.3537112372521777e-07, "epoch": 17.89156626506024, "percentage": 89.46, "elapsed_time": "0:23:05", "remaining_time": "0:02:43", "throughput": 4012.86, "total_tokens": 5560608} |
| {"current_steps": 8915, "total_steps": 9960, "loss": 0.0, "lr": 3.3222347537949395e-07, "epoch": 17.901606425702813, "percentage": 89.51, "elapsed_time": "0:23:06", "remaining_time": "0:02:42", "throughput": 4012.98, "total_tokens": 5563584} |
| {"current_steps": 8920, "total_steps": 9960, "loss": 0.0, "lr": 3.290901603275587e-07, "epoch": 17.91164658634538, "percentage": 89.56, "elapsed_time": "0:23:07", "remaining_time": "0:02:41", "throughput": 4013.11, "total_tokens": 5566592} |
| {"current_steps": 8925, "total_steps": 9960, "loss": 0.0, "lr": 3.2597118819085227e-07, "epoch": 17.92168674698795, "percentage": 89.61, "elapsed_time": "0:23:07", "remaining_time": "0:02:40", "throughput": 4013.27, "total_tokens": 5569536} |
| {"current_steps": 8930, "total_steps": 9960, "loss": 0.0001, "lr": 3.228665685467702e-07, "epoch": 17.931726907630523, "percentage": 89.66, "elapsed_time": "0:23:08", "remaining_time": "0:02:40", "throughput": 4013.28, "total_tokens": 5572448} |
| {"current_steps": 8935, "total_steps": 9960, "loss": 0.0, "lr": 3.1977631092863613e-07, "epoch": 17.94176706827309, "percentage": 89.71, "elapsed_time": "0:23:09", "remaining_time": "0:02:39", "throughput": 4013.33, "total_tokens": 5575296} |
| {"current_steps": 8940, "total_steps": 9960, "loss": 0.0, "lr": 3.167004248256733e-07, "epoch": 17.951807228915662, "percentage": 89.76, "elapsed_time": "0:23:10", "remaining_time": "0:02:38", "throughput": 4013.53, "total_tokens": 5578912} |
| {"current_steps": 8945, "total_steps": 9960, "loss": 0.0, "lr": 3.1363891968297367e-07, "epoch": 17.961847389558233, "percentage": 89.81, "elapsed_time": "0:23:10", "remaining_time": "0:02:37", "throughput": 4013.55, "total_tokens": 5581056} |
| {"current_steps": 8950, "total_steps": 9960, "loss": 0.0, "lr": 3.105918049014689e-07, "epoch": 17.971887550200805, "percentage": 89.86, "elapsed_time": "0:23:11", "remaining_time": "0:02:37", "throughput": 4013.73, "total_tokens": 5584352} |
| {"current_steps": 8955, "total_steps": 9960, "loss": 0.0, "lr": 3.075590898379044e-07, "epoch": 17.981927710843372, "percentage": 89.91, "elapsed_time": "0:23:12", "remaining_time": "0:02:36", "throughput": 4013.9, "total_tokens": 5587872} |
| {"current_steps": 8960, "total_steps": 9960, "loss": 0.0, "lr": 3.04540783804807e-07, "epoch": 17.991967871485944, "percentage": 89.96, "elapsed_time": "0:23:12", "remaining_time": "0:02:35", "throughput": 4014.19, "total_tokens": 5591680} |
| {"current_steps": 8964, "total_steps": 9960, "eval_loss": 1.093041181564331, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:23:21", "remaining_time": "0:02:35", "throughput": 3991.14, "total_tokens": 5594752} |
| {"current_steps": 8965, "total_steps": 9960, "loss": 0.0001, "lr": 3.015368960704584e-07, "epoch": 18.002008032128515, "percentage": 90.01, "elapsed_time": "0:23:23", "remaining_time": "0:02:35", "throughput": 3987.57, "total_tokens": 5595360} |
| {"current_steps": 8970, "total_steps": 9960, "loss": 0.0, "lr": 2.985474358588658e-07, "epoch": 18.012048192771083, "percentage": 90.06, "elapsed_time": "0:23:23", "remaining_time": "0:02:34", "throughput": 3987.5, "total_tokens": 5598368} |
| {"current_steps": 8975, "total_steps": 9960, "loss": 0.0, "lr": 2.9557241234973446e-07, "epoch": 18.022088353413654, "percentage": 90.11, "elapsed_time": "0:23:24", "remaining_time": "0:02:34", "throughput": 3987.62, "total_tokens": 5601664} |
| {"current_steps": 8980, "total_steps": 9960, "loss": 0.0, "lr": 2.926118346784379e-07, "epoch": 18.032128514056225, "percentage": 90.16, "elapsed_time": "0:23:25", "remaining_time": "0:02:33", "throughput": 3987.77, "total_tokens": 5604736} |
| {"current_steps": 8985, "total_steps": 9960, "loss": 0.0, "lr": 2.8966571193599304e-07, "epoch": 18.042168674698797, "percentage": 90.21, "elapsed_time": "0:23:26", "remaining_time": "0:02:32", "throughput": 3987.93, "total_tokens": 5607936} |
| {"current_steps": 8990, "total_steps": 9960, "loss": 0.0157, "lr": 2.8673405316902824e-07, "epoch": 18.052208835341364, "percentage": 90.26, "elapsed_time": "0:23:27", "remaining_time": "0:02:31", "throughput": 3988.03, "total_tokens": 5611200} |
| {"current_steps": 8995, "total_steps": 9960, "loss": 0.0, "lr": 2.8381686737975867e-07, "epoch": 18.062248995983936, "percentage": 90.31, "elapsed_time": "0:23:27", "remaining_time": "0:02:31", "throughput": 3988.13, "total_tokens": 5613856} |
| {"current_steps": 9000, "total_steps": 9960, "loss": 0.0002, "lr": 2.809141635259555e-07, "epoch": 18.072289156626507, "percentage": 90.36, "elapsed_time": "0:23:28", "remaining_time": "0:02:30", "throughput": 3988.26, "total_tokens": 5617152} |
| {"current_steps": 9005, "total_steps": 9960, "loss": 0.0, "lr": 2.780259505209249e-07, "epoch": 18.082329317269075, "percentage": 90.41, "elapsed_time": "0:23:29", "remaining_time": "0:02:29", "throughput": 3988.37, "total_tokens": 5620160} |
| {"current_steps": 9010, "total_steps": 9960, "loss": 0.0, "lr": 2.7515223723346974e-07, "epoch": 18.092369477911646, "percentage": 90.46, "elapsed_time": "0:23:29", "remaining_time": "0:02:28", "throughput": 3988.55, "total_tokens": 5623424} |
| {"current_steps": 9015, "total_steps": 9960, "loss": 0.0, "lr": 2.722930324878748e-07, "epoch": 18.102409638554217, "percentage": 90.51, "elapsed_time": "0:23:30", "remaining_time": "0:02:27", "throughput": 3988.54, "total_tokens": 5626208} |
| {"current_steps": 9020, "total_steps": 9960, "loss": 0.0, "lr": 2.694483450638685e-07, "epoch": 18.11244979919679, "percentage": 90.56, "elapsed_time": "0:23:31", "remaining_time": "0:02:27", "throughput": 3988.7, "total_tokens": 5629280} |
| {"current_steps": 9025, "total_steps": 9960, "loss": 0.0, "lr": 2.666181836966053e-07, "epoch": 18.122489959839356, "percentage": 90.61, "elapsed_time": "0:23:32", "remaining_time": "0:02:26", "throughput": 3988.75, "total_tokens": 5632256} |
| {"current_steps": 9030, "total_steps": 9960, "loss": 0.0002, "lr": 2.6380255707663285e-07, "epoch": 18.132530120481928, "percentage": 90.66, "elapsed_time": "0:23:32", "remaining_time": "0:02:25", "throughput": 3988.76, "total_tokens": 5634688} |
| {"current_steps": 9035, "total_steps": 9960, "loss": 0.0, "lr": 2.610014738498656e-07, "epoch": 18.1425702811245, "percentage": 90.71, "elapsed_time": "0:23:33", "remaining_time": "0:02:24", "throughput": 3988.88, "total_tokens": 5637984} |
| {"current_steps": 9040, "total_steps": 9960, "loss": 0.0, "lr": 2.5821494261756284e-07, "epoch": 18.152610441767067, "percentage": 90.76, "elapsed_time": "0:23:34", "remaining_time": "0:02:23", "throughput": 3989.05, "total_tokens": 5641440} |
| {"current_steps": 9045, "total_steps": 9960, "loss": 0.0529, "lr": 2.554429719362972e-07, "epoch": 18.162650602409638, "percentage": 90.81, "elapsed_time": "0:23:35", "remaining_time": "0:02:23", "throughput": 3989.28, "total_tokens": 5644960} |
| {"current_steps": 9050, "total_steps": 9960, "loss": 0.0, "lr": 2.526855703179304e-07, "epoch": 18.17269076305221, "percentage": 90.86, "elapsed_time": "0:23:35", "remaining_time": "0:02:22", "throughput": 3989.5, "total_tokens": 5648512} |
| {"current_steps": 9055, "total_steps": 9960, "loss": 0.0, "lr": 2.4994274622958726e-07, "epoch": 18.18273092369478, "percentage": 90.91, "elapsed_time": "0:23:36", "remaining_time": "0:02:21", "throughput": 3989.75, "total_tokens": 5651584} |
| {"current_steps": 9060, "total_steps": 9960, "loss": 0.0, "lr": 2.4721450809363054e-07, "epoch": 18.19277108433735, "percentage": 90.96, "elapsed_time": "0:23:37", "remaining_time": "0:02:20", "throughput": 3989.86, "total_tokens": 5654720} |
| {"current_steps": 9065, "total_steps": 9960, "loss": 0.0, "lr": 2.4450086428763345e-07, "epoch": 18.20281124497992, "percentage": 91.01, "elapsed_time": "0:23:38", "remaining_time": "0:02:20", "throughput": 3989.96, "total_tokens": 5657952} |
| {"current_steps": 9070, "total_steps": 9960, "loss": 0.0, "lr": 2.4180182314435305e-07, "epoch": 18.21285140562249, "percentage": 91.06, "elapsed_time": "0:23:38", "remaining_time": "0:02:19", "throughput": 3990.13, "total_tokens": 5661120} |
| {"current_steps": 9075, "total_steps": 9960, "loss": 0.0, "lr": 2.3911739295170875e-07, "epoch": 18.22289156626506, "percentage": 91.11, "elapsed_time": "0:23:39", "remaining_time": "0:02:18", "throughput": 3990.37, "total_tokens": 5664704} |
| {"current_steps": 9080, "total_steps": 9960, "loss": 0.0, "lr": 2.364475819527523e-07, "epoch": 18.23293172690763, "percentage": 91.16, "elapsed_time": "0:23:40", "remaining_time": "0:02:17", "throughput": 3990.52, "total_tokens": 5667744} |
| {"current_steps": 9085, "total_steps": 9960, "loss": 0.0, "lr": 2.3379239834564526e-07, "epoch": 18.2429718875502, "percentage": 91.21, "elapsed_time": "0:23:40", "remaining_time": "0:02:16", "throughput": 3990.61, "total_tokens": 5670496} |
| {"current_steps": 9090, "total_steps": 9960, "loss": 0.0, "lr": 2.3115185028363186e-07, "epoch": 18.253012048192772, "percentage": 91.27, "elapsed_time": "0:23:41", "remaining_time": "0:02:16", "throughput": 3990.78, "total_tokens": 5673632} |
| {"current_steps": 9095, "total_steps": 9960, "loss": 0.0, "lr": 2.2852594587501887e-07, "epoch": 18.26305220883534, "percentage": 91.32, "elapsed_time": "0:23:42", "remaining_time": "0:02:15", "throughput": 3990.88, "total_tokens": 5676672} |
| {"current_steps": 9100, "total_steps": 9960, "loss": 0.0, "lr": 2.259146931831413e-07, "epoch": 18.27309236947791, "percentage": 91.37, "elapsed_time": "0:23:43", "remaining_time": "0:02:14", "throughput": 3991.1, "total_tokens": 5680352} |
| {"current_steps": 9105, "total_steps": 9960, "loss": 0.0, "lr": 2.2331810022634847e-07, "epoch": 18.283132530120483, "percentage": 91.42, "elapsed_time": "0:23:43", "remaining_time": "0:02:13", "throughput": 3991.13, "total_tokens": 5683104} |
| {"current_steps": 9110, "total_steps": 9960, "loss": 0.0001, "lr": 2.2073617497797018e-07, "epoch": 18.29317269076305, "percentage": 91.47, "elapsed_time": "0:23:44", "remaining_time": "0:02:12", "throughput": 3991.27, "total_tokens": 5686688} |
| {"current_steps": 9115, "total_steps": 9960, "loss": 0.0, "lr": 2.1816892536629775e-07, "epoch": 18.303212851405622, "percentage": 91.52, "elapsed_time": "0:23:45", "remaining_time": "0:02:12", "throughput": 3991.36, "total_tokens": 5689600} |
| {"current_steps": 9120, "total_steps": 9960, "loss": 0.0, "lr": 2.1561635927456083e-07, "epoch": 18.313253012048193, "percentage": 91.57, "elapsed_time": "0:23:46", "remaining_time": "0:02:11", "throughput": 3991.53, "total_tokens": 5692768} |
| {"current_steps": 9125, "total_steps": 9960, "loss": 0.0, "lr": 2.1307848454089452e-07, "epoch": 18.323293172690764, "percentage": 91.62, "elapsed_time": "0:23:46", "remaining_time": "0:02:10", "throughput": 3991.63, "total_tokens": 5695584} |
| {"current_steps": 9130, "total_steps": 9960, "loss": 0.0, "lr": 2.1055530895832897e-07, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:23:47", "remaining_time": "0:02:09", "throughput": 3991.76, "total_tokens": 5698784} |
| {"current_steps": 9135, "total_steps": 9960, "loss": 0.0, "lr": 2.0804684027474987e-07, "epoch": 18.343373493975903, "percentage": 91.72, "elapsed_time": "0:23:48", "remaining_time": "0:02:08", "throughput": 3991.81, "total_tokens": 5701504} |
| {"current_steps": 9140, "total_steps": 9960, "loss": 0.0, "lr": 2.055530861928884e-07, "epoch": 18.353413654618475, "percentage": 91.77, "elapsed_time": "0:23:49", "remaining_time": "0:02:08", "throughput": 3992.04, "total_tokens": 5705216} |
| {"current_steps": 9145, "total_steps": 9960, "loss": 0.0001, "lr": 2.0307405437029027e-07, "epoch": 18.363453815261042, "percentage": 91.82, "elapsed_time": "0:23:49", "remaining_time": "0:02:07", "throughput": 3992.25, "total_tokens": 5708576} |
| {"current_steps": 9150, "total_steps": 9960, "loss": 0.0, "lr": 2.006097524192918e-07, "epoch": 18.373493975903614, "percentage": 91.87, "elapsed_time": "0:23:50", "remaining_time": "0:02:06", "throughput": 3992.56, "total_tokens": 5712288} |
| {"current_steps": 9155, "total_steps": 9960, "loss": 0.0, "lr": 1.9816018790700165e-07, "epoch": 18.383534136546185, "percentage": 91.92, "elapsed_time": "0:23:51", "remaining_time": "0:02:05", "throughput": 3992.74, "total_tokens": 5715648} |
| {"current_steps": 9160, "total_steps": 9960, "loss": 0.0, "lr": 1.9572536835527013e-07, "epoch": 18.393574297188756, "percentage": 91.97, "elapsed_time": "0:23:52", "remaining_time": "0:02:05", "throughput": 3992.88, "total_tokens": 5718720} |
| {"current_steps": 9165, "total_steps": 9960, "loss": 0.0, "lr": 1.933053012406749e-07, "epoch": 18.403614457831324, "percentage": 92.02, "elapsed_time": "0:23:53", "remaining_time": "0:02:04", "throughput": 3993.13, "total_tokens": 5722560} |
| {"current_steps": 9170, "total_steps": 9960, "loss": 0.0, "lr": 1.908999939944911e-07, "epoch": 18.413654618473895, "percentage": 92.07, "elapsed_time": "0:23:53", "remaining_time": "0:02:03", "throughput": 3993.18, "total_tokens": 5725408} |
| {"current_steps": 9175, "total_steps": 9960, "loss": 0.0001, "lr": 1.8850945400266994e-07, "epoch": 18.423694779116467, "percentage": 92.12, "elapsed_time": "0:23:54", "remaining_time": "0:02:02", "throughput": 3993.36, "total_tokens": 5729024} |
| {"current_steps": 9180, "total_steps": 9960, "loss": 0.0, "lr": 1.861336886058196e-07, "epoch": 18.433734939759034, "percentage": 92.17, "elapsed_time": "0:23:55", "remaining_time": "0:02:01", "throughput": 3993.46, "total_tokens": 5731584} |
| {"current_steps": 9185, "total_steps": 9960, "loss": 0.0, "lr": 1.8377270509917777e-07, "epoch": 18.443775100401606, "percentage": 92.22, "elapsed_time": "0:23:55", "remaining_time": "0:02:01", "throughput": 3993.58, "total_tokens": 5734624} |
| {"current_steps": 9190, "total_steps": 9960, "loss": 0.0001, "lr": 1.81426510732593e-07, "epoch": 18.453815261044177, "percentage": 92.27, "elapsed_time": "0:23:56", "remaining_time": "0:02:00", "throughput": 3993.73, "total_tokens": 5737920} |
| {"current_steps": 9195, "total_steps": 9960, "loss": 0.0, "lr": 1.7909511271050006e-07, "epoch": 18.46385542168675, "percentage": 92.32, "elapsed_time": "0:23:57", "remaining_time": "0:01:59", "throughput": 3993.81, "total_tokens": 5740896} |
| {"current_steps": 9200, "total_steps": 9960, "loss": 0.0, "lr": 1.7677851819189907e-07, "epoch": 18.473895582329316, "percentage": 92.37, "elapsed_time": "0:23:58", "remaining_time": "0:01:58", "throughput": 3993.94, "total_tokens": 5744000} |
| {"current_steps": 9205, "total_steps": 9960, "loss": 0.0, "lr": 1.7447673429033361e-07, "epoch": 18.483935742971887, "percentage": 92.42, "elapsed_time": "0:23:58", "remaining_time": "0:01:58", "throughput": 3993.92, "total_tokens": 5746816} |
| {"current_steps": 9210, "total_steps": 9960, "loss": 0.0, "lr": 1.7218976807386767e-07, "epoch": 18.49397590361446, "percentage": 92.47, "elapsed_time": "0:23:59", "remaining_time": "0:01:57", "throughput": 3993.97, "total_tokens": 5749696} |
| {"current_steps": 9215, "total_steps": 9960, "loss": 0.0, "lr": 1.6991762656506483e-07, "epoch": 18.50401606425703, "percentage": 92.52, "elapsed_time": "0:24:00", "remaining_time": "0:01:56", "throughput": 3994.08, "total_tokens": 5752544} |
| {"current_steps": 9220, "total_steps": 9960, "loss": 0.0, "lr": 1.6766031674096795e-07, "epoch": 18.514056224899598, "percentage": 92.57, "elapsed_time": "0:24:01", "remaining_time": "0:01:55", "throughput": 3994.38, "total_tokens": 5756672} |
| {"current_steps": 9225, "total_steps": 9960, "loss": 0.0, "lr": 1.654178455330735e-07, "epoch": 18.52409638554217, "percentage": 92.62, "elapsed_time": "0:24:01", "remaining_time": "0:01:54", "throughput": 3994.52, "total_tokens": 5759520} |
| {"current_steps": 9230, "total_steps": 9960, "loss": 0.0, "lr": 1.631902198273172e-07, "epoch": 18.53413654618474, "percentage": 92.67, "elapsed_time": "0:24:02", "remaining_time": "0:01:54", "throughput": 3994.6, "total_tokens": 5762848} |
| {"current_steps": 9235, "total_steps": 9960, "loss": 0.0, "lr": 1.6097744646404457e-07, "epoch": 18.544176706827308, "percentage": 92.72, "elapsed_time": "0:24:03", "remaining_time": "0:01:53", "throughput": 3994.82, "total_tokens": 5766496} |
| {"current_steps": 9240, "total_steps": 9960, "loss": 0.0, "lr": 1.5877953223799703e-07, "epoch": 18.55421686746988, "percentage": 92.77, "elapsed_time": "0:24:04", "remaining_time": "0:01:52", "throughput": 3995.0, "total_tokens": 5769600} |
| {"current_steps": 9245, "total_steps": 9960, "loss": 0.0, "lr": 1.565964838982881e-07, "epoch": 18.56425702811245, "percentage": 92.82, "elapsed_time": "0:24:04", "remaining_time": "0:01:51", "throughput": 3995.14, "total_tokens": 5772800} |
| {"current_steps": 9250, "total_steps": 9960, "loss": 0.0, "lr": 1.544283081483805e-07, "epoch": 18.57429718875502, "percentage": 92.87, "elapsed_time": "0:24:05", "remaining_time": "0:01:50", "throughput": 3995.31, "total_tokens": 5776416} |
| {"current_steps": 9255, "total_steps": 9960, "loss": 0.0, "lr": 1.5227501164607138e-07, "epoch": 18.58433734939759, "percentage": 92.92, "elapsed_time": "0:24:06", "remaining_time": "0:01:50", "throughput": 3995.28, "total_tokens": 5778976} |
| {"current_steps": 9260, "total_steps": 9960, "loss": 0.0, "lr": 1.501366010034644e-07, "epoch": 18.59437751004016, "percentage": 92.97, "elapsed_time": "0:24:07", "remaining_time": "0:01:49", "throughput": 3995.46, "total_tokens": 5782400} |
| {"current_steps": 9265, "total_steps": 9960, "loss": 0.0, "lr": 1.4801308278695636e-07, "epoch": 18.604417670682732, "percentage": 93.02, "elapsed_time": "0:24:07", "remaining_time": "0:01:48", "throughput": 3995.45, "total_tokens": 5784640} |
| {"current_steps": 9270, "total_steps": 9960, "loss": 0.0, "lr": 1.45904463517213e-07, "epoch": 18.6144578313253, "percentage": 93.07, "elapsed_time": "0:24:08", "remaining_time": "0:01:47", "throughput": 3995.67, "total_tokens": 5787936} |
| {"current_steps": 9275, "total_steps": 9960, "loss": 0.0, "lr": 1.4381074966914987e-07, "epoch": 18.62449799196787, "percentage": 93.12, "elapsed_time": "0:24:09", "remaining_time": "0:01:47", "throughput": 3995.85, "total_tokens": 5791584} |
| {"current_steps": 9280, "total_steps": 9960, "loss": 0.0, "lr": 1.4173194767191257e-07, "epoch": 18.634538152610443, "percentage": 93.17, "elapsed_time": "0:24:10", "remaining_time": "0:01:46", "throughput": 3995.96, "total_tokens": 5794912} |
| {"current_steps": 9285, "total_steps": 9960, "loss": 0.0, "lr": 1.396680639088571e-07, "epoch": 18.644578313253014, "percentage": 93.22, "elapsed_time": "0:24:10", "remaining_time": "0:01:45", "throughput": 3996.02, "total_tokens": 5797568} |
| {"current_steps": 9290, "total_steps": 9960, "loss": 0.0, "lr": 1.3761910471753126e-07, "epoch": 18.65461847389558, "percentage": 93.27, "elapsed_time": "0:24:11", "remaining_time": "0:01:44", "throughput": 3996.19, "total_tokens": 5801088} |
| {"current_steps": 9295, "total_steps": 9960, "loss": 0.0, "lr": 1.3558507638965158e-07, "epoch": 18.664658634538153, "percentage": 93.32, "elapsed_time": "0:24:12", "remaining_time": "0:01:43", "throughput": 3996.27, "total_tokens": 5804096} |
| {"current_steps": 9300, "total_steps": 9960, "loss": 0.0001, "lr": 1.3356598517108966e-07, "epoch": 18.674698795180724, "percentage": 93.37, "elapsed_time": "0:24:13", "remaining_time": "0:01:43", "throughput": 3996.45, "total_tokens": 5807392} |
| {"current_steps": 9305, "total_steps": 9960, "loss": 0.0, "lr": 1.3156183726184657e-07, "epoch": 18.684738955823292, "percentage": 93.42, "elapsed_time": "0:24:13", "remaining_time": "0:01:42", "throughput": 3996.63, "total_tokens": 5810848} |
| {"current_steps": 9310, "total_steps": 9960, "loss": 0.0, "lr": 1.295726388160412e-07, "epoch": 18.694779116465863, "percentage": 93.47, "elapsed_time": "0:24:14", "remaining_time": "0:01:41", "throughput": 3996.79, "total_tokens": 5814176} |
| {"current_steps": 9315, "total_steps": 9960, "loss": 0.0, "lr": 1.2759839594188307e-07, "epoch": 18.704819277108435, "percentage": 93.52, "elapsed_time": "0:24:15", "remaining_time": "0:01:40", "throughput": 3996.91, "total_tokens": 5816736} |
| {"current_steps": 9320, "total_steps": 9960, "loss": 0.0, "lr": 1.2563911470166057e-07, "epoch": 18.714859437751002, "percentage": 93.57, "elapsed_time": "0:24:15", "remaining_time": "0:01:39", "throughput": 3996.89, "total_tokens": 5819360} |
| {"current_steps": 9325, "total_steps": 9960, "loss": 0.0, "lr": 1.2369480111171784e-07, "epoch": 18.724899598393574, "percentage": 93.62, "elapsed_time": "0:24:16", "remaining_time": "0:01:39", "throughput": 3997.0, "total_tokens": 5822304} |
| {"current_steps": 9330, "total_steps": 9960, "loss": 0.0, "lr": 1.2176546114243903e-07, "epoch": 18.734939759036145, "percentage": 93.67, "elapsed_time": "0:24:17", "remaining_time": "0:01:38", "throughput": 3997.1, "total_tokens": 5824768} |
| {"current_steps": 9335, "total_steps": 9960, "loss": 0.0, "lr": 1.198511007182296e-07, "epoch": 18.744979919678716, "percentage": 93.72, "elapsed_time": "0:24:17", "remaining_time": "0:01:37", "throughput": 3997.15, "total_tokens": 5827488} |
| {"current_steps": 9340, "total_steps": 9960, "loss": 0.0, "lr": 1.1795172571749503e-07, "epoch": 18.755020080321284, "percentage": 93.78, "elapsed_time": "0:24:18", "remaining_time": "0:01:36", "throughput": 3997.21, "total_tokens": 5830496} |
| {"current_steps": 9345, "total_steps": 9960, "loss": 0.0, "lr": 1.160673419726288e-07, "epoch": 18.765060240963855, "percentage": 93.83, "elapsed_time": "0:24:19", "remaining_time": "0:01:36", "throughput": 3997.4, "total_tokens": 5833952} |
| {"current_steps": 9350, "total_steps": 9960, "loss": 0.0, "lr": 1.1419795526998679e-07, "epoch": 18.775100401606426, "percentage": 93.88, "elapsed_time": "0:24:20", "remaining_time": "0:01:35", "throughput": 3997.59, "total_tokens": 5837280} |
| {"current_steps": 9355, "total_steps": 9960, "loss": 0.0, "lr": 1.1234357134987717e-07, "epoch": 18.785140562248998, "percentage": 93.93, "elapsed_time": "0:24:20", "remaining_time": "0:01:34", "throughput": 3997.62, "total_tokens": 5839936} |
| {"current_steps": 9360, "total_steps": 9960, "loss": 0.0002, "lr": 1.1050419590653726e-07, "epoch": 18.795180722891565, "percentage": 93.98, "elapsed_time": "0:24:21", "remaining_time": "0:01:33", "throughput": 3997.79, "total_tokens": 5843584} |
| {"current_steps": 9365, "total_steps": 9960, "loss": 0.0, "lr": 1.0867983458811792e-07, "epoch": 18.805220883534137, "percentage": 94.03, "elapsed_time": "0:24:22", "remaining_time": "0:01:32", "throughput": 3997.88, "total_tokens": 5846624} |
| {"current_steps": 9370, "total_steps": 9960, "loss": 0.0, "lr": 1.0687049299666796e-07, "epoch": 18.815261044176708, "percentage": 94.08, "elapsed_time": "0:24:23", "remaining_time": "0:01:32", "throughput": 3998.09, "total_tokens": 5850112} |
| {"current_steps": 9375, "total_steps": 9960, "loss": 0.0, "lr": 1.050761766881131e-07, "epoch": 18.825301204819276, "percentage": 94.13, "elapsed_time": "0:24:24", "remaining_time": "0:01:31", "throughput": 3998.33, "total_tokens": 5853856} |
| {"current_steps": 9380, "total_steps": 9960, "loss": 0.0, "lr": 1.0329689117224262e-07, "epoch": 18.835341365461847, "percentage": 94.18, "elapsed_time": "0:24:24", "remaining_time": "0:01:30", "throughput": 3998.53, "total_tokens": 5857024} |
| {"current_steps": 9385, "total_steps": 9960, "loss": 0.0, "lr": 1.0153264191269052e-07, "epoch": 18.84538152610442, "percentage": 94.23, "elapsed_time": "0:24:25", "remaining_time": "0:01:29", "throughput": 3998.64, "total_tokens": 5860128} |
| {"current_steps": 9390, "total_steps": 9960, "loss": 0.0, "lr": 9.978343432691884e-08, "epoch": 18.855421686746986, "percentage": 94.28, "elapsed_time": "0:24:26", "remaining_time": "0:01:28", "throughput": 3998.68, "total_tokens": 5862336} |
| {"current_steps": 9395, "total_steps": 9960, "loss": 0.0, "lr": 9.804927378620155e-08, "epoch": 18.865461847389557, "percentage": 94.33, "elapsed_time": "0:24:26", "remaining_time": "0:01:28", "throughput": 3998.95, "total_tokens": 5865792} |
| {"current_steps": 9400, "total_steps": 9960, "loss": 0.0, "lr": 9.633016561560793e-08, "epoch": 18.87550200803213, "percentage": 94.38, "elapsed_time": "0:24:27", "remaining_time": "0:01:27", "throughput": 3999.23, "total_tokens": 5869280} |
| {"current_steps": 9405, "total_steps": 9960, "loss": 0.0, "lr": 9.462611509398534e-08, "epoch": 18.8855421686747, "percentage": 94.43, "elapsed_time": "0:24:28", "remaining_time": "0:01:26", "throughput": 3999.36, "total_tokens": 5872288} |
| {"current_steps": 9410, "total_steps": 9960, "loss": 0.0001, "lr": 9.293712745394479e-08, "epoch": 18.895582329317268, "percentage": 94.48, "elapsed_time": "0:24:28", "remaining_time": "0:01:25", "throughput": 3999.33, "total_tokens": 5874688} |
| {"current_steps": 9415, "total_steps": 9960, "loss": 0.0, "lr": 9.126320788184374e-08, "epoch": 18.90562248995984, "percentage": 94.53, "elapsed_time": "0:24:29", "remaining_time": "0:01:25", "throughput": 3999.47, "total_tokens": 5877824} |
| {"current_steps": 9420, "total_steps": 9960, "loss": 0.0, "lr": 8.960436151776886e-08, "epoch": 18.91566265060241, "percentage": 94.58, "elapsed_time": "0:24:30", "remaining_time": "0:01:24", "throughput": 3999.61, "total_tokens": 5881056} |
| {"current_steps": 9425, "total_steps": 9960, "loss": 0.0, "lr": 8.796059345552389e-08, "epoch": 18.92570281124498, "percentage": 94.63, "elapsed_time": "0:24:31", "remaining_time": "0:01:23", "throughput": 3999.81, "total_tokens": 5884320} |
| {"current_steps": 9430, "total_steps": 9960, "loss": 0.0, "lr": 8.633190874261011e-08, "epoch": 18.93574297188755, "percentage": 94.68, "elapsed_time": "0:24:31", "remaining_time": "0:01:22", "throughput": 3999.93, "total_tokens": 5887648} |
| {"current_steps": 9435, "total_steps": 9960, "loss": 0.0, "lr": 8.471831238021366e-08, "epoch": 18.94578313253012, "percentage": 94.73, "elapsed_time": "0:24:32", "remaining_time": "0:01:21", "throughput": 4000.09, "total_tokens": 5890976} |
| {"current_steps": 9440, "total_steps": 9960, "loss": 0.0, "lr": 8.31198093231872e-08, "epoch": 18.955823293172692, "percentage": 94.78, "elapsed_time": "0:24:33", "remaining_time": "0:01:21", "throughput": 4000.12, "total_tokens": 5893344} |
| {"current_steps": 9445, "total_steps": 9960, "loss": 0.0, "lr": 8.153640448003875e-08, "epoch": 18.96586345381526, "percentage": 94.83, "elapsed_time": "0:24:33", "remaining_time": "0:01:20", "throughput": 4000.16, "total_tokens": 5895808} |
| {"current_steps": 9450, "total_steps": 9960, "loss": 0.0, "lr": 7.996810271291344e-08, "epoch": 18.97590361445783, "percentage": 94.88, "elapsed_time": "0:24:34", "remaining_time": "0:01:19", "throughput": 4000.23, "total_tokens": 5899200} |
| {"current_steps": 9455, "total_steps": 9960, "loss": 0.0, "lr": 7.841490883757907e-08, "epoch": 18.985943775100402, "percentage": 94.93, "elapsed_time": "0:24:35", "remaining_time": "0:01:18", "throughput": 4000.37, "total_tokens": 5902336} |
| {"current_steps": 9460, "total_steps": 9960, "loss": 0.0, "lr": 7.687682762341276e-08, "epoch": 18.99598393574297, "percentage": 94.98, "elapsed_time": "0:24:36", "remaining_time": "0:01:18", "throughput": 4000.51, "total_tokens": 5905248} |
| {"current_steps": 9465, "total_steps": 9960, "loss": 0.0, "lr": 7.535386379338371e-08, "epoch": 19.00602409638554, "percentage": 95.03, "elapsed_time": "0:24:37", "remaining_time": "0:01:17", "throughput": 4000.41, "total_tokens": 5908704} |
| {"current_steps": 9470, "total_steps": 9960, "loss": 0.0, "lr": 7.384602202404335e-08, "epoch": 19.016064257028113, "percentage": 95.08, "elapsed_time": "0:24:37", "remaining_time": "0:01:16", "throughput": 4000.65, "total_tokens": 5912832} |
| {"current_steps": 9475, "total_steps": 9960, "loss": 0.0, "lr": 7.235330694550402e-08, "epoch": 19.026104417670684, "percentage": 95.13, "elapsed_time": "0:24:38", "remaining_time": "0:01:15", "throughput": 4000.94, "total_tokens": 5917056} |
| {"current_steps": 9480, "total_steps": 9960, "loss": 0.0, "lr": 7.087572314143198e-08, "epoch": 19.03614457831325, "percentage": 95.18, "elapsed_time": "0:24:39", "remaining_time": "0:01:14", "throughput": 4001.1, "total_tokens": 5920192} |
| {"current_steps": 9485, "total_steps": 9960, "loss": 0.0059, "lr": 6.94132751490284e-08, "epoch": 19.046184738955823, "percentage": 95.23, "elapsed_time": "0:24:40", "remaining_time": "0:01:14", "throughput": 4001.11, "total_tokens": 5922368} |
| {"current_steps": 9490, "total_steps": 9960, "loss": 0.0, "lr": 6.796596745901717e-08, "epoch": 19.056224899598394, "percentage": 95.28, "elapsed_time": "0:24:40", "remaining_time": "0:01:13", "throughput": 4001.16, "total_tokens": 5925056} |
| {"current_steps": 9495, "total_steps": 9960, "loss": 0.0, "lr": 6.653380451563219e-08, "epoch": 19.066265060240966, "percentage": 95.33, "elapsed_time": "0:24:41", "remaining_time": "0:01:12", "throughput": 4001.22, "total_tokens": 5928256} |
| {"current_steps": 9500, "total_steps": 9960, "loss": 0.0, "lr": 6.511679071659949e-08, "epoch": 19.076305220883533, "percentage": 95.38, "elapsed_time": "0:24:42", "remaining_time": "0:01:11", "throughput": 4001.34, "total_tokens": 5931392} |
| {"current_steps": 9505, "total_steps": 9960, "loss": 0.0, "lr": 6.371493041313126e-08, "epoch": 19.086345381526105, "percentage": 95.43, "elapsed_time": "0:24:43", "remaining_time": "0:01:10", "throughput": 4001.49, "total_tokens": 5934464} |
| {"current_steps": 9510, "total_steps": 9960, "loss": 0.0, "lr": 6.232822790990467e-08, "epoch": 19.096385542168676, "percentage": 95.48, "elapsed_time": "0:24:43", "remaining_time": "0:01:10", "throughput": 4001.63, "total_tokens": 5937568} |
| {"current_steps": 9515, "total_steps": 9960, "loss": 0.0, "lr": 6.095668746505245e-08, "epoch": 19.106425702811244, "percentage": 95.53, "elapsed_time": "0:24:44", "remaining_time": "0:01:09", "throughput": 4001.8, "total_tokens": 5940768} |
| {"current_steps": 9520, "total_steps": 9960, "loss": 0.0001, "lr": 5.96003132901507e-08, "epoch": 19.116465863453815, "percentage": 95.58, "elapsed_time": "0:24:45", "remaining_time": "0:01:08", "throughput": 4001.91, "total_tokens": 5944032} |
| {"current_steps": 9525, "total_steps": 9960, "loss": 0.0001, "lr": 5.825910955020386e-08, "epoch": 19.126506024096386, "percentage": 95.63, "elapsed_time": "0:24:46", "remaining_time": "0:01:07", "throughput": 4002.05, "total_tokens": 5947168} |
| {"current_steps": 9530, "total_steps": 9960, "loss": 0.0, "lr": 5.693308036363143e-08, "epoch": 19.136546184738958, "percentage": 95.68, "elapsed_time": "0:24:46", "remaining_time": "0:01:07", "throughput": 4002.2, "total_tokens": 5950592} |
| {"current_steps": 9535, "total_steps": 9960, "loss": 0.0, "lr": 5.562222980225907e-08, "epoch": 19.146586345381525, "percentage": 95.73, "elapsed_time": "0:24:47", "remaining_time": "0:01:06", "throughput": 4002.26, "total_tokens": 5952960} |
| {"current_steps": 9540, "total_steps": 9960, "loss": 0.0, "lr": 5.432656189130137e-08, "epoch": 19.156626506024097, "percentage": 95.78, "elapsed_time": "0:24:48", "remaining_time": "0:01:05", "throughput": 4002.45, "total_tokens": 5956288} |
| {"current_steps": 9545, "total_steps": 9960, "loss": 0.0, "lr": 5.3046080609352455e-08, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:24:48", "remaining_time": "0:01:04", "throughput": 4002.47, "total_tokens": 5958752} |
| {"current_steps": 9550, "total_steps": 9960, "loss": 0.0, "lr": 5.178078988837432e-08, "epoch": 19.176706827309236, "percentage": 95.88, "elapsed_time": "0:24:49", "remaining_time": "0:01:03", "throughput": 4002.57, "total_tokens": 5962144} |
| {"current_steps": 9555, "total_steps": 9960, "loss": 0.0, "lr": 5.053069361368068e-08, "epoch": 19.186746987951807, "percentage": 95.93, "elapsed_time": "0:24:50", "remaining_time": "0:01:03", "throughput": 4002.78, "total_tokens": 5965280} |
| {"current_steps": 9560, "total_steps": 9960, "loss": 0.0, "lr": 4.9295795623930945e-08, "epoch": 19.196787148594378, "percentage": 95.98, "elapsed_time": "0:24:50", "remaining_time": "0:01:02", "throughput": 4002.86, "total_tokens": 5968192} |
| {"current_steps": 9565, "total_steps": 9960, "loss": 0.0, "lr": 4.807609971111238e-08, "epoch": 19.20682730923695, "percentage": 96.03, "elapsed_time": "0:24:51", "remaining_time": "0:01:01", "throughput": 4002.97, "total_tokens": 5971264} |
| {"current_steps": 9570, "total_steps": 9960, "loss": 0.0, "lr": 4.68716096205335e-08, "epoch": 19.216867469879517, "percentage": 96.08, "elapsed_time": "0:24:52", "remaining_time": "0:01:00", "throughput": 4002.96, "total_tokens": 5973344} |
| {"current_steps": 9575, "total_steps": 9960, "loss": 0.0, "lr": 4.5682329050810715e-08, "epoch": 19.22690763052209, "percentage": 96.13, "elapsed_time": "0:24:53", "remaining_time": "0:01:00", "throughput": 4003.19, "total_tokens": 5977248} |
| {"current_steps": 9580, "total_steps": 9960, "loss": 0.0, "lr": 4.450826165385336e-08, "epoch": 19.23694779116466, "percentage": 96.18, "elapsed_time": "0:24:53", "remaining_time": "0:00:59", "throughput": 4003.37, "total_tokens": 5980704} |
| {"current_steps": 9585, "total_steps": 9960, "loss": 0.0, "lr": 4.33494110348609e-08, "epoch": 19.246987951807228, "percentage": 96.23, "elapsed_time": "0:24:54", "remaining_time": "0:00:58", "throughput": 4003.53, "total_tokens": 5983936} |
| {"current_steps": 9590, "total_steps": 9960, "loss": 0.0, "lr": 4.2205780752301865e-08, "epoch": 19.2570281124498, "percentage": 96.29, "elapsed_time": "0:24:55", "remaining_time": "0:00:57", "throughput": 4003.7, "total_tokens": 5987424} |
| {"current_steps": 9595, "total_steps": 9960, "loss": 0.0, "lr": 4.107737431791159e-08, "epoch": 19.26706827309237, "percentage": 96.34, "elapsed_time": "0:24:56", "remaining_time": "0:00:56", "throughput": 4003.68, "total_tokens": 5990112} |
| {"current_steps": 9600, "total_steps": 9960, "loss": 0.0, "lr": 3.996419519667505e-08, "epoch": 19.27710843373494, "percentage": 96.39, "elapsed_time": "0:24:56", "remaining_time": "0:00:56", "throughput": 4003.81, "total_tokens": 5993472} |
| {"current_steps": 9605, "total_steps": 9960, "loss": 0.0, "lr": 3.8866246806821273e-08, "epoch": 19.28714859437751, "percentage": 96.44, "elapsed_time": "0:24:57", "remaining_time": "0:00:55", "throughput": 4003.84, "total_tokens": 5996320} |
| {"current_steps": 9610, "total_steps": 9960, "loss": 0.0, "lr": 3.7783532519808376e-08, "epoch": 19.29718875502008, "percentage": 96.49, "elapsed_time": "0:24:58", "remaining_time": "0:00:54", "throughput": 4003.92, "total_tokens": 5999360} |
| {"current_steps": 9615, "total_steps": 9960, "loss": 0.0, "lr": 3.671605566031633e-08, "epoch": 19.30722891566265, "percentage": 96.54, "elapsed_time": "0:24:59", "remaining_time": "0:00:53", "throughput": 4003.93, "total_tokens": 6002016} |
| {"current_steps": 9620, "total_steps": 9960, "loss": 0.0, "lr": 3.566381950623588e-08, "epoch": 19.31726907630522, "percentage": 96.59, "elapsed_time": "0:24:59", "remaining_time": "0:00:53", "throughput": 4003.95, "total_tokens": 6004448} |
| {"current_steps": 9625, "total_steps": 9960, "loss": 0.0, "lr": 3.462682728865685e-08, "epoch": 19.32730923694779, "percentage": 96.64, "elapsed_time": "0:25:00", "remaining_time": "0:00:52", "throughput": 4004.09, "total_tokens": 6007392} |
| {"current_steps": 9630, "total_steps": 9960, "loss": 0.0, "lr": 3.3605082191860985e-08, "epoch": 19.337349397590362, "percentage": 96.69, "elapsed_time": "0:25:00", "remaining_time": "0:00:51", "throughput": 4004.16, "total_tokens": 6010176} |
| {"current_steps": 9635, "total_steps": 9960, "loss": 0.0, "lr": 3.259858735331134e-08, "epoch": 19.347389558232933, "percentage": 96.74, "elapsed_time": "0:25:01", "remaining_time": "0:00:50", "throughput": 4004.33, "total_tokens": 6013120} |
| {"current_steps": 9640, "total_steps": 9960, "loss": 0.0, "lr": 3.1607345863640114e-08, "epoch": 19.3574297188755, "percentage": 96.79, "elapsed_time": "0:25:02", "remaining_time": "0:00:49", "throughput": 4004.35, "total_tokens": 6015296} |
| {"current_steps": 9645, "total_steps": 9960, "loss": 0.0001, "lr": 3.063136076664364e-08, "epoch": 19.367469879518072, "percentage": 96.84, "elapsed_time": "0:25:02", "remaining_time": "0:00:49", "throughput": 4004.39, "total_tokens": 6018016} |
| {"current_steps": 9650, "total_steps": 9960, "loss": 0.0474, "lr": 2.967063505926848e-08, "epoch": 19.377510040160644, "percentage": 96.89, "elapsed_time": "0:25:03", "remaining_time": "0:00:48", "throughput": 4004.49, "total_tokens": 6021408} |
| {"current_steps": 9655, "total_steps": 9960, "loss": 0.0, "lr": 2.8725171691605934e-08, "epoch": 19.38755020080321, "percentage": 96.94, "elapsed_time": "0:25:04", "remaining_time": "0:00:47", "throughput": 4004.63, "total_tokens": 6024544} |
| {"current_steps": 9660, "total_steps": 9960, "loss": 0.0, "lr": 2.7794973566880323e-08, "epoch": 19.397590361445783, "percentage": 96.99, "elapsed_time": "0:25:05", "remaining_time": "0:00:46", "throughput": 4004.83, "total_tokens": 6027872} |
| {"current_steps": 9665, "total_steps": 9960, "loss": 0.0, "lr": 2.6880043541441804e-08, "epoch": 19.407630522088354, "percentage": 97.04, "elapsed_time": "0:25:05", "remaining_time": "0:00:45", "throughput": 4004.92, "total_tokens": 6030784} |
| {"current_steps": 9670, "total_steps": 9960, "loss": 0.0, "lr": 2.5980384424756366e-08, "epoch": 19.417670682730925, "percentage": 97.09, "elapsed_time": "0:25:06", "remaining_time": "0:00:45", "throughput": 4005.08, "total_tokens": 6034208} |
| {"current_steps": 9675, "total_steps": 9960, "loss": 0.0001, "lr": 2.5095998979398628e-08, "epoch": 19.427710843373493, "percentage": 97.14, "elapsed_time": "0:25:07", "remaining_time": "0:00:44", "throughput": 4005.25, "total_tokens": 6037632} |
| {"current_steps": 9680, "total_steps": 9960, "loss": 0.0, "lr": 2.4226889921041273e-08, "epoch": 19.437751004016064, "percentage": 97.19, "elapsed_time": "0:25:08", "remaining_time": "0:00:43", "throughput": 4005.45, "total_tokens": 6040928} |
| {"current_steps": 9685, "total_steps": 9960, "loss": 0.0, "lr": 2.3373059918448958e-08, "epoch": 19.447791164658636, "percentage": 97.24, "elapsed_time": "0:25:08", "remaining_time": "0:00:42", "throughput": 4005.52, "total_tokens": 6044096} |
| {"current_steps": 9690, "total_steps": 9960, "loss": 0.0, "lr": 2.2534511593468866e-08, "epoch": 19.457831325301203, "percentage": 97.29, "elapsed_time": "0:25:09", "remaining_time": "0:00:42", "throughput": 4005.71, "total_tokens": 6047456} |
| {"current_steps": 9695, "total_steps": 9960, "loss": 0.0, "lr": 2.171124752102238e-08, "epoch": 19.467871485943775, "percentage": 97.34, "elapsed_time": "0:25:10", "remaining_time": "0:00:41", "throughput": 4005.72, "total_tokens": 6049856} |
| {"current_steps": 9700, "total_steps": 9960, "loss": 0.0, "lr": 2.0903270229098992e-08, "epoch": 19.477911646586346, "percentage": 97.39, "elapsed_time": "0:25:10", "remaining_time": "0:00:40", "throughput": 4005.83, "total_tokens": 6052704} |
| {"current_steps": 9705, "total_steps": 9960, "loss": 0.0, "lr": 2.0110582198745177e-08, "epoch": 19.487951807228917, "percentage": 97.44, "elapsed_time": "0:25:11", "remaining_time": "0:00:39", "throughput": 4005.95, "total_tokens": 6055456} |
| {"current_steps": 9710, "total_steps": 9960, "loss": 0.0, "lr": 1.9333185864061077e-08, "epoch": 19.497991967871485, "percentage": 97.49, "elapsed_time": "0:25:12", "remaining_time": "0:00:38", "throughput": 4006.01, "total_tokens": 6058304} |
| {"current_steps": 9715, "total_steps": 9960, "loss": 0.0, "lr": 1.8571083612188845e-08, "epoch": 19.508032128514056, "percentage": 97.54, "elapsed_time": "0:25:13", "remaining_time": "0:00:38", "throughput": 4006.09, "total_tokens": 6061440} |
| {"current_steps": 9720, "total_steps": 9960, "loss": 0.0, "lr": 1.7824277783308197e-08, "epoch": 19.518072289156628, "percentage": 97.59, "elapsed_time": "0:25:13", "remaining_time": "0:00:37", "throughput": 4006.27, "total_tokens": 6065024} |
| {"current_steps": 9725, "total_steps": 9960, "loss": 0.0, "lr": 1.7092770670628644e-08, "epoch": 19.528112449799195, "percentage": 97.64, "elapsed_time": "0:25:14", "remaining_time": "0:00:36", "throughput": 4006.44, "total_tokens": 6068256} |
| {"current_steps": 9730, "total_steps": 9960, "loss": 0.0, "lr": 1.637656452038172e-08, "epoch": 19.538152610441767, "percentage": 97.69, "elapsed_time": "0:25:15", "remaining_time": "0:00:35", "throughput": 4006.6, "total_tokens": 6071200} |
| {"current_steps": 9735, "total_steps": 9960, "loss": 0.0, "lr": 1.5675661531813215e-08, "epoch": 19.548192771084338, "percentage": 97.74, "elapsed_time": "0:25:16", "remaining_time": "0:00:35", "throughput": 4006.7, "total_tokens": 6074656} |
| {"current_steps": 9740, "total_steps": 9960, "loss": 0.0, "lr": 1.4990063857180383e-08, "epoch": 19.55823293172691, "percentage": 97.79, "elapsed_time": "0:25:16", "remaining_time": "0:00:34", "throughput": 4006.74, "total_tokens": 6077408} |
| {"current_steps": 9745, "total_steps": 9960, "loss": 0.0, "lr": 1.431977360173975e-08, "epoch": 19.568273092369477, "percentage": 97.84, "elapsed_time": "0:25:17", "remaining_time": "0:00:33", "throughput": 4006.82, "total_tokens": 6080352} |
| {"current_steps": 9750, "total_steps": 9960, "loss": 0.0, "lr": 1.3664792823745442e-08, "epoch": 19.57831325301205, "percentage": 97.89, "elapsed_time": "0:25:18", "remaining_time": "0:00:32", "throughput": 4006.86, "total_tokens": 6082848} |
| {"current_steps": 9755, "total_steps": 9960, "loss": 0.0, "lr": 1.3025123534440299e-08, "epoch": 19.58835341365462, "percentage": 97.94, "elapsed_time": "0:25:18", "remaining_time": "0:00:31", "throughput": 4006.99, "total_tokens": 6085664} |
| {"current_steps": 9760, "total_steps": 9960, "loss": 0.0, "lr": 1.240076769804921e-08, "epoch": 19.598393574297187, "percentage": 97.99, "elapsed_time": "0:25:19", "remaining_time": "0:00:31", "throughput": 4007.02, "total_tokens": 6088608} |
| {"current_steps": 9765, "total_steps": 9960, "loss": 0.0, "lr": 1.1791727231776906e-08, "epoch": 19.60843373493976, "percentage": 98.04, "elapsed_time": "0:25:20", "remaining_time": "0:00:30", "throughput": 4007.05, "total_tokens": 6091296} |
| {"current_steps": 9770, "total_steps": 9960, "loss": 0.0, "lr": 1.1198004005796847e-08, "epoch": 19.61847389558233, "percentage": 98.09, "elapsed_time": "0:25:20", "remaining_time": "0:00:29", "throughput": 4007.18, "total_tokens": 6094880} |
| {"current_steps": 9775, "total_steps": 9960, "loss": 0.0, "lr": 1.0619599843249006e-08, "epoch": 19.6285140562249, "percentage": 98.14, "elapsed_time": "0:25:21", "remaining_time": "0:00:28", "throughput": 4007.33, "total_tokens": 6098208} |
| {"current_steps": 9780, "total_steps": 9960, "loss": 0.0, "lr": 1.0056516520232651e-08, "epoch": 19.63855421686747, "percentage": 98.19, "elapsed_time": "0:25:22", "remaining_time": "0:00:28", "throughput": 4007.36, "total_tokens": 6101024} |
| {"current_steps": 9785, "total_steps": 9960, "loss": 0.0, "lr": 9.508755765802457e-09, "epoch": 19.64859437751004, "percentage": 98.24, "elapsed_time": "0:25:23", "remaining_time": "0:00:27", "throughput": 4007.44, "total_tokens": 6103904} |
| {"current_steps": 9790, "total_steps": 9960, "loss": 0.0, "lr": 8.976319261962407e-09, "epoch": 19.65863453815261, "percentage": 98.29, "elapsed_time": "0:25:23", "remaining_time": "0:00:26", "throughput": 4007.57, "total_tokens": 6106816} |
| {"current_steps": 9795, "total_steps": 9960, "loss": 0.0, "lr": 8.459208643659122e-09, "epoch": 19.66867469879518, "percentage": 98.34, "elapsed_time": "0:25:24", "remaining_time": "0:00:25", "throughput": 4007.72, "total_tokens": 6110368} |
| {"current_steps": 9800, "total_steps": 9960, "loss": 0.0, "lr": 7.957425498778537e-09, "epoch": 19.67871485943775, "percentage": 98.39, "elapsed_time": "0:25:25", "remaining_time": "0:00:24", "throughput": 4007.88, "total_tokens": 6113856} |
| {"current_steps": 9805, "total_steps": 9960, "loss": 0.0, "lr": 7.470971368142011e-09, "epoch": 19.688755020080322, "percentage": 98.44, "elapsed_time": "0:25:26", "remaining_time": "0:00:24", "throughput": 4007.94, "total_tokens": 6116448} |
| {"current_steps": 9810, "total_steps": 9960, "loss": 0.0, "lr": 6.999847745498556e-09, "epoch": 19.698795180722893, "percentage": 98.49, "elapsed_time": "0:25:26", "remaining_time": "0:00:23", "throughput": 4008.19, "total_tokens": 6120096} |
| {"current_steps": 9815, "total_steps": 9960, "loss": 0.0, "lr": 6.544056077523175e-09, "epoch": 19.70883534136546, "percentage": 98.54, "elapsed_time": "0:25:27", "remaining_time": "0:00:22", "throughput": 4008.25, "total_tokens": 6123008} |
| {"current_steps": 9820, "total_steps": 9960, "loss": 0.0, "lr": 6.1035977638101985e-09, "epoch": 19.718875502008032, "percentage": 98.59, "elapsed_time": "0:25:28", "remaining_time": "0:00:21", "throughput": 4008.44, "total_tokens": 6126720} |
| {"current_steps": 9825, "total_steps": 9960, "loss": 0.0, "lr": 5.678474156871061e-09, "epoch": 19.728915662650603, "percentage": 98.64, "elapsed_time": "0:25:29", "remaining_time": "0:00:21", "throughput": 4008.51, "total_tokens": 6129760} |
| {"current_steps": 9830, "total_steps": 9960, "loss": 0.0, "lr": 5.268686562127645e-09, "epoch": 19.73895582329317, "percentage": 98.69, "elapsed_time": "0:25:30", "remaining_time": "0:00:20", "throughput": 4008.67, "total_tokens": 6133344} |
| {"current_steps": 9835, "total_steps": 9960, "loss": 0.0, "lr": 4.874236237911723e-09, "epoch": 19.748995983935743, "percentage": 98.74, "elapsed_time": "0:25:30", "remaining_time": "0:00:19", "throughput": 4008.84, "total_tokens": 6136576} |
| {"current_steps": 9840, "total_steps": 9960, "loss": 0.0, "lr": 4.495124395456629e-09, "epoch": 19.759036144578314, "percentage": 98.8, "elapsed_time": "0:25:31", "remaining_time": "0:00:18", "throughput": 4008.86, "total_tokens": 6139136} |
| {"current_steps": 9845, "total_steps": 9960, "loss": 0.0, "lr": 4.1313521988983754e-09, "epoch": 19.769076305220885, "percentage": 98.85, "elapsed_time": "0:25:32", "remaining_time": "0:00:17", "throughput": 4008.98, "total_tokens": 6142240} |
| {"current_steps": 9850, "total_steps": 9960, "loss": 0.0, "lr": 3.7829207652673175e-09, "epoch": 19.779116465863453, "percentage": 98.9, "elapsed_time": "0:25:32", "remaining_time": "0:00:17", "throughput": 4009.04, "total_tokens": 6145088} |
| {"current_steps": 9855, "total_steps": 9960, "loss": 0.0, "lr": 3.44983116448927e-09, "epoch": 19.789156626506024, "percentage": 98.95, "elapsed_time": "0:25:33", "remaining_time": "0:00:16", "throughput": 4009.11, "total_tokens": 6147968} |
| {"current_steps": 9860, "total_steps": 9960, "loss": 0.0, "lr": 3.1320844193788445e-09, "epoch": 19.799196787148595, "percentage": 99.0, "elapsed_time": "0:25:34", "remaining_time": "0:00:15", "throughput": 4009.28, "total_tokens": 6151296} |
| {"current_steps": 9865, "total_steps": 9960, "loss": 0.0, "lr": 2.8296815056377824e-09, "epoch": 19.809236947791163, "percentage": 99.05, "elapsed_time": "0:25:35", "remaining_time": "0:00:14", "throughput": 4009.54, "total_tokens": 6154880} |
| {"current_steps": 9870, "total_steps": 9960, "loss": 0.0, "lr": 2.54262335185107e-09, "epoch": 19.819277108433734, "percentage": 99.1, "elapsed_time": "0:25:35", "remaining_time": "0:00:14", "throughput": 4009.68, "total_tokens": 6158528} |
| {"current_steps": 9875, "total_steps": 9960, "loss": 0.0, "lr": 2.2709108394863845e-09, "epoch": 19.829317269076306, "percentage": 99.15, "elapsed_time": "0:25:36", "remaining_time": "0:00:13", "throughput": 4009.81, "total_tokens": 6161600} |
| {"current_steps": 9880, "total_steps": 9960, "loss": 0.0002, "lr": 2.0145448028874305e-09, "epoch": 19.839357429718877, "percentage": 99.2, "elapsed_time": "0:25:37", "remaining_time": "0:00:12", "throughput": 4009.85, "total_tokens": 6164288} |
| {"current_steps": 9885, "total_steps": 9960, "loss": 0.0, "lr": 1.7735260292750522e-09, "epoch": 19.849397590361445, "percentage": 99.25, "elapsed_time": "0:25:38", "remaining_time": "0:00:11", "throughput": 4010.07, "total_tokens": 6167904} |
| {"current_steps": 9890, "total_steps": 9960, "loss": 0.0, "lr": 1.547855258743347e-09, "epoch": 19.859437751004016, "percentage": 99.3, "elapsed_time": "0:25:38", "remaining_time": "0:00:10", "throughput": 4010.23, "total_tokens": 6171360} |
| {"current_steps": 9895, "total_steps": 9960, "loss": 0.0, "lr": 1.3375331842574446e-09, "epoch": 19.869477911646587, "percentage": 99.35, "elapsed_time": "0:25:39", "remaining_time": "0:00:10", "throughput": 4010.4, "total_tokens": 6174848} |
| {"current_steps": 9900, "total_steps": 9960, "loss": 0.0, "lr": 1.1425604516512868e-09, "epoch": 19.879518072289155, "percentage": 99.4, "elapsed_time": "0:25:40", "remaining_time": "0:00:09", "throughput": 4010.34, "total_tokens": 6177472} |
| {"current_steps": 9905, "total_steps": 9960, "loss": 0.0, "lr": 9.629376596248518e-10, "epoch": 19.889558232931726, "percentage": 99.45, "elapsed_time": "0:25:41", "remaining_time": "0:00:08", "throughput": 4010.51, "total_tokens": 6180576} |
| {"current_steps": 9910, "total_steps": 9960, "loss": 0.0, "lr": 7.986653597447102e-10, "epoch": 19.899598393574298, "percentage": 99.5, "elapsed_time": "0:25:41", "remaining_time": "0:00:07", "throughput": 4010.6, "total_tokens": 6183520} |
| {"current_steps": 9915, "total_steps": 9960, "loss": 0.0, "lr": 6.497440564395829e-10, "epoch": 19.90963855421687, "percentage": 99.55, "elapsed_time": "0:25:42", "remaining_time": "0:00:07", "throughput": 4010.98, "total_tokens": 6188000} |
| {"current_steps": 9920, "total_steps": 9960, "loss": 0.0, "lr": 5.161742070014519e-10, "epoch": 19.919678714859437, "percentage": 99.6, "elapsed_time": "0:25:43", "remaining_time": "0:00:06", "throughput": 4011.0, "total_tokens": 6190560} |
| {"current_steps": 9925, "total_steps": 9960, "loss": 0.0, "lr": 3.9795622158111945e-10, "epoch": 19.929718875502008, "percentage": 99.65, "elapsed_time": "0:25:44", "remaining_time": "0:00:05", "throughput": 4011.2, "total_tokens": 6194080} |
| {"current_steps": 9930, "total_steps": 9960, "loss": 0.0, "lr": 2.950904631893181e-10, "epoch": 19.93975903614458, "percentage": 99.7, "elapsed_time": "0:25:45", "remaining_time": "0:00:04", "throughput": 4011.29, "total_tokens": 6197728} |
| {"current_steps": 9935, "total_steps": 9960, "loss": 0.0, "lr": 2.0757724769560062e-10, "epoch": 19.949799196787147, "percentage": 99.75, "elapsed_time": "0:25:45", "remaining_time": "0:00:03", "throughput": 4011.37, "total_tokens": 6201088} |
| {"current_steps": 9940, "total_steps": 9960, "loss": 0.0, "lr": 1.354168438255643e-10, "epoch": 19.95983935742972, "percentage": 99.8, "elapsed_time": "0:25:46", "remaining_time": "0:00:03", "throughput": 4011.54, "total_tokens": 6204672} |
| {"current_steps": 9945, "total_steps": 9960, "loss": 0.0, "lr": 7.860947316140621e-11, "epoch": 19.96987951807229, "percentage": 99.85, "elapsed_time": "0:25:47", "remaining_time": "0:00:02", "throughput": 4011.62, "total_tokens": 6207360} |
| {"current_steps": 9950, "total_steps": 9960, "loss": 0.0, "lr": 3.715531014025775e-11, "epoch": 19.97991967871486, "percentage": 99.9, "elapsed_time": "0:25:48", "remaining_time": "0:00:01", "throughput": 4011.78, "total_tokens": 6210368} |
| {"current_steps": 9955, "total_steps": 9960, "loss": 0.0, "lr": 1.1054482056405136e-11, "epoch": 19.98995983935743, "percentage": 99.95, "elapsed_time": "0:25:48", "remaining_time": "0:00:00", "throughput": 4011.77, "total_tokens": 6212800} |
| {"current_steps": 9960, "total_steps": 9960, "loss": 0.0001, "lr": 3.0706905573829603e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:49", "remaining_time": "0:00:00", "throughput": 4011.79, "total_tokens": 6215968} |
| {"current_steps": 9960, "total_steps": 9960, "eval_loss": 1.102670431137085, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:57", "remaining_time": "0:00:00", "throughput": 3990.99, "total_tokens": 6215968} |
| {"current_steps": 9960, "total_steps": 9960, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:58", "remaining_time": "0:00:00", "throughput": 3987.92, "total_tokens": 6215968} |
|
|