{"current_steps": 5, "total_steps": 2490, "loss": 11.8262, "lr": 8.032128514056225e-07, "epoch": 0.020080321285140562, "percentage": 0.2, "elapsed_time": "0:00:01", "remaining_time": "0:08:43", "throughput": 789.2, "total_tokens": 832} {"current_steps": 10, "total_steps": 2490, "loss": 10.3109, "lr": 1.8072289156626506e-06, "epoch": 0.040160642570281124, "percentage": 0.4, "elapsed_time": "0:00:01", "remaining_time": "0:06:08", "throughput": 1183.79, "total_tokens": 1760} {"current_steps": 15, "total_steps": 2490, "loss": 8.5244, "lr": 2.811244979919679e-06, "epoch": 0.060240963855421686, "percentage": 0.6, "elapsed_time": "0:00:01", "remaining_time": "0:05:15", "throughput": 1365.1, "total_tokens": 2608} {"current_steps": 20, "total_steps": 2490, "loss": 6.312, "lr": 3.8152610441767074e-06, "epoch": 0.08032128514056225, "percentage": 0.8, "elapsed_time": "0:00:02", "remaining_time": "0:04:48", "throughput": 1511.55, "total_tokens": 3536} {"current_steps": 25, "total_steps": 2490, "loss": 4.854, "lr": 4.819277108433735e-06, "epoch": 0.10040160642570281, "percentage": 1.0, "elapsed_time": "0:00:02", "remaining_time": "0:04:32", "throughput": 1624.42, "total_tokens": 4496} {"current_steps": 30, "total_steps": 2490, "loss": 2.9588, "lr": 5.823293172690764e-06, "epoch": 0.12048192771084337, "percentage": 1.2, "elapsed_time": "0:00:03", "remaining_time": "0:04:21", "throughput": 1700.62, "total_tokens": 5424} {"current_steps": 35, "total_steps": 2490, "loss": 1.8179, "lr": 6.827309236947792e-06, "epoch": 0.14056224899598393, "percentage": 1.41, "elapsed_time": "0:00:03", "remaining_time": "0:04:13", "throughput": 1745.95, "total_tokens": 6304} {"current_steps": 40, "total_steps": 2490, "loss": 0.9304, "lr": 7.83132530120482e-06, "epoch": 0.1606425702811245, "percentage": 1.61, "elapsed_time": "0:00:04", "remaining_time": "0:04:07", "throughput": 1753.14, "total_tokens": 7072} {"current_steps": 45, "total_steps": 2490, "loss": 0.7447, "lr": 8.835341365461847e-06, "epoch": 0.18072289156626506, "percentage": 1.81, "elapsed_time": "0:00:04", "remaining_time": "0:04:02", "throughput": 1762.93, "total_tokens": 7856} {"current_steps": 50, "total_steps": 2490, "loss": 0.4663, "lr": 9.839357429718876e-06, "epoch": 0.20080321285140562, "percentage": 2.01, "elapsed_time": "0:00:04", "remaining_time": "0:03:58", "throughput": 1818.7, "total_tokens": 8880} {"current_steps": 55, "total_steps": 2490, "loss": 0.419, "lr": 1.0843373493975904e-05, "epoch": 0.22088353413654618, "percentage": 2.21, "elapsed_time": "0:00:05", "remaining_time": "0:03:55", "throughput": 1823.0, "total_tokens": 9680} {"current_steps": 60, "total_steps": 2490, "loss": 0.3886, "lr": 1.1847389558232933e-05, "epoch": 0.24096385542168675, "percentage": 2.41, "elapsed_time": "0:00:05", "remaining_time": "0:03:52", "throughput": 1845.66, "total_tokens": 10576} {"current_steps": 65, "total_steps": 2490, "loss": 0.3719, "lr": 1.285140562248996e-05, "epoch": 0.26104417670682734, "percentage": 2.61, "elapsed_time": "0:00:06", "remaining_time": "0:03:49", "throughput": 1857.6, "total_tokens": 11424} {"current_steps": 70, "total_steps": 2490, "loss": 0.4068, "lr": 1.3855421686746989e-05, "epoch": 0.28112449799196787, "percentage": 2.81, "elapsed_time": "0:00:06", "remaining_time": "0:03:47", "throughput": 1860.55, "total_tokens": 12224} {"current_steps": 75, "total_steps": 2490, "loss": 0.4348, "lr": 1.4859437751004016e-05, "epoch": 0.30120481927710846, "percentage": 3.01, "elapsed_time": "0:00:06", "remaining_time": "0:03:45", "throughput": 1883.43, "total_tokens": 13168} {"current_steps": 80, "total_steps": 2490, "loss": 0.4399, "lr": 1.5863453815261046e-05, "epoch": 0.321285140562249, "percentage": 3.21, "elapsed_time": "0:00:07", "remaining_time": "0:03:43", "throughput": 1899.69, "total_tokens": 14080} {"current_steps": 85, "total_steps": 2490, "loss": 0.873, "lr": 1.6867469879518073e-05, "epoch": 0.3413654618473896, "percentage": 3.41, "elapsed_time": "0:00:07", "remaining_time": "0:03:41", "throughput": 1922.7, "total_tokens": 15056} {"current_steps": 90, "total_steps": 2490, "loss": 0.5697, "lr": 1.78714859437751e-05, "epoch": 0.3614457831325301, "percentage": 3.61, "elapsed_time": "0:00:08", "remaining_time": "0:03:39", "throughput": 1927.88, "total_tokens": 15904} {"current_steps": 95, "total_steps": 2490, "loss": 0.318, "lr": 1.8875502008032127e-05, "epoch": 0.3815261044176707, "percentage": 3.82, "elapsed_time": "0:00:08", "remaining_time": "0:03:38", "throughput": 1922.77, "total_tokens": 16688} {"current_steps": 100, "total_steps": 2490, "loss": 0.0212, "lr": 1.9879518072289157e-05, "epoch": 0.40160642570281124, "percentage": 4.02, "elapsed_time": "0:00:09", "remaining_time": "0:03:37", "throughput": 1928.11, "total_tokens": 17552} {"current_steps": 105, "total_steps": 2490, "loss": 1.5137, "lr": 2.0883534136546184e-05, "epoch": 0.42168674698795183, "percentage": 4.22, "elapsed_time": "0:00:09", "remaining_time": "0:03:36", "throughput": 1930.59, "total_tokens": 18400} {"current_steps": 110, "total_steps": 2490, "loss": 0.9558, "lr": 2.1887550200803214e-05, "epoch": 0.44176706827309237, "percentage": 4.42, "elapsed_time": "0:00:09", "remaining_time": "0:03:35", "throughput": 1952.14, "total_tokens": 19456} {"current_steps": 115, "total_steps": 2490, "loss": 0.2867, "lr": 2.289156626506024e-05, "epoch": 0.46184738955823296, "percentage": 4.62, "elapsed_time": "0:00:10", "remaining_time": "0:03:34", "throughput": 1952.14, "total_tokens": 20288} {"current_steps": 120, "total_steps": 2490, "loss": 0.7216, "lr": 2.389558232931727e-05, "epoch": 0.4819277108433735, "percentage": 4.82, "elapsed_time": "0:00:10", "remaining_time": "0:03:33", "throughput": 1969.65, "total_tokens": 21328} {"current_steps": 125, "total_steps": 2490, "loss": 0.5349, "lr": 2.48995983935743e-05, "epoch": 0.5020080321285141, "percentage": 5.02, "elapsed_time": "0:00:11", "remaining_time": "0:03:33", "throughput": 1980.5, "total_tokens": 22304} {"current_steps": 125, "total_steps": 2490, "eval_loss": 1.2113524675369263, "epoch": 0.5020080321285141, "percentage": 5.02, "elapsed_time": "0:00:12", "remaining_time": "0:03:56", "throughput": 1784.89, "total_tokens": 22304} {"current_steps": 130, "total_steps": 2490, "loss": 1.3225, "lr": 2.5903614457831325e-05, "epoch": 0.5220883534136547, "percentage": 5.22, "elapsed_time": "0:00:14", "remaining_time": "0:04:26", "throughput": 1572.2, "total_tokens": 23056} {"current_steps": 135, "total_steps": 2490, "loss": 0.3651, "lr": 2.6907630522088356e-05, "epoch": 0.5421686746987951, "percentage": 5.42, "elapsed_time": "0:00:15", "remaining_time": "0:04:23", "throughput": 1579.97, "total_tokens": 23840} {"current_steps": 140, "total_steps": 2490, "loss": 0.5608, "lr": 2.791164658634538e-05, "epoch": 0.5622489959839357, "percentage": 5.62, "elapsed_time": "0:00:15", "remaining_time": "0:04:21", "throughput": 1592.28, "total_tokens": 24832} {"current_steps": 145, "total_steps": 2490, "loss": 0.4016, "lr": 2.891566265060241e-05, "epoch": 0.5823293172690763, "percentage": 5.82, "elapsed_time": "0:00:16", "remaining_time": "0:04:19", "throughput": 1598.87, "total_tokens": 25648} {"current_steps": 150, "total_steps": 2490, "loss": 0.4123, "lr": 2.991967871485944e-05, "epoch": 0.6024096385542169, "percentage": 6.02, "elapsed_time": "0:00:16", "remaining_time": "0:04:16", "throughput": 1609.26, "total_tokens": 26496} {"current_steps": 155, "total_steps": 2490, "loss": 0.538, "lr": 3.092369477911647e-05, "epoch": 0.6224899598393574, "percentage": 6.22, "elapsed_time": "0:00:16", "remaining_time": "0:04:14", "throughput": 1622.05, "total_tokens": 27392} {"current_steps": 160, "total_steps": 2490, "loss": 0.3054, "lr": 3.192771084337349e-05, "epoch": 0.642570281124498, "percentage": 6.43, "elapsed_time": "0:00:17", "remaining_time": "0:04:12", "throughput": 1631.42, "total_tokens": 28272} {"current_steps": 165, "total_steps": 2490, "loss": 0.4957, "lr": 3.2931726907630524e-05, "epoch": 0.6626506024096386, "percentage": 6.63, "elapsed_time": "0:00:17", "remaining_time": "0:04:10", "throughput": 1644.7, "total_tokens": 29184} {"current_steps": 170, "total_steps": 2490, "loss": 0.3453, "lr": 3.393574297188755e-05, "epoch": 0.6827309236947792, "percentage": 6.83, "elapsed_time": "0:00:18", "remaining_time": "0:04:08", "throughput": 1656.61, "total_tokens": 30128} {"current_steps": 175, "total_steps": 2490, "loss": 0.4688, "lr": 3.4939759036144585e-05, "epoch": 0.7028112449799196, "percentage": 7.03, "elapsed_time": "0:00:18", "remaining_time": "0:04:06", "throughput": 1665.18, "total_tokens": 30976} {"current_steps": 180, "total_steps": 2490, "loss": 0.4289, "lr": 3.5943775100401605e-05, "epoch": 0.7228915662650602, "percentage": 7.23, "elapsed_time": "0:00:19", "remaining_time": "0:04:04", "throughput": 1670.98, "total_tokens": 31776} {"current_steps": 185, "total_steps": 2490, "loss": 0.3237, "lr": 3.694779116465863e-05, "epoch": 0.7429718875502008, "percentage": 7.43, "elapsed_time": "0:00:19", "remaining_time": "0:04:02", "throughput": 1677.83, "total_tokens": 32608} {"current_steps": 190, "total_steps": 2490, "loss": 0.5221, "lr": 3.7951807228915666e-05, "epoch": 0.7630522088353414, "percentage": 7.63, "elapsed_time": "0:00:19", "remaining_time": "0:04:00", "throughput": 1680.84, "total_tokens": 33360} {"current_steps": 195, "total_steps": 2490, "loss": 0.2575, "lr": 3.895582329317269e-05, "epoch": 0.7831325301204819, "percentage": 7.83, "elapsed_time": "0:00:20", "remaining_time": "0:03:58", "throughput": 1686.72, "total_tokens": 34176} {"current_steps": 200, "total_steps": 2490, "loss": 1.23, "lr": 3.995983935742972e-05, "epoch": 0.8032128514056225, "percentage": 8.03, "elapsed_time": "0:00:20", "remaining_time": "0:03:56", "throughput": 1692.29, "total_tokens": 34992} {"current_steps": 205, "total_steps": 2490, "loss": 0.4066, "lr": 4.0963855421686746e-05, "epoch": 0.8232931726907631, "percentage": 8.23, "elapsed_time": "0:00:21", "remaining_time": "0:03:55", "throughput": 1701.51, "total_tokens": 35888} {"current_steps": 210, "total_steps": 2490, "loss": 0.3846, "lr": 4.196787148594378e-05, "epoch": 0.8433734939759037, "percentage": 8.43, "elapsed_time": "0:00:21", "remaining_time": "0:03:53", "throughput": 1713.45, "total_tokens": 36848} {"current_steps": 215, "total_steps": 2490, "loss": 0.3303, "lr": 4.297188755020081e-05, "epoch": 0.8634538152610441, "percentage": 8.63, "elapsed_time": "0:00:21", "remaining_time": "0:03:52", "throughput": 1727.71, "total_tokens": 37888} {"current_steps": 220, "total_steps": 2490, "loss": 0.4356, "lr": 4.3975903614457834e-05, "epoch": 0.8835341365461847, "percentage": 8.84, "elapsed_time": "0:00:22", "remaining_time": "0:03:50", "throughput": 1735.11, "total_tokens": 38768} {"current_steps": 225, "total_steps": 2490, "loss": 0.4328, "lr": 4.497991967871486e-05, "epoch": 0.9036144578313253, "percentage": 9.04, "elapsed_time": "0:00:22", "remaining_time": "0:03:49", "throughput": 1735.08, "total_tokens": 39488} {"current_steps": 230, "total_steps": 2490, "loss": 0.6742, "lr": 4.598393574297189e-05, "epoch": 0.9236947791164659, "percentage": 9.24, "elapsed_time": "0:00:23", "remaining_time": "0:03:47", "throughput": 1740.53, "total_tokens": 40336} {"current_steps": 235, "total_steps": 2490, "loss": 0.3987, "lr": 4.698795180722892e-05, "epoch": 0.9437751004016064, "percentage": 9.44, "elapsed_time": "0:00:23", "remaining_time": "0:03:46", "throughput": 1751.29, "total_tokens": 41328} {"current_steps": 240, "total_steps": 2490, "loss": 0.3388, "lr": 4.799196787148594e-05, "epoch": 0.963855421686747, "percentage": 9.64, "elapsed_time": "0:00:24", "remaining_time": "0:03:45", "throughput": 1756.42, "total_tokens": 42176} {"current_steps": 245, "total_steps": 2490, "loss": 0.8551, "lr": 4.8995983935742975e-05, "epoch": 0.9839357429718876, "percentage": 9.84, "elapsed_time": "0:00:24", "remaining_time": "0:03:43", "throughput": 1772.48, "total_tokens": 43312} {"current_steps": 250, "total_steps": 2490, "loss": 0.4306, "lr": 5e-05, "epoch": 1.0040160642570282, "percentage": 10.04, "elapsed_time": "0:00:25", "remaining_time": "0:03:44", "throughput": 1758.02, "total_tokens": 44064} {"current_steps": 250, "total_steps": 2490, "eval_loss": 0.4651975631713867, "epoch": 1.0040160642570282, "percentage": 10.04, "elapsed_time": "0:00:26", "remaining_time": "0:03:55", "throughput": 1676.13, "total_tokens": 44064} {"current_steps": 255, "total_steps": 2490, "loss": 0.7501, "lr": 4.9999385864396127e-05, "epoch": 1.0240963855421688, "percentage": 10.24, "elapsed_time": "0:00:28", "remaining_time": "0:04:06", "throughput": 1593.62, "total_tokens": 44816} {"current_steps": 260, "total_steps": 2490, "loss": 1.1828, "lr": 4.99975434877575e-05, "epoch": 1.0441767068273093, "percentage": 10.44, "elapsed_time": "0:00:28", "remaining_time": "0:04:04", "throughput": 1603.72, "total_tokens": 45776} {"current_steps": 265, "total_steps": 2490, "loss": 0.7021, "lr": 4.999447296060165e-05, "epoch": 1.0642570281124497, "percentage": 10.64, "elapsed_time": "0:00:28", "remaining_time": "0:04:03", "throughput": 1607.09, "total_tokens": 46592} {"current_steps": 270, "total_steps": 2490, "loss": 0.37, "lr": 4.999017443378618e-05, "epoch": 1.0843373493975903, "percentage": 10.84, "elapsed_time": "0:00:29", "remaining_time": "0:04:01", "throughput": 1615.51, "total_tokens": 47536} {"current_steps": 275, "total_steps": 2490, "loss": 0.3415, "lr": 4.998464811850137e-05, "epoch": 1.104417670682731, "percentage": 11.04, "elapsed_time": "0:00:29", "remaining_time": "0:04:00", "throughput": 1619.05, "total_tokens": 48320} {"current_steps": 280, "total_steps": 2490, "loss": 0.381, "lr": 4.997789428625975e-05, "epoch": 1.1244979919678715, "percentage": 11.24, "elapsed_time": "0:00:30", "remaining_time": "0:03:58", "throughput": 1625.49, "total_tokens": 49216} {"current_steps": 285, "total_steps": 2490, "loss": 0.3487, "lr": 4.996991326888286e-05, "epoch": 1.144578313253012, "percentage": 11.45, "elapsed_time": "0:00:30", "remaining_time": "0:03:57", "throughput": 1629.98, "total_tokens": 50048} {"current_steps": 290, "total_steps": 2490, "loss": 0.346, "lr": 4.996070545848484e-05, "epoch": 1.1646586345381527, "percentage": 11.65, "elapsed_time": "0:00:31", "remaining_time": "0:03:56", "throughput": 1633.3, "total_tokens": 50832} {"current_steps": 295, "total_steps": 2490, "loss": 0.3439, "lr": 4.995027130745321e-05, "epoch": 1.1847389558232932, "percentage": 11.85, "elapsed_time": "0:00:31", "remaining_time": "0:03:54", "throughput": 1642.72, "total_tokens": 51824} {"current_steps": 300, "total_steps": 2490, "loss": 0.5375, "lr": 4.9938611328426685e-05, "epoch": 1.2048192771084336, "percentage": 12.05, "elapsed_time": "0:00:31", "remaining_time": "0:03:53", "throughput": 1645.8, "total_tokens": 52608} {"current_steps": 305, "total_steps": 2490, "loss": 0.3537, "lr": 4.992572609426992e-05, "epoch": 1.2248995983935742, "percentage": 12.25, "elapsed_time": "0:00:32", "remaining_time": "0:03:51", "throughput": 1650.27, "total_tokens": 53440} {"current_steps": 310, "total_steps": 2490, "loss": 0.3549, "lr": 4.99116162380454e-05, "epoch": 1.2449799196787148, "percentage": 12.45, "elapsed_time": "0:00:32", "remaining_time": "0:03:50", "throughput": 1656.02, "total_tokens": 54320} {"current_steps": 315, "total_steps": 2490, "loss": 0.3352, "lr": 4.989628245298233e-05, "epoch": 1.2650602409638554, "percentage": 12.65, "elapsed_time": "0:00:33", "remaining_time": "0:03:49", "throughput": 1657.77, "total_tokens": 55072} {"current_steps": 320, "total_steps": 2490, "loss": 0.3695, "lr": 4.987972549244257e-05, "epoch": 1.285140562248996, "percentage": 12.85, "elapsed_time": "0:00:33", "remaining_time": "0:03:48", "throughput": 1671.13, "total_tokens": 56224} {"current_steps": 325, "total_steps": 2490, "loss": 0.281, "lr": 4.986194616988364e-05, "epoch": 1.3052208835341366, "percentage": 13.05, "elapsed_time": "0:00:34", "remaining_time": "0:03:46", "throughput": 1671.13, "total_tokens": 56912} {"current_steps": 330, "total_steps": 2490, "loss": 0.488, "lr": 4.984294535881875e-05, "epoch": 1.3253012048192772, "percentage": 13.25, "elapsed_time": "0:00:34", "remaining_time": "0:03:45", "throughput": 1672.63, "total_tokens": 57648} {"current_steps": 335, "total_steps": 2490, "loss": 0.3598, "lr": 4.982272399277386e-05, "epoch": 1.3453815261044177, "percentage": 13.45, "elapsed_time": "0:00:34", "remaining_time": "0:03:44", "throughput": 1680.46, "total_tokens": 58608} {"current_steps": 340, "total_steps": 2490, "loss": 0.3973, "lr": 4.980128306524183e-05, "epoch": 1.3654618473895583, "percentage": 13.65, "elapsed_time": "0:00:35", "remaining_time": "0:03:43", "throughput": 1683.75, "total_tokens": 59424} {"current_steps": 345, "total_steps": 2490, "loss": 0.3078, "lr": 4.9778623629633635e-05, "epoch": 1.3855421686746987, "percentage": 13.86, "elapsed_time": "0:00:35", "remaining_time": "0:03:41", "throughput": 1688.06, "total_tokens": 60272} {"current_steps": 350, "total_steps": 2490, "loss": 0.4871, "lr": 4.975474679922655e-05, "epoch": 1.4056224899598393, "percentage": 14.06, "elapsed_time": "0:00:36", "remaining_time": "0:03:40", "throughput": 1690.18, "total_tokens": 61056} {"current_steps": 355, "total_steps": 2490, "loss": 0.283, "lr": 4.972965374710952e-05, "epoch": 1.4257028112449799, "percentage": 14.26, "elapsed_time": "0:00:36", "remaining_time": "0:03:39", "throughput": 1695.7, "total_tokens": 61968} {"current_steps": 360, "total_steps": 2490, "loss": 0.3467, "lr": 4.9703345706125485e-05, "epoch": 1.4457831325301205, "percentage": 14.46, "elapsed_time": "0:00:36", "remaining_time": "0:03:38", "throughput": 1699.2, "total_tokens": 62800} {"current_steps": 365, "total_steps": 2490, "loss": 0.4493, "lr": 4.96758239688108e-05, "epoch": 1.465863453815261, "percentage": 14.66, "elapsed_time": "0:00:37", "remaining_time": "0:03:37", "throughput": 1707.03, "total_tokens": 63824} {"current_steps": 370, "total_steps": 2490, "loss": 0.3217, "lr": 4.964708988733178e-05, "epoch": 1.4859437751004017, "percentage": 14.86, "elapsed_time": "0:00:37", "remaining_time": "0:03:36", "throughput": 1713.14, "total_tokens": 64800} {"current_steps": 375, "total_steps": 2490, "loss": 0.3766, "lr": 4.961714487341822e-05, "epoch": 1.5060240963855422, "percentage": 15.06, "elapsed_time": "0:00:38", "remaining_time": "0:03:35", "throughput": 1720.44, "total_tokens": 65808} {"current_steps": 375, "total_steps": 2490, "eval_loss": 0.38677313923835754, "epoch": 1.5060240963855422, "percentage": 15.06, "elapsed_time": "0:00:39", "remaining_time": "0:03:42", "throughput": 1665.86, "total_tokens": 65808} {"current_steps": 380, "total_steps": 2490, "loss": 0.4091, "lr": 4.9585990398294043e-05, "epoch": 1.5261044176706826, "percentage": 15.26, "elapsed_time": "0:00:41", "remaining_time": "0:03:48", "throughput": 1621.61, "total_tokens": 66752} {"current_steps": 385, "total_steps": 2490, "loss": 0.3531, "lr": 4.9553627992605066e-05, "epoch": 1.5461847389558234, "percentage": 15.46, "elapsed_time": "0:00:41", "remaining_time": "0:03:47", "throughput": 1625.63, "total_tokens": 67632} {"current_steps": 390, "total_steps": 2490, "loss": 0.3506, "lr": 4.952005924634372e-05, "epoch": 1.5662650602409638, "percentage": 15.66, "elapsed_time": "0:00:42", "remaining_time": "0:03:46", "throughput": 1627.46, "total_tokens": 68400} {"current_steps": 395, "total_steps": 2490, "loss": 0.3255, "lr": 4.948528580877099e-05, "epoch": 1.5863453815261044, "percentage": 15.86, "elapsed_time": "0:00:42", "remaining_time": "0:03:45", "throughput": 1635.26, "total_tokens": 69408} {"current_steps": 400, "total_steps": 2490, "loss": 0.3689, "lr": 4.944930938833535e-05, "epoch": 1.606425702811245, "percentage": 16.06, "elapsed_time": "0:00:42", "remaining_time": "0:03:43", "throughput": 1641.79, "total_tokens": 70352} {"current_steps": 405, "total_steps": 2490, "loss": 0.374, "lr": 4.9412131752588874e-05, "epoch": 1.6265060240963856, "percentage": 16.27, "elapsed_time": "0:00:43", "remaining_time": "0:03:42", "throughput": 1645.69, "total_tokens": 71184} {"current_steps": 410, "total_steps": 2490, "loss": 0.3785, "lr": 4.937375472810033e-05, "epoch": 1.6465863453815262, "percentage": 16.47, "elapsed_time": "0:00:43", "remaining_time": "0:03:41", "throughput": 1654.86, "total_tokens": 72272} {"current_steps": 415, "total_steps": 2490, "loss": 0.3645, "lr": 4.9334180200365486e-05, "epoch": 1.6666666666666665, "percentage": 16.67, "elapsed_time": "0:00:44", "remaining_time": "0:03:40", "throughput": 1659.27, "total_tokens": 73136} {"current_steps": 420, "total_steps": 2490, "loss": 0.3477, "lr": 4.929341011371448e-05, "epoch": 1.6867469879518073, "percentage": 16.87, "elapsed_time": "0:00:44", "remaining_time": "0:03:39", "throughput": 1660.77, "total_tokens": 73872} {"current_steps": 425, "total_steps": 2490, "loss": 0.3495, "lr": 4.9251446471216226e-05, "epoch": 1.7068273092369477, "percentage": 17.07, "elapsed_time": "0:00:44", "remaining_time": "0:03:38", "throughput": 1665.99, "total_tokens": 74784} {"current_steps": 430, "total_steps": 2490, "loss": 0.3477, "lr": 4.9208291334580104e-05, "epoch": 1.7269076305220885, "percentage": 17.27, "elapsed_time": "0:00:45", "remaining_time": "0:03:36", "throughput": 1670.45, "total_tokens": 75664} {"current_steps": 435, "total_steps": 2490, "loss": 0.4005, "lr": 4.9163946824054574e-05, "epoch": 1.7469879518072289, "percentage": 17.47, "elapsed_time": "0:00:45", "remaining_time": "0:03:35", "throughput": 1675.61, "total_tokens": 76592} {"current_steps": 440, "total_steps": 2490, "loss": 0.3454, "lr": 4.911841511832305e-05, "epoch": 1.7670682730923695, "percentage": 17.67, "elapsed_time": "0:00:46", "remaining_time": "0:03:34", "throughput": 1678.6, "total_tokens": 77408} {"current_steps": 445, "total_steps": 2490, "loss": 0.3494, "lr": 4.907169845439688e-05, "epoch": 1.78714859437751, "percentage": 17.87, "elapsed_time": "0:00:46", "remaining_time": "0:03:33", "throughput": 1682.62, "total_tokens": 78272} {"current_steps": 450, "total_steps": 2490, "loss": 0.3211, "lr": 4.902379912750537e-05, "epoch": 1.8072289156626506, "percentage": 18.07, "elapsed_time": "0:00:46", "remaining_time": "0:03:32", "throughput": 1687.92, "total_tokens": 79200} {"current_steps": 455, "total_steps": 2490, "loss": 0.3843, "lr": 4.897471949098309e-05, "epoch": 1.8273092369477912, "percentage": 18.27, "elapsed_time": "0:00:47", "remaining_time": "0:03:31", "throughput": 1692.71, "total_tokens": 80112} {"current_steps": 460, "total_steps": 2490, "loss": 0.3143, "lr": 4.892446195615423e-05, "epoch": 1.8473895582329316, "percentage": 18.47, "elapsed_time": "0:00:47", "remaining_time": "0:03:30", "throughput": 1700.4, "total_tokens": 81168} {"current_steps": 465, "total_steps": 2490, "loss": 0.3845, "lr": 4.88730289922141e-05, "epoch": 1.8674698795180724, "percentage": 18.67, "elapsed_time": "0:00:48", "remaining_time": "0:03:29", "throughput": 1705.66, "total_tokens": 82112} {"current_steps": 470, "total_steps": 2490, "loss": 0.3467, "lr": 4.8820423126107845e-05, "epoch": 1.8875502008032128, "percentage": 18.88, "elapsed_time": "0:00:48", "remaining_time": "0:03:28", "throughput": 1711.25, "total_tokens": 83072} {"current_steps": 475, "total_steps": 2490, "loss": 0.3683, "lr": 4.87666469424063e-05, "epoch": 1.9076305220883534, "percentage": 19.08, "elapsed_time": "0:00:48", "remaining_time": "0:03:27", "throughput": 1714.41, "total_tokens": 83920} {"current_steps": 480, "total_steps": 2490, "loss": 0.3512, "lr": 4.8711703083178986e-05, "epoch": 1.927710843373494, "percentage": 19.28, "elapsed_time": "0:00:49", "remaining_time": "0:03:26", "throughput": 1717.5, "total_tokens": 84768} {"current_steps": 485, "total_steps": 2490, "loss": 0.3478, "lr": 4.865559424786432e-05, "epoch": 1.9477911646586346, "percentage": 19.48, "elapsed_time": "0:00:49", "remaining_time": "0:03:25", "throughput": 1720.51, "total_tokens": 85616} {"current_steps": 490, "total_steps": 2490, "loss": 0.3477, "lr": 4.859832319313697e-05, "epoch": 1.9678714859437751, "percentage": 19.68, "elapsed_time": "0:00:50", "remaining_time": "0:03:24", "throughput": 1722.4, "total_tokens": 86400} {"current_steps": 495, "total_steps": 2490, "loss": 0.3753, "lr": 4.8539892732772455e-05, "epoch": 1.9879518072289155, "percentage": 19.88, "elapsed_time": "0:00:50", "remaining_time": "0:03:23", "throughput": 1725.16, "total_tokens": 87216} {"current_steps": 500, "total_steps": 2490, "loss": 0.3159, "lr": 4.848030573750885e-05, "epoch": 2.0080321285140563, "percentage": 20.08, "elapsed_time": "0:00:51", "remaining_time": "0:03:23", "throughput": 1719.14, "total_tokens": 88048} {"current_steps": 500, "total_steps": 2490, "eval_loss": 0.3829512596130371, "epoch": 2.0080321285140563, "percentage": 20.08, "elapsed_time": "0:00:52", "remaining_time": "0:03:28", "throughput": 1678.87, "total_tokens": 88048} {"current_steps": 505, "total_steps": 2490, "loss": 0.3501, "lr": 4.841956513490577e-05, "epoch": 2.0281124497991967, "percentage": 20.28, "elapsed_time": "0:00:54", "remaining_time": "0:03:33", "throughput": 1639.6, "total_tokens": 88896} {"current_steps": 510, "total_steps": 2490, "loss": 0.3452, "lr": 4.8357673909200563e-05, "epoch": 2.0481927710843375, "percentage": 20.48, "elapsed_time": "0:00:54", "remaining_time": "0:03:32", "throughput": 1642.71, "total_tokens": 89744} {"current_steps": 515, "total_steps": 2490, "loss": 0.3738, "lr": 4.8294635101161645e-05, "epoch": 2.068273092369478, "percentage": 20.68, "elapsed_time": "0:00:55", "remaining_time": "0:03:31", "throughput": 1644.67, "total_tokens": 90528} {"current_steps": 520, "total_steps": 2490, "loss": 0.3347, "lr": 4.8230451807939135e-05, "epoch": 2.0883534136546187, "percentage": 20.88, "elapsed_time": "0:00:55", "remaining_time": "0:03:30", "throughput": 1647.53, "total_tokens": 91360} {"current_steps": 525, "total_steps": 2490, "loss": 0.346, "lr": 4.816512718291267e-05, "epoch": 2.108433734939759, "percentage": 21.08, "elapsed_time": "0:00:55", "remaining_time": "0:03:29", "throughput": 1650.15, "total_tokens": 92176} {"current_steps": 530, "total_steps": 2490, "loss": 0.341, "lr": 4.80986644355365e-05, "epoch": 2.1285140562248994, "percentage": 21.29, "elapsed_time": "0:00:56", "remaining_time": "0:03:28", "throughput": 1654.76, "total_tokens": 93104} {"current_steps": 535, "total_steps": 2490, "loss": 0.3588, "lr": 4.803106683118177e-05, "epoch": 2.1485943775100402, "percentage": 21.49, "elapsed_time": "0:00:56", "remaining_time": "0:03:27", "throughput": 1658.37, "total_tokens": 93984} {"current_steps": 540, "total_steps": 2490, "loss": 0.3438, "lr": 4.796233769097615e-05, "epoch": 2.1686746987951806, "percentage": 21.69, "elapsed_time": "0:00:57", "remaining_time": "0:03:26", "throughput": 1662.6, "total_tokens": 94896} {"current_steps": 545, "total_steps": 2490, "loss": 0.375, "lr": 4.789248039164058e-05, "epoch": 2.1887550200803214, "percentage": 21.89, "elapsed_time": "0:00:57", "remaining_time": "0:03:25", "throughput": 1667.04, "total_tokens": 95824} {"current_steps": 550, "total_steps": 2490, "loss": 0.3607, "lr": 4.782149836532345e-05, "epoch": 2.208835341365462, "percentage": 22.09, "elapsed_time": "0:00:57", "remaining_time": "0:03:24", "throughput": 1670.38, "total_tokens": 96688} {"current_steps": 555, "total_steps": 2490, "loss": 0.3312, "lr": 4.7749395099431924e-05, "epoch": 2.2289156626506026, "percentage": 22.29, "elapsed_time": "0:00:58", "remaining_time": "0:03:23", "throughput": 1672.55, "total_tokens": 97488} {"current_steps": 560, "total_steps": 2490, "loss": 0.4083, "lr": 4.7676174136460625e-05, "epoch": 2.248995983935743, "percentage": 22.49, "elapsed_time": "0:00:58", "remaining_time": "0:03:22", "throughput": 1674.68, "total_tokens": 98288} {"current_steps": 565, "total_steps": 2490, "loss": 0.3447, "lr": 4.760183907381757e-05, "epoch": 2.2690763052208833, "percentage": 22.69, "elapsed_time": "0:00:59", "remaining_time": "0:03:21", "throughput": 1678.37, "total_tokens": 99200} {"current_steps": 570, "total_steps": 2490, "loss": 0.3228, "lr": 4.752639356364744e-05, "epoch": 2.289156626506024, "percentage": 22.89, "elapsed_time": "0:00:59", "remaining_time": "0:03:20", "throughput": 1680.2, "total_tokens": 99984} {"current_steps": 575, "total_steps": 2490, "loss": 0.3781, "lr": 4.7449841312652166e-05, "epoch": 2.3092369477911645, "percentage": 23.09, "elapsed_time": "0:00:59", "remaining_time": "0:03:19", "throughput": 1682.19, "total_tokens": 100784} {"current_steps": 580, "total_steps": 2490, "loss": 0.367, "lr": 4.737218608190878e-05, "epoch": 2.3293172690763053, "percentage": 23.29, "elapsed_time": "0:01:00", "remaining_time": "0:03:18", "throughput": 1684.08, "total_tokens": 101584} {"current_steps": 585, "total_steps": 2490, "loss": 0.3603, "lr": 4.729343168668463e-05, "epoch": 2.3493975903614457, "percentage": 23.49, "elapsed_time": "0:01:00", "remaining_time": "0:03:17", "throughput": 1687.63, "total_tokens": 102480} {"current_steps": 590, "total_steps": 2490, "loss": 0.3631, "lr": 4.721358199624997e-05, "epoch": 2.3694779116465865, "percentage": 23.69, "elapsed_time": "0:01:01", "remaining_time": "0:03:16", "throughput": 1691.63, "total_tokens": 103408} {"current_steps": 595, "total_steps": 2490, "loss": 0.3911, "lr": 4.713264093368783e-05, "epoch": 2.389558232931727, "percentage": 23.9, "elapsed_time": "0:01:01", "remaining_time": "0:03:15", "throughput": 1692.77, "total_tokens": 104160} {"current_steps": 600, "total_steps": 2490, "loss": 0.3406, "lr": 4.705061247570128e-05, "epoch": 2.4096385542168672, "percentage": 24.1, "elapsed_time": "0:01:01", "remaining_time": "0:03:15", "throughput": 1695.93, "total_tokens": 105040} {"current_steps": 605, "total_steps": 2490, "loss": 0.3582, "lr": 4.6967500652418034e-05, "epoch": 2.429718875502008, "percentage": 24.3, "elapsed_time": "0:01:02", "remaining_time": "0:03:14", "throughput": 1698.06, "total_tokens": 105856} {"current_steps": 610, "total_steps": 2490, "loss": 0.3701, "lr": 4.6883309547192476e-05, "epoch": 2.4497991967871484, "percentage": 24.5, "elapsed_time": "0:01:02", "remaining_time": "0:03:13", "throughput": 1703.81, "total_tokens": 106928} {"current_steps": 615, "total_steps": 2490, "loss": 0.3423, "lr": 4.679804329640505e-05, "epoch": 2.4698795180722892, "percentage": 24.7, "elapsed_time": "0:01:03", "remaining_time": "0:03:12", "throughput": 1706.68, "total_tokens": 107808} {"current_steps": 620, "total_steps": 2490, "loss": 0.3104, "lr": 4.6711706089258955e-05, "epoch": 2.4899598393574296, "percentage": 24.9, "elapsed_time": "0:01:03", "remaining_time": "0:03:11", "throughput": 1709.18, "total_tokens": 108656} {"current_steps": 625, "total_steps": 2490, "loss": 0.3958, "lr": 4.6624302167574436e-05, "epoch": 2.5100401606425704, "percentage": 25.1, "elapsed_time": "0:01:03", "remaining_time": "0:03:10", "throughput": 1714.34, "total_tokens": 109696} {"current_steps": 625, "total_steps": 2490, "eval_loss": 0.3675794303417206, "epoch": 2.5100401606425704, "percentage": 25.1, "elapsed_time": "0:01:05", "remaining_time": "0:03:14", "throughput": 1682.42, "total_tokens": 109696} {"current_steps": 630, "total_steps": 2490, "loss": 0.3587, "lr": 4.653583582558031e-05, "epoch": 2.5301204819277108, "percentage": 25.3, "elapsed_time": "0:01:06", "remaining_time": "0:03:17", "throughput": 1654.57, "total_tokens": 110576} {"current_steps": 635, "total_steps": 2490, "loss": 0.365, "lr": 4.6446311409703006e-05, "epoch": 2.550200803212851, "percentage": 25.5, "elapsed_time": "0:01:07", "remaining_time": "0:03:16", "throughput": 1657.41, "total_tokens": 111440} {"current_steps": 640, "total_steps": 2490, "loss": 0.3339, "lr": 4.635573331835302e-05, "epoch": 2.570281124497992, "percentage": 25.7, "elapsed_time": "0:01:07", "remaining_time": "0:03:15", "throughput": 1658.45, "total_tokens": 112192} {"current_steps": 645, "total_steps": 2490, "loss": 0.3631, "lr": 4.6264106001708824e-05, "epoch": 2.5903614457831328, "percentage": 25.9, "elapsed_time": "0:01:08", "remaining_time": "0:03:14", "throughput": 1660.84, "total_tokens": 113024} {"current_steps": 650, "total_steps": 2490, "loss": 0.3795, "lr": 4.61714339614982e-05, "epoch": 2.610441767068273, "percentage": 26.1, "elapsed_time": "0:01:08", "remaining_time": "0:03:13", "throughput": 1664.61, "total_tokens": 113952} {"current_steps": 655, "total_steps": 2490, "loss": 0.3586, "lr": 4.607772175077711e-05, "epoch": 2.6305220883534135, "percentage": 26.31, "elapsed_time": "0:01:08", "remaining_time": "0:03:12", "throughput": 1668.77, "total_tokens": 114928} {"current_steps": 660, "total_steps": 2490, "loss": 0.3726, "lr": 4.598297397370596e-05, "epoch": 2.6506024096385543, "percentage": 26.51, "elapsed_time": "0:01:09", "remaining_time": "0:03:12", "throughput": 1670.52, "total_tokens": 115728} {"current_steps": 665, "total_steps": 2490, "loss": 0.3549, "lr": 4.588719528532342e-05, "epoch": 2.6706827309236947, "percentage": 26.71, "elapsed_time": "0:01:09", "remaining_time": "0:03:11", "throughput": 1672.57, "total_tokens": 116544} {"current_steps": 670, "total_steps": 2490, "loss": 0.3379, "lr": 4.5790390391317675e-05, "epoch": 2.6907630522088355, "percentage": 26.91, "elapsed_time": "0:01:10", "remaining_time": "0:03:10", "throughput": 1677.26, "total_tokens": 117568} {"current_steps": 675, "total_steps": 2490, "loss": 0.3688, "lr": 4.5692564047795316e-05, "epoch": 2.710843373493976, "percentage": 27.11, "elapsed_time": "0:01:10", "remaining_time": "0:03:09", "throughput": 1678.97, "total_tokens": 118368} {"current_steps": 680, "total_steps": 2490, "loss": 0.3455, "lr": 4.5593721061047576e-05, "epoch": 2.7309236947791167, "percentage": 27.31, "elapsed_time": "0:01:10", "remaining_time": "0:03:08", "throughput": 1679.99, "total_tokens": 119120} {"current_steps": 685, "total_steps": 2490, "loss": 0.3575, "lr": 4.549386628731425e-05, "epoch": 2.751004016064257, "percentage": 27.51, "elapsed_time": "0:01:11", "remaining_time": "0:03:07", "throughput": 1683.7, "total_tokens": 120064} {"current_steps": 690, "total_steps": 2490, "loss": 0.3721, "lr": 4.5393004632545064e-05, "epoch": 2.7710843373493974, "percentage": 27.71, "elapsed_time": "0:01:11", "remaining_time": "0:03:07", "throughput": 1686.43, "total_tokens": 120960} {"current_steps": 695, "total_steps": 2490, "loss": 0.3545, "lr": 4.529114105215869e-05, "epoch": 2.791164658634538, "percentage": 27.91, "elapsed_time": "0:01:12", "remaining_time": "0:03:06", "throughput": 1688.01, "total_tokens": 121760} {"current_steps": 700, "total_steps": 2490, "loss": 0.3675, "lr": 4.518828055079925e-05, "epoch": 2.8112449799196786, "percentage": 28.11, "elapsed_time": "0:01:12", "remaining_time": "0:03:05", "throughput": 1691.8, "total_tokens": 122720} {"current_steps": 705, "total_steps": 2490, "loss": 0.3543, "lr": 4.508442818209042e-05, "epoch": 2.8313253012048194, "percentage": 28.31, "elapsed_time": "0:01:12", "remaining_time": "0:03:04", "throughput": 1696.02, "total_tokens": 123712} {"current_steps": 710, "total_steps": 2490, "loss": 0.3561, "lr": 4.4979589048387186e-05, "epoch": 2.8514056224899598, "percentage": 28.51, "elapsed_time": "0:01:13", "remaining_time": "0:03:03", "throughput": 1698.88, "total_tokens": 124624} {"current_steps": 715, "total_steps": 2490, "loss": 0.3474, "lr": 4.487376830052511e-05, "epoch": 2.8714859437751006, "percentage": 28.71, "elapsed_time": "0:01:13", "remaining_time": "0:03:03", "throughput": 1703.84, "total_tokens": 125696} {"current_steps": 720, "total_steps": 2490, "loss": 0.2977, "lr": 4.476697113756731e-05, "epoch": 2.891566265060241, "percentage": 28.92, "elapsed_time": "0:01:14", "remaining_time": "0:03:02", "throughput": 1705.15, "total_tokens": 126480} {"current_steps": 725, "total_steps": 2490, "loss": 0.3658, "lr": 4.465920280654901e-05, "epoch": 2.9116465863453813, "percentage": 29.12, "elapsed_time": "0:01:14", "remaining_time": "0:03:01", "throughput": 1707.13, "total_tokens": 127312} {"current_steps": 730, "total_steps": 2490, "loss": 0.3475, "lr": 4.4550468602219716e-05, "epoch": 2.931726907630522, "percentage": 29.32, "elapsed_time": "0:01:14", "remaining_time": "0:03:00", "throughput": 1711.64, "total_tokens": 128352} {"current_steps": 735, "total_steps": 2490, "loss": 0.4262, "lr": 4.4440773866783136e-05, "epoch": 2.9518072289156625, "percentage": 29.52, "elapsed_time": "0:01:15", "remaining_time": "0:03:00", "throughput": 1714.03, "total_tokens": 129232} {"current_steps": 740, "total_steps": 2490, "loss": 0.4037, "lr": 4.433012398963468e-05, "epoch": 2.9718875502008033, "percentage": 29.72, "elapsed_time": "0:01:15", "remaining_time": "0:02:59", "throughput": 1716.21, "total_tokens": 130080} {"current_steps": 745, "total_steps": 2490, "loss": 0.3459, "lr": 4.421852440709666e-05, "epoch": 2.9919678714859437, "percentage": 29.92, "elapsed_time": "0:01:16", "remaining_time": "0:02:58", "throughput": 1717.88, "total_tokens": 130880} {"current_steps": 750, "total_steps": 2490, "loss": 0.3521, "lr": 4.4105980602151256e-05, "epoch": 3.0120481927710845, "percentage": 30.12, "elapsed_time": "0:01:16", "remaining_time": "0:02:58", "throughput": 1715.69, "total_tokens": 131872} {"current_steps": 750, "total_steps": 2490, "eval_loss": 0.35073402523994446, "epoch": 3.0120481927710845, "percentage": 30.12, "elapsed_time": "0:01:18", "remaining_time": "0:03:01", "throughput": 1688.87, "total_tokens": 131872} {"current_steps": 755, "total_steps": 2490, "loss": 0.354, "lr": 4.399249810417108e-05, "epoch": 3.032128514056225, "percentage": 30.32, "elapsed_time": "0:01:19", "remaining_time": "0:03:03", "throughput": 1663.84, "total_tokens": 132656} {"current_steps": 760, "total_steps": 2490, "loss": 0.3708, "lr": 4.387808248864751e-05, "epoch": 3.0522088353413657, "percentage": 30.52, "elapsed_time": "0:01:20", "remaining_time": "0:03:02", "throughput": 1665.46, "total_tokens": 133472} {"current_steps": 765, "total_steps": 2490, "loss": 0.3463, "lr": 4.376273937691681e-05, "epoch": 3.072289156626506, "percentage": 30.72, "elapsed_time": "0:01:20", "remaining_time": "0:03:01", "throughput": 1668.49, "total_tokens": 134416} {"current_steps": 770, "total_steps": 2490, "loss": 0.3485, "lr": 4.364647443588389e-05, "epoch": 3.0923694779116464, "percentage": 30.92, "elapsed_time": "0:01:20", "remaining_time": "0:03:00", "throughput": 1671.61, "total_tokens": 135344} {"current_steps": 775, "total_steps": 2490, "loss": 0.3382, "lr": 4.352929337774395e-05, "epoch": 3.112449799196787, "percentage": 31.12, "elapsed_time": "0:01:21", "remaining_time": "0:03:00", "throughput": 1674.3, "total_tokens": 136240} {"current_steps": 780, "total_steps": 2490, "loss": 0.3559, "lr": 4.341120195970178e-05, "epoch": 3.1325301204819276, "percentage": 31.33, "elapsed_time": "0:01:21", "remaining_time": "0:02:59", "throughput": 1676.67, "total_tokens": 137120} {"current_steps": 785, "total_steps": 2490, "loss": 0.36, "lr": 4.3292205983688905e-05, "epoch": 3.1526104417670684, "percentage": 31.53, "elapsed_time": "0:01:22", "remaining_time": "0:02:58", "throughput": 1680.45, "total_tokens": 138112} {"current_steps": 790, "total_steps": 2490, "loss": 0.3472, "lr": 4.3172311296078595e-05, "epoch": 3.1726907630522088, "percentage": 31.73, "elapsed_time": "0:01:22", "remaining_time": "0:02:57", "throughput": 1682.5, "total_tokens": 138960} {"current_steps": 795, "total_steps": 2490, "loss": 0.3646, "lr": 4.305152378739855e-05, "epoch": 3.1927710843373496, "percentage": 31.93, "elapsed_time": "0:01:23", "remaining_time": "0:02:56", "throughput": 1686.75, "total_tokens": 140016} {"current_steps": 800, "total_steps": 2490, "loss": 0.3357, "lr": 4.292984939204155e-05, "epoch": 3.21285140562249, "percentage": 32.13, "elapsed_time": "0:01:23", "remaining_time": "0:02:56", "throughput": 1687.49, "total_tokens": 140768} {"current_steps": 805, "total_steps": 2490, "loss": 0.3444, "lr": 4.2807294087973834e-05, "epoch": 3.2329317269076308, "percentage": 32.33, "elapsed_time": "0:01:23", "remaining_time": "0:02:55", "throughput": 1690.05, "total_tokens": 141664} {"current_steps": 810, "total_steps": 2490, "loss": 0.3541, "lr": 4.2683863896441475e-05, "epoch": 3.253012048192771, "percentage": 32.53, "elapsed_time": "0:01:24", "remaining_time": "0:02:54", "throughput": 1691.27, "total_tokens": 142448} {"current_steps": 815, "total_steps": 2490, "loss": 0.3619, "lr": 4.255956488167449e-05, "epoch": 3.2730923694779115, "percentage": 32.73, "elapsed_time": "0:01:24", "remaining_time": "0:02:53", "throughput": 1694.23, "total_tokens": 143408} {"current_steps": 820, "total_steps": 2490, "loss": 0.3449, "lr": 4.2434403150588895e-05, "epoch": 3.2931726907630523, "percentage": 32.93, "elapsed_time": "0:01:25", "remaining_time": "0:02:53", "throughput": 1696.14, "total_tokens": 144256} {"current_steps": 825, "total_steps": 2490, "loss": 0.3504, "lr": 4.230838485248674e-05, "epoch": 3.3132530120481927, "percentage": 33.13, "elapsed_time": "0:01:25", "remaining_time": "0:02:52", "throughput": 1698.27, "total_tokens": 145120} {"current_steps": 830, "total_steps": 2490, "loss": 0.3445, "lr": 4.21815161787539e-05, "epoch": 3.3333333333333335, "percentage": 33.33, "elapsed_time": "0:01:25", "remaining_time": "0:02:51", "throughput": 1701.23, "total_tokens": 146080} {"current_steps": 835, "total_steps": 2490, "loss": 0.3418, "lr": 4.205380336255594e-05, "epoch": 3.353413654618474, "percentage": 33.53, "elapsed_time": "0:01:26", "remaining_time": "0:02:50", "throughput": 1702.92, "total_tokens": 146912} {"current_steps": 840, "total_steps": 2490, "loss": 0.2934, "lr": 4.192525267853188e-05, "epoch": 3.3734939759036147, "percentage": 33.73, "elapsed_time": "0:01:26", "remaining_time": "0:02:50", "throughput": 1704.89, "total_tokens": 147776} {"current_steps": 845, "total_steps": 2490, "loss": 0.2829, "lr": 4.179587044248585e-05, "epoch": 3.393574297188755, "percentage": 33.94, "elapsed_time": "0:01:27", "remaining_time": "0:02:49", "throughput": 1708.16, "total_tokens": 148768} {"current_steps": 850, "total_steps": 2490, "loss": 0.5387, "lr": 4.166566301107687e-05, "epoch": 3.4136546184738954, "percentage": 34.14, "elapsed_time": "0:01:27", "remaining_time": "0:02:48", "throughput": 1711.21, "total_tokens": 149728} {"current_steps": 855, "total_steps": 2490, "loss": 0.3639, "lr": 4.153463678150651e-05, "epoch": 3.433734939759036, "percentage": 34.34, "elapsed_time": "0:01:27", "remaining_time": "0:02:48", "throughput": 1715.14, "total_tokens": 150784} {"current_steps": 860, "total_steps": 2490, "loss": 0.3721, "lr": 4.140279819120457e-05, "epoch": 3.4538152610441766, "percentage": 34.54, "elapsed_time": "0:01:28", "remaining_time": "0:02:47", "throughput": 1717.94, "total_tokens": 151728} {"current_steps": 865, "total_steps": 2490, "loss": 0.3656, "lr": 4.127015371751284e-05, "epoch": 3.4738955823293174, "percentage": 34.74, "elapsed_time": "0:01:28", "remaining_time": "0:02:46", "throughput": 1720.39, "total_tokens": 152640} {"current_steps": 870, "total_steps": 2490, "loss": 0.3193, "lr": 4.1136709877366844e-05, "epoch": 3.4939759036144578, "percentage": 34.94, "elapsed_time": "0:01:29", "remaining_time": "0:02:45", "throughput": 1721.43, "total_tokens": 153424} {"current_steps": 875, "total_steps": 2490, "loss": 0.3677, "lr": 4.100247322697562e-05, "epoch": 3.5140562248995986, "percentage": 35.14, "elapsed_time": "0:01:29", "remaining_time": "0:02:45", "throughput": 1724.59, "total_tokens": 154416} {"current_steps": 875, "total_steps": 2490, "eval_loss": 0.3535325825214386, "epoch": 3.5140562248995986, "percentage": 35.14, "elapsed_time": "0:01:30", "remaining_time": "0:02:47", "throughput": 1701.37, "total_tokens": 154416} {"current_steps": 880, "total_steps": 2490, "loss": 0.3907, "lr": 4.08674503614997e-05, "epoch": 3.534136546184739, "percentage": 35.34, "elapsed_time": "0:01:32", "remaining_time": "0:02:48", "throughput": 1680.52, "total_tokens": 155184} {"current_steps": 885, "total_steps": 2490, "loss": 0.3941, "lr": 4.0731647914727004e-05, "epoch": 3.5542168674698793, "percentage": 35.54, "elapsed_time": "0:01:32", "remaining_time": "0:02:48", "throughput": 1681.96, "total_tokens": 156000} {"current_steps": 890, "total_steps": 2490, "loss": 0.345, "lr": 4.059507255874694e-05, "epoch": 3.57429718875502, "percentage": 35.74, "elapsed_time": "0:01:33", "remaining_time": "0:02:47", "throughput": 1684.69, "total_tokens": 156976} {"current_steps": 895, "total_steps": 2490, "loss": 0.3331, "lr": 4.0457731003622606e-05, "epoch": 3.5943775100401605, "percentage": 35.94, "elapsed_time": "0:01:33", "remaining_time": "0:02:46", "throughput": 1687.33, "total_tokens": 157904} {"current_steps": 900, "total_steps": 2490, "loss": 0.3339, "lr": 4.0319629997061116e-05, "epoch": 3.6144578313253013, "percentage": 36.14, "elapsed_time": "0:01:33", "remaining_time": "0:02:46", "throughput": 1690.21, "total_tokens": 158864} {"current_steps": 905, "total_steps": 2490, "loss": 0.2827, "lr": 4.018077632408207e-05, "epoch": 3.6345381526104417, "percentage": 36.35, "elapsed_time": "0:01:34", "remaining_time": "0:02:45", "throughput": 1692.29, "total_tokens": 159744} {"current_steps": 910, "total_steps": 2490, "loss": 0.3838, "lr": 4.004117680668422e-05, "epoch": 3.6546184738955825, "percentage": 36.55, "elapsed_time": "0:01:34", "remaining_time": "0:02:44", "throughput": 1694.18, "total_tokens": 160608} {"current_steps": 915, "total_steps": 2490, "loss": 0.3816, "lr": 3.990083830351027e-05, "epoch": 3.674698795180723, "percentage": 36.75, "elapsed_time": "0:01:35", "remaining_time": "0:02:43", "throughput": 1696.25, "total_tokens": 161488} {"current_steps": 920, "total_steps": 2490, "loss": 0.4066, "lr": 3.975976770950994e-05, "epoch": 3.694779116465863, "percentage": 36.95, "elapsed_time": "0:01:35", "remaining_time": "0:02:43", "throughput": 1696.79, "total_tokens": 162224} {"current_steps": 925, "total_steps": 2490, "loss": 0.3183, "lr": 3.961797195560118e-05, "epoch": 3.714859437751004, "percentage": 37.15, "elapsed_time": "0:01:36", "remaining_time": "0:02:42", "throughput": 1698.3, "total_tokens": 163056} {"current_steps": 930, "total_steps": 2490, "loss": 0.3641, "lr": 3.947545800832967e-05, "epoch": 3.734939759036145, "percentage": 37.35, "elapsed_time": "0:01:36", "remaining_time": "0:02:41", "throughput": 1699.51, "total_tokens": 163856} {"current_steps": 935, "total_steps": 2490, "loss": 0.3394, "lr": 3.9332232869526534e-05, "epoch": 3.755020080321285, "percentage": 37.55, "elapsed_time": "0:01:36", "remaining_time": "0:02:41", "throughput": 1701.84, "total_tokens": 164768} {"current_steps": 940, "total_steps": 2490, "loss": 0.3368, "lr": 3.918830357596434e-05, "epoch": 3.7751004016064256, "percentage": 37.75, "elapsed_time": "0:01:37", "remaining_time": "0:02:40", "throughput": 1703.35, "total_tokens": 165600} {"current_steps": 945, "total_steps": 2490, "loss": 0.3511, "lr": 3.9043677199011364e-05, "epoch": 3.7951807228915664, "percentage": 37.95, "elapsed_time": "0:01:37", "remaining_time": "0:02:39", "throughput": 1704.46, "total_tokens": 166400} {"current_steps": 950, "total_steps": 2490, "loss": 0.328, "lr": 3.889836084428422e-05, "epoch": 3.8152610441767068, "percentage": 38.15, "elapsed_time": "0:01:38", "remaining_time": "0:02:38", "throughput": 1706.54, "total_tokens": 167296} {"current_steps": 955, "total_steps": 2490, "loss": 0.369, "lr": 3.8752361651298675e-05, "epoch": 3.835341365461847, "percentage": 38.35, "elapsed_time": "0:01:38", "remaining_time": "0:02:38", "throughput": 1708.73, "total_tokens": 168208} {"current_steps": 960, "total_steps": 2490, "loss": 0.3657, "lr": 3.860568679311893e-05, "epoch": 3.855421686746988, "percentage": 38.55, "elapsed_time": "0:01:38", "remaining_time": "0:02:37", "throughput": 1710.3, "total_tokens": 169056} {"current_steps": 965, "total_steps": 2490, "loss": 0.3849, "lr": 3.8458343476005196e-05, "epoch": 3.8755020080321287, "percentage": 38.76, "elapsed_time": "0:01:39", "remaining_time": "0:02:36", "throughput": 1711.7, "total_tokens": 169888} {"current_steps": 970, "total_steps": 2490, "loss": 0.3541, "lr": 3.8310338939059644e-05, "epoch": 3.895582329317269, "percentage": 38.96, "elapsed_time": "0:01:39", "remaining_time": "0:02:36", "throughput": 1712.93, "total_tokens": 170704} {"current_steps": 975, "total_steps": 2490, "loss": 0.3558, "lr": 3.8161680453870715e-05, "epoch": 3.9156626506024095, "percentage": 39.16, "elapsed_time": "0:01:40", "remaining_time": "0:02:35", "throughput": 1714.97, "total_tokens": 171600} {"current_steps": 980, "total_steps": 2490, "loss": 0.3131, "lr": 3.8012375324155904e-05, "epoch": 3.9357429718875503, "percentage": 39.36, "elapsed_time": "0:01:40", "remaining_time": "0:02:34", "throughput": 1716.83, "total_tokens": 172480} {"current_steps": 985, "total_steps": 2490, "loss": 0.3661, "lr": 3.7862430885402876e-05, "epoch": 3.9558232931726907, "percentage": 39.56, "elapsed_time": "0:01:40", "remaining_time": "0:02:34", "throughput": 1719.91, "total_tokens": 173504} {"current_steps": 990, "total_steps": 2490, "loss": 0.3373, "lr": 3.7711854504509135e-05, "epoch": 3.9759036144578315, "percentage": 39.76, "elapsed_time": "0:01:41", "remaining_time": "0:02:33", "throughput": 1720.91, "total_tokens": 174288} {"current_steps": 995, "total_steps": 2490, "loss": 0.3623, "lr": 3.756065357941999e-05, "epoch": 3.995983935742972, "percentage": 39.96, "elapsed_time": "0:01:41", "remaining_time": "0:02:32", "throughput": 1722.3, "total_tokens": 175104} {"current_steps": 1000, "total_steps": 2490, "loss": 0.3426, "lr": 3.740883553876515e-05, "epoch": 4.016064257028113, "percentage": 40.16, "elapsed_time": "0:01:42", "remaining_time": "0:02:32", "throughput": 1720.08, "total_tokens": 176048} {"current_steps": 1000, "total_steps": 2490, "eval_loss": 0.35071006417274475, "epoch": 4.016064257028113, "percentage": 40.16, "elapsed_time": "0:01:43", "remaining_time": "0:02:34", "throughput": 1699.8, "total_tokens": 176048} {"current_steps": 1005, "total_steps": 2490, "loss": 0.4204, "lr": 3.725640784149375e-05, "epoch": 4.036144578313253, "percentage": 40.36, "elapsed_time": "0:01:45", "remaining_time": "0:02:35", "throughput": 1680.11, "total_tokens": 176880} {"current_steps": 1010, "total_steps": 2490, "loss": 0.339, "lr": 3.710337797650787e-05, "epoch": 4.056224899598393, "percentage": 40.56, "elapsed_time": "0:01:45", "remaining_time": "0:02:34", "throughput": 1681.15, "total_tokens": 177680} {"current_steps": 1015, "total_steps": 2490, "loss": 0.3311, "lr": 3.694975346229458e-05, "epoch": 4.076305220883534, "percentage": 40.76, "elapsed_time": "0:01:46", "remaining_time": "0:02:34", "throughput": 1683.41, "total_tokens": 178608} {"current_steps": 1020, "total_steps": 2490, "loss": 0.3611, "lr": 3.679554184655659e-05, "epoch": 4.096385542168675, "percentage": 40.96, "elapsed_time": "0:01:46", "remaining_time": "0:02:33", "throughput": 1686.29, "total_tokens": 179600} {"current_steps": 1025, "total_steps": 2490, "loss": 0.3403, "lr": 3.6640750705841405e-05, "epoch": 4.116465863453815, "percentage": 41.16, "elapsed_time": "0:01:46", "remaining_time": "0:02:32", "throughput": 1687.99, "total_tokens": 180464} {"current_steps": 1030, "total_steps": 2490, "loss": 0.3243, "lr": 3.6485387645169064e-05, "epoch": 4.136546184738956, "percentage": 41.37, "elapsed_time": "0:01:47", "remaining_time": "0:02:32", "throughput": 1689.71, "total_tokens": 181344} {"current_steps": 1035, "total_steps": 2490, "loss": 0.3965, "lr": 3.632946029765856e-05, "epoch": 4.156626506024097, "percentage": 41.57, "elapsed_time": "0:01:47", "remaining_time": "0:02:31", "throughput": 1690.22, "total_tokens": 182080} {"current_steps": 1040, "total_steps": 2490, "loss": 0.3719, "lr": 3.617297632415273e-05, "epoch": 4.176706827309237, "percentage": 41.77, "elapsed_time": "0:01:48", "remaining_time": "0:02:30", "throughput": 1690.96, "total_tokens": 182848} {"current_steps": 1045, "total_steps": 2490, "loss": 0.3512, "lr": 3.601594341284195e-05, "epoch": 4.196787148594377, "percentage": 41.97, "elapsed_time": "0:01:48", "remaining_time": "0:02:30", "throughput": 1693.61, "total_tokens": 183840} {"current_steps": 1050, "total_steps": 2490, "loss": 0.3388, "lr": 3.5858369278886354e-05, "epoch": 4.216867469879518, "percentage": 42.17, "elapsed_time": "0:01:48", "remaining_time": "0:02:29", "throughput": 1695.41, "total_tokens": 184720} {"current_steps": 1055, "total_steps": 2490, "loss": 0.3457, "lr": 3.5700261664036827e-05, "epoch": 4.236947791164659, "percentage": 42.37, "elapsed_time": "0:01:49", "remaining_time": "0:02:28", "throughput": 1696.3, "total_tokens": 185504} {"current_steps": 1060, "total_steps": 2490, "loss": 0.3588, "lr": 3.55416283362546e-05, "epoch": 4.257028112449799, "percentage": 42.57, "elapsed_time": "0:01:49", "remaining_time": "0:02:28", "throughput": 1697.06, "total_tokens": 186272} {"current_steps": 1065, "total_steps": 2490, "loss": 0.3579, "lr": 3.5382477089329646e-05, "epoch": 4.27710843373494, "percentage": 42.77, "elapsed_time": "0:01:50", "remaining_time": "0:02:27", "throughput": 1699.94, "total_tokens": 187296} {"current_steps": 1070, "total_steps": 2490, "loss": 0.348, "lr": 3.522281574249774e-05, "epoch": 4.2971887550200805, "percentage": 42.97, "elapsed_time": "0:01:50", "remaining_time": "0:02:26", "throughput": 1702.8, "total_tokens": 188320} {"current_steps": 1075, "total_steps": 2490, "loss": 0.3282, "lr": 3.5062652140056275e-05, "epoch": 4.317269076305221, "percentage": 43.17, "elapsed_time": "0:01:51", "remaining_time": "0:02:26", "throughput": 1704.78, "total_tokens": 189248} {"current_steps": 1080, "total_steps": 2490, "loss": 0.3005, "lr": 3.490199415097892e-05, "epoch": 4.337349397590361, "percentage": 43.37, "elapsed_time": "0:01:51", "remaining_time": "0:02:25", "throughput": 1709.04, "total_tokens": 190432} {"current_steps": 1085, "total_steps": 2490, "loss": 0.4539, "lr": 3.474084966852897e-05, "epoch": 4.357429718875502, "percentage": 43.57, "elapsed_time": "0:01:51", "remaining_time": "0:02:24", "throughput": 1710.56, "total_tokens": 191296} {"current_steps": 1090, "total_steps": 2490, "loss": 0.3682, "lr": 3.457922660987155e-05, "epoch": 4.377510040160643, "percentage": 43.78, "elapsed_time": "0:01:52", "remaining_time": "0:02:24", "throughput": 1713.82, "total_tokens": 192368} {"current_steps": 1095, "total_steps": 2490, "loss": 0.3338, "lr": 3.441713291568462e-05, "epoch": 4.397590361445783, "percentage": 43.98, "elapsed_time": "0:01:52", "remaining_time": "0:02:23", "throughput": 1715.19, "total_tokens": 193232} {"current_steps": 1100, "total_steps": 2490, "loss": 0.3587, "lr": 3.42545765497689e-05, "epoch": 4.417670682730924, "percentage": 44.18, "elapsed_time": "0:01:53", "remaining_time": "0:02:22", "throughput": 1716.78, "total_tokens": 194128} {"current_steps": 1105, "total_steps": 2490, "loss": 0.3609, "lr": 3.409156549865654e-05, "epoch": 4.437751004016064, "percentage": 44.38, "elapsed_time": "0:01:53", "remaining_time": "0:02:22", "throughput": 1717.72, "total_tokens": 194944} {"current_steps": 1110, "total_steps": 2490, "loss": 0.3477, "lr": 3.392810777121876e-05, "epoch": 4.457831325301205, "percentage": 44.58, "elapsed_time": "0:01:53", "remaining_time": "0:02:21", "throughput": 1719.44, "total_tokens": 195840} {"current_steps": 1115, "total_steps": 2490, "loss": 0.3871, "lr": 3.376421139827237e-05, "epoch": 4.477911646586345, "percentage": 44.78, "elapsed_time": "0:01:54", "remaining_time": "0:02:20", "throughput": 1720.34, "total_tokens": 196640} {"current_steps": 1120, "total_steps": 2490, "loss": 0.3481, "lr": 3.3599884432185225e-05, "epoch": 4.497991967871486, "percentage": 44.98, "elapsed_time": "0:01:54", "remaining_time": "0:02:20", "throughput": 1721.24, "total_tokens": 197440} {"current_steps": 1125, "total_steps": 2490, "loss": 0.3393, "lr": 3.343513494648055e-05, "epoch": 4.518072289156627, "percentage": 45.18, "elapsed_time": "0:01:55", "remaining_time": "0:02:19", "throughput": 1723.71, "total_tokens": 198432} {"current_steps": 1125, "total_steps": 2490, "eval_loss": 0.3545505702495575, "epoch": 4.518072289156627, "percentage": 45.18, "elapsed_time": "0:01:56", "remaining_time": "0:02:21", "throughput": 1702.58, "total_tokens": 198432} {"current_steps": 1130, "total_steps": 2490, "loss": 0.3349, "lr": 3.326997103544035e-05, "epoch": 4.538152610441767, "percentage": 45.38, "elapsed_time": "0:01:58", "remaining_time": "0:02:22", "throughput": 1686.02, "total_tokens": 199232} {"current_steps": 1135, "total_steps": 2490, "loss": 0.3373, "lr": 3.310440081370767e-05, "epoch": 4.5582329317269075, "percentage": 45.58, "elapsed_time": "0:01:58", "remaining_time": "0:02:21", "throughput": 1687.87, "total_tokens": 200144} {"current_steps": 1140, "total_steps": 2490, "loss": 0.3213, "lr": 3.2938432415887984e-05, "epoch": 4.578313253012048, "percentage": 45.78, "elapsed_time": "0:01:58", "remaining_time": "0:02:20", "throughput": 1688.28, "total_tokens": 200896} {"current_steps": 1145, "total_steps": 2490, "loss": 0.3475, "lr": 3.2772073996149435e-05, "epoch": 4.598393574297189, "percentage": 45.98, "elapsed_time": "0:01:59", "remaining_time": "0:02:20", "throughput": 1689.76, "total_tokens": 201760} {"current_steps": 1150, "total_steps": 2490, "loss": 0.4032, "lr": 3.260533372782234e-05, "epoch": 4.618473895582329, "percentage": 46.18, "elapsed_time": "0:01:59", "remaining_time": "0:02:19", "throughput": 1691.74, "total_tokens": 202688} {"current_steps": 1155, "total_steps": 2490, "loss": 0.3564, "lr": 3.24382198029975e-05, "epoch": 4.63855421686747, "percentage": 46.39, "elapsed_time": "0:02:00", "remaining_time": "0:02:18", "throughput": 1691.79, "total_tokens": 203392} {"current_steps": 1160, "total_steps": 2490, "loss": 0.322, "lr": 3.227074043212383e-05, "epoch": 4.658634538152611, "percentage": 46.59, "elapsed_time": "0:02:00", "remaining_time": "0:02:18", "throughput": 1691.79, "total_tokens": 204080} {"current_steps": 1165, "total_steps": 2490, "loss": 0.373, "lr": 3.2102903843604885e-05, "epoch": 4.678714859437751, "percentage": 46.79, "elapsed_time": "0:02:01", "remaining_time": "0:02:17", "throughput": 1692.2, "total_tokens": 204816} {"current_steps": 1170, "total_steps": 2490, "loss": 0.3587, "lr": 3.1934718283394646e-05, "epoch": 4.698795180722891, "percentage": 46.99, "elapsed_time": "0:02:01", "remaining_time": "0:02:17", "throughput": 1693.15, "total_tokens": 205616} {"current_steps": 1175, "total_steps": 2490, "loss": 0.3571, "lr": 3.1766192014592344e-05, "epoch": 4.718875502008032, "percentage": 47.19, "elapsed_time": "0:02:01", "remaining_time": "0:02:16", "throughput": 1694.88, "total_tokens": 206512} {"current_steps": 1180, "total_steps": 2490, "loss": 0.3507, "lr": 3.1597333317036545e-05, "epoch": 4.738955823293173, "percentage": 47.39, "elapsed_time": "0:02:02", "remaining_time": "0:02:15", "throughput": 1696.71, "total_tokens": 207424} {"current_steps": 1185, "total_steps": 2490, "loss": 0.3575, "lr": 3.142815048689828e-05, "epoch": 4.759036144578313, "percentage": 47.59, "elapsed_time": "0:02:02", "remaining_time": "0:02:15", "throughput": 1699.47, "total_tokens": 208464} {"current_steps": 1190, "total_steps": 2490, "loss": 0.3579, "lr": 3.125865183627354e-05, "epoch": 4.779116465863454, "percentage": 47.79, "elapsed_time": "0:02:03", "remaining_time": "0:02:14", "throughput": 1700.52, "total_tokens": 209280} {"current_steps": 1195, "total_steps": 2490, "loss": 0.3327, "lr": 3.10888456927748e-05, "epoch": 4.7991967871485945, "percentage": 47.99, "elapsed_time": "0:02:03", "remaining_time": "0:02:13", "throughput": 1701.42, "total_tokens": 210080} {"current_steps": 1200, "total_steps": 2490, "loss": 0.3619, "lr": 3.091874039912195e-05, "epoch": 4.8192771084337345, "percentage": 48.19, "elapsed_time": "0:02:03", "remaining_time": "0:02:13", "throughput": 1703.0, "total_tokens": 210960} {"current_steps": 1205, "total_steps": 2490, "loss": 0.3488, "lr": 3.074834431273236e-05, "epoch": 4.839357429718875, "percentage": 48.39, "elapsed_time": "0:02:04", "remaining_time": "0:02:12", "throughput": 1704.04, "total_tokens": 211776} {"current_steps": 1210, "total_steps": 2490, "loss": 0.3542, "lr": 3.057766580531031e-05, "epoch": 4.859437751004016, "percentage": 48.59, "elapsed_time": "0:02:04", "remaining_time": "0:02:11", "throughput": 1704.93, "total_tokens": 212576} {"current_steps": 1215, "total_steps": 2490, "loss": 0.3362, "lr": 3.0406713262435656e-05, "epoch": 4.879518072289157, "percentage": 48.8, "elapsed_time": "0:02:05", "remaining_time": "0:02:11", "throughput": 1705.68, "total_tokens": 213360} {"current_steps": 1220, "total_steps": 2490, "loss": 0.3814, "lr": 3.0235495083151844e-05, "epoch": 4.899598393574297, "percentage": 49.0, "elapsed_time": "0:02:05", "remaining_time": "0:02:10", "throughput": 1707.71, "total_tokens": 214304} {"current_steps": 1225, "total_steps": 2490, "loss": 0.3492, "lr": 3.0064019679553274e-05, "epoch": 4.919678714859438, "percentage": 49.2, "elapsed_time": "0:02:05", "remaining_time": "0:02:10", "throughput": 1708.34, "total_tokens": 215072} {"current_steps": 1230, "total_steps": 2490, "loss": 0.3542, "lr": 2.9892295476371988e-05, "epoch": 4.9397590361445785, "percentage": 49.4, "elapsed_time": "0:02:06", "remaining_time": "0:02:09", "throughput": 1709.46, "total_tokens": 215904} {"current_steps": 1235, "total_steps": 2490, "loss": 0.3543, "lr": 2.9720330910563772e-05, "epoch": 4.959839357429718, "percentage": 49.6, "elapsed_time": "0:02:06", "remaining_time": "0:02:08", "throughput": 1711.57, "total_tokens": 216864} {"current_steps": 1240, "total_steps": 2490, "loss": 0.3387, "lr": 2.9548134430893604e-05, "epoch": 4.979919678714859, "percentage": 49.8, "elapsed_time": "0:02:07", "remaining_time": "0:02:08", "throughput": 1713.89, "total_tokens": 217856} {"current_steps": 1245, "total_steps": 2490, "loss": 0.339, "lr": 2.9375714497520623e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:02:07", "remaining_time": "0:02:07", "throughput": 1714.94, "total_tokens": 218864} {"current_steps": 1250, "total_steps": 2490, "loss": 0.3601, "lr": 2.920307958158241e-05, "epoch": 5.020080321285141, "percentage": 50.2, "elapsed_time": "0:02:08", "remaining_time": "0:02:07", "throughput": 1713.76, "total_tokens": 219680} {"current_steps": 1250, "total_steps": 2490, "eval_loss": 0.3591695725917816, "epoch": 5.020080321285141, "percentage": 50.2, "elapsed_time": "0:02:09", "remaining_time": "0:02:08", "throughput": 1697.6, "total_tokens": 219680} {"current_steps": 1255, "total_steps": 2490, "loss": 0.3239, "lr": 2.903023816477885e-05, "epoch": 5.040160642570281, "percentage": 50.4, "elapsed_time": "0:02:11", "remaining_time": "0:02:08", "throughput": 1683.07, "total_tokens": 220560} {"current_steps": 1260, "total_steps": 2490, "loss": 0.3419, "lr": 2.885719873895536e-05, "epoch": 5.0602409638554215, "percentage": 50.6, "elapsed_time": "0:02:11", "remaining_time": "0:02:08", "throughput": 1684.46, "total_tokens": 221440} {"current_steps": 1265, "total_steps": 2490, "loss": 0.348, "lr": 2.868396980568572e-05, "epoch": 5.080321285140562, "percentage": 50.8, "elapsed_time": "0:02:11", "remaining_time": "0:02:07", "throughput": 1685.79, "total_tokens": 222304} {"current_steps": 1270, "total_steps": 2490, "loss": 0.2762, "lr": 2.8510559875854377e-05, "epoch": 5.100401606425703, "percentage": 51.0, "elapsed_time": "0:02:12", "remaining_time": "0:02:07", "throughput": 1687.69, "total_tokens": 223248} {"current_steps": 1275, "total_steps": 2490, "loss": 0.2662, "lr": 2.833697746923829e-05, "epoch": 5.120481927710843, "percentage": 51.2, "elapsed_time": "0:02:12", "remaining_time": "0:02:06", "throughput": 1688.21, "total_tokens": 224000} {"current_steps": 1280, "total_steps": 2490, "loss": 0.3421, "lr": 2.816323111408835e-05, "epoch": 5.140562248995984, "percentage": 51.41, "elapsed_time": "0:02:13", "remaining_time": "0:02:05", "throughput": 1689.69, "total_tokens": 224880} {"current_steps": 1285, "total_steps": 2490, "loss": 0.4232, "lr": 2.7989329346710375e-05, "epoch": 5.160642570281125, "percentage": 51.61, "elapsed_time": "0:02:13", "remaining_time": "0:02:05", "throughput": 1691.26, "total_tokens": 225776} {"current_steps": 1290, "total_steps": 2490, "loss": 0.3838, "lr": 2.7815280711045717e-05, "epoch": 5.180722891566265, "percentage": 51.81, "elapsed_time": "0:02:13", "remaining_time": "0:02:04", "throughput": 1692.14, "total_tokens": 226576} {"current_steps": 1295, "total_steps": 2490, "loss": 0.3104, "lr": 2.7641093758251497e-05, "epoch": 5.2008032128514055, "percentage": 52.01, "elapsed_time": "0:02:14", "remaining_time": "0:02:03", "throughput": 1692.88, "total_tokens": 227360} {"current_steps": 1300, "total_steps": 2490, "loss": 0.3105, "lr": 2.7466777046280457e-05, "epoch": 5.220883534136546, "percentage": 52.21, "elapsed_time": "0:02:14", "remaining_time": "0:02:03", "throughput": 1693.3, "total_tokens": 228112} {"current_steps": 1305, "total_steps": 2490, "loss": 0.3474, "lr": 2.7292339139460556e-05, "epoch": 5.240963855421687, "percentage": 52.41, "elapsed_time": "0:02:15", "remaining_time": "0:02:02", "throughput": 1694.75, "total_tokens": 228992} {"current_steps": 1310, "total_steps": 2490, "loss": 0.3076, "lr": 2.71177886080741e-05, "epoch": 5.261044176706827, "percentage": 52.61, "elapsed_time": "0:02:15", "remaining_time": "0:02:02", "throughput": 1696.12, "total_tokens": 229872} {"current_steps": 1315, "total_steps": 2490, "loss": 0.4231, "lr": 2.69431340279368e-05, "epoch": 5.281124497991968, "percentage": 52.81, "elapsed_time": "0:02:15", "remaining_time": "0:02:01", "throughput": 1697.33, "total_tokens": 230720} {"current_steps": 1320, "total_steps": 2490, "loss": 0.3725, "lr": 2.676838397997633e-05, "epoch": 5.301204819277109, "percentage": 53.01, "elapsed_time": "0:02:16", "remaining_time": "0:02:00", "throughput": 1698.51, "total_tokens": 231568} {"current_steps": 1325, "total_steps": 2490, "loss": 0.3237, "lr": 2.659354704981078e-05, "epoch": 5.321285140562249, "percentage": 53.21, "elapsed_time": "0:02:16", "remaining_time": "0:02:00", "throughput": 1699.34, "total_tokens": 232368} {"current_steps": 1330, "total_steps": 2490, "loss": 0.3534, "lr": 2.6418631827326857e-05, "epoch": 5.341365461847389, "percentage": 53.41, "elapsed_time": "0:02:17", "remaining_time": "0:01:59", "throughput": 1700.28, "total_tokens": 233184} {"current_steps": 1335, "total_steps": 2490, "loss": 0.338, "lr": 2.6243646906257806e-05, "epoch": 5.36144578313253, "percentage": 53.61, "elapsed_time": "0:02:17", "remaining_time": "0:01:59", "throughput": 1701.07, "total_tokens": 233984} {"current_steps": 1340, "total_steps": 2490, "loss": 0.3687, "lr": 2.606860088376126e-05, "epoch": 5.381526104417671, "percentage": 53.82, "elapsed_time": "0:02:17", "remaining_time": "0:01:58", "throughput": 1702.35, "total_tokens": 234848} {"current_steps": 1345, "total_steps": 2490, "loss": 0.3449, "lr": 2.5893502359996786e-05, "epoch": 5.401606425702811, "percentage": 54.02, "elapsed_time": "0:02:18", "remaining_time": "0:01:57", "throughput": 1703.93, "total_tokens": 235760} {"current_steps": 1350, "total_steps": 2490, "loss": 0.3504, "lr": 2.5718359937703408e-05, "epoch": 5.421686746987952, "percentage": 54.22, "elapsed_time": "0:02:18", "remaining_time": "0:01:57", "throughput": 1705.31, "total_tokens": 236640} {"current_steps": 1355, "total_steps": 2490, "loss": 0.3538, "lr": 2.554318222177689e-05, "epoch": 5.4417670682730925, "percentage": 54.42, "elapsed_time": "0:02:19", "remaining_time": "0:01:56", "throughput": 1707.35, "total_tokens": 237616} {"current_steps": 1360, "total_steps": 2490, "loss": 0.3354, "lr": 2.5367977818847034e-05, "epoch": 5.461847389558233, "percentage": 54.62, "elapsed_time": "0:02:19", "remaining_time": "0:01:55", "throughput": 1708.91, "total_tokens": 238528} {"current_steps": 1365, "total_steps": 2490, "loss": 0.3354, "lr": 2.519275533685477e-05, "epoch": 5.481927710843373, "percentage": 54.82, "elapsed_time": "0:02:19", "remaining_time": "0:01:55", "throughput": 1710.38, "total_tokens": 239424} {"current_steps": 1370, "total_steps": 2490, "loss": 0.354, "lr": 2.5017523384629298e-05, "epoch": 5.502008032128514, "percentage": 55.02, "elapsed_time": "0:02:20", "remaining_time": "0:01:54", "throughput": 1711.5, "total_tokens": 240272} {"current_steps": 1375, "total_steps": 2490, "loss": 0.3422, "lr": 2.484229057146507e-05, "epoch": 5.522088353413655, "percentage": 55.22, "elapsed_time": "0:02:20", "remaining_time": "0:01:54", "throughput": 1712.73, "total_tokens": 241136} {"current_steps": 1375, "total_steps": 2490, "eval_loss": 0.35063984990119934, "epoch": 5.522088353413655, "percentage": 55.22, "elapsed_time": "0:02:22", "remaining_time": "0:01:55", "throughput": 1695.54, "total_tokens": 241136} {"current_steps": 1380, "total_steps": 2490, "loss": 0.3574, "lr": 2.466706550669886e-05, "epoch": 5.542168674698795, "percentage": 55.42, "elapsed_time": "0:02:23", "remaining_time": "0:01:55", "throughput": 1681.23, "total_tokens": 241936} {"current_steps": 1385, "total_steps": 2490, "loss": 0.3748, "lr": 2.449185679928672e-05, "epoch": 5.562248995983936, "percentage": 55.62, "elapsed_time": "0:02:24", "remaining_time": "0:01:55", "throughput": 1681.52, "total_tokens": 242672} {"current_steps": 1390, "total_steps": 2490, "loss": 0.3507, "lr": 2.431667305738112e-05, "epoch": 5.582329317269076, "percentage": 55.82, "elapsed_time": "0:02:24", "remaining_time": "0:01:54", "throughput": 1684.37, "total_tokens": 243808} {"current_steps": 1395, "total_steps": 2490, "loss": 0.3506, "lr": 2.414152288790787e-05, "epoch": 5.602409638554217, "percentage": 56.02, "elapsed_time": "0:02:25", "remaining_time": "0:01:53", "throughput": 1685.65, "total_tokens": 244688} {"current_steps": 1400, "total_steps": 2490, "loss": 0.3386, "lr": 2.3966414896143385e-05, "epoch": 5.622489959839357, "percentage": 56.22, "elapsed_time": "0:02:25", "remaining_time": "0:01:53", "throughput": 1687.74, "total_tokens": 245696} {"current_steps": 1405, "total_steps": 2490, "loss": 0.3298, "lr": 2.3791357685291863e-05, "epoch": 5.642570281124498, "percentage": 56.43, "elapsed_time": "0:02:25", "remaining_time": "0:01:52", "throughput": 1688.88, "total_tokens": 246544} {"current_steps": 1410, "total_steps": 2490, "loss": 0.3413, "lr": 2.361635985606256e-05, "epoch": 5.662650602409639, "percentage": 56.63, "elapsed_time": "0:02:26", "remaining_time": "0:01:52", "throughput": 1692.18, "total_tokens": 247744} {"current_steps": 1415, "total_steps": 2490, "loss": 0.3623, "lr": 2.344143000624729e-05, "epoch": 5.682730923694779, "percentage": 56.83, "elapsed_time": "0:02:26", "remaining_time": "0:01:51", "throughput": 1692.55, "total_tokens": 248480} {"current_steps": 1420, "total_steps": 2490, "loss": 0.3284, "lr": 2.3266576730297956e-05, "epoch": 5.7028112449799195, "percentage": 57.03, "elapsed_time": "0:02:27", "remaining_time": "0:01:50", "throughput": 1693.59, "total_tokens": 249312} {"current_steps": 1425, "total_steps": 2490, "loss": 0.3679, "lr": 2.3091808618904352e-05, "epoch": 5.72289156626506, "percentage": 57.23, "elapsed_time": "0:02:27", "remaining_time": "0:01:50", "throughput": 1695.67, "total_tokens": 250304} {"current_steps": 1430, "total_steps": 2490, "loss": 0.3506, "lr": 2.2917134258572038e-05, "epoch": 5.742971887550201, "percentage": 57.43, "elapsed_time": "0:02:28", "remaining_time": "0:01:49", "throughput": 1697.22, "total_tokens": 251216} {"current_steps": 1435, "total_steps": 2490, "loss": 0.3512, "lr": 2.274256223120051e-05, "epoch": 5.763052208835341, "percentage": 57.63, "elapsed_time": "0:02:28", "remaining_time": "0:01:49", "throughput": 1697.59, "total_tokens": 251952} {"current_steps": 1440, "total_steps": 2490, "loss": 0.3292, "lr": 2.2568101113661577e-05, "epoch": 5.783132530120482, "percentage": 57.83, "elapsed_time": "0:02:28", "remaining_time": "0:01:48", "throughput": 1700.37, "total_tokens": 253072} {"current_steps": 1445, "total_steps": 2490, "loss": 0.3499, "lr": 2.239375947737793e-05, "epoch": 5.803212851405623, "percentage": 58.03, "elapsed_time": "0:02:29", "remaining_time": "0:01:47", "throughput": 1700.93, "total_tokens": 253840} {"current_steps": 1450, "total_steps": 2490, "loss": 0.3647, "lr": 2.221954588790206e-05, "epoch": 5.823293172690763, "percentage": 58.23, "elapsed_time": "0:02:29", "remaining_time": "0:01:47", "throughput": 1701.7, "total_tokens": 254640} {"current_steps": 1455, "total_steps": 2490, "loss": 0.3518, "lr": 2.2045468904495415e-05, "epoch": 5.843373493975903, "percentage": 58.43, "elapsed_time": "0:02:30", "remaining_time": "0:01:46", "throughput": 1702.57, "total_tokens": 255456} {"current_steps": 1460, "total_steps": 2490, "loss": 0.354, "lr": 2.1871537079707833e-05, "epoch": 5.863453815261044, "percentage": 58.63, "elapsed_time": "0:02:30", "remaining_time": "0:01:46", "throughput": 1703.6, "total_tokens": 256304} {"current_steps": 1465, "total_steps": 2490, "loss": 0.3385, "lr": 2.1697758958957448e-05, "epoch": 5.883534136546185, "percentage": 58.84, "elapsed_time": "0:02:30", "remaining_time": "0:01:45", "throughput": 1704.34, "total_tokens": 257104} {"current_steps": 1470, "total_steps": 2490, "loss": 0.3532, "lr": 2.1524143080110716e-05, "epoch": 5.903614457831325, "percentage": 59.04, "elapsed_time": "0:02:31", "remaining_time": "0:01:44", "throughput": 1706.14, "total_tokens": 258080} {"current_steps": 1475, "total_steps": 2490, "loss": 0.3701, "lr": 2.135069797306308e-05, "epoch": 5.923694779116466, "percentage": 59.24, "elapsed_time": "0:02:31", "remaining_time": "0:01:44", "throughput": 1708.0, "total_tokens": 259056} {"current_steps": 1480, "total_steps": 2490, "loss": 0.3721, "lr": 2.1177432159319754e-05, "epoch": 5.943775100401607, "percentage": 59.44, "elapsed_time": "0:02:32", "remaining_time": "0:01:43", "throughput": 1709.55, "total_tokens": 260000} {"current_steps": 1485, "total_steps": 2490, "loss": 0.3517, "lr": 2.100435415157718e-05, "epoch": 5.9638554216867465, "percentage": 59.64, "elapsed_time": "0:02:32", "remaining_time": "0:01:43", "throughput": 1710.06, "total_tokens": 260768} {"current_steps": 1490, "total_steps": 2490, "loss": 0.3572, "lr": 2.083147245330468e-05, "epoch": 5.983935742971887, "percentage": 59.84, "elapsed_time": "0:02:32", "remaining_time": "0:01:42", "throughput": 1712.12, "total_tokens": 261760} {"current_steps": 1495, "total_steps": 2490, "loss": 0.3476, "lr": 2.0658795558326743e-05, "epoch": 6.004016064257028, "percentage": 60.04, "elapsed_time": "0:02:33", "remaining_time": "0:01:42", "throughput": 1711.05, "total_tokens": 262752} {"current_steps": 1500, "total_steps": 2490, "loss": 0.3609, "lr": 2.048633195040572e-05, "epoch": 6.024096385542169, "percentage": 60.24, "elapsed_time": "0:02:33", "remaining_time": "0:01:41", "throughput": 1712.16, "total_tokens": 263616} {"current_steps": 1500, "total_steps": 2490, "eval_loss": 0.3502369225025177, "epoch": 6.024096385542169, "percentage": 60.24, "elapsed_time": "0:02:35", "remaining_time": "0:01:42", "throughput": 1698.78, "total_tokens": 263616} {"current_steps": 1505, "total_steps": 2490, "loss": 0.3669, "lr": 2.0314090102824963e-05, "epoch": 6.044176706827309, "percentage": 60.44, "elapsed_time": "0:02:36", "remaining_time": "0:01:42", "throughput": 1686.05, "total_tokens": 264432} {"current_steps": 1510, "total_steps": 2490, "loss": 0.3542, "lr": 2.014207847797256e-05, "epoch": 6.06425702811245, "percentage": 60.64, "elapsed_time": "0:02:37", "remaining_time": "0:01:42", "throughput": 1686.38, "total_tokens": 265184} {"current_steps": 1515, "total_steps": 2490, "loss": 0.3509, "lr": 1.997030552692556e-05, "epoch": 6.0843373493975905, "percentage": 60.84, "elapsed_time": "0:02:37", "remaining_time": "0:01:41", "throughput": 1687.48, "total_tokens": 266064} {"current_steps": 1520, "total_steps": 2490, "loss": 0.3483, "lr": 1.9798779689034757e-05, "epoch": 6.104417670682731, "percentage": 61.04, "elapsed_time": "0:02:38", "remaining_time": "0:01:40", "throughput": 1688.48, "total_tokens": 266928} {"current_steps": 1525, "total_steps": 2490, "loss": 0.3542, "lr": 1.9627509391510086e-05, "epoch": 6.124497991967871, "percentage": 61.24, "elapsed_time": "0:02:38", "remaining_time": "0:01:40", "throughput": 1689.84, "total_tokens": 267824} {"current_steps": 1530, "total_steps": 2490, "loss": 0.3479, "lr": 1.9456503049006542e-05, "epoch": 6.144578313253012, "percentage": 61.45, "elapsed_time": "0:02:38", "remaining_time": "0:01:39", "throughput": 1690.46, "total_tokens": 268608} {"current_steps": 1535, "total_steps": 2490, "loss": 0.3477, "lr": 1.9285769063210812e-05, "epoch": 6.164658634538153, "percentage": 61.65, "elapsed_time": "0:02:39", "remaining_time": "0:01:39", "throughput": 1692.82, "total_tokens": 269696} {"current_steps": 1540, "total_steps": 2490, "loss": 0.351, "lr": 1.9115315822428437e-05, "epoch": 6.184738955823293, "percentage": 61.85, "elapsed_time": "0:02:39", "remaining_time": "0:01:38", "throughput": 1694.76, "total_tokens": 270704} {"current_steps": 1545, "total_steps": 2490, "loss": 0.3447, "lr": 1.8945151701171755e-05, "epoch": 6.204819277108434, "percentage": 62.05, "elapsed_time": "0:02:40", "remaining_time": "0:01:37", "throughput": 1695.88, "total_tokens": 271568} {"current_steps": 1550, "total_steps": 2490, "loss": 0.3386, "lr": 1.877528505974838e-05, "epoch": 6.224899598393574, "percentage": 62.25, "elapsed_time": "0:02:40", "remaining_time": "0:01:37", "throughput": 1696.2, "total_tokens": 272304} {"current_steps": 1555, "total_steps": 2490, "loss": 0.3302, "lr": 1.8605724243850502e-05, "epoch": 6.244979919678715, "percentage": 62.45, "elapsed_time": "0:02:40", "remaining_time": "0:01:36", "throughput": 1697.22, "total_tokens": 273152} {"current_steps": 1560, "total_steps": 2490, "loss": 0.3962, "lr": 1.8436477584144863e-05, "epoch": 6.265060240963855, "percentage": 62.65, "elapsed_time": "0:02:41", "remaining_time": "0:01:36", "throughput": 1698.91, "total_tokens": 274112} {"current_steps": 1565, "total_steps": 2490, "loss": 0.3337, "lr": 1.826755339586341e-05, "epoch": 6.285140562248996, "percentage": 62.85, "elapsed_time": "0:02:41", "remaining_time": "0:01:35", "throughput": 1699.82, "total_tokens": 274944} {"current_steps": 1570, "total_steps": 2490, "loss": 0.3484, "lr": 1.809895997839482e-05, "epoch": 6.305220883534137, "percentage": 63.05, "elapsed_time": "0:02:42", "remaining_time": "0:01:35", "throughput": 1700.32, "total_tokens": 275712} {"current_steps": 1575, "total_steps": 2490, "loss": 0.3391, "lr": 1.793070561487672e-05, "epoch": 6.325301204819277, "percentage": 63.25, "elapsed_time": "0:02:42", "remaining_time": "0:01:34", "throughput": 1701.31, "total_tokens": 276560} {"current_steps": 1580, "total_steps": 2490, "loss": 0.3948, "lr": 1.7762798571788707e-05, "epoch": 6.3453815261044175, "percentage": 63.45, "elapsed_time": "0:02:42", "remaining_time": "0:01:33", "throughput": 1702.62, "total_tokens": 277456} {"current_steps": 1585, "total_steps": 2490, "loss": 0.3246, "lr": 1.759524709854626e-05, "epoch": 6.365461847389558, "percentage": 63.65, "elapsed_time": "0:02:43", "remaining_time": "0:01:33", "throughput": 1703.89, "total_tokens": 278352} {"current_steps": 1590, "total_steps": 2490, "loss": 0.3468, "lr": 1.742805942709538e-05, "epoch": 6.385542168674699, "percentage": 63.86, "elapsed_time": "0:02:43", "remaining_time": "0:01:32", "throughput": 1705.27, "total_tokens": 279264} {"current_steps": 1595, "total_steps": 2490, "loss": 0.3428, "lr": 1.7261243771508208e-05, "epoch": 6.405622489959839, "percentage": 64.06, "elapsed_time": "0:02:44", "remaining_time": "0:01:32", "throughput": 1706.44, "total_tokens": 280144} {"current_steps": 1600, "total_steps": 2490, "loss": 0.3439, "lr": 1.70948083275794e-05, "epoch": 6.42570281124498, "percentage": 64.26, "elapsed_time": "0:02:44", "remaining_time": "0:01:31", "throughput": 1707.52, "total_tokens": 281008} {"current_steps": 1605, "total_steps": 2490, "loss": 0.3717, "lr": 1.6928761272423522e-05, "epoch": 6.445783132530121, "percentage": 64.46, "elapsed_time": "0:02:44", "remaining_time": "0:01:30", "throughput": 1708.09, "total_tokens": 281792} {"current_steps": 1610, "total_steps": 2490, "loss": 0.3517, "lr": 1.6763110764073235e-05, "epoch": 6.4658634538152615, "percentage": 64.66, "elapsed_time": "0:02:45", "remaining_time": "0:01:30", "throughput": 1708.54, "total_tokens": 282560} {"current_steps": 1615, "total_steps": 2490, "loss": 0.3423, "lr": 1.6597864941078552e-05, "epoch": 6.485943775100401, "percentage": 64.86, "elapsed_time": "0:02:45", "remaining_time": "0:01:29", "throughput": 1709.65, "total_tokens": 283440} {"current_steps": 1620, "total_steps": 2490, "loss": 0.358, "lr": 1.643303192210693e-05, "epoch": 6.506024096385542, "percentage": 65.06, "elapsed_time": "0:02:46", "remaining_time": "0:01:29", "throughput": 1712.21, "total_tokens": 284592} {"current_steps": 1625, "total_steps": 2490, "loss": 0.3457, "lr": 1.626861980554441e-05, "epoch": 6.526104417670683, "percentage": 65.26, "elapsed_time": "0:02:46", "remaining_time": "0:01:28", "throughput": 1713.04, "total_tokens": 285424} {"current_steps": 1625, "total_steps": 2490, "eval_loss": 0.3553968071937561, "epoch": 6.526104417670683, "percentage": 65.26, "elapsed_time": "0:02:47", "remaining_time": "0:01:29", "throughput": 1700.62, "total_tokens": 285424} {"current_steps": 1630, "total_steps": 2490, "loss": 0.3518, "lr": 1.6104636669097776e-05, "epoch": 6.546184738955823, "percentage": 65.46, "elapsed_time": "0:02:49", "remaining_time": "0:01:29", "throughput": 1689.52, "total_tokens": 286272} {"current_steps": 1635, "total_steps": 2490, "loss": 0.3512, "lr": 1.5941090569397616e-05, "epoch": 6.566265060240964, "percentage": 65.66, "elapsed_time": "0:02:49", "remaining_time": "0:01:28", "throughput": 1690.91, "total_tokens": 287200} {"current_steps": 1640, "total_steps": 2490, "loss": 0.348, "lr": 1.5777989541602533e-05, "epoch": 6.586345381526105, "percentage": 65.86, "elapsed_time": "0:02:50", "remaining_time": "0:01:28", "throughput": 1692.72, "total_tokens": 288224} {"current_steps": 1645, "total_steps": 2490, "loss": 0.3353, "lr": 1.561534159900441e-05, "epoch": 6.606425702811245, "percentage": 66.06, "elapsed_time": "0:02:50", "remaining_time": "0:01:27", "throughput": 1694.5, "total_tokens": 289216} {"current_steps": 1650, "total_steps": 2490, "loss": 0.3476, "lr": 1.5453154732634616e-05, "epoch": 6.626506024096385, "percentage": 66.27, "elapsed_time": "0:02:51", "remaining_time": "0:01:27", "throughput": 1695.5, "total_tokens": 290080} {"current_steps": 1655, "total_steps": 2490, "loss": 0.351, "lr": 1.52914369108715e-05, "epoch": 6.646586345381526, "percentage": 66.47, "elapsed_time": "0:02:51", "remaining_time": "0:01:26", "throughput": 1696.17, "total_tokens": 290880} {"current_steps": 1660, "total_steps": 2490, "loss": 0.3607, "lr": 1.513019607904882e-05, "epoch": 6.666666666666667, "percentage": 66.67, "elapsed_time": "0:02:51", "remaining_time": "0:01:25", "throughput": 1697.12, "total_tokens": 291728} {"current_steps": 1665, "total_steps": 2490, "loss": 0.3573, "lr": 1.4969440159065439e-05, "epoch": 6.686746987951807, "percentage": 66.87, "elapsed_time": "0:02:52", "remaining_time": "0:01:25", "throughput": 1698.35, "total_tokens": 292624} {"current_steps": 1670, "total_steps": 2490, "loss": 0.3476, "lr": 1.4809177048996064e-05, "epoch": 6.706827309236948, "percentage": 67.07, "elapsed_time": "0:02:52", "remaining_time": "0:01:24", "throughput": 1699.41, "total_tokens": 293488} {"current_steps": 1675, "total_steps": 2490, "loss": 0.3477, "lr": 1.464941462270325e-05, "epoch": 6.7269076305220885, "percentage": 67.27, "elapsed_time": "0:02:53", "remaining_time": "0:01:24", "throughput": 1700.69, "total_tokens": 294400} {"current_steps": 1680, "total_steps": 2490, "loss": 0.357, "lr": 1.449016072945053e-05, "epoch": 6.746987951807229, "percentage": 67.47, "elapsed_time": "0:02:53", "remaining_time": "0:01:23", "throughput": 1701.21, "total_tokens": 295184} {"current_steps": 1685, "total_steps": 2490, "loss": 0.3575, "lr": 1.4331423193516768e-05, "epoch": 6.767068273092369, "percentage": 67.67, "elapsed_time": "0:02:53", "remaining_time": "0:01:23", "throughput": 1702.92, "total_tokens": 296176} {"current_steps": 1690, "total_steps": 2490, "loss": 0.3358, "lr": 1.4173209813811788e-05, "epoch": 6.78714859437751, "percentage": 67.87, "elapsed_time": "0:02:54", "remaining_time": "0:01:22", "throughput": 1704.11, "total_tokens": 297072} {"current_steps": 1695, "total_steps": 2490, "loss": 0.3491, "lr": 1.4015528363493125e-05, "epoch": 6.807228915662651, "percentage": 68.07, "elapsed_time": "0:02:54", "remaining_time": "0:01:21", "throughput": 1704.65, "total_tokens": 297856} {"current_steps": 1700, "total_steps": 2490, "loss": 0.3253, "lr": 1.3858386589584187e-05, "epoch": 6.827309236947791, "percentage": 68.27, "elapsed_time": "0:02:55", "remaining_time": "0:01:21", "throughput": 1706.61, "total_tokens": 298896} {"current_steps": 1705, "total_steps": 2490, "loss": 0.3302, "lr": 1.3701792212593662e-05, "epoch": 6.847389558232932, "percentage": 68.47, "elapsed_time": "0:02:55", "remaining_time": "0:01:20", "throughput": 1707.34, "total_tokens": 299712} {"current_steps": 1710, "total_steps": 2490, "loss": 0.3882, "lr": 1.354575292613611e-05, "epoch": 6.867469879518072, "percentage": 68.67, "elapsed_time": "0:02:55", "remaining_time": "0:01:20", "throughput": 1709.03, "total_tokens": 300720} {"current_steps": 1715, "total_steps": 2490, "loss": 0.3658, "lr": 1.3390276396554052e-05, "epoch": 6.887550200803213, "percentage": 68.88, "elapsed_time": "0:02:56", "remaining_time": "0:01:19", "throughput": 1709.84, "total_tokens": 301552} {"current_steps": 1720, "total_steps": 2490, "loss": 0.3388, "lr": 1.3235370262541272e-05, "epoch": 6.907630522088353, "percentage": 69.08, "elapsed_time": "0:02:56", "remaining_time": "0:01:19", "throughput": 1710.45, "total_tokens": 302352} {"current_steps": 1725, "total_steps": 2490, "loss": 0.3335, "lr": 1.3081042134767554e-05, "epoch": 6.927710843373494, "percentage": 69.28, "elapsed_time": "0:02:57", "remaining_time": "0:01:18", "throughput": 1711.49, "total_tokens": 303232} {"current_steps": 1730, "total_steps": 2490, "loss": 0.3262, "lr": 1.292729959550473e-05, "epoch": 6.947791164658635, "percentage": 69.48, "elapsed_time": "0:02:57", "remaining_time": "0:01:18", "throughput": 1712.01, "total_tokens": 304016} {"current_steps": 1735, "total_steps": 2490, "loss": 0.3396, "lr": 1.277415019825417e-05, "epoch": 6.967871485943775, "percentage": 69.68, "elapsed_time": "0:02:57", "remaining_time": "0:01:17", "throughput": 1713.32, "total_tokens": 304944} {"current_steps": 1740, "total_steps": 2490, "loss": 0.3422, "lr": 1.2621601467375684e-05, "epoch": 6.9879518072289155, "percentage": 69.88, "elapsed_time": "0:02:58", "remaining_time": "0:01:16", "throughput": 1715.25, "total_tokens": 305984} {"current_steps": 1745, "total_steps": 2490, "loss": 0.3182, "lr": 1.2469660897717816e-05, "epoch": 7.008032128514056, "percentage": 70.08, "elapsed_time": "0:02:59", "remaining_time": "0:01:16", "throughput": 1714.44, "total_tokens": 306992} {"current_steps": 1750, "total_steps": 2490, "loss": 0.315, "lr": 1.2318335954249669e-05, "epoch": 7.028112449799197, "percentage": 70.28, "elapsed_time": "0:02:59", "remaining_time": "0:01:15", "throughput": 1715.02, "total_tokens": 307792} {"current_steps": 1750, "total_steps": 2490, "eval_loss": 0.36506387591362, "epoch": 7.028112449799197, "percentage": 70.28, "elapsed_time": "0:03:00", "remaining_time": "0:01:16", "throughput": 1703.44, "total_tokens": 307792} {"current_steps": 1755, "total_steps": 2490, "loss": 0.3174, "lr": 1.2167634071694081e-05, "epoch": 7.048192771084337, "percentage": 70.48, "elapsed_time": "0:03:02", "remaining_time": "0:01:16", "throughput": 1692.21, "total_tokens": 308624} {"current_steps": 1760, "total_steps": 2490, "loss": 0.2887, "lr": 1.2017562654162357e-05, "epoch": 7.068273092369478, "percentage": 70.68, "elapsed_time": "0:03:02", "remaining_time": "0:01:15", "throughput": 1694.07, "total_tokens": 309680} {"current_steps": 1765, "total_steps": 2490, "loss": 0.3394, "lr": 1.1868129074790577e-05, "epoch": 7.088353413654619, "percentage": 70.88, "elapsed_time": "0:03:03", "remaining_time": "0:01:15", "throughput": 1695.0, "total_tokens": 310544} {"current_steps": 1770, "total_steps": 2490, "loss": 0.3113, "lr": 1.1719340675377252e-05, "epoch": 7.108433734939759, "percentage": 71.08, "elapsed_time": "0:03:03", "remaining_time": "0:01:14", "throughput": 1696.73, "total_tokens": 311568} {"current_steps": 1775, "total_steps": 2490, "loss": 0.4907, "lr": 1.1571204766022665e-05, "epoch": 7.128514056224899, "percentage": 71.29, "elapsed_time": "0:03:04", "remaining_time": "0:01:14", "throughput": 1697.67, "total_tokens": 312432} {"current_steps": 1780, "total_steps": 2490, "loss": 0.3627, "lr": 1.1423728624769695e-05, "epoch": 7.14859437751004, "percentage": 71.49, "elapsed_time": "0:03:04", "remaining_time": "0:01:13", "throughput": 1697.94, "total_tokens": 313168} {"current_steps": 1785, "total_steps": 2490, "loss": 0.3648, "lr": 1.1276919497246288e-05, "epoch": 7.168674698795181, "percentage": 71.69, "elapsed_time": "0:03:04", "remaining_time": "0:01:13", "throughput": 1698.54, "total_tokens": 313968} {"current_steps": 1790, "total_steps": 2490, "loss": 0.3585, "lr": 1.1130784596309409e-05, "epoch": 7.188755020080321, "percentage": 71.89, "elapsed_time": "0:03:05", "remaining_time": "0:01:12", "throughput": 1699.0, "total_tokens": 314736} {"current_steps": 1795, "total_steps": 2490, "loss": 0.3485, "lr": 1.098533110169071e-05, "epoch": 7.208835341365462, "percentage": 72.09, "elapsed_time": "0:03:05", "remaining_time": "0:01:11", "throughput": 1700.18, "total_tokens": 315664} {"current_steps": 1800, "total_steps": 2490, "loss": 0.3442, "lr": 1.084056615964377e-05, "epoch": 7.228915662650603, "percentage": 72.29, "elapsed_time": "0:03:06", "remaining_time": "0:01:11", "throughput": 1702.07, "total_tokens": 316704} {"current_steps": 1805, "total_steps": 2490, "loss": 0.388, "lr": 1.069649688259299e-05, "epoch": 7.2489959839357425, "percentage": 72.49, "elapsed_time": "0:03:06", "remaining_time": "0:01:10", "throughput": 1702.78, "total_tokens": 317520} {"current_steps": 1810, "total_steps": 2490, "loss": 0.3306, "lr": 1.0553130348784182e-05, "epoch": 7.269076305220883, "percentage": 72.69, "elapsed_time": "0:03:06", "remaining_time": "0:01:10", "throughput": 1704.24, "total_tokens": 318496} {"current_steps": 1815, "total_steps": 2490, "loss": 0.3181, "lr": 1.0410473601936765e-05, "epoch": 7.289156626506024, "percentage": 72.89, "elapsed_time": "0:03:07", "remaining_time": "0:01:09", "throughput": 1705.1, "total_tokens": 319344} {"current_steps": 1820, "total_steps": 2490, "loss": 0.3494, "lr": 1.026853365089773e-05, "epoch": 7.309236947791165, "percentage": 73.09, "elapsed_time": "0:03:07", "remaining_time": "0:01:09", "throughput": 1706.07, "total_tokens": 320224} {"current_steps": 1825, "total_steps": 2490, "loss": 0.3193, "lr": 1.0127317469297277e-05, "epoch": 7.329317269076305, "percentage": 73.29, "elapsed_time": "0:03:08", "remaining_time": "0:01:08", "throughput": 1706.38, "total_tokens": 320976} {"current_steps": 1830, "total_steps": 2490, "loss": 0.3271, "lr": 9.986831995206195e-06, "epoch": 7.349397590361446, "percentage": 73.49, "elapsed_time": "0:03:08", "remaining_time": "0:01:07", "throughput": 1707.14, "total_tokens": 321808} {"current_steps": 1835, "total_steps": 2490, "loss": 0.3504, "lr": 9.847084130795028e-06, "epoch": 7.3694779116465865, "percentage": 73.69, "elapsed_time": "0:03:08", "remaining_time": "0:01:07", "throughput": 1707.82, "total_tokens": 322624} {"current_steps": 1840, "total_steps": 2490, "loss": 0.3165, "lr": 9.708080741994868e-06, "epoch": 7.389558232931727, "percentage": 73.9, "elapsed_time": "0:03:09", "remaining_time": "0:01:06", "throughput": 1709.73, "total_tokens": 323696} {"current_steps": 1845, "total_steps": 2490, "loss": 0.3184, "lr": 9.569828658160158e-06, "epoch": 7.409638554216867, "percentage": 74.1, "elapsed_time": "0:03:09", "remaining_time": "0:01:06", "throughput": 1710.32, "total_tokens": 324496} {"current_steps": 1850, "total_steps": 2490, "loss": 0.3824, "lr": 9.432334671733039e-06, "epoch": 7.429718875502008, "percentage": 74.3, "elapsed_time": "0:03:10", "remaining_time": "0:01:05", "throughput": 1711.06, "total_tokens": 325328} {"current_steps": 1855, "total_steps": 2490, "loss": 0.3336, "lr": 9.295605537909708e-06, "epoch": 7.449799196787149, "percentage": 74.5, "elapsed_time": "0:03:10", "remaining_time": "0:01:05", "throughput": 1712.52, "total_tokens": 326304} {"current_steps": 1860, "total_steps": 2490, "loss": 0.3148, "lr": 9.159647974308494e-06, "epoch": 7.469879518072289, "percentage": 74.7, "elapsed_time": "0:03:10", "remaining_time": "0:01:04", "throughput": 1713.16, "total_tokens": 327120} {"current_steps": 1865, "total_steps": 2490, "loss": 0.3811, "lr": 9.024468660639826e-06, "epoch": 7.48995983935743, "percentage": 74.9, "elapsed_time": "0:03:11", "remaining_time": "0:01:04", "throughput": 1714.72, "total_tokens": 328128} {"current_steps": 1870, "total_steps": 2490, "loss": 0.351, "lr": 8.890074238378074e-06, "epoch": 7.51004016064257, "percentage": 75.1, "elapsed_time": "0:03:11", "remaining_time": "0:01:03", "throughput": 1715.7, "total_tokens": 329008} {"current_steps": 1875, "total_steps": 2490, "loss": 0.3149, "lr": 8.756471310435204e-06, "epoch": 7.530120481927711, "percentage": 75.3, "elapsed_time": "0:03:12", "remaining_time": "0:01:03", "throughput": 1716.43, "total_tokens": 329840} {"current_steps": 1875, "total_steps": 2490, "eval_loss": 0.3625907897949219, "epoch": 7.530120481927711, "percentage": 75.3, "elapsed_time": "0:03:13", "remaining_time": "0:01:03", "throughput": 1705.64, "total_tokens": 329840} {"current_steps": 1880, "total_steps": 2490, "loss": 0.3623, "lr": 8.623666440836404e-06, "epoch": 7.550200803212851, "percentage": 75.5, "elapsed_time": "0:03:15", "remaining_time": "0:01:03", "throughput": 1695.31, "total_tokens": 330624} {"current_steps": 1885, "total_steps": 2490, "loss": 0.3149, "lr": 8.491666154397573e-06, "epoch": 7.570281124497992, "percentage": 75.7, "elapsed_time": "0:03:15", "remaining_time": "0:01:02", "throughput": 1695.84, "total_tokens": 331440} {"current_steps": 1890, "total_steps": 2490, "loss": 0.3897, "lr": 8.360476936404754e-06, "epoch": 7.590361445783133, "percentage": 75.9, "elapsed_time": "0:03:15", "remaining_time": "0:01:02", "throughput": 1696.16, "total_tokens": 332192} {"current_steps": 1895, "total_steps": 2490, "loss": 0.3736, "lr": 8.230105232295538e-06, "epoch": 7.610441767068274, "percentage": 76.1, "elapsed_time": "0:03:16", "remaining_time": "0:01:01", "throughput": 1697.52, "total_tokens": 333168} {"current_steps": 1900, "total_steps": 2490, "loss": 0.3618, "lr": 8.100557447342327e-06, "epoch": 7.6305220883534135, "percentage": 76.31, "elapsed_time": "0:03:16", "remaining_time": "0:01:01", "throughput": 1698.65, "total_tokens": 334080} {"current_steps": 1905, "total_steps": 2490, "loss": 0.3533, "lr": 7.971839946337698e-06, "epoch": 7.650602409638554, "percentage": 76.51, "elapsed_time": "0:03:17", "remaining_time": "0:01:00", "throughput": 1700.0, "total_tokens": 335040} {"current_steps": 1910, "total_steps": 2490, "loss": 0.3532, "lr": 7.843959053281663e-06, "epoch": 7.670682730923695, "percentage": 76.71, "elapsed_time": "0:03:17", "remaining_time": "0:00:59", "throughput": 1700.49, "total_tokens": 335824} {"current_steps": 1915, "total_steps": 2490, "loss": 0.3362, "lr": 7.71692105107098e-06, "epoch": 7.690763052208835, "percentage": 76.91, "elapsed_time": "0:03:17", "remaining_time": "0:00:59", "throughput": 1701.23, "total_tokens": 336656} {"current_steps": 1920, "total_steps": 2490, "loss": 0.3608, "lr": 7.590732181190482e-06, "epoch": 7.710843373493976, "percentage": 77.11, "elapsed_time": "0:03:18", "remaining_time": "0:00:58", "throughput": 1701.94, "total_tokens": 337488} {"current_steps": 1925, "total_steps": 2490, "loss": 0.342, "lr": 7.465398643406366e-06, "epoch": 7.730923694779117, "percentage": 77.31, "elapsed_time": "0:03:18", "remaining_time": "0:00:58", "throughput": 1703.07, "total_tokens": 338400} {"current_steps": 1930, "total_steps": 2490, "loss": 0.3573, "lr": 7.340926595461687e-06, "epoch": 7.7510040160642575, "percentage": 77.51, "elapsed_time": "0:03:19", "remaining_time": "0:00:57", "throughput": 1703.88, "total_tokens": 339248} {"current_steps": 1935, "total_steps": 2490, "loss": 0.3539, "lr": 7.217322152773742e-06, "epoch": 7.771084337349397, "percentage": 77.71, "elapsed_time": "0:03:19", "remaining_time": "0:00:57", "throughput": 1704.77, "total_tokens": 340112} {"current_steps": 1940, "total_steps": 2490, "loss": 0.3471, "lr": 7.094591388133659e-06, "epoch": 7.791164658634538, "percentage": 77.91, "elapsed_time": "0:03:19", "remaining_time": "0:00:56", "throughput": 1705.26, "total_tokens": 340896} {"current_steps": 1945, "total_steps": 2490, "loss": 0.3599, "lr": 6.972740331408015e-06, "epoch": 7.811244979919679, "percentage": 78.11, "elapsed_time": "0:03:20", "remaining_time": "0:00:56", "throughput": 1706.12, "total_tokens": 341760} {"current_steps": 1950, "total_steps": 2490, "loss": 0.3382, "lr": 6.851774969242589e-06, "epoch": 7.831325301204819, "percentage": 78.31, "elapsed_time": "0:03:20", "remaining_time": "0:00:55", "throughput": 1706.9, "total_tokens": 342608} {"current_steps": 1955, "total_steps": 2490, "loss": 0.338, "lr": 6.731701244768254e-06, "epoch": 7.85140562248996, "percentage": 78.51, "elapsed_time": "0:03:21", "remaining_time": "0:00:55", "throughput": 1708.53, "total_tokens": 343632} {"current_steps": 1960, "total_steps": 2490, "loss": 0.3473, "lr": 6.612525057308949e-06, "epoch": 7.871485943775101, "percentage": 78.71, "elapsed_time": "0:03:21", "remaining_time": "0:00:54", "throughput": 1709.55, "total_tokens": 344528} {"current_steps": 1965, "total_steps": 2490, "loss": 0.3505, "lr": 6.494252262091857e-06, "epoch": 7.891566265060241, "percentage": 78.92, "elapsed_time": "0:03:21", "remaining_time": "0:00:53", "throughput": 1711.19, "total_tokens": 345568} {"current_steps": 1970, "total_steps": 2490, "loss": 0.3443, "lr": 6.3768886699597436e-06, "epoch": 7.911646586345381, "percentage": 79.12, "elapsed_time": "0:03:22", "remaining_time": "0:00:53", "throughput": 1712.31, "total_tokens": 346496} {"current_steps": 1975, "total_steps": 2490, "loss": 0.3473, "lr": 6.260440047085439e-06, "epoch": 7.931726907630522, "percentage": 79.32, "elapsed_time": "0:03:22", "remaining_time": "0:00:52", "throughput": 1713.16, "total_tokens": 347360} {"current_steps": 1980, "total_steps": 2490, "loss": 0.3445, "lr": 6.1449121146885894e-06, "epoch": 7.951807228915663, "percentage": 79.52, "elapsed_time": "0:03:23", "remaining_time": "0:00:52", "throughput": 1713.53, "total_tokens": 348128} {"current_steps": 1985, "total_steps": 2490, "loss": 0.3509, "lr": 6.030310548754506e-06, "epoch": 7.971887550200803, "percentage": 79.72, "elapsed_time": "0:03:23", "remaining_time": "0:00:51", "throughput": 1714.25, "total_tokens": 348960} {"current_steps": 1990, "total_steps": 2490, "loss": 0.3477, "lr": 5.9166409797553415e-06, "epoch": 7.991967871485944, "percentage": 79.92, "elapsed_time": "0:03:23", "remaining_time": "0:00:51", "throughput": 1715.33, "total_tokens": 349856} {"current_steps": 1995, "total_steps": 2490, "loss": 0.338, "lr": 5.803908992373449e-06, "epoch": 8.012048192771084, "percentage": 80.12, "elapsed_time": "0:03:24", "remaining_time": "0:00:50", "throughput": 1714.27, "total_tokens": 350784} {"current_steps": 2000, "total_steps": 2490, "loss": 0.3441, "lr": 5.692120125226993e-06, "epoch": 8.032128514056225, "percentage": 80.32, "elapsed_time": "0:03:25", "remaining_time": "0:00:50", "throughput": 1714.63, "total_tokens": 351552} {"current_steps": 2000, "total_steps": 2490, "eval_loss": 0.3484961986541748, "epoch": 8.032128514056225, "percentage": 80.32, "elapsed_time": "0:03:26", "remaining_time": "0:00:50", "throughput": 1704.51, "total_tokens": 351552} {"current_steps": 2005, "total_steps": 2490, "loss": 0.3537, "lr": 5.581279870597867e-06, "epoch": 8.052208835341366, "percentage": 80.52, "elapsed_time": "0:03:28", "remaining_time": "0:00:50", "throughput": 1693.7, "total_tokens": 352592} {"current_steps": 2010, "total_steps": 2490, "loss": 0.3441, "lr": 5.4713936741617845e-06, "epoch": 8.072289156626505, "percentage": 80.72, "elapsed_time": "0:03:28", "remaining_time": "0:00:49", "throughput": 1694.16, "total_tokens": 353392} {"current_steps": 2015, "total_steps": 2490, "loss": 0.3473, "lr": 5.3624669347208085e-06, "epoch": 8.092369477911646, "percentage": 80.92, "elapsed_time": "0:03:28", "remaining_time": "0:00:49", "throughput": 1694.62, "total_tokens": 354176} {"current_steps": 2020, "total_steps": 2490, "loss": 0.335, "lr": 5.254505003938043e-06, "epoch": 8.112449799196787, "percentage": 81.12, "elapsed_time": "0:03:29", "remaining_time": "0:00:48", "throughput": 1695.44, "total_tokens": 355040} {"current_steps": 2025, "total_steps": 2490, "loss": 0.3445, "lr": 5.147513186074751e-06, "epoch": 8.132530120481928, "percentage": 81.33, "elapsed_time": "0:03:29", "remaining_time": "0:00:48", "throughput": 1696.58, "total_tokens": 355984} {"current_steps": 2030, "total_steps": 2490, "loss": 0.3443, "lr": 5.041496737729687e-06, "epoch": 8.152610441767068, "percentage": 81.53, "elapsed_time": "0:03:30", "remaining_time": "0:00:47", "throughput": 1697.03, "total_tokens": 356768} {"current_steps": 2035, "total_steps": 2490, "loss": 0.3604, "lr": 4.936460867580889e-06, "epoch": 8.17269076305221, "percentage": 81.73, "elapsed_time": "0:03:30", "remaining_time": "0:00:47", "throughput": 1697.95, "total_tokens": 357648} {"current_steps": 2040, "total_steps": 2490, "loss": 0.3509, "lr": 4.832410736129778e-06, "epoch": 8.19277108433735, "percentage": 81.93, "elapsed_time": "0:03:31", "remaining_time": "0:00:46", "throughput": 1698.55, "total_tokens": 358464} {"current_steps": 2045, "total_steps": 2490, "loss": 0.3421, "lr": 4.729351455447573e-06, "epoch": 8.21285140562249, "percentage": 82.13, "elapsed_time": "0:03:31", "remaining_time": "0:00:46", "throughput": 1700.21, "total_tokens": 359520} {"current_steps": 2050, "total_steps": 2490, "loss": 0.3447, "lr": 4.627288088924156e-06, "epoch": 8.23293172690763, "percentage": 82.33, "elapsed_time": "0:03:31", "remaining_time": "0:00:45", "throughput": 1700.45, "total_tokens": 360256} {"current_steps": 2055, "total_steps": 2490, "loss": 0.3479, "lr": 4.526225651019309e-06, "epoch": 8.25301204819277, "percentage": 82.53, "elapsed_time": "0:03:32", "remaining_time": "0:00:44", "throughput": 1701.44, "total_tokens": 361184} {"current_steps": 2060, "total_steps": 2490, "loss": 0.3447, "lr": 4.4261691070163316e-06, "epoch": 8.273092369477911, "percentage": 82.73, "elapsed_time": "0:03:32", "remaining_time": "0:00:44", "throughput": 1702.3, "total_tokens": 362064} {"current_steps": 2065, "total_steps": 2490, "loss": 0.3415, "lr": 4.327123372778122e-06, "epoch": 8.293172690763052, "percentage": 82.93, "elapsed_time": "0:03:33", "remaining_time": "0:00:43", "throughput": 1703.11, "total_tokens": 362928} {"current_steps": 2070, "total_steps": 2490, "loss": 0.336, "lr": 4.229093314505619e-06, "epoch": 8.313253012048193, "percentage": 83.13, "elapsed_time": "0:03:33", "remaining_time": "0:00:43", "throughput": 1704.38, "total_tokens": 363888} {"current_steps": 2075, "total_steps": 2490, "loss": 0.3572, "lr": 4.132083748498744e-06, "epoch": 8.333333333333334, "percentage": 83.33, "elapsed_time": "0:03:33", "remaining_time": "0:00:42", "throughput": 1705.44, "total_tokens": 364800} {"current_steps": 2080, "total_steps": 2490, "loss": 0.3479, "lr": 4.036099440919763e-06, "epoch": 8.353413654618475, "percentage": 83.53, "elapsed_time": "0:03:34", "remaining_time": "0:00:42", "throughput": 1706.32, "total_tokens": 365680} {"current_steps": 2085, "total_steps": 2490, "loss": 0.3477, "lr": 3.9411451075591464e-06, "epoch": 8.373493975903614, "percentage": 83.73, "elapsed_time": "0:03:34", "remaining_time": "0:00:41", "throughput": 1707.19, "total_tokens": 366560} {"current_steps": 2090, "total_steps": 2490, "loss": 0.3449, "lr": 3.847225413603839e-06, "epoch": 8.393574297188755, "percentage": 83.94, "elapsed_time": "0:03:35", "remaining_time": "0:00:41", "throughput": 1708.01, "total_tokens": 367424} {"current_steps": 2095, "total_steps": 2490, "loss": 0.3538, "lr": 3.754344973408064e-06, "epoch": 8.413654618473895, "percentage": 84.14, "elapsed_time": "0:03:35", "remaining_time": "0:00:40", "throughput": 1708.75, "total_tokens": 368272} {"current_steps": 2100, "total_steps": 2490, "loss": 0.3472, "lr": 3.6625083502666554e-06, "epoch": 8.433734939759036, "percentage": 84.34, "elapsed_time": "0:03:35", "remaining_time": "0:00:40", "throughput": 1709.12, "total_tokens": 369040} {"current_steps": 2105, "total_steps": 2490, "loss": 0.3411, "lr": 3.5717200561908026e-06, "epoch": 8.453815261044177, "percentage": 84.54, "elapsed_time": "0:03:36", "remaining_time": "0:00:39", "throughput": 1709.49, "total_tokens": 369808} {"current_steps": 2110, "total_steps": 2490, "loss": 0.3383, "lr": 3.481984551686429e-06, "epoch": 8.473895582329318, "percentage": 84.74, "elapsed_time": "0:03:36", "remaining_time": "0:00:39", "throughput": 1710.23, "total_tokens": 370672} {"current_steps": 2115, "total_steps": 2490, "loss": 0.3417, "lr": 3.3933062455349744e-06, "epoch": 8.493975903614459, "percentage": 84.94, "elapsed_time": "0:03:37", "remaining_time": "0:00:38", "throughput": 1710.93, "total_tokens": 371520} {"current_steps": 2120, "total_steps": 2490, "loss": 0.36, "lr": 3.305689494576847e-06, "epoch": 8.514056224899598, "percentage": 85.14, "elapsed_time": "0:03:37", "remaining_time": "0:00:37", "throughput": 1711.66, "total_tokens": 372368} {"current_steps": 2125, "total_steps": 2490, "loss": 0.3574, "lr": 3.2191386034973627e-06, "epoch": 8.534136546184738, "percentage": 85.34, "elapsed_time": "0:03:37", "remaining_time": "0:00:37", "throughput": 1713.31, "total_tokens": 373424} {"current_steps": 2125, "total_steps": 2490, "eval_loss": 0.3515996038913727, "epoch": 8.534136546184738, "percentage": 85.34, "elapsed_time": "0:03:39", "remaining_time": "0:00:37", "throughput": 1703.77, "total_tokens": 373424} {"current_steps": 2130, "total_steps": 2490, "loss": 0.3443, "lr": 3.1336578246152103e-06, "epoch": 8.55421686746988, "percentage": 85.54, "elapsed_time": "0:03:40", "remaining_time": "0:00:37", "throughput": 1695.13, "total_tokens": 374240} {"current_steps": 2135, "total_steps": 2490, "loss": 0.3383, "lr": 3.049251357673577e-06, "epoch": 8.57429718875502, "percentage": 85.74, "elapsed_time": "0:03:41", "remaining_time": "0:00:36", "throughput": 1695.91, "total_tokens": 375104} {"current_steps": 2140, "total_steps": 2490, "loss": 0.3476, "lr": 2.9659233496337786e-06, "epoch": 8.594377510040161, "percentage": 85.94, "elapsed_time": "0:03:41", "remaining_time": "0:00:36", "throughput": 1697.07, "total_tokens": 376080} {"current_steps": 2145, "total_steps": 2490, "loss": 0.3415, "lr": 2.8836778944715454e-06, "epoch": 8.614457831325302, "percentage": 86.14, "elapsed_time": "0:03:42", "remaining_time": "0:00:35", "throughput": 1697.73, "total_tokens": 376928} {"current_steps": 2150, "total_steps": 2490, "loss": 0.351, "lr": 2.802519032975859e-06, "epoch": 8.634538152610443, "percentage": 86.35, "elapsed_time": "0:03:42", "remaining_time": "0:00:35", "throughput": 1698.7, "total_tokens": 377856} {"current_steps": 2155, "total_steps": 2490, "loss": 0.3417, "lr": 2.722450752550429e-06, "epoch": 8.654618473895582, "percentage": 86.55, "elapsed_time": "0:03:42", "remaining_time": "0:00:34", "throughput": 1699.72, "total_tokens": 378784} {"current_steps": 2160, "total_steps": 2490, "loss": 0.3604, "lr": 2.6434769870177985e-06, "epoch": 8.674698795180722, "percentage": 86.75, "elapsed_time": "0:03:43", "remaining_time": "0:00:34", "throughput": 1700.68, "total_tokens": 379696} {"current_steps": 2165, "total_steps": 2490, "loss": 0.3447, "lr": 2.5656016164260554e-06, "epoch": 8.694779116465863, "percentage": 86.95, "elapsed_time": "0:03:43", "remaining_time": "0:00:33", "throughput": 1701.23, "total_tokens": 380512} {"current_steps": 2170, "total_steps": 2490, "loss": 0.3445, "lr": 2.4888284668582285e-06, "epoch": 8.714859437751004, "percentage": 87.15, "elapsed_time": "0:03:44", "remaining_time": "0:00:33", "throughput": 1702.57, "total_tokens": 381520} {"current_steps": 2175, "total_steps": 2490, "loss": 0.3354, "lr": 2.4131613102442857e-06, "epoch": 8.734939759036145, "percentage": 87.35, "elapsed_time": "0:03:44", "remaining_time": "0:00:32", "throughput": 1703.76, "total_tokens": 382480} {"current_steps": 2180, "total_steps": 2490, "loss": 0.3383, "lr": 2.3386038641758063e-06, "epoch": 8.755020080321286, "percentage": 87.55, "elapsed_time": "0:03:44", "remaining_time": "0:00:31", "throughput": 1704.86, "total_tokens": 383440} {"current_steps": 2185, "total_steps": 2490, "loss": 0.3508, "lr": 2.265159791723373e-06, "epoch": 8.775100401606426, "percentage": 87.75, "elapsed_time": "0:03:45", "remaining_time": "0:00:31", "throughput": 1705.34, "total_tokens": 384240} {"current_steps": 2190, "total_steps": 2490, "loss": 0.3483, "lr": 2.1928327012565696e-06, "epoch": 8.795180722891565, "percentage": 87.95, "elapsed_time": "0:03:45", "remaining_time": "0:00:30", "throughput": 1706.17, "total_tokens": 385120} {"current_steps": 2195, "total_steps": 2490, "loss": 0.3546, "lr": 2.121626146266706e-06, "epoch": 8.815261044176706, "percentage": 88.15, "elapsed_time": "0:03:46", "remaining_time": "0:00:30", "throughput": 1706.94, "total_tokens": 385984} {"current_steps": 2200, "total_steps": 2490, "loss": 0.3609, "lr": 2.051543625192226e-06, "epoch": 8.835341365461847, "percentage": 88.35, "elapsed_time": "0:03:46", "remaining_time": "0:00:29", "throughput": 1707.91, "total_tokens": 386896} {"current_steps": 2205, "total_steps": 2490, "loss": 0.33, "lr": 1.9825885812468524e-06, "epoch": 8.855421686746988, "percentage": 88.55, "elapsed_time": "0:03:46", "remaining_time": "0:00:29", "throughput": 1708.75, "total_tokens": 387776} {"current_steps": 2210, "total_steps": 2490, "loss": 0.3487, "lr": 1.914764402250385e-06, "epoch": 8.875502008032129, "percentage": 88.76, "elapsed_time": "0:03:47", "remaining_time": "0:00:28", "throughput": 1709.77, "total_tokens": 388704} {"current_steps": 2215, "total_steps": 2490, "loss": 0.3512, "lr": 1.8480744204622757e-06, "epoch": 8.89558232931727, "percentage": 88.96, "elapsed_time": "0:03:47", "remaining_time": "0:00:28", "throughput": 1710.04, "total_tokens": 389456} {"current_steps": 2220, "total_steps": 2490, "loss": 0.3522, "lr": 1.7825219124179004e-06, "epoch": 8.91566265060241, "percentage": 89.16, "elapsed_time": "0:03:48", "remaining_time": "0:00:27", "throughput": 1710.73, "total_tokens": 390304} {"current_steps": 2225, "total_steps": 2490, "loss": 0.3356, "lr": 1.7181100987675862e-06, "epoch": 8.93574297188755, "percentage": 89.36, "elapsed_time": "0:03:48", "remaining_time": "0:00:27", "throughput": 1711.21, "total_tokens": 391104} {"current_steps": 2230, "total_steps": 2490, "loss": 0.3516, "lr": 1.6548421441183875e-06, "epoch": 8.95582329317269, "percentage": 89.56, "elapsed_time": "0:03:48", "remaining_time": "0:00:26", "throughput": 1712.49, "total_tokens": 392112} {"current_steps": 2235, "total_steps": 2490, "loss": 0.3449, "lr": 1.5927211568785878e-06, "epoch": 8.975903614457831, "percentage": 89.76, "elapsed_time": "0:03:49", "remaining_time": "0:00:26", "throughput": 1712.87, "total_tokens": 392880} {"current_steps": 2240, "total_steps": 2490, "loss": 0.3302, "lr": 1.5317501891049719e-06, "epoch": 8.995983935742972, "percentage": 89.96, "elapsed_time": "0:03:49", "remaining_time": "0:00:25", "throughput": 1713.61, "total_tokens": 393728} {"current_steps": 2245, "total_steps": 2490, "loss": 0.3487, "lr": 1.4719322363529242e-06, "epoch": 9.016064257028113, "percentage": 90.16, "elapsed_time": "0:03:50", "remaining_time": "0:00:25", "throughput": 1712.71, "total_tokens": 394688} {"current_steps": 2250, "total_steps": 2490, "loss": 0.3673, "lr": 1.4132702375291989e-06, "epoch": 9.036144578313253, "percentage": 90.36, "elapsed_time": "0:03:50", "remaining_time": "0:00:24", "throughput": 1713.71, "total_tokens": 395616} {"current_steps": 2250, "total_steps": 2490, "eval_loss": 0.35450634360313416, "epoch": 9.036144578313253, "percentage": 90.36, "elapsed_time": "0:03:52", "remaining_time": "0:00:24", "throughput": 1704.76, "total_tokens": 395616} {"current_steps": 2255, "total_steps": 2490, "loss": 0.3455, "lr": 1.3557670747475714e-06, "epoch": 9.056224899598394, "percentage": 90.56, "elapsed_time": "0:03:53", "remaining_time": "0:00:24", "throughput": 1696.82, "total_tokens": 396560} {"current_steps": 2260, "total_steps": 2490, "loss": 0.3489, "lr": 1.2994255731871963e-06, "epoch": 9.076305220883533, "percentage": 90.76, "elapsed_time": "0:03:54", "remaining_time": "0:00:23", "throughput": 1697.7, "total_tokens": 397456} {"current_steps": 2265, "total_steps": 2490, "loss": 0.3648, "lr": 1.244248500953854e-06, "epoch": 9.096385542168674, "percentage": 90.96, "elapsed_time": "0:03:54", "remaining_time": "0:00:23", "throughput": 1698.91, "total_tokens": 398448} {"current_steps": 2270, "total_steps": 2490, "loss": 0.3544, "lr": 1.1902385689439022e-06, "epoch": 9.116465863453815, "percentage": 91.16, "elapsed_time": "0:03:54", "remaining_time": "0:00:22", "throughput": 1699.35, "total_tokens": 399248} {"current_steps": 2275, "total_steps": 2490, "loss": 0.3574, "lr": 1.137398430711123e-06, "epoch": 9.136546184738956, "percentage": 91.37, "elapsed_time": "0:03:55", "remaining_time": "0:00:22", "throughput": 1700.04, "total_tokens": 400096} {"current_steps": 2280, "total_steps": 2490, "loss": 0.3477, "lr": 1.085730682336325e-06, "epoch": 9.156626506024097, "percentage": 91.57, "elapsed_time": "0:03:55", "remaining_time": "0:00:21", "throughput": 1700.97, "total_tokens": 401024} {"current_steps": 2285, "total_steps": 2490, "loss": 0.3506, "lr": 1.0352378622998204e-06, "epoch": 9.176706827309237, "percentage": 91.77, "elapsed_time": "0:03:56", "remaining_time": "0:00:21", "throughput": 1701.58, "total_tokens": 401856} {"current_steps": 2290, "total_steps": 2490, "loss": 0.3388, "lr": 9.85922451356694e-07, "epoch": 9.196787148594378, "percentage": 91.97, "elapsed_time": "0:03:56", "remaining_time": "0:00:20", "throughput": 1702.38, "total_tokens": 402736} {"current_steps": 2295, "total_steps": 2490, "loss": 0.3413, "lr": 9.377868724149197e-07, "epoch": 9.216867469879517, "percentage": 92.17, "elapsed_time": "0:03:56", "remaining_time": "0:00:20", "throughput": 1703.46, "total_tokens": 403696} {"current_steps": 2300, "total_steps": 2490, "loss": 0.3445, "lr": 8.908334904163207e-07, "epoch": 9.236947791164658, "percentage": 92.37, "elapsed_time": "0:03:57", "remaining_time": "0:00:19", "throughput": 1703.85, "total_tokens": 404480} {"current_steps": 2305, "total_steps": 2490, "loss": 0.3233, "lr": 8.450646122203865e-07, "epoch": 9.257028112449799, "percentage": 92.57, "elapsed_time": "0:03:57", "remaining_time": "0:00:19", "throughput": 1705.32, "total_tokens": 405536} {"current_steps": 2310, "total_steps": 2490, "loss": 0.3513, "lr": 8.004824864909277e-07, "epoch": 9.27710843373494, "percentage": 92.77, "elapsed_time": "0:03:58", "remaining_time": "0:00:18", "throughput": 1705.92, "total_tokens": 406368} {"current_steps": 2315, "total_steps": 2490, "loss": 0.3417, "lr": 7.570893035856091e-07, "epoch": 9.29718875502008, "percentage": 92.97, "elapsed_time": "0:03:58", "remaining_time": "0:00:18", "throughput": 1706.45, "total_tokens": 407184} {"current_steps": 2320, "total_steps": 2490, "loss": 0.3542, "lr": 7.148871954483105e-07, "epoch": 9.317269076305221, "percentage": 93.17, "elapsed_time": "0:03:59", "remaining_time": "0:00:17", "throughput": 1706.58, "total_tokens": 407904} {"current_steps": 2325, "total_steps": 2490, "loss": 0.3387, "lr": 6.738782355044049e-07, "epoch": 9.337349397590362, "percentage": 93.37, "elapsed_time": "0:03:59", "remaining_time": "0:00:16", "throughput": 1707.17, "total_tokens": 408736} {"current_steps": 2330, "total_steps": 2490, "loss": 0.33, "lr": 6.340644385588846e-07, "epoch": 9.357429718875501, "percentage": 93.57, "elapsed_time": "0:03:59", "remaining_time": "0:00:16", "throughput": 1708.15, "total_tokens": 409664} {"current_steps": 2335, "total_steps": 2490, "loss": 0.3643, "lr": 5.954477606973679e-07, "epoch": 9.377510040160642, "percentage": 93.78, "elapsed_time": "0:04:00", "remaining_time": "0:00:15", "throughput": 1709.65, "total_tokens": 410736} {"current_steps": 2340, "total_steps": 2490, "loss": 0.3612, "lr": 5.580300991899989e-07, "epoch": 9.397590361445783, "percentage": 93.98, "elapsed_time": "0:04:00", "remaining_time": "0:00:15", "throughput": 1710.68, "total_tokens": 411680} {"current_steps": 2345, "total_steps": 2490, "loss": 0.3417, "lr": 5.218132923982267e-07, "epoch": 9.417670682730924, "percentage": 94.18, "elapsed_time": "0:04:01", "remaining_time": "0:00:14", "throughput": 1711.13, "total_tokens": 412480} {"current_steps": 2350, "total_steps": 2490, "loss": 0.3352, "lr": 4.867991196844918e-07, "epoch": 9.437751004016064, "percentage": 94.38, "elapsed_time": "0:04:01", "remaining_time": "0:00:14", "throughput": 1711.44, "total_tokens": 413248} {"current_steps": 2355, "total_steps": 2490, "loss": 0.3606, "lr": 4.5298930132480213e-07, "epoch": 9.457831325301205, "percentage": 94.58, "elapsed_time": "0:04:01", "remaining_time": "0:00:13", "throughput": 1712.02, "total_tokens": 414080} {"current_steps": 2360, "total_steps": 2490, "loss": 0.3481, "lr": 4.203854984242195e-07, "epoch": 9.477911646586346, "percentage": 94.78, "elapsed_time": "0:04:02", "remaining_time": "0:00:13", "throughput": 1712.65, "total_tokens": 414928} {"current_steps": 2365, "total_steps": 2490, "loss": 0.364, "lr": 3.8898931283523344e-07, "epoch": 9.497991967871485, "percentage": 94.98, "elapsed_time": "0:04:02", "remaining_time": "0:00:12", "throughput": 1713.09, "total_tokens": 415728} {"current_steps": 2370, "total_steps": 2490, "loss": 0.3336, "lr": 3.5880228707907417e-07, "epoch": 9.518072289156626, "percentage": 95.18, "elapsed_time": "0:04:03", "remaining_time": "0:00:12", "throughput": 1714.11, "total_tokens": 416672} {"current_steps": 2375, "total_steps": 2490, "loss": 0.3419, "lr": 3.2982590426993145e-07, "epoch": 9.538152610441767, "percentage": 95.38, "elapsed_time": "0:04:03", "remaining_time": "0:00:11", "throughput": 1714.73, "total_tokens": 417520} {"current_steps": 2375, "total_steps": 2490, "eval_loss": 0.3474566638469696, "epoch": 9.538152610441767, "percentage": 95.38, "elapsed_time": "0:04:04", "remaining_time": "0:00:11", "throughput": 1706.23, "total_tokens": 417520} {"current_steps": 2380, "total_steps": 2490, "loss": 0.3326, "lr": 3.020615880420713e-07, "epoch": 9.558232931726907, "percentage": 95.58, "elapsed_time": "0:04:06", "remaining_time": "0:00:11", "throughput": 1698.3, "total_tokens": 418400} {"current_steps": 2385, "total_steps": 2490, "loss": 0.3449, "lr": 2.7551070247990305e-07, "epoch": 9.578313253012048, "percentage": 95.78, "elapsed_time": "0:04:06", "remaining_time": "0:00:10", "throughput": 1698.89, "total_tokens": 419248} {"current_steps": 2390, "total_steps": 2490, "loss": 0.3481, "lr": 2.501745520509552e-07, "epoch": 9.598393574297189, "percentage": 95.98, "elapsed_time": "0:04:07", "remaining_time": "0:00:10", "throughput": 1699.53, "total_tokens": 420096} {"current_steps": 2395, "total_steps": 2490, "loss": 0.3386, "lr": 2.2605438154179038e-07, "epoch": 9.61847389558233, "percentage": 96.18, "elapsed_time": "0:04:07", "remaining_time": "0:00:09", "throughput": 1699.77, "total_tokens": 420848} {"current_steps": 2400, "total_steps": 2490, "loss": 0.3294, "lr": 2.0315137599685174e-07, "epoch": 9.638554216867469, "percentage": 96.39, "elapsed_time": "0:04:07", "remaining_time": "0:00:09", "throughput": 1700.53, "total_tokens": 421728} {"current_steps": 2405, "total_steps": 2490, "loss": 0.3604, "lr": 1.814666606602261e-07, "epoch": 9.65863453815261, "percentage": 96.59, "elapsed_time": "0:04:08", "remaining_time": "0:00:08", "throughput": 1701.45, "total_tokens": 422656} {"current_steps": 2410, "total_steps": 2490, "loss": 0.3457, "lr": 1.6100130092037703e-07, "epoch": 9.67871485943775, "percentage": 96.79, "elapsed_time": "0:04:08", "remaining_time": "0:00:08", "throughput": 1702.45, "total_tokens": 423600} {"current_steps": 2415, "total_steps": 2490, "loss": 0.3447, "lr": 1.4175630225778947e-07, "epoch": 9.698795180722891, "percentage": 96.99, "elapsed_time": "0:04:09", "remaining_time": "0:00:07", "throughput": 1703.09, "total_tokens": 424448} {"current_steps": 2420, "total_steps": 2490, "loss": 0.3544, "lr": 1.237326101955677e-07, "epoch": 9.718875502008032, "percentage": 97.19, "elapsed_time": "0:04:09", "remaining_time": "0:00:07", "throughput": 1704.91, "total_tokens": 425632} {"current_steps": 2425, "total_steps": 2490, "loss": 0.3385, "lr": 1.0693111025300017e-07, "epoch": 9.738955823293173, "percentage": 97.39, "elapsed_time": "0:04:10", "remaining_time": "0:00:06", "throughput": 1705.86, "total_tokens": 426576} {"current_steps": 2430, "total_steps": 2490, "loss": 0.3296, "lr": 9.13526279020277e-08, "epoch": 9.759036144578314, "percentage": 97.59, "elapsed_time": "0:04:10", "remaining_time": "0:00:06", "throughput": 1706.3, "total_tokens": 427376} {"current_steps": 2435, "total_steps": 2490, "loss": 0.3514, "lr": 7.699792852670362e-08, "epoch": 9.779116465863455, "percentage": 97.79, "elapsed_time": "0:04:10", "remaining_time": "0:00:05", "throughput": 1707.06, "total_tokens": 428256} {"current_steps": 2440, "total_steps": 2490, "loss": 0.3389, "lr": 6.386771738558506e-08, "epoch": 9.799196787148594, "percentage": 97.99, "elapsed_time": "0:04:11", "remaining_time": "0:00:05", "throughput": 1708.13, "total_tokens": 429216} {"current_steps": 2445, "total_steps": 2490, "loss": 0.3542, "lr": 5.196263957708836e-08, "epoch": 9.819277108433734, "percentage": 98.19, "elapsed_time": "0:04:11", "remaining_time": "0:00:04", "throughput": 1709.26, "total_tokens": 430208} {"current_steps": 2450, "total_steps": 2490, "loss": 0.3292, "lr": 4.1283280007778366e-08, "epoch": 9.839357429718875, "percentage": 98.39, "elapsed_time": "0:04:12", "remaining_time": "0:00:04", "throughput": 1709.51, "total_tokens": 430960} {"current_steps": 2455, "total_steps": 2490, "loss": 0.355, "lr": 3.1830163363655296e-08, "epoch": 9.859437751004016, "percentage": 98.59, "elapsed_time": "0:04:12", "remaining_time": "0:00:03", "throughput": 1710.64, "total_tokens": 431936} {"current_steps": 2460, "total_steps": 2490, "loss": 0.3425, "lr": 2.3603754084358663e-08, "epoch": 9.879518072289157, "percentage": 98.8, "elapsed_time": "0:04:12", "remaining_time": "0:00:03", "throughput": 1711.74, "total_tokens": 432912} {"current_steps": 2465, "total_steps": 2490, "loss": 0.3449, "lr": 1.6604456340352235e-08, "epoch": 9.899598393574298, "percentage": 99.0, "elapsed_time": "0:04:13", "remaining_time": "0:00:02", "throughput": 1712.11, "total_tokens": 433696} {"current_steps": 2470, "total_steps": 2490, "loss": 0.3513, "lr": 1.0832614013073228e-08, "epoch": 9.919678714859439, "percentage": 99.2, "elapsed_time": "0:04:13", "remaining_time": "0:00:02", "throughput": 1712.67, "total_tokens": 434528} {"current_steps": 2475, "total_steps": 2490, "loss": 0.3633, "lr": 6.288510678031934e-09, "epoch": 9.939759036144578, "percentage": 99.4, "elapsed_time": "0:04:14", "remaining_time": "0:00:01", "throughput": 1712.91, "total_tokens": 435280} {"current_steps": 2480, "total_steps": 2490, "loss": 0.3483, "lr": 2.972369590878432e-09, "epoch": 9.959839357429718, "percentage": 99.6, "elapsed_time": "0:04:14", "remaining_time": "0:00:01", "throughput": 1713.4, "total_tokens": 436096} {"current_steps": 2485, "total_steps": 2490, "loss": 0.3521, "lr": 8.843536764419069e-10, "epoch": 9.97991967871486, "percentage": 99.8, "elapsed_time": "0:04:14", "remaining_time": "0:00:00", "throughput": 1714.28, "total_tokens": 437008} {"current_steps": 2490, "total_steps": 2490, "loss": 0.3294, "lr": 2.4565520709285417e-11, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:04:15", "remaining_time": "0:00:00", "throughput": 1713.86, "total_tokens": 437760} {"current_steps": 2490, "total_steps": 2490, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:04:16", "remaining_time": "0:00:00", "throughput": 1705.85, "total_tokens": 437760}