train_mrpc_1754652143 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 8260
422defc verified
{"current_steps": 5, "total_steps": 8260, "loss": 13.3823, "lr": 2.421307506053269e-07, "epoch": 0.006053268765133172, "percentage": 0.06, "elapsed_time": "0:00:01", "remaining_time": "0:33:19", "throughput": 1585.63, "total_tokens": 1920}
{"current_steps": 10, "total_steps": 8260, "loss": 13.6601, "lr": 5.447941888619855e-07, "epoch": 0.012106537530266344, "percentage": 0.12, "elapsed_time": "0:00:01", "remaining_time": "0:23:52", "throughput": 2192.56, "total_tokens": 3808}
{"current_steps": 15, "total_steps": 8260, "loss": 13.6942, "lr": 8.474576271186441e-07, "epoch": 0.018159806295399514, "percentage": 0.18, "elapsed_time": "0:00:02", "remaining_time": "0:20:49", "throughput": 2576.58, "total_tokens": 5856}
{"current_steps": 20, "total_steps": 8260, "loss": 13.5439, "lr": 1.1501210653753028e-06, "epoch": 0.024213075060532687, "percentage": 0.24, "elapsed_time": "0:00:02", "remaining_time": "0:19:14", "throughput": 2798.38, "total_tokens": 7840}
{"current_steps": 25, "total_steps": 8260, "loss": 13.587, "lr": 1.4527845036319614e-06, "epoch": 0.03026634382566586, "percentage": 0.3, "elapsed_time": "0:00:03", "remaining_time": "0:18:34", "throughput": 2978.76, "total_tokens": 10080}
{"current_steps": 30, "total_steps": 8260, "loss": 13.6849, "lr": 1.7554479418886198e-06, "epoch": 0.03631961259079903, "percentage": 0.36, "elapsed_time": "0:00:03", "remaining_time": "0:17:52", "throughput": 3085.41, "total_tokens": 12064}
{"current_steps": 35, "total_steps": 8260, "loss": 13.4941, "lr": 2.0581113801452785e-06, "epoch": 0.0423728813559322, "percentage": 0.42, "elapsed_time": "0:00:04", "remaining_time": "0:17:33", "throughput": 3156.37, "total_tokens": 14144}
{"current_steps": 40, "total_steps": 8260, "loss": 13.4791, "lr": 2.3607748184019373e-06, "epoch": 0.048426150121065374, "percentage": 0.48, "elapsed_time": "0:00:04", "remaining_time": "0:17:04", "throughput": 3234.46, "total_tokens": 16128}
{"current_steps": 45, "total_steps": 8260, "loss": 13.4883, "lr": 2.6634382566585957e-06, "epoch": 0.05447941888619855, "percentage": 0.54, "elapsed_time": "0:00:05", "remaining_time": "0:16:46", "throughput": 3284.49, "total_tokens": 18112}
{"current_steps": 50, "total_steps": 8260, "loss": 13.5754, "lr": 2.9661016949152545e-06, "epoch": 0.06053268765133172, "percentage": 0.61, "elapsed_time": "0:00:06", "remaining_time": "0:16:41", "throughput": 3327.78, "total_tokens": 20288}
{"current_steps": 55, "total_steps": 8260, "loss": 13.369, "lr": 3.268765133171913e-06, "epoch": 0.06658595641646489, "percentage": 0.67, "elapsed_time": "0:00:06", "remaining_time": "0:16:34", "throughput": 3355.0, "total_tokens": 22368}
{"current_steps": 60, "total_steps": 8260, "loss": 13.44, "lr": 3.5714285714285714e-06, "epoch": 0.07263922518159806, "percentage": 0.73, "elapsed_time": "0:00:07", "remaining_time": "0:16:19", "throughput": 3385.12, "total_tokens": 24256}
{"current_steps": 65, "total_steps": 8260, "loss": 13.2884, "lr": 3.87409200968523e-06, "epoch": 0.07869249394673124, "percentage": 0.79, "elapsed_time": "0:00:07", "remaining_time": "0:16:18", "throughput": 3408.78, "total_tokens": 26464}
{"current_steps": 70, "total_steps": 8260, "loss": 13.2649, "lr": 4.176755447941889e-06, "epoch": 0.0847457627118644, "percentage": 0.85, "elapsed_time": "0:00:08", "remaining_time": "0:16:16", "throughput": 3443.34, "total_tokens": 28736}
{"current_steps": 75, "total_steps": 8260, "loss": 13.2703, "lr": 4.479418886198548e-06, "epoch": 0.09079903147699758, "percentage": 0.91, "elapsed_time": "0:00:08", "remaining_time": "0:16:10", "throughput": 3467.32, "total_tokens": 30848}
{"current_steps": 80, "total_steps": 8260, "loss": 13.1985, "lr": 4.782082324455206e-06, "epoch": 0.09685230024213075, "percentage": 0.97, "elapsed_time": "0:00:09", "remaining_time": "0:16:08", "throughput": 3483.65, "total_tokens": 32992}
{"current_steps": 85, "total_steps": 8260, "loss": 13.0688, "lr": 5.084745762711865e-06, "epoch": 0.10290556900726393, "percentage": 1.03, "elapsed_time": "0:00:09", "remaining_time": "0:16:01", "throughput": 3500.36, "total_tokens": 34976}
{"current_steps": 90, "total_steps": 8260, "loss": 13.4134, "lr": 5.3874092009685235e-06, "epoch": 0.1089588377723971, "percentage": 1.09, "elapsed_time": "0:00:10", "remaining_time": "0:15:54", "throughput": 3509.83, "total_tokens": 36896}
{"current_steps": 95, "total_steps": 8260, "loss": 13.24, "lr": 5.6900726392251815e-06, "epoch": 0.11501210653753027, "percentage": 1.15, "elapsed_time": "0:00:11", "remaining_time": "0:15:48", "throughput": 3520.82, "total_tokens": 38848}
{"current_steps": 100, "total_steps": 8260, "loss": 12.9479, "lr": 5.99273607748184e-06, "epoch": 0.12106537530266344, "percentage": 1.21, "elapsed_time": "0:00:11", "remaining_time": "0:15:47", "throughput": 3529.32, "total_tokens": 40960}
{"current_steps": 105, "total_steps": 8260, "loss": 12.9054, "lr": 6.295399515738499e-06, "epoch": 0.1271186440677966, "percentage": 1.27, "elapsed_time": "0:00:12", "remaining_time": "0:15:45", "throughput": 3532.0, "total_tokens": 43008}
{"current_steps": 110, "total_steps": 8260, "loss": 12.9692, "lr": 6.598062953995157e-06, "epoch": 0.13317191283292978, "percentage": 1.33, "elapsed_time": "0:00:12", "remaining_time": "0:15:41", "throughput": 3542.28, "total_tokens": 44992}
{"current_steps": 115, "total_steps": 8260, "loss": 12.9, "lr": 6.900726392251816e-06, "epoch": 0.13922518159806296, "percentage": 1.39, "elapsed_time": "0:00:13", "remaining_time": "0:15:38", "throughput": 3552.13, "total_tokens": 47072}
{"current_steps": 120, "total_steps": 8260, "loss": 12.7543, "lr": 7.203389830508475e-06, "epoch": 0.14527845036319612, "percentage": 1.45, "elapsed_time": "0:00:13", "remaining_time": "0:15:37", "throughput": 3555.79, "total_tokens": 49152}
{"current_steps": 125, "total_steps": 8260, "loss": 12.6946, "lr": 7.5060532687651345e-06, "epoch": 0.1513317191283293, "percentage": 1.51, "elapsed_time": "0:00:14", "remaining_time": "0:15:35", "throughput": 3563.59, "total_tokens": 51232}
{"current_steps": 130, "total_steps": 8260, "loss": 12.5596, "lr": 7.808716707021792e-06, "epoch": 0.15738498789346247, "percentage": 1.57, "elapsed_time": "0:00:14", "remaining_time": "0:15:34", "throughput": 3566.96, "total_tokens": 53312}
{"current_steps": 135, "total_steps": 8260, "loss": 12.5213, "lr": 8.111380145278451e-06, "epoch": 0.16343825665859565, "percentage": 1.63, "elapsed_time": "0:00:15", "remaining_time": "0:15:34", "throughput": 3576.79, "total_tokens": 55520}
{"current_steps": 140, "total_steps": 8260, "loss": 12.6219, "lr": 8.41404358353511e-06, "epoch": 0.1694915254237288, "percentage": 1.69, "elapsed_time": "0:00:16", "remaining_time": "0:15:30", "throughput": 3580.48, "total_tokens": 57440}
{"current_steps": 145, "total_steps": 8260, "loss": 12.4553, "lr": 8.716707021791767e-06, "epoch": 0.17554479418886199, "percentage": 1.76, "elapsed_time": "0:00:16", "remaining_time": "0:15:28", "throughput": 3590.77, "total_tokens": 59584}
{"current_steps": 150, "total_steps": 8260, "loss": 12.5005, "lr": 9.019370460048427e-06, "epoch": 0.18159806295399517, "percentage": 1.82, "elapsed_time": "0:00:17", "remaining_time": "0:15:27", "throughput": 3600.65, "total_tokens": 61760}
{"current_steps": 155, "total_steps": 8260, "loss": 12.2724, "lr": 9.322033898305085e-06, "epoch": 0.18765133171912832, "percentage": 1.88, "elapsed_time": "0:00:17", "remaining_time": "0:15:25", "throughput": 3602.3, "total_tokens": 63744}
{"current_steps": 160, "total_steps": 8260, "loss": 12.2898, "lr": 9.624697336561745e-06, "epoch": 0.1937046004842615, "percentage": 1.94, "elapsed_time": "0:00:18", "remaining_time": "0:15:24", "throughput": 3605.15, "total_tokens": 65856}
{"current_steps": 165, "total_steps": 8260, "loss": 12.0826, "lr": 9.927360774818403e-06, "epoch": 0.19975786924939468, "percentage": 2.0, "elapsed_time": "0:00:18", "remaining_time": "0:15:22", "throughput": 3606.07, "total_tokens": 67840}
{"current_steps": 170, "total_steps": 8260, "loss": 11.9301, "lr": 1.023002421307506e-05, "epoch": 0.20581113801452786, "percentage": 2.06, "elapsed_time": "0:00:19", "remaining_time": "0:15:21", "throughput": 3611.32, "total_tokens": 69920}
{"current_steps": 175, "total_steps": 8260, "loss": 11.993, "lr": 1.053268765133172e-05, "epoch": 0.211864406779661, "percentage": 2.12, "elapsed_time": "0:00:19", "remaining_time": "0:15:21", "throughput": 3613.28, "total_tokens": 72032}
{"current_steps": 180, "total_steps": 8260, "loss": 11.8034, "lr": 1.0835351089588378e-05, "epoch": 0.2179176755447942, "percentage": 2.18, "elapsed_time": "0:00:20", "remaining_time": "0:15:20", "throughput": 3614.04, "total_tokens": 74112}
{"current_steps": 185, "total_steps": 8260, "loss": 11.9466, "lr": 1.1138014527845036e-05, "epoch": 0.22397094430992737, "percentage": 2.24, "elapsed_time": "0:00:21", "remaining_time": "0:15:21", "throughput": 3615.01, "total_tokens": 76288}
{"current_steps": 190, "total_steps": 8260, "loss": 11.8707, "lr": 1.1440677966101696e-05, "epoch": 0.23002421307506055, "percentage": 2.3, "elapsed_time": "0:00:21", "remaining_time": "0:15:20", "throughput": 3620.51, "total_tokens": 78496}
{"current_steps": 195, "total_steps": 8260, "loss": 11.6647, "lr": 1.1743341404358354e-05, "epoch": 0.2360774818401937, "percentage": 2.36, "elapsed_time": "0:00:22", "remaining_time": "0:15:18", "throughput": 3621.64, "total_tokens": 80416}
{"current_steps": 200, "total_steps": 8260, "loss": 11.4604, "lr": 1.2046004842615012e-05, "epoch": 0.24213075060532688, "percentage": 2.42, "elapsed_time": "0:00:22", "remaining_time": "0:15:16", "throughput": 3623.5, "total_tokens": 82432}
{"current_steps": 205, "total_steps": 8260, "loss": 11.2802, "lr": 1.2348668280871672e-05, "epoch": 0.24818401937046006, "percentage": 2.48, "elapsed_time": "0:00:23", "remaining_time": "0:15:15", "throughput": 3623.29, "total_tokens": 84416}
{"current_steps": 210, "total_steps": 8260, "loss": 11.2422, "lr": 1.2651331719128328e-05, "epoch": 0.2542372881355932, "percentage": 2.54, "elapsed_time": "0:00:23", "remaining_time": "0:15:12", "throughput": 3625.05, "total_tokens": 86336}
{"current_steps": 215, "total_steps": 8260, "loss": 10.9734, "lr": 1.2953995157384988e-05, "epoch": 0.2602905569007264, "percentage": 2.6, "elapsed_time": "0:00:24", "remaining_time": "0:15:12", "throughput": 3622.82, "total_tokens": 88352}
{"current_steps": 220, "total_steps": 8260, "loss": 10.9734, "lr": 1.3256658595641647e-05, "epoch": 0.26634382566585957, "percentage": 2.66, "elapsed_time": "0:00:24", "remaining_time": "0:15:11", "throughput": 3626.54, "total_tokens": 90432}
{"current_steps": 225, "total_steps": 8260, "loss": 10.9114, "lr": 1.3559322033898305e-05, "epoch": 0.27239709443099275, "percentage": 2.72, "elapsed_time": "0:00:25", "remaining_time": "0:15:10", "throughput": 3629.47, "total_tokens": 92512}
{"current_steps": 230, "total_steps": 8260, "loss": 10.6533, "lr": 1.3861985472154965e-05, "epoch": 0.2784503631961259, "percentage": 2.78, "elapsed_time": "0:00:26", "remaining_time": "0:15:09", "throughput": 3630.54, "total_tokens": 94528}
{"current_steps": 235, "total_steps": 8260, "loss": 10.6038, "lr": 1.4164648910411623e-05, "epoch": 0.2845036319612591, "percentage": 2.85, "elapsed_time": "0:00:26", "remaining_time": "0:15:07", "throughput": 3632.64, "total_tokens": 96576}
{"current_steps": 240, "total_steps": 8260, "loss": 10.4043, "lr": 1.4467312348668283e-05, "epoch": 0.29055690072639223, "percentage": 2.91, "elapsed_time": "0:00:27", "remaining_time": "0:15:07", "throughput": 3632.25, "total_tokens": 98624}
{"current_steps": 245, "total_steps": 8260, "loss": 10.6318, "lr": 1.4769975786924939e-05, "epoch": 0.2966101694915254, "percentage": 2.97, "elapsed_time": "0:00:27", "remaining_time": "0:15:07", "throughput": 3636.4, "total_tokens": 100832}
{"current_steps": 250, "total_steps": 8260, "loss": 10.2144, "lr": 1.5072639225181599e-05, "epoch": 0.3026634382566586, "percentage": 3.03, "elapsed_time": "0:00:28", "remaining_time": "0:15:05", "throughput": 3638.1, "total_tokens": 102784}
{"current_steps": 255, "total_steps": 8260, "loss": 9.9711, "lr": 1.5375302663438258e-05, "epoch": 0.30871670702179177, "percentage": 3.09, "elapsed_time": "0:00:28", "remaining_time": "0:15:03", "throughput": 3641.74, "total_tokens": 104800}
{"current_steps": 260, "total_steps": 8260, "loss": 10.1949, "lr": 1.5677966101694916e-05, "epoch": 0.31476997578692495, "percentage": 3.15, "elapsed_time": "0:00:29", "remaining_time": "0:15:02", "throughput": 3644.56, "total_tokens": 106880}
{"current_steps": 265, "total_steps": 8260, "loss": 9.769, "lr": 1.5980629539951574e-05, "epoch": 0.32082324455205813, "percentage": 3.21, "elapsed_time": "0:00:29", "remaining_time": "0:15:01", "throughput": 3644.49, "total_tokens": 108960}
{"current_steps": 270, "total_steps": 8260, "loss": 9.3465, "lr": 1.6283292978208232e-05, "epoch": 0.3268765133171913, "percentage": 3.27, "elapsed_time": "0:00:30", "remaining_time": "0:14:59", "throughput": 3648.79, "total_tokens": 110944}
{"current_steps": 275, "total_steps": 8260, "loss": 9.471, "lr": 1.6585956416464894e-05, "epoch": 0.33292978208232443, "percentage": 3.33, "elapsed_time": "0:00:30", "remaining_time": "0:14:59", "throughput": 3650.21, "total_tokens": 113088}
{"current_steps": 280, "total_steps": 8260, "loss": 9.384, "lr": 1.6888619854721548e-05, "epoch": 0.3389830508474576, "percentage": 3.39, "elapsed_time": "0:00:31", "remaining_time": "0:14:57", "throughput": 3652.61, "total_tokens": 115072}
{"current_steps": 285, "total_steps": 8260, "loss": 9.3895, "lr": 1.719128329297821e-05, "epoch": 0.3450363196125908, "percentage": 3.45, "elapsed_time": "0:00:32", "remaining_time": "0:14:57", "throughput": 3658.03, "total_tokens": 117280}
{"current_steps": 290, "total_steps": 8260, "loss": 8.9285, "lr": 1.7493946731234868e-05, "epoch": 0.35108958837772397, "percentage": 3.51, "elapsed_time": "0:00:32", "remaining_time": "0:14:56", "throughput": 3661.33, "total_tokens": 119456}
{"current_steps": 295, "total_steps": 8260, "loss": 8.765, "lr": 1.7796610169491526e-05, "epoch": 0.35714285714285715, "percentage": 3.57, "elapsed_time": "0:00:33", "remaining_time": "0:14:55", "throughput": 3661.96, "total_tokens": 121472}
{"current_steps": 300, "total_steps": 8260, "loss": 8.5887, "lr": 1.8099273607748184e-05, "epoch": 0.36319612590799033, "percentage": 3.63, "elapsed_time": "0:00:33", "remaining_time": "0:14:56", "throughput": 3661.14, "total_tokens": 123648}
{"current_steps": 305, "total_steps": 8260, "loss": 8.5794, "lr": 1.8401937046004845e-05, "epoch": 0.3692493946731235, "percentage": 3.69, "elapsed_time": "0:00:34", "remaining_time": "0:14:54", "throughput": 3663.32, "total_tokens": 125632}
{"current_steps": 310, "total_steps": 8260, "loss": 8.5837, "lr": 1.8704600484261503e-05, "epoch": 0.37530266343825663, "percentage": 3.75, "elapsed_time": "0:00:34", "remaining_time": "0:14:53", "throughput": 3664.41, "total_tokens": 127680}
{"current_steps": 315, "total_steps": 8260, "loss": 8.06, "lr": 1.900726392251816e-05, "epoch": 0.3813559322033898, "percentage": 3.81, "elapsed_time": "0:00:35", "remaining_time": "0:14:53", "throughput": 3665.37, "total_tokens": 129856}
{"current_steps": 320, "total_steps": 8260, "loss": 8.0184, "lr": 1.930992736077482e-05, "epoch": 0.387409200968523, "percentage": 3.87, "elapsed_time": "0:00:35", "remaining_time": "0:14:52", "throughput": 3667.87, "total_tokens": 131872}
{"current_steps": 325, "total_steps": 8260, "loss": 8.2151, "lr": 1.9612590799031477e-05, "epoch": 0.3934624697336562, "percentage": 3.93, "elapsed_time": "0:00:36", "remaining_time": "0:14:51", "throughput": 3668.19, "total_tokens": 133984}
{"current_steps": 330, "total_steps": 8260, "loss": 7.8231, "lr": 1.9915254237288135e-05, "epoch": 0.39951573849878935, "percentage": 4.0, "elapsed_time": "0:00:37", "remaining_time": "0:14:51", "throughput": 3669.25, "total_tokens": 136128}
{"current_steps": 335, "total_steps": 8260, "loss": 7.4718, "lr": 2.0217917675544796e-05, "epoch": 0.40556900726392253, "percentage": 4.06, "elapsed_time": "0:00:37", "remaining_time": "0:14:50", "throughput": 3670.73, "total_tokens": 138208}
{"current_steps": 340, "total_steps": 8260, "loss": 7.2558, "lr": 2.0520581113801454e-05, "epoch": 0.4116222760290557, "percentage": 4.12, "elapsed_time": "0:00:38", "remaining_time": "0:14:50", "throughput": 3670.34, "total_tokens": 140288}
{"current_steps": 345, "total_steps": 8260, "loss": 7.1186, "lr": 2.0823244552058112e-05, "epoch": 0.41767554479418884, "percentage": 4.18, "elapsed_time": "0:00:38", "remaining_time": "0:14:49", "throughput": 3671.32, "total_tokens": 142336}
{"current_steps": 350, "total_steps": 8260, "loss": 6.71, "lr": 2.1125907990314774e-05, "epoch": 0.423728813559322, "percentage": 4.24, "elapsed_time": "0:00:39", "remaining_time": "0:14:49", "throughput": 3671.56, "total_tokens": 144448}
{"current_steps": 355, "total_steps": 8260, "loss": 7.2955, "lr": 2.1428571428571428e-05, "epoch": 0.4297820823244552, "percentage": 4.3, "elapsed_time": "0:00:39", "remaining_time": "0:14:48", "throughput": 3671.79, "total_tokens": 146464}
{"current_steps": 360, "total_steps": 8260, "loss": 6.6765, "lr": 2.1731234866828086e-05, "epoch": 0.4358353510895884, "percentage": 4.36, "elapsed_time": "0:00:40", "remaining_time": "0:14:47", "throughput": 3672.89, "total_tokens": 148512}
{"current_steps": 365, "total_steps": 8260, "loss": 6.5497, "lr": 2.2033898305084748e-05, "epoch": 0.44188861985472155, "percentage": 4.42, "elapsed_time": "0:00:40", "remaining_time": "0:14:45", "throughput": 3674.57, "total_tokens": 150496}
{"current_steps": 370, "total_steps": 8260, "loss": 6.3671, "lr": 2.2336561743341405e-05, "epoch": 0.44794188861985473, "percentage": 4.48, "elapsed_time": "0:00:41", "remaining_time": "0:14:45", "throughput": 3673.09, "total_tokens": 152608}
{"current_steps": 375, "total_steps": 8260, "loss": 6.2463, "lr": 2.2639225181598063e-05, "epoch": 0.4539951573849879, "percentage": 4.54, "elapsed_time": "0:00:42", "remaining_time": "0:14:45", "throughput": 3673.39, "total_tokens": 154624}
{"current_steps": 380, "total_steps": 8260, "loss": 6.4107, "lr": 2.2941888619854725e-05, "epoch": 0.4600484261501211, "percentage": 4.6, "elapsed_time": "0:00:42", "remaining_time": "0:14:43", "throughput": 3674.8, "total_tokens": 156608}
{"current_steps": 385, "total_steps": 8260, "loss": 6.3329, "lr": 2.3244552058111383e-05, "epoch": 0.4661016949152542, "percentage": 4.66, "elapsed_time": "0:00:43", "remaining_time": "0:14:42", "throughput": 3675.59, "total_tokens": 158560}
{"current_steps": 390, "total_steps": 8260, "loss": 6.2334, "lr": 2.3547215496368037e-05, "epoch": 0.4721549636803874, "percentage": 4.72, "elapsed_time": "0:00:43", "remaining_time": "0:14:41", "throughput": 3676.71, "total_tokens": 160640}
{"current_steps": 395, "total_steps": 8260, "loss": 6.096, "lr": 2.38498789346247e-05, "epoch": 0.4782082324455206, "percentage": 4.78, "elapsed_time": "0:00:44", "remaining_time": "0:14:41", "throughput": 3675.86, "total_tokens": 162784}
{"current_steps": 400, "total_steps": 8260, "loss": 5.8738, "lr": 2.4152542372881357e-05, "epoch": 0.48426150121065376, "percentage": 4.84, "elapsed_time": "0:00:44", "remaining_time": "0:14:41", "throughput": 3674.83, "total_tokens": 164832}
{"current_steps": 405, "total_steps": 8260, "loss": 5.7706, "lr": 2.4455205811138015e-05, "epoch": 0.49031476997578693, "percentage": 4.9, "elapsed_time": "0:00:45", "remaining_time": "0:14:40", "throughput": 3677.46, "total_tokens": 166880}
{"current_steps": 410, "total_steps": 8260, "loss": 5.8127, "lr": 2.4757869249394676e-05, "epoch": 0.4963680387409201, "percentage": 4.96, "elapsed_time": "0:00:45", "remaining_time": "0:14:39", "throughput": 3679.16, "total_tokens": 169088}
{"current_steps": 413, "total_steps": 8260, "eval_loss": 5.548953533172607, "epoch": 0.5, "percentage": 5.0, "elapsed_time": "0:00:50", "remaining_time": "0:16:07", "throughput": 3344.48, "total_tokens": 170336}
{"current_steps": 415, "total_steps": 8260, "loss": 5.6743, "lr": 2.5060532687651334e-05, "epoch": 0.5024213075060533, "percentage": 5.02, "elapsed_time": "0:00:52", "remaining_time": "0:16:24", "throughput": 3288.77, "total_tokens": 171232}
{"current_steps": 420, "total_steps": 8260, "loss": 5.8522, "lr": 2.536319612590799e-05, "epoch": 0.5084745762711864, "percentage": 5.08, "elapsed_time": "0:00:52", "remaining_time": "0:16:22", "throughput": 3293.62, "total_tokens": 173376}
{"current_steps": 425, "total_steps": 8260, "loss": 5.3048, "lr": 2.566585956416465e-05, "epoch": 0.5145278450363197, "percentage": 5.15, "elapsed_time": "0:00:53", "remaining_time": "0:16:20", "throughput": 3296.93, "total_tokens": 175424}
{"current_steps": 430, "total_steps": 8260, "loss": 5.5025, "lr": 2.5968523002421308e-05, "epoch": 0.5205811138014528, "percentage": 5.21, "elapsed_time": "0:00:53", "remaining_time": "0:16:19", "throughput": 3301.27, "total_tokens": 177664}
{"current_steps": 435, "total_steps": 8260, "loss": 5.1395, "lr": 2.627118644067797e-05, "epoch": 0.5266343825665859, "percentage": 5.27, "elapsed_time": "0:00:54", "remaining_time": "0:16:17", "throughput": 3304.77, "total_tokens": 179648}
{"current_steps": 440, "total_steps": 8260, "loss": 5.2626, "lr": 2.6573849878934624e-05, "epoch": 0.5326876513317191, "percentage": 5.33, "elapsed_time": "0:00:54", "remaining_time": "0:16:15", "throughput": 3308.54, "total_tokens": 181664}
{"current_steps": 445, "total_steps": 8260, "loss": 5.4184, "lr": 2.6876513317191282e-05, "epoch": 0.5387409200968523, "percentage": 5.39, "elapsed_time": "0:00:55", "remaining_time": "0:16:14", "throughput": 3313.33, "total_tokens": 183840}
{"current_steps": 450, "total_steps": 8260, "loss": 5.0142, "lr": 2.7179176755447943e-05, "epoch": 0.5447941888619855, "percentage": 5.45, "elapsed_time": "0:00:55", "remaining_time": "0:16:11", "throughput": 3317.02, "total_tokens": 185696}
{"current_steps": 455, "total_steps": 8260, "loss": 5.021, "lr": 2.74818401937046e-05, "epoch": 0.5508474576271186, "percentage": 5.51, "elapsed_time": "0:00:56", "remaining_time": "0:16:09", "throughput": 3320.7, "total_tokens": 187712}
{"current_steps": 460, "total_steps": 8260, "loss": 5.1033, "lr": 2.7784503631961263e-05, "epoch": 0.5569007263922519, "percentage": 5.57, "elapsed_time": "0:00:57", "remaining_time": "0:16:07", "throughput": 3324.25, "total_tokens": 189728}
{"current_steps": 465, "total_steps": 8260, "loss": 4.8707, "lr": 2.8087167070217917e-05, "epoch": 0.562953995157385, "percentage": 5.63, "elapsed_time": "0:00:57", "remaining_time": "0:16:06", "throughput": 3326.54, "total_tokens": 191744}
{"current_steps": 470, "total_steps": 8260, "loss": 4.7531, "lr": 2.838983050847458e-05, "epoch": 0.5690072639225182, "percentage": 5.69, "elapsed_time": "0:00:58", "remaining_time": "0:16:04", "throughput": 3330.46, "total_tokens": 193888}
{"current_steps": 475, "total_steps": 8260, "loss": 4.7276, "lr": 2.8692493946731237e-05, "epoch": 0.5750605326876513, "percentage": 5.75, "elapsed_time": "0:00:58", "remaining_time": "0:16:03", "throughput": 3333.49, "total_tokens": 195904}
{"current_steps": 480, "total_steps": 8260, "loss": 4.6746, "lr": 2.899515738498789e-05, "epoch": 0.5811138014527845, "percentage": 5.81, "elapsed_time": "0:00:59", "remaining_time": "0:16:01", "throughput": 3336.15, "total_tokens": 197952}
{"current_steps": 485, "total_steps": 8260, "loss": 4.5068, "lr": 2.9297820823244553e-05, "epoch": 0.5871670702179177, "percentage": 5.87, "elapsed_time": "0:00:59", "remaining_time": "0:15:59", "throughput": 3339.35, "total_tokens": 199968}
{"current_steps": 490, "total_steps": 8260, "loss": 4.422, "lr": 2.960048426150121e-05, "epoch": 0.5932203389830508, "percentage": 5.93, "elapsed_time": "0:01:00", "remaining_time": "0:15:58", "throughput": 3342.26, "total_tokens": 201984}
{"current_steps": 495, "total_steps": 8260, "loss": 4.3623, "lr": 2.9903147699757872e-05, "epoch": 0.5992736077481841, "percentage": 5.99, "elapsed_time": "0:01:00", "remaining_time": "0:15:56", "throughput": 3345.41, "total_tokens": 204000}
{"current_steps": 500, "total_steps": 8260, "loss": 4.4207, "lr": 3.0205811138014527e-05, "epoch": 0.6053268765133172, "percentage": 6.05, "elapsed_time": "0:01:01", "remaining_time": "0:15:55", "throughput": 3348.05, "total_tokens": 206080}
{"current_steps": 505, "total_steps": 8260, "loss": 4.3104, "lr": 3.050847457627119e-05, "epoch": 0.6113801452784504, "percentage": 6.11, "elapsed_time": "0:01:02", "remaining_time": "0:15:53", "throughput": 3349.71, "total_tokens": 208000}
{"current_steps": 510, "total_steps": 8260, "loss": 4.7137, "lr": 3.0811138014527846e-05, "epoch": 0.6174334140435835, "percentage": 6.17, "elapsed_time": "0:01:02", "remaining_time": "0:15:52", "throughput": 3355.13, "total_tokens": 210208}
{"current_steps": 515, "total_steps": 8260, "loss": 4.0774, "lr": 3.111380145278451e-05, "epoch": 0.6234866828087167, "percentage": 6.23, "elapsed_time": "0:01:03", "remaining_time": "0:15:50", "throughput": 3357.69, "total_tokens": 212288}
{"current_steps": 520, "total_steps": 8260, "loss": 4.1321, "lr": 3.141646489104116e-05, "epoch": 0.6295399515738499, "percentage": 6.3, "elapsed_time": "0:01:03", "remaining_time": "0:15:49", "throughput": 3361.6, "total_tokens": 214400}
{"current_steps": 525, "total_steps": 8260, "loss": 3.9528, "lr": 3.1719128329297823e-05, "epoch": 0.635593220338983, "percentage": 6.36, "elapsed_time": "0:01:04", "remaining_time": "0:15:48", "throughput": 3364.43, "total_tokens": 216512}
{"current_steps": 530, "total_steps": 8260, "loss": 3.9925, "lr": 3.2021791767554485e-05, "epoch": 0.6416464891041163, "percentage": 6.42, "elapsed_time": "0:01:04", "remaining_time": "0:15:46", "throughput": 3367.81, "total_tokens": 218496}
{"current_steps": 535, "total_steps": 8260, "loss": 3.7069, "lr": 3.232445520581114e-05, "epoch": 0.6476997578692494, "percentage": 6.48, "elapsed_time": "0:01:05", "remaining_time": "0:15:44", "throughput": 3370.26, "total_tokens": 220512}
{"current_steps": 540, "total_steps": 8260, "loss": 3.7106, "lr": 3.26271186440678e-05, "epoch": 0.6537530266343826, "percentage": 6.54, "elapsed_time": "0:01:05", "remaining_time": "0:15:43", "throughput": 3373.32, "total_tokens": 222560}
{"current_steps": 545, "total_steps": 8260, "loss": 3.5129, "lr": 3.2929782082324455e-05, "epoch": 0.6598062953995157, "percentage": 6.6, "elapsed_time": "0:01:06", "remaining_time": "0:15:41", "throughput": 3375.78, "total_tokens": 224480}
{"current_steps": 550, "total_steps": 8260, "loss": 3.572, "lr": 3.323244552058112e-05, "epoch": 0.6658595641646489, "percentage": 6.66, "elapsed_time": "0:01:07", "remaining_time": "0:15:39", "throughput": 3380.31, "total_tokens": 226560}
{"current_steps": 555, "total_steps": 8260, "loss": 3.3315, "lr": 3.353510895883777e-05, "epoch": 0.6719128329297821, "percentage": 6.72, "elapsed_time": "0:01:07", "remaining_time": "0:15:38", "throughput": 3381.98, "total_tokens": 228512}
{"current_steps": 560, "total_steps": 8260, "loss": 3.3729, "lr": 3.383777239709443e-05, "epoch": 0.6779661016949152, "percentage": 6.78, "elapsed_time": "0:01:08", "remaining_time": "0:15:36", "throughput": 3385.54, "total_tokens": 230528}
{"current_steps": 565, "total_steps": 8260, "loss": 3.3802, "lr": 3.4140435835351094e-05, "epoch": 0.6840193704600485, "percentage": 6.84, "elapsed_time": "0:01:08", "remaining_time": "0:15:35", "throughput": 3388.49, "total_tokens": 232768}
{"current_steps": 570, "total_steps": 8260, "loss": 3.077, "lr": 3.444309927360775e-05, "epoch": 0.6900726392251816, "percentage": 6.9, "elapsed_time": "0:01:09", "remaining_time": "0:15:34", "throughput": 3390.88, "total_tokens": 234784}
{"current_steps": 575, "total_steps": 8260, "loss": 3.0162, "lr": 3.474576271186441e-05, "epoch": 0.6961259079903148, "percentage": 6.96, "elapsed_time": "0:01:09", "remaining_time": "0:15:32", "throughput": 3393.52, "total_tokens": 236832}
{"current_steps": 580, "total_steps": 8260, "loss": 2.9916, "lr": 3.5048426150121065e-05, "epoch": 0.7021791767554479, "percentage": 7.02, "elapsed_time": "0:01:10", "remaining_time": "0:15:30", "throughput": 3396.84, "total_tokens": 238784}
{"current_steps": 585, "total_steps": 8260, "loss": 2.8985, "lr": 3.5351089588377726e-05, "epoch": 0.7082324455205811, "percentage": 7.08, "elapsed_time": "0:01:10", "remaining_time": "0:15:29", "throughput": 3399.56, "total_tokens": 240768}
{"current_steps": 590, "total_steps": 8260, "loss": 2.6847, "lr": 3.565375302663439e-05, "epoch": 0.7142857142857143, "percentage": 7.14, "elapsed_time": "0:01:11", "remaining_time": "0:15:28", "throughput": 3401.54, "total_tokens": 242848}
{"current_steps": 595, "total_steps": 8260, "loss": 2.6642, "lr": 3.595641646489104e-05, "epoch": 0.7203389830508474, "percentage": 7.2, "elapsed_time": "0:01:11", "remaining_time": "0:15:27", "throughput": 3404.15, "total_tokens": 244992}
{"current_steps": 600, "total_steps": 8260, "loss": 2.6654, "lr": 3.62590799031477e-05, "epoch": 0.7263922518159807, "percentage": 7.26, "elapsed_time": "0:01:12", "remaining_time": "0:15:25", "throughput": 3406.72, "total_tokens": 247040}
{"current_steps": 605, "total_steps": 8260, "loss": 2.6397, "lr": 3.656174334140436e-05, "epoch": 0.7324455205811138, "percentage": 7.32, "elapsed_time": "0:01:13", "remaining_time": "0:15:24", "throughput": 3409.32, "total_tokens": 249184}
{"current_steps": 610, "total_steps": 8260, "loss": 2.4848, "lr": 3.686440677966102e-05, "epoch": 0.738498789346247, "percentage": 7.38, "elapsed_time": "0:01:13", "remaining_time": "0:15:23", "throughput": 3411.31, "total_tokens": 251104}
{"current_steps": 615, "total_steps": 8260, "loss": 2.2126, "lr": 3.7167070217917674e-05, "epoch": 0.7445520581113801, "percentage": 7.45, "elapsed_time": "0:01:14", "remaining_time": "0:15:21", "throughput": 3413.59, "total_tokens": 253056}
{"current_steps": 620, "total_steps": 8260, "loss": 2.1763, "lr": 3.7469733656174335e-05, "epoch": 0.7506053268765133, "percentage": 7.51, "elapsed_time": "0:01:14", "remaining_time": "0:15:19", "throughput": 3415.83, "total_tokens": 255008}
{"current_steps": 625, "total_steps": 8260, "loss": 2.1731, "lr": 3.7772397094431e-05, "epoch": 0.7566585956416465, "percentage": 7.57, "elapsed_time": "0:01:15", "remaining_time": "0:15:18", "throughput": 3418.82, "total_tokens": 257120}
{"current_steps": 630, "total_steps": 8260, "loss": 1.9734, "lr": 3.807506053268765e-05, "epoch": 0.7627118644067796, "percentage": 7.63, "elapsed_time": "0:01:15", "remaining_time": "0:15:17", "throughput": 3421.07, "total_tokens": 259168}
{"current_steps": 635, "total_steps": 8260, "loss": 2.0066, "lr": 3.837772397094431e-05, "epoch": 0.7687651331719129, "percentage": 7.69, "elapsed_time": "0:01:16", "remaining_time": "0:15:16", "throughput": 3424.28, "total_tokens": 261216}
{"current_steps": 640, "total_steps": 8260, "loss": 1.8286, "lr": 3.868038740920097e-05, "epoch": 0.774818401937046, "percentage": 7.75, "elapsed_time": "0:01:16", "remaining_time": "0:15:14", "throughput": 3426.03, "total_tokens": 263136}
{"current_steps": 645, "total_steps": 8260, "loss": 1.6871, "lr": 3.898305084745763e-05, "epoch": 0.7808716707021792, "percentage": 7.81, "elapsed_time": "0:01:17", "remaining_time": "0:15:13", "throughput": 3428.11, "total_tokens": 265184}
{"current_steps": 650, "total_steps": 8260, "loss": 1.7286, "lr": 3.928571428571429e-05, "epoch": 0.7869249394673123, "percentage": 7.87, "elapsed_time": "0:01:17", "remaining_time": "0:15:12", "throughput": 3430.71, "total_tokens": 267264}
{"current_steps": 655, "total_steps": 8260, "loss": 1.6063, "lr": 3.958837772397095e-05, "epoch": 0.7929782082324455, "percentage": 7.93, "elapsed_time": "0:01:18", "remaining_time": "0:15:10", "throughput": 3432.74, "total_tokens": 269216}
{"current_steps": 660, "total_steps": 8260, "loss": 1.4514, "lr": 3.9891041162227606e-05, "epoch": 0.7990314769975787, "percentage": 7.99, "elapsed_time": "0:01:18", "remaining_time": "0:15:09", "throughput": 3434.92, "total_tokens": 271264}
{"current_steps": 665, "total_steps": 8260, "loss": 1.3309, "lr": 4.019370460048426e-05, "epoch": 0.8050847457627118, "percentage": 8.05, "elapsed_time": "0:01:19", "remaining_time": "0:15:08", "throughput": 3436.65, "total_tokens": 273280}
{"current_steps": 670, "total_steps": 8260, "loss": 1.2326, "lr": 4.049636803874092e-05, "epoch": 0.8111380145278451, "percentage": 8.11, "elapsed_time": "0:01:20", "remaining_time": "0:15:07", "throughput": 3438.49, "total_tokens": 275392}
{"current_steps": 675, "total_steps": 8260, "loss": 1.4568, "lr": 4.0799031476997577e-05, "epoch": 0.8171912832929782, "percentage": 8.17, "elapsed_time": "0:01:20", "remaining_time": "0:15:06", "throughput": 3439.6, "total_tokens": 277440}
{"current_steps": 680, "total_steps": 8260, "loss": 1.3072, "lr": 4.110169491525424e-05, "epoch": 0.8232445520581114, "percentage": 8.23, "elapsed_time": "0:01:21", "remaining_time": "0:15:05", "throughput": 3441.27, "total_tokens": 279456}
{"current_steps": 685, "total_steps": 8260, "loss": 1.1032, "lr": 4.14043583535109e-05, "epoch": 0.8292978208232445, "percentage": 8.29, "elapsed_time": "0:01:21", "remaining_time": "0:15:04", "throughput": 3442.7, "total_tokens": 281632}
{"current_steps": 690, "total_steps": 8260, "loss": 1.0202, "lr": 4.170702179176756e-05, "epoch": 0.8353510895883777, "percentage": 8.35, "elapsed_time": "0:01:22", "remaining_time": "0:15:03", "throughput": 3443.83, "total_tokens": 283680}
{"current_steps": 695, "total_steps": 8260, "loss": 1.1615, "lr": 4.2009685230024215e-05, "epoch": 0.8414043583535109, "percentage": 8.41, "elapsed_time": "0:01:22", "remaining_time": "0:15:02", "throughput": 3446.08, "total_tokens": 285760}
{"current_steps": 700, "total_steps": 8260, "loss": 1.0373, "lr": 4.231234866828087e-05, "epoch": 0.847457627118644, "percentage": 8.47, "elapsed_time": "0:01:23", "remaining_time": "0:15:01", "throughput": 3448.03, "total_tokens": 287904}
{"current_steps": 705, "total_steps": 8260, "loss": 0.8951, "lr": 4.261501210653753e-05, "epoch": 0.8535108958837773, "percentage": 8.54, "elapsed_time": "0:01:24", "remaining_time": "0:15:00", "throughput": 3450.03, "total_tokens": 289952}
{"current_steps": 710, "total_steps": 8260, "loss": 0.7963, "lr": 4.2917675544794186e-05, "epoch": 0.8595641646489104, "percentage": 8.6, "elapsed_time": "0:01:24", "remaining_time": "0:14:59", "throughput": 3451.52, "total_tokens": 291968}
{"current_steps": 715, "total_steps": 8260, "loss": 0.8121, "lr": 4.3220338983050854e-05, "epoch": 0.8656174334140436, "percentage": 8.66, "elapsed_time": "0:01:25", "remaining_time": "0:14:58", "throughput": 3453.67, "total_tokens": 293952}
{"current_steps": 720, "total_steps": 8260, "loss": 1.0699, "lr": 4.352300242130751e-05, "epoch": 0.8716707021791767, "percentage": 8.72, "elapsed_time": "0:01:25", "remaining_time": "0:14:57", "throughput": 3455.62, "total_tokens": 296192}
{"current_steps": 725, "total_steps": 8260, "loss": 0.8578, "lr": 4.382566585956417e-05, "epoch": 0.8777239709443099, "percentage": 8.78, "elapsed_time": "0:01:26", "remaining_time": "0:14:56", "throughput": 3457.46, "total_tokens": 298336}
{"current_steps": 730, "total_steps": 8260, "loss": 0.6948, "lr": 4.4128329297820825e-05, "epoch": 0.8837772397094431, "percentage": 8.84, "elapsed_time": "0:01:26", "remaining_time": "0:14:55", "throughput": 3458.93, "total_tokens": 300352}
{"current_steps": 735, "total_steps": 8260, "loss": 0.658, "lr": 4.443099273607748e-05, "epoch": 0.8898305084745762, "percentage": 8.9, "elapsed_time": "0:01:27", "remaining_time": "0:14:54", "throughput": 3460.96, "total_tokens": 302432}
{"current_steps": 740, "total_steps": 8260, "loss": 0.8707, "lr": 4.473365617433414e-05, "epoch": 0.8958837772397095, "percentage": 8.96, "elapsed_time": "0:01:27", "remaining_time": "0:14:53", "throughput": 3462.47, "total_tokens": 304544}
{"current_steps": 745, "total_steps": 8260, "loss": 0.6984, "lr": 4.50363196125908e-05, "epoch": 0.9019370460048426, "percentage": 9.02, "elapsed_time": "0:01:28", "remaining_time": "0:14:52", "throughput": 3464.15, "total_tokens": 306592}
{"current_steps": 750, "total_steps": 8260, "loss": 0.532, "lr": 4.533898305084746e-05, "epoch": 0.9079903147699758, "percentage": 9.08, "elapsed_time": "0:01:29", "remaining_time": "0:14:51", "throughput": 3465.21, "total_tokens": 308672}
{"current_steps": 755, "total_steps": 8260, "loss": 0.6657, "lr": 4.564164648910412e-05, "epoch": 0.914043583535109, "percentage": 9.14, "elapsed_time": "0:01:29", "remaining_time": "0:14:50", "throughput": 3467.06, "total_tokens": 310656}
{"current_steps": 760, "total_steps": 8260, "loss": 0.5989, "lr": 4.594430992736078e-05, "epoch": 0.9200968523002422, "percentage": 9.2, "elapsed_time": "0:01:30", "remaining_time": "0:14:49", "throughput": 3469.25, "total_tokens": 312768}
{"current_steps": 765, "total_steps": 8260, "loss": 0.5621, "lr": 4.6246973365617434e-05, "epoch": 0.9261501210653753, "percentage": 9.26, "elapsed_time": "0:01:30", "remaining_time": "0:14:48", "throughput": 3471.28, "total_tokens": 314880}
{"current_steps": 770, "total_steps": 8260, "loss": 0.6839, "lr": 4.654963680387409e-05, "epoch": 0.9322033898305084, "percentage": 9.32, "elapsed_time": "0:01:31", "remaining_time": "0:14:47", "throughput": 3472.36, "total_tokens": 316960}
{"current_steps": 775, "total_steps": 8260, "loss": 0.502, "lr": 4.685230024213076e-05, "epoch": 0.9382566585956417, "percentage": 9.38, "elapsed_time": "0:01:31", "remaining_time": "0:14:47", "throughput": 3473.77, "total_tokens": 319072}
{"current_steps": 780, "total_steps": 8260, "loss": 0.7602, "lr": 4.715496368038741e-05, "epoch": 0.9443099273607748, "percentage": 9.44, "elapsed_time": "0:01:32", "remaining_time": "0:14:46", "throughput": 3475.81, "total_tokens": 321248}
{"current_steps": 785, "total_steps": 8260, "loss": 0.4837, "lr": 4.745762711864407e-05, "epoch": 0.950363196125908, "percentage": 9.5, "elapsed_time": "0:01:32", "remaining_time": "0:14:45", "throughput": 3476.16, "total_tokens": 323168}
{"current_steps": 790, "total_steps": 8260, "loss": 0.6654, "lr": 4.776029055690073e-05, "epoch": 0.9564164648910412, "percentage": 9.56, "elapsed_time": "0:01:33", "remaining_time": "0:14:44", "throughput": 3477.3, "total_tokens": 325280}
{"current_steps": 795, "total_steps": 8260, "loss": 0.5072, "lr": 4.806295399515739e-05, "epoch": 0.9624697336561744, "percentage": 9.62, "elapsed_time": "0:01:34", "remaining_time": "0:14:43", "throughput": 3478.12, "total_tokens": 327264}
{"current_steps": 800, "total_steps": 8260, "loss": 0.5474, "lr": 4.836561743341404e-05, "epoch": 0.9685230024213075, "percentage": 9.69, "elapsed_time": "0:01:34", "remaining_time": "0:14:42", "throughput": 3479.66, "total_tokens": 329312}
{"current_steps": 805, "total_steps": 8260, "loss": 0.527, "lr": 4.8668280871670705e-05, "epoch": 0.9745762711864406, "percentage": 9.75, "elapsed_time": "0:01:35", "remaining_time": "0:14:41", "throughput": 3481.35, "total_tokens": 331296}
{"current_steps": 810, "total_steps": 8260, "loss": 0.5027, "lr": 4.8970944309927366e-05, "epoch": 0.9806295399515739, "percentage": 9.81, "elapsed_time": "0:01:35", "remaining_time": "0:14:40", "throughput": 3482.17, "total_tokens": 333280}
{"current_steps": 815, "total_steps": 8260, "loss": 0.5586, "lr": 4.927360774818402e-05, "epoch": 0.986682808716707, "percentage": 9.87, "elapsed_time": "0:01:36", "remaining_time": "0:14:39", "throughput": 3483.32, "total_tokens": 335392}
{"current_steps": 820, "total_steps": 8260, "loss": 0.4215, "lr": 4.957627118644068e-05, "epoch": 0.9927360774818402, "percentage": 9.93, "elapsed_time": "0:01:36", "remaining_time": "0:14:38", "throughput": 3484.6, "total_tokens": 337504}
{"current_steps": 825, "total_steps": 8260, "loss": 0.5016, "lr": 4.9878934624697336e-05, "epoch": 0.9987893462469734, "percentage": 9.99, "elapsed_time": "0:01:37", "remaining_time": "0:14:37", "throughput": 3486.36, "total_tokens": 339488}
{"current_steps": 826, "total_steps": 8260, "eval_loss": 0.46163591742515564, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:01:42", "remaining_time": "0:15:20", "throughput": 3321.25, "total_tokens": 339568}
{"current_steps": 830, "total_steps": 8260, "loss": 0.5053, "lr": 4.99999799087436e-05, "epoch": 1.0048426150121066, "percentage": 10.05, "elapsed_time": "0:01:43", "remaining_time": "0:15:28", "throughput": 3289.31, "total_tokens": 341168}
{"current_steps": 835, "total_steps": 8260, "loss": 0.484, "lr": 4.999985712896029e-05, "epoch": 1.0108958837772397, "percentage": 10.11, "elapsed_time": "0:01:44", "remaining_time": "0:15:27", "throughput": 3291.41, "total_tokens": 343216}
{"current_steps": 840, "total_steps": 8260, "loss": 0.357, "lr": 4.9999622731750315e-05, "epoch": 1.0169491525423728, "percentage": 10.17, "elapsed_time": "0:01:44", "remaining_time": "0:15:25", "throughput": 3293.79, "total_tokens": 345200}
{"current_steps": 845, "total_steps": 8260, "loss": 0.5291, "lr": 4.999927671816018e-05, "epoch": 1.023002421307506, "percentage": 10.23, "elapsed_time": "0:01:45", "remaining_time": "0:15:24", "throughput": 3296.32, "total_tokens": 347408}
{"current_steps": 850, "total_steps": 8260, "loss": 0.5035, "lr": 4.999881908973474e-05, "epoch": 1.0290556900726393, "percentage": 10.29, "elapsed_time": "0:01:45", "remaining_time": "0:15:23", "throughput": 3298.78, "total_tokens": 349488}
{"current_steps": 855, "total_steps": 8260, "loss": 0.4471, "lr": 4.9998249848517185e-05, "epoch": 1.0351089588377724, "percentage": 10.35, "elapsed_time": "0:01:46", "remaining_time": "0:15:22", "throughput": 3300.92, "total_tokens": 351536}
{"current_steps": 860, "total_steps": 8260, "loss": 0.3337, "lr": 4.999756899704902e-05, "epoch": 1.0411622276029056, "percentage": 10.41, "elapsed_time": "0:01:46", "remaining_time": "0:15:20", "throughput": 3303.7, "total_tokens": 353488}
{"current_steps": 865, "total_steps": 8260, "loss": 0.4115, "lr": 4.999677653837004e-05, "epoch": 1.0472154963680387, "percentage": 10.47, "elapsed_time": "0:01:47", "remaining_time": "0:15:19", "throughput": 3306.32, "total_tokens": 355504}
{"current_steps": 870, "total_steps": 8260, "loss": 0.4941, "lr": 4.999587247601837e-05, "epoch": 1.053268765133172, "percentage": 10.53, "elapsed_time": "0:01:48", "remaining_time": "0:15:17", "throughput": 3308.45, "total_tokens": 357456}
{"current_steps": 875, "total_steps": 8260, "loss": 0.5042, "lr": 4.99948568140304e-05, "epoch": 1.0593220338983051, "percentage": 10.59, "elapsed_time": "0:01:48", "remaining_time": "0:15:16", "throughput": 3311.12, "total_tokens": 359568}
{"current_steps": 880, "total_steps": 8260, "loss": 0.374, "lr": 4.999372955694077e-05, "epoch": 1.0653753026634383, "percentage": 10.65, "elapsed_time": "0:01:49", "remaining_time": "0:15:15", "throughput": 3313.3, "total_tokens": 361616}
{"current_steps": 885, "total_steps": 8260, "loss": 0.3439, "lr": 4.999249070978237e-05, "epoch": 1.0714285714285714, "percentage": 10.71, "elapsed_time": "0:01:49", "remaining_time": "0:15:14", "throughput": 3315.05, "total_tokens": 363696}
{"current_steps": 890, "total_steps": 8260, "loss": 0.3907, "lr": 4.9991140278086316e-05, "epoch": 1.0774818401937045, "percentage": 10.77, "elapsed_time": "0:01:50", "remaining_time": "0:15:13", "throughput": 3316.89, "total_tokens": 365712}
{"current_steps": 895, "total_steps": 8260, "loss": 0.4433, "lr": 4.998967826788191e-05, "epoch": 1.0835351089588379, "percentage": 10.84, "elapsed_time": "0:01:50", "remaining_time": "0:15:12", "throughput": 3318.81, "total_tokens": 367824}
{"current_steps": 900, "total_steps": 8260, "loss": 0.3428, "lr": 4.998810468569661e-05, "epoch": 1.089588377723971, "percentage": 10.9, "elapsed_time": "0:01:51", "remaining_time": "0:15:10", "throughput": 3321.29, "total_tokens": 369840}
{"current_steps": 905, "total_steps": 8260, "loss": 0.5121, "lr": 4.998641953855604e-05, "epoch": 1.0956416464891041, "percentage": 10.96, "elapsed_time": "0:01:51", "remaining_time": "0:15:09", "throughput": 3323.07, "total_tokens": 371952}
{"current_steps": 910, "total_steps": 8260, "loss": 0.4262, "lr": 4.9984622833983906e-05, "epoch": 1.1016949152542372, "percentage": 11.02, "elapsed_time": "0:01:52", "remaining_time": "0:15:08", "throughput": 3325.24, "total_tokens": 374032}
{"current_steps": 915, "total_steps": 8260, "loss": 0.5254, "lr": 4.9982714580002e-05, "epoch": 1.1077481840193704, "percentage": 11.08, "elapsed_time": "0:01:53", "remaining_time": "0:15:07", "throughput": 3327.84, "total_tokens": 376176}
{"current_steps": 920, "total_steps": 8260, "loss": 0.2685, "lr": 4.998069478513013e-05, "epoch": 1.1138014527845037, "percentage": 11.14, "elapsed_time": "0:01:53", "remaining_time": "0:15:06", "throughput": 3329.24, "total_tokens": 378224}
{"current_steps": 925, "total_steps": 8260, "loss": 0.399, "lr": 4.997856345838615e-05, "epoch": 1.1198547215496368, "percentage": 11.2, "elapsed_time": "0:01:54", "remaining_time": "0:15:05", "throughput": 3330.82, "total_tokens": 380304}
{"current_steps": 930, "total_steps": 8260, "loss": 0.4422, "lr": 4.997632060928582e-05, "epoch": 1.12590799031477, "percentage": 11.26, "elapsed_time": "0:01:54", "remaining_time": "0:15:04", "throughput": 3333.22, "total_tokens": 382416}
{"current_steps": 935, "total_steps": 8260, "loss": 0.4599, "lr": 4.997396624784284e-05, "epoch": 1.131961259079903, "percentage": 11.32, "elapsed_time": "0:01:55", "remaining_time": "0:15:03", "throughput": 3335.34, "total_tokens": 384592}
{"current_steps": 940, "total_steps": 8260, "loss": 0.275, "lr": 4.9971500384568795e-05, "epoch": 1.1380145278450362, "percentage": 11.38, "elapsed_time": "0:01:55", "remaining_time": "0:15:02", "throughput": 3337.16, "total_tokens": 386640}
{"current_steps": 945, "total_steps": 8260, "loss": 0.4313, "lr": 4.996892303047306e-05, "epoch": 1.1440677966101696, "percentage": 11.44, "elapsed_time": "0:01:56", "remaining_time": "0:15:01", "throughput": 3338.84, "total_tokens": 388752}
{"current_steps": 950, "total_steps": 8260, "loss": 0.4488, "lr": 4.996623419706282e-05, "epoch": 1.1501210653753027, "percentage": 11.5, "elapsed_time": "0:01:57", "remaining_time": "0:15:00", "throughput": 3341.06, "total_tokens": 390928}
{"current_steps": 955, "total_steps": 8260, "loss": 0.4004, "lr": 4.996343389634298e-05, "epoch": 1.1561743341404358, "percentage": 11.56, "elapsed_time": "0:01:57", "remaining_time": "0:14:59", "throughput": 3343.2, "total_tokens": 393104}
{"current_steps": 960, "total_steps": 8260, "loss": 0.3452, "lr": 4.996052214081608e-05, "epoch": 1.162227602905569, "percentage": 11.62, "elapsed_time": "0:01:58", "remaining_time": "0:14:58", "throughput": 3345.26, "total_tokens": 395184}
{"current_steps": 965, "total_steps": 8260, "loss": 0.3525, "lr": 4.995749894348232e-05, "epoch": 1.1682808716707023, "percentage": 11.68, "elapsed_time": "0:01:58", "remaining_time": "0:14:57", "throughput": 3346.73, "total_tokens": 397264}
{"current_steps": 970, "total_steps": 8260, "loss": 0.2774, "lr": 4.995436431783942e-05, "epoch": 1.1743341404358354, "percentage": 11.74, "elapsed_time": "0:01:59", "remaining_time": "0:14:56", "throughput": 3348.9, "total_tokens": 399376}
{"current_steps": 975, "total_steps": 8260, "loss": 0.384, "lr": 4.9951118277882636e-05, "epoch": 1.1803874092009685, "percentage": 11.8, "elapsed_time": "0:01:59", "remaining_time": "0:14:54", "throughput": 3350.94, "total_tokens": 401296}
{"current_steps": 980, "total_steps": 8260, "loss": 0.296, "lr": 4.994776083810463e-05, "epoch": 1.1864406779661016, "percentage": 11.86, "elapsed_time": "0:02:00", "remaining_time": "0:14:53", "throughput": 3352.13, "total_tokens": 403344}
{"current_steps": 985, "total_steps": 8260, "loss": 0.3192, "lr": 4.994429201349542e-05, "epoch": 1.192493946731235, "percentage": 11.92, "elapsed_time": "0:02:00", "remaining_time": "0:14:52", "throughput": 3354.61, "total_tokens": 405328}
{"current_steps": 990, "total_steps": 8260, "loss": 0.3341, "lr": 4.994071181954237e-05, "epoch": 1.1985472154963681, "percentage": 11.99, "elapsed_time": "0:02:01", "remaining_time": "0:14:51", "throughput": 3356.13, "total_tokens": 407344}
{"current_steps": 995, "total_steps": 8260, "loss": 0.3795, "lr": 4.993702027223004e-05, "epoch": 1.2046004842615012, "percentage": 12.05, "elapsed_time": "0:02:01", "remaining_time": "0:14:49", "throughput": 3357.85, "total_tokens": 409296}
{"current_steps": 1000, "total_steps": 8260, "loss": 0.5332, "lr": 4.9933217388040164e-05, "epoch": 1.2106537530266344, "percentage": 12.11, "elapsed_time": "0:02:02", "remaining_time": "0:14:49", "throughput": 3360.23, "total_tokens": 411536}
{"current_steps": 1005, "total_steps": 8260, "loss": 0.2651, "lr": 4.992930318395157e-05, "epoch": 1.2167070217917675, "percentage": 12.17, "elapsed_time": "0:02:03", "remaining_time": "0:14:48", "throughput": 3362.22, "total_tokens": 413712}
{"current_steps": 1010, "total_steps": 8260, "loss": 0.3058, "lr": 4.99252776774401e-05, "epoch": 1.2227602905569008, "percentage": 12.23, "elapsed_time": "0:02:03", "remaining_time": "0:14:47", "throughput": 3363.93, "total_tokens": 415856}
{"current_steps": 1015, "total_steps": 8260, "loss": 0.3491, "lr": 4.99211408864785e-05, "epoch": 1.228813559322034, "percentage": 12.29, "elapsed_time": "0:02:04", "remaining_time": "0:14:46", "throughput": 3365.44, "total_tokens": 417808}
{"current_steps": 1020, "total_steps": 8260, "loss": 0.2502, "lr": 4.991689282953642e-05, "epoch": 1.234866828087167, "percentage": 12.35, "elapsed_time": "0:02:04", "remaining_time": "0:14:44", "throughput": 3367.09, "total_tokens": 419760}
{"current_steps": 1025, "total_steps": 8260, "loss": 0.3296, "lr": 4.991253352558025e-05, "epoch": 1.2409200968523002, "percentage": 12.41, "elapsed_time": "0:02:05", "remaining_time": "0:14:43", "throughput": 3368.4, "total_tokens": 421840}
{"current_steps": 1030, "total_steps": 8260, "loss": 0.4066, "lr": 4.9908062994073056e-05, "epoch": 1.2469733656174333, "percentage": 12.47, "elapsed_time": "0:02:05", "remaining_time": "0:14:43", "throughput": 3370.12, "total_tokens": 424112}
{"current_steps": 1035, "total_steps": 8260, "loss": 0.2812, "lr": 4.990348125497454e-05, "epoch": 1.2530266343825667, "percentage": 12.53, "elapsed_time": "0:02:06", "remaining_time": "0:14:41", "throughput": 3372.65, "total_tokens": 426128}
{"current_steps": 1040, "total_steps": 8260, "loss": 0.3355, "lr": 4.9898788328740884e-05, "epoch": 1.2590799031476998, "percentage": 12.59, "elapsed_time": "0:02:06", "remaining_time": "0:14:40", "throughput": 3374.83, "total_tokens": 428272}
{"current_steps": 1045, "total_steps": 8260, "loss": 0.3011, "lr": 4.989398423632471e-05, "epoch": 1.265133171912833, "percentage": 12.65, "elapsed_time": "0:02:07", "remaining_time": "0:14:40", "throughput": 3375.86, "total_tokens": 430320}
{"current_steps": 1050, "total_steps": 8260, "loss": 0.2315, "lr": 4.988906899917496e-05, "epoch": 1.271186440677966, "percentage": 12.71, "elapsed_time": "0:02:07", "remaining_time": "0:14:38", "throughput": 3377.43, "total_tokens": 432272}
{"current_steps": 1055, "total_steps": 8260, "loss": 0.3452, "lr": 4.988404263923679e-05, "epoch": 1.2772397094430992, "percentage": 12.77, "elapsed_time": "0:02:08", "remaining_time": "0:14:37", "throughput": 3379.36, "total_tokens": 434384}
{"current_steps": 1060, "total_steps": 8260, "loss": 0.2505, "lr": 4.987890517895152e-05, "epoch": 1.2832929782082325, "percentage": 12.83, "elapsed_time": "0:02:09", "remaining_time": "0:14:36", "throughput": 3380.81, "total_tokens": 436336}
{"current_steps": 1065, "total_steps": 8260, "loss": 0.3073, "lr": 4.987365664125647e-05, "epoch": 1.2893462469733656, "percentage": 12.89, "elapsed_time": "0:02:09", "remaining_time": "0:14:35", "throughput": 3382.57, "total_tokens": 438352}
{"current_steps": 1070, "total_steps": 8260, "loss": 0.2981, "lr": 4.986829704958491e-05, "epoch": 1.2953995157384988, "percentage": 12.95, "elapsed_time": "0:02:10", "remaining_time": "0:14:34", "throughput": 3384.17, "total_tokens": 440336}
{"current_steps": 1075, "total_steps": 8260, "loss": 0.1811, "lr": 4.986282642786594e-05, "epoch": 1.3014527845036319, "percentage": 13.01, "elapsed_time": "0:02:10", "remaining_time": "0:14:33", "throughput": 3385.42, "total_tokens": 442352}
{"current_steps": 1080, "total_steps": 8260, "loss": 0.3333, "lr": 4.985724480052435e-05, "epoch": 1.307506053268765, "percentage": 13.08, "elapsed_time": "0:02:11", "remaining_time": "0:14:32", "throughput": 3386.81, "total_tokens": 444496}
{"current_steps": 1085, "total_steps": 8260, "loss": 0.3168, "lr": 4.985155219248057e-05, "epoch": 1.3135593220338984, "percentage": 13.14, "elapsed_time": "0:02:11", "remaining_time": "0:14:31", "throughput": 3387.92, "total_tokens": 446576}
{"current_steps": 1090, "total_steps": 8260, "loss": 0.3162, "lr": 4.9845748629150514e-05, "epoch": 1.3196125907990315, "percentage": 13.2, "elapsed_time": "0:02:12", "remaining_time": "0:14:30", "throughput": 3389.44, "total_tokens": 448656}
{"current_steps": 1095, "total_steps": 8260, "loss": 0.2642, "lr": 4.9839834136445485e-05, "epoch": 1.3256658595641646, "percentage": 13.26, "elapsed_time": "0:02:12", "remaining_time": "0:14:29", "throughput": 3390.65, "total_tokens": 450672}
{"current_steps": 1100, "total_steps": 8260, "loss": 0.2715, "lr": 4.983380874077204e-05, "epoch": 1.331719128329298, "percentage": 13.32, "elapsed_time": "0:02:13", "remaining_time": "0:14:28", "throughput": 3391.88, "total_tokens": 452784}
{"current_steps": 1105, "total_steps": 8260, "loss": 0.3167, "lr": 4.982767246903192e-05, "epoch": 1.3377723970944309, "percentage": 13.38, "elapsed_time": "0:02:14", "remaining_time": "0:14:27", "throughput": 3394.02, "total_tokens": 454864}
{"current_steps": 1110, "total_steps": 8260, "loss": 0.2723, "lr": 4.982142534862185e-05, "epoch": 1.3438256658595642, "percentage": 13.44, "elapsed_time": "0:02:14", "remaining_time": "0:14:27", "throughput": 3395.35, "total_tokens": 457072}
{"current_steps": 1115, "total_steps": 8260, "loss": 0.2981, "lr": 4.981506740743351e-05, "epoch": 1.3498789346246973, "percentage": 13.5, "elapsed_time": "0:02:15", "remaining_time": "0:14:26", "throughput": 3396.01, "total_tokens": 459184}
{"current_steps": 1120, "total_steps": 8260, "loss": 0.2494, "lr": 4.980859867385334e-05, "epoch": 1.3559322033898304, "percentage": 13.56, "elapsed_time": "0:02:15", "remaining_time": "0:14:25", "throughput": 3396.79, "total_tokens": 461136}
{"current_steps": 1125, "total_steps": 8260, "loss": 0.2137, "lr": 4.9802019176762434e-05, "epoch": 1.3619854721549638, "percentage": 13.62, "elapsed_time": "0:02:16", "remaining_time": "0:14:24", "throughput": 3397.98, "total_tokens": 463152}
{"current_steps": 1130, "total_steps": 8260, "loss": 0.2228, "lr": 4.9795328945536425e-05, "epoch": 1.368038740920097, "percentage": 13.68, "elapsed_time": "0:02:16", "remaining_time": "0:14:23", "throughput": 3399.02, "total_tokens": 465232}
{"current_steps": 1135, "total_steps": 8260, "loss": 0.2569, "lr": 4.978852801004534e-05, "epoch": 1.37409200968523, "percentage": 13.74, "elapsed_time": "0:02:17", "remaining_time": "0:14:22", "throughput": 3400.19, "total_tokens": 467248}
{"current_steps": 1140, "total_steps": 8260, "loss": 0.2992, "lr": 4.9781616400653464e-05, "epoch": 1.3801452784503632, "percentage": 13.8, "elapsed_time": "0:02:18", "remaining_time": "0:14:22", "throughput": 3401.18, "total_tokens": 469424}
{"current_steps": 1145, "total_steps": 8260, "loss": 0.3521, "lr": 4.9774594148219225e-05, "epoch": 1.3861985472154963, "percentage": 13.86, "elapsed_time": "0:02:18", "remaining_time": "0:14:20", "throughput": 3402.31, "total_tokens": 471344}
{"current_steps": 1150, "total_steps": 8260, "loss": 0.2699, "lr": 4.976746128409502e-05, "epoch": 1.3922518159806296, "percentage": 13.92, "elapsed_time": "0:02:19", "remaining_time": "0:14:19", "throughput": 3403.58, "total_tokens": 473296}
{"current_steps": 1155, "total_steps": 8260, "loss": 0.271, "lr": 4.9760217840127126e-05, "epoch": 1.3983050847457628, "percentage": 13.98, "elapsed_time": "0:02:19", "remaining_time": "0:14:18", "throughput": 3405.12, "total_tokens": 475312}
{"current_steps": 1160, "total_steps": 8260, "loss": 0.2154, "lr": 4.97528638486555e-05, "epoch": 1.4043583535108959, "percentage": 14.04, "elapsed_time": "0:02:20", "remaining_time": "0:14:17", "throughput": 3406.33, "total_tokens": 477360}
{"current_steps": 1165, "total_steps": 8260, "loss": 0.2866, "lr": 4.9745399342513666e-05, "epoch": 1.410411622276029, "percentage": 14.1, "elapsed_time": "0:02:20", "remaining_time": "0:14:16", "throughput": 3407.41, "total_tokens": 479376}
{"current_steps": 1170, "total_steps": 8260, "loss": 0.2401, "lr": 4.9737824355028584e-05, "epoch": 1.4164648910411621, "percentage": 14.16, "elapsed_time": "0:02:21", "remaining_time": "0:14:16", "throughput": 3408.23, "total_tokens": 481456}
{"current_steps": 1175, "total_steps": 8260, "loss": 0.2769, "lr": 4.973013892002047e-05, "epoch": 1.4225181598062955, "percentage": 14.23, "elapsed_time": "0:02:21", "remaining_time": "0:14:15", "throughput": 3409.25, "total_tokens": 483632}
{"current_steps": 1180, "total_steps": 8260, "loss": 0.2788, "lr": 4.9722343071802665e-05, "epoch": 1.4285714285714286, "percentage": 14.29, "elapsed_time": "0:02:22", "remaining_time": "0:14:14", "throughput": 3410.94, "total_tokens": 485680}
{"current_steps": 1185, "total_steps": 8260, "loss": 0.3334, "lr": 4.971443684518145e-05, "epoch": 1.4346246973365617, "percentage": 14.35, "elapsed_time": "0:02:22", "remaining_time": "0:14:13", "throughput": 3412.41, "total_tokens": 487792}
{"current_steps": 1190, "total_steps": 8260, "loss": 0.248, "lr": 4.970642027545596e-05, "epoch": 1.4406779661016949, "percentage": 14.41, "elapsed_time": "0:02:23", "remaining_time": "0:14:12", "throughput": 3413.67, "total_tokens": 489840}
{"current_steps": 1195, "total_steps": 8260, "loss": 0.2583, "lr": 4.969829339841793e-05, "epoch": 1.446731234866828, "percentage": 14.47, "elapsed_time": "0:02:24", "remaining_time": "0:14:11", "throughput": 3414.54, "total_tokens": 491824}
{"current_steps": 1200, "total_steps": 8260, "loss": 0.3112, "lr": 4.9690056250351626e-05, "epoch": 1.4527845036319613, "percentage": 14.53, "elapsed_time": "0:02:24", "remaining_time": "0:14:10", "throughput": 3415.77, "total_tokens": 493872}
{"current_steps": 1205, "total_steps": 8260, "loss": 0.2475, "lr": 4.9681708868033616e-05, "epoch": 1.4588377723970944, "percentage": 14.59, "elapsed_time": "0:02:25", "remaining_time": "0:14:09", "throughput": 3417.19, "total_tokens": 495952}
{"current_steps": 1210, "total_steps": 8260, "loss": 0.3119, "lr": 4.967325128873266e-05, "epoch": 1.4648910411622276, "percentage": 14.65, "elapsed_time": "0:02:25", "remaining_time": "0:14:08", "throughput": 3418.04, "total_tokens": 497936}
{"current_steps": 1215, "total_steps": 8260, "loss": 0.2317, "lr": 4.96646835502095e-05, "epoch": 1.4709443099273607, "percentage": 14.71, "elapsed_time": "0:02:26", "remaining_time": "0:14:07", "throughput": 3418.97, "total_tokens": 499856}
{"current_steps": 1220, "total_steps": 8260, "loss": 0.2481, "lr": 4.965600569071671e-05, "epoch": 1.4769975786924938, "percentage": 14.77, "elapsed_time": "0:02:26", "remaining_time": "0:14:06", "throughput": 3420.3, "total_tokens": 501840}
{"current_steps": 1225, "total_steps": 8260, "loss": 0.2594, "lr": 4.9647217748998534e-05, "epoch": 1.4830508474576272, "percentage": 14.83, "elapsed_time": "0:02:27", "remaining_time": "0:14:05", "throughput": 3421.3, "total_tokens": 503952}
{"current_steps": 1230, "total_steps": 8260, "loss": 0.2606, "lr": 4.963831976429067e-05, "epoch": 1.4891041162227603, "percentage": 14.89, "elapsed_time": "0:02:27", "remaining_time": "0:14:04", "throughput": 3422.62, "total_tokens": 505936}
{"current_steps": 1235, "total_steps": 8260, "loss": 0.2334, "lr": 4.9629311776320176e-05, "epoch": 1.4951573849878934, "percentage": 14.95, "elapsed_time": "0:02:28", "remaining_time": "0:14:04", "throughput": 3423.36, "total_tokens": 507984}
{"current_steps": 1239, "total_steps": 8260, "eval_loss": 0.2614063620567322, "epoch": 1.5, "percentage": 15.0, "elapsed_time": "0:02:33", "remaining_time": "0:14:29", "throughput": 3320.56, "total_tokens": 509456}
{"current_steps": 1240, "total_steps": 8260, "loss": 0.2787, "lr": 4.962019382530521e-05, "epoch": 1.5012106537530268, "percentage": 15.01, "elapsed_time": "0:02:34", "remaining_time": "0:14:34", "throughput": 3301.58, "total_tokens": 509840}
{"current_steps": 1245, "total_steps": 8260, "loss": 0.2242, "lr": 4.9610965951954885e-05, "epoch": 1.5072639225181597, "percentage": 15.07, "elapsed_time": "0:02:34", "remaining_time": "0:14:33", "throughput": 3302.72, "total_tokens": 511856}
{"current_steps": 1250, "total_steps": 8260, "loss": 0.2267, "lr": 4.960162819746911e-05, "epoch": 1.513317191283293, "percentage": 15.13, "elapsed_time": "0:02:35", "remaining_time": "0:14:32", "throughput": 3304.62, "total_tokens": 513904}
{"current_steps": 1255, "total_steps": 8260, "loss": 0.2478, "lr": 4.9592180603538366e-05, "epoch": 1.5193704600484261, "percentage": 15.19, "elapsed_time": "0:02:36", "remaining_time": "0:14:30", "throughput": 3305.84, "total_tokens": 515824}
{"current_steps": 1260, "total_steps": 8260, "loss": 0.2903, "lr": 4.958262321234353e-05, "epoch": 1.5254237288135593, "percentage": 15.25, "elapsed_time": "0:02:36", "remaining_time": "0:14:29", "throughput": 3307.54, "total_tokens": 517744}
{"current_steps": 1265, "total_steps": 8260, "loss": 0.1913, "lr": 4.95729560665557e-05, "epoch": 1.5314769975786926, "percentage": 15.31, "elapsed_time": "0:02:37", "remaining_time": "0:14:28", "throughput": 3308.75, "total_tokens": 519824}
{"current_steps": 1270, "total_steps": 8260, "loss": 0.3283, "lr": 4.956317920933602e-05, "epoch": 1.5375302663438255, "percentage": 15.38, "elapsed_time": "0:02:37", "remaining_time": "0:14:27", "throughput": 3310.12, "total_tokens": 521936}
{"current_steps": 1275, "total_steps": 8260, "loss": 0.3203, "lr": 4.955329268433543e-05, "epoch": 1.5435835351089588, "percentage": 15.44, "elapsed_time": "0:02:38", "remaining_time": "0:14:27", "throughput": 3312.11, "total_tokens": 524176}
{"current_steps": 1280, "total_steps": 8260, "loss": 0.3194, "lr": 4.954329653569452e-05, "epoch": 1.549636803874092, "percentage": 15.5, "elapsed_time": "0:02:38", "remaining_time": "0:14:26", "throughput": 3313.33, "total_tokens": 526256}
{"current_steps": 1285, "total_steps": 8260, "loss": 0.3004, "lr": 4.953319080804333e-05, "epoch": 1.555690072639225, "percentage": 15.56, "elapsed_time": "0:02:39", "remaining_time": "0:14:25", "throughput": 3315.11, "total_tokens": 528304}
{"current_steps": 1290, "total_steps": 8260, "loss": 0.2989, "lr": 4.952297554650113e-05, "epoch": 1.5617433414043584, "percentage": 15.62, "elapsed_time": "0:02:39", "remaining_time": "0:14:24", "throughput": 3316.33, "total_tokens": 530320}
{"current_steps": 1295, "total_steps": 8260, "loss": 0.2392, "lr": 4.9512650796676216e-05, "epoch": 1.5677966101694916, "percentage": 15.68, "elapsed_time": "0:02:40", "remaining_time": "0:14:23", "throughput": 3317.54, "total_tokens": 532400}
{"current_steps": 1300, "total_steps": 8260, "loss": 0.2752, "lr": 4.9502216604665744e-05, "epoch": 1.5738498789346247, "percentage": 15.74, "elapsed_time": "0:02:41", "remaining_time": "0:14:21", "throughput": 3319.24, "total_tokens": 534416}
{"current_steps": 1305, "total_steps": 8260, "loss": 0.2915, "lr": 4.949167301705548e-05, "epoch": 1.5799031476997578, "percentage": 15.8, "elapsed_time": "0:02:41", "remaining_time": "0:14:21", "throughput": 3320.85, "total_tokens": 536592}
{"current_steps": 1310, "total_steps": 8260, "loss": 0.1677, "lr": 4.948102008091962e-05, "epoch": 1.585956416464891, "percentage": 15.86, "elapsed_time": "0:02:42", "remaining_time": "0:14:20", "throughput": 3322.57, "total_tokens": 538704}
{"current_steps": 1315, "total_steps": 8260, "loss": 0.3603, "lr": 4.947025784382057e-05, "epoch": 1.5920096852300243, "percentage": 15.92, "elapsed_time": "0:02:42", "remaining_time": "0:14:19", "throughput": 3323.9, "total_tokens": 540912}
{"current_steps": 1320, "total_steps": 8260, "loss": 0.2694, "lr": 4.9459386353808736e-05, "epoch": 1.5980629539951574, "percentage": 15.98, "elapsed_time": "0:02:43", "remaining_time": "0:14:18", "throughput": 3325.2, "total_tokens": 542864}
{"current_steps": 1325, "total_steps": 8260, "loss": 0.2722, "lr": 4.944840565942229e-05, "epoch": 1.6041162227602905, "percentage": 16.04, "elapsed_time": "0:02:43", "remaining_time": "0:14:17", "throughput": 3326.11, "total_tokens": 544816}
{"current_steps": 1330, "total_steps": 8260, "loss": 0.265, "lr": 4.943731580968701e-05, "epoch": 1.6101694915254239, "percentage": 16.1, "elapsed_time": "0:02:44", "remaining_time": "0:14:16", "throughput": 3327.91, "total_tokens": 546864}
{"current_steps": 1335, "total_steps": 8260, "loss": 0.2585, "lr": 4.942611685411598e-05, "epoch": 1.6162227602905568, "percentage": 16.16, "elapsed_time": "0:02:44", "remaining_time": "0:14:15", "throughput": 3329.27, "total_tokens": 548912}
{"current_steps": 1340, "total_steps": 8260, "loss": 0.275, "lr": 4.9414808842709435e-05, "epoch": 1.6222760290556901, "percentage": 16.22, "elapsed_time": "0:02:45", "remaining_time": "0:14:14", "throughput": 3330.95, "total_tokens": 551056}
{"current_steps": 1345, "total_steps": 8260, "loss": 0.257, "lr": 4.940339182595451e-05, "epoch": 1.6283292978208233, "percentage": 16.28, "elapsed_time": "0:02:45", "remaining_time": "0:14:13", "throughput": 3332.52, "total_tokens": 553072}
{"current_steps": 1350, "total_steps": 8260, "loss": 0.2395, "lr": 4.9391865854825015e-05, "epoch": 1.6343825665859564, "percentage": 16.34, "elapsed_time": "0:02:46", "remaining_time": "0:14:12", "throughput": 3333.66, "total_tokens": 555248}
{"current_steps": 1355, "total_steps": 8260, "loss": 0.2474, "lr": 4.938023098078122e-05, "epoch": 1.6404358353510897, "percentage": 16.4, "elapsed_time": "0:02:47", "remaining_time": "0:14:11", "throughput": 3335.3, "total_tokens": 557360}
{"current_steps": 1360, "total_steps": 8260, "loss": 0.2685, "lr": 4.93684872557696e-05, "epoch": 1.6464891041162226, "percentage": 16.46, "elapsed_time": "0:02:47", "remaining_time": "0:14:10", "throughput": 3336.44, "total_tokens": 559376}
{"current_steps": 1365, "total_steps": 8260, "loss": 0.1913, "lr": 4.935663473222264e-05, "epoch": 1.652542372881356, "percentage": 16.53, "elapsed_time": "0:02:48", "remaining_time": "0:14:09", "throughput": 3337.92, "total_tokens": 561296}
{"current_steps": 1370, "total_steps": 8260, "loss": 0.2767, "lr": 4.934467346305856e-05, "epoch": 1.658595641646489, "percentage": 16.59, "elapsed_time": "0:02:48", "remaining_time": "0:14:08", "throughput": 3339.35, "total_tokens": 563376}
{"current_steps": 1375, "total_steps": 8260, "loss": 0.1696, "lr": 4.933260350168112e-05, "epoch": 1.6646489104116222, "percentage": 16.65, "elapsed_time": "0:02:49", "remaining_time": "0:14:07", "throughput": 3340.59, "total_tokens": 565424}
{"current_steps": 1380, "total_steps": 8260, "loss": 0.2666, "lr": 4.932042490197933e-05, "epoch": 1.6707021791767556, "percentage": 16.71, "elapsed_time": "0:02:49", "remaining_time": "0:14:06", "throughput": 3341.54, "total_tokens": 567504}
{"current_steps": 1385, "total_steps": 8260, "loss": 0.3846, "lr": 4.930813771832728e-05, "epoch": 1.6767554479418885, "percentage": 16.77, "elapsed_time": "0:02:50", "remaining_time": "0:14:06", "throughput": 3342.92, "total_tokens": 569744}
{"current_steps": 1390, "total_steps": 8260, "loss": 0.1865, "lr": 4.929574200558382e-05, "epoch": 1.6828087167070218, "percentage": 16.83, "elapsed_time": "0:02:51", "remaining_time": "0:14:05", "throughput": 3343.8, "total_tokens": 571888}
{"current_steps": 1395, "total_steps": 8260, "loss": 0.2233, "lr": 4.928323781909239e-05, "epoch": 1.688861985472155, "percentage": 16.89, "elapsed_time": "0:02:51", "remaining_time": "0:14:04", "throughput": 3345.34, "total_tokens": 574000}
{"current_steps": 1400, "total_steps": 8260, "loss": 0.2334, "lr": 4.927062521468068e-05, "epoch": 1.694915254237288, "percentage": 16.95, "elapsed_time": "0:02:52", "remaining_time": "0:14:03", "throughput": 3346.7, "total_tokens": 575984}
{"current_steps": 1405, "total_steps": 8260, "loss": 0.2089, "lr": 4.92579042486605e-05, "epoch": 1.7009685230024214, "percentage": 17.01, "elapsed_time": "0:02:52", "remaining_time": "0:14:02", "throughput": 3347.64, "total_tokens": 577968}
{"current_steps": 1410, "total_steps": 8260, "loss": 0.2712, "lr": 4.924507497782743e-05, "epoch": 1.7070217917675545, "percentage": 17.07, "elapsed_time": "0:02:53", "remaining_time": "0:14:01", "throughput": 3348.94, "total_tokens": 580112}
{"current_steps": 1415, "total_steps": 8260, "loss": 0.3309, "lr": 4.923213745946059e-05, "epoch": 1.7130750605326877, "percentage": 17.13, "elapsed_time": "0:02:53", "remaining_time": "0:14:00", "throughput": 3350.59, "total_tokens": 582160}
{"current_steps": 1420, "total_steps": 8260, "loss": 0.4003, "lr": 4.921909175132242e-05, "epoch": 1.7191283292978208, "percentage": 17.19, "elapsed_time": "0:02:54", "remaining_time": "0:13:59", "throughput": 3352.25, "total_tokens": 584304}
{"current_steps": 1425, "total_steps": 8260, "loss": 0.2769, "lr": 4.920593791165839e-05, "epoch": 1.725181598062954, "percentage": 17.25, "elapsed_time": "0:02:54", "remaining_time": "0:13:58", "throughput": 3353.05, "total_tokens": 586096}
{"current_steps": 1430, "total_steps": 8260, "loss": 0.2079, "lr": 4.919267599919674e-05, "epoch": 1.7312348668280872, "percentage": 17.31, "elapsed_time": "0:02:55", "remaining_time": "0:13:57", "throughput": 3353.93, "total_tokens": 588240}
{"current_steps": 1435, "total_steps": 8260, "loss": 0.2321, "lr": 4.917930607314823e-05, "epoch": 1.7372881355932204, "percentage": 17.37, "elapsed_time": "0:02:55", "remaining_time": "0:13:56", "throughput": 3355.59, "total_tokens": 590480}
{"current_steps": 1440, "total_steps": 8260, "loss": 0.2686, "lr": 4.916582819320588e-05, "epoch": 1.7433414043583535, "percentage": 17.43, "elapsed_time": "0:02:56", "remaining_time": "0:13:56", "throughput": 3356.68, "total_tokens": 592528}
{"current_steps": 1445, "total_steps": 8260, "loss": 0.2943, "lr": 4.915224241954467e-05, "epoch": 1.7493946731234868, "percentage": 17.49, "elapsed_time": "0:02:57", "remaining_time": "0:13:54", "throughput": 3357.89, "total_tokens": 594416}
{"current_steps": 1450, "total_steps": 8260, "loss": 0.1879, "lr": 4.9138548812821316e-05, "epoch": 1.7554479418886197, "percentage": 17.55, "elapsed_time": "0:02:57", "remaining_time": "0:13:54", "throughput": 3358.54, "total_tokens": 596432}
{"current_steps": 1455, "total_steps": 8260, "loss": 0.2289, "lr": 4.912474743417399e-05, "epoch": 1.761501210653753, "percentage": 17.62, "elapsed_time": "0:02:58", "remaining_time": "0:13:53", "throughput": 3359.51, "total_tokens": 598352}
{"current_steps": 1460, "total_steps": 8260, "loss": 0.2596, "lr": 4.911083834522199e-05, "epoch": 1.7675544794188862, "percentage": 17.68, "elapsed_time": "0:02:58", "remaining_time": "0:13:52", "throughput": 3360.82, "total_tokens": 600432}
{"current_steps": 1465, "total_steps": 8260, "loss": 0.3007, "lr": 4.909682160806556e-05, "epoch": 1.7736077481840193, "percentage": 17.74, "elapsed_time": "0:02:59", "remaining_time": "0:13:51", "throughput": 3362.39, "total_tokens": 602480}
{"current_steps": 1470, "total_steps": 8260, "loss": 0.198, "lr": 4.908269728528553e-05, "epoch": 1.7796610169491527, "percentage": 17.8, "elapsed_time": "0:02:59", "remaining_time": "0:13:50", "throughput": 3363.42, "total_tokens": 604496}
{"current_steps": 1475, "total_steps": 8260, "loss": 0.2296, "lr": 4.90684654399431e-05, "epoch": 1.7857142857142856, "percentage": 17.86, "elapsed_time": "0:03:00", "remaining_time": "0:13:49", "throughput": 3364.67, "total_tokens": 606672}
{"current_steps": 1480, "total_steps": 8260, "loss": 0.1677, "lr": 4.9054126135579495e-05, "epoch": 1.791767554479419, "percentage": 17.92, "elapsed_time": "0:03:00", "remaining_time": "0:13:48", "throughput": 3365.56, "total_tokens": 608752}
{"current_steps": 1485, "total_steps": 8260, "loss": 0.2834, "lr": 4.9039679436215734e-05, "epoch": 1.797820823244552, "percentage": 17.98, "elapsed_time": "0:03:01", "remaining_time": "0:13:47", "throughput": 3366.94, "total_tokens": 610608}
{"current_steps": 1490, "total_steps": 8260, "loss": 0.3047, "lr": 4.9025125406352335e-05, "epoch": 1.8038740920096852, "percentage": 18.04, "elapsed_time": "0:03:01", "remaining_time": "0:13:46", "throughput": 3367.77, "total_tokens": 612688}
{"current_steps": 1495, "total_steps": 8260, "loss": 0.179, "lr": 4.9010464110968976e-05, "epoch": 1.8099273607748185, "percentage": 18.1, "elapsed_time": "0:03:02", "remaining_time": "0:13:45", "throughput": 3369.14, "total_tokens": 614704}
{"current_steps": 1500, "total_steps": 8260, "loss": 0.2198, "lr": 4.89956956155243e-05, "epoch": 1.8159806295399514, "percentage": 18.16, "elapsed_time": "0:03:03", "remaining_time": "0:13:44", "throughput": 3370.0, "total_tokens": 616784}
{"current_steps": 1505, "total_steps": 8260, "loss": 0.2524, "lr": 4.898081998595555e-05, "epoch": 1.8220338983050848, "percentage": 18.22, "elapsed_time": "0:03:03", "remaining_time": "0:13:43", "throughput": 3371.07, "total_tokens": 618736}
{"current_steps": 1510, "total_steps": 8260, "loss": 0.2251, "lr": 4.8965837288678253e-05, "epoch": 1.828087167070218, "percentage": 18.28, "elapsed_time": "0:03:04", "remaining_time": "0:13:43", "throughput": 3372.07, "total_tokens": 620944}
{"current_steps": 1515, "total_steps": 8260, "loss": 0.2785, "lr": 4.895074759058601e-05, "epoch": 1.834140435835351, "percentage": 18.34, "elapsed_time": "0:03:04", "remaining_time": "0:13:42", "throughput": 3373.29, "total_tokens": 623120}
{"current_steps": 1520, "total_steps": 8260, "loss": 0.1872, "lr": 4.893555095905014e-05, "epoch": 1.8401937046004844, "percentage": 18.4, "elapsed_time": "0:03:05", "remaining_time": "0:13:41", "throughput": 3374.18, "total_tokens": 625136}
{"current_steps": 1525, "total_steps": 8260, "loss": 0.2694, "lr": 4.892024746191939e-05, "epoch": 1.8462469733656173, "percentage": 18.46, "elapsed_time": "0:03:05", "remaining_time": "0:13:40", "throughput": 3374.91, "total_tokens": 627216}
{"current_steps": 1530, "total_steps": 8260, "loss": 0.2124, "lr": 4.890483716751961e-05, "epoch": 1.8523002421307506, "percentage": 18.52, "elapsed_time": "0:03:06", "remaining_time": "0:13:39", "throughput": 3375.72, "total_tokens": 629136}
{"current_steps": 1535, "total_steps": 8260, "loss": 0.2333, "lr": 4.888932014465352e-05, "epoch": 1.8583535108958837, "percentage": 18.58, "elapsed_time": "0:03:06", "remaining_time": "0:13:39", "throughput": 3376.57, "total_tokens": 631216}
{"current_steps": 1540, "total_steps": 8260, "loss": 0.2444, "lr": 4.8873696462600303e-05, "epoch": 1.8644067796610169, "percentage": 18.64, "elapsed_time": "0:03:07", "remaining_time": "0:13:38", "throughput": 3377.86, "total_tokens": 633328}
{"current_steps": 1545, "total_steps": 8260, "loss": 0.2802, "lr": 4.8857966191115365e-05, "epoch": 1.8704600484261502, "percentage": 18.7, "elapsed_time": "0:03:08", "remaining_time": "0:13:37", "throughput": 3378.62, "total_tokens": 635440}
{"current_steps": 1550, "total_steps": 8260, "loss": 0.2128, "lr": 4.884212940043001e-05, "epoch": 1.8765133171912833, "percentage": 18.77, "elapsed_time": "0:03:08", "remaining_time": "0:13:36", "throughput": 3379.86, "total_tokens": 637360}
{"current_steps": 1555, "total_steps": 8260, "loss": 0.2471, "lr": 4.882618616125111e-05, "epoch": 1.8825665859564165, "percentage": 18.83, "elapsed_time": "0:03:09", "remaining_time": "0:13:35", "throughput": 3380.73, "total_tokens": 639280}
{"current_steps": 1560, "total_steps": 8260, "loss": 0.21, "lr": 4.881013654476081e-05, "epoch": 1.8886198547215496, "percentage": 18.89, "elapsed_time": "0:03:09", "remaining_time": "0:13:34", "throughput": 3381.73, "total_tokens": 641328}
{"current_steps": 1565, "total_steps": 8260, "loss": 0.2287, "lr": 4.8793980622616195e-05, "epoch": 1.8946731234866827, "percentage": 18.95, "elapsed_time": "0:03:10", "remaining_time": "0:13:33", "throughput": 3382.75, "total_tokens": 643376}
{"current_steps": 1570, "total_steps": 8260, "loss": 0.2396, "lr": 4.877771846694897e-05, "epoch": 1.900726392251816, "percentage": 19.01, "elapsed_time": "0:03:10", "remaining_time": "0:13:32", "throughput": 3383.67, "total_tokens": 645488}
{"current_steps": 1575, "total_steps": 8260, "loss": 0.2261, "lr": 4.876135015036515e-05, "epoch": 1.9067796610169492, "percentage": 19.07, "elapsed_time": "0:03:11", "remaining_time": "0:13:32", "throughput": 3384.23, "total_tokens": 647600}
{"current_steps": 1580, "total_steps": 8260, "loss": 0.1929, "lr": 4.874487574594473e-05, "epoch": 1.9128329297820823, "percentage": 19.13, "elapsed_time": "0:03:11", "remaining_time": "0:13:31", "throughput": 3384.86, "total_tokens": 649648}
{"current_steps": 1585, "total_steps": 8260, "loss": 0.2879, "lr": 4.872829532724136e-05, "epoch": 1.9188861985472156, "percentage": 19.19, "elapsed_time": "0:03:12", "remaining_time": "0:13:30", "throughput": 3386.1, "total_tokens": 651760}
{"current_steps": 1590, "total_steps": 8260, "loss": 0.2402, "lr": 4.8711608968282e-05, "epoch": 1.9249394673123486, "percentage": 19.25, "elapsed_time": "0:03:13", "remaining_time": "0:13:29", "throughput": 3387.21, "total_tokens": 653840}
{"current_steps": 1595, "total_steps": 8260, "loss": 0.2143, "lr": 4.8694816743566616e-05, "epoch": 1.930992736077482, "percentage": 19.31, "elapsed_time": "0:03:13", "remaining_time": "0:13:28", "throughput": 3387.83, "total_tokens": 655888}
{"current_steps": 1600, "total_steps": 8260, "loss": 0.2147, "lr": 4.867791872806785e-05, "epoch": 1.937046004842615, "percentage": 19.37, "elapsed_time": "0:03:14", "remaining_time": "0:13:27", "throughput": 3388.85, "total_tokens": 657776}
{"current_steps": 1605, "total_steps": 8260, "loss": 0.2042, "lr": 4.8660914997230624e-05, "epoch": 1.9430992736077481, "percentage": 19.43, "elapsed_time": "0:03:14", "remaining_time": "0:13:27", "throughput": 3389.57, "total_tokens": 659952}
{"current_steps": 1610, "total_steps": 8260, "loss": 0.2411, "lr": 4.8643805626971894e-05, "epoch": 1.9491525423728815, "percentage": 19.49, "elapsed_time": "0:03:15", "remaining_time": "0:13:26", "throughput": 3390.38, "total_tokens": 661968}
{"current_steps": 1615, "total_steps": 8260, "loss": 0.2589, "lr": 4.862659069368026e-05, "epoch": 1.9552058111380144, "percentage": 19.55, "elapsed_time": "0:03:15", "remaining_time": "0:13:25", "throughput": 3390.79, "total_tokens": 664048}
{"current_steps": 1620, "total_steps": 8260, "loss": 0.185, "lr": 4.8609270274215614e-05, "epoch": 1.9612590799031477, "percentage": 19.61, "elapsed_time": "0:03:16", "remaining_time": "0:13:24", "throughput": 3391.57, "total_tokens": 665968}
{"current_steps": 1625, "total_steps": 8260, "loss": 0.248, "lr": 4.859184444590882e-05, "epoch": 1.9673123486682809, "percentage": 19.67, "elapsed_time": "0:03:16", "remaining_time": "0:13:23", "throughput": 3392.54, "total_tokens": 667920}
{"current_steps": 1630, "total_steps": 8260, "loss": 0.1896, "lr": 4.857431328656137e-05, "epoch": 1.973365617433414, "percentage": 19.73, "elapsed_time": "0:03:17", "remaining_time": "0:13:23", "throughput": 3393.47, "total_tokens": 669968}
{"current_steps": 1635, "total_steps": 8260, "loss": 0.183, "lr": 4.855667687444504e-05, "epoch": 1.9794188861985473, "percentage": 19.79, "elapsed_time": "0:03:17", "remaining_time": "0:13:22", "throughput": 3394.43, "total_tokens": 672016}
{"current_steps": 1640, "total_steps": 8260, "loss": 0.2504, "lr": 4.85389352883015e-05, "epoch": 1.9854721549636802, "percentage": 19.85, "elapsed_time": "0:03:18", "remaining_time": "0:13:21", "throughput": 3395.19, "total_tokens": 674096}
{"current_steps": 1645, "total_steps": 8260, "loss": 0.1675, "lr": 4.8521088607342016e-05, "epoch": 1.9915254237288136, "percentage": 19.92, "elapsed_time": "0:03:19", "remaining_time": "0:13:20", "throughput": 3396.42, "total_tokens": 676208}
{"current_steps": 1650, "total_steps": 8260, "loss": 0.2324, "lr": 4.850313691124707e-05, "epoch": 1.9975786924939467, "percentage": 19.98, "elapsed_time": "0:03:19", "remaining_time": "0:13:19", "throughput": 3397.76, "total_tokens": 678256}
{"current_steps": 1652, "total_steps": 8260, "eval_loss": 0.2291734516620636, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:03:24", "remaining_time": "0:13:38", "throughput": 3318.75, "total_tokens": 678688}
{"current_steps": 1655, "total_steps": 8260, "loss": 0.229, "lr": 4.8485080280166006e-05, "epoch": 2.00363196125908, "percentage": 20.04, "elapsed_time": "0:03:25", "remaining_time": "0:13:41", "throughput": 3301.78, "total_tokens": 679936}
{"current_steps": 1660, "total_steps": 8260, "loss": 0.2426, "lr": 4.8466918794716666e-05, "epoch": 2.009685230024213, "percentage": 20.1, "elapsed_time": "0:03:26", "remaining_time": "0:13:41", "throughput": 3302.59, "total_tokens": 681984}
{"current_steps": 1665, "total_steps": 8260, "loss": 0.2209, "lr": 4.8448652535985045e-05, "epoch": 2.015738498789346, "percentage": 20.16, "elapsed_time": "0:03:27", "remaining_time": "0:13:40", "throughput": 3303.55, "total_tokens": 684096}
{"current_steps": 1670, "total_steps": 8260, "loss": 0.2454, "lr": 4.8430281585524926e-05, "epoch": 2.0217917675544794, "percentage": 20.22, "elapsed_time": "0:03:27", "remaining_time": "0:13:39", "throughput": 3304.54, "total_tokens": 686016}
{"current_steps": 1675, "total_steps": 8260, "loss": 0.2267, "lr": 4.841180602535751e-05, "epoch": 2.0278450363196128, "percentage": 20.28, "elapsed_time": "0:03:28", "remaining_time": "0:13:38", "throughput": 3305.4, "total_tokens": 687904}
{"current_steps": 1680, "total_steps": 8260, "loss": 0.1869, "lr": 4.839322593797104e-05, "epoch": 2.0338983050847457, "percentage": 20.34, "elapsed_time": "0:03:28", "remaining_time": "0:13:37", "throughput": 3306.25, "total_tokens": 689888}
{"current_steps": 1685, "total_steps": 8260, "loss": 0.2365, "lr": 4.837454140632045e-05, "epoch": 2.039951573849879, "percentage": 20.4, "elapsed_time": "0:03:29", "remaining_time": "0:13:36", "throughput": 3307.48, "total_tokens": 691808}
{"current_steps": 1690, "total_steps": 8260, "loss": 0.2478, "lr": 4.8355752513826995e-05, "epoch": 2.046004842615012, "percentage": 20.46, "elapsed_time": "0:03:29", "remaining_time": "0:13:35", "throughput": 3308.5, "total_tokens": 693856}
{"current_steps": 1695, "total_steps": 8260, "loss": 0.2242, "lr": 4.833685934437787e-05, "epoch": 2.0520581113801453, "percentage": 20.52, "elapsed_time": "0:03:30", "remaining_time": "0:13:34", "throughput": 3309.63, "total_tokens": 695680}
{"current_steps": 1700, "total_steps": 8260, "loss": 0.2342, "lr": 4.831786198232583e-05, "epoch": 2.0581113801452786, "percentage": 20.58, "elapsed_time": "0:03:30", "remaining_time": "0:13:33", "throughput": 3310.5, "total_tokens": 697792}
{"current_steps": 1705, "total_steps": 8260, "loss": 0.225, "lr": 4.8298760512488824e-05, "epoch": 2.0641646489104115, "percentage": 20.64, "elapsed_time": "0:03:31", "remaining_time": "0:13:32", "throughput": 3311.4, "total_tokens": 699712}
{"current_steps": 1710, "total_steps": 8260, "loss": 0.1977, "lr": 4.827955502014963e-05, "epoch": 2.070217917675545, "percentage": 20.7, "elapsed_time": "0:03:31", "remaining_time": "0:13:31", "throughput": 3312.27, "total_tokens": 701792}
{"current_steps": 1715, "total_steps": 8260, "loss": 0.2402, "lr": 4.826024559105542e-05, "epoch": 2.0762711864406778, "percentage": 20.76, "elapsed_time": "0:03:32", "remaining_time": "0:13:30", "throughput": 3313.67, "total_tokens": 703936}
{"current_steps": 1720, "total_steps": 8260, "loss": 0.1678, "lr": 4.8240832311417465e-05, "epoch": 2.082324455205811, "percentage": 20.82, "elapsed_time": "0:03:32", "remaining_time": "0:13:29", "throughput": 3314.63, "total_tokens": 705952}
{"current_steps": 1725, "total_steps": 8260, "loss": 0.2266, "lr": 4.822131526791065e-05, "epoch": 2.0883777239709445, "percentage": 20.88, "elapsed_time": "0:03:33", "remaining_time": "0:13:29", "throughput": 3315.59, "total_tokens": 708064}
{"current_steps": 1730, "total_steps": 8260, "loss": 0.3038, "lr": 4.820169454767318e-05, "epoch": 2.0944309927360774, "percentage": 20.94, "elapsed_time": "0:03:34", "remaining_time": "0:13:27", "throughput": 3316.67, "total_tokens": 709952}
{"current_steps": 1735, "total_steps": 8260, "loss": 0.245, "lr": 4.8181970238306104e-05, "epoch": 2.1004842615012107, "percentage": 21.0, "elapsed_time": "0:03:34", "remaining_time": "0:13:27", "throughput": 3317.82, "total_tokens": 712032}
{"current_steps": 1740, "total_steps": 8260, "loss": 0.197, "lr": 4.816214242787302e-05, "epoch": 2.106537530266344, "percentage": 21.07, "elapsed_time": "0:03:35", "remaining_time": "0:13:26", "throughput": 3318.8, "total_tokens": 714048}
{"current_steps": 1745, "total_steps": 8260, "loss": 0.2401, "lr": 4.814221120489958e-05, "epoch": 2.112590799031477, "percentage": 21.13, "elapsed_time": "0:03:35", "remaining_time": "0:13:25", "throughput": 3319.69, "total_tokens": 716224}
{"current_steps": 1750, "total_steps": 8260, "loss": 0.1895, "lr": 4.812217665837316e-05, "epoch": 2.1186440677966103, "percentage": 21.19, "elapsed_time": "0:03:36", "remaining_time": "0:13:24", "throughput": 3320.88, "total_tokens": 718144}
{"current_steps": 1755, "total_steps": 8260, "loss": 0.1908, "lr": 4.810203887774247e-05, "epoch": 2.124697336561743, "percentage": 21.25, "elapsed_time": "0:03:36", "remaining_time": "0:13:23", "throughput": 3321.99, "total_tokens": 720288}
{"current_steps": 1760, "total_steps": 8260, "loss": 0.2969, "lr": 4.808179795291712e-05, "epoch": 2.1307506053268765, "percentage": 21.31, "elapsed_time": "0:03:37", "remaining_time": "0:13:22", "throughput": 3322.75, "total_tokens": 722368}
{"current_steps": 1765, "total_steps": 8260, "loss": 0.1986, "lr": 4.8061453974267195e-05, "epoch": 2.13680387409201, "percentage": 21.37, "elapsed_time": "0:03:37", "remaining_time": "0:13:22", "throughput": 3324.02, "total_tokens": 724480}
{"current_steps": 1770, "total_steps": 8260, "loss": 0.2429, "lr": 4.804100703262294e-05, "epoch": 2.142857142857143, "percentage": 21.43, "elapsed_time": "0:03:38", "remaining_time": "0:13:21", "throughput": 3325.23, "total_tokens": 726784}
{"current_steps": 1775, "total_steps": 8260, "loss": 0.199, "lr": 4.8020457219274266e-05, "epoch": 2.148910411622276, "percentage": 21.49, "elapsed_time": "0:03:39", "remaining_time": "0:13:20", "throughput": 3326.05, "total_tokens": 728768}
{"current_steps": 1780, "total_steps": 8260, "loss": 0.2168, "lr": 4.799980462597039e-05, "epoch": 2.154963680387409, "percentage": 21.55, "elapsed_time": "0:03:39", "remaining_time": "0:13:19", "throughput": 3327.1, "total_tokens": 730816}
{"current_steps": 1785, "total_steps": 8260, "loss": 0.2536, "lr": 4.7979049344919416e-05, "epoch": 2.1610169491525424, "percentage": 21.61, "elapsed_time": "0:03:40", "remaining_time": "0:13:18", "throughput": 3328.57, "total_tokens": 732896}
{"current_steps": 1790, "total_steps": 8260, "loss": 0.2468, "lr": 4.795819146878792e-05, "epoch": 2.1670702179176757, "percentage": 21.67, "elapsed_time": "0:03:40", "remaining_time": "0:13:17", "throughput": 3329.72, "total_tokens": 734816}
{"current_steps": 1795, "total_steps": 8260, "loss": 0.1677, "lr": 4.7937231090700516e-05, "epoch": 2.1731234866828086, "percentage": 21.73, "elapsed_time": "0:03:41", "remaining_time": "0:13:16", "throughput": 3330.82, "total_tokens": 736800}
{"current_steps": 1800, "total_steps": 8260, "loss": 0.1666, "lr": 4.7916168304239496e-05, "epoch": 2.179176755447942, "percentage": 21.79, "elapsed_time": "0:03:41", "remaining_time": "0:13:15", "throughput": 3331.85, "total_tokens": 738944}
{"current_steps": 1805, "total_steps": 8260, "loss": 0.2675, "lr": 4.789500320344435e-05, "epoch": 2.185230024213075, "percentage": 21.85, "elapsed_time": "0:03:42", "remaining_time": "0:13:15", "throughput": 3333.2, "total_tokens": 741184}
{"current_steps": 1810, "total_steps": 8260, "loss": 0.2482, "lr": 4.787373588281138e-05, "epoch": 2.1912832929782082, "percentage": 21.91, "elapsed_time": "0:03:42", "remaining_time": "0:13:14", "throughput": 3334.42, "total_tokens": 743136}
{"current_steps": 1815, "total_steps": 8260, "loss": 0.2148, "lr": 4.785236643729327e-05, "epoch": 2.1973365617433416, "percentage": 21.97, "elapsed_time": "0:03:43", "remaining_time": "0:13:13", "throughput": 3335.61, "total_tokens": 745152}
{"current_steps": 1820, "total_steps": 8260, "loss": 0.2538, "lr": 4.7830894962298675e-05, "epoch": 2.2033898305084745, "percentage": 22.03, "elapsed_time": "0:03:43", "remaining_time": "0:13:12", "throughput": 3336.94, "total_tokens": 747200}
{"current_steps": 1825, "total_steps": 8260, "loss": 0.2507, "lr": 4.7809321553691764e-05, "epoch": 2.209443099273608, "percentage": 22.09, "elapsed_time": "0:03:44", "remaining_time": "0:13:11", "throughput": 3337.98, "total_tokens": 749344}
{"current_steps": 1830, "total_steps": 8260, "loss": 0.2707, "lr": 4.778764630779183e-05, "epoch": 2.2154963680387407, "percentage": 22.15, "elapsed_time": "0:03:44", "remaining_time": "0:13:10", "throughput": 3339.45, "total_tokens": 751360}
{"current_steps": 1835, "total_steps": 8260, "loss": 0.237, "lr": 4.7765869321372836e-05, "epoch": 2.221549636803874, "percentage": 22.22, "elapsed_time": "0:03:45", "remaining_time": "0:13:09", "throughput": 3340.42, "total_tokens": 753408}
{"current_steps": 1840, "total_steps": 8260, "loss": 0.2454, "lr": 4.774399069166296e-05, "epoch": 2.2276029055690074, "percentage": 22.28, "elapsed_time": "0:03:46", "remaining_time": "0:13:08", "throughput": 3341.08, "total_tokens": 755456}
{"current_steps": 1845, "total_steps": 8260, "loss": 0.2385, "lr": 4.772201051634426e-05, "epoch": 2.2336561743341403, "percentage": 22.34, "elapsed_time": "0:03:46", "remaining_time": "0:13:08", "throughput": 3342.08, "total_tokens": 757504}
{"current_steps": 1850, "total_steps": 8260, "loss": 0.2128, "lr": 4.769992889355208e-05, "epoch": 2.2397094430992737, "percentage": 22.4, "elapsed_time": "0:03:47", "remaining_time": "0:13:07", "throughput": 3342.87, "total_tokens": 759584}
{"current_steps": 1855, "total_steps": 8260, "loss": 0.2185, "lr": 4.767774592187475e-05, "epoch": 2.2457627118644066, "percentage": 22.46, "elapsed_time": "0:03:47", "remaining_time": "0:13:06", "throughput": 3343.54, "total_tokens": 761632}
{"current_steps": 1860, "total_steps": 8260, "loss": 0.2257, "lr": 4.76554617003531e-05, "epoch": 2.25181598062954, "percentage": 22.52, "elapsed_time": "0:03:48", "remaining_time": "0:13:05", "throughput": 3344.36, "total_tokens": 763648}
{"current_steps": 1865, "total_steps": 8260, "loss": 0.2483, "lr": 4.763307632847998e-05, "epoch": 2.2578692493946733, "percentage": 22.58, "elapsed_time": "0:03:48", "remaining_time": "0:13:04", "throughput": 3345.24, "total_tokens": 765696}
{"current_steps": 1870, "total_steps": 8260, "loss": 0.2865, "lr": 4.761058990619986e-05, "epoch": 2.263922518159806, "percentage": 22.64, "elapsed_time": "0:03:49", "remaining_time": "0:13:04", "throughput": 3345.91, "total_tokens": 767776}
{"current_steps": 1875, "total_steps": 8260, "loss": 0.1749, "lr": 4.7588002533908405e-05, "epoch": 2.2699757869249395, "percentage": 22.7, "elapsed_time": "0:03:50", "remaining_time": "0:13:03", "throughput": 3346.35, "total_tokens": 769856}
{"current_steps": 1880, "total_steps": 8260, "loss": 0.208, "lr": 4.756531431245195e-05, "epoch": 2.2760290556900724, "percentage": 22.76, "elapsed_time": "0:03:50", "remaining_time": "0:13:02", "throughput": 3346.99, "total_tokens": 771904}
{"current_steps": 1885, "total_steps": 8260, "loss": 0.2293, "lr": 4.75425253431271e-05, "epoch": 2.2820823244552058, "percentage": 22.82, "elapsed_time": "0:03:51", "remaining_time": "0:13:01", "throughput": 3347.85, "total_tokens": 774016}
{"current_steps": 1890, "total_steps": 8260, "loss": 0.3051, "lr": 4.7519635727680286e-05, "epoch": 2.288135593220339, "percentage": 22.88, "elapsed_time": "0:03:51", "remaining_time": "0:13:01", "throughput": 3348.79, "total_tokens": 776000}
{"current_steps": 1895, "total_steps": 8260, "loss": 0.1857, "lr": 4.749664556830731e-05, "epoch": 2.294188861985472, "percentage": 22.94, "elapsed_time": "0:03:52", "remaining_time": "0:13:00", "throughput": 3349.64, "total_tokens": 778208}
{"current_steps": 1900, "total_steps": 8260, "loss": 0.2359, "lr": 4.747355496765283e-05, "epoch": 2.3002421307506054, "percentage": 23.0, "elapsed_time": "0:03:52", "remaining_time": "0:12:59", "throughput": 3350.42, "total_tokens": 780128}
{"current_steps": 1905, "total_steps": 8260, "loss": 0.1434, "lr": 4.745036402880999e-05, "epoch": 2.3062953995157383, "percentage": 23.06, "elapsed_time": "0:03:53", "remaining_time": "0:12:58", "throughput": 3351.43, "total_tokens": 782112}
{"current_steps": 1910, "total_steps": 8260, "loss": 0.2755, "lr": 4.7427072855319886e-05, "epoch": 2.3123486682808716, "percentage": 23.12, "elapsed_time": "0:03:53", "remaining_time": "0:12:57", "throughput": 3352.37, "total_tokens": 784256}
{"current_steps": 1915, "total_steps": 8260, "loss": 0.2429, "lr": 4.740368155117116e-05, "epoch": 2.318401937046005, "percentage": 23.18, "elapsed_time": "0:03:54", "remaining_time": "0:12:56", "throughput": 3353.18, "total_tokens": 786272}
{"current_steps": 1920, "total_steps": 8260, "loss": 0.1957, "lr": 4.7380190220799484e-05, "epoch": 2.324455205811138, "percentage": 23.24, "elapsed_time": "0:03:55", "remaining_time": "0:12:56", "throughput": 3354.18, "total_tokens": 788352}
{"current_steps": 1925, "total_steps": 8260, "loss": 0.1719, "lr": 4.735659896908713e-05, "epoch": 2.330508474576271, "percentage": 23.31, "elapsed_time": "0:03:55", "remaining_time": "0:12:55", "throughput": 3354.88, "total_tokens": 790368}
{"current_steps": 1930, "total_steps": 8260, "loss": 0.1832, "lr": 4.73329079013625e-05, "epoch": 2.3365617433414045, "percentage": 23.37, "elapsed_time": "0:03:56", "remaining_time": "0:12:54", "throughput": 3355.74, "total_tokens": 792320}
{"current_steps": 1935, "total_steps": 8260, "loss": 0.2471, "lr": 4.730911712339964e-05, "epoch": 2.3426150121065374, "percentage": 23.43, "elapsed_time": "0:03:56", "remaining_time": "0:12:53", "throughput": 3356.59, "total_tokens": 794272}
{"current_steps": 1940, "total_steps": 8260, "loss": 0.2533, "lr": 4.728522674141776e-05, "epoch": 2.348668280871671, "percentage": 23.49, "elapsed_time": "0:03:57", "remaining_time": "0:12:52", "throughput": 3357.76, "total_tokens": 796416}
{"current_steps": 1945, "total_steps": 8260, "loss": 0.2337, "lr": 4.7261236862080805e-05, "epoch": 2.3547215496368037, "percentage": 23.55, "elapsed_time": "0:03:57", "remaining_time": "0:12:51", "throughput": 3358.52, "total_tokens": 798432}
{"current_steps": 1950, "total_steps": 8260, "loss": 0.1992, "lr": 4.723714759249692e-05, "epoch": 2.360774818401937, "percentage": 23.61, "elapsed_time": "0:03:58", "remaining_time": "0:12:51", "throughput": 3359.11, "total_tokens": 800480}
{"current_steps": 1955, "total_steps": 8260, "loss": 0.2159, "lr": 4.721295904021802e-05, "epoch": 2.3668280871670704, "percentage": 23.67, "elapsed_time": "0:03:58", "remaining_time": "0:12:50", "throughput": 3359.67, "total_tokens": 802432}
{"current_steps": 1960, "total_steps": 8260, "loss": 0.2435, "lr": 4.718867131323927e-05, "epoch": 2.3728813559322033, "percentage": 23.73, "elapsed_time": "0:03:59", "remaining_time": "0:12:49", "throughput": 3360.72, "total_tokens": 804352}
{"current_steps": 1965, "total_steps": 8260, "loss": 0.2462, "lr": 4.7164284519998644e-05, "epoch": 2.3789346246973366, "percentage": 23.79, "elapsed_time": "0:03:59", "remaining_time": "0:12:48", "throughput": 3361.42, "total_tokens": 806272}
{"current_steps": 1970, "total_steps": 8260, "loss": 0.1979, "lr": 4.71397987693764e-05, "epoch": 2.38498789346247, "percentage": 23.85, "elapsed_time": "0:04:00", "remaining_time": "0:12:47", "throughput": 3362.33, "total_tokens": 808352}
{"current_steps": 1975, "total_steps": 8260, "loss": 0.1903, "lr": 4.711521417069462e-05, "epoch": 2.391041162227603, "percentage": 23.91, "elapsed_time": "0:04:00", "remaining_time": "0:12:46", "throughput": 3362.86, "total_tokens": 810304}
{"current_steps": 1980, "total_steps": 8260, "loss": 0.1952, "lr": 4.709053083371672e-05, "epoch": 2.3970944309927362, "percentage": 23.97, "elapsed_time": "0:04:01", "remaining_time": "0:12:45", "throughput": 3363.84, "total_tokens": 812384}
{"current_steps": 1985, "total_steps": 8260, "loss": 0.1962, "lr": 4.706574886864696e-05, "epoch": 2.403147699757869, "percentage": 24.03, "elapsed_time": "0:04:02", "remaining_time": "0:12:45", "throughput": 3364.86, "total_tokens": 814400}
{"current_steps": 1990, "total_steps": 8260, "loss": 0.188, "lr": 4.7040868386129935e-05, "epoch": 2.4092009685230025, "percentage": 24.09, "elapsed_time": "0:04:02", "remaining_time": "0:12:44", "throughput": 3365.68, "total_tokens": 816448}
{"current_steps": 1995, "total_steps": 8260, "loss": 0.2802, "lr": 4.701588949725009e-05, "epoch": 2.415254237288136, "percentage": 24.15, "elapsed_time": "0:04:03", "remaining_time": "0:12:43", "throughput": 3366.54, "total_tokens": 818624}
{"current_steps": 2000, "total_steps": 8260, "loss": 0.2065, "lr": 4.699081231353124e-05, "epoch": 2.4213075060532687, "percentage": 24.21, "elapsed_time": "0:04:03", "remaining_time": "0:12:42", "throughput": 3367.3, "total_tokens": 820640}
{"current_steps": 2005, "total_steps": 8260, "loss": 0.2265, "lr": 4.696563694693605e-05, "epoch": 2.427360774818402, "percentage": 24.27, "elapsed_time": "0:04:04", "remaining_time": "0:12:41", "throughput": 3368.63, "total_tokens": 822752}
{"current_steps": 2010, "total_steps": 8260, "loss": 0.2394, "lr": 4.694036350986556e-05, "epoch": 2.433414043583535, "percentage": 24.33, "elapsed_time": "0:04:04", "remaining_time": "0:12:41", "throughput": 3369.3, "total_tokens": 824832}
{"current_steps": 2015, "total_steps": 8260, "loss": 0.2701, "lr": 4.6914992115158634e-05, "epoch": 2.4394673123486683, "percentage": 24.39, "elapsed_time": "0:04:05", "remaining_time": "0:12:40", "throughput": 3369.95, "total_tokens": 826816}
{"current_steps": 2020, "total_steps": 8260, "loss": 0.2004, "lr": 4.688952287609152e-05, "epoch": 2.4455205811138017, "percentage": 24.46, "elapsed_time": "0:04:05", "remaining_time": "0:12:39", "throughput": 3370.71, "total_tokens": 828672}
{"current_steps": 2025, "total_steps": 8260, "loss": 0.1773, "lr": 4.686395590637732e-05, "epoch": 2.4515738498789346, "percentage": 24.52, "elapsed_time": "0:04:06", "remaining_time": "0:12:38", "throughput": 3371.37, "total_tokens": 830752}
{"current_steps": 2030, "total_steps": 8260, "loss": 0.235, "lr": 4.683829132016544e-05, "epoch": 2.457627118644068, "percentage": 24.58, "elapsed_time": "0:04:06", "remaining_time": "0:12:37", "throughput": 3371.96, "total_tokens": 832832}
{"current_steps": 2035, "total_steps": 8260, "loss": 0.2063, "lr": 4.6812529232041144e-05, "epoch": 2.463680387409201, "percentage": 24.64, "elapsed_time": "0:04:07", "remaining_time": "0:12:37", "throughput": 3372.7, "total_tokens": 834848}
{"current_steps": 2040, "total_steps": 8260, "loss": 0.2091, "lr": 4.6786669757025016e-05, "epoch": 2.469733656174334, "percentage": 24.7, "elapsed_time": "0:04:08", "remaining_time": "0:12:36", "throughput": 3373.71, "total_tokens": 836864}
{"current_steps": 2045, "total_steps": 8260, "loss": 0.182, "lr": 4.676071301057243e-05, "epoch": 2.4757869249394675, "percentage": 24.76, "elapsed_time": "0:04:08", "remaining_time": "0:12:35", "throughput": 3374.37, "total_tokens": 838784}
{"current_steps": 2050, "total_steps": 8260, "loss": 0.1244, "lr": 4.673465910857306e-05, "epoch": 2.4818401937046004, "percentage": 24.82, "elapsed_time": "0:04:09", "remaining_time": "0:12:34", "throughput": 3375.17, "total_tokens": 840832}
{"current_steps": 2055, "total_steps": 8260, "loss": 0.2182, "lr": 4.670850816735035e-05, "epoch": 2.4878934624697338, "percentage": 24.88, "elapsed_time": "0:04:09", "remaining_time": "0:12:33", "throughput": 3376.13, "total_tokens": 842752}
{"current_steps": 2060, "total_steps": 8260, "loss": 0.2897, "lr": 4.668226030366101e-05, "epoch": 2.4939467312348667, "percentage": 24.94, "elapsed_time": "0:04:10", "remaining_time": "0:12:32", "throughput": 3377.33, "total_tokens": 844928}
{"current_steps": 2065, "total_steps": 8260, "loss": 0.2087, "lr": 4.665591563469445e-05, "epoch": 2.5, "percentage": 25.0, "elapsed_time": "0:04:10", "remaining_time": "0:12:32", "throughput": 3378.28, "total_tokens": 847104}
{"current_steps": 2065, "total_steps": 8260, "eval_loss": 0.230377197265625, "epoch": 2.5, "percentage": 25.0, "elapsed_time": "0:04:15", "remaining_time": "0:12:46", "throughput": 3317.03, "total_tokens": 847104}
{"current_steps": 2070, "total_steps": 8260, "loss": 0.2175, "lr": 4.662947427807231e-05, "epoch": 2.5060532687651333, "percentage": 25.06, "elapsed_time": "0:04:16", "remaining_time": "0:12:47", "throughput": 3306.53, "total_tokens": 849120}
{"current_steps": 2075, "total_steps": 8260, "loss": 0.1827, "lr": 4.6602936351847924e-05, "epoch": 2.5121065375302662, "percentage": 25.12, "elapsed_time": "0:04:17", "remaining_time": "0:12:47", "throughput": 3307.27, "total_tokens": 851296}
{"current_steps": 2080, "total_steps": 8260, "loss": 0.2175, "lr": 4.657630197450577e-05, "epoch": 2.5181598062953996, "percentage": 25.18, "elapsed_time": "0:04:17", "remaining_time": "0:12:46", "throughput": 3308.16, "total_tokens": 853344}
{"current_steps": 2085, "total_steps": 8260, "loss": 0.2372, "lr": 4.6549571264960945e-05, "epoch": 2.5242130750605325, "percentage": 25.24, "elapsed_time": "0:04:18", "remaining_time": "0:12:45", "throughput": 3308.93, "total_tokens": 855264}
{"current_steps": 2090, "total_steps": 8260, "loss": 0.2483, "lr": 4.652274434255866e-05, "epoch": 2.530266343825666, "percentage": 25.3, "elapsed_time": "0:04:18", "remaining_time": "0:12:44", "throughput": 3309.83, "total_tokens": 857152}
{"current_steps": 2095, "total_steps": 8260, "loss": 0.2164, "lr": 4.6495821327073675e-05, "epoch": 2.536319612590799, "percentage": 25.36, "elapsed_time": "0:04:19", "remaining_time": "0:12:43", "throughput": 3311.08, "total_tokens": 859232}
{"current_steps": 2100, "total_steps": 8260, "loss": 0.2056, "lr": 4.6468802338709783e-05, "epoch": 2.542372881355932, "percentage": 25.42, "elapsed_time": "0:04:20", "remaining_time": "0:12:42", "throughput": 3312.01, "total_tokens": 861312}
{"current_steps": 2105, "total_steps": 8260, "loss": 0.5111, "lr": 4.644168749809929e-05, "epoch": 2.5484261501210654, "percentage": 25.48, "elapsed_time": "0:04:20", "remaining_time": "0:12:42", "throughput": 3312.91, "total_tokens": 863552}
{"current_steps": 2110, "total_steps": 8260, "loss": 0.2772, "lr": 4.6414476926302406e-05, "epoch": 2.5544794188861983, "percentage": 25.54, "elapsed_time": "0:04:21", "remaining_time": "0:12:41", "throughput": 3313.63, "total_tokens": 865568}
{"current_steps": 2115, "total_steps": 8260, "loss": 0.1862, "lr": 4.638717074480682e-05, "epoch": 2.5605326876513317, "percentage": 25.61, "elapsed_time": "0:04:21", "remaining_time": "0:12:40", "throughput": 3314.31, "total_tokens": 867488}
{"current_steps": 2120, "total_steps": 8260, "loss": 0.1921, "lr": 4.6359769075527026e-05, "epoch": 2.566585956416465, "percentage": 25.67, "elapsed_time": "0:04:22", "remaining_time": "0:12:39", "throughput": 3315.05, "total_tokens": 869504}
{"current_steps": 2125, "total_steps": 8260, "loss": 0.2397, "lr": 4.6332272040803895e-05, "epoch": 2.572639225181598, "percentage": 25.73, "elapsed_time": "0:04:22", "remaining_time": "0:12:38", "throughput": 3316.08, "total_tokens": 871680}
{"current_steps": 2130, "total_steps": 8260, "loss": 0.2386, "lr": 4.630467976340405e-05, "epoch": 2.5786924939467313, "percentage": 25.79, "elapsed_time": "0:04:23", "remaining_time": "0:12:38", "throughput": 3317.07, "total_tokens": 873856}
{"current_steps": 2135, "total_steps": 8260, "loss": 0.2178, "lr": 4.6276992366519365e-05, "epoch": 2.584745762711864, "percentage": 25.85, "elapsed_time": "0:04:24", "remaining_time": "0:12:37", "throughput": 3317.88, "total_tokens": 875968}
{"current_steps": 2140, "total_steps": 8260, "loss": 0.1815, "lr": 4.624920997376637e-05, "epoch": 2.5907990314769975, "percentage": 25.91, "elapsed_time": "0:04:24", "remaining_time": "0:12:36", "throughput": 3318.78, "total_tokens": 878144}
{"current_steps": 2145, "total_steps": 8260, "loss": 0.223, "lr": 4.622133270918576e-05, "epoch": 2.596852300242131, "percentage": 25.97, "elapsed_time": "0:04:25", "remaining_time": "0:12:35", "throughput": 3320.05, "total_tokens": 880320}
{"current_steps": 2150, "total_steps": 8260, "loss": 0.2262, "lr": 4.619336069724177e-05, "epoch": 2.6029055690072638, "percentage": 26.03, "elapsed_time": "0:04:25", "remaining_time": "0:12:35", "throughput": 3321.02, "total_tokens": 882400}
{"current_steps": 2155, "total_steps": 8260, "loss": 0.248, "lr": 4.6165294062821696e-05, "epoch": 2.608958837772397, "percentage": 26.09, "elapsed_time": "0:04:26", "remaining_time": "0:12:34", "throughput": 3321.79, "total_tokens": 884512}
{"current_steps": 2160, "total_steps": 8260, "loss": 0.2558, "lr": 4.613713293123525e-05, "epoch": 2.61501210653753, "percentage": 26.15, "elapsed_time": "0:04:26", "remaining_time": "0:12:33", "throughput": 3322.74, "total_tokens": 886592}
{"current_steps": 2165, "total_steps": 8260, "loss": 0.1644, "lr": 4.610887742821408e-05, "epoch": 2.6210653753026634, "percentage": 26.21, "elapsed_time": "0:04:27", "remaining_time": "0:12:32", "throughput": 3323.59, "total_tokens": 888640}
{"current_steps": 2170, "total_steps": 8260, "loss": 0.2197, "lr": 4.608052767991118e-05, "epoch": 2.6271186440677967, "percentage": 26.27, "elapsed_time": "0:04:27", "remaining_time": "0:12:32", "throughput": 3324.49, "total_tokens": 890880}
{"current_steps": 2175, "total_steps": 8260, "loss": 0.2382, "lr": 4.605208381290029e-05, "epoch": 2.6331719128329296, "percentage": 26.33, "elapsed_time": "0:04:28", "remaining_time": "0:12:31", "throughput": 3325.06, "total_tokens": 892832}
{"current_steps": 2180, "total_steps": 8260, "loss": 0.1989, "lr": 4.6023545954175406e-05, "epoch": 2.639225181598063, "percentage": 26.39, "elapsed_time": "0:04:29", "remaining_time": "0:12:30", "throughput": 3325.97, "total_tokens": 894912}
{"current_steps": 2185, "total_steps": 8260, "loss": 0.2088, "lr": 4.599491423115014e-05, "epoch": 2.645278450363196, "percentage": 26.45, "elapsed_time": "0:04:29", "remaining_time": "0:12:29", "throughput": 3326.57, "total_tokens": 896960}
{"current_steps": 2190, "total_steps": 8260, "loss": 0.2212, "lr": 4.59661887716572e-05, "epoch": 2.651331719128329, "percentage": 26.51, "elapsed_time": "0:04:30", "remaining_time": "0:12:28", "throughput": 3327.59, "total_tokens": 898816}
{"current_steps": 2195, "total_steps": 8260, "loss": 0.2057, "lr": 4.5937369703947785e-05, "epoch": 2.6573849878934626, "percentage": 26.57, "elapsed_time": "0:04:30", "remaining_time": "0:12:27", "throughput": 3328.28, "total_tokens": 900832}
{"current_steps": 2200, "total_steps": 8260, "loss": 0.2003, "lr": 4.590845715669104e-05, "epoch": 2.663438256658596, "percentage": 26.63, "elapsed_time": "0:04:31", "remaining_time": "0:12:27", "throughput": 3329.03, "total_tokens": 902944}
{"current_steps": 2205, "total_steps": 8260, "loss": 0.2072, "lr": 4.5879451258973465e-05, "epoch": 2.669491525423729, "percentage": 26.69, "elapsed_time": "0:04:31", "remaining_time": "0:12:26", "throughput": 3329.79, "total_tokens": 905056}
{"current_steps": 2210, "total_steps": 8260, "loss": 0.2206, "lr": 4.5850352140298356e-05, "epoch": 2.6755447941888617, "percentage": 26.76, "elapsed_time": "0:04:32", "remaining_time": "0:12:25", "throughput": 3330.72, "total_tokens": 907232}
{"current_steps": 2215, "total_steps": 8260, "loss": 0.2011, "lr": 4.582115993058519e-05, "epoch": 2.681598062953995, "percentage": 26.82, "elapsed_time": "0:04:32", "remaining_time": "0:12:24", "throughput": 3331.43, "total_tokens": 909248}
{"current_steps": 2220, "total_steps": 8260, "loss": 0.2359, "lr": 4.5791874760169095e-05, "epoch": 2.6876513317191284, "percentage": 26.88, "elapsed_time": "0:04:33", "remaining_time": "0:12:24", "throughput": 3332.15, "total_tokens": 911264}
{"current_steps": 2225, "total_steps": 8260, "loss": 0.2035, "lr": 4.5762496759800246e-05, "epoch": 2.6937046004842617, "percentage": 26.94, "elapsed_time": "0:04:34", "remaining_time": "0:12:23", "throughput": 3332.62, "total_tokens": 913216}
{"current_steps": 2230, "total_steps": 8260, "loss": 0.2113, "lr": 4.573302606064324e-05, "epoch": 2.6997578692493946, "percentage": 27.0, "elapsed_time": "0:04:34", "remaining_time": "0:12:22", "throughput": 3333.34, "total_tokens": 915328}
{"current_steps": 2235, "total_steps": 8260, "loss": 0.2475, "lr": 4.5703462794276574e-05, "epoch": 2.705811138014528, "percentage": 27.06, "elapsed_time": "0:04:35", "remaining_time": "0:12:21", "throughput": 3334.35, "total_tokens": 917568}
{"current_steps": 2240, "total_steps": 8260, "loss": 0.2355, "lr": 4.567380709269205e-05, "epoch": 2.711864406779661, "percentage": 27.12, "elapsed_time": "0:04:35", "remaining_time": "0:12:20", "throughput": 3335.12, "total_tokens": 919424}
{"current_steps": 2245, "total_steps": 8260, "loss": 0.1681, "lr": 4.5644059088294145e-05, "epoch": 2.7179176755447942, "percentage": 27.18, "elapsed_time": "0:04:36", "remaining_time": "0:12:20", "throughput": 3335.88, "total_tokens": 921376}
{"current_steps": 2250, "total_steps": 8260, "loss": 0.2392, "lr": 4.561421891389943e-05, "epoch": 2.7239709443099276, "percentage": 27.24, "elapsed_time": "0:04:36", "remaining_time": "0:12:19", "throughput": 3336.51, "total_tokens": 923456}
{"current_steps": 2255, "total_steps": 8260, "loss": 0.2094, "lr": 4.558428670273601e-05, "epoch": 2.7300242130750605, "percentage": 27.3, "elapsed_time": "0:04:37", "remaining_time": "0:12:18", "throughput": 3337.02, "total_tokens": 925504}
{"current_steps": 2260, "total_steps": 8260, "loss": 0.1588, "lr": 4.555426258844292e-05, "epoch": 2.736077481840194, "percentage": 27.36, "elapsed_time": "0:04:37", "remaining_time": "0:12:17", "throughput": 3337.7, "total_tokens": 927456}
{"current_steps": 2265, "total_steps": 8260, "loss": 0.1986, "lr": 4.552414670506949e-05, "epoch": 2.7421307506053267, "percentage": 27.42, "elapsed_time": "0:04:38", "remaining_time": "0:12:16", "throughput": 3338.28, "total_tokens": 929440}
{"current_steps": 2270, "total_steps": 8260, "loss": 0.2535, "lr": 4.5493939187074784e-05, "epoch": 2.74818401937046, "percentage": 27.48, "elapsed_time": "0:04:39", "remaining_time": "0:12:16", "throughput": 3339.2, "total_tokens": 931712}
{"current_steps": 2275, "total_steps": 8260, "loss": 0.1995, "lr": 4.5463640169326994e-05, "epoch": 2.7542372881355934, "percentage": 27.54, "elapsed_time": "0:04:39", "remaining_time": "0:12:15", "throughput": 3339.98, "total_tokens": 933664}
{"current_steps": 2280, "total_steps": 8260, "loss": 0.2054, "lr": 4.5433249787102816e-05, "epoch": 2.7602905569007263, "percentage": 27.6, "elapsed_time": "0:04:40", "remaining_time": "0:12:14", "throughput": 3340.81, "total_tokens": 935840}
{"current_steps": 2285, "total_steps": 8260, "loss": 0.1939, "lr": 4.54027681760869e-05, "epoch": 2.7663438256658597, "percentage": 27.66, "elapsed_time": "0:04:40", "remaining_time": "0:12:13", "throughput": 3341.45, "total_tokens": 937920}
{"current_steps": 2290, "total_steps": 8260, "loss": 0.2561, "lr": 4.537219547237115e-05, "epoch": 2.7723970944309926, "percentage": 27.72, "elapsed_time": "0:04:41", "remaining_time": "0:12:13", "throughput": 3342.17, "total_tokens": 940160}
{"current_steps": 2295, "total_steps": 8260, "loss": 0.2831, "lr": 4.5341531812454234e-05, "epoch": 2.778450363196126, "percentage": 27.78, "elapsed_time": "0:04:41", "remaining_time": "0:12:12", "throughput": 3343.22, "total_tokens": 942304}
{"current_steps": 2300, "total_steps": 8260, "loss": 0.2341, "lr": 4.5310777333240885e-05, "epoch": 2.7845036319612593, "percentage": 27.85, "elapsed_time": "0:04:42", "remaining_time": "0:12:11", "throughput": 3344.29, "total_tokens": 944288}
{"current_steps": 2305, "total_steps": 8260, "loss": 0.1908, "lr": 4.52799321720413e-05, "epoch": 2.790556900726392, "percentage": 27.91, "elapsed_time": "0:04:42", "remaining_time": "0:12:10", "throughput": 3345.13, "total_tokens": 946368}
{"current_steps": 2310, "total_steps": 8260, "loss": 0.1951, "lr": 4.524899646657059e-05, "epoch": 2.7966101694915255, "percentage": 27.97, "elapsed_time": "0:04:43", "remaining_time": "0:12:10", "throughput": 3345.64, "total_tokens": 948512}
{"current_steps": 2315, "total_steps": 8260, "loss": 0.2099, "lr": 4.521797035494809e-05, "epoch": 2.8026634382566584, "percentage": 28.03, "elapsed_time": "0:04:44", "remaining_time": "0:12:09", "throughput": 3346.11, "total_tokens": 950624}
{"current_steps": 2320, "total_steps": 8260, "loss": 0.1764, "lr": 4.5186853975696775e-05, "epoch": 2.8087167070217918, "percentage": 28.09, "elapsed_time": "0:04:44", "remaining_time": "0:12:08", "throughput": 3346.79, "total_tokens": 952640}
{"current_steps": 2325, "total_steps": 8260, "loss": 0.2336, "lr": 4.515564746774265e-05, "epoch": 2.814769975786925, "percentage": 28.15, "elapsed_time": "0:04:45", "remaining_time": "0:12:08", "throughput": 3347.38, "total_tokens": 954656}
{"current_steps": 2330, "total_steps": 8260, "loss": 0.2183, "lr": 4.512435097041412e-05, "epoch": 2.820823244552058, "percentage": 28.21, "elapsed_time": "0:04:45", "remaining_time": "0:12:07", "throughput": 3347.96, "total_tokens": 956736}
{"current_steps": 2335, "total_steps": 8260, "loss": 0.1925, "lr": 4.509296462344136e-05, "epoch": 2.8268765133171914, "percentage": 28.27, "elapsed_time": "0:04:46", "remaining_time": "0:12:06", "throughput": 3348.54, "total_tokens": 958816}
{"current_steps": 2340, "total_steps": 8260, "loss": 0.2269, "lr": 4.50614885669557e-05, "epoch": 2.8329297820823243, "percentage": 28.33, "elapsed_time": "0:04:46", "remaining_time": "0:12:05", "throughput": 3349.37, "total_tokens": 960800}
{"current_steps": 2345, "total_steps": 8260, "loss": 0.1738, "lr": 4.5029922941489e-05, "epoch": 2.8389830508474576, "percentage": 28.39, "elapsed_time": "0:04:47", "remaining_time": "0:12:05", "throughput": 3349.99, "total_tokens": 962976}
{"current_steps": 2350, "total_steps": 8260, "loss": 0.2219, "lr": 4.499826788797302e-05, "epoch": 2.845036319612591, "percentage": 28.45, "elapsed_time": "0:04:48", "remaining_time": "0:12:04", "throughput": 3350.93, "total_tokens": 965088}
{"current_steps": 2355, "total_steps": 8260, "loss": 0.2156, "lr": 4.49665235477388e-05, "epoch": 2.851089588377724, "percentage": 28.51, "elapsed_time": "0:04:48", "remaining_time": "0:12:03", "throughput": 3351.63, "total_tokens": 967136}
{"current_steps": 2360, "total_steps": 8260, "loss": 0.2257, "lr": 4.493469006251601e-05, "epoch": 2.857142857142857, "percentage": 28.57, "elapsed_time": "0:04:49", "remaining_time": "0:12:02", "throughput": 3352.29, "total_tokens": 969248}
{"current_steps": 2365, "total_steps": 8260, "loss": 0.1836, "lr": 4.490276757443233e-05, "epoch": 2.86319612590799, "percentage": 28.63, "elapsed_time": "0:04:49", "remaining_time": "0:12:02", "throughput": 3352.96, "total_tokens": 971360}
{"current_steps": 2370, "total_steps": 8260, "loss": 0.1731, "lr": 4.487075622601281e-05, "epoch": 2.8692493946731235, "percentage": 28.69, "elapsed_time": "0:04:50", "remaining_time": "0:12:01", "throughput": 3353.7, "total_tokens": 973408}
{"current_steps": 2375, "total_steps": 8260, "loss": 0.156, "lr": 4.483865616017924e-05, "epoch": 2.875302663438257, "percentage": 28.75, "elapsed_time": "0:04:50", "remaining_time": "0:12:00", "throughput": 3354.49, "total_tokens": 975392}
{"current_steps": 2380, "total_steps": 8260, "loss": 0.1803, "lr": 4.480646752024951e-05, "epoch": 2.8813559322033897, "percentage": 28.81, "elapsed_time": "0:04:51", "remaining_time": "0:11:59", "throughput": 3355.26, "total_tokens": 977376}
{"current_steps": 2385, "total_steps": 8260, "loss": 0.2354, "lr": 4.477419044993697e-05, "epoch": 2.887409200968523, "percentage": 28.87, "elapsed_time": "0:04:51", "remaining_time": "0:11:58", "throughput": 3355.91, "total_tokens": 979328}
{"current_steps": 2390, "total_steps": 8260, "loss": 0.2065, "lr": 4.474182509334978e-05, "epoch": 2.893462469733656, "percentage": 28.93, "elapsed_time": "0:04:52", "remaining_time": "0:11:58", "throughput": 3356.39, "total_tokens": 981376}
{"current_steps": 2395, "total_steps": 8260, "loss": 0.1647, "lr": 4.470937159499029e-05, "epoch": 2.8995157384987893, "percentage": 29.0, "elapsed_time": "0:04:52", "remaining_time": "0:11:57", "throughput": 3357.17, "total_tokens": 983360}
{"current_steps": 2400, "total_steps": 8260, "loss": 0.2377, "lr": 4.467683009975435e-05, "epoch": 2.9055690072639226, "percentage": 29.06, "elapsed_time": "0:04:53", "remaining_time": "0:11:56", "throughput": 3357.66, "total_tokens": 985408}
{"current_steps": 2405, "total_steps": 8260, "loss": 0.2373, "lr": 4.464420075293072e-05, "epoch": 2.9116222760290555, "percentage": 29.12, "elapsed_time": "0:04:54", "remaining_time": "0:11:55", "throughput": 3358.73, "total_tokens": 987584}
{"current_steps": 2410, "total_steps": 8260, "loss": 0.2066, "lr": 4.4611483700200374e-05, "epoch": 2.917675544794189, "percentage": 29.18, "elapsed_time": "0:04:54", "remaining_time": "0:11:55", "throughput": 3359.4, "total_tokens": 989632}
{"current_steps": 2415, "total_steps": 8260, "loss": 0.2088, "lr": 4.457867908763589e-05, "epoch": 2.923728813559322, "percentage": 29.24, "elapsed_time": "0:04:55", "remaining_time": "0:11:54", "throughput": 3360.12, "total_tokens": 991680}
{"current_steps": 2420, "total_steps": 8260, "loss": 0.193, "lr": 4.454578706170075e-05, "epoch": 2.929782082324455, "percentage": 29.3, "elapsed_time": "0:04:55", "remaining_time": "0:11:53", "throughput": 3360.84, "total_tokens": 993824}
{"current_steps": 2425, "total_steps": 8260, "loss": 0.1598, "lr": 4.4512807769248723e-05, "epoch": 2.9358353510895885, "percentage": 29.36, "elapsed_time": "0:04:56", "remaining_time": "0:11:52", "throughput": 3361.63, "total_tokens": 995904}
{"current_steps": 2430, "total_steps": 8260, "loss": 0.1746, "lr": 4.447974135752321e-05, "epoch": 2.9418886198547214, "percentage": 29.42, "elapsed_time": "0:04:56", "remaining_time": "0:11:52", "throughput": 3362.41, "total_tokens": 998080}
{"current_steps": 2435, "total_steps": 8260, "loss": 0.1786, "lr": 4.444658797415656e-05, "epoch": 2.9479418886198547, "percentage": 29.48, "elapsed_time": "0:04:57", "remaining_time": "0:11:51", "throughput": 3362.95, "total_tokens": 1000160}
{"current_steps": 2440, "total_steps": 8260, "loss": 0.3046, "lr": 4.441334776716944e-05, "epoch": 2.9539951573849876, "percentage": 29.54, "elapsed_time": "0:04:58", "remaining_time": "0:11:50", "throughput": 3363.56, "total_tokens": 1002368}
{"current_steps": 2445, "total_steps": 8260, "loss": 0.2215, "lr": 4.438002088497015e-05, "epoch": 2.960048426150121, "percentage": 29.6, "elapsed_time": "0:04:58", "remaining_time": "0:11:50", "throughput": 3364.32, "total_tokens": 1004448}
{"current_steps": 2450, "total_steps": 8260, "loss": 0.1854, "lr": 4.434660747635396e-05, "epoch": 2.9661016949152543, "percentage": 29.66, "elapsed_time": "0:04:59", "remaining_time": "0:11:49", "throughput": 3365.03, "total_tokens": 1006336}
{"current_steps": 2455, "total_steps": 8260, "loss": 0.2229, "lr": 4.4313107690502485e-05, "epoch": 2.9721549636803877, "percentage": 29.72, "elapsed_time": "0:04:59", "remaining_time": "0:11:48", "throughput": 3365.57, "total_tokens": 1008416}
{"current_steps": 2460, "total_steps": 8260, "loss": 0.1818, "lr": 4.427952167698298e-05, "epoch": 2.9782082324455206, "percentage": 29.78, "elapsed_time": "0:05:00", "remaining_time": "0:11:47", "throughput": 3366.03, "total_tokens": 1010400}
{"current_steps": 2465, "total_steps": 8260, "loss": 0.2567, "lr": 4.4245849585747654e-05, "epoch": 2.9842615012106535, "percentage": 29.84, "elapsed_time": "0:05:00", "remaining_time": "0:11:46", "throughput": 3366.6, "total_tokens": 1012320}
{"current_steps": 2470, "total_steps": 8260, "loss": 0.1993, "lr": 4.4212091567133083e-05, "epoch": 2.990314769975787, "percentage": 29.9, "elapsed_time": "0:05:01", "remaining_time": "0:11:46", "throughput": 3367.14, "total_tokens": 1014400}
{"current_steps": 2475, "total_steps": 8260, "loss": 0.2261, "lr": 4.417824777185943e-05, "epoch": 2.99636803874092, "percentage": 29.96, "elapsed_time": "0:05:01", "remaining_time": "0:11:45", "throughput": 3368.16, "total_tokens": 1016480}
{"current_steps": 2478, "total_steps": 8260, "eval_loss": 0.21814429759979248, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:05:06", "remaining_time": "0:11:55", "throughput": 3316.09, "total_tokens": 1017368}
{"current_steps": 2480, "total_steps": 8260, "loss": 0.1889, "lr": 4.414431835102987e-05, "epoch": 3.002421307506053, "percentage": 30.02, "elapsed_time": "0:05:08", "remaining_time": "0:11:58", "throughput": 3304.11, "total_tokens": 1018232}
{"current_steps": 2485, "total_steps": 8260, "loss": 0.2774, "lr": 4.411030345612984e-05, "epoch": 3.0084745762711864, "percentage": 30.08, "elapsed_time": "0:05:08", "remaining_time": "0:11:57", "throughput": 3305.05, "total_tokens": 1020280}
{"current_steps": 2490, "total_steps": 8260, "loss": 0.2065, "lr": 4.407620323902643e-05, "epoch": 3.0145278450363198, "percentage": 30.15, "elapsed_time": "0:05:09", "remaining_time": "0:11:56", "throughput": 3305.61, "total_tokens": 1022328}
{"current_steps": 2495, "total_steps": 8260, "loss": 0.2353, "lr": 4.404201785196762e-05, "epoch": 3.0205811138014527, "percentage": 30.21, "elapsed_time": "0:05:09", "remaining_time": "0:11:55", "throughput": 3306.19, "total_tokens": 1024312}
{"current_steps": 2500, "total_steps": 8260, "loss": 0.204, "lr": 4.400774744758171e-05, "epoch": 3.026634382566586, "percentage": 30.27, "elapsed_time": "0:05:10", "remaining_time": "0:11:55", "throughput": 3307.01, "total_tokens": 1026392}
{"current_steps": 2505, "total_steps": 8260, "loss": 0.1685, "lr": 4.397339217887652e-05, "epoch": 3.032687651331719, "percentage": 30.33, "elapsed_time": "0:05:10", "remaining_time": "0:11:54", "throughput": 3307.73, "total_tokens": 1028344}
{"current_steps": 2510, "total_steps": 8260, "loss": 0.1962, "lr": 4.393895219923881e-05, "epoch": 3.0387409200968523, "percentage": 30.39, "elapsed_time": "0:05:11", "remaining_time": "0:11:53", "throughput": 3308.36, "total_tokens": 1030520}
{"current_steps": 2515, "total_steps": 8260, "loss": 0.1797, "lr": 4.3904427662433534e-05, "epoch": 3.0447941888619856, "percentage": 30.45, "elapsed_time": "0:05:12", "remaining_time": "0:11:52", "throughput": 3309.08, "total_tokens": 1032728}
{"current_steps": 2520, "total_steps": 8260, "loss": 0.2169, "lr": 4.386981872260317e-05, "epoch": 3.0508474576271185, "percentage": 30.51, "elapsed_time": "0:05:12", "remaining_time": "0:11:52", "throughput": 3309.89, "total_tokens": 1034808}
{"current_steps": 2525, "total_steps": 8260, "loss": 0.2562, "lr": 4.383512553426703e-05, "epoch": 3.056900726392252, "percentage": 30.57, "elapsed_time": "0:05:13", "remaining_time": "0:11:51", "throughput": 3310.91, "total_tokens": 1036888}
{"current_steps": 2530, "total_steps": 8260, "loss": 0.2157, "lr": 4.380034825232059e-05, "epoch": 3.062953995157385, "percentage": 30.63, "elapsed_time": "0:05:13", "remaining_time": "0:11:50", "throughput": 3311.61, "total_tokens": 1039000}
{"current_steps": 2535, "total_steps": 8260, "loss": 0.2123, "lr": 4.376548703203474e-05, "epoch": 3.069007263922518, "percentage": 30.69, "elapsed_time": "0:05:14", "remaining_time": "0:11:49", "throughput": 3312.41, "total_tokens": 1040984}
{"current_steps": 2540, "total_steps": 8260, "loss": 0.2561, "lr": 4.3730542029055174e-05, "epoch": 3.0750605326876514, "percentage": 30.75, "elapsed_time": "0:05:14", "remaining_time": "0:11:48", "throughput": 3313.1, "total_tokens": 1043000}
{"current_steps": 2545, "total_steps": 8260, "loss": 0.2274, "lr": 4.3695513399401646e-05, "epoch": 3.0811138014527844, "percentage": 30.81, "elapsed_time": "0:05:15", "remaining_time": "0:11:48", "throughput": 3313.85, "total_tokens": 1044984}
{"current_steps": 2550, "total_steps": 8260, "loss": 0.2184, "lr": 4.366040129946725e-05, "epoch": 3.0871670702179177, "percentage": 30.87, "elapsed_time": "0:05:15", "remaining_time": "0:11:47", "throughput": 3314.54, "total_tokens": 1047096}
{"current_steps": 2555, "total_steps": 8260, "loss": 0.2154, "lr": 4.362520588601777e-05, "epoch": 3.093220338983051, "percentage": 30.93, "elapsed_time": "0:05:16", "remaining_time": "0:11:46", "throughput": 3315.24, "total_tokens": 1049208}
{"current_steps": 2560, "total_steps": 8260, "loss": 0.24, "lr": 4.3589927316190983e-05, "epoch": 3.099273607748184, "percentage": 30.99, "elapsed_time": "0:05:17", "remaining_time": "0:11:45", "throughput": 3315.81, "total_tokens": 1051192}
{"current_steps": 2565, "total_steps": 8260, "loss": 0.1924, "lr": 4.35545657474959e-05, "epoch": 3.1053268765133173, "percentage": 31.05, "elapsed_time": "0:05:17", "remaining_time": "0:11:45", "throughput": 3316.55, "total_tokens": 1053240}
{"current_steps": 2570, "total_steps": 8260, "loss": 0.1849, "lr": 4.351912133781213e-05, "epoch": 3.11138014527845, "percentage": 31.11, "elapsed_time": "0:05:18", "remaining_time": "0:11:44", "throughput": 3317.09, "total_tokens": 1055384}
{"current_steps": 2575, "total_steps": 8260, "loss": 0.2065, "lr": 4.3483594245389106e-05, "epoch": 3.1174334140435835, "percentage": 31.17, "elapsed_time": "0:05:18", "remaining_time": "0:11:43", "throughput": 3317.84, "total_tokens": 1057464}
{"current_steps": 2580, "total_steps": 8260, "loss": 0.2335, "lr": 4.3447984628845464e-05, "epoch": 3.123486682808717, "percentage": 31.23, "elapsed_time": "0:05:19", "remaining_time": "0:11:43", "throughput": 3318.68, "total_tokens": 1059736}
{"current_steps": 2585, "total_steps": 8260, "loss": 0.1397, "lr": 4.341229264716825e-05, "epoch": 3.12953995157385, "percentage": 31.3, "elapsed_time": "0:05:19", "remaining_time": "0:11:42", "throughput": 3319.47, "total_tokens": 1061656}
{"current_steps": 2590, "total_steps": 8260, "loss": 0.2087, "lr": 4.337651845971227e-05, "epoch": 3.135593220338983, "percentage": 31.36, "elapsed_time": "0:05:20", "remaining_time": "0:11:41", "throughput": 3320.47, "total_tokens": 1063736}
{"current_steps": 2595, "total_steps": 8260, "loss": 0.1601, "lr": 4.334066222619933e-05, "epoch": 3.141646489104116, "percentage": 31.42, "elapsed_time": "0:05:20", "remaining_time": "0:11:40", "throughput": 3321.03, "total_tokens": 1065720}
{"current_steps": 2600, "total_steps": 8260, "loss": 0.2306, "lr": 4.3304724106717584e-05, "epoch": 3.1476997578692494, "percentage": 31.48, "elapsed_time": "0:05:21", "remaining_time": "0:11:39", "throughput": 3321.68, "total_tokens": 1067736}
{"current_steps": 2605, "total_steps": 8260, "loss": 0.2189, "lr": 4.326870426172075e-05, "epoch": 3.1537530266343827, "percentage": 31.54, "elapsed_time": "0:05:22", "remaining_time": "0:11:39", "throughput": 3322.35, "total_tokens": 1069848}
{"current_steps": 2610, "total_steps": 8260, "loss": 0.2051, "lr": 4.323260285202746e-05, "epoch": 3.1598062953995156, "percentage": 31.6, "elapsed_time": "0:05:22", "remaining_time": "0:11:38", "throughput": 3323.22, "total_tokens": 1071960}
{"current_steps": 2615, "total_steps": 8260, "loss": 0.1844, "lr": 4.3196420038820475e-05, "epoch": 3.165859564164649, "percentage": 31.66, "elapsed_time": "0:05:23", "remaining_time": "0:11:37", "throughput": 3323.88, "total_tokens": 1074072}
{"current_steps": 2620, "total_steps": 8260, "loss": 0.2022, "lr": 4.316015598364603e-05, "epoch": 3.171912832929782, "percentage": 31.72, "elapsed_time": "0:05:23", "remaining_time": "0:11:36", "throughput": 3324.32, "total_tokens": 1076184}
{"current_steps": 2625, "total_steps": 8260, "loss": 0.2204, "lr": 4.312381084841307e-05, "epoch": 3.1779661016949152, "percentage": 31.78, "elapsed_time": "0:05:24", "remaining_time": "0:11:36", "throughput": 3325.14, "total_tokens": 1078360}
{"current_steps": 2630, "total_steps": 8260, "loss": 0.2064, "lr": 4.308738479539254e-05, "epoch": 3.1840193704600486, "percentage": 31.84, "elapsed_time": "0:05:24", "remaining_time": "0:11:35", "throughput": 3325.68, "total_tokens": 1080440}
{"current_steps": 2635, "total_steps": 8260, "loss": 0.2453, "lr": 4.305087798721665e-05, "epoch": 3.1900726392251815, "percentage": 31.9, "elapsed_time": "0:05:25", "remaining_time": "0:11:34", "throughput": 3326.26, "total_tokens": 1082520}
{"current_steps": 2640, "total_steps": 8260, "loss": 0.1597, "lr": 4.30142905868782e-05, "epoch": 3.196125907990315, "percentage": 31.96, "elapsed_time": "0:05:25", "remaining_time": "0:11:33", "throughput": 3326.92, "total_tokens": 1084408}
{"current_steps": 2645, "total_steps": 8260, "loss": 0.2247, "lr": 4.297762275772975e-05, "epoch": 3.2021791767554477, "percentage": 32.02, "elapsed_time": "0:05:26", "remaining_time": "0:11:33", "throughput": 3327.51, "total_tokens": 1086424}
{"current_steps": 2650, "total_steps": 8260, "loss": 0.2736, "lr": 4.2940874663483005e-05, "epoch": 3.208232445520581, "percentage": 32.08, "elapsed_time": "0:05:27", "remaining_time": "0:11:32", "throughput": 3328.14, "total_tokens": 1088440}
{"current_steps": 2655, "total_steps": 8260, "loss": 0.234, "lr": 4.2904046468208006e-05, "epoch": 3.2142857142857144, "percentage": 32.14, "elapsed_time": "0:05:27", "remaining_time": "0:11:31", "throughput": 3329.09, "total_tokens": 1090648}
{"current_steps": 2660, "total_steps": 8260, "loss": 0.1795, "lr": 4.286713833633242e-05, "epoch": 3.2203389830508473, "percentage": 32.2, "elapsed_time": "0:05:28", "remaining_time": "0:11:30", "throughput": 3329.83, "total_tokens": 1092632}
{"current_steps": 2665, "total_steps": 8260, "loss": 0.2081, "lr": 4.283015043264084e-05, "epoch": 3.2263922518159807, "percentage": 32.26, "elapsed_time": "0:05:28", "remaining_time": "0:11:30", "throughput": 3330.4, "total_tokens": 1094648}
{"current_steps": 2670, "total_steps": 8260, "loss": 0.1966, "lr": 4.279308292227396e-05, "epoch": 3.232445520581114, "percentage": 32.32, "elapsed_time": "0:05:29", "remaining_time": "0:11:29", "throughput": 3331.15, "total_tokens": 1096728}
{"current_steps": 2675, "total_steps": 8260, "loss": 0.1671, "lr": 4.275593597072796e-05, "epoch": 3.238498789346247, "percentage": 32.38, "elapsed_time": "0:05:29", "remaining_time": "0:11:28", "throughput": 3331.86, "total_tokens": 1098872}
{"current_steps": 2680, "total_steps": 8260, "loss": 0.1849, "lr": 4.2718709743853654e-05, "epoch": 3.2445520581113803, "percentage": 32.45, "elapsed_time": "0:05:30", "remaining_time": "0:11:27", "throughput": 3332.63, "total_tokens": 1101048}
{"current_steps": 2685, "total_steps": 8260, "loss": 0.3271, "lr": 4.268140440785584e-05, "epoch": 3.250605326876513, "percentage": 32.51, "elapsed_time": "0:05:30", "remaining_time": "0:11:27", "throughput": 3333.33, "total_tokens": 1103320}
{"current_steps": 2690, "total_steps": 8260, "loss": 0.2101, "lr": 4.264402012929247e-05, "epoch": 3.2566585956416465, "percentage": 32.57, "elapsed_time": "0:05:31", "remaining_time": "0:11:26", "throughput": 3334.12, "total_tokens": 1105336}
{"current_steps": 2695, "total_steps": 8260, "loss": 0.1829, "lr": 4.2606557075073996e-05, "epoch": 3.26271186440678, "percentage": 32.63, "elapsed_time": "0:05:32", "remaining_time": "0:11:25", "throughput": 3334.76, "total_tokens": 1107288}
{"current_steps": 2700, "total_steps": 8260, "loss": 0.2122, "lr": 4.256901541246255e-05, "epoch": 3.2687651331719128, "percentage": 32.69, "elapsed_time": "0:05:32", "remaining_time": "0:11:24", "throughput": 3335.48, "total_tokens": 1109368}
{"current_steps": 2705, "total_steps": 8260, "loss": 0.2214, "lr": 4.253139530907124e-05, "epoch": 3.274818401937046, "percentage": 32.75, "elapsed_time": "0:05:33", "remaining_time": "0:11:24", "throughput": 3336.31, "total_tokens": 1111576}
{"current_steps": 2710, "total_steps": 8260, "loss": 0.2293, "lr": 4.249369693286341e-05, "epoch": 3.280871670702179, "percentage": 32.81, "elapsed_time": "0:05:33", "remaining_time": "0:11:23", "throughput": 3336.77, "total_tokens": 1113624}
{"current_steps": 2715, "total_steps": 8260, "loss": 0.2148, "lr": 4.245592045215182e-05, "epoch": 3.2869249394673123, "percentage": 32.87, "elapsed_time": "0:05:34", "remaining_time": "0:11:22", "throughput": 3337.26, "total_tokens": 1115512}
{"current_steps": 2720, "total_steps": 8260, "loss": 0.187, "lr": 4.2418066035598e-05, "epoch": 3.2929782082324457, "percentage": 32.93, "elapsed_time": "0:05:34", "remaining_time": "0:11:21", "throughput": 3338.12, "total_tokens": 1117464}
{"current_steps": 2725, "total_steps": 8260, "loss": 0.2007, "lr": 4.238013385221142e-05, "epoch": 3.2990314769975786, "percentage": 32.99, "elapsed_time": "0:05:35", "remaining_time": "0:11:21", "throughput": 3338.72, "total_tokens": 1119480}
{"current_steps": 2730, "total_steps": 8260, "loss": 0.1579, "lr": 4.2342124071348744e-05, "epoch": 3.305084745762712, "percentage": 33.05, "elapsed_time": "0:05:35", "remaining_time": "0:11:20", "throughput": 3339.3, "total_tokens": 1121496}
{"current_steps": 2735, "total_steps": 8260, "loss": 0.1768, "lr": 4.230403686271309e-05, "epoch": 3.3111380145278453, "percentage": 33.11, "elapsed_time": "0:05:36", "remaining_time": "0:11:19", "throughput": 3339.99, "total_tokens": 1123480}
{"current_steps": 2740, "total_steps": 8260, "loss": 0.2612, "lr": 4.2265872396353314e-05, "epoch": 3.317191283292978, "percentage": 33.17, "elapsed_time": "0:05:36", "remaining_time": "0:11:18", "throughput": 3340.63, "total_tokens": 1125528}
{"current_steps": 2745, "total_steps": 8260, "loss": 0.2047, "lr": 4.2227630842663136e-05, "epoch": 3.3232445520581115, "percentage": 33.23, "elapsed_time": "0:05:37", "remaining_time": "0:11:18", "throughput": 3341.1, "total_tokens": 1127512}
{"current_steps": 2750, "total_steps": 8260, "loss": 0.199, "lr": 4.21893123723805e-05, "epoch": 3.3292978208232444, "percentage": 33.29, "elapsed_time": "0:05:38", "remaining_time": "0:11:17", "throughput": 3341.61, "total_tokens": 1129592}
{"current_steps": 2755, "total_steps": 8260, "loss": 0.2115, "lr": 4.2150917156586735e-05, "epoch": 3.335351089588378, "percentage": 33.35, "elapsed_time": "0:05:38", "remaining_time": "0:11:16", "throughput": 3342.1, "total_tokens": 1131576}
{"current_steps": 2760, "total_steps": 8260, "loss": 0.2291, "lr": 4.211244536670584e-05, "epoch": 3.341404358353511, "percentage": 33.41, "elapsed_time": "0:05:39", "remaining_time": "0:11:15", "throughput": 3342.72, "total_tokens": 1133784}
{"current_steps": 2765, "total_steps": 8260, "loss": 0.192, "lr": 4.207389717450368e-05, "epoch": 3.347457627118644, "percentage": 33.47, "elapsed_time": "0:05:39", "remaining_time": "0:11:15", "throughput": 3343.49, "total_tokens": 1135800}
{"current_steps": 2770, "total_steps": 8260, "loss": 0.2107, "lr": 4.203527275208723e-05, "epoch": 3.3535108958837774, "percentage": 33.54, "elapsed_time": "0:05:40", "remaining_time": "0:11:14", "throughput": 3344.19, "total_tokens": 1137784}
{"current_steps": 2775, "total_steps": 8260, "loss": 0.2128, "lr": 4.199657227190384e-05, "epoch": 3.3595641646489103, "percentage": 33.6, "elapsed_time": "0:05:40", "remaining_time": "0:11:13", "throughput": 3344.96, "total_tokens": 1139896}
{"current_steps": 2780, "total_steps": 8260, "loss": 0.2298, "lr": 4.195779590674041e-05, "epoch": 3.3656174334140436, "percentage": 33.66, "elapsed_time": "0:05:41", "remaining_time": "0:11:12", "throughput": 3345.6, "total_tokens": 1142040}
{"current_steps": 2785, "total_steps": 8260, "loss": 0.16, "lr": 4.191894382972264e-05, "epoch": 3.371670702179177, "percentage": 33.72, "elapsed_time": "0:05:41", "remaining_time": "0:11:12", "throughput": 3346.2, "total_tokens": 1144088}
{"current_steps": 2790, "total_steps": 8260, "loss": 0.2844, "lr": 4.188001621431429e-05, "epoch": 3.37772397094431, "percentage": 33.78, "elapsed_time": "0:05:42", "remaining_time": "0:11:11", "throughput": 3346.65, "total_tokens": 1146232}
{"current_steps": 2795, "total_steps": 8260, "loss": 0.2043, "lr": 4.184101323431636e-05, "epoch": 3.383777239709443, "percentage": 33.84, "elapsed_time": "0:05:43", "remaining_time": "0:11:10", "throughput": 3347.62, "total_tokens": 1148344}
{"current_steps": 2800, "total_steps": 8260, "loss": 0.2092, "lr": 4.180193506386634e-05, "epoch": 3.389830508474576, "percentage": 33.9, "elapsed_time": "0:05:43", "remaining_time": "0:11:09", "throughput": 3348.16, "total_tokens": 1150360}
{"current_steps": 2805, "total_steps": 8260, "loss": 0.2195, "lr": 4.1762781877437406e-05, "epoch": 3.3958837772397095, "percentage": 33.96, "elapsed_time": "0:05:44", "remaining_time": "0:11:09", "throughput": 3348.89, "total_tokens": 1152376}
{"current_steps": 2810, "total_steps": 8260, "loss": 0.196, "lr": 4.172355384983769e-05, "epoch": 3.401937046004843, "percentage": 34.02, "elapsed_time": "0:05:44", "remaining_time": "0:11:08", "throughput": 3349.48, "total_tokens": 1154424}
{"current_steps": 2815, "total_steps": 8260, "loss": 0.1842, "lr": 4.168425115620944e-05, "epoch": 3.4079903147699757, "percentage": 34.08, "elapsed_time": "0:05:45", "remaining_time": "0:11:07", "throughput": 3350.09, "total_tokens": 1156472}
{"current_steps": 2820, "total_steps": 8260, "loss": 0.1802, "lr": 4.164487397202829e-05, "epoch": 3.414043583535109, "percentage": 34.14, "elapsed_time": "0:05:45", "remaining_time": "0:11:06", "throughput": 3350.78, "total_tokens": 1158552}
{"current_steps": 2825, "total_steps": 8260, "loss": 0.2498, "lr": 4.160542247310244e-05, "epoch": 3.420096852300242, "percentage": 34.2, "elapsed_time": "0:05:46", "remaining_time": "0:11:06", "throughput": 3351.42, "total_tokens": 1160696}
{"current_steps": 2830, "total_steps": 8260, "loss": 0.2504, "lr": 4.156589683557189e-05, "epoch": 3.4261501210653753, "percentage": 34.26, "elapsed_time": "0:05:46", "remaining_time": "0:11:05", "throughput": 3351.98, "total_tokens": 1162808}
{"current_steps": 2835, "total_steps": 8260, "loss": 0.1793, "lr": 4.1526297235907635e-05, "epoch": 3.4322033898305087, "percentage": 34.32, "elapsed_time": "0:05:47", "remaining_time": "0:11:04", "throughput": 3352.71, "total_tokens": 1164728}
{"current_steps": 2840, "total_steps": 8260, "loss": 0.2104, "lr": 4.148662385091091e-05, "epoch": 3.4382566585956416, "percentage": 34.38, "elapsed_time": "0:05:47", "remaining_time": "0:11:04", "throughput": 3353.19, "total_tokens": 1166808}
{"current_steps": 2845, "total_steps": 8260, "loss": 0.2173, "lr": 4.144687685771238e-05, "epoch": 3.444309927360775, "percentage": 34.44, "elapsed_time": "0:05:48", "remaining_time": "0:11:03", "throughput": 3353.88, "total_tokens": 1168888}
{"current_steps": 2850, "total_steps": 8260, "loss": 0.2152, "lr": 4.140705643377133e-05, "epoch": 3.450363196125908, "percentage": 34.5, "elapsed_time": "0:05:49", "remaining_time": "0:11:02", "throughput": 3354.53, "total_tokens": 1170872}
{"current_steps": 2855, "total_steps": 8260, "loss": 0.2391, "lr": 4.1367162756874925e-05, "epoch": 3.456416464891041, "percentage": 34.56, "elapsed_time": "0:05:49", "remaining_time": "0:11:01", "throughput": 3355.08, "total_tokens": 1172984}
{"current_steps": 2860, "total_steps": 8260, "loss": 0.1998, "lr": 4.132719600513734e-05, "epoch": 3.4624697336561745, "percentage": 34.62, "elapsed_time": "0:05:50", "remaining_time": "0:11:01", "throughput": 3355.71, "total_tokens": 1175032}
{"current_steps": 2865, "total_steps": 8260, "loss": 0.1725, "lr": 4.128715635699905e-05, "epoch": 3.4685230024213074, "percentage": 34.69, "elapsed_time": "0:05:50", "remaining_time": "0:11:00", "throughput": 3356.46, "total_tokens": 1177240}
{"current_steps": 2870, "total_steps": 8260, "loss": 0.1902, "lr": 4.124704399122597e-05, "epoch": 3.4745762711864407, "percentage": 34.75, "elapsed_time": "0:05:51", "remaining_time": "0:10:59", "throughput": 3357.03, "total_tokens": 1179192}
{"current_steps": 2875, "total_steps": 8260, "loss": 0.2381, "lr": 4.120685908690869e-05, "epoch": 3.4806295399515736, "percentage": 34.81, "elapsed_time": "0:05:51", "remaining_time": "0:10:58", "throughput": 3357.48, "total_tokens": 1181112}
{"current_steps": 2880, "total_steps": 8260, "loss": 0.2012, "lr": 4.1166601823461656e-05, "epoch": 3.486682808716707, "percentage": 34.87, "elapsed_time": "0:05:52", "remaining_time": "0:10:58", "throughput": 3357.96, "total_tokens": 1183128}
{"current_steps": 2885, "total_steps": 8260, "loss": 0.2458, "lr": 4.112627238062239e-05, "epoch": 3.4927360774818403, "percentage": 34.93, "elapsed_time": "0:05:52", "remaining_time": "0:10:57", "throughput": 3358.51, "total_tokens": 1185240}
{"current_steps": 2890, "total_steps": 8260, "loss": 0.2267, "lr": 4.1085870938450656e-05, "epoch": 3.4987893462469732, "percentage": 34.99, "elapsed_time": "0:05:53", "remaining_time": "0:10:56", "throughput": 3359.34, "total_tokens": 1187320}
{"current_steps": 2891, "total_steps": 8260, "eval_loss": 0.2109345942735672, "epoch": 3.5, "percentage": 35.0, "elapsed_time": "0:05:58", "remaining_time": "0:11:05", "throughput": 3315.98, "total_tokens": 1187704}
{"current_steps": 2895, "total_steps": 8260, "loss": 0.2044, "lr": 4.1045397677327684e-05, "epoch": 3.5048426150121066, "percentage": 35.05, "elapsed_time": "0:05:59", "remaining_time": "0:11:06", "throughput": 3308.15, "total_tokens": 1189400}
{"current_steps": 2900, "total_steps": 8260, "loss": 0.2232, "lr": 4.1004852777955364e-05, "epoch": 3.5108958837772395, "percentage": 35.11, "elapsed_time": "0:06:00", "remaining_time": "0:11:05", "throughput": 3308.79, "total_tokens": 1191384}
{"current_steps": 2905, "total_steps": 8260, "loss": 0.203, "lr": 4.096423642135543e-05, "epoch": 3.516949152542373, "percentage": 35.17, "elapsed_time": "0:06:00", "remaining_time": "0:11:04", "throughput": 3309.66, "total_tokens": 1193368}
{"current_steps": 2910, "total_steps": 8260, "loss": 0.1391, "lr": 4.0923548788868625e-05, "epoch": 3.523002421307506, "percentage": 35.23, "elapsed_time": "0:06:01", "remaining_time": "0:11:03", "throughput": 3310.29, "total_tokens": 1195512}
{"current_steps": 2915, "total_steps": 8260, "loss": 0.2104, "lr": 4.0882790062153957e-05, "epoch": 3.529055690072639, "percentage": 35.29, "elapsed_time": "0:06:01", "remaining_time": "0:11:03", "throughput": 3310.72, "total_tokens": 1197560}
{"current_steps": 2920, "total_steps": 8260, "loss": 0.1759, "lr": 4.084196042318783e-05, "epoch": 3.5351089588377724, "percentage": 35.35, "elapsed_time": "0:06:02", "remaining_time": "0:11:02", "throughput": 3311.36, "total_tokens": 1199768}
{"current_steps": 2925, "total_steps": 8260, "loss": 0.2499, "lr": 4.080106005426326e-05, "epoch": 3.5411622276029053, "percentage": 35.41, "elapsed_time": "0:06:02", "remaining_time": "0:11:01", "throughput": 3311.87, "total_tokens": 1201848}
{"current_steps": 2930, "total_steps": 8260, "loss": 0.1784, "lr": 4.076008913798903e-05, "epoch": 3.5472154963680387, "percentage": 35.47, "elapsed_time": "0:06:03", "remaining_time": "0:11:01", "throughput": 3312.34, "total_tokens": 1203896}
{"current_steps": 2935, "total_steps": 8260, "loss": 0.1995, "lr": 4.071904785728894e-05, "epoch": 3.553268765133172, "percentage": 35.53, "elapsed_time": "0:06:04", "remaining_time": "0:11:00", "throughput": 3312.65, "total_tokens": 1205880}
{"current_steps": 2940, "total_steps": 8260, "loss": 0.2185, "lr": 4.0677936395400906e-05, "epoch": 3.559322033898305, "percentage": 35.59, "elapsed_time": "0:06:04", "remaining_time": "0:10:59", "throughput": 3313.21, "total_tokens": 1207896}
{"current_steps": 2945, "total_steps": 8260, "loss": 0.2321, "lr": 4.063675493587621e-05, "epoch": 3.5653753026634383, "percentage": 35.65, "elapsed_time": "0:06:05", "remaining_time": "0:10:58", "throughput": 3314.01, "total_tokens": 1210008}
{"current_steps": 2950, "total_steps": 8260, "loss": 0.1987, "lr": 4.059550366257864e-05, "epoch": 3.571428571428571, "percentage": 35.71, "elapsed_time": "0:06:05", "remaining_time": "0:10:58", "throughput": 3314.77, "total_tokens": 1212024}
{"current_steps": 2955, "total_steps": 8260, "loss": 0.1917, "lr": 4.055418275968368e-05, "epoch": 3.5774818401937045, "percentage": 35.77, "elapsed_time": "0:06:06", "remaining_time": "0:10:57", "throughput": 3315.34, "total_tokens": 1214040}
{"current_steps": 2960, "total_steps": 8260, "loss": 0.2193, "lr": 4.0512792411677705e-05, "epoch": 3.583535108958838, "percentage": 35.84, "elapsed_time": "0:06:06", "remaining_time": "0:10:56", "throughput": 3315.79, "total_tokens": 1216088}
{"current_steps": 2965, "total_steps": 8260, "loss": 0.2061, "lr": 4.047133280335713e-05, "epoch": 3.589588377723971, "percentage": 35.9, "elapsed_time": "0:06:07", "remaining_time": "0:10:55", "throughput": 3316.43, "total_tokens": 1218136}
{"current_steps": 2970, "total_steps": 8260, "loss": 0.1835, "lr": 4.042980411982762e-05, "epoch": 3.595641646489104, "percentage": 35.96, "elapsed_time": "0:06:07", "remaining_time": "0:10:55", "throughput": 3317.03, "total_tokens": 1220248}
{"current_steps": 2975, "total_steps": 8260, "loss": 0.1841, "lr": 4.0388206546503215e-05, "epoch": 3.601694915254237, "percentage": 36.02, "elapsed_time": "0:06:08", "remaining_time": "0:10:54", "throughput": 3317.81, "total_tokens": 1222360}
{"current_steps": 2980, "total_steps": 8260, "loss": 0.2415, "lr": 4.0346540269105546e-05, "epoch": 3.6077481840193704, "percentage": 36.08, "elapsed_time": "0:06:08", "remaining_time": "0:10:53", "throughput": 3318.8, "total_tokens": 1224568}
{"current_steps": 2985, "total_steps": 8260, "loss": 0.1543, "lr": 4.030480547366297e-05, "epoch": 3.6138014527845037, "percentage": 36.14, "elapsed_time": "0:06:09", "remaining_time": "0:10:53", "throughput": 3319.3, "total_tokens": 1226648}
{"current_steps": 2990, "total_steps": 8260, "loss": 0.2468, "lr": 4.026300234650979e-05, "epoch": 3.619854721549637, "percentage": 36.2, "elapsed_time": "0:06:10", "remaining_time": "0:10:52", "throughput": 3319.79, "total_tokens": 1228600}
{"current_steps": 2995, "total_steps": 8260, "loss": 0.1888, "lr": 4.022113107428536e-05, "epoch": 3.62590799031477, "percentage": 36.26, "elapsed_time": "0:06:10", "remaining_time": "0:10:51", "throughput": 3320.49, "total_tokens": 1230616}
{"current_steps": 3000, "total_steps": 8260, "loss": 0.2066, "lr": 4.0179191843933286e-05, "epoch": 3.6319612590799033, "percentage": 36.32, "elapsed_time": "0:06:11", "remaining_time": "0:10:50", "throughput": 3321.04, "total_tokens": 1232632}
{"current_steps": 3005, "total_steps": 8260, "loss": 0.1838, "lr": 4.013718484270061e-05, "epoch": 3.638014527845036, "percentage": 36.38, "elapsed_time": "0:06:11", "remaining_time": "0:10:49", "throughput": 3321.74, "total_tokens": 1234552}
{"current_steps": 3010, "total_steps": 8260, "loss": 0.2367, "lr": 4.009511025813694e-05, "epoch": 3.6440677966101696, "percentage": 36.44, "elapsed_time": "0:06:12", "remaining_time": "0:10:49", "throughput": 3322.4, "total_tokens": 1236728}
{"current_steps": 3015, "total_steps": 8260, "loss": 0.2113, "lr": 4.005296827809362e-05, "epoch": 3.650121065375303, "percentage": 36.5, "elapsed_time": "0:06:12", "remaining_time": "0:10:48", "throughput": 3323.03, "total_tokens": 1238776}
{"current_steps": 3020, "total_steps": 8260, "loss": 0.2041, "lr": 4.001075909072289e-05, "epoch": 3.656174334140436, "percentage": 36.56, "elapsed_time": "0:06:13", "remaining_time": "0:10:47", "throughput": 3323.71, "total_tokens": 1240856}
{"current_steps": 3025, "total_steps": 8260, "loss": 0.1842, "lr": 3.9968482884477075e-05, "epoch": 3.662227602905569, "percentage": 36.62, "elapsed_time": "0:06:13", "remaining_time": "0:10:47", "throughput": 3324.4, "total_tokens": 1242936}
{"current_steps": 3030, "total_steps": 8260, "loss": 0.2028, "lr": 3.992613984810771e-05, "epoch": 3.668280871670702, "percentage": 36.68, "elapsed_time": "0:06:14", "remaining_time": "0:10:46", "throughput": 3325.03, "total_tokens": 1245080}
{"current_steps": 3035, "total_steps": 8260, "loss": 0.1903, "lr": 3.988373017066469e-05, "epoch": 3.6743341404358354, "percentage": 36.74, "elapsed_time": "0:06:15", "remaining_time": "0:10:45", "throughput": 3325.77, "total_tokens": 1247192}
{"current_steps": 3040, "total_steps": 8260, "loss": 0.1769, "lr": 3.984125404149548e-05, "epoch": 3.6803874092009687, "percentage": 36.8, "elapsed_time": "0:06:15", "remaining_time": "0:10:44", "throughput": 3326.19, "total_tokens": 1249240}
{"current_steps": 3045, "total_steps": 8260, "loss": 0.2723, "lr": 3.9798711650244194e-05, "epoch": 3.6864406779661016, "percentage": 36.86, "elapsed_time": "0:06:16", "remaining_time": "0:10:44", "throughput": 3326.66, "total_tokens": 1251320}
{"current_steps": 3050, "total_steps": 8260, "loss": 0.2016, "lr": 3.9756103186850825e-05, "epoch": 3.692493946731235, "percentage": 36.92, "elapsed_time": "0:06:16", "remaining_time": "0:10:43", "throughput": 3327.2, "total_tokens": 1253336}
{"current_steps": 3055, "total_steps": 8260, "loss": 0.1887, "lr": 3.971342884155033e-05, "epoch": 3.698547215496368, "percentage": 36.99, "elapsed_time": "0:06:17", "remaining_time": "0:10:42", "throughput": 3327.72, "total_tokens": 1255352}
{"current_steps": 3060, "total_steps": 8260, "loss": 0.1945, "lr": 3.9670688804871815e-05, "epoch": 3.7046004842615012, "percentage": 37.05, "elapsed_time": "0:06:17", "remaining_time": "0:10:41", "throughput": 3328.21, "total_tokens": 1257272}
{"current_steps": 3065, "total_steps": 8260, "loss": 0.1922, "lr": 3.96278832676377e-05, "epoch": 3.7106537530266346, "percentage": 37.11, "elapsed_time": "0:06:18", "remaining_time": "0:10:41", "throughput": 3328.62, "total_tokens": 1259416}
{"current_steps": 3070, "total_steps": 8260, "loss": 0.2029, "lr": 3.958501242096283e-05, "epoch": 3.7167070217917675, "percentage": 37.17, "elapsed_time": "0:06:18", "remaining_time": "0:10:40", "throughput": 3329.29, "total_tokens": 1261496}
{"current_steps": 3075, "total_steps": 8260, "loss": 0.1748, "lr": 3.954207645625365e-05, "epoch": 3.722760290556901, "percentage": 37.23, "elapsed_time": "0:06:19", "remaining_time": "0:10:39", "throughput": 3329.89, "total_tokens": 1263480}
{"current_steps": 3080, "total_steps": 8260, "loss": 0.1956, "lr": 3.949907556520731e-05, "epoch": 3.7288135593220337, "percentage": 37.29, "elapsed_time": "0:06:20", "remaining_time": "0:10:39", "throughput": 3330.3, "total_tokens": 1265528}
{"current_steps": 3085, "total_steps": 8260, "loss": 0.1966, "lr": 3.9456009939810886e-05, "epoch": 3.734866828087167, "percentage": 37.35, "elapsed_time": "0:06:20", "remaining_time": "0:10:38", "throughput": 3330.72, "total_tokens": 1267512}
{"current_steps": 3090, "total_steps": 8260, "loss": 0.1987, "lr": 3.941287977234043e-05, "epoch": 3.7409200968523004, "percentage": 37.41, "elapsed_time": "0:06:21", "remaining_time": "0:10:37", "throughput": 3331.33, "total_tokens": 1269560}
{"current_steps": 3095, "total_steps": 8260, "loss": 0.1997, "lr": 3.9369685255360175e-05, "epoch": 3.7469733656174333, "percentage": 37.47, "elapsed_time": "0:06:21", "remaining_time": "0:10:36", "throughput": 3332.19, "total_tokens": 1271640}
{"current_steps": 3100, "total_steps": 8260, "loss": 0.1834, "lr": 3.9326426581721663e-05, "epoch": 3.7530266343825667, "percentage": 37.53, "elapsed_time": "0:06:22", "remaining_time": "0:10:36", "throughput": 3332.61, "total_tokens": 1273688}
{"current_steps": 3105, "total_steps": 8260, "loss": 0.1682, "lr": 3.9283103944562874e-05, "epoch": 3.7590799031476996, "percentage": 37.59, "elapsed_time": "0:06:22", "remaining_time": "0:10:35", "throughput": 3333.07, "total_tokens": 1275768}
{"current_steps": 3110, "total_steps": 8260, "loss": 0.1793, "lr": 3.923971753730735e-05, "epoch": 3.765133171912833, "percentage": 37.65, "elapsed_time": "0:06:23", "remaining_time": "0:10:34", "throughput": 3333.49, "total_tokens": 1277752}
{"current_steps": 3115, "total_steps": 8260, "loss": 0.2727, "lr": 3.919626755366338e-05, "epoch": 3.7711864406779663, "percentage": 37.71, "elapsed_time": "0:06:23", "remaining_time": "0:10:34", "throughput": 3334.21, "total_tokens": 1279864}
{"current_steps": 3120, "total_steps": 8260, "loss": 0.2247, "lr": 3.9152754187623086e-05, "epoch": 3.777239709443099, "percentage": 37.77, "elapsed_time": "0:06:24", "remaining_time": "0:10:33", "throughput": 3334.73, "total_tokens": 1281880}
{"current_steps": 3125, "total_steps": 8260, "loss": 0.1999, "lr": 3.910917763346156e-05, "epoch": 3.7832929782082325, "percentage": 37.83, "elapsed_time": "0:06:24", "remaining_time": "0:10:32", "throughput": 3335.29, "total_tokens": 1283928}
{"current_steps": 3130, "total_steps": 8260, "loss": 0.204, "lr": 3.906553808573604e-05, "epoch": 3.7893462469733654, "percentage": 37.89, "elapsed_time": "0:06:25", "remaining_time": "0:10:31", "throughput": 3335.96, "total_tokens": 1285944}
{"current_steps": 3135, "total_steps": 8260, "loss": 0.1793, "lr": 3.9021835739285e-05, "epoch": 3.7953995157384988, "percentage": 37.95, "elapsed_time": "0:06:26", "remaining_time": "0:10:31", "throughput": 3336.42, "total_tokens": 1287928}
{"current_steps": 3140, "total_steps": 8260, "loss": 0.1977, "lr": 3.897807078922728e-05, "epoch": 3.801452784503632, "percentage": 38.01, "elapsed_time": "0:06:26", "remaining_time": "0:10:30", "throughput": 3336.9, "total_tokens": 1290008}
{"current_steps": 3145, "total_steps": 8260, "loss": 0.2113, "lr": 3.8934243430961265e-05, "epoch": 3.807506053268765, "percentage": 38.08, "elapsed_time": "0:06:27", "remaining_time": "0:10:29", "throughput": 3337.42, "total_tokens": 1292120}
{"current_steps": 3150, "total_steps": 8260, "loss": 0.1643, "lr": 3.889035386016393e-05, "epoch": 3.8135593220338984, "percentage": 38.14, "elapsed_time": "0:06:27", "remaining_time": "0:10:28", "throughput": 3338.09, "total_tokens": 1294040}
{"current_steps": 3155, "total_steps": 8260, "loss": 0.2509, "lr": 3.8846402272790044e-05, "epoch": 3.8196125907990313, "percentage": 38.2, "elapsed_time": "0:06:28", "remaining_time": "0:10:28", "throughput": 3338.88, "total_tokens": 1296024}
{"current_steps": 3160, "total_steps": 8260, "loss": 0.2598, "lr": 3.8802388865071246e-05, "epoch": 3.8256658595641646, "percentage": 38.26, "elapsed_time": "0:06:28", "remaining_time": "0:10:27", "throughput": 3339.35, "total_tokens": 1298104}
{"current_steps": 3165, "total_steps": 8260, "loss": 0.1928, "lr": 3.875831383351519e-05, "epoch": 3.831719128329298, "percentage": 38.32, "elapsed_time": "0:06:29", "remaining_time": "0:10:26", "throughput": 3339.92, "total_tokens": 1300248}
{"current_steps": 3170, "total_steps": 8260, "loss": 0.2229, "lr": 3.8714177374904683e-05, "epoch": 3.837772397094431, "percentage": 38.38, "elapsed_time": "0:06:29", "remaining_time": "0:10:25", "throughput": 3340.43, "total_tokens": 1302104}
{"current_steps": 3175, "total_steps": 8260, "loss": 0.197, "lr": 3.866997968629674e-05, "epoch": 3.843825665859564, "percentage": 38.44, "elapsed_time": "0:06:30", "remaining_time": "0:10:25", "throughput": 3340.93, "total_tokens": 1304056}
{"current_steps": 3180, "total_steps": 8260, "loss": 0.1896, "lr": 3.86257209650218e-05, "epoch": 3.849878934624697, "percentage": 38.5, "elapsed_time": "0:06:30", "remaining_time": "0:10:24", "throughput": 3341.28, "total_tokens": 1306104}
{"current_steps": 3185, "total_steps": 8260, "loss": 0.2072, "lr": 3.858140140868276e-05, "epoch": 3.8559322033898304, "percentage": 38.56, "elapsed_time": "0:06:31", "remaining_time": "0:10:23", "throughput": 3341.79, "total_tokens": 1308056}
{"current_steps": 3190, "total_steps": 8260, "loss": 0.1904, "lr": 3.853702121515416e-05, "epoch": 3.861985472154964, "percentage": 38.62, "elapsed_time": "0:06:31", "remaining_time": "0:10:22", "throughput": 3342.34, "total_tokens": 1310104}
{"current_steps": 3195, "total_steps": 8260, "loss": 0.1597, "lr": 3.849258058258124e-05, "epoch": 3.8680387409200967, "percentage": 38.68, "elapsed_time": "0:06:32", "remaining_time": "0:10:22", "throughput": 3342.73, "total_tokens": 1312152}
{"current_steps": 3200, "total_steps": 8260, "loss": 0.186, "lr": 3.84480797093791e-05, "epoch": 3.87409200968523, "percentage": 38.74, "elapsed_time": "0:06:33", "remaining_time": "0:10:21", "throughput": 3343.28, "total_tokens": 1314328}
{"current_steps": 3205, "total_steps": 8260, "loss": 0.2072, "lr": 3.8403518794231795e-05, "epoch": 3.880145278450363, "percentage": 38.8, "elapsed_time": "0:06:33", "remaining_time": "0:10:20", "throughput": 3343.95, "total_tokens": 1316344}
{"current_steps": 3210, "total_steps": 8260, "loss": 0.1445, "lr": 3.835889803609145e-05, "epoch": 3.8861985472154963, "percentage": 38.86, "elapsed_time": "0:06:34", "remaining_time": "0:10:20", "throughput": 3344.44, "total_tokens": 1318360}
{"current_steps": 3215, "total_steps": 8260, "loss": 0.1644, "lr": 3.8314217634177376e-05, "epoch": 3.8922518159806296, "percentage": 38.92, "elapsed_time": "0:06:34", "remaining_time": "0:10:19", "throughput": 3344.92, "total_tokens": 1320376}
{"current_steps": 3220, "total_steps": 8260, "loss": 0.1563, "lr": 3.826947778797516e-05, "epoch": 3.898305084745763, "percentage": 38.98, "elapsed_time": "0:06:35", "remaining_time": "0:10:18", "throughput": 3345.51, "total_tokens": 1322616}
{"current_steps": 3225, "total_steps": 8260, "loss": 0.2555, "lr": 3.822467869723581e-05, "epoch": 3.904358353510896, "percentage": 39.04, "elapsed_time": "0:06:35", "remaining_time": "0:10:18", "throughput": 3346.06, "total_tokens": 1324664}
{"current_steps": 3230, "total_steps": 8260, "loss": 0.21, "lr": 3.8179820561974835e-05, "epoch": 3.910411622276029, "percentage": 39.1, "elapsed_time": "0:06:36", "remaining_time": "0:10:17", "throughput": 3346.6, "total_tokens": 1326616}
{"current_steps": 3235, "total_steps": 8260, "loss": 0.2759, "lr": 3.813490358247137e-05, "epoch": 3.916464891041162, "percentage": 39.16, "elapsed_time": "0:06:36", "remaining_time": "0:10:16", "throughput": 3347.17, "total_tokens": 1328760}
{"current_steps": 3240, "total_steps": 8260, "loss": 0.1817, "lr": 3.8089927959267255e-05, "epoch": 3.9225181598062955, "percentage": 39.23, "elapsed_time": "0:06:37", "remaining_time": "0:10:16", "throughput": 3347.66, "total_tokens": 1330968}
{"current_steps": 3245, "total_steps": 8260, "loss": 0.1879, "lr": 3.8044893893166203e-05, "epoch": 3.928571428571429, "percentage": 39.29, "elapsed_time": "0:06:38", "remaining_time": "0:10:15", "throughput": 3348.09, "total_tokens": 1332952}
{"current_steps": 3250, "total_steps": 8260, "loss": 0.2231, "lr": 3.799980158523279e-05, "epoch": 3.9346246973365617, "percentage": 39.35, "elapsed_time": "0:06:38", "remaining_time": "0:10:14", "throughput": 3348.6, "total_tokens": 1335064}
{"current_steps": 3255, "total_steps": 8260, "loss": 0.2479, "lr": 3.795465123679167e-05, "epoch": 3.940677966101695, "percentage": 39.41, "elapsed_time": "0:06:39", "remaining_time": "0:10:13", "throughput": 3349.39, "total_tokens": 1337080}
{"current_steps": 3260, "total_steps": 8260, "loss": 0.1979, "lr": 3.790944304942664e-05, "epoch": 3.946731234866828, "percentage": 39.47, "elapsed_time": "0:06:39", "remaining_time": "0:10:13", "throughput": 3349.69, "total_tokens": 1339096}
{"current_steps": 3265, "total_steps": 8260, "loss": 0.1909, "lr": 3.7864177224979696e-05, "epoch": 3.9527845036319613, "percentage": 39.53, "elapsed_time": "0:06:40", "remaining_time": "0:10:12", "throughput": 3350.37, "total_tokens": 1341048}
{"current_steps": 3270, "total_steps": 8260, "loss": 0.1797, "lr": 3.781885396555019e-05, "epoch": 3.9588377723970947, "percentage": 39.59, "elapsed_time": "0:06:40", "remaining_time": "0:10:11", "throughput": 3350.81, "total_tokens": 1343224}
{"current_steps": 3275, "total_steps": 8260, "loss": 0.2289, "lr": 3.777347347349392e-05, "epoch": 3.9648910411622276, "percentage": 39.65, "elapsed_time": "0:06:41", "remaining_time": "0:10:11", "throughput": 3351.17, "total_tokens": 1345272}
{"current_steps": 3280, "total_steps": 8260, "loss": 0.1959, "lr": 3.7728035951422166e-05, "epoch": 3.970944309927361, "percentage": 39.71, "elapsed_time": "0:06:42", "remaining_time": "0:10:10", "throughput": 3351.72, "total_tokens": 1347416}
{"current_steps": 3285, "total_steps": 8260, "loss": 0.2026, "lr": 3.7682541602200875e-05, "epoch": 3.976997578692494, "percentage": 39.77, "elapsed_time": "0:06:42", "remaining_time": "0:10:09", "throughput": 3352.23, "total_tokens": 1349464}
{"current_steps": 3290, "total_steps": 8260, "loss": 0.1669, "lr": 3.76369906289497e-05, "epoch": 3.983050847457627, "percentage": 39.83, "elapsed_time": "0:06:43", "remaining_time": "0:10:08", "throughput": 3352.58, "total_tokens": 1351352}
{"current_steps": 3295, "total_steps": 8260, "loss": 0.1814, "lr": 3.7591383235041086e-05, "epoch": 3.9891041162227605, "percentage": 39.89, "elapsed_time": "0:06:43", "remaining_time": "0:10:08", "throughput": 3353.18, "total_tokens": 1353368}
{"current_steps": 3300, "total_steps": 8260, "loss": 0.2226, "lr": 3.75457196240994e-05, "epoch": 3.9951573849878934, "percentage": 39.95, "elapsed_time": "0:06:44", "remaining_time": "0:10:07", "throughput": 3353.54, "total_tokens": 1355416}
{"current_steps": 3304, "total_steps": 8260, "eval_loss": 0.19647635519504547, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:06:49", "remaining_time": "0:10:13", "throughput": 3314.73, "total_tokens": 1356744}
{"current_steps": 3305, "total_steps": 8260, "loss": 0.2059, "lr": 3.7500000000000003e-05, "epoch": 4.001210653753026, "percentage": 40.01, "elapsed_time": "0:06:50", "remaining_time": "0:10:15", "throughput": 3305.88, "total_tokens": 1357192}
{"current_steps": 3310, "total_steps": 8260, "loss": 0.1767, "lr": 3.7454224566868327e-05, "epoch": 4.00726392251816, "percentage": 40.07, "elapsed_time": "0:06:51", "remaining_time": "0:10:14", "throughput": 3306.49, "total_tokens": 1359272}
{"current_steps": 3315, "total_steps": 8260, "loss": 0.1993, "lr": 3.7408393529078985e-05, "epoch": 4.013317191283293, "percentage": 40.13, "elapsed_time": "0:06:51", "remaining_time": "0:10:14", "throughput": 3307.11, "total_tokens": 1361480}
{"current_steps": 3320, "total_steps": 8260, "loss": 0.1972, "lr": 3.7362507091254836e-05, "epoch": 4.019370460048426, "percentage": 40.19, "elapsed_time": "0:06:52", "remaining_time": "0:10:13", "throughput": 3307.59, "total_tokens": 1363560}
{"current_steps": 3325, "total_steps": 8260, "loss": 0.2433, "lr": 3.7316565458266114e-05, "epoch": 4.02542372881356, "percentage": 40.25, "elapsed_time": "0:06:52", "remaining_time": "0:10:12", "throughput": 3308.06, "total_tokens": 1365640}
{"current_steps": 3330, "total_steps": 8260, "loss": 0.1593, "lr": 3.727056883522945e-05, "epoch": 4.031476997578692, "percentage": 40.31, "elapsed_time": "0:06:53", "remaining_time": "0:10:11", "throughput": 3308.68, "total_tokens": 1367720}
{"current_steps": 3335, "total_steps": 8260, "loss": 0.1921, "lr": 3.722451742750701e-05, "epoch": 4.0375302663438255, "percentage": 40.38, "elapsed_time": "0:06:53", "remaining_time": "0:10:11", "throughput": 3309.29, "total_tokens": 1369704}
{"current_steps": 3340, "total_steps": 8260, "loss": 0.1749, "lr": 3.717841144070556e-05, "epoch": 4.043583535108959, "percentage": 40.44, "elapsed_time": "0:06:54", "remaining_time": "0:10:10", "throughput": 3309.79, "total_tokens": 1371816}
{"current_steps": 3345, "total_steps": 8260, "loss": 0.1805, "lr": 3.713225108067553e-05, "epoch": 4.049636803874092, "percentage": 40.5, "elapsed_time": "0:06:55", "remaining_time": "0:10:09", "throughput": 3310.04, "total_tokens": 1373800}
{"current_steps": 3350, "total_steps": 8260, "loss": 0.1596, "lr": 3.708603655351012e-05, "epoch": 4.0556900726392255, "percentage": 40.56, "elapsed_time": "0:06:55", "remaining_time": "0:10:09", "throughput": 3310.6, "total_tokens": 1375752}
{"current_steps": 3355, "total_steps": 8260, "loss": 0.2301, "lr": 3.7039768065544395e-05, "epoch": 4.061743341404358, "percentage": 40.62, "elapsed_time": "0:06:56", "remaining_time": "0:10:08", "throughput": 3310.98, "total_tokens": 1377896}
{"current_steps": 3360, "total_steps": 8260, "loss": 0.2725, "lr": 3.69934458233543e-05, "epoch": 4.067796610169491, "percentage": 40.68, "elapsed_time": "0:06:56", "remaining_time": "0:10:07", "throughput": 3311.48, "total_tokens": 1379912}
{"current_steps": 3365, "total_steps": 8260, "loss": 0.231, "lr": 3.694707003375579e-05, "epoch": 4.073849878934625, "percentage": 40.74, "elapsed_time": "0:06:57", "remaining_time": "0:10:06", "throughput": 3311.88, "total_tokens": 1381960}
{"current_steps": 3370, "total_steps": 8260, "loss": 0.1959, "lr": 3.690064090380392e-05, "epoch": 4.079903147699758, "percentage": 40.8, "elapsed_time": "0:06:57", "remaining_time": "0:10:06", "throughput": 3312.25, "total_tokens": 1384072}
{"current_steps": 3375, "total_steps": 8260, "loss": 0.2056, "lr": 3.685415864079185e-05, "epoch": 4.085956416464891, "percentage": 40.86, "elapsed_time": "0:06:58", "remaining_time": "0:10:05", "throughput": 3312.89, "total_tokens": 1386152}
{"current_steps": 3380, "total_steps": 8260, "loss": 0.2108, "lr": 3.680762345225001e-05, "epoch": 4.092009685230024, "percentage": 40.92, "elapsed_time": "0:06:58", "remaining_time": "0:10:04", "throughput": 3313.4, "total_tokens": 1388168}
{"current_steps": 3385, "total_steps": 8260, "loss": 0.2236, "lr": 3.676103554594511e-05, "epoch": 4.098062953995157, "percentage": 40.98, "elapsed_time": "0:06:59", "remaining_time": "0:10:04", "throughput": 3314.01, "total_tokens": 1390152}
{"current_steps": 3390, "total_steps": 8260, "loss": 0.1699, "lr": 3.671439512987921e-05, "epoch": 4.1041162227602905, "percentage": 41.04, "elapsed_time": "0:07:00", "remaining_time": "0:10:03", "throughput": 3314.66, "total_tokens": 1392168}
{"current_steps": 3395, "total_steps": 8260, "loss": 0.2018, "lr": 3.666770241228883e-05, "epoch": 4.110169491525424, "percentage": 41.1, "elapsed_time": "0:07:00", "remaining_time": "0:10:02", "throughput": 3315.17, "total_tokens": 1394376}
{"current_steps": 3400, "total_steps": 8260, "loss": 0.1659, "lr": 3.6620957601644016e-05, "epoch": 4.116222760290557, "percentage": 41.16, "elapsed_time": "0:07:01", "remaining_time": "0:10:02", "throughput": 3315.74, "total_tokens": 1396520}
{"current_steps": 3405, "total_steps": 8260, "loss": 0.2209, "lr": 3.657416090664737e-05, "epoch": 4.12227602905569, "percentage": 41.22, "elapsed_time": "0:07:01", "remaining_time": "0:10:01", "throughput": 3316.33, "total_tokens": 1398600}
{"current_steps": 3410, "total_steps": 8260, "loss": 0.2055, "lr": 3.652731253623315e-05, "epoch": 4.128329297820823, "percentage": 41.28, "elapsed_time": "0:07:02", "remaining_time": "0:10:00", "throughput": 3316.9, "total_tokens": 1400584}
{"current_steps": 3415, "total_steps": 8260, "loss": 0.2078, "lr": 3.648041269956634e-05, "epoch": 4.134382566585956, "percentage": 41.34, "elapsed_time": "0:07:02", "remaining_time": "0:09:59", "throughput": 3317.53, "total_tokens": 1402760}
{"current_steps": 3420, "total_steps": 8260, "loss": 0.2204, "lr": 3.6433461606041695e-05, "epoch": 4.14043583535109, "percentage": 41.4, "elapsed_time": "0:07:03", "remaining_time": "0:09:59", "throughput": 3318.31, "total_tokens": 1404936}
{"current_steps": 3425, "total_steps": 8260, "loss": 0.1664, "lr": 3.6386459465282824e-05, "epoch": 4.146489104116223, "percentage": 41.46, "elapsed_time": "0:07:03", "remaining_time": "0:09:58", "throughput": 3318.87, "total_tokens": 1406920}
{"current_steps": 3430, "total_steps": 8260, "loss": 0.2006, "lr": 3.6339406487141255e-05, "epoch": 4.1525423728813555, "percentage": 41.53, "elapsed_time": "0:07:04", "remaining_time": "0:09:57", "throughput": 3319.33, "total_tokens": 1408840}
{"current_steps": 3435, "total_steps": 8260, "loss": 0.1948, "lr": 3.6292302881695464e-05, "epoch": 4.158595641646489, "percentage": 41.59, "elapsed_time": "0:07:04", "remaining_time": "0:09:56", "throughput": 3319.82, "total_tokens": 1410696}
{"current_steps": 3440, "total_steps": 8260, "loss": 0.1636, "lr": 3.6245148859249996e-05, "epoch": 4.164648910411622, "percentage": 41.65, "elapsed_time": "0:07:05", "remaining_time": "0:09:56", "throughput": 3320.39, "total_tokens": 1412680}
{"current_steps": 3445, "total_steps": 8260, "loss": 0.2027, "lr": 3.619794463033447e-05, "epoch": 4.170702179176756, "percentage": 41.71, "elapsed_time": "0:07:06", "remaining_time": "0:09:55", "throughput": 3320.77, "total_tokens": 1414728}
{"current_steps": 3450, "total_steps": 8260, "loss": 0.1466, "lr": 3.6150690405702685e-05, "epoch": 4.176755447941889, "percentage": 41.77, "elapsed_time": "0:07:06", "remaining_time": "0:09:54", "throughput": 3321.34, "total_tokens": 1416712}
{"current_steps": 3455, "total_steps": 8260, "loss": 0.2497, "lr": 3.6103386396331635e-05, "epoch": 4.182808716707021, "percentage": 41.83, "elapsed_time": "0:07:07", "remaining_time": "0:09:54", "throughput": 3321.88, "total_tokens": 1418952}
{"current_steps": 3460, "total_steps": 8260, "loss": 0.1785, "lr": 3.605603281342061e-05, "epoch": 4.188861985472155, "percentage": 41.89, "elapsed_time": "0:07:07", "remaining_time": "0:09:53", "throughput": 3322.45, "total_tokens": 1421032}
{"current_steps": 3465, "total_steps": 8260, "loss": 0.188, "lr": 3.6008629868390204e-05, "epoch": 4.194915254237288, "percentage": 41.95, "elapsed_time": "0:07:08", "remaining_time": "0:09:52", "throughput": 3323.09, "total_tokens": 1423048}
{"current_steps": 3470, "total_steps": 8260, "loss": 0.2429, "lr": 3.5961177772881434e-05, "epoch": 4.200968523002421, "percentage": 42.01, "elapsed_time": "0:07:08", "remaining_time": "0:09:51", "throughput": 3323.63, "total_tokens": 1425192}
{"current_steps": 3475, "total_steps": 8260, "loss": 0.1919, "lr": 3.591367673875472e-05, "epoch": 4.207021791767555, "percentage": 42.07, "elapsed_time": "0:07:09", "remaining_time": "0:09:51", "throughput": 3324.29, "total_tokens": 1427304}
{"current_steps": 3480, "total_steps": 8260, "loss": 0.1988, "lr": 3.5866126978089025e-05, "epoch": 4.213075060532688, "percentage": 42.13, "elapsed_time": "0:07:09", "remaining_time": "0:09:50", "throughput": 3324.86, "total_tokens": 1429288}
{"current_steps": 3485, "total_steps": 8260, "loss": 0.1889, "lr": 3.5818528703180826e-05, "epoch": 4.219128329297821, "percentage": 42.19, "elapsed_time": "0:07:10", "remaining_time": "0:09:49", "throughput": 3325.19, "total_tokens": 1431400}
{"current_steps": 3490, "total_steps": 8260, "loss": 0.2126, "lr": 3.577088212654322e-05, "epoch": 4.225181598062954, "percentage": 42.25, "elapsed_time": "0:07:11", "remaining_time": "0:09:49", "throughput": 3325.8, "total_tokens": 1433576}
{"current_steps": 3495, "total_steps": 8260, "loss": 0.1897, "lr": 3.572318746090496e-05, "epoch": 4.231234866828087, "percentage": 42.31, "elapsed_time": "0:07:11", "remaining_time": "0:09:48", "throughput": 3326.35, "total_tokens": 1435560}
{"current_steps": 3500, "total_steps": 8260, "loss": 0.202, "lr": 3.5675444919209486e-05, "epoch": 4.237288135593221, "percentage": 42.37, "elapsed_time": "0:07:12", "remaining_time": "0:09:47", "throughput": 3327.0, "total_tokens": 1437672}
{"current_steps": 3505, "total_steps": 8260, "loss": 0.2192, "lr": 3.5627654714614e-05, "epoch": 4.243341404358354, "percentage": 42.43, "elapsed_time": "0:07:12", "remaining_time": "0:09:46", "throughput": 3327.4, "total_tokens": 1439656}
{"current_steps": 3510, "total_steps": 8260, "loss": 0.1864, "lr": 3.557981706048852e-05, "epoch": 4.249394673123486, "percentage": 42.49, "elapsed_time": "0:07:13", "remaining_time": "0:09:46", "throughput": 3327.88, "total_tokens": 1441608}
{"current_steps": 3515, "total_steps": 8260, "loss": 0.1622, "lr": 3.5531932170414896e-05, "epoch": 4.25544794188862, "percentage": 42.55, "elapsed_time": "0:07:13", "remaining_time": "0:09:45", "throughput": 3328.33, "total_tokens": 1443624}
{"current_steps": 3520, "total_steps": 8260, "loss": 0.222, "lr": 3.5484000258185876e-05, "epoch": 4.261501210653753, "percentage": 42.62, "elapsed_time": "0:07:14", "remaining_time": "0:09:44", "throughput": 3329.12, "total_tokens": 1445736}
{"current_steps": 3525, "total_steps": 8260, "loss": 0.1907, "lr": 3.5436021537804144e-05, "epoch": 4.267554479418886, "percentage": 42.68, "elapsed_time": "0:07:14", "remaining_time": "0:09:44", "throughput": 3329.51, "total_tokens": 1447880}
{"current_steps": 3530, "total_steps": 8260, "loss": 0.1882, "lr": 3.538799622348139e-05, "epoch": 4.27360774818402, "percentage": 42.74, "elapsed_time": "0:07:15", "remaining_time": "0:09:43", "throughput": 3330.09, "total_tokens": 1449896}
{"current_steps": 3535, "total_steps": 8260, "loss": 0.1605, "lr": 3.5339924529637304e-05, "epoch": 4.279661016949152, "percentage": 42.8, "elapsed_time": "0:07:15", "remaining_time": "0:09:42", "throughput": 3330.66, "total_tokens": 1451880}
{"current_steps": 3540, "total_steps": 8260, "loss": 0.1536, "lr": 3.529180667089868e-05, "epoch": 4.285714285714286, "percentage": 42.86, "elapsed_time": "0:07:16", "remaining_time": "0:09:41", "throughput": 3331.3, "total_tokens": 1453992}
{"current_steps": 3545, "total_steps": 8260, "loss": 0.2359, "lr": 3.52436428620984e-05, "epoch": 4.291767554479419, "percentage": 42.92, "elapsed_time": "0:07:16", "remaining_time": "0:09:41", "throughput": 3331.9, "total_tokens": 1455912}
{"current_steps": 3550, "total_steps": 8260, "loss": 0.1858, "lr": 3.5195433318274516e-05, "epoch": 4.297820823244552, "percentage": 42.98, "elapsed_time": "0:07:17", "remaining_time": "0:09:40", "throughput": 3332.35, "total_tokens": 1458024}
{"current_steps": 3555, "total_steps": 8260, "loss": 0.2425, "lr": 3.514717825466925e-05, "epoch": 4.303874092009686, "percentage": 43.04, "elapsed_time": "0:07:18", "remaining_time": "0:09:39", "throughput": 3332.83, "total_tokens": 1459976}
{"current_steps": 3560, "total_steps": 8260, "loss": 0.1324, "lr": 3.509887788672809e-05, "epoch": 4.309927360774818, "percentage": 43.1, "elapsed_time": "0:07:18", "remaining_time": "0:09:39", "throughput": 3333.37, "total_tokens": 1462120}
{"current_steps": 3565, "total_steps": 8260, "loss": 0.182, "lr": 3.5050532430098774e-05, "epoch": 4.315980629539951, "percentage": 43.16, "elapsed_time": "0:07:19", "remaining_time": "0:09:38", "throughput": 3333.74, "total_tokens": 1464104}
{"current_steps": 3570, "total_steps": 8260, "loss": 0.1687, "lr": 3.500214210063035e-05, "epoch": 4.322033898305085, "percentage": 43.22, "elapsed_time": "0:07:19", "remaining_time": "0:09:37", "throughput": 3334.21, "total_tokens": 1466216}
{"current_steps": 3575, "total_steps": 8260, "loss": 0.1743, "lr": 3.495370711437221e-05, "epoch": 4.328087167070218, "percentage": 43.28, "elapsed_time": "0:07:20", "remaining_time": "0:09:37", "throughput": 3334.7, "total_tokens": 1468264}
{"current_steps": 3580, "total_steps": 8260, "loss": 0.2128, "lr": 3.490522768757316e-05, "epoch": 4.3341404358353515, "percentage": 43.34, "elapsed_time": "0:07:20", "remaining_time": "0:09:36", "throughput": 3335.21, "total_tokens": 1470408}
{"current_steps": 3585, "total_steps": 8260, "loss": 0.1284, "lr": 3.485670403668036e-05, "epoch": 4.340193704600484, "percentage": 43.4, "elapsed_time": "0:07:21", "remaining_time": "0:09:35", "throughput": 3335.76, "total_tokens": 1472392}
{"current_steps": 3590, "total_steps": 8260, "loss": 0.229, "lr": 3.480813637833846e-05, "epoch": 4.346246973365617, "percentage": 43.46, "elapsed_time": "0:07:21", "remaining_time": "0:09:34", "throughput": 3336.38, "total_tokens": 1474504}
{"current_steps": 3595, "total_steps": 8260, "loss": 0.2352, "lr": 3.475952492938859e-05, "epoch": 4.352300242130751, "percentage": 43.52, "elapsed_time": "0:07:22", "remaining_time": "0:09:34", "throughput": 3336.83, "total_tokens": 1476616}
{"current_steps": 3600, "total_steps": 8260, "loss": 0.1646, "lr": 3.471086990686737e-05, "epoch": 4.358353510895884, "percentage": 43.58, "elapsed_time": "0:07:23", "remaining_time": "0:09:33", "throughput": 3337.33, "total_tokens": 1478664}
{"current_steps": 3605, "total_steps": 8260, "loss": 0.2157, "lr": 3.466217152800598e-05, "epoch": 4.364406779661017, "percentage": 43.64, "elapsed_time": "0:07:23", "remaining_time": "0:09:32", "throughput": 3337.87, "total_tokens": 1480648}
{"current_steps": 3610, "total_steps": 8260, "loss": 0.1768, "lr": 3.461343001022919e-05, "epoch": 4.37046004842615, "percentage": 43.7, "elapsed_time": "0:07:24", "remaining_time": "0:09:32", "throughput": 3338.33, "total_tokens": 1482760}
{"current_steps": 3615, "total_steps": 8260, "loss": 0.186, "lr": 3.456464557115433e-05, "epoch": 4.376513317191283, "percentage": 43.77, "elapsed_time": "0:07:24", "remaining_time": "0:09:31", "throughput": 3338.71, "total_tokens": 1484744}
{"current_steps": 3620, "total_steps": 8260, "loss": 0.1555, "lr": 3.45158184285904e-05, "epoch": 4.3825665859564165, "percentage": 43.83, "elapsed_time": "0:07:25", "remaining_time": "0:09:30", "throughput": 3339.22, "total_tokens": 1486728}
{"current_steps": 3625, "total_steps": 8260, "loss": 0.2291, "lr": 3.446694880053704e-05, "epoch": 4.38861985472155, "percentage": 43.89, "elapsed_time": "0:07:25", "remaining_time": "0:09:29", "throughput": 3339.77, "total_tokens": 1488808}
{"current_steps": 3630, "total_steps": 8260, "loss": 0.1844, "lr": 3.441803690518359e-05, "epoch": 4.394673123486683, "percentage": 43.95, "elapsed_time": "0:07:26", "remaining_time": "0:09:29", "throughput": 3340.44, "total_tokens": 1490984}
{"current_steps": 3635, "total_steps": 8260, "loss": 0.217, "lr": 3.4369082960908084e-05, "epoch": 4.400726392251816, "percentage": 44.01, "elapsed_time": "0:07:26", "remaining_time": "0:09:28", "throughput": 3340.83, "total_tokens": 1493000}
{"current_steps": 3640, "total_steps": 8260, "loss": 0.2349, "lr": 3.432008718627631e-05, "epoch": 4.406779661016949, "percentage": 44.07, "elapsed_time": "0:07:27", "remaining_time": "0:09:27", "throughput": 3341.21, "total_tokens": 1494920}
{"current_steps": 3645, "total_steps": 8260, "loss": 0.208, "lr": 3.4271049800040805e-05, "epoch": 4.412832929782082, "percentage": 44.13, "elapsed_time": "0:07:27", "remaining_time": "0:09:27", "throughput": 3341.57, "total_tokens": 1496904}
{"current_steps": 3650, "total_steps": 8260, "loss": 0.1765, "lr": 3.42219710211399e-05, "epoch": 4.418886198547216, "percentage": 44.19, "elapsed_time": "0:07:28", "remaining_time": "0:09:26", "throughput": 3341.77, "total_tokens": 1498792}
{"current_steps": 3655, "total_steps": 8260, "loss": 0.1913, "lr": 3.417285106869673e-05, "epoch": 4.424939467312349, "percentage": 44.25, "elapsed_time": "0:07:29", "remaining_time": "0:09:25", "throughput": 3342.41, "total_tokens": 1500840}
{"current_steps": 3660, "total_steps": 8260, "loss": 0.2011, "lr": 3.4123690162018246e-05, "epoch": 4.4309927360774815, "percentage": 44.31, "elapsed_time": "0:07:29", "remaining_time": "0:09:25", "throughput": 3342.87, "total_tokens": 1502888}
{"current_steps": 3665, "total_steps": 8260, "loss": 0.2332, "lr": 3.407448852059426e-05, "epoch": 4.437046004842615, "percentage": 44.37, "elapsed_time": "0:07:30", "remaining_time": "0:09:24", "throughput": 3343.46, "total_tokens": 1504904}
{"current_steps": 3670, "total_steps": 8260, "loss": 0.1929, "lr": 3.4025246364096455e-05, "epoch": 4.443099273607748, "percentage": 44.43, "elapsed_time": "0:07:30", "remaining_time": "0:09:23", "throughput": 3344.02, "total_tokens": 1506824}
{"current_steps": 3675, "total_steps": 8260, "loss": 0.2259, "lr": 3.397596391237739e-05, "epoch": 4.4491525423728815, "percentage": 44.49, "elapsed_time": "0:07:31", "remaining_time": "0:09:22", "throughput": 3344.63, "total_tokens": 1508872}
{"current_steps": 3680, "total_steps": 8260, "loss": 0.2245, "lr": 3.3926641385469556e-05, "epoch": 4.455205811138015, "percentage": 44.55, "elapsed_time": "0:07:31", "remaining_time": "0:09:22", "throughput": 3345.07, "total_tokens": 1510824}
{"current_steps": 3685, "total_steps": 8260, "loss": 0.1907, "lr": 3.387727900358435e-05, "epoch": 4.461259079903147, "percentage": 44.61, "elapsed_time": "0:07:32", "remaining_time": "0:09:21", "throughput": 3345.72, "total_tokens": 1512968}
{"current_steps": 3690, "total_steps": 8260, "loss": 0.2102, "lr": 3.38278769871111e-05, "epoch": 4.467312348668281, "percentage": 44.67, "elapsed_time": "0:07:32", "remaining_time": "0:09:20", "throughput": 3346.12, "total_tokens": 1515144}
{"current_steps": 3695, "total_steps": 8260, "loss": 0.1936, "lr": 3.377843555661612e-05, "epoch": 4.473365617433414, "percentage": 44.73, "elapsed_time": "0:07:33", "remaining_time": "0:09:20", "throughput": 3346.6, "total_tokens": 1517192}
{"current_steps": 3700, "total_steps": 8260, "loss": 0.1758, "lr": 3.372895493284167e-05, "epoch": 4.479418886198547, "percentage": 44.79, "elapsed_time": "0:07:33", "remaining_time": "0:09:19", "throughput": 3347.06, "total_tokens": 1519400}
{"current_steps": 3705, "total_steps": 8260, "loss": 0.1447, "lr": 3.367943533670501e-05, "epoch": 4.485472154963681, "percentage": 44.85, "elapsed_time": "0:07:34", "remaining_time": "0:09:18", "throughput": 3347.65, "total_tokens": 1521416}
{"current_steps": 3710, "total_steps": 8260, "loss": 0.2027, "lr": 3.3629876989297405e-05, "epoch": 4.491525423728813, "percentage": 44.92, "elapsed_time": "0:07:34", "remaining_time": "0:09:17", "throughput": 3348.04, "total_tokens": 1523240}
{"current_steps": 3715, "total_steps": 8260, "loss": 0.164, "lr": 3.3580280111883125e-05, "epoch": 4.4975786924939465, "percentage": 44.98, "elapsed_time": "0:07:35", "remaining_time": "0:09:17", "throughput": 3348.33, "total_tokens": 1525288}
{"current_steps": 3717, "total_steps": 8260, "eval_loss": 0.1990930587053299, "epoch": 4.5, "percentage": 45.0, "elapsed_time": "0:07:40", "remaining_time": "0:09:22", "throughput": 3314.71, "total_tokens": 1526088}
{"current_steps": 3720, "total_steps": 8260, "loss": 0.1816, "lr": 3.3530644925898465e-05, "epoch": 4.50363196125908, "percentage": 45.04, "elapsed_time": "0:07:41", "remaining_time": "0:09:23", "throughput": 3308.53, "total_tokens": 1527304}
{"current_steps": 3725, "total_steps": 8260, "loss": 0.2076, "lr": 3.348097165295076e-05, "epoch": 4.509685230024213, "percentage": 45.1, "elapsed_time": "0:07:42", "remaining_time": "0:09:22", "throughput": 3308.97, "total_tokens": 1529384}
{"current_steps": 3730, "total_steps": 8260, "loss": 0.1885, "lr": 3.34312605148174e-05, "epoch": 4.5157384987893465, "percentage": 45.16, "elapsed_time": "0:07:42", "remaining_time": "0:09:22", "throughput": 3309.35, "total_tokens": 1531464}
{"current_steps": 3735, "total_steps": 8260, "loss": 0.1685, "lr": 3.338151173344483e-05, "epoch": 4.521791767554479, "percentage": 45.22, "elapsed_time": "0:07:43", "remaining_time": "0:09:21", "throughput": 3309.74, "total_tokens": 1533608}
{"current_steps": 3740, "total_steps": 8260, "loss": 0.198, "lr": 3.333172553094754e-05, "epoch": 4.527845036319612, "percentage": 45.28, "elapsed_time": "0:07:43", "remaining_time": "0:09:20", "throughput": 3310.38, "total_tokens": 1535656}
{"current_steps": 3745, "total_steps": 8260, "loss": 0.1896, "lr": 3.328190212960712e-05, "epoch": 4.533898305084746, "percentage": 45.34, "elapsed_time": "0:07:44", "remaining_time": "0:09:19", "throughput": 3310.78, "total_tokens": 1537640}
{"current_steps": 3750, "total_steps": 8260, "loss": 0.2125, "lr": 3.323204175187125e-05, "epoch": 4.539951573849879, "percentage": 45.4, "elapsed_time": "0:07:44", "remaining_time": "0:09:19", "throughput": 3311.39, "total_tokens": 1539592}
{"current_steps": 3755, "total_steps": 8260, "loss": 0.1875, "lr": 3.318214462035266e-05, "epoch": 4.546004842615012, "percentage": 45.46, "elapsed_time": "0:07:45", "remaining_time": "0:09:18", "throughput": 3311.92, "total_tokens": 1541576}
{"current_steps": 3760, "total_steps": 8260, "loss": 0.2181, "lr": 3.3132210957828226e-05, "epoch": 4.552058111380145, "percentage": 45.52, "elapsed_time": "0:07:45", "remaining_time": "0:09:17", "throughput": 3312.57, "total_tokens": 1543464}
{"current_steps": 3765, "total_steps": 8260, "loss": 0.1892, "lr": 3.3082240987237875e-05, "epoch": 4.558111380145278, "percentage": 45.58, "elapsed_time": "0:07:46", "remaining_time": "0:09:16", "throughput": 3312.87, "total_tokens": 1545416}
{"current_steps": 3770, "total_steps": 8260, "loss": 0.2365, "lr": 3.3032234931683684e-05, "epoch": 4.5641646489104115, "percentage": 45.64, "elapsed_time": "0:07:47", "remaining_time": "0:09:16", "throughput": 3313.31, "total_tokens": 1547432}
{"current_steps": 3775, "total_steps": 8260, "loss": 0.2051, "lr": 3.2982193014428805e-05, "epoch": 4.570217917675545, "percentage": 45.7, "elapsed_time": "0:07:47", "remaining_time": "0:09:15", "throughput": 3313.82, "total_tokens": 1549576}
{"current_steps": 3780, "total_steps": 8260, "loss": 0.2103, "lr": 3.2932115458896515e-05, "epoch": 4.576271186440678, "percentage": 45.76, "elapsed_time": "0:07:48", "remaining_time": "0:09:14", "throughput": 3314.28, "total_tokens": 1551688}
{"current_steps": 3785, "total_steps": 8260, "loss": 0.1843, "lr": 3.2882002488669204e-05, "epoch": 4.582324455205811, "percentage": 45.82, "elapsed_time": "0:07:48", "remaining_time": "0:09:14", "throughput": 3314.82, "total_tokens": 1553672}
{"current_steps": 3790, "total_steps": 8260, "loss": 0.1787, "lr": 3.28318543274874e-05, "epoch": 4.588377723970944, "percentage": 45.88, "elapsed_time": "0:07:49", "remaining_time": "0:09:13", "throughput": 3315.27, "total_tokens": 1555720}
{"current_steps": 3795, "total_steps": 8260, "loss": 0.1915, "lr": 3.278167119924872e-05, "epoch": 4.594430992736077, "percentage": 45.94, "elapsed_time": "0:07:49", "remaining_time": "0:09:12", "throughput": 3315.73, "total_tokens": 1557672}
{"current_steps": 3800, "total_steps": 8260, "loss": 0.172, "lr": 3.27314533280069e-05, "epoch": 4.600484261501211, "percentage": 46.0, "elapsed_time": "0:07:50", "remaining_time": "0:09:12", "throughput": 3316.36, "total_tokens": 1559880}
{"current_steps": 3805, "total_steps": 8260, "loss": 0.2445, "lr": 3.268120093797082e-05, "epoch": 4.606537530266344, "percentage": 46.07, "elapsed_time": "0:07:50", "remaining_time": "0:09:11", "throughput": 3316.9, "total_tokens": 1561960}
{"current_steps": 3810, "total_steps": 8260, "loss": 0.1731, "lr": 3.263091425350345e-05, "epoch": 4.6125907990314765, "percentage": 46.13, "elapsed_time": "0:07:51", "remaining_time": "0:09:10", "throughput": 3317.45, "total_tokens": 1563880}
{"current_steps": 3815, "total_steps": 8260, "loss": 0.1948, "lr": 3.258059349912089e-05, "epoch": 4.61864406779661, "percentage": 46.19, "elapsed_time": "0:07:51", "remaining_time": "0:09:09", "throughput": 3317.87, "total_tokens": 1565896}
{"current_steps": 3820, "total_steps": 8260, "loss": 0.2373, "lr": 3.253023889949135e-05, "epoch": 4.624697336561743, "percentage": 46.25, "elapsed_time": "0:07:52", "remaining_time": "0:09:09", "throughput": 3318.35, "total_tokens": 1568040}
{"current_steps": 3825, "total_steps": 8260, "loss": 0.1731, "lr": 3.247985067943414e-05, "epoch": 4.6307506053268765, "percentage": 46.31, "elapsed_time": "0:07:53", "remaining_time": "0:09:08", "throughput": 3318.94, "total_tokens": 1570056}
{"current_steps": 3830, "total_steps": 8260, "loss": 0.1759, "lr": 3.2429429063918696e-05, "epoch": 4.63680387409201, "percentage": 46.37, "elapsed_time": "0:07:53", "remaining_time": "0:09:07", "throughput": 3319.37, "total_tokens": 1572168}
{"current_steps": 3835, "total_steps": 8260, "loss": 0.3003, "lr": 3.2378974278063534e-05, "epoch": 4.642857142857143, "percentage": 46.43, "elapsed_time": "0:07:54", "remaining_time": "0:09:07", "throughput": 3319.85, "total_tokens": 1574216}
{"current_steps": 3840, "total_steps": 8260, "loss": 0.1835, "lr": 3.232848654713528e-05, "epoch": 4.648910411622276, "percentage": 46.49, "elapsed_time": "0:07:54", "remaining_time": "0:09:06", "throughput": 3320.47, "total_tokens": 1576168}
{"current_steps": 3845, "total_steps": 8260, "loss": 0.1647, "lr": 3.227796609654765e-05, "epoch": 4.654963680387409, "percentage": 46.55, "elapsed_time": "0:07:55", "remaining_time": "0:09:05", "throughput": 3320.98, "total_tokens": 1578152}
{"current_steps": 3850, "total_steps": 8260, "loss": 0.2044, "lr": 3.222741315186043e-05, "epoch": 4.661016949152542, "percentage": 46.61, "elapsed_time": "0:07:55", "remaining_time": "0:09:04", "throughput": 3321.29, "total_tokens": 1580104}
{"current_steps": 3855, "total_steps": 8260, "loss": 0.2507, "lr": 3.217682793877851e-05, "epoch": 4.667070217917676, "percentage": 46.67, "elapsed_time": "0:07:56", "remaining_time": "0:09:04", "throughput": 3321.89, "total_tokens": 1582056}
{"current_steps": 3860, "total_steps": 8260, "loss": 0.1498, "lr": 3.212621068315081e-05, "epoch": 4.673123486682809, "percentage": 46.73, "elapsed_time": "0:07:56", "remaining_time": "0:09:03", "throughput": 3322.28, "total_tokens": 1584136}
{"current_steps": 3865, "total_steps": 8260, "loss": 0.1818, "lr": 3.207556161096935e-05, "epoch": 4.6791767554479415, "percentage": 46.79, "elapsed_time": "0:07:57", "remaining_time": "0:09:02", "throughput": 3322.77, "total_tokens": 1586184}
{"current_steps": 3870, "total_steps": 8260, "loss": 0.2087, "lr": 3.202488094836819e-05, "epoch": 4.685230024213075, "percentage": 46.85, "elapsed_time": "0:07:57", "remaining_time": "0:09:02", "throughput": 3323.2, "total_tokens": 1588296}
{"current_steps": 3875, "total_steps": 8260, "loss": 0.1661, "lr": 3.197416892162242e-05, "epoch": 4.691283292978208, "percentage": 46.91, "elapsed_time": "0:07:58", "remaining_time": "0:09:01", "throughput": 3323.63, "total_tokens": 1590504}
{"current_steps": 3880, "total_steps": 8260, "loss": 0.1799, "lr": 3.1923425757147175e-05, "epoch": 4.697336561743342, "percentage": 46.97, "elapsed_time": "0:07:59", "remaining_time": "0:09:00", "throughput": 3324.03, "total_tokens": 1592584}
{"current_steps": 3885, "total_steps": 8260, "loss": 0.2737, "lr": 3.1872651681496604e-05, "epoch": 4.703389830508475, "percentage": 47.03, "elapsed_time": "0:07:59", "remaining_time": "0:09:00", "throughput": 3324.64, "total_tokens": 1594728}
{"current_steps": 3890, "total_steps": 8260, "loss": 0.2093, "lr": 3.182184692136287e-05, "epoch": 4.709443099273607, "percentage": 47.09, "elapsed_time": "0:08:00", "remaining_time": "0:08:59", "throughput": 3325.1, "total_tokens": 1596776}
{"current_steps": 3895, "total_steps": 8260, "loss": 0.1865, "lr": 3.177101170357513e-05, "epoch": 4.715496368038741, "percentage": 47.15, "elapsed_time": "0:08:00", "remaining_time": "0:08:58", "throughput": 3325.64, "total_tokens": 1598984}
{"current_steps": 3900, "total_steps": 8260, "loss": 0.1883, "lr": 3.1720146255098535e-05, "epoch": 4.721549636803874, "percentage": 47.22, "elapsed_time": "0:08:01", "remaining_time": "0:08:58", "throughput": 3325.93, "total_tokens": 1601096}
{"current_steps": 3905, "total_steps": 8260, "loss": 0.1948, "lr": 3.16692508030332e-05, "epoch": 4.727602905569007, "percentage": 47.28, "elapsed_time": "0:08:01", "remaining_time": "0:08:57", "throughput": 3326.56, "total_tokens": 1603336}
{"current_steps": 3910, "total_steps": 8260, "loss": 0.1765, "lr": 3.16183255746132e-05, "epoch": 4.733656174334141, "percentage": 47.34, "elapsed_time": "0:08:02", "remaining_time": "0:08:56", "throughput": 3327.06, "total_tokens": 1605320}
{"current_steps": 3915, "total_steps": 8260, "loss": 0.2056, "lr": 3.156737079720555e-05, "epoch": 4.739709443099273, "percentage": 47.4, "elapsed_time": "0:08:03", "remaining_time": "0:08:56", "throughput": 3327.57, "total_tokens": 1607304}
{"current_steps": 3920, "total_steps": 8260, "loss": 0.2443, "lr": 3.151638669830919e-05, "epoch": 4.745762711864407, "percentage": 47.46, "elapsed_time": "0:08:03", "remaining_time": "0:08:55", "throughput": 3327.95, "total_tokens": 1609384}
{"current_steps": 3925, "total_steps": 8260, "loss": 0.1949, "lr": 3.1465373505554e-05, "epoch": 4.75181598062954, "percentage": 47.52, "elapsed_time": "0:08:04", "remaining_time": "0:08:54", "throughput": 3328.49, "total_tokens": 1611304}
{"current_steps": 3930, "total_steps": 8260, "loss": 0.2061, "lr": 3.14143314466997e-05, "epoch": 4.757869249394673, "percentage": 47.58, "elapsed_time": "0:08:04", "remaining_time": "0:08:53", "throughput": 3328.82, "total_tokens": 1613192}
{"current_steps": 3935, "total_steps": 8260, "loss": 0.186, "lr": 3.136326074963494e-05, "epoch": 4.763922518159807, "percentage": 47.64, "elapsed_time": "0:08:05", "remaining_time": "0:08:53", "throughput": 3329.37, "total_tokens": 1615304}
{"current_steps": 3940, "total_steps": 8260, "loss": 0.179, "lr": 3.131216164237622e-05, "epoch": 4.76997578692494, "percentage": 47.7, "elapsed_time": "0:08:05", "remaining_time": "0:08:52", "throughput": 3329.83, "total_tokens": 1617288}
{"current_steps": 3945, "total_steps": 8260, "loss": 0.1628, "lr": 3.1261034353066884e-05, "epoch": 4.776029055690072, "percentage": 47.76, "elapsed_time": "0:08:06", "remaining_time": "0:08:51", "throughput": 3330.31, "total_tokens": 1619336}
{"current_steps": 3950, "total_steps": 8260, "loss": 0.1642, "lr": 3.1209879109976064e-05, "epoch": 4.782082324455206, "percentage": 47.82, "elapsed_time": "0:08:06", "remaining_time": "0:08:51", "throughput": 3330.81, "total_tokens": 1621416}
{"current_steps": 3955, "total_steps": 8260, "loss": 0.1444, "lr": 3.115869614149776e-05, "epoch": 4.788135593220339, "percentage": 47.88, "elapsed_time": "0:08:07", "remaining_time": "0:08:50", "throughput": 3331.22, "total_tokens": 1623432}
{"current_steps": 3960, "total_steps": 8260, "loss": 0.2617, "lr": 3.1107485676149714e-05, "epoch": 4.7941888619854724, "percentage": 47.94, "elapsed_time": "0:08:07", "remaining_time": "0:08:49", "throughput": 3331.78, "total_tokens": 1625448}
{"current_steps": 3965, "total_steps": 8260, "loss": 0.219, "lr": 3.105624794257245e-05, "epoch": 4.800242130750606, "percentage": 48.0, "elapsed_time": "0:08:08", "remaining_time": "0:08:49", "throughput": 3332.17, "total_tokens": 1627624}
{"current_steps": 3970, "total_steps": 8260, "loss": 0.1936, "lr": 3.100498316952823e-05, "epoch": 4.806295399515738, "percentage": 48.06, "elapsed_time": "0:08:09", "remaining_time": "0:08:48", "throughput": 3332.54, "total_tokens": 1629800}
{"current_steps": 3975, "total_steps": 8260, "loss": 0.1728, "lr": 3.095369158590006e-05, "epoch": 4.812348668280872, "percentage": 48.12, "elapsed_time": "0:08:09", "remaining_time": "0:08:47", "throughput": 3332.93, "total_tokens": 1631720}
{"current_steps": 3980, "total_steps": 8260, "loss": 0.174, "lr": 3.09023734206906e-05, "epoch": 4.818401937046005, "percentage": 48.18, "elapsed_time": "0:08:10", "remaining_time": "0:08:47", "throughput": 3333.26, "total_tokens": 1633704}
{"current_steps": 3985, "total_steps": 8260, "loss": 0.1985, "lr": 3.085102890302125e-05, "epoch": 4.824455205811138, "percentage": 48.24, "elapsed_time": "0:08:10", "remaining_time": "0:08:46", "throughput": 3333.69, "total_tokens": 1635656}
{"current_steps": 3990, "total_steps": 8260, "loss": 0.2001, "lr": 3.079965826213102e-05, "epoch": 4.830508474576272, "percentage": 48.31, "elapsed_time": "0:08:11", "remaining_time": "0:08:45", "throughput": 3334.04, "total_tokens": 1637736}
{"current_steps": 3995, "total_steps": 8260, "loss": 0.22, "lr": 3.074826172737559e-05, "epoch": 4.836561743341404, "percentage": 48.37, "elapsed_time": "0:08:11", "remaining_time": "0:08:45", "throughput": 3334.24, "total_tokens": 1639816}
{"current_steps": 4000, "total_steps": 8260, "loss": 0.1287, "lr": 3.0696839528226206e-05, "epoch": 4.842615012106537, "percentage": 48.43, "elapsed_time": "0:08:12", "remaining_time": "0:08:44", "throughput": 3334.58, "total_tokens": 1641736}
{"current_steps": 4005, "total_steps": 8260, "loss": 0.2003, "lr": 3.064539189426874e-05, "epoch": 4.848668280871671, "percentage": 48.49, "elapsed_time": "0:08:12", "remaining_time": "0:08:43", "throughput": 3334.95, "total_tokens": 1643656}
{"current_steps": 4010, "total_steps": 8260, "loss": 0.184, "lr": 3.059391905520259e-05, "epoch": 4.854721549636804, "percentage": 48.55, "elapsed_time": "0:08:13", "remaining_time": "0:08:42", "throughput": 3335.45, "total_tokens": 1645736}
{"current_steps": 4015, "total_steps": 8260, "loss": 0.2378, "lr": 3.054242124083972e-05, "epoch": 4.8607748184019375, "percentage": 48.61, "elapsed_time": "0:08:13", "remaining_time": "0:08:42", "throughput": 3335.87, "total_tokens": 1647688}
{"current_steps": 4020, "total_steps": 8260, "loss": 0.2172, "lr": 3.0490898681103575e-05, "epoch": 4.86682808716707, "percentage": 48.67, "elapsed_time": "0:08:14", "remaining_time": "0:08:41", "throughput": 3336.5, "total_tokens": 1649768}
{"current_steps": 4025, "total_steps": 8260, "loss": 0.1981, "lr": 3.0439351606028094e-05, "epoch": 4.872881355932203, "percentage": 48.73, "elapsed_time": "0:08:14", "remaining_time": "0:08:40", "throughput": 3336.87, "total_tokens": 1651688}
{"current_steps": 4030, "total_steps": 8260, "loss": 0.1827, "lr": 3.0387780245756655e-05, "epoch": 4.878934624697337, "percentage": 48.79, "elapsed_time": "0:08:15", "remaining_time": "0:08:40", "throughput": 3337.28, "total_tokens": 1653896}
{"current_steps": 4035, "total_steps": 8260, "loss": 0.1881, "lr": 3.0336184830541093e-05, "epoch": 4.88498789346247, "percentage": 48.85, "elapsed_time": "0:08:16", "remaining_time": "0:08:39", "throughput": 3337.55, "total_tokens": 1656008}
{"current_steps": 4040, "total_steps": 8260, "loss": 0.1832, "lr": 3.028456559074061e-05, "epoch": 4.891041162227603, "percentage": 48.91, "elapsed_time": "0:08:16", "remaining_time": "0:08:38", "throughput": 3338.06, "total_tokens": 1658088}
{"current_steps": 4045, "total_steps": 8260, "loss": 0.1803, "lr": 3.0232922756820804e-05, "epoch": 4.897094430992736, "percentage": 48.97, "elapsed_time": "0:08:17", "remaining_time": "0:08:38", "throughput": 3338.6, "total_tokens": 1660200}
{"current_steps": 4050, "total_steps": 8260, "loss": 0.2514, "lr": 3.0181256559352587e-05, "epoch": 4.903147699757869, "percentage": 49.03, "elapsed_time": "0:08:17", "remaining_time": "0:08:37", "throughput": 3339.13, "total_tokens": 1662440}
{"current_steps": 4055, "total_steps": 8260, "loss": 0.2163, "lr": 3.0129567229011214e-05, "epoch": 4.9092009685230025, "percentage": 49.09, "elapsed_time": "0:08:18", "remaining_time": "0:08:36", "throughput": 3339.52, "total_tokens": 1664552}
{"current_steps": 4060, "total_steps": 8260, "loss": 0.1903, "lr": 3.0077854996575184e-05, "epoch": 4.915254237288136, "percentage": 49.15, "elapsed_time": "0:08:19", "remaining_time": "0:08:36", "throughput": 3339.8, "total_tokens": 1666600}
{"current_steps": 4065, "total_steps": 8260, "loss": 0.1926, "lr": 3.0026120092925293e-05, "epoch": 4.921307506053269, "percentage": 49.21, "elapsed_time": "0:08:19", "remaining_time": "0:08:35", "throughput": 3340.32, "total_tokens": 1668776}
{"current_steps": 4070, "total_steps": 8260, "loss": 0.2174, "lr": 2.9974362749043512e-05, "epoch": 4.927360774818402, "percentage": 49.27, "elapsed_time": "0:08:20", "remaining_time": "0:08:34", "throughput": 3340.67, "total_tokens": 1670952}
{"current_steps": 4075, "total_steps": 8260, "loss": 0.2114, "lr": 2.9922583196012037e-05, "epoch": 4.933414043583535, "percentage": 49.33, "elapsed_time": "0:08:20", "remaining_time": "0:08:34", "throughput": 3341.03, "total_tokens": 1673128}
{"current_steps": 4080, "total_steps": 8260, "loss": 0.2087, "lr": 2.9870781665012204e-05, "epoch": 4.939467312348668, "percentage": 49.39, "elapsed_time": "0:08:21", "remaining_time": "0:08:33", "throughput": 3341.34, "total_tokens": 1675112}
{"current_steps": 4085, "total_steps": 8260, "loss": 0.2063, "lr": 2.981895838732348e-05, "epoch": 4.945520581113802, "percentage": 49.46, "elapsed_time": "0:08:21", "remaining_time": "0:08:32", "throughput": 3341.66, "total_tokens": 1677096}
{"current_steps": 4090, "total_steps": 8260, "loss": 0.1886, "lr": 2.9767113594322426e-05, "epoch": 4.951573849878935, "percentage": 49.52, "elapsed_time": "0:08:22", "remaining_time": "0:08:32", "throughput": 3342.24, "total_tokens": 1679080}
{"current_steps": 4095, "total_steps": 8260, "loss": 0.193, "lr": 2.9715247517481655e-05, "epoch": 4.9576271186440675, "percentage": 49.58, "elapsed_time": "0:08:22", "remaining_time": "0:08:31", "throughput": 3342.73, "total_tokens": 1681000}
{"current_steps": 4100, "total_steps": 8260, "loss": 0.1913, "lr": 2.96633603883688e-05, "epoch": 4.963680387409201, "percentage": 49.64, "elapsed_time": "0:08:23", "remaining_time": "0:08:30", "throughput": 3343.14, "total_tokens": 1683048}
{"current_steps": 4105, "total_steps": 8260, "loss": 0.1875, "lr": 2.961145243864552e-05, "epoch": 4.969733656174334, "percentage": 49.7, "elapsed_time": "0:08:24", "remaining_time": "0:08:30", "throughput": 3343.52, "total_tokens": 1685160}
{"current_steps": 4110, "total_steps": 8260, "loss": 0.1799, "lr": 2.9559523900066395e-05, "epoch": 4.9757869249394675, "percentage": 49.76, "elapsed_time": "0:08:24", "remaining_time": "0:08:29", "throughput": 3343.83, "total_tokens": 1687048}
{"current_steps": 4115, "total_steps": 8260, "loss": 0.2211, "lr": 2.9507575004477955e-05, "epoch": 4.981840193704601, "percentage": 49.82, "elapsed_time": "0:08:25", "remaining_time": "0:08:28", "throughput": 3344.25, "total_tokens": 1689000}
{"current_steps": 4120, "total_steps": 8260, "loss": 0.1979, "lr": 2.9455605983817598e-05, "epoch": 4.987893462469733, "percentage": 49.88, "elapsed_time": "0:08:25", "remaining_time": "0:08:28", "throughput": 3344.65, "total_tokens": 1691112}
{"current_steps": 4125, "total_steps": 8260, "loss": 0.1927, "lr": 2.9403617070112587e-05, "epoch": 4.993946731234867, "percentage": 49.94, "elapsed_time": "0:08:26", "remaining_time": "0:08:27", "throughput": 3345.23, "total_tokens": 1693160}
{"current_steps": 4130, "total_steps": 8260, "loss": 0.1817, "lr": 2.9351608495479004e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:08:26", "remaining_time": "0:08:26", "throughput": 3344.85, "total_tokens": 1694912}
{"current_steps": 4130, "total_steps": 8260, "eval_loss": 0.20252937078475952, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:08:31", "remaining_time": "0:08:31", "throughput": 3314.65, "total_tokens": 1694912}
{"current_steps": 4135, "total_steps": 8260, "loss": 0.2291, "lr": 2.92995804921207e-05, "epoch": 5.006053268765133, "percentage": 50.06, "elapsed_time": "0:08:32", "remaining_time": "0:08:31", "throughput": 3308.35, "total_tokens": 1697056}
{"current_steps": 4140, "total_steps": 8260, "loss": 0.1772, "lr": 2.9247533292328273e-05, "epoch": 5.012106537530267, "percentage": 50.12, "elapsed_time": "0:08:33", "remaining_time": "0:08:31", "throughput": 3308.74, "total_tokens": 1699168}
{"current_steps": 4145, "total_steps": 8260, "loss": 0.2169, "lr": 2.9195467128478044e-05, "epoch": 5.018159806295399, "percentage": 50.18, "elapsed_time": "0:08:34", "remaining_time": "0:08:30", "throughput": 3309.07, "total_tokens": 1701152}
{"current_steps": 4150, "total_steps": 8260, "loss": 0.2558, "lr": 2.914338223303098e-05, "epoch": 5.0242130750605325, "percentage": 50.24, "elapsed_time": "0:08:34", "remaining_time": "0:08:29", "throughput": 3309.62, "total_tokens": 1703168}
{"current_steps": 4155, "total_steps": 8260, "loss": 0.1478, "lr": 2.9091278838531695e-05, "epoch": 5.030266343825666, "percentage": 50.3, "elapsed_time": "0:08:35", "remaining_time": "0:08:28", "throughput": 3310.05, "total_tokens": 1705120}
{"current_steps": 4160, "total_steps": 8260, "loss": 0.2132, "lr": 2.9039157177607383e-05, "epoch": 5.036319612590799, "percentage": 50.36, "elapsed_time": "0:08:35", "remaining_time": "0:08:28", "throughput": 3310.42, "total_tokens": 1707200}
{"current_steps": 4165, "total_steps": 8260, "loss": 0.2245, "lr": 2.8987017482966815e-05, "epoch": 5.0423728813559325, "percentage": 50.42, "elapsed_time": "0:08:36", "remaining_time": "0:08:27", "throughput": 3310.96, "total_tokens": 1709120}
{"current_steps": 4170, "total_steps": 8260, "loss": 0.1834, "lr": 2.893485998739926e-05, "epoch": 5.048426150121065, "percentage": 50.48, "elapsed_time": "0:08:36", "remaining_time": "0:08:26", "throughput": 3311.2, "total_tokens": 1711200}
{"current_steps": 4175, "total_steps": 8260, "loss": 0.186, "lr": 2.8882684923773458e-05, "epoch": 5.054479418886198, "percentage": 50.54, "elapsed_time": "0:08:37", "remaining_time": "0:08:26", "throughput": 3311.66, "total_tokens": 1713248}
{"current_steps": 4180, "total_steps": 8260, "loss": 0.2172, "lr": 2.883049252503659e-05, "epoch": 5.060532687651332, "percentage": 50.61, "elapsed_time": "0:08:37", "remaining_time": "0:08:25", "throughput": 3312.12, "total_tokens": 1715296}
{"current_steps": 4185, "total_steps": 8260, "loss": 0.1909, "lr": 2.877828302421325e-05, "epoch": 5.066585956416465, "percentage": 50.67, "elapsed_time": "0:08:38", "remaining_time": "0:08:24", "throughput": 3312.6, "total_tokens": 1717280}
{"current_steps": 4190, "total_steps": 8260, "loss": 0.161, "lr": 2.872605665440436e-05, "epoch": 5.072639225181598, "percentage": 50.73, "elapsed_time": "0:08:38", "remaining_time": "0:08:24", "throughput": 3313.13, "total_tokens": 1719136}
{"current_steps": 4195, "total_steps": 8260, "loss": 0.198, "lr": 2.8673813648786196e-05, "epoch": 5.078692493946731, "percentage": 50.79, "elapsed_time": "0:08:39", "remaining_time": "0:08:23", "throughput": 3313.65, "total_tokens": 1721152}
{"current_steps": 4200, "total_steps": 8260, "loss": 0.1253, "lr": 2.862155424060926e-05, "epoch": 5.084745762711864, "percentage": 50.85, "elapsed_time": "0:08:39", "remaining_time": "0:08:22", "throughput": 3314.14, "total_tokens": 1723328}
{"current_steps": 4205, "total_steps": 8260, "loss": 0.186, "lr": 2.856927866319733e-05, "epoch": 5.0907990314769975, "percentage": 50.91, "elapsed_time": "0:08:40", "remaining_time": "0:08:21", "throughput": 3314.67, "total_tokens": 1725280}
{"current_steps": 4210, "total_steps": 8260, "loss": 0.1606, "lr": 2.851698714994635e-05, "epoch": 5.096852300242131, "percentage": 50.97, "elapsed_time": "0:08:41", "remaining_time": "0:08:21", "throughput": 3314.97, "total_tokens": 1727328}
{"current_steps": 4215, "total_steps": 8260, "loss": 0.2299, "lr": 2.8464679934323424e-05, "epoch": 5.102905569007264, "percentage": 51.03, "elapsed_time": "0:08:41", "remaining_time": "0:08:20", "throughput": 3315.43, "total_tokens": 1729472}
{"current_steps": 4220, "total_steps": 8260, "loss": 0.2126, "lr": 2.841235724986575e-05, "epoch": 5.108958837772397, "percentage": 51.09, "elapsed_time": "0:08:42", "remaining_time": "0:08:19", "throughput": 3315.92, "total_tokens": 1731392}
{"current_steps": 4225, "total_steps": 8260, "loss": 0.2132, "lr": 2.8360019330179604e-05, "epoch": 5.11501210653753, "percentage": 51.15, "elapsed_time": "0:08:42", "remaining_time": "0:08:19", "throughput": 3316.27, "total_tokens": 1733472}
{"current_steps": 4230, "total_steps": 8260, "loss": 0.1951, "lr": 2.8307666408939278e-05, "epoch": 5.121065375302663, "percentage": 51.21, "elapsed_time": "0:08:43", "remaining_time": "0:08:18", "throughput": 3316.59, "total_tokens": 1735520}
{"current_steps": 4235, "total_steps": 8260, "loss": 0.2034, "lr": 2.8255298719886043e-05, "epoch": 5.127118644067797, "percentage": 51.27, "elapsed_time": "0:08:43", "remaining_time": "0:08:17", "throughput": 3316.95, "total_tokens": 1737536}
{"current_steps": 4240, "total_steps": 8260, "loss": 0.2318, "lr": 2.820291649682709e-05, "epoch": 5.13317191283293, "percentage": 51.33, "elapsed_time": "0:08:44", "remaining_time": "0:08:17", "throughput": 3317.25, "total_tokens": 1739424}
{"current_steps": 4245, "total_steps": 8260, "loss": 0.1857, "lr": 2.8150519973634543e-05, "epoch": 5.1392251815980625, "percentage": 51.39, "elapsed_time": "0:08:44", "remaining_time": "0:08:16", "throughput": 3317.54, "total_tokens": 1741536}
{"current_steps": 4250, "total_steps": 8260, "loss": 0.1996, "lr": 2.809810938424432e-05, "epoch": 5.145278450363196, "percentage": 51.45, "elapsed_time": "0:08:45", "remaining_time": "0:08:15", "throughput": 3317.96, "total_tokens": 1743488}
{"current_steps": 4255, "total_steps": 8260, "loss": 0.1667, "lr": 2.804568496265516e-05, "epoch": 5.151331719128329, "percentage": 51.51, "elapsed_time": "0:08:46", "remaining_time": "0:08:15", "throughput": 3318.41, "total_tokens": 1745728}
{"current_steps": 4260, "total_steps": 8260, "loss": 0.1449, "lr": 2.799324694292757e-05, "epoch": 5.157384987893463, "percentage": 51.57, "elapsed_time": "0:08:46", "remaining_time": "0:08:14", "throughput": 3318.75, "total_tokens": 1747808}
{"current_steps": 4265, "total_steps": 8260, "loss": 0.1368, "lr": 2.7940795559182764e-05, "epoch": 5.163438256658596, "percentage": 51.63, "elapsed_time": "0:08:47", "remaining_time": "0:08:13", "throughput": 3319.2, "total_tokens": 1749856}
{"current_steps": 4270, "total_steps": 8260, "loss": 0.1949, "lr": 2.788833104560161e-05, "epoch": 5.169491525423728, "percentage": 51.69, "elapsed_time": "0:08:47", "remaining_time": "0:08:13", "throughput": 3319.64, "total_tokens": 1751904}
{"current_steps": 4275, "total_steps": 8260, "loss": 0.2563, "lr": 2.7835853636423616e-05, "epoch": 5.175544794188862, "percentage": 51.76, "elapsed_time": "0:08:48", "remaining_time": "0:08:12", "throughput": 3320.1, "total_tokens": 1753984}
{"current_steps": 4280, "total_steps": 8260, "loss": 0.2207, "lr": 2.7783363565945847e-05, "epoch": 5.181598062953995, "percentage": 51.82, "elapsed_time": "0:08:48", "remaining_time": "0:08:11", "throughput": 3320.49, "total_tokens": 1756000}
{"current_steps": 4285, "total_steps": 8260, "loss": 0.1729, "lr": 2.773086106852192e-05, "epoch": 5.187651331719128, "percentage": 51.88, "elapsed_time": "0:08:49", "remaining_time": "0:08:11", "throughput": 3320.81, "total_tokens": 1758080}
{"current_steps": 4290, "total_steps": 8260, "loss": 0.2408, "lr": 2.7678346378560903e-05, "epoch": 5.193704600484262, "percentage": 51.94, "elapsed_time": "0:08:49", "remaining_time": "0:08:10", "throughput": 3321.36, "total_tokens": 1760224}
{"current_steps": 4295, "total_steps": 8260, "loss": 0.1871, "lr": 2.762581973052633e-05, "epoch": 5.199757869249395, "percentage": 52.0, "elapsed_time": "0:08:50", "remaining_time": "0:08:09", "throughput": 3321.79, "total_tokens": 1762176}
{"current_steps": 4300, "total_steps": 8260, "loss": 0.1717, "lr": 2.7573281358935104e-05, "epoch": 5.2058111380145276, "percentage": 52.06, "elapsed_time": "0:08:51", "remaining_time": "0:08:09", "throughput": 3322.14, "total_tokens": 1764352}
{"current_steps": 4305, "total_steps": 8260, "loss": 0.1821, "lr": 2.7520731498356494e-05, "epoch": 5.211864406779661, "percentage": 52.12, "elapsed_time": "0:08:51", "remaining_time": "0:08:08", "throughput": 3322.49, "total_tokens": 1766432}
{"current_steps": 4310, "total_steps": 8260, "loss": 0.1951, "lr": 2.746817038341103e-05, "epoch": 5.217917675544794, "percentage": 52.18, "elapsed_time": "0:08:52", "remaining_time": "0:08:07", "throughput": 3322.86, "total_tokens": 1768608}
{"current_steps": 4315, "total_steps": 8260, "loss": 0.1673, "lr": 2.7415598248769524e-05, "epoch": 5.223970944309928, "percentage": 52.24, "elapsed_time": "0:08:52", "remaining_time": "0:08:07", "throughput": 3323.31, "total_tokens": 1770752}
{"current_steps": 4320, "total_steps": 8260, "loss": 0.1288, "lr": 2.7363015329151965e-05, "epoch": 5.230024213075061, "percentage": 52.3, "elapsed_time": "0:08:53", "remaining_time": "0:08:06", "throughput": 3323.65, "total_tokens": 1772832}
{"current_steps": 4325, "total_steps": 8260, "loss": 0.1709, "lr": 2.73104218593265e-05, "epoch": 5.236077481840193, "percentage": 52.36, "elapsed_time": "0:08:53", "remaining_time": "0:08:05", "throughput": 3324.12, "total_tokens": 1774912}
{"current_steps": 4330, "total_steps": 8260, "loss": 0.1919, "lr": 2.7257818074108394e-05, "epoch": 5.242130750605327, "percentage": 52.42, "elapsed_time": "0:08:54", "remaining_time": "0:08:05", "throughput": 3324.51, "total_tokens": 1777024}
{"current_steps": 4335, "total_steps": 8260, "loss": 0.1716, "lr": 2.7205204208358947e-05, "epoch": 5.24818401937046, "percentage": 52.48, "elapsed_time": "0:08:55", "remaining_time": "0:08:04", "throughput": 3325.11, "total_tokens": 1779008}
{"current_steps": 4340, "total_steps": 8260, "loss": 0.1762, "lr": 2.715258049698446e-05, "epoch": 5.254237288135593, "percentage": 52.54, "elapsed_time": "0:08:55", "remaining_time": "0:08:03", "throughput": 3325.56, "total_tokens": 1780896}
{"current_steps": 4345, "total_steps": 8260, "loss": 0.197, "lr": 2.709994717493523e-05, "epoch": 5.260290556900727, "percentage": 52.6, "elapsed_time": "0:08:56", "remaining_time": "0:08:03", "throughput": 3326.03, "total_tokens": 1783008}
{"current_steps": 4350, "total_steps": 8260, "loss": 0.1357, "lr": 2.7047304477204416e-05, "epoch": 5.266343825665859, "percentage": 52.66, "elapsed_time": "0:08:56", "remaining_time": "0:08:02", "throughput": 3326.35, "total_tokens": 1784992}
{"current_steps": 4355, "total_steps": 8260, "loss": 0.2734, "lr": 2.6994652638827078e-05, "epoch": 5.272397094430993, "percentage": 52.72, "elapsed_time": "0:08:57", "remaining_time": "0:08:01", "throughput": 3326.84, "total_tokens": 1787008}
{"current_steps": 4360, "total_steps": 8260, "loss": 0.1816, "lr": 2.694199189487906e-05, "epoch": 5.278450363196126, "percentage": 52.78, "elapsed_time": "0:08:57", "remaining_time": "0:08:00", "throughput": 3327.24, "total_tokens": 1789120}
{"current_steps": 4365, "total_steps": 8260, "loss": 0.1873, "lr": 2.688932248047597e-05, "epoch": 5.284503631961259, "percentage": 52.85, "elapsed_time": "0:08:58", "remaining_time": "0:08:00", "throughput": 3327.64, "total_tokens": 1791232}
{"current_steps": 4370, "total_steps": 8260, "loss": 0.2193, "lr": 2.683664463077214e-05, "epoch": 5.290556900726393, "percentage": 52.91, "elapsed_time": "0:08:58", "remaining_time": "0:07:59", "throughput": 3328.15, "total_tokens": 1793440}
{"current_steps": 4375, "total_steps": 8260, "loss": 0.1867, "lr": 2.678395858095955e-05, "epoch": 5.296610169491525, "percentage": 52.97, "elapsed_time": "0:08:59", "remaining_time": "0:07:59", "throughput": 3328.57, "total_tokens": 1795488}
{"current_steps": 4380, "total_steps": 8260, "loss": 0.2211, "lr": 2.6731264566266795e-05, "epoch": 5.302663438256658, "percentage": 53.03, "elapsed_time": "0:08:59", "remaining_time": "0:07:58", "throughput": 3328.94, "total_tokens": 1797600}
{"current_steps": 4385, "total_steps": 8260, "loss": 0.1946, "lr": 2.6678562821958043e-05, "epoch": 5.308716707021792, "percentage": 53.09, "elapsed_time": "0:09:00", "remaining_time": "0:07:57", "throughput": 3329.25, "total_tokens": 1799584}
{"current_steps": 4390, "total_steps": 8260, "loss": 0.1942, "lr": 2.6625853583331943e-05, "epoch": 5.314769975786925, "percentage": 53.15, "elapsed_time": "0:09:01", "remaining_time": "0:07:56", "throughput": 3329.77, "total_tokens": 1801440}
{"current_steps": 4395, "total_steps": 8260, "loss": 0.1975, "lr": 2.6573137085720638e-05, "epoch": 5.3208232445520585, "percentage": 53.21, "elapsed_time": "0:09:01", "remaining_time": "0:07:56", "throughput": 3330.14, "total_tokens": 1803456}
{"current_steps": 4400, "total_steps": 8260, "loss": 0.2253, "lr": 2.6520413564488672e-05, "epoch": 5.326876513317191, "percentage": 53.27, "elapsed_time": "0:09:02", "remaining_time": "0:07:55", "throughput": 3330.59, "total_tokens": 1805440}
{"current_steps": 4405, "total_steps": 8260, "loss": 0.1953, "lr": 2.6467683255031918e-05, "epoch": 5.332929782082324, "percentage": 53.33, "elapsed_time": "0:09:02", "remaining_time": "0:07:54", "throughput": 3331.2, "total_tokens": 1807360}
{"current_steps": 4410, "total_steps": 8260, "loss": 0.1825, "lr": 2.6414946392776597e-05, "epoch": 5.338983050847458, "percentage": 53.39, "elapsed_time": "0:09:03", "remaining_time": "0:07:54", "throughput": 3331.51, "total_tokens": 1809344}
{"current_steps": 4415, "total_steps": 8260, "loss": 0.2422, "lr": 2.636220321317816e-05, "epoch": 5.345036319612591, "percentage": 53.45, "elapsed_time": "0:09:03", "remaining_time": "0:07:53", "throughput": 3331.87, "total_tokens": 1811456}
{"current_steps": 4420, "total_steps": 8260, "loss": 0.1797, "lr": 2.6309453951720274e-05, "epoch": 5.351089588377724, "percentage": 53.51, "elapsed_time": "0:09:04", "remaining_time": "0:07:52", "throughput": 3332.28, "total_tokens": 1813600}
{"current_steps": 4425, "total_steps": 8260, "loss": 0.1589, "lr": 2.625669884391377e-05, "epoch": 5.357142857142857, "percentage": 53.57, "elapsed_time": "0:09:04", "remaining_time": "0:07:52", "throughput": 3332.73, "total_tokens": 1815424}
{"current_steps": 4430, "total_steps": 8260, "loss": 0.2051, "lr": 2.6203938125295552e-05, "epoch": 5.36319612590799, "percentage": 53.63, "elapsed_time": "0:09:05", "remaining_time": "0:07:51", "throughput": 3333.05, "total_tokens": 1817600}
{"current_steps": 4435, "total_steps": 8260, "loss": 0.1411, "lr": 2.6151172031427597e-05, "epoch": 5.3692493946731235, "percentage": 53.69, "elapsed_time": "0:09:05", "remaining_time": "0:07:50", "throughput": 3333.6, "total_tokens": 1819648}
{"current_steps": 4440, "total_steps": 8260, "loss": 0.1649, "lr": 2.609840079789588e-05, "epoch": 5.375302663438257, "percentage": 53.75, "elapsed_time": "0:09:06", "remaining_time": "0:07:50", "throughput": 3334.06, "total_tokens": 1821728}
{"current_steps": 4445, "total_steps": 8260, "loss": 0.1931, "lr": 2.604562466030931e-05, "epoch": 5.38135593220339, "percentage": 53.81, "elapsed_time": "0:09:06", "remaining_time": "0:07:49", "throughput": 3334.61, "total_tokens": 1823776}
{"current_steps": 4450, "total_steps": 8260, "loss": 0.1899, "lr": 2.599284385429871e-05, "epoch": 5.387409200968523, "percentage": 53.87, "elapsed_time": "0:09:07", "remaining_time": "0:07:48", "throughput": 3335.07, "total_tokens": 1825856}
{"current_steps": 4455, "total_steps": 8260, "loss": 0.1858, "lr": 2.594005861551574e-05, "epoch": 5.393462469733656, "percentage": 53.93, "elapsed_time": "0:09:08", "remaining_time": "0:07:48", "throughput": 3335.52, "total_tokens": 1827936}
{"current_steps": 4460, "total_steps": 8260, "loss": 0.2195, "lr": 2.588726917963183e-05, "epoch": 5.399515738498789, "percentage": 54.0, "elapsed_time": "0:09:08", "remaining_time": "0:07:47", "throughput": 3335.95, "total_tokens": 1829824}
{"current_steps": 4465, "total_steps": 8260, "loss": 0.1859, "lr": 2.5834475782337187e-05, "epoch": 5.405569007263923, "percentage": 54.06, "elapsed_time": "0:09:09", "remaining_time": "0:07:46", "throughput": 3336.33, "total_tokens": 1831936}
{"current_steps": 4470, "total_steps": 8260, "loss": 0.2156, "lr": 2.578167865933967e-05, "epoch": 5.411622276029056, "percentage": 54.12, "elapsed_time": "0:09:09", "remaining_time": "0:07:46", "throughput": 3336.69, "total_tokens": 1834048}
{"current_steps": 4475, "total_steps": 8260, "loss": 0.1929, "lr": 2.5728878046363785e-05, "epoch": 5.4176755447941884, "percentage": 54.18, "elapsed_time": "0:09:10", "remaining_time": "0:07:45", "throughput": 3337.1, "total_tokens": 1836192}
{"current_steps": 4480, "total_steps": 8260, "loss": 0.1866, "lr": 2.5676074179149635e-05, "epoch": 5.423728813559322, "percentage": 54.24, "elapsed_time": "0:09:10", "remaining_time": "0:07:44", "throughput": 3337.5, "total_tokens": 1838432}
{"current_steps": 4485, "total_steps": 8260, "loss": 0.1779, "lr": 2.5623267293451826e-05, "epoch": 5.429782082324455, "percentage": 54.3, "elapsed_time": "0:09:11", "remaining_time": "0:07:44", "throughput": 3337.9, "total_tokens": 1840576}
{"current_steps": 4490, "total_steps": 8260, "loss": 0.1786, "lr": 2.5570457625038457e-05, "epoch": 5.4358353510895885, "percentage": 54.36, "elapsed_time": "0:09:11", "remaining_time": "0:07:43", "throughput": 3338.3, "total_tokens": 1842624}
{"current_steps": 4495, "total_steps": 8260, "loss": 0.1868, "lr": 2.551764540969005e-05, "epoch": 5.441888619854722, "percentage": 54.42, "elapsed_time": "0:09:12", "remaining_time": "0:07:42", "throughput": 3338.56, "total_tokens": 1844576}
{"current_steps": 4500, "total_steps": 8260, "loss": 0.2294, "lr": 2.5464830883198492e-05, "epoch": 5.447941888619855, "percentage": 54.48, "elapsed_time": "0:09:13", "remaining_time": "0:07:42", "throughput": 3338.98, "total_tokens": 1846560}
{"current_steps": 4505, "total_steps": 8260, "loss": 0.1694, "lr": 2.5412014281365986e-05, "epoch": 5.453995157384988, "percentage": 54.54, "elapsed_time": "0:09:13", "remaining_time": "0:07:41", "throughput": 3339.45, "total_tokens": 1848576}
{"current_steps": 4510, "total_steps": 8260, "loss": 0.198, "lr": 2.5359195840004023e-05, "epoch": 5.460048426150121, "percentage": 54.6, "elapsed_time": "0:09:14", "remaining_time": "0:07:40", "throughput": 3339.75, "total_tokens": 1850688}
{"current_steps": 4515, "total_steps": 8260, "loss": 0.171, "lr": 2.5306375794932273e-05, "epoch": 5.466101694915254, "percentage": 54.66, "elapsed_time": "0:09:14", "remaining_time": "0:07:40", "throughput": 3340.35, "total_tokens": 1852800}
{"current_steps": 4520, "total_steps": 8260, "loss": 0.1829, "lr": 2.52535543819776e-05, "epoch": 5.472154963680388, "percentage": 54.72, "elapsed_time": "0:09:15", "remaining_time": "0:07:39", "throughput": 3340.65, "total_tokens": 1854720}
{"current_steps": 4525, "total_steps": 8260, "loss": 0.1849, "lr": 2.5200731836972956e-05, "epoch": 5.478208232445521, "percentage": 54.78, "elapsed_time": "0:09:15", "remaining_time": "0:07:38", "throughput": 3341.03, "total_tokens": 1856768}
{"current_steps": 4530, "total_steps": 8260, "loss": 0.1732, "lr": 2.5147908395756343e-05, "epoch": 5.4842615012106535, "percentage": 54.84, "elapsed_time": "0:09:16", "remaining_time": "0:07:38", "throughput": 3341.33, "total_tokens": 1858848}
{"current_steps": 4535, "total_steps": 8260, "loss": 0.1359, "lr": 2.5095084294169768e-05, "epoch": 5.490314769975787, "percentage": 54.9, "elapsed_time": "0:09:16", "remaining_time": "0:07:37", "throughput": 3341.58, "total_tokens": 1860896}
{"current_steps": 4540, "total_steps": 8260, "loss": 0.1843, "lr": 2.5042259768058208e-05, "epoch": 5.49636803874092, "percentage": 54.96, "elapsed_time": "0:09:17", "remaining_time": "0:07:36", "throughput": 3341.91, "total_tokens": 1862816}
{"current_steps": 4543, "total_steps": 8260, "eval_loss": 0.1929798126220703, "epoch": 5.5, "percentage": 55.0, "elapsed_time": "0:09:22", "remaining_time": "0:07:40", "throughput": 3314.49, "total_tokens": 1864000}
{"current_steps": 4545, "total_steps": 8260, "loss": 0.1937, "lr": 2.4989435053268497e-05, "epoch": 5.5024213075060535, "percentage": 55.02, "elapsed_time": "0:09:23", "remaining_time": "0:07:40", "throughput": 3309.08, "total_tokens": 1864832}
{"current_steps": 4550, "total_steps": 8260, "loss": 0.2514, "lr": 2.493661038564835e-05, "epoch": 5.508474576271187, "percentage": 55.08, "elapsed_time": "0:09:24", "remaining_time": "0:07:39", "throughput": 3309.56, "total_tokens": 1866752}
{"current_steps": 4555, "total_steps": 8260, "loss": 0.2109, "lr": 2.4883786001045238e-05, "epoch": 5.514527845036319, "percentage": 55.15, "elapsed_time": "0:09:24", "remaining_time": "0:07:39", "throughput": 3310.04, "total_tokens": 1868928}
{"current_steps": 4560, "total_steps": 8260, "loss": 0.2199, "lr": 2.4830962135305398e-05, "epoch": 5.520581113801453, "percentage": 55.21, "elapsed_time": "0:09:25", "remaining_time": "0:07:38", "throughput": 3310.46, "total_tokens": 1871072}
{"current_steps": 4565, "total_steps": 8260, "loss": 0.1787, "lr": 2.4778139024272724e-05, "epoch": 5.526634382566586, "percentage": 55.27, "elapsed_time": "0:09:25", "remaining_time": "0:07:37", "throughput": 3310.8, "total_tokens": 1873152}
{"current_steps": 4570, "total_steps": 8260, "loss": 0.2149, "lr": 2.4725316903787765e-05, "epoch": 5.532687651331719, "percentage": 55.33, "elapsed_time": "0:09:26", "remaining_time": "0:07:37", "throughput": 3311.33, "total_tokens": 1875040}
{"current_steps": 4575, "total_steps": 8260, "loss": 0.1545, "lr": 2.4672496009686622e-05, "epoch": 5.538740920096853, "percentage": 55.39, "elapsed_time": "0:09:26", "remaining_time": "0:07:36", "throughput": 3311.76, "total_tokens": 1877184}
{"current_steps": 4580, "total_steps": 8260, "loss": 0.17, "lr": 2.4619676577799946e-05, "epoch": 5.544794188861985, "percentage": 55.45, "elapsed_time": "0:09:27", "remaining_time": "0:07:35", "throughput": 3312.17, "total_tokens": 1879232}
{"current_steps": 4585, "total_steps": 8260, "loss": 0.1955, "lr": 2.4566858843951847e-05, "epoch": 5.5508474576271185, "percentage": 55.51, "elapsed_time": "0:09:27", "remaining_time": "0:07:35", "throughput": 3312.51, "total_tokens": 1881312}
{"current_steps": 4590, "total_steps": 8260, "loss": 0.212, "lr": 2.451404304395884e-05, "epoch": 5.556900726392252, "percentage": 55.57, "elapsed_time": "0:09:28", "remaining_time": "0:07:34", "throughput": 3312.98, "total_tokens": 1883328}
{"current_steps": 4595, "total_steps": 8260, "loss": 0.199, "lr": 2.446122941362883e-05, "epoch": 5.562953995157385, "percentage": 55.63, "elapsed_time": "0:09:28", "remaining_time": "0:07:33", "throughput": 3313.33, "total_tokens": 1885248}
{"current_steps": 4600, "total_steps": 8260, "loss": 0.1814, "lr": 2.4408418188760026e-05, "epoch": 5.5690072639225185, "percentage": 55.69, "elapsed_time": "0:09:29", "remaining_time": "0:07:33", "throughput": 3313.77, "total_tokens": 1887328}
{"current_steps": 4605, "total_steps": 8260, "loss": 0.1962, "lr": 2.435560960513989e-05, "epoch": 5.575060532687651, "percentage": 55.75, "elapsed_time": "0:09:30", "remaining_time": "0:07:32", "throughput": 3314.22, "total_tokens": 1889408}
{"current_steps": 4610, "total_steps": 8260, "loss": 0.1847, "lr": 2.4302803898544106e-05, "epoch": 5.581113801452784, "percentage": 55.81, "elapsed_time": "0:09:30", "remaining_time": "0:07:31", "throughput": 3314.47, "total_tokens": 1891456}
{"current_steps": 4615, "total_steps": 8260, "loss": 0.1668, "lr": 2.425000130473549e-05, "epoch": 5.587167070217918, "percentage": 55.87, "elapsed_time": "0:09:31", "remaining_time": "0:07:31", "throughput": 3314.9, "total_tokens": 1893696}
{"current_steps": 4620, "total_steps": 8260, "loss": 0.1371, "lr": 2.4197202059463e-05, "epoch": 5.593220338983051, "percentage": 55.93, "elapsed_time": "0:09:31", "remaining_time": "0:07:30", "throughput": 3315.39, "total_tokens": 1895616}
{"current_steps": 4625, "total_steps": 8260, "loss": 0.1707, "lr": 2.4144406398460594e-05, "epoch": 5.599273607748184, "percentage": 55.99, "elapsed_time": "0:09:32", "remaining_time": "0:07:29", "throughput": 3315.71, "total_tokens": 1897600}
{"current_steps": 4630, "total_steps": 8260, "loss": 0.2739, "lr": 2.4091614557446267e-05, "epoch": 5.605326876513317, "percentage": 56.05, "elapsed_time": "0:09:32", "remaining_time": "0:07:29", "throughput": 3316.18, "total_tokens": 1899616}
{"current_steps": 4635, "total_steps": 8260, "loss": 0.1846, "lr": 2.4038826772120932e-05, "epoch": 5.61138014527845, "percentage": 56.11, "elapsed_time": "0:09:33", "remaining_time": "0:07:28", "throughput": 3316.45, "total_tokens": 1901568}
{"current_steps": 4640, "total_steps": 8260, "loss": 0.1731, "lr": 2.398604327816742e-05, "epoch": 5.6174334140435835, "percentage": 56.17, "elapsed_time": "0:09:33", "remaining_time": "0:07:27", "throughput": 3316.97, "total_tokens": 1903616}
{"current_steps": 4645, "total_steps": 8260, "loss": 0.2097, "lr": 2.3933264311249377e-05, "epoch": 5.623486682808717, "percentage": 56.23, "elapsed_time": "0:09:34", "remaining_time": "0:07:27", "throughput": 3317.39, "total_tokens": 1905504}
{"current_steps": 4650, "total_steps": 8260, "loss": 0.2012, "lr": 2.3880490107010255e-05, "epoch": 5.62953995157385, "percentage": 56.3, "elapsed_time": "0:09:34", "remaining_time": "0:07:26", "throughput": 3317.75, "total_tokens": 1907456}
{"current_steps": 4655, "total_steps": 8260, "loss": 0.1993, "lr": 2.382772090107223e-05, "epoch": 5.635593220338983, "percentage": 56.36, "elapsed_time": "0:09:35", "remaining_time": "0:07:25", "throughput": 3318.1, "total_tokens": 1909568}
{"current_steps": 4660, "total_steps": 8260, "loss": 0.2132, "lr": 2.3774956929035177e-05, "epoch": 5.641646489104116, "percentage": 56.42, "elapsed_time": "0:09:36", "remaining_time": "0:07:25", "throughput": 3318.66, "total_tokens": 1911840}
{"current_steps": 4665, "total_steps": 8260, "loss": 0.2068, "lr": 2.3722198426475593e-05, "epoch": 5.647699757869249, "percentage": 56.48, "elapsed_time": "0:09:36", "remaining_time": "0:07:24", "throughput": 3319.02, "total_tokens": 1913952}
{"current_steps": 4670, "total_steps": 8260, "loss": 0.2105, "lr": 2.3669445628945542e-05, "epoch": 5.653753026634383, "percentage": 56.54, "elapsed_time": "0:09:37", "remaining_time": "0:07:23", "throughput": 3319.52, "total_tokens": 1916160}
{"current_steps": 4675, "total_steps": 8260, "loss": 0.2077, "lr": 2.3616698771971633e-05, "epoch": 5.659806295399516, "percentage": 56.6, "elapsed_time": "0:09:37", "remaining_time": "0:07:23", "throughput": 3319.95, "total_tokens": 1918144}
{"current_steps": 4680, "total_steps": 8260, "loss": 0.1754, "lr": 2.356395809105396e-05, "epoch": 5.6658595641646485, "percentage": 56.66, "elapsed_time": "0:09:38", "remaining_time": "0:07:22", "throughput": 3320.29, "total_tokens": 1920160}
{"current_steps": 4685, "total_steps": 8260, "loss": 0.2222, "lr": 2.3511223821665028e-05, "epoch": 5.671912832929782, "percentage": 56.72, "elapsed_time": "0:09:38", "remaining_time": "0:07:21", "throughput": 3320.66, "total_tokens": 1922368}
{"current_steps": 4690, "total_steps": 8260, "loss": 0.1685, "lr": 2.3458496199248717e-05, "epoch": 5.677966101694915, "percentage": 56.78, "elapsed_time": "0:09:39", "remaining_time": "0:07:21", "throughput": 3320.98, "total_tokens": 1924448}
{"current_steps": 4695, "total_steps": 8260, "loss": 0.2092, "lr": 2.340577545921923e-05, "epoch": 5.684019370460049, "percentage": 56.84, "elapsed_time": "0:09:40", "remaining_time": "0:07:20", "throughput": 3321.38, "total_tokens": 1926592}
{"current_steps": 4700, "total_steps": 8260, "loss": 0.1856, "lr": 2.335306183696006e-05, "epoch": 5.690072639225182, "percentage": 56.9, "elapsed_time": "0:09:40", "remaining_time": "0:07:19", "throughput": 3321.8, "total_tokens": 1928672}
{"current_steps": 4705, "total_steps": 8260, "loss": 0.2032, "lr": 2.3300355567822897e-05, "epoch": 5.696125907990314, "percentage": 56.96, "elapsed_time": "0:09:41", "remaining_time": "0:07:19", "throughput": 3322.2, "total_tokens": 1930816}
{"current_steps": 4710, "total_steps": 8260, "loss": 0.1879, "lr": 2.324765688712661e-05, "epoch": 5.702179176755448, "percentage": 57.02, "elapsed_time": "0:09:41", "remaining_time": "0:07:18", "throughput": 3322.52, "total_tokens": 1932896}
{"current_steps": 4715, "total_steps": 8260, "loss": 0.2136, "lr": 2.3194966030156187e-05, "epoch": 5.708232445520581, "percentage": 57.08, "elapsed_time": "0:09:42", "remaining_time": "0:07:17", "throughput": 3322.9, "total_tokens": 1934944}
{"current_steps": 4720, "total_steps": 8260, "loss": 0.1767, "lr": 2.31422832321617e-05, "epoch": 5.714285714285714, "percentage": 57.14, "elapsed_time": "0:09:42", "remaining_time": "0:07:17", "throughput": 3323.28, "total_tokens": 1937152}
{"current_steps": 4725, "total_steps": 8260, "loss": 0.171, "lr": 2.308960872835721e-05, "epoch": 5.720338983050848, "percentage": 57.2, "elapsed_time": "0:09:43", "remaining_time": "0:07:16", "throughput": 3323.73, "total_tokens": 1939328}
{"current_steps": 4730, "total_steps": 8260, "loss": 0.2292, "lr": 2.3036942753919775e-05, "epoch": 5.72639225181598, "percentage": 57.26, "elapsed_time": "0:09:44", "remaining_time": "0:07:15", "throughput": 3324.02, "total_tokens": 1941312}
{"current_steps": 4735, "total_steps": 8260, "loss": 0.1818, "lr": 2.2984285543988352e-05, "epoch": 5.732445520581114, "percentage": 57.32, "elapsed_time": "0:09:44", "remaining_time": "0:07:15", "throughput": 3324.38, "total_tokens": 1943264}
{"current_steps": 4740, "total_steps": 8260, "loss": 0.2522, "lr": 2.2931637333662785e-05, "epoch": 5.738498789346247, "percentage": 57.38, "elapsed_time": "0:09:45", "remaining_time": "0:07:14", "throughput": 3324.75, "total_tokens": 1945312}
{"current_steps": 4745, "total_steps": 8260, "loss": 0.1958, "lr": 2.287899835800273e-05, "epoch": 5.74455205811138, "percentage": 57.45, "elapsed_time": "0:09:45", "remaining_time": "0:07:13", "throughput": 3325.21, "total_tokens": 1947328}
{"current_steps": 4750, "total_steps": 8260, "loss": 0.2085, "lr": 2.2826368852026597e-05, "epoch": 5.750605326876514, "percentage": 57.51, "elapsed_time": "0:09:46", "remaining_time": "0:07:13", "throughput": 3325.6, "total_tokens": 1949376}
{"current_steps": 4755, "total_steps": 8260, "loss": 0.1942, "lr": 2.277374905071053e-05, "epoch": 5.756658595641646, "percentage": 57.57, "elapsed_time": "0:09:46", "remaining_time": "0:07:12", "throughput": 3325.91, "total_tokens": 1951456}
{"current_steps": 4760, "total_steps": 8260, "loss": 0.1805, "lr": 2.2721139188987357e-05, "epoch": 5.762711864406779, "percentage": 57.63, "elapsed_time": "0:09:47", "remaining_time": "0:07:11", "throughput": 3326.08, "total_tokens": 1953536}
{"current_steps": 4765, "total_steps": 8260, "loss": 0.2071, "lr": 2.26685395017455e-05, "epoch": 5.768765133171913, "percentage": 57.69, "elapsed_time": "0:09:47", "remaining_time": "0:07:11", "throughput": 3326.46, "total_tokens": 1955744}
{"current_steps": 4770, "total_steps": 8260, "loss": 0.1778, "lr": 2.261595022382799e-05, "epoch": 5.774818401937046, "percentage": 57.75, "elapsed_time": "0:09:48", "remaining_time": "0:07:10", "throughput": 3326.89, "total_tokens": 1957824}
{"current_steps": 4775, "total_steps": 8260, "loss": 0.2057, "lr": 2.256337159003134e-05, "epoch": 5.780871670702179, "percentage": 57.81, "elapsed_time": "0:09:49", "remaining_time": "0:07:09", "throughput": 3327.33, "total_tokens": 1960000}
{"current_steps": 4780, "total_steps": 8260, "loss": 0.2104, "lr": 2.251080383510459e-05, "epoch": 5.786924939467312, "percentage": 57.87, "elapsed_time": "0:09:49", "remaining_time": "0:07:09", "throughput": 3327.73, "total_tokens": 1962144}
{"current_steps": 4785, "total_steps": 8260, "loss": 0.1806, "lr": 2.2458247193748155e-05, "epoch": 5.792978208232445, "percentage": 57.93, "elapsed_time": "0:09:50", "remaining_time": "0:07:08", "throughput": 3328.0, "total_tokens": 1964192}
{"current_steps": 4790, "total_steps": 8260, "loss": 0.1566, "lr": 2.240570190061288e-05, "epoch": 5.799031476997579, "percentage": 57.99, "elapsed_time": "0:09:50", "remaining_time": "0:07:07", "throughput": 3328.36, "total_tokens": 1966240}
{"current_steps": 4795, "total_steps": 8260, "loss": 0.1924, "lr": 2.2353168190298915e-05, "epoch": 5.805084745762712, "percentage": 58.05, "elapsed_time": "0:09:51", "remaining_time": "0:07:07", "throughput": 3328.79, "total_tokens": 1968320}
{"current_steps": 4800, "total_steps": 8260, "loss": 0.24, "lr": 2.2300646297354704e-05, "epoch": 5.811138014527845, "percentage": 58.11, "elapsed_time": "0:09:51", "remaining_time": "0:07:06", "throughput": 3329.2, "total_tokens": 1970304}
{"current_steps": 4805, "total_steps": 8260, "loss": 0.1716, "lr": 2.224813645627592e-05, "epoch": 5.817191283292978, "percentage": 58.17, "elapsed_time": "0:09:52", "remaining_time": "0:07:05", "throughput": 3329.54, "total_tokens": 1972320}
{"current_steps": 4810, "total_steps": 8260, "loss": 0.1447, "lr": 2.2195638901504452e-05, "epoch": 5.823244552058111, "percentage": 58.23, "elapsed_time": "0:09:52", "remaining_time": "0:07:05", "throughput": 3329.78, "total_tokens": 1974432}
{"current_steps": 4815, "total_steps": 8260, "loss": 0.1996, "lr": 2.2143153867427305e-05, "epoch": 5.829297820823244, "percentage": 58.29, "elapsed_time": "0:09:53", "remaining_time": "0:07:04", "throughput": 3330.15, "total_tokens": 1976480}
{"current_steps": 4820, "total_steps": 8260, "loss": 0.1511, "lr": 2.2090681588375594e-05, "epoch": 5.835351089588378, "percentage": 58.35, "elapsed_time": "0:09:54", "remaining_time": "0:07:03", "throughput": 3330.67, "total_tokens": 1978624}
{"current_steps": 4825, "total_steps": 8260, "loss": 0.2011, "lr": 2.2038222298623507e-05, "epoch": 5.841404358353511, "percentage": 58.41, "elapsed_time": "0:09:54", "remaining_time": "0:07:03", "throughput": 3331.02, "total_tokens": 1980736}
{"current_steps": 4830, "total_steps": 8260, "loss": 0.1839, "lr": 2.1985776232387202e-05, "epoch": 5.847457627118644, "percentage": 58.47, "elapsed_time": "0:09:55", "remaining_time": "0:07:02", "throughput": 3331.33, "total_tokens": 1982656}
{"current_steps": 4835, "total_steps": 8260, "loss": 0.2535, "lr": 2.1933343623823814e-05, "epoch": 5.853510895883777, "percentage": 58.54, "elapsed_time": "0:09:55", "remaining_time": "0:07:02", "throughput": 3331.65, "total_tokens": 1984832}
{"current_steps": 4840, "total_steps": 8260, "loss": 0.1874, "lr": 2.1880924707030407e-05, "epoch": 5.85956416464891, "percentage": 58.6, "elapsed_time": "0:09:56", "remaining_time": "0:07:01", "throughput": 3331.98, "total_tokens": 1986848}
{"current_steps": 4845, "total_steps": 8260, "loss": 0.1827, "lr": 2.1828519716042888e-05, "epoch": 5.865617433414044, "percentage": 58.66, "elapsed_time": "0:09:56", "remaining_time": "0:07:00", "throughput": 3332.37, "total_tokens": 1988992}
{"current_steps": 4850, "total_steps": 8260, "loss": 0.1822, "lr": 2.177612888483502e-05, "epoch": 5.871670702179177, "percentage": 58.72, "elapsed_time": "0:09:57", "remaining_time": "0:07:00", "throughput": 3332.77, "total_tokens": 1990976}
{"current_steps": 4855, "total_steps": 8260, "loss": 0.1976, "lr": 2.1723752447317312e-05, "epoch": 5.877723970944309, "percentage": 58.78, "elapsed_time": "0:09:57", "remaining_time": "0:06:59", "throughput": 3333.23, "total_tokens": 1992928}
{"current_steps": 4860, "total_steps": 8260, "loss": 0.2132, "lr": 2.167139063733605e-05, "epoch": 5.883777239709443, "percentage": 58.84, "elapsed_time": "0:09:58", "remaining_time": "0:06:58", "throughput": 3333.77, "total_tokens": 1995104}
{"current_steps": 4865, "total_steps": 8260, "loss": 0.1605, "lr": 2.161904368867217e-05, "epoch": 5.889830508474576, "percentage": 58.9, "elapsed_time": "0:09:59", "remaining_time": "0:06:58", "throughput": 3334.29, "total_tokens": 1997312}
{"current_steps": 4870, "total_steps": 8260, "loss": 0.1874, "lr": 2.1566711835040284e-05, "epoch": 5.8958837772397095, "percentage": 58.96, "elapsed_time": "0:09:59", "remaining_time": "0:06:57", "throughput": 3334.72, "total_tokens": 1999232}
{"current_steps": 4875, "total_steps": 8260, "loss": 0.25, "lr": 2.1514395310087596e-05, "epoch": 5.901937046004843, "percentage": 59.02, "elapsed_time": "0:10:00", "remaining_time": "0:06:56", "throughput": 3335.04, "total_tokens": 2001088}
{"current_steps": 4880, "total_steps": 8260, "loss": 0.2266, "lr": 2.1462094347392887e-05, "epoch": 5.907990314769976, "percentage": 59.08, "elapsed_time": "0:10:00", "remaining_time": "0:06:55", "throughput": 3335.52, "total_tokens": 2003232}
{"current_steps": 4885, "total_steps": 8260, "loss": 0.2051, "lr": 2.1409809180465436e-05, "epoch": 5.914043583535109, "percentage": 59.14, "elapsed_time": "0:10:01", "remaining_time": "0:06:55", "throughput": 3335.71, "total_tokens": 2005312}
{"current_steps": 4890, "total_steps": 8260, "loss": 0.1846, "lr": 2.1357540042744006e-05, "epoch": 5.920096852300242, "percentage": 59.2, "elapsed_time": "0:10:01", "remaining_time": "0:06:54", "throughput": 3336.11, "total_tokens": 2007392}
{"current_steps": 4895, "total_steps": 8260, "loss": 0.1483, "lr": 2.1305287167595808e-05, "epoch": 5.926150121065375, "percentage": 59.26, "elapsed_time": "0:10:02", "remaining_time": "0:06:54", "throughput": 3336.53, "total_tokens": 2009568}
{"current_steps": 4900, "total_steps": 8260, "loss": 0.1665, "lr": 2.1253050788315436e-05, "epoch": 5.932203389830509, "percentage": 59.32, "elapsed_time": "0:10:02", "remaining_time": "0:06:53", "throughput": 3336.85, "total_tokens": 2011424}
{"current_steps": 4905, "total_steps": 8260, "loss": 0.2136, "lr": 2.120083113812381e-05, "epoch": 5.938256658595642, "percentage": 59.38, "elapsed_time": "0:10:03", "remaining_time": "0:06:52", "throughput": 3337.24, "total_tokens": 2013504}
{"current_steps": 4910, "total_steps": 8260, "loss": 0.2468, "lr": 2.1148628450167203e-05, "epoch": 5.9443099273607745, "percentage": 59.44, "elapsed_time": "0:10:03", "remaining_time": "0:06:52", "throughput": 3337.75, "total_tokens": 2015872}
{"current_steps": 4915, "total_steps": 8260, "loss": 0.245, "lr": 2.109644295751612e-05, "epoch": 5.950363196125908, "percentage": 59.5, "elapsed_time": "0:10:04", "remaining_time": "0:06:51", "throughput": 3338.19, "total_tokens": 2017888}
{"current_steps": 4920, "total_steps": 8260, "loss": 0.1942, "lr": 2.1044274893164316e-05, "epoch": 5.956416464891041, "percentage": 59.56, "elapsed_time": "0:10:05", "remaining_time": "0:06:50", "throughput": 3338.5, "total_tokens": 2019904}
{"current_steps": 4925, "total_steps": 8260, "loss": 0.1662, "lr": 2.0992124490027727e-05, "epoch": 5.9624697336561745, "percentage": 59.62, "elapsed_time": "0:10:05", "remaining_time": "0:06:50", "throughput": 3338.81, "total_tokens": 2022016}
{"current_steps": 4930, "total_steps": 8260, "loss": 0.2331, "lr": 2.0939991980943437e-05, "epoch": 5.968523002421308, "percentage": 59.69, "elapsed_time": "0:10:06", "remaining_time": "0:06:49", "throughput": 3339.11, "total_tokens": 2023936}
{"current_steps": 4935, "total_steps": 8260, "loss": 0.1937, "lr": 2.088787759866863e-05, "epoch": 5.97457627118644, "percentage": 59.75, "elapsed_time": "0:10:06", "remaining_time": "0:06:48", "throughput": 3339.47, "total_tokens": 2026080}
{"current_steps": 4940, "total_steps": 8260, "loss": 0.1622, "lr": 2.0835781575879574e-05, "epoch": 5.980629539951574, "percentage": 59.81, "elapsed_time": "0:10:07", "remaining_time": "0:06:48", "throughput": 3339.76, "total_tokens": 2028064}
{"current_steps": 4945, "total_steps": 8260, "loss": 0.215, "lr": 2.0783704145170547e-05, "epoch": 5.986682808716707, "percentage": 59.87, "elapsed_time": "0:10:07", "remaining_time": "0:06:47", "throughput": 3340.21, "total_tokens": 2029920}
{"current_steps": 4950, "total_steps": 8260, "loss": 0.183, "lr": 2.0731645539052845e-05, "epoch": 5.99273607748184, "percentage": 59.93, "elapsed_time": "0:10:08", "remaining_time": "0:06:46", "throughput": 3340.55, "total_tokens": 2031776}
{"current_steps": 4955, "total_steps": 8260, "loss": 0.1636, "lr": 2.067960598995369e-05, "epoch": 5.998789346246974, "percentage": 59.99, "elapsed_time": "0:10:08", "remaining_time": "0:06:46", "throughput": 3341.0, "total_tokens": 2033888}
{"current_steps": 4956, "total_steps": 8260, "eval_loss": 0.18926513195037842, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:10:13", "remaining_time": "0:06:49", "throughput": 3314.96, "total_tokens": 2033992}
{"current_steps": 4960, "total_steps": 8260, "loss": 0.227, "lr": 2.062758573021523e-05, "epoch": 6.004842615012106, "percentage": 60.05, "elapsed_time": "0:10:15", "remaining_time": "0:06:49", "throughput": 3309.47, "total_tokens": 2035752}
{"current_steps": 4965, "total_steps": 8260, "loss": 0.2239, "lr": 2.0575584992093505e-05, "epoch": 6.0108958837772395, "percentage": 60.11, "elapsed_time": "0:10:15", "remaining_time": "0:06:48", "throughput": 3309.85, "total_tokens": 2037896}
{"current_steps": 4970, "total_steps": 8260, "loss": 0.1947, "lr": 2.0523604007757374e-05, "epoch": 6.016949152542373, "percentage": 60.17, "elapsed_time": "0:10:16", "remaining_time": "0:06:47", "throughput": 3310.19, "total_tokens": 2040008}
{"current_steps": 4975, "total_steps": 8260, "loss": 0.1963, "lr": 2.0471643009287523e-05, "epoch": 6.023002421307506, "percentage": 60.23, "elapsed_time": "0:10:16", "remaining_time": "0:06:47", "throughput": 3310.57, "total_tokens": 2042056}
{"current_steps": 4980, "total_steps": 8260, "loss": 0.2111, "lr": 2.0419702228675395e-05, "epoch": 6.0290556900726395, "percentage": 60.29, "elapsed_time": "0:10:17", "remaining_time": "0:06:46", "throughput": 3310.97, "total_tokens": 2044040}
{"current_steps": 4985, "total_steps": 8260, "loss": 0.1899, "lr": 2.0367781897822147e-05, "epoch": 6.035108958837772, "percentage": 60.35, "elapsed_time": "0:10:17", "remaining_time": "0:06:45", "throughput": 3311.37, "total_tokens": 2046056}
{"current_steps": 4990, "total_steps": 8260, "loss": 0.1856, "lr": 2.031588224853767e-05, "epoch": 6.041162227602905, "percentage": 60.41, "elapsed_time": "0:10:18", "remaining_time": "0:06:45", "throughput": 3311.64, "total_tokens": 2047944}
{"current_steps": 4995, "total_steps": 8260, "loss": 0.1774, "lr": 2.0264003512539493e-05, "epoch": 6.047215496368039, "percentage": 60.47, "elapsed_time": "0:10:18", "remaining_time": "0:06:44", "throughput": 3311.96, "total_tokens": 2050024}
{"current_steps": 5000, "total_steps": 8260, "loss": 0.2022, "lr": 2.0212145921451787e-05, "epoch": 6.053268765133172, "percentage": 60.53, "elapsed_time": "0:10:19", "remaining_time": "0:06:43", "throughput": 3312.29, "total_tokens": 2052040}
{"current_steps": 5005, "total_steps": 8260, "loss": 0.2019, "lr": 2.01603097068043e-05, "epoch": 6.059322033898305, "percentage": 60.59, "elapsed_time": "0:10:20", "remaining_time": "0:06:43", "throughput": 3312.82, "total_tokens": 2054120}
{"current_steps": 5010, "total_steps": 8260, "loss": 0.176, "lr": 2.0108495100031364e-05, "epoch": 6.065375302663438, "percentage": 60.65, "elapsed_time": "0:10:20", "remaining_time": "0:06:42", "throughput": 3313.25, "total_tokens": 2056296}
{"current_steps": 5015, "total_steps": 8260, "loss": 0.1703, "lr": 2.0056702332470806e-05, "epoch": 6.071428571428571, "percentage": 60.71, "elapsed_time": "0:10:21", "remaining_time": "0:06:41", "throughput": 3313.61, "total_tokens": 2058408}
{"current_steps": 5020, "total_steps": 8260, "loss": 0.1831, "lr": 2.0004931635362982e-05, "epoch": 6.0774818401937045, "percentage": 60.77, "elapsed_time": "0:10:21", "remaining_time": "0:06:41", "throughput": 3313.88, "total_tokens": 2060456}
{"current_steps": 5025, "total_steps": 8260, "loss": 0.1483, "lr": 1.995318323984969e-05, "epoch": 6.083535108958838, "percentage": 60.84, "elapsed_time": "0:10:22", "remaining_time": "0:06:40", "throughput": 3314.35, "total_tokens": 2062408}
{"current_steps": 5030, "total_steps": 8260, "loss": 0.2151, "lr": 1.9901457376973143e-05, "epoch": 6.089588377723971, "percentage": 60.9, "elapsed_time": "0:10:22", "remaining_time": "0:06:39", "throughput": 3314.76, "total_tokens": 2064392}
{"current_steps": 5035, "total_steps": 8260, "loss": 0.1953, "lr": 1.9849754277674993e-05, "epoch": 6.095641646489105, "percentage": 60.96, "elapsed_time": "0:10:23", "remaining_time": "0:06:39", "throughput": 3315.0, "total_tokens": 2066504}
{"current_steps": 5040, "total_steps": 8260, "loss": 0.1774, "lr": 1.979807417279521e-05, "epoch": 6.101694915254237, "percentage": 61.02, "elapsed_time": "0:10:23", "remaining_time": "0:06:38", "throughput": 3315.42, "total_tokens": 2068584}
{"current_steps": 5045, "total_steps": 8260, "loss": 0.164, "lr": 1.974641729307115e-05, "epoch": 6.10774818401937, "percentage": 61.08, "elapsed_time": "0:10:24", "remaining_time": "0:06:37", "throughput": 3315.87, "total_tokens": 2070696}
{"current_steps": 5050, "total_steps": 8260, "loss": 0.2396, "lr": 1.9694783869136435e-05, "epoch": 6.113801452784504, "percentage": 61.14, "elapsed_time": "0:10:25", "remaining_time": "0:06:37", "throughput": 3316.41, "total_tokens": 2072872}
{"current_steps": 5055, "total_steps": 8260, "loss": 0.21, "lr": 1.9643174131519986e-05, "epoch": 6.119854721549637, "percentage": 61.2, "elapsed_time": "0:10:25", "remaining_time": "0:06:36", "throughput": 3316.74, "total_tokens": 2074824}
{"current_steps": 5060, "total_steps": 8260, "loss": 0.1762, "lr": 1.9591588310644967e-05, "epoch": 6.12590799031477, "percentage": 61.26, "elapsed_time": "0:10:26", "remaining_time": "0:06:35", "throughput": 3317.12, "total_tokens": 2076808}
{"current_steps": 5065, "total_steps": 8260, "loss": 0.2023, "lr": 1.9540026636827742e-05, "epoch": 6.131961259079903, "percentage": 61.32, "elapsed_time": "0:10:26", "remaining_time": "0:06:35", "throughput": 3317.49, "total_tokens": 2078856}
{"current_steps": 5070, "total_steps": 8260, "loss": 0.1588, "lr": 1.948848934027689e-05, "epoch": 6.138014527845036, "percentage": 61.38, "elapsed_time": "0:10:27", "remaining_time": "0:06:34", "throughput": 3317.81, "total_tokens": 2081096}
{"current_steps": 5075, "total_steps": 8260, "loss": 0.1746, "lr": 1.9436976651092144e-05, "epoch": 6.1440677966101696, "percentage": 61.44, "elapsed_time": "0:10:27", "remaining_time": "0:06:34", "throughput": 3318.08, "total_tokens": 2083144}
{"current_steps": 5080, "total_steps": 8260, "loss": 0.2136, "lr": 1.9385488799263372e-05, "epoch": 6.150121065375303, "percentage": 61.5, "elapsed_time": "0:10:28", "remaining_time": "0:06:33", "throughput": 3318.43, "total_tokens": 2085096}
{"current_steps": 5085, "total_steps": 8260, "loss": 0.1698, "lr": 1.9334026014669543e-05, "epoch": 6.156174334140436, "percentage": 61.56, "elapsed_time": "0:10:28", "remaining_time": "0:06:32", "throughput": 3318.86, "total_tokens": 2087272}
{"current_steps": 5090, "total_steps": 8260, "loss": 0.2021, "lr": 1.9282588527077715e-05, "epoch": 6.162227602905569, "percentage": 61.62, "elapsed_time": "0:10:29", "remaining_time": "0:06:31", "throughput": 3319.33, "total_tokens": 2089224}
{"current_steps": 5095, "total_steps": 8260, "loss": 0.1807, "lr": 1.9231176566142006e-05, "epoch": 6.168280871670702, "percentage": 61.68, "elapsed_time": "0:10:29", "remaining_time": "0:06:31", "throughput": 3319.79, "total_tokens": 2091432}
{"current_steps": 5100, "total_steps": 8260, "loss": 0.1756, "lr": 1.917979036140255e-05, "epoch": 6.174334140435835, "percentage": 61.74, "elapsed_time": "0:10:30", "remaining_time": "0:06:30", "throughput": 3320.11, "total_tokens": 2093384}
{"current_steps": 5105, "total_steps": 8260, "loss": 0.2143, "lr": 1.9128430142284503e-05, "epoch": 6.180387409200969, "percentage": 61.8, "elapsed_time": "0:10:31", "remaining_time": "0:06:30", "throughput": 3320.6, "total_tokens": 2095528}
{"current_steps": 5110, "total_steps": 8260, "loss": 0.1654, "lr": 1.9077096138096992e-05, "epoch": 6.186440677966102, "percentage": 61.86, "elapsed_time": "0:10:31", "remaining_time": "0:06:29", "throughput": 3320.92, "total_tokens": 2097544}
{"current_steps": 5115, "total_steps": 8260, "loss": 0.1657, "lr": 1.9025788578032113e-05, "epoch": 6.1924939467312345, "percentage": 61.92, "elapsed_time": "0:10:32", "remaining_time": "0:06:28", "throughput": 3321.26, "total_tokens": 2099592}
{"current_steps": 5120, "total_steps": 8260, "loss": 0.1897, "lr": 1.8974507691163867e-05, "epoch": 6.198547215496368, "percentage": 61.99, "elapsed_time": "0:10:32", "remaining_time": "0:06:27", "throughput": 3321.69, "total_tokens": 2101448}
{"current_steps": 5125, "total_steps": 8260, "loss": 0.2223, "lr": 1.892325370644721e-05, "epoch": 6.204600484261501, "percentage": 62.05, "elapsed_time": "0:10:33", "remaining_time": "0:06:27", "throughput": 3321.97, "total_tokens": 2103528}
{"current_steps": 5130, "total_steps": 8260, "loss": 0.1884, "lr": 1.8872026852716954e-05, "epoch": 6.210653753026635, "percentage": 62.11, "elapsed_time": "0:10:33", "remaining_time": "0:06:26", "throughput": 3322.28, "total_tokens": 2105640}
{"current_steps": 5135, "total_steps": 8260, "loss": 0.2368, "lr": 1.8820827358686793e-05, "epoch": 6.216707021791768, "percentage": 62.17, "elapsed_time": "0:10:34", "remaining_time": "0:06:26", "throughput": 3322.64, "total_tokens": 2107688}
{"current_steps": 5140, "total_steps": 8260, "loss": 0.1825, "lr": 1.8769655452948274e-05, "epoch": 6.2227602905569, "percentage": 62.23, "elapsed_time": "0:10:34", "remaining_time": "0:06:25", "throughput": 3322.94, "total_tokens": 2109608}
{"current_steps": 5145, "total_steps": 8260, "loss": 0.1863, "lr": 1.8718511363969733e-05, "epoch": 6.228813559322034, "percentage": 62.29, "elapsed_time": "0:10:35", "remaining_time": "0:06:24", "throughput": 3323.2, "total_tokens": 2111752}
{"current_steps": 5150, "total_steps": 8260, "loss": 0.2062, "lr": 1.8667395320095367e-05, "epoch": 6.234866828087167, "percentage": 62.35, "elapsed_time": "0:10:36", "remaining_time": "0:06:24", "throughput": 3323.61, "total_tokens": 2113928}
{"current_steps": 5155, "total_steps": 8260, "loss": 0.1589, "lr": 1.8616307549544113e-05, "epoch": 6.2409200968523, "percentage": 62.41, "elapsed_time": "0:10:36", "remaining_time": "0:06:23", "throughput": 3323.94, "total_tokens": 2115944}
{"current_steps": 5160, "total_steps": 8260, "loss": 0.1547, "lr": 1.85652482804087e-05, "epoch": 6.246973365617434, "percentage": 62.47, "elapsed_time": "0:10:37", "remaining_time": "0:06:22", "throughput": 3324.36, "total_tokens": 2118120}
{"current_steps": 5165, "total_steps": 8260, "loss": 0.1846, "lr": 1.85142177406546e-05, "epoch": 6.253026634382566, "percentage": 62.53, "elapsed_time": "0:10:37", "remaining_time": "0:06:22", "throughput": 3324.68, "total_tokens": 2120136}
{"current_steps": 5170, "total_steps": 8260, "loss": 0.1603, "lr": 1.8463216158119015e-05, "epoch": 6.2590799031477, "percentage": 62.59, "elapsed_time": "0:10:38", "remaining_time": "0:06:21", "throughput": 3324.9, "total_tokens": 2122184}
{"current_steps": 5175, "total_steps": 8260, "loss": 0.2089, "lr": 1.8412243760509867e-05, "epoch": 6.265133171912833, "percentage": 62.65, "elapsed_time": "0:10:38", "remaining_time": "0:06:20", "throughput": 3325.44, "total_tokens": 2124200}
{"current_steps": 5180, "total_steps": 8260, "loss": 0.202, "lr": 1.8361300775404765e-05, "epoch": 6.271186440677966, "percentage": 62.71, "elapsed_time": "0:10:39", "remaining_time": "0:06:20", "throughput": 3325.94, "total_tokens": 2126280}
{"current_steps": 5185, "total_steps": 8260, "loss": 0.1701, "lr": 1.8310387430250014e-05, "epoch": 6.2772397094431, "percentage": 62.77, "elapsed_time": "0:10:39", "remaining_time": "0:06:19", "throughput": 3326.24, "total_tokens": 2128360}
{"current_steps": 5190, "total_steps": 8260, "loss": 0.1865, "lr": 1.825950395235956e-05, "epoch": 6.283292978208232, "percentage": 62.83, "elapsed_time": "0:10:40", "remaining_time": "0:06:18", "throughput": 3326.72, "total_tokens": 2130248}
{"current_steps": 5195, "total_steps": 8260, "loss": 0.1991, "lr": 1.8208650568914033e-05, "epoch": 6.289346246973365, "percentage": 62.89, "elapsed_time": "0:10:40", "remaining_time": "0:06:18", "throughput": 3327.06, "total_tokens": 2132200}
{"current_steps": 5200, "total_steps": 8260, "loss": 0.176, "lr": 1.815782750695967e-05, "epoch": 6.295399515738499, "percentage": 62.95, "elapsed_time": "0:10:41", "remaining_time": "0:06:17", "throughput": 3327.48, "total_tokens": 2134216}
{"current_steps": 5205, "total_steps": 8260, "loss": 0.1944, "lr": 1.810703499340735e-05, "epoch": 6.301452784503632, "percentage": 63.01, "elapsed_time": "0:10:41", "remaining_time": "0:06:16", "throughput": 3327.84, "total_tokens": 2136392}
{"current_steps": 5210, "total_steps": 8260, "loss": 0.205, "lr": 1.8056273255031552e-05, "epoch": 6.3075060532687655, "percentage": 63.08, "elapsed_time": "0:10:42", "remaining_time": "0:06:16", "throughput": 3328.14, "total_tokens": 2138504}
{"current_steps": 5215, "total_steps": 8260, "loss": 0.2619, "lr": 1.8005542518469366e-05, "epoch": 6.313559322033898, "percentage": 63.14, "elapsed_time": "0:10:43", "remaining_time": "0:06:15", "throughput": 3328.47, "total_tokens": 2140616}
{"current_steps": 5220, "total_steps": 8260, "loss": 0.1474, "lr": 1.7954843010219446e-05, "epoch": 6.319612590799031, "percentage": 63.2, "elapsed_time": "0:10:43", "remaining_time": "0:06:14", "throughput": 3328.73, "total_tokens": 2142664}
{"current_steps": 5225, "total_steps": 8260, "loss": 0.1464, "lr": 1.790417495664103e-05, "epoch": 6.325665859564165, "percentage": 63.26, "elapsed_time": "0:10:44", "remaining_time": "0:06:14", "throughput": 3329.0, "total_tokens": 2144744}
{"current_steps": 5230, "total_steps": 8260, "loss": 0.204, "lr": 1.785353858395292e-05, "epoch": 6.331719128329298, "percentage": 63.32, "elapsed_time": "0:10:44", "remaining_time": "0:06:13", "throughput": 3329.31, "total_tokens": 2146760}
{"current_steps": 5235, "total_steps": 8260, "loss": 0.159, "lr": 1.7802934118232482e-05, "epoch": 6.337772397094431, "percentage": 63.38, "elapsed_time": "0:10:45", "remaining_time": "0:06:12", "throughput": 3329.79, "total_tokens": 2148904}
{"current_steps": 5240, "total_steps": 8260, "loss": 0.163, "lr": 1.775236178541461e-05, "epoch": 6.343825665859564, "percentage": 63.44, "elapsed_time": "0:10:45", "remaining_time": "0:06:12", "throughput": 3330.08, "total_tokens": 2151080}
{"current_steps": 5245, "total_steps": 8260, "loss": 0.1878, "lr": 1.7701821811290743e-05, "epoch": 6.349878934624697, "percentage": 63.5, "elapsed_time": "0:10:46", "remaining_time": "0:06:11", "throughput": 3330.39, "total_tokens": 2153032}
{"current_steps": 5250, "total_steps": 8260, "loss": 0.2443, "lr": 1.7651314421507843e-05, "epoch": 6.3559322033898304, "percentage": 63.56, "elapsed_time": "0:10:47", "remaining_time": "0:06:10", "throughput": 3330.74, "total_tokens": 2155080}
{"current_steps": 5255, "total_steps": 8260, "loss": 0.1695, "lr": 1.7600839841567395e-05, "epoch": 6.361985472154964, "percentage": 63.62, "elapsed_time": "0:10:47", "remaining_time": "0:06:10", "throughput": 3331.0, "total_tokens": 2157064}
{"current_steps": 5260, "total_steps": 8260, "loss": 0.1986, "lr": 1.7550398296824395e-05, "epoch": 6.368038740920097, "percentage": 63.68, "elapsed_time": "0:10:48", "remaining_time": "0:06:09", "throughput": 3331.35, "total_tokens": 2159208}
{"current_steps": 5265, "total_steps": 8260, "loss": 0.1556, "lr": 1.749999001248635e-05, "epoch": 6.37409200968523, "percentage": 63.74, "elapsed_time": "0:10:48", "remaining_time": "0:06:09", "throughput": 3331.63, "total_tokens": 2161384}
{"current_steps": 5270, "total_steps": 8260, "loss": 0.2112, "lr": 1.7449615213612264e-05, "epoch": 6.380145278450363, "percentage": 63.8, "elapsed_time": "0:10:49", "remaining_time": "0:06:08", "throughput": 3331.98, "total_tokens": 2163432}
{"current_steps": 5275, "total_steps": 8260, "loss": 0.2311, "lr": 1.7399274125111635e-05, "epoch": 6.386198547215496, "percentage": 63.86, "elapsed_time": "0:10:49", "remaining_time": "0:06:07", "throughput": 3332.51, "total_tokens": 2165640}
{"current_steps": 5280, "total_steps": 8260, "loss": 0.1996, "lr": 1.7348966971743465e-05, "epoch": 6.39225181598063, "percentage": 63.92, "elapsed_time": "0:10:50", "remaining_time": "0:06:07", "throughput": 3332.79, "total_tokens": 2167656}
{"current_steps": 5285, "total_steps": 8260, "loss": 0.1763, "lr": 1.729869397811523e-05, "epoch": 6.398305084745763, "percentage": 63.98, "elapsed_time": "0:10:50", "remaining_time": "0:06:06", "throughput": 3333.07, "total_tokens": 2169672}
{"current_steps": 5290, "total_steps": 8260, "loss": 0.1683, "lr": 1.72484553686819e-05, "epoch": 6.404358353510895, "percentage": 64.04, "elapsed_time": "0:10:51", "remaining_time": "0:06:05", "throughput": 3333.48, "total_tokens": 2171592}
{"current_steps": 5295, "total_steps": 8260, "loss": 0.1929, "lr": 1.719825136774494e-05, "epoch": 6.410411622276029, "percentage": 64.1, "elapsed_time": "0:10:51", "remaining_time": "0:06:05", "throughput": 3333.84, "total_tokens": 2173576}
{"current_steps": 5300, "total_steps": 8260, "loss": 0.1549, "lr": 1.714808219945129e-05, "epoch": 6.416464891041162, "percentage": 64.16, "elapsed_time": "0:10:52", "remaining_time": "0:06:04", "throughput": 3334.23, "total_tokens": 2175592}
{"current_steps": 5305, "total_steps": 8260, "loss": 0.191, "lr": 1.709794808779234e-05, "epoch": 6.4225181598062955, "percentage": 64.23, "elapsed_time": "0:10:53", "remaining_time": "0:06:03", "throughput": 3334.51, "total_tokens": 2177512}
{"current_steps": 5310, "total_steps": 8260, "loss": 0.2191, "lr": 1.704784925660301e-05, "epoch": 6.428571428571429, "percentage": 64.29, "elapsed_time": "0:10:53", "remaining_time": "0:06:03", "throughput": 3334.86, "total_tokens": 2179560}
{"current_steps": 5315, "total_steps": 8260, "loss": 0.1405, "lr": 1.699778592956069e-05, "epoch": 6.434624697336561, "percentage": 64.35, "elapsed_time": "0:10:54", "remaining_time": "0:06:02", "throughput": 3335.2, "total_tokens": 2181608}
{"current_steps": 5320, "total_steps": 8260, "loss": 0.1715, "lr": 1.6947758330184226e-05, "epoch": 6.440677966101695, "percentage": 64.41, "elapsed_time": "0:10:54", "remaining_time": "0:06:01", "throughput": 3335.44, "total_tokens": 2183656}
{"current_steps": 5325, "total_steps": 8260, "loss": 0.1884, "lr": 1.689776668183299e-05, "epoch": 6.446731234866828, "percentage": 64.47, "elapsed_time": "0:10:55", "remaining_time": "0:06:01", "throughput": 3335.84, "total_tokens": 2185576}
{"current_steps": 5330, "total_steps": 8260, "loss": 0.1525, "lr": 1.6847811207705813e-05, "epoch": 6.452784503631961, "percentage": 64.53, "elapsed_time": "0:10:55", "remaining_time": "0:06:00", "throughput": 3336.04, "total_tokens": 2187592}
{"current_steps": 5335, "total_steps": 8260, "loss": 0.182, "lr": 1.6797892130840036e-05, "epoch": 6.458837772397095, "percentage": 64.59, "elapsed_time": "0:10:56", "remaining_time": "0:05:59", "throughput": 3336.48, "total_tokens": 2189544}
{"current_steps": 5340, "total_steps": 8260, "loss": 0.1472, "lr": 1.6748009674110477e-05, "epoch": 6.464891041162228, "percentage": 64.65, "elapsed_time": "0:10:56", "remaining_time": "0:05:59", "throughput": 3336.7, "total_tokens": 2191496}
{"current_steps": 5345, "total_steps": 8260, "loss": 0.211, "lr": 1.669816406022848e-05, "epoch": 6.4709443099273605, "percentage": 64.71, "elapsed_time": "0:10:57", "remaining_time": "0:05:58", "throughput": 3337.08, "total_tokens": 2193416}
{"current_steps": 5350, "total_steps": 8260, "loss": 0.1873, "lr": 1.6648355511740876e-05, "epoch": 6.476997578692494, "percentage": 64.77, "elapsed_time": "0:10:57", "remaining_time": "0:05:57", "throughput": 3337.35, "total_tokens": 2195432}
{"current_steps": 5355, "total_steps": 8260, "loss": 0.1749, "lr": 1.659858425102902e-05, "epoch": 6.483050847457627, "percentage": 64.83, "elapsed_time": "0:10:58", "remaining_time": "0:05:57", "throughput": 3337.64, "total_tokens": 2197288}
{"current_steps": 5360, "total_steps": 8260, "loss": 0.1787, "lr": 1.6548850500307772e-05, "epoch": 6.4891041162227605, "percentage": 64.89, "elapsed_time": "0:10:58", "remaining_time": "0:05:56", "throughput": 3338.01, "total_tokens": 2199368}
{"current_steps": 5365, "total_steps": 8260, "loss": 0.1749, "lr": 1.649915448162455e-05, "epoch": 6.495157384987894, "percentage": 64.95, "elapsed_time": "0:10:59", "remaining_time": "0:05:55", "throughput": 3338.5, "total_tokens": 2201640}
{"current_steps": 5369, "total_steps": 8260, "eval_loss": 0.18574486672878265, "epoch": 6.5, "percentage": 65.0, "elapsed_time": "0:11:04", "remaining_time": "0:05:57", "throughput": 3315.34, "total_tokens": 2203208}
{"current_steps": 5370, "total_steps": 8260, "loss": 0.2153, "lr": 1.6449496416858284e-05, "epoch": 6.501210653753026, "percentage": 65.01, "elapsed_time": "0:11:05", "remaining_time": "0:05:58", "throughput": 3310.97, "total_tokens": 2203592}
{"current_steps": 5375, "total_steps": 8260, "loss": 0.1595, "lr": 1.6399876527718456e-05, "epoch": 6.50726392251816, "percentage": 65.07, "elapsed_time": "0:11:06", "remaining_time": "0:05:57", "throughput": 3311.29, "total_tokens": 2205640}
{"current_steps": 5380, "total_steps": 8260, "loss": 0.1957, "lr": 1.6350295035744094e-05, "epoch": 6.513317191283293, "percentage": 65.13, "elapsed_time": "0:11:06", "remaining_time": "0:05:56", "throughput": 3311.6, "total_tokens": 2207752}
{"current_steps": 5385, "total_steps": 8260, "loss": 0.1935, "lr": 1.6300752162302822e-05, "epoch": 6.519370460048426, "percentage": 65.19, "elapsed_time": "0:11:07", "remaining_time": "0:05:56", "throughput": 3312.0, "total_tokens": 2209864}
{"current_steps": 5390, "total_steps": 8260, "loss": 0.1946, "lr": 1.625124812858982e-05, "epoch": 6.52542372881356, "percentage": 65.25, "elapsed_time": "0:11:07", "remaining_time": "0:05:55", "throughput": 3312.34, "total_tokens": 2211944}
{"current_steps": 5395, "total_steps": 8260, "loss": 0.1929, "lr": 1.6201783155626862e-05, "epoch": 6.531476997578692, "percentage": 65.31, "elapsed_time": "0:11:08", "remaining_time": "0:05:54", "throughput": 3312.81, "total_tokens": 2213928}
{"current_steps": 5400, "total_steps": 8260, "loss": 0.1631, "lr": 1.615235746426133e-05, "epoch": 6.5375302663438255, "percentage": 65.38, "elapsed_time": "0:11:08", "remaining_time": "0:05:54", "throughput": 3313.22, "total_tokens": 2215944}
{"current_steps": 5405, "total_steps": 8260, "loss": 0.2223, "lr": 1.6102971275165228e-05, "epoch": 6.543583535108959, "percentage": 65.44, "elapsed_time": "0:11:09", "remaining_time": "0:05:53", "throughput": 3313.42, "total_tokens": 2217960}
{"current_steps": 5410, "total_steps": 8260, "loss": 0.2156, "lr": 1.6053624808834188e-05, "epoch": 6.549636803874092, "percentage": 65.5, "elapsed_time": "0:11:09", "remaining_time": "0:05:52", "throughput": 3313.73, "total_tokens": 2219976}
{"current_steps": 5415, "total_steps": 8260, "loss": 0.1829, "lr": 1.6004318285586497e-05, "epoch": 6.5556900726392255, "percentage": 65.56, "elapsed_time": "0:11:10", "remaining_time": "0:05:52", "throughput": 3314.02, "total_tokens": 2221992}
{"current_steps": 5420, "total_steps": 8260, "loss": 0.1856, "lr": 1.5955051925562092e-05, "epoch": 6.561743341404358, "percentage": 65.62, "elapsed_time": "0:11:10", "remaining_time": "0:05:51", "throughput": 3314.49, "total_tokens": 2223976}
{"current_steps": 5425, "total_steps": 8260, "loss": 0.1776, "lr": 1.590582594872162e-05, "epoch": 6.567796610169491, "percentage": 65.68, "elapsed_time": "0:11:11", "remaining_time": "0:05:50", "throughput": 3314.89, "total_tokens": 2226088}
{"current_steps": 5430, "total_steps": 8260, "loss": 0.1826, "lr": 1.585664057484539e-05, "epoch": 6.573849878934625, "percentage": 65.74, "elapsed_time": "0:11:12", "remaining_time": "0:05:50", "throughput": 3315.18, "total_tokens": 2228008}
{"current_steps": 5435, "total_steps": 8260, "loss": 0.2239, "lr": 1.5807496023532472e-05, "epoch": 6.579903147699758, "percentage": 65.8, "elapsed_time": "0:11:12", "remaining_time": "0:05:49", "throughput": 3315.47, "total_tokens": 2229928}
{"current_steps": 5440, "total_steps": 8260, "loss": 0.1769, "lr": 1.5758392514199644e-05, "epoch": 6.585956416464891, "percentage": 65.86, "elapsed_time": "0:11:13", "remaining_time": "0:05:48", "throughput": 3315.63, "total_tokens": 2231912}
{"current_steps": 5445, "total_steps": 8260, "loss": 0.1748, "lr": 1.5709330266080446e-05, "epoch": 6.592009685230024, "percentage": 65.92, "elapsed_time": "0:11:13", "remaining_time": "0:05:48", "throughput": 3315.92, "total_tokens": 2233992}
{"current_steps": 5450, "total_steps": 8260, "loss": 0.2281, "lr": 1.5660309498224225e-05, "epoch": 6.598062953995157, "percentage": 65.98, "elapsed_time": "0:11:14", "remaining_time": "0:05:47", "throughput": 3316.08, "total_tokens": 2235976}
{"current_steps": 5455, "total_steps": 8260, "loss": 0.1923, "lr": 1.5611330429495096e-05, "epoch": 6.6041162227602905, "percentage": 66.04, "elapsed_time": "0:11:14", "remaining_time": "0:05:47", "throughput": 3316.39, "total_tokens": 2238088}
{"current_steps": 5460, "total_steps": 8260, "loss": 0.1688, "lr": 1.556239327857101e-05, "epoch": 6.610169491525424, "percentage": 66.1, "elapsed_time": "0:11:15", "remaining_time": "0:05:46", "throughput": 3316.63, "total_tokens": 2240136}
{"current_steps": 5465, "total_steps": 8260, "loss": 0.1988, "lr": 1.551349826394278e-05, "epoch": 6.616222760290557, "percentage": 66.16, "elapsed_time": "0:11:15", "remaining_time": "0:05:45", "throughput": 3317.08, "total_tokens": 2242280}
{"current_steps": 5470, "total_steps": 8260, "loss": 0.2003, "lr": 1.5464645603913066e-05, "epoch": 6.622276029055691, "percentage": 66.22, "elapsed_time": "0:11:16", "remaining_time": "0:05:45", "throughput": 3317.54, "total_tokens": 2244360}
{"current_steps": 5475, "total_steps": 8260, "loss": 0.215, "lr": 1.5415835516595465e-05, "epoch": 6.628329297820823, "percentage": 66.28, "elapsed_time": "0:11:17", "remaining_time": "0:05:44", "throughput": 3317.94, "total_tokens": 2246536}
{"current_steps": 5480, "total_steps": 8260, "loss": 0.1892, "lr": 1.5367068219913456e-05, "epoch": 6.634382566585956, "percentage": 66.34, "elapsed_time": "0:11:17", "remaining_time": "0:05:43", "throughput": 3318.29, "total_tokens": 2248616}
{"current_steps": 5485, "total_steps": 8260, "loss": 0.1675, "lr": 1.5318343931599503e-05, "epoch": 6.64043583535109, "percentage": 66.4, "elapsed_time": "0:11:18", "remaining_time": "0:05:43", "throughput": 3318.52, "total_tokens": 2250664}
{"current_steps": 5490, "total_steps": 8260, "loss": 0.1784, "lr": 1.5269662869194036e-05, "epoch": 6.646489104116223, "percentage": 66.46, "elapsed_time": "0:11:18", "remaining_time": "0:05:42", "throughput": 3318.87, "total_tokens": 2252808}
{"current_steps": 5495, "total_steps": 8260, "loss": 0.1705, "lr": 1.5221025250044486e-05, "epoch": 6.652542372881356, "percentage": 66.53, "elapsed_time": "0:11:19", "remaining_time": "0:05:41", "throughput": 3319.26, "total_tokens": 2254984}
{"current_steps": 5500, "total_steps": 8260, "loss": 0.1668, "lr": 1.517243129130433e-05, "epoch": 6.658595641646489, "percentage": 66.59, "elapsed_time": "0:11:19", "remaining_time": "0:05:41", "throughput": 3319.53, "total_tokens": 2257064}
{"current_steps": 5505, "total_steps": 8260, "loss": 0.2541, "lr": 1.512388120993212e-05, "epoch": 6.664648910411622, "percentage": 66.65, "elapsed_time": "0:11:20", "remaining_time": "0:05:40", "throughput": 3319.86, "total_tokens": 2259112}
{"current_steps": 5510, "total_steps": 8260, "loss": 0.2176, "lr": 1.5075375222690496e-05, "epoch": 6.670702179176756, "percentage": 66.71, "elapsed_time": "0:11:21", "remaining_time": "0:05:39", "throughput": 3320.18, "total_tokens": 2261224}
{"current_steps": 5515, "total_steps": 8260, "loss": 0.1867, "lr": 1.5026913546145232e-05, "epoch": 6.676755447941889, "percentage": 66.77, "elapsed_time": "0:11:21", "remaining_time": "0:05:39", "throughput": 3320.56, "total_tokens": 2263400}
{"current_steps": 5520, "total_steps": 8260, "loss": 0.184, "lr": 1.4978496396664279e-05, "epoch": 6.682808716707022, "percentage": 66.83, "elapsed_time": "0:11:22", "remaining_time": "0:05:38", "throughput": 3320.86, "total_tokens": 2265416}
{"current_steps": 5525, "total_steps": 8260, "loss": 0.1918, "lr": 1.4930123990416766e-05, "epoch": 6.688861985472155, "percentage": 66.89, "elapsed_time": "0:11:22", "remaining_time": "0:05:37", "throughput": 3321.3, "total_tokens": 2267560}
{"current_steps": 5530, "total_steps": 8260, "loss": 0.2066, "lr": 1.4881796543372079e-05, "epoch": 6.694915254237288, "percentage": 66.95, "elapsed_time": "0:11:23", "remaining_time": "0:05:37", "throughput": 3321.55, "total_tokens": 2269544}
{"current_steps": 5535, "total_steps": 8260, "loss": 0.1883, "lr": 1.4833514271298859e-05, "epoch": 6.700968523002421, "percentage": 67.01, "elapsed_time": "0:11:23", "remaining_time": "0:05:36", "throughput": 3321.83, "total_tokens": 2271720}
{"current_steps": 5540, "total_steps": 8260, "loss": 0.1848, "lr": 1.4785277389764046e-05, "epoch": 6.707021791767555, "percentage": 67.07, "elapsed_time": "0:11:24", "remaining_time": "0:05:36", "throughput": 3322.23, "total_tokens": 2273928}
{"current_steps": 5545, "total_steps": 8260, "loss": 0.1819, "lr": 1.4737086114131943e-05, "epoch": 6.713075060532688, "percentage": 67.13, "elapsed_time": "0:11:24", "remaining_time": "0:05:35", "throughput": 3322.6, "total_tokens": 2275912}
{"current_steps": 5550, "total_steps": 8260, "loss": 0.1917, "lr": 1.4688940659563225e-05, "epoch": 6.719128329297821, "percentage": 67.19, "elapsed_time": "0:11:25", "remaining_time": "0:05:34", "throughput": 3322.91, "total_tokens": 2278024}
{"current_steps": 5555, "total_steps": 8260, "loss": 0.2033, "lr": 1.4640841241013995e-05, "epoch": 6.725181598062954, "percentage": 67.25, "elapsed_time": "0:11:26", "remaining_time": "0:05:34", "throughput": 3323.21, "total_tokens": 2280136}
{"current_steps": 5560, "total_steps": 8260, "loss": 0.2256, "lr": 1.4592788073234803e-05, "epoch": 6.731234866828087, "percentage": 67.31, "elapsed_time": "0:11:26", "remaining_time": "0:05:33", "throughput": 3323.52, "total_tokens": 2282248}
{"current_steps": 5565, "total_steps": 8260, "loss": 0.1968, "lr": 1.4544781370769723e-05, "epoch": 6.737288135593221, "percentage": 67.37, "elapsed_time": "0:11:27", "remaining_time": "0:05:32", "throughput": 3323.9, "total_tokens": 2284424}
{"current_steps": 5570, "total_steps": 8260, "loss": 0.2186, "lr": 1.4496821347955359e-05, "epoch": 6.743341404358354, "percentage": 67.43, "elapsed_time": "0:11:27", "remaining_time": "0:05:32", "throughput": 3324.28, "total_tokens": 2286344}
{"current_steps": 5575, "total_steps": 8260, "loss": 0.171, "lr": 1.444890821891991e-05, "epoch": 6.749394673123486, "percentage": 67.49, "elapsed_time": "0:11:28", "remaining_time": "0:05:31", "throughput": 3324.7, "total_tokens": 2288456}
{"current_steps": 5580, "total_steps": 8260, "loss": 0.2069, "lr": 1.4401042197582193e-05, "epoch": 6.75544794188862, "percentage": 67.55, "elapsed_time": "0:11:28", "remaining_time": "0:05:30", "throughput": 3325.11, "total_tokens": 2290312}
{"current_steps": 5585, "total_steps": 8260, "loss": 0.1838, "lr": 1.4353223497650731e-05, "epoch": 6.761501210653753, "percentage": 67.62, "elapsed_time": "0:11:29", "remaining_time": "0:05:30", "throughput": 3325.49, "total_tokens": 2292392}
{"current_steps": 5590, "total_steps": 8260, "loss": 0.1897, "lr": 1.4305452332622748e-05, "epoch": 6.767554479418886, "percentage": 67.68, "elapsed_time": "0:11:29", "remaining_time": "0:05:29", "throughput": 3325.82, "total_tokens": 2294472}
{"current_steps": 5595, "total_steps": 8260, "loss": 0.1664, "lr": 1.4257728915783244e-05, "epoch": 6.77360774818402, "percentage": 67.74, "elapsed_time": "0:11:30", "remaining_time": "0:05:28", "throughput": 3326.16, "total_tokens": 2296616}
{"current_steps": 5600, "total_steps": 8260, "loss": 0.1809, "lr": 1.4210053460204023e-05, "epoch": 6.779661016949152, "percentage": 67.8, "elapsed_time": "0:11:31", "remaining_time": "0:05:28", "throughput": 3326.61, "total_tokens": 2298856}
{"current_steps": 5605, "total_steps": 8260, "loss": 0.2011, "lr": 1.4162426178742788e-05, "epoch": 6.785714285714286, "percentage": 67.86, "elapsed_time": "0:11:31", "remaining_time": "0:05:27", "throughput": 3327.03, "total_tokens": 2301064}
{"current_steps": 5610, "total_steps": 8260, "loss": 0.1671, "lr": 1.4114847284042132e-05, "epoch": 6.791767554479419, "percentage": 67.92, "elapsed_time": "0:11:32", "remaining_time": "0:05:26", "throughput": 3327.44, "total_tokens": 2303208}
{"current_steps": 5615, "total_steps": 8260, "loss": 0.2777, "lr": 1.4067316988528617e-05, "epoch": 6.797820823244552, "percentage": 67.98, "elapsed_time": "0:11:32", "remaining_time": "0:05:26", "throughput": 3327.84, "total_tokens": 2305288}
{"current_steps": 5620, "total_steps": 8260, "loss": 0.1824, "lr": 1.4019835504411827e-05, "epoch": 6.803874092009686, "percentage": 68.04, "elapsed_time": "0:11:33", "remaining_time": "0:05:25", "throughput": 3328.03, "total_tokens": 2307304}
{"current_steps": 5625, "total_steps": 8260, "loss": 0.1314, "lr": 1.3972403043683419e-05, "epoch": 6.809927360774818, "percentage": 68.1, "elapsed_time": "0:11:33", "remaining_time": "0:05:25", "throughput": 3328.36, "total_tokens": 2309448}
{"current_steps": 5630, "total_steps": 8260, "loss": 0.215, "lr": 1.3925019818116164e-05, "epoch": 6.815980629539951, "percentage": 68.16, "elapsed_time": "0:11:34", "remaining_time": "0:05:24", "throughput": 3328.61, "total_tokens": 2311368}
{"current_steps": 5635, "total_steps": 8260, "loss": 0.2309, "lr": 1.387768603926302e-05, "epoch": 6.822033898305085, "percentage": 68.22, "elapsed_time": "0:11:34", "remaining_time": "0:05:23", "throughput": 3328.95, "total_tokens": 2313448}
{"current_steps": 5640, "total_steps": 8260, "loss": 0.2108, "lr": 1.383040191845619e-05, "epoch": 6.828087167070218, "percentage": 68.28, "elapsed_time": "0:11:35", "remaining_time": "0:05:23", "throughput": 3329.19, "total_tokens": 2315464}
{"current_steps": 5645, "total_steps": 8260, "loss": 0.1697, "lr": 1.378316766680615e-05, "epoch": 6.8341404358353515, "percentage": 68.34, "elapsed_time": "0:11:36", "remaining_time": "0:05:22", "throughput": 3329.58, "total_tokens": 2317480}
{"current_steps": 5650, "total_steps": 8260, "loss": 0.1941, "lr": 1.373598349520073e-05, "epoch": 6.840193704600484, "percentage": 68.4, "elapsed_time": "0:11:36", "remaining_time": "0:05:21", "throughput": 3329.88, "total_tokens": 2319432}
{"current_steps": 5655, "total_steps": 8260, "loss": 0.1739, "lr": 1.3688849614304164e-05, "epoch": 6.846246973365617, "percentage": 68.46, "elapsed_time": "0:11:37", "remaining_time": "0:05:21", "throughput": 3330.15, "total_tokens": 2321512}
{"current_steps": 5660, "total_steps": 8260, "loss": 0.2405, "lr": 1.3641766234556146e-05, "epoch": 6.852300242130751, "percentage": 68.52, "elapsed_time": "0:11:37", "remaining_time": "0:05:20", "throughput": 3330.45, "total_tokens": 2323560}
{"current_steps": 5665, "total_steps": 8260, "loss": 0.178, "lr": 1.3594733566170926e-05, "epoch": 6.858353510895884, "percentage": 68.58, "elapsed_time": "0:11:38", "remaining_time": "0:05:19", "throughput": 3330.91, "total_tokens": 2325576}
{"current_steps": 5670, "total_steps": 8260, "loss": 0.1976, "lr": 1.3547751819136309e-05, "epoch": 6.864406779661017, "percentage": 68.64, "elapsed_time": "0:11:38", "remaining_time": "0:05:19", "throughput": 3331.19, "total_tokens": 2327496}
{"current_steps": 5675, "total_steps": 8260, "loss": 0.1908, "lr": 1.350082120321276e-05, "epoch": 6.87046004842615, "percentage": 68.7, "elapsed_time": "0:11:39", "remaining_time": "0:05:18", "throughput": 3331.46, "total_tokens": 2329576}
{"current_steps": 5680, "total_steps": 8260, "loss": 0.1771, "lr": 1.3453941927932456e-05, "epoch": 6.876513317191283, "percentage": 68.77, "elapsed_time": "0:11:39", "remaining_time": "0:05:17", "throughput": 3331.61, "total_tokens": 2331656}
{"current_steps": 5685, "total_steps": 8260, "loss": 0.1961, "lr": 1.3407114202598369e-05, "epoch": 6.8825665859564165, "percentage": 68.83, "elapsed_time": "0:11:40", "remaining_time": "0:05:17", "throughput": 3331.94, "total_tokens": 2333800}
{"current_steps": 5690, "total_steps": 8260, "loss": 0.1343, "lr": 1.3360338236283295e-05, "epoch": 6.88861985472155, "percentage": 68.89, "elapsed_time": "0:11:41", "remaining_time": "0:05:16", "throughput": 3332.35, "total_tokens": 2336008}
{"current_steps": 5695, "total_steps": 8260, "loss": 0.2452, "lr": 1.3313614237828948e-05, "epoch": 6.894673123486683, "percentage": 68.95, "elapsed_time": "0:11:41", "remaining_time": "0:05:15", "throughput": 3332.61, "total_tokens": 2337928}
{"current_steps": 5700, "total_steps": 8260, "loss": 0.2005, "lr": 1.3266942415845018e-05, "epoch": 6.900726392251816, "percentage": 69.01, "elapsed_time": "0:11:42", "remaining_time": "0:05:15", "throughput": 3332.83, "total_tokens": 2340008}
{"current_steps": 5705, "total_steps": 8260, "loss": 0.2041, "lr": 1.3220322978708242e-05, "epoch": 6.906779661016949, "percentage": 69.07, "elapsed_time": "0:11:42", "remaining_time": "0:05:14", "throughput": 3333.09, "total_tokens": 2342024}
{"current_steps": 5710, "total_steps": 8260, "loss": 0.1744, "lr": 1.317375613456147e-05, "epoch": 6.912832929782082, "percentage": 69.13, "elapsed_time": "0:11:43", "remaining_time": "0:05:14", "throughput": 3333.4, "total_tokens": 2343976}
{"current_steps": 5715, "total_steps": 8260, "loss": 0.1741, "lr": 1.3127242091312752e-05, "epoch": 6.918886198547216, "percentage": 69.19, "elapsed_time": "0:11:43", "remaining_time": "0:05:13", "throughput": 3333.79, "total_tokens": 2345928}
{"current_steps": 5720, "total_steps": 8260, "loss": 0.1746, "lr": 1.3080781056634373e-05, "epoch": 6.924939467312349, "percentage": 69.25, "elapsed_time": "0:11:44", "remaining_time": "0:05:12", "throughput": 3334.02, "total_tokens": 2347976}
{"current_steps": 5725, "total_steps": 8260, "loss": 0.1981, "lr": 1.3034373237961983e-05, "epoch": 6.9309927360774815, "percentage": 69.31, "elapsed_time": "0:11:44", "remaining_time": "0:05:12", "throughput": 3334.28, "total_tokens": 2349992}
{"current_steps": 5730, "total_steps": 8260, "loss": 0.221, "lr": 1.2988018842493604e-05, "epoch": 6.937046004842615, "percentage": 69.37, "elapsed_time": "0:11:45", "remaining_time": "0:05:11", "throughput": 3334.6, "total_tokens": 2351976}
{"current_steps": 5735, "total_steps": 8260, "loss": 0.1947, "lr": 1.2941718077188758e-05, "epoch": 6.943099273607748, "percentage": 69.43, "elapsed_time": "0:11:45", "remaining_time": "0:05:10", "throughput": 3334.87, "total_tokens": 2353992}
{"current_steps": 5740, "total_steps": 8260, "loss": 0.1857, "lr": 1.2895471148767508e-05, "epoch": 6.9491525423728815, "percentage": 69.49, "elapsed_time": "0:11:46", "remaining_time": "0:05:10", "throughput": 3335.21, "total_tokens": 2355976}
{"current_steps": 5745, "total_steps": 8260, "loss": 0.1973, "lr": 1.2849278263709572e-05, "epoch": 6.955205811138015, "percentage": 69.55, "elapsed_time": "0:11:46", "remaining_time": "0:05:09", "throughput": 3335.57, "total_tokens": 2358152}
{"current_steps": 5750, "total_steps": 8260, "loss": 0.189, "lr": 1.2803139628253364e-05, "epoch": 6.961259079903147, "percentage": 69.61, "elapsed_time": "0:11:47", "remaining_time": "0:05:08", "throughput": 3335.89, "total_tokens": 2360040}
{"current_steps": 5755, "total_steps": 8260, "loss": 0.164, "lr": 1.2757055448395092e-05, "epoch": 6.967312348668281, "percentage": 69.67, "elapsed_time": "0:11:48", "remaining_time": "0:05:08", "throughput": 3336.16, "total_tokens": 2362056}
{"current_steps": 5760, "total_steps": 8260, "loss": 0.1774, "lr": 1.271102592988782e-05, "epoch": 6.973365617433414, "percentage": 69.73, "elapsed_time": "0:11:48", "remaining_time": "0:05:07", "throughput": 3336.56, "total_tokens": 2364008}
{"current_steps": 5765, "total_steps": 8260, "loss": 0.1558, "lr": 1.2665051278240602e-05, "epoch": 6.979418886198547, "percentage": 69.79, "elapsed_time": "0:11:49", "remaining_time": "0:05:06", "throughput": 3336.79, "total_tokens": 2366056}
{"current_steps": 5770, "total_steps": 8260, "loss": 0.1274, "lr": 1.2619131698717504e-05, "epoch": 6.985472154963681, "percentage": 69.85, "elapsed_time": "0:11:49", "remaining_time": "0:05:06", "throughput": 3337.06, "total_tokens": 2368072}
{"current_steps": 5775, "total_steps": 8260, "loss": 0.1914, "lr": 1.2573267396336686e-05, "epoch": 6.991525423728813, "percentage": 69.92, "elapsed_time": "0:11:50", "remaining_time": "0:05:05", "throughput": 3337.36, "total_tokens": 2370024}
{"current_steps": 5780, "total_steps": 8260, "loss": 0.1559, "lr": 1.2527458575869539e-05, "epoch": 6.9975786924939465, "percentage": 69.98, "elapsed_time": "0:11:50", "remaining_time": "0:05:04", "throughput": 3337.61, "total_tokens": 2372008}
{"current_steps": 5782, "total_steps": 8260, "eval_loss": 0.18673697113990784, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:11:55", "remaining_time": "0:05:06", "throughput": 3315.46, "total_tokens": 2372464}
{"current_steps": 5785, "total_steps": 8260, "loss": 0.1858, "lr": 1.2481705441839756e-05, "epoch": 7.00363196125908, "percentage": 70.04, "elapsed_time": "0:11:56", "remaining_time": "0:05:06", "throughput": 3310.92, "total_tokens": 2373648}
{"current_steps": 5790, "total_steps": 8260, "loss": 0.1959, "lr": 1.2436008198522376e-05, "epoch": 7.009685230024213, "percentage": 70.1, "elapsed_time": "0:11:57", "remaining_time": "0:05:06", "throughput": 3311.14, "total_tokens": 2375696}
{"current_steps": 5795, "total_steps": 8260, "loss": 0.2061, "lr": 1.2390367049942916e-05, "epoch": 7.0157384987893465, "percentage": 70.16, "elapsed_time": "0:11:58", "remaining_time": "0:05:05", "throughput": 3311.5, "total_tokens": 2377872}
{"current_steps": 5800, "total_steps": 8260, "loss": 0.1737, "lr": 1.2344782199876431e-05, "epoch": 7.021791767554479, "percentage": 70.22, "elapsed_time": "0:11:58", "remaining_time": "0:05:04", "throughput": 3311.77, "total_tokens": 2379792}
{"current_steps": 5805, "total_steps": 8260, "loss": 0.1678, "lr": 1.2299253851846651e-05, "epoch": 7.027845036319612, "percentage": 70.28, "elapsed_time": "0:11:59", "remaining_time": "0:05:04", "throughput": 3311.97, "total_tokens": 2381744}
{"current_steps": 5810, "total_steps": 8260, "loss": 0.1616, "lr": 1.2253782209125012e-05, "epoch": 7.033898305084746, "percentage": 70.34, "elapsed_time": "0:11:59", "remaining_time": "0:05:03", "throughput": 3312.35, "total_tokens": 2383664}
{"current_steps": 5815, "total_steps": 8260, "loss": 0.1728, "lr": 1.220836747472978e-05, "epoch": 7.039951573849879, "percentage": 70.4, "elapsed_time": "0:12:00", "remaining_time": "0:05:02", "throughput": 3312.71, "total_tokens": 2385744}
{"current_steps": 5820, "total_steps": 8260, "loss": 0.2317, "lr": 1.2163009851425137e-05, "epoch": 7.046004842615012, "percentage": 70.46, "elapsed_time": "0:12:00", "remaining_time": "0:05:02", "throughput": 3313.04, "total_tokens": 2387920}
{"current_steps": 5825, "total_steps": 8260, "loss": 0.1761, "lr": 1.2117709541720306e-05, "epoch": 7.052058111380146, "percentage": 70.52, "elapsed_time": "0:12:01", "remaining_time": "0:05:01", "throughput": 3313.3, "total_tokens": 2390000}
{"current_steps": 5830, "total_steps": 8260, "loss": 0.2308, "lr": 1.2072466747868597e-05, "epoch": 7.058111380145278, "percentage": 70.58, "elapsed_time": "0:12:01", "remaining_time": "0:05:00", "throughput": 3313.58, "total_tokens": 2392016}
{"current_steps": 5835, "total_steps": 8260, "loss": 0.149, "lr": 1.2027281671866531e-05, "epoch": 7.0641646489104115, "percentage": 70.64, "elapsed_time": "0:12:02", "remaining_time": "0:05:00", "throughput": 3313.85, "total_tokens": 2394032}
{"current_steps": 5840, "total_steps": 8260, "loss": 0.1806, "lr": 1.198215451545293e-05, "epoch": 7.070217917675545, "percentage": 70.7, "elapsed_time": "0:12:02", "remaining_time": "0:04:59", "throughput": 3314.24, "total_tokens": 2395888}
{"current_steps": 5845, "total_steps": 8260, "loss": 0.1666, "lr": 1.193708548010804e-05, "epoch": 7.076271186440678, "percentage": 70.76, "elapsed_time": "0:12:03", "remaining_time": "0:04:58", "throughput": 3314.57, "total_tokens": 2398032}
{"current_steps": 5850, "total_steps": 8260, "loss": 0.1835, "lr": 1.1892074767052611e-05, "epoch": 7.0823244552058116, "percentage": 70.82, "elapsed_time": "0:12:04", "remaining_time": "0:04:58", "throughput": 3314.82, "total_tokens": 2400016}
{"current_steps": 5855, "total_steps": 8260, "loss": 0.1666, "lr": 1.1847122577246964e-05, "epoch": 7.088377723970944, "percentage": 70.88, "elapsed_time": "0:12:04", "remaining_time": "0:04:57", "throughput": 3315.1, "total_tokens": 2402032}
{"current_steps": 5860, "total_steps": 8260, "loss": 0.1678, "lr": 1.1802229111390157e-05, "epoch": 7.094430992736077, "percentage": 70.94, "elapsed_time": "0:12:05", "remaining_time": "0:04:56", "throughput": 3315.48, "total_tokens": 2404048}
{"current_steps": 5865, "total_steps": 8260, "loss": 0.2043, "lr": 1.175739456991908e-05, "epoch": 7.100484261501211, "percentage": 71.0, "elapsed_time": "0:12:05", "remaining_time": "0:04:56", "throughput": 3315.82, "total_tokens": 2406128}
{"current_steps": 5870, "total_steps": 8260, "loss": 0.2071, "lr": 1.1712619153007517e-05, "epoch": 7.106537530266344, "percentage": 71.07, "elapsed_time": "0:12:06", "remaining_time": "0:04:55", "throughput": 3316.15, "total_tokens": 2408112}
{"current_steps": 5875, "total_steps": 8260, "loss": 0.1959, "lr": 1.166790306056528e-05, "epoch": 7.112590799031477, "percentage": 71.13, "elapsed_time": "0:12:06", "remaining_time": "0:04:55", "throughput": 3316.37, "total_tokens": 2410160}
{"current_steps": 5880, "total_steps": 8260, "loss": 0.2073, "lr": 1.1623246492237305e-05, "epoch": 7.11864406779661, "percentage": 71.19, "elapsed_time": "0:12:07", "remaining_time": "0:04:54", "throughput": 3316.69, "total_tokens": 2412112}
{"current_steps": 5885, "total_steps": 8260, "loss": 0.1913, "lr": 1.1578649647402806e-05, "epoch": 7.124697336561743, "percentage": 71.25, "elapsed_time": "0:12:07", "remaining_time": "0:04:53", "throughput": 3317.06, "total_tokens": 2414224}
{"current_steps": 5890, "total_steps": 8260, "loss": 0.1847, "lr": 1.1534112725174306e-05, "epoch": 7.1307506053268765, "percentage": 71.31, "elapsed_time": "0:12:08", "remaining_time": "0:04:53", "throughput": 3317.46, "total_tokens": 2416272}
{"current_steps": 5895, "total_steps": 8260, "loss": 0.2023, "lr": 1.1489635924396817e-05, "epoch": 7.13680387409201, "percentage": 71.37, "elapsed_time": "0:12:08", "remaining_time": "0:04:52", "throughput": 3317.66, "total_tokens": 2418224}
{"current_steps": 5900, "total_steps": 8260, "loss": 0.1785, "lr": 1.1445219443646896e-05, "epoch": 7.142857142857143, "percentage": 71.43, "elapsed_time": "0:12:09", "remaining_time": "0:04:51", "throughput": 3317.94, "total_tokens": 2420336}
{"current_steps": 5905, "total_steps": 8260, "loss": 0.2247, "lr": 1.1400863481231833e-05, "epoch": 7.148910411622276, "percentage": 71.49, "elapsed_time": "0:12:10", "remaining_time": "0:04:51", "throughput": 3318.11, "total_tokens": 2422288}
{"current_steps": 5910, "total_steps": 8260, "loss": 0.1844, "lr": 1.1356568235188682e-05, "epoch": 7.154963680387409, "percentage": 71.55, "elapsed_time": "0:12:10", "remaining_time": "0:04:50", "throughput": 3318.5, "total_tokens": 2424400}
{"current_steps": 5915, "total_steps": 8260, "loss": 0.2062, "lr": 1.1312333903283435e-05, "epoch": 7.161016949152542, "percentage": 71.61, "elapsed_time": "0:12:11", "remaining_time": "0:04:49", "throughput": 3318.83, "total_tokens": 2426384}
{"current_steps": 5920, "total_steps": 8260, "loss": 0.1981, "lr": 1.1268160683010096e-05, "epoch": 7.167070217917676, "percentage": 71.67, "elapsed_time": "0:12:11", "remaining_time": "0:04:49", "throughput": 3319.21, "total_tokens": 2428400}
{"current_steps": 5925, "total_steps": 8260, "loss": 0.1722, "lr": 1.122404877158986e-05, "epoch": 7.173123486682809, "percentage": 71.73, "elapsed_time": "0:12:12", "remaining_time": "0:04:48", "throughput": 3319.49, "total_tokens": 2430416}
{"current_steps": 5930, "total_steps": 8260, "loss": 0.1994, "lr": 1.1179998365970174e-05, "epoch": 7.1791767554479415, "percentage": 71.79, "elapsed_time": "0:12:12", "remaining_time": "0:04:47", "throughput": 3319.77, "total_tokens": 2432464}
{"current_steps": 5935, "total_steps": 8260, "loss": 0.1742, "lr": 1.113600966282386e-05, "epoch": 7.185230024213075, "percentage": 71.85, "elapsed_time": "0:12:13", "remaining_time": "0:04:47", "throughput": 3320.04, "total_tokens": 2434480}
{"current_steps": 5940, "total_steps": 8260, "loss": 0.1853, "lr": 1.1092082858548275e-05, "epoch": 7.191283292978208, "percentage": 71.91, "elapsed_time": "0:12:13", "remaining_time": "0:04:46", "throughput": 3320.31, "total_tokens": 2436496}
{"current_steps": 5945, "total_steps": 8260, "loss": 0.1726, "lr": 1.1048218149264434e-05, "epoch": 7.197336561743342, "percentage": 71.97, "elapsed_time": "0:12:14", "remaining_time": "0:04:45", "throughput": 3320.62, "total_tokens": 2438544}
{"current_steps": 5950, "total_steps": 8260, "loss": 0.184, "lr": 1.1004415730816083e-05, "epoch": 7.203389830508475, "percentage": 72.03, "elapsed_time": "0:12:14", "remaining_time": "0:04:45", "throughput": 3320.96, "total_tokens": 2440528}
{"current_steps": 5955, "total_steps": 8260, "loss": 0.1833, "lr": 1.0960675798768871e-05, "epoch": 7.209443099273607, "percentage": 72.09, "elapsed_time": "0:12:15", "remaining_time": "0:04:44", "throughput": 3321.27, "total_tokens": 2442576}
{"current_steps": 5960, "total_steps": 8260, "loss": 0.1799, "lr": 1.0916998548409449e-05, "epoch": 7.215496368038741, "percentage": 72.15, "elapsed_time": "0:12:15", "remaining_time": "0:04:44", "throughput": 3321.6, "total_tokens": 2444560}
{"current_steps": 5965, "total_steps": 8260, "loss": 0.2, "lr": 1.0873384174744641e-05, "epoch": 7.221549636803874, "percentage": 72.22, "elapsed_time": "0:12:16", "remaining_time": "0:04:43", "throughput": 3321.92, "total_tokens": 2446704}
{"current_steps": 5970, "total_steps": 8260, "loss": 0.187, "lr": 1.0829832872500523e-05, "epoch": 7.227602905569007, "percentage": 72.28, "elapsed_time": "0:12:17", "remaining_time": "0:04:42", "throughput": 3322.21, "total_tokens": 2448720}
{"current_steps": 5975, "total_steps": 8260, "loss": 0.1752, "lr": 1.078634483612157e-05, "epoch": 7.233656174334141, "percentage": 72.34, "elapsed_time": "0:12:17", "remaining_time": "0:04:42", "throughput": 3322.48, "total_tokens": 2450832}
{"current_steps": 5980, "total_steps": 8260, "loss": 0.1891, "lr": 1.0742920259769792e-05, "epoch": 7.239709443099273, "percentage": 72.4, "elapsed_time": "0:12:18", "remaining_time": "0:04:41", "throughput": 3322.72, "total_tokens": 2452752}
{"current_steps": 5985, "total_steps": 8260, "loss": 0.1957, "lr": 1.06995593373239e-05, "epoch": 7.245762711864407, "percentage": 72.46, "elapsed_time": "0:12:18", "remaining_time": "0:04:40", "throughput": 3323.14, "total_tokens": 2454832}
{"current_steps": 5990, "total_steps": 8260, "loss": 0.1824, "lr": 1.0656262262378367e-05, "epoch": 7.25181598062954, "percentage": 72.52, "elapsed_time": "0:12:19", "remaining_time": "0:04:40", "throughput": 3323.4, "total_tokens": 2456848}
{"current_steps": 5995, "total_steps": 8260, "loss": 0.2161, "lr": 1.0613029228242627e-05, "epoch": 7.257869249394673, "percentage": 72.58, "elapsed_time": "0:12:19", "remaining_time": "0:04:39", "throughput": 3323.81, "total_tokens": 2459024}
{"current_steps": 6000, "total_steps": 8260, "loss": 0.1906, "lr": 1.0569860427940179e-05, "epoch": 7.263922518159807, "percentage": 72.64, "elapsed_time": "0:12:20", "remaining_time": "0:04:38", "throughput": 3324.03, "total_tokens": 2461168}
{"current_steps": 6005, "total_steps": 8260, "loss": 0.2011, "lr": 1.0526756054207737e-05, "epoch": 7.269975786924939, "percentage": 72.7, "elapsed_time": "0:12:20", "remaining_time": "0:04:38", "throughput": 3324.42, "total_tokens": 2463120}
{"current_steps": 6010, "total_steps": 8260, "loss": 0.2036, "lr": 1.0483716299494392e-05, "epoch": 7.276029055690072, "percentage": 72.76, "elapsed_time": "0:12:21", "remaining_time": "0:04:37", "throughput": 3324.71, "total_tokens": 2465168}
{"current_steps": 6015, "total_steps": 8260, "loss": 0.2296, "lr": 1.044074135596069e-05, "epoch": 7.282082324455206, "percentage": 72.82, "elapsed_time": "0:12:22", "remaining_time": "0:04:36", "throughput": 3324.96, "total_tokens": 2467248}
{"current_steps": 6020, "total_steps": 8260, "loss": 0.1758, "lr": 1.0397831415477823e-05, "epoch": 7.288135593220339, "percentage": 72.88, "elapsed_time": "0:12:22", "remaining_time": "0:04:36", "throughput": 3325.35, "total_tokens": 2469200}
{"current_steps": 6025, "total_steps": 8260, "loss": 0.1775, "lr": 1.0354986669626796e-05, "epoch": 7.2941888619854724, "percentage": 72.94, "elapsed_time": "0:12:23", "remaining_time": "0:04:35", "throughput": 3325.63, "total_tokens": 2471312}
{"current_steps": 6030, "total_steps": 8260, "loss": 0.1724, "lr": 1.0312207309697502e-05, "epoch": 7.300242130750606, "percentage": 73.0, "elapsed_time": "0:12:23", "remaining_time": "0:04:35", "throughput": 3325.89, "total_tokens": 2473424}
{"current_steps": 6035, "total_steps": 8260, "loss": 0.1889, "lr": 1.0269493526687915e-05, "epoch": 7.306295399515738, "percentage": 73.06, "elapsed_time": "0:12:24", "remaining_time": "0:04:34", "throughput": 3326.31, "total_tokens": 2475504}
{"current_steps": 6040, "total_steps": 8260, "loss": 0.1817, "lr": 1.0226845511303219e-05, "epoch": 7.312348668280872, "percentage": 73.12, "elapsed_time": "0:12:24", "remaining_time": "0:04:33", "throughput": 3326.55, "total_tokens": 2477392}
{"current_steps": 6045, "total_steps": 8260, "loss": 0.1838, "lr": 1.0184263453954988e-05, "epoch": 7.318401937046005, "percentage": 73.18, "elapsed_time": "0:12:25", "remaining_time": "0:04:33", "throughput": 3326.92, "total_tokens": 2479504}
{"current_steps": 6050, "total_steps": 8260, "loss": 0.1756, "lr": 1.0141747544760285e-05, "epoch": 7.324455205811138, "percentage": 73.24, "elapsed_time": "0:12:25", "remaining_time": "0:04:32", "throughput": 3327.17, "total_tokens": 2481584}
{"current_steps": 6055, "total_steps": 8260, "loss": 0.212, "lr": 1.0099297973540852e-05, "epoch": 7.330508474576272, "percentage": 73.31, "elapsed_time": "0:12:26", "remaining_time": "0:04:31", "throughput": 3327.38, "total_tokens": 2483632}
{"current_steps": 6060, "total_steps": 8260, "loss": 0.1985, "lr": 1.0056914929822248e-05, "epoch": 7.336561743341404, "percentage": 73.37, "elapsed_time": "0:12:26", "remaining_time": "0:04:31", "throughput": 3327.57, "total_tokens": 2485680}
{"current_steps": 6065, "total_steps": 8260, "loss": 0.205, "lr": 1.0014598602832995e-05, "epoch": 7.342615012106537, "percentage": 73.43, "elapsed_time": "0:12:27", "remaining_time": "0:04:30", "throughput": 3327.98, "total_tokens": 2487824}
{"current_steps": 6070, "total_steps": 8260, "loss": 0.2085, "lr": 9.972349181503773e-06, "epoch": 7.348668280871671, "percentage": 73.49, "elapsed_time": "0:12:28", "remaining_time": "0:04:29", "throughput": 3328.27, "total_tokens": 2489872}
{"current_steps": 6075, "total_steps": 8260, "loss": 0.1719, "lr": 9.930166854466516e-06, "epoch": 7.354721549636804, "percentage": 73.55, "elapsed_time": "0:12:28", "remaining_time": "0:04:29", "throughput": 3328.53, "total_tokens": 2491888}
{"current_steps": 6080, "total_steps": 8260, "loss": 0.1991, "lr": 9.888051810053617e-06, "epoch": 7.3607748184019375, "percentage": 73.61, "elapsed_time": "0:12:29", "remaining_time": "0:04:28", "throughput": 3328.87, "total_tokens": 2493968}
{"current_steps": 6085, "total_steps": 8260, "loss": 0.1729, "lr": 9.846004236297052e-06, "epoch": 7.36682808716707, "percentage": 73.67, "elapsed_time": "0:12:29", "remaining_time": "0:04:27", "throughput": 3329.23, "total_tokens": 2495920}
{"current_steps": 6090, "total_steps": 8260, "loss": 0.2109, "lr": 9.804024320927604e-06, "epoch": 7.372881355932203, "percentage": 73.73, "elapsed_time": "0:12:30", "remaining_time": "0:04:27", "throughput": 3329.47, "total_tokens": 2498000}
{"current_steps": 6095, "total_steps": 8260, "loss": 0.1669, "lr": 9.76211225137392e-06, "epoch": 7.378934624697337, "percentage": 73.79, "elapsed_time": "0:12:30", "remaining_time": "0:04:26", "throughput": 3329.85, "total_tokens": 2499952}
{"current_steps": 6100, "total_steps": 8260, "loss": 0.2078, "lr": 9.720268214761763e-06, "epoch": 7.38498789346247, "percentage": 73.85, "elapsed_time": "0:12:31", "remaining_time": "0:04:26", "throughput": 3330.19, "total_tokens": 2501968}
{"current_steps": 6105, "total_steps": 8260, "loss": 0.1782, "lr": 9.678492397913167e-06, "epoch": 7.391041162227603, "percentage": 73.91, "elapsed_time": "0:12:31", "remaining_time": "0:04:25", "throughput": 3330.45, "total_tokens": 2503984}
{"current_steps": 6110, "total_steps": 8260, "loss": 0.1821, "lr": 9.636784987345554e-06, "epoch": 7.397094430992736, "percentage": 73.97, "elapsed_time": "0:12:32", "remaining_time": "0:04:24", "throughput": 3330.67, "total_tokens": 2505968}
{"current_steps": 6115, "total_steps": 8260, "loss": 0.1701, "lr": 9.595146169270944e-06, "epoch": 7.403147699757869, "percentage": 74.03, "elapsed_time": "0:12:32", "remaining_time": "0:04:24", "throughput": 3330.95, "total_tokens": 2508080}
{"current_steps": 6120, "total_steps": 8260, "loss": 0.213, "lr": 9.553576129595101e-06, "epoch": 7.4092009685230025, "percentage": 74.09, "elapsed_time": "0:12:33", "remaining_time": "0:04:23", "throughput": 3331.28, "total_tokens": 2510160}
{"current_steps": 6125, "total_steps": 8260, "loss": 0.1625, "lr": 9.512075053916735e-06, "epoch": 7.415254237288136, "percentage": 74.15, "elapsed_time": "0:12:34", "remaining_time": "0:04:22", "throughput": 3331.49, "total_tokens": 2512304}
{"current_steps": 6130, "total_steps": 8260, "loss": 0.2039, "lr": 9.470643127526627e-06, "epoch": 7.421307506053269, "percentage": 74.21, "elapsed_time": "0:12:34", "remaining_time": "0:04:22", "throughput": 3331.8, "total_tokens": 2514544}
{"current_steps": 6135, "total_steps": 8260, "loss": 0.156, "lr": 9.429280535406834e-06, "epoch": 7.427360774818402, "percentage": 74.27, "elapsed_time": "0:12:35", "remaining_time": "0:04:21", "throughput": 3332.09, "total_tokens": 2516592}
{"current_steps": 6140, "total_steps": 8260, "loss": 0.2241, "lr": 9.387987462229859e-06, "epoch": 7.433414043583535, "percentage": 74.33, "elapsed_time": "0:12:35", "remaining_time": "0:04:20", "throughput": 3332.42, "total_tokens": 2518512}
{"current_steps": 6145, "total_steps": 8260, "loss": 0.1951, "lr": 9.346764092357801e-06, "epoch": 7.439467312348668, "percentage": 74.39, "elapsed_time": "0:12:36", "remaining_time": "0:04:20", "throughput": 3332.65, "total_tokens": 2520592}
{"current_steps": 6150, "total_steps": 8260, "loss": 0.2072, "lr": 9.305610609841598e-06, "epoch": 7.445520581113802, "percentage": 74.46, "elapsed_time": "0:12:36", "remaining_time": "0:04:19", "throughput": 3332.95, "total_tokens": 2522640}
{"current_steps": 6155, "total_steps": 8260, "loss": 0.1907, "lr": 9.264527198420117e-06, "epoch": 7.451573849878935, "percentage": 74.52, "elapsed_time": "0:12:37", "remaining_time": "0:04:19", "throughput": 3333.12, "total_tokens": 2524592}
{"current_steps": 6160, "total_steps": 8260, "loss": 0.1754, "lr": 9.2235140415194e-06, "epoch": 7.4576271186440675, "percentage": 74.58, "elapsed_time": "0:12:37", "remaining_time": "0:04:18", "throughput": 3333.34, "total_tokens": 2526576}
{"current_steps": 6165, "total_steps": 8260, "loss": 0.1642, "lr": 9.182571322251796e-06, "epoch": 7.463680387409201, "percentage": 74.64, "elapsed_time": "0:12:38", "remaining_time": "0:04:17", "throughput": 3333.56, "total_tokens": 2528656}
{"current_steps": 6170, "total_steps": 8260, "loss": 0.1789, "lr": 9.141699223415221e-06, "epoch": 7.469733656174334, "percentage": 74.7, "elapsed_time": "0:12:39", "remaining_time": "0:04:17", "throughput": 3333.93, "total_tokens": 2530864}
{"current_steps": 6175, "total_steps": 8260, "loss": 0.2073, "lr": 9.10089792749223e-06, "epoch": 7.4757869249394675, "percentage": 74.76, "elapsed_time": "0:12:39", "remaining_time": "0:04:16", "throughput": 3334.19, "total_tokens": 2532880}
{"current_steps": 6180, "total_steps": 8260, "loss": 0.1787, "lr": 9.06016761664929e-06, "epoch": 7.481840193704601, "percentage": 74.82, "elapsed_time": "0:12:40", "remaining_time": "0:04:15", "throughput": 3334.5, "total_tokens": 2534864}
{"current_steps": 6185, "total_steps": 8260, "loss": 0.1829, "lr": 9.019508472735958e-06, "epoch": 7.487893462469733, "percentage": 74.88, "elapsed_time": "0:12:40", "remaining_time": "0:04:15", "throughput": 3334.7, "total_tokens": 2536912}
{"current_steps": 6190, "total_steps": 8260, "loss": 0.1702, "lr": 8.978920677284022e-06, "epoch": 7.493946731234867, "percentage": 74.94, "elapsed_time": "0:12:41", "remaining_time": "0:04:14", "throughput": 3334.94, "total_tokens": 2538832}
{"current_steps": 6195, "total_steps": 8260, "loss": 0.1646, "lr": 8.938404411506732e-06, "epoch": 7.5, "percentage": 75.0, "elapsed_time": "0:12:41", "remaining_time": "0:04:13", "throughput": 3335.32, "total_tokens": 2540880}
{"current_steps": 6195, "total_steps": 8260, "eval_loss": 0.18596947193145752, "epoch": 7.5, "percentage": 75.0, "elapsed_time": "0:12:46", "remaining_time": "0:04:15", "throughput": 3315.09, "total_tokens": 2540880}
{"current_steps": 6200, "total_steps": 8260, "loss": 0.1683, "lr": 8.897959856297971e-06, "epoch": 7.506053268765133, "percentage": 75.06, "elapsed_time": "0:12:47", "remaining_time": "0:04:15", "throughput": 3311.46, "total_tokens": 2542832}
{"current_steps": 6205, "total_steps": 8260, "loss": 0.2169, "lr": 8.857587192231452e-06, "epoch": 7.512106537530267, "percentage": 75.12, "elapsed_time": "0:12:48", "remaining_time": "0:04:14", "throughput": 3311.73, "total_tokens": 2544784}
{"current_steps": 6210, "total_steps": 8260, "loss": 0.2199, "lr": 8.817286599559932e-06, "epoch": 7.518159806295399, "percentage": 75.18, "elapsed_time": "0:12:48", "remaining_time": "0:04:13", "throughput": 3312.01, "total_tokens": 2546832}
{"current_steps": 6215, "total_steps": 8260, "loss": 0.1622, "lr": 8.777058258214377e-06, "epoch": 7.5242130750605325, "percentage": 75.24, "elapsed_time": "0:12:49", "remaining_time": "0:04:13", "throughput": 3312.26, "total_tokens": 2549008}
{"current_steps": 6220, "total_steps": 8260, "loss": 0.1761, "lr": 8.736902347803163e-06, "epoch": 7.530266343825666, "percentage": 75.3, "elapsed_time": "0:12:50", "remaining_time": "0:04:12", "throughput": 3312.63, "total_tokens": 2551120}
{"current_steps": 6225, "total_steps": 8260, "loss": 0.1908, "lr": 8.696819047611288e-06, "epoch": 7.536319612590799, "percentage": 75.36, "elapsed_time": "0:12:50", "remaining_time": "0:04:11", "throughput": 3313.02, "total_tokens": 2553168}
{"current_steps": 6230, "total_steps": 8260, "loss": 0.2036, "lr": 8.65680853659958e-06, "epoch": 7.5423728813559325, "percentage": 75.42, "elapsed_time": "0:12:51", "remaining_time": "0:04:11", "throughput": 3313.28, "total_tokens": 2555184}
{"current_steps": 6235, "total_steps": 8260, "loss": 0.178, "lr": 8.616870993403864e-06, "epoch": 7.548426150121065, "percentage": 75.48, "elapsed_time": "0:12:51", "remaining_time": "0:04:10", "throughput": 3313.51, "total_tokens": 2557264}
{"current_steps": 6240, "total_steps": 8260, "loss": 0.1568, "lr": 8.577006596334191e-06, "epoch": 7.554479418886198, "percentage": 75.54, "elapsed_time": "0:12:52", "remaining_time": "0:04:10", "throughput": 3313.81, "total_tokens": 2559248}
{"current_steps": 6245, "total_steps": 8260, "loss": 0.1678, "lr": 8.537215523374038e-06, "epoch": 7.560532687651332, "percentage": 75.61, "elapsed_time": "0:12:52", "remaining_time": "0:04:09", "throughput": 3314.07, "total_tokens": 2561264}
{"current_steps": 6250, "total_steps": 8260, "loss": 0.1985, "lr": 8.4974979521795e-06, "epoch": 7.566585956416465, "percentage": 75.67, "elapsed_time": "0:12:53", "remaining_time": "0:04:08", "throughput": 3314.44, "total_tokens": 2563120}
{"current_steps": 6255, "total_steps": 8260, "loss": 0.1773, "lr": 8.45785406007852e-06, "epoch": 7.572639225181598, "percentage": 75.73, "elapsed_time": "0:12:53", "remaining_time": "0:04:08", "throughput": 3314.76, "total_tokens": 2565104}
{"current_steps": 6260, "total_steps": 8260, "loss": 0.1959, "lr": 8.418284024070069e-06, "epoch": 7.578692493946731, "percentage": 75.79, "elapsed_time": "0:12:54", "remaining_time": "0:04:07", "throughput": 3315.03, "total_tokens": 2567216}
{"current_steps": 6265, "total_steps": 8260, "loss": 0.1939, "lr": 8.378788020823394e-06, "epoch": 7.584745762711864, "percentage": 75.85, "elapsed_time": "0:12:55", "remaining_time": "0:04:06", "throughput": 3315.26, "total_tokens": 2569360}
{"current_steps": 6270, "total_steps": 8260, "loss": 0.1821, "lr": 8.33936622667719e-06, "epoch": 7.5907990314769975, "percentage": 75.91, "elapsed_time": "0:12:55", "remaining_time": "0:04:06", "throughput": 3315.51, "total_tokens": 2571536}
{"current_steps": 6275, "total_steps": 8260, "loss": 0.1771, "lr": 8.300018817638825e-06, "epoch": 7.596852300242131, "percentage": 75.97, "elapsed_time": "0:12:56", "remaining_time": "0:04:05", "throughput": 3315.87, "total_tokens": 2573648}
{"current_steps": 6280, "total_steps": 8260, "loss": 0.1534, "lr": 8.260745969383565e-06, "epoch": 7.602905569007264, "percentage": 76.03, "elapsed_time": "0:12:56", "remaining_time": "0:04:04", "throughput": 3316.18, "total_tokens": 2575888}
{"current_steps": 6285, "total_steps": 8260, "loss": 0.1916, "lr": 8.221547857253781e-06, "epoch": 7.608958837772397, "percentage": 76.09, "elapsed_time": "0:12:57", "remaining_time": "0:04:04", "throughput": 3316.58, "total_tokens": 2578032}
{"current_steps": 6290, "total_steps": 8260, "loss": 0.18, "lr": 8.182424656258178e-06, "epoch": 7.61501210653753, "percentage": 76.15, "elapsed_time": "0:12:57", "remaining_time": "0:04:03", "throughput": 3316.8, "total_tokens": 2580016}
{"current_steps": 6295, "total_steps": 8260, "loss": 0.2007, "lr": 8.143376541070993e-06, "epoch": 7.621065375302663, "percentage": 76.21, "elapsed_time": "0:12:58", "remaining_time": "0:04:02", "throughput": 3317.04, "total_tokens": 2582192}
{"current_steps": 6300, "total_steps": 8260, "loss": 0.1919, "lr": 8.104403686031225e-06, "epoch": 7.627118644067797, "percentage": 76.27, "elapsed_time": "0:12:59", "remaining_time": "0:04:02", "throughput": 3317.3, "total_tokens": 2584208}
{"current_steps": 6305, "total_steps": 8260, "loss": 0.1513, "lr": 8.06550626514185e-06, "epoch": 7.63317191283293, "percentage": 76.33, "elapsed_time": "0:12:59", "remaining_time": "0:04:01", "throughput": 3317.47, "total_tokens": 2586160}
{"current_steps": 6310, "total_steps": 8260, "loss": 0.1615, "lr": 8.026684452069084e-06, "epoch": 7.6392251815980625, "percentage": 76.39, "elapsed_time": "0:13:00", "remaining_time": "0:04:01", "throughput": 3317.77, "total_tokens": 2588240}
{"current_steps": 6315, "total_steps": 8260, "loss": 0.2145, "lr": 7.987938420141536e-06, "epoch": 7.645278450363196, "percentage": 76.45, "elapsed_time": "0:13:00", "remaining_time": "0:04:00", "throughput": 3318.16, "total_tokens": 2590480}
{"current_steps": 6320, "total_steps": 8260, "loss": 0.1862, "lr": 7.949268342349495e-06, "epoch": 7.651331719128329, "percentage": 76.51, "elapsed_time": "0:13:01", "remaining_time": "0:03:59", "throughput": 3318.48, "total_tokens": 2592400}
{"current_steps": 6325, "total_steps": 8260, "loss": 0.2262, "lr": 7.910674391344129e-06, "epoch": 7.657384987893463, "percentage": 76.57, "elapsed_time": "0:13:01", "remaining_time": "0:03:59", "throughput": 3318.76, "total_tokens": 2594352}
{"current_steps": 6330, "total_steps": 8260, "loss": 0.1956, "lr": 7.872156739436722e-06, "epoch": 7.663438256658596, "percentage": 76.63, "elapsed_time": "0:13:02", "remaining_time": "0:03:58", "throughput": 3319.04, "total_tokens": 2596464}
{"current_steps": 6335, "total_steps": 8260, "loss": 0.1619, "lr": 7.833715558597907e-06, "epoch": 7.669491525423728, "percentage": 76.69, "elapsed_time": "0:13:02", "remaining_time": "0:03:57", "throughput": 3319.37, "total_tokens": 2598544}
{"current_steps": 6340, "total_steps": 8260, "loss": 0.2114, "lr": 7.795351020456887e-06, "epoch": 7.675544794188862, "percentage": 76.76, "elapsed_time": "0:13:03", "remaining_time": "0:03:57", "throughput": 3319.67, "total_tokens": 2600656}
{"current_steps": 6345, "total_steps": 8260, "loss": 0.179, "lr": 7.757063296300681e-06, "epoch": 7.681598062953995, "percentage": 76.82, "elapsed_time": "0:13:04", "remaining_time": "0:03:56", "throughput": 3319.92, "total_tokens": 2602832}
{"current_steps": 6350, "total_steps": 8260, "loss": 0.1799, "lr": 7.718852557073366e-06, "epoch": 7.687651331719128, "percentage": 76.88, "elapsed_time": "0:13:04", "remaining_time": "0:03:55", "throughput": 3320.26, "total_tokens": 2605008}
{"current_steps": 6355, "total_steps": 8260, "loss": 0.1691, "lr": 7.680718973375287e-06, "epoch": 7.693704600484262, "percentage": 76.94, "elapsed_time": "0:13:05", "remaining_time": "0:03:55", "throughput": 3320.47, "total_tokens": 2607152}
{"current_steps": 6360, "total_steps": 8260, "loss": 0.1783, "lr": 7.642662715462315e-06, "epoch": 7.699757869249394, "percentage": 77.0, "elapsed_time": "0:13:05", "remaining_time": "0:03:54", "throughput": 3320.84, "total_tokens": 2609264}
{"current_steps": 6365, "total_steps": 8260, "loss": 0.1818, "lr": 7.604683953245076e-06, "epoch": 7.7058111380145276, "percentage": 77.06, "elapsed_time": "0:13:06", "remaining_time": "0:03:54", "throughput": 3321.07, "total_tokens": 2611344}
{"current_steps": 6370, "total_steps": 8260, "loss": 0.1896, "lr": 7.566782856288224e-06, "epoch": 7.711864406779661, "percentage": 77.12, "elapsed_time": "0:13:06", "remaining_time": "0:03:53", "throughput": 3321.36, "total_tokens": 2613488}
{"current_steps": 6375, "total_steps": 8260, "loss": 0.2026, "lr": 7.5289595938096344e-06, "epoch": 7.717917675544794, "percentage": 77.18, "elapsed_time": "0:13:07", "remaining_time": "0:03:52", "throughput": 3321.7, "total_tokens": 2615408}
{"current_steps": 6380, "total_steps": 8260, "loss": 0.1693, "lr": 7.4912143346796805e-06, "epoch": 7.723970944309928, "percentage": 77.24, "elapsed_time": "0:13:07", "remaining_time": "0:03:52", "throughput": 3321.96, "total_tokens": 2617424}
{"current_steps": 6385, "total_steps": 8260, "loss": 0.1975, "lr": 7.4535472474204645e-06, "epoch": 7.73002421307506, "percentage": 77.3, "elapsed_time": "0:13:08", "remaining_time": "0:03:51", "throughput": 3322.16, "total_tokens": 2619312}
{"current_steps": 6390, "total_steps": 8260, "loss": 0.2262, "lr": 7.415958500205103e-06, "epoch": 7.736077481840193, "percentage": 77.36, "elapsed_time": "0:13:08", "remaining_time": "0:03:50", "throughput": 3322.52, "total_tokens": 2621168}
{"current_steps": 6395, "total_steps": 8260, "loss": 0.1644, "lr": 7.37844826085691e-06, "epoch": 7.742130750605327, "percentage": 77.42, "elapsed_time": "0:13:09", "remaining_time": "0:03:50", "throughput": 3322.83, "total_tokens": 2623152}
{"current_steps": 6400, "total_steps": 8260, "loss": 0.1469, "lr": 7.341016696848699e-06, "epoch": 7.74818401937046, "percentage": 77.48, "elapsed_time": "0:13:10", "remaining_time": "0:03:49", "throughput": 3323.08, "total_tokens": 2625328}
{"current_steps": 6405, "total_steps": 8260, "loss": 0.1738, "lr": 7.303663975302022e-06, "epoch": 7.754237288135593, "percentage": 77.54, "elapsed_time": "0:13:10", "remaining_time": "0:03:48", "throughput": 3323.35, "total_tokens": 2627536}
{"current_steps": 6410, "total_steps": 8260, "loss": 0.1884, "lr": 7.2663902629864165e-06, "epoch": 7.760290556900727, "percentage": 77.6, "elapsed_time": "0:13:11", "remaining_time": "0:03:48", "throughput": 3323.67, "total_tokens": 2629616}
{"current_steps": 6415, "total_steps": 8260, "loss": 0.1432, "lr": 7.229195726318669e-06, "epoch": 7.766343825665859, "percentage": 77.66, "elapsed_time": "0:13:11", "remaining_time": "0:03:47", "throughput": 3323.9, "total_tokens": 2631696}
{"current_steps": 6420, "total_steps": 8260, "loss": 0.1887, "lr": 7.192080531362067e-06, "epoch": 7.772397094430993, "percentage": 77.72, "elapsed_time": "0:13:12", "remaining_time": "0:03:47", "throughput": 3324.22, "total_tokens": 2633776}
{"current_steps": 6425, "total_steps": 8260, "loss": 0.2194, "lr": 7.155044843825651e-06, "epoch": 7.778450363196126, "percentage": 77.78, "elapsed_time": "0:13:12", "remaining_time": "0:03:46", "throughput": 3324.52, "total_tokens": 2635920}
{"current_steps": 6430, "total_steps": 8260, "loss": 0.1497, "lr": 7.118088829063504e-06, "epoch": 7.784503631961259, "percentage": 77.85, "elapsed_time": "0:13:13", "remaining_time": "0:03:45", "throughput": 3324.77, "total_tokens": 2637936}
{"current_steps": 6435, "total_steps": 8260, "loss": 0.2412, "lr": 7.081212652073979e-06, "epoch": 7.790556900726393, "percentage": 77.91, "elapsed_time": "0:13:13", "remaining_time": "0:03:45", "throughput": 3325.06, "total_tokens": 2639984}
{"current_steps": 6440, "total_steps": 8260, "loss": 0.168, "lr": 7.044416477498972e-06, "epoch": 7.796610169491525, "percentage": 77.97, "elapsed_time": "0:13:14", "remaining_time": "0:03:44", "throughput": 3325.4, "total_tokens": 2642000}
{"current_steps": 6445, "total_steps": 8260, "loss": 0.2033, "lr": 7.007700469623185e-06, "epoch": 7.802663438256658, "percentage": 78.03, "elapsed_time": "0:13:15", "remaining_time": "0:03:43", "throughput": 3325.64, "total_tokens": 2644016}
{"current_steps": 6450, "total_steps": 8260, "loss": 0.2043, "lr": 6.971064792373427e-06, "epoch": 7.808716707021792, "percentage": 78.09, "elapsed_time": "0:13:15", "remaining_time": "0:03:43", "throughput": 3325.94, "total_tokens": 2646000}
{"current_steps": 6455, "total_steps": 8260, "loss": 0.2006, "lr": 6.934509609317821e-06, "epoch": 7.814769975786925, "percentage": 78.15, "elapsed_time": "0:13:16", "remaining_time": "0:03:42", "throughput": 3326.23, "total_tokens": 2648048}
{"current_steps": 6460, "total_steps": 8260, "loss": 0.1714, "lr": 6.898035083665124e-06, "epoch": 7.8208232445520585, "percentage": 78.21, "elapsed_time": "0:13:16", "remaining_time": "0:03:41", "throughput": 3326.47, "total_tokens": 2650064}
{"current_steps": 6465, "total_steps": 8260, "loss": 0.1906, "lr": 6.861641378263964e-06, "epoch": 7.826876513317191, "percentage": 78.27, "elapsed_time": "0:13:17", "remaining_time": "0:03:41", "throughput": 3326.75, "total_tokens": 2652016}
{"current_steps": 6470, "total_steps": 8260, "loss": 0.1945, "lr": 6.825328655602153e-06, "epoch": 7.832929782082324, "percentage": 78.33, "elapsed_time": "0:13:17", "remaining_time": "0:03:40", "throughput": 3327.07, "total_tokens": 2654192}
{"current_steps": 6475, "total_steps": 8260, "loss": 0.1901, "lr": 6.789097077805917e-06, "epoch": 7.838983050847458, "percentage": 78.39, "elapsed_time": "0:13:18", "remaining_time": "0:03:40", "throughput": 3327.33, "total_tokens": 2656208}
{"current_steps": 6480, "total_steps": 8260, "loss": 0.2156, "lr": 6.7529468066392015e-06, "epoch": 7.845036319612591, "percentage": 78.45, "elapsed_time": "0:13:18", "remaining_time": "0:03:39", "throughput": 3327.56, "total_tokens": 2658384}
{"current_steps": 6485, "total_steps": 8260, "loss": 0.1958, "lr": 6.7168780035029385e-06, "epoch": 7.851089588377724, "percentage": 78.51, "elapsed_time": "0:13:19", "remaining_time": "0:03:38", "throughput": 3327.94, "total_tokens": 2660528}
{"current_steps": 6490, "total_steps": 8260, "loss": 0.1976, "lr": 6.680890829434325e-06, "epoch": 7.857142857142857, "percentage": 78.57, "elapsed_time": "0:13:20", "remaining_time": "0:03:38", "throughput": 3328.29, "total_tokens": 2662640}
{"current_steps": 6495, "total_steps": 8260, "loss": 0.1813, "lr": 6.644985445106114e-06, "epoch": 7.86319612590799, "percentage": 78.63, "elapsed_time": "0:13:20", "remaining_time": "0:03:37", "throughput": 3328.58, "total_tokens": 2664624}
{"current_steps": 6500, "total_steps": 8260, "loss": 0.1716, "lr": 6.609162010825881e-06, "epoch": 7.8692493946731235, "percentage": 78.69, "elapsed_time": "0:13:21", "remaining_time": "0:03:36", "throughput": 3328.81, "total_tokens": 2666640}
{"current_steps": 6505, "total_steps": 8260, "loss": 0.2174, "lr": 6.573420686535317e-06, "epoch": 7.875302663438257, "percentage": 78.75, "elapsed_time": "0:13:21", "remaining_time": "0:03:36", "throughput": 3329.07, "total_tokens": 2668592}
{"current_steps": 6510, "total_steps": 8260, "loss": 0.2011, "lr": 6.537761631809533e-06, "epoch": 7.88135593220339, "percentage": 78.81, "elapsed_time": "0:13:22", "remaining_time": "0:03:35", "throughput": 3329.42, "total_tokens": 2670704}
{"current_steps": 6515, "total_steps": 8260, "loss": 0.1832, "lr": 6.502185005856312e-06, "epoch": 7.8874092009685235, "percentage": 78.87, "elapsed_time": "0:13:22", "remaining_time": "0:03:34", "throughput": 3329.69, "total_tokens": 2672752}
{"current_steps": 6520, "total_steps": 8260, "loss": 0.1728, "lr": 6.4666909675154155e-06, "epoch": 7.893462469733656, "percentage": 78.93, "elapsed_time": "0:13:23", "remaining_time": "0:03:34", "throughput": 3329.95, "total_tokens": 2674864}
{"current_steps": 6525, "total_steps": 8260, "loss": 0.1835, "lr": 6.431279675257873e-06, "epoch": 7.899515738498789, "percentage": 79.0, "elapsed_time": "0:13:23", "remaining_time": "0:03:33", "throughput": 3330.34, "total_tokens": 2676944}
{"current_steps": 6530, "total_steps": 8260, "loss": 0.1974, "lr": 6.395951287185295e-06, "epoch": 7.905569007263923, "percentage": 79.06, "elapsed_time": "0:13:24", "remaining_time": "0:03:33", "throughput": 3330.56, "total_tokens": 2679024}
{"current_steps": 6535, "total_steps": 8260, "loss": 0.1759, "lr": 6.360705961029126e-06, "epoch": 7.911622276029056, "percentage": 79.12, "elapsed_time": "0:13:24", "remaining_time": "0:03:32", "throughput": 3330.81, "total_tokens": 2680976}
{"current_steps": 6540, "total_steps": 8260, "loss": 0.182, "lr": 6.325543854149968e-06, "epoch": 7.917675544794189, "percentage": 79.18, "elapsed_time": "0:13:25", "remaining_time": "0:03:31", "throughput": 3330.98, "total_tokens": 2683088}
{"current_steps": 6545, "total_steps": 8260, "loss": 0.2174, "lr": 6.290465123536876e-06, "epoch": 7.923728813559322, "percentage": 79.24, "elapsed_time": "0:13:26", "remaining_time": "0:03:31", "throughput": 3331.28, "total_tokens": 2685072}
{"current_steps": 6550, "total_steps": 8260, "loss": 0.1721, "lr": 6.255469925806643e-06, "epoch": 7.929782082324455, "percentage": 79.3, "elapsed_time": "0:13:26", "remaining_time": "0:03:30", "throughput": 3331.61, "total_tokens": 2687184}
{"current_steps": 6555, "total_steps": 8260, "loss": 0.1929, "lr": 6.220558417203132e-06, "epoch": 7.9358353510895885, "percentage": 79.36, "elapsed_time": "0:13:27", "remaining_time": "0:03:29", "throughput": 3331.98, "total_tokens": 2689232}
{"current_steps": 6560, "total_steps": 8260, "loss": 0.1665, "lr": 6.185730753596539e-06, "epoch": 7.941888619854722, "percentage": 79.42, "elapsed_time": "0:13:27", "remaining_time": "0:03:29", "throughput": 3332.17, "total_tokens": 2691280}
{"current_steps": 6565, "total_steps": 8260, "loss": 0.1685, "lr": 6.150987090482715e-06, "epoch": 7.947941888619855, "percentage": 79.48, "elapsed_time": "0:13:28", "remaining_time": "0:03:28", "throughput": 3332.44, "total_tokens": 2693328}
{"current_steps": 6570, "total_steps": 8260, "loss": 0.1748, "lr": 6.116327582982484e-06, "epoch": 7.953995157384988, "percentage": 79.54, "elapsed_time": "0:13:28", "remaining_time": "0:03:28", "throughput": 3332.78, "total_tokens": 2695440}
{"current_steps": 6575, "total_steps": 8260, "loss": 0.1328, "lr": 6.0817523858409245e-06, "epoch": 7.960048426150121, "percentage": 79.6, "elapsed_time": "0:13:29", "remaining_time": "0:03:27", "throughput": 3332.96, "total_tokens": 2697488}
{"current_steps": 6580, "total_steps": 8260, "loss": 0.1867, "lr": 6.047261653426708e-06, "epoch": 7.966101694915254, "percentage": 79.66, "elapsed_time": "0:13:29", "remaining_time": "0:03:26", "throughput": 3333.2, "total_tokens": 2699504}
{"current_steps": 6585, "total_steps": 8260, "loss": 0.1777, "lr": 6.012855539731374e-06, "epoch": 7.972154963680388, "percentage": 79.72, "elapsed_time": "0:13:30", "remaining_time": "0:03:26", "throughput": 3333.45, "total_tokens": 2701456}
{"current_steps": 6590, "total_steps": 8260, "loss": 0.1362, "lr": 5.978534198368691e-06, "epoch": 7.978208232445521, "percentage": 79.78, "elapsed_time": "0:13:30", "remaining_time": "0:03:25", "throughput": 3333.73, "total_tokens": 2703504}
{"current_steps": 6595, "total_steps": 8260, "loss": 0.1647, "lr": 5.944297782573918e-06, "epoch": 7.9842615012106535, "percentage": 79.84, "elapsed_time": "0:13:31", "remaining_time": "0:03:24", "throughput": 3334.06, "total_tokens": 2705616}
{"current_steps": 6600, "total_steps": 8260, "loss": 0.1726, "lr": 5.910146445203154e-06, "epoch": 7.990314769975787, "percentage": 79.9, "elapsed_time": "0:13:32", "remaining_time": "0:03:24", "throughput": 3334.4, "total_tokens": 2707728}
{"current_steps": 6605, "total_steps": 8260, "loss": 0.1817, "lr": 5.876080338732643e-06, "epoch": 7.99636803874092, "percentage": 79.96, "elapsed_time": "0:13:32", "remaining_time": "0:03:23", "throughput": 3334.68, "total_tokens": 2709776}
{"current_steps": 6608, "total_steps": 8260, "eval_loss": 0.1831730306148529, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:13:37", "remaining_time": "0:03:24", "throughput": 3315.32, "total_tokens": 2710624}
{"current_steps": 6610, "total_steps": 8260, "loss": 0.2046, "lr": 5.842099615258109e-06, "epoch": 8.002421307506053, "percentage": 80.02, "elapsed_time": "0:13:38", "remaining_time": "0:03:24", "throughput": 3310.92, "total_tokens": 2711456}
{"current_steps": 6615, "total_steps": 8260, "loss": 0.2016, "lr": 5.808204426494054e-06, "epoch": 8.008474576271187, "percentage": 80.08, "elapsed_time": "0:13:39", "remaining_time": "0:03:23", "throughput": 3311.16, "total_tokens": 2713536}
{"current_steps": 6620, "total_steps": 8260, "loss": 0.1858, "lr": 5.774394923773088e-06, "epoch": 8.01452784503632, "percentage": 80.15, "elapsed_time": "0:13:40", "remaining_time": "0:03:23", "throughput": 3311.36, "total_tokens": 2715680}
{"current_steps": 6625, "total_steps": 8260, "loss": 0.1664, "lr": 5.74067125804526e-06, "epoch": 8.020581113801454, "percentage": 80.21, "elapsed_time": "0:13:40", "remaining_time": "0:03:22", "throughput": 3311.63, "total_tokens": 2717728}
{"current_steps": 6630, "total_steps": 8260, "loss": 0.166, "lr": 5.70703357987738e-06, "epoch": 8.026634382566586, "percentage": 80.27, "elapsed_time": "0:13:41", "remaining_time": "0:03:21", "throughput": 3311.92, "total_tokens": 2719712}
{"current_steps": 6635, "total_steps": 8260, "loss": 0.1692, "lr": 5.673482039452363e-06, "epoch": 8.032687651331718, "percentage": 80.33, "elapsed_time": "0:13:41", "remaining_time": "0:03:21", "throughput": 3312.22, "total_tokens": 2721856}
{"current_steps": 6640, "total_steps": 8260, "loss": 0.2112, "lr": 5.640016786568525e-06, "epoch": 8.038740920096853, "percentage": 80.39, "elapsed_time": "0:13:42", "remaining_time": "0:03:20", "throughput": 3312.58, "total_tokens": 2724000}
{"current_steps": 6645, "total_steps": 8260, "loss": 0.2024, "lr": 5.606637970638917e-06, "epoch": 8.044794188861985, "percentage": 80.45, "elapsed_time": "0:13:42", "remaining_time": "0:03:19", "throughput": 3312.87, "total_tokens": 2725792}
{"current_steps": 6650, "total_steps": 8260, "loss": 0.2067, "lr": 5.573345740690714e-06, "epoch": 8.05084745762712, "percentage": 80.51, "elapsed_time": "0:13:43", "remaining_time": "0:03:19", "throughput": 3313.13, "total_tokens": 2727744}
{"current_steps": 6655, "total_steps": 8260, "loss": 0.1657, "lr": 5.540140245364478e-06, "epoch": 8.056900726392252, "percentage": 80.57, "elapsed_time": "0:13:43", "remaining_time": "0:03:18", "throughput": 3313.46, "total_tokens": 2729856}
{"current_steps": 6660, "total_steps": 8260, "loss": 0.1627, "lr": 5.5070216329135365e-06, "epoch": 8.062953995157384, "percentage": 80.63, "elapsed_time": "0:13:44", "remaining_time": "0:03:18", "throughput": 3313.81, "total_tokens": 2731872}
{"current_steps": 6665, "total_steps": 8260, "loss": 0.1853, "lr": 5.473990051203298e-06, "epoch": 8.069007263922519, "percentage": 80.69, "elapsed_time": "0:13:44", "remaining_time": "0:03:17", "throughput": 3314.09, "total_tokens": 2734016}
{"current_steps": 6670, "total_steps": 8260, "loss": 0.1603, "lr": 5.441045647710627e-06, "epoch": 8.075060532687651, "percentage": 80.75, "elapsed_time": "0:13:45", "remaining_time": "0:03:16", "throughput": 3314.51, "total_tokens": 2736032}
{"current_steps": 6675, "total_steps": 8260, "loss": 0.2299, "lr": 5.408188569523137e-06, "epoch": 8.081113801452785, "percentage": 80.81, "elapsed_time": "0:13:45", "remaining_time": "0:03:16", "throughput": 3314.73, "total_tokens": 2737952}
{"current_steps": 6680, "total_steps": 8260, "loss": 0.1946, "lr": 5.375418963338566e-06, "epoch": 8.087167070217918, "percentage": 80.87, "elapsed_time": "0:13:46", "remaining_time": "0:03:15", "throughput": 3314.99, "total_tokens": 2739968}
{"current_steps": 6685, "total_steps": 8260, "loss": 0.1689, "lr": 5.342736975464116e-06, "epoch": 8.09322033898305, "percentage": 80.93, "elapsed_time": "0:13:47", "remaining_time": "0:03:14", "throughput": 3315.27, "total_tokens": 2742016}
{"current_steps": 6690, "total_steps": 8260, "loss": 0.1775, "lr": 5.310142751815792e-06, "epoch": 8.099273607748184, "percentage": 80.99, "elapsed_time": "0:13:47", "remaining_time": "0:03:14", "throughput": 3315.6, "total_tokens": 2744128}
{"current_steps": 6695, "total_steps": 8260, "loss": 0.2043, "lr": 5.277636437917769e-06, "epoch": 8.105326876513317, "percentage": 81.05, "elapsed_time": "0:13:48", "remaining_time": "0:03:13", "throughput": 3315.88, "total_tokens": 2746112}
{"current_steps": 6700, "total_steps": 8260, "loss": 0.1814, "lr": 5.245218178901717e-06, "epoch": 8.111380145278451, "percentage": 81.11, "elapsed_time": "0:13:48", "remaining_time": "0:03:12", "throughput": 3316.2, "total_tokens": 2748128}
{"current_steps": 6705, "total_steps": 8260, "loss": 0.2152, "lr": 5.212888119506168e-06, "epoch": 8.117433414043584, "percentage": 81.17, "elapsed_time": "0:13:49", "remaining_time": "0:03:12", "throughput": 3316.49, "total_tokens": 2750176}
{"current_steps": 6710, "total_steps": 8260, "loss": 0.1775, "lr": 5.180646404075862e-06, "epoch": 8.123486682808716, "percentage": 81.23, "elapsed_time": "0:13:49", "remaining_time": "0:03:11", "throughput": 3316.8, "total_tokens": 2752256}
{"current_steps": 6715, "total_steps": 8260, "loss": 0.184, "lr": 5.1484931765611286e-06, "epoch": 8.12953995157385, "percentage": 81.3, "elapsed_time": "0:13:50", "remaining_time": "0:03:11", "throughput": 3317.05, "total_tokens": 2754208}
{"current_steps": 6720, "total_steps": 8260, "loss": 0.2202, "lr": 5.116428580517207e-06, "epoch": 8.135593220338983, "percentage": 81.36, "elapsed_time": "0:13:50", "remaining_time": "0:03:10", "throughput": 3317.31, "total_tokens": 2756352}
{"current_steps": 6725, "total_steps": 8260, "loss": 0.2027, "lr": 5.084452759103603e-06, "epoch": 8.141646489104117, "percentage": 81.42, "elapsed_time": "0:13:51", "remaining_time": "0:03:09", "throughput": 3317.62, "total_tokens": 2758528}
{"current_steps": 6730, "total_steps": 8260, "loss": 0.1725, "lr": 5.052565855083511e-06, "epoch": 8.14769975786925, "percentage": 81.48, "elapsed_time": "0:13:52", "remaining_time": "0:03:09", "throughput": 3317.91, "total_tokens": 2760608}
{"current_steps": 6735, "total_steps": 8260, "loss": 0.2005, "lr": 5.020768010823102e-06, "epoch": 8.153753026634382, "percentage": 81.54, "elapsed_time": "0:13:52", "remaining_time": "0:03:08", "throughput": 3318.19, "total_tokens": 2762656}
{"current_steps": 6740, "total_steps": 8260, "loss": 0.1781, "lr": 4.98905936829093e-06, "epoch": 8.159806295399516, "percentage": 81.6, "elapsed_time": "0:13:53", "remaining_time": "0:03:07", "throughput": 3318.44, "total_tokens": 2764672}
{"current_steps": 6745, "total_steps": 8260, "loss": 0.1698, "lr": 4.957440069057281e-06, "epoch": 8.165859564164649, "percentage": 81.66, "elapsed_time": "0:13:53", "remaining_time": "0:03:07", "throughput": 3318.66, "total_tokens": 2766752}
{"current_steps": 6750, "total_steps": 8260, "loss": 0.1867, "lr": 4.92591025429357e-06, "epoch": 8.171912832929783, "percentage": 81.72, "elapsed_time": "0:13:54", "remaining_time": "0:03:06", "throughput": 3318.97, "total_tokens": 2768736}
{"current_steps": 6755, "total_steps": 8260, "loss": 0.1912, "lr": 4.8944700647716616e-06, "epoch": 8.177966101694915, "percentage": 81.78, "elapsed_time": "0:13:54", "remaining_time": "0:03:05", "throughput": 3319.21, "total_tokens": 2770752}
{"current_steps": 6760, "total_steps": 8260, "loss": 0.2046, "lr": 4.863119640863284e-06, "epoch": 8.184019370460048, "percentage": 81.84, "elapsed_time": "0:13:55", "remaining_time": "0:03:05", "throughput": 3319.45, "total_tokens": 2772928}
{"current_steps": 6765, "total_steps": 8260, "loss": 0.1967, "lr": 4.831859122539381e-06, "epoch": 8.190072639225182, "percentage": 81.9, "elapsed_time": "0:13:55", "remaining_time": "0:03:04", "throughput": 3319.71, "total_tokens": 2774976}
{"current_steps": 6770, "total_steps": 8260, "loss": 0.2061, "lr": 4.800688649369489e-06, "epoch": 8.196125907990314, "percentage": 81.96, "elapsed_time": "0:13:56", "remaining_time": "0:03:04", "throughput": 3320.03, "total_tokens": 2777152}
{"current_steps": 6775, "total_steps": 8260, "loss": 0.1817, "lr": 4.769608360521135e-06, "epoch": 8.202179176755449, "percentage": 82.02, "elapsed_time": "0:13:57", "remaining_time": "0:03:03", "throughput": 3320.32, "total_tokens": 2779296}
{"current_steps": 6780, "total_steps": 8260, "loss": 0.1814, "lr": 4.7386183947591815e-06, "epoch": 8.208232445520581, "percentage": 82.08, "elapsed_time": "0:13:57", "remaining_time": "0:03:02", "throughput": 3320.45, "total_tokens": 2781376}
{"current_steps": 6785, "total_steps": 8260, "loss": 0.1937, "lr": 4.7077188904452255e-06, "epoch": 8.214285714285714, "percentage": 82.14, "elapsed_time": "0:13:58", "remaining_time": "0:03:02", "throughput": 3320.65, "total_tokens": 2783424}
{"current_steps": 6790, "total_steps": 8260, "loss": 0.1804, "lr": 4.676909985536981e-06, "epoch": 8.220338983050848, "percentage": 82.2, "elapsed_time": "0:13:58", "remaining_time": "0:03:01", "throughput": 3320.92, "total_tokens": 2785376}
{"current_steps": 6795, "total_steps": 8260, "loss": 0.1967, "lr": 4.64619181758767e-06, "epoch": 8.22639225181598, "percentage": 82.26, "elapsed_time": "0:13:59", "remaining_time": "0:03:00", "throughput": 3321.15, "total_tokens": 2787456}
{"current_steps": 6800, "total_steps": 8260, "loss": 0.1543, "lr": 4.615564523745391e-06, "epoch": 8.232445520581114, "percentage": 82.32, "elapsed_time": "0:13:59", "remaining_time": "0:03:00", "throughput": 3321.4, "total_tokens": 2789568}
{"current_steps": 6805, "total_steps": 8260, "loss": 0.1861, "lr": 4.585028240752498e-06, "epoch": 8.238498789346247, "percentage": 82.38, "elapsed_time": "0:14:00", "remaining_time": "0:02:59", "throughput": 3321.55, "total_tokens": 2791584}
{"current_steps": 6810, "total_steps": 8260, "loss": 0.1846, "lr": 4.554583104945037e-06, "epoch": 8.24455205811138, "percentage": 82.45, "elapsed_time": "0:14:00", "remaining_time": "0:02:59", "throughput": 3321.83, "total_tokens": 2793632}
{"current_steps": 6815, "total_steps": 8260, "loss": 0.1952, "lr": 4.524229252252091e-06, "epoch": 8.250605326876514, "percentage": 82.51, "elapsed_time": "0:14:01", "remaining_time": "0:02:58", "throughput": 3322.09, "total_tokens": 2795744}
{"current_steps": 6820, "total_steps": 8260, "loss": 0.1921, "lr": 4.493966818195191e-06, "epoch": 8.256658595641646, "percentage": 82.57, "elapsed_time": "0:14:02", "remaining_time": "0:02:57", "throughput": 3322.24, "total_tokens": 2797696}
{"current_steps": 6825, "total_steps": 8260, "loss": 0.1889, "lr": 4.463795937887713e-06, "epoch": 8.26271186440678, "percentage": 82.63, "elapsed_time": "0:14:02", "remaining_time": "0:02:57", "throughput": 3322.5, "total_tokens": 2799744}
{"current_steps": 6830, "total_steps": 8260, "loss": 0.1546, "lr": 4.433716746034252e-06, "epoch": 8.268765133171913, "percentage": 82.69, "elapsed_time": "0:14:03", "remaining_time": "0:02:56", "throughput": 3322.79, "total_tokens": 2801824}
{"current_steps": 6835, "total_steps": 8260, "loss": 0.181, "lr": 4.40372937693008e-06, "epoch": 8.274818401937045, "percentage": 82.75, "elapsed_time": "0:14:03", "remaining_time": "0:02:55", "throughput": 3322.96, "total_tokens": 2803872}
{"current_steps": 6840, "total_steps": 8260, "loss": 0.2102, "lr": 4.3738339644604635e-06, "epoch": 8.28087167070218, "percentage": 82.81, "elapsed_time": "0:14:04", "remaining_time": "0:02:55", "throughput": 3323.13, "total_tokens": 2805824}
{"current_steps": 6845, "total_steps": 8260, "loss": 0.1917, "lr": 4.344030642100133e-06, "epoch": 8.286924939467312, "percentage": 82.87, "elapsed_time": "0:14:04", "remaining_time": "0:02:54", "throughput": 3323.33, "total_tokens": 2807712}
{"current_steps": 6850, "total_steps": 8260, "loss": 0.1681, "lr": 4.314319542912643e-06, "epoch": 8.292978208232446, "percentage": 82.93, "elapsed_time": "0:14:05", "remaining_time": "0:02:54", "throughput": 3323.6, "total_tokens": 2809760}
{"current_steps": 6855, "total_steps": 8260, "loss": 0.2292, "lr": 4.284700799549829e-06, "epoch": 8.299031476997579, "percentage": 82.99, "elapsed_time": "0:14:05", "remaining_time": "0:02:53", "throughput": 3324.01, "total_tokens": 2811872}
{"current_steps": 6860, "total_steps": 8260, "loss": 0.1913, "lr": 4.255174544251147e-06, "epoch": 8.305084745762711, "percentage": 83.05, "elapsed_time": "0:14:06", "remaining_time": "0:02:52", "throughput": 3324.23, "total_tokens": 2813888}
{"current_steps": 6865, "total_steps": 8260, "loss": 0.1561, "lr": 4.225740908843146e-06, "epoch": 8.311138014527845, "percentage": 83.11, "elapsed_time": "0:14:06", "remaining_time": "0:02:52", "throughput": 3324.46, "total_tokens": 2815808}
{"current_steps": 6870, "total_steps": 8260, "loss": 0.1738, "lr": 4.196400024738831e-06, "epoch": 8.317191283292978, "percentage": 83.17, "elapsed_time": "0:14:07", "remaining_time": "0:02:51", "throughput": 3324.73, "total_tokens": 2817760}
{"current_steps": 6875, "total_steps": 8260, "loss": 0.1726, "lr": 4.167152022937124e-06, "epoch": 8.323244552058112, "percentage": 83.23, "elapsed_time": "0:14:08", "remaining_time": "0:02:50", "throughput": 3324.97, "total_tokens": 2819776}
{"current_steps": 6880, "total_steps": 8260, "loss": 0.1866, "lr": 4.137997034022237e-06, "epoch": 8.329297820823244, "percentage": 83.29, "elapsed_time": "0:14:08", "remaining_time": "0:02:50", "throughput": 3325.24, "total_tokens": 2821824}
{"current_steps": 6885, "total_steps": 8260, "loss": 0.1557, "lr": 4.108935188163096e-06, "epoch": 8.335351089588377, "percentage": 83.35, "elapsed_time": "0:14:09", "remaining_time": "0:02:49", "throughput": 3325.43, "total_tokens": 2823968}
{"current_steps": 6890, "total_steps": 8260, "loss": 0.1707, "lr": 4.079966615112782e-06, "epoch": 8.341404358353511, "percentage": 83.41, "elapsed_time": "0:14:09", "remaining_time": "0:02:48", "throughput": 3325.76, "total_tokens": 2825920}
{"current_steps": 6895, "total_steps": 8260, "loss": 0.1365, "lr": 4.05109144420795e-06, "epoch": 8.347457627118644, "percentage": 83.47, "elapsed_time": "0:14:10", "remaining_time": "0:02:48", "throughput": 3326.14, "total_tokens": 2828224}
{"current_steps": 6900, "total_steps": 8260, "loss": 0.1419, "lr": 4.022309804368215e-06, "epoch": 8.353510895883778, "percentage": 83.54, "elapsed_time": "0:14:10", "remaining_time": "0:02:47", "throughput": 3326.43, "total_tokens": 2830208}
{"current_steps": 6905, "total_steps": 8260, "loss": 0.1706, "lr": 3.993621824095622e-06, "epoch": 8.35956416464891, "percentage": 83.6, "elapsed_time": "0:14:11", "remaining_time": "0:02:47", "throughput": 3326.68, "total_tokens": 2832160}
{"current_steps": 6910, "total_steps": 8260, "loss": 0.1744, "lr": 3.965027631474036e-06, "epoch": 8.365617433414043, "percentage": 83.66, "elapsed_time": "0:14:11", "remaining_time": "0:02:46", "throughput": 3326.86, "total_tokens": 2834208}
{"current_steps": 6915, "total_steps": 8260, "loss": 0.1757, "lr": 3.936527354168606e-06, "epoch": 8.371670702179177, "percentage": 83.72, "elapsed_time": "0:14:12", "remaining_time": "0:02:45", "throughput": 3327.19, "total_tokens": 2836320}
{"current_steps": 6920, "total_steps": 8260, "loss": 0.179, "lr": 3.90812111942516e-06, "epoch": 8.37772397094431, "percentage": 83.78, "elapsed_time": "0:14:13", "remaining_time": "0:02:45", "throughput": 3327.55, "total_tokens": 2838464}
{"current_steps": 6925, "total_steps": 8260, "loss": 0.1908, "lr": 3.8798090540696495e-06, "epoch": 8.383777239709444, "percentage": 83.84, "elapsed_time": "0:14:13", "remaining_time": "0:02:44", "throughput": 3327.73, "total_tokens": 2840512}
{"current_steps": 6930, "total_steps": 8260, "loss": 0.197, "lr": 3.851591284507591e-06, "epoch": 8.389830508474576, "percentage": 83.9, "elapsed_time": "0:14:14", "remaining_time": "0:02:43", "throughput": 3328.03, "total_tokens": 2842592}
{"current_steps": 6935, "total_steps": 8260, "loss": 0.1903, "lr": 3.82346793672351e-06, "epoch": 8.39588377723971, "percentage": 83.96, "elapsed_time": "0:14:14", "remaining_time": "0:02:43", "throughput": 3328.31, "total_tokens": 2844576}
{"current_steps": 6940, "total_steps": 8260, "loss": 0.2109, "lr": 3.795439136280346e-06, "epoch": 8.401937046004843, "percentage": 84.02, "elapsed_time": "0:14:15", "remaining_time": "0:02:42", "throughput": 3328.58, "total_tokens": 2846720}
{"current_steps": 6945, "total_steps": 8260, "loss": 0.2015, "lr": 3.767505008318914e-06, "epoch": 8.407990314769975, "percentage": 84.08, "elapsed_time": "0:14:15", "remaining_time": "0:02:42", "throughput": 3328.89, "total_tokens": 2848832}
{"current_steps": 6950, "total_steps": 8260, "loss": 0.1926, "lr": 3.739665677557341e-06, "epoch": 8.41404358353511, "percentage": 84.14, "elapsed_time": "0:14:16", "remaining_time": "0:02:41", "throughput": 3329.15, "total_tokens": 2850880}
{"current_steps": 6955, "total_steps": 8260, "loss": 0.1666, "lr": 3.711921268290533e-06, "epoch": 8.420096852300242, "percentage": 84.2, "elapsed_time": "0:14:16", "remaining_time": "0:02:40", "throughput": 3329.39, "total_tokens": 2852896}
{"current_steps": 6960, "total_steps": 8260, "loss": 0.1862, "lr": 3.6842719043895748e-06, "epoch": 8.426150121065376, "percentage": 84.26, "elapsed_time": "0:14:17", "remaining_time": "0:02:40", "throughput": 3329.62, "total_tokens": 2855008}
{"current_steps": 6965, "total_steps": 8260, "loss": 0.1611, "lr": 3.656717709301194e-06, "epoch": 8.432203389830509, "percentage": 84.32, "elapsed_time": "0:14:17", "remaining_time": "0:02:39", "throughput": 3329.87, "total_tokens": 2856960}
{"current_steps": 6970, "total_steps": 8260, "loss": 0.1938, "lr": 3.629258806047231e-06, "epoch": 8.438256658595641, "percentage": 84.38, "elapsed_time": "0:14:18", "remaining_time": "0:02:38", "throughput": 3330.14, "total_tokens": 2859200}
{"current_steps": 6975, "total_steps": 8260, "loss": 0.1745, "lr": 3.60189531722408e-06, "epoch": 8.444309927360775, "percentage": 84.44, "elapsed_time": "0:14:19", "remaining_time": "0:02:38", "throughput": 3330.43, "total_tokens": 2861280}
{"current_steps": 6980, "total_steps": 8260, "loss": 0.1662, "lr": 3.5746273650021228e-06, "epoch": 8.450363196125908, "percentage": 84.5, "elapsed_time": "0:14:19", "remaining_time": "0:02:37", "throughput": 3330.75, "total_tokens": 2863392}
{"current_steps": 6985, "total_steps": 8260, "loss": 0.2104, "lr": 3.5474550711252026e-06, "epoch": 8.456416464891042, "percentage": 84.56, "elapsed_time": "0:14:20", "remaining_time": "0:02:37", "throughput": 3331.0, "total_tokens": 2865344}
{"current_steps": 6990, "total_steps": 8260, "loss": 0.2225, "lr": 3.5203785569100674e-06, "epoch": 8.462469733656174, "percentage": 84.62, "elapsed_time": "0:14:20", "remaining_time": "0:02:36", "throughput": 3331.21, "total_tokens": 2867424}
{"current_steps": 6995, "total_steps": 8260, "loss": 0.1517, "lr": 3.493397943245852e-06, "epoch": 8.468523002421307, "percentage": 84.69, "elapsed_time": "0:14:21", "remaining_time": "0:02:35", "throughput": 3331.47, "total_tokens": 2869472}
{"current_steps": 7000, "total_steps": 8260, "loss": 0.2319, "lr": 3.466513350593506e-06, "epoch": 8.474576271186441, "percentage": 84.75, "elapsed_time": "0:14:21", "remaining_time": "0:02:35", "throughput": 3331.72, "total_tokens": 2871680}
{"current_steps": 7005, "total_steps": 8260, "loss": 0.1687, "lr": 3.439724898985278e-06, "epoch": 8.480629539951574, "percentage": 84.81, "elapsed_time": "0:14:22", "remaining_time": "0:02:34", "throughput": 3331.93, "total_tokens": 2873760}
{"current_steps": 7010, "total_steps": 8260, "loss": 0.1824, "lr": 3.4130327080241636e-06, "epoch": 8.486682808716708, "percentage": 84.87, "elapsed_time": "0:14:23", "remaining_time": "0:02:33", "throughput": 3332.18, "total_tokens": 2875712}
{"current_steps": 7015, "total_steps": 8260, "loss": 0.1691, "lr": 3.3864368968834074e-06, "epoch": 8.49273607748184, "percentage": 84.93, "elapsed_time": "0:14:23", "remaining_time": "0:02:33", "throughput": 3332.47, "total_tokens": 2877696}
{"current_steps": 7020, "total_steps": 8260, "loss": 0.1882, "lr": 3.3599375843059193e-06, "epoch": 8.498789346246973, "percentage": 84.99, "elapsed_time": "0:14:24", "remaining_time": "0:02:32", "throughput": 3332.66, "total_tokens": 2879680}
{"current_steps": 7021, "total_steps": 8260, "eval_loss": 0.18608687818050385, "epoch": 8.5, "percentage": 85.0, "elapsed_time": "0:14:28", "remaining_time": "0:02:33", "throughput": 3314.94, "total_tokens": 2880128}
{"current_steps": 7025, "total_steps": 8260, "loss": 0.2187, "lr": 3.3335348886037815e-06, "epoch": 8.504842615012107, "percentage": 85.05, "elapsed_time": "0:14:30", "remaining_time": "0:02:32", "throughput": 3311.52, "total_tokens": 2881728}
{"current_steps": 7030, "total_steps": 8260, "loss": 0.1865, "lr": 3.3072289276576964e-06, "epoch": 8.51089588377724, "percentage": 85.11, "elapsed_time": "0:14:30", "remaining_time": "0:02:32", "throughput": 3311.75, "total_tokens": 2883744}
{"current_steps": 7035, "total_steps": 8260, "loss": 0.2091, "lr": 3.281019818916492e-06, "epoch": 8.516949152542374, "percentage": 85.17, "elapsed_time": "0:14:31", "remaining_time": "0:02:31", "throughput": 3312.09, "total_tokens": 2885792}
{"current_steps": 7040, "total_steps": 8260, "loss": 0.1913, "lr": 3.254907679396574e-06, "epoch": 8.523002421307506, "percentage": 85.23, "elapsed_time": "0:14:31", "remaining_time": "0:02:31", "throughput": 3312.42, "total_tokens": 2888000}
{"current_steps": 7045, "total_steps": 8260, "loss": 0.1636, "lr": 3.2288926256813846e-06, "epoch": 8.529055690072639, "percentage": 85.29, "elapsed_time": "0:14:32", "remaining_time": "0:02:30", "throughput": 3312.66, "total_tokens": 2890016}
{"current_steps": 7050, "total_steps": 8260, "loss": 0.1787, "lr": 3.2029747739209247e-06, "epoch": 8.535108958837773, "percentage": 85.35, "elapsed_time": "0:14:32", "remaining_time": "0:02:29", "throughput": 3313.0, "total_tokens": 2891968}
{"current_steps": 7055, "total_steps": 8260, "loss": 0.1842, "lr": 3.177154239831223e-06, "epoch": 8.541162227602905, "percentage": 85.41, "elapsed_time": "0:14:33", "remaining_time": "0:02:29", "throughput": 3313.19, "total_tokens": 2893952}
{"current_steps": 7060, "total_steps": 8260, "loss": 0.1847, "lr": 3.1514311386937917e-06, "epoch": 8.54721549636804, "percentage": 85.47, "elapsed_time": "0:14:33", "remaining_time": "0:02:28", "throughput": 3313.45, "total_tokens": 2895904}
{"current_steps": 7065, "total_steps": 8260, "loss": 0.1667, "lr": 3.1258055853551487e-06, "epoch": 8.553268765133172, "percentage": 85.53, "elapsed_time": "0:14:34", "remaining_time": "0:02:27", "throughput": 3313.64, "total_tokens": 2897952}
{"current_steps": 7070, "total_steps": 8260, "loss": 0.2127, "lr": 3.1002776942262696e-06, "epoch": 8.559322033898304, "percentage": 85.59, "elapsed_time": "0:14:35", "remaining_time": "0:02:27", "throughput": 3314.0, "total_tokens": 2900064}
{"current_steps": 7075, "total_steps": 8260, "loss": 0.1938, "lr": 3.0748475792821197e-06, "epoch": 8.565375302663439, "percentage": 85.65, "elapsed_time": "0:14:35", "remaining_time": "0:02:26", "throughput": 3314.36, "total_tokens": 2902432}
{"current_steps": 7080, "total_steps": 8260, "loss": 0.1724, "lr": 3.0495153540611e-06, "epoch": 8.571428571428571, "percentage": 85.71, "elapsed_time": "0:14:36", "remaining_time": "0:02:26", "throughput": 3314.65, "total_tokens": 2904512}
{"current_steps": 7085, "total_steps": 8260, "loss": 0.1563, "lr": 3.024281131664569e-06, "epoch": 8.577481840193705, "percentage": 85.77, "elapsed_time": "0:14:36", "remaining_time": "0:02:25", "throughput": 3315.0, "total_tokens": 2906496}
{"current_steps": 7090, "total_steps": 8260, "loss": 0.193, "lr": 2.999145024756325e-06, "epoch": 8.583535108958838, "percentage": 85.84, "elapsed_time": "0:14:37", "remaining_time": "0:02:24", "throughput": 3315.27, "total_tokens": 2908544}
{"current_steps": 7095, "total_steps": 8260, "loss": 0.1634, "lr": 2.9741071455621245e-06, "epoch": 8.58958837772397, "percentage": 85.9, "elapsed_time": "0:14:37", "remaining_time": "0:02:24", "throughput": 3315.63, "total_tokens": 2910688}
{"current_steps": 7100, "total_steps": 8260, "loss": 0.1566, "lr": 2.9491676058691437e-06, "epoch": 8.595641646489105, "percentage": 85.96, "elapsed_time": "0:14:38", "remaining_time": "0:02:23", "throughput": 3315.97, "total_tokens": 2912832}
{"current_steps": 7105, "total_steps": 8260, "loss": 0.1833, "lr": 2.924326517025508e-06, "epoch": 8.601694915254237, "percentage": 86.02, "elapsed_time": "0:14:38", "remaining_time": "0:02:22", "throughput": 3316.34, "total_tokens": 2914816}
{"current_steps": 7110, "total_steps": 8260, "loss": 0.1527, "lr": 2.8995839899397915e-06, "epoch": 8.607748184019371, "percentage": 86.08, "elapsed_time": "0:14:39", "remaining_time": "0:02:22", "throughput": 3316.5, "total_tokens": 2916928}
{"current_steps": 7115, "total_steps": 8260, "loss": 0.1662, "lr": 2.8749401350805115e-06, "epoch": 8.613801452784504, "percentage": 86.14, "elapsed_time": "0:14:40", "remaining_time": "0:02:21", "throughput": 3316.7, "total_tokens": 2918912}
{"current_steps": 7120, "total_steps": 8260, "loss": 0.1864, "lr": 2.8503950624756415e-06, "epoch": 8.619854721549636, "percentage": 86.2, "elapsed_time": "0:14:40", "remaining_time": "0:02:20", "throughput": 3316.98, "total_tokens": 2920896}
{"current_steps": 7125, "total_steps": 8260, "loss": 0.1769, "lr": 2.825948881712123e-06, "epoch": 8.62590799031477, "percentage": 86.26, "elapsed_time": "0:14:41", "remaining_time": "0:02:20", "throughput": 3317.27, "total_tokens": 2922976}
{"current_steps": 7130, "total_steps": 8260, "loss": 0.1998, "lr": 2.801601701935369e-06, "epoch": 8.631961259079903, "percentage": 86.32, "elapsed_time": "0:14:41", "remaining_time": "0:02:19", "throughput": 3317.54, "total_tokens": 2925120}
{"current_steps": 7135, "total_steps": 8260, "loss": 0.2247, "lr": 2.777353631848789e-06, "epoch": 8.638014527845037, "percentage": 86.38, "elapsed_time": "0:14:42", "remaining_time": "0:02:19", "throughput": 3317.71, "total_tokens": 2927072}
{"current_steps": 7140, "total_steps": 8260, "loss": 0.155, "lr": 2.7532047797132867e-06, "epoch": 8.64406779661017, "percentage": 86.44, "elapsed_time": "0:14:42", "remaining_time": "0:02:18", "throughput": 3318.01, "total_tokens": 2929152}
{"current_steps": 7145, "total_steps": 8260, "loss": 0.1678, "lr": 2.7291552533467853e-06, "epoch": 8.650121065375302, "percentage": 86.5, "elapsed_time": "0:14:43", "remaining_time": "0:02:17", "throughput": 3318.29, "total_tokens": 2931136}
{"current_steps": 7150, "total_steps": 8260, "loss": 0.1452, "lr": 2.7052051601237473e-06, "epoch": 8.656174334140436, "percentage": 86.56, "elapsed_time": "0:14:43", "remaining_time": "0:02:17", "throughput": 3318.5, "total_tokens": 2933312}
{"current_steps": 7155, "total_steps": 8260, "loss": 0.1988, "lr": 2.681354606974698e-06, "epoch": 8.662227602905569, "percentage": 86.62, "elapsed_time": "0:14:44", "remaining_time": "0:02:16", "throughput": 3318.72, "total_tokens": 2935328}
{"current_steps": 7160, "total_steps": 8260, "loss": 0.1719, "lr": 2.6576037003857414e-06, "epoch": 8.668280871670703, "percentage": 86.68, "elapsed_time": "0:14:44", "remaining_time": "0:02:15", "throughput": 3318.9, "total_tokens": 2937216}
{"current_steps": 7165, "total_steps": 8260, "loss": 0.1898, "lr": 2.633952546398083e-06, "epoch": 8.674334140435835, "percentage": 86.74, "elapsed_time": "0:14:45", "remaining_time": "0:02:15", "throughput": 3319.15, "total_tokens": 2939168}
{"current_steps": 7170, "total_steps": 8260, "loss": 0.2114, "lr": 2.6104012506075692e-06, "epoch": 8.680387409200968, "percentage": 86.8, "elapsed_time": "0:14:46", "remaining_time": "0:02:14", "throughput": 3319.51, "total_tokens": 2941504}
{"current_steps": 7175, "total_steps": 8260, "loss": 0.1896, "lr": 2.5869499181641916e-06, "epoch": 8.686440677966102, "percentage": 86.86, "elapsed_time": "0:14:46", "remaining_time": "0:02:14", "throughput": 3319.81, "total_tokens": 2943520}
{"current_steps": 7180, "total_steps": 8260, "loss": 0.214, "lr": 2.5635986537716538e-06, "epoch": 8.692493946731235, "percentage": 86.92, "elapsed_time": "0:14:47", "remaining_time": "0:02:13", "throughput": 3320.07, "total_tokens": 2945568}
{"current_steps": 7185, "total_steps": 8260, "loss": 0.146, "lr": 2.540347561686873e-06, "epoch": 8.698547215496369, "percentage": 86.99, "elapsed_time": "0:14:47", "remaining_time": "0:02:12", "throughput": 3320.36, "total_tokens": 2947488}
{"current_steps": 7190, "total_steps": 8260, "loss": 0.2173, "lr": 2.5171967457195216e-06, "epoch": 8.704600484261501, "percentage": 87.05, "elapsed_time": "0:14:48", "remaining_time": "0:02:12", "throughput": 3320.58, "total_tokens": 2949504}
{"current_steps": 7195, "total_steps": 8260, "loss": 0.1762, "lr": 2.494146309231571e-06, "epoch": 8.710653753026634, "percentage": 87.11, "elapsed_time": "0:14:48", "remaining_time": "0:02:11", "throughput": 3320.76, "total_tokens": 2951552}
{"current_steps": 7200, "total_steps": 8260, "loss": 0.1749, "lr": 2.471196355136826e-06, "epoch": 8.716707021791768, "percentage": 87.17, "elapsed_time": "0:14:49", "remaining_time": "0:02:10", "throughput": 3321.05, "total_tokens": 2953632}
{"current_steps": 7205, "total_steps": 8260, "loss": 0.1647, "lr": 2.4483469859004625e-06, "epoch": 8.7227602905569, "percentage": 87.23, "elapsed_time": "0:14:49", "remaining_time": "0:02:10", "throughput": 3321.46, "total_tokens": 2955936}
{"current_steps": 7210, "total_steps": 8260, "loss": 0.1829, "lr": 2.425598303538576e-06, "epoch": 8.728813559322035, "percentage": 87.29, "elapsed_time": "0:14:50", "remaining_time": "0:02:09", "throughput": 3321.68, "total_tokens": 2958048}
{"current_steps": 7215, "total_steps": 8260, "loss": 0.1721, "lr": 2.402950409617727e-06, "epoch": 8.734866828087167, "percentage": 87.35, "elapsed_time": "0:14:51", "remaining_time": "0:02:09", "throughput": 3321.93, "total_tokens": 2960160}
{"current_steps": 7220, "total_steps": 8260, "loss": 0.2078, "lr": 2.380403405254475e-06, "epoch": 8.7409200968523, "percentage": 87.41, "elapsed_time": "0:14:51", "remaining_time": "0:02:08", "throughput": 3322.11, "total_tokens": 2962208}
{"current_steps": 7225, "total_steps": 8260, "loss": 0.2083, "lr": 2.35795739111494e-06, "epoch": 8.746973365617434, "percentage": 87.47, "elapsed_time": "0:14:52", "remaining_time": "0:02:07", "throughput": 3322.33, "total_tokens": 2964320}
{"current_steps": 7230, "total_steps": 8260, "loss": 0.1722, "lr": 2.335612467414344e-06, "epoch": 8.753026634382566, "percentage": 87.53, "elapsed_time": "0:14:52", "remaining_time": "0:02:07", "throughput": 3322.58, "total_tokens": 2966272}
{"current_steps": 7235, "total_steps": 8260, "loss": 0.2155, "lr": 2.313368733916585e-06, "epoch": 8.7590799031477, "percentage": 87.59, "elapsed_time": "0:14:53", "remaining_time": "0:02:06", "throughput": 3322.89, "total_tokens": 2968288}
{"current_steps": 7240, "total_steps": 8260, "loss": 0.1731, "lr": 2.291226289933751e-06, "epoch": 8.765133171912833, "percentage": 87.65, "elapsed_time": "0:14:53", "remaining_time": "0:02:05", "throughput": 3323.11, "total_tokens": 2970208}
{"current_steps": 7245, "total_steps": 8260, "loss": 0.163, "lr": 2.2691852343257157e-06, "epoch": 8.771186440677965, "percentage": 87.71, "elapsed_time": "0:14:54", "remaining_time": "0:02:05", "throughput": 3323.38, "total_tokens": 2972352}
{"current_steps": 7250, "total_steps": 8260, "loss": 0.1508, "lr": 2.2472456654996755e-06, "epoch": 8.7772397094431, "percentage": 87.77, "elapsed_time": "0:14:54", "remaining_time": "0:02:04", "throughput": 3323.6, "total_tokens": 2974368}
{"current_steps": 7255, "total_steps": 8260, "loss": 0.215, "lr": 2.2254076814097163e-06, "epoch": 8.783292978208232, "percentage": 87.83, "elapsed_time": "0:14:55", "remaining_time": "0:02:04", "throughput": 3323.83, "total_tokens": 2976288}
{"current_steps": 7260, "total_steps": 8260, "loss": 0.1914, "lr": 2.203671379556388e-06, "epoch": 8.789346246973366, "percentage": 87.89, "elapsed_time": "0:14:55", "remaining_time": "0:02:03", "throughput": 3324.08, "total_tokens": 2978240}
{"current_steps": 7265, "total_steps": 8260, "loss": 0.21, "lr": 2.1820368569862444e-06, "epoch": 8.795399515738499, "percentage": 87.95, "elapsed_time": "0:14:56", "remaining_time": "0:02:02", "throughput": 3324.3, "total_tokens": 2980256}
{"current_steps": 7270, "total_steps": 8260, "loss": 0.2023, "lr": 2.1605042102914227e-06, "epoch": 8.801452784503631, "percentage": 88.01, "elapsed_time": "0:14:57", "remaining_time": "0:02:02", "throughput": 3324.56, "total_tokens": 2982400}
{"current_steps": 7275, "total_steps": 8260, "loss": 0.1697, "lr": 2.1390735356092206e-06, "epoch": 8.807506053268765, "percentage": 88.08, "elapsed_time": "0:14:57", "remaining_time": "0:02:01", "throughput": 3324.78, "total_tokens": 2984416}
{"current_steps": 7280, "total_steps": 8260, "loss": 0.199, "lr": 2.1177449286216565e-06, "epoch": 8.813559322033898, "percentage": 88.14, "elapsed_time": "0:14:58", "remaining_time": "0:02:00", "throughput": 3325.06, "total_tokens": 2986496}
{"current_steps": 7285, "total_steps": 8260, "loss": 0.17, "lr": 2.0965184845550407e-06, "epoch": 8.819612590799032, "percentage": 88.2, "elapsed_time": "0:14:58", "remaining_time": "0:02:00", "throughput": 3325.28, "total_tokens": 2988512}
{"current_steps": 7290, "total_steps": 8260, "loss": 0.1847, "lr": 2.075394298179553e-06, "epoch": 8.825665859564165, "percentage": 88.26, "elapsed_time": "0:14:59", "remaining_time": "0:01:59", "throughput": 3325.61, "total_tokens": 2990560}
{"current_steps": 7295, "total_steps": 8260, "loss": 0.1887, "lr": 2.0543724638088347e-06, "epoch": 8.831719128329297, "percentage": 88.32, "elapsed_time": "0:14:59", "remaining_time": "0:01:59", "throughput": 3325.86, "total_tokens": 2992768}
{"current_steps": 7300, "total_steps": 8260, "loss": 0.1484, "lr": 2.0334530752995433e-06, "epoch": 8.837772397094431, "percentage": 88.38, "elapsed_time": "0:15:00", "remaining_time": "0:01:58", "throughput": 3326.07, "total_tokens": 2994784}
{"current_steps": 7305, "total_steps": 8260, "loss": 0.1869, "lr": 2.01263622605094e-06, "epoch": 8.843825665859564, "percentage": 88.44, "elapsed_time": "0:15:00", "remaining_time": "0:01:57", "throughput": 3326.3, "total_tokens": 2996896}
{"current_steps": 7310, "total_steps": 8260, "loss": 0.2197, "lr": 1.991922009004485e-06, "epoch": 8.849878934624698, "percentage": 88.5, "elapsed_time": "0:15:01", "remaining_time": "0:01:57", "throughput": 3326.57, "total_tokens": 2998976}
{"current_steps": 7315, "total_steps": 8260, "loss": 0.1753, "lr": 1.9713105166434042e-06, "epoch": 8.85593220338983, "percentage": 88.56, "elapsed_time": "0:15:02", "remaining_time": "0:01:56", "throughput": 3326.81, "total_tokens": 3001184}
{"current_steps": 7320, "total_steps": 8260, "loss": 0.1645, "lr": 1.950801840992303e-06, "epoch": 8.861985472154963, "percentage": 88.62, "elapsed_time": "0:15:02", "remaining_time": "0:01:55", "throughput": 3327.09, "total_tokens": 3003168}
{"current_steps": 7325, "total_steps": 8260, "loss": 0.1818, "lr": 1.930396073616725e-06, "epoch": 8.868038740920097, "percentage": 88.68, "elapsed_time": "0:15:03", "remaining_time": "0:01:55", "throughput": 3327.36, "total_tokens": 3005152}
{"current_steps": 7330, "total_steps": 8260, "loss": 0.1992, "lr": 1.9100933056227593e-06, "epoch": 8.87409200968523, "percentage": 88.74, "elapsed_time": "0:15:03", "remaining_time": "0:01:54", "throughput": 3327.69, "total_tokens": 3007200}
{"current_steps": 7335, "total_steps": 8260, "loss": 0.189, "lr": 1.8898936276566303e-06, "epoch": 8.880145278450364, "percentage": 88.8, "elapsed_time": "0:15:04", "remaining_time": "0:01:54", "throughput": 3327.97, "total_tokens": 3009280}
{"current_steps": 7340, "total_steps": 8260, "loss": 0.1871, "lr": 1.8697971299043048e-06, "epoch": 8.886198547215496, "percentage": 88.86, "elapsed_time": "0:15:04", "remaining_time": "0:01:53", "throughput": 3328.25, "total_tokens": 3011360}
{"current_steps": 7345, "total_steps": 8260, "loss": 0.2047, "lr": 1.8498039020910628e-06, "epoch": 8.892251815980629, "percentage": 88.92, "elapsed_time": "0:15:05", "remaining_time": "0:01:52", "throughput": 3328.49, "total_tokens": 3013568}
{"current_steps": 7350, "total_steps": 8260, "loss": 0.1814, "lr": 1.8299140334811226e-06, "epoch": 8.898305084745763, "percentage": 88.98, "elapsed_time": "0:15:05", "remaining_time": "0:01:52", "throughput": 3328.68, "total_tokens": 3015552}
{"current_steps": 7355, "total_steps": 8260, "loss": 0.1998, "lr": 1.8101276128772272e-06, "epoch": 8.904358353510895, "percentage": 89.04, "elapsed_time": "0:15:06", "remaining_time": "0:01:51", "throughput": 3328.96, "total_tokens": 3017536}
{"current_steps": 7360, "total_steps": 8260, "loss": 0.2008, "lr": 1.7904447286202607e-06, "epoch": 8.91041162227603, "percentage": 89.1, "elapsed_time": "0:15:07", "remaining_time": "0:01:50", "throughput": 3329.14, "total_tokens": 3019584}
{"current_steps": 7365, "total_steps": 8260, "loss": 0.1406, "lr": 1.7708654685888337e-06, "epoch": 8.916464891041162, "percentage": 89.16, "elapsed_time": "0:15:07", "remaining_time": "0:01:50", "throughput": 3329.39, "total_tokens": 3021728}
{"current_steps": 7370, "total_steps": 8260, "loss": 0.1584, "lr": 1.7513899201989148e-06, "epoch": 8.922518159806295, "percentage": 89.23, "elapsed_time": "0:15:08", "remaining_time": "0:01:49", "throughput": 3329.62, "total_tokens": 3023584}
{"current_steps": 7375, "total_steps": 8260, "loss": 0.1471, "lr": 1.7320181704034237e-06, "epoch": 8.928571428571429, "percentage": 89.29, "elapsed_time": "0:15:08", "remaining_time": "0:01:49", "throughput": 3329.85, "total_tokens": 3025600}
{"current_steps": 7380, "total_steps": 8260, "loss": 0.2265, "lr": 1.7127503056918542e-06, "epoch": 8.934624697336561, "percentage": 89.35, "elapsed_time": "0:15:09", "remaining_time": "0:01:48", "throughput": 3329.97, "total_tokens": 3027680}
{"current_steps": 7385, "total_steps": 8260, "loss": 0.1562, "lr": 1.6935864120898704e-06, "epoch": 8.940677966101696, "percentage": 89.41, "elapsed_time": "0:15:09", "remaining_time": "0:01:47", "throughput": 3330.33, "total_tokens": 3029856}
{"current_steps": 7390, "total_steps": 8260, "loss": 0.1808, "lr": 1.674526575158944e-06, "epoch": 8.946731234866828, "percentage": 89.47, "elapsed_time": "0:15:10", "remaining_time": "0:01:47", "throughput": 3330.6, "total_tokens": 3032096}
{"current_steps": 7395, "total_steps": 8260, "loss": 0.1983, "lr": 1.6555708799959547e-06, "epoch": 8.95278450363196, "percentage": 89.53, "elapsed_time": "0:15:10", "remaining_time": "0:01:46", "throughput": 3330.72, "total_tokens": 3034112}
{"current_steps": 7400, "total_steps": 8260, "loss": 0.1812, "lr": 1.6367194112328288e-06, "epoch": 8.958837772397095, "percentage": 89.59, "elapsed_time": "0:15:11", "remaining_time": "0:01:45", "throughput": 3330.98, "total_tokens": 3036096}
{"current_steps": 7405, "total_steps": 8260, "loss": 0.1632, "lr": 1.617972253036143e-06, "epoch": 8.964891041162227, "percentage": 89.65, "elapsed_time": "0:15:12", "remaining_time": "0:01:45", "throughput": 3331.23, "total_tokens": 3038080}
{"current_steps": 7410, "total_steps": 8260, "loss": 0.1714, "lr": 1.5993294891067573e-06, "epoch": 8.970944309927361, "percentage": 89.71, "elapsed_time": "0:15:12", "remaining_time": "0:01:44", "throughput": 3331.48, "total_tokens": 3040064}
{"current_steps": 7415, "total_steps": 8260, "loss": 0.1872, "lr": 1.580791202679438e-06, "epoch": 8.976997578692494, "percentage": 89.77, "elapsed_time": "0:15:13", "remaining_time": "0:01:44", "throughput": 3331.73, "total_tokens": 3042048}
{"current_steps": 7420, "total_steps": 8260, "loss": 0.1928, "lr": 1.562357476522497e-06, "epoch": 8.983050847457626, "percentage": 89.83, "elapsed_time": "0:15:13", "remaining_time": "0:01:43", "throughput": 3331.97, "total_tokens": 3044192}
{"current_steps": 7425, "total_steps": 8260, "loss": 0.2271, "lr": 1.5440283929374023e-06, "epoch": 8.98910411622276, "percentage": 89.89, "elapsed_time": "0:15:14", "remaining_time": "0:01:42", "throughput": 3332.24, "total_tokens": 3046112}
{"current_steps": 7430, "total_steps": 8260, "loss": 0.1904, "lr": 1.5258040337584322e-06, "epoch": 8.995157384987893, "percentage": 89.95, "elapsed_time": "0:15:14", "remaining_time": "0:01:42", "throughput": 3332.5, "total_tokens": 3048000}
{"current_steps": 7434, "total_steps": 8260, "eval_loss": 0.18556244671344757, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:15:19", "remaining_time": "0:01:42", "throughput": 3315.43, "total_tokens": 3049392}
{"current_steps": 7435, "total_steps": 8260, "loss": 0.1512, "lr": 1.5076844803522922e-06, "epoch": 9.001210653753027, "percentage": 90.01, "elapsed_time": "0:15:21", "remaining_time": "0:01:42", "throughput": 3310.74, "total_tokens": 3049808}
{"current_steps": 7440, "total_steps": 8260, "loss": 0.1469, "lr": 1.4896698136177612e-06, "epoch": 9.00726392251816, "percentage": 90.07, "elapsed_time": "0:15:21", "remaining_time": "0:01:41", "throughput": 3310.93, "total_tokens": 3051792}
{"current_steps": 7445, "total_steps": 8260, "loss": 0.1906, "lr": 1.4717601139853266e-06, "epoch": 9.013317191283292, "percentage": 90.13, "elapsed_time": "0:15:22", "remaining_time": "0:01:40", "throughput": 3311.11, "total_tokens": 3053776}
{"current_steps": 7450, "total_steps": 8260, "loss": 0.1892, "lr": 1.4539554614168339e-06, "epoch": 9.019370460048426, "percentage": 90.19, "elapsed_time": "0:15:22", "remaining_time": "0:01:40", "throughput": 3311.3, "total_tokens": 3055856}
{"current_steps": 7455, "total_steps": 8260, "loss": 0.1901, "lr": 1.4362559354051092e-06, "epoch": 9.025423728813559, "percentage": 90.25, "elapsed_time": "0:15:23", "remaining_time": "0:01:39", "throughput": 3311.56, "total_tokens": 3057840}
{"current_steps": 7460, "total_steps": 8260, "loss": 0.2102, "lr": 1.4186616149736349e-06, "epoch": 9.031476997578693, "percentage": 90.31, "elapsed_time": "0:15:23", "remaining_time": "0:01:39", "throughput": 3311.82, "total_tokens": 3059920}
{"current_steps": 7465, "total_steps": 8260, "loss": 0.1801, "lr": 1.401172578676166e-06, "epoch": 9.037530266343826, "percentage": 90.38, "elapsed_time": "0:15:24", "remaining_time": "0:01:38", "throughput": 3311.98, "total_tokens": 3061872}
{"current_steps": 7470, "total_steps": 8260, "loss": 0.168, "lr": 1.383788904596403e-06, "epoch": 9.043583535108958, "percentage": 90.44, "elapsed_time": "0:15:25", "remaining_time": "0:01:37", "throughput": 3312.19, "total_tokens": 3063888}
{"current_steps": 7475, "total_steps": 8260, "loss": 0.1817, "lr": 1.3665106703476178e-06, "epoch": 9.049636803874092, "percentage": 90.5, "elapsed_time": "0:15:25", "remaining_time": "0:01:37", "throughput": 3312.54, "total_tokens": 3065872}
{"current_steps": 7480, "total_steps": 8260, "loss": 0.1491, "lr": 1.349337953072341e-06, "epoch": 9.055690072639225, "percentage": 90.56, "elapsed_time": "0:15:26", "remaining_time": "0:01:36", "throughput": 3312.76, "total_tokens": 3067888}
{"current_steps": 7485, "total_steps": 8260, "loss": 0.1919, "lr": 1.3322708294419923e-06, "epoch": 9.061743341404359, "percentage": 90.62, "elapsed_time": "0:15:26", "remaining_time": "0:01:35", "throughput": 3313.04, "total_tokens": 3069968}
{"current_steps": 7490, "total_steps": 8260, "loss": 0.1716, "lr": 1.3153093756565426e-06, "epoch": 9.067796610169491, "percentage": 90.68, "elapsed_time": "0:15:27", "remaining_time": "0:01:35", "throughput": 3313.28, "total_tokens": 3072176}
{"current_steps": 7495, "total_steps": 8260, "loss": 0.2037, "lr": 1.298453667444169e-06, "epoch": 9.073849878934624, "percentage": 90.74, "elapsed_time": "0:15:27", "remaining_time": "0:01:34", "throughput": 3313.59, "total_tokens": 3074288}
{"current_steps": 7500, "total_steps": 8260, "loss": 0.1998, "lr": 1.281703780060947e-06, "epoch": 9.079903147699758, "percentage": 90.8, "elapsed_time": "0:15:28", "remaining_time": "0:01:34", "throughput": 3313.81, "total_tokens": 3076304}
{"current_steps": 7505, "total_steps": 8260, "loss": 0.1785, "lr": 1.265059788290468e-06, "epoch": 9.08595641646489, "percentage": 90.86, "elapsed_time": "0:15:28", "remaining_time": "0:01:33", "throughput": 3313.96, "total_tokens": 3078320}
{"current_steps": 7510, "total_steps": 8260, "loss": 0.1901, "lr": 1.2485217664435418e-06, "epoch": 9.092009685230025, "percentage": 90.92, "elapsed_time": "0:15:29", "remaining_time": "0:01:32", "throughput": 3314.29, "total_tokens": 3080464}
{"current_steps": 7515, "total_steps": 8260, "loss": 0.1932, "lr": 1.232089788357843e-06, "epoch": 9.098062953995157, "percentage": 90.98, "elapsed_time": "0:15:30", "remaining_time": "0:01:32", "throughput": 3314.59, "total_tokens": 3082672}
{"current_steps": 7520, "total_steps": 8260, "loss": 0.2038, "lr": 1.2157639273975979e-06, "epoch": 9.104116222760291, "percentage": 91.04, "elapsed_time": "0:15:30", "remaining_time": "0:01:31", "throughput": 3314.91, "total_tokens": 3084720}
{"current_steps": 7525, "total_steps": 8260, "loss": 0.1839, "lr": 1.19954425645325e-06, "epoch": 9.110169491525424, "percentage": 91.1, "elapsed_time": "0:15:31", "remaining_time": "0:01:30", "throughput": 3315.23, "total_tokens": 3086864}
{"current_steps": 7530, "total_steps": 8260, "loss": 0.1392, "lr": 1.183430847941125e-06, "epoch": 9.116222760290556, "percentage": 91.16, "elapsed_time": "0:15:31", "remaining_time": "0:01:30", "throughput": 3315.52, "total_tokens": 3088880}
{"current_steps": 7535, "total_steps": 8260, "loss": 0.1712, "lr": 1.1674237738031223e-06, "epoch": 9.12227602905569, "percentage": 91.22, "elapsed_time": "0:15:32", "remaining_time": "0:01:29", "throughput": 3315.72, "total_tokens": 3090960}
{"current_steps": 7540, "total_steps": 8260, "loss": 0.1517, "lr": 1.1515231055063914e-06, "epoch": 9.128329297820823, "percentage": 91.28, "elapsed_time": "0:15:32", "remaining_time": "0:01:29", "throughput": 3316.0, "total_tokens": 3093040}
{"current_steps": 7545, "total_steps": 8260, "loss": 0.1793, "lr": 1.135728914043005e-06, "epoch": 9.134382566585957, "percentage": 91.34, "elapsed_time": "0:15:33", "remaining_time": "0:01:28", "throughput": 3316.18, "total_tokens": 3095024}
{"current_steps": 7550, "total_steps": 8260, "loss": 0.1732, "lr": 1.120041269929642e-06, "epoch": 9.14043583535109, "percentage": 91.4, "elapsed_time": "0:15:33", "remaining_time": "0:01:27", "throughput": 3316.46, "total_tokens": 3097008}
{"current_steps": 7555, "total_steps": 8260, "loss": 0.1792, "lr": 1.1044602432072836e-06, "epoch": 9.146489104116222, "percentage": 91.46, "elapsed_time": "0:15:34", "remaining_time": "0:01:27", "throughput": 3316.82, "total_tokens": 3099184}
{"current_steps": 7560, "total_steps": 8260, "loss": 0.2302, "lr": 1.0889859034408922e-06, "epoch": 9.152542372881356, "percentage": 91.53, "elapsed_time": "0:15:34", "remaining_time": "0:01:26", "throughput": 3317.07, "total_tokens": 3101328}
{"current_steps": 7565, "total_steps": 8260, "loss": 0.184, "lr": 1.0736183197191024e-06, "epoch": 9.158595641646489, "percentage": 91.59, "elapsed_time": "0:15:35", "remaining_time": "0:01:25", "throughput": 3317.27, "total_tokens": 3103408}
{"current_steps": 7570, "total_steps": 8260, "loss": 0.2284, "lr": 1.0583575606539108e-06, "epoch": 9.164648910411623, "percentage": 91.65, "elapsed_time": "0:15:36", "remaining_time": "0:01:25", "throughput": 3317.59, "total_tokens": 3105616}
{"current_steps": 7575, "total_steps": 8260, "loss": 0.1749, "lr": 1.0432036943803708e-06, "epoch": 9.170702179176756, "percentage": 91.71, "elapsed_time": "0:15:36", "remaining_time": "0:01:24", "throughput": 3317.78, "total_tokens": 3107536}
{"current_steps": 7580, "total_steps": 8260, "loss": 0.1974, "lr": 1.0281567885562947e-06, "epoch": 9.176755447941888, "percentage": 91.77, "elapsed_time": "0:15:37", "remaining_time": "0:01:24", "throughput": 3318.04, "total_tokens": 3109648}
{"current_steps": 7585, "total_steps": 8260, "loss": 0.1903, "lr": 1.0132169103619444e-06, "epoch": 9.182808716707022, "percentage": 91.83, "elapsed_time": "0:15:37", "remaining_time": "0:01:23", "throughput": 3318.36, "total_tokens": 3111504}
{"current_steps": 7590, "total_steps": 8260, "loss": 0.1906, "lr": 9.98384126499735e-07, "epoch": 9.188861985472155, "percentage": 91.89, "elapsed_time": "0:15:38", "remaining_time": "0:01:22", "throughput": 3318.57, "total_tokens": 3113424}
{"current_steps": 7595, "total_steps": 8260, "loss": 0.1762, "lr": 9.836585031939154e-07, "epoch": 9.194915254237289, "percentage": 91.95, "elapsed_time": "0:15:38", "remaining_time": "0:01:22", "throughput": 3318.85, "total_tokens": 3115504}
{"current_steps": 7600, "total_steps": 8260, "loss": 0.2075, "lr": 9.690401061903249e-07, "epoch": 9.200968523002421, "percentage": 92.01, "elapsed_time": "0:15:39", "remaining_time": "0:01:21", "throughput": 3319.09, "total_tokens": 3117488}
{"current_steps": 7605, "total_steps": 8260, "loss": 0.1968, "lr": 9.545290007560437e-07, "epoch": 9.207021791767554, "percentage": 92.07, "elapsed_time": "0:15:39", "remaining_time": "0:01:20", "throughput": 3319.27, "total_tokens": 3119376}
{"current_steps": 7610, "total_steps": 8260, "loss": 0.1735, "lr": 9.401252516791304e-07, "epoch": 9.213075060532688, "percentage": 92.13, "elapsed_time": "0:15:40", "remaining_time": "0:01:20", "throughput": 3319.51, "total_tokens": 3121424}
{"current_steps": 7615, "total_steps": 8260, "loss": 0.1712, "lr": 9.258289232683321e-07, "epoch": 9.21912832929782, "percentage": 92.19, "elapsed_time": "0:15:40", "remaining_time": "0:01:19", "throughput": 3319.64, "total_tokens": 3123504}
{"current_steps": 7620, "total_steps": 8260, "loss": 0.1632, "lr": 9.11640079352788e-07, "epoch": 9.225181598062955, "percentage": 92.25, "elapsed_time": "0:15:41", "remaining_time": "0:01:19", "throughput": 3319.87, "total_tokens": 3125712}
{"current_steps": 7625, "total_steps": 8260, "loss": 0.2028, "lr": 8.975587832817545e-07, "epoch": 9.231234866828087, "percentage": 92.31, "elapsed_time": "0:15:42", "remaining_time": "0:01:18", "throughput": 3320.11, "total_tokens": 3127824}
{"current_steps": 7630, "total_steps": 8260, "loss": 0.2067, "lr": 8.835850979243055e-07, "epoch": 9.23728813559322, "percentage": 92.37, "elapsed_time": "0:15:42", "remaining_time": "0:01:17", "throughput": 3320.33, "total_tokens": 3129936}
{"current_steps": 7635, "total_steps": 8260, "loss": 0.1763, "lr": 8.697190856690685e-07, "epoch": 9.243341404358354, "percentage": 92.43, "elapsed_time": "0:15:43", "remaining_time": "0:01:17", "throughput": 3320.57, "total_tokens": 3131984}
{"current_steps": 7640, "total_steps": 8260, "loss": 0.2028, "lr": 8.559608084239474e-07, "epoch": 9.249394673123486, "percentage": 92.49, "elapsed_time": "0:15:43", "remaining_time": "0:01:16", "throughput": 3320.85, "total_tokens": 3134064}
{"current_steps": 7645, "total_steps": 8260, "loss": 0.1538, "lr": 8.423103276158306e-07, "epoch": 9.25544794188862, "percentage": 92.55, "elapsed_time": "0:15:44", "remaining_time": "0:01:15", "throughput": 3321.08, "total_tokens": 3136176}
{"current_steps": 7650, "total_steps": 8260, "loss": 0.1805, "lr": 8.287677041903308e-07, "epoch": 9.261501210653753, "percentage": 92.62, "elapsed_time": "0:15:44", "remaining_time": "0:01:15", "throughput": 3321.29, "total_tokens": 3138288}
{"current_steps": 7655, "total_steps": 8260, "loss": 0.1657, "lr": 8.15332998611501e-07, "epoch": 9.267554479418886, "percentage": 92.68, "elapsed_time": "0:15:45", "remaining_time": "0:01:14", "throughput": 3321.54, "total_tokens": 3140272}
{"current_steps": 7660, "total_steps": 8260, "loss": 0.1823, "lr": 8.020062708615745e-07, "epoch": 9.27360774818402, "percentage": 92.74, "elapsed_time": "0:15:46", "remaining_time": "0:01:14", "throughput": 3321.74, "total_tokens": 3142448}
{"current_steps": 7665, "total_steps": 8260, "loss": 0.1787, "lr": 7.887875804406946e-07, "epoch": 9.279661016949152, "percentage": 92.8, "elapsed_time": "0:15:46", "remaining_time": "0:01:13", "throughput": 3321.87, "total_tokens": 3144528}
{"current_steps": 7670, "total_steps": 8260, "loss": 0.1731, "lr": 7.756769863666524e-07, "epoch": 9.285714285714286, "percentage": 92.86, "elapsed_time": "0:15:47", "remaining_time": "0:01:12", "throughput": 3322.05, "total_tokens": 3146512}
{"current_steps": 7675, "total_steps": 8260, "loss": 0.1674, "lr": 7.626745471746022e-07, "epoch": 9.291767554479419, "percentage": 92.92, "elapsed_time": "0:15:47", "remaining_time": "0:01:12", "throughput": 3322.3, "total_tokens": 3148560}
{"current_steps": 7680, "total_steps": 8260, "loss": 0.2072, "lr": 7.497803209168347e-07, "epoch": 9.297820823244551, "percentage": 92.98, "elapsed_time": "0:15:48", "remaining_time": "0:01:11", "throughput": 3322.57, "total_tokens": 3150640}
{"current_steps": 7685, "total_steps": 8260, "loss": 0.1688, "lr": 7.369943651624938e-07, "epoch": 9.303874092009686, "percentage": 93.04, "elapsed_time": "0:15:48", "remaining_time": "0:01:10", "throughput": 3322.8, "total_tokens": 3152688}
{"current_steps": 7690, "total_steps": 8260, "loss": 0.1976, "lr": 7.243167369973242e-07, "epoch": 9.309927360774818, "percentage": 93.1, "elapsed_time": "0:15:49", "remaining_time": "0:01:10", "throughput": 3323.05, "total_tokens": 3154672}
{"current_steps": 7695, "total_steps": 8260, "loss": 0.1943, "lr": 7.117474930234124e-07, "epoch": 9.315980629539952, "percentage": 93.16, "elapsed_time": "0:15:49", "remaining_time": "0:01:09", "throughput": 3323.31, "total_tokens": 3156656}
{"current_steps": 7700, "total_steps": 8260, "loss": 0.2079, "lr": 6.992866893589578e-07, "epoch": 9.322033898305085, "percentage": 93.22, "elapsed_time": "0:15:50", "remaining_time": "0:01:09", "throughput": 3323.55, "total_tokens": 3158640}
{"current_steps": 7705, "total_steps": 8260, "loss": 0.1795, "lr": 6.869343816379825e-07, "epoch": 9.328087167070217, "percentage": 93.28, "elapsed_time": "0:15:50", "remaining_time": "0:01:08", "throughput": 3323.82, "total_tokens": 3160624}
{"current_steps": 7710, "total_steps": 8260, "loss": 0.1716, "lr": 6.74690625010116e-07, "epoch": 9.334140435835351, "percentage": 93.34, "elapsed_time": "0:15:51", "remaining_time": "0:01:07", "throughput": 3324.08, "total_tokens": 3162608}
{"current_steps": 7715, "total_steps": 8260, "loss": 0.183, "lr": 6.625554741403333e-07, "epoch": 9.340193704600484, "percentage": 93.4, "elapsed_time": "0:15:51", "remaining_time": "0:01:07", "throughput": 3324.38, "total_tokens": 3164560}
{"current_steps": 7720, "total_steps": 8260, "loss": 0.183, "lr": 6.505289832087231e-07, "epoch": 9.346246973365618, "percentage": 93.46, "elapsed_time": "0:15:52", "remaining_time": "0:01:06", "throughput": 3324.65, "total_tokens": 3166768}
{"current_steps": 7725, "total_steps": 8260, "loss": 0.1818, "lr": 6.386112059102251e-07, "epoch": 9.35230024213075, "percentage": 93.52, "elapsed_time": "0:15:53", "remaining_time": "0:01:06", "throughput": 3324.83, "total_tokens": 3168912}
{"current_steps": 7730, "total_steps": 8260, "loss": 0.2002, "lr": 6.268021954544096e-07, "epoch": 9.358353510895883, "percentage": 93.58, "elapsed_time": "0:15:53", "remaining_time": "0:01:05", "throughput": 3325.14, "total_tokens": 3170800}
{"current_steps": 7735, "total_steps": 8260, "loss": 0.1946, "lr": 6.15102004565235e-07, "epoch": 9.364406779661017, "percentage": 93.64, "elapsed_time": "0:15:54", "remaining_time": "0:01:04", "throughput": 3325.41, "total_tokens": 3172784}
{"current_steps": 7740, "total_steps": 8260, "loss": 0.1584, "lr": 6.035106854808014e-07, "epoch": 9.37046004842615, "percentage": 93.7, "elapsed_time": "0:15:54", "remaining_time": "0:01:04", "throughput": 3325.65, "total_tokens": 3174928}
{"current_steps": 7745, "total_steps": 8260, "loss": 0.1855, "lr": 5.920282899531421e-07, "epoch": 9.376513317191284, "percentage": 93.77, "elapsed_time": "0:15:55", "remaining_time": "0:01:03", "throughput": 3325.81, "total_tokens": 3176976}
{"current_steps": 7750, "total_steps": 8260, "loss": 0.1921, "lr": 5.806548692479624e-07, "epoch": 9.382566585956416, "percentage": 93.83, "elapsed_time": "0:15:55", "remaining_time": "0:01:02", "throughput": 3325.79, "total_tokens": 3178896}
{"current_steps": 7755, "total_steps": 8260, "loss": 0.1779, "lr": 5.693904741444267e-07, "epoch": 9.388619854721549, "percentage": 93.89, "elapsed_time": "0:15:56", "remaining_time": "0:01:02", "throughput": 3326.09, "total_tokens": 3180848}
{"current_steps": 7760, "total_steps": 8260, "loss": 0.1647, "lr": 5.58235154934944e-07, "epoch": 9.394673123486683, "percentage": 93.95, "elapsed_time": "0:15:56", "remaining_time": "0:01:01", "throughput": 3326.3, "total_tokens": 3182704}
{"current_steps": 7765, "total_steps": 8260, "loss": 0.1371, "lr": 5.471889614249104e-07, "epoch": 9.400726392251816, "percentage": 94.01, "elapsed_time": "0:15:57", "remaining_time": "0:01:01", "throughput": 3326.52, "total_tokens": 3184848}
{"current_steps": 7770, "total_steps": 8260, "loss": 0.1927, "lr": 5.362519429325225e-07, "epoch": 9.40677966101695, "percentage": 94.07, "elapsed_time": "0:15:57", "remaining_time": "0:01:00", "throughput": 3326.7, "total_tokens": 3186832}
{"current_steps": 7775, "total_steps": 8260, "loss": 0.1596, "lr": 5.254241482885253e-07, "epoch": 9.412832929782082, "percentage": 94.13, "elapsed_time": "0:15:58", "remaining_time": "0:00:59", "throughput": 3327.03, "total_tokens": 3188912}
{"current_steps": 7780, "total_steps": 8260, "loss": 0.1648, "lr": 5.147056258360289e-07, "epoch": 9.418886198547215, "percentage": 94.19, "elapsed_time": "0:15:59", "remaining_time": "0:00:59", "throughput": 3327.27, "total_tokens": 3191152}
{"current_steps": 7785, "total_steps": 8260, "loss": 0.1868, "lr": 5.040964234302559e-07, "epoch": 9.424939467312349, "percentage": 94.25, "elapsed_time": "0:15:59", "remaining_time": "0:00:58", "throughput": 3327.45, "total_tokens": 3193232}
{"current_steps": 7790, "total_steps": 8260, "loss": 0.1826, "lr": 4.935965884383525e-07, "epoch": 9.430992736077481, "percentage": 94.31, "elapsed_time": "0:16:00", "remaining_time": "0:00:57", "throughput": 3327.7, "total_tokens": 3195312}
{"current_steps": 7795, "total_steps": 8260, "loss": 0.1705, "lr": 4.832061677391697e-07, "epoch": 9.437046004842616, "percentage": 94.37, "elapsed_time": "0:16:00", "remaining_time": "0:00:57", "throughput": 3327.98, "total_tokens": 3197328}
{"current_steps": 7800, "total_steps": 8260, "loss": 0.2088, "lr": 4.729252077230517e-07, "epoch": 9.443099273607748, "percentage": 94.43, "elapsed_time": "0:16:01", "remaining_time": "0:00:56", "throughput": 3328.27, "total_tokens": 3199280}
{"current_steps": 7805, "total_steps": 8260, "loss": 0.1938, "lr": 4.6275375429163656e-07, "epoch": 9.44915254237288, "percentage": 94.49, "elapsed_time": "0:16:01", "remaining_time": "0:00:56", "throughput": 3328.51, "total_tokens": 3201328}
{"current_steps": 7810, "total_steps": 8260, "loss": 0.1718, "lr": 4.526918528576396e-07, "epoch": 9.455205811138015, "percentage": 94.55, "elapsed_time": "0:16:02", "remaining_time": "0:00:55", "throughput": 3328.71, "total_tokens": 3203344}
{"current_steps": 7815, "total_steps": 8260, "loss": 0.1913, "lr": 4.427395483446617e-07, "epoch": 9.461259079903147, "percentage": 94.61, "elapsed_time": "0:16:02", "remaining_time": "0:00:54", "throughput": 3328.95, "total_tokens": 3205488}
{"current_steps": 7820, "total_steps": 8260, "loss": 0.2076, "lr": 4.328968851869758e-07, "epoch": 9.467312348668282, "percentage": 94.67, "elapsed_time": "0:16:03", "remaining_time": "0:00:54", "throughput": 3329.15, "total_tokens": 3207504}
{"current_steps": 7825, "total_steps": 8260, "loss": 0.1991, "lr": 4.231639073293492e-07, "epoch": 9.473365617433414, "percentage": 94.73, "elapsed_time": "0:16:04", "remaining_time": "0:00:53", "throughput": 3329.37, "total_tokens": 3209712}
{"current_steps": 7830, "total_steps": 8260, "loss": 0.1672, "lr": 4.13540658226827e-07, "epoch": 9.479418886198546, "percentage": 94.79, "elapsed_time": "0:16:04", "remaining_time": "0:00:52", "throughput": 3329.57, "total_tokens": 3211728}
{"current_steps": 7835, "total_steps": 8260, "loss": 0.1685, "lr": 4.040271808445406e-07, "epoch": 9.48547215496368, "percentage": 94.85, "elapsed_time": "0:16:05", "remaining_time": "0:00:52", "throughput": 3329.73, "total_tokens": 3213616}
{"current_steps": 7840, "total_steps": 8260, "loss": 0.1574, "lr": 3.94623517657533e-07, "epoch": 9.491525423728813, "percentage": 94.92, "elapsed_time": "0:16:05", "remaining_time": "0:00:51", "throughput": 3330.0, "total_tokens": 3215536}
{"current_steps": 7845, "total_steps": 8260, "loss": 0.1687, "lr": 3.8532971065055045e-07, "epoch": 9.497578692493947, "percentage": 94.98, "elapsed_time": "0:16:06", "remaining_time": "0:00:51", "throughput": 3330.27, "total_tokens": 3217552}
{"current_steps": 7847, "total_steps": 8260, "eval_loss": 0.18610365688800812, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:16:11", "remaining_time": "0:00:51", "throughput": 3314.42, "total_tokens": 3218352}
{"current_steps": 7850, "total_steps": 8260, "loss": 0.1552, "lr": 3.761458013178648e-07, "epoch": 9.50363196125908, "percentage": 95.04, "elapsed_time": "0:16:12", "remaining_time": "0:00:50", "throughput": 3311.46, "total_tokens": 3219664}
{"current_steps": 7855, "total_steps": 8260, "loss": 0.1898, "lr": 3.670718306630766e-07, "epoch": 9.509685230024212, "percentage": 95.1, "elapsed_time": "0:16:12", "remaining_time": "0:00:50", "throughput": 3311.69, "total_tokens": 3221648}
{"current_steps": 7860, "total_steps": 8260, "loss": 0.1619, "lr": 3.5810783919895673e-07, "epoch": 9.515738498789347, "percentage": 95.16, "elapsed_time": "0:16:13", "remaining_time": "0:00:49", "throughput": 3311.94, "total_tokens": 3223632}
{"current_steps": 7865, "total_steps": 8260, "loss": 0.1771, "lr": 3.4925386694723284e-07, "epoch": 9.521791767554479, "percentage": 95.22, "elapsed_time": "0:16:13", "remaining_time": "0:00:48", "throughput": 3312.1, "total_tokens": 3225616}
{"current_steps": 7870, "total_steps": 8260, "loss": 0.1762, "lr": 3.405099534384393e-07, "epoch": 9.527845036319613, "percentage": 95.28, "elapsed_time": "0:16:14", "remaining_time": "0:00:48", "throughput": 3312.38, "total_tokens": 3227728}
{"current_steps": 7875, "total_steps": 8260, "loss": 0.1882, "lr": 3.31876137711723e-07, "epoch": 9.533898305084746, "percentage": 95.34, "elapsed_time": "0:16:14", "remaining_time": "0:00:47", "throughput": 3312.58, "total_tokens": 3229744}
{"current_steps": 7880, "total_steps": 8260, "loss": 0.2062, "lr": 3.233524583146741e-07, "epoch": 9.539951573849878, "percentage": 95.4, "elapsed_time": "0:16:15", "remaining_time": "0:00:47", "throughput": 3312.86, "total_tokens": 3231664}
{"current_steps": 7885, "total_steps": 8260, "loss": 0.1716, "lr": 3.149389533031566e-07, "epoch": 9.546004842615012, "percentage": 95.46, "elapsed_time": "0:16:16", "remaining_time": "0:00:46", "throughput": 3313.1, "total_tokens": 3233712}
{"current_steps": 7890, "total_steps": 8260, "loss": 0.162, "lr": 3.066356602411419e-07, "epoch": 9.552058111380145, "percentage": 95.52, "elapsed_time": "0:16:16", "remaining_time": "0:00:45", "throughput": 3313.37, "total_tokens": 3235728}
{"current_steps": 7895, "total_steps": 8260, "loss": 0.216, "lr": 2.984426162005227e-07, "epoch": 9.558111380145279, "percentage": 95.58, "elapsed_time": "0:16:17", "remaining_time": "0:00:45", "throughput": 3313.64, "total_tokens": 3237712}
{"current_steps": 7900, "total_steps": 8260, "loss": 0.1872, "lr": 2.903598577609717e-07, "epoch": 9.564164648910412, "percentage": 95.64, "elapsed_time": "0:16:17", "remaining_time": "0:00:44", "throughput": 3313.8, "total_tokens": 3239664}
{"current_steps": 7905, "total_steps": 8260, "loss": 0.1983, "lr": 2.823874210097638e-07, "epoch": 9.570217917675544, "percentage": 95.7, "elapsed_time": "0:16:18", "remaining_time": "0:00:43", "throughput": 3314.01, "total_tokens": 3241776}
{"current_steps": 7910, "total_steps": 8260, "loss": 0.1846, "lr": 2.745253415416177e-07, "epoch": 9.576271186440678, "percentage": 95.76, "elapsed_time": "0:16:18", "remaining_time": "0:00:43", "throughput": 3314.32, "total_tokens": 3243824}
{"current_steps": 7915, "total_steps": 8260, "loss": 0.1802, "lr": 2.6677365445852976e-07, "epoch": 9.58232445520581, "percentage": 95.82, "elapsed_time": "0:16:19", "remaining_time": "0:00:42", "throughput": 3314.52, "total_tokens": 3246000}
{"current_steps": 7920, "total_steps": 8260, "loss": 0.1698, "lr": 2.5913239436964054e-07, "epoch": 9.588377723970945, "percentage": 95.88, "elapsed_time": "0:16:19", "remaining_time": "0:00:42", "throughput": 3314.82, "total_tokens": 3248112}
{"current_steps": 7925, "total_steps": 8260, "loss": 0.195, "lr": 2.5160159539105443e-07, "epoch": 9.594430992736077, "percentage": 95.94, "elapsed_time": "0:16:20", "remaining_time": "0:00:41", "throughput": 3315.11, "total_tokens": 3250256}
{"current_steps": 7930, "total_steps": 8260, "loss": 0.1914, "lr": 2.441812911456981e-07, "epoch": 9.600484261501212, "percentage": 96.0, "elapsed_time": "0:16:20", "remaining_time": "0:00:40", "throughput": 3315.41, "total_tokens": 3252368}
{"current_steps": 7935, "total_steps": 8260, "loss": 0.1968, "lr": 2.3687151476317337e-07, "epoch": 9.606537530266344, "percentage": 96.07, "elapsed_time": "0:16:21", "remaining_time": "0:00:40", "throughput": 3315.64, "total_tokens": 3254416}
{"current_steps": 7940, "total_steps": 8260, "loss": 0.221, "lr": 2.2967229887960186e-07, "epoch": 9.612590799031477, "percentage": 96.13, "elapsed_time": "0:16:22", "remaining_time": "0:00:39", "throughput": 3315.76, "total_tokens": 3256496}
{"current_steps": 7945, "total_steps": 8260, "loss": 0.1964, "lr": 2.2258367563748884e-07, "epoch": 9.61864406779661, "percentage": 96.19, "elapsed_time": "0:16:22", "remaining_time": "0:00:38", "throughput": 3316.02, "total_tokens": 3258576}
{"current_steps": 7950, "total_steps": 8260, "loss": 0.1939, "lr": 2.1560567668556797e-07, "epoch": 9.624697336561743, "percentage": 96.25, "elapsed_time": "0:16:23", "remaining_time": "0:00:38", "throughput": 3316.2, "total_tokens": 3260496}
{"current_steps": 7955, "total_steps": 8260, "loss": 0.1997, "lr": 2.0873833317866798e-07, "epoch": 9.630750605326877, "percentage": 96.31, "elapsed_time": "0:16:23", "remaining_time": "0:00:37", "throughput": 3316.48, "total_tokens": 3262608}
{"current_steps": 7960, "total_steps": 8260, "loss": 0.212, "lr": 2.019816757775711e-07, "epoch": 9.63680387409201, "percentage": 96.37, "elapsed_time": "0:16:24", "remaining_time": "0:00:37", "throughput": 3316.72, "total_tokens": 3264592}
{"current_steps": 7965, "total_steps": 8260, "loss": 0.1611, "lr": 1.9533573464888543e-07, "epoch": 9.642857142857142, "percentage": 96.43, "elapsed_time": "0:16:24", "remaining_time": "0:00:36", "throughput": 3316.94, "total_tokens": 3266704}
{"current_steps": 7970, "total_steps": 8260, "loss": 0.193, "lr": 1.8880053946488675e-07, "epoch": 9.648910411622277, "percentage": 96.49, "elapsed_time": "0:16:25", "remaining_time": "0:00:35", "throughput": 3317.15, "total_tokens": 3268816}
{"current_steps": 7975, "total_steps": 8260, "loss": 0.2111, "lr": 1.8237611940341291e-07, "epoch": 9.654963680387409, "percentage": 96.55, "elapsed_time": "0:16:25", "remaining_time": "0:00:35", "throughput": 3317.38, "total_tokens": 3270864}
{"current_steps": 7980, "total_steps": 8260, "loss": 0.1854, "lr": 1.760625031477142e-07, "epoch": 9.661016949152543, "percentage": 96.61, "elapsed_time": "0:16:26", "remaining_time": "0:00:34", "throughput": 3317.57, "total_tokens": 3272944}
{"current_steps": 7985, "total_steps": 8260, "loss": 0.1695, "lr": 1.6985971888633935e-07, "epoch": 9.667070217917676, "percentage": 96.67, "elapsed_time": "0:16:27", "remaining_time": "0:00:33", "throughput": 3317.8, "total_tokens": 3274992}
{"current_steps": 7990, "total_steps": 8260, "loss": 0.1488, "lr": 1.637677943129967e-07, "epoch": 9.673123486682808, "percentage": 96.73, "elapsed_time": "0:16:27", "remaining_time": "0:00:33", "throughput": 3318.01, "total_tokens": 3277008}
{"current_steps": 7995, "total_steps": 8260, "loss": 0.2143, "lr": 1.5778675662643793e-07, "epoch": 9.679176755447942, "percentage": 96.79, "elapsed_time": "0:16:28", "remaining_time": "0:00:32", "throughput": 3318.19, "total_tokens": 3278928}
{"current_steps": 8000, "total_steps": 8260, "loss": 0.1922, "lr": 1.5191663253034116e-07, "epoch": 9.685230024213075, "percentage": 96.85, "elapsed_time": "0:16:28", "remaining_time": "0:00:32", "throughput": 3318.39, "total_tokens": 3280944}
{"current_steps": 8005, "total_steps": 8260, "loss": 0.2114, "lr": 1.461574482331779e-07, "epoch": 9.69128329297821, "percentage": 96.91, "elapsed_time": "0:16:29", "remaining_time": "0:00:31", "throughput": 3318.53, "total_tokens": 3282960}
{"current_steps": 8010, "total_steps": 8260, "loss": 0.1732, "lr": 1.4050922944811305e-07, "epoch": 9.697336561743342, "percentage": 96.97, "elapsed_time": "0:16:29", "remaining_time": "0:00:30", "throughput": 3318.74, "total_tokens": 3285008}
{"current_steps": 8015, "total_steps": 8260, "loss": 0.2237, "lr": 1.349720013928718e-07, "epoch": 9.703389830508474, "percentage": 97.03, "elapsed_time": "0:16:30", "remaining_time": "0:00:30", "throughput": 3319.0, "total_tokens": 3287088}
{"current_steps": 8020, "total_steps": 8260, "loss": 0.1781, "lr": 1.2954578878964507e-07, "epoch": 9.709443099273608, "percentage": 97.09, "elapsed_time": "0:16:30", "remaining_time": "0:00:29", "throughput": 3319.24, "total_tokens": 3289168}
{"current_steps": 8025, "total_steps": 8260, "loss": 0.2143, "lr": 1.2423061586496477e-07, "epoch": 9.71549636803874, "percentage": 97.15, "elapsed_time": "0:16:31", "remaining_time": "0:00:29", "throughput": 3319.47, "total_tokens": 3291376}
{"current_steps": 8030, "total_steps": 8260, "loss": 0.1804, "lr": 1.1902650634960378e-07, "epoch": 9.721549636803875, "percentage": 97.22, "elapsed_time": "0:16:32", "remaining_time": "0:00:28", "throughput": 3319.72, "total_tokens": 3293360}
{"current_steps": 8035, "total_steps": 8260, "loss": 0.1749, "lr": 1.1393348347846777e-07, "epoch": 9.727602905569007, "percentage": 97.28, "elapsed_time": "0:16:32", "remaining_time": "0:00:27", "throughput": 3319.9, "total_tokens": 3295344}
{"current_steps": 8040, "total_steps": 8260, "loss": 0.1736, "lr": 1.0895156999048972e-07, "epoch": 9.73365617433414, "percentage": 97.34, "elapsed_time": "0:16:33", "remaining_time": "0:00:27", "throughput": 3320.12, "total_tokens": 3297392}
{"current_steps": 8045, "total_steps": 8260, "loss": 0.204, "lr": 1.0408078812853273e-07, "epoch": 9.739709443099274, "percentage": 97.4, "elapsed_time": "0:16:33", "remaining_time": "0:00:26", "throughput": 3320.38, "total_tokens": 3299376}
{"current_steps": 8050, "total_steps": 8260, "loss": 0.1752, "lr": 9.932115963928734e-08, "epoch": 9.745762711864407, "percentage": 97.46, "elapsed_time": "0:16:34", "remaining_time": "0:00:25", "throughput": 3320.7, "total_tokens": 3301360}
{"current_steps": 8055, "total_steps": 8260, "loss": 0.2065, "lr": 9.467270577317167e-08, "epoch": 9.75181598062954, "percentage": 97.52, "elapsed_time": "0:16:34", "remaining_time": "0:00:25", "throughput": 3320.96, "total_tokens": 3303440}
{"current_steps": 8060, "total_steps": 8260, "loss": 0.1783, "lr": 9.013544728424528e-08, "epoch": 9.757869249394673, "percentage": 97.58, "elapsed_time": "0:16:35", "remaining_time": "0:00:24", "throughput": 3321.25, "total_tokens": 3305552}
{"current_steps": 8065, "total_steps": 8260, "loss": 0.186, "lr": 8.570940443010655e-08, "epoch": 9.763922518159806, "percentage": 97.64, "elapsed_time": "0:16:35", "remaining_time": "0:00:24", "throughput": 3321.51, "total_tokens": 3307728}
{"current_steps": 8070, "total_steps": 8260, "loss": 0.2001, "lr": 8.139459697181218e-08, "epoch": 9.76997578692494, "percentage": 97.7, "elapsed_time": "0:16:36", "remaining_time": "0:00:23", "throughput": 3321.74, "total_tokens": 3309776}
{"current_steps": 8075, "total_steps": 8260, "loss": 0.1761, "lr": 7.719104417377443e-08, "epoch": 9.776029055690072, "percentage": 97.76, "elapsed_time": "0:16:36", "remaining_time": "0:00:22", "throughput": 3321.99, "total_tokens": 3311760}
{"current_steps": 8080, "total_steps": 8260, "loss": 0.1639, "lr": 7.30987648036946e-08, "epoch": 9.782082324455207, "percentage": 97.82, "elapsed_time": "0:16:37", "remaining_time": "0:00:22", "throughput": 3322.2, "total_tokens": 3313808}
{"current_steps": 8085, "total_steps": 8260, "loss": 0.1776, "lr": 6.911777713246581e-08, "epoch": 9.788135593220339, "percentage": 97.88, "elapsed_time": "0:16:38", "remaining_time": "0:00:21", "throughput": 3322.39, "total_tokens": 3315888}
{"current_steps": 8090, "total_steps": 8260, "loss": 0.1788, "lr": 6.524809893409256e-08, "epoch": 9.794188861985472, "percentage": 97.94, "elapsed_time": "0:16:38", "remaining_time": "0:00:20", "throughput": 3322.6, "total_tokens": 3318000}
{"current_steps": 8095, "total_steps": 8260, "loss": 0.1538, "lr": 6.148974748561299e-08, "epoch": 9.800242130750606, "percentage": 98.0, "elapsed_time": "0:16:39", "remaining_time": "0:00:20", "throughput": 3322.8, "total_tokens": 3320016}
{"current_steps": 8100, "total_steps": 8260, "loss": 0.188, "lr": 5.784273956702391e-08, "epoch": 9.806295399515738, "percentage": 98.06, "elapsed_time": "0:16:39", "remaining_time": "0:00:19", "throughput": 3322.98, "total_tokens": 3322096}
{"current_steps": 8105, "total_steps": 8260, "loss": 0.1457, "lr": 5.4307091461205936e-08, "epoch": 9.812348668280872, "percentage": 98.12, "elapsed_time": "0:16:40", "remaining_time": "0:00:19", "throughput": 3323.15, "total_tokens": 3324176}
{"current_steps": 8110, "total_steps": 8260, "loss": 0.179, "lr": 5.08828189538485e-08, "epoch": 9.818401937046005, "percentage": 98.18, "elapsed_time": "0:16:40", "remaining_time": "0:00:18", "throughput": 3323.31, "total_tokens": 3326320}
{"current_steps": 8115, "total_steps": 8260, "loss": 0.1771, "lr": 4.7569937333372115e-08, "epoch": 9.824455205811137, "percentage": 98.24, "elapsed_time": "0:16:41", "remaining_time": "0:00:17", "throughput": 3323.55, "total_tokens": 3328464}
{"current_steps": 8120, "total_steps": 8260, "loss": 0.1587, "lr": 4.436846139087847e-08, "epoch": 9.830508474576272, "percentage": 98.31, "elapsed_time": "0:16:42", "remaining_time": "0:00:17", "throughput": 3323.75, "total_tokens": 3330480}
{"current_steps": 8125, "total_steps": 8260, "loss": 0.2105, "lr": 4.127840542006711e-08, "epoch": 9.836561743341404, "percentage": 98.37, "elapsed_time": "0:16:42", "remaining_time": "0:00:16", "throughput": 3323.99, "total_tokens": 3332624}
{"current_steps": 8130, "total_steps": 8260, "loss": 0.155, "lr": 3.829978321718553e-08, "epoch": 9.842615012106538, "percentage": 98.43, "elapsed_time": "0:16:43", "remaining_time": "0:00:16", "throughput": 3324.31, "total_tokens": 3334768}
{"current_steps": 8135, "total_steps": 8260, "loss": 0.1654, "lr": 3.543260808095139e-08, "epoch": 9.84866828087167, "percentage": 98.49, "elapsed_time": "0:16:43", "remaining_time": "0:00:15", "throughput": 3324.51, "total_tokens": 3336784}
{"current_steps": 8140, "total_steps": 8260, "loss": 0.1586, "lr": 3.267689281250541e-08, "epoch": 9.854721549636803, "percentage": 98.55, "elapsed_time": "0:16:44", "remaining_time": "0:00:14", "throughput": 3324.73, "total_tokens": 3338832}
{"current_steps": 8145, "total_steps": 8260, "loss": 0.2214, "lr": 3.003264971535857e-08, "epoch": 9.860774818401937, "percentage": 98.61, "elapsed_time": "0:16:44", "remaining_time": "0:00:14", "throughput": 3324.94, "total_tokens": 3340848}
{"current_steps": 8150, "total_steps": 8260, "loss": 0.1815, "lr": 2.7499890595314438e-08, "epoch": 9.86682808716707, "percentage": 98.67, "elapsed_time": "0:16:45", "remaining_time": "0:00:13", "throughput": 3325.13, "total_tokens": 3342960}
{"current_steps": 8155, "total_steps": 8260, "loss": 0.1815, "lr": 2.507862676044137e-08, "epoch": 9.872881355932204, "percentage": 98.73, "elapsed_time": "0:16:45", "remaining_time": "0:00:12", "throughput": 3325.37, "total_tokens": 3345104}
{"current_steps": 8160, "total_steps": 8260, "loss": 0.1746, "lr": 2.2768869021014274e-08, "epoch": 9.878934624697337, "percentage": 98.79, "elapsed_time": "0:16:46", "remaining_time": "0:00:12", "throughput": 3325.56, "total_tokens": 3347024}
{"current_steps": 8165, "total_steps": 8260, "loss": 0.1641, "lr": 2.0570627689459054e-08, "epoch": 9.884987893462469, "percentage": 98.85, "elapsed_time": "0:16:47", "remaining_time": "0:00:11", "throughput": 3325.79, "total_tokens": 3349200}
{"current_steps": 8170, "total_steps": 8260, "loss": 0.1776, "lr": 1.848391258031379e-08, "epoch": 9.891041162227603, "percentage": 98.91, "elapsed_time": "0:16:47", "remaining_time": "0:00:11", "throughput": 3326.07, "total_tokens": 3351248}
{"current_steps": 8175, "total_steps": 8260, "loss": 0.1944, "lr": 1.6508733010184297e-08, "epoch": 9.897094430992736, "percentage": 98.97, "elapsed_time": "0:16:48", "remaining_time": "0:00:10", "throughput": 3326.31, "total_tokens": 3353488}
{"current_steps": 8180, "total_steps": 8260, "loss": 0.148, "lr": 1.4645097797694186e-08, "epoch": 9.90314769975787, "percentage": 99.03, "elapsed_time": "0:16:48", "remaining_time": "0:00:09", "throughput": 3326.59, "total_tokens": 3355440}
{"current_steps": 8185, "total_steps": 8260, "loss": 0.2169, "lr": 1.2893015263459874e-08, "epoch": 9.909200968523002, "percentage": 99.09, "elapsed_time": "0:16:49", "remaining_time": "0:00:09", "throughput": 3326.79, "total_tokens": 3357296}
{"current_steps": 8190, "total_steps": 8260, "loss": 0.1789, "lr": 1.125249323004618e-08, "epoch": 9.915254237288135, "percentage": 99.15, "elapsed_time": "0:16:49", "remaining_time": "0:00:08", "throughput": 3327.04, "total_tokens": 3359280}
{"current_steps": 8195, "total_steps": 8260, "loss": 0.1882, "lr": 9.723539021927463e-09, "epoch": 9.92130750605327, "percentage": 99.21, "elapsed_time": "0:16:50", "remaining_time": "0:00:08", "throughput": 3327.33, "total_tokens": 3361328}
{"current_steps": 8200, "total_steps": 8260, "loss": 0.1793, "lr": 8.306159465459872e-09, "epoch": 9.927360774818402, "percentage": 99.27, "elapsed_time": "0:16:50", "remaining_time": "0:00:07", "throughput": 3327.53, "total_tokens": 3363344}
{"current_steps": 8205, "total_steps": 8260, "loss": 0.178, "lr": 7.00036088885081e-09, "epoch": 9.933414043583536, "percentage": 99.33, "elapsed_time": "0:16:51", "remaining_time": "0:00:06", "throughput": 3327.75, "total_tokens": 3365296}
{"current_steps": 8210, "total_steps": 8260, "loss": 0.1711, "lr": 5.806149122128401e-09, "epoch": 9.939467312348668, "percentage": 99.39, "elapsed_time": "0:16:51", "remaining_time": "0:00:06", "throughput": 3327.97, "total_tokens": 3367504}
{"current_steps": 8215, "total_steps": 8260, "loss": 0.2003, "lr": 4.723529497113743e-09, "epoch": 9.9455205811138, "percentage": 99.46, "elapsed_time": "0:16:52", "remaining_time": "0:00:05", "throughput": 3328.18, "total_tokens": 3369616}
{"current_steps": 8220, "total_steps": 8260, "loss": 0.1774, "lr": 3.752506847407023e-09, "epoch": 9.951573849878935, "percentage": 99.52, "elapsed_time": "0:16:53", "remaining_time": "0:00:04", "throughput": 3328.38, "total_tokens": 3371728}
{"current_steps": 8225, "total_steps": 8260, "loss": 0.2027, "lr": 2.8930855083542096e-09, "epoch": 9.957627118644067, "percentage": 99.58, "elapsed_time": "0:16:53", "remaining_time": "0:00:04", "throughput": 3328.56, "total_tokens": 3373648}
{"current_steps": 8230, "total_steps": 8260, "loss": 0.1772, "lr": 2.145269317033183e-09, "epoch": 9.963680387409202, "percentage": 99.64, "elapsed_time": "0:16:54", "remaining_time": "0:00:03", "throughput": 3328.76, "total_tokens": 3375664}
{"current_steps": 8235, "total_steps": 8260, "loss": 0.1493, "lr": 1.509061612234297e-09, "epoch": 9.969733656174334, "percentage": 99.7, "elapsed_time": "0:16:54", "remaining_time": "0:00:03", "throughput": 3328.98, "total_tokens": 3377808}
{"current_steps": 8240, "total_steps": 8260, "loss": 0.1874, "lr": 9.844652344492832e-10, "epoch": 9.975786924939467, "percentage": 99.76, "elapsed_time": "0:16:55", "remaining_time": "0:00:02", "throughput": 3329.2, "total_tokens": 3379888}
{"current_steps": 8245, "total_steps": 8260, "loss": 0.2404, "lr": 5.714825258545942e-10, "epoch": 9.9818401937046, "percentage": 99.82, "elapsed_time": "0:16:55", "remaining_time": "0:00:01", "throughput": 3329.48, "total_tokens": 3382064}
{"current_steps": 8250, "total_steps": 8260, "loss": 0.1511, "lr": 2.7011533030585347e-10, "epoch": 9.987893462469733, "percentage": 99.88, "elapsed_time": "0:16:56", "remaining_time": "0:00:01", "throughput": 3329.72, "total_tokens": 3384144}
{"current_steps": 8255, "total_steps": 8260, "loss": 0.1651, "lr": 8.036499332397807e-11, "epoch": 9.993946731234868, "percentage": 99.94, "elapsed_time": "0:16:56", "remaining_time": "0:00:00", "throughput": 3329.99, "total_tokens": 3386160}
{"current_steps": 8260, "total_steps": 8260, "loss": 0.2114, "lr": 2.2323620896269604e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:16:57", "remaining_time": "0:00:00", "throughput": 3329.75, "total_tokens": 3388032}
{"current_steps": 8260, "total_steps": 8260, "eval_loss": 0.18457433581352234, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:17:02", "remaining_time": "0:00:00", "throughput": 3314.65, "total_tokens": 3388032}
{"current_steps": 8260, "total_steps": 8260, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:17:03", "remaining_time": "0:00:00", "throughput": 3311.36, "total_tokens": 3388032}