| {"current_steps": 5, "total_steps": 8260, "loss": 5.7552, "lr": 2.421307506053269e-07, "epoch": 0.006053268765133172, "percentage": 0.06, "elapsed_time": "0:00:01", "remaining_time": "0:45:24", "throughput": 1163.26, "total_tokens": 1920} |
| {"current_steps": 10, "total_steps": 8260, "loss": 6.0888, "lr": 5.447941888619855e-07, "epoch": 0.012106537530266344, "percentage": 0.12, "elapsed_time": "0:00:02", "remaining_time": "0:35:43", "throughput": 1465.9, "total_tokens": 3808} |
| {"current_steps": 15, "total_steps": 8260, "loss": 5.2076, "lr": 8.474576271186441e-07, "epoch": 0.018159806295399514, "percentage": 0.18, "elapsed_time": "0:00:03", "remaining_time": "0:33:04", "throughput": 1621.77, "total_tokens": 5856} |
| {"current_steps": 20, "total_steps": 8260, "loss": 4.0578, "lr": 1.1501210653753028e-06, "epoch": 0.024213075060532687, "percentage": 0.24, "elapsed_time": "0:00:04", "remaining_time": "0:31:32", "throughput": 1706.7, "total_tokens": 7840} |
| {"current_steps": 25, "total_steps": 8260, "loss": 3.3968, "lr": 1.4527845036319614e-06, "epoch": 0.03026634382566586, "percentage": 0.3, "elapsed_time": "0:00:05", "remaining_time": "0:30:55", "throughput": 1789.79, "total_tokens": 10080} |
| {"current_steps": 30, "total_steps": 8260, "loss": 3.2369, "lr": 1.7554479418886198e-06, "epoch": 0.03631961259079903, "percentage": 0.36, "elapsed_time": "0:00:06", "remaining_time": "0:30:15", "throughput": 1823.38, "total_tokens": 12064} |
| {"current_steps": 35, "total_steps": 8260, "loss": 2.5178, "lr": 2.0581113801452785e-06, "epoch": 0.0423728813559322, "percentage": 0.42, "elapsed_time": "0:00:07", "remaining_time": "0:29:53", "throughput": 1852.8, "total_tokens": 14144} |
| {"current_steps": 40, "total_steps": 8260, "loss": 1.9685, "lr": 2.3607748184019373e-06, "epoch": 0.048426150121065374, "percentage": 0.48, "elapsed_time": "0:00:08", "remaining_time": "0:29:37", "throughput": 1864.3, "total_tokens": 16128} |
| {"current_steps": 45, "total_steps": 8260, "loss": 1.5857, "lr": 2.6634382566585957e-06, "epoch": 0.05447941888619855, "percentage": 0.54, "elapsed_time": "0:00:09", "remaining_time": "0:29:13", "throughput": 1885.47, "total_tokens": 18112} |
| {"current_steps": 50, "total_steps": 8260, "loss": 1.2369, "lr": 2.9661016949152545e-06, "epoch": 0.06053268765133172, "percentage": 0.61, "elapsed_time": "0:00:10", "remaining_time": "0:29:05", "throughput": 1908.41, "total_tokens": 20288} |
| {"current_steps": 55, "total_steps": 8260, "loss": 0.9656, "lr": 3.268765133171913e-06, "epoch": 0.06658595641646489, "percentage": 0.67, "elapsed_time": "0:00:11", "remaining_time": "0:28:53", "throughput": 1924.76, "total_tokens": 22368} |
| {"current_steps": 60, "total_steps": 8260, "loss": 0.5838, "lr": 3.5714285714285714e-06, "epoch": 0.07263922518159806, "percentage": 0.73, "elapsed_time": "0:00:12", "remaining_time": "0:28:38", "throughput": 1928.69, "total_tokens": 24256} |
| {"current_steps": 65, "total_steps": 8260, "loss": 0.4362, "lr": 3.87409200968523e-06, "epoch": 0.07869249394673124, "percentage": 0.79, "elapsed_time": "0:00:13", "remaining_time": "0:28:34", "throughput": 1946.25, "total_tokens": 26464} |
| {"current_steps": 70, "total_steps": 8260, "loss": 0.3219, "lr": 4.176755447941889e-06, "epoch": 0.0847457627118644, "percentage": 0.85, "elapsed_time": "0:00:14", "remaining_time": "0:28:32", "throughput": 1963.19, "total_tokens": 28736} |
| {"current_steps": 75, "total_steps": 8260, "loss": 0.235, "lr": 4.479418886198548e-06, "epoch": 0.09079903147699758, "percentage": 0.91, "elapsed_time": "0:00:15", "remaining_time": "0:28:26", "throughput": 1972.21, "total_tokens": 30848} |
| {"current_steps": 80, "total_steps": 8260, "loss": 0.2269, "lr": 4.782082324455206e-06, "epoch": 0.09685230024213075, "percentage": 0.97, "elapsed_time": "0:00:16", "remaining_time": "0:28:22", "throughput": 1981.45, "total_tokens": 32992} |
| {"current_steps": 85, "total_steps": 8260, "loss": 0.2561, "lr": 5.084745762711865e-06, "epoch": 0.10290556900726393, "percentage": 1.03, "elapsed_time": "0:00:17", "remaining_time": "0:28:15", "throughput": 1983.63, "total_tokens": 34976} |
| {"current_steps": 90, "total_steps": 8260, "loss": 0.3883, "lr": 5.3874092009685235e-06, "epoch": 0.1089588377723971, "percentage": 1.09, "elapsed_time": "0:00:18", "remaining_time": "0:28:09", "throughput": 1982.48, "total_tokens": 36896} |
| {"current_steps": 95, "total_steps": 8260, "loss": 0.3251, "lr": 5.6900726392251815e-06, "epoch": 0.11501210653753027, "percentage": 1.15, "elapsed_time": "0:00:19", "remaining_time": "0:28:03", "throughput": 1983.06, "total_tokens": 38848} |
| {"current_steps": 100, "total_steps": 8260, "loss": 0.2998, "lr": 5.99273607748184e-06, "epoch": 0.12106537530266344, "percentage": 1.21, "elapsed_time": "0:00:20", "remaining_time": "0:28:01", "throughput": 1987.76, "total_tokens": 40960} |
| {"current_steps": 105, "total_steps": 8260, "loss": 0.2764, "lr": 6.295399515738499e-06, "epoch": 0.1271186440677966, "percentage": 1.27, "elapsed_time": "0:00:21", "remaining_time": "0:27:57", "throughput": 1991.61, "total_tokens": 43008} |
| {"current_steps": 110, "total_steps": 8260, "loss": 0.4061, "lr": 6.598062953995157e-06, "epoch": 0.13317191283292978, "percentage": 1.33, "elapsed_time": "0:00:22", "remaining_time": "0:27:52", "throughput": 1992.84, "total_tokens": 44992} |
| {"current_steps": 115, "total_steps": 8260, "loss": 0.2232, "lr": 6.900726392251816e-06, "epoch": 0.13922518159806296, "percentage": 1.39, "elapsed_time": "0:00:23", "remaining_time": "0:27:50", "throughput": 1995.34, "total_tokens": 47072} |
| {"current_steps": 120, "total_steps": 8260, "loss": 0.3396, "lr": 7.203389830508475e-06, "epoch": 0.14527845036319612, "percentage": 1.45, "elapsed_time": "0:00:24", "remaining_time": "0:27:49", "throughput": 1997.28, "total_tokens": 49152} |
| {"current_steps": 125, "total_steps": 8260, "loss": 0.2809, "lr": 7.5060532687651345e-06, "epoch": 0.1513317191283293, "percentage": 1.51, "elapsed_time": "0:00:25", "remaining_time": "0:27:47", "throughput": 1999.4, "total_tokens": 51232} |
| {"current_steps": 130, "total_steps": 8260, "loss": 0.2392, "lr": 7.808716707021792e-06, "epoch": 0.15738498789346247, "percentage": 1.57, "elapsed_time": "0:00:26", "remaining_time": "0:27:45", "throughput": 2001.55, "total_tokens": 53312} |
| {"current_steps": 135, "total_steps": 8260, "loss": 0.2454, "lr": 8.111380145278451e-06, "epoch": 0.16343825665859565, "percentage": 1.63, "elapsed_time": "0:00:27", "remaining_time": "0:27:45", "throughput": 2006.75, "total_tokens": 55520} |
| {"current_steps": 140, "total_steps": 8260, "loss": 0.201, "lr": 8.41404358353511e-06, "epoch": 0.1694915254237288, "percentage": 1.69, "elapsed_time": "0:00:28", "remaining_time": "0:27:41", "throughput": 2004.96, "total_tokens": 57440} |
| {"current_steps": 145, "total_steps": 8260, "loss": 0.2001, "lr": 8.716707021791767e-06, "epoch": 0.17554479418886199, "percentage": 1.76, "elapsed_time": "0:00:29", "remaining_time": "0:27:41", "throughput": 2007.39, "total_tokens": 59584} |
| {"current_steps": 150, "total_steps": 8260, "loss": 0.2507, "lr": 9.019370460048427e-06, "epoch": 0.18159806295399517, "percentage": 1.82, "elapsed_time": "0:00:30", "remaining_time": "0:27:40", "throughput": 2010.48, "total_tokens": 61760} |
| {"current_steps": 155, "total_steps": 8260, "loss": 0.2266, "lr": 9.322033898305085e-06, "epoch": 0.18765133171912832, "percentage": 1.88, "elapsed_time": "0:00:31", "remaining_time": "0:27:37", "throughput": 2010.8, "total_tokens": 63744} |
| {"current_steps": 160, "total_steps": 8260, "loss": 0.2203, "lr": 9.624697336561745e-06, "epoch": 0.1937046004842615, "percentage": 1.94, "elapsed_time": "0:00:32", "remaining_time": "0:27:35", "throughput": 2014.3, "total_tokens": 65856} |
| {"current_steps": 165, "total_steps": 8260, "loss": 0.3766, "lr": 9.927360774818403e-06, "epoch": 0.19975786924939468, "percentage": 2.0, "elapsed_time": "0:00:33", "remaining_time": "0:27:32", "throughput": 2014.33, "total_tokens": 67840} |
| {"current_steps": 170, "total_steps": 8260, "loss": 0.3689, "lr": 1.023002421307506e-05, "epoch": 0.20581113801452786, "percentage": 2.06, "elapsed_time": "0:00:34", "remaining_time": "0:27:30", "throughput": 2015.54, "total_tokens": 69920} |
| {"current_steps": 175, "total_steps": 8260, "loss": 0.2491, "lr": 1.053268765133172e-05, "epoch": 0.211864406779661, "percentage": 2.12, "elapsed_time": "0:00:35", "remaining_time": "0:27:28", "throughput": 2018.88, "total_tokens": 72032} |
| {"current_steps": 180, "total_steps": 8260, "loss": 0.4227, "lr": 1.0835351089588378e-05, "epoch": 0.2179176755447942, "percentage": 2.18, "elapsed_time": "0:00:36", "remaining_time": "0:27:25", "throughput": 2021.24, "total_tokens": 74112} |
| {"current_steps": 185, "total_steps": 8260, "loss": 0.2486, "lr": 1.1138014527845036e-05, "epoch": 0.22397094430992737, "percentage": 2.24, "elapsed_time": "0:00:37", "remaining_time": "0:27:24", "throughput": 2024.32, "total_tokens": 76288} |
| {"current_steps": 190, "total_steps": 8260, "loss": 0.1912, "lr": 1.1440677966101696e-05, "epoch": 0.23002421307506055, "percentage": 2.3, "elapsed_time": "0:00:38", "remaining_time": "0:27:24", "throughput": 2027.59, "total_tokens": 78496} |
| {"current_steps": 195, "total_steps": 8260, "loss": 0.2173, "lr": 1.1743341404358354e-05, "epoch": 0.2360774818401937, "percentage": 2.36, "elapsed_time": "0:00:39", "remaining_time": "0:27:20", "throughput": 2027.41, "total_tokens": 80416} |
| {"current_steps": 200, "total_steps": 8260, "loss": 0.188, "lr": 1.2046004842615012e-05, "epoch": 0.24213075060532688, "percentage": 2.42, "elapsed_time": "0:00:40", "remaining_time": "0:27:18", "throughput": 2027.99, "total_tokens": 82432} |
| {"current_steps": 205, "total_steps": 8260, "loss": 0.2485, "lr": 1.2348668280871672e-05, "epoch": 0.24818401937046006, "percentage": 2.48, "elapsed_time": "0:00:41", "remaining_time": "0:27:15", "throughput": 2027.94, "total_tokens": 84416} |
| {"current_steps": 210, "total_steps": 8260, "loss": 0.3272, "lr": 1.2651331719128328e-05, "epoch": 0.2542372881355932, "percentage": 2.54, "elapsed_time": "0:00:42", "remaining_time": "0:27:13", "throughput": 2026.43, "total_tokens": 86336} |
| {"current_steps": 215, "total_steps": 8260, "loss": 0.2385, "lr": 1.2953995157384988e-05, "epoch": 0.2602905569007264, "percentage": 2.6, "elapsed_time": "0:00:43", "remaining_time": "0:27:11", "throughput": 2026.91, "total_tokens": 88352} |
| {"current_steps": 220, "total_steps": 8260, "loss": 0.2572, "lr": 1.3256658595641647e-05, "epoch": 0.26634382566585957, "percentage": 2.66, "elapsed_time": "0:00:44", "remaining_time": "0:27:10", "throughput": 2027.53, "total_tokens": 90432} |
| {"current_steps": 225, "total_steps": 8260, "loss": 0.2279, "lr": 1.3559322033898305e-05, "epoch": 0.27239709443099275, "percentage": 2.72, "elapsed_time": "0:00:45", "remaining_time": "0:27:08", "throughput": 2028.93, "total_tokens": 92512} |
| {"current_steps": 230, "total_steps": 8260, "loss": 0.2158, "lr": 1.3861985472154965e-05, "epoch": 0.2784503631961259, "percentage": 2.78, "elapsed_time": "0:00:46", "remaining_time": "0:27:07", "throughput": 2028.22, "total_tokens": 94528} |
| {"current_steps": 235, "total_steps": 8260, "loss": 0.1993, "lr": 1.4164648910411623e-05, "epoch": 0.2845036319612591, "percentage": 2.85, "elapsed_time": "0:00:47", "remaining_time": "0:27:05", "throughput": 2029.2, "total_tokens": 96576} |
| {"current_steps": 240, "total_steps": 8260, "loss": 0.243, "lr": 1.4467312348668283e-05, "epoch": 0.29055690072639223, "percentage": 2.91, "elapsed_time": "0:00:48", "remaining_time": "0:27:04", "throughput": 2028.93, "total_tokens": 98624} |
| {"current_steps": 245, "total_steps": 8260, "loss": 0.2415, "lr": 1.4769975786924939e-05, "epoch": 0.2966101694915254, "percentage": 2.97, "elapsed_time": "0:00:49", "remaining_time": "0:27:04", "throughput": 2031.17, "total_tokens": 100832} |
| {"current_steps": 250, "total_steps": 8260, "loss": 0.3151, "lr": 1.5072639225181599e-05, "epoch": 0.3026634382566586, "percentage": 3.03, "elapsed_time": "0:00:50", "remaining_time": "0:27:02", "throughput": 2029.92, "total_tokens": 102784} |
| {"current_steps": 255, "total_steps": 8260, "loss": 0.296, "lr": 1.5375302663438258e-05, "epoch": 0.30871670702179177, "percentage": 3.09, "elapsed_time": "0:00:51", "remaining_time": "0:27:01", "throughput": 2029.07, "total_tokens": 104800} |
| {"current_steps": 260, "total_steps": 8260, "loss": 0.2686, "lr": 1.5677966101694916e-05, "epoch": 0.31476997578692495, "percentage": 3.15, "elapsed_time": "0:00:52", "remaining_time": "0:27:00", "throughput": 2029.44, "total_tokens": 106880} |
| {"current_steps": 265, "total_steps": 8260, "loss": 0.2306, "lr": 1.5980629539951574e-05, "epoch": 0.32082324455205813, "percentage": 3.21, "elapsed_time": "0:00:53", "remaining_time": "0:26:58", "throughput": 2030.62, "total_tokens": 108960} |
| {"current_steps": 270, "total_steps": 8260, "loss": 0.2287, "lr": 1.6283292978208232e-05, "epoch": 0.3268765133171913, "percentage": 3.27, "elapsed_time": "0:00:54", "remaining_time": "0:26:57", "throughput": 2029.22, "total_tokens": 110944} |
| {"current_steps": 275, "total_steps": 8260, "loss": 0.3015, "lr": 1.6585956416464894e-05, "epoch": 0.33292978208232443, "percentage": 3.33, "elapsed_time": "0:00:55", "remaining_time": "0:26:57", "throughput": 2030.55, "total_tokens": 113088} |
| {"current_steps": 280, "total_steps": 8260, "loss": 0.2975, "lr": 1.6888619854721548e-05, "epoch": 0.3389830508474576, "percentage": 3.39, "elapsed_time": "0:00:56", "remaining_time": "0:26:55", "throughput": 2029.57, "total_tokens": 115072} |
| {"current_steps": 285, "total_steps": 8260, "loss": 0.2439, "lr": 1.719128329297821e-05, "epoch": 0.3450363196125908, "percentage": 3.45, "elapsed_time": "0:00:57", "remaining_time": "0:26:55", "throughput": 2031.53, "total_tokens": 117280} |
| {"current_steps": 290, "total_steps": 8260, "loss": 0.1727, "lr": 1.7493946731234868e-05, "epoch": 0.35108958837772397, "percentage": 3.51, "elapsed_time": "0:00:58", "remaining_time": "0:26:54", "throughput": 2032.89, "total_tokens": 119456} |
| {"current_steps": 295, "total_steps": 8260, "loss": 0.2793, "lr": 1.7796610169491526e-05, "epoch": 0.35714285714285715, "percentage": 3.57, "elapsed_time": "0:00:59", "remaining_time": "0:26:53", "throughput": 2033.13, "total_tokens": 121472} |
| {"current_steps": 300, "total_steps": 8260, "loss": 0.1856, "lr": 1.8099273607748184e-05, "epoch": 0.36319612590799033, "percentage": 3.63, "elapsed_time": "0:01:00", "remaining_time": "0:26:52", "throughput": 2034.86, "total_tokens": 123648} |
| {"current_steps": 305, "total_steps": 8260, "loss": 0.41, "lr": 1.8401937046004845e-05, "epoch": 0.3692493946731235, "percentage": 3.69, "elapsed_time": "0:01:01", "remaining_time": "0:26:50", "throughput": 2034.59, "total_tokens": 125632} |
| {"current_steps": 310, "total_steps": 8260, "loss": 0.2404, "lr": 1.8704600484261503e-05, "epoch": 0.37530266343825663, "percentage": 3.75, "elapsed_time": "0:01:02", "remaining_time": "0:26:48", "throughput": 2035.16, "total_tokens": 127680} |
| {"current_steps": 315, "total_steps": 8260, "loss": 0.1925, "lr": 1.900726392251816e-05, "epoch": 0.3813559322033898, "percentage": 3.81, "elapsed_time": "0:01:03", "remaining_time": "0:26:47", "throughput": 2037.13, "total_tokens": 129856} |
| {"current_steps": 320, "total_steps": 8260, "loss": 0.5573, "lr": 1.930992736077482e-05, "epoch": 0.387409200968523, "percentage": 3.87, "elapsed_time": "0:01:04", "remaining_time": "0:26:46", "throughput": 2037.31, "total_tokens": 131872} |
| {"current_steps": 325, "total_steps": 8260, "loss": 0.4137, "lr": 1.9612590799031477e-05, "epoch": 0.3934624697336562, "percentage": 3.93, "elapsed_time": "0:01:05", "remaining_time": "0:26:45", "throughput": 2038.04, "total_tokens": 133984} |
| {"current_steps": 330, "total_steps": 8260, "loss": 0.311, "lr": 1.9915254237288135e-05, "epoch": 0.39951573849878935, "percentage": 4.0, "elapsed_time": "0:01:06", "remaining_time": "0:26:44", "throughput": 2039.02, "total_tokens": 136128} |
| {"current_steps": 335, "total_steps": 8260, "loss": 0.221, "lr": 2.0217917675544796e-05, "epoch": 0.40556900726392253, "percentage": 4.06, "elapsed_time": "0:01:07", "remaining_time": "0:26:43", "throughput": 2039.27, "total_tokens": 138208} |
| {"current_steps": 340, "total_steps": 8260, "loss": 0.2509, "lr": 2.0520581113801454e-05, "epoch": 0.4116222760290557, "percentage": 4.12, "elapsed_time": "0:01:08", "remaining_time": "0:26:42", "throughput": 2039.53, "total_tokens": 140288} |
| {"current_steps": 345, "total_steps": 8260, "loss": 0.224, "lr": 2.0823244552058112e-05, "epoch": 0.41767554479418884, "percentage": 4.18, "elapsed_time": "0:01:09", "remaining_time": "0:26:41", "throughput": 2039.3, "total_tokens": 142336} |
| {"current_steps": 350, "total_steps": 8260, "loss": 0.2109, "lr": 2.1125907990314774e-05, "epoch": 0.423728813559322, "percentage": 4.24, "elapsed_time": "0:01:10", "remaining_time": "0:26:40", "throughput": 2039.8, "total_tokens": 144448} |
| {"current_steps": 355, "total_steps": 8260, "loss": 0.2606, "lr": 2.1428571428571428e-05, "epoch": 0.4297820823244552, "percentage": 4.3, "elapsed_time": "0:01:11", "remaining_time": "0:26:38", "throughput": 2039.94, "total_tokens": 146464} |
| {"current_steps": 360, "total_steps": 8260, "loss": 0.2049, "lr": 2.1731234866828086e-05, "epoch": 0.4358353510895884, "percentage": 4.36, "elapsed_time": "0:01:12", "remaining_time": "0:26:37", "throughput": 2040.4, "total_tokens": 148512} |
| {"current_steps": 365, "total_steps": 8260, "loss": 0.2353, "lr": 2.2033898305084748e-05, "epoch": 0.44188861985472155, "percentage": 4.42, "elapsed_time": "0:01:13", "remaining_time": "0:26:36", "throughput": 2039.5, "total_tokens": 150496} |
| {"current_steps": 370, "total_steps": 8260, "loss": 0.2673, "lr": 2.2336561743341405e-05, "epoch": 0.44794188861985473, "percentage": 4.48, "elapsed_time": "0:01:14", "remaining_time": "0:26:35", "throughput": 2040.09, "total_tokens": 152608} |
| {"current_steps": 375, "total_steps": 8260, "loss": 0.2185, "lr": 2.2639225181598063e-05, "epoch": 0.4539951573849879, "percentage": 4.54, "elapsed_time": "0:01:15", "remaining_time": "0:26:33", "throughput": 2040.85, "total_tokens": 154624} |
| {"current_steps": 380, "total_steps": 8260, "loss": 0.2079, "lr": 2.2941888619854725e-05, "epoch": 0.4600484261501211, "percentage": 4.6, "elapsed_time": "0:01:16", "remaining_time": "0:26:31", "throughput": 2040.61, "total_tokens": 156608} |
| {"current_steps": 385, "total_steps": 8260, "loss": 0.2326, "lr": 2.3244552058111383e-05, "epoch": 0.4661016949152542, "percentage": 4.66, "elapsed_time": "0:01:17", "remaining_time": "0:26:29", "throughput": 2040.01, "total_tokens": 158560} |
| {"current_steps": 390, "total_steps": 8260, "loss": 0.1594, "lr": 2.3547215496368037e-05, "epoch": 0.4721549636803874, "percentage": 4.72, "elapsed_time": "0:01:18", "remaining_time": "0:26:28", "throughput": 2040.09, "total_tokens": 160640} |
| {"current_steps": 395, "total_steps": 8260, "loss": 0.1853, "lr": 2.38498789346247e-05, "epoch": 0.4782082324455206, "percentage": 4.78, "elapsed_time": "0:01:19", "remaining_time": "0:26:28", "throughput": 2040.86, "total_tokens": 162784} |
| {"current_steps": 400, "total_steps": 8260, "loss": 0.2642, "lr": 2.4152542372881357e-05, "epoch": 0.48426150121065376, "percentage": 4.84, "elapsed_time": "0:01:20", "remaining_time": "0:26:26", "throughput": 2041.24, "total_tokens": 164832} |
| {"current_steps": 405, "total_steps": 8260, "loss": 0.1782, "lr": 2.4455205811138015e-05, "epoch": 0.49031476997578693, "percentage": 4.9, "elapsed_time": "0:01:21", "remaining_time": "0:26:25", "throughput": 2040.98, "total_tokens": 166880} |
| {"current_steps": 410, "total_steps": 8260, "loss": 0.2989, "lr": 2.4757869249394676e-05, "epoch": 0.4963680387409201, "percentage": 4.96, "elapsed_time": "0:01:22", "remaining_time": "0:26:25", "throughput": 2042.47, "total_tokens": 169088} |
| {"current_steps": 413, "total_steps": 8260, "eval_loss": 0.17581701278686523, "epoch": 0.5, "percentage": 5.0, "elapsed_time": "0:01:31", "remaining_time": "0:28:59", "throughput": 1860.91, "total_tokens": 170336} |
| {"current_steps": 415, "total_steps": 8260, "loss": 0.1817, "lr": 2.5060532687651334e-05, "epoch": 0.5024213075060533, "percentage": 5.02, "elapsed_time": "0:01:33", "remaining_time": "0:29:23", "throughput": 1835.07, "total_tokens": 171232} |
| {"current_steps": 420, "total_steps": 8260, "loss": 0.1937, "lr": 2.536319612590799e-05, "epoch": 0.5084745762711864, "percentage": 5.08, "elapsed_time": "0:01:34", "remaining_time": "0:29:20", "throughput": 1837.8, "total_tokens": 173376} |
| {"current_steps": 425, "total_steps": 8260, "loss": 0.1224, "lr": 2.566585956416465e-05, "epoch": 0.5145278450363197, "percentage": 5.15, "elapsed_time": "0:01:35", "remaining_time": "0:29:17", "throughput": 1839.85, "total_tokens": 175424} |
| {"current_steps": 430, "total_steps": 8260, "loss": 0.2028, "lr": 2.5968523002421308e-05, "epoch": 0.5205811138014528, "percentage": 5.21, "elapsed_time": "0:01:36", "remaining_time": "0:29:14", "throughput": 1843.54, "total_tokens": 177664} |
| {"current_steps": 435, "total_steps": 8260, "loss": 0.236, "lr": 2.627118644067797e-05, "epoch": 0.5266343825665859, "percentage": 5.27, "elapsed_time": "0:01:37", "remaining_time": "0:29:11", "throughput": 1845.22, "total_tokens": 179648} |
| {"current_steps": 440, "total_steps": 8260, "loss": 0.1922, "lr": 2.6573849878934624e-05, "epoch": 0.5326876513317191, "percentage": 5.33, "elapsed_time": "0:01:38", "remaining_time": "0:29:07", "throughput": 1847.26, "total_tokens": 181664} |
| {"current_steps": 445, "total_steps": 8260, "loss": 0.2585, "lr": 2.6876513317191282e-05, "epoch": 0.5387409200968523, "percentage": 5.39, "elapsed_time": "0:01:39", "remaining_time": "0:29:04", "throughput": 1850.21, "total_tokens": 183840} |
| {"current_steps": 450, "total_steps": 8260, "loss": 0.2166, "lr": 2.7179176755447943e-05, "epoch": 0.5447941888619855, "percentage": 5.45, "elapsed_time": "0:01:40", "remaining_time": "0:29:00", "throughput": 1851.21, "total_tokens": 185696} |
| {"current_steps": 455, "total_steps": 8260, "loss": 0.2224, "lr": 2.74818401937046e-05, "epoch": 0.5508474576271186, "percentage": 5.51, "elapsed_time": "0:01:41", "remaining_time": "0:28:57", "throughput": 1852.7, "total_tokens": 187712} |
| {"current_steps": 460, "total_steps": 8260, "loss": 0.256, "lr": 2.7784503631961263e-05, "epoch": 0.5569007263922519, "percentage": 5.57, "elapsed_time": "0:01:42", "remaining_time": "0:28:54", "throughput": 1854.99, "total_tokens": 189728} |
| {"current_steps": 465, "total_steps": 8260, "loss": 0.2361, "lr": 2.8087167070217917e-05, "epoch": 0.562953995157385, "percentage": 5.63, "elapsed_time": "0:01:43", "remaining_time": "0:28:51", "throughput": 1856.76, "total_tokens": 191744} |
| {"current_steps": 470, "total_steps": 8260, "loss": 0.1637, "lr": 2.838983050847458e-05, "epoch": 0.5690072639225182, "percentage": 5.69, "elapsed_time": "0:01:44", "remaining_time": "0:28:48", "throughput": 1859.4, "total_tokens": 193888} |
| {"current_steps": 475, "total_steps": 8260, "loss": 0.1943, "lr": 2.8692493946731237e-05, "epoch": 0.5750605326876513, "percentage": 5.75, "elapsed_time": "0:01:45", "remaining_time": "0:28:45", "throughput": 1861.1, "total_tokens": 195904} |
| {"current_steps": 480, "total_steps": 8260, "loss": 0.1638, "lr": 2.899515738498789e-05, "epoch": 0.5811138014527845, "percentage": 5.81, "elapsed_time": "0:01:46", "remaining_time": "0:28:42", "throughput": 1862.66, "total_tokens": 197952} |
| {"current_steps": 485, "total_steps": 8260, "loss": 0.1966, "lr": 2.9297820823244553e-05, "epoch": 0.5871670702179177, "percentage": 5.87, "elapsed_time": "0:01:47", "remaining_time": "0:28:39", "throughput": 1864.33, "total_tokens": 199968} |
| {"current_steps": 490, "total_steps": 8260, "loss": 0.2752, "lr": 2.960048426150121e-05, "epoch": 0.5932203389830508, "percentage": 5.93, "elapsed_time": "0:01:48", "remaining_time": "0:28:36", "throughput": 1865.47, "total_tokens": 201984} |
| {"current_steps": 495, "total_steps": 8260, "loss": 0.1482, "lr": 2.9903147699757872e-05, "epoch": 0.5992736077481841, "percentage": 5.99, "elapsed_time": "0:01:49", "remaining_time": "0:28:33", "throughput": 1867.15, "total_tokens": 204000} |
| {"current_steps": 500, "total_steps": 8260, "loss": 0.2021, "lr": 3.0205811138014527e-05, "epoch": 0.6053268765133172, "percentage": 6.05, "elapsed_time": "0:01:50", "remaining_time": "0:28:31", "throughput": 1868.9, "total_tokens": 206080} |
| {"current_steps": 505, "total_steps": 8260, "loss": 0.2472, "lr": 3.050847457627119e-05, "epoch": 0.6113801452784504, "percentage": 6.11, "elapsed_time": "0:01:51", "remaining_time": "0:28:27", "throughput": 1870.15, "total_tokens": 208000} |
| {"current_steps": 510, "total_steps": 8260, "loss": 0.1733, "lr": 3.0811138014527846e-05, "epoch": 0.6174334140435835, "percentage": 6.17, "elapsed_time": "0:01:52", "remaining_time": "0:28:26", "throughput": 1872.36, "total_tokens": 210208} |
| {"current_steps": 515, "total_steps": 8260, "loss": 0.1584, "lr": 3.111380145278451e-05, "epoch": 0.6234866828087167, "percentage": 6.23, "elapsed_time": "0:01:53", "remaining_time": "0:28:23", "throughput": 1873.95, "total_tokens": 212288} |
| {"current_steps": 520, "total_steps": 8260, "loss": 0.1895, "lr": 3.141646489104116e-05, "epoch": 0.6295399515738499, "percentage": 6.3, "elapsed_time": "0:01:54", "remaining_time": "0:28:21", "throughput": 1875.75, "total_tokens": 214400} |
| {"current_steps": 525, "total_steps": 8260, "loss": 0.151, "lr": 3.1719128329297823e-05, "epoch": 0.635593220338983, "percentage": 6.36, "elapsed_time": "0:01:55", "remaining_time": "0:28:18", "throughput": 1877.56, "total_tokens": 216512} |
| {"current_steps": 530, "total_steps": 8260, "loss": 0.3145, "lr": 3.2021791767554485e-05, "epoch": 0.6416464891041163, "percentage": 6.42, "elapsed_time": "0:01:56", "remaining_time": "0:28:16", "throughput": 1878.69, "total_tokens": 218496} |
| {"current_steps": 535, "total_steps": 8260, "loss": 0.1886, "lr": 3.232445520581114e-05, "epoch": 0.6476997578692494, "percentage": 6.48, "elapsed_time": "0:01:57", "remaining_time": "0:28:14", "throughput": 1879.59, "total_tokens": 220512} |
| {"current_steps": 540, "total_steps": 8260, "loss": 0.1516, "lr": 3.26271186440678e-05, "epoch": 0.6537530266343826, "percentage": 6.54, "elapsed_time": "0:01:58", "remaining_time": "0:28:11", "throughput": 1880.63, "total_tokens": 222560} |
| {"current_steps": 545, "total_steps": 8260, "loss": 0.139, "lr": 3.2929782082324455e-05, "epoch": 0.6598062953995157, "percentage": 6.6, "elapsed_time": "0:01:59", "remaining_time": "0:28:08", "throughput": 1881.64, "total_tokens": 224480} |
| {"current_steps": 550, "total_steps": 8260, "loss": 0.2028, "lr": 3.323244552058112e-05, "epoch": 0.6658595641646489, "percentage": 6.66, "elapsed_time": "0:02:00", "remaining_time": "0:28:06", "throughput": 1882.93, "total_tokens": 226560} |
| {"current_steps": 555, "total_steps": 8260, "loss": 0.179, "lr": 3.353510895883777e-05, "epoch": 0.6719128329297821, "percentage": 6.72, "elapsed_time": "0:02:01", "remaining_time": "0:28:03", "throughput": 1884.22, "total_tokens": 228512} |
| {"current_steps": 560, "total_steps": 8260, "loss": 0.2853, "lr": 3.383777239709443e-05, "epoch": 0.6779661016949152, "percentage": 6.78, "elapsed_time": "0:02:02", "remaining_time": "0:28:01", "throughput": 1885.5, "total_tokens": 230528} |
| {"current_steps": 565, "total_steps": 8260, "loss": 0.1885, "lr": 3.4140435835351094e-05, "epoch": 0.6840193704600485, "percentage": 6.84, "elapsed_time": "0:02:03", "remaining_time": "0:27:59", "throughput": 1887.81, "total_tokens": 232768} |
| {"current_steps": 570, "total_steps": 8260, "loss": 0.1626, "lr": 3.444309927360775e-05, "epoch": 0.6900726392251816, "percentage": 6.9, "elapsed_time": "0:02:04", "remaining_time": "0:27:57", "throughput": 1888.65, "total_tokens": 234784} |
| {"current_steps": 575, "total_steps": 8260, "loss": 0.2537, "lr": 3.474576271186441e-05, "epoch": 0.6961259079903148, "percentage": 6.96, "elapsed_time": "0:02:05", "remaining_time": "0:27:55", "throughput": 1889.72, "total_tokens": 236832} |
| {"current_steps": 580, "total_steps": 8260, "loss": 0.1645, "lr": 3.5048426150121065e-05, "epoch": 0.7021791767554479, "percentage": 7.02, "elapsed_time": "0:02:06", "remaining_time": "0:27:52", "throughput": 1890.06, "total_tokens": 238784} |
| {"current_steps": 585, "total_steps": 8260, "loss": 0.226, "lr": 3.5351089588377726e-05, "epoch": 0.7082324455205811, "percentage": 7.08, "elapsed_time": "0:02:07", "remaining_time": "0:27:50", "throughput": 1890.63, "total_tokens": 240768} |
| {"current_steps": 590, "total_steps": 8260, "loss": 0.1961, "lr": 3.565375302663439e-05, "epoch": 0.7142857142857143, "percentage": 7.14, "elapsed_time": "0:02:08", "remaining_time": "0:27:48", "throughput": 1891.75, "total_tokens": 242848} |
| {"current_steps": 595, "total_steps": 8260, "loss": 0.1189, "lr": 3.595641646489104e-05, "epoch": 0.7203389830508474, "percentage": 7.2, "elapsed_time": "0:02:09", "remaining_time": "0:27:46", "throughput": 1893.38, "total_tokens": 244992} |
| {"current_steps": 600, "total_steps": 8260, "loss": 0.1701, "lr": 3.62590799031477e-05, "epoch": 0.7263922518159807, "percentage": 7.26, "elapsed_time": "0:02:10", "remaining_time": "0:27:44", "throughput": 1894.65, "total_tokens": 247040} |
| {"current_steps": 605, "total_steps": 8260, "loss": 0.1743, "lr": 3.656174334140436e-05, "epoch": 0.7324455205811138, "percentage": 7.32, "elapsed_time": "0:02:11", "remaining_time": "0:27:42", "throughput": 1896.22, "total_tokens": 249184} |
| {"current_steps": 610, "total_steps": 8260, "loss": 0.1463, "lr": 3.686440677966102e-05, "epoch": 0.738498789346247, "percentage": 7.38, "elapsed_time": "0:02:12", "remaining_time": "0:27:39", "throughput": 1897.08, "total_tokens": 251104} |
| {"current_steps": 615, "total_steps": 8260, "loss": 0.349, "lr": 3.7167070217917674e-05, "epoch": 0.7445520581113801, "percentage": 7.45, "elapsed_time": "0:02:13", "remaining_time": "0:27:37", "throughput": 1897.75, "total_tokens": 253056} |
| {"current_steps": 620, "total_steps": 8260, "loss": 0.2638, "lr": 3.7469733656174335e-05, "epoch": 0.7506053268765133, "percentage": 7.51, "elapsed_time": "0:02:14", "remaining_time": "0:27:34", "throughput": 1899.15, "total_tokens": 255008} |
| {"current_steps": 625, "total_steps": 8260, "loss": 0.3117, "lr": 3.7772397094431e-05, "epoch": 0.7566585956416465, "percentage": 7.57, "elapsed_time": "0:02:15", "remaining_time": "0:27:32", "throughput": 1900.46, "total_tokens": 257120} |
| {"current_steps": 630, "total_steps": 8260, "loss": 0.1552, "lr": 3.807506053268765e-05, "epoch": 0.7627118644067796, "percentage": 7.63, "elapsed_time": "0:02:16", "remaining_time": "0:27:30", "throughput": 1901.39, "total_tokens": 259168} |
| {"current_steps": 635, "total_steps": 8260, "loss": 0.1723, "lr": 3.837772397094431e-05, "epoch": 0.7687651331719129, "percentage": 7.69, "elapsed_time": "0:02:17", "remaining_time": "0:27:28", "throughput": 1902.31, "total_tokens": 261216} |
| {"current_steps": 640, "total_steps": 8260, "loss": 0.7071, "lr": 3.868038740920097e-05, "epoch": 0.774818401937046, "percentage": 7.75, "elapsed_time": "0:02:18", "remaining_time": "0:27:26", "throughput": 1903.1, "total_tokens": 263136} |
| {"current_steps": 645, "total_steps": 8260, "loss": 0.2025, "lr": 3.898305084745763e-05, "epoch": 0.7808716707021792, "percentage": 7.81, "elapsed_time": "0:02:19", "remaining_time": "0:27:24", "throughput": 1903.97, "total_tokens": 265184} |
| {"current_steps": 650, "total_steps": 8260, "loss": 0.1424, "lr": 3.928571428571429e-05, "epoch": 0.7869249394673123, "percentage": 7.87, "elapsed_time": "0:02:20", "remaining_time": "0:27:22", "throughput": 1904.89, "total_tokens": 267264} |
| {"current_steps": 655, "total_steps": 8260, "loss": 0.1792, "lr": 3.958837772397095e-05, "epoch": 0.7929782082324455, "percentage": 7.93, "elapsed_time": "0:02:21", "remaining_time": "0:27:20", "throughput": 1905.43, "total_tokens": 269216} |
| {"current_steps": 660, "total_steps": 8260, "loss": 0.2204, "lr": 3.9891041162227606e-05, "epoch": 0.7990314769975787, "percentage": 7.99, "elapsed_time": "0:02:22", "remaining_time": "0:27:18", "throughput": 1906.63, "total_tokens": 271264} |
| {"current_steps": 665, "total_steps": 8260, "loss": 0.1117, "lr": 4.019370460048426e-05, "epoch": 0.8050847457627118, "percentage": 8.05, "elapsed_time": "0:02:23", "remaining_time": "0:27:16", "throughput": 1907.6, "total_tokens": 273280} |
| {"current_steps": 670, "total_steps": 8260, "loss": 0.117, "lr": 4.049636803874092e-05, "epoch": 0.8111380145278451, "percentage": 8.11, "elapsed_time": "0:02:24", "remaining_time": "0:27:14", "throughput": 1908.84, "total_tokens": 275392} |
| {"current_steps": 675, "total_steps": 8260, "loss": 0.2062, "lr": 4.0799031476997577e-05, "epoch": 0.8171912832929782, "percentage": 8.17, "elapsed_time": "0:02:25", "remaining_time": "0:27:12", "throughput": 1909.94, "total_tokens": 277440} |
| {"current_steps": 680, "total_steps": 8260, "loss": 0.1635, "lr": 4.110169491525424e-05, "epoch": 0.8232445520581114, "percentage": 8.23, "elapsed_time": "0:02:26", "remaining_time": "0:27:10", "throughput": 1910.9, "total_tokens": 279456} |
| {"current_steps": 685, "total_steps": 8260, "loss": 0.1623, "lr": 4.14043583535109e-05, "epoch": 0.8292978208232445, "percentage": 8.29, "elapsed_time": "0:02:27", "remaining_time": "0:27:08", "throughput": 1912.4, "total_tokens": 281632} |
| {"current_steps": 690, "total_steps": 8260, "loss": 0.1991, "lr": 4.170702179176756e-05, "epoch": 0.8353510895883777, "percentage": 8.35, "elapsed_time": "0:02:28", "remaining_time": "0:27:06", "throughput": 1913.51, "total_tokens": 283680} |
| {"current_steps": 695, "total_steps": 8260, "loss": 0.1168, "lr": 4.2009685230024215e-05, "epoch": 0.8414043583535109, "percentage": 8.41, "elapsed_time": "0:02:29", "remaining_time": "0:27:04", "throughput": 1914.47, "total_tokens": 285760} |
| {"current_steps": 700, "total_steps": 8260, "loss": 0.2535, "lr": 4.231234866828087e-05, "epoch": 0.847457627118644, "percentage": 8.47, "elapsed_time": "0:02:30", "remaining_time": "0:27:03", "throughput": 1915.77, "total_tokens": 287904} |
| {"current_steps": 705, "total_steps": 8260, "loss": 0.3266, "lr": 4.261501210653753e-05, "epoch": 0.8535108958837773, "percentage": 8.54, "elapsed_time": "0:02:31", "remaining_time": "0:27:01", "throughput": 1916.53, "total_tokens": 289952} |
| {"current_steps": 710, "total_steps": 8260, "loss": 0.168, "lr": 4.2917675544794186e-05, "epoch": 0.8595641646489104, "percentage": 8.6, "elapsed_time": "0:02:32", "remaining_time": "0:26:59", "throughput": 1917.09, "total_tokens": 291968} |
| {"current_steps": 715, "total_steps": 8260, "loss": 0.1877, "lr": 4.3220338983050854e-05, "epoch": 0.8656174334140436, "percentage": 8.66, "elapsed_time": "0:02:33", "remaining_time": "0:26:57", "throughput": 1917.39, "total_tokens": 293952} |
| {"current_steps": 720, "total_steps": 8260, "loss": 0.1664, "lr": 4.352300242130751e-05, "epoch": 0.8716707021791767, "percentage": 8.72, "elapsed_time": "0:02:34", "remaining_time": "0:26:56", "throughput": 1919.17, "total_tokens": 296192} |
| {"current_steps": 725, "total_steps": 8260, "loss": 0.1047, "lr": 4.382566585956417e-05, "epoch": 0.8777239709443099, "percentage": 8.78, "elapsed_time": "0:02:35", "remaining_time": "0:26:54", "throughput": 1920.4, "total_tokens": 298336} |
| {"current_steps": 730, "total_steps": 8260, "loss": 0.1764, "lr": 4.4128329297820825e-05, "epoch": 0.8837772397094431, "percentage": 8.84, "elapsed_time": "0:02:36", "remaining_time": "0:26:52", "throughput": 1921.17, "total_tokens": 300352} |
| {"current_steps": 735, "total_steps": 8260, "loss": 0.1037, "lr": 4.443099273607748e-05, "epoch": 0.8898305084745762, "percentage": 8.9, "elapsed_time": "0:02:37", "remaining_time": "0:26:51", "throughput": 1921.98, "total_tokens": 302432} |
| {"current_steps": 740, "total_steps": 8260, "loss": 0.2892, "lr": 4.473365617433414e-05, "epoch": 0.8958837772397095, "percentage": 8.96, "elapsed_time": "0:02:38", "remaining_time": "0:26:49", "throughput": 1923.03, "total_tokens": 304544} |
| {"current_steps": 745, "total_steps": 8260, "loss": 0.2717, "lr": 4.50363196125908e-05, "epoch": 0.9019370460048426, "percentage": 9.02, "elapsed_time": "0:02:39", "remaining_time": "0:26:47", "throughput": 1923.99, "total_tokens": 306592} |
| {"current_steps": 750, "total_steps": 8260, "loss": 0.1698, "lr": 4.533898305084746e-05, "epoch": 0.9079903147699758, "percentage": 9.08, "elapsed_time": "0:02:40", "remaining_time": "0:26:45", "throughput": 1925.06, "total_tokens": 308672} |
| {"current_steps": 755, "total_steps": 8260, "loss": 0.0728, "lr": 4.564164648910412e-05, "epoch": 0.914043583535109, "percentage": 9.14, "elapsed_time": "0:02:41", "remaining_time": "0:26:43", "throughput": 1925.34, "total_tokens": 310656} |
| {"current_steps": 760, "total_steps": 8260, "loss": 0.31, "lr": 4.594430992736078e-05, "epoch": 0.9200968523002422, "percentage": 9.2, "elapsed_time": "0:02:42", "remaining_time": "0:26:42", "throughput": 1926.31, "total_tokens": 312768} |
| {"current_steps": 765, "total_steps": 8260, "loss": 0.0739, "lr": 4.6246973365617434e-05, "epoch": 0.9261501210653753, "percentage": 9.26, "elapsed_time": "0:02:43", "remaining_time": "0:26:40", "throughput": 1927.16, "total_tokens": 314880} |
| {"current_steps": 770, "total_steps": 8260, "loss": 0.0547, "lr": 4.654963680387409e-05, "epoch": 0.9322033898305084, "percentage": 9.32, "elapsed_time": "0:02:44", "remaining_time": "0:26:39", "throughput": 1927.95, "total_tokens": 316960} |
| {"current_steps": 775, "total_steps": 8260, "loss": 0.1238, "lr": 4.685230024213076e-05, "epoch": 0.9382566585956417, "percentage": 9.38, "elapsed_time": "0:02:45", "remaining_time": "0:26:37", "throughput": 1928.88, "total_tokens": 319072} |
| {"current_steps": 780, "total_steps": 8260, "loss": 0.1621, "lr": 4.715496368038741e-05, "epoch": 0.9443099273607748, "percentage": 9.44, "elapsed_time": "0:02:46", "remaining_time": "0:26:36", "throughput": 1930.0, "total_tokens": 321248} |
| {"current_steps": 785, "total_steps": 8260, "loss": 0.2879, "lr": 4.745762711864407e-05, "epoch": 0.950363196125908, "percentage": 9.5, "elapsed_time": "0:02:47", "remaining_time": "0:26:34", "throughput": 1930.54, "total_tokens": 323168} |
| {"current_steps": 790, "total_steps": 8260, "loss": 0.1443, "lr": 4.776029055690073e-05, "epoch": 0.9564164648910412, "percentage": 9.56, "elapsed_time": "0:02:48", "remaining_time": "0:26:32", "throughput": 1931.67, "total_tokens": 325280} |
| {"current_steps": 795, "total_steps": 8260, "loss": 0.1444, "lr": 4.806295399515739e-05, "epoch": 0.9624697336561744, "percentage": 9.62, "elapsed_time": "0:02:49", "remaining_time": "0:26:30", "throughput": 1932.19, "total_tokens": 327264} |
| {"current_steps": 800, "total_steps": 8260, "loss": 0.12, "lr": 4.836561743341404e-05, "epoch": 0.9685230024213075, "percentage": 9.69, "elapsed_time": "0:02:50", "remaining_time": "0:26:28", "throughput": 1933.03, "total_tokens": 329312} |
| {"current_steps": 805, "total_steps": 8260, "loss": 0.2624, "lr": 4.8668280871670705e-05, "epoch": 0.9745762711864406, "percentage": 9.75, "elapsed_time": "0:02:51", "remaining_time": "0:26:26", "throughput": 1933.28, "total_tokens": 331296} |
| {"current_steps": 810, "total_steps": 8260, "loss": 0.2324, "lr": 4.8970944309927366e-05, "epoch": 0.9806295399515739, "percentage": 9.81, "elapsed_time": "0:02:52", "remaining_time": "0:26:25", "throughput": 1933.8, "total_tokens": 333280} |
| {"current_steps": 815, "total_steps": 8260, "loss": 0.1293, "lr": 4.927360774818402e-05, "epoch": 0.986682808716707, "percentage": 9.87, "elapsed_time": "0:02:53", "remaining_time": "0:26:23", "throughput": 1934.67, "total_tokens": 335392} |
| {"current_steps": 820, "total_steps": 8260, "loss": 0.2803, "lr": 4.957627118644068e-05, "epoch": 0.9927360774818402, "percentage": 9.93, "elapsed_time": "0:02:54", "remaining_time": "0:26:22", "throughput": 1935.53, "total_tokens": 337504} |
| {"current_steps": 825, "total_steps": 8260, "loss": 0.1096, "lr": 4.9878934624697336e-05, "epoch": 0.9987893462469734, "percentage": 9.99, "elapsed_time": "0:02:55", "remaining_time": "0:26:20", "throughput": 1936.0, "total_tokens": 339488} |
| {"current_steps": 826, "total_steps": 8260, "eval_loss": 0.14574502408504486, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:03:03", "remaining_time": "0:27:32", "throughput": 1849.43, "total_tokens": 339568} |
| {"current_steps": 830, "total_steps": 8260, "loss": 0.1355, "lr": 4.99999799087436e-05, "epoch": 1.0048426150121066, "percentage": 10.05, "elapsed_time": "0:03:06", "remaining_time": "0:27:45", "throughput": 1834.05, "total_tokens": 341168} |
| {"current_steps": 835, "total_steps": 8260, "loss": 0.0598, "lr": 4.999985712896029e-05, "epoch": 1.0108958837772397, "percentage": 10.11, "elapsed_time": "0:03:07", "remaining_time": "0:27:43", "throughput": 1835.07, "total_tokens": 343216} |
| {"current_steps": 840, "total_steps": 8260, "loss": 0.1515, "lr": 4.9999622731750315e-05, "epoch": 1.0169491525423728, "percentage": 10.17, "elapsed_time": "0:03:08", "remaining_time": "0:27:40", "throughput": 1836.02, "total_tokens": 345200} |
| {"current_steps": 845, "total_steps": 8260, "loss": 0.0661, "lr": 4.999927671816018e-05, "epoch": 1.023002421307506, "percentage": 10.23, "elapsed_time": "0:03:09", "remaining_time": "0:27:38", "throughput": 1837.93, "total_tokens": 347408} |
| {"current_steps": 850, "total_steps": 8260, "loss": 0.4256, "lr": 4.999881908973474e-05, "epoch": 1.0290556900726393, "percentage": 10.29, "elapsed_time": "0:03:10", "remaining_time": "0:27:36", "throughput": 1839.05, "total_tokens": 349488} |
| {"current_steps": 855, "total_steps": 8260, "loss": 0.2912, "lr": 4.9998249848517185e-05, "epoch": 1.0351089588377724, "percentage": 10.35, "elapsed_time": "0:03:11", "remaining_time": "0:27:34", "throughput": 1840.27, "total_tokens": 351536} |
| {"current_steps": 860, "total_steps": 8260, "loss": 0.2137, "lr": 4.999756899704902e-05, "epoch": 1.0411622276029056, "percentage": 10.41, "elapsed_time": "0:03:12", "remaining_time": "0:27:32", "throughput": 1840.82, "total_tokens": 353488} |
| {"current_steps": 865, "total_steps": 8260, "loss": 0.1311, "lr": 4.999677653837004e-05, "epoch": 1.0472154963680387, "percentage": 10.47, "elapsed_time": "0:03:13", "remaining_time": "0:27:30", "throughput": 1841.64, "total_tokens": 355504} |
| {"current_steps": 870, "total_steps": 8260, "loss": 0.1441, "lr": 4.999587247601837e-05, "epoch": 1.053268765133172, "percentage": 10.53, "elapsed_time": "0:03:13", "remaining_time": "0:27:27", "throughput": 1842.65, "total_tokens": 357456} |
| {"current_steps": 875, "total_steps": 8260, "loss": 0.1756, "lr": 4.99948568140304e-05, "epoch": 1.0593220338983051, "percentage": 10.59, "elapsed_time": "0:03:14", "remaining_time": "0:27:25", "throughput": 1844.01, "total_tokens": 359568} |
| {"current_steps": 880, "total_steps": 8260, "loss": 0.1768, "lr": 4.999372955694077e-05, "epoch": 1.0653753026634383, "percentage": 10.65, "elapsed_time": "0:03:15", "remaining_time": "0:27:23", "throughput": 1845.15, "total_tokens": 361616} |
| {"current_steps": 885, "total_steps": 8260, "loss": 0.1503, "lr": 4.999249070978237e-05, "epoch": 1.0714285714285714, "percentage": 10.71, "elapsed_time": "0:03:16", "remaining_time": "0:27:21", "throughput": 1846.24, "total_tokens": 363696} |
| {"current_steps": 890, "total_steps": 8260, "loss": 0.3323, "lr": 4.9991140278086316e-05, "epoch": 1.0774818401937045, "percentage": 10.77, "elapsed_time": "0:03:17", "remaining_time": "0:27:19", "throughput": 1847.26, "total_tokens": 365712} |
| {"current_steps": 895, "total_steps": 8260, "loss": 0.0454, "lr": 4.998967826788191e-05, "epoch": 1.0835351089588379, "percentage": 10.84, "elapsed_time": "0:03:18", "remaining_time": "0:27:17", "throughput": 1848.42, "total_tokens": 367824} |
| {"current_steps": 900, "total_steps": 8260, "loss": 0.2099, "lr": 4.998810468569661e-05, "epoch": 1.089588377723971, "percentage": 10.9, "elapsed_time": "0:03:19", "remaining_time": "0:27:15", "throughput": 1849.38, "total_tokens": 369840} |
| {"current_steps": 905, "total_steps": 8260, "loss": 0.1235, "lr": 4.998641953855604e-05, "epoch": 1.0956416464891041, "percentage": 10.96, "elapsed_time": "0:03:20", "remaining_time": "0:27:13", "throughput": 1850.78, "total_tokens": 371952} |
| {"current_steps": 910, "total_steps": 8260, "loss": 0.1582, "lr": 4.9984622833983906e-05, "epoch": 1.1016949152542372, "percentage": 11.02, "elapsed_time": "0:03:21", "remaining_time": "0:27:11", "throughput": 1851.78, "total_tokens": 374032} |
| {"current_steps": 915, "total_steps": 8260, "loss": 0.1473, "lr": 4.9982714580002e-05, "epoch": 1.1077481840193704, "percentage": 11.08, "elapsed_time": "0:03:23", "remaining_time": "0:27:09", "throughput": 1852.87, "total_tokens": 376176} |
| {"current_steps": 920, "total_steps": 8260, "loss": 0.2965, "lr": 4.998069478513013e-05, "epoch": 1.1138014527845037, "percentage": 11.14, "elapsed_time": "0:03:24", "remaining_time": "0:27:07", "throughput": 1853.91, "total_tokens": 378224} |
| {"current_steps": 925, "total_steps": 8260, "loss": 0.1551, "lr": 4.997856345838615e-05, "epoch": 1.1198547215496368, "percentage": 11.2, "elapsed_time": "0:03:25", "remaining_time": "0:27:05", "throughput": 1854.89, "total_tokens": 380304} |
| {"current_steps": 930, "total_steps": 8260, "loss": 0.0947, "lr": 4.997632060928582e-05, "epoch": 1.12590799031477, "percentage": 11.26, "elapsed_time": "0:03:26", "remaining_time": "0:27:03", "throughput": 1855.98, "total_tokens": 382416} |
| {"current_steps": 935, "total_steps": 8260, "loss": 0.2101, "lr": 4.997396624784284e-05, "epoch": 1.131961259079903, "percentage": 11.32, "elapsed_time": "0:03:27", "remaining_time": "0:27:02", "throughput": 1857.27, "total_tokens": 384592} |
| {"current_steps": 940, "total_steps": 8260, "loss": 0.1112, "lr": 4.9971500384568795e-05, "epoch": 1.1380145278450362, "percentage": 11.38, "elapsed_time": "0:03:28", "remaining_time": "0:27:00", "throughput": 1858.27, "total_tokens": 386640} |
| {"current_steps": 945, "total_steps": 8260, "loss": 0.1297, "lr": 4.996892303047306e-05, "epoch": 1.1440677966101696, "percentage": 11.44, "elapsed_time": "0:03:29", "remaining_time": "0:26:58", "throughput": 1859.29, "total_tokens": 388752} |
| {"current_steps": 950, "total_steps": 8260, "loss": 0.2243, "lr": 4.996623419706282e-05, "epoch": 1.1501210653753027, "percentage": 11.5, "elapsed_time": "0:03:30", "remaining_time": "0:26:56", "throughput": 1860.55, "total_tokens": 390928} |
| {"current_steps": 955, "total_steps": 8260, "loss": 0.0389, "lr": 4.996343389634298e-05, "epoch": 1.1561743341404358, "percentage": 11.56, "elapsed_time": "0:03:31", "remaining_time": "0:26:55", "throughput": 1861.71, "total_tokens": 393104} |
| {"current_steps": 960, "total_steps": 8260, "loss": 0.1117, "lr": 4.996052214081608e-05, "epoch": 1.162227602905569, "percentage": 11.62, "elapsed_time": "0:03:32", "remaining_time": "0:26:53", "throughput": 1862.5, "total_tokens": 395184} |
| {"current_steps": 965, "total_steps": 8260, "loss": 0.2485, "lr": 4.995749894348232e-05, "epoch": 1.1682808716707023, "percentage": 11.68, "elapsed_time": "0:03:33", "remaining_time": "0:26:51", "throughput": 1863.31, "total_tokens": 397264} |
| {"current_steps": 970, "total_steps": 8260, "loss": 0.1718, "lr": 4.995436431783942e-05, "epoch": 1.1743341404358354, "percentage": 11.74, "elapsed_time": "0:03:34", "remaining_time": "0:26:50", "throughput": 1864.23, "total_tokens": 399376} |
| {"current_steps": 975, "total_steps": 8260, "loss": 0.2069, "lr": 4.9951118277882636e-05, "epoch": 1.1803874092009685, "percentage": 11.8, "elapsed_time": "0:03:35", "remaining_time": "0:26:48", "throughput": 1864.67, "total_tokens": 401296} |
| {"current_steps": 980, "total_steps": 8260, "loss": 0.121, "lr": 4.994776083810463e-05, "epoch": 1.1864406779661016, "percentage": 11.86, "elapsed_time": "0:03:36", "remaining_time": "0:26:46", "throughput": 1865.38, "total_tokens": 403344} |
| {"current_steps": 985, "total_steps": 8260, "loss": 0.2146, "lr": 4.994429201349542e-05, "epoch": 1.192493946731235, "percentage": 11.92, "elapsed_time": "0:03:37", "remaining_time": "0:26:44", "throughput": 1865.85, "total_tokens": 405328} |
| {"current_steps": 990, "total_steps": 8260, "loss": 0.1685, "lr": 4.994071181954237e-05, "epoch": 1.1985472154963681, "percentage": 11.99, "elapsed_time": "0:03:38", "remaining_time": "0:26:42", "throughput": 1866.47, "total_tokens": 407344} |
| {"current_steps": 995, "total_steps": 8260, "loss": 0.1702, "lr": 4.993702027223004e-05, "epoch": 1.2046004842615012, "percentage": 12.05, "elapsed_time": "0:03:39", "remaining_time": "0:26:40", "throughput": 1867.04, "total_tokens": 409296} |
| {"current_steps": 1000, "total_steps": 8260, "loss": 0.1899, "lr": 4.9933217388040164e-05, "epoch": 1.2106537530266344, "percentage": 12.11, "elapsed_time": "0:03:40", "remaining_time": "0:26:39", "throughput": 1868.44, "total_tokens": 411536} |
| {"current_steps": 1005, "total_steps": 8260, "loss": 0.0907, "lr": 4.992930318395157e-05, "epoch": 1.2167070217917675, "percentage": 12.17, "elapsed_time": "0:03:41", "remaining_time": "0:26:37", "throughput": 1869.62, "total_tokens": 413712} |
| {"current_steps": 1010, "total_steps": 8260, "loss": 0.0931, "lr": 4.99252776774401e-05, "epoch": 1.2227602905569008, "percentage": 12.23, "elapsed_time": "0:03:42", "remaining_time": "0:26:35", "throughput": 1870.87, "total_tokens": 415856} |
| {"current_steps": 1015, "total_steps": 8260, "loss": 0.2803, "lr": 4.99211408864785e-05, "epoch": 1.228813559322034, "percentage": 12.29, "elapsed_time": "0:03:43", "remaining_time": "0:26:33", "throughput": 1871.43, "total_tokens": 417808} |
| {"current_steps": 1020, "total_steps": 8260, "loss": 0.367, "lr": 4.991689282953642e-05, "epoch": 1.234866828087167, "percentage": 12.35, "elapsed_time": "0:03:44", "remaining_time": "0:26:31", "throughput": 1871.97, "total_tokens": 419760} |
| {"current_steps": 1025, "total_steps": 8260, "loss": 0.2222, "lr": 4.991253352558025e-05, "epoch": 1.2409200968523002, "percentage": 12.41, "elapsed_time": "0:03:45", "remaining_time": "0:26:29", "throughput": 1872.79, "total_tokens": 421840} |
| {"current_steps": 1030, "total_steps": 8260, "loss": 0.1684, "lr": 4.9908062994073056e-05, "epoch": 1.2469733656174333, "percentage": 12.47, "elapsed_time": "0:03:46", "remaining_time": "0:26:28", "throughput": 1874.21, "total_tokens": 424112} |
| {"current_steps": 1035, "total_steps": 8260, "loss": 0.1347, "lr": 4.990348125497454e-05, "epoch": 1.2530266343825667, "percentage": 12.53, "elapsed_time": "0:03:47", "remaining_time": "0:26:26", "throughput": 1874.72, "total_tokens": 426128} |
| {"current_steps": 1040, "total_steps": 8260, "loss": 0.1947, "lr": 4.9898788328740884e-05, "epoch": 1.2590799031476998, "percentage": 12.59, "elapsed_time": "0:03:48", "remaining_time": "0:26:25", "throughput": 1875.65, "total_tokens": 428272} |
| {"current_steps": 1045, "total_steps": 8260, "loss": 0.2443, "lr": 4.989398423632471e-05, "epoch": 1.265133171912833, "percentage": 12.65, "elapsed_time": "0:03:49", "remaining_time": "0:26:23", "throughput": 1876.34, "total_tokens": 430320} |
| {"current_steps": 1050, "total_steps": 8260, "loss": 0.0931, "lr": 4.988906899917496e-05, "epoch": 1.271186440677966, "percentage": 12.71, "elapsed_time": "0:03:50", "remaining_time": "0:26:21", "throughput": 1876.83, "total_tokens": 432272} |
| {"current_steps": 1055, "total_steps": 8260, "loss": 0.1204, "lr": 4.988404263923679e-05, "epoch": 1.2772397094430992, "percentage": 12.77, "elapsed_time": "0:03:51", "remaining_time": "0:26:19", "throughput": 1877.63, "total_tokens": 434384} |
| {"current_steps": 1060, "total_steps": 8260, "loss": 0.1073, "lr": 4.987890517895152e-05, "epoch": 1.2832929782082325, "percentage": 12.83, "elapsed_time": "0:03:52", "remaining_time": "0:26:18", "throughput": 1878.14, "total_tokens": 436336} |
| {"current_steps": 1065, "total_steps": 8260, "loss": 0.1582, "lr": 4.987365664125647e-05, "epoch": 1.2893462469733656, "percentage": 12.89, "elapsed_time": "0:03:53", "remaining_time": "0:26:16", "throughput": 1878.66, "total_tokens": 438352} |
| {"current_steps": 1070, "total_steps": 8260, "loss": 0.204, "lr": 4.986829704958491e-05, "epoch": 1.2953995157384988, "percentage": 12.95, "elapsed_time": "0:03:54", "remaining_time": "0:26:14", "throughput": 1879.26, "total_tokens": 440336} |
| {"current_steps": 1075, "total_steps": 8260, "loss": 0.1136, "lr": 4.986282642786594e-05, "epoch": 1.3014527845036319, "percentage": 13.01, "elapsed_time": "0:03:55", "remaining_time": "0:26:12", "throughput": 1879.96, "total_tokens": 442352} |
| {"current_steps": 1080, "total_steps": 8260, "loss": 0.1038, "lr": 4.985724480052435e-05, "epoch": 1.307506053268765, "percentage": 13.08, "elapsed_time": "0:03:56", "remaining_time": "0:26:11", "throughput": 1880.92, "total_tokens": 444496} |
| {"current_steps": 1085, "total_steps": 8260, "loss": 0.1029, "lr": 4.985155219248057e-05, "epoch": 1.3135593220338984, "percentage": 13.14, "elapsed_time": "0:03:57", "remaining_time": "0:26:09", "throughput": 1881.67, "total_tokens": 446576} |
| {"current_steps": 1090, "total_steps": 8260, "loss": 0.1404, "lr": 4.9845748629150514e-05, "epoch": 1.3196125907990315, "percentage": 13.2, "elapsed_time": "0:03:58", "remaining_time": "0:26:07", "throughput": 1882.4, "total_tokens": 448656} |
| {"current_steps": 1095, "total_steps": 8260, "loss": 0.0984, "lr": 4.9839834136445485e-05, "epoch": 1.3256658595641646, "percentage": 13.26, "elapsed_time": "0:03:59", "remaining_time": "0:26:05", "throughput": 1883.1, "total_tokens": 450672} |
| {"current_steps": 1100, "total_steps": 8260, "loss": 0.1441, "lr": 4.983380874077204e-05, "epoch": 1.331719128329298, "percentage": 13.32, "elapsed_time": "0:04:00", "remaining_time": "0:26:04", "throughput": 1883.93, "total_tokens": 452784} |
| {"current_steps": 1105, "total_steps": 8260, "loss": 0.1062, "lr": 4.982767246903192e-05, "epoch": 1.3377723970944309, "percentage": 13.38, "elapsed_time": "0:04:01", "remaining_time": "0:26:02", "throughput": 1884.52, "total_tokens": 454864} |
| {"current_steps": 1110, "total_steps": 8260, "loss": 0.2014, "lr": 4.982142534862185e-05, "epoch": 1.3438256658595642, "percentage": 13.44, "elapsed_time": "0:04:02", "remaining_time": "0:26:01", "throughput": 1885.66, "total_tokens": 457072} |
| {"current_steps": 1115, "total_steps": 8260, "loss": 0.0932, "lr": 4.981506740743351e-05, "epoch": 1.3498789346246973, "percentage": 13.5, "elapsed_time": "0:04:03", "remaining_time": "0:25:59", "throughput": 1886.49, "total_tokens": 459184} |
| {"current_steps": 1120, "total_steps": 8260, "loss": 0.1265, "lr": 4.980859867385334e-05, "epoch": 1.3559322033898304, "percentage": 13.56, "elapsed_time": "0:04:04", "remaining_time": "0:25:57", "throughput": 1886.92, "total_tokens": 461136} |
| {"current_steps": 1125, "total_steps": 8260, "loss": 0.2014, "lr": 4.9802019176762434e-05, "epoch": 1.3619854721549638, "percentage": 13.62, "elapsed_time": "0:04:05", "remaining_time": "0:25:56", "throughput": 1887.31, "total_tokens": 463152} |
| {"current_steps": 1130, "total_steps": 8260, "loss": 0.0997, "lr": 4.9795328945536425e-05, "epoch": 1.368038740920097, "percentage": 13.68, "elapsed_time": "0:04:06", "remaining_time": "0:25:54", "throughput": 1888.16, "total_tokens": 465232} |
| {"current_steps": 1135, "total_steps": 8260, "loss": 0.1589, "lr": 4.978852801004534e-05, "epoch": 1.37409200968523, "percentage": 13.74, "elapsed_time": "0:04:07", "remaining_time": "0:25:53", "throughput": 1888.59, "total_tokens": 467248} |
| {"current_steps": 1140, "total_steps": 8260, "loss": 0.0805, "lr": 4.9781616400653464e-05, "epoch": 1.3801452784503632, "percentage": 13.8, "elapsed_time": "0:04:08", "remaining_time": "0:25:51", "throughput": 1889.6, "total_tokens": 469424} |
| {"current_steps": 1145, "total_steps": 8260, "loss": 0.4526, "lr": 4.9774594148219225e-05, "epoch": 1.3861985472154963, "percentage": 13.86, "elapsed_time": "0:04:09", "remaining_time": "0:25:49", "throughput": 1890.1, "total_tokens": 471344} |
| {"current_steps": 1150, "total_steps": 8260, "loss": 0.1833, "lr": 4.976746128409502e-05, "epoch": 1.3922518159806296, "percentage": 13.92, "elapsed_time": "0:04:10", "remaining_time": "0:25:47", "throughput": 1890.5, "total_tokens": 473296} |
| {"current_steps": 1155, "total_steps": 8260, "loss": 0.2007, "lr": 4.9760217840127126e-05, "epoch": 1.3983050847457628, "percentage": 13.98, "elapsed_time": "0:04:11", "remaining_time": "0:25:46", "throughput": 1891.15, "total_tokens": 475312} |
| {"current_steps": 1160, "total_steps": 8260, "loss": 0.1373, "lr": 4.97528638486555e-05, "epoch": 1.4043583535108959, "percentage": 14.04, "elapsed_time": "0:04:12", "remaining_time": "0:25:44", "throughput": 1891.86, "total_tokens": 477360} |
| {"current_steps": 1165, "total_steps": 8260, "loss": 0.1133, "lr": 4.9745399342513666e-05, "epoch": 1.410411622276029, "percentage": 14.1, "elapsed_time": "0:04:13", "remaining_time": "0:25:42", "throughput": 1892.25, "total_tokens": 479376} |
| {"current_steps": 1170, "total_steps": 8260, "loss": 0.1703, "lr": 4.9737824355028584e-05, "epoch": 1.4164648910411621, "percentage": 14.16, "elapsed_time": "0:04:14", "remaining_time": "0:25:41", "throughput": 1892.91, "total_tokens": 481456} |
| {"current_steps": 1175, "total_steps": 8260, "loss": 0.1203, "lr": 4.973013892002047e-05, "epoch": 1.4225181598062955, "percentage": 14.23, "elapsed_time": "0:04:15", "remaining_time": "0:25:39", "throughput": 1893.85, "total_tokens": 483632} |
| {"current_steps": 1180, "total_steps": 8260, "loss": 0.1649, "lr": 4.9722343071802665e-05, "epoch": 1.4285714285714286, "percentage": 14.29, "elapsed_time": "0:04:16", "remaining_time": "0:25:38", "throughput": 1894.54, "total_tokens": 485680} |
| {"current_steps": 1185, "total_steps": 8260, "loss": 0.1362, "lr": 4.971443684518145e-05, "epoch": 1.4346246973365617, "percentage": 14.35, "elapsed_time": "0:04:17", "remaining_time": "0:25:36", "throughput": 1895.27, "total_tokens": 487792} |
| {"current_steps": 1190, "total_steps": 8260, "loss": 0.1805, "lr": 4.970642027545596e-05, "epoch": 1.4406779661016949, "percentage": 14.41, "elapsed_time": "0:04:18", "remaining_time": "0:25:35", "throughput": 1895.77, "total_tokens": 489840} |
| {"current_steps": 1195, "total_steps": 8260, "loss": 0.0716, "lr": 4.969829339841793e-05, "epoch": 1.446731234866828, "percentage": 14.47, "elapsed_time": "0:04:19", "remaining_time": "0:25:33", "throughput": 1896.24, "total_tokens": 491824} |
| {"current_steps": 1200, "total_steps": 8260, "loss": 0.1681, "lr": 4.9690056250351626e-05, "epoch": 1.4527845036319613, "percentage": 14.53, "elapsed_time": "0:04:20", "remaining_time": "0:25:31", "throughput": 1896.71, "total_tokens": 493872} |
| {"current_steps": 1205, "total_steps": 8260, "loss": 0.0898, "lr": 4.9681708868033616e-05, "epoch": 1.4588377723970944, "percentage": 14.59, "elapsed_time": "0:04:21", "remaining_time": "0:25:30", "throughput": 1897.47, "total_tokens": 495952} |
| {"current_steps": 1210, "total_steps": 8260, "loss": 0.2288, "lr": 4.967325128873266e-05, "epoch": 1.4648910411622276, "percentage": 14.65, "elapsed_time": "0:04:22", "remaining_time": "0:25:28", "throughput": 1897.95, "total_tokens": 497936} |
| {"current_steps": 1215, "total_steps": 8260, "loss": 0.1016, "lr": 4.96646835502095e-05, "epoch": 1.4709443099273607, "percentage": 14.71, "elapsed_time": "0:04:23", "remaining_time": "0:25:26", "throughput": 1898.35, "total_tokens": 499856} |
| {"current_steps": 1220, "total_steps": 8260, "loss": 0.1139, "lr": 4.965600569071671e-05, "epoch": 1.4769975786924938, "percentage": 14.77, "elapsed_time": "0:04:24", "remaining_time": "0:25:25", "throughput": 1898.62, "total_tokens": 501840} |
| {"current_steps": 1225, "total_steps": 8260, "loss": 0.0892, "lr": 4.9647217748998534e-05, "epoch": 1.4830508474576272, "percentage": 14.83, "elapsed_time": "0:04:25", "remaining_time": "0:25:23", "throughput": 1899.32, "total_tokens": 503952} |
| {"current_steps": 1230, "total_steps": 8260, "loss": 0.1693, "lr": 4.963831976429067e-05, "epoch": 1.4891041162227603, "percentage": 14.89, "elapsed_time": "0:04:26", "remaining_time": "0:25:22", "throughput": 1899.77, "total_tokens": 505936} |
| {"current_steps": 1235, "total_steps": 8260, "loss": 0.161, "lr": 4.9629311776320176e-05, "epoch": 1.4951573849878934, "percentage": 14.95, "elapsed_time": "0:04:27", "remaining_time": "0:25:20", "throughput": 1900.27, "total_tokens": 507984} |
| {"current_steps": 1239, "total_steps": 8260, "eval_loss": 0.1938663125038147, "epoch": 1.5, "percentage": 15.0, "elapsed_time": "0:04:36", "remaining_time": "0:26:04", "throughput": 1844.99, "total_tokens": 509456} |
| {"current_steps": 1240, "total_steps": 8260, "loss": 0.2233, "lr": 4.962019382530521e-05, "epoch": 1.5012106537530268, "percentage": 15.01, "elapsed_time": "0:04:37", "remaining_time": "0:26:12", "throughput": 1835.95, "total_tokens": 509840} |
| {"current_steps": 1245, "total_steps": 8260, "loss": 0.1841, "lr": 4.9610965951954885e-05, "epoch": 1.5072639225181597, "percentage": 15.07, "elapsed_time": "0:04:38", "remaining_time": "0:26:10", "throughput": 1836.46, "total_tokens": 511856} |
| {"current_steps": 1250, "total_steps": 8260, "loss": 0.1907, "lr": 4.960162819746911e-05, "epoch": 1.513317191283293, "percentage": 15.13, "elapsed_time": "0:04:39", "remaining_time": "0:26:08", "throughput": 1837.05, "total_tokens": 513904} |
| {"current_steps": 1255, "total_steps": 8260, "loss": 0.1324, "lr": 4.9592180603538366e-05, "epoch": 1.5193704600484261, "percentage": 15.19, "elapsed_time": "0:04:40", "remaining_time": "0:26:06", "throughput": 1837.37, "total_tokens": 515824} |
| {"current_steps": 1260, "total_steps": 8260, "loss": 0.1081, "lr": 4.958262321234353e-05, "epoch": 1.5254237288135593, "percentage": 15.25, "elapsed_time": "0:04:41", "remaining_time": "0:26:05", "throughput": 1837.83, "total_tokens": 517744} |
| {"current_steps": 1265, "total_steps": 8260, "loss": 0.1099, "lr": 4.95729560665557e-05, "epoch": 1.5314769975786926, "percentage": 15.31, "elapsed_time": "0:04:42", "remaining_time": "0:26:03", "throughput": 1838.75, "total_tokens": 519824} |
| {"current_steps": 1270, "total_steps": 8260, "loss": 0.1783, "lr": 4.956317920933602e-05, "epoch": 1.5375302663438255, "percentage": 15.38, "elapsed_time": "0:04:43", "remaining_time": "0:26:01", "throughput": 1839.79, "total_tokens": 521936} |
| {"current_steps": 1275, "total_steps": 8260, "loss": 0.0738, "lr": 4.955329268433543e-05, "epoch": 1.5435835351089588, "percentage": 15.44, "elapsed_time": "0:04:44", "remaining_time": "0:25:59", "throughput": 1840.9, "total_tokens": 524176} |
| {"current_steps": 1280, "total_steps": 8260, "loss": 0.1763, "lr": 4.954329653569452e-05, "epoch": 1.549636803874092, "percentage": 15.5, "elapsed_time": "0:04:45", "remaining_time": "0:25:58", "throughput": 1841.82, "total_tokens": 526256} |
| {"current_steps": 1285, "total_steps": 8260, "loss": 0.1029, "lr": 4.953319080804333e-05, "epoch": 1.555690072639225, "percentage": 15.56, "elapsed_time": "0:04:46", "remaining_time": "0:25:56", "throughput": 1842.41, "total_tokens": 528304} |
| {"current_steps": 1290, "total_steps": 8260, "loss": 0.2286, "lr": 4.952297554650113e-05, "epoch": 1.5617433414043584, "percentage": 15.62, "elapsed_time": "0:04:47", "remaining_time": "0:25:54", "throughput": 1842.93, "total_tokens": 530320} |
| {"current_steps": 1295, "total_steps": 8260, "loss": 0.0676, "lr": 4.9512650796676216e-05, "epoch": 1.5677966101694916, "percentage": 15.68, "elapsed_time": "0:04:48", "remaining_time": "0:25:53", "throughput": 1843.81, "total_tokens": 532400} |
| {"current_steps": 1300, "total_steps": 8260, "loss": 0.1226, "lr": 4.9502216604665744e-05, "epoch": 1.5738498789346247, "percentage": 15.74, "elapsed_time": "0:04:49", "remaining_time": "0:25:51", "throughput": 1844.59, "total_tokens": 534416} |
| {"current_steps": 1305, "total_steps": 8260, "loss": 0.1141, "lr": 4.949167301705548e-05, "epoch": 1.5799031476997578, "percentage": 15.8, "elapsed_time": "0:04:50", "remaining_time": "0:25:49", "throughput": 1845.61, "total_tokens": 536592} |
| {"current_steps": 1310, "total_steps": 8260, "loss": 0.0245, "lr": 4.948102008091962e-05, "epoch": 1.585956416464891, "percentage": 15.86, "elapsed_time": "0:04:51", "remaining_time": "0:25:47", "throughput": 1846.43, "total_tokens": 538704} |
| {"current_steps": 1315, "total_steps": 8260, "loss": 0.2506, "lr": 4.947025784382057e-05, "epoch": 1.5920096852300243, "percentage": 15.92, "elapsed_time": "0:04:52", "remaining_time": "0:25:46", "throughput": 1847.45, "total_tokens": 540912} |
| {"current_steps": 1320, "total_steps": 8260, "loss": 0.2054, "lr": 4.9459386353808736e-05, "epoch": 1.5980629539951574, "percentage": 15.98, "elapsed_time": "0:04:53", "remaining_time": "0:25:44", "throughput": 1847.95, "total_tokens": 542864} |
| {"current_steps": 1325, "total_steps": 8260, "loss": 0.1701, "lr": 4.944840565942229e-05, "epoch": 1.6041162227602905, "percentage": 16.04, "elapsed_time": "0:04:54", "remaining_time": "0:25:42", "throughput": 1848.59, "total_tokens": 544816} |
| {"current_steps": 1330, "total_steps": 8260, "loss": 0.2628, "lr": 4.943731580968701e-05, "epoch": 1.6101694915254239, "percentage": 16.1, "elapsed_time": "0:04:55", "remaining_time": "0:25:40", "throughput": 1849.19, "total_tokens": 546864} |
| {"current_steps": 1335, "total_steps": 8260, "loss": 0.1997, "lr": 4.942611685411598e-05, "epoch": 1.6162227602905568, "percentage": 16.16, "elapsed_time": "0:04:56", "remaining_time": "0:25:39", "throughput": 1849.77, "total_tokens": 548912} |
| {"current_steps": 1340, "total_steps": 8260, "loss": 0.2455, "lr": 4.9414808842709435e-05, "epoch": 1.6222760290556901, "percentage": 16.22, "elapsed_time": "0:04:57", "remaining_time": "0:25:37", "throughput": 1850.81, "total_tokens": 551056} |
| {"current_steps": 1345, "total_steps": 8260, "loss": 0.0968, "lr": 4.940339182595451e-05, "epoch": 1.6283292978208233, "percentage": 16.28, "elapsed_time": "0:04:58", "remaining_time": "0:25:35", "throughput": 1851.62, "total_tokens": 553072} |
| {"current_steps": 1350, "total_steps": 8260, "loss": 0.1576, "lr": 4.9391865854825015e-05, "epoch": 1.6343825665859564, "percentage": 16.34, "elapsed_time": "0:04:59", "remaining_time": "0:25:34", "throughput": 1852.54, "total_tokens": 555248} |
| {"current_steps": 1355, "total_steps": 8260, "loss": 0.0726, "lr": 4.938023098078122e-05, "epoch": 1.6404358353510897, "percentage": 16.4, "elapsed_time": "0:05:00", "remaining_time": "0:25:32", "throughput": 1853.25, "total_tokens": 557360} |
| {"current_steps": 1360, "total_steps": 8260, "loss": 0.0799, "lr": 4.93684872557696e-05, "epoch": 1.6464891041162226, "percentage": 16.46, "elapsed_time": "0:05:01", "remaining_time": "0:25:31", "throughput": 1853.69, "total_tokens": 559376} |
| {"current_steps": 1365, "total_steps": 8260, "loss": 0.2712, "lr": 4.935663473222264e-05, "epoch": 1.652542372881356, "percentage": 16.53, "elapsed_time": "0:05:02", "remaining_time": "0:25:29", "throughput": 1854.02, "total_tokens": 561296} |
| {"current_steps": 1370, "total_steps": 8260, "loss": 0.1349, "lr": 4.934467346305856e-05, "epoch": 1.658595641646489, "percentage": 16.59, "elapsed_time": "0:05:03", "remaining_time": "0:25:27", "throughput": 1854.76, "total_tokens": 563376} |
| {"current_steps": 1375, "total_steps": 8260, "loss": 0.0876, "lr": 4.933260350168112e-05, "epoch": 1.6646489104116222, "percentage": 16.65, "elapsed_time": "0:05:04", "remaining_time": "0:25:25", "throughput": 1855.34, "total_tokens": 565424} |
| {"current_steps": 1380, "total_steps": 8260, "loss": 0.0999, "lr": 4.932042490197933e-05, "epoch": 1.6707021791767556, "percentage": 16.71, "elapsed_time": "0:05:05", "remaining_time": "0:25:24", "throughput": 1856.14, "total_tokens": 567504} |
| {"current_steps": 1385, "total_steps": 8260, "loss": 0.3148, "lr": 4.930813771832728e-05, "epoch": 1.6767554479418885, "percentage": 16.77, "elapsed_time": "0:05:06", "remaining_time": "0:25:22", "throughput": 1857.15, "total_tokens": 569744} |
| {"current_steps": 1390, "total_steps": 8260, "loss": 0.0742, "lr": 4.929574200558382e-05, "epoch": 1.6828087167070218, "percentage": 16.83, "elapsed_time": "0:05:07", "remaining_time": "0:25:21", "throughput": 1857.89, "total_tokens": 571888} |
| {"current_steps": 1395, "total_steps": 8260, "loss": 0.0708, "lr": 4.928323781909239e-05, "epoch": 1.688861985472155, "percentage": 16.89, "elapsed_time": "0:05:08", "remaining_time": "0:25:19", "throughput": 1858.57, "total_tokens": 574000} |
| {"current_steps": 1400, "total_steps": 8260, "loss": 0.2423, "lr": 4.927062521468068e-05, "epoch": 1.694915254237288, "percentage": 16.95, "elapsed_time": "0:05:09", "remaining_time": "0:25:18", "throughput": 1858.86, "total_tokens": 575984} |
| {"current_steps": 1405, "total_steps": 8260, "loss": 0.1239, "lr": 4.92579042486605e-05, "epoch": 1.7009685230024214, "percentage": 17.01, "elapsed_time": "0:05:10", "remaining_time": "0:25:16", "throughput": 1859.33, "total_tokens": 577968} |
| {"current_steps": 1410, "total_steps": 8260, "loss": 0.1221, "lr": 4.924507497782743e-05, "epoch": 1.7070217917675545, "percentage": 17.07, "elapsed_time": "0:05:11", "remaining_time": "0:25:15", "throughput": 1860.11, "total_tokens": 580112} |
| {"current_steps": 1415, "total_steps": 8260, "loss": 0.166, "lr": 4.923213745946059e-05, "epoch": 1.7130750605326877, "percentage": 17.13, "elapsed_time": "0:05:12", "remaining_time": "0:25:13", "throughput": 1860.72, "total_tokens": 582160} |
| {"current_steps": 1420, "total_steps": 8260, "loss": 0.1291, "lr": 4.921909175132242e-05, "epoch": 1.7191283292978208, "percentage": 17.19, "elapsed_time": "0:05:13", "remaining_time": "0:25:12", "throughput": 1861.43, "total_tokens": 584304} |
| {"current_steps": 1425, "total_steps": 8260, "loss": 0.1332, "lr": 4.920593791165839e-05, "epoch": 1.725181598062954, "percentage": 17.25, "elapsed_time": "0:05:14", "remaining_time": "0:25:09", "throughput": 1861.81, "total_tokens": 586096} |
| {"current_steps": 1430, "total_steps": 8260, "loss": 0.1062, "lr": 4.919267599919674e-05, "epoch": 1.7312348668280872, "percentage": 17.31, "elapsed_time": "0:05:15", "remaining_time": "0:25:08", "throughput": 1862.6, "total_tokens": 588240} |
| {"current_steps": 1435, "total_steps": 8260, "loss": 0.1974, "lr": 4.917930607314823e-05, "epoch": 1.7372881355932204, "percentage": 17.37, "elapsed_time": "0:05:16", "remaining_time": "0:25:06", "throughput": 1863.62, "total_tokens": 590480} |
| {"current_steps": 1440, "total_steps": 8260, "loss": 0.206, "lr": 4.916582819320588e-05, "epoch": 1.7433414043583535, "percentage": 17.43, "elapsed_time": "0:05:17", "remaining_time": "0:25:05", "throughput": 1864.13, "total_tokens": 592528} |
| {"current_steps": 1445, "total_steps": 8260, "loss": 0.1952, "lr": 4.915224241954467e-05, "epoch": 1.7493946731234868, "percentage": 17.49, "elapsed_time": "0:05:18", "remaining_time": "0:25:03", "throughput": 1864.46, "total_tokens": 594416} |
| {"current_steps": 1450, "total_steps": 8260, "loss": 0.1267, "lr": 4.9138548812821316e-05, "epoch": 1.7554479418886197, "percentage": 17.55, "elapsed_time": "0:05:19", "remaining_time": "0:25:01", "throughput": 1865.03, "total_tokens": 596432} |
| {"current_steps": 1455, "total_steps": 8260, "loss": 0.1102, "lr": 4.912474743417399e-05, "epoch": 1.761501210653753, "percentage": 17.62, "elapsed_time": "0:05:20", "remaining_time": "0:25:00", "throughput": 1865.49, "total_tokens": 598352} |
| {"current_steps": 1460, "total_steps": 8260, "loss": 0.0541, "lr": 4.911083834522199e-05, "epoch": 1.7675544794188862, "percentage": 17.68, "elapsed_time": "0:05:21", "remaining_time": "0:24:58", "throughput": 1866.09, "total_tokens": 600432} |
| {"current_steps": 1465, "total_steps": 8260, "loss": 0.1506, "lr": 4.909682160806556e-05, "epoch": 1.7736077481840193, "percentage": 17.74, "elapsed_time": "0:05:22", "remaining_time": "0:24:57", "throughput": 1866.57, "total_tokens": 602480} |
| {"current_steps": 1470, "total_steps": 8260, "loss": 0.2005, "lr": 4.908269728528553e-05, "epoch": 1.7796610169491527, "percentage": 17.8, "elapsed_time": "0:05:23", "remaining_time": "0:24:55", "throughput": 1866.97, "total_tokens": 604496} |
| {"current_steps": 1475, "total_steps": 8260, "loss": 0.1171, "lr": 4.90684654399431e-05, "epoch": 1.7857142857142856, "percentage": 17.86, "elapsed_time": "0:05:24", "remaining_time": "0:24:54", "throughput": 1867.81, "total_tokens": 606672} |
| {"current_steps": 1480, "total_steps": 8260, "loss": 0.1143, "lr": 4.9054126135579495e-05, "epoch": 1.791767554479419, "percentage": 17.92, "elapsed_time": "0:05:25", "remaining_time": "0:24:52", "throughput": 1868.38, "total_tokens": 608752} |
| {"current_steps": 1485, "total_steps": 8260, "loss": 0.1178, "lr": 4.9039679436215734e-05, "epoch": 1.797820823244552, "percentage": 17.98, "elapsed_time": "0:05:26", "remaining_time": "0:24:50", "throughput": 1868.64, "total_tokens": 610608} |
| {"current_steps": 1490, "total_steps": 8260, "loss": 0.2375, "lr": 4.9025125406352335e-05, "epoch": 1.8038740920096852, "percentage": 18.04, "elapsed_time": "0:05:27", "remaining_time": "0:24:49", "throughput": 1869.22, "total_tokens": 612688} |
| {"current_steps": 1495, "total_steps": 8260, "loss": 0.0663, "lr": 4.9010464110968976e-05, "epoch": 1.8099273607748185, "percentage": 18.1, "elapsed_time": "0:05:28", "remaining_time": "0:24:47", "throughput": 1869.61, "total_tokens": 614704} |
| {"current_steps": 1500, "total_steps": 8260, "loss": 0.1502, "lr": 4.89956956155243e-05, "epoch": 1.8159806295399514, "percentage": 18.16, "elapsed_time": "0:05:29", "remaining_time": "0:24:46", "throughput": 1870.29, "total_tokens": 616784} |
| {"current_steps": 1505, "total_steps": 8260, "loss": 0.1109, "lr": 4.898081998595555e-05, "epoch": 1.8220338983050848, "percentage": 18.22, "elapsed_time": "0:05:30", "remaining_time": "0:24:44", "throughput": 1870.6, "total_tokens": 618736} |
| {"current_steps": 1510, "total_steps": 8260, "loss": 0.0927, "lr": 4.8965837288678253e-05, "epoch": 1.828087167070218, "percentage": 18.28, "elapsed_time": "0:05:31", "remaining_time": "0:24:43", "throughput": 1871.48, "total_tokens": 620944} |
| {"current_steps": 1515, "total_steps": 8260, "loss": 0.1174, "lr": 4.895074759058601e-05, "epoch": 1.834140435835351, "percentage": 18.34, "elapsed_time": "0:05:32", "remaining_time": "0:24:41", "throughput": 1872.24, "total_tokens": 623120} |
| {"current_steps": 1520, "total_steps": 8260, "loss": 0.0706, "lr": 4.893555095905014e-05, "epoch": 1.8401937046004844, "percentage": 18.4, "elapsed_time": "0:05:33", "remaining_time": "0:24:40", "throughput": 1872.58, "total_tokens": 625136} |
| {"current_steps": 1525, "total_steps": 8260, "loss": 0.0895, "lr": 4.892024746191939e-05, "epoch": 1.8462469733656173, "percentage": 18.46, "elapsed_time": "0:05:34", "remaining_time": "0:24:38", "throughput": 1873.1, "total_tokens": 627216} |
| {"current_steps": 1530, "total_steps": 8260, "loss": 0.0849, "lr": 4.890483716751961e-05, "epoch": 1.8523002421307506, "percentage": 18.52, "elapsed_time": "0:05:35", "remaining_time": "0:24:37", "throughput": 1873.51, "total_tokens": 629136} |
| {"current_steps": 1535, "total_steps": 8260, "loss": 0.1762, "lr": 4.888932014465352e-05, "epoch": 1.8583535108958837, "percentage": 18.58, "elapsed_time": "0:05:36", "remaining_time": "0:24:35", "throughput": 1874.03, "total_tokens": 631216} |
| {"current_steps": 1540, "total_steps": 8260, "loss": 0.0629, "lr": 4.8873696462600303e-05, "epoch": 1.8644067796610169, "percentage": 18.64, "elapsed_time": "0:05:37", "remaining_time": "0:24:34", "throughput": 1874.65, "total_tokens": 633328} |
| {"current_steps": 1545, "total_steps": 8260, "loss": 0.2325, "lr": 4.8857966191115365e-05, "epoch": 1.8704600484261502, "percentage": 18.7, "elapsed_time": "0:05:38", "remaining_time": "0:24:32", "throughput": 1875.31, "total_tokens": 635440} |
| {"current_steps": 1550, "total_steps": 8260, "loss": 0.1899, "lr": 4.884212940043001e-05, "epoch": 1.8765133171912833, "percentage": 18.77, "elapsed_time": "0:05:39", "remaining_time": "0:24:31", "throughput": 1875.69, "total_tokens": 637360} |
| {"current_steps": 1555, "total_steps": 8260, "loss": 0.2543, "lr": 4.882618616125111e-05, "epoch": 1.8825665859564165, "percentage": 18.83, "elapsed_time": "0:05:40", "remaining_time": "0:24:29", "throughput": 1876.07, "total_tokens": 639280} |
| {"current_steps": 1560, "total_steps": 8260, "loss": 0.2006, "lr": 4.881013654476081e-05, "epoch": 1.8886198547215496, "percentage": 18.89, "elapsed_time": "0:05:41", "remaining_time": "0:24:27", "throughput": 1876.52, "total_tokens": 641328} |
| {"current_steps": 1565, "total_steps": 8260, "loss": 0.2019, "lr": 4.8793980622616195e-05, "epoch": 1.8946731234866827, "percentage": 18.95, "elapsed_time": "0:05:42", "remaining_time": "0:24:26", "throughput": 1876.97, "total_tokens": 643376} |
| {"current_steps": 1570, "total_steps": 8260, "loss": 0.0908, "lr": 4.877771846694897e-05, "epoch": 1.900726392251816, "percentage": 19.01, "elapsed_time": "0:05:43", "remaining_time": "0:24:24", "throughput": 1877.57, "total_tokens": 645488} |
| {"current_steps": 1575, "total_steps": 8260, "loss": 0.142, "lr": 4.876135015036515e-05, "epoch": 1.9067796610169492, "percentage": 19.07, "elapsed_time": "0:05:44", "remaining_time": "0:24:23", "throughput": 1878.18, "total_tokens": 647600} |
| {"current_steps": 1580, "total_steps": 8260, "loss": 0.1755, "lr": 4.874487574594473e-05, "epoch": 1.9128329297820823, "percentage": 19.13, "elapsed_time": "0:05:45", "remaining_time": "0:24:21", "throughput": 1878.74, "total_tokens": 649648} |
| {"current_steps": 1585, "total_steps": 8260, "loss": 0.2109, "lr": 4.872829532724136e-05, "epoch": 1.9188861985472156, "percentage": 19.19, "elapsed_time": "0:05:46", "remaining_time": "0:24:20", "throughput": 1879.37, "total_tokens": 651760} |
| {"current_steps": 1590, "total_steps": 8260, "loss": 0.1727, "lr": 4.8711608968282e-05, "epoch": 1.9249394673123486, "percentage": 19.25, "elapsed_time": "0:05:47", "remaining_time": "0:24:19", "throughput": 1879.93, "total_tokens": 653840} |
| {"current_steps": 1595, "total_steps": 8260, "loss": 0.1873, "lr": 4.8694816743566616e-05, "epoch": 1.930992736077482, "percentage": 19.31, "elapsed_time": "0:05:48", "remaining_time": "0:24:17", "throughput": 1880.5, "total_tokens": 655888} |
| {"current_steps": 1600, "total_steps": 8260, "loss": 0.0814, "lr": 4.867791872806785e-05, "epoch": 1.937046004842615, "percentage": 19.37, "elapsed_time": "0:05:49", "remaining_time": "0:24:15", "throughput": 1880.8, "total_tokens": 657776} |
| {"current_steps": 1605, "total_steps": 8260, "loss": 0.1707, "lr": 4.8660914997230624e-05, "epoch": 1.9430992736077481, "percentage": 19.43, "elapsed_time": "0:05:50", "remaining_time": "0:24:14", "throughput": 1881.51, "total_tokens": 659952} |
| {"current_steps": 1610, "total_steps": 8260, "loss": 0.1737, "lr": 4.8643805626971894e-05, "epoch": 1.9491525423728815, "percentage": 19.49, "elapsed_time": "0:05:51", "remaining_time": "0:24:12", "throughput": 1881.97, "total_tokens": 661968} |
| {"current_steps": 1615, "total_steps": 8260, "loss": 0.1599, "lr": 4.862659069368026e-05, "epoch": 1.9552058111380144, "percentage": 19.55, "elapsed_time": "0:05:52", "remaining_time": "0:24:11", "throughput": 1882.45, "total_tokens": 664048} |
| {"current_steps": 1620, "total_steps": 8260, "loss": 0.1398, "lr": 4.8609270274215614e-05, "epoch": 1.9612590799031477, "percentage": 19.61, "elapsed_time": "0:05:53", "remaining_time": "0:24:09", "throughput": 1882.67, "total_tokens": 665968} |
| {"current_steps": 1625, "total_steps": 8260, "loss": 0.1459, "lr": 4.859184444590882e-05, "epoch": 1.9673123486682809, "percentage": 19.67, "elapsed_time": "0:05:54", "remaining_time": "0:24:08", "throughput": 1882.95, "total_tokens": 667920} |
| {"current_steps": 1630, "total_steps": 8260, "loss": 0.0917, "lr": 4.857431328656137e-05, "epoch": 1.973365617433414, "percentage": 19.73, "elapsed_time": "0:05:55", "remaining_time": "0:24:06", "throughput": 1883.47, "total_tokens": 669968} |
| {"current_steps": 1635, "total_steps": 8260, "loss": 0.0853, "lr": 4.855667687444504e-05, "epoch": 1.9794188861985473, "percentage": 19.79, "elapsed_time": "0:05:56", "remaining_time": "0:24:05", "throughput": 1883.84, "total_tokens": 672016} |
| {"current_steps": 1640, "total_steps": 8260, "loss": 0.098, "lr": 4.85389352883015e-05, "epoch": 1.9854721549636802, "percentage": 19.85, "elapsed_time": "0:05:57", "remaining_time": "0:24:03", "throughput": 1884.42, "total_tokens": 674096} |
| {"current_steps": 1645, "total_steps": 8260, "loss": 0.1238, "lr": 4.8521088607342016e-05, "epoch": 1.9915254237288136, "percentage": 19.92, "elapsed_time": "0:05:58", "remaining_time": "0:24:02", "throughput": 1884.96, "total_tokens": 676208} |
| {"current_steps": 1650, "total_steps": 8260, "loss": 0.0214, "lr": 4.850313691124707e-05, "epoch": 1.9975786924939467, "percentage": 19.98, "elapsed_time": "0:05:59", "remaining_time": "0:24:01", "throughput": 1885.36, "total_tokens": 678256} |
| {"current_steps": 1652, "total_steps": 8260, "eval_loss": 0.13030049204826355, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:06:08", "remaining_time": "0:24:32", "throughput": 1843.41, "total_tokens": 678688} |
| {"current_steps": 1655, "total_steps": 8260, "loss": 0.1162, "lr": 4.8485080280166006e-05, "epoch": 2.00363196125908, "percentage": 20.04, "elapsed_time": "0:06:10", "remaining_time": "0:24:38", "throughput": 1835.86, "total_tokens": 679936} |
| {"current_steps": 1660, "total_steps": 8260, "loss": 0.1483, "lr": 4.8466918794716666e-05, "epoch": 2.009685230024213, "percentage": 20.1, "elapsed_time": "0:06:11", "remaining_time": "0:24:36", "throughput": 1836.5, "total_tokens": 681984} |
| {"current_steps": 1665, "total_steps": 8260, "loss": 0.1106, "lr": 4.8448652535985045e-05, "epoch": 2.015738498789346, "percentage": 20.16, "elapsed_time": "0:06:12", "remaining_time": "0:24:34", "throughput": 1837.17, "total_tokens": 684096} |
| {"current_steps": 1670, "total_steps": 8260, "loss": 0.0845, "lr": 4.8430281585524926e-05, "epoch": 2.0217917675544794, "percentage": 20.22, "elapsed_time": "0:06:13", "remaining_time": "0:24:33", "throughput": 1837.62, "total_tokens": 686016} |
| {"current_steps": 1675, "total_steps": 8260, "loss": 0.1759, "lr": 4.841180602535751e-05, "epoch": 2.0278450363196128, "percentage": 20.28, "elapsed_time": "0:06:14", "remaining_time": "0:24:31", "throughput": 1838.11, "total_tokens": 687904} |
| {"current_steps": 1680, "total_steps": 8260, "loss": 0.1656, "lr": 4.839322593797104e-05, "epoch": 2.0338983050847457, "percentage": 20.34, "elapsed_time": "0:06:15", "remaining_time": "0:24:29", "throughput": 1838.58, "total_tokens": 689888} |
| {"current_steps": 1685, "total_steps": 8260, "loss": 0.074, "lr": 4.837454140632045e-05, "epoch": 2.039951573849879, "percentage": 20.4, "elapsed_time": "0:06:16", "remaining_time": "0:24:27", "throughput": 1838.91, "total_tokens": 691808} |
| {"current_steps": 1690, "total_steps": 8260, "loss": 0.1083, "lr": 4.8355752513826995e-05, "epoch": 2.046004842615012, "percentage": 20.46, "elapsed_time": "0:06:17", "remaining_time": "0:24:26", "throughput": 1839.37, "total_tokens": 693856} |
| {"current_steps": 1695, "total_steps": 8260, "loss": 0.161, "lr": 4.833685934437787e-05, "epoch": 2.0520581113801453, "percentage": 20.52, "elapsed_time": "0:06:18", "remaining_time": "0:24:24", "throughput": 1839.67, "total_tokens": 695680} |
| {"current_steps": 1700, "total_steps": 8260, "loss": 0.1389, "lr": 4.831786198232583e-05, "epoch": 2.0581113801452786, "percentage": 20.58, "elapsed_time": "0:06:19", "remaining_time": "0:24:23", "throughput": 1840.32, "total_tokens": 697792} |
| {"current_steps": 1705, "total_steps": 8260, "loss": 0.2292, "lr": 4.8298760512488824e-05, "epoch": 2.0641646489104115, "percentage": 20.64, "elapsed_time": "0:06:20", "remaining_time": "0:24:21", "throughput": 1840.76, "total_tokens": 699712} |
| {"current_steps": 1710, "total_steps": 8260, "loss": 0.0851, "lr": 4.827955502014963e-05, "epoch": 2.070217917675545, "percentage": 20.7, "elapsed_time": "0:06:21", "remaining_time": "0:24:19", "throughput": 1841.31, "total_tokens": 701792} |
| {"current_steps": 1715, "total_steps": 8260, "loss": 0.1261, "lr": 4.826024559105542e-05, "epoch": 2.0762711864406778, "percentage": 20.76, "elapsed_time": "0:06:22", "remaining_time": "0:24:18", "throughput": 1842.07, "total_tokens": 703936} |
| {"current_steps": 1720, "total_steps": 8260, "loss": 0.1487, "lr": 4.8240832311417465e-05, "epoch": 2.082324455205811, "percentage": 20.82, "elapsed_time": "0:06:23", "remaining_time": "0:24:16", "throughput": 1842.59, "total_tokens": 705952} |
| {"current_steps": 1725, "total_steps": 8260, "loss": 0.1542, "lr": 4.822131526791065e-05, "epoch": 2.0883777239709445, "percentage": 20.88, "elapsed_time": "0:06:24", "remaining_time": "0:24:15", "throughput": 1843.21, "total_tokens": 708064} |
| {"current_steps": 1730, "total_steps": 8260, "loss": 0.1238, "lr": 4.820169454767318e-05, "epoch": 2.0944309927360774, "percentage": 20.94, "elapsed_time": "0:06:25", "remaining_time": "0:24:13", "throughput": 1843.55, "total_tokens": 709952} |
| {"current_steps": 1735, "total_steps": 8260, "loss": 0.0884, "lr": 4.8181970238306104e-05, "epoch": 2.1004842615012107, "percentage": 21.0, "elapsed_time": "0:06:26", "remaining_time": "0:24:12", "throughput": 1844.1, "total_tokens": 712032} |
| {"current_steps": 1740, "total_steps": 8260, "loss": 0.0498, "lr": 4.816214242787302e-05, "epoch": 2.106537530266344, "percentage": 21.07, "elapsed_time": "0:06:27", "remaining_time": "0:24:10", "throughput": 1844.62, "total_tokens": 714048} |
| {"current_steps": 1745, "total_steps": 8260, "loss": 0.1447, "lr": 4.814221120489958e-05, "epoch": 2.112590799031477, "percentage": 21.13, "elapsed_time": "0:06:28", "remaining_time": "0:24:09", "throughput": 1845.33, "total_tokens": 716224} |
| {"current_steps": 1750, "total_steps": 8260, "loss": 0.1725, "lr": 4.812217665837316e-05, "epoch": 2.1186440677966103, "percentage": 21.19, "elapsed_time": "0:06:29", "remaining_time": "0:24:07", "throughput": 1845.57, "total_tokens": 718144} |
| {"current_steps": 1755, "total_steps": 8260, "loss": 0.1138, "lr": 4.810203887774247e-05, "epoch": 2.124697336561743, "percentage": 21.25, "elapsed_time": "0:06:30", "remaining_time": "0:24:06", "throughput": 1846.24, "total_tokens": 720288} |
| {"current_steps": 1760, "total_steps": 8260, "loss": 0.2025, "lr": 4.808179795291712e-05, "epoch": 2.1307506053268765, "percentage": 21.31, "elapsed_time": "0:06:31", "remaining_time": "0:24:04", "throughput": 1846.89, "total_tokens": 722368} |
| {"current_steps": 1765, "total_steps": 8260, "loss": 0.1678, "lr": 4.8061453974267195e-05, "epoch": 2.13680387409201, "percentage": 21.37, "elapsed_time": "0:06:32", "remaining_time": "0:24:03", "throughput": 1847.49, "total_tokens": 724480} |
| {"current_steps": 1770, "total_steps": 8260, "loss": 0.0949, "lr": 4.804100703262294e-05, "epoch": 2.142857142857143, "percentage": 21.43, "elapsed_time": "0:06:33", "remaining_time": "0:24:01", "throughput": 1848.45, "total_tokens": 726784} |
| {"current_steps": 1775, "total_steps": 8260, "loss": 0.1672, "lr": 4.8020457219274266e-05, "epoch": 2.148910411622276, "percentage": 21.49, "elapsed_time": "0:06:34", "remaining_time": "0:24:00", "throughput": 1848.85, "total_tokens": 728768} |
| {"current_steps": 1780, "total_steps": 8260, "loss": 0.1165, "lr": 4.799980462597039e-05, "epoch": 2.154963680387409, "percentage": 21.55, "elapsed_time": "0:06:35", "remaining_time": "0:23:58", "throughput": 1849.3, "total_tokens": 730816} |
| {"current_steps": 1785, "total_steps": 8260, "loss": 0.0833, "lr": 4.7979049344919416e-05, "epoch": 2.1610169491525424, "percentage": 21.61, "elapsed_time": "0:06:36", "remaining_time": "0:23:57", "throughput": 1849.78, "total_tokens": 732896} |
| {"current_steps": 1790, "total_steps": 8260, "loss": 0.2183, "lr": 4.795819146878792e-05, "epoch": 2.1670702179176757, "percentage": 21.67, "elapsed_time": "0:06:37", "remaining_time": "0:23:55", "throughput": 1850.06, "total_tokens": 734816} |
| {"current_steps": 1795, "total_steps": 8260, "loss": 0.1487, "lr": 4.7937231090700516e-05, "epoch": 2.1731234866828086, "percentage": 21.73, "elapsed_time": "0:06:38", "remaining_time": "0:23:54", "throughput": 1850.36, "total_tokens": 736800} |
| {"current_steps": 1800, "total_steps": 8260, "loss": 0.1569, "lr": 4.7916168304239496e-05, "epoch": 2.179176755447942, "percentage": 21.79, "elapsed_time": "0:06:39", "remaining_time": "0:23:52", "throughput": 1851.0, "total_tokens": 738944} |
| {"current_steps": 1805, "total_steps": 8260, "loss": 0.0978, "lr": 4.789500320344435e-05, "epoch": 2.185230024213075, "percentage": 21.85, "elapsed_time": "0:06:40", "remaining_time": "0:23:51", "throughput": 1851.78, "total_tokens": 741184} |
| {"current_steps": 1810, "total_steps": 8260, "loss": 0.0944, "lr": 4.787373588281138e-05, "epoch": 2.1912832929782082, "percentage": 21.91, "elapsed_time": "0:06:41", "remaining_time": "0:23:49", "throughput": 1852.08, "total_tokens": 743136} |
| {"current_steps": 1815, "total_steps": 8260, "loss": 0.0583, "lr": 4.785236643729327e-05, "epoch": 2.1973365617433416, "percentage": 21.97, "elapsed_time": "0:06:42", "remaining_time": "0:23:48", "throughput": 1852.42, "total_tokens": 745152} |
| {"current_steps": 1820, "total_steps": 8260, "loss": 0.2117, "lr": 4.7830894962298675e-05, "epoch": 2.2033898305084745, "percentage": 22.03, "elapsed_time": "0:06:43", "remaining_time": "0:23:46", "throughput": 1852.81, "total_tokens": 747200} |
| {"current_steps": 1825, "total_steps": 8260, "loss": 0.1821, "lr": 4.7809321553691764e-05, "epoch": 2.209443099273608, "percentage": 22.09, "elapsed_time": "0:06:44", "remaining_time": "0:23:45", "throughput": 1853.5, "total_tokens": 749344} |
| {"current_steps": 1830, "total_steps": 8260, "loss": 0.1639, "lr": 4.778764630779183e-05, "epoch": 2.2154963680387407, "percentage": 22.15, "elapsed_time": "0:06:45", "remaining_time": "0:23:44", "throughput": 1853.92, "total_tokens": 751360} |
| {"current_steps": 1835, "total_steps": 8260, "loss": 0.1186, "lr": 4.7765869321372836e-05, "epoch": 2.221549636803874, "percentage": 22.22, "elapsed_time": "0:06:46", "remaining_time": "0:23:42", "throughput": 1854.35, "total_tokens": 753408} |
| {"current_steps": 1840, "total_steps": 8260, "loss": 0.141, "lr": 4.774399069166296e-05, "epoch": 2.2276029055690074, "percentage": 22.28, "elapsed_time": "0:06:47", "remaining_time": "0:23:41", "throughput": 1854.79, "total_tokens": 755456} |
| {"current_steps": 1845, "total_steps": 8260, "loss": 0.148, "lr": 4.772201051634426e-05, "epoch": 2.2336561743341403, "percentage": 22.34, "elapsed_time": "0:06:48", "remaining_time": "0:23:39", "throughput": 1855.22, "total_tokens": 757504} |
| {"current_steps": 1850, "total_steps": 8260, "loss": 0.1751, "lr": 4.769992889355208e-05, "epoch": 2.2397094430992737, "percentage": 22.4, "elapsed_time": "0:06:49", "remaining_time": "0:23:38", "throughput": 1855.71, "total_tokens": 759584} |
| {"current_steps": 1855, "total_steps": 8260, "loss": 0.087, "lr": 4.767774592187475e-05, "epoch": 2.2457627118644066, "percentage": 22.46, "elapsed_time": "0:06:50", "remaining_time": "0:23:36", "throughput": 1856.22, "total_tokens": 761632} |
| {"current_steps": 1860, "total_steps": 8260, "loss": 0.1973, "lr": 4.76554617003531e-05, "epoch": 2.25181598062954, "percentage": 22.52, "elapsed_time": "0:06:51", "remaining_time": "0:23:35", "throughput": 1856.76, "total_tokens": 763648} |
| {"current_steps": 1865, "total_steps": 8260, "loss": 0.1566, "lr": 4.763307632847998e-05, "epoch": 2.2578692493946733, "percentage": 22.58, "elapsed_time": "0:06:52", "remaining_time": "0:23:33", "throughput": 1857.38, "total_tokens": 765696} |
| {"current_steps": 1870, "total_steps": 8260, "loss": 0.1483, "lr": 4.761058990619986e-05, "epoch": 2.263922518159806, "percentage": 22.64, "elapsed_time": "0:06:53", "remaining_time": "0:23:32", "throughput": 1857.95, "total_tokens": 767776} |
| {"current_steps": 1875, "total_steps": 8260, "loss": 0.1479, "lr": 4.7588002533908405e-05, "epoch": 2.2699757869249395, "percentage": 22.7, "elapsed_time": "0:06:54", "remaining_time": "0:23:30", "throughput": 1858.42, "total_tokens": 769856} |
| {"current_steps": 1880, "total_steps": 8260, "loss": 0.1302, "lr": 4.756531431245195e-05, "epoch": 2.2760290556900724, "percentage": 22.76, "elapsed_time": "0:06:55", "remaining_time": "0:23:29", "throughput": 1858.91, "total_tokens": 771904} |
| {"current_steps": 1885, "total_steps": 8260, "loss": 0.0498, "lr": 4.75425253431271e-05, "epoch": 2.2820823244552058, "percentage": 22.82, "elapsed_time": "0:06:56", "remaining_time": "0:23:27", "throughput": 1859.5, "total_tokens": 774016} |
| {"current_steps": 1890, "total_steps": 8260, "loss": 0.2686, "lr": 4.7519635727680286e-05, "epoch": 2.288135593220339, "percentage": 22.88, "elapsed_time": "0:06:57", "remaining_time": "0:23:26", "throughput": 1860.0, "total_tokens": 776000} |
| {"current_steps": 1895, "total_steps": 8260, "loss": 0.1204, "lr": 4.749664556830731e-05, "epoch": 2.294188861985472, "percentage": 22.94, "elapsed_time": "0:06:58", "remaining_time": "0:23:24", "throughput": 1860.64, "total_tokens": 778208} |
| {"current_steps": 1900, "total_steps": 8260, "loss": 0.1188, "lr": 4.747355496765283e-05, "epoch": 2.3002421307506054, "percentage": 23.0, "elapsed_time": "0:06:59", "remaining_time": "0:23:23", "throughput": 1861.0, "total_tokens": 780128} |
| {"current_steps": 1905, "total_steps": 8260, "loss": 0.1179, "lr": 4.745036402880999e-05, "epoch": 2.3062953995157383, "percentage": 23.06, "elapsed_time": "0:07:00", "remaining_time": "0:23:21", "throughput": 1861.35, "total_tokens": 782112} |
| {"current_steps": 1910, "total_steps": 8260, "loss": 0.1186, "lr": 4.7427072855319886e-05, "epoch": 2.3123486682808716, "percentage": 23.12, "elapsed_time": "0:07:01", "remaining_time": "0:23:20", "throughput": 1861.93, "total_tokens": 784256} |
| {"current_steps": 1915, "total_steps": 8260, "loss": 0.0896, "lr": 4.740368155117116e-05, "epoch": 2.318401937046005, "percentage": 23.18, "elapsed_time": "0:07:02", "remaining_time": "0:23:18", "throughput": 1862.24, "total_tokens": 786272} |
| {"current_steps": 1920, "total_steps": 8260, "loss": 0.0842, "lr": 4.7380190220799484e-05, "epoch": 2.324455205811138, "percentage": 23.24, "elapsed_time": "0:07:03", "remaining_time": "0:23:17", "throughput": 1862.69, "total_tokens": 788352} |
| {"current_steps": 1925, "total_steps": 8260, "loss": 0.1751, "lr": 4.735659896908713e-05, "epoch": 2.330508474576271, "percentage": 23.31, "elapsed_time": "0:07:04", "remaining_time": "0:23:16", "throughput": 1863.0, "total_tokens": 790368} |
| {"current_steps": 1930, "total_steps": 8260, "loss": 0.2163, "lr": 4.73329079013625e-05, "epoch": 2.3365617433414045, "percentage": 23.37, "elapsed_time": "0:07:05", "remaining_time": "0:23:14", "throughput": 1863.4, "total_tokens": 792320} |
| {"current_steps": 1935, "total_steps": 8260, "loss": 0.0566, "lr": 4.730911712339964e-05, "epoch": 2.3426150121065374, "percentage": 23.43, "elapsed_time": "0:07:06", "remaining_time": "0:23:13", "throughput": 1863.7, "total_tokens": 794272} |
| {"current_steps": 1940, "total_steps": 8260, "loss": 0.2005, "lr": 4.728522674141776e-05, "epoch": 2.348668280871671, "percentage": 23.49, "elapsed_time": "0:07:07", "remaining_time": "0:23:11", "throughput": 1864.37, "total_tokens": 796416} |
| {"current_steps": 1945, "total_steps": 8260, "loss": 0.0867, "lr": 4.7261236862080805e-05, "epoch": 2.3547215496368037, "percentage": 23.55, "elapsed_time": "0:07:08", "remaining_time": "0:23:10", "throughput": 1864.92, "total_tokens": 798432} |
| {"current_steps": 1950, "total_steps": 8260, "loss": 0.1207, "lr": 4.723714759249692e-05, "epoch": 2.360774818401937, "percentage": 23.61, "elapsed_time": "0:07:09", "remaining_time": "0:23:08", "throughput": 1865.3, "total_tokens": 800480} |
| {"current_steps": 1955, "total_steps": 8260, "loss": 0.17, "lr": 4.721295904021802e-05, "epoch": 2.3668280871670704, "percentage": 23.67, "elapsed_time": "0:07:10", "remaining_time": "0:23:07", "throughput": 1865.57, "total_tokens": 802432} |
| {"current_steps": 1960, "total_steps": 8260, "loss": 0.1169, "lr": 4.718867131323927e-05, "epoch": 2.3728813559322033, "percentage": 23.73, "elapsed_time": "0:07:11", "remaining_time": "0:23:05", "throughput": 1865.81, "total_tokens": 804352} |
| {"current_steps": 1965, "total_steps": 8260, "loss": 0.1278, "lr": 4.7164284519998644e-05, "epoch": 2.3789346246973366, "percentage": 23.79, "elapsed_time": "0:07:12", "remaining_time": "0:23:04", "throughput": 1866.14, "total_tokens": 806272} |
| {"current_steps": 1970, "total_steps": 8260, "loss": 0.1337, "lr": 4.71397987693764e-05, "epoch": 2.38498789346247, "percentage": 23.85, "elapsed_time": "0:07:13", "remaining_time": "0:23:02", "throughput": 1866.56, "total_tokens": 808352} |
| {"current_steps": 1975, "total_steps": 8260, "loss": 0.0696, "lr": 4.711521417069462e-05, "epoch": 2.391041162227603, "percentage": 23.91, "elapsed_time": "0:07:14", "remaining_time": "0:23:01", "throughput": 1866.96, "total_tokens": 810304} |
| {"current_steps": 1980, "total_steps": 8260, "loss": 0.0967, "lr": 4.709053083371672e-05, "epoch": 2.3970944309927362, "percentage": 23.97, "elapsed_time": "0:07:15", "remaining_time": "0:22:59", "throughput": 1867.39, "total_tokens": 812384} |
| {"current_steps": 1985, "total_steps": 8260, "loss": 0.1226, "lr": 4.706574886864696e-05, "epoch": 2.403147699757869, "percentage": 24.03, "elapsed_time": "0:07:16", "remaining_time": "0:22:58", "throughput": 1867.78, "total_tokens": 814400} |
| {"current_steps": 1990, "total_steps": 8260, "loss": 0.1083, "lr": 4.7040868386129935e-05, "epoch": 2.4092009685230025, "percentage": 24.09, "elapsed_time": "0:07:17", "remaining_time": "0:22:56", "throughput": 1868.16, "total_tokens": 816448} |
| {"current_steps": 1995, "total_steps": 8260, "loss": 0.1296, "lr": 4.701588949725009e-05, "epoch": 2.415254237288136, "percentage": 24.15, "elapsed_time": "0:07:18", "remaining_time": "0:22:55", "throughput": 1868.85, "total_tokens": 818624} |
| {"current_steps": 2000, "total_steps": 8260, "loss": 0.0944, "lr": 4.699081231353124e-05, "epoch": 2.4213075060532687, "percentage": 24.21, "elapsed_time": "0:07:19", "remaining_time": "0:22:54", "throughput": 1869.25, "total_tokens": 820640} |
| {"current_steps": 2005, "total_steps": 8260, "loss": 0.1974, "lr": 4.696563694693605e-05, "epoch": 2.427360774818402, "percentage": 24.27, "elapsed_time": "0:07:20", "remaining_time": "0:22:52", "throughput": 1869.66, "total_tokens": 822752} |
| {"current_steps": 2010, "total_steps": 8260, "loss": 0.1785, "lr": 4.694036350986556e-05, "epoch": 2.433414043583535, "percentage": 24.33, "elapsed_time": "0:07:21", "remaining_time": "0:22:51", "throughput": 1870.04, "total_tokens": 824832} |
| {"current_steps": 2015, "total_steps": 8260, "loss": 0.1265, "lr": 4.6914992115158634e-05, "epoch": 2.4394673123486683, "percentage": 24.39, "elapsed_time": "0:07:22", "remaining_time": "0:22:50", "throughput": 1870.37, "total_tokens": 826816} |
| {"current_steps": 2020, "total_steps": 8260, "loss": 0.1277, "lr": 4.688952287609152e-05, "epoch": 2.4455205811138017, "percentage": 24.46, "elapsed_time": "0:07:23", "remaining_time": "0:22:48", "throughput": 1870.56, "total_tokens": 828672} |
| {"current_steps": 2025, "total_steps": 8260, "loss": 0.0989, "lr": 4.686395590637732e-05, "epoch": 2.4515738498789346, "percentage": 24.52, "elapsed_time": "0:07:24", "remaining_time": "0:22:47", "throughput": 1870.94, "total_tokens": 830752} |
| {"current_steps": 2030, "total_steps": 8260, "loss": 0.159, "lr": 4.683829132016544e-05, "epoch": 2.457627118644068, "percentage": 24.58, "elapsed_time": "0:07:25", "remaining_time": "0:22:45", "throughput": 1871.34, "total_tokens": 832832} |
| {"current_steps": 2035, "total_steps": 8260, "loss": 0.1517, "lr": 4.6812529232041144e-05, "epoch": 2.463680387409201, "percentage": 24.64, "elapsed_time": "0:07:26", "remaining_time": "0:22:44", "throughput": 1871.64, "total_tokens": 834848} |
| {"current_steps": 2040, "total_steps": 8260, "loss": 0.0649, "lr": 4.6786669757025016e-05, "epoch": 2.469733656174334, "percentage": 24.7, "elapsed_time": "0:07:27", "remaining_time": "0:22:43", "throughput": 1872.02, "total_tokens": 836864} |
| {"current_steps": 2045, "total_steps": 8260, "loss": 0.1772, "lr": 4.676071301057243e-05, "epoch": 2.4757869249394675, "percentage": 24.76, "elapsed_time": "0:07:27", "remaining_time": "0:22:41", "throughput": 1872.43, "total_tokens": 838784} |
| {"current_steps": 2050, "total_steps": 8260, "loss": 0.092, "lr": 4.673465910857306e-05, "epoch": 2.4818401937046004, "percentage": 24.82, "elapsed_time": "0:07:28", "remaining_time": "0:22:40", "throughput": 1872.86, "total_tokens": 840832} |
| {"current_steps": 2055, "total_steps": 8260, "loss": 0.0931, "lr": 4.670850816735035e-05, "epoch": 2.4878934624697338, "percentage": 24.88, "elapsed_time": "0:07:29", "remaining_time": "0:22:38", "throughput": 1873.05, "total_tokens": 842752} |
| {"current_steps": 2060, "total_steps": 8260, "loss": 0.1937, "lr": 4.668226030366101e-05, "epoch": 2.4939467312348667, "percentage": 24.94, "elapsed_time": "0:07:30", "remaining_time": "0:22:37", "throughput": 1873.59, "total_tokens": 844928} |
| {"current_steps": 2065, "total_steps": 8260, "loss": 0.1076, "lr": 4.665591563469445e-05, "epoch": 2.5, "percentage": 25.0, "elapsed_time": "0:07:31", "remaining_time": "0:22:35", "throughput": 1874.13, "total_tokens": 847104} |
| {"current_steps": 2065, "total_steps": 8260, "eval_loss": 0.12984032928943634, "epoch": 2.5, "percentage": 25.0, "elapsed_time": "0:07:40", "remaining_time": "0:23:00", "throughput": 1841.1, "total_tokens": 847104} |
| {"current_steps": 2070, "total_steps": 8260, "loss": 0.0915, "lr": 4.662947427807231e-05, "epoch": 2.5060532687651333, "percentage": 25.06, "elapsed_time": "0:07:42", "remaining_time": "0:23:02", "throughput": 1836.09, "total_tokens": 849120} |
| {"current_steps": 2075, "total_steps": 8260, "loss": 0.1028, "lr": 4.6602936351847924e-05, "epoch": 2.5121065375302662, "percentage": 25.12, "elapsed_time": "0:07:43", "remaining_time": "0:23:01", "throughput": 1836.74, "total_tokens": 851296} |
| {"current_steps": 2080, "total_steps": 8260, "loss": 0.2251, "lr": 4.657630197450577e-05, "epoch": 2.5181598062953996, "percentage": 25.18, "elapsed_time": "0:07:44", "remaining_time": "0:23:00", "throughput": 1837.13, "total_tokens": 853344} |
| {"current_steps": 2085, "total_steps": 8260, "loss": 0.1715, "lr": 4.6549571264960945e-05, "epoch": 2.5242130750605325, "percentage": 25.24, "elapsed_time": "0:07:45", "remaining_time": "0:22:58", "throughput": 1837.38, "total_tokens": 855264} |
| {"current_steps": 2090, "total_steps": 8260, "loss": 0.1721, "lr": 4.652274434255866e-05, "epoch": 2.530266343825666, "percentage": 25.3, "elapsed_time": "0:07:46", "remaining_time": "0:22:56", "throughput": 1837.69, "total_tokens": 857152} |
| {"current_steps": 2095, "total_steps": 8260, "loss": 0.1249, "lr": 4.6495821327073675e-05, "epoch": 2.536319612590799, "percentage": 25.36, "elapsed_time": "0:07:47", "remaining_time": "0:22:55", "throughput": 1838.22, "total_tokens": 859232} |
| {"current_steps": 2100, "total_steps": 8260, "loss": 0.1052, "lr": 4.6468802338709783e-05, "epoch": 2.542372881355932, "percentage": 25.42, "elapsed_time": "0:07:48", "remaining_time": "0:22:54", "throughput": 1838.68, "total_tokens": 861312} |
| {"current_steps": 2105, "total_steps": 8260, "loss": 0.1612, "lr": 4.644168749809929e-05, "epoch": 2.5484261501210654, "percentage": 25.48, "elapsed_time": "0:07:49", "remaining_time": "0:22:52", "throughput": 1839.42, "total_tokens": 863552} |
| {"current_steps": 2110, "total_steps": 8260, "loss": 0.1182, "lr": 4.6414476926302406e-05, "epoch": 2.5544794188861983, "percentage": 25.54, "elapsed_time": "0:07:50", "remaining_time": "0:22:51", "throughput": 1839.86, "total_tokens": 865568} |
| {"current_steps": 2115, "total_steps": 8260, "loss": 0.1087, "lr": 4.638717074480682e-05, "epoch": 2.5605326876513317, "percentage": 25.61, "elapsed_time": "0:07:51", "remaining_time": "0:22:49", "throughput": 1840.22, "total_tokens": 867488} |
| {"current_steps": 2120, "total_steps": 8260, "loss": 0.0899, "lr": 4.6359769075527026e-05, "epoch": 2.566585956416465, "percentage": 25.67, "elapsed_time": "0:07:52", "remaining_time": "0:22:48", "throughput": 1840.64, "total_tokens": 869504} |
| {"current_steps": 2125, "total_steps": 8260, "loss": 0.0571, "lr": 4.6332272040803895e-05, "epoch": 2.572639225181598, "percentage": 25.73, "elapsed_time": "0:07:53", "remaining_time": "0:22:46", "throughput": 1841.25, "total_tokens": 871680} |
| {"current_steps": 2130, "total_steps": 8260, "loss": 0.0686, "lr": 4.630467976340405e-05, "epoch": 2.5786924939467313, "percentage": 25.79, "elapsed_time": "0:07:54", "remaining_time": "0:22:45", "throughput": 1841.96, "total_tokens": 873856} |
| {"current_steps": 2135, "total_steps": 8260, "loss": 0.1269, "lr": 4.6276992366519365e-05, "epoch": 2.584745762711864, "percentage": 25.85, "elapsed_time": "0:07:55", "remaining_time": "0:22:43", "throughput": 1842.44, "total_tokens": 875968} |
| {"current_steps": 2140, "total_steps": 8260, "loss": 0.1176, "lr": 4.624920997376637e-05, "epoch": 2.5907990314769975, "percentage": 25.91, "elapsed_time": "0:07:56", "remaining_time": "0:22:42", "throughput": 1843.06, "total_tokens": 878144} |
| {"current_steps": 2145, "total_steps": 8260, "loss": 0.1298, "lr": 4.622133270918576e-05, "epoch": 2.596852300242131, "percentage": 25.97, "elapsed_time": "0:07:57", "remaining_time": "0:22:41", "throughput": 1843.65, "total_tokens": 880320} |
| {"current_steps": 2150, "total_steps": 8260, "loss": 0.0948, "lr": 4.619336069724177e-05, "epoch": 2.6029055690072638, "percentage": 26.03, "elapsed_time": "0:07:58", "remaining_time": "0:22:39", "throughput": 1844.1, "total_tokens": 882400} |
| {"current_steps": 2155, "total_steps": 8260, "loss": 0.1303, "lr": 4.6165294062821696e-05, "epoch": 2.608958837772397, "percentage": 26.09, "elapsed_time": "0:07:59", "remaining_time": "0:22:38", "throughput": 1844.68, "total_tokens": 884512} |
| {"current_steps": 2160, "total_steps": 8260, "loss": 0.0698, "lr": 4.613713293123525e-05, "epoch": 2.61501210653753, "percentage": 26.15, "elapsed_time": "0:08:00", "remaining_time": "0:22:37", "throughput": 1845.07, "total_tokens": 886592} |
| {"current_steps": 2165, "total_steps": 8260, "loss": 0.0822, "lr": 4.610887742821408e-05, "epoch": 2.6210653753026634, "percentage": 26.21, "elapsed_time": "0:08:01", "remaining_time": "0:22:35", "throughput": 1845.39, "total_tokens": 888640} |
| {"current_steps": 2170, "total_steps": 8260, "loss": 0.1111, "lr": 4.608052767991118e-05, "epoch": 2.6271186440677967, "percentage": 26.27, "elapsed_time": "0:08:02", "remaining_time": "0:22:34", "throughput": 1846.04, "total_tokens": 890880} |
| {"current_steps": 2175, "total_steps": 8260, "loss": 0.0978, "lr": 4.605208381290029e-05, "epoch": 2.6331719128329296, "percentage": 26.33, "elapsed_time": "0:08:03", "remaining_time": "0:22:32", "throughput": 1846.42, "total_tokens": 892832} |
| {"current_steps": 2180, "total_steps": 8260, "loss": 0.124, "lr": 4.6023545954175406e-05, "epoch": 2.639225181598063, "percentage": 26.39, "elapsed_time": "0:08:04", "remaining_time": "0:22:31", "throughput": 1846.86, "total_tokens": 894912} |
| {"current_steps": 2185, "total_steps": 8260, "loss": 0.071, "lr": 4.599491423115014e-05, "epoch": 2.645278450363196, "percentage": 26.45, "elapsed_time": "0:08:05", "remaining_time": "0:22:29", "throughput": 1847.34, "total_tokens": 896960} |
| {"current_steps": 2190, "total_steps": 8260, "loss": 0.0596, "lr": 4.59661887716572e-05, "epoch": 2.651331719128329, "percentage": 26.51, "elapsed_time": "0:08:06", "remaining_time": "0:22:28", "throughput": 1847.54, "total_tokens": 898816} |
| {"current_steps": 2195, "total_steps": 8260, "loss": 0.0851, "lr": 4.5937369703947785e-05, "epoch": 2.6573849878934626, "percentage": 26.57, "elapsed_time": "0:08:07", "remaining_time": "0:22:27", "throughput": 1847.84, "total_tokens": 900832} |
| {"current_steps": 2200, "total_steps": 8260, "loss": 0.1357, "lr": 4.590845715669104e-05, "epoch": 2.663438256658596, "percentage": 26.63, "elapsed_time": "0:08:08", "remaining_time": "0:22:25", "throughput": 1848.3, "total_tokens": 902944} |
| {"current_steps": 2205, "total_steps": 8260, "loss": 0.1258, "lr": 4.5879451258973465e-05, "epoch": 2.669491525423729, "percentage": 26.69, "elapsed_time": "0:08:09", "remaining_time": "0:22:24", "throughput": 1848.75, "total_tokens": 905056} |
| {"current_steps": 2210, "total_steps": 8260, "loss": 0.095, "lr": 4.5850352140298356e-05, "epoch": 2.6755447941888617, "percentage": 26.76, "elapsed_time": "0:08:10", "remaining_time": "0:22:22", "throughput": 1849.32, "total_tokens": 907232} |
| {"current_steps": 2215, "total_steps": 8260, "loss": 0.1285, "lr": 4.582115993058519e-05, "epoch": 2.681598062953995, "percentage": 26.82, "elapsed_time": "0:08:11", "remaining_time": "0:22:21", "throughput": 1849.71, "total_tokens": 909248} |
| {"current_steps": 2220, "total_steps": 8260, "loss": 0.1192, "lr": 4.5791874760169095e-05, "epoch": 2.6876513317191284, "percentage": 26.88, "elapsed_time": "0:08:12", "remaining_time": "0:22:20", "throughput": 1850.01, "total_tokens": 911264} |
| {"current_steps": 2225, "total_steps": 8260, "loss": 0.1189, "lr": 4.5762496759800246e-05, "epoch": 2.6937046004842617, "percentage": 26.94, "elapsed_time": "0:08:13", "remaining_time": "0:22:18", "throughput": 1850.39, "total_tokens": 913216} |
| {"current_steps": 2230, "total_steps": 8260, "loss": 0.0983, "lr": 4.573302606064324e-05, "epoch": 2.6997578692493946, "percentage": 27.0, "elapsed_time": "0:08:14", "remaining_time": "0:22:17", "throughput": 1850.96, "total_tokens": 915328} |
| {"current_steps": 2235, "total_steps": 8260, "loss": 0.1138, "lr": 4.5703462794276574e-05, "epoch": 2.705811138014528, "percentage": 27.06, "elapsed_time": "0:08:15", "remaining_time": "0:22:15", "throughput": 1851.6, "total_tokens": 917568} |
| {"current_steps": 2240, "total_steps": 8260, "loss": 0.139, "lr": 4.567380709269205e-05, "epoch": 2.711864406779661, "percentage": 27.12, "elapsed_time": "0:08:16", "remaining_time": "0:22:14", "throughput": 1851.8, "total_tokens": 919424} |
| {"current_steps": 2245, "total_steps": 8260, "loss": 0.1266, "lr": 4.5644059088294145e-05, "epoch": 2.7179176755447942, "percentage": 27.18, "elapsed_time": "0:08:17", "remaining_time": "0:22:12", "throughput": 1852.08, "total_tokens": 921376} |
| {"current_steps": 2250, "total_steps": 8260, "loss": 0.065, "lr": 4.561421891389943e-05, "epoch": 2.7239709443099276, "percentage": 27.24, "elapsed_time": "0:08:18", "remaining_time": "0:22:11", "throughput": 1852.57, "total_tokens": 923456} |
| {"current_steps": 2255, "total_steps": 8260, "loss": 0.1429, "lr": 4.558428670273601e-05, "epoch": 2.7300242130750605, "percentage": 27.3, "elapsed_time": "0:08:19", "remaining_time": "0:22:10", "throughput": 1852.91, "total_tokens": 925504} |
| {"current_steps": 2260, "total_steps": 8260, "loss": 0.0692, "lr": 4.555426258844292e-05, "epoch": 2.736077481840194, "percentage": 27.36, "elapsed_time": "0:08:20", "remaining_time": "0:22:08", "throughput": 1853.17, "total_tokens": 927456} |
| {"current_steps": 2265, "total_steps": 8260, "loss": 0.073, "lr": 4.552414670506949e-05, "epoch": 2.7421307506053267, "percentage": 27.42, "elapsed_time": "0:08:21", "remaining_time": "0:22:07", "throughput": 1853.49, "total_tokens": 929440} |
| {"current_steps": 2270, "total_steps": 8260, "loss": 0.0886, "lr": 4.5493939187074784e-05, "epoch": 2.74818401937046, "percentage": 27.48, "elapsed_time": "0:08:22", "remaining_time": "0:22:05", "throughput": 1854.18, "total_tokens": 931712} |
| {"current_steps": 2275, "total_steps": 8260, "loss": 0.147, "lr": 4.5463640169326994e-05, "epoch": 2.7542372881355934, "percentage": 27.54, "elapsed_time": "0:08:23", "remaining_time": "0:22:04", "throughput": 1854.46, "total_tokens": 933664} |
| {"current_steps": 2280, "total_steps": 8260, "loss": 0.0842, "lr": 4.5433249787102816e-05, "epoch": 2.7602905569007263, "percentage": 27.6, "elapsed_time": "0:08:24", "remaining_time": "0:22:03", "throughput": 1855.12, "total_tokens": 935840} |
| {"current_steps": 2285, "total_steps": 8260, "loss": 0.0898, "lr": 4.54027681760869e-05, "epoch": 2.7663438256658597, "percentage": 27.66, "elapsed_time": "0:08:25", "remaining_time": "0:22:01", "throughput": 1855.48, "total_tokens": 937920} |
| {"current_steps": 2290, "total_steps": 8260, "loss": 0.0488, "lr": 4.537219547237115e-05, "epoch": 2.7723970944309926, "percentage": 27.72, "elapsed_time": "0:08:26", "remaining_time": "0:22:00", "throughput": 1856.13, "total_tokens": 940160} |
| {"current_steps": 2295, "total_steps": 8260, "loss": 0.0871, "lr": 4.5341531812454234e-05, "epoch": 2.778450363196126, "percentage": 27.78, "elapsed_time": "0:08:27", "remaining_time": "0:21:59", "throughput": 1856.68, "total_tokens": 942304} |
| {"current_steps": 2300, "total_steps": 8260, "loss": 0.1389, "lr": 4.5310777333240885e-05, "epoch": 2.7845036319612593, "percentage": 27.85, "elapsed_time": "0:08:28", "remaining_time": "0:21:57", "throughput": 1856.98, "total_tokens": 944288} |
| {"current_steps": 2305, "total_steps": 8260, "loss": 0.1125, "lr": 4.52799321720413e-05, "epoch": 2.790556900726392, "percentage": 27.91, "elapsed_time": "0:08:29", "remaining_time": "0:21:56", "throughput": 1857.34, "total_tokens": 946368} |
| {"current_steps": 2310, "total_steps": 8260, "loss": 0.1695, "lr": 4.524899646657059e-05, "epoch": 2.7966101694915255, "percentage": 27.97, "elapsed_time": "0:08:30", "remaining_time": "0:21:55", "throughput": 1857.82, "total_tokens": 948512} |
| {"current_steps": 2315, "total_steps": 8260, "loss": 0.1143, "lr": 4.521797035494809e-05, "epoch": 2.8026634382566584, "percentage": 28.03, "elapsed_time": "0:08:31", "remaining_time": "0:21:53", "throughput": 1858.26, "total_tokens": 950624} |
| {"current_steps": 2320, "total_steps": 8260, "loss": 0.0794, "lr": 4.5186853975696775e-05, "epoch": 2.8087167070217918, "percentage": 28.09, "elapsed_time": "0:08:32", "remaining_time": "0:21:52", "throughput": 1858.63, "total_tokens": 952640} |
| {"current_steps": 2325, "total_steps": 8260, "loss": 0.1604, "lr": 4.515564746774265e-05, "epoch": 2.814769975786925, "percentage": 28.15, "elapsed_time": "0:08:33", "remaining_time": "0:21:50", "throughput": 1858.98, "total_tokens": 954656} |
| {"current_steps": 2330, "total_steps": 8260, "loss": 0.0873, "lr": 4.512435097041412e-05, "epoch": 2.820823244552058, "percentage": 28.21, "elapsed_time": "0:08:34", "remaining_time": "0:21:49", "throughput": 1859.37, "total_tokens": 956736} |
| {"current_steps": 2335, "total_steps": 8260, "loss": 0.0432, "lr": 4.509296462344136e-05, "epoch": 2.8268765133171914, "percentage": 28.27, "elapsed_time": "0:08:35", "remaining_time": "0:21:48", "throughput": 1859.76, "total_tokens": 958816} |
| {"current_steps": 2340, "total_steps": 8260, "loss": 0.1927, "lr": 4.50614885669557e-05, "epoch": 2.8329297820823243, "percentage": 28.33, "elapsed_time": "0:08:36", "remaining_time": "0:21:46", "throughput": 1860.08, "total_tokens": 960800} |
| {"current_steps": 2345, "total_steps": 8260, "loss": 0.0799, "lr": 4.5029922941489e-05, "epoch": 2.8389830508474576, "percentage": 28.39, "elapsed_time": "0:08:37", "remaining_time": "0:21:45", "throughput": 1860.61, "total_tokens": 962976} |
| {"current_steps": 2350, "total_steps": 8260, "loss": 0.1606, "lr": 4.499826788797302e-05, "epoch": 2.845036319612591, "percentage": 28.45, "elapsed_time": "0:08:38", "remaining_time": "0:21:44", "throughput": 1861.02, "total_tokens": 965088} |
| {"current_steps": 2355, "total_steps": 8260, "loss": 0.165, "lr": 4.49665235477388e-05, "epoch": 2.851089588377724, "percentage": 28.51, "elapsed_time": "0:08:39", "remaining_time": "0:21:42", "throughput": 1861.3, "total_tokens": 967136} |
| {"current_steps": 2360, "total_steps": 8260, "loss": 0.0935, "lr": 4.493469006251601e-05, "epoch": 2.857142857142857, "percentage": 28.57, "elapsed_time": "0:08:40", "remaining_time": "0:21:41", "throughput": 1861.8, "total_tokens": 969248} |
| {"current_steps": 2365, "total_steps": 8260, "loss": 0.1148, "lr": 4.490276757443233e-05, "epoch": 2.86319612590799, "percentage": 28.63, "elapsed_time": "0:08:41", "remaining_time": "0:21:40", "throughput": 1862.22, "total_tokens": 971360} |
| {"current_steps": 2370, "total_steps": 8260, "loss": 0.0993, "lr": 4.487075622601281e-05, "epoch": 2.8692493946731235, "percentage": 28.69, "elapsed_time": "0:08:42", "remaining_time": "0:21:38", "throughput": 1862.53, "total_tokens": 973408} |
| {"current_steps": 2375, "total_steps": 8260, "loss": 0.0812, "lr": 4.483865616017924e-05, "epoch": 2.875302663438257, "percentage": 28.75, "elapsed_time": "0:08:43", "remaining_time": "0:21:37", "throughput": 1862.82, "total_tokens": 975392} |
| {"current_steps": 2380, "total_steps": 8260, "loss": 0.1069, "lr": 4.480646752024951e-05, "epoch": 2.8813559322033897, "percentage": 28.81, "elapsed_time": "0:08:44", "remaining_time": "0:21:36", "throughput": 1863.02, "total_tokens": 977376} |
| {"current_steps": 2385, "total_steps": 8260, "loss": 0.0945, "lr": 4.477419044993697e-05, "epoch": 2.887409200968523, "percentage": 28.87, "elapsed_time": "0:08:45", "remaining_time": "0:21:34", "throughput": 1863.25, "total_tokens": 979328} |
| {"current_steps": 2390, "total_steps": 8260, "loss": 0.0838, "lr": 4.474182509334978e-05, "epoch": 2.893462469733656, "percentage": 28.93, "elapsed_time": "0:08:46", "remaining_time": "0:21:33", "throughput": 1863.66, "total_tokens": 981376} |
| {"current_steps": 2395, "total_steps": 8260, "loss": 0.0701, "lr": 4.470937159499029e-05, "epoch": 2.8995157384987893, "percentage": 29.0, "elapsed_time": "0:08:47", "remaining_time": "0:21:32", "throughput": 1863.86, "total_tokens": 983360} |
| {"current_steps": 2400, "total_steps": 8260, "loss": 0.1238, "lr": 4.467683009975435e-05, "epoch": 2.9055690072639226, "percentage": 29.06, "elapsed_time": "0:08:48", "remaining_time": "0:21:30", "throughput": 1864.17, "total_tokens": 985408} |
| {"current_steps": 2405, "total_steps": 8260, "loss": 0.0684, "lr": 4.464420075293072e-05, "epoch": 2.9116222760290555, "percentage": 29.12, "elapsed_time": "0:08:49", "remaining_time": "0:21:29", "throughput": 1864.67, "total_tokens": 987584} |
| {"current_steps": 2410, "total_steps": 8260, "loss": 0.1471, "lr": 4.4611483700200374e-05, "epoch": 2.917675544794189, "percentage": 29.18, "elapsed_time": "0:08:50", "remaining_time": "0:21:28", "throughput": 1865.05, "total_tokens": 989632} |
| {"current_steps": 2415, "total_steps": 8260, "loss": 0.0467, "lr": 4.457867908763589e-05, "epoch": 2.923728813559322, "percentage": 29.24, "elapsed_time": "0:08:51", "remaining_time": "0:21:26", "throughput": 1865.42, "total_tokens": 991680} |
| {"current_steps": 2420, "total_steps": 8260, "loss": 0.0927, "lr": 4.454578706170075e-05, "epoch": 2.929782082324455, "percentage": 29.3, "elapsed_time": "0:08:52", "remaining_time": "0:21:25", "throughput": 1865.88, "total_tokens": 993824} |
| {"current_steps": 2425, "total_steps": 8260, "loss": 0.0764, "lr": 4.4512807769248723e-05, "epoch": 2.9358353510895885, "percentage": 29.36, "elapsed_time": "0:08:53", "remaining_time": "0:21:24", "throughput": 1866.24, "total_tokens": 995904} |
| {"current_steps": 2430, "total_steps": 8260, "loss": 0.053, "lr": 4.447974135752321e-05, "epoch": 2.9418886198547214, "percentage": 29.42, "elapsed_time": "0:08:54", "remaining_time": "0:21:22", "throughput": 1866.81, "total_tokens": 998080} |
| {"current_steps": 2435, "total_steps": 8260, "loss": 0.1816, "lr": 4.444658797415656e-05, "epoch": 2.9479418886198547, "percentage": 29.48, "elapsed_time": "0:08:55", "remaining_time": "0:21:21", "throughput": 1867.13, "total_tokens": 1000160} |
| {"current_steps": 2440, "total_steps": 8260, "loss": 0.2027, "lr": 4.441334776716944e-05, "epoch": 2.9539951573849876, "percentage": 29.54, "elapsed_time": "0:08:56", "remaining_time": "0:21:20", "throughput": 1867.68, "total_tokens": 1002368} |
| {"current_steps": 2445, "total_steps": 8260, "loss": 0.1352, "lr": 4.438002088497015e-05, "epoch": 2.960048426150121, "percentage": 29.6, "elapsed_time": "0:08:57", "remaining_time": "0:21:18", "throughput": 1868.0, "total_tokens": 1004448} |
| {"current_steps": 2450, "total_steps": 8260, "loss": 0.0609, "lr": 4.434660747635396e-05, "epoch": 2.9661016949152543, "percentage": 29.66, "elapsed_time": "0:08:58", "remaining_time": "0:21:17", "throughput": 1868.21, "total_tokens": 1006336} |
| {"current_steps": 2455, "total_steps": 8260, "loss": 0.1161, "lr": 4.4313107690502485e-05, "epoch": 2.9721549636803877, "percentage": 29.72, "elapsed_time": "0:08:59", "remaining_time": "0:21:16", "throughput": 1868.65, "total_tokens": 1008416} |
| {"current_steps": 2460, "total_steps": 8260, "loss": 0.0437, "lr": 4.427952167698298e-05, "epoch": 2.9782082324455206, "percentage": 29.78, "elapsed_time": "0:09:00", "remaining_time": "0:21:14", "throughput": 1868.92, "total_tokens": 1010400} |
| {"current_steps": 2465, "total_steps": 8260, "loss": 0.1083, "lr": 4.4245849585747654e-05, "epoch": 2.9842615012106535, "percentage": 29.84, "elapsed_time": "0:09:01", "remaining_time": "0:21:13", "throughput": 1869.19, "total_tokens": 1012320} |
| {"current_steps": 2470, "total_steps": 8260, "loss": 0.1042, "lr": 4.4212091567133083e-05, "epoch": 2.990314769975787, "percentage": 29.9, "elapsed_time": "0:09:02", "remaining_time": "0:21:11", "throughput": 1869.54, "total_tokens": 1014400} |
| {"current_steps": 2475, "total_steps": 8260, "loss": 0.0259, "lr": 4.417824777185943e-05, "epoch": 2.99636803874092, "percentage": 29.96, "elapsed_time": "0:09:03", "remaining_time": "0:21:10", "throughput": 1869.89, "total_tokens": 1016480} |
| {"current_steps": 2478, "total_steps": 8260, "eval_loss": 0.12481644004583359, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:09:12", "remaining_time": "0:21:28", "throughput": 1842.06, "total_tokens": 1017368} |
| {"current_steps": 2480, "total_steps": 8260, "loss": 0.0697, "lr": 4.414431835102987e-05, "epoch": 3.002421307506053, "percentage": 30.02, "elapsed_time": "0:09:14", "remaining_time": "0:21:31", "throughput": 1837.16, "total_tokens": 1018232} |
| {"current_steps": 2485, "total_steps": 8260, "loss": 0.0881, "lr": 4.411030345612984e-05, "epoch": 3.0084745762711864, "percentage": 30.08, "elapsed_time": "0:09:15", "remaining_time": "0:21:30", "throughput": 1837.57, "total_tokens": 1020280} |
| {"current_steps": 2490, "total_steps": 8260, "loss": 0.0987, "lr": 4.407620323902643e-05, "epoch": 3.0145278450363198, "percentage": 30.15, "elapsed_time": "0:09:16", "remaining_time": "0:21:28", "throughput": 1837.9, "total_tokens": 1022328} |
| {"current_steps": 2495, "total_steps": 8260, "loss": 0.1605, "lr": 4.404201785196762e-05, "epoch": 3.0205811138014527, "percentage": 30.21, "elapsed_time": "0:09:17", "remaining_time": "0:21:27", "throughput": 1838.23, "total_tokens": 1024312} |
| {"current_steps": 2500, "total_steps": 8260, "loss": 0.0225, "lr": 4.400774744758171e-05, "epoch": 3.026634382566586, "percentage": 30.27, "elapsed_time": "0:09:18", "remaining_time": "0:21:26", "throughput": 1838.69, "total_tokens": 1026392} |
| {"current_steps": 2505, "total_steps": 8260, "loss": 0.0838, "lr": 4.397339217887652e-05, "epoch": 3.032687651331719, "percentage": 30.33, "elapsed_time": "0:09:19", "remaining_time": "0:21:24", "throughput": 1838.95, "total_tokens": 1028344} |
| {"current_steps": 2510, "total_steps": 8260, "loss": 0.19, "lr": 4.393895219923881e-05, "epoch": 3.0387409200968523, "percentage": 30.39, "elapsed_time": "0:09:20", "remaining_time": "0:21:23", "throughput": 1839.47, "total_tokens": 1030520} |
| {"current_steps": 2515, "total_steps": 8260, "loss": 0.1885, "lr": 4.3904427662433534e-05, "epoch": 3.0447941888619856, "percentage": 30.45, "elapsed_time": "0:09:21", "remaining_time": "0:21:22", "throughput": 1840.04, "total_tokens": 1032728} |
| {"current_steps": 2520, "total_steps": 8260, "loss": 0.104, "lr": 4.386981872260317e-05, "epoch": 3.0508474576271185, "percentage": 30.51, "elapsed_time": "0:09:22", "remaining_time": "0:21:20", "throughput": 1840.4, "total_tokens": 1034808} |
| {"current_steps": 2525, "total_steps": 8260, "loss": 0.1049, "lr": 4.383512553426703e-05, "epoch": 3.056900726392252, "percentage": 30.57, "elapsed_time": "0:09:23", "remaining_time": "0:21:19", "throughput": 1840.75, "total_tokens": 1036888} |
| {"current_steps": 2530, "total_steps": 8260, "loss": 0.0686, "lr": 4.380034825232059e-05, "epoch": 3.062953995157385, "percentage": 30.63, "elapsed_time": "0:09:24", "remaining_time": "0:21:18", "throughput": 1841.18, "total_tokens": 1039000} |
| {"current_steps": 2535, "total_steps": 8260, "loss": 0.1112, "lr": 4.376548703203474e-05, "epoch": 3.069007263922518, "percentage": 30.69, "elapsed_time": "0:09:25", "remaining_time": "0:21:16", "throughput": 1841.41, "total_tokens": 1040984} |
| {"current_steps": 2540, "total_steps": 8260, "loss": 0.0325, "lr": 4.3730542029055174e-05, "epoch": 3.0750605326876514, "percentage": 30.75, "elapsed_time": "0:09:26", "remaining_time": "0:21:15", "throughput": 1841.75, "total_tokens": 1043000} |
| {"current_steps": 2545, "total_steps": 8260, "loss": 0.0779, "lr": 4.3695513399401646e-05, "epoch": 3.0811138014527844, "percentage": 30.81, "elapsed_time": "0:09:27", "remaining_time": "0:21:13", "throughput": 1842.1, "total_tokens": 1044984} |
| {"current_steps": 2550, "total_steps": 8260, "loss": 0.0799, "lr": 4.366040129946725e-05, "epoch": 3.0871670702179177, "percentage": 30.87, "elapsed_time": "0:09:28", "remaining_time": "0:21:12", "throughput": 1842.53, "total_tokens": 1047096} |
| {"current_steps": 2555, "total_steps": 8260, "loss": 0.0561, "lr": 4.362520588601777e-05, "epoch": 3.093220338983051, "percentage": 30.93, "elapsed_time": "0:09:29", "remaining_time": "0:21:11", "throughput": 1842.95, "total_tokens": 1049208} |
| {"current_steps": 2560, "total_steps": 8260, "loss": 0.3089, "lr": 4.3589927316190983e-05, "epoch": 3.099273607748184, "percentage": 30.99, "elapsed_time": "0:09:30", "remaining_time": "0:21:09", "throughput": 1843.24, "total_tokens": 1051192} |
| {"current_steps": 2565, "total_steps": 8260, "loss": 0.2034, "lr": 4.35545657474959e-05, "epoch": 3.1053268765133173, "percentage": 31.05, "elapsed_time": "0:09:31", "remaining_time": "0:21:08", "throughput": 1843.55, "total_tokens": 1053240} |
| {"current_steps": 2570, "total_steps": 8260, "loss": 0.0247, "lr": 4.351912133781213e-05, "epoch": 3.11138014527845, "percentage": 31.11, "elapsed_time": "0:09:32", "remaining_time": "0:21:07", "throughput": 1844.02, "total_tokens": 1055384} |
| {"current_steps": 2575, "total_steps": 8260, "loss": 0.1526, "lr": 4.3483594245389106e-05, "epoch": 3.1174334140435835, "percentage": 31.17, "elapsed_time": "0:09:33", "remaining_time": "0:21:05", "throughput": 1844.47, "total_tokens": 1057464} |
| {"current_steps": 2580, "total_steps": 8260, "loss": 0.1192, "lr": 4.3447984628845464e-05, "epoch": 3.123486682808717, "percentage": 31.23, "elapsed_time": "0:09:34", "remaining_time": "0:21:04", "throughput": 1845.13, "total_tokens": 1059736} |
| {"current_steps": 2585, "total_steps": 8260, "loss": 0.1241, "lr": 4.341229264716825e-05, "epoch": 3.12953995157385, "percentage": 31.3, "elapsed_time": "0:09:35", "remaining_time": "0:21:03", "throughput": 1845.33, "total_tokens": 1061656} |
| {"current_steps": 2590, "total_steps": 8260, "loss": 0.058, "lr": 4.337651845971227e-05, "epoch": 3.135593220338983, "percentage": 31.36, "elapsed_time": "0:09:36", "remaining_time": "0:21:01", "throughput": 1845.74, "total_tokens": 1063736} |
| {"current_steps": 2595, "total_steps": 8260, "loss": 0.0999, "lr": 4.334066222619933e-05, "epoch": 3.141646489104116, "percentage": 31.42, "elapsed_time": "0:09:37", "remaining_time": "0:21:00", "throughput": 1846.04, "total_tokens": 1065720} |
| {"current_steps": 2600, "total_steps": 8260, "loss": 0.1333, "lr": 4.3304724106717584e-05, "epoch": 3.1476997578692494, "percentage": 31.48, "elapsed_time": "0:09:38", "remaining_time": "0:20:58", "throughput": 1846.31, "total_tokens": 1067736} |
| {"current_steps": 2605, "total_steps": 8260, "loss": 0.0887, "lr": 4.326870426172075e-05, "epoch": 3.1537530266343827, "percentage": 31.54, "elapsed_time": "0:09:39", "remaining_time": "0:20:57", "throughput": 1846.72, "total_tokens": 1069848} |
| {"current_steps": 2610, "total_steps": 8260, "loss": 0.0463, "lr": 4.323260285202746e-05, "epoch": 3.1598062953995156, "percentage": 31.6, "elapsed_time": "0:09:40", "remaining_time": "0:20:56", "throughput": 1847.12, "total_tokens": 1071960} |
| {"current_steps": 2615, "total_steps": 8260, "loss": 0.0597, "lr": 4.3196420038820475e-05, "epoch": 3.165859564164649, "percentage": 31.66, "elapsed_time": "0:09:41", "remaining_time": "0:20:54", "throughput": 1847.53, "total_tokens": 1074072} |
| {"current_steps": 2620, "total_steps": 8260, "loss": 0.1861, "lr": 4.316015598364603e-05, "epoch": 3.171912832929782, "percentage": 31.72, "elapsed_time": "0:09:42", "remaining_time": "0:20:53", "throughput": 1847.94, "total_tokens": 1076184} |
| {"current_steps": 2625, "total_steps": 8260, "loss": 0.1747, "lr": 4.312381084841307e-05, "epoch": 3.1779661016949152, "percentage": 31.78, "elapsed_time": "0:09:43", "remaining_time": "0:20:52", "throughput": 1848.43, "total_tokens": 1078360} |
| {"current_steps": 2630, "total_steps": 8260, "loss": 0.1242, "lr": 4.308738479539254e-05, "epoch": 3.1840193704600486, "percentage": 31.84, "elapsed_time": "0:09:44", "remaining_time": "0:20:50", "throughput": 1848.87, "total_tokens": 1080440} |
| {"current_steps": 2635, "total_steps": 8260, "loss": 0.1133, "lr": 4.305087798721665e-05, "epoch": 3.1900726392251815, "percentage": 31.9, "elapsed_time": "0:09:45", "remaining_time": "0:20:49", "throughput": 1849.31, "total_tokens": 1082520} |
| {"current_steps": 2640, "total_steps": 8260, "loss": 0.1748, "lr": 4.30142905868782e-05, "epoch": 3.196125907990315, "percentage": 31.96, "elapsed_time": "0:09:46", "remaining_time": "0:20:48", "throughput": 1849.53, "total_tokens": 1084408} |
| {"current_steps": 2645, "total_steps": 8260, "loss": 0.0534, "lr": 4.297762275772975e-05, "epoch": 3.2021791767554477, "percentage": 32.02, "elapsed_time": "0:09:47", "remaining_time": "0:20:46", "throughput": 1849.87, "total_tokens": 1086424} |
| {"current_steps": 2650, "total_steps": 8260, "loss": 0.1526, "lr": 4.2940874663483005e-05, "epoch": 3.208232445520581, "percentage": 32.08, "elapsed_time": "0:09:48", "remaining_time": "0:20:45", "throughput": 1850.2, "total_tokens": 1088440} |
| {"current_steps": 2655, "total_steps": 8260, "loss": 0.1298, "lr": 4.2904046468208006e-05, "epoch": 3.2142857142857144, "percentage": 32.14, "elapsed_time": "0:09:49", "remaining_time": "0:20:44", "throughput": 1850.71, "total_tokens": 1090648} |
| {"current_steps": 2660, "total_steps": 8260, "loss": 0.1463, "lr": 4.286713833633242e-05, "epoch": 3.2203389830508473, "percentage": 32.2, "elapsed_time": "0:09:50", "remaining_time": "0:20:42", "throughput": 1851.08, "total_tokens": 1092632} |
| {"current_steps": 2665, "total_steps": 8260, "loss": 0.0843, "lr": 4.283015043264084e-05, "epoch": 3.2263922518159807, "percentage": 32.26, "elapsed_time": "0:09:51", "remaining_time": "0:20:41", "throughput": 1851.33, "total_tokens": 1094648} |
| {"current_steps": 2670, "total_steps": 8260, "loss": 0.0989, "lr": 4.279308292227396e-05, "epoch": 3.232445520581114, "percentage": 32.32, "elapsed_time": "0:09:52", "remaining_time": "0:20:40", "throughput": 1851.68, "total_tokens": 1096728} |
| {"current_steps": 2675, "total_steps": 8260, "loss": 0.038, "lr": 4.275593597072796e-05, "epoch": 3.238498789346247, "percentage": 32.38, "elapsed_time": "0:09:53", "remaining_time": "0:20:38", "throughput": 1852.12, "total_tokens": 1098872} |
| {"current_steps": 2680, "total_steps": 8260, "loss": 0.0351, "lr": 4.2718709743853654e-05, "epoch": 3.2445520581113803, "percentage": 32.45, "elapsed_time": "0:09:54", "remaining_time": "0:20:37", "throughput": 1852.56, "total_tokens": 1101048} |
| {"current_steps": 2685, "total_steps": 8260, "loss": 0.1365, "lr": 4.268140440785584e-05, "epoch": 3.250605326876513, "percentage": 32.51, "elapsed_time": "0:09:55", "remaining_time": "0:20:36", "throughput": 1853.15, "total_tokens": 1103320} |
| {"current_steps": 2690, "total_steps": 8260, "loss": 0.038, "lr": 4.264402012929247e-05, "epoch": 3.2566585956416465, "percentage": 32.57, "elapsed_time": "0:09:56", "remaining_time": "0:20:34", "throughput": 1853.52, "total_tokens": 1105336} |
| {"current_steps": 2695, "total_steps": 8260, "loss": 0.1211, "lr": 4.2606557075073996e-05, "epoch": 3.26271186440678, "percentage": 32.63, "elapsed_time": "0:09:57", "remaining_time": "0:20:33", "throughput": 1853.73, "total_tokens": 1107288} |
| {"current_steps": 2700, "total_steps": 8260, "loss": 0.0719, "lr": 4.256901541246255e-05, "epoch": 3.2687651331719128, "percentage": 32.69, "elapsed_time": "0:09:58", "remaining_time": "0:20:32", "throughput": 1854.22, "total_tokens": 1109368} |
| {"current_steps": 2705, "total_steps": 8260, "loss": 0.0568, "lr": 4.253139530907124e-05, "epoch": 3.274818401937046, "percentage": 32.75, "elapsed_time": "0:09:59", "remaining_time": "0:20:30", "throughput": 1854.78, "total_tokens": 1111576} |
| {"current_steps": 2710, "total_steps": 8260, "loss": 0.1426, "lr": 4.249369693286341e-05, "epoch": 3.280871670702179, "percentage": 32.81, "elapsed_time": "0:10:00", "remaining_time": "0:20:29", "throughput": 1855.05, "total_tokens": 1113624} |
| {"current_steps": 2715, "total_steps": 8260, "loss": 0.1715, "lr": 4.245592045215182e-05, "epoch": 3.2869249394673123, "percentage": 32.87, "elapsed_time": "0:10:01", "remaining_time": "0:20:28", "throughput": 1855.27, "total_tokens": 1115512} |
| {"current_steps": 2720, "total_steps": 8260, "loss": 0.0644, "lr": 4.2418066035598e-05, "epoch": 3.2929782082324457, "percentage": 32.93, "elapsed_time": "0:10:02", "remaining_time": "0:20:26", "throughput": 1855.48, "total_tokens": 1117464} |
| {"current_steps": 2725, "total_steps": 8260, "loss": 0.1253, "lr": 4.238013385221142e-05, "epoch": 3.2990314769975786, "percentage": 32.99, "elapsed_time": "0:10:03", "remaining_time": "0:20:25", "throughput": 1855.79, "total_tokens": 1119480} |
| {"current_steps": 2730, "total_steps": 8260, "loss": 0.1011, "lr": 4.2342124071348744e-05, "epoch": 3.305084745762712, "percentage": 33.05, "elapsed_time": "0:10:04", "remaining_time": "0:20:23", "throughput": 1856.11, "total_tokens": 1121496} |
| {"current_steps": 2735, "total_steps": 8260, "loss": 0.0764, "lr": 4.230403686271309e-05, "epoch": 3.3111380145278453, "percentage": 33.11, "elapsed_time": "0:10:05", "remaining_time": "0:20:22", "throughput": 1856.38, "total_tokens": 1123480} |
| {"current_steps": 2740, "total_steps": 8260, "loss": 0.0741, "lr": 4.2265872396353314e-05, "epoch": 3.317191283292978, "percentage": 33.17, "elapsed_time": "0:10:06", "remaining_time": "0:20:21", "throughput": 1856.65, "total_tokens": 1125528} |
| {"current_steps": 2745, "total_steps": 8260, "loss": 0.1128, "lr": 4.2227630842663136e-05, "epoch": 3.3232445520581115, "percentage": 33.23, "elapsed_time": "0:10:07", "remaining_time": "0:20:19", "throughput": 1856.9, "total_tokens": 1127512} |
| {"current_steps": 2750, "total_steps": 8260, "loss": 0.0218, "lr": 4.21893123723805e-05, "epoch": 3.3292978208232444, "percentage": 33.29, "elapsed_time": "0:10:08", "remaining_time": "0:20:18", "throughput": 1857.21, "total_tokens": 1129592} |
| {"current_steps": 2755, "total_steps": 8260, "loss": 0.0278, "lr": 4.2150917156586735e-05, "epoch": 3.335351089588378, "percentage": 33.35, "elapsed_time": "0:10:09", "remaining_time": "0:20:17", "throughput": 1857.46, "total_tokens": 1131576} |
| {"current_steps": 2760, "total_steps": 8260, "loss": 0.062, "lr": 4.211244536670584e-05, "epoch": 3.341404358353511, "percentage": 33.41, "elapsed_time": "0:10:10", "remaining_time": "0:20:16", "throughput": 1857.97, "total_tokens": 1133784} |
| {"current_steps": 2765, "total_steps": 8260, "loss": 0.2284, "lr": 4.207389717450368e-05, "epoch": 3.347457627118644, "percentage": 33.47, "elapsed_time": "0:10:11", "remaining_time": "0:20:14", "throughput": 1858.19, "total_tokens": 1135800} |
| {"current_steps": 2770, "total_steps": 8260, "loss": 0.1403, "lr": 4.203527275208723e-05, "epoch": 3.3535108958837774, "percentage": 33.54, "elapsed_time": "0:10:12", "remaining_time": "0:20:13", "throughput": 1858.44, "total_tokens": 1137784} |
| {"current_steps": 2775, "total_steps": 8260, "loss": 0.1353, "lr": 4.199657227190384e-05, "epoch": 3.3595641646489103, "percentage": 33.6, "elapsed_time": "0:10:13", "remaining_time": "0:20:12", "throughput": 1858.79, "total_tokens": 1139896} |
| {"current_steps": 2780, "total_steps": 8260, "loss": 0.0358, "lr": 4.195779590674041e-05, "epoch": 3.3656174334140436, "percentage": 33.66, "elapsed_time": "0:10:14", "remaining_time": "0:20:10", "throughput": 1859.18, "total_tokens": 1142040} |
| {"current_steps": 2785, "total_steps": 8260, "loss": 0.0709, "lr": 4.191894382972264e-05, "epoch": 3.371670702179177, "percentage": 33.72, "elapsed_time": "0:10:15", "remaining_time": "0:20:09", "throughput": 1859.44, "total_tokens": 1144088} |
| {"current_steps": 2790, "total_steps": 8260, "loss": 0.1037, "lr": 4.188001621431429e-05, "epoch": 3.37772397094431, "percentage": 33.78, "elapsed_time": "0:10:16", "remaining_time": "0:20:08", "throughput": 1859.84, "total_tokens": 1146232} |
| {"current_steps": 2795, "total_steps": 8260, "loss": 0.2604, "lr": 4.184101323431636e-05, "epoch": 3.383777239709443, "percentage": 33.84, "elapsed_time": "0:10:17", "remaining_time": "0:20:07", "throughput": 1860.2, "total_tokens": 1148344} |
| {"current_steps": 2800, "total_steps": 8260, "loss": 0.0786, "lr": 4.180193506386634e-05, "epoch": 3.389830508474576, "percentage": 33.9, "elapsed_time": "0:10:18", "remaining_time": "0:20:05", "throughput": 1860.49, "total_tokens": 1150360} |
| {"current_steps": 2805, "total_steps": 8260, "loss": 0.0508, "lr": 4.1762781877437406e-05, "epoch": 3.3958837772397095, "percentage": 33.96, "elapsed_time": "0:10:19", "remaining_time": "0:20:04", "throughput": 1860.79, "total_tokens": 1152376} |
| {"current_steps": 2810, "total_steps": 8260, "loss": 0.1793, "lr": 4.172355384983769e-05, "epoch": 3.401937046004843, "percentage": 34.02, "elapsed_time": "0:10:20", "remaining_time": "0:20:03", "throughput": 1861.12, "total_tokens": 1154424} |
| {"current_steps": 2815, "total_steps": 8260, "loss": 0.0945, "lr": 4.168425115620944e-05, "epoch": 3.4079903147699757, "percentage": 34.08, "elapsed_time": "0:10:21", "remaining_time": "0:20:01", "throughput": 1861.47, "total_tokens": 1156472} |
| {"current_steps": 2820, "total_steps": 8260, "loss": 0.1123, "lr": 4.164487397202829e-05, "epoch": 3.414043583535109, "percentage": 34.14, "elapsed_time": "0:10:22", "remaining_time": "0:20:00", "throughput": 1861.85, "total_tokens": 1158552} |
| {"current_steps": 2825, "total_steps": 8260, "loss": 0.1462, "lr": 4.160542247310244e-05, "epoch": 3.420096852300242, "percentage": 34.2, "elapsed_time": "0:10:23", "remaining_time": "0:19:59", "throughput": 1862.24, "total_tokens": 1160696} |
| {"current_steps": 2830, "total_steps": 8260, "loss": 0.105, "lr": 4.156589683557189e-05, "epoch": 3.4261501210653753, "percentage": 34.26, "elapsed_time": "0:10:24", "remaining_time": "0:19:57", "throughput": 1862.6, "total_tokens": 1162808} |
| {"current_steps": 2835, "total_steps": 8260, "loss": 0.1314, "lr": 4.1526297235907635e-05, "epoch": 3.4322033898305087, "percentage": 34.32, "elapsed_time": "0:10:25", "remaining_time": "0:19:56", "throughput": 1862.76, "total_tokens": 1164728} |
| {"current_steps": 2840, "total_steps": 8260, "loss": 0.078, "lr": 4.148662385091091e-05, "epoch": 3.4382566585956416, "percentage": 34.38, "elapsed_time": "0:10:26", "remaining_time": "0:19:55", "throughput": 1863.06, "total_tokens": 1166808} |
| {"current_steps": 2845, "total_steps": 8260, "loss": 0.0591, "lr": 4.144687685771238e-05, "epoch": 3.444309927360775, "percentage": 34.44, "elapsed_time": "0:10:27", "remaining_time": "0:19:53", "throughput": 1863.37, "total_tokens": 1168888} |
| {"current_steps": 2850, "total_steps": 8260, "loss": 0.1137, "lr": 4.140705643377133e-05, "epoch": 3.450363196125908, "percentage": 34.5, "elapsed_time": "0:10:28", "remaining_time": "0:19:52", "throughput": 1863.68, "total_tokens": 1170872} |
| {"current_steps": 2855, "total_steps": 8260, "loss": 0.0972, "lr": 4.1367162756874925e-05, "epoch": 3.456416464891041, "percentage": 34.56, "elapsed_time": "0:10:29", "remaining_time": "0:19:51", "throughput": 1864.06, "total_tokens": 1172984} |
| {"current_steps": 2860, "total_steps": 8260, "loss": 0.043, "lr": 4.132719600513734e-05, "epoch": 3.4624697336561745, "percentage": 34.62, "elapsed_time": "0:10:30", "remaining_time": "0:19:50", "throughput": 1864.31, "total_tokens": 1175032} |
| {"current_steps": 2865, "total_steps": 8260, "loss": 0.2042, "lr": 4.128715635699905e-05, "epoch": 3.4685230024213074, "percentage": 34.69, "elapsed_time": "0:10:31", "remaining_time": "0:19:48", "throughput": 1864.74, "total_tokens": 1177240} |
| {"current_steps": 2870, "total_steps": 8260, "loss": 0.1431, "lr": 4.124704399122597e-05, "epoch": 3.4745762711864407, "percentage": 34.75, "elapsed_time": "0:10:32", "remaining_time": "0:19:47", "throughput": 1864.94, "total_tokens": 1179192} |
| {"current_steps": 2875, "total_steps": 8260, "loss": 0.1295, "lr": 4.120685908690869e-05, "epoch": 3.4806295399515736, "percentage": 34.81, "elapsed_time": "0:10:33", "remaining_time": "0:19:46", "throughput": 1865.15, "total_tokens": 1181112} |
| {"current_steps": 2880, "total_steps": 8260, "loss": 0.1034, "lr": 4.1166601823461656e-05, "epoch": 3.486682808716707, "percentage": 34.87, "elapsed_time": "0:10:34", "remaining_time": "0:19:44", "throughput": 1865.44, "total_tokens": 1183128} |
| {"current_steps": 2885, "total_steps": 8260, "loss": 0.0947, "lr": 4.112627238062239e-05, "epoch": 3.4927360774818403, "percentage": 34.93, "elapsed_time": "0:10:35", "remaining_time": "0:19:43", "throughput": 1865.77, "total_tokens": 1185240} |
| {"current_steps": 2890, "total_steps": 8260, "loss": 0.1086, "lr": 4.1085870938450656e-05, "epoch": 3.4987893462469732, "percentage": 34.99, "elapsed_time": "0:10:36", "remaining_time": "0:19:42", "throughput": 1866.03, "total_tokens": 1187320} |
| {"current_steps": 2891, "total_steps": 8260, "eval_loss": 0.10590487718582153, "epoch": 3.5, "percentage": 35.0, "elapsed_time": "0:10:44", "remaining_time": "0:19:57", "throughput": 1842.57, "total_tokens": 1187704} |
| {"current_steps": 2895, "total_steps": 8260, "loss": 0.1421, "lr": 4.1045397677327684e-05, "epoch": 3.5048426150121066, "percentage": 35.05, "elapsed_time": "0:10:46", "remaining_time": "0:19:58", "throughput": 1838.75, "total_tokens": 1189400} |
| {"current_steps": 2900, "total_steps": 8260, "loss": 0.1182, "lr": 4.1004852777955364e-05, "epoch": 3.5108958837772395, "percentage": 35.11, "elapsed_time": "0:10:47", "remaining_time": "0:19:57", "throughput": 1839.01, "total_tokens": 1191384} |
| {"current_steps": 2905, "total_steps": 8260, "loss": 0.077, "lr": 4.096423642135543e-05, "epoch": 3.516949152542373, "percentage": 35.17, "elapsed_time": "0:10:48", "remaining_time": "0:19:56", "throughput": 1839.2, "total_tokens": 1193368} |
| {"current_steps": 2910, "total_steps": 8260, "loss": 0.0321, "lr": 4.0923548788868625e-05, "epoch": 3.523002421307506, "percentage": 35.23, "elapsed_time": "0:10:49", "remaining_time": "0:19:54", "throughput": 1839.59, "total_tokens": 1195512} |
| {"current_steps": 2915, "total_steps": 8260, "loss": 0.1141, "lr": 4.0882790062153957e-05, "epoch": 3.529055690072639, "percentage": 35.29, "elapsed_time": "0:10:50", "remaining_time": "0:19:53", "throughput": 1839.87, "total_tokens": 1197560} |
| {"current_steps": 2920, "total_steps": 8260, "loss": 0.0701, "lr": 4.084196042318783e-05, "epoch": 3.5351089588377724, "percentage": 35.35, "elapsed_time": "0:10:51", "remaining_time": "0:19:52", "throughput": 1840.34, "total_tokens": 1199768} |
| {"current_steps": 2925, "total_steps": 8260, "loss": 0.1937, "lr": 4.080106005426326e-05, "epoch": 3.5411622276029053, "percentage": 35.41, "elapsed_time": "0:10:52", "remaining_time": "0:19:50", "throughput": 1840.72, "total_tokens": 1201848} |
| {"current_steps": 2930, "total_steps": 8260, "loss": 0.0905, "lr": 4.076008913798903e-05, "epoch": 3.5472154963680387, "percentage": 35.47, "elapsed_time": "0:10:53", "remaining_time": "0:19:49", "throughput": 1841.07, "total_tokens": 1203896} |
| {"current_steps": 2935, "total_steps": 8260, "loss": 0.0996, "lr": 4.071904785728894e-05, "epoch": 3.553268765133172, "percentage": 35.53, "elapsed_time": "0:10:54", "remaining_time": "0:19:48", "throughput": 1841.34, "total_tokens": 1205880} |
| {"current_steps": 2940, "total_steps": 8260, "loss": 0.1777, "lr": 4.0677936395400906e-05, "epoch": 3.559322033898305, "percentage": 35.59, "elapsed_time": "0:10:55", "remaining_time": "0:19:46", "throughput": 1841.59, "total_tokens": 1207896} |
| {"current_steps": 2945, "total_steps": 8260, "loss": 0.1361, "lr": 4.063675493587621e-05, "epoch": 3.5653753026634383, "percentage": 35.65, "elapsed_time": "0:10:56", "remaining_time": "0:19:45", "throughput": 1842.0, "total_tokens": 1210008} |
| {"current_steps": 2950, "total_steps": 8260, "loss": 0.1313, "lr": 4.059550366257864e-05, "epoch": 3.571428571428571, "percentage": 35.71, "elapsed_time": "0:10:57", "remaining_time": "0:19:44", "throughput": 1842.2, "total_tokens": 1212024} |
| {"current_steps": 2955, "total_steps": 8260, "loss": 0.0613, "lr": 4.055418275968368e-05, "epoch": 3.5774818401937045, "percentage": 35.77, "elapsed_time": "0:10:58", "remaining_time": "0:19:42", "throughput": 1842.4, "total_tokens": 1214040} |
| {"current_steps": 2960, "total_steps": 8260, "loss": 0.0796, "lr": 4.0512792411677705e-05, "epoch": 3.583535108958838, "percentage": 35.84, "elapsed_time": "0:10:59", "remaining_time": "0:19:41", "throughput": 1842.73, "total_tokens": 1216088} |
| {"current_steps": 2965, "total_steps": 8260, "loss": 0.0616, "lr": 4.047133280335713e-05, "epoch": 3.589588377723971, "percentage": 35.9, "elapsed_time": "0:11:00", "remaining_time": "0:19:40", "throughput": 1843.07, "total_tokens": 1218136} |
| {"current_steps": 2970, "total_steps": 8260, "loss": 0.0693, "lr": 4.042980411982762e-05, "epoch": 3.595641646489104, "percentage": 35.96, "elapsed_time": "0:11:01", "remaining_time": "0:19:39", "throughput": 1843.42, "total_tokens": 1220248} |
| {"current_steps": 2975, "total_steps": 8260, "loss": 0.1484, "lr": 4.0388206546503215e-05, "epoch": 3.601694915254237, "percentage": 36.02, "elapsed_time": "0:11:02", "remaining_time": "0:19:37", "throughput": 1843.75, "total_tokens": 1222360} |
| {"current_steps": 2980, "total_steps": 8260, "loss": 0.1342, "lr": 4.0346540269105546e-05, "epoch": 3.6077481840193704, "percentage": 36.08, "elapsed_time": "0:11:04", "remaining_time": "0:19:36", "throughput": 1844.19, "total_tokens": 1224568} |
| {"current_steps": 2985, "total_steps": 8260, "loss": 0.2002, "lr": 4.030480547366297e-05, "epoch": 3.6138014527845037, "percentage": 36.14, "elapsed_time": "0:11:05", "remaining_time": "0:19:35", "throughput": 1844.57, "total_tokens": 1226648} |
| {"current_steps": 2990, "total_steps": 8260, "loss": 0.1495, "lr": 4.026300234650979e-05, "epoch": 3.619854721549637, "percentage": 36.2, "elapsed_time": "0:11:05", "remaining_time": "0:19:33", "throughput": 1844.86, "total_tokens": 1228600} |
| {"current_steps": 2995, "total_steps": 8260, "loss": 0.07, "lr": 4.022113107428536e-05, "epoch": 3.62590799031477, "percentage": 36.26, "elapsed_time": "0:11:06", "remaining_time": "0:19:32", "throughput": 1845.08, "total_tokens": 1230616} |
| {"current_steps": 3000, "total_steps": 8260, "loss": 0.1121, "lr": 4.0179191843933286e-05, "epoch": 3.6319612590799033, "percentage": 36.32, "elapsed_time": "0:11:07", "remaining_time": "0:19:31", "throughput": 1845.31, "total_tokens": 1232632} |
| {"current_steps": 3005, "total_steps": 8260, "loss": 0.0738, "lr": 4.013718484270061e-05, "epoch": 3.638014527845036, "percentage": 36.38, "elapsed_time": "0:11:08", "remaining_time": "0:19:29", "throughput": 1845.49, "total_tokens": 1234552} |
| {"current_steps": 3010, "total_steps": 8260, "loss": 0.0654, "lr": 4.009511025813694e-05, "epoch": 3.6440677966101696, "percentage": 36.44, "elapsed_time": "0:11:09", "remaining_time": "0:19:28", "throughput": 1845.99, "total_tokens": 1236728} |
| {"current_steps": 3015, "total_steps": 8260, "loss": 0.1617, "lr": 4.005296827809362e-05, "epoch": 3.650121065375303, "percentage": 36.5, "elapsed_time": "0:11:10", "remaining_time": "0:19:27", "throughput": 1846.25, "total_tokens": 1238776} |
| {"current_steps": 3020, "total_steps": 8260, "loss": 0.0987, "lr": 4.001075909072289e-05, "epoch": 3.656174334140436, "percentage": 36.56, "elapsed_time": "0:11:11", "remaining_time": "0:19:25", "throughput": 1846.55, "total_tokens": 1240856} |
| {"current_steps": 3025, "total_steps": 8260, "loss": 0.1065, "lr": 3.9968482884477075e-05, "epoch": 3.662227602905569, "percentage": 36.62, "elapsed_time": "0:11:12", "remaining_time": "0:19:24", "throughput": 1846.86, "total_tokens": 1242936} |
| {"current_steps": 3030, "total_steps": 8260, "loss": 0.1145, "lr": 3.992613984810771e-05, "epoch": 3.668280871670702, "percentage": 36.68, "elapsed_time": "0:11:14", "remaining_time": "0:19:23", "throughput": 1847.25, "total_tokens": 1245080} |
| {"current_steps": 3035, "total_steps": 8260, "loss": 0.0348, "lr": 3.988373017066469e-05, "epoch": 3.6743341404358354, "percentage": 36.74, "elapsed_time": "0:11:15", "remaining_time": "0:19:22", "throughput": 1847.59, "total_tokens": 1247192} |
| {"current_steps": 3040, "total_steps": 8260, "loss": 0.0321, "lr": 3.984125404149548e-05, "epoch": 3.6803874092009687, "percentage": 36.8, "elapsed_time": "0:11:16", "remaining_time": "0:19:20", "throughput": 1847.93, "total_tokens": 1249240} |
| {"current_steps": 3045, "total_steps": 8260, "loss": 0.1196, "lr": 3.9798711650244194e-05, "epoch": 3.6864406779661016, "percentage": 36.86, "elapsed_time": "0:11:17", "remaining_time": "0:19:19", "throughput": 1848.22, "total_tokens": 1251320} |
| {"current_steps": 3050, "total_steps": 8260, "loss": 0.175, "lr": 3.9756103186850825e-05, "epoch": 3.692493946731235, "percentage": 36.92, "elapsed_time": "0:11:18", "remaining_time": "0:19:18", "throughput": 1848.49, "total_tokens": 1253336} |
| {"current_steps": 3055, "total_steps": 8260, "loss": 0.1921, "lr": 3.971342884155033e-05, "epoch": 3.698547215496368, "percentage": 36.99, "elapsed_time": "0:11:19", "remaining_time": "0:19:16", "throughput": 1848.78, "total_tokens": 1255352} |
| {"current_steps": 3060, "total_steps": 8260, "loss": 0.0838, "lr": 3.9670688804871815e-05, "epoch": 3.7046004842615012, "percentage": 37.05, "elapsed_time": "0:11:19", "remaining_time": "0:19:15", "throughput": 1849.0, "total_tokens": 1257272} |
| {"current_steps": 3065, "total_steps": 8260, "loss": 0.0452, "lr": 3.96278832676377e-05, "epoch": 3.7106537530266346, "percentage": 37.11, "elapsed_time": "0:11:20", "remaining_time": "0:19:14", "throughput": 1849.38, "total_tokens": 1259416} |
| {"current_steps": 3070, "total_steps": 8260, "loss": 0.1143, "lr": 3.958501242096283e-05, "epoch": 3.7167070217917675, "percentage": 37.17, "elapsed_time": "0:11:22", "remaining_time": "0:19:12", "throughput": 1849.67, "total_tokens": 1261496} |
| {"current_steps": 3075, "total_steps": 8260, "loss": 0.0709, "lr": 3.954207645625365e-05, "epoch": 3.722760290556901, "percentage": 37.23, "elapsed_time": "0:11:23", "remaining_time": "0:19:11", "throughput": 1849.84, "total_tokens": 1263480} |
| {"current_steps": 3080, "total_steps": 8260, "loss": 0.0714, "lr": 3.949907556520731e-05, "epoch": 3.7288135593220337, "percentage": 37.29, "elapsed_time": "0:11:24", "remaining_time": "0:19:10", "throughput": 1850.16, "total_tokens": 1265528} |
| {"current_steps": 3085, "total_steps": 8260, "loss": 0.0492, "lr": 3.9456009939810886e-05, "epoch": 3.734866828087167, "percentage": 37.35, "elapsed_time": "0:11:24", "remaining_time": "0:19:09", "throughput": 1850.39, "total_tokens": 1267512} |
| {"current_steps": 3090, "total_steps": 8260, "loss": 0.0756, "lr": 3.941287977234043e-05, "epoch": 3.7409200968523004, "percentage": 37.41, "elapsed_time": "0:11:26", "remaining_time": "0:19:07", "throughput": 1850.64, "total_tokens": 1269560} |
| {"current_steps": 3095, "total_steps": 8260, "loss": 0.0716, "lr": 3.9369685255360175e-05, "epoch": 3.7469733656174333, "percentage": 37.47, "elapsed_time": "0:11:27", "remaining_time": "0:19:06", "throughput": 1850.91, "total_tokens": 1271640} |
| {"current_steps": 3100, "total_steps": 8260, "loss": 0.0675, "lr": 3.9326426581721663e-05, "epoch": 3.7530266343825667, "percentage": 37.53, "elapsed_time": "0:11:28", "remaining_time": "0:19:05", "throughput": 1851.24, "total_tokens": 1273688} |
| {"current_steps": 3105, "total_steps": 8260, "loss": 0.0597, "lr": 3.9283103944562874e-05, "epoch": 3.7590799031476996, "percentage": 37.59, "elapsed_time": "0:11:29", "remaining_time": "0:19:03", "throughput": 1851.53, "total_tokens": 1275768} |
| {"current_steps": 3110, "total_steps": 8260, "loss": 0.0593, "lr": 3.923971753730735e-05, "epoch": 3.765133171912833, "percentage": 37.65, "elapsed_time": "0:11:30", "remaining_time": "0:19:02", "throughput": 1851.77, "total_tokens": 1277752} |
| {"current_steps": 3115, "total_steps": 8260, "loss": 0.1274, "lr": 3.919626755366338e-05, "epoch": 3.7711864406779663, "percentage": 37.71, "elapsed_time": "0:11:31", "remaining_time": "0:19:01", "throughput": 1852.07, "total_tokens": 1279864} |
| {"current_steps": 3120, "total_steps": 8260, "loss": 0.0935, "lr": 3.9152754187623086e-05, "epoch": 3.777239709443099, "percentage": 37.77, "elapsed_time": "0:11:32", "remaining_time": "0:19:00", "throughput": 1852.33, "total_tokens": 1281880} |
| {"current_steps": 3125, "total_steps": 8260, "loss": 0.0675, "lr": 3.910917763346156e-05, "epoch": 3.7832929782082325, "percentage": 37.83, "elapsed_time": "0:11:33", "remaining_time": "0:18:58", "throughput": 1852.58, "total_tokens": 1283928} |
| {"current_steps": 3130, "total_steps": 8260, "loss": 0.2427, "lr": 3.906553808573604e-05, "epoch": 3.7893462469733654, "percentage": 37.89, "elapsed_time": "0:11:34", "remaining_time": "0:18:57", "throughput": 1852.86, "total_tokens": 1285944} |
| {"current_steps": 3135, "total_steps": 8260, "loss": 0.1861, "lr": 3.9021835739285e-05, "epoch": 3.7953995157384988, "percentage": 37.95, "elapsed_time": "0:11:35", "remaining_time": "0:18:56", "throughput": 1853.1, "total_tokens": 1287928} |
| {"current_steps": 3140, "total_steps": 8260, "loss": 0.2217, "lr": 3.897807078922728e-05, "epoch": 3.801452784503632, "percentage": 38.01, "elapsed_time": "0:11:36", "remaining_time": "0:18:54", "throughput": 1853.45, "total_tokens": 1290008} |
| {"current_steps": 3145, "total_steps": 8260, "loss": 0.0406, "lr": 3.8934243430961265e-05, "epoch": 3.807506053268765, "percentage": 38.08, "elapsed_time": "0:11:36", "remaining_time": "0:18:53", "throughput": 1853.86, "total_tokens": 1292120} |
| {"current_steps": 3150, "total_steps": 8260, "loss": 0.0805, "lr": 3.889035386016393e-05, "epoch": 3.8135593220338984, "percentage": 38.14, "elapsed_time": "0:11:37", "remaining_time": "0:18:52", "throughput": 1854.02, "total_tokens": 1294040} |
| {"current_steps": 3155, "total_steps": 8260, "loss": 0.0865, "lr": 3.8846402272790044e-05, "epoch": 3.8196125907990313, "percentage": 38.2, "elapsed_time": "0:11:38", "remaining_time": "0:18:50", "throughput": 1854.18, "total_tokens": 1296024} |
| {"current_steps": 3160, "total_steps": 8260, "loss": 0.1828, "lr": 3.8802388865071246e-05, "epoch": 3.8256658595641646, "percentage": 38.26, "elapsed_time": "0:11:39", "remaining_time": "0:18:49", "throughput": 1854.47, "total_tokens": 1298104} |
| {"current_steps": 3165, "total_steps": 8260, "loss": 0.0467, "lr": 3.875831383351519e-05, "epoch": 3.831719128329298, "percentage": 38.32, "elapsed_time": "0:11:41", "remaining_time": "0:18:48", "throughput": 1854.83, "total_tokens": 1300248} |
| {"current_steps": 3170, "total_steps": 8260, "loss": 0.168, "lr": 3.8714177374904683e-05, "epoch": 3.837772397094431, "percentage": 38.38, "elapsed_time": "0:11:41", "remaining_time": "0:18:47", "throughput": 1854.96, "total_tokens": 1302104} |
| {"current_steps": 3175, "total_steps": 8260, "loss": 0.0779, "lr": 3.866997968629674e-05, "epoch": 3.843825665859564, "percentage": 38.44, "elapsed_time": "0:11:42", "remaining_time": "0:18:45", "throughput": 1855.15, "total_tokens": 1304056} |
| {"current_steps": 3180, "total_steps": 8260, "loss": 0.0653, "lr": 3.86257209650218e-05, "epoch": 3.849878934624697, "percentage": 38.5, "elapsed_time": "0:11:43", "remaining_time": "0:18:44", "throughput": 1855.39, "total_tokens": 1306104} |
| {"current_steps": 3185, "total_steps": 8260, "loss": 0.0618, "lr": 3.858140140868276e-05, "epoch": 3.8559322033898304, "percentage": 38.56, "elapsed_time": "0:11:44", "remaining_time": "0:18:43", "throughput": 1855.58, "total_tokens": 1308056} |
| {"current_steps": 3190, "total_steps": 8260, "loss": 0.041, "lr": 3.853702121515416e-05, "epoch": 3.861985472154964, "percentage": 38.62, "elapsed_time": "0:11:45", "remaining_time": "0:18:41", "throughput": 1855.83, "total_tokens": 1310104} |
| {"current_steps": 3195, "total_steps": 8260, "loss": 0.0828, "lr": 3.849258058258124e-05, "epoch": 3.8680387409200967, "percentage": 38.68, "elapsed_time": "0:11:46", "remaining_time": "0:18:40", "throughput": 1856.08, "total_tokens": 1312152} |
| {"current_steps": 3200, "total_steps": 8260, "loss": 0.0624, "lr": 3.84480797093791e-05, "epoch": 3.87409200968523, "percentage": 38.74, "elapsed_time": "0:11:47", "remaining_time": "0:18:39", "throughput": 1856.45, "total_tokens": 1314328} |
| {"current_steps": 3205, "total_steps": 8260, "loss": 0.0636, "lr": 3.8403518794231795e-05, "epoch": 3.880145278450363, "percentage": 38.8, "elapsed_time": "0:11:48", "remaining_time": "0:18:38", "throughput": 1856.65, "total_tokens": 1316344} |
| {"current_steps": 3210, "total_steps": 8260, "loss": 0.0317, "lr": 3.835889803609145e-05, "epoch": 3.8861985472154963, "percentage": 38.86, "elapsed_time": "0:11:49", "remaining_time": "0:18:36", "throughput": 1856.92, "total_tokens": 1318360} |
| {"current_steps": 3215, "total_steps": 8260, "loss": 0.0081, "lr": 3.8314217634177376e-05, "epoch": 3.8922518159806296, "percentage": 38.92, "elapsed_time": "0:11:50", "remaining_time": "0:18:35", "throughput": 1857.18, "total_tokens": 1320376} |
| {"current_steps": 3220, "total_steps": 8260, "loss": 0.0817, "lr": 3.826947778797516e-05, "epoch": 3.898305084745763, "percentage": 38.98, "elapsed_time": "0:11:51", "remaining_time": "0:18:34", "throughput": 1857.64, "total_tokens": 1322616} |
| {"current_steps": 3225, "total_steps": 8260, "loss": 0.2346, "lr": 3.822467869723581e-05, "epoch": 3.904358353510896, "percentage": 39.04, "elapsed_time": "0:11:52", "remaining_time": "0:18:33", "throughput": 1857.94, "total_tokens": 1324664} |
| {"current_steps": 3230, "total_steps": 8260, "loss": 0.0838, "lr": 3.8179820561974835e-05, "epoch": 3.910411622276029, "percentage": 39.1, "elapsed_time": "0:11:53", "remaining_time": "0:18:31", "throughput": 1858.13, "total_tokens": 1326616} |
| {"current_steps": 3235, "total_steps": 8260, "loss": 0.0545, "lr": 3.813490358247137e-05, "epoch": 3.916464891041162, "percentage": 39.16, "elapsed_time": "0:11:54", "remaining_time": "0:18:30", "throughput": 1858.48, "total_tokens": 1328760} |
| {"current_steps": 3240, "total_steps": 8260, "loss": 0.0741, "lr": 3.8089927959267255e-05, "epoch": 3.9225181598062955, "percentage": 39.23, "elapsed_time": "0:11:55", "remaining_time": "0:18:29", "throughput": 1858.9, "total_tokens": 1330968} |
| {"current_steps": 3245, "total_steps": 8260, "loss": 0.1291, "lr": 3.8044893893166203e-05, "epoch": 3.928571428571429, "percentage": 39.29, "elapsed_time": "0:11:56", "remaining_time": "0:18:28", "throughput": 1859.12, "total_tokens": 1332952} |
| {"current_steps": 3250, "total_steps": 8260, "loss": 0.1834, "lr": 3.799980158523279e-05, "epoch": 3.9346246973365617, "percentage": 39.35, "elapsed_time": "0:11:58", "remaining_time": "0:18:26", "throughput": 1859.42, "total_tokens": 1335064} |
| {"current_steps": 3255, "total_steps": 8260, "loss": 0.0621, "lr": 3.795465123679167e-05, "epoch": 3.940677966101695, "percentage": 39.41, "elapsed_time": "0:11:58", "remaining_time": "0:18:25", "throughput": 1859.64, "total_tokens": 1337080} |
| {"current_steps": 3260, "total_steps": 8260, "loss": 0.1765, "lr": 3.790944304942664e-05, "epoch": 3.946731234866828, "percentage": 39.47, "elapsed_time": "0:11:59", "remaining_time": "0:18:24", "throughput": 1859.89, "total_tokens": 1339096} |
| {"current_steps": 3265, "total_steps": 8260, "loss": 0.1071, "lr": 3.7864177224979696e-05, "epoch": 3.9527845036319613, "percentage": 39.53, "elapsed_time": "0:12:00", "remaining_time": "0:18:23", "throughput": 1860.0, "total_tokens": 1341048} |
| {"current_steps": 3270, "total_steps": 8260, "loss": 0.0683, "lr": 3.781885396555019e-05, "epoch": 3.9588377723970947, "percentage": 39.59, "elapsed_time": "0:12:02", "remaining_time": "0:18:21", "throughput": 1860.35, "total_tokens": 1343224} |
| {"current_steps": 3275, "total_steps": 8260, "loss": 0.1431, "lr": 3.777347347349392e-05, "epoch": 3.9648910411622276, "percentage": 39.65, "elapsed_time": "0:12:03", "remaining_time": "0:18:20", "throughput": 1860.64, "total_tokens": 1345272} |
| {"current_steps": 3280, "total_steps": 8260, "loss": 0.1878, "lr": 3.7728035951422166e-05, "epoch": 3.970944309927361, "percentage": 39.71, "elapsed_time": "0:12:04", "remaining_time": "0:18:19", "throughput": 1860.98, "total_tokens": 1347416} |
| {"current_steps": 3285, "total_steps": 8260, "loss": 0.0579, "lr": 3.7682541602200875e-05, "epoch": 3.976997578692494, "percentage": 39.77, "elapsed_time": "0:12:05", "remaining_time": "0:18:18", "throughput": 1861.27, "total_tokens": 1349464} |
| {"current_steps": 3290, "total_steps": 8260, "loss": 0.0689, "lr": 3.76369906289497e-05, "epoch": 3.983050847457627, "percentage": 39.83, "elapsed_time": "0:12:05", "remaining_time": "0:18:16", "throughput": 1861.5, "total_tokens": 1351352} |
| {"current_steps": 3295, "total_steps": 8260, "loss": 0.0563, "lr": 3.7591383235041086e-05, "epoch": 3.9891041162227605, "percentage": 39.89, "elapsed_time": "0:12:06", "remaining_time": "0:18:15", "throughput": 1861.69, "total_tokens": 1353368} |
| {"current_steps": 3300, "total_steps": 8260, "loss": 0.1231, "lr": 3.75457196240994e-05, "epoch": 3.9951573849878934, "percentage": 39.95, "elapsed_time": "0:12:07", "remaining_time": "0:18:14", "throughput": 1861.99, "total_tokens": 1355416} |
| {"current_steps": 3304, "total_steps": 8260, "eval_loss": 0.11559449136257172, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:12:16", "remaining_time": "0:18:25", "throughput": 1841.48, "total_tokens": 1356744} |
| {"current_steps": 3305, "total_steps": 8260, "loss": 0.0638, "lr": 3.7500000000000003e-05, "epoch": 4.001210653753026, "percentage": 40.01, "elapsed_time": "0:12:18", "remaining_time": "0:18:27", "throughput": 1837.83, "total_tokens": 1357192} |
| {"current_steps": 3310, "total_steps": 8260, "loss": 0.0195, "lr": 3.7454224566868327e-05, "epoch": 4.00726392251816, "percentage": 40.07, "elapsed_time": "0:12:19", "remaining_time": "0:18:25", "throughput": 1838.11, "total_tokens": 1359272} |
| {"current_steps": 3315, "total_steps": 8260, "loss": 0.0688, "lr": 3.7408393529078985e-05, "epoch": 4.013317191283293, "percentage": 40.13, "elapsed_time": "0:12:20", "remaining_time": "0:18:24", "throughput": 1838.5, "total_tokens": 1361480} |
| {"current_steps": 3320, "total_steps": 8260, "loss": 0.0809, "lr": 3.7362507091254836e-05, "epoch": 4.019370460048426, "percentage": 40.19, "elapsed_time": "0:12:21", "remaining_time": "0:18:23", "throughput": 1838.76, "total_tokens": 1363560} |
| {"current_steps": 3325, "total_steps": 8260, "loss": 0.2098, "lr": 3.7316565458266114e-05, "epoch": 4.02542372881356, "percentage": 40.25, "elapsed_time": "0:12:22", "remaining_time": "0:18:22", "throughput": 1839.02, "total_tokens": 1365640} |
| {"current_steps": 3330, "total_steps": 8260, "loss": 0.1072, "lr": 3.727056883522945e-05, "epoch": 4.031476997578692, "percentage": 40.31, "elapsed_time": "0:12:23", "remaining_time": "0:18:20", "throughput": 1839.35, "total_tokens": 1367720} |
| {"current_steps": 3335, "total_steps": 8260, "loss": 0.1912, "lr": 3.722451742750701e-05, "epoch": 4.0375302663438255, "percentage": 40.38, "elapsed_time": "0:12:24", "remaining_time": "0:18:19", "throughput": 1839.57, "total_tokens": 1369704} |
| {"current_steps": 3340, "total_steps": 8260, "loss": 0.0719, "lr": 3.717841144070556e-05, "epoch": 4.043583535108959, "percentage": 40.44, "elapsed_time": "0:12:25", "remaining_time": "0:18:18", "throughput": 1839.89, "total_tokens": 1371816} |
| {"current_steps": 3345, "total_steps": 8260, "loss": 0.0235, "lr": 3.713225108067553e-05, "epoch": 4.049636803874092, "percentage": 40.5, "elapsed_time": "0:12:26", "remaining_time": "0:18:16", "throughput": 1840.13, "total_tokens": 1373800} |
| {"current_steps": 3350, "total_steps": 8260, "loss": 0.0315, "lr": 3.708603655351012e-05, "epoch": 4.0556900726392255, "percentage": 40.56, "elapsed_time": "0:12:27", "remaining_time": "0:18:15", "throughput": 1840.39, "total_tokens": 1375752} |
| {"current_steps": 3355, "total_steps": 8260, "loss": 0.0479, "lr": 3.7039768065544395e-05, "epoch": 4.061743341404358, "percentage": 40.62, "elapsed_time": "0:12:28", "remaining_time": "0:18:14", "throughput": 1840.74, "total_tokens": 1377896} |
| {"current_steps": 3360, "total_steps": 8260, "loss": 0.04, "lr": 3.69934458233543e-05, "epoch": 4.067796610169491, "percentage": 40.68, "elapsed_time": "0:12:29", "remaining_time": "0:18:13", "throughput": 1840.95, "total_tokens": 1379912} |
| {"current_steps": 3365, "total_steps": 8260, "loss": 0.0979, "lr": 3.694707003375579e-05, "epoch": 4.073849878934625, "percentage": 40.74, "elapsed_time": "0:12:30", "remaining_time": "0:18:11", "throughput": 1841.19, "total_tokens": 1381960} |
| {"current_steps": 3370, "total_steps": 8260, "loss": 0.056, "lr": 3.690064090380392e-05, "epoch": 4.079903147699758, "percentage": 40.8, "elapsed_time": "0:12:31", "remaining_time": "0:18:10", "throughput": 1841.51, "total_tokens": 1384072} |
| {"current_steps": 3375, "total_steps": 8260, "loss": 0.1003, "lr": 3.685415864079185e-05, "epoch": 4.085956416464891, "percentage": 40.86, "elapsed_time": "0:12:32", "remaining_time": "0:18:09", "throughput": 1841.79, "total_tokens": 1386152} |
| {"current_steps": 3380, "total_steps": 8260, "loss": 0.0748, "lr": 3.680762345225001e-05, "epoch": 4.092009685230024, "percentage": 40.92, "elapsed_time": "0:12:33", "remaining_time": "0:18:08", "throughput": 1842.06, "total_tokens": 1388168} |
| {"current_steps": 3385, "total_steps": 8260, "loss": 0.0064, "lr": 3.676103554594511e-05, "epoch": 4.098062953995157, "percentage": 40.98, "elapsed_time": "0:12:34", "remaining_time": "0:18:06", "throughput": 1842.28, "total_tokens": 1390152} |
| {"current_steps": 3390, "total_steps": 8260, "loss": 0.1631, "lr": 3.671439512987921e-05, "epoch": 4.1041162227602905, "percentage": 41.04, "elapsed_time": "0:12:35", "remaining_time": "0:18:05", "throughput": 1842.47, "total_tokens": 1392168} |
| {"current_steps": 3395, "total_steps": 8260, "loss": 0.0671, "lr": 3.666770241228883e-05, "epoch": 4.110169491525424, "percentage": 41.1, "elapsed_time": "0:12:36", "remaining_time": "0:18:04", "throughput": 1842.89, "total_tokens": 1394376} |
| {"current_steps": 3400, "total_steps": 8260, "loss": 0.0143, "lr": 3.6620957601644016e-05, "epoch": 4.116222760290557, "percentage": 41.16, "elapsed_time": "0:12:37", "remaining_time": "0:18:02", "throughput": 1843.24, "total_tokens": 1396520} |
| {"current_steps": 3405, "total_steps": 8260, "loss": 0.1034, "lr": 3.657416090664737e-05, "epoch": 4.12227602905569, "percentage": 41.22, "elapsed_time": "0:12:38", "remaining_time": "0:18:01", "throughput": 1843.52, "total_tokens": 1398600} |
| {"current_steps": 3410, "total_steps": 8260, "loss": 0.1193, "lr": 3.652731253623315e-05, "epoch": 4.128329297820823, "percentage": 41.28, "elapsed_time": "0:12:39", "remaining_time": "0:18:00", "throughput": 1843.8, "total_tokens": 1400584} |
| {"current_steps": 3415, "total_steps": 8260, "loss": 0.0511, "lr": 3.648041269956634e-05, "epoch": 4.134382566585956, "percentage": 41.34, "elapsed_time": "0:12:40", "remaining_time": "0:17:59", "throughput": 1844.18, "total_tokens": 1402760} |
| {"current_steps": 3420, "total_steps": 8260, "loss": 0.0324, "lr": 3.6433461606041695e-05, "epoch": 4.14043583535109, "percentage": 41.4, "elapsed_time": "0:12:41", "remaining_time": "0:17:57", "throughput": 1844.53, "total_tokens": 1404936} |
| {"current_steps": 3425, "total_steps": 8260, "loss": 0.1215, "lr": 3.6386459465282824e-05, "epoch": 4.146489104116223, "percentage": 41.46, "elapsed_time": "0:12:42", "remaining_time": "0:17:56", "throughput": 1844.69, "total_tokens": 1406920} |
| {"current_steps": 3430, "total_steps": 8260, "loss": 0.0579, "lr": 3.6339406487141255e-05, "epoch": 4.1525423728813555, "percentage": 41.53, "elapsed_time": "0:12:43", "remaining_time": "0:17:55", "throughput": 1844.9, "total_tokens": 1408840} |
| {"current_steps": 3435, "total_steps": 8260, "loss": 0.0937, "lr": 3.6292302881695464e-05, "epoch": 4.158595641646489, "percentage": 41.59, "elapsed_time": "0:12:44", "remaining_time": "0:17:53", "throughput": 1845.09, "total_tokens": 1410696} |
| {"current_steps": 3440, "total_steps": 8260, "loss": 0.1445, "lr": 3.6245148859249996e-05, "epoch": 4.164648910411622, "percentage": 41.65, "elapsed_time": "0:12:45", "remaining_time": "0:17:52", "throughput": 1845.26, "total_tokens": 1412680} |
| {"current_steps": 3445, "total_steps": 8260, "loss": 0.1322, "lr": 3.619794463033447e-05, "epoch": 4.170702179176756, "percentage": 41.71, "elapsed_time": "0:12:46", "remaining_time": "0:17:51", "throughput": 1845.55, "total_tokens": 1414728} |
| {"current_steps": 3450, "total_steps": 8260, "loss": 0.0257, "lr": 3.6150690405702685e-05, "epoch": 4.176755447941889, "percentage": 41.77, "elapsed_time": "0:12:47", "remaining_time": "0:17:50", "throughput": 1845.75, "total_tokens": 1416712} |
| {"current_steps": 3455, "total_steps": 8260, "loss": 0.0354, "lr": 3.6103386396331635e-05, "epoch": 4.182808716707021, "percentage": 41.83, "elapsed_time": "0:12:48", "remaining_time": "0:17:48", "throughput": 1846.2, "total_tokens": 1418952} |
| {"current_steps": 3460, "total_steps": 8260, "loss": 0.0882, "lr": 3.605603281342061e-05, "epoch": 4.188861985472155, "percentage": 41.89, "elapsed_time": "0:12:49", "remaining_time": "0:17:47", "throughput": 1846.47, "total_tokens": 1421032} |
| {"current_steps": 3465, "total_steps": 8260, "loss": 0.0351, "lr": 3.6008629868390204e-05, "epoch": 4.194915254237288, "percentage": 41.95, "elapsed_time": "0:12:50", "remaining_time": "0:17:46", "throughput": 1846.66, "total_tokens": 1423048} |
| {"current_steps": 3470, "total_steps": 8260, "loss": 0.114, "lr": 3.5961177772881434e-05, "epoch": 4.200968523002421, "percentage": 42.01, "elapsed_time": "0:12:51", "remaining_time": "0:17:45", "throughput": 1846.99, "total_tokens": 1425192} |
| {"current_steps": 3475, "total_steps": 8260, "loss": 0.1092, "lr": 3.591367673875472e-05, "epoch": 4.207021791767555, "percentage": 42.07, "elapsed_time": "0:12:52", "remaining_time": "0:17:43", "throughput": 1847.3, "total_tokens": 1427304} |
| {"current_steps": 3480, "total_steps": 8260, "loss": 0.2393, "lr": 3.5866126978089025e-05, "epoch": 4.213075060532688, "percentage": 42.13, "elapsed_time": "0:12:53", "remaining_time": "0:17:42", "throughput": 1847.53, "total_tokens": 1429288} |
| {"current_steps": 3485, "total_steps": 8260, "loss": 0.0641, "lr": 3.5818528703180826e-05, "epoch": 4.219128329297821, "percentage": 42.19, "elapsed_time": "0:12:54", "remaining_time": "0:17:41", "throughput": 1847.83, "total_tokens": 1431400} |
| {"current_steps": 3490, "total_steps": 8260, "loss": 0.1746, "lr": 3.577088212654322e-05, "epoch": 4.225181598062954, "percentage": 42.25, "elapsed_time": "0:12:55", "remaining_time": "0:17:40", "throughput": 1848.2, "total_tokens": 1433576} |
| {"current_steps": 3495, "total_steps": 8260, "loss": 0.068, "lr": 3.572318746090496e-05, "epoch": 4.231234866828087, "percentage": 42.31, "elapsed_time": "0:12:56", "remaining_time": "0:17:38", "throughput": 1848.42, "total_tokens": 1435560} |
| {"current_steps": 3500, "total_steps": 8260, "loss": 0.0978, "lr": 3.5675444919209486e-05, "epoch": 4.237288135593221, "percentage": 42.37, "elapsed_time": "0:12:57", "remaining_time": "0:17:37", "throughput": 1848.81, "total_tokens": 1437672} |
| {"current_steps": 3505, "total_steps": 8260, "loss": 0.0869, "lr": 3.5627654714614e-05, "epoch": 4.243341404358354, "percentage": 42.43, "elapsed_time": "0:12:58", "remaining_time": "0:17:36", "throughput": 1849.08, "total_tokens": 1439656} |
| {"current_steps": 3510, "total_steps": 8260, "loss": 0.0452, "lr": 3.557981706048852e-05, "epoch": 4.249394673123486, "percentage": 42.49, "elapsed_time": "0:12:59", "remaining_time": "0:17:34", "throughput": 1849.24, "total_tokens": 1441608} |
| {"current_steps": 3515, "total_steps": 8260, "loss": 0.1002, "lr": 3.5531932170414896e-05, "epoch": 4.25544794188862, "percentage": 42.55, "elapsed_time": "0:13:00", "remaining_time": "0:17:33", "throughput": 1849.42, "total_tokens": 1443624} |
| {"current_steps": 3520, "total_steps": 8260, "loss": 0.0465, "lr": 3.5484000258185876e-05, "epoch": 4.261501210653753, "percentage": 42.62, "elapsed_time": "0:13:01", "remaining_time": "0:17:32", "throughput": 1849.68, "total_tokens": 1445736} |
| {"current_steps": 3525, "total_steps": 8260, "loss": 0.1034, "lr": 3.5436021537804144e-05, "epoch": 4.267554479418886, "percentage": 42.68, "elapsed_time": "0:13:02", "remaining_time": "0:17:31", "throughput": 1850.02, "total_tokens": 1447880} |
| {"current_steps": 3530, "total_steps": 8260, "loss": 0.1288, "lr": 3.538799622348139e-05, "epoch": 4.27360774818402, "percentage": 42.74, "elapsed_time": "0:13:03", "remaining_time": "0:17:30", "throughput": 1850.2, "total_tokens": 1449896} |
| {"current_steps": 3535, "total_steps": 8260, "loss": 0.0677, "lr": 3.5339924529637304e-05, "epoch": 4.279661016949152, "percentage": 42.8, "elapsed_time": "0:13:04", "remaining_time": "0:17:28", "throughput": 1850.35, "total_tokens": 1451880} |
| {"current_steps": 3540, "total_steps": 8260, "loss": 0.1307, "lr": 3.529180667089868e-05, "epoch": 4.285714285714286, "percentage": 42.86, "elapsed_time": "0:13:05", "remaining_time": "0:17:27", "throughput": 1850.7, "total_tokens": 1453992} |
| {"current_steps": 3545, "total_steps": 8260, "loss": 0.058, "lr": 3.52436428620984e-05, "epoch": 4.291767554479419, "percentage": 42.92, "elapsed_time": "0:13:06", "remaining_time": "0:17:26", "throughput": 1850.9, "total_tokens": 1455912} |
| {"current_steps": 3550, "total_steps": 8260, "loss": 0.1341, "lr": 3.5195433318274516e-05, "epoch": 4.297820823244552, "percentage": 42.98, "elapsed_time": "0:13:07", "remaining_time": "0:17:24", "throughput": 1851.19, "total_tokens": 1458024} |
| {"current_steps": 3555, "total_steps": 8260, "loss": 0.0551, "lr": 3.514717825466925e-05, "epoch": 4.303874092009686, "percentage": 43.04, "elapsed_time": "0:13:08", "remaining_time": "0:17:23", "throughput": 1851.36, "total_tokens": 1459976} |
| {"current_steps": 3560, "total_steps": 8260, "loss": 0.1853, "lr": 3.509887788672809e-05, "epoch": 4.309927360774818, "percentage": 43.1, "elapsed_time": "0:13:09", "remaining_time": "0:17:22", "throughput": 1851.68, "total_tokens": 1462120} |
| {"current_steps": 3565, "total_steps": 8260, "loss": 0.1765, "lr": 3.5050532430098774e-05, "epoch": 4.315980629539951, "percentage": 43.16, "elapsed_time": "0:13:10", "remaining_time": "0:17:21", "throughput": 1851.89, "total_tokens": 1464104} |
| {"current_steps": 3570, "total_steps": 8260, "loss": 0.1376, "lr": 3.500214210063035e-05, "epoch": 4.322033898305085, "percentage": 43.22, "elapsed_time": "0:13:11", "remaining_time": "0:17:19", "throughput": 1852.23, "total_tokens": 1466216} |
| {"current_steps": 3575, "total_steps": 8260, "loss": 0.1046, "lr": 3.495370711437221e-05, "epoch": 4.328087167070218, "percentage": 43.28, "elapsed_time": "0:13:12", "remaining_time": "0:17:18", "throughput": 1852.46, "total_tokens": 1468264} |
| {"current_steps": 3580, "total_steps": 8260, "loss": 0.1052, "lr": 3.490522768757316e-05, "epoch": 4.3341404358353515, "percentage": 43.34, "elapsed_time": "0:13:13", "remaining_time": "0:17:17", "throughput": 1852.78, "total_tokens": 1470408} |
| {"current_steps": 3585, "total_steps": 8260, "loss": 0.0727, "lr": 3.485670403668036e-05, "epoch": 4.340193704600484, "percentage": 43.4, "elapsed_time": "0:13:14", "remaining_time": "0:17:16", "throughput": 1853.05, "total_tokens": 1472392} |
| {"current_steps": 3590, "total_steps": 8260, "loss": 0.063, "lr": 3.480813637833846e-05, "epoch": 4.346246973365617, "percentage": 43.46, "elapsed_time": "0:13:15", "remaining_time": "0:17:14", "throughput": 1853.31, "total_tokens": 1474504} |
| {"current_steps": 3595, "total_steps": 8260, "loss": 0.1132, "lr": 3.475952492938859e-05, "epoch": 4.352300242130751, "percentage": 43.52, "elapsed_time": "0:13:16", "remaining_time": "0:17:13", "throughput": 1853.59, "total_tokens": 1476616} |
| {"current_steps": 3600, "total_steps": 8260, "loss": 0.0321, "lr": 3.471086990686737e-05, "epoch": 4.358353510895884, "percentage": 43.58, "elapsed_time": "0:13:17", "remaining_time": "0:17:12", "throughput": 1853.87, "total_tokens": 1478664} |
| {"current_steps": 3605, "total_steps": 8260, "loss": 0.1957, "lr": 3.466217152800598e-05, "epoch": 4.364406779661017, "percentage": 43.64, "elapsed_time": "0:13:18", "remaining_time": "0:17:11", "throughput": 1854.02, "total_tokens": 1480648} |
| {"current_steps": 3610, "total_steps": 8260, "loss": 0.0888, "lr": 3.461343001022919e-05, "epoch": 4.37046004842615, "percentage": 43.7, "elapsed_time": "0:13:19", "remaining_time": "0:17:09", "throughput": 1854.31, "total_tokens": 1482760} |
| {"current_steps": 3615, "total_steps": 8260, "loss": 0.0389, "lr": 3.456464557115433e-05, "epoch": 4.376513317191283, "percentage": 43.77, "elapsed_time": "0:13:20", "remaining_time": "0:17:08", "throughput": 1854.52, "total_tokens": 1484744} |
| {"current_steps": 3620, "total_steps": 8260, "loss": 0.0371, "lr": 3.45158184285904e-05, "epoch": 4.3825665859564165, "percentage": 43.83, "elapsed_time": "0:13:21", "remaining_time": "0:17:07", "throughput": 1854.65, "total_tokens": 1486728} |
| {"current_steps": 3625, "total_steps": 8260, "loss": 0.0869, "lr": 3.446694880053704e-05, "epoch": 4.38861985472155, "percentage": 43.89, "elapsed_time": "0:13:22", "remaining_time": "0:17:06", "throughput": 1854.89, "total_tokens": 1488808} |
| {"current_steps": 3630, "total_steps": 8260, "loss": 0.0762, "lr": 3.441803690518359e-05, "epoch": 4.394673123486683, "percentage": 43.95, "elapsed_time": "0:13:23", "remaining_time": "0:17:05", "throughput": 1855.22, "total_tokens": 1490984} |
| {"current_steps": 3635, "total_steps": 8260, "loss": 0.0991, "lr": 3.4369082960908084e-05, "epoch": 4.400726392251816, "percentage": 44.01, "elapsed_time": "0:13:24", "remaining_time": "0:17:03", "throughput": 1855.45, "total_tokens": 1493000} |
| {"current_steps": 3640, "total_steps": 8260, "loss": 0.1266, "lr": 3.432008718627631e-05, "epoch": 4.406779661016949, "percentage": 44.07, "elapsed_time": "0:13:25", "remaining_time": "0:17:02", "throughput": 1855.57, "total_tokens": 1494920} |
| {"current_steps": 3645, "total_steps": 8260, "loss": 0.1099, "lr": 3.4271049800040805e-05, "epoch": 4.412832929782082, "percentage": 44.13, "elapsed_time": "0:13:26", "remaining_time": "0:17:01", "throughput": 1855.76, "total_tokens": 1496904} |
| {"current_steps": 3650, "total_steps": 8260, "loss": 0.0628, "lr": 3.42219710211399e-05, "epoch": 4.418886198547216, "percentage": 44.19, "elapsed_time": "0:13:27", "remaining_time": "0:16:59", "throughput": 1855.91, "total_tokens": 1498792} |
| {"current_steps": 3655, "total_steps": 8260, "loss": 0.0539, "lr": 3.417285106869673e-05, "epoch": 4.424939467312349, "percentage": 44.25, "elapsed_time": "0:13:28", "remaining_time": "0:16:58", "throughput": 1856.12, "total_tokens": 1500840} |
| {"current_steps": 3660, "total_steps": 8260, "loss": 0.0558, "lr": 3.4123690162018246e-05, "epoch": 4.4309927360774815, "percentage": 44.31, "elapsed_time": "0:13:29", "remaining_time": "0:16:57", "throughput": 1856.39, "total_tokens": 1502888} |
| {"current_steps": 3665, "total_steps": 8260, "loss": 0.0915, "lr": 3.407448852059426e-05, "epoch": 4.437046004842615, "percentage": 44.37, "elapsed_time": "0:13:30", "remaining_time": "0:16:56", "throughput": 1856.62, "total_tokens": 1504904} |
| {"current_steps": 3670, "total_steps": 8260, "loss": 0.0843, "lr": 3.4025246364096455e-05, "epoch": 4.443099273607748, "percentage": 44.43, "elapsed_time": "0:13:31", "remaining_time": "0:16:54", "throughput": 1856.8, "total_tokens": 1506824} |
| {"current_steps": 3675, "total_steps": 8260, "loss": 0.1102, "lr": 3.397596391237739e-05, "epoch": 4.4491525423728815, "percentage": 44.49, "elapsed_time": "0:13:32", "remaining_time": "0:16:53", "throughput": 1857.01, "total_tokens": 1508872} |
| {"current_steps": 3680, "total_steps": 8260, "loss": 0.1804, "lr": 3.3926641385469556e-05, "epoch": 4.455205811138015, "percentage": 44.55, "elapsed_time": "0:13:33", "remaining_time": "0:16:52", "throughput": 1857.17, "total_tokens": 1510824} |
| {"current_steps": 3685, "total_steps": 8260, "loss": 0.1798, "lr": 3.387727900358435e-05, "epoch": 4.461259079903147, "percentage": 44.61, "elapsed_time": "0:13:34", "remaining_time": "0:16:51", "throughput": 1857.46, "total_tokens": 1512968} |
| {"current_steps": 3690, "total_steps": 8260, "loss": 0.065, "lr": 3.38278769871111e-05, "epoch": 4.467312348668281, "percentage": 44.67, "elapsed_time": "0:13:35", "remaining_time": "0:16:50", "throughput": 1857.79, "total_tokens": 1515144} |
| {"current_steps": 3695, "total_steps": 8260, "loss": 0.0292, "lr": 3.377843555661612e-05, "epoch": 4.473365617433414, "percentage": 44.73, "elapsed_time": "0:13:36", "remaining_time": "0:16:48", "throughput": 1857.99, "total_tokens": 1517192} |
| {"current_steps": 3700, "total_steps": 8260, "loss": 0.0747, "lr": 3.372895493284167e-05, "epoch": 4.479418886198547, "percentage": 44.79, "elapsed_time": "0:13:37", "remaining_time": "0:16:47", "throughput": 1858.35, "total_tokens": 1519400} |
| {"current_steps": 3705, "total_steps": 8260, "loss": 0.0675, "lr": 3.367943533670501e-05, "epoch": 4.485472154963681, "percentage": 44.85, "elapsed_time": "0:13:38", "remaining_time": "0:16:46", "throughput": 1858.5, "total_tokens": 1521416} |
| {"current_steps": 3710, "total_steps": 8260, "loss": 0.1236, "lr": 3.3629876989297405e-05, "epoch": 4.491525423728813, "percentage": 44.92, "elapsed_time": "0:13:39", "remaining_time": "0:16:45", "throughput": 1858.64, "total_tokens": 1523240} |
| {"current_steps": 3715, "total_steps": 8260, "loss": 0.0881, "lr": 3.3580280111883125e-05, "epoch": 4.4975786924939465, "percentage": 44.98, "elapsed_time": "0:13:40", "remaining_time": "0:16:43", "throughput": 1858.83, "total_tokens": 1525288} |
| {"current_steps": 3717, "total_steps": 8260, "eval_loss": 0.11460315436124802, "epoch": 4.5, "percentage": 45.0, "elapsed_time": "0:13:49", "remaining_time": "0:16:53", "throughput": 1840.7, "total_tokens": 1526088} |
| {"current_steps": 3720, "total_steps": 8260, "loss": 0.1083, "lr": 3.3530644925898465e-05, "epoch": 4.50363196125908, "percentage": 45.04, "elapsed_time": "0:13:51", "remaining_time": "0:16:54", "throughput": 1837.62, "total_tokens": 1527304} |
| {"current_steps": 3725, "total_steps": 8260, "loss": 0.1781, "lr": 3.348097165295076e-05, "epoch": 4.509685230024213, "percentage": 45.1, "elapsed_time": "0:13:52", "remaining_time": "0:16:53", "throughput": 1837.88, "total_tokens": 1529384} |
| {"current_steps": 3730, "total_steps": 8260, "loss": 0.0696, "lr": 3.34312605148174e-05, "epoch": 4.5157384987893465, "percentage": 45.16, "elapsed_time": "0:13:53", "remaining_time": "0:16:51", "throughput": 1838.14, "total_tokens": 1531464} |
| {"current_steps": 3735, "total_steps": 8260, "loss": 0.1117, "lr": 3.338151173344483e-05, "epoch": 4.521791767554479, "percentage": 45.22, "elapsed_time": "0:13:54", "remaining_time": "0:16:50", "throughput": 1838.48, "total_tokens": 1533608} |
| {"current_steps": 3740, "total_steps": 8260, "loss": 0.0971, "lr": 3.333172553094754e-05, "epoch": 4.527845036319612, "percentage": 45.28, "elapsed_time": "0:13:55", "remaining_time": "0:16:49", "throughput": 1838.73, "total_tokens": 1535656} |
| {"current_steps": 3745, "total_steps": 8260, "loss": 0.0946, "lr": 3.328190212960712e-05, "epoch": 4.533898305084746, "percentage": 45.34, "elapsed_time": "0:13:56", "remaining_time": "0:16:48", "throughput": 1838.94, "total_tokens": 1537640} |
| {"current_steps": 3750, "total_steps": 8260, "loss": 0.1283, "lr": 3.323204175187125e-05, "epoch": 4.539951573849879, "percentage": 45.4, "elapsed_time": "0:13:57", "remaining_time": "0:16:46", "throughput": 1839.07, "total_tokens": 1539592} |
| {"current_steps": 3755, "total_steps": 8260, "loss": 0.082, "lr": 3.318214462035266e-05, "epoch": 4.546004842615012, "percentage": 45.46, "elapsed_time": "0:13:58", "remaining_time": "0:16:45", "throughput": 1839.28, "total_tokens": 1541576} |
| {"current_steps": 3760, "total_steps": 8260, "loss": 0.0886, "lr": 3.3132210957828226e-05, "epoch": 4.552058111380145, "percentage": 45.52, "elapsed_time": "0:13:59", "remaining_time": "0:16:44", "throughput": 1839.39, "total_tokens": 1543464} |
| {"current_steps": 3765, "total_steps": 8260, "loss": 0.1542, "lr": 3.3082240987237875e-05, "epoch": 4.558111380145278, "percentage": 45.58, "elapsed_time": "0:14:00", "remaining_time": "0:16:42", "throughput": 1839.55, "total_tokens": 1545416} |
| {"current_steps": 3770, "total_steps": 8260, "loss": 0.0205, "lr": 3.3032234931683684e-05, "epoch": 4.5641646489104115, "percentage": 45.64, "elapsed_time": "0:14:01", "remaining_time": "0:16:41", "throughput": 1839.78, "total_tokens": 1547432} |
| {"current_steps": 3775, "total_steps": 8260, "loss": 0.0411, "lr": 3.2982193014428805e-05, "epoch": 4.570217917675545, "percentage": 45.7, "elapsed_time": "0:14:02", "remaining_time": "0:16:40", "throughput": 1840.15, "total_tokens": 1549576} |
| {"current_steps": 3780, "total_steps": 8260, "loss": 0.044, "lr": 3.2932115458896515e-05, "epoch": 4.576271186440678, "percentage": 45.76, "elapsed_time": "0:14:03", "remaining_time": "0:16:39", "throughput": 1840.5, "total_tokens": 1551688} |
| {"current_steps": 3785, "total_steps": 8260, "loss": 0.1011, "lr": 3.2882002488669204e-05, "epoch": 4.582324455205811, "percentage": 45.82, "elapsed_time": "0:14:04", "remaining_time": "0:16:37", "throughput": 1840.7, "total_tokens": 1553672} |
| {"current_steps": 3790, "total_steps": 8260, "loss": 0.0388, "lr": 3.28318543274874e-05, "epoch": 4.588377723970944, "percentage": 45.88, "elapsed_time": "0:14:05", "remaining_time": "0:16:36", "throughput": 1840.9, "total_tokens": 1555720} |
| {"current_steps": 3795, "total_steps": 8260, "loss": 0.107, "lr": 3.278167119924872e-05, "epoch": 4.594430992736077, "percentage": 45.94, "elapsed_time": "0:14:06", "remaining_time": "0:16:35", "throughput": 1841.05, "total_tokens": 1557672} |
| {"current_steps": 3800, "total_steps": 8260, "loss": 0.0351, "lr": 3.27314533280069e-05, "epoch": 4.600484261501211, "percentage": 46.0, "elapsed_time": "0:14:07", "remaining_time": "0:16:34", "throughput": 1841.43, "total_tokens": 1559880} |
| {"current_steps": 3805, "total_steps": 8260, "loss": 0.0482, "lr": 3.268120093797082e-05, "epoch": 4.606537530266344, "percentage": 46.07, "elapsed_time": "0:14:08", "remaining_time": "0:16:32", "throughput": 1841.73, "total_tokens": 1561960} |
| {"current_steps": 3810, "total_steps": 8260, "loss": 0.1032, "lr": 3.263091425350345e-05, "epoch": 4.6125907990314765, "percentage": 46.13, "elapsed_time": "0:14:09", "remaining_time": "0:16:31", "throughput": 1841.93, "total_tokens": 1563880} |
| {"current_steps": 3815, "total_steps": 8260, "loss": 0.1256, "lr": 3.258059349912089e-05, "epoch": 4.61864406779661, "percentage": 46.19, "elapsed_time": "0:14:10", "remaining_time": "0:16:30", "throughput": 1842.16, "total_tokens": 1565896} |
| {"current_steps": 3820, "total_steps": 8260, "loss": 0.1922, "lr": 3.253023889949135e-05, "epoch": 4.624697336561743, "percentage": 46.25, "elapsed_time": "0:14:11", "remaining_time": "0:16:29", "throughput": 1842.49, "total_tokens": 1568040} |
| {"current_steps": 3825, "total_steps": 8260, "loss": 0.1464, "lr": 3.247985067943414e-05, "epoch": 4.6307506053268765, "percentage": 46.31, "elapsed_time": "0:14:12", "remaining_time": "0:16:27", "throughput": 1842.68, "total_tokens": 1570056} |
| {"current_steps": 3830, "total_steps": 8260, "loss": 0.1453, "lr": 3.2429429063918696e-05, "epoch": 4.63680387409201, "percentage": 46.37, "elapsed_time": "0:14:13", "remaining_time": "0:16:26", "throughput": 1842.96, "total_tokens": 1572168} |
| {"current_steps": 3835, "total_steps": 8260, "loss": 0.0454, "lr": 3.2378974278063534e-05, "epoch": 4.642857142857143, "percentage": 46.43, "elapsed_time": "0:14:14", "remaining_time": "0:16:25", "throughput": 1843.23, "total_tokens": 1574216} |
| {"current_steps": 3840, "total_steps": 8260, "loss": 0.0568, "lr": 3.232848654713528e-05, "epoch": 4.648910411622276, "percentage": 46.49, "elapsed_time": "0:14:15", "remaining_time": "0:16:24", "throughput": 1843.44, "total_tokens": 1576168} |
| {"current_steps": 3845, "total_steps": 8260, "loss": 0.1196, "lr": 3.227796609654765e-05, "epoch": 4.654963680387409, "percentage": 46.55, "elapsed_time": "0:14:15", "remaining_time": "0:16:22", "throughput": 1843.64, "total_tokens": 1578152} |
| {"current_steps": 3850, "total_steps": 8260, "loss": 0.0543, "lr": 3.222741315186043e-05, "epoch": 4.661016949152542, "percentage": 46.61, "elapsed_time": "0:14:16", "remaining_time": "0:16:21", "throughput": 1843.81, "total_tokens": 1580104} |
| {"current_steps": 3855, "total_steps": 8260, "loss": 0.1408, "lr": 3.217682793877851e-05, "epoch": 4.667070217917676, "percentage": 46.67, "elapsed_time": "0:14:17", "remaining_time": "0:16:20", "throughput": 1843.92, "total_tokens": 1582056} |
| {"current_steps": 3860, "total_steps": 8260, "loss": 0.085, "lr": 3.212621068315081e-05, "epoch": 4.673123486682809, "percentage": 46.73, "elapsed_time": "0:14:19", "remaining_time": "0:16:19", "throughput": 1844.16, "total_tokens": 1584136} |
| {"current_steps": 3865, "total_steps": 8260, "loss": 0.0992, "lr": 3.207556161096935e-05, "epoch": 4.6791767554479415, "percentage": 46.79, "elapsed_time": "0:14:20", "remaining_time": "0:16:17", "throughput": 1844.37, "total_tokens": 1586184} |
| {"current_steps": 3870, "total_steps": 8260, "loss": 0.0705, "lr": 3.202488094836819e-05, "epoch": 4.685230024213075, "percentage": 46.85, "elapsed_time": "0:14:21", "remaining_time": "0:16:16", "throughput": 1844.65, "total_tokens": 1588296} |
| {"current_steps": 3875, "total_steps": 8260, "loss": 0.1287, "lr": 3.197416892162242e-05, "epoch": 4.691283292978208, "percentage": 46.91, "elapsed_time": "0:14:22", "remaining_time": "0:16:15", "throughput": 1845.02, "total_tokens": 1590504} |
| {"current_steps": 3880, "total_steps": 8260, "loss": 0.1111, "lr": 3.1923425757147175e-05, "epoch": 4.697336561743342, "percentage": 46.97, "elapsed_time": "0:14:23", "remaining_time": "0:16:14", "throughput": 1845.31, "total_tokens": 1592584} |
| {"current_steps": 3885, "total_steps": 8260, "loss": 0.1022, "lr": 3.1872651681496604e-05, "epoch": 4.703389830508475, "percentage": 47.03, "elapsed_time": "0:14:24", "remaining_time": "0:16:13", "throughput": 1845.59, "total_tokens": 1594728} |
| {"current_steps": 3890, "total_steps": 8260, "loss": 0.0574, "lr": 3.182184692136287e-05, "epoch": 4.709443099273607, "percentage": 47.09, "elapsed_time": "0:14:25", "remaining_time": "0:16:11", "throughput": 1845.85, "total_tokens": 1596776} |
| {"current_steps": 3895, "total_steps": 8260, "loss": 0.0519, "lr": 3.177101170357513e-05, "epoch": 4.715496368038741, "percentage": 47.15, "elapsed_time": "0:14:26", "remaining_time": "0:16:10", "throughput": 1846.19, "total_tokens": 1598984} |
| {"current_steps": 3900, "total_steps": 8260, "loss": 0.0556, "lr": 3.1720146255098535e-05, "epoch": 4.721549636803874, "percentage": 47.22, "elapsed_time": "0:14:27", "remaining_time": "0:16:09", "throughput": 1846.46, "total_tokens": 1601096} |
| {"current_steps": 3905, "total_steps": 8260, "loss": 0.1542, "lr": 3.16692508030332e-05, "epoch": 4.727602905569007, "percentage": 47.28, "elapsed_time": "0:14:28", "remaining_time": "0:16:08", "throughput": 1846.86, "total_tokens": 1603336} |
| {"current_steps": 3910, "total_steps": 8260, "loss": 0.0325, "lr": 3.16183255746132e-05, "epoch": 4.733656174334141, "percentage": 47.34, "elapsed_time": "0:14:29", "remaining_time": "0:16:06", "throughput": 1847.06, "total_tokens": 1605320} |
| {"current_steps": 3915, "total_steps": 8260, "loss": 0.0394, "lr": 3.156737079720555e-05, "epoch": 4.739709443099273, "percentage": 47.4, "elapsed_time": "0:14:30", "remaining_time": "0:16:05", "throughput": 1847.25, "total_tokens": 1607304} |
| {"current_steps": 3920, "total_steps": 8260, "loss": 0.1401, "lr": 3.151638669830919e-05, "epoch": 4.745762711864407, "percentage": 47.46, "elapsed_time": "0:14:31", "remaining_time": "0:16:04", "throughput": 1847.48, "total_tokens": 1609384} |
| {"current_steps": 3925, "total_steps": 8260, "loss": 0.1393, "lr": 3.1465373505554e-05, "epoch": 4.75181598062954, "percentage": 47.52, "elapsed_time": "0:14:32", "remaining_time": "0:16:03", "throughput": 1847.61, "total_tokens": 1611304} |
| {"current_steps": 3930, "total_steps": 8260, "loss": 0.1648, "lr": 3.14143314466997e-05, "epoch": 4.757869249394673, "percentage": 47.58, "elapsed_time": "0:14:33", "remaining_time": "0:16:01", "throughput": 1847.76, "total_tokens": 1613192} |
| {"current_steps": 3935, "total_steps": 8260, "loss": 0.0329, "lr": 3.136326074963494e-05, "epoch": 4.763922518159807, "percentage": 47.64, "elapsed_time": "0:14:34", "remaining_time": "0:16:00", "throughput": 1848.01, "total_tokens": 1615304} |
| {"current_steps": 3940, "total_steps": 8260, "loss": 0.073, "lr": 3.131216164237622e-05, "epoch": 4.76997578692494, "percentage": 47.7, "elapsed_time": "0:14:35", "remaining_time": "0:15:59", "throughput": 1848.2, "total_tokens": 1617288} |
| {"current_steps": 3945, "total_steps": 8260, "loss": 0.0754, "lr": 3.1261034353066884e-05, "epoch": 4.776029055690072, "percentage": 47.76, "elapsed_time": "0:14:36", "remaining_time": "0:15:58", "throughput": 1848.45, "total_tokens": 1619336} |
| {"current_steps": 3950, "total_steps": 8260, "loss": 0.0947, "lr": 3.1209879109976064e-05, "epoch": 4.782082324455206, "percentage": 47.82, "elapsed_time": "0:14:37", "remaining_time": "0:15:57", "throughput": 1848.68, "total_tokens": 1621416} |
| {"current_steps": 3955, "total_steps": 8260, "loss": 0.0464, "lr": 3.115869614149776e-05, "epoch": 4.788135593220339, "percentage": 47.88, "elapsed_time": "0:14:38", "remaining_time": "0:15:55", "throughput": 1848.9, "total_tokens": 1623432} |
| {"current_steps": 3960, "total_steps": 8260, "loss": 0.2139, "lr": 3.1107485676149714e-05, "epoch": 4.7941888619854724, "percentage": 47.94, "elapsed_time": "0:14:39", "remaining_time": "0:15:54", "throughput": 1849.12, "total_tokens": 1625448} |
| {"current_steps": 3965, "total_steps": 8260, "loss": 0.1268, "lr": 3.105624794257245e-05, "epoch": 4.800242130750606, "percentage": 48.0, "elapsed_time": "0:14:40", "remaining_time": "0:15:53", "throughput": 1849.45, "total_tokens": 1627624} |
| {"current_steps": 3970, "total_steps": 8260, "loss": 0.1084, "lr": 3.100498316952823e-05, "epoch": 4.806295399515738, "percentage": 48.06, "elapsed_time": "0:14:41", "remaining_time": "0:15:52", "throughput": 1849.78, "total_tokens": 1629800} |
| {"current_steps": 3975, "total_steps": 8260, "loss": 0.0368, "lr": 3.095369158590006e-05, "epoch": 4.812348668280872, "percentage": 48.12, "elapsed_time": "0:14:42", "remaining_time": "0:15:50", "throughput": 1849.95, "total_tokens": 1631720} |
| {"current_steps": 3980, "total_steps": 8260, "loss": 0.1241, "lr": 3.09023734206906e-05, "epoch": 4.818401937046005, "percentage": 48.18, "elapsed_time": "0:14:42", "remaining_time": "0:15:49", "throughput": 1850.2, "total_tokens": 1633704} |
| {"current_steps": 3985, "total_steps": 8260, "loss": 0.148, "lr": 3.085102890302125e-05, "epoch": 4.824455205811138, "percentage": 48.24, "elapsed_time": "0:14:43", "remaining_time": "0:15:48", "throughput": 1850.41, "total_tokens": 1635656} |
| {"current_steps": 3990, "total_steps": 8260, "loss": 0.1334, "lr": 3.079965826213102e-05, "epoch": 4.830508474576272, "percentage": 48.31, "elapsed_time": "0:14:44", "remaining_time": "0:15:47", "throughput": 1850.65, "total_tokens": 1637736} |
| {"current_steps": 3995, "total_steps": 8260, "loss": 0.1053, "lr": 3.074826172737559e-05, "epoch": 4.836561743341404, "percentage": 48.37, "elapsed_time": "0:14:45", "remaining_time": "0:15:45", "throughput": 1850.89, "total_tokens": 1639816} |
| {"current_steps": 4000, "total_steps": 8260, "loss": 0.0672, "lr": 3.0696839528226206e-05, "epoch": 4.842615012106537, "percentage": 48.43, "elapsed_time": "0:14:46", "remaining_time": "0:15:44", "throughput": 1851.07, "total_tokens": 1641736} |
| {"current_steps": 4005, "total_steps": 8260, "loss": 0.1232, "lr": 3.064539189426874e-05, "epoch": 4.848668280871671, "percentage": 48.49, "elapsed_time": "0:14:47", "remaining_time": "0:15:43", "throughput": 1851.3, "total_tokens": 1643656} |
| {"current_steps": 4010, "total_steps": 8260, "loss": 0.1346, "lr": 3.059391905520259e-05, "epoch": 4.854721549636804, "percentage": 48.55, "elapsed_time": "0:14:48", "remaining_time": "0:15:42", "throughput": 1851.5, "total_tokens": 1645736} |
| {"current_steps": 4015, "total_steps": 8260, "loss": 0.0779, "lr": 3.054242124083972e-05, "epoch": 4.8607748184019375, "percentage": 48.61, "elapsed_time": "0:14:49", "remaining_time": "0:15:40", "throughput": 1851.7, "total_tokens": 1647688} |
| {"current_steps": 4020, "total_steps": 8260, "loss": 0.0782, "lr": 3.0490898681103575e-05, "epoch": 4.86682808716707, "percentage": 48.67, "elapsed_time": "0:14:50", "remaining_time": "0:15:39", "throughput": 1851.91, "total_tokens": 1649768} |
| {"current_steps": 4025, "total_steps": 8260, "loss": 0.0757, "lr": 3.0439351606028094e-05, "epoch": 4.872881355932203, "percentage": 48.73, "elapsed_time": "0:14:51", "remaining_time": "0:15:38", "throughput": 1852.08, "total_tokens": 1651688} |
| {"current_steps": 4030, "total_steps": 8260, "loss": 0.1056, "lr": 3.0387780245756655e-05, "epoch": 4.878934624697337, "percentage": 48.79, "elapsed_time": "0:14:52", "remaining_time": "0:15:37", "throughput": 1852.41, "total_tokens": 1653896} |
| {"current_steps": 4035, "total_steps": 8260, "loss": 0.0121, "lr": 3.0336184830541093e-05, "epoch": 4.88498789346247, "percentage": 48.85, "elapsed_time": "0:14:53", "remaining_time": "0:15:35", "throughput": 1852.67, "total_tokens": 1656008} |
| {"current_steps": 4040, "total_steps": 8260, "loss": 0.1195, "lr": 3.028456559074061e-05, "epoch": 4.891041162227603, "percentage": 48.91, "elapsed_time": "0:14:54", "remaining_time": "0:15:34", "throughput": 1852.89, "total_tokens": 1658088} |
| {"current_steps": 4045, "total_steps": 8260, "loss": 0.0596, "lr": 3.0232922756820804e-05, "epoch": 4.897094430992736, "percentage": 48.97, "elapsed_time": "0:14:55", "remaining_time": "0:15:33", "throughput": 1853.12, "total_tokens": 1660200} |
| {"current_steps": 4050, "total_steps": 8260, "loss": 0.1441, "lr": 3.0181256559352587e-05, "epoch": 4.903147699757869, "percentage": 49.03, "elapsed_time": "0:14:56", "remaining_time": "0:15:32", "throughput": 1853.49, "total_tokens": 1662440} |
| {"current_steps": 4055, "total_steps": 8260, "loss": 0.1405, "lr": 3.0129567229011214e-05, "epoch": 4.9092009685230025, "percentage": 49.09, "elapsed_time": "0:14:57", "remaining_time": "0:15:31", "throughput": 1853.79, "total_tokens": 1664552} |
| {"current_steps": 4060, "total_steps": 8260, "loss": 0.0743, "lr": 3.0077854996575184e-05, "epoch": 4.915254237288136, "percentage": 49.15, "elapsed_time": "0:14:58", "remaining_time": "0:15:29", "throughput": 1854.03, "total_tokens": 1666600} |
| {"current_steps": 4065, "total_steps": 8260, "loss": 0.0927, "lr": 3.0026120092925293e-05, "epoch": 4.921307506053269, "percentage": 49.21, "elapsed_time": "0:14:59", "remaining_time": "0:15:28", "throughput": 1854.33, "total_tokens": 1668776} |
| {"current_steps": 4070, "total_steps": 8260, "loss": 0.0887, "lr": 2.9974362749043512e-05, "epoch": 4.927360774818402, "percentage": 49.27, "elapsed_time": "0:15:00", "remaining_time": "0:15:27", "throughput": 1854.63, "total_tokens": 1670952} |
| {"current_steps": 4075, "total_steps": 8260, "loss": 0.0242, "lr": 2.9922583196012037e-05, "epoch": 4.933414043583535, "percentage": 49.33, "elapsed_time": "0:15:02", "remaining_time": "0:15:26", "throughput": 1854.91, "total_tokens": 1673128} |
| {"current_steps": 4080, "total_steps": 8260, "loss": 0.0493, "lr": 2.9870781665012204e-05, "epoch": 4.939467312348668, "percentage": 49.39, "elapsed_time": "0:15:02", "remaining_time": "0:15:25", "throughput": 1855.08, "total_tokens": 1675112} |
| {"current_steps": 4085, "total_steps": 8260, "loss": 0.0568, "lr": 2.981895838732348e-05, "epoch": 4.945520581113802, "percentage": 49.46, "elapsed_time": "0:15:03", "remaining_time": "0:15:23", "throughput": 1855.27, "total_tokens": 1677096} |
| {"current_steps": 4090, "total_steps": 8260, "loss": 0.1336, "lr": 2.9767113594322426e-05, "epoch": 4.951573849878935, "percentage": 49.52, "elapsed_time": "0:15:04", "remaining_time": "0:15:22", "throughput": 1855.44, "total_tokens": 1679080} |
| {"current_steps": 4095, "total_steps": 8260, "loss": 0.0935, "lr": 2.9715247517481655e-05, "epoch": 4.9576271186440675, "percentage": 49.58, "elapsed_time": "0:15:05", "remaining_time": "0:15:21", "throughput": 1855.56, "total_tokens": 1681000} |
| {"current_steps": 4100, "total_steps": 8260, "loss": 0.07, "lr": 2.96633603883688e-05, "epoch": 4.963680387409201, "percentage": 49.64, "elapsed_time": "0:15:06", "remaining_time": "0:15:20", "throughput": 1855.8, "total_tokens": 1683048} |
| {"current_steps": 4105, "total_steps": 8260, "loss": 0.0536, "lr": 2.961145243864552e-05, "epoch": 4.969733656174334, "percentage": 49.7, "elapsed_time": "0:15:07", "remaining_time": "0:15:18", "throughput": 1856.04, "total_tokens": 1685160} |
| {"current_steps": 4110, "total_steps": 8260, "loss": 0.162, "lr": 2.9559523900066395e-05, "epoch": 4.9757869249394675, "percentage": 49.76, "elapsed_time": "0:15:08", "remaining_time": "0:15:17", "throughput": 1856.18, "total_tokens": 1687048} |
| {"current_steps": 4115, "total_steps": 8260, "loss": 0.1296, "lr": 2.9507575004477955e-05, "epoch": 4.981840193704601, "percentage": 49.82, "elapsed_time": "0:15:09", "remaining_time": "0:15:16", "throughput": 1856.33, "total_tokens": 1689000} |
| {"current_steps": 4120, "total_steps": 8260, "loss": 0.0889, "lr": 2.9455605983817598e-05, "epoch": 4.987893462469733, "percentage": 49.88, "elapsed_time": "0:15:10", "remaining_time": "0:15:15", "throughput": 1856.55, "total_tokens": 1691112} |
| {"current_steps": 4125, "total_steps": 8260, "loss": 0.0816, "lr": 2.9403617070112587e-05, "epoch": 4.993946731234867, "percentage": 49.94, "elapsed_time": "0:15:11", "remaining_time": "0:15:14", "throughput": 1856.73, "total_tokens": 1693160} |
| {"current_steps": 4130, "total_steps": 8260, "loss": 0.1287, "lr": 2.9351608495479004e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:15:12", "remaining_time": "0:15:12", "throughput": 1856.64, "total_tokens": 1694912} |
| {"current_steps": 4130, "total_steps": 8260, "eval_loss": 0.1349330097436905, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:15:21", "remaining_time": "0:15:21", "throughput": 1840.29, "total_tokens": 1694912} |
| {"current_steps": 4135, "total_steps": 8260, "loss": 0.0696, "lr": 2.92995804921207e-05, "epoch": 5.006053268765133, "percentage": 50.06, "elapsed_time": "0:15:23", "remaining_time": "0:15:21", "throughput": 1837.38, "total_tokens": 1697056} |
| {"current_steps": 4140, "total_steps": 8260, "loss": 0.0345, "lr": 2.9247533292328273e-05, "epoch": 5.012106537530267, "percentage": 50.12, "elapsed_time": "0:15:24", "remaining_time": "0:15:20", "throughput": 1837.64, "total_tokens": 1699168} |
| {"current_steps": 4145, "total_steps": 8260, "loss": 0.0072, "lr": 2.9195467128478044e-05, "epoch": 5.018159806295399, "percentage": 50.18, "elapsed_time": "0:15:25", "remaining_time": "0:15:18", "throughput": 1837.82, "total_tokens": 1701152} |
| {"current_steps": 4150, "total_steps": 8260, "loss": 0.0488, "lr": 2.914338223303098e-05, "epoch": 5.0242130750605325, "percentage": 50.24, "elapsed_time": "0:15:26", "remaining_time": "0:15:17", "throughput": 1837.97, "total_tokens": 1703168} |
| {"current_steps": 4155, "total_steps": 8260, "loss": 0.0745, "lr": 2.9091278838531695e-05, "epoch": 5.030266343825666, "percentage": 50.3, "elapsed_time": "0:15:27", "remaining_time": "0:15:16", "throughput": 1838.11, "total_tokens": 1705120} |
| {"current_steps": 4160, "total_steps": 8260, "loss": 0.0971, "lr": 2.9039157177607383e-05, "epoch": 5.036319612590799, "percentage": 50.36, "elapsed_time": "0:15:28", "remaining_time": "0:15:15", "throughput": 1838.33, "total_tokens": 1707200} |
| {"current_steps": 4165, "total_steps": 8260, "loss": 0.1624, "lr": 2.8987017482966815e-05, "epoch": 5.0423728813559325, "percentage": 50.42, "elapsed_time": "0:15:29", "remaining_time": "0:15:13", "throughput": 1838.51, "total_tokens": 1709120} |
| {"current_steps": 4170, "total_steps": 8260, "loss": 0.0577, "lr": 2.893485998739926e-05, "epoch": 5.048426150121065, "percentage": 50.48, "elapsed_time": "0:15:30", "remaining_time": "0:15:12", "throughput": 1838.73, "total_tokens": 1711200} |
| {"current_steps": 4175, "total_steps": 8260, "loss": 0.1978, "lr": 2.8882684923773458e-05, "epoch": 5.054479418886198, "percentage": 50.54, "elapsed_time": "0:15:31", "remaining_time": "0:15:11", "throughput": 1838.98, "total_tokens": 1713248} |
| {"current_steps": 4180, "total_steps": 8260, "loss": 0.0265, "lr": 2.883049252503659e-05, "epoch": 5.060532687651332, "percentage": 50.61, "elapsed_time": "0:15:32", "remaining_time": "0:15:10", "throughput": 1839.23, "total_tokens": 1715296} |
| {"current_steps": 4185, "total_steps": 8260, "loss": 0.0852, "lr": 2.877828302421325e-05, "epoch": 5.066585956416465, "percentage": 50.67, "elapsed_time": "0:15:33", "remaining_time": "0:15:09", "throughput": 1839.41, "total_tokens": 1717280} |
| {"current_steps": 4190, "total_steps": 8260, "loss": 0.0791, "lr": 2.872605665440436e-05, "epoch": 5.072639225181598, "percentage": 50.73, "elapsed_time": "0:15:34", "remaining_time": "0:15:07", "throughput": 1839.53, "total_tokens": 1719136} |
| {"current_steps": 4195, "total_steps": 8260, "loss": 0.0549, "lr": 2.8673813648786196e-05, "epoch": 5.078692493946731, "percentage": 50.79, "elapsed_time": "0:15:35", "remaining_time": "0:15:06", "throughput": 1839.7, "total_tokens": 1721152} |
| {"current_steps": 4200, "total_steps": 8260, "loss": 0.0353, "lr": 2.862155424060926e-05, "epoch": 5.084745762711864, "percentage": 50.85, "elapsed_time": "0:15:36", "remaining_time": "0:15:05", "throughput": 1839.99, "total_tokens": 1723328} |
| {"current_steps": 4205, "total_steps": 8260, "loss": 0.0831, "lr": 2.856927866319733e-05, "epoch": 5.0907990314769975, "percentage": 50.91, "elapsed_time": "0:15:37", "remaining_time": "0:15:04", "throughput": 1840.1, "total_tokens": 1725280} |
| {"current_steps": 4210, "total_steps": 8260, "loss": 0.0419, "lr": 2.851698714994635e-05, "epoch": 5.096852300242131, "percentage": 50.97, "elapsed_time": "0:15:38", "remaining_time": "0:15:02", "throughput": 1840.34, "total_tokens": 1727328} |
| {"current_steps": 4215, "total_steps": 8260, "loss": 0.1896, "lr": 2.8464679934323424e-05, "epoch": 5.102905569007264, "percentage": 51.03, "elapsed_time": "0:15:39", "remaining_time": "0:15:01", "throughput": 1840.65, "total_tokens": 1729472} |
| {"current_steps": 4220, "total_steps": 8260, "loss": 0.0477, "lr": 2.841235724986575e-05, "epoch": 5.108958837772397, "percentage": 51.09, "elapsed_time": "0:15:40", "remaining_time": "0:15:00", "throughput": 1840.76, "total_tokens": 1731392} |
| {"current_steps": 4225, "total_steps": 8260, "loss": 0.1226, "lr": 2.8360019330179604e-05, "epoch": 5.11501210653753, "percentage": 51.15, "elapsed_time": "0:15:41", "remaining_time": "0:14:59", "throughput": 1840.99, "total_tokens": 1733472} |
| {"current_steps": 4230, "total_steps": 8260, "loss": 0.08, "lr": 2.8307666408939278e-05, "epoch": 5.121065375302663, "percentage": 51.21, "elapsed_time": "0:15:42", "remaining_time": "0:14:58", "throughput": 1841.18, "total_tokens": 1735520} |
| {"current_steps": 4235, "total_steps": 8260, "loss": 0.0885, "lr": 2.8255298719886043e-05, "epoch": 5.127118644067797, "percentage": 51.27, "elapsed_time": "0:15:43", "remaining_time": "0:14:56", "throughput": 1841.39, "total_tokens": 1737536} |
| {"current_steps": 4240, "total_steps": 8260, "loss": 0.0581, "lr": 2.820291649682709e-05, "epoch": 5.13317191283293, "percentage": 51.33, "elapsed_time": "0:15:44", "remaining_time": "0:14:55", "throughput": 1841.52, "total_tokens": 1739424} |
| {"current_steps": 4245, "total_steps": 8260, "loss": 0.1165, "lr": 2.8150519973634543e-05, "epoch": 5.1392251815980625, "percentage": 51.39, "elapsed_time": "0:15:45", "remaining_time": "0:14:54", "throughput": 1841.78, "total_tokens": 1741536} |
| {"current_steps": 4250, "total_steps": 8260, "loss": 0.0939, "lr": 2.809810938424432e-05, "epoch": 5.145278450363196, "percentage": 51.45, "elapsed_time": "0:15:46", "remaining_time": "0:14:53", "throughput": 1841.92, "total_tokens": 1743488} |
| {"current_steps": 4255, "total_steps": 8260, "loss": 0.1163, "lr": 2.804568496265516e-05, "epoch": 5.151331719128329, "percentage": 51.51, "elapsed_time": "0:15:47", "remaining_time": "0:14:51", "throughput": 1842.29, "total_tokens": 1745728} |
| {"current_steps": 4260, "total_steps": 8260, "loss": 0.0938, "lr": 2.799324694292757e-05, "epoch": 5.157384987893463, "percentage": 51.57, "elapsed_time": "0:15:48", "remaining_time": "0:14:50", "throughput": 1842.56, "total_tokens": 1747808} |
| {"current_steps": 4265, "total_steps": 8260, "loss": 0.0276, "lr": 2.7940795559182764e-05, "epoch": 5.163438256658596, "percentage": 51.63, "elapsed_time": "0:15:49", "remaining_time": "0:14:49", "throughput": 1842.76, "total_tokens": 1749856} |
| {"current_steps": 4270, "total_steps": 8260, "loss": 0.0462, "lr": 2.788833104560161e-05, "epoch": 5.169491525423728, "percentage": 51.69, "elapsed_time": "0:15:50", "remaining_time": "0:14:48", "throughput": 1843.0, "total_tokens": 1751904} |
| {"current_steps": 4275, "total_steps": 8260, "loss": 0.103, "lr": 2.7835853636423616e-05, "epoch": 5.175544794188862, "percentage": 51.76, "elapsed_time": "0:15:51", "remaining_time": "0:14:47", "throughput": 1843.23, "total_tokens": 1753984} |
| {"current_steps": 4280, "total_steps": 8260, "loss": 0.0398, "lr": 2.7783363565945847e-05, "epoch": 5.181598062953995, "percentage": 51.82, "elapsed_time": "0:15:52", "remaining_time": "0:14:45", "throughput": 1843.49, "total_tokens": 1756000} |
| {"current_steps": 4285, "total_steps": 8260, "loss": 0.1273, "lr": 2.773086106852192e-05, "epoch": 5.187651331719128, "percentage": 51.88, "elapsed_time": "0:15:53", "remaining_time": "0:14:44", "throughput": 1843.76, "total_tokens": 1758080} |
| {"current_steps": 4290, "total_steps": 8260, "loss": 0.0681, "lr": 2.7678346378560903e-05, "epoch": 5.193704600484262, "percentage": 51.94, "elapsed_time": "0:15:54", "remaining_time": "0:14:43", "throughput": 1844.03, "total_tokens": 1760224} |
| {"current_steps": 4295, "total_steps": 8260, "loss": 0.1305, "lr": 2.762581973052633e-05, "epoch": 5.199757869249395, "percentage": 52.0, "elapsed_time": "0:15:55", "remaining_time": "0:14:42", "throughput": 1844.15, "total_tokens": 1762176} |
| {"current_steps": 4300, "total_steps": 8260, "loss": 0.0543, "lr": 2.7573281358935104e-05, "epoch": 5.2058111380145276, "percentage": 52.06, "elapsed_time": "0:15:56", "remaining_time": "0:14:40", "throughput": 1844.43, "total_tokens": 1764352} |
| {"current_steps": 4305, "total_steps": 8260, "loss": 0.0769, "lr": 2.7520731498356494e-05, "epoch": 5.211864406779661, "percentage": 52.12, "elapsed_time": "0:15:57", "remaining_time": "0:14:39", "throughput": 1844.7, "total_tokens": 1766432} |
| {"current_steps": 4310, "total_steps": 8260, "loss": 0.2154, "lr": 2.746817038341103e-05, "epoch": 5.217917675544794, "percentage": 52.18, "elapsed_time": "0:15:58", "remaining_time": "0:14:38", "throughput": 1845.01, "total_tokens": 1768608} |
| {"current_steps": 4315, "total_steps": 8260, "loss": 0.0493, "lr": 2.7415598248769524e-05, "epoch": 5.223970944309928, "percentage": 52.24, "elapsed_time": "0:15:59", "remaining_time": "0:14:37", "throughput": 1845.28, "total_tokens": 1770752} |
| {"current_steps": 4320, "total_steps": 8260, "loss": 0.0376, "lr": 2.7363015329151965e-05, "epoch": 5.230024213075061, "percentage": 52.3, "elapsed_time": "0:16:00", "remaining_time": "0:14:36", "throughput": 1845.49, "total_tokens": 1772832} |
| {"current_steps": 4325, "total_steps": 8260, "loss": 0.0471, "lr": 2.73104218593265e-05, "epoch": 5.236077481840193, "percentage": 52.36, "elapsed_time": "0:16:01", "remaining_time": "0:14:34", "throughput": 1845.71, "total_tokens": 1774912} |
| {"current_steps": 4330, "total_steps": 8260, "loss": 0.1561, "lr": 2.7257818074108394e-05, "epoch": 5.242130750605327, "percentage": 52.42, "elapsed_time": "0:16:02", "remaining_time": "0:14:33", "throughput": 1845.96, "total_tokens": 1777024} |
| {"current_steps": 4335, "total_steps": 8260, "loss": 0.1141, "lr": 2.7205204208358947e-05, "epoch": 5.24818401937046, "percentage": 52.48, "elapsed_time": "0:16:03", "remaining_time": "0:14:32", "throughput": 1846.07, "total_tokens": 1779008} |
| {"current_steps": 4340, "total_steps": 8260, "loss": 0.0136, "lr": 2.715258049698446e-05, "epoch": 5.254237288135593, "percentage": 52.54, "elapsed_time": "0:16:04", "remaining_time": "0:14:31", "throughput": 1846.21, "total_tokens": 1780896} |
| {"current_steps": 4345, "total_steps": 8260, "loss": 0.0903, "lr": 2.709994717493523e-05, "epoch": 5.260290556900727, "percentage": 52.6, "elapsed_time": "0:16:05", "remaining_time": "0:14:30", "throughput": 1846.52, "total_tokens": 1783008} |
| {"current_steps": 4350, "total_steps": 8260, "loss": 0.01, "lr": 2.7047304477204416e-05, "epoch": 5.266343825665859, "percentage": 52.66, "elapsed_time": "0:16:06", "remaining_time": "0:14:28", "throughput": 1846.69, "total_tokens": 1784992} |
| {"current_steps": 4355, "total_steps": 8260, "loss": 0.1361, "lr": 2.6994652638827078e-05, "epoch": 5.272397094430993, "percentage": 52.72, "elapsed_time": "0:16:07", "remaining_time": "0:14:27", "throughput": 1846.9, "total_tokens": 1787008} |
| {"current_steps": 4360, "total_steps": 8260, "loss": 0.1002, "lr": 2.694199189487906e-05, "epoch": 5.278450363196126, "percentage": 52.78, "elapsed_time": "0:16:08", "remaining_time": "0:14:26", "throughput": 1847.14, "total_tokens": 1789120} |
| {"current_steps": 4365, "total_steps": 8260, "loss": 0.0743, "lr": 2.688932248047597e-05, "epoch": 5.284503631961259, "percentage": 52.85, "elapsed_time": "0:16:09", "remaining_time": "0:14:25", "throughput": 1847.39, "total_tokens": 1791232} |
| {"current_steps": 4370, "total_steps": 8260, "loss": 0.0525, "lr": 2.683664463077214e-05, "epoch": 5.290556900726393, "percentage": 52.91, "elapsed_time": "0:16:10", "remaining_time": "0:14:24", "throughput": 1847.69, "total_tokens": 1793440} |
| {"current_steps": 4375, "total_steps": 8260, "loss": 0.0567, "lr": 2.678395858095955e-05, "epoch": 5.296610169491525, "percentage": 52.97, "elapsed_time": "0:16:11", "remaining_time": "0:14:22", "throughput": 1847.86, "total_tokens": 1795488} |
| {"current_steps": 4380, "total_steps": 8260, "loss": 0.0108, "lr": 2.6731264566266795e-05, "epoch": 5.302663438256658, "percentage": 53.03, "elapsed_time": "0:16:12", "remaining_time": "0:14:21", "throughput": 1848.1, "total_tokens": 1797600} |
| {"current_steps": 4385, "total_steps": 8260, "loss": 0.0957, "lr": 2.6678562821958043e-05, "epoch": 5.308716707021792, "percentage": 53.09, "elapsed_time": "0:16:13", "remaining_time": "0:14:20", "throughput": 1848.28, "total_tokens": 1799584} |
| {"current_steps": 4390, "total_steps": 8260, "loss": 0.0536, "lr": 2.6625853583331943e-05, "epoch": 5.314769975786925, "percentage": 53.15, "elapsed_time": "0:16:14", "remaining_time": "0:14:19", "throughput": 1848.36, "total_tokens": 1801440} |
| {"current_steps": 4395, "total_steps": 8260, "loss": 0.0146, "lr": 2.6573137085720638e-05, "epoch": 5.3208232445520585, "percentage": 53.21, "elapsed_time": "0:16:15", "remaining_time": "0:14:17", "throughput": 1848.55, "total_tokens": 1803456} |
| {"current_steps": 4400, "total_steps": 8260, "loss": 0.1033, "lr": 2.6520413564488672e-05, "epoch": 5.326876513317191, "percentage": 53.27, "elapsed_time": "0:16:16", "remaining_time": "0:14:16", "throughput": 1848.68, "total_tokens": 1805440} |
| {"current_steps": 4405, "total_steps": 8260, "loss": 0.1228, "lr": 2.6467683255031918e-05, "epoch": 5.332929782082324, "percentage": 53.33, "elapsed_time": "0:16:17", "remaining_time": "0:14:15", "throughput": 1848.75, "total_tokens": 1807360} |
| {"current_steps": 4410, "total_steps": 8260, "loss": 0.0345, "lr": 2.6414946392776597e-05, "epoch": 5.338983050847458, "percentage": 53.39, "elapsed_time": "0:16:18", "remaining_time": "0:14:14", "throughput": 1848.92, "total_tokens": 1809344} |
| {"current_steps": 4415, "total_steps": 8260, "loss": 0.0332, "lr": 2.636220321317816e-05, "epoch": 5.345036319612591, "percentage": 53.45, "elapsed_time": "0:16:19", "remaining_time": "0:14:13", "throughput": 1849.18, "total_tokens": 1811456} |
| {"current_steps": 4420, "total_steps": 8260, "loss": 0.0782, "lr": 2.6309453951720274e-05, "epoch": 5.351089588377724, "percentage": 53.51, "elapsed_time": "0:16:20", "remaining_time": "0:14:11", "throughput": 1849.44, "total_tokens": 1813600} |
| {"current_steps": 4425, "total_steps": 8260, "loss": 0.2662, "lr": 2.625669884391377e-05, "epoch": 5.357142857142857, "percentage": 53.57, "elapsed_time": "0:16:21", "remaining_time": "0:14:10", "throughput": 1849.56, "total_tokens": 1815424} |
| {"current_steps": 4430, "total_steps": 8260, "loss": 0.0349, "lr": 2.6203938125295552e-05, "epoch": 5.36319612590799, "percentage": 53.63, "elapsed_time": "0:16:22", "remaining_time": "0:14:09", "throughput": 1849.85, "total_tokens": 1817600} |
| {"current_steps": 4435, "total_steps": 8260, "loss": 0.043, "lr": 2.6151172031427597e-05, "epoch": 5.3692493946731235, "percentage": 53.69, "elapsed_time": "0:16:23", "remaining_time": "0:14:08", "throughput": 1850.03, "total_tokens": 1819648} |
| {"current_steps": 4440, "total_steps": 8260, "loss": 0.1072, "lr": 2.609840079789588e-05, "epoch": 5.375302663438257, "percentage": 53.75, "elapsed_time": "0:16:24", "remaining_time": "0:14:07", "throughput": 1850.24, "total_tokens": 1821728} |
| {"current_steps": 4445, "total_steps": 8260, "loss": 0.0497, "lr": 2.604562466030931e-05, "epoch": 5.38135593220339, "percentage": 53.81, "elapsed_time": "0:16:25", "remaining_time": "0:14:05", "throughput": 1850.41, "total_tokens": 1823776} |
| {"current_steps": 4450, "total_steps": 8260, "loss": 0.1582, "lr": 2.599284385429871e-05, "epoch": 5.387409200968523, "percentage": 53.87, "elapsed_time": "0:16:26", "remaining_time": "0:14:04", "throughput": 1850.61, "total_tokens": 1825856} |
| {"current_steps": 4455, "total_steps": 8260, "loss": 0.0692, "lr": 2.594005861551574e-05, "epoch": 5.393462469733656, "percentage": 53.93, "elapsed_time": "0:16:27", "remaining_time": "0:14:03", "throughput": 1850.86, "total_tokens": 1827936} |
| {"current_steps": 4460, "total_steps": 8260, "loss": 0.1414, "lr": 2.588726917963183e-05, "epoch": 5.399515738498789, "percentage": 54.0, "elapsed_time": "0:16:28", "remaining_time": "0:14:02", "throughput": 1850.99, "total_tokens": 1829824} |
| {"current_steps": 4465, "total_steps": 8260, "loss": 0.0137, "lr": 2.5834475782337187e-05, "epoch": 5.405569007263923, "percentage": 54.06, "elapsed_time": "0:16:29", "remaining_time": "0:14:01", "throughput": 1851.22, "total_tokens": 1831936} |
| {"current_steps": 4470, "total_steps": 8260, "loss": 0.0105, "lr": 2.578167865933967e-05, "epoch": 5.411622276029056, "percentage": 54.12, "elapsed_time": "0:16:30", "remaining_time": "0:13:59", "throughput": 1851.44, "total_tokens": 1834048} |
| {"current_steps": 4475, "total_steps": 8260, "loss": 0.1127, "lr": 2.5728878046363785e-05, "epoch": 5.4176755447941884, "percentage": 54.18, "elapsed_time": "0:16:31", "remaining_time": "0:13:58", "throughput": 1851.7, "total_tokens": 1836192} |
| {"current_steps": 4480, "total_steps": 8260, "loss": 0.0279, "lr": 2.5676074179149635e-05, "epoch": 5.423728813559322, "percentage": 54.24, "elapsed_time": "0:16:32", "remaining_time": "0:13:57", "throughput": 1852.02, "total_tokens": 1838432} |
| {"current_steps": 4485, "total_steps": 8260, "loss": 0.054, "lr": 2.5623267293451826e-05, "epoch": 5.429782082324455, "percentage": 54.3, "elapsed_time": "0:16:33", "remaining_time": "0:13:56", "throughput": 1852.33, "total_tokens": 1840576} |
| {"current_steps": 4490, "total_steps": 8260, "loss": 0.0641, "lr": 2.5570457625038457e-05, "epoch": 5.4358353510895885, "percentage": 54.36, "elapsed_time": "0:16:34", "remaining_time": "0:13:55", "throughput": 1852.5, "total_tokens": 1842624} |
| {"current_steps": 4495, "total_steps": 8260, "loss": 0.0636, "lr": 2.551764540969005e-05, "epoch": 5.441888619854722, "percentage": 54.42, "elapsed_time": "0:16:35", "remaining_time": "0:13:53", "throughput": 1852.63, "total_tokens": 1844576} |
| {"current_steps": 4500, "total_steps": 8260, "loss": 0.0442, "lr": 2.5464830883198492e-05, "epoch": 5.447941888619855, "percentage": 54.48, "elapsed_time": "0:16:36", "remaining_time": "0:13:52", "throughput": 1852.74, "total_tokens": 1846560} |
| {"current_steps": 4505, "total_steps": 8260, "loss": 0.0686, "lr": 2.5412014281365986e-05, "epoch": 5.453995157384988, "percentage": 54.54, "elapsed_time": "0:16:37", "remaining_time": "0:13:51", "throughput": 1852.88, "total_tokens": 1848576} |
| {"current_steps": 4510, "total_steps": 8260, "loss": 0.0137, "lr": 2.5359195840004023e-05, "epoch": 5.460048426150121, "percentage": 54.6, "elapsed_time": "0:16:38", "remaining_time": "0:13:50", "throughput": 1853.1, "total_tokens": 1850688} |
| {"current_steps": 4515, "total_steps": 8260, "loss": 0.0402, "lr": 2.5306375794932273e-05, "epoch": 5.466101694915254, "percentage": 54.66, "elapsed_time": "0:16:39", "remaining_time": "0:13:49", "throughput": 1853.31, "total_tokens": 1852800} |
| {"current_steps": 4520, "total_steps": 8260, "loss": 0.0295, "lr": 2.52535543819776e-05, "epoch": 5.472154963680388, "percentage": 54.72, "elapsed_time": "0:16:40", "remaining_time": "0:13:47", "throughput": 1853.46, "total_tokens": 1854720} |
| {"current_steps": 4525, "total_steps": 8260, "loss": 0.122, "lr": 2.5200731836972956e-05, "epoch": 5.478208232445521, "percentage": 54.78, "elapsed_time": "0:16:41", "remaining_time": "0:13:46", "throughput": 1853.63, "total_tokens": 1856768} |
| {"current_steps": 4530, "total_steps": 8260, "loss": 0.0103, "lr": 2.5147908395756343e-05, "epoch": 5.4842615012106535, "percentage": 54.84, "elapsed_time": "0:16:42", "remaining_time": "0:13:45", "throughput": 1853.82, "total_tokens": 1858848} |
| {"current_steps": 4535, "total_steps": 8260, "loss": 0.0062, "lr": 2.5095084294169768e-05, "epoch": 5.490314769975787, "percentage": 54.9, "elapsed_time": "0:16:43", "remaining_time": "0:13:44", "throughput": 1854.0, "total_tokens": 1860896} |
| {"current_steps": 4540, "total_steps": 8260, "loss": 0.259, "lr": 2.5042259768058208e-05, "epoch": 5.49636803874092, "percentage": 54.96, "elapsed_time": "0:16:44", "remaining_time": "0:13:43", "throughput": 1854.15, "total_tokens": 1862816} |
| {"current_steps": 4543, "total_steps": 8260, "eval_loss": 0.14337657392024994, "epoch": 5.5, "percentage": 55.0, "elapsed_time": "0:16:53", "remaining_time": "0:13:49", "throughput": 1839.41, "total_tokens": 1864000} |
| {"current_steps": 4545, "total_steps": 8260, "loss": 0.2096, "lr": 2.4989435053268497e-05, "epoch": 5.5024213075060535, "percentage": 55.02, "elapsed_time": "0:16:55", "remaining_time": "0:13:49", "throughput": 1836.95, "total_tokens": 1864832} |
| {"current_steps": 4550, "total_steps": 8260, "loss": 0.0859, "lr": 2.493661038564835e-05, "epoch": 5.508474576271187, "percentage": 55.08, "elapsed_time": "0:16:56", "remaining_time": "0:13:48", "throughput": 1837.11, "total_tokens": 1866752} |
| {"current_steps": 4555, "total_steps": 8260, "loss": 0.0471, "lr": 2.4883786001045238e-05, "epoch": 5.514527845036319, "percentage": 55.15, "elapsed_time": "0:16:57", "remaining_time": "0:13:47", "throughput": 1837.4, "total_tokens": 1868928} |
| {"current_steps": 4560, "total_steps": 8260, "loss": 0.1254, "lr": 2.4830962135305398e-05, "epoch": 5.520581113801453, "percentage": 55.21, "elapsed_time": "0:16:58", "remaining_time": "0:13:46", "throughput": 1837.67, "total_tokens": 1871072} |
| {"current_steps": 4565, "total_steps": 8260, "loss": 0.0926, "lr": 2.4778139024272724e-05, "epoch": 5.526634382566586, "percentage": 55.27, "elapsed_time": "0:16:59", "remaining_time": "0:13:44", "throughput": 1837.93, "total_tokens": 1873152} |
| {"current_steps": 4570, "total_steps": 8260, "loss": 0.1329, "lr": 2.4725316903787765e-05, "epoch": 5.532687651331719, "percentage": 55.33, "elapsed_time": "0:17:00", "remaining_time": "0:13:43", "throughput": 1838.0, "total_tokens": 1875040} |
| {"current_steps": 4575, "total_steps": 8260, "loss": 0.0055, "lr": 2.4672496009686622e-05, "epoch": 5.538740920096853, "percentage": 55.39, "elapsed_time": "0:17:01", "remaining_time": "0:13:42", "throughput": 1838.27, "total_tokens": 1877184} |
| {"current_steps": 4580, "total_steps": 8260, "loss": 0.049, "lr": 2.4619676577799946e-05, "epoch": 5.544794188861985, "percentage": 55.45, "elapsed_time": "0:17:02", "remaining_time": "0:13:41", "throughput": 1838.46, "total_tokens": 1879232} |
| {"current_steps": 4585, "total_steps": 8260, "loss": 0.0918, "lr": 2.4566858843951847e-05, "epoch": 5.5508474576271185, "percentage": 55.51, "elapsed_time": "0:17:03", "remaining_time": "0:13:40", "throughput": 1838.66, "total_tokens": 1881312} |
| {"current_steps": 4590, "total_steps": 8260, "loss": 0.0879, "lr": 2.451404304395884e-05, "epoch": 5.556900726392252, "percentage": 55.57, "elapsed_time": "0:17:04", "remaining_time": "0:13:38", "throughput": 1838.82, "total_tokens": 1883328} |
| {"current_steps": 4595, "total_steps": 8260, "loss": 0.053, "lr": 2.446122941362883e-05, "epoch": 5.562953995157385, "percentage": 55.63, "elapsed_time": "0:17:05", "remaining_time": "0:13:37", "throughput": 1838.94, "total_tokens": 1885248} |
| {"current_steps": 4600, "total_steps": 8260, "loss": 0.1262, "lr": 2.4408418188760026e-05, "epoch": 5.5690072639225185, "percentage": 55.69, "elapsed_time": "0:17:06", "remaining_time": "0:13:36", "throughput": 1839.19, "total_tokens": 1887328} |
| {"current_steps": 4605, "total_steps": 8260, "loss": 0.1127, "lr": 2.435560960513989e-05, "epoch": 5.575060532687651, "percentage": 55.75, "elapsed_time": "0:17:07", "remaining_time": "0:13:35", "throughput": 1839.39, "total_tokens": 1889408} |
| {"current_steps": 4610, "total_steps": 8260, "loss": 0.0273, "lr": 2.4302803898544106e-05, "epoch": 5.581113801452784, "percentage": 55.81, "elapsed_time": "0:17:08", "remaining_time": "0:13:34", "throughput": 1839.61, "total_tokens": 1891456} |
| {"current_steps": 4615, "total_steps": 8260, "loss": 0.0275, "lr": 2.425000130473549e-05, "epoch": 5.587167070217918, "percentage": 55.87, "elapsed_time": "0:17:09", "remaining_time": "0:13:32", "throughput": 1839.95, "total_tokens": 1893696} |
| {"current_steps": 4620, "total_steps": 8260, "loss": 0.188, "lr": 2.4197202059463e-05, "epoch": 5.593220338983051, "percentage": 55.93, "elapsed_time": "0:17:10", "remaining_time": "0:13:31", "throughput": 1840.07, "total_tokens": 1895616} |
| {"current_steps": 4625, "total_steps": 8260, "loss": 0.1577, "lr": 2.4144406398460594e-05, "epoch": 5.599273607748184, "percentage": 55.99, "elapsed_time": "0:17:11", "remaining_time": "0:13:30", "throughput": 1840.23, "total_tokens": 1897600} |
| {"current_steps": 4630, "total_steps": 8260, "loss": 0.207, "lr": 2.4091614557446267e-05, "epoch": 5.605326876513317, "percentage": 56.05, "elapsed_time": "0:17:12", "remaining_time": "0:13:29", "throughput": 1840.38, "total_tokens": 1899616} |
| {"current_steps": 4635, "total_steps": 8260, "loss": 0.0095, "lr": 2.4038826772120932e-05, "epoch": 5.61138014527845, "percentage": 56.11, "elapsed_time": "0:17:13", "remaining_time": "0:13:28", "throughput": 1840.53, "total_tokens": 1901568} |
| {"current_steps": 4640, "total_steps": 8260, "loss": 0.0863, "lr": 2.398604327816742e-05, "epoch": 5.6174334140435835, "percentage": 56.17, "elapsed_time": "0:17:14", "remaining_time": "0:13:26", "throughput": 1840.7, "total_tokens": 1903616} |
| {"current_steps": 4645, "total_steps": 8260, "loss": 0.1551, "lr": 2.3933264311249377e-05, "epoch": 5.623486682808717, "percentage": 56.23, "elapsed_time": "0:17:15", "remaining_time": "0:13:25", "throughput": 1840.83, "total_tokens": 1905504} |
| {"current_steps": 4650, "total_steps": 8260, "loss": 0.0356, "lr": 2.3880490107010255e-05, "epoch": 5.62953995157385, "percentage": 56.3, "elapsed_time": "0:17:16", "remaining_time": "0:13:24", "throughput": 1840.97, "total_tokens": 1907456} |
| {"current_steps": 4655, "total_steps": 8260, "loss": 0.0675, "lr": 2.382772090107223e-05, "epoch": 5.635593220338983, "percentage": 56.36, "elapsed_time": "0:17:17", "remaining_time": "0:13:23", "throughput": 1841.25, "total_tokens": 1909568} |
| {"current_steps": 4660, "total_steps": 8260, "loss": 0.0388, "lr": 2.3774956929035177e-05, "epoch": 5.641646489104116, "percentage": 56.42, "elapsed_time": "0:17:18", "remaining_time": "0:13:22", "throughput": 1841.58, "total_tokens": 1911840} |
| {"current_steps": 4665, "total_steps": 8260, "loss": 0.0404, "lr": 2.3722198426475593e-05, "epoch": 5.647699757869249, "percentage": 56.48, "elapsed_time": "0:17:19", "remaining_time": "0:13:20", "throughput": 1841.8, "total_tokens": 1913952} |
| {"current_steps": 4670, "total_steps": 8260, "loss": 0.0493, "lr": 2.3669445628945542e-05, "epoch": 5.653753026634383, "percentage": 56.54, "elapsed_time": "0:17:20", "remaining_time": "0:13:19", "throughput": 1842.13, "total_tokens": 1916160} |
| {"current_steps": 4675, "total_steps": 8260, "loss": 0.049, "lr": 2.3616698771971633e-05, "epoch": 5.659806295399516, "percentage": 56.6, "elapsed_time": "0:17:21", "remaining_time": "0:13:18", "throughput": 1842.23, "total_tokens": 1918144} |
| {"current_steps": 4680, "total_steps": 8260, "loss": 0.1027, "lr": 2.356395809105396e-05, "epoch": 5.6658595641646485, "percentage": 56.66, "elapsed_time": "0:17:22", "remaining_time": "0:13:17", "throughput": 1842.38, "total_tokens": 1920160} |
| {"current_steps": 4685, "total_steps": 8260, "loss": 0.1365, "lr": 2.3511223821665028e-05, "epoch": 5.671912832929782, "percentage": 56.72, "elapsed_time": "0:17:23", "remaining_time": "0:13:16", "throughput": 1842.68, "total_tokens": 1922368} |
| {"current_steps": 4690, "total_steps": 8260, "loss": 0.2605, "lr": 2.3458496199248717e-05, "epoch": 5.677966101694915, "percentage": 56.78, "elapsed_time": "0:17:24", "remaining_time": "0:13:14", "throughput": 1842.93, "total_tokens": 1924448} |
| {"current_steps": 4695, "total_steps": 8260, "loss": 0.142, "lr": 2.340577545921923e-05, "epoch": 5.684019370460049, "percentage": 56.84, "elapsed_time": "0:17:25", "remaining_time": "0:13:13", "throughput": 1843.18, "total_tokens": 1926592} |
| {"current_steps": 4700, "total_steps": 8260, "loss": 0.1171, "lr": 2.335306183696006e-05, "epoch": 5.690072639225182, "percentage": 56.9, "elapsed_time": "0:17:26", "remaining_time": "0:13:12", "throughput": 1843.36, "total_tokens": 1928672} |
| {"current_steps": 4705, "total_steps": 8260, "loss": 0.0368, "lr": 2.3300355567822897e-05, "epoch": 5.696125907990314, "percentage": 56.96, "elapsed_time": "0:17:27", "remaining_time": "0:13:11", "throughput": 1843.6, "total_tokens": 1930816} |
| {"current_steps": 4710, "total_steps": 8260, "loss": 0.063, "lr": 2.324765688712661e-05, "epoch": 5.702179176755448, "percentage": 57.02, "elapsed_time": "0:17:28", "remaining_time": "0:13:10", "throughput": 1843.83, "total_tokens": 1932896} |
| {"current_steps": 4715, "total_steps": 8260, "loss": 0.1117, "lr": 2.3194966030156187e-05, "epoch": 5.708232445520581, "percentage": 57.08, "elapsed_time": "0:17:29", "remaining_time": "0:13:08", "throughput": 1844.04, "total_tokens": 1934944} |
| {"current_steps": 4720, "total_steps": 8260, "loss": 0.071, "lr": 2.31422832321617e-05, "epoch": 5.714285714285714, "percentage": 57.14, "elapsed_time": "0:17:30", "remaining_time": "0:13:07", "throughput": 1844.35, "total_tokens": 1937152} |
| {"current_steps": 4725, "total_steps": 8260, "loss": 0.0687, "lr": 2.308960872835721e-05, "epoch": 5.720338983050848, "percentage": 57.2, "elapsed_time": "0:17:31", "remaining_time": "0:13:06", "throughput": 1844.6, "total_tokens": 1939328} |
| {"current_steps": 4730, "total_steps": 8260, "loss": 0.085, "lr": 2.3036942753919775e-05, "epoch": 5.72639225181598, "percentage": 57.26, "elapsed_time": "0:17:32", "remaining_time": "0:13:05", "throughput": 1844.76, "total_tokens": 1941312} |
| {"current_steps": 4735, "total_steps": 8260, "loss": 0.0404, "lr": 2.2984285543988352e-05, "epoch": 5.732445520581114, "percentage": 57.32, "elapsed_time": "0:17:33", "remaining_time": "0:13:04", "throughput": 1844.9, "total_tokens": 1943264} |
| {"current_steps": 4740, "total_steps": 8260, "loss": 0.0154, "lr": 2.2931637333662785e-05, "epoch": 5.738498789346247, "percentage": 57.38, "elapsed_time": "0:17:34", "remaining_time": "0:13:02", "throughput": 1845.11, "total_tokens": 1945312} |
| {"current_steps": 4745, "total_steps": 8260, "loss": 0.0435, "lr": 2.287899835800273e-05, "epoch": 5.74455205811138, "percentage": 57.45, "elapsed_time": "0:17:35", "remaining_time": "0:13:01", "throughput": 1845.26, "total_tokens": 1947328} |
| {"current_steps": 4750, "total_steps": 8260, "loss": 0.1481, "lr": 2.2826368852026597e-05, "epoch": 5.750605326876514, "percentage": 57.51, "elapsed_time": "0:17:36", "remaining_time": "0:13:00", "throughput": 1845.43, "total_tokens": 1949376} |
| {"current_steps": 4755, "total_steps": 8260, "loss": 0.1563, "lr": 2.277374905071053e-05, "epoch": 5.756658595641646, "percentage": 57.57, "elapsed_time": "0:17:37", "remaining_time": "0:12:59", "throughput": 1845.63, "total_tokens": 1951456} |
| {"current_steps": 4760, "total_steps": 8260, "loss": 0.033, "lr": 2.2721139188987357e-05, "epoch": 5.762711864406779, "percentage": 57.63, "elapsed_time": "0:17:38", "remaining_time": "0:12:58", "throughput": 1845.83, "total_tokens": 1953536} |
| {"current_steps": 4765, "total_steps": 8260, "loss": 0.1386, "lr": 2.26685395017455e-05, "epoch": 5.768765133171913, "percentage": 57.69, "elapsed_time": "0:17:39", "remaining_time": "0:12:57", "throughput": 1846.13, "total_tokens": 1955744} |
| {"current_steps": 4770, "total_steps": 8260, "loss": 0.1046, "lr": 2.261595022382799e-05, "epoch": 5.774818401937046, "percentage": 57.75, "elapsed_time": "0:17:40", "remaining_time": "0:12:55", "throughput": 1846.37, "total_tokens": 1957824} |
| {"current_steps": 4775, "total_steps": 8260, "loss": 0.1059, "lr": 2.256337159003134e-05, "epoch": 5.780871670702179, "percentage": 57.81, "elapsed_time": "0:17:41", "remaining_time": "0:12:54", "throughput": 1846.63, "total_tokens": 1960000} |
| {"current_steps": 4780, "total_steps": 8260, "loss": 0.0602, "lr": 2.251080383510459e-05, "epoch": 5.786924939467312, "percentage": 57.87, "elapsed_time": "0:17:42", "remaining_time": "0:12:53", "throughput": 1846.87, "total_tokens": 1962144} |
| {"current_steps": 4785, "total_steps": 8260, "loss": 0.0255, "lr": 2.2458247193748155e-05, "epoch": 5.792978208232445, "percentage": 57.93, "elapsed_time": "0:17:43", "remaining_time": "0:12:52", "throughput": 1847.03, "total_tokens": 1964192} |
| {"current_steps": 4790, "total_steps": 8260, "loss": 0.1645, "lr": 2.240570190061288e-05, "epoch": 5.799031476997579, "percentage": 57.99, "elapsed_time": "0:17:44", "remaining_time": "0:12:51", "throughput": 1847.18, "total_tokens": 1966240} |
| {"current_steps": 4795, "total_steps": 8260, "loss": 0.1161, "lr": 2.2353168190298915e-05, "epoch": 5.805084745762712, "percentage": 58.05, "elapsed_time": "0:17:45", "remaining_time": "0:12:49", "throughput": 1847.41, "total_tokens": 1968320} |
| {"current_steps": 4800, "total_steps": 8260, "loss": 0.0394, "lr": 2.2300646297354704e-05, "epoch": 5.811138014527845, "percentage": 58.11, "elapsed_time": "0:17:46", "remaining_time": "0:12:48", "throughput": 1847.55, "total_tokens": 1970304} |
| {"current_steps": 4805, "total_steps": 8260, "loss": 0.0457, "lr": 2.224813645627592e-05, "epoch": 5.817191283292978, "percentage": 58.17, "elapsed_time": "0:17:47", "remaining_time": "0:12:47", "throughput": 1847.69, "total_tokens": 1972320} |
| {"current_steps": 4810, "total_steps": 8260, "loss": 0.0216, "lr": 2.2195638901504452e-05, "epoch": 5.823244552058111, "percentage": 58.23, "elapsed_time": "0:17:48", "remaining_time": "0:12:46", "throughput": 1847.89, "total_tokens": 1974432} |
| {"current_steps": 4815, "total_steps": 8260, "loss": 0.0414, "lr": 2.2143153867427305e-05, "epoch": 5.829297820823244, "percentage": 58.29, "elapsed_time": "0:17:49", "remaining_time": "0:12:45", "throughput": 1848.09, "total_tokens": 1976480} |
| {"current_steps": 4820, "total_steps": 8260, "loss": 0.1861, "lr": 2.2090681588375594e-05, "epoch": 5.835351089588378, "percentage": 58.35, "elapsed_time": "0:17:50", "remaining_time": "0:12:44", "throughput": 1848.3, "total_tokens": 1978624} |
| {"current_steps": 4825, "total_steps": 8260, "loss": 0.0813, "lr": 2.2038222298623507e-05, "epoch": 5.841404358353511, "percentage": 58.41, "elapsed_time": "0:17:51", "remaining_time": "0:12:42", "throughput": 1848.51, "total_tokens": 1980736} |
| {"current_steps": 4830, "total_steps": 8260, "loss": 0.0391, "lr": 2.1985776232387202e-05, "epoch": 5.847457627118644, "percentage": 58.47, "elapsed_time": "0:17:52", "remaining_time": "0:12:41", "throughput": 1848.65, "total_tokens": 1982656} |
| {"current_steps": 4835, "total_steps": 8260, "loss": 0.0491, "lr": 2.1933343623823814e-05, "epoch": 5.853510895883777, "percentage": 58.54, "elapsed_time": "0:17:53", "remaining_time": "0:12:40", "throughput": 1848.91, "total_tokens": 1984832} |
| {"current_steps": 4840, "total_steps": 8260, "loss": 0.0534, "lr": 2.1880924707030407e-05, "epoch": 5.85956416464891, "percentage": 58.6, "elapsed_time": "0:17:54", "remaining_time": "0:12:39", "throughput": 1849.07, "total_tokens": 1986848} |
| {"current_steps": 4845, "total_steps": 8260, "loss": 0.1297, "lr": 2.1828519716042888e-05, "epoch": 5.865617433414044, "percentage": 58.66, "elapsed_time": "0:17:55", "remaining_time": "0:12:38", "throughput": 1849.29, "total_tokens": 1988992} |
| {"current_steps": 4850, "total_steps": 8260, "loss": 0.1604, "lr": 2.177612888483502e-05, "epoch": 5.871670702179177, "percentage": 58.72, "elapsed_time": "0:17:56", "remaining_time": "0:12:36", "throughput": 1849.43, "total_tokens": 1990976} |
| {"current_steps": 4855, "total_steps": 8260, "loss": 0.0932, "lr": 2.1723752447317312e-05, "epoch": 5.877723970944309, "percentage": 58.78, "elapsed_time": "0:17:57", "remaining_time": "0:12:35", "throughput": 1849.55, "total_tokens": 1992928} |
| {"current_steps": 4860, "total_steps": 8260, "loss": 0.0957, "lr": 2.167139063733605e-05, "epoch": 5.883777239709443, "percentage": 58.84, "elapsed_time": "0:17:58", "remaining_time": "0:12:34", "throughput": 1849.79, "total_tokens": 1995104} |
| {"current_steps": 4865, "total_steps": 8260, "loss": 0.0964, "lr": 2.161904368867217e-05, "epoch": 5.889830508474576, "percentage": 58.9, "elapsed_time": "0:17:59", "remaining_time": "0:12:33", "throughput": 1850.05, "total_tokens": 1997312} |
| {"current_steps": 4870, "total_steps": 8260, "loss": 0.104, "lr": 2.1566711835040284e-05, "epoch": 5.8958837772397095, "percentage": 58.96, "elapsed_time": "0:18:00", "remaining_time": "0:12:32", "throughput": 1850.14, "total_tokens": 1999232} |
| {"current_steps": 4875, "total_steps": 8260, "loss": 0.1246, "lr": 2.1514395310087596e-05, "epoch": 5.901937046004843, "percentage": 59.02, "elapsed_time": "0:18:01", "remaining_time": "0:12:30", "throughput": 1850.24, "total_tokens": 2001088} |
| {"current_steps": 4880, "total_steps": 8260, "loss": 0.0446, "lr": 2.1462094347392887e-05, "epoch": 5.907990314769976, "percentage": 59.08, "elapsed_time": "0:18:02", "remaining_time": "0:12:29", "throughput": 1850.46, "total_tokens": 2003232} |
| {"current_steps": 4885, "total_steps": 8260, "loss": 0.0365, "lr": 2.1409809180465436e-05, "epoch": 5.914043583535109, "percentage": 59.14, "elapsed_time": "0:18:03", "remaining_time": "0:12:28", "throughput": 1850.65, "total_tokens": 2005312} |
| {"current_steps": 4890, "total_steps": 8260, "loss": 0.1331, "lr": 2.1357540042744006e-05, "epoch": 5.920096852300242, "percentage": 59.2, "elapsed_time": "0:18:04", "remaining_time": "0:12:27", "throughput": 1850.83, "total_tokens": 2007392} |
| {"current_steps": 4895, "total_steps": 8260, "loss": 0.1303, "lr": 2.1305287167595808e-05, "epoch": 5.926150121065375, "percentage": 59.26, "elapsed_time": "0:18:05", "remaining_time": "0:12:26", "throughput": 1851.07, "total_tokens": 2009568} |
| {"current_steps": 4900, "total_steps": 8260, "loss": 0.0602, "lr": 2.1253050788315436e-05, "epoch": 5.932203389830509, "percentage": 59.32, "elapsed_time": "0:18:06", "remaining_time": "0:12:25", "throughput": 1851.17, "total_tokens": 2011424} |
| {"current_steps": 4905, "total_steps": 8260, "loss": 0.0786, "lr": 2.120083113812381e-05, "epoch": 5.938256658595642, "percentage": 59.38, "elapsed_time": "0:18:07", "remaining_time": "0:12:23", "throughput": 1851.37, "total_tokens": 2013504} |
| {"current_steps": 4910, "total_steps": 8260, "loss": 0.0789, "lr": 2.1148628450167203e-05, "epoch": 5.9443099273607745, "percentage": 59.44, "elapsed_time": "0:18:08", "remaining_time": "0:12:22", "throughput": 1851.77, "total_tokens": 2015872} |
| {"current_steps": 4915, "total_steps": 8260, "loss": 0.1102, "lr": 2.109644295751612e-05, "epoch": 5.950363196125908, "percentage": 59.5, "elapsed_time": "0:18:09", "remaining_time": "0:12:21", "throughput": 1851.9, "total_tokens": 2017888} |
| {"current_steps": 4920, "total_steps": 8260, "loss": 0.0789, "lr": 2.1044274893164316e-05, "epoch": 5.956416464891041, "percentage": 59.56, "elapsed_time": "0:18:10", "remaining_time": "0:12:20", "throughput": 1852.11, "total_tokens": 2019904} |
| {"current_steps": 4925, "total_steps": 8260, "loss": 0.1214, "lr": 2.0992124490027727e-05, "epoch": 5.9624697336561745, "percentage": 59.62, "elapsed_time": "0:18:11", "remaining_time": "0:12:19", "throughput": 1852.32, "total_tokens": 2022016} |
| {"current_steps": 4930, "total_steps": 8260, "loss": 0.1057, "lr": 2.0939991980943437e-05, "epoch": 5.968523002421308, "percentage": 59.69, "elapsed_time": "0:18:12", "remaining_time": "0:12:17", "throughput": 1852.47, "total_tokens": 2023936} |
| {"current_steps": 4935, "total_steps": 8260, "loss": 0.1097, "lr": 2.088787759866863e-05, "epoch": 5.97457627118644, "percentage": 59.75, "elapsed_time": "0:18:13", "remaining_time": "0:12:16", "throughput": 1852.71, "total_tokens": 2026080} |
| {"current_steps": 4940, "total_steps": 8260, "loss": 0.0174, "lr": 2.0835781575879574e-05, "epoch": 5.980629539951574, "percentage": 59.81, "elapsed_time": "0:18:14", "remaining_time": "0:12:15", "throughput": 1852.86, "total_tokens": 2028064} |
| {"current_steps": 4945, "total_steps": 8260, "loss": 0.0393, "lr": 2.0783704145170547e-05, "epoch": 5.986682808716707, "percentage": 59.87, "elapsed_time": "0:18:15", "remaining_time": "0:12:14", "throughput": 1852.95, "total_tokens": 2029920} |
| {"current_steps": 4950, "total_steps": 8260, "loss": 0.162, "lr": 2.0731645539052845e-05, "epoch": 5.99273607748184, "percentage": 59.93, "elapsed_time": "0:18:16", "remaining_time": "0:12:13", "throughput": 1853.09, "total_tokens": 2031776} |
| {"current_steps": 4955, "total_steps": 8260, "loss": 0.0429, "lr": 2.067960598995369e-05, "epoch": 5.998789346246974, "percentage": 59.99, "elapsed_time": "0:18:17", "remaining_time": "0:12:11", "throughput": 1853.3, "total_tokens": 2033888} |
| {"current_steps": 4956, "total_steps": 8260, "eval_loss": 0.10304119437932968, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:18:25", "remaining_time": "0:12:17", "throughput": 1839.55, "total_tokens": 2033992} |
| {"current_steps": 4960, "total_steps": 8260, "loss": 0.0408, "lr": 2.062758573021523e-05, "epoch": 6.004842615012106, "percentage": 60.05, "elapsed_time": "0:18:28", "remaining_time": "0:12:17", "throughput": 1837.02, "total_tokens": 2035752} |
| {"current_steps": 4965, "total_steps": 8260, "loss": 0.0861, "lr": 2.0575584992093505e-05, "epoch": 6.0108958837772395, "percentage": 60.11, "elapsed_time": "0:18:29", "remaining_time": "0:12:16", "throughput": 1837.26, "total_tokens": 2037896} |
| {"current_steps": 4970, "total_steps": 8260, "loss": 0.0724, "lr": 2.0523604007757374e-05, "epoch": 6.016949152542373, "percentage": 60.17, "elapsed_time": "0:18:30", "remaining_time": "0:12:14", "throughput": 1837.51, "total_tokens": 2040008} |
| {"current_steps": 4975, "total_steps": 8260, "loss": 0.0937, "lr": 2.0471643009287523e-05, "epoch": 6.023002421307506, "percentage": 60.23, "elapsed_time": "0:18:31", "remaining_time": "0:12:13", "throughput": 1837.72, "total_tokens": 2042056} |
| {"current_steps": 4980, "total_steps": 8260, "loss": 0.1298, "lr": 2.0419702228675395e-05, "epoch": 6.0290556900726395, "percentage": 60.29, "elapsed_time": "0:18:32", "remaining_time": "0:12:12", "throughput": 1837.88, "total_tokens": 2044040} |
| {"current_steps": 4985, "total_steps": 8260, "loss": 0.0095, "lr": 2.0367781897822147e-05, "epoch": 6.035108958837772, "percentage": 60.35, "elapsed_time": "0:18:33", "remaining_time": "0:12:11", "throughput": 1838.03, "total_tokens": 2046056} |
| {"current_steps": 4990, "total_steps": 8260, "loss": 0.1368, "lr": 2.031588224853767e-05, "epoch": 6.041162227602905, "percentage": 60.41, "elapsed_time": "0:18:34", "remaining_time": "0:12:10", "throughput": 1838.2, "total_tokens": 2047944} |
| {"current_steps": 4995, "total_steps": 8260, "loss": 0.0285, "lr": 2.0264003512539493e-05, "epoch": 6.047215496368039, "percentage": 60.47, "elapsed_time": "0:18:35", "remaining_time": "0:12:08", "throughput": 1838.4, "total_tokens": 2050024} |
| {"current_steps": 5000, "total_steps": 8260, "loss": 0.1022, "lr": 2.0212145921451787e-05, "epoch": 6.053268765133172, "percentage": 60.53, "elapsed_time": "0:18:36", "remaining_time": "0:12:07", "throughput": 1838.61, "total_tokens": 2052040} |
| {"current_steps": 5005, "total_steps": 8260, "loss": 0.0224, "lr": 2.01603097068043e-05, "epoch": 6.059322033898305, "percentage": 60.59, "elapsed_time": "0:18:37", "remaining_time": "0:12:06", "throughput": 1838.79, "total_tokens": 2054120} |
| {"current_steps": 5010, "total_steps": 8260, "loss": 0.1311, "lr": 2.0108495100031364e-05, "epoch": 6.065375302663438, "percentage": 60.65, "elapsed_time": "0:18:38", "remaining_time": "0:12:05", "throughput": 1839.05, "total_tokens": 2056296} |
| {"current_steps": 5015, "total_steps": 8260, "loss": 0.0098, "lr": 2.0056702332470806e-05, "epoch": 6.071428571428571, "percentage": 60.71, "elapsed_time": "0:18:39", "remaining_time": "0:12:04", "throughput": 1839.25, "total_tokens": 2058408} |
| {"current_steps": 5020, "total_steps": 8260, "loss": 0.1196, "lr": 2.0004931635362982e-05, "epoch": 6.0774818401937045, "percentage": 60.77, "elapsed_time": "0:18:40", "remaining_time": "0:12:02", "throughput": 1839.42, "total_tokens": 2060456} |
| {"current_steps": 5025, "total_steps": 8260, "loss": 0.0191, "lr": 1.995318323984969e-05, "epoch": 6.083535108958838, "percentage": 60.84, "elapsed_time": "0:18:41", "remaining_time": "0:12:01", "throughput": 1839.52, "total_tokens": 2062408} |
| {"current_steps": 5030, "total_steps": 8260, "loss": 0.0956, "lr": 1.9901457376973143e-05, "epoch": 6.089588377723971, "percentage": 60.9, "elapsed_time": "0:18:42", "remaining_time": "0:12:00", "throughput": 1839.63, "total_tokens": 2064392} |
| {"current_steps": 5035, "total_steps": 8260, "loss": 0.0696, "lr": 1.9849754277674993e-05, "epoch": 6.095641646489105, "percentage": 60.96, "elapsed_time": "0:18:43", "remaining_time": "0:11:59", "throughput": 1839.84, "total_tokens": 2066504} |
| {"current_steps": 5040, "total_steps": 8260, "loss": 0.0865, "lr": 1.979807417279521e-05, "epoch": 6.101694915254237, "percentage": 61.02, "elapsed_time": "0:18:44", "remaining_time": "0:11:58", "throughput": 1840.02, "total_tokens": 2068584} |
| {"current_steps": 5045, "total_steps": 8260, "loss": 0.0121, "lr": 1.974641729307115e-05, "epoch": 6.10774818401937, "percentage": 61.08, "elapsed_time": "0:18:45", "remaining_time": "0:11:57", "throughput": 1840.26, "total_tokens": 2070696} |
| {"current_steps": 5050, "total_steps": 8260, "loss": 0.0835, "lr": 1.9694783869136435e-05, "epoch": 6.113801452784504, "percentage": 61.14, "elapsed_time": "0:18:46", "remaining_time": "0:11:55", "throughput": 1840.51, "total_tokens": 2072872} |
| {"current_steps": 5055, "total_steps": 8260, "loss": 0.0422, "lr": 1.9643174131519986e-05, "epoch": 6.119854721549637, "percentage": 61.2, "elapsed_time": "0:18:47", "remaining_time": "0:11:54", "throughput": 1840.64, "total_tokens": 2074824} |
| {"current_steps": 5060, "total_steps": 8260, "loss": 0.0724, "lr": 1.9591588310644967e-05, "epoch": 6.12590799031477, "percentage": 61.26, "elapsed_time": "0:18:48", "remaining_time": "0:11:53", "throughput": 1840.84, "total_tokens": 2076808} |
| {"current_steps": 5065, "total_steps": 8260, "loss": 0.1062, "lr": 1.9540026636827742e-05, "epoch": 6.131961259079903, "percentage": 61.32, "elapsed_time": "0:18:49", "remaining_time": "0:11:52", "throughput": 1841.04, "total_tokens": 2078856} |
| {"current_steps": 5070, "total_steps": 8260, "loss": 0.1156, "lr": 1.948848934027689e-05, "epoch": 6.138014527845036, "percentage": 61.38, "elapsed_time": "0:18:50", "remaining_time": "0:11:51", "throughput": 1841.33, "total_tokens": 2081096} |
| {"current_steps": 5075, "total_steps": 8260, "loss": 0.0585, "lr": 1.9436976651092144e-05, "epoch": 6.1440677966101696, "percentage": 61.44, "elapsed_time": "0:18:51", "remaining_time": "0:11:49", "throughput": 1841.49, "total_tokens": 2083144} |
| {"current_steps": 5080, "total_steps": 8260, "loss": 0.105, "lr": 1.9385488799263372e-05, "epoch": 6.150121065375303, "percentage": 61.5, "elapsed_time": "0:18:52", "remaining_time": "0:11:48", "throughput": 1841.62, "total_tokens": 2085096} |
| {"current_steps": 5085, "total_steps": 8260, "loss": 0.042, "lr": 1.9334026014669543e-05, "epoch": 6.156174334140436, "percentage": 61.56, "elapsed_time": "0:18:53", "remaining_time": "0:11:47", "throughput": 1841.93, "total_tokens": 2087272} |
| {"current_steps": 5090, "total_steps": 8260, "loss": 0.1471, "lr": 1.9282588527077715e-05, "epoch": 6.162227602905569, "percentage": 61.62, "elapsed_time": "0:18:54", "remaining_time": "0:11:46", "throughput": 1842.01, "total_tokens": 2089224} |
| {"current_steps": 5095, "total_steps": 8260, "loss": 0.0988, "lr": 1.9231176566142006e-05, "epoch": 6.168280871670702, "percentage": 61.68, "elapsed_time": "0:18:55", "remaining_time": "0:11:45", "throughput": 1842.3, "total_tokens": 2091432} |
| {"current_steps": 5100, "total_steps": 8260, "loss": 0.0162, "lr": 1.917979036140255e-05, "epoch": 6.174334140435835, "percentage": 61.74, "elapsed_time": "0:18:56", "remaining_time": "0:11:43", "throughput": 1842.47, "total_tokens": 2093384} |
| {"current_steps": 5105, "total_steps": 8260, "loss": 0.0875, "lr": 1.9128430142284503e-05, "epoch": 6.180387409200969, "percentage": 61.8, "elapsed_time": "0:18:57", "remaining_time": "0:11:42", "throughput": 1842.72, "total_tokens": 2095528} |
| {"current_steps": 5110, "total_steps": 8260, "loss": 0.0601, "lr": 1.9077096138096992e-05, "epoch": 6.186440677966102, "percentage": 61.86, "elapsed_time": "0:18:58", "remaining_time": "0:11:41", "throughput": 1842.9, "total_tokens": 2097544} |
| {"current_steps": 5115, "total_steps": 8260, "loss": 0.0994, "lr": 1.9025788578032113e-05, "epoch": 6.1924939467312345, "percentage": 61.92, "elapsed_time": "0:18:59", "remaining_time": "0:11:40", "throughput": 1843.07, "total_tokens": 2099592} |
| {"current_steps": 5120, "total_steps": 8260, "loss": 0.0328, "lr": 1.8974507691163867e-05, "epoch": 6.198547215496368, "percentage": 61.99, "elapsed_time": "0:19:00", "remaining_time": "0:11:39", "throughput": 1843.16, "total_tokens": 2101448} |
| {"current_steps": 5125, "total_steps": 8260, "loss": 0.0444, "lr": 1.892325370644721e-05, "epoch": 6.204600484261501, "percentage": 62.05, "elapsed_time": "0:19:01", "remaining_time": "0:11:38", "throughput": 1843.38, "total_tokens": 2103528} |
| {"current_steps": 5130, "total_steps": 8260, "loss": 0.0264, "lr": 1.8872026852716954e-05, "epoch": 6.210653753026635, "percentage": 62.11, "elapsed_time": "0:19:02", "remaining_time": "0:11:36", "throughput": 1843.59, "total_tokens": 2105640} |
| {"current_steps": 5135, "total_steps": 8260, "loss": 0.0767, "lr": 1.8820827358686793e-05, "epoch": 6.216707021791768, "percentage": 62.17, "elapsed_time": "0:19:03", "remaining_time": "0:11:35", "throughput": 1843.79, "total_tokens": 2107688} |
| {"current_steps": 5140, "total_steps": 8260, "loss": 0.0946, "lr": 1.8769655452948274e-05, "epoch": 6.2227602905569, "percentage": 62.23, "elapsed_time": "0:19:04", "remaining_time": "0:11:34", "throughput": 1843.88, "total_tokens": 2109608} |
| {"current_steps": 5145, "total_steps": 8260, "loss": 0.0691, "lr": 1.8718511363969733e-05, "epoch": 6.228813559322034, "percentage": 62.29, "elapsed_time": "0:19:05", "remaining_time": "0:11:33", "throughput": 1844.1, "total_tokens": 2111752} |
| {"current_steps": 5150, "total_steps": 8260, "loss": 0.0447, "lr": 1.8667395320095367e-05, "epoch": 6.234866828087167, "percentage": 62.35, "elapsed_time": "0:19:06", "remaining_time": "0:11:32", "throughput": 1844.36, "total_tokens": 2113928} |
| {"current_steps": 5155, "total_steps": 8260, "loss": 0.1179, "lr": 1.8616307549544113e-05, "epoch": 6.2409200968523, "percentage": 62.41, "elapsed_time": "0:19:07", "remaining_time": "0:11:30", "throughput": 1844.49, "total_tokens": 2115944} |
| {"current_steps": 5160, "total_steps": 8260, "loss": 0.1186, "lr": 1.85652482804087e-05, "epoch": 6.246973365617434, "percentage": 62.47, "elapsed_time": "0:19:08", "remaining_time": "0:11:29", "throughput": 1844.76, "total_tokens": 2118120} |
| {"current_steps": 5165, "total_steps": 8260, "loss": 0.1351, "lr": 1.85142177406546e-05, "epoch": 6.253026634382566, "percentage": 62.53, "elapsed_time": "0:19:09", "remaining_time": "0:11:28", "throughput": 1844.89, "total_tokens": 2120136} |
| {"current_steps": 5170, "total_steps": 8260, "loss": 0.1226, "lr": 1.8463216158119015e-05, "epoch": 6.2590799031477, "percentage": 62.59, "elapsed_time": "0:19:10", "remaining_time": "0:11:27", "throughput": 1845.05, "total_tokens": 2122184} |
| {"current_steps": 5175, "total_steps": 8260, "loss": 0.056, "lr": 1.8412243760509867e-05, "epoch": 6.265133171912833, "percentage": 62.65, "elapsed_time": "0:19:11", "remaining_time": "0:11:26", "throughput": 1845.2, "total_tokens": 2124200} |
| {"current_steps": 5180, "total_steps": 8260, "loss": 0.096, "lr": 1.8361300775404765e-05, "epoch": 6.271186440677966, "percentage": 62.71, "elapsed_time": "0:19:12", "remaining_time": "0:11:25", "throughput": 1845.4, "total_tokens": 2126280} |
| {"current_steps": 5185, "total_steps": 8260, "loss": 0.01, "lr": 1.8310387430250014e-05, "epoch": 6.2772397094431, "percentage": 62.77, "elapsed_time": "0:19:13", "remaining_time": "0:11:23", "throughput": 1845.59, "total_tokens": 2128360} |
| {"current_steps": 5190, "total_steps": 8260, "loss": 0.1548, "lr": 1.825950395235956e-05, "epoch": 6.283292978208232, "percentage": 62.83, "elapsed_time": "0:19:14", "remaining_time": "0:11:22", "throughput": 1845.66, "total_tokens": 2130248} |
| {"current_steps": 5195, "total_steps": 8260, "loss": 0.0623, "lr": 1.8208650568914033e-05, "epoch": 6.289346246973365, "percentage": 62.89, "elapsed_time": "0:19:15", "remaining_time": "0:11:21", "throughput": 1845.78, "total_tokens": 2132200} |
| {"current_steps": 5200, "total_steps": 8260, "loss": 0.1016, "lr": 1.815782750695967e-05, "epoch": 6.295399515738499, "percentage": 62.95, "elapsed_time": "0:19:16", "remaining_time": "0:11:20", "throughput": 1845.91, "total_tokens": 2134216} |
| {"current_steps": 5205, "total_steps": 8260, "loss": 0.0259, "lr": 1.810703499340735e-05, "epoch": 6.301452784503632, "percentage": 63.01, "elapsed_time": "0:19:17", "remaining_time": "0:11:19", "throughput": 1846.19, "total_tokens": 2136392} |
| {"current_steps": 5210, "total_steps": 8260, "loss": 0.044, "lr": 1.8056273255031552e-05, "epoch": 6.3075060532687655, "percentage": 63.08, "elapsed_time": "0:19:18", "remaining_time": "0:11:18", "throughput": 1846.43, "total_tokens": 2138504} |
| {"current_steps": 5215, "total_steps": 8260, "loss": 0.0714, "lr": 1.8005542518469366e-05, "epoch": 6.313559322033898, "percentage": 63.14, "elapsed_time": "0:19:19", "remaining_time": "0:11:16", "throughput": 1846.62, "total_tokens": 2140616} |
| {"current_steps": 5220, "total_steps": 8260, "loss": 0.0999, "lr": 1.7954843010219446e-05, "epoch": 6.319612590799031, "percentage": 63.2, "elapsed_time": "0:19:20", "remaining_time": "0:11:15", "throughput": 1846.77, "total_tokens": 2142664} |
| {"current_steps": 5225, "total_steps": 8260, "loss": 0.0703, "lr": 1.790417495664103e-05, "epoch": 6.325665859564165, "percentage": 63.26, "elapsed_time": "0:19:21", "remaining_time": "0:11:14", "throughput": 1846.98, "total_tokens": 2144744} |
| {"current_steps": 5230, "total_steps": 8260, "loss": 0.2731, "lr": 1.785353858395292e-05, "epoch": 6.331719128329298, "percentage": 63.32, "elapsed_time": "0:19:22", "remaining_time": "0:11:13", "throughput": 1847.16, "total_tokens": 2146760} |
| {"current_steps": 5235, "total_steps": 8260, "loss": 0.1637, "lr": 1.7802934118232482e-05, "epoch": 6.337772397094431, "percentage": 63.38, "elapsed_time": "0:19:23", "remaining_time": "0:11:12", "throughput": 1847.41, "total_tokens": 2148904} |
| {"current_steps": 5240, "total_steps": 8260, "loss": 0.0664, "lr": 1.775236178541461e-05, "epoch": 6.343825665859564, "percentage": 63.44, "elapsed_time": "0:19:24", "remaining_time": "0:11:10", "throughput": 1847.66, "total_tokens": 2151080} |
| {"current_steps": 5245, "total_steps": 8260, "loss": 0.0488, "lr": 1.7701821811290743e-05, "epoch": 6.349878934624697, "percentage": 63.5, "elapsed_time": "0:19:25", "remaining_time": "0:11:09", "throughput": 1847.81, "total_tokens": 2153032} |
| {"current_steps": 5250, "total_steps": 8260, "loss": 0.0375, "lr": 1.7651314421507843e-05, "epoch": 6.3559322033898304, "percentage": 63.56, "elapsed_time": "0:19:26", "remaining_time": "0:11:08", "throughput": 1848.01, "total_tokens": 2155080} |
| {"current_steps": 5255, "total_steps": 8260, "loss": 0.0928, "lr": 1.7600839841567395e-05, "epoch": 6.361985472154964, "percentage": 63.62, "elapsed_time": "0:19:27", "remaining_time": "0:11:07", "throughput": 1848.16, "total_tokens": 2157064} |
| {"current_steps": 5260, "total_steps": 8260, "loss": 0.0327, "lr": 1.7550398296824395e-05, "epoch": 6.368038740920097, "percentage": 63.68, "elapsed_time": "0:19:28", "remaining_time": "0:11:06", "throughput": 1848.38, "total_tokens": 2159208} |
| {"current_steps": 5265, "total_steps": 8260, "loss": 0.006, "lr": 1.749999001248635e-05, "epoch": 6.37409200968523, "percentage": 63.74, "elapsed_time": "0:19:29", "remaining_time": "0:11:05", "throughput": 1848.62, "total_tokens": 2161384} |
| {"current_steps": 5270, "total_steps": 8260, "loss": 0.0835, "lr": 1.7449615213612264e-05, "epoch": 6.380145278450363, "percentage": 63.8, "elapsed_time": "0:19:30", "remaining_time": "0:11:03", "throughput": 1848.81, "total_tokens": 2163432} |
| {"current_steps": 5275, "total_steps": 8260, "loss": 0.0703, "lr": 1.7399274125111635e-05, "epoch": 6.386198547215496, "percentage": 63.86, "elapsed_time": "0:19:31", "remaining_time": "0:11:02", "throughput": 1849.1, "total_tokens": 2165640} |
| {"current_steps": 5280, "total_steps": 8260, "loss": 0.0297, "lr": 1.7348966971743465e-05, "epoch": 6.39225181598063, "percentage": 63.92, "elapsed_time": "0:19:32", "remaining_time": "0:11:01", "throughput": 1849.21, "total_tokens": 2167656} |
| {"current_steps": 5285, "total_steps": 8260, "loss": 0.0477, "lr": 1.729869397811523e-05, "epoch": 6.398305084745763, "percentage": 63.98, "elapsed_time": "0:19:33", "remaining_time": "0:11:00", "throughput": 1849.33, "total_tokens": 2169672} |
| {"current_steps": 5290, "total_steps": 8260, "loss": 0.0647, "lr": 1.72484553686819e-05, "epoch": 6.404358353510895, "percentage": 64.04, "elapsed_time": "0:19:34", "remaining_time": "0:10:59", "throughput": 1849.42, "total_tokens": 2171592} |
| {"current_steps": 5295, "total_steps": 8260, "loss": 0.0547, "lr": 1.719825136774494e-05, "epoch": 6.410411622276029, "percentage": 64.1, "elapsed_time": "0:19:35", "remaining_time": "0:10:58", "throughput": 1849.56, "total_tokens": 2173576} |
| {"current_steps": 5300, "total_steps": 8260, "loss": 0.0047, "lr": 1.714808219945129e-05, "epoch": 6.416464891041162, "percentage": 64.16, "elapsed_time": "0:19:36", "remaining_time": "0:10:56", "throughput": 1849.71, "total_tokens": 2175592} |
| {"current_steps": 5305, "total_steps": 8260, "loss": 0.1255, "lr": 1.709794808779234e-05, "epoch": 6.4225181598062955, "percentage": 64.23, "elapsed_time": "0:19:37", "remaining_time": "0:10:55", "throughput": 1849.85, "total_tokens": 2177512} |
| {"current_steps": 5310, "total_steps": 8260, "loss": 0.054, "lr": 1.704784925660301e-05, "epoch": 6.428571428571429, "percentage": 64.29, "elapsed_time": "0:19:38", "remaining_time": "0:10:54", "throughput": 1850.0, "total_tokens": 2179560} |
| {"current_steps": 5315, "total_steps": 8260, "loss": 0.08, "lr": 1.699778592956069e-05, "epoch": 6.434624697336561, "percentage": 64.35, "elapsed_time": "0:19:39", "remaining_time": "0:10:53", "throughput": 1850.16, "total_tokens": 2181608} |
| {"current_steps": 5320, "total_steps": 8260, "loss": 0.0575, "lr": 1.6947758330184226e-05, "epoch": 6.440677966101695, "percentage": 64.41, "elapsed_time": "0:19:40", "remaining_time": "0:10:52", "throughput": 1850.31, "total_tokens": 2183656} |
| {"current_steps": 5325, "total_steps": 8260, "loss": 0.0243, "lr": 1.689776668183299e-05, "epoch": 6.446731234866828, "percentage": 64.47, "elapsed_time": "0:19:41", "remaining_time": "0:10:51", "throughput": 1850.41, "total_tokens": 2185576} |
| {"current_steps": 5330, "total_steps": 8260, "loss": 0.0371, "lr": 1.6847811207705813e-05, "epoch": 6.452784503631961, "percentage": 64.53, "elapsed_time": "0:19:42", "remaining_time": "0:10:49", "throughput": 1850.58, "total_tokens": 2187592} |
| {"current_steps": 5335, "total_steps": 8260, "loss": 0.0066, "lr": 1.6797892130840036e-05, "epoch": 6.458837772397095, "percentage": 64.59, "elapsed_time": "0:19:43", "remaining_time": "0:10:48", "throughput": 1850.69, "total_tokens": 2189544} |
| {"current_steps": 5340, "total_steps": 8260, "loss": 0.1118, "lr": 1.6748009674110477e-05, "epoch": 6.464891041162228, "percentage": 64.65, "elapsed_time": "0:19:44", "remaining_time": "0:10:47", "throughput": 1850.8, "total_tokens": 2191496} |
| {"current_steps": 5345, "total_steps": 8260, "loss": 0.058, "lr": 1.669816406022848e-05, "epoch": 6.4709443099273605, "percentage": 64.71, "elapsed_time": "0:19:45", "remaining_time": "0:10:46", "throughput": 1850.89, "total_tokens": 2193416} |
| {"current_steps": 5350, "total_steps": 8260, "loss": 0.0774, "lr": 1.6648355511740876e-05, "epoch": 6.476997578692494, "percentage": 64.77, "elapsed_time": "0:19:46", "remaining_time": "0:10:45", "throughput": 1851.01, "total_tokens": 2195432} |
| {"current_steps": 5355, "total_steps": 8260, "loss": 0.0263, "lr": 1.659858425102902e-05, "epoch": 6.483050847457627, "percentage": 64.83, "elapsed_time": "0:19:46", "remaining_time": "0:10:43", "throughput": 1851.13, "total_tokens": 2197288} |
| {"current_steps": 5360, "total_steps": 8260, "loss": 0.0064, "lr": 1.6548850500307772e-05, "epoch": 6.4891041162227605, "percentage": 64.89, "elapsed_time": "0:19:48", "remaining_time": "0:10:42", "throughput": 1851.29, "total_tokens": 2199368} |
| {"current_steps": 5365, "total_steps": 8260, "loss": 0.1231, "lr": 1.649915448162455e-05, "epoch": 6.495157384987894, "percentage": 64.95, "elapsed_time": "0:19:49", "remaining_time": "0:10:41", "throughput": 1851.58, "total_tokens": 2201640} |
| {"current_steps": 5369, "total_steps": 8260, "eval_loss": 0.13233190774917603, "epoch": 6.5, "percentage": 65.0, "elapsed_time": "0:19:57", "remaining_time": "0:10:45", "throughput": 1839.17, "total_tokens": 2203208} |
| {"current_steps": 5370, "total_steps": 8260, "loss": 0.1258, "lr": 1.6449496416858284e-05, "epoch": 6.501210653753026, "percentage": 65.01, "elapsed_time": "0:19:59", "remaining_time": "0:10:45", "throughput": 1836.36, "total_tokens": 2203592} |
| {"current_steps": 5375, "total_steps": 8260, "loss": 0.0443, "lr": 1.6399876527718456e-05, "epoch": 6.50726392251816, "percentage": 65.07, "elapsed_time": "0:20:00", "remaining_time": "0:10:44", "throughput": 1836.55, "total_tokens": 2205640} |
| {"current_steps": 5380, "total_steps": 8260, "loss": 0.0818, "lr": 1.6350295035744094e-05, "epoch": 6.513317191283293, "percentage": 65.13, "elapsed_time": "0:20:01", "remaining_time": "0:10:43", "throughput": 1836.79, "total_tokens": 2207752} |
| {"current_steps": 5385, "total_steps": 8260, "loss": 0.0655, "lr": 1.6300752162302822e-05, "epoch": 6.519370460048426, "percentage": 65.19, "elapsed_time": "0:20:02", "remaining_time": "0:10:42", "throughput": 1837.0, "total_tokens": 2209864} |
| {"current_steps": 5390, "total_steps": 8260, "loss": 0.0677, "lr": 1.625124812858982e-05, "epoch": 6.52542372881356, "percentage": 65.25, "elapsed_time": "0:20:03", "remaining_time": "0:10:41", "throughput": 1837.17, "total_tokens": 2211944} |
| {"current_steps": 5395, "total_steps": 8260, "loss": 0.0281, "lr": 1.6201783155626862e-05, "epoch": 6.531476997578692, "percentage": 65.31, "elapsed_time": "0:20:05", "remaining_time": "0:10:39", "throughput": 1837.28, "total_tokens": 2213928} |
| {"current_steps": 5400, "total_steps": 8260, "loss": 0.1077, "lr": 1.615235746426133e-05, "epoch": 6.5375302663438255, "percentage": 65.38, "elapsed_time": "0:20:06", "remaining_time": "0:10:38", "throughput": 1837.42, "total_tokens": 2215944} |
| {"current_steps": 5405, "total_steps": 8260, "loss": 0.0628, "lr": 1.6102971275165228e-05, "epoch": 6.543583535108959, "percentage": 65.44, "elapsed_time": "0:20:06", "remaining_time": "0:10:37", "throughput": 1837.58, "total_tokens": 2217960} |
| {"current_steps": 5410, "total_steps": 8260, "loss": 0.1529, "lr": 1.6053624808834188e-05, "epoch": 6.549636803874092, "percentage": 65.5, "elapsed_time": "0:20:07", "remaining_time": "0:10:36", "throughput": 1837.75, "total_tokens": 2219976} |
| {"current_steps": 5415, "total_steps": 8260, "loss": 0.1528, "lr": 1.6004318285586497e-05, "epoch": 6.5556900726392255, "percentage": 65.56, "elapsed_time": "0:20:08", "remaining_time": "0:10:35", "throughput": 1837.89, "total_tokens": 2221992} |
| {"current_steps": 5420, "total_steps": 8260, "loss": 0.0908, "lr": 1.5955051925562092e-05, "epoch": 6.561743341404358, "percentage": 65.62, "elapsed_time": "0:20:09", "remaining_time": "0:10:34", "throughput": 1838.04, "total_tokens": 2223976} |
| {"current_steps": 5425, "total_steps": 8260, "loss": 0.0599, "lr": 1.590582594872162e-05, "epoch": 6.567796610169491, "percentage": 65.68, "elapsed_time": "0:20:10", "remaining_time": "0:10:32", "throughput": 1838.24, "total_tokens": 2226088} |
| {"current_steps": 5430, "total_steps": 8260, "loss": 0.061, "lr": 1.585664057484539e-05, "epoch": 6.573849878934625, "percentage": 65.74, "elapsed_time": "0:20:11", "remaining_time": "0:10:31", "throughput": 1838.38, "total_tokens": 2228008} |
| {"current_steps": 5435, "total_steps": 8260, "loss": 0.1131, "lr": 1.5807496023532472e-05, "epoch": 6.579903147699758, "percentage": 65.8, "elapsed_time": "0:20:12", "remaining_time": "0:10:30", "throughput": 1838.52, "total_tokens": 2229928} |
| {"current_steps": 5440, "total_steps": 8260, "loss": 0.0372, "lr": 1.5758392514199644e-05, "epoch": 6.585956416464891, "percentage": 65.86, "elapsed_time": "0:20:13", "remaining_time": "0:10:29", "throughput": 1838.67, "total_tokens": 2231912} |
| {"current_steps": 5445, "total_steps": 8260, "loss": 0.0657, "lr": 1.5709330266080446e-05, "epoch": 6.592009685230024, "percentage": 65.92, "elapsed_time": "0:20:14", "remaining_time": "0:10:28", "throughput": 1838.88, "total_tokens": 2233992} |
| {"current_steps": 5450, "total_steps": 8260, "loss": 0.036, "lr": 1.5660309498224225e-05, "epoch": 6.598062953995157, "percentage": 65.98, "elapsed_time": "0:20:15", "remaining_time": "0:10:26", "throughput": 1839.03, "total_tokens": 2235976} |
| {"current_steps": 5455, "total_steps": 8260, "loss": 0.124, "lr": 1.5611330429495096e-05, "epoch": 6.6041162227602905, "percentage": 66.04, "elapsed_time": "0:20:16", "remaining_time": "0:10:25", "throughput": 1839.26, "total_tokens": 2238088} |
| {"current_steps": 5460, "total_steps": 8260, "loss": 0.1365, "lr": 1.556239327857101e-05, "epoch": 6.610169491525424, "percentage": 66.1, "elapsed_time": "0:20:17", "remaining_time": "0:10:24", "throughput": 1839.41, "total_tokens": 2240136} |
| {"current_steps": 5465, "total_steps": 8260, "loss": 0.1054, "lr": 1.551349826394278e-05, "epoch": 6.616222760290557, "percentage": 66.16, "elapsed_time": "0:20:18", "remaining_time": "0:10:23", "throughput": 1839.64, "total_tokens": 2242280} |
| {"current_steps": 5470, "total_steps": 8260, "loss": 0.0485, "lr": 1.5464645603913066e-05, "epoch": 6.622276029055691, "percentage": 66.22, "elapsed_time": "0:20:19", "remaining_time": "0:10:22", "throughput": 1839.8, "total_tokens": 2244360} |
| {"current_steps": 5475, "total_steps": 8260, "loss": 0.0307, "lr": 1.5415835516595465e-05, "epoch": 6.628329297820823, "percentage": 66.28, "elapsed_time": "0:20:20", "remaining_time": "0:10:21", "throughput": 1840.05, "total_tokens": 2246536} |
| {"current_steps": 5480, "total_steps": 8260, "loss": 0.0478, "lr": 1.5367068219913456e-05, "epoch": 6.634382566585956, "percentage": 66.34, "elapsed_time": "0:20:21", "remaining_time": "0:10:19", "throughput": 1840.23, "total_tokens": 2248616} |
| {"current_steps": 5485, "total_steps": 8260, "loss": 0.0045, "lr": 1.5318343931599503e-05, "epoch": 6.64043583535109, "percentage": 66.4, "elapsed_time": "0:20:22", "remaining_time": "0:10:18", "throughput": 1840.41, "total_tokens": 2250664} |
| {"current_steps": 5490, "total_steps": 8260, "loss": 0.0626, "lr": 1.5269662869194036e-05, "epoch": 6.646489104116223, "percentage": 66.46, "elapsed_time": "0:20:23", "remaining_time": "0:10:17", "throughput": 1840.62, "total_tokens": 2252808} |
| {"current_steps": 5495, "total_steps": 8260, "loss": 0.0159, "lr": 1.5221025250044486e-05, "epoch": 6.652542372881356, "percentage": 66.53, "elapsed_time": "0:20:24", "remaining_time": "0:10:16", "throughput": 1840.85, "total_tokens": 2254984} |
| {"current_steps": 5500, "total_steps": 8260, "loss": 0.0214, "lr": 1.517243129130433e-05, "epoch": 6.658595641646489, "percentage": 66.59, "elapsed_time": "0:20:25", "remaining_time": "0:10:15", "throughput": 1841.03, "total_tokens": 2257064} |
| {"current_steps": 5505, "total_steps": 8260, "loss": 0.0236, "lr": 1.512388120993212e-05, "epoch": 6.664648910411622, "percentage": 66.65, "elapsed_time": "0:20:26", "remaining_time": "0:10:14", "throughput": 1841.21, "total_tokens": 2259112} |
| {"current_steps": 5510, "total_steps": 8260, "loss": 0.0486, "lr": 1.5075375222690496e-05, "epoch": 6.670702179176756, "percentage": 66.71, "elapsed_time": "0:20:27", "remaining_time": "0:10:12", "throughput": 1841.41, "total_tokens": 2261224} |
| {"current_steps": 5515, "total_steps": 8260, "loss": 0.0717, "lr": 1.5026913546145232e-05, "epoch": 6.676755447941889, "percentage": 66.77, "elapsed_time": "0:20:29", "remaining_time": "0:10:11", "throughput": 1841.65, "total_tokens": 2263400} |
| {"current_steps": 5520, "total_steps": 8260, "loss": 0.0036, "lr": 1.4978496396664279e-05, "epoch": 6.682808716707022, "percentage": 66.83, "elapsed_time": "0:20:30", "remaining_time": "0:10:10", "throughput": 1841.78, "total_tokens": 2265416} |
| {"current_steps": 5525, "total_steps": 8260, "loss": 0.1315, "lr": 1.4930123990416766e-05, "epoch": 6.688861985472155, "percentage": 66.89, "elapsed_time": "0:20:31", "remaining_time": "0:10:09", "throughput": 1842.01, "total_tokens": 2267560} |
| {"current_steps": 5530, "total_steps": 8260, "loss": 0.0219, "lr": 1.4881796543372079e-05, "epoch": 6.694915254237288, "percentage": 66.95, "elapsed_time": "0:20:32", "remaining_time": "0:10:08", "throughput": 1842.15, "total_tokens": 2269544} |
| {"current_steps": 5535, "total_steps": 8260, "loss": 0.0735, "lr": 1.4833514271298859e-05, "epoch": 6.700968523002421, "percentage": 67.01, "elapsed_time": "0:20:33", "remaining_time": "0:10:07", "throughput": 1842.38, "total_tokens": 2271720} |
| {"current_steps": 5540, "total_steps": 8260, "loss": 0.0039, "lr": 1.4785277389764046e-05, "epoch": 6.707021791767555, "percentage": 67.07, "elapsed_time": "0:20:34", "remaining_time": "0:10:05", "throughput": 1842.61, "total_tokens": 2273928} |
| {"current_steps": 5545, "total_steps": 8260, "loss": 0.0475, "lr": 1.4737086114131943e-05, "epoch": 6.713075060532688, "percentage": 67.13, "elapsed_time": "0:20:35", "remaining_time": "0:10:04", "throughput": 1842.74, "total_tokens": 2275912} |
| {"current_steps": 5550, "total_steps": 8260, "loss": 0.0317, "lr": 1.4688940659563225e-05, "epoch": 6.719128329297821, "percentage": 67.19, "elapsed_time": "0:20:36", "remaining_time": "0:10:03", "throughput": 1842.94, "total_tokens": 2278024} |
| {"current_steps": 5555, "total_steps": 8260, "loss": 0.0064, "lr": 1.4640841241013995e-05, "epoch": 6.725181598062954, "percentage": 67.25, "elapsed_time": "0:20:37", "remaining_time": "0:10:02", "throughput": 1843.13, "total_tokens": 2280136} |
| {"current_steps": 5560, "total_steps": 8260, "loss": 0.0175, "lr": 1.4592788073234803e-05, "epoch": 6.731234866828087, "percentage": 67.31, "elapsed_time": "0:20:38", "remaining_time": "0:10:01", "throughput": 1843.31, "total_tokens": 2282248} |
| {"current_steps": 5565, "total_steps": 8260, "loss": 0.0438, "lr": 1.4544781370769723e-05, "epoch": 6.737288135593221, "percentage": 67.37, "elapsed_time": "0:20:39", "remaining_time": "0:10:00", "throughput": 1843.54, "total_tokens": 2284424} |
| {"current_steps": 5570, "total_steps": 8260, "loss": 0.0216, "lr": 1.4496821347955359e-05, "epoch": 6.743341404358354, "percentage": 67.43, "elapsed_time": "0:20:40", "remaining_time": "0:09:58", "throughput": 1843.63, "total_tokens": 2286344} |
| {"current_steps": 5575, "total_steps": 8260, "loss": 0.1289, "lr": 1.444890821891991e-05, "epoch": 6.749394673123486, "percentage": 67.49, "elapsed_time": "0:20:41", "remaining_time": "0:09:57", "throughput": 1843.8, "total_tokens": 2288456} |
| {"current_steps": 5580, "total_steps": 8260, "loss": 0.0283, "lr": 1.4401042197582193e-05, "epoch": 6.75544794188862, "percentage": 67.55, "elapsed_time": "0:20:42", "remaining_time": "0:09:56", "throughput": 1843.88, "total_tokens": 2290312} |
| {"current_steps": 5585, "total_steps": 8260, "loss": 0.0387, "lr": 1.4353223497650731e-05, "epoch": 6.761501210653753, "percentage": 67.62, "elapsed_time": "0:20:43", "remaining_time": "0:09:55", "throughput": 1844.09, "total_tokens": 2292392} |
| {"current_steps": 5590, "total_steps": 8260, "loss": 0.1823, "lr": 1.4305452332622748e-05, "epoch": 6.767554479418886, "percentage": 67.68, "elapsed_time": "0:20:44", "remaining_time": "0:09:54", "throughput": 1844.26, "total_tokens": 2294472} |
| {"current_steps": 5595, "total_steps": 8260, "loss": 0.0079, "lr": 1.4257728915783244e-05, "epoch": 6.77360774818402, "percentage": 67.74, "elapsed_time": "0:20:45", "remaining_time": "0:09:53", "throughput": 1844.46, "total_tokens": 2296616} |
| {"current_steps": 5600, "total_steps": 8260, "loss": 0.1444, "lr": 1.4210053460204023e-05, "epoch": 6.779661016949152, "percentage": 67.8, "elapsed_time": "0:20:46", "remaining_time": "0:09:51", "throughput": 1844.73, "total_tokens": 2298856} |
| {"current_steps": 5605, "total_steps": 8260, "loss": 0.0991, "lr": 1.4162426178742788e-05, "epoch": 6.785714285714286, "percentage": 67.86, "elapsed_time": "0:20:47", "remaining_time": "0:09:50", "throughput": 1844.97, "total_tokens": 2301064} |
| {"current_steps": 5610, "total_steps": 8260, "loss": 0.3319, "lr": 1.4114847284042132e-05, "epoch": 6.791767554479419, "percentage": 67.92, "elapsed_time": "0:20:48", "remaining_time": "0:09:49", "throughput": 1845.18, "total_tokens": 2303208} |
| {"current_steps": 5615, "total_steps": 8260, "loss": 0.0937, "lr": 1.4067316988528617e-05, "epoch": 6.797820823244552, "percentage": 67.98, "elapsed_time": "0:20:49", "remaining_time": "0:09:48", "throughput": 1845.37, "total_tokens": 2305288} |
| {"current_steps": 5620, "total_steps": 8260, "loss": 0.0307, "lr": 1.4019835504411827e-05, "epoch": 6.803874092009686, "percentage": 68.04, "elapsed_time": "0:20:50", "remaining_time": "0:09:47", "throughput": 1845.52, "total_tokens": 2307304} |
| {"current_steps": 5625, "total_steps": 8260, "loss": 0.0122, "lr": 1.3972403043683419e-05, "epoch": 6.809927360774818, "percentage": 68.1, "elapsed_time": "0:20:51", "remaining_time": "0:09:46", "throughput": 1845.73, "total_tokens": 2309448} |
| {"current_steps": 5630, "total_steps": 8260, "loss": 0.071, "lr": 1.3925019818116164e-05, "epoch": 6.815980629539951, "percentage": 68.16, "elapsed_time": "0:20:52", "remaining_time": "0:09:44", "throughput": 1845.83, "total_tokens": 2311368} |
| {"current_steps": 5635, "total_steps": 8260, "loss": 0.1829, "lr": 1.387768603926302e-05, "epoch": 6.822033898305085, "percentage": 68.22, "elapsed_time": "0:20:53", "remaining_time": "0:09:43", "throughput": 1846.01, "total_tokens": 2313448} |
| {"current_steps": 5640, "total_steps": 8260, "loss": 0.0612, "lr": 1.383040191845619e-05, "epoch": 6.828087167070218, "percentage": 68.28, "elapsed_time": "0:20:54", "remaining_time": "0:09:42", "throughput": 1846.2, "total_tokens": 2315464} |
| {"current_steps": 5645, "total_steps": 8260, "loss": 0.1428, "lr": 1.378316766680615e-05, "epoch": 6.8341404358353515, "percentage": 68.34, "elapsed_time": "0:20:55", "remaining_time": "0:09:41", "throughput": 1846.32, "total_tokens": 2317480} |
| {"current_steps": 5650, "total_steps": 8260, "loss": 0.0447, "lr": 1.373598349520073e-05, "epoch": 6.840193704600484, "percentage": 68.4, "elapsed_time": "0:20:56", "remaining_time": "0:09:40", "throughput": 1846.44, "total_tokens": 2319432} |
| {"current_steps": 5655, "total_steps": 8260, "loss": 0.1719, "lr": 1.3688849614304164e-05, "epoch": 6.846246973365617, "percentage": 68.46, "elapsed_time": "0:20:57", "remaining_time": "0:09:39", "throughput": 1846.61, "total_tokens": 2321512} |
| {"current_steps": 5660, "total_steps": 8260, "loss": 0.1285, "lr": 1.3641766234556146e-05, "epoch": 6.852300242130751, "percentage": 68.52, "elapsed_time": "0:20:58", "remaining_time": "0:09:37", "throughput": 1846.82, "total_tokens": 2323560} |
| {"current_steps": 5665, "total_steps": 8260, "loss": 0.0205, "lr": 1.3594733566170926e-05, "epoch": 6.858353510895884, "percentage": 68.58, "elapsed_time": "0:20:59", "remaining_time": "0:09:36", "throughput": 1846.94, "total_tokens": 2325576} |
| {"current_steps": 5670, "total_steps": 8260, "loss": 0.034, "lr": 1.3547751819136309e-05, "epoch": 6.864406779661017, "percentage": 68.64, "elapsed_time": "0:21:00", "remaining_time": "0:09:35", "throughput": 1847.06, "total_tokens": 2327496} |
| {"current_steps": 5675, "total_steps": 8260, "loss": 0.0935, "lr": 1.350082120321276e-05, "epoch": 6.87046004842615, "percentage": 68.7, "elapsed_time": "0:21:01", "remaining_time": "0:09:34", "throughput": 1847.22, "total_tokens": 2329576} |
| {"current_steps": 5680, "total_steps": 8260, "loss": 0.1601, "lr": 1.3453941927932456e-05, "epoch": 6.876513317191283, "percentage": 68.77, "elapsed_time": "0:21:02", "remaining_time": "0:09:33", "throughput": 1847.39, "total_tokens": 2331656} |
| {"current_steps": 5685, "total_steps": 8260, "loss": 0.1157, "lr": 1.3407114202598369e-05, "epoch": 6.8825665859564165, "percentage": 68.83, "elapsed_time": "0:21:03", "remaining_time": "0:09:32", "throughput": 1847.6, "total_tokens": 2333800} |
| {"current_steps": 5690, "total_steps": 8260, "loss": 0.0098, "lr": 1.3360338236283295e-05, "epoch": 6.88861985472155, "percentage": 68.89, "elapsed_time": "0:21:04", "remaining_time": "0:09:30", "throughput": 1847.84, "total_tokens": 2336008} |
| {"current_steps": 5695, "total_steps": 8260, "loss": 0.1059, "lr": 1.3313614237828948e-05, "epoch": 6.894673123486683, "percentage": 68.95, "elapsed_time": "0:21:05", "remaining_time": "0:09:29", "throughput": 1847.97, "total_tokens": 2337928} |
| {"current_steps": 5700, "total_steps": 8260, "loss": 0.1009, "lr": 1.3266942415845018e-05, "epoch": 6.900726392251816, "percentage": 69.01, "elapsed_time": "0:21:06", "remaining_time": "0:09:28", "throughput": 1848.14, "total_tokens": 2340008} |
| {"current_steps": 5705, "total_steps": 8260, "loss": 0.1949, "lr": 1.3220322978708242e-05, "epoch": 6.906779661016949, "percentage": 69.07, "elapsed_time": "0:21:07", "remaining_time": "0:09:27", "throughput": 1848.26, "total_tokens": 2342024} |
| {"current_steps": 5710, "total_steps": 8260, "loss": 0.0282, "lr": 1.317375613456147e-05, "epoch": 6.912832929782082, "percentage": 69.13, "elapsed_time": "0:21:08", "remaining_time": "0:09:26", "throughput": 1848.41, "total_tokens": 2343976} |
| {"current_steps": 5715, "total_steps": 8260, "loss": 0.0122, "lr": 1.3127242091312752e-05, "epoch": 6.918886198547216, "percentage": 69.19, "elapsed_time": "0:21:09", "remaining_time": "0:09:25", "throughput": 1848.49, "total_tokens": 2345928} |
| {"current_steps": 5720, "total_steps": 8260, "loss": 0.2, "lr": 1.3080781056634373e-05, "epoch": 6.924939467312349, "percentage": 69.25, "elapsed_time": "0:21:10", "remaining_time": "0:09:23", "throughput": 1848.67, "total_tokens": 2347976} |
| {"current_steps": 5725, "total_steps": 8260, "loss": 0.0858, "lr": 1.3034373237961983e-05, "epoch": 6.9309927360774815, "percentage": 69.31, "elapsed_time": "0:21:11", "remaining_time": "0:09:22", "throughput": 1848.79, "total_tokens": 2349992} |
| {"current_steps": 5730, "total_steps": 8260, "loss": 0.0573, "lr": 1.2988018842493604e-05, "epoch": 6.937046004842615, "percentage": 69.37, "elapsed_time": "0:21:12", "remaining_time": "0:09:21", "throughput": 1848.92, "total_tokens": 2351976} |
| {"current_steps": 5735, "total_steps": 8260, "loss": 0.0364, "lr": 1.2941718077188758e-05, "epoch": 6.943099273607748, "percentage": 69.43, "elapsed_time": "0:21:13", "remaining_time": "0:09:20", "throughput": 1849.08, "total_tokens": 2353992} |
| {"current_steps": 5740, "total_steps": 8260, "loss": 0.1017, "lr": 1.2895471148767508e-05, "epoch": 6.9491525423728815, "percentage": 69.49, "elapsed_time": "0:21:14", "remaining_time": "0:09:19", "throughput": 1849.21, "total_tokens": 2355976} |
| {"current_steps": 5745, "total_steps": 8260, "loss": 0.0724, "lr": 1.2849278263709572e-05, "epoch": 6.955205811138015, "percentage": 69.55, "elapsed_time": "0:21:15", "remaining_time": "0:09:18", "throughput": 1849.41, "total_tokens": 2358152} |
| {"current_steps": 5750, "total_steps": 8260, "loss": 0.0674, "lr": 1.2803139628253364e-05, "epoch": 6.961259079903147, "percentage": 69.61, "elapsed_time": "0:21:16", "remaining_time": "0:09:17", "throughput": 1849.48, "total_tokens": 2360040} |
| {"current_steps": 5755, "total_steps": 8260, "loss": 0.0308, "lr": 1.2757055448395092e-05, "epoch": 6.967312348668281, "percentage": 69.67, "elapsed_time": "0:21:17", "remaining_time": "0:09:15", "throughput": 1849.6, "total_tokens": 2362056} |
| {"current_steps": 5760, "total_steps": 8260, "loss": 0.1565, "lr": 1.271102592988782e-05, "epoch": 6.973365617433414, "percentage": 69.73, "elapsed_time": "0:21:18", "remaining_time": "0:09:14", "throughput": 1849.7, "total_tokens": 2364008} |
| {"current_steps": 5765, "total_steps": 8260, "loss": 0.0585, "lr": 1.2665051278240602e-05, "epoch": 6.979418886198547, "percentage": 69.79, "elapsed_time": "0:21:19", "remaining_time": "0:09:13", "throughput": 1849.84, "total_tokens": 2366056} |
| {"current_steps": 5770, "total_steps": 8260, "loss": 0.059, "lr": 1.2619131698717504e-05, "epoch": 6.985472154963681, "percentage": 69.85, "elapsed_time": "0:21:20", "remaining_time": "0:09:12", "throughput": 1849.95, "total_tokens": 2368072} |
| {"current_steps": 5775, "total_steps": 8260, "loss": 0.0454, "lr": 1.2573267396336686e-05, "epoch": 6.991525423728813, "percentage": 69.92, "elapsed_time": "0:21:21", "remaining_time": "0:09:11", "throughput": 1850.06, "total_tokens": 2370024} |
| {"current_steps": 5780, "total_steps": 8260, "loss": 0.0422, "lr": 1.2527458575869539e-05, "epoch": 6.9975786924939465, "percentage": 69.98, "elapsed_time": "0:21:22", "remaining_time": "0:09:10", "throughput": 1850.19, "total_tokens": 2372008} |
| {"current_steps": 5782, "total_steps": 8260, "eval_loss": 0.11770041286945343, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:21:30", "remaining_time": "0:09:13", "throughput": 1838.21, "total_tokens": 2372464} |
| {"current_steps": 5785, "total_steps": 8260, "loss": 0.2173, "lr": 1.2481705441839756e-05, "epoch": 7.00363196125908, "percentage": 70.04, "elapsed_time": "0:21:32", "remaining_time": "0:09:13", "throughput": 1836.09, "total_tokens": 2373648} |
| {"current_steps": 5790, "total_steps": 8260, "loss": 0.0532, "lr": 1.2436008198522376e-05, "epoch": 7.009685230024213, "percentage": 70.1, "elapsed_time": "0:21:33", "remaining_time": "0:09:11", "throughput": 1836.27, "total_tokens": 2375696} |
| {"current_steps": 5795, "total_steps": 8260, "loss": 0.0311, "lr": 1.2390367049942916e-05, "epoch": 7.0157384987893465, "percentage": 70.16, "elapsed_time": "0:21:34", "remaining_time": "0:09:10", "throughput": 1836.53, "total_tokens": 2377872} |
| {"current_steps": 5800, "total_steps": 8260, "loss": 0.0193, "lr": 1.2344782199876431e-05, "epoch": 7.021791767554479, "percentage": 70.22, "elapsed_time": "0:21:35", "remaining_time": "0:09:09", "throughput": 1836.63, "total_tokens": 2379792} |
| {"current_steps": 5805, "total_steps": 8260, "loss": 0.0572, "lr": 1.2299253851846651e-05, "epoch": 7.027845036319612, "percentage": 70.28, "elapsed_time": "0:21:36", "remaining_time": "0:09:08", "throughput": 1836.75, "total_tokens": 2381744} |
| {"current_steps": 5810, "total_steps": 8260, "loss": 0.126, "lr": 1.2253782209125012e-05, "epoch": 7.033898305084746, "percentage": 70.34, "elapsed_time": "0:21:37", "remaining_time": "0:09:07", "throughput": 1836.85, "total_tokens": 2383664} |
| {"current_steps": 5815, "total_steps": 8260, "loss": 0.0298, "lr": 1.220836747472978e-05, "epoch": 7.039951573849879, "percentage": 70.4, "elapsed_time": "0:21:38", "remaining_time": "0:09:06", "throughput": 1837.02, "total_tokens": 2385744} |
| {"current_steps": 5820, "total_steps": 8260, "loss": 0.1225, "lr": 1.2163009851425137e-05, "epoch": 7.046004842615012, "percentage": 70.46, "elapsed_time": "0:21:39", "remaining_time": "0:09:04", "throughput": 1837.28, "total_tokens": 2387920} |
| {"current_steps": 5825, "total_steps": 8260, "loss": 0.0279, "lr": 1.2117709541720306e-05, "epoch": 7.052058111380146, "percentage": 70.52, "elapsed_time": "0:21:40", "remaining_time": "0:09:03", "throughput": 1837.48, "total_tokens": 2390000} |
| {"current_steps": 5830, "total_steps": 8260, "loss": 0.0134, "lr": 1.2072466747868597e-05, "epoch": 7.058111380145278, "percentage": 70.58, "elapsed_time": "0:21:41", "remaining_time": "0:09:02", "throughput": 1837.64, "total_tokens": 2392016} |
| {"current_steps": 5835, "total_steps": 8260, "loss": 0.0261, "lr": 1.2027281671866531e-05, "epoch": 7.0641646489104115, "percentage": 70.64, "elapsed_time": "0:21:42", "remaining_time": "0:09:01", "throughput": 1837.75, "total_tokens": 2394032} |
| {"current_steps": 5840, "total_steps": 8260, "loss": 0.0877, "lr": 1.198215451545293e-05, "epoch": 7.070217917675545, "percentage": 70.7, "elapsed_time": "0:21:43", "remaining_time": "0:09:00", "throughput": 1837.84, "total_tokens": 2395888} |
| {"current_steps": 5845, "total_steps": 8260, "loss": 0.0427, "lr": 1.193708548010804e-05, "epoch": 7.076271186440678, "percentage": 70.76, "elapsed_time": "0:21:44", "remaining_time": "0:08:59", "throughput": 1838.08, "total_tokens": 2398032} |
| {"current_steps": 5850, "total_steps": 8260, "loss": 0.0322, "lr": 1.1892074767052611e-05, "epoch": 7.0823244552058116, "percentage": 70.82, "elapsed_time": "0:21:45", "remaining_time": "0:08:57", "throughput": 1838.2, "total_tokens": 2400016} |
| {"current_steps": 5855, "total_steps": 8260, "loss": 0.0335, "lr": 1.1847122577246964e-05, "epoch": 7.088377723970944, "percentage": 70.88, "elapsed_time": "0:21:46", "remaining_time": "0:08:56", "throughput": 1838.36, "total_tokens": 2402032} |
| {"current_steps": 5860, "total_steps": 8260, "loss": 0.0076, "lr": 1.1802229111390157e-05, "epoch": 7.094430992736077, "percentage": 70.94, "elapsed_time": "0:21:47", "remaining_time": "0:08:55", "throughput": 1838.51, "total_tokens": 2404048} |
| {"current_steps": 5865, "total_steps": 8260, "loss": 0.1126, "lr": 1.175739456991908e-05, "epoch": 7.100484261501211, "percentage": 71.0, "elapsed_time": "0:21:48", "remaining_time": "0:08:54", "throughput": 1838.71, "total_tokens": 2406128} |
| {"current_steps": 5870, "total_steps": 8260, "loss": 0.0135, "lr": 1.1712619153007517e-05, "epoch": 7.106537530266344, "percentage": 71.07, "elapsed_time": "0:21:49", "remaining_time": "0:08:53", "throughput": 1838.84, "total_tokens": 2408112} |
| {"current_steps": 5875, "total_steps": 8260, "loss": 0.041, "lr": 1.166790306056528e-05, "epoch": 7.112590799031477, "percentage": 71.13, "elapsed_time": "0:21:50", "remaining_time": "0:08:52", "throughput": 1838.99, "total_tokens": 2410160} |
| {"current_steps": 5880, "total_steps": 8260, "loss": 0.0249, "lr": 1.1623246492237305e-05, "epoch": 7.11864406779661, "percentage": 71.19, "elapsed_time": "0:21:51", "remaining_time": "0:08:50", "throughput": 1839.13, "total_tokens": 2412112} |
| {"current_steps": 5885, "total_steps": 8260, "loss": 0.0759, "lr": 1.1578649647402806e-05, "epoch": 7.124697336561743, "percentage": 71.25, "elapsed_time": "0:21:52", "remaining_time": "0:08:49", "throughput": 1839.32, "total_tokens": 2414224} |
| {"current_steps": 5890, "total_steps": 8260, "loss": 0.0931, "lr": 1.1534112725174306e-05, "epoch": 7.1307506053268765, "percentage": 71.31, "elapsed_time": "0:21:53", "remaining_time": "0:08:48", "throughput": 1839.46, "total_tokens": 2416272} |
| {"current_steps": 5895, "total_steps": 8260, "loss": 0.1162, "lr": 1.1489635924396817e-05, "epoch": 7.13680387409201, "percentage": 71.37, "elapsed_time": "0:21:54", "remaining_time": "0:08:47", "throughput": 1839.61, "total_tokens": 2418224} |
| {"current_steps": 5900, "total_steps": 8260, "loss": 0.0084, "lr": 1.1445219443646896e-05, "epoch": 7.142857142857143, "percentage": 71.43, "elapsed_time": "0:21:55", "remaining_time": "0:08:46", "throughput": 1839.79, "total_tokens": 2420336} |
| {"current_steps": 5905, "total_steps": 8260, "loss": 0.0374, "lr": 1.1400863481231833e-05, "epoch": 7.148910411622276, "percentage": 71.49, "elapsed_time": "0:21:56", "remaining_time": "0:08:45", "throughput": 1839.94, "total_tokens": 2422288} |
| {"current_steps": 5910, "total_steps": 8260, "loss": 0.1008, "lr": 1.1356568235188682e-05, "epoch": 7.154963680387409, "percentage": 71.55, "elapsed_time": "0:21:57", "remaining_time": "0:08:43", "throughput": 1840.16, "total_tokens": 2424400} |
| {"current_steps": 5915, "total_steps": 8260, "loss": 0.1296, "lr": 1.1312333903283435e-05, "epoch": 7.161016949152542, "percentage": 71.61, "elapsed_time": "0:21:58", "remaining_time": "0:08:42", "throughput": 1840.29, "total_tokens": 2426384} |
| {"current_steps": 5920, "total_steps": 8260, "loss": 0.0351, "lr": 1.1268160683010096e-05, "epoch": 7.167070217917676, "percentage": 71.67, "elapsed_time": "0:21:59", "remaining_time": "0:08:41", "throughput": 1840.45, "total_tokens": 2428400} |
| {"current_steps": 5925, "total_steps": 8260, "loss": 0.0166, "lr": 1.122404877158986e-05, "epoch": 7.173123486682809, "percentage": 71.73, "elapsed_time": "0:22:00", "remaining_time": "0:08:40", "throughput": 1840.6, "total_tokens": 2430416} |
| {"current_steps": 5930, "total_steps": 8260, "loss": 0.0506, "lr": 1.1179998365970174e-05, "epoch": 7.1791767554479415, "percentage": 71.79, "elapsed_time": "0:22:01", "remaining_time": "0:08:39", "throughput": 1840.77, "total_tokens": 2432464} |
| {"current_steps": 5935, "total_steps": 8260, "loss": 0.0929, "lr": 1.113600966282386e-05, "epoch": 7.185230024213075, "percentage": 71.85, "elapsed_time": "0:22:02", "remaining_time": "0:08:38", "throughput": 1840.89, "total_tokens": 2434480} |
| {"current_steps": 5940, "total_steps": 8260, "loss": 0.0487, "lr": 1.1092082858548275e-05, "epoch": 7.191283292978208, "percentage": 71.91, "elapsed_time": "0:22:03", "remaining_time": "0:08:36", "throughput": 1841.0, "total_tokens": 2436496} |
| {"current_steps": 5945, "total_steps": 8260, "loss": 0.0208, "lr": 1.1048218149264434e-05, "epoch": 7.197336561743342, "percentage": 71.97, "elapsed_time": "0:22:04", "remaining_time": "0:08:35", "throughput": 1841.14, "total_tokens": 2438544} |
| {"current_steps": 5950, "total_steps": 8260, "loss": 0.043, "lr": 1.1004415730816083e-05, "epoch": 7.203389830508475, "percentage": 72.03, "elapsed_time": "0:22:05", "remaining_time": "0:08:34", "throughput": 1841.27, "total_tokens": 2440528} |
| {"current_steps": 5955, "total_steps": 8260, "loss": 0.156, "lr": 1.0960675798768871e-05, "epoch": 7.209443099273607, "percentage": 72.09, "elapsed_time": "0:22:06", "remaining_time": "0:08:33", "throughput": 1841.4, "total_tokens": 2442576} |
| {"current_steps": 5960, "total_steps": 8260, "loss": 0.0196, "lr": 1.0916998548409449e-05, "epoch": 7.215496368038741, "percentage": 72.15, "elapsed_time": "0:22:07", "remaining_time": "0:08:32", "throughput": 1841.54, "total_tokens": 2444560} |
| {"current_steps": 5965, "total_steps": 8260, "loss": 0.0342, "lr": 1.0873384174744641e-05, "epoch": 7.221549636803874, "percentage": 72.22, "elapsed_time": "0:22:08", "remaining_time": "0:08:31", "throughput": 1841.74, "total_tokens": 2446704} |
| {"current_steps": 5970, "total_steps": 8260, "loss": 0.0866, "lr": 1.0829832872500523e-05, "epoch": 7.227602905569007, "percentage": 72.28, "elapsed_time": "0:22:09", "remaining_time": "0:08:29", "throughput": 1841.89, "total_tokens": 2448720} |
| {"current_steps": 5975, "total_steps": 8260, "loss": 0.1339, "lr": 1.078634483612157e-05, "epoch": 7.233656174334141, "percentage": 72.34, "elapsed_time": "0:22:10", "remaining_time": "0:08:28", "throughput": 1842.06, "total_tokens": 2450832} |
| {"current_steps": 5980, "total_steps": 8260, "loss": 0.0033, "lr": 1.0742920259769792e-05, "epoch": 7.239709443099273, "percentage": 72.4, "elapsed_time": "0:22:11", "remaining_time": "0:08:27", "throughput": 1842.18, "total_tokens": 2452752} |
| {"current_steps": 5985, "total_steps": 8260, "loss": 0.0846, "lr": 1.06995593373239e-05, "epoch": 7.245762711864407, "percentage": 72.46, "elapsed_time": "0:22:12", "remaining_time": "0:08:26", "throughput": 1842.34, "total_tokens": 2454832} |
| {"current_steps": 5990, "total_steps": 8260, "loss": 0.0949, "lr": 1.0656262262378367e-05, "epoch": 7.25181598062954, "percentage": 72.52, "elapsed_time": "0:22:13", "remaining_time": "0:08:25", "throughput": 1842.45, "total_tokens": 2456848} |
| {"current_steps": 5995, "total_steps": 8260, "loss": 0.0253, "lr": 1.0613029228242627e-05, "epoch": 7.257869249394673, "percentage": 72.58, "elapsed_time": "0:22:14", "remaining_time": "0:08:24", "throughput": 1842.68, "total_tokens": 2459024} |
| {"current_steps": 6000, "total_steps": 8260, "loss": 0.1017, "lr": 1.0569860427940179e-05, "epoch": 7.263922518159807, "percentage": 72.64, "elapsed_time": "0:22:15", "remaining_time": "0:08:23", "throughput": 1842.88, "total_tokens": 2461168} |
| {"current_steps": 6005, "total_steps": 8260, "loss": 0.0059, "lr": 1.0526756054207737e-05, "epoch": 7.269975786924939, "percentage": 72.7, "elapsed_time": "0:22:16", "remaining_time": "0:08:21", "throughput": 1842.99, "total_tokens": 2463120} |
| {"current_steps": 6010, "total_steps": 8260, "loss": 0.1278, "lr": 1.0483716299494392e-05, "epoch": 7.276029055690072, "percentage": 72.76, "elapsed_time": "0:22:17", "remaining_time": "0:08:20", "throughput": 1843.16, "total_tokens": 2465168} |
| {"current_steps": 6015, "total_steps": 8260, "loss": 0.0719, "lr": 1.044074135596069e-05, "epoch": 7.282082324455206, "percentage": 72.82, "elapsed_time": "0:22:18", "remaining_time": "0:08:19", "throughput": 1843.32, "total_tokens": 2467248} |
| {"current_steps": 6020, "total_steps": 8260, "loss": 0.11, "lr": 1.0397831415477823e-05, "epoch": 7.288135593220339, "percentage": 72.88, "elapsed_time": "0:22:19", "remaining_time": "0:08:18", "throughput": 1843.42, "total_tokens": 2469200} |
| {"current_steps": 6025, "total_steps": 8260, "loss": 0.0129, "lr": 1.0354986669626796e-05, "epoch": 7.2941888619854724, "percentage": 72.94, "elapsed_time": "0:22:20", "remaining_time": "0:08:17", "throughput": 1843.6, "total_tokens": 2471312} |
| {"current_steps": 6030, "total_steps": 8260, "loss": 0.0715, "lr": 1.0312207309697502e-05, "epoch": 7.300242130750606, "percentage": 73.0, "elapsed_time": "0:22:21", "remaining_time": "0:08:16", "throughput": 1843.77, "total_tokens": 2473424} |
| {"current_steps": 6035, "total_steps": 8260, "loss": 0.0975, "lr": 1.0269493526687915e-05, "epoch": 7.306295399515738, "percentage": 73.06, "elapsed_time": "0:22:22", "remaining_time": "0:08:14", "throughput": 1843.94, "total_tokens": 2475504} |
| {"current_steps": 6040, "total_steps": 8260, "loss": 0.0636, "lr": 1.0226845511303219e-05, "epoch": 7.312348668280872, "percentage": 73.12, "elapsed_time": "0:22:23", "remaining_time": "0:08:13", "throughput": 1844.04, "total_tokens": 2477392} |
| {"current_steps": 6045, "total_steps": 8260, "loss": 0.0268, "lr": 1.0184263453954988e-05, "epoch": 7.318401937046005, "percentage": 73.18, "elapsed_time": "0:22:24", "remaining_time": "0:08:12", "throughput": 1844.21, "total_tokens": 2479504} |
| {"current_steps": 6050, "total_steps": 8260, "loss": 0.116, "lr": 1.0141747544760285e-05, "epoch": 7.324455205811138, "percentage": 73.24, "elapsed_time": "0:22:25", "remaining_time": "0:08:11", "throughput": 1844.4, "total_tokens": 2481584} |
| {"current_steps": 6055, "total_steps": 8260, "loss": 0.0893, "lr": 1.0099297973540852e-05, "epoch": 7.330508474576272, "percentage": 73.31, "elapsed_time": "0:22:26", "remaining_time": "0:08:10", "throughput": 1844.54, "total_tokens": 2483632} |
| {"current_steps": 6060, "total_steps": 8260, "loss": 0.0928, "lr": 1.0056914929822248e-05, "epoch": 7.336561743341404, "percentage": 73.37, "elapsed_time": "0:22:27", "remaining_time": "0:08:09", "throughput": 1844.7, "total_tokens": 2485680} |
| {"current_steps": 6065, "total_steps": 8260, "loss": 0.1178, "lr": 1.0014598602832995e-05, "epoch": 7.342615012106537, "percentage": 73.43, "elapsed_time": "0:22:28", "remaining_time": "0:08:08", "throughput": 1844.89, "total_tokens": 2487824} |
| {"current_steps": 6070, "total_steps": 8260, "loss": 0.0394, "lr": 9.972349181503773e-06, "epoch": 7.348668280871671, "percentage": 73.49, "elapsed_time": "0:22:29", "remaining_time": "0:08:06", "throughput": 1845.05, "total_tokens": 2489872} |
| {"current_steps": 6075, "total_steps": 8260, "loss": 0.1036, "lr": 9.930166854466516e-06, "epoch": 7.354721549636804, "percentage": 73.55, "elapsed_time": "0:22:30", "remaining_time": "0:08:05", "throughput": 1845.24, "total_tokens": 2491888} |
| {"current_steps": 6080, "total_steps": 8260, "loss": 0.054, "lr": 9.888051810053617e-06, "epoch": 7.3607748184019375, "percentage": 73.61, "elapsed_time": "0:22:31", "remaining_time": "0:08:04", "throughput": 1845.39, "total_tokens": 2493968} |
| {"current_steps": 6085, "total_steps": 8260, "loss": 0.0951, "lr": 9.846004236297052e-06, "epoch": 7.36682808716707, "percentage": 73.67, "elapsed_time": "0:22:32", "remaining_time": "0:08:03", "throughput": 1845.54, "total_tokens": 2495920} |
| {"current_steps": 6090, "total_steps": 8260, "loss": 0.0585, "lr": 9.804024320927604e-06, "epoch": 7.372881355932203, "percentage": 73.73, "elapsed_time": "0:22:33", "remaining_time": "0:08:02", "throughput": 1845.72, "total_tokens": 2498000} |
| {"current_steps": 6095, "total_steps": 8260, "loss": 0.0504, "lr": 9.76211225137392e-06, "epoch": 7.378934624697337, "percentage": 73.79, "elapsed_time": "0:22:34", "remaining_time": "0:08:01", "throughput": 1845.82, "total_tokens": 2499952} |
| {"current_steps": 6100, "total_steps": 8260, "loss": 0.0491, "lr": 9.720268214761763e-06, "epoch": 7.38498789346247, "percentage": 73.85, "elapsed_time": "0:22:35", "remaining_time": "0:07:59", "throughput": 1845.97, "total_tokens": 2501968} |
| {"current_steps": 6105, "total_steps": 8260, "loss": 0.0526, "lr": 9.678492397913167e-06, "epoch": 7.391041162227603, "percentage": 73.91, "elapsed_time": "0:22:36", "remaining_time": "0:07:58", "throughput": 1846.12, "total_tokens": 2503984} |
| {"current_steps": 6110, "total_steps": 8260, "loss": 0.0371, "lr": 9.636784987345554e-06, "epoch": 7.397094430992736, "percentage": 73.97, "elapsed_time": "0:22:37", "remaining_time": "0:07:57", "throughput": 1846.24, "total_tokens": 2505968} |
| {"current_steps": 6115, "total_steps": 8260, "loss": 0.0127, "lr": 9.595146169270944e-06, "epoch": 7.403147699757869, "percentage": 74.03, "elapsed_time": "0:22:38", "remaining_time": "0:07:56", "throughput": 1846.42, "total_tokens": 2508080} |
| {"current_steps": 6120, "total_steps": 8260, "loss": 0.0945, "lr": 9.553576129595101e-06, "epoch": 7.4092009685230025, "percentage": 74.09, "elapsed_time": "0:22:39", "remaining_time": "0:07:55", "throughput": 1846.57, "total_tokens": 2510160} |
| {"current_steps": 6125, "total_steps": 8260, "loss": 0.0846, "lr": 9.512075053916735e-06, "epoch": 7.415254237288136, "percentage": 74.15, "elapsed_time": "0:22:40", "remaining_time": "0:07:54", "throughput": 1846.76, "total_tokens": 2512304} |
| {"current_steps": 6130, "total_steps": 8260, "loss": 0.0413, "lr": 9.470643127526627e-06, "epoch": 7.421307506053269, "percentage": 74.21, "elapsed_time": "0:22:41", "remaining_time": "0:07:53", "throughput": 1847.01, "total_tokens": 2514544} |
| {"current_steps": 6135, "total_steps": 8260, "loss": 0.0067, "lr": 9.429280535406834e-06, "epoch": 7.427360774818402, "percentage": 74.27, "elapsed_time": "0:22:42", "remaining_time": "0:07:51", "throughput": 1847.18, "total_tokens": 2516592} |
| {"current_steps": 6140, "total_steps": 8260, "loss": 0.0918, "lr": 9.387987462229859e-06, "epoch": 7.433414043583535, "percentage": 74.33, "elapsed_time": "0:22:43", "remaining_time": "0:07:50", "throughput": 1847.25, "total_tokens": 2518512} |
| {"current_steps": 6145, "total_steps": 8260, "loss": 0.0878, "lr": 9.346764092357801e-06, "epoch": 7.439467312348668, "percentage": 74.39, "elapsed_time": "0:22:44", "remaining_time": "0:07:49", "throughput": 1847.44, "total_tokens": 2520592} |
| {"current_steps": 6150, "total_steps": 8260, "loss": 0.0138, "lr": 9.305610609841598e-06, "epoch": 7.445520581113802, "percentage": 74.46, "elapsed_time": "0:22:45", "remaining_time": "0:07:48", "throughput": 1847.57, "total_tokens": 2522640} |
| {"current_steps": 6155, "total_steps": 8260, "loss": 0.1144, "lr": 9.264527198420117e-06, "epoch": 7.451573849878935, "percentage": 74.52, "elapsed_time": "0:22:46", "remaining_time": "0:07:47", "throughput": 1847.7, "total_tokens": 2524592} |
| {"current_steps": 6160, "total_steps": 8260, "loss": 0.1312, "lr": 9.2235140415194e-06, "epoch": 7.4576271186440675, "percentage": 74.58, "elapsed_time": "0:22:47", "remaining_time": "0:07:46", "throughput": 1847.86, "total_tokens": 2526576} |
| {"current_steps": 6165, "total_steps": 8260, "loss": 0.0089, "lr": 9.182571322251796e-06, "epoch": 7.463680387409201, "percentage": 74.64, "elapsed_time": "0:22:48", "remaining_time": "0:07:44", "throughput": 1848.01, "total_tokens": 2528656} |
| {"current_steps": 6170, "total_steps": 8260, "loss": 0.0862, "lr": 9.141699223415221e-06, "epoch": 7.469733656174334, "percentage": 74.7, "elapsed_time": "0:22:49", "remaining_time": "0:07:43", "throughput": 1848.24, "total_tokens": 2530864} |
| {"current_steps": 6175, "total_steps": 8260, "loss": 0.046, "lr": 9.10089792749223e-06, "epoch": 7.4757869249394675, "percentage": 74.76, "elapsed_time": "0:22:50", "remaining_time": "0:07:42", "throughput": 1848.38, "total_tokens": 2532880} |
| {"current_steps": 6180, "total_steps": 8260, "loss": 0.1266, "lr": 9.06016761664929e-06, "epoch": 7.481840193704601, "percentage": 74.82, "elapsed_time": "0:22:51", "remaining_time": "0:07:41", "throughput": 1848.51, "total_tokens": 2534864} |
| {"current_steps": 6185, "total_steps": 8260, "loss": 0.0391, "lr": 9.019508472735958e-06, "epoch": 7.487893462469733, "percentage": 74.88, "elapsed_time": "0:22:52", "remaining_time": "0:07:40", "throughput": 1848.63, "total_tokens": 2536912} |
| {"current_steps": 6190, "total_steps": 8260, "loss": 0.0126, "lr": 8.978920677284022e-06, "epoch": 7.493946731234867, "percentage": 74.94, "elapsed_time": "0:22:53", "remaining_time": "0:07:39", "throughput": 1848.78, "total_tokens": 2538832} |
| {"current_steps": 6195, "total_steps": 8260, "loss": 0.1697, "lr": 8.938404411506732e-06, "epoch": 7.5, "percentage": 75.0, "elapsed_time": "0:22:54", "remaining_time": "0:07:38", "throughput": 1848.91, "total_tokens": 2540880} |
| {"current_steps": 6195, "total_steps": 8260, "eval_loss": 0.13244937360286713, "epoch": 7.5, "percentage": 75.0, "elapsed_time": "0:23:02", "remaining_time": "0:07:40", "throughput": 1838.06, "total_tokens": 2540880} |
| {"current_steps": 6200, "total_steps": 8260, "loss": 0.028, "lr": 8.897959856297971e-06, "epoch": 7.506053268765133, "percentage": 75.06, "elapsed_time": "0:23:04", "remaining_time": "0:07:40", "throughput": 1836.27, "total_tokens": 2542832} |
| {"current_steps": 6205, "total_steps": 8260, "loss": 0.0152, "lr": 8.857587192231452e-06, "epoch": 7.512106537530267, "percentage": 75.12, "elapsed_time": "0:23:05", "remaining_time": "0:07:38", "throughput": 1836.36, "total_tokens": 2544784} |
| {"current_steps": 6210, "total_steps": 8260, "loss": 0.0303, "lr": 8.817286599559932e-06, "epoch": 7.518159806295399, "percentage": 75.18, "elapsed_time": "0:23:06", "remaining_time": "0:07:37", "throughput": 1836.51, "total_tokens": 2546832} |
| {"current_steps": 6215, "total_steps": 8260, "loss": 0.0442, "lr": 8.777058258214377e-06, "epoch": 7.5242130750605325, "percentage": 75.24, "elapsed_time": "0:23:07", "remaining_time": "0:07:36", "throughput": 1836.71, "total_tokens": 2549008} |
| {"current_steps": 6220, "total_steps": 8260, "loss": 0.0592, "lr": 8.736902347803163e-06, "epoch": 7.530266343825666, "percentage": 75.3, "elapsed_time": "0:23:08", "remaining_time": "0:07:35", "throughput": 1836.89, "total_tokens": 2551120} |
| {"current_steps": 6225, "total_steps": 8260, "loss": 0.0884, "lr": 8.696819047611288e-06, "epoch": 7.536319612590799, "percentage": 75.36, "elapsed_time": "0:23:09", "remaining_time": "0:07:34", "throughput": 1837.05, "total_tokens": 2553168} |
| {"current_steps": 6230, "total_steps": 8260, "loss": 0.0785, "lr": 8.65680853659958e-06, "epoch": 7.5423728813559325, "percentage": 75.42, "elapsed_time": "0:23:10", "remaining_time": "0:07:33", "throughput": 1837.19, "total_tokens": 2555184} |
| {"current_steps": 6235, "total_steps": 8260, "loss": 0.0134, "lr": 8.616870993403864e-06, "epoch": 7.548426150121065, "percentage": 75.48, "elapsed_time": "0:23:11", "remaining_time": "0:07:32", "throughput": 1837.38, "total_tokens": 2557264} |
| {"current_steps": 6240, "total_steps": 8260, "loss": 0.0384, "lr": 8.577006596334191e-06, "epoch": 7.554479418886198, "percentage": 75.54, "elapsed_time": "0:23:12", "remaining_time": "0:07:30", "throughput": 1837.51, "total_tokens": 2559248} |
| {"current_steps": 6245, "total_steps": 8260, "loss": 0.1115, "lr": 8.537215523374038e-06, "epoch": 7.560532687651332, "percentage": 75.61, "elapsed_time": "0:23:13", "remaining_time": "0:07:29", "throughput": 1837.63, "total_tokens": 2561264} |
| {"current_steps": 6250, "total_steps": 8260, "loss": 0.1003, "lr": 8.4974979521795e-06, "epoch": 7.566585956416465, "percentage": 75.67, "elapsed_time": "0:23:14", "remaining_time": "0:07:28", "throughput": 1837.71, "total_tokens": 2563120} |
| {"current_steps": 6255, "total_steps": 8260, "loss": 0.2838, "lr": 8.45785406007852e-06, "epoch": 7.572639225181598, "percentage": 75.73, "elapsed_time": "0:23:15", "remaining_time": "0:07:27", "throughput": 1837.81, "total_tokens": 2565104} |
| {"current_steps": 6260, "total_steps": 8260, "loss": 0.1059, "lr": 8.418284024070069e-06, "epoch": 7.578692493946731, "percentage": 75.79, "elapsed_time": "0:23:16", "remaining_time": "0:07:26", "throughput": 1837.99, "total_tokens": 2567216} |
| {"current_steps": 6265, "total_steps": 8260, "loss": 0.0834, "lr": 8.378788020823394e-06, "epoch": 7.584745762711864, "percentage": 75.85, "elapsed_time": "0:23:17", "remaining_time": "0:07:25", "throughput": 1838.18, "total_tokens": 2569360} |
| {"current_steps": 6270, "total_steps": 8260, "loss": 0.0263, "lr": 8.33936622667719e-06, "epoch": 7.5907990314769975, "percentage": 75.91, "elapsed_time": "0:23:18", "remaining_time": "0:07:23", "throughput": 1838.4, "total_tokens": 2571536} |
| {"current_steps": 6275, "total_steps": 8260, "loss": 0.2096, "lr": 8.300018817638825e-06, "epoch": 7.596852300242131, "percentage": 75.97, "elapsed_time": "0:23:19", "remaining_time": "0:07:22", "throughput": 1838.57, "total_tokens": 2573648} |
| {"current_steps": 6280, "total_steps": 8260, "loss": 0.006, "lr": 8.260745969383565e-06, "epoch": 7.602905569007264, "percentage": 76.03, "elapsed_time": "0:23:20", "remaining_time": "0:07:21", "throughput": 1838.82, "total_tokens": 2575888} |
| {"current_steps": 6285, "total_steps": 8260, "loss": 0.0325, "lr": 8.221547857253781e-06, "epoch": 7.608958837772397, "percentage": 76.09, "elapsed_time": "0:23:21", "remaining_time": "0:07:20", "throughput": 1838.99, "total_tokens": 2578032} |
| {"current_steps": 6290, "total_steps": 8260, "loss": 0.0846, "lr": 8.182424656258178e-06, "epoch": 7.61501210653753, "percentage": 76.15, "elapsed_time": "0:23:22", "remaining_time": "0:07:19", "throughput": 1839.15, "total_tokens": 2580016} |
| {"current_steps": 6295, "total_steps": 8260, "loss": 0.1547, "lr": 8.143376541070993e-06, "epoch": 7.621065375302663, "percentage": 76.21, "elapsed_time": "0:23:23", "remaining_time": "0:07:18", "throughput": 1839.35, "total_tokens": 2582192} |
| {"current_steps": 6300, "total_steps": 8260, "loss": 0.1513, "lr": 8.104403686031225e-06, "epoch": 7.627118644067797, "percentage": 76.27, "elapsed_time": "0:23:24", "remaining_time": "0:07:17", "throughput": 1839.46, "total_tokens": 2584208} |
| {"current_steps": 6305, "total_steps": 8260, "loss": 0.0576, "lr": 8.06550626514185e-06, "epoch": 7.63317191283293, "percentage": 76.33, "elapsed_time": "0:23:25", "remaining_time": "0:07:15", "throughput": 1839.59, "total_tokens": 2586160} |
| {"current_steps": 6310, "total_steps": 8260, "loss": 0.0938, "lr": 8.026684452069084e-06, "epoch": 7.6392251815980625, "percentage": 76.39, "elapsed_time": "0:23:26", "remaining_time": "0:07:14", "throughput": 1839.74, "total_tokens": 2588240} |
| {"current_steps": 6315, "total_steps": 8260, "loss": 0.0614, "lr": 7.987938420141536e-06, "epoch": 7.645278450363196, "percentage": 76.45, "elapsed_time": "0:23:27", "remaining_time": "0:07:13", "throughput": 1839.97, "total_tokens": 2590480} |
| {"current_steps": 6320, "total_steps": 8260, "loss": 0.0541, "lr": 7.949268342349495e-06, "epoch": 7.651331719128329, "percentage": 76.51, "elapsed_time": "0:23:28", "remaining_time": "0:07:12", "throughput": 1840.06, "total_tokens": 2592400} |
| {"current_steps": 6325, "total_steps": 8260, "loss": 0.0565, "lr": 7.910674391344129e-06, "epoch": 7.657384987893463, "percentage": 76.57, "elapsed_time": "0:23:29", "remaining_time": "0:07:11", "throughput": 1840.15, "total_tokens": 2594352} |
| {"current_steps": 6330, "total_steps": 8260, "loss": 0.0388, "lr": 7.872156739436722e-06, "epoch": 7.663438256658596, "percentage": 76.63, "elapsed_time": "0:23:30", "remaining_time": "0:07:10", "throughput": 1840.33, "total_tokens": 2596464} |
| {"current_steps": 6335, "total_steps": 8260, "loss": 0.0585, "lr": 7.833715558597907e-06, "epoch": 7.669491525423728, "percentage": 76.69, "elapsed_time": "0:23:31", "remaining_time": "0:07:09", "throughput": 1840.5, "total_tokens": 2598544} |
| {"current_steps": 6340, "total_steps": 8260, "loss": 0.0896, "lr": 7.795351020456887e-06, "epoch": 7.675544794188862, "percentage": 76.76, "elapsed_time": "0:23:32", "remaining_time": "0:07:07", "throughput": 1840.7, "total_tokens": 2600656} |
| {"current_steps": 6345, "total_steps": 8260, "loss": 0.0446, "lr": 7.757063296300681e-06, "epoch": 7.681598062953995, "percentage": 76.82, "elapsed_time": "0:23:33", "remaining_time": "0:07:06", "throughput": 1840.91, "total_tokens": 2602832} |
| {"current_steps": 6350, "total_steps": 8260, "loss": 0.1752, "lr": 7.718852557073366e-06, "epoch": 7.687651331719128, "percentage": 76.88, "elapsed_time": "0:23:34", "remaining_time": "0:07:05", "throughput": 1841.12, "total_tokens": 2605008} |
| {"current_steps": 6355, "total_steps": 8260, "loss": 0.1387, "lr": 7.680718973375287e-06, "epoch": 7.693704600484262, "percentage": 76.94, "elapsed_time": "0:23:35", "remaining_time": "0:07:04", "throughput": 1841.31, "total_tokens": 2607152} |
| {"current_steps": 6360, "total_steps": 8260, "loss": 0.0676, "lr": 7.642662715462315e-06, "epoch": 7.699757869249394, "percentage": 77.0, "elapsed_time": "0:23:36", "remaining_time": "0:07:03", "throughput": 1841.49, "total_tokens": 2609264} |
| {"current_steps": 6365, "total_steps": 8260, "loss": 0.0044, "lr": 7.604683953245076e-06, "epoch": 7.7058111380145276, "percentage": 77.06, "elapsed_time": "0:23:37", "remaining_time": "0:07:02", "throughput": 1841.64, "total_tokens": 2611344} |
| {"current_steps": 6370, "total_steps": 8260, "loss": 0.0499, "lr": 7.566782856288224e-06, "epoch": 7.711864406779661, "percentage": 77.12, "elapsed_time": "0:23:38", "remaining_time": "0:07:01", "throughput": 1841.83, "total_tokens": 2613488} |
| {"current_steps": 6375, "total_steps": 8260, "loss": 0.0998, "lr": 7.5289595938096344e-06, "epoch": 7.717917675544794, "percentage": 77.18, "elapsed_time": "0:23:39", "remaining_time": "0:06:59", "throughput": 1841.91, "total_tokens": 2615408} |
| {"current_steps": 6380, "total_steps": 8260, "loss": 0.0694, "lr": 7.4912143346796805e-06, "epoch": 7.723970944309928, "percentage": 77.24, "elapsed_time": "0:23:40", "remaining_time": "0:06:58", "throughput": 1842.05, "total_tokens": 2617424} |
| {"current_steps": 6385, "total_steps": 8260, "loss": 0.0656, "lr": 7.4535472474204645e-06, "epoch": 7.73002421307506, "percentage": 77.3, "elapsed_time": "0:23:41", "remaining_time": "0:06:57", "throughput": 1842.18, "total_tokens": 2619312} |
| {"current_steps": 6390, "total_steps": 8260, "loss": 0.093, "lr": 7.415958500205103e-06, "epoch": 7.736077481840193, "percentage": 77.36, "elapsed_time": "0:23:42", "remaining_time": "0:06:56", "throughput": 1842.22, "total_tokens": 2621168} |
| {"current_steps": 6395, "total_steps": 8260, "loss": 0.0464, "lr": 7.37844826085691e-06, "epoch": 7.742130750605327, "percentage": 77.42, "elapsed_time": "0:23:43", "remaining_time": "0:06:55", "throughput": 1842.33, "total_tokens": 2623152} |
| {"current_steps": 6400, "total_steps": 8260, "loss": 0.0582, "lr": 7.341016696848699e-06, "epoch": 7.74818401937046, "percentage": 77.48, "elapsed_time": "0:23:44", "remaining_time": "0:06:54", "throughput": 1842.54, "total_tokens": 2625328} |
| {"current_steps": 6405, "total_steps": 8260, "loss": 0.0031, "lr": 7.303663975302022e-06, "epoch": 7.754237288135593, "percentage": 77.54, "elapsed_time": "0:23:45", "remaining_time": "0:06:52", "throughput": 1842.77, "total_tokens": 2627536} |
| {"current_steps": 6410, "total_steps": 8260, "loss": 0.0176, "lr": 7.2663902629864165e-06, "epoch": 7.760290556900727, "percentage": 77.6, "elapsed_time": "0:23:46", "remaining_time": "0:06:51", "throughput": 1842.92, "total_tokens": 2629616} |
| {"current_steps": 6415, "total_steps": 8260, "loss": 0.0658, "lr": 7.229195726318669e-06, "epoch": 7.766343825665859, "percentage": 77.66, "elapsed_time": "0:23:47", "remaining_time": "0:06:50", "throughput": 1843.06, "total_tokens": 2631696} |
| {"current_steps": 6420, "total_steps": 8260, "loss": 0.0135, "lr": 7.192080531362067e-06, "epoch": 7.772397094430993, "percentage": 77.72, "elapsed_time": "0:23:48", "remaining_time": "0:06:49", "throughput": 1843.21, "total_tokens": 2633776} |
| {"current_steps": 6425, "total_steps": 8260, "loss": 0.0102, "lr": 7.155044843825651e-06, "epoch": 7.778450363196126, "percentage": 77.78, "elapsed_time": "0:23:49", "remaining_time": "0:06:48", "throughput": 1843.42, "total_tokens": 2635920} |
| {"current_steps": 6430, "total_steps": 8260, "loss": 0.089, "lr": 7.118088829063504e-06, "epoch": 7.784503631961259, "percentage": 77.85, "elapsed_time": "0:23:50", "remaining_time": "0:06:47", "throughput": 1843.56, "total_tokens": 2637936} |
| {"current_steps": 6435, "total_steps": 8260, "loss": 0.0194, "lr": 7.081212652073979e-06, "epoch": 7.790556900726393, "percentage": 77.91, "elapsed_time": "0:23:51", "remaining_time": "0:06:46", "throughput": 1843.68, "total_tokens": 2639984} |
| {"current_steps": 6440, "total_steps": 8260, "loss": 0.0472, "lr": 7.044416477498972e-06, "epoch": 7.796610169491525, "percentage": 77.97, "elapsed_time": "0:23:52", "remaining_time": "0:06:44", "throughput": 1843.82, "total_tokens": 2642000} |
| {"current_steps": 6445, "total_steps": 8260, "loss": 0.0072, "lr": 7.007700469623185e-06, "epoch": 7.802663438256658, "percentage": 78.03, "elapsed_time": "0:23:53", "remaining_time": "0:06:43", "throughput": 1843.96, "total_tokens": 2644016} |
| {"current_steps": 6450, "total_steps": 8260, "loss": 0.1703, "lr": 6.971064792373427e-06, "epoch": 7.808716707021792, "percentage": 78.09, "elapsed_time": "0:23:54", "remaining_time": "0:06:42", "throughput": 1844.08, "total_tokens": 2646000} |
| {"current_steps": 6455, "total_steps": 8260, "loss": 0.1406, "lr": 6.934509609317821e-06, "epoch": 7.814769975786925, "percentage": 78.15, "elapsed_time": "0:23:55", "remaining_time": "0:06:41", "throughput": 1844.2, "total_tokens": 2648048} |
| {"current_steps": 6460, "total_steps": 8260, "loss": 0.0149, "lr": 6.898035083665124e-06, "epoch": 7.8208232445520585, "percentage": 78.21, "elapsed_time": "0:23:56", "remaining_time": "0:06:40", "throughput": 1844.37, "total_tokens": 2650064} |
| {"current_steps": 6465, "total_steps": 8260, "loss": 0.0545, "lr": 6.861641378263964e-06, "epoch": 7.826876513317191, "percentage": 78.27, "elapsed_time": "0:23:57", "remaining_time": "0:06:39", "throughput": 1844.47, "total_tokens": 2652016} |
| {"current_steps": 6470, "total_steps": 8260, "loss": 0.0628, "lr": 6.825328655602153e-06, "epoch": 7.832929782082324, "percentage": 78.33, "elapsed_time": "0:23:58", "remaining_time": "0:06:38", "throughput": 1844.66, "total_tokens": 2654192} |
| {"current_steps": 6475, "total_steps": 8260, "loss": 0.0621, "lr": 6.789097077805917e-06, "epoch": 7.838983050847458, "percentage": 78.39, "elapsed_time": "0:23:59", "remaining_time": "0:06:36", "throughput": 1844.79, "total_tokens": 2656208} |
| {"current_steps": 6480, "total_steps": 8260, "loss": 0.0697, "lr": 6.7529468066392015e-06, "epoch": 7.845036319612591, "percentage": 78.45, "elapsed_time": "0:24:00", "remaining_time": "0:06:35", "throughput": 1844.99, "total_tokens": 2658384} |
| {"current_steps": 6485, "total_steps": 8260, "loss": 0.0262, "lr": 6.7168780035029385e-06, "epoch": 7.851089588377724, "percentage": 78.51, "elapsed_time": "0:24:01", "remaining_time": "0:06:34", "throughput": 1845.15, "total_tokens": 2660528} |
| {"current_steps": 6490, "total_steps": 8260, "loss": 0.0834, "lr": 6.680890829434325e-06, "epoch": 7.857142857142857, "percentage": 78.57, "elapsed_time": "0:24:02", "remaining_time": "0:06:33", "throughput": 1845.32, "total_tokens": 2662640} |
| {"current_steps": 6495, "total_steps": 8260, "loss": 0.0834, "lr": 6.644985445106114e-06, "epoch": 7.86319612590799, "percentage": 78.63, "elapsed_time": "0:24:03", "remaining_time": "0:06:32", "throughput": 1845.41, "total_tokens": 2664624} |
| {"current_steps": 6500, "total_steps": 8260, "loss": 0.0539, "lr": 6.609162010825881e-06, "epoch": 7.8692493946731235, "percentage": 78.69, "elapsed_time": "0:24:04", "remaining_time": "0:06:31", "throughput": 1845.55, "total_tokens": 2666640} |
| {"current_steps": 6505, "total_steps": 8260, "loss": 0.0486, "lr": 6.573420686535317e-06, "epoch": 7.875302663438257, "percentage": 78.75, "elapsed_time": "0:24:05", "remaining_time": "0:06:30", "throughput": 1845.67, "total_tokens": 2668592} |
| {"current_steps": 6510, "total_steps": 8260, "loss": 0.0981, "lr": 6.537761631809533e-06, "epoch": 7.88135593220339, "percentage": 78.81, "elapsed_time": "0:24:06", "remaining_time": "0:06:28", "throughput": 1845.82, "total_tokens": 2670704} |
| {"current_steps": 6515, "total_steps": 8260, "loss": 0.017, "lr": 6.502185005856312e-06, "epoch": 7.8874092009685235, "percentage": 78.87, "elapsed_time": "0:24:07", "remaining_time": "0:06:27", "throughput": 1845.96, "total_tokens": 2672752} |
| {"current_steps": 6520, "total_steps": 8260, "loss": 0.0572, "lr": 6.4666909675154155e-06, "epoch": 7.893462469733656, "percentage": 78.93, "elapsed_time": "0:24:08", "remaining_time": "0:06:26", "throughput": 1846.11, "total_tokens": 2674864} |
| {"current_steps": 6525, "total_steps": 8260, "loss": 0.08, "lr": 6.431279675257873e-06, "epoch": 7.899515738498789, "percentage": 79.0, "elapsed_time": "0:24:09", "remaining_time": "0:06:25", "throughput": 1846.23, "total_tokens": 2676944} |
| {"current_steps": 6530, "total_steps": 8260, "loss": 0.0213, "lr": 6.395951287185295e-06, "epoch": 7.905569007263923, "percentage": 79.06, "elapsed_time": "0:24:10", "remaining_time": "0:06:24", "throughput": 1846.37, "total_tokens": 2679024} |
| {"current_steps": 6535, "total_steps": 8260, "loss": 0.0863, "lr": 6.360705961029126e-06, "epoch": 7.911622276029056, "percentage": 79.12, "elapsed_time": "0:24:11", "remaining_time": "0:06:23", "throughput": 1846.48, "total_tokens": 2680976} |
| {"current_steps": 6540, "total_steps": 8260, "loss": 0.0114, "lr": 6.325543854149968e-06, "epoch": 7.917675544794189, "percentage": 79.18, "elapsed_time": "0:24:12", "remaining_time": "0:06:22", "throughput": 1846.64, "total_tokens": 2683088} |
| {"current_steps": 6545, "total_steps": 8260, "loss": 0.0604, "lr": 6.290465123536876e-06, "epoch": 7.923728813559322, "percentage": 79.24, "elapsed_time": "0:24:13", "remaining_time": "0:06:20", "throughput": 1846.71, "total_tokens": 2685072} |
| {"current_steps": 6550, "total_steps": 8260, "loss": 0.0291, "lr": 6.255469925806643e-06, "epoch": 7.929782082324455, "percentage": 79.3, "elapsed_time": "0:24:14", "remaining_time": "0:06:19", "throughput": 1846.87, "total_tokens": 2687184} |
| {"current_steps": 6555, "total_steps": 8260, "loss": 0.0695, "lr": 6.220558417203132e-06, "epoch": 7.9358353510895885, "percentage": 79.36, "elapsed_time": "0:24:16", "remaining_time": "0:06:18", "throughput": 1846.98, "total_tokens": 2689232} |
| {"current_steps": 6560, "total_steps": 8260, "loss": 0.0082, "lr": 6.185730753596539e-06, "epoch": 7.941888619854722, "percentage": 79.42, "elapsed_time": "0:24:17", "remaining_time": "0:06:17", "throughput": 1847.1, "total_tokens": 2691280} |
| {"current_steps": 6565, "total_steps": 8260, "loss": 0.0101, "lr": 6.150987090482715e-06, "epoch": 7.947941888619855, "percentage": 79.48, "elapsed_time": "0:24:18", "remaining_time": "0:06:16", "throughput": 1847.22, "total_tokens": 2693328} |
| {"current_steps": 6570, "total_steps": 8260, "loss": 0.1343, "lr": 6.116327582982484e-06, "epoch": 7.953995157384988, "percentage": 79.54, "elapsed_time": "0:24:19", "remaining_time": "0:06:15", "throughput": 1847.36, "total_tokens": 2695440} |
| {"current_steps": 6575, "total_steps": 8260, "loss": 0.0098, "lr": 6.0817523858409245e-06, "epoch": 7.960048426150121, "percentage": 79.6, "elapsed_time": "0:24:20", "remaining_time": "0:06:14", "throughput": 1847.51, "total_tokens": 2697488} |
| {"current_steps": 6580, "total_steps": 8260, "loss": 0.0852, "lr": 6.047261653426708e-06, "epoch": 7.966101694915254, "percentage": 79.66, "elapsed_time": "0:24:21", "remaining_time": "0:06:13", "throughput": 1847.63, "total_tokens": 2699504} |
| {"current_steps": 6585, "total_steps": 8260, "loss": 0.0189, "lr": 6.012855539731374e-06, "epoch": 7.972154963680388, "percentage": 79.72, "elapsed_time": "0:24:22", "remaining_time": "0:06:11", "throughput": 1847.76, "total_tokens": 2701456} |
| {"current_steps": 6590, "total_steps": 8260, "loss": 0.1155, "lr": 5.978534198368691e-06, "epoch": 7.978208232445521, "percentage": 79.78, "elapsed_time": "0:24:23", "remaining_time": "0:06:10", "throughput": 1847.88, "total_tokens": 2703504} |
| {"current_steps": 6595, "total_steps": 8260, "loss": 0.0131, "lr": 5.944297782573918e-06, "epoch": 7.9842615012106535, "percentage": 79.84, "elapsed_time": "0:24:24", "remaining_time": "0:06:09", "throughput": 1848.04, "total_tokens": 2705616} |
| {"current_steps": 6600, "total_steps": 8260, "loss": 0.0716, "lr": 5.910146445203154e-06, "epoch": 7.990314769975787, "percentage": 79.9, "elapsed_time": "0:24:25", "remaining_time": "0:06:08", "throughput": 1848.2, "total_tokens": 2707728} |
| {"current_steps": 6605, "total_steps": 8260, "loss": 0.11, "lr": 5.876080338732643e-06, "epoch": 7.99636803874092, "percentage": 79.96, "elapsed_time": "0:24:26", "remaining_time": "0:06:07", "throughput": 1848.32, "total_tokens": 2709776} |
| {"current_steps": 6608, "total_steps": 8260, "eval_loss": 0.131322979927063, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:24:34", "remaining_time": "0:06:08", "throughput": 1838.07, "total_tokens": 2710624} |
| {"current_steps": 6610, "total_steps": 8260, "loss": 0.0102, "lr": 5.842099615258109e-06, "epoch": 8.002421307506053, "percentage": 80.02, "elapsed_time": "0:24:36", "remaining_time": "0:06:08", "throughput": 1836.16, "total_tokens": 2711456} |
| {"current_steps": 6615, "total_steps": 8260, "loss": 0.1227, "lr": 5.808204426494054e-06, "epoch": 8.008474576271187, "percentage": 80.08, "elapsed_time": "0:24:37", "remaining_time": "0:06:07", "throughput": 1836.29, "total_tokens": 2713536} |
| {"current_steps": 6620, "total_steps": 8260, "loss": 0.0279, "lr": 5.774394923773088e-06, "epoch": 8.01452784503632, "percentage": 80.15, "elapsed_time": "0:24:38", "remaining_time": "0:06:06", "throughput": 1836.47, "total_tokens": 2715680} |
| {"current_steps": 6625, "total_steps": 8260, "loss": 0.0674, "lr": 5.74067125804526e-06, "epoch": 8.020581113801454, "percentage": 80.21, "elapsed_time": "0:24:39", "remaining_time": "0:06:05", "throughput": 1836.59, "total_tokens": 2717728} |
| {"current_steps": 6630, "total_steps": 8260, "loss": 0.0044, "lr": 5.70703357987738e-06, "epoch": 8.026634382566586, "percentage": 80.27, "elapsed_time": "0:24:40", "remaining_time": "0:06:04", "throughput": 1836.7, "total_tokens": 2719712} |
| {"current_steps": 6635, "total_steps": 8260, "loss": 0.009, "lr": 5.673482039452363e-06, "epoch": 8.032687651331718, "percentage": 80.33, "elapsed_time": "0:24:41", "remaining_time": "0:06:02", "throughput": 1836.88, "total_tokens": 2721856} |
| {"current_steps": 6640, "total_steps": 8260, "loss": 0.0657, "lr": 5.640016786568525e-06, "epoch": 8.038740920096853, "percentage": 80.39, "elapsed_time": "0:24:42", "remaining_time": "0:06:01", "throughput": 1837.08, "total_tokens": 2724000} |
| {"current_steps": 6645, "total_steps": 8260, "loss": 0.0715, "lr": 5.606637970638917e-06, "epoch": 8.044794188861985, "percentage": 80.45, "elapsed_time": "0:24:43", "remaining_time": "0:06:00", "throughput": 1837.15, "total_tokens": 2725792} |
| {"current_steps": 6650, "total_steps": 8260, "loss": 0.0472, "lr": 5.573345740690714e-06, "epoch": 8.05084745762712, "percentage": 80.51, "elapsed_time": "0:24:44", "remaining_time": "0:05:59", "throughput": 1837.25, "total_tokens": 2727744} |
| {"current_steps": 6655, "total_steps": 8260, "loss": 0.0493, "lr": 5.540140245364478e-06, "epoch": 8.056900726392252, "percentage": 80.57, "elapsed_time": "0:24:45", "remaining_time": "0:05:58", "throughput": 1837.42, "total_tokens": 2729856} |
| {"current_steps": 6660, "total_steps": 8260, "loss": 0.0028, "lr": 5.5070216329135365e-06, "epoch": 8.062953995157384, "percentage": 80.63, "elapsed_time": "0:24:46", "remaining_time": "0:05:57", "throughput": 1837.52, "total_tokens": 2731872} |
| {"current_steps": 6665, "total_steps": 8260, "loss": 0.0678, "lr": 5.473990051203298e-06, "epoch": 8.069007263922519, "percentage": 80.69, "elapsed_time": "0:24:47", "remaining_time": "0:05:56", "throughput": 1837.69, "total_tokens": 2734016} |
| {"current_steps": 6670, "total_steps": 8260, "loss": 0.0626, "lr": 5.441045647710627e-06, "epoch": 8.075060532687651, "percentage": 80.75, "elapsed_time": "0:24:48", "remaining_time": "0:05:54", "throughput": 1837.82, "total_tokens": 2736032} |
| {"current_steps": 6675, "total_steps": 8260, "loss": 0.1259, "lr": 5.408188569523137e-06, "epoch": 8.081113801452785, "percentage": 80.81, "elapsed_time": "0:24:49", "remaining_time": "0:05:53", "throughput": 1837.9, "total_tokens": 2737952} |
| {"current_steps": 6680, "total_steps": 8260, "loss": 0.0138, "lr": 5.375418963338566e-06, "epoch": 8.087167070217918, "percentage": 80.87, "elapsed_time": "0:24:50", "remaining_time": "0:05:52", "throughput": 1838.02, "total_tokens": 2739968} |
| {"current_steps": 6685, "total_steps": 8260, "loss": 0.0289, "lr": 5.342736975464116e-06, "epoch": 8.09322033898305, "percentage": 80.93, "elapsed_time": "0:24:51", "remaining_time": "0:05:51", "throughput": 1838.15, "total_tokens": 2742016} |
| {"current_steps": 6690, "total_steps": 8260, "loss": 0.1123, "lr": 5.310142751815792e-06, "epoch": 8.099273607748184, "percentage": 80.99, "elapsed_time": "0:24:52", "remaining_time": "0:05:50", "throughput": 1838.31, "total_tokens": 2744128} |
| {"current_steps": 6695, "total_steps": 8260, "loss": 0.0157, "lr": 5.277636437917769e-06, "epoch": 8.105326876513317, "percentage": 81.05, "elapsed_time": "0:24:53", "remaining_time": "0:05:49", "throughput": 1838.4, "total_tokens": 2746112} |
| {"current_steps": 6700, "total_steps": 8260, "loss": 0.0172, "lr": 5.245218178901717e-06, "epoch": 8.111380145278451, "percentage": 81.11, "elapsed_time": "0:24:54", "remaining_time": "0:05:48", "throughput": 1838.53, "total_tokens": 2748128} |
| {"current_steps": 6705, "total_steps": 8260, "loss": 0.0246, "lr": 5.212888119506168e-06, "epoch": 8.117433414043584, "percentage": 81.17, "elapsed_time": "0:24:55", "remaining_time": "0:05:46", "throughput": 1838.69, "total_tokens": 2750176} |
| {"current_steps": 6710, "total_steps": 8260, "loss": 0.1293, "lr": 5.180646404075862e-06, "epoch": 8.123486682808716, "percentage": 81.23, "elapsed_time": "0:24:56", "remaining_time": "0:05:45", "throughput": 1838.83, "total_tokens": 2752256} |
| {"current_steps": 6715, "total_steps": 8260, "loss": 0.0201, "lr": 5.1484931765611286e-06, "epoch": 8.12953995157385, "percentage": 81.3, "elapsed_time": "0:24:57", "remaining_time": "0:05:44", "throughput": 1838.93, "total_tokens": 2754208} |
| {"current_steps": 6720, "total_steps": 8260, "loss": 0.0037, "lr": 5.116428580517207e-06, "epoch": 8.135593220338983, "percentage": 81.36, "elapsed_time": "0:24:58", "remaining_time": "0:05:43", "throughput": 1839.15, "total_tokens": 2756352} |
| {"current_steps": 6725, "total_steps": 8260, "loss": 0.0579, "lr": 5.084452759103603e-06, "epoch": 8.141646489104117, "percentage": 81.42, "elapsed_time": "0:24:59", "remaining_time": "0:05:42", "throughput": 1839.35, "total_tokens": 2758528} |
| {"current_steps": 6730, "total_steps": 8260, "loss": 0.0065, "lr": 5.052565855083511e-06, "epoch": 8.14769975786925, "percentage": 81.48, "elapsed_time": "0:25:00", "remaining_time": "0:05:41", "throughput": 1839.52, "total_tokens": 2760608} |
| {"current_steps": 6735, "total_steps": 8260, "loss": 0.0573, "lr": 5.020768010823102e-06, "epoch": 8.153753026634382, "percentage": 81.54, "elapsed_time": "0:25:01", "remaining_time": "0:05:40", "throughput": 1839.67, "total_tokens": 2762656} |
| {"current_steps": 6740, "total_steps": 8260, "loss": 0.2164, "lr": 4.98905936829093e-06, "epoch": 8.159806295399516, "percentage": 81.6, "elapsed_time": "0:25:02", "remaining_time": "0:05:38", "throughput": 1839.77, "total_tokens": 2764672} |
| {"current_steps": 6745, "total_steps": 8260, "loss": 0.0368, "lr": 4.957440069057281e-06, "epoch": 8.165859564164649, "percentage": 81.66, "elapsed_time": "0:25:03", "remaining_time": "0:05:37", "throughput": 1839.91, "total_tokens": 2766752} |
| {"current_steps": 6750, "total_steps": 8260, "loss": 0.1087, "lr": 4.92591025429357e-06, "epoch": 8.171912832929783, "percentage": 81.72, "elapsed_time": "0:25:04", "remaining_time": "0:05:36", "throughput": 1840.05, "total_tokens": 2768736} |
| {"current_steps": 6755, "total_steps": 8260, "loss": 0.0155, "lr": 4.8944700647716616e-06, "epoch": 8.177966101694915, "percentage": 81.78, "elapsed_time": "0:25:05", "remaining_time": "0:05:35", "throughput": 1840.15, "total_tokens": 2770752} |
| {"current_steps": 6760, "total_steps": 8260, "loss": 0.0298, "lr": 4.863119640863284e-06, "epoch": 8.184019370460048, "percentage": 81.84, "elapsed_time": "0:25:06", "remaining_time": "0:05:34", "throughput": 1840.35, "total_tokens": 2772928} |
| {"current_steps": 6765, "total_steps": 8260, "loss": 0.1074, "lr": 4.831859122539381e-06, "epoch": 8.190072639225182, "percentage": 81.9, "elapsed_time": "0:25:07", "remaining_time": "0:05:33", "throughput": 1840.47, "total_tokens": 2774976} |
| {"current_steps": 6770, "total_steps": 8260, "loss": 0.0032, "lr": 4.800688649369489e-06, "epoch": 8.196125907990314, "percentage": 81.96, "elapsed_time": "0:25:08", "remaining_time": "0:05:32", "throughput": 1840.67, "total_tokens": 2777152} |
| {"current_steps": 6775, "total_steps": 8260, "loss": 0.0547, "lr": 4.769608360521135e-06, "epoch": 8.202179176755449, "percentage": 82.02, "elapsed_time": "0:25:09", "remaining_time": "0:05:30", "throughput": 1840.84, "total_tokens": 2779296} |
| {"current_steps": 6780, "total_steps": 8260, "loss": 0.0269, "lr": 4.7386183947591815e-06, "epoch": 8.208232445520581, "percentage": 82.08, "elapsed_time": "0:25:10", "remaining_time": "0:05:29", "throughput": 1840.98, "total_tokens": 2781376} |
| {"current_steps": 6785, "total_steps": 8260, "loss": 0.1081, "lr": 4.7077188904452255e-06, "epoch": 8.214285714285714, "percentage": 82.14, "elapsed_time": "0:25:11", "remaining_time": "0:05:28", "throughput": 1841.14, "total_tokens": 2783424} |
| {"current_steps": 6790, "total_steps": 8260, "loss": 0.0648, "lr": 4.676909985536981e-06, "epoch": 8.220338983050848, "percentage": 82.2, "elapsed_time": "0:25:12", "remaining_time": "0:05:27", "throughput": 1841.23, "total_tokens": 2785376} |
| {"current_steps": 6795, "total_steps": 8260, "loss": 0.007, "lr": 4.64619181758767e-06, "epoch": 8.22639225181598, "percentage": 82.26, "elapsed_time": "0:25:13", "remaining_time": "0:05:26", "throughput": 1841.39, "total_tokens": 2787456} |
| {"current_steps": 6800, "total_steps": 8260, "loss": 0.0055, "lr": 4.615564523745391e-06, "epoch": 8.232445520581114, "percentage": 82.32, "elapsed_time": "0:25:14", "remaining_time": "0:05:25", "throughput": 1841.56, "total_tokens": 2789568} |
| {"current_steps": 6805, "total_steps": 8260, "loss": 0.0492, "lr": 4.585028240752498e-06, "epoch": 8.238498789346247, "percentage": 82.38, "elapsed_time": "0:25:15", "remaining_time": "0:05:24", "throughput": 1841.69, "total_tokens": 2791584} |
| {"current_steps": 6810, "total_steps": 8260, "loss": 0.0491, "lr": 4.554583104945037e-06, "epoch": 8.24455205811138, "percentage": 82.45, "elapsed_time": "0:25:16", "remaining_time": "0:05:22", "throughput": 1841.8, "total_tokens": 2793632} |
| {"current_steps": 6815, "total_steps": 8260, "loss": 0.1031, "lr": 4.524229252252091e-06, "epoch": 8.250605326876514, "percentage": 82.51, "elapsed_time": "0:25:17", "remaining_time": "0:05:21", "throughput": 1841.95, "total_tokens": 2795744} |
| {"current_steps": 6820, "total_steps": 8260, "loss": 0.0902, "lr": 4.493966818195191e-06, "epoch": 8.256658595641646, "percentage": 82.57, "elapsed_time": "0:25:18", "remaining_time": "0:05:20", "throughput": 1842.04, "total_tokens": 2797696} |
| {"current_steps": 6825, "total_steps": 8260, "loss": 0.0592, "lr": 4.463795937887713e-06, "epoch": 8.26271186440678, "percentage": 82.63, "elapsed_time": "0:25:19", "remaining_time": "0:05:19", "throughput": 1842.16, "total_tokens": 2799744} |
| {"current_steps": 6830, "total_steps": 8260, "loss": 0.0851, "lr": 4.433716746034252e-06, "epoch": 8.268765133171913, "percentage": 82.69, "elapsed_time": "0:25:20", "remaining_time": "0:05:18", "throughput": 1842.32, "total_tokens": 2801824} |
| {"current_steps": 6835, "total_steps": 8260, "loss": 0.0532, "lr": 4.40372937693008e-06, "epoch": 8.274818401937045, "percentage": 82.75, "elapsed_time": "0:25:21", "remaining_time": "0:05:17", "throughput": 1842.46, "total_tokens": 2803872} |
| {"current_steps": 6840, "total_steps": 8260, "loss": 0.1058, "lr": 4.3738339644604635e-06, "epoch": 8.28087167070218, "percentage": 82.81, "elapsed_time": "0:25:22", "remaining_time": "0:05:16", "throughput": 1842.59, "total_tokens": 2805824} |
| {"current_steps": 6845, "total_steps": 8260, "loss": 0.005, "lr": 4.344030642100133e-06, "epoch": 8.286924939467312, "percentage": 82.87, "elapsed_time": "0:25:23", "remaining_time": "0:05:14", "throughput": 1842.67, "total_tokens": 2807712} |
| {"current_steps": 6850, "total_steps": 8260, "loss": 0.0199, "lr": 4.314319542912643e-06, "epoch": 8.292978208232446, "percentage": 82.93, "elapsed_time": "0:25:24", "remaining_time": "0:05:13", "throughput": 1842.79, "total_tokens": 2809760} |
| {"current_steps": 6855, "total_steps": 8260, "loss": 0.1096, "lr": 4.284700799549829e-06, "epoch": 8.299031476997579, "percentage": 82.99, "elapsed_time": "0:25:25", "remaining_time": "0:05:12", "throughput": 1842.93, "total_tokens": 2811872} |
| {"current_steps": 6860, "total_steps": 8260, "loss": 0.0983, "lr": 4.255174544251147e-06, "epoch": 8.305084745762711, "percentage": 83.05, "elapsed_time": "0:25:26", "remaining_time": "0:05:11", "throughput": 1843.07, "total_tokens": 2813888} |
| {"current_steps": 6865, "total_steps": 8260, "loss": 0.0652, "lr": 4.225740908843146e-06, "epoch": 8.311138014527845, "percentage": 83.11, "elapsed_time": "0:25:27", "remaining_time": "0:05:10", "throughput": 1843.14, "total_tokens": 2815808} |
| {"current_steps": 6870, "total_steps": 8260, "loss": 0.088, "lr": 4.196400024738831e-06, "epoch": 8.317191283292978, "percentage": 83.17, "elapsed_time": "0:25:28", "remaining_time": "0:05:09", "throughput": 1843.23, "total_tokens": 2817760} |
| {"current_steps": 6875, "total_steps": 8260, "loss": 0.081, "lr": 4.167152022937124e-06, "epoch": 8.323244552058112, "percentage": 83.23, "elapsed_time": "0:25:29", "remaining_time": "0:05:08", "throughput": 1843.33, "total_tokens": 2819776} |
| {"current_steps": 6880, "total_steps": 8260, "loss": 0.015, "lr": 4.137997034022237e-06, "epoch": 8.329297820823244, "percentage": 83.29, "elapsed_time": "0:25:30", "remaining_time": "0:05:07", "throughput": 1843.48, "total_tokens": 2821824} |
| {"current_steps": 6885, "total_steps": 8260, "loss": 0.0093, "lr": 4.108935188163096e-06, "epoch": 8.335351089588377, "percentage": 83.35, "elapsed_time": "0:25:31", "remaining_time": "0:05:05", "throughput": 1843.66, "total_tokens": 2823968} |
| {"current_steps": 6890, "total_steps": 8260, "loss": 0.0151, "lr": 4.079966615112782e-06, "epoch": 8.341404358353511, "percentage": 83.41, "elapsed_time": "0:25:32", "remaining_time": "0:05:04", "throughput": 1843.75, "total_tokens": 2825920} |
| {"current_steps": 6895, "total_steps": 8260, "loss": 0.0251, "lr": 4.05109144420795e-06, "epoch": 8.347457627118644, "percentage": 83.47, "elapsed_time": "0:25:33", "remaining_time": "0:05:03", "throughput": 1844.0, "total_tokens": 2828224} |
| {"current_steps": 6900, "total_steps": 8260, "loss": 0.0145, "lr": 4.022309804368215e-06, "epoch": 8.353510895883778, "percentage": 83.54, "elapsed_time": "0:25:34", "remaining_time": "0:05:02", "throughput": 1844.08, "total_tokens": 2830208} |
| {"current_steps": 6905, "total_steps": 8260, "loss": 0.1523, "lr": 3.993621824095622e-06, "epoch": 8.35956416464891, "percentage": 83.6, "elapsed_time": "0:25:35", "remaining_time": "0:05:01", "throughput": 1844.18, "total_tokens": 2832160} |
| {"current_steps": 6910, "total_steps": 8260, "loss": 0.0043, "lr": 3.965027631474036e-06, "epoch": 8.365617433414043, "percentage": 83.66, "elapsed_time": "0:25:36", "remaining_time": "0:05:00", "throughput": 1844.3, "total_tokens": 2834208} |
| {"current_steps": 6915, "total_steps": 8260, "loss": 0.003, "lr": 3.936527354168606e-06, "epoch": 8.371670702179177, "percentage": 83.72, "elapsed_time": "0:25:37", "remaining_time": "0:04:59", "throughput": 1844.43, "total_tokens": 2836320} |
| {"current_steps": 6920, "total_steps": 8260, "loss": 0.0066, "lr": 3.90812111942516e-06, "epoch": 8.37772397094431, "percentage": 83.78, "elapsed_time": "0:25:38", "remaining_time": "0:04:57", "throughput": 1844.59, "total_tokens": 2838464} |
| {"current_steps": 6925, "total_steps": 8260, "loss": 0.0822, "lr": 3.8798090540696495e-06, "epoch": 8.383777239709444, "percentage": 83.84, "elapsed_time": "0:25:39", "remaining_time": "0:04:56", "throughput": 1844.74, "total_tokens": 2840512} |
| {"current_steps": 6930, "total_steps": 8260, "loss": 0.0229, "lr": 3.851591284507591e-06, "epoch": 8.389830508474576, "percentage": 83.9, "elapsed_time": "0:25:40", "remaining_time": "0:04:55", "throughput": 1844.87, "total_tokens": 2842592} |
| {"current_steps": 6935, "total_steps": 8260, "loss": 0.0929, "lr": 3.82346793672351e-06, "epoch": 8.39588377723971, "percentage": 83.96, "elapsed_time": "0:25:41", "remaining_time": "0:04:54", "throughput": 1844.98, "total_tokens": 2844576} |
| {"current_steps": 6940, "total_steps": 8260, "loss": 0.0273, "lr": 3.795439136280346e-06, "epoch": 8.401937046004843, "percentage": 84.02, "elapsed_time": "0:25:42", "remaining_time": "0:04:53", "throughput": 1845.17, "total_tokens": 2846720} |
| {"current_steps": 6945, "total_steps": 8260, "loss": 0.0268, "lr": 3.767505008318914e-06, "epoch": 8.407990314769975, "percentage": 84.08, "elapsed_time": "0:25:43", "remaining_time": "0:04:52", "throughput": 1845.35, "total_tokens": 2848832} |
| {"current_steps": 6950, "total_steps": 8260, "loss": 0.0207, "lr": 3.739665677557341e-06, "epoch": 8.41404358353511, "percentage": 84.14, "elapsed_time": "0:25:44", "remaining_time": "0:04:51", "throughput": 1845.45, "total_tokens": 2850880} |
| {"current_steps": 6955, "total_steps": 8260, "loss": 0.0087, "lr": 3.711921268290533e-06, "epoch": 8.420096852300242, "percentage": 84.2, "elapsed_time": "0:25:45", "remaining_time": "0:04:50", "throughput": 1845.58, "total_tokens": 2852896} |
| {"current_steps": 6960, "total_steps": 8260, "loss": 0.071, "lr": 3.6842719043895748e-06, "epoch": 8.426150121065376, "percentage": 84.26, "elapsed_time": "0:25:46", "remaining_time": "0:04:48", "throughput": 1845.73, "total_tokens": 2855008} |
| {"current_steps": 6965, "total_steps": 8260, "loss": 0.0595, "lr": 3.656717709301194e-06, "epoch": 8.432203389830509, "percentage": 84.32, "elapsed_time": "0:25:47", "remaining_time": "0:04:47", "throughput": 1845.83, "total_tokens": 2856960} |
| {"current_steps": 6970, "total_steps": 8260, "loss": 0.0183, "lr": 3.629258806047231e-06, "epoch": 8.438256658595641, "percentage": 84.38, "elapsed_time": "0:25:48", "remaining_time": "0:04:46", "throughput": 1846.05, "total_tokens": 2859200} |
| {"current_steps": 6975, "total_steps": 8260, "loss": 0.0023, "lr": 3.60189531722408e-06, "epoch": 8.444309927360775, "percentage": 84.44, "elapsed_time": "0:25:49", "remaining_time": "0:04:45", "throughput": 1846.22, "total_tokens": 2861280} |
| {"current_steps": 6980, "total_steps": 8260, "loss": 0.0547, "lr": 3.5746273650021228e-06, "epoch": 8.450363196125908, "percentage": 84.5, "elapsed_time": "0:25:50", "remaining_time": "0:04:44", "throughput": 1846.42, "total_tokens": 2863392} |
| {"current_steps": 6985, "total_steps": 8260, "loss": 0.0218, "lr": 3.5474550711252026e-06, "epoch": 8.456416464891042, "percentage": 84.56, "elapsed_time": "0:25:51", "remaining_time": "0:04:43", "throughput": 1846.51, "total_tokens": 2865344} |
| {"current_steps": 6990, "total_steps": 8260, "loss": 0.0038, "lr": 3.5203785569100674e-06, "epoch": 8.462469733656174, "percentage": 84.62, "elapsed_time": "0:25:52", "remaining_time": "0:04:42", "throughput": 1846.65, "total_tokens": 2867424} |
| {"current_steps": 6995, "total_steps": 8260, "loss": 0.0012, "lr": 3.493397943245852e-06, "epoch": 8.468523002421307, "percentage": 84.69, "elapsed_time": "0:25:53", "remaining_time": "0:04:40", "throughput": 1846.76, "total_tokens": 2869472} |
| {"current_steps": 7000, "total_steps": 8260, "loss": 0.0815, "lr": 3.466513350593506e-06, "epoch": 8.474576271186441, "percentage": 84.75, "elapsed_time": "0:25:54", "remaining_time": "0:04:39", "throughput": 1846.97, "total_tokens": 2871680} |
| {"current_steps": 7005, "total_steps": 8260, "loss": 0.0923, "lr": 3.439724898985278e-06, "epoch": 8.480629539951574, "percentage": 84.81, "elapsed_time": "0:25:55", "remaining_time": "0:04:38", "throughput": 1847.1, "total_tokens": 2873760} |
| {"current_steps": 7010, "total_steps": 8260, "loss": 0.2666, "lr": 3.4130327080241636e-06, "epoch": 8.486682808716708, "percentage": 84.87, "elapsed_time": "0:25:56", "remaining_time": "0:04:37", "throughput": 1847.2, "total_tokens": 2875712} |
| {"current_steps": 7015, "total_steps": 8260, "loss": 0.0496, "lr": 3.3864368968834074e-06, "epoch": 8.49273607748184, "percentage": 84.93, "elapsed_time": "0:25:57", "remaining_time": "0:04:36", "throughput": 1847.27, "total_tokens": 2877696} |
| {"current_steps": 7020, "total_steps": 8260, "loss": 0.0609, "lr": 3.3599375843059193e-06, "epoch": 8.498789346246973, "percentage": 84.99, "elapsed_time": "0:25:58", "remaining_time": "0:04:35", "throughput": 1847.38, "total_tokens": 2879680} |
| {"current_steps": 7021, "total_steps": 8260, "eval_loss": 0.1579342782497406, "epoch": 8.5, "percentage": 85.0, "elapsed_time": "0:26:07", "remaining_time": "0:04:36", "throughput": 1837.85, "total_tokens": 2880128} |
| {"current_steps": 7025, "total_steps": 8260, "loss": 0.0444, "lr": 3.3335348886037815e-06, "epoch": 8.504842615012107, "percentage": 85.05, "elapsed_time": "0:26:09", "remaining_time": "0:04:35", "throughput": 1835.96, "total_tokens": 2881728} |
| {"current_steps": 7030, "total_steps": 8260, "loss": 0.0862, "lr": 3.3072289276576964e-06, "epoch": 8.51089588377724, "percentage": 85.11, "elapsed_time": "0:26:10", "remaining_time": "0:04:34", "throughput": 1836.06, "total_tokens": 2883744} |
| {"current_steps": 7035, "total_steps": 8260, "loss": 0.0951, "lr": 3.281019818916492e-06, "epoch": 8.516949152542374, "percentage": 85.17, "elapsed_time": "0:26:11", "remaining_time": "0:04:33", "throughput": 1836.18, "total_tokens": 2885792} |
| {"current_steps": 7040, "total_steps": 8260, "loss": 0.031, "lr": 3.254907679396574e-06, "epoch": 8.523002421307506, "percentage": 85.23, "elapsed_time": "0:26:12", "remaining_time": "0:04:32", "throughput": 1836.38, "total_tokens": 2888000} |
| {"current_steps": 7045, "total_steps": 8260, "loss": 0.0091, "lr": 3.2288926256813846e-06, "epoch": 8.529055690072639, "percentage": 85.29, "elapsed_time": "0:26:13", "remaining_time": "0:04:31", "throughput": 1836.48, "total_tokens": 2890016} |
| {"current_steps": 7050, "total_steps": 8260, "loss": 0.0309, "lr": 3.2029747739209247e-06, "epoch": 8.535108958837773, "percentage": 85.35, "elapsed_time": "0:26:14", "remaining_time": "0:04:30", "throughput": 1836.58, "total_tokens": 2891968} |
| {"current_steps": 7055, "total_steps": 8260, "loss": 0.111, "lr": 3.177154239831223e-06, "epoch": 8.541162227602905, "percentage": 85.41, "elapsed_time": "0:26:15", "remaining_time": "0:04:29", "throughput": 1836.69, "total_tokens": 2893952} |
| {"current_steps": 7060, "total_steps": 8260, "loss": 0.1537, "lr": 3.1514311386937917e-06, "epoch": 8.54721549636804, "percentage": 85.47, "elapsed_time": "0:26:16", "remaining_time": "0:04:27", "throughput": 1836.82, "total_tokens": 2895904} |
| {"current_steps": 7065, "total_steps": 8260, "loss": 0.0315, "lr": 3.1258055853551487e-06, "epoch": 8.553268765133172, "percentage": 85.53, "elapsed_time": "0:26:17", "remaining_time": "0:04:26", "throughput": 1836.96, "total_tokens": 2897952} |
| {"current_steps": 7070, "total_steps": 8260, "loss": 0.0101, "lr": 3.1002776942262696e-06, "epoch": 8.559322033898304, "percentage": 85.59, "elapsed_time": "0:26:18", "remaining_time": "0:04:25", "throughput": 1837.14, "total_tokens": 2900064} |
| {"current_steps": 7075, "total_steps": 8260, "loss": 0.0776, "lr": 3.0748475792821197e-06, "epoch": 8.565375302663439, "percentage": 85.65, "elapsed_time": "0:26:19", "remaining_time": "0:04:24", "throughput": 1837.42, "total_tokens": 2902432} |
| {"current_steps": 7080, "total_steps": 8260, "loss": 0.0655, "lr": 3.0495153540611e-06, "epoch": 8.571428571428571, "percentage": 85.71, "elapsed_time": "0:26:20", "remaining_time": "0:04:23", "throughput": 1837.55, "total_tokens": 2904512} |
| {"current_steps": 7085, "total_steps": 8260, "loss": 0.0523, "lr": 3.024281131664569e-06, "epoch": 8.577481840193705, "percentage": 85.77, "elapsed_time": "0:26:21", "remaining_time": "0:04:22", "throughput": 1837.66, "total_tokens": 2906496} |
| {"current_steps": 7090, "total_steps": 8260, "loss": 0.1176, "lr": 2.999145024756325e-06, "epoch": 8.583535108958838, "percentage": 85.84, "elapsed_time": "0:26:22", "remaining_time": "0:04:21", "throughput": 1837.78, "total_tokens": 2908544} |
| {"current_steps": 7095, "total_steps": 8260, "loss": 0.0411, "lr": 2.9741071455621245e-06, "epoch": 8.58958837772397, "percentage": 85.9, "elapsed_time": "0:26:23", "remaining_time": "0:04:20", "throughput": 1837.95, "total_tokens": 2910688} |
| {"current_steps": 7100, "total_steps": 8260, "loss": 0.0489, "lr": 2.9491676058691437e-06, "epoch": 8.595641646489105, "percentage": 85.96, "elapsed_time": "0:26:24", "remaining_time": "0:04:18", "throughput": 1838.12, "total_tokens": 2912832} |
| {"current_steps": 7105, "total_steps": 8260, "loss": 0.0324, "lr": 2.924326517025508e-06, "epoch": 8.601694915254237, "percentage": 86.02, "elapsed_time": "0:26:25", "remaining_time": "0:04:17", "throughput": 1838.2, "total_tokens": 2914816} |
| {"current_steps": 7110, "total_steps": 8260, "loss": 0.0023, "lr": 2.8995839899397915e-06, "epoch": 8.607748184019371, "percentage": 86.08, "elapsed_time": "0:26:26", "remaining_time": "0:04:16", "throughput": 1838.36, "total_tokens": 2916928} |
| {"current_steps": 7115, "total_steps": 8260, "loss": 0.0141, "lr": 2.8749401350805115e-06, "epoch": 8.613801452784504, "percentage": 86.14, "elapsed_time": "0:26:27", "remaining_time": "0:04:15", "throughput": 1838.5, "total_tokens": 2918912} |
| {"current_steps": 7120, "total_steps": 8260, "loss": 0.0618, "lr": 2.8503950624756415e-06, "epoch": 8.619854721549636, "percentage": 86.2, "elapsed_time": "0:26:28", "remaining_time": "0:04:14", "throughput": 1838.6, "total_tokens": 2920896} |
| {"current_steps": 7125, "total_steps": 8260, "loss": 0.0053, "lr": 2.825948881712123e-06, "epoch": 8.62590799031477, "percentage": 86.26, "elapsed_time": "0:26:29", "remaining_time": "0:04:13", "throughput": 1838.73, "total_tokens": 2922976} |
| {"current_steps": 7130, "total_steps": 8260, "loss": 0.0207, "lr": 2.801601701935369e-06, "epoch": 8.631961259079903, "percentage": 86.32, "elapsed_time": "0:26:30", "remaining_time": "0:04:12", "throughput": 1838.89, "total_tokens": 2925120} |
| {"current_steps": 7135, "total_steps": 8260, "loss": 0.0271, "lr": 2.777353631848789e-06, "epoch": 8.638014527845037, "percentage": 86.38, "elapsed_time": "0:26:31", "remaining_time": "0:04:10", "throughput": 1839.02, "total_tokens": 2927072} |
| {"current_steps": 7140, "total_steps": 8260, "loss": 0.0513, "lr": 2.7532047797132867e-06, "epoch": 8.64406779661017, "percentage": 86.44, "elapsed_time": "0:26:32", "remaining_time": "0:04:09", "throughput": 1839.18, "total_tokens": 2929152} |
| {"current_steps": 7145, "total_steps": 8260, "loss": 0.067, "lr": 2.7291552533467853e-06, "epoch": 8.650121065375302, "percentage": 86.5, "elapsed_time": "0:26:33", "remaining_time": "0:04:08", "throughput": 1839.29, "total_tokens": 2931136} |
| {"current_steps": 7150, "total_steps": 8260, "loss": 0.0112, "lr": 2.7052051601237473e-06, "epoch": 8.656174334140436, "percentage": 86.56, "elapsed_time": "0:26:34", "remaining_time": "0:04:07", "throughput": 1839.47, "total_tokens": 2933312} |
| {"current_steps": 7155, "total_steps": 8260, "loss": 0.0516, "lr": 2.681354606974698e-06, "epoch": 8.662227602905569, "percentage": 86.62, "elapsed_time": "0:26:35", "remaining_time": "0:04:06", "throughput": 1839.58, "total_tokens": 2935328} |
| {"current_steps": 7160, "total_steps": 8260, "loss": 0.0969, "lr": 2.6576037003857414e-06, "epoch": 8.668280871670703, "percentage": 86.68, "elapsed_time": "0:26:36", "remaining_time": "0:04:05", "throughput": 1839.66, "total_tokens": 2937216} |
| {"current_steps": 7165, "total_steps": 8260, "loss": 0.0056, "lr": 2.633952546398083e-06, "epoch": 8.674334140435835, "percentage": 86.74, "elapsed_time": "0:26:37", "remaining_time": "0:04:04", "throughput": 1839.79, "total_tokens": 2939168} |
| {"current_steps": 7170, "total_steps": 8260, "loss": 0.0376, "lr": 2.6104012506075692e-06, "epoch": 8.680387409200968, "percentage": 86.8, "elapsed_time": "0:26:38", "remaining_time": "0:04:03", "throughput": 1840.05, "total_tokens": 2941504} |
| {"current_steps": 7175, "total_steps": 8260, "loss": 0.0602, "lr": 2.5869499181641916e-06, "epoch": 8.686440677966102, "percentage": 86.86, "elapsed_time": "0:26:39", "remaining_time": "0:04:01", "throughput": 1840.13, "total_tokens": 2943520} |
| {"current_steps": 7180, "total_steps": 8260, "loss": 0.1961, "lr": 2.5635986537716538e-06, "epoch": 8.692493946731235, "percentage": 86.92, "elapsed_time": "0:26:40", "remaining_time": "0:04:00", "throughput": 1840.25, "total_tokens": 2945568} |
| {"current_steps": 7185, "total_steps": 8260, "loss": 0.0054, "lr": 2.540347561686873e-06, "epoch": 8.698547215496369, "percentage": 86.99, "elapsed_time": "0:26:41", "remaining_time": "0:03:59", "throughput": 1840.32, "total_tokens": 2947488} |
| {"current_steps": 7190, "total_steps": 8260, "loss": 0.2084, "lr": 2.5171967457195216e-06, "epoch": 8.704600484261501, "percentage": 87.05, "elapsed_time": "0:26:42", "remaining_time": "0:03:58", "throughput": 1840.45, "total_tokens": 2949504} |
| {"current_steps": 7195, "total_steps": 8260, "loss": 0.0315, "lr": 2.494146309231571e-06, "epoch": 8.710653753026634, "percentage": 87.11, "elapsed_time": "0:26:43", "remaining_time": "0:03:57", "throughput": 1840.59, "total_tokens": 2951552} |
| {"current_steps": 7200, "total_steps": 8260, "loss": 0.0159, "lr": 2.471196355136826e-06, "epoch": 8.716707021791768, "percentage": 87.17, "elapsed_time": "0:26:44", "remaining_time": "0:03:56", "throughput": 1840.73, "total_tokens": 2953632} |
| {"current_steps": 7205, "total_steps": 8260, "loss": 0.1054, "lr": 2.4483469859004625e-06, "epoch": 8.7227602905569, "percentage": 87.23, "elapsed_time": "0:26:45", "remaining_time": "0:03:55", "throughput": 1840.98, "total_tokens": 2955936} |
| {"current_steps": 7210, "total_steps": 8260, "loss": 0.0246, "lr": 2.425598303538576e-06, "epoch": 8.728813559322035, "percentage": 87.29, "elapsed_time": "0:26:46", "remaining_time": "0:03:53", "throughput": 1841.12, "total_tokens": 2958048} |
| {"current_steps": 7215, "total_steps": 8260, "loss": 0.0293, "lr": 2.402950409617727e-06, "epoch": 8.734866828087167, "percentage": 87.35, "elapsed_time": "0:26:47", "remaining_time": "0:03:52", "throughput": 1841.27, "total_tokens": 2960160} |
| {"current_steps": 7220, "total_steps": 8260, "loss": 0.1462, "lr": 2.380403405254475e-06, "epoch": 8.7409200968523, "percentage": 87.41, "elapsed_time": "0:26:48", "remaining_time": "0:03:51", "throughput": 1841.4, "total_tokens": 2962208} |
| {"current_steps": 7225, "total_steps": 8260, "loss": 0.002, "lr": 2.35795739111494e-06, "epoch": 8.746973365617434, "percentage": 87.47, "elapsed_time": "0:26:49", "remaining_time": "0:03:50", "throughput": 1841.54, "total_tokens": 2964320} |
| {"current_steps": 7230, "total_steps": 8260, "loss": 0.0275, "lr": 2.335612467414344e-06, "epoch": 8.753026634382566, "percentage": 87.53, "elapsed_time": "0:26:50", "remaining_time": "0:03:49", "throughput": 1841.63, "total_tokens": 2966272} |
| {"current_steps": 7235, "total_steps": 8260, "loss": 0.0028, "lr": 2.313368733916585e-06, "epoch": 8.7590799031477, "percentage": 87.59, "elapsed_time": "0:26:51", "remaining_time": "0:03:48", "throughput": 1841.73, "total_tokens": 2968288} |
| {"current_steps": 7240, "total_steps": 8260, "loss": 0.0742, "lr": 2.291226289933751e-06, "epoch": 8.765133171912833, "percentage": 87.65, "elapsed_time": "0:26:52", "remaining_time": "0:03:47", "throughput": 1841.81, "total_tokens": 2970208} |
| {"current_steps": 7245, "total_steps": 8260, "loss": 0.0367, "lr": 2.2691852343257157e-06, "epoch": 8.771186440677965, "percentage": 87.71, "elapsed_time": "0:26:53", "remaining_time": "0:03:46", "throughput": 1841.97, "total_tokens": 2972352} |
| {"current_steps": 7250, "total_steps": 8260, "loss": 0.1363, "lr": 2.2472456654996755e-06, "epoch": 8.7772397094431, "percentage": 87.77, "elapsed_time": "0:26:54", "remaining_time": "0:03:44", "throughput": 1842.07, "total_tokens": 2974368} |
| {"current_steps": 7255, "total_steps": 8260, "loss": 0.0365, "lr": 2.2254076814097163e-06, "epoch": 8.783292978208232, "percentage": 87.83, "elapsed_time": "0:26:55", "remaining_time": "0:03:43", "throughput": 1842.16, "total_tokens": 2976288} |
| {"current_steps": 7260, "total_steps": 8260, "loss": 0.0286, "lr": 2.203671379556388e-06, "epoch": 8.789346246973366, "percentage": 87.89, "elapsed_time": "0:26:56", "remaining_time": "0:03:42", "throughput": 1842.25, "total_tokens": 2978240} |
| {"current_steps": 7265, "total_steps": 8260, "loss": 0.1335, "lr": 2.1820368569862444e-06, "epoch": 8.795399515738499, "percentage": 87.95, "elapsed_time": "0:26:57", "remaining_time": "0:03:41", "throughput": 1842.37, "total_tokens": 2980256} |
| {"current_steps": 7270, "total_steps": 8260, "loss": 0.1125, "lr": 2.1605042102914227e-06, "epoch": 8.801452784503631, "percentage": 88.01, "elapsed_time": "0:26:58", "remaining_time": "0:03:40", "throughput": 1842.55, "total_tokens": 2982400} |
| {"current_steps": 7275, "total_steps": 8260, "loss": 0.2068, "lr": 2.1390735356092206e-06, "epoch": 8.807506053268765, "percentage": 88.08, "elapsed_time": "0:26:59", "remaining_time": "0:03:39", "throughput": 1842.64, "total_tokens": 2984416} |
| {"current_steps": 7280, "total_steps": 8260, "loss": 0.0902, "lr": 2.1177449286216565e-06, "epoch": 8.813559322033898, "percentage": 88.14, "elapsed_time": "0:27:00", "remaining_time": "0:03:38", "throughput": 1842.8, "total_tokens": 2986496} |
| {"current_steps": 7285, "total_steps": 8260, "loss": 0.0033, "lr": 2.0965184845550407e-06, "epoch": 8.819612590799032, "percentage": 88.2, "elapsed_time": "0:27:01", "remaining_time": "0:03:37", "throughput": 1842.92, "total_tokens": 2988512} |
| {"current_steps": 7290, "total_steps": 8260, "loss": 0.0876, "lr": 2.075394298179553e-06, "epoch": 8.825665859564165, "percentage": 88.26, "elapsed_time": "0:27:02", "remaining_time": "0:03:35", "throughput": 1843.04, "total_tokens": 2990560} |
| {"current_steps": 7295, "total_steps": 8260, "loss": 0.0701, "lr": 2.0543724638088347e-06, "epoch": 8.831719128329297, "percentage": 88.32, "elapsed_time": "0:27:03", "remaining_time": "0:03:34", "throughput": 1843.25, "total_tokens": 2992768} |
| {"current_steps": 7300, "total_steps": 8260, "loss": 0.1504, "lr": 2.0334530752995433e-06, "epoch": 8.837772397094431, "percentage": 88.38, "elapsed_time": "0:27:04", "remaining_time": "0:03:33", "throughput": 1843.37, "total_tokens": 2994784} |
| {"current_steps": 7305, "total_steps": 8260, "loss": 0.0217, "lr": 2.01263622605094e-06, "epoch": 8.843825665859564, "percentage": 88.44, "elapsed_time": "0:27:05", "remaining_time": "0:03:32", "throughput": 1843.53, "total_tokens": 2996896} |
| {"current_steps": 7310, "total_steps": 8260, "loss": 0.0354, "lr": 1.991922009004485e-06, "epoch": 8.849878934624698, "percentage": 88.5, "elapsed_time": "0:27:06", "remaining_time": "0:03:31", "throughput": 1843.66, "total_tokens": 2998976} |
| {"current_steps": 7315, "total_steps": 8260, "loss": 0.1618, "lr": 1.9713105166434042e-06, "epoch": 8.85593220338983, "percentage": 88.56, "elapsed_time": "0:27:07", "remaining_time": "0:03:30", "throughput": 1843.85, "total_tokens": 3001184} |
| {"current_steps": 7320, "total_steps": 8260, "loss": 0.0087, "lr": 1.950801840992303e-06, "epoch": 8.861985472154963, "percentage": 88.62, "elapsed_time": "0:27:08", "remaining_time": "0:03:29", "throughput": 1843.93, "total_tokens": 3003168} |
| {"current_steps": 7325, "total_steps": 8260, "loss": 0.0421, "lr": 1.930396073616725e-06, "epoch": 8.868038740920097, "percentage": 88.68, "elapsed_time": "0:27:09", "remaining_time": "0:03:28", "throughput": 1844.03, "total_tokens": 3005152} |
| {"current_steps": 7330, "total_steps": 8260, "loss": 0.0225, "lr": 1.9100933056227593e-06, "epoch": 8.87409200968523, "percentage": 88.74, "elapsed_time": "0:27:10", "remaining_time": "0:03:26", "throughput": 1844.17, "total_tokens": 3007200} |
| {"current_steps": 7335, "total_steps": 8260, "loss": 0.021, "lr": 1.8898936276566303e-06, "epoch": 8.880145278450364, "percentage": 88.8, "elapsed_time": "0:27:11", "remaining_time": "0:03:25", "throughput": 1844.3, "total_tokens": 3009280} |
| {"current_steps": 7340, "total_steps": 8260, "loss": 0.0664, "lr": 1.8697971299043048e-06, "epoch": 8.886198547215496, "percentage": 88.86, "elapsed_time": "0:27:12", "remaining_time": "0:03:24", "throughput": 1844.43, "total_tokens": 3011360} |
| {"current_steps": 7345, "total_steps": 8260, "loss": 0.0099, "lr": 1.8498039020910628e-06, "epoch": 8.892251815980629, "percentage": 88.92, "elapsed_time": "0:27:13", "remaining_time": "0:03:23", "throughput": 1844.63, "total_tokens": 3013568} |
| {"current_steps": 7350, "total_steps": 8260, "loss": 0.0273, "lr": 1.8299140334811226e-06, "epoch": 8.898305084745763, "percentage": 88.98, "elapsed_time": "0:27:14", "remaining_time": "0:03:22", "throughput": 1844.76, "total_tokens": 3015552} |
| {"current_steps": 7355, "total_steps": 8260, "loss": 0.036, "lr": 1.8101276128772272e-06, "epoch": 8.904358353510895, "percentage": 89.04, "elapsed_time": "0:27:15", "remaining_time": "0:03:21", "throughput": 1844.84, "total_tokens": 3017536} |
| {"current_steps": 7360, "total_steps": 8260, "loss": 0.0193, "lr": 1.7904447286202607e-06, "epoch": 8.91041162227603, "percentage": 89.1, "elapsed_time": "0:27:16", "remaining_time": "0:03:20", "throughput": 1844.95, "total_tokens": 3019584} |
| {"current_steps": 7365, "total_steps": 8260, "loss": 0.1496, "lr": 1.7708654685888337e-06, "epoch": 8.916464891041162, "percentage": 89.16, "elapsed_time": "0:27:17", "remaining_time": "0:03:19", "throughput": 1845.11, "total_tokens": 3021728} |
| {"current_steps": 7370, "total_steps": 8260, "loss": 0.0041, "lr": 1.7513899201989148e-06, "epoch": 8.922518159806295, "percentage": 89.23, "elapsed_time": "0:27:18", "remaining_time": "0:03:17", "throughput": 1845.18, "total_tokens": 3023584} |
| {"current_steps": 7375, "total_steps": 8260, "loss": 0.0716, "lr": 1.7320181704034237e-06, "epoch": 8.928571428571429, "percentage": 89.29, "elapsed_time": "0:27:19", "remaining_time": "0:03:16", "throughput": 1845.31, "total_tokens": 3025600} |
| {"current_steps": 7380, "total_steps": 8260, "loss": 0.0103, "lr": 1.7127503056918542e-06, "epoch": 8.934624697336561, "percentage": 89.35, "elapsed_time": "0:27:20", "remaining_time": "0:03:15", "throughput": 1845.43, "total_tokens": 3027680} |
| {"current_steps": 7385, "total_steps": 8260, "loss": 0.0023, "lr": 1.6935864120898704e-06, "epoch": 8.940677966101696, "percentage": 89.41, "elapsed_time": "0:27:21", "remaining_time": "0:03:14", "throughput": 1845.59, "total_tokens": 3029856} |
| {"current_steps": 7390, "total_steps": 8260, "loss": 0.1403, "lr": 1.674526575158944e-06, "epoch": 8.946731234866828, "percentage": 89.47, "elapsed_time": "0:27:22", "remaining_time": "0:03:13", "throughput": 1845.8, "total_tokens": 3032096} |
| {"current_steps": 7395, "total_steps": 8260, "loss": 0.0081, "lr": 1.6555708799959547e-06, "epoch": 8.95278450363196, "percentage": 89.53, "elapsed_time": "0:27:23", "remaining_time": "0:03:12", "throughput": 1845.93, "total_tokens": 3034112} |
| {"current_steps": 7400, "total_steps": 8260, "loss": 0.0135, "lr": 1.6367194112328288e-06, "epoch": 8.958837772397095, "percentage": 89.59, "elapsed_time": "0:27:24", "remaining_time": "0:03:11", "throughput": 1846.03, "total_tokens": 3036096} |
| {"current_steps": 7405, "total_steps": 8260, "loss": 0.1338, "lr": 1.617972253036143e-06, "epoch": 8.964891041162227, "percentage": 89.65, "elapsed_time": "0:27:25", "remaining_time": "0:03:10", "throughput": 1846.1, "total_tokens": 3038080} |
| {"current_steps": 7410, "total_steps": 8260, "loss": 0.0089, "lr": 1.5993294891067573e-06, "epoch": 8.970944309927361, "percentage": 89.71, "elapsed_time": "0:27:26", "remaining_time": "0:03:08", "throughput": 1846.2, "total_tokens": 3040064} |
| {"current_steps": 7415, "total_steps": 8260, "loss": 0.2091, "lr": 1.580791202679438e-06, "epoch": 8.976997578692494, "percentage": 89.77, "elapsed_time": "0:27:27", "remaining_time": "0:03:07", "throughput": 1846.3, "total_tokens": 3042048} |
| {"current_steps": 7420, "total_steps": 8260, "loss": 0.0461, "lr": 1.562357476522497e-06, "epoch": 8.983050847457626, "percentage": 89.83, "elapsed_time": "0:27:28", "remaining_time": "0:03:06", "throughput": 1846.46, "total_tokens": 3044192} |
| {"current_steps": 7425, "total_steps": 8260, "loss": 0.0961, "lr": 1.5440283929374023e-06, "epoch": 8.98910411622276, "percentage": 89.89, "elapsed_time": "0:27:29", "remaining_time": "0:03:05", "throughput": 1846.53, "total_tokens": 3046112} |
| {"current_steps": 7430, "total_steps": 8260, "loss": 0.1143, "lr": 1.5258040337584322e-06, "epoch": 8.995157384987893, "percentage": 89.95, "elapsed_time": "0:27:30", "remaining_time": "0:03:04", "throughput": 1846.62, "total_tokens": 3048000} |
| {"current_steps": 7434, "total_steps": 8260, "eval_loss": 0.14868541061878204, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:27:39", "remaining_time": "0:03:04", "throughput": 1837.57, "total_tokens": 3049392} |
| {"current_steps": 7435, "total_steps": 8260, "loss": 0.0567, "lr": 1.5076844803522922e-06, "epoch": 9.001210653753027, "percentage": 90.01, "elapsed_time": "0:27:41", "remaining_time": "0:03:04", "throughput": 1835.82, "total_tokens": 3049808} |
| {"current_steps": 7440, "total_steps": 8260, "loss": 0.0191, "lr": 1.4896698136177612e-06, "epoch": 9.00726392251816, "percentage": 90.07, "elapsed_time": "0:27:42", "remaining_time": "0:03:03", "throughput": 1835.96, "total_tokens": 3051792} |
| {"current_steps": 7445, "total_steps": 8260, "loss": 0.0317, "lr": 1.4717601139853266e-06, "epoch": 9.013317191283292, "percentage": 90.13, "elapsed_time": "0:27:43", "remaining_time": "0:03:02", "throughput": 1836.07, "total_tokens": 3053776} |
| {"current_steps": 7450, "total_steps": 8260, "loss": 0.0064, "lr": 1.4539554614168339e-06, "epoch": 9.019370460048426, "percentage": 90.19, "elapsed_time": "0:27:44", "remaining_time": "0:03:00", "throughput": 1836.2, "total_tokens": 3055856} |
| {"current_steps": 7455, "total_steps": 8260, "loss": 0.0179, "lr": 1.4362559354051092e-06, "epoch": 9.025423728813559, "percentage": 90.25, "elapsed_time": "0:27:45", "remaining_time": "0:02:59", "throughput": 1836.31, "total_tokens": 3057840} |
| {"current_steps": 7460, "total_steps": 8260, "loss": 0.1383, "lr": 1.4186616149736349e-06, "epoch": 9.031476997578693, "percentage": 90.31, "elapsed_time": "0:27:46", "remaining_time": "0:02:58", "throughput": 1836.44, "total_tokens": 3059920} |
| {"current_steps": 7465, "total_steps": 8260, "loss": 0.0962, "lr": 1.401172578676166e-06, "epoch": 9.037530266343826, "percentage": 90.38, "elapsed_time": "0:27:47", "remaining_time": "0:02:57", "throughput": 1836.56, "total_tokens": 3061872} |
| {"current_steps": 7470, "total_steps": 8260, "loss": 0.0155, "lr": 1.383788904596403e-06, "epoch": 9.043583535108958, "percentage": 90.44, "elapsed_time": "0:27:48", "remaining_time": "0:02:56", "throughput": 1836.67, "total_tokens": 3063888} |
| {"current_steps": 7475, "total_steps": 8260, "loss": 0.0774, "lr": 1.3665106703476178e-06, "epoch": 9.049636803874092, "percentage": 90.5, "elapsed_time": "0:27:49", "remaining_time": "0:02:55", "throughput": 1836.78, "total_tokens": 3065872} |
| {"current_steps": 7480, "total_steps": 8260, "loss": 0.0394, "lr": 1.349337953072341e-06, "epoch": 9.055690072639225, "percentage": 90.56, "elapsed_time": "0:27:50", "remaining_time": "0:02:54", "throughput": 1836.91, "total_tokens": 3067888} |
| {"current_steps": 7485, "total_steps": 8260, "loss": 0.0529, "lr": 1.3322708294419923e-06, "epoch": 9.061743341404359, "percentage": 90.62, "elapsed_time": "0:27:51", "remaining_time": "0:02:53", "throughput": 1837.03, "total_tokens": 3069968} |
| {"current_steps": 7490, "total_steps": 8260, "loss": 0.0363, "lr": 1.3153093756565426e-06, "epoch": 9.067796610169491, "percentage": 90.68, "elapsed_time": "0:27:52", "remaining_time": "0:02:51", "throughput": 1837.2, "total_tokens": 3072176} |
| {"current_steps": 7495, "total_steps": 8260, "loss": 0.0044, "lr": 1.298453667444169e-06, "epoch": 9.073849878934624, "percentage": 90.74, "elapsed_time": "0:27:53", "remaining_time": "0:02:50", "throughput": 1837.33, "total_tokens": 3074288} |
| {"current_steps": 7500, "total_steps": 8260, "loss": 0.0226, "lr": 1.281703780060947e-06, "epoch": 9.079903147699758, "percentage": 90.8, "elapsed_time": "0:27:54", "remaining_time": "0:02:49", "throughput": 1837.45, "total_tokens": 3076304} |
| {"current_steps": 7505, "total_steps": 8260, "loss": 0.0164, "lr": 1.265059788290468e-06, "epoch": 9.08595641646489, "percentage": 90.86, "elapsed_time": "0:27:55", "remaining_time": "0:02:48", "throughput": 1837.58, "total_tokens": 3078320} |
| {"current_steps": 7510, "total_steps": 8260, "loss": 0.0157, "lr": 1.2485217664435418e-06, "epoch": 9.092009685230025, "percentage": 90.92, "elapsed_time": "0:27:56", "remaining_time": "0:02:47", "throughput": 1837.74, "total_tokens": 3080464} |
| {"current_steps": 7515, "total_steps": 8260, "loss": 0.0379, "lr": 1.232089788357843e-06, "epoch": 9.098062953995157, "percentage": 90.98, "elapsed_time": "0:27:57", "remaining_time": "0:02:46", "throughput": 1837.93, "total_tokens": 3082672} |
| {"current_steps": 7520, "total_steps": 8260, "loss": 0.0456, "lr": 1.2157639273975979e-06, "epoch": 9.104116222760291, "percentage": 91.04, "elapsed_time": "0:27:58", "remaining_time": "0:02:45", "throughput": 1838.07, "total_tokens": 3084720} |
| {"current_steps": 7525, "total_steps": 8260, "loss": 0.0084, "lr": 1.19954425645325e-06, "epoch": 9.110169491525424, "percentage": 91.1, "elapsed_time": "0:27:59", "remaining_time": "0:02:44", "throughput": 1838.26, "total_tokens": 3086864} |
| {"current_steps": 7530, "total_steps": 8260, "loss": 0.0276, "lr": 1.183430847941125e-06, "epoch": 9.116222760290556, "percentage": 91.16, "elapsed_time": "0:28:00", "remaining_time": "0:02:42", "throughput": 1838.38, "total_tokens": 3088880} |
| {"current_steps": 7535, "total_steps": 8260, "loss": 0.1087, "lr": 1.1674237738031223e-06, "epoch": 9.12227602905569, "percentage": 91.22, "elapsed_time": "0:28:01", "remaining_time": "0:02:41", "throughput": 1838.51, "total_tokens": 3090960} |
| {"current_steps": 7540, "total_steps": 8260, "loss": 0.0058, "lr": 1.1515231055063914e-06, "epoch": 9.128329297820823, "percentage": 91.28, "elapsed_time": "0:28:02", "remaining_time": "0:02:40", "throughput": 1838.65, "total_tokens": 3093040} |
| {"current_steps": 7545, "total_steps": 8260, "loss": 0.0013, "lr": 1.135728914043005e-06, "epoch": 9.134382566585957, "percentage": 91.34, "elapsed_time": "0:28:03", "remaining_time": "0:02:39", "throughput": 1838.76, "total_tokens": 3095024} |
| {"current_steps": 7550, "total_steps": 8260, "loss": 0.0745, "lr": 1.120041269929642e-06, "epoch": 9.14043583535109, "percentage": 91.4, "elapsed_time": "0:28:04", "remaining_time": "0:02:38", "throughput": 1838.85, "total_tokens": 3097008} |
| {"current_steps": 7555, "total_steps": 8260, "loss": 0.0026, "lr": 1.1044602432072836e-06, "epoch": 9.146489104116222, "percentage": 91.46, "elapsed_time": "0:28:05", "remaining_time": "0:02:37", "throughput": 1839.02, "total_tokens": 3099184} |
| {"current_steps": 7560, "total_steps": 8260, "loss": 0.026, "lr": 1.0889859034408922e-06, "epoch": 9.152542372881356, "percentage": 91.53, "elapsed_time": "0:28:06", "remaining_time": "0:02:36", "throughput": 1839.17, "total_tokens": 3101328} |
| {"current_steps": 7565, "total_steps": 8260, "loss": 0.0455, "lr": 1.0736183197191024e-06, "epoch": 9.158595641646489, "percentage": 91.59, "elapsed_time": "0:28:07", "remaining_time": "0:02:35", "throughput": 1839.3, "total_tokens": 3103408} |
| {"current_steps": 7570, "total_steps": 8260, "loss": 0.0368, "lr": 1.0583575606539108e-06, "epoch": 9.164648910411623, "percentage": 91.65, "elapsed_time": "0:28:08", "remaining_time": "0:02:33", "throughput": 1839.49, "total_tokens": 3105616} |
| {"current_steps": 7575, "total_steps": 8260, "loss": 0.0041, "lr": 1.0432036943803708e-06, "epoch": 9.170702179176756, "percentage": 91.71, "elapsed_time": "0:28:09", "remaining_time": "0:02:32", "throughput": 1839.59, "total_tokens": 3107536} |
| {"current_steps": 7580, "total_steps": 8260, "loss": 0.0569, "lr": 1.0281567885562947e-06, "epoch": 9.176755447941888, "percentage": 91.77, "elapsed_time": "0:28:10", "remaining_time": "0:02:31", "throughput": 1839.74, "total_tokens": 3109648} |
| {"current_steps": 7585, "total_steps": 8260, "loss": 0.0655, "lr": 1.0132169103619444e-06, "epoch": 9.182808716707022, "percentage": 91.83, "elapsed_time": "0:28:11", "remaining_time": "0:02:30", "throughput": 1839.81, "total_tokens": 3111504} |
| {"current_steps": 7590, "total_steps": 8260, "loss": 0.0087, "lr": 9.98384126499735e-07, "epoch": 9.188861985472155, "percentage": 91.89, "elapsed_time": "0:28:12", "remaining_time": "0:02:29", "throughput": 1839.91, "total_tokens": 3113424} |
| {"current_steps": 7595, "total_steps": 8260, "loss": 0.0428, "lr": 9.836585031939154e-07, "epoch": 9.194915254237289, "percentage": 91.95, "elapsed_time": "0:28:13", "remaining_time": "0:02:28", "throughput": 1840.04, "total_tokens": 3115504} |
| {"current_steps": 7600, "total_steps": 8260, "loss": 0.0439, "lr": 9.690401061903249e-07, "epoch": 9.200968523002421, "percentage": 92.01, "elapsed_time": "0:28:14", "remaining_time": "0:02:27", "throughput": 1840.17, "total_tokens": 3117488} |
| {"current_steps": 7605, "total_steps": 8260, "loss": 0.0204, "lr": 9.545290007560437e-07, "epoch": 9.207021791767554, "percentage": 92.07, "elapsed_time": "0:28:15", "remaining_time": "0:02:25", "throughput": 1840.28, "total_tokens": 3119376} |
| {"current_steps": 7610, "total_steps": 8260, "loss": 0.0055, "lr": 9.401252516791304e-07, "epoch": 9.213075060532688, "percentage": 92.13, "elapsed_time": "0:28:16", "remaining_time": "0:02:24", "throughput": 1840.39, "total_tokens": 3121424} |
| {"current_steps": 7615, "total_steps": 8260, "loss": 0.049, "lr": 9.258289232683321e-07, "epoch": 9.21912832929782, "percentage": 92.19, "elapsed_time": "0:28:17", "remaining_time": "0:02:23", "throughput": 1840.51, "total_tokens": 3123504} |
| {"current_steps": 7620, "total_steps": 8260, "loss": 0.0502, "lr": 9.11640079352788e-07, "epoch": 9.225181598062955, "percentage": 92.25, "elapsed_time": "0:28:18", "remaining_time": "0:02:22", "throughput": 1840.7, "total_tokens": 3125712} |
| {"current_steps": 7625, "total_steps": 8260, "loss": 0.1999, "lr": 8.975587832817545e-07, "epoch": 9.231234866828087, "percentage": 92.31, "elapsed_time": "0:28:19", "remaining_time": "0:02:21", "throughput": 1840.85, "total_tokens": 3127824} |
| {"current_steps": 7630, "total_steps": 8260, "loss": 0.0497, "lr": 8.835850979243055e-07, "epoch": 9.23728813559322, "percentage": 92.37, "elapsed_time": "0:28:20", "remaining_time": "0:02:20", "throughput": 1840.99, "total_tokens": 3129936} |
| {"current_steps": 7635, "total_steps": 8260, "loss": 0.123, "lr": 8.697190856690685e-07, "epoch": 9.243341404358354, "percentage": 92.43, "elapsed_time": "0:28:21", "remaining_time": "0:02:19", "throughput": 1841.12, "total_tokens": 3131984} |
| {"current_steps": 7640, "total_steps": 8260, "loss": 0.0087, "lr": 8.559608084239474e-07, "epoch": 9.249394673123486, "percentage": 92.49, "elapsed_time": "0:28:22", "remaining_time": "0:02:18", "throughput": 1841.25, "total_tokens": 3134064} |
| {"current_steps": 7645, "total_steps": 8260, "loss": 0.0948, "lr": 8.423103276158306e-07, "epoch": 9.25544794188862, "percentage": 92.55, "elapsed_time": "0:28:23", "remaining_time": "0:02:17", "throughput": 1841.39, "total_tokens": 3136176} |
| {"current_steps": 7650, "total_steps": 8260, "loss": 0.0128, "lr": 8.287677041903308e-07, "epoch": 9.261501210653753, "percentage": 92.62, "elapsed_time": "0:28:24", "remaining_time": "0:02:15", "throughput": 1841.53, "total_tokens": 3138288} |
| {"current_steps": 7655, "total_steps": 8260, "loss": 0.0383, "lr": 8.15332998611501e-07, "epoch": 9.267554479418886, "percentage": 92.68, "elapsed_time": "0:28:25", "remaining_time": "0:02:14", "throughput": 1841.63, "total_tokens": 3140272} |
| {"current_steps": 7660, "total_steps": 8260, "loss": 0.0898, "lr": 8.020062708615745e-07, "epoch": 9.27360774818402, "percentage": 92.74, "elapsed_time": "0:28:26", "remaining_time": "0:02:13", "throughput": 1841.8, "total_tokens": 3142448} |
| {"current_steps": 7665, "total_steps": 8260, "loss": 0.1137, "lr": 7.887875804406946e-07, "epoch": 9.279661016949152, "percentage": 92.8, "elapsed_time": "0:28:27", "remaining_time": "0:02:12", "throughput": 1841.92, "total_tokens": 3144528} |
| {"current_steps": 7670, "total_steps": 8260, "loss": 0.1138, "lr": 7.756769863666524e-07, "epoch": 9.285714285714286, "percentage": 92.86, "elapsed_time": "0:28:28", "remaining_time": "0:02:11", "throughput": 1842.05, "total_tokens": 3146512} |
| {"current_steps": 7675, "total_steps": 8260, "loss": 0.0596, "lr": 7.626745471746022e-07, "epoch": 9.291767554479419, "percentage": 92.92, "elapsed_time": "0:28:29", "remaining_time": "0:02:10", "throughput": 1842.16, "total_tokens": 3148560} |
| {"current_steps": 7680, "total_steps": 8260, "loss": 0.0779, "lr": 7.497803209168347e-07, "epoch": 9.297820823244551, "percentage": 92.98, "elapsed_time": "0:28:30", "remaining_time": "0:02:09", "throughput": 1842.28, "total_tokens": 3150640} |
| {"current_steps": 7685, "total_steps": 8260, "loss": 0.0796, "lr": 7.369943651624938e-07, "epoch": 9.303874092009686, "percentage": 93.04, "elapsed_time": "0:28:31", "remaining_time": "0:02:08", "throughput": 1842.42, "total_tokens": 3152688} |
| {"current_steps": 7690, "total_steps": 8260, "loss": 0.0755, "lr": 7.243167369973242e-07, "epoch": 9.309927360774818, "percentage": 93.1, "elapsed_time": "0:28:32", "remaining_time": "0:02:06", "throughput": 1842.52, "total_tokens": 3154672} |
| {"current_steps": 7695, "total_steps": 8260, "loss": 0.1081, "lr": 7.117474930234124e-07, "epoch": 9.315980629539952, "percentage": 93.16, "elapsed_time": "0:28:33", "remaining_time": "0:02:05", "throughput": 1842.62, "total_tokens": 3156656} |
| {"current_steps": 7700, "total_steps": 8260, "loss": 0.0799, "lr": 6.992866893589578e-07, "epoch": 9.322033898305085, "percentage": 93.22, "elapsed_time": "0:28:34", "remaining_time": "0:02:04", "throughput": 1842.75, "total_tokens": 3158640} |
| {"current_steps": 7705, "total_steps": 8260, "loss": 0.002, "lr": 6.869343816379825e-07, "epoch": 9.328087167070217, "percentage": 93.28, "elapsed_time": "0:28:35", "remaining_time": "0:02:03", "throughput": 1842.83, "total_tokens": 3160624} |
| {"current_steps": 7710, "total_steps": 8260, "loss": 0.0063, "lr": 6.74690625010116e-07, "epoch": 9.334140435835351, "percentage": 93.34, "elapsed_time": "0:28:36", "remaining_time": "0:02:02", "throughput": 1842.92, "total_tokens": 3162608} |
| {"current_steps": 7715, "total_steps": 8260, "loss": 0.0226, "lr": 6.625554741403333e-07, "epoch": 9.340193704600484, "percentage": 93.4, "elapsed_time": "0:28:37", "remaining_time": "0:02:01", "throughput": 1843.0, "total_tokens": 3164560} |
| {"current_steps": 7720, "total_steps": 8260, "loss": 0.0958, "lr": 6.505289832087231e-07, "epoch": 9.346246973365618, "percentage": 93.46, "elapsed_time": "0:28:38", "remaining_time": "0:02:00", "throughput": 1843.18, "total_tokens": 3166768} |
| {"current_steps": 7725, "total_steps": 8260, "loss": 0.0182, "lr": 6.386112059102251e-07, "epoch": 9.35230024213075, "percentage": 93.52, "elapsed_time": "0:28:39", "remaining_time": "0:01:59", "throughput": 1843.34, "total_tokens": 3168912} |
| {"current_steps": 7730, "total_steps": 8260, "loss": 0.0027, "lr": 6.268021954544096e-07, "epoch": 9.358353510895883, "percentage": 93.58, "elapsed_time": "0:28:40", "remaining_time": "0:01:57", "throughput": 1843.4, "total_tokens": 3170800} |
| {"current_steps": 7735, "total_steps": 8260, "loss": 0.0916, "lr": 6.15102004565235e-07, "epoch": 9.364406779661017, "percentage": 93.64, "elapsed_time": "0:28:41", "remaining_time": "0:01:56", "throughput": 1843.5, "total_tokens": 3172784} |
| {"current_steps": 7740, "total_steps": 8260, "loss": 0.0088, "lr": 6.035106854808014e-07, "epoch": 9.37046004842615, "percentage": 93.7, "elapsed_time": "0:28:42", "remaining_time": "0:01:55", "throughput": 1843.67, "total_tokens": 3174928} |
| {"current_steps": 7745, "total_steps": 8260, "loss": 0.1109, "lr": 5.920282899531421e-07, "epoch": 9.376513317191284, "percentage": 93.77, "elapsed_time": "0:28:43", "remaining_time": "0:01:54", "throughput": 1843.78, "total_tokens": 3176976} |
| {"current_steps": 7750, "total_steps": 8260, "loss": 0.0677, "lr": 5.806548692479624e-07, "epoch": 9.382566585956416, "percentage": 93.83, "elapsed_time": "0:28:44", "remaining_time": "0:01:53", "throughput": 1843.88, "total_tokens": 3178896} |
| {"current_steps": 7755, "total_steps": 8260, "loss": 0.0382, "lr": 5.693904741444267e-07, "epoch": 9.388619854721549, "percentage": 93.89, "elapsed_time": "0:28:45", "remaining_time": "0:01:52", "throughput": 1843.96, "total_tokens": 3180848} |
| {"current_steps": 7760, "total_steps": 8260, "loss": 0.0462, "lr": 5.58235154934944e-07, "epoch": 9.394673123486683, "percentage": 93.95, "elapsed_time": "0:28:45", "remaining_time": "0:01:51", "throughput": 1844.04, "total_tokens": 3182704} |
| {"current_steps": 7765, "total_steps": 8260, "loss": 0.0021, "lr": 5.471889614249104e-07, "epoch": 9.400726392251816, "percentage": 94.01, "elapsed_time": "0:28:46", "remaining_time": "0:01:50", "throughput": 1844.2, "total_tokens": 3184848} |
| {"current_steps": 7770, "total_steps": 8260, "loss": 0.1045, "lr": 5.362519429325225e-07, "epoch": 9.40677966101695, "percentage": 94.07, "elapsed_time": "0:28:47", "remaining_time": "0:01:48", "throughput": 1844.3, "total_tokens": 3186832} |
| {"current_steps": 7775, "total_steps": 8260, "loss": 0.0212, "lr": 5.254241482885253e-07, "epoch": 9.412832929782082, "percentage": 94.13, "elapsed_time": "0:28:48", "remaining_time": "0:01:47", "throughput": 1844.43, "total_tokens": 3188912} |
| {"current_steps": 7780, "total_steps": 8260, "loss": 0.0295, "lr": 5.147056258360289e-07, "epoch": 9.418886198547215, "percentage": 94.19, "elapsed_time": "0:28:49", "remaining_time": "0:01:46", "throughput": 1844.61, "total_tokens": 3191152} |
| {"current_steps": 7785, "total_steps": 8260, "loss": 0.0107, "lr": 5.040964234302559e-07, "epoch": 9.424939467312349, "percentage": 94.25, "elapsed_time": "0:28:51", "remaining_time": "0:01:45", "throughput": 1844.72, "total_tokens": 3193232} |
| {"current_steps": 7790, "total_steps": 8260, "loss": 0.0102, "lr": 4.935965884383525e-07, "epoch": 9.430992736077481, "percentage": 94.31, "elapsed_time": "0:28:52", "remaining_time": "0:01:44", "throughput": 1844.83, "total_tokens": 3195312} |
| {"current_steps": 7795, "total_steps": 8260, "loss": 0.0055, "lr": 4.832061677391697e-07, "epoch": 9.437046004842616, "percentage": 94.37, "elapsed_time": "0:28:53", "remaining_time": "0:01:43", "throughput": 1844.94, "total_tokens": 3197328} |
| {"current_steps": 7800, "total_steps": 8260, "loss": 0.0425, "lr": 4.729252077230517e-07, "epoch": 9.443099273607748, "percentage": 94.43, "elapsed_time": "0:28:54", "remaining_time": "0:01:42", "throughput": 1844.99, "total_tokens": 3199280} |
| {"current_steps": 7805, "total_steps": 8260, "loss": 0.002, "lr": 4.6275375429163656e-07, "epoch": 9.44915254237288, "percentage": 94.49, "elapsed_time": "0:28:55", "remaining_time": "0:01:41", "throughput": 1845.13, "total_tokens": 3201328} |
| {"current_steps": 7810, "total_steps": 8260, "loss": 0.007, "lr": 4.526918528576396e-07, "epoch": 9.455205811138015, "percentage": 94.55, "elapsed_time": "0:28:55", "remaining_time": "0:01:40", "throughput": 1845.27, "total_tokens": 3203344} |
| {"current_steps": 7815, "total_steps": 8260, "loss": 0.1337, "lr": 4.427395483446617e-07, "epoch": 9.461259079903147, "percentage": 94.61, "elapsed_time": "0:28:57", "remaining_time": "0:01:38", "throughput": 1845.41, "total_tokens": 3205488} |
| {"current_steps": 7820, "total_steps": 8260, "loss": 0.1452, "lr": 4.328968851869758e-07, "epoch": 9.467312348668282, "percentage": 94.67, "elapsed_time": "0:28:57", "remaining_time": "0:01:37", "throughput": 1845.52, "total_tokens": 3207504} |
| {"current_steps": 7825, "total_steps": 8260, "loss": 0.0071, "lr": 4.231639073293492e-07, "epoch": 9.473365617433414, "percentage": 94.73, "elapsed_time": "0:28:59", "remaining_time": "0:01:36", "throughput": 1845.7, "total_tokens": 3209712} |
| {"current_steps": 7830, "total_steps": 8260, "loss": 0.0344, "lr": 4.13540658226827e-07, "epoch": 9.479418886198546, "percentage": 94.79, "elapsed_time": "0:29:00", "remaining_time": "0:01:35", "throughput": 1845.81, "total_tokens": 3211728} |
| {"current_steps": 7835, "total_steps": 8260, "loss": 0.0966, "lr": 4.040271808445406e-07, "epoch": 9.48547215496368, "percentage": 94.85, "elapsed_time": "0:29:00", "remaining_time": "0:01:34", "throughput": 1845.89, "total_tokens": 3213616} |
| {"current_steps": 7840, "total_steps": 8260, "loss": 0.002, "lr": 3.94623517657533e-07, "epoch": 9.491525423728813, "percentage": 94.92, "elapsed_time": "0:29:01", "remaining_time": "0:01:33", "throughput": 1845.98, "total_tokens": 3215536} |
| {"current_steps": 7845, "total_steps": 8260, "loss": 0.0957, "lr": 3.8532971065055045e-07, "epoch": 9.497578692493947, "percentage": 94.98, "elapsed_time": "0:29:02", "remaining_time": "0:01:32", "throughput": 1846.09, "total_tokens": 3217552} |
| {"current_steps": 7847, "total_steps": 8260, "eval_loss": 0.1515314131975174, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:29:11", "remaining_time": "0:01:32", "throughput": 1837.56, "total_tokens": 3218352} |
| {"current_steps": 7850, "total_steps": 8260, "loss": 0.0554, "lr": 3.761458013178648e-07, "epoch": 9.50363196125908, "percentage": 95.04, "elapsed_time": "0:29:13", "remaining_time": "0:01:31", "throughput": 1836.14, "total_tokens": 3219664} |
| {"current_steps": 7855, "total_steps": 8260, "loss": 0.0115, "lr": 3.670718306630766e-07, "epoch": 9.509685230024212, "percentage": 95.1, "elapsed_time": "0:29:14", "remaining_time": "0:01:30", "throughput": 1836.24, "total_tokens": 3221648} |
| {"current_steps": 7860, "total_steps": 8260, "loss": 0.0074, "lr": 3.5810783919895673e-07, "epoch": 9.515738498789347, "percentage": 95.16, "elapsed_time": "0:29:15", "remaining_time": "0:01:29", "throughput": 1836.32, "total_tokens": 3223632} |
| {"current_steps": 7865, "total_steps": 8260, "loss": 0.0032, "lr": 3.4925386694723284e-07, "epoch": 9.521791767554479, "percentage": 95.22, "elapsed_time": "0:29:16", "remaining_time": "0:01:28", "throughput": 1836.41, "total_tokens": 3225616} |
| {"current_steps": 7870, "total_steps": 8260, "loss": 0.0407, "lr": 3.405099534384393e-07, "epoch": 9.527845036319613, "percentage": 95.28, "elapsed_time": "0:29:17", "remaining_time": "0:01:27", "throughput": 1836.58, "total_tokens": 3227728} |
| {"current_steps": 7875, "total_steps": 8260, "loss": 0.0764, "lr": 3.31876137711723e-07, "epoch": 9.533898305084746, "percentage": 95.34, "elapsed_time": "0:29:18", "remaining_time": "0:01:25", "throughput": 1836.69, "total_tokens": 3229744} |
| {"current_steps": 7880, "total_steps": 8260, "loss": 0.1019, "lr": 3.233524583146741e-07, "epoch": 9.539951573849878, "percentage": 95.4, "elapsed_time": "0:29:19", "remaining_time": "0:01:24", "throughput": 1836.78, "total_tokens": 3231664} |
| {"current_steps": 7885, "total_steps": 8260, "loss": 0.0871, "lr": 3.149389533031566e-07, "epoch": 9.546004842615012, "percentage": 95.46, "elapsed_time": "0:29:20", "remaining_time": "0:01:23", "throughput": 1836.89, "total_tokens": 3233712} |
| {"current_steps": 7890, "total_steps": 8260, "loss": 0.0593, "lr": 3.066356602411419e-07, "epoch": 9.552058111380145, "percentage": 95.52, "elapsed_time": "0:29:21", "remaining_time": "0:01:22", "throughput": 1836.97, "total_tokens": 3235728} |
| {"current_steps": 7895, "total_steps": 8260, "loss": 0.1185, "lr": 2.984426162005227e-07, "epoch": 9.558111380145279, "percentage": 95.58, "elapsed_time": "0:29:22", "remaining_time": "0:01:21", "throughput": 1837.07, "total_tokens": 3237712} |
| {"current_steps": 7900, "total_steps": 8260, "loss": 0.15, "lr": 2.903598577609717e-07, "epoch": 9.564164648910412, "percentage": 95.64, "elapsed_time": "0:29:23", "remaining_time": "0:01:20", "throughput": 1837.16, "total_tokens": 3239664} |
| {"current_steps": 7905, "total_steps": 8260, "loss": 0.0158, "lr": 2.823874210097638e-07, "epoch": 9.570217917675544, "percentage": 95.7, "elapsed_time": "0:29:24", "remaining_time": "0:01:19", "throughput": 1837.29, "total_tokens": 3241776} |
| {"current_steps": 7910, "total_steps": 8260, "loss": 0.0042, "lr": 2.745253415416177e-07, "epoch": 9.576271186440678, "percentage": 95.76, "elapsed_time": "0:29:25", "remaining_time": "0:01:18", "throughput": 1837.4, "total_tokens": 3243824} |
| {"current_steps": 7915, "total_steps": 8260, "loss": 0.1994, "lr": 2.6677365445852976e-07, "epoch": 9.58232445520581, "percentage": 95.82, "elapsed_time": "0:29:26", "remaining_time": "0:01:16", "throughput": 1837.57, "total_tokens": 3246000} |
| {"current_steps": 7920, "total_steps": 8260, "loss": 0.0448, "lr": 2.5913239436964054e-07, "epoch": 9.588377723970945, "percentage": 95.88, "elapsed_time": "0:29:27", "remaining_time": "0:01:15", "throughput": 1837.71, "total_tokens": 3248112} |
| {"current_steps": 7925, "total_steps": 8260, "loss": 0.0057, "lr": 2.5160159539105443e-07, "epoch": 9.594430992736077, "percentage": 95.94, "elapsed_time": "0:29:28", "remaining_time": "0:01:14", "throughput": 1837.86, "total_tokens": 3250256} |
| {"current_steps": 7930, "total_steps": 8260, "loss": 0.005, "lr": 2.441812911456981e-07, "epoch": 9.600484261501212, "percentage": 96.0, "elapsed_time": "0:29:29", "remaining_time": "0:01:13", "throughput": 1837.99, "total_tokens": 3252368} |
| {"current_steps": 7935, "total_steps": 8260, "loss": 0.0047, "lr": 2.3687151476317337e-07, "epoch": 9.606537530266344, "percentage": 96.07, "elapsed_time": "0:29:30", "remaining_time": "0:01:12", "throughput": 1838.12, "total_tokens": 3254416} |
| {"current_steps": 7940, "total_steps": 8260, "loss": 0.0074, "lr": 2.2967229887960186e-07, "epoch": 9.612590799031477, "percentage": 96.13, "elapsed_time": "0:29:31", "remaining_time": "0:01:11", "throughput": 1838.24, "total_tokens": 3256496} |
| {"current_steps": 7945, "total_steps": 8260, "loss": 0.0248, "lr": 2.2258367563748884e-07, "epoch": 9.61864406779661, "percentage": 96.19, "elapsed_time": "0:29:32", "remaining_time": "0:01:10", "throughput": 1838.35, "total_tokens": 3258576} |
| {"current_steps": 7950, "total_steps": 8260, "loss": 0.0689, "lr": 2.1560567668556797e-07, "epoch": 9.624697336561743, "percentage": 96.25, "elapsed_time": "0:29:33", "remaining_time": "0:01:09", "throughput": 1838.44, "total_tokens": 3260496} |
| {"current_steps": 7955, "total_steps": 8260, "loss": 0.0873, "lr": 2.0873833317866798e-07, "epoch": 9.630750605326877, "percentage": 96.31, "elapsed_time": "0:29:34", "remaining_time": "0:01:08", "throughput": 1838.59, "total_tokens": 3262608} |
| {"current_steps": 7960, "total_steps": 8260, "loss": 0.0035, "lr": 2.019816757775711e-07, "epoch": 9.63680387409201, "percentage": 96.37, "elapsed_time": "0:29:35", "remaining_time": "0:01:06", "throughput": 1838.69, "total_tokens": 3264592} |
| {"current_steps": 7965, "total_steps": 8260, "loss": 0.011, "lr": 1.9533573464888543e-07, "epoch": 9.642857142857142, "percentage": 96.43, "elapsed_time": "0:29:36", "remaining_time": "0:01:05", "throughput": 1838.83, "total_tokens": 3266704} |
| {"current_steps": 7970, "total_steps": 8260, "loss": 0.0855, "lr": 1.8880053946488675e-07, "epoch": 9.648910411622277, "percentage": 96.49, "elapsed_time": "0:29:37", "remaining_time": "0:01:04", "throughput": 1838.97, "total_tokens": 3268816} |
| {"current_steps": 7975, "total_steps": 8260, "loss": 0.1444, "lr": 1.8237611940341291e-07, "epoch": 9.654963680387409, "percentage": 96.55, "elapsed_time": "0:29:38", "remaining_time": "0:01:03", "throughput": 1839.07, "total_tokens": 3270864} |
| {"current_steps": 7980, "total_steps": 8260, "loss": 0.122, "lr": 1.760625031477142e-07, "epoch": 9.661016949152543, "percentage": 96.61, "elapsed_time": "0:29:39", "remaining_time": "0:01:02", "throughput": 1839.2, "total_tokens": 3272944} |
| {"current_steps": 7985, "total_steps": 8260, "loss": 0.0032, "lr": 1.6985971888633935e-07, "epoch": 9.667070217917676, "percentage": 96.67, "elapsed_time": "0:29:40", "remaining_time": "0:01:01", "throughput": 1839.3, "total_tokens": 3274992} |
| {"current_steps": 7990, "total_steps": 8260, "loss": 0.1599, "lr": 1.637677943129967e-07, "epoch": 9.673123486682808, "percentage": 96.73, "elapsed_time": "0:29:41", "remaining_time": "0:01:00", "throughput": 1839.4, "total_tokens": 3277008} |
| {"current_steps": 7995, "total_steps": 8260, "loss": 0.054, "lr": 1.5778675662643793e-07, "epoch": 9.679176755447942, "percentage": 96.79, "elapsed_time": "0:29:42", "remaining_time": "0:00:59", "throughput": 1839.49, "total_tokens": 3278928} |
| {"current_steps": 8000, "total_steps": 8260, "loss": 0.0144, "lr": 1.5191663253034116e-07, "epoch": 9.685230024213075, "percentage": 96.85, "elapsed_time": "0:29:43", "remaining_time": "0:00:57", "throughput": 1839.61, "total_tokens": 3280944} |
| {"current_steps": 8005, "total_steps": 8260, "loss": 0.029, "lr": 1.461574482331779e-07, "epoch": 9.69128329297821, "percentage": 96.91, "elapsed_time": "0:29:44", "remaining_time": "0:00:56", "throughput": 1839.73, "total_tokens": 3282960} |
| {"current_steps": 8010, "total_steps": 8260, "loss": 0.0255, "lr": 1.4050922944811305e-07, "epoch": 9.697336561743342, "percentage": 96.97, "elapsed_time": "0:29:45", "remaining_time": "0:00:55", "throughput": 1839.86, "total_tokens": 3285008} |
| {"current_steps": 8015, "total_steps": 8260, "loss": 0.0339, "lr": 1.349720013928718e-07, "epoch": 9.703389830508474, "percentage": 97.03, "elapsed_time": "0:29:46", "remaining_time": "0:00:54", "throughput": 1839.99, "total_tokens": 3287088} |
| {"current_steps": 8020, "total_steps": 8260, "loss": 0.1811, "lr": 1.2954578878964507e-07, "epoch": 9.709443099273608, "percentage": 97.09, "elapsed_time": "0:29:47", "remaining_time": "0:00:53", "throughput": 1840.11, "total_tokens": 3289168} |
| {"current_steps": 8025, "total_steps": 8260, "loss": 0.1474, "lr": 1.2423061586496477e-07, "epoch": 9.71549636803874, "percentage": 97.15, "elapsed_time": "0:29:48", "remaining_time": "0:00:52", "throughput": 1840.28, "total_tokens": 3291376} |
| {"current_steps": 8030, "total_steps": 8260, "loss": 0.1167, "lr": 1.1902650634960378e-07, "epoch": 9.721549636803875, "percentage": 97.22, "elapsed_time": "0:29:49", "remaining_time": "0:00:51", "throughput": 1840.35, "total_tokens": 3293360} |
| {"current_steps": 8035, "total_steps": 8260, "loss": 0.0465, "lr": 1.1393348347846777e-07, "epoch": 9.727602905569007, "percentage": 97.28, "elapsed_time": "0:29:50", "remaining_time": "0:00:50", "throughput": 1840.47, "total_tokens": 3295344} |
| {"current_steps": 8040, "total_steps": 8260, "loss": 0.0017, "lr": 1.0895156999048972e-07, "epoch": 9.73365617433414, "percentage": 97.34, "elapsed_time": "0:29:51", "remaining_time": "0:00:49", "throughput": 1840.58, "total_tokens": 3297392} |
| {"current_steps": 8045, "total_steps": 8260, "loss": 0.0726, "lr": 1.0408078812853273e-07, "epoch": 9.739709443099274, "percentage": 97.4, "elapsed_time": "0:29:52", "remaining_time": "0:00:47", "throughput": 1840.68, "total_tokens": 3299376} |
| {"current_steps": 8050, "total_steps": 8260, "loss": 0.0603, "lr": 9.932115963928734e-08, "epoch": 9.745762711864407, "percentage": 97.46, "elapsed_time": "0:29:53", "remaining_time": "0:00:46", "throughput": 1840.75, "total_tokens": 3301360} |
| {"current_steps": 8055, "total_steps": 8260, "loss": 0.005, "lr": 9.467270577317167e-08, "epoch": 9.75181598062954, "percentage": 97.52, "elapsed_time": "0:29:54", "remaining_time": "0:00:45", "throughput": 1840.86, "total_tokens": 3303440} |
| {"current_steps": 8060, "total_steps": 8260, "loss": 0.0379, "lr": 9.013544728424528e-08, "epoch": 9.757869249394673, "percentage": 97.58, "elapsed_time": "0:29:55", "remaining_time": "0:00:44", "throughput": 1841.0, "total_tokens": 3305552} |
| {"current_steps": 8065, "total_steps": 8260, "loss": 0.0108, "lr": 8.570940443010655e-08, "epoch": 9.763922518159806, "percentage": 97.64, "elapsed_time": "0:29:56", "remaining_time": "0:00:43", "throughput": 1841.16, "total_tokens": 3307728} |
| {"current_steps": 8070, "total_steps": 8260, "loss": 0.0115, "lr": 8.139459697181218e-08, "epoch": 9.76997578692494, "percentage": 97.7, "elapsed_time": "0:29:57", "remaining_time": "0:00:42", "throughput": 1841.27, "total_tokens": 3309776} |
| {"current_steps": 8075, "total_steps": 8260, "loss": 0.1422, "lr": 7.719104417377443e-08, "epoch": 9.776029055690072, "percentage": 97.76, "elapsed_time": "0:29:58", "remaining_time": "0:00:41", "throughput": 1841.34, "total_tokens": 3311760} |
| {"current_steps": 8080, "total_steps": 8260, "loss": 0.061, "lr": 7.30987648036946e-08, "epoch": 9.782082324455207, "percentage": 97.82, "elapsed_time": "0:29:59", "remaining_time": "0:00:40", "throughput": 1841.44, "total_tokens": 3313808} |
| {"current_steps": 8085, "total_steps": 8260, "loss": 0.012, "lr": 6.911777713246581e-08, "epoch": 9.788135593220339, "percentage": 97.88, "elapsed_time": "0:30:00", "remaining_time": "0:00:38", "throughput": 1841.55, "total_tokens": 3315888} |
| {"current_steps": 8090, "total_steps": 8260, "loss": 0.002, "lr": 6.524809893409256e-08, "epoch": 9.794188861985472, "percentage": 97.94, "elapsed_time": "0:30:01", "remaining_time": "0:00:37", "throughput": 1841.68, "total_tokens": 3318000} |
| {"current_steps": 8095, "total_steps": 8260, "loss": 0.0107, "lr": 6.148974748561299e-08, "epoch": 9.800242130750606, "percentage": 98.0, "elapsed_time": "0:30:02", "remaining_time": "0:00:36", "throughput": 1841.79, "total_tokens": 3320016} |
| {"current_steps": 8100, "total_steps": 8260, "loss": 0.0394, "lr": 5.784273956702391e-08, "epoch": 9.806295399515738, "percentage": 98.06, "elapsed_time": "0:30:03", "remaining_time": "0:00:35", "throughput": 1841.91, "total_tokens": 3322096} |
| {"current_steps": 8105, "total_steps": 8260, "loss": 0.0591, "lr": 5.4307091461205936e-08, "epoch": 9.812348668280872, "percentage": 98.12, "elapsed_time": "0:30:04", "remaining_time": "0:00:34", "throughput": 1842.03, "total_tokens": 3324176} |
| {"current_steps": 8110, "total_steps": 8260, "loss": 0.1512, "lr": 5.08828189538485e-08, "epoch": 9.818401937046005, "percentage": 98.18, "elapsed_time": "0:30:05", "remaining_time": "0:00:33", "throughput": 1842.18, "total_tokens": 3326320} |
| {"current_steps": 8115, "total_steps": 8260, "loss": 0.0388, "lr": 4.7569937333372115e-08, "epoch": 9.824455205811137, "percentage": 98.24, "elapsed_time": "0:30:06", "remaining_time": "0:00:32", "throughput": 1842.33, "total_tokens": 3328464} |
| {"current_steps": 8120, "total_steps": 8260, "loss": 0.0079, "lr": 4.436846139087847e-08, "epoch": 9.830508474576272, "percentage": 98.31, "elapsed_time": "0:30:07", "remaining_time": "0:00:31", "throughput": 1842.42, "total_tokens": 3330480} |
| {"current_steps": 8125, "total_steps": 8260, "loss": 0.0372, "lr": 4.127840542006711e-08, "epoch": 9.836561743341404, "percentage": 98.37, "elapsed_time": "0:30:08", "remaining_time": "0:00:30", "throughput": 1842.56, "total_tokens": 3332624} |
| {"current_steps": 8130, "total_steps": 8260, "loss": 0.0195, "lr": 3.829978321718553e-08, "epoch": 9.842615012106538, "percentage": 98.43, "elapsed_time": "0:30:09", "remaining_time": "0:00:28", "throughput": 1842.7, "total_tokens": 3334768} |
| {"current_steps": 8135, "total_steps": 8260, "loss": 0.0041, "lr": 3.543260808095139e-08, "epoch": 9.84866828087167, "percentage": 98.49, "elapsed_time": "0:30:10", "remaining_time": "0:00:27", "throughput": 1842.81, "total_tokens": 3336784} |
| {"current_steps": 8140, "total_steps": 8260, "loss": 0.0048, "lr": 3.267689281250541e-08, "epoch": 9.854721549636803, "percentage": 98.55, "elapsed_time": "0:30:11", "remaining_time": "0:00:26", "throughput": 1842.94, "total_tokens": 3338832} |
| {"current_steps": 8145, "total_steps": 8260, "loss": 0.1548, "lr": 3.003264971535857e-08, "epoch": 9.860774818401937, "percentage": 98.61, "elapsed_time": "0:30:12", "remaining_time": "0:00:25", "throughput": 1843.02, "total_tokens": 3340848} |
| {"current_steps": 8150, "total_steps": 8260, "loss": 0.0191, "lr": 2.7499890595314438e-08, "epoch": 9.86682808716707, "percentage": 98.67, "elapsed_time": "0:30:13", "remaining_time": "0:00:24", "throughput": 1843.15, "total_tokens": 3342960} |
| {"current_steps": 8155, "total_steps": 8260, "loss": 0.0906, "lr": 2.507862676044137e-08, "epoch": 9.872881355932204, "percentage": 98.73, "elapsed_time": "0:30:14", "remaining_time": "0:00:23", "throughput": 1843.3, "total_tokens": 3345104} |
| {"current_steps": 8160, "total_steps": 8260, "loss": 0.1081, "lr": 2.2768869021014274e-08, "epoch": 9.878934624697337, "percentage": 98.79, "elapsed_time": "0:30:15", "remaining_time": "0:00:22", "throughput": 1843.39, "total_tokens": 3347024} |
| {"current_steps": 8165, "total_steps": 8260, "loss": 0.0573, "lr": 2.0570627689459054e-08, "epoch": 9.884987893462469, "percentage": 98.85, "elapsed_time": "0:30:16", "remaining_time": "0:00:21", "throughput": 1843.57, "total_tokens": 3349200} |
| {"current_steps": 8170, "total_steps": 8260, "loss": 0.0249, "lr": 1.848391258031379e-08, "epoch": 9.891041162227603, "percentage": 98.91, "elapsed_time": "0:30:17", "remaining_time": "0:00:20", "throughput": 1843.67, "total_tokens": 3351248} |
| {"current_steps": 8175, "total_steps": 8260, "loss": 0.0119, "lr": 1.6508733010184297e-08, "epoch": 9.897094430992736, "percentage": 98.97, "elapsed_time": "0:30:18", "remaining_time": "0:00:18", "throughput": 1843.87, "total_tokens": 3353488} |
| {"current_steps": 8180, "total_steps": 8260, "loss": 0.0038, "lr": 1.4645097797694186e-08, "epoch": 9.90314769975787, "percentage": 99.03, "elapsed_time": "0:30:19", "remaining_time": "0:00:17", "throughput": 1843.92, "total_tokens": 3355440} |
| {"current_steps": 8185, "total_steps": 8260, "loss": 0.1112, "lr": 1.2893015263459874e-08, "epoch": 9.909200968523002, "percentage": 99.09, "elapsed_time": "0:30:20", "remaining_time": "0:00:16", "throughput": 1843.99, "total_tokens": 3357296} |
| {"current_steps": 8190, "total_steps": 8260, "loss": 0.1755, "lr": 1.125249323004618e-08, "epoch": 9.915254237288135, "percentage": 99.15, "elapsed_time": "0:30:21", "remaining_time": "0:00:15", "throughput": 1844.06, "total_tokens": 3359280} |
| {"current_steps": 8195, "total_steps": 8260, "loss": 0.2839, "lr": 9.723539021927463e-09, "epoch": 9.92130750605327, "percentage": 99.21, "elapsed_time": "0:30:22", "remaining_time": "0:00:14", "throughput": 1844.18, "total_tokens": 3361328} |
| {"current_steps": 8200, "total_steps": 8260, "loss": 0.0355, "lr": 8.306159465459872e-09, "epoch": 9.927360774818402, "percentage": 99.27, "elapsed_time": "0:30:23", "remaining_time": "0:00:13", "throughput": 1844.27, "total_tokens": 3363344} |
| {"current_steps": 8205, "total_steps": 8260, "loss": 0.0499, "lr": 7.00036088885081e-09, "epoch": 9.933414043583536, "percentage": 99.33, "elapsed_time": "0:30:24", "remaining_time": "0:00:12", "throughput": 1844.35, "total_tokens": 3365296} |
| {"current_steps": 8210, "total_steps": 8260, "loss": 0.0087, "lr": 5.806149122128401e-09, "epoch": 9.939467312348668, "percentage": 99.39, "elapsed_time": "0:30:25", "remaining_time": "0:00:11", "throughput": 1844.53, "total_tokens": 3367504} |
| {"current_steps": 8215, "total_steps": 8260, "loss": 0.0807, "lr": 4.723529497113743e-09, "epoch": 9.9455205811138, "percentage": 99.46, "elapsed_time": "0:30:26", "remaining_time": "0:00:10", "throughput": 1844.65, "total_tokens": 3369616} |
| {"current_steps": 8220, "total_steps": 8260, "loss": 0.0721, "lr": 3.752506847407023e-09, "epoch": 9.951573849878935, "percentage": 99.52, "elapsed_time": "0:30:27", "remaining_time": "0:00:08", "throughput": 1844.78, "total_tokens": 3371728} |
| {"current_steps": 8225, "total_steps": 8260, "loss": 0.0848, "lr": 2.8930855083542096e-09, "epoch": 9.957627118644067, "percentage": 99.58, "elapsed_time": "0:30:28", "remaining_time": "0:00:07", "throughput": 1844.87, "total_tokens": 3373648} |
| {"current_steps": 8230, "total_steps": 8260, "loss": 0.1028, "lr": 2.145269317033183e-09, "epoch": 9.963680387409202, "percentage": 99.64, "elapsed_time": "0:30:29", "remaining_time": "0:00:06", "throughput": 1844.98, "total_tokens": 3375664} |
| {"current_steps": 8235, "total_steps": 8260, "loss": 0.0092, "lr": 1.509061612234297e-09, "epoch": 9.969733656174334, "percentage": 99.7, "elapsed_time": "0:30:30", "remaining_time": "0:00:05", "throughput": 1845.12, "total_tokens": 3377808} |
| {"current_steps": 8240, "total_steps": 8260, "loss": 0.0089, "lr": 9.844652344492832e-10, "epoch": 9.975786924939467, "percentage": 99.76, "elapsed_time": "0:30:31", "remaining_time": "0:00:04", "throughput": 1845.23, "total_tokens": 3379888} |
| {"current_steps": 8245, "total_steps": 8260, "loss": 0.0831, "lr": 5.714825258545942e-10, "epoch": 9.9818401937046, "percentage": 99.82, "elapsed_time": "0:30:32", "remaining_time": "0:00:03", "throughput": 1845.39, "total_tokens": 3382064} |
| {"current_steps": 8250, "total_steps": 8260, "loss": 0.0461, "lr": 2.7011533030585347e-10, "epoch": 9.987893462469733, "percentage": 99.88, "elapsed_time": "0:30:33", "remaining_time": "0:00:02", "throughput": 1845.53, "total_tokens": 3384144} |
| {"current_steps": 8255, "total_steps": 8260, "loss": 0.1077, "lr": 8.036499332397807e-11, "epoch": 9.993946731234868, "percentage": 99.94, "elapsed_time": "0:30:34", "remaining_time": "0:00:01", "throughput": 1845.62, "total_tokens": 3386160} |
| {"current_steps": 8260, "total_steps": 8260, "loss": 0.0692, "lr": 2.2323620896269604e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:30:35", "remaining_time": "0:00:00", "throughput": 1845.64, "total_tokens": 3388032} |
| {"current_steps": 8260, "total_steps": 8260, "eval_loss": 0.15202637016773224, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:30:43", "remaining_time": "0:00:00", "throughput": 1837.53, "total_tokens": 3388032} |
| {"current_steps": 8260, "total_steps": 8260, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:30:45", "remaining_time": "0:00:00", "throughput": 1836.09, "total_tokens": 3388032} |
|
|