| {"current_steps": 10, "total_steps": 3096, "loss": 0.8519, "lr": 2.9999227754514262e-05, "epoch": 0.009685230024213076, "percentage": 0.32, "elapsed_time": "0:00:18", "remaining_time": "1:32:49", "throughput": 1416.65, "total_tokens": 25568} | |
| {"current_steps": 20, "total_steps": 3096, "loss": 0.189, "lr": 2.9996911097572118e-05, "epoch": 0.01937046004842615, "percentage": 0.65, "elapsed_time": "0:00:26", "remaining_time": "1:08:38", "throughput": 1907.44, "total_tokens": 51072} | |
| {"current_steps": 30, "total_steps": 3096, "loss": 0.1648, "lr": 2.9993050267710624e-05, "epoch": 0.029055690072639227, "percentage": 0.97, "elapsed_time": "0:00:35", "remaining_time": "1:00:32", "throughput": 2150.07, "total_tokens": 76416} | |
| {"current_steps": 40, "total_steps": 3096, "loss": 0.1905, "lr": 2.9987645662464235e-05, "epoch": 0.0387409200968523, "percentage": 1.29, "elapsed_time": "0:00:44", "remaining_time": "0:56:21", "throughput": 2289.58, "total_tokens": 101344} | |
| {"current_steps": 50, "total_steps": 3096, "loss": 0.1794, "lr": 2.9980697838323884e-05, "epoch": 0.048426150121065374, "percentage": 1.61, "elapsed_time": "0:00:53", "remaining_time": "0:53:53", "throughput": 2385.88, "total_tokens": 126656} | |
| {"current_steps": 60, "total_steps": 3096, "loss": 0.1528, "lr": 2.9972207510679677e-05, "epoch": 0.05811138014527845, "percentage": 1.94, "elapsed_time": "0:01:02", "remaining_time": "0:52:28", "throughput": 2430.28, "total_tokens": 151200} | |
| {"current_steps": 70, "total_steps": 3096, "loss": 0.1742, "lr": 2.996217555374725e-05, "epoch": 0.06779661016949153, "percentage": 2.26, "elapsed_time": "0:01:10", "remaining_time": "0:51:08", "throughput": 2479.2, "total_tokens": 175968} | |
| {"current_steps": 80, "total_steps": 3096, "loss": 0.1565, "lr": 2.9950603000477722e-05, "epoch": 0.0774818401937046, "percentage": 2.58, "elapsed_time": "0:01:19", "remaining_time": "0:50:11", "throughput": 2519.74, "total_tokens": 201280} | |
| {"current_steps": 90, "total_steps": 3096, "loss": 0.1499, "lr": 2.993749104245137e-05, "epoch": 0.08716707021791767, "percentage": 2.91, "elapsed_time": "0:01:28", "remaining_time": "0:49:22", "throughput": 2553.02, "total_tokens": 226432} | |
| {"current_steps": 100, "total_steps": 3096, "loss": 0.1441, "lr": 2.992284102975491e-05, "epoch": 0.09685230024213075, "percentage": 3.23, "elapsed_time": "0:01:37", "remaining_time": "0:48:46", "throughput": 2577.35, "total_tokens": 251744} | |
| {"current_steps": 110, "total_steps": 3096, "loss": 0.1245, "lr": 2.9906654470842492e-05, "epoch": 0.10653753026634383, "percentage": 3.55, "elapsed_time": "0:01:46", "remaining_time": "0:48:11", "throughput": 2596.04, "total_tokens": 276480} | |
| {"current_steps": 120, "total_steps": 3096, "loss": 0.1333, "lr": 2.9888933032380397e-05, "epoch": 0.1162227602905569, "percentage": 3.88, "elapsed_time": "0:01:55", "remaining_time": "0:47:44", "throughput": 2611.86, "total_tokens": 301664} | |
| {"current_steps": 130, "total_steps": 3096, "loss": 0.1728, "lr": 2.9869678539075403e-05, "epoch": 0.12590799031476999, "percentage": 4.2, "elapsed_time": "0:02:04", "remaining_time": "0:47:19", "throughput": 2626.12, "total_tokens": 326784} | |
| {"current_steps": 140, "total_steps": 3096, "loss": 0.1281, "lr": 2.9848892973486912e-05, "epoch": 0.13559322033898305, "percentage": 4.52, "elapsed_time": "0:02:13", "remaining_time": "0:46:54", "throughput": 2635.19, "total_tokens": 351328} | |
| {"current_steps": 150, "total_steps": 3096, "loss": 0.1136, "lr": 2.9826578475822825e-05, "epoch": 0.14527845036319612, "percentage": 4.84, "elapsed_time": "0:02:22", "remaining_time": "0:46:35", "throughput": 2641.76, "total_tokens": 376000} | |
| {"current_steps": 160, "total_steps": 3096, "loss": 0.1277, "lr": 2.980273734371914e-05, "epoch": 0.1549636803874092, "percentage": 5.17, "elapsed_time": "0:02:31", "remaining_time": "0:46:15", "throughput": 2646.81, "total_tokens": 400384} | |
| {"current_steps": 170, "total_steps": 3096, "loss": 0.1028, "lr": 2.9777372032003423e-05, "epoch": 0.16464891041162227, "percentage": 5.49, "elapsed_time": "0:02:40", "remaining_time": "0:46:04", "throughput": 2654.85, "total_tokens": 426432} | |
| {"current_steps": 180, "total_steps": 3096, "loss": 0.1245, "lr": 2.975048515244199e-05, "epoch": 0.17433414043583534, "percentage": 5.81, "elapsed_time": "0:02:49", "remaining_time": "0:45:47", "throughput": 2663.01, "total_tokens": 451712} | |
| {"current_steps": 190, "total_steps": 3096, "loss": 0.142, "lr": 2.9722079473471035e-05, "epoch": 0.18401937046004843, "percentage": 6.14, "elapsed_time": "0:02:58", "remaining_time": "0:45:32", "throughput": 2669.52, "total_tokens": 476960} | |
| {"current_steps": 200, "total_steps": 3096, "loss": 0.1342, "lr": 2.9692157919911536e-05, "epoch": 0.1937046004842615, "percentage": 6.46, "elapsed_time": "0:03:07", "remaining_time": "0:45:14", "throughput": 2675.06, "total_tokens": 501440} | |
| {"current_steps": 210, "total_steps": 3096, "loss": 0.1314, "lr": 2.966072357266811e-05, "epoch": 0.2033898305084746, "percentage": 6.78, "elapsed_time": "0:03:16", "remaining_time": "0:44:58", "throughput": 2682.22, "total_tokens": 526656} | |
| {"current_steps": 220, "total_steps": 3096, "loss": 0.171, "lr": 2.9627779668411795e-05, "epoch": 0.21307506053268765, "percentage": 7.11, "elapsed_time": "0:03:25", "remaining_time": "0:44:45", "throughput": 2689.42, "total_tokens": 552544} | |
| {"current_steps": 230, "total_steps": 3096, "loss": 0.115, "lr": 2.9593329599246766e-05, "epoch": 0.22276029055690072, "percentage": 7.43, "elapsed_time": "0:03:34", "remaining_time": "0:44:30", "throughput": 2694.31, "total_tokens": 577472} | |
| {"current_steps": 240, "total_steps": 3096, "loss": 0.1158, "lr": 2.955737691236108e-05, "epoch": 0.2324455205811138, "percentage": 7.75, "elapsed_time": "0:03:43", "remaining_time": "0:44:14", "throughput": 2698.0, "total_tokens": 601856} | |
| {"current_steps": 250, "total_steps": 3096, "loss": 0.111, "lr": 2.9519925309661422e-05, "epoch": 0.24213075060532688, "percentage": 8.07, "elapsed_time": "0:03:52", "remaining_time": "0:44:04", "throughput": 2703.38, "total_tokens": 627904} | |
| {"current_steps": 260, "total_steps": 3096, "loss": 0.1314, "lr": 2.948097864739194e-05, "epoch": 0.25181598062953997, "percentage": 8.4, "elapsed_time": "0:04:01", "remaining_time": "0:43:49", "throughput": 2704.86, "total_tokens": 651936} | |
| {"current_steps": 270, "total_steps": 3096, "loss": 0.143, "lr": 2.944054093573719e-05, "epoch": 0.26150121065375304, "percentage": 8.72, "elapsed_time": "0:04:09", "remaining_time": "0:43:35", "throughput": 2707.26, "total_tokens": 676416} | |
| {"current_steps": 280, "total_steps": 3096, "loss": 0.1121, "lr": 2.93986163384092e-05, "epoch": 0.2711864406779661, "percentage": 9.04, "elapsed_time": "0:04:18", "remaining_time": "0:43:23", "throughput": 2707.57, "total_tokens": 700832} | |
| {"current_steps": 290, "total_steps": 3096, "loss": 0.1159, "lr": 2.9355209172218777e-05, "epoch": 0.28087167070217917, "percentage": 9.37, "elapsed_time": "0:04:27", "remaining_time": "0:43:12", "throughput": 2709.07, "total_tokens": 725824} | |
| {"current_steps": 300, "total_steps": 3096, "loss": 0.133, "lr": 2.931032390663101e-05, "epoch": 0.29055690072639223, "percentage": 9.69, "elapsed_time": "0:04:36", "remaining_time": "0:42:58", "throughput": 2709.19, "total_tokens": 749408} | |
| {"current_steps": 310, "total_steps": 3096, "loss": 0.1172, "lr": 2.926396516330506e-05, "epoch": 0.30024213075060535, "percentage": 10.01, "elapsed_time": "0:04:45", "remaining_time": "0:42:45", "throughput": 2711.22, "total_tokens": 773984} | |
| {"current_steps": 320, "total_steps": 3096, "loss": 0.136, "lr": 2.921613771561829e-05, "epoch": 0.3099273607748184, "percentage": 10.34, "elapsed_time": "0:04:54", "remaining_time": "0:42:34", "throughput": 2713.85, "total_tokens": 799168} | |
| {"current_steps": 330, "total_steps": 3096, "loss": 0.0973, "lr": 2.916684648817478e-05, "epoch": 0.3196125907990315, "percentage": 10.66, "elapsed_time": "0:05:03", "remaining_time": "0:42:22", "throughput": 2717.36, "total_tokens": 824320} | |
| {"current_steps": 340, "total_steps": 3096, "loss": 0.13, "lr": 2.9116096556298256e-05, "epoch": 0.32929782082324455, "percentage": 10.98, "elapsed_time": "0:05:12", "remaining_time": "0:42:11", "throughput": 2720.32, "total_tokens": 849632} | |
| {"current_steps": 350, "total_steps": 3096, "loss": 0.1257, "lr": 2.9063893145509475e-05, "epoch": 0.3389830508474576, "percentage": 11.3, "elapsed_time": "0:05:21", "remaining_time": "0:41:59", "throughput": 2722.61, "total_tokens": 874400} | |
| {"current_steps": 360, "total_steps": 3096, "loss": 0.1155, "lr": 2.901024163098822e-05, "epoch": 0.3486682808716707, "percentage": 11.63, "elapsed_time": "0:05:30", "remaining_time": "0:41:48", "throughput": 2725.0, "total_tokens": 899264} | |
| {"current_steps": 370, "total_steps": 3096, "loss": 0.1251, "lr": 2.8955147537019815e-05, "epoch": 0.3583535108958838, "percentage": 11.95, "elapsed_time": "0:05:38", "remaining_time": "0:41:37", "throughput": 2727.61, "total_tokens": 924544} | |
| {"current_steps": 380, "total_steps": 3096, "loss": 0.1147, "lr": 2.88986165364263e-05, "epoch": 0.36803874092009686, "percentage": 12.27, "elapsed_time": "0:05:47", "remaining_time": "0:41:26", "throughput": 2730.14, "total_tokens": 949792} | |
| {"current_steps": 390, "total_steps": 3096, "loss": 0.1433, "lr": 2.8840654449982344e-05, "epoch": 0.37772397094430993, "percentage": 12.6, "elapsed_time": "0:05:56", "remaining_time": "0:41:13", "throughput": 2732.0, "total_tokens": 974112} | |
| {"current_steps": 400, "total_steps": 3096, "loss": 0.1117, "lr": 2.8781267245815898e-05, "epoch": 0.387409200968523, "percentage": 12.92, "elapsed_time": "0:06:05", "remaining_time": "0:41:02", "throughput": 2734.47, "total_tokens": 999168} | |
| {"current_steps": 410, "total_steps": 3096, "loss": 0.1353, "lr": 2.8720461038793672e-05, "epoch": 0.39709443099273606, "percentage": 13.24, "elapsed_time": "0:06:14", "remaining_time": "0:40:52", "throughput": 2736.46, "total_tokens": 1024320} | |
| {"current_steps": 420, "total_steps": 3096, "loss": 0.1165, "lr": 2.8658242089891515e-05, "epoch": 0.4067796610169492, "percentage": 13.57, "elapsed_time": "0:06:23", "remaining_time": "0:40:41", "throughput": 2737.84, "total_tokens": 1049088} | |
| {"current_steps": 430, "total_steps": 3096, "loss": 0.1215, "lr": 2.8594616805549752e-05, "epoch": 0.41646489104116224, "percentage": 13.89, "elapsed_time": "0:06:32", "remaining_time": "0:40:30", "throughput": 2738.85, "total_tokens": 1073632} | |
| {"current_steps": 440, "total_steps": 3096, "loss": 0.1066, "lr": 2.8529591737013526e-05, "epoch": 0.4261501210653753, "percentage": 14.21, "elapsed_time": "0:06:40", "remaining_time": "0:40:19", "throughput": 2739.65, "total_tokens": 1098208} | |
| {"current_steps": 450, "total_steps": 3096, "loss": 0.0879, "lr": 2.8463173579658258e-05, "epoch": 0.4358353510895884, "percentage": 14.53, "elapsed_time": "0:06:49", "remaining_time": "0:40:08", "throughput": 2740.27, "total_tokens": 1122336} | |
| {"current_steps": 460, "total_steps": 3096, "loss": 0.1141, "lr": 2.8395369172300235e-05, "epoch": 0.44552058111380144, "percentage": 14.86, "elapsed_time": "0:06:58", "remaining_time": "0:39:59", "throughput": 2740.53, "total_tokens": 1147392} | |
| {"current_steps": 470, "total_steps": 3096, "loss": 0.1052, "lr": 2.8326185496492464e-05, "epoch": 0.4552058111380145, "percentage": 15.18, "elapsed_time": "0:07:08", "remaining_time": "0:39:51", "throughput": 2741.16, "total_tokens": 1173248} | |
| {"current_steps": 480, "total_steps": 3096, "loss": 0.1086, "lr": 2.825562967580579e-05, "epoch": 0.4648910411622276, "percentage": 15.5, "elapsed_time": "0:07:16", "remaining_time": "0:39:41", "throughput": 2742.04, "total_tokens": 1197984} | |
| {"current_steps": 490, "total_steps": 3096, "loss": 0.1201, "lr": 2.8183708975095406e-05, "epoch": 0.4745762711864407, "percentage": 15.83, "elapsed_time": "0:07:25", "remaining_time": "0:39:30", "throughput": 2743.17, "total_tokens": 1222720} | |
| {"current_steps": 500, "total_steps": 3096, "loss": 0.1319, "lr": 2.8110430799752845e-05, "epoch": 0.48426150121065376, "percentage": 16.15, "elapsed_time": "0:07:34", "remaining_time": "0:39:19", "throughput": 2744.65, "total_tokens": 1247232} | |
| {"current_steps": 510, "total_steps": 3096, "loss": 0.1071, "lr": 2.8035802694943457e-05, "epoch": 0.4939467312348668, "percentage": 16.47, "elapsed_time": "0:07:43", "remaining_time": "0:39:10", "throughput": 2746.99, "total_tokens": 1273184} | |
| {"current_steps": 520, "total_steps": 3096, "loss": 0.1554, "lr": 2.7959832344829512e-05, "epoch": 0.5036319612590799, "percentage": 16.8, "elapsed_time": "0:07:52", "remaining_time": "0:39:00", "throughput": 2749.1, "total_tokens": 1298688} | |
| {"current_steps": 530, "total_steps": 3096, "loss": 0.1196, "lr": 2.7882527571779003e-05, "epoch": 0.513317191283293, "percentage": 17.12, "elapsed_time": "0:08:01", "remaining_time": "0:38:50", "throughput": 2750.88, "total_tokens": 1324128} | |
| {"current_steps": 540, "total_steps": 3096, "loss": 0.1334, "lr": 2.78038963355602e-05, "epoch": 0.5230024213075061, "percentage": 17.44, "elapsed_time": "0:08:10", "remaining_time": "0:38:40", "throughput": 2752.4, "total_tokens": 1349120} | |
| {"current_steps": 550, "total_steps": 3096, "loss": 0.1109, "lr": 2.7723946732522055e-05, "epoch": 0.5326876513317191, "percentage": 17.76, "elapsed_time": "0:08:19", "remaining_time": "0:38:29", "throughput": 2754.1, "total_tokens": 1374304} | |
| {"current_steps": 560, "total_steps": 3096, "loss": 0.1274, "lr": 2.764268699476058e-05, "epoch": 0.5423728813559322, "percentage": 18.09, "elapsed_time": "0:08:27", "remaining_time": "0:38:19", "throughput": 2755.75, "total_tokens": 1399136} | |
| {"current_steps": 570, "total_steps": 3096, "loss": 0.1397, "lr": 2.756012548927119e-05, "epoch": 0.5520581113801453, "percentage": 18.41, "elapsed_time": "0:08:36", "remaining_time": "0:38:09", "throughput": 2757.7, "total_tokens": 1424672} | |
| {"current_steps": 580, "total_steps": 3096, "loss": 0.101, "lr": 2.7476270717087215e-05, "epoch": 0.5617433414043583, "percentage": 18.73, "elapsed_time": "0:08:45", "remaining_time": "0:37:58", "throughput": 2758.4, "total_tokens": 1449024} | |
| {"current_steps": 590, "total_steps": 3096, "loss": 0.0941, "lr": 2.7391131312404556e-05, "epoch": 0.5714285714285714, "percentage": 19.06, "elapsed_time": "0:08:54", "remaining_time": "0:37:50", "throughput": 2760.25, "total_tokens": 1475264} | |
| {"current_steps": 600, "total_steps": 3096, "loss": 0.0865, "lr": 2.7304716041692663e-05, "epoch": 0.5811138014527845, "percentage": 19.38, "elapsed_time": "0:09:03", "remaining_time": "0:37:39", "throughput": 2761.38, "total_tokens": 1500064} | |
| {"current_steps": 610, "total_steps": 3096, "loss": 0.1596, "lr": 2.7217033802791906e-05, "epoch": 0.5907990314769975, "percentage": 19.7, "elapsed_time": "0:09:11", "remaining_time": "0:37:29", "throughput": 2762.12, "total_tokens": 1524448} | |
| {"current_steps": 620, "total_steps": 3096, "loss": 0.0891, "lr": 2.7128093623997368e-05, "epoch": 0.6004842615012107, "percentage": 20.03, "elapsed_time": "0:09:20", "remaining_time": "0:37:19", "throughput": 2763.26, "total_tokens": 1549536} | |
| {"current_steps": 630, "total_steps": 3096, "loss": 0.1085, "lr": 2.7037904663129262e-05, "epoch": 0.6101694915254238, "percentage": 20.35, "elapsed_time": "0:09:29", "remaining_time": "0:37:08", "throughput": 2763.29, "total_tokens": 1573408} | |
| {"current_steps": 640, "total_steps": 3096, "loss": 0.1023, "lr": 2.6946476206589972e-05, "epoch": 0.6198547215496368, "percentage": 20.67, "elapsed_time": "0:09:38", "remaining_time": "0:36:58", "throughput": 2763.37, "total_tokens": 1597888} | |
| {"current_steps": 650, "total_steps": 3096, "loss": 0.0669, "lr": 2.6853817668407875e-05, "epoch": 0.6295399515738499, "percentage": 20.99, "elapsed_time": "0:09:47", "remaining_time": "0:36:50", "throughput": 2763.92, "total_tokens": 1623296} | |
| {"current_steps": 660, "total_steps": 3096, "loss": 0.1017, "lr": 2.6759938589268023e-05, "epoch": 0.639225181598063, "percentage": 21.32, "elapsed_time": "0:09:56", "remaining_time": "0:36:42", "throughput": 2764.08, "total_tokens": 1649216} | |
| {"current_steps": 670, "total_steps": 3096, "loss": 0.1432, "lr": 2.6664848635529742e-05, "epoch": 0.648910411622276, "percentage": 21.64, "elapsed_time": "0:10:05", "remaining_time": "0:36:33", "throughput": 2763.38, "total_tokens": 1673760} | |
| {"current_steps": 680, "total_steps": 3096, "loss": 0.1081, "lr": 2.6568557598231385e-05, "epoch": 0.6585956416464891, "percentage": 21.96, "elapsed_time": "0:10:14", "remaining_time": "0:36:23", "throughput": 2763.82, "total_tokens": 1698592} | |
| {"current_steps": 690, "total_steps": 3096, "loss": 0.1037, "lr": 2.6471075392082125e-05, "epoch": 0.6682808716707022, "percentage": 22.29, "elapsed_time": "0:10:23", "remaining_time": "0:36:13", "throughput": 2764.45, "total_tokens": 1723296} | |
| {"current_steps": 700, "total_steps": 3096, "loss": 0.1216, "lr": 2.6372412054441116e-05, "epoch": 0.6779661016949152, "percentage": 22.61, "elapsed_time": "0:10:32", "remaining_time": "0:36:04", "throughput": 2765.43, "total_tokens": 1748384} | |
| {"current_steps": 710, "total_steps": 3096, "loss": 0.0853, "lr": 2.6272577744283965e-05, "epoch": 0.6876513317191283, "percentage": 22.93, "elapsed_time": "0:10:41", "remaining_time": "0:35:54", "throughput": 2766.56, "total_tokens": 1773600} | |
| {"current_steps": 720, "total_steps": 3096, "loss": 0.1034, "lr": 2.617158274115673e-05, "epoch": 0.6973365617433414, "percentage": 23.26, "elapsed_time": "0:10:49", "remaining_time": "0:35:44", "throughput": 2767.65, "total_tokens": 1798656} | |
| {"current_steps": 730, "total_steps": 3096, "loss": 0.0872, "lr": 2.6069437444117432e-05, "epoch": 0.7070217917675545, "percentage": 23.58, "elapsed_time": "0:10:58", "remaining_time": "0:35:35", "throughput": 2769.29, "total_tokens": 1824544} | |
| {"current_steps": 740, "total_steps": 3096, "loss": 0.1063, "lr": 2.596615237066535e-05, "epoch": 0.7167070217917676, "percentage": 23.9, "elapsed_time": "0:11:07", "remaining_time": "0:35:24", "throughput": 2770.12, "total_tokens": 1848896} | |
| {"current_steps": 750, "total_steps": 3096, "loss": 0.1104, "lr": 2.586173815565805e-05, "epoch": 0.7263922518159807, "percentage": 24.22, "elapsed_time": "0:11:16", "remaining_time": "0:35:14", "throughput": 2770.8, "total_tokens": 1873248} | |
| {"current_steps": 760, "total_steps": 3096, "loss": 0.1125, "lr": 2.575620555021634e-05, "epoch": 0.7360774818401937, "percentage": 24.55, "elapsed_time": "0:11:24", "remaining_time": "0:35:04", "throughput": 2771.35, "total_tokens": 1897184} | |
| {"current_steps": 770, "total_steps": 3096, "loss": 0.0969, "lr": 2.564956542061732e-05, "epoch": 0.7457627118644068, "percentage": 24.87, "elapsed_time": "0:11:33", "remaining_time": "0:34:54", "throughput": 2772.39, "total_tokens": 1922368} | |
| {"current_steps": 780, "total_steps": 3096, "loss": 0.1142, "lr": 2.5541828747175477e-05, "epoch": 0.7554479418886199, "percentage": 25.19, "elapsed_time": "0:11:42", "remaining_time": "0:34:45", "throughput": 2773.75, "total_tokens": 1947904} | |
| {"current_steps": 790, "total_steps": 3096, "loss": 0.0926, "lr": 2.543300662311211e-05, "epoch": 0.7651331719128329, "percentage": 25.52, "elapsed_time": "0:11:50", "remaining_time": "0:34:34", "throughput": 2774.29, "total_tokens": 1971872} | |
| {"current_steps": 800, "total_steps": 3096, "loss": 0.0802, "lr": 2.532311025341309e-05, "epoch": 0.774818401937046, "percentage": 25.84, "elapsed_time": "0:11:59", "remaining_time": "0:34:24", "throughput": 2775.06, "total_tokens": 1996352} | |
| {"current_steps": 810, "total_steps": 3096, "loss": 0.1248, "lr": 2.5212150953675133e-05, "epoch": 0.784503631961259, "percentage": 26.16, "elapsed_time": "0:12:07", "remaining_time": "0:34:14", "throughput": 2775.57, "total_tokens": 2020480} | |
| {"current_steps": 820, "total_steps": 3096, "loss": 0.0767, "lr": 2.5100140148940688e-05, "epoch": 0.7941888619854721, "percentage": 26.49, "elapsed_time": "0:12:16", "remaining_time": "0:34:04", "throughput": 2775.96, "total_tokens": 2044448} | |
| {"current_steps": 830, "total_steps": 3096, "loss": 0.1239, "lr": 2.498708937252153e-05, "epoch": 0.8038740920096852, "percentage": 26.81, "elapsed_time": "0:12:25", "remaining_time": "0:33:55", "throughput": 2777.21, "total_tokens": 2070400} | |
| {"current_steps": 840, "total_steps": 3096, "loss": 0.108, "lr": 2.4873010264811222e-05, "epoch": 0.8135593220338984, "percentage": 27.13, "elapsed_time": "0:12:34", "remaining_time": "0:33:45", "throughput": 2778.0, "total_tokens": 2095392} | |
| {"current_steps": 850, "total_steps": 3096, "loss": 0.0994, "lr": 2.4757914572086555e-05, "epoch": 0.8232445520581114, "percentage": 27.45, "elapsed_time": "0:12:43", "remaining_time": "0:33:36", "throughput": 2778.73, "total_tokens": 2120192} | |
| {"current_steps": 860, "total_steps": 3096, "loss": 0.0927, "lr": 2.464181414529809e-05, "epoch": 0.8329297820823245, "percentage": 27.78, "elapsed_time": "0:12:51", "remaining_time": "0:33:26", "throughput": 2779.17, "total_tokens": 2144384} | |
| {"current_steps": 870, "total_steps": 3096, "loss": 0.1328, "lr": 2.4524720938849883e-05, "epoch": 0.8426150121065376, "percentage": 28.1, "elapsed_time": "0:13:00", "remaining_time": "0:33:16", "throughput": 2779.46, "total_tokens": 2168704} | |
| {"current_steps": 880, "total_steps": 3096, "loss": 0.1229, "lr": 2.440664700936861e-05, "epoch": 0.8523002421307506, "percentage": 28.42, "elapsed_time": "0:13:08", "remaining_time": "0:33:06", "throughput": 2779.84, "total_tokens": 2193248} | |
| {"current_steps": 890, "total_steps": 3096, "loss": 0.0957, "lr": 2.4287604514462152e-05, "epoch": 0.8619854721549637, "percentage": 28.75, "elapsed_time": "0:13:17", "remaining_time": "0:32:57", "throughput": 2780.12, "total_tokens": 2217568} | |
| {"current_steps": 900, "total_steps": 3096, "loss": 0.0975, "lr": 2.416760571146774e-05, "epoch": 0.8716707021791767, "percentage": 29.07, "elapsed_time": "0:13:26", "remaining_time": "0:32:47", "throughput": 2780.32, "total_tokens": 2242048} | |
| {"current_steps": 910, "total_steps": 3096, "loss": 0.1068, "lr": 2.4046662956189898e-05, "epoch": 0.8813559322033898, "percentage": 29.39, "elapsed_time": "0:13:35", "remaining_time": "0:32:38", "throughput": 2779.87, "total_tokens": 2266112} | |
| {"current_steps": 920, "total_steps": 3096, "loss": 0.0688, "lr": 2.3924788701628197e-05, "epoch": 0.8910411622276029, "percentage": 29.72, "elapsed_time": "0:13:44", "remaining_time": "0:32:29", "throughput": 2779.78, "total_tokens": 2290720} | |
| {"current_steps": 930, "total_steps": 3096, "loss": 0.1141, "lr": 2.3801995496695028e-05, "epoch": 0.9007263922518159, "percentage": 30.04, "elapsed_time": "0:13:53", "remaining_time": "0:32:20", "throughput": 2779.31, "total_tokens": 2315488} | |
| {"current_steps": 940, "total_steps": 3096, "loss": 0.1328, "lr": 2.367829598492348e-05, "epoch": 0.910411622276029, "percentage": 30.36, "elapsed_time": "0:14:02", "remaining_time": "0:32:12", "throughput": 2779.06, "total_tokens": 2340992} | |
| {"current_steps": 950, "total_steps": 3096, "loss": 0.1, "lr": 2.3553702903165502e-05, "epoch": 0.9200968523002422, "percentage": 30.68, "elapsed_time": "0:14:11", "remaining_time": "0:32:03", "throughput": 2779.62, "total_tokens": 2366880} | |
| {"current_steps": 960, "total_steps": 3096, "loss": 0.1089, "lr": 2.3428229080280407e-05, "epoch": 0.9297820823244553, "percentage": 31.01, "elapsed_time": "0:14:20", "remaining_time": "0:31:54", "throughput": 2779.94, "total_tokens": 2392000} | |
| {"current_steps": 970, "total_steps": 3096, "loss": 0.0924, "lr": 2.330188743581398e-05, "epoch": 0.9394673123486683, "percentage": 31.33, "elapsed_time": "0:14:29", "remaining_time": "0:31:45", "throughput": 2780.37, "total_tokens": 2417472} | |
| {"current_steps": 980, "total_steps": 3096, "loss": 0.1205, "lr": 2.3174690978668155e-05, "epoch": 0.9491525423728814, "percentage": 31.65, "elapsed_time": "0:14:38", "remaining_time": "0:31:36", "throughput": 2780.89, "total_tokens": 2442496} | |
| {"current_steps": 990, "total_steps": 3096, "loss": 0.1004, "lr": 2.3046652805761588e-05, "epoch": 0.9588377723970944, "percentage": 31.98, "elapsed_time": "0:14:47", "remaining_time": "0:31:27", "throughput": 2781.37, "total_tokens": 2467392} | |
| {"current_steps": 1000, "total_steps": 3096, "loss": 0.1007, "lr": 2.2917786100681078e-05, "epoch": 0.9685230024213075, "percentage": 32.3, "elapsed_time": "0:14:56", "remaining_time": "0:31:18", "throughput": 2782.02, "total_tokens": 2492768} | |
| {"current_steps": 1010, "total_steps": 3096, "loss": 0.1179, "lr": 2.2788104132324125e-05, "epoch": 0.9782082324455206, "percentage": 32.62, "elapsed_time": "0:15:05", "remaining_time": "0:31:10", "throughput": 2780.53, "total_tokens": 2518176} | |
| {"current_steps": 1020, "total_steps": 3096, "loss": 0.0971, "lr": 2.2657620253532685e-05, "epoch": 0.9878934624697336, "percentage": 32.95, "elapsed_time": "0:15:14", "remaining_time": "0:31:01", "throughput": 2781.07, "total_tokens": 2543296} | |
| {"current_steps": 1030, "total_steps": 3096, "loss": 0.0932, "lr": 2.252634789971827e-05, "epoch": 0.9975786924939467, "percentage": 33.27, "elapsed_time": "0:15:23", "remaining_time": "0:30:51", "throughput": 2781.24, "total_tokens": 2567680} | |
| {"current_steps": 1040, "total_steps": 3096, "loss": 0.0924, "lr": 2.2394300587478566e-05, "epoch": 1.006779661016949, "percentage": 33.59, "elapsed_time": "0:15:31", "remaining_time": "0:30:41", "throughput": 2781.23, "total_tokens": 2591016} | |
| {"current_steps": 1050, "total_steps": 3096, "loss": 0.0985, "lr": 2.2261491913205684e-05, "epoch": 1.0164648910411622, "percentage": 33.91, "elapsed_time": "0:15:40", "remaining_time": "0:30:32", "throughput": 2781.65, "total_tokens": 2615752} | |
| {"current_steps": 1060, "total_steps": 3096, "loss": 0.0853, "lr": 2.212793555168617e-05, "epoch": 1.0261501210653754, "percentage": 34.24, "elapsed_time": "0:15:49", "remaining_time": "0:30:22", "throughput": 2781.81, "total_tokens": 2640200} | |
| {"current_steps": 1070, "total_steps": 3096, "loss": 0.116, "lr": 2.1993645254692994e-05, "epoch": 1.0358353510895884, "percentage": 34.56, "elapsed_time": "0:15:58", "remaining_time": "0:30:13", "throughput": 2782.26, "total_tokens": 2665416} | |
| {"current_steps": 1080, "total_steps": 3096, "loss": 0.0972, "lr": 2.1858634849569578e-05, "epoch": 1.0455205811138015, "percentage": 34.88, "elapsed_time": "0:16:06", "remaining_time": "0:30:04", "throughput": 2782.34, "total_tokens": 2690376} | |
| {"current_steps": 1090, "total_steps": 3096, "loss": 0.0884, "lr": 2.1722918237806042e-05, "epoch": 1.0552058111380145, "percentage": 35.21, "elapsed_time": "0:16:15", "remaining_time": "0:29:55", "throughput": 2782.48, "total_tokens": 2715080} | |
| {"current_steps": 1100, "total_steps": 3096, "loss": 0.073, "lr": 2.158650939360782e-05, "epoch": 1.0648910411622277, "percentage": 35.53, "elapsed_time": "0:16:24", "remaining_time": "0:29:47", "throughput": 2782.44, "total_tokens": 2740424} | |
| {"current_steps": 1110, "total_steps": 3096, "loss": 0.0813, "lr": 2.1449422362456794e-05, "epoch": 1.0745762711864406, "percentage": 35.85, "elapsed_time": "0:16:34", "remaining_time": "0:29:38", "throughput": 2781.98, "total_tokens": 2765640} | |
| {"current_steps": 1120, "total_steps": 3096, "loss": 0.0953, "lr": 2.13116712596651e-05, "epoch": 1.0842615012106538, "percentage": 36.18, "elapsed_time": "0:16:43", "remaining_time": "0:29:30", "throughput": 2781.92, "total_tokens": 2791176} | |
| {"current_steps": 1130, "total_steps": 3096, "loss": 0.0933, "lr": 2.1173270268921703e-05, "epoch": 1.0939467312348667, "percentage": 36.5, "elapsed_time": "0:16:52", "remaining_time": "0:29:21", "throughput": 2781.99, "total_tokens": 2816072} | |
| {"current_steps": 1140, "total_steps": 3096, "loss": 0.0819, "lr": 2.1034233640831988e-05, "epoch": 1.10363196125908, "percentage": 36.82, "elapsed_time": "0:17:01", "remaining_time": "0:29:12", "throughput": 2782.02, "total_tokens": 2840776} | |
| {"current_steps": 1150, "total_steps": 3096, "loss": 0.0611, "lr": 2.0894575691450396e-05, "epoch": 1.113317191283293, "percentage": 37.14, "elapsed_time": "0:17:09", "remaining_time": "0:29:02", "throughput": 2782.23, "total_tokens": 2865416} | |
| {"current_steps": 1160, "total_steps": 3096, "loss": 0.0748, "lr": 2.0754310800806395e-05, "epoch": 1.123002421307506, "percentage": 37.47, "elapsed_time": "0:17:18", "remaining_time": "0:28:53", "throughput": 2782.56, "total_tokens": 2890248} | |
| {"current_steps": 1170, "total_steps": 3096, "loss": 0.0959, "lr": 2.0613453411423797e-05, "epoch": 1.1326876513317192, "percentage": 37.79, "elapsed_time": "0:17:27", "remaining_time": "0:28:44", "throughput": 2783.24, "total_tokens": 2916392} | |
| {"current_steps": 1180, "total_steps": 3096, "loss": 0.0709, "lr": 2.0472018026833684e-05, "epoch": 1.1423728813559322, "percentage": 38.11, "elapsed_time": "0:17:36", "remaining_time": "0:28:35", "throughput": 2783.5, "total_tokens": 2941160} | |
| {"current_steps": 1190, "total_steps": 3096, "loss": 0.0731, "lr": 2.0330019210081022e-05, "epoch": 1.1520581113801454, "percentage": 38.44, "elapsed_time": "0:17:45", "remaining_time": "0:28:26", "throughput": 2784.04, "total_tokens": 2966120} | |
| {"current_steps": 1200, "total_steps": 3096, "loss": 0.1005, "lr": 2.0187471582225173e-05, "epoch": 1.1617433414043583, "percentage": 38.76, "elapsed_time": "0:17:54", "remaining_time": "0:28:16", "throughput": 2784.05, "total_tokens": 2990088} | |
| {"current_steps": 1210, "total_steps": 3096, "loss": 0.0579, "lr": 2.004438982083442e-05, "epoch": 1.1714285714285715, "percentage": 39.08, "elapsed_time": "0:18:02", "remaining_time": "0:28:07", "throughput": 2784.41, "total_tokens": 3015400} | |
| {"current_steps": 1220, "total_steps": 3096, "loss": 0.0792, "lr": 1.9900788658474677e-05, "epoch": 1.1811138014527844, "percentage": 39.41, "elapsed_time": "0:18:11", "remaining_time": "0:27:58", "throughput": 2784.44, "total_tokens": 3039464} | |
| {"current_steps": 1230, "total_steps": 3096, "loss": 0.057, "lr": 1.975668288119252e-05, "epoch": 1.1907990314769976, "percentage": 39.73, "elapsed_time": "0:18:20", "remaining_time": "0:27:49", "throughput": 2784.75, "total_tokens": 3063816} | |
| {"current_steps": 1240, "total_steps": 3096, "loss": 0.102, "lr": 1.961208732699275e-05, "epoch": 1.2004842615012106, "percentage": 40.05, "elapsed_time": "0:18:29", "remaining_time": "0:27:40", "throughput": 2785.1, "total_tokens": 3088968} | |
| {"current_steps": 1250, "total_steps": 3096, "loss": 0.0691, "lr": 1.9467016884310565e-05, "epoch": 1.2101694915254237, "percentage": 40.37, "elapsed_time": "0:18:37", "remaining_time": "0:27:31", "throughput": 2785.11, "total_tokens": 3113736} | |
| {"current_steps": 1260, "total_steps": 3096, "loss": 0.0668, "lr": 1.9321486490478565e-05, "epoch": 1.2198547215496367, "percentage": 40.7, "elapsed_time": "0:18:46", "remaining_time": "0:27:22", "throughput": 2784.86, "total_tokens": 3138344} | |
| {"current_steps": 1270, "total_steps": 3096, "loss": 0.0711, "lr": 1.91755111301887e-05, "epoch": 1.2295399515738499, "percentage": 41.02, "elapsed_time": "0:18:55", "remaining_time": "0:27:13", "throughput": 2784.88, "total_tokens": 3163496} | |
| {"current_steps": 1280, "total_steps": 3096, "loss": 0.0605, "lr": 1.902910583394938e-05, "epoch": 1.239225181598063, "percentage": 41.34, "elapsed_time": "0:19:05", "remaining_time": "0:27:04", "throughput": 2784.37, "total_tokens": 3188392} | |
| {"current_steps": 1290, "total_steps": 3096, "loss": 0.0448, "lr": 1.888228567653781e-05, "epoch": 1.248910411622276, "percentage": 41.67, "elapsed_time": "0:19:14", "remaining_time": "0:26:56", "throughput": 2783.65, "total_tokens": 3213224} | |
| {"current_steps": 1300, "total_steps": 3096, "loss": 0.0815, "lr": 1.873506577544784e-05, "epoch": 1.2585956416464892, "percentage": 41.99, "elapsed_time": "0:19:23", "remaining_time": "0:26:47", "throughput": 2783.48, "total_tokens": 3238568} | |
| {"current_steps": 1310, "total_steps": 3096, "loss": 0.1043, "lr": 1.8587461289333327e-05, "epoch": 1.2682808716707021, "percentage": 42.31, "elapsed_time": "0:19:32", "remaining_time": "0:26:38", "throughput": 2783.68, "total_tokens": 3264264} | |
| {"current_steps": 1320, "total_steps": 3096, "loss": 0.1037, "lr": 1.8439487416447353e-05, "epoch": 1.2779661016949153, "percentage": 42.64, "elapsed_time": "0:19:41", "remaining_time": "0:26:29", "throughput": 2783.47, "total_tokens": 3288840} | |
| {"current_steps": 1330, "total_steps": 3096, "loss": 0.0928, "lr": 1.8291159393077294e-05, "epoch": 1.2876513317191283, "percentage": 42.96, "elapsed_time": "0:19:50", "remaining_time": "0:26:20", "throughput": 2783.45, "total_tokens": 3313832} | |
| {"current_steps": 1340, "total_steps": 3096, "loss": 0.0775, "lr": 1.814249249197602e-05, "epoch": 1.2973365617433414, "percentage": 43.28, "elapsed_time": "0:19:59", "remaining_time": "0:26:11", "throughput": 2783.17, "total_tokens": 3337736} | |
| {"current_steps": 1350, "total_steps": 3096, "loss": 0.0521, "lr": 1.7993502020789294e-05, "epoch": 1.3070217917675544, "percentage": 43.6, "elapsed_time": "0:20:08", "remaining_time": "0:26:02", "throughput": 2782.93, "total_tokens": 3362024} | |
| {"current_steps": 1360, "total_steps": 3096, "loss": 0.0687, "lr": 1.7844203320479614e-05, "epoch": 1.3167070217917676, "percentage": 43.93, "elapsed_time": "0:20:17", "remaining_time": "0:25:53", "throughput": 2782.97, "total_tokens": 3387496} | |
| {"current_steps": 1370, "total_steps": 3096, "loss": 0.0704, "lr": 1.7694611763746632e-05, "epoch": 1.3263922518159807, "percentage": 44.25, "elapsed_time": "0:20:26", "remaining_time": "0:25:44", "throughput": 2782.53, "total_tokens": 3412072} | |
| {"current_steps": 1380, "total_steps": 3096, "loss": 0.0826, "lr": 1.754474275344427e-05, "epoch": 1.3360774818401937, "percentage": 44.57, "elapsed_time": "0:20:35", "remaining_time": "0:25:36", "throughput": 2782.31, "total_tokens": 3437096} | |
| {"current_steps": 1390, "total_steps": 3096, "loss": 0.0445, "lr": 1.7394611720994747e-05, "epoch": 1.3457627118644067, "percentage": 44.9, "elapsed_time": "0:20:44", "remaining_time": "0:25:27", "throughput": 2781.98, "total_tokens": 3462120} | |
| {"current_steps": 1400, "total_steps": 3096, "loss": 0.0951, "lr": 1.724423412479967e-05, "epoch": 1.3554479418886198, "percentage": 45.22, "elapsed_time": "0:20:53", "remaining_time": "0:25:18", "throughput": 2782.03, "total_tokens": 3486952} | |
| {"current_steps": 1410, "total_steps": 3096, "loss": 0.0539, "lr": 1.7093625448648348e-05, "epoch": 1.365133171912833, "percentage": 45.54, "elapsed_time": "0:21:02", "remaining_time": "0:25:09", "throughput": 2782.37, "total_tokens": 3512264} | |
| {"current_steps": 1420, "total_steps": 3096, "loss": 0.0848, "lr": 1.694280120012349e-05, "epoch": 1.374818401937046, "percentage": 45.87, "elapsed_time": "0:21:11", "remaining_time": "0:25:00", "throughput": 2782.67, "total_tokens": 3537192} | |
| {"current_steps": 1430, "total_steps": 3096, "loss": 0.0629, "lr": 1.6791776909004434e-05, "epoch": 1.3845036319612591, "percentage": 46.19, "elapsed_time": "0:21:19", "remaining_time": "0:24:50", "throughput": 2782.67, "total_tokens": 3560872} | |
| {"current_steps": 1440, "total_steps": 3096, "loss": 0.079, "lr": 1.664056812566812e-05, "epoch": 1.394188861985472, "percentage": 46.51, "elapsed_time": "0:21:28", "remaining_time": "0:24:41", "throughput": 2783.19, "total_tokens": 3586216} | |
| {"current_steps": 1450, "total_steps": 3096, "loss": 0.0798, "lr": 1.648919041948792e-05, "epoch": 1.4038740920096853, "percentage": 46.83, "elapsed_time": "0:21:37", "remaining_time": "0:24:32", "throughput": 2783.47, "total_tokens": 3610792} | |
| {"current_steps": 1460, "total_steps": 3096, "loss": 0.0897, "lr": 1.6337659377230544e-05, "epoch": 1.4135593220338982, "percentage": 47.16, "elapsed_time": "0:21:45", "remaining_time": "0:24:23", "throughput": 2783.54, "total_tokens": 3634760} | |
| {"current_steps": 1470, "total_steps": 3096, "loss": 0.0858, "lr": 1.61859906014511e-05, "epoch": 1.4232445520581114, "percentage": 47.48, "elapsed_time": "0:21:54", "remaining_time": "0:24:14", "throughput": 2783.95, "total_tokens": 3659560} | |
| {"current_steps": 1480, "total_steps": 3096, "loss": 0.0532, "lr": 1.6034199708886573e-05, "epoch": 1.4329297820823244, "percentage": 47.8, "elapsed_time": "0:22:03", "remaining_time": "0:24:04", "throughput": 2784.49, "total_tokens": 3684840} | |
| {"current_steps": 1490, "total_steps": 3096, "loss": 0.0842, "lr": 1.5882302328847847e-05, "epoch": 1.4426150121065375, "percentage": 48.13, "elapsed_time": "0:22:11", "remaining_time": "0:23:55", "throughput": 2784.73, "total_tokens": 3709096} | |
| {"current_steps": 1500, "total_steps": 3096, "loss": 0.0367, "lr": 1.5730314101610376e-05, "epoch": 1.4523002421307507, "percentage": 48.45, "elapsed_time": "0:22:20", "remaining_time": "0:23:46", "throughput": 2785.43, "total_tokens": 3734728} | |
| {"current_steps": 1510, "total_steps": 3096, "loss": 0.1085, "lr": 1.5578250676803824e-05, "epoch": 1.4619854721549637, "percentage": 48.77, "elapsed_time": "0:22:29", "remaining_time": "0:23:37", "throughput": 2785.74, "total_tokens": 3758984} | |
| {"current_steps": 1520, "total_steps": 3096, "loss": 0.0712, "lr": 1.5426127711800636e-05, "epoch": 1.4716707021791768, "percentage": 49.1, "elapsed_time": "0:22:38", "remaining_time": "0:23:28", "throughput": 2786.32, "total_tokens": 3784296} | |
| {"current_steps": 1530, "total_steps": 3096, "loss": 0.0705, "lr": 1.5273960870103872e-05, "epoch": 1.4813559322033898, "percentage": 49.42, "elapsed_time": "0:22:47", "remaining_time": "0:23:19", "throughput": 2786.94, "total_tokens": 3809768} | |
| {"current_steps": 1540, "total_steps": 3096, "loss": 0.071, "lr": 1.5121765819734418e-05, "epoch": 1.491041162227603, "percentage": 49.74, "elapsed_time": "0:22:55", "remaining_time": "0:23:10", "throughput": 2787.27, "total_tokens": 3834536} | |
| {"current_steps": 1550, "total_steps": 3096, "loss": 0.0648, "lr": 1.4969558231617681e-05, "epoch": 1.5007263922518161, "percentage": 50.06, "elapsed_time": "0:23:04", "remaining_time": "0:23:00", "throughput": 2787.46, "total_tokens": 3858792} | |
| {"current_steps": 1560, "total_steps": 3096, "loss": 0.0633, "lr": 1.4817353777970038e-05, "epoch": 1.510411622276029, "percentage": 50.39, "elapsed_time": "0:23:13", "remaining_time": "0:22:51", "throughput": 2787.76, "total_tokens": 3883976} | |
| {"current_steps": 1570, "total_steps": 3096, "loss": 0.0726, "lr": 1.466516813068512e-05, "epoch": 1.520096852300242, "percentage": 50.71, "elapsed_time": "0:23:21", "remaining_time": "0:22:42", "throughput": 2787.8, "total_tokens": 3908392} | |
| {"current_steps": 1580, "total_steps": 3096, "loss": 0.0882, "lr": 1.451301695972015e-05, "epoch": 1.5297820823244552, "percentage": 51.03, "elapsed_time": "0:23:30", "remaining_time": "0:22:33", "throughput": 2787.8, "total_tokens": 3932552} | |
| {"current_steps": 1590, "total_steps": 3096, "loss": 0.1149, "lr": 1.436091593148244e-05, "epoch": 1.5394673123486684, "percentage": 51.36, "elapsed_time": "0:23:39", "remaining_time": "0:22:24", "throughput": 2788.11, "total_tokens": 3957672} | |
| {"current_steps": 1600, "total_steps": 3096, "loss": 0.0841, "lr": 1.4208880707216323e-05, "epoch": 1.5491525423728814, "percentage": 51.68, "elapsed_time": "0:23:48", "remaining_time": "0:22:15", "throughput": 2788.46, "total_tokens": 3982824} | |
| {"current_steps": 1610, "total_steps": 3096, "loss": 0.0896, "lr": 1.405692694139054e-05, "epoch": 1.5588377723970943, "percentage": 52.0, "elapsed_time": "0:23:57", "remaining_time": "0:22:06", "throughput": 2788.68, "total_tokens": 4008072} | |
| {"current_steps": 1620, "total_steps": 3096, "loss": 0.0629, "lr": 1.3905070280086387e-05, "epoch": 1.5685230024213075, "percentage": 52.33, "elapsed_time": "0:24:06", "remaining_time": "0:21:57", "throughput": 2788.94, "total_tokens": 4033096} | |
| {"current_steps": 1630, "total_steps": 3096, "loss": 0.077, "lr": 1.3753326359386695e-05, "epoch": 1.5782082324455207, "percentage": 52.65, "elapsed_time": "0:24:15", "remaining_time": "0:21:48", "throughput": 2789.08, "total_tokens": 4058120} | |
| {"current_steps": 1640, "total_steps": 3096, "loss": 0.0853, "lr": 1.3601710803765814e-05, "epoch": 1.5878934624697336, "percentage": 52.97, "elapsed_time": "0:24:23", "remaining_time": "0:21:39", "throughput": 2789.03, "total_tokens": 4082792} | |
| {"current_steps": 1650, "total_steps": 3096, "loss": 0.0605, "lr": 1.3450239224480884e-05, "epoch": 1.5975786924939466, "percentage": 53.29, "elapsed_time": "0:24:32", "remaining_time": "0:21:30", "throughput": 2788.78, "total_tokens": 4107336} | |
| {"current_steps": 1660, "total_steps": 3096, "loss": 0.0985, "lr": 1.329892721796433e-05, "epoch": 1.6072639225181597, "percentage": 53.62, "elapsed_time": "0:24:41", "remaining_time": "0:21:21", "throughput": 2788.49, "total_tokens": 4132456} | |
| {"current_steps": 1670, "total_steps": 3096, "loss": 0.0547, "lr": 1.314779036421802e-05, "epoch": 1.616949152542373, "percentage": 53.94, "elapsed_time": "0:24:50", "remaining_time": "0:21:12", "throughput": 2788.15, "total_tokens": 4156584} | |
| {"current_steps": 1680, "total_steps": 3096, "loss": 0.0919, "lr": 1.2996844225209033e-05, "epoch": 1.626634382566586, "percentage": 54.26, "elapsed_time": "0:24:59", "remaining_time": "0:21:04", "throughput": 2788.18, "total_tokens": 4181448} | |
| {"current_steps": 1690, "total_steps": 3096, "loss": 0.1204, "lr": 1.2846104343267283e-05, "epoch": 1.636319612590799, "percentage": 54.59, "elapsed_time": "0:25:08", "remaining_time": "0:20:55", "throughput": 2788.65, "total_tokens": 4207560} | |
| {"current_steps": 1700, "total_steps": 3096, "loss": 0.0664, "lr": 1.2695586239485223e-05, "epoch": 1.646004842615012, "percentage": 54.91, "elapsed_time": "0:25:17", "remaining_time": "0:20:46", "throughput": 2788.82, "total_tokens": 4232040} | |
| {"current_steps": 1710, "total_steps": 3096, "loss": 0.0805, "lr": 1.254530541211968e-05, "epoch": 1.6556900726392252, "percentage": 55.23, "elapsed_time": "0:25:26", "remaining_time": "0:20:37", "throughput": 2789.37, "total_tokens": 4257576} | |
| {"current_steps": 1720, "total_steps": 3096, "loss": 0.073, "lr": 1.2395277334996045e-05, "epoch": 1.6653753026634384, "percentage": 55.56, "elapsed_time": "0:25:35", "remaining_time": "0:20:28", "throughput": 2789.67, "total_tokens": 4282472} | |
| {"current_steps": 1730, "total_steps": 3096, "loss": 0.0734, "lr": 1.2245517455915036e-05, "epoch": 1.6750605326876513, "percentage": 55.88, "elapsed_time": "0:25:43", "remaining_time": "0:20:18", "throughput": 2789.89, "total_tokens": 4306792} | |
| {"current_steps": 1740, "total_steps": 3096, "loss": 0.0831, "lr": 1.2096041195062051e-05, "epoch": 1.6847457627118643, "percentage": 56.2, "elapsed_time": "0:25:52", "remaining_time": "0:20:10", "throughput": 2790.64, "total_tokens": 4333384} | |
| {"current_steps": 1750, "total_steps": 3096, "loss": 0.0691, "lr": 1.1946863943419452e-05, "epoch": 1.6944309927360774, "percentage": 56.52, "elapsed_time": "0:26:01", "remaining_time": "0:20:01", "throughput": 2790.93, "total_tokens": 4358344} | |
| {"current_steps": 1760, "total_steps": 3096, "loss": 0.0988, "lr": 1.1798001061181799e-05, "epoch": 1.7041162227602906, "percentage": 56.85, "elapsed_time": "0:26:09", "remaining_time": "0:19:51", "throughput": 2791.08, "total_tokens": 4381768} | |
| {"current_steps": 1770, "total_steps": 3096, "loss": 0.0936, "lr": 1.1649467876174252e-05, "epoch": 1.7138014527845038, "percentage": 57.17, "elapsed_time": "0:26:18", "remaining_time": "0:19:42", "throughput": 2791.17, "total_tokens": 4405192} | |
| {"current_steps": 1780, "total_steps": 3096, "loss": 0.0901, "lr": 1.1501279682274368e-05, "epoch": 1.7234866828087168, "percentage": 57.49, "elapsed_time": "0:26:27", "remaining_time": "0:19:33", "throughput": 2791.55, "total_tokens": 4430344} | |
| {"current_steps": 1790, "total_steps": 3096, "loss": 0.0691, "lr": 1.1353451737837312e-05, "epoch": 1.7331719128329297, "percentage": 57.82, "elapsed_time": "0:26:35", "remaining_time": "0:19:24", "throughput": 2791.94, "total_tokens": 4455336} | |
| {"current_steps": 1800, "total_steps": 3096, "loss": 0.0668, "lr": 1.1205999264124788e-05, "epoch": 1.7428571428571429, "percentage": 58.14, "elapsed_time": "0:26:44", "remaining_time": "0:19:15", "throughput": 2792.36, "total_tokens": 4480648} | |
| {"current_steps": 1810, "total_steps": 3096, "loss": 0.0788, "lr": 1.105893744373776e-05, "epoch": 1.752542372881356, "percentage": 58.46, "elapsed_time": "0:26:53", "remaining_time": "0:19:06", "throughput": 2792.91, "total_tokens": 4506600} | |
| {"current_steps": 1820, "total_steps": 3096, "loss": 0.0723, "lr": 1.0912281419053139e-05, "epoch": 1.762227602905569, "percentage": 58.79, "elapsed_time": "0:27:02", "remaining_time": "0:18:57", "throughput": 2793.18, "total_tokens": 4531368} | |
| {"current_steps": 1830, "total_steps": 3096, "loss": 0.0779, "lr": 1.0766046290664662e-05, "epoch": 1.771912832929782, "percentage": 59.11, "elapsed_time": "0:27:10", "remaining_time": "0:18:48", "throughput": 2793.16, "total_tokens": 4555272} | |
| {"current_steps": 1840, "total_steps": 3096, "loss": 0.0838, "lr": 1.0620247115828044e-05, "epoch": 1.7815980629539951, "percentage": 59.43, "elapsed_time": "0:27:19", "remaining_time": "0:18:39", "throughput": 2793.47, "total_tokens": 4580328} | |
| {"current_steps": 1850, "total_steps": 3096, "loss": 0.0594, "lr": 1.047489890691055e-05, "epoch": 1.7912832929782083, "percentage": 59.75, "elapsed_time": "0:27:28", "remaining_time": "0:18:30", "throughput": 2793.82, "total_tokens": 4605768} | |
| {"current_steps": 1860, "total_steps": 3096, "loss": 0.04, "lr": 1.0330016629845276e-05, "epoch": 1.8009685230024213, "percentage": 60.08, "elapsed_time": "0:27:37", "remaining_time": "0:18:21", "throughput": 2794.15, "total_tokens": 4631048} | |
| {"current_steps": 1870, "total_steps": 3096, "loss": 0.084, "lr": 1.0185615202590144e-05, "epoch": 1.8106537530266342, "percentage": 60.4, "elapsed_time": "0:27:46", "remaining_time": "0:18:12", "throughput": 2794.46, "total_tokens": 4656456} | |
| {"current_steps": 1880, "total_steps": 3096, "loss": 0.0654, "lr": 1.004170949359187e-05, "epoch": 1.8203389830508474, "percentage": 60.72, "elapsed_time": "0:27:55", "remaining_time": "0:18:03", "throughput": 2794.65, "total_tokens": 4681384} | |
| {"current_steps": 1890, "total_steps": 3096, "loss": 0.0712, "lr": 9.89831432025501e-06, "epoch": 1.8300242130750606, "percentage": 61.05, "elapsed_time": "0:28:03", "remaining_time": "0:17:54", "throughput": 2794.74, "total_tokens": 4706216} | |
| {"current_steps": 1900, "total_steps": 3096, "loss": 0.0829, "lr": 9.755444447416255e-06, "epoch": 1.8397094430992738, "percentage": 61.37, "elapsed_time": "0:28:12", "remaining_time": "0:17:45", "throughput": 2794.72, "total_tokens": 4730984} | |
| {"current_steps": 1910, "total_steps": 3096, "loss": 0.0532, "lr": 9.613114585824196e-06, "epoch": 1.8493946731234867, "percentage": 61.69, "elapsed_time": "0:28:21", "remaining_time": "0:17:36", "throughput": 2794.46, "total_tokens": 4755112} | |
| {"current_steps": 1920, "total_steps": 3096, "loss": 0.0781, "lr": 9.471339390624574e-06, "epoch": 1.8590799031476997, "percentage": 62.02, "elapsed_time": "0:28:30", "remaining_time": "0:17:27", "throughput": 2794.27, "total_tokens": 4780232} | |
| {"current_steps": 1930, "total_steps": 3096, "loss": 0.0908, "lr": 9.330133459851323e-06, "epoch": 1.8687651331719128, "percentage": 62.34, "elapsed_time": "0:28:39", "remaining_time": "0:17:19", "throughput": 2794.0, "total_tokens": 4805192} | |
| {"current_steps": 1940, "total_steps": 3096, "loss": 0.0398, "lr": 9.189511332923463e-06, "epoch": 1.878450363196126, "percentage": 62.66, "elapsed_time": "0:28:48", "remaining_time": "0:17:10", "throughput": 2794.17, "total_tokens": 4830856} | |
| {"current_steps": 1950, "total_steps": 3096, "loss": 0.0912, "lr": 9.049487489148008e-06, "epoch": 1.888135593220339, "percentage": 62.98, "elapsed_time": "0:28:57", "remaining_time": "0:17:01", "throughput": 2794.18, "total_tokens": 4855656} | |
| {"current_steps": 1960, "total_steps": 3096, "loss": 0.0746, "lr": 8.910076346229134e-06, "epoch": 1.897820823244552, "percentage": 63.31, "elapsed_time": "0:29:06", "remaining_time": "0:16:52", "throughput": 2794.25, "total_tokens": 4880392} | |
| {"current_steps": 1970, "total_steps": 3096, "loss": 0.1066, "lr": 8.77129225878361e-06, "epoch": 1.907506053268765, "percentage": 63.63, "elapsed_time": "0:29:15", "remaining_time": "0:16:43", "throughput": 2794.4, "total_tokens": 4905320} | |
| {"current_steps": 1980, "total_steps": 3096, "loss": 0.0839, "lr": 8.633149516862777e-06, "epoch": 1.9171912832929783, "percentage": 63.95, "elapsed_time": "0:29:24", "remaining_time": "0:16:34", "throughput": 2794.72, "total_tokens": 4930536} | |
| {"current_steps": 1990, "total_steps": 3096, "loss": 0.0527, "lr": 8.495662344481135e-06, "epoch": 1.9268765133171912, "percentage": 64.28, "elapsed_time": "0:29:33", "remaining_time": "0:16:25", "throughput": 2795.04, "total_tokens": 4956168} | |
| {"current_steps": 2000, "total_steps": 3096, "loss": 0.1033, "lr": 8.358844898151791e-06, "epoch": 1.9365617433414044, "percentage": 64.6, "elapsed_time": "0:29:41", "remaining_time": "0:16:16", "throughput": 2795.26, "total_tokens": 4980584} | |
| {"current_steps": 2010, "total_steps": 3096, "loss": 0.079, "lr": 8.222711265428779e-06, "epoch": 1.9462469733656174, "percentage": 64.92, "elapsed_time": "0:29:51", "remaining_time": "0:16:08", "throughput": 2794.07, "total_tokens": 5005992} | |
| {"current_steps": 2020, "total_steps": 3096, "loss": 0.0652, "lr": 8.087275463456548e-06, "epoch": 1.9559322033898305, "percentage": 65.25, "elapsed_time": "0:30:00", "remaining_time": "0:15:59", "throughput": 2794.48, "total_tokens": 5032168} | |
| {"current_steps": 2030, "total_steps": 3096, "loss": 0.0593, "lr": 7.952551437526648e-06, "epoch": 1.9656174334140437, "percentage": 65.57, "elapsed_time": "0:30:09", "remaining_time": "0:15:50", "throughput": 2794.57, "total_tokens": 5056296} | |
| {"current_steps": 2040, "total_steps": 3096, "loss": 0.0933, "lr": 7.818553059641868e-06, "epoch": 1.9753026634382567, "percentage": 65.89, "elapsed_time": "0:30:17", "remaining_time": "0:15:41", "throughput": 2794.66, "total_tokens": 5080424} | |
| {"current_steps": 2050, "total_steps": 3096, "loss": 0.059, "lr": 7.685294127087852e-06, "epoch": 1.9849878934624696, "percentage": 66.21, "elapsed_time": "0:30:26", "remaining_time": "0:15:32", "throughput": 2794.68, "total_tokens": 5104904} | |
| {"current_steps": 2060, "total_steps": 3096, "loss": 0.0766, "lr": 7.552788361012486e-06, "epoch": 1.9946731234866828, "percentage": 66.54, "elapsed_time": "0:30:35", "remaining_time": "0:15:23", "throughput": 2794.63, "total_tokens": 5129064} | |
| {"current_steps": 2070, "total_steps": 3096, "loss": 0.0637, "lr": 7.421049405013061e-06, "epoch": 2.0038740920096854, "percentage": 66.86, "elapsed_time": "0:30:43", "remaining_time": "0:15:13", "throughput": 2794.47, "total_tokens": 5152120} | |
| {"current_steps": 2080, "total_steps": 3096, "loss": 0.0419, "lr": 7.290090823731452e-06, "epoch": 2.013559322033898, "percentage": 67.18, "elapsed_time": "0:30:52", "remaining_time": "0:15:04", "throughput": 2794.45, "total_tokens": 5176728} | |
| {"current_steps": 2090, "total_steps": 3096, "loss": 0.0586, "lr": 7.159926101457423e-06, "epoch": 2.0232445520581113, "percentage": 67.51, "elapsed_time": "0:31:01", "remaining_time": "0:14:55", "throughput": 2794.47, "total_tokens": 5201176} | |
| {"current_steps": 2100, "total_steps": 3096, "loss": 0.0382, "lr": 7.030568640740202e-06, "epoch": 2.0329297820823244, "percentage": 67.83, "elapsed_time": "0:31:09", "remaining_time": "0:14:46", "throughput": 2794.32, "total_tokens": 5225368} | |
| {"current_steps": 2110, "total_steps": 3096, "loss": 0.0597, "lr": 6.902031761008456e-06, "epoch": 2.0426150121065376, "percentage": 68.15, "elapsed_time": "0:31:18", "remaining_time": "0:14:38", "throughput": 2794.17, "total_tokens": 5250136} | |
| {"current_steps": 2120, "total_steps": 3096, "loss": 0.0367, "lr": 6.774328697198879e-06, "epoch": 2.052300242130751, "percentage": 68.48, "elapsed_time": "0:31:27", "remaining_time": "0:14:29", "throughput": 2793.88, "total_tokens": 5274264} | |
| {"current_steps": 2130, "total_steps": 3096, "loss": 0.04, "lr": 6.647472598393399e-06, "epoch": 2.0619854721549635, "percentage": 68.8, "elapsed_time": "0:31:36", "remaining_time": "0:14:20", "throughput": 2793.29, "total_tokens": 5298264} | |
| {"current_steps": 2140, "total_steps": 3096, "loss": 0.0426, "lr": 6.521476526465309e-06, "epoch": 2.0716707021791767, "percentage": 69.12, "elapsed_time": "0:31:45", "remaining_time": "0:14:11", "throughput": 2793.0, "total_tokens": 5322872} | |
| {"current_steps": 2150, "total_steps": 3096, "loss": 0.0706, "lr": 6.3963534547343126e-06, "epoch": 2.08135593220339, "percentage": 69.44, "elapsed_time": "0:31:54", "remaining_time": "0:14:02", "throughput": 2792.93, "total_tokens": 5348120} | |
| {"current_steps": 2160, "total_steps": 3096, "loss": 0.0377, "lr": 6.27211626663071e-06, "epoch": 2.091041162227603, "percentage": 69.77, "elapsed_time": "0:32:03", "remaining_time": "0:13:53", "throughput": 2792.9, "total_tokens": 5373240} | |
| {"current_steps": 2170, "total_steps": 3096, "loss": 0.0608, "lr": 6.148777754368862e-06, "epoch": 2.100726392251816, "percentage": 70.09, "elapsed_time": "0:32:12", "remaining_time": "0:13:44", "throughput": 2792.81, "total_tokens": 5398296} | |
| {"current_steps": 2180, "total_steps": 3096, "loss": 0.0334, "lr": 6.026350617630011e-06, "epoch": 2.110411622276029, "percentage": 70.41, "elapsed_time": "0:32:22", "remaining_time": "0:13:36", "throughput": 2792.9, "total_tokens": 5424408} | |
| {"current_steps": 2190, "total_steps": 3096, "loss": 0.0445, "lr": 5.904847462254646e-06, "epoch": 2.120096852300242, "percentage": 70.74, "elapsed_time": "0:32:31", "remaining_time": "0:13:27", "throughput": 2792.9, "total_tokens": 5449880} | |
| {"current_steps": 2200, "total_steps": 3096, "loss": 0.0735, "lr": 5.784280798944537e-06, "epoch": 2.1297820823244553, "percentage": 71.06, "elapsed_time": "0:32:40", "remaining_time": "0:13:18", "throughput": 2792.75, "total_tokens": 5474808} | |
| {"current_steps": 2210, "total_steps": 3096, "loss": 0.056, "lr": 5.6646630419745404e-06, "epoch": 2.1394673123486685, "percentage": 71.38, "elapsed_time": "0:32:49", "remaining_time": "0:13:09", "throughput": 2792.6, "total_tokens": 5499672} | |
| {"current_steps": 2220, "total_steps": 3096, "loss": 0.0703, "lr": 5.5460065079143694e-06, "epoch": 2.1491525423728812, "percentage": 71.71, "elapsed_time": "0:32:58", "remaining_time": "0:13:00", "throughput": 2792.24, "total_tokens": 5523672} | |
| {"current_steps": 2230, "total_steps": 3096, "loss": 0.0504, "lr": 5.428323414360401e-06, "epoch": 2.1588377723970944, "percentage": 72.03, "elapsed_time": "0:33:07", "remaining_time": "0:12:51", "throughput": 2791.86, "total_tokens": 5548664} | |
| {"current_steps": 2240, "total_steps": 3096, "loss": 0.0398, "lr": 5.311625878677658e-06, "epoch": 2.1685230024213076, "percentage": 72.35, "elapsed_time": "0:33:16", "remaining_time": "0:12:43", "throughput": 2791.58, "total_tokens": 5573944} | |
| {"current_steps": 2250, "total_steps": 3096, "loss": 0.045, "lr": 5.195925916752166e-06, "epoch": 2.1782082324455208, "percentage": 72.67, "elapsed_time": "0:33:25", "remaining_time": "0:12:34", "throughput": 2791.51, "total_tokens": 5599224} | |
| {"current_steps": 2260, "total_steps": 3096, "loss": 0.0483, "lr": 5.081235441753685e-06, "epoch": 2.1878934624697335, "percentage": 73.0, "elapsed_time": "0:33:34", "remaining_time": "0:12:25", "throughput": 2791.52, "total_tokens": 5623864} | |
| {"current_steps": 2270, "total_steps": 3096, "loss": 0.0476, "lr": 4.9675662629091055e-06, "epoch": 2.1975786924939467, "percentage": 73.32, "elapsed_time": "0:33:43", "remaining_time": "0:12:16", "throughput": 2791.6, "total_tokens": 5648760} | |
| {"current_steps": 2280, "total_steps": 3096, "loss": 0.0537, "lr": 4.854930084286458e-06, "epoch": 2.20726392251816, "percentage": 73.64, "elapsed_time": "0:33:52", "remaining_time": "0:12:07", "throughput": 2791.69, "total_tokens": 5673720} | |
| {"current_steps": 2290, "total_steps": 3096, "loss": 0.0567, "lr": 4.743338503589796e-06, "epoch": 2.216949152542373, "percentage": 73.97, "elapsed_time": "0:34:01", "remaining_time": "0:11:58", "throughput": 2791.63, "total_tokens": 5697784} | |
| {"current_steps": 2300, "total_steps": 3096, "loss": 0.0502, "lr": 4.632803010965056e-06, "epoch": 2.226634382566586, "percentage": 74.29, "elapsed_time": "0:34:09", "remaining_time": "0:11:49", "throughput": 2791.75, "total_tokens": 5722040} | |
| {"current_steps": 2310, "total_steps": 3096, "loss": 0.0444, "lr": 4.523334987816917e-06, "epoch": 2.236319612590799, "percentage": 74.61, "elapsed_time": "0:34:18", "remaining_time": "0:11:40", "throughput": 2792.0, "total_tokens": 5747672} | |
| {"current_steps": 2320, "total_steps": 3096, "loss": 0.0482, "lr": 4.414945705636949e-06, "epoch": 2.246004842615012, "percentage": 74.94, "elapsed_time": "0:34:27", "remaining_time": "0:11:31", "throughput": 2792.11, "total_tokens": 5772056} | |
| {"current_steps": 2330, "total_steps": 3096, "loss": 0.0398, "lr": 4.307646324843004e-06, "epoch": 2.2556900726392253, "percentage": 75.26, "elapsed_time": "0:34:35", "remaining_time": "0:11:22", "throughput": 2792.26, "total_tokens": 5796728} | |
| {"current_steps": 2340, "total_steps": 3096, "loss": 0.0268, "lr": 4.201447893630065e-06, "epoch": 2.2653753026634385, "percentage": 75.58, "elapsed_time": "0:34:44", "remaining_time": "0:11:13", "throughput": 2792.59, "total_tokens": 5822520} | |
| {"current_steps": 2350, "total_steps": 3096, "loss": 0.0427, "lr": 4.096361346832681e-06, "epoch": 2.275060532687651, "percentage": 75.9, "elapsed_time": "0:34:53", "remaining_time": "0:11:04", "throughput": 2792.73, "total_tokens": 5847768} | |
| {"current_steps": 2360, "total_steps": 3096, "loss": 0.0363, "lr": 3.992397504799039e-06, "epoch": 2.2847457627118644, "percentage": 76.23, "elapsed_time": "0:35:02", "remaining_time": "0:10:55", "throughput": 2792.93, "total_tokens": 5873208} | |
| {"current_steps": 2370, "total_steps": 3096, "loss": 0.0432, "lr": 3.889567072276827e-06, "epoch": 2.2944309927360775, "percentage": 76.55, "elapsed_time": "0:35:11", "remaining_time": "0:10:46", "throughput": 2792.88, "total_tokens": 5897368} | |
| {"current_steps": 2380, "total_steps": 3096, "loss": 0.0662, "lr": 3.78788063731103e-06, "epoch": 2.3041162227602907, "percentage": 76.87, "elapsed_time": "0:35:20", "remaining_time": "0:10:37", "throughput": 2792.95, "total_tokens": 5921656} | |
| {"current_steps": 2390, "total_steps": 3096, "loss": 0.0434, "lr": 3.6873486701536814e-06, "epoch": 2.3138014527845034, "percentage": 77.2, "elapsed_time": "0:35:29", "remaining_time": "0:10:28", "throughput": 2792.85, "total_tokens": 5946328} | |
| {"current_steps": 2400, "total_steps": 3096, "loss": 0.0425, "lr": 3.587981522185829e-06, "epoch": 2.3234866828087166, "percentage": 77.52, "elapsed_time": "0:35:38", "remaining_time": "0:10:20", "throughput": 2792.83, "total_tokens": 5971352} | |
| {"current_steps": 2410, "total_steps": 3096, "loss": 0.0533, "lr": 3.4897894248516736e-06, "epoch": 2.33317191283293, "percentage": 77.84, "elapsed_time": "0:35:47", "remaining_time": "0:10:11", "throughput": 2792.46, "total_tokens": 5995544} | |
| {"current_steps": 2420, "total_steps": 3096, "loss": 0.0499, "lr": 3.3927824886050555e-06, "epoch": 2.342857142857143, "percentage": 78.17, "elapsed_time": "0:35:56", "remaining_time": "0:10:02", "throughput": 2792.23, "total_tokens": 6020600} | |
| {"current_steps": 2430, "total_steps": 3096, "loss": 0.021, "lr": 3.2969707018684657e-06, "epoch": 2.3525423728813557, "percentage": 78.49, "elapsed_time": "0:36:05", "remaining_time": "0:09:53", "throughput": 2792.02, "total_tokens": 6045304} | |
| {"current_steps": 2440, "total_steps": 3096, "loss": 0.0216, "lr": 3.202363930004536e-06, "epoch": 2.362227602905569, "percentage": 78.81, "elapsed_time": "0:36:14", "remaining_time": "0:09:44", "throughput": 2792.19, "total_tokens": 6070776} | |
| {"current_steps": 2450, "total_steps": 3096, "loss": 0.0431, "lr": 3.1089719143002615e-06, "epoch": 2.371912832929782, "percentage": 79.13, "elapsed_time": "0:36:22", "remaining_time": "0:09:35", "throughput": 2792.23, "total_tokens": 6095256} | |
| {"current_steps": 2460, "total_steps": 3096, "loss": 0.0515, "lr": 3.016804270963994e-06, "epoch": 2.3815980629539952, "percentage": 79.46, "elapsed_time": "0:36:31", "remaining_time": "0:09:26", "throughput": 2792.42, "total_tokens": 6120088} | |
| {"current_steps": 2470, "total_steps": 3096, "loss": 0.0349, "lr": 2.925870490135255e-06, "epoch": 2.3912832929782084, "percentage": 79.78, "elapsed_time": "0:36:40", "remaining_time": "0:09:17", "throughput": 2792.56, "total_tokens": 6144792} | |
| {"current_steps": 2480, "total_steps": 3096, "loss": 0.0251, "lr": 2.8361799349076143e-06, "epoch": 2.400968523002421, "percentage": 80.1, "elapsed_time": "0:36:49", "remaining_time": "0:09:08", "throughput": 2792.73, "total_tokens": 6169688} | |
| {"current_steps": 2490, "total_steps": 3096, "loss": 0.0634, "lr": 2.747741840364593e-06, "epoch": 2.4106537530266343, "percentage": 80.43, "elapsed_time": "0:36:58", "remaining_time": "0:08:59", "throughput": 2792.87, "total_tokens": 6194680} | |
| {"current_steps": 2500, "total_steps": 3096, "loss": 0.0451, "lr": 2.6605653126287555e-06, "epoch": 2.4203389830508475, "percentage": 80.75, "elapsed_time": "0:37:06", "remaining_time": "0:08:50", "throughput": 2792.92, "total_tokens": 6218712} | |
| {"current_steps": 2510, "total_steps": 3096, "loss": 0.0395, "lr": 2.5746593279241105e-06, "epoch": 2.4300242130750607, "percentage": 81.07, "elapsed_time": "0:37:15", "remaining_time": "0:08:41", "throughput": 2793.03, "total_tokens": 6243384} | |
| {"current_steps": 2520, "total_steps": 3096, "loss": 0.0537, "lr": 2.490032731651833e-06, "epoch": 2.4397094430992734, "percentage": 81.4, "elapsed_time": "0:37:23", "remaining_time": "0:08:32", "throughput": 2793.03, "total_tokens": 6267416} | |
| {"current_steps": 2530, "total_steps": 3096, "loss": 0.0402, "lr": 2.4066942374795205e-06, "epoch": 2.4493946731234866, "percentage": 81.72, "elapsed_time": "0:37:32", "remaining_time": "0:08:23", "throughput": 2793.22, "total_tokens": 6292696} | |
| {"current_steps": 2540, "total_steps": 3096, "loss": 0.0295, "lr": 2.324652426443962e-06, "epoch": 2.4590799031476998, "percentage": 82.04, "elapsed_time": "0:37:41", "remaining_time": "0:08:15", "throughput": 2793.27, "total_tokens": 6317208} | |
| {"current_steps": 2550, "total_steps": 3096, "loss": 0.0515, "lr": 2.243915746067587e-06, "epoch": 2.468765133171913, "percentage": 82.36, "elapsed_time": "0:37:50", "remaining_time": "0:08:06", "throughput": 2793.34, "total_tokens": 6341688} | |
| {"current_steps": 2560, "total_steps": 3096, "loss": 0.0443, "lr": 2.164492509488657e-06, "epoch": 2.478450363196126, "percentage": 82.69, "elapsed_time": "0:37:59", "remaining_time": "0:07:57", "throughput": 2793.53, "total_tokens": 6366712} | |
| {"current_steps": 2570, "total_steps": 3096, "loss": 0.0555, "lr": 2.086390894605288e-06, "epoch": 2.488135593220339, "percentage": 83.01, "elapsed_time": "0:38:07", "remaining_time": "0:07:48", "throughput": 2793.56, "total_tokens": 6391256} | |
| {"current_steps": 2580, "total_steps": 3096, "loss": 0.054, "lr": 2.0096189432334194e-06, "epoch": 2.497820823244552, "percentage": 83.33, "elapsed_time": "0:38:16", "remaining_time": "0:07:39", "throughput": 2793.6, "total_tokens": 6416184} | |
| {"current_steps": 2590, "total_steps": 3096, "loss": 0.075, "lr": 1.9341845602787733e-06, "epoch": 2.507506053268765, "percentage": 83.66, "elapsed_time": "0:38:25", "remaining_time": "0:07:30", "throughput": 2793.74, "total_tokens": 6441176} | |
| {"current_steps": 2600, "total_steps": 3096, "loss": 0.0384, "lr": 1.8600955129229009e-06, "epoch": 2.5171912832929784, "percentage": 83.98, "elapsed_time": "0:38:34", "remaining_time": "0:07:21", "throughput": 2793.75, "total_tokens": 6465688} | |
| {"current_steps": 2610, "total_steps": 3096, "loss": 0.038, "lr": 1.7873594298234557e-06, "epoch": 2.526876513317191, "percentage": 84.3, "elapsed_time": "0:38:43", "remaining_time": "0:07:12", "throughput": 2793.66, "total_tokens": 6490456} | |
| {"current_steps": 2620, "total_steps": 3096, "loss": 0.0233, "lr": 1.7159838003286848e-06, "epoch": 2.5365617433414043, "percentage": 84.63, "elapsed_time": "0:38:52", "remaining_time": "0:07:03", "throughput": 2793.54, "total_tokens": 6515704} | |
| {"current_steps": 2630, "total_steps": 3096, "loss": 0.0634, "lr": 1.645975973706269e-06, "epoch": 2.5462469733656174, "percentage": 84.95, "elapsed_time": "0:39:01", "remaining_time": "0:06:54", "throughput": 2793.29, "total_tokens": 6540920} | |
| {"current_steps": 2640, "total_steps": 3096, "loss": 0.0333, "lr": 1.5773431583866227e-06, "epoch": 2.5559322033898306, "percentage": 85.27, "elapsed_time": "0:39:10", "remaining_time": "0:06:46", "throughput": 2793.17, "total_tokens": 6565880} | |
| {"current_steps": 2650, "total_steps": 3096, "loss": 0.0649, "lr": 1.5100924212206534e-06, "epoch": 2.565617433414044, "percentage": 85.59, "elapsed_time": "0:39:19", "remaining_time": "0:06:37", "throughput": 2793.15, "total_tokens": 6591000} | |
| {"current_steps": 2660, "total_steps": 3096, "loss": 0.0531, "lr": 1.44423068675212e-06, "epoch": 2.5753026634382565, "percentage": 85.92, "elapsed_time": "0:39:28", "remaining_time": "0:06:28", "throughput": 2793.16, "total_tokens": 6615800} | |
| {"current_steps": 2670, "total_steps": 3096, "loss": 0.0426, "lr": 1.3797647365046411e-06, "epoch": 2.5849878934624697, "percentage": 86.24, "elapsed_time": "0:39:37", "remaining_time": "0:06:19", "throughput": 2792.98, "total_tokens": 6639288} | |
| {"current_steps": 2680, "total_steps": 3096, "loss": 0.0368, "lr": 1.3167012082834212e-06, "epoch": 2.594673123486683, "percentage": 86.56, "elapsed_time": "0:39:46", "remaining_time": "0:06:10", "throughput": 2793.09, "total_tokens": 6664632} | |
| {"current_steps": 2690, "total_steps": 3096, "loss": 0.0165, "lr": 1.2550465954917932e-06, "epoch": 2.6043583535108956, "percentage": 86.89, "elapsed_time": "0:39:54", "remaining_time": "0:06:01", "throughput": 2793.13, "total_tokens": 6689496} | |
| {"current_steps": 2700, "total_steps": 3096, "loss": 0.0331, "lr": 1.1948072464626102e-06, "epoch": 2.614043583535109, "percentage": 87.21, "elapsed_time": "0:40:03", "remaining_time": "0:05:52", "throughput": 2793.11, "total_tokens": 6714552} | |
| {"current_steps": 2710, "total_steps": 3096, "loss": 0.0226, "lr": 1.1359893638045854e-06, "epoch": 2.623728813559322, "percentage": 87.53, "elapsed_time": "0:40:12", "remaining_time": "0:05:43", "throughput": 2793.13, "total_tokens": 6739320} | |
| {"current_steps": 2720, "total_steps": 3096, "loss": 0.0611, "lr": 1.0785990037636335e-06, "epoch": 2.633414043583535, "percentage": 87.86, "elapsed_time": "0:40:21", "remaining_time": "0:05:34", "throughput": 2792.95, "total_tokens": 6763352} | |
| {"current_steps": 2730, "total_steps": 3096, "loss": 0.0615, "lr": 1.022642075599286e-06, "epoch": 2.6430992736077483, "percentage": 88.18, "elapsed_time": "0:40:30", "remaining_time": "0:05:25", "throughput": 2792.74, "total_tokens": 6787544} | |
| {"current_steps": 2740, "total_steps": 3096, "loss": 0.0393, "lr": 9.68124340976232e-07, "epoch": 2.6527845036319615, "percentage": 88.5, "elapsed_time": "0:40:39", "remaining_time": "0:05:16", "throughput": 2792.63, "total_tokens": 6812760} | |
| {"current_steps": 2750, "total_steps": 3096, "loss": 0.0656, "lr": 9.150514133710647e-07, "epoch": 2.6624697336561742, "percentage": 88.82, "elapsed_time": "0:40:48", "remaining_time": "0:05:08", "throughput": 2792.42, "total_tokens": 6838008} | |
| {"current_steps": 2760, "total_steps": 3096, "loss": 0.0452, "lr": 8.634287574942834e-07, "epoch": 2.6721549636803874, "percentage": 89.15, "elapsed_time": "0:40:57", "remaining_time": "0:04:59", "throughput": 2792.37, "total_tokens": 6863320} | |
| {"current_steps": 2770, "total_steps": 3096, "loss": 0.0404, "lr": 8.132616887276212e-07, "epoch": 2.6818401937046006, "percentage": 89.47, "elapsed_time": "0:41:06", "remaining_time": "0:04:50", "throughput": 2792.41, "total_tokens": 6888824} | |
| {"current_steps": 2780, "total_steps": 3096, "loss": 0.0543, "lr": 7.645553725767229e-07, "epoch": 2.6915254237288133, "percentage": 89.79, "elapsed_time": "0:41:15", "remaining_time": "0:04:41", "throughput": 2792.42, "total_tokens": 6913048} | |
| {"current_steps": 2790, "total_steps": 3096, "loss": 0.0459, "lr": 7.173148241392957e-07, "epoch": 2.7012106537530265, "percentage": 90.12, "elapsed_time": "0:41:24", "remaining_time": "0:04:32", "throughput": 2792.46, "total_tokens": 6937432} | |
| {"current_steps": 2800, "total_steps": 3096, "loss": 0.0386, "lr": 6.71544907588712e-07, "epoch": 2.7108958837772397, "percentage": 90.44, "elapsed_time": "0:41:33", "remaining_time": "0:04:23", "throughput": 2792.61, "total_tokens": 6962584} | |
| {"current_steps": 2810, "total_steps": 3096, "loss": 0.0714, "lr": 6.272503356731601e-07, "epoch": 2.720581113801453, "percentage": 90.76, "elapsed_time": "0:41:42", "remaining_time": "0:04:14", "throughput": 2792.72, "total_tokens": 6987768} | |
| {"current_steps": 2820, "total_steps": 3096, "loss": 0.0364, "lr": 5.84435669230401e-07, "epoch": 2.730266343825666, "percentage": 91.09, "elapsed_time": "0:41:51", "remaining_time": "0:04:05", "throughput": 2792.9, "total_tokens": 7013336} | |
| {"current_steps": 2830, "total_steps": 3096, "loss": 0.0346, "lr": 5.431053167181515e-07, "epoch": 2.739951573849879, "percentage": 91.41, "elapsed_time": "0:42:00", "remaining_time": "0:03:56", "throughput": 2793.04, "total_tokens": 7038648} | |
| {"current_steps": 2840, "total_steps": 3096, "loss": 0.0337, "lr": 5.032635337601687e-07, "epoch": 2.749636803874092, "percentage": 91.73, "elapsed_time": "0:42:09", "remaining_time": "0:03:47", "throughput": 2793.19, "total_tokens": 7064184} | |
| {"current_steps": 2850, "total_steps": 3096, "loss": 0.0229, "lr": 4.6491442270805596e-07, "epoch": 2.759322033898305, "percentage": 92.05, "elapsed_time": "0:42:18", "remaining_time": "0:03:39", "throughput": 2793.25, "total_tokens": 7089336} | |
| {"current_steps": 2860, "total_steps": 3096, "loss": 0.0472, "lr": 4.280619322188628e-07, "epoch": 2.7690072639225183, "percentage": 92.38, "elapsed_time": "0:42:26", "remaining_time": "0:03:30", "throughput": 2793.23, "total_tokens": 7114072} | |
| {"current_steps": 2870, "total_steps": 3096, "loss": 0.0498, "lr": 3.9270985684851545e-07, "epoch": 2.778692493946731, "percentage": 92.7, "elapsed_time": "0:42:38", "remaining_time": "0:03:21", "throughput": 2790.0, "total_tokens": 7139576} | |
| {"current_steps": 2880, "total_steps": 3096, "loss": 0.0442, "lr": 3.588618366610941e-07, "epoch": 2.788377723970944, "percentage": 93.02, "elapsed_time": "0:42:48", "remaining_time": "0:03:12", "throughput": 2790.16, "total_tokens": 7165432} | |
| {"current_steps": 2890, "total_steps": 3096, "loss": 0.0324, "lr": 3.2652135685403593e-07, "epoch": 2.7980629539951574, "percentage": 93.35, "elapsed_time": "0:42:57", "remaining_time": "0:03:03", "throughput": 2790.24, "total_tokens": 7190808} | |
| {"current_steps": 2900, "total_steps": 3096, "loss": 0.0497, "lr": 2.9569174739928096e-07, "epoch": 2.8077481840193705, "percentage": 93.67, "elapsed_time": "0:43:06", "remaining_time": "0:02:54", "throughput": 2790.31, "total_tokens": 7216440} | |
| {"current_steps": 2910, "total_steps": 3096, "loss": 0.0404, "lr": 2.663761827003941e-07, "epoch": 2.8174334140435837, "percentage": 93.99, "elapsed_time": "0:43:15", "remaining_time": "0:02:45", "throughput": 2790.45, "total_tokens": 7243480} | |
| {"current_steps": 2920, "total_steps": 3096, "loss": 0.0479, "lr": 2.38577681265707e-07, "epoch": 2.8271186440677964, "percentage": 94.32, "elapsed_time": "0:43:25", "remaining_time": "0:02:37", "throughput": 2790.24, "total_tokens": 7268568} | |
| {"current_steps": 2930, "total_steps": 3096, "loss": 0.0378, "lr": 2.122991053975215e-07, "epoch": 2.8368038740920096, "percentage": 94.64, "elapsed_time": "0:43:34", "remaining_time": "0:02:28", "throughput": 2790.04, "total_tokens": 7293784} | |
| {"current_steps": 2940, "total_steps": 3096, "loss": 0.0328, "lr": 1.8754316089737878e-07, "epoch": 2.846489104116223, "percentage": 94.96, "elapsed_time": "0:43:43", "remaining_time": "0:02:19", "throughput": 2789.98, "total_tokens": 7318680} | |
| {"current_steps": 2950, "total_steps": 3096, "loss": 0.0411, "lr": 1.6431239678746546e-07, "epoch": 2.856174334140436, "percentage": 95.28, "elapsed_time": "0:43:52", "remaining_time": "0:02:10", "throughput": 2789.95, "total_tokens": 7343864} | |
| {"current_steps": 2960, "total_steps": 3096, "loss": 0.0649, "lr": 1.4260920504814366e-07, "epoch": 2.8658595641646487, "percentage": 95.61, "elapsed_time": "0:44:01", "remaining_time": "0:02:01", "throughput": 2790.09, "total_tokens": 7370232} | |
| {"current_steps": 2970, "total_steps": 3096, "loss": 0.0462, "lr": 1.22435820371658e-07, "epoch": 2.875544794188862, "percentage": 95.93, "elapsed_time": "0:44:10", "remaining_time": "0:01:52", "throughput": 2789.96, "total_tokens": 7394936} | |
| {"current_steps": 2980, "total_steps": 3096, "loss": 0.0425, "lr": 1.0379431993204458e-07, "epoch": 2.885230024213075, "percentage": 96.25, "elapsed_time": "0:44:19", "remaining_time": "0:01:43", "throughput": 2789.87, "total_tokens": 7420088} | |
| {"current_steps": 2990, "total_steps": 3096, "loss": 0.0418, "lr": 8.668662317124043e-08, "epoch": 2.8949152542372882, "percentage": 96.58, "elapsed_time": "0:44:28", "remaining_time": "0:01:34", "throughput": 2789.75, "total_tokens": 7445048} | |
| {"current_steps": 3000, "total_steps": 3096, "loss": 0.022, "lr": 7.111449160146333e-08, "epoch": 2.9046004842615014, "percentage": 96.9, "elapsed_time": "0:44:37", "remaining_time": "0:01:25", "throughput": 2789.46, "total_tokens": 7469144} | |
| {"current_steps": 3010, "total_steps": 3096, "loss": 0.0529, "lr": 5.7079528623816824e-08, "epoch": 2.914285714285714, "percentage": 97.22, "elapsed_time": "0:44:47", "remaining_time": "0:01:16", "throughput": 2788.47, "total_tokens": 7493528} | |
| {"current_steps": 3020, "total_steps": 3096, "loss": 0.0335, "lr": 4.4583179363210656e-08, "epoch": 2.9239709443099273, "percentage": 97.55, "elapsed_time": "0:44:56", "remaining_time": "0:01:07", "throughput": 2788.27, "total_tokens": 7517560} | |
| {"current_steps": 3030, "total_steps": 3096, "loss": 0.0338, "lr": 3.3626730519551455e-08, "epoch": 2.9336561743341405, "percentage": 97.87, "elapsed_time": "0:45:05", "remaining_time": "0:00:58", "throughput": 2788.26, "total_tokens": 7542552} | |
| {"current_steps": 3040, "total_steps": 3096, "loss": 0.0403, "lr": 2.4211310235258687e-08, "epoch": 2.9433414043583537, "percentage": 98.19, "elapsed_time": "0:45:13", "remaining_time": "0:00:49", "throughput": 2788.32, "total_tokens": 7566968} | |
| {"current_steps": 3050, "total_steps": 3096, "loss": 0.0259, "lr": 1.633788797910929e-08, "epoch": 2.9530266343825664, "percentage": 98.51, "elapsed_time": "0:45:22", "remaining_time": "0:00:41", "throughput": 2788.36, "total_tokens": 7591672} | |
| {"current_steps": 3060, "total_steps": 3096, "loss": 0.0392, "lr": 1.0007274446409143e-08, "epoch": 2.9627118644067796, "percentage": 98.84, "elapsed_time": "0:45:31", "remaining_time": "0:00:32", "throughput": 2788.41, "total_tokens": 7616536} | |
| {"current_steps": 3070, "total_steps": 3096, "loss": 0.0487, "lr": 5.220121475519868e-09, "epoch": 2.9723970944309928, "percentage": 99.16, "elapsed_time": "0:45:40", "remaining_time": "0:00:23", "throughput": 2788.47, "total_tokens": 7640824} | |
| {"current_steps": 3080, "total_steps": 3096, "loss": 0.056, "lr": 1.976921980745838e-09, "epoch": 2.982082324455206, "percentage": 99.48, "elapsed_time": "0:45:49", "remaining_time": "0:00:14", "throughput": 2788.7, "total_tokens": 7666328} | |
| {"current_steps": 3090, "total_steps": 3096, "loss": 0.0201, "lr": 2.780099015747828e-10, "epoch": 2.991767554479419, "percentage": 99.81, "elapsed_time": "0:45:57", "remaining_time": "0:00:05", "throughput": 2788.8, "total_tokens": 7691224} | |
| {"current_steps": 3096, "total_steps": 3096, "epoch": 2.9975786924939465, "percentage": 100.0, "elapsed_time": "0:46:03", "remaining_time": "0:00:00", "throughput": 2788.21, "total_tokens": 7706072} | |