KYCOCR / trainer_log.jsonl
HiteshKamwal's picture
Upload 16 files
dcd3f6e verified
{"current_steps": 5, "total_steps": 996, "loss": 2.4707, "lr": 4.9996890990217804e-05, "epoch": 0.015048908954100828, "percentage": 0.5, "elapsed_time": "0:00:54", "remaining_time": "2:59:14", "throughput": 108.07, "total_tokens": 5864}
{"current_steps": 10, "total_steps": 996, "loss": 2.2509, "lr": 4.9987564734146566e-05, "epoch": 0.030097817908201655, "percentage": 1.0, "elapsed_time": "0:01:47", "remaining_time": "2:57:04", "throughput": 106.09, "total_tokens": 11432}
{"current_steps": 15, "total_steps": 996, "loss": 1.6895, "lr": 4.997202355141999e-05, "epoch": 0.045146726862302484, "percentage": 1.51, "elapsed_time": "0:02:40", "remaining_time": "2:54:28", "throughput": 106.2, "total_tokens": 17000}
{"current_steps": 20, "total_steps": 996, "loss": 1.4876, "lr": 4.995027130745321e-05, "epoch": 0.06019563581640331, "percentage": 2.01, "elapsed_time": "0:03:33", "remaining_time": "2:53:25", "throughput": 107.12, "total_tokens": 22840}
{"current_steps": 25, "total_steps": 996, "loss": 1.4812, "lr": 4.992231341248137e-05, "epoch": 0.07524454477050414, "percentage": 2.51, "elapsed_time": "0:04:27", "remaining_time": "2:53:07", "throughput": 108.37, "total_tokens": 28984}
{"current_steps": 30, "total_steps": 996, "loss": 1.3642, "lr": 4.9888156820213974e-05, "epoch": 0.09029345372460497, "percentage": 3.01, "elapsed_time": "0:05:20", "remaining_time": "2:51:47", "throughput": 108.89, "total_tokens": 34856}
{"current_steps": 35, "total_steps": 996, "loss": 1.3651, "lr": 4.9847810026105394e-05, "epoch": 0.1053423626787058, "percentage": 3.51, "elapsed_time": "0:06:15", "remaining_time": "2:51:37", "throughput": 109.89, "total_tokens": 41216}
{"current_steps": 40, "total_steps": 996, "loss": 1.1321, "lr": 4.980128306524183e-05, "epoch": 0.12039127163280662, "percentage": 4.02, "elapsed_time": "0:07:09", "remaining_time": "2:50:57", "throughput": 110.22, "total_tokens": 47304}
{"current_steps": 45, "total_steps": 996, "loss": 1.3012, "lr": 4.97485875098454e-05, "epoch": 0.13544018058690746, "percentage": 4.52, "elapsed_time": "0:08:02", "remaining_time": "2:49:52", "throughput": 110.28, "total_tokens": 53184}
{"current_steps": 50, "total_steps": 996, "loss": 0.9827, "lr": 4.968973646639589e-05, "epoch": 0.1504890895410083, "percentage": 5.02, "elapsed_time": "0:08:55", "remaining_time": "2:48:50", "throughput": 110.24, "total_tokens": 59024}
{"current_steps": 55, "total_steps": 996, "loss": 1.2313, "lr": 4.9624744572370865e-05, "epoch": 0.1655379984951091, "percentage": 5.52, "elapsed_time": "0:09:49", "remaining_time": "2:47:59", "throughput": 110.02, "total_tokens": 64816}
{"current_steps": 60, "total_steps": 996, "loss": 1.0347, "lr": 4.9553627992605066e-05, "epoch": 0.18058690744920994, "percentage": 6.02, "elapsed_time": "0:10:43", "remaining_time": "2:47:18", "throughput": 110.1, "total_tokens": 70848}
{"current_steps": 65, "total_steps": 996, "loss": 1.0422, "lr": 4.947640441526989e-05, "epoch": 0.19563581640331076, "percentage": 6.53, "elapsed_time": "0:11:37", "remaining_time": "2:46:23", "throughput": 110.31, "total_tokens": 76888}
{"current_steps": 70, "total_steps": 996, "loss": 0.9996, "lr": 4.939309304747391e-05, "epoch": 0.2106847253574116, "percentage": 7.03, "elapsed_time": "0:12:30", "remaining_time": "2:45:29", "throughput": 110.36, "total_tokens": 82840}
{"current_steps": 75, "total_steps": 996, "loss": 1.0755, "lr": 4.930371461048571e-05, "epoch": 0.22573363431151242, "percentage": 7.53, "elapsed_time": "0:13:24", "remaining_time": "2:44:38", "throughput": 110.41, "total_tokens": 88824}
{"current_steps": 80, "total_steps": 996, "loss": 1.026, "lr": 4.9208291334580104e-05, "epoch": 0.24078254326561324, "percentage": 8.03, "elapsed_time": "0:14:15", "remaining_time": "2:43:18", "throughput": 110.15, "total_tokens": 94264}
{"current_steps": 85, "total_steps": 996, "loss": 1.1307, "lr": 4.910684695350895e-05, "epoch": 0.2558314522197141, "percentage": 8.53, "elapsed_time": "0:15:07", "remaining_time": "2:42:09", "throughput": 110.04, "total_tokens": 99896}
{"current_steps": 90, "total_steps": 996, "loss": 1.0221, "lr": 4.8999406698598074e-05, "epoch": 0.2708803611738149, "percentage": 9.04, "elapsed_time": "0:16:00", "remaining_time": "2:41:13", "throughput": 109.93, "total_tokens": 105640}
{"current_steps": 95, "total_steps": 996, "loss": 1.012, "lr": 4.8885997292471774e-05, "epoch": 0.28592927012791575, "percentage": 9.54, "elapsed_time": "0:16:52", "remaining_time": "2:39:58", "throughput": 109.96, "total_tokens": 111280}
{"current_steps": 100, "total_steps": 996, "loss": 1.0151, "lr": 4.87666469424063e-05, "epoch": 0.3009781790820166, "percentage": 10.04, "elapsed_time": "0:17:42", "remaining_time": "2:38:41", "throughput": 109.77, "total_tokens": 116640}
{"current_steps": 105, "total_steps": 996, "loss": 1.0028, "lr": 4.86413853333141e-05, "epoch": 0.3160270880361174, "percentage": 10.54, "elapsed_time": "0:18:34", "remaining_time": "2:37:40", "throughput": 109.3, "total_tokens": 121864}
{"current_steps": 110, "total_steps": 996, "loss": 1.143, "lr": 4.851024362036064e-05, "epoch": 0.3310759969902182, "percentage": 11.04, "elapsed_time": "0:19:26", "remaining_time": "2:36:35", "throughput": 109.2, "total_tokens": 127384}
{"current_steps": 115, "total_steps": 996, "loss": 0.9695, "lr": 4.837325442121538e-05, "epoch": 0.34612490594431905, "percentage": 11.55, "elapsed_time": "0:20:18", "remaining_time": "2:35:32", "throughput": 109.18, "total_tokens": 133008}
{"current_steps": 120, "total_steps": 996, "loss": 0.9017, "lr": 4.8230451807939135e-05, "epoch": 0.3611738148984199, "percentage": 12.05, "elapsed_time": "0:21:12", "remaining_time": "2:34:49", "throughput": 109.34, "total_tokens": 139144}
{"current_steps": 125, "total_steps": 996, "loss": 1.035, "lr": 4.808187129850963e-05, "epoch": 0.3762227238525207, "percentage": 12.55, "elapsed_time": "0:22:05", "remaining_time": "2:33:55", "throughput": 109.29, "total_tokens": 144848}
{"current_steps": 130, "total_steps": 996, "loss": 1.0128, "lr": 4.792754984798745e-05, "epoch": 0.3912716328066215, "percentage": 13.05, "elapsed_time": "0:22:57", "remaining_time": "2:32:55", "throughput": 109.25, "total_tokens": 150480}
{"current_steps": 135, "total_steps": 996, "loss": 0.9432, "lr": 4.776752583932454e-05, "epoch": 0.40632054176072235, "percentage": 13.55, "elapsed_time": "0:23:50", "remaining_time": "2:32:00", "throughput": 109.32, "total_tokens": 156336}
{"current_steps": 140, "total_steps": 996, "loss": 1.0344, "lr": 4.760183907381757e-05, "epoch": 0.4213694507148232, "percentage": 14.06, "elapsed_time": "0:24:43", "remaining_time": "2:31:10", "throughput": 109.5, "total_tokens": 162440}
{"current_steps": 145, "total_steps": 996, "loss": 0.9452, "lr": 4.7430530761208494e-05, "epoch": 0.436418359668924, "percentage": 14.56, "elapsed_time": "0:25:36", "remaining_time": "2:30:16", "throughput": 109.55, "total_tokens": 168304}
{"current_steps": 150, "total_steps": 996, "loss": 0.9559, "lr": 4.725364350943492e-05, "epoch": 0.45146726862302483, "percentage": 15.06, "elapsed_time": "0:26:28", "remaining_time": "2:29:18", "throughput": 109.53, "total_tokens": 173984}
{"current_steps": 155, "total_steps": 996, "loss": 0.9726, "lr": 4.707122131403251e-05, "epoch": 0.46651617757712566, "percentage": 15.56, "elapsed_time": "0:27:21", "remaining_time": "2:28:25", "throughput": 109.6, "total_tokens": 179896}
{"current_steps": 160, "total_steps": 996, "loss": 0.9344, "lr": 4.6883309547192476e-05, "epoch": 0.4815650865312265, "percentage": 16.06, "elapsed_time": "0:28:12", "remaining_time": "2:27:23", "throughput": 109.48, "total_tokens": 185296}
{"current_steps": 165, "total_steps": 996, "loss": 0.9497, "lr": 4.668995494647653e-05, "epoch": 0.4966139954853273, "percentage": 16.57, "elapsed_time": "0:29:05", "remaining_time": "2:26:29", "throughput": 109.4, "total_tokens": 190928}
{"current_steps": 170, "total_steps": 996, "loss": 1.057, "lr": 4.649120560319225e-05, "epoch": 0.5116629044394282, "percentage": 17.07, "elapsed_time": "0:30:00", "remaining_time": "2:25:49", "throughput": 109.59, "total_tokens": 197352}
{"current_steps": 175, "total_steps": 996, "loss": 0.9847, "lr": 4.6287110950431865e-05, "epoch": 0.526711813393529, "percentage": 17.57, "elapsed_time": "0:30:53", "remaining_time": "2:24:54", "throughput": 109.65, "total_tokens": 203216}
{"current_steps": 180, "total_steps": 996, "loss": 1.001, "lr": 4.607772175077711e-05, "epoch": 0.5417607223476298, "percentage": 18.07, "elapsed_time": "0:31:44", "remaining_time": "2:23:55", "throughput": 109.52, "total_tokens": 208624}
{"current_steps": 185, "total_steps": 996, "loss": 0.9384, "lr": 4.586309008367359e-05, "epoch": 0.5568096313017307, "percentage": 18.57, "elapsed_time": "0:32:38", "remaining_time": "2:23:04", "throughput": 109.56, "total_tokens": 214552}
{"current_steps": 190, "total_steps": 996, "loss": 1.0312, "lr": 4.564326933247752e-05, "epoch": 0.5718585402558315, "percentage": 19.08, "elapsed_time": "0:33:32", "remaining_time": "2:22:16", "throughput": 109.68, "total_tokens": 220704}
{"current_steps": 195, "total_steps": 996, "loss": 0.9112, "lr": 4.541831417117815e-05, "epoch": 0.5869074492099323, "percentage": 19.58, "elapsed_time": "0:34:24", "remaining_time": "2:21:20", "throughput": 109.7, "total_tokens": 226480}
{"current_steps": 200, "total_steps": 996, "loss": 0.9967, "lr": 4.518828055079925e-05, "epoch": 0.6019563581640331, "percentage": 20.08, "elapsed_time": "0:35:16", "remaining_time": "2:20:25", "throughput": 109.66, "total_tokens": 232136}
{"current_steps": 205, "total_steps": 996, "loss": 1.0905, "lr": 4.4953225685482904e-05, "epoch": 0.617005267118134, "percentage": 20.58, "elapsed_time": "0:36:11", "remaining_time": "2:19:38", "throughput": 109.64, "total_tokens": 238072}
{"current_steps": 210, "total_steps": 996, "loss": 0.9487, "lr": 4.471320803825915e-05, "epoch": 0.6320541760722348, "percentage": 21.08, "elapsed_time": "0:37:04", "remaining_time": "2:18:44", "throughput": 109.56, "total_tokens": 243680}
{"current_steps": 215, "total_steps": 996, "loss": 0.8675, "lr": 4.4468287306505045e-05, "epoch": 0.6471030850263356, "percentage": 21.59, "elapsed_time": "0:37:56", "remaining_time": "2:17:48", "throughput": 109.56, "total_tokens": 249376}
{"current_steps": 220, "total_steps": 996, "loss": 0.8624, "lr": 4.421852440709666e-05, "epoch": 0.6621519939804364, "percentage": 22.09, "elapsed_time": "0:38:48", "remaining_time": "2:16:54", "throughput": 109.61, "total_tokens": 255288}
{"current_steps": 225, "total_steps": 996, "loss": 1.0489, "lr": 4.39639814612578e-05, "epoch": 0.6772009029345373, "percentage": 22.59, "elapsed_time": "0:39:43", "remaining_time": "2:16:08", "throughput": 109.74, "total_tokens": 261592}
{"current_steps": 230, "total_steps": 996, "loss": 0.9139, "lr": 4.370472177910914e-05, "epoch": 0.6922498118886381, "percentage": 23.09, "elapsed_time": "0:40:36", "remaining_time": "2:15:13", "throughput": 109.68, "total_tokens": 267192}
{"current_steps": 235, "total_steps": 996, "loss": 0.9905, "lr": 4.3440809843921725e-05, "epoch": 0.7072987208427389, "percentage": 23.59, "elapsed_time": "0:41:27", "remaining_time": "2:14:16", "throughput": 109.62, "total_tokens": 272712}
{"current_steps": 240, "total_steps": 996, "loss": 0.8974, "lr": 4.3172311296078595e-05, "epoch": 0.7223476297968398, "percentage": 24.1, "elapsed_time": "0:42:21", "remaining_time": "2:13:25", "throughput": 109.66, "total_tokens": 278720}
{"current_steps": 245, "total_steps": 996, "loss": 0.999, "lr": 4.28992929167487e-05, "epoch": 0.7373965387509406, "percentage": 24.6, "elapsed_time": "0:43:14", "remaining_time": "2:12:33", "throughput": 109.68, "total_tokens": 284584}
{"current_steps": 250, "total_steps": 996, "loss": 0.9916, "lr": 4.2621822611277e-05, "epoch": 0.7524454477050414, "percentage": 25.1, "elapsed_time": "0:44:08", "remaining_time": "2:11:42", "throughput": 109.66, "total_tokens": 290408}
{"current_steps": 255, "total_steps": 996, "loss": 0.9242, "lr": 4.233996939229502e-05, "epoch": 0.7674943566591422, "percentage": 25.6, "elapsed_time": "0:45:00", "remaining_time": "2:10:46", "throughput": 109.54, "total_tokens": 295776}
{"current_steps": 260, "total_steps": 996, "loss": 1.0426, "lr": 4.205380336255594e-05, "epoch": 0.782543265613243, "percentage": 26.1, "elapsed_time": "0:45:54", "remaining_time": "2:09:55", "throughput": 109.56, "total_tokens": 301736}
{"current_steps": 265, "total_steps": 996, "loss": 0.8625, "lr": 4.176339569749865e-05, "epoch": 0.7975921745673439, "percentage": 26.61, "elapsed_time": "0:46:45", "remaining_time": "2:08:59", "throughput": 109.5, "total_tokens": 307224}
{"current_steps": 270, "total_steps": 996, "loss": 0.9959, "lr": 4.1468818627544845e-05, "epoch": 0.8126410835214447, "percentage": 27.11, "elapsed_time": "0:47:38", "remaining_time": "2:08:06", "throughput": 109.51, "total_tokens": 313040}
{"current_steps": 275, "total_steps": 996, "loss": 0.939, "lr": 4.11701454201339e-05, "epoch": 0.8276899924755455, "percentage": 27.61, "elapsed_time": "0:48:32", "remaining_time": "2:07:15", "throughput": 109.57, "total_tokens": 319112}
{"current_steps": 280, "total_steps": 996, "loss": 0.9741, "lr": 4.08674503614997e-05, "epoch": 0.8427389014296464, "percentage": 28.11, "elapsed_time": "0:49:25", "remaining_time": "2:06:22", "throughput": 109.61, "total_tokens": 325040}
{"current_steps": 285, "total_steps": 996, "loss": 0.98, "lr": 4.0560808738194114e-05, "epoch": 0.8577878103837472, "percentage": 28.61, "elapsed_time": "0:50:18", "remaining_time": "2:05:30", "throughput": 109.62, "total_tokens": 330904}
{"current_steps": 290, "total_steps": 996, "loss": 0.8898, "lr": 4.0250296818361647e-05, "epoch": 0.872836719337848, "percentage": 29.12, "elapsed_time": "0:51:10", "remaining_time": "2:04:35", "throughput": 109.54, "total_tokens": 336392}
{"current_steps": 295, "total_steps": 996, "loss": 0.953, "lr": 3.993599183277001e-05, "epoch": 0.8878856282919488, "percentage": 29.62, "elapsed_time": "0:52:06", "remaining_time": "2:03:48", "throughput": 109.67, "total_tokens": 342832}
{"current_steps": 300, "total_steps": 996, "loss": 0.9311, "lr": 3.961797195560118e-05, "epoch": 0.9029345372460497, "percentage": 30.12, "elapsed_time": "0:52:59", "remaining_time": "2:02:56", "throughput": 109.75, "total_tokens": 348944}
{"current_steps": 305, "total_steps": 996, "loss": 0.9114, "lr": 3.9296316285007887e-05, "epoch": 0.9179834462001505, "percentage": 30.62, "elapsed_time": "0:53:52", "remaining_time": "2:02:03", "throughput": 109.73, "total_tokens": 354680}
{"current_steps": 310, "total_steps": 996, "loss": 0.9674, "lr": 3.897110482344024e-05, "epoch": 0.9330323551542513, "percentage": 31.12, "elapsed_time": "0:54:46", "remaining_time": "2:01:12", "throughput": 109.85, "total_tokens": 361008}
{"current_steps": 315, "total_steps": 996, "loss": 0.9582, "lr": 3.864241845774746e-05, "epoch": 0.9480812641083521, "percentage": 31.63, "elapsed_time": "0:55:38", "remaining_time": "2:00:17", "throughput": 109.86, "total_tokens": 366760}
{"current_steps": 320, "total_steps": 996, "loss": 0.9863, "lr": 3.8310338939059644e-05, "epoch": 0.963130173062453, "percentage": 32.13, "elapsed_time": "0:56:30", "remaining_time": "1:59:23", "throughput": 109.84, "total_tokens": 372448}
{"current_steps": 325, "total_steps": 996, "loss": 0.906, "lr": 3.797494886245456e-05, "epoch": 0.9781790820165538, "percentage": 32.63, "elapsed_time": "0:57:24", "remaining_time": "1:58:31", "throughput": 109.89, "total_tokens": 378520}
{"current_steps": 330, "total_steps": 996, "loss": 0.8958, "lr": 3.7636331646414524e-05, "epoch": 0.9932279909706546, "percentage": 33.13, "elapsed_time": "0:58:17", "remaining_time": "1:57:38", "throughput": 109.87, "total_tokens": 384272}
{"current_steps": 335, "total_steps": 996, "loss": 0.8349, "lr": 3.7294571512078506e-05, "epoch": 1.0060195635816402, "percentage": 33.63, "elapsed_time": "0:59:02", "remaining_time": "1:56:30", "throughput": 109.89, "total_tokens": 389280}
{"current_steps": 340, "total_steps": 996, "loss": 0.8507, "lr": 3.694975346229458e-05, "epoch": 1.021068472535741, "percentage": 34.14, "elapsed_time": "0:59:54", "remaining_time": "1:55:36", "throughput": 109.86, "total_tokens": 394944}
{"current_steps": 345, "total_steps": 996, "loss": 0.9287, "lr": 3.6601963260477924e-05, "epoch": 1.036117381489842, "percentage": 34.64, "elapsed_time": "1:00:47", "remaining_time": "1:54:42", "throughput": 109.88, "total_tokens": 400800}
{"current_steps": 350, "total_steps": 996, "loss": 0.9107, "lr": 3.625128740927971e-05, "epoch": 1.0511662904439427, "percentage": 35.14, "elapsed_time": "1:01:40", "remaining_time": "1:53:50", "throughput": 109.91, "total_tokens": 406728}
{"current_steps": 355, "total_steps": 996, "loss": 0.952, "lr": 3.589781312907207e-05, "epoch": 1.0662151993980435, "percentage": 35.64, "elapsed_time": "1:02:33", "remaining_time": "1:52:57", "throughput": 109.93, "total_tokens": 412656}
{"current_steps": 360, "total_steps": 996, "loss": 0.9526, "lr": 3.55416283362546e-05, "epoch": 1.0812641083521444, "percentage": 36.14, "elapsed_time": "1:03:27", "remaining_time": "1:52:05", "throughput": 109.92, "total_tokens": 418488}
{"current_steps": 365, "total_steps": 996, "loss": 0.8775, "lr": 3.518282162138772e-05, "epoch": 1.0963130173062452, "percentage": 36.65, "elapsed_time": "1:04:19", "remaining_time": "1:51:12", "throughput": 109.9, "total_tokens": 424192}
{"current_steps": 370, "total_steps": 996, "loss": 0.883, "lr": 3.482148222715835e-05, "epoch": 1.111361926260346, "percentage": 37.15, "elapsed_time": "1:05:14", "remaining_time": "1:50:22", "throughput": 109.94, "total_tokens": 430312}
{"current_steps": 375, "total_steps": 996, "loss": 1.0032, "lr": 3.4457700026183374e-05, "epoch": 1.1264108352144468, "percentage": 37.65, "elapsed_time": "1:06:07", "remaining_time": "1:49:29", "throughput": 109.93, "total_tokens": 436128}
{"current_steps": 380, "total_steps": 996, "loss": 0.943, "lr": 3.409156549865654e-05, "epoch": 1.141459744168548, "percentage": 38.15, "elapsed_time": "1:06:59", "remaining_time": "1:48:36", "throughput": 109.94, "total_tokens": 441928}
{"current_steps": 385, "total_steps": 996, "loss": 0.801, "lr": 3.3723169709844026e-05, "epoch": 1.1565086531226485, "percentage": 38.65, "elapsed_time": "1:07:51", "remaining_time": "1:47:42", "throughput": 109.91, "total_tokens": 447560}
{"current_steps": 390, "total_steps": 996, "loss": 0.9294, "lr": 3.335260428743475e-05, "epoch": 1.1715575620767495, "percentage": 39.16, "elapsed_time": "1:08:44", "remaining_time": "1:46:48", "throughput": 109.91, "total_tokens": 453296}
{"current_steps": 395, "total_steps": 996, "loss": 0.9528, "lr": 3.297996139875055e-05, "epoch": 1.1866064710308502, "percentage": 39.66, "elapsed_time": "1:09:37", "remaining_time": "1:45:56", "throughput": 109.96, "total_tokens": 459336}
{"current_steps": 400, "total_steps": 996, "loss": 0.8981, "lr": 3.260533372782234e-05, "epoch": 1.2016553799849512, "percentage": 40.16, "elapsed_time": "1:10:29", "remaining_time": "1:45:01", "throughput": 109.93, "total_tokens": 464944}
{"current_steps": 405, "total_steps": 996, "loss": 0.9823, "lr": 3.222881445233759e-05, "epoch": 1.2167042889390518, "percentage": 40.66, "elapsed_time": "1:11:24", "remaining_time": "1:44:11", "throughput": 109.94, "total_tokens": 470992}
{"current_steps": 410, "total_steps": 996, "loss": 0.9047, "lr": 3.185049722046516e-05, "epoch": 1.2317531978931529, "percentage": 41.16, "elapsed_time": "1:12:14", "remaining_time": "1:43:15", "throughput": 109.86, "total_tokens": 476216}
{"current_steps": 415, "total_steps": 996, "loss": 0.8582, "lr": 3.147047612756302e-05, "epoch": 1.2468021068472535, "percentage": 41.67, "elapsed_time": "1:13:06", "remaining_time": "1:42:21", "throughput": 109.84, "total_tokens": 481824}
{"current_steps": 420, "total_steps": 996, "loss": 0.8787, "lr": 3.10888456927748e-05, "epoch": 1.2618510158013545, "percentage": 42.17, "elapsed_time": "1:13:58", "remaining_time": "1:41:27", "throughput": 109.85, "total_tokens": 487576}
{"current_steps": 425, "total_steps": 996, "loss": 0.8729, "lr": 3.0705700835520895e-05, "epoch": 1.276899924755455, "percentage": 42.67, "elapsed_time": "1:14:50", "remaining_time": "1:40:33", "throughput": 109.85, "total_tokens": 493336}
{"current_steps": 430, "total_steps": 996, "loss": 0.8772, "lr": 3.0321136851890036e-05, "epoch": 1.2919488337095562, "percentage": 43.17, "elapsed_time": "1:15:45", "remaining_time": "1:39:43", "throughput": 109.95, "total_tokens": 499760}
{"current_steps": 435, "total_steps": 996, "loss": 0.9451, "lr": 2.9935249390937183e-05, "epoch": 1.3069977426636568, "percentage": 43.67, "elapsed_time": "1:16:37", "remaining_time": "1:38:48", "throughput": 109.94, "total_tokens": 505400}
{"current_steps": 440, "total_steps": 996, "loss": 0.8202, "lr": 2.9548134430893604e-05, "epoch": 1.3220466516177578, "percentage": 44.18, "elapsed_time": "1:17:31", "remaining_time": "1:37:57", "throughput": 110.03, "total_tokens": 511760}
{"current_steps": 445, "total_steps": 996, "loss": 0.9773, "lr": 2.9159888255295116e-05, "epoch": 1.3370955605718584, "percentage": 44.68, "elapsed_time": "1:18:23", "remaining_time": "1:37:03", "throughput": 110.05, "total_tokens": 517616}
{"current_steps": 450, "total_steps": 996, "loss": 0.9101, "lr": 2.8770607429034352e-05, "epoch": 1.3521444695259595, "percentage": 45.18, "elapsed_time": "1:19:13", "remaining_time": "1:36:07", "throughput": 109.98, "total_tokens": 522744}
{"current_steps": 455, "total_steps": 996, "loss": 0.9633, "lr": 2.8380388774343047e-05, "epoch": 1.36719337848006, "percentage": 45.68, "elapsed_time": "1:20:05", "remaining_time": "1:35:13", "throughput": 110.01, "total_tokens": 528648}
{"current_steps": 460, "total_steps": 996, "loss": 0.8886, "lr": 2.7989329346710375e-05, "epoch": 1.382242287434161, "percentage": 46.18, "elapsed_time": "1:20:56", "remaining_time": "1:34:18", "throughput": 109.96, "total_tokens": 534000}
{"current_steps": 465, "total_steps": 996, "loss": 0.9258, "lr": 2.759752641074322e-05, "epoch": 1.3972911963882617, "percentage": 46.69, "elapsed_time": "1:21:48", "remaining_time": "1:33:24", "throughput": 109.96, "total_tokens": 539688}
{"current_steps": 470, "total_steps": 996, "loss": 0.9039, "lr": 2.7205077415974416e-05, "epoch": 1.4123401053423628, "percentage": 47.19, "elapsed_time": "1:22:38", "remaining_time": "1:32:29", "throughput": 109.93, "total_tokens": 545112}
{"current_steps": 475, "total_steps": 996, "loss": 1.0116, "lr": 2.6812079972625077e-05, "epoch": 1.4273890142964636, "percentage": 47.69, "elapsed_time": "1:23:32", "remaining_time": "1:31:37", "throughput": 109.99, "total_tokens": 551328}
{"current_steps": 480, "total_steps": 996, "loss": 0.8218, "lr": 2.6418631827326857e-05, "epoch": 1.4424379232505644, "percentage": 48.19, "elapsed_time": "1:24:23", "remaining_time": "1:30:42", "throughput": 109.97, "total_tokens": 556816}
{"current_steps": 485, "total_steps": 996, "loss": 0.8604, "lr": 2.602483083881035e-05, "epoch": 1.4574868322046652, "percentage": 48.69, "elapsed_time": "1:25:15", "remaining_time": "1:29:50", "throughput": 109.96, "total_tokens": 562552}
{"current_steps": 490, "total_steps": 996, "loss": 0.8044, "lr": 2.563077495356561e-05, "epoch": 1.472535741158766, "percentage": 49.2, "elapsed_time": "1:26:08", "remaining_time": "1:28:56", "throughput": 110.0, "total_tokens": 568480}
{"current_steps": 495, "total_steps": 996, "loss": 0.9198, "lr": 2.5236562181480794e-05, "epoch": 1.487584650112867, "percentage": 49.7, "elapsed_time": "1:26:59", "remaining_time": "1:28:03", "throughput": 109.98, "total_tokens": 574072}
{"current_steps": 500, "total_steps": 996, "loss": 0.9181, "lr": 2.484229057146507e-05, "epoch": 1.5026335590669677, "percentage": 50.2, "elapsed_time": "1:27:53", "remaining_time": "1:27:11", "throughput": 109.99, "total_tokens": 580040}
{"current_steps": 505, "total_steps": 996, "loss": 0.8644, "lr": 2.4448058187061835e-05, "epoch": 1.5176824680210683, "percentage": 50.7, "elapsed_time": "1:28:48", "remaining_time": "1:26:20", "throughput": 110.01, "total_tokens": 586128}
{"current_steps": 510, "total_steps": 996, "loss": 1.0127, "lr": 2.4053963082058244e-05, "epoch": 1.5327313769751694, "percentage": 51.2, "elapsed_time": "1:29:41", "remaining_time": "1:25:28", "throughput": 110.05, "total_tokens": 592256}
{"current_steps": 515, "total_steps": 996, "loss": 0.7937, "lr": 2.3660103276097232e-05, "epoch": 1.54778028592927, "percentage": 51.71, "elapsed_time": "1:30:32", "remaining_time": "1:24:33", "throughput": 110.02, "total_tokens": 597704}
{"current_steps": 520, "total_steps": 996, "loss": 0.9806, "lr": 2.3266576730297956e-05, "epoch": 1.562829194883371, "percentage": 52.21, "elapsed_time": "1:31:23", "remaining_time": "1:23:39", "throughput": 110.01, "total_tokens": 603240}
{"current_steps": 525, "total_steps": 996, "loss": 0.934, "lr": 2.2873481322890862e-05, "epoch": 1.5778781038374716, "percentage": 52.71, "elapsed_time": "1:32:18", "remaining_time": "1:22:48", "throughput": 110.08, "total_tokens": 609616}
{"current_steps": 530, "total_steps": 996, "loss": 0.9288, "lr": 2.2480914824873297e-05, "epoch": 1.5929270127915727, "percentage": 53.21, "elapsed_time": "1:33:11", "remaining_time": "1:21:56", "throughput": 110.09, "total_tokens": 615520}
{"current_steps": 535, "total_steps": 996, "loss": 0.8597, "lr": 2.2088974875691863e-05, "epoch": 1.6079759217456733, "percentage": 53.71, "elapsed_time": "1:34:03", "remaining_time": "1:21:03", "throughput": 110.07, "total_tokens": 621208}
{"current_steps": 540, "total_steps": 996, "loss": 0.8817, "lr": 2.1697758958957448e-05, "epoch": 1.6230248306997743, "percentage": 54.22, "elapsed_time": "1:34:56", "remaining_time": "1:20:10", "throughput": 110.1, "total_tokens": 627176}
{"current_steps": 545, "total_steps": 996, "loss": 0.777, "lr": 2.1307364378199005e-05, "epoch": 1.6380737396538751, "percentage": 54.72, "elapsed_time": "1:35:50", "remaining_time": "1:19:18", "throughput": 110.13, "total_tokens": 633248}
{"current_steps": 550, "total_steps": 996, "loss": 0.798, "lr": 2.0917888232662196e-05, "epoch": 1.653122648607976, "percentage": 55.22, "elapsed_time": "1:36:43", "remaining_time": "1:18:25", "throughput": 110.11, "total_tokens": 639000}
{"current_steps": 555, "total_steps": 996, "loss": 0.9104, "lr": 2.0529427393158705e-05, "epoch": 1.6681715575620768, "percentage": 55.72, "elapsed_time": "1:37:37", "remaining_time": "1:17:33", "throughput": 110.17, "total_tokens": 645280}
{"current_steps": 560, "total_steps": 996, "loss": 0.8293, "lr": 2.014207847797256e-05, "epoch": 1.6832204665161776, "percentage": 56.22, "elapsed_time": "1:38:31", "remaining_time": "1:16:42", "throughput": 110.26, "total_tokens": 651760}
{"current_steps": 565, "total_steps": 996, "loss": 0.8821, "lr": 1.9755937828829067e-05, "epoch": 1.6982693754702785, "percentage": 56.73, "elapsed_time": "1:39:22", "remaining_time": "1:15:48", "throughput": 110.23, "total_tokens": 657272}
{"current_steps": 570, "total_steps": 996, "loss": 0.8253, "lr": 1.937110148693265e-05, "epoch": 1.7133182844243793, "percentage": 57.23, "elapsed_time": "1:40:15", "remaining_time": "1:14:56", "throughput": 110.27, "total_tokens": 663336}
{"current_steps": 575, "total_steps": 996, "loss": 0.9391, "lr": 1.8987665169079454e-05, "epoch": 1.72836719337848, "percentage": 57.73, "elapsed_time": "1:41:07", "remaining_time": "1:14:02", "throughput": 110.25, "total_tokens": 668936}
{"current_steps": 580, "total_steps": 996, "loss": 0.8711, "lr": 1.8605724243850502e-05, "epoch": 1.743416102332581, "percentage": 58.23, "elapsed_time": "1:42:00", "remaining_time": "1:13:10", "throughput": 110.28, "total_tokens": 675000}
{"current_steps": 585, "total_steps": 996, "loss": 0.8346, "lr": 1.822537370789163e-05, "epoch": 1.7584650112866818, "percentage": 58.73, "elapsed_time": "1:42:52", "remaining_time": "1:12:16", "throughput": 110.26, "total_tokens": 680584}
{"current_steps": 590, "total_steps": 996, "loss": 0.8275, "lr": 1.7846708162285785e-05, "epoch": 1.7735139202407826, "percentage": 59.24, "elapsed_time": "1:43:44", "remaining_time": "1:11:23", "throughput": 110.27, "total_tokens": 686416}
{"current_steps": 595, "total_steps": 996, "loss": 0.9435, "lr": 1.7469821789023815e-05, "epoch": 1.7885628291948834, "percentage": 59.74, "elapsed_time": "1:44:35", "remaining_time": "1:10:29", "throughput": 110.27, "total_tokens": 692016}
{"current_steps": 600, "total_steps": 996, "loss": 0.8584, "lr": 1.70948083275794e-05, "epoch": 1.8036117381489842, "percentage": 60.24, "elapsed_time": "1:45:28", "remaining_time": "1:09:36", "throughput": 110.29, "total_tokens": 697984}
{"current_steps": 605, "total_steps": 996, "loss": 0.88, "lr": 1.672176105159417e-05, "epoch": 1.818660647103085, "percentage": 60.74, "elapsed_time": "1:46:23", "remaining_time": "1:08:45", "throughput": 110.3, "total_tokens": 704056}
{"current_steps": 610, "total_steps": 996, "loss": 0.8825, "lr": 1.635077274567854e-05, "epoch": 1.8337095560571859, "percentage": 61.24, "elapsed_time": "1:47:15", "remaining_time": "1:07:52", "throughput": 110.3, "total_tokens": 709760}
{"current_steps": 615, "total_steps": 996, "loss": 0.9978, "lr": 1.5981935682334264e-05, "epoch": 1.8487584650112867, "percentage": 61.75, "elapsed_time": "1:48:08", "remaining_time": "1:06:59", "throughput": 110.33, "total_tokens": 715872}
{"current_steps": 620, "total_steps": 996, "loss": 0.9626, "lr": 1.561534159900441e-05, "epoch": 1.8638073739653875, "percentage": 62.25, "elapsed_time": "1:49:02", "remaining_time": "1:06:07", "throughput": 110.38, "total_tokens": 722184}
{"current_steps": 625, "total_steps": 996, "loss": 0.9308, "lr": 1.525108167525624e-05, "epoch": 1.8788562829194884, "percentage": 62.75, "elapsed_time": "1:49:54", "remaining_time": "1:05:14", "throughput": 110.37, "total_tokens": 727776}
{"current_steps": 630, "total_steps": 996, "loss": 0.9757, "lr": 1.4889246510103077e-05, "epoch": 1.8939051918735892, "percentage": 63.25, "elapsed_time": "1:50:47", "remaining_time": "1:04:21", "throughput": 110.38, "total_tokens": 733760}
{"current_steps": 635, "total_steps": 996, "loss": 0.767, "lr": 1.4529926099470348e-05, "epoch": 1.90895410082769, "percentage": 63.76, "elapsed_time": "1:51:41", "remaining_time": "1:03:29", "throughput": 110.43, "total_tokens": 740024}
{"current_steps": 640, "total_steps": 996, "loss": 0.9272, "lr": 1.4173209813811788e-05, "epoch": 1.9240030097817908, "percentage": 64.26, "elapsed_time": "1:52:32", "remaining_time": "1:02:36", "throughput": 110.4, "total_tokens": 745480}
{"current_steps": 645, "total_steps": 996, "loss": 0.7941, "lr": 1.381918637588112e-05, "epoch": 1.9390519187358917, "percentage": 64.76, "elapsed_time": "1:53:25", "remaining_time": "1:01:43", "throughput": 110.41, "total_tokens": 751384}
{"current_steps": 650, "total_steps": 996, "loss": 0.8408, "lr": 1.3467943838664863e-05, "epoch": 1.9541008276899925, "percentage": 65.26, "elapsed_time": "1:54:17", "remaining_time": "1:00:50", "throughput": 110.38, "total_tokens": 756920}
{"current_steps": 655, "total_steps": 996, "loss": 0.8459, "lr": 1.311956956348177e-05, "epoch": 1.9691497366440933, "percentage": 65.76, "elapsed_time": "1:55:08", "remaining_time": "0:59:56", "throughput": 110.35, "total_tokens": 762424}
{"current_steps": 660, "total_steps": 996, "loss": 1.0117, "lr": 1.277415019825417e-05, "epoch": 1.9841986455981941, "percentage": 66.27, "elapsed_time": "1:56:01", "remaining_time": "0:59:03", "throughput": 110.36, "total_tokens": 768224}
{"current_steps": 665, "total_steps": 996, "loss": 0.9665, "lr": 1.2431771655956925e-05, "epoch": 1.999247554552295, "percentage": 66.77, "elapsed_time": "1:56:52", "remaining_time": "0:58:10", "throughput": 110.32, "total_tokens": 773568}
{"current_steps": 670, "total_steps": 996, "loss": 0.7625, "lr": 1.2092519093248988e-05, "epoch": 2.0120391271632805, "percentage": 67.27, "elapsed_time": "1:57:37", "remaining_time": "0:57:13", "throughput": 110.34, "total_tokens": 778672}
{"current_steps": 675, "total_steps": 996, "loss": 0.8667, "lr": 1.1756476889293269e-05, "epoch": 2.0270880361173815, "percentage": 67.77, "elapsed_time": "1:58:30", "remaining_time": "0:56:21", "throughput": 110.33, "total_tokens": 784488}
{"current_steps": 680, "total_steps": 996, "loss": 0.8297, "lr": 1.1423728624769695e-05, "epoch": 2.042136945071482, "percentage": 68.27, "elapsed_time": "1:59:22", "remaining_time": "0:55:28", "throughput": 110.34, "total_tokens": 790304}
{"current_steps": 685, "total_steps": 996, "loss": 0.8774, "lr": 1.1094357061087033e-05, "epoch": 2.057185854025583, "percentage": 68.78, "elapsed_time": "2:00:15", "remaining_time": "0:54:35", "throughput": 110.35, "total_tokens": 796192}
{"current_steps": 690, "total_steps": 996, "loss": 0.8476, "lr": 1.0768444119798357e-05, "epoch": 2.072234762979684, "percentage": 69.28, "elapsed_time": "2:01:07", "remaining_time": "0:53:43", "throughput": 110.37, "total_tokens": 802144}
{"current_steps": 695, "total_steps": 996, "loss": 0.8641, "lr": 1.0446070862225463e-05, "epoch": 2.087283671933785, "percentage": 69.78, "elapsed_time": "2:02:00", "remaining_time": "0:52:50", "throughput": 110.35, "total_tokens": 807768}
{"current_steps": 700, "total_steps": 996, "loss": 0.8383, "lr": 1.0127317469297277e-05, "epoch": 2.1023325808878854, "percentage": 70.28, "elapsed_time": "2:02:53", "remaining_time": "0:51:57", "throughput": 110.36, "total_tokens": 813712}
{"current_steps": 705, "total_steps": 996, "loss": 0.9123, "lr": 9.812263221607112e-06, "epoch": 2.1173814898419865, "percentage": 70.78, "elapsed_time": "2:03:46", "remaining_time": "0:51:05", "throughput": 110.32, "total_tokens": 819360}
{"current_steps": 710, "total_steps": 996, "loss": 0.9635, "lr": 9.500986479694036e-06, "epoch": 2.132430398796087, "percentage": 71.29, "elapsed_time": "2:04:37", "remaining_time": "0:50:11", "throughput": 110.28, "total_tokens": 824584}
{"current_steps": 715, "total_steps": 996, "loss": 0.9221, "lr": 9.19356466455287e-06, "epoch": 2.147479307750188, "percentage": 71.79, "elapsed_time": "2:05:30", "remaining_time": "0:49:19", "throughput": 110.3, "total_tokens": 830600}
{"current_steps": 720, "total_steps": 996, "loss": 0.8757, "lr": 8.890074238378074e-06, "epoch": 2.1625282167042887, "percentage": 72.29, "elapsed_time": "2:06:24", "remaining_time": "0:48:27", "throughput": 110.34, "total_tokens": 836856}
{"current_steps": 725, "total_steps": 996, "loss": 0.7958, "lr": 8.590590685545946e-06, "epoch": 2.17757712565839, "percentage": 72.79, "elapsed_time": "2:07:17", "remaining_time": "0:47:34", "throughput": 110.36, "total_tokens": 842872}
{"current_steps": 730, "total_steps": 996, "loss": 0.7993, "lr": 8.295188493840104e-06, "epoch": 2.1926260346124904, "percentage": 73.29, "elapsed_time": "2:08:10", "remaining_time": "0:46:42", "throughput": 110.36, "total_tokens": 848664}
{"current_steps": 735, "total_steps": 996, "loss": 0.8436, "lr": 8.003941135924858e-06, "epoch": 2.2076749435665914, "percentage": 73.8, "elapsed_time": "2:09:04", "remaining_time": "0:45:49", "throughput": 110.37, "total_tokens": 854712}
{"current_steps": 740, "total_steps": 996, "loss": 0.896, "lr": 7.71692105107098e-06, "epoch": 2.222723852520692, "percentage": 74.3, "elapsed_time": "2:09:56", "remaining_time": "0:44:57", "throughput": 110.38, "total_tokens": 860648}
{"current_steps": 745, "total_steps": 996, "loss": 0.8948, "lr": 7.434199627138602e-06, "epoch": 2.237772761474793, "percentage": 74.8, "elapsed_time": "2:10:48", "remaining_time": "0:44:04", "throughput": 110.35, "total_tokens": 866080}
{"current_steps": 750, "total_steps": 996, "loss": 0.8546, "lr": 7.155847182821523e-06, "epoch": 2.2528216704288937, "percentage": 75.3, "elapsed_time": "2:11:39", "remaining_time": "0:43:11", "throughput": 110.33, "total_tokens": 871560}
{"current_steps": 755, "total_steps": 996, "loss": 0.8494, "lr": 6.881932950157538e-06, "epoch": 2.2678705793829947, "percentage": 75.8, "elapsed_time": "2:12:32", "remaining_time": "0:42:18", "throughput": 110.35, "total_tokens": 877568}
{"current_steps": 760, "total_steps": 996, "loss": 0.7723, "lr": 6.612525057308949e-06, "epoch": 2.282919488337096, "percentage": 76.31, "elapsed_time": "2:13:25", "remaining_time": "0:41:26", "throughput": 110.4, "total_tokens": 883808}
{"current_steps": 765, "total_steps": 996, "loss": 0.9168, "lr": 6.347690511617693e-06, "epoch": 2.2979683972911964, "percentage": 76.81, "elapsed_time": "2:14:17", "remaining_time": "0:40:32", "throughput": 110.37, "total_tokens": 889296}
{"current_steps": 770, "total_steps": 996, "loss": 0.8831, "lr": 6.0874951829392234e-06, "epoch": 2.313017306245297, "percentage": 77.31, "elapsed_time": "2:15:09", "remaining_time": "0:39:40", "throughput": 110.38, "total_tokens": 895120}
{"current_steps": 775, "total_steps": 996, "loss": 0.854, "lr": 5.832003787259327e-06, "epoch": 2.328066215199398, "percentage": 77.81, "elapsed_time": "2:16:00", "remaining_time": "0:38:47", "throughput": 110.32, "total_tokens": 900320}
{"current_steps": 780, "total_steps": 996, "loss": 0.8843, "lr": 5.581279870597867e-06, "epoch": 2.343115124153499, "percentage": 78.31, "elapsed_time": "2:16:52", "remaining_time": "0:37:54", "throughput": 110.32, "total_tokens": 905928}
{"current_steps": 785, "total_steps": 996, "loss": 0.862, "lr": 5.335385793203604e-06, "epoch": 2.3581640331075997, "percentage": 78.82, "elapsed_time": "2:17:44", "remaining_time": "0:37:01", "throughput": 110.34, "total_tokens": 911976}
{"current_steps": 790, "total_steps": 996, "loss": 0.985, "lr": 5.094382714043907e-06, "epoch": 2.3732129420617003, "percentage": 79.32, "elapsed_time": "2:18:37", "remaining_time": "0:36:08", "throughput": 110.34, "total_tokens": 917840}
{"current_steps": 795, "total_steps": 996, "loss": 0.7679, "lr": 4.85833057559322e-06, "epoch": 2.3882618510158014, "percentage": 79.82, "elapsed_time": "2:19:28", "remaining_time": "0:35:15", "throughput": 110.31, "total_tokens": 923168}
{"current_steps": 800, "total_steps": 996, "loss": 0.8198, "lr": 4.627288088924156e-06, "epoch": 2.4033107599699024, "percentage": 80.32, "elapsed_time": "2:20:20", "remaining_time": "0:34:22", "throughput": 110.3, "total_tokens": 928720}
{"current_steps": 805, "total_steps": 996, "loss": 0.7773, "lr": 4.401312719104802e-06, "epoch": 2.418359668924003, "percentage": 80.82, "elapsed_time": "2:21:14", "remaining_time": "0:33:30", "throughput": 110.28, "total_tokens": 934568}
{"current_steps": 810, "total_steps": 996, "loss": 0.9312, "lr": 4.180460670905978e-06, "epoch": 2.4334085778781036, "percentage": 81.33, "elapsed_time": "2:22:06", "remaining_time": "0:32:38", "throughput": 110.27, "total_tokens": 940264}
{"current_steps": 815, "total_steps": 996, "loss": 0.8497, "lr": 3.964786874821955e-06, "epoch": 2.4484574868322047, "percentage": 81.83, "elapsed_time": "2:22:59", "remaining_time": "0:31:45", "throughput": 110.28, "total_tokens": 946128}
{"current_steps": 820, "total_steps": 996, "loss": 0.782, "lr": 3.754344973408064e-06, "epoch": 2.4635063957863057, "percentage": 82.33, "elapsed_time": "2:23:52", "remaining_time": "0:30:52", "throughput": 110.29, "total_tokens": 952032}
{"current_steps": 825, "total_steps": 996, "loss": 0.8937, "lr": 3.5491873079387256e-06, "epoch": 2.4785553047404063, "percentage": 82.83, "elapsed_time": "2:24:44", "remaining_time": "0:30:00", "throughput": 110.31, "total_tokens": 957960}
{"current_steps": 830, "total_steps": 996, "loss": 0.7039, "lr": 3.3493649053890326e-06, "epoch": 2.493604213694507, "percentage": 83.33, "elapsed_time": "2:25:38", "remaining_time": "0:29:07", "throughput": 110.36, "total_tokens": 964336}
{"current_steps": 835, "total_steps": 996, "loss": 0.9265, "lr": 3.1549274657433375e-06, "epoch": 2.508653122648608, "percentage": 83.84, "elapsed_time": "2:26:30", "remaining_time": "0:28:15", "throughput": 110.36, "total_tokens": 970168}
{"current_steps": 840, "total_steps": 996, "loss": 0.8669, "lr": 2.9659233496337786e-06, "epoch": 2.523702031602709, "percentage": 84.34, "elapsed_time": "2:27:21", "remaining_time": "0:27:22", "throughput": 110.36, "total_tokens": 975752}
{"current_steps": 845, "total_steps": 996, "loss": 0.9174, "lr": 2.7823995663120327e-06, "epoch": 2.5387509405568096, "percentage": 84.84, "elapsed_time": "2:28:14", "remaining_time": "0:26:29", "throughput": 110.37, "total_tokens": 981672}
{"current_steps": 850, "total_steps": 996, "loss": 0.8718, "lr": 2.6044017619571065e-06, "epoch": 2.55379984951091, "percentage": 85.34, "elapsed_time": "2:29:06", "remaining_time": "0:25:36", "throughput": 110.38, "total_tokens": 987560}
{"current_steps": 855, "total_steps": 996, "loss": 0.8634, "lr": 2.431974208322191e-06, "epoch": 2.5688487584650113, "percentage": 85.84, "elapsed_time": "2:29:58", "remaining_time": "0:24:44", "throughput": 110.37, "total_tokens": 993200}
{"current_steps": 860, "total_steps": 996, "loss": 0.845, "lr": 2.265159791723373e-06, "epoch": 2.5838976674191123, "percentage": 86.35, "elapsed_time": "2:30:52", "remaining_time": "0:23:51", "throughput": 110.38, "total_tokens": 999192}
{"current_steps": 865, "total_steps": 996, "loss": 0.8008, "lr": 2.104000002372886e-06, "epoch": 2.598946576373213, "percentage": 86.85, "elapsed_time": "2:31:42", "remaining_time": "0:22:58", "throughput": 110.36, "total_tokens": 1004576}
{"current_steps": 870, "total_steps": 996, "loss": 0.8797, "lr": 1.9485349240596613e-06, "epoch": 2.6139954853273135, "percentage": 87.35, "elapsed_time": "2:32:34", "remaining_time": "0:22:05", "throughput": 110.36, "total_tokens": 1010352}
{"current_steps": 875, "total_steps": 996, "loss": 0.946, "lr": 1.7988032241796376e-06, "epoch": 2.6290443942814146, "percentage": 87.85, "elapsed_time": "2:33:27", "remaining_time": "0:21:13", "throughput": 110.37, "total_tokens": 1016272}
{"current_steps": 880, "total_steps": 996, "loss": 0.8032, "lr": 1.6548421441183875e-06, "epoch": 2.6440933032355156, "percentage": 88.35, "elapsed_time": "2:34:19", "remaining_time": "0:20:20", "throughput": 110.36, "total_tokens": 1021896}
{"current_steps": 885, "total_steps": 996, "loss": 0.8892, "lr": 1.5166874899884053e-06, "epoch": 2.659142212189616, "percentage": 88.86, "elapsed_time": "2:35:11", "remaining_time": "0:19:27", "throughput": 110.37, "total_tokens": 1027704}
{"current_steps": 890, "total_steps": 996, "loss": 0.856, "lr": 1.3843736237233784e-06, "epoch": 2.674191121143717, "percentage": 89.36, "elapsed_time": "2:36:04", "remaining_time": "0:18:35", "throughput": 110.39, "total_tokens": 1033800}
{"current_steps": 895, "total_steps": 996, "loss": 0.8617, "lr": 1.2579334545316733e-06, "epoch": 2.689240030097818, "percentage": 89.86, "elapsed_time": "2:36:58", "remaining_time": "0:17:42", "throughput": 110.43, "total_tokens": 1040008}
{"current_steps": 900, "total_steps": 996, "loss": 0.9117, "lr": 1.137398430711123e-06, "epoch": 2.704288939051919, "percentage": 90.36, "elapsed_time": "2:37:52", "remaining_time": "0:16:50", "throughput": 110.46, "total_tokens": 1046272}
{"current_steps": 905, "total_steps": 996, "loss": 0.7855, "lr": 1.0227985318271682e-06, "epoch": 2.7193378480060195, "percentage": 90.86, "elapsed_time": "2:38:45", "remaining_time": "0:15:57", "throughput": 110.44, "total_tokens": 1052032}
{"current_steps": 910, "total_steps": 996, "loss": 0.8212, "lr": 9.141622612563571e-07, "epoch": 2.73438675696012, "percentage": 91.37, "elapsed_time": "2:39:37", "remaining_time": "0:15:05", "throughput": 110.42, "total_tokens": 1057584}
{"current_steps": 915, "total_steps": 996, "loss": 0.8404, "lr": 8.115166390969125e-07, "epoch": 2.749435665914221, "percentage": 91.87, "elapsed_time": "2:40:31", "remaining_time": "0:14:12", "throughput": 110.45, "total_tokens": 1063760}
{"current_steps": 920, "total_steps": 996, "loss": 0.7782, "lr": 7.148871954483105e-07, "epoch": 2.764484574868322, "percentage": 92.37, "elapsed_time": "2:41:23", "remaining_time": "0:13:19", "throughput": 110.45, "total_tokens": 1069544}
{"current_steps": 925, "total_steps": 996, "loss": 0.7847, "lr": 6.242979640613933e-07, "epoch": 2.779533483822423, "percentage": 92.87, "elapsed_time": "2:42:16", "remaining_time": "0:12:27", "throughput": 110.45, "total_tokens": 1075472}
{"current_steps": 930, "total_steps": 996, "loss": 0.8857, "lr": 5.397714763606843e-07, "epoch": 2.7945823927765234, "percentage": 93.37, "elapsed_time": "2:43:10", "remaining_time": "0:11:34", "throughput": 110.46, "total_tokens": 1081464}
{"current_steps": 935, "total_steps": 996, "loss": 0.8029, "lr": 4.613287558403512e-07, "epoch": 2.8096313017306245, "percentage": 93.88, "elapsed_time": "2:44:03", "remaining_time": "0:10:42", "throughput": 110.48, "total_tokens": 1087464}
{"current_steps": 940, "total_steps": 996, "loss": 0.8154, "lr": 3.8898931283523344e-07, "epoch": 2.8246802106847255, "percentage": 94.38, "elapsed_time": "2:44:54", "remaining_time": "0:09:49", "throughput": 110.45, "total_tokens": 1092888}
{"current_steps": 945, "total_steps": 996, "loss": 0.8791, "lr": 3.227711396682015e-07, "epoch": 2.839729119638826, "percentage": 94.88, "elapsed_time": "2:45:48", "remaining_time": "0:08:56", "throughput": 110.45, "total_tokens": 1098808}
{"current_steps": 950, "total_steps": 996, "loss": 0.787, "lr": 2.626907061751116e-07, "epoch": 2.854778028592927, "percentage": 95.38, "elapsed_time": "2:46:40", "remaining_time": "0:08:04", "throughput": 110.46, "total_tokens": 1104688}
{"current_steps": 955, "total_steps": 996, "loss": 0.8831, "lr": 2.0876295560839364e-07, "epoch": 2.869826937547028, "percentage": 95.88, "elapsed_time": "2:47:34", "remaining_time": "0:07:11", "throughput": 110.49, "total_tokens": 1110960}
{"current_steps": 960, "total_steps": 996, "loss": 0.7677, "lr": 1.6100130092037703e-07, "epoch": 2.884875846501129, "percentage": 96.39, "elapsed_time": "2:48:27", "remaining_time": "0:06:19", "throughput": 110.49, "total_tokens": 1116800}
{"current_steps": 965, "total_steps": 996, "loss": 0.7567, "lr": 1.194176214271897e-07, "epoch": 2.8999247554552294, "percentage": 96.89, "elapsed_time": "2:49:18", "remaining_time": "0:05:26", "throughput": 110.47, "total_tokens": 1122248}
{"current_steps": 970, "total_steps": 996, "loss": 0.8944, "lr": 8.402225985413848e-08, "epoch": 2.9149736644093305, "percentage": 97.39, "elapsed_time": "2:50:10", "remaining_time": "0:04:33", "throughput": 110.46, "total_tokens": 1127928}
{"current_steps": 975, "total_steps": 996, "loss": 0.9737, "lr": 5.4824019763252685e-08, "epoch": 2.930022573363431, "percentage": 97.89, "elapsed_time": "2:51:01", "remaining_time": "0:03:41", "throughput": 110.44, "total_tokens": 1133336}
{"current_steps": 980, "total_steps": 996, "loss": 0.8965, "lr": 3.1830163363655296e-08, "epoch": 2.945071482317532, "percentage": 98.39, "elapsed_time": "2:51:54", "remaining_time": "0:02:48", "throughput": 110.44, "total_tokens": 1139048}
{"current_steps": 985, "total_steps": 996, "loss": 0.837, "lr": 1.504640970531046e-08, "epoch": 2.9601203912716327, "percentage": 98.9, "elapsed_time": "2:52:45", "remaining_time": "0:01:55", "throughput": 110.41, "total_tokens": 1144456}
{"current_steps": 990, "total_steps": 996, "loss": 0.7812, "lr": 4.4769332565558485e-09, "epoch": 2.975169300225734, "percentage": 99.4, "elapsed_time": "2:53:37", "remaining_time": "0:01:03", "throughput": 110.4, "total_tokens": 1150160}
{"current_steps": 995, "total_steps": 996, "loss": 0.8613, "lr": 1.2436286584982527e-10, "epoch": 2.9902182091798344, "percentage": 99.9, "elapsed_time": "2:54:33", "remaining_time": "0:00:10", "throughput": 110.45, "total_tokens": 1156704}
{"current_steps": 996, "total_steps": 996, "epoch": 2.9932279909706545, "percentage": 100.0, "elapsed_time": "2:54:44", "remaining_time": "0:00:00", "throughput": 110.43, "total_tokens": 1157808}