{"current_steps": 5, "total_steps": 40000, "loss": 2.4052, "lr": 4.999999876629946e-05, "epoch": 0.0008157272208173587, "percentage": 0.01, "elapsed_time": "0:00:03", "remaining_time": "7:12:09", "throughput": 2448.19, "total_tokens": 7936} {"current_steps": 10, "total_steps": 40000, "loss": 1.3686, "lr": 4.999999375439123e-05, "epoch": 0.0016314544416347174, "percentage": 0.03, "elapsed_time": "0:00:05", "remaining_time": "5:55:40", "throughput": 3202.11, "total_tokens": 17088} {"current_steps": 15, "total_steps": 40000, "loss": 0.7674, "lr": 4.9999984887169785e-05, "epoch": 0.002447181662452076, "percentage": 0.04, "elapsed_time": "0:00:07", "remaining_time": "5:29:48", "throughput": 3633.94, "total_tokens": 26976} {"current_steps": 20, "total_steps": 40000, "loss": 0.5275, "lr": 4.9999972164636506e-05, "epoch": 0.0032629088832694347, "percentage": 0.05, "elapsed_time": "0:00:09", "remaining_time": "5:16:13", "throughput": 3954.65, "total_tokens": 37536} {"current_steps": 25, "total_steps": 40000, "loss": 0.5193, "lr": 4.999995558679334e-05, "epoch": 0.004078636104086793, "percentage": 0.06, "elapsed_time": "0:00:11", "remaining_time": "5:07:58", "throughput": 3999.87, "total_tokens": 46224} {"current_steps": 30, "total_steps": 40000, "loss": 0.4229, "lr": 4.999993515364287e-05, "epoch": 0.004894363324904152, "percentage": 0.07, "elapsed_time": "0:00:13", "remaining_time": "5:02:34", "throughput": 4108.51, "total_tokens": 55984} {"current_steps": 35, "total_steps": 40000, "loss": 0.2686, "lr": 4.999991086518822e-05, "epoch": 0.005710090545721511, "percentage": 0.09, "elapsed_time": "0:00:15", "remaining_time": "4:59:09", "throughput": 4195.61, "total_tokens": 65952} {"current_steps": 40, "total_steps": 40000, "loss": 0.3117, "lr": 4.999988272143315e-05, "epoch": 0.0065258177665388694, "percentage": 0.1, "elapsed_time": "0:00:17", "remaining_time": "4:56:06", "throughput": 4287.74, "total_tokens": 76256} {"current_steps": 45, "total_steps": 40000, "loss": 0.2426, "lr": 4.999985072238199e-05, "epoch": 0.007341544987356228, "percentage": 0.11, "elapsed_time": "0:00:19", "remaining_time": "4:53:47", "throughput": 4335.91, "total_tokens": 86080} {"current_steps": 50, "total_steps": 40000, "loss": 0.2255, "lr": 4.999981486803969e-05, "epoch": 0.008157272208173586, "percentage": 0.12, "elapsed_time": "0:00:21", "remaining_time": "4:51:48", "throughput": 4405.01, "total_tokens": 96528} {"current_steps": 55, "total_steps": 40000, "loss": 0.1462, "lr": 4.999977515841176e-05, "epoch": 0.008972999428990946, "percentage": 0.14, "elapsed_time": "0:00:23", "remaining_time": "4:50:14", "throughput": 4426.75, "total_tokens": 106144} {"current_steps": 60, "total_steps": 40000, "loss": 0.2708, "lr": 4.9999731593504344e-05, "epoch": 0.009788726649808304, "percentage": 0.15, "elapsed_time": "0:00:26", "remaining_time": "4:48:53", "throughput": 4423.35, "total_tokens": 115184} {"current_steps": 65, "total_steps": 40000, "loss": 0.1493, "lr": 4.999968417332415e-05, "epoch": 0.010604453870625663, "percentage": 0.16, "elapsed_time": "0:00:28", "remaining_time": "4:47:44", "throughput": 4416.18, "total_tokens": 124096} {"current_steps": 70, "total_steps": 40000, "loss": 0.161, "lr": 4.999963289787848e-05, "epoch": 0.011420181091443021, "percentage": 0.18, "elapsed_time": "0:00:30", "remaining_time": "4:46:55", "throughput": 4416.23, "total_tokens": 133280} {"current_steps": 75, "total_steps": 40000, "loss": 0.2371, "lr": 4.999957776717526e-05, "epoch": 0.012235908312260381, "percentage": 0.19, "elapsed_time": "0:00:32", "remaining_time": "4:46:05", "throughput": 4436.82, "total_tokens": 143072} {"current_steps": 80, "total_steps": 40000, "loss": 0.4047, "lr": 4.9999518781222984e-05, "epoch": 0.013051635533077739, "percentage": 0.2, "elapsed_time": "0:00:34", "remaining_time": "4:45:20", "throughput": 4424.75, "total_tokens": 151808} {"current_steps": 85, "total_steps": 40000, "loss": 0.1763, "lr": 4.9999455940030746e-05, "epoch": 0.013867362753895097, "percentage": 0.21, "elapsed_time": "0:00:36", "remaining_time": "4:44:38", "throughput": 4431.8, "total_tokens": 161184} {"current_steps": 90, "total_steps": 40000, "loss": 0.1932, "lr": 4.999938924360824e-05, "epoch": 0.014683089974712456, "percentage": 0.22, "elapsed_time": "0:00:38", "remaining_time": "4:44:02", "throughput": 4458.04, "total_tokens": 171328} {"current_steps": 95, "total_steps": 40000, "loss": 0.3198, "lr": 4.999931869196575e-05, "epoch": 0.015498817195529814, "percentage": 0.24, "elapsed_time": "0:00:40", "remaining_time": "4:43:32", "throughput": 4451.88, "total_tokens": 180304} {"current_steps": 100, "total_steps": 40000, "loss": 0.2359, "lr": 4.999924428511416e-05, "epoch": 0.016314544416347172, "percentage": 0.25, "elapsed_time": "0:00:42", "remaining_time": "4:43:01", "throughput": 4488.44, "total_tokens": 191024} {"current_steps": 105, "total_steps": 40000, "loss": 0.2293, "lr": 4.999916602306494e-05, "epoch": 0.017130271637164532, "percentage": 0.26, "elapsed_time": "0:00:44", "remaining_time": "4:42:40", "throughput": 4504.02, "total_tokens": 201056} {"current_steps": 110, "total_steps": 40000, "loss": 0.2658, "lr": 4.999908390583016e-05, "epoch": 0.01794599885798189, "percentage": 0.27, "elapsed_time": "0:00:46", "remaining_time": "4:42:26", "throughput": 4532.53, "total_tokens": 211808} {"current_steps": 115, "total_steps": 40000, "loss": 0.1653, "lr": 4.999899793342247e-05, "epoch": 0.01876172607879925, "percentage": 0.29, "elapsed_time": "0:00:48", "remaining_time": "4:42:05", "throughput": 4510.74, "total_tokens": 220128} {"current_steps": 120, "total_steps": 40000, "loss": 0.2473, "lr": 4.999890810585516e-05, "epoch": 0.019577453299616607, "percentage": 0.3, "elapsed_time": "0:00:50", "remaining_time": "4:41:43", "throughput": 4507.4, "total_tokens": 229264} {"current_steps": 125, "total_steps": 40000, "loss": 0.2832, "lr": 4.999881442314206e-05, "epoch": 0.020393180520433967, "percentage": 0.31, "elapsed_time": "0:00:52", "remaining_time": "4:41:23", "throughput": 4517.9, "total_tokens": 239120} {"current_steps": 130, "total_steps": 40000, "loss": 0.1388, "lr": 4.9998716885297617e-05, "epoch": 0.021208907741251327, "percentage": 0.33, "elapsed_time": "0:00:54", "remaining_time": "4:41:07", "throughput": 4521.3, "total_tokens": 248656} {"current_steps": 135, "total_steps": 40000, "loss": 0.1793, "lr": 4.999861549233688e-05, "epoch": 0.022024634962068683, "percentage": 0.34, "elapsed_time": "0:00:57", "remaining_time": "4:40:49", "throughput": 4536.5, "total_tokens": 258848} {"current_steps": 140, "total_steps": 40000, "loss": 0.1571, "lr": 4.999851024427548e-05, "epoch": 0.022840362182886043, "percentage": 0.35, "elapsed_time": "0:00:59", "remaining_time": "4:40:33", "throughput": 4531.67, "total_tokens": 267936} {"current_steps": 145, "total_steps": 40000, "loss": 0.2039, "lr": 4.999840114112965e-05, "epoch": 0.023656089403703402, "percentage": 0.36, "elapsed_time": "0:01:01", "remaining_time": "4:40:20", "throughput": 4533.94, "total_tokens": 277456} {"current_steps": 150, "total_steps": 40000, "loss": 0.2658, "lr": 4.999828818291621e-05, "epoch": 0.024471816624520762, "percentage": 0.38, "elapsed_time": "0:01:03", "remaining_time": "4:40:05", "throughput": 4529.74, "total_tokens": 286544} {"current_steps": 155, "total_steps": 40000, "loss": 0.2373, "lr": 4.999817136965259e-05, "epoch": 0.025287543845338118, "percentage": 0.39, "elapsed_time": "0:01:05", "remaining_time": "4:39:51", "throughput": 4550.18, "total_tokens": 297216} {"current_steps": 160, "total_steps": 40000, "loss": 0.2183, "lr": 4.9998050701356794e-05, "epoch": 0.026103271066155478, "percentage": 0.4, "elapsed_time": "0:01:07", "remaining_time": "4:39:37", "throughput": 4563.23, "total_tokens": 307472} {"current_steps": 165, "total_steps": 40000, "loss": 0.1058, "lr": 4.999792617804744e-05, "epoch": 0.026918998286972837, "percentage": 0.41, "elapsed_time": "0:01:09", "remaining_time": "4:39:24", "throughput": 4579.31, "total_tokens": 317984} {"current_steps": 170, "total_steps": 40000, "loss": 0.3465, "lr": 4.9997797799743724e-05, "epoch": 0.027734725507790194, "percentage": 0.43, "elapsed_time": "0:01:11", "remaining_time": "4:39:10", "throughput": 4591.23, "total_tokens": 328240} {"current_steps": 175, "total_steps": 40000, "loss": 0.1936, "lr": 4.999766556646545e-05, "epoch": 0.028550452728607553, "percentage": 0.44, "elapsed_time": "0:01:13", "remaining_time": "4:38:58", "throughput": 4579.55, "total_tokens": 336848} {"current_steps": 180, "total_steps": 40000, "loss": 0.143, "lr": 4.9997529478232996e-05, "epoch": 0.029366179949424913, "percentage": 0.45, "elapsed_time": "0:01:15", "remaining_time": "4:38:51", "throughput": 4578.86, "total_tokens": 346304} {"current_steps": 185, "total_steps": 40000, "loss": 0.2509, "lr": 4.9997389535067365e-05, "epoch": 0.030181907170242273, "percentage": 0.46, "elapsed_time": "0:01:17", "remaining_time": "4:38:38", "throughput": 4584.74, "total_tokens": 356160} {"current_steps": 190, "total_steps": 40000, "loss": 0.2983, "lr": 4.999724573699012e-05, "epoch": 0.03099763439105963, "percentage": 0.47, "elapsed_time": "0:01:19", "remaining_time": "4:38:28", "throughput": 4583.39, "total_tokens": 365488} {"current_steps": 195, "total_steps": 40000, "loss": 0.0909, "lr": 4.9997098084023457e-05, "epoch": 0.031813361611876985, "percentage": 0.49, "elapsed_time": "0:01:21", "remaining_time": "4:38:17", "throughput": 4591.73, "total_tokens": 375600} {"current_steps": 200, "total_steps": 40000, "loss": 0.08, "lr": 4.999694657619013e-05, "epoch": 0.032629088832694345, "percentage": 0.5, "elapsed_time": "0:01:23", "remaining_time": "4:38:07", "throughput": 4592.22, "total_tokens": 385088} {"current_steps": 200, "total_steps": 40000, "eval_loss": 0.2234242558479309, "epoch": 0.032629088832694345, "percentage": 0.5, "elapsed_time": "0:02:44", "remaining_time": "9:04:54", "throughput": 2343.88, "total_tokens": 385088} {"current_steps": 205, "total_steps": 40000, "loss": 0.2614, "lr": 4.999679121351352e-05, "epoch": 0.033444816053511704, "percentage": 0.51, "elapsed_time": "0:02:47", "remaining_time": "9:02:58", "throughput": 2354.68, "total_tokens": 395168} {"current_steps": 210, "total_steps": 40000, "loss": 0.1616, "lr": 4.9996631996017565e-05, "epoch": 0.034260543274329064, "percentage": 0.53, "elapsed_time": "0:02:49", "remaining_time": "8:56:47", "throughput": 2388.56, "total_tokens": 406016} {"current_steps": 215, "total_steps": 40000, "loss": 0.1697, "lr": 4.9996468923726835e-05, "epoch": 0.035076270495146424, "percentage": 0.54, "elapsed_time": "0:02:52", "remaining_time": "8:50:42", "throughput": 2415.19, "total_tokens": 415600} {"current_steps": 220, "total_steps": 40000, "loss": 0.3501, "lr": 4.999630199666647e-05, "epoch": 0.03589199771596378, "percentage": 0.55, "elapsed_time": "0:02:54", "remaining_time": "8:44:49", "throughput": 2449.36, "total_tokens": 426560} {"current_steps": 225, "total_steps": 40000, "loss": 0.3154, "lr": 4.999613121486222e-05, "epoch": 0.03670772493678114, "percentage": 0.56, "elapsed_time": "0:02:56", "remaining_time": "8:39:12", "throughput": 2477.77, "total_tokens": 436640} {"current_steps": 230, "total_steps": 40000, "loss": 0.2006, "lr": 4.999595657834041e-05, "epoch": 0.0375234521575985, "percentage": 0.57, "elapsed_time": "0:02:58", "remaining_time": "8:33:47", "throughput": 2500.0, "total_tokens": 445712} {"current_steps": 235, "total_steps": 40000, "loss": 0.2806, "lr": 4.999577808712798e-05, "epoch": 0.038339179378415855, "percentage": 0.59, "elapsed_time": "0:03:00", "remaining_time": "8:28:40", "throughput": 2522.44, "total_tokens": 454960} {"current_steps": 240, "total_steps": 40000, "loss": 0.3143, "lr": 4.999559574125244e-05, "epoch": 0.039154906599233215, "percentage": 0.6, "elapsed_time": "0:03:02", "remaining_time": "8:23:42", "throughput": 2549.53, "total_tokens": 465104} {"current_steps": 245, "total_steps": 40000, "loss": 0.1285, "lr": 4.9995409540741934e-05, "epoch": 0.039970633820050575, "percentage": 0.61, "elapsed_time": "0:03:04", "remaining_time": "8:18:56", "throughput": 2569.42, "total_tokens": 474032} {"current_steps": 250, "total_steps": 40000, "loss": 0.2862, "lr": 4.999521948562516e-05, "epoch": 0.040786361040867934, "percentage": 0.62, "elapsed_time": "0:03:06", "remaining_time": "8:14:22", "throughput": 2594.59, "total_tokens": 484032} {"current_steps": 255, "total_steps": 40000, "loss": 0.1644, "lr": 4.999502557593143e-05, "epoch": 0.041602088261685294, "percentage": 0.64, "elapsed_time": "0:03:08", "remaining_time": "8:09:58", "throughput": 2612.2, "total_tokens": 492704} {"current_steps": 260, "total_steps": 40000, "loss": 0.154, "lr": 4.999482781169066e-05, "epoch": 0.042417815482502653, "percentage": 0.65, "elapsed_time": "0:03:10", "remaining_time": "8:05:45", "throughput": 2630.44, "total_tokens": 501584} {"current_steps": 265, "total_steps": 40000, "loss": 0.2047, "lr": 4.9994626192933324e-05, "epoch": 0.04323354270332001, "percentage": 0.66, "elapsed_time": "0:03:12", "remaining_time": "8:01:41", "throughput": 2658.79, "total_tokens": 512480} {"current_steps": 270, "total_steps": 40000, "loss": 0.1446, "lr": 4.999442071969054e-05, "epoch": 0.044049269924137366, "percentage": 0.68, "elapsed_time": "0:03:14", "remaining_time": "7:57:48", "throughput": 2676.87, "total_tokens": 521520} {"current_steps": 275, "total_steps": 40000, "loss": 0.2746, "lr": 4.999421139199397e-05, "epoch": 0.044864997144954726, "percentage": 0.69, "elapsed_time": "0:03:16", "remaining_time": "7:54:01", "throughput": 2699.82, "total_tokens": 531568} {"current_steps": 280, "total_steps": 40000, "loss": 0.2376, "lr": 4.999399820987592e-05, "epoch": 0.045680724365772085, "percentage": 0.7, "elapsed_time": "0:03:18", "remaining_time": "7:50:23", "throughput": 2721.24, "total_tokens": 541408} {"current_steps": 285, "total_steps": 40000, "loss": 0.204, "lr": 4.999378117336924e-05, "epoch": 0.046496451586589445, "percentage": 0.71, "elapsed_time": "0:03:21", "remaining_time": "7:46:51", "throughput": 2741.09, "total_tokens": 551008} {"current_steps": 290, "total_steps": 40000, "loss": 0.2562, "lr": 4.9993560282507415e-05, "epoch": 0.047312178807406804, "percentage": 0.73, "elapsed_time": "0:03:23", "remaining_time": "7:43:28", "throughput": 2757.43, "total_tokens": 559984} {"current_steps": 295, "total_steps": 40000, "loss": 0.3027, "lr": 4.9993335537324495e-05, "epoch": 0.048127906028224164, "percentage": 0.74, "elapsed_time": "0:03:25", "remaining_time": "7:40:10", "throughput": 2777.38, "total_tokens": 569760} {"current_steps": 300, "total_steps": 40000, "loss": 0.1503, "lr": 4.999310693785516e-05, "epoch": 0.048943633249041524, "percentage": 0.75, "elapsed_time": "0:03:27", "remaining_time": "7:37:01", "throughput": 2796.66, "total_tokens": 579504} {"current_steps": 305, "total_steps": 40000, "loss": 0.1521, "lr": 4.9992874484134653e-05, "epoch": 0.049759360469858877, "percentage": 0.76, "elapsed_time": "0:03:29", "remaining_time": "7:33:56", "throughput": 2822.21, "total_tokens": 590624} {"current_steps": 310, "total_steps": 40000, "loss": 0.1643, "lr": 4.999263817619882e-05, "epoch": 0.050575087690676236, "percentage": 0.78, "elapsed_time": "0:03:31", "remaining_time": "7:31:00", "throughput": 2836.54, "total_tokens": 599520} {"current_steps": 315, "total_steps": 40000, "loss": 0.1998, "lr": 4.9992398014084105e-05, "epoch": 0.051390814911493596, "percentage": 0.79, "elapsed_time": "0:03:33", "remaining_time": "7:28:07", "throughput": 2857.35, "total_tokens": 609808} {"current_steps": 320, "total_steps": 40000, "loss": 0.118, "lr": 4.999215399782754e-05, "epoch": 0.052206542132310955, "percentage": 0.8, "elapsed_time": "0:03:35", "remaining_time": "7:25:18", "throughput": 2876.69, "total_tokens": 619856} {"current_steps": 325, "total_steps": 40000, "loss": 0.1491, "lr": 4.999190612746675e-05, "epoch": 0.053022269353128315, "percentage": 0.81, "elapsed_time": "0:03:37", "remaining_time": "7:22:35", "throughput": 2892.2, "total_tokens": 629152} {"current_steps": 330, "total_steps": 40000, "loss": 0.1131, "lr": 4.999165440303998e-05, "epoch": 0.053837996573945675, "percentage": 0.83, "elapsed_time": "0:03:39", "remaining_time": "7:19:58", "throughput": 2904.81, "total_tokens": 637904} {"current_steps": 335, "total_steps": 40000, "loss": 0.2898, "lr": 4.999139882458603e-05, "epoch": 0.054653723794763034, "percentage": 0.84, "elapsed_time": "0:03:41", "remaining_time": "7:17:25", "throughput": 2922.72, "total_tokens": 647856} {"current_steps": 340, "total_steps": 40000, "loss": 0.1406, "lr": 4.9991139392144314e-05, "epoch": 0.05546945101558039, "percentage": 0.85, "elapsed_time": "0:03:43", "remaining_time": "7:14:56", "throughput": 2938.7, "total_tokens": 657456} {"current_steps": 345, "total_steps": 40000, "loss": 0.3754, "lr": 4.999087610575485e-05, "epoch": 0.05628517823639775, "percentage": 0.86, "elapsed_time": "0:03:45", "remaining_time": "7:12:34", "throughput": 2949.36, "total_tokens": 665968} {"current_steps": 350, "total_steps": 40000, "loss": 0.1411, "lr": 4.999060896545824e-05, "epoch": 0.057100905457215106, "percentage": 0.88, "elapsed_time": "0:03:47", "remaining_time": "7:10:14", "throughput": 2963.06, "total_tokens": 675184} {"current_steps": 355, "total_steps": 40000, "loss": 0.1409, "lr": 4.999033797129568e-05, "epoch": 0.057916632678032466, "percentage": 0.89, "elapsed_time": "0:03:49", "remaining_time": "7:07:57", "throughput": 2983.33, "total_tokens": 685952} {"current_steps": 360, "total_steps": 40000, "loss": 0.3955, "lr": 4.999006312330894e-05, "epoch": 0.058732359898849826, "percentage": 0.9, "elapsed_time": "0:03:51", "remaining_time": "7:05:44", "throughput": 2995.62, "total_tokens": 694960} {"current_steps": 365, "total_steps": 40000, "loss": 0.1654, "lr": 4.998978442154043e-05, "epoch": 0.059548087119667185, "percentage": 0.91, "elapsed_time": "0:03:54", "remaining_time": "7:03:35", "throughput": 3015.71, "total_tokens": 705840} {"current_steps": 370, "total_steps": 40000, "loss": 0.1621, "lr": 4.9989501866033125e-05, "epoch": 0.060363814340484545, "percentage": 0.92, "elapsed_time": "0:03:56", "remaining_time": "7:01:30", "throughput": 3030.87, "total_tokens": 715648} {"current_steps": 375, "total_steps": 40000, "loss": 0.1704, "lr": 4.998921545683059e-05, "epoch": 0.0611795415613019, "percentage": 0.94, "elapsed_time": "0:03:58", "remaining_time": "6:59:27", "throughput": 3045.89, "total_tokens": 725472} {"current_steps": 380, "total_steps": 40000, "loss": 0.1311, "lr": 4.9988925193976996e-05, "epoch": 0.06199526878211926, "percentage": 0.95, "elapsed_time": "0:04:00", "remaining_time": "6:57:29", "throughput": 3058.62, "total_tokens": 734848} {"current_steps": 385, "total_steps": 40000, "loss": 0.2051, "lr": 4.998863107751711e-05, "epoch": 0.06281099600293662, "percentage": 0.96, "elapsed_time": "0:04:02", "remaining_time": "6:55:33", "throughput": 3069.45, "total_tokens": 743776} {"current_steps": 390, "total_steps": 40000, "loss": 0.354, "lr": 4.998833310749629e-05, "epoch": 0.06362672322375397, "percentage": 0.97, "elapsed_time": "0:04:04", "remaining_time": "6:53:39", "throughput": 3077.89, "total_tokens": 752160} {"current_steps": 395, "total_steps": 40000, "loss": 0.2271, "lr": 4.998803128396047e-05, "epoch": 0.06444245044457134, "percentage": 0.99, "elapsed_time": "0:04:06", "remaining_time": "6:51:49", "throughput": 3087.76, "total_tokens": 760944} {"current_steps": 400, "total_steps": 40000, "loss": 0.1261, "lr": 4.9987725606956215e-05, "epoch": 0.06525817766538869, "percentage": 1.0, "elapsed_time": "0:04:08", "remaining_time": "6:50:01", "throughput": 3100.01, "total_tokens": 770352} {"current_steps": 400, "total_steps": 40000, "eval_loss": 0.19892781972885132, "epoch": 0.06525817766538869, "percentage": 1.0, "elapsed_time": "0:05:29", "remaining_time": "9:02:53", "throughput": 2341.34, "total_tokens": 770352} {"current_steps": 405, "total_steps": 40000, "loss": 0.1671, "lr": 4.998741607653066e-05, "epoch": 0.06607390488620606, "percentage": 1.01, "elapsed_time": "0:05:32", "remaining_time": "9:02:18", "throughput": 2342.13, "total_tokens": 779504} {"current_steps": 410, "total_steps": 40000, "loss": 0.1209, "lr": 4.9987102692731523e-05, "epoch": 0.06688963210702341, "percentage": 1.03, "elapsed_time": "0:05:34", "remaining_time": "8:58:58", "throughput": 2356.71, "total_tokens": 789264} {"current_steps": 415, "total_steps": 40000, "loss": 0.4041, "lr": 4.9986785455607157e-05, "epoch": 0.06770535932784078, "percentage": 1.04, "elapsed_time": "0:05:36", "remaining_time": "8:55:41", "throughput": 2371.91, "total_tokens": 799248} {"current_steps": 420, "total_steps": 40000, "loss": 0.0997, "lr": 4.9986464365206456e-05, "epoch": 0.06852108654865813, "percentage": 1.05, "elapsed_time": "0:05:39", "remaining_time": "8:52:29", "throughput": 2387.55, "total_tokens": 809456} {"current_steps": 425, "total_steps": 40000, "loss": 0.1324, "lr": 4.9986139421578956e-05, "epoch": 0.06933681376947548, "percentage": 1.06, "elapsed_time": "0:05:41", "remaining_time": "8:49:22", "throughput": 2400.62, "total_tokens": 818848} {"current_steps": 430, "total_steps": 40000, "loss": 0.1638, "lr": 4.998581062477477e-05, "epoch": 0.07015254099029285, "percentage": 1.07, "elapsed_time": "0:05:43", "remaining_time": "8:46:19", "throughput": 2414.75, "total_tokens": 828656} {"current_steps": 435, "total_steps": 40000, "loss": 0.1425, "lr": 4.998547797484458e-05, "epoch": 0.0709682682111102, "percentage": 1.09, "elapsed_time": "0:05:45", "remaining_time": "8:43:21", "throughput": 2428.96, "total_tokens": 838576} {"current_steps": 440, "total_steps": 40000, "loss": 0.3307, "lr": 4.9985141471839706e-05, "epoch": 0.07178399543192757, "percentage": 1.1, "elapsed_time": "0:05:47", "remaining_time": "8:40:26", "throughput": 2440.93, "total_tokens": 847760} {"current_steps": 445, "total_steps": 40000, "loss": 0.2363, "lr": 4.998480111581203e-05, "epoch": 0.07259972265274492, "percentage": 1.11, "elapsed_time": "0:05:49", "remaining_time": "8:37:34", "throughput": 2453.4, "total_tokens": 857152} {"current_steps": 450, "total_steps": 40000, "loss": 0.2565, "lr": 4.998445690681405e-05, "epoch": 0.07341544987356229, "percentage": 1.12, "elapsed_time": "0:05:51", "remaining_time": "8:34:47", "throughput": 2466.36, "total_tokens": 866784} {"current_steps": 455, "total_steps": 40000, "loss": 0.1983, "lr": 4.9984108844898834e-05, "epoch": 0.07423117709437964, "percentage": 1.14, "elapsed_time": "0:05:53", "remaining_time": "8:32:04", "throughput": 2480.73, "total_tokens": 876960} {"current_steps": 460, "total_steps": 40000, "loss": 0.1215, "lr": 4.9983756930120076e-05, "epoch": 0.075046904315197, "percentage": 1.15, "elapsed_time": "0:05:55", "remaining_time": "8:29:23", "throughput": 2492.18, "total_tokens": 886144} {"current_steps": 465, "total_steps": 40000, "loss": 0.2265, "lr": 4.9983401162532025e-05, "epoch": 0.07586263153601436, "percentage": 1.16, "elapsed_time": "0:05:57", "remaining_time": "8:26:46", "throughput": 2506.83, "total_tokens": 896528} {"current_steps": 470, "total_steps": 40000, "loss": 0.137, "lr": 4.998304154218955e-05, "epoch": 0.07667835875683171, "percentage": 1.18, "elapsed_time": "0:05:59", "remaining_time": "8:24:13", "throughput": 2521.97, "total_tokens": 907168} {"current_steps": 475, "total_steps": 40000, "loss": 0.3018, "lr": 4.998267806914812e-05, "epoch": 0.07749408597764908, "percentage": 1.19, "elapsed_time": "0:06:01", "remaining_time": "8:21:43", "throughput": 2533.79, "total_tokens": 916656} {"current_steps": 480, "total_steps": 40000, "loss": 0.1544, "lr": 4.998231074346378e-05, "epoch": 0.07830981319846643, "percentage": 1.2, "elapsed_time": "0:06:03", "remaining_time": "8:19:15", "throughput": 2544.14, "total_tokens": 925648} {"current_steps": 485, "total_steps": 40000, "loss": 0.2134, "lr": 4.998193956519317e-05, "epoch": 0.0791255404192838, "percentage": 1.21, "elapsed_time": "0:06:05", "remaining_time": "8:16:51", "throughput": 2558.37, "total_tokens": 936096} {"current_steps": 490, "total_steps": 40000, "loss": 0.257, "lr": 4.9981564534393545e-05, "epoch": 0.07994126764010115, "percentage": 1.23, "elapsed_time": "0:06:07", "remaining_time": "8:14:29", "throughput": 2572.23, "total_tokens": 946464} {"current_steps": 495, "total_steps": 40000, "loss": 0.2332, "lr": 4.998118565112272e-05, "epoch": 0.08075699486091852, "percentage": 1.24, "elapsed_time": "0:06:10", "remaining_time": "8:12:10", "throughput": 2579.44, "total_tokens": 954448} {"current_steps": 500, "total_steps": 40000, "loss": 0.1084, "lr": 4.998080291543914e-05, "epoch": 0.08157272208173587, "percentage": 1.25, "elapsed_time": "0:06:12", "remaining_time": "8:09:55", "throughput": 2592.71, "total_tokens": 964720} {"current_steps": 505, "total_steps": 40000, "loss": 0.1389, "lr": 4.9980416327401826e-05, "epoch": 0.08238844930255322, "percentage": 1.26, "elapsed_time": "0:06:14", "remaining_time": "8:07:41", "throughput": 2603.87, "total_tokens": 974240} {"current_steps": 510, "total_steps": 40000, "loss": 0.1564, "lr": 4.998002588707038e-05, "epoch": 0.08320417652337059, "percentage": 1.27, "elapsed_time": "0:06:16", "remaining_time": "8:05:32", "throughput": 2613.15, "total_tokens": 983152} {"current_steps": 515, "total_steps": 40000, "loss": 0.1981, "lr": 4.997963159450503e-05, "epoch": 0.08401990374418794, "percentage": 1.29, "elapsed_time": "0:06:18", "remaining_time": "8:03:23", "throughput": 2625.52, "total_tokens": 993216} {"current_steps": 520, "total_steps": 40000, "loss": 0.2121, "lr": 4.9979233449766575e-05, "epoch": 0.08483563096500531, "percentage": 1.3, "elapsed_time": "0:06:20", "remaining_time": "8:01:18", "throughput": 2635.6, "total_tokens": 1002480} {"current_steps": 525, "total_steps": 40000, "loss": 0.0921, "lr": 4.997883145291641e-05, "epoch": 0.08565135818582266, "percentage": 1.31, "elapsed_time": "0:06:22", "remaining_time": "7:59:14", "throughput": 2646.8, "total_tokens": 1012192} {"current_steps": 530, "total_steps": 40000, "loss": 0.2231, "lr": 4.9978425604016536e-05, "epoch": 0.08646708540664003, "percentage": 1.32, "elapsed_time": "0:06:24", "remaining_time": "7:57:13", "throughput": 2657.64, "total_tokens": 1021824} {"current_steps": 535, "total_steps": 40000, "loss": 0.2767, "lr": 4.9978015903129536e-05, "epoch": 0.08728281262745738, "percentage": 1.34, "elapsed_time": "0:06:26", "remaining_time": "7:55:14", "throughput": 2670.79, "total_tokens": 1032384} {"current_steps": 540, "total_steps": 40000, "loss": 0.227, "lr": 4.997760235031859e-05, "epoch": 0.08809853984827473, "percentage": 1.35, "elapsed_time": "0:06:28", "remaining_time": "7:53:18", "throughput": 2680.96, "total_tokens": 1041888} {"current_steps": 545, "total_steps": 40000, "loss": 0.1751, "lr": 4.9977184945647473e-05, "epoch": 0.0889142670690921, "percentage": 1.36, "elapsed_time": "0:06:30", "remaining_time": "7:51:25", "throughput": 2692.61, "total_tokens": 1052032} {"current_steps": 550, "total_steps": 40000, "loss": 0.2559, "lr": 4.997676368918055e-05, "epoch": 0.08972999428990945, "percentage": 1.38, "elapsed_time": "0:06:32", "remaining_time": "7:49:32", "throughput": 2706.65, "total_tokens": 1063104} {"current_steps": 555, "total_steps": 40000, "loss": 0.1594, "lr": 4.9976338580982794e-05, "epoch": 0.09054572151072682, "percentage": 1.39, "elapsed_time": "0:06:34", "remaining_time": "7:47:42", "throughput": 2720.09, "total_tokens": 1074016} {"current_steps": 560, "total_steps": 40000, "loss": 0.1242, "lr": 4.9975909621119755e-05, "epoch": 0.09136144873154417, "percentage": 1.4, "elapsed_time": "0:06:36", "remaining_time": "7:45:53", "throughput": 2730.56, "total_tokens": 1083776} {"current_steps": 565, "total_steps": 40000, "loss": 0.2617, "lr": 4.997547680965758e-05, "epoch": 0.09217717595236154, "percentage": 1.41, "elapsed_time": "0:06:38", "remaining_time": "7:44:06", "throughput": 2739.32, "total_tokens": 1092896} {"current_steps": 570, "total_steps": 40000, "loss": 0.2124, "lr": 4.997504014666302e-05, "epoch": 0.09299290317317889, "percentage": 1.43, "elapsed_time": "0:06:41", "remaining_time": "7:42:21", "throughput": 2747.73, "total_tokens": 1101920} {"current_steps": 575, "total_steps": 40000, "loss": 0.2258, "lr": 4.997459963220342e-05, "epoch": 0.09380863039399624, "percentage": 1.44, "elapsed_time": "0:06:43", "remaining_time": "7:40:37", "throughput": 2758.45, "total_tokens": 1111904} {"current_steps": 580, "total_steps": 40000, "loss": 0.145, "lr": 4.997415526634671e-05, "epoch": 0.09462435761481361, "percentage": 1.45, "elapsed_time": "0:06:45", "remaining_time": "7:38:56", "throughput": 2768.65, "total_tokens": 1121744} {"current_steps": 585, "total_steps": 40000, "loss": 0.2283, "lr": 4.99737070491614e-05, "epoch": 0.09544008483563096, "percentage": 1.46, "elapsed_time": "0:06:47", "remaining_time": "7:37:17", "throughput": 2775.85, "total_tokens": 1130400} {"current_steps": 590, "total_steps": 40000, "loss": 0.1617, "lr": 4.997325498071663e-05, "epoch": 0.09625581205644833, "percentage": 1.47, "elapsed_time": "0:06:49", "remaining_time": "7:35:39", "throughput": 2787.37, "total_tokens": 1140848} {"current_steps": 595, "total_steps": 40000, "loss": 0.2155, "lr": 4.997279906108211e-05, "epoch": 0.09707153927726568, "percentage": 1.49, "elapsed_time": "0:06:51", "remaining_time": "7:34:02", "throughput": 2798.09, "total_tokens": 1151008} {"current_steps": 600, "total_steps": 40000, "loss": 0.1522, "lr": 4.9972339290328155e-05, "epoch": 0.09788726649808305, "percentage": 1.5, "elapsed_time": "0:06:53", "remaining_time": "7:32:27", "throughput": 2807.06, "total_tokens": 1160480} {"current_steps": 600, "total_steps": 40000, "eval_loss": 0.1722775250673294, "epoch": 0.09788726649808305, "percentage": 1.5, "elapsed_time": "0:08:13", "remaining_time": "9:00:30", "throughput": 2349.79, "total_tokens": 1160480} {"current_steps": 605, "total_steps": 40000, "loss": 0.1845, "lr": 4.9971875668525646e-05, "epoch": 0.0987029937189004, "percentage": 1.51, "elapsed_time": "0:08:17", "remaining_time": "9:00:19", "throughput": 2350.28, "total_tokens": 1170144} {"current_steps": 610, "total_steps": 40000, "loss": 0.1687, "lr": 4.997140819574609e-05, "epoch": 0.09951872093971775, "percentage": 1.52, "elapsed_time": "0:08:19", "remaining_time": "8:58:04", "throughput": 2360.28, "total_tokens": 1180064} {"current_steps": 615, "total_steps": 40000, "loss": 0.2332, "lr": 4.997093687206159e-05, "epoch": 0.10033444816053512, "percentage": 1.54, "elapsed_time": "0:08:22", "remaining_time": "8:55:50", "throughput": 2370.82, "total_tokens": 1190224} {"current_steps": 620, "total_steps": 40000, "loss": 0.158, "lr": 4.997046169754482e-05, "epoch": 0.10115017538135247, "percentage": 1.55, "elapsed_time": "0:08:24", "remaining_time": "8:53:37", "throughput": 2379.54, "total_tokens": 1199504} {"current_steps": 625, "total_steps": 40000, "loss": 0.095, "lr": 4.996998267226905e-05, "epoch": 0.10196590260216984, "percentage": 1.56, "elapsed_time": "0:08:26", "remaining_time": "8:51:27", "throughput": 2388.83, "total_tokens": 1209120} {"current_steps": 630, "total_steps": 40000, "loss": 0.1523, "lr": 4.996949979630817e-05, "epoch": 0.10278162982298719, "percentage": 1.57, "elapsed_time": "0:08:28", "remaining_time": "8:49:19", "throughput": 2397.47, "total_tokens": 1218432} {"current_steps": 635, "total_steps": 40000, "loss": 0.2523, "lr": 4.996901306973663e-05, "epoch": 0.10359735704380456, "percentage": 1.59, "elapsed_time": "0:08:30", "remaining_time": "8:47:13", "throughput": 2405.27, "total_tokens": 1227376} {"current_steps": 640, "total_steps": 40000, "loss": 0.1068, "lr": 4.996852249262949e-05, "epoch": 0.10441308426462191, "percentage": 1.6, "elapsed_time": "0:08:32", "remaining_time": "8:45:09", "throughput": 2411.5, "total_tokens": 1235536} {"current_steps": 645, "total_steps": 40000, "loss": 0.2402, "lr": 4.996802806506241e-05, "epoch": 0.10522881148543926, "percentage": 1.61, "elapsed_time": "0:08:34", "remaining_time": "8:43:07", "throughput": 2420.54, "total_tokens": 1245152} {"current_steps": 650, "total_steps": 40000, "loss": 0.143, "lr": 4.996752978711164e-05, "epoch": 0.10604453870625663, "percentage": 1.62, "elapsed_time": "0:08:36", "remaining_time": "8:41:06", "throughput": 2429.49, "total_tokens": 1254768} {"current_steps": 655, "total_steps": 40000, "loss": 0.2261, "lr": 4.996702765885401e-05, "epoch": 0.10686026592707398, "percentage": 1.64, "elapsed_time": "0:08:38", "remaining_time": "8:39:07", "throughput": 2439.31, "total_tokens": 1264864} {"current_steps": 660, "total_steps": 40000, "loss": 0.2615, "lr": 4.9966521680366964e-05, "epoch": 0.10767599314789135, "percentage": 1.65, "elapsed_time": "0:08:40", "remaining_time": "8:37:11", "throughput": 2446.57, "total_tokens": 1273696} {"current_steps": 665, "total_steps": 40000, "loss": 0.2185, "lr": 4.9966011851728524e-05, "epoch": 0.1084917203687087, "percentage": 1.66, "elapsed_time": "0:08:42", "remaining_time": "8:35:16", "throughput": 2452.61, "total_tokens": 1281904} {"current_steps": 670, "total_steps": 40000, "loss": 0.257, "lr": 4.996549817301731e-05, "epoch": 0.10930744758952607, "percentage": 1.68, "elapsed_time": "0:08:44", "remaining_time": "8:33:23", "throughput": 2462.44, "total_tokens": 1292144} {"current_steps": 675, "total_steps": 40000, "loss": 0.1283, "lr": 4.9964980644312544e-05, "epoch": 0.11012317481034342, "percentage": 1.69, "elapsed_time": "0:08:46", "remaining_time": "8:31:31", "throughput": 2471.49, "total_tokens": 1302000} {"current_steps": 680, "total_steps": 40000, "loss": 0.2018, "lr": 4.996445926569403e-05, "epoch": 0.11093890203116077, "percentage": 1.7, "elapsed_time": "0:08:48", "remaining_time": "8:29:41", "throughput": 2480.8, "total_tokens": 1312016} {"current_steps": 685, "total_steps": 40000, "loss": 0.1954, "lr": 4.996393403724218e-05, "epoch": 0.11175462925197814, "percentage": 1.71, "elapsed_time": "0:08:50", "remaining_time": "8:27:52", "throughput": 2487.95, "total_tokens": 1320928} {"current_steps": 690, "total_steps": 40000, "loss": 0.285, "lr": 4.9963404959037985e-05, "epoch": 0.1125703564727955, "percentage": 1.73, "elapsed_time": "0:08:52", "remaining_time": "8:26:05", "throughput": 2496.22, "total_tokens": 1330464} {"current_steps": 695, "total_steps": 40000, "loss": 0.2239, "lr": 4.996287203116303e-05, "epoch": 0.11338608369361286, "percentage": 1.74, "elapsed_time": "0:08:55", "remaining_time": "8:24:19", "throughput": 2506.11, "total_tokens": 1340896} {"current_steps": 700, "total_steps": 40000, "loss": 0.1688, "lr": 4.996233525369951e-05, "epoch": 0.11420181091443021, "percentage": 1.75, "elapsed_time": "0:08:57", "remaining_time": "8:22:35", "throughput": 2513.67, "total_tokens": 1350128} {"current_steps": 705, "total_steps": 40000, "loss": 0.1853, "lr": 4.99617946267302e-05, "epoch": 0.11501753813524758, "percentage": 1.76, "elapsed_time": "0:08:59", "remaining_time": "8:20:52", "throughput": 2521.53, "total_tokens": 1359552} {"current_steps": 710, "total_steps": 40000, "loss": 0.1948, "lr": 4.996125015033846e-05, "epoch": 0.11583326535606493, "percentage": 1.77, "elapsed_time": "0:09:01", "remaining_time": "8:19:11", "throughput": 2531.99, "total_tokens": 1370448} {"current_steps": 715, "total_steps": 40000, "loss": 0.1294, "lr": 4.996070182460827e-05, "epoch": 0.11664899257688228, "percentage": 1.79, "elapsed_time": "0:09:03", "remaining_time": "8:17:31", "throughput": 2540.96, "total_tokens": 1380528} {"current_steps": 720, "total_steps": 40000, "loss": 0.2038, "lr": 4.996014964962418e-05, "epoch": 0.11746471979769965, "percentage": 1.8, "elapsed_time": "0:09:05", "remaining_time": "8:15:53", "throughput": 2547.49, "total_tokens": 1389328} {"current_steps": 725, "total_steps": 40000, "loss": 0.1001, "lr": 4.9959593625471344e-05, "epoch": 0.118280447018517, "percentage": 1.81, "elapsed_time": "0:09:07", "remaining_time": "8:14:15", "throughput": 2555.45, "total_tokens": 1398944} {"current_steps": 730, "total_steps": 40000, "loss": 0.4022, "lr": 4.995903375223552e-05, "epoch": 0.11909617423933437, "percentage": 1.82, "elapsed_time": "0:09:09", "remaining_time": "8:12:39", "throughput": 2561.85, "total_tokens": 1407728} {"current_steps": 735, "total_steps": 40000, "loss": 0.2167, "lr": 4.995847003000302e-05, "epoch": 0.11991190146015172, "percentage": 1.84, "elapsed_time": "0:09:11", "remaining_time": "8:11:05", "throughput": 2570.4, "total_tokens": 1417744} {"current_steps": 740, "total_steps": 40000, "loss": 0.2059, "lr": 4.9957902458860804e-05, "epoch": 0.12072762868096909, "percentage": 1.85, "elapsed_time": "0:09:13", "remaining_time": "8:09:32", "throughput": 2579.35, "total_tokens": 1428016} {"current_steps": 745, "total_steps": 40000, "loss": 0.2557, "lr": 4.995733103889639e-05, "epoch": 0.12154335590178644, "percentage": 1.86, "elapsed_time": "0:09:15", "remaining_time": "8:08:01", "throughput": 2589.03, "total_tokens": 1438784} {"current_steps": 750, "total_steps": 40000, "loss": 0.2066, "lr": 4.99567557701979e-05, "epoch": 0.1223590831226038, "percentage": 1.88, "elapsed_time": "0:09:17", "remaining_time": "8:06:30", "throughput": 2597.98, "total_tokens": 1449120} {"current_steps": 755, "total_steps": 40000, "loss": 0.1827, "lr": 4.995617665285403e-05, "epoch": 0.12317481034342116, "percentage": 1.89, "elapsed_time": "0:09:19", "remaining_time": "8:05:01", "throughput": 2606.61, "total_tokens": 1459312} {"current_steps": 760, "total_steps": 40000, "loss": 0.2396, "lr": 4.99555936869541e-05, "epoch": 0.12399053756423851, "percentage": 1.9, "elapsed_time": "0:09:21", "remaining_time": "8:03:32", "throughput": 2615.6, "total_tokens": 1469744} {"current_steps": 765, "total_steps": 40000, "loss": 0.1619, "lr": 4.995500687258803e-05, "epoch": 0.12480626478505588, "percentage": 1.91, "elapsed_time": "0:09:23", "remaining_time": "8:02:04", "throughput": 2623.13, "total_tokens": 1479376} {"current_steps": 770, "total_steps": 40000, "loss": 0.152, "lr": 4.995441620984628e-05, "epoch": 0.12562199200587323, "percentage": 1.93, "elapsed_time": "0:09:26", "remaining_time": "8:00:38", "throughput": 2630.23, "total_tokens": 1488816} {"current_steps": 775, "total_steps": 40000, "loss": 0.3288, "lr": 4.995382169881996e-05, "epoch": 0.1264377192266906, "percentage": 1.94, "elapsed_time": "0:09:28", "remaining_time": "7:59:13", "throughput": 2635.2, "total_tokens": 1497056} {"current_steps": 780, "total_steps": 40000, "loss": 0.1297, "lr": 4.9953223339600755e-05, "epoch": 0.12725344644750794, "percentage": 1.95, "elapsed_time": "0:09:30", "remaining_time": "7:57:49", "throughput": 2641.87, "total_tokens": 1506304} {"current_steps": 785, "total_steps": 40000, "loss": 0.2263, "lr": 4.995262113228091e-05, "epoch": 0.12806917366832532, "percentage": 1.96, "elapsed_time": "0:09:32", "remaining_time": "7:56:26", "throughput": 2648.07, "total_tokens": 1515328} {"current_steps": 790, "total_steps": 40000, "loss": 0.222, "lr": 4.995201507695332e-05, "epoch": 0.12888490088914267, "percentage": 1.98, "elapsed_time": "0:09:34", "remaining_time": "7:55:03", "throughput": 2656.2, "total_tokens": 1525440} {"current_steps": 795, "total_steps": 40000, "loss": 0.1505, "lr": 4.995140517371144e-05, "epoch": 0.12970062810996003, "percentage": 1.99, "elapsed_time": "0:09:36", "remaining_time": "7:53:42", "throughput": 2661.2, "total_tokens": 1533792} {"current_steps": 800, "total_steps": 40000, "loss": 0.1221, "lr": 4.995079142264932e-05, "epoch": 0.13051635533077738, "percentage": 2.0, "elapsed_time": "0:09:38", "remaining_time": "7:52:22", "throughput": 2668.16, "total_tokens": 1543296} {"current_steps": 800, "total_steps": 40000, "eval_loss": 0.17393815517425537, "epoch": 0.13051635533077738, "percentage": 2.0, "elapsed_time": "0:10:59", "remaining_time": "8:58:13", "throughput": 2341.71, "total_tokens": 1543296} {"current_steps": 805, "total_steps": 40000, "loss": 0.1947, "lr": 4.995017382386162e-05, "epoch": 0.13133208255159476, "percentage": 2.01, "elapsed_time": "0:11:02", "remaining_time": "8:57:41", "throughput": 2344.02, "total_tokens": 1553120} {"current_steps": 810, "total_steps": 40000, "loss": 0.2138, "lr": 4.994955237744356e-05, "epoch": 0.1321478097724121, "percentage": 2.02, "elapsed_time": "0:11:04", "remaining_time": "8:55:58", "throughput": 2350.54, "total_tokens": 1562320} {"current_steps": 815, "total_steps": 40000, "loss": 0.1636, "lr": 4.994892708349101e-05, "epoch": 0.13296353699322946, "percentage": 2.04, "elapsed_time": "0:11:06", "remaining_time": "8:54:16", "throughput": 2358.72, "total_tokens": 1572624} {"current_steps": 820, "total_steps": 40000, "loss": 0.1175, "lr": 4.994829794210035e-05, "epoch": 0.13377926421404682, "percentage": 2.05, "elapsed_time": "0:11:08", "remaining_time": "8:52:34", "throughput": 2366.08, "total_tokens": 1582400} {"current_steps": 825, "total_steps": 40000, "loss": 0.0782, "lr": 4.994766495336864e-05, "epoch": 0.13459499143486417, "percentage": 2.06, "elapsed_time": "0:11:10", "remaining_time": "8:50:55", "throughput": 2371.84, "total_tokens": 1591136} {"current_steps": 830, "total_steps": 40000, "loss": 0.103, "lr": 4.994702811739348e-05, "epoch": 0.13541071865568155, "percentage": 2.08, "elapsed_time": "0:11:12", "remaining_time": "8:49:16", "throughput": 2378.56, "total_tokens": 1600544} {"current_steps": 835, "total_steps": 40000, "loss": 0.2403, "lr": 4.994638743427308e-05, "epoch": 0.1362264458764989, "percentage": 2.09, "elapsed_time": "0:11:14", "remaining_time": "8:47:39", "throughput": 2385.23, "total_tokens": 1609968} {"current_steps": 840, "total_steps": 40000, "loss": 0.1422, "lr": 4.994574290410624e-05, "epoch": 0.13704217309731626, "percentage": 2.1, "elapsed_time": "0:11:17", "remaining_time": "8:46:03", "throughput": 2391.61, "total_tokens": 1619232} {"current_steps": 845, "total_steps": 40000, "loss": 0.268, "lr": 4.9945094526992364e-05, "epoch": 0.1378579003181336, "percentage": 2.11, "elapsed_time": "0:11:19", "remaining_time": "8:44:28", "throughput": 2397.89, "total_tokens": 1628448} {"current_steps": 850, "total_steps": 40000, "loss": 0.1572, "lr": 4.994444230303142e-05, "epoch": 0.13867362753895096, "percentage": 2.12, "elapsed_time": "0:11:21", "remaining_time": "8:42:54", "throughput": 2406.88, "total_tokens": 1639536} {"current_steps": 855, "total_steps": 40000, "loss": 0.1568, "lr": 4.994378623232402e-05, "epoch": 0.13948935475976834, "percentage": 2.14, "elapsed_time": "0:11:23", "remaining_time": "8:41:21", "throughput": 2414.68, "total_tokens": 1649840} {"current_steps": 860, "total_steps": 40000, "loss": 0.1522, "lr": 4.99431263149713e-05, "epoch": 0.1403050819805857, "percentage": 2.15, "elapsed_time": "0:11:25", "remaining_time": "8:39:49", "throughput": 2421.58, "total_tokens": 1659552} {"current_steps": 865, "total_steps": 40000, "loss": 0.1579, "lr": 4.9942462551075056e-05, "epoch": 0.14112080920140305, "percentage": 2.16, "elapsed_time": "0:11:27", "remaining_time": "8:38:19", "throughput": 2427.45, "total_tokens": 1668592} {"current_steps": 870, "total_steps": 40000, "loss": 0.2029, "lr": 4.994179494073764e-05, "epoch": 0.1419365364222204, "percentage": 2.17, "elapsed_time": "0:11:29", "remaining_time": "8:36:49", "throughput": 2436.68, "total_tokens": 1679968} {"current_steps": 875, "total_steps": 40000, "loss": 0.1563, "lr": 4.9941123484062e-05, "epoch": 0.14275226364303778, "percentage": 2.19, "elapsed_time": "0:11:31", "remaining_time": "8:35:21", "throughput": 2446.22, "total_tokens": 1691664} {"current_steps": 880, "total_steps": 40000, "loss": 0.1187, "lr": 4.99404481811517e-05, "epoch": 0.14356799086385513, "percentage": 2.2, "elapsed_time": "0:11:33", "remaining_time": "8:33:53", "throughput": 2452.53, "total_tokens": 1701088} {"current_steps": 885, "total_steps": 40000, "loss": 0.1769, "lr": 4.9939769032110864e-05, "epoch": 0.14438371808467249, "percentage": 2.21, "elapsed_time": "0:11:35", "remaining_time": "8:32:26", "throughput": 2457.95, "total_tokens": 1709904} {"current_steps": 890, "total_steps": 40000, "loss": 0.2754, "lr": 4.993908603704423e-05, "epoch": 0.14519944530548984, "percentage": 2.23, "elapsed_time": "0:11:37", "remaining_time": "8:31:00", "throughput": 2463.83, "total_tokens": 1719072} {"current_steps": 895, "total_steps": 40000, "loss": 0.091, "lr": 4.9938399196057126e-05, "epoch": 0.1460151725263072, "percentage": 2.24, "elapsed_time": "0:11:39", "remaining_time": "8:29:35", "throughput": 2468.87, "total_tokens": 1727680} {"current_steps": 900, "total_steps": 40000, "loss": 0.2754, "lr": 4.993770850925547e-05, "epoch": 0.14683089974712457, "percentage": 2.25, "elapsed_time": "0:11:41", "remaining_time": "8:28:11", "throughput": 2474.66, "total_tokens": 1736832} {"current_steps": 905, "total_steps": 40000, "loss": 0.1225, "lr": 4.993701397674577e-05, "epoch": 0.14764662696794192, "percentage": 2.26, "elapsed_time": "0:11:43", "remaining_time": "8:26:48", "throughput": 2481.94, "total_tokens": 1747056} {"current_steps": 910, "total_steps": 40000, "loss": 0.1842, "lr": 4.993631559863515e-05, "epoch": 0.14846235418875928, "percentage": 2.27, "elapsed_time": "0:11:45", "remaining_time": "8:25:26", "throughput": 2490.24, "total_tokens": 1758064} {"current_steps": 915, "total_steps": 40000, "loss": 0.2701, "lr": 4.9935613375031283e-05, "epoch": 0.14927808140957663, "percentage": 2.29, "elapsed_time": "0:11:48", "remaining_time": "8:24:04", "throughput": 2496.48, "total_tokens": 1767616} {"current_steps": 920, "total_steps": 40000, "loss": 0.1472, "lr": 4.993490730604248e-05, "epoch": 0.150093808630394, "percentage": 2.3, "elapsed_time": "0:11:50", "remaining_time": "8:22:44", "throughput": 2502.09, "total_tokens": 1776752} {"current_steps": 925, "total_steps": 40000, "loss": 0.1796, "lr": 4.993419739177761e-05, "epoch": 0.15090953585121136, "percentage": 2.31, "elapsed_time": "0:11:52", "remaining_time": "8:21:24", "throughput": 2508.93, "total_tokens": 1786800} {"current_steps": 930, "total_steps": 40000, "loss": 0.187, "lr": 4.9933483632346164e-05, "epoch": 0.15172526307202872, "percentage": 2.33, "elapsed_time": "0:11:54", "remaining_time": "8:20:05", "throughput": 2515.7, "total_tokens": 1796816} {"current_steps": 935, "total_steps": 40000, "loss": 0.2038, "lr": 4.993276602785821e-05, "epoch": 0.15254099029284607, "percentage": 2.34, "elapsed_time": "0:11:56", "remaining_time": "8:18:47", "throughput": 2523.33, "total_tokens": 1807472} {"current_steps": 940, "total_steps": 40000, "loss": 0.1414, "lr": 4.993204457842441e-05, "epoch": 0.15335671751366342, "percentage": 2.35, "elapsed_time": "0:11:58", "remaining_time": "8:17:30", "throughput": 2529.27, "total_tokens": 1816944} {"current_steps": 945, "total_steps": 40000, "loss": 0.1308, "lr": 4.993131928415602e-05, "epoch": 0.1541724447344808, "percentage": 2.36, "elapsed_time": "0:12:00", "remaining_time": "8:16:14", "throughput": 2534.84, "total_tokens": 1826208} {"current_steps": 950, "total_steps": 40000, "loss": 0.3003, "lr": 4.993059014516489e-05, "epoch": 0.15498817195529815, "percentage": 2.38, "elapsed_time": "0:12:02", "remaining_time": "8:14:58", "throughput": 2541.91, "total_tokens": 1836544} {"current_steps": 955, "total_steps": 40000, "loss": 0.1475, "lr": 4.9929857161563464e-05, "epoch": 0.1558038991761155, "percentage": 2.39, "elapsed_time": "0:12:04", "remaining_time": "8:13:44", "throughput": 2547.29, "total_tokens": 1845696} {"current_steps": 960, "total_steps": 40000, "loss": 0.1477, "lr": 4.992912033346477e-05, "epoch": 0.15661962639693286, "percentage": 2.4, "elapsed_time": "0:12:06", "remaining_time": "8:12:29", "throughput": 2555.38, "total_tokens": 1856832} {"current_steps": 965, "total_steps": 40000, "loss": 0.0967, "lr": 4.992837966098245e-05, "epoch": 0.1574353536177502, "percentage": 2.41, "elapsed_time": "0:12:08", "remaining_time": "8:11:16", "throughput": 2560.43, "total_tokens": 1865776} {"current_steps": 970, "total_steps": 40000, "loss": 0.1087, "lr": 4.992763514423071e-05, "epoch": 0.1582510808385676, "percentage": 2.43, "elapsed_time": "0:12:10", "remaining_time": "8:10:03", "throughput": 2565.42, "total_tokens": 1874704} {"current_steps": 975, "total_steps": 40000, "loss": 0.2848, "lr": 4.992688678332437e-05, "epoch": 0.15906680805938495, "percentage": 2.44, "elapsed_time": "0:12:12", "remaining_time": "8:08:51", "throughput": 2571.43, "total_tokens": 1884384} {"current_steps": 980, "total_steps": 40000, "loss": 0.0531, "lr": 4.992613457837884e-05, "epoch": 0.1598825352802023, "percentage": 2.45, "elapsed_time": "0:12:14", "remaining_time": "8:07:40", "throughput": 2577.5, "total_tokens": 1894160} {"current_steps": 985, "total_steps": 40000, "loss": 0.1993, "lr": 4.992537852951011e-05, "epoch": 0.16069826250101965, "percentage": 2.46, "elapsed_time": "0:12:16", "remaining_time": "8:06:29", "throughput": 2582.94, "total_tokens": 1903488} {"current_steps": 990, "total_steps": 40000, "loss": 0.3059, "lr": 4.9924618636834785e-05, "epoch": 0.16151398972183703, "percentage": 2.48, "elapsed_time": "0:12:19", "remaining_time": "8:05:19", "throughput": 2589.6, "total_tokens": 1913728} {"current_steps": 995, "total_steps": 40000, "loss": 0.1894, "lr": 4.9923854900470046e-05, "epoch": 0.16232971694265438, "percentage": 2.49, "elapsed_time": "0:12:21", "remaining_time": "8:04:10", "throughput": 2594.89, "total_tokens": 1922976} {"current_steps": 1000, "total_steps": 40000, "loss": 0.1766, "lr": 4.992308732053367e-05, "epoch": 0.16314544416347174, "percentage": 2.5, "elapsed_time": "0:12:23", "remaining_time": "8:03:01", "throughput": 2599.57, "total_tokens": 1931808} {"current_steps": 1000, "total_steps": 40000, "eval_loss": 0.17772261798381805, "epoch": 0.16314544416347174, "percentage": 2.5, "elapsed_time": "0:13:43", "remaining_time": "8:55:24", "throughput": 2345.25, "total_tokens": 1931808} {"current_steps": 1005, "total_steps": 40000, "loss": 0.1628, "lr": 4.992231589714402e-05, "epoch": 0.1639611713842891, "percentage": 2.51, "elapsed_time": "0:13:47", "remaining_time": "8:54:59", "throughput": 2348.11, "total_tokens": 1942560} {"current_steps": 1010, "total_steps": 40000, "loss": 0.1643, "lr": 4.992154063042007e-05, "epoch": 0.16477689860510644, "percentage": 2.53, "elapsed_time": "0:13:49", "remaining_time": "8:53:36", "throughput": 2352.28, "total_tokens": 1950912} {"current_steps": 1015, "total_steps": 40000, "loss": 0.1076, "lr": 4.992076152048136e-05, "epoch": 0.16559262582592382, "percentage": 2.54, "elapsed_time": "0:13:51", "remaining_time": "8:52:14", "throughput": 2359.4, "total_tokens": 1961696} {"current_steps": 1020, "total_steps": 40000, "loss": 0.2818, "lr": 4.991997856744807e-05, "epoch": 0.16640835304674118, "percentage": 2.55, "elapsed_time": "0:13:53", "remaining_time": "8:50:52", "throughput": 2365.05, "total_tokens": 1971280} {"current_steps": 1025, "total_steps": 40000, "loss": 0.1464, "lr": 4.9919191771440905e-05, "epoch": 0.16722408026755853, "percentage": 2.56, "elapsed_time": "0:13:55", "remaining_time": "8:49:32", "throughput": 2371.04, "total_tokens": 1981200} {"current_steps": 1030, "total_steps": 40000, "loss": 0.1527, "lr": 4.991840113258122e-05, "epoch": 0.16803980748837588, "percentage": 2.57, "elapsed_time": "0:13:57", "remaining_time": "8:48:12", "throughput": 2376.18, "total_tokens": 1990400} {"current_steps": 1035, "total_steps": 40000, "loss": 0.1042, "lr": 4.9917606650990933e-05, "epoch": 0.16885553470919323, "percentage": 2.59, "elapsed_time": "0:13:59", "remaining_time": "8:46:53", "throughput": 2381.77, "total_tokens": 2000016} {"current_steps": 1040, "total_steps": 40000, "loss": 0.1441, "lr": 4.9916808326792566e-05, "epoch": 0.16967126193001061, "percentage": 2.6, "elapsed_time": "0:14:01", "remaining_time": "8:45:34", "throughput": 2387.31, "total_tokens": 2009600} {"current_steps": 1045, "total_steps": 40000, "loss": 0.1776, "lr": 4.9916006160109235e-05, "epoch": 0.17048698915082797, "percentage": 2.61, "elapsed_time": "0:14:03", "remaining_time": "8:44:16", "throughput": 2393.43, "total_tokens": 2019696} {"current_steps": 1050, "total_steps": 40000, "loss": 0.2092, "lr": 4.991520015106464e-05, "epoch": 0.17130271637164532, "percentage": 2.62, "elapsed_time": "0:14:05", "remaining_time": "8:42:59", "throughput": 2399.53, "total_tokens": 2029776} {"current_steps": 1055, "total_steps": 40000, "loss": 0.1671, "lr": 4.991439029978308e-05, "epoch": 0.17211844359246267, "percentage": 2.64, "elapsed_time": "0:14:07", "remaining_time": "8:41:42", "throughput": 2403.07, "total_tokens": 2037728} {"current_steps": 1060, "total_steps": 40000, "loss": 0.2225, "lr": 4.9913576606389434e-05, "epoch": 0.17293417081328005, "percentage": 2.65, "elapsed_time": "0:14:10", "remaining_time": "8:40:26", "throughput": 2408.88, "total_tokens": 2047616} {"current_steps": 1065, "total_steps": 40000, "loss": 0.2792, "lr": 4.991275907100919e-05, "epoch": 0.1737498980340974, "percentage": 2.66, "elapsed_time": "0:14:12", "remaining_time": "8:39:11", "throughput": 2414.56, "total_tokens": 2057440} {"current_steps": 1070, "total_steps": 40000, "loss": 0.2697, "lr": 4.9911937693768434e-05, "epoch": 0.17456562525491476, "percentage": 2.67, "elapsed_time": "0:14:14", "remaining_time": "8:37:57", "throughput": 2419.37, "total_tokens": 2066528} {"current_steps": 1075, "total_steps": 40000, "loss": 0.1894, "lr": 4.991111247479382e-05, "epoch": 0.1753813524757321, "percentage": 2.69, "elapsed_time": "0:14:16", "remaining_time": "8:36:43", "throughput": 2424.9, "total_tokens": 2076288} {"current_steps": 1080, "total_steps": 40000, "loss": 0.2011, "lr": 4.9910283414212605e-05, "epoch": 0.17619707969654946, "percentage": 2.7, "elapsed_time": "0:14:18", "remaining_time": "8:35:30", "throughput": 2430.29, "total_tokens": 2085920} {"current_steps": 1085, "total_steps": 40000, "loss": 0.1608, "lr": 4.990945051215265e-05, "epoch": 0.17701280691736684, "percentage": 2.71, "elapsed_time": "0:14:20", "remaining_time": "8:34:18", "throughput": 2435.42, "total_tokens": 2095344} {"current_steps": 1090, "total_steps": 40000, "loss": 0.1176, "lr": 4.99086137687424e-05, "epoch": 0.1778285341381842, "percentage": 2.73, "elapsed_time": "0:14:22", "remaining_time": "8:33:06", "throughput": 2440.68, "total_tokens": 2104928} {"current_steps": 1095, "total_steps": 40000, "loss": 0.2544, "lr": 4.9907773184110874e-05, "epoch": 0.17864426135900155, "percentage": 2.74, "elapsed_time": "0:14:24", "remaining_time": "8:31:55", "throughput": 2448.86, "total_tokens": 2117072} {"current_steps": 1100, "total_steps": 40000, "loss": 0.3657, "lr": 4.9906928758387715e-05, "epoch": 0.1794599885798189, "percentage": 2.75, "elapsed_time": "0:14:26", "remaining_time": "8:30:45", "throughput": 2454.08, "total_tokens": 2126640} {"current_steps": 1105, "total_steps": 40000, "loss": 0.1765, "lr": 4.9906080491703146e-05, "epoch": 0.18027571580063625, "percentage": 2.76, "elapsed_time": "0:14:28", "remaining_time": "8:29:35", "throughput": 2459.14, "total_tokens": 2136112} {"current_steps": 1110, "total_steps": 40000, "loss": 0.2221, "lr": 4.990522838418797e-05, "epoch": 0.18109144302145364, "percentage": 2.77, "elapsed_time": "0:14:30", "remaining_time": "8:28:26", "throughput": 2464.5, "total_tokens": 2145904} {"current_steps": 1115, "total_steps": 40000, "loss": 0.2205, "lr": 4.9904372435973604e-05, "epoch": 0.181907170242271, "percentage": 2.79, "elapsed_time": "0:14:32", "remaining_time": "8:27:18", "throughput": 2471.43, "total_tokens": 2157072} {"current_steps": 1120, "total_steps": 40000, "loss": 0.1354, "lr": 4.990351264719203e-05, "epoch": 0.18272289746308834, "percentage": 2.8, "elapsed_time": "0:14:34", "remaining_time": "8:26:10", "throughput": 2476.52, "total_tokens": 2166624} {"current_steps": 1125, "total_steps": 40000, "loss": 0.1545, "lr": 4.990264901797586e-05, "epoch": 0.1835386246839057, "percentage": 2.81, "elapsed_time": "0:14:36", "remaining_time": "8:25:02", "throughput": 2481.65, "total_tokens": 2176224} {"current_steps": 1130, "total_steps": 40000, "loss": 0.1358, "lr": 4.990178154845826e-05, "epoch": 0.18435435190472307, "percentage": 2.83, "elapsed_time": "0:14:38", "remaining_time": "8:23:55", "throughput": 2486.76, "total_tokens": 2185856} {"current_steps": 1135, "total_steps": 40000, "loss": 0.1975, "lr": 4.9900910238773014e-05, "epoch": 0.18517007912554043, "percentage": 2.84, "elapsed_time": "0:14:41", "remaining_time": "8:22:49", "throughput": 2490.87, "total_tokens": 2194608} {"current_steps": 1140, "total_steps": 40000, "loss": 0.1583, "lr": 4.990003508905448e-05, "epoch": 0.18598580634635778, "percentage": 2.85, "elapsed_time": "0:14:43", "remaining_time": "8:21:43", "throughput": 2494.9, "total_tokens": 2203312} {"current_steps": 1145, "total_steps": 40000, "loss": 0.1189, "lr": 4.989915609943763e-05, "epoch": 0.18680153356717513, "percentage": 2.86, "elapsed_time": "0:14:45", "remaining_time": "8:20:38", "throughput": 2501.18, "total_tokens": 2214048} {"current_steps": 1150, "total_steps": 40000, "loss": 0.158, "lr": 4.9898273270058e-05, "epoch": 0.18761726078799248, "percentage": 2.88, "elapsed_time": "0:14:47", "remaining_time": "8:19:34", "throughput": 2504.35, "total_tokens": 2222032} {"current_steps": 1155, "total_steps": 40000, "loss": 0.1914, "lr": 4.989738660105174e-05, "epoch": 0.18843298800880987, "percentage": 2.89, "elapsed_time": "0:14:49", "remaining_time": "8:18:30", "throughput": 2508.88, "total_tokens": 2231232} {"current_steps": 1160, "total_steps": 40000, "loss": 0.255, "lr": 4.989649609255559e-05, "epoch": 0.18924871522962722, "percentage": 2.9, "elapsed_time": "0:14:51", "remaining_time": "8:17:26", "throughput": 2513.05, "total_tokens": 2240128} {"current_steps": 1165, "total_steps": 40000, "loss": 0.1206, "lr": 4.989560174470687e-05, "epoch": 0.19006444245044457, "percentage": 2.91, "elapsed_time": "0:14:53", "remaining_time": "8:16:23", "throughput": 2518.32, "total_tokens": 2250016} {"current_steps": 1170, "total_steps": 40000, "loss": 0.1787, "lr": 4.989470355764351e-05, "epoch": 0.19088016967126192, "percentage": 2.93, "elapsed_time": "0:14:55", "remaining_time": "8:15:20", "throughput": 2523.9, "total_tokens": 2260192} {"current_steps": 1175, "total_steps": 40000, "loss": 0.1302, "lr": 4.9893801531504e-05, "epoch": 0.19169589689207928, "percentage": 2.94, "elapsed_time": "0:14:57", "remaining_time": "8:14:18", "throughput": 2529.35, "total_tokens": 2270288} {"current_steps": 1180, "total_steps": 40000, "loss": 0.1692, "lr": 4.9892895666427475e-05, "epoch": 0.19251162411289666, "percentage": 2.95, "elapsed_time": "0:14:59", "remaining_time": "8:13:16", "throughput": 2534.48, "total_tokens": 2280112} {"current_steps": 1185, "total_steps": 40000, "loss": 0.2129, "lr": 4.9891985962553606e-05, "epoch": 0.193327351333714, "percentage": 2.96, "elapsed_time": "0:15:01", "remaining_time": "8:12:15", "throughput": 2539.79, "total_tokens": 2290160} {"current_steps": 1190, "total_steps": 40000, "loss": 0.2249, "lr": 4.989107242002269e-05, "epoch": 0.19414307855453136, "percentage": 2.97, "elapsed_time": "0:15:03", "remaining_time": "8:11:15", "throughput": 2543.3, "total_tokens": 2298560} {"current_steps": 1195, "total_steps": 40000, "loss": 0.0919, "lr": 4.989015503897561e-05, "epoch": 0.19495880577534871, "percentage": 2.99, "elapsed_time": "0:15:05", "remaining_time": "8:10:14", "throughput": 2547.17, "total_tokens": 2307312} {"current_steps": 1200, "total_steps": 40000, "loss": 0.163, "lr": 4.988923381955383e-05, "epoch": 0.1957745329961661, "percentage": 3.0, "elapsed_time": "0:15:07", "remaining_time": "8:09:15", "throughput": 2550.67, "total_tokens": 2315744} {"current_steps": 1200, "total_steps": 40000, "eval_loss": 0.1670651137828827, "epoch": 0.1957745329961661, "percentage": 3.0, "elapsed_time": "0:16:28", "remaining_time": "8:52:43", "throughput": 2342.57, "total_tokens": 2315744} {"current_steps": 1205, "total_steps": 40000, "loss": 0.2159, "lr": 4.988830876189942e-05, "epoch": 0.19659026021698345, "percentage": 3.01, "elapsed_time": "0:16:32", "remaining_time": "8:52:24", "throughput": 2343.7, "total_tokens": 2325456} {"current_steps": 1210, "total_steps": 40000, "loss": 0.1526, "lr": 4.988737986615503e-05, "epoch": 0.1974059874378008, "percentage": 3.02, "elapsed_time": "0:16:34", "remaining_time": "8:51:15", "throughput": 2348.15, "total_tokens": 2334768} {"current_steps": 1215, "total_steps": 40000, "loss": 0.1153, "lr": 4.988644713246391e-05, "epoch": 0.19822171465861815, "percentage": 3.04, "elapsed_time": "0:16:36", "remaining_time": "8:50:05", "throughput": 2353.78, "total_tokens": 2345232} {"current_steps": 1220, "total_steps": 40000, "loss": 0.1224, "lr": 4.988551056096991e-05, "epoch": 0.1990374418794355, "percentage": 3.05, "elapsed_time": "0:16:38", "remaining_time": "8:48:57", "throughput": 2359.12, "total_tokens": 2355424} {"current_steps": 1225, "total_steps": 40000, "loss": 0.2857, "lr": 4.988457015181743e-05, "epoch": 0.1998531691002529, "percentage": 3.06, "elapsed_time": "0:16:40", "remaining_time": "8:47:48", "throughput": 2363.39, "total_tokens": 2364576} {"current_steps": 1230, "total_steps": 40000, "loss": 0.2914, "lr": 4.988362590515153e-05, "epoch": 0.20066889632107024, "percentage": 3.08, "elapsed_time": "0:16:42", "remaining_time": "8:46:41", "throughput": 2369.42, "total_tokens": 2375488} {"current_steps": 1235, "total_steps": 40000, "loss": 0.3731, "lr": 4.9882677821117805e-05, "epoch": 0.2014846235418876, "percentage": 3.09, "elapsed_time": "0:16:44", "remaining_time": "8:45:33", "throughput": 2375.28, "total_tokens": 2386272} {"current_steps": 1240, "total_steps": 40000, "loss": 0.1389, "lr": 4.988172589986246e-05, "epoch": 0.20230035076270494, "percentage": 3.1, "elapsed_time": "0:16:46", "remaining_time": "8:44:27", "throughput": 2380.0, "total_tokens": 2395952} {"current_steps": 1245, "total_steps": 40000, "loss": 0.1366, "lr": 4.9880770141532304e-05, "epoch": 0.2031160779835223, "percentage": 3.11, "elapsed_time": "0:16:48", "remaining_time": "8:43:21", "throughput": 2384.05, "total_tokens": 2404944} {"current_steps": 1250, "total_steps": 40000, "loss": 0.1825, "lr": 4.987981054627472e-05, "epoch": 0.20393180520433968, "percentage": 3.12, "elapsed_time": "0:16:50", "remaining_time": "8:42:15", "throughput": 2388.8, "total_tokens": 2414656} {"current_steps": 1255, "total_steps": 40000, "loss": 0.3315, "lr": 4.987884711423769e-05, "epoch": 0.20474753242515703, "percentage": 3.14, "elapsed_time": "0:16:52", "remaining_time": "8:41:10", "throughput": 2392.62, "total_tokens": 2423456} {"current_steps": 1260, "total_steps": 40000, "loss": 0.1648, "lr": 4.9877879845569784e-05, "epoch": 0.20556325964597438, "percentage": 3.15, "elapsed_time": "0:16:54", "remaining_time": "8:40:05", "throughput": 2398.5, "total_tokens": 2434368} {"current_steps": 1265, "total_steps": 40000, "loss": 0.2243, "lr": 4.9876908740420175e-05, "epoch": 0.20637898686679174, "percentage": 3.16, "elapsed_time": "0:16:57", "remaining_time": "8:39:01", "throughput": 2403.34, "total_tokens": 2444240} {"current_steps": 1270, "total_steps": 40000, "loss": 0.1922, "lr": 4.987593379893861e-05, "epoch": 0.20719471408760912, "percentage": 3.17, "elapsed_time": "0:16:59", "remaining_time": "8:37:57", "throughput": 2407.52, "total_tokens": 2453456} {"current_steps": 1275, "total_steps": 40000, "loss": 0.2116, "lr": 4.987495502127545e-05, "epoch": 0.20801044130842647, "percentage": 3.19, "elapsed_time": "0:17:01", "remaining_time": "8:36:55", "throughput": 2412.46, "total_tokens": 2463504} {"current_steps": 1280, "total_steps": 40000, "loss": 0.1721, "lr": 4.987397240758162e-05, "epoch": 0.20882616852924382, "percentage": 3.2, "elapsed_time": "0:17:03", "remaining_time": "8:35:52", "throughput": 2417.28, "total_tokens": 2473408} {"current_steps": 1285, "total_steps": 40000, "loss": 0.2366, "lr": 4.9872985958008664e-05, "epoch": 0.20964189575006117, "percentage": 3.21, "elapsed_time": "0:17:05", "remaining_time": "8:34:50", "throughput": 2421.62, "total_tokens": 2482832} {"current_steps": 1290, "total_steps": 40000, "loss": 0.1015, "lr": 4.987199567270871e-05, "epoch": 0.21045762297087853, "percentage": 3.23, "elapsed_time": "0:17:07", "remaining_time": "8:33:48", "throughput": 2426.38, "total_tokens": 2492720} {"current_steps": 1295, "total_steps": 40000, "loss": 0.1498, "lr": 4.9871001551834444e-05, "epoch": 0.2112733501916959, "percentage": 3.24, "elapsed_time": "0:17:09", "remaining_time": "8:32:46", "throughput": 2431.63, "total_tokens": 2503136} {"current_steps": 1300, "total_steps": 40000, "loss": 0.1634, "lr": 4.98700035955392e-05, "epoch": 0.21208907741251326, "percentage": 3.25, "elapsed_time": "0:17:11", "remaining_time": "8:31:46", "throughput": 2434.94, "total_tokens": 2511568} {"current_steps": 1305, "total_steps": 40000, "loss": 0.1908, "lr": 4.986900180397686e-05, "epoch": 0.2129048046333306, "percentage": 3.26, "elapsed_time": "0:17:13", "remaining_time": "8:30:45", "throughput": 2440.42, "total_tokens": 2522256} {"current_steps": 1310, "total_steps": 40000, "loss": 0.3022, "lr": 4.9867996177301926e-05, "epoch": 0.21372053185414797, "percentage": 3.28, "elapsed_time": "0:17:15", "remaining_time": "8:29:45", "throughput": 2444.96, "total_tokens": 2532016} {"current_steps": 1315, "total_steps": 40000, "loss": 0.1208, "lr": 4.9866986715669464e-05, "epoch": 0.21453625907496532, "percentage": 3.29, "elapsed_time": "0:17:17", "remaining_time": "8:28:46", "throughput": 2448.56, "total_tokens": 2540800} {"current_steps": 1320, "total_steps": 40000, "loss": 0.1023, "lr": 4.9865973419235155e-05, "epoch": 0.2153519862957827, "percentage": 3.3, "elapsed_time": "0:17:19", "remaining_time": "8:27:47", "throughput": 2453.69, "total_tokens": 2551200} {"current_steps": 1325, "total_steps": 40000, "loss": 0.1223, "lr": 4.986495628815526e-05, "epoch": 0.21616771351660005, "percentage": 3.31, "elapsed_time": "0:17:21", "remaining_time": "8:26:48", "throughput": 2458.54, "total_tokens": 2561312} {"current_steps": 1330, "total_steps": 40000, "loss": 0.2117, "lr": 4.986393532258663e-05, "epoch": 0.2169834407374174, "percentage": 3.33, "elapsed_time": "0:17:23", "remaining_time": "8:25:50", "throughput": 2462.73, "total_tokens": 2570784} {"current_steps": 1335, "total_steps": 40000, "loss": 0.0897, "lr": 4.986291052268671e-05, "epoch": 0.21779916795823476, "percentage": 3.34, "elapsed_time": "0:17:25", "remaining_time": "8:24:53", "throughput": 2467.79, "total_tokens": 2581168} {"current_steps": 1340, "total_steps": 40000, "loss": 0.1549, "lr": 4.986188188861355e-05, "epoch": 0.21861489517905214, "percentage": 3.35, "elapsed_time": "0:17:28", "remaining_time": "8:23:55", "throughput": 2471.75, "total_tokens": 2590416} {"current_steps": 1345, "total_steps": 40000, "loss": 0.0621, "lr": 4.9860849420525766e-05, "epoch": 0.2194306223998695, "percentage": 3.36, "elapsed_time": "0:17:30", "remaining_time": "8:22:59", "throughput": 2476.99, "total_tokens": 2601056} {"current_steps": 1350, "total_steps": 40000, "loss": 0.3058, "lr": 4.9859813118582575e-05, "epoch": 0.22024634962068684, "percentage": 3.38, "elapsed_time": "0:17:32", "remaining_time": "8:22:02", "throughput": 2481.71, "total_tokens": 2611152} {"current_steps": 1355, "total_steps": 40000, "loss": 0.0967, "lr": 4.98587729829438e-05, "epoch": 0.2210620768415042, "percentage": 3.39, "elapsed_time": "0:17:34", "remaining_time": "8:21:06", "throughput": 2485.47, "total_tokens": 2620240} {"current_steps": 1360, "total_steps": 40000, "loss": 0.1434, "lr": 4.985772901376983e-05, "epoch": 0.22187780406232155, "percentage": 3.4, "elapsed_time": "0:17:36", "remaining_time": "8:20:10", "throughput": 2490.72, "total_tokens": 2630912} {"current_steps": 1365, "total_steps": 40000, "loss": 0.2058, "lr": 4.9856681211221666e-05, "epoch": 0.22269353128313893, "percentage": 3.41, "elapsed_time": "0:17:38", "remaining_time": "8:19:15", "throughput": 2494.98, "total_tokens": 2640560} {"current_steps": 1370, "total_steps": 40000, "loss": 0.2436, "lr": 4.985562957546089e-05, "epoch": 0.22350925850395628, "percentage": 3.43, "elapsed_time": "0:17:40", "remaining_time": "8:18:20", "throughput": 2499.85, "total_tokens": 2650864} {"current_steps": 1375, "total_steps": 40000, "loss": 0.1747, "lr": 4.9854574106649686e-05, "epoch": 0.22432498572477363, "percentage": 3.44, "elapsed_time": "0:17:42", "remaining_time": "8:17:25", "throughput": 2505.39, "total_tokens": 2661920} {"current_steps": 1380, "total_steps": 40000, "loss": 0.142, "lr": 4.985351480495081e-05, "epoch": 0.225140712945591, "percentage": 3.45, "elapsed_time": "0:17:44", "remaining_time": "8:16:31", "throughput": 2509.68, "total_tokens": 2671648} {"current_steps": 1385, "total_steps": 40000, "loss": 0.1122, "lr": 4.985245167052762e-05, "epoch": 0.22595644016640834, "percentage": 3.46, "elapsed_time": "0:17:46", "remaining_time": "8:15:37", "throughput": 2514.06, "total_tokens": 2681520} {"current_steps": 1390, "total_steps": 40000, "loss": 0.1388, "lr": 4.9851384703544066e-05, "epoch": 0.22677216738722572, "percentage": 3.48, "elapsed_time": "0:17:48", "remaining_time": "8:14:44", "throughput": 2517.83, "total_tokens": 2690720} {"current_steps": 1395, "total_steps": 40000, "loss": 0.1684, "lr": 4.985031390416469e-05, "epoch": 0.22758789460804307, "percentage": 3.49, "elapsed_time": "0:17:50", "remaining_time": "8:13:51", "throughput": 2522.85, "total_tokens": 2701280} {"current_steps": 1400, "total_steps": 40000, "loss": 0.1673, "lr": 4.984923927255461e-05, "epoch": 0.22840362182886043, "percentage": 3.5, "elapsed_time": "0:17:52", "remaining_time": "8:12:58", "throughput": 2526.32, "total_tokens": 2710208} {"current_steps": 1400, "total_steps": 40000, "eval_loss": 0.15960896015167236, "epoch": 0.22840362182886043, "percentage": 3.5, "elapsed_time": "0:19:13", "remaining_time": "8:50:00", "throughput": 2349.78, "total_tokens": 2710208} {"current_steps": 1405, "total_steps": 40000, "loss": 0.1826, "lr": 4.984816080887958e-05, "epoch": 0.22921934904967778, "percentage": 3.51, "elapsed_time": "0:19:16", "remaining_time": "8:49:41", "throughput": 2352.37, "total_tokens": 2721632} {"current_steps": 1410, "total_steps": 40000, "loss": 0.1716, "lr": 4.9847078513305875e-05, "epoch": 0.23003507627049516, "percentage": 3.52, "elapsed_time": "0:19:19", "remaining_time": "8:48:41", "throughput": 2356.26, "total_tokens": 2731008} {"current_steps": 1415, "total_steps": 40000, "loss": 0.1045, "lr": 4.984599238600043e-05, "epoch": 0.2308508034913125, "percentage": 3.54, "elapsed_time": "0:19:21", "remaining_time": "8:47:41", "throughput": 2359.93, "total_tokens": 2740128} {"current_steps": 1420, "total_steps": 40000, "loss": 0.2998, "lr": 4.9844902427130716e-05, "epoch": 0.23166653071212986, "percentage": 3.55, "elapsed_time": "0:19:23", "remaining_time": "8:46:42", "throughput": 2363.66, "total_tokens": 2749328} {"current_steps": 1425, "total_steps": 40000, "loss": 0.3173, "lr": 4.984380863686482e-05, "epoch": 0.23248225793294722, "percentage": 3.56, "elapsed_time": "0:19:25", "remaining_time": "8:45:43", "throughput": 2366.44, "total_tokens": 2757456} {"current_steps": 1430, "total_steps": 40000, "loss": 0.2083, "lr": 4.984271101537143e-05, "epoch": 0.23329798515376457, "percentage": 3.57, "elapsed_time": "0:19:27", "remaining_time": "8:44:44", "throughput": 2368.89, "total_tokens": 2765200} {"current_steps": 1435, "total_steps": 40000, "loss": 0.2005, "lr": 4.9841609562819816e-05, "epoch": 0.23411371237458195, "percentage": 3.59, "elapsed_time": "0:19:29", "remaining_time": "8:43:46", "throughput": 2373.15, "total_tokens": 2775072} {"current_steps": 1440, "total_steps": 40000, "loss": 0.1568, "lr": 4.984050427937983e-05, "epoch": 0.2349294395953993, "percentage": 3.6, "elapsed_time": "0:19:31", "remaining_time": "8:42:48", "throughput": 2377.32, "total_tokens": 2784896} {"current_steps": 1445, "total_steps": 40000, "loss": 0.1551, "lr": 4.983939516522191e-05, "epoch": 0.23574516681621666, "percentage": 3.61, "elapsed_time": "0:19:33", "remaining_time": "8:41:50", "throughput": 2381.14, "total_tokens": 2794272} {"current_steps": 1450, "total_steps": 40000, "loss": 0.1768, "lr": 4.983828222051711e-05, "epoch": 0.236560894037034, "percentage": 3.62, "elapsed_time": "0:19:35", "remaining_time": "8:40:53", "throughput": 2384.44, "total_tokens": 2803072} {"current_steps": 1455, "total_steps": 40000, "loss": 0.1594, "lr": 4.983716544543705e-05, "epoch": 0.23737662125785136, "percentage": 3.64, "elapsed_time": "0:19:37", "remaining_time": "8:39:57", "throughput": 2388.3, "total_tokens": 2812528} {"current_steps": 1460, "total_steps": 40000, "loss": 0.1251, "lr": 4.983604484015395e-05, "epoch": 0.23819234847866874, "percentage": 3.65, "elapsed_time": "0:19:39", "remaining_time": "8:39:00", "throughput": 2392.55, "total_tokens": 2822464} {"current_steps": 1465, "total_steps": 40000, "loss": 0.2847, "lr": 4.983492040484064e-05, "epoch": 0.2390080756994861, "percentage": 3.66, "elapsed_time": "0:19:41", "remaining_time": "8:38:04", "throughput": 2397.12, "total_tokens": 2832832} {"current_steps": 1470, "total_steps": 40000, "loss": 0.1608, "lr": 4.98337921396705e-05, "epoch": 0.23982380292030345, "percentage": 3.67, "elapsed_time": "0:19:43", "remaining_time": "8:37:09", "throughput": 2402.64, "total_tokens": 2844336} {"current_steps": 1475, "total_steps": 40000, "loss": 0.2354, "lr": 4.983266004481753e-05, "epoch": 0.2406395301411208, "percentage": 3.69, "elapsed_time": "0:19:45", "remaining_time": "8:36:14", "throughput": 2406.73, "total_tokens": 2854176} {"current_steps": 1480, "total_steps": 40000, "loss": 0.2694, "lr": 4.9831524120456316e-05, "epoch": 0.24145525736193818, "percentage": 3.7, "elapsed_time": "0:19:47", "remaining_time": "8:35:19", "throughput": 2410.72, "total_tokens": 2863888} {"current_steps": 1485, "total_steps": 40000, "loss": 0.1234, "lr": 4.9830384366762026e-05, "epoch": 0.24227098458275553, "percentage": 3.71, "elapsed_time": "0:19:50", "remaining_time": "8:34:24", "throughput": 2413.9, "total_tokens": 2872640} {"current_steps": 1490, "total_steps": 40000, "loss": 0.2559, "lr": 4.9829240783910436e-05, "epoch": 0.24308671180357289, "percentage": 3.72, "elapsed_time": "0:19:52", "remaining_time": "8:33:30", "throughput": 2417.13, "total_tokens": 2881488} {"current_steps": 1495, "total_steps": 40000, "loss": 0.2511, "lr": 4.982809337207789e-05, "epoch": 0.24390243902439024, "percentage": 3.74, "elapsed_time": "0:19:54", "remaining_time": "8:32:36", "throughput": 2422.04, "total_tokens": 2892336} {"current_steps": 1500, "total_steps": 40000, "loss": 0.2359, "lr": 4.9826942131441337e-05, "epoch": 0.2447181662452076, "percentage": 3.75, "elapsed_time": "0:19:56", "remaining_time": "8:31:43", "throughput": 2425.6, "total_tokens": 2901600} {"current_steps": 1505, "total_steps": 40000, "loss": 0.1698, "lr": 4.9825787062178315e-05, "epoch": 0.24553389346602497, "percentage": 3.76, "elapsed_time": "0:19:58", "remaining_time": "8:30:50", "throughput": 2429.82, "total_tokens": 2911664} {"current_steps": 1510, "total_steps": 40000, "loss": 0.1691, "lr": 4.9824628164466945e-05, "epoch": 0.24634962068684232, "percentage": 3.77, "elapsed_time": "0:20:00", "remaining_time": "8:29:58", "throughput": 2433.35, "total_tokens": 2920976} {"current_steps": 1515, "total_steps": 40000, "loss": 0.2693, "lr": 4.982346543848595e-05, "epoch": 0.24716534790765968, "percentage": 3.79, "elapsed_time": "0:20:02", "remaining_time": "8:29:05", "throughput": 2437.4, "total_tokens": 2930864} {"current_steps": 1520, "total_steps": 40000, "loss": 0.1767, "lr": 4.9822298884414626e-05, "epoch": 0.24798107512847703, "percentage": 3.8, "elapsed_time": "0:20:04", "remaining_time": "8:28:13", "throughput": 2441.59, "total_tokens": 2940944} {"current_steps": 1525, "total_steps": 40000, "loss": 0.155, "lr": 4.982112850243288e-05, "epoch": 0.24879680234929438, "percentage": 3.81, "elapsed_time": "0:20:06", "remaining_time": "8:27:28", "throughput": 2445.01, "total_tokens": 2950768} {"current_steps": 1530, "total_steps": 40000, "loss": 0.2191, "lr": 4.98199542927212e-05, "epoch": 0.24961252957011176, "percentage": 3.82, "elapsed_time": "0:20:08", "remaining_time": "8:26:36", "throughput": 2448.15, "total_tokens": 2959600} {"current_steps": 1535, "total_steps": 40000, "loss": 0.2092, "lr": 4.981877625546066e-05, "epoch": 0.2504282567909291, "percentage": 3.84, "elapsed_time": "0:20:10", "remaining_time": "8:25:45", "throughput": 2452.56, "total_tokens": 2970000} {"current_steps": 1540, "total_steps": 40000, "loss": 0.1298, "lr": 4.981759439083293e-05, "epoch": 0.25124398401174647, "percentage": 3.85, "elapsed_time": "0:20:13", "remaining_time": "8:24:54", "throughput": 2457.17, "total_tokens": 2980640} {"current_steps": 1545, "total_steps": 40000, "loss": 0.1309, "lr": 4.981640869902027e-05, "epoch": 0.2520597112325638, "percentage": 3.86, "elapsed_time": "0:20:15", "remaining_time": "8:24:04", "throughput": 2461.04, "total_tokens": 2990448} {"current_steps": 1550, "total_steps": 40000, "loss": 0.1576, "lr": 4.9815219180205517e-05, "epoch": 0.2528754384533812, "percentage": 3.88, "elapsed_time": "0:20:17", "remaining_time": "8:23:14", "throughput": 2464.35, "total_tokens": 2999568} {"current_steps": 1555, "total_steps": 40000, "loss": 0.1545, "lr": 4.9814025834572126e-05, "epoch": 0.2536911656741985, "percentage": 3.89, "elapsed_time": "0:20:19", "remaining_time": "8:22:24", "throughput": 2468.03, "total_tokens": 3009136} {"current_steps": 1560, "total_steps": 40000, "loss": 0.0864, "lr": 4.981282866230411e-05, "epoch": 0.2545068928950159, "percentage": 3.9, "elapsed_time": "0:20:21", "remaining_time": "8:21:34", "throughput": 2471.33, "total_tokens": 3018272} {"current_steps": 1565, "total_steps": 40000, "loss": 0.3156, "lr": 4.981162766358611e-05, "epoch": 0.2553226201158333, "percentage": 3.91, "elapsed_time": "0:20:23", "remaining_time": "8:20:45", "throughput": 2475.0, "total_tokens": 3027856} {"current_steps": 1570, "total_steps": 40000, "loss": 0.1742, "lr": 4.9810422838603316e-05, "epoch": 0.25613834733665064, "percentage": 3.92, "elapsed_time": "0:20:25", "remaining_time": "8:19:55", "throughput": 2478.79, "total_tokens": 3037584} {"current_steps": 1575, "total_steps": 40000, "loss": 0.0834, "lr": 4.9809214187541533e-05, "epoch": 0.256954074557468, "percentage": 3.94, "elapsed_time": "0:20:27", "remaining_time": "8:19:06", "throughput": 2481.37, "total_tokens": 3045856} {"current_steps": 1580, "total_steps": 40000, "loss": 0.066, "lr": 4.980800171058715e-05, "epoch": 0.25776980177828535, "percentage": 3.95, "elapsed_time": "0:20:29", "remaining_time": "8:18:18", "throughput": 2483.73, "total_tokens": 3053872} {"current_steps": 1585, "total_steps": 40000, "loss": 0.1909, "lr": 4.980678540792715e-05, "epoch": 0.2585855289991027, "percentage": 3.96, "elapsed_time": "0:20:31", "remaining_time": "8:17:30", "throughput": 2488.22, "total_tokens": 3064560} {"current_steps": 1590, "total_steps": 40000, "loss": 0.1509, "lr": 4.980556527974909e-05, "epoch": 0.25940125621992005, "percentage": 3.98, "elapsed_time": "0:20:33", "remaining_time": "8:16:42", "throughput": 2492.85, "total_tokens": 3075408} {"current_steps": 1595, "total_steps": 40000, "loss": 0.2524, "lr": 4.980434132624114e-05, "epoch": 0.2602169834407374, "percentage": 3.99, "elapsed_time": "0:20:35", "remaining_time": "8:15:54", "throughput": 2497.47, "total_tokens": 3086240} {"current_steps": 1600, "total_steps": 40000, "loss": 0.1716, "lr": 4.980311354759205e-05, "epoch": 0.26103271066155476, "percentage": 4.0, "elapsed_time": "0:20:37", "remaining_time": "8:15:07", "throughput": 2500.56, "total_tokens": 3095216} {"current_steps": 1600, "total_steps": 40000, "eval_loss": 0.16884565353393555, "epoch": 0.26103271066155476, "percentage": 4.0, "elapsed_time": "0:21:58", "remaining_time": "8:47:18", "throughput": 2347.91, "total_tokens": 3095216} {"current_steps": 1605, "total_steps": 40000, "loss": 0.0563, "lr": 4.980188194399116e-05, "epoch": 0.2618484378823721, "percentage": 4.01, "elapsed_time": "0:22:02", "remaining_time": "8:47:05", "throughput": 2348.72, "total_tokens": 3105056} {"current_steps": 1610, "total_steps": 40000, "loss": 0.2176, "lr": 4.9800646515628384e-05, "epoch": 0.2626641651031895, "percentage": 4.03, "elapsed_time": "0:22:04", "remaining_time": "8:46:13", "throughput": 2353.33, "total_tokens": 3116064} {"current_steps": 1615, "total_steps": 40000, "loss": 0.1024, "lr": 4.979940726269426e-05, "epoch": 0.26347989232400687, "percentage": 4.04, "elapsed_time": "0:22:06", "remaining_time": "8:45:20", "throughput": 2357.37, "total_tokens": 3126272} {"current_steps": 1620, "total_steps": 40000, "loss": 0.3802, "lr": 4.979816418537988e-05, "epoch": 0.2642956195448242, "percentage": 4.05, "elapsed_time": "0:22:08", "remaining_time": "8:44:27", "throughput": 2360.68, "total_tokens": 3135520} {"current_steps": 1625, "total_steps": 40000, "loss": 0.2414, "lr": 4.979691728387696e-05, "epoch": 0.2651113467656416, "percentage": 4.06, "elapsed_time": "0:22:10", "remaining_time": "8:43:35", "throughput": 2364.65, "total_tokens": 3145680} {"current_steps": 1630, "total_steps": 40000, "loss": 0.2088, "lr": 4.979566655837776e-05, "epoch": 0.26592707398645893, "percentage": 4.08, "elapsed_time": "0:22:12", "remaining_time": "8:42:43", "throughput": 2368.75, "total_tokens": 3156032} {"current_steps": 1635, "total_steps": 40000, "loss": 0.1595, "lr": 4.9794412009075184e-05, "epoch": 0.2667428012072763, "percentage": 4.09, "elapsed_time": "0:22:14", "remaining_time": "8:41:52", "throughput": 2371.74, "total_tokens": 3164912} {"current_steps": 1640, "total_steps": 40000, "loss": 0.1067, "lr": 4.979315363616269e-05, "epoch": 0.26755852842809363, "percentage": 4.1, "elapsed_time": "0:22:16", "remaining_time": "8:41:01", "throughput": 2375.65, "total_tokens": 3175056} {"current_steps": 1645, "total_steps": 40000, "loss": 0.1238, "lr": 4.979189143983434e-05, "epoch": 0.268374255648911, "percentage": 4.11, "elapsed_time": "0:22:18", "remaining_time": "8:40:10", "throughput": 2379.4, "total_tokens": 3184976} {"current_steps": 1650, "total_steps": 40000, "loss": 0.1339, "lr": 4.979062542028478e-05, "epoch": 0.26918998286972834, "percentage": 4.12, "elapsed_time": "0:22:20", "remaining_time": "8:39:19", "throughput": 2383.64, "total_tokens": 3195552} {"current_steps": 1655, "total_steps": 40000, "loss": 0.1481, "lr": 4.978935557770923e-05, "epoch": 0.27000571009054575, "percentage": 4.14, "elapsed_time": "0:22:22", "remaining_time": "8:38:28", "throughput": 2387.24, "total_tokens": 3205312} {"current_steps": 1660, "total_steps": 40000, "loss": 0.1049, "lr": 4.978808191230353e-05, "epoch": 0.2708214373113631, "percentage": 4.15, "elapsed_time": "0:22:24", "remaining_time": "8:37:38", "throughput": 2390.94, "total_tokens": 3215216} {"current_steps": 1665, "total_steps": 40000, "loss": 0.2217, "lr": 4.9786804424264085e-05, "epoch": 0.27163716453218045, "percentage": 4.16, "elapsed_time": "0:22:26", "remaining_time": "8:36:48", "throughput": 2394.57, "total_tokens": 3225024} {"current_steps": 1670, "total_steps": 40000, "loss": 0.3031, "lr": 4.978552311378792e-05, "epoch": 0.2724528917529978, "percentage": 4.17, "elapsed_time": "0:22:28", "remaining_time": "8:35:59", "throughput": 2397.6, "total_tokens": 3234032} {"current_steps": 1675, "total_steps": 40000, "loss": 0.2426, "lr": 4.978423798107261e-05, "epoch": 0.27326861897381516, "percentage": 4.19, "elapsed_time": "0:22:30", "remaining_time": "8:35:10", "throughput": 2401.39, "total_tokens": 3244160} {"current_steps": 1680, "total_steps": 40000, "loss": 0.2013, "lr": 4.978294902631635e-05, "epoch": 0.2740843461946325, "percentage": 4.2, "elapsed_time": "0:22:33", "remaining_time": "8:34:21", "throughput": 2405.11, "total_tokens": 3254144} {"current_steps": 1685, "total_steps": 40000, "loss": 0.2164, "lr": 4.9781656249717914e-05, "epoch": 0.27490007341544986, "percentage": 4.21, "elapsed_time": "0:22:35", "remaining_time": "8:33:32", "throughput": 2409.02, "total_tokens": 3264400} {"current_steps": 1690, "total_steps": 40000, "loss": 0.2439, "lr": 4.9780359651476645e-05, "epoch": 0.2757158006362672, "percentage": 4.23, "elapsed_time": "0:22:37", "remaining_time": "8:32:44", "throughput": 2412.03, "total_tokens": 3273456} {"current_steps": 1695, "total_steps": 40000, "loss": 0.1545, "lr": 4.977905923179251e-05, "epoch": 0.27653152785708457, "percentage": 4.24, "elapsed_time": "0:22:39", "remaining_time": "8:31:56", "throughput": 2415.89, "total_tokens": 3283680} {"current_steps": 1700, "total_steps": 40000, "loss": 0.1596, "lr": 4.977775499086606e-05, "epoch": 0.2773472550779019, "percentage": 4.25, "elapsed_time": "0:22:41", "remaining_time": "8:31:08", "throughput": 2419.04, "total_tokens": 3292960} {"current_steps": 1705, "total_steps": 40000, "loss": 0.1284, "lr": 4.97764469288984e-05, "epoch": 0.27816298229871933, "percentage": 4.26, "elapsed_time": "0:22:43", "remaining_time": "8:30:21", "throughput": 2422.11, "total_tokens": 3302160} {"current_steps": 1710, "total_steps": 40000, "loss": 0.0943, "lr": 4.977513504609127e-05, "epoch": 0.2789787095195367, "percentage": 4.28, "elapsed_time": "0:22:45", "remaining_time": "8:29:33", "throughput": 2425.94, "total_tokens": 3312400} {"current_steps": 1715, "total_steps": 40000, "loss": 0.0516, "lr": 4.9773819342646965e-05, "epoch": 0.27979443674035404, "percentage": 4.29, "elapsed_time": "0:22:47", "remaining_time": "8:28:46", "throughput": 2429.55, "total_tokens": 3322352} {"current_steps": 1720, "total_steps": 40000, "loss": 0.256, "lr": 4.97724998187684e-05, "epoch": 0.2806101639611714, "percentage": 4.3, "elapsed_time": "0:22:49", "remaining_time": "8:28:00", "throughput": 2432.6, "total_tokens": 3331536} {"current_steps": 1725, "total_steps": 40000, "loss": 0.1276, "lr": 4.9771176474659045e-05, "epoch": 0.28142589118198874, "percentage": 4.31, "elapsed_time": "0:22:51", "remaining_time": "8:27:13", "throughput": 2436.3, "total_tokens": 3341632} {"current_steps": 1730, "total_steps": 40000, "loss": 0.1156, "lr": 4.976984931052299e-05, "epoch": 0.2822416184028061, "percentage": 4.32, "elapsed_time": "0:22:53", "remaining_time": "8:26:27", "throughput": 2439.6, "total_tokens": 3351200} {"current_steps": 1735, "total_steps": 40000, "loss": 0.1533, "lr": 4.976851832656489e-05, "epoch": 0.28305734562362345, "percentage": 4.34, "elapsed_time": "0:22:55", "remaining_time": "8:25:41", "throughput": 2442.7, "total_tokens": 3360480} {"current_steps": 1740, "total_steps": 40000, "loss": 0.2537, "lr": 4.9767183522990004e-05, "epoch": 0.2838730728444408, "percentage": 4.35, "elapsed_time": "0:22:57", "remaining_time": "8:24:55", "throughput": 2445.11, "total_tokens": 3368848} {"current_steps": 1745, "total_steps": 40000, "loss": 0.3346, "lr": 4.9765844900004176e-05, "epoch": 0.28468880006525815, "percentage": 4.36, "elapsed_time": "0:22:59", "remaining_time": "8:24:10", "throughput": 2447.85, "total_tokens": 3377680} {"current_steps": 1750, "total_steps": 40000, "loss": 0.1168, "lr": 4.9764502457813834e-05, "epoch": 0.28550452728607556, "percentage": 4.38, "elapsed_time": "0:23:01", "remaining_time": "8:23:24", "throughput": 2452.04, "total_tokens": 3388528} {"current_steps": 1755, "total_steps": 40000, "loss": 0.099, "lr": 4.9763156196626005e-05, "epoch": 0.2863202545068929, "percentage": 4.39, "elapsed_time": "0:23:03", "remaining_time": "8:22:39", "throughput": 2455.55, "total_tokens": 3398448} {"current_steps": 1760, "total_steps": 40000, "loss": 0.2439, "lr": 4.97618061166483e-05, "epoch": 0.28713598172771027, "percentage": 4.4, "elapsed_time": "0:23:06", "remaining_time": "8:21:54", "throughput": 2458.26, "total_tokens": 3407248} {"current_steps": 1765, "total_steps": 40000, "loss": 0.1194, "lr": 4.9760452218088915e-05, "epoch": 0.2879517089485276, "percentage": 4.41, "elapsed_time": "0:23:08", "remaining_time": "8:21:10", "throughput": 2461.37, "total_tokens": 3416640} {"current_steps": 1770, "total_steps": 40000, "loss": 0.3219, "lr": 4.975909450115663e-05, "epoch": 0.28876743616934497, "percentage": 4.42, "elapsed_time": "0:23:10", "remaining_time": "8:20:25", "throughput": 2464.47, "total_tokens": 3426016} {"current_steps": 1775, "total_steps": 40000, "loss": 0.1398, "lr": 4.975773296606084e-05, "epoch": 0.2895831633901623, "percentage": 4.44, "elapsed_time": "0:23:12", "remaining_time": "8:19:41", "throughput": 2468.12, "total_tokens": 3436160} {"current_steps": 1780, "total_steps": 40000, "loss": 0.1764, "lr": 4.97563676130115e-05, "epoch": 0.2903988906109797, "percentage": 4.45, "elapsed_time": "0:23:14", "remaining_time": "8:18:57", "throughput": 2471.81, "total_tokens": 3446384} {"current_steps": 1785, "total_steps": 40000, "loss": 0.1299, "lr": 4.9754998442219166e-05, "epoch": 0.29121461783179703, "percentage": 4.46, "elapsed_time": "0:23:16", "remaining_time": "8:18:14", "throughput": 2474.59, "total_tokens": 3455392} {"current_steps": 1790, "total_steps": 40000, "loss": 0.1675, "lr": 4.9753625453894984e-05, "epoch": 0.2920303450526144, "percentage": 4.47, "elapsed_time": "0:23:18", "remaining_time": "8:17:31", "throughput": 2477.2, "total_tokens": 3464144} {"current_steps": 1795, "total_steps": 40000, "loss": 0.182, "lr": 4.975224864825068e-05, "epoch": 0.2928460722734318, "percentage": 4.49, "elapsed_time": "0:23:20", "remaining_time": "8:16:47", "throughput": 2480.24, "total_tokens": 3473504} {"current_steps": 1800, "total_steps": 40000, "loss": 0.1506, "lr": 4.9750868025498576e-05, "epoch": 0.29366179949424914, "percentage": 4.5, "elapsed_time": "0:23:22", "remaining_time": "8:16:04", "throughput": 2483.72, "total_tokens": 3483504} {"current_steps": 1800, "total_steps": 40000, "eval_loss": 0.16898152232170105, "epoch": 0.29366179949424914, "percentage": 4.5, "elapsed_time": "0:24:43", "remaining_time": "8:44:32", "throughput": 2348.94, "total_tokens": 3483504} {"current_steps": 1805, "total_steps": 40000, "loss": 0.1361, "lr": 4.974948358585158e-05, "epoch": 0.2944775267150665, "percentage": 4.51, "elapsed_time": "0:24:46", "remaining_time": "8:44:22", "throughput": 2349.82, "total_tokens": 3493856} {"current_steps": 1810, "total_steps": 40000, "loss": 0.1946, "lr": 4.9748095329523205e-05, "epoch": 0.29529325393588385, "percentage": 4.52, "elapsed_time": "0:24:48", "remaining_time": "8:43:35", "throughput": 2352.57, "total_tokens": 3502848} {"current_steps": 1815, "total_steps": 40000, "loss": 0.1452, "lr": 4.974670325672752e-05, "epoch": 0.2961089811567012, "percentage": 4.54, "elapsed_time": "0:24:51", "remaining_time": "8:42:48", "throughput": 2356.41, "total_tokens": 3513424} {"current_steps": 1820, "total_steps": 40000, "loss": 0.2157, "lr": 4.974530736767921e-05, "epoch": 0.29692470837751855, "percentage": 4.55, "elapsed_time": "0:24:53", "remaining_time": "8:42:01", "throughput": 2358.1, "total_tokens": 3520816} {"current_steps": 1825, "total_steps": 40000, "loss": 0.2954, "lr": 4.9743907662593524e-05, "epoch": 0.2977404355983359, "percentage": 4.56, "elapsed_time": "0:24:55", "remaining_time": "8:41:15", "throughput": 2361.9, "total_tokens": 3531376} {"current_steps": 1830, "total_steps": 40000, "loss": 0.1157, "lr": 4.974250414168633e-05, "epoch": 0.29855616281915326, "percentage": 4.58, "elapsed_time": "0:24:57", "remaining_time": "8:40:28", "throughput": 2365.85, "total_tokens": 3542144} {"current_steps": 1835, "total_steps": 40000, "loss": 0.1904, "lr": 4.974109680517407e-05, "epoch": 0.2993718900399706, "percentage": 4.59, "elapsed_time": "0:24:59", "remaining_time": "8:39:42", "throughput": 2368.58, "total_tokens": 3551120} {"current_steps": 1840, "total_steps": 40000, "loss": 0.1413, "lr": 4.973968565327376e-05, "epoch": 0.300187617260788, "percentage": 4.6, "elapsed_time": "0:25:01", "remaining_time": "8:38:56", "throughput": 2371.75, "total_tokens": 3560800} {"current_steps": 1845, "total_steps": 40000, "loss": 0.0998, "lr": 4.973827068620303e-05, "epoch": 0.3010033444816054, "percentage": 4.61, "elapsed_time": "0:25:03", "remaining_time": "8:38:10", "throughput": 2374.14, "total_tokens": 3569280} {"current_steps": 1850, "total_steps": 40000, "loss": 0.2406, "lr": 4.973685190418008e-05, "epoch": 0.3018190717024227, "percentage": 4.62, "elapsed_time": "0:25:05", "remaining_time": "8:37:25", "throughput": 2377.14, "total_tokens": 3578704} {"current_steps": 1855, "total_steps": 40000, "loss": 0.1875, "lr": 4.97354293074237e-05, "epoch": 0.3026347989232401, "percentage": 4.64, "elapsed_time": "0:25:07", "remaining_time": "8:36:39", "throughput": 2379.9, "total_tokens": 3587760} {"current_steps": 1860, "total_steps": 40000, "loss": 0.1487, "lr": 4.9734002896153276e-05, "epoch": 0.30345052614405743, "percentage": 4.65, "elapsed_time": "0:25:09", "remaining_time": "8:35:54", "throughput": 2382.43, "total_tokens": 3596496} {"current_steps": 1865, "total_steps": 40000, "loss": 0.1756, "lr": 4.973257267058877e-05, "epoch": 0.3042662533648748, "percentage": 4.66, "elapsed_time": "0:25:11", "remaining_time": "8:35:09", "throughput": 2385.3, "total_tokens": 3605744} {"current_steps": 1870, "total_steps": 40000, "loss": 0.2611, "lr": 4.973113863095076e-05, "epoch": 0.30508198058569214, "percentage": 4.67, "elapsed_time": "0:25:13", "remaining_time": "8:34:25", "throughput": 2388.59, "total_tokens": 3615648} {"current_steps": 1875, "total_steps": 40000, "loss": 0.1913, "lr": 4.9729700777460384e-05, "epoch": 0.3058977078065095, "percentage": 4.69, "elapsed_time": "0:25:15", "remaining_time": "8:33:41", "throughput": 2391.78, "total_tokens": 3625456} {"current_steps": 1880, "total_steps": 40000, "loss": 0.14, "lr": 4.972825911033937e-05, "epoch": 0.30671343502732684, "percentage": 4.7, "elapsed_time": "0:25:17", "remaining_time": "8:32:56", "throughput": 2395.11, "total_tokens": 3635424} {"current_steps": 1885, "total_steps": 40000, "loss": 0.1275, "lr": 4.9726813629810056e-05, "epoch": 0.3075291622481442, "percentage": 4.71, "elapsed_time": "0:25:19", "remaining_time": "8:32:13", "throughput": 2398.03, "total_tokens": 3644816} {"current_steps": 1890, "total_steps": 40000, "loss": 0.1496, "lr": 4.9725364336095326e-05, "epoch": 0.3083448894689616, "percentage": 4.72, "elapsed_time": "0:25:21", "remaining_time": "8:31:29", "throughput": 2401.2, "total_tokens": 3654592} {"current_steps": 1895, "total_steps": 40000, "loss": 0.1442, "lr": 4.972391122941871e-05, "epoch": 0.30916061668977896, "percentage": 4.74, "elapsed_time": "0:25:24", "remaining_time": "8:30:45", "throughput": 2403.84, "total_tokens": 3663568} {"current_steps": 1900, "total_steps": 40000, "loss": 0.2052, "lr": 4.972245431000428e-05, "epoch": 0.3099763439105963, "percentage": 4.75, "elapsed_time": "0:25:26", "remaining_time": "8:30:02", "throughput": 2407.12, "total_tokens": 3673520} {"current_steps": 1905, "total_steps": 40000, "loss": 0.1248, "lr": 4.972099357807671e-05, "epoch": 0.31079207113141366, "percentage": 4.76, "elapsed_time": "0:25:28", "remaining_time": "8:29:19", "throughput": 2410.99, "total_tokens": 3684416} {"current_steps": 1910, "total_steps": 40000, "loss": 0.1743, "lr": 4.971952903386127e-05, "epoch": 0.311607798352231, "percentage": 4.78, "elapsed_time": "0:25:30", "remaining_time": "8:28:36", "throughput": 2414.46, "total_tokens": 3694736} {"current_steps": 1915, "total_steps": 40000, "loss": 0.1198, "lr": 4.971806067758381e-05, "epoch": 0.31242352557304837, "percentage": 4.79, "elapsed_time": "0:25:32", "remaining_time": "8:27:54", "throughput": 2417.51, "total_tokens": 3704400} {"current_steps": 1920, "total_steps": 40000, "loss": 0.2946, "lr": 4.971658850947076e-05, "epoch": 0.3132392527938657, "percentage": 4.8, "elapsed_time": "0:25:34", "remaining_time": "8:27:11", "throughput": 2420.62, "total_tokens": 3714160} {"current_steps": 1925, "total_steps": 40000, "loss": 0.1879, "lr": 4.9715112529749165e-05, "epoch": 0.31405498001468307, "percentage": 4.81, "elapsed_time": "0:25:36", "remaining_time": "8:26:29", "throughput": 2424.33, "total_tokens": 3724864} {"current_steps": 1930, "total_steps": 40000, "loss": 0.1212, "lr": 4.9713632738646624e-05, "epoch": 0.3148707072355004, "percentage": 4.83, "elapsed_time": "0:25:38", "remaining_time": "8:25:47", "throughput": 2427.73, "total_tokens": 3735088} {"current_steps": 1935, "total_steps": 40000, "loss": 0.1506, "lr": 4.971214913639134e-05, "epoch": 0.31568643445631783, "percentage": 4.84, "elapsed_time": "0:25:40", "remaining_time": "8:25:05", "throughput": 2431.15, "total_tokens": 3745360} {"current_steps": 1940, "total_steps": 40000, "loss": 0.1813, "lr": 4.9710661723212104e-05, "epoch": 0.3165021616771352, "percentage": 4.85, "elapsed_time": "0:25:42", "remaining_time": "8:24:24", "throughput": 2434.77, "total_tokens": 3755968} {"current_steps": 1945, "total_steps": 40000, "loss": 0.1484, "lr": 4.9709170499338295e-05, "epoch": 0.31731788889795254, "percentage": 4.86, "elapsed_time": "0:25:44", "remaining_time": "8:23:43", "throughput": 2437.14, "total_tokens": 3764672} {"current_steps": 1950, "total_steps": 40000, "loss": 0.2445, "lr": 4.9707675464999895e-05, "epoch": 0.3181336161187699, "percentage": 4.88, "elapsed_time": "0:25:46", "remaining_time": "8:23:02", "throughput": 2440.04, "total_tokens": 3774208} {"current_steps": 1955, "total_steps": 40000, "loss": 0.1172, "lr": 4.970617662042743e-05, "epoch": 0.31894934333958724, "percentage": 4.89, "elapsed_time": "0:25:48", "remaining_time": "8:22:21", "throughput": 2443.45, "total_tokens": 3784528} {"current_steps": 1960, "total_steps": 40000, "loss": 0.1819, "lr": 4.970467396585206e-05, "epoch": 0.3197650705604046, "percentage": 4.9, "elapsed_time": "0:25:50", "remaining_time": "8:21:40", "throughput": 2446.12, "total_tokens": 3793696} {"current_steps": 1965, "total_steps": 40000, "loss": 0.2811, "lr": 4.97031675015055e-05, "epoch": 0.32058079778122195, "percentage": 4.91, "elapsed_time": "0:25:52", "remaining_time": "8:20:59", "throughput": 2448.77, "total_tokens": 3802848} {"current_steps": 1970, "total_steps": 40000, "loss": 0.2013, "lr": 4.9701657227620075e-05, "epoch": 0.3213965250020393, "percentage": 4.92, "elapsed_time": "0:25:55", "remaining_time": "8:20:19", "throughput": 2451.79, "total_tokens": 3812592} {"current_steps": 1975, "total_steps": 40000, "loss": 0.162, "lr": 4.9700143144428685e-05, "epoch": 0.32221225222285665, "percentage": 4.94, "elapsed_time": "0:25:57", "remaining_time": "8:19:38", "throughput": 2454.74, "total_tokens": 3822224} {"current_steps": 1980, "total_steps": 40000, "loss": 0.0946, "lr": 4.969862525216482e-05, "epoch": 0.32302797944367406, "percentage": 4.95, "elapsed_time": "0:25:59", "remaining_time": "8:18:58", "throughput": 2457.65, "total_tokens": 3831824} {"current_steps": 1985, "total_steps": 40000, "loss": 0.0949, "lr": 4.9697103551062556e-05, "epoch": 0.3238437066644914, "percentage": 4.96, "elapsed_time": "0:26:01", "remaining_time": "8:18:19", "throughput": 2460.51, "total_tokens": 3841392} {"current_steps": 1990, "total_steps": 40000, "loss": 0.2679, "lr": 4.9695578041356565e-05, "epoch": 0.32465943388530877, "percentage": 4.98, "elapsed_time": "0:26:03", "remaining_time": "8:17:39", "throughput": 2463.55, "total_tokens": 3851200} {"current_steps": 1995, "total_steps": 40000, "loss": 0.1717, "lr": 4.969404872328209e-05, "epoch": 0.3254751611061261, "percentage": 4.99, "elapsed_time": "0:26:05", "remaining_time": "8:16:59", "throughput": 2466.98, "total_tokens": 3861664} {"current_steps": 2000, "total_steps": 40000, "loss": 0.2079, "lr": 4.969251559707498e-05, "epoch": 0.3262908883269435, "percentage": 5.0, "elapsed_time": "0:26:07", "remaining_time": "8:16:20", "throughput": 2470.93, "total_tokens": 3872976} {"current_steps": 2000, "total_steps": 40000, "eval_loss": 0.1561587154865265, "epoch": 0.3262908883269435, "percentage": 5.0, "elapsed_time": "0:27:27", "remaining_time": "8:41:51", "throughput": 2350.16, "total_tokens": 3872976} {"current_steps": 2005, "total_steps": 40000, "loss": 0.1744, "lr": 4.9690978662971674e-05, "epoch": 0.3271066155477608, "percentage": 5.01, "elapsed_time": "0:27:31", "remaining_time": "8:41:39", "throughput": 2350.9, "total_tokens": 3882896} {"current_steps": 2010, "total_steps": 40000, "loss": 0.2405, "lr": 4.968943792120916e-05, "epoch": 0.3279223427685782, "percentage": 5.03, "elapsed_time": "0:27:33", "remaining_time": "8:40:56", "throughput": 2353.98, "total_tokens": 3892848} {"current_steps": 2015, "total_steps": 40000, "loss": 0.2457, "lr": 4.9687893372025046e-05, "epoch": 0.32873806998939553, "percentage": 5.04, "elapsed_time": "0:27:35", "remaining_time": "8:40:13", "throughput": 2357.21, "total_tokens": 3903072} {"current_steps": 2020, "total_steps": 40000, "loss": 0.1865, "lr": 4.9686345015657535e-05, "epoch": 0.3295537972102129, "percentage": 5.05, "elapsed_time": "0:27:37", "remaining_time": "8:39:31", "throughput": 2360.73, "total_tokens": 3913776} {"current_steps": 2025, "total_steps": 40000, "loss": 0.1612, "lr": 4.968479285234538e-05, "epoch": 0.33036952443103024, "percentage": 5.06, "elapsed_time": "0:27:39", "remaining_time": "8:38:48", "throughput": 2363.69, "total_tokens": 3923568} {"current_steps": 2030, "total_steps": 40000, "loss": 0.1829, "lr": 4.9683236882327974e-05, "epoch": 0.33118525165184765, "percentage": 5.08, "elapsed_time": "0:27:41", "remaining_time": "8:38:06", "throughput": 2366.6, "total_tokens": 3933280} {"current_steps": 2035, "total_steps": 40000, "loss": 0.1736, "lr": 4.968167710584526e-05, "epoch": 0.332000978872665, "percentage": 5.09, "elapsed_time": "0:27:44", "remaining_time": "8:37:24", "throughput": 2368.92, "total_tokens": 3942032} {"current_steps": 2040, "total_steps": 40000, "loss": 0.142, "lr": 4.968011352313775e-05, "epoch": 0.33281670609348235, "percentage": 5.1, "elapsed_time": "0:27:46", "remaining_time": "8:36:43", "throughput": 2371.79, "total_tokens": 3951728} {"current_steps": 2045, "total_steps": 40000, "loss": 0.1236, "lr": 4.967854613444659e-05, "epoch": 0.3336324333142997, "percentage": 5.11, "elapsed_time": "0:27:48", "remaining_time": "8:36:01", "throughput": 2374.74, "total_tokens": 3961536} {"current_steps": 2050, "total_steps": 40000, "loss": 0.2773, "lr": 4.967697494001349e-05, "epoch": 0.33444816053511706, "percentage": 5.12, "elapsed_time": "0:27:50", "remaining_time": "8:35:20", "throughput": 2377.35, "total_tokens": 3970800} {"current_steps": 2055, "total_steps": 40000, "loss": 0.1891, "lr": 4.9675399940080736e-05, "epoch": 0.3352638877559344, "percentage": 5.14, "elapsed_time": "0:27:52", "remaining_time": "8:34:39", "throughput": 2380.3, "total_tokens": 3980640} {"current_steps": 2060, "total_steps": 40000, "loss": 0.0988, "lr": 4.9673821134891226e-05, "epoch": 0.33607961497675176, "percentage": 5.15, "elapsed_time": "0:27:54", "remaining_time": "8:33:58", "throughput": 2382.69, "total_tokens": 3989552} {"current_steps": 2065, "total_steps": 40000, "loss": 0.1487, "lr": 4.967223852468842e-05, "epoch": 0.3368953421975691, "percentage": 5.16, "elapsed_time": "0:27:56", "remaining_time": "8:33:17", "throughput": 2385.48, "total_tokens": 3999152} {"current_steps": 2070, "total_steps": 40000, "loss": 0.075, "lr": 4.967065210971639e-05, "epoch": 0.33771106941838647, "percentage": 5.17, "elapsed_time": "0:27:58", "remaining_time": "8:32:36", "throughput": 2388.7, "total_tokens": 4009472} {"current_steps": 2075, "total_steps": 40000, "loss": 0.1887, "lr": 4.966906189021977e-05, "epoch": 0.3385267966392039, "percentage": 5.19, "elapsed_time": "0:28:00", "remaining_time": "8:31:56", "throughput": 2391.76, "total_tokens": 4019552} {"current_steps": 2080, "total_steps": 40000, "loss": 0.2441, "lr": 4.966746786644379e-05, "epoch": 0.33934252386002123, "percentage": 5.2, "elapsed_time": "0:28:02", "remaining_time": "8:31:16", "throughput": 2393.82, "total_tokens": 4027968} {"current_steps": 2085, "total_steps": 40000, "loss": 0.1545, "lr": 4.966587003863429e-05, "epoch": 0.3401582510808386, "percentage": 5.21, "elapsed_time": "0:28:04", "remaining_time": "8:30:35", "throughput": 2396.36, "total_tokens": 4037184} {"current_steps": 2090, "total_steps": 40000, "loss": 0.1523, "lr": 4.966426840703765e-05, "epoch": 0.34097397830165593, "percentage": 5.22, "elapsed_time": "0:28:06", "remaining_time": "8:29:56", "throughput": 2399.69, "total_tokens": 4047760} {"current_steps": 2095, "total_steps": 40000, "loss": 0.1655, "lr": 4.9662662971900875e-05, "epoch": 0.3417897055224733, "percentage": 5.24, "elapsed_time": "0:28:08", "remaining_time": "8:29:16", "throughput": 2402.32, "total_tokens": 4057152} {"current_steps": 2100, "total_steps": 40000, "loss": 0.1785, "lr": 4.9661053733471534e-05, "epoch": 0.34260543274329064, "percentage": 5.25, "elapsed_time": "0:28:10", "remaining_time": "8:28:36", "throughput": 2404.81, "total_tokens": 4066320} {"current_steps": 2105, "total_steps": 40000, "loss": 0.2461, "lr": 4.965944069199781e-05, "epoch": 0.343421159964108, "percentage": 5.26, "elapsed_time": "0:28:12", "remaining_time": "8:27:57", "throughput": 2407.87, "total_tokens": 4076464} {"current_steps": 2110, "total_steps": 40000, "loss": 0.1525, "lr": 4.965782384772842e-05, "epoch": 0.34423688718492534, "percentage": 5.27, "elapsed_time": "0:28:15", "remaining_time": "8:27:18", "throughput": 2410.23, "total_tokens": 4085456} {"current_steps": 2115, "total_steps": 40000, "loss": 0.2106, "lr": 4.9656203200912734e-05, "epoch": 0.3450526144057427, "percentage": 5.29, "elapsed_time": "0:28:17", "remaining_time": "8:26:39", "throughput": 2412.98, "total_tokens": 4095104} {"current_steps": 2120, "total_steps": 40000, "loss": 0.1593, "lr": 4.965457875180067e-05, "epoch": 0.3458683416265601, "percentage": 5.3, "elapsed_time": "0:28:19", "remaining_time": "8:26:00", "throughput": 2416.06, "total_tokens": 4105312} {"current_steps": 2125, "total_steps": 40000, "loss": 0.1009, "lr": 4.9652950500642724e-05, "epoch": 0.34668406884737746, "percentage": 5.31, "elapsed_time": "0:28:21", "remaining_time": "8:25:22", "throughput": 2418.97, "total_tokens": 4115248} {"current_steps": 2130, "total_steps": 40000, "loss": 0.1532, "lr": 4.965131844769001e-05, "epoch": 0.3474997960681948, "percentage": 5.33, "elapsed_time": "0:28:23", "remaining_time": "8:24:43", "throughput": 2421.51, "total_tokens": 4124560} {"current_steps": 2135, "total_steps": 40000, "loss": 0.2949, "lr": 4.96496825931942e-05, "epoch": 0.34831552328901216, "percentage": 5.34, "elapsed_time": "0:28:25", "remaining_time": "8:24:05", "throughput": 2423.98, "total_tokens": 4133776} {"current_steps": 2140, "total_steps": 40000, "loss": 0.1752, "lr": 4.9648042937407566e-05, "epoch": 0.3491312505098295, "percentage": 5.35, "elapsed_time": "0:28:27", "remaining_time": "8:23:27", "throughput": 2426.15, "total_tokens": 4142480} {"current_steps": 2145, "total_steps": 40000, "loss": 0.1064, "lr": 4.964639948058297e-05, "epoch": 0.34994697773064687, "percentage": 5.36, "elapsed_time": "0:28:29", "remaining_time": "8:22:49", "throughput": 2428.3, "total_tokens": 4151184} {"current_steps": 2150, "total_steps": 40000, "loss": 0.1985, "lr": 4.9644752222973846e-05, "epoch": 0.3507627049514642, "percentage": 5.38, "elapsed_time": "0:28:31", "remaining_time": "8:22:11", "throughput": 2430.87, "total_tokens": 4160656} {"current_steps": 2155, "total_steps": 40000, "loss": 0.1613, "lr": 4.964310116483422e-05, "epoch": 0.3515784321722816, "percentage": 5.39, "elapsed_time": "0:28:33", "remaining_time": "8:21:34", "throughput": 2434.03, "total_tokens": 4171104} {"current_steps": 2160, "total_steps": 40000, "loss": 0.2142, "lr": 4.964144630641872e-05, "epoch": 0.3523941593930989, "percentage": 5.4, "elapsed_time": "0:28:35", "remaining_time": "8:20:56", "throughput": 2436.37, "total_tokens": 4180128} {"current_steps": 2165, "total_steps": 40000, "loss": 0.1648, "lr": 4.9639787647982525e-05, "epoch": 0.3532098866139163, "percentage": 5.41, "elapsed_time": "0:28:37", "remaining_time": "8:20:19", "throughput": 2438.57, "total_tokens": 4188944} {"current_steps": 2170, "total_steps": 40000, "loss": 0.1627, "lr": 4.963812518978143e-05, "epoch": 0.3540256138347337, "percentage": 5.42, "elapsed_time": "0:28:39", "remaining_time": "8:19:42", "throughput": 2441.78, "total_tokens": 4199472} {"current_steps": 2175, "total_steps": 40000, "loss": 0.1648, "lr": 4.963645893207182e-05, "epoch": 0.35484134105555104, "percentage": 5.44, "elapsed_time": "0:28:41", "remaining_time": "8:19:05", "throughput": 2444.16, "total_tokens": 4208624} {"current_steps": 2180, "total_steps": 40000, "loss": 0.1379, "lr": 4.963478887511063e-05, "epoch": 0.3556570682763684, "percentage": 5.45, "elapsed_time": "0:28:43", "remaining_time": "8:18:28", "throughput": 2446.38, "total_tokens": 4217488} {"current_steps": 2185, "total_steps": 40000, "loss": 0.0572, "lr": 4.963311501915542e-05, "epoch": 0.35647279549718575, "percentage": 5.46, "elapsed_time": "0:28:46", "remaining_time": "8:17:52", "throughput": 2448.56, "total_tokens": 4226336} {"current_steps": 2190, "total_steps": 40000, "loss": 0.2935, "lr": 4.963143736446432e-05, "epoch": 0.3572885227180031, "percentage": 5.47, "elapsed_time": "0:28:48", "remaining_time": "8:17:15", "throughput": 2450.75, "total_tokens": 4235168} {"current_steps": 2195, "total_steps": 40000, "loss": 0.175, "lr": 4.962975591129603e-05, "epoch": 0.35810424993882045, "percentage": 5.49, "elapsed_time": "0:28:50", "remaining_time": "8:16:39", "throughput": 2453.46, "total_tokens": 4244912} {"current_steps": 2200, "total_steps": 40000, "loss": 0.1947, "lr": 4.962807065990986e-05, "epoch": 0.3589199771596378, "percentage": 5.5, "elapsed_time": "0:28:52", "remaining_time": "8:16:02", "throughput": 2455.95, "total_tokens": 4254272} {"current_steps": 2200, "total_steps": 40000, "eval_loss": 0.1468677520751953, "epoch": 0.3589199771596378, "percentage": 5.5, "elapsed_time": "0:30:12", "remaining_time": "8:39:07", "throughput": 2346.81, "total_tokens": 4254272} {"current_steps": 2205, "total_steps": 40000, "loss": 0.1642, "lr": 4.9626381610565714e-05, "epoch": 0.35973570438045516, "percentage": 5.51, "elapsed_time": "0:30:16", "remaining_time": "8:38:53", "throughput": 2347.12, "total_tokens": 4263248} {"current_steps": 2210, "total_steps": 40000, "loss": 0.0926, "lr": 4.9624688763524043e-05, "epoch": 0.3605514316012725, "percentage": 5.53, "elapsed_time": "0:30:18", "remaining_time": "8:38:14", "throughput": 2350.19, "total_tokens": 4273664} {"current_steps": 2215, "total_steps": 40000, "loss": 0.1995, "lr": 4.962299211904591e-05, "epoch": 0.3613671588220899, "percentage": 5.54, "elapsed_time": "0:30:20", "remaining_time": "8:37:35", "throughput": 2352.85, "total_tokens": 4283360} {"current_steps": 2220, "total_steps": 40000, "loss": 0.1665, "lr": 4.962129167739296e-05, "epoch": 0.36218288604290727, "percentage": 5.55, "elapsed_time": "0:30:22", "remaining_time": "8:36:56", "throughput": 2355.83, "total_tokens": 4293648} {"current_steps": 2225, "total_steps": 40000, "loss": 0.1306, "lr": 4.961958743882742e-05, "epoch": 0.3629986132637246, "percentage": 5.56, "elapsed_time": "0:30:24", "remaining_time": "8:36:17", "throughput": 2358.43, "total_tokens": 4303264} {"current_steps": 2230, "total_steps": 40000, "loss": 0.1562, "lr": 4.961787940361211e-05, "epoch": 0.363814340484542, "percentage": 5.58, "elapsed_time": "0:30:26", "remaining_time": "8:35:39", "throughput": 2361.17, "total_tokens": 4313136} {"current_steps": 2235, "total_steps": 40000, "loss": 0.2057, "lr": 4.961616757201043e-05, "epoch": 0.36463006770535933, "percentage": 5.59, "elapsed_time": "0:30:28", "remaining_time": "8:35:00", "throughput": 2364.13, "total_tokens": 4323424} {"current_steps": 2240, "total_steps": 40000, "loss": 0.1268, "lr": 4.961445194428637e-05, "epoch": 0.3654457949261767, "percentage": 5.6, "elapsed_time": "0:30:30", "remaining_time": "8:34:22", "throughput": 2366.6, "total_tokens": 4332848} {"current_steps": 2245, "total_steps": 40000, "loss": 0.1491, "lr": 4.9612732520704486e-05, "epoch": 0.36626152214699403, "percentage": 5.61, "elapsed_time": "0:30:32", "remaining_time": "8:33:44", "throughput": 2369.66, "total_tokens": 4343344} {"current_steps": 2250, "total_steps": 40000, "loss": 0.2679, "lr": 4.961100930152994e-05, "epoch": 0.3670772493678114, "percentage": 5.62, "elapsed_time": "0:30:34", "remaining_time": "8:33:06", "throughput": 2372.06, "total_tokens": 4352656} {"current_steps": 2255, "total_steps": 40000, "loss": 0.1233, "lr": 4.960928228702849e-05, "epoch": 0.36789297658862874, "percentage": 5.64, "elapsed_time": "0:30:37", "remaining_time": "8:32:28", "throughput": 2374.89, "total_tokens": 4362736} {"current_steps": 2260, "total_steps": 40000, "loss": 0.181, "lr": 4.960755147746645e-05, "epoch": 0.36870870380944615, "percentage": 5.65, "elapsed_time": "0:30:39", "remaining_time": "8:31:51", "throughput": 2376.05, "total_tokens": 4369776} {"current_steps": 2265, "total_steps": 40000, "loss": 0.1741, "lr": 4.9605816873110736e-05, "epoch": 0.3695244310302635, "percentage": 5.66, "elapsed_time": "0:30:41", "remaining_time": "8:31:13", "throughput": 2378.51, "total_tokens": 4379200} {"current_steps": 2270, "total_steps": 40000, "loss": 0.1529, "lr": 4.960407847422883e-05, "epoch": 0.37034015825108085, "percentage": 5.67, "elapsed_time": "0:30:43", "remaining_time": "8:30:36", "throughput": 2380.66, "total_tokens": 4388080} {"current_steps": 2275, "total_steps": 40000, "loss": 0.1643, "lr": 4.960233628108885e-05, "epoch": 0.3711558854718982, "percentage": 5.69, "elapsed_time": "0:30:45", "remaining_time": "8:29:59", "throughput": 2383.55, "total_tokens": 4398368} {"current_steps": 2280, "total_steps": 40000, "loss": 0.1048, "lr": 4.960059029395942e-05, "epoch": 0.37197161269271556, "percentage": 5.7, "elapsed_time": "0:30:47", "remaining_time": "8:29:22", "throughput": 2386.07, "total_tokens": 4407936} {"current_steps": 2285, "total_steps": 40000, "loss": 0.128, "lr": 4.959884051310983e-05, "epoch": 0.3727873399135329, "percentage": 5.71, "elapsed_time": "0:30:49", "remaining_time": "8:28:45", "throughput": 2389.1, "total_tokens": 4418464} {"current_steps": 2290, "total_steps": 40000, "loss": 0.1702, "lr": 4.959708693880991e-05, "epoch": 0.37360306713435026, "percentage": 5.73, "elapsed_time": "0:30:51", "remaining_time": "8:28:08", "throughput": 2391.17, "total_tokens": 4427232} {"current_steps": 2295, "total_steps": 40000, "loss": 0.0603, "lr": 4.9595329571330074e-05, "epoch": 0.3744187943551676, "percentage": 5.74, "elapsed_time": "0:30:53", "remaining_time": "8:27:32", "throughput": 2393.2, "total_tokens": 4435920} {"current_steps": 2300, "total_steps": 40000, "loss": 0.1879, "lr": 4.9593568410941326e-05, "epoch": 0.37523452157598497, "percentage": 5.75, "elapsed_time": "0:30:55", "remaining_time": "8:26:55", "throughput": 2395.56, "total_tokens": 4445232} {"current_steps": 2305, "total_steps": 40000, "loss": 0.1598, "lr": 4.959180345791528e-05, "epoch": 0.3760502487968023, "percentage": 5.76, "elapsed_time": "0:30:57", "remaining_time": "8:26:19", "throughput": 2397.86, "total_tokens": 4454464} {"current_steps": 2310, "total_steps": 40000, "loss": 0.1941, "lr": 4.9590034712524086e-05, "epoch": 0.37686597601761973, "percentage": 5.78, "elapsed_time": "0:30:59", "remaining_time": "8:25:43", "throughput": 2400.54, "total_tokens": 4464432} {"current_steps": 2315, "total_steps": 40000, "loss": 0.1927, "lr": 4.958826217504053e-05, "epoch": 0.3776817032384371, "percentage": 5.79, "elapsed_time": "0:31:01", "remaining_time": "8:25:08", "throughput": 2403.19, "total_tokens": 4474368} {"current_steps": 2320, "total_steps": 40000, "loss": 0.3066, "lr": 4.958648584573795e-05, "epoch": 0.37849743045925444, "percentage": 5.8, "elapsed_time": "0:31:03", "remaining_time": "8:24:32", "throughput": 2405.61, "total_tokens": 4483856} {"current_steps": 2325, "total_steps": 40000, "loss": 0.1374, "lr": 4.958470572489028e-05, "epoch": 0.3793131576800718, "percentage": 5.81, "elapsed_time": "0:31:05", "remaining_time": "8:23:56", "throughput": 2407.9, "total_tokens": 4493104} {"current_steps": 2330, "total_steps": 40000, "loss": 0.1034, "lr": 4.958292181277203e-05, "epoch": 0.38012888490088914, "percentage": 5.83, "elapsed_time": "0:31:08", "remaining_time": "8:23:21", "throughput": 2410.45, "total_tokens": 4502848} {"current_steps": 2335, "total_steps": 40000, "loss": 0.1067, "lr": 4.958113410965832e-05, "epoch": 0.3809446121217065, "percentage": 5.84, "elapsed_time": "0:31:10", "remaining_time": "8:22:46", "throughput": 2412.88, "total_tokens": 4512384} {"current_steps": 2340, "total_steps": 40000, "loss": 0.1629, "lr": 4.957934261582481e-05, "epoch": 0.38176033934252385, "percentage": 5.85, "elapsed_time": "0:31:12", "remaining_time": "8:22:11", "throughput": 2416.27, "total_tokens": 4523760} {"current_steps": 2345, "total_steps": 40000, "loss": 0.2076, "lr": 4.95775473315478e-05, "epoch": 0.3825760665633412, "percentage": 5.86, "elapsed_time": "0:31:14", "remaining_time": "8:21:36", "throughput": 2418.78, "total_tokens": 4533472} {"current_steps": 2350, "total_steps": 40000, "loss": 0.1381, "lr": 4.9575748257104124e-05, "epoch": 0.38339179378415855, "percentage": 5.88, "elapsed_time": "0:31:16", "remaining_time": "8:21:01", "throughput": 2421.13, "total_tokens": 4542896} {"current_steps": 2355, "total_steps": 40000, "loss": 0.1867, "lr": 4.9573945392771224e-05, "epoch": 0.38420752100497596, "percentage": 5.89, "elapsed_time": "0:31:18", "remaining_time": "8:20:26", "throughput": 2423.34, "total_tokens": 4552032} {"current_steps": 2360, "total_steps": 40000, "loss": 0.1266, "lr": 4.9572138738827134e-05, "epoch": 0.3850232482257933, "percentage": 5.9, "elapsed_time": "0:31:20", "remaining_time": "8:19:52", "throughput": 2425.41, "total_tokens": 4560928} {"current_steps": 2365, "total_steps": 40000, "loss": 0.1309, "lr": 4.957032829555046e-05, "epoch": 0.38583897544661067, "percentage": 5.91, "elapsed_time": "0:31:22", "remaining_time": "8:19:17", "throughput": 2427.34, "total_tokens": 4569568} {"current_steps": 2370, "total_steps": 40000, "loss": 0.1899, "lr": 4.956851406322039e-05, "epoch": 0.386654702667428, "percentage": 5.92, "elapsed_time": "0:31:24", "remaining_time": "8:18:43", "throughput": 2429.64, "total_tokens": 4578896} {"current_steps": 2375, "total_steps": 40000, "loss": 0.1112, "lr": 4.9566696042116704e-05, "epoch": 0.38747042988824537, "percentage": 5.94, "elapsed_time": "0:31:26", "remaining_time": "8:18:08", "throughput": 2432.14, "total_tokens": 4588624} {"current_steps": 2380, "total_steps": 40000, "loss": 0.0771, "lr": 4.9564874232519766e-05, "epoch": 0.3882861571090627, "percentage": 5.95, "elapsed_time": "0:31:28", "remaining_time": "8:17:34", "throughput": 2434.89, "total_tokens": 4598864} {"current_steps": 2385, "total_steps": 40000, "loss": 0.1854, "lr": 4.9563048634710516e-05, "epoch": 0.3891018843298801, "percentage": 5.96, "elapsed_time": "0:31:30", "remaining_time": "8:17:00", "throughput": 2437.4, "total_tokens": 4608672} {"current_steps": 2390, "total_steps": 40000, "loss": 0.1545, "lr": 4.956121924897049e-05, "epoch": 0.38991761155069743, "percentage": 5.97, "elapsed_time": "0:31:32", "remaining_time": "8:16:27", "throughput": 2439.82, "total_tokens": 4618288} {"current_steps": 2395, "total_steps": 40000, "loss": 0.1407, "lr": 4.955938607558181e-05, "epoch": 0.3907333387715148, "percentage": 5.99, "elapsed_time": "0:31:34", "remaining_time": "8:15:53", "throughput": 2442.24, "total_tokens": 4627904} {"current_steps": 2400, "total_steps": 40000, "loss": 0.1659, "lr": 4.955754911482715e-05, "epoch": 0.3915490659923322, "percentage": 6.0, "elapsed_time": "0:31:37", "remaining_time": "8:15:19", "throughput": 2444.58, "total_tokens": 4637376} {"current_steps": 2400, "total_steps": 40000, "eval_loss": 0.15362855792045593, "epoch": 0.3915490659923322, "percentage": 6.0, "elapsed_time": "0:32:57", "remaining_time": "8:36:22", "throughput": 2344.95, "total_tokens": 4637376} {"current_steps": 2405, "total_steps": 40000, "loss": 0.1795, "lr": 4.9555708366989804e-05, "epoch": 0.39236479321314954, "percentage": 6.01, "elapsed_time": "0:33:01", "remaining_time": "8:36:09", "throughput": 2345.57, "total_tokens": 4646928} {"current_steps": 2410, "total_steps": 40000, "loss": 0.1467, "lr": 4.9553863832353655e-05, "epoch": 0.3931805204339669, "percentage": 6.02, "elapsed_time": "0:33:03", "remaining_time": "8:35:33", "throughput": 2347.74, "total_tokens": 4656080} {"current_steps": 2415, "total_steps": 40000, "loss": 0.2426, "lr": 4.955201551120313e-05, "epoch": 0.39399624765478425, "percentage": 6.04, "elapsed_time": "0:33:05", "remaining_time": "8:34:57", "throughput": 2350.41, "total_tokens": 4666240} {"current_steps": 2420, "total_steps": 40000, "loss": 0.1847, "lr": 4.955016340382328e-05, "epoch": 0.3948119748756016, "percentage": 6.05, "elapsed_time": "0:33:07", "remaining_time": "8:34:21", "throughput": 2353.23, "total_tokens": 4676704} {"current_steps": 2425, "total_steps": 40000, "loss": 0.1705, "lr": 4.954830751049972e-05, "epoch": 0.39562770209641895, "percentage": 6.06, "elapsed_time": "0:33:09", "remaining_time": "8:33:45", "throughput": 2355.51, "total_tokens": 4686096} {"current_steps": 2430, "total_steps": 40000, "loss": 0.1568, "lr": 4.954644783151864e-05, "epoch": 0.3964434293172363, "percentage": 6.08, "elapsed_time": "0:33:11", "remaining_time": "8:33:10", "throughput": 2358.07, "total_tokens": 4696048} {"current_steps": 2435, "total_steps": 40000, "loss": 0.1104, "lr": 4.954458436716684e-05, "epoch": 0.39725915653805366, "percentage": 6.09, "elapsed_time": "0:33:13", "remaining_time": "8:32:34", "throughput": 2360.14, "total_tokens": 4705040} {"current_steps": 2440, "total_steps": 40000, "loss": 0.2213, "lr": 4.954271711773168e-05, "epoch": 0.398074883758871, "percentage": 6.1, "elapsed_time": "0:33:15", "remaining_time": "8:31:59", "throughput": 2362.74, "total_tokens": 4715152} {"current_steps": 2445, "total_steps": 40000, "loss": 0.105, "lr": 4.9540846083501115e-05, "epoch": 0.39889061097968836, "percentage": 6.11, "elapsed_time": "0:33:17", "remaining_time": "8:31:24", "throughput": 2365.45, "total_tokens": 4725456} {"current_steps": 2450, "total_steps": 40000, "loss": 0.0618, "lr": 4.953897126476369e-05, "epoch": 0.3997063382005058, "percentage": 6.12, "elapsed_time": "0:33:19", "remaining_time": "8:30:49", "throughput": 2368.06, "total_tokens": 4735552} {"current_steps": 2455, "total_steps": 40000, "loss": 0.1822, "lr": 4.9537092661808514e-05, "epoch": 0.4005220654213231, "percentage": 6.14, "elapsed_time": "0:33:21", "remaining_time": "8:30:14", "throughput": 2370.61, "total_tokens": 4745552} {"current_steps": 2460, "total_steps": 40000, "loss": 0.2691, "lr": 4.9535210274925306e-05, "epoch": 0.4013377926421405, "percentage": 6.15, "elapsed_time": "0:33:23", "remaining_time": "8:29:39", "throughput": 2373.34, "total_tokens": 4755920} {"current_steps": 2465, "total_steps": 40000, "loss": 0.0931, "lr": 4.953332410440435e-05, "epoch": 0.40215351986295783, "percentage": 6.16, "elapsed_time": "0:33:25", "remaining_time": "8:29:05", "throughput": 2376.19, "total_tokens": 4766544} {"current_steps": 2470, "total_steps": 40000, "loss": 0.0746, "lr": 4.9531434150536496e-05, "epoch": 0.4029692470837752, "percentage": 6.17, "elapsed_time": "0:33:28", "remaining_time": "8:28:30", "throughput": 2378.47, "total_tokens": 4776032} {"current_steps": 2475, "total_steps": 40000, "loss": 0.1612, "lr": 4.952954041361322e-05, "epoch": 0.40378497430459254, "percentage": 6.19, "elapsed_time": "0:33:30", "remaining_time": "8:27:56", "throughput": 2381.39, "total_tokens": 4786848} {"current_steps": 2480, "total_steps": 40000, "loss": 0.0812, "lr": 4.952764289392655e-05, "epoch": 0.4046007015254099, "percentage": 6.2, "elapsed_time": "0:33:32", "remaining_time": "8:27:22", "throughput": 2383.97, "total_tokens": 4796960} {"current_steps": 2485, "total_steps": 40000, "loss": 0.063, "lr": 4.952574159176912e-05, "epoch": 0.40541642874622724, "percentage": 6.21, "elapsed_time": "0:33:34", "remaining_time": "8:26:48", "throughput": 2385.51, "total_tokens": 4804992} {"current_steps": 2490, "total_steps": 40000, "loss": 0.0304, "lr": 4.952383650743413e-05, "epoch": 0.4062321559670446, "percentage": 6.22, "elapsed_time": "0:33:36", "remaining_time": "8:26:14", "throughput": 2387.2, "total_tokens": 4813328} {"current_steps": 2495, "total_steps": 40000, "loss": 0.3103, "lr": 4.952192764121536e-05, "epoch": 0.407047883187862, "percentage": 6.24, "elapsed_time": "0:33:38", "remaining_time": "8:25:40", "throughput": 2389.57, "total_tokens": 4823056} {"current_steps": 2500, "total_steps": 40000, "loss": 0.2057, "lr": 4.9520014993407185e-05, "epoch": 0.40786361040867936, "percentage": 6.25, "elapsed_time": "0:33:40", "remaining_time": "8:25:06", "throughput": 2391.55, "total_tokens": 4831984} {"current_steps": 2505, "total_steps": 40000, "loss": 0.0714, "lr": 4.951809856430456e-05, "epoch": 0.4086793376294967, "percentage": 6.26, "elapsed_time": "0:33:42", "remaining_time": "8:24:33", "throughput": 2394.02, "total_tokens": 4841920} {"current_steps": 2510, "total_steps": 40000, "loss": 0.1553, "lr": 4.951617835420303e-05, "epoch": 0.40949506485031406, "percentage": 6.28, "elapsed_time": "0:33:44", "remaining_time": "8:23:59", "throughput": 2396.11, "total_tokens": 4851104} {"current_steps": 2515, "total_steps": 40000, "loss": 0.347, "lr": 4.951425436339869e-05, "epoch": 0.4103107920711314, "percentage": 6.29, "elapsed_time": "0:33:46", "remaining_time": "8:23:26", "throughput": 2397.77, "total_tokens": 4859456} {"current_steps": 2520, "total_steps": 40000, "loss": 0.2516, "lr": 4.9512326592188274e-05, "epoch": 0.41112651929194877, "percentage": 6.3, "elapsed_time": "0:33:48", "remaining_time": "8:22:53", "throughput": 2400.06, "total_tokens": 4869040} {"current_steps": 2525, "total_steps": 40000, "loss": 0.097, "lr": 4.9510395040869054e-05, "epoch": 0.4119422465127661, "percentage": 6.31, "elapsed_time": "0:33:50", "remaining_time": "8:22:20", "throughput": 2402.17, "total_tokens": 4878288} {"current_steps": 2530, "total_steps": 40000, "loss": 0.1485, "lr": 4.9508459709738905e-05, "epoch": 0.41275797373358347, "percentage": 6.33, "elapsed_time": "0:33:52", "remaining_time": "8:21:47", "throughput": 2404.69, "total_tokens": 4888368} {"current_steps": 2535, "total_steps": 40000, "loss": 0.2793, "lr": 4.950652059909627e-05, "epoch": 0.4135737009544008, "percentage": 6.34, "elapsed_time": "0:33:54", "remaining_time": "8:21:14", "throughput": 2405.93, "total_tokens": 4895840} {"current_steps": 2540, "total_steps": 40000, "loss": 0.1705, "lr": 4.95045777092402e-05, "epoch": 0.41438942817521823, "percentage": 6.35, "elapsed_time": "0:33:56", "remaining_time": "8:20:41", "throughput": 2407.87, "total_tokens": 4904768} {"current_steps": 2545, "total_steps": 40000, "loss": 0.226, "lr": 4.950263104047031e-05, "epoch": 0.4152051553960356, "percentage": 6.36, "elapsed_time": "0:33:59", "remaining_time": "8:20:08", "throughput": 2410.09, "total_tokens": 4914272} {"current_steps": 2550, "total_steps": 40000, "loss": 0.0879, "lr": 4.9500680593086775e-05, "epoch": 0.41602088261685294, "percentage": 6.38, "elapsed_time": "0:34:01", "remaining_time": "8:19:36", "throughput": 2412.42, "total_tokens": 4924048} {"current_steps": 2555, "total_steps": 40000, "loss": 0.1636, "lr": 4.94987263673904e-05, "epoch": 0.4168366098376703, "percentage": 6.39, "elapsed_time": "0:34:03", "remaining_time": "8:19:04", "throughput": 2414.63, "total_tokens": 4933552} {"current_steps": 2560, "total_steps": 40000, "loss": 0.1304, "lr": 4.949676836368256e-05, "epoch": 0.41765233705848764, "percentage": 6.4, "elapsed_time": "0:34:05", "remaining_time": "8:18:31", "throughput": 2417.01, "total_tokens": 4943408} {"current_steps": 2565, "total_steps": 40000, "loss": 0.1307, "lr": 4.949480658226518e-05, "epoch": 0.418468064279305, "percentage": 6.41, "elapsed_time": "0:34:07", "remaining_time": "8:17:59", "throughput": 2419.83, "total_tokens": 4954176} {"current_steps": 2570, "total_steps": 40000, "loss": 0.2472, "lr": 4.949284102344082e-05, "epoch": 0.41928379150012235, "percentage": 6.42, "elapsed_time": "0:34:09", "remaining_time": "8:17:27", "throughput": 2421.61, "total_tokens": 4962800} {"current_steps": 2575, "total_steps": 40000, "loss": 0.1436, "lr": 4.9490871687512565e-05, "epoch": 0.4200995187209397, "percentage": 6.44, "elapsed_time": "0:34:11", "remaining_time": "8:16:55", "throughput": 2424.24, "total_tokens": 4973200} {"current_steps": 2580, "total_steps": 40000, "loss": 0.0815, "lr": 4.948889857478413e-05, "epoch": 0.42091524594175705, "percentage": 6.45, "elapsed_time": "0:34:13", "remaining_time": "8:16:23", "throughput": 2426.16, "total_tokens": 4982160} {"current_steps": 2585, "total_steps": 40000, "loss": 0.0751, "lr": 4.948692168555978e-05, "epoch": 0.4217309731625744, "percentage": 6.46, "elapsed_time": "0:34:15", "remaining_time": "8:15:52", "throughput": 2428.1, "total_tokens": 4991168} {"current_steps": 2590, "total_steps": 40000, "loss": 0.1395, "lr": 4.94849410201444e-05, "epoch": 0.4225467003833918, "percentage": 6.48, "elapsed_time": "0:34:17", "remaining_time": "8:15:20", "throughput": 2429.84, "total_tokens": 4999760} {"current_steps": 2595, "total_steps": 40000, "loss": 0.3019, "lr": 4.948295657884341e-05, "epoch": 0.42336242760420917, "percentage": 6.49, "elapsed_time": "0:34:19", "remaining_time": "8:14:49", "throughput": 2432.37, "total_tokens": 5009984} {"current_steps": 2600, "total_steps": 40000, "loss": 0.2789, "lr": 4.9480968361962835e-05, "epoch": 0.4241781548250265, "percentage": 6.5, "elapsed_time": "0:34:21", "remaining_time": "8:14:17", "throughput": 2434.63, "total_tokens": 5019664} {"current_steps": 2600, "total_steps": 40000, "eval_loss": 0.1615014225244522, "epoch": 0.4241781548250265, "percentage": 6.5, "elapsed_time": "0:35:42", "remaining_time": "8:33:37", "throughput": 2343.0, "total_tokens": 5019664} {"current_steps": 2605, "total_steps": 40000, "loss": 0.2514, "lr": 4.9478976369809305e-05, "epoch": 0.4249938820458439, "percentage": 6.51, "elapsed_time": "0:35:46", "remaining_time": "8:33:39", "throughput": 2342.72, "total_tokens": 5029696} {"current_steps": 2610, "total_steps": 40000, "loss": 0.1603, "lr": 4.947698060268999e-05, "epoch": 0.4258096092666612, "percentage": 6.53, "elapsed_time": "0:35:49", "remaining_time": "8:33:06", "throughput": 2344.72, "total_tokens": 5038816} {"current_steps": 2615, "total_steps": 40000, "loss": 0.1575, "lr": 4.9474981060912665e-05, "epoch": 0.4266253364874786, "percentage": 6.54, "elapsed_time": "0:35:51", "remaining_time": "8:32:32", "throughput": 2346.64, "total_tokens": 5047792} {"current_steps": 2620, "total_steps": 40000, "loss": 0.1816, "lr": 4.94729777447857e-05, "epoch": 0.42744106370829593, "percentage": 6.55, "elapsed_time": "0:35:53", "remaining_time": "8:31:59", "throughput": 2348.65, "total_tokens": 5056960} {"current_steps": 2625, "total_steps": 40000, "loss": 0.1882, "lr": 4.947097065461801e-05, "epoch": 0.4282567909291133, "percentage": 6.56, "elapsed_time": "0:35:55", "remaining_time": "8:31:25", "throughput": 2351.01, "total_tokens": 5066912} {"current_steps": 2630, "total_steps": 40000, "loss": 0.0784, "lr": 4.9468959790719125e-05, "epoch": 0.42907251814993064, "percentage": 6.58, "elapsed_time": "0:35:57", "remaining_time": "8:30:52", "throughput": 2353.35, "total_tokens": 5076800} {"current_steps": 2635, "total_steps": 40000, "loss": 0.14, "lr": 4.9466945153399146e-05, "epoch": 0.42988824537074805, "percentage": 6.59, "elapsed_time": "0:35:59", "remaining_time": "8:30:19", "throughput": 2355.1, "total_tokens": 5085456} {"current_steps": 2640, "total_steps": 40000, "loss": 0.1354, "lr": 4.9464926742968755e-05, "epoch": 0.4307039725915654, "percentage": 6.6, "elapsed_time": "0:36:01", "remaining_time": "8:29:47", "throughput": 2357.59, "total_tokens": 5095712} {"current_steps": 2645, "total_steps": 40000, "loss": 0.1214, "lr": 4.946290455973921e-05, "epoch": 0.43151969981238275, "percentage": 6.61, "elapsed_time": "0:36:03", "remaining_time": "8:29:14", "throughput": 2360.22, "total_tokens": 5106272} {"current_steps": 2650, "total_steps": 40000, "loss": 0.3491, "lr": 4.9460878604022365e-05, "epoch": 0.4323354270332001, "percentage": 6.62, "elapsed_time": "0:36:05", "remaining_time": "8:28:41", "throughput": 2362.42, "total_tokens": 5115936} {"current_steps": 2655, "total_steps": 40000, "loss": 0.1031, "lr": 4.945884887613065e-05, "epoch": 0.43315115425401746, "percentage": 6.64, "elapsed_time": "0:36:07", "remaining_time": "8:28:09", "throughput": 2364.47, "total_tokens": 5125248} {"current_steps": 2660, "total_steps": 40000, "loss": 0.1837, "lr": 4.9456815376377055e-05, "epoch": 0.4339668814748348, "percentage": 6.65, "elapsed_time": "0:36:09", "remaining_time": "8:27:37", "throughput": 2366.63, "total_tokens": 5134832} {"current_steps": 2665, "total_steps": 40000, "loss": 0.0851, "lr": 4.9454778105075195e-05, "epoch": 0.43478260869565216, "percentage": 6.66, "elapsed_time": "0:36:11", "remaining_time": "8:27:04", "throughput": 2368.79, "total_tokens": 5144400} {"current_steps": 2670, "total_steps": 40000, "loss": 0.0609, "lr": 4.945273706253924e-05, "epoch": 0.4355983359164695, "percentage": 6.68, "elapsed_time": "0:36:13", "remaining_time": "8:26:32", "throughput": 2371.16, "total_tokens": 5154448} {"current_steps": 2675, "total_steps": 40000, "loss": 0.2015, "lr": 4.9450692249083925e-05, "epoch": 0.43641406313728687, "percentage": 6.69, "elapsed_time": "0:36:15", "remaining_time": "8:26:00", "throughput": 2373.18, "total_tokens": 5163760} {"current_steps": 2680, "total_steps": 40000, "loss": 0.1535, "lr": 4.9448643665024605e-05, "epoch": 0.4372297903581043, "percentage": 6.7, "elapsed_time": "0:36:17", "remaining_time": "8:25:28", "throughput": 2375.25, "total_tokens": 5173168} {"current_steps": 2685, "total_steps": 40000, "loss": 0.0547, "lr": 4.944659131067719e-05, "epoch": 0.43804551757892163, "percentage": 6.71, "elapsed_time": "0:36:20", "remaining_time": "8:24:56", "throughput": 2377.81, "total_tokens": 5183648} {"current_steps": 2690, "total_steps": 40000, "loss": 0.0734, "lr": 4.944453518635818e-05, "epoch": 0.438861244799739, "percentage": 6.73, "elapsed_time": "0:36:22", "remaining_time": "8:24:25", "throughput": 2379.75, "total_tokens": 5192800} {"current_steps": 2695, "total_steps": 40000, "loss": 0.2466, "lr": 4.944247529238465e-05, "epoch": 0.43967697202055633, "percentage": 6.74, "elapsed_time": "0:36:24", "remaining_time": "8:23:53", "throughput": 2381.5, "total_tokens": 5201536} {"current_steps": 2700, "total_steps": 40000, "loss": 0.1495, "lr": 4.944041162907427e-05, "epoch": 0.4404926992413737, "percentage": 6.75, "elapsed_time": "0:36:26", "remaining_time": "8:23:21", "throughput": 2382.94, "total_tokens": 5209584} {"current_steps": 2705, "total_steps": 40000, "loss": 0.3011, "lr": 4.943834419674529e-05, "epoch": 0.44130842646219104, "percentage": 6.76, "elapsed_time": "0:36:28", "remaining_time": "8:22:50", "throughput": 2385.36, "total_tokens": 5219792} {"current_steps": 2710, "total_steps": 40000, "loss": 0.1767, "lr": 4.9436272995716506e-05, "epoch": 0.4421241536830084, "percentage": 6.78, "elapsed_time": "0:36:30", "remaining_time": "8:22:19", "throughput": 2387.05, "total_tokens": 5228448} {"current_steps": 2715, "total_steps": 40000, "loss": 0.0787, "lr": 4.943419802630735e-05, "epoch": 0.44293988090382574, "percentage": 6.79, "elapsed_time": "0:36:32", "remaining_time": "8:21:48", "throughput": 2389.11, "total_tokens": 5237888} {"current_steps": 2720, "total_steps": 40000, "loss": 0.2152, "lr": 4.94321192888378e-05, "epoch": 0.4437556081246431, "percentage": 6.8, "elapsed_time": "0:36:34", "remaining_time": "8:21:17", "throughput": 2391.7, "total_tokens": 5248512} {"current_steps": 2725, "total_steps": 40000, "loss": 0.146, "lr": 4.943003678362842e-05, "epoch": 0.4445713353454605, "percentage": 6.81, "elapsed_time": "0:36:36", "remaining_time": "8:20:46", "throughput": 2393.98, "total_tokens": 5258448} {"current_steps": 2730, "total_steps": 40000, "loss": 0.0695, "lr": 4.942795051100036e-05, "epoch": 0.44538706256627786, "percentage": 6.83, "elapsed_time": "0:36:38", "remaining_time": "8:20:15", "throughput": 2395.76, "total_tokens": 5267312} {"current_steps": 2735, "total_steps": 40000, "loss": 0.154, "lr": 4.942586047127536e-05, "epoch": 0.4462027897870952, "percentage": 6.84, "elapsed_time": "0:36:40", "remaining_time": "8:19:44", "throughput": 2398.2, "total_tokens": 5277616} {"current_steps": 2740, "total_steps": 40000, "loss": 0.2451, "lr": 4.942376666477571e-05, "epoch": 0.44701851700791256, "percentage": 6.85, "elapsed_time": "0:36:42", "remaining_time": "8:19:13", "throughput": 2400.33, "total_tokens": 5287280} {"current_steps": 2745, "total_steps": 40000, "loss": 0.1477, "lr": 4.9421669091824304e-05, "epoch": 0.4478342442287299, "percentage": 6.86, "elapsed_time": "0:36:44", "remaining_time": "8:18:43", "throughput": 2401.99, "total_tokens": 5295936} {"current_steps": 2750, "total_steps": 40000, "loss": 0.2415, "lr": 4.9419567752744634e-05, "epoch": 0.44864997144954727, "percentage": 6.88, "elapsed_time": "0:36:46", "remaining_time": "8:18:13", "throughput": 2404.7, "total_tokens": 5306896} {"current_steps": 2755, "total_steps": 40000, "loss": 0.1849, "lr": 4.941746264786074e-05, "epoch": 0.4494656986703646, "percentage": 6.89, "elapsed_time": "0:36:48", "remaining_time": "8:17:42", "throughput": 2407.02, "total_tokens": 5316992} {"current_steps": 2760, "total_steps": 40000, "loss": 0.2242, "lr": 4.9415353777497254e-05, "epoch": 0.450281425891182, "percentage": 6.9, "elapsed_time": "0:36:51", "remaining_time": "8:17:12", "throughput": 2409.48, "total_tokens": 5327408} {"current_steps": 2765, "total_steps": 40000, "loss": 0.178, "lr": 4.9413241141979394e-05, "epoch": 0.4510971531119993, "percentage": 6.91, "elapsed_time": "0:36:53", "remaining_time": "8:16:42", "throughput": 2411.89, "total_tokens": 5337712} {"current_steps": 2770, "total_steps": 40000, "loss": 0.1256, "lr": 4.9411124741632956e-05, "epoch": 0.4519128803328167, "percentage": 6.93, "elapsed_time": "0:36:55", "remaining_time": "8:16:12", "throughput": 2413.99, "total_tokens": 5347328} {"current_steps": 2775, "total_steps": 40000, "loss": 0.1199, "lr": 4.940900457678431e-05, "epoch": 0.4527286075536341, "percentage": 6.94, "elapsed_time": "0:36:57", "remaining_time": "8:15:42", "throughput": 2416.43, "total_tokens": 5357728} {"current_steps": 2780, "total_steps": 40000, "loss": 0.1955, "lr": 4.9406880647760425e-05, "epoch": 0.45354433477445144, "percentage": 6.95, "elapsed_time": "0:36:59", "remaining_time": "8:15:12", "throughput": 2418.8, "total_tokens": 5367968} {"current_steps": 2785, "total_steps": 40000, "loss": 0.1102, "lr": 4.9404752954888824e-05, "epoch": 0.4543600619952688, "percentage": 6.96, "elapsed_time": "0:37:01", "remaining_time": "8:14:43", "throughput": 2421.12, "total_tokens": 5378128} {"current_steps": 2790, "total_steps": 40000, "loss": 0.0701, "lr": 4.940262149849762e-05, "epoch": 0.45517578921608615, "percentage": 6.98, "elapsed_time": "0:37:03", "remaining_time": "8:14:13", "throughput": 2422.74, "total_tokens": 5386736} {"current_steps": 2795, "total_steps": 40000, "loss": 0.1971, "lr": 4.9400486278915526e-05, "epoch": 0.4559915164369035, "percentage": 6.99, "elapsed_time": "0:37:05", "remaining_time": "8:13:43", "throughput": 2425.44, "total_tokens": 5397744} {"current_steps": 2800, "total_steps": 40000, "loss": 0.3232, "lr": 4.939834729647181e-05, "epoch": 0.45680724365772085, "percentage": 7.0, "elapsed_time": "0:37:07", "remaining_time": "8:13:14", "throughput": 2427.31, "total_tokens": 5406912} {"current_steps": 2800, "total_steps": 40000, "eval_loss": 0.1564914733171463, "epoch": 0.45680724365772085, "percentage": 7.0, "elapsed_time": "0:38:28", "remaining_time": "8:31:04", "throughput": 2342.62, "total_tokens": 5406912} {"current_steps": 2805, "total_steps": 40000, "loss": 0.097, "lr": 4.9396204551496326e-05, "epoch": 0.4576229708785382, "percentage": 7.01, "elapsed_time": "0:38:32", "remaining_time": "8:30:58", "throughput": 2342.42, "total_tokens": 5415840} {"current_steps": 2810, "total_steps": 40000, "loss": 0.1122, "lr": 4.939405804431952e-05, "epoch": 0.45843869809935556, "percentage": 7.03, "elapsed_time": "0:38:34", "remaining_time": "8:30:27", "throughput": 2344.2, "total_tokens": 5424816} {"current_steps": 2815, "total_steps": 40000, "loss": 0.2002, "lr": 4.9391907775272414e-05, "epoch": 0.4592544253201729, "percentage": 7.04, "elapsed_time": "0:38:36", "remaining_time": "8:29:56", "throughput": 2345.9, "total_tokens": 5433600} {"current_steps": 2820, "total_steps": 40000, "loss": 0.0899, "lr": 4.9389753744686604e-05, "epoch": 0.4600701525409903, "percentage": 7.05, "elapsed_time": "0:38:38", "remaining_time": "8:29:25", "throughput": 2348.3, "total_tokens": 5444016} {"current_steps": 2825, "total_steps": 40000, "loss": 0.148, "lr": 4.938759595289426e-05, "epoch": 0.46088587976180767, "percentage": 7.06, "elapsed_time": "0:38:40", "remaining_time": "8:28:54", "throughput": 2350.42, "total_tokens": 5453776} {"current_steps": 2830, "total_steps": 40000, "loss": 0.2506, "lr": 4.938543440022815e-05, "epoch": 0.461701606982625, "percentage": 7.07, "elapsed_time": "0:38:42", "remaining_time": "8:28:23", "throughput": 2352.5, "total_tokens": 5463456} {"current_steps": 2835, "total_steps": 40000, "loss": 0.0621, "lr": 4.938326908702161e-05, "epoch": 0.4625173342034424, "percentage": 7.09, "elapsed_time": "0:38:44", "remaining_time": "8:27:52", "throughput": 2354.31, "total_tokens": 5472496} {"current_steps": 2840, "total_steps": 40000, "loss": 0.1483, "lr": 4.9381100013608554e-05, "epoch": 0.46333306142425973, "percentage": 7.1, "elapsed_time": "0:38:46", "remaining_time": "8:27:21", "throughput": 2356.53, "total_tokens": 5482560} {"current_steps": 2845, "total_steps": 40000, "loss": 0.074, "lr": 4.9378927180323485e-05, "epoch": 0.4641487886450771, "percentage": 7.11, "elapsed_time": "0:38:48", "remaining_time": "8:26:50", "throughput": 2358.66, "total_tokens": 5492368} {"current_steps": 2850, "total_steps": 40000, "loss": 0.2351, "lr": 4.937675058750148e-05, "epoch": 0.46496451586589443, "percentage": 7.12, "elapsed_time": "0:38:50", "remaining_time": "8:26:20", "throughput": 2360.72, "total_tokens": 5502048} {"current_steps": 2855, "total_steps": 40000, "loss": 0.1906, "lr": 4.937457023547819e-05, "epoch": 0.4657802430867118, "percentage": 7.14, "elapsed_time": "0:38:52", "remaining_time": "8:25:49", "throughput": 2362.85, "total_tokens": 5511888} {"current_steps": 2860, "total_steps": 40000, "loss": 0.1908, "lr": 4.9372386124589876e-05, "epoch": 0.46659597030752914, "percentage": 7.15, "elapsed_time": "0:38:54", "remaining_time": "8:25:19", "throughput": 2364.96, "total_tokens": 5521680} {"current_steps": 2865, "total_steps": 40000, "loss": 0.0806, "lr": 4.937019825517333e-05, "epoch": 0.46741169752834655, "percentage": 7.16, "elapsed_time": "0:38:56", "remaining_time": "8:24:49", "throughput": 2366.73, "total_tokens": 5530688} {"current_steps": 2870, "total_steps": 40000, "loss": 0.1613, "lr": 4.9368006627565954e-05, "epoch": 0.4682274247491639, "percentage": 7.17, "elapsed_time": "0:38:58", "remaining_time": "8:24:19", "throughput": 2368.57, "total_tokens": 5539872} {"current_steps": 2875, "total_steps": 40000, "loss": 0.2407, "lr": 4.936581124210573e-05, "epoch": 0.46904315196998125, "percentage": 7.19, "elapsed_time": "0:39:00", "remaining_time": "8:23:49", "throughput": 2370.48, "total_tokens": 5549248} {"current_steps": 2880, "total_steps": 40000, "loss": 0.2723, "lr": 4.9363612099131216e-05, "epoch": 0.4698588791907986, "percentage": 7.2, "elapsed_time": "0:39:03", "remaining_time": "8:23:19", "throughput": 2372.44, "total_tokens": 5558736} {"current_steps": 2885, "total_steps": 40000, "loss": 0.1938, "lr": 4.936140919898155e-05, "epoch": 0.47067460641161596, "percentage": 7.21, "elapsed_time": "0:39:05", "remaining_time": "8:22:49", "throughput": 2374.27, "total_tokens": 5567936} {"current_steps": 2890, "total_steps": 40000, "loss": 0.1371, "lr": 4.9359202541996426e-05, "epoch": 0.4714903336324333, "percentage": 7.22, "elapsed_time": "0:39:07", "remaining_time": "8:22:19", "throughput": 2375.86, "total_tokens": 5576576} {"current_steps": 2895, "total_steps": 40000, "loss": 0.1857, "lr": 4.935699212851616e-05, "epoch": 0.47230606085325066, "percentage": 7.24, "elapsed_time": "0:39:09", "remaining_time": "8:21:49", "throughput": 2377.86, "total_tokens": 5586144} {"current_steps": 2900, "total_steps": 40000, "loss": 0.122, "lr": 4.935477795888162e-05, "epoch": 0.473121788074068, "percentage": 7.25, "elapsed_time": "0:39:11", "remaining_time": "8:21:20", "throughput": 2380.0, "total_tokens": 5596112} {"current_steps": 2905, "total_steps": 40000, "loss": 0.1331, "lr": 4.935256003343426e-05, "epoch": 0.47393751529488537, "percentage": 7.26, "elapsed_time": "0:39:13", "remaining_time": "8:20:51", "throughput": 2381.65, "total_tokens": 5604912} {"current_steps": 2910, "total_steps": 40000, "loss": 0.1951, "lr": 4.93503383525161e-05, "epoch": 0.4747532425157027, "percentage": 7.27, "elapsed_time": "0:39:15", "remaining_time": "8:20:21", "throughput": 2383.77, "total_tokens": 5614848} {"current_steps": 2915, "total_steps": 40000, "loss": 0.0296, "lr": 4.934811291646977e-05, "epoch": 0.47556896973652013, "percentage": 7.29, "elapsed_time": "0:39:17", "remaining_time": "8:19:52", "throughput": 2385.58, "total_tokens": 5624016} {"current_steps": 2920, "total_steps": 40000, "loss": 0.1144, "lr": 4.934588372563845e-05, "epoch": 0.4763846969573375, "percentage": 7.3, "elapsed_time": "0:39:19", "remaining_time": "8:19:23", "throughput": 2387.47, "total_tokens": 5633392} {"current_steps": 2925, "total_steps": 40000, "loss": 0.2443, "lr": 4.93436507803659e-05, "epoch": 0.47720042417815484, "percentage": 7.31, "elapsed_time": "0:39:21", "remaining_time": "8:18:54", "throughput": 2389.42, "total_tokens": 5642928} {"current_steps": 2930, "total_steps": 40000, "loss": 0.2059, "lr": 4.934141408099649e-05, "epoch": 0.4780161513989722, "percentage": 7.32, "elapsed_time": "0:39:23", "remaining_time": "8:18:25", "throughput": 2391.08, "total_tokens": 5651776} {"current_steps": 2935, "total_steps": 40000, "loss": 0.1083, "lr": 4.9339173627875135e-05, "epoch": 0.47883187861978954, "percentage": 7.34, "elapsed_time": "0:39:25", "remaining_time": "8:17:56", "throughput": 2393.28, "total_tokens": 5661888} {"current_steps": 2940, "total_steps": 40000, "loss": 0.3268, "lr": 4.9336929421347335e-05, "epoch": 0.4796476058406069, "percentage": 7.35, "elapsed_time": "0:39:27", "remaining_time": "8:17:27", "throughput": 2394.86, "total_tokens": 5670592} {"current_steps": 2945, "total_steps": 40000, "loss": 0.0487, "lr": 4.933468146175918e-05, "epoch": 0.48046333306142425, "percentage": 7.36, "elapsed_time": "0:39:29", "remaining_time": "8:16:58", "throughput": 2396.2, "total_tokens": 5678720} {"current_steps": 2950, "total_steps": 40000, "loss": 0.2619, "lr": 4.933242974945734e-05, "epoch": 0.4812790602822416, "percentage": 7.38, "elapsed_time": "0:39:31", "remaining_time": "8:16:30", "throughput": 2398.15, "total_tokens": 5688288} {"current_steps": 2955, "total_steps": 40000, "loss": 0.0601, "lr": 4.933017428478906e-05, "epoch": 0.48209478750305895, "percentage": 7.39, "elapsed_time": "0:39:34", "remaining_time": "8:16:01", "throughput": 2400.23, "total_tokens": 5698160} {"current_steps": 2960, "total_steps": 40000, "loss": 0.1416, "lr": 4.932791506810214e-05, "epoch": 0.48291051472387636, "percentage": 7.4, "elapsed_time": "0:39:36", "remaining_time": "8:15:32", "throughput": 2402.13, "total_tokens": 5707632} {"current_steps": 2965, "total_steps": 40000, "loss": 0.0438, "lr": 4.932565209974499e-05, "epoch": 0.4837262419446937, "percentage": 7.41, "elapsed_time": "0:39:38", "remaining_time": "8:15:04", "throughput": 2404.5, "total_tokens": 5718240} {"current_steps": 2970, "total_steps": 40000, "loss": 0.2323, "lr": 4.93233853800666e-05, "epoch": 0.48454196916551107, "percentage": 7.42, "elapsed_time": "0:39:40", "remaining_time": "8:14:36", "throughput": 2406.33, "total_tokens": 5727552} {"current_steps": 2975, "total_steps": 40000, "loss": 0.2556, "lr": 4.932111490941651e-05, "epoch": 0.4853576963863284, "percentage": 7.44, "elapsed_time": "0:39:42", "remaining_time": "8:14:08", "throughput": 2408.23, "total_tokens": 5737040} {"current_steps": 2980, "total_steps": 40000, "loss": 0.1233, "lr": 4.9318840688144876e-05, "epoch": 0.48617342360714577, "percentage": 7.45, "elapsed_time": "0:39:44", "remaining_time": "8:13:39", "throughput": 2410.62, "total_tokens": 5747696} {"current_steps": 2985, "total_steps": 40000, "loss": 0.197, "lr": 4.9316562716602387e-05, "epoch": 0.4869891508279631, "percentage": 7.46, "elapsed_time": "0:39:46", "remaining_time": "8:13:12", "throughput": 2412.63, "total_tokens": 5757488} {"current_steps": 2990, "total_steps": 40000, "loss": 0.2009, "lr": 4.9314280995140346e-05, "epoch": 0.4878048780487805, "percentage": 7.47, "elapsed_time": "0:39:48", "remaining_time": "8:12:44", "throughput": 2414.27, "total_tokens": 5766368} {"current_steps": 2995, "total_steps": 40000, "loss": 0.1005, "lr": 4.931199552411063e-05, "epoch": 0.48862060526959783, "percentage": 7.49, "elapsed_time": "0:39:50", "remaining_time": "8:12:16", "throughput": 2416.34, "total_tokens": 5776288} {"current_steps": 3000, "total_steps": 40000, "loss": 0.2185, "lr": 4.930970630386568e-05, "epoch": 0.4894363324904152, "percentage": 7.5, "elapsed_time": "0:39:52", "remaining_time": "8:11:48", "throughput": 2418.36, "total_tokens": 5786080} {"current_steps": 3000, "total_steps": 40000, "eval_loss": 0.14820662140846252, "epoch": 0.4894363324904152, "percentage": 7.5, "elapsed_time": "0:41:13", "remaining_time": "8:28:22", "throughput": 2339.56, "total_tokens": 5786080} {"current_steps": 3005, "total_steps": 40000, "loss": 0.102, "lr": 4.9307413334758524e-05, "epoch": 0.4902520597112326, "percentage": 7.51, "elapsed_time": "0:41:17", "remaining_time": "8:28:16", "throughput": 2339.83, "total_tokens": 5796080} {"current_steps": 3010, "total_steps": 40000, "loss": 0.2125, "lr": 4.930511661714276e-05, "epoch": 0.49106778693204994, "percentage": 7.52, "elapsed_time": "0:41:19", "remaining_time": "8:27:47", "throughput": 2341.75, "total_tokens": 5805696} {"current_steps": 3015, "total_steps": 40000, "loss": 0.1831, "lr": 4.9302816151372576e-05, "epoch": 0.4918835141528673, "percentage": 7.54, "elapsed_time": "0:41:21", "remaining_time": "8:27:17", "throughput": 2343.38, "total_tokens": 5814576} {"current_steps": 3020, "total_steps": 40000, "loss": 0.177, "lr": 4.930051193780274e-05, "epoch": 0.49269924137368465, "percentage": 7.55, "elapsed_time": "0:41:23", "remaining_time": "8:26:48", "throughput": 2345.03, "total_tokens": 5823520} {"current_steps": 3025, "total_steps": 40000, "loss": 0.1023, "lr": 4.929820397678858e-05, "epoch": 0.493514968594502, "percentage": 7.56, "elapsed_time": "0:41:25", "remaining_time": "8:26:19", "throughput": 2346.74, "total_tokens": 5832624} {"current_steps": 3030, "total_steps": 40000, "loss": 0.1383, "lr": 4.9295892268686015e-05, "epoch": 0.49433069581531935, "percentage": 7.58, "elapsed_time": "0:41:27", "remaining_time": "8:25:50", "throughput": 2348.53, "total_tokens": 5841920} {"current_steps": 3035, "total_steps": 40000, "loss": 0.1035, "lr": 4.9293576813851536e-05, "epoch": 0.4951464230361367, "percentage": 7.59, "elapsed_time": "0:41:29", "remaining_time": "8:25:21", "throughput": 2350.45, "total_tokens": 5851552} {"current_steps": 3040, "total_steps": 40000, "loss": 0.1064, "lr": 4.929125761264223e-05, "epoch": 0.49596215025695406, "percentage": 7.6, "elapsed_time": "0:41:31", "remaining_time": "8:24:52", "throughput": 2352.54, "total_tokens": 5861632} {"current_steps": 3045, "total_steps": 40000, "loss": 0.1501, "lr": 4.928893466541573e-05, "epoch": 0.4967778774777714, "percentage": 7.61, "elapsed_time": "0:41:33", "remaining_time": "8:24:24", "throughput": 2354.74, "total_tokens": 5872000} {"current_steps": 3050, "total_steps": 40000, "loss": 0.107, "lr": 4.928660797253027e-05, "epoch": 0.49759360469858877, "percentage": 7.62, "elapsed_time": "0:41:35", "remaining_time": "8:23:55", "throughput": 2356.93, "total_tokens": 5882336} {"current_steps": 3055, "total_steps": 40000, "loss": 0.1558, "lr": 4.928427753434467e-05, "epoch": 0.4984093319194062, "percentage": 7.64, "elapsed_time": "0:41:37", "remaining_time": "8:23:26", "throughput": 2358.97, "total_tokens": 5892304} {"current_steps": 3060, "total_steps": 40000, "loss": 0.1724, "lr": 4.9281943351218286e-05, "epoch": 0.4992250591402235, "percentage": 7.65, "elapsed_time": "0:41:39", "remaining_time": "8:22:58", "throughput": 2360.59, "total_tokens": 5901216} {"current_steps": 3065, "total_steps": 40000, "loss": 0.13, "lr": 4.9279605423511095e-05, "epoch": 0.5000407863610409, "percentage": 7.66, "elapsed_time": "0:41:41", "remaining_time": "8:22:29", "throughput": 2362.35, "total_tokens": 5910496} {"current_steps": 3070, "total_steps": 40000, "loss": 0.2389, "lr": 4.927726375158363e-05, "epoch": 0.5008565135818582, "percentage": 7.67, "elapsed_time": "0:41:44", "remaining_time": "8:22:01", "throughput": 2364.24, "total_tokens": 5920112} {"current_steps": 3075, "total_steps": 40000, "loss": 0.1095, "lr": 4.9274918335797004e-05, "epoch": 0.5016722408026756, "percentage": 7.69, "elapsed_time": "0:41:46", "remaining_time": "8:21:33", "throughput": 2365.84, "total_tokens": 5929024} {"current_steps": 3080, "total_steps": 40000, "loss": 0.2664, "lr": 4.927256917651292e-05, "epoch": 0.5024879680234929, "percentage": 7.7, "elapsed_time": "0:41:48", "remaining_time": "8:21:05", "throughput": 2367.66, "total_tokens": 5938448} {"current_steps": 3085, "total_steps": 40000, "loss": 0.1262, "lr": 4.927021627409364e-05, "epoch": 0.5033036952443103, "percentage": 7.71, "elapsed_time": "0:41:50", "remaining_time": "8:20:37", "throughput": 2369.42, "total_tokens": 5947760} {"current_steps": 3090, "total_steps": 40000, "loss": 0.1965, "lr": 4.9267859628902005e-05, "epoch": 0.5041194224651276, "percentage": 7.72, "elapsed_time": "0:41:52", "remaining_time": "8:20:09", "throughput": 2371.0, "total_tokens": 5956624} {"current_steps": 3095, "total_steps": 40000, "loss": 0.2725, "lr": 4.9265499241301454e-05, "epoch": 0.504935149685945, "percentage": 7.74, "elapsed_time": "0:41:54", "remaining_time": "8:19:41", "throughput": 2372.9, "total_tokens": 5966304} {"current_steps": 3100, "total_steps": 40000, "loss": 0.215, "lr": 4.926313511165598e-05, "epoch": 0.5057508769067623, "percentage": 7.75, "elapsed_time": "0:41:56", "remaining_time": "8:19:13", "throughput": 2374.66, "total_tokens": 5975632} {"current_steps": 3105, "total_steps": 40000, "loss": 0.1453, "lr": 4.926076724033016e-05, "epoch": 0.5065666041275797, "percentage": 7.76, "elapsed_time": "0:41:58", "remaining_time": "8:18:45", "throughput": 2376.77, "total_tokens": 5985840} {"current_steps": 3110, "total_steps": 40000, "loss": 0.1398, "lr": 4.9258395627689146e-05, "epoch": 0.507382331348397, "percentage": 7.78, "elapsed_time": "0:42:00", "remaining_time": "8:18:18", "throughput": 2378.93, "total_tokens": 5996224} {"current_steps": 3115, "total_steps": 40000, "loss": 0.123, "lr": 4.925602027409868e-05, "epoch": 0.5081980585692144, "percentage": 7.79, "elapsed_time": "0:42:02", "remaining_time": "8:17:50", "throughput": 2380.78, "total_tokens": 6005808} {"current_steps": 3120, "total_steps": 40000, "loss": 0.1747, "lr": 4.925364117992507e-05, "epoch": 0.5090137857900318, "percentage": 7.8, "elapsed_time": "0:42:04", "remaining_time": "8:17:22", "throughput": 2382.72, "total_tokens": 6015600} {"current_steps": 3125, "total_steps": 40000, "loss": 0.1692, "lr": 4.92512583455352e-05, "epoch": 0.5098295130108492, "percentage": 7.81, "elapsed_time": "0:42:06", "remaining_time": "8:16:55", "throughput": 2384.66, "total_tokens": 6025440} {"current_steps": 3130, "total_steps": 40000, "loss": 0.2213, "lr": 4.9248871771296536e-05, "epoch": 0.5106452402316666, "percentage": 7.83, "elapsed_time": "0:42:08", "remaining_time": "8:16:28", "throughput": 2386.32, "total_tokens": 6034560} {"current_steps": 3135, "total_steps": 40000, "loss": 0.1446, "lr": 4.924648145757711e-05, "epoch": 0.5114609674524839, "percentage": 7.84, "elapsed_time": "0:42:10", "remaining_time": "8:16:01", "throughput": 2387.83, "total_tokens": 6043312} {"current_steps": 3140, "total_steps": 40000, "loss": 0.1229, "lr": 4.924408740474554e-05, "epoch": 0.5122766946733013, "percentage": 7.85, "elapsed_time": "0:42:12", "remaining_time": "8:15:33", "throughput": 2389.68, "total_tokens": 6052912} {"current_steps": 3145, "total_steps": 40000, "loss": 0.1056, "lr": 4.924168961317103e-05, "epoch": 0.5130924218941186, "percentage": 7.86, "elapsed_time": "0:42:15", "remaining_time": "8:15:06", "throughput": 2391.32, "total_tokens": 6062032} {"current_steps": 3150, "total_steps": 40000, "loss": 0.1185, "lr": 4.9239288083223334e-05, "epoch": 0.513908149114936, "percentage": 7.88, "elapsed_time": "0:42:17", "remaining_time": "8:14:39", "throughput": 2392.71, "total_tokens": 6070496} {"current_steps": 3155, "total_steps": 40000, "loss": 0.1973, "lr": 4.9236882815272803e-05, "epoch": 0.5147238763357533, "percentage": 7.89, "elapsed_time": "0:42:19", "remaining_time": "8:14:12", "throughput": 2394.01, "total_tokens": 6078736} {"current_steps": 3160, "total_steps": 40000, "loss": 0.0805, "lr": 4.9234473809690365e-05, "epoch": 0.5155396035565707, "percentage": 7.9, "elapsed_time": "0:42:21", "remaining_time": "8:13:45", "throughput": 2395.5, "total_tokens": 6087472} {"current_steps": 3165, "total_steps": 40000, "loss": 0.1437, "lr": 4.923206106684752e-05, "epoch": 0.516355330777388, "percentage": 7.91, "elapsed_time": "0:42:23", "remaining_time": "8:13:19", "throughput": 2397.61, "total_tokens": 6097760} {"current_steps": 3170, "total_steps": 40000, "loss": 0.1506, "lr": 4.922964458711634e-05, "epoch": 0.5171710579982054, "percentage": 7.92, "elapsed_time": "0:42:25", "remaining_time": "8:12:52", "throughput": 2399.75, "total_tokens": 6108160} {"current_steps": 3175, "total_steps": 40000, "loss": 0.2375, "lr": 4.9227224370869474e-05, "epoch": 0.5179867852190227, "percentage": 7.94, "elapsed_time": "0:42:27", "remaining_time": "8:12:25", "throughput": 2401.44, "total_tokens": 6117440} {"current_steps": 3180, "total_steps": 40000, "loss": 0.1582, "lr": 4.9224800418480155e-05, "epoch": 0.5188025124398401, "percentage": 7.95, "elapsed_time": "0:42:29", "remaining_time": "8:11:59", "throughput": 2403.21, "total_tokens": 6126880} {"current_steps": 3185, "total_steps": 40000, "loss": 0.2288, "lr": 4.9222372730322176e-05, "epoch": 0.5196182396606575, "percentage": 7.96, "elapsed_time": "0:42:31", "remaining_time": "8:11:32", "throughput": 2405.27, "total_tokens": 6137136} {"current_steps": 3190, "total_steps": 40000, "loss": 0.1426, "lr": 4.921994130676993e-05, "epoch": 0.5204339668814748, "percentage": 7.98, "elapsed_time": "0:42:33", "remaining_time": "8:11:06", "throughput": 2407.61, "total_tokens": 6148080} {"current_steps": 3195, "total_steps": 40000, "loss": 0.1291, "lr": 4.9217506148198366e-05, "epoch": 0.5212496941022922, "percentage": 7.99, "elapsed_time": "0:42:35", "remaining_time": "8:10:40", "throughput": 2409.49, "total_tokens": 6157840} {"current_steps": 3200, "total_steps": 40000, "loss": 0.2453, "lr": 4.921506725498302e-05, "epoch": 0.5220654213231095, "percentage": 8.0, "elapsed_time": "0:42:37", "remaining_time": "8:10:13", "throughput": 2411.36, "total_tokens": 6167600} {"current_steps": 3200, "total_steps": 40000, "eval_loss": 0.1551085114479065, "epoch": 0.5220654213231095, "percentage": 8.0, "elapsed_time": "0:43:58", "remaining_time": "8:25:39", "throughput": 2337.8, "total_tokens": 6167600} {"current_steps": 3205, "total_steps": 40000, "loss": 0.1741, "lr": 4.9212624627499994e-05, "epoch": 0.5228811485439269, "percentage": 8.01, "elapsed_time": "0:44:02", "remaining_time": "8:25:35", "throughput": 2337.86, "total_tokens": 6177536} {"current_steps": 3210, "total_steps": 40000, "loss": 0.1413, "lr": 4.921017826612597e-05, "epoch": 0.5236968757647442, "percentage": 8.03, "elapsed_time": "0:44:04", "remaining_time": "8:25:08", "throughput": 2339.61, "total_tokens": 6187008} {"current_steps": 3215, "total_steps": 40000, "loss": 0.1998, "lr": 4.9207728171238223e-05, "epoch": 0.5245126029855617, "percentage": 8.04, "elapsed_time": "0:44:06", "remaining_time": "8:24:40", "throughput": 2341.46, "total_tokens": 6196736} {"current_steps": 3220, "total_steps": 40000, "loss": 0.2115, "lr": 4.920527434321458e-05, "epoch": 0.525328330206379, "percentage": 8.05, "elapsed_time": "0:44:08", "remaining_time": "8:24:13", "throughput": 2342.91, "total_tokens": 6205408} {"current_steps": 3225, "total_steps": 40000, "loss": 0.1685, "lr": 4.920281678243345e-05, "epoch": 0.5261440574271964, "percentage": 8.06, "elapsed_time": "0:44:10", "remaining_time": "8:23:45", "throughput": 2344.8, "total_tokens": 6215248} {"current_steps": 3230, "total_steps": 40000, "loss": 0.1173, "lr": 4.920035548927381e-05, "epoch": 0.5269597846480137, "percentage": 8.08, "elapsed_time": "0:44:12", "remaining_time": "8:23:18", "throughput": 2346.47, "total_tokens": 6224512} {"current_steps": 3235, "total_steps": 40000, "loss": 0.0745, "lr": 4.919789046411525e-05, "epoch": 0.5277755118688311, "percentage": 8.09, "elapsed_time": "0:44:14", "remaining_time": "8:22:50", "throughput": 2348.04, "total_tokens": 6233536} {"current_steps": 3240, "total_steps": 40000, "loss": 0.2178, "lr": 4.919542170733787e-05, "epoch": 0.5285912390896484, "percentage": 8.1, "elapsed_time": "0:44:16", "remaining_time": "8:22:23", "throughput": 2350.0, "total_tokens": 6243584} {"current_steps": 3245, "total_steps": 40000, "loss": 0.1672, "lr": 4.919294921932242e-05, "epoch": 0.5294069663104658, "percentage": 8.11, "elapsed_time": "0:44:18", "remaining_time": "8:21:56", "throughput": 2351.59, "total_tokens": 6252672} {"current_steps": 3250, "total_steps": 40000, "loss": 0.1899, "lr": 4.919047300045016e-05, "epoch": 0.5302226935312832, "percentage": 8.12, "elapsed_time": "0:44:20", "remaining_time": "8:21:29", "throughput": 2353.39, "total_tokens": 6262320} {"current_steps": 3255, "total_steps": 40000, "loss": 0.2278, "lr": 4.918799305110299e-05, "epoch": 0.5310384207521005, "percentage": 8.14, "elapsed_time": "0:44:23", "remaining_time": "8:21:02", "throughput": 2355.37, "total_tokens": 6272448} {"current_steps": 3260, "total_steps": 40000, "loss": 0.2702, "lr": 4.918550937166331e-05, "epoch": 0.5318541479729179, "percentage": 8.15, "elapsed_time": "0:44:25", "remaining_time": "8:20:35", "throughput": 2357.03, "total_tokens": 6281744} {"current_steps": 3265, "total_steps": 40000, "loss": 0.1351, "lr": 4.918302196251415e-05, "epoch": 0.5326698751937352, "percentage": 8.16, "elapsed_time": "0:44:27", "remaining_time": "8:20:08", "throughput": 2358.9, "total_tokens": 6291568} {"current_steps": 3270, "total_steps": 40000, "loss": 0.2162, "lr": 4.91805308240391e-05, "epoch": 0.5334856024145526, "percentage": 8.18, "elapsed_time": "0:44:29", "remaining_time": "8:19:41", "throughput": 2360.67, "total_tokens": 6301168} {"current_steps": 3275, "total_steps": 40000, "loss": 0.2648, "lr": 4.9178035956622326e-05, "epoch": 0.5343013296353699, "percentage": 8.19, "elapsed_time": "0:44:31", "remaining_time": "8:19:15", "throughput": 2362.63, "total_tokens": 6311296} {"current_steps": 3280, "total_steps": 40000, "loss": 0.1211, "lr": 4.917553736064857e-05, "epoch": 0.5351170568561873, "percentage": 8.2, "elapsed_time": "0:44:33", "remaining_time": "8:18:48", "throughput": 2364.82, "total_tokens": 6322032} {"current_steps": 3285, "total_steps": 40000, "loss": 0.1923, "lr": 4.917303503650314e-05, "epoch": 0.5359327840770046, "percentage": 8.21, "elapsed_time": "0:44:35", "remaining_time": "8:18:22", "throughput": 2366.8, "total_tokens": 6332208} {"current_steps": 3290, "total_steps": 40000, "loss": 0.1144, "lr": 4.917052898457194e-05, "epoch": 0.536748511297822, "percentage": 8.22, "elapsed_time": "0:44:37", "remaining_time": "8:17:55", "throughput": 2368.41, "total_tokens": 6341408} {"current_steps": 3295, "total_steps": 40000, "loss": 0.1454, "lr": 4.916801920524141e-05, "epoch": 0.5375642385186393, "percentage": 8.24, "elapsed_time": "0:44:39", "remaining_time": "8:17:29", "throughput": 2370.11, "total_tokens": 6350848} {"current_steps": 3300, "total_steps": 40000, "loss": 0.1029, "lr": 4.916550569889862e-05, "epoch": 0.5383799657394567, "percentage": 8.25, "elapsed_time": "0:44:41", "remaining_time": "8:17:02", "throughput": 2371.91, "total_tokens": 6360560} {"current_steps": 3305, "total_steps": 40000, "loss": 0.0888, "lr": 4.916298846593116e-05, "epoch": 0.539195692960274, "percentage": 8.26, "elapsed_time": "0:44:43", "remaining_time": "8:16:36", "throughput": 2373.93, "total_tokens": 6370880} {"current_steps": 3310, "total_steps": 40000, "loss": 0.1111, "lr": 4.916046750672722e-05, "epoch": 0.5400114201810915, "percentage": 8.28, "elapsed_time": "0:44:45", "remaining_time": "8:16:10", "throughput": 2375.8, "total_tokens": 6380832} {"current_steps": 3315, "total_steps": 40000, "loss": 0.1099, "lr": 4.915794282167559e-05, "epoch": 0.5408271474019088, "percentage": 8.29, "elapsed_time": "0:44:47", "remaining_time": "8:15:44", "throughput": 2377.71, "total_tokens": 6390848} {"current_steps": 3320, "total_steps": 40000, "loss": 0.2081, "lr": 4.915541441116558e-05, "epoch": 0.5416428746227262, "percentage": 8.3, "elapsed_time": "0:44:49", "remaining_time": "8:15:18", "throughput": 2379.33, "total_tokens": 6400128} {"current_steps": 3325, "total_steps": 40000, "loss": 0.2119, "lr": 4.915288227558711e-05, "epoch": 0.5424586018435436, "percentage": 8.31, "elapsed_time": "0:44:51", "remaining_time": "8:14:52", "throughput": 2381.11, "total_tokens": 6409824} {"current_steps": 3330, "total_steps": 40000, "loss": 0.1812, "lr": 4.915034641533066e-05, "epoch": 0.5432743290643609, "percentage": 8.33, "elapsed_time": "0:44:54", "remaining_time": "8:14:26", "throughput": 2382.68, "total_tokens": 6418976} {"current_steps": 3335, "total_steps": 40000, "loss": 0.1213, "lr": 4.914780683078731e-05, "epoch": 0.5440900562851783, "percentage": 8.34, "elapsed_time": "0:44:56", "remaining_time": "8:14:00", "throughput": 2384.34, "total_tokens": 6428384} {"current_steps": 3340, "total_steps": 40000, "loss": 0.1227, "lr": 4.9145263522348695e-05, "epoch": 0.5449057835059956, "percentage": 8.35, "elapsed_time": "0:44:58", "remaining_time": "8:13:35", "throughput": 2386.04, "total_tokens": 6437888} {"current_steps": 3345, "total_steps": 40000, "loss": 0.1498, "lr": 4.9142716490407e-05, "epoch": 0.545721510726813, "percentage": 8.36, "elapsed_time": "0:45:00", "remaining_time": "8:13:09", "throughput": 2387.84, "total_tokens": 6447712} {"current_steps": 3350, "total_steps": 40000, "loss": 0.136, "lr": 4.914016573535504e-05, "epoch": 0.5465372379476303, "percentage": 8.38, "elapsed_time": "0:45:02", "remaining_time": "8:12:43", "throughput": 2389.4, "total_tokens": 6456864} {"current_steps": 3355, "total_steps": 40000, "loss": 0.1683, "lr": 4.9137611257586154e-05, "epoch": 0.5473529651684477, "percentage": 8.39, "elapsed_time": "0:45:04", "remaining_time": "8:12:18", "throughput": 2390.89, "total_tokens": 6465824} {"current_steps": 3360, "total_steps": 40000, "loss": 0.1031, "lr": 4.9135053057494274e-05, "epoch": 0.548168692389265, "percentage": 8.4, "elapsed_time": "0:45:06", "remaining_time": "8:11:52", "throughput": 2392.67, "total_tokens": 6475584} {"current_steps": 3365, "total_steps": 40000, "loss": 0.0959, "lr": 4.913249113547392e-05, "epoch": 0.5489844196100824, "percentage": 8.41, "elapsed_time": "0:45:08", "remaining_time": "8:11:27", "throughput": 2394.42, "total_tokens": 6485264} {"current_steps": 3370, "total_steps": 40000, "loss": 0.1577, "lr": 4.912992549192016e-05, "epoch": 0.5498001468308997, "percentage": 8.43, "elapsed_time": "0:45:10", "remaining_time": "8:11:02", "throughput": 2396.1, "total_tokens": 6494736} {"current_steps": 3375, "total_steps": 40000, "loss": 0.171, "lr": 4.9127356127228665e-05, "epoch": 0.5506158740517171, "percentage": 8.44, "elapsed_time": "0:45:12", "remaining_time": "8:10:36", "throughput": 2397.88, "total_tokens": 6504512} {"current_steps": 3380, "total_steps": 40000, "loss": 0.1174, "lr": 4.912478304179564e-05, "epoch": 0.5514316012725344, "percentage": 8.45, "elapsed_time": "0:45:14", "remaining_time": "8:10:11", "throughput": 2399.75, "total_tokens": 6514544} {"current_steps": 3385, "total_steps": 40000, "loss": 0.165, "lr": 4.9122206236017896e-05, "epoch": 0.5522473284933518, "percentage": 8.46, "elapsed_time": "0:45:16", "remaining_time": "8:09:46", "throughput": 2401.95, "total_tokens": 6525504} {"current_steps": 3390, "total_steps": 40000, "loss": 0.1595, "lr": 4.911962571029282e-05, "epoch": 0.5530630557141691, "percentage": 8.48, "elapsed_time": "0:45:18", "remaining_time": "8:09:21", "throughput": 2403.84, "total_tokens": 6535584} {"current_steps": 3395, "total_steps": 40000, "loss": 0.2362, "lr": 4.9117041465018353e-05, "epoch": 0.5538787829349865, "percentage": 8.49, "elapsed_time": "0:45:20", "remaining_time": "8:08:56", "throughput": 2405.51, "total_tokens": 6545104} {"current_steps": 3400, "total_steps": 40000, "loss": 0.1106, "lr": 4.911445350059302e-05, "epoch": 0.5546945101558038, "percentage": 8.5, "elapsed_time": "0:45:22", "remaining_time": "8:08:31", "throughput": 2406.92, "total_tokens": 6553904} {"current_steps": 3400, "total_steps": 40000, "eval_loss": 0.1407547891139984, "epoch": 0.5546945101558038, "percentage": 8.5, "elapsed_time": "0:46:43", "remaining_time": "8:22:59", "throughput": 2337.72, "total_tokens": 6553904} {"current_steps": 3405, "total_steps": 40000, "loss": 0.1278, "lr": 4.9111861817415905e-05, "epoch": 0.5555102373766213, "percentage": 8.51, "elapsed_time": "0:46:47", "remaining_time": "8:22:49", "throughput": 2338.02, "total_tokens": 6563056} {"current_steps": 3410, "total_steps": 40000, "loss": 0.0628, "lr": 4.91092664158867e-05, "epoch": 0.5563259645974387, "percentage": 8.53, "elapsed_time": "0:46:49", "remaining_time": "8:22:23", "throughput": 2339.32, "total_tokens": 6571568} {"current_steps": 3415, "total_steps": 40000, "loss": 0.1078, "lr": 4.910666729640563e-05, "epoch": 0.557141691818256, "percentage": 8.54, "elapsed_time": "0:46:51", "remaining_time": "8:21:57", "throughput": 2340.98, "total_tokens": 6581072} {"current_steps": 3420, "total_steps": 40000, "loss": 0.2131, "lr": 4.910406445937353e-05, "epoch": 0.5579574190390734, "percentage": 8.55, "elapsed_time": "0:46:53", "remaining_time": "8:21:30", "throughput": 2342.84, "total_tokens": 6591136} {"current_steps": 3425, "total_steps": 40000, "loss": 0.1129, "lr": 4.9101457905191774e-05, "epoch": 0.5587731462598907, "percentage": 8.56, "elapsed_time": "0:46:55", "remaining_time": "8:21:04", "throughput": 2344.46, "total_tokens": 6600528} {"current_steps": 3430, "total_steps": 40000, "loss": 0.1903, "lr": 4.909884763426233e-05, "epoch": 0.5595888734807081, "percentage": 8.58, "elapsed_time": "0:46:57", "remaining_time": "8:20:39", "throughput": 2345.93, "total_tokens": 6609536} {"current_steps": 3435, "total_steps": 40000, "loss": 0.2874, "lr": 4.9096233646987736e-05, "epoch": 0.5604046007015254, "percentage": 8.59, "elapsed_time": "0:46:59", "remaining_time": "8:20:13", "throughput": 2347.79, "total_tokens": 6619600} {"current_steps": 3440, "total_steps": 40000, "loss": 0.1775, "lr": 4.9093615943771104e-05, "epoch": 0.5612203279223428, "percentage": 8.6, "elapsed_time": "0:47:01", "remaining_time": "8:19:47", "throughput": 2349.36, "total_tokens": 6628928} {"current_steps": 3445, "total_steps": 40000, "loss": 0.1355, "lr": 4.909099452501611e-05, "epoch": 0.5620360551431601, "percentage": 8.61, "elapsed_time": "0:47:03", "remaining_time": "8:19:21", "throughput": 2350.51, "total_tokens": 6637008} {"current_steps": 3450, "total_steps": 40000, "loss": 0.1131, "lr": 4.908836939112702e-05, "epoch": 0.5628517823639775, "percentage": 8.62, "elapsed_time": "0:47:05", "remaining_time": "8:18:56", "throughput": 2352.15, "total_tokens": 6646512} {"current_steps": 3455, "total_steps": 40000, "loss": 0.178, "lr": 4.908574054250865e-05, "epoch": 0.5636675095847948, "percentage": 8.64, "elapsed_time": "0:47:07", "remaining_time": "8:18:30", "throughput": 2354.2, "total_tokens": 6657168} {"current_steps": 3460, "total_steps": 40000, "loss": 0.2248, "lr": 4.9083107979566414e-05, "epoch": 0.5644832368056122, "percentage": 8.65, "elapsed_time": "0:47:09", "remaining_time": "8:18:05", "throughput": 2355.93, "total_tokens": 6666944} {"current_steps": 3465, "total_steps": 40000, "loss": 0.1607, "lr": 4.908047170270628e-05, "epoch": 0.5652989640264295, "percentage": 8.66, "elapsed_time": "0:47:11", "remaining_time": "8:17:39", "throughput": 2357.79, "total_tokens": 6677072} {"current_steps": 3470, "total_steps": 40000, "loss": 0.1523, "lr": 4.9077831712334784e-05, "epoch": 0.5661146912472469, "percentage": 8.67, "elapsed_time": "0:47:13", "remaining_time": "8:17:14", "throughput": 2359.41, "total_tokens": 6686528} {"current_steps": 3475, "total_steps": 40000, "loss": 0.1534, "lr": 4.907518800885907e-05, "epoch": 0.5669304184680642, "percentage": 8.69, "elapsed_time": "0:47:16", "remaining_time": "8:16:49", "throughput": 2361.25, "total_tokens": 6696624} {"current_steps": 3480, "total_steps": 40000, "loss": 0.267, "lr": 4.907254059268681e-05, "epoch": 0.5677461456888816, "percentage": 8.7, "elapsed_time": "0:47:18", "remaining_time": "8:16:23", "throughput": 2362.61, "total_tokens": 6705376} {"current_steps": 3485, "total_steps": 40000, "loss": 0.1526, "lr": 4.906988946422628e-05, "epoch": 0.568561872909699, "percentage": 8.71, "elapsed_time": "0:47:20", "remaining_time": "8:15:58", "throughput": 2364.34, "total_tokens": 6715168} {"current_steps": 3490, "total_steps": 40000, "loss": 0.2012, "lr": 4.9067234623886315e-05, "epoch": 0.5693776001305163, "percentage": 8.72, "elapsed_time": "0:47:22", "remaining_time": "8:15:33", "throughput": 2366.09, "total_tokens": 6725008} {"current_steps": 3495, "total_steps": 40000, "loss": 0.127, "lr": 4.9064576072076316e-05, "epoch": 0.5701933273513338, "percentage": 8.74, "elapsed_time": "0:47:24", "remaining_time": "8:15:08", "throughput": 2368.07, "total_tokens": 6735520} {"current_steps": 3500, "total_steps": 40000, "loss": 0.1378, "lr": 4.906191380920628e-05, "epoch": 0.5710090545721511, "percentage": 8.75, "elapsed_time": "0:47:26", "remaining_time": "8:14:43", "throughput": 2369.68, "total_tokens": 6744992} {"current_steps": 3505, "total_steps": 40000, "loss": 0.1029, "lr": 4.905924783568675e-05, "epoch": 0.5718247817929685, "percentage": 8.76, "elapsed_time": "0:47:28", "remaining_time": "8:14:18", "throughput": 2370.88, "total_tokens": 6753312} {"current_steps": 3510, "total_steps": 40000, "loss": 0.1901, "lr": 4.905657815192886e-05, "epoch": 0.5726405090137858, "percentage": 8.77, "elapsed_time": "0:47:30", "remaining_time": "8:13:54", "throughput": 2372.64, "total_tokens": 6763264} {"current_steps": 3515, "total_steps": 40000, "loss": 0.2079, "lr": 4.90539047583443e-05, "epoch": 0.5734562362346032, "percentage": 8.79, "elapsed_time": "0:47:32", "remaining_time": "8:13:29", "throughput": 2374.54, "total_tokens": 6773568} {"current_steps": 3520, "total_steps": 40000, "loss": 0.137, "lr": 4.905122765534534e-05, "epoch": 0.5742719634554205, "percentage": 8.8, "elapsed_time": "0:47:34", "remaining_time": "8:13:04", "throughput": 2376.01, "total_tokens": 6782672} {"current_steps": 3525, "total_steps": 40000, "loss": 0.1146, "lr": 4.9048546843344846e-05, "epoch": 0.5750876906762379, "percentage": 8.81, "elapsed_time": "0:47:36", "remaining_time": "8:12:39", "throughput": 2377.46, "total_tokens": 6791728} {"current_steps": 3530, "total_steps": 40000, "loss": 0.0682, "lr": 4.9045862322756206e-05, "epoch": 0.5759034178970552, "percentage": 8.82, "elapsed_time": "0:47:38", "remaining_time": "8:12:15", "throughput": 2379.28, "total_tokens": 6801856} {"current_steps": 3535, "total_steps": 40000, "loss": 0.1976, "lr": 4.904317409399342e-05, "epoch": 0.5767191451178726, "percentage": 8.84, "elapsed_time": "0:47:40", "remaining_time": "8:11:50", "throughput": 2381.2, "total_tokens": 6812272} {"current_steps": 3540, "total_steps": 40000, "loss": 0.1855, "lr": 4.904048215747104e-05, "epoch": 0.5775348723386899, "percentage": 8.85, "elapsed_time": "0:47:42", "remaining_time": "8:11:26", "throughput": 2382.73, "total_tokens": 6821552} {"current_steps": 3545, "total_steps": 40000, "loss": 0.1511, "lr": 4.90377865136042e-05, "epoch": 0.5783505995595073, "percentage": 8.86, "elapsed_time": "0:47:44", "remaining_time": "8:11:02", "throughput": 2384.31, "total_tokens": 6831040} {"current_steps": 3550, "total_steps": 40000, "loss": 0.2709, "lr": 4.90350871628086e-05, "epoch": 0.5791663267803246, "percentage": 8.88, "elapsed_time": "0:47:47", "remaining_time": "8:10:37", "throughput": 2386.0, "total_tokens": 6840832} {"current_steps": 3555, "total_steps": 40000, "loss": 0.1328, "lr": 4.903238410550052e-05, "epoch": 0.579982054001142, "percentage": 8.89, "elapsed_time": "0:47:49", "remaining_time": "8:10:13", "throughput": 2387.77, "total_tokens": 6850832} {"current_steps": 3560, "total_steps": 40000, "loss": 0.1243, "lr": 4.90296773420968e-05, "epoch": 0.5807977812219594, "percentage": 8.9, "elapsed_time": "0:47:51", "remaining_time": "8:09:49", "throughput": 2389.69, "total_tokens": 6861296} {"current_steps": 3565, "total_steps": 40000, "loss": 0.1325, "lr": 4.902696687301486e-05, "epoch": 0.5816135084427767, "percentage": 8.91, "elapsed_time": "0:47:53", "remaining_time": "8:09:25", "throughput": 2391.18, "total_tokens": 6870480} {"current_steps": 3570, "total_steps": 40000, "loss": 0.1645, "lr": 4.902425269867268e-05, "epoch": 0.5824292356635941, "percentage": 8.92, "elapsed_time": "0:47:55", "remaining_time": "8:09:01", "throughput": 2392.65, "total_tokens": 6879648} {"current_steps": 3575, "total_steps": 40000, "loss": 0.1243, "lr": 4.902153481948883e-05, "epoch": 0.5832449628844114, "percentage": 8.94, "elapsed_time": "0:47:57", "remaining_time": "8:08:37", "throughput": 2394.44, "total_tokens": 6889728} {"current_steps": 3580, "total_steps": 40000, "loss": 0.1267, "lr": 4.901881323588244e-05, "epoch": 0.5840606901052288, "percentage": 8.95, "elapsed_time": "0:47:59", "remaining_time": "8:08:13", "throughput": 2395.74, "total_tokens": 6898416} {"current_steps": 3585, "total_steps": 40000, "loss": 0.0674, "lr": 4.90160879482732e-05, "epoch": 0.5848764173260461, "percentage": 8.96, "elapsed_time": "0:48:01", "remaining_time": "8:07:49", "throughput": 2397.02, "total_tokens": 6907072} {"current_steps": 3590, "total_steps": 40000, "loss": 0.1122, "lr": 4.9013358957081405e-05, "epoch": 0.5856921445468636, "percentage": 8.97, "elapsed_time": "0:48:03", "remaining_time": "8:07:25", "throughput": 2398.7, "total_tokens": 6916880} {"current_steps": 3595, "total_steps": 40000, "loss": 0.0949, "lr": 4.901062626272789e-05, "epoch": 0.5865078717676809, "percentage": 8.99, "elapsed_time": "0:48:05", "remaining_time": "8:07:01", "throughput": 2400.67, "total_tokens": 6927504} {"current_steps": 3600, "total_steps": 40000, "loss": 0.0803, "lr": 4.900788986563406e-05, "epoch": 0.5873235989884983, "percentage": 9.0, "elapsed_time": "0:48:07", "remaining_time": "8:06:37", "throughput": 2402.13, "total_tokens": 6936656} {"current_steps": 3600, "total_steps": 40000, "eval_loss": 0.1551746428012848, "epoch": 0.5873235989884983, "percentage": 9.0, "elapsed_time": "0:49:28", "remaining_time": "8:20:13", "throughput": 2336.87, "total_tokens": 6936656} {"current_steps": 3605, "total_steps": 40000, "loss": 0.1958, "lr": 4.9005149766221915e-05, "epoch": 0.5881393262093156, "percentage": 9.01, "elapsed_time": "0:49:31", "remaining_time": "8:20:03", "throughput": 2337.27, "total_tokens": 6946256} {"current_steps": 3610, "total_steps": 40000, "loss": 0.096, "lr": 4.9002405964914e-05, "epoch": 0.588955053430133, "percentage": 9.03, "elapsed_time": "0:49:34", "remaining_time": "8:19:39", "throughput": 2338.9, "total_tokens": 6955936} {"current_steps": 3615, "total_steps": 40000, "loss": 0.3361, "lr": 4.899965846213346e-05, "epoch": 0.5897707806509503, "percentage": 9.04, "elapsed_time": "0:49:36", "remaining_time": "8:19:14", "throughput": 2340.49, "total_tokens": 6965504} {"current_steps": 3620, "total_steps": 40000, "loss": 0.1923, "lr": 4.899690725830399e-05, "epoch": 0.5905865078717677, "percentage": 9.05, "elapsed_time": "0:49:38", "remaining_time": "8:18:49", "throughput": 2341.98, "total_tokens": 6974768} {"current_steps": 3625, "total_steps": 40000, "loss": 0.1875, "lr": 4.899415235384985e-05, "epoch": 0.591402235092585, "percentage": 9.06, "elapsed_time": "0:49:40", "remaining_time": "8:18:24", "throughput": 2344.08, "total_tokens": 6985872} {"current_steps": 3630, "total_steps": 40000, "loss": 0.3257, "lr": 4.899139374919589e-05, "epoch": 0.5922179623134024, "percentage": 9.07, "elapsed_time": "0:49:42", "remaining_time": "8:18:00", "throughput": 2345.59, "total_tokens": 6995248} {"current_steps": 3635, "total_steps": 40000, "loss": 0.1735, "lr": 4.898863144476752e-05, "epoch": 0.5930336895342198, "percentage": 9.09, "elapsed_time": "0:49:44", "remaining_time": "8:17:35", "throughput": 2347.22, "total_tokens": 7004928} {"current_steps": 3640, "total_steps": 40000, "loss": 0.1151, "lr": 4.898586544099072e-05, "epoch": 0.5938494167550371, "percentage": 9.1, "elapsed_time": "0:49:46", "remaining_time": "8:17:11", "throughput": 2348.47, "total_tokens": 7013552} {"current_steps": 3645, "total_steps": 40000, "loss": 0.1319, "lr": 4.898309573829204e-05, "epoch": 0.5946651439758545, "percentage": 9.11, "elapsed_time": "0:49:48", "remaining_time": "8:16:47", "throughput": 2349.92, "total_tokens": 7022720} {"current_steps": 3650, "total_steps": 40000, "loss": 0.1666, "lr": 4.898032233709862e-05, "epoch": 0.5954808711966718, "percentage": 9.12, "elapsed_time": "0:49:50", "remaining_time": "8:16:22", "throughput": 2351.61, "total_tokens": 7032624} {"current_steps": 3655, "total_steps": 40000, "loss": 0.231, "lr": 4.8977545237838123e-05, "epoch": 0.5962965984174892, "percentage": 9.14, "elapsed_time": "0:49:52", "remaining_time": "8:15:58", "throughput": 2353.61, "total_tokens": 7043456} {"current_steps": 3660, "total_steps": 40000, "loss": 0.1095, "lr": 4.8974764440938836e-05, "epoch": 0.5971123256383065, "percentage": 9.15, "elapsed_time": "0:49:54", "remaining_time": "8:15:34", "throughput": 2355.52, "total_tokens": 7054048} {"current_steps": 3665, "total_steps": 40000, "loss": 0.2064, "lr": 4.897197994682959e-05, "epoch": 0.5979280528591239, "percentage": 9.16, "elapsed_time": "0:49:56", "remaining_time": "8:15:09", "throughput": 2356.98, "total_tokens": 7063280} {"current_steps": 3670, "total_steps": 40000, "loss": 0.1693, "lr": 4.8969191755939786e-05, "epoch": 0.5987437800799412, "percentage": 9.18, "elapsed_time": "0:49:58", "remaining_time": "8:14:45", "throughput": 2358.33, "total_tokens": 7072192} {"current_steps": 3675, "total_steps": 40000, "loss": 0.1391, "lr": 4.8966399868699396e-05, "epoch": 0.5995595073007586, "percentage": 9.19, "elapsed_time": "0:50:00", "remaining_time": "8:14:21", "throughput": 2360.06, "total_tokens": 7082288} {"current_steps": 3680, "total_steps": 40000, "loss": 0.115, "lr": 4.8963604285538965e-05, "epoch": 0.600375234521576, "percentage": 9.2, "elapsed_time": "0:50:02", "remaining_time": "8:13:57", "throughput": 2362.33, "total_tokens": 7093952} {"current_steps": 3685, "total_steps": 40000, "loss": 0.104, "lr": 4.8960805006889604e-05, "epoch": 0.6011909617423934, "percentage": 9.21, "elapsed_time": "0:50:05", "remaining_time": "8:13:33", "throughput": 2363.27, "total_tokens": 7101680} {"current_steps": 3690, "total_steps": 40000, "loss": 0.1921, "lr": 4.8958002033183004e-05, "epoch": 0.6020066889632107, "percentage": 9.22, "elapsed_time": "0:50:07", "remaining_time": "8:13:10", "throughput": 2365.08, "total_tokens": 7111984} {"current_steps": 3695, "total_steps": 40000, "loss": 0.0837, "lr": 4.8955195364851414e-05, "epoch": 0.6028224161840281, "percentage": 9.24, "elapsed_time": "0:50:09", "remaining_time": "8:12:46", "throughput": 2366.21, "total_tokens": 7120272} {"current_steps": 3700, "total_steps": 40000, "loss": 0.0966, "lr": 4.895238500232766e-05, "epoch": 0.6036381434048455, "percentage": 9.25, "elapsed_time": "0:50:11", "remaining_time": "8:12:22", "throughput": 2367.64, "total_tokens": 7129456} {"current_steps": 3705, "total_steps": 40000, "loss": 0.1672, "lr": 4.8949570946045143e-05, "epoch": 0.6044538706256628, "percentage": 9.26, "elapsed_time": "0:50:13", "remaining_time": "8:11:58", "throughput": 2369.46, "total_tokens": 7139824} {"current_steps": 3710, "total_steps": 40000, "loss": 0.19, "lr": 4.89467531964378e-05, "epoch": 0.6052695978464802, "percentage": 9.28, "elapsed_time": "0:50:15", "remaining_time": "8:11:35", "throughput": 2370.76, "total_tokens": 7148640} {"current_steps": 3715, "total_steps": 40000, "loss": 0.1846, "lr": 4.894393175394019e-05, "epoch": 0.6060853250672975, "percentage": 9.29, "elapsed_time": "0:50:17", "remaining_time": "8:11:11", "throughput": 2372.21, "total_tokens": 7157920} {"current_steps": 3720, "total_steps": 40000, "loss": 0.1132, "lr": 4.8941106618987406e-05, "epoch": 0.6069010522881149, "percentage": 9.3, "elapsed_time": "0:50:19", "remaining_time": "8:10:48", "throughput": 2373.56, "total_tokens": 7166928} {"current_steps": 3725, "total_steps": 40000, "loss": 0.2157, "lr": 4.893827779201512e-05, "epoch": 0.6077167795089322, "percentage": 9.31, "elapsed_time": "0:50:21", "remaining_time": "8:10:24", "throughput": 2375.17, "total_tokens": 7176688} {"current_steps": 3730, "total_steps": 40000, "loss": 0.169, "lr": 4.893544527345957e-05, "epoch": 0.6085325067297496, "percentage": 9.32, "elapsed_time": "0:50:23", "remaining_time": "8:10:01", "throughput": 2376.57, "total_tokens": 7185824} {"current_steps": 3735, "total_steps": 40000, "loss": 0.211, "lr": 4.8932609063757563e-05, "epoch": 0.6093482339505669, "percentage": 9.34, "elapsed_time": "0:50:25", "remaining_time": "8:09:37", "throughput": 2378.65, "total_tokens": 7197040} {"current_steps": 3740, "total_steps": 40000, "loss": 0.2348, "lr": 4.8929769163346484e-05, "epoch": 0.6101639611713843, "percentage": 9.35, "elapsed_time": "0:50:27", "remaining_time": "8:09:14", "throughput": 2380.37, "total_tokens": 7207136} {"current_steps": 3745, "total_steps": 40000, "loss": 0.1403, "lr": 4.892692557266429e-05, "epoch": 0.6109796883922016, "percentage": 9.36, "elapsed_time": "0:50:29", "remaining_time": "8:08:51", "throughput": 2381.87, "total_tokens": 7216624} {"current_steps": 3750, "total_steps": 40000, "loss": 0.1544, "lr": 4.8924078292149464e-05, "epoch": 0.611795415613019, "percentage": 9.38, "elapsed_time": "0:50:31", "remaining_time": "8:08:28", "throughput": 2383.45, "total_tokens": 7226320} {"current_steps": 3755, "total_steps": 40000, "loss": 0.1351, "lr": 4.892122732224114e-05, "epoch": 0.6126111428338363, "percentage": 9.39, "elapsed_time": "0:50:33", "remaining_time": "8:08:04", "throughput": 2384.93, "total_tokens": 7235728} {"current_steps": 3760, "total_steps": 40000, "loss": 0.1098, "lr": 4.8918372663378944e-05, "epoch": 0.6134268700546537, "percentage": 9.4, "elapsed_time": "0:50:36", "remaining_time": "8:07:41", "throughput": 2386.25, "total_tokens": 7244656} {"current_steps": 3765, "total_steps": 40000, "loss": 0.0645, "lr": 4.89155143160031e-05, "epoch": 0.614242597275471, "percentage": 9.41, "elapsed_time": "0:50:38", "remaining_time": "8:07:18", "throughput": 2387.91, "total_tokens": 7254624} {"current_steps": 3770, "total_steps": 40000, "loss": 0.0989, "lr": 4.891265228055441e-05, "epoch": 0.6150583244962884, "percentage": 9.43, "elapsed_time": "0:50:40", "remaining_time": "8:06:55", "throughput": 2389.76, "total_tokens": 7265168} {"current_steps": 3775, "total_steps": 40000, "loss": 0.2385, "lr": 4.890978655747424e-05, "epoch": 0.6158740517171059, "percentage": 9.44, "elapsed_time": "0:50:42", "remaining_time": "8:06:32", "throughput": 2391.54, "total_tokens": 7275504} {"current_steps": 3780, "total_steps": 40000, "loss": 0.2812, "lr": 4.89069171472045e-05, "epoch": 0.6166897789379232, "percentage": 9.45, "elapsed_time": "0:50:44", "remaining_time": "8:06:09", "throughput": 2392.99, "total_tokens": 7284864} {"current_steps": 3785, "total_steps": 40000, "loss": 0.2746, "lr": 4.890404405018772e-05, "epoch": 0.6175055061587406, "percentage": 9.46, "elapsed_time": "0:50:46", "remaining_time": "8:05:47", "throughput": 2394.24, "total_tokens": 7293616} {"current_steps": 3790, "total_steps": 40000, "loss": 0.1741, "lr": 4.8901167266866934e-05, "epoch": 0.6183212333795579, "percentage": 9.47, "elapsed_time": "0:50:48", "remaining_time": "8:05:24", "throughput": 2395.74, "total_tokens": 7303152} {"current_steps": 3795, "total_steps": 40000, "loss": 0.2382, "lr": 4.88982867976858e-05, "epoch": 0.6191369606003753, "percentage": 9.49, "elapsed_time": "0:50:50", "remaining_time": "8:05:01", "throughput": 2397.24, "total_tokens": 7312672} {"current_steps": 3800, "total_steps": 40000, "loss": 0.1782, "lr": 4.889540264308852e-05, "epoch": 0.6199526878211926, "percentage": 9.5, "elapsed_time": "0:50:52", "remaining_time": "8:04:39", "throughput": 2398.39, "total_tokens": 7321136} {"current_steps": 3800, "total_steps": 40000, "eval_loss": 0.1527053713798523, "epoch": 0.6199526878211926, "percentage": 9.5, "elapsed_time": "0:52:13", "remaining_time": "8:17:29", "throughput": 2336.52, "total_tokens": 7321136} {"current_steps": 3805, "total_steps": 40000, "loss": 0.1917, "lr": 4.889251480351986e-05, "epoch": 0.62076841504201, "percentage": 9.51, "elapsed_time": "0:52:16", "remaining_time": "8:17:19", "throughput": 2336.86, "total_tokens": 7330464} {"current_steps": 3810, "total_steps": 40000, "loss": 0.1391, "lr": 4.888962327942517e-05, "epoch": 0.6215841422628273, "percentage": 9.53, "elapsed_time": "0:52:18", "remaining_time": "8:16:55", "throughput": 2338.28, "total_tokens": 7339760} {"current_steps": 3815, "total_steps": 40000, "loss": 0.1618, "lr": 4.8886728071250356e-05, "epoch": 0.6223998694836447, "percentage": 9.54, "elapsed_time": "0:52:21", "remaining_time": "8:16:32", "throughput": 2340.32, "total_tokens": 7351008} {"current_steps": 3820, "total_steps": 40000, "loss": 0.1693, "lr": 4.8883829179441884e-05, "epoch": 0.623215596704462, "percentage": 9.55, "elapsed_time": "0:52:23", "remaining_time": "8:16:08", "throughput": 2341.94, "total_tokens": 7360928} {"current_steps": 3825, "total_steps": 40000, "loss": 0.1884, "lr": 4.888092660444682e-05, "epoch": 0.6240313239252794, "percentage": 9.56, "elapsed_time": "0:52:25", "remaining_time": "8:15:45", "throughput": 2343.49, "total_tokens": 7370672} {"current_steps": 3830, "total_steps": 40000, "loss": 0.161, "lr": 4.887802034671276e-05, "epoch": 0.6248470511460967, "percentage": 9.57, "elapsed_time": "0:52:27", "remaining_time": "8:15:21", "throughput": 2345.07, "total_tokens": 7380448} {"current_steps": 3835, "total_steps": 40000, "loss": 0.1719, "lr": 4.88751104066879e-05, "epoch": 0.6256627783669141, "percentage": 9.59, "elapsed_time": "0:52:29", "remaining_time": "8:14:58", "throughput": 2346.37, "total_tokens": 7389392} {"current_steps": 3840, "total_steps": 40000, "loss": 0.1157, "lr": 4.887219678482098e-05, "epoch": 0.6264785055877314, "percentage": 9.6, "elapsed_time": "0:52:31", "remaining_time": "8:14:35", "throughput": 2348.18, "total_tokens": 7399984} {"current_steps": 3845, "total_steps": 40000, "loss": 0.0636, "lr": 4.8869279481561316e-05, "epoch": 0.6272942328085488, "percentage": 9.61, "elapsed_time": "0:52:33", "remaining_time": "8:14:12", "throughput": 2349.32, "total_tokens": 7408432} {"current_steps": 3850, "total_steps": 40000, "loss": 0.1475, "lr": 4.88663584973588e-05, "epoch": 0.6281099600293661, "percentage": 9.62, "elapsed_time": "0:52:35", "remaining_time": "8:13:49", "throughput": 2351.08, "total_tokens": 7418864} {"current_steps": 3855, "total_steps": 40000, "loss": 0.1435, "lr": 4.8863433832663874e-05, "epoch": 0.6289256872501835, "percentage": 9.64, "elapsed_time": "0:52:37", "remaining_time": "8:13:25", "throughput": 2352.02, "total_tokens": 7426688} {"current_steps": 3860, "total_steps": 40000, "loss": 0.143, "lr": 4.886050548792757e-05, "epoch": 0.6297414144710008, "percentage": 9.65, "elapsed_time": "0:52:39", "remaining_time": "8:13:02", "throughput": 2353.62, "total_tokens": 7436592} {"current_steps": 3865, "total_steps": 40000, "loss": 0.0639, "lr": 4.8857573463601465e-05, "epoch": 0.6305571416918182, "percentage": 9.66, "elapsed_time": "0:52:41", "remaining_time": "8:12:39", "throughput": 2355.22, "total_tokens": 7446512} {"current_steps": 3870, "total_steps": 40000, "loss": 0.2142, "lr": 4.885463776013772e-05, "epoch": 0.6313728689126357, "percentage": 9.68, "elapsed_time": "0:52:43", "remaining_time": "8:12:16", "throughput": 2356.96, "total_tokens": 7456896} {"current_steps": 3875, "total_steps": 40000, "loss": 0.1849, "lr": 4.8851698377989056e-05, "epoch": 0.632188596133453, "percentage": 9.69, "elapsed_time": "0:52:45", "remaining_time": "8:11:53", "throughput": 2358.56, "total_tokens": 7466832} {"current_steps": 3880, "total_steps": 40000, "loss": 0.1749, "lr": 4.884875531760876e-05, "epoch": 0.6330043233542704, "percentage": 9.7, "elapsed_time": "0:52:47", "remaining_time": "8:11:31", "throughput": 2360.08, "total_tokens": 7476528} {"current_steps": 3885, "total_steps": 40000, "loss": 0.0847, "lr": 4.88458085794507e-05, "epoch": 0.6338200505750877, "percentage": 9.71, "elapsed_time": "0:52:49", "remaining_time": "8:11:08", "throughput": 2361.3, "total_tokens": 7485280} {"current_steps": 3890, "total_steps": 40000, "loss": 0.2444, "lr": 4.884285816396929e-05, "epoch": 0.6346357777959051, "percentage": 9.72, "elapsed_time": "0:52:52", "remaining_time": "8:10:45", "throughput": 2362.62, "total_tokens": 7494336} {"current_steps": 3895, "total_steps": 40000, "loss": 0.1411, "lr": 4.8839904071619526e-05, "epoch": 0.6354515050167224, "percentage": 9.74, "elapsed_time": "0:52:54", "remaining_time": "8:10:22", "throughput": 2364.05, "total_tokens": 7503792} {"current_steps": 3900, "total_steps": 40000, "loss": 0.1612, "lr": 4.8836946302856955e-05, "epoch": 0.6362672322375398, "percentage": 9.75, "elapsed_time": "0:52:56", "remaining_time": "8:10:00", "throughput": 2366.02, "total_tokens": 7514912} {"current_steps": 3905, "total_steps": 40000, "loss": 0.1139, "lr": 4.8833984858137715e-05, "epoch": 0.6370829594583571, "percentage": 9.76, "elapsed_time": "0:52:58", "remaining_time": "8:09:37", "throughput": 2367.37, "total_tokens": 7524112} {"current_steps": 3910, "total_steps": 40000, "loss": 0.0936, "lr": 4.8831019737918494e-05, "epoch": 0.6378986866791745, "percentage": 9.78, "elapsed_time": "0:53:00", "remaining_time": "8:09:15", "throughput": 2369.22, "total_tokens": 7534912} {"current_steps": 3915, "total_steps": 40000, "loss": 0.0391, "lr": 4.882805094265655e-05, "epoch": 0.6387144138999918, "percentage": 9.79, "elapsed_time": "0:53:02", "remaining_time": "8:08:52", "throughput": 2370.58, "total_tokens": 7544112} {"current_steps": 3920, "total_steps": 40000, "loss": 0.1545, "lr": 4.8825078472809706e-05, "epoch": 0.6395301411208092, "percentage": 9.8, "elapsed_time": "0:53:04", "remaining_time": "8:08:30", "throughput": 2371.96, "total_tokens": 7553424} {"current_steps": 3925, "total_steps": 40000, "loss": 0.1768, "lr": 4.882210232883635e-05, "epoch": 0.6403458683416265, "percentage": 9.81, "elapsed_time": "0:53:06", "remaining_time": "8:08:07", "throughput": 2373.61, "total_tokens": 7563616} {"current_steps": 3930, "total_steps": 40000, "loss": 0.1399, "lr": 4.881912251119546e-05, "epoch": 0.6411615955624439, "percentage": 9.83, "elapsed_time": "0:53:08", "remaining_time": "8:07:45", "throughput": 2375.28, "total_tokens": 7573824} {"current_steps": 3935, "total_steps": 40000, "loss": 0.1239, "lr": 4.881613902034654e-05, "epoch": 0.6419773227832613, "percentage": 9.84, "elapsed_time": "0:53:10", "remaining_time": "8:07:23", "throughput": 2376.76, "total_tokens": 7583456} {"current_steps": 3940, "total_steps": 40000, "loss": 0.3056, "lr": 4.88131518567497e-05, "epoch": 0.6427930500040786, "percentage": 9.85, "elapsed_time": "0:53:12", "remaining_time": "8:07:00", "throughput": 2378.29, "total_tokens": 7593280} {"current_steps": 3945, "total_steps": 40000, "loss": 0.1046, "lr": 4.881016102086558e-05, "epoch": 0.643608777224896, "percentage": 9.86, "elapsed_time": "0:53:14", "remaining_time": "8:06:38", "throughput": 2379.78, "total_tokens": 7602976} {"current_steps": 3950, "total_steps": 40000, "loss": 0.2312, "lr": 4.8807166513155425e-05, "epoch": 0.6444245044457133, "percentage": 9.88, "elapsed_time": "0:53:16", "remaining_time": "8:06:16", "throughput": 2381.48, "total_tokens": 7613328} {"current_steps": 3955, "total_steps": 40000, "loss": 0.2819, "lr": 4.8804168334081004e-05, "epoch": 0.6452402316665307, "percentage": 9.89, "elapsed_time": "0:53:18", "remaining_time": "8:05:54", "throughput": 2383.17, "total_tokens": 7623664} {"current_steps": 3960, "total_steps": 40000, "loss": 0.1569, "lr": 4.880116648410468e-05, "epoch": 0.6460559588873481, "percentage": 9.9, "elapsed_time": "0:53:21", "remaining_time": "8:05:32", "throughput": 2384.72, "total_tokens": 7633552} {"current_steps": 3965, "total_steps": 40000, "loss": 0.1205, "lr": 4.879816096368939e-05, "epoch": 0.6468716861081655, "percentage": 9.91, "elapsed_time": "0:53:23", "remaining_time": "8:05:10", "throughput": 2386.11, "total_tokens": 7642912} {"current_steps": 3970, "total_steps": 40000, "loss": 0.1557, "lr": 4.879515177329861e-05, "epoch": 0.6476874133289828, "percentage": 9.93, "elapsed_time": "0:53:25", "remaining_time": "8:04:48", "throughput": 2387.35, "total_tokens": 7651792} {"current_steps": 3975, "total_steps": 40000, "loss": 0.1107, "lr": 4.8792138913396394e-05, "epoch": 0.6485031405498002, "percentage": 9.94, "elapsed_time": "0:53:27", "remaining_time": "8:04:26", "throughput": 2389.0, "total_tokens": 7662016} {"current_steps": 3980, "total_steps": 40000, "loss": 0.1878, "lr": 4.8789122384447374e-05, "epoch": 0.6493188677706175, "percentage": 9.95, "elapsed_time": "0:53:29", "remaining_time": "8:04:04", "throughput": 2390.43, "total_tokens": 7671552} {"current_steps": 3985, "total_steps": 40000, "loss": 0.2282, "lr": 4.878610218691673e-05, "epoch": 0.6501345949914349, "percentage": 9.96, "elapsed_time": "0:53:31", "remaining_time": "8:03:43", "throughput": 2391.87, "total_tokens": 7681152} {"current_steps": 3990, "total_steps": 40000, "loss": 0.2628, "lr": 4.87830783212702e-05, "epoch": 0.6509503222122522, "percentage": 9.98, "elapsed_time": "0:53:33", "remaining_time": "8:03:21", "throughput": 2393.39, "total_tokens": 7690960} {"current_steps": 3995, "total_steps": 40000, "loss": 0.1283, "lr": 4.878005078797413e-05, "epoch": 0.6517660494330696, "percentage": 9.99, "elapsed_time": "0:53:35", "remaining_time": "8:02:59", "throughput": 2394.68, "total_tokens": 7700032} {"current_steps": 4000, "total_steps": 40000, "loss": 0.1269, "lr": 4.877701958749539e-05, "epoch": 0.652581776653887, "percentage": 10.0, "elapsed_time": "0:53:37", "remaining_time": "8:02:37", "throughput": 2396.19, "total_tokens": 7709856} {"current_steps": 4000, "total_steps": 40000, "eval_loss": 0.1362190544605255, "epoch": 0.652581776653887, "percentage": 10.0, "elapsed_time": "0:54:58", "remaining_time": "8:14:43", "throughput": 2337.64, "total_tokens": 7709856} {"current_steps": 4005, "total_steps": 40000, "loss": 0.1159, "lr": 4.877398472030142e-05, "epoch": 0.6533975038747043, "percentage": 10.01, "elapsed_time": "0:55:01", "remaining_time": "8:14:36", "throughput": 2337.77, "total_tokens": 7719216} {"current_steps": 4010, "total_steps": 40000, "loss": 0.1344, "lr": 4.877094618686024e-05, "epoch": 0.6542132310955217, "percentage": 10.03, "elapsed_time": "0:55:04", "remaining_time": "8:14:14", "throughput": 2339.43, "total_tokens": 7729584} {"current_steps": 4015, "total_steps": 40000, "loss": 0.1539, "lr": 4.876790398764045e-05, "epoch": 0.655028958316339, "percentage": 10.04, "elapsed_time": "0:55:06", "remaining_time": "8:13:51", "throughput": 2340.92, "total_tokens": 7739328} {"current_steps": 4020, "total_steps": 40000, "loss": 0.1764, "lr": 4.8764858123111167e-05, "epoch": 0.6558446855371564, "percentage": 10.05, "elapsed_time": "0:55:08", "remaining_time": "8:13:29", "throughput": 2342.24, "total_tokens": 7748544} {"current_steps": 4025, "total_steps": 40000, "loss": 0.0866, "lr": 4.876180859374212e-05, "epoch": 0.6566604127579737, "percentage": 10.06, "elapsed_time": "0:55:10", "remaining_time": "8:13:06", "throughput": 2343.76, "total_tokens": 7758400} {"current_steps": 4030, "total_steps": 40000, "loss": 0.0635, "lr": 4.875875540000357e-05, "epoch": 0.6574761399787911, "percentage": 10.08, "elapsed_time": "0:55:12", "remaining_time": "8:12:44", "throughput": 2345.52, "total_tokens": 7769088} {"current_steps": 4035, "total_steps": 40000, "loss": 0.225, "lr": 4.8755698542366376e-05, "epoch": 0.6582918671996084, "percentage": 10.09, "elapsed_time": "0:55:14", "remaining_time": "8:12:21", "throughput": 2347.23, "total_tokens": 7779616} {"current_steps": 4040, "total_steps": 40000, "loss": 0.2078, "lr": 4.875263802130193e-05, "epoch": 0.6591075944204258, "percentage": 10.1, "elapsed_time": "0:55:16", "remaining_time": "8:11:59", "throughput": 2349.16, "total_tokens": 7790928} {"current_steps": 4045, "total_steps": 40000, "loss": 0.4037, "lr": 4.8749573837282207e-05, "epoch": 0.6599233216412431, "percentage": 10.11, "elapsed_time": "0:55:18", "remaining_time": "8:11:37", "throughput": 2350.45, "total_tokens": 7800032} {"current_steps": 4050, "total_steps": 40000, "loss": 0.125, "lr": 4.874650599077974e-05, "epoch": 0.6607390488620605, "percentage": 10.12, "elapsed_time": "0:55:20", "remaining_time": "8:11:15", "throughput": 2351.74, "total_tokens": 7809184} {"current_steps": 4055, "total_steps": 40000, "loss": 0.1635, "lr": 4.874343448226764e-05, "epoch": 0.6615547760828779, "percentage": 10.14, "elapsed_time": "0:55:22", "remaining_time": "8:10:53", "throughput": 2353.3, "total_tokens": 7819232} {"current_steps": 4060, "total_steps": 40000, "loss": 0.152, "lr": 4.874035931221955e-05, "epoch": 0.6623705033036953, "percentage": 10.15, "elapsed_time": "0:55:24", "remaining_time": "8:10:31", "throughput": 2354.9, "total_tokens": 7829392} {"current_steps": 4065, "total_steps": 40000, "loss": 0.1831, "lr": 4.8737280481109724e-05, "epoch": 0.6631862305245126, "percentage": 10.16, "elapsed_time": "0:55:26", "remaining_time": "8:10:09", "throughput": 2356.22, "total_tokens": 7838640} {"current_steps": 4070, "total_steps": 40000, "loss": 0.0747, "lr": 4.873419798941294e-05, "epoch": 0.66400195774533, "percentage": 10.17, "elapsed_time": "0:55:28", "remaining_time": "8:09:47", "throughput": 2357.84, "total_tokens": 7848912} {"current_steps": 4075, "total_steps": 40000, "loss": 0.0855, "lr": 4.873111183760458e-05, "epoch": 0.6648176849661473, "percentage": 10.19, "elapsed_time": "0:55:30", "remaining_time": "8:09:25", "throughput": 2359.78, "total_tokens": 7860272} {"current_steps": 4080, "total_steps": 40000, "loss": 0.1194, "lr": 4.8728022026160537e-05, "epoch": 0.6656334121869647, "percentage": 10.2, "elapsed_time": "0:55:32", "remaining_time": "8:09:03", "throughput": 2360.83, "total_tokens": 7868640} {"current_steps": 4085, "total_steps": 40000, "loss": 0.1715, "lr": 4.872492855555732e-05, "epoch": 0.666449139407782, "percentage": 10.21, "elapsed_time": "0:55:35", "remaining_time": "8:08:41", "throughput": 2362.25, "total_tokens": 7878256} {"current_steps": 4090, "total_steps": 40000, "loss": 0.1791, "lr": 4.8721831426271956e-05, "epoch": 0.6672648666285994, "percentage": 10.22, "elapsed_time": "0:55:37", "remaining_time": "8:08:19", "throughput": 2363.28, "total_tokens": 7886576} {"current_steps": 4095, "total_steps": 40000, "loss": 0.1414, "lr": 4.87187306387821e-05, "epoch": 0.6680805938494168, "percentage": 10.24, "elapsed_time": "0:55:39", "remaining_time": "8:07:58", "throughput": 2364.93, "total_tokens": 7896944} {"current_steps": 4100, "total_steps": 40000, "loss": 0.1119, "lr": 4.87156261935659e-05, "epoch": 0.6688963210702341, "percentage": 10.25, "elapsed_time": "0:55:41", "remaining_time": "8:07:36", "throughput": 2366.62, "total_tokens": 7907472} {"current_steps": 4105, "total_steps": 40000, "loss": 0.1623, "lr": 4.871251809110211e-05, "epoch": 0.6697120482910515, "percentage": 10.26, "elapsed_time": "0:55:43", "remaining_time": "8:07:14", "throughput": 2368.11, "total_tokens": 7917328} {"current_steps": 4110, "total_steps": 40000, "loss": 0.2686, "lr": 4.8709406331870044e-05, "epoch": 0.6705277755118688, "percentage": 10.27, "elapsed_time": "0:55:45", "remaining_time": "8:06:53", "throughput": 2369.63, "total_tokens": 7927344} {"current_steps": 4115, "total_steps": 40000, "loss": 0.1247, "lr": 4.8706290916349574e-05, "epoch": 0.6713435027326862, "percentage": 10.29, "elapsed_time": "0:55:47", "remaining_time": "8:06:31", "throughput": 2370.7, "total_tokens": 7935840} {"current_steps": 4120, "total_steps": 40000, "loss": 0.1523, "lr": 4.8703171845021134e-05, "epoch": 0.6721592299535035, "percentage": 10.3, "elapsed_time": "0:55:49", "remaining_time": "8:06:10", "throughput": 2372.18, "total_tokens": 7945696} {"current_steps": 4125, "total_steps": 40000, "loss": 0.2719, "lr": 4.870004911836572e-05, "epoch": 0.6729749571743209, "percentage": 10.31, "elapsed_time": "0:55:51", "remaining_time": "8:05:48", "throughput": 2373.66, "total_tokens": 7955536} {"current_steps": 4130, "total_steps": 40000, "loss": 0.1802, "lr": 4.869692273686489e-05, "epoch": 0.6737906843951382, "percentage": 10.32, "elapsed_time": "0:55:53", "remaining_time": "8:05:27", "throughput": 2375.08, "total_tokens": 7965200} {"current_steps": 4135, "total_steps": 40000, "loss": 0.0676, "lr": 4.869379270100079e-05, "epoch": 0.6746064116159556, "percentage": 10.34, "elapsed_time": "0:55:55", "remaining_time": "8:05:05", "throughput": 2376.56, "total_tokens": 7975072} {"current_steps": 4140, "total_steps": 40000, "loss": 0.1093, "lr": 4.86906590112561e-05, "epoch": 0.6754221388367729, "percentage": 10.35, "elapsed_time": "0:55:57", "remaining_time": "8:04:44", "throughput": 2377.78, "total_tokens": 7984080} {"current_steps": 4145, "total_steps": 40000, "loss": 0.1339, "lr": 4.8687521668114064e-05, "epoch": 0.6762378660575903, "percentage": 10.36, "elapsed_time": "0:55:59", "remaining_time": "8:04:23", "throughput": 2378.99, "total_tokens": 7993088} {"current_steps": 4150, "total_steps": 40000, "loss": 0.1487, "lr": 4.868438067205853e-05, "epoch": 0.6770535932784078, "percentage": 10.38, "elapsed_time": "0:56:01", "remaining_time": "8:04:02", "throughput": 2380.27, "total_tokens": 8002320} {"current_steps": 4155, "total_steps": 40000, "loss": 0.247, "lr": 4.8681236023573844e-05, "epoch": 0.6778693204992251, "percentage": 10.39, "elapsed_time": "0:56:04", "remaining_time": "8:03:41", "throughput": 2381.71, "total_tokens": 8012096} {"current_steps": 4160, "total_steps": 40000, "loss": 0.1219, "lr": 4.867808772314497e-05, "epoch": 0.6786850477200425, "percentage": 10.4, "elapsed_time": "0:56:06", "remaining_time": "8:03:19", "throughput": 2383.24, "total_tokens": 8022160} {"current_steps": 4165, "total_steps": 40000, "loss": 0.1279, "lr": 4.867493577125741e-05, "epoch": 0.6795007749408598, "percentage": 10.41, "elapsed_time": "0:56:08", "remaining_time": "8:02:58", "throughput": 2384.57, "total_tokens": 8031552} {"current_steps": 4170, "total_steps": 40000, "loss": 0.2319, "lr": 4.867178016839725e-05, "epoch": 0.6803165021616772, "percentage": 10.42, "elapsed_time": "0:56:10", "remaining_time": "8:02:37", "throughput": 2386.07, "total_tokens": 8041504} {"current_steps": 4175, "total_steps": 40000, "loss": 0.1193, "lr": 4.8668620915051094e-05, "epoch": 0.6811322293824945, "percentage": 10.44, "elapsed_time": "0:56:12", "remaining_time": "8:02:16", "throughput": 2387.48, "total_tokens": 8051168} {"current_steps": 4180, "total_steps": 40000, "loss": 0.1761, "lr": 4.866545801170616e-05, "epoch": 0.6819479566033119, "percentage": 10.45, "elapsed_time": "0:56:14", "remaining_time": "8:01:55", "throughput": 2389.21, "total_tokens": 8061936} {"current_steps": 4185, "total_steps": 40000, "loss": 0.2202, "lr": 4.86622914588502e-05, "epoch": 0.6827636838241292, "percentage": 10.46, "elapsed_time": "0:56:16", "remaining_time": "8:01:34", "throughput": 2390.7, "total_tokens": 8071936} {"current_steps": 4190, "total_steps": 40000, "loss": 0.1861, "lr": 4.865912125697154e-05, "epoch": 0.6835794110449466, "percentage": 10.47, "elapsed_time": "0:56:18", "remaining_time": "8:01:14", "throughput": 2392.09, "total_tokens": 8081568} {"current_steps": 4195, "total_steps": 40000, "loss": 0.3101, "lr": 4.865594740655907e-05, "epoch": 0.6843951382657639, "percentage": 10.49, "elapsed_time": "0:56:20", "remaining_time": "8:00:53", "throughput": 2393.3, "total_tokens": 8090592} {"current_steps": 4200, "total_steps": 40000, "loss": 0.0605, "lr": 4.865276990810222e-05, "epoch": 0.6852108654865813, "percentage": 10.5, "elapsed_time": "0:56:22", "remaining_time": "8:00:32", "throughput": 2394.78, "total_tokens": 8100560} {"current_steps": 4200, "total_steps": 40000, "eval_loss": 0.13741271197795868, "epoch": 0.6852108654865813, "percentage": 10.5, "elapsed_time": "0:57:43", "remaining_time": "8:11:59", "throughput": 2339.03, "total_tokens": 8100560} {"current_steps": 4205, "total_steps": 40000, "loss": 0.1465, "lr": 4.8649588762091016e-05, "epoch": 0.6860265927073986, "percentage": 10.51, "elapsed_time": "0:57:46", "remaining_time": "8:11:50", "throughput": 2339.57, "total_tokens": 8110688} {"current_steps": 4210, "total_steps": 40000, "loss": 0.1408, "lr": 4.8646403969016016e-05, "epoch": 0.686842319928216, "percentage": 10.53, "elapsed_time": "0:57:48", "remaining_time": "8:11:29", "throughput": 2340.98, "total_tokens": 8120448} {"current_steps": 4215, "total_steps": 40000, "loss": 0.0862, "lr": 4.864321552936838e-05, "epoch": 0.6876580471490333, "percentage": 10.54, "elapsed_time": "0:57:50", "remaining_time": "8:11:07", "throughput": 2342.52, "total_tokens": 8130608} {"current_steps": 4220, "total_steps": 40000, "loss": 0.0827, "lr": 4.864002344363978e-05, "epoch": 0.6884737743698507, "percentage": 10.55, "elapsed_time": "0:57:52", "remaining_time": "8:10:45", "throughput": 2344.0, "total_tokens": 8140592} {"current_steps": 4225, "total_steps": 40000, "loss": 0.1154, "lr": 4.863682771232248e-05, "epoch": 0.689289501590668, "percentage": 10.56, "elapsed_time": "0:57:55", "remaining_time": "8:10:24", "throughput": 2345.36, "total_tokens": 8150176} {"current_steps": 4230, "total_steps": 40000, "loss": 0.1176, "lr": 4.8633628335909324e-05, "epoch": 0.6901052288114854, "percentage": 10.57, "elapsed_time": "0:57:57", "remaining_time": "8:10:03", "throughput": 2347.25, "total_tokens": 8161568} {"current_steps": 4235, "total_steps": 40000, "loss": 0.1603, "lr": 4.8630425314893676e-05, "epoch": 0.6909209560323027, "percentage": 10.59, "elapsed_time": "0:57:59", "remaining_time": "8:09:41", "throughput": 2348.54, "total_tokens": 8170896} {"current_steps": 4240, "total_steps": 40000, "loss": 0.1523, "lr": 4.862721864976948e-05, "epoch": 0.6917366832531202, "percentage": 10.6, "elapsed_time": "0:58:01", "remaining_time": "8:09:20", "throughput": 2349.64, "total_tokens": 8179616} {"current_steps": 4245, "total_steps": 40000, "loss": 0.1084, "lr": 4.862400834103125e-05, "epoch": 0.6925524104739376, "percentage": 10.61, "elapsed_time": "0:58:03", "remaining_time": "8:08:59", "throughput": 2350.83, "total_tokens": 8188608} {"current_steps": 4250, "total_steps": 40000, "loss": 0.1255, "lr": 4.862079438917406e-05, "epoch": 0.6933681376947549, "percentage": 10.62, "elapsed_time": "0:58:05", "remaining_time": "8:08:37", "throughput": 2352.12, "total_tokens": 8197952} {"current_steps": 4255, "total_steps": 40000, "loss": 0.1131, "lr": 4.8617576794693536e-05, "epoch": 0.6941838649155723, "percentage": 10.64, "elapsed_time": "0:58:07", "remaining_time": "8:08:16", "throughput": 2353.11, "total_tokens": 8206256} {"current_steps": 4260, "total_steps": 40000, "loss": 0.0596, "lr": 4.8614355558085875e-05, "epoch": 0.6949995921363896, "percentage": 10.65, "elapsed_time": "0:58:09", "remaining_time": "8:07:55", "throughput": 2354.35, "total_tokens": 8215408} {"current_steps": 4265, "total_steps": 40000, "loss": 0.069, "lr": 4.861113067984783e-05, "epoch": 0.695815319357207, "percentage": 10.66, "elapsed_time": "0:58:11", "remaining_time": "8:07:34", "throughput": 2355.81, "total_tokens": 8225376} {"current_steps": 4270, "total_steps": 40000, "loss": 0.1086, "lr": 4.860790216047671e-05, "epoch": 0.6966310465780243, "percentage": 10.67, "elapsed_time": "0:58:13", "remaining_time": "8:07:13", "throughput": 2357.04, "total_tokens": 8234512} {"current_steps": 4275, "total_steps": 40000, "loss": 0.0756, "lr": 4.860467000047041e-05, "epoch": 0.6974467737988417, "percentage": 10.69, "elapsed_time": "0:58:15", "remaining_time": "8:06:52", "throughput": 2358.11, "total_tokens": 8243152} {"current_steps": 4280, "total_steps": 40000, "loss": 0.1886, "lr": 4.860143420032737e-05, "epoch": 0.698262501019659, "percentage": 10.7, "elapsed_time": "0:58:17", "remaining_time": "8:06:31", "throughput": 2359.55, "total_tokens": 8253056} {"current_steps": 4285, "total_steps": 40000, "loss": 0.2421, "lr": 4.859819476054657e-05, "epoch": 0.6990782282404764, "percentage": 10.71, "elapsed_time": "0:58:19", "remaining_time": "8:06:10", "throughput": 2360.83, "total_tokens": 8262416} {"current_steps": 4290, "total_steps": 40000, "loss": 0.1539, "lr": 4.859495168162758e-05, "epoch": 0.6998939554612937, "percentage": 10.72, "elapsed_time": "0:58:21", "remaining_time": "8:05:49", "throughput": 2362.14, "total_tokens": 8271856} {"current_steps": 4295, "total_steps": 40000, "loss": 0.2073, "lr": 4.859170496407054e-05, "epoch": 0.7007096826821111, "percentage": 10.74, "elapsed_time": "0:58:23", "remaining_time": "8:05:28", "throughput": 2363.08, "total_tokens": 8280048} {"current_steps": 4300, "total_steps": 40000, "loss": 0.0703, "lr": 4.8588454608376114e-05, "epoch": 0.7015254099029284, "percentage": 10.75, "elapsed_time": "0:58:25", "remaining_time": "8:05:07", "throughput": 2364.08, "total_tokens": 8288464} {"current_steps": 4305, "total_steps": 40000, "loss": 0.2675, "lr": 4.8585200615045555e-05, "epoch": 0.7023411371237458, "percentage": 10.76, "elapsed_time": "0:58:28", "remaining_time": "8:04:47", "throughput": 2365.3, "total_tokens": 8297616} {"current_steps": 4310, "total_steps": 40000, "loss": 0.1532, "lr": 4.8581942984580674e-05, "epoch": 0.7031568643445631, "percentage": 10.78, "elapsed_time": "0:58:30", "remaining_time": "8:04:26", "throughput": 2366.65, "total_tokens": 8307248} {"current_steps": 4315, "total_steps": 40000, "loss": 0.1854, "lr": 4.857868171748384e-05, "epoch": 0.7039725915653805, "percentage": 10.79, "elapsed_time": "0:58:32", "remaining_time": "8:04:05", "throughput": 2367.86, "total_tokens": 8316384} {"current_steps": 4320, "total_steps": 40000, "loss": 0.2107, "lr": 4.8575416814257976e-05, "epoch": 0.7047883187861979, "percentage": 10.8, "elapsed_time": "0:58:34", "remaining_time": "8:03:45", "throughput": 2369.32, "total_tokens": 8326416} {"current_steps": 4325, "total_steps": 40000, "loss": 0.1006, "lr": 4.857214827540657e-05, "epoch": 0.7056040460070152, "percentage": 10.81, "elapsed_time": "0:58:36", "remaining_time": "8:03:24", "throughput": 2370.81, "total_tokens": 8336544} {"current_steps": 4330, "total_steps": 40000, "loss": 0.1363, "lr": 4.856887610143367e-05, "epoch": 0.7064197732278326, "percentage": 10.82, "elapsed_time": "0:58:38", "remaining_time": "8:03:04", "throughput": 2372.5, "total_tokens": 8347376} {"current_steps": 4335, "total_steps": 40000, "loss": 0.1621, "lr": 4.8565600292843896e-05, "epoch": 0.70723550044865, "percentage": 10.84, "elapsed_time": "0:58:40", "remaining_time": "8:02:43", "throughput": 2373.75, "total_tokens": 8356672} {"current_steps": 4340, "total_steps": 40000, "loss": 0.1857, "lr": 4.856232085014241e-05, "epoch": 0.7080512276694674, "percentage": 10.85, "elapsed_time": "0:58:42", "remaining_time": "8:02:23", "throughput": 2375.41, "total_tokens": 8367408} {"current_steps": 4345, "total_steps": 40000, "loss": 0.1403, "lr": 4.855903777383495e-05, "epoch": 0.7088669548902847, "percentage": 10.86, "elapsed_time": "0:58:44", "remaining_time": "8:02:02", "throughput": 2377.03, "total_tokens": 8378032} {"current_steps": 4350, "total_steps": 40000, "loss": 0.198, "lr": 4.85557510644278e-05, "epoch": 0.7096826821111021, "percentage": 10.88, "elapsed_time": "0:58:46", "remaining_time": "8:01:42", "throughput": 2378.48, "total_tokens": 8388080} {"current_steps": 4355, "total_steps": 40000, "loss": 0.0903, "lr": 4.855246072242782e-05, "epoch": 0.7104984093319194, "percentage": 10.89, "elapsed_time": "0:58:48", "remaining_time": "8:01:21", "throughput": 2379.95, "total_tokens": 8398160} {"current_steps": 4360, "total_steps": 40000, "loss": 0.1252, "lr": 4.8549166748342414e-05, "epoch": 0.7113141365527368, "percentage": 10.9, "elapsed_time": "0:58:50", "remaining_time": "8:01:01", "throughput": 2381.05, "total_tokens": 8406944} {"current_steps": 4365, "total_steps": 40000, "loss": 0.1134, "lr": 4.8545869142679556e-05, "epoch": 0.7121298637735541, "percentage": 10.91, "elapsed_time": "0:58:52", "remaining_time": "8:00:41", "throughput": 2382.06, "total_tokens": 8415424} {"current_steps": 4370, "total_steps": 40000, "loss": 0.2409, "lr": 4.8542567905947776e-05, "epoch": 0.7129455909943715, "percentage": 10.93, "elapsed_time": "0:58:54", "remaining_time": "8:00:21", "throughput": 2383.51, "total_tokens": 8425488} {"current_steps": 4375, "total_steps": 40000, "loss": 0.2119, "lr": 4.853926303865618e-05, "epoch": 0.7137613182151888, "percentage": 10.94, "elapsed_time": "0:58:56", "remaining_time": "8:00:00", "throughput": 2384.73, "total_tokens": 8434704} {"current_steps": 4380, "total_steps": 40000, "loss": 0.127, "lr": 4.853595454131441e-05, "epoch": 0.7145770454360062, "percentage": 10.95, "elapsed_time": "0:58:59", "remaining_time": "7:59:40", "throughput": 2385.82, "total_tokens": 8443488} {"current_steps": 4385, "total_steps": 40000, "loss": 0.1135, "lr": 4.8532642414432674e-05, "epoch": 0.7153927726568236, "percentage": 10.96, "elapsed_time": "0:59:01", "remaining_time": "7:59:20", "throughput": 2387.15, "total_tokens": 8453120} {"current_steps": 4390, "total_steps": 40000, "loss": 0.1758, "lr": 4.8529326658521754e-05, "epoch": 0.7162084998776409, "percentage": 10.97, "elapsed_time": "0:59:03", "remaining_time": "7:59:00", "throughput": 2388.57, "total_tokens": 8463088} {"current_steps": 4395, "total_steps": 40000, "loss": 0.1626, "lr": 4.8526007274092965e-05, "epoch": 0.7170242270984583, "percentage": 10.99, "elapsed_time": "0:59:05", "remaining_time": "7:58:40", "throughput": 2389.8, "total_tokens": 8472368} {"current_steps": 4400, "total_steps": 40000, "loss": 0.1342, "lr": 4.852268426165822e-05, "epoch": 0.7178399543192756, "percentage": 11.0, "elapsed_time": "0:59:07", "remaining_time": "7:58:20", "throughput": 2391.19, "total_tokens": 8482208} {"current_steps": 4400, "total_steps": 40000, "eval_loss": 0.13959182798862457, "epoch": 0.7178399543192756, "percentage": 11.0, "elapsed_time": "1:00:27", "remaining_time": "8:09:13", "throughput": 2338.03, "total_tokens": 8482208} {"current_steps": 4405, "total_steps": 40000, "loss": 0.1165, "lr": 4.851935762172995e-05, "epoch": 0.718655681540093, "percentage": 11.01, "elapsed_time": "1:00:31", "remaining_time": "8:09:05", "throughput": 2338.13, "total_tokens": 8491040} {"current_steps": 4410, "total_steps": 40000, "loss": 0.197, "lr": 4.8516027354821175e-05, "epoch": 0.7194714087609103, "percentage": 11.03, "elapsed_time": "1:00:33", "remaining_time": "8:08:44", "throughput": 2339.57, "total_tokens": 8501152} {"current_steps": 4415, "total_steps": 40000, "loss": 0.2401, "lr": 4.851269346144546e-05, "epoch": 0.7202871359817277, "percentage": 11.04, "elapsed_time": "1:00:35", "remaining_time": "8:08:23", "throughput": 2340.67, "total_tokens": 8509984} {"current_steps": 4420, "total_steps": 40000, "loss": 0.2153, "lr": 4.850935594211693e-05, "epoch": 0.721102863202545, "percentage": 11.05, "elapsed_time": "1:00:37", "remaining_time": "8:08:03", "throughput": 2341.88, "total_tokens": 8519200} {"current_steps": 4425, "total_steps": 40000, "loss": 0.1124, "lr": 4.850601479735029e-05, "epoch": 0.7219185904233625, "percentage": 11.06, "elapsed_time": "1:00:39", "remaining_time": "8:07:42", "throughput": 2343.13, "total_tokens": 8528624} {"current_steps": 4430, "total_steps": 40000, "loss": 0.1351, "lr": 4.850267002766076e-05, "epoch": 0.7227343176441798, "percentage": 11.07, "elapsed_time": "1:00:41", "remaining_time": "8:07:22", "throughput": 2344.21, "total_tokens": 8537408} {"current_steps": 4435, "total_steps": 40000, "loss": 0.1582, "lr": 4.849932163356417e-05, "epoch": 0.7235500448649972, "percentage": 11.09, "elapsed_time": "1:00:43", "remaining_time": "8:07:01", "throughput": 2345.62, "total_tokens": 8547408} {"current_steps": 4440, "total_steps": 40000, "loss": 0.1044, "lr": 4.8495969615576864e-05, "epoch": 0.7243657720858145, "percentage": 11.1, "elapsed_time": "1:00:46", "remaining_time": "8:06:41", "throughput": 2347.02, "total_tokens": 8557360} {"current_steps": 4445, "total_steps": 40000, "loss": 0.1262, "lr": 4.849261397421577e-05, "epoch": 0.7251814993066319, "percentage": 11.11, "elapsed_time": "1:00:48", "remaining_time": "8:06:20", "throughput": 2348.36, "total_tokens": 8567088} {"current_steps": 4450, "total_steps": 40000, "loss": 0.1707, "lr": 4.848925470999839e-05, "epoch": 0.7259972265274492, "percentage": 11.12, "elapsed_time": "1:00:50", "remaining_time": "8:06:00", "throughput": 2349.56, "total_tokens": 8576320} {"current_steps": 4455, "total_steps": 40000, "loss": 0.2246, "lr": 4.848589182344273e-05, "epoch": 0.7268129537482666, "percentage": 11.14, "elapsed_time": "1:00:52", "remaining_time": "8:05:40", "throughput": 2350.72, "total_tokens": 8585392} {"current_steps": 4460, "total_steps": 40000, "loss": 0.1463, "lr": 4.848252531506742e-05, "epoch": 0.727628680969084, "percentage": 11.15, "elapsed_time": "1:00:54", "remaining_time": "8:05:19", "throughput": 2351.92, "total_tokens": 8594656} {"current_steps": 4465, "total_steps": 40000, "loss": 0.1622, "lr": 4.847915518539161e-05, "epoch": 0.7284444081899013, "percentage": 11.16, "elapsed_time": "1:00:56", "remaining_time": "8:04:59", "throughput": 2353.61, "total_tokens": 8605664} {"current_steps": 4470, "total_steps": 40000, "loss": 0.1826, "lr": 4.847578143493501e-05, "epoch": 0.7292601354107187, "percentage": 11.18, "elapsed_time": "1:00:58", "remaining_time": "8:04:39", "throughput": 2355.27, "total_tokens": 8616608} {"current_steps": 4475, "total_steps": 40000, "loss": 0.3089, "lr": 4.847240406421789e-05, "epoch": 0.730075862631536, "percentage": 11.19, "elapsed_time": "1:01:00", "remaining_time": "8:04:19", "throughput": 2356.57, "total_tokens": 8626272} {"current_steps": 4480, "total_steps": 40000, "loss": 0.2247, "lr": 4.84690230737611e-05, "epoch": 0.7308915898523534, "percentage": 11.2, "elapsed_time": "1:01:02", "remaining_time": "8:03:59", "throughput": 2358.05, "total_tokens": 8636544} {"current_steps": 4485, "total_steps": 40000, "loss": 0.1593, "lr": 4.846563846408602e-05, "epoch": 0.7317073170731707, "percentage": 11.21, "elapsed_time": "1:01:04", "remaining_time": "8:03:38", "throughput": 2359.35, "total_tokens": 8646176} {"current_steps": 4490, "total_steps": 40000, "loss": 0.197, "lr": 4.84622502357146e-05, "epoch": 0.7325230442939881, "percentage": 11.22, "elapsed_time": "1:01:06", "remaining_time": "8:03:18", "throughput": 2360.71, "total_tokens": 8656016} {"current_steps": 4495, "total_steps": 40000, "loss": 0.1635, "lr": 4.8458858389169345e-05, "epoch": 0.7333387715148054, "percentage": 11.24, "elapsed_time": "1:01:08", "remaining_time": "8:02:58", "throughput": 2361.99, "total_tokens": 8665584} {"current_steps": 4500, "total_steps": 40000, "loss": 0.1778, "lr": 4.8455462924973334e-05, "epoch": 0.7341544987356228, "percentage": 11.25, "elapsed_time": "1:01:10", "remaining_time": "8:02:38", "throughput": 2363.0, "total_tokens": 8674192} {"current_steps": 4505, "total_steps": 40000, "loss": 0.1939, "lr": 4.845206384365018e-05, "epoch": 0.7349702259564401, "percentage": 11.26, "elapsed_time": "1:01:12", "remaining_time": "8:02:18", "throughput": 2364.27, "total_tokens": 8683696} {"current_steps": 4510, "total_steps": 40000, "loss": 0.1162, "lr": 4.844866114572405e-05, "epoch": 0.7357859531772575, "percentage": 11.28, "elapsed_time": "1:01:14", "remaining_time": "8:01:58", "throughput": 2365.27, "total_tokens": 8692272} {"current_steps": 4515, "total_steps": 40000, "loss": 0.1621, "lr": 4.8445254831719706e-05, "epoch": 0.7366016803980748, "percentage": 11.29, "elapsed_time": "1:01:17", "remaining_time": "8:01:39", "throughput": 2366.53, "total_tokens": 8701792} {"current_steps": 4520, "total_steps": 40000, "loss": 0.1006, "lr": 4.8441844902162434e-05, "epoch": 0.7374174076188923, "percentage": 11.3, "elapsed_time": "1:01:19", "remaining_time": "8:01:19", "throughput": 2367.47, "total_tokens": 8710160} {"current_steps": 4525, "total_steps": 40000, "loss": 0.1278, "lr": 4.843843135757809e-05, "epoch": 0.7382331348397096, "percentage": 11.31, "elapsed_time": "1:01:21", "remaining_time": "8:00:59", "throughput": 2369.08, "total_tokens": 8720960} {"current_steps": 4530, "total_steps": 40000, "loss": 0.1189, "lr": 4.843501419849308e-05, "epoch": 0.739048862060527, "percentage": 11.33, "elapsed_time": "1:01:23", "remaining_time": "8:00:39", "throughput": 2370.51, "total_tokens": 8731152} {"current_steps": 4535, "total_steps": 40000, "loss": 0.0827, "lr": 4.8431593425434386e-05, "epoch": 0.7398645892813444, "percentage": 11.34, "elapsed_time": "1:01:25", "remaining_time": "8:00:20", "throughput": 2372.1, "total_tokens": 8741888} {"current_steps": 4540, "total_steps": 40000, "loss": 0.1529, "lr": 4.8428169038929526e-05, "epoch": 0.7406803165021617, "percentage": 11.35, "elapsed_time": "1:01:27", "remaining_time": "8:00:00", "throughput": 2373.44, "total_tokens": 8751728} {"current_steps": 4545, "total_steps": 40000, "loss": 0.1211, "lr": 4.8424741039506575e-05, "epoch": 0.7414960437229791, "percentage": 11.36, "elapsed_time": "1:01:29", "remaining_time": "7:59:40", "throughput": 2374.5, "total_tokens": 8760544} {"current_steps": 4550, "total_steps": 40000, "loss": 0.0864, "lr": 4.842130942769419e-05, "epoch": 0.7423117709437964, "percentage": 11.38, "elapsed_time": "1:01:31", "remaining_time": "7:59:21", "throughput": 2375.97, "total_tokens": 8770928} {"current_steps": 4555, "total_steps": 40000, "loss": 0.181, "lr": 4.841787420402156e-05, "epoch": 0.7431274981646138, "percentage": 11.39, "elapsed_time": "1:01:33", "remaining_time": "7:59:01", "throughput": 2377.11, "total_tokens": 8780032} {"current_steps": 4560, "total_steps": 40000, "loss": 0.1343, "lr": 4.841443536901844e-05, "epoch": 0.7439432253854311, "percentage": 11.4, "elapsed_time": "1:01:35", "remaining_time": "7:58:42", "throughput": 2378.57, "total_tokens": 8790320} {"current_steps": 4565, "total_steps": 40000, "loss": 0.0821, "lr": 4.841099292321514e-05, "epoch": 0.7447589526062485, "percentage": 11.41, "elapsed_time": "1:01:37", "remaining_time": "7:58:22", "throughput": 2379.61, "total_tokens": 8799072} {"current_steps": 4570, "total_steps": 40000, "loss": 0.2545, "lr": 4.8407546867142525e-05, "epoch": 0.7455746798270658, "percentage": 11.43, "elapsed_time": "1:01:39", "remaining_time": "7:58:03", "throughput": 2380.69, "total_tokens": 8807968} {"current_steps": 4575, "total_steps": 40000, "loss": 0.1927, "lr": 4.840409720133203e-05, "epoch": 0.7463904070478832, "percentage": 11.44, "elapsed_time": "1:01:41", "remaining_time": "7:57:43", "throughput": 2382.03, "total_tokens": 8817824} {"current_steps": 4580, "total_steps": 40000, "loss": 0.1574, "lr": 4.8400643926315634e-05, "epoch": 0.7472061342687005, "percentage": 11.45, "elapsed_time": "1:01:43", "remaining_time": "7:57:24", "throughput": 2383.28, "total_tokens": 8827392} {"current_steps": 4585, "total_steps": 40000, "loss": 0.1894, "lr": 4.839718704262587e-05, "epoch": 0.7480218614895179, "percentage": 11.46, "elapsed_time": "1:01:45", "remaining_time": "7:57:05", "throughput": 2384.54, "total_tokens": 8836992} {"current_steps": 4590, "total_steps": 40000, "loss": 0.0774, "lr": 4.839372655079585e-05, "epoch": 0.7488375887103352, "percentage": 11.47, "elapsed_time": "1:01:48", "remaining_time": "7:56:45", "throughput": 2385.69, "total_tokens": 8846176} {"current_steps": 4595, "total_steps": 40000, "loss": 0.1649, "lr": 4.83902624513592e-05, "epoch": 0.7496533159311526, "percentage": 11.49, "elapsed_time": "1:01:50", "remaining_time": "7:56:26", "throughput": 2387.46, "total_tokens": 8857664} {"current_steps": 4600, "total_steps": 40000, "loss": 0.1543, "lr": 4.838679474485014e-05, "epoch": 0.7504690431519699, "percentage": 11.5, "elapsed_time": "1:01:52", "remaining_time": "7:56:07", "throughput": 2388.92, "total_tokens": 8868016} {"current_steps": 4600, "total_steps": 40000, "eval_loss": 0.13483543694019318, "epoch": 0.7504690431519699, "percentage": 11.5, "elapsed_time": "1:03:12", "remaining_time": "8:06:28", "throughput": 2338.08, "total_tokens": 8868016} {"current_steps": 4605, "total_steps": 40000, "loss": 0.0408, "lr": 4.838332343180343e-05, "epoch": 0.7512847703727873, "percentage": 11.51, "elapsed_time": "1:03:16", "remaining_time": "8:06:20", "throughput": 2338.46, "total_tokens": 8877904} {"current_steps": 4610, "total_steps": 40000, "loss": 0.1372, "lr": 4.83798485127544e-05, "epoch": 0.7521004975936046, "percentage": 11.53, "elapsed_time": "1:03:18", "remaining_time": "8:06:00", "throughput": 2339.92, "total_tokens": 8888288} {"current_steps": 4615, "total_steps": 40000, "loss": 0.0769, "lr": 4.837636998823892e-05, "epoch": 0.7529162248144221, "percentage": 11.54, "elapsed_time": "1:03:20", "remaining_time": "8:05:40", "throughput": 2341.04, "total_tokens": 8897376} {"current_steps": 4620, "total_steps": 40000, "loss": 0.146, "lr": 4.8372887858793414e-05, "epoch": 0.7537319520352395, "percentage": 11.55, "elapsed_time": "1:03:22", "remaining_time": "8:05:20", "throughput": 2342.1, "total_tokens": 8906240} {"current_steps": 4625, "total_steps": 40000, "loss": 0.2003, "lr": 4.836940212495489e-05, "epoch": 0.7545476792560568, "percentage": 11.56, "elapsed_time": "1:03:24", "remaining_time": "8:05:01", "throughput": 2343.32, "total_tokens": 8915712} {"current_steps": 4630, "total_steps": 40000, "loss": 0.1391, "lr": 4.836591278726087e-05, "epoch": 0.7553634064768742, "percentage": 11.58, "elapsed_time": "1:03:26", "remaining_time": "8:04:41", "throughput": 2344.29, "total_tokens": 8924240} {"current_steps": 4635, "total_steps": 40000, "loss": 0.1241, "lr": 4.836241984624947e-05, "epoch": 0.7561791336976915, "percentage": 11.59, "elapsed_time": "1:03:28", "remaining_time": "8:04:21", "throughput": 2345.44, "total_tokens": 8933488} {"current_steps": 4640, "total_steps": 40000, "loss": 0.1306, "lr": 4.8358923302459336e-05, "epoch": 0.7569948609185089, "percentage": 11.6, "elapsed_time": "1:03:30", "remaining_time": "8:04:02", "throughput": 2346.73, "total_tokens": 8943264} {"current_steps": 4645, "total_steps": 40000, "loss": 0.0862, "lr": 4.835542315642968e-05, "epoch": 0.7578105881393262, "percentage": 11.61, "elapsed_time": "1:03:33", "remaining_time": "8:03:42", "throughput": 2347.99, "total_tokens": 8952912} {"current_steps": 4650, "total_steps": 40000, "loss": 0.1318, "lr": 4.8351919408700274e-05, "epoch": 0.7586263153601436, "percentage": 11.62, "elapsed_time": "1:03:35", "remaining_time": "8:03:22", "throughput": 2349.43, "total_tokens": 8963248} {"current_steps": 4655, "total_steps": 40000, "loss": 0.2171, "lr": 4.834841205981144e-05, "epoch": 0.7594420425809609, "percentage": 11.64, "elapsed_time": "1:03:37", "remaining_time": "8:03:03", "throughput": 2350.61, "total_tokens": 8972608} {"current_steps": 4660, "total_steps": 40000, "loss": 0.2287, "lr": 4.8344901110304054e-05, "epoch": 0.7602577698017783, "percentage": 11.65, "elapsed_time": "1:03:39", "remaining_time": "8:02:43", "throughput": 2351.81, "total_tokens": 8982064} {"current_steps": 4665, "total_steps": 40000, "loss": 0.0803, "lr": 4.8341386560719534e-05, "epoch": 0.7610734970225956, "percentage": 11.66, "elapsed_time": "1:03:41", "remaining_time": "8:02:24", "throughput": 2353.2, "total_tokens": 8992240} {"current_steps": 4670, "total_steps": 40000, "loss": 0.1018, "lr": 4.833786841159989e-05, "epoch": 0.761889224243413, "percentage": 11.68, "elapsed_time": "1:03:43", "remaining_time": "8:02:04", "throughput": 2354.68, "total_tokens": 9002784} {"current_steps": 4675, "total_steps": 40000, "loss": 0.2399, "lr": 4.833434666348765e-05, "epoch": 0.7627049514642303, "percentage": 11.69, "elapsed_time": "1:03:45", "remaining_time": "8:01:45", "throughput": 2355.88, "total_tokens": 9012240} {"current_steps": 4680, "total_steps": 40000, "loss": 0.1561, "lr": 4.833082131692592e-05, "epoch": 0.7635206786850477, "percentage": 11.7, "elapsed_time": "1:03:47", "remaining_time": "8:01:26", "throughput": 2357.03, "total_tokens": 9021520} {"current_steps": 4685, "total_steps": 40000, "loss": 0.2433, "lr": 4.832729237245835e-05, "epoch": 0.764336405905865, "percentage": 11.71, "elapsed_time": "1:03:49", "remaining_time": "8:01:06", "throughput": 2358.17, "total_tokens": 9030752} {"current_steps": 4690, "total_steps": 40000, "loss": 0.1505, "lr": 4.8323759830629145e-05, "epoch": 0.7651521331266824, "percentage": 11.72, "elapsed_time": "1:03:51", "remaining_time": "8:00:47", "throughput": 2359.15, "total_tokens": 9039376} {"current_steps": 4695, "total_steps": 40000, "loss": 0.1024, "lr": 4.8320223691983066e-05, "epoch": 0.7659678603474998, "percentage": 11.74, "elapsed_time": "1:03:53", "remaining_time": "8:00:28", "throughput": 2360.5, "total_tokens": 9049424} {"current_steps": 4700, "total_steps": 40000, "loss": 0.1181, "lr": 4.831668395706544e-05, "epoch": 0.7667835875683171, "percentage": 11.75, "elapsed_time": "1:03:55", "remaining_time": "8:00:08", "throughput": 2362.11, "total_tokens": 9060480} {"current_steps": 4705, "total_steps": 40000, "loss": 0.0458, "lr": 4.8313140626422125e-05, "epoch": 0.7675993147891346, "percentage": 11.76, "elapsed_time": "1:03:57", "remaining_time": "7:59:49", "throughput": 2363.42, "total_tokens": 9070384} {"current_steps": 4710, "total_steps": 40000, "loss": 0.0732, "lr": 4.830959370059956e-05, "epoch": 0.7684150420099519, "percentage": 11.77, "elapsed_time": "1:03:59", "remaining_time": "7:59:30", "throughput": 2364.65, "total_tokens": 9080016} {"current_steps": 4715, "total_steps": 40000, "loss": 0.0832, "lr": 4.830604318014472e-05, "epoch": 0.7692307692307693, "percentage": 11.79, "elapsed_time": "1:04:01", "remaining_time": "7:59:11", "throughput": 2365.83, "total_tokens": 9089440} {"current_steps": 4720, "total_steps": 40000, "loss": 0.0472, "lr": 4.830248906560514e-05, "epoch": 0.7700464964515866, "percentage": 11.8, "elapsed_time": "1:04:04", "remaining_time": "7:58:52", "throughput": 2366.89, "total_tokens": 9098416} {"current_steps": 4725, "total_steps": 40000, "loss": 0.1285, "lr": 4.829893135752891e-05, "epoch": 0.770862223672404, "percentage": 11.81, "elapsed_time": "1:04:06", "remaining_time": "7:58:33", "throughput": 2368.11, "total_tokens": 9108000} {"current_steps": 4730, "total_steps": 40000, "loss": 0.1751, "lr": 4.829537005646466e-05, "epoch": 0.7716779508932213, "percentage": 11.82, "elapsed_time": "1:04:08", "remaining_time": "7:58:14", "throughput": 2369.42, "total_tokens": 9117920} {"current_steps": 4735, "total_steps": 40000, "loss": 0.0722, "lr": 4.8291805162961615e-05, "epoch": 0.7724936781140387, "percentage": 11.84, "elapsed_time": "1:04:10", "remaining_time": "7:57:55", "throughput": 2370.95, "total_tokens": 9128704} {"current_steps": 4740, "total_steps": 40000, "loss": 0.1551, "lr": 4.82882366775695e-05, "epoch": 0.773309405334856, "percentage": 11.85, "elapsed_time": "1:04:12", "remaining_time": "7:57:36", "throughput": 2371.84, "total_tokens": 9137024} {"current_steps": 4745, "total_steps": 40000, "loss": 0.0723, "lr": 4.828466460083864e-05, "epoch": 0.7741251325556734, "percentage": 11.86, "elapsed_time": "1:04:14", "remaining_time": "7:57:17", "throughput": 2373.08, "total_tokens": 9146768} {"current_steps": 4750, "total_steps": 40000, "loss": 0.0496, "lr": 4.8281088933319877e-05, "epoch": 0.7749408597764907, "percentage": 11.88, "elapsed_time": "1:04:16", "remaining_time": "7:56:59", "throughput": 2374.19, "total_tokens": 9155968} {"current_steps": 4755, "total_steps": 40000, "loss": 0.1361, "lr": 4.827750967556464e-05, "epoch": 0.7757565869973081, "percentage": 11.89, "elapsed_time": "1:04:18", "remaining_time": "7:56:40", "throughput": 2375.55, "total_tokens": 9166176} {"current_steps": 4760, "total_steps": 40000, "loss": 0.2683, "lr": 4.827392682812488e-05, "epoch": 0.7765723142181254, "percentage": 11.9, "elapsed_time": "1:04:20", "remaining_time": "7:56:21", "throughput": 2376.73, "total_tokens": 9175632} {"current_steps": 4765, "total_steps": 40000, "loss": 0.1048, "lr": 4.827034039155312e-05, "epoch": 0.7773880414389428, "percentage": 11.91, "elapsed_time": "1:04:22", "remaining_time": "7:56:02", "throughput": 2378.18, "total_tokens": 9186160} {"current_steps": 4770, "total_steps": 40000, "loss": 0.0663, "lr": 4.8266750366402445e-05, "epoch": 0.7782037686597602, "percentage": 11.92, "elapsed_time": "1:04:24", "remaining_time": "7:55:44", "throughput": 2379.48, "total_tokens": 9196096} {"current_steps": 4775, "total_steps": 40000, "loss": 0.1113, "lr": 4.8263156753226476e-05, "epoch": 0.7790194958805775, "percentage": 11.94, "elapsed_time": "1:04:26", "remaining_time": "7:55:25", "throughput": 2380.37, "total_tokens": 9204448} {"current_steps": 4780, "total_steps": 40000, "loss": 0.2188, "lr": 4.8259559552579394e-05, "epoch": 0.7798352231013949, "percentage": 11.95, "elapsed_time": "1:04:29", "remaining_time": "7:55:08", "throughput": 2381.49, "total_tokens": 9214240} {"current_steps": 4785, "total_steps": 40000, "loss": 0.1456, "lr": 4.825595876501593e-05, "epoch": 0.7806509503222122, "percentage": 11.96, "elapsed_time": "1:04:31", "remaining_time": "7:54:49", "throughput": 2382.86, "total_tokens": 9224528} {"current_steps": 4790, "total_steps": 40000, "loss": 0.3098, "lr": 4.825235439109137e-05, "epoch": 0.7814666775430296, "percentage": 11.97, "elapsed_time": "1:04:33", "remaining_time": "7:54:31", "throughput": 2384.16, "total_tokens": 9234496} {"current_steps": 4795, "total_steps": 40000, "loss": 0.1448, "lr": 4.824874643136156e-05, "epoch": 0.7822824047638469, "percentage": 11.99, "elapsed_time": "1:04:35", "remaining_time": "7:54:12", "throughput": 2385.41, "total_tokens": 9244288} {"current_steps": 4800, "total_steps": 40000, "loss": 0.1647, "lr": 4.824513488638288e-05, "epoch": 0.7830981319846644, "percentage": 12.0, "elapsed_time": "1:04:37", "remaining_time": "7:53:54", "throughput": 2386.8, "total_tokens": 9254560} {"current_steps": 4800, "total_steps": 40000, "eval_loss": 0.1545816957950592, "epoch": 0.7830981319846644, "percentage": 12.0, "elapsed_time": "1:05:58", "remaining_time": "8:03:45", "throughput": 2338.18, "total_tokens": 9254560} {"current_steps": 4805, "total_steps": 40000, "loss": 0.2541, "lr": 4.8241519756712293e-05, "epoch": 0.7839138592054817, "percentage": 12.01, "elapsed_time": "1:06:01", "remaining_time": "8:03:40", "throughput": 2338.04, "total_tokens": 9263248} {"current_steps": 4810, "total_steps": 40000, "loss": 0.2545, "lr": 4.8237901042907285e-05, "epoch": 0.7847295864262991, "percentage": 12.03, "elapsed_time": "1:06:04", "remaining_time": "8:03:21", "throughput": 2339.41, "total_tokens": 9273568} {"current_steps": 4815, "total_steps": 40000, "loss": 0.0649, "lr": 4.823427874552591e-05, "epoch": 0.7855453136471164, "percentage": 12.04, "elapsed_time": "1:06:06", "remaining_time": "8:03:01", "throughput": 2340.42, "total_tokens": 9282384} {"current_steps": 4820, "total_steps": 40000, "loss": 0.0783, "lr": 4.823065286512677e-05, "epoch": 0.7863610408679338, "percentage": 12.05, "elapsed_time": "1:06:08", "remaining_time": "8:02:42", "throughput": 2341.48, "total_tokens": 9291408} {"current_steps": 4825, "total_steps": 40000, "loss": 0.173, "lr": 4.8227023402269025e-05, "epoch": 0.7871767680887511, "percentage": 12.06, "elapsed_time": "1:06:10", "remaining_time": "8:02:23", "throughput": 2342.78, "total_tokens": 9301408} {"current_steps": 4830, "total_steps": 40000, "loss": 0.1791, "lr": 4.822339035751239e-05, "epoch": 0.7879924953095685, "percentage": 12.07, "elapsed_time": "1:06:12", "remaining_time": "8:02:04", "throughput": 2343.79, "total_tokens": 9310272} {"current_steps": 4835, "total_steps": 40000, "loss": 0.0962, "lr": 4.8219753731417104e-05, "epoch": 0.7888082225303858, "percentage": 12.09, "elapsed_time": "1:06:14", "remaining_time": "8:01:45", "throughput": 2345.05, "total_tokens": 9320112} {"current_steps": 4840, "total_steps": 40000, "loss": 0.1086, "lr": 4.821611352454401e-05, "epoch": 0.7896239497512032, "percentage": 12.1, "elapsed_time": "1:06:16", "remaining_time": "8:01:26", "throughput": 2346.21, "total_tokens": 9329584} {"current_steps": 4845, "total_steps": 40000, "loss": 0.0763, "lr": 4.8212469737454444e-05, "epoch": 0.7904396769720206, "percentage": 12.11, "elapsed_time": "1:06:18", "remaining_time": "8:01:07", "throughput": 2347.21, "total_tokens": 9338416} {"current_steps": 4850, "total_steps": 40000, "loss": 0.2224, "lr": 4.820882237071035e-05, "epoch": 0.7912554041928379, "percentage": 12.12, "elapsed_time": "1:06:20", "remaining_time": "8:00:48", "throughput": 2348.56, "total_tokens": 9348624} {"current_steps": 4855, "total_steps": 40000, "loss": 0.1126, "lr": 4.820517142487417e-05, "epoch": 0.7920711314136553, "percentage": 12.14, "elapsed_time": "1:06:22", "remaining_time": "8:00:30", "throughput": 2349.3, "total_tokens": 9356432} {"current_steps": 4860, "total_steps": 40000, "loss": 0.1716, "lr": 4.8201516900508956e-05, "epoch": 0.7928868586344726, "percentage": 12.15, "elapsed_time": "1:06:24", "remaining_time": "8:00:11", "throughput": 2350.19, "total_tokens": 9364816} {"current_steps": 4865, "total_steps": 40000, "loss": 0.2729, "lr": 4.819785879817827e-05, "epoch": 0.79370258585529, "percentage": 12.16, "elapsed_time": "1:06:26", "remaining_time": "7:59:52", "throughput": 2351.5, "total_tokens": 9374880} {"current_steps": 4870, "total_steps": 40000, "loss": 0.1745, "lr": 4.8194197118446226e-05, "epoch": 0.7945183130761073, "percentage": 12.17, "elapsed_time": "1:06:28", "remaining_time": "7:59:33", "throughput": 2352.54, "total_tokens": 9383904} {"current_steps": 4875, "total_steps": 40000, "loss": 0.162, "lr": 4.819053186187752e-05, "epoch": 0.7953340402969247, "percentage": 12.19, "elapsed_time": "1:06:30", "remaining_time": "7:59:15", "throughput": 2353.72, "total_tokens": 9393504} {"current_steps": 4880, "total_steps": 40000, "loss": 0.1511, "lr": 4.818686302903736e-05, "epoch": 0.796149767517742, "percentage": 12.2, "elapsed_time": "1:06:32", "remaining_time": "7:58:56", "throughput": 2354.77, "total_tokens": 9402576} {"current_steps": 4885, "total_steps": 40000, "loss": 0.0949, "lr": 4.818319062049154e-05, "epoch": 0.7969654947385594, "percentage": 12.21, "elapsed_time": "1:06:35", "remaining_time": "7:58:37", "throughput": 2356.06, "total_tokens": 9412592} {"current_steps": 4890, "total_steps": 40000, "loss": 0.2249, "lr": 4.817951463680639e-05, "epoch": 0.7977812219593767, "percentage": 12.22, "elapsed_time": "1:06:37", "remaining_time": "7:58:19", "throughput": 2357.28, "total_tokens": 9422368} {"current_steps": 4895, "total_steps": 40000, "loss": 0.1209, "lr": 4.817583507854879e-05, "epoch": 0.7985969491801942, "percentage": 12.24, "elapsed_time": "1:06:39", "remaining_time": "7:58:00", "throughput": 2358.56, "total_tokens": 9432352} {"current_steps": 4900, "total_steps": 40000, "loss": 0.1032, "lr": 4.817215194628617e-05, "epoch": 0.7994126764010115, "percentage": 12.25, "elapsed_time": "1:06:41", "remaining_time": "7:57:42", "throughput": 2359.52, "total_tokens": 9441056} {"current_steps": 4905, "total_steps": 40000, "loss": 0.1052, "lr": 4.816846524058653e-05, "epoch": 0.8002284036218289, "percentage": 12.26, "elapsed_time": "1:06:43", "remaining_time": "7:57:23", "throughput": 2360.9, "total_tokens": 9451456} {"current_steps": 4910, "total_steps": 40000, "loss": 0.0877, "lr": 4.816477496201839e-05, "epoch": 0.8010441308426463, "percentage": 12.28, "elapsed_time": "1:06:45", "remaining_time": "7:57:05", "throughput": 2361.82, "total_tokens": 9460064} {"current_steps": 4915, "total_steps": 40000, "loss": 0.295, "lr": 4.8161081111150845e-05, "epoch": 0.8018598580634636, "percentage": 12.29, "elapsed_time": "1:06:47", "remaining_time": "7:56:46", "throughput": 2363.05, "total_tokens": 9469824} {"current_steps": 4920, "total_steps": 40000, "loss": 0.0876, "lr": 4.815738368855354e-05, "epoch": 0.802675585284281, "percentage": 12.3, "elapsed_time": "1:06:49", "remaining_time": "7:56:28", "throughput": 2363.79, "total_tokens": 9477680} {"current_steps": 4925, "total_steps": 40000, "loss": 0.1174, "lr": 4.815368269479664e-05, "epoch": 0.8034913125050983, "percentage": 12.31, "elapsed_time": "1:06:51", "remaining_time": "7:56:09", "throughput": 2364.82, "total_tokens": 9486720} {"current_steps": 4930, "total_steps": 40000, "loss": 0.1597, "lr": 4.814997813045092e-05, "epoch": 0.8043070397259157, "percentage": 12.32, "elapsed_time": "1:06:53", "remaining_time": "7:55:51", "throughput": 2366.01, "total_tokens": 9496352} {"current_steps": 4935, "total_steps": 40000, "loss": 0.1015, "lr": 4.814626999608764e-05, "epoch": 0.805122766946733, "percentage": 12.34, "elapsed_time": "1:06:55", "remaining_time": "7:55:33", "throughput": 2367.06, "total_tokens": 9505488} {"current_steps": 4940, "total_steps": 40000, "loss": 0.1497, "lr": 4.814255829227865e-05, "epoch": 0.8059384941675504, "percentage": 12.35, "elapsed_time": "1:06:57", "remaining_time": "7:55:14", "throughput": 2368.28, "total_tokens": 9515264} {"current_steps": 4945, "total_steps": 40000, "loss": 0.1042, "lr": 4.813884301959635e-05, "epoch": 0.8067542213883677, "percentage": 12.36, "elapsed_time": "1:06:59", "remaining_time": "7:54:56", "throughput": 2369.67, "total_tokens": 9525760} {"current_steps": 4950, "total_steps": 40000, "loss": 0.2226, "lr": 4.813512417861368e-05, "epoch": 0.8075699486091851, "percentage": 12.38, "elapsed_time": "1:07:01", "remaining_time": "7:54:38", "throughput": 2371.07, "total_tokens": 9536320} {"current_steps": 4955, "total_steps": 40000, "loss": 0.0854, "lr": 4.813140176990411e-05, "epoch": 0.8083856758300024, "percentage": 12.39, "elapsed_time": "1:07:04", "remaining_time": "7:54:20", "throughput": 2372.43, "total_tokens": 9546672} {"current_steps": 4960, "total_steps": 40000, "loss": 0.1436, "lr": 4.8127675794041714e-05, "epoch": 0.8092014030508198, "percentage": 12.4, "elapsed_time": "1:07:06", "remaining_time": "7:54:02", "throughput": 2373.16, "total_tokens": 9554544} {"current_steps": 4965, "total_steps": 40000, "loss": 0.1546, "lr": 4.812394625160107e-05, "epoch": 0.8100171302716371, "percentage": 12.41, "elapsed_time": "1:07:08", "remaining_time": "7:53:44", "throughput": 2374.4, "total_tokens": 9564416} {"current_steps": 4970, "total_steps": 40000, "loss": 0.1424, "lr": 4.812021314315732e-05, "epoch": 0.8108328574924545, "percentage": 12.43, "elapsed_time": "1:07:10", "remaining_time": "7:53:26", "throughput": 2375.75, "total_tokens": 9574768} {"current_steps": 4975, "total_steps": 40000, "loss": 0.1564, "lr": 4.811647646928616e-05, "epoch": 0.8116485847132718, "percentage": 12.44, "elapsed_time": "1:07:12", "remaining_time": "7:53:08", "throughput": 2377.28, "total_tokens": 9585856} {"current_steps": 4980, "total_steps": 40000, "loss": 0.0644, "lr": 4.8112736230563814e-05, "epoch": 0.8124643119340892, "percentage": 12.45, "elapsed_time": "1:07:14", "remaining_time": "7:52:49", "throughput": 2378.44, "total_tokens": 9595440} {"current_steps": 4985, "total_steps": 40000, "loss": 0.0887, "lr": 4.81089924275671e-05, "epoch": 0.8132800391549067, "percentage": 12.46, "elapsed_time": "1:07:16", "remaining_time": "7:52:32", "throughput": 2379.69, "total_tokens": 9605424} {"current_steps": 4990, "total_steps": 40000, "loss": 0.1094, "lr": 4.810524506087335e-05, "epoch": 0.814095766375724, "percentage": 12.47, "elapsed_time": "1:07:18", "remaining_time": "7:52:14", "throughput": 2381.05, "total_tokens": 9615808} {"current_steps": 4995, "total_steps": 40000, "loss": 0.2214, "lr": 4.810149413106044e-05, "epoch": 0.8149114935965414, "percentage": 12.49, "elapsed_time": "1:07:20", "remaining_time": "7:51:56", "throughput": 2381.96, "total_tokens": 9624432} {"current_steps": 5000, "total_steps": 40000, "loss": 0.0188, "lr": 4.809773963870684e-05, "epoch": 0.8157272208173587, "percentage": 12.5, "elapsed_time": "1:07:22", "remaining_time": "7:51:38", "throughput": 2383.25, "total_tokens": 9634544} {"current_steps": 5000, "total_steps": 40000, "eval_loss": 0.1498986780643463, "epoch": 0.8157272208173587, "percentage": 12.5, "elapsed_time": "1:08:43", "remaining_time": "8:01:01", "throughput": 2336.74, "total_tokens": 9634544} {"current_steps": 5005, "total_steps": 40000, "loss": 0.1071, "lr": 4.809398158439151e-05, "epoch": 0.8165429480381761, "percentage": 12.51, "elapsed_time": "1:08:46", "remaining_time": "8:00:53", "throughput": 2337.09, "total_tokens": 9644320} {"current_steps": 5010, "total_steps": 40000, "loss": 0.1366, "lr": 4.8090219968694005e-05, "epoch": 0.8173586752589934, "percentage": 12.53, "elapsed_time": "1:08:48", "remaining_time": "8:00:35", "throughput": 2338.42, "total_tokens": 9654656} {"current_steps": 5015, "total_steps": 40000, "loss": 0.1681, "lr": 4.808645479219442e-05, "epoch": 0.8181744024798108, "percentage": 12.54, "elapsed_time": "1:08:50", "remaining_time": "8:00:16", "throughput": 2339.34, "total_tokens": 9663280} {"current_steps": 5020, "total_steps": 40000, "loss": 0.1812, "lr": 4.8082686055473375e-05, "epoch": 0.8189901297006281, "percentage": 12.55, "elapsed_time": "1:08:52", "remaining_time": "7:59:58", "throughput": 2340.74, "total_tokens": 9673904} {"current_steps": 5025, "total_steps": 40000, "loss": 0.1099, "lr": 4.8078913759112066e-05, "epoch": 0.8198058569214455, "percentage": 12.56, "elapsed_time": "1:08:54", "remaining_time": "7:59:39", "throughput": 2341.87, "total_tokens": 9683392} {"current_steps": 5030, "total_steps": 40000, "loss": 0.1634, "lr": 4.807513790369223e-05, "epoch": 0.8206215841422628, "percentage": 12.57, "elapsed_time": "1:08:56", "remaining_time": "7:59:21", "throughput": 2343.03, "total_tokens": 9693008} {"current_steps": 5035, "total_steps": 40000, "loss": 0.2687, "lr": 4.8071358489796145e-05, "epoch": 0.8214373113630802, "percentage": 12.59, "elapsed_time": "1:08:59", "remaining_time": "7:59:02", "throughput": 2344.37, "total_tokens": 9703376} {"current_steps": 5040, "total_steps": 40000, "loss": 0.2111, "lr": 4.806757551800665e-05, "epoch": 0.8222530385838975, "percentage": 12.6, "elapsed_time": "1:09:01", "remaining_time": "7:58:44", "throughput": 2345.43, "total_tokens": 9712624} {"current_steps": 5045, "total_steps": 40000, "loss": 0.0469, "lr": 4.806378898890713e-05, "epoch": 0.8230687658047149, "percentage": 12.61, "elapsed_time": "1:09:03", "remaining_time": "7:58:26", "throughput": 2346.45, "total_tokens": 9721680} {"current_steps": 5050, "total_steps": 40000, "loss": 0.1547, "lr": 4.80599989030815e-05, "epoch": 0.8238844930255322, "percentage": 12.62, "elapsed_time": "1:09:05", "remaining_time": "7:58:08", "throughput": 2347.6, "total_tokens": 9731312} {"current_steps": 5055, "total_steps": 40000, "loss": 0.2831, "lr": 4.805620526111426e-05, "epoch": 0.8247002202463496, "percentage": 12.64, "elapsed_time": "1:09:07", "remaining_time": "7:57:49", "throughput": 2348.85, "total_tokens": 9741312} {"current_steps": 5060, "total_steps": 40000, "loss": 0.3001, "lr": 4.805240806359042e-05, "epoch": 0.8255159474671669, "percentage": 12.65, "elapsed_time": "1:09:09", "remaining_time": "7:57:31", "throughput": 2349.91, "total_tokens": 9750560} {"current_steps": 5065, "total_steps": 40000, "loss": 0.0903, "lr": 4.804860731109557e-05, "epoch": 0.8263316746879843, "percentage": 12.66, "elapsed_time": "1:09:11", "remaining_time": "7:57:13", "throughput": 2350.73, "total_tokens": 9758800} {"current_steps": 5070, "total_steps": 40000, "loss": 0.0894, "lr": 4.804480300421581e-05, "epoch": 0.8271474019088016, "percentage": 12.68, "elapsed_time": "1:09:13", "remaining_time": "7:56:55", "throughput": 2351.65, "total_tokens": 9767472} {"current_steps": 5075, "total_steps": 40000, "loss": 0.0994, "lr": 4.804099514353784e-05, "epoch": 0.827963129129619, "percentage": 12.69, "elapsed_time": "1:09:15", "remaining_time": "7:56:37", "throughput": 2352.78, "total_tokens": 9777056} {"current_steps": 5080, "total_steps": 40000, "loss": 0.0679, "lr": 4.8037183729648867e-05, "epoch": 0.8287788563504365, "percentage": 12.7, "elapsed_time": "1:09:17", "remaining_time": "7:56:19", "throughput": 2353.74, "total_tokens": 9785888} {"current_steps": 5085, "total_steps": 40000, "loss": 0.0839, "lr": 4.803336876313666e-05, "epoch": 0.8295945835712538, "percentage": 12.71, "elapsed_time": "1:09:19", "remaining_time": "7:56:01", "throughput": 2355.03, "total_tokens": 9796080} {"current_steps": 5090, "total_steps": 40000, "loss": 0.1754, "lr": 4.802955024458953e-05, "epoch": 0.8304103107920712, "percentage": 12.72, "elapsed_time": "1:09:21", "remaining_time": "7:55:43", "throughput": 2355.88, "total_tokens": 9804512} {"current_steps": 5095, "total_steps": 40000, "loss": 0.059, "lr": 4.802572817459634e-05, "epoch": 0.8312260380128885, "percentage": 12.74, "elapsed_time": "1:09:23", "remaining_time": "7:55:25", "throughput": 2356.99, "total_tokens": 9813984} {"current_steps": 5100, "total_steps": 40000, "loss": 0.16, "lr": 4.802190255374651e-05, "epoch": 0.8320417652337059, "percentage": 12.75, "elapsed_time": "1:09:25", "remaining_time": "7:55:07", "throughput": 2358.11, "total_tokens": 9823504} {"current_steps": 5105, "total_steps": 40000, "loss": 0.1818, "lr": 4.801807338263e-05, "epoch": 0.8328574924545232, "percentage": 12.76, "elapsed_time": "1:09:27", "remaining_time": "7:54:49", "throughput": 2359.29, "total_tokens": 9833296} {"current_steps": 5110, "total_steps": 40000, "loss": 0.0335, "lr": 4.8014240661837306e-05, "epoch": 0.8336732196753406, "percentage": 12.78, "elapsed_time": "1:09:29", "remaining_time": "7:54:31", "throughput": 2360.53, "total_tokens": 9843360} {"current_steps": 5115, "total_steps": 40000, "loss": 0.1962, "lr": 4.80104043919595e-05, "epoch": 0.8344889468961579, "percentage": 12.79, "elapsed_time": "1:09:32", "remaining_time": "7:54:13", "throughput": 2361.74, "total_tokens": 9853264} {"current_steps": 5120, "total_steps": 40000, "loss": 0.114, "lr": 4.800656457358815e-05, "epoch": 0.8353046741169753, "percentage": 12.8, "elapsed_time": "1:09:34", "remaining_time": "7:53:56", "throughput": 2362.65, "total_tokens": 9861952} {"current_steps": 5125, "total_steps": 40000, "loss": 0.067, "lr": 4.800272120731544e-05, "epoch": 0.8361204013377926, "percentage": 12.81, "elapsed_time": "1:09:36", "remaining_time": "7:53:38", "throughput": 2363.75, "total_tokens": 9871424} {"current_steps": 5130, "total_steps": 40000, "loss": 0.1886, "lr": 4.799887429373404e-05, "epoch": 0.83693612855861, "percentage": 12.83, "elapsed_time": "1:09:38", "remaining_time": "7:53:20", "throughput": 2364.96, "total_tokens": 9881344} {"current_steps": 5135, "total_steps": 40000, "loss": 0.094, "lr": 4.79950238334372e-05, "epoch": 0.8377518557794273, "percentage": 12.84, "elapsed_time": "1:09:40", "remaining_time": "7:53:02", "throughput": 2366.12, "total_tokens": 9891104} {"current_steps": 5140, "total_steps": 40000, "loss": 0.1696, "lr": 4.799116982701872e-05, "epoch": 0.8385675830002447, "percentage": 12.85, "elapsed_time": "1:09:42", "remaining_time": "7:52:45", "throughput": 2367.24, "total_tokens": 9900656} {"current_steps": 5145, "total_steps": 40000, "loss": 0.0525, "lr": 4.7987312275072926e-05, "epoch": 0.839383310221062, "percentage": 12.86, "elapsed_time": "1:09:44", "remaining_time": "7:52:27", "throughput": 2368.05, "total_tokens": 9908944} {"current_steps": 5150, "total_steps": 40000, "loss": 0.1015, "lr": 4.79834511781947e-05, "epoch": 0.8401990374418794, "percentage": 12.88, "elapsed_time": "1:09:46", "remaining_time": "7:52:10", "throughput": 2368.89, "total_tokens": 9917344} {"current_steps": 5155, "total_steps": 40000, "loss": 0.1004, "lr": 4.797958653697947e-05, "epoch": 0.8410147646626968, "percentage": 12.89, "elapsed_time": "1:09:48", "remaining_time": "7:51:52", "throughput": 2369.69, "total_tokens": 9925616} {"current_steps": 5160, "total_steps": 40000, "loss": 0.1359, "lr": 4.7975718352023225e-05, "epoch": 0.8418304918835141, "percentage": 12.9, "elapsed_time": "1:09:50", "remaining_time": "7:51:34", "throughput": 2371.07, "total_tokens": 9936288} {"current_steps": 5165, "total_steps": 40000, "loss": 0.2317, "lr": 4.7971846623922476e-05, "epoch": 0.8426462191043315, "percentage": 12.91, "elapsed_time": "1:09:52", "remaining_time": "7:51:17", "throughput": 2372.28, "total_tokens": 9946224} {"current_steps": 5170, "total_steps": 40000, "loss": 0.117, "lr": 4.7967971353274294e-05, "epoch": 0.8434619463251488, "percentage": 12.93, "elapsed_time": "1:09:54", "remaining_time": "7:50:59", "throughput": 2373.18, "total_tokens": 9954896} {"current_steps": 5175, "total_steps": 40000, "loss": 0.2031, "lr": 4.79640925406763e-05, "epoch": 0.8442776735459663, "percentage": 12.94, "elapsed_time": "1:09:56", "remaining_time": "7:50:42", "throughput": 2374.41, "total_tokens": 9964960} {"current_steps": 5180, "total_steps": 40000, "loss": 0.2108, "lr": 4.796021018672664e-05, "epoch": 0.8450934007667836, "percentage": 12.95, "elapsed_time": "1:09:58", "remaining_time": "7:50:24", "throughput": 2375.72, "total_tokens": 9975344} {"current_steps": 5185, "total_steps": 40000, "loss": 0.2057, "lr": 4.795632429202405e-05, "epoch": 0.845909127987601, "percentage": 12.96, "elapsed_time": "1:10:00", "remaining_time": "7:50:07", "throughput": 2376.76, "total_tokens": 9984624} {"current_steps": 5190, "total_steps": 40000, "loss": 0.2346, "lr": 4.795243485716775e-05, "epoch": 0.8467248552084183, "percentage": 12.97, "elapsed_time": "1:10:02", "remaining_time": "7:49:50", "throughput": 2378.07, "total_tokens": 9995008} {"current_steps": 5195, "total_steps": 40000, "loss": 0.2088, "lr": 4.794854188275757e-05, "epoch": 0.8475405824292357, "percentage": 12.99, "elapsed_time": "1:10:05", "remaining_time": "7:49:32", "throughput": 2379.33, "total_tokens": 10005200} {"current_steps": 5200, "total_steps": 40000, "loss": 0.1237, "lr": 4.794464536939384e-05, "epoch": 0.848356309650053, "percentage": 13.0, "elapsed_time": "1:10:07", "remaining_time": "7:49:15", "throughput": 2380.25, "total_tokens": 10013984} {"current_steps": 5200, "total_steps": 40000, "eval_loss": 0.13520273566246033, "epoch": 0.848356309650053, "percentage": 13.0, "elapsed_time": "1:11:27", "remaining_time": "7:58:14", "throughput": 2335.56, "total_tokens": 10013984} {"current_steps": 5205, "total_steps": 40000, "loss": 0.0808, "lr": 4.794074531767745e-05, "epoch": 0.8491720368708704, "percentage": 13.01, "elapsed_time": "1:11:31", "remaining_time": "7:58:07", "throughput": 2335.75, "total_tokens": 10023648} {"current_steps": 5210, "total_steps": 40000, "loss": 0.2055, "lr": 4.7936841728209834e-05, "epoch": 0.8499877640916877, "percentage": 13.03, "elapsed_time": "1:11:33", "remaining_time": "7:57:49", "throughput": 2336.55, "total_tokens": 10031968} {"current_steps": 5215, "total_steps": 40000, "loss": 0.1435, "lr": 4.7932934601593e-05, "epoch": 0.8508034913125051, "percentage": 13.04, "elapsed_time": "1:11:35", "remaining_time": "7:57:32", "throughput": 2337.54, "total_tokens": 10041040} {"current_steps": 5220, "total_steps": 40000, "loss": 0.2076, "lr": 4.792902393842943e-05, "epoch": 0.8516192185333225, "percentage": 13.05, "elapsed_time": "1:11:37", "remaining_time": "7:57:14", "throughput": 2338.89, "total_tokens": 10051664} {"current_steps": 5225, "total_steps": 40000, "loss": 0.2355, "lr": 4.792510973932225e-05, "epoch": 0.8524349457541398, "percentage": 13.06, "elapsed_time": "1:11:39", "remaining_time": "7:56:56", "throughput": 2339.81, "total_tokens": 10060432} {"current_steps": 5230, "total_steps": 40000, "loss": 0.1219, "lr": 4.7921192004875036e-05, "epoch": 0.8532506729749572, "percentage": 13.08, "elapsed_time": "1:11:41", "remaining_time": "7:56:38", "throughput": 2340.98, "total_tokens": 10070288} {"current_steps": 5235, "total_steps": 40000, "loss": 0.1552, "lr": 4.791727073569198e-05, "epoch": 0.8540664001957745, "percentage": 13.09, "elapsed_time": "1:11:43", "remaining_time": "7:56:21", "throughput": 2341.97, "total_tokens": 10079392} {"current_steps": 5240, "total_steps": 40000, "loss": 0.0523, "lr": 4.7913345932377775e-05, "epoch": 0.8548821274165919, "percentage": 13.1, "elapsed_time": "1:11:45", "remaining_time": "7:56:03", "throughput": 2342.92, "total_tokens": 10088320} {"current_steps": 5245, "total_steps": 40000, "loss": 0.0688, "lr": 4.790941759553769e-05, "epoch": 0.8556978546374092, "percentage": 13.11, "elapsed_time": "1:11:47", "remaining_time": "7:55:45", "throughput": 2343.91, "total_tokens": 10097456} {"current_steps": 5250, "total_steps": 40000, "loss": 0.1684, "lr": 4.79054857257775e-05, "epoch": 0.8565135818582266, "percentage": 13.12, "elapsed_time": "1:11:50", "remaining_time": "7:55:28", "throughput": 2345.25, "total_tokens": 10108048} {"current_steps": 5255, "total_steps": 40000, "loss": 0.104, "lr": 4.790155032370357e-05, "epoch": 0.8573293090790439, "percentage": 13.14, "elapsed_time": "1:11:52", "remaining_time": "7:55:10", "throughput": 2346.11, "total_tokens": 10116608} {"current_steps": 5260, "total_steps": 40000, "loss": 0.2, "lr": 4.789761138992278e-05, "epoch": 0.8581450362998613, "percentage": 13.15, "elapsed_time": "1:11:54", "remaining_time": "7:54:52", "throughput": 2347.15, "total_tokens": 10125936} {"current_steps": 5265, "total_steps": 40000, "loss": 0.0673, "lr": 4.7893668925042565e-05, "epoch": 0.8589607635206787, "percentage": 13.16, "elapsed_time": "1:11:56", "remaining_time": "7:54:35", "throughput": 2348.33, "total_tokens": 10135856} {"current_steps": 5270, "total_steps": 40000, "loss": 0.1028, "lr": 4.78897229296709e-05, "epoch": 0.8597764907414961, "percentage": 13.18, "elapsed_time": "1:11:58", "remaining_time": "7:54:17", "throughput": 2349.49, "total_tokens": 10145696} {"current_steps": 5275, "total_steps": 40000, "loss": 0.1991, "lr": 4.7885773404416315e-05, "epoch": 0.8605922179623134, "percentage": 13.19, "elapsed_time": "1:12:00", "remaining_time": "7:54:00", "throughput": 2350.26, "total_tokens": 10153936} {"current_steps": 5280, "total_steps": 40000, "loss": 0.1008, "lr": 4.788182034988786e-05, "epoch": 0.8614079451831308, "percentage": 13.2, "elapsed_time": "1:12:02", "remaining_time": "7:53:43", "throughput": 2351.51, "total_tokens": 10164192} {"current_steps": 5285, "total_steps": 40000, "loss": 0.1441, "lr": 4.787786376669516e-05, "epoch": 0.8622236724039481, "percentage": 13.21, "elapsed_time": "1:12:04", "remaining_time": "7:53:25", "throughput": 2352.65, "total_tokens": 10173968} {"current_steps": 5290, "total_steps": 40000, "loss": 0.1142, "lr": 4.787390365544837e-05, "epoch": 0.8630393996247655, "percentage": 13.23, "elapsed_time": "1:12:06", "remaining_time": "7:53:08", "throughput": 2353.88, "total_tokens": 10184160} {"current_steps": 5295, "total_steps": 40000, "loss": 0.1671, "lr": 4.786994001675818e-05, "epoch": 0.8638551268455829, "percentage": 13.24, "elapsed_time": "1:12:08", "remaining_time": "7:52:50", "throughput": 2355.11, "total_tokens": 10194352} {"current_steps": 5300, "total_steps": 40000, "loss": 0.142, "lr": 4.786597285123584e-05, "epoch": 0.8646708540664002, "percentage": 13.25, "elapsed_time": "1:12:10", "remaining_time": "7:52:33", "throughput": 2355.86, "total_tokens": 10202464} {"current_steps": 5305, "total_steps": 40000, "loss": 0.3672, "lr": 4.7862002159493135e-05, "epoch": 0.8654865812872176, "percentage": 13.26, "elapsed_time": "1:12:12", "remaining_time": "7:52:16", "throughput": 2356.87, "total_tokens": 10211696} {"current_steps": 5310, "total_steps": 40000, "loss": 0.1728, "lr": 4.785802794214239e-05, "epoch": 0.8663023085080349, "percentage": 13.28, "elapsed_time": "1:12:14", "remaining_time": "7:51:59", "throughput": 2357.95, "total_tokens": 10221264} {"current_steps": 5315, "total_steps": 40000, "loss": 0.2327, "lr": 4.7854050199796495e-05, "epoch": 0.8671180357288523, "percentage": 13.29, "elapsed_time": "1:12:16", "remaining_time": "7:51:41", "throughput": 2359.35, "total_tokens": 10232208} {"current_steps": 5320, "total_steps": 40000, "loss": 0.1353, "lr": 4.7850068933068845e-05, "epoch": 0.8679337629496696, "percentage": 13.3, "elapsed_time": "1:12:18", "remaining_time": "7:51:24", "throughput": 2360.55, "total_tokens": 10242272} {"current_steps": 5325, "total_steps": 40000, "loss": 0.0422, "lr": 4.7846084142573425e-05, "epoch": 0.868749490170487, "percentage": 13.31, "elapsed_time": "1:12:20", "remaining_time": "7:51:07", "throughput": 2361.88, "total_tokens": 10252912} {"current_steps": 5330, "total_steps": 40000, "loss": 0.1516, "lr": 4.7842095828924725e-05, "epoch": 0.8695652173913043, "percentage": 13.33, "elapsed_time": "1:12:23", "remaining_time": "7:50:50", "throughput": 2362.75, "total_tokens": 10261584} {"current_steps": 5335, "total_steps": 40000, "loss": 0.1492, "lr": 4.783810399273779e-05, "epoch": 0.8703809446121217, "percentage": 13.34, "elapsed_time": "1:12:25", "remaining_time": "7:50:33", "throughput": 2363.9, "total_tokens": 10271456} {"current_steps": 5340, "total_steps": 40000, "loss": 0.1705, "lr": 4.7834108634628226e-05, "epoch": 0.871196671832939, "percentage": 13.35, "elapsed_time": "1:12:27", "remaining_time": "7:50:16", "throughput": 2365.05, "total_tokens": 10281296} {"current_steps": 5345, "total_steps": 40000, "loss": 0.223, "lr": 4.783010975521216e-05, "epoch": 0.8720123990537564, "percentage": 13.36, "elapsed_time": "1:12:29", "remaining_time": "7:49:58", "throughput": 2366.25, "total_tokens": 10291408} {"current_steps": 5350, "total_steps": 40000, "loss": 0.1514, "lr": 4.782610735510626e-05, "epoch": 0.8728281262745737, "percentage": 13.38, "elapsed_time": "1:12:31", "remaining_time": "7:49:41", "throughput": 2367.38, "total_tokens": 10301232} {"current_steps": 5355, "total_steps": 40000, "loss": 0.2838, "lr": 4.782210143492776e-05, "epoch": 0.8736438534953911, "percentage": 13.39, "elapsed_time": "1:12:33", "remaining_time": "7:49:24", "throughput": 2368.46, "total_tokens": 10310864} {"current_steps": 5360, "total_steps": 40000, "loss": 0.136, "lr": 4.781809199529442e-05, "epoch": 0.8744595807162086, "percentage": 13.4, "elapsed_time": "1:12:35", "remaining_time": "7:49:07", "throughput": 2369.86, "total_tokens": 10321840} {"current_steps": 5365, "total_steps": 40000, "loss": 0.1353, "lr": 4.781407903682454e-05, "epoch": 0.8752753079370259, "percentage": 13.41, "elapsed_time": "1:12:37", "remaining_time": "7:48:51", "throughput": 2371.12, "total_tokens": 10332208} {"current_steps": 5370, "total_steps": 40000, "loss": 0.0971, "lr": 4.781006256013698e-05, "epoch": 0.8760910351578433, "percentage": 13.43, "elapsed_time": "1:12:39", "remaining_time": "7:48:34", "throughput": 2372.43, "total_tokens": 10342848} {"current_steps": 5375, "total_steps": 40000, "loss": 0.0922, "lr": 4.7806042565851115e-05, "epoch": 0.8769067623786606, "percentage": 13.44, "elapsed_time": "1:12:41", "remaining_time": "7:48:17", "throughput": 2373.48, "total_tokens": 10352304} {"current_steps": 5380, "total_steps": 40000, "loss": 0.211, "lr": 4.7802019054586895e-05, "epoch": 0.877722489599478, "percentage": 13.45, "elapsed_time": "1:12:43", "remaining_time": "7:48:00", "throughput": 2374.48, "total_tokens": 10361536} {"current_steps": 5385, "total_steps": 40000, "loss": 0.1129, "lr": 4.779799202696479e-05, "epoch": 0.8785382168202953, "percentage": 13.46, "elapsed_time": "1:12:45", "remaining_time": "7:47:43", "throughput": 2375.36, "total_tokens": 10370320} {"current_steps": 5390, "total_steps": 40000, "loss": 0.1539, "lr": 4.779396148360581e-05, "epoch": 0.8793539440411127, "percentage": 13.48, "elapsed_time": "1:12:47", "remaining_time": "7:47:26", "throughput": 2376.53, "total_tokens": 10380288} {"current_steps": 5395, "total_steps": 40000, "loss": 0.0676, "lr": 4.7789927425131517e-05, "epoch": 0.88016967126193, "percentage": 13.49, "elapsed_time": "1:12:49", "remaining_time": "7:47:09", "throughput": 2377.41, "total_tokens": 10389024} {"current_steps": 5400, "total_steps": 40000, "loss": 0.1465, "lr": 4.778588985216403e-05, "epoch": 0.8809853984827474, "percentage": 13.5, "elapsed_time": "1:12:51", "remaining_time": "7:46:52", "throughput": 2378.29, "total_tokens": 10397792} {"current_steps": 5400, "total_steps": 40000, "eval_loss": 0.15891703963279724, "epoch": 0.8809853984827474, "percentage": 13.5, "elapsed_time": "1:14:12", "remaining_time": "7:55:28", "throughput": 2335.31, "total_tokens": 10397792} {"current_steps": 5405, "total_steps": 40000, "loss": 0.1919, "lr": 4.778184876532598e-05, "epoch": 0.8818011257035647, "percentage": 13.51, "elapsed_time": "1:14:16", "remaining_time": "7:55:22", "throughput": 2335.6, "total_tokens": 10407952} {"current_steps": 5410, "total_steps": 40000, "loss": 0.1037, "lr": 4.7777804165240556e-05, "epoch": 0.8826168529243821, "percentage": 13.53, "elapsed_time": "1:14:18", "remaining_time": "7:55:04", "throughput": 2336.35, "total_tokens": 10416096} {"current_steps": 5415, "total_steps": 40000, "loss": 0.0628, "lr": 4.7773756052531485e-05, "epoch": 0.8834325801451994, "percentage": 13.54, "elapsed_time": "1:14:20", "remaining_time": "7:54:47", "throughput": 2337.53, "total_tokens": 10426176} {"current_steps": 5420, "total_steps": 40000, "loss": 0.0948, "lr": 4.7769704427823035e-05, "epoch": 0.8842483073660168, "percentage": 13.55, "elapsed_time": "1:14:22", "remaining_time": "7:54:30", "throughput": 2338.48, "total_tokens": 10435264} {"current_steps": 5425, "total_steps": 40000, "loss": 0.132, "lr": 4.776564929174003e-05, "epoch": 0.8850640345868341, "percentage": 13.56, "elapsed_time": "1:14:24", "remaining_time": "7:54:13", "throughput": 2339.46, "total_tokens": 10444448} {"current_steps": 5430, "total_steps": 40000, "loss": 0.1048, "lr": 4.7761590644907806e-05, "epoch": 0.8858797618076515, "percentage": 13.58, "elapsed_time": "1:14:26", "remaining_time": "7:53:56", "throughput": 2340.77, "total_tokens": 10455152} {"current_steps": 5435, "total_steps": 40000, "loss": 0.274, "lr": 4.7757528487952263e-05, "epoch": 0.8866954890284688, "percentage": 13.59, "elapsed_time": "1:14:28", "remaining_time": "7:53:39", "throughput": 2341.86, "total_tokens": 10464848} {"current_steps": 5440, "total_steps": 40000, "loss": 0.213, "lr": 4.7753462821499836e-05, "epoch": 0.8875112162492862, "percentage": 13.6, "elapsed_time": "1:14:30", "remaining_time": "7:53:22", "throughput": 2342.97, "total_tokens": 10474704} {"current_steps": 5445, "total_steps": 40000, "loss": 0.1102, "lr": 4.774939364617751e-05, "epoch": 0.8883269434701035, "percentage": 13.61, "elapsed_time": "1:14:32", "remaining_time": "7:53:04", "throughput": 2344.06, "total_tokens": 10484400} {"current_steps": 5450, "total_steps": 40000, "loss": 0.1642, "lr": 4.7745320962612795e-05, "epoch": 0.889142670690921, "percentage": 13.63, "elapsed_time": "1:14:34", "remaining_time": "7:52:47", "throughput": 2345.23, "total_tokens": 10494480} {"current_steps": 5455, "total_steps": 40000, "loss": 0.2939, "lr": 4.7741244771433756e-05, "epoch": 0.8899583979117384, "percentage": 13.64, "elapsed_time": "1:14:36", "remaining_time": "7:52:30", "throughput": 2346.13, "total_tokens": 10503328} {"current_steps": 5460, "total_steps": 40000, "loss": 0.1316, "lr": 4.7737165073268985e-05, "epoch": 0.8907741251325557, "percentage": 13.65, "elapsed_time": "1:14:38", "remaining_time": "7:52:13", "throughput": 2347.21, "total_tokens": 10513008} {"current_steps": 5465, "total_steps": 40000, "loss": 0.1072, "lr": 4.7733081868747626e-05, "epoch": 0.8915898523533731, "percentage": 13.66, "elapsed_time": "1:14:41", "remaining_time": "7:51:56", "throughput": 2347.99, "total_tokens": 10521344} {"current_steps": 5470, "total_steps": 40000, "loss": 0.0732, "lr": 4.772899515849936e-05, "epoch": 0.8924055795741904, "percentage": 13.68, "elapsed_time": "1:14:43", "remaining_time": "7:51:39", "throughput": 2349.0, "total_tokens": 10530736} {"current_steps": 5475, "total_steps": 40000, "loss": 0.1055, "lr": 4.7724904943154414e-05, "epoch": 0.8932213067950078, "percentage": 13.69, "elapsed_time": "1:14:45", "remaining_time": "7:51:23", "throughput": 2350.2, "total_tokens": 10540992} {"current_steps": 5480, "total_steps": 40000, "loss": 0.1469, "lr": 4.772081122334354e-05, "epoch": 0.8940370340158251, "percentage": 13.7, "elapsed_time": "1:14:47", "remaining_time": "7:51:06", "throughput": 2351.38, "total_tokens": 10551120} {"current_steps": 5485, "total_steps": 40000, "loss": 0.1268, "lr": 4.771671399969806e-05, "epoch": 0.8948527612366425, "percentage": 13.71, "elapsed_time": "1:14:49", "remaining_time": "7:50:49", "throughput": 2352.44, "total_tokens": 10560752} {"current_steps": 5490, "total_steps": 40000, "loss": 0.2256, "lr": 4.7712613272849794e-05, "epoch": 0.8956684884574598, "percentage": 13.73, "elapsed_time": "1:14:51", "remaining_time": "7:50:32", "throughput": 2353.46, "total_tokens": 10570176} {"current_steps": 5495, "total_steps": 40000, "loss": 0.1186, "lr": 4.770850904343114e-05, "epoch": 0.8964842156782772, "percentage": 13.74, "elapsed_time": "1:14:53", "remaining_time": "7:50:15", "throughput": 2354.78, "total_tokens": 10580992} {"current_steps": 5500, "total_steps": 40000, "loss": 0.0435, "lr": 4.770440131207502e-05, "epoch": 0.8972999428990945, "percentage": 13.75, "elapsed_time": "1:14:55", "remaining_time": "7:49:58", "throughput": 2356.0, "total_tokens": 10591296} {"current_steps": 5505, "total_steps": 40000, "loss": 0.3146, "lr": 4.7700290079414896e-05, "epoch": 0.8981156701199119, "percentage": 13.76, "elapsed_time": "1:14:57", "remaining_time": "7:49:42", "throughput": 2357.06, "total_tokens": 10600960} {"current_steps": 5510, "total_steps": 40000, "loss": 0.1306, "lr": 4.769617534608477e-05, "epoch": 0.8989313973407292, "percentage": 13.78, "elapsed_time": "1:14:59", "remaining_time": "7:49:25", "throughput": 2358.15, "total_tokens": 10610704} {"current_steps": 5515, "total_steps": 40000, "loss": 0.1226, "lr": 4.7692057112719193e-05, "epoch": 0.8997471245615466, "percentage": 13.79, "elapsed_time": "1:15:01", "remaining_time": "7:49:08", "throughput": 2359.18, "total_tokens": 10620224} {"current_steps": 5520, "total_steps": 40000, "loss": 0.1824, "lr": 4.7687935379953234e-05, "epoch": 0.900562851782364, "percentage": 13.8, "elapsed_time": "1:15:03", "remaining_time": "7:48:51", "throughput": 2360.1, "total_tokens": 10629248} {"current_steps": 5525, "total_steps": 40000, "loss": 0.1551, "lr": 4.7683810148422534e-05, "epoch": 0.9013785790031813, "percentage": 13.81, "elapsed_time": "1:15:05", "remaining_time": "7:48:35", "throughput": 2361.05, "total_tokens": 10638400} {"current_steps": 5530, "total_steps": 40000, "loss": 0.2012, "lr": 4.767968141876324e-05, "epoch": 0.9021943062239987, "percentage": 13.83, "elapsed_time": "1:15:07", "remaining_time": "7:48:18", "throughput": 2362.15, "total_tokens": 10648176} {"current_steps": 5535, "total_steps": 40000, "loss": 0.1248, "lr": 4.767554919161207e-05, "epoch": 0.903010033444816, "percentage": 13.84, "elapsed_time": "1:15:09", "remaining_time": "7:48:02", "throughput": 2362.99, "total_tokens": 10656848} {"current_steps": 5540, "total_steps": 40000, "loss": 0.0933, "lr": 4.767141346760624e-05, "epoch": 0.9038257606656334, "percentage": 13.85, "elapsed_time": "1:15:11", "remaining_time": "7:47:45", "throughput": 2364.14, "total_tokens": 10666944} {"current_steps": 5545, "total_steps": 40000, "loss": 0.1366, "lr": 4.766727424738356e-05, "epoch": 0.9046414878864508, "percentage": 13.86, "elapsed_time": "1:15:14", "remaining_time": "7:47:28", "throughput": 2365.23, "total_tokens": 10676720} {"current_steps": 5550, "total_steps": 40000, "loss": 0.2569, "lr": 4.7663131531582325e-05, "epoch": 0.9054572151072682, "percentage": 13.88, "elapsed_time": "1:15:16", "remaining_time": "7:47:12", "throughput": 2366.39, "total_tokens": 10686896} {"current_steps": 5555, "total_steps": 40000, "loss": 0.0968, "lr": 4.765898532084142e-05, "epoch": 0.9062729423280855, "percentage": 13.89, "elapsed_time": "1:15:18", "remaining_time": "7:46:56", "throughput": 2367.44, "total_tokens": 10696560} {"current_steps": 5560, "total_steps": 40000, "loss": 0.2482, "lr": 4.765483561580022e-05, "epoch": 0.9070886695489029, "percentage": 13.9, "elapsed_time": "1:15:20", "remaining_time": "7:46:39", "throughput": 2368.74, "total_tokens": 10707296} {"current_steps": 5565, "total_steps": 40000, "loss": 0.0847, "lr": 4.7650682417098666e-05, "epoch": 0.9079043967697202, "percentage": 13.91, "elapsed_time": "1:15:22", "remaining_time": "7:46:23", "throughput": 2369.51, "total_tokens": 10715664} {"current_steps": 5570, "total_steps": 40000, "loss": 0.139, "lr": 4.7646525725377244e-05, "epoch": 0.9087201239905376, "percentage": 13.93, "elapsed_time": "1:15:24", "remaining_time": "7:46:06", "throughput": 2370.64, "total_tokens": 10725648} {"current_steps": 5575, "total_steps": 40000, "loss": 0.1292, "lr": 4.764236554127696e-05, "epoch": 0.9095358512113549, "percentage": 13.94, "elapsed_time": "1:15:26", "remaining_time": "7:45:50", "throughput": 2371.89, "total_tokens": 10736192} {"current_steps": 5580, "total_steps": 40000, "loss": 0.1649, "lr": 4.7638201865439356e-05, "epoch": 0.9103515784321723, "percentage": 13.95, "elapsed_time": "1:15:28", "remaining_time": "7:45:33", "throughput": 2373.02, "total_tokens": 10746176} {"current_steps": 5585, "total_steps": 40000, "loss": 0.2183, "lr": 4.7634034698506545e-05, "epoch": 0.9111673056529896, "percentage": 13.96, "elapsed_time": "1:15:30", "remaining_time": "7:45:17", "throughput": 2373.92, "total_tokens": 10755200} {"current_steps": 5590, "total_steps": 40000, "loss": 0.0778, "lr": 4.762986404112115e-05, "epoch": 0.911983032873807, "percentage": 13.98, "elapsed_time": "1:15:32", "remaining_time": "7:45:01", "throughput": 2374.96, "total_tokens": 10764816} {"current_steps": 5595, "total_steps": 40000, "loss": 0.2251, "lr": 4.762568989392633e-05, "epoch": 0.9127987600946244, "percentage": 13.99, "elapsed_time": "1:15:34", "remaining_time": "7:44:44", "throughput": 2375.97, "total_tokens": 10774272} {"current_steps": 5600, "total_steps": 40000, "loss": 0.1414, "lr": 4.76215122575658e-05, "epoch": 0.9136144873154417, "percentage": 14.0, "elapsed_time": "1:15:36", "remaining_time": "7:44:28", "throughput": 2377.14, "total_tokens": 10784512} {"current_steps": 5600, "total_steps": 40000, "eval_loss": 0.1298934519290924, "epoch": 0.9136144873154417, "percentage": 14.0, "elapsed_time": "1:16:57", "remaining_time": "7:52:43", "throughput": 2335.67, "total_tokens": 10784512} {"current_steps": 5605, "total_steps": 40000, "loss": 0.1728, "lr": 4.7617331132683795e-05, "epoch": 0.9144302145362591, "percentage": 14.01, "elapsed_time": "1:17:01", "remaining_time": "7:52:37", "throughput": 2335.86, "total_tokens": 10794256} {"current_steps": 5610, "total_steps": 40000, "loss": 0.1124, "lr": 4.7613146519925105e-05, "epoch": 0.9152459417570764, "percentage": 14.03, "elapsed_time": "1:17:03", "remaining_time": "7:52:20", "throughput": 2336.95, "total_tokens": 10804112} {"current_steps": 5615, "total_steps": 40000, "loss": 0.1142, "lr": 4.7608958419935045e-05, "epoch": 0.9160616689778938, "percentage": 14.04, "elapsed_time": "1:17:05", "remaining_time": "7:52:03", "throughput": 2337.77, "total_tokens": 10812752} {"current_steps": 5620, "total_steps": 40000, "loss": 0.0951, "lr": 4.760476683335948e-05, "epoch": 0.9168773961987111, "percentage": 14.05, "elapsed_time": "1:17:07", "remaining_time": "7:51:47", "throughput": 2338.63, "total_tokens": 10821552} {"current_steps": 5625, "total_steps": 40000, "loss": 0.0831, "lr": 4.760057176084479e-05, "epoch": 0.9176931234195285, "percentage": 14.06, "elapsed_time": "1:17:09", "remaining_time": "7:51:30", "throughput": 2339.57, "total_tokens": 10830704} {"current_steps": 5630, "total_steps": 40000, "loss": 0.143, "lr": 4.759637320303793e-05, "epoch": 0.9185088506403458, "percentage": 14.07, "elapsed_time": "1:17:11", "remaining_time": "7:51:13", "throughput": 2340.71, "total_tokens": 10840816} {"current_steps": 5635, "total_steps": 40000, "loss": 0.1064, "lr": 4.759217116058635e-05, "epoch": 0.9193245778611632, "percentage": 14.09, "elapsed_time": "1:17:13", "remaining_time": "7:50:57", "throughput": 2341.53, "total_tokens": 10849472} {"current_steps": 5640, "total_steps": 40000, "loss": 0.1775, "lr": 4.758796563413807e-05, "epoch": 0.9201403050819806, "percentage": 14.1, "elapsed_time": "1:17:15", "remaining_time": "7:50:40", "throughput": 2342.6, "total_tokens": 10859296} {"current_steps": 5645, "total_steps": 40000, "loss": 0.1593, "lr": 4.758375662434163e-05, "epoch": 0.920956032302798, "percentage": 14.11, "elapsed_time": "1:17:17", "remaining_time": "7:50:24", "throughput": 2343.54, "total_tokens": 10868448} {"current_steps": 5650, "total_steps": 40000, "loss": 0.064, "lr": 4.7579544131846114e-05, "epoch": 0.9217717595236153, "percentage": 14.12, "elapsed_time": "1:17:19", "remaining_time": "7:50:07", "throughput": 2344.4, "total_tokens": 10877312} {"current_steps": 5655, "total_steps": 40000, "loss": 0.1064, "lr": 4.757532815730114e-05, "epoch": 0.9225874867444327, "percentage": 14.14, "elapsed_time": "1:17:21", "remaining_time": "7:49:51", "throughput": 2345.43, "total_tokens": 10886912} {"current_steps": 5660, "total_steps": 40000, "loss": 0.1643, "lr": 4.7571108701356865e-05, "epoch": 0.92340321396525, "percentage": 14.15, "elapsed_time": "1:17:23", "remaining_time": "7:49:34", "throughput": 2346.35, "total_tokens": 10896032} {"current_steps": 5665, "total_steps": 40000, "loss": 0.1705, "lr": 4.756688576466398e-05, "epoch": 0.9242189411860674, "percentage": 14.16, "elapsed_time": "1:17:25", "remaining_time": "7:49:18", "throughput": 2347.54, "total_tokens": 10906400} {"current_steps": 5670, "total_steps": 40000, "loss": 0.2473, "lr": 4.756265934787372e-05, "epoch": 0.9250346684068848, "percentage": 14.17, "elapsed_time": "1:17:27", "remaining_time": "7:49:01", "throughput": 2348.47, "total_tokens": 10915552} {"current_steps": 5675, "total_steps": 40000, "loss": 0.1277, "lr": 4.755842945163785e-05, "epoch": 0.9258503956277021, "percentage": 14.19, "elapsed_time": "1:17:30", "remaining_time": "7:48:45", "throughput": 2349.39, "total_tokens": 10924720} {"current_steps": 5680, "total_steps": 40000, "loss": 0.0964, "lr": 4.755419607660867e-05, "epoch": 0.9266661228485195, "percentage": 14.2, "elapsed_time": "1:17:32", "remaining_time": "7:48:29", "throughput": 2350.51, "total_tokens": 10934768} {"current_steps": 5685, "total_steps": 40000, "loss": 0.1746, "lr": 4.7549959223439016e-05, "epoch": 0.9274818500693368, "percentage": 14.21, "elapsed_time": "1:17:34", "remaining_time": "7:48:12", "throughput": 2351.52, "total_tokens": 10944336} {"current_steps": 5690, "total_steps": 40000, "loss": 0.3014, "lr": 4.754571889278228e-05, "epoch": 0.9282975772901542, "percentage": 14.22, "elapsed_time": "1:17:36", "remaining_time": "7:47:56", "throughput": 2352.64, "total_tokens": 10954480} {"current_steps": 5695, "total_steps": 40000, "loss": 0.1432, "lr": 4.754147508529235e-05, "epoch": 0.9291133045109715, "percentage": 14.24, "elapsed_time": "1:17:38", "remaining_time": "7:47:40", "throughput": 2353.74, "total_tokens": 10964432} {"current_steps": 5700, "total_steps": 40000, "loss": 0.2148, "lr": 4.75372278016237e-05, "epoch": 0.9299290317317889, "percentage": 14.25, "elapsed_time": "1:17:40", "remaining_time": "7:47:23", "throughput": 2354.69, "total_tokens": 10973728} {"current_steps": 5705, "total_steps": 40000, "loss": 0.1726, "lr": 4.753297704243129e-05, "epoch": 0.9307447589526062, "percentage": 14.26, "elapsed_time": "1:17:42", "remaining_time": "7:47:07", "throughput": 2355.72, "total_tokens": 10983376} {"current_steps": 5710, "total_steps": 40000, "loss": 0.1102, "lr": 4.752872280837066e-05, "epoch": 0.9315604861734236, "percentage": 14.27, "elapsed_time": "1:17:44", "remaining_time": "7:46:51", "throughput": 2356.84, "total_tokens": 10993472} {"current_steps": 5715, "total_steps": 40000, "loss": 0.1326, "lr": 4.752446510009786e-05, "epoch": 0.9323762133942409, "percentage": 14.29, "elapsed_time": "1:17:46", "remaining_time": "7:46:35", "throughput": 2358.1, "total_tokens": 11004240} {"current_steps": 5720, "total_steps": 40000, "loss": 0.1637, "lr": 4.7520203918269476e-05, "epoch": 0.9331919406150583, "percentage": 14.3, "elapsed_time": "1:17:48", "remaining_time": "7:46:19", "throughput": 2358.97, "total_tokens": 11013168} {"current_steps": 5725, "total_steps": 40000, "loss": 0.2623, "lr": 4.751593926354265e-05, "epoch": 0.9340076678358756, "percentage": 14.31, "elapsed_time": "1:17:50", "remaining_time": "7:46:03", "throughput": 2359.99, "total_tokens": 11022832} {"current_steps": 5730, "total_steps": 40000, "loss": 0.1549, "lr": 4.751167113657503e-05, "epoch": 0.9348233950566931, "percentage": 14.32, "elapsed_time": "1:17:52", "remaining_time": "7:45:46", "throughput": 2360.78, "total_tokens": 11031376} {"current_steps": 5735, "total_steps": 40000, "loss": 0.121, "lr": 4.7507399538024834e-05, "epoch": 0.9356391222775104, "percentage": 14.34, "elapsed_time": "1:17:54", "remaining_time": "7:45:30", "throughput": 2361.83, "total_tokens": 11041152} {"current_steps": 5740, "total_steps": 40000, "loss": 0.1488, "lr": 4.750312446855077e-05, "epoch": 0.9364548494983278, "percentage": 14.35, "elapsed_time": "1:17:56", "remaining_time": "7:45:14", "throughput": 2362.83, "total_tokens": 11050688} {"current_steps": 5745, "total_steps": 40000, "loss": 0.2582, "lr": 4.749884592881212e-05, "epoch": 0.9372705767191452, "percentage": 14.36, "elapsed_time": "1:17:58", "remaining_time": "7:44:58", "throughput": 2363.86, "total_tokens": 11060400} {"current_steps": 5750, "total_steps": 40000, "loss": 0.1524, "lr": 4.74945639194687e-05, "epoch": 0.9380863039399625, "percentage": 14.37, "elapsed_time": "1:18:01", "remaining_time": "7:44:42", "throughput": 2364.97, "total_tokens": 11070480} {"current_steps": 5755, "total_steps": 40000, "loss": 0.1472, "lr": 4.749027844118083e-05, "epoch": 0.9389020311607799, "percentage": 14.39, "elapsed_time": "1:18:03", "remaining_time": "7:44:26", "throughput": 2365.92, "total_tokens": 11079824} {"current_steps": 5760, "total_steps": 40000, "loss": 0.0511, "lr": 4.7485989494609395e-05, "epoch": 0.9397177583815972, "percentage": 14.4, "elapsed_time": "1:18:05", "remaining_time": "7:44:10", "throughput": 2367.21, "total_tokens": 11090752} {"current_steps": 5765, "total_steps": 40000, "loss": 0.1013, "lr": 4.748169708041581e-05, "epoch": 0.9405334856024146, "percentage": 14.41, "elapsed_time": "1:18:07", "remaining_time": "7:43:54", "throughput": 2368.27, "total_tokens": 11100608} {"current_steps": 5770, "total_steps": 40000, "loss": 0.2075, "lr": 4.7477401199262004e-05, "epoch": 0.9413492128232319, "percentage": 14.42, "elapsed_time": "1:18:09", "remaining_time": "7:43:38", "throughput": 2369.33, "total_tokens": 11110416} {"current_steps": 5775, "total_steps": 40000, "loss": 0.1825, "lr": 4.747310185181048e-05, "epoch": 0.9421649400440493, "percentage": 14.44, "elapsed_time": "1:18:11", "remaining_time": "7:43:22", "throughput": 2370.38, "total_tokens": 11120256} {"current_steps": 5780, "total_steps": 40000, "loss": 0.1363, "lr": 4.746879903872422e-05, "epoch": 0.9429806672648666, "percentage": 14.45, "elapsed_time": "1:18:13", "remaining_time": "7:43:06", "throughput": 2371.13, "total_tokens": 11128672} {"current_steps": 5785, "total_steps": 40000, "loss": 0.1547, "lr": 4.746449276066679e-05, "epoch": 0.943796394485684, "percentage": 14.46, "elapsed_time": "1:18:15", "remaining_time": "7:42:51", "throughput": 2371.91, "total_tokens": 11137248} {"current_steps": 5790, "total_steps": 40000, "loss": 0.1011, "lr": 4.746018301830227e-05, "epoch": 0.9446121217065013, "percentage": 14.47, "elapsed_time": "1:18:17", "remaining_time": "7:42:35", "throughput": 2372.87, "total_tokens": 11146640} {"current_steps": 5795, "total_steps": 40000, "loss": 0.259, "lr": 4.7455869812295275e-05, "epoch": 0.9454278489273187, "percentage": 14.49, "elapsed_time": "1:18:19", "remaining_time": "7:42:19", "throughput": 2373.75, "total_tokens": 11155664} {"current_steps": 5800, "total_steps": 40000, "loss": 0.1428, "lr": 4.7451553143310964e-05, "epoch": 0.946243576148136, "percentage": 14.5, "elapsed_time": "1:18:21", "remaining_time": "7:42:03", "throughput": 2374.73, "total_tokens": 11165168} {"current_steps": 5800, "total_steps": 40000, "eval_loss": 0.12852467596530914, "epoch": 0.946243576148136, "percentage": 14.5, "elapsed_time": "1:19:42", "remaining_time": "7:49:59", "throughput": 2334.69, "total_tokens": 11165168} {"current_steps": 5805, "total_steps": 40000, "loss": 0.1137, "lr": 4.744723301201501e-05, "epoch": 0.9470593033689534, "percentage": 14.51, "elapsed_time": "1:19:45", "remaining_time": "7:49:51", "throughput": 2335.03, "total_tokens": 11175088} {"current_steps": 5810, "total_steps": 40000, "loss": 0.0769, "lr": 4.744290941907364e-05, "epoch": 0.9478750305897707, "percentage": 14.52, "elapsed_time": "1:19:47", "remaining_time": "7:49:35", "throughput": 2336.03, "total_tokens": 11184752} {"current_steps": 5815, "total_steps": 40000, "loss": 0.2378, "lr": 4.7438582365153594e-05, "epoch": 0.9486907578105881, "percentage": 14.54, "elapsed_time": "1:19:49", "remaining_time": "7:49:19", "throughput": 2337.08, "total_tokens": 11194624} {"current_steps": 5820, "total_steps": 40000, "loss": 0.2069, "lr": 4.743425185092217e-05, "epoch": 0.9495064850314054, "percentage": 14.55, "elapsed_time": "1:19:52", "remaining_time": "7:49:03", "throughput": 2338.24, "total_tokens": 11204992} {"current_steps": 5825, "total_steps": 40000, "loss": 0.081, "lr": 4.742991787704719e-05, "epoch": 0.9503222122522229, "percentage": 14.56, "elapsed_time": "1:19:54", "remaining_time": "7:48:46", "throughput": 2339.47, "total_tokens": 11215728} {"current_steps": 5830, "total_steps": 40000, "loss": 0.0414, "lr": 4.7425580444196994e-05, "epoch": 0.9511379394730403, "percentage": 14.57, "elapsed_time": "1:19:56", "remaining_time": "7:48:30", "throughput": 2340.62, "total_tokens": 11226064} {"current_steps": 5835, "total_steps": 40000, "loss": 0.1472, "lr": 4.742123955304048e-05, "epoch": 0.9519536666938576, "percentage": 14.59, "elapsed_time": "1:19:58", "remaining_time": "7:48:14", "throughput": 2341.58, "total_tokens": 11235504} {"current_steps": 5840, "total_steps": 40000, "loss": 0.0781, "lr": 4.741689520424706e-05, "epoch": 0.952769393914675, "percentage": 14.6, "elapsed_time": "1:20:00", "remaining_time": "7:47:58", "throughput": 2342.47, "total_tokens": 11244624} {"current_steps": 5845, "total_steps": 40000, "loss": 0.0142, "lr": 4.741254739848669e-05, "epoch": 0.9535851211354923, "percentage": 14.61, "elapsed_time": "1:20:02", "remaining_time": "7:47:42", "throughput": 2343.5, "total_tokens": 11254416} {"current_steps": 5850, "total_steps": 40000, "loss": 0.1869, "lr": 4.740819613642987e-05, "epoch": 0.9544008483563097, "percentage": 14.62, "elapsed_time": "1:20:04", "remaining_time": "7:47:26", "throughput": 2344.37, "total_tokens": 11263408} {"current_steps": 5855, "total_steps": 40000, "loss": 0.0345, "lr": 4.74038414187476e-05, "epoch": 0.955216575577127, "percentage": 14.64, "elapsed_time": "1:20:06", "remaining_time": "7:47:10", "throughput": 2345.26, "total_tokens": 11272560} {"current_steps": 5860, "total_steps": 40000, "loss": 0.181, "lr": 4.739948324611144e-05, "epoch": 0.9560323027979444, "percentage": 14.65, "elapsed_time": "1:20:08", "remaining_time": "7:46:54", "throughput": 2346.37, "total_tokens": 11282704} {"current_steps": 5865, "total_steps": 40000, "loss": 0.2121, "lr": 4.7395121619193465e-05, "epoch": 0.9568480300187617, "percentage": 14.66, "elapsed_time": "1:20:10", "remaining_time": "7:46:38", "throughput": 2347.23, "total_tokens": 11291712} {"current_steps": 5870, "total_steps": 40000, "loss": 0.1015, "lr": 4.7390756538666313e-05, "epoch": 0.9576637572395791, "percentage": 14.67, "elapsed_time": "1:20:12", "remaining_time": "7:46:22", "throughput": 2348.24, "total_tokens": 11301424} {"current_steps": 5875, "total_steps": 40000, "loss": 0.1538, "lr": 4.738638800520311e-05, "epoch": 0.9584794844603964, "percentage": 14.69, "elapsed_time": "1:20:14", "remaining_time": "7:46:06", "throughput": 2349.09, "total_tokens": 11310368} {"current_steps": 5880, "total_steps": 40000, "loss": 0.1241, "lr": 4.738201601947757e-05, "epoch": 0.9592952116812138, "percentage": 14.7, "elapsed_time": "1:20:16", "remaining_time": "7:45:50", "throughput": 2350.09, "total_tokens": 11320064} {"current_steps": 5885, "total_steps": 40000, "loss": 0.1884, "lr": 4.7377640582163876e-05, "epoch": 0.9601109389020311, "percentage": 14.71, "elapsed_time": "1:20:18", "remaining_time": "7:45:34", "throughput": 2351.13, "total_tokens": 11329904} {"current_steps": 5890, "total_steps": 40000, "loss": 0.1483, "lr": 4.7373261693936786e-05, "epoch": 0.9609266661228485, "percentage": 14.72, "elapsed_time": "1:20:20", "remaining_time": "7:45:19", "throughput": 2351.98, "total_tokens": 11338848} {"current_steps": 5895, "total_steps": 40000, "loss": 0.0807, "lr": 4.7368879355471595e-05, "epoch": 0.9617423933436658, "percentage": 14.74, "elapsed_time": "1:20:23", "remaining_time": "7:45:03", "throughput": 2352.89, "total_tokens": 11348112} {"current_steps": 5900, "total_steps": 40000, "loss": 0.1847, "lr": 4.736449356744409e-05, "epoch": 0.9625581205644832, "percentage": 14.75, "elapsed_time": "1:20:25", "remaining_time": "7:44:47", "throughput": 2353.69, "total_tokens": 11356880} {"current_steps": 5905, "total_steps": 40000, "loss": 0.1268, "lr": 4.736010433053064e-05, "epoch": 0.9633738477853006, "percentage": 14.76, "elapsed_time": "1:20:27", "remaining_time": "7:44:31", "throughput": 2354.68, "total_tokens": 11366512} {"current_steps": 5910, "total_steps": 40000, "loss": 0.1507, "lr": 4.73557116454081e-05, "epoch": 0.9641895750061179, "percentage": 14.77, "elapsed_time": "1:20:29", "remaining_time": "7:44:16", "throughput": 2355.61, "total_tokens": 11375856} {"current_steps": 5915, "total_steps": 40000, "loss": 0.1391, "lr": 4.735131551275389e-05, "epoch": 0.9650053022269353, "percentage": 14.79, "elapsed_time": "1:20:31", "remaining_time": "7:44:00", "throughput": 2356.74, "total_tokens": 11386208} {"current_steps": 5920, "total_steps": 40000, "loss": 0.0699, "lr": 4.734691593324594e-05, "epoch": 0.9658210294477527, "percentage": 14.8, "elapsed_time": "1:20:33", "remaining_time": "7:43:44", "throughput": 2357.8, "total_tokens": 11396224} {"current_steps": 5925, "total_steps": 40000, "loss": 0.1256, "lr": 4.734251290756272e-05, "epoch": 0.9666367566685701, "percentage": 14.81, "elapsed_time": "1:20:35", "remaining_time": "7:43:29", "throughput": 2358.8, "total_tokens": 11405904} {"current_steps": 5930, "total_steps": 40000, "loss": 0.1421, "lr": 4.7338106436383246e-05, "epoch": 0.9674524838893874, "percentage": 14.82, "elapsed_time": "1:20:37", "remaining_time": "7:43:13", "throughput": 2359.69, "total_tokens": 11415120} {"current_steps": 5935, "total_steps": 40000, "loss": 0.0929, "lr": 4.733369652038703e-05, "epoch": 0.9682682111102048, "percentage": 14.84, "elapsed_time": "1:20:39", "remaining_time": "7:42:57", "throughput": 2360.86, "total_tokens": 11425648} {"current_steps": 5940, "total_steps": 40000, "loss": 0.1358, "lr": 4.7329283160254156e-05, "epoch": 0.9690839383310221, "percentage": 14.85, "elapsed_time": "1:20:41", "remaining_time": "7:42:42", "throughput": 2362.1, "total_tokens": 11436576} {"current_steps": 5945, "total_steps": 40000, "loss": 0.1473, "lr": 4.732486635666521e-05, "epoch": 0.9698996655518395, "percentage": 14.86, "elapsed_time": "1:20:43", "remaining_time": "7:42:26", "throughput": 2363.18, "total_tokens": 11446672} {"current_steps": 5950, "total_steps": 40000, "loss": 0.2031, "lr": 4.732044611030132e-05, "epoch": 0.9707153927726568, "percentage": 14.88, "elapsed_time": "1:20:45", "remaining_time": "7:42:11", "throughput": 2364.2, "total_tokens": 11456480} {"current_steps": 5955, "total_steps": 40000, "loss": 0.1701, "lr": 4.731602242184414e-05, "epoch": 0.9715311199934742, "percentage": 14.89, "elapsed_time": "1:20:47", "remaining_time": "7:41:55", "throughput": 2365.13, "total_tokens": 11465872} {"current_steps": 5960, "total_steps": 40000, "loss": 0.2442, "lr": 4.7311595291975864e-05, "epoch": 0.9723468472142915, "percentage": 14.9, "elapsed_time": "1:20:49", "remaining_time": "7:41:40", "throughput": 2366.23, "total_tokens": 11476112} {"current_steps": 5965, "total_steps": 40000, "loss": 0.0738, "lr": 4.7307164721379216e-05, "epoch": 0.9731625744351089, "percentage": 14.91, "elapsed_time": "1:20:52", "remaining_time": "7:41:24", "throughput": 2367.43, "total_tokens": 11486816} {"current_steps": 5970, "total_steps": 40000, "loss": 0.0989, "lr": 4.730273071073743e-05, "epoch": 0.9739783016559262, "percentage": 14.92, "elapsed_time": "1:20:54", "remaining_time": "7:41:09", "throughput": 2368.4, "total_tokens": 11496400} {"current_steps": 5975, "total_steps": 40000, "loss": 0.2929, "lr": 4.729829326073429e-05, "epoch": 0.9747940288767436, "percentage": 14.94, "elapsed_time": "1:20:56", "remaining_time": "7:40:53", "throughput": 2369.24, "total_tokens": 11505376} {"current_steps": 5980, "total_steps": 40000, "loss": 0.2074, "lr": 4.7293852372054126e-05, "epoch": 0.975609756097561, "percentage": 14.95, "elapsed_time": "1:20:58", "remaining_time": "7:40:38", "throughput": 2370.32, "total_tokens": 11515472} {"current_steps": 5985, "total_steps": 40000, "loss": 0.1321, "lr": 4.728940804538176e-05, "epoch": 0.9764254833183783, "percentage": 14.96, "elapsed_time": "1:21:00", "remaining_time": "7:40:22", "throughput": 2371.1, "total_tokens": 11524208} {"current_steps": 5990, "total_steps": 40000, "loss": 0.1077, "lr": 4.7284960281402556e-05, "epoch": 0.9772412105391957, "percentage": 14.97, "elapsed_time": "1:21:02", "remaining_time": "7:40:07", "throughput": 2372.15, "total_tokens": 11534176} {"current_steps": 5995, "total_steps": 40000, "loss": 0.2447, "lr": 4.728050908080244e-05, "epoch": 0.978056937760013, "percentage": 14.99, "elapsed_time": "1:21:04", "remaining_time": "7:39:51", "throughput": 2373.29, "total_tokens": 11544592} {"current_steps": 6000, "total_steps": 40000, "loss": 0.1544, "lr": 4.727605444426782e-05, "epoch": 0.9788726649808304, "percentage": 15.0, "elapsed_time": "1:21:06", "remaining_time": "7:39:36", "throughput": 2374.02, "total_tokens": 11553056} {"current_steps": 6000, "total_steps": 40000, "eval_loss": 0.1307803988456726, "epoch": 0.9788726649808304, "percentage": 15.0, "elapsed_time": "1:22:27", "remaining_time": "7:47:13", "throughput": 2335.31, "total_tokens": 11553056} {"current_steps": 6005, "total_steps": 40000, "loss": 0.1424, "lr": 4.727159637248567e-05, "epoch": 0.9796883922016477, "percentage": 15.01, "elapsed_time": "1:22:31", "remaining_time": "7:47:10", "throughput": 2335.25, "total_tokens": 11562896} {"current_steps": 6010, "total_steps": 40000, "loss": 0.2438, "lr": 4.7267134866143474e-05, "epoch": 0.9805041194224652, "percentage": 15.02, "elapsed_time": "1:22:33", "remaining_time": "7:46:55", "throughput": 2336.11, "total_tokens": 11572016} {"current_steps": 6015, "total_steps": 40000, "loss": 0.0601, "lr": 4.726266992592926e-05, "epoch": 0.9813198466432825, "percentage": 15.04, "elapsed_time": "1:22:35", "remaining_time": "7:46:39", "throughput": 2336.86, "total_tokens": 11580560} {"current_steps": 6020, "total_steps": 40000, "loss": 0.1571, "lr": 4.725820155253157e-05, "epoch": 0.9821355738640999, "percentage": 15.05, "elapsed_time": "1:22:37", "remaining_time": "7:46:23", "throughput": 2337.93, "total_tokens": 11590672} {"current_steps": 6025, "total_steps": 40000, "loss": 0.0914, "lr": 4.725372974663948e-05, "epoch": 0.9829513010849172, "percentage": 15.06, "elapsed_time": "1:22:39", "remaining_time": "7:46:07", "throughput": 2339.01, "total_tokens": 11600848} {"current_steps": 6030, "total_steps": 40000, "loss": 0.1391, "lr": 4.724925450894262e-05, "epoch": 0.9837670283057346, "percentage": 15.07, "elapsed_time": "1:22:41", "remaining_time": "7:45:52", "throughput": 2340.13, "total_tokens": 11611232} {"current_steps": 6035, "total_steps": 40000, "loss": 0.1895, "lr": 4.72447758401311e-05, "epoch": 0.984582755526552, "percentage": 15.09, "elapsed_time": "1:22:43", "remaining_time": "7:45:36", "throughput": 2341.09, "total_tokens": 11620832} {"current_steps": 6040, "total_steps": 40000, "loss": 0.178, "lr": 4.7240293740895616e-05, "epoch": 0.9853984827473693, "percentage": 15.1, "elapsed_time": "1:22:45", "remaining_time": "7:45:21", "throughput": 2342.05, "total_tokens": 11630464} {"current_steps": 6045, "total_steps": 40000, "loss": 0.0501, "lr": 4.723580821192733e-05, "epoch": 0.9862142099681867, "percentage": 15.11, "elapsed_time": "1:22:48", "remaining_time": "7:45:05", "throughput": 2342.92, "total_tokens": 11639632} {"current_steps": 6050, "total_steps": 40000, "loss": 0.1879, "lr": 4.7231319253917996e-05, "epoch": 0.987029937189004, "percentage": 15.12, "elapsed_time": "1:22:50", "remaining_time": "7:44:49", "throughput": 2343.66, "total_tokens": 11648144} {"current_steps": 6055, "total_steps": 40000, "loss": 0.0763, "lr": 4.722682686755986e-05, "epoch": 0.9878456644098214, "percentage": 15.14, "elapsed_time": "1:22:52", "remaining_time": "7:44:34", "throughput": 2344.81, "total_tokens": 11658688} {"current_steps": 6060, "total_steps": 40000, "loss": 0.2592, "lr": 4.722233105354569e-05, "epoch": 0.9886613916306387, "percentage": 15.15, "elapsed_time": "1:22:54", "remaining_time": "7:44:18", "throughput": 2345.81, "total_tokens": 11668480} {"current_steps": 6065, "total_steps": 40000, "loss": 0.1873, "lr": 4.7217831812568815e-05, "epoch": 0.9894771188514561, "percentage": 15.16, "elapsed_time": "1:22:56", "remaining_time": "7:44:03", "throughput": 2346.86, "total_tokens": 11678576} {"current_steps": 6070, "total_steps": 40000, "loss": 0.0882, "lr": 4.721332914532307e-05, "epoch": 0.9902928460722734, "percentage": 15.17, "elapsed_time": "1:22:58", "remaining_time": "7:43:47", "throughput": 2347.88, "total_tokens": 11688480} {"current_steps": 6075, "total_steps": 40000, "loss": 0.2036, "lr": 4.720882305250281e-05, "epoch": 0.9911085732930908, "percentage": 15.19, "elapsed_time": "1:23:00", "remaining_time": "7:43:32", "throughput": 2348.84, "total_tokens": 11698128} {"current_steps": 6080, "total_steps": 40000, "loss": 0.1796, "lr": 4.720431353480295e-05, "epoch": 0.9919243005139081, "percentage": 15.2, "elapsed_time": "1:23:02", "remaining_time": "7:43:16", "throughput": 2349.85, "total_tokens": 11708000} {"current_steps": 6085, "total_steps": 40000, "loss": 0.0977, "lr": 4.719980059291891e-05, "epoch": 0.9927400277347255, "percentage": 15.21, "elapsed_time": "1:23:04", "remaining_time": "7:43:01", "throughput": 2350.55, "total_tokens": 11716352} {"current_steps": 6090, "total_steps": 40000, "loss": 0.1113, "lr": 4.7195284227546634e-05, "epoch": 0.9935557549555428, "percentage": 15.22, "elapsed_time": "1:23:06", "remaining_time": "7:42:45", "throughput": 2351.44, "total_tokens": 11725632} {"current_steps": 6095, "total_steps": 40000, "loss": 0.1146, "lr": 4.7190764439382604e-05, "epoch": 0.9943714821763602, "percentage": 15.24, "elapsed_time": "1:23:08", "remaining_time": "7:42:30", "throughput": 2352.63, "total_tokens": 11736448} {"current_steps": 6100, "total_steps": 40000, "loss": 0.1016, "lr": 4.7186241229123826e-05, "epoch": 0.9951872093971775, "percentage": 15.25, "elapsed_time": "1:23:10", "remaining_time": "7:42:15", "throughput": 2353.52, "total_tokens": 11745712} {"current_steps": 6105, "total_steps": 40000, "loss": 0.1288, "lr": 4.718171459746785e-05, "epoch": 0.996002936617995, "percentage": 15.26, "elapsed_time": "1:23:12", "remaining_time": "7:41:59", "throughput": 2354.31, "total_tokens": 11754512} {"current_steps": 6110, "total_steps": 40000, "loss": 0.0559, "lr": 4.717718454511273e-05, "epoch": 0.9968186638388123, "percentage": 15.28, "elapsed_time": "1:23:14", "remaining_time": "7:41:44", "throughput": 2355.19, "total_tokens": 11763808} {"current_steps": 6115, "total_steps": 40000, "loss": 0.1291, "lr": 4.7172651072757056e-05, "epoch": 0.9976343910596297, "percentage": 15.29, "elapsed_time": "1:23:16", "remaining_time": "7:41:29", "throughput": 2356.18, "total_tokens": 11773632} {"current_steps": 6120, "total_steps": 40000, "loss": 0.0955, "lr": 4.7168114181099945e-05, "epoch": 0.998450118280447, "percentage": 15.3, "elapsed_time": "1:23:18", "remaining_time": "7:41:14", "throughput": 2357.25, "total_tokens": 11783840} {"current_steps": 6125, "total_steps": 40000, "loss": 0.2449, "lr": 4.716357387084105e-05, "epoch": 0.9992658455012644, "percentage": 15.31, "elapsed_time": "1:23:21", "remaining_time": "7:40:58", "throughput": 2358.35, "total_tokens": 11794208} {"current_steps": 6130, "total_steps": 40000, "loss": 0.0648, "lr": 4.715903014268054e-05, "epoch": 1.0, "percentage": 15.32, "elapsed_time": "1:23:22", "remaining_time": "7:40:42", "throughput": 2359.25, "total_tokens": 11803328} {"current_steps": 6135, "total_steps": 40000, "loss": 0.0785, "lr": 4.715448299731911e-05, "epoch": 1.0008157272208174, "percentage": 15.34, "elapsed_time": "1:23:25", "remaining_time": "7:40:29", "throughput": 2360.19, "total_tokens": 11813488} {"current_steps": 6140, "total_steps": 40000, "loss": 0.0709, "lr": 4.7149932435457986e-05, "epoch": 1.0016314544416347, "percentage": 15.35, "elapsed_time": "1:23:27", "remaining_time": "7:40:13", "throughput": 2361.25, "total_tokens": 11823680} {"current_steps": 6145, "total_steps": 40000, "loss": 0.2011, "lr": 4.714537845779894e-05, "epoch": 1.002447181662452, "percentage": 15.36, "elapsed_time": "1:23:29", "remaining_time": "7:39:58", "throughput": 2362.18, "total_tokens": 11833184} {"current_steps": 6150, "total_steps": 40000, "loss": 0.0872, "lr": 4.714082106504423e-05, "epoch": 1.0032629088832694, "percentage": 15.38, "elapsed_time": "1:23:31", "remaining_time": "7:39:43", "throughput": 2363.31, "total_tokens": 11843792} {"current_steps": 6155, "total_steps": 40000, "loss": 0.1119, "lr": 4.713626025789667e-05, "epoch": 1.0040786361040868, "percentage": 15.39, "elapsed_time": "1:23:33", "remaining_time": "7:39:28", "throughput": 2364.25, "total_tokens": 11853344} {"current_steps": 6160, "total_steps": 40000, "loss": 0.1906, "lr": 4.7131696037059606e-05, "epoch": 1.0048943633249041, "percentage": 15.4, "elapsed_time": "1:23:35", "remaining_time": "7:39:13", "throughput": 2365.08, "total_tokens": 11862416} {"current_steps": 6165, "total_steps": 40000, "loss": 0.1038, "lr": 4.712712840323689e-05, "epoch": 1.0057100905457215, "percentage": 15.41, "elapsed_time": "1:23:37", "remaining_time": "7:38:58", "throughput": 2366.13, "total_tokens": 11872544} {"current_steps": 6170, "total_steps": 40000, "loss": 0.1875, "lr": 4.71225573571329e-05, "epoch": 1.0065258177665388, "percentage": 15.43, "elapsed_time": "1:23:39", "remaining_time": "7:38:43", "throughput": 2367.22, "total_tokens": 11882896} {"current_steps": 6175, "total_steps": 40000, "loss": 0.1409, "lr": 4.711798289945256e-05, "epoch": 1.0073415449873562, "percentage": 15.44, "elapsed_time": "1:23:41", "remaining_time": "7:38:28", "throughput": 2368.16, "total_tokens": 11892512} {"current_steps": 6180, "total_steps": 40000, "loss": 0.1118, "lr": 4.71134050309013e-05, "epoch": 1.0081572722081735, "percentage": 15.45, "elapsed_time": "1:23:43", "remaining_time": "7:38:13", "throughput": 2369.19, "total_tokens": 11902592} {"current_steps": 6185, "total_steps": 40000, "loss": 0.1241, "lr": 4.710882375218509e-05, "epoch": 1.0089729994289909, "percentage": 15.46, "elapsed_time": "1:23:45", "remaining_time": "7:37:58", "throughput": 2370.24, "total_tokens": 11912752} {"current_steps": 6190, "total_steps": 40000, "loss": 0.1164, "lr": 4.7104239064010424e-05, "epoch": 1.0097887266498082, "percentage": 15.47, "elapsed_time": "1:23:48", "remaining_time": "7:37:43", "throughput": 2371.18, "total_tokens": 11922368} {"current_steps": 6195, "total_steps": 40000, "loss": 0.0859, "lr": 4.709965096708432e-05, "epoch": 1.0106044538706256, "percentage": 15.49, "elapsed_time": "1:23:50", "remaining_time": "7:37:28", "throughput": 2371.76, "total_tokens": 11930176} {"current_steps": 6200, "total_steps": 40000, "loss": 0.0942, "lr": 4.709505946211431e-05, "epoch": 1.011420181091443, "percentage": 15.5, "elapsed_time": "1:23:52", "remaining_time": "7:37:13", "throughput": 2372.8, "total_tokens": 11940352} {"current_steps": 6200, "total_steps": 40000, "eval_loss": 0.14942443370819092, "epoch": 1.011420181091443, "percentage": 15.5, "elapsed_time": "1:25:12", "remaining_time": "7:44:32", "throughput": 2335.41, "total_tokens": 11940352} {"current_steps": 6205, "total_steps": 40000, "loss": 0.0941, "lr": 4.709046454980846e-05, "epoch": 1.0122359083122603, "percentage": 15.51, "elapsed_time": "1:25:16", "remaining_time": "7:44:27", "throughput": 2335.75, "total_tokens": 11951168} {"current_steps": 6210, "total_steps": 40000, "loss": 0.1031, "lr": 4.708586623087538e-05, "epoch": 1.0130516355330776, "percentage": 15.53, "elapsed_time": "1:25:18", "remaining_time": "7:44:12", "throughput": 2336.67, "total_tokens": 11960720} {"current_steps": 6215, "total_steps": 40000, "loss": 0.1016, "lr": 4.708126450602418e-05, "epoch": 1.013867362753895, "percentage": 15.54, "elapsed_time": "1:25:20", "remaining_time": "7:43:56", "throughput": 2337.83, "total_tokens": 11971488} {"current_steps": 6220, "total_steps": 40000, "loss": 0.0317, "lr": 4.7076659375964495e-05, "epoch": 1.0146830899747123, "percentage": 15.55, "elapsed_time": "1:25:22", "remaining_time": "7:43:41", "throughput": 2338.87, "total_tokens": 11981648} {"current_steps": 6225, "total_steps": 40000, "loss": 0.0911, "lr": 4.707205084140651e-05, "epoch": 1.01549881719553, "percentage": 15.56, "elapsed_time": "1:25:24", "remaining_time": "7:43:26", "throughput": 2339.47, "total_tokens": 11989584} {"current_steps": 6230, "total_steps": 40000, "loss": 0.108, "lr": 4.7067438903060904e-05, "epoch": 1.0163145444163473, "percentage": 15.57, "elapsed_time": "1:25:26", "remaining_time": "7:43:10", "throughput": 2340.49, "total_tokens": 11999600} {"current_steps": 6235, "total_steps": 40000, "loss": 0.1346, "lr": 4.70628235616389e-05, "epoch": 1.0171302716371646, "percentage": 15.59, "elapsed_time": "1:25:29", "remaining_time": "7:42:55", "throughput": 2341.33, "total_tokens": 12008752} {"current_steps": 6240, "total_steps": 40000, "loss": 0.0991, "lr": 4.7058204817852256e-05, "epoch": 1.017945998857982, "percentage": 15.6, "elapsed_time": "1:25:31", "remaining_time": "7:42:40", "throughput": 2342.21, "total_tokens": 12018128} {"current_steps": 6245, "total_steps": 40000, "loss": 0.1736, "lr": 4.705358267241322e-05, "epoch": 1.0187617260787993, "percentage": 15.61, "elapsed_time": "1:25:33", "remaining_time": "7:42:25", "throughput": 2343.26, "total_tokens": 12028368} {"current_steps": 6250, "total_steps": 40000, "loss": 0.1495, "lr": 4.704895712603459e-05, "epoch": 1.0195774532996167, "percentage": 15.62, "elapsed_time": "1:25:35", "remaining_time": "7:42:10", "throughput": 2344.26, "total_tokens": 12038336} {"current_steps": 6255, "total_steps": 40000, "loss": 0.1679, "lr": 4.704432817942969e-05, "epoch": 1.020393180520434, "percentage": 15.64, "elapsed_time": "1:25:37", "remaining_time": "7:41:55", "throughput": 2345.16, "total_tokens": 12047808} {"current_steps": 6260, "total_steps": 40000, "loss": 0.322, "lr": 4.703969583331236e-05, "epoch": 1.0212089077412514, "percentage": 15.65, "elapsed_time": "1:25:39", "remaining_time": "7:41:40", "throughput": 2346.14, "total_tokens": 12057680} {"current_steps": 6265, "total_steps": 40000, "loss": 0.0689, "lr": 4.7035060088396965e-05, "epoch": 1.0220246349620687, "percentage": 15.66, "elapsed_time": "1:25:41", "remaining_time": "7:41:24", "throughput": 2347.03, "total_tokens": 12067088} {"current_steps": 6270, "total_steps": 40000, "loss": 0.1128, "lr": 4.703042094539839e-05, "epoch": 1.022840362182886, "percentage": 15.68, "elapsed_time": "1:25:43", "remaining_time": "7:41:09", "throughput": 2347.7, "total_tokens": 12075392} {"current_steps": 6275, "total_steps": 40000, "loss": 0.1725, "lr": 4.702577840503206e-05, "epoch": 1.0236560894037035, "percentage": 15.69, "elapsed_time": "1:25:45", "remaining_time": "7:40:54", "throughput": 2348.57, "total_tokens": 12084752} {"current_steps": 6280, "total_steps": 40000, "loss": 0.1351, "lr": 4.70211324680139e-05, "epoch": 1.0244718166245208, "percentage": 15.7, "elapsed_time": "1:25:47", "remaining_time": "7:40:39", "throughput": 2349.51, "total_tokens": 12094448} {"current_steps": 6285, "total_steps": 40000, "loss": 0.0954, "lr": 4.7016483135060386e-05, "epoch": 1.0252875438453382, "percentage": 15.71, "elapsed_time": "1:25:49", "remaining_time": "7:40:24", "throughput": 2350.72, "total_tokens": 12105504} {"current_steps": 6290, "total_steps": 40000, "loss": 0.0846, "lr": 4.701183040688849e-05, "epoch": 1.0261032710661555, "percentage": 15.72, "elapsed_time": "1:25:51", "remaining_time": "7:40:09", "throughput": 2351.62, "total_tokens": 12114992} {"current_steps": 6295, "total_steps": 40000, "loss": 0.0323, "lr": 4.700717428421573e-05, "epoch": 1.0269189982869729, "percentage": 15.74, "elapsed_time": "1:25:53", "remaining_time": "7:39:54", "throughput": 2352.7, "total_tokens": 12125424} {"current_steps": 6300, "total_steps": 40000, "loss": 0.194, "lr": 4.700251476776014e-05, "epoch": 1.0277347255077902, "percentage": 15.75, "elapsed_time": "1:25:55", "remaining_time": "7:39:39", "throughput": 2353.56, "total_tokens": 12134704} {"current_steps": 6305, "total_steps": 40000, "loss": 0.1032, "lr": 4.699785185824026e-05, "epoch": 1.0285504527286076, "percentage": 15.76, "elapsed_time": "1:25:57", "remaining_time": "7:39:25", "throughput": 2354.48, "total_tokens": 12144320} {"current_steps": 6310, "total_steps": 40000, "loss": 0.0818, "lr": 4.699318555637519e-05, "epoch": 1.029366179949425, "percentage": 15.78, "elapsed_time": "1:26:00", "remaining_time": "7:39:10", "throughput": 2355.34, "total_tokens": 12153680} {"current_steps": 6315, "total_steps": 40000, "loss": 0.1318, "lr": 4.6988515862884525e-05, "epoch": 1.0301819071702423, "percentage": 15.79, "elapsed_time": "1:26:02", "remaining_time": "7:38:55", "throughput": 2356.52, "total_tokens": 12164656} {"current_steps": 6320, "total_steps": 40000, "loss": 0.1842, "lr": 4.698384277848838e-05, "epoch": 1.0309976343910596, "percentage": 15.8, "elapsed_time": "1:26:04", "remaining_time": "7:38:40", "throughput": 2357.47, "total_tokens": 12174416} {"current_steps": 6325, "total_steps": 40000, "loss": 0.2469, "lr": 4.6979166303907425e-05, "epoch": 1.031813361611877, "percentage": 15.81, "elapsed_time": "1:26:06", "remaining_time": "7:38:25", "throughput": 2358.46, "total_tokens": 12184384} {"current_steps": 6330, "total_steps": 40000, "loss": 0.0965, "lr": 4.697448643986281e-05, "epoch": 1.0326290888326943, "percentage": 15.82, "elapsed_time": "1:26:08", "remaining_time": "7:38:10", "throughput": 2359.51, "total_tokens": 12194688} {"current_steps": 6335, "total_steps": 40000, "loss": 0.1518, "lr": 4.696980318707624e-05, "epoch": 1.0334448160535117, "percentage": 15.84, "elapsed_time": "1:26:10", "remaining_time": "7:37:56", "throughput": 2360.38, "total_tokens": 12204032} {"current_steps": 6340, "total_steps": 40000, "loss": 0.1178, "lr": 4.6965116546269924e-05, "epoch": 1.034260543274329, "percentage": 15.85, "elapsed_time": "1:26:12", "remaining_time": "7:37:41", "throughput": 2361.31, "total_tokens": 12213728} {"current_steps": 6345, "total_steps": 40000, "loss": 0.1189, "lr": 4.6960426518166615e-05, "epoch": 1.0350762704951464, "percentage": 15.86, "elapsed_time": "1:26:14", "remaining_time": "7:37:26", "throughput": 2362.33, "total_tokens": 12223888} {"current_steps": 6350, "total_steps": 40000, "loss": 0.0513, "lr": 4.6955733103489556e-05, "epoch": 1.0358919977159637, "percentage": 15.88, "elapsed_time": "1:26:16", "remaining_time": "7:37:11", "throughput": 2363.43, "total_tokens": 12234544} {"current_steps": 6355, "total_steps": 40000, "loss": 0.1402, "lr": 4.695103630296255e-05, "epoch": 1.036707724936781, "percentage": 15.89, "elapsed_time": "1:26:18", "remaining_time": "7:36:57", "throughput": 2364.47, "total_tokens": 12244800} {"current_steps": 6360, "total_steps": 40000, "loss": 0.164, "lr": 4.694633611730988e-05, "epoch": 1.0375234521575984, "percentage": 15.9, "elapsed_time": "1:26:20", "remaining_time": "7:36:42", "throughput": 2365.35, "total_tokens": 12254240} {"current_steps": 6365, "total_steps": 40000, "loss": 0.1212, "lr": 4.694163254725639e-05, "epoch": 1.0383391793784158, "percentage": 15.91, "elapsed_time": "1:26:22", "remaining_time": "7:36:27", "throughput": 2366.45, "total_tokens": 12264800} {"current_steps": 6370, "total_steps": 40000, "loss": 0.1735, "lr": 4.693692559352743e-05, "epoch": 1.0391549065992332, "percentage": 15.93, "elapsed_time": "1:26:24", "remaining_time": "7:36:13", "throughput": 2367.44, "total_tokens": 12274816} {"current_steps": 6375, "total_steps": 40000, "loss": 0.0292, "lr": 4.693221525684886e-05, "epoch": 1.0399706338200505, "percentage": 15.94, "elapsed_time": "1:26:26", "remaining_time": "7:35:58", "throughput": 2368.25, "total_tokens": 12283904} {"current_steps": 6380, "total_steps": 40000, "loss": 0.1231, "lr": 4.6927501537947084e-05, "epoch": 1.0407863610408679, "percentage": 15.95, "elapsed_time": "1:26:28", "remaining_time": "7:35:43", "throughput": 2369.08, "total_tokens": 12293088} {"current_steps": 6385, "total_steps": 40000, "loss": 0.1449, "lr": 4.692278443754901e-05, "epoch": 1.0416020882616852, "percentage": 15.96, "elapsed_time": "1:26:31", "remaining_time": "7:35:29", "throughput": 2369.93, "total_tokens": 12302448} {"current_steps": 6390, "total_steps": 40000, "loss": 0.1482, "lr": 4.691806395638208e-05, "epoch": 1.0424178154825026, "percentage": 15.97, "elapsed_time": "1:26:33", "remaining_time": "7:35:14", "throughput": 2370.83, "total_tokens": 12311984} {"current_steps": 6395, "total_steps": 40000, "loss": 0.114, "lr": 4.6913340095174255e-05, "epoch": 1.04323354270332, "percentage": 15.99, "elapsed_time": "1:26:35", "remaining_time": "7:35:00", "throughput": 2372.05, "total_tokens": 12323232} {"current_steps": 6400, "total_steps": 40000, "loss": 0.1933, "lr": 4.690861285465399e-05, "epoch": 1.0440492699241373, "percentage": 16.0, "elapsed_time": "1:26:37", "remaining_time": "7:34:45", "throughput": 2372.78, "total_tokens": 12331920} {"current_steps": 6400, "total_steps": 40000, "eval_loss": 0.14100046455860138, "epoch": 1.0440492699241373, "percentage": 16.0, "elapsed_time": "1:27:57", "remaining_time": "7:41:48", "throughput": 2336.53, "total_tokens": 12331920} {"current_steps": 6405, "total_steps": 40000, "loss": 0.1121, "lr": 4.690388223555031e-05, "epoch": 1.0448649971449546, "percentage": 16.01, "elapsed_time": "1:28:01", "remaining_time": "7:41:42", "throughput": 2336.74, "total_tokens": 12341824} {"current_steps": 6410, "total_steps": 40000, "loss": 0.0415, "lr": 4.689914823859273e-05, "epoch": 1.0456807243657722, "percentage": 16.02, "elapsed_time": "1:28:03", "remaining_time": "7:41:27", "throughput": 2337.8, "total_tokens": 12352224} {"current_steps": 6415, "total_steps": 40000, "loss": 0.0252, "lr": 4.689441086451129e-05, "epoch": 1.0464964515865895, "percentage": 16.04, "elapsed_time": "1:28:05", "remaining_time": "7:41:13", "throughput": 2338.46, "total_tokens": 12360544} {"current_steps": 6420, "total_steps": 40000, "loss": 0.1551, "lr": 4.688967011403655e-05, "epoch": 1.047312178807407, "percentage": 16.05, "elapsed_time": "1:28:07", "remaining_time": "7:40:58", "throughput": 2339.44, "total_tokens": 12370592} {"current_steps": 6425, "total_steps": 40000, "loss": 0.0399, "lr": 4.68849259878996e-05, "epoch": 1.0481279060282243, "percentage": 16.06, "elapsed_time": "1:28:09", "remaining_time": "7:40:43", "throughput": 2340.48, "total_tokens": 12380912} {"current_steps": 6430, "total_steps": 40000, "loss": 0.1527, "lr": 4.6880178486832036e-05, "epoch": 1.0489436332490416, "percentage": 16.07, "elapsed_time": "1:28:11", "remaining_time": "7:40:28", "throughput": 2341.37, "total_tokens": 12390432} {"current_steps": 6435, "total_steps": 40000, "loss": 0.1558, "lr": 4.687542761156598e-05, "epoch": 1.049759360469859, "percentage": 16.09, "elapsed_time": "1:28:14", "remaining_time": "7:40:13", "throughput": 2342.3, "total_tokens": 12400240} {"current_steps": 6440, "total_steps": 40000, "loss": 0.0321, "lr": 4.6870673362834096e-05, "epoch": 1.0505750876906763, "percentage": 16.1, "elapsed_time": "1:28:16", "remaining_time": "7:39:58", "throughput": 2343.04, "total_tokens": 12409024} {"current_steps": 6445, "total_steps": 40000, "loss": 0.0978, "lr": 4.6865915741369526e-05, "epoch": 1.0513908149114937, "percentage": 16.11, "elapsed_time": "1:28:18", "remaining_time": "7:39:44", "throughput": 2344.17, "total_tokens": 12419792} {"current_steps": 6450, "total_steps": 40000, "loss": 0.1755, "lr": 4.686115474790597e-05, "epoch": 1.052206542132311, "percentage": 16.12, "elapsed_time": "1:28:20", "remaining_time": "7:39:29", "throughput": 2345.1, "total_tokens": 12429600} {"current_steps": 6455, "total_steps": 40000, "loss": 0.1368, "lr": 4.685639038317762e-05, "epoch": 1.0530222693531284, "percentage": 16.14, "elapsed_time": "1:28:22", "remaining_time": "7:39:14", "throughput": 2345.79, "total_tokens": 12438064} {"current_steps": 6460, "total_steps": 40000, "loss": 0.0139, "lr": 4.685162264791921e-05, "epoch": 1.0538379965739457, "percentage": 16.15, "elapsed_time": "1:28:24", "remaining_time": "7:38:59", "throughput": 2346.98, "total_tokens": 12449232} {"current_steps": 6465, "total_steps": 40000, "loss": 0.1463, "lr": 4.684685154286599e-05, "epoch": 1.054653723794763, "percentage": 16.16, "elapsed_time": "1:28:26", "remaining_time": "7:38:45", "throughput": 2347.8, "total_tokens": 12458448} {"current_steps": 6470, "total_steps": 40000, "loss": 0.0855, "lr": 4.684207706875371e-05, "epoch": 1.0554694510155804, "percentage": 16.18, "elapsed_time": "1:28:28", "remaining_time": "7:38:30", "throughput": 2348.72, "total_tokens": 12468144} {"current_steps": 6475, "total_steps": 40000, "loss": 0.0915, "lr": 4.683729922631866e-05, "epoch": 1.0562851782363978, "percentage": 16.19, "elapsed_time": "1:28:30", "remaining_time": "7:38:16", "throughput": 2349.64, "total_tokens": 12477936} {"current_steps": 6480, "total_steps": 40000, "loss": 0.1252, "lr": 4.683251801629765e-05, "epoch": 1.0571009054572151, "percentage": 16.2, "elapsed_time": "1:28:32", "remaining_time": "7:38:01", "throughput": 2350.6, "total_tokens": 12487888} {"current_steps": 6485, "total_steps": 40000, "loss": 0.1003, "lr": 4.6827733439428e-05, "epoch": 1.0579166326780325, "percentage": 16.21, "elapsed_time": "1:28:34", "remaining_time": "7:37:46", "throughput": 2351.63, "total_tokens": 12498176} {"current_steps": 6490, "total_steps": 40000, "loss": 0.1228, "lr": 4.682294549644754e-05, "epoch": 1.0587323598988498, "percentage": 16.23, "elapsed_time": "1:28:36", "remaining_time": "7:37:32", "throughput": 2352.67, "total_tokens": 12508560} {"current_steps": 6495, "total_steps": 40000, "loss": 0.0883, "lr": 4.681815418809464e-05, "epoch": 1.0595480871196672, "percentage": 16.24, "elapsed_time": "1:28:38", "remaining_time": "7:37:17", "throughput": 2353.46, "total_tokens": 12517632} {"current_steps": 6500, "total_steps": 40000, "loss": 0.0623, "lr": 4.681335951510819e-05, "epoch": 1.0603638143404845, "percentage": 16.25, "elapsed_time": "1:28:40", "remaining_time": "7:37:03", "throughput": 2354.14, "total_tokens": 12526112} {"current_steps": 6505, "total_steps": 40000, "loss": 0.1835, "lr": 4.6808561478227576e-05, "epoch": 1.061179541561302, "percentage": 16.26, "elapsed_time": "1:28:42", "remaining_time": "7:36:48", "throughput": 2355.19, "total_tokens": 12536544} {"current_steps": 6510, "total_steps": 40000, "loss": 0.0805, "lr": 4.680376007819271e-05, "epoch": 1.0619952687821193, "percentage": 16.28, "elapsed_time": "1:28:45", "remaining_time": "7:36:34", "throughput": 2356.19, "total_tokens": 12546784} {"current_steps": 6515, "total_steps": 40000, "loss": 0.1405, "lr": 4.679895531574405e-05, "epoch": 1.0628109960029366, "percentage": 16.29, "elapsed_time": "1:28:47", "remaining_time": "7:36:19", "throughput": 2357.21, "total_tokens": 12557072} {"current_steps": 6520, "total_steps": 40000, "loss": 0.2206, "lr": 4.679414719162253e-05, "epoch": 1.063626723223754, "percentage": 16.3, "elapsed_time": "1:28:49", "remaining_time": "7:36:05", "throughput": 2358.01, "total_tokens": 12566176} {"current_steps": 6525, "total_steps": 40000, "loss": 0.0839, "lr": 4.6789335706569635e-05, "epoch": 1.0644424504445713, "percentage": 16.31, "elapsed_time": "1:28:51", "remaining_time": "7:35:50", "throughput": 2358.87, "total_tokens": 12575632} {"current_steps": 6530, "total_steps": 40000, "loss": 0.0983, "lr": 4.678452086132734e-05, "epoch": 1.0652581776653887, "percentage": 16.32, "elapsed_time": "1:28:53", "remaining_time": "7:35:36", "throughput": 2359.9, "total_tokens": 12586000} {"current_steps": 6535, "total_steps": 40000, "loss": 0.1503, "lr": 4.677970265663818e-05, "epoch": 1.066073904886206, "percentage": 16.34, "elapsed_time": "1:28:55", "remaining_time": "7:35:21", "throughput": 2360.99, "total_tokens": 12596704} {"current_steps": 6540, "total_steps": 40000, "loss": 0.0941, "lr": 4.677488109324517e-05, "epoch": 1.0668896321070234, "percentage": 16.35, "elapsed_time": "1:28:57", "remaining_time": "7:35:07", "throughput": 2361.68, "total_tokens": 12605264} {"current_steps": 6545, "total_steps": 40000, "loss": 0.1031, "lr": 4.6770056171891846e-05, "epoch": 1.0677053593278407, "percentage": 16.36, "elapsed_time": "1:28:59", "remaining_time": "7:34:52", "throughput": 2362.81, "total_tokens": 12616192} {"current_steps": 6550, "total_steps": 40000, "loss": 0.045, "lr": 4.6765227893322286e-05, "epoch": 1.068521086548658, "percentage": 16.38, "elapsed_time": "1:29:01", "remaining_time": "7:34:38", "throughput": 2363.75, "total_tokens": 12626096} {"current_steps": 6555, "total_steps": 40000, "loss": 0.1586, "lr": 4.676039625828107e-05, "epoch": 1.0693368137694754, "percentage": 16.39, "elapsed_time": "1:29:03", "remaining_time": "7:34:24", "throughput": 2364.73, "total_tokens": 12636208} {"current_steps": 6560, "total_steps": 40000, "loss": 0.1111, "lr": 4.675556126751328e-05, "epoch": 1.0701525409902928, "percentage": 16.4, "elapsed_time": "1:29:05", "remaining_time": "7:34:09", "throughput": 2365.68, "total_tokens": 12646144} {"current_steps": 6565, "total_steps": 40000, "loss": 0.0256, "lr": 4.6750722921764556e-05, "epoch": 1.0709682682111101, "percentage": 16.41, "elapsed_time": "1:29:07", "remaining_time": "7:33:55", "throughput": 2366.77, "total_tokens": 12656880} {"current_steps": 6570, "total_steps": 40000, "loss": 0.0763, "lr": 4.674588122178102e-05, "epoch": 1.0717839954319275, "percentage": 16.43, "elapsed_time": "1:29:09", "remaining_time": "7:33:41", "throughput": 2367.68, "total_tokens": 12666640} {"current_steps": 6575, "total_steps": 40000, "loss": 0.0691, "lr": 4.674103616830931e-05, "epoch": 1.0725997226527448, "percentage": 16.44, "elapsed_time": "1:29:11", "remaining_time": "7:33:27", "throughput": 2368.75, "total_tokens": 12677264} {"current_steps": 6580, "total_steps": 40000, "loss": 0.2272, "lr": 4.673618776209663e-05, "epoch": 1.0734154498735622, "percentage": 16.45, "elapsed_time": "1:29:13", "remaining_time": "7:33:12", "throughput": 2369.81, "total_tokens": 12687808} {"current_steps": 6585, "total_steps": 40000, "loss": 0.0615, "lr": 4.673133600389063e-05, "epoch": 1.0742311770943795, "percentage": 16.46, "elapsed_time": "1:29:16", "remaining_time": "7:32:58", "throughput": 2370.61, "total_tokens": 12697024} {"current_steps": 6590, "total_steps": 40000, "loss": 0.0827, "lr": 4.672648089443953e-05, "epoch": 1.075046904315197, "percentage": 16.48, "elapsed_time": "1:29:18", "remaining_time": "7:32:44", "throughput": 2371.43, "total_tokens": 12706304} {"current_steps": 6595, "total_steps": 40000, "loss": 0.2231, "lr": 4.672162243449204e-05, "epoch": 1.0758626315360145, "percentage": 16.49, "elapsed_time": "1:29:20", "remaining_time": "7:32:30", "throughput": 2372.48, "total_tokens": 12716768} {"current_steps": 6600, "total_steps": 40000, "loss": 0.0982, "lr": 4.67167606247974e-05, "epoch": 1.0766783587568316, "percentage": 16.5, "elapsed_time": "1:29:22", "remaining_time": "7:32:15", "throughput": 2373.35, "total_tokens": 12726352} {"current_steps": 6600, "total_steps": 40000, "eval_loss": 0.14559750258922577, "epoch": 1.0766783587568316, "percentage": 16.5, "elapsed_time": "1:30:42", "remaining_time": "7:39:03", "throughput": 2338.22, "total_tokens": 12726352} {"current_steps": 6605, "total_steps": 40000, "loss": 0.0293, "lr": 4.671189546610536e-05, "epoch": 1.0774940859776492, "percentage": 16.51, "elapsed_time": "1:30:46", "remaining_time": "7:38:58", "throughput": 2338.43, "total_tokens": 12736480} {"current_steps": 6610, "total_steps": 40000, "loss": 0.0638, "lr": 4.67070269591662e-05, "epoch": 1.0783098131984665, "percentage": 16.53, "elapsed_time": "1:30:48", "remaining_time": "7:38:43", "throughput": 2339.29, "total_tokens": 12745984} {"current_steps": 6615, "total_steps": 40000, "loss": 0.0554, "lr": 4.670215510473068e-05, "epoch": 1.0791255404192839, "percentage": 16.54, "elapsed_time": "1:30:50", "remaining_time": "7:38:28", "throughput": 2340.23, "total_tokens": 12755920} {"current_steps": 6620, "total_steps": 40000, "loss": 0.0915, "lr": 4.669727990355013e-05, "epoch": 1.0799412676401012, "percentage": 16.55, "elapsed_time": "1:30:52", "remaining_time": "7:38:14", "throughput": 2341.08, "total_tokens": 12765376} {"current_steps": 6625, "total_steps": 40000, "loss": 0.1013, "lr": 4.669240135637635e-05, "epoch": 1.0807569948609186, "percentage": 16.56, "elapsed_time": "1:30:54", "remaining_time": "7:38:00", "throughput": 2341.73, "total_tokens": 12773776} {"current_steps": 6630, "total_steps": 40000, "loss": 0.1489, "lr": 4.6687519463961675e-05, "epoch": 1.081572722081736, "percentage": 16.57, "elapsed_time": "1:30:56", "remaining_time": "7:37:45", "throughput": 2342.77, "total_tokens": 12784288} {"current_steps": 6635, "total_steps": 40000, "loss": 0.0835, "lr": 4.668263422705896e-05, "epoch": 1.0823884493025533, "percentage": 16.59, "elapsed_time": "1:30:58", "remaining_time": "7:37:31", "throughput": 2343.53, "total_tokens": 12793264} {"current_steps": 6640, "total_steps": 40000, "loss": 0.1338, "lr": 4.667774564642156e-05, "epoch": 1.0832041765233706, "percentage": 16.6, "elapsed_time": "1:31:01", "remaining_time": "7:37:16", "throughput": 2344.19, "total_tokens": 12801728} {"current_steps": 6645, "total_steps": 40000, "loss": 0.1104, "lr": 4.6672853722803365e-05, "epoch": 1.084019903744188, "percentage": 16.61, "elapsed_time": "1:31:03", "remaining_time": "7:37:02", "throughput": 2345.18, "total_tokens": 12811952} {"current_steps": 6650, "total_steps": 40000, "loss": 0.0771, "lr": 4.666795845695877e-05, "epoch": 1.0848356309650053, "percentage": 16.62, "elapsed_time": "1:31:05", "remaining_time": "7:36:48", "throughput": 2346.06, "total_tokens": 12821632} {"current_steps": 6655, "total_steps": 40000, "loss": 0.202, "lr": 4.666305984964269e-05, "epoch": 1.0856513581858227, "percentage": 16.64, "elapsed_time": "1:31:07", "remaining_time": "7:36:33", "throughput": 2346.86, "total_tokens": 12830832} {"current_steps": 6660, "total_steps": 40000, "loss": 0.3061, "lr": 4.6658157901610535e-05, "epoch": 1.08646708540664, "percentage": 16.65, "elapsed_time": "1:31:09", "remaining_time": "7:36:19", "throughput": 2347.93, "total_tokens": 12841520} {"current_steps": 6665, "total_steps": 40000, "loss": 0.1183, "lr": 4.665325261361826e-05, "epoch": 1.0872828126274574, "percentage": 16.66, "elapsed_time": "1:31:11", "remaining_time": "7:36:04", "throughput": 2349.06, "total_tokens": 12852560} {"current_steps": 6670, "total_steps": 40000, "loss": 0.1435, "lr": 4.664834398642232e-05, "epoch": 1.0880985398482748, "percentage": 16.68, "elapsed_time": "1:31:13", "remaining_time": "7:35:50", "throughput": 2349.89, "total_tokens": 12861952} {"current_steps": 6675, "total_steps": 40000, "loss": 0.1147, "lr": 4.6643432020779686e-05, "epoch": 1.0889142670690921, "percentage": 16.69, "elapsed_time": "1:31:15", "remaining_time": "7:35:36", "throughput": 2350.76, "total_tokens": 12871600} {"current_steps": 6680, "total_steps": 40000, "loss": 0.1058, "lr": 4.663851671744786e-05, "epoch": 1.0897299942899095, "percentage": 16.7, "elapsed_time": "1:31:17", "remaining_time": "7:35:22", "throughput": 2351.69, "total_tokens": 12881520} {"current_steps": 6685, "total_steps": 40000, "loss": 0.0975, "lr": 4.6633598077184815e-05, "epoch": 1.0905457215107268, "percentage": 16.71, "elapsed_time": "1:31:19", "remaining_time": "7:35:08", "throughput": 2352.41, "total_tokens": 12890368} {"current_steps": 6690, "total_steps": 40000, "loss": 0.1462, "lr": 4.662867610074908e-05, "epoch": 1.0913614487315442, "percentage": 16.73, "elapsed_time": "1:31:21", "remaining_time": "7:34:53", "throughput": 2353.36, "total_tokens": 12900384} {"current_steps": 6695, "total_steps": 40000, "loss": 0.0568, "lr": 4.6623750788899696e-05, "epoch": 1.0921771759523615, "percentage": 16.74, "elapsed_time": "1:31:23", "remaining_time": "7:34:39", "throughput": 2354.05, "total_tokens": 12909056} {"current_steps": 6700, "total_steps": 40000, "loss": 0.151, "lr": 4.6618822142396195e-05, "epoch": 1.0929929031731789, "percentage": 16.75, "elapsed_time": "1:31:25", "remaining_time": "7:34:25", "throughput": 2355.0, "total_tokens": 12919088} {"current_steps": 6705, "total_steps": 40000, "loss": 0.1348, "lr": 4.661389016199864e-05, "epoch": 1.0938086303939962, "percentage": 16.76, "elapsed_time": "1:31:27", "remaining_time": "7:34:11", "throughput": 2356.01, "total_tokens": 12929504} {"current_steps": 6710, "total_steps": 40000, "loss": 0.0942, "lr": 4.660895484846761e-05, "epoch": 1.0946243576148136, "percentage": 16.78, "elapsed_time": "1:31:29", "remaining_time": "7:33:57", "throughput": 2356.94, "total_tokens": 12939520} {"current_steps": 6715, "total_steps": 40000, "loss": 0.0526, "lr": 4.660401620256418e-05, "epoch": 1.095440084835631, "percentage": 16.79, "elapsed_time": "1:31:32", "remaining_time": "7:33:42", "throughput": 2357.75, "total_tokens": 12948832} {"current_steps": 6720, "total_steps": 40000, "loss": 0.0351, "lr": 4.659907422504997e-05, "epoch": 1.0962558120564483, "percentage": 16.8, "elapsed_time": "1:31:34", "remaining_time": "7:33:28", "throughput": 2358.57, "total_tokens": 12958176} {"current_steps": 6725, "total_steps": 40000, "loss": 0.1264, "lr": 4.6594128916687074e-05, "epoch": 1.0970715392772656, "percentage": 16.81, "elapsed_time": "1:31:36", "remaining_time": "7:33:14", "throughput": 2359.32, "total_tokens": 12967168} {"current_steps": 6730, "total_steps": 40000, "loss": 0.0867, "lr": 4.658918027823813e-05, "epoch": 1.097887266498083, "percentage": 16.83, "elapsed_time": "1:31:38", "remaining_time": "7:33:00", "throughput": 2360.06, "total_tokens": 12976096} {"current_steps": 6735, "total_steps": 40000, "loss": 0.0217, "lr": 4.658422831046628e-05, "epoch": 1.0987029937189003, "percentage": 16.84, "elapsed_time": "1:31:40", "remaining_time": "7:32:46", "throughput": 2360.77, "total_tokens": 12984880} {"current_steps": 6740, "total_steps": 40000, "loss": 0.0783, "lr": 4.657927301413518e-05, "epoch": 1.0995187209397177, "percentage": 16.85, "elapsed_time": "1:31:42", "remaining_time": "7:32:32", "throughput": 2361.76, "total_tokens": 12995232} {"current_steps": 6745, "total_steps": 40000, "loss": 0.107, "lr": 4.657431439000901e-05, "epoch": 1.100334448160535, "percentage": 16.86, "elapsed_time": "1:31:44", "remaining_time": "7:32:18", "throughput": 2362.21, "total_tokens": 13002576} {"current_steps": 6750, "total_steps": 40000, "loss": 0.1428, "lr": 4.656935243885243e-05, "epoch": 1.1011501753813524, "percentage": 16.88, "elapsed_time": "1:31:46", "remaining_time": "7:32:04", "throughput": 2362.95, "total_tokens": 13011584} {"current_steps": 6755, "total_steps": 40000, "loss": 0.0194, "lr": 4.656438716143066e-05, "epoch": 1.1019659026021698, "percentage": 16.89, "elapsed_time": "1:31:48", "remaining_time": "7:31:50", "throughput": 2364.01, "total_tokens": 13022288} {"current_steps": 6760, "total_steps": 40000, "loss": 0.119, "lr": 4.6559418558509384e-05, "epoch": 1.102781629822987, "percentage": 16.9, "elapsed_time": "1:31:50", "remaining_time": "7:31:36", "throughput": 2364.89, "total_tokens": 13032000} {"current_steps": 6765, "total_steps": 40000, "loss": 0.1655, "lr": 4.6554446630854833e-05, "epoch": 1.1035973570438045, "percentage": 16.91, "elapsed_time": "1:31:52", "remaining_time": "7:31:22", "throughput": 2365.47, "total_tokens": 13040048} {"current_steps": 6770, "total_steps": 40000, "loss": 0.1255, "lr": 4.654947137923374e-05, "epoch": 1.1044130842646218, "percentage": 16.93, "elapsed_time": "1:31:54", "remaining_time": "7:31:08", "throughput": 2366.25, "total_tokens": 13049248} {"current_steps": 6775, "total_steps": 40000, "loss": 0.1434, "lr": 4.654449280441335e-05, "epoch": 1.1052288114854392, "percentage": 16.94, "elapsed_time": "1:31:56", "remaining_time": "7:30:54", "throughput": 2366.89, "total_tokens": 13057616} {"current_steps": 6780, "total_steps": 40000, "loss": 0.0835, "lr": 4.653951090716143e-05, "epoch": 1.1060445387062567, "percentage": 16.95, "elapsed_time": "1:31:58", "remaining_time": "7:30:40", "throughput": 2367.58, "total_tokens": 13066320} {"current_steps": 6785, "total_steps": 40000, "loss": 0.2111, "lr": 4.653452568824625e-05, "epoch": 1.1068602659270739, "percentage": 16.96, "elapsed_time": "1:32:00", "remaining_time": "7:30:26", "throughput": 2368.39, "total_tokens": 13075696} {"current_steps": 6790, "total_steps": 40000, "loss": 0.0807, "lr": 4.6529537148436585e-05, "epoch": 1.1076759931478914, "percentage": 16.98, "elapsed_time": "1:32:02", "remaining_time": "7:30:12", "throughput": 2369.35, "total_tokens": 13085872} {"current_steps": 6795, "total_steps": 40000, "loss": 0.1779, "lr": 4.6524545288501734e-05, "epoch": 1.1084917203687088, "percentage": 16.99, "elapsed_time": "1:32:05", "remaining_time": "7:29:59", "throughput": 2370.16, "total_tokens": 13095248} {"current_steps": 6800, "total_steps": 40000, "loss": 0.2651, "lr": 4.6519550109211506e-05, "epoch": 1.1093074475895262, "percentage": 17.0, "elapsed_time": "1:32:07", "remaining_time": "7:29:45", "throughput": 2371.08, "total_tokens": 13105200} {"current_steps": 6800, "total_steps": 40000, "eval_loss": 0.1489081233739853, "epoch": 1.1093074475895262, "percentage": 17.0, "elapsed_time": "1:33:27", "remaining_time": "7:36:18", "throughput": 2337.04, "total_tokens": 13105200} {"current_steps": 6805, "total_steps": 40000, "loss": 0.1436, "lr": 4.651455161133622e-05, "epoch": 1.1101231748103435, "percentage": 17.01, "elapsed_time": "1:33:31", "remaining_time": "7:36:11", "throughput": 2337.08, "total_tokens": 13113760} {"current_steps": 6810, "total_steps": 40000, "loss": 0.0835, "lr": 4.6509549795646704e-05, "epoch": 1.1109389020311609, "percentage": 17.03, "elapsed_time": "1:33:33", "remaining_time": "7:35:57", "throughput": 2338.17, "total_tokens": 13124768} {"current_steps": 6815, "total_steps": 40000, "loss": 0.1966, "lr": 4.6504544662914306e-05, "epoch": 1.1117546292519782, "percentage": 17.04, "elapsed_time": "1:33:35", "remaining_time": "7:35:43", "throughput": 2339.07, "total_tokens": 13134608} {"current_steps": 6820, "total_steps": 40000, "loss": 0.0714, "lr": 4.6499536213910876e-05, "epoch": 1.1125703564727956, "percentage": 17.05, "elapsed_time": "1:33:37", "remaining_time": "7:35:29", "throughput": 2339.88, "total_tokens": 13144000} {"current_steps": 6825, "total_steps": 40000, "loss": 0.1044, "lr": 4.6494524449408786e-05, "epoch": 1.113386083693613, "percentage": 17.06, "elapsed_time": "1:33:39", "remaining_time": "7:35:15", "throughput": 2340.61, "total_tokens": 13152928} {"current_steps": 6830, "total_steps": 40000, "loss": 0.0539, "lr": 4.6489509370180903e-05, "epoch": 1.1142018109144303, "percentage": 17.08, "elapsed_time": "1:33:41", "remaining_time": "7:35:00", "throughput": 2341.41, "total_tokens": 13162288} {"current_steps": 6835, "total_steps": 40000, "loss": 0.0782, "lr": 4.648449097700063e-05, "epoch": 1.1150175381352476, "percentage": 17.09, "elapsed_time": "1:33:43", "remaining_time": "7:34:46", "throughput": 2341.96, "total_tokens": 13170192} {"current_steps": 6840, "total_steps": 40000, "loss": 0.042, "lr": 4.647946927064185e-05, "epoch": 1.115833265356065, "percentage": 17.1, "elapsed_time": "1:33:45", "remaining_time": "7:34:32", "throughput": 2342.77, "total_tokens": 13179584} {"current_steps": 6845, "total_steps": 40000, "loss": 0.0439, "lr": 4.647444425187898e-05, "epoch": 1.1166489925768823, "percentage": 17.11, "elapsed_time": "1:33:47", "remaining_time": "7:34:18", "throughput": 2343.51, "total_tokens": 13188624} {"current_steps": 6850, "total_steps": 40000, "loss": 0.1835, "lr": 4.646941592148695e-05, "epoch": 1.1174647197976997, "percentage": 17.12, "elapsed_time": "1:33:49", "remaining_time": "7:34:04", "throughput": 2344.48, "total_tokens": 13198944} {"current_steps": 6855, "total_steps": 40000, "loss": 0.1292, "lr": 4.646438428024117e-05, "epoch": 1.118280447018517, "percentage": 17.14, "elapsed_time": "1:33:51", "remaining_time": "7:33:50", "throughput": 2345.26, "total_tokens": 13208176} {"current_steps": 6860, "total_steps": 40000, "loss": 0.2033, "lr": 4.64593493289176e-05, "epoch": 1.1190961742393344, "percentage": 17.15, "elapsed_time": "1:33:53", "remaining_time": "7:33:36", "throughput": 2346.15, "total_tokens": 13218000} {"current_steps": 6865, "total_steps": 40000, "loss": 0.0712, "lr": 4.64543110682927e-05, "epoch": 1.1199119014601517, "percentage": 17.16, "elapsed_time": "1:33:55", "remaining_time": "7:33:22", "throughput": 2347.05, "total_tokens": 13227952} {"current_steps": 6870, "total_steps": 40000, "loss": 0.1364, "lr": 4.644926949914341e-05, "epoch": 1.120727628680969, "percentage": 17.18, "elapsed_time": "1:33:58", "remaining_time": "7:33:08", "throughput": 2348.06, "total_tokens": 13238480} {"current_steps": 6875, "total_steps": 40000, "loss": 0.1567, "lr": 4.644422462224722e-05, "epoch": 1.1215433559017864, "percentage": 17.19, "elapsed_time": "1:34:00", "remaining_time": "7:32:55", "throughput": 2348.8, "total_tokens": 13247488} {"current_steps": 6880, "total_steps": 40000, "loss": 0.0279, "lr": 4.643917643838211e-05, "epoch": 1.1223590831226038, "percentage": 17.2, "elapsed_time": "1:34:02", "remaining_time": "7:32:41", "throughput": 2349.45, "total_tokens": 13256000} {"current_steps": 6885, "total_steps": 40000, "loss": 0.0921, "lr": 4.6434124948326564e-05, "epoch": 1.1231748103434211, "percentage": 17.21, "elapsed_time": "1:34:04", "remaining_time": "7:32:27", "throughput": 2350.27, "total_tokens": 13265488} {"current_steps": 6890, "total_steps": 40000, "loss": 0.2564, "lr": 4.6429070152859594e-05, "epoch": 1.1239905375642385, "percentage": 17.22, "elapsed_time": "1:34:06", "remaining_time": "7:32:13", "throughput": 2350.94, "total_tokens": 13274128} {"current_steps": 6895, "total_steps": 40000, "loss": 0.1562, "lr": 4.6424012052760714e-05, "epoch": 1.1248062647850559, "percentage": 17.24, "elapsed_time": "1:34:08", "remaining_time": "7:31:59", "throughput": 2351.89, "total_tokens": 13284384} {"current_steps": 6900, "total_steps": 40000, "loss": 0.091, "lr": 4.6418950648809945e-05, "epoch": 1.1256219920058732, "percentage": 17.25, "elapsed_time": "1:34:10", "remaining_time": "7:31:45", "throughput": 2352.84, "total_tokens": 13294560} {"current_steps": 6905, "total_steps": 40000, "loss": 0.1662, "lr": 4.641388594178782e-05, "epoch": 1.1264377192266906, "percentage": 17.26, "elapsed_time": "1:34:12", "remaining_time": "7:31:31", "throughput": 2353.76, "total_tokens": 13304656} {"current_steps": 6910, "total_steps": 40000, "loss": 0.106, "lr": 4.640881793247538e-05, "epoch": 1.127253446447508, "percentage": 17.27, "elapsed_time": "1:34:14", "remaining_time": "7:31:18", "throughput": 2354.78, "total_tokens": 13315296} {"current_steps": 6915, "total_steps": 40000, "loss": 0.0562, "lr": 4.6403746621654173e-05, "epoch": 1.1280691736683253, "percentage": 17.29, "elapsed_time": "1:34:16", "remaining_time": "7:31:04", "throughput": 2355.66, "total_tokens": 13325152} {"current_steps": 6920, "total_steps": 40000, "loss": 0.2027, "lr": 4.639867201010626e-05, "epoch": 1.1288849008891426, "percentage": 17.3, "elapsed_time": "1:34:18", "remaining_time": "7:30:50", "throughput": 2356.32, "total_tokens": 13333728} {"current_steps": 6925, "total_steps": 40000, "loss": 0.1546, "lr": 4.6393594098614204e-05, "epoch": 1.12970062810996, "percentage": 17.31, "elapsed_time": "1:34:20", "remaining_time": "7:30:36", "throughput": 2357.07, "total_tokens": 13342880} {"current_steps": 6930, "total_steps": 40000, "loss": 0.1402, "lr": 4.63885128879611e-05, "epoch": 1.1305163553307773, "percentage": 17.32, "elapsed_time": "1:34:22", "remaining_time": "7:30:23", "throughput": 2357.76, "total_tokens": 13351616} {"current_steps": 6935, "total_steps": 40000, "loss": 0.2242, "lr": 4.638342837893052e-05, "epoch": 1.1313320825515947, "percentage": 17.34, "elapsed_time": "1:34:24", "remaining_time": "7:30:09", "throughput": 2358.71, "total_tokens": 13361872} {"current_steps": 6940, "total_steps": 40000, "loss": 0.2235, "lr": 4.6378340572306565e-05, "epoch": 1.132147809772412, "percentage": 17.35, "elapsed_time": "1:34:26", "remaining_time": "7:29:55", "throughput": 2359.34, "total_tokens": 13370336} {"current_steps": 6945, "total_steps": 40000, "loss": 0.0861, "lr": 4.6373249468873833e-05, "epoch": 1.1329635369932294, "percentage": 17.36, "elapsed_time": "1:34:29", "remaining_time": "7:29:42", "throughput": 2359.96, "total_tokens": 13378720} {"current_steps": 6950, "total_steps": 40000, "loss": 0.1003, "lr": 4.636815506941744e-05, "epoch": 1.1337792642140467, "percentage": 17.38, "elapsed_time": "1:34:31", "remaining_time": "7:29:28", "throughput": 2360.86, "total_tokens": 13388720} {"current_steps": 6955, "total_steps": 40000, "loss": 0.0975, "lr": 4.6363057374723004e-05, "epoch": 1.134594991434864, "percentage": 17.39, "elapsed_time": "1:34:33", "remaining_time": "7:29:14", "throughput": 2361.69, "total_tokens": 13398304} {"current_steps": 6960, "total_steps": 40000, "loss": 0.0622, "lr": 4.635795638557666e-05, "epoch": 1.1354107186556814, "percentage": 17.4, "elapsed_time": "1:34:35", "remaining_time": "7:29:01", "throughput": 2362.6, "total_tokens": 13408368} {"current_steps": 6965, "total_steps": 40000, "loss": 0.1307, "lr": 4.635285210276504e-05, "epoch": 1.136226445876499, "percentage": 17.41, "elapsed_time": "1:34:37", "remaining_time": "7:28:47", "throughput": 2363.53, "total_tokens": 13418512} {"current_steps": 6970, "total_steps": 40000, "loss": 0.0888, "lr": 4.6347744527075295e-05, "epoch": 1.1370421730973161, "percentage": 17.42, "elapsed_time": "1:34:39", "remaining_time": "7:28:33", "throughput": 2364.22, "total_tokens": 13427296} {"current_steps": 6975, "total_steps": 40000, "loss": 0.0698, "lr": 4.634263365929506e-05, "epoch": 1.1378579003181337, "percentage": 17.44, "elapsed_time": "1:34:41", "remaining_time": "7:28:20", "throughput": 2364.76, "total_tokens": 13435264} {"current_steps": 6980, "total_steps": 40000, "loss": 0.1668, "lr": 4.6337519500212515e-05, "epoch": 1.1386736275389508, "percentage": 17.45, "elapsed_time": "1:34:43", "remaining_time": "7:28:06", "throughput": 2365.54, "total_tokens": 13444608} {"current_steps": 6985, "total_steps": 40000, "loss": 0.0603, "lr": 4.633240205061632e-05, "epoch": 1.1394893547597684, "percentage": 17.46, "elapsed_time": "1:34:45", "remaining_time": "7:27:53", "throughput": 2366.43, "total_tokens": 13454528} {"current_steps": 6990, "total_steps": 40000, "loss": 0.092, "lr": 4.632728131129565e-05, "epoch": 1.1403050819805858, "percentage": 17.47, "elapsed_time": "1:34:47", "remaining_time": "7:27:39", "throughput": 2367.24, "total_tokens": 13464016} {"current_steps": 6995, "total_steps": 40000, "loss": 0.1711, "lr": 4.632215728304018e-05, "epoch": 1.1411208092014031, "percentage": 17.49, "elapsed_time": "1:34:49", "remaining_time": "7:27:26", "throughput": 2368.04, "total_tokens": 13473488} {"current_steps": 7000, "total_steps": 40000, "loss": 0.106, "lr": 4.63170299666401e-05, "epoch": 1.1419365364222205, "percentage": 17.5, "elapsed_time": "1:34:51", "remaining_time": "7:27:12", "throughput": 2368.97, "total_tokens": 13483648} {"current_steps": 7000, "total_steps": 40000, "eval_loss": 0.14278674125671387, "epoch": 1.1419365364222205, "percentage": 17.5, "elapsed_time": "1:36:12", "remaining_time": "7:33:32", "throughput": 2335.92, "total_tokens": 13483648} {"current_steps": 7005, "total_steps": 40000, "loss": 0.1229, "lr": 4.631189936288612e-05, "epoch": 1.1427522636430378, "percentage": 17.51, "elapsed_time": "1:36:16", "remaining_time": "7:33:26", "throughput": 2336.0, "total_tokens": 13492992} {"current_steps": 7010, "total_steps": 40000, "loss": 0.1081, "lr": 4.630676547256944e-05, "epoch": 1.1435679908638552, "percentage": 17.52, "elapsed_time": "1:36:18", "remaining_time": "7:33:12", "throughput": 2336.76, "total_tokens": 13502208} {"current_steps": 7015, "total_steps": 40000, "loss": 0.0326, "lr": 4.630162829648176e-05, "epoch": 1.1443837180846725, "percentage": 17.54, "elapsed_time": "1:36:20", "remaining_time": "7:32:59", "throughput": 2337.55, "total_tokens": 13511600} {"current_steps": 7020, "total_steps": 40000, "loss": 0.1669, "lr": 4.629648783541531e-05, "epoch": 1.14519944530549, "percentage": 17.55, "elapsed_time": "1:36:22", "remaining_time": "7:32:45", "throughput": 2338.47, "total_tokens": 13521760} {"current_steps": 7025, "total_steps": 40000, "loss": 0.1906, "lr": 4.6291344090162804e-05, "epoch": 1.1460151725263072, "percentage": 17.56, "elapsed_time": "1:36:24", "remaining_time": "7:32:31", "throughput": 2339.19, "total_tokens": 13530752} {"current_steps": 7030, "total_steps": 40000, "loss": 0.163, "lr": 4.628619706151748e-05, "epoch": 1.1468308997471246, "percentage": 17.57, "elapsed_time": "1:36:26", "remaining_time": "7:32:17", "throughput": 2340.0, "total_tokens": 13540256} {"current_steps": 7035, "total_steps": 40000, "loss": 0.0843, "lr": 4.628104675027306e-05, "epoch": 1.147646626967942, "percentage": 17.59, "elapsed_time": "1:36:28", "remaining_time": "7:32:04", "throughput": 2341.05, "total_tokens": 13551168} {"current_steps": 7040, "total_steps": 40000, "loss": 0.1483, "lr": 4.6275893157223805e-05, "epoch": 1.1484623541887593, "percentage": 17.6, "elapsed_time": "1:36:30", "remaining_time": "7:31:50", "throughput": 2341.84, "total_tokens": 13560608} {"current_steps": 7045, "total_steps": 40000, "loss": 0.1621, "lr": 4.627073628316445e-05, "epoch": 1.1492780814095767, "percentage": 17.61, "elapsed_time": "1:36:32", "remaining_time": "7:31:36", "throughput": 2342.53, "total_tokens": 13569424} {"current_steps": 7050, "total_steps": 40000, "loss": 0.0491, "lr": 4.626557612889026e-05, "epoch": 1.150093808630394, "percentage": 17.62, "elapsed_time": "1:36:34", "remaining_time": "7:31:23", "throughput": 2343.22, "total_tokens": 13578272} {"current_steps": 7055, "total_steps": 40000, "loss": 0.1151, "lr": 4.626041269519699e-05, "epoch": 1.1509095358512114, "percentage": 17.64, "elapsed_time": "1:36:36", "remaining_time": "7:31:09", "throughput": 2344.1, "total_tokens": 13588192} {"current_steps": 7060, "total_steps": 40000, "loss": 0.1487, "lr": 4.6255245982880905e-05, "epoch": 1.1517252630720287, "percentage": 17.65, "elapsed_time": "1:36:38", "remaining_time": "7:30:55", "throughput": 2344.78, "total_tokens": 13596976} {"current_steps": 7065, "total_steps": 40000, "loss": 0.0772, "lr": 4.625007599273879e-05, "epoch": 1.152540990292846, "percentage": 17.66, "elapsed_time": "1:36:40", "remaining_time": "7:30:42", "throughput": 2345.85, "total_tokens": 13608016} {"current_steps": 7070, "total_steps": 40000, "loss": 0.1646, "lr": 4.6244902725567895e-05, "epoch": 1.1533567175136634, "percentage": 17.68, "elapsed_time": "1:36:42", "remaining_time": "7:30:28", "throughput": 2346.56, "total_tokens": 13617040} {"current_steps": 7075, "total_steps": 40000, "loss": 0.08, "lr": 4.6239726182166024e-05, "epoch": 1.1541724447344808, "percentage": 17.69, "elapsed_time": "1:36:45", "remaining_time": "7:30:15", "throughput": 2347.24, "total_tokens": 13625856} {"current_steps": 7080, "total_steps": 40000, "loss": 0.1165, "lr": 4.623454636333147e-05, "epoch": 1.1549881719552981, "percentage": 17.7, "elapsed_time": "1:36:47", "remaining_time": "7:30:01", "throughput": 2347.96, "total_tokens": 13634928} {"current_steps": 7085, "total_steps": 40000, "loss": 0.1281, "lr": 4.622936326986301e-05, "epoch": 1.1558038991761155, "percentage": 17.71, "elapsed_time": "1:36:49", "remaining_time": "7:29:47", "throughput": 2348.81, "total_tokens": 13644736} {"current_steps": 7090, "total_steps": 40000, "loss": 0.0678, "lr": 4.6224176902559946e-05, "epoch": 1.1566196263969328, "percentage": 17.72, "elapsed_time": "1:36:51", "remaining_time": "7:29:34", "throughput": 2349.77, "total_tokens": 13655136} {"current_steps": 7095, "total_steps": 40000, "loss": 0.1737, "lr": 4.621898726222209e-05, "epoch": 1.1574353536177502, "percentage": 17.74, "elapsed_time": "1:36:53", "remaining_time": "7:29:20", "throughput": 2350.57, "total_tokens": 13664656} {"current_steps": 7100, "total_steps": 40000, "loss": 0.1494, "lr": 4.6213794349649744e-05, "epoch": 1.1582510808385675, "percentage": 17.75, "elapsed_time": "1:36:55", "remaining_time": "7:29:07", "throughput": 2351.15, "total_tokens": 13672880} {"current_steps": 7105, "total_steps": 40000, "loss": 0.0834, "lr": 4.6208598165643715e-05, "epoch": 1.159066808059385, "percentage": 17.76, "elapsed_time": "1:36:57", "remaining_time": "7:28:53", "throughput": 2351.92, "total_tokens": 13682240} {"current_steps": 7110, "total_steps": 40000, "loss": 0.0493, "lr": 4.620339871100533e-05, "epoch": 1.1598825352802022, "percentage": 17.77, "elapsed_time": "1:36:59", "remaining_time": "7:28:40", "throughput": 2352.55, "total_tokens": 13690720} {"current_steps": 7115, "total_steps": 40000, "loss": 0.0234, "lr": 4.6198195986536394e-05, "epoch": 1.1606982625010196, "percentage": 17.79, "elapsed_time": "1:37:01", "remaining_time": "7:28:27", "throughput": 2353.28, "total_tokens": 13699888} {"current_steps": 7120, "total_steps": 40000, "loss": 0.0307, "lr": 4.619298999303926e-05, "epoch": 1.161513989721837, "percentage": 17.8, "elapsed_time": "1:37:03", "remaining_time": "7:28:13", "throughput": 2354.07, "total_tokens": 13709360} {"current_steps": 7125, "total_steps": 40000, "loss": 0.1643, "lr": 4.618778073131673e-05, "epoch": 1.1623297169426543, "percentage": 17.81, "elapsed_time": "1:37:05", "remaining_time": "7:28:00", "throughput": 2354.9, "total_tokens": 13719040} {"current_steps": 7130, "total_steps": 40000, "loss": 0.2368, "lr": 4.618256820217215e-05, "epoch": 1.1631454441634717, "percentage": 17.82, "elapsed_time": "1:37:07", "remaining_time": "7:27:46", "throughput": 2355.83, "total_tokens": 13729328} {"current_steps": 7135, "total_steps": 40000, "loss": 0.1419, "lr": 4.617735240640936e-05, "epoch": 1.163961171384289, "percentage": 17.84, "elapsed_time": "1:37:09", "remaining_time": "7:27:33", "throughput": 2356.79, "total_tokens": 13739792} {"current_steps": 7140, "total_steps": 40000, "loss": 0.1923, "lr": 4.6172133344832705e-05, "epoch": 1.1647768986051064, "percentage": 17.85, "elapsed_time": "1:37:11", "remaining_time": "7:27:19", "throughput": 2357.52, "total_tokens": 13748912} {"current_steps": 7145, "total_steps": 40000, "loss": 0.1105, "lr": 4.6166911018247004e-05, "epoch": 1.1655926258259237, "percentage": 17.86, "elapsed_time": "1:37:13", "remaining_time": "7:27:06", "throughput": 2358.36, "total_tokens": 13758656} {"current_steps": 7150, "total_steps": 40000, "loss": 0.1382, "lr": 4.616168542745764e-05, "epoch": 1.1664083530467413, "percentage": 17.88, "elapsed_time": "1:37:16", "remaining_time": "7:26:53", "throughput": 2359.28, "total_tokens": 13768944} {"current_steps": 7155, "total_steps": 40000, "loss": 0.1767, "lr": 4.6156456573270446e-05, "epoch": 1.1672240802675584, "percentage": 17.89, "elapsed_time": "1:37:18", "remaining_time": "7:26:39", "throughput": 2359.81, "total_tokens": 13776880} {"current_steps": 7160, "total_steps": 40000, "loss": 0.0638, "lr": 4.615122445649177e-05, "epoch": 1.168039807488376, "percentage": 17.9, "elapsed_time": "1:37:20", "remaining_time": "7:26:26", "throughput": 2360.45, "total_tokens": 13785488} {"current_steps": 7165, "total_steps": 40000, "loss": 0.0321, "lr": 4.6145989077928486e-05, "epoch": 1.1688555347091931, "percentage": 17.91, "elapsed_time": "1:37:22", "remaining_time": "7:26:13", "throughput": 2361.27, "total_tokens": 13795136} {"current_steps": 7170, "total_steps": 40000, "loss": 0.114, "lr": 4.6140750438387953e-05, "epoch": 1.1696712619300107, "percentage": 17.93, "elapsed_time": "1:37:24", "remaining_time": "7:25:59", "throughput": 2362.08, "total_tokens": 13804720} {"current_steps": 7175, "total_steps": 40000, "loss": 0.1322, "lr": 4.613550853867803e-05, "epoch": 1.170486989150828, "percentage": 17.94, "elapsed_time": "1:37:26", "remaining_time": "7:25:46", "throughput": 2362.88, "total_tokens": 13814272} {"current_steps": 7180, "total_steps": 40000, "loss": 0.1005, "lr": 4.613026337960708e-05, "epoch": 1.1713027163716454, "percentage": 17.95, "elapsed_time": "1:37:28", "remaining_time": "7:25:33", "throughput": 2363.81, "total_tokens": 13824624} {"current_steps": 7185, "total_steps": 40000, "loss": 0.0889, "lr": 4.612501496198398e-05, "epoch": 1.1721184435924628, "percentage": 17.96, "elapsed_time": "1:37:30", "remaining_time": "7:25:20", "throughput": 2364.46, "total_tokens": 13833328} {"current_steps": 7190, "total_steps": 40000, "loss": 0.0859, "lr": 4.61197632866181e-05, "epoch": 1.17293417081328, "percentage": 17.97, "elapsed_time": "1:37:32", "remaining_time": "7:25:06", "throughput": 2365.37, "total_tokens": 13843520} {"current_steps": 7195, "total_steps": 40000, "loss": 0.0763, "lr": 4.611450835431931e-05, "epoch": 1.1737498980340975, "percentage": 17.99, "elapsed_time": "1:37:34", "remaining_time": "7:24:53", "throughput": 2366.11, "total_tokens": 13852736} {"current_steps": 7200, "total_steps": 40000, "loss": 0.0809, "lr": 4.6109250165898e-05, "epoch": 1.1745656252549148, "percentage": 18.0, "elapsed_time": "1:37:36", "remaining_time": "7:24:40", "throughput": 2367.0, "total_tokens": 13862816} {"current_steps": 7200, "total_steps": 40000, "eval_loss": 0.14510487020015717, "epoch": 1.1745656252549148, "percentage": 18.0, "elapsed_time": "1:38:57", "remaining_time": "7:30:47", "throughput": 2334.87, "total_tokens": 13862816} {"current_steps": 7205, "total_steps": 40000, "loss": 0.1054, "lr": 4.610398872216503e-05, "epoch": 1.1753813524757322, "percentage": 18.01, "elapsed_time": "1:39:00", "remaining_time": "7:30:40", "throughput": 2335.16, "total_tokens": 13872784} {"current_steps": 7210, "total_steps": 40000, "loss": 0.2095, "lr": 4.6098724023931796e-05, "epoch": 1.1761970796965495, "percentage": 18.02, "elapsed_time": "1:39:02", "remaining_time": "7:30:27", "throughput": 2336.06, "total_tokens": 13883040} {"current_steps": 7215, "total_steps": 40000, "loss": 0.0433, "lr": 4.609345607201017e-05, "epoch": 1.1770128069173669, "percentage": 18.04, "elapsed_time": "1:39:04", "remaining_time": "7:30:14", "throughput": 2336.7, "total_tokens": 13891632} {"current_steps": 7220, "total_steps": 40000, "loss": 0.0949, "lr": 4.608818486721254e-05, "epoch": 1.1778285341381842, "percentage": 18.05, "elapsed_time": "1:39:07", "remaining_time": "7:30:00", "throughput": 2337.45, "total_tokens": 13900928} {"current_steps": 7225, "total_steps": 40000, "loss": 0.2016, "lr": 4.608291041035179e-05, "epoch": 1.1786442613590016, "percentage": 18.06, "elapsed_time": "1:39:09", "remaining_time": "7:29:47", "throughput": 2338.21, "total_tokens": 13910304} {"current_steps": 7230, "total_steps": 40000, "loss": 0.0451, "lr": 4.607763270224132e-05, "epoch": 1.179459988579819, "percentage": 18.07, "elapsed_time": "1:39:11", "remaining_time": "7:29:33", "throughput": 2339.0, "total_tokens": 13919824} {"current_steps": 7235, "total_steps": 40000, "loss": 0.2784, "lr": 4.6072351743695e-05, "epoch": 1.1802757158006363, "percentage": 18.09, "elapsed_time": "1:39:13", "remaining_time": "7:29:20", "throughput": 2339.76, "total_tokens": 13929184} {"current_steps": 7240, "total_steps": 40000, "loss": 0.0823, "lr": 4.606706753552723e-05, "epoch": 1.1810914430214536, "percentage": 18.1, "elapsed_time": "1:39:15", "remaining_time": "7:29:07", "throughput": 2340.72, "total_tokens": 13939744} {"current_steps": 7245, "total_steps": 40000, "loss": 0.1021, "lr": 4.6061780078552906e-05, "epoch": 1.181907170242271, "percentage": 18.11, "elapsed_time": "1:39:17", "remaining_time": "7:28:53", "throughput": 2341.57, "total_tokens": 13949632} {"current_steps": 7250, "total_steps": 40000, "loss": 0.0476, "lr": 4.605648937358742e-05, "epoch": 1.1827228974630883, "percentage": 18.12, "elapsed_time": "1:39:19", "remaining_time": "7:28:40", "throughput": 2342.47, "total_tokens": 13959872} {"current_steps": 7255, "total_steps": 40000, "loss": 0.1647, "lr": 4.605119542144665e-05, "epoch": 1.1835386246839057, "percentage": 18.14, "elapsed_time": "1:39:21", "remaining_time": "7:28:26", "throughput": 2343.28, "total_tokens": 13969520} {"current_steps": 7260, "total_steps": 40000, "loss": 0.1017, "lr": 4.604589822294701e-05, "epoch": 1.184354351904723, "percentage": 18.15, "elapsed_time": "1:39:23", "remaining_time": "7:28:13", "throughput": 2343.84, "total_tokens": 13977680} {"current_steps": 7265, "total_steps": 40000, "loss": 0.2035, "lr": 4.604059777890537e-05, "epoch": 1.1851700791255404, "percentage": 18.16, "elapsed_time": "1:39:25", "remaining_time": "7:28:00", "throughput": 2344.66, "total_tokens": 13987440} {"current_steps": 7270, "total_steps": 40000, "loss": 0.2121, "lr": 4.6035294090139145e-05, "epoch": 1.1859858063463578, "percentage": 18.18, "elapsed_time": "1:39:27", "remaining_time": "7:27:47", "throughput": 2345.45, "total_tokens": 13996976} {"current_steps": 7275, "total_steps": 40000, "loss": 0.1219, "lr": 4.6029987157466226e-05, "epoch": 1.186801533567175, "percentage": 18.19, "elapsed_time": "1:39:29", "remaining_time": "7:27:33", "throughput": 2346.39, "total_tokens": 14007424} {"current_steps": 7280, "total_steps": 40000, "loss": 0.1346, "lr": 4.602467698170502e-05, "epoch": 1.1876172607879925, "percentage": 18.2, "elapsed_time": "1:39:31", "remaining_time": "7:27:20", "throughput": 2347.53, "total_tokens": 14019120} {"current_steps": 7285, "total_steps": 40000, "loss": 0.2016, "lr": 4.601936356367439e-05, "epoch": 1.1884329880088098, "percentage": 18.21, "elapsed_time": "1:39:33", "remaining_time": "7:27:07", "throughput": 2348.37, "total_tokens": 14028976} {"current_steps": 7290, "total_steps": 40000, "loss": 0.1076, "lr": 4.601404690419377e-05, "epoch": 1.1892487152296272, "percentage": 18.22, "elapsed_time": "1:39:35", "remaining_time": "7:26:54", "throughput": 2349.27, "total_tokens": 14039200} {"current_steps": 7295, "total_steps": 40000, "loss": 0.1731, "lr": 4.600872700408303e-05, "epoch": 1.1900644424504445, "percentage": 18.24, "elapsed_time": "1:39:38", "remaining_time": "7:26:40", "throughput": 2349.93, "total_tokens": 14047984} {"current_steps": 7300, "total_steps": 40000, "loss": 0.2404, "lr": 4.600340386416258e-05, "epoch": 1.1908801696712619, "percentage": 18.25, "elapsed_time": "1:39:40", "remaining_time": "7:26:27", "throughput": 2350.76, "total_tokens": 14057792} {"current_steps": 7305, "total_steps": 40000, "loss": 0.1837, "lr": 4.5998077485253296e-05, "epoch": 1.1916958968920792, "percentage": 18.26, "elapsed_time": "1:39:42", "remaining_time": "7:26:14", "throughput": 2351.33, "total_tokens": 14066048} {"current_steps": 7310, "total_steps": 40000, "loss": 0.1399, "lr": 4.59927478681766e-05, "epoch": 1.1925116241128966, "percentage": 18.27, "elapsed_time": "1:39:44", "remaining_time": "7:26:01", "throughput": 2352.13, "total_tokens": 14075744} {"current_steps": 7315, "total_steps": 40000, "loss": 0.1053, "lr": 4.5987415013754366e-05, "epoch": 1.193327351333714, "percentage": 18.29, "elapsed_time": "1:39:46", "remaining_time": "7:25:48", "throughput": 2353.07, "total_tokens": 14086224} {"current_steps": 7320, "total_steps": 40000, "loss": 0.0842, "lr": 4.598207892280899e-05, "epoch": 1.1941430785545313, "percentage": 18.3, "elapsed_time": "1:39:48", "remaining_time": "7:25:35", "throughput": 2354.18, "total_tokens": 14097744} {"current_steps": 7325, "total_steps": 40000, "loss": 0.0911, "lr": 4.597673959616337e-05, "epoch": 1.1949588057753486, "percentage": 18.31, "elapsed_time": "1:39:50", "remaining_time": "7:25:21", "throughput": 2354.87, "total_tokens": 14106752} {"current_steps": 7330, "total_steps": 40000, "loss": 0.1206, "lr": 4.597139703464089e-05, "epoch": 1.195774532996166, "percentage": 18.32, "elapsed_time": "1:39:52", "remaining_time": "7:25:08", "throughput": 2355.65, "total_tokens": 14116256} {"current_steps": 7335, "total_steps": 40000, "loss": 0.0822, "lr": 4.596605123906545e-05, "epoch": 1.1965902602169836, "percentage": 18.34, "elapsed_time": "1:39:54", "remaining_time": "7:24:55", "throughput": 2356.56, "total_tokens": 14126608} {"current_steps": 7340, "total_steps": 40000, "loss": 0.1049, "lr": 4.596070221026143e-05, "epoch": 1.1974059874378007, "percentage": 18.35, "elapsed_time": "1:39:56", "remaining_time": "7:24:42", "throughput": 2357.33, "total_tokens": 14136064} {"current_steps": 7345, "total_steps": 40000, "loss": 0.0729, "lr": 4.595534994905372e-05, "epoch": 1.1982217146586183, "percentage": 18.36, "elapsed_time": "1:39:58", "remaining_time": "7:24:29", "throughput": 2358.2, "total_tokens": 14146160} {"current_steps": 7350, "total_steps": 40000, "loss": 0.0456, "lr": 4.594999445626771e-05, "epoch": 1.1990374418794354, "percentage": 18.38, "elapsed_time": "1:40:00", "remaining_time": "7:24:16", "throughput": 2359.0, "total_tokens": 14155888} {"current_steps": 7355, "total_steps": 40000, "loss": 0.1298, "lr": 4.5944635732729276e-05, "epoch": 1.199853169100253, "percentage": 18.39, "elapsed_time": "1:40:02", "remaining_time": "7:24:03", "throughput": 2359.69, "total_tokens": 14164912} {"current_steps": 7360, "total_steps": 40000, "loss": 0.1426, "lr": 4.5939273779264804e-05, "epoch": 1.2006688963210703, "percentage": 18.4, "elapsed_time": "1:40:04", "remaining_time": "7:23:50", "throughput": 2360.41, "total_tokens": 14174096} {"current_steps": 7365, "total_steps": 40000, "loss": 0.0606, "lr": 4.593390859670118e-05, "epoch": 1.2014846235418877, "percentage": 18.41, "elapsed_time": "1:40:07", "remaining_time": "7:23:37", "throughput": 2361.2, "total_tokens": 14183760} {"current_steps": 7370, "total_steps": 40000, "loss": 0.2541, "lr": 4.5928540185865776e-05, "epoch": 1.202300350762705, "percentage": 18.43, "elapsed_time": "1:40:09", "remaining_time": "7:23:24", "throughput": 2361.88, "total_tokens": 14192704} {"current_steps": 7375, "total_steps": 40000, "loss": 0.1973, "lr": 4.592316854758648e-05, "epoch": 1.2031160779835224, "percentage": 18.44, "elapsed_time": "1:40:11", "remaining_time": "7:23:11", "throughput": 2362.74, "total_tokens": 14202800} {"current_steps": 7380, "total_steps": 40000, "loss": 0.0176, "lr": 4.5917793682691646e-05, "epoch": 1.2039318052043397, "percentage": 18.45, "elapsed_time": "1:40:13", "remaining_time": "7:22:58", "throughput": 2363.54, "total_tokens": 14212480} {"current_steps": 7385, "total_steps": 40000, "loss": 0.1886, "lr": 4.5912415592010164e-05, "epoch": 1.204747532425157, "percentage": 18.46, "elapsed_time": "1:40:15", "remaining_time": "7:22:45", "throughput": 2364.25, "total_tokens": 14221648} {"current_steps": 7390, "total_steps": 40000, "loss": 0.1906, "lr": 4.5907034276371386e-05, "epoch": 1.2055632596459744, "percentage": 18.48, "elapsed_time": "1:40:17", "remaining_time": "7:22:32", "throughput": 2365.1, "total_tokens": 14231680} {"current_steps": 7395, "total_steps": 40000, "loss": 0.0406, "lr": 4.5901649736605196e-05, "epoch": 1.2063789868667918, "percentage": 18.49, "elapsed_time": "1:40:19", "remaining_time": "7:22:19", "throughput": 2365.98, "total_tokens": 14241824} {"current_steps": 7400, "total_steps": 40000, "loss": 0.0991, "lr": 4.589626197354195e-05, "epoch": 1.2071947140876091, "percentage": 18.5, "elapsed_time": "1:40:21", "remaining_time": "7:22:07", "throughput": 2366.91, "total_tokens": 14252288} {"current_steps": 7400, "total_steps": 40000, "eval_loss": 0.13805000483989716, "epoch": 1.2071947140876091, "percentage": 18.5, "elapsed_time": "1:41:42", "remaining_time": "7:28:02", "throughput": 2335.62, "total_tokens": 14252288} {"current_steps": 7405, "total_steps": 40000, "loss": 0.1313, "lr": 4.5890870988012504e-05, "epoch": 1.2080104413084265, "percentage": 18.51, "elapsed_time": "1:41:45", "remaining_time": "7:27:56", "throughput": 2335.64, "total_tokens": 14261088} {"current_steps": 7410, "total_steps": 40000, "loss": 0.1136, "lr": 4.5885476780848226e-05, "epoch": 1.2088261685292438, "percentage": 18.52, "elapsed_time": "1:41:47", "remaining_time": "7:27:43", "throughput": 2336.5, "total_tokens": 14271248} {"current_steps": 7415, "total_steps": 40000, "loss": 0.1231, "lr": 4.5880079352880964e-05, "epoch": 1.2096418957500612, "percentage": 18.54, "elapsed_time": "1:41:50", "remaining_time": "7:27:30", "throughput": 2337.35, "total_tokens": 14281216} {"current_steps": 7420, "total_steps": 40000, "loss": 0.0818, "lr": 4.5874678704943065e-05, "epoch": 1.2104576229708786, "percentage": 18.55, "elapsed_time": "1:41:52", "remaining_time": "7:27:17", "throughput": 2338.18, "total_tokens": 14291184} {"current_steps": 7425, "total_steps": 40000, "loss": 0.0609, "lr": 4.5869274837867394e-05, "epoch": 1.211273350191696, "percentage": 18.56, "elapsed_time": "1:41:54", "remaining_time": "7:27:04", "throughput": 2339.08, "total_tokens": 14301472} {"current_steps": 7430, "total_steps": 40000, "loss": 0.1907, "lr": 4.5863867752487275e-05, "epoch": 1.2120890774125133, "percentage": 18.57, "elapsed_time": "1:41:56", "remaining_time": "7:26:50", "throughput": 2339.83, "total_tokens": 14310880} {"current_steps": 7435, "total_steps": 40000, "loss": 0.0369, "lr": 4.5858457449636554e-05, "epoch": 1.2129048046333306, "percentage": 18.59, "elapsed_time": "1:41:58", "remaining_time": "7:26:37", "throughput": 2340.75, "total_tokens": 14321408} {"current_steps": 7440, "total_steps": 40000, "loss": 0.0315, "lr": 4.5853043930149574e-05, "epoch": 1.213720531854148, "percentage": 18.6, "elapsed_time": "1:42:00", "remaining_time": "7:26:24", "throughput": 2341.46, "total_tokens": 14330608} {"current_steps": 7445, "total_steps": 40000, "loss": 0.1587, "lr": 4.584762719486117e-05, "epoch": 1.2145362590749653, "percentage": 18.61, "elapsed_time": "1:42:02", "remaining_time": "7:26:11", "throughput": 2342.38, "total_tokens": 14341072} {"current_steps": 7450, "total_steps": 40000, "loss": 0.1933, "lr": 4.584220724460665e-05, "epoch": 1.2153519862957827, "percentage": 18.62, "elapsed_time": "1:42:04", "remaining_time": "7:25:58", "throughput": 2343.27, "total_tokens": 14351360} {"current_steps": 7455, "total_steps": 40000, "loss": 0.0368, "lr": 4.5836784080221865e-05, "epoch": 1.2161677135166, "percentage": 18.64, "elapsed_time": "1:42:06", "remaining_time": "7:25:45", "throughput": 2344.04, "total_tokens": 14360880} {"current_steps": 7460, "total_steps": 40000, "loss": 0.1702, "lr": 4.583135770254312e-05, "epoch": 1.2169834407374174, "percentage": 18.65, "elapsed_time": "1:42:08", "remaining_time": "7:25:32", "throughput": 2344.92, "total_tokens": 14371120} {"current_steps": 7465, "total_steps": 40000, "loss": 0.1485, "lr": 4.5825928112407236e-05, "epoch": 1.2177991679582347, "percentage": 18.66, "elapsed_time": "1:42:10", "remaining_time": "7:25:19", "throughput": 2345.66, "total_tokens": 14380544} {"current_steps": 7470, "total_steps": 40000, "loss": 0.1973, "lr": 4.582049531065152e-05, "epoch": 1.218614895179052, "percentage": 18.68, "elapsed_time": "1:42:12", "remaining_time": "7:25:06", "throughput": 2346.39, "total_tokens": 14389824} {"current_steps": 7475, "total_steps": 40000, "loss": 0.2094, "lr": 4.5815059298113783e-05, "epoch": 1.2194306223998694, "percentage": 18.69, "elapsed_time": "1:42:14", "remaining_time": "7:24:53", "throughput": 2347.06, "total_tokens": 14398816} {"current_steps": 7480, "total_steps": 40000, "loss": 0.0879, "lr": 4.580962007563232e-05, "epoch": 1.2202463496206868, "percentage": 18.7, "elapsed_time": "1:42:16", "remaining_time": "7:24:40", "throughput": 2347.78, "total_tokens": 14408096} {"current_steps": 7485, "total_steps": 40000, "loss": 0.1202, "lr": 4.5804177644045935e-05, "epoch": 1.2210620768415041, "percentage": 18.71, "elapsed_time": "1:42:18", "remaining_time": "7:24:27", "throughput": 2348.53, "total_tokens": 14417552} {"current_steps": 7490, "total_steps": 40000, "loss": 0.1542, "lr": 4.579873200419391e-05, "epoch": 1.2218778040623215, "percentage": 18.73, "elapsed_time": "1:42:21", "remaining_time": "7:24:14", "throughput": 2349.24, "total_tokens": 14426816} {"current_steps": 7495, "total_steps": 40000, "loss": 0.2297, "lr": 4.5793283156916046e-05, "epoch": 1.2226935312831388, "percentage": 18.74, "elapsed_time": "1:42:23", "remaining_time": "7:24:02", "throughput": 2349.86, "total_tokens": 14435472} {"current_steps": 7500, "total_steps": 40000, "loss": 0.1396, "lr": 4.578783110305261e-05, "epoch": 1.2235092585039562, "percentage": 18.75, "elapsed_time": "1:42:25", "remaining_time": "7:23:49", "throughput": 2350.68, "total_tokens": 14445344} {"current_steps": 7505, "total_steps": 40000, "loss": 0.088, "lr": 4.578237584344438e-05, "epoch": 1.2243249857247736, "percentage": 18.76, "elapsed_time": "1:42:27", "remaining_time": "7:23:36", "throughput": 2351.63, "total_tokens": 14456080} {"current_steps": 7510, "total_steps": 40000, "loss": 0.1176, "lr": 4.577691737893263e-05, "epoch": 1.225140712945591, "percentage": 18.77, "elapsed_time": "1:42:29", "remaining_time": "7:23:23", "throughput": 2352.38, "total_tokens": 14465536} {"current_steps": 7515, "total_steps": 40000, "loss": 0.1099, "lr": 4.577145571035912e-05, "epoch": 1.2259564401664083, "percentage": 18.79, "elapsed_time": "1:42:31", "remaining_time": "7:23:10", "throughput": 2353.15, "total_tokens": 14475152} {"current_steps": 7520, "total_steps": 40000, "loss": 0.086, "lr": 4.576599083856611e-05, "epoch": 1.2267721673872258, "percentage": 18.8, "elapsed_time": "1:42:33", "remaining_time": "7:22:57", "throughput": 2353.91, "total_tokens": 14484720} {"current_steps": 7525, "total_steps": 40000, "loss": 0.1184, "lr": 4.576052276439635e-05, "epoch": 1.227587894608043, "percentage": 18.81, "elapsed_time": "1:42:35", "remaining_time": "7:22:44", "throughput": 2354.59, "total_tokens": 14493744} {"current_steps": 7530, "total_steps": 40000, "loss": 0.2109, "lr": 4.575505148869308e-05, "epoch": 1.2284036218288605, "percentage": 18.82, "elapsed_time": "1:42:37", "remaining_time": "7:22:32", "throughput": 2355.38, "total_tokens": 14503440} {"current_steps": 7535, "total_steps": 40000, "loss": 0.1181, "lr": 4.574957701230006e-05, "epoch": 1.2292193490496777, "percentage": 18.84, "elapsed_time": "1:42:39", "remaining_time": "7:22:19", "throughput": 2356.05, "total_tokens": 14512432} {"current_steps": 7540, "total_steps": 40000, "loss": 0.1243, "lr": 4.57440993360615e-05, "epoch": 1.2300350762704952, "percentage": 18.85, "elapsed_time": "1:42:41", "remaining_time": "7:22:06", "throughput": 2356.84, "total_tokens": 14522176} {"current_steps": 7545, "total_steps": 40000, "loss": 0.1319, "lr": 4.5738618460822134e-05, "epoch": 1.2308508034913126, "percentage": 18.86, "elapsed_time": "1:42:43", "remaining_time": "7:21:53", "throughput": 2357.6, "total_tokens": 14531728} {"current_steps": 7550, "total_steps": 40000, "loss": 0.0991, "lr": 4.573313438742719e-05, "epoch": 1.23166653071213, "percentage": 18.88, "elapsed_time": "1:42:45", "remaining_time": "7:21:41", "throughput": 2358.33, "total_tokens": 14541184} {"current_steps": 7555, "total_steps": 40000, "loss": 0.0838, "lr": 4.5727647116722374e-05, "epoch": 1.2324822579329473, "percentage": 18.89, "elapsed_time": "1:42:47", "remaining_time": "7:21:28", "throughput": 2359.17, "total_tokens": 14551232} {"current_steps": 7560, "total_steps": 40000, "loss": 0.2042, "lr": 4.5722156649553884e-05, "epoch": 1.2332979851537647, "percentage": 18.9, "elapsed_time": "1:42:50", "remaining_time": "7:21:15", "throughput": 2359.96, "total_tokens": 14561008} {"current_steps": 7565, "total_steps": 40000, "loss": 0.173, "lr": 4.571666298676843e-05, "epoch": 1.234113712374582, "percentage": 18.91, "elapsed_time": "1:42:52", "remaining_time": "7:21:02", "throughput": 2360.84, "total_tokens": 14571264} {"current_steps": 7570, "total_steps": 40000, "loss": 0.0677, "lr": 4.571116612921321e-05, "epoch": 1.2349294395953994, "percentage": 18.93, "elapsed_time": "1:42:54", "remaining_time": "7:20:50", "throughput": 2361.5, "total_tokens": 14580240} {"current_steps": 7575, "total_steps": 40000, "loss": 0.0167, "lr": 4.57056660777359e-05, "epoch": 1.2357451668162167, "percentage": 18.94, "elapsed_time": "1:42:56", "remaining_time": "7:20:37", "throughput": 2362.46, "total_tokens": 14591040} {"current_steps": 7580, "total_steps": 40000, "loss": 0.1035, "lr": 4.5700162833184666e-05, "epoch": 1.236560894037034, "percentage": 18.95, "elapsed_time": "1:42:58", "remaining_time": "7:20:24", "throughput": 2363.18, "total_tokens": 14600368} {"current_steps": 7585, "total_steps": 40000, "loss": 0.0428, "lr": 4.5694656396408195e-05, "epoch": 1.2373766212578514, "percentage": 18.96, "elapsed_time": "1:43:00", "remaining_time": "7:20:12", "throughput": 2363.9, "total_tokens": 14609712} {"current_steps": 7590, "total_steps": 40000, "loss": 0.1936, "lr": 4.5689146768255646e-05, "epoch": 1.2381923484786688, "percentage": 18.98, "elapsed_time": "1:43:02", "remaining_time": "7:19:59", "throughput": 2364.79, "total_tokens": 14620128} {"current_steps": 7595, "total_steps": 40000, "loss": 0.1791, "lr": 4.568363394957667e-05, "epoch": 1.2390080756994861, "percentage": 18.99, "elapsed_time": "1:43:04", "remaining_time": "7:19:46", "throughput": 2365.52, "total_tokens": 14629488} {"current_steps": 7600, "total_steps": 40000, "loss": 0.0932, "lr": 4.567811794122141e-05, "epoch": 1.2398238029203035, "percentage": 19.0, "elapsed_time": "1:43:06", "remaining_time": "7:19:34", "throughput": 2366.24, "total_tokens": 14638816} {"current_steps": 7600, "total_steps": 40000, "eval_loss": 0.14474880695343018, "epoch": 1.2398238029203035, "percentage": 19.0, "elapsed_time": "1:44:27", "remaining_time": "7:25:17", "throughput": 2335.79, "total_tokens": 14638816} {"current_steps": 7605, "total_steps": 40000, "loss": 0.1821, "lr": 4.56725987440405e-05, "epoch": 1.2406395301411208, "percentage": 19.01, "elapsed_time": "1:44:30", "remaining_time": "7:25:11", "throughput": 2335.94, "total_tokens": 14648240} {"current_steps": 7610, "total_steps": 40000, "loss": 0.2218, "lr": 4.566707635888508e-05, "epoch": 1.2414552573619382, "percentage": 19.02, "elapsed_time": "1:44:32", "remaining_time": "7:24:58", "throughput": 2336.65, "total_tokens": 14657520} {"current_steps": 7615, "total_steps": 40000, "loss": 0.1998, "lr": 4.566155078660677e-05, "epoch": 1.2422709845827555, "percentage": 19.04, "elapsed_time": "1:44:34", "remaining_time": "7:24:45", "throughput": 2337.19, "total_tokens": 14665696} {"current_steps": 7620, "total_steps": 40000, "loss": 0.1043, "lr": 4.565602202805768e-05, "epoch": 1.2430867118035729, "percentage": 19.05, "elapsed_time": "1:44:37", "remaining_time": "7:24:33", "throughput": 2337.96, "total_tokens": 14675392} {"current_steps": 7625, "total_steps": 40000, "loss": 0.1047, "lr": 4.56504900840904e-05, "epoch": 1.2439024390243902, "percentage": 19.06, "elapsed_time": "1:44:39", "remaining_time": "7:24:20", "throughput": 2338.64, "total_tokens": 14684496} {"current_steps": 7630, "total_steps": 40000, "loss": 0.0512, "lr": 4.564495495555805e-05, "epoch": 1.2447181662452076, "percentage": 19.07, "elapsed_time": "1:44:41", "remaining_time": "7:24:07", "throughput": 2339.29, "total_tokens": 14693408} {"current_steps": 7635, "total_steps": 40000, "loss": 0.0422, "lr": 4.5639416643314204e-05, "epoch": 1.245533893466025, "percentage": 19.09, "elapsed_time": "1:44:43", "remaining_time": "7:23:54", "throughput": 2340.1, "total_tokens": 14703328} {"current_steps": 7640, "total_steps": 40000, "loss": 0.0699, "lr": 4.5633875148212946e-05, "epoch": 1.2463496206868423, "percentage": 19.1, "elapsed_time": "1:44:45", "remaining_time": "7:23:41", "throughput": 2340.96, "total_tokens": 14713536} {"current_steps": 7645, "total_steps": 40000, "loss": 0.1024, "lr": 4.562833047110883e-05, "epoch": 1.2471653479076596, "percentage": 19.11, "elapsed_time": "1:44:47", "remaining_time": "7:23:29", "throughput": 2341.83, "total_tokens": 14723856} {"current_steps": 7650, "total_steps": 40000, "loss": 0.1666, "lr": 4.5622782612856923e-05, "epoch": 1.247981075128477, "percentage": 19.12, "elapsed_time": "1:44:49", "remaining_time": "7:23:16", "throughput": 2342.73, "total_tokens": 14734352} {"current_steps": 7655, "total_steps": 40000, "loss": 0.119, "lr": 4.561723157431278e-05, "epoch": 1.2487968023492944, "percentage": 19.14, "elapsed_time": "1:44:51", "remaining_time": "7:23:03", "throughput": 2343.36, "total_tokens": 14743168} {"current_steps": 7660, "total_steps": 40000, "loss": 0.203, "lr": 4.5611677356332435e-05, "epoch": 1.2496125295701117, "percentage": 19.15, "elapsed_time": "1:44:53", "remaining_time": "7:22:50", "throughput": 2344.07, "total_tokens": 14752480} {"current_steps": 7665, "total_steps": 40000, "loss": 0.206, "lr": 4.560611995977242e-05, "epoch": 1.250428256790929, "percentage": 19.16, "elapsed_time": "1:44:55", "remaining_time": "7:22:38", "throughput": 2344.83, "total_tokens": 14762096} {"current_steps": 7670, "total_steps": 40000, "loss": 0.3419, "lr": 4.560055938548975e-05, "epoch": 1.2512439840117464, "percentage": 19.18, "elapsed_time": "1:44:57", "remaining_time": "7:22:25", "throughput": 2345.81, "total_tokens": 14773104} {"current_steps": 7675, "total_steps": 40000, "loss": 0.1547, "lr": 4.5594995634341944e-05, "epoch": 1.2520597112325638, "percentage": 19.19, "elapsed_time": "1:44:59", "remaining_time": "7:22:12", "throughput": 2346.44, "total_tokens": 14781968} {"current_steps": 7680, "total_steps": 40000, "loss": 0.0295, "lr": 4.5589428707187e-05, "epoch": 1.2528754384533811, "percentage": 19.2, "elapsed_time": "1:45:01", "remaining_time": "7:22:00", "throughput": 2347.27, "total_tokens": 14792016} {"current_steps": 7685, "total_steps": 40000, "loss": 0.0661, "lr": 4.55838586048834e-05, "epoch": 1.2536911656741985, "percentage": 19.21, "elapsed_time": "1:45:03", "remaining_time": "7:21:47", "throughput": 2347.99, "total_tokens": 14801440} {"current_steps": 7690, "total_steps": 40000, "loss": 0.1324, "lr": 4.557828532829013e-05, "epoch": 1.2545068928950158, "percentage": 19.23, "elapsed_time": "1:45:05", "remaining_time": "7:21:34", "throughput": 2348.98, "total_tokens": 14812496} {"current_steps": 7695, "total_steps": 40000, "loss": 0.1086, "lr": 4.557270887826667e-05, "epoch": 1.2553226201158334, "percentage": 19.24, "elapsed_time": "1:45:07", "remaining_time": "7:21:22", "throughput": 2349.74, "total_tokens": 14822144} {"current_steps": 7700, "total_steps": 40000, "loss": 0.2371, "lr": 4.556712925567296e-05, "epoch": 1.2561383473366505, "percentage": 19.25, "elapsed_time": "1:45:10", "remaining_time": "7:21:09", "throughput": 2350.53, "total_tokens": 14832000} {"current_steps": 7705, "total_steps": 40000, "loss": 0.0804, "lr": 4.5561546461369454e-05, "epoch": 1.256954074557468, "percentage": 19.26, "elapsed_time": "1:45:12", "remaining_time": "7:20:56", "throughput": 2351.36, "total_tokens": 14842064} {"current_steps": 7710, "total_steps": 40000, "loss": 0.0882, "lr": 4.55559604962171e-05, "epoch": 1.2577698017782852, "percentage": 19.28, "elapsed_time": "1:45:14", "remaining_time": "7:20:44", "throughput": 2351.99, "total_tokens": 14850880} {"current_steps": 7715, "total_steps": 40000, "loss": 0.0503, "lr": 4.55503713610773e-05, "epoch": 1.2585855289991028, "percentage": 19.29, "elapsed_time": "1:45:16", "remaining_time": "7:20:31", "throughput": 2352.57, "total_tokens": 14859456} {"current_steps": 7720, "total_steps": 40000, "loss": 0.1248, "lr": 4.5544779056812e-05, "epoch": 1.25940125621992, "percentage": 19.3, "elapsed_time": "1:45:18", "remaining_time": "7:20:19", "throughput": 2353.49, "total_tokens": 14870112} {"current_steps": 7725, "total_steps": 40000, "loss": 0.1246, "lr": 4.553918358428358e-05, "epoch": 1.2602169834407375, "percentage": 19.31, "elapsed_time": "1:45:20", "remaining_time": "7:20:06", "throughput": 2354.14, "total_tokens": 14879088} {"current_steps": 7730, "total_steps": 40000, "loss": 0.1855, "lr": 4.553358494435494e-05, "epoch": 1.2610327106615546, "percentage": 19.32, "elapsed_time": "1:45:22", "remaining_time": "7:19:53", "throughput": 2354.87, "total_tokens": 14888560} {"current_steps": 7735, "total_steps": 40000, "loss": 0.0434, "lr": 4.5527983137889464e-05, "epoch": 1.2618484378823722, "percentage": 19.34, "elapsed_time": "1:45:24", "remaining_time": "7:19:41", "throughput": 2355.71, "total_tokens": 14898720} {"current_steps": 7740, "total_steps": 40000, "loss": 0.015, "lr": 4.5522378165751015e-05, "epoch": 1.2626641651031896, "percentage": 19.35, "elapsed_time": "1:45:26", "remaining_time": "7:19:28", "throughput": 2356.44, "total_tokens": 14908176} {"current_steps": 7745, "total_steps": 40000, "loss": 0.0923, "lr": 4.5516770028803954e-05, "epoch": 1.263479892324007, "percentage": 19.36, "elapsed_time": "1:45:28", "remaining_time": "7:19:16", "throughput": 2357.34, "total_tokens": 14918736} {"current_steps": 7750, "total_steps": 40000, "loss": 0.1621, "lr": 4.5511158727913116e-05, "epoch": 1.2642956195448243, "percentage": 19.38, "elapsed_time": "1:45:30", "remaining_time": "7:19:03", "throughput": 2357.83, "total_tokens": 14926784} {"current_steps": 7755, "total_steps": 40000, "loss": 0.2148, "lr": 4.5505544263943856e-05, "epoch": 1.2651113467656416, "percentage": 19.39, "elapsed_time": "1:45:32", "remaining_time": "7:18:51", "throughput": 2358.63, "total_tokens": 14936688} {"current_steps": 7760, "total_steps": 40000, "loss": 0.0116, "lr": 4.549992663776197e-05, "epoch": 1.265927073986459, "percentage": 19.4, "elapsed_time": "1:45:34", "remaining_time": "7:18:39", "throughput": 2359.35, "total_tokens": 14946112} {"current_steps": 7765, "total_steps": 40000, "loss": 0.1449, "lr": 4.5494305850233786e-05, "epoch": 1.2667428012072763, "percentage": 19.41, "elapsed_time": "1:45:36", "remaining_time": "7:18:26", "throughput": 2360.12, "total_tokens": 14955904} {"current_steps": 7770, "total_steps": 40000, "loss": 0.0676, "lr": 4.5488681902226094e-05, "epoch": 1.2675585284280937, "percentage": 19.43, "elapsed_time": "1:45:38", "remaining_time": "7:18:14", "throughput": 2360.88, "total_tokens": 14965600} {"current_steps": 7775, "total_steps": 40000, "loss": 0.1418, "lr": 4.5483054794606174e-05, "epoch": 1.268374255648911, "percentage": 19.44, "elapsed_time": "1:45:41", "remaining_time": "7:18:01", "throughput": 2361.96, "total_tokens": 14977296} {"current_steps": 7780, "total_steps": 40000, "loss": 0.095, "lr": 4.547742452824179e-05, "epoch": 1.2691899828697284, "percentage": 19.45, "elapsed_time": "1:45:43", "remaining_time": "7:17:49", "throughput": 2362.64, "total_tokens": 14986496} {"current_steps": 7785, "total_steps": 40000, "loss": 0.1466, "lr": 4.5471791104001215e-05, "epoch": 1.2700057100905457, "percentage": 19.46, "elapsed_time": "1:45:45", "remaining_time": "7:17:36", "throughput": 2363.41, "total_tokens": 14996256} {"current_steps": 7790, "total_steps": 40000, "loss": 0.0201, "lr": 4.546615452275319e-05, "epoch": 1.270821437311363, "percentage": 19.48, "elapsed_time": "1:45:47", "remaining_time": "7:17:24", "throughput": 2364.18, "total_tokens": 15006032} {"current_steps": 7795, "total_steps": 40000, "loss": 0.126, "lr": 4.5460514785366944e-05, "epoch": 1.2716371645321805, "percentage": 19.49, "elapsed_time": "1:45:49", "remaining_time": "7:17:12", "throughput": 2364.86, "total_tokens": 15015216} {"current_steps": 7800, "total_steps": 40000, "loss": 0.1439, "lr": 4.545487189271219e-05, "epoch": 1.2724528917529978, "percentage": 19.5, "elapsed_time": "1:45:51", "remaining_time": "7:16:59", "throughput": 2365.56, "total_tokens": 15024560} {"current_steps": 7800, "total_steps": 40000, "eval_loss": 0.14069685339927673, "epoch": 1.2724528917529978, "percentage": 19.5, "elapsed_time": "1:47:11", "remaining_time": "7:22:31", "throughput": 2335.98, "total_tokens": 15024560} {"current_steps": 7805, "total_steps": 40000, "loss": 0.2049, "lr": 4.544922584565914e-05, "epoch": 1.2732686189738152, "percentage": 19.51, "elapsed_time": "1:47:15", "remaining_time": "7:22:25", "throughput": 2336.15, "total_tokens": 15034032} {"current_steps": 7810, "total_steps": 40000, "loss": 0.0728, "lr": 4.544357664507848e-05, "epoch": 1.2740843461946325, "percentage": 19.53, "elapsed_time": "1:47:17", "remaining_time": "7:22:13", "throughput": 2336.85, "total_tokens": 15043456} {"current_steps": 7815, "total_steps": 40000, "loss": 0.0776, "lr": 4.54379242918414e-05, "epoch": 1.2749000734154499, "percentage": 19.54, "elapsed_time": "1:47:19", "remaining_time": "7:22:00", "throughput": 2337.51, "total_tokens": 15052496} {"current_steps": 7820, "total_steps": 40000, "loss": 0.0838, "lr": 4.543226878681955e-05, "epoch": 1.2757158006362672, "percentage": 19.55, "elapsed_time": "1:47:21", "remaining_time": "7:21:47", "throughput": 2338.41, "total_tokens": 15063120} {"current_steps": 7825, "total_steps": 40000, "loss": 0.2926, "lr": 4.5426610130885087e-05, "epoch": 1.2765315278570846, "percentage": 19.56, "elapsed_time": "1:47:23", "remaining_time": "7:21:35", "throughput": 2339.07, "total_tokens": 15072192} {"current_steps": 7830, "total_steps": 40000, "loss": 0.1326, "lr": 4.542094832491064e-05, "epoch": 1.277347255077902, "percentage": 19.57, "elapsed_time": "1:47:25", "remaining_time": "7:21:22", "throughput": 2339.96, "total_tokens": 15082784} {"current_steps": 7835, "total_steps": 40000, "loss": 0.1335, "lr": 4.541528336976934e-05, "epoch": 1.2781629822987193, "percentage": 19.59, "elapsed_time": "1:47:27", "remaining_time": "7:21:10", "throughput": 2340.88, "total_tokens": 15093520} {"current_steps": 7840, "total_steps": 40000, "loss": 0.1346, "lr": 4.540961526633479e-05, "epoch": 1.2789787095195366, "percentage": 19.6, "elapsed_time": "1:47:29", "remaining_time": "7:20:57", "throughput": 2341.7, "total_tokens": 15103696} {"current_steps": 7845, "total_steps": 40000, "loss": 0.285, "lr": 4.540394401548108e-05, "epoch": 1.279794436740354, "percentage": 19.61, "elapsed_time": "1:47:31", "remaining_time": "7:20:45", "throughput": 2342.4, "total_tokens": 15113072} {"current_steps": 7850, "total_steps": 40000, "loss": 0.1843, "lr": 4.539826961808279e-05, "epoch": 1.2806101639611713, "percentage": 19.62, "elapsed_time": "1:47:34", "remaining_time": "7:20:32", "throughput": 2343.08, "total_tokens": 15122272} {"current_steps": 7855, "total_steps": 40000, "loss": 0.0874, "lr": 4.5392592075014994e-05, "epoch": 1.2814258911819887, "percentage": 19.64, "elapsed_time": "1:47:36", "remaining_time": "7:20:20", "throughput": 2343.72, "total_tokens": 15131232} {"current_steps": 7860, "total_steps": 40000, "loss": 0.137, "lr": 4.538691138715322e-05, "epoch": 1.282241618402806, "percentage": 19.65, "elapsed_time": "1:47:38", "remaining_time": "7:20:07", "throughput": 2344.39, "total_tokens": 15140384} {"current_steps": 7865, "total_steps": 40000, "loss": 0.0987, "lr": 4.5381227555373516e-05, "epoch": 1.2830573456236234, "percentage": 19.66, "elapsed_time": "1:47:40", "remaining_time": "7:19:55", "throughput": 2345.18, "total_tokens": 15150336} {"current_steps": 7870, "total_steps": 40000, "loss": 0.0984, "lr": 4.537554058055239e-05, "epoch": 1.2838730728444407, "percentage": 19.68, "elapsed_time": "1:47:42", "remaining_time": "7:19:42", "throughput": 2345.83, "total_tokens": 15159408} {"current_steps": 7875, "total_steps": 40000, "loss": 0.1531, "lr": 4.5369850463566865e-05, "epoch": 1.284688800065258, "percentage": 19.69, "elapsed_time": "1:47:44", "remaining_time": "7:19:30", "throughput": 2346.46, "total_tokens": 15168288} {"current_steps": 7880, "total_steps": 40000, "loss": 0.151, "lr": 4.5364157205294404e-05, "epoch": 1.2855045272860757, "percentage": 19.7, "elapsed_time": "1:47:46", "remaining_time": "7:19:17", "throughput": 2347.3, "total_tokens": 15178576} {"current_steps": 7885, "total_steps": 40000, "loss": 0.1559, "lr": 4.5358460806612996e-05, "epoch": 1.2863202545068928, "percentage": 19.71, "elapsed_time": "1:47:48", "remaining_time": "7:19:05", "throughput": 2348.04, "total_tokens": 15188240} {"current_steps": 7890, "total_steps": 40000, "loss": 0.0568, "lr": 4.535276126840109e-05, "epoch": 1.2871359817277104, "percentage": 19.73, "elapsed_time": "1:47:50", "remaining_time": "7:18:53", "throughput": 2348.87, "total_tokens": 15198448} {"current_steps": 7895, "total_steps": 40000, "loss": 0.1636, "lr": 4.5347058591537626e-05, "epoch": 1.2879517089485275, "percentage": 19.74, "elapsed_time": "1:47:52", "remaining_time": "7:18:40", "throughput": 2349.65, "total_tokens": 15208368} {"current_steps": 7900, "total_steps": 40000, "loss": 0.1178, "lr": 4.534135277690203e-05, "epoch": 1.288767436169345, "percentage": 19.75, "elapsed_time": "1:47:54", "remaining_time": "7:18:28", "throughput": 2350.37, "total_tokens": 15217840} {"current_steps": 7905, "total_steps": 40000, "loss": 0.1225, "lr": 4.533564382537421e-05, "epoch": 1.2895831633901622, "percentage": 19.76, "elapsed_time": "1:47:56", "remaining_time": "7:18:16", "throughput": 2351.06, "total_tokens": 15227184} {"current_steps": 7910, "total_steps": 40000, "loss": 0.2969, "lr": 4.532993173783456e-05, "epoch": 1.2903988906109798, "percentage": 19.78, "elapsed_time": "1:47:58", "remaining_time": "7:18:03", "throughput": 2351.85, "total_tokens": 15237136} {"current_steps": 7915, "total_steps": 40000, "loss": 0.1411, "lr": 4.5324216515163954e-05, "epoch": 1.291214617831797, "percentage": 19.79, "elapsed_time": "1:48:00", "remaining_time": "7:17:51", "throughput": 2352.71, "total_tokens": 15247600} {"current_steps": 7920, "total_steps": 40000, "loss": 0.1071, "lr": 4.531849815824375e-05, "epoch": 1.2920303450526145, "percentage": 19.8, "elapsed_time": "1:48:02", "remaining_time": "7:17:39", "throughput": 2353.41, "total_tokens": 15256976} {"current_steps": 7925, "total_steps": 40000, "loss": 0.0314, "lr": 4.5312776667955795e-05, "epoch": 1.2928460722734318, "percentage": 19.81, "elapsed_time": "1:48:04", "remaining_time": "7:17:26", "throughput": 2354.2, "total_tokens": 15266992} {"current_steps": 7930, "total_steps": 40000, "loss": 0.0896, "lr": 4.5307052045182405e-05, "epoch": 1.2936617994942492, "percentage": 19.82, "elapsed_time": "1:48:07", "remaining_time": "7:17:14", "throughput": 2354.91, "total_tokens": 15276416} {"current_steps": 7935, "total_steps": 40000, "loss": 0.102, "lr": 4.53013242908064e-05, "epoch": 1.2944775267150666, "percentage": 19.84, "elapsed_time": "1:48:09", "remaining_time": "7:17:02", "throughput": 2355.69, "total_tokens": 15286352} {"current_steps": 7940, "total_steps": 40000, "loss": 0.0741, "lr": 4.529559340571107e-05, "epoch": 1.295293253935884, "percentage": 19.85, "elapsed_time": "1:48:11", "remaining_time": "7:16:49", "throughput": 2356.37, "total_tokens": 15295616} {"current_steps": 7945, "total_steps": 40000, "loss": 0.068, "lr": 4.528985939078018e-05, "epoch": 1.2961089811567013, "percentage": 19.86, "elapsed_time": "1:48:13", "remaining_time": "7:16:37", "throughput": 2357.02, "total_tokens": 15304704} {"current_steps": 7950, "total_steps": 40000, "loss": 0.152, "lr": 4.5284122246898e-05, "epoch": 1.2969247083775186, "percentage": 19.88, "elapsed_time": "1:48:15", "remaining_time": "7:16:25", "throughput": 2357.87, "total_tokens": 15315168} {"current_steps": 7955, "total_steps": 40000, "loss": 0.1617, "lr": 4.527838197494926e-05, "epoch": 1.297740435598336, "percentage": 19.89, "elapsed_time": "1:48:17", "remaining_time": "7:16:13", "throughput": 2358.62, "total_tokens": 15324912} {"current_steps": 7960, "total_steps": 40000, "loss": 0.1508, "lr": 4.527263857581918e-05, "epoch": 1.2985561628191533, "percentage": 19.9, "elapsed_time": "1:48:19", "remaining_time": "7:16:01", "throughput": 2359.45, "total_tokens": 15335152} {"current_steps": 7965, "total_steps": 40000, "loss": 0.1641, "lr": 4.526689205039347e-05, "epoch": 1.2993718900399707, "percentage": 19.91, "elapsed_time": "1:48:21", "remaining_time": "7:15:48", "throughput": 2360.36, "total_tokens": 15345936} {"current_steps": 7970, "total_steps": 40000, "loss": 0.1083, "lr": 4.5261142399558324e-05, "epoch": 1.300187617260788, "percentage": 19.93, "elapsed_time": "1:48:23", "remaining_time": "7:15:36", "throughput": 2361.07, "total_tokens": 15355376} {"current_steps": 7975, "total_steps": 40000, "loss": 0.0244, "lr": 4.525538962420041e-05, "epoch": 1.3010033444816054, "percentage": 19.94, "elapsed_time": "1:48:25", "remaining_time": "7:15:24", "throughput": 2361.69, "total_tokens": 15364304} {"current_steps": 7980, "total_steps": 40000, "loss": 0.1249, "lr": 4.524963372520685e-05, "epoch": 1.3018190717024227, "percentage": 19.95, "elapsed_time": "1:48:27", "remaining_time": "7:15:12", "throughput": 2362.49, "total_tokens": 15374336} {"current_steps": 7985, "total_steps": 40000, "loss": 0.0695, "lr": 4.524387470346531e-05, "epoch": 1.30263479892324, "percentage": 19.96, "elapsed_time": "1:48:29", "remaining_time": "7:15:00", "throughput": 2363.24, "total_tokens": 15384128} {"current_steps": 7990, "total_steps": 40000, "loss": 0.0213, "lr": 4.5238112559863885e-05, "epoch": 1.3034505261440574, "percentage": 19.98, "elapsed_time": "1:48:31", "remaining_time": "7:14:48", "throughput": 2364.08, "total_tokens": 15394496} {"current_steps": 7995, "total_steps": 40000, "loss": 0.0942, "lr": 4.5232347295291175e-05, "epoch": 1.3042662533648748, "percentage": 19.99, "elapsed_time": "1:48:33", "remaining_time": "7:14:35", "throughput": 2364.64, "total_tokens": 15402992} {"current_steps": 8000, "total_steps": 40000, "loss": 0.0544, "lr": 4.522657891063626e-05, "epoch": 1.3050819805856921, "percentage": 20.0, "elapsed_time": "1:48:35", "remaining_time": "7:14:23", "throughput": 2365.28, "total_tokens": 15412000} {"current_steps": 8000, "total_steps": 40000, "eval_loss": 0.1473751664161682, "epoch": 1.3050819805856921, "percentage": 20.0, "elapsed_time": "1:49:56", "remaining_time": "7:19:46", "throughput": 2336.39, "total_tokens": 15412000} {"current_steps": 8005, "total_steps": 40000, "loss": 0.0832, "lr": 4.52208074067887e-05, "epoch": 1.3058977078065095, "percentage": 20.01, "elapsed_time": "1:50:00", "remaining_time": "7:19:39", "throughput": 2336.55, "total_tokens": 15421504} {"current_steps": 8010, "total_steps": 40000, "loss": 0.217, "lr": 4.5215032784638516e-05, "epoch": 1.3067134350273268, "percentage": 20.03, "elapsed_time": "1:50:02", "remaining_time": "7:19:27", "throughput": 2337.08, "total_tokens": 15429856} {"current_steps": 8015, "total_steps": 40000, "loss": 0.2323, "lr": 4.5209255045076245e-05, "epoch": 1.3075291622481442, "percentage": 20.04, "elapsed_time": "1:50:04", "remaining_time": "7:19:15", "throughput": 2337.84, "total_tokens": 15439712} {"current_steps": 8020, "total_steps": 40000, "loss": 0.0835, "lr": 4.5203474188992875e-05, "epoch": 1.3083448894689615, "percentage": 20.05, "elapsed_time": "1:50:06", "remaining_time": "7:19:02", "throughput": 2338.65, "total_tokens": 15449888} {"current_steps": 8025, "total_steps": 40000, "loss": 0.0959, "lr": 4.51976902172799e-05, "epoch": 1.309160616689779, "percentage": 20.06, "elapsed_time": "1:50:08", "remaining_time": "7:18:50", "throughput": 2339.36, "total_tokens": 15459440} {"current_steps": 8030, "total_steps": 40000, "loss": 0.1673, "lr": 4.519190313082927e-05, "epoch": 1.3099763439105963, "percentage": 20.08, "elapsed_time": "1:50:10", "remaining_time": "7:18:38", "throughput": 2340.03, "total_tokens": 15468704} {"current_steps": 8035, "total_steps": 40000, "loss": 0.1359, "lr": 4.518611293053343e-05, "epoch": 1.3107920711314136, "percentage": 20.09, "elapsed_time": "1:50:12", "remaining_time": "7:18:26", "throughput": 2340.71, "total_tokens": 15477984} {"current_steps": 8040, "total_steps": 40000, "loss": 0.0475, "lr": 4.51803196172853e-05, "epoch": 1.311607798352231, "percentage": 20.1, "elapsed_time": "1:50:14", "remaining_time": "7:18:13", "throughput": 2341.12, "total_tokens": 15485520} {"current_steps": 8045, "total_steps": 40000, "loss": 0.2286, "lr": 4.517452319197828e-05, "epoch": 1.3124235255730483, "percentage": 20.11, "elapsed_time": "1:50:16", "remaining_time": "7:18:01", "throughput": 2341.75, "total_tokens": 15494592} {"current_steps": 8050, "total_steps": 40000, "loss": 0.0558, "lr": 4.5168723655506265e-05, "epoch": 1.3132392527938657, "percentage": 20.12, "elapsed_time": "1:50:18", "remaining_time": "7:17:49", "throughput": 2342.31, "total_tokens": 15503136} {"current_steps": 8055, "total_steps": 40000, "loss": 0.0764, "lr": 4.51629210087636e-05, "epoch": 1.314054980014683, "percentage": 20.14, "elapsed_time": "1:50:20", "remaining_time": "7:17:37", "throughput": 2342.9, "total_tokens": 15511856} {"current_steps": 8060, "total_steps": 40000, "loss": 0.1837, "lr": 4.515711525264513e-05, "epoch": 1.3148707072355004, "percentage": 20.15, "elapsed_time": "1:50:22", "remaining_time": "7:17:24", "throughput": 2343.53, "total_tokens": 15520880} {"current_steps": 8065, "total_steps": 40000, "loss": 0.0516, "lr": 4.5151306388046175e-05, "epoch": 1.315686434456318, "percentage": 20.16, "elapsed_time": "1:50:24", "remaining_time": "7:17:12", "throughput": 2344.4, "total_tokens": 15531472} {"current_steps": 8070, "total_steps": 40000, "loss": 0.1462, "lr": 4.514549441586255e-05, "epoch": 1.316502161677135, "percentage": 20.18, "elapsed_time": "1:50:26", "remaining_time": "7:17:00", "throughput": 2345.05, "total_tokens": 15540640} {"current_steps": 8075, "total_steps": 40000, "loss": 0.061, "lr": 4.513967933699051e-05, "epoch": 1.3173178888979526, "percentage": 20.19, "elapsed_time": "1:50:29", "remaining_time": "7:16:48", "throughput": 2345.81, "total_tokens": 15550496} {"current_steps": 8080, "total_steps": 40000, "loss": 0.2481, "lr": 4.513386115232684e-05, "epoch": 1.3181336161187698, "percentage": 20.2, "elapsed_time": "1:50:31", "remaining_time": "7:16:36", "throughput": 2346.49, "total_tokens": 15559856} {"current_steps": 8085, "total_steps": 40000, "loss": 0.0852, "lr": 4.5128039862768745e-05, "epoch": 1.3189493433395874, "percentage": 20.21, "elapsed_time": "1:50:33", "remaining_time": "7:16:24", "throughput": 2347.12, "total_tokens": 15568880} {"current_steps": 8090, "total_steps": 40000, "loss": 0.1412, "lr": 4.512221546921397e-05, "epoch": 1.3197650705604045, "percentage": 20.23, "elapsed_time": "1:50:35", "remaining_time": "7:16:11", "throughput": 2347.9, "total_tokens": 15578944} {"current_steps": 8095, "total_steps": 40000, "loss": 0.0936, "lr": 4.5116387972560694e-05, "epoch": 1.320580797781222, "percentage": 20.24, "elapsed_time": "1:50:37", "remaining_time": "7:15:59", "throughput": 2348.47, "total_tokens": 15587600} {"current_steps": 8100, "total_steps": 40000, "loss": 0.0903, "lr": 4.511055737370759e-05, "epoch": 1.3213965250020392, "percentage": 20.25, "elapsed_time": "1:50:39", "remaining_time": "7:15:47", "throughput": 2349.2, "total_tokens": 15597264} {"current_steps": 8105, "total_steps": 40000, "loss": 0.0874, "lr": 4.510472367355383e-05, "epoch": 1.3222122522228568, "percentage": 20.26, "elapsed_time": "1:50:41", "remaining_time": "7:15:35", "throughput": 2350.0, "total_tokens": 15607392} {"current_steps": 8110, "total_steps": 40000, "loss": 0.0639, "lr": 4.509888687299901e-05, "epoch": 1.3230279794436741, "percentage": 20.28, "elapsed_time": "1:50:43", "remaining_time": "7:15:23", "throughput": 2350.57, "total_tokens": 15616048} {"current_steps": 8115, "total_steps": 40000, "loss": 0.2483, "lr": 4.5093046972943266e-05, "epoch": 1.3238437066644915, "percentage": 20.29, "elapsed_time": "1:50:45", "remaining_time": "7:15:11", "throughput": 2351.25, "total_tokens": 15625488} {"current_steps": 8120, "total_steps": 40000, "loss": 0.2053, "lr": 4.508720397428717e-05, "epoch": 1.3246594338853088, "percentage": 20.3, "elapsed_time": "1:50:47", "remaining_time": "7:14:59", "throughput": 2351.8, "total_tokens": 15634000} {"current_steps": 8125, "total_steps": 40000, "loss": 0.064, "lr": 4.508135787793178e-05, "epoch": 1.3254751611061262, "percentage": 20.31, "elapsed_time": "1:50:49", "remaining_time": "7:14:47", "throughput": 2352.65, "total_tokens": 15644464} {"current_steps": 8130, "total_steps": 40000, "loss": 0.2062, "lr": 4.5075508684778664e-05, "epoch": 1.3262908883269435, "percentage": 20.32, "elapsed_time": "1:50:51", "remaining_time": "7:14:35", "throughput": 2353.27, "total_tokens": 15653488} {"current_steps": 8135, "total_steps": 40000, "loss": 0.0928, "lr": 4.506965639572982e-05, "epoch": 1.3271066155477609, "percentage": 20.34, "elapsed_time": "1:50:53", "remaining_time": "7:14:23", "throughput": 2354.2, "total_tokens": 15664512} {"current_steps": 8140, "total_steps": 40000, "loss": 0.1041, "lr": 4.506380101168774e-05, "epoch": 1.3279223427685782, "percentage": 20.35, "elapsed_time": "1:50:55", "remaining_time": "7:14:11", "throughput": 2354.95, "total_tokens": 15674400} {"current_steps": 8145, "total_steps": 40000, "loss": 0.0861, "lr": 4.505794253355542e-05, "epoch": 1.3287380699893956, "percentage": 20.36, "elapsed_time": "1:50:57", "remaining_time": "7:13:59", "throughput": 2355.57, "total_tokens": 15683392} {"current_steps": 8150, "total_steps": 40000, "loss": 0.2513, "lr": 4.5052080962236286e-05, "epoch": 1.329553797210213, "percentage": 20.38, "elapsed_time": "1:51:00", "remaining_time": "7:13:47", "throughput": 2356.26, "total_tokens": 15692848} {"current_steps": 8155, "total_steps": 40000, "loss": 0.174, "lr": 4.504621629863428e-05, "epoch": 1.3303695244310303, "percentage": 20.39, "elapsed_time": "1:51:02", "remaining_time": "7:13:35", "throughput": 2357.06, "total_tokens": 15703088} {"current_steps": 8160, "total_steps": 40000, "loss": 0.067, "lr": 4.504034854365381e-05, "epoch": 1.3311852516518476, "percentage": 20.4, "elapsed_time": "1:51:04", "remaining_time": "7:13:23", "throughput": 2357.79, "total_tokens": 15712800} {"current_steps": 8165, "total_steps": 40000, "loss": 0.092, "lr": 4.503447769819974e-05, "epoch": 1.332000978872665, "percentage": 20.41, "elapsed_time": "1:51:06", "remaining_time": "7:13:11", "throughput": 2358.39, "total_tokens": 15721664} {"current_steps": 8170, "total_steps": 40000, "loss": 0.1982, "lr": 4.502860376317745e-05, "epoch": 1.3328167060934824, "percentage": 20.42, "elapsed_time": "1:51:08", "remaining_time": "7:12:59", "throughput": 2359.12, "total_tokens": 15731328} {"current_steps": 8175, "total_steps": 40000, "loss": 0.1585, "lr": 4.502272673949276e-05, "epoch": 1.3336324333142997, "percentage": 20.44, "elapsed_time": "1:51:10", "remaining_time": "7:12:47", "throughput": 2359.75, "total_tokens": 15740448} {"current_steps": 8180, "total_steps": 40000, "loss": 0.0523, "lr": 4.501684662805199e-05, "epoch": 1.334448160535117, "percentage": 20.45, "elapsed_time": "1:51:12", "remaining_time": "7:12:35", "throughput": 2360.49, "total_tokens": 15750208} {"current_steps": 8185, "total_steps": 40000, "loss": 0.0889, "lr": 4.5010963429761924e-05, "epoch": 1.3352638877559344, "percentage": 20.46, "elapsed_time": "1:51:14", "remaining_time": "7:12:23", "throughput": 2361.28, "total_tokens": 15760384} {"current_steps": 8190, "total_steps": 40000, "loss": 0.054, "lr": 4.500507714552982e-05, "epoch": 1.3360796149767518, "percentage": 20.47, "elapsed_time": "1:51:16", "remaining_time": "7:12:11", "throughput": 2362.03, "total_tokens": 15770320} {"current_steps": 8195, "total_steps": 40000, "loss": 0.0978, "lr": 4.499918777626342e-05, "epoch": 1.3368953421975691, "percentage": 20.49, "elapsed_time": "1:51:18", "remaining_time": "7:12:00", "throughput": 2362.7, "total_tokens": 15779632} {"current_steps": 8200, "total_steps": 40000, "loss": 0.1348, "lr": 4.499329532287093e-05, "epoch": 1.3377110694183865, "percentage": 20.5, "elapsed_time": "1:51:20", "remaining_time": "7:11:48", "throughput": 2363.44, "total_tokens": 15789456} {"current_steps": 8200, "total_steps": 40000, "eval_loss": 0.13672591745853424, "epoch": 1.3377110694183865, "percentage": 20.5, "elapsed_time": "1:52:41", "remaining_time": "7:17:00", "throughput": 2335.26, "total_tokens": 15789456} {"current_steps": 8205, "total_steps": 40000, "loss": 0.0306, "lr": 4.4987399786261064e-05, "epoch": 1.3385267966392038, "percentage": 20.51, "elapsed_time": "1:52:45", "remaining_time": "7:16:55", "throughput": 2335.24, "total_tokens": 15798384} {"current_steps": 8210, "total_steps": 40000, "loss": 0.1579, "lr": 4.498150116734297e-05, "epoch": 1.3393425238600212, "percentage": 20.52, "elapsed_time": "1:52:47", "remaining_time": "7:16:43", "throughput": 2335.8, "total_tokens": 15807008} {"current_steps": 8215, "total_steps": 40000, "loss": 0.2549, "lr": 4.4975599467026294e-05, "epoch": 1.3401582510808385, "percentage": 20.54, "elapsed_time": "1:52:49", "remaining_time": "7:16:31", "throughput": 2336.51, "total_tokens": 15816640} {"current_steps": 8220, "total_steps": 40000, "loss": 0.0787, "lr": 4.496969468622114e-05, "epoch": 1.3409739783016559, "percentage": 20.55, "elapsed_time": "1:52:51", "remaining_time": "7:16:19", "throughput": 2337.48, "total_tokens": 15828048} {"current_steps": 8225, "total_steps": 40000, "loss": 0.0622, "lr": 4.496378682583813e-05, "epoch": 1.3417897055224732, "percentage": 20.56, "elapsed_time": "1:52:53", "remaining_time": "7:16:07", "throughput": 2338.08, "total_tokens": 15836976} {"current_steps": 8230, "total_steps": 40000, "loss": 0.0986, "lr": 4.495787588678829e-05, "epoch": 1.3426054327432906, "percentage": 20.57, "elapsed_time": "1:52:55", "remaining_time": "7:15:55", "throughput": 2338.76, "total_tokens": 15846368} {"current_steps": 8235, "total_steps": 40000, "loss": 0.0316, "lr": 4.4951961869983196e-05, "epoch": 1.343421159964108, "percentage": 20.59, "elapsed_time": "1:52:57", "remaining_time": "7:15:43", "throughput": 2339.54, "total_tokens": 15856496} {"current_steps": 8240, "total_steps": 40000, "loss": 0.0396, "lr": 4.494604477633485e-05, "epoch": 1.3442368871849253, "percentage": 20.6, "elapsed_time": "1:52:59", "remaining_time": "7:15:31", "throughput": 2340.3, "total_tokens": 15866528} {"current_steps": 8245, "total_steps": 40000, "loss": 0.0405, "lr": 4.4940124606755734e-05, "epoch": 1.3450526144057426, "percentage": 20.61, "elapsed_time": "1:53:01", "remaining_time": "7:15:19", "throughput": 2340.98, "total_tokens": 15875936} {"current_steps": 8250, "total_steps": 40000, "loss": 0.0107, "lr": 4.493420136215882e-05, "epoch": 1.3458683416265602, "percentage": 20.62, "elapsed_time": "1:53:03", "remaining_time": "7:15:07", "throughput": 2341.54, "total_tokens": 15884560} {"current_steps": 8255, "total_steps": 40000, "loss": 0.1721, "lr": 4.492827504345756e-05, "epoch": 1.3466840688473773, "percentage": 20.64, "elapsed_time": "1:53:05", "remaining_time": "7:14:55", "throughput": 2342.21, "total_tokens": 15893968} {"current_steps": 8260, "total_steps": 40000, "loss": 0.1041, "lr": 4.492234565156584e-05, "epoch": 1.347499796068195, "percentage": 20.65, "elapsed_time": "1:53:07", "remaining_time": "7:14:43", "throughput": 2342.94, "total_tokens": 15903776} {"current_steps": 8265, "total_steps": 40000, "loss": 0.1535, "lr": 4.491641318739807e-05, "epoch": 1.348315523289012, "percentage": 20.66, "elapsed_time": "1:53:10", "remaining_time": "7:14:31", "throughput": 2343.47, "total_tokens": 15912208} {"current_steps": 8270, "total_steps": 40000, "loss": 0.0795, "lr": 4.4910477651869096e-05, "epoch": 1.3491312505098296, "percentage": 20.67, "elapsed_time": "1:53:12", "remaining_time": "7:14:19", "throughput": 2344.33, "total_tokens": 15922832} {"current_steps": 8275, "total_steps": 40000, "loss": 0.0811, "lr": 4.4904539045894254e-05, "epoch": 1.3499469777306468, "percentage": 20.69, "elapsed_time": "1:53:14", "remaining_time": "7:14:07", "throughput": 2345.19, "total_tokens": 15933520} {"current_steps": 8280, "total_steps": 40000, "loss": 0.1257, "lr": 4.4898597370389364e-05, "epoch": 1.3507627049514643, "percentage": 20.7, "elapsed_time": "1:53:16", "remaining_time": "7:13:55", "throughput": 2345.91, "total_tokens": 15943296} {"current_steps": 8285, "total_steps": 40000, "loss": 0.1683, "lr": 4.489265262627069e-05, "epoch": 1.3515784321722815, "percentage": 20.71, "elapsed_time": "1:53:18", "remaining_time": "7:13:43", "throughput": 2346.53, "total_tokens": 15952336} {"current_steps": 8290, "total_steps": 40000, "loss": 0.116, "lr": 4.488670481445499e-05, "epoch": 1.352394159393099, "percentage": 20.72, "elapsed_time": "1:53:20", "remaining_time": "7:13:31", "throughput": 2347.18, "total_tokens": 15961648} {"current_steps": 8295, "total_steps": 40000, "loss": 0.0432, "lr": 4.488075393585951e-05, "epoch": 1.3532098866139162, "percentage": 20.74, "elapsed_time": "1:53:22", "remaining_time": "7:13:20", "throughput": 2347.86, "total_tokens": 15971072} {"current_steps": 8300, "total_steps": 40000, "loss": 0.0842, "lr": 4.487479999140193e-05, "epoch": 1.3540256138347337, "percentage": 20.75, "elapsed_time": "1:53:24", "remaining_time": "7:13:08", "throughput": 2348.48, "total_tokens": 15980128} {"current_steps": 8305, "total_steps": 40000, "loss": 0.0934, "lr": 4.4868842982000425e-05, "epoch": 1.354841341055551, "percentage": 20.76, "elapsed_time": "1:53:26", "remaining_time": "7:12:56", "throughput": 2349.15, "total_tokens": 15989584} {"current_steps": 8310, "total_steps": 40000, "loss": 0.1476, "lr": 4.486288290857365e-05, "epoch": 1.3556570682763684, "percentage": 20.77, "elapsed_time": "1:53:28", "remaining_time": "7:12:44", "throughput": 2349.81, "total_tokens": 15998880} {"current_steps": 8315, "total_steps": 40000, "loss": 0.1723, "lr": 4.4856919772040715e-05, "epoch": 1.3564727954971858, "percentage": 20.79, "elapsed_time": "1:53:30", "remaining_time": "7:12:32", "throughput": 2350.59, "total_tokens": 16009104} {"current_steps": 8320, "total_steps": 40000, "loss": 0.0871, "lr": 4.485095357332122e-05, "epoch": 1.3572885227180032, "percentage": 20.8, "elapsed_time": "1:53:32", "remaining_time": "7:12:20", "throughput": 2351.28, "total_tokens": 16018640} {"current_steps": 8325, "total_steps": 40000, "loss": 0.0216, "lr": 4.484498431333521e-05, "epoch": 1.3581042499388205, "percentage": 20.81, "elapsed_time": "1:53:34", "remaining_time": "7:12:08", "throughput": 2352.04, "total_tokens": 16028688} {"current_steps": 8330, "total_steps": 40000, "loss": 0.1484, "lr": 4.4839011993003245e-05, "epoch": 1.3589199771596379, "percentage": 20.82, "elapsed_time": "1:53:36", "remaining_time": "7:11:57", "throughput": 2352.88, "total_tokens": 16039232} {"current_steps": 8335, "total_steps": 40000, "loss": 0.0862, "lr": 4.4833036613246305e-05, "epoch": 1.3597357043804552, "percentage": 20.84, "elapsed_time": "1:53:38", "remaining_time": "7:11:45", "throughput": 2353.67, "total_tokens": 16049536} {"current_steps": 8340, "total_steps": 40000, "loss": 0.1136, "lr": 4.482705817498589e-05, "epoch": 1.3605514316012726, "percentage": 20.85, "elapsed_time": "1:53:40", "remaining_time": "7:11:33", "throughput": 2354.5, "total_tokens": 16060032} {"current_steps": 8345, "total_steps": 40000, "loss": 0.1947, "lr": 4.4821076679143934e-05, "epoch": 1.36136715882209, "percentage": 20.86, "elapsed_time": "1:53:43", "remaining_time": "7:11:21", "throughput": 2355.26, "total_tokens": 16070064} {"current_steps": 8350, "total_steps": 40000, "loss": 0.0126, "lr": 4.481509212664288e-05, "epoch": 1.3621828860429073, "percentage": 20.88, "elapsed_time": "1:53:45", "remaining_time": "7:11:10", "throughput": 2355.91, "total_tokens": 16079408} {"current_steps": 8355, "total_steps": 40000, "loss": 0.1652, "lr": 4.480910451840559e-05, "epoch": 1.3629986132637246, "percentage": 20.89, "elapsed_time": "1:53:47", "remaining_time": "7:10:58", "throughput": 2356.37, "total_tokens": 16087440} {"current_steps": 8360, "total_steps": 40000, "loss": 0.1168, "lr": 4.480311385535546e-05, "epoch": 1.363814340484542, "percentage": 20.9, "elapsed_time": "1:53:49", "remaining_time": "7:10:46", "throughput": 2357.15, "total_tokens": 16097584} {"current_steps": 8365, "total_steps": 40000, "loss": 0.095, "lr": 4.47971201384163e-05, "epoch": 1.3646300677053593, "percentage": 20.91, "elapsed_time": "1:53:51", "remaining_time": "7:10:34", "throughput": 2357.79, "total_tokens": 16106864} {"current_steps": 8370, "total_steps": 40000, "loss": 0.1482, "lr": 4.4791123368512446e-05, "epoch": 1.3654457949261767, "percentage": 20.93, "elapsed_time": "1:53:53", "remaining_time": "7:10:23", "throughput": 2358.52, "total_tokens": 16116656} {"current_steps": 8375, "total_steps": 40000, "loss": 0.1005, "lr": 4.478512354656864e-05, "epoch": 1.366261522146994, "percentage": 20.94, "elapsed_time": "1:53:55", "remaining_time": "7:10:11", "throughput": 2359.13, "total_tokens": 16125776} {"current_steps": 8380, "total_steps": 40000, "loss": 0.1137, "lr": 4.477912067351016e-05, "epoch": 1.3670772493678114, "percentage": 20.95, "elapsed_time": "1:53:57", "remaining_time": "7:09:59", "throughput": 2359.8, "total_tokens": 16135216} {"current_steps": 8385, "total_steps": 40000, "loss": 0.0675, "lr": 4.477311475026271e-05, "epoch": 1.3678929765886287, "percentage": 20.96, "elapsed_time": "1:53:59", "remaining_time": "7:09:48", "throughput": 2360.38, "total_tokens": 16144080} {"current_steps": 8390, "total_steps": 40000, "loss": 0.1608, "lr": 4.476710577775248e-05, "epoch": 1.368708703809446, "percentage": 20.97, "elapsed_time": "1:54:01", "remaining_time": "7:09:36", "throughput": 2361.03, "total_tokens": 16153392} {"current_steps": 8395, "total_steps": 40000, "loss": 0.0606, "lr": 4.476109375690612e-05, "epoch": 1.3695244310302634, "percentage": 20.99, "elapsed_time": "1:54:03", "remaining_time": "7:09:24", "throughput": 2361.69, "total_tokens": 16162784} {"current_steps": 8400, "total_steps": 40000, "loss": 0.0182, "lr": 4.4755078688650784e-05, "epoch": 1.3703401582510808, "percentage": 21.0, "elapsed_time": "1:54:05", "remaining_time": "7:09:13", "throughput": 2362.56, "total_tokens": 16173616} {"current_steps": 8400, "total_steps": 40000, "eval_loss": 0.1445779800415039, "epoch": 1.3703401582510808, "percentage": 21.0, "elapsed_time": "1:55:26", "remaining_time": "7:14:16", "throughput": 2335.1, "total_tokens": 16173616} {"current_steps": 8405, "total_steps": 40000, "loss": 0.0505, "lr": 4.474906057391406e-05, "epoch": 1.3711558854718982, "percentage": 21.01, "elapsed_time": "1:55:30", "remaining_time": "7:14:13", "throughput": 2335.03, "total_tokens": 16183520} {"current_steps": 8410, "total_steps": 40000, "loss": 0.3477, "lr": 4.4743039413624e-05, "epoch": 1.3719716126927155, "percentage": 21.02, "elapsed_time": "1:55:32", "remaining_time": "7:14:01", "throughput": 2335.59, "total_tokens": 16192192} {"current_steps": 8415, "total_steps": 40000, "loss": 0.086, "lr": 4.473701520870916e-05, "epoch": 1.3727873399135329, "percentage": 21.04, "elapsed_time": "1:55:34", "remaining_time": "7:13:49", "throughput": 2336.29, "total_tokens": 16201888} {"current_steps": 8420, "total_steps": 40000, "loss": 0.2345, "lr": 4.4730987960098544e-05, "epoch": 1.3736030671343502, "percentage": 21.05, "elapsed_time": "1:55:36", "remaining_time": "7:13:37", "throughput": 2337.02, "total_tokens": 16211760} {"current_steps": 8425, "total_steps": 40000, "loss": 0.1655, "lr": 4.4724957668721635e-05, "epoch": 1.3744187943551676, "percentage": 21.06, "elapsed_time": "1:55:38", "remaining_time": "7:13:25", "throughput": 2337.84, "total_tokens": 16222256} {"current_steps": 8430, "total_steps": 40000, "loss": 0.0298, "lr": 4.471892433550836e-05, "epoch": 1.375234521575985, "percentage": 21.07, "elapsed_time": "1:55:41", "remaining_time": "7:13:13", "throughput": 2338.53, "total_tokens": 16231856} {"current_steps": 8435, "total_steps": 40000, "loss": 0.1449, "lr": 4.471288796138916e-05, "epoch": 1.3760502487968023, "percentage": 21.09, "elapsed_time": "1:55:43", "remaining_time": "7:13:02", "throughput": 2339.11, "total_tokens": 16240752} {"current_steps": 8440, "total_steps": 40000, "loss": 0.1315, "lr": 4.470684854729491e-05, "epoch": 1.3768659760176196, "percentage": 21.1, "elapsed_time": "1:55:45", "remaining_time": "7:12:50", "throughput": 2339.89, "total_tokens": 16250960} {"current_steps": 8445, "total_steps": 40000, "loss": 0.1344, "lr": 4.4700806094156955e-05, "epoch": 1.3776817032384372, "percentage": 21.11, "elapsed_time": "1:55:47", "remaining_time": "7:12:38", "throughput": 2340.49, "total_tokens": 16260016} {"current_steps": 8450, "total_steps": 40000, "loss": 0.2106, "lr": 4.469476060290713e-05, "epoch": 1.3784974304592543, "percentage": 21.12, "elapsed_time": "1:55:49", "remaining_time": "7:12:26", "throughput": 2341.28, "total_tokens": 16270304} {"current_steps": 8455, "total_steps": 40000, "loss": 0.0486, "lr": 4.468871207447772e-05, "epoch": 1.379313157680072, "percentage": 21.14, "elapsed_time": "1:55:51", "remaining_time": "7:12:15", "throughput": 2341.93, "total_tokens": 16279664} {"current_steps": 8460, "total_steps": 40000, "loss": 0.0752, "lr": 4.4682660509801486e-05, "epoch": 1.380128884900889, "percentage": 21.15, "elapsed_time": "1:55:53", "remaining_time": "7:12:03", "throughput": 2342.6, "total_tokens": 16289184} {"current_steps": 8465, "total_steps": 40000, "loss": 0.2016, "lr": 4.467660590981165e-05, "epoch": 1.3809446121217066, "percentage": 21.16, "elapsed_time": "1:55:55", "remaining_time": "7:11:51", "throughput": 2343.49, "total_tokens": 16300224} {"current_steps": 8470, "total_steps": 40000, "loss": 0.091, "lr": 4.467054827544191e-05, "epoch": 1.3817603393425237, "percentage": 21.18, "elapsed_time": "1:55:57", "remaining_time": "7:11:40", "throughput": 2344.13, "total_tokens": 16309536} {"current_steps": 8475, "total_steps": 40000, "loss": 0.0902, "lr": 4.4664487607626434e-05, "epoch": 1.3825760665633413, "percentage": 21.19, "elapsed_time": "1:55:59", "remaining_time": "7:11:28", "throughput": 2344.59, "total_tokens": 16317600} {"current_steps": 8480, "total_steps": 40000, "loss": 0.246, "lr": 4.4658423907299845e-05, "epoch": 1.3833917937841584, "percentage": 21.2, "elapsed_time": "1:56:01", "remaining_time": "7:11:16", "throughput": 2345.35, "total_tokens": 16327792} {"current_steps": 8485, "total_steps": 40000, "loss": 0.1123, "lr": 4.465235717539725e-05, "epoch": 1.384207521004976, "percentage": 21.21, "elapsed_time": "1:56:03", "remaining_time": "7:11:05", "throughput": 2346.18, "total_tokens": 16338384} {"current_steps": 8490, "total_steps": 40000, "loss": 0.0136, "lr": 4.464628741285421e-05, "epoch": 1.3850232482257934, "percentage": 21.22, "elapsed_time": "1:56:05", "remaining_time": "7:10:53", "throughput": 2346.84, "total_tokens": 16347792} {"current_steps": 8495, "total_steps": 40000, "loss": 0.0689, "lr": 4.4640214620606754e-05, "epoch": 1.3858389754466107, "percentage": 21.24, "elapsed_time": "1:56:07", "remaining_time": "7:10:41", "throughput": 2347.7, "total_tokens": 16358672} {"current_steps": 8500, "total_steps": 40000, "loss": 0.0668, "lr": 4.46341387995914e-05, "epoch": 1.386654702667428, "percentage": 21.25, "elapsed_time": "1:56:10", "remaining_time": "7:10:30", "throughput": 2348.35, "total_tokens": 16368016} {"current_steps": 8505, "total_steps": 40000, "loss": 0.08, "lr": 4.4628059950745106e-05, "epoch": 1.3874704298882454, "percentage": 21.26, "elapsed_time": "1:56:12", "remaining_time": "7:10:18", "throughput": 2349.01, "total_tokens": 16377504} {"current_steps": 8510, "total_steps": 40000, "loss": 0.3058, "lr": 4.4621978075005297e-05, "epoch": 1.3882861571090628, "percentage": 21.27, "elapsed_time": "1:56:14", "remaining_time": "7:10:06", "throughput": 2349.61, "total_tokens": 16386528} {"current_steps": 8515, "total_steps": 40000, "loss": 0.1223, "lr": 4.461589317330989e-05, "epoch": 1.3891018843298801, "percentage": 21.29, "elapsed_time": "1:56:16", "remaining_time": "7:09:55", "throughput": 2350.46, "total_tokens": 16397376} {"current_steps": 8520, "total_steps": 40000, "loss": 0.1195, "lr": 4.460980524659724e-05, "epoch": 1.3899176115506975, "percentage": 21.3, "elapsed_time": "1:56:18", "remaining_time": "7:09:43", "throughput": 2351.05, "total_tokens": 16406304} {"current_steps": 8525, "total_steps": 40000, "loss": 0.1507, "lr": 4.46037142958062e-05, "epoch": 1.3907333387715148, "percentage": 21.31, "elapsed_time": "1:56:20", "remaining_time": "7:09:32", "throughput": 2351.66, "total_tokens": 16415424} {"current_steps": 8530, "total_steps": 40000, "loss": 0.072, "lr": 4.4597620321876046e-05, "epoch": 1.3915490659923322, "percentage": 21.32, "elapsed_time": "1:56:22", "remaining_time": "7:09:20", "throughput": 2352.41, "total_tokens": 16425536} {"current_steps": 8535, "total_steps": 40000, "loss": 0.0631, "lr": 4.459152332574656e-05, "epoch": 1.3923647932131495, "percentage": 21.34, "elapsed_time": "1:56:24", "remaining_time": "7:09:08", "throughput": 2352.82, "total_tokens": 16433248} {"current_steps": 8540, "total_steps": 40000, "loss": 0.1855, "lr": 4.4585423308357985e-05, "epoch": 1.393180520433967, "percentage": 21.35, "elapsed_time": "1:56:26", "remaining_time": "7:08:57", "throughput": 2353.43, "total_tokens": 16442416} {"current_steps": 8545, "total_steps": 40000, "loss": 0.1919, "lr": 4.457932027065102e-05, "epoch": 1.3939962476547842, "percentage": 21.36, "elapsed_time": "1:56:28", "remaining_time": "7:08:45", "throughput": 2354.17, "total_tokens": 16452416} {"current_steps": 8550, "total_steps": 40000, "loss": 0.1467, "lr": 4.45732142135668e-05, "epoch": 1.3948119748756016, "percentage": 21.38, "elapsed_time": "1:56:30", "remaining_time": "7:08:34", "throughput": 2354.83, "total_tokens": 16461952} {"current_steps": 8555, "total_steps": 40000, "loss": 0.1708, "lr": 4.4567105138046986e-05, "epoch": 1.395627702096419, "percentage": 21.39, "elapsed_time": "1:56:32", "remaining_time": "7:08:22", "throughput": 2355.64, "total_tokens": 16472432} {"current_steps": 8560, "total_steps": 40000, "loss": 0.2164, "lr": 4.456099304503365e-05, "epoch": 1.3964434293172363, "percentage": 21.4, "elapsed_time": "1:56:34", "remaining_time": "7:08:11", "throughput": 2356.35, "total_tokens": 16482288} {"current_steps": 8565, "total_steps": 40000, "loss": 0.2086, "lr": 4.455487793546939e-05, "epoch": 1.3972591565380537, "percentage": 21.41, "elapsed_time": "1:56:36", "remaining_time": "7:07:59", "throughput": 2357.12, "total_tokens": 16492544} {"current_steps": 8570, "total_steps": 40000, "loss": 0.139, "lr": 4.454875981029719e-05, "epoch": 1.398074883758871, "percentage": 21.43, "elapsed_time": "1:56:38", "remaining_time": "7:07:48", "throughput": 2357.78, "total_tokens": 16502048} {"current_steps": 8575, "total_steps": 40000, "loss": 0.0278, "lr": 4.454263867046057e-05, "epoch": 1.3988906109796884, "percentage": 21.44, "elapsed_time": "1:56:41", "remaining_time": "7:07:36", "throughput": 2358.37, "total_tokens": 16511008} {"current_steps": 8580, "total_steps": 40000, "loss": 0.1287, "lr": 4.4536514516903484e-05, "epoch": 1.3997063382005057, "percentage": 21.45, "elapsed_time": "1:56:43", "remaining_time": "7:07:25", "throughput": 2359.05, "total_tokens": 16520656} {"current_steps": 8585, "total_steps": 40000, "loss": 0.117, "lr": 4.453038735057034e-05, "epoch": 1.400522065421323, "percentage": 21.46, "elapsed_time": "1:56:45", "remaining_time": "7:07:13", "throughput": 2359.62, "total_tokens": 16529552} {"current_steps": 8590, "total_steps": 40000, "loss": 0.0916, "lr": 4.4524257172406034e-05, "epoch": 1.4013377926421404, "percentage": 21.48, "elapsed_time": "1:56:47", "remaining_time": "7:07:02", "throughput": 2360.33, "total_tokens": 16539392} {"current_steps": 8595, "total_steps": 40000, "loss": 0.2925, "lr": 4.451812398335592e-05, "epoch": 1.4021535198629578, "percentage": 21.49, "elapsed_time": "1:56:49", "remaining_time": "7:06:51", "throughput": 2360.98, "total_tokens": 16548800} {"current_steps": 8600, "total_steps": 40000, "loss": 0.0708, "lr": 4.4511987784365805e-05, "epoch": 1.4029692470837751, "percentage": 21.5, "elapsed_time": "1:56:51", "remaining_time": "7:06:39", "throughput": 2361.66, "total_tokens": 16558464} {"current_steps": 8600, "total_steps": 40000, "eval_loss": 0.13184860348701477, "epoch": 1.4029692470837751, "percentage": 21.5, "elapsed_time": "1:58:11", "remaining_time": "7:11:33", "throughput": 2334.85, "total_tokens": 16558464} {"current_steps": 8605, "total_steps": 40000, "loss": 0.2357, "lr": 4.450584857638197e-05, "epoch": 1.4037849743045925, "percentage": 21.51, "elapsed_time": "1:58:15", "remaining_time": "7:11:27", "throughput": 2334.85, "total_tokens": 16566832} {"current_steps": 8610, "total_steps": 40000, "loss": 0.078, "lr": 4.449970636035116e-05, "epoch": 1.4046007015254098, "percentage": 21.52, "elapsed_time": "1:58:17", "remaining_time": "7:11:15", "throughput": 2335.58, "total_tokens": 16576912} {"current_steps": 8615, "total_steps": 40000, "loss": 0.0755, "lr": 4.4493561137220574e-05, "epoch": 1.4054164287462272, "percentage": 21.54, "elapsed_time": "1:58:19", "remaining_time": "7:11:04", "throughput": 2336.26, "total_tokens": 16586544} {"current_steps": 8620, "total_steps": 40000, "loss": 0.2003, "lr": 4.44874129079379e-05, "epoch": 1.4062321559670445, "percentage": 21.55, "elapsed_time": "1:58:21", "remaining_time": "7:10:52", "throughput": 2336.89, "total_tokens": 16595808} {"current_steps": 8625, "total_steps": 40000, "loss": 0.0693, "lr": 4.4481261673451255e-05, "epoch": 1.407047883187862, "percentage": 21.56, "elapsed_time": "1:58:23", "remaining_time": "7:10:41", "throughput": 2337.48, "total_tokens": 16604848} {"current_steps": 8630, "total_steps": 40000, "loss": 0.0642, "lr": 4.4475107434709245e-05, "epoch": 1.4078636104086795, "percentage": 21.57, "elapsed_time": "1:58:25", "remaining_time": "7:10:29", "throughput": 2338.15, "total_tokens": 16614480} {"current_steps": 8635, "total_steps": 40000, "loss": 0.0296, "lr": 4.446895019266093e-05, "epoch": 1.4086793376294966, "percentage": 21.59, "elapsed_time": "1:58:27", "remaining_time": "7:10:17", "throughput": 2338.75, "total_tokens": 16623520} {"current_steps": 8640, "total_steps": 40000, "loss": 0.0805, "lr": 4.446278994825583e-05, "epoch": 1.4094950648503142, "percentage": 21.6, "elapsed_time": "1:58:29", "remaining_time": "7:10:06", "throughput": 2339.43, "total_tokens": 16633216} {"current_steps": 8645, "total_steps": 40000, "loss": 0.2129, "lr": 4.445662670244394e-05, "epoch": 1.4103107920711313, "percentage": 21.61, "elapsed_time": "1:58:32", "remaining_time": "7:09:54", "throughput": 2340.09, "total_tokens": 16642768} {"current_steps": 8650, "total_steps": 40000, "loss": 0.2205, "lr": 4.44504604561757e-05, "epoch": 1.4111265192919489, "percentage": 21.62, "elapsed_time": "1:58:34", "remaining_time": "7:09:43", "throughput": 2340.72, "total_tokens": 16652032} {"current_steps": 8655, "total_steps": 40000, "loss": 0.0905, "lr": 4.4444291210402035e-05, "epoch": 1.411942246512766, "percentage": 21.64, "elapsed_time": "1:58:36", "remaining_time": "7:09:31", "throughput": 2341.29, "total_tokens": 16660976} {"current_steps": 8660, "total_steps": 40000, "loss": 0.018, "lr": 4.443811896607431e-05, "epoch": 1.4127579737335836, "percentage": 21.65, "elapsed_time": "1:58:38", "remaining_time": "7:09:20", "throughput": 2341.95, "total_tokens": 16670496} {"current_steps": 8665, "total_steps": 40000, "loss": 0.0921, "lr": 4.443194372414436e-05, "epoch": 1.4135737009544007, "percentage": 21.66, "elapsed_time": "1:58:40", "remaining_time": "7:09:08", "throughput": 2342.78, "total_tokens": 16681232} {"current_steps": 8670, "total_steps": 40000, "loss": 0.1016, "lr": 4.442576548556449e-05, "epoch": 1.4143894281752183, "percentage": 21.68, "elapsed_time": "1:58:42", "remaining_time": "7:08:57", "throughput": 2343.41, "total_tokens": 16690576} {"current_steps": 8675, "total_steps": 40000, "loss": 0.0945, "lr": 4.441958425128747e-05, "epoch": 1.4152051553960356, "percentage": 21.69, "elapsed_time": "1:58:44", "remaining_time": "7:08:45", "throughput": 2344.11, "total_tokens": 16700352} {"current_steps": 8680, "total_steps": 40000, "loss": 0.0114, "lr": 4.4413400022266515e-05, "epoch": 1.416020882616853, "percentage": 21.7, "elapsed_time": "1:58:46", "remaining_time": "7:08:34", "throughput": 2344.95, "total_tokens": 16711264} {"current_steps": 8685, "total_steps": 40000, "loss": 0.0631, "lr": 4.4407212799455313e-05, "epoch": 1.4168366098376703, "percentage": 21.71, "elapsed_time": "1:58:48", "remaining_time": "7:08:22", "throughput": 2345.76, "total_tokens": 16721872} {"current_steps": 8690, "total_steps": 40000, "loss": 0.217, "lr": 4.4401022583808003e-05, "epoch": 1.4176523370584877, "percentage": 21.73, "elapsed_time": "1:58:50", "remaining_time": "7:08:11", "throughput": 2346.35, "total_tokens": 16730912} {"current_steps": 8695, "total_steps": 40000, "loss": 0.1469, "lr": 4.439482937627921e-05, "epoch": 1.418468064279305, "percentage": 21.74, "elapsed_time": "1:58:52", "remaining_time": "7:08:00", "throughput": 2347.18, "total_tokens": 16741632} {"current_steps": 8700, "total_steps": 40000, "loss": 0.1716, "lr": 4.4388633177824004e-05, "epoch": 1.4192837915001224, "percentage": 21.75, "elapsed_time": "1:58:54", "remaining_time": "7:07:48", "throughput": 2347.68, "total_tokens": 16750080} {"current_steps": 8705, "total_steps": 40000, "loss": 0.1829, "lr": 4.4382433989397895e-05, "epoch": 1.4200995187209398, "percentage": 21.76, "elapsed_time": "1:58:56", "remaining_time": "7:07:37", "throughput": 2348.15, "total_tokens": 16758240} {"current_steps": 8710, "total_steps": 40000, "loss": 0.2039, "lr": 4.4376231811956895e-05, "epoch": 1.420915245941757, "percentage": 21.77, "elapsed_time": "1:58:58", "remaining_time": "7:07:25", "throughput": 2348.87, "total_tokens": 16768224} {"current_steps": 8715, "total_steps": 40000, "loss": 0.1878, "lr": 4.437002664645745e-05, "epoch": 1.4217309731625745, "percentage": 21.79, "elapsed_time": "1:59:00", "remaining_time": "7:07:14", "throughput": 2349.65, "total_tokens": 16778688} {"current_steps": 8720, "total_steps": 40000, "loss": 0.155, "lr": 4.436381849385649e-05, "epoch": 1.4225467003833918, "percentage": 21.8, "elapsed_time": "1:59:02", "remaining_time": "7:07:03", "throughput": 2350.33, "total_tokens": 16788400} {"current_steps": 8725, "total_steps": 40000, "loss": 0.1273, "lr": 4.435760735511136e-05, "epoch": 1.4233624276042092, "percentage": 21.81, "elapsed_time": "1:59:05", "remaining_time": "7:06:51", "throughput": 2351.04, "total_tokens": 16798368} {"current_steps": 8730, "total_steps": 40000, "loss": 0.1833, "lr": 4.435139323117992e-05, "epoch": 1.4241781548250265, "percentage": 21.82, "elapsed_time": "1:59:07", "remaining_time": "7:06:40", "throughput": 2351.58, "total_tokens": 16807072} {"current_steps": 8735, "total_steps": 40000, "loss": 0.1023, "lr": 4.434517612302046e-05, "epoch": 1.4249938820458439, "percentage": 21.84, "elapsed_time": "1:59:09", "remaining_time": "7:06:28", "throughput": 2352.45, "total_tokens": 16818112} {"current_steps": 8740, "total_steps": 40000, "loss": 0.1334, "lr": 4.433895603159174e-05, "epoch": 1.4258096092666612, "percentage": 21.85, "elapsed_time": "1:59:11", "remaining_time": "7:06:17", "throughput": 2353.35, "total_tokens": 16829440} {"current_steps": 8745, "total_steps": 40000, "loss": 0.1156, "lr": 4.433273295785296e-05, "epoch": 1.4266253364874786, "percentage": 21.86, "elapsed_time": "1:59:13", "remaining_time": "7:06:06", "throughput": 2354.11, "total_tokens": 16839712} {"current_steps": 8750, "total_steps": 40000, "loss": 0.1234, "lr": 4.432650690276382e-05, "epoch": 1.427441063708296, "percentage": 21.88, "elapsed_time": "1:59:15", "remaining_time": "7:05:55", "throughput": 2354.67, "total_tokens": 16848608} {"current_steps": 8755, "total_steps": 40000, "loss": 0.0971, "lr": 4.4320277867284435e-05, "epoch": 1.4282567909291133, "percentage": 21.89, "elapsed_time": "1:59:17", "remaining_time": "7:05:43", "throughput": 2355.54, "total_tokens": 16859696} {"current_steps": 8760, "total_steps": 40000, "loss": 0.1164, "lr": 4.431404585237541e-05, "epoch": 1.4290725181499306, "percentage": 21.9, "elapsed_time": "1:59:19", "remaining_time": "7:05:32", "throughput": 2356.14, "total_tokens": 16868832} {"current_steps": 8765, "total_steps": 40000, "loss": 0.0671, "lr": 4.43078108589978e-05, "epoch": 1.429888245370748, "percentage": 21.91, "elapsed_time": "1:59:21", "remaining_time": "7:05:21", "throughput": 2356.78, "total_tokens": 16878288} {"current_steps": 8770, "total_steps": 40000, "loss": 0.1301, "lr": 4.4301572888113116e-05, "epoch": 1.4307039725915653, "percentage": 21.93, "elapsed_time": "1:59:23", "remaining_time": "7:05:09", "throughput": 2357.55, "total_tokens": 16888624} {"current_steps": 8775, "total_steps": 40000, "loss": 0.0434, "lr": 4.4295331940683337e-05, "epoch": 1.4315196998123827, "percentage": 21.94, "elapsed_time": "1:59:25", "remaining_time": "7:04:58", "throughput": 2358.14, "total_tokens": 16897744} {"current_steps": 8780, "total_steps": 40000, "loss": 0.0574, "lr": 4.428908801767089e-05, "epoch": 1.4323354270332, "percentage": 21.95, "elapsed_time": "1:59:27", "remaining_time": "7:04:47", "throughput": 2358.84, "total_tokens": 16907632} {"current_steps": 8785, "total_steps": 40000, "loss": 0.075, "lr": 4.428284112003868e-05, "epoch": 1.4331511542540174, "percentage": 21.96, "elapsed_time": "1:59:29", "remaining_time": "7:04:35", "throughput": 2359.43, "total_tokens": 16916752} {"current_steps": 8790, "total_steps": 40000, "loss": 0.1503, "lr": 4.4276591248750033e-05, "epoch": 1.4339668814748348, "percentage": 21.98, "elapsed_time": "1:59:31", "remaining_time": "7:04:24", "throughput": 2360.07, "total_tokens": 16926176} {"current_steps": 8795, "total_steps": 40000, "loss": 0.0425, "lr": 4.4270338404768774e-05, "epoch": 1.434782608695652, "percentage": 21.99, "elapsed_time": "1:59:33", "remaining_time": "7:04:13", "throughput": 2360.81, "total_tokens": 16936336} {"current_steps": 8800, "total_steps": 40000, "loss": 0.0205, "lr": 4.426408258905917e-05, "epoch": 1.4355983359164695, "percentage": 22.0, "elapsed_time": "1:59:36", "remaining_time": "7:04:02", "throughput": 2361.4, "total_tokens": 16945488} {"current_steps": 8800, "total_steps": 40000, "eval_loss": 0.14636728167533875, "epoch": 1.4355983359164695, "percentage": 22.0, "elapsed_time": "2:00:56", "remaining_time": "7:08:48", "throughput": 2335.13, "total_tokens": 16945488} {"current_steps": 8805, "total_steps": 40000, "loss": 0.2288, "lr": 4.425782380258594e-05, "epoch": 1.4364140631372868, "percentage": 22.01, "elapsed_time": "2:01:00", "remaining_time": "7:08:43", "throughput": 2335.22, "total_tokens": 16955408} {"current_steps": 8810, "total_steps": 40000, "loss": 0.0959, "lr": 4.425156204631427e-05, "epoch": 1.4372297903581042, "percentage": 22.02, "elapsed_time": "2:01:02", "remaining_time": "7:08:32", "throughput": 2335.97, "total_tokens": 16965680} {"current_steps": 8815, "total_steps": 40000, "loss": 0.0963, "lr": 4.424529732120981e-05, "epoch": 1.4380455175789217, "percentage": 22.04, "elapsed_time": "2:01:04", "remaining_time": "7:08:21", "throughput": 2336.67, "total_tokens": 16975632} {"current_steps": 8820, "total_steps": 40000, "loss": 0.044, "lr": 4.423902962823864e-05, "epoch": 1.4388612447997389, "percentage": 22.05, "elapsed_time": "2:01:06", "remaining_time": "7:08:09", "throughput": 2337.37, "total_tokens": 16985536} {"current_steps": 8825, "total_steps": 40000, "loss": 0.11, "lr": 4.423275896836733e-05, "epoch": 1.4396769720205564, "percentage": 22.06, "elapsed_time": "2:01:09", "remaining_time": "7:07:58", "throughput": 2337.92, "total_tokens": 16994352} {"current_steps": 8830, "total_steps": 40000, "loss": 0.1348, "lr": 4.42264853425629e-05, "epoch": 1.4404926992413736, "percentage": 22.07, "elapsed_time": "2:01:11", "remaining_time": "7:07:46", "throughput": 2338.72, "total_tokens": 17004960} {"current_steps": 8835, "total_steps": 40000, "loss": 0.1486, "lr": 4.4220208751792816e-05, "epoch": 1.4413084264621912, "percentage": 22.09, "elapsed_time": "2:01:13", "remaining_time": "7:07:35", "throughput": 2339.49, "total_tokens": 17015408} {"current_steps": 8840, "total_steps": 40000, "loss": 0.2395, "lr": 4.421392919702499e-05, "epoch": 1.4421241536830083, "percentage": 22.1, "elapsed_time": "2:01:15", "remaining_time": "7:07:24", "throughput": 2340.09, "total_tokens": 17024656} {"current_steps": 8845, "total_steps": 40000, "loss": 0.1692, "lr": 4.4207646679227846e-05, "epoch": 1.4429398809038259, "percentage": 22.11, "elapsed_time": "2:01:17", "remaining_time": "7:07:12", "throughput": 2340.58, "total_tokens": 17033056} {"current_steps": 8850, "total_steps": 40000, "loss": 0.1047, "lr": 4.42013611993702e-05, "epoch": 1.443755608124643, "percentage": 22.12, "elapsed_time": "2:01:19", "remaining_time": "7:07:01", "throughput": 2341.2, "total_tokens": 17042384} {"current_steps": 8855, "total_steps": 40000, "loss": 0.111, "lr": 4.419507275842135e-05, "epoch": 1.4445713353454606, "percentage": 22.14, "elapsed_time": "2:01:21", "remaining_time": "7:06:50", "throughput": 2341.91, "total_tokens": 17052384} {"current_steps": 8860, "total_steps": 40000, "loss": 0.1329, "lr": 4.418878135735106e-05, "epoch": 1.445387062566278, "percentage": 22.15, "elapsed_time": "2:01:23", "remaining_time": "7:06:39", "throughput": 2342.17, "total_tokens": 17059168} {"current_steps": 8865, "total_steps": 40000, "loss": 0.1371, "lr": 4.418248699712955e-05, "epoch": 1.4462027897870953, "percentage": 22.16, "elapsed_time": "2:01:25", "remaining_time": "7:06:27", "throughput": 2342.91, "total_tokens": 17069408} {"current_steps": 8870, "total_steps": 40000, "loss": 0.0271, "lr": 4.417618967872748e-05, "epoch": 1.4470185170079126, "percentage": 22.18, "elapsed_time": "2:01:27", "remaining_time": "7:06:16", "throughput": 2343.7, "total_tokens": 17080016} {"current_steps": 8875, "total_steps": 40000, "loss": 0.0979, "lr": 4.4169889403115985e-05, "epoch": 1.44783424422873, "percentage": 22.19, "elapsed_time": "2:01:29", "remaining_time": "7:06:05", "throughput": 2344.4, "total_tokens": 17089968} {"current_steps": 8880, "total_steps": 40000, "loss": 0.1198, "lr": 4.4163586171266627e-05, "epoch": 1.4486499714495473, "percentage": 22.2, "elapsed_time": "2:01:31", "remaining_time": "7:05:54", "throughput": 2345.04, "total_tokens": 17099504} {"current_steps": 8885, "total_steps": 40000, "loss": 0.2152, "lr": 4.415727998415147e-05, "epoch": 1.4494656986703647, "percentage": 22.21, "elapsed_time": "2:01:33", "remaining_time": "7:05:42", "throughput": 2345.8, "total_tokens": 17109840} {"current_steps": 8890, "total_steps": 40000, "loss": 0.0588, "lr": 4.4150970842742985e-05, "epoch": 1.450281425891182, "percentage": 22.23, "elapsed_time": "2:01:35", "remaining_time": "7:05:31", "throughput": 2346.34, "total_tokens": 17118640} {"current_steps": 8895, "total_steps": 40000, "loss": 0.051, "lr": 4.4144658748014134e-05, "epoch": 1.4510971531119994, "percentage": 22.24, "elapsed_time": "2:01:37", "remaining_time": "7:05:20", "throughput": 2347.04, "total_tokens": 17128608} {"current_steps": 8900, "total_steps": 40000, "loss": 0.1571, "lr": 4.413834370093831e-05, "epoch": 1.4519128803328167, "percentage": 22.25, "elapsed_time": "2:01:40", "remaining_time": "7:05:09", "throughput": 2347.72, "total_tokens": 17138384} {"current_steps": 8905, "total_steps": 40000, "loss": 0.2274, "lr": 4.413202570248939e-05, "epoch": 1.452728607553634, "percentage": 22.26, "elapsed_time": "2:01:42", "remaining_time": "7:04:57", "throughput": 2348.45, "total_tokens": 17148560} {"current_steps": 8910, "total_steps": 40000, "loss": 0.2292, "lr": 4.412570475364167e-05, "epoch": 1.4535443347744514, "percentage": 22.27, "elapsed_time": "2:01:44", "remaining_time": "7:04:46", "throughput": 2349.24, "total_tokens": 17159232} {"current_steps": 8915, "total_steps": 40000, "loss": 0.1101, "lr": 4.411938085536994e-05, "epoch": 1.4543600619952688, "percentage": 22.29, "elapsed_time": "2:01:46", "remaining_time": "7:04:35", "throughput": 2349.86, "total_tokens": 17168640} {"current_steps": 8920, "total_steps": 40000, "loss": 0.128, "lr": 4.41130540086494e-05, "epoch": 1.4551757892160861, "percentage": 22.3, "elapsed_time": "2:01:48", "remaining_time": "7:04:24", "throughput": 2350.48, "total_tokens": 17178000} {"current_steps": 8925, "total_steps": 40000, "loss": 0.0545, "lr": 4.4106724214455754e-05, "epoch": 1.4559915164369035, "percentage": 22.31, "elapsed_time": "2:01:50", "remaining_time": "7:04:13", "throughput": 2351.22, "total_tokens": 17188272} {"current_steps": 8930, "total_steps": 40000, "loss": 0.0785, "lr": 4.4100391473765115e-05, "epoch": 1.4568072436577209, "percentage": 22.32, "elapsed_time": "2:01:52", "remaining_time": "7:04:02", "throughput": 2352.02, "total_tokens": 17199008} {"current_steps": 8935, "total_steps": 40000, "loss": 0.1046, "lr": 4.409405578755408e-05, "epoch": 1.4576229708785382, "percentage": 22.34, "elapsed_time": "2:01:54", "remaining_time": "7:03:50", "throughput": 2352.72, "total_tokens": 17209008} {"current_steps": 8940, "total_steps": 40000, "loss": 0.0272, "lr": 4.4087717156799705e-05, "epoch": 1.4584386980993556, "percentage": 22.35, "elapsed_time": "2:01:56", "remaining_time": "7:03:39", "throughput": 2353.53, "total_tokens": 17219760} {"current_steps": 8945, "total_steps": 40000, "loss": 0.1466, "lr": 4.408137558247946e-05, "epoch": 1.459254425320173, "percentage": 22.36, "elapsed_time": "2:01:58", "remaining_time": "7:03:28", "throughput": 2354.23, "total_tokens": 17229728} {"current_steps": 8950, "total_steps": 40000, "loss": 0.2147, "lr": 4.4075031065571306e-05, "epoch": 1.4600701525409903, "percentage": 22.38, "elapsed_time": "2:02:00", "remaining_time": "7:03:17", "throughput": 2354.94, "total_tokens": 17239888} {"current_steps": 8955, "total_steps": 40000, "loss": 0.2188, "lr": 4.406868360705366e-05, "epoch": 1.4608858797618076, "percentage": 22.39, "elapsed_time": "2:02:02", "remaining_time": "7:03:06", "throughput": 2355.57, "total_tokens": 17249376} {"current_steps": 8960, "total_steps": 40000, "loss": 0.2185, "lr": 4.406233320790536e-05, "epoch": 1.461701606982625, "percentage": 22.4, "elapsed_time": "2:02:04", "remaining_time": "7:02:55", "throughput": 2356.23, "total_tokens": 17259072} {"current_steps": 8965, "total_steps": 40000, "loss": 0.0555, "lr": 4.4055979869105734e-05, "epoch": 1.4625173342034423, "percentage": 22.41, "elapsed_time": "2:02:06", "remaining_time": "7:02:44", "throughput": 2357.02, "total_tokens": 17269680} {"current_steps": 8970, "total_steps": 40000, "loss": 0.0918, "lr": 4.404962359163454e-05, "epoch": 1.4633330614242597, "percentage": 22.43, "elapsed_time": "2:02:08", "remaining_time": "7:02:33", "throughput": 2357.62, "total_tokens": 17278944} {"current_steps": 8975, "total_steps": 40000, "loss": 0.0348, "lr": 4.404326437647199e-05, "epoch": 1.464148788645077, "percentage": 22.44, "elapsed_time": "2:02:11", "remaining_time": "7:02:22", "throughput": 2358.24, "total_tokens": 17288384} {"current_steps": 8980, "total_steps": 40000, "loss": 0.0263, "lr": 4.403690222459877e-05, "epoch": 1.4649645158658944, "percentage": 22.45, "elapsed_time": "2:02:13", "remaining_time": "7:02:11", "throughput": 2358.91, "total_tokens": 17298176} {"current_steps": 8985, "total_steps": 40000, "loss": 0.121, "lr": 4.4030537136995984e-05, "epoch": 1.4657802430867117, "percentage": 22.46, "elapsed_time": "2:02:15", "remaining_time": "7:02:00", "throughput": 2359.65, "total_tokens": 17308416} {"current_steps": 8990, "total_steps": 40000, "loss": 0.2046, "lr": 4.402416911464523e-05, "epoch": 1.466595970307529, "percentage": 22.48, "elapsed_time": "2:02:17", "remaining_time": "7:01:49", "throughput": 2360.24, "total_tokens": 17317664} {"current_steps": 8995, "total_steps": 40000, "loss": 0.0431, "lr": 4.4017798158528516e-05, "epoch": 1.4674116975283464, "percentage": 22.49, "elapsed_time": "2:02:19", "remaining_time": "7:01:38", "throughput": 2361.02, "total_tokens": 17328256} {"current_steps": 9000, "total_steps": 40000, "loss": 0.1505, "lr": 4.401142426962834e-05, "epoch": 1.468227424749164, "percentage": 22.5, "elapsed_time": "2:02:21", "remaining_time": "7:01:26", "throughput": 2361.79, "total_tokens": 17338800} {"current_steps": 9000, "total_steps": 40000, "eval_loss": 0.1334640234708786, "epoch": 1.468227424749164, "percentage": 22.5, "elapsed_time": "2:03:41", "remaining_time": "7:06:04", "throughput": 2336.18, "total_tokens": 17338800} {"current_steps": 9005, "total_steps": 40000, "loss": 0.1247, "lr": 4.400504744892763e-05, "epoch": 1.4690431519699811, "percentage": 22.51, "elapsed_time": "2:03:45", "remaining_time": "7:05:58", "throughput": 2336.24, "total_tokens": 17348048} {"current_steps": 9010, "total_steps": 40000, "loss": 0.0817, "lr": 4.399866769740975e-05, "epoch": 1.4698588791907987, "percentage": 22.53, "elapsed_time": "2:03:47", "remaining_time": "7:05:47", "throughput": 2336.71, "total_tokens": 17356416} {"current_steps": 9015, "total_steps": 40000, "loss": 0.1488, "lr": 4.399228501605859e-05, "epoch": 1.4706746064116158, "percentage": 22.54, "elapsed_time": "2:03:49", "remaining_time": "7:05:36", "throughput": 2337.09, "total_tokens": 17364048} {"current_steps": 9020, "total_steps": 40000, "loss": 0.1536, "lr": 4.398589940585839e-05, "epoch": 1.4714903336324334, "percentage": 22.55, "elapsed_time": "2:03:51", "remaining_time": "7:05:25", "throughput": 2337.82, "total_tokens": 17374272} {"current_steps": 9025, "total_steps": 40000, "loss": 0.1052, "lr": 4.3979510867793917e-05, "epoch": 1.4723060608532506, "percentage": 22.56, "elapsed_time": "2:03:53", "remaining_time": "7:05:14", "throughput": 2338.36, "total_tokens": 17383152} {"current_steps": 9030, "total_steps": 40000, "loss": 0.2641, "lr": 4.3973119402850346e-05, "epoch": 1.4731217880740681, "percentage": 22.57, "elapsed_time": "2:03:55", "remaining_time": "7:05:03", "throughput": 2338.85, "total_tokens": 17391632} {"current_steps": 9035, "total_steps": 40000, "loss": 0.1711, "lr": 4.396672501201334e-05, "epoch": 1.4739375152948853, "percentage": 22.59, "elapsed_time": "2:03:58", "remaining_time": "7:04:51", "throughput": 2339.46, "total_tokens": 17400960} {"current_steps": 9040, "total_steps": 40000, "loss": 0.1812, "lr": 4.396032769626899e-05, "epoch": 1.4747532425157028, "percentage": 22.6, "elapsed_time": "2:04:00", "remaining_time": "7:04:40", "throughput": 2339.96, "total_tokens": 17409584} {"current_steps": 9045, "total_steps": 40000, "loss": 0.0723, "lr": 4.395392745660384e-05, "epoch": 1.4755689697365202, "percentage": 22.61, "elapsed_time": "2:04:02", "remaining_time": "7:04:29", "throughput": 2340.66, "total_tokens": 17419616} {"current_steps": 9050, "total_steps": 40000, "loss": 0.1415, "lr": 4.394752429400488e-05, "epoch": 1.4763846969573375, "percentage": 22.62, "elapsed_time": "2:04:04", "remaining_time": "7:04:18", "throughput": 2341.39, "total_tokens": 17429856} {"current_steps": 9055, "total_steps": 40000, "loss": 0.0985, "lr": 4.394111820945957e-05, "epoch": 1.477200424178155, "percentage": 22.64, "elapsed_time": "2:04:06", "remaining_time": "7:04:07", "throughput": 2342.26, "total_tokens": 17441168} {"current_steps": 9060, "total_steps": 40000, "loss": 0.1539, "lr": 4.393470920395579e-05, "epoch": 1.4780161513989722, "percentage": 22.65, "elapsed_time": "2:04:08", "remaining_time": "7:03:56", "throughput": 2342.75, "total_tokens": 17449712} {"current_steps": 9065, "total_steps": 40000, "loss": 0.1548, "lr": 4.392829727848192e-05, "epoch": 1.4788318786197896, "percentage": 22.66, "elapsed_time": "2:04:10", "remaining_time": "7:03:45", "throughput": 2343.38, "total_tokens": 17459232} {"current_steps": 9070, "total_steps": 40000, "loss": 0.0875, "lr": 4.392188243402673e-05, "epoch": 1.479647605840607, "percentage": 22.68, "elapsed_time": "2:04:12", "remaining_time": "7:03:34", "throughput": 2344.06, "total_tokens": 17469120} {"current_steps": 9075, "total_steps": 40000, "loss": 0.0684, "lr": 4.391546467157949e-05, "epoch": 1.4804633330614243, "percentage": 22.69, "elapsed_time": "2:04:14", "remaining_time": "7:03:23", "throughput": 2344.8, "total_tokens": 17479472} {"current_steps": 9080, "total_steps": 40000, "loss": 0.0544, "lr": 4.390904399212988e-05, "epoch": 1.4812790602822417, "percentage": 22.7, "elapsed_time": "2:04:16", "remaining_time": "7:03:12", "throughput": 2345.61, "total_tokens": 17490384} {"current_steps": 9085, "total_steps": 40000, "loss": 0.0463, "lr": 4.390262039666807e-05, "epoch": 1.482094787503059, "percentage": 22.71, "elapsed_time": "2:04:18", "remaining_time": "7:03:01", "throughput": 2346.4, "total_tokens": 17501168} {"current_steps": 9090, "total_steps": 40000, "loss": 0.2169, "lr": 4.389619388618464e-05, "epoch": 1.4829105147238764, "percentage": 22.73, "elapsed_time": "2:04:20", "remaining_time": "7:02:49", "throughput": 2346.95, "total_tokens": 17510112} {"current_steps": 9095, "total_steps": 40000, "loss": 0.1051, "lr": 4.3889764461670655e-05, "epoch": 1.4837262419446937, "percentage": 22.74, "elapsed_time": "2:04:22", "remaining_time": "7:02:38", "throughput": 2347.66, "total_tokens": 17520288} {"current_steps": 9100, "total_steps": 40000, "loss": 0.0903, "lr": 4.38833321241176e-05, "epoch": 1.484541969165511, "percentage": 22.75, "elapsed_time": "2:04:24", "remaining_time": "7:02:27", "throughput": 2348.41, "total_tokens": 17530704} {"current_steps": 9105, "total_steps": 40000, "loss": 0.155, "lr": 4.3876896874517434e-05, "epoch": 1.4853576963863284, "percentage": 22.76, "elapsed_time": "2:04:26", "remaining_time": "7:02:16", "throughput": 2349.05, "total_tokens": 17540320} {"current_steps": 9110, "total_steps": 40000, "loss": 0.1527, "lr": 4.3870458713862554e-05, "epoch": 1.4861734236071458, "percentage": 22.78, "elapsed_time": "2:04:29", "remaining_time": "7:02:05", "throughput": 2349.73, "total_tokens": 17550240} {"current_steps": 9115, "total_steps": 40000, "loss": 0.0659, "lr": 4.386401764314579e-05, "epoch": 1.4869891508279631, "percentage": 22.79, "elapsed_time": "2:04:31", "remaining_time": "7:01:54", "throughput": 2350.27, "total_tokens": 17559200} {"current_steps": 9120, "total_steps": 40000, "loss": 0.127, "lr": 4.385757366336045e-05, "epoch": 1.4878048780487805, "percentage": 22.8, "elapsed_time": "2:04:33", "remaining_time": "7:01:44", "throughput": 2351.04, "total_tokens": 17569824} {"current_steps": 9125, "total_steps": 40000, "loss": 0.3851, "lr": 4.385112677550027e-05, "epoch": 1.4886206052695978, "percentage": 22.81, "elapsed_time": "2:04:35", "remaining_time": "7:01:33", "throughput": 2351.81, "total_tokens": 17580400} {"current_steps": 9130, "total_steps": 40000, "loss": 0.1062, "lr": 4.384467698055945e-05, "epoch": 1.4894363324904152, "percentage": 22.82, "elapsed_time": "2:04:37", "remaining_time": "7:01:22", "throughput": 2352.5, "total_tokens": 17590400} {"current_steps": 9135, "total_steps": 40000, "loss": 0.087, "lr": 4.383822427953261e-05, "epoch": 1.4902520597112325, "percentage": 22.84, "elapsed_time": "2:04:39", "remaining_time": "7:01:11", "throughput": 2353.23, "total_tokens": 17600768} {"current_steps": 9140, "total_steps": 40000, "loss": 0.1478, "lr": 4.3831768673414864e-05, "epoch": 1.4910677869320499, "percentage": 22.85, "elapsed_time": "2:04:41", "remaining_time": "7:01:00", "throughput": 2354.02, "total_tokens": 17611568} {"current_steps": 9145, "total_steps": 40000, "loss": 0.0483, "lr": 4.382531016320173e-05, "epoch": 1.4918835141528672, "percentage": 22.86, "elapsed_time": "2:04:43", "remaining_time": "7:00:49", "throughput": 2354.84, "total_tokens": 17622560} {"current_steps": 9150, "total_steps": 40000, "loss": 0.063, "lr": 4.3818848749889184e-05, "epoch": 1.4926992413736846, "percentage": 22.88, "elapsed_time": "2:04:45", "remaining_time": "7:00:38", "throughput": 2355.75, "total_tokens": 17634256} {"current_steps": 9155, "total_steps": 40000, "loss": 0.1469, "lr": 4.381238443447368e-05, "epoch": 1.493514968594502, "percentage": 22.89, "elapsed_time": "2:04:47", "remaining_time": "7:00:27", "throughput": 2356.29, "total_tokens": 17643168} {"current_steps": 9160, "total_steps": 40000, "loss": 0.0537, "lr": 4.380591721795208e-05, "epoch": 1.4943306958153193, "percentage": 22.9, "elapsed_time": "2:04:49", "remaining_time": "7:00:16", "throughput": 2356.95, "total_tokens": 17652976} {"current_steps": 9165, "total_steps": 40000, "loss": 0.1742, "lr": 4.3799447101321723e-05, "epoch": 1.4951464230361367, "percentage": 22.91, "elapsed_time": "2:04:51", "remaining_time": "7:00:05", "throughput": 2357.54, "total_tokens": 17662256} {"current_steps": 9170, "total_steps": 40000, "loss": 0.1801, "lr": 4.379297408558036e-05, "epoch": 1.495962150256954, "percentage": 22.93, "elapsed_time": "2:04:53", "remaining_time": "6:59:54", "throughput": 2358.04, "total_tokens": 17670848} {"current_steps": 9175, "total_steps": 40000, "loss": 0.1709, "lr": 4.378649817172624e-05, "epoch": 1.4967778774777714, "percentage": 22.94, "elapsed_time": "2:04:55", "remaining_time": "6:59:43", "throughput": 2358.64, "total_tokens": 17680240} {"current_steps": 9180, "total_steps": 40000, "loss": 0.0532, "lr": 4.378001936075801e-05, "epoch": 1.4975936046985887, "percentage": 22.95, "elapsed_time": "2:04:58", "remaining_time": "6:59:33", "throughput": 2359.47, "total_tokens": 17691360} {"current_steps": 9185, "total_steps": 40000, "loss": 0.0828, "lr": 4.377353765367479e-05, "epoch": 1.4984093319194063, "percentage": 22.96, "elapsed_time": "2:05:00", "remaining_time": "6:59:22", "throughput": 2360.12, "total_tokens": 17701120} {"current_steps": 9190, "total_steps": 40000, "loss": 0.177, "lr": 4.376705305147614e-05, "epoch": 1.4992250591402234, "percentage": 22.98, "elapsed_time": "2:05:02", "remaining_time": "6:59:11", "throughput": 2360.68, "total_tokens": 17710144} {"current_steps": 9195, "total_steps": 40000, "loss": 0.1184, "lr": 4.376056555516206e-05, "epoch": 1.500040786361041, "percentage": 22.99, "elapsed_time": "2:05:04", "remaining_time": "6:59:00", "throughput": 2361.22, "total_tokens": 17719104} {"current_steps": 9200, "total_steps": 40000, "loss": 0.1536, "lr": 4.375407516573302e-05, "epoch": 1.5008565135818581, "percentage": 23.0, "elapsed_time": "2:05:06", "remaining_time": "6:58:49", "throughput": 2361.9, "total_tokens": 17729104} {"current_steps": 9200, "total_steps": 40000, "eval_loss": 0.13459455966949463, "epoch": 1.5008565135818581, "percentage": 23.0, "elapsed_time": "2:06:26", "remaining_time": "7:03:19", "throughput": 2336.83, "total_tokens": 17729104} {"current_steps": 9205, "total_steps": 40000, "loss": 0.1654, "lr": 4.3747581884189913e-05, "epoch": 1.5016722408026757, "percentage": 23.01, "elapsed_time": "2:06:30", "remaining_time": "7:03:14", "throughput": 2336.93, "total_tokens": 17738768} {"current_steps": 9210, "total_steps": 40000, "loss": 0.0586, "lr": 4.374108571153408e-05, "epoch": 1.5024879680234928, "percentage": 23.03, "elapsed_time": "2:06:32", "remaining_time": "7:03:03", "throughput": 2337.62, "total_tokens": 17748864} {"current_steps": 9215, "total_steps": 40000, "loss": 0.046, "lr": 4.3734586648767316e-05, "epoch": 1.5033036952443104, "percentage": 23.04, "elapsed_time": "2:06:34", "remaining_time": "7:02:52", "throughput": 2338.17, "total_tokens": 17757872} {"current_steps": 9220, "total_steps": 40000, "loss": 0.065, "lr": 4.372808469689186e-05, "epoch": 1.5041194224651275, "percentage": 23.05, "elapsed_time": "2:06:36", "remaining_time": "7:02:41", "throughput": 2338.91, "total_tokens": 17768320} {"current_steps": 9225, "total_steps": 40000, "loss": 0.0431, "lr": 4.372157985691039e-05, "epoch": 1.504935149685945, "percentage": 23.06, "elapsed_time": "2:06:38", "remaining_time": "7:02:30", "throughput": 2339.42, "total_tokens": 17776992} {"current_steps": 9230, "total_steps": 40000, "loss": 0.0876, "lr": 4.371507212982603e-05, "epoch": 1.5057508769067622, "percentage": 23.08, "elapsed_time": "2:06:40", "remaining_time": "7:02:19", "throughput": 2340.08, "total_tokens": 17786864} {"current_steps": 9235, "total_steps": 40000, "loss": 0.1892, "lr": 4.370856151664236e-05, "epoch": 1.5065666041275798, "percentage": 23.09, "elapsed_time": "2:06:43", "remaining_time": "7:02:08", "throughput": 2340.81, "total_tokens": 17797248} {"current_steps": 9240, "total_steps": 40000, "loss": 0.0699, "lr": 4.3702048018363404e-05, "epoch": 1.507382331348397, "percentage": 23.1, "elapsed_time": "2:06:45", "remaining_time": "7:01:57", "throughput": 2341.38, "total_tokens": 17806480} {"current_steps": 9245, "total_steps": 40000, "loss": 0.1438, "lr": 4.369553163599362e-05, "epoch": 1.5081980585692145, "percentage": 23.11, "elapsed_time": "2:06:47", "remaining_time": "7:01:46", "throughput": 2342.18, "total_tokens": 17817376} {"current_steps": 9250, "total_steps": 40000, "loss": 0.0859, "lr": 4.3689012370537904e-05, "epoch": 1.5090137857900316, "percentage": 23.12, "elapsed_time": "2:06:49", "remaining_time": "7:01:35", "throughput": 2342.71, "total_tokens": 17826224} {"current_steps": 9255, "total_steps": 40000, "loss": 0.0739, "lr": 4.368249022300164e-05, "epoch": 1.5098295130108492, "percentage": 23.14, "elapsed_time": "2:06:51", "remaining_time": "7:01:24", "throughput": 2343.42, "total_tokens": 17836528} {"current_steps": 9260, "total_steps": 40000, "loss": 0.2231, "lr": 4.367596519439059e-05, "epoch": 1.5106452402316666, "percentage": 23.15, "elapsed_time": "2:06:53", "remaining_time": "7:01:13", "throughput": 2343.94, "total_tokens": 17845312} {"current_steps": 9265, "total_steps": 40000, "loss": 0.1273, "lr": 4.366943728571101e-05, "epoch": 1.511460967452484, "percentage": 23.16, "elapsed_time": "2:06:55", "remaining_time": "7:01:02", "throughput": 2344.36, "total_tokens": 17853360} {"current_steps": 9270, "total_steps": 40000, "loss": 0.1099, "lr": 4.366290649796959e-05, "epoch": 1.5122766946733013, "percentage": 23.18, "elapsed_time": "2:06:57", "remaining_time": "7:00:52", "throughput": 2344.91, "total_tokens": 17862336} {"current_steps": 9275, "total_steps": 40000, "loss": 0.3006, "lr": 4.3656372832173456e-05, "epoch": 1.5130924218941186, "percentage": 23.19, "elapsed_time": "2:06:59", "remaining_time": "7:00:41", "throughput": 2345.57, "total_tokens": 17872224} {"current_steps": 9280, "total_steps": 40000, "loss": 0.0719, "lr": 4.364983628933017e-05, "epoch": 1.513908149114936, "percentage": 23.2, "elapsed_time": "2:07:01", "remaining_time": "7:00:30", "throughput": 2346.19, "total_tokens": 17881872} {"current_steps": 9285, "total_steps": 40000, "loss": 0.2299, "lr": 4.364329687044777e-05, "epoch": 1.5147238763357533, "percentage": 23.21, "elapsed_time": "2:07:03", "remaining_time": "7:00:19", "throughput": 2346.74, "total_tokens": 17890944} {"current_steps": 9290, "total_steps": 40000, "loss": 0.0993, "lr": 4.36367545765347e-05, "epoch": 1.5155396035565707, "percentage": 23.23, "elapsed_time": "2:07:05", "remaining_time": "7:00:08", "throughput": 2347.21, "total_tokens": 17899392} {"current_steps": 9295, "total_steps": 40000, "loss": 0.3937, "lr": 4.363020940859988e-05, "epoch": 1.516355330777388, "percentage": 23.24, "elapsed_time": "2:07:07", "remaining_time": "6:59:57", "throughput": 2347.76, "total_tokens": 17908432} {"current_steps": 9300, "total_steps": 40000, "loss": 0.1153, "lr": 4.362366136765263e-05, "epoch": 1.5171710579982054, "percentage": 23.25, "elapsed_time": "2:07:09", "remaining_time": "6:59:46", "throughput": 2348.39, "total_tokens": 17918080} {"current_steps": 9305, "total_steps": 40000, "loss": 0.138, "lr": 4.361711045470278e-05, "epoch": 1.5179867852190227, "percentage": 23.26, "elapsed_time": "2:07:12", "remaining_time": "6:59:36", "throughput": 2348.96, "total_tokens": 17927280} {"current_steps": 9310, "total_steps": 40000, "loss": 0.1204, "lr": 4.3610556670760524e-05, "epoch": 1.51880251243984, "percentage": 23.28, "elapsed_time": "2:07:14", "remaining_time": "6:59:25", "throughput": 2349.66, "total_tokens": 17937456} {"current_steps": 9315, "total_steps": 40000, "loss": 0.128, "lr": 4.360400001683657e-05, "epoch": 1.5196182396606575, "percentage": 23.29, "elapsed_time": "2:07:16", "remaining_time": "6:59:14", "throughput": 2350.32, "total_tokens": 17947440} {"current_steps": 9320, "total_steps": 40000, "loss": 0.0802, "lr": 4.3597440493942e-05, "epoch": 1.5204339668814748, "percentage": 23.3, "elapsed_time": "2:07:18", "remaining_time": "6:59:03", "throughput": 2350.95, "total_tokens": 17957056} {"current_steps": 9325, "total_steps": 40000, "loss": 0.1087, "lr": 4.3590878103088405e-05, "epoch": 1.5212496941022922, "percentage": 23.31, "elapsed_time": "2:07:20", "remaining_time": "6:58:53", "throughput": 2351.58, "total_tokens": 17966752} {"current_steps": 9330, "total_steps": 40000, "loss": 0.0604, "lr": 4.358431284528779e-05, "epoch": 1.5220654213231095, "percentage": 23.33, "elapsed_time": "2:07:22", "remaining_time": "6:58:42", "throughput": 2352.16, "total_tokens": 17975984} {"current_steps": 9335, "total_steps": 40000, "loss": 0.1157, "lr": 4.357774472155257e-05, "epoch": 1.5228811485439269, "percentage": 23.34, "elapsed_time": "2:07:24", "remaining_time": "6:58:31", "throughput": 2352.74, "total_tokens": 17985296} {"current_steps": 9340, "total_steps": 40000, "loss": 0.1343, "lr": 4.3571173732895664e-05, "epoch": 1.5236968757647442, "percentage": 23.35, "elapsed_time": "2:07:26", "remaining_time": "6:58:20", "throughput": 2353.29, "total_tokens": 17994304} {"current_steps": 9345, "total_steps": 40000, "loss": 0.0533, "lr": 4.356459988033039e-05, "epoch": 1.5245126029855616, "percentage": 23.36, "elapsed_time": "2:07:28", "remaining_time": "6:58:09", "throughput": 2353.76, "total_tokens": 18002768} {"current_steps": 9350, "total_steps": 40000, "loss": 0.166, "lr": 4.355802316487051e-05, "epoch": 1.5253283302063791, "percentage": 23.38, "elapsed_time": "2:07:30", "remaining_time": "6:57:59", "throughput": 2354.17, "total_tokens": 18010816} {"current_steps": 9355, "total_steps": 40000, "loss": 0.2233, "lr": 4.355144358753025e-05, "epoch": 1.5261440574271963, "percentage": 23.39, "elapsed_time": "2:07:32", "remaining_time": "6:57:48", "throughput": 2354.82, "total_tokens": 18020640} {"current_steps": 9360, "total_steps": 40000, "loss": 0.1569, "lr": 4.354486114932425e-05, "epoch": 1.5269597846480139, "percentage": 23.4, "elapsed_time": "2:07:34", "remaining_time": "6:57:37", "throughput": 2355.5, "total_tokens": 18030720} {"current_steps": 9365, "total_steps": 40000, "loss": 0.0395, "lr": 4.353827585126762e-05, "epoch": 1.527775511868831, "percentage": 23.41, "elapsed_time": "2:07:36", "remaining_time": "6:57:27", "throughput": 2356.15, "total_tokens": 18040576} {"current_steps": 9370, "total_steps": 40000, "loss": 0.1186, "lr": 4.353168769437588e-05, "epoch": 1.5285912390896486, "percentage": 23.43, "elapsed_time": "2:07:38", "remaining_time": "6:57:16", "throughput": 2356.69, "total_tokens": 18049552} {"current_steps": 9375, "total_steps": 40000, "loss": 0.0945, "lr": 4.3525096679665014e-05, "epoch": 1.5294069663104657, "percentage": 23.44, "elapsed_time": "2:07:40", "remaining_time": "6:57:05", "throughput": 2357.3, "total_tokens": 18059104} {"current_steps": 9380, "total_steps": 40000, "loss": 0.1083, "lr": 4.351850280815144e-05, "epoch": 1.5302226935312833, "percentage": 23.45, "elapsed_time": "2:07:42", "remaining_time": "6:56:54", "throughput": 2357.93, "total_tokens": 18068736} {"current_steps": 9385, "total_steps": 40000, "loss": 0.0942, "lr": 4.3511906080852014e-05, "epoch": 1.5310384207521004, "percentage": 23.46, "elapsed_time": "2:07:45", "remaining_time": "6:56:44", "throughput": 2358.48, "total_tokens": 18077904} {"current_steps": 9390, "total_steps": 40000, "loss": 0.1248, "lr": 4.350530649878404e-05, "epoch": 1.531854147972918, "percentage": 23.47, "elapsed_time": "2:07:47", "remaining_time": "6:56:33", "throughput": 2359.18, "total_tokens": 18088144} {"current_steps": 9395, "total_steps": 40000, "loss": 0.1571, "lr": 4.3498704062965246e-05, "epoch": 1.532669875193735, "percentage": 23.49, "elapsed_time": "2:07:49", "remaining_time": "6:56:22", "throughput": 2359.77, "total_tokens": 18097536} {"current_steps": 9400, "total_steps": 40000, "loss": 0.1391, "lr": 4.3492098774413815e-05, "epoch": 1.5334856024145527, "percentage": 23.5, "elapsed_time": "2:07:51", "remaining_time": "6:56:12", "throughput": 2360.42, "total_tokens": 18107328} {"current_steps": 9400, "total_steps": 40000, "eval_loss": 0.15398284792900085, "epoch": 1.5334856024145527, "percentage": 23.5, "elapsed_time": "2:09:11", "remaining_time": "7:00:34", "throughput": 2335.88, "total_tokens": 18107328} {"current_steps": 9405, "total_steps": 40000, "loss": 0.1986, "lr": 4.3485490634148375e-05, "epoch": 1.5343013296353698, "percentage": 23.51, "elapsed_time": "2:09:15", "remaining_time": "7:00:28", "throughput": 2336.11, "total_tokens": 18117552} {"current_steps": 9410, "total_steps": 40000, "loss": 0.0903, "lr": 4.347887964318797e-05, "epoch": 1.5351170568561874, "percentage": 23.52, "elapsed_time": "2:09:17", "remaining_time": "7:00:18", "throughput": 2336.64, "total_tokens": 18126496} {"current_steps": 9415, "total_steps": 40000, "loss": 0.0631, "lr": 4.34722658025521e-05, "epoch": 1.5359327840770045, "percentage": 23.54, "elapsed_time": "2:09:19", "remaining_time": "7:00:07", "throughput": 2337.3, "total_tokens": 18136480} {"current_steps": 9420, "total_steps": 40000, "loss": 0.0333, "lr": 4.346564911326071e-05, "epoch": 1.536748511297822, "percentage": 23.55, "elapsed_time": "2:09:21", "remaining_time": "6:59:56", "throughput": 2338.03, "total_tokens": 18146976} {"current_steps": 9425, "total_steps": 40000, "loss": 0.0401, "lr": 4.345902957633418e-05, "epoch": 1.5375642385186392, "percentage": 23.56, "elapsed_time": "2:09:23", "remaining_time": "6:59:45", "throughput": 2338.72, "total_tokens": 18157120} {"current_steps": 9430, "total_steps": 40000, "loss": 0.0654, "lr": 4.345240719279331e-05, "epoch": 1.5383799657394568, "percentage": 23.57, "elapsed_time": "2:09:25", "remaining_time": "6:59:34", "throughput": 2339.24, "total_tokens": 18166000} {"current_steps": 9435, "total_steps": 40000, "loss": 0.0794, "lr": 4.3445781963659374e-05, "epoch": 1.539195692960274, "percentage": 23.59, "elapsed_time": "2:09:27", "remaining_time": "6:59:24", "throughput": 2339.92, "total_tokens": 18176128} {"current_steps": 9440, "total_steps": 40000, "loss": 0.1045, "lr": 4.3439153889954045e-05, "epoch": 1.5400114201810915, "percentage": 23.6, "elapsed_time": "2:09:29", "remaining_time": "6:59:13", "throughput": 2340.62, "total_tokens": 18186416} {"current_steps": 9445, "total_steps": 40000, "loss": 0.2015, "lr": 4.343252297269946e-05, "epoch": 1.5408271474019088, "percentage": 23.61, "elapsed_time": "2:09:31", "remaining_time": "6:59:02", "throughput": 2341.25, "total_tokens": 18196144} {"current_steps": 9450, "total_steps": 40000, "loss": 0.2613, "lr": 4.342588921291821e-05, "epoch": 1.5416428746227262, "percentage": 23.62, "elapsed_time": "2:09:34", "remaining_time": "6:58:51", "throughput": 2341.99, "total_tokens": 18206720} {"current_steps": 9455, "total_steps": 40000, "loss": 0.1021, "lr": 4.341925261163328e-05, "epoch": 1.5424586018435436, "percentage": 23.64, "elapsed_time": "2:09:36", "remaining_time": "6:58:41", "throughput": 2342.53, "total_tokens": 18215760} {"current_steps": 9460, "total_steps": 40000, "loss": 0.0735, "lr": 4.341261316986813e-05, "epoch": 1.543274329064361, "percentage": 23.65, "elapsed_time": "2:09:38", "remaining_time": "6:58:30", "throughput": 2343.18, "total_tokens": 18225664} {"current_steps": 9465, "total_steps": 40000, "loss": 0.0811, "lr": 4.340597088864664e-05, "epoch": 1.5440900562851783, "percentage": 23.66, "elapsed_time": "2:09:40", "remaining_time": "6:58:19", "throughput": 2343.91, "total_tokens": 18236224} {"current_steps": 9470, "total_steps": 40000, "loss": 0.0938, "lr": 4.339932576899313e-05, "epoch": 1.5449057835059956, "percentage": 23.67, "elapsed_time": "2:09:42", "remaining_time": "6:58:09", "throughput": 2344.57, "total_tokens": 18246192} {"current_steps": 9475, "total_steps": 40000, "loss": 0.139, "lr": 4.3392677811932375e-05, "epoch": 1.545721510726813, "percentage": 23.69, "elapsed_time": "2:09:44", "remaining_time": "6:57:58", "throughput": 2345.25, "total_tokens": 18256368} {"current_steps": 9480, "total_steps": 40000, "loss": 0.129, "lr": 4.338602701848956e-05, "epoch": 1.5465372379476303, "percentage": 23.7, "elapsed_time": "2:09:46", "remaining_time": "6:57:47", "throughput": 2345.79, "total_tokens": 18265440} {"current_steps": 9485, "total_steps": 40000, "loss": 0.1468, "lr": 4.337937338969033e-05, "epoch": 1.5473529651684477, "percentage": 23.71, "elapsed_time": "2:09:48", "remaining_time": "6:57:37", "throughput": 2346.31, "total_tokens": 18274320} {"current_steps": 9490, "total_steps": 40000, "loss": 0.258, "lr": 4.337271692656075e-05, "epoch": 1.548168692389265, "percentage": 23.72, "elapsed_time": "2:09:50", "remaining_time": "6:57:26", "throughput": 2347.12, "total_tokens": 18285520} {"current_steps": 9495, "total_steps": 40000, "loss": 0.225, "lr": 4.336605763012733e-05, "epoch": 1.5489844196100824, "percentage": 23.74, "elapsed_time": "2:09:52", "remaining_time": "6:57:15", "throughput": 2347.75, "total_tokens": 18295280} {"current_steps": 9500, "total_steps": 40000, "loss": 0.2211, "lr": 4.3359395501417026e-05, "epoch": 1.5498001468308997, "percentage": 23.75, "elapsed_time": "2:09:54", "remaining_time": "6:57:05", "throughput": 2348.53, "total_tokens": 18306192} {"current_steps": 9505, "total_steps": 40000, "loss": 0.1906, "lr": 4.335273054145722e-05, "epoch": 1.550615874051717, "percentage": 23.76, "elapsed_time": "2:09:56", "remaining_time": "6:56:54", "throughput": 2349.29, "total_tokens": 18316944} {"current_steps": 9510, "total_steps": 40000, "loss": 0.0569, "lr": 4.334606275127572e-05, "epoch": 1.5514316012725344, "percentage": 23.77, "elapsed_time": "2:09:58", "remaining_time": "6:56:43", "throughput": 2349.95, "total_tokens": 18326992} {"current_steps": 9515, "total_steps": 40000, "loss": 0.1361, "lr": 4.33393921319008e-05, "epoch": 1.5522473284933518, "percentage": 23.79, "elapsed_time": "2:10:00", "remaining_time": "6:56:33", "throughput": 2350.51, "total_tokens": 18336256} {"current_steps": 9520, "total_steps": 40000, "loss": 0.1583, "lr": 4.3332718684361146e-05, "epoch": 1.5530630557141691, "percentage": 23.8, "elapsed_time": "2:10:03", "remaining_time": "6:56:22", "throughput": 2351.15, "total_tokens": 18346112} {"current_steps": 9525, "total_steps": 40000, "loss": 0.0598, "lr": 4.332604240968588e-05, "epoch": 1.5538787829349865, "percentage": 23.81, "elapsed_time": "2:10:05", "remaining_time": "6:56:12", "throughput": 2351.67, "total_tokens": 18355024} {"current_steps": 9530, "total_steps": 40000, "loss": 0.1038, "lr": 4.331936330890459e-05, "epoch": 1.5546945101558038, "percentage": 23.82, "elapsed_time": "2:10:07", "remaining_time": "6:56:01", "throughput": 2352.24, "total_tokens": 18364272} {"current_steps": 9535, "total_steps": 40000, "loss": 0.1005, "lr": 4.331268138304725e-05, "epoch": 1.5555102373766214, "percentage": 23.84, "elapsed_time": "2:10:09", "remaining_time": "6:55:51", "throughput": 2352.93, "total_tokens": 18374544} {"current_steps": 9540, "total_steps": 40000, "loss": 0.0695, "lr": 4.330599663314431e-05, "epoch": 1.5563259645974385, "percentage": 23.85, "elapsed_time": "2:10:11", "remaining_time": "6:55:40", "throughput": 2353.61, "total_tokens": 18384768} {"current_steps": 9545, "total_steps": 40000, "loss": 0.1019, "lr": 4.329930906022665e-05, "epoch": 1.5571416918182561, "percentage": 23.86, "elapsed_time": "2:10:13", "remaining_time": "6:55:29", "throughput": 2354.2, "total_tokens": 18394176} {"current_steps": 9550, "total_steps": 40000, "loss": 0.0392, "lr": 4.3292618665325564e-05, "epoch": 1.5579574190390733, "percentage": 23.88, "elapsed_time": "2:10:15", "remaining_time": "6:55:19", "throughput": 2354.57, "total_tokens": 18402000} {"current_steps": 9555, "total_steps": 40000, "loss": 0.1078, "lr": 4.3285925449472796e-05, "epoch": 1.5587731462598908, "percentage": 23.89, "elapsed_time": "2:10:17", "remaining_time": "6:55:08", "throughput": 2355.24, "total_tokens": 18412096} {"current_steps": 9560, "total_steps": 40000, "loss": 0.1525, "lr": 4.327922941370054e-05, "epoch": 1.559588873480708, "percentage": 23.9, "elapsed_time": "2:10:19", "remaining_time": "6:54:58", "throughput": 2355.81, "total_tokens": 18421472} {"current_steps": 9565, "total_steps": 40000, "loss": 0.1022, "lr": 4.3272530559041384e-05, "epoch": 1.5604046007015255, "percentage": 23.91, "elapsed_time": "2:10:21", "remaining_time": "6:54:47", "throughput": 2356.49, "total_tokens": 18431680} {"current_steps": 9570, "total_steps": 40000, "loss": 0.0391, "lr": 4.32658288865284e-05, "epoch": 1.5612203279223427, "percentage": 23.93, "elapsed_time": "2:10:23", "remaining_time": "6:54:37", "throughput": 2357.14, "total_tokens": 18441616} {"current_steps": 9575, "total_steps": 40000, "loss": 0.2426, "lr": 4.325912439719505e-05, "epoch": 1.5620360551431602, "percentage": 23.94, "elapsed_time": "2:10:25", "remaining_time": "6:54:26", "throughput": 2357.82, "total_tokens": 18451840} {"current_steps": 9580, "total_steps": 40000, "loss": 0.2161, "lr": 4.3252417092075266e-05, "epoch": 1.5628517823639774, "percentage": 23.95, "elapsed_time": "2:10:27", "remaining_time": "6:54:16", "throughput": 2358.41, "total_tokens": 18461296} {"current_steps": 9585, "total_steps": 40000, "loss": 0.1656, "lr": 4.3245706972203385e-05, "epoch": 1.563667509584795, "percentage": 23.96, "elapsed_time": "2:10:29", "remaining_time": "6:54:05", "throughput": 2359.14, "total_tokens": 18471856} {"current_steps": 9590, "total_steps": 40000, "loss": 0.0963, "lr": 4.323899403861421e-05, "epoch": 1.564483236805612, "percentage": 23.97, "elapsed_time": "2:10:31", "remaining_time": "6:53:55", "throughput": 2359.48, "total_tokens": 18479456} {"current_steps": 9595, "total_steps": 40000, "loss": 0.1367, "lr": 4.3232278292342935e-05, "epoch": 1.5652989640264297, "percentage": 23.99, "elapsed_time": "2:10:34", "remaining_time": "6:53:44", "throughput": 2360.03, "total_tokens": 18488592} {"current_steps": 9600, "total_steps": 40000, "loss": 0.044, "lr": 4.322555973442524e-05, "epoch": 1.5661146912472468, "percentage": 24.0, "elapsed_time": "2:10:36", "remaining_time": "6:53:34", "throughput": 2360.58, "total_tokens": 18497776} {"current_steps": 9600, "total_steps": 40000, "eval_loss": 0.14135172963142395, "epoch": 1.5661146912472468, "percentage": 24.0, "elapsed_time": "2:11:56", "remaining_time": "6:57:49", "throughput": 2336.51, "total_tokens": 18497776} {"current_steps": 9605, "total_steps": 40000, "loss": 0.0505, "lr": 4.3218838365897184e-05, "epoch": 1.5669304184680644, "percentage": 24.01, "elapsed_time": "2:12:00", "remaining_time": "6:57:44", "throughput": 2336.53, "total_tokens": 18506272} {"current_steps": 9610, "total_steps": 40000, "loss": 0.165, "lr": 4.3212114187795306e-05, "epoch": 1.5677461456888815, "percentage": 24.02, "elapsed_time": "2:12:02", "remaining_time": "6:57:33", "throughput": 2337.18, "total_tokens": 18516304} {"current_steps": 9615, "total_steps": 40000, "loss": 0.1697, "lr": 4.320538720115656e-05, "epoch": 1.568561872909699, "percentage": 24.04, "elapsed_time": "2:12:04", "remaining_time": "6:57:22", "throughput": 2337.69, "total_tokens": 18525168} {"current_steps": 9620, "total_steps": 40000, "loss": 0.0149, "lr": 4.319865740701831e-05, "epoch": 1.5693776001305162, "percentage": 24.05, "elapsed_time": "2:12:06", "remaining_time": "6:57:12", "throughput": 2338.25, "total_tokens": 18534400} {"current_steps": 9625, "total_steps": 40000, "loss": 0.173, "lr": 4.3191924806418396e-05, "epoch": 1.5701933273513338, "percentage": 24.06, "elapsed_time": "2:12:08", "remaining_time": "6:57:01", "throughput": 2339.0, "total_tokens": 18545248} {"current_steps": 9630, "total_steps": 40000, "loss": 0.1162, "lr": 4.318518940039507e-05, "epoch": 1.5710090545721511, "percentage": 24.07, "elapsed_time": "2:12:10", "remaining_time": "6:56:51", "throughput": 2339.51, "total_tokens": 18554128} {"current_steps": 9635, "total_steps": 40000, "loss": 0.1923, "lr": 4.3178451189987e-05, "epoch": 1.5718247817929685, "percentage": 24.09, "elapsed_time": "2:12:12", "remaining_time": "6:56:40", "throughput": 2340.18, "total_tokens": 18564288} {"current_steps": 9640, "total_steps": 40000, "loss": 0.0712, "lr": 4.3171710176233315e-05, "epoch": 1.5726405090137858, "percentage": 24.1, "elapsed_time": "2:12:14", "remaining_time": "6:56:30", "throughput": 2340.77, "total_tokens": 18573840} {"current_steps": 9645, "total_steps": 40000, "loss": 0.1223, "lr": 4.316496636017355e-05, "epoch": 1.5734562362346032, "percentage": 24.11, "elapsed_time": "2:12:16", "remaining_time": "6:56:19", "throughput": 2341.38, "total_tokens": 18583488} {"current_steps": 9650, "total_steps": 40000, "loss": 0.1297, "lr": 4.315821974284771e-05, "epoch": 1.5742719634554205, "percentage": 24.12, "elapsed_time": "2:12:19", "remaining_time": "6:56:08", "throughput": 2342.08, "total_tokens": 18593904} {"current_steps": 9655, "total_steps": 40000, "loss": 0.13, "lr": 4.315147032529619e-05, "epoch": 1.5750876906762379, "percentage": 24.14, "elapsed_time": "2:12:21", "remaining_time": "6:55:58", "throughput": 2342.53, "total_tokens": 18602320} {"current_steps": 9660, "total_steps": 40000, "loss": 0.0916, "lr": 4.3144718108559845e-05, "epoch": 1.5759034178970552, "percentage": 24.15, "elapsed_time": "2:12:23", "remaining_time": "6:55:47", "throughput": 2342.95, "total_tokens": 18610432} {"current_steps": 9665, "total_steps": 40000, "loss": 0.272, "lr": 4.3137963093679945e-05, "epoch": 1.5767191451178726, "percentage": 24.16, "elapsed_time": "2:12:25", "remaining_time": "6:55:37", "throughput": 2343.65, "total_tokens": 18620896} {"current_steps": 9670, "total_steps": 40000, "loss": 0.1654, "lr": 4.31312052816982e-05, "epoch": 1.57753487233869, "percentage": 24.18, "elapsed_time": "2:12:27", "remaining_time": "6:55:26", "throughput": 2344.11, "total_tokens": 18629376} {"current_steps": 9675, "total_steps": 40000, "loss": 0.097, "lr": 4.312444467365675e-05, "epoch": 1.5783505995595073, "percentage": 24.19, "elapsed_time": "2:12:29", "remaining_time": "6:55:16", "throughput": 2344.67, "total_tokens": 18638624} {"current_steps": 9680, "total_steps": 40000, "loss": 0.0413, "lr": 4.311768127059816e-05, "epoch": 1.5791663267803246, "percentage": 24.2, "elapsed_time": "2:12:31", "remaining_time": "6:55:05", "throughput": 2345.26, "total_tokens": 18648224} {"current_steps": 9685, "total_steps": 40000, "loss": 0.0333, "lr": 4.3110915073565444e-05, "epoch": 1.579982054001142, "percentage": 24.21, "elapsed_time": "2:12:33", "remaining_time": "6:54:55", "throughput": 2346.0, "total_tokens": 18658944} {"current_steps": 9690, "total_steps": 40000, "loss": 0.0917, "lr": 4.310414608360203e-05, "epoch": 1.5807977812219594, "percentage": 24.22, "elapsed_time": "2:12:35", "remaining_time": "6:54:44", "throughput": 2346.6, "total_tokens": 18668528} {"current_steps": 9695, "total_steps": 40000, "loss": 0.1164, "lr": 4.309737430175177e-05, "epoch": 1.5816135084427767, "percentage": 24.24, "elapsed_time": "2:12:37", "remaining_time": "6:54:34", "throughput": 2347.24, "total_tokens": 18678512} {"current_steps": 9700, "total_steps": 40000, "loss": 0.0754, "lr": 4.309059972905897e-05, "epoch": 1.582429235663594, "percentage": 24.25, "elapsed_time": "2:12:39", "remaining_time": "6:54:23", "throughput": 2347.95, "total_tokens": 18688960} {"current_steps": 9705, "total_steps": 40000, "loss": 0.076, "lr": 4.308382236656836e-05, "epoch": 1.5832449628844114, "percentage": 24.26, "elapsed_time": "2:12:41", "remaining_time": "6:54:13", "throughput": 2348.6, "total_tokens": 18698992} {"current_steps": 9710, "total_steps": 40000, "loss": 0.2093, "lr": 4.307704221532507e-05, "epoch": 1.5840606901052288, "percentage": 24.27, "elapsed_time": "2:12:43", "remaining_time": "6:54:02", "throughput": 2349.19, "total_tokens": 18708528} {"current_steps": 9715, "total_steps": 40000, "loss": 0.1353, "lr": 4.307025927637471e-05, "epoch": 1.5848764173260461, "percentage": 24.29, "elapsed_time": "2:12:45", "remaining_time": "6:53:52", "throughput": 2349.72, "total_tokens": 18717680} {"current_steps": 9720, "total_steps": 40000, "loss": 0.0388, "lr": 4.306347355076328e-05, "epoch": 1.5856921445468637, "percentage": 24.3, "elapsed_time": "2:12:47", "remaining_time": "6:53:42", "throughput": 2350.38, "total_tokens": 18727760} {"current_steps": 9725, "total_steps": 40000, "loss": 0.0977, "lr": 4.305668503953724e-05, "epoch": 1.5865078717676808, "percentage": 24.31, "elapsed_time": "2:12:50", "remaining_time": "6:53:31", "throughput": 2350.86, "total_tokens": 18736416} {"current_steps": 9730, "total_steps": 40000, "loss": 0.1085, "lr": 4.3049893743743436e-05, "epoch": 1.5873235989884984, "percentage": 24.32, "elapsed_time": "2:12:52", "remaining_time": "6:53:21", "throughput": 2351.35, "total_tokens": 18745184} {"current_steps": 9735, "total_steps": 40000, "loss": 0.0484, "lr": 4.304309966442919e-05, "epoch": 1.5881393262093155, "percentage": 24.34, "elapsed_time": "2:12:54", "remaining_time": "6:53:10", "throughput": 2351.87, "total_tokens": 18754224} {"current_steps": 9740, "total_steps": 40000, "loss": 0.1015, "lr": 4.303630280264224e-05, "epoch": 1.588955053430133, "percentage": 24.35, "elapsed_time": "2:12:56", "remaining_time": "6:53:00", "throughput": 2352.66, "total_tokens": 18765392} {"current_steps": 9745, "total_steps": 40000, "loss": 0.1609, "lr": 4.302950315943074e-05, "epoch": 1.5897707806509502, "percentage": 24.36, "elapsed_time": "2:12:58", "remaining_time": "6:52:49", "throughput": 2353.36, "total_tokens": 18775792} {"current_steps": 9750, "total_steps": 40000, "loss": 0.1693, "lr": 4.3022700735843275e-05, "epoch": 1.5905865078717678, "percentage": 24.38, "elapsed_time": "2:13:00", "remaining_time": "6:52:39", "throughput": 2353.84, "total_tokens": 18784544} {"current_steps": 9755, "total_steps": 40000, "loss": 0.1358, "lr": 4.301589553292887e-05, "epoch": 1.591402235092585, "percentage": 24.39, "elapsed_time": "2:13:02", "remaining_time": "6:52:29", "throughput": 2354.42, "total_tokens": 18794000} {"current_steps": 9760, "total_steps": 40000, "loss": 0.1035, "lr": 4.300908755173697e-05, "epoch": 1.5922179623134025, "percentage": 24.4, "elapsed_time": "2:13:04", "remaining_time": "6:52:18", "throughput": 2355.12, "total_tokens": 18804528} {"current_steps": 9765, "total_steps": 40000, "loss": 0.1824, "lr": 4.300227679331745e-05, "epoch": 1.5930336895342196, "percentage": 24.41, "elapsed_time": "2:13:06", "remaining_time": "6:52:08", "throughput": 2355.66, "total_tokens": 18813664} {"current_steps": 9770, "total_steps": 40000, "loss": 0.1357, "lr": 4.299546325872063e-05, "epoch": 1.5938494167550372, "percentage": 24.43, "elapsed_time": "2:13:08", "remaining_time": "6:51:58", "throughput": 2356.23, "total_tokens": 18823104} {"current_steps": 9775, "total_steps": 40000, "loss": 0.083, "lr": 4.2988646948997225e-05, "epoch": 1.5946651439758543, "percentage": 24.44, "elapsed_time": "2:13:10", "remaining_time": "6:51:47", "throughput": 2356.81, "total_tokens": 18832592} {"current_steps": 9780, "total_steps": 40000, "loss": 0.1366, "lr": 4.29818278651984e-05, "epoch": 1.595480871196672, "percentage": 24.45, "elapsed_time": "2:13:12", "remaining_time": "6:51:37", "throughput": 2357.46, "total_tokens": 18842640} {"current_steps": 9785, "total_steps": 40000, "loss": 0.151, "lr": 4.297500600837574e-05, "epoch": 1.596296598417489, "percentage": 24.46, "elapsed_time": "2:13:14", "remaining_time": "6:51:27", "throughput": 2358.02, "total_tokens": 18852016} {"current_steps": 9790, "total_steps": 40000, "loss": 0.061, "lr": 4.2968181379581276e-05, "epoch": 1.5971123256383066, "percentage": 24.47, "elapsed_time": "2:13:16", "remaining_time": "6:51:16", "throughput": 2358.77, "total_tokens": 18862880} {"current_steps": 9795, "total_steps": 40000, "loss": 0.0275, "lr": 4.296135397986743e-05, "epoch": 1.5979280528591238, "percentage": 24.49, "elapsed_time": "2:13:18", "remaining_time": "6:51:06", "throughput": 2359.21, "total_tokens": 18871312} {"current_steps": 9800, "total_steps": 40000, "loss": 0.1508, "lr": 4.295452381028709e-05, "epoch": 1.5987437800799413, "percentage": 24.5, "elapsed_time": "2:13:21", "remaining_time": "6:50:56", "throughput": 2359.82, "total_tokens": 18881008} {"current_steps": 9800, "total_steps": 40000, "eval_loss": 0.14867442846298218, "epoch": 1.5987437800799413, "percentage": 24.5, "elapsed_time": "2:14:41", "remaining_time": "6:55:04", "throughput": 2336.33, "total_tokens": 18881008} {"current_steps": 9805, "total_steps": 40000, "loss": 0.1359, "lr": 4.294769087189354e-05, "epoch": 1.5995595073007585, "percentage": 24.51, "elapsed_time": "2:14:45", "remaining_time": "6:54:58", "throughput": 2336.62, "total_tokens": 18891936} {"current_steps": 9810, "total_steps": 40000, "loss": 0.0957, "lr": 4.294085516574052e-05, "epoch": 1.600375234521576, "percentage": 24.52, "elapsed_time": "2:14:47", "remaining_time": "6:54:48", "throughput": 2337.2, "total_tokens": 18901440} {"current_steps": 9815, "total_steps": 40000, "loss": 0.0888, "lr": 4.2934016692882176e-05, "epoch": 1.6011909617423934, "percentage": 24.54, "elapsed_time": "2:14:49", "remaining_time": "6:54:37", "throughput": 2337.87, "total_tokens": 18911696} {"current_steps": 9820, "total_steps": 40000, "loss": 0.1687, "lr": 4.292717545437308e-05, "epoch": 1.6020066889632107, "percentage": 24.55, "elapsed_time": "2:14:51", "remaining_time": "6:54:27", "throughput": 2338.35, "total_tokens": 18920416} {"current_steps": 9825, "total_steps": 40000, "loss": 0.3199, "lr": 4.292033145126825e-05, "epoch": 1.602822416184028, "percentage": 24.56, "elapsed_time": "2:14:53", "remaining_time": "6:54:16", "throughput": 2338.97, "total_tokens": 18930256} {"current_steps": 9830, "total_steps": 40000, "loss": 0.2082, "lr": 4.29134846846231e-05, "epoch": 1.6036381434048455, "percentage": 24.57, "elapsed_time": "2:14:55", "remaining_time": "6:54:06", "throughput": 2339.63, "total_tokens": 18940400} {"current_steps": 9835, "total_steps": 40000, "loss": 0.0378, "lr": 4.29066351554935e-05, "epoch": 1.6044538706256628, "percentage": 24.59, "elapsed_time": "2:14:57", "remaining_time": "6:53:56", "throughput": 2340.22, "total_tokens": 18950032} {"current_steps": 9840, "total_steps": 40000, "loss": 0.0691, "lr": 4.289978286493574e-05, "epoch": 1.6052695978464802, "percentage": 24.6, "elapsed_time": "2:14:59", "remaining_time": "6:53:45", "throughput": 2340.74, "total_tokens": 18959056} {"current_steps": 9845, "total_steps": 40000, "loss": 0.0617, "lr": 4.28929278140065e-05, "epoch": 1.6060853250672975, "percentage": 24.61, "elapsed_time": "2:15:01", "remaining_time": "6:53:35", "throughput": 2341.4, "total_tokens": 18969296} {"current_steps": 9850, "total_steps": 40000, "loss": 0.208, "lr": 4.288607000376295e-05, "epoch": 1.6069010522881149, "percentage": 24.62, "elapsed_time": "2:15:03", "remaining_time": "6:53:24", "throughput": 2342.11, "total_tokens": 18979920} {"current_steps": 9855, "total_steps": 40000, "loss": 0.1415, "lr": 4.2879209435262624e-05, "epoch": 1.6077167795089322, "percentage": 24.64, "elapsed_time": "2:15:05", "remaining_time": "6:53:14", "throughput": 2342.69, "total_tokens": 18989424} {"current_steps": 9860, "total_steps": 40000, "loss": 0.067, "lr": 4.287234610956353e-05, "epoch": 1.6085325067297496, "percentage": 24.65, "elapsed_time": "2:15:07", "remaining_time": "6:53:04", "throughput": 2343.27, "total_tokens": 18998976} {"current_steps": 9865, "total_steps": 40000, "loss": 0.0235, "lr": 4.2865480027724056e-05, "epoch": 1.609348233950567, "percentage": 24.66, "elapsed_time": "2:15:09", "remaining_time": "6:52:53", "throughput": 2343.74, "total_tokens": 19007632} {"current_steps": 9870, "total_steps": 40000, "loss": 0.2162, "lr": 4.285861119080306e-05, "epoch": 1.6101639611713843, "percentage": 24.68, "elapsed_time": "2:15:12", "remaining_time": "6:52:43", "throughput": 2344.37, "total_tokens": 19017568} {"current_steps": 9875, "total_steps": 40000, "loss": 0.1254, "lr": 4.2851739599859784e-05, "epoch": 1.6109796883922016, "percentage": 24.69, "elapsed_time": "2:15:14", "remaining_time": "6:52:33", "throughput": 2345.0, "total_tokens": 19027520} {"current_steps": 9880, "total_steps": 40000, "loss": 0.0284, "lr": 4.2844865255953934e-05, "epoch": 1.611795415613019, "percentage": 24.7, "elapsed_time": "2:15:16", "remaining_time": "6:52:22", "throughput": 2345.43, "total_tokens": 19035904} {"current_steps": 9885, "total_steps": 40000, "loss": 0.6, "lr": 4.2837988160145605e-05, "epoch": 1.6126111428338363, "percentage": 24.71, "elapsed_time": "2:15:18", "remaining_time": "6:52:12", "throughput": 2346.0, "total_tokens": 19045360} {"current_steps": 9890, "total_steps": 40000, "loss": 0.1317, "lr": 4.2831108313495336e-05, "epoch": 1.6134268700546537, "percentage": 24.73, "elapsed_time": "2:15:20", "remaining_time": "6:52:02", "throughput": 2346.79, "total_tokens": 19056608} {"current_steps": 9895, "total_steps": 40000, "loss": 0.2066, "lr": 4.282422571706408e-05, "epoch": 1.614242597275471, "percentage": 24.74, "elapsed_time": "2:15:22", "remaining_time": "6:51:51", "throughput": 2347.4, "total_tokens": 19066464} {"current_steps": 9900, "total_steps": 40000, "loss": 0.1314, "lr": 4.281734037191323e-05, "epoch": 1.6150583244962884, "percentage": 24.75, "elapsed_time": "2:15:24", "remaining_time": "6:51:42", "throughput": 2347.91, "total_tokens": 19076016} {"current_steps": 9905, "total_steps": 40000, "loss": 0.0358, "lr": 4.281045227910459e-05, "epoch": 1.615874051717106, "percentage": 24.76, "elapsed_time": "2:15:26", "remaining_time": "6:51:31", "throughput": 2348.42, "total_tokens": 19084992} {"current_steps": 9910, "total_steps": 40000, "loss": 0.0528, "lr": 4.280356143970038e-05, "epoch": 1.616689778937923, "percentage": 24.77, "elapsed_time": "2:15:28", "remaining_time": "6:51:21", "throughput": 2348.9, "total_tokens": 19093712} {"current_steps": 9915, "total_steps": 40000, "loss": 0.1651, "lr": 4.279666785476327e-05, "epoch": 1.6175055061587407, "percentage": 24.79, "elapsed_time": "2:15:30", "remaining_time": "6:51:11", "throughput": 2349.55, "total_tokens": 19103888} {"current_steps": 9920, "total_steps": 40000, "loss": 0.1018, "lr": 4.2789771525356325e-05, "epoch": 1.6183212333795578, "percentage": 24.8, "elapsed_time": "2:15:32", "remaining_time": "6:51:01", "throughput": 2350.03, "total_tokens": 19112640} {"current_steps": 9925, "total_steps": 40000, "loss": 0.1508, "lr": 4.2782872452543056e-05, "epoch": 1.6191369606003754, "percentage": 24.81, "elapsed_time": "2:15:34", "remaining_time": "6:50:50", "throughput": 2350.62, "total_tokens": 19122304} {"current_steps": 9930, "total_steps": 40000, "loss": 0.1898, "lr": 4.2775970637387376e-05, "epoch": 1.6199526878211925, "percentage": 24.82, "elapsed_time": "2:15:37", "remaining_time": "6:50:40", "throughput": 2351.22, "total_tokens": 19132032} {"current_steps": 9935, "total_steps": 40000, "loss": 0.128, "lr": 4.276906608095363e-05, "epoch": 1.62076841504201, "percentage": 24.84, "elapsed_time": "2:15:39", "remaining_time": "6:50:30", "throughput": 2351.9, "total_tokens": 19142432} {"current_steps": 9940, "total_steps": 40000, "loss": 0.1022, "lr": 4.276215878430661e-05, "epoch": 1.6215841422628272, "percentage": 24.85, "elapsed_time": "2:15:41", "remaining_time": "6:50:20", "throughput": 2352.46, "total_tokens": 19151840} {"current_steps": 9945, "total_steps": 40000, "loss": 0.0459, "lr": 4.275524874851149e-05, "epoch": 1.6223998694836448, "percentage": 24.86, "elapsed_time": "2:15:43", "remaining_time": "6:50:09", "throughput": 2353.07, "total_tokens": 19161680} {"current_steps": 9950, "total_steps": 40000, "loss": 0.0853, "lr": 4.274833597463388e-05, "epoch": 1.623215596704462, "percentage": 24.88, "elapsed_time": "2:15:45", "remaining_time": "6:49:59", "throughput": 2353.6, "total_tokens": 19170832} {"current_steps": 9955, "total_steps": 40000, "loss": 0.1639, "lr": 4.2741420463739824e-05, "epoch": 1.6240313239252795, "percentage": 24.89, "elapsed_time": "2:15:47", "remaining_time": "6:49:49", "throughput": 2354.15, "total_tokens": 19180176} {"current_steps": 9960, "total_steps": 40000, "loss": 0.0611, "lr": 4.273450221689578e-05, "epoch": 1.6248470511460966, "percentage": 24.9, "elapsed_time": "2:15:49", "remaining_time": "6:49:39", "throughput": 2354.61, "total_tokens": 19188800} {"current_steps": 9965, "total_steps": 40000, "loss": 0.2871, "lr": 4.272758123516863e-05, "epoch": 1.6256627783669142, "percentage": 24.91, "elapsed_time": "2:15:51", "remaining_time": "6:49:29", "throughput": 2355.09, "total_tokens": 19197552} {"current_steps": 9970, "total_steps": 40000, "loss": 0.1938, "lr": 4.272065751962567e-05, "epoch": 1.6264785055877313, "percentage": 24.93, "elapsed_time": "2:15:53", "remaining_time": "6:49:18", "throughput": 2355.72, "total_tokens": 19207536} {"current_steps": 9975, "total_steps": 40000, "loss": 0.0329, "lr": 4.271373107133464e-05, "epoch": 1.627294232808549, "percentage": 24.94, "elapsed_time": "2:15:55", "remaining_time": "6:49:08", "throughput": 2356.37, "total_tokens": 19217680} {"current_steps": 9980, "total_steps": 40000, "loss": 0.1904, "lr": 4.270680189136366e-05, "epoch": 1.628109960029366, "percentage": 24.95, "elapsed_time": "2:15:57", "remaining_time": "6:48:58", "throughput": 2356.95, "total_tokens": 19227328} {"current_steps": 9985, "total_steps": 40000, "loss": 0.073, "lr": 4.269986998078132e-05, "epoch": 1.6289256872501836, "percentage": 24.96, "elapsed_time": "2:15:59", "remaining_time": "6:48:48", "throughput": 2357.55, "total_tokens": 19237088} {"current_steps": 9990, "total_steps": 40000, "loss": 0.1845, "lr": 4.2692935340656595e-05, "epoch": 1.6297414144710007, "percentage": 24.98, "elapsed_time": "2:16:01", "remaining_time": "6:48:38", "throughput": 2358.1, "total_tokens": 19246416} {"current_steps": 9995, "total_steps": 40000, "loss": 0.1517, "lr": 4.26859979720589e-05, "epoch": 1.6305571416918183, "percentage": 24.99, "elapsed_time": "2:16:03", "remaining_time": "6:48:28", "throughput": 2358.84, "total_tokens": 19257344} {"current_steps": 10000, "total_steps": 40000, "loss": 0.1213, "lr": 4.267905787605806e-05, "epoch": 1.6313728689126357, "percentage": 25.0, "elapsed_time": "2:16:05", "remaining_time": "6:48:17", "throughput": 2359.42, "total_tokens": 19266960} {"current_steps": 10000, "total_steps": 40000, "eval_loss": 0.13758963346481323, "epoch": 1.6313728689126357, "percentage": 25.0, "elapsed_time": "2:17:26", "remaining_time": "6:52:19", "throughput": 2336.35, "total_tokens": 19266960} {"current_steps": 10005, "total_steps": 40000, "loss": 0.0177, "lr": 4.267211505372433e-05, "epoch": 1.632188596133453, "percentage": 25.01, "elapsed_time": "2:17:30", "remaining_time": "6:52:14", "throughput": 2336.46, "total_tokens": 19276304} {"current_steps": 10010, "total_steps": 40000, "loss": 0.1933, "lr": 4.266516950612837e-05, "epoch": 1.6330043233542704, "percentage": 25.02, "elapsed_time": "2:17:32", "remaining_time": "6:52:03", "throughput": 2337.13, "total_tokens": 19286672} {"current_steps": 10015, "total_steps": 40000, "loss": 0.1222, "lr": 4.265822123434128e-05, "epoch": 1.6338200505750877, "percentage": 25.04, "elapsed_time": "2:17:34", "remaining_time": "6:51:53", "throughput": 2337.79, "total_tokens": 19296944} {"current_steps": 10020, "total_steps": 40000, "loss": 0.1615, "lr": 4.265127023943457e-05, "epoch": 1.634635777795905, "percentage": 25.05, "elapsed_time": "2:17:36", "remaining_time": "6:51:43", "throughput": 2338.44, "total_tokens": 19307120} {"current_steps": 10025, "total_steps": 40000, "loss": 0.0926, "lr": 4.2644316522480176e-05, "epoch": 1.6354515050167224, "percentage": 25.06, "elapsed_time": "2:17:38", "remaining_time": "6:51:33", "throughput": 2338.97, "total_tokens": 19316384} {"current_steps": 10030, "total_steps": 40000, "loss": 0.0379, "lr": 4.263736008455044e-05, "epoch": 1.6362672322375398, "percentage": 25.07, "elapsed_time": "2:17:40", "remaining_time": "6:51:22", "throughput": 2339.53, "total_tokens": 19325792} {"current_steps": 10035, "total_steps": 40000, "loss": 0.0999, "lr": 4.2630400926718125e-05, "epoch": 1.6370829594583571, "percentage": 25.09, "elapsed_time": "2:17:42", "remaining_time": "6:51:12", "throughput": 2340.23, "total_tokens": 19336448} {"current_steps": 10040, "total_steps": 40000, "loss": 0.0885, "lr": 4.262343905005644e-05, "epoch": 1.6378986866791745, "percentage": 25.1, "elapsed_time": "2:17:44", "remaining_time": "6:51:02", "throughput": 2340.71, "total_tokens": 19345280} {"current_steps": 10045, "total_steps": 40000, "loss": 0.0918, "lr": 4.261647445563897e-05, "epoch": 1.6387144138999918, "percentage": 25.11, "elapsed_time": "2:17:46", "remaining_time": "6:50:52", "throughput": 2341.33, "total_tokens": 19355248} {"current_steps": 10050, "total_steps": 40000, "loss": 0.0791, "lr": 4.260950714453976e-05, "epoch": 1.6395301411208092, "percentage": 25.12, "elapsed_time": "2:17:48", "remaining_time": "6:50:41", "throughput": 2341.87, "total_tokens": 19364480} {"current_steps": 10055, "total_steps": 40000, "loss": 0.1351, "lr": 4.2602537117833266e-05, "epoch": 1.6403458683416265, "percentage": 25.14, "elapsed_time": "2:17:50", "remaining_time": "6:50:31", "throughput": 2342.52, "total_tokens": 19374688} {"current_steps": 10060, "total_steps": 40000, "loss": 0.1469, "lr": 4.259556437659433e-05, "epoch": 1.641161595562444, "percentage": 25.15, "elapsed_time": "2:17:52", "remaining_time": "6:50:21", "throughput": 2343.1, "total_tokens": 19384320} {"current_steps": 10065, "total_steps": 40000, "loss": 0.0661, "lr": 4.258858892189825e-05, "epoch": 1.6419773227832613, "percentage": 25.16, "elapsed_time": "2:17:55", "remaining_time": "6:50:11", "throughput": 2343.62, "total_tokens": 19393488} {"current_steps": 10070, "total_steps": 40000, "loss": 0.15, "lr": 4.2581610754820725e-05, "epoch": 1.6427930500040786, "percentage": 25.17, "elapsed_time": "2:17:57", "remaining_time": "6:50:01", "throughput": 2344.08, "total_tokens": 19402128} {"current_steps": 10075, "total_steps": 40000, "loss": 0.2613, "lr": 4.2574629876437876e-05, "epoch": 1.643608777224896, "percentage": 25.19, "elapsed_time": "2:17:59", "remaining_time": "6:49:50", "throughput": 2344.55, "total_tokens": 19410896} {"current_steps": 10080, "total_steps": 40000, "loss": 0.0605, "lr": 4.256764628782625e-05, "epoch": 1.6444245044457133, "percentage": 25.2, "elapsed_time": "2:18:01", "remaining_time": "6:49:40", "throughput": 2345.23, "total_tokens": 19421360} {"current_steps": 10085, "total_steps": 40000, "loss": 0.0506, "lr": 4.256065999006279e-05, "epoch": 1.6452402316665307, "percentage": 25.21, "elapsed_time": "2:18:03", "remaining_time": "6:49:30", "throughput": 2345.83, "total_tokens": 19431200} {"current_steps": 10090, "total_steps": 40000, "loss": 0.1717, "lr": 4.2553670984224885e-05, "epoch": 1.6460559588873482, "percentage": 25.22, "elapsed_time": "2:18:05", "remaining_time": "6:49:20", "throughput": 2346.46, "total_tokens": 19441264} {"current_steps": 10095, "total_steps": 40000, "loss": 0.0786, "lr": 4.254667927139032e-05, "epoch": 1.6468716861081654, "percentage": 25.24, "elapsed_time": "2:18:07", "remaining_time": "6:49:10", "throughput": 2347.06, "total_tokens": 19451040} {"current_steps": 10100, "total_steps": 40000, "loss": 0.0365, "lr": 4.2539684852637295e-05, "epoch": 1.647687413328983, "percentage": 25.25, "elapsed_time": "2:18:09", "remaining_time": "6:49:00", "throughput": 2347.7, "total_tokens": 19461232} {"current_steps": 10105, "total_steps": 40000, "loss": 0.1762, "lr": 4.253268772904446e-05, "epoch": 1.6485031405498, "percentage": 25.26, "elapsed_time": "2:18:11", "remaining_time": "6:48:50", "throughput": 2348.42, "total_tokens": 19472000} {"current_steps": 10110, "total_steps": 40000, "loss": 0.2638, "lr": 4.252568790169085e-05, "epoch": 1.6493188677706176, "percentage": 25.27, "elapsed_time": "2:18:13", "remaining_time": "6:48:39", "throughput": 2349.01, "total_tokens": 19481776} {"current_steps": 10115, "total_steps": 40000, "loss": 0.1475, "lr": 4.251868537165592e-05, "epoch": 1.6501345949914348, "percentage": 25.29, "elapsed_time": "2:18:15", "remaining_time": "6:48:29", "throughput": 2349.72, "total_tokens": 19492560} {"current_steps": 10120, "total_steps": 40000, "loss": 0.2499, "lr": 4.251168014001955e-05, "epoch": 1.6509503222122524, "percentage": 25.3, "elapsed_time": "2:18:17", "remaining_time": "6:48:19", "throughput": 2350.19, "total_tokens": 19501344} {"current_steps": 10125, "total_steps": 40000, "loss": 0.1686, "lr": 4.250467220786204e-05, "epoch": 1.6517660494330695, "percentage": 25.31, "elapsed_time": "2:18:19", "remaining_time": "6:48:09", "throughput": 2350.9, "total_tokens": 19512048} {"current_steps": 10130, "total_steps": 40000, "loss": 0.1739, "lr": 4.249766157626409e-05, "epoch": 1.652581776653887, "percentage": 25.32, "elapsed_time": "2:18:21", "remaining_time": "6:47:59", "throughput": 2351.42, "total_tokens": 19521248} {"current_steps": 10135, "total_steps": 40000, "loss": 0.1336, "lr": 4.249064824630684e-05, "epoch": 1.6533975038747042, "percentage": 25.34, "elapsed_time": "2:18:23", "remaining_time": "6:47:49", "throughput": 2352.03, "total_tokens": 19531136} {"current_steps": 10140, "total_steps": 40000, "loss": 0.0846, "lr": 4.248363221907183e-05, "epoch": 1.6542132310955218, "percentage": 25.35, "elapsed_time": "2:18:26", "remaining_time": "6:47:39", "throughput": 2352.61, "total_tokens": 19540816} {"current_steps": 10145, "total_steps": 40000, "loss": 0.077, "lr": 4.2476613495641026e-05, "epoch": 1.655028958316339, "percentage": 25.36, "elapsed_time": "2:18:28", "remaining_time": "6:47:29", "throughput": 2353.24, "total_tokens": 19550912} {"current_steps": 10150, "total_steps": 40000, "loss": 0.0861, "lr": 4.246959207709679e-05, "epoch": 1.6558446855371565, "percentage": 25.37, "elapsed_time": "2:18:30", "remaining_time": "6:47:19", "throughput": 2353.79, "total_tokens": 19560352} {"current_steps": 10155, "total_steps": 40000, "loss": 0.0711, "lr": 4.246256796452192e-05, "epoch": 1.6566604127579736, "percentage": 25.39, "elapsed_time": "2:18:32", "remaining_time": "6:47:09", "throughput": 2354.3, "total_tokens": 19569488} {"current_steps": 10160, "total_steps": 40000, "loss": 0.1543, "lr": 4.245554115899962e-05, "epoch": 1.6574761399787912, "percentage": 25.4, "elapsed_time": "2:18:34", "remaining_time": "6:46:59", "throughput": 2354.86, "total_tokens": 19579024} {"current_steps": 10165, "total_steps": 40000, "loss": 0.1271, "lr": 4.2448511661613514e-05, "epoch": 1.6582918671996083, "percentage": 25.41, "elapsed_time": "2:18:36", "remaining_time": "6:46:49", "throughput": 2355.36, "total_tokens": 19587984} {"current_steps": 10170, "total_steps": 40000, "loss": 0.2113, "lr": 4.2441479473447635e-05, "epoch": 1.6591075944204259, "percentage": 25.42, "elapsed_time": "2:18:38", "remaining_time": "6:46:39", "throughput": 2355.81, "total_tokens": 19596608} {"current_steps": 10175, "total_steps": 40000, "loss": 0.1666, "lr": 4.243444459558644e-05, "epoch": 1.659923321641243, "percentage": 25.44, "elapsed_time": "2:18:40", "remaining_time": "6:46:29", "throughput": 2356.4, "total_tokens": 19606400} {"current_steps": 10180, "total_steps": 40000, "loss": 0.1727, "lr": 4.24274070291148e-05, "epoch": 1.6607390488620606, "percentage": 25.45, "elapsed_time": "2:18:42", "remaining_time": "6:46:19", "throughput": 2356.84, "total_tokens": 19614928} {"current_steps": 10185, "total_steps": 40000, "loss": 0.105, "lr": 4.242036677511798e-05, "epoch": 1.661554776082878, "percentage": 25.46, "elapsed_time": "2:18:44", "remaining_time": "6:46:09", "throughput": 2357.28, "total_tokens": 19623488} {"current_steps": 10190, "total_steps": 40000, "loss": 0.1634, "lr": 4.241332383468169e-05, "epoch": 1.6623705033036953, "percentage": 25.47, "elapsed_time": "2:18:46", "remaining_time": "6:45:59", "throughput": 2357.67, "total_tokens": 19631584} {"current_steps": 10195, "total_steps": 40000, "loss": 0.0512, "lr": 4.2406278208892034e-05, "epoch": 1.6631862305245126, "percentage": 25.49, "elapsed_time": "2:18:48", "remaining_time": "6:45:49", "throughput": 2358.22, "total_tokens": 19641056} {"current_steps": 10200, "total_steps": 40000, "loss": 0.1004, "lr": 4.2399229898835536e-05, "epoch": 1.66400195774533, "percentage": 25.5, "elapsed_time": "2:18:50", "remaining_time": "6:45:39", "throughput": 2358.77, "total_tokens": 19650480} {"current_steps": 10200, "total_steps": 40000, "eval_loss": 0.1352052241563797, "epoch": 1.66400195774533, "percentage": 25.5, "elapsed_time": "2:20:11", "remaining_time": "6:49:34", "throughput": 2336.17, "total_tokens": 19650480} {"current_steps": 10205, "total_steps": 40000, "loss": 0.0203, "lr": 4.239217890559914e-05, "epoch": 1.6648176849661473, "percentage": 25.51, "elapsed_time": "2:20:14", "remaining_time": "6:49:28", "throughput": 2336.39, "total_tokens": 19660592} {"current_steps": 10210, "total_steps": 40000, "loss": 0.1303, "lr": 4.238512523027019e-05, "epoch": 1.6656334121869647, "percentage": 25.52, "elapsed_time": "2:20:17", "remaining_time": "6:49:18", "throughput": 2336.96, "total_tokens": 19670240} {"current_steps": 10215, "total_steps": 40000, "loss": 0.024, "lr": 4.237806887393645e-05, "epoch": 1.666449139407782, "percentage": 25.54, "elapsed_time": "2:20:19", "remaining_time": "6:49:08", "throughput": 2337.52, "total_tokens": 19679808} {"current_steps": 10220, "total_steps": 40000, "loss": 0.1115, "lr": 4.237100983768611e-05, "epoch": 1.6672648666285994, "percentage": 25.55, "elapsed_time": "2:20:21", "remaining_time": "6:48:58", "throughput": 2338.21, "total_tokens": 19690432} {"current_steps": 10225, "total_steps": 40000, "loss": 0.0155, "lr": 4.2363948122607756e-05, "epoch": 1.6680805938494168, "percentage": 25.56, "elapsed_time": "2:20:23", "remaining_time": "6:48:48", "throughput": 2338.75, "total_tokens": 19699824} {"current_steps": 10230, "total_steps": 40000, "loss": 0.1771, "lr": 4.235688372979039e-05, "epoch": 1.6688963210702341, "percentage": 25.57, "elapsed_time": "2:20:25", "remaining_time": "6:48:38", "throughput": 2339.37, "total_tokens": 19709808} {"current_steps": 10235, "total_steps": 40000, "loss": 0.1415, "lr": 4.234981666032343e-05, "epoch": 1.6697120482910515, "percentage": 25.59, "elapsed_time": "2:20:27", "remaining_time": "6:48:28", "throughput": 2339.91, "total_tokens": 19719232} {"current_steps": 10240, "total_steps": 40000, "loss": 0.0699, "lr": 4.2342746915296704e-05, "epoch": 1.6705277755118688, "percentage": 25.6, "elapsed_time": "2:20:29", "remaining_time": "6:48:17", "throughput": 2340.54, "total_tokens": 19729360} {"current_steps": 10245, "total_steps": 40000, "loss": 0.0802, "lr": 4.233567449580047e-05, "epoch": 1.6713435027326862, "percentage": 25.61, "elapsed_time": "2:20:31", "remaining_time": "6:48:07", "throughput": 2341.24, "total_tokens": 19740160} {"current_steps": 10250, "total_steps": 40000, "loss": 0.2589, "lr": 4.232859940292537e-05, "epoch": 1.6721592299535035, "percentage": 25.62, "elapsed_time": "2:20:33", "remaining_time": "6:47:57", "throughput": 2341.63, "total_tokens": 19748304} {"current_steps": 10255, "total_steps": 40000, "loss": 0.1382, "lr": 4.232152163776248e-05, "epoch": 1.6729749571743209, "percentage": 25.64, "elapsed_time": "2:20:35", "remaining_time": "6:47:47", "throughput": 2342.26, "total_tokens": 19758464} {"current_steps": 10260, "total_steps": 40000, "loss": 0.1003, "lr": 4.231444120140328e-05, "epoch": 1.6737906843951382, "percentage": 25.65, "elapsed_time": "2:20:37", "remaining_time": "6:47:37", "throughput": 2342.93, "total_tokens": 19768992} {"current_steps": 10265, "total_steps": 40000, "loss": 0.1583, "lr": 4.230735809493967e-05, "epoch": 1.6746064116159556, "percentage": 25.66, "elapsed_time": "2:20:39", "remaining_time": "6:47:27", "throughput": 2343.52, "total_tokens": 19778768} {"current_steps": 10270, "total_steps": 40000, "loss": 0.0904, "lr": 4.2300272319463926e-05, "epoch": 1.675422138836773, "percentage": 25.67, "elapsed_time": "2:20:41", "remaining_time": "6:47:17", "throughput": 2344.13, "total_tokens": 19788768} {"current_steps": 10275, "total_steps": 40000, "loss": 0.1042, "lr": 4.2293183876068786e-05, "epoch": 1.6762378660575903, "percentage": 25.69, "elapsed_time": "2:20:43", "remaining_time": "6:47:07", "throughput": 2344.7, "total_tokens": 19798416} {"current_steps": 10280, "total_steps": 40000, "loss": 0.2312, "lr": 4.228609276584737e-05, "epoch": 1.6770535932784076, "percentage": 25.7, "elapsed_time": "2:20:45", "remaining_time": "6:46:57", "throughput": 2345.39, "total_tokens": 19809136} {"current_steps": 10285, "total_steps": 40000, "loss": 0.0985, "lr": 4.227899898989323e-05, "epoch": 1.6778693204992252, "percentage": 25.71, "elapsed_time": "2:20:48", "remaining_time": "6:46:47", "throughput": 2345.9, "total_tokens": 19818304} {"current_steps": 10290, "total_steps": 40000, "loss": 0.1598, "lr": 4.2271902549300293e-05, "epoch": 1.6786850477200423, "percentage": 25.72, "elapsed_time": "2:20:50", "remaining_time": "6:46:37", "throughput": 2346.37, "total_tokens": 19827120} {"current_steps": 10295, "total_steps": 40000, "loss": 0.214, "lr": 4.226480344516294e-05, "epoch": 1.67950077494086, "percentage": 25.74, "elapsed_time": "2:20:52", "remaining_time": "6:46:27", "throughput": 2347.03, "total_tokens": 19837520} {"current_steps": 10300, "total_steps": 40000, "loss": 0.1083, "lr": 4.2257701678575925e-05, "epoch": 1.680316502161677, "percentage": 25.75, "elapsed_time": "2:20:54", "remaining_time": "6:46:17", "throughput": 2347.57, "total_tokens": 19846960} {"current_steps": 10305, "total_steps": 40000, "loss": 0.1145, "lr": 4.225059725063444e-05, "epoch": 1.6811322293824946, "percentage": 25.76, "elapsed_time": "2:20:56", "remaining_time": "6:46:07", "throughput": 2348.12, "total_tokens": 19856432} {"current_steps": 10310, "total_steps": 40000, "loss": 0.1203, "lr": 4.2243490162434074e-05, "epoch": 1.6819479566033118, "percentage": 25.77, "elapsed_time": "2:20:58", "remaining_time": "6:45:57", "throughput": 2348.69, "total_tokens": 19866128} {"current_steps": 10315, "total_steps": 40000, "loss": 0.0678, "lr": 4.223638041507083e-05, "epoch": 1.6827636838241293, "percentage": 25.79, "elapsed_time": "2:21:00", "remaining_time": "6:45:47", "throughput": 2349.23, "total_tokens": 19875568} {"current_steps": 10320, "total_steps": 40000, "loss": 0.1473, "lr": 4.2229268009641124e-05, "epoch": 1.6835794110449465, "percentage": 25.8, "elapsed_time": "2:21:02", "remaining_time": "6:45:37", "throughput": 2349.84, "total_tokens": 19885600} {"current_steps": 10325, "total_steps": 40000, "loss": 0.1485, "lr": 4.222215294724177e-05, "epoch": 1.684395138265764, "percentage": 25.81, "elapsed_time": "2:21:04", "remaining_time": "6:45:28", "throughput": 2350.38, "total_tokens": 19895008} {"current_steps": 10330, "total_steps": 40000, "loss": 0.1029, "lr": 4.2215035228970005e-05, "epoch": 1.6852108654865812, "percentage": 25.82, "elapsed_time": "2:21:06", "remaining_time": "6:45:18", "throughput": 2350.8, "total_tokens": 19903424} {"current_steps": 10335, "total_steps": 40000, "loss": 0.1156, "lr": 4.2207914855923464e-05, "epoch": 1.6860265927073987, "percentage": 25.84, "elapsed_time": "2:21:08", "remaining_time": "6:45:08", "throughput": 2351.41, "total_tokens": 19913504} {"current_steps": 10340, "total_steps": 40000, "loss": 0.1079, "lr": 4.220079182920021e-05, "epoch": 1.6868423199282159, "percentage": 25.85, "elapsed_time": "2:21:10", "remaining_time": "6:44:58", "throughput": 2351.8, "total_tokens": 19921632} {"current_steps": 10345, "total_steps": 40000, "loss": 0.1782, "lr": 4.2193666149898705e-05, "epoch": 1.6876580471490334, "percentage": 25.86, "elapsed_time": "2:21:12", "remaining_time": "6:44:48", "throughput": 2352.48, "total_tokens": 19932256} {"current_steps": 10350, "total_steps": 40000, "loss": 0.1358, "lr": 4.21865378191178e-05, "epoch": 1.6884737743698506, "percentage": 25.87, "elapsed_time": "2:21:14", "remaining_time": "6:44:38", "throughput": 2353.06, "total_tokens": 19942064} {"current_steps": 10355, "total_steps": 40000, "loss": 0.0912, "lr": 4.217940683795678e-05, "epoch": 1.6892895015906682, "percentage": 25.89, "elapsed_time": "2:21:17", "remaining_time": "6:44:28", "throughput": 2353.68, "total_tokens": 19952144} {"current_steps": 10360, "total_steps": 40000, "loss": 0.0899, "lr": 4.217227320751534e-05, "epoch": 1.6901052288114853, "percentage": 25.9, "elapsed_time": "2:21:19", "remaining_time": "6:44:18", "throughput": 2354.34, "total_tokens": 19962592} {"current_steps": 10365, "total_steps": 40000, "loss": 0.1568, "lr": 4.216513692889358e-05, "epoch": 1.6909209560323029, "percentage": 25.91, "elapsed_time": "2:21:21", "remaining_time": "6:44:08", "throughput": 2354.82, "total_tokens": 19971520} {"current_steps": 10370, "total_steps": 40000, "loss": 0.1481, "lr": 4.215799800319199e-05, "epoch": 1.6917366832531202, "percentage": 25.92, "elapsed_time": "2:21:23", "remaining_time": "6:43:58", "throughput": 2355.33, "total_tokens": 19980784} {"current_steps": 10375, "total_steps": 40000, "loss": 0.1331, "lr": 4.2150856431511485e-05, "epoch": 1.6925524104739376, "percentage": 25.94, "elapsed_time": "2:21:25", "remaining_time": "6:43:49", "throughput": 2355.91, "total_tokens": 19990528} {"current_steps": 10380, "total_steps": 40000, "loss": 0.0629, "lr": 4.214371221495339e-05, "epoch": 1.693368137694755, "percentage": 25.95, "elapsed_time": "2:21:27", "remaining_time": "6:43:39", "throughput": 2356.45, "total_tokens": 20000000} {"current_steps": 10385, "total_steps": 40000, "loss": 0.1277, "lr": 4.213656535461942e-05, "epoch": 1.6941838649155723, "percentage": 25.96, "elapsed_time": "2:21:29", "remaining_time": "6:43:29", "throughput": 2357.12, "total_tokens": 20010544} {"current_steps": 10390, "total_steps": 40000, "loss": 0.1353, "lr": 4.2129415851611734e-05, "epoch": 1.6949995921363896, "percentage": 25.97, "elapsed_time": "2:21:31", "remaining_time": "6:43:19", "throughput": 2357.74, "total_tokens": 20020704} {"current_steps": 10395, "total_steps": 40000, "loss": 0.1937, "lr": 4.2122263707032855e-05, "epoch": 1.695815319357207, "percentage": 25.99, "elapsed_time": "2:21:33", "remaining_time": "6:43:09", "throughput": 2358.34, "total_tokens": 20030672} {"current_steps": 10400, "total_steps": 40000, "loss": 0.2218, "lr": 4.211510892198574e-05, "epoch": 1.6966310465780243, "percentage": 26.0, "elapsed_time": "2:21:35", "remaining_time": "6:42:59", "throughput": 2358.99, "total_tokens": 20041120} {"current_steps": 10400, "total_steps": 40000, "eval_loss": 0.12628936767578125, "epoch": 1.6966310465780243, "percentage": 26.0, "elapsed_time": "2:22:56", "remaining_time": "6:46:49", "throughput": 2336.8, "total_tokens": 20041120} {"current_steps": 10405, "total_steps": 40000, "loss": 0.0488, "lr": 4.210795149757375e-05, "epoch": 1.6974467737988417, "percentage": 26.01, "elapsed_time": "2:23:00", "remaining_time": "6:46:46", "throughput": 2336.6, "total_tokens": 20049536} {"current_steps": 10410, "total_steps": 40000, "loss": 0.1024, "lr": 4.210079143490065e-05, "epoch": 1.698262501019659, "percentage": 26.02, "elapsed_time": "2:23:02", "remaining_time": "6:46:36", "throughput": 2337.02, "total_tokens": 20058064} {"current_steps": 10415, "total_steps": 40000, "loss": 0.0873, "lr": 4.2093628735070604e-05, "epoch": 1.6990782282404764, "percentage": 26.04, "elapsed_time": "2:23:04", "remaining_time": "6:46:26", "throughput": 2337.49, "total_tokens": 20066880} {"current_steps": 10420, "total_steps": 40000, "loss": 0.052, "lr": 4.208646339918819e-05, "epoch": 1.6998939554612937, "percentage": 26.05, "elapsed_time": "2:23:06", "remaining_time": "6:46:16", "throughput": 2338.04, "total_tokens": 20076432} {"current_steps": 10425, "total_steps": 40000, "loss": 0.1591, "lr": 4.2079295428358414e-05, "epoch": 1.700709682682111, "percentage": 26.06, "elapsed_time": "2:23:08", "remaining_time": "6:46:06", "throughput": 2338.57, "total_tokens": 20085856} {"current_steps": 10430, "total_steps": 40000, "loss": 0.0559, "lr": 4.207212482368664e-05, "epoch": 1.7015254099029284, "percentage": 26.07, "elapsed_time": "2:23:11", "remaining_time": "6:45:56", "throughput": 2339.26, "total_tokens": 20096624} {"current_steps": 10435, "total_steps": 40000, "loss": 0.205, "lr": 4.206495158627867e-05, "epoch": 1.7023411371237458, "percentage": 26.09, "elapsed_time": "2:23:13", "remaining_time": "6:45:46", "throughput": 2339.9, "total_tokens": 20106976} {"current_steps": 10440, "total_steps": 40000, "loss": 0.0369, "lr": 4.205777571724073e-05, "epoch": 1.7031568643445631, "percentage": 26.1, "elapsed_time": "2:23:15", "remaining_time": "6:45:36", "throughput": 2340.51, "total_tokens": 20117088} {"current_steps": 10445, "total_steps": 40000, "loss": 0.0974, "lr": 4.20505972176794e-05, "epoch": 1.7039725915653805, "percentage": 26.11, "elapsed_time": "2:23:17", "remaining_time": "6:45:26", "throughput": 2341.07, "total_tokens": 20126688} {"current_steps": 10450, "total_steps": 40000, "loss": 0.0757, "lr": 4.204341608870171e-05, "epoch": 1.7047883187861979, "percentage": 26.12, "elapsed_time": "2:23:19", "remaining_time": "6:45:16", "throughput": 2341.75, "total_tokens": 20137424} {"current_steps": 10455, "total_steps": 40000, "loss": 0.0219, "lr": 4.203623233141508e-05, "epoch": 1.7056040460070152, "percentage": 26.14, "elapsed_time": "2:23:21", "remaining_time": "6:45:06", "throughput": 2342.35, "total_tokens": 20147392} {"current_steps": 10460, "total_steps": 40000, "loss": 0.2624, "lr": 4.2029045946927334e-05, "epoch": 1.7064197732278326, "percentage": 26.15, "elapsed_time": "2:23:23", "remaining_time": "6:44:56", "throughput": 2342.8, "total_tokens": 20156176} {"current_steps": 10465, "total_steps": 40000, "loss": 0.0088, "lr": 4.20218569363467e-05, "epoch": 1.70723550044865, "percentage": 26.16, "elapsed_time": "2:23:25", "remaining_time": "6:44:47", "throughput": 2343.2, "total_tokens": 20164464} {"current_steps": 10470, "total_steps": 40000, "loss": 0.038, "lr": 4.2014665300781834e-05, "epoch": 1.7080512276694675, "percentage": 26.17, "elapsed_time": "2:23:27", "remaining_time": "6:44:37", "throughput": 2343.76, "total_tokens": 20174128} {"current_steps": 10475, "total_steps": 40000, "loss": 0.1197, "lr": 4.200747104134174e-05, "epoch": 1.7088669548902846, "percentage": 26.19, "elapsed_time": "2:23:29", "remaining_time": "6:44:27", "throughput": 2344.17, "total_tokens": 20182496} {"current_steps": 10480, "total_steps": 40000, "loss": 0.3324, "lr": 4.200027415913588e-05, "epoch": 1.7096826821111022, "percentage": 26.2, "elapsed_time": "2:23:31", "remaining_time": "6:44:17", "throughput": 2344.64, "total_tokens": 20191440} {"current_steps": 10485, "total_steps": 40000, "loss": 0.2245, "lr": 4.1993074655274126e-05, "epoch": 1.7104984093319193, "percentage": 26.21, "elapsed_time": "2:23:33", "remaining_time": "6:44:07", "throughput": 2345.18, "total_tokens": 20200896} {"current_steps": 10490, "total_steps": 40000, "loss": 0.193, "lr": 4.198587253086669e-05, "epoch": 1.711314136552737, "percentage": 26.22, "elapsed_time": "2:23:35", "remaining_time": "6:43:57", "throughput": 2345.75, "total_tokens": 20210688} {"current_steps": 10495, "total_steps": 40000, "loss": 0.0665, "lr": 4.197866778702426e-05, "epoch": 1.712129863773554, "percentage": 26.24, "elapsed_time": "2:23:37", "remaining_time": "6:43:47", "throughput": 2346.37, "total_tokens": 20220848} {"current_steps": 10500, "total_steps": 40000, "loss": 0.2039, "lr": 4.197146042485789e-05, "epoch": 1.7129455909943716, "percentage": 26.25, "elapsed_time": "2:23:39", "remaining_time": "6:43:38", "throughput": 2346.91, "total_tokens": 20230320} {"current_steps": 10505, "total_steps": 40000, "loss": 0.1735, "lr": 4.1964250445479046e-05, "epoch": 1.7137613182151887, "percentage": 26.26, "elapsed_time": "2:23:42", "remaining_time": "6:43:28", "throughput": 2347.38, "total_tokens": 20239264} {"current_steps": 10510, "total_steps": 40000, "loss": 0.0428, "lr": 4.19570378499996e-05, "epoch": 1.7145770454360063, "percentage": 26.27, "elapsed_time": "2:23:44", "remaining_time": "6:43:18", "throughput": 2347.92, "total_tokens": 20248768} {"current_steps": 10515, "total_steps": 40000, "loss": 0.2205, "lr": 4.194982263953182e-05, "epoch": 1.7153927726568234, "percentage": 26.29, "elapsed_time": "2:23:46", "remaining_time": "6:43:08", "throughput": 2348.49, "total_tokens": 20258528} {"current_steps": 10520, "total_steps": 40000, "loss": 0.3621, "lr": 4.194260481518838e-05, "epoch": 1.716208499877641, "percentage": 26.3, "elapsed_time": "2:23:48", "remaining_time": "6:42:58", "throughput": 2348.97, "total_tokens": 20267536} {"current_steps": 10525, "total_steps": 40000, "loss": 0.1037, "lr": 4.1935384378082366e-05, "epoch": 1.7170242270984581, "percentage": 26.31, "elapsed_time": "2:23:50", "remaining_time": "6:42:49", "throughput": 2349.53, "total_tokens": 20277248} {"current_steps": 10530, "total_steps": 40000, "loss": 0.0676, "lr": 4.1928161329327267e-05, "epoch": 1.7178399543192757, "percentage": 26.32, "elapsed_time": "2:23:52", "remaining_time": "6:42:39", "throughput": 2350.18, "total_tokens": 20287664} {"current_steps": 10535, "total_steps": 40000, "loss": 0.1387, "lr": 4.1920935670036945e-05, "epoch": 1.7186556815400928, "percentage": 26.34, "elapsed_time": "2:23:54", "remaining_time": "6:42:29", "throughput": 2350.86, "total_tokens": 20298432} {"current_steps": 10540, "total_steps": 40000, "loss": 0.2113, "lr": 4.1913707401325705e-05, "epoch": 1.7194714087609104, "percentage": 26.35, "elapsed_time": "2:23:56", "remaining_time": "6:42:19", "throughput": 2351.44, "total_tokens": 20308320} {"current_steps": 10545, "total_steps": 40000, "loss": 0.1124, "lr": 4.1906476524308235e-05, "epoch": 1.7202871359817276, "percentage": 26.36, "elapsed_time": "2:23:58", "remaining_time": "6:42:09", "throughput": 2352.11, "total_tokens": 20318976} {"current_steps": 10550, "total_steps": 40000, "loss": 0.2259, "lr": 4.189924304009962e-05, "epoch": 1.7211028632025451, "percentage": 26.38, "elapsed_time": "2:24:00", "remaining_time": "6:42:00", "throughput": 2352.65, "total_tokens": 20328544} {"current_steps": 10555, "total_steps": 40000, "loss": 0.0925, "lr": 4.189200694981537e-05, "epoch": 1.7219185904233625, "percentage": 26.39, "elapsed_time": "2:24:02", "remaining_time": "6:41:50", "throughput": 2353.13, "total_tokens": 20337552} {"current_steps": 10560, "total_steps": 40000, "loss": 0.0618, "lr": 4.188476825457136e-05, "epoch": 1.7227343176441798, "percentage": 26.4, "elapsed_time": "2:24:04", "remaining_time": "6:41:40", "throughput": 2353.72, "total_tokens": 20347520} {"current_steps": 10565, "total_steps": 40000, "loss": 0.0403, "lr": 4.18775269554839e-05, "epoch": 1.7235500448649972, "percentage": 26.41, "elapsed_time": "2:24:06", "remaining_time": "6:41:30", "throughput": 2354.18, "total_tokens": 20356368} {"current_steps": 10570, "total_steps": 40000, "loss": 0.1487, "lr": 4.187028305366969e-05, "epoch": 1.7243657720858145, "percentage": 26.42, "elapsed_time": "2:24:08", "remaining_time": "6:41:21", "throughput": 2354.49, "total_tokens": 20363888} {"current_steps": 10575, "total_steps": 40000, "loss": 0.1468, "lr": 4.1863036550245824e-05, "epoch": 1.725181499306632, "percentage": 26.44, "elapsed_time": "2:24:11", "remaining_time": "6:41:11", "throughput": 2355.03, "total_tokens": 20373456} {"current_steps": 10580, "total_steps": 40000, "loss": 0.2004, "lr": 4.1855787446329806e-05, "epoch": 1.7259972265274492, "percentage": 26.45, "elapsed_time": "2:24:13", "remaining_time": "6:41:01", "throughput": 2355.64, "total_tokens": 20383600} {"current_steps": 10585, "total_steps": 40000, "loss": 0.1831, "lr": 4.184853574303955e-05, "epoch": 1.7268129537482666, "percentage": 26.46, "elapsed_time": "2:24:15", "remaining_time": "6:40:52", "throughput": 2356.21, "total_tokens": 20393424} {"current_steps": 10590, "total_steps": 40000, "loss": 0.0884, "lr": 4.184128144149334e-05, "epoch": 1.727628680969084, "percentage": 26.47, "elapsed_time": "2:24:17", "remaining_time": "6:40:42", "throughput": 2356.71, "total_tokens": 20402624} {"current_steps": 10595, "total_steps": 40000, "loss": 0.1559, "lr": 4.1834024542809896e-05, "epoch": 1.7284444081899013, "percentage": 26.49, "elapsed_time": "2:24:19", "remaining_time": "6:40:32", "throughput": 2357.2, "total_tokens": 20411760} {"current_steps": 10600, "total_steps": 40000, "loss": 0.1434, "lr": 4.1826765048108315e-05, "epoch": 1.7292601354107187, "percentage": 26.5, "elapsed_time": "2:24:21", "remaining_time": "6:40:23", "throughput": 2357.72, "total_tokens": 20421120} {"current_steps": 10600, "total_steps": 40000, "eval_loss": 0.12110266089439392, "epoch": 1.7292601354107187, "percentage": 26.5, "elapsed_time": "2:25:41", "remaining_time": "6:44:06", "throughput": 2335.99, "total_tokens": 20421120} {"current_steps": 10605, "total_steps": 40000, "loss": 0.1667, "lr": 4.181950295850811e-05, "epoch": 1.730075862631536, "percentage": 26.51, "elapsed_time": "2:25:45", "remaining_time": "6:44:01", "throughput": 2335.97, "total_tokens": 20430096} {"current_steps": 10610, "total_steps": 40000, "loss": 0.0684, "lr": 4.181223827512918e-05, "epoch": 1.7308915898523534, "percentage": 26.52, "elapsed_time": "2:25:47", "remaining_time": "6:43:52", "throughput": 2336.57, "total_tokens": 20440304} {"current_steps": 10615, "total_steps": 40000, "loss": 0.0521, "lr": 4.180497099909183e-05, "epoch": 1.7317073170731707, "percentage": 26.54, "elapsed_time": "2:25:50", "remaining_time": "6:43:42", "throughput": 2336.96, "total_tokens": 20448496} {"current_steps": 10620, "total_steps": 40000, "loss": 0.1519, "lr": 4.179770113151677e-05, "epoch": 1.732523044293988, "percentage": 26.55, "elapsed_time": "2:25:52", "remaining_time": "6:43:32", "throughput": 2337.39, "total_tokens": 20457104} {"current_steps": 10625, "total_steps": 40000, "loss": 0.1341, "lr": 4.179042867352511e-05, "epoch": 1.7333387715148054, "percentage": 26.56, "elapsed_time": "2:25:54", "remaining_time": "6:43:22", "throughput": 2338.02, "total_tokens": 20467424} {"current_steps": 10630, "total_steps": 40000, "loss": 0.1137, "lr": 4.1783153626238334e-05, "epoch": 1.7341544987356228, "percentage": 26.57, "elapsed_time": "2:25:56", "remaining_time": "6:43:12", "throughput": 2338.8, "total_tokens": 20479136} {"current_steps": 10635, "total_steps": 40000, "loss": 0.0821, "lr": 4.177587599077836e-05, "epoch": 1.7349702259564401, "percentage": 26.59, "elapsed_time": "2:25:58", "remaining_time": "6:43:03", "throughput": 2339.48, "total_tokens": 20489888} {"current_steps": 10640, "total_steps": 40000, "loss": 0.1308, "lr": 4.1768595768267494e-05, "epoch": 1.7357859531772575, "percentage": 26.6, "elapsed_time": "2:26:00", "remaining_time": "6:42:53", "throughput": 2340.01, "total_tokens": 20499392} {"current_steps": 10645, "total_steps": 40000, "loss": 0.0406, "lr": 4.176131295982843e-05, "epoch": 1.7366016803980748, "percentage": 26.61, "elapsed_time": "2:26:02", "remaining_time": "6:42:43", "throughput": 2340.58, "total_tokens": 20509232} {"current_steps": 10650, "total_steps": 40000, "loss": 0.189, "lr": 4.1754027566584276e-05, "epoch": 1.7374174076188922, "percentage": 26.62, "elapsed_time": "2:26:04", "remaining_time": "6:42:33", "throughput": 2341.04, "total_tokens": 20518112} {"current_steps": 10655, "total_steps": 40000, "loss": 0.2266, "lr": 4.174673958965852e-05, "epoch": 1.7382331348397098, "percentage": 26.64, "elapsed_time": "2:26:06", "remaining_time": "6:42:24", "throughput": 2341.6, "total_tokens": 20527792} {"current_steps": 10660, "total_steps": 40000, "loss": 0.0791, "lr": 4.173944903017507e-05, "epoch": 1.739048862060527, "percentage": 26.65, "elapsed_time": "2:26:08", "remaining_time": "6:42:14", "throughput": 2342.19, "total_tokens": 20537824} {"current_steps": 10665, "total_steps": 40000, "loss": 0.1338, "lr": 4.173215588925822e-05, "epoch": 1.7398645892813445, "percentage": 26.66, "elapsed_time": "2:26:10", "remaining_time": "6:42:04", "throughput": 2342.62, "total_tokens": 20546432} {"current_steps": 10670, "total_steps": 40000, "loss": 0.1178, "lr": 4.172486016803266e-05, "epoch": 1.7406803165021616, "percentage": 26.67, "elapsed_time": "2:26:12", "remaining_time": "6:41:54", "throughput": 2343.12, "total_tokens": 20555632} {"current_steps": 10675, "total_steps": 40000, "loss": 0.1688, "lr": 4.171756186762349e-05, "epoch": 1.7414960437229792, "percentage": 26.69, "elapsed_time": "2:26:14", "remaining_time": "6:41:45", "throughput": 2343.61, "total_tokens": 20564864} {"current_steps": 10680, "total_steps": 40000, "loss": 0.2128, "lr": 4.171026098915619e-05, "epoch": 1.7423117709437963, "percentage": 26.7, "elapsed_time": "2:26:16", "remaining_time": "6:41:35", "throughput": 2344.15, "total_tokens": 20574448} {"current_steps": 10685, "total_steps": 40000, "loss": 0.2137, "lr": 4.170295753375665e-05, "epoch": 1.7431274981646139, "percentage": 26.71, "elapsed_time": "2:26:18", "remaining_time": "6:41:25", "throughput": 2344.67, "total_tokens": 20583856} {"current_steps": 10690, "total_steps": 40000, "loss": 0.1043, "lr": 4.169565150255117e-05, "epoch": 1.743943225385431, "percentage": 26.72, "elapsed_time": "2:26:21", "remaining_time": "6:41:16", "throughput": 2345.14, "total_tokens": 20592784} {"current_steps": 10695, "total_steps": 40000, "loss": 0.1193, "lr": 4.16883428966664e-05, "epoch": 1.7447589526062486, "percentage": 26.74, "elapsed_time": "2:26:23", "remaining_time": "6:41:06", "throughput": 2345.68, "total_tokens": 20602400} {"current_steps": 10700, "total_steps": 40000, "loss": 0.1002, "lr": 4.168103171722944e-05, "epoch": 1.7455746798270657, "percentage": 26.75, "elapsed_time": "2:26:25", "remaining_time": "6:40:56", "throughput": 2346.31, "total_tokens": 20612784} {"current_steps": 10705, "total_steps": 40000, "loss": 0.0813, "lr": 4.167371796536777e-05, "epoch": 1.7463904070478833, "percentage": 26.76, "elapsed_time": "2:26:27", "remaining_time": "6:40:46", "throughput": 2346.95, "total_tokens": 20623280} {"current_steps": 10710, "total_steps": 40000, "loss": 0.1092, "lr": 4.166640164220924e-05, "epoch": 1.7472061342687004, "percentage": 26.77, "elapsed_time": "2:26:29", "remaining_time": "6:40:37", "throughput": 2347.46, "total_tokens": 20632592} {"current_steps": 10715, "total_steps": 40000, "loss": 0.0709, "lr": 4.1659082748882144e-05, "epoch": 1.748021861489518, "percentage": 26.79, "elapsed_time": "2:26:31", "remaining_time": "6:40:27", "throughput": 2347.93, "total_tokens": 20641616} {"current_steps": 10720, "total_steps": 40000, "loss": 0.1036, "lr": 4.1651761286515135e-05, "epoch": 1.7488375887103351, "percentage": 26.8, "elapsed_time": "2:26:33", "remaining_time": "6:40:17", "throughput": 2348.57, "total_tokens": 20652048} {"current_steps": 10725, "total_steps": 40000, "loss": 0.12, "lr": 4.164443725623728e-05, "epoch": 1.7496533159311527, "percentage": 26.81, "elapsed_time": "2:26:35", "remaining_time": "6:40:08", "throughput": 2349.0, "total_tokens": 20660720} {"current_steps": 10730, "total_steps": 40000, "loss": 0.1616, "lr": 4.163711065917802e-05, "epoch": 1.7504690431519698, "percentage": 26.82, "elapsed_time": "2:26:37", "remaining_time": "6:39:58", "throughput": 2349.47, "total_tokens": 20669680} {"current_steps": 10735, "total_steps": 40000, "loss": 0.1492, "lr": 4.1629781496467234e-05, "epoch": 1.7512847703727874, "percentage": 26.84, "elapsed_time": "2:26:39", "remaining_time": "6:39:49", "throughput": 2349.96, "total_tokens": 20678800} {"current_steps": 10740, "total_steps": 40000, "loss": 0.0979, "lr": 4.1622449769235164e-05, "epoch": 1.7521004975936045, "percentage": 26.85, "elapsed_time": "2:26:41", "remaining_time": "6:39:39", "throughput": 2350.53, "total_tokens": 20688688} {"current_steps": 10745, "total_steps": 40000, "loss": 0.1057, "lr": 4.161511547861243e-05, "epoch": 1.752916224814422, "percentage": 26.86, "elapsed_time": "2:26:43", "remaining_time": "6:39:29", "throughput": 2351.14, "total_tokens": 20698912} {"current_steps": 10750, "total_steps": 40000, "loss": 0.0607, "lr": 4.1607778625730104e-05, "epoch": 1.7537319520352395, "percentage": 26.88, "elapsed_time": "2:26:45", "remaining_time": "6:39:20", "throughput": 2351.66, "total_tokens": 20708416} {"current_steps": 10755, "total_steps": 40000, "loss": 0.1189, "lr": 4.160043921171961e-05, "epoch": 1.7545476792560568, "percentage": 26.89, "elapsed_time": "2:26:47", "remaining_time": "6:39:10", "throughput": 2352.12, "total_tokens": 20717328} {"current_steps": 10760, "total_steps": 40000, "loss": 0.1239, "lr": 4.159309723771276e-05, "epoch": 1.7553634064768742, "percentage": 26.9, "elapsed_time": "2:26:50", "remaining_time": "6:39:00", "throughput": 2352.81, "total_tokens": 20728272} {"current_steps": 10765, "total_steps": 40000, "loss": 0.0355, "lr": 4.158575270484181e-05, "epoch": 1.7561791336976915, "percentage": 26.91, "elapsed_time": "2:26:52", "remaining_time": "6:38:51", "throughput": 2353.47, "total_tokens": 20738928} {"current_steps": 10770, "total_steps": 40000, "loss": 0.0784, "lr": 4.157840561423936e-05, "epoch": 1.7569948609185089, "percentage": 26.92, "elapsed_time": "2:26:54", "remaining_time": "6:38:41", "throughput": 2354.19, "total_tokens": 20750176} {"current_steps": 10775, "total_steps": 40000, "loss": 0.0167, "lr": 4.1571055967038416e-05, "epoch": 1.7578105881393262, "percentage": 26.94, "elapsed_time": "2:26:56", "remaining_time": "6:38:32", "throughput": 2354.7, "total_tokens": 20759584} {"current_steps": 10780, "total_steps": 40000, "loss": 0.0281, "lr": 4.156370376437241e-05, "epoch": 1.7586263153601436, "percentage": 26.95, "elapsed_time": "2:26:58", "remaining_time": "6:38:22", "throughput": 2355.21, "total_tokens": 20768896} {"current_steps": 10785, "total_steps": 40000, "loss": 0.1451, "lr": 4.155634900737513e-05, "epoch": 1.759442042580961, "percentage": 26.96, "elapsed_time": "2:27:00", "remaining_time": "6:38:13", "throughput": 2355.83, "total_tokens": 20779232} {"current_steps": 10790, "total_steps": 40000, "loss": 0.1469, "lr": 4.1548991697180764e-05, "epoch": 1.7602577698017783, "percentage": 26.97, "elapsed_time": "2:27:02", "remaining_time": "6:38:03", "throughput": 2356.37, "total_tokens": 20788848} {"current_steps": 10795, "total_steps": 40000, "loss": 0.2624, "lr": 4.1541631834923914e-05, "epoch": 1.7610734970225956, "percentage": 26.99, "elapsed_time": "2:27:04", "remaining_time": "6:37:53", "throughput": 2356.99, "total_tokens": 20799216} {"current_steps": 10800, "total_steps": 40000, "loss": 0.0432, "lr": 4.153426942173956e-05, "epoch": 1.761889224243413, "percentage": 27.0, "elapsed_time": "2:27:06", "remaining_time": "6:37:44", "throughput": 2357.49, "total_tokens": 20808496} {"current_steps": 10800, "total_steps": 40000, "eval_loss": 0.13979317247867584, "epoch": 1.761889224243413, "percentage": 27.0, "elapsed_time": "2:28:27", "remaining_time": "6:41:22", "throughput": 2336.16, "total_tokens": 20808496} {"current_steps": 10805, "total_steps": 40000, "loss": 0.1832, "lr": 4.152690445876308e-05, "epoch": 1.7627049514642303, "percentage": 27.01, "elapsed_time": "2:28:30", "remaining_time": "6:41:16", "throughput": 2336.23, "total_tokens": 20817600} {"current_steps": 10810, "total_steps": 40000, "loss": 0.1104, "lr": 4.1519536947130245e-05, "epoch": 1.7635206786850477, "percentage": 27.02, "elapsed_time": "2:28:32", "remaining_time": "6:41:07", "throughput": 2336.68, "total_tokens": 20826496} {"current_steps": 10815, "total_steps": 40000, "loss": 0.1044, "lr": 4.151216688797722e-05, "epoch": 1.764336405905865, "percentage": 27.04, "elapsed_time": "2:28:34", "remaining_time": "6:40:57", "throughput": 2337.2, "total_tokens": 20835952} {"current_steps": 10820, "total_steps": 40000, "loss": 0.2129, "lr": 4.150479428244054e-05, "epoch": 1.7651521331266824, "percentage": 27.05, "elapsed_time": "2:28:36", "remaining_time": "6:40:47", "throughput": 2337.87, "total_tokens": 20846800} {"current_steps": 10825, "total_steps": 40000, "loss": 0.036, "lr": 4.1497419131657176e-05, "epoch": 1.7659678603474998, "percentage": 27.06, "elapsed_time": "2:28:39", "remaining_time": "6:40:38", "throughput": 2338.36, "total_tokens": 20855968} {"current_steps": 10830, "total_steps": 40000, "loss": 0.0398, "lr": 4.149004143676447e-05, "epoch": 1.766783587568317, "percentage": 27.07, "elapsed_time": "2:28:41", "remaining_time": "6:40:28", "throughput": 2339.09, "total_tokens": 20867360} {"current_steps": 10835, "total_steps": 40000, "loss": 0.0559, "lr": 4.148266119890015e-05, "epoch": 1.7675993147891345, "percentage": 27.09, "elapsed_time": "2:28:43", "remaining_time": "6:40:18", "throughput": 2339.48, "total_tokens": 20875616} {"current_steps": 10840, "total_steps": 40000, "loss": 0.1667, "lr": 4.1475278419202324e-05, "epoch": 1.768415042009952, "percentage": 27.1, "elapsed_time": "2:28:45", "remaining_time": "6:40:09", "throughput": 2339.85, "total_tokens": 20883792} {"current_steps": 10845, "total_steps": 40000, "loss": 0.194, "lr": 4.146789309880953e-05, "epoch": 1.7692307692307692, "percentage": 27.11, "elapsed_time": "2:28:47", "remaining_time": "6:39:59", "throughput": 2340.44, "total_tokens": 20893920} {"current_steps": 10850, "total_steps": 40000, "loss": 0.2724, "lr": 4.146050523886068e-05, "epoch": 1.7700464964515867, "percentage": 27.12, "elapsed_time": "2:28:49", "remaining_time": "6:39:50", "throughput": 2340.88, "total_tokens": 20902704} {"current_steps": 10855, "total_steps": 40000, "loss": 0.1627, "lr": 4.1453114840495055e-05, "epoch": 1.7708622236724039, "percentage": 27.14, "elapsed_time": "2:28:51", "remaining_time": "6:39:40", "throughput": 2341.5, "total_tokens": 20913008} {"current_steps": 10860, "total_steps": 40000, "loss": 0.0697, "lr": 4.1445721904852364e-05, "epoch": 1.7716779508932214, "percentage": 27.15, "elapsed_time": "2:28:53", "remaining_time": "6:39:30", "throughput": 2342.06, "total_tokens": 20922896} {"current_steps": 10865, "total_steps": 40000, "loss": 0.0971, "lr": 4.143832643307269e-05, "epoch": 1.7724936781140386, "percentage": 27.16, "elapsed_time": "2:28:55", "remaining_time": "6:39:21", "throughput": 2342.5, "total_tokens": 20931664} {"current_steps": 10870, "total_steps": 40000, "loss": 0.1222, "lr": 4.1430928426296503e-05, "epoch": 1.7733094053348561, "percentage": 27.18, "elapsed_time": "2:28:57", "remaining_time": "6:39:11", "throughput": 2343.08, "total_tokens": 20941632} {"current_steps": 10875, "total_steps": 40000, "loss": 0.1587, "lr": 4.142352788566466e-05, "epoch": 1.7741251325556733, "percentage": 27.19, "elapsed_time": "2:28:59", "remaining_time": "6:39:02", "throughput": 2343.57, "total_tokens": 20950928} {"current_steps": 10880, "total_steps": 40000, "loss": 0.0641, "lr": 4.1416124812318424e-05, "epoch": 1.7749408597764909, "percentage": 27.2, "elapsed_time": "2:29:01", "remaining_time": "6:38:52", "throughput": 2344.28, "total_tokens": 20962144} {"current_steps": 10885, "total_steps": 40000, "loss": 0.1877, "lr": 4.1408719207399453e-05, "epoch": 1.775756586997308, "percentage": 27.21, "elapsed_time": "2:29:03", "remaining_time": "6:38:42", "throughput": 2344.9, "total_tokens": 20972512} {"current_steps": 10890, "total_steps": 40000, "loss": 0.0911, "lr": 4.140131107204978e-05, "epoch": 1.7765723142181256, "percentage": 27.22, "elapsed_time": "2:29:05", "remaining_time": "6:38:33", "throughput": 2345.4, "total_tokens": 20981824} {"current_steps": 10895, "total_steps": 40000, "loss": 0.0609, "lr": 4.139390040741182e-05, "epoch": 1.7773880414389427, "percentage": 27.24, "elapsed_time": "2:29:08", "remaining_time": "6:38:23", "throughput": 2345.94, "total_tokens": 20991472} {"current_steps": 10900, "total_steps": 40000, "loss": 0.052, "lr": 4.1386487214628396e-05, "epoch": 1.7782037686597603, "percentage": 27.25, "elapsed_time": "2:29:10", "remaining_time": "6:38:14", "throughput": 2346.53, "total_tokens": 21001664} {"current_steps": 10905, "total_steps": 40000, "loss": 0.0821, "lr": 4.137907149484272e-05, "epoch": 1.7790194958805774, "percentage": 27.26, "elapsed_time": "2:29:12", "remaining_time": "6:38:04", "throughput": 2347.02, "total_tokens": 21010864} {"current_steps": 10910, "total_steps": 40000, "loss": 0.1419, "lr": 4.137165324919839e-05, "epoch": 1.779835223101395, "percentage": 27.27, "elapsed_time": "2:29:14", "remaining_time": "6:37:55", "throughput": 2347.55, "total_tokens": 21020480} {"current_steps": 10915, "total_steps": 40000, "loss": 0.0859, "lr": 4.136423247883939e-05, "epoch": 1.780650950322212, "percentage": 27.29, "elapsed_time": "2:29:16", "remaining_time": "6:37:45", "throughput": 2348.02, "total_tokens": 21029568} {"current_steps": 10920, "total_steps": 40000, "loss": 0.1389, "lr": 4.135680918491009e-05, "epoch": 1.7814666775430297, "percentage": 27.3, "elapsed_time": "2:29:18", "remaining_time": "6:37:36", "throughput": 2348.55, "total_tokens": 21039136} {"current_steps": 10925, "total_steps": 40000, "loss": 0.2551, "lr": 4.1349383368555265e-05, "epoch": 1.7822824047638468, "percentage": 27.31, "elapsed_time": "2:29:20", "remaining_time": "6:37:26", "throughput": 2349.08, "total_tokens": 21048752} {"current_steps": 10930, "total_steps": 40000, "loss": 0.0695, "lr": 4.1341955030920065e-05, "epoch": 1.7830981319846644, "percentage": 27.32, "elapsed_time": "2:29:22", "remaining_time": "6:37:17", "throughput": 2349.56, "total_tokens": 21057872} {"current_steps": 10935, "total_steps": 40000, "loss": 0.023, "lr": 4.1334524173150036e-05, "epoch": 1.7839138592054817, "percentage": 27.34, "elapsed_time": "2:29:24", "remaining_time": "6:37:07", "throughput": 2350.06, "total_tokens": 21067264} {"current_steps": 10940, "total_steps": 40000, "loss": 0.0794, "lr": 4.13270907963911e-05, "epoch": 1.784729586426299, "percentage": 27.35, "elapsed_time": "2:29:26", "remaining_time": "6:36:58", "throughput": 2350.53, "total_tokens": 21076304} {"current_steps": 10945, "total_steps": 40000, "loss": 0.109, "lr": 4.131965490178959e-05, "epoch": 1.7855453136471164, "percentage": 27.36, "elapsed_time": "2:29:28", "remaining_time": "6:36:48", "throughput": 2351.03, "total_tokens": 21085696} {"current_steps": 10950, "total_steps": 40000, "loss": 0.1606, "lr": 4.131221649049222e-05, "epoch": 1.7863610408679338, "percentage": 27.38, "elapsed_time": "2:29:30", "remaining_time": "6:36:39", "throughput": 2351.63, "total_tokens": 21095952} {"current_steps": 10955, "total_steps": 40000, "loss": 0.065, "lr": 4.130477556364606e-05, "epoch": 1.7871767680887511, "percentage": 27.39, "elapsed_time": "2:29:32", "remaining_time": "6:36:29", "throughput": 2352.1, "total_tokens": 21105056} {"current_steps": 10960, "total_steps": 40000, "loss": 0.0147, "lr": 4.129733212239861e-05, "epoch": 1.7879924953095685, "percentage": 27.4, "elapsed_time": "2:29:34", "remaining_time": "6:36:20", "throughput": 2352.55, "total_tokens": 21113904} {"current_steps": 10965, "total_steps": 40000, "loss": 0.1257, "lr": 4.128988616789774e-05, "epoch": 1.7888082225303858, "percentage": 27.41, "elapsed_time": "2:29:36", "remaining_time": "6:36:10", "throughput": 2353.15, "total_tokens": 21124224} {"current_steps": 10970, "total_steps": 40000, "loss": 0.1306, "lr": 4.1282437701291724e-05, "epoch": 1.7896239497512032, "percentage": 27.43, "elapsed_time": "2:29:39", "remaining_time": "6:36:01", "throughput": 2353.78, "total_tokens": 21134704} {"current_steps": 10975, "total_steps": 40000, "loss": 0.2052, "lr": 4.1274986723729184e-05, "epoch": 1.7904396769720206, "percentage": 27.44, "elapsed_time": "2:29:41", "remaining_time": "6:35:51", "throughput": 2354.37, "total_tokens": 21144864} {"current_steps": 10980, "total_steps": 40000, "loss": 0.1383, "lr": 4.126753323635917e-05, "epoch": 1.791255404192838, "percentage": 27.45, "elapsed_time": "2:29:43", "remaining_time": "6:35:42", "throughput": 2355.05, "total_tokens": 21155840} {"current_steps": 10985, "total_steps": 40000, "loss": 0.107, "lr": 4.12600772403311e-05, "epoch": 1.7920711314136553, "percentage": 27.46, "elapsed_time": "2:29:45", "remaining_time": "6:35:33", "throughput": 2355.66, "total_tokens": 21166272} {"current_steps": 10990, "total_steps": 40000, "loss": 0.0466, "lr": 4.125261873679479e-05, "epoch": 1.7928868586344726, "percentage": 27.47, "elapsed_time": "2:29:47", "remaining_time": "6:35:23", "throughput": 2356.25, "total_tokens": 21176416} {"current_steps": 10995, "total_steps": 40000, "loss": 0.1305, "lr": 4.124515772690042e-05, "epoch": 1.79370258585529, "percentage": 27.49, "elapsed_time": "2:29:49", "remaining_time": "6:35:14", "throughput": 2356.74, "total_tokens": 21185680} {"current_steps": 11000, "total_steps": 40000, "loss": 0.2285, "lr": 4.123769421179858e-05, "epoch": 1.7945183130761073, "percentage": 27.5, "elapsed_time": "2:29:51", "remaining_time": "6:35:04", "throughput": 2357.24, "total_tokens": 21195024} {"current_steps": 11000, "total_steps": 40000, "eval_loss": 0.14586400985717773, "epoch": 1.7945183130761073, "percentage": 27.5, "elapsed_time": "2:31:12", "remaining_time": "6:38:37", "throughput": 2336.3, "total_tokens": 21195024} {"current_steps": 11005, "total_steps": 40000, "loss": 0.1051, "lr": 4.1230228192640236e-05, "epoch": 1.7953340402969247, "percentage": 27.51, "elapsed_time": "2:31:15", "remaining_time": "6:38:31", "throughput": 2336.29, "total_tokens": 21203376} {"current_steps": 11010, "total_steps": 40000, "loss": 0.1287, "lr": 4.122275967057675e-05, "epoch": 1.796149767517742, "percentage": 27.52, "elapsed_time": "2:31:17", "remaining_time": "6:38:22", "throughput": 2336.78, "total_tokens": 21212672} {"current_steps": 11015, "total_steps": 40000, "loss": 0.1066, "lr": 4.1215288646759846e-05, "epoch": 1.7969654947385594, "percentage": 27.54, "elapsed_time": "2:31:19", "remaining_time": "6:38:12", "throughput": 2337.34, "total_tokens": 21222656} {"current_steps": 11020, "total_steps": 40000, "loss": 0.0961, "lr": 4.120781512234166e-05, "epoch": 1.7977812219593767, "percentage": 27.55, "elapsed_time": "2:31:21", "remaining_time": "6:38:03", "throughput": 2337.78, "total_tokens": 21231456} {"current_steps": 11025, "total_steps": 40000, "loss": 0.1074, "lr": 4.120033909847471e-05, "epoch": 1.7985969491801943, "percentage": 27.56, "elapsed_time": "2:31:23", "remaining_time": "6:37:53", "throughput": 2338.36, "total_tokens": 21241568} {"current_steps": 11030, "total_steps": 40000, "loss": 0.1007, "lr": 4.119286057631187e-05, "epoch": 1.7994126764010114, "percentage": 27.57, "elapsed_time": "2:31:26", "remaining_time": "6:37:44", "throughput": 2338.88, "total_tokens": 21251136} {"current_steps": 11035, "total_steps": 40000, "loss": 0.0291, "lr": 4.118537955700646e-05, "epoch": 1.800228403621829, "percentage": 27.59, "elapsed_time": "2:31:28", "remaining_time": "6:37:34", "throughput": 2339.58, "total_tokens": 21262320} {"current_steps": 11040, "total_steps": 40000, "loss": 0.0285, "lr": 4.11778960417121e-05, "epoch": 1.8010441308426461, "percentage": 27.6, "elapsed_time": "2:31:30", "remaining_time": "6:37:25", "throughput": 2340.14, "total_tokens": 21272256} {"current_steps": 11045, "total_steps": 40000, "loss": 0.0194, "lr": 4.117041003158288e-05, "epoch": 1.8018598580634637, "percentage": 27.61, "elapsed_time": "2:31:32", "remaining_time": "6:37:15", "throughput": 2340.63, "total_tokens": 21281600} {"current_steps": 11050, "total_steps": 40000, "loss": 0.0414, "lr": 4.1162921527773215e-05, "epoch": 1.8026755852842808, "percentage": 27.62, "elapsed_time": "2:31:34", "remaining_time": "6:37:06", "throughput": 2341.15, "total_tokens": 21291072} {"current_steps": 11055, "total_steps": 40000, "loss": 0.1555, "lr": 4.115543053143794e-05, "epoch": 1.8034913125050984, "percentage": 27.64, "elapsed_time": "2:31:36", "remaining_time": "6:36:56", "throughput": 2341.66, "total_tokens": 21300624} {"current_steps": 11060, "total_steps": 40000, "loss": 0.0485, "lr": 4.114793704373226e-05, "epoch": 1.8043070397259156, "percentage": 27.65, "elapsed_time": "2:31:38", "remaining_time": "6:36:47", "throughput": 2342.23, "total_tokens": 21310656} {"current_steps": 11065, "total_steps": 40000, "loss": 0.1177, "lr": 4.114044106581175e-05, "epoch": 1.8051227669467331, "percentage": 27.66, "elapsed_time": "2:31:40", "remaining_time": "6:36:37", "throughput": 2342.64, "total_tokens": 21319200} {"current_steps": 11070, "total_steps": 40000, "loss": 0.0286, "lr": 4.11329425988324e-05, "epoch": 1.8059384941675503, "percentage": 27.68, "elapsed_time": "2:31:42", "remaining_time": "6:36:28", "throughput": 2343.14, "total_tokens": 21328576} {"current_steps": 11075, "total_steps": 40000, "loss": 0.2685, "lr": 4.112544164395056e-05, "epoch": 1.8067542213883678, "percentage": 27.69, "elapsed_time": "2:31:44", "remaining_time": "6:36:18", "throughput": 2343.67, "total_tokens": 21338176} {"current_steps": 11080, "total_steps": 40000, "loss": 0.0934, "lr": 4.111793820232297e-05, "epoch": 1.807569948609185, "percentage": 27.7, "elapsed_time": "2:31:46", "remaining_time": "6:36:09", "throughput": 2344.2, "total_tokens": 21347936} {"current_steps": 11085, "total_steps": 40000, "loss": 0.1413, "lr": 4.1110432275106767e-05, "epoch": 1.8083856758300025, "percentage": 27.71, "elapsed_time": "2:31:48", "remaining_time": "6:35:59", "throughput": 2344.63, "total_tokens": 21356672} {"current_steps": 11090, "total_steps": 40000, "loss": 0.0822, "lr": 4.110292386345944e-05, "epoch": 1.8092014030508197, "percentage": 27.73, "elapsed_time": "2:31:50", "remaining_time": "6:35:50", "throughput": 2345.16, "total_tokens": 21366288} {"current_steps": 11095, "total_steps": 40000, "loss": 0.2335, "lr": 4.109541296853891e-05, "epoch": 1.8100171302716372, "percentage": 27.74, "elapsed_time": "2:31:52", "remaining_time": "6:35:41", "throughput": 2345.58, "total_tokens": 21375024} {"current_steps": 11100, "total_steps": 40000, "loss": 0.0993, "lr": 4.108789959150341e-05, "epoch": 1.8108328574924544, "percentage": 27.75, "elapsed_time": "2:31:54", "remaining_time": "6:35:31", "throughput": 2346.16, "total_tokens": 21385136} {"current_steps": 11105, "total_steps": 40000, "loss": 0.1442, "lr": 4.108038373351163e-05, "epoch": 1.811648584713272, "percentage": 27.76, "elapsed_time": "2:31:57", "remaining_time": "6:35:22", "throughput": 2346.5, "total_tokens": 21393024} {"current_steps": 11110, "total_steps": 40000, "loss": 0.0902, "lr": 4.10728653957226e-05, "epoch": 1.812464311934089, "percentage": 27.77, "elapsed_time": "2:31:59", "remaining_time": "6:35:12", "throughput": 2346.9, "total_tokens": 21401520} {"current_steps": 11115, "total_steps": 40000, "loss": 0.088, "lr": 4.106534457929575e-05, "epoch": 1.8132800391549067, "percentage": 27.79, "elapsed_time": "2:32:01", "remaining_time": "6:35:03", "throughput": 2347.35, "total_tokens": 21410544} {"current_steps": 11120, "total_steps": 40000, "loss": 0.095, "lr": 4.105782128539086e-05, "epoch": 1.814095766375724, "percentage": 27.8, "elapsed_time": "2:32:03", "remaining_time": "6:34:54", "throughput": 2347.71, "total_tokens": 21418656} {"current_steps": 11125, "total_steps": 40000, "loss": 0.0507, "lr": 4.1050295515168144e-05, "epoch": 1.8149114935965414, "percentage": 27.81, "elapsed_time": "2:32:05", "remaining_time": "6:34:44", "throughput": 2348.16, "total_tokens": 21427584} {"current_steps": 11130, "total_steps": 40000, "loss": 0.1394, "lr": 4.1042767269788155e-05, "epoch": 1.8157272208173587, "percentage": 27.82, "elapsed_time": "2:32:07", "remaining_time": "6:34:35", "throughput": 2348.59, "total_tokens": 21436416} {"current_steps": 11135, "total_steps": 40000, "loss": 0.1322, "lr": 4.103523655041185e-05, "epoch": 1.816542948038176, "percentage": 27.84, "elapsed_time": "2:32:09", "remaining_time": "6:34:25", "throughput": 2348.99, "total_tokens": 21444896} {"current_steps": 11140, "total_steps": 40000, "loss": 0.0651, "lr": 4.102770335820055e-05, "epoch": 1.8173586752589934, "percentage": 27.85, "elapsed_time": "2:32:11", "remaining_time": "6:34:16", "throughput": 2349.43, "total_tokens": 21453712} {"current_steps": 11145, "total_steps": 40000, "loss": 0.0251, "lr": 4.1020167694315984e-05, "epoch": 1.8181744024798108, "percentage": 27.86, "elapsed_time": "2:32:13", "remaining_time": "6:34:07", "throughput": 2349.88, "total_tokens": 21462704} {"current_steps": 11150, "total_steps": 40000, "loss": 0.0842, "lr": 4.101262955992023e-05, "epoch": 1.8189901297006281, "percentage": 27.88, "elapsed_time": "2:32:15", "remaining_time": "6:33:57", "throughput": 2350.26, "total_tokens": 21471024} {"current_steps": 11155, "total_steps": 40000, "loss": 0.0555, "lr": 4.100508895617578e-05, "epoch": 1.8198058569214455, "percentage": 27.89, "elapsed_time": "2:32:17", "remaining_time": "6:33:48", "throughput": 2350.75, "total_tokens": 21480352} {"current_steps": 11160, "total_steps": 40000, "loss": 0.155, "lr": 4.099754588424547e-05, "epoch": 1.8206215841422628, "percentage": 27.9, "elapsed_time": "2:32:19", "remaining_time": "6:33:39", "throughput": 2351.31, "total_tokens": 21490352} {"current_steps": 11165, "total_steps": 40000, "loss": 0.14, "lr": 4.0990000345292546e-05, "epoch": 1.8214373113630802, "percentage": 27.91, "elapsed_time": "2:32:21", "remaining_time": "6:33:29", "throughput": 2351.97, "total_tokens": 21501216} {"current_steps": 11170, "total_steps": 40000, "loss": 0.3381, "lr": 4.098245234048064e-05, "epoch": 1.8222530385838975, "percentage": 27.93, "elapsed_time": "2:32:23", "remaining_time": "6:33:20", "throughput": 2352.48, "total_tokens": 21510704} {"current_steps": 11175, "total_steps": 40000, "loss": 0.2451, "lr": 4.0974901870973726e-05, "epoch": 1.8230687658047149, "percentage": 27.94, "elapsed_time": "2:32:25", "remaining_time": "6:33:11", "throughput": 2353.04, "total_tokens": 21520704} {"current_steps": 11180, "total_steps": 40000, "loss": 0.3588, "lr": 4.096734893793619e-05, "epoch": 1.8238844930255322, "percentage": 27.95, "elapsed_time": "2:32:27", "remaining_time": "6:33:01", "throughput": 2353.53, "total_tokens": 21530016} {"current_steps": 11185, "total_steps": 40000, "loss": 0.128, "lr": 4.095979354253279e-05, "epoch": 1.8247002202463496, "percentage": 27.96, "elapsed_time": "2:32:30", "remaining_time": "6:32:52", "throughput": 2354.09, "total_tokens": 21540032} {"current_steps": 11190, "total_steps": 40000, "loss": 0.0568, "lr": 4.0952235685928656e-05, "epoch": 1.825515947467167, "percentage": 27.98, "elapsed_time": "2:32:32", "remaining_time": "6:32:43", "throughput": 2354.67, "total_tokens": 21550208} {"current_steps": 11195, "total_steps": 40000, "loss": 0.1102, "lr": 4.094467536928932e-05, "epoch": 1.8263316746879843, "percentage": 27.99, "elapsed_time": "2:32:34", "remaining_time": "6:32:33", "throughput": 2355.28, "total_tokens": 21560688} {"current_steps": 11200, "total_steps": 40000, "loss": 0.1986, "lr": 4.093711259378067e-05, "epoch": 1.8271474019088016, "percentage": 28.0, "elapsed_time": "2:32:36", "remaining_time": "6:32:24", "throughput": 2355.81, "total_tokens": 21570368} {"current_steps": 11200, "total_steps": 40000, "eval_loss": 0.130798801779747, "epoch": 1.8271474019088016, "percentage": 28.0, "elapsed_time": "2:33:56", "remaining_time": "6:35:51", "throughput": 2335.29, "total_tokens": 21570368} {"current_steps": 11205, "total_steps": 40000, "loss": 0.0715, "lr": 4.092954736056897e-05, "epoch": 1.827963129129619, "percentage": 28.01, "elapsed_time": "2:34:00", "remaining_time": "6:35:46", "throughput": 2335.43, "total_tokens": 21580208} {"current_steps": 11210, "total_steps": 40000, "loss": 0.2056, "lr": 4.09219796708209e-05, "epoch": 1.8287788563504366, "percentage": 28.02, "elapsed_time": "2:34:02", "remaining_time": "6:35:36", "throughput": 2335.96, "total_tokens": 21589920} {"current_steps": 11215, "total_steps": 40000, "loss": 0.0765, "lr": 4.0914409525703464e-05, "epoch": 1.8295945835712537, "percentage": 28.04, "elapsed_time": "2:34:04", "remaining_time": "6:35:27", "throughput": 2336.34, "total_tokens": 21598256} {"current_steps": 11220, "total_steps": 40000, "loss": 0.0475, "lr": 4.090683692638408e-05, "epoch": 1.8304103107920713, "percentage": 28.05, "elapsed_time": "2:34:06", "remaining_time": "6:35:18", "throughput": 2336.8, "total_tokens": 21607392} {"current_steps": 11225, "total_steps": 40000, "loss": 0.0989, "lr": 4.089926187403056e-05, "epoch": 1.8312260380128884, "percentage": 28.06, "elapsed_time": "2:34:08", "remaining_time": "6:35:08", "throughput": 2337.33, "total_tokens": 21617104} {"current_steps": 11230, "total_steps": 40000, "loss": 0.0637, "lr": 4.0891684369811044e-05, "epoch": 1.832041765233706, "percentage": 28.07, "elapsed_time": "2:34:10", "remaining_time": "6:34:59", "throughput": 2337.75, "total_tokens": 21625824} {"current_steps": 11235, "total_steps": 40000, "loss": 0.134, "lr": 4.0884104414894107e-05, "epoch": 1.8328574924545231, "percentage": 28.09, "elapsed_time": "2:34:12", "remaining_time": "6:34:49", "throughput": 2338.35, "total_tokens": 21636224} {"current_steps": 11240, "total_steps": 40000, "loss": 0.0927, "lr": 4.087652201044864e-05, "epoch": 1.8336732196753407, "percentage": 28.1, "elapsed_time": "2:34:14", "remaining_time": "6:34:40", "throughput": 2338.93, "total_tokens": 21646432} {"current_steps": 11245, "total_steps": 40000, "loss": 0.056, "lr": 4.086893715764397e-05, "epoch": 1.8344889468961578, "percentage": 28.11, "elapsed_time": "2:34:16", "remaining_time": "6:34:31", "throughput": 2339.44, "total_tokens": 21656048} {"current_steps": 11250, "total_steps": 40000, "loss": 0.0736, "lr": 4.086134985764977e-05, "epoch": 1.8353046741169754, "percentage": 28.12, "elapsed_time": "2:34:18", "remaining_time": "6:34:21", "throughput": 2339.99, "total_tokens": 21665872} {"current_steps": 11255, "total_steps": 40000, "loss": 0.0925, "lr": 4.0853760111636085e-05, "epoch": 1.8361204013377925, "percentage": 28.14, "elapsed_time": "2:34:21", "remaining_time": "6:34:12", "throughput": 2340.49, "total_tokens": 21675424} {"current_steps": 11260, "total_steps": 40000, "loss": 0.1874, "lr": 4.084616792077337e-05, "epoch": 1.83693612855861, "percentage": 28.15, "elapsed_time": "2:34:23", "remaining_time": "6:34:03", "throughput": 2341.03, "total_tokens": 21685264} {"current_steps": 11265, "total_steps": 40000, "loss": 0.0847, "lr": 4.083857328623243e-05, "epoch": 1.8377518557794272, "percentage": 28.16, "elapsed_time": "2:34:25", "remaining_time": "6:33:53", "throughput": 2341.49, "total_tokens": 21694384} {"current_steps": 11270, "total_steps": 40000, "loss": 0.1277, "lr": 4.083097620918444e-05, "epoch": 1.8385675830002448, "percentage": 28.18, "elapsed_time": "2:34:27", "remaining_time": "6:33:44", "throughput": 2342.03, "total_tokens": 21704224} {"current_steps": 11275, "total_steps": 40000, "loss": 0.1118, "lr": 4.082337669080097e-05, "epoch": 1.839383310221062, "percentage": 28.19, "elapsed_time": "2:34:29", "remaining_time": "6:33:35", "throughput": 2342.51, "total_tokens": 21713440} {"current_steps": 11280, "total_steps": 40000, "loss": 0.0841, "lr": 4.081577473225398e-05, "epoch": 1.8401990374418795, "percentage": 28.2, "elapsed_time": "2:34:31", "remaining_time": "6:33:25", "throughput": 2343.12, "total_tokens": 21723952} {"current_steps": 11285, "total_steps": 40000, "loss": 0.1091, "lr": 4.080817033471577e-05, "epoch": 1.8410147646626966, "percentage": 28.21, "elapsed_time": "2:34:33", "remaining_time": "6:33:16", "throughput": 2343.47, "total_tokens": 21732128} {"current_steps": 11290, "total_steps": 40000, "loss": 0.1538, "lr": 4.080056349935903e-05, "epoch": 1.8418304918835142, "percentage": 28.23, "elapsed_time": "2:34:35", "remaining_time": "6:33:07", "throughput": 2344.0, "total_tokens": 21741808} {"current_steps": 11295, "total_steps": 40000, "loss": 0.0616, "lr": 4.079295422735684e-05, "epoch": 1.8426462191043314, "percentage": 28.24, "elapsed_time": "2:34:37", "remaining_time": "6:32:57", "throughput": 2344.41, "total_tokens": 21750448} {"current_steps": 11300, "total_steps": 40000, "loss": 0.1477, "lr": 4.078534251988264e-05, "epoch": 1.843461946325149, "percentage": 28.25, "elapsed_time": "2:34:39", "remaining_time": "6:32:48", "throughput": 2344.92, "total_tokens": 21760048} {"current_steps": 11305, "total_steps": 40000, "loss": 0.0349, "lr": 4.077772837811025e-05, "epoch": 1.8442776735459663, "percentage": 28.26, "elapsed_time": "2:34:41", "remaining_time": "6:32:39", "throughput": 2345.32, "total_tokens": 21768592} {"current_steps": 11310, "total_steps": 40000, "loss": 0.203, "lr": 4.0770111803213874e-05, "epoch": 1.8450934007667836, "percentage": 28.27, "elapsed_time": "2:34:43", "remaining_time": "6:32:30", "throughput": 2345.95, "total_tokens": 21779328} {"current_steps": 11315, "total_steps": 40000, "loss": 0.2463, "lr": 4.076249279636807e-05, "epoch": 1.845909127987601, "percentage": 28.29, "elapsed_time": "2:34:45", "remaining_time": "6:32:20", "throughput": 2346.41, "total_tokens": 21788464} {"current_steps": 11320, "total_steps": 40000, "loss": 0.1112, "lr": 4.075487135874781e-05, "epoch": 1.8467248552084183, "percentage": 28.3, "elapsed_time": "2:34:47", "remaining_time": "6:32:11", "throughput": 2346.96, "total_tokens": 21798416} {"current_steps": 11325, "total_steps": 40000, "loss": 0.0442, "lr": 4.074724749152837e-05, "epoch": 1.8475405824292357, "percentage": 28.31, "elapsed_time": "2:34:49", "remaining_time": "6:32:02", "throughput": 2347.5, "total_tokens": 21808288} {"current_steps": 11330, "total_steps": 40000, "loss": 0.1133, "lr": 4.07396211958855e-05, "epoch": 1.848356309650053, "percentage": 28.32, "elapsed_time": "2:34:52", "remaining_time": "6:31:53", "throughput": 2348.03, "total_tokens": 21818080} {"current_steps": 11335, "total_steps": 40000, "loss": 0.1918, "lr": 4.073199247299523e-05, "epoch": 1.8491720368708704, "percentage": 28.34, "elapsed_time": "2:34:54", "remaining_time": "6:31:43", "throughput": 2348.46, "total_tokens": 21826880} {"current_steps": 11340, "total_steps": 40000, "loss": 0.1034, "lr": 4.072436132403403e-05, "epoch": 1.8499877640916877, "percentage": 28.35, "elapsed_time": "2:34:56", "remaining_time": "6:31:34", "throughput": 2348.99, "total_tokens": 21836688} {"current_steps": 11345, "total_steps": 40000, "loss": 0.089, "lr": 4.0716727750178704e-05, "epoch": 1.850803491312505, "percentage": 28.36, "elapsed_time": "2:34:58", "remaining_time": "6:31:25", "throughput": 2349.44, "total_tokens": 21845696} {"current_steps": 11350, "total_steps": 40000, "loss": 0.2052, "lr": 4.0709091752606455e-05, "epoch": 1.8516192185333225, "percentage": 28.38, "elapsed_time": "2:35:00", "remaining_time": "6:31:16", "throughput": 2349.99, "total_tokens": 21855648} {"current_steps": 11355, "total_steps": 40000, "loss": 0.0926, "lr": 4.070145333249484e-05, "epoch": 1.8524349457541398, "percentage": 28.39, "elapsed_time": "2:35:02", "remaining_time": "6:31:06", "throughput": 2350.55, "total_tokens": 21865744} {"current_steps": 11360, "total_steps": 40000, "loss": 0.072, "lr": 4.069381249102181e-05, "epoch": 1.8532506729749572, "percentage": 28.4, "elapsed_time": "2:35:04", "remaining_time": "6:30:57", "throughput": 2351.14, "total_tokens": 21876096} {"current_steps": 11365, "total_steps": 40000, "loss": 0.0346, "lr": 4.0686169229365665e-05, "epoch": 1.8540664001957745, "percentage": 28.41, "elapsed_time": "2:35:06", "remaining_time": "6:30:48", "throughput": 2351.7, "total_tokens": 21886144} {"current_steps": 11370, "total_steps": 40000, "loss": 0.2254, "lr": 4.067852354870511e-05, "epoch": 1.8548821274165919, "percentage": 28.43, "elapsed_time": "2:35:08", "remaining_time": "6:30:39", "throughput": 2352.17, "total_tokens": 21895344} {"current_steps": 11375, "total_steps": 40000, "loss": 0.1279, "lr": 4.067087545021919e-05, "epoch": 1.8556978546374092, "percentage": 28.44, "elapsed_time": "2:35:10", "remaining_time": "6:30:30", "throughput": 2352.6, "total_tokens": 21904192} {"current_steps": 11380, "total_steps": 40000, "loss": 0.1695, "lr": 4.066322493508734e-05, "epoch": 1.8565135818582266, "percentage": 28.45, "elapsed_time": "2:35:12", "remaining_time": "6:30:20", "throughput": 2353.14, "total_tokens": 21914080} {"current_steps": 11385, "total_steps": 40000, "loss": 0.1299, "lr": 4.065557200448937e-05, "epoch": 1.857329309079044, "percentage": 28.46, "elapsed_time": "2:35:14", "remaining_time": "6:30:11", "throughput": 2353.48, "total_tokens": 21922128} {"current_steps": 11390, "total_steps": 40000, "loss": 0.2197, "lr": 4.064791665960546e-05, "epoch": 1.8581450362998613, "percentage": 28.48, "elapsed_time": "2:35:16", "remaining_time": "6:30:02", "throughput": 2354.04, "total_tokens": 21932208} {"current_steps": 11395, "total_steps": 40000, "loss": 0.0516, "lr": 4.064025890161615e-05, "epoch": 1.8589607635206788, "percentage": 28.49, "elapsed_time": "2:35:18", "remaining_time": "6:29:53", "throughput": 2354.46, "total_tokens": 21940960} {"current_steps": 11400, "total_steps": 40000, "loss": 0.172, "lr": 4.0632598731702373e-05, "epoch": 1.859776490741496, "percentage": 28.5, "elapsed_time": "2:35:20", "remaining_time": "6:29:44", "throughput": 2355.0, "total_tokens": 21950896} {"current_steps": 11400, "total_steps": 40000, "eval_loss": 0.12488897144794464, "epoch": 1.859776490741496, "percentage": 28.5, "elapsed_time": "2:36:41", "remaining_time": "6:33:06", "throughput": 2334.82, "total_tokens": 21950896} {"current_steps": 11405, "total_steps": 40000, "loss": 0.163, "lr": 4.0624936151045426e-05, "epoch": 1.8605922179623136, "percentage": 28.51, "elapsed_time": "2:36:45", "remaining_time": "6:33:01", "throughput": 2334.9, "total_tokens": 21960224} {"current_steps": 11410, "total_steps": 40000, "loss": 0.1985, "lr": 4.061727116082696e-05, "epoch": 1.8614079451831307, "percentage": 28.52, "elapsed_time": "2:36:47", "remaining_time": "6:32:51", "throughput": 2335.38, "total_tokens": 21969552} {"current_steps": 11415, "total_steps": 40000, "loss": 0.0814, "lr": 4.060960376222903e-05, "epoch": 1.8622236724039483, "percentage": 28.54, "elapsed_time": "2:36:49", "remaining_time": "6:32:42", "throughput": 2335.73, "total_tokens": 21977712} {"current_steps": 11420, "total_steps": 40000, "loss": 0.1588, "lr": 4.0601933956434034e-05, "epoch": 1.8630393996247654, "percentage": 28.55, "elapsed_time": "2:36:51", "remaining_time": "6:32:33", "throughput": 2336.06, "total_tokens": 21985648} {"current_steps": 11425, "total_steps": 40000, "loss": 0.0636, "lr": 4.059426174462476e-05, "epoch": 1.863855126845583, "percentage": 28.56, "elapsed_time": "2:36:53", "remaining_time": "6:32:23", "throughput": 2336.62, "total_tokens": 21995696} {"current_steps": 11430, "total_steps": 40000, "loss": 0.103, "lr": 4.058658712798435e-05, "epoch": 1.8646708540664, "percentage": 28.57, "elapsed_time": "2:36:55", "remaining_time": "6:32:14", "throughput": 2337.22, "total_tokens": 22006240} {"current_steps": 11435, "total_steps": 40000, "loss": 0.0726, "lr": 4.0578910107696336e-05, "epoch": 1.8654865812872177, "percentage": 28.59, "elapsed_time": "2:36:57", "remaining_time": "6:32:05", "throughput": 2337.7, "total_tokens": 22015520} {"current_steps": 11440, "total_steps": 40000, "loss": 0.0477, "lr": 4.05712306849446e-05, "epoch": 1.8663023085080348, "percentage": 28.6, "elapsed_time": "2:36:59", "remaining_time": "6:31:56", "throughput": 2338.14, "total_tokens": 22024528} {"current_steps": 11445, "total_steps": 40000, "loss": 0.244, "lr": 4.0563548860913415e-05, "epoch": 1.8671180357288524, "percentage": 28.61, "elapsed_time": "2:37:01", "remaining_time": "6:31:47", "throughput": 2338.54, "total_tokens": 22033696} {"current_steps": 11450, "total_steps": 40000, "loss": 0.0617, "lr": 4.0555864636787414e-05, "epoch": 1.8679337629496695, "percentage": 28.62, "elapsed_time": "2:37:04", "remaining_time": "6:31:38", "throughput": 2339.08, "total_tokens": 22043600} {"current_steps": 11455, "total_steps": 40000, "loss": 0.0956, "lr": 4.054817801375159e-05, "epoch": 1.868749490170487, "percentage": 28.64, "elapsed_time": "2:37:06", "remaining_time": "6:31:29", "throughput": 2339.57, "total_tokens": 22053072} {"current_steps": 11460, "total_steps": 40000, "loss": 0.1962, "lr": 4.054048899299134e-05, "epoch": 1.8695652173913042, "percentage": 28.65, "elapsed_time": "2:37:08", "remaining_time": "6:31:19", "throughput": 2340.02, "total_tokens": 22062160} {"current_steps": 11465, "total_steps": 40000, "loss": 0.1343, "lr": 4.0532797575692385e-05, "epoch": 1.8703809446121218, "percentage": 28.66, "elapsed_time": "2:37:10", "remaining_time": "6:31:10", "throughput": 2340.55, "total_tokens": 22072000} {"current_steps": 11470, "total_steps": 40000, "loss": 0.1983, "lr": 4.052510376304085e-05, "epoch": 1.871196671832939, "percentage": 28.68, "elapsed_time": "2:37:12", "remaining_time": "6:31:01", "throughput": 2341.05, "total_tokens": 22081488} {"current_steps": 11475, "total_steps": 40000, "loss": 0.085, "lr": 4.051740755622321e-05, "epoch": 1.8720123990537565, "percentage": 28.69, "elapsed_time": "2:37:14", "remaining_time": "6:30:52", "throughput": 2341.5, "total_tokens": 22090576} {"current_steps": 11480, "total_steps": 40000, "loss": 0.1897, "lr": 4.050970895642632e-05, "epoch": 1.8728281262745736, "percentage": 28.7, "elapsed_time": "2:37:16", "remaining_time": "6:30:43", "throughput": 2341.83, "total_tokens": 22098624} {"current_steps": 11485, "total_steps": 40000, "loss": 0.0211, "lr": 4.050200796483741e-05, "epoch": 1.8736438534953912, "percentage": 28.71, "elapsed_time": "2:37:18", "remaining_time": "6:30:33", "throughput": 2342.29, "total_tokens": 22107776} {"current_steps": 11490, "total_steps": 40000, "loss": 0.125, "lr": 4.049430458264405e-05, "epoch": 1.8744595807162086, "percentage": 28.73, "elapsed_time": "2:37:20", "remaining_time": "6:30:24", "throughput": 2342.67, "total_tokens": 22116208} {"current_steps": 11495, "total_steps": 40000, "loss": 0.0643, "lr": 4.048659881103422e-05, "epoch": 1.875275307937026, "percentage": 28.74, "elapsed_time": "2:37:22", "remaining_time": "6:30:15", "throughput": 2343.3, "total_tokens": 22126960} {"current_steps": 11500, "total_steps": 40000, "loss": 0.167, "lr": 4.0478890651196235e-05, "epoch": 1.8760910351578433, "percentage": 28.75, "elapsed_time": "2:37:24", "remaining_time": "6:30:06", "throughput": 2343.93, "total_tokens": 22137792} {"current_steps": 11505, "total_steps": 40000, "loss": 0.1081, "lr": 4.047118010431879e-05, "epoch": 1.8769067623786606, "percentage": 28.76, "elapsed_time": "2:37:26", "remaining_time": "6:29:57", "throughput": 2344.46, "total_tokens": 22147648} {"current_steps": 11510, "total_steps": 40000, "loss": 0.0918, "lr": 4.046346717159094e-05, "epoch": 1.877722489599478, "percentage": 28.78, "elapsed_time": "2:37:28", "remaining_time": "6:29:48", "throughput": 2345.02, "total_tokens": 22157792} {"current_steps": 11515, "total_steps": 40000, "loss": 0.0152, "lr": 4.045575185420214e-05, "epoch": 1.8785382168202953, "percentage": 28.79, "elapsed_time": "2:37:30", "remaining_time": "6:29:39", "throughput": 2345.53, "total_tokens": 22167424} {"current_steps": 11520, "total_steps": 40000, "loss": 0.0752, "lr": 4.0448034153342165e-05, "epoch": 1.8793539440411127, "percentage": 28.8, "elapsed_time": "2:37:32", "remaining_time": "6:29:29", "throughput": 2345.96, "total_tokens": 22176368} {"current_steps": 11525, "total_steps": 40000, "loss": 0.2685, "lr": 4.0440314070201194e-05, "epoch": 1.88016967126193, "percentage": 28.81, "elapsed_time": "2:37:35", "remaining_time": "6:29:20", "throughput": 2346.44, "total_tokens": 22185744} {"current_steps": 11530, "total_steps": 40000, "loss": 0.0701, "lr": 4.043259160596976e-05, "epoch": 1.8809853984827474, "percentage": 28.82, "elapsed_time": "2:37:37", "remaining_time": "6:29:11", "throughput": 2346.87, "total_tokens": 22194704} {"current_steps": 11535, "total_steps": 40000, "loss": 0.18, "lr": 4.0424866761838767e-05, "epoch": 1.8818011257035647, "percentage": 28.84, "elapsed_time": "2:37:39", "remaining_time": "6:29:02", "throughput": 2347.54, "total_tokens": 22205808} {"current_steps": 11540, "total_steps": 40000, "loss": 0.1358, "lr": 4.041713953899948e-05, "epoch": 1.882616852924382, "percentage": 28.85, "elapsed_time": "2:37:41", "remaining_time": "6:28:53", "throughput": 2347.98, "total_tokens": 22214880} {"current_steps": 11545, "total_steps": 40000, "loss": 0.1095, "lr": 4.0409409938643515e-05, "epoch": 1.8834325801451994, "percentage": 28.86, "elapsed_time": "2:37:43", "remaining_time": "6:28:44", "throughput": 2348.33, "total_tokens": 22222976} {"current_steps": 11550, "total_steps": 40000, "loss": 0.0309, "lr": 4.0401677961962904e-05, "epoch": 1.8842483073660168, "percentage": 28.88, "elapsed_time": "2:37:45", "remaining_time": "6:28:35", "throughput": 2348.77, "total_tokens": 22232096} {"current_steps": 11555, "total_steps": 40000, "loss": 0.0822, "lr": 4.039394361015001e-05, "epoch": 1.8850640345868341, "percentage": 28.89, "elapsed_time": "2:37:47", "remaining_time": "6:28:26", "throughput": 2349.43, "total_tokens": 22243168} {"current_steps": 11560, "total_steps": 40000, "loss": 0.047, "lr": 4.038620688439755e-05, "epoch": 1.8858797618076515, "percentage": 28.9, "elapsed_time": "2:37:49", "remaining_time": "6:28:17", "throughput": 2349.82, "total_tokens": 22251648} {"current_steps": 11565, "total_steps": 40000, "loss": 0.0674, "lr": 4.037846778589862e-05, "epoch": 1.8866954890284688, "percentage": 28.91, "elapsed_time": "2:37:51", "remaining_time": "6:28:07", "throughput": 2350.42, "total_tokens": 22262208} {"current_steps": 11570, "total_steps": 40000, "loss": 0.3466, "lr": 4.0370726315846715e-05, "epoch": 1.8875112162492862, "percentage": 28.93, "elapsed_time": "2:37:53", "remaining_time": "6:27:58", "throughput": 2351.07, "total_tokens": 22273248} {"current_steps": 11575, "total_steps": 40000, "loss": 0.0865, "lr": 4.036298247543565e-05, "epoch": 1.8883269434701035, "percentage": 28.94, "elapsed_time": "2:37:55", "remaining_time": "6:27:49", "throughput": 2351.63, "total_tokens": 22283392} {"current_steps": 11580, "total_steps": 40000, "loss": 0.2373, "lr": 4.035523626585962e-05, "epoch": 1.8891426706909211, "percentage": 28.95, "elapsed_time": "2:37:57", "remaining_time": "6:27:40", "throughput": 2352.25, "total_tokens": 22294096} {"current_steps": 11585, "total_steps": 40000, "loss": 0.1683, "lr": 4.0347487688313194e-05, "epoch": 1.8899583979117383, "percentage": 28.96, "elapsed_time": "2:37:59", "remaining_time": "6:27:31", "throughput": 2352.72, "total_tokens": 22303424} {"current_steps": 11590, "total_steps": 40000, "loss": 0.0386, "lr": 4.0339736743991296e-05, "epoch": 1.8907741251325558, "percentage": 28.98, "elapsed_time": "2:38:01", "remaining_time": "6:27:22", "throughput": 2353.26, "total_tokens": 22313456} {"current_steps": 11595, "total_steps": 40000, "loss": 0.1035, "lr": 4.0331983434089227e-05, "epoch": 1.891589852353373, "percentage": 28.99, "elapsed_time": "2:38:03", "remaining_time": "6:27:13", "throughput": 2353.73, "total_tokens": 22322784} {"current_steps": 11600, "total_steps": 40000, "loss": 0.1569, "lr": 4.032422775980264e-05, "epoch": 1.8924055795741905, "percentage": 29.0, "elapsed_time": "2:38:06", "remaining_time": "6:27:04", "throughput": 2354.34, "total_tokens": 22333376} {"current_steps": 11600, "total_steps": 40000, "eval_loss": 0.13688918948173523, "epoch": 1.8924055795741905, "percentage": 29.0, "elapsed_time": "2:39:26", "remaining_time": "6:30:21", "throughput": 2334.5, "total_tokens": 22333376} {"current_steps": 11605, "total_steps": 40000, "loss": 0.1454, "lr": 4.031646972232754e-05, "epoch": 1.8932213067950077, "percentage": 29.01, "elapsed_time": "2:39:30", "remaining_time": "6:30:16", "throughput": 2334.56, "total_tokens": 22342944} {"current_steps": 11610, "total_steps": 40000, "loss": 0.1027, "lr": 4.0308709322860344e-05, "epoch": 1.8940370340158252, "percentage": 29.03, "elapsed_time": "2:39:32", "remaining_time": "6:30:07", "throughput": 2335.02, "total_tokens": 22352128} {"current_steps": 11615, "total_steps": 40000, "loss": 0.1561, "lr": 4.0300946562597784e-05, "epoch": 1.8948527612366424, "percentage": 29.04, "elapsed_time": "2:39:34", "remaining_time": "6:29:58", "throughput": 2335.62, "total_tokens": 22362688} {"current_steps": 11620, "total_steps": 40000, "loss": 0.1281, "lr": 4.029318144273698e-05, "epoch": 1.89566848845746, "percentage": 29.05, "elapsed_time": "2:39:36", "remaining_time": "6:29:49", "throughput": 2335.93, "total_tokens": 22370528} {"current_steps": 11625, "total_steps": 40000, "loss": 0.1501, "lr": 4.0285413964475415e-05, "epoch": 1.896484215678277, "percentage": 29.06, "elapsed_time": "2:39:38", "remaining_time": "6:29:40", "throughput": 2336.5, "total_tokens": 22380784} {"current_steps": 11630, "total_steps": 40000, "loss": 0.1648, "lr": 4.0277644129010927e-05, "epoch": 1.8972999428990946, "percentage": 29.07, "elapsed_time": "2:39:40", "remaining_time": "6:29:31", "throughput": 2337.08, "total_tokens": 22391200} {"current_steps": 11635, "total_steps": 40000, "loss": 0.169, "lr": 4.0269871937541724e-05, "epoch": 1.8981156701199118, "percentage": 29.09, "elapsed_time": "2:39:42", "remaining_time": "6:29:22", "throughput": 2337.53, "total_tokens": 22400288} {"current_steps": 11640, "total_steps": 40000, "loss": 0.1305, "lr": 4.026209739126637e-05, "epoch": 1.8989313973407294, "percentage": 29.1, "elapsed_time": "2:39:44", "remaining_time": "6:29:13", "throughput": 2337.95, "total_tokens": 22409152} {"current_steps": 11645, "total_steps": 40000, "loss": 0.1486, "lr": 4.025432049138381e-05, "epoch": 1.8997471245615465, "percentage": 29.11, "elapsed_time": "2:39:47", "remaining_time": "6:29:03", "throughput": 2338.46, "total_tokens": 22418880} {"current_steps": 11650, "total_steps": 40000, "loss": 0.1397, "lr": 4.0246541239093325e-05, "epoch": 1.900562851782364, "percentage": 29.12, "elapsed_time": "2:39:49", "remaining_time": "6:28:54", "throughput": 2338.91, "total_tokens": 22428048} {"current_steps": 11655, "total_steps": 40000, "loss": 0.1035, "lr": 4.023875963559459e-05, "epoch": 1.9013785790031812, "percentage": 29.14, "elapsed_time": "2:39:51", "remaining_time": "6:28:45", "throughput": 2339.33, "total_tokens": 22436944} {"current_steps": 11660, "total_steps": 40000, "loss": 0.1148, "lr": 4.023097568208761e-05, "epoch": 1.9021943062239988, "percentage": 29.15, "elapsed_time": "2:39:53", "remaining_time": "6:28:36", "throughput": 2339.87, "total_tokens": 22446960} {"current_steps": 11665, "total_steps": 40000, "loss": 0.0934, "lr": 4.022318937977277e-05, "epoch": 1.903010033444816, "percentage": 29.16, "elapsed_time": "2:39:55", "remaining_time": "6:28:27", "throughput": 2340.49, "total_tokens": 22457728} {"current_steps": 11670, "total_steps": 40000, "loss": 0.0977, "lr": 4.021540072985084e-05, "epoch": 1.9038257606656335, "percentage": 29.18, "elapsed_time": "2:39:57", "remaining_time": "6:28:18", "throughput": 2341.03, "total_tokens": 22467712} {"current_steps": 11675, "total_steps": 40000, "loss": 0.1934, "lr": 4.020760973352289e-05, "epoch": 1.9046414878864508, "percentage": 29.19, "elapsed_time": "2:39:59", "remaining_time": "6:28:09", "throughput": 2341.58, "total_tokens": 22477824} {"current_steps": 11680, "total_steps": 40000, "loss": 0.0492, "lr": 4.019981639199042e-05, "epoch": 1.9054572151072682, "percentage": 29.2, "elapsed_time": "2:40:01", "remaining_time": "6:28:00", "throughput": 2342.07, "total_tokens": 22487408} {"current_steps": 11685, "total_steps": 40000, "loss": 0.0995, "lr": 4.0192020706455245e-05, "epoch": 1.9062729423280855, "percentage": 29.21, "elapsed_time": "2:40:03", "remaining_time": "6:27:51", "throughput": 2342.52, "total_tokens": 22496560} {"current_steps": 11690, "total_steps": 40000, "loss": 0.0873, "lr": 4.018422267811956e-05, "epoch": 1.9070886695489029, "percentage": 29.23, "elapsed_time": "2:40:05", "remaining_time": "6:27:42", "throughput": 2343.06, "total_tokens": 22506608} {"current_steps": 11695, "total_steps": 40000, "loss": 0.1287, "lr": 4.017642230818592e-05, "epoch": 1.9079043967697202, "percentage": 29.24, "elapsed_time": "2:40:07", "remaining_time": "6:27:33", "throughput": 2343.52, "total_tokens": 22515808} {"current_steps": 11700, "total_steps": 40000, "loss": 0.1053, "lr": 4.0168619597857246e-05, "epoch": 1.9087201239905376, "percentage": 29.25, "elapsed_time": "2:40:09", "remaining_time": "6:27:24", "throughput": 2343.92, "total_tokens": 22524544} {"current_steps": 11705, "total_steps": 40000, "loss": 0.0744, "lr": 4.016081454833681e-05, "epoch": 1.909535851211355, "percentage": 29.26, "elapsed_time": "2:40:11", "remaining_time": "6:27:15", "throughput": 2344.41, "total_tokens": 22534080} {"current_steps": 11710, "total_steps": 40000, "loss": 0.099, "lr": 4.0153007160828245e-05, "epoch": 1.9103515784321723, "percentage": 29.28, "elapsed_time": "2:40:13", "remaining_time": "6:27:06", "throughput": 2344.82, "total_tokens": 22542800} {"current_steps": 11715, "total_steps": 40000, "loss": 0.1392, "lr": 4.0145197436535555e-05, "epoch": 1.9111673056529896, "percentage": 29.29, "elapsed_time": "2:40:15", "remaining_time": "6:26:57", "throughput": 2345.41, "total_tokens": 22553392} {"current_steps": 11720, "total_steps": 40000, "loss": 0.075, "lr": 4.0137385376663095e-05, "epoch": 1.911983032873807, "percentage": 29.3, "elapsed_time": "2:40:18", "remaining_time": "6:26:48", "throughput": 2345.79, "total_tokens": 22561936} {"current_steps": 11725, "total_steps": 40000, "loss": 0.1363, "lr": 4.012957098241558e-05, "epoch": 1.9127987600946244, "percentage": 29.31, "elapsed_time": "2:40:20", "remaining_time": "6:26:39", "throughput": 2346.26, "total_tokens": 22571312} {"current_steps": 11730, "total_steps": 40000, "loss": 0.0631, "lr": 4.0121754254998076e-05, "epoch": 1.9136144873154417, "percentage": 29.33, "elapsed_time": "2:40:22", "remaining_time": "6:26:29", "throughput": 2346.77, "total_tokens": 22580976} {"current_steps": 11735, "total_steps": 40000, "loss": 0.1084, "lr": 4.011393519561606e-05, "epoch": 1.914430214536259, "percentage": 29.34, "elapsed_time": "2:40:24", "remaining_time": "6:26:20", "throughput": 2347.11, "total_tokens": 22589152} {"current_steps": 11740, "total_steps": 40000, "loss": 0.1011, "lr": 4.010611380547529e-05, "epoch": 1.9152459417570764, "percentage": 29.35, "elapsed_time": "2:40:26", "remaining_time": "6:26:11", "throughput": 2347.53, "total_tokens": 22597968} {"current_steps": 11745, "total_steps": 40000, "loss": 0.1118, "lr": 4.009829008578192e-05, "epoch": 1.9160616689778938, "percentage": 29.36, "elapsed_time": "2:40:28", "remaining_time": "6:26:02", "throughput": 2348.04, "total_tokens": 22607760} {"current_steps": 11750, "total_steps": 40000, "loss": 0.0663, "lr": 4.00904640377425e-05, "epoch": 1.9168773961987111, "percentage": 29.38, "elapsed_time": "2:40:30", "remaining_time": "6:25:54", "throughput": 2348.47, "total_tokens": 22616816} {"current_steps": 11755, "total_steps": 40000, "loss": 0.0533, "lr": 4.0082635662563886e-05, "epoch": 1.9176931234195285, "percentage": 29.39, "elapsed_time": "2:40:32", "remaining_time": "6:25:45", "throughput": 2348.94, "total_tokens": 22626192} {"current_steps": 11760, "total_steps": 40000, "loss": 0.322, "lr": 4.007480496145331e-05, "epoch": 1.9185088506403458, "percentage": 29.4, "elapsed_time": "2:40:34", "remaining_time": "6:25:36", "throughput": 2349.54, "total_tokens": 22636832} {"current_steps": 11765, "total_steps": 40000, "loss": 0.071, "lr": 4.006697193561837e-05, "epoch": 1.9193245778611632, "percentage": 29.41, "elapsed_time": "2:40:36", "remaining_time": "6:25:27", "throughput": 2350.18, "total_tokens": 22647776} {"current_steps": 11770, "total_steps": 40000, "loss": 0.1343, "lr": 4.005913658626701e-05, "epoch": 1.9201403050819805, "percentage": 29.43, "elapsed_time": "2:40:38", "remaining_time": "6:25:18", "throughput": 2350.55, "total_tokens": 22656256} {"current_steps": 11775, "total_steps": 40000, "loss": 0.1678, "lr": 4.005129891460754e-05, "epoch": 1.920956032302798, "percentage": 29.44, "elapsed_time": "2:40:40", "remaining_time": "6:25:09", "throughput": 2351.16, "total_tokens": 22666992} {"current_steps": 11780, "total_steps": 40000, "loss": 0.1674, "lr": 4.004345892184864e-05, "epoch": 1.9217717595236152, "percentage": 29.45, "elapsed_time": "2:40:42", "remaining_time": "6:25:00", "throughput": 2351.49, "total_tokens": 22674992} {"current_steps": 11785, "total_steps": 40000, "loss": 0.2303, "lr": 4.003561660919932e-05, "epoch": 1.9225874867444328, "percentage": 29.46, "elapsed_time": "2:40:44", "remaining_time": "6:24:51", "throughput": 2352.09, "total_tokens": 22685648} {"current_steps": 11790, "total_steps": 40000, "loss": 0.0613, "lr": 4.002777197786897e-05, "epoch": 1.92340321396525, "percentage": 29.48, "elapsed_time": "2:40:46", "remaining_time": "6:24:42", "throughput": 2352.48, "total_tokens": 22694304} {"current_steps": 11795, "total_steps": 40000, "loss": 0.1392, "lr": 4.0019925029067326e-05, "epoch": 1.9242189411860675, "percentage": 29.49, "elapsed_time": "2:40:49", "remaining_time": "6:24:33", "throughput": 2352.96, "total_tokens": 22703808} {"current_steps": 11800, "total_steps": 40000, "loss": 0.0447, "lr": 4.0012075764004495e-05, "epoch": 1.9250346684068846, "percentage": 29.5, "elapsed_time": "2:40:51", "remaining_time": "6:24:24", "throughput": 2353.56, "total_tokens": 22714512} {"current_steps": 11800, "total_steps": 40000, "eval_loss": 0.12658576667308807, "epoch": 1.9250346684068846, "percentage": 29.5, "elapsed_time": "2:42:11", "remaining_time": "6:27:36", "throughput": 2334.1, "total_tokens": 22714512} {"current_steps": 11805, "total_steps": 40000, "loss": 0.1459, "lr": 4.000422418389094e-05, "epoch": 1.9258503956277022, "percentage": 29.51, "elapsed_time": "2:42:15", "remaining_time": "6:27:31", "throughput": 2334.26, "total_tokens": 22724432} {"current_steps": 11810, "total_steps": 40000, "loss": 0.1346, "lr": 3.999637028993744e-05, "epoch": 1.9266661228485193, "percentage": 29.53, "elapsed_time": "2:42:17", "remaining_time": "6:27:22", "throughput": 2334.83, "total_tokens": 22734736} {"current_steps": 11815, "total_steps": 40000, "loss": 0.2249, "lr": 3.99885140833552e-05, "epoch": 1.927481850069337, "percentage": 29.54, "elapsed_time": "2:42:19", "remaining_time": "6:27:13", "throughput": 2335.43, "total_tokens": 22745440} {"current_steps": 11820, "total_steps": 40000, "loss": 0.0285, "lr": 3.998065556535572e-05, "epoch": 1.928297577290154, "percentage": 29.55, "elapsed_time": "2:42:21", "remaining_time": "6:27:04", "throughput": 2335.77, "total_tokens": 22753568} {"current_steps": 11825, "total_steps": 40000, "loss": 0.1078, "lr": 3.9972794737150895e-05, "epoch": 1.9291133045109716, "percentage": 29.56, "elapsed_time": "2:42:23", "remaining_time": "6:26:55", "throughput": 2336.31, "total_tokens": 22763680} {"current_steps": 11830, "total_steps": 40000, "loss": 0.199, "lr": 3.996493159995297e-05, "epoch": 1.9299290317317888, "percentage": 29.58, "elapsed_time": "2:42:25", "remaining_time": "6:26:46", "throughput": 2336.8, "total_tokens": 22773264} {"current_steps": 11835, "total_steps": 40000, "loss": 0.0878, "lr": 3.995706615497453e-05, "epoch": 1.9307447589526063, "percentage": 29.59, "elapsed_time": "2:42:27", "remaining_time": "6:26:37", "throughput": 2337.28, "total_tokens": 22782784} {"current_steps": 11840, "total_steps": 40000, "loss": 0.1731, "lr": 3.994919840342852e-05, "epoch": 1.9315604861734235, "percentage": 29.6, "elapsed_time": "2:42:29", "remaining_time": "6:26:28", "throughput": 2337.74, "total_tokens": 22792064} {"current_steps": 11845, "total_steps": 40000, "loss": 0.1423, "lr": 3.994132834652825e-05, "epoch": 1.932376213394241, "percentage": 29.61, "elapsed_time": "2:42:31", "remaining_time": "6:26:19", "throughput": 2338.11, "total_tokens": 22800560} {"current_steps": 11850, "total_steps": 40000, "loss": 0.0517, "lr": 3.99334559854874e-05, "epoch": 1.9331919406150582, "percentage": 29.62, "elapsed_time": "2:42:33", "remaining_time": "6:26:10", "throughput": 2338.76, "total_tokens": 22811728} {"current_steps": 11855, "total_steps": 40000, "loss": 0.1561, "lr": 3.9925581321519955e-05, "epoch": 1.9340076678358757, "percentage": 29.64, "elapsed_time": "2:42:35", "remaining_time": "6:26:01", "throughput": 2339.1, "total_tokens": 22819904} {"current_steps": 11860, "total_steps": 40000, "loss": 0.1673, "lr": 3.991770435584031e-05, "epoch": 1.934823395056693, "percentage": 29.65, "elapsed_time": "2:42:37", "remaining_time": "6:25:52", "throughput": 2339.5, "total_tokens": 22828592} {"current_steps": 11865, "total_steps": 40000, "loss": 0.1037, "lr": 3.990982508966319e-05, "epoch": 1.9356391222775104, "percentage": 29.66, "elapsed_time": "2:42:39", "remaining_time": "6:25:43", "throughput": 2340.01, "total_tokens": 22838464} {"current_steps": 11870, "total_steps": 40000, "loss": 0.0987, "lr": 3.990194352420367e-05, "epoch": 1.9364548494983278, "percentage": 29.68, "elapsed_time": "2:42:42", "remaining_time": "6:25:34", "throughput": 2340.44, "total_tokens": 22847488} {"current_steps": 11875, "total_steps": 40000, "loss": 0.1024, "lr": 3.9894059660677184e-05, "epoch": 1.9372705767191452, "percentage": 29.69, "elapsed_time": "2:42:44", "remaining_time": "6:25:25", "throughput": 2340.86, "total_tokens": 22856384} {"current_steps": 11880, "total_steps": 40000, "loss": 0.1456, "lr": 3.9886173500299526e-05, "epoch": 1.9380863039399625, "percentage": 29.7, "elapsed_time": "2:42:46", "remaining_time": "6:25:16", "throughput": 2341.37, "total_tokens": 22866224} {"current_steps": 11885, "total_steps": 40000, "loss": 0.1909, "lr": 3.987828504428685e-05, "epoch": 1.9389020311607799, "percentage": 29.71, "elapsed_time": "2:42:48", "remaining_time": "6:25:07", "throughput": 2341.92, "total_tokens": 22876512} {"current_steps": 11890, "total_steps": 40000, "loss": 0.1144, "lr": 3.987039429385565e-05, "epoch": 1.9397177583815972, "percentage": 29.73, "elapsed_time": "2:42:50", "remaining_time": "6:24:58", "throughput": 2342.48, "total_tokens": 22886800} {"current_steps": 11895, "total_steps": 40000, "loss": 0.1038, "lr": 3.986250125022277e-05, "epoch": 1.9405334856024146, "percentage": 29.74, "elapsed_time": "2:42:52", "remaining_time": "6:24:49", "throughput": 2342.9, "total_tokens": 22895728} {"current_steps": 11900, "total_steps": 40000, "loss": 0.1028, "lr": 3.985460591460544e-05, "epoch": 1.941349212823232, "percentage": 29.75, "elapsed_time": "2:42:54", "remaining_time": "6:24:40", "throughput": 2343.4, "total_tokens": 22905424} {"current_steps": 11905, "total_steps": 40000, "loss": 0.0809, "lr": 3.984670828822118e-05, "epoch": 1.9421649400440493, "percentage": 29.76, "elapsed_time": "2:42:56", "remaining_time": "6:24:31", "throughput": 2343.78, "total_tokens": 22914016} {"current_steps": 11910, "total_steps": 40000, "loss": 0.1635, "lr": 3.983880837228794e-05, "epoch": 1.9429806672648666, "percentage": 29.78, "elapsed_time": "2:42:58", "remaining_time": "6:24:23", "throughput": 2344.35, "total_tokens": 22924448} {"current_steps": 11915, "total_steps": 40000, "loss": 0.0779, "lr": 3.983090616802396e-05, "epoch": 1.943796394485684, "percentage": 29.79, "elapsed_time": "2:43:00", "remaining_time": "6:24:14", "throughput": 2344.74, "total_tokens": 22933152} {"current_steps": 11920, "total_steps": 40000, "loss": 0.2566, "lr": 3.982300167664788e-05, "epoch": 1.9446121217065013, "percentage": 29.8, "elapsed_time": "2:43:02", "remaining_time": "6:24:05", "throughput": 2345.16, "total_tokens": 22942112} {"current_steps": 11925, "total_steps": 40000, "loss": 0.1974, "lr": 3.981509489937868e-05, "epoch": 1.9454278489273187, "percentage": 29.81, "elapsed_time": "2:43:04", "remaining_time": "6:23:56", "throughput": 2345.62, "total_tokens": 22951360} {"current_steps": 11930, "total_steps": 40000, "loss": 0.1519, "lr": 3.9807185837435643e-05, "epoch": 1.946243576148136, "percentage": 29.83, "elapsed_time": "2:43:06", "remaining_time": "6:23:47", "throughput": 2346.19, "total_tokens": 22961824} {"current_steps": 11935, "total_steps": 40000, "loss": 0.131, "lr": 3.9799274492038484e-05, "epoch": 1.9470593033689534, "percentage": 29.84, "elapsed_time": "2:43:08", "remaining_time": "6:23:38", "throughput": 2346.61, "total_tokens": 22970784} {"current_steps": 11940, "total_steps": 40000, "loss": 0.1022, "lr": 3.979136086440722e-05, "epoch": 1.9478750305897707, "percentage": 29.85, "elapsed_time": "2:43:10", "remaining_time": "6:23:29", "throughput": 2347.16, "total_tokens": 22980960} {"current_steps": 11945, "total_steps": 40000, "loss": 0.1027, "lr": 3.9783444955762226e-05, "epoch": 1.948690757810588, "percentage": 29.86, "elapsed_time": "2:43:13", "remaining_time": "6:23:20", "throughput": 2347.7, "total_tokens": 22991136} {"current_steps": 11950, "total_steps": 40000, "loss": 0.0607, "lr": 3.977552676732424e-05, "epoch": 1.9495064850314054, "percentage": 29.88, "elapsed_time": "2:43:15", "remaining_time": "6:23:11", "throughput": 2348.2, "total_tokens": 23000896} {"current_steps": 11955, "total_steps": 40000, "loss": 0.1324, "lr": 3.976760630031435e-05, "epoch": 1.9503222122522228, "percentage": 29.89, "elapsed_time": "2:43:17", "remaining_time": "6:23:03", "throughput": 2348.69, "total_tokens": 23010528} {"current_steps": 11960, "total_steps": 40000, "loss": 0.1389, "lr": 3.975968355595398e-05, "epoch": 1.9511379394730404, "percentage": 29.9, "elapsed_time": "2:43:19", "remaining_time": "6:22:54", "throughput": 2349.04, "total_tokens": 23018800} {"current_steps": 11965, "total_steps": 40000, "loss": 0.0626, "lr": 3.9751758535464935e-05, "epoch": 1.9519536666938575, "percentage": 29.91, "elapsed_time": "2:43:21", "remaining_time": "6:22:45", "throughput": 2349.62, "total_tokens": 23029392} {"current_steps": 11970, "total_steps": 40000, "loss": 0.0931, "lr": 3.9743831240069326e-05, "epoch": 1.952769393914675, "percentage": 29.93, "elapsed_time": "2:43:23", "remaining_time": "6:22:36", "throughput": 2350.24, "total_tokens": 23040336} {"current_steps": 11975, "total_steps": 40000, "loss": 0.1788, "lr": 3.9735901670989675e-05, "epoch": 1.9535851211354922, "percentage": 29.94, "elapsed_time": "2:43:25", "remaining_time": "6:22:27", "throughput": 2350.8, "total_tokens": 23050672} {"current_steps": 11980, "total_steps": 40000, "loss": 0.2206, "lr": 3.97279698294488e-05, "epoch": 1.9544008483563098, "percentage": 29.95, "elapsed_time": "2:43:27", "remaining_time": "6:22:18", "throughput": 2351.26, "total_tokens": 23059984} {"current_steps": 11985, "total_steps": 40000, "loss": 0.0651, "lr": 3.9720035716669876e-05, "epoch": 1.955216575577127, "percentage": 29.96, "elapsed_time": "2:43:29", "remaining_time": "6:22:09", "throughput": 2351.71, "total_tokens": 23069296} {"current_steps": 11990, "total_steps": 40000, "loss": 0.0489, "lr": 3.9712099333876474e-05, "epoch": 1.9560323027979445, "percentage": 29.98, "elapsed_time": "2:43:31", "remaining_time": "6:22:01", "throughput": 2352.26, "total_tokens": 23079568} {"current_steps": 11995, "total_steps": 40000, "loss": 0.0948, "lr": 3.9704160682292475e-05, "epoch": 1.9568480300187616, "percentage": 29.99, "elapsed_time": "2:43:33", "remaining_time": "6:21:52", "throughput": 2352.78, "total_tokens": 23089536} {"current_steps": 12000, "total_steps": 40000, "loss": 0.081, "lr": 3.9696219763142106e-05, "epoch": 1.9576637572395792, "percentage": 30.0, "elapsed_time": "2:43:35", "remaining_time": "6:21:43", "throughput": 2353.34, "total_tokens": 23099888} {"current_steps": 12000, "total_steps": 40000, "eval_loss": 0.13295525312423706, "epoch": 1.9576637572395792, "percentage": 30.0, "elapsed_time": "2:44:56", "remaining_time": "6:24:51", "throughput": 2334.15, "total_tokens": 23099888} {"current_steps": 12005, "total_steps": 40000, "loss": 0.0909, "lr": 3.968827657764997e-05, "epoch": 1.9584794844603963, "percentage": 30.01, "elapsed_time": "2:45:00", "remaining_time": "6:24:46", "throughput": 2334.17, "total_tokens": 23108880} {"current_steps": 12010, "total_steps": 40000, "loss": 0.1165, "lr": 3.9680331127041e-05, "epoch": 1.959295211681214, "percentage": 30.03, "elapsed_time": "2:45:02", "remaining_time": "6:24:37", "throughput": 2334.54, "total_tokens": 23117376} {"current_steps": 12015, "total_steps": 40000, "loss": 0.0923, "lr": 3.9672383412540495e-05, "epoch": 1.960110938902031, "percentage": 30.04, "elapsed_time": "2:45:04", "remaining_time": "6:24:29", "throughput": 2334.99, "total_tokens": 23126624} {"current_steps": 12020, "total_steps": 40000, "loss": 0.2495, "lr": 3.966443343537407e-05, "epoch": 1.9609266661228486, "percentage": 30.05, "elapsed_time": "2:45:06", "remaining_time": "6:24:20", "throughput": 2335.41, "total_tokens": 23135600} {"current_steps": 12025, "total_steps": 40000, "loss": 0.1055, "lr": 3.965648119676772e-05, "epoch": 1.9617423933436657, "percentage": 30.06, "elapsed_time": "2:45:08", "remaining_time": "6:24:11", "throughput": 2335.95, "total_tokens": 23145824} {"current_steps": 12030, "total_steps": 40000, "loss": 0.1431, "lr": 3.96485266979478e-05, "epoch": 1.9625581205644833, "percentage": 30.08, "elapsed_time": "2:45:10", "remaining_time": "6:24:02", "throughput": 2336.47, "total_tokens": 23155808} {"current_steps": 12035, "total_steps": 40000, "loss": 0.1876, "lr": 3.9640569940140974e-05, "epoch": 1.9633738477853004, "percentage": 30.09, "elapsed_time": "2:45:12", "remaining_time": "6:23:53", "throughput": 2337.08, "total_tokens": 23166720} {"current_steps": 12040, "total_steps": 40000, "loss": 0.1271, "lr": 3.963261092457428e-05, "epoch": 1.964189575006118, "percentage": 30.1, "elapsed_time": "2:45:14", "remaining_time": "6:23:44", "throughput": 2337.6, "total_tokens": 23176672} {"current_steps": 12045, "total_steps": 40000, "loss": 0.1418, "lr": 3.962464965247509e-05, "epoch": 1.9650053022269351, "percentage": 30.11, "elapsed_time": "2:45:16", "remaining_time": "6:23:35", "throughput": 2338.18, "total_tokens": 23187248} {"current_steps": 12050, "total_steps": 40000, "loss": 0.0295, "lr": 3.9616686125071135e-05, "epoch": 1.9658210294477527, "percentage": 30.12, "elapsed_time": "2:45:18", "remaining_time": "6:23:26", "throughput": 2338.61, "total_tokens": 23196416} {"current_steps": 12055, "total_steps": 40000, "loss": 0.1338, "lr": 3.9608720343590506e-05, "epoch": 1.96663675666857, "percentage": 30.14, "elapsed_time": "2:45:20", "remaining_time": "6:23:17", "throughput": 2338.96, "total_tokens": 23204736} {"current_steps": 12060, "total_steps": 40000, "loss": 0.0553, "lr": 3.960075230926161e-05, "epoch": 1.9674524838893874, "percentage": 30.15, "elapsed_time": "2:45:23", "remaining_time": "6:23:09", "throughput": 2339.55, "total_tokens": 23215360} {"current_steps": 12065, "total_steps": 40000, "loss": 0.0282, "lr": 3.959278202331322e-05, "epoch": 1.9682682111102048, "percentage": 30.16, "elapsed_time": "2:45:25", "remaining_time": "6:23:00", "throughput": 2340.02, "total_tokens": 23224848} {"current_steps": 12070, "total_steps": 40000, "loss": 0.1068, "lr": 3.958480948697446e-05, "epoch": 1.9690839383310221, "percentage": 30.18, "elapsed_time": "2:45:27", "remaining_time": "6:22:51", "throughput": 2340.54, "total_tokens": 23234848} {"current_steps": 12075, "total_steps": 40000, "loss": 0.1102, "lr": 3.95768347014748e-05, "epoch": 1.9698996655518395, "percentage": 30.19, "elapsed_time": "2:45:29", "remaining_time": "6:22:42", "throughput": 2341.05, "total_tokens": 23244800} {"current_steps": 12080, "total_steps": 40000, "loss": 0.0127, "lr": 3.956885766804404e-05, "epoch": 1.9707153927726568, "percentage": 30.2, "elapsed_time": "2:45:31", "remaining_time": "6:22:33", "throughput": 2341.52, "total_tokens": 23254336} {"current_steps": 12085, "total_steps": 40000, "loss": 0.0432, "lr": 3.956087838791235e-05, "epoch": 1.9715311199934742, "percentage": 30.21, "elapsed_time": "2:45:33", "remaining_time": "6:22:24", "throughput": 2342.01, "total_tokens": 23264000} {"current_steps": 12090, "total_steps": 40000, "loss": 0.1326, "lr": 3.955289686231022e-05, "epoch": 1.9723468472142915, "percentage": 30.23, "elapsed_time": "2:45:35", "remaining_time": "6:22:16", "throughput": 2342.47, "total_tokens": 23273456} {"current_steps": 12095, "total_steps": 40000, "loss": 0.1302, "lr": 3.9544913092468504e-05, "epoch": 1.973162574435109, "percentage": 30.24, "elapsed_time": "2:45:37", "remaining_time": "6:22:07", "throughput": 2342.91, "total_tokens": 23282624} {"current_steps": 12100, "total_steps": 40000, "loss": 0.0454, "lr": 3.9536927079618425e-05, "epoch": 1.9739783016559262, "percentage": 30.25, "elapsed_time": "2:45:39", "remaining_time": "6:21:58", "throughput": 2343.43, "total_tokens": 23292640} {"current_steps": 12105, "total_steps": 40000, "loss": 0.1884, "lr": 3.9528938824991494e-05, "epoch": 1.9747940288767436, "percentage": 30.26, "elapsed_time": "2:45:41", "remaining_time": "6:21:49", "throughput": 2343.8, "total_tokens": 23301168} {"current_steps": 12110, "total_steps": 40000, "loss": 0.064, "lr": 3.952094832981962e-05, "epoch": 1.975609756097561, "percentage": 30.28, "elapsed_time": "2:45:43", "remaining_time": "6:21:40", "throughput": 2344.36, "total_tokens": 23311648} {"current_steps": 12115, "total_steps": 40000, "loss": 0.1217, "lr": 3.951295559533503e-05, "epoch": 1.9764254833183783, "percentage": 30.29, "elapsed_time": "2:45:45", "remaining_time": "6:21:32", "throughput": 2344.85, "total_tokens": 23321408} {"current_steps": 12120, "total_steps": 40000, "loss": 0.2666, "lr": 3.95049606227703e-05, "epoch": 1.9772412105391957, "percentage": 30.3, "elapsed_time": "2:45:47", "remaining_time": "6:21:23", "throughput": 2345.26, "total_tokens": 23330320} {"current_steps": 12125, "total_steps": 40000, "loss": 0.2908, "lr": 3.949696341335838e-05, "epoch": 1.978056937760013, "percentage": 30.31, "elapsed_time": "2:45:49", "remaining_time": "6:21:14", "throughput": 2345.76, "total_tokens": 23340160} {"current_steps": 12130, "total_steps": 40000, "loss": 0.0664, "lr": 3.9488963968332503e-05, "epoch": 1.9788726649808304, "percentage": 30.33, "elapsed_time": "2:45:51", "remaining_time": "6:21:05", "throughput": 2346.24, "total_tokens": 23349776} {"current_steps": 12135, "total_steps": 40000, "loss": 0.1394, "lr": 3.948096228892631e-05, "epoch": 1.9796883922016477, "percentage": 30.34, "elapsed_time": "2:45:54", "remaining_time": "6:20:56", "throughput": 2346.8, "total_tokens": 23360144} {"current_steps": 12140, "total_steps": 40000, "loss": 0.1058, "lr": 3.947295837637375e-05, "epoch": 1.980504119422465, "percentage": 30.35, "elapsed_time": "2:45:56", "remaining_time": "6:20:48", "throughput": 2347.31, "total_tokens": 23370128} {"current_steps": 12145, "total_steps": 40000, "loss": 0.0586, "lr": 3.9464952231909135e-05, "epoch": 1.9813198466432826, "percentage": 30.36, "elapsed_time": "2:45:58", "remaining_time": "6:20:39", "throughput": 2347.77, "total_tokens": 23379584} {"current_steps": 12150, "total_steps": 40000, "loss": 0.0716, "lr": 3.945694385676711e-05, "epoch": 1.9821355738640998, "percentage": 30.38, "elapsed_time": "2:46:00", "remaining_time": "6:20:30", "throughput": 2348.13, "total_tokens": 23388032} {"current_steps": 12155, "total_steps": 40000, "loss": 0.0747, "lr": 3.944893325218265e-05, "epoch": 1.9829513010849174, "percentage": 30.39, "elapsed_time": "2:46:02", "remaining_time": "6:20:22", "throughput": 2348.54, "total_tokens": 23396960} {"current_steps": 12160, "total_steps": 40000, "loss": 0.1213, "lr": 3.944092041939112e-05, "epoch": 1.9837670283057345, "percentage": 30.4, "elapsed_time": "2:46:04", "remaining_time": "6:20:13", "throughput": 2348.99, "total_tokens": 23406272} {"current_steps": 12165, "total_steps": 40000, "loss": 0.1572, "lr": 3.943290535962818e-05, "epoch": 1.984582755526552, "percentage": 30.41, "elapsed_time": "2:46:06", "remaining_time": "6:20:04", "throughput": 2349.53, "total_tokens": 23416512} {"current_steps": 12170, "total_steps": 40000, "loss": 0.1441, "lr": 3.942488807412985e-05, "epoch": 1.9853984827473692, "percentage": 30.43, "elapsed_time": "2:46:08", "remaining_time": "6:19:55", "throughput": 2349.95, "total_tokens": 23425632} {"current_steps": 12175, "total_steps": 40000, "loss": 0.0955, "lr": 3.941686856413251e-05, "epoch": 1.9862142099681868, "percentage": 30.44, "elapsed_time": "2:46:10", "remaining_time": "6:19:47", "throughput": 2350.46, "total_tokens": 23435504} {"current_steps": 12180, "total_steps": 40000, "loss": 0.1278, "lr": 3.9408846830872874e-05, "epoch": 1.987029937189004, "percentage": 30.45, "elapsed_time": "2:46:12", "remaining_time": "6:19:38", "throughput": 2350.86, "total_tokens": 23444336} {"current_steps": 12185, "total_steps": 40000, "loss": 0.0861, "lr": 3.940082287558798e-05, "epoch": 1.9878456644098215, "percentage": 30.46, "elapsed_time": "2:46:14", "remaining_time": "6:19:29", "throughput": 2351.36, "total_tokens": 23454256} {"current_steps": 12190, "total_steps": 40000, "loss": 0.0658, "lr": 3.939279669951522e-05, "epoch": 1.9886613916306386, "percentage": 30.48, "elapsed_time": "2:46:16", "remaining_time": "6:19:20", "throughput": 2351.76, "total_tokens": 23463072} {"current_steps": 12195, "total_steps": 40000, "loss": 0.1574, "lr": 3.938476830389234e-05, "epoch": 1.9894771188514562, "percentage": 30.49, "elapsed_time": "2:46:18", "remaining_time": "6:19:12", "throughput": 2352.25, "total_tokens": 23472816} {"current_steps": 12200, "total_steps": 40000, "loss": 0.0422, "lr": 3.937673768995742e-05, "epoch": 1.9902928460722733, "percentage": 30.5, "elapsed_time": "2:46:20", "remaining_time": "6:19:03", "throughput": 2352.72, "total_tokens": 23482400} {"current_steps": 12200, "total_steps": 40000, "eval_loss": 0.1475783884525299, "epoch": 1.9902928460722733, "percentage": 30.5, "elapsed_time": "2:47:41", "remaining_time": "6:22:07", "throughput": 2333.89, "total_tokens": 23482400} {"current_steps": 12205, "total_steps": 40000, "loss": 0.1418, "lr": 3.936870485894888e-05, "epoch": 1.9911085732930909, "percentage": 30.51, "elapsed_time": "2:47:45", "remaining_time": "6:22:02", "throughput": 2333.91, "total_tokens": 23491392} {"current_steps": 12210, "total_steps": 40000, "loss": 0.0384, "lr": 3.9360669812105475e-05, "epoch": 1.991924300513908, "percentage": 30.53, "elapsed_time": "2:47:47", "remaining_time": "6:21:53", "throughput": 2334.42, "total_tokens": 23501344} {"current_steps": 12215, "total_steps": 40000, "loss": 0.112, "lr": 3.9352632550666325e-05, "epoch": 1.9927400277347256, "percentage": 30.54, "elapsed_time": "2:47:49", "remaining_time": "6:21:44", "throughput": 2334.83, "total_tokens": 23510304} {"current_steps": 12220, "total_steps": 40000, "loss": 0.1603, "lr": 3.9344593075870866e-05, "epoch": 1.9935557549555427, "percentage": 30.55, "elapsed_time": "2:47:51", "remaining_time": "6:21:35", "throughput": 2335.33, "total_tokens": 23520224} {"current_steps": 12225, "total_steps": 40000, "loss": 0.1286, "lr": 3.933655138895889e-05, "epoch": 1.9943714821763603, "percentage": 30.56, "elapsed_time": "2:47:53", "remaining_time": "6:21:26", "throughput": 2335.71, "total_tokens": 23528816} {"current_steps": 12230, "total_steps": 40000, "loss": 0.0376, "lr": 3.932850749117053e-05, "epoch": 1.9951872093971774, "percentage": 30.58, "elapsed_time": "2:47:55", "remaining_time": "6:21:18", "throughput": 2336.16, "total_tokens": 23538176} {"current_steps": 12235, "total_steps": 40000, "loss": 0.1369, "lr": 3.932046138374624e-05, "epoch": 1.996002936617995, "percentage": 30.59, "elapsed_time": "2:47:57", "remaining_time": "6:21:09", "throughput": 2336.52, "total_tokens": 23546608} {"current_steps": 12240, "total_steps": 40000, "loss": 0.0934, "lr": 3.9312413067926854e-05, "epoch": 1.9968186638388123, "percentage": 30.6, "elapsed_time": "2:47:59", "remaining_time": "6:21:00", "throughput": 2336.93, "total_tokens": 23555664} {"current_steps": 12245, "total_steps": 40000, "loss": 0.0359, "lr": 3.9304362544953506e-05, "epoch": 1.9976343910596297, "percentage": 30.61, "elapsed_time": "2:48:01", "remaining_time": "6:20:51", "throughput": 2337.37, "total_tokens": 23564912} {"current_steps": 12250, "total_steps": 40000, "loss": 0.045, "lr": 3.929630981606769e-05, "epoch": 1.998450118280447, "percentage": 30.63, "elapsed_time": "2:48:03", "remaining_time": "6:20:43", "throughput": 2337.89, "total_tokens": 23574976} {"current_steps": 12255, "total_steps": 40000, "loss": 0.0779, "lr": 3.928825488251124e-05, "epoch": 1.9992658455012644, "percentage": 30.64, "elapsed_time": "2:48:05", "remaining_time": "6:20:34", "throughput": 2338.46, "total_tokens": 23585520} {"current_steps": 12260, "total_steps": 40000, "loss": 0.1414, "lr": 3.9280197745526344e-05, "epoch": 2.0, "percentage": 30.65, "elapsed_time": "2:48:07", "remaining_time": "6:20:25", "throughput": 2338.86, "total_tokens": 23594288} {"current_steps": 12265, "total_steps": 40000, "loss": 0.1358, "lr": 3.9272138406355495e-05, "epoch": 2.0008157272208176, "percentage": 30.66, "elapsed_time": "2:48:10", "remaining_time": "6:20:17", "throughput": 2339.22, "total_tokens": 23603248} {"current_steps": 12270, "total_steps": 40000, "loss": 0.2254, "lr": 3.926407686624154e-05, "epoch": 2.0016314544416347, "percentage": 30.68, "elapsed_time": "2:48:12", "remaining_time": "6:20:08", "throughput": 2339.6, "total_tokens": 23611920} {"current_steps": 12275, "total_steps": 40000, "loss": 0.0869, "lr": 3.9256013126427684e-05, "epoch": 2.0024471816624523, "percentage": 30.69, "elapsed_time": "2:48:14", "remaining_time": "6:19:59", "throughput": 2340.15, "total_tokens": 23622272} {"current_steps": 12280, "total_steps": 40000, "loss": 0.0782, "lr": 3.9247947188157455e-05, "epoch": 2.0032629088832694, "percentage": 30.7, "elapsed_time": "2:48:16", "remaining_time": "6:19:50", "throughput": 2340.54, "total_tokens": 23631104} {"current_steps": 12285, "total_steps": 40000, "loss": 0.0145, "lr": 3.9239879052674715e-05, "epoch": 2.004078636104087, "percentage": 30.71, "elapsed_time": "2:48:18", "remaining_time": "6:19:42", "throughput": 2340.88, "total_tokens": 23639328} {"current_steps": 12290, "total_steps": 40000, "loss": 0.0988, "lr": 3.9231808721223673e-05, "epoch": 2.004894363324904, "percentage": 30.73, "elapsed_time": "2:48:20", "remaining_time": "6:19:33", "throughput": 2341.3, "total_tokens": 23648416} {"current_steps": 12295, "total_steps": 40000, "loss": 0.1461, "lr": 3.9223736195048886e-05, "epoch": 2.0057100905457217, "percentage": 30.74, "elapsed_time": "2:48:22", "remaining_time": "6:19:24", "throughput": 2341.68, "total_tokens": 23657184} {"current_steps": 12300, "total_steps": 40000, "loss": 0.1292, "lr": 3.921566147539523e-05, "epoch": 2.006525817766539, "percentage": 30.75, "elapsed_time": "2:48:24", "remaining_time": "6:19:16", "throughput": 2342.21, "total_tokens": 23667312} {"current_steps": 12305, "total_steps": 40000, "loss": 0.0744, "lr": 3.920758456350792e-05, "epoch": 2.0073415449873564, "percentage": 30.76, "elapsed_time": "2:48:26", "remaining_time": "6:19:07", "throughput": 2342.72, "total_tokens": 23677344} {"current_steps": 12310, "total_steps": 40000, "loss": 0.0706, "lr": 3.919950546063253e-05, "epoch": 2.0081572722081735, "percentage": 30.78, "elapsed_time": "2:48:28", "remaining_time": "6:18:58", "throughput": 2343.28, "total_tokens": 23687856} {"current_steps": 12315, "total_steps": 40000, "loss": 0.0147, "lr": 3.919142416801496e-05, "epoch": 2.008972999428991, "percentage": 30.79, "elapsed_time": "2:48:30", "remaining_time": "6:18:50", "throughput": 2343.65, "total_tokens": 23696432} {"current_steps": 12320, "total_steps": 40000, "loss": 0.0479, "lr": 3.918334068690144e-05, "epoch": 2.0097887266498082, "percentage": 30.8, "elapsed_time": "2:48:32", "remaining_time": "6:18:41", "throughput": 2344.18, "total_tokens": 23706560} {"current_steps": 12325, "total_steps": 40000, "loss": 0.0771, "lr": 3.917525501853855e-05, "epoch": 2.010604453870626, "percentage": 30.81, "elapsed_time": "2:48:35", "remaining_time": "6:18:32", "throughput": 2344.64, "total_tokens": 23716144} {"current_steps": 12330, "total_steps": 40000, "loss": 0.0398, "lr": 3.916716716417319e-05, "epoch": 2.011420181091443, "percentage": 30.83, "elapsed_time": "2:48:37", "remaining_time": "6:18:23", "throughput": 2345.13, "total_tokens": 23725856} {"current_steps": 12335, "total_steps": 40000, "loss": 0.0166, "lr": 3.915907712505263e-05, "epoch": 2.0122359083122605, "percentage": 30.84, "elapsed_time": "2:48:39", "remaining_time": "6:18:15", "throughput": 2345.48, "total_tokens": 23734240} {"current_steps": 12340, "total_steps": 40000, "loss": 0.2305, "lr": 3.915098490242444e-05, "epoch": 2.0130516355330776, "percentage": 30.85, "elapsed_time": "2:48:41", "remaining_time": "6:18:06", "throughput": 2345.98, "total_tokens": 23744208} {"current_steps": 12345, "total_steps": 40000, "loss": 0.0099, "lr": 3.914289049753654e-05, "epoch": 2.013867362753895, "percentage": 30.86, "elapsed_time": "2:48:43", "remaining_time": "6:17:57", "throughput": 2346.51, "total_tokens": 23754432} {"current_steps": 12350, "total_steps": 40000, "loss": 0.1615, "lr": 3.913479391163719e-05, "epoch": 2.0146830899747123, "percentage": 30.88, "elapsed_time": "2:48:45", "remaining_time": "6:17:49", "throughput": 2346.98, "total_tokens": 23764064} {"current_steps": 12355, "total_steps": 40000, "loss": 0.1182, "lr": 3.9126695145975e-05, "epoch": 2.01549881719553, "percentage": 30.89, "elapsed_time": "2:48:47", "remaining_time": "6:17:40", "throughput": 2347.5, "total_tokens": 23774144} {"current_steps": 12360, "total_steps": 40000, "loss": 0.1337, "lr": 3.911859420179889e-05, "epoch": 2.016314544416347, "percentage": 30.9, "elapsed_time": "2:48:49", "remaining_time": "6:17:32", "throughput": 2347.86, "total_tokens": 23782624} {"current_steps": 12365, "total_steps": 40000, "loss": 0.0262, "lr": 3.911049108035813e-05, "epoch": 2.0171302716371646, "percentage": 30.91, "elapsed_time": "2:48:51", "remaining_time": "6:17:23", "throughput": 2348.39, "total_tokens": 23792896} {"current_steps": 12370, "total_steps": 40000, "loss": 0.0695, "lr": 3.910238578290232e-05, "epoch": 2.0179459988579818, "percentage": 30.93, "elapsed_time": "2:48:53", "remaining_time": "6:17:14", "throughput": 2348.9, "total_tokens": 23802864} {"current_steps": 12375, "total_steps": 40000, "loss": 0.0427, "lr": 3.90942783106814e-05, "epoch": 2.0187617260787993, "percentage": 30.94, "elapsed_time": "2:48:55", "remaining_time": "6:17:06", "throughput": 2349.2, "total_tokens": 23810784} {"current_steps": 12380, "total_steps": 40000, "loss": 0.0128, "lr": 3.908616866494564e-05, "epoch": 2.0195774532996165, "percentage": 30.95, "elapsed_time": "2:48:57", "remaining_time": "6:16:57", "throughput": 2349.64, "total_tokens": 23820096} {"current_steps": 12385, "total_steps": 40000, "loss": 0.1098, "lr": 3.907805684694566e-05, "epoch": 2.020393180520434, "percentage": 30.96, "elapsed_time": "2:48:59", "remaining_time": "6:16:48", "throughput": 2350.16, "total_tokens": 23830272} {"current_steps": 12390, "total_steps": 40000, "loss": 0.0673, "lr": 3.90699428579324e-05, "epoch": 2.021208907741251, "percentage": 30.98, "elapsed_time": "2:49:01", "remaining_time": "6:16:40", "throughput": 2350.6, "total_tokens": 23839536} {"current_steps": 12395, "total_steps": 40000, "loss": 0.0686, "lr": 3.906182669915713e-05, "epoch": 2.0220246349620687, "percentage": 30.99, "elapsed_time": "2:49:03", "remaining_time": "6:16:31", "throughput": 2351.09, "total_tokens": 23849392} {"current_steps": 12400, "total_steps": 40000, "loss": 0.0054, "lr": 3.9053708371871476e-05, "epoch": 2.022840362182886, "percentage": 31.0, "elapsed_time": "2:49:06", "remaining_time": "6:16:23", "throughput": 2351.67, "total_tokens": 23860160} {"current_steps": 12400, "total_steps": 40000, "eval_loss": 0.16377586126327515, "epoch": 2.022840362182886, "percentage": 31.0, "elapsed_time": "2:50:26", "remaining_time": "6:19:22", "throughput": 2333.13, "total_tokens": 23860160} {"current_steps": 12405, "total_steps": 40000, "loss": 0.0705, "lr": 3.904558787732738e-05, "epoch": 2.0236560894037035, "percentage": 31.01, "elapsed_time": "2:50:30", "remaining_time": "6:19:18", "throughput": 2333.15, "total_tokens": 23869568} {"current_steps": 12410, "total_steps": 40000, "loss": 0.1036, "lr": 3.9037465216777135e-05, "epoch": 2.0244718166245206, "percentage": 31.03, "elapsed_time": "2:50:32", "remaining_time": "6:19:09", "throughput": 2333.7, "total_tokens": 23880016} {"current_steps": 12415, "total_steps": 40000, "loss": 0.0733, "lr": 3.902934039147334e-05, "epoch": 2.025287543845338, "percentage": 31.04, "elapsed_time": "2:50:34", "remaining_time": "6:19:00", "throughput": 2334.14, "total_tokens": 23889312} {"current_steps": 12420, "total_steps": 40000, "loss": 0.0574, "lr": 3.902121340266894e-05, "epoch": 2.0261032710661553, "percentage": 31.05, "elapsed_time": "2:50:36", "remaining_time": "6:18:52", "throughput": 2334.66, "total_tokens": 23899536} {"current_steps": 12425, "total_steps": 40000, "loss": 0.1242, "lr": 3.9013084251617246e-05, "epoch": 2.026918998286973, "percentage": 31.06, "elapsed_time": "2:50:38", "remaining_time": "6:18:43", "throughput": 2335.05, "total_tokens": 23908304} {"current_steps": 12430, "total_steps": 40000, "loss": 0.0382, "lr": 3.9004952939571865e-05, "epoch": 2.02773472550779, "percentage": 31.08, "elapsed_time": "2:50:40", "remaining_time": "6:18:34", "throughput": 2335.62, "total_tokens": 23918992} {"current_steps": 12435, "total_steps": 40000, "loss": 0.0729, "lr": 3.899681946778673e-05, "epoch": 2.0285504527286076, "percentage": 31.09, "elapsed_time": "2:50:43", "remaining_time": "6:18:25", "throughput": 2335.94, "total_tokens": 23927136} {"current_steps": 12440, "total_steps": 40000, "loss": 0.0241, "lr": 3.898868383751615e-05, "epoch": 2.0293661799494247, "percentage": 31.1, "elapsed_time": "2:50:45", "remaining_time": "6:18:17", "throughput": 2336.49, "total_tokens": 23937616} {"current_steps": 12445, "total_steps": 40000, "loss": 0.0767, "lr": 3.8980546050014724e-05, "epoch": 2.0301819071702423, "percentage": 31.11, "elapsed_time": "2:50:47", "remaining_time": "6:18:08", "throughput": 2337.0, "total_tokens": 23947584} {"current_steps": 12450, "total_steps": 40000, "loss": 0.075, "lr": 3.897240610653741e-05, "epoch": 2.03099763439106, "percentage": 31.13, "elapsed_time": "2:50:49", "remaining_time": "6:18:00", "throughput": 2337.51, "total_tokens": 23957632} {"current_steps": 12455, "total_steps": 40000, "loss": 0.1369, "lr": 3.896426400833948e-05, "epoch": 2.031813361611877, "percentage": 31.14, "elapsed_time": "2:50:51", "remaining_time": "6:17:51", "throughput": 2337.99, "total_tokens": 23967440} {"current_steps": 12460, "total_steps": 40000, "loss": 0.0745, "lr": 3.895611975667656e-05, "epoch": 2.0326290888326946, "percentage": 31.15, "elapsed_time": "2:50:53", "remaining_time": "6:17:42", "throughput": 2338.45, "total_tokens": 23976928} {"current_steps": 12465, "total_steps": 40000, "loss": 0.0903, "lr": 3.8947973352804584e-05, "epoch": 2.0334448160535117, "percentage": 31.16, "elapsed_time": "2:50:55", "remaining_time": "6:17:34", "throughput": 2339.03, "total_tokens": 23987728} {"current_steps": 12470, "total_steps": 40000, "loss": 0.1446, "lr": 3.893982479797984e-05, "epoch": 2.0342605432743293, "percentage": 31.18, "elapsed_time": "2:50:57", "remaining_time": "6:17:25", "throughput": 2339.52, "total_tokens": 23997568} {"current_steps": 12475, "total_steps": 40000, "loss": 0.0257, "lr": 3.8931674093458926e-05, "epoch": 2.0350762704951464, "percentage": 31.19, "elapsed_time": "2:50:59", "remaining_time": "6:17:16", "throughput": 2340.02, "total_tokens": 24007568} {"current_steps": 12480, "total_steps": 40000, "loss": 0.0376, "lr": 3.89235212404988e-05, "epoch": 2.035891997715964, "percentage": 31.2, "elapsed_time": "2:51:01", "remaining_time": "6:17:08", "throughput": 2340.57, "total_tokens": 24018032} {"current_steps": 12485, "total_steps": 40000, "loss": 0.0933, "lr": 3.891536624035672e-05, "epoch": 2.036707724936781, "percentage": 31.21, "elapsed_time": "2:51:03", "remaining_time": "6:16:59", "throughput": 2340.99, "total_tokens": 24027168} {"current_steps": 12490, "total_steps": 40000, "loss": 0.0045, "lr": 3.8907209094290295e-05, "epoch": 2.0375234521575987, "percentage": 31.23, "elapsed_time": "2:51:05", "remaining_time": "6:16:50", "throughput": 2341.5, "total_tokens": 24037296} {"current_steps": 12495, "total_steps": 40000, "loss": 0.0211, "lr": 3.8899049803557466e-05, "epoch": 2.038339179378416, "percentage": 31.24, "elapsed_time": "2:51:07", "remaining_time": "6:16:42", "throughput": 2341.89, "total_tokens": 24046112} {"current_steps": 12500, "total_steps": 40000, "loss": 0.0791, "lr": 3.889088836941648e-05, "epoch": 2.0391549065992334, "percentage": 31.25, "elapsed_time": "2:51:09", "remaining_time": "6:16:33", "throughput": 2342.26, "total_tokens": 24054768} {"current_steps": 12505, "total_steps": 40000, "loss": 0.0026, "lr": 3.8882724793125946e-05, "epoch": 2.0399706338200505, "percentage": 31.26, "elapsed_time": "2:51:11", "remaining_time": "6:16:25", "throughput": 2342.83, "total_tokens": 24065520} {"current_steps": 12510, "total_steps": 40000, "loss": 0.0083, "lr": 3.8874559075944794e-05, "epoch": 2.040786361040868, "percentage": 31.27, "elapsed_time": "2:51:14", "remaining_time": "6:16:16", "throughput": 2343.39, "total_tokens": 24076112} {"current_steps": 12515, "total_steps": 40000, "loss": 0.0916, "lr": 3.886639121913227e-05, "epoch": 2.041602088261685, "percentage": 31.29, "elapsed_time": "2:51:16", "remaining_time": "6:16:08", "throughput": 2343.86, "total_tokens": 24085824} {"current_steps": 12520, "total_steps": 40000, "loss": 0.0455, "lr": 3.885822122394797e-05, "epoch": 2.042417815482503, "percentage": 31.3, "elapsed_time": "2:51:18", "remaining_time": "6:15:59", "throughput": 2344.41, "total_tokens": 24096304} {"current_steps": 12525, "total_steps": 40000, "loss": 0.0587, "lr": 3.8850049091651794e-05, "epoch": 2.04323354270332, "percentage": 31.31, "elapsed_time": "2:51:20", "remaining_time": "6:15:50", "throughput": 2344.9, "total_tokens": 24106192} {"current_steps": 12530, "total_steps": 40000, "loss": 0.0099, "lr": 3.8841874823504e-05, "epoch": 2.0440492699241375, "percentage": 31.32, "elapsed_time": "2:51:22", "remaining_time": "6:15:42", "throughput": 2345.4, "total_tokens": 24116160} {"current_steps": 12535, "total_steps": 40000, "loss": 0.0339, "lr": 3.8833698420765157e-05, "epoch": 2.0448649971449546, "percentage": 31.34, "elapsed_time": "2:51:24", "remaining_time": "6:15:33", "throughput": 2345.83, "total_tokens": 24125408} {"current_steps": 12540, "total_steps": 40000, "loss": 0.0572, "lr": 3.882551988469618e-05, "epoch": 2.045680724365772, "percentage": 31.35, "elapsed_time": "2:51:26", "remaining_time": "6:15:25", "throughput": 2346.31, "total_tokens": 24135216} {"current_steps": 12545, "total_steps": 40000, "loss": 0.0738, "lr": 3.881733921655829e-05, "epoch": 2.0464964515865893, "percentage": 31.36, "elapsed_time": "2:51:28", "remaining_time": "6:15:16", "throughput": 2346.76, "total_tokens": 24144640} {"current_steps": 12550, "total_steps": 40000, "loss": 0.0435, "lr": 3.8809156417613054e-05, "epoch": 2.047312178807407, "percentage": 31.37, "elapsed_time": "2:51:30", "remaining_time": "6:15:08", "throughput": 2347.18, "total_tokens": 24153792} {"current_steps": 12555, "total_steps": 40000, "loss": 0.3246, "lr": 3.8800971489122364e-05, "epoch": 2.048127906028224, "percentage": 31.39, "elapsed_time": "2:51:32", "remaining_time": "6:14:59", "throughput": 2347.56, "total_tokens": 24162624} {"current_steps": 12560, "total_steps": 40000, "loss": 0.0013, "lr": 3.8792784432348434e-05, "epoch": 2.0489436332490416, "percentage": 31.4, "elapsed_time": "2:51:34", "remaining_time": "6:14:50", "throughput": 2347.99, "total_tokens": 24171856} {"current_steps": 12565, "total_steps": 40000, "loss": 0.1352, "lr": 3.878459524855381e-05, "epoch": 2.0497593604698587, "percentage": 31.41, "elapsed_time": "2:51:36", "remaining_time": "6:14:42", "throughput": 2348.49, "total_tokens": 24181888} {"current_steps": 12570, "total_steps": 40000, "loss": 0.0932, "lr": 3.8776403939001384e-05, "epoch": 2.0505750876906763, "percentage": 31.42, "elapsed_time": "2:51:38", "remaining_time": "6:14:33", "throughput": 2348.96, "total_tokens": 24191600} {"current_steps": 12575, "total_steps": 40000, "loss": 0.0151, "lr": 3.876821050495433e-05, "epoch": 2.0513908149114934, "percentage": 31.44, "elapsed_time": "2:51:40", "remaining_time": "6:14:25", "throughput": 2349.39, "total_tokens": 24200816} {"current_steps": 12580, "total_steps": 40000, "loss": 0.1306, "lr": 3.87600149476762e-05, "epoch": 2.052206542132311, "percentage": 31.45, "elapsed_time": "2:51:42", "remaining_time": "6:14:16", "throughput": 2349.94, "total_tokens": 24211344} {"current_steps": 12585, "total_steps": 40000, "loss": 0.0063, "lr": 3.8751817268430843e-05, "epoch": 2.053022269353128, "percentage": 31.46, "elapsed_time": "2:51:45", "remaining_time": "6:14:08", "throughput": 2350.42, "total_tokens": 24221136} {"current_steps": 12590, "total_steps": 40000, "loss": 0.0471, "lr": 3.8743617468482464e-05, "epoch": 2.0538379965739457, "percentage": 31.47, "elapsed_time": "2:51:47", "remaining_time": "6:13:59", "throughput": 2350.77, "total_tokens": 24229616} {"current_steps": 12595, "total_steps": 40000, "loss": 0.0062, "lr": 3.8735415549095535e-05, "epoch": 2.054653723794763, "percentage": 31.49, "elapsed_time": "2:51:49", "remaining_time": "6:13:51", "throughput": 2351.23, "total_tokens": 24239232} {"current_steps": 12600, "total_steps": 40000, "loss": 0.1379, "lr": 3.8727211511534934e-05, "epoch": 2.0554694510155804, "percentage": 31.5, "elapsed_time": "2:51:51", "remaining_time": "6:13:42", "throughput": 2351.71, "total_tokens": 24249008} {"current_steps": 12600, "total_steps": 40000, "eval_loss": 0.2060728222131729, "epoch": 2.0554694510155804, "percentage": 31.5, "elapsed_time": "2:53:11", "remaining_time": "6:16:38", "throughput": 2333.45, "total_tokens": 24249008} {"current_steps": 12605, "total_steps": 40000, "loss": 0.085, "lr": 3.8719005357065804e-05, "epoch": 2.0562851782363976, "percentage": 31.51, "elapsed_time": "2:53:15", "remaining_time": "6:16:33", "throughput": 2333.53, "total_tokens": 24258432} {"current_steps": 12610, "total_steps": 40000, "loss": 0.1083, "lr": 3.8710797086953645e-05, "epoch": 2.057100905457215, "percentage": 31.52, "elapsed_time": "2:53:17", "remaining_time": "6:16:24", "throughput": 2334.0, "total_tokens": 24268128} {"current_steps": 12615, "total_steps": 40000, "loss": 0.0447, "lr": 3.870258670246427e-05, "epoch": 2.0579166326780323, "percentage": 31.54, "elapsed_time": "2:53:19", "remaining_time": "6:16:15", "throughput": 2334.54, "total_tokens": 24278496} {"current_steps": 12620, "total_steps": 40000, "loss": 0.1042, "lr": 3.869437420486384e-05, "epoch": 2.05873235989885, "percentage": 31.55, "elapsed_time": "2:53:21", "remaining_time": "6:16:07", "throughput": 2334.97, "total_tokens": 24287792} {"current_steps": 12625, "total_steps": 40000, "loss": 0.048, "lr": 3.8686159595418805e-05, "epoch": 2.059548087119667, "percentage": 31.56, "elapsed_time": "2:53:23", "remaining_time": "6:15:58", "throughput": 2335.28, "total_tokens": 24295856} {"current_steps": 12630, "total_steps": 40000, "loss": 0.0075, "lr": 3.867794287539597e-05, "epoch": 2.0603638143404845, "percentage": 31.57, "elapsed_time": "2:53:25", "remaining_time": "6:15:50", "throughput": 2335.84, "total_tokens": 24306528} {"current_steps": 12635, "total_steps": 40000, "loss": 0.0828, "lr": 3.866972404606245e-05, "epoch": 2.0611795415613017, "percentage": 31.59, "elapsed_time": "2:53:27", "remaining_time": "6:15:41", "throughput": 2336.26, "total_tokens": 24315760} {"current_steps": 12640, "total_steps": 40000, "loss": 0.0035, "lr": 3.866150310868571e-05, "epoch": 2.0619952687821193, "percentage": 31.6, "elapsed_time": "2:53:30", "remaining_time": "6:15:33", "throughput": 2336.83, "total_tokens": 24326480} {"current_steps": 12645, "total_steps": 40000, "loss": 0.0056, "lr": 3.8653280064533506e-05, "epoch": 2.062810996002937, "percentage": 31.61, "elapsed_time": "2:53:32", "remaining_time": "6:15:24", "throughput": 2337.36, "total_tokens": 24336864} {"current_steps": 12650, "total_steps": 40000, "loss": 0.1849, "lr": 3.864505491487394e-05, "epoch": 2.063626723223754, "percentage": 31.62, "elapsed_time": "2:53:34", "remaining_time": "6:15:16", "throughput": 2337.84, "total_tokens": 24346736} {"current_steps": 12655, "total_steps": 40000, "loss": 0.0123, "lr": 3.8636827660975414e-05, "epoch": 2.0644424504445715, "percentage": 31.64, "elapsed_time": "2:53:36", "remaining_time": "6:15:07", "throughput": 2338.27, "total_tokens": 24356000} {"current_steps": 12660, "total_steps": 40000, "loss": 0.0028, "lr": 3.862859830410671e-05, "epoch": 2.0652581776653887, "percentage": 31.65, "elapsed_time": "2:53:38", "remaining_time": "6:14:58", "throughput": 2338.8, "total_tokens": 24366384} {"current_steps": 12665, "total_steps": 40000, "loss": 0.1215, "lr": 3.862036684553688e-05, "epoch": 2.0660739048862062, "percentage": 31.66, "elapsed_time": "2:53:40", "remaining_time": "6:14:50", "throughput": 2339.4, "total_tokens": 24377440} {"current_steps": 12670, "total_steps": 40000, "loss": 0.101, "lr": 3.8612133286535314e-05, "epoch": 2.0668896321070234, "percentage": 31.67, "elapsed_time": "2:53:42", "remaining_time": "6:14:41", "throughput": 2339.84, "total_tokens": 24386832} {"current_steps": 12675, "total_steps": 40000, "loss": 0.028, "lr": 3.860389762837173e-05, "epoch": 2.067705359327841, "percentage": 31.69, "elapsed_time": "2:53:44", "remaining_time": "6:14:33", "throughput": 2340.19, "total_tokens": 24395328} {"current_steps": 12680, "total_steps": 40000, "loss": 0.0032, "lr": 3.859565987231618e-05, "epoch": 2.068521086548658, "percentage": 31.7, "elapsed_time": "2:53:46", "remaining_time": "6:14:24", "throughput": 2340.53, "total_tokens": 24403824} {"current_steps": 12685, "total_steps": 40000, "loss": 0.1846, "lr": 3.858742001963902e-05, "epoch": 2.0693368137694756, "percentage": 31.71, "elapsed_time": "2:53:48", "remaining_time": "6:14:16", "throughput": 2341.09, "total_tokens": 24414448} {"current_steps": 12690, "total_steps": 40000, "loss": 0.0669, "lr": 3.857917807161094e-05, "epoch": 2.0701525409902928, "percentage": 31.72, "elapsed_time": "2:53:50", "remaining_time": "6:14:07", "throughput": 2341.55, "total_tokens": 24424128} {"current_steps": 12695, "total_steps": 40000, "loss": 0.0461, "lr": 3.857093402950296e-05, "epoch": 2.0709682682111104, "percentage": 31.74, "elapsed_time": "2:53:52", "remaining_time": "6:13:59", "throughput": 2342.05, "total_tokens": 24434096} {"current_steps": 12700, "total_steps": 40000, "loss": 0.0925, "lr": 3.8562687894586414e-05, "epoch": 2.0717839954319275, "percentage": 31.75, "elapsed_time": "2:53:54", "remaining_time": "6:13:50", "throughput": 2342.56, "total_tokens": 24444256} {"current_steps": 12705, "total_steps": 40000, "loss": 0.1565, "lr": 3.8554439668132946e-05, "epoch": 2.072599722652745, "percentage": 31.76, "elapsed_time": "2:53:56", "remaining_time": "6:13:42", "throughput": 2343.07, "total_tokens": 24454496} {"current_steps": 12710, "total_steps": 40000, "loss": 0.1677, "lr": 3.854618935141455e-05, "epoch": 2.073415449873562, "percentage": 31.77, "elapsed_time": "2:53:58", "remaining_time": "6:13:33", "throughput": 2343.56, "total_tokens": 24464432} {"current_steps": 12715, "total_steps": 40000, "loss": 0.064, "lr": 3.8537936945703525e-05, "epoch": 2.0742311770943798, "percentage": 31.79, "elapsed_time": "2:54:01", "remaining_time": "6:13:25", "throughput": 2344.09, "total_tokens": 24474816} {"current_steps": 12720, "total_steps": 40000, "loss": 0.0372, "lr": 3.852968245227249e-05, "epoch": 2.075046904315197, "percentage": 31.8, "elapsed_time": "2:54:03", "remaining_time": "6:13:16", "throughput": 2344.55, "total_tokens": 24484432} {"current_steps": 12725, "total_steps": 40000, "loss": 0.0843, "lr": 3.85214258723944e-05, "epoch": 2.0758626315360145, "percentage": 31.81, "elapsed_time": "2:54:05", "remaining_time": "6:13:08", "throughput": 2344.95, "total_tokens": 24493488} {"current_steps": 12730, "total_steps": 40000, "loss": 0.0688, "lr": 3.8513167207342524e-05, "epoch": 2.0766783587568316, "percentage": 31.82, "elapsed_time": "2:54:07", "remaining_time": "6:12:59", "throughput": 2345.51, "total_tokens": 24504208} {"current_steps": 12735, "total_steps": 40000, "loss": 0.0798, "lr": 3.850490645839044e-05, "epoch": 2.077494085977649, "percentage": 31.84, "elapsed_time": "2:54:09", "remaining_time": "6:12:51", "throughput": 2345.92, "total_tokens": 24513280} {"current_steps": 12740, "total_steps": 40000, "loss": 0.0447, "lr": 3.849664362681207e-05, "epoch": 2.0783098131984663, "percentage": 31.85, "elapsed_time": "2:54:11", "remaining_time": "6:12:43", "throughput": 2346.28, "total_tokens": 24521920} {"current_steps": 12745, "total_steps": 40000, "loss": 0.1161, "lr": 3.848837871388165e-05, "epoch": 2.079125540419284, "percentage": 31.86, "elapsed_time": "2:54:13", "remaining_time": "6:12:34", "throughput": 2346.81, "total_tokens": 24532272} {"current_steps": 12750, "total_steps": 40000, "loss": 0.0533, "lr": 3.848011172087371e-05, "epoch": 2.079941267640101, "percentage": 31.87, "elapsed_time": "2:54:15", "remaining_time": "6:12:26", "throughput": 2347.39, "total_tokens": 24543312} {"current_steps": 12755, "total_steps": 40000, "loss": 0.0081, "lr": 3.847184264906315e-05, "epoch": 2.0807569948609186, "percentage": 31.89, "elapsed_time": "2:54:17", "remaining_time": "6:12:17", "throughput": 2347.84, "total_tokens": 24552816} {"current_steps": 12760, "total_steps": 40000, "loss": 0.0422, "lr": 3.846357149972516e-05, "epoch": 2.0815727220817357, "percentage": 31.9, "elapsed_time": "2:54:19", "remaining_time": "6:12:09", "throughput": 2348.33, "total_tokens": 24562800} {"current_steps": 12765, "total_steps": 40000, "loss": 0.185, "lr": 3.8455298274135246e-05, "epoch": 2.0823884493025533, "percentage": 31.91, "elapsed_time": "2:54:21", "remaining_time": "6:12:00", "throughput": 2348.77, "total_tokens": 24572208} {"current_steps": 12770, "total_steps": 40000, "loss": 0.2123, "lr": 3.8447022973569254e-05, "epoch": 2.0832041765233704, "percentage": 31.92, "elapsed_time": "2:54:23", "remaining_time": "6:11:52", "throughput": 2349.2, "total_tokens": 24581536} {"current_steps": 12775, "total_steps": 40000, "loss": 0.1157, "lr": 3.843874559930332e-05, "epoch": 2.084019903744188, "percentage": 31.94, "elapsed_time": "2:54:25", "remaining_time": "6:11:43", "throughput": 2349.66, "total_tokens": 24591232} {"current_steps": 12780, "total_steps": 40000, "loss": 0.0619, "lr": 3.843046615261394e-05, "epoch": 2.084835630965005, "percentage": 31.95, "elapsed_time": "2:54:27", "remaining_time": "6:11:35", "throughput": 2350.14, "total_tokens": 24601152} {"current_steps": 12785, "total_steps": 40000, "loss": 0.0517, "lr": 3.842218463477791e-05, "epoch": 2.0856513581858227, "percentage": 31.96, "elapsed_time": "2:54:29", "remaining_time": "6:11:27", "throughput": 2350.62, "total_tokens": 24610944} {"current_steps": 12790, "total_steps": 40000, "loss": 0.2182, "lr": 3.841390104707233e-05, "epoch": 2.08646708540664, "percentage": 31.97, "elapsed_time": "2:54:32", "remaining_time": "6:11:18", "throughput": 2351.11, "total_tokens": 24620992} {"current_steps": 12795, "total_steps": 40000, "loss": 0.1488, "lr": 3.8405615390774643e-05, "epoch": 2.0872828126274574, "percentage": 31.99, "elapsed_time": "2:54:34", "remaining_time": "6:11:10", "throughput": 2351.59, "total_tokens": 24630864} {"current_steps": 12800, "total_steps": 40000, "loss": 0.1184, "lr": 3.839732766716259e-05, "epoch": 2.0880985398482745, "percentage": 32.0, "elapsed_time": "2:54:36", "remaining_time": "6:11:01", "throughput": 2351.96, "total_tokens": 24639552} {"current_steps": 12800, "total_steps": 40000, "eval_loss": 0.1538952738046646, "epoch": 2.0880985398482745, "percentage": 32.0, "elapsed_time": "2:55:56", "remaining_time": "6:13:53", "throughput": 2334.02, "total_tokens": 24639552} {"current_steps": 12805, "total_steps": 40000, "loss": 0.1097, "lr": 3.838903787751425e-05, "epoch": 2.088914267069092, "percentage": 32.01, "elapsed_time": "2:56:00", "remaining_time": "6:13:48", "throughput": 2333.98, "total_tokens": 24648400} {"current_steps": 12810, "total_steps": 40000, "loss": 0.0921, "lr": 3.838074602310802e-05, "epoch": 2.0897299942899092, "percentage": 32.02, "elapsed_time": "2:56:02", "remaining_time": "6:13:40", "throughput": 2334.46, "total_tokens": 24658336} {"current_steps": 12815, "total_steps": 40000, "loss": 0.003, "lr": 3.837245210522258e-05, "epoch": 2.090545721510727, "percentage": 32.04, "elapsed_time": "2:56:04", "remaining_time": "6:13:31", "throughput": 2334.9, "total_tokens": 24667792} {"current_steps": 12820, "total_steps": 40000, "loss": 0.1458, "lr": 3.8364156125136996e-05, "epoch": 2.0913614487315444, "percentage": 32.05, "elapsed_time": "2:56:06", "remaining_time": "6:13:23", "throughput": 2335.24, "total_tokens": 24676208} {"current_steps": 12825, "total_steps": 40000, "loss": 0.0944, "lr": 3.835585808413059e-05, "epoch": 2.0921771759523615, "percentage": 32.06, "elapsed_time": "2:56:08", "remaining_time": "6:13:14", "throughput": 2335.69, "total_tokens": 24685728} {"current_steps": 12830, "total_steps": 40000, "loss": 0.0696, "lr": 3.8347557983483024e-05, "epoch": 2.092992903173179, "percentage": 32.07, "elapsed_time": "2:56:10", "remaining_time": "6:13:06", "throughput": 2336.25, "total_tokens": 24696528} {"current_steps": 12835, "total_steps": 40000, "loss": 0.1106, "lr": 3.833925582447428e-05, "epoch": 2.0938086303939962, "percentage": 32.09, "elapsed_time": "2:56:13", "remaining_time": "6:12:57", "throughput": 2336.68, "total_tokens": 24705808} {"current_steps": 12840, "total_steps": 40000, "loss": 0.007, "lr": 3.8330951608384656e-05, "epoch": 2.094624357614814, "percentage": 32.1, "elapsed_time": "2:56:15", "remaining_time": "6:12:49", "throughput": 2337.12, "total_tokens": 24715280} {"current_steps": 12845, "total_steps": 40000, "loss": 0.0259, "lr": 3.832264533649477e-05, "epoch": 2.095440084835631, "percentage": 32.11, "elapsed_time": "2:56:17", "remaining_time": "6:12:40", "throughput": 2337.42, "total_tokens": 24723360} {"current_steps": 12850, "total_steps": 40000, "loss": 0.0527, "lr": 3.8314337010085555e-05, "epoch": 2.0962558120564485, "percentage": 32.12, "elapsed_time": "2:56:19", "remaining_time": "6:12:32", "throughput": 2337.88, "total_tokens": 24733072} {"current_steps": 12855, "total_steps": 40000, "loss": 0.0053, "lr": 3.830602663043824e-05, "epoch": 2.0970715392772656, "percentage": 32.14, "elapsed_time": "2:56:21", "remaining_time": "6:12:23", "throughput": 2338.36, "total_tokens": 24742896} {"current_steps": 12860, "total_steps": 40000, "loss": 0.0145, "lr": 3.8297714198834414e-05, "epoch": 2.097887266498083, "percentage": 32.15, "elapsed_time": "2:56:23", "remaining_time": "6:12:15", "throughput": 2338.84, "total_tokens": 24752864} {"current_steps": 12865, "total_steps": 40000, "loss": 0.0142, "lr": 3.828939971655595e-05, "epoch": 2.0987029937189003, "percentage": 32.16, "elapsed_time": "2:56:25", "remaining_time": "6:12:06", "throughput": 2339.19, "total_tokens": 24761392} {"current_steps": 12870, "total_steps": 40000, "loss": 0.111, "lr": 3.828108318488505e-05, "epoch": 2.099518720939718, "percentage": 32.17, "elapsed_time": "2:56:27", "remaining_time": "6:11:58", "throughput": 2339.74, "total_tokens": 24772000} {"current_steps": 12875, "total_steps": 40000, "loss": 0.02, "lr": 3.8272764605104216e-05, "epoch": 2.100334448160535, "percentage": 32.19, "elapsed_time": "2:56:29", "remaining_time": "6:11:50", "throughput": 2340.23, "total_tokens": 24782064} {"current_steps": 12880, "total_steps": 40000, "loss": 0.0732, "lr": 3.826444397849628e-05, "epoch": 2.1011501753813526, "percentage": 32.2, "elapsed_time": "2:56:31", "remaining_time": "6:11:41", "throughput": 2340.71, "total_tokens": 24791968} {"current_steps": 12885, "total_steps": 40000, "loss": 0.2518, "lr": 3.825612130634439e-05, "epoch": 2.1019659026021698, "percentage": 32.21, "elapsed_time": "2:56:33", "remaining_time": "6:11:33", "throughput": 2341.25, "total_tokens": 24802576} {"current_steps": 12890, "total_steps": 40000, "loss": 0.0585, "lr": 3.824779658993202e-05, "epoch": 2.1027816298229873, "percentage": 32.23, "elapsed_time": "2:56:35", "remaining_time": "6:11:24", "throughput": 2341.72, "total_tokens": 24812336} {"current_steps": 12895, "total_steps": 40000, "loss": 0.0026, "lr": 3.823946983054292e-05, "epoch": 2.1035973570438045, "percentage": 32.24, "elapsed_time": "2:56:37", "remaining_time": "6:11:16", "throughput": 2342.2, "total_tokens": 24822304} {"current_steps": 12900, "total_steps": 40000, "loss": 0.0156, "lr": 3.82311410294612e-05, "epoch": 2.104413084264622, "percentage": 32.25, "elapsed_time": "2:56:39", "remaining_time": "6:11:08", "throughput": 2342.67, "total_tokens": 24832048} {"current_steps": 12905, "total_steps": 40000, "loss": 0.0503, "lr": 3.822281018797127e-05, "epoch": 2.105228811485439, "percentage": 32.26, "elapsed_time": "2:56:41", "remaining_time": "6:10:59", "throughput": 2343.17, "total_tokens": 24842272} {"current_steps": 12910, "total_steps": 40000, "loss": 0.1389, "lr": 3.821447730735783e-05, "epoch": 2.1060445387062567, "percentage": 32.27, "elapsed_time": "2:56:44", "remaining_time": "6:10:51", "throughput": 2343.58, "total_tokens": 24851408} {"current_steps": 12915, "total_steps": 40000, "loss": 0.3545, "lr": 3.820614238890592e-05, "epoch": 2.106860265927074, "percentage": 32.29, "elapsed_time": "2:56:46", "remaining_time": "6:10:42", "throughput": 2344.14, "total_tokens": 24862192} {"current_steps": 12920, "total_steps": 40000, "loss": 0.0018, "lr": 3.819780543390091e-05, "epoch": 2.1076759931478914, "percentage": 32.3, "elapsed_time": "2:56:48", "remaining_time": "6:10:34", "throughput": 2344.67, "total_tokens": 24872704} {"current_steps": 12925, "total_steps": 40000, "loss": 0.0495, "lr": 3.818946644362844e-05, "epoch": 2.1084917203687086, "percentage": 32.31, "elapsed_time": "2:56:50", "remaining_time": "6:10:26", "throughput": 2345.02, "total_tokens": 24881232} {"current_steps": 12930, "total_steps": 40000, "loss": 0.0114, "lr": 3.81811254193745e-05, "epoch": 2.109307447589526, "percentage": 32.32, "elapsed_time": "2:56:52", "remaining_time": "6:10:17", "throughput": 2345.56, "total_tokens": 24891872} {"current_steps": 12935, "total_steps": 40000, "loss": 0.1051, "lr": 3.8172782362425366e-05, "epoch": 2.1101231748103433, "percentage": 32.34, "elapsed_time": "2:56:54", "remaining_time": "6:10:09", "throughput": 2346.05, "total_tokens": 24901840} {"current_steps": 12940, "total_steps": 40000, "loss": 0.0074, "lr": 3.816443727406765e-05, "epoch": 2.110938902031161, "percentage": 32.35, "elapsed_time": "2:56:56", "remaining_time": "6:10:01", "throughput": 2346.46, "total_tokens": 24911120} {"current_steps": 12945, "total_steps": 40000, "loss": 0.0605, "lr": 3.815609015558829e-05, "epoch": 2.111754629251978, "percentage": 32.36, "elapsed_time": "2:56:58", "remaining_time": "6:09:52", "throughput": 2346.92, "total_tokens": 24920784} {"current_steps": 12950, "total_steps": 40000, "loss": 0.1674, "lr": 3.814774100827448e-05, "epoch": 2.1125703564727956, "percentage": 32.38, "elapsed_time": "2:57:00", "remaining_time": "6:09:44", "throughput": 2347.34, "total_tokens": 24930208} {"current_steps": 12955, "total_steps": 40000, "loss": 0.086, "lr": 3.813938983341379e-05, "epoch": 2.1133860836936127, "percentage": 32.39, "elapsed_time": "2:57:02", "remaining_time": "6:09:36", "throughput": 2347.77, "total_tokens": 24939600} {"current_steps": 12960, "total_steps": 40000, "loss": 0.0215, "lr": 3.813103663229407e-05, "epoch": 2.1142018109144303, "percentage": 32.4, "elapsed_time": "2:57:04", "remaining_time": "6:09:27", "throughput": 2348.09, "total_tokens": 24947840} {"current_steps": 12965, "total_steps": 40000, "loss": 0.2012, "lr": 3.812268140620349e-05, "epoch": 2.1150175381352474, "percentage": 32.41, "elapsed_time": "2:57:06", "remaining_time": "6:09:19", "throughput": 2348.59, "total_tokens": 24958032} {"current_steps": 12970, "total_steps": 40000, "loss": 0.0617, "lr": 3.811432415643051e-05, "epoch": 2.115833265356065, "percentage": 32.42, "elapsed_time": "2:57:08", "remaining_time": "6:09:10", "throughput": 2348.98, "total_tokens": 24966976} {"current_steps": 12975, "total_steps": 40000, "loss": 0.1097, "lr": 3.8105964884263954e-05, "epoch": 2.116648992576882, "percentage": 32.44, "elapsed_time": "2:57:10", "remaining_time": "6:09:02", "throughput": 2349.59, "total_tokens": 24978288} {"current_steps": 12980, "total_steps": 40000, "loss": 0.1215, "lr": 3.809760359099291e-05, "epoch": 2.1174647197976997, "percentage": 32.45, "elapsed_time": "2:57:13", "remaining_time": "6:08:54", "throughput": 2350.01, "total_tokens": 24987616} {"current_steps": 12985, "total_steps": 40000, "loss": 0.0063, "lr": 3.8089240277906804e-05, "epoch": 2.118280447018517, "percentage": 32.46, "elapsed_time": "2:57:15", "remaining_time": "6:08:46", "throughput": 2350.51, "total_tokens": 24997824} {"current_steps": 12990, "total_steps": 40000, "loss": 0.1094, "lr": 3.808087494629535e-05, "epoch": 2.1190961742393344, "percentage": 32.48, "elapsed_time": "2:57:17", "remaining_time": "6:08:37", "throughput": 2350.98, "total_tokens": 25007712} {"current_steps": 12995, "total_steps": 40000, "loss": 0.109, "lr": 3.8072507597448595e-05, "epoch": 2.1199119014601515, "percentage": 32.49, "elapsed_time": "2:57:19", "remaining_time": "6:08:29", "throughput": 2351.4, "total_tokens": 25017024} {"current_steps": 13000, "total_steps": 40000, "loss": 0.0284, "lr": 3.806413823265689e-05, "epoch": 2.120727628680969, "percentage": 32.5, "elapsed_time": "2:57:21", "remaining_time": "6:08:21", "throughput": 2351.87, "total_tokens": 25026880} {"current_steps": 13000, "total_steps": 40000, "eval_loss": 0.164071187376976, "epoch": 2.120727628680969, "percentage": 32.5, "elapsed_time": "2:58:41", "remaining_time": "6:11:08", "throughput": 2334.2, "total_tokens": 25026880} {"current_steps": 13005, "total_steps": 40000, "loss": 0.0984, "lr": 3.805576685321089e-05, "epoch": 2.121543355901786, "percentage": 32.51, "elapsed_time": "2:58:47", "remaining_time": "6:11:07", "throughput": 2333.92, "total_tokens": 25037408} {"current_steps": 13010, "total_steps": 40000, "loss": 0.0437, "lr": 3.804739346040158e-05, "epoch": 2.122359083122604, "percentage": 32.52, "elapsed_time": "2:58:49", "remaining_time": "6:10:59", "throughput": 2334.25, "total_tokens": 25045760} {"current_steps": 13015, "total_steps": 40000, "loss": 0.097, "lr": 3.8039018055520234e-05, "epoch": 2.1231748103434214, "percentage": 32.54, "elapsed_time": "2:58:51", "remaining_time": "6:10:50", "throughput": 2334.65, "total_tokens": 25054896} {"current_steps": 13020, "total_steps": 40000, "loss": 0.0706, "lr": 3.803064063985844e-05, "epoch": 2.1239905375642385, "percentage": 32.55, "elapsed_time": "2:58:53", "remaining_time": "6:10:42", "throughput": 2335.12, "total_tokens": 25064768} {"current_steps": 13025, "total_steps": 40000, "loss": 0.1015, "lr": 3.802226121470811e-05, "epoch": 2.124806264785056, "percentage": 32.56, "elapsed_time": "2:58:55", "remaining_time": "6:10:34", "throughput": 2335.64, "total_tokens": 25075152} {"current_steps": 13030, "total_steps": 40000, "loss": 0.1342, "lr": 3.801387978136145e-05, "epoch": 2.125621992005873, "percentage": 32.57, "elapsed_time": "2:58:57", "remaining_time": "6:10:25", "throughput": 2336.12, "total_tokens": 25085120} {"current_steps": 13035, "total_steps": 40000, "loss": 0.0827, "lr": 3.800549634111099e-05, "epoch": 2.126437719226691, "percentage": 32.59, "elapsed_time": "2:59:00", "remaining_time": "6:10:17", "throughput": 2336.62, "total_tokens": 25095312} {"current_steps": 13040, "total_steps": 40000, "loss": 0.1107, "lr": 3.799711089524955e-05, "epoch": 2.127253446447508, "percentage": 32.6, "elapsed_time": "2:59:02", "remaining_time": "6:10:09", "throughput": 2336.99, "total_tokens": 25104160} {"current_steps": 13045, "total_steps": 40000, "loss": 0.0327, "lr": 3.7988723445070285e-05, "epoch": 2.1280691736683255, "percentage": 32.61, "elapsed_time": "2:59:04", "remaining_time": "6:10:00", "throughput": 2337.37, "total_tokens": 25113104} {"current_steps": 13050, "total_steps": 40000, "loss": 0.2605, "lr": 3.798033399186663e-05, "epoch": 2.1288849008891426, "percentage": 32.62, "elapsed_time": "2:59:06", "remaining_time": "6:09:52", "throughput": 2337.91, "total_tokens": 25123760} {"current_steps": 13055, "total_steps": 40000, "loss": 0.0085, "lr": 3.797194253693237e-05, "epoch": 2.12970062810996, "percentage": 32.64, "elapsed_time": "2:59:08", "remaining_time": "6:09:44", "throughput": 2338.34, "total_tokens": 25133168} {"current_steps": 13060, "total_steps": 40000, "loss": 0.0593, "lr": 3.796354908156153e-05, "epoch": 2.1305163553307773, "percentage": 32.65, "elapsed_time": "2:59:10", "remaining_time": "6:09:35", "throughput": 2338.73, "total_tokens": 25142208} {"current_steps": 13065, "total_steps": 40000, "loss": 0.1558, "lr": 3.795515362704853e-05, "epoch": 2.131332082551595, "percentage": 32.66, "elapsed_time": "2:59:12", "remaining_time": "6:09:27", "throughput": 2339.16, "total_tokens": 25151616} {"current_steps": 13070, "total_steps": 40000, "loss": 0.1023, "lr": 3.794675617468803e-05, "epoch": 2.132147809772412, "percentage": 32.67, "elapsed_time": "2:59:14", "remaining_time": "6:09:19", "throughput": 2339.63, "total_tokens": 25161536} {"current_steps": 13075, "total_steps": 40000, "loss": 0.1906, "lr": 3.793835672577503e-05, "epoch": 2.1329635369932296, "percentage": 32.69, "elapsed_time": "2:59:16", "remaining_time": "6:09:10", "throughput": 2340.15, "total_tokens": 25172032} {"current_steps": 13080, "total_steps": 40000, "loss": 0.0283, "lr": 3.7929955281604826e-05, "epoch": 2.1337792642140467, "percentage": 32.7, "elapsed_time": "2:59:18", "remaining_time": "6:09:02", "throughput": 2340.48, "total_tokens": 25180400} {"current_steps": 13085, "total_steps": 40000, "loss": 0.1406, "lr": 3.7921551843473036e-05, "epoch": 2.1345949914348643, "percentage": 32.71, "elapsed_time": "2:59:20", "remaining_time": "6:08:54", "throughput": 2340.85, "total_tokens": 25189232} {"current_steps": 13090, "total_steps": 40000, "loss": 0.0881, "lr": 3.791314641267557e-05, "epoch": 2.1354107186556814, "percentage": 32.73, "elapsed_time": "2:59:22", "remaining_time": "6:08:45", "throughput": 2341.37, "total_tokens": 25199584} {"current_steps": 13095, "total_steps": 40000, "loss": 0.0578, "lr": 3.790473899050864e-05, "epoch": 2.136226445876499, "percentage": 32.74, "elapsed_time": "2:59:24", "remaining_time": "6:08:37", "throughput": 2341.73, "total_tokens": 25208288} {"current_steps": 13100, "total_steps": 40000, "loss": 0.0455, "lr": 3.7896329578268794e-05, "epoch": 2.137042173097316, "percentage": 32.75, "elapsed_time": "2:59:26", "remaining_time": "6:08:29", "throughput": 2342.23, "total_tokens": 25218576} {"current_steps": 13105, "total_steps": 40000, "loss": 0.1775, "lr": 3.7887918177252855e-05, "epoch": 2.1378579003181337, "percentage": 32.76, "elapsed_time": "2:59:28", "remaining_time": "6:08:20", "throughput": 2342.66, "total_tokens": 25228032} {"current_steps": 13110, "total_steps": 40000, "loss": 0.0565, "lr": 3.787950478875798e-05, "epoch": 2.138673627538951, "percentage": 32.77, "elapsed_time": "2:59:31", "remaining_time": "6:08:12", "throughput": 2343.26, "total_tokens": 25239392} {"current_steps": 13115, "total_steps": 40000, "loss": 0.0986, "lr": 3.787108941408162e-05, "epoch": 2.1394893547597684, "percentage": 32.79, "elapsed_time": "2:59:33", "remaining_time": "6:08:04", "throughput": 2343.67, "total_tokens": 25248656} {"current_steps": 13120, "total_steps": 40000, "loss": 0.003, "lr": 3.786267205452151e-05, "epoch": 2.1403050819805856, "percentage": 32.8, "elapsed_time": "2:59:35", "remaining_time": "6:07:55", "throughput": 2344.03, "total_tokens": 25257360} {"current_steps": 13125, "total_steps": 40000, "loss": 0.0485, "lr": 3.785425271137573e-05, "epoch": 2.141120809201403, "percentage": 32.81, "elapsed_time": "2:59:37", "remaining_time": "6:07:47", "throughput": 2344.44, "total_tokens": 25266576} {"current_steps": 13130, "total_steps": 40000, "loss": 0.1017, "lr": 3.7845831385942655e-05, "epoch": 2.1419365364222203, "percentage": 32.82, "elapsed_time": "2:59:39", "remaining_time": "6:07:39", "throughput": 2345.0, "total_tokens": 25277456} {"current_steps": 13135, "total_steps": 40000, "loss": 0.0832, "lr": 3.7837408079520944e-05, "epoch": 2.142752263643038, "percentage": 32.84, "elapsed_time": "2:59:41", "remaining_time": "6:07:31", "throughput": 2345.35, "total_tokens": 25286160} {"current_steps": 13140, "total_steps": 40000, "loss": 0.122, "lr": 3.782898279340957e-05, "epoch": 2.143567990863855, "percentage": 32.85, "elapsed_time": "2:59:43", "remaining_time": "6:07:22", "throughput": 2345.73, "total_tokens": 25295040} {"current_steps": 13145, "total_steps": 40000, "loss": 0.0362, "lr": 3.782055552890784e-05, "epoch": 2.1443837180846725, "percentage": 32.86, "elapsed_time": "2:59:45", "remaining_time": "6:07:14", "throughput": 2346.17, "total_tokens": 25304656} {"current_steps": 13150, "total_steps": 40000, "loss": 0.0493, "lr": 3.781212628731534e-05, "epoch": 2.1451994453054897, "percentage": 32.88, "elapsed_time": "2:59:47", "remaining_time": "6:07:06", "throughput": 2346.55, "total_tokens": 25313616} {"current_steps": 13155, "total_steps": 40000, "loss": 0.1374, "lr": 3.7803695069931946e-05, "epoch": 2.1460151725263072, "percentage": 32.89, "elapsed_time": "2:59:49", "remaining_time": "6:06:58", "throughput": 2347.03, "total_tokens": 25323664} {"current_steps": 13160, "total_steps": 40000, "loss": 0.1427, "lr": 3.779526187805789e-05, "epoch": 2.1468308997471244, "percentage": 32.9, "elapsed_time": "2:59:51", "remaining_time": "6:06:49", "throughput": 2347.43, "total_tokens": 25332864} {"current_steps": 13165, "total_steps": 40000, "loss": 0.1506, "lr": 3.778682671299364e-05, "epoch": 2.147646626967942, "percentage": 32.91, "elapsed_time": "2:59:53", "remaining_time": "6:06:41", "throughput": 2347.88, "total_tokens": 25342512} {"current_steps": 13170, "total_steps": 40000, "loss": 0.0361, "lr": 3.777838957604003e-05, "epoch": 2.148462354188759, "percentage": 32.92, "elapsed_time": "2:59:55", "remaining_time": "6:06:33", "throughput": 2348.24, "total_tokens": 25351296} {"current_steps": 13175, "total_steps": 40000, "loss": 0.124, "lr": 3.776995046849816e-05, "epoch": 2.1492780814095767, "percentage": 32.94, "elapsed_time": "2:59:57", "remaining_time": "6:06:25", "throughput": 2348.66, "total_tokens": 25360592} {"current_steps": 13180, "total_steps": 40000, "loss": 0.1514, "lr": 3.776150939166945e-05, "epoch": 2.150093808630394, "percentage": 32.95, "elapsed_time": "2:59:59", "remaining_time": "6:06:16", "throughput": 2349.09, "total_tokens": 25370096} {"current_steps": 13185, "total_steps": 40000, "loss": 0.0285, "lr": 3.775306634685562e-05, "epoch": 2.1509095358512114, "percentage": 32.96, "elapsed_time": "3:00:02", "remaining_time": "6:06:08", "throughput": 2349.68, "total_tokens": 25381392} {"current_steps": 13190, "total_steps": 40000, "loss": 0.2144, "lr": 3.7744621335358696e-05, "epoch": 2.151725263072029, "percentage": 32.98, "elapsed_time": "3:00:04", "remaining_time": "6:06:00", "throughput": 2350.12, "total_tokens": 25390992} {"current_steps": 13195, "total_steps": 40000, "loss": 0.093, "lr": 3.7736174358481e-05, "epoch": 2.152540990292846, "percentage": 32.99, "elapsed_time": "3:00:06", "remaining_time": "6:05:52", "throughput": 2350.61, "total_tokens": 25401136} {"current_steps": 13200, "total_steps": 40000, "loss": 0.0089, "lr": 3.7727725417525175e-05, "epoch": 2.153356717513663, "percentage": 33.0, "elapsed_time": "3:00:08", "remaining_time": "6:05:44", "throughput": 2351.02, "total_tokens": 25410448} {"current_steps": 13200, "total_steps": 40000, "eval_loss": 0.15041252970695496, "epoch": 2.153356717513663, "percentage": 33.0, "elapsed_time": "3:01:28", "remaining_time": "6:08:27", "throughput": 2333.63, "total_tokens": 25410448} {"current_steps": 13205, "total_steps": 40000, "loss": 0.0481, "lr": 3.771927451379414e-05, "epoch": 2.1541724447344808, "percentage": 33.01, "elapsed_time": "3:01:33", "remaining_time": "6:08:23", "throughput": 2333.71, "total_tokens": 25421264} {"current_steps": 13210, "total_steps": 40000, "loss": 0.0091, "lr": 3.7710821648591135e-05, "epoch": 2.1549881719552983, "percentage": 33.02, "elapsed_time": "3:01:35", "remaining_time": "6:08:15", "throughput": 2334.05, "total_tokens": 25429792} {"current_steps": 13215, "total_steps": 40000, "loss": 0.125, "lr": 3.7702366823219694e-05, "epoch": 2.1558038991761155, "percentage": 33.04, "elapsed_time": "3:01:37", "remaining_time": "6:08:07", "throughput": 2334.43, "total_tokens": 25438736} {"current_steps": 13220, "total_steps": 40000, "loss": 0.1289, "lr": 3.769391003898366e-05, "epoch": 2.156619626396933, "percentage": 33.05, "elapsed_time": "3:01:39", "remaining_time": "6:07:58", "throughput": 2334.82, "total_tokens": 25447760} {"current_steps": 13225, "total_steps": 40000, "loss": 0.1362, "lr": 3.768545129718718e-05, "epoch": 2.15743535361775, "percentage": 33.06, "elapsed_time": "3:01:41", "remaining_time": "6:07:50", "throughput": 2335.39, "total_tokens": 25458848} {"current_steps": 13230, "total_steps": 40000, "loss": 0.0039, "lr": 3.7676990599134686e-05, "epoch": 2.1582510808385678, "percentage": 33.07, "elapsed_time": "3:01:43", "remaining_time": "6:07:42", "throughput": 2335.85, "total_tokens": 25468688} {"current_steps": 13235, "total_steps": 40000, "loss": 0.174, "lr": 3.766852794613095e-05, "epoch": 2.159066808059385, "percentage": 33.09, "elapsed_time": "3:01:45", "remaining_time": "6:07:33", "throughput": 2336.26, "total_tokens": 25478032} {"current_steps": 13240, "total_steps": 40000, "loss": 0.1918, "lr": 3.766006333948099e-05, "epoch": 2.1598825352802025, "percentage": 33.1, "elapsed_time": "3:01:47", "remaining_time": "6:07:25", "throughput": 2336.64, "total_tokens": 25486976} {"current_steps": 13245, "total_steps": 40000, "loss": 0.0494, "lr": 3.765159678049017e-05, "epoch": 2.1606982625010196, "percentage": 33.11, "elapsed_time": "3:01:49", "remaining_time": "6:07:17", "throughput": 2337.15, "total_tokens": 25497392} {"current_steps": 13250, "total_steps": 40000, "loss": 0.063, "lr": 3.7643128270464134e-05, "epoch": 2.161513989721837, "percentage": 33.12, "elapsed_time": "3:01:51", "remaining_time": "6:07:09", "throughput": 2337.67, "total_tokens": 25507872} {"current_steps": 13255, "total_steps": 40000, "loss": 0.0455, "lr": 3.763465781070884e-05, "epoch": 2.1623297169426543, "percentage": 33.14, "elapsed_time": "3:01:53", "remaining_time": "6:07:00", "throughput": 2338.07, "total_tokens": 25517040} {"current_steps": 13260, "total_steps": 40000, "loss": 0.086, "lr": 3.762618540253052e-05, "epoch": 2.163145444163472, "percentage": 33.15, "elapsed_time": "3:01:55", "remaining_time": "6:06:52", "throughput": 2338.53, "total_tokens": 25526816} {"current_steps": 13265, "total_steps": 40000, "loss": 0.0132, "lr": 3.761771104723576e-05, "epoch": 2.163961171384289, "percentage": 33.16, "elapsed_time": "3:01:57", "remaining_time": "6:06:44", "throughput": 2338.92, "total_tokens": 25535904} {"current_steps": 13270, "total_steps": 40000, "loss": 0.0674, "lr": 3.7609234746131386e-05, "epoch": 2.1647768986051066, "percentage": 33.17, "elapsed_time": "3:01:59", "remaining_time": "6:06:36", "throughput": 2339.36, "total_tokens": 25545552} {"current_steps": 13275, "total_steps": 40000, "loss": 0.0613, "lr": 3.7600756500524556e-05, "epoch": 2.1655926258259237, "percentage": 33.19, "elapsed_time": "3:02:01", "remaining_time": "6:06:27", "throughput": 2339.75, "total_tokens": 25554656} {"current_steps": 13280, "total_steps": 40000, "loss": 0.0367, "lr": 3.759227631172271e-05, "epoch": 2.1664083530467413, "percentage": 33.2, "elapsed_time": "3:02:04", "remaining_time": "6:06:19", "throughput": 2340.1, "total_tokens": 25563280} {"current_steps": 13285, "total_steps": 40000, "loss": 0.0336, "lr": 3.758379418103363e-05, "epoch": 2.1672240802675584, "percentage": 33.21, "elapsed_time": "3:02:06", "remaining_time": "6:06:11", "throughput": 2340.52, "total_tokens": 25572688} {"current_steps": 13290, "total_steps": 40000, "loss": 0.1203, "lr": 3.757531010976534e-05, "epoch": 2.168039807488376, "percentage": 33.23, "elapsed_time": "3:02:08", "remaining_time": "6:06:03", "throughput": 2340.91, "total_tokens": 25581808} {"current_steps": 13295, "total_steps": 40000, "loss": 0.1513, "lr": 3.75668240992262e-05, "epoch": 2.168855534709193, "percentage": 33.24, "elapsed_time": "3:02:10", "remaining_time": "6:05:54", "throughput": 2341.39, "total_tokens": 25591856} {"current_steps": 13300, "total_steps": 40000, "loss": 0.0278, "lr": 3.7558336150724865e-05, "epoch": 2.1696712619300107, "percentage": 33.25, "elapsed_time": "3:02:12", "remaining_time": "6:05:46", "throughput": 2341.83, "total_tokens": 25601568} {"current_steps": 13305, "total_steps": 40000, "loss": 0.0969, "lr": 3.754984626557028e-05, "epoch": 2.170486989150828, "percentage": 33.26, "elapsed_time": "3:02:14", "remaining_time": "6:05:38", "throughput": 2342.27, "total_tokens": 25611120} {"current_steps": 13310, "total_steps": 40000, "loss": 0.1026, "lr": 3.754135444507168e-05, "epoch": 2.1713027163716454, "percentage": 33.27, "elapsed_time": "3:02:16", "remaining_time": "6:05:30", "throughput": 2342.77, "total_tokens": 25621488} {"current_steps": 13315, "total_steps": 40000, "loss": 0.0705, "lr": 3.753286069053863e-05, "epoch": 2.1721184435924625, "percentage": 33.29, "elapsed_time": "3:02:18", "remaining_time": "6:05:22", "throughput": 2343.21, "total_tokens": 25631168} {"current_steps": 13320, "total_steps": 40000, "loss": 0.0482, "lr": 3.7524365003280945e-05, "epoch": 2.17293417081328, "percentage": 33.3, "elapsed_time": "3:02:20", "remaining_time": "6:05:13", "throughput": 2343.62, "total_tokens": 25640400} {"current_steps": 13325, "total_steps": 40000, "loss": 0.0582, "lr": 3.75158673846088e-05, "epoch": 2.1737498980340972, "percentage": 33.31, "elapsed_time": "3:02:22", "remaining_time": "6:05:05", "throughput": 2343.85, "total_tokens": 25647808} {"current_steps": 13330, "total_steps": 40000, "loss": 0.0963, "lr": 3.750736783583262e-05, "epoch": 2.174565625254915, "percentage": 33.32, "elapsed_time": "3:02:24", "remaining_time": "6:04:57", "throughput": 2344.18, "total_tokens": 25656224} {"current_steps": 13335, "total_steps": 40000, "loss": 0.1554, "lr": 3.7498866358263144e-05, "epoch": 2.175381352475732, "percentage": 33.34, "elapsed_time": "3:02:26", "remaining_time": "6:04:49", "throughput": 2344.66, "total_tokens": 25666368} {"current_steps": 13340, "total_steps": 40000, "loss": 0.0259, "lr": 3.74903629532114e-05, "epoch": 2.1761970796965495, "percentage": 33.35, "elapsed_time": "3:02:28", "remaining_time": "6:04:41", "throughput": 2345.1, "total_tokens": 25675968} {"current_steps": 13345, "total_steps": 40000, "loss": 0.1647, "lr": 3.748185762198873e-05, "epoch": 2.1770128069173666, "percentage": 33.36, "elapsed_time": "3:02:30", "remaining_time": "6:04:32", "throughput": 2345.46, "total_tokens": 25684800} {"current_steps": 13350, "total_steps": 40000, "loss": 0.0032, "lr": 3.747335036590676e-05, "epoch": 2.1778285341381842, "percentage": 33.38, "elapsed_time": "3:02:32", "remaining_time": "6:04:24", "throughput": 2345.83, "total_tokens": 25693712} {"current_steps": 13355, "total_steps": 40000, "loss": 0.0325, "lr": 3.7464841186277405e-05, "epoch": 2.1786442613590014, "percentage": 33.39, "elapsed_time": "3:02:34", "remaining_time": "6:04:16", "throughput": 2346.2, "total_tokens": 25702624} {"current_steps": 13360, "total_steps": 40000, "loss": 0.0511, "lr": 3.7456330084412896e-05, "epoch": 2.179459988579819, "percentage": 33.4, "elapsed_time": "3:02:37", "remaining_time": "6:04:08", "throughput": 2346.6, "total_tokens": 25711744} {"current_steps": 13365, "total_steps": 40000, "loss": 0.1668, "lr": 3.744781706162576e-05, "epoch": 2.180275715800636, "percentage": 33.41, "elapsed_time": "3:02:39", "remaining_time": "6:04:00", "throughput": 2346.91, "total_tokens": 25720064} {"current_steps": 13370, "total_steps": 40000, "loss": 0.1609, "lr": 3.743930211922879e-05, "epoch": 2.1810914430214536, "percentage": 33.42, "elapsed_time": "3:02:41", "remaining_time": "6:03:52", "throughput": 2347.44, "total_tokens": 25730672} {"current_steps": 13375, "total_steps": 40000, "loss": 0.1635, "lr": 3.743078525853513e-05, "epoch": 2.1819071702422708, "percentage": 33.44, "elapsed_time": "3:02:43", "remaining_time": "6:03:43", "throughput": 2347.93, "total_tokens": 25740928} {"current_steps": 13380, "total_steps": 40000, "loss": 0.0258, "lr": 3.7422266480858154e-05, "epoch": 2.1827228974630883, "percentage": 33.45, "elapsed_time": "3:02:45", "remaining_time": "6:03:35", "throughput": 2348.28, "total_tokens": 25749600} {"current_steps": 13385, "total_steps": 40000, "loss": 0.1035, "lr": 3.741374578751158e-05, "epoch": 2.183538624683906, "percentage": 33.46, "elapsed_time": "3:02:47", "remaining_time": "6:03:27", "throughput": 2348.7, "total_tokens": 25759056} {"current_steps": 13390, "total_steps": 40000, "loss": 0.0921, "lr": 3.740522317980941e-05, "epoch": 2.184354351904723, "percentage": 33.48, "elapsed_time": "3:02:49", "remaining_time": "6:03:19", "throughput": 2348.95, "total_tokens": 25766576} {"current_steps": 13395, "total_steps": 40000, "loss": 0.1326, "lr": 3.739669865906593e-05, "epoch": 2.1851700791255406, "percentage": 33.49, "elapsed_time": "3:02:51", "remaining_time": "6:03:11", "throughput": 2349.42, "total_tokens": 25776640} {"current_steps": 13400, "total_steps": 40000, "loss": 0.0075, "lr": 3.738817222659573e-05, "epoch": 2.1859858063463578, "percentage": 33.5, "elapsed_time": "3:02:53", "remaining_time": "6:03:03", "throughput": 2349.81, "total_tokens": 25785744} {"current_steps": 13400, "total_steps": 40000, "eval_loss": 0.1619856208562851, "epoch": 2.1859858063463578, "percentage": 33.5, "elapsed_time": "3:04:14", "remaining_time": "6:05:43", "throughput": 2332.67, "total_tokens": 25785744} {"current_steps": 13405, "total_steps": 40000, "loss": 0.2266, "lr": 3.73796438837137e-05, "epoch": 2.1868015335671753, "percentage": 33.51, "elapsed_time": "3:04:18", "remaining_time": "6:05:39", "throughput": 2332.6, "total_tokens": 25795504} {"current_steps": 13410, "total_steps": 40000, "loss": 0.1119, "lr": 3.7371113631735e-05, "epoch": 2.1876172607879925, "percentage": 33.52, "elapsed_time": "3:04:20", "remaining_time": "6:05:31", "throughput": 2333.1, "total_tokens": 25805776} {"current_steps": 13415, "total_steps": 40000, "loss": 0.029, "lr": 3.736258147197512e-05, "epoch": 2.18843298800881, "percentage": 33.54, "elapsed_time": "3:04:22", "remaining_time": "6:05:23", "throughput": 2333.57, "total_tokens": 25815840} {"current_steps": 13420, "total_steps": 40000, "loss": 0.0392, "lr": 3.735404740574981e-05, "epoch": 2.189248715229627, "percentage": 33.55, "elapsed_time": "3:04:24", "remaining_time": "6:05:15", "throughput": 2333.88, "total_tokens": 25824112} {"current_steps": 13425, "total_steps": 40000, "loss": 0.1795, "lr": 3.7345511434375145e-05, "epoch": 2.1900644424504447, "percentage": 33.56, "elapsed_time": "3:04:26", "remaining_time": "6:05:07", "throughput": 2334.31, "total_tokens": 25833664} {"current_steps": 13430, "total_steps": 40000, "loss": 0.0559, "lr": 3.733697355916748e-05, "epoch": 2.190880169671262, "percentage": 33.58, "elapsed_time": "3:04:28", "remaining_time": "6:04:58", "throughput": 2334.69, "total_tokens": 25842656} {"current_steps": 13435, "total_steps": 40000, "loss": 0.0954, "lr": 3.732843378144345e-05, "epoch": 2.1916958968920794, "percentage": 33.59, "elapsed_time": "3:04:31", "remaining_time": "6:04:50", "throughput": 2335.06, "total_tokens": 25851584} {"current_steps": 13440, "total_steps": 40000, "loss": 0.101, "lr": 3.7319892102519995e-05, "epoch": 2.1925116241128966, "percentage": 33.6, "elapsed_time": "3:04:33", "remaining_time": "6:04:42", "throughput": 2335.43, "total_tokens": 25860528} {"current_steps": 13445, "total_steps": 40000, "loss": 0.0699, "lr": 3.731134852371436e-05, "epoch": 2.193327351333714, "percentage": 33.61, "elapsed_time": "3:04:35", "remaining_time": "6:04:34", "throughput": 2335.89, "total_tokens": 25870448} {"current_steps": 13450, "total_steps": 40000, "loss": 0.0869, "lr": 3.730280304634408e-05, "epoch": 2.1941430785545313, "percentage": 33.62, "elapsed_time": "3:04:37", "remaining_time": "6:04:26", "throughput": 2336.36, "total_tokens": 25880496} {"current_steps": 13455, "total_steps": 40000, "loss": 0.0499, "lr": 3.729425567172696e-05, "epoch": 2.194958805775349, "percentage": 33.64, "elapsed_time": "3:04:39", "remaining_time": "6:04:18", "throughput": 2336.92, "total_tokens": 25891504} {"current_steps": 13460, "total_steps": 40000, "loss": 0.0783, "lr": 3.728570640118111e-05, "epoch": 2.195774532996166, "percentage": 33.65, "elapsed_time": "3:04:41", "remaining_time": "6:04:09", "throughput": 2337.38, "total_tokens": 25901488} {"current_steps": 13465, "total_steps": 40000, "loss": 0.0775, "lr": 3.727715523602494e-05, "epoch": 2.1965902602169836, "percentage": 33.66, "elapsed_time": "3:04:43", "remaining_time": "6:04:01", "throughput": 2337.77, "total_tokens": 25910608} {"current_steps": 13470, "total_steps": 40000, "loss": 0.0514, "lr": 3.726860217757715e-05, "epoch": 2.1974059874378007, "percentage": 33.67, "elapsed_time": "3:04:45", "remaining_time": "6:03:53", "throughput": 2338.26, "total_tokens": 25920864} {"current_steps": 13475, "total_steps": 40000, "loss": 0.1181, "lr": 3.726004722715673e-05, "epoch": 2.1982217146586183, "percentage": 33.69, "elapsed_time": "3:04:47", "remaining_time": "6:03:45", "throughput": 2338.56, "total_tokens": 25929040} {"current_steps": 13480, "total_steps": 40000, "loss": 0.0035, "lr": 3.725149038608296e-05, "epoch": 2.1990374418794354, "percentage": 33.7, "elapsed_time": "3:04:49", "remaining_time": "6:03:37", "throughput": 2339.02, "total_tokens": 25939008} {"current_steps": 13485, "total_steps": 40000, "loss": 0.0043, "lr": 3.7242931655675404e-05, "epoch": 2.199853169100253, "percentage": 33.71, "elapsed_time": "3:04:51", "remaining_time": "6:03:29", "throughput": 2339.46, "total_tokens": 25948720} {"current_steps": 13490, "total_steps": 40000, "loss": 0.0199, "lr": 3.7234371037253937e-05, "epoch": 2.20066889632107, "percentage": 33.73, "elapsed_time": "3:04:53", "remaining_time": "6:03:21", "throughput": 2339.81, "total_tokens": 25957360} {"current_steps": 13495, "total_steps": 40000, "loss": 0.0839, "lr": 3.7225808532138705e-05, "epoch": 2.2014846235418877, "percentage": 33.74, "elapsed_time": "3:04:55", "remaining_time": "6:03:12", "throughput": 2340.15, "total_tokens": 25966048} {"current_steps": 13500, "total_steps": 40000, "loss": 0.1735, "lr": 3.721724414165016e-05, "epoch": 2.202300350762705, "percentage": 33.75, "elapsed_time": "3:04:57", "remaining_time": "6:03:04", "throughput": 2340.62, "total_tokens": 25976080} {"current_steps": 13505, "total_steps": 40000, "loss": 0.0835, "lr": 3.720867786710904e-05, "epoch": 2.2031160779835224, "percentage": 33.76, "elapsed_time": "3:05:00", "remaining_time": "6:02:56", "throughput": 2341.01, "total_tokens": 25985232} {"current_steps": 13510, "total_steps": 40000, "loss": 0.1519, "lr": 3.7200109709836366e-05, "epoch": 2.2039318052043395, "percentage": 33.77, "elapsed_time": "3:05:02", "remaining_time": "6:02:48", "throughput": 2341.37, "total_tokens": 25994080} {"current_steps": 13515, "total_steps": 40000, "loss": 0.0094, "lr": 3.7191539671153465e-05, "epoch": 2.204747532425157, "percentage": 33.79, "elapsed_time": "3:05:04", "remaining_time": "6:02:40", "throughput": 2341.87, "total_tokens": 26004464} {"current_steps": 13520, "total_steps": 40000, "loss": 0.0291, "lr": 3.718296775238193e-05, "epoch": 2.205563259645974, "percentage": 33.8, "elapsed_time": "3:05:06", "remaining_time": "6:02:32", "throughput": 2342.24, "total_tokens": 26013504} {"current_steps": 13525, "total_steps": 40000, "loss": 0.0077, "lr": 3.7174393954843675e-05, "epoch": 2.206378986866792, "percentage": 33.81, "elapsed_time": "3:05:08", "remaining_time": "6:02:24", "throughput": 2342.62, "total_tokens": 26022496} {"current_steps": 13530, "total_steps": 40000, "loss": 0.113, "lr": 3.716581827986087e-05, "epoch": 2.207194714087609, "percentage": 33.83, "elapsed_time": "3:05:10", "remaining_time": "6:02:16", "throughput": 2343.04, "total_tokens": 26031984} {"current_steps": 13535, "total_steps": 40000, "loss": 0.0826, "lr": 3.7157240728756004e-05, "epoch": 2.2080104413084265, "percentage": 33.84, "elapsed_time": "3:05:12", "remaining_time": "6:02:08", "throughput": 2343.41, "total_tokens": 26040912} {"current_steps": 13540, "total_steps": 40000, "loss": 0.1165, "lr": 3.714866130285184e-05, "epoch": 2.2088261685292436, "percentage": 33.85, "elapsed_time": "3:05:14", "remaining_time": "6:02:00", "throughput": 2343.84, "total_tokens": 26050544} {"current_steps": 13545, "total_steps": 40000, "loss": 0.1248, "lr": 3.714008000347143e-05, "epoch": 2.209641895750061, "percentage": 33.86, "elapsed_time": "3:05:16", "remaining_time": "6:01:51", "throughput": 2344.2, "total_tokens": 26059488} {"current_steps": 13550, "total_steps": 40000, "loss": 0.1524, "lr": 3.7131496831938126e-05, "epoch": 2.2104576229708783, "percentage": 33.88, "elapsed_time": "3:05:18", "remaining_time": "6:01:43", "throughput": 2344.65, "total_tokens": 26069264} {"current_steps": 13555, "total_steps": 40000, "loss": 0.0185, "lr": 3.7122911789575565e-05, "epoch": 2.211273350191696, "percentage": 33.89, "elapsed_time": "3:05:20", "remaining_time": "6:01:35", "throughput": 2344.97, "total_tokens": 26077680} {"current_steps": 13560, "total_steps": 40000, "loss": 0.0619, "lr": 3.711432487770765e-05, "epoch": 2.2120890774125135, "percentage": 33.9, "elapsed_time": "3:05:22", "remaining_time": "6:01:27", "throughput": 2345.43, "total_tokens": 26087632} {"current_steps": 13565, "total_steps": 40000, "loss": 0.1784, "lr": 3.710573609765861e-05, "epoch": 2.2129048046333306, "percentage": 33.91, "elapsed_time": "3:05:24", "remaining_time": "6:01:19", "throughput": 2345.87, "total_tokens": 26097328} {"current_steps": 13570, "total_steps": 40000, "loss": 0.2022, "lr": 3.709714545075292e-05, "epoch": 2.2137205318541477, "percentage": 33.92, "elapsed_time": "3:05:26", "remaining_time": "6:01:11", "throughput": 2346.24, "total_tokens": 26106288} {"current_steps": 13575, "total_steps": 40000, "loss": 0.0146, "lr": 3.708855293831538e-05, "epoch": 2.2145362590749653, "percentage": 33.94, "elapsed_time": "3:05:28", "remaining_time": "6:01:03", "throughput": 2346.66, "total_tokens": 26115792} {"current_steps": 13580, "total_steps": 40000, "loss": 0.2115, "lr": 3.707995856167107e-05, "epoch": 2.215351986295783, "percentage": 33.95, "elapsed_time": "3:05:31", "remaining_time": "6:00:55", "throughput": 2347.0, "total_tokens": 26124512} {"current_steps": 13585, "total_steps": 40000, "loss": 0.0389, "lr": 3.707136232214534e-05, "epoch": 2.2161677135166, "percentage": 33.96, "elapsed_time": "3:05:33", "remaining_time": "6:00:47", "throughput": 2347.44, "total_tokens": 26134224} {"current_steps": 13590, "total_steps": 40000, "loss": 0.1845, "lr": 3.7062764221063844e-05, "epoch": 2.2169834407374176, "percentage": 33.98, "elapsed_time": "3:05:35", "remaining_time": "6:00:39", "throughput": 2347.85, "total_tokens": 26143584} {"current_steps": 13595, "total_steps": 40000, "loss": 0.0061, "lr": 3.705416425975252e-05, "epoch": 2.2177991679582347, "percentage": 33.99, "elapsed_time": "3:05:37", "remaining_time": "6:00:31", "throughput": 2348.29, "total_tokens": 26153408} {"current_steps": 13600, "total_steps": 40000, "loss": 0.0404, "lr": 3.704556243953758e-05, "epoch": 2.2186148951790523, "percentage": 34.0, "elapsed_time": "3:05:39", "remaining_time": "6:00:23", "throughput": 2348.73, "total_tokens": 26163104} {"current_steps": 13600, "total_steps": 40000, "eval_loss": 0.17065684497356415, "epoch": 2.2186148951790523, "percentage": 34.0, "elapsed_time": "3:06:59", "remaining_time": "6:02:59", "throughput": 2331.87, "total_tokens": 26163104} {"current_steps": 13605, "total_steps": 40000, "loss": 0.1406, "lr": 3.7036958761745535e-05, "epoch": 2.2194306223998694, "percentage": 34.01, "elapsed_time": "3:07:03", "remaining_time": "6:02:54", "throughput": 2331.86, "total_tokens": 26171696} {"current_steps": 13610, "total_steps": 40000, "loss": 0.1511, "lr": 3.702835322770318e-05, "epoch": 2.220246349620687, "percentage": 34.02, "elapsed_time": "3:07:05", "remaining_time": "6:02:46", "throughput": 2332.25, "total_tokens": 26180912} {"current_steps": 13615, "total_steps": 40000, "loss": 0.0171, "lr": 3.701974583873761e-05, "epoch": 2.221062076841504, "percentage": 34.04, "elapsed_time": "3:07:07", "remaining_time": "6:02:38", "throughput": 2332.6, "total_tokens": 26189680} {"current_steps": 13620, "total_steps": 40000, "loss": 0.095, "lr": 3.701113659617618e-05, "epoch": 2.2218778040623217, "percentage": 34.05, "elapsed_time": "3:07:09", "remaining_time": "6:02:30", "throughput": 2333.14, "total_tokens": 26200608} {"current_steps": 13625, "total_steps": 40000, "loss": 0.0573, "lr": 3.7002525501346535e-05, "epoch": 2.222693531283139, "percentage": 34.06, "elapsed_time": "3:07:11", "remaining_time": "6:02:22", "throughput": 2333.53, "total_tokens": 26209824} {"current_steps": 13630, "total_steps": 40000, "loss": 0.0266, "lr": 3.699391255557664e-05, "epoch": 2.2235092585039564, "percentage": 34.08, "elapsed_time": "3:07:13", "remaining_time": "6:02:14", "throughput": 2333.98, "total_tokens": 26219680} {"current_steps": 13635, "total_steps": 40000, "loss": 0.0671, "lr": 3.69852977601947e-05, "epoch": 2.2243249857247736, "percentage": 34.09, "elapsed_time": "3:07:15", "remaining_time": "6:02:06", "throughput": 2334.39, "total_tokens": 26229136} {"current_steps": 13640, "total_steps": 40000, "loss": 0.0478, "lr": 3.697668111652922e-05, "epoch": 2.225140712945591, "percentage": 34.1, "elapsed_time": "3:07:18", "remaining_time": "6:01:58", "throughput": 2334.77, "total_tokens": 26238208} {"current_steps": 13645, "total_steps": 40000, "loss": 0.1194, "lr": 3.6968062625909005e-05, "epoch": 2.2259564401664083, "percentage": 34.11, "elapsed_time": "3:07:20", "remaining_time": "6:01:49", "throughput": 2335.21, "total_tokens": 26247904} {"current_steps": 13650, "total_steps": 40000, "loss": 0.1431, "lr": 3.6959442289663135e-05, "epoch": 2.226772167387226, "percentage": 34.12, "elapsed_time": "3:07:22", "remaining_time": "6:01:41", "throughput": 2335.66, "total_tokens": 26257776} {"current_steps": 13655, "total_steps": 40000, "loss": 0.0667, "lr": 3.695082010912098e-05, "epoch": 2.227587894608043, "percentage": 34.14, "elapsed_time": "3:07:24", "remaining_time": "6:01:33", "throughput": 2336.13, "total_tokens": 26267936} {"current_steps": 13660, "total_steps": 40000, "loss": 0.0379, "lr": 3.694219608561217e-05, "epoch": 2.2284036218288605, "percentage": 34.15, "elapsed_time": "3:07:26", "remaining_time": "6:01:25", "throughput": 2336.58, "total_tokens": 26277840} {"current_steps": 13665, "total_steps": 40000, "loss": 0.0659, "lr": 3.693357022046665e-05, "epoch": 2.2292193490496777, "percentage": 34.16, "elapsed_time": "3:07:28", "remaining_time": "6:01:17", "throughput": 2336.99, "total_tokens": 26287296} {"current_steps": 13670, "total_steps": 40000, "loss": 0.1387, "lr": 3.6924942515014644e-05, "epoch": 2.2300350762704952, "percentage": 34.17, "elapsed_time": "3:07:30", "remaining_time": "6:01:09", "throughput": 2337.53, "total_tokens": 26298224} {"current_steps": 13675, "total_steps": 40000, "loss": 0.0416, "lr": 3.691631297058664e-05, "epoch": 2.2308508034913124, "percentage": 34.19, "elapsed_time": "3:07:32", "remaining_time": "6:01:01", "throughput": 2337.95, "total_tokens": 26307744} {"current_steps": 13680, "total_steps": 40000, "loss": 0.0389, "lr": 3.6907681588513424e-05, "epoch": 2.23166653071213, "percentage": 34.2, "elapsed_time": "3:07:34", "remaining_time": "6:00:53", "throughput": 2338.34, "total_tokens": 26316992} {"current_steps": 13685, "total_steps": 40000, "loss": 0.1392, "lr": 3.689904837012606e-05, "epoch": 2.232482257932947, "percentage": 34.21, "elapsed_time": "3:07:36", "remaining_time": "6:00:45", "throughput": 2338.84, "total_tokens": 26327424} {"current_steps": 13690, "total_steps": 40000, "loss": 0.0406, "lr": 3.689041331675591e-05, "epoch": 2.2332979851537647, "percentage": 34.23, "elapsed_time": "3:07:38", "remaining_time": "6:00:37", "throughput": 2339.35, "total_tokens": 26338000} {"current_steps": 13695, "total_steps": 40000, "loss": 0.3213, "lr": 3.688177642973461e-05, "epoch": 2.234113712374582, "percentage": 34.24, "elapsed_time": "3:07:40", "remaining_time": "6:00:29", "throughput": 2339.81, "total_tokens": 26347952} {"current_steps": 13700, "total_steps": 40000, "loss": 0.1075, "lr": 3.687313771039406e-05, "epoch": 2.2349294395953994, "percentage": 34.25, "elapsed_time": "3:07:42", "remaining_time": "6:00:21", "throughput": 2340.27, "total_tokens": 26358000} {"current_steps": 13705, "total_steps": 40000, "loss": 0.0841, "lr": 3.686449716006647e-05, "epoch": 2.2357451668162165, "percentage": 34.26, "elapsed_time": "3:07:44", "remaining_time": "6:00:13", "throughput": 2340.65, "total_tokens": 26367104} {"current_steps": 13710, "total_steps": 40000, "loss": 0.0602, "lr": 3.685585478008432e-05, "epoch": 2.236560894037034, "percentage": 34.27, "elapsed_time": "3:07:46", "remaining_time": "6:00:05", "throughput": 2341.08, "total_tokens": 26376848} {"current_steps": 13715, "total_steps": 40000, "loss": 0.0063, "lr": 3.6847210571780364e-05, "epoch": 2.237376621257851, "percentage": 34.29, "elapsed_time": "3:07:49", "remaining_time": "5:59:57", "throughput": 2341.48, "total_tokens": 26386128} {"current_steps": 13720, "total_steps": 40000, "loss": 0.0105, "lr": 3.683856453648767e-05, "epoch": 2.2381923484786688, "percentage": 34.3, "elapsed_time": "3:07:51", "remaining_time": "5:59:49", "throughput": 2341.81, "total_tokens": 26394688} {"current_steps": 13725, "total_steps": 40000, "loss": 0.0911, "lr": 3.682991667553954e-05, "epoch": 2.239008075699486, "percentage": 34.31, "elapsed_time": "3:07:53", "remaining_time": "5:59:41", "throughput": 2342.26, "total_tokens": 26404608} {"current_steps": 13730, "total_steps": 40000, "loss": 0.225, "lr": 3.6821266990269606e-05, "epoch": 2.2398238029203035, "percentage": 34.33, "elapsed_time": "3:07:55", "remaining_time": "5:59:33", "throughput": 2342.61, "total_tokens": 26413408} {"current_steps": 13735, "total_steps": 40000, "loss": 0.0768, "lr": 3.681261548201174e-05, "epoch": 2.2406395301411206, "percentage": 34.34, "elapsed_time": "3:07:57", "remaining_time": "5:59:25", "throughput": 2343.11, "total_tokens": 26423840} {"current_steps": 13740, "total_steps": 40000, "loss": 0.0777, "lr": 3.6803962152100125e-05, "epoch": 2.241455257361938, "percentage": 34.35, "elapsed_time": "3:07:59", "remaining_time": "5:59:17", "throughput": 2343.42, "total_tokens": 26432240} {"current_steps": 13745, "total_steps": 40000, "loss": 0.1224, "lr": 3.67953070018692e-05, "epoch": 2.2422709845827553, "percentage": 34.36, "elapsed_time": "3:08:01", "remaining_time": "5:59:09", "throughput": 2343.95, "total_tokens": 26443088} {"current_steps": 13750, "total_steps": 40000, "loss": 0.12, "lr": 3.678665003265371e-05, "epoch": 2.243086711803573, "percentage": 34.38, "elapsed_time": "3:08:03", "remaining_time": "5:59:01", "throughput": 2344.4, "total_tokens": 26453024} {"current_steps": 13755, "total_steps": 40000, "loss": 0.196, "lr": 3.677799124578867e-05, "epoch": 2.2439024390243905, "percentage": 34.39, "elapsed_time": "3:08:05", "remaining_time": "5:58:53", "throughput": 2344.83, "total_tokens": 26462704} {"current_steps": 13760, "total_steps": 40000, "loss": 0.0259, "lr": 3.676933064260937e-05, "epoch": 2.2447181662452076, "percentage": 34.4, "elapsed_time": "3:08:07", "remaining_time": "5:58:45", "throughput": 2345.33, "total_tokens": 26473200} {"current_steps": 13765, "total_steps": 40000, "loss": 0.0437, "lr": 3.6760668224451365e-05, "epoch": 2.245533893466025, "percentage": 34.41, "elapsed_time": "3:08:09", "remaining_time": "5:58:37", "throughput": 2345.69, "total_tokens": 26482048} {"current_steps": 13770, "total_steps": 40000, "loss": 0.0072, "lr": 3.675200399265054e-05, "epoch": 2.2463496206868423, "percentage": 34.42, "elapsed_time": "3:08:11", "remaining_time": "5:58:29", "throughput": 2346.04, "total_tokens": 26490816} {"current_steps": 13775, "total_steps": 40000, "loss": 0.0528, "lr": 3.6743337948543014e-05, "epoch": 2.24716534790766, "percentage": 34.44, "elapsed_time": "3:08:13", "remaining_time": "5:58:21", "throughput": 2346.5, "total_tokens": 26500928} {"current_steps": 13780, "total_steps": 40000, "loss": 0.0434, "lr": 3.6734670093465204e-05, "epoch": 2.247981075128477, "percentage": 34.45, "elapsed_time": "3:08:15", "remaining_time": "5:58:13", "throughput": 2346.93, "total_tokens": 26510688} {"current_steps": 13785, "total_steps": 40000, "loss": 0.0622, "lr": 3.672600042875379e-05, "epoch": 2.2487968023492946, "percentage": 34.46, "elapsed_time": "3:08:17", "remaining_time": "5:58:05", "throughput": 2347.21, "total_tokens": 26518704} {"current_steps": 13790, "total_steps": 40000, "loss": 0.0962, "lr": 3.671732895574575e-05, "epoch": 2.2496125295701117, "percentage": 34.48, "elapsed_time": "3:08:20", "remaining_time": "5:57:57", "throughput": 2347.55, "total_tokens": 26527376} {"current_steps": 13795, "total_steps": 40000, "loss": 0.2155, "lr": 3.670865567577834e-05, "epoch": 2.2504282567909293, "percentage": 34.49, "elapsed_time": "3:08:22", "remaining_time": "5:57:49", "throughput": 2347.89, "total_tokens": 26536080} {"current_steps": 13800, "total_steps": 40000, "loss": 0.0478, "lr": 3.669998059018909e-05, "epoch": 2.2512439840117464, "percentage": 34.5, "elapsed_time": "3:08:24", "remaining_time": "5:57:41", "throughput": 2348.36, "total_tokens": 26546240} {"current_steps": 13800, "total_steps": 40000, "eval_loss": 0.17794707417488098, "epoch": 2.2512439840117464, "percentage": 34.5, "elapsed_time": "3:09:44", "remaining_time": "6:00:14", "throughput": 2331.75, "total_tokens": 26546240} {"current_steps": 13805, "total_steps": 40000, "loss": 0.0871, "lr": 3.6691303700315796e-05, "epoch": 2.252059711232564, "percentage": 34.51, "elapsed_time": "3:09:48", "remaining_time": "6:00:09", "throughput": 2331.72, "total_tokens": 26554800} {"current_steps": 13810, "total_steps": 40000, "loss": 0.0659, "lr": 3.668262500749655e-05, "epoch": 2.252875438453381, "percentage": 34.52, "elapsed_time": "3:09:50", "remaining_time": "6:00:01", "throughput": 2332.06, "total_tokens": 26563408} {"current_steps": 13815, "total_steps": 40000, "loss": 0.1567, "lr": 3.667394451306971e-05, "epoch": 2.2536911656741987, "percentage": 34.54, "elapsed_time": "3:09:52", "remaining_time": "5:59:53", "throughput": 2332.45, "total_tokens": 26572720} {"current_steps": 13820, "total_steps": 40000, "loss": 0.0248, "lr": 3.666526221837393e-05, "epoch": 2.254506892895016, "percentage": 34.55, "elapsed_time": "3:09:54", "remaining_time": "5:59:45", "throughput": 2332.88, "total_tokens": 26582432} {"current_steps": 13825, "total_steps": 40000, "loss": 0.0767, "lr": 3.665657812474812e-05, "epoch": 2.2553226201158334, "percentage": 34.56, "elapsed_time": "3:09:56", "remaining_time": "5:59:37", "throughput": 2333.33, "total_tokens": 26592336} {"current_steps": 13830, "total_steps": 40000, "loss": 0.0073, "lr": 3.664789223353147e-05, "epoch": 2.2561383473366505, "percentage": 34.58, "elapsed_time": "3:09:58", "remaining_time": "5:59:29", "throughput": 2333.68, "total_tokens": 26601136} {"current_steps": 13835, "total_steps": 40000, "loss": 0.1118, "lr": 3.663920454606347e-05, "epoch": 2.256954074557468, "percentage": 34.59, "elapsed_time": "3:10:00", "remaining_time": "5:59:21", "throughput": 2334.19, "total_tokens": 26611904} {"current_steps": 13840, "total_steps": 40000, "loss": 0.1105, "lr": 3.6630515063683856e-05, "epoch": 2.2577698017782852, "percentage": 34.6, "elapsed_time": "3:10:02", "remaining_time": "5:59:13", "throughput": 2334.47, "total_tokens": 26619888} {"current_steps": 13845, "total_steps": 40000, "loss": 0.0056, "lr": 3.662182378773267e-05, "epoch": 2.258585528999103, "percentage": 34.61, "elapsed_time": "3:10:05", "remaining_time": "5:59:05", "throughput": 2334.93, "total_tokens": 26629936} {"current_steps": 13850, "total_steps": 40000, "loss": 0.1042, "lr": 3.66131307195502e-05, "epoch": 2.25940125621992, "percentage": 34.62, "elapsed_time": "3:10:07", "remaining_time": "5:58:57", "throughput": 2335.34, "total_tokens": 26639424} {"current_steps": 13855, "total_steps": 40000, "loss": 0.0862, "lr": 3.6604435860477034e-05, "epoch": 2.2602169834407375, "percentage": 34.64, "elapsed_time": "3:10:09", "remaining_time": "5:58:49", "throughput": 2335.69, "total_tokens": 26648288} {"current_steps": 13860, "total_steps": 40000, "loss": 0.0859, "lr": 3.6595739211854025e-05, "epoch": 2.2610327106615546, "percentage": 34.65, "elapsed_time": "3:10:11", "remaining_time": "5:58:41", "throughput": 2336.24, "total_tokens": 26659328} {"current_steps": 13865, "total_steps": 40000, "loss": 0.181, "lr": 3.658704077502231e-05, "epoch": 2.261848437882372, "percentage": 34.66, "elapsed_time": "3:10:13", "remaining_time": "5:58:33", "throughput": 2336.54, "total_tokens": 26667632} {"current_steps": 13870, "total_steps": 40000, "loss": 0.017, "lr": 3.65783405513233e-05, "epoch": 2.2626641651031894, "percentage": 34.67, "elapsed_time": "3:10:15", "remaining_time": "5:58:25", "throughput": 2336.9, "total_tokens": 26676640} {"current_steps": 13875, "total_steps": 40000, "loss": 0.0968, "lr": 3.656963854209867e-05, "epoch": 2.263479892324007, "percentage": 34.69, "elapsed_time": "3:10:17", "remaining_time": "5:58:17", "throughput": 2337.29, "total_tokens": 26685904} {"current_steps": 13880, "total_steps": 40000, "loss": 0.1043, "lr": 3.656093474869038e-05, "epoch": 2.264295619544824, "percentage": 34.7, "elapsed_time": "3:10:19", "remaining_time": "5:58:09", "throughput": 2337.72, "total_tokens": 26695632} {"current_steps": 13885, "total_steps": 40000, "loss": 0.1514, "lr": 3.655222917244068e-05, "epoch": 2.2651113467656416, "percentage": 34.71, "elapsed_time": "3:10:21", "remaining_time": "5:58:01", "throughput": 2338.11, "total_tokens": 26704880} {"current_steps": 13890, "total_steps": 40000, "loss": 0.0998, "lr": 3.6543521814692054e-05, "epoch": 2.2659270739864588, "percentage": 34.73, "elapsed_time": "3:10:23", "remaining_time": "5:57:53", "throughput": 2338.47, "total_tokens": 26713872} {"current_steps": 13895, "total_steps": 40000, "loss": 0.1238, "lr": 3.653481267678731e-05, "epoch": 2.2667428012072763, "percentage": 34.74, "elapsed_time": "3:10:25", "remaining_time": "5:57:45", "throughput": 2338.85, "total_tokens": 26723056} {"current_steps": 13900, "total_steps": 40000, "loss": 0.1063, "lr": 3.652610176006949e-05, "epoch": 2.2675585284280935, "percentage": 34.75, "elapsed_time": "3:10:27", "remaining_time": "5:57:37", "throughput": 2339.23, "total_tokens": 26732208} {"current_steps": 13905, "total_steps": 40000, "loss": 0.0769, "lr": 3.6517389065881925e-05, "epoch": 2.268374255648911, "percentage": 34.76, "elapsed_time": "3:10:29", "remaining_time": "5:57:29", "throughput": 2339.61, "total_tokens": 26741344} {"current_steps": 13910, "total_steps": 40000, "loss": 0.0152, "lr": 3.650867459556824e-05, "epoch": 2.269189982869728, "percentage": 34.77, "elapsed_time": "3:10:31", "remaining_time": "5:57:22", "throughput": 2339.99, "total_tokens": 26750592} {"current_steps": 13915, "total_steps": 40000, "loss": 0.1839, "lr": 3.64999583504723e-05, "epoch": 2.2700057100905457, "percentage": 34.79, "elapsed_time": "3:10:33", "remaining_time": "5:57:14", "throughput": 2340.3, "total_tokens": 26758928} {"current_steps": 13920, "total_steps": 40000, "loss": 0.1524, "lr": 3.649124033193827e-05, "epoch": 2.270821437311363, "percentage": 34.8, "elapsed_time": "3:10:36", "remaining_time": "5:57:06", "throughput": 2340.71, "total_tokens": 26768400} {"current_steps": 13925, "total_steps": 40000, "loss": 0.064, "lr": 3.648252054131057e-05, "epoch": 2.2716371645321805, "percentage": 34.81, "elapsed_time": "3:10:38", "remaining_time": "5:56:58", "throughput": 2341.02, "total_tokens": 26776832} {"current_steps": 13930, "total_steps": 40000, "loss": 0.0473, "lr": 3.647379897993391e-05, "epoch": 2.272452891752998, "percentage": 34.83, "elapsed_time": "3:10:40", "remaining_time": "5:56:50", "throughput": 2341.43, "total_tokens": 26786336} {"current_steps": 13935, "total_steps": 40000, "loss": 0.1703, "lr": 3.646507564915325e-05, "epoch": 2.273268618973815, "percentage": 34.84, "elapsed_time": "3:10:42", "remaining_time": "5:56:42", "throughput": 2341.76, "total_tokens": 26794992} {"current_steps": 13940, "total_steps": 40000, "loss": 0.3162, "lr": 3.645635055031385e-05, "epoch": 2.2740843461946323, "percentage": 34.85, "elapsed_time": "3:10:44", "remaining_time": "5:56:34", "throughput": 2342.24, "total_tokens": 26805328} {"current_steps": 13945, "total_steps": 40000, "loss": 0.0189, "lr": 3.6447623684761224e-05, "epoch": 2.27490007341545, "percentage": 34.86, "elapsed_time": "3:10:46", "remaining_time": "5:56:26", "throughput": 2342.62, "total_tokens": 26814480} {"current_steps": 13950, "total_steps": 40000, "loss": 0.0753, "lr": 3.643889505384117e-05, "epoch": 2.2757158006362674, "percentage": 34.88, "elapsed_time": "3:10:48", "remaining_time": "5:56:18", "throughput": 2343.1, "total_tokens": 26824832} {"current_steps": 13955, "total_steps": 40000, "loss": 0.0211, "lr": 3.6430164658899744e-05, "epoch": 2.2765315278570846, "percentage": 34.89, "elapsed_time": "3:10:50", "remaining_time": "5:56:10", "throughput": 2343.57, "total_tokens": 26835104} {"current_steps": 13960, "total_steps": 40000, "loss": 0.0085, "lr": 3.642143250128329e-05, "epoch": 2.2773472550779017, "percentage": 34.9, "elapsed_time": "3:10:52", "remaining_time": "5:56:02", "throughput": 2344.04, "total_tokens": 26845248} {"current_steps": 13965, "total_steps": 40000, "loss": 0.0486, "lr": 3.641269858233841e-05, "epoch": 2.2781629822987193, "percentage": 34.91, "elapsed_time": "3:10:54", "remaining_time": "5:55:54", "throughput": 2344.5, "total_tokens": 26855456} {"current_steps": 13970, "total_steps": 40000, "loss": 0.1744, "lr": 3.640396290341199e-05, "epoch": 2.278978709519537, "percentage": 34.92, "elapsed_time": "3:10:56", "remaining_time": "5:55:47", "throughput": 2344.93, "total_tokens": 26865168} {"current_steps": 13975, "total_steps": 40000, "loss": 0.1156, "lr": 3.639522546585118e-05, "epoch": 2.279794436740354, "percentage": 34.94, "elapsed_time": "3:10:58", "remaining_time": "5:55:39", "throughput": 2345.32, "total_tokens": 26874464} {"current_steps": 13980, "total_steps": 40000, "loss": 0.0471, "lr": 3.6386486271003404e-05, "epoch": 2.2806101639611716, "percentage": 34.95, "elapsed_time": "3:11:00", "remaining_time": "5:55:31", "throughput": 2345.7, "total_tokens": 26883664} {"current_steps": 13985, "total_steps": 40000, "loss": 0.1646, "lr": 3.6377745320216346e-05, "epoch": 2.2814258911819887, "percentage": 34.96, "elapsed_time": "3:11:02", "remaining_time": "5:55:23", "throughput": 2346.09, "total_tokens": 26892976} {"current_steps": 13990, "total_steps": 40000, "loss": 0.0302, "lr": 3.636900261483798e-05, "epoch": 2.2822416184028063, "percentage": 34.98, "elapsed_time": "3:11:04", "remaining_time": "5:55:15", "throughput": 2346.53, "total_tokens": 26902896} {"current_steps": 13995, "total_steps": 40000, "loss": 0.0452, "lr": 3.636025815621654e-05, "epoch": 2.2830573456236234, "percentage": 34.99, "elapsed_time": "3:11:07", "remaining_time": "5:55:07", "throughput": 2347.04, "total_tokens": 26913536} {"current_steps": 14000, "total_steps": 40000, "loss": 0.0021, "lr": 3.635151194570054e-05, "epoch": 2.283873072844441, "percentage": 35.0, "elapsed_time": "3:11:09", "remaining_time": "5:54:59", "throughput": 2347.48, "total_tokens": 26923408} {"current_steps": 14000, "total_steps": 40000, "eval_loss": 0.16863511502742767, "epoch": 2.283873072844441, "percentage": 35.0, "elapsed_time": "3:12:29", "remaining_time": "5:57:29", "throughput": 2331.1, "total_tokens": 26923408} {"current_steps": 14005, "total_steps": 40000, "loss": 0.157, "lr": 3.634276398463873e-05, "epoch": 2.284688800065258, "percentage": 35.01, "elapsed_time": "3:12:35", "remaining_time": "5:57:27", "throughput": 2330.9, "total_tokens": 26933664} {"current_steps": 14010, "total_steps": 40000, "loss": 0.078, "lr": 3.633401427438018e-05, "epoch": 2.2855045272860757, "percentage": 35.02, "elapsed_time": "3:12:37", "remaining_time": "5:57:19", "throughput": 2331.32, "total_tokens": 26943312} {"current_steps": 14015, "total_steps": 40000, "loss": 0.0272, "lr": 3.63252628162742e-05, "epoch": 2.286320254506893, "percentage": 35.04, "elapsed_time": "3:12:39", "remaining_time": "5:57:11", "throughput": 2331.68, "total_tokens": 26952288} {"current_steps": 14020, "total_steps": 40000, "loss": 0.0299, "lr": 3.6316509611670364e-05, "epoch": 2.2871359817277104, "percentage": 35.05, "elapsed_time": "3:12:41", "remaining_time": "5:57:03", "throughput": 2332.14, "total_tokens": 26962448} {"current_steps": 14025, "total_steps": 40000, "loss": 0.0477, "lr": 3.630775466191854e-05, "epoch": 2.2879517089485275, "percentage": 35.06, "elapsed_time": "3:12:43", "remaining_time": "5:56:55", "throughput": 2332.48, "total_tokens": 26971168} {"current_steps": 14030, "total_steps": 40000, "loss": 0.0482, "lr": 3.629899796836884e-05, "epoch": 2.288767436169345, "percentage": 35.08, "elapsed_time": "3:12:45", "remaining_time": "5:56:47", "throughput": 2332.85, "total_tokens": 26980336} {"current_steps": 14035, "total_steps": 40000, "loss": 0.0712, "lr": 3.6290239532371666e-05, "epoch": 2.289583163390162, "percentage": 35.09, "elapsed_time": "3:12:47", "remaining_time": "5:56:40", "throughput": 2333.28, "total_tokens": 26990160} {"current_steps": 14040, "total_steps": 40000, "loss": 0.0795, "lr": 3.628147935527767e-05, "epoch": 2.29039889061098, "percentage": 35.1, "elapsed_time": "3:12:49", "remaining_time": "5:56:32", "throughput": 2333.66, "total_tokens": 26999392} {"current_steps": 14045, "total_steps": 40000, "loss": 0.0673, "lr": 3.627271743843779e-05, "epoch": 2.291214617831797, "percentage": 35.11, "elapsed_time": "3:12:51", "remaining_time": "5:56:24", "throughput": 2333.99, "total_tokens": 27007968} {"current_steps": 14050, "total_steps": 40000, "loss": 0.0047, "lr": 3.626395378320321e-05, "epoch": 2.2920303450526145, "percentage": 35.12, "elapsed_time": "3:12:53", "remaining_time": "5:56:16", "throughput": 2334.44, "total_tokens": 27017952} {"current_steps": 14055, "total_steps": 40000, "loss": 0.1122, "lr": 3.625518839092541e-05, "epoch": 2.2928460722734316, "percentage": 35.14, "elapsed_time": "3:12:55", "remaining_time": "5:56:08", "throughput": 2334.82, "total_tokens": 27027264} {"current_steps": 14060, "total_steps": 40000, "loss": 0.1019, "lr": 3.624642126295612e-05, "epoch": 2.293661799494249, "percentage": 35.15, "elapsed_time": "3:12:57", "remaining_time": "5:56:00", "throughput": 2335.3, "total_tokens": 27037648} {"current_steps": 14065, "total_steps": 40000, "loss": 0.0534, "lr": 3.6237652400647345e-05, "epoch": 2.2944775267150663, "percentage": 35.16, "elapsed_time": "3:12:59", "remaining_time": "5:55:52", "throughput": 2335.7, "total_tokens": 27047056} {"current_steps": 14070, "total_steps": 40000, "loss": 0.0073, "lr": 3.622888180535134e-05, "epoch": 2.295293253935884, "percentage": 35.17, "elapsed_time": "3:13:01", "remaining_time": "5:55:44", "throughput": 2336.07, "total_tokens": 27056240} {"current_steps": 14075, "total_steps": 40000, "loss": 0.1271, "lr": 3.6220109478420655e-05, "epoch": 2.296108981156701, "percentage": 35.19, "elapsed_time": "3:13:04", "remaining_time": "5:55:36", "throughput": 2336.5, "total_tokens": 27066064} {"current_steps": 14080, "total_steps": 40000, "loss": 0.0743, "lr": 3.6211335421208084e-05, "epoch": 2.2969247083775186, "percentage": 35.2, "elapsed_time": "3:13:06", "remaining_time": "5:55:28", "throughput": 2336.95, "total_tokens": 27076064} {"current_steps": 14085, "total_steps": 40000, "loss": 0.1122, "lr": 3.62025596350667e-05, "epoch": 2.2977404355983357, "percentage": 35.21, "elapsed_time": "3:13:08", "remaining_time": "5:55:21", "throughput": 2337.36, "total_tokens": 27085744} {"current_steps": 14090, "total_steps": 40000, "loss": 0.1525, "lr": 3.619378212134984e-05, "epoch": 2.2985561628191533, "percentage": 35.23, "elapsed_time": "3:13:10", "remaining_time": "5:55:13", "throughput": 2337.81, "total_tokens": 27095760} {"current_steps": 14095, "total_steps": 40000, "loss": 0.0305, "lr": 3.618500288141111e-05, "epoch": 2.2993718900399704, "percentage": 35.24, "elapsed_time": "3:13:12", "remaining_time": "5:55:05", "throughput": 2338.27, "total_tokens": 27105952} {"current_steps": 14100, "total_steps": 40000, "loss": 0.0854, "lr": 3.617622191660438e-05, "epoch": 2.300187617260788, "percentage": 35.25, "elapsed_time": "3:13:14", "remaining_time": "5:54:57", "throughput": 2338.65, "total_tokens": 27115184} {"current_steps": 14105, "total_steps": 40000, "loss": 0.0686, "lr": 3.616743922828377e-05, "epoch": 2.3010033444816056, "percentage": 35.26, "elapsed_time": "3:13:16", "remaining_time": "5:54:49", "throughput": 2339.09, "total_tokens": 27125120} {"current_steps": 14110, "total_steps": 40000, "loss": 0.0251, "lr": 3.615865481780371e-05, "epoch": 2.3018190717024227, "percentage": 35.27, "elapsed_time": "3:13:18", "remaining_time": "5:54:41", "throughput": 2339.45, "total_tokens": 27134128} {"current_steps": 14115, "total_steps": 40000, "loss": 0.0072, "lr": 3.614986868651883e-05, "epoch": 2.30263479892324, "percentage": 35.29, "elapsed_time": "3:13:20", "remaining_time": "5:54:33", "throughput": 2339.88, "total_tokens": 27143904} {"current_steps": 14120, "total_steps": 40000, "loss": 0.0543, "lr": 3.614108083578409e-05, "epoch": 2.3034505261440574, "percentage": 35.3, "elapsed_time": "3:13:22", "remaining_time": "5:54:25", "throughput": 2340.34, "total_tokens": 27154112} {"current_steps": 14125, "total_steps": 40000, "loss": 0.1827, "lr": 3.613229126695467e-05, "epoch": 2.304266253364875, "percentage": 35.31, "elapsed_time": "3:13:24", "remaining_time": "5:54:18", "throughput": 2340.67, "total_tokens": 27162736} {"current_steps": 14130, "total_steps": 40000, "loss": 0.2436, "lr": 3.612349998138605e-05, "epoch": 2.305081980585692, "percentage": 35.33, "elapsed_time": "3:13:26", "remaining_time": "5:54:10", "throughput": 2341.05, "total_tokens": 27172016} {"current_steps": 14135, "total_steps": 40000, "loss": 0.0559, "lr": 3.6114706980433946e-05, "epoch": 2.3058977078065093, "percentage": 35.34, "elapsed_time": "3:13:28", "remaining_time": "5:54:02", "throughput": 2341.47, "total_tokens": 27181696} {"current_steps": 14140, "total_steps": 40000, "loss": 0.0105, "lr": 3.610591226545435e-05, "epoch": 2.306713435027327, "percentage": 35.35, "elapsed_time": "3:13:30", "remaining_time": "5:53:54", "throughput": 2341.84, "total_tokens": 27190848} {"current_steps": 14145, "total_steps": 40000, "loss": 0.0425, "lr": 3.6097115837803505e-05, "epoch": 2.3075291622481444, "percentage": 35.36, "elapsed_time": "3:13:32", "remaining_time": "5:53:46", "throughput": 2342.26, "total_tokens": 27200608} {"current_steps": 14150, "total_steps": 40000, "loss": 0.0292, "lr": 3.608831769883795e-05, "epoch": 2.3083448894689615, "percentage": 35.38, "elapsed_time": "3:13:35", "remaining_time": "5:53:38", "throughput": 2342.78, "total_tokens": 27211392} {"current_steps": 14155, "total_steps": 40000, "loss": 0.1042, "lr": 3.607951784991446e-05, "epoch": 2.309160616689779, "percentage": 35.39, "elapsed_time": "3:13:37", "remaining_time": "5:53:31", "throughput": 2343.14, "total_tokens": 27220432} {"current_steps": 14160, "total_steps": 40000, "loss": 0.1631, "lr": 3.6070716292390085e-05, "epoch": 2.3099763439105963, "percentage": 35.4, "elapsed_time": "3:13:39", "remaining_time": "5:53:23", "throughput": 2343.47, "total_tokens": 27229120} {"current_steps": 14165, "total_steps": 40000, "loss": 0.0073, "lr": 3.606191302762213e-05, "epoch": 2.310792071131414, "percentage": 35.41, "elapsed_time": "3:13:41", "remaining_time": "5:53:15", "throughput": 2343.93, "total_tokens": 27239312} {"current_steps": 14170, "total_steps": 40000, "loss": 0.1752, "lr": 3.605310805696818e-05, "epoch": 2.311607798352231, "percentage": 35.43, "elapsed_time": "3:13:43", "remaining_time": "5:53:07", "throughput": 2344.3, "total_tokens": 27248464} {"current_steps": 14175, "total_steps": 40000, "loss": 0.005, "lr": 3.6044301381786067e-05, "epoch": 2.3124235255730485, "percentage": 35.44, "elapsed_time": "3:13:45", "remaining_time": "5:52:59", "throughput": 2344.71, "total_tokens": 27258096} {"current_steps": 14180, "total_steps": 40000, "loss": 0.1998, "lr": 3.6035493003433883e-05, "epoch": 2.3132392527938657, "percentage": 35.45, "elapsed_time": "3:13:47", "remaining_time": "5:52:52", "throughput": 2345.27, "total_tokens": 27269424} {"current_steps": 14185, "total_steps": 40000, "loss": 0.0447, "lr": 3.6026682923269994e-05, "epoch": 2.3140549800146832, "percentage": 35.46, "elapsed_time": "3:13:49", "remaining_time": "5:52:44", "throughput": 2345.64, "total_tokens": 27278592} {"current_steps": 14190, "total_steps": 40000, "loss": 0.0412, "lr": 3.6017871142653034e-05, "epoch": 2.3148707072355004, "percentage": 35.48, "elapsed_time": "3:13:51", "remaining_time": "5:52:36", "throughput": 2346.11, "total_tokens": 27288880} {"current_steps": 14195, "total_steps": 40000, "loss": 0.0347, "lr": 3.600905766294189e-05, "epoch": 2.315686434456318, "percentage": 35.49, "elapsed_time": "3:13:53", "remaining_time": "5:52:28", "throughput": 2346.62, "total_tokens": 27299728} {"current_steps": 14200, "total_steps": 40000, "loss": 0.0934, "lr": 3.60002424854957e-05, "epoch": 2.316502161677135, "percentage": 35.5, "elapsed_time": "3:13:55", "remaining_time": "5:52:20", "throughput": 2347.03, "total_tokens": 27309344} {"current_steps": 14200, "total_steps": 40000, "eval_loss": 0.17132551968097687, "epoch": 2.316502161677135, "percentage": 35.5, "elapsed_time": "3:15:16", "remaining_time": "5:54:47", "throughput": 2330.89, "total_tokens": 27309344} {"current_steps": 14205, "total_steps": 40000, "loss": 0.073, "lr": 3.5991425611673876e-05, "epoch": 2.3173178888979526, "percentage": 35.51, "elapsed_time": "3:15:19", "remaining_time": "5:54:42", "throughput": 2331.08, "total_tokens": 27319936} {"current_steps": 14210, "total_steps": 40000, "loss": 0.2091, "lr": 3.5982607042836105e-05, "epoch": 2.31813361611877, "percentage": 35.52, "elapsed_time": "3:15:21", "remaining_time": "5:54:34", "throughput": 2331.5, "total_tokens": 27329680} {"current_steps": 14215, "total_steps": 40000, "loss": 0.1155, "lr": 3.597378678034231e-05, "epoch": 2.3189493433395874, "percentage": 35.54, "elapsed_time": "3:15:24", "remaining_time": "5:54:26", "throughput": 2332.02, "total_tokens": 27340576} {"current_steps": 14220, "total_steps": 40000, "loss": 0.0919, "lr": 3.596496482555269e-05, "epoch": 2.3197650705604045, "percentage": 35.55, "elapsed_time": "3:15:26", "remaining_time": "5:54:18", "throughput": 2332.5, "total_tokens": 27351056} {"current_steps": 14225, "total_steps": 40000, "loss": 0.0787, "lr": 3.595614117982769e-05, "epoch": 2.320580797781222, "percentage": 35.56, "elapsed_time": "3:15:28", "remaining_time": "5:54:10", "throughput": 2332.95, "total_tokens": 27361232} {"current_steps": 14230, "total_steps": 40000, "loss": 0.0187, "lr": 3.594731584452805e-05, "epoch": 2.321396525002039, "percentage": 35.58, "elapsed_time": "3:15:30", "remaining_time": "5:54:02", "throughput": 2333.3, "total_tokens": 27370096} {"current_steps": 14235, "total_steps": 40000, "loss": 0.0075, "lr": 3.593848882101472e-05, "epoch": 2.3222122522228568, "percentage": 35.59, "elapsed_time": "3:15:32", "remaining_time": "5:53:55", "throughput": 2333.72, "total_tokens": 27379840} {"current_steps": 14240, "total_steps": 40000, "loss": 0.0677, "lr": 3.592966011064896e-05, "epoch": 2.323027979443674, "percentage": 35.6, "elapsed_time": "3:15:34", "remaining_time": "5:53:47", "throughput": 2334.08, "total_tokens": 27388928} {"current_steps": 14245, "total_steps": 40000, "loss": 0.1232, "lr": 3.592082971479226e-05, "epoch": 2.3238437066644915, "percentage": 35.61, "elapsed_time": "3:15:36", "remaining_time": "5:53:39", "throughput": 2334.47, "total_tokens": 27398288} {"current_steps": 14250, "total_steps": 40000, "loss": 0.0844, "lr": 3.5911997634806385e-05, "epoch": 2.3246594338853086, "percentage": 35.62, "elapsed_time": "3:15:38", "remaining_time": "5:53:31", "throughput": 2334.97, "total_tokens": 27408960} {"current_steps": 14255, "total_steps": 40000, "loss": 0.2123, "lr": 3.5903163872053336e-05, "epoch": 2.325475161106126, "percentage": 35.64, "elapsed_time": "3:15:40", "remaining_time": "5:53:23", "throughput": 2335.44, "total_tokens": 27419296} {"current_steps": 14260, "total_steps": 40000, "loss": 0.0776, "lr": 3.58943284278954e-05, "epoch": 2.3262908883269433, "percentage": 35.65, "elapsed_time": "3:15:42", "remaining_time": "5:53:15", "throughput": 2335.85, "total_tokens": 27428912} {"current_steps": 14265, "total_steps": 40000, "loss": 0.079, "lr": 3.588549130369512e-05, "epoch": 2.327106615547761, "percentage": 35.66, "elapsed_time": "3:15:44", "remaining_time": "5:53:08", "throughput": 2336.05, "total_tokens": 27436144} {"current_steps": 14270, "total_steps": 40000, "loss": 0.0192, "lr": 3.5876652500815274e-05, "epoch": 2.327922342768578, "percentage": 35.68, "elapsed_time": "3:15:46", "remaining_time": "5:53:00", "throughput": 2336.5, "total_tokens": 27446272} {"current_steps": 14275, "total_steps": 40000, "loss": 0.0749, "lr": 3.586781202061894e-05, "epoch": 2.3287380699893956, "percentage": 35.69, "elapsed_time": "3:15:48", "remaining_time": "5:52:52", "throughput": 2336.88, "total_tokens": 27455536} {"current_steps": 14280, "total_steps": 40000, "loss": 0.0599, "lr": 3.585896986446942e-05, "epoch": 2.3295537972102127, "percentage": 35.7, "elapsed_time": "3:15:50", "remaining_time": "5:52:44", "throughput": 2337.32, "total_tokens": 27465584} {"current_steps": 14285, "total_steps": 40000, "loss": 0.0567, "lr": 3.585012603373028e-05, "epoch": 2.3303695244310303, "percentage": 35.71, "elapsed_time": "3:15:52", "remaining_time": "5:52:36", "throughput": 2337.74, "total_tokens": 27475280} {"current_steps": 14290, "total_steps": 40000, "loss": 0.0143, "lr": 3.584128052976535e-05, "epoch": 2.3311852516518474, "percentage": 35.73, "elapsed_time": "3:15:55", "remaining_time": "5:52:29", "throughput": 2338.23, "total_tokens": 27485888} {"current_steps": 14295, "total_steps": 40000, "loss": 0.0044, "lr": 3.5832433353938724e-05, "epoch": 2.332000978872665, "percentage": 35.74, "elapsed_time": "3:15:57", "remaining_time": "5:52:21", "throughput": 2338.76, "total_tokens": 27496976} {"current_steps": 14300, "total_steps": 40000, "loss": 0.0238, "lr": 3.5823584507614746e-05, "epoch": 2.3328167060934826, "percentage": 35.75, "elapsed_time": "3:15:59", "remaining_time": "5:52:13", "throughput": 2339.07, "total_tokens": 27505424} {"current_steps": 14305, "total_steps": 40000, "loss": 0.0921, "lr": 3.581473399215802e-05, "epoch": 2.3336324333142997, "percentage": 35.76, "elapsed_time": "3:16:01", "remaining_time": "5:52:05", "throughput": 2339.41, "total_tokens": 27514336} {"current_steps": 14310, "total_steps": 40000, "loss": 0.1117, "lr": 3.580588180893341e-05, "epoch": 2.334448160535117, "percentage": 35.77, "elapsed_time": "3:16:03", "remaining_time": "5:51:58", "throughput": 2339.77, "total_tokens": 27523456} {"current_steps": 14315, "total_steps": 40000, "loss": 0.0087, "lr": 3.579702795930602e-05, "epoch": 2.3352638877559344, "percentage": 35.79, "elapsed_time": "3:16:05", "remaining_time": "5:51:50", "throughput": 2340.08, "total_tokens": 27531904} {"current_steps": 14320, "total_steps": 40000, "loss": 0.0591, "lr": 3.578817244464125e-05, "epoch": 2.336079614976752, "percentage": 35.8, "elapsed_time": "3:16:07", "remaining_time": "5:51:42", "throughput": 2340.51, "total_tokens": 27541776} {"current_steps": 14325, "total_steps": 40000, "loss": 0.2779, "lr": 3.577931526630471e-05, "epoch": 2.336895342197569, "percentage": 35.81, "elapsed_time": "3:16:09", "remaining_time": "5:51:34", "throughput": 2340.94, "total_tokens": 27551648} {"current_steps": 14330, "total_steps": 40000, "loss": 0.033, "lr": 3.577045642566229e-05, "epoch": 2.3377110694183862, "percentage": 35.83, "elapsed_time": "3:16:11", "remaining_time": "5:51:26", "throughput": 2341.28, "total_tokens": 27560480} {"current_steps": 14335, "total_steps": 40000, "loss": 0.0463, "lr": 3.576159592408014e-05, "epoch": 2.338526796639204, "percentage": 35.84, "elapsed_time": "3:16:13", "remaining_time": "5:51:19", "throughput": 2341.73, "total_tokens": 27570624} {"current_steps": 14340, "total_steps": 40000, "loss": 0.0771, "lr": 3.575273376292466e-05, "epoch": 2.3393425238600214, "percentage": 35.85, "elapsed_time": "3:16:15", "remaining_time": "5:51:11", "throughput": 2342.12, "total_tokens": 27580128} {"current_steps": 14345, "total_steps": 40000, "loss": 0.0309, "lr": 3.574386994356251e-05, "epoch": 2.3401582510808385, "percentage": 35.86, "elapsed_time": "3:16:17", "remaining_time": "5:51:03", "throughput": 2342.52, "total_tokens": 27589648} {"current_steps": 14350, "total_steps": 40000, "loss": 0.0028, "lr": 3.573500446736059e-05, "epoch": 2.340973978301656, "percentage": 35.88, "elapsed_time": "3:16:19", "remaining_time": "5:50:55", "throughput": 2342.88, "total_tokens": 27598752} {"current_steps": 14355, "total_steps": 40000, "loss": 0.0276, "lr": 3.5726137335686094e-05, "epoch": 2.3417897055224732, "percentage": 35.89, "elapsed_time": "3:16:21", "remaining_time": "5:50:48", "throughput": 2343.24, "total_tokens": 27607808} {"current_steps": 14360, "total_steps": 40000, "loss": 0.2376, "lr": 3.571726854990642e-05, "epoch": 2.342605432743291, "percentage": 35.9, "elapsed_time": "3:16:23", "remaining_time": "5:50:40", "throughput": 2343.74, "total_tokens": 27618560} {"current_steps": 14365, "total_steps": 40000, "loss": 0.0752, "lr": 3.570839811138925e-05, "epoch": 2.343421159964108, "percentage": 35.91, "elapsed_time": "3:16:26", "remaining_time": "5:50:32", "throughput": 2344.29, "total_tokens": 27629888} {"current_steps": 14370, "total_steps": 40000, "loss": 0.163, "lr": 3.569952602150252e-05, "epoch": 2.3442368871849255, "percentage": 35.93, "elapsed_time": "3:16:28", "remaining_time": "5:50:24", "throughput": 2344.7, "total_tokens": 27639568} {"current_steps": 14375, "total_steps": 40000, "loss": 0.0041, "lr": 3.569065228161442e-05, "epoch": 2.3450526144057426, "percentage": 35.94, "elapsed_time": "3:16:30", "remaining_time": "5:50:17", "throughput": 2345.15, "total_tokens": 27649760} {"current_steps": 14380, "total_steps": 40000, "loss": 0.0773, "lr": 3.5681776893093395e-05, "epoch": 2.34586834162656, "percentage": 35.95, "elapsed_time": "3:16:32", "remaining_time": "5:50:09", "throughput": 2345.67, "total_tokens": 27660640} {"current_steps": 14385, "total_steps": 40000, "loss": 0.0518, "lr": 3.5672899857308134e-05, "epoch": 2.3466840688473773, "percentage": 35.96, "elapsed_time": "3:16:34", "remaining_time": "5:50:01", "throughput": 2346.08, "total_tokens": 27670304} {"current_steps": 14390, "total_steps": 40000, "loss": 0.2015, "lr": 3.566402117562759e-05, "epoch": 2.347499796068195, "percentage": 35.98, "elapsed_time": "3:16:36", "remaining_time": "5:49:54", "throughput": 2346.56, "total_tokens": 27680880} {"current_steps": 14395, "total_steps": 40000, "loss": 0.238, "lr": 3.565514084942097e-05, "epoch": 2.348315523289012, "percentage": 35.99, "elapsed_time": "3:16:38", "remaining_time": "5:49:46", "throughput": 2346.94, "total_tokens": 27690128} {"current_steps": 14400, "total_steps": 40000, "loss": 0.1286, "lr": 3.564625888005773e-05, "epoch": 2.3491312505098296, "percentage": 36.0, "elapsed_time": "3:16:40", "remaining_time": "5:49:38", "throughput": 2347.26, "total_tokens": 27698752} {"current_steps": 14400, "total_steps": 40000, "eval_loss": 0.1712968945503235, "epoch": 2.3491312505098296, "percentage": 36.0, "elapsed_time": "3:18:01", "remaining_time": "5:52:01", "throughput": 2331.34, "total_tokens": 27698752} {"current_steps": 14405, "total_steps": 40000, "loss": 0.0054, "lr": 3.563737526890759e-05, "epoch": 2.3499469777306468, "percentage": 36.01, "elapsed_time": "3:18:04", "remaining_time": "5:51:57", "throughput": 2331.39, "total_tokens": 27708048} {"current_steps": 14410, "total_steps": 40000, "loss": 0.2001, "lr": 3.562849001734049e-05, "epoch": 2.3507627049514643, "percentage": 36.02, "elapsed_time": "3:18:06", "remaining_time": "5:51:49", "throughput": 2331.73, "total_tokens": 27716912} {"current_steps": 14415, "total_steps": 40000, "loss": 0.0382, "lr": 3.561960312672667e-05, "epoch": 2.3515784321722815, "percentage": 36.04, "elapsed_time": "3:18:08", "remaining_time": "5:51:41", "throughput": 2332.14, "total_tokens": 27726640} {"current_steps": 14420, "total_steps": 40000, "loss": 0.0511, "lr": 3.5610714598436596e-05, "epoch": 2.352394159393099, "percentage": 36.05, "elapsed_time": "3:18:10", "remaining_time": "5:51:33", "throughput": 2332.61, "total_tokens": 27736976} {"current_steps": 14425, "total_steps": 40000, "loss": 0.0901, "lr": 3.5601824433840986e-05, "epoch": 2.353209886613916, "percentage": 36.06, "elapsed_time": "3:18:13", "remaining_time": "5:51:25", "throughput": 2332.97, "total_tokens": 27746128} {"current_steps": 14430, "total_steps": 40000, "loss": 0.2108, "lr": 3.559293263431082e-05, "epoch": 2.3540256138347337, "percentage": 36.08, "elapsed_time": "3:18:15", "remaining_time": "5:51:18", "throughput": 2333.44, "total_tokens": 27756576} {"current_steps": 14435, "total_steps": 40000, "loss": 0.1485, "lr": 3.558403920121732e-05, "epoch": 2.354841341055551, "percentage": 36.09, "elapsed_time": "3:18:17", "remaining_time": "5:51:10", "throughput": 2333.77, "total_tokens": 27765344} {"current_steps": 14440, "total_steps": 40000, "loss": 0.0356, "lr": 3.557514413593197e-05, "epoch": 2.3556570682763684, "percentage": 36.1, "elapsed_time": "3:18:19", "remaining_time": "5:51:02", "throughput": 2334.25, "total_tokens": 27775824} {"current_steps": 14445, "total_steps": 40000, "loss": 0.035, "lr": 3.55662474398265e-05, "epoch": 2.3564727954971856, "percentage": 36.11, "elapsed_time": "3:18:21", "remaining_time": "5:50:54", "throughput": 2334.63, "total_tokens": 27785184} {"current_steps": 14450, "total_steps": 40000, "loss": 0.1014, "lr": 3.555734911427288e-05, "epoch": 2.357288522718003, "percentage": 36.12, "elapsed_time": "3:18:23", "remaining_time": "5:50:47", "throughput": 2334.92, "total_tokens": 27793456} {"current_steps": 14455, "total_steps": 40000, "loss": 0.0154, "lr": 3.5548449160643363e-05, "epoch": 2.3581042499388203, "percentage": 36.14, "elapsed_time": "3:18:25", "remaining_time": "5:50:39", "throughput": 2335.42, "total_tokens": 27804288} {"current_steps": 14460, "total_steps": 40000, "loss": 0.0554, "lr": 3.553954758031043e-05, "epoch": 2.358919977159638, "percentage": 36.15, "elapsed_time": "3:18:27", "remaining_time": "5:50:31", "throughput": 2335.93, "total_tokens": 27815200} {"current_steps": 14465, "total_steps": 40000, "loss": 0.0953, "lr": 3.5530644374646815e-05, "epoch": 2.359735704380455, "percentage": 36.16, "elapsed_time": "3:18:29", "remaining_time": "5:50:23", "throughput": 2336.35, "total_tokens": 27825072} {"current_steps": 14470, "total_steps": 40000, "loss": 0.1008, "lr": 3.552173954502549e-05, "epoch": 2.3605514316012726, "percentage": 36.18, "elapsed_time": "3:18:31", "remaining_time": "5:50:16", "throughput": 2336.74, "total_tokens": 27834512} {"current_steps": 14475, "total_steps": 40000, "loss": 0.0654, "lr": 3.55128330928197e-05, "epoch": 2.36136715882209, "percentage": 36.19, "elapsed_time": "3:18:33", "remaining_time": "5:50:08", "throughput": 2337.07, "total_tokens": 27843232} {"current_steps": 14480, "total_steps": 40000, "loss": 0.1815, "lr": 3.550392501940294e-05, "epoch": 2.3621828860429073, "percentage": 36.2, "elapsed_time": "3:18:35", "remaining_time": "5:50:00", "throughput": 2337.52, "total_tokens": 27853456} {"current_steps": 14485, "total_steps": 40000, "loss": 0.1092, "lr": 3.5495015326148945e-05, "epoch": 2.3629986132637244, "percentage": 36.21, "elapsed_time": "3:18:37", "remaining_time": "5:49:53", "throughput": 2337.97, "total_tokens": 27863680} {"current_steps": 14490, "total_steps": 40000, "loss": 0.0046, "lr": 3.548610401443169e-05, "epoch": 2.363814340484542, "percentage": 36.23, "elapsed_time": "3:18:39", "remaining_time": "5:49:45", "throughput": 2338.27, "total_tokens": 27872016} {"current_steps": 14495, "total_steps": 40000, "loss": 0.0242, "lr": 3.547719108562543e-05, "epoch": 2.3646300677053596, "percentage": 36.24, "elapsed_time": "3:18:42", "remaining_time": "5:49:37", "throughput": 2338.7, "total_tokens": 27882048} {"current_steps": 14500, "total_steps": 40000, "loss": 0.1447, "lr": 3.546827654110464e-05, "epoch": 2.3654457949261767, "percentage": 36.25, "elapsed_time": "3:18:44", "remaining_time": "5:49:29", "throughput": 2339.05, "total_tokens": 27891040} {"current_steps": 14505, "total_steps": 40000, "loss": 0.119, "lr": 3.545936038224405e-05, "epoch": 2.366261522146994, "percentage": 36.26, "elapsed_time": "3:18:46", "remaining_time": "5:49:22", "throughput": 2339.38, "total_tokens": 27899872} {"current_steps": 14510, "total_steps": 40000, "loss": 0.0121, "lr": 3.545044261041864e-05, "epoch": 2.3670772493678114, "percentage": 36.27, "elapsed_time": "3:18:48", "remaining_time": "5:49:14", "throughput": 2339.73, "total_tokens": 27908848} {"current_steps": 14515, "total_steps": 40000, "loss": 0.022, "lr": 3.5441523227003657e-05, "epoch": 2.367892976588629, "percentage": 36.29, "elapsed_time": "3:18:50", "remaining_time": "5:49:06", "throughput": 2340.16, "total_tokens": 27918800} {"current_steps": 14520, "total_steps": 40000, "loss": 0.0864, "lr": 3.543260223337459e-05, "epoch": 2.368708703809446, "percentage": 36.3, "elapsed_time": "3:18:52", "remaining_time": "5:48:59", "throughput": 2340.58, "total_tokens": 27928688} {"current_steps": 14525, "total_steps": 40000, "loss": 0.0767, "lr": 3.542367963090714e-05, "epoch": 2.3695244310302637, "percentage": 36.31, "elapsed_time": "3:18:54", "remaining_time": "5:48:51", "throughput": 2340.92, "total_tokens": 27937536} {"current_steps": 14530, "total_steps": 40000, "loss": 0.0095, "lr": 3.5414755420977295e-05, "epoch": 2.370340158251081, "percentage": 36.33, "elapsed_time": "3:18:56", "remaining_time": "5:48:43", "throughput": 2341.24, "total_tokens": 27946192} {"current_steps": 14535, "total_steps": 40000, "loss": 0.0162, "lr": 3.54058296049613e-05, "epoch": 2.3711558854718984, "percentage": 36.34, "elapsed_time": "3:18:58", "remaining_time": "5:48:36", "throughput": 2341.52, "total_tokens": 27954336} {"current_steps": 14540, "total_steps": 40000, "loss": 0.0822, "lr": 3.53969021842356e-05, "epoch": 2.3719716126927155, "percentage": 36.35, "elapsed_time": "3:19:00", "remaining_time": "5:48:28", "throughput": 2341.9, "total_tokens": 27963840} {"current_steps": 14545, "total_steps": 40000, "loss": 0.0935, "lr": 3.5387973160176926e-05, "epoch": 2.372787339913533, "percentage": 36.36, "elapsed_time": "3:19:02", "remaining_time": "5:48:20", "throughput": 2342.29, "total_tokens": 27973344} {"current_steps": 14550, "total_steps": 40000, "loss": 0.0913, "lr": 3.537904253416224e-05, "epoch": 2.37360306713435, "percentage": 36.38, "elapsed_time": "3:19:04", "remaining_time": "5:48:13", "throughput": 2342.72, "total_tokens": 27983200} {"current_steps": 14555, "total_steps": 40000, "loss": 0.0871, "lr": 3.537011030756878e-05, "epoch": 2.374418794355168, "percentage": 36.39, "elapsed_time": "3:19:06", "remaining_time": "5:48:05", "throughput": 2343.1, "total_tokens": 27992576} {"current_steps": 14560, "total_steps": 40000, "loss": 0.0719, "lr": 3.536117648177399e-05, "epoch": 2.375234521575985, "percentage": 36.4, "elapsed_time": "3:19:08", "remaining_time": "5:47:57", "throughput": 2343.49, "total_tokens": 28002128} {"current_steps": 14565, "total_steps": 40000, "loss": 0.0729, "lr": 3.535224105815558e-05, "epoch": 2.3760502487968025, "percentage": 36.41, "elapsed_time": "3:19:10", "remaining_time": "5:47:50", "throughput": 2343.93, "total_tokens": 28012160} {"current_steps": 14570, "total_steps": 40000, "loss": 0.1153, "lr": 3.5343304038091494e-05, "epoch": 2.3768659760176196, "percentage": 36.43, "elapsed_time": "3:19:13", "remaining_time": "5:47:42", "throughput": 2344.28, "total_tokens": 28021264} {"current_steps": 14575, "total_steps": 40000, "loss": 0.0856, "lr": 3.5334365422959955e-05, "epoch": 2.377681703238437, "percentage": 36.44, "elapsed_time": "3:19:15", "remaining_time": "5:47:34", "throughput": 2344.73, "total_tokens": 28031472} {"current_steps": 14580, "total_steps": 40000, "loss": 0.0628, "lr": 3.5325425214139396e-05, "epoch": 2.3784974304592543, "percentage": 36.45, "elapsed_time": "3:19:17", "remaining_time": "5:47:27", "throughput": 2345.14, "total_tokens": 28041232} {"current_steps": 14585, "total_steps": 40000, "loss": 0.0806, "lr": 3.531648341300851e-05, "epoch": 2.379313157680072, "percentage": 36.46, "elapsed_time": "3:19:19", "remaining_time": "5:47:19", "throughput": 2345.6, "total_tokens": 28051600} {"current_steps": 14590, "total_steps": 40000, "loss": 0.0804, "lr": 3.530754002094623e-05, "epoch": 2.380128884900889, "percentage": 36.48, "elapsed_time": "3:19:21", "remaining_time": "5:47:11", "throughput": 2346.06, "total_tokens": 28061888} {"current_steps": 14595, "total_steps": 40000, "loss": 0.1169, "lr": 3.529859503933175e-05, "epoch": 2.3809446121217066, "percentage": 36.49, "elapsed_time": "3:19:23", "remaining_time": "5:47:04", "throughput": 2346.51, "total_tokens": 28072160} {"current_steps": 14600, "total_steps": 40000, "loss": 0.0139, "lr": 3.52896484695445e-05, "epoch": 2.3817603393425237, "percentage": 36.5, "elapsed_time": "3:19:25", "remaining_time": "5:46:56", "throughput": 2346.95, "total_tokens": 28082208} {"current_steps": 14600, "total_steps": 40000, "eval_loss": 0.16269946098327637, "epoch": 2.3817603393425237, "percentage": 36.5, "elapsed_time": "3:20:45", "remaining_time": "5:49:16", "throughput": 2331.25, "total_tokens": 28082208} {"current_steps": 14605, "total_steps": 40000, "loss": 0.0048, "lr": 3.528070031296414e-05, "epoch": 2.3825760665633413, "percentage": 36.51, "elapsed_time": "3:20:49", "remaining_time": "5:49:11", "throughput": 2331.34, "total_tokens": 28091728} {"current_steps": 14610, "total_steps": 40000, "loss": 0.0057, "lr": 3.5271750570970605e-05, "epoch": 2.3833917937841584, "percentage": 36.52, "elapsed_time": "3:20:51", "remaining_time": "5:49:04", "throughput": 2331.68, "total_tokens": 28100656} {"current_steps": 14615, "total_steps": 40000, "loss": 0.0045, "lr": 3.526279924494405e-05, "epoch": 2.384207521004976, "percentage": 36.54, "elapsed_time": "3:20:53", "remaining_time": "5:48:56", "throughput": 2331.99, "total_tokens": 28109216} {"current_steps": 14620, "total_steps": 40000, "loss": 0.0042, "lr": 3.5253846336264874e-05, "epoch": 2.385023248225793, "percentage": 36.55, "elapsed_time": "3:20:55", "remaining_time": "5:48:48", "throughput": 2332.38, "total_tokens": 28118752} {"current_steps": 14625, "total_steps": 40000, "loss": 0.0484, "lr": 3.5244891846313736e-05, "epoch": 2.3858389754466107, "percentage": 36.56, "elapsed_time": "3:20:57", "remaining_time": "5:48:40", "throughput": 2332.79, "total_tokens": 28128528} {"current_steps": 14630, "total_steps": 40000, "loss": 0.0392, "lr": 3.5235935776471527e-05, "epoch": 2.386654702667428, "percentage": 36.58, "elapsed_time": "3:20:59", "remaining_time": "5:48:33", "throughput": 2333.28, "total_tokens": 28139280} {"current_steps": 14635, "total_steps": 40000, "loss": 0.1148, "lr": 3.522697812811939e-05, "epoch": 2.3874704298882454, "percentage": 36.59, "elapsed_time": "3:21:02", "remaining_time": "5:48:25", "throughput": 2333.64, "total_tokens": 28148384} {"current_steps": 14640, "total_steps": 40000, "loss": 0.2484, "lr": 3.521801890263871e-05, "epoch": 2.3882861571090626, "percentage": 36.6, "elapsed_time": "3:21:04", "remaining_time": "5:48:17", "throughput": 2334.03, "total_tokens": 28157968} {"current_steps": 14645, "total_steps": 40000, "loss": 0.0639, "lr": 3.5209058101411114e-05, "epoch": 2.38910188432988, "percentage": 36.61, "elapsed_time": "3:21:06", "remaining_time": "5:48:10", "throughput": 2334.44, "total_tokens": 28167712} {"current_steps": 14650, "total_steps": 40000, "loss": 0.0976, "lr": 3.520009572581845e-05, "epoch": 2.3899176115506973, "percentage": 36.62, "elapsed_time": "3:21:08", "remaining_time": "5:48:02", "throughput": 2334.87, "total_tokens": 28177680} {"current_steps": 14655, "total_steps": 40000, "loss": 0.0814, "lr": 3.519113177724285e-05, "epoch": 2.390733338771515, "percentage": 36.64, "elapsed_time": "3:21:10", "remaining_time": "5:47:54", "throughput": 2335.29, "total_tokens": 28187664} {"current_steps": 14660, "total_steps": 40000, "loss": 0.1131, "lr": 3.5182166257066656e-05, "epoch": 2.391549065992332, "percentage": 36.65, "elapsed_time": "3:21:12", "remaining_time": "5:47:47", "throughput": 2335.75, "total_tokens": 28197968} {"current_steps": 14665, "total_steps": 40000, "loss": 0.1239, "lr": 3.517319916667247e-05, "epoch": 2.3923647932131495, "percentage": 36.66, "elapsed_time": "3:21:14", "remaining_time": "5:47:39", "throughput": 2336.11, "total_tokens": 28207216} {"current_steps": 14670, "total_steps": 40000, "loss": 0.1441, "lr": 3.516423050744313e-05, "epoch": 2.393180520433967, "percentage": 36.68, "elapsed_time": "3:21:16", "remaining_time": "5:47:31", "throughput": 2336.63, "total_tokens": 28218320} {"current_steps": 14675, "total_steps": 40000, "loss": 0.0672, "lr": 3.5155260280761704e-05, "epoch": 2.3939962476547842, "percentage": 36.69, "elapsed_time": "3:21:18", "remaining_time": "5:47:24", "throughput": 2337.08, "total_tokens": 28228512} {"current_steps": 14680, "total_steps": 40000, "loss": 0.0984, "lr": 3.514628848801154e-05, "epoch": 2.3948119748756014, "percentage": 36.7, "elapsed_time": "3:21:20", "remaining_time": "5:47:16", "throughput": 2337.41, "total_tokens": 28237408} {"current_steps": 14685, "total_steps": 40000, "loss": 0.0494, "lr": 3.5137315130576174e-05, "epoch": 2.395627702096419, "percentage": 36.71, "elapsed_time": "3:21:22", "remaining_time": "5:47:08", "throughput": 2337.91, "total_tokens": 28248288} {"current_steps": 14690, "total_steps": 40000, "loss": 0.225, "lr": 3.512834020983942e-05, "epoch": 2.3964434293172365, "percentage": 36.73, "elapsed_time": "3:21:24", "remaining_time": "5:47:01", "throughput": 2338.28, "total_tokens": 28257600} {"current_steps": 14695, "total_steps": 40000, "loss": 0.1128, "lr": 3.5119363727185334e-05, "epoch": 2.3972591565380537, "percentage": 36.74, "elapsed_time": "3:21:26", "remaining_time": "5:46:53", "throughput": 2338.73, "total_tokens": 28267840} {"current_steps": 14700, "total_steps": 40000, "loss": 0.1977, "lr": 3.511038568399819e-05, "epoch": 2.398074883758871, "percentage": 36.75, "elapsed_time": "3:21:28", "remaining_time": "5:46:46", "throughput": 2339.08, "total_tokens": 28276880} {"current_steps": 14705, "total_steps": 40000, "loss": 0.037, "lr": 3.510140608166251e-05, "epoch": 2.3988906109796884, "percentage": 36.76, "elapsed_time": "3:21:30", "remaining_time": "5:46:38", "throughput": 2339.41, "total_tokens": 28285776} {"current_steps": 14710, "total_steps": 40000, "loss": 0.0771, "lr": 3.509242492156308e-05, "epoch": 2.399706338200506, "percentage": 36.78, "elapsed_time": "3:21:33", "remaining_time": "5:46:30", "throughput": 2339.86, "total_tokens": 28296048} {"current_steps": 14715, "total_steps": 40000, "loss": 0.0565, "lr": 3.5083442205084896e-05, "epoch": 2.400522065421323, "percentage": 36.79, "elapsed_time": "3:21:35", "remaining_time": "5:46:23", "throughput": 2340.25, "total_tokens": 28305616} {"current_steps": 14720, "total_steps": 40000, "loss": 0.1427, "lr": 3.507445793361321e-05, "epoch": 2.4013377926421406, "percentage": 36.8, "elapsed_time": "3:21:37", "remaining_time": "5:46:15", "throughput": 2340.64, "total_tokens": 28315056} {"current_steps": 14725, "total_steps": 40000, "loss": 0.0096, "lr": 3.5065472108533505e-05, "epoch": 2.4021535198629578, "percentage": 36.81, "elapsed_time": "3:21:39", "remaining_time": "5:46:07", "throughput": 2341.07, "total_tokens": 28325104} {"current_steps": 14730, "total_steps": 40000, "loss": 0.1154, "lr": 3.5056484731231504e-05, "epoch": 2.4029692470837754, "percentage": 36.83, "elapsed_time": "3:21:41", "remaining_time": "5:46:00", "throughput": 2341.44, "total_tokens": 28335040} {"current_steps": 14735, "total_steps": 40000, "loss": 0.0476, "lr": 3.504749580309319e-05, "epoch": 2.4037849743045925, "percentage": 36.84, "elapsed_time": "3:21:43", "remaining_time": "5:45:53", "throughput": 2341.81, "total_tokens": 28344352} {"current_steps": 14740, "total_steps": 40000, "loss": 0.114, "lr": 3.5038505325504753e-05, "epoch": 2.40460070152541, "percentage": 36.85, "elapsed_time": "3:21:45", "remaining_time": "5:45:45", "throughput": 2342.21, "total_tokens": 28354064} {"current_steps": 14745, "total_steps": 40000, "loss": 0.0622, "lr": 3.502951329985264e-05, "epoch": 2.405416428746227, "percentage": 36.86, "elapsed_time": "3:21:47", "remaining_time": "5:45:37", "throughput": 2342.57, "total_tokens": 28363168} {"current_steps": 14750, "total_steps": 40000, "loss": 0.0288, "lr": 3.502051972752354e-05, "epoch": 2.4062321559670448, "percentage": 36.88, "elapsed_time": "3:21:49", "remaining_time": "5:45:30", "throughput": 2342.94, "total_tokens": 28372544} {"current_steps": 14755, "total_steps": 40000, "loss": 0.087, "lr": 3.5011524609904374e-05, "epoch": 2.407047883187862, "percentage": 36.89, "elapsed_time": "3:21:51", "remaining_time": "5:45:22", "throughput": 2343.42, "total_tokens": 28383152} {"current_steps": 14760, "total_steps": 40000, "loss": 0.0874, "lr": 3.50025279483823e-05, "epoch": 2.4078636104086795, "percentage": 36.9, "elapsed_time": "3:21:53", "remaining_time": "5:45:15", "throughput": 2343.72, "total_tokens": 28391712} {"current_steps": 14765, "total_steps": 40000, "loss": 0.1544, "lr": 3.499352974434472e-05, "epoch": 2.4086793376294966, "percentage": 36.91, "elapsed_time": "3:21:55", "remaining_time": "5:45:07", "throughput": 2344.11, "total_tokens": 28401168} {"current_steps": 14770, "total_steps": 40000, "loss": 0.1815, "lr": 3.498452999917926e-05, "epoch": 2.409495064850314, "percentage": 36.93, "elapsed_time": "3:21:58", "remaining_time": "5:44:59", "throughput": 2344.46, "total_tokens": 28410304} {"current_steps": 14775, "total_steps": 40000, "loss": 0.047, "lr": 3.4975528714273795e-05, "epoch": 2.4103107920711313, "percentage": 36.94, "elapsed_time": "3:22:00", "remaining_time": "5:44:52", "throughput": 2344.88, "total_tokens": 28420192} {"current_steps": 14780, "total_steps": 40000, "loss": 0.0187, "lr": 3.4966525891016454e-05, "epoch": 2.411126519291949, "percentage": 36.95, "elapsed_time": "3:22:02", "remaining_time": "5:44:44", "throughput": 2345.36, "total_tokens": 28430896} {"current_steps": 14785, "total_steps": 40000, "loss": 0.0259, "lr": 3.495752153079557e-05, "epoch": 2.411942246512766, "percentage": 36.96, "elapsed_time": "3:22:04", "remaining_time": "5:44:37", "throughput": 2345.8, "total_tokens": 28441104} {"current_steps": 14790, "total_steps": 40000, "loss": 0.0677, "lr": 3.494851563499974e-05, "epoch": 2.4127579737335836, "percentage": 36.98, "elapsed_time": "3:22:06", "remaining_time": "5:44:29", "throughput": 2346.13, "total_tokens": 28450000} {"current_steps": 14795, "total_steps": 40000, "loss": 0.0062, "lr": 3.493950820501777e-05, "epoch": 2.4135737009544007, "percentage": 36.99, "elapsed_time": "3:22:08", "remaining_time": "5:44:22", "throughput": 2346.45, "total_tokens": 28458672} {"current_steps": 14800, "total_steps": 40000, "loss": 0.163, "lr": 3.493049924223872e-05, "epoch": 2.4143894281752183, "percentage": 37.0, "elapsed_time": "3:22:10", "remaining_time": "5:44:14", "throughput": 2346.87, "total_tokens": 28468576} {"current_steps": 14800, "total_steps": 40000, "eval_loss": 0.15688742697238922, "epoch": 2.4143894281752183, "percentage": 37.0, "elapsed_time": "3:23:31", "remaining_time": "5:46:31", "throughput": 2331.37, "total_tokens": 28468576} {"current_steps": 14805, "total_steps": 40000, "loss": 0.1416, "lr": 3.49214887480519e-05, "epoch": 2.4152051553960354, "percentage": 37.01, "elapsed_time": "3:23:34", "remaining_time": "5:46:26", "throughput": 2331.45, "total_tokens": 28477920} {"current_steps": 14810, "total_steps": 40000, "loss": 0.1079, "lr": 3.4912476723846834e-05, "epoch": 2.416020882616853, "percentage": 37.03, "elapsed_time": "3:23:36", "remaining_time": "5:46:19", "throughput": 2331.78, "total_tokens": 28486848} {"current_steps": 14815, "total_steps": 40000, "loss": 0.0695, "lr": 3.490346317101328e-05, "epoch": 2.41683660983767, "percentage": 37.04, "elapsed_time": "3:23:38", "remaining_time": "5:46:11", "throughput": 2332.19, "total_tokens": 28496624} {"current_steps": 14820, "total_steps": 40000, "loss": 0.1091, "lr": 3.4894448090941266e-05, "epoch": 2.4176523370584877, "percentage": 37.05, "elapsed_time": "3:23:40", "remaining_time": "5:46:03", "throughput": 2332.56, "total_tokens": 28506032} {"current_steps": 14825, "total_steps": 40000, "loss": 0.0578, "lr": 3.488543148502101e-05, "epoch": 2.418468064279305, "percentage": 37.06, "elapsed_time": "3:23:42", "remaining_time": "5:45:56", "throughput": 2332.98, "total_tokens": 28515936} {"current_steps": 14830, "total_steps": 40000, "loss": 0.0413, "lr": 3.487641335464299e-05, "epoch": 2.4192837915001224, "percentage": 37.08, "elapsed_time": "3:23:45", "remaining_time": "5:45:48", "throughput": 2333.22, "total_tokens": 28523696} {"current_steps": 14835, "total_steps": 40000, "loss": 0.1145, "lr": 3.4867393701197914e-05, "epoch": 2.4200995187209395, "percentage": 37.09, "elapsed_time": "3:23:47", "remaining_time": "5:45:41", "throughput": 2333.7, "total_tokens": 28534384} {"current_steps": 14840, "total_steps": 40000, "loss": 0.0277, "lr": 3.485837252607673e-05, "epoch": 2.420915245941757, "percentage": 37.1, "elapsed_time": "3:23:49", "remaining_time": "5:45:33", "throughput": 2334.13, "total_tokens": 28544432} {"current_steps": 14845, "total_steps": 40000, "loss": 0.0924, "lr": 3.4849349830670615e-05, "epoch": 2.4217309731625742, "percentage": 37.11, "elapsed_time": "3:23:51", "remaining_time": "5:45:25", "throughput": 2334.51, "total_tokens": 28553872} {"current_steps": 14850, "total_steps": 40000, "loss": 0.0279, "lr": 3.4840325616370976e-05, "epoch": 2.422546700383392, "percentage": 37.12, "elapsed_time": "3:23:53", "remaining_time": "5:45:18", "throughput": 2334.85, "total_tokens": 28562848} {"current_steps": 14855, "total_steps": 40000, "loss": 0.05, "lr": 3.483129988456947e-05, "epoch": 2.423362427604209, "percentage": 37.14, "elapsed_time": "3:23:55", "remaining_time": "5:45:10", "throughput": 2335.19, "total_tokens": 28571840} {"current_steps": 14860, "total_steps": 40000, "loss": 0.0305, "lr": 3.482227263665797e-05, "epoch": 2.4241781548250265, "percentage": 37.15, "elapsed_time": "3:23:57", "remaining_time": "5:45:03", "throughput": 2335.49, "total_tokens": 28580336} {"current_steps": 14865, "total_steps": 40000, "loss": 0.0531, "lr": 3.48132438740286e-05, "epoch": 2.424993882045844, "percentage": 37.16, "elapsed_time": "3:23:59", "remaining_time": "5:44:55", "throughput": 2335.99, "total_tokens": 28591280} {"current_steps": 14870, "total_steps": 40000, "loss": 0.0713, "lr": 3.48042135980737e-05, "epoch": 2.4258096092666612, "percentage": 37.18, "elapsed_time": "3:24:01", "remaining_time": "5:44:47", "throughput": 2336.37, "total_tokens": 28600768} {"current_steps": 14875, "total_steps": 40000, "loss": 0.1477, "lr": 3.479518181018586e-05, "epoch": 2.4266253364874784, "percentage": 37.19, "elapsed_time": "3:24:03", "remaining_time": "5:44:40", "throughput": 2336.77, "total_tokens": 28610544} {"current_steps": 14880, "total_steps": 40000, "loss": 0.0488, "lr": 3.4786148511757886e-05, "epoch": 2.427441063708296, "percentage": 37.2, "elapsed_time": "3:24:05", "remaining_time": "5:44:32", "throughput": 2337.24, "total_tokens": 28621136} {"current_steps": 14885, "total_steps": 40000, "loss": 0.0135, "lr": 3.477711370418284e-05, "epoch": 2.4282567909291135, "percentage": 37.21, "elapsed_time": "3:24:07", "remaining_time": "5:44:25", "throughput": 2337.78, "total_tokens": 28632544} {"current_steps": 14890, "total_steps": 40000, "loss": 0.0779, "lr": 3.476807738885399e-05, "epoch": 2.4290725181499306, "percentage": 37.23, "elapsed_time": "3:24:09", "remaining_time": "5:44:17", "throughput": 2338.19, "total_tokens": 28642384} {"current_steps": 14895, "total_steps": 40000, "loss": 0.1253, "lr": 3.475903956716485e-05, "epoch": 2.429888245370748, "percentage": 37.24, "elapsed_time": "3:24:11", "remaining_time": "5:44:10", "throughput": 2338.55, "total_tokens": 28651632} {"current_steps": 14900, "total_steps": 40000, "loss": 0.1792, "lr": 3.475000024050917e-05, "epoch": 2.4307039725915653, "percentage": 37.25, "elapsed_time": "3:24:13", "remaining_time": "5:44:02", "throughput": 2338.9, "total_tokens": 28660800} {"current_steps": 14905, "total_steps": 40000, "loss": 0.0847, "lr": 3.4740959410280926e-05, "epoch": 2.431519699812383, "percentage": 37.26, "elapsed_time": "3:24:16", "remaining_time": "5:43:55", "throughput": 2339.25, "total_tokens": 28669904} {"current_steps": 14910, "total_steps": 40000, "loss": 0.1835, "lr": 3.4731917077874324e-05, "epoch": 2.4323354270332, "percentage": 37.28, "elapsed_time": "3:24:18", "remaining_time": "5:43:47", "throughput": 2339.79, "total_tokens": 28681392} {"current_steps": 14915, "total_steps": 40000, "loss": 0.1095, "lr": 3.4722873244683816e-05, "epoch": 2.4331511542540176, "percentage": 37.29, "elapsed_time": "3:24:20", "remaining_time": "5:43:39", "throughput": 2340.25, "total_tokens": 28691824} {"current_steps": 14920, "total_steps": 40000, "loss": 0.0659, "lr": 3.4713827912104065e-05, "epoch": 2.4339668814748348, "percentage": 37.3, "elapsed_time": "3:24:22", "remaining_time": "5:43:32", "throughput": 2340.66, "total_tokens": 28701680} {"current_steps": 14925, "total_steps": 40000, "loss": 0.0605, "lr": 3.470478108152998e-05, "epoch": 2.4347826086956523, "percentage": 37.31, "elapsed_time": "3:24:24", "remaining_time": "5:43:24", "throughput": 2341.0, "total_tokens": 28710688} {"current_steps": 14930, "total_steps": 40000, "loss": 0.1269, "lr": 3.4695732754356695e-05, "epoch": 2.4355983359164695, "percentage": 37.33, "elapsed_time": "3:24:26", "remaining_time": "5:43:17", "throughput": 2341.44, "total_tokens": 28720976} {"current_steps": 14935, "total_steps": 40000, "loss": 0.0564, "lr": 3.4686682931979576e-05, "epoch": 2.436414063137287, "percentage": 37.34, "elapsed_time": "3:24:28", "remaining_time": "5:43:09", "throughput": 2341.9, "total_tokens": 28731488} {"current_steps": 14940, "total_steps": 40000, "loss": 0.0059, "lr": 3.467763161579422e-05, "epoch": 2.437229790358104, "percentage": 37.35, "elapsed_time": "3:24:30", "remaining_time": "5:43:02", "throughput": 2342.21, "total_tokens": 28740064} {"current_steps": 14945, "total_steps": 40000, "loss": 0.0203, "lr": 3.466857880719645e-05, "epoch": 2.4380455175789217, "percentage": 37.36, "elapsed_time": "3:24:32", "remaining_time": "5:42:54", "throughput": 2342.52, "total_tokens": 28748752} {"current_steps": 14950, "total_steps": 40000, "loss": 0.01, "lr": 3.465952450758233e-05, "epoch": 2.438861244799739, "percentage": 37.38, "elapsed_time": "3:24:34", "remaining_time": "5:42:47", "throughput": 2342.91, "total_tokens": 28758384} {"current_steps": 14955, "total_steps": 40000, "loss": 0.1486, "lr": 3.4650468718348126e-05, "epoch": 2.4396769720205564, "percentage": 37.39, "elapsed_time": "3:24:36", "remaining_time": "5:42:39", "throughput": 2343.36, "total_tokens": 28768704} {"current_steps": 14960, "total_steps": 40000, "loss": 0.0557, "lr": 3.464141144089038e-05, "epoch": 2.4404926992413736, "percentage": 37.4, "elapsed_time": "3:24:38", "remaining_time": "5:42:32", "throughput": 2343.67, "total_tokens": 28777408} {"current_steps": 14965, "total_steps": 40000, "loss": 0.0029, "lr": 3.463235267660583e-05, "epoch": 2.441308426462191, "percentage": 37.41, "elapsed_time": "3:24:40", "remaining_time": "5:42:24", "throughput": 2344.1, "total_tokens": 28787456} {"current_steps": 14970, "total_steps": 40000, "loss": 0.1521, "lr": 3.462329242689145e-05, "epoch": 2.4421241536830083, "percentage": 37.43, "elapsed_time": "3:24:42", "remaining_time": "5:42:17", "throughput": 2344.52, "total_tokens": 28797504} {"current_steps": 14975, "total_steps": 40000, "loss": 0.258, "lr": 3.461423069314444e-05, "epoch": 2.442939880903826, "percentage": 37.44, "elapsed_time": "3:24:44", "remaining_time": "5:42:09", "throughput": 2344.97, "total_tokens": 28807824} {"current_steps": 14980, "total_steps": 40000, "loss": 0.0622, "lr": 3.460516747676224e-05, "epoch": 2.443755608124643, "percentage": 37.45, "elapsed_time": "3:24:47", "remaining_time": "5:42:02", "throughput": 2345.36, "total_tokens": 28817456} {"current_steps": 14985, "total_steps": 40000, "loss": 0.1036, "lr": 3.459610277914251e-05, "epoch": 2.4445713353454606, "percentage": 37.46, "elapsed_time": "3:24:49", "remaining_time": "5:41:54", "throughput": 2345.8, "total_tokens": 28827680} {"current_steps": 14990, "total_steps": 40000, "loss": 0.0206, "lr": 3.458703660168314e-05, "epoch": 2.4453870625662777, "percentage": 37.48, "elapsed_time": "3:24:51", "remaining_time": "5:41:47", "throughput": 2346.2, "total_tokens": 28837456} {"current_steps": 14995, "total_steps": 40000, "loss": 0.158, "lr": 3.457796894578224e-05, "epoch": 2.4462027897870953, "percentage": 37.49, "elapsed_time": "3:24:53", "remaining_time": "5:41:39", "throughput": 2346.59, "total_tokens": 28847136} {"current_steps": 15000, "total_steps": 40000, "loss": 0.0111, "lr": 3.456889981283817e-05, "epoch": 2.4470185170079124, "percentage": 37.5, "elapsed_time": "3:24:55", "remaining_time": "5:41:32", "throughput": 2346.94, "total_tokens": 28856272} {"current_steps": 15000, "total_steps": 40000, "eval_loss": 0.15830248594284058, "epoch": 2.4470185170079124, "percentage": 37.5, "elapsed_time": "3:26:15", "remaining_time": "5:43:46", "throughput": 2331.66, "total_tokens": 28856272} {"current_steps": 15005, "total_steps": 40000, "loss": 0.1665, "lr": 3.45598292042495e-05, "epoch": 2.44783424422873, "percentage": 37.51, "elapsed_time": "3:26:19", "remaining_time": "5:43:41", "throughput": 2331.65, "total_tokens": 28864672} {"current_steps": 15010, "total_steps": 40000, "loss": 0.1849, "lr": 3.4550757121415035e-05, "epoch": 2.448649971449547, "percentage": 37.52, "elapsed_time": "3:26:21", "remaining_time": "5:43:33", "throughput": 2331.91, "total_tokens": 28872784} {"current_steps": 15015, "total_steps": 40000, "loss": 0.0111, "lr": 3.454168356573378e-05, "epoch": 2.4494656986703647, "percentage": 37.54, "elapsed_time": "3:26:23", "remaining_time": "5:43:26", "throughput": 2332.29, "total_tokens": 28882320} {"current_steps": 15020, "total_steps": 40000, "loss": 0.0763, "lr": 3.453260853860503e-05, "epoch": 2.450281425891182, "percentage": 37.55, "elapsed_time": "3:26:25", "remaining_time": "5:43:18", "throughput": 2332.63, "total_tokens": 28891296} {"current_steps": 15025, "total_steps": 40000, "loss": 0.1252, "lr": 3.452353204142824e-05, "epoch": 2.4510971531119994, "percentage": 37.56, "elapsed_time": "3:26:27", "remaining_time": "5:43:11", "throughput": 2332.97, "total_tokens": 28900304} {"current_steps": 15030, "total_steps": 40000, "loss": 0.0785, "lr": 3.4514454075603136e-05, "epoch": 2.4519128803328165, "percentage": 37.57, "elapsed_time": "3:26:29", "remaining_time": "5:43:03", "throughput": 2333.32, "total_tokens": 28909440} {"current_steps": 15035, "total_steps": 40000, "loss": 0.1721, "lr": 3.450537464252964e-05, "epoch": 2.452728607553634, "percentage": 37.59, "elapsed_time": "3:26:31", "remaining_time": "5:42:56", "throughput": 2333.69, "total_tokens": 28918928} {"current_steps": 15040, "total_steps": 40000, "loss": 0.0174, "lr": 3.4496293743607925e-05, "epoch": 2.4535443347744517, "percentage": 37.6, "elapsed_time": "3:26:33", "remaining_time": "5:42:48", "throughput": 2334.07, "total_tokens": 28928480} {"current_steps": 15045, "total_steps": 40000, "loss": 0.0115, "lr": 3.448721138023838e-05, "epoch": 2.454360061995269, "percentage": 37.61, "elapsed_time": "3:26:36", "remaining_time": "5:42:41", "throughput": 2334.4, "total_tokens": 28937312} {"current_steps": 15050, "total_steps": 40000, "loss": 0.2322, "lr": 3.447812755382162e-05, "epoch": 2.455175789216086, "percentage": 37.62, "elapsed_time": "3:26:38", "remaining_time": "5:42:33", "throughput": 2334.85, "total_tokens": 28947760} {"current_steps": 15055, "total_steps": 40000, "loss": 0.199, "lr": 3.446904226575847e-05, "epoch": 2.4559915164369035, "percentage": 37.64, "elapsed_time": "3:26:40", "remaining_time": "5:42:26", "throughput": 2335.28, "total_tokens": 28957920} {"current_steps": 15060, "total_steps": 40000, "loss": 0.1251, "lr": 3.445995551745002e-05, "epoch": 2.456807243657721, "percentage": 37.65, "elapsed_time": "3:26:42", "remaining_time": "5:42:18", "throughput": 2335.63, "total_tokens": 28967136} {"current_steps": 15065, "total_steps": 40000, "loss": 0.0217, "lr": 3.445086731029753e-05, "epoch": 2.457622970878538, "percentage": 37.66, "elapsed_time": "3:26:44", "remaining_time": "5:42:11", "throughput": 2335.98, "total_tokens": 28976208} {"current_steps": 15070, "total_steps": 40000, "loss": 0.0201, "lr": 3.444177764570255e-05, "epoch": 2.4584386980993553, "percentage": 37.67, "elapsed_time": "3:26:46", "remaining_time": "5:42:03", "throughput": 2336.43, "total_tokens": 28986656} {"current_steps": 15075, "total_steps": 40000, "loss": 0.1715, "lr": 3.44326865250668e-05, "epoch": 2.459254425320173, "percentage": 37.69, "elapsed_time": "3:26:48", "remaining_time": "5:41:56", "throughput": 2336.87, "total_tokens": 28996928} {"current_steps": 15080, "total_steps": 40000, "loss": 0.0814, "lr": 3.442359394979225e-05, "epoch": 2.4600701525409905, "percentage": 37.7, "elapsed_time": "3:26:50", "remaining_time": "5:41:48", "throughput": 2337.29, "total_tokens": 29007024} {"current_steps": 15085, "total_steps": 40000, "loss": 0.1125, "lr": 3.441449992128108e-05, "epoch": 2.4608858797618076, "percentage": 37.71, "elapsed_time": "3:26:52", "remaining_time": "5:41:41", "throughput": 2337.66, "total_tokens": 29016480} {"current_steps": 15090, "total_steps": 40000, "loss": 0.1089, "lr": 3.440540444093573e-05, "epoch": 2.461701606982625, "percentage": 37.72, "elapsed_time": "3:26:54", "remaining_time": "5:41:33", "throughput": 2337.95, "total_tokens": 29024832} {"current_steps": 15095, "total_steps": 40000, "loss": 0.0256, "lr": 3.43963075101588e-05, "epoch": 2.4625173342034423, "percentage": 37.74, "elapsed_time": "3:26:56", "remaining_time": "5:41:26", "throughput": 2338.3, "total_tokens": 29034064} {"current_steps": 15100, "total_steps": 40000, "loss": 0.01, "lr": 3.438720913035318e-05, "epoch": 2.46333306142426, "percentage": 37.75, "elapsed_time": "3:26:58", "remaining_time": "5:41:18", "throughput": 2338.72, "total_tokens": 29044064} {"current_steps": 15105, "total_steps": 40000, "loss": 0.0834, "lr": 3.437810930292195e-05, "epoch": 2.464148788645077, "percentage": 37.76, "elapsed_time": "3:27:00", "remaining_time": "5:41:11", "throughput": 2339.13, "total_tokens": 29053968} {"current_steps": 15110, "total_steps": 40000, "loss": 0.0407, "lr": 3.43690080292684e-05, "epoch": 2.4649645158658946, "percentage": 37.77, "elapsed_time": "3:27:02", "remaining_time": "5:41:03", "throughput": 2339.4, "total_tokens": 29062144} {"current_steps": 15115, "total_steps": 40000, "loss": 0.078, "lr": 3.435990531079608e-05, "epoch": 2.4657802430867117, "percentage": 37.79, "elapsed_time": "3:27:04", "remaining_time": "5:40:56", "throughput": 2339.69, "total_tokens": 29070672} {"current_steps": 15120, "total_steps": 40000, "loss": 0.0288, "lr": 3.435080114890874e-05, "epoch": 2.4665959703075293, "percentage": 37.8, "elapsed_time": "3:27:07", "remaining_time": "5:40:48", "throughput": 2340.14, "total_tokens": 29081072} {"current_steps": 15125, "total_steps": 40000, "loss": 0.2502, "lr": 3.434169554501035e-05, "epoch": 2.4674116975283464, "percentage": 37.81, "elapsed_time": "3:27:09", "remaining_time": "5:40:41", "throughput": 2340.49, "total_tokens": 29090192} {"current_steps": 15130, "total_steps": 40000, "loss": 0.1559, "lr": 3.433258850050511e-05, "epoch": 2.468227424749164, "percentage": 37.82, "elapsed_time": "3:27:11", "remaining_time": "5:40:33", "throughput": 2340.92, "total_tokens": 29100416} {"current_steps": 15135, "total_steps": 40000, "loss": 0.0939, "lr": 3.4323480016797446e-05, "epoch": 2.469043151969981, "percentage": 37.84, "elapsed_time": "3:27:13", "remaining_time": "5:40:26", "throughput": 2341.25, "total_tokens": 29109328} {"current_steps": 15140, "total_steps": 40000, "loss": 0.0972, "lr": 3.4314370095291995e-05, "epoch": 2.4698588791907987, "percentage": 37.85, "elapsed_time": "3:27:15", "remaining_time": "5:40:18", "throughput": 2341.68, "total_tokens": 29119552} {"current_steps": 15145, "total_steps": 40000, "loss": 0.1656, "lr": 3.430525873739363e-05, "epoch": 2.470674606411616, "percentage": 37.86, "elapsed_time": "3:27:17", "remaining_time": "5:40:11", "throughput": 2342.02, "total_tokens": 29128656} {"current_steps": 15150, "total_steps": 40000, "loss": 0.0495, "lr": 3.429614594450743e-05, "epoch": 2.4714903336324334, "percentage": 37.88, "elapsed_time": "3:27:19", "remaining_time": "5:40:04", "throughput": 2342.36, "total_tokens": 29137760} {"current_steps": 15155, "total_steps": 40000, "loss": 0.0746, "lr": 3.428703171803869e-05, "epoch": 2.4723060608532506, "percentage": 37.89, "elapsed_time": "3:27:21", "remaining_time": "5:39:56", "throughput": 2342.64, "total_tokens": 29146064} {"current_steps": 15160, "total_steps": 40000, "loss": 0.1269, "lr": 3.4277916059392964e-05, "epoch": 2.473121788074068, "percentage": 37.9, "elapsed_time": "3:27:23", "remaining_time": "5:39:49", "throughput": 2343.09, "total_tokens": 29156528} {"current_steps": 15165, "total_steps": 40000, "loss": 0.0643, "lr": 3.426879896997598e-05, "epoch": 2.4739375152948853, "percentage": 37.91, "elapsed_time": "3:27:25", "remaining_time": "5:39:41", "throughput": 2343.46, "total_tokens": 29165936} {"current_steps": 15170, "total_steps": 40000, "loss": 0.0931, "lr": 3.425968045119372e-05, "epoch": 2.474753242515703, "percentage": 37.92, "elapsed_time": "3:27:27", "remaining_time": "5:39:34", "throughput": 2343.84, "total_tokens": 29175536} {"current_steps": 15175, "total_steps": 40000, "loss": 0.0456, "lr": 3.425056050445237e-05, "epoch": 2.47556896973652, "percentage": 37.94, "elapsed_time": "3:27:29", "remaining_time": "5:39:26", "throughput": 2344.21, "total_tokens": 29184928} {"current_steps": 15180, "total_steps": 40000, "loss": 0.0559, "lr": 3.4241439131158336e-05, "epoch": 2.4763846969573375, "percentage": 37.95, "elapsed_time": "3:27:31", "remaining_time": "5:39:19", "throughput": 2344.65, "total_tokens": 29195232} {"current_steps": 15185, "total_steps": 40000, "loss": 0.0176, "lr": 3.423231633271825e-05, "epoch": 2.4772004241781547, "percentage": 37.96, "elapsed_time": "3:27:33", "remaining_time": "5:39:11", "throughput": 2345.06, "total_tokens": 29205248} {"current_steps": 15190, "total_steps": 40000, "loss": 0.085, "lr": 3.4223192110538985e-05, "epoch": 2.4780161513989722, "percentage": 37.97, "elapsed_time": "3:27:36", "remaining_time": "5:39:04", "throughput": 2345.52, "total_tokens": 29215808} {"current_steps": 15195, "total_steps": 40000, "loss": 0.1772, "lr": 3.4214066466027575e-05, "epoch": 2.4788318786197894, "percentage": 37.99, "elapsed_time": "3:27:38", "remaining_time": "5:38:57", "throughput": 2345.95, "total_tokens": 29225952} {"current_steps": 15200, "total_steps": 40000, "loss": 0.1074, "lr": 3.4204939400591325e-05, "epoch": 2.479647605840607, "percentage": 38.0, "elapsed_time": "3:27:40", "remaining_time": "5:38:49", "throughput": 2346.26, "total_tokens": 29234704} {"current_steps": 15200, "total_steps": 40000, "eval_loss": 0.1568540632724762, "epoch": 2.479647605840607, "percentage": 38.0, "elapsed_time": "3:29:00", "remaining_time": "5:41:01", "throughput": 2331.16, "total_tokens": 29234704} {"current_steps": 15205, "total_steps": 40000, "loss": 0.1366, "lr": 3.419581091563775e-05, "epoch": 2.480463333061424, "percentage": 38.01, "elapsed_time": "3:29:04", "remaining_time": "5:40:56", "throughput": 2331.21, "total_tokens": 29244176} {"current_steps": 15210, "total_steps": 40000, "loss": 0.1914, "lr": 3.418668101257456e-05, "epoch": 2.4812790602822417, "percentage": 38.02, "elapsed_time": "3:29:06", "remaining_time": "5:40:49", "throughput": 2331.55, "total_tokens": 29253280} {"current_steps": 15215, "total_steps": 40000, "loss": 0.1228, "lr": 3.417754969280971e-05, "epoch": 2.482094787503059, "percentage": 38.04, "elapsed_time": "3:29:08", "remaining_time": "5:40:41", "throughput": 2331.95, "total_tokens": 29263024} {"current_steps": 15220, "total_steps": 40000, "loss": 0.1023, "lr": 3.416841695775137e-05, "epoch": 2.4829105147238764, "percentage": 38.05, "elapsed_time": "3:29:10", "remaining_time": "5:40:34", "throughput": 2332.32, "total_tokens": 29272528} {"current_steps": 15225, "total_steps": 40000, "loss": 0.0857, "lr": 3.415928280880792e-05, "epoch": 2.4837262419446935, "percentage": 38.06, "elapsed_time": "3:29:12", "remaining_time": "5:40:26", "throughput": 2332.71, "total_tokens": 29282192} {"current_steps": 15230, "total_steps": 40000, "loss": 0.006, "lr": 3.4150147247387965e-05, "epoch": 2.484541969165511, "percentage": 38.07, "elapsed_time": "3:29:14", "remaining_time": "5:40:19", "throughput": 2333.06, "total_tokens": 29291504} {"current_steps": 15235, "total_steps": 40000, "loss": 0.0772, "lr": 3.4141010274900306e-05, "epoch": 2.4853576963863286, "percentage": 38.09, "elapsed_time": "3:29:17", "remaining_time": "5:40:11", "throughput": 2333.31, "total_tokens": 29299424} {"current_steps": 15240, "total_steps": 40000, "loss": 0.0464, "lr": 3.413187189275399e-05, "epoch": 2.4861734236071458, "percentage": 38.1, "elapsed_time": "3:29:19", "remaining_time": "5:40:04", "throughput": 2333.72, "total_tokens": 29309376} {"current_steps": 15245, "total_steps": 40000, "loss": 0.1597, "lr": 3.4122732102358265e-05, "epoch": 2.486989150827963, "percentage": 38.11, "elapsed_time": "3:29:21", "remaining_time": "5:39:56", "throughput": 2334.06, "total_tokens": 29318544} {"current_steps": 15250, "total_steps": 40000, "loss": 0.0121, "lr": 3.411359090512261e-05, "epoch": 2.4878048780487805, "percentage": 38.12, "elapsed_time": "3:29:23", "remaining_time": "5:39:49", "throughput": 2334.47, "total_tokens": 29328448} {"current_steps": 15255, "total_steps": 40000, "loss": 0.004, "lr": 3.410444830245672e-05, "epoch": 2.488620605269598, "percentage": 38.14, "elapsed_time": "3:29:25", "remaining_time": "5:39:42", "throughput": 2334.84, "total_tokens": 29337936} {"current_steps": 15260, "total_steps": 40000, "loss": 0.0076, "lr": 3.409530429577048e-05, "epoch": 2.489436332490415, "percentage": 38.15, "elapsed_time": "3:29:27", "remaining_time": "5:39:34", "throughput": 2335.18, "total_tokens": 29347120} {"current_steps": 15265, "total_steps": 40000, "loss": 0.1831, "lr": 3.408615888647402e-05, "epoch": 2.4902520597112328, "percentage": 38.16, "elapsed_time": "3:29:29", "remaining_time": "5:39:27", "throughput": 2335.54, "total_tokens": 29356464} {"current_steps": 15270, "total_steps": 40000, "loss": 0.0407, "lr": 3.4077012075977675e-05, "epoch": 2.49106778693205, "percentage": 38.17, "elapsed_time": "3:29:31", "remaining_time": "5:39:19", "throughput": 2335.88, "total_tokens": 29365584} {"current_steps": 15275, "total_steps": 40000, "loss": 0.0497, "lr": 3.4067863865692e-05, "epoch": 2.4918835141528675, "percentage": 38.19, "elapsed_time": "3:29:33", "remaining_time": "5:39:12", "throughput": 2336.28, "total_tokens": 29375424} {"current_steps": 15280, "total_steps": 40000, "loss": 0.0259, "lr": 3.4058714257027755e-05, "epoch": 2.4926992413736846, "percentage": 38.2, "elapsed_time": "3:29:35", "remaining_time": "5:39:04", "throughput": 2336.66, "total_tokens": 29385056} {"current_steps": 15285, "total_steps": 40000, "loss": 0.0671, "lr": 3.404956325139594e-05, "epoch": 2.493514968594502, "percentage": 38.21, "elapsed_time": "3:29:37", "remaining_time": "5:38:57", "throughput": 2337.13, "total_tokens": 29395760} {"current_steps": 15290, "total_steps": 40000, "loss": 0.0669, "lr": 3.404041085020775e-05, "epoch": 2.4943306958153193, "percentage": 38.22, "elapsed_time": "3:29:39", "remaining_time": "5:38:50", "throughput": 2337.52, "total_tokens": 29405472} {"current_steps": 15295, "total_steps": 40000, "loss": 0.076, "lr": 3.403125705487459e-05, "epoch": 2.495146423036137, "percentage": 38.24, "elapsed_time": "3:29:41", "remaining_time": "5:38:42", "throughput": 2337.87, "total_tokens": 29414656} {"current_steps": 15300, "total_steps": 40000, "loss": 0.0919, "lr": 3.402210186680811e-05, "epoch": 2.495962150256954, "percentage": 38.25, "elapsed_time": "3:29:43", "remaining_time": "5:38:35", "throughput": 2338.24, "total_tokens": 29424240} {"current_steps": 15305, "total_steps": 40000, "loss": 0.0415, "lr": 3.4012945287420137e-05, "epoch": 2.4967778774777716, "percentage": 38.26, "elapsed_time": "3:29:45", "remaining_time": "5:38:27", "throughput": 2338.59, "total_tokens": 29433440} {"current_steps": 15310, "total_steps": 40000, "loss": 0.0788, "lr": 3.400378731812274e-05, "epoch": 2.4975936046985887, "percentage": 38.27, "elapsed_time": "3:29:48", "remaining_time": "5:38:20", "throughput": 2338.92, "total_tokens": 29442416} {"current_steps": 15315, "total_steps": 40000, "loss": 0.0689, "lr": 3.399462796032817e-05, "epoch": 2.4984093319194063, "percentage": 38.29, "elapsed_time": "3:29:50", "remaining_time": "5:38:12", "throughput": 2339.37, "total_tokens": 29452944} {"current_steps": 15320, "total_steps": 40000, "loss": 0.0073, "lr": 3.3985467215448954e-05, "epoch": 2.4992250591402234, "percentage": 38.3, "elapsed_time": "3:29:52", "remaining_time": "5:38:05", "throughput": 2339.7, "total_tokens": 29461920} {"current_steps": 15325, "total_steps": 40000, "loss": 0.0245, "lr": 3.3976305084897776e-05, "epoch": 2.500040786361041, "percentage": 38.31, "elapsed_time": "3:29:54", "remaining_time": "5:37:58", "throughput": 2340.08, "total_tokens": 29471520} {"current_steps": 15330, "total_steps": 40000, "loss": 0.0282, "lr": 3.3967141570087544e-05, "epoch": 2.500856513581858, "percentage": 38.32, "elapsed_time": "3:29:56", "remaining_time": "5:37:50", "throughput": 2340.46, "total_tokens": 29481088} {"current_steps": 15335, "total_steps": 40000, "loss": 0.0192, "lr": 3.39579766724314e-05, "epoch": 2.5016722408026757, "percentage": 38.34, "elapsed_time": "3:29:58", "remaining_time": "5:37:43", "throughput": 2340.93, "total_tokens": 29491904} {"current_steps": 15340, "total_steps": 40000, "loss": 0.1307, "lr": 3.3948810393342677e-05, "epoch": 2.502487968023493, "percentage": 38.35, "elapsed_time": "3:30:00", "remaining_time": "5:37:35", "throughput": 2341.33, "total_tokens": 29501776} {"current_steps": 15345, "total_steps": 40000, "loss": 0.1492, "lr": 3.3939642734234936e-05, "epoch": 2.5033036952443104, "percentage": 38.36, "elapsed_time": "3:30:02", "remaining_time": "5:37:28", "throughput": 2341.76, "total_tokens": 29512016} {"current_steps": 15350, "total_steps": 40000, "loss": 0.0296, "lr": 3.393047369652194e-05, "epoch": 2.5041194224651275, "percentage": 38.38, "elapsed_time": "3:30:04", "remaining_time": "5:37:21", "throughput": 2342.17, "total_tokens": 29522064} {"current_steps": 15355, "total_steps": 40000, "loss": 0.0734, "lr": 3.3921303281617664e-05, "epoch": 2.504935149685945, "percentage": 38.39, "elapsed_time": "3:30:06", "remaining_time": "5:37:13", "throughput": 2342.53, "total_tokens": 29531440} {"current_steps": 15360, "total_steps": 40000, "loss": 0.1904, "lr": 3.391213149093632e-05, "epoch": 2.5057508769067622, "percentage": 38.4, "elapsed_time": "3:30:08", "remaining_time": "5:37:06", "throughput": 2342.91, "total_tokens": 29541024} {"current_steps": 15365, "total_steps": 40000, "loss": 0.0319, "lr": 3.3902958325892303e-05, "epoch": 2.50656660412758, "percentage": 38.41, "elapsed_time": "3:30:10", "remaining_time": "5:36:59", "throughput": 2343.41, "total_tokens": 29552128} {"current_steps": 15370, "total_steps": 40000, "loss": 0.0792, "lr": 3.389378378790023e-05, "epoch": 2.507382331348397, "percentage": 38.42, "elapsed_time": "3:30:12", "remaining_time": "5:36:51", "throughput": 2343.75, "total_tokens": 29561248} {"current_steps": 15375, "total_steps": 40000, "loss": 0.0043, "lr": 3.388460787837493e-05, "epoch": 2.5081980585692145, "percentage": 38.44, "elapsed_time": "3:30:14", "remaining_time": "5:36:44", "throughput": 2344.15, "total_tokens": 29571152} {"current_steps": 15380, "total_steps": 40000, "loss": 0.0271, "lr": 3.387543059873145e-05, "epoch": 2.5090137857900316, "percentage": 38.45, "elapsed_time": "3:30:16", "remaining_time": "5:36:36", "throughput": 2344.56, "total_tokens": 29581248} {"current_steps": 15385, "total_steps": 40000, "loss": 0.0781, "lr": 3.386625195038503e-05, "epoch": 2.5098295130108492, "percentage": 38.46, "elapsed_time": "3:30:19", "remaining_time": "5:36:29", "throughput": 2344.94, "total_tokens": 29590896} {"current_steps": 15390, "total_steps": 40000, "loss": 0.2328, "lr": 3.3857071934751136e-05, "epoch": 2.510645240231667, "percentage": 38.48, "elapsed_time": "3:30:21", "remaining_time": "5:36:22", "throughput": 2345.29, "total_tokens": 29600064} {"current_steps": 15395, "total_steps": 40000, "loss": 0.0381, "lr": 3.384789055324544e-05, "epoch": 2.511460967452484, "percentage": 38.49, "elapsed_time": "3:30:23", "remaining_time": "5:36:14", "throughput": 2345.57, "total_tokens": 29608448} {"current_steps": 15400, "total_steps": 40000, "loss": 0.0931, "lr": 3.3838707807283843e-05, "epoch": 2.512276694673301, "percentage": 38.5, "elapsed_time": "3:30:25", "remaining_time": "5:36:07", "throughput": 2345.92, "total_tokens": 29617728} {"current_steps": 15400, "total_steps": 40000, "eval_loss": 0.17570002377033234, "epoch": 2.512276694673301, "percentage": 38.5, "elapsed_time": "3:31:45", "remaining_time": "5:38:16", "throughput": 2331.06, "total_tokens": 29617728} {"current_steps": 15405, "total_steps": 40000, "loss": 0.1121, "lr": 3.382952369828243e-05, "epoch": 2.5130924218941186, "percentage": 38.51, "elapsed_time": "3:31:49", "remaining_time": "5:38:11", "throughput": 2330.96, "total_tokens": 29625632} {"current_steps": 15410, "total_steps": 40000, "loss": 0.104, "lr": 3.38203382276575e-05, "epoch": 2.513908149114936, "percentage": 38.52, "elapsed_time": "3:31:51", "remaining_time": "5:38:04", "throughput": 2331.39, "total_tokens": 29635936} {"current_steps": 15415, "total_steps": 40000, "loss": 0.0101, "lr": 3.381115139682557e-05, "epoch": 2.5147238763357533, "percentage": 38.54, "elapsed_time": "3:31:53", "remaining_time": "5:37:56", "throughput": 2331.75, "total_tokens": 29645408} {"current_steps": 15420, "total_steps": 40000, "loss": 0.0035, "lr": 3.3801963207203366e-05, "epoch": 2.5155396035565705, "percentage": 38.55, "elapsed_time": "3:31:55", "remaining_time": "5:37:49", "throughput": 2332.24, "total_tokens": 29656480} {"current_steps": 15425, "total_steps": 40000, "loss": 0.0843, "lr": 3.379277366020782e-05, "epoch": 2.516355330777388, "percentage": 38.56, "elapsed_time": "3:31:57", "remaining_time": "5:37:42", "throughput": 2332.69, "total_tokens": 29667024} {"current_steps": 15430, "total_steps": 40000, "loss": 0.1824, "lr": 3.3783582757256085e-05, "epoch": 2.5171710579982056, "percentage": 38.57, "elapsed_time": "3:31:59", "remaining_time": "5:37:34", "throughput": 2333.09, "total_tokens": 29676944} {"current_steps": 15435, "total_steps": 40000, "loss": 0.0232, "lr": 3.3774390499765504e-05, "epoch": 2.5179867852190227, "percentage": 38.59, "elapsed_time": "3:32:02", "remaining_time": "5:37:27", "throughput": 2333.44, "total_tokens": 29686128} {"current_steps": 15440, "total_steps": 40000, "loss": 0.0656, "lr": 3.376519688915364e-05, "epoch": 2.51880251243984, "percentage": 38.6, "elapsed_time": "3:32:04", "remaining_time": "5:37:19", "throughput": 2333.8, "total_tokens": 29695536} {"current_steps": 15445, "total_steps": 40000, "loss": 0.0037, "lr": 3.3756001926838273e-05, "epoch": 2.5196182396606575, "percentage": 38.61, "elapsed_time": "3:32:06", "remaining_time": "5:37:12", "throughput": 2334.16, "total_tokens": 29704928} {"current_steps": 15450, "total_steps": 40000, "loss": 0.0585, "lr": 3.374680561423737e-05, "epoch": 2.520433966881475, "percentage": 38.62, "elapsed_time": "3:32:08", "remaining_time": "5:37:05", "throughput": 2334.59, "total_tokens": 29715296} {"current_steps": 15455, "total_steps": 40000, "loss": 0.2436, "lr": 3.373760795276912e-05, "epoch": 2.521249694102292, "percentage": 38.64, "elapsed_time": "3:32:10", "remaining_time": "5:36:57", "throughput": 2334.98, "total_tokens": 29725072} {"current_steps": 15460, "total_steps": 40000, "loss": 0.0691, "lr": 3.372840894385192e-05, "epoch": 2.5220654213231093, "percentage": 38.65, "elapsed_time": "3:32:12", "remaining_time": "5:36:50", "throughput": 2335.35, "total_tokens": 29734592} {"current_steps": 15465, "total_steps": 40000, "loss": 0.0546, "lr": 3.3719208588904375e-05, "epoch": 2.522881148543927, "percentage": 38.66, "elapsed_time": "3:32:14", "remaining_time": "5:36:43", "throughput": 2335.62, "total_tokens": 29742800} {"current_steps": 15470, "total_steps": 40000, "loss": 0.1029, "lr": 3.371000688934529e-05, "epoch": 2.5236968757647444, "percentage": 38.67, "elapsed_time": "3:32:16", "remaining_time": "5:36:35", "throughput": 2335.99, "total_tokens": 29752368} {"current_steps": 15475, "total_steps": 40000, "loss": 0.1161, "lr": 3.370080384659369e-05, "epoch": 2.5245126029855616, "percentage": 38.69, "elapsed_time": "3:32:18", "remaining_time": "5:36:28", "throughput": 2336.38, "total_tokens": 29762176} {"current_steps": 15480, "total_steps": 40000, "loss": 0.0096, "lr": 3.36915994620688e-05, "epoch": 2.525328330206379, "percentage": 38.7, "elapsed_time": "3:32:20", "remaining_time": "5:36:20", "throughput": 2336.77, "total_tokens": 29771920} {"current_steps": 15485, "total_steps": 40000, "loss": 0.0698, "lr": 3.3682393737190035e-05, "epoch": 2.5261440574271963, "percentage": 38.71, "elapsed_time": "3:32:22", "remaining_time": "5:36:13", "throughput": 2337.16, "total_tokens": 29781792} {"current_steps": 15490, "total_steps": 40000, "loss": 0.0126, "lr": 3.3673186673377054e-05, "epoch": 2.526959784648014, "percentage": 38.73, "elapsed_time": "3:32:24", "remaining_time": "5:36:06", "throughput": 2337.53, "total_tokens": 29791328} {"current_steps": 15495, "total_steps": 40000, "loss": 0.1874, "lr": 3.366397827204969e-05, "epoch": 2.527775511868831, "percentage": 38.74, "elapsed_time": "3:32:26", "remaining_time": "5:35:58", "throughput": 2337.95, "total_tokens": 29801472} {"current_steps": 15500, "total_steps": 40000, "loss": 0.1101, "lr": 3.3654768534628e-05, "epoch": 2.5285912390896486, "percentage": 38.75, "elapsed_time": "3:32:28", "remaining_time": "5:35:51", "throughput": 2338.31, "total_tokens": 29810944} {"current_steps": 15505, "total_steps": 40000, "loss": 0.0233, "lr": 3.3645557462532245e-05, "epoch": 2.5294069663104657, "percentage": 38.76, "elapsed_time": "3:32:30", "remaining_time": "5:35:44", "throughput": 2338.73, "total_tokens": 29821152} {"current_steps": 15510, "total_steps": 40000, "loss": 0.0055, "lr": 3.363634505718288e-05, "epoch": 2.5302226935312833, "percentage": 38.77, "elapsed_time": "3:32:33", "remaining_time": "5:35:36", "throughput": 2339.12, "total_tokens": 29830880} {"current_steps": 15515, "total_steps": 40000, "loss": 0.0074, "lr": 3.362713132000057e-05, "epoch": 2.5310384207521004, "percentage": 38.79, "elapsed_time": "3:32:35", "remaining_time": "5:35:29", "throughput": 2339.46, "total_tokens": 29840144} {"current_steps": 15520, "total_steps": 40000, "loss": 0.0172, "lr": 3.36179162524062e-05, "epoch": 2.531854147972918, "percentage": 38.8, "elapsed_time": "3:32:37", "remaining_time": "5:35:22", "throughput": 2339.84, "total_tokens": 29849712} {"current_steps": 15525, "total_steps": 40000, "loss": 0.1422, "lr": 3.3608699855820846e-05, "epoch": 2.532669875193735, "percentage": 38.81, "elapsed_time": "3:32:39", "remaining_time": "5:35:14", "throughput": 2340.22, "total_tokens": 29859520} {"current_steps": 15530, "total_steps": 40000, "loss": 0.0266, "lr": 3.359948213166578e-05, "epoch": 2.5334856024145527, "percentage": 38.82, "elapsed_time": "3:32:41", "remaining_time": "5:35:07", "throughput": 2340.67, "total_tokens": 29870016} {"current_steps": 15535, "total_steps": 40000, "loss": 0.0466, "lr": 3.359026308136252e-05, "epoch": 2.53430132963537, "percentage": 38.84, "elapsed_time": "3:32:43", "remaining_time": "5:35:00", "throughput": 2341.01, "total_tokens": 29879136} {"current_steps": 15540, "total_steps": 40000, "loss": 0.1914, "lr": 3.358104270633272e-05, "epoch": 2.5351170568561874, "percentage": 38.85, "elapsed_time": "3:32:45", "remaining_time": "5:34:52", "throughput": 2341.39, "total_tokens": 29888928} {"current_steps": 15545, "total_steps": 40000, "loss": 0.0079, "lr": 3.357182100799831e-05, "epoch": 2.5359327840770045, "percentage": 38.86, "elapsed_time": "3:32:47", "remaining_time": "5:34:45", "throughput": 2341.75, "total_tokens": 29898336} {"current_steps": 15550, "total_steps": 40000, "loss": 0.1628, "lr": 3.3562597987781384e-05, "epoch": 2.536748511297822, "percentage": 38.88, "elapsed_time": "3:32:49", "remaining_time": "5:34:38", "throughput": 2342.2, "total_tokens": 29908912} {"current_steps": 15555, "total_steps": 40000, "loss": 0.0347, "lr": 3.355337364710424e-05, "epoch": 2.537564238518639, "percentage": 38.89, "elapsed_time": "3:32:51", "remaining_time": "5:34:30", "throughput": 2342.5, "total_tokens": 29917584} {"current_steps": 15560, "total_steps": 40000, "loss": 0.1695, "lr": 3.354414798738939e-05, "epoch": 2.538379965739457, "percentage": 38.9, "elapsed_time": "3:32:53", "remaining_time": "5:34:23", "throughput": 2342.77, "total_tokens": 29925920} {"current_steps": 15565, "total_steps": 40000, "loss": 0.0048, "lr": 3.353492101005955e-05, "epoch": 2.539195692960274, "percentage": 38.91, "elapsed_time": "3:32:55", "remaining_time": "5:34:16", "throughput": 2343.15, "total_tokens": 29935520} {"current_steps": 15570, "total_steps": 40000, "loss": 0.1135, "lr": 3.352569271653763e-05, "epoch": 2.5400114201810915, "percentage": 38.92, "elapsed_time": "3:32:57", "remaining_time": "5:34:08", "throughput": 2343.6, "total_tokens": 29946112} {"current_steps": 15575, "total_steps": 40000, "loss": 0.1705, "lr": 3.351646310824675e-05, "epoch": 2.5408271474019086, "percentage": 38.94, "elapsed_time": "3:32:59", "remaining_time": "5:34:01", "throughput": 2343.96, "total_tokens": 29955664} {"current_steps": 15580, "total_steps": 40000, "loss": 0.0157, "lr": 3.350723218661023e-05, "epoch": 2.541642874622726, "percentage": 38.95, "elapsed_time": "3:33:01", "remaining_time": "5:33:54", "throughput": 2344.41, "total_tokens": 29966224} {"current_steps": 15585, "total_steps": 40000, "loss": 0.0187, "lr": 3.349799995305162e-05, "epoch": 2.5424586018435438, "percentage": 38.96, "elapsed_time": "3:33:04", "remaining_time": "5:33:47", "throughput": 2344.84, "total_tokens": 29976560} {"current_steps": 15590, "total_steps": 40000, "loss": 0.0019, "lr": 3.348876640899461e-05, "epoch": 2.543274329064361, "percentage": 38.98, "elapsed_time": "3:33:06", "remaining_time": "5:33:39", "throughput": 2345.17, "total_tokens": 29985536} {"current_steps": 15595, "total_steps": 40000, "loss": 0.456, "lr": 3.3479531555863144e-05, "epoch": 2.544090056285178, "percentage": 38.99, "elapsed_time": "3:33:08", "remaining_time": "5:33:32", "throughput": 2345.44, "total_tokens": 29993808} {"current_steps": 15600, "total_steps": 40000, "loss": 0.1366, "lr": 3.3470295395081344e-05, "epoch": 2.5449057835059956, "percentage": 39.0, "elapsed_time": "3:33:10", "remaining_time": "5:33:25", "throughput": 2345.86, "total_tokens": 30004032} {"current_steps": 15600, "total_steps": 40000, "eval_loss": 0.17634761333465576, "epoch": 2.5449057835059956, "percentage": 39.0, "elapsed_time": "3:34:30", "remaining_time": "5:35:31", "throughput": 2331.15, "total_tokens": 30004032} {"current_steps": 15605, "total_steps": 40000, "loss": 0.0042, "lr": 3.3461057928073556e-05, "epoch": 2.545721510726813, "percentage": 39.01, "elapsed_time": "3:34:34", "remaining_time": "5:35:26", "throughput": 2331.17, "total_tokens": 30012768} {"current_steps": 15610, "total_steps": 40000, "loss": 0.0824, "lr": 3.345181915626431e-05, "epoch": 2.5465372379476303, "percentage": 39.02, "elapsed_time": "3:34:36", "remaining_time": "5:35:19", "throughput": 2331.6, "total_tokens": 30023216} {"current_steps": 15615, "total_steps": 40000, "loss": 0.0097, "lr": 3.344257908107834e-05, "epoch": 2.5473529651684474, "percentage": 39.04, "elapsed_time": "3:34:38", "remaining_time": "5:35:11", "throughput": 2332.01, "total_tokens": 30033232} {"current_steps": 15620, "total_steps": 40000, "loss": 0.0191, "lr": 3.343333770394058e-05, "epoch": 2.548168692389265, "percentage": 39.05, "elapsed_time": "3:34:40", "remaining_time": "5:35:04", "throughput": 2332.33, "total_tokens": 30042224} {"current_steps": 15625, "total_steps": 40000, "loss": 0.0166, "lr": 3.342409502627616e-05, "epoch": 2.5489844196100826, "percentage": 39.06, "elapsed_time": "3:34:42", "remaining_time": "5:34:57", "throughput": 2332.66, "total_tokens": 30051232} {"current_steps": 15630, "total_steps": 40000, "loss": 0.0434, "lr": 3.341485104951043e-05, "epoch": 2.5498001468308997, "percentage": 39.07, "elapsed_time": "3:34:44", "remaining_time": "5:34:49", "throughput": 2332.93, "total_tokens": 30059520} {"current_steps": 15635, "total_steps": 40000, "loss": 0.1745, "lr": 3.340560577506892e-05, "epoch": 2.550615874051717, "percentage": 39.09, "elapsed_time": "3:34:46", "remaining_time": "5:34:42", "throughput": 2333.18, "total_tokens": 30067648} {"current_steps": 15640, "total_steps": 40000, "loss": 0.1463, "lr": 3.339635920437735e-05, "epoch": 2.5514316012725344, "percentage": 39.1, "elapsed_time": "3:34:49", "remaining_time": "5:34:35", "throughput": 2333.59, "total_tokens": 30077760} {"current_steps": 15645, "total_steps": 40000, "loss": 0.0965, "lr": 3.338711133886169e-05, "epoch": 2.552247328493352, "percentage": 39.11, "elapsed_time": "3:34:51", "remaining_time": "5:34:27", "throughput": 2333.96, "total_tokens": 30087328} {"current_steps": 15650, "total_steps": 40000, "loss": 0.1256, "lr": 3.3377862179948064e-05, "epoch": 2.553063055714169, "percentage": 39.12, "elapsed_time": "3:34:53", "remaining_time": "5:34:20", "throughput": 2334.32, "total_tokens": 30096720} {"current_steps": 15655, "total_steps": 40000, "loss": 0.0529, "lr": 3.336861172906281e-05, "epoch": 2.5538787829349863, "percentage": 39.14, "elapsed_time": "3:34:55", "remaining_time": "5:34:13", "throughput": 2334.73, "total_tokens": 30106864} {"current_steps": 15660, "total_steps": 40000, "loss": 0.0523, "lr": 3.335935998763245e-05, "epoch": 2.554694510155804, "percentage": 39.15, "elapsed_time": "3:34:57", "remaining_time": "5:34:05", "throughput": 2335.1, "total_tokens": 30116464} {"current_steps": 15665, "total_steps": 40000, "loss": 0.0129, "lr": 3.3350106957083744e-05, "epoch": 2.5555102373766214, "percentage": 39.16, "elapsed_time": "3:34:59", "remaining_time": "5:33:58", "throughput": 2335.57, "total_tokens": 30127328} {"current_steps": 15670, "total_steps": 40000, "loss": 0.0937, "lr": 3.33408526388436e-05, "epoch": 2.5563259645974385, "percentage": 39.17, "elapsed_time": "3:35:01", "remaining_time": "5:33:51", "throughput": 2335.9, "total_tokens": 30136512} {"current_steps": 15675, "total_steps": 40000, "loss": 0.1991, "lr": 3.3331597034339166e-05, "epoch": 2.557141691818256, "percentage": 39.19, "elapsed_time": "3:35:03", "remaining_time": "5:33:44", "throughput": 2336.22, "total_tokens": 30145360} {"current_steps": 15680, "total_steps": 40000, "loss": 0.3063, "lr": 3.3322340144997764e-05, "epoch": 2.5579574190390733, "percentage": 39.2, "elapsed_time": "3:35:05", "remaining_time": "5:33:36", "throughput": 2336.6, "total_tokens": 30155120} {"current_steps": 15685, "total_steps": 40000, "loss": 0.1829, "lr": 3.331308197224693e-05, "epoch": 2.558773146259891, "percentage": 39.21, "elapsed_time": "3:35:07", "remaining_time": "5:33:29", "throughput": 2336.89, "total_tokens": 30163712} {"current_steps": 15690, "total_steps": 40000, "loss": 0.0161, "lr": 3.330382251751438e-05, "epoch": 2.559588873480708, "percentage": 39.23, "elapsed_time": "3:35:09", "remaining_time": "5:33:22", "throughput": 2337.31, "total_tokens": 30173936} {"current_steps": 15695, "total_steps": 40000, "loss": 0.0273, "lr": 3.3294561782228054e-05, "epoch": 2.5604046007015255, "percentage": 39.24, "elapsed_time": "3:35:11", "remaining_time": "5:33:14", "throughput": 2337.66, "total_tokens": 30183376} {"current_steps": 15700, "total_steps": 40000, "loss": 0.0068, "lr": 3.328529976781607e-05, "epoch": 2.5612203279223427, "percentage": 39.25, "elapsed_time": "3:35:13", "remaining_time": "5:33:07", "throughput": 2338.02, "total_tokens": 30192768} {"current_steps": 15705, "total_steps": 40000, "loss": 0.2159, "lr": 3.327603647570673e-05, "epoch": 2.5620360551431602, "percentage": 39.26, "elapsed_time": "3:35:15", "remaining_time": "5:33:00", "throughput": 2338.39, "total_tokens": 30202448} {"current_steps": 15710, "total_steps": 40000, "loss": 0.0435, "lr": 3.326677190732857e-05, "epoch": 2.5628517823639774, "percentage": 39.27, "elapsed_time": "3:35:17", "remaining_time": "5:32:53", "throughput": 2338.89, "total_tokens": 30213664} {"current_steps": 15715, "total_steps": 40000, "loss": 0.0282, "lr": 3.325750606411029e-05, "epoch": 2.563667509584795, "percentage": 39.29, "elapsed_time": "3:35:20", "remaining_time": "5:32:45", "throughput": 2339.29, "total_tokens": 30223712} {"current_steps": 15720, "total_steps": 40000, "loss": 0.038, "lr": 3.3248238947480804e-05, "epoch": 2.564483236805612, "percentage": 39.3, "elapsed_time": "3:35:22", "remaining_time": "5:32:38", "throughput": 2339.61, "total_tokens": 30232640} {"current_steps": 15725, "total_steps": 40000, "loss": 0.042, "lr": 3.323897055886922e-05, "epoch": 2.5652989640264297, "percentage": 39.31, "elapsed_time": "3:35:24", "remaining_time": "5:32:31", "throughput": 2339.96, "total_tokens": 30242048} {"current_steps": 15730, "total_steps": 40000, "loss": 0.0049, "lr": 3.322970089970484e-05, "epoch": 2.566114691247247, "percentage": 39.32, "elapsed_time": "3:35:26", "remaining_time": "5:32:24", "throughput": 2340.28, "total_tokens": 30251008} {"current_steps": 15735, "total_steps": 40000, "loss": 0.0023, "lr": 3.3220429971417165e-05, "epoch": 2.5669304184680644, "percentage": 39.34, "elapsed_time": "3:35:28", "remaining_time": "5:32:16", "throughput": 2340.62, "total_tokens": 30260160} {"current_steps": 15740, "total_steps": 40000, "loss": 0.2055, "lr": 3.321115777543588e-05, "epoch": 2.5677461456888815, "percentage": 39.35, "elapsed_time": "3:35:30", "remaining_time": "5:32:09", "throughput": 2340.86, "total_tokens": 30268144} {"current_steps": 15745, "total_steps": 40000, "loss": 0.1707, "lr": 3.320188431319088e-05, "epoch": 2.568561872909699, "percentage": 39.36, "elapsed_time": "3:35:32", "remaining_time": "5:32:02", "throughput": 2341.2, "total_tokens": 30277392} {"current_steps": 15750, "total_steps": 40000, "loss": 0.1205, "lr": 3.319260958611224e-05, "epoch": 2.569377600130516, "percentage": 39.38, "elapsed_time": "3:35:34", "remaining_time": "5:31:54", "throughput": 2341.56, "total_tokens": 30286848} {"current_steps": 15755, "total_steps": 40000, "loss": 0.0971, "lr": 3.3183333595630256e-05, "epoch": 2.5701933273513338, "percentage": 39.39, "elapsed_time": "3:35:36", "remaining_time": "5:31:47", "throughput": 2341.86, "total_tokens": 30295616} {"current_steps": 15760, "total_steps": 40000, "loss": 0.1945, "lr": 3.317405634317538e-05, "epoch": 2.5710090545721513, "percentage": 39.4, "elapsed_time": "3:35:38", "remaining_time": "5:31:40", "throughput": 2342.25, "total_tokens": 30305456} {"current_steps": 15765, "total_steps": 40000, "loss": 0.1948, "lr": 3.3164777830178315e-05, "epoch": 2.5718247817929685, "percentage": 39.41, "elapsed_time": "3:35:40", "remaining_time": "5:31:33", "throughput": 2342.65, "total_tokens": 30315504} {"current_steps": 15770, "total_steps": 40000, "loss": 0.0852, "lr": 3.315549805806989e-05, "epoch": 2.5726405090137856, "percentage": 39.42, "elapsed_time": "3:35:42", "remaining_time": "5:31:26", "throughput": 2342.99, "total_tokens": 30324720} {"current_steps": 15775, "total_steps": 40000, "loss": 0.0526, "lr": 3.314621702828118e-05, "epoch": 2.573456236234603, "percentage": 39.44, "elapsed_time": "3:35:44", "remaining_time": "5:31:18", "throughput": 2343.34, "total_tokens": 30334160} {"current_steps": 15780, "total_steps": 40000, "loss": 0.1675, "lr": 3.313693474224342e-05, "epoch": 2.5742719634554208, "percentage": 39.45, "elapsed_time": "3:35:46", "remaining_time": "5:31:11", "throughput": 2343.8, "total_tokens": 30344848} {"current_steps": 15785, "total_steps": 40000, "loss": 0.0621, "lr": 3.312765120138809e-05, "epoch": 2.575087690676238, "percentage": 39.46, "elapsed_time": "3:35:48", "remaining_time": "5:31:04", "throughput": 2344.29, "total_tokens": 30356032} {"current_steps": 15790, "total_steps": 40000, "loss": 0.162, "lr": 3.311836640714679e-05, "epoch": 2.575903417897055, "percentage": 39.48, "elapsed_time": "3:35:50", "remaining_time": "5:30:57", "throughput": 2344.74, "total_tokens": 30366736} {"current_steps": 15795, "total_steps": 40000, "loss": 0.0061, "lr": 3.310908036095137e-05, "epoch": 2.5767191451178726, "percentage": 39.49, "elapsed_time": "3:35:53", "remaining_time": "5:30:49", "throughput": 2345.08, "total_tokens": 30375952} {"current_steps": 15800, "total_steps": 40000, "loss": 0.0933, "lr": 3.309979306423386e-05, "epoch": 2.57753487233869, "percentage": 39.5, "elapsed_time": "3:35:55", "remaining_time": "5:30:42", "throughput": 2345.54, "total_tokens": 30386752} {"current_steps": 15800, "total_steps": 40000, "eval_loss": 0.1606510579586029, "epoch": 2.57753487233869, "percentage": 39.5, "elapsed_time": "3:37:15", "remaining_time": "5:32:46", "throughput": 2331.04, "total_tokens": 30386752} {"current_steps": 15805, "total_steps": 40000, "loss": 0.0073, "lr": 3.309050451842647e-05, "epoch": 2.5783505995595073, "percentage": 39.51, "elapsed_time": "3:37:19", "remaining_time": "5:32:41", "throughput": 2331.02, "total_tokens": 30395568} {"current_steps": 15810, "total_steps": 40000, "loss": 0.1003, "lr": 3.3081214724961604e-05, "epoch": 2.5791663267803244, "percentage": 39.52, "elapsed_time": "3:37:21", "remaining_time": "5:32:34", "throughput": 2331.4, "total_tokens": 30405344} {"current_steps": 15815, "total_steps": 40000, "loss": 0.0536, "lr": 3.307192368527188e-05, "epoch": 2.579982054001142, "percentage": 39.54, "elapsed_time": "3:37:23", "remaining_time": "5:32:27", "throughput": 2331.74, "total_tokens": 30414576} {"current_steps": 15820, "total_steps": 40000, "loss": 0.0075, "lr": 3.306263140079008e-05, "epoch": 2.5807977812219596, "percentage": 39.55, "elapsed_time": "3:37:25", "remaining_time": "5:32:19", "throughput": 2332.1, "total_tokens": 30424080} {"current_steps": 15825, "total_steps": 40000, "loss": 0.0121, "lr": 3.30533378729492e-05, "epoch": 2.5816135084427767, "percentage": 39.56, "elapsed_time": "3:37:27", "remaining_time": "5:32:12", "throughput": 2332.47, "total_tokens": 30433680} {"current_steps": 15830, "total_steps": 40000, "loss": 0.0386, "lr": 3.304404310318242e-05, "epoch": 2.582429235663594, "percentage": 39.57, "elapsed_time": "3:37:29", "remaining_time": "5:32:05", "throughput": 2332.79, "total_tokens": 30442704} {"current_steps": 15835, "total_steps": 40000, "loss": 0.1194, "lr": 3.3034747092923105e-05, "epoch": 2.5832449628844114, "percentage": 39.59, "elapsed_time": "3:37:32", "remaining_time": "5:31:58", "throughput": 2333.18, "total_tokens": 30452656} {"current_steps": 15840, "total_steps": 40000, "loss": 0.088, "lr": 3.3025449843604806e-05, "epoch": 2.584060690105229, "percentage": 39.6, "elapsed_time": "3:37:34", "remaining_time": "5:31:50", "throughput": 2333.56, "total_tokens": 30462448} {"current_steps": 15845, "total_steps": 40000, "loss": 0.1267, "lr": 3.30161513566613e-05, "epoch": 2.584876417326046, "percentage": 39.61, "elapsed_time": "3:37:36", "remaining_time": "5:31:43", "throughput": 2333.94, "total_tokens": 30472192} {"current_steps": 15850, "total_steps": 40000, "loss": 0.1471, "lr": 3.3006851633526506e-05, "epoch": 2.5856921445468637, "percentage": 39.62, "elapsed_time": "3:37:38", "remaining_time": "5:31:36", "throughput": 2334.31, "total_tokens": 30481824} {"current_steps": 15855, "total_steps": 40000, "loss": 0.0213, "lr": 3.2997550675634584e-05, "epoch": 2.586507871767681, "percentage": 39.64, "elapsed_time": "3:37:40", "remaining_time": "5:31:29", "throughput": 2334.66, "total_tokens": 30491248} {"current_steps": 15860, "total_steps": 40000, "loss": 0.0629, "lr": 3.2988248484419825e-05, "epoch": 2.5873235989884984, "percentage": 39.65, "elapsed_time": "3:37:42", "remaining_time": "5:31:21", "throughput": 2334.99, "total_tokens": 30500448} {"current_steps": 15865, "total_steps": 40000, "loss": 0.0989, "lr": 3.2978945061316776e-05, "epoch": 2.5881393262093155, "percentage": 39.66, "elapsed_time": "3:37:44", "remaining_time": "5:31:14", "throughput": 2335.35, "total_tokens": 30509936} {"current_steps": 15870, "total_steps": 40000, "loss": 0.0264, "lr": 3.296964040776013e-05, "epoch": 2.588955053430133, "percentage": 39.67, "elapsed_time": "3:37:46", "remaining_time": "5:31:07", "throughput": 2335.7, "total_tokens": 30519440} {"current_steps": 15875, "total_steps": 40000, "loss": 0.0045, "lr": 3.296033452518478e-05, "epoch": 2.5897707806509502, "percentage": 39.69, "elapsed_time": "3:37:48", "remaining_time": "5:31:00", "throughput": 2336.07, "total_tokens": 30529040} {"current_steps": 15880, "total_steps": 40000, "loss": 0.0044, "lr": 3.2951027415025806e-05, "epoch": 2.590586507871768, "percentage": 39.7, "elapsed_time": "3:37:50", "remaining_time": "5:30:52", "throughput": 2336.49, "total_tokens": 30539344} {"current_steps": 15885, "total_steps": 40000, "loss": 0.1009, "lr": 3.294171907871849e-05, "epoch": 2.591402235092585, "percentage": 39.71, "elapsed_time": "3:37:52", "remaining_time": "5:30:45", "throughput": 2336.74, "total_tokens": 30547440} {"current_steps": 15890, "total_steps": 40000, "loss": 0.078, "lr": 3.293240951769828e-05, "epoch": 2.5922179623134025, "percentage": 39.73, "elapsed_time": "3:37:54", "remaining_time": "5:30:38", "throughput": 2337.04, "total_tokens": 30556288} {"current_steps": 15895, "total_steps": 40000, "loss": 0.1238, "lr": 3.2923098733400846e-05, "epoch": 2.5930336895342196, "percentage": 39.74, "elapsed_time": "3:37:56", "remaining_time": "5:30:31", "throughput": 2337.41, "total_tokens": 30565840} {"current_steps": 15900, "total_steps": 40000, "loss": 0.1541, "lr": 3.291378672726202e-05, "epoch": 2.593849416755037, "percentage": 39.75, "elapsed_time": "3:37:58", "remaining_time": "5:30:23", "throughput": 2337.75, "total_tokens": 30575216} {"current_steps": 15905, "total_steps": 40000, "loss": 0.074, "lr": 3.2904473500717824e-05, "epoch": 2.5946651439758543, "percentage": 39.76, "elapsed_time": "3:38:00", "remaining_time": "5:30:16", "throughput": 2338.16, "total_tokens": 30585424} {"current_steps": 15910, "total_steps": 40000, "loss": 0.1273, "lr": 3.289515905520449e-05, "epoch": 2.595480871196672, "percentage": 39.77, "elapsed_time": "3:38:03", "remaining_time": "5:30:09", "throughput": 2338.44, "total_tokens": 30593824} {"current_steps": 15915, "total_steps": 40000, "loss": 0.0382, "lr": 3.288584339215841e-05, "epoch": 2.596296598417489, "percentage": 39.79, "elapsed_time": "3:38:05", "remaining_time": "5:30:02", "throughput": 2338.86, "total_tokens": 30604176} {"current_steps": 15920, "total_steps": 40000, "loss": 0.1507, "lr": 3.287652651301617e-05, "epoch": 2.5971123256383066, "percentage": 39.8, "elapsed_time": "3:38:07", "remaining_time": "5:29:55", "throughput": 2339.23, "total_tokens": 30613904} {"current_steps": 15925, "total_steps": 40000, "loss": 0.063, "lr": 3.286720841921457e-05, "epoch": 2.5979280528591238, "percentage": 39.81, "elapsed_time": "3:38:09", "remaining_time": "5:29:47", "throughput": 2339.64, "total_tokens": 30624080} {"current_steps": 15930, "total_steps": 40000, "loss": 0.1152, "lr": 3.285788911219056e-05, "epoch": 2.5987437800799413, "percentage": 39.83, "elapsed_time": "3:38:11", "remaining_time": "5:29:40", "throughput": 2340.02, "total_tokens": 30633952} {"current_steps": 15935, "total_steps": 40000, "loss": 0.0586, "lr": 3.284856859338131e-05, "epoch": 2.5995595073007585, "percentage": 39.84, "elapsed_time": "3:38:13", "remaining_time": "5:29:33", "throughput": 2340.41, "total_tokens": 30643872} {"current_steps": 15940, "total_steps": 40000, "loss": 0.0399, "lr": 3.283924686422414e-05, "epoch": 2.600375234521576, "percentage": 39.85, "elapsed_time": "3:38:15", "remaining_time": "5:29:26", "throughput": 2340.89, "total_tokens": 30655024} {"current_steps": 15945, "total_steps": 40000, "loss": 0.0672, "lr": 3.282992392615659e-05, "epoch": 2.601190961742393, "percentage": 39.86, "elapsed_time": "3:38:17", "remaining_time": "5:29:19", "throughput": 2341.24, "total_tokens": 30664448} {"current_steps": 15950, "total_steps": 40000, "loss": 0.0057, "lr": 3.282059978061638e-05, "epoch": 2.6020066889632107, "percentage": 39.88, "elapsed_time": "3:38:19", "remaining_time": "5:29:12", "throughput": 2341.67, "total_tokens": 30674976} {"current_steps": 15955, "total_steps": 40000, "loss": 0.0062, "lr": 3.28112744290414e-05, "epoch": 2.6028224161840283, "percentage": 39.89, "elapsed_time": "3:38:21", "remaining_time": "5:29:04", "throughput": 2342.02, "total_tokens": 30684400} {"current_steps": 15960, "total_steps": 40000, "loss": 0.0301, "lr": 3.280194787286974e-05, "epoch": 2.6036381434048455, "percentage": 39.9, "elapsed_time": "3:38:23", "remaining_time": "5:28:57", "throughput": 2342.48, "total_tokens": 30695216} {"current_steps": 15965, "total_steps": 40000, "loss": 0.1554, "lr": 3.2792620113539674e-05, "epoch": 2.6044538706256626, "percentage": 39.91, "elapsed_time": "3:38:25", "remaining_time": "5:28:50", "throughput": 2342.9, "total_tokens": 30705520} {"current_steps": 15970, "total_steps": 40000, "loss": 0.0049, "lr": 3.278329115248966e-05, "epoch": 2.60526959784648, "percentage": 39.92, "elapsed_time": "3:38:27", "remaining_time": "5:28:43", "throughput": 2343.35, "total_tokens": 30716304} {"current_steps": 15975, "total_steps": 40000, "loss": 0.0662, "lr": 3.277396099115834e-05, "epoch": 2.6060853250672977, "percentage": 39.94, "elapsed_time": "3:38:29", "remaining_time": "5:28:36", "throughput": 2343.72, "total_tokens": 30726000} {"current_steps": 15980, "total_steps": 40000, "loss": 0.0738, "lr": 3.276462963098454e-05, "epoch": 2.606901052288115, "percentage": 39.95, "elapsed_time": "3:38:32", "remaining_time": "5:28:29", "throughput": 2344.08, "total_tokens": 30735712} {"current_steps": 15985, "total_steps": 40000, "loss": 0.0046, "lr": 3.275529707340728e-05, "epoch": 2.607716779508932, "percentage": 39.96, "elapsed_time": "3:38:34", "remaining_time": "5:28:21", "throughput": 2344.52, "total_tokens": 30746224} {"current_steps": 15990, "total_steps": 40000, "loss": 0.0047, "lr": 3.274596331986574e-05, "epoch": 2.6085325067297496, "percentage": 39.98, "elapsed_time": "3:38:36", "remaining_time": "5:28:14", "throughput": 2344.83, "total_tokens": 30755248} {"current_steps": 15995, "total_steps": 40000, "loss": 0.08, "lr": 3.273662837179932e-05, "epoch": 2.609348233950567, "percentage": 39.99, "elapsed_time": "3:38:38", "remaining_time": "5:28:07", "throughput": 2345.25, "total_tokens": 30765472} {"current_steps": 16000, "total_steps": 40000, "loss": 0.0025, "lr": 3.272729223064758e-05, "epoch": 2.6101639611713843, "percentage": 40.0, "elapsed_time": "3:38:40", "remaining_time": "5:28:00", "throughput": 2345.54, "total_tokens": 30774224} {"current_steps": 16000, "total_steps": 40000, "eval_loss": 0.2008722573518753, "epoch": 2.6101639611713843, "percentage": 40.0, "elapsed_time": "3:40:00", "remaining_time": "5:30:01", "throughput": 2331.21, "total_tokens": 30774224} {"current_steps": 16005, "total_steps": 40000, "loss": 0.1182, "lr": 3.2717954897850264e-05, "epoch": 2.6109796883922014, "percentage": 40.01, "elapsed_time": "3:40:04", "remaining_time": "5:29:56", "throughput": 2331.24, "total_tokens": 30783648} {"current_steps": 16010, "total_steps": 40000, "loss": 0.0031, "lr": 3.270861637484733e-05, "epoch": 2.611795415613019, "percentage": 40.02, "elapsed_time": "3:40:06", "remaining_time": "5:29:49", "throughput": 2331.6, "total_tokens": 30793296} {"current_steps": 16015, "total_steps": 40000, "loss": 0.0435, "lr": 3.2699276663078867e-05, "epoch": 2.6126111428338366, "percentage": 40.04, "elapsed_time": "3:40:08", "remaining_time": "5:29:42", "throughput": 2332.0, "total_tokens": 30803392} {"current_steps": 16020, "total_steps": 40000, "loss": 0.0607, "lr": 3.268993576398519e-05, "epoch": 2.6134268700546537, "percentage": 40.05, "elapsed_time": "3:40:11", "remaining_time": "5:29:35", "throughput": 2332.48, "total_tokens": 30814528} {"current_steps": 16025, "total_steps": 40000, "loss": 0.093, "lr": 3.268059367900678e-05, "epoch": 2.614242597275471, "percentage": 40.06, "elapsed_time": "3:40:13", "remaining_time": "5:29:28", "throughput": 2332.89, "total_tokens": 30824832} {"current_steps": 16030, "total_steps": 40000, "loss": 0.0803, "lr": 3.26712504095843e-05, "epoch": 2.6150583244962884, "percentage": 40.08, "elapsed_time": "3:40:15", "remaining_time": "5:29:20", "throughput": 2333.23, "total_tokens": 30834064} {"current_steps": 16035, "total_steps": 40000, "loss": 0.051, "lr": 3.2661905957158615e-05, "epoch": 2.615874051717106, "percentage": 40.09, "elapsed_time": "3:40:17", "remaining_time": "5:29:13", "throughput": 2333.62, "total_tokens": 30844048} {"current_steps": 16040, "total_steps": 40000, "loss": 0.0774, "lr": 3.2652560323170734e-05, "epoch": 2.616689778937923, "percentage": 40.1, "elapsed_time": "3:40:19", "remaining_time": "5:29:06", "throughput": 2334.02, "total_tokens": 30854256} {"current_steps": 16045, "total_steps": 40000, "loss": 0.0053, "lr": 3.264321350906189e-05, "epoch": 2.6175055061587407, "percentage": 40.11, "elapsed_time": "3:40:21", "remaining_time": "5:28:59", "throughput": 2334.3, "total_tokens": 30862752} {"current_steps": 16050, "total_steps": 40000, "loss": 0.0061, "lr": 3.263386551627346e-05, "epoch": 2.618321233379558, "percentage": 40.12, "elapsed_time": "3:40:23", "remaining_time": "5:28:52", "throughput": 2334.6, "total_tokens": 30871536} {"current_steps": 16055, "total_steps": 40000, "loss": 0.0782, "lr": 3.2624516346247055e-05, "epoch": 2.6191369606003754, "percentage": 40.14, "elapsed_time": "3:40:25", "remaining_time": "5:28:45", "throughput": 2334.97, "total_tokens": 30881200} {"current_steps": 16060, "total_steps": 40000, "loss": 0.0407, "lr": 3.2615166000424404e-05, "epoch": 2.6199526878211925, "percentage": 40.15, "elapsed_time": "3:40:27", "remaining_time": "5:28:37", "throughput": 2335.23, "total_tokens": 30889488} {"current_steps": 16065, "total_steps": 40000, "loss": 0.0744, "lr": 3.260581448024745e-05, "epoch": 2.62076841504201, "percentage": 40.16, "elapsed_time": "3:40:29", "remaining_time": "5:28:30", "throughput": 2335.54, "total_tokens": 30898416} {"current_steps": 16070, "total_steps": 40000, "loss": 0.0395, "lr": 3.2596461787158335e-05, "epoch": 2.621584142262827, "percentage": 40.17, "elapsed_time": "3:40:31", "remaining_time": "5:28:23", "throughput": 2335.91, "total_tokens": 30908176} {"current_steps": 16075, "total_steps": 40000, "loss": 0.0071, "lr": 3.258710792259934e-05, "epoch": 2.622399869483645, "percentage": 40.19, "elapsed_time": "3:40:33", "remaining_time": "5:28:16", "throughput": 2336.29, "total_tokens": 30917920} {"current_steps": 16080, "total_steps": 40000, "loss": 0.063, "lr": 3.257775288801296e-05, "epoch": 2.623215596704462, "percentage": 40.2, "elapsed_time": "3:40:35", "remaining_time": "5:28:09", "throughput": 2336.67, "total_tokens": 30927856} {"current_steps": 16085, "total_steps": 40000, "loss": 0.0892, "lr": 3.256839668484186e-05, "epoch": 2.6240313239252795, "percentage": 40.21, "elapsed_time": "3:40:37", "remaining_time": "5:28:01", "throughput": 2337.04, "total_tokens": 30937488} {"current_steps": 16090, "total_steps": 40000, "loss": 0.0239, "lr": 3.255903931452888e-05, "epoch": 2.6248470511460966, "percentage": 40.23, "elapsed_time": "3:40:39", "remaining_time": "5:27:54", "throughput": 2337.44, "total_tokens": 30947712} {"current_steps": 16095, "total_steps": 40000, "loss": 0.0063, "lr": 3.2549680778517045e-05, "epoch": 2.625662778366914, "percentage": 40.24, "elapsed_time": "3:40:42", "remaining_time": "5:27:47", "throughput": 2337.78, "total_tokens": 30957008} {"current_steps": 16100, "total_steps": 40000, "loss": 0.1893, "lr": 3.2540321078249556e-05, "epoch": 2.6264785055877313, "percentage": 40.25, "elapsed_time": "3:40:44", "remaining_time": "5:27:40", "throughput": 2338.2, "total_tokens": 30967408} {"current_steps": 16105, "total_steps": 40000, "loss": 0.1187, "lr": 3.2530960215169795e-05, "epoch": 2.627294232808549, "percentage": 40.26, "elapsed_time": "3:40:46", "remaining_time": "5:27:33", "throughput": 2338.5, "total_tokens": 30976256} {"current_steps": 16110, "total_steps": 40000, "loss": 0.0571, "lr": 3.2521598190721345e-05, "epoch": 2.628109960029366, "percentage": 40.27, "elapsed_time": "3:40:48", "remaining_time": "5:27:26", "throughput": 2338.8, "total_tokens": 30984976} {"current_steps": 16115, "total_steps": 40000, "loss": 0.0483, "lr": 3.251223500634792e-05, "epoch": 2.6289256872501836, "percentage": 40.29, "elapsed_time": "3:40:50", "remaining_time": "5:27:19", "throughput": 2339.06, "total_tokens": 30993232} {"current_steps": 16120, "total_steps": 40000, "loss": 0.0085, "lr": 3.2502870663493445e-05, "epoch": 2.6297414144710007, "percentage": 40.3, "elapsed_time": "3:40:52", "remaining_time": "5:27:11", "throughput": 2339.48, "total_tokens": 31003632} {"current_steps": 16125, "total_steps": 40000, "loss": 0.0417, "lr": 3.249350516360203e-05, "epoch": 2.6305571416918183, "percentage": 40.31, "elapsed_time": "3:40:54", "remaining_time": "5:27:04", "throughput": 2339.87, "total_tokens": 31013664} {"current_steps": 16130, "total_steps": 40000, "loss": 0.0031, "lr": 3.248413850811797e-05, "epoch": 2.631372868912636, "percentage": 40.33, "elapsed_time": "3:40:56", "remaining_time": "5:26:57", "throughput": 2340.31, "total_tokens": 31024400} {"current_steps": 16135, "total_steps": 40000, "loss": 0.0637, "lr": 3.2474770698485677e-05, "epoch": 2.632188596133453, "percentage": 40.34, "elapsed_time": "3:40:58", "remaining_time": "5:26:50", "throughput": 2340.67, "total_tokens": 31033984} {"current_steps": 16140, "total_steps": 40000, "loss": 0.0541, "lr": 3.246540173614983e-05, "epoch": 2.63300432335427, "percentage": 40.35, "elapsed_time": "3:41:00", "remaining_time": "5:26:43", "throughput": 2340.98, "total_tokens": 31042848} {"current_steps": 16145, "total_steps": 40000, "loss": 0.0072, "lr": 3.2456031622555197e-05, "epoch": 2.6338200505750877, "percentage": 40.36, "elapsed_time": "3:41:02", "remaining_time": "5:26:36", "throughput": 2341.46, "total_tokens": 31054128} {"current_steps": 16150, "total_steps": 40000, "loss": 0.358, "lr": 3.2446660359146794e-05, "epoch": 2.6346357777959053, "percentage": 40.38, "elapsed_time": "3:41:04", "remaining_time": "5:26:29", "throughput": 2341.85, "total_tokens": 31064128} {"current_steps": 16155, "total_steps": 40000, "loss": 0.08, "lr": 3.2437287947369786e-05, "epoch": 2.6354515050167224, "percentage": 40.39, "elapsed_time": "3:41:06", "remaining_time": "5:26:22", "throughput": 2342.25, "total_tokens": 31074240} {"current_steps": 16160, "total_steps": 40000, "loss": 0.1286, "lr": 3.2427914388669525e-05, "epoch": 2.6362672322375396, "percentage": 40.4, "elapsed_time": "3:41:08", "remaining_time": "5:26:14", "throughput": 2342.62, "total_tokens": 31084000} {"current_steps": 16165, "total_steps": 40000, "loss": 0.1036, "lr": 3.241853968449151e-05, "epoch": 2.637082959458357, "percentage": 40.41, "elapsed_time": "3:41:10", "remaining_time": "5:26:07", "throughput": 2342.98, "total_tokens": 31093552} {"current_steps": 16170, "total_steps": 40000, "loss": 0.0507, "lr": 3.240916383628144e-05, "epoch": 2.6378986866791747, "percentage": 40.42, "elapsed_time": "3:41:13", "remaining_time": "5:26:00", "throughput": 2343.35, "total_tokens": 31103360} {"current_steps": 16175, "total_steps": 40000, "loss": 0.1057, "lr": 3.239978684548521e-05, "epoch": 2.638714413899992, "percentage": 40.44, "elapsed_time": "3:41:15", "remaining_time": "5:25:53", "throughput": 2343.72, "total_tokens": 31113136} {"current_steps": 16180, "total_steps": 40000, "loss": 0.0721, "lr": 3.239040871354885e-05, "epoch": 2.639530141120809, "percentage": 40.45, "elapsed_time": "3:41:17", "remaining_time": "5:25:46", "throughput": 2344.12, "total_tokens": 31123280} {"current_steps": 16185, "total_steps": 40000, "loss": 0.0272, "lr": 3.2381029441918596e-05, "epoch": 2.6403458683416265, "percentage": 40.46, "elapsed_time": "3:41:19", "remaining_time": "5:25:39", "throughput": 2344.49, "total_tokens": 31133056} {"current_steps": 16190, "total_steps": 40000, "loss": 0.0657, "lr": 3.2371649032040845e-05, "epoch": 2.641161595562444, "percentage": 40.48, "elapsed_time": "3:41:21", "remaining_time": "5:25:32", "throughput": 2344.89, "total_tokens": 31143168} {"current_steps": 16195, "total_steps": 40000, "loss": 0.2079, "lr": 3.2362267485362174e-05, "epoch": 2.6419773227832613, "percentage": 40.49, "elapsed_time": "3:41:23", "remaining_time": "5:25:25", "throughput": 2345.29, "total_tokens": 31153328} {"current_steps": 16200, "total_steps": 40000, "loss": 0.071, "lr": 3.235288480332934e-05, "epoch": 2.6427930500040784, "percentage": 40.5, "elapsed_time": "3:41:25", "remaining_time": "5:25:18", "throughput": 2345.76, "total_tokens": 31164304} {"current_steps": 16200, "total_steps": 40000, "eval_loss": 0.16527904570102692, "epoch": 2.6427930500040784, "percentage": 40.5, "elapsed_time": "3:42:45", "remaining_time": "5:27:16", "throughput": 2331.62, "total_tokens": 31164304} {"current_steps": 16205, "total_steps": 40000, "loss": 0.0552, "lr": 3.234350098738927e-05, "epoch": 2.643608777224896, "percentage": 40.51, "elapsed_time": "3:42:49", "remaining_time": "5:27:11", "throughput": 2331.69, "total_tokens": 31174192} {"current_steps": 16210, "total_steps": 40000, "loss": 0.2037, "lr": 3.233411603898906e-05, "epoch": 2.6444245044457135, "percentage": 40.52, "elapsed_time": "3:42:51", "remaining_time": "5:27:04", "throughput": 2332.07, "total_tokens": 31184064} {"current_steps": 16215, "total_steps": 40000, "loss": 0.0775, "lr": 3.232472995957599e-05, "epoch": 2.6452402316665307, "percentage": 40.54, "elapsed_time": "3:42:53", "remaining_time": "5:26:57", "throughput": 2332.44, "total_tokens": 31193760} {"current_steps": 16220, "total_steps": 40000, "loss": 0.0334, "lr": 3.231534275059751e-05, "epoch": 2.6460559588873482, "percentage": 40.55, "elapsed_time": "3:42:56", "remaining_time": "5:26:50", "throughput": 2332.74, "total_tokens": 31203232} {"current_steps": 16225, "total_steps": 40000, "loss": 0.1469, "lr": 3.230595441350125e-05, "epoch": 2.6468716861081654, "percentage": 40.56, "elapsed_time": "3:42:58", "remaining_time": "5:26:43", "throughput": 2333.12, "total_tokens": 31213088} {"current_steps": 16230, "total_steps": 40000, "loss": 0.0487, "lr": 3.2296564949735e-05, "epoch": 2.647687413328983, "percentage": 40.58, "elapsed_time": "3:43:00", "remaining_time": "5:26:36", "throughput": 2333.46, "total_tokens": 31222496} {"current_steps": 16235, "total_steps": 40000, "loss": 0.12, "lr": 3.228717436074675e-05, "epoch": 2.6485031405498, "percentage": 40.59, "elapsed_time": "3:43:02", "remaining_time": "5:26:29", "throughput": 2333.78, "total_tokens": 31231584} {"current_steps": 16240, "total_steps": 40000, "loss": 0.1274, "lr": 3.227778264798463e-05, "epoch": 2.6493188677706176, "percentage": 40.6, "elapsed_time": "3:43:04", "remaining_time": "5:26:22", "throughput": 2334.08, "total_tokens": 31240416} {"current_steps": 16245, "total_steps": 40000, "loss": 0.0589, "lr": 3.226838981289698e-05, "epoch": 2.6501345949914348, "percentage": 40.61, "elapsed_time": "3:43:06", "remaining_time": "5:26:15", "throughput": 2334.48, "total_tokens": 31250608} {"current_steps": 16250, "total_steps": 40000, "loss": 0.0284, "lr": 3.225899585693227e-05, "epoch": 2.6509503222122524, "percentage": 40.62, "elapsed_time": "3:43:08", "remaining_time": "5:26:07", "throughput": 2334.82, "total_tokens": 31260016} {"current_steps": 16255, "total_steps": 40000, "loss": 0.0443, "lr": 3.224960078153918e-05, "epoch": 2.6517660494330695, "percentage": 40.64, "elapsed_time": "3:43:10", "remaining_time": "5:26:00", "throughput": 2335.25, "total_tokens": 31270576} {"current_steps": 16260, "total_steps": 40000, "loss": 0.0764, "lr": 3.224020458816655e-05, "epoch": 2.652581776653887, "percentage": 40.65, "elapsed_time": "3:43:12", "remaining_time": "5:25:53", "throughput": 2335.54, "total_tokens": 31279200} {"current_steps": 16265, "total_steps": 40000, "loss": 0.127, "lr": 3.223080727826337e-05, "epoch": 2.653397503874704, "percentage": 40.66, "elapsed_time": "3:43:14", "remaining_time": "5:25:46", "throughput": 2335.92, "total_tokens": 31289232} {"current_steps": 16270, "total_steps": 40000, "loss": 0.12, "lr": 3.222140885327885e-05, "epoch": 2.6542132310955218, "percentage": 40.67, "elapsed_time": "3:43:16", "remaining_time": "5:25:39", "throughput": 2336.32, "total_tokens": 31299328} {"current_steps": 16275, "total_steps": 40000, "loss": 0.0996, "lr": 3.221200931466234e-05, "epoch": 2.655028958316339, "percentage": 40.69, "elapsed_time": "3:43:18", "remaining_time": "5:25:32", "throughput": 2336.7, "total_tokens": 31309280} {"current_steps": 16280, "total_steps": 40000, "loss": 0.0514, "lr": 3.220260866386336e-05, "epoch": 2.6558446855371565, "percentage": 40.7, "elapsed_time": "3:43:20", "remaining_time": "5:25:25", "throughput": 2337.05, "total_tokens": 31318784} {"current_steps": 16285, "total_steps": 40000, "loss": 0.1098, "lr": 3.21932069023316e-05, "epoch": 2.6566604127579736, "percentage": 40.71, "elapsed_time": "3:43:23", "remaining_time": "5:25:18", "throughput": 2337.42, "total_tokens": 31328608} {"current_steps": 16290, "total_steps": 40000, "loss": 0.0712, "lr": 3.218380403151695e-05, "epoch": 2.657476139978791, "percentage": 40.73, "elapsed_time": "3:43:25", "remaining_time": "5:25:11", "throughput": 2337.78, "total_tokens": 31338192} {"current_steps": 16295, "total_steps": 40000, "loss": 0.0077, "lr": 3.217440005286943e-05, "epoch": 2.6582918671996083, "percentage": 40.74, "elapsed_time": "3:43:27", "remaining_time": "5:25:03", "throughput": 2338.18, "total_tokens": 31348416} {"current_steps": 16300, "total_steps": 40000, "loss": 0.0483, "lr": 3.216499496783928e-05, "epoch": 2.659107594420426, "percentage": 40.75, "elapsed_time": "3:43:29", "remaining_time": "5:24:56", "throughput": 2338.54, "total_tokens": 31358128} {"current_steps": 16305, "total_steps": 40000, "loss": 0.0195, "lr": 3.2155588777876856e-05, "epoch": 2.659923321641243, "percentage": 40.76, "elapsed_time": "3:43:31", "remaining_time": "5:24:49", "throughput": 2338.78, "total_tokens": 31366144} {"current_steps": 16310, "total_steps": 40000, "loss": 0.1207, "lr": 3.214618148443273e-05, "epoch": 2.6607390488620606, "percentage": 40.77, "elapsed_time": "3:43:33", "remaining_time": "5:24:42", "throughput": 2339.11, "total_tokens": 31375456} {"current_steps": 16315, "total_steps": 40000, "loss": 0.0223, "lr": 3.2136773088957595e-05, "epoch": 2.6615547760828777, "percentage": 40.79, "elapsed_time": "3:43:35", "remaining_time": "5:24:35", "throughput": 2339.42, "total_tokens": 31384432} {"current_steps": 16320, "total_steps": 40000, "loss": 0.2218, "lr": 3.2127363592902374e-05, "epoch": 2.6623705033036953, "percentage": 40.8, "elapsed_time": "3:43:37", "remaining_time": "5:24:28", "throughput": 2339.87, "total_tokens": 31395216} {"current_steps": 16325, "total_steps": 40000, "loss": 0.0936, "lr": 3.211795299771812e-05, "epoch": 2.663186230524513, "percentage": 40.81, "elapsed_time": "3:43:39", "remaining_time": "5:24:21", "throughput": 2340.14, "total_tokens": 31403744} {"current_steps": 16330, "total_steps": 40000, "loss": 0.0825, "lr": 3.210854130485605e-05, "epoch": 2.66400195774533, "percentage": 40.83, "elapsed_time": "3:43:41", "remaining_time": "5:24:14", "throughput": 2340.55, "total_tokens": 31414032} {"current_steps": 16335, "total_steps": 40000, "loss": 0.1527, "lr": 3.209912851576759e-05, "epoch": 2.664817684966147, "percentage": 40.84, "elapsed_time": "3:43:43", "remaining_time": "5:24:07", "throughput": 2340.82, "total_tokens": 31422464} {"current_steps": 16340, "total_steps": 40000, "loss": 0.0249, "lr": 3.208971463190431e-05, "epoch": 2.6656334121869647, "percentage": 40.85, "elapsed_time": "3:43:45", "remaining_time": "5:24:00", "throughput": 2341.16, "total_tokens": 31431984} {"current_steps": 16345, "total_steps": 40000, "loss": 0.067, "lr": 3.208029965471793e-05, "epoch": 2.6664491394077823, "percentage": 40.86, "elapsed_time": "3:43:47", "remaining_time": "5:23:53", "throughput": 2341.59, "total_tokens": 31442480} {"current_steps": 16350, "total_steps": 40000, "loss": 0.0691, "lr": 3.2070883585660364e-05, "epoch": 2.6672648666285994, "percentage": 40.88, "elapsed_time": "3:43:49", "remaining_time": "5:23:46", "throughput": 2342.01, "total_tokens": 31453088} {"current_steps": 16355, "total_steps": 40000, "loss": 0.0069, "lr": 3.20614664261837e-05, "epoch": 2.6680805938494165, "percentage": 40.89, "elapsed_time": "3:43:51", "remaining_time": "5:23:39", "throughput": 2342.3, "total_tokens": 31461728} {"current_steps": 16360, "total_steps": 40000, "loss": 0.0756, "lr": 3.205204817774016e-05, "epoch": 2.668896321070234, "percentage": 40.9, "elapsed_time": "3:43:54", "remaining_time": "5:23:32", "throughput": 2342.65, "total_tokens": 31471312} {"current_steps": 16365, "total_steps": 40000, "loss": 0.1532, "lr": 3.204262884178218e-05, "epoch": 2.6697120482910517, "percentage": 40.91, "elapsed_time": "3:43:56", "remaining_time": "5:23:24", "throughput": 2343.09, "total_tokens": 31482096} {"current_steps": 16370, "total_steps": 40000, "loss": 0.0702, "lr": 3.2033208419762314e-05, "epoch": 2.670527775511869, "percentage": 40.92, "elapsed_time": "3:43:58", "remaining_time": "5:23:17", "throughput": 2343.45, "total_tokens": 31491632} {"current_steps": 16375, "total_steps": 40000, "loss": 0.0876, "lr": 3.2023786913133344e-05, "epoch": 2.671343502732686, "percentage": 40.94, "elapsed_time": "3:44:00", "remaining_time": "5:23:10", "throughput": 2343.75, "total_tokens": 31500608} {"current_steps": 16380, "total_steps": 40000, "loss": 0.0896, "lr": 3.201436432334816e-05, "epoch": 2.6721592299535035, "percentage": 40.95, "elapsed_time": "3:44:02", "remaining_time": "5:23:03", "throughput": 2344.1, "total_tokens": 31510192} {"current_steps": 16385, "total_steps": 40000, "loss": 0.0065, "lr": 3.2004940651859844e-05, "epoch": 2.672974957174321, "percentage": 40.96, "elapsed_time": "3:44:04", "remaining_time": "5:22:56", "throughput": 2344.5, "total_tokens": 31520352} {"current_steps": 16390, "total_steps": 40000, "loss": 0.1561, "lr": 3.1995515900121655e-05, "epoch": 2.6737906843951382, "percentage": 40.98, "elapsed_time": "3:44:06", "remaining_time": "5:22:49", "throughput": 2344.9, "total_tokens": 31530560} {"current_steps": 16395, "total_steps": 40000, "loss": 0.1499, "lr": 3.1986090069587e-05, "epoch": 2.6746064116159554, "percentage": 40.99, "elapsed_time": "3:44:08", "remaining_time": "5:22:42", "throughput": 2345.28, "total_tokens": 31540560} {"current_steps": 16400, "total_steps": 40000, "loss": 0.1059, "lr": 3.1976663161709466e-05, "epoch": 2.675422138836773, "percentage": 41.0, "elapsed_time": "3:44:10", "remaining_time": "5:22:35", "throughput": 2345.54, "total_tokens": 31548832} {"current_steps": 16400, "total_steps": 40000, "eval_loss": 0.17207415401935577, "epoch": 2.675422138836773, "percentage": 41.0, "elapsed_time": "3:45:31", "remaining_time": "5:24:31", "throughput": 2331.58, "total_tokens": 31548832} {"current_steps": 16405, "total_steps": 40000, "loss": 0.0054, "lr": 3.196723517794279e-05, "epoch": 2.6762378660575905, "percentage": 41.01, "elapsed_time": "3:45:34", "remaining_time": "5:24:26", "throughput": 2331.7, "total_tokens": 31559040} {"current_steps": 16410, "total_steps": 40000, "loss": 0.1335, "lr": 3.19578061197409e-05, "epoch": 2.6770535932784076, "percentage": 41.02, "elapsed_time": "3:45:36", "remaining_time": "5:24:19", "throughput": 2332.11, "total_tokens": 31569376} {"current_steps": 16415, "total_steps": 40000, "loss": 0.1549, "lr": 3.194837598855787e-05, "epoch": 2.677869320499225, "percentage": 41.04, "elapsed_time": "3:45:38", "remaining_time": "5:24:12", "throughput": 2332.46, "total_tokens": 31578896} {"current_steps": 16420, "total_steps": 40000, "loss": 0.0949, "lr": 3.193894478584794e-05, "epoch": 2.6786850477200423, "percentage": 41.05, "elapsed_time": "3:45:40", "remaining_time": "5:24:05", "throughput": 2332.82, "total_tokens": 31588560} {"current_steps": 16425, "total_steps": 40000, "loss": 0.169, "lr": 3.192951251306553e-05, "epoch": 2.67950077494086, "percentage": 41.06, "elapsed_time": "3:45:43", "remaining_time": "5:23:58", "throughput": 2333.2, "total_tokens": 31598592} {"current_steps": 16430, "total_steps": 40000, "loss": 0.1302, "lr": 3.192007917166521e-05, "epoch": 2.680316502161677, "percentage": 41.08, "elapsed_time": "3:45:45", "remaining_time": "5:23:51", "throughput": 2333.6, "total_tokens": 31608800} {"current_steps": 16435, "total_steps": 40000, "loss": 0.1122, "lr": 3.191064476310171e-05, "epoch": 2.6811322293824946, "percentage": 41.09, "elapsed_time": "3:45:47", "remaining_time": "5:23:44", "throughput": 2333.97, "total_tokens": 31618656} {"current_steps": 16440, "total_steps": 40000, "loss": 0.3881, "lr": 3.1901209288829944e-05, "epoch": 2.6819479566033118, "percentage": 41.1, "elapsed_time": "3:45:49", "remaining_time": "5:23:37", "throughput": 2334.37, "total_tokens": 31628944} {"current_steps": 16445, "total_steps": 40000, "loss": 0.0911, "lr": 3.1891772750304985e-05, "epoch": 2.6827636838241293, "percentage": 41.11, "elapsed_time": "3:45:51", "remaining_time": "5:23:30", "throughput": 2334.72, "total_tokens": 31638384} {"current_steps": 16450, "total_steps": 40000, "loss": 0.0361, "lr": 3.188233514898206e-05, "epoch": 2.6835794110449465, "percentage": 41.12, "elapsed_time": "3:45:53", "remaining_time": "5:23:23", "throughput": 2335.07, "total_tokens": 31648016} {"current_steps": 16455, "total_steps": 40000, "loss": 0.1001, "lr": 3.187289648631657e-05, "epoch": 2.684395138265764, "percentage": 41.14, "elapsed_time": "3:45:55", "remaining_time": "5:23:16", "throughput": 2335.5, "total_tokens": 31658640} {"current_steps": 16460, "total_steps": 40000, "loss": 0.1103, "lr": 3.186345676376406e-05, "epoch": 2.685210865486581, "percentage": 41.15, "elapsed_time": "3:45:57", "remaining_time": "5:23:09", "throughput": 2335.89, "total_tokens": 31668736} {"current_steps": 16465, "total_steps": 40000, "loss": 0.0658, "lr": 3.1854015982780275e-05, "epoch": 2.6860265927073987, "percentage": 41.16, "elapsed_time": "3:45:59", "remaining_time": "5:23:01", "throughput": 2336.3, "total_tokens": 31679168} {"current_steps": 16470, "total_steps": 40000, "loss": 0.0079, "lr": 3.1844574144821084e-05, "epoch": 2.686842319928216, "percentage": 41.17, "elapsed_time": "3:46:01", "remaining_time": "5:22:54", "throughput": 2336.68, "total_tokens": 31689216} {"current_steps": 16475, "total_steps": 40000, "loss": 0.0682, "lr": 3.1835131251342554e-05, "epoch": 2.6876580471490334, "percentage": 41.19, "elapsed_time": "3:46:03", "remaining_time": "5:22:47", "throughput": 2337.09, "total_tokens": 31699600} {"current_steps": 16480, "total_steps": 40000, "loss": 0.0197, "lr": 3.182568730380089e-05, "epoch": 2.6884737743698506, "percentage": 41.2, "elapsed_time": "3:46:05", "remaining_time": "5:22:40", "throughput": 2337.35, "total_tokens": 31707920} {"current_steps": 16485, "total_steps": 40000, "loss": 0.011, "lr": 3.181624230365245e-05, "epoch": 2.689289501590668, "percentage": 41.21, "elapsed_time": "3:46:07", "remaining_time": "5:22:33", "throughput": 2337.71, "total_tokens": 31717648} {"current_steps": 16490, "total_steps": 40000, "loss": 0.0856, "lr": 3.180679625235381e-05, "epoch": 2.6901052288114853, "percentage": 41.23, "elapsed_time": "3:46:09", "remaining_time": "5:22:26", "throughput": 2338.05, "total_tokens": 31727008} {"current_steps": 16495, "total_steps": 40000, "loss": 0.0223, "lr": 3.1797349151361646e-05, "epoch": 2.690920956032303, "percentage": 41.24, "elapsed_time": "3:46:11", "remaining_time": "5:22:19", "throughput": 2338.44, "total_tokens": 31737200} {"current_steps": 16500, "total_steps": 40000, "loss": 0.1098, "lr": 3.178790100213281e-05, "epoch": 2.6917366832531204, "percentage": 41.25, "elapsed_time": "3:46:14", "remaining_time": "5:22:12", "throughput": 2338.8, "total_tokens": 31746896} {"current_steps": 16505, "total_steps": 40000, "loss": 0.1301, "lr": 3.1778451806124346e-05, "epoch": 2.6925524104739376, "percentage": 41.26, "elapsed_time": "3:46:16", "remaining_time": "5:22:05", "throughput": 2339.24, "total_tokens": 31757776} {"current_steps": 16510, "total_steps": 40000, "loss": 0.0084, "lr": 3.176900156479342e-05, "epoch": 2.6933681376947547, "percentage": 41.27, "elapsed_time": "3:46:18", "remaining_time": "5:21:58", "throughput": 2339.69, "total_tokens": 31768720} {"current_steps": 16515, "total_steps": 40000, "loss": 0.0185, "lr": 3.17595502795974e-05, "epoch": 2.6941838649155723, "percentage": 41.29, "elapsed_time": "3:46:20", "remaining_time": "5:21:51", "throughput": 2340.0, "total_tokens": 31777712} {"current_steps": 16520, "total_steps": 40000, "loss": 0.0089, "lr": 3.175009795199377e-05, "epoch": 2.69499959213639, "percentage": 41.3, "elapsed_time": "3:46:22", "remaining_time": "5:21:44", "throughput": 2340.32, "total_tokens": 31786944} {"current_steps": 16525, "total_steps": 40000, "loss": 0.0476, "lr": 3.1740644583440224e-05, "epoch": 2.695815319357207, "percentage": 41.31, "elapsed_time": "3:46:24", "remaining_time": "5:21:37", "throughput": 2340.7, "total_tokens": 31796976} {"current_steps": 16530, "total_steps": 40000, "loss": 0.0695, "lr": 3.173119017539457e-05, "epoch": 2.696631046578024, "percentage": 41.33, "elapsed_time": "3:46:26", "remaining_time": "5:21:30", "throughput": 2341.09, "total_tokens": 31807088} {"current_steps": 16535, "total_steps": 40000, "loss": 0.0923, "lr": 3.172173472931479e-05, "epoch": 2.6974467737988417, "percentage": 41.34, "elapsed_time": "3:46:28", "remaining_time": "5:21:23", "throughput": 2341.49, "total_tokens": 31817264} {"current_steps": 16540, "total_steps": 40000, "loss": 0.0275, "lr": 3.1712278246659055e-05, "epoch": 2.6982625010196593, "percentage": 41.35, "elapsed_time": "3:46:30", "remaining_time": "5:21:16", "throughput": 2341.8, "total_tokens": 31826416} {"current_steps": 16545, "total_steps": 40000, "loss": 0.049, "lr": 3.170282072888566e-05, "epoch": 2.6990782282404764, "percentage": 41.36, "elapsed_time": "3:46:32", "remaining_time": "5:21:09", "throughput": 2342.07, "total_tokens": 31834832} {"current_steps": 16550, "total_steps": 40000, "loss": 0.0939, "lr": 3.169336217745307e-05, "epoch": 2.6998939554612935, "percentage": 41.38, "elapsed_time": "3:46:34", "remaining_time": "5:21:02", "throughput": 2342.46, "total_tokens": 31845072} {"current_steps": 16555, "total_steps": 40000, "loss": 0.0032, "lr": 3.1683902593819924e-05, "epoch": 2.700709682682111, "percentage": 41.39, "elapsed_time": "3:46:36", "remaining_time": "5:20:55", "throughput": 2342.91, "total_tokens": 31855920} {"current_steps": 16560, "total_steps": 40000, "loss": 0.0013, "lr": 3.1674441979445e-05, "epoch": 2.7015254099029287, "percentage": 41.4, "elapsed_time": "3:46:38", "remaining_time": "5:20:48", "throughput": 2343.31, "total_tokens": 31866320} {"current_steps": 16565, "total_steps": 40000, "loss": 0.0994, "lr": 3.166498033578725e-05, "epoch": 2.702341137123746, "percentage": 41.41, "elapsed_time": "3:46:40", "remaining_time": "5:20:41", "throughput": 2343.7, "total_tokens": 31876384} {"current_steps": 16570, "total_steps": 40000, "loss": 0.1874, "lr": 3.165551766430578e-05, "epoch": 2.703156864344563, "percentage": 41.42, "elapsed_time": "3:46:42", "remaining_time": "5:20:34", "throughput": 2344.05, "total_tokens": 31885936} {"current_steps": 16575, "total_steps": 40000, "loss": 0.0451, "lr": 3.164605396645984e-05, "epoch": 2.7039725915653805, "percentage": 41.44, "elapsed_time": "3:46:45", "remaining_time": "5:20:27", "throughput": 2344.47, "total_tokens": 31896592} {"current_steps": 16580, "total_steps": 40000, "loss": 0.0738, "lr": 3.163658924370886e-05, "epoch": 2.704788318786198, "percentage": 41.45, "elapsed_time": "3:46:47", "remaining_time": "5:20:20", "throughput": 2344.9, "total_tokens": 31907296} {"current_steps": 16585, "total_steps": 40000, "loss": 0.0852, "lr": 3.1627123497512415e-05, "epoch": 2.705604046007015, "percentage": 41.46, "elapsed_time": "3:46:49", "remaining_time": "5:20:13", "throughput": 2345.26, "total_tokens": 31917024} {"current_steps": 16590, "total_steps": 40000, "loss": 0.0578, "lr": 3.1617656729330245e-05, "epoch": 2.7064197732278323, "percentage": 41.48, "elapsed_time": "3:46:51", "remaining_time": "5:20:06", "throughput": 2345.52, "total_tokens": 31925408} {"current_steps": 16595, "total_steps": 40000, "loss": 0.0783, "lr": 3.1608188940622255e-05, "epoch": 2.70723550044865, "percentage": 41.49, "elapsed_time": "3:46:53", "remaining_time": "5:19:59", "throughput": 2345.79, "total_tokens": 31933904} {"current_steps": 16600, "total_steps": 40000, "loss": 0.0747, "lr": 3.159872013284847e-05, "epoch": 2.7080512276694675, "percentage": 41.5, "elapsed_time": "3:46:55", "remaining_time": "5:19:52", "throughput": 2346.15, "total_tokens": 31943568} {"current_steps": 16600, "total_steps": 40000, "eval_loss": 0.1902150809764862, "epoch": 2.7080512276694675, "percentage": 41.5, "elapsed_time": "3:48:15", "remaining_time": "5:21:46", "throughput": 2332.33, "total_tokens": 31943568} {"current_steps": 16605, "total_steps": 40000, "loss": 0.09, "lr": 3.1589250307469134e-05, "epoch": 2.7088669548902846, "percentage": 41.51, "elapsed_time": "3:48:19", "remaining_time": "5:21:41", "throughput": 2332.37, "total_tokens": 31953312} {"current_steps": 16610, "total_steps": 40000, "loss": 0.0663, "lr": 3.1579779465944586e-05, "epoch": 2.709682682111102, "percentage": 41.52, "elapsed_time": "3:48:21", "remaining_time": "5:21:34", "throughput": 2332.77, "total_tokens": 31963616} {"current_steps": 16615, "total_steps": 40000, "loss": 0.0595, "lr": 3.1570307609735363e-05, "epoch": 2.7104984093319193, "percentage": 41.54, "elapsed_time": "3:48:24", "remaining_time": "5:21:27", "throughput": 2333.15, "total_tokens": 31973632} {"current_steps": 16620, "total_steps": 40000, "loss": 0.1544, "lr": 3.156083474030213e-05, "epoch": 2.711314136552737, "percentage": 41.55, "elapsed_time": "3:48:26", "remaining_time": "5:21:20", "throughput": 2333.55, "total_tokens": 31983904} {"current_steps": 16625, "total_steps": 40000, "loss": 0.0984, "lr": 3.155136085910573e-05, "epoch": 2.712129863773554, "percentage": 41.56, "elapsed_time": "3:48:28", "remaining_time": "5:21:13", "throughput": 2333.94, "total_tokens": 31994096} {"current_steps": 16630, "total_steps": 40000, "loss": 0.2009, "lr": 3.154188596760717e-05, "epoch": 2.7129455909943716, "percentage": 41.58, "elapsed_time": "3:48:30", "remaining_time": "5:21:06", "throughput": 2334.28, "total_tokens": 32003600} {"current_steps": 16635, "total_steps": 40000, "loss": 0.1312, "lr": 3.153241006726757e-05, "epoch": 2.7137613182151887, "percentage": 41.59, "elapsed_time": "3:48:32", "remaining_time": "5:20:59", "throughput": 2334.62, "total_tokens": 32013168} {"current_steps": 16640, "total_steps": 40000, "loss": 0.1565, "lr": 3.152293315954825e-05, "epoch": 2.7145770454360063, "percentage": 41.6, "elapsed_time": "3:48:34", "remaining_time": "5:20:52", "throughput": 2334.99, "total_tokens": 32023008} {"current_steps": 16645, "total_steps": 40000, "loss": 0.2162, "lr": 3.1513455245910666e-05, "epoch": 2.7153927726568234, "percentage": 41.61, "elapsed_time": "3:48:36", "remaining_time": "5:20:45", "throughput": 2335.28, "total_tokens": 32031840} {"current_steps": 16650, "total_steps": 40000, "loss": 0.1436, "lr": 3.150397632781643e-05, "epoch": 2.716208499877641, "percentage": 41.62, "elapsed_time": "3:48:38", "remaining_time": "5:20:38", "throughput": 2335.66, "total_tokens": 32041936} {"current_steps": 16655, "total_steps": 40000, "loss": 0.0385, "lr": 3.149449640672731e-05, "epoch": 2.717024227098458, "percentage": 41.64, "elapsed_time": "3:48:40", "remaining_time": "5:20:31", "throughput": 2335.97, "total_tokens": 32050896} {"current_steps": 16660, "total_steps": 40000, "loss": 0.1869, "lr": 3.148501548410523e-05, "epoch": 2.7178399543192757, "percentage": 41.65, "elapsed_time": "3:48:42", "remaining_time": "5:20:24", "throughput": 2336.33, "total_tokens": 32060736} {"current_steps": 16665, "total_steps": 40000, "loss": 0.1175, "lr": 3.1475533561412256e-05, "epoch": 2.718655681540093, "percentage": 41.66, "elapsed_time": "3:48:44", "remaining_time": "5:20:17", "throughput": 2336.64, "total_tokens": 32069792} {"current_steps": 16670, "total_steps": 40000, "loss": 0.0788, "lr": 3.146605064011065e-05, "epoch": 2.7194714087609104, "percentage": 41.68, "elapsed_time": "3:48:46", "remaining_time": "5:20:10", "throughput": 2336.98, "total_tokens": 32079344} {"current_steps": 16675, "total_steps": 40000, "loss": 0.0095, "lr": 3.145656672166277e-05, "epoch": 2.7202871359817276, "percentage": 41.69, "elapsed_time": "3:48:48", "remaining_time": "5:20:03", "throughput": 2337.26, "total_tokens": 32087984} {"current_steps": 16680, "total_steps": 40000, "loss": 0.079, "lr": 3.144708180753116e-05, "epoch": 2.721102863202545, "percentage": 41.7, "elapsed_time": "3:48:50", "remaining_time": "5:19:56", "throughput": 2337.61, "total_tokens": 32097648} {"current_steps": 16685, "total_steps": 40000, "loss": 0.034, "lr": 3.143759589917851e-05, "epoch": 2.7219185904233623, "percentage": 41.71, "elapsed_time": "3:48:53", "remaining_time": "5:19:50", "throughput": 2337.96, "total_tokens": 32107232} {"current_steps": 16690, "total_steps": 40000, "loss": 0.0862, "lr": 3.142810899806768e-05, "epoch": 2.72273431764418, "percentage": 41.73, "elapsed_time": "3:48:55", "remaining_time": "5:19:43", "throughput": 2338.28, "total_tokens": 32116496} {"current_steps": 16695, "total_steps": 40000, "loss": 0.0777, "lr": 3.141862110566166e-05, "epoch": 2.7235500448649974, "percentage": 41.74, "elapsed_time": "3:48:57", "remaining_time": "5:19:36", "throughput": 2338.51, "total_tokens": 32124512} {"current_steps": 16700, "total_steps": 40000, "loss": 0.0253, "lr": 3.1409132223423606e-05, "epoch": 2.7243657720858145, "percentage": 41.75, "elapsed_time": "3:48:59", "remaining_time": "5:19:29", "throughput": 2338.9, "total_tokens": 32134704} {"current_steps": 16705, "total_steps": 40000, "loss": 0.1311, "lr": 3.139964235281682e-05, "epoch": 2.7251814993066317, "percentage": 41.76, "elapsed_time": "3:49:01", "remaining_time": "5:19:22", "throughput": 2339.26, "total_tokens": 32144496} {"current_steps": 16710, "total_steps": 40000, "loss": 0.0063, "lr": 3.139015149530476e-05, "epoch": 2.7259972265274492, "percentage": 41.77, "elapsed_time": "3:49:03", "remaining_time": "5:19:15", "throughput": 2339.58, "total_tokens": 32153664} {"current_steps": 16715, "total_steps": 40000, "loss": 0.0418, "lr": 3.1380659652351034e-05, "epoch": 2.726812953748267, "percentage": 41.79, "elapsed_time": "3:49:05", "remaining_time": "5:19:08", "throughput": 2339.86, "total_tokens": 32162400} {"current_steps": 16720, "total_steps": 40000, "loss": 0.0844, "lr": 3.137116682541941e-05, "epoch": 2.727628680969084, "percentage": 41.8, "elapsed_time": "3:49:07", "remaining_time": "5:19:01", "throughput": 2340.25, "total_tokens": 32172544} {"current_steps": 16725, "total_steps": 40000, "loss": 0.005, "lr": 3.136167301597379e-05, "epoch": 2.728444408189901, "percentage": 41.81, "elapsed_time": "3:49:09", "remaining_time": "5:18:54", "throughput": 2340.57, "total_tokens": 32181792} {"current_steps": 16730, "total_steps": 40000, "loss": 0.0739, "lr": 3.1352178225478254e-05, "epoch": 2.7292601354107187, "percentage": 41.83, "elapsed_time": "3:49:11", "remaining_time": "5:18:47", "throughput": 2340.92, "total_tokens": 32191408} {"current_steps": 16735, "total_steps": 40000, "loss": 0.0587, "lr": 3.1342682455396996e-05, "epoch": 2.7300758626315362, "percentage": 41.84, "elapsed_time": "3:49:13", "remaining_time": "5:18:40", "throughput": 2341.22, "total_tokens": 32200432} {"current_steps": 16740, "total_steps": 40000, "loss": 0.1135, "lr": 3.133318570719441e-05, "epoch": 2.7308915898523534, "percentage": 41.85, "elapsed_time": "3:49:15", "remaining_time": "5:18:33", "throughput": 2341.67, "total_tokens": 32211424} {"current_steps": 16745, "total_steps": 40000, "loss": 0.0611, "lr": 3.132368798233499e-05, "epoch": 2.7317073170731705, "percentage": 41.86, "elapsed_time": "3:49:17", "remaining_time": "5:18:26", "throughput": 2342.04, "total_tokens": 32221408} {"current_steps": 16750, "total_steps": 40000, "loss": 0.0103, "lr": 3.131418928228342e-05, "epoch": 2.732523044293988, "percentage": 41.88, "elapsed_time": "3:49:19", "remaining_time": "5:18:19", "throughput": 2342.35, "total_tokens": 32230480} {"current_steps": 16755, "total_steps": 40000, "loss": 0.0995, "lr": 3.1304689608504514e-05, "epoch": 2.7333387715148056, "percentage": 41.89, "elapsed_time": "3:49:21", "remaining_time": "5:18:12", "throughput": 2342.66, "total_tokens": 32239664} {"current_steps": 16760, "total_steps": 40000, "loss": 0.0556, "lr": 3.129518896246324e-05, "epoch": 2.7341544987356228, "percentage": 41.9, "elapsed_time": "3:49:24", "remaining_time": "5:18:05", "throughput": 2343.0, "total_tokens": 32249072} {"current_steps": 16765, "total_steps": 40000, "loss": 0.2106, "lr": 3.128568734562472e-05, "epoch": 2.73497022595644, "percentage": 41.91, "elapsed_time": "3:49:26", "remaining_time": "5:17:58", "throughput": 2343.51, "total_tokens": 32260960} {"current_steps": 16770, "total_steps": 40000, "loss": 0.0562, "lr": 3.127618475945421e-05, "epoch": 2.7357859531772575, "percentage": 41.93, "elapsed_time": "3:49:28", "remaining_time": "5:17:51", "throughput": 2343.94, "total_tokens": 32271696} {"current_steps": 16775, "total_steps": 40000, "loss": 0.0064, "lr": 3.126668120541715e-05, "epoch": 2.736601680398075, "percentage": 41.94, "elapsed_time": "3:49:30", "remaining_time": "5:17:44", "throughput": 2344.28, "total_tokens": 32281328} {"current_steps": 16780, "total_steps": 40000, "loss": 0.1509, "lr": 3.1257176684979096e-05, "epoch": 2.737417407618892, "percentage": 41.95, "elapsed_time": "3:49:32", "remaining_time": "5:17:37", "throughput": 2344.7, "total_tokens": 32291984} {"current_steps": 16785, "total_steps": 40000, "loss": 0.2175, "lr": 3.124767119960576e-05, "epoch": 2.7382331348397098, "percentage": 41.96, "elapsed_time": "3:49:34", "remaining_time": "5:17:31", "throughput": 2344.94, "total_tokens": 32300128} {"current_steps": 16790, "total_steps": 40000, "loss": 0.045, "lr": 3.123816475076301e-05, "epoch": 2.739048862060527, "percentage": 41.98, "elapsed_time": "3:49:36", "remaining_time": "5:17:24", "throughput": 2345.36, "total_tokens": 32310656} {"current_steps": 16795, "total_steps": 40000, "loss": 0.0592, "lr": 3.122865733991687e-05, "epoch": 2.7398645892813445, "percentage": 41.99, "elapsed_time": "3:49:38", "remaining_time": "5:17:17", "throughput": 2345.63, "total_tokens": 32319248} {"current_steps": 16800, "total_steps": 40000, "loss": 0.0031, "lr": 3.1219148968533486e-05, "epoch": 2.7406803165021616, "percentage": 42.0, "elapsed_time": "3:49:40", "remaining_time": "5:17:10", "throughput": 2345.85, "total_tokens": 32327088} {"current_steps": 16800, "total_steps": 40000, "eval_loss": 0.17834709584712982, "epoch": 2.7406803165021616, "percentage": 42.0, "elapsed_time": "3:51:01", "remaining_time": "5:19:01", "throughput": 2332.21, "total_tokens": 32327088} {"current_steps": 16805, "total_steps": 40000, "loss": 0.0551, "lr": 3.120963963807918e-05, "epoch": 2.741496043722979, "percentage": 42.01, "elapsed_time": "3:51:04", "remaining_time": "5:18:56", "throughput": 2332.32, "total_tokens": 32337040} {"current_steps": 16810, "total_steps": 40000, "loss": 0.096, "lr": 3.12001293500204e-05, "epoch": 2.7423117709437963, "percentage": 42.02, "elapsed_time": "3:51:06", "remaining_time": "5:18:49", "throughput": 2332.68, "total_tokens": 32346800} {"current_steps": 16815, "total_steps": 40000, "loss": 0.2332, "lr": 3.1190618105823765e-05, "epoch": 2.743127498164614, "percentage": 42.04, "elapsed_time": "3:51:08", "remaining_time": "5:18:42", "throughput": 2332.97, "total_tokens": 32355728} {"current_steps": 16820, "total_steps": 40000, "loss": 0.0427, "lr": 3.118110590695603e-05, "epoch": 2.743943225385431, "percentage": 42.05, "elapsed_time": "3:51:10", "remaining_time": "5:18:35", "throughput": 2333.26, "total_tokens": 32364528} {"current_steps": 16825, "total_steps": 40000, "loss": 0.015, "lr": 3.117159275488407e-05, "epoch": 2.7447589526062486, "percentage": 42.06, "elapsed_time": "3:51:13", "remaining_time": "5:18:28", "throughput": 2333.63, "total_tokens": 32374448} {"current_steps": 16830, "total_steps": 40000, "loss": 0.223, "lr": 3.1162078651074956e-05, "epoch": 2.7455746798270657, "percentage": 42.08, "elapsed_time": "3:51:15", "remaining_time": "5:18:21", "throughput": 2333.95, "total_tokens": 32383760} {"current_steps": 16835, "total_steps": 40000, "loss": 0.0936, "lr": 3.1152563596995885e-05, "epoch": 2.7463904070478833, "percentage": 42.09, "elapsed_time": "3:51:17", "remaining_time": "5:18:14", "throughput": 2334.31, "total_tokens": 32393632} {"current_steps": 16840, "total_steps": 40000, "loss": 0.0978, "lr": 3.1143047594114186e-05, "epoch": 2.7472061342687004, "percentage": 42.1, "elapsed_time": "3:51:19", "remaining_time": "5:18:08", "throughput": 2334.67, "total_tokens": 32403456} {"current_steps": 16845, "total_steps": 40000, "loss": 0.0496, "lr": 3.113353064389734e-05, "epoch": 2.748021861489518, "percentage": 42.11, "elapsed_time": "3:51:21", "remaining_time": "5:18:01", "throughput": 2334.99, "total_tokens": 32412656} {"current_steps": 16850, "total_steps": 40000, "loss": 0.0461, "lr": 3.1124012747812993e-05, "epoch": 2.748837588710335, "percentage": 42.12, "elapsed_time": "3:51:23", "remaining_time": "5:17:54", "throughput": 2335.39, "total_tokens": 32422992} {"current_steps": 16855, "total_steps": 40000, "loss": 0.0732, "lr": 3.1114493907328936e-05, "epoch": 2.7496533159311527, "percentage": 42.14, "elapsed_time": "3:51:25", "remaining_time": "5:17:47", "throughput": 2335.75, "total_tokens": 32432816} {"current_steps": 16860, "total_steps": 40000, "loss": 0.0881, "lr": 3.110497412391306e-05, "epoch": 2.75046904315197, "percentage": 42.15, "elapsed_time": "3:51:27", "remaining_time": "5:17:40", "throughput": 2336.1, "total_tokens": 32442528} {"current_steps": 16865, "total_steps": 40000, "loss": 0.0321, "lr": 3.1095453399033466e-05, "epoch": 2.7512847703727874, "percentage": 42.16, "elapsed_time": "3:51:29", "remaining_time": "5:17:33", "throughput": 2336.31, "total_tokens": 32450272} {"current_steps": 16870, "total_steps": 40000, "loss": 0.0293, "lr": 3.108593173415835e-05, "epoch": 2.7521004975936045, "percentage": 42.18, "elapsed_time": "3:51:31", "remaining_time": "5:17:26", "throughput": 2336.74, "total_tokens": 32461152} {"current_steps": 16875, "total_steps": 40000, "loss": 0.241, "lr": 3.107640913075609e-05, "epoch": 2.752916224814422, "percentage": 42.19, "elapsed_time": "3:51:33", "remaining_time": "5:17:19", "throughput": 2337.09, "total_tokens": 32470832} {"current_steps": 16880, "total_steps": 40000, "loss": 0.2238, "lr": 3.106688559029517e-05, "epoch": 2.7537319520352392, "percentage": 42.2, "elapsed_time": "3:51:35", "remaining_time": "5:17:12", "throughput": 2337.41, "total_tokens": 32480048} {"current_steps": 16885, "total_steps": 40000, "loss": 0.1362, "lr": 3.105736111424425e-05, "epoch": 2.754547679256057, "percentage": 42.21, "elapsed_time": "3:51:37", "remaining_time": "5:17:05", "throughput": 2337.7, "total_tokens": 32488848} {"current_steps": 16890, "total_steps": 40000, "loss": 0.146, "lr": 3.1047835704072136e-05, "epoch": 2.7553634064768744, "percentage": 42.23, "elapsed_time": "3:51:39", "remaining_time": "5:16:58", "throughput": 2337.96, "total_tokens": 32497328} {"current_steps": 16895, "total_steps": 40000, "loss": 0.1356, "lr": 3.103830936124775e-05, "epoch": 2.7561791336976915, "percentage": 42.24, "elapsed_time": "3:51:41", "remaining_time": "5:16:51", "throughput": 2338.3, "total_tokens": 32506832} {"current_steps": 16900, "total_steps": 40000, "loss": 0.1053, "lr": 3.102878208724018e-05, "epoch": 2.7569948609185086, "percentage": 42.25, "elapsed_time": "3:51:44", "remaining_time": "5:16:44", "throughput": 2338.73, "total_tokens": 32517664} {"current_steps": 16905, "total_steps": 40000, "loss": 0.0794, "lr": 3.101925388351865e-05, "epoch": 2.7578105881393262, "percentage": 42.26, "elapsed_time": "3:51:46", "remaining_time": "5:16:37", "throughput": 2339.09, "total_tokens": 32527504} {"current_steps": 16910, "total_steps": 40000, "loss": 0.0519, "lr": 3.1009724751552515e-05, "epoch": 2.758626315360144, "percentage": 42.27, "elapsed_time": "3:51:48", "remaining_time": "5:16:31", "throughput": 2339.42, "total_tokens": 32536928} {"current_steps": 16915, "total_steps": 40000, "loss": 0.0608, "lr": 3.100019469281131e-05, "epoch": 2.759442042580961, "percentage": 42.29, "elapsed_time": "3:51:50", "remaining_time": "5:16:24", "throughput": 2339.81, "total_tokens": 32547232} {"current_steps": 16920, "total_steps": 40000, "loss": 0.2425, "lr": 3.0990663708764685e-05, "epoch": 2.760257769801778, "percentage": 42.3, "elapsed_time": "3:51:52", "remaining_time": "5:16:17", "throughput": 2340.2, "total_tokens": 32557536} {"current_steps": 16925, "total_steps": 40000, "loss": 0.1111, "lr": 3.098113180088243e-05, "epoch": 2.7610734970225956, "percentage": 42.31, "elapsed_time": "3:51:54", "remaining_time": "5:16:10", "throughput": 2340.58, "total_tokens": 32567632} {"current_steps": 16930, "total_steps": 40000, "loss": 0.0115, "lr": 3.097159897063448e-05, "epoch": 2.761889224243413, "percentage": 42.33, "elapsed_time": "3:51:56", "remaining_time": "5:16:03", "throughput": 2340.96, "total_tokens": 32577760} {"current_steps": 16935, "total_steps": 40000, "loss": 0.0629, "lr": 3.096206521949094e-05, "epoch": 2.7627049514642303, "percentage": 42.34, "elapsed_time": "3:51:58", "remaining_time": "5:15:56", "throughput": 2341.26, "total_tokens": 32586704} {"current_steps": 16940, "total_steps": 40000, "loss": 0.0124, "lr": 3.0952530548922006e-05, "epoch": 2.7635206786850475, "percentage": 42.35, "elapsed_time": "3:52:00", "remaining_time": "5:15:49", "throughput": 2341.61, "total_tokens": 32596448} {"current_steps": 16945, "total_steps": 40000, "loss": 0.0704, "lr": 3.0942994960398064e-05, "epoch": 2.764336405905865, "percentage": 42.36, "elapsed_time": "3:52:02", "remaining_time": "5:15:42", "throughput": 2341.98, "total_tokens": 32606464} {"current_steps": 16950, "total_steps": 40000, "loss": 0.1101, "lr": 3.093345845538961e-05, "epoch": 2.7651521331266826, "percentage": 42.38, "elapsed_time": "3:52:04", "remaining_time": "5:15:35", "throughput": 2342.3, "total_tokens": 32615792} {"current_steps": 16955, "total_steps": 40000, "loss": 0.0539, "lr": 3.09239210353673e-05, "epoch": 2.7659678603474998, "percentage": 42.39, "elapsed_time": "3:52:06", "remaining_time": "5:15:29", "throughput": 2342.63, "total_tokens": 32625168} {"current_steps": 16960, "total_steps": 40000, "loss": 0.0056, "lr": 3.0914382701801926e-05, "epoch": 2.766783587568317, "percentage": 42.4, "elapsed_time": "3:52:08", "remaining_time": "5:15:22", "throughput": 2342.99, "total_tokens": 32634976} {"current_steps": 16965, "total_steps": 40000, "loss": 0.0724, "lr": 3.090484345616441e-05, "epoch": 2.7675993147891345, "percentage": 42.41, "elapsed_time": "3:52:10", "remaining_time": "5:15:15", "throughput": 2343.34, "total_tokens": 32644800} {"current_steps": 16970, "total_steps": 40000, "loss": 0.1398, "lr": 3.0895303299925825e-05, "epoch": 2.768415042009952, "percentage": 42.43, "elapsed_time": "3:52:12", "remaining_time": "5:15:08", "throughput": 2343.65, "total_tokens": 32653856} {"current_steps": 16975, "total_steps": 40000, "loss": 0.1515, "lr": 3.0885762234557393e-05, "epoch": 2.769230769230769, "percentage": 42.44, "elapsed_time": "3:52:14", "remaining_time": "5:15:01", "throughput": 2344.04, "total_tokens": 32664192} {"current_steps": 16980, "total_steps": 40000, "loss": 0.048, "lr": 3.087622026153045e-05, "epoch": 2.7700464964515867, "percentage": 42.45, "elapsed_time": "3:52:17", "remaining_time": "5:14:54", "throughput": 2344.41, "total_tokens": 32674224} {"current_steps": 16985, "total_steps": 40000, "loss": 0.1774, "lr": 3.086667738231651e-05, "epoch": 2.770862223672404, "percentage": 42.46, "elapsed_time": "3:52:19", "remaining_time": "5:14:47", "throughput": 2344.7, "total_tokens": 32683088} {"current_steps": 16990, "total_steps": 40000, "loss": 0.1428, "lr": 3.085713359838718e-05, "epoch": 2.7716779508932214, "percentage": 42.48, "elapsed_time": "3:52:21", "remaining_time": "5:14:40", "throughput": 2345.05, "total_tokens": 32692800} {"current_steps": 16995, "total_steps": 40000, "loss": 0.0316, "lr": 3.084758891121425e-05, "epoch": 2.7724936781140386, "percentage": 42.49, "elapsed_time": "3:52:23", "remaining_time": "5:14:34", "throughput": 2345.45, "total_tokens": 32703104} {"current_steps": 17000, "total_steps": 40000, "loss": 0.0657, "lr": 3.083804332226963e-05, "epoch": 2.773309405334856, "percentage": 42.5, "elapsed_time": "3:52:25", "remaining_time": "5:14:27", "throughput": 2345.86, "total_tokens": 32713728} {"current_steps": 17000, "total_steps": 40000, "eval_loss": 0.1591905653476715, "epoch": 2.773309405334856, "percentage": 42.5, "elapsed_time": "3:53:46", "remaining_time": "5:16:16", "throughput": 2332.29, "total_tokens": 32713728} {"current_steps": 17005, "total_steps": 40000, "loss": 0.1297, "lr": 3.082849683302536e-05, "epoch": 2.7741251325556733, "percentage": 42.51, "elapsed_time": "3:53:50", "remaining_time": "5:16:12", "throughput": 2332.3, "total_tokens": 32723696} {"current_steps": 17010, "total_steps": 40000, "loss": 0.0719, "lr": 3.081894944495363e-05, "epoch": 2.774940859776491, "percentage": 42.52, "elapsed_time": "3:53:52", "remaining_time": "5:16:06", "throughput": 2332.62, "total_tokens": 32733120} {"current_steps": 17015, "total_steps": 40000, "loss": 0.0362, "lr": 3.080940115952677e-05, "epoch": 2.775756586997308, "percentage": 42.54, "elapsed_time": "3:53:54", "remaining_time": "5:15:59", "throughput": 2332.86, "total_tokens": 32741472} {"current_steps": 17020, "total_steps": 40000, "loss": 0.1388, "lr": 3.0799851978217245e-05, "epoch": 2.7765723142181256, "percentage": 42.55, "elapsed_time": "3:53:56", "remaining_time": "5:15:52", "throughput": 2333.18, "total_tokens": 32750832} {"current_steps": 17025, "total_steps": 40000, "loss": 0.169, "lr": 3.0790301902497666e-05, "epoch": 2.7773880414389427, "percentage": 42.56, "elapsed_time": "3:53:59", "remaining_time": "5:15:45", "throughput": 2333.5, "total_tokens": 32760256} {"current_steps": 17030, "total_steps": 40000, "loss": 0.0223, "lr": 3.078075093384076e-05, "epoch": 2.7782037686597603, "percentage": 42.58, "elapsed_time": "3:54:01", "remaining_time": "5:15:38", "throughput": 2333.86, "total_tokens": 32770256} {"current_steps": 17035, "total_steps": 40000, "loss": 0.1892, "lr": 3.077119907371942e-05, "epoch": 2.7790194958805774, "percentage": 42.59, "elapsed_time": "3:54:03", "remaining_time": "5:15:31", "throughput": 2334.28, "total_tokens": 32781088} {"current_steps": 17040, "total_steps": 40000, "loss": 0.1545, "lr": 3.076164632360666e-05, "epoch": 2.779835223101395, "percentage": 42.6, "elapsed_time": "3:54:05", "remaining_time": "5:15:25", "throughput": 2334.6, "total_tokens": 32790400} {"current_steps": 17045, "total_steps": 40000, "loss": 0.0759, "lr": 3.075209268497563e-05, "epoch": 2.780650950322212, "percentage": 42.61, "elapsed_time": "3:54:07", "remaining_time": "5:15:18", "throughput": 2334.97, "total_tokens": 32800464} {"current_steps": 17050, "total_steps": 40000, "loss": 0.0757, "lr": 3.074253815929961e-05, "epoch": 2.7814666775430297, "percentage": 42.62, "elapsed_time": "3:54:09", "remaining_time": "5:15:11", "throughput": 2335.32, "total_tokens": 32810272} {"current_steps": 17055, "total_steps": 40000, "loss": 0.0058, "lr": 3.0732982748052054e-05, "epoch": 2.782282404763847, "percentage": 42.64, "elapsed_time": "3:54:11", "remaining_time": "5:15:04", "throughput": 2335.72, "total_tokens": 32820768} {"current_steps": 17060, "total_steps": 40000, "loss": 0.118, "lr": 3.072342645270651e-05, "epoch": 2.7830981319846644, "percentage": 42.65, "elapsed_time": "3:54:13", "remaining_time": "5:14:57", "throughput": 2336.12, "total_tokens": 32831200} {"current_steps": 17065, "total_steps": 40000, "loss": 0.1283, "lr": 3.071386927473668e-05, "epoch": 2.783913859205482, "percentage": 42.66, "elapsed_time": "3:54:15", "remaining_time": "5:14:50", "throughput": 2336.34, "total_tokens": 32839216} {"current_steps": 17070, "total_steps": 40000, "loss": 0.2465, "lr": 3.0704311215616404e-05, "epoch": 2.784729586426299, "percentage": 42.68, "elapsed_time": "3:54:17", "remaining_time": "5:14:43", "throughput": 2336.67, "total_tokens": 32848688} {"current_steps": 17075, "total_steps": 40000, "loss": 0.0574, "lr": 3.0694752276819656e-05, "epoch": 2.785545313647116, "percentage": 42.69, "elapsed_time": "3:54:19", "remaining_time": "5:14:36", "throughput": 2336.93, "total_tokens": 32857152} {"current_steps": 17080, "total_steps": 40000, "loss": 0.0486, "lr": 3.068519245982054e-05, "epoch": 2.786361040867934, "percentage": 42.7, "elapsed_time": "3:54:22", "remaining_time": "5:14:30", "throughput": 2337.25, "total_tokens": 32866496} {"current_steps": 17085, "total_steps": 40000, "loss": 0.1835, "lr": 3.0675631766093304e-05, "epoch": 2.7871767680887514, "percentage": 42.71, "elapsed_time": "3:54:24", "remaining_time": "5:14:23", "throughput": 2337.52, "total_tokens": 32875088} {"current_steps": 17090, "total_steps": 40000, "loss": 0.1057, "lr": 3.066607019711232e-05, "epoch": 2.7879924953095685, "percentage": 42.73, "elapsed_time": "3:54:26", "remaining_time": "5:14:16", "throughput": 2337.89, "total_tokens": 32885136} {"current_steps": 17095, "total_steps": 40000, "loss": 0.1708, "lr": 3.065650775435211e-05, "epoch": 2.7888082225303856, "percentage": 42.74, "elapsed_time": "3:54:28", "remaining_time": "5:14:09", "throughput": 2338.24, "total_tokens": 32894896} {"current_steps": 17100, "total_steps": 40000, "loss": 0.0372, "lr": 3.0646944439287326e-05, "epoch": 2.789623949751203, "percentage": 42.75, "elapsed_time": "3:54:30", "remaining_time": "5:14:02", "throughput": 2338.5, "total_tokens": 32903440} {"current_steps": 17105, "total_steps": 40000, "loss": 0.0189, "lr": 3.0637380253392736e-05, "epoch": 2.7904396769720208, "percentage": 42.76, "elapsed_time": "3:54:32", "remaining_time": "5:13:55", "throughput": 2338.81, "total_tokens": 32912624} {"current_steps": 17110, "total_steps": 40000, "loss": 0.0814, "lr": 3.062781519814327e-05, "epoch": 2.791255404192838, "percentage": 42.77, "elapsed_time": "3:54:34", "remaining_time": "5:13:48", "throughput": 2339.08, "total_tokens": 32921216} {"current_steps": 17115, "total_steps": 40000, "loss": 0.2312, "lr": 3.0618249275013985e-05, "epoch": 2.792071131413655, "percentage": 42.79, "elapsed_time": "3:54:36", "remaining_time": "5:13:42", "throughput": 2339.44, "total_tokens": 32931136} {"current_steps": 17120, "total_steps": 40000, "loss": 0.1592, "lr": 3.060868248548005e-05, "epoch": 2.7928868586344726, "percentage": 42.8, "elapsed_time": "3:54:38", "remaining_time": "5:13:35", "throughput": 2339.8, "total_tokens": 32941056} {"current_steps": 17125, "total_steps": 40000, "loss": 0.051, "lr": 3.0599114831016796e-05, "epoch": 2.79370258585529, "percentage": 42.81, "elapsed_time": "3:54:40", "remaining_time": "5:13:28", "throughput": 2340.16, "total_tokens": 32951008} {"current_steps": 17130, "total_steps": 40000, "loss": 0.0336, "lr": 3.0589546313099666e-05, "epoch": 2.7945183130761073, "percentage": 42.83, "elapsed_time": "3:54:42", "remaining_time": "5:13:21", "throughput": 2340.38, "total_tokens": 32958864} {"current_steps": 17135, "total_steps": 40000, "loss": 0.0054, "lr": 3.0579976933204255e-05, "epoch": 2.7953340402969244, "percentage": 42.84, "elapsed_time": "3:54:44", "remaining_time": "5:13:14", "throughput": 2340.68, "total_tokens": 32967984} {"current_steps": 17140, "total_steps": 40000, "loss": 0.0651, "lr": 3.0570406692806284e-05, "epoch": 2.796149767517742, "percentage": 42.85, "elapsed_time": "3:54:46", "remaining_time": "5:13:07", "throughput": 2341.08, "total_tokens": 32978512} {"current_steps": 17145, "total_steps": 40000, "loss": 0.0103, "lr": 3.05608355933816e-05, "epoch": 2.7969654947385596, "percentage": 42.86, "elapsed_time": "3:54:48", "remaining_time": "5:13:01", "throughput": 2341.41, "total_tokens": 32987952} {"current_steps": 17150, "total_steps": 40000, "loss": 0.0043, "lr": 3.055126363640618e-05, "epoch": 2.7977812219593767, "percentage": 42.88, "elapsed_time": "3:54:51", "remaining_time": "5:12:54", "throughput": 2341.71, "total_tokens": 32997104} {"current_steps": 17155, "total_steps": 40000, "loss": 0.0384, "lr": 3.0541690823356146e-05, "epoch": 2.7985969491801943, "percentage": 42.89, "elapsed_time": "3:54:53", "remaining_time": "5:12:47", "throughput": 2342.09, "total_tokens": 33007216} {"current_steps": 17160, "total_steps": 40000, "loss": 0.0779, "lr": 3.053211715570775e-05, "epoch": 2.7994126764010114, "percentage": 42.9, "elapsed_time": "3:54:55", "remaining_time": "5:12:40", "throughput": 2342.46, "total_tokens": 33017280} {"current_steps": 17165, "total_steps": 40000, "loss": 0.1276, "lr": 3.052254263493736e-05, "epoch": 2.800228403621829, "percentage": 42.91, "elapsed_time": "3:54:57", "remaining_time": "5:12:33", "throughput": 2342.78, "total_tokens": 33026752} {"current_steps": 17170, "total_steps": 40000, "loss": 0.1132, "lr": 3.0512967262521498e-05, "epoch": 2.801044130842646, "percentage": 42.93, "elapsed_time": "3:54:59", "remaining_time": "5:12:27", "throughput": 2343.0, "total_tokens": 33034672} {"current_steps": 17175, "total_steps": 40000, "loss": 0.0092, "lr": 3.0503391039936803e-05, "epoch": 2.8018598580634637, "percentage": 42.94, "elapsed_time": "3:55:01", "remaining_time": "5:12:20", "throughput": 2343.29, "total_tokens": 33043616} {"current_steps": 17180, "total_steps": 40000, "loss": 0.062, "lr": 3.0493813968660056e-05, "epoch": 2.802675585284281, "percentage": 42.95, "elapsed_time": "3:55:03", "remaining_time": "5:12:13", "throughput": 2343.67, "total_tokens": 33053808} {"current_steps": 17185, "total_steps": 40000, "loss": 0.0585, "lr": 3.0484236050168153e-05, "epoch": 2.8034913125050984, "percentage": 42.96, "elapsed_time": "3:55:05", "remaining_time": "5:12:06", "throughput": 2344.05, "total_tokens": 33063968} {"current_steps": 17190, "total_steps": 40000, "loss": 0.0055, "lr": 3.0474657285938123e-05, "epoch": 2.8043070397259156, "percentage": 42.98, "elapsed_time": "3:55:07", "remaining_time": "5:11:59", "throughput": 2344.44, "total_tokens": 33074368} {"current_steps": 17195, "total_steps": 40000, "loss": 0.0631, "lr": 3.046507767744715e-05, "epoch": 2.805122766946733, "percentage": 42.99, "elapsed_time": "3:55:09", "remaining_time": "5:11:53", "throughput": 2344.79, "total_tokens": 33084160} {"current_steps": 17200, "total_steps": 40000, "loss": 0.0995, "lr": 3.045549722617252e-05, "epoch": 2.8059384941675503, "percentage": 43.0, "elapsed_time": "3:55:11", "remaining_time": "5:11:46", "throughput": 2345.13, "total_tokens": 33093744} {"current_steps": 17200, "total_steps": 40000, "eval_loss": 0.20447540283203125, "epoch": 2.8059384941675503, "percentage": 43.0, "elapsed_time": "3:56:32", "remaining_time": "5:13:33", "throughput": 2331.79, "total_tokens": 33093744} {"current_steps": 17205, "total_steps": 40000, "loss": 0.2528, "lr": 3.0445915933591658e-05, "epoch": 2.806754221388368, "percentage": 43.01, "elapsed_time": "3:56:36", "remaining_time": "5:13:28", "throughput": 2331.83, "total_tokens": 33103712} {"current_steps": 17210, "total_steps": 40000, "loss": 0.1325, "lr": 3.0436333801182114e-05, "epoch": 2.807569948609185, "percentage": 43.03, "elapsed_time": "3:56:38", "remaining_time": "5:13:22", "throughput": 2332.21, "total_tokens": 33113968} {"current_steps": 17215, "total_steps": 40000, "loss": 0.1517, "lr": 3.0426750830421596e-05, "epoch": 2.8083856758300025, "percentage": 43.04, "elapsed_time": "3:56:40", "remaining_time": "5:13:15", "throughput": 2332.52, "total_tokens": 33123200} {"current_steps": 17220, "total_steps": 40000, "loss": 0.0755, "lr": 3.0417167022787897e-05, "epoch": 2.8092014030508197, "percentage": 43.05, "elapsed_time": "3:56:42", "remaining_time": "5:13:08", "throughput": 2332.78, "total_tokens": 33131696} {"current_steps": 17225, "total_steps": 40000, "loss": 0.0806, "lr": 3.0407582379758966e-05, "epoch": 2.8100171302716372, "percentage": 43.06, "elapsed_time": "3:56:44", "remaining_time": "5:13:01", "throughput": 2333.15, "total_tokens": 33141872} {"current_steps": 17230, "total_steps": 40000, "loss": 0.0798, "lr": 3.039799690281287e-05, "epoch": 2.8108328574924544, "percentage": 43.08, "elapsed_time": "3:56:46", "remaining_time": "5:12:54", "throughput": 2333.37, "total_tokens": 33149808} {"current_steps": 17235, "total_steps": 40000, "loss": 0.0723, "lr": 3.0388410593427823e-05, "epoch": 2.811648584713272, "percentage": 43.09, "elapsed_time": "3:56:48", "remaining_time": "5:12:47", "throughput": 2333.63, "total_tokens": 33158304} {"current_steps": 17240, "total_steps": 40000, "loss": 0.0243, "lr": 3.0378823453082146e-05, "epoch": 2.812464311934089, "percentage": 43.1, "elapsed_time": "3:56:50", "remaining_time": "5:12:41", "throughput": 2333.98, "total_tokens": 33168144} {"current_steps": 17245, "total_steps": 40000, "loss": 0.0038, "lr": 3.03692354832543e-05, "epoch": 2.8132800391549067, "percentage": 43.11, "elapsed_time": "3:56:53", "remaining_time": "5:12:34", "throughput": 2334.42, "total_tokens": 33179152} {"current_steps": 17250, "total_steps": 40000, "loss": 0.0942, "lr": 3.0359646685422865e-05, "epoch": 2.814095766375724, "percentage": 43.12, "elapsed_time": "3:56:55", "remaining_time": "5:12:27", "throughput": 2334.86, "total_tokens": 33190320} {"current_steps": 17255, "total_steps": 40000, "loss": 0.0629, "lr": 3.035005706106656e-05, "epoch": 2.8149114935965414, "percentage": 43.14, "elapsed_time": "3:56:57", "remaining_time": "5:12:20", "throughput": 2335.2, "total_tokens": 33199936} {"current_steps": 17260, "total_steps": 40000, "loss": 0.1619, "lr": 3.034046661166422e-05, "epoch": 2.815727220817359, "percentage": 43.15, "elapsed_time": "3:56:59", "remaining_time": "5:12:13", "throughput": 2335.57, "total_tokens": 33210000} {"current_steps": 17265, "total_steps": 40000, "loss": 0.0117, "lr": 3.033087533869482e-05, "epoch": 2.816542948038176, "percentage": 43.16, "elapsed_time": "3:57:01", "remaining_time": "5:12:06", "throughput": 2335.94, "total_tokens": 33220112} {"current_steps": 17270, "total_steps": 40000, "loss": 0.073, "lr": 3.0321283243637444e-05, "epoch": 2.817358675258993, "percentage": 43.18, "elapsed_time": "3:57:03", "remaining_time": "5:12:00", "throughput": 2336.33, "total_tokens": 33230464} {"current_steps": 17275, "total_steps": 40000, "loss": 0.1095, "lr": 3.0311690327971326e-05, "epoch": 2.8181744024798108, "percentage": 43.19, "elapsed_time": "3:57:05", "remaining_time": "5:11:53", "throughput": 2336.67, "total_tokens": 33240112} {"current_steps": 17280, "total_steps": 40000, "loss": 0.2338, "lr": 3.030209659317581e-05, "epoch": 2.8189901297006283, "percentage": 43.2, "elapsed_time": "3:57:07", "remaining_time": "5:11:46", "throughput": 2337.04, "total_tokens": 33250224} {"current_steps": 17285, "total_steps": 40000, "loss": 0.1724, "lr": 3.0292502040730362e-05, "epoch": 2.8198058569214455, "percentage": 43.21, "elapsed_time": "3:57:09", "remaining_time": "5:11:39", "throughput": 2337.42, "total_tokens": 33260448} {"current_steps": 17290, "total_steps": 40000, "loss": 0.0197, "lr": 3.0282906672114597e-05, "epoch": 2.8206215841422626, "percentage": 43.23, "elapsed_time": "3:57:11", "remaining_time": "5:11:32", "throughput": 2337.8, "total_tokens": 33270784} {"current_steps": 17295, "total_steps": 40000, "loss": 0.0852, "lr": 3.027331048880823e-05, "epoch": 2.82143731136308, "percentage": 43.24, "elapsed_time": "3:57:13", "remaining_time": "5:11:26", "throughput": 2338.19, "total_tokens": 33281168} {"current_steps": 17300, "total_steps": 40000, "loss": 0.0672, "lr": 3.0263713492291123e-05, "epoch": 2.8222530385838978, "percentage": 43.25, "elapsed_time": "3:57:15", "remaining_time": "5:11:19", "throughput": 2338.52, "total_tokens": 33290816} {"current_steps": 17305, "total_steps": 40000, "loss": 0.0688, "lr": 3.0254115684043242e-05, "epoch": 2.823068765804715, "percentage": 43.26, "elapsed_time": "3:57:17", "remaining_time": "5:11:12", "throughput": 2338.94, "total_tokens": 33301488} {"current_steps": 17310, "total_steps": 40000, "loss": 0.1213, "lr": 3.024451706554469e-05, "epoch": 2.823884493025532, "percentage": 43.28, "elapsed_time": "3:57:19", "remaining_time": "5:11:05", "throughput": 2339.23, "total_tokens": 33310592} {"current_steps": 17315, "total_steps": 40000, "loss": 0.1557, "lr": 3.0234917638275705e-05, "epoch": 2.8247002202463496, "percentage": 43.29, "elapsed_time": "3:57:22", "remaining_time": "5:10:58", "throughput": 2339.54, "total_tokens": 33319808} {"current_steps": 17320, "total_steps": 40000, "loss": 0.0393, "lr": 3.0225317403716635e-05, "epoch": 2.825515947467167, "percentage": 43.3, "elapsed_time": "3:57:24", "remaining_time": "5:10:52", "throughput": 2339.93, "total_tokens": 33330192} {"current_steps": 17325, "total_steps": 40000, "loss": 0.1116, "lr": 3.0215716363347956e-05, "epoch": 2.8263316746879843, "percentage": 43.31, "elapsed_time": "3:57:26", "remaining_time": "5:10:45", "throughput": 2340.28, "total_tokens": 33339952} {"current_steps": 17330, "total_steps": 40000, "loss": 0.0458, "lr": 3.0206114518650275e-05, "epoch": 2.8271474019088014, "percentage": 43.33, "elapsed_time": "3:57:28", "remaining_time": "5:10:38", "throughput": 2340.69, "total_tokens": 33350752} {"current_steps": 17335, "total_steps": 40000, "loss": 0.1536, "lr": 3.0196511871104304e-05, "epoch": 2.827963129129619, "percentage": 43.34, "elapsed_time": "3:57:30", "remaining_time": "5:10:31", "throughput": 2341.1, "total_tokens": 33361392} {"current_steps": 17340, "total_steps": 40000, "loss": 0.0067, "lr": 3.01869084221909e-05, "epoch": 2.8287788563504366, "percentage": 43.35, "elapsed_time": "3:57:32", "remaining_time": "5:10:25", "throughput": 2341.39, "total_tokens": 33370400} {"current_steps": 17345, "total_steps": 40000, "loss": 0.1822, "lr": 3.0177304173391037e-05, "epoch": 2.8295945835712537, "percentage": 43.36, "elapsed_time": "3:57:34", "remaining_time": "5:10:18", "throughput": 2341.71, "total_tokens": 33379760} {"current_steps": 17350, "total_steps": 40000, "loss": 0.2161, "lr": 3.01676991261858e-05, "epoch": 2.8304103107920713, "percentage": 43.38, "elapsed_time": "3:57:36", "remaining_time": "5:10:11", "throughput": 2342.08, "total_tokens": 33389856} {"current_steps": 17355, "total_steps": 40000, "loss": 0.1325, "lr": 3.015809328205642e-05, "epoch": 2.8312260380128884, "percentage": 43.39, "elapsed_time": "3:57:38", "remaining_time": "5:10:04", "throughput": 2342.34, "total_tokens": 33398416} {"current_steps": 17360, "total_steps": 40000, "loss": 0.0529, "lr": 3.0148486642484248e-05, "epoch": 2.832041765233706, "percentage": 43.4, "elapsed_time": "3:57:40", "remaining_time": "5:09:57", "throughput": 2342.64, "total_tokens": 33407632} {"current_steps": 17365, "total_steps": 40000, "loss": 0.0179, "lr": 3.0138879208950722e-05, "epoch": 2.832857492454523, "percentage": 43.41, "elapsed_time": "3:57:42", "remaining_time": "5:09:51", "throughput": 2342.94, "total_tokens": 33416672} {"current_steps": 17370, "total_steps": 40000, "loss": 0.008, "lr": 3.012927098293744e-05, "epoch": 2.8336732196753407, "percentage": 43.43, "elapsed_time": "3:57:44", "remaining_time": "5:09:44", "throughput": 2343.29, "total_tokens": 33426608} {"current_steps": 17375, "total_steps": 40000, "loss": 0.0735, "lr": 3.0119661965926123e-05, "epoch": 2.834488946896158, "percentage": 43.44, "elapsed_time": "3:57:46", "remaining_time": "5:09:37", "throughput": 2343.55, "total_tokens": 33435072} {"current_steps": 17380, "total_steps": 40000, "loss": 0.218, "lr": 3.0110052159398587e-05, "epoch": 2.8353046741169754, "percentage": 43.45, "elapsed_time": "3:57:48", "remaining_time": "5:09:30", "throughput": 2343.87, "total_tokens": 33444512} {"current_steps": 17385, "total_steps": 40000, "loss": 0.145, "lr": 3.0100441564836802e-05, "epoch": 2.8361204013377925, "percentage": 43.46, "elapsed_time": "3:57:50", "remaining_time": "5:09:24", "throughput": 2344.21, "total_tokens": 33454256} {"current_steps": 17390, "total_steps": 40000, "loss": 0.0823, "lr": 3.0090830183722817e-05, "epoch": 2.83693612855861, "percentage": 43.48, "elapsed_time": "3:57:53", "remaining_time": "5:09:17", "throughput": 2344.57, "total_tokens": 33464208} {"current_steps": 17395, "total_steps": 40000, "loss": 0.0437, "lr": 3.0081218017538852e-05, "epoch": 2.8377518557794272, "percentage": 43.49, "elapsed_time": "3:57:55", "remaining_time": "5:09:10", "throughput": 2344.96, "total_tokens": 33474544} {"current_steps": 17400, "total_steps": 40000, "loss": 0.2777, "lr": 3.0071605067767212e-05, "epoch": 2.838567583000245, "percentage": 43.5, "elapsed_time": "3:57:57", "remaining_time": "5:09:03", "throughput": 2345.3, "total_tokens": 33484336} {"current_steps": 17400, "total_steps": 40000, "eval_loss": 0.15053951740264893, "epoch": 2.838567583000245, "percentage": 43.5, "elapsed_time": "3:59:18", "remaining_time": "5:10:48", "throughput": 2332.09, "total_tokens": 33484336} {"current_steps": 17405, "total_steps": 40000, "loss": 0.2882, "lr": 3.006199133589034e-05, "epoch": 2.839383310221062, "percentage": 43.51, "elapsed_time": "3:59:21", "remaining_time": "5:10:44", "throughput": 2332.23, "total_tokens": 33494864} {"current_steps": 17410, "total_steps": 40000, "loss": 0.0399, "lr": 3.005237682339079e-05, "epoch": 2.8401990374418795, "percentage": 43.53, "elapsed_time": "3:59:23", "remaining_time": "5:10:37", "throughput": 2332.54, "total_tokens": 33504224} {"current_steps": 17415, "total_steps": 40000, "loss": 0.0498, "lr": 3.0042761531751228e-05, "epoch": 2.8410147646626966, "percentage": 43.54, "elapsed_time": "3:59:25", "remaining_time": "5:10:30", "throughput": 2332.86, "total_tokens": 33513568} {"current_steps": 17420, "total_steps": 40000, "loss": 0.1135, "lr": 3.0033145462454482e-05, "epoch": 2.841830491883514, "percentage": 43.55, "elapsed_time": "3:59:27", "remaining_time": "5:10:23", "throughput": 2333.13, "total_tokens": 33522272} {"current_steps": 17425, "total_steps": 40000, "loss": 0.2042, "lr": 3.002352861698345e-05, "epoch": 2.8426462191043314, "percentage": 43.56, "elapsed_time": "3:59:30", "remaining_time": "5:10:17", "throughput": 2333.47, "total_tokens": 33532016} {"current_steps": 17430, "total_steps": 40000, "loss": 0.016, "lr": 3.0013910996821178e-05, "epoch": 2.843461946325149, "percentage": 43.58, "elapsed_time": "3:59:32", "remaining_time": "5:10:10", "throughput": 2333.83, "total_tokens": 33541984} {"current_steps": 17435, "total_steps": 40000, "loss": 0.0251, "lr": 3.0004292603450817e-05, "epoch": 2.8442776735459665, "percentage": 43.59, "elapsed_time": "3:59:34", "remaining_time": "5:10:03", "throughput": 2334.21, "total_tokens": 33552320} {"current_steps": 17440, "total_steps": 40000, "loss": 0.0224, "lr": 2.9994673438355653e-05, "epoch": 2.8450934007667836, "percentage": 43.6, "elapsed_time": "3:59:36", "remaining_time": "5:09:56", "throughput": 2334.57, "total_tokens": 33562336} {"current_steps": 17445, "total_steps": 40000, "loss": 0.0107, "lr": 2.9985053503019078e-05, "epoch": 2.8459091279876008, "percentage": 43.61, "elapsed_time": "3:59:38", "remaining_time": "5:09:50", "throughput": 2334.9, "total_tokens": 33571952} {"current_steps": 17450, "total_steps": 40000, "loss": 0.1783, "lr": 2.99754327989246e-05, "epoch": 2.8467248552084183, "percentage": 43.62, "elapsed_time": "3:59:40", "remaining_time": "5:09:43", "throughput": 2335.26, "total_tokens": 33581872} {"current_steps": 17455, "total_steps": 40000, "loss": 0.0674, "lr": 2.9965811327555864e-05, "epoch": 2.847540582429236, "percentage": 43.64, "elapsed_time": "3:59:42", "remaining_time": "5:09:36", "throughput": 2335.61, "total_tokens": 33591840} {"current_steps": 17460, "total_steps": 40000, "loss": 0.0081, "lr": 2.995618909039662e-05, "epoch": 2.848356309650053, "percentage": 43.65, "elapsed_time": "3:59:44", "remaining_time": "5:09:29", "throughput": 2335.97, "total_tokens": 33601824} {"current_steps": 17465, "total_steps": 40000, "loss": 0.0242, "lr": 2.9946566088930727e-05, "epoch": 2.84917203687087, "percentage": 43.66, "elapsed_time": "3:59:46", "remaining_time": "5:09:22", "throughput": 2336.31, "total_tokens": 33611584} {"current_steps": 17470, "total_steps": 40000, "loss": 0.1258, "lr": 2.9936942324642192e-05, "epoch": 2.8499877640916877, "percentage": 43.68, "elapsed_time": "3:59:48", "remaining_time": "5:09:16", "throughput": 2336.59, "total_tokens": 33620448} {"current_steps": 17475, "total_steps": 40000, "loss": 0.0034, "lr": 2.9927317799015097e-05, "epoch": 2.8508034913125053, "percentage": 43.69, "elapsed_time": "3:59:50", "remaining_time": "5:09:09", "throughput": 2336.9, "total_tokens": 33629776} {"current_steps": 17480, "total_steps": 40000, "loss": 0.1525, "lr": 2.9917692513533685e-05, "epoch": 2.8516192185333225, "percentage": 43.7, "elapsed_time": "3:59:52", "remaining_time": "5:09:02", "throughput": 2337.23, "total_tokens": 33639344} {"current_steps": 17485, "total_steps": 40000, "loss": 0.0046, "lr": 2.990806646968229e-05, "epoch": 2.8524349457541396, "percentage": 43.71, "elapsed_time": "3:59:54", "remaining_time": "5:08:55", "throughput": 2337.61, "total_tokens": 33649632} {"current_steps": 17490, "total_steps": 40000, "loss": 0.2605, "lr": 2.989843966894536e-05, "epoch": 2.853250672974957, "percentage": 43.73, "elapsed_time": "3:59:56", "remaining_time": "5:08:49", "throughput": 2337.94, "total_tokens": 33659280} {"current_steps": 17495, "total_steps": 40000, "loss": 0.0615, "lr": 2.9888812112807472e-05, "epoch": 2.8540664001957747, "percentage": 43.74, "elapsed_time": "3:59:59", "remaining_time": "5:08:42", "throughput": 2338.23, "total_tokens": 33668192} {"current_steps": 17500, "total_steps": 40000, "loss": 0.023, "lr": 2.987918380275333e-05, "epoch": 2.854882127416592, "percentage": 43.75, "elapsed_time": "4:00:01", "remaining_time": "5:08:35", "throughput": 2338.57, "total_tokens": 33678032} {"current_steps": 17505, "total_steps": 40000, "loss": 0.0185, "lr": 2.9869554740267724e-05, "epoch": 2.855697854637409, "percentage": 43.76, "elapsed_time": "4:00:03", "remaining_time": "5:08:28", "throughput": 2338.91, "total_tokens": 33687776} {"current_steps": 17510, "total_steps": 40000, "loss": 0.1449, "lr": 2.9859924926835585e-05, "epoch": 2.8565135818582266, "percentage": 43.77, "elapsed_time": "4:00:05", "remaining_time": "5:08:22", "throughput": 2339.2, "total_tokens": 33696704} {"current_steps": 17515, "total_steps": 40000, "loss": 0.1721, "lr": 2.9850294363941944e-05, "epoch": 2.857329309079044, "percentage": 43.79, "elapsed_time": "4:00:07", "remaining_time": "5:08:15", "throughput": 2339.48, "total_tokens": 33705600} {"current_steps": 17520, "total_steps": 40000, "loss": 0.1442, "lr": 2.9840663053071967e-05, "epoch": 2.8581450362998613, "percentage": 43.8, "elapsed_time": "4:00:09", "remaining_time": "5:08:08", "throughput": 2339.9, "total_tokens": 33716528} {"current_steps": 17525, "total_steps": 40000, "loss": 0.0121, "lr": 2.983103099571091e-05, "epoch": 2.858960763520679, "percentage": 43.81, "elapsed_time": "4:00:11", "remaining_time": "5:08:02", "throughput": 2340.29, "total_tokens": 33726928} {"current_steps": 17530, "total_steps": 40000, "loss": 0.0132, "lr": 2.9821398193344164e-05, "epoch": 2.859776490741496, "percentage": 43.82, "elapsed_time": "4:00:13", "remaining_time": "5:07:55", "throughput": 2340.67, "total_tokens": 33737280} {"current_steps": 17535, "total_steps": 40000, "loss": 0.0889, "lr": 2.9811764647457226e-05, "epoch": 2.8605922179623136, "percentage": 43.84, "elapsed_time": "4:00:15", "remaining_time": "5:07:48", "throughput": 2340.98, "total_tokens": 33746688} {"current_steps": 17540, "total_steps": 40000, "loss": 0.1121, "lr": 2.9802130359535714e-05, "epoch": 2.8614079451831307, "percentage": 43.85, "elapsed_time": "4:00:17", "remaining_time": "5:07:41", "throughput": 2341.4, "total_tokens": 33757568} {"current_steps": 17545, "total_steps": 40000, "loss": 0.051, "lr": 2.979249533106535e-05, "epoch": 2.8622236724039483, "percentage": 43.86, "elapsed_time": "4:00:19", "remaining_time": "5:07:35", "throughput": 2341.68, "total_tokens": 33766480} {"current_steps": 17550, "total_steps": 40000, "loss": 0.0764, "lr": 2.9782859563531986e-05, "epoch": 2.8630393996247654, "percentage": 43.88, "elapsed_time": "4:00:21", "remaining_time": "5:07:28", "throughput": 2342.02, "total_tokens": 33776240} {"current_steps": 17555, "total_steps": 40000, "loss": 0.1191, "lr": 2.977322305842156e-05, "epoch": 2.863855126845583, "percentage": 43.89, "elapsed_time": "4:00:23", "remaining_time": "5:07:21", "throughput": 2342.37, "total_tokens": 33786016} {"current_steps": 17560, "total_steps": 40000, "loss": 0.014, "lr": 2.9763585817220162e-05, "epoch": 2.8646708540664, "percentage": 43.9, "elapsed_time": "4:00:25", "remaining_time": "5:07:14", "throughput": 2342.66, "total_tokens": 33795088} {"current_steps": 17565, "total_steps": 40000, "loss": 0.1267, "lr": 2.975394784141397e-05, "epoch": 2.8654865812872177, "percentage": 43.91, "elapsed_time": "4:00:28", "remaining_time": "5:07:08", "throughput": 2342.99, "total_tokens": 33804704} {"current_steps": 17570, "total_steps": 40000, "loss": 0.0489, "lr": 2.974430913248928e-05, "epoch": 2.866302308508035, "percentage": 43.92, "elapsed_time": "4:00:30", "remaining_time": "5:07:01", "throughput": 2343.38, "total_tokens": 33815136} {"current_steps": 17575, "total_steps": 40000, "loss": 0.0716, "lr": 2.9734669691932497e-05, "epoch": 2.8671180357288524, "percentage": 43.94, "elapsed_time": "4:00:32", "remaining_time": "5:06:54", "throughput": 2343.73, "total_tokens": 33825088} {"current_steps": 17580, "total_steps": 40000, "loss": 0.008, "lr": 2.9725029521230147e-05, "epoch": 2.8679337629496695, "percentage": 43.95, "elapsed_time": "4:00:34", "remaining_time": "5:06:48", "throughput": 2344.09, "total_tokens": 33835120} {"current_steps": 17585, "total_steps": 40000, "loss": 0.1333, "lr": 2.9715388621868873e-05, "epoch": 2.868749490170487, "percentage": 43.96, "elapsed_time": "4:00:36", "remaining_time": "5:06:41", "throughput": 2344.42, "total_tokens": 33844704} {"current_steps": 17590, "total_steps": 40000, "loss": 0.0387, "lr": 2.970574699533541e-05, "epoch": 2.869565217391304, "percentage": 43.97, "elapsed_time": "4:00:38", "remaining_time": "5:06:34", "throughput": 2344.79, "total_tokens": 33854896} {"current_steps": 17595, "total_steps": 40000, "loss": 0.1104, "lr": 2.969610464311662e-05, "epoch": 2.870380944612122, "percentage": 43.99, "elapsed_time": "4:00:40", "remaining_time": "5:06:28", "throughput": 2345.11, "total_tokens": 33864400} {"current_steps": 17600, "total_steps": 40000, "loss": 0.0597, "lr": 2.9686461566699487e-05, "epoch": 2.871196671832939, "percentage": 44.0, "elapsed_time": "4:00:42", "remaining_time": "5:06:21", "throughput": 2345.52, "total_tokens": 33875072} {"current_steps": 17600, "total_steps": 40000, "eval_loss": 0.172089084982872, "epoch": 2.871196671832939, "percentage": 44.0, "elapsed_time": "4:02:03", "remaining_time": "5:08:04", "throughput": 2332.45, "total_tokens": 33875072} {"current_steps": 17605, "total_steps": 40000, "loss": 0.2606, "lr": 2.9676817767571086e-05, "epoch": 2.8720123990537565, "percentage": 44.01, "elapsed_time": "4:02:07", "remaining_time": "5:07:59", "throughput": 2332.56, "total_tokens": 33885840} {"current_steps": 17610, "total_steps": 40000, "loss": 0.1321, "lr": 2.966717324721861e-05, "epoch": 2.8728281262745736, "percentage": 44.02, "elapsed_time": "4:02:09", "remaining_time": "5:07:53", "throughput": 2332.9, "total_tokens": 33895728} {"current_steps": 17615, "total_steps": 40000, "loss": 0.0623, "lr": 2.9657528007129366e-05, "epoch": 2.873643853495391, "percentage": 44.04, "elapsed_time": "4:02:11", "remaining_time": "5:07:46", "throughput": 2333.23, "total_tokens": 33905344} {"current_steps": 17620, "total_steps": 40000, "loss": 0.0884, "lr": 2.9647882048790777e-05, "epoch": 2.8744595807162083, "percentage": 44.05, "elapsed_time": "4:02:13", "remaining_time": "5:07:39", "throughput": 2333.65, "total_tokens": 33916208} {"current_steps": 17625, "total_steps": 40000, "loss": 0.0828, "lr": 2.963823537369037e-05, "epoch": 2.875275307937026, "percentage": 44.06, "elapsed_time": "4:02:15", "remaining_time": "5:07:33", "throughput": 2334.0, "total_tokens": 33926096} {"current_steps": 17630, "total_steps": 40000, "loss": 0.0628, "lr": 2.9628587983315775e-05, "epoch": 2.8760910351578435, "percentage": 44.07, "elapsed_time": "4:02:17", "remaining_time": "5:07:26", "throughput": 2334.28, "total_tokens": 33935088} {"current_steps": 17635, "total_steps": 40000, "loss": 0.1181, "lr": 2.9618939879154746e-05, "epoch": 2.8769067623786606, "percentage": 44.09, "elapsed_time": "4:02:19", "remaining_time": "5:07:19", "throughput": 2334.64, "total_tokens": 33945120} {"current_steps": 17640, "total_steps": 40000, "loss": 0.173, "lr": 2.9609291062695143e-05, "epoch": 2.8777224895994777, "percentage": 44.1, "elapsed_time": "4:02:21", "remaining_time": "5:07:12", "throughput": 2334.92, "total_tokens": 33953952} {"current_steps": 17645, "total_steps": 40000, "loss": 0.0827, "lr": 2.9599641535424938e-05, "epoch": 2.8785382168202953, "percentage": 44.11, "elapsed_time": "4:02:23", "remaining_time": "5:07:06", "throughput": 2335.21, "total_tokens": 33963072} {"current_steps": 17650, "total_steps": 40000, "loss": 0.1571, "lr": 2.9589991298832202e-05, "epoch": 2.879353944041113, "percentage": 44.12, "elapsed_time": "4:02:25", "remaining_time": "5:06:59", "throughput": 2335.43, "total_tokens": 33971104} {"current_steps": 17655, "total_steps": 40000, "loss": 0.0766, "lr": 2.958034035440513e-05, "epoch": 2.88016967126193, "percentage": 44.14, "elapsed_time": "4:02:28", "remaining_time": "5:06:52", "throughput": 2335.76, "total_tokens": 33980672} {"current_steps": 17660, "total_steps": 40000, "loss": 0.0254, "lr": 2.957068870363201e-05, "epoch": 2.880985398482747, "percentage": 44.15, "elapsed_time": "4:02:30", "remaining_time": "5:06:45", "throughput": 2336.04, "total_tokens": 33989696} {"current_steps": 17665, "total_steps": 40000, "loss": 0.12, "lr": 2.956103634800126e-05, "epoch": 2.8818011257035647, "percentage": 44.16, "elapsed_time": "4:02:32", "remaining_time": "5:06:39", "throughput": 2336.36, "total_tokens": 33999120} {"current_steps": 17670, "total_steps": 40000, "loss": 0.021, "lr": 2.9551383289001384e-05, "epoch": 2.8826168529243823, "percentage": 44.17, "elapsed_time": "4:02:34", "remaining_time": "5:06:32", "throughput": 2336.73, "total_tokens": 34009328} {"current_steps": 17675, "total_steps": 40000, "loss": 0.1389, "lr": 2.9541729528121005e-05, "epoch": 2.8834325801451994, "percentage": 44.19, "elapsed_time": "4:02:36", "remaining_time": "5:06:25", "throughput": 2337.06, "total_tokens": 34019056} {"current_steps": 17680, "total_steps": 40000, "loss": 0.0276, "lr": 2.9532075066848856e-05, "epoch": 2.8842483073660166, "percentage": 44.2, "elapsed_time": "4:02:38", "remaining_time": "5:06:19", "throughput": 2337.45, "total_tokens": 34029472} {"current_steps": 17685, "total_steps": 40000, "loss": 0.0117, "lr": 2.9522419906673786e-05, "epoch": 2.885064034586834, "percentage": 44.21, "elapsed_time": "4:02:40", "remaining_time": "5:06:12", "throughput": 2337.7, "total_tokens": 34038032} {"current_steps": 17690, "total_steps": 40000, "loss": 0.0319, "lr": 2.951276404908474e-05, "epoch": 2.8858797618076517, "percentage": 44.22, "elapsed_time": "4:02:42", "remaining_time": "5:06:05", "throughput": 2338.05, "total_tokens": 34047968} {"current_steps": 17695, "total_steps": 40000, "loss": 0.0714, "lr": 2.9503107495570752e-05, "epoch": 2.886695489028469, "percentage": 44.24, "elapsed_time": "4:02:44", "remaining_time": "5:05:59", "throughput": 2338.45, "total_tokens": 34058624} {"current_steps": 17700, "total_steps": 40000, "loss": 0.0798, "lr": 2.9493450247621003e-05, "epoch": 2.887511216249286, "percentage": 44.25, "elapsed_time": "4:02:46", "remaining_time": "5:05:52", "throughput": 2338.71, "total_tokens": 34067248} {"current_steps": 17705, "total_steps": 40000, "loss": 0.167, "lr": 2.948379230672476e-05, "epoch": 2.8883269434701035, "percentage": 44.26, "elapsed_time": "4:02:48", "remaining_time": "5:05:45", "throughput": 2339.08, "total_tokens": 34077568} {"current_steps": 17710, "total_steps": 40000, "loss": 0.1157, "lr": 2.9474133674371396e-05, "epoch": 2.889142670690921, "percentage": 44.27, "elapsed_time": "4:02:50", "remaining_time": "5:05:39", "throughput": 2339.42, "total_tokens": 34087296} {"current_steps": 17715, "total_steps": 40000, "loss": 0.0702, "lr": 2.9464474352050387e-05, "epoch": 2.8899583979117383, "percentage": 44.29, "elapsed_time": "4:02:52", "remaining_time": "5:05:32", "throughput": 2339.73, "total_tokens": 34096576} {"current_steps": 17720, "total_steps": 40000, "loss": 0.0077, "lr": 2.9454814341251336e-05, "epoch": 2.890774125132556, "percentage": 44.3, "elapsed_time": "4:02:54", "remaining_time": "5:05:25", "throughput": 2340.05, "total_tokens": 34106160} {"current_steps": 17725, "total_steps": 40000, "loss": 0.0064, "lr": 2.9445153643463942e-05, "epoch": 2.891589852353373, "percentage": 44.31, "elapsed_time": "4:02:57", "remaining_time": "5:05:18", "throughput": 2340.43, "total_tokens": 34116608} {"current_steps": 17730, "total_steps": 40000, "loss": 0.0661, "lr": 2.943549226017798e-05, "epoch": 2.8924055795741905, "percentage": 44.32, "elapsed_time": "4:02:59", "remaining_time": "5:05:12", "throughput": 2340.87, "total_tokens": 34127808} {"current_steps": 17735, "total_steps": 40000, "loss": 0.0042, "lr": 2.942583019288337e-05, "epoch": 2.8932213067950077, "percentage": 44.34, "elapsed_time": "4:03:01", "remaining_time": "5:05:05", "throughput": 2341.19, "total_tokens": 34137280} {"current_steps": 17740, "total_steps": 40000, "loss": 0.2629, "lr": 2.9416167443070132e-05, "epoch": 2.8940370340158252, "percentage": 44.35, "elapsed_time": "4:03:03", "remaining_time": "5:04:58", "throughput": 2341.51, "total_tokens": 34146848} {"current_steps": 17745, "total_steps": 40000, "loss": 0.1481, "lr": 2.9406504012228375e-05, "epoch": 2.8948527612366424, "percentage": 44.36, "elapsed_time": "4:03:05", "remaining_time": "5:04:52", "throughput": 2341.8, "total_tokens": 34155920} {"current_steps": 17750, "total_steps": 40000, "loss": 0.0285, "lr": 2.939683990184832e-05, "epoch": 2.89566848845746, "percentage": 44.38, "elapsed_time": "4:03:07", "remaining_time": "5:04:45", "throughput": 2342.17, "total_tokens": 34166160} {"current_steps": 17755, "total_steps": 40000, "loss": 0.0324, "lr": 2.93871751134203e-05, "epoch": 2.896484215678277, "percentage": 44.39, "elapsed_time": "4:03:09", "remaining_time": "5:04:38", "throughput": 2342.47, "total_tokens": 34175424} {"current_steps": 17760, "total_steps": 40000, "loss": 0.0116, "lr": 2.9377509648434752e-05, "epoch": 2.8972999428990946, "percentage": 44.4, "elapsed_time": "4:03:11", "remaining_time": "5:04:32", "throughput": 2342.86, "total_tokens": 34185904} {"current_steps": 17765, "total_steps": 40000, "loss": 0.1272, "lr": 2.9367843508382203e-05, "epoch": 2.898115670119912, "percentage": 44.41, "elapsed_time": "4:03:13", "remaining_time": "5:04:25", "throughput": 2343.12, "total_tokens": 34194528} {"current_steps": 17770, "total_steps": 40000, "loss": 0.0063, "lr": 2.9358176694753293e-05, "epoch": 2.8989313973407294, "percentage": 44.42, "elapsed_time": "4:03:15", "remaining_time": "5:04:18", "throughput": 2343.44, "total_tokens": 34204048} {"current_steps": 17775, "total_steps": 40000, "loss": 0.0759, "lr": 2.9348509209038766e-05, "epoch": 2.8997471245615465, "percentage": 44.44, "elapsed_time": "4:03:17", "remaining_time": "5:04:12", "throughput": 2343.81, "total_tokens": 34214432} {"current_steps": 17780, "total_steps": 40000, "loss": 0.0699, "lr": 2.933884105272947e-05, "epoch": 2.900562851782364, "percentage": 44.45, "elapsed_time": "4:03:19", "remaining_time": "5:04:05", "throughput": 2344.13, "total_tokens": 34223936} {"current_steps": 17785, "total_steps": 40000, "loss": 0.1767, "lr": 2.9329172227316366e-05, "epoch": 2.901378579003181, "percentage": 44.46, "elapsed_time": "4:03:21", "remaining_time": "5:03:59", "throughput": 2344.57, "total_tokens": 34235152} {"current_steps": 17790, "total_steps": 40000, "loss": 0.1498, "lr": 2.93195027342905e-05, "epoch": 2.9021943062239988, "percentage": 44.47, "elapsed_time": "4:03:23", "remaining_time": "5:03:52", "throughput": 2344.9, "total_tokens": 34244880} {"current_steps": 17795, "total_steps": 40000, "loss": 0.064, "lr": 2.9309832575143024e-05, "epoch": 2.903010033444816, "percentage": 44.49, "elapsed_time": "4:03:26", "remaining_time": "5:03:45", "throughput": 2345.28, "total_tokens": 34255312} {"current_steps": 17800, "total_steps": 40000, "loss": 0.1232, "lr": 2.930016175136521e-05, "epoch": 2.9038257606656335, "percentage": 44.5, "elapsed_time": "4:03:28", "remaining_time": "5:03:39", "throughput": 2345.6, "total_tokens": 34264832} {"current_steps": 17800, "total_steps": 40000, "eval_loss": 0.15844771265983582, "epoch": 2.9038257606656335, "percentage": 44.5, "elapsed_time": "4:04:48", "remaining_time": "5:05:19", "throughput": 2332.7, "total_tokens": 34264832} {"current_steps": 17805, "total_steps": 40000, "loss": 0.082, "lr": 2.9290490264448412e-05, "epoch": 2.904641487886451, "percentage": 44.51, "elapsed_time": "4:04:52", "remaining_time": "5:05:15", "throughput": 2332.69, "total_tokens": 34273728} {"current_steps": 17810, "total_steps": 40000, "loss": 0.1346, "lr": 2.9280818115884094e-05, "epoch": 2.905457215107268, "percentage": 44.52, "elapsed_time": "4:04:54", "remaining_time": "5:05:08", "throughput": 2333.0, "total_tokens": 34283168} {"current_steps": 17815, "total_steps": 40000, "loss": 0.1049, "lr": 2.9271145307163828e-05, "epoch": 2.9062729423280853, "percentage": 44.54, "elapsed_time": "4:04:56", "remaining_time": "5:05:02", "throughput": 2333.35, "total_tokens": 34293200} {"current_steps": 17820, "total_steps": 40000, "loss": 0.0074, "lr": 2.9261471839779287e-05, "epoch": 2.907088669548903, "percentage": 44.55, "elapsed_time": "4:04:59", "remaining_time": "5:04:55", "throughput": 2333.68, "total_tokens": 34302832} {"current_steps": 17825, "total_steps": 40000, "loss": 0.0172, "lr": 2.925179771522223e-05, "epoch": 2.9079043967697205, "percentage": 44.56, "elapsed_time": "4:05:01", "remaining_time": "5:04:48", "throughput": 2333.99, "total_tokens": 34312304} {"current_steps": 17830, "total_steps": 40000, "loss": 0.0527, "lr": 2.9242122934984535e-05, "epoch": 2.9087201239905376, "percentage": 44.57, "elapsed_time": "4:05:03", "remaining_time": "5:04:42", "throughput": 2334.33, "total_tokens": 34322144} {"current_steps": 17835, "total_steps": 40000, "loss": 0.1672, "lr": 2.9232447500558176e-05, "epoch": 2.9095358512113547, "percentage": 44.59, "elapsed_time": "4:05:05", "remaining_time": "5:04:35", "throughput": 2334.74, "total_tokens": 34333024} {"current_steps": 17840, "total_steps": 40000, "loss": 0.1224, "lr": 2.9222771413435225e-05, "epoch": 2.9103515784321723, "percentage": 44.6, "elapsed_time": "4:05:07", "remaining_time": "5:04:28", "throughput": 2335.04, "total_tokens": 34342208} {"current_steps": 17845, "total_steps": 40000, "loss": 0.0824, "lr": 2.9213094675107848e-05, "epoch": 2.91116730565299, "percentage": 44.61, "elapsed_time": "4:05:09", "remaining_time": "5:04:22", "throughput": 2335.31, "total_tokens": 34350992} {"current_steps": 17850, "total_steps": 40000, "loss": 0.0562, "lr": 2.9203417287068335e-05, "epoch": 2.911983032873807, "percentage": 44.62, "elapsed_time": "4:05:11", "remaining_time": "5:04:15", "throughput": 2335.6, "total_tokens": 34360096} {"current_steps": 17855, "total_steps": 40000, "loss": 0.1163, "lr": 2.9193739250809042e-05, "epoch": 2.912798760094624, "percentage": 44.64, "elapsed_time": "4:05:13", "remaining_time": "5:04:08", "throughput": 2335.89, "total_tokens": 34369312} {"current_steps": 17860, "total_steps": 40000, "loss": 0.1147, "lr": 2.9184060567822463e-05, "epoch": 2.9136144873154417, "percentage": 44.65, "elapsed_time": "4:05:15", "remaining_time": "5:04:02", "throughput": 2336.35, "total_tokens": 34380880} {"current_steps": 17865, "total_steps": 40000, "loss": 0.156, "lr": 2.9174381239601166e-05, "epoch": 2.9144302145362593, "percentage": 44.66, "elapsed_time": "4:05:17", "remaining_time": "5:03:55", "throughput": 2336.6, "total_tokens": 34389408} {"current_steps": 17870, "total_steps": 40000, "loss": 0.091, "lr": 2.916470126763783e-05, "epoch": 2.9152459417570764, "percentage": 44.67, "elapsed_time": "4:05:19", "remaining_time": "5:03:48", "throughput": 2336.87, "total_tokens": 34398192} {"current_steps": 17875, "total_steps": 40000, "loss": 0.1442, "lr": 2.9155020653425203e-05, "epoch": 2.9160616689778935, "percentage": 44.69, "elapsed_time": "4:05:21", "remaining_time": "5:03:42", "throughput": 2337.11, "total_tokens": 34406608} {"current_steps": 17880, "total_steps": 40000, "loss": 0.0296, "lr": 2.9145339398456184e-05, "epoch": 2.916877396198711, "percentage": 44.7, "elapsed_time": "4:05:23", "remaining_time": "5:03:35", "throughput": 2337.41, "total_tokens": 34415808} {"current_steps": 17885, "total_steps": 40000, "loss": 0.2068, "lr": 2.913565750422374e-05, "epoch": 2.9176931234195287, "percentage": 44.71, "elapsed_time": "4:05:25", "remaining_time": "5:03:28", "throughput": 2337.71, "total_tokens": 34425088} {"current_steps": 17890, "total_steps": 40000, "loss": 0.1029, "lr": 2.9125974972220938e-05, "epoch": 2.918508850640346, "percentage": 44.73, "elapsed_time": "4:05:28", "remaining_time": "5:03:22", "throughput": 2338.03, "total_tokens": 34434656} {"current_steps": 17895, "total_steps": 40000, "loss": 0.0104, "lr": 2.9116291803940932e-05, "epoch": 2.919324577861163, "percentage": 44.74, "elapsed_time": "4:05:30", "remaining_time": "5:03:15", "throughput": 2338.32, "total_tokens": 34443808} {"current_steps": 17900, "total_steps": 40000, "loss": 0.0124, "lr": 2.910660800087701e-05, "epoch": 2.9201403050819805, "percentage": 44.75, "elapsed_time": "4:05:32", "remaining_time": "5:03:08", "throughput": 2338.64, "total_tokens": 34453296} {"current_steps": 17905, "total_steps": 40000, "loss": 0.1028, "lr": 2.909692356452254e-05, "epoch": 2.920956032302798, "percentage": 44.76, "elapsed_time": "4:05:34", "remaining_time": "5:03:02", "throughput": 2338.99, "total_tokens": 34463312} {"current_steps": 17910, "total_steps": 40000, "loss": 0.0284, "lr": 2.9087238496370962e-05, "epoch": 2.9217717595236152, "percentage": 44.77, "elapsed_time": "4:05:36", "remaining_time": "5:02:55", "throughput": 2339.36, "total_tokens": 34473600} {"current_steps": 17915, "total_steps": 40000, "loss": 0.0678, "lr": 2.907755279791583e-05, "epoch": 2.922587486744433, "percentage": 44.79, "elapsed_time": "4:05:38", "remaining_time": "5:02:49", "throughput": 2339.66, "total_tokens": 34482960} {"current_steps": 17920, "total_steps": 40000, "loss": 0.1124, "lr": 2.906786647065083e-05, "epoch": 2.92340321396525, "percentage": 44.8, "elapsed_time": "4:05:40", "remaining_time": "5:02:42", "throughput": 2340.06, "total_tokens": 34493680} {"current_steps": 17925, "total_steps": 40000, "loss": 0.0543, "lr": 2.9058179516069695e-05, "epoch": 2.9242189411860675, "percentage": 44.81, "elapsed_time": "4:05:42", "remaining_time": "5:02:35", "throughput": 2340.34, "total_tokens": 34502592} {"current_steps": 17930, "total_steps": 40000, "loss": 0.0521, "lr": 2.9048491935666282e-05, "epoch": 2.9250346684068846, "percentage": 44.82, "elapsed_time": "4:05:44", "remaining_time": "5:02:29", "throughput": 2340.66, "total_tokens": 34512176} {"current_steps": 17935, "total_steps": 40000, "loss": 0.191, "lr": 2.9038803730934534e-05, "epoch": 2.925850395627702, "percentage": 44.84, "elapsed_time": "4:05:46", "remaining_time": "5:02:22", "throughput": 2341.02, "total_tokens": 34522304} {"current_steps": 17940, "total_steps": 40000, "loss": 0.0449, "lr": 2.9029114903368503e-05, "epoch": 2.9266661228485193, "percentage": 44.85, "elapsed_time": "4:05:48", "remaining_time": "5:02:15", "throughput": 2341.38, "total_tokens": 34532560} {"current_steps": 17945, "total_steps": 40000, "loss": 0.1088, "lr": 2.9019425454462318e-05, "epoch": 2.927481850069337, "percentage": 44.86, "elapsed_time": "4:05:50", "remaining_time": "5:02:09", "throughput": 2341.75, "total_tokens": 34542880} {"current_steps": 17950, "total_steps": 40000, "loss": 0.048, "lr": 2.9009735385710212e-05, "epoch": 2.928297577290154, "percentage": 44.88, "elapsed_time": "4:05:52", "remaining_time": "5:02:02", "throughput": 2342.03, "total_tokens": 34551792} {"current_steps": 17955, "total_steps": 40000, "loss": 0.1087, "lr": 2.900004469860652e-05, "epoch": 2.9291133045109716, "percentage": 44.89, "elapsed_time": "4:05:54", "remaining_time": "5:01:56", "throughput": 2342.38, "total_tokens": 34561872} {"current_steps": 17960, "total_steps": 40000, "loss": 0.0097, "lr": 2.8990353394645668e-05, "epoch": 2.9299290317317888, "percentage": 44.9, "elapsed_time": "4:05:57", "remaining_time": "5:01:49", "throughput": 2342.74, "total_tokens": 34572032} {"current_steps": 17965, "total_steps": 40000, "loss": 0.0104, "lr": 2.8980661475322186e-05, "epoch": 2.9307447589526063, "percentage": 44.91, "elapsed_time": "4:05:59", "remaining_time": "5:01:42", "throughput": 2343.14, "total_tokens": 34582640} {"current_steps": 17970, "total_steps": 40000, "loss": 0.0648, "lr": 2.897096894213067e-05, "epoch": 2.9315604861734235, "percentage": 44.92, "elapsed_time": "4:06:01", "remaining_time": "5:01:36", "throughput": 2343.47, "total_tokens": 34592400} {"current_steps": 17975, "total_steps": 40000, "loss": 0.1195, "lr": 2.8961275796565845e-05, "epoch": 2.932376213394241, "percentage": 44.94, "elapsed_time": "4:06:03", "remaining_time": "5:01:29", "throughput": 2343.77, "total_tokens": 34601664} {"current_steps": 17980, "total_steps": 40000, "loss": 0.0712, "lr": 2.8951582040122517e-05, "epoch": 2.933191940615058, "percentage": 44.95, "elapsed_time": "4:06:05", "remaining_time": "5:01:23", "throughput": 2344.14, "total_tokens": 34612064} {"current_steps": 17985, "total_steps": 40000, "loss": 0.0051, "lr": 2.894188767429557e-05, "epoch": 2.9340076678358757, "percentage": 44.96, "elapsed_time": "4:06:07", "remaining_time": "5:01:16", "throughput": 2344.42, "total_tokens": 34621056} {"current_steps": 17990, "total_steps": 40000, "loss": 0.1112, "lr": 2.8932192700580014e-05, "epoch": 2.934823395056693, "percentage": 44.98, "elapsed_time": "4:06:09", "remaining_time": "5:01:09", "throughput": 2344.78, "total_tokens": 34631168} {"current_steps": 17995, "total_steps": 40000, "loss": 0.1187, "lr": 2.8922497120470916e-05, "epoch": 2.9356391222775104, "percentage": 44.99, "elapsed_time": "4:06:11", "remaining_time": "5:01:03", "throughput": 2345.15, "total_tokens": 34641488} {"current_steps": 18000, "total_steps": 40000, "loss": 0.2073, "lr": 2.891280093546348e-05, "epoch": 2.936454849498328, "percentage": 45.0, "elapsed_time": "4:06:13", "remaining_time": "5:00:56", "throughput": 2345.59, "total_tokens": 34652800} {"current_steps": 18000, "total_steps": 40000, "eval_loss": 0.18465572595596313, "epoch": 2.936454849498328, "percentage": 45.0, "elapsed_time": "4:07:34", "remaining_time": "5:02:35", "throughput": 2332.83, "total_tokens": 34652800} {"current_steps": 18005, "total_steps": 40000, "loss": 0.1171, "lr": 2.890310414705297e-05, "epoch": 2.937270576719145, "percentage": 45.01, "elapsed_time": "4:07:38", "remaining_time": "5:02:30", "throughput": 2332.94, "total_tokens": 34663040} {"current_steps": 18010, "total_steps": 40000, "loss": 0.0073, "lr": 2.8893406756734742e-05, "epoch": 2.9380863039399623, "percentage": 45.02, "elapsed_time": "4:07:40", "remaining_time": "5:02:24", "throughput": 2333.27, "total_tokens": 34672704} {"current_steps": 18015, "total_steps": 40000, "loss": 0.0079, "lr": 2.888370876600427e-05, "epoch": 2.93890203116078, "percentage": 45.04, "elapsed_time": "4:07:42", "remaining_time": "5:02:17", "throughput": 2333.59, "total_tokens": 34682384} {"current_steps": 18020, "total_steps": 40000, "loss": 0.0386, "lr": 2.8874010176357104e-05, "epoch": 2.9397177583815974, "percentage": 45.05, "elapsed_time": "4:07:44", "remaining_time": "5:02:10", "throughput": 2333.93, "total_tokens": 34692208} {"current_steps": 18025, "total_steps": 40000, "loss": 0.0876, "lr": 2.886431098928888e-05, "epoch": 2.9405334856024146, "percentage": 45.06, "elapsed_time": "4:07:46", "remaining_time": "5:02:04", "throughput": 2334.28, "total_tokens": 34702368} {"current_steps": 18030, "total_steps": 40000, "loss": 0.0073, "lr": 2.885461120629534e-05, "epoch": 2.9413492128232317, "percentage": 45.07, "elapsed_time": "4:07:48", "remaining_time": "5:01:57", "throughput": 2334.59, "total_tokens": 34711744} {"current_steps": 18035, "total_steps": 40000, "loss": 0.0919, "lr": 2.8844910828872317e-05, "epoch": 2.9421649400440493, "percentage": 45.09, "elapsed_time": "4:07:50", "remaining_time": "5:01:50", "throughput": 2334.81, "total_tokens": 34719840} {"current_steps": 18040, "total_steps": 40000, "loss": 0.2704, "lr": 2.8835209858515715e-05, "epoch": 2.942980667264867, "percentage": 45.1, "elapsed_time": "4:07:52", "remaining_time": "5:01:44", "throughput": 2335.14, "total_tokens": 34729680} {"current_steps": 18045, "total_steps": 40000, "loss": 0.0238, "lr": 2.8825508296721566e-05, "epoch": 2.943796394485684, "percentage": 45.11, "elapsed_time": "4:07:54", "remaining_time": "5:01:37", "throughput": 2335.54, "total_tokens": 34740384} {"current_steps": 18050, "total_steps": 40000, "loss": 0.104, "lr": 2.881580614498596e-05, "epoch": 2.944612121706501, "percentage": 45.12, "elapsed_time": "4:07:56", "remaining_time": "5:01:31", "throughput": 2335.88, "total_tokens": 34750256} {"current_steps": 18055, "total_steps": 40000, "loss": 0.1082, "lr": 2.8806103404805103e-05, "epoch": 2.9454278489273187, "percentage": 45.14, "elapsed_time": "4:07:58", "remaining_time": "5:01:24", "throughput": 2336.25, "total_tokens": 34760688} {"current_steps": 18060, "total_steps": 40000, "loss": 0.0127, "lr": 2.8796400077675257e-05, "epoch": 2.9462435761481363, "percentage": 45.15, "elapsed_time": "4:08:00", "remaining_time": "5:01:17", "throughput": 2336.63, "total_tokens": 34771152} {"current_steps": 18065, "total_steps": 40000, "loss": 0.068, "lr": 2.8786696165092812e-05, "epoch": 2.9470593033689534, "percentage": 45.16, "elapsed_time": "4:08:02", "remaining_time": "5:01:11", "throughput": 2337.0, "total_tokens": 34781488} {"current_steps": 18070, "total_steps": 40000, "loss": 0.1132, "lr": 2.8776991668554236e-05, "epoch": 2.9478750305897705, "percentage": 45.17, "elapsed_time": "4:08:05", "remaining_time": "5:01:04", "throughput": 2337.31, "total_tokens": 34790976} {"current_steps": 18075, "total_steps": 40000, "loss": 0.0408, "lr": 2.876728658955608e-05, "epoch": 2.948690757810588, "percentage": 45.19, "elapsed_time": "4:08:07", "remaining_time": "5:00:58", "throughput": 2337.66, "total_tokens": 34801056} {"current_steps": 18080, "total_steps": 40000, "loss": 0.0023, "lr": 2.8757580929594986e-05, "epoch": 2.9495064850314057, "percentage": 45.2, "elapsed_time": "4:08:09", "remaining_time": "5:00:51", "throughput": 2337.99, "total_tokens": 34810800} {"current_steps": 18085, "total_steps": 40000, "loss": 0.0576, "lr": 2.87478746901677e-05, "epoch": 2.950322212252223, "percentage": 45.21, "elapsed_time": "4:08:11", "remaining_time": "5:00:44", "throughput": 2338.21, "total_tokens": 34818880} {"current_steps": 18090, "total_steps": 40000, "loss": 0.0321, "lr": 2.873816787277103e-05, "epoch": 2.9511379394730404, "percentage": 45.23, "elapsed_time": "4:08:13", "remaining_time": "5:00:38", "throughput": 2338.53, "total_tokens": 34828448} {"current_steps": 18095, "total_steps": 40000, "loss": 0.2981, "lr": 2.8728460478901903e-05, "epoch": 2.9519536666938575, "percentage": 45.24, "elapsed_time": "4:08:15", "remaining_time": "5:00:31", "throughput": 2338.7, "total_tokens": 34835952} {"current_steps": 18100, "total_steps": 40000, "loss": 0.0069, "lr": 2.8718752510057307e-05, "epoch": 2.952769393914675, "percentage": 45.25, "elapsed_time": "4:08:17", "remaining_time": "5:00:25", "throughput": 2339.01, "total_tokens": 34845408} {"current_steps": 18105, "total_steps": 40000, "loss": 0.059, "lr": 2.870904396773435e-05, "epoch": 2.953585121135492, "percentage": 45.26, "elapsed_time": "4:08:19", "remaining_time": "5:00:18", "throughput": 2339.31, "total_tokens": 34854768} {"current_steps": 18110, "total_steps": 40000, "loss": 0.0166, "lr": 2.86993348534302e-05, "epoch": 2.95440084835631, "percentage": 45.27, "elapsed_time": "4:08:21", "remaining_time": "5:00:11", "throughput": 2339.67, "total_tokens": 34864880} {"current_steps": 18115, "total_steps": 40000, "loss": 0.1803, "lr": 2.868962516864212e-05, "epoch": 2.955216575577127, "percentage": 45.29, "elapsed_time": "4:08:23", "remaining_time": "5:00:05", "throughput": 2339.96, "total_tokens": 34874096} {"current_steps": 18120, "total_steps": 40000, "loss": 0.0132, "lr": 2.8679914914867477e-05, "epoch": 2.9560323027979445, "percentage": 45.3, "elapsed_time": "4:08:25", "remaining_time": "4:59:58", "throughput": 2340.27, "total_tokens": 34883536} {"current_steps": 18125, "total_steps": 40000, "loss": 0.1164, "lr": 2.8670204093603713e-05, "epoch": 2.9568480300187616, "percentage": 45.31, "elapsed_time": "4:08:27", "remaining_time": "4:59:52", "throughput": 2340.64, "total_tokens": 34893984} {"current_steps": 18130, "total_steps": 40000, "loss": 0.1251, "lr": 2.8660492706348357e-05, "epoch": 2.957663757239579, "percentage": 45.32, "elapsed_time": "4:08:29", "remaining_time": "4:59:45", "throughput": 2340.91, "total_tokens": 34902848} {"current_steps": 18135, "total_steps": 40000, "loss": 0.1002, "lr": 2.8650780754599022e-05, "epoch": 2.9584794844603963, "percentage": 45.34, "elapsed_time": "4:08:32", "remaining_time": "4:59:39", "throughput": 2341.27, "total_tokens": 34913040} {"current_steps": 18140, "total_steps": 40000, "loss": 0.078, "lr": 2.8641068239853407e-05, "epoch": 2.959295211681214, "percentage": 45.35, "elapsed_time": "4:08:34", "remaining_time": "4:59:32", "throughput": 2341.52, "total_tokens": 34921600} {"current_steps": 18145, "total_steps": 40000, "loss": 0.0571, "lr": 2.863135516360932e-05, "epoch": 2.960110938902031, "percentage": 45.36, "elapsed_time": "4:08:36", "remaining_time": "4:59:25", "throughput": 2341.9, "total_tokens": 34932160} {"current_steps": 18150, "total_steps": 40000, "loss": 0.1018, "lr": 2.8621641527364633e-05, "epoch": 2.9609266661228486, "percentage": 45.38, "elapsed_time": "4:08:38", "remaining_time": "4:59:19", "throughput": 2342.3, "total_tokens": 34943040} {"current_steps": 18155, "total_steps": 40000, "loss": 0.0651, "lr": 2.8611927332617313e-05, "epoch": 2.9617423933436657, "percentage": 45.39, "elapsed_time": "4:08:40", "remaining_time": "4:59:12", "throughput": 2342.64, "total_tokens": 34952864} {"current_steps": 18160, "total_steps": 40000, "loss": 0.1216, "lr": 2.8602212580865405e-05, "epoch": 2.9625581205644833, "percentage": 45.4, "elapsed_time": "4:08:42", "remaining_time": "4:59:06", "throughput": 2342.91, "total_tokens": 34961840} {"current_steps": 18165, "total_steps": 40000, "loss": 0.0865, "lr": 2.859249727360705e-05, "epoch": 2.9633738477853004, "percentage": 45.41, "elapsed_time": "4:08:44", "remaining_time": "4:58:59", "throughput": 2343.27, "total_tokens": 34971952} {"current_steps": 18170, "total_steps": 40000, "loss": 0.064, "lr": 2.8582781412340465e-05, "epoch": 2.964189575006118, "percentage": 45.42, "elapsed_time": "4:08:46", "remaining_time": "4:58:53", "throughput": 2343.67, "total_tokens": 34982800} {"current_steps": 18175, "total_steps": 40000, "loss": 0.1243, "lr": 2.857306499856397e-05, "epoch": 2.965005302226935, "percentage": 45.44, "elapsed_time": "4:08:48", "remaining_time": "4:58:46", "throughput": 2343.99, "total_tokens": 34992464} {"current_steps": 18180, "total_steps": 40000, "loss": 0.0786, "lr": 2.856334803377594e-05, "epoch": 2.9658210294477527, "percentage": 45.45, "elapsed_time": "4:08:50", "remaining_time": "4:58:40", "throughput": 2344.26, "total_tokens": 35001360} {"current_steps": 18185, "total_steps": 40000, "loss": 0.0462, "lr": 2.8553630519474867e-05, "epoch": 2.96663675666857, "percentage": 45.46, "elapsed_time": "4:08:52", "remaining_time": "4:58:33", "throughput": 2344.58, "total_tokens": 35010880} {"current_steps": 18190, "total_steps": 40000, "loss": 0.1297, "lr": 2.8543912457159317e-05, "epoch": 2.9674524838893874, "percentage": 45.48, "elapsed_time": "4:08:54", "remaining_time": "4:58:26", "throughput": 2344.82, "total_tokens": 35019360} {"current_steps": 18195, "total_steps": 40000, "loss": 0.0597, "lr": 2.853419384832792e-05, "epoch": 2.968268211110205, "percentage": 45.49, "elapsed_time": "4:08:56", "remaining_time": "4:58:20", "throughput": 2345.01, "total_tokens": 35027104} {"current_steps": 18200, "total_steps": 40000, "loss": 0.0482, "lr": 2.8524474694479423e-05, "epoch": 2.969083938331022, "percentage": 45.5, "elapsed_time": "4:08:58", "remaining_time": "4:58:13", "throughput": 2345.29, "total_tokens": 35036144} {"current_steps": 18200, "total_steps": 40000, "eval_loss": 0.1611875742673874, "epoch": 2.969083938331022, "percentage": 45.5, "elapsed_time": "4:10:19", "remaining_time": "4:59:50", "throughput": 2332.68, "total_tokens": 35036144} {"current_steps": 18205, "total_steps": 40000, "loss": 0.1057, "lr": 2.851475499711264e-05, "epoch": 2.9698996655518393, "percentage": 45.51, "elapsed_time": "4:10:23", "remaining_time": "4:59:45", "throughput": 2332.69, "total_tokens": 35044944} {"current_steps": 18210, "total_steps": 40000, "loss": 0.0715, "lr": 2.8505034757726468e-05, "epoch": 2.970715392772657, "percentage": 45.52, "elapsed_time": "4:10:25", "remaining_time": "4:59:39", "throughput": 2332.97, "total_tokens": 35053936} {"current_steps": 18215, "total_steps": 40000, "loss": 0.1235, "lr": 2.8495313977819886e-05, "epoch": 2.9715311199934744, "percentage": 45.54, "elapsed_time": "4:10:27", "remaining_time": "4:59:32", "throughput": 2333.32, "total_tokens": 35064032} {"current_steps": 18220, "total_steps": 40000, "loss": 0.1158, "lr": 2.8485592658891956e-05, "epoch": 2.9723468472142915, "percentage": 45.55, "elapsed_time": "4:10:29", "remaining_time": "4:59:26", "throughput": 2333.58, "total_tokens": 35072832} {"current_steps": 18225, "total_steps": 40000, "loss": 0.0392, "lr": 2.8475870802441844e-05, "epoch": 2.9731625744351087, "percentage": 45.56, "elapsed_time": "4:10:31", "remaining_time": "4:59:19", "throughput": 2333.91, "total_tokens": 35082672} {"current_steps": 18230, "total_steps": 40000, "loss": 0.1633, "lr": 2.8466148409968774e-05, "epoch": 2.9739783016559262, "percentage": 45.57, "elapsed_time": "4:10:33", "remaining_time": "4:59:13", "throughput": 2334.14, "total_tokens": 35090848} {"current_steps": 18235, "total_steps": 40000, "loss": 0.0065, "lr": 2.8456425482972067e-05, "epoch": 2.974794028876744, "percentage": 45.59, "elapsed_time": "4:10:35", "remaining_time": "4:59:06", "throughput": 2334.4, "total_tokens": 35099712} {"current_steps": 18240, "total_steps": 40000, "loss": 0.1186, "lr": 2.84467020229511e-05, "epoch": 2.975609756097561, "percentage": 45.6, "elapsed_time": "4:10:37", "remaining_time": "4:58:59", "throughput": 2334.67, "total_tokens": 35108560} {"current_steps": 18245, "total_steps": 40000, "loss": 0.0043, "lr": 2.8436978031405375e-05, "epoch": 2.976425483318378, "percentage": 45.61, "elapsed_time": "4:10:39", "remaining_time": "4:58:53", "throughput": 2334.96, "total_tokens": 35117696} {"current_steps": 18250, "total_steps": 40000, "loss": 0.0923, "lr": 2.842725350983445e-05, "epoch": 2.9772412105391957, "percentage": 45.62, "elapsed_time": "4:10:42", "remaining_time": "4:58:46", "throughput": 2335.28, "total_tokens": 35127344} {"current_steps": 18255, "total_steps": 40000, "loss": 0.153, "lr": 2.8417528459737957e-05, "epoch": 2.9780569377600132, "percentage": 45.64, "elapsed_time": "4:10:44", "remaining_time": "4:58:40", "throughput": 2335.5, "total_tokens": 35135600} {"current_steps": 18260, "total_steps": 40000, "loss": 0.0687, "lr": 2.8407802882615624e-05, "epoch": 2.9788726649808304, "percentage": 45.65, "elapsed_time": "4:10:46", "remaining_time": "4:58:33", "throughput": 2335.85, "total_tokens": 35145632} {"current_steps": 18265, "total_steps": 40000, "loss": 0.0043, "lr": 2.8398076779967277e-05, "epoch": 2.9796883922016475, "percentage": 45.66, "elapsed_time": "4:10:48", "remaining_time": "4:58:27", "throughput": 2336.12, "total_tokens": 35154512} {"current_steps": 18270, "total_steps": 40000, "loss": 0.0174, "lr": 2.8388350153292774e-05, "epoch": 2.980504119422465, "percentage": 45.67, "elapsed_time": "4:10:50", "remaining_time": "4:58:20", "throughput": 2336.42, "total_tokens": 35163952} {"current_steps": 18275, "total_steps": 40000, "loss": 0.0054, "lr": 2.8378623004092103e-05, "epoch": 2.9813198466432826, "percentage": 45.69, "elapsed_time": "4:10:52", "remaining_time": "4:58:14", "throughput": 2336.73, "total_tokens": 35173440} {"current_steps": 18280, "total_steps": 40000, "loss": 0.1202, "lr": 2.8368895333865302e-05, "epoch": 2.9821355738640998, "percentage": 45.7, "elapsed_time": "4:10:54", "remaining_time": "4:58:07", "throughput": 2337.0, "total_tokens": 35182272} {"current_steps": 18285, "total_steps": 40000, "loss": 0.1643, "lr": 2.835916714411251e-05, "epoch": 2.9829513010849174, "percentage": 45.71, "elapsed_time": "4:10:56", "remaining_time": "4:58:00", "throughput": 2337.27, "total_tokens": 35191200} {"current_steps": 18290, "total_steps": 40000, "loss": 0.2133, "lr": 2.8349438436333926e-05, "epoch": 2.9837670283057345, "percentage": 45.73, "elapsed_time": "4:10:58", "remaining_time": "4:57:54", "throughput": 2337.6, "total_tokens": 35201024} {"current_steps": 18295, "total_steps": 40000, "loss": 0.0843, "lr": 2.833970921202984e-05, "epoch": 2.984582755526552, "percentage": 45.74, "elapsed_time": "4:11:00", "remaining_time": "4:57:47", "throughput": 2338.03, "total_tokens": 35212336} {"current_steps": 18300, "total_steps": 40000, "loss": 0.0573, "lr": 2.8329979472700628e-05, "epoch": 2.985398482747369, "percentage": 45.75, "elapsed_time": "4:11:02", "remaining_time": "4:57:41", "throughput": 2338.31, "total_tokens": 35221408} {"current_steps": 18305, "total_steps": 40000, "loss": 0.115, "lr": 2.832024921984674e-05, "epoch": 2.9862142099681868, "percentage": 45.76, "elapsed_time": "4:11:04", "remaining_time": "4:57:34", "throughput": 2338.56, "total_tokens": 35230096} {"current_steps": 18310, "total_steps": 40000, "loss": 0.088, "lr": 2.8310518454968693e-05, "epoch": 2.987029937189004, "percentage": 45.77, "elapsed_time": "4:11:06", "remaining_time": "4:57:28", "throughput": 2338.95, "total_tokens": 35240832} {"current_steps": 18315, "total_steps": 40000, "loss": 0.05, "lr": 2.8300787179567095e-05, "epoch": 2.9878456644098215, "percentage": 45.79, "elapsed_time": "4:11:08", "remaining_time": "4:57:21", "throughput": 2339.28, "total_tokens": 35250544} {"current_steps": 18320, "total_steps": 40000, "loss": 0.0679, "lr": 2.8291055395142636e-05, "epoch": 2.9886613916306386, "percentage": 45.8, "elapsed_time": "4:11:11", "remaining_time": "4:57:15", "throughput": 2339.45, "total_tokens": 35258048} {"current_steps": 18325, "total_steps": 40000, "loss": 0.0161, "lr": 2.8281323103196073e-05, "epoch": 2.989477118851456, "percentage": 45.81, "elapsed_time": "4:11:13", "remaining_time": "4:57:08", "throughput": 2339.86, "total_tokens": 35269008} {"current_steps": 18330, "total_steps": 40000, "loss": 0.0997, "lr": 2.8271590305228256e-05, "epoch": 2.9902928460722733, "percentage": 45.82, "elapsed_time": "4:11:15", "remaining_time": "4:57:02", "throughput": 2340.24, "total_tokens": 35279616} {"current_steps": 18335, "total_steps": 40000, "loss": 0.0665, "lr": 2.82618570027401e-05, "epoch": 2.991108573293091, "percentage": 45.84, "elapsed_time": "4:11:17", "remaining_time": "4:56:55", "throughput": 2340.54, "total_tokens": 35288976} {"current_steps": 18340, "total_steps": 40000, "loss": 0.0334, "lr": 2.8252123197232604e-05, "epoch": 2.991924300513908, "percentage": 45.85, "elapsed_time": "4:11:19", "remaining_time": "4:56:49", "throughput": 2340.88, "total_tokens": 35298976} {"current_steps": 18345, "total_steps": 40000, "loss": 0.0098, "lr": 2.8242388890206843e-05, "epoch": 2.9927400277347256, "percentage": 45.86, "elapsed_time": "4:11:21", "remaining_time": "4:56:42", "throughput": 2341.21, "total_tokens": 35308832} {"current_steps": 18350, "total_steps": 40000, "loss": 0.0278, "lr": 2.8232654083163967e-05, "epoch": 2.9935557549555427, "percentage": 45.88, "elapsed_time": "4:11:23", "remaining_time": "4:56:36", "throughput": 2341.52, "total_tokens": 35318320} {"current_steps": 18355, "total_steps": 40000, "loss": 0.1566, "lr": 2.822291877760521e-05, "epoch": 2.9943714821763603, "percentage": 45.89, "elapsed_time": "4:11:25", "remaining_time": "4:56:29", "throughput": 2341.86, "total_tokens": 35328256} {"current_steps": 18360, "total_steps": 40000, "loss": 0.0594, "lr": 2.8213182975031864e-05, "epoch": 2.9951872093971774, "percentage": 45.9, "elapsed_time": "4:11:27", "remaining_time": "4:56:23", "throughput": 2342.18, "total_tokens": 35337984} {"current_steps": 18365, "total_steps": 40000, "loss": 0.0719, "lr": 2.8203446676945337e-05, "epoch": 2.996002936617995, "percentage": 45.91, "elapsed_time": "4:11:29", "remaining_time": "4:56:16", "throughput": 2342.47, "total_tokens": 35347136} {"current_steps": 18370, "total_steps": 40000, "loss": 0.0086, "lr": 2.8193709884847075e-05, "epoch": 2.9968186638388126, "percentage": 45.92, "elapsed_time": "4:11:31", "remaining_time": "4:56:10", "throughput": 2342.8, "total_tokens": 35357040} {"current_steps": 18375, "total_steps": 40000, "loss": 0.0657, "lr": 2.8183972600238605e-05, "epoch": 2.9976343910596297, "percentage": 45.94, "elapsed_time": "4:11:33", "remaining_time": "4:56:03", "throughput": 2343.1, "total_tokens": 35366304} {"current_steps": 18380, "total_steps": 40000, "loss": 0.0714, "lr": 2.817423482462156e-05, "epoch": 2.998450118280447, "percentage": 45.95, "elapsed_time": "4:11:35", "remaining_time": "4:55:56", "throughput": 2343.33, "total_tokens": 35374736} {"current_steps": 18385, "total_steps": 40000, "loss": 0.024, "lr": 2.8164496559497605e-05, "epoch": 2.9992658455012644, "percentage": 45.96, "elapsed_time": "4:11:37", "remaining_time": "4:55:50", "throughput": 2343.56, "total_tokens": 35382944} {"current_steps": 18390, "total_steps": 40000, "loss": 0.2098, "lr": 2.815475780636852e-05, "epoch": 3.0, "percentage": 45.98, "elapsed_time": "4:11:39", "remaining_time": "4:55:43", "throughput": 2343.73, "total_tokens": 35390256} {"current_steps": 18395, "total_steps": 40000, "loss": 0.0066, "lr": 2.814501856673613e-05, "epoch": 3.0008157272208176, "percentage": 45.99, "elapsed_time": "4:11:42", "remaining_time": "4:55:37", "throughput": 2343.96, "total_tokens": 35399136} {"current_steps": 18400, "total_steps": 40000, "loss": 0.0212, "lr": 2.8135278842102353e-05, "epoch": 3.0016314544416347, "percentage": 46.0, "elapsed_time": "4:11:44", "remaining_time": "4:55:31", "throughput": 2344.38, "total_tokens": 35410304} {"current_steps": 18400, "total_steps": 40000, "eval_loss": 0.16577646136283875, "epoch": 3.0016314544416347, "percentage": 46.0, "elapsed_time": "4:13:05", "remaining_time": "4:57:06", "throughput": 2331.91, "total_tokens": 35410304} {"current_steps": 18405, "total_steps": 40000, "loss": 0.0086, "lr": 2.8125538633969183e-05, "epoch": 3.0024471816624523, "percentage": 46.01, "elapsed_time": "4:13:09", "remaining_time": "4:57:02", "throughput": 2331.88, "total_tokens": 35420608} {"current_steps": 18410, "total_steps": 40000, "loss": 0.0231, "lr": 2.8115797943838677e-05, "epoch": 3.0032629088832694, "percentage": 46.02, "elapsed_time": "4:13:11", "remaining_time": "4:56:55", "throughput": 2332.14, "total_tokens": 35429248} {"current_steps": 18415, "total_steps": 40000, "loss": 0.0969, "lr": 2.810605677321298e-05, "epoch": 3.004078636104087, "percentage": 46.04, "elapsed_time": "4:13:13", "remaining_time": "4:56:49", "throughput": 2332.52, "total_tokens": 35439968} {"current_steps": 18420, "total_steps": 40000, "loss": 0.0812, "lr": 2.809631512359428e-05, "epoch": 3.004894363324904, "percentage": 46.05, "elapsed_time": "4:13:15", "remaining_time": "4:56:42", "throughput": 2332.79, "total_tokens": 35448848} {"current_steps": 18425, "total_steps": 40000, "loss": 0.0272, "lr": 2.8086572996484884e-05, "epoch": 3.0057100905457217, "percentage": 46.06, "elapsed_time": "4:13:17", "remaining_time": "4:56:36", "throughput": 2333.15, "total_tokens": 35459168} {"current_steps": 18430, "total_steps": 40000, "loss": 0.092, "lr": 2.8076830393387143e-05, "epoch": 3.006525817766539, "percentage": 46.08, "elapsed_time": "4:13:20", "remaining_time": "4:56:29", "throughput": 2333.44, "total_tokens": 35468368} {"current_steps": 18435, "total_steps": 40000, "loss": 0.0037, "lr": 2.8067087315803497e-05, "epoch": 3.0073415449873564, "percentage": 46.09, "elapsed_time": "4:13:22", "remaining_time": "4:56:23", "throughput": 2333.78, "total_tokens": 35478416} {"current_steps": 18440, "total_steps": 40000, "loss": 0.114, "lr": 2.8057343765236433e-05, "epoch": 3.0081572722081735, "percentage": 46.1, "elapsed_time": "4:13:24", "remaining_time": "4:56:16", "throughput": 2334.12, "total_tokens": 35488432} {"current_steps": 18445, "total_steps": 40000, "loss": 0.035, "lr": 2.804759974318854e-05, "epoch": 3.008972999428991, "percentage": 46.11, "elapsed_time": "4:13:26", "remaining_time": "4:56:10", "throughput": 2334.47, "total_tokens": 35498592} {"current_steps": 18450, "total_steps": 40000, "loss": 0.1108, "lr": 2.8037855251162482e-05, "epoch": 3.0097887266498082, "percentage": 46.12, "elapsed_time": "4:13:28", "remaining_time": "4:56:03", "throughput": 2334.83, "total_tokens": 35508768} {"current_steps": 18455, "total_steps": 40000, "loss": 0.0031, "lr": 2.802811029066096e-05, "epoch": 3.010604453870626, "percentage": 46.14, "elapsed_time": "4:13:30", "remaining_time": "4:55:57", "throughput": 2335.23, "total_tokens": 35519824} {"current_steps": 18460, "total_steps": 40000, "loss": 0.0065, "lr": 2.8018364863186764e-05, "epoch": 3.011420181091443, "percentage": 46.15, "elapsed_time": "4:13:32", "remaining_time": "4:55:50", "throughput": 2335.62, "total_tokens": 35530592} {"current_steps": 18465, "total_steps": 40000, "loss": 0.0358, "lr": 2.800861897024279e-05, "epoch": 3.0122359083122605, "percentage": 46.16, "elapsed_time": "4:13:34", "remaining_time": "4:55:44", "throughput": 2335.95, "total_tokens": 35540416} {"current_steps": 18470, "total_steps": 40000, "loss": 0.0184, "lr": 2.799887261333196e-05, "epoch": 3.0130516355330776, "percentage": 46.17, "elapsed_time": "4:13:36", "remaining_time": "4:55:37", "throughput": 2336.28, "total_tokens": 35550224} {"current_steps": 18475, "total_steps": 40000, "loss": 0.1635, "lr": 2.798912579395728e-05, "epoch": 3.013867362753895, "percentage": 46.19, "elapsed_time": "4:13:38", "remaining_time": "4:55:31", "throughput": 2336.56, "total_tokens": 35559344} {"current_steps": 18480, "total_steps": 40000, "loss": 0.002, "lr": 2.797937851362185e-05, "epoch": 3.0146830899747123, "percentage": 46.2, "elapsed_time": "4:13:40", "remaining_time": "4:55:24", "throughput": 2336.9, "total_tokens": 35569280} {"current_steps": 18485, "total_steps": 40000, "loss": 0.0018, "lr": 2.7969630773828802e-05, "epoch": 3.01549881719553, "percentage": 46.21, "elapsed_time": "4:13:42", "remaining_time": "4:55:18", "throughput": 2337.13, "total_tokens": 35577616} {"current_steps": 18490, "total_steps": 40000, "loss": 0.0041, "lr": 2.7959882576081382e-05, "epoch": 3.016314544416347, "percentage": 46.23, "elapsed_time": "4:13:44", "remaining_time": "4:55:11", "throughput": 2337.42, "total_tokens": 35586976} {"current_steps": 18495, "total_steps": 40000, "loss": 0.0031, "lr": 2.795013392188286e-05, "epoch": 3.0171302716371646, "percentage": 46.24, "elapsed_time": "4:13:46", "remaining_time": "4:55:05", "throughput": 2337.76, "total_tokens": 35596928} {"current_steps": 18500, "total_steps": 40000, "loss": 0.0368, "lr": 2.7940384812736614e-05, "epoch": 3.0179459988579818, "percentage": 46.25, "elapsed_time": "4:13:49", "remaining_time": "4:54:58", "throughput": 2338.12, "total_tokens": 35607296} {"current_steps": 18505, "total_steps": 40000, "loss": 0.0668, "lr": 2.7930635250146087e-05, "epoch": 3.0187617260787993, "percentage": 46.26, "elapsed_time": "4:13:51", "remaining_time": "4:54:52", "throughput": 2338.46, "total_tokens": 35617248} {"current_steps": 18510, "total_steps": 40000, "loss": 0.003, "lr": 2.792088523561477e-05, "epoch": 3.0195774532996165, "percentage": 46.27, "elapsed_time": "4:13:53", "remaining_time": "4:54:45", "throughput": 2338.88, "total_tokens": 35628432} {"current_steps": 18515, "total_steps": 40000, "loss": 0.1072, "lr": 2.7911134770646246e-05, "epoch": 3.020393180520434, "percentage": 46.29, "elapsed_time": "4:13:55", "remaining_time": "4:54:39", "throughput": 2339.24, "total_tokens": 35638784} {"current_steps": 18520, "total_steps": 40000, "loss": 0.0038, "lr": 2.7901383856744157e-05, "epoch": 3.021208907741251, "percentage": 46.3, "elapsed_time": "4:13:57", "remaining_time": "4:54:32", "throughput": 2339.56, "total_tokens": 35648496} {"current_steps": 18525, "total_steps": 40000, "loss": 0.0186, "lr": 2.7891632495412217e-05, "epoch": 3.0220246349620687, "percentage": 46.31, "elapsed_time": "4:13:59", "remaining_time": "4:54:26", "throughput": 2339.92, "total_tokens": 35658784} {"current_steps": 18530, "total_steps": 40000, "loss": 0.001, "lr": 2.7881880688154205e-05, "epoch": 3.022840362182886, "percentage": 46.33, "elapsed_time": "4:14:01", "remaining_time": "4:54:19", "throughput": 2340.29, "total_tokens": 35669328} {"current_steps": 18535, "total_steps": 40000, "loss": 0.1083, "lr": 2.7872128436473977e-05, "epoch": 3.0236560894037035, "percentage": 46.34, "elapsed_time": "4:14:03", "remaining_time": "4:54:13", "throughput": 2340.62, "total_tokens": 35679200} {"current_steps": 18540, "total_steps": 40000, "loss": 0.0901, "lr": 2.7862375741875448e-05, "epoch": 3.0244718166245206, "percentage": 46.35, "elapsed_time": "4:14:05", "remaining_time": "4:54:06", "throughput": 2340.96, "total_tokens": 35689296} {"current_steps": 18545, "total_steps": 40000, "loss": 0.0461, "lr": 2.785262260586261e-05, "epoch": 3.025287543845338, "percentage": 46.36, "elapsed_time": "4:14:07", "remaining_time": "4:54:00", "throughput": 2341.33, "total_tokens": 35699776} {"current_steps": 18550, "total_steps": 40000, "loss": 0.034, "lr": 2.7842869029939517e-05, "epoch": 3.0261032710661553, "percentage": 46.38, "elapsed_time": "4:14:09", "remaining_time": "4:53:53", "throughput": 2341.6, "total_tokens": 35708800} {"current_steps": 18555, "total_steps": 40000, "loss": 0.0018, "lr": 2.7833115015610296e-05, "epoch": 3.026918998286973, "percentage": 46.39, "elapsed_time": "4:14:11", "remaining_time": "4:53:47", "throughput": 2341.94, "total_tokens": 35718688} {"current_steps": 18560, "total_steps": 40000, "loss": 0.001, "lr": 2.7823360564379136e-05, "epoch": 3.02773472550779, "percentage": 46.4, "elapsed_time": "4:14:13", "remaining_time": "4:53:40", "throughput": 2342.19, "total_tokens": 35727376} {"current_steps": 18565, "total_steps": 40000, "loss": 0.0374, "lr": 2.7813605677750297e-05, "epoch": 3.0285504527286076, "percentage": 46.41, "elapsed_time": "4:14:15", "remaining_time": "4:53:34", "throughput": 2342.41, "total_tokens": 35735664} {"current_steps": 18570, "total_steps": 40000, "loss": 0.0024, "lr": 2.7803850357228102e-05, "epoch": 3.0293661799494247, "percentage": 46.42, "elapsed_time": "4:14:18", "remaining_time": "4:53:27", "throughput": 2342.82, "total_tokens": 35746832} {"current_steps": 18575, "total_steps": 40000, "loss": 0.0749, "lr": 2.779409460431695e-05, "epoch": 3.0301819071702423, "percentage": 46.44, "elapsed_time": "4:14:20", "remaining_time": "4:53:21", "throughput": 2343.19, "total_tokens": 35757200} {"current_steps": 18580, "total_steps": 40000, "loss": 0.0023, "lr": 2.778433842052129e-05, "epoch": 3.03099763439106, "percentage": 46.45, "elapsed_time": "4:14:22", "remaining_time": "4:53:14", "throughput": 2343.53, "total_tokens": 35767264} {"current_steps": 18585, "total_steps": 40000, "loss": 0.101, "lr": 2.7774581807345664e-05, "epoch": 3.031813361611877, "percentage": 46.46, "elapsed_time": "4:14:24", "remaining_time": "4:53:08", "throughput": 2343.91, "total_tokens": 35778000} {"current_steps": 18590, "total_steps": 40000, "loss": 0.0161, "lr": 2.776482476629465e-05, "epoch": 3.0326290888326946, "percentage": 46.48, "elapsed_time": "4:14:26", "remaining_time": "4:53:02", "throughput": 2344.26, "total_tokens": 35788080} {"current_steps": 18595, "total_steps": 40000, "loss": 0.0462, "lr": 2.7755067298872924e-05, "epoch": 3.0334448160535117, "percentage": 46.49, "elapsed_time": "4:14:28", "remaining_time": "4:52:55", "throughput": 2344.54, "total_tokens": 35797264} {"current_steps": 18600, "total_steps": 40000, "loss": 0.0058, "lr": 2.774530940658518e-05, "epoch": 3.0342605432743293, "percentage": 46.5, "elapsed_time": "4:14:30", "remaining_time": "4:52:49", "throughput": 2344.97, "total_tokens": 35808688} {"current_steps": 18600, "total_steps": 40000, "eval_loss": 0.20917591452598572, "epoch": 3.0342605432743293, "percentage": 46.5, "elapsed_time": "4:15:51", "remaining_time": "4:54:22", "throughput": 2332.61, "total_tokens": 35808688} {"current_steps": 18605, "total_steps": 40000, "loss": 0.0021, "lr": 2.7735551090936236e-05, "epoch": 3.0350762704951464, "percentage": 46.51, "elapsed_time": "4:15:55", "remaining_time": "4:54:17", "throughput": 2332.67, "total_tokens": 35818112} {"current_steps": 18610, "total_steps": 40000, "loss": 0.0309, "lr": 2.7725792353430934e-05, "epoch": 3.035891997715964, "percentage": 46.52, "elapsed_time": "4:15:57", "remaining_time": "4:54:11", "throughput": 2333.01, "total_tokens": 35828208} {"current_steps": 18615, "total_steps": 40000, "loss": 0.0734, "lr": 2.77160331955742e-05, "epoch": 3.036707724936781, "percentage": 46.54, "elapsed_time": "4:15:59", "remaining_time": "4:54:04", "throughput": 2333.33, "total_tokens": 35837984} {"current_steps": 18620, "total_steps": 40000, "loss": 0.0245, "lr": 2.7706273618871008e-05, "epoch": 3.0375234521575987, "percentage": 46.55, "elapsed_time": "4:16:01", "remaining_time": "4:53:58", "throughput": 2333.69, "total_tokens": 35848416} {"current_steps": 18625, "total_steps": 40000, "loss": 0.0766, "lr": 2.769651362482642e-05, "epoch": 3.038339179378416, "percentage": 46.56, "elapsed_time": "4:16:03", "remaining_time": "4:53:51", "throughput": 2333.98, "total_tokens": 35857680} {"current_steps": 18630, "total_steps": 40000, "loss": 0.0775, "lr": 2.768675321494555e-05, "epoch": 3.0391549065992334, "percentage": 46.58, "elapsed_time": "4:16:05", "remaining_time": "4:53:45", "throughput": 2334.29, "total_tokens": 35867248} {"current_steps": 18635, "total_steps": 40000, "loss": 0.026, "lr": 2.7676992390733565e-05, "epoch": 3.0399706338200505, "percentage": 46.59, "elapsed_time": "4:16:07", "remaining_time": "4:53:38", "throughput": 2334.64, "total_tokens": 35877440} {"current_steps": 18640, "total_steps": 40000, "loss": 0.0017, "lr": 2.766723115369571e-05, "epoch": 3.040786361040868, "percentage": 46.6, "elapsed_time": "4:16:09", "remaining_time": "4:53:32", "throughput": 2334.92, "total_tokens": 35886640} {"current_steps": 18645, "total_steps": 40000, "loss": 0.0808, "lr": 2.765746950533729e-05, "epoch": 3.041602088261685, "percentage": 46.61, "elapsed_time": "4:16:11", "remaining_time": "4:53:25", "throughput": 2335.22, "total_tokens": 35896096} {"current_steps": 18650, "total_steps": 40000, "loss": 0.0867, "lr": 2.7647707447163684e-05, "epoch": 3.042417815482503, "percentage": 46.62, "elapsed_time": "4:16:13", "remaining_time": "4:53:19", "throughput": 2335.63, "total_tokens": 35907264} {"current_steps": 18655, "total_steps": 40000, "loss": 0.0391, "lr": 2.7637944980680315e-05, "epoch": 3.04323354270332, "percentage": 46.64, "elapsed_time": "4:16:15", "remaining_time": "4:53:12", "throughput": 2335.93, "total_tokens": 35916656} {"current_steps": 18660, "total_steps": 40000, "loss": 0.0877, "lr": 2.762818210739268e-05, "epoch": 3.0440492699241375, "percentage": 46.65, "elapsed_time": "4:16:17", "remaining_time": "4:53:06", "throughput": 2336.25, "total_tokens": 35926448} {"current_steps": 18665, "total_steps": 40000, "loss": 0.0011, "lr": 2.7618418828806332e-05, "epoch": 3.0448649971449546, "percentage": 46.66, "elapsed_time": "4:16:19", "remaining_time": "4:52:59", "throughput": 2336.53, "total_tokens": 35935488} {"current_steps": 18670, "total_steps": 40000, "loss": 0.0843, "lr": 2.76086551464269e-05, "epoch": 3.045680724365772, "percentage": 46.67, "elapsed_time": "4:16:21", "remaining_time": "4:52:53", "throughput": 2336.79, "total_tokens": 35944400} {"current_steps": 18675, "total_steps": 40000, "loss": 0.0023, "lr": 2.759889106176006e-05, "epoch": 3.0464964515865893, "percentage": 46.69, "elapsed_time": "4:16:24", "remaining_time": "4:52:47", "throughput": 2337.2, "total_tokens": 35955536} {"current_steps": 18680, "total_steps": 40000, "loss": 0.1327, "lr": 2.758912657631156e-05, "epoch": 3.047312178807407, "percentage": 46.7, "elapsed_time": "4:16:26", "remaining_time": "4:52:40", "throughput": 2337.55, "total_tokens": 35965744} {"current_steps": 18685, "total_steps": 40000, "loss": 0.0013, "lr": 2.7579361691587198e-05, "epoch": 3.048127906028224, "percentage": 46.71, "elapsed_time": "4:16:28", "remaining_time": "4:52:34", "throughput": 2337.93, "total_tokens": 35976560} {"current_steps": 18690, "total_steps": 40000, "loss": 0.0046, "lr": 2.756959640909285e-05, "epoch": 3.0489436332490416, "percentage": 46.73, "elapsed_time": "4:16:30", "remaining_time": "4:52:27", "throughput": 2338.32, "total_tokens": 35987312} {"current_steps": 18695, "total_steps": 40000, "loss": 0.0508, "lr": 2.7559830730334452e-05, "epoch": 3.0497593604698587, "percentage": 46.74, "elapsed_time": "4:16:32", "remaining_time": "4:52:21", "throughput": 2338.63, "total_tokens": 35996928} {"current_steps": 18700, "total_steps": 40000, "loss": 0.0011, "lr": 2.7550064656817988e-05, "epoch": 3.0505750876906763, "percentage": 46.75, "elapsed_time": "4:16:34", "remaining_time": "4:52:14", "throughput": 2338.89, "total_tokens": 36005888} {"current_steps": 18705, "total_steps": 40000, "loss": 0.0181, "lr": 2.7540298190049503e-05, "epoch": 3.0513908149114934, "percentage": 46.76, "elapsed_time": "4:16:36", "remaining_time": "4:52:08", "throughput": 2339.26, "total_tokens": 36016432} {"current_steps": 18710, "total_steps": 40000, "loss": 0.0446, "lr": 2.7530531331535107e-05, "epoch": 3.052206542132311, "percentage": 46.77, "elapsed_time": "4:16:38", "remaining_time": "4:52:01", "throughput": 2339.58, "total_tokens": 36026160} {"current_steps": 18715, "total_steps": 40000, "loss": 0.1064, "lr": 2.752076408278099e-05, "epoch": 3.053022269353128, "percentage": 46.79, "elapsed_time": "4:16:40", "remaining_time": "4:51:55", "throughput": 2339.86, "total_tokens": 36035360} {"current_steps": 18720, "total_steps": 40000, "loss": 0.0337, "lr": 2.751099644529337e-05, "epoch": 3.0538379965739457, "percentage": 46.8, "elapsed_time": "4:16:42", "remaining_time": "4:51:49", "throughput": 2340.27, "total_tokens": 36046400} {"current_steps": 18725, "total_steps": 40000, "loss": 0.0777, "lr": 2.7501228420578533e-05, "epoch": 3.054653723794763, "percentage": 46.81, "elapsed_time": "4:16:44", "remaining_time": "4:51:42", "throughput": 2340.49, "total_tokens": 36054640} {"current_steps": 18730, "total_steps": 40000, "loss": 0.1434, "lr": 2.7491460010142857e-05, "epoch": 3.0554694510155804, "percentage": 46.83, "elapsed_time": "4:16:46", "remaining_time": "4:51:36", "throughput": 2340.81, "total_tokens": 36064480} {"current_steps": 18735, "total_steps": 40000, "loss": 0.0596, "lr": 2.7481691215492727e-05, "epoch": 3.0562851782363976, "percentage": 46.84, "elapsed_time": "4:16:48", "remaining_time": "4:51:29", "throughput": 2341.11, "total_tokens": 36073936} {"current_steps": 18740, "total_steps": 40000, "loss": 0.0011, "lr": 2.747192203813463e-05, "epoch": 3.057100905457215, "percentage": 46.85, "elapsed_time": "4:16:50", "remaining_time": "4:51:23", "throughput": 2341.46, "total_tokens": 36084240} {"current_steps": 18745, "total_steps": 40000, "loss": 0.1184, "lr": 2.7462152479575087e-05, "epoch": 3.0579166326780323, "percentage": 46.86, "elapsed_time": "4:16:53", "remaining_time": "4:51:16", "throughput": 2341.77, "total_tokens": 36093728} {"current_steps": 18750, "total_steps": 40000, "loss": 0.0996, "lr": 2.7452382541320697e-05, "epoch": 3.05873235989885, "percentage": 46.88, "elapsed_time": "4:16:55", "remaining_time": "4:51:10", "throughput": 2342.08, "total_tokens": 36103456} {"current_steps": 18755, "total_steps": 40000, "loss": 0.058, "lr": 2.7442612224878096e-05, "epoch": 3.059548087119667, "percentage": 46.89, "elapsed_time": "4:16:57", "remaining_time": "4:51:04", "throughput": 2342.42, "total_tokens": 36113440} {"current_steps": 18760, "total_steps": 40000, "loss": 0.074, "lr": 2.7432841531753994e-05, "epoch": 3.0603638143404845, "percentage": 46.9, "elapsed_time": "4:16:59", "remaining_time": "4:50:57", "throughput": 2342.77, "total_tokens": 36123728} {"current_steps": 18765, "total_steps": 40000, "loss": 0.0717, "lr": 2.7423070463455147e-05, "epoch": 3.0611795415613017, "percentage": 46.91, "elapsed_time": "4:17:01", "remaining_time": "4:50:51", "throughput": 2343.07, "total_tokens": 36133328} {"current_steps": 18770, "total_steps": 40000, "loss": 0.1271, "lr": 2.7413299021488397e-05, "epoch": 3.0619952687821193, "percentage": 46.92, "elapsed_time": "4:17:03", "remaining_time": "4:50:44", "throughput": 2343.32, "total_tokens": 36141952} {"current_steps": 18775, "total_steps": 40000, "loss": 0.0663, "lr": 2.7403527207360615e-05, "epoch": 3.062810996002937, "percentage": 46.94, "elapsed_time": "4:17:05", "remaining_time": "4:50:38", "throughput": 2343.67, "total_tokens": 36152192} {"current_steps": 18780, "total_steps": 40000, "loss": 0.0362, "lr": 2.7393755022578722e-05, "epoch": 3.063626723223754, "percentage": 46.95, "elapsed_time": "4:17:07", "remaining_time": "4:50:31", "throughput": 2343.93, "total_tokens": 36160992} {"current_steps": 18785, "total_steps": 40000, "loss": 0.0063, "lr": 2.7383982468649714e-05, "epoch": 3.0644424504445715, "percentage": 46.96, "elapsed_time": "4:17:09", "remaining_time": "4:50:25", "throughput": 2344.27, "total_tokens": 36171088} {"current_steps": 18790, "total_steps": 40000, "loss": 0.0411, "lr": 2.7374209547080665e-05, "epoch": 3.0652581776653887, "percentage": 46.98, "elapsed_time": "4:17:11", "remaining_time": "4:50:19", "throughput": 2344.54, "total_tokens": 36180192} {"current_steps": 18795, "total_steps": 40000, "loss": 0.0064, "lr": 2.7364436259378663e-05, "epoch": 3.0660739048862062, "percentage": 46.99, "elapsed_time": "4:17:13", "remaining_time": "4:50:12", "throughput": 2344.91, "total_tokens": 36190672} {"current_steps": 18800, "total_steps": 40000, "loss": 0.048, "lr": 2.735466260705088e-05, "epoch": 3.0668896321070234, "percentage": 47.0, "elapsed_time": "4:17:15", "remaining_time": "4:50:06", "throughput": 2345.24, "total_tokens": 36200720} {"current_steps": 18800, "total_steps": 40000, "eval_loss": 0.18425863981246948, "epoch": 3.0668896321070234, "percentage": 47.0, "elapsed_time": "4:18:36", "remaining_time": "4:51:37", "throughput": 2333.03, "total_tokens": 36200720} {"current_steps": 18805, "total_steps": 40000, "loss": 0.0094, "lr": 2.7344888591604524e-05, "epoch": 3.067705359327841, "percentage": 47.01, "elapsed_time": "4:18:40", "remaining_time": "4:51:33", "throughput": 2333.06, "total_tokens": 36210592} {"current_steps": 18810, "total_steps": 40000, "loss": 0.0133, "lr": 2.7335114214546893e-05, "epoch": 3.068521086548658, "percentage": 47.02, "elapsed_time": "4:18:42", "remaining_time": "4:51:26", "throughput": 2333.29, "total_tokens": 36219040} {"current_steps": 18815, "total_steps": 40000, "loss": 0.0098, "lr": 2.7325339477385293e-05, "epoch": 3.0693368137694756, "percentage": 47.04, "elapsed_time": "4:18:44", "remaining_time": "4:51:20", "throughput": 2333.57, "total_tokens": 36228224} {"current_steps": 18820, "total_steps": 40000, "loss": 0.0879, "lr": 2.7315564381627128e-05, "epoch": 3.0701525409902928, "percentage": 47.05, "elapsed_time": "4:18:46", "remaining_time": "4:51:13", "throughput": 2333.9, "total_tokens": 36238160} {"current_steps": 18825, "total_steps": 40000, "loss": 0.0069, "lr": 2.7305788928779835e-05, "epoch": 3.0709682682111104, "percentage": 47.06, "elapsed_time": "4:18:48", "remaining_time": "4:51:07", "throughput": 2334.2, "total_tokens": 36247760} {"current_steps": 18830, "total_steps": 40000, "loss": 0.0444, "lr": 2.729601312035091e-05, "epoch": 3.0717839954319275, "percentage": 47.08, "elapsed_time": "4:18:51", "remaining_time": "4:51:01", "throughput": 2334.4, "total_tokens": 36255584} {"current_steps": 18835, "total_steps": 40000, "loss": 0.0068, "lr": 2.7286236957847915e-05, "epoch": 3.072599722652745, "percentage": 47.09, "elapsed_time": "4:18:53", "remaining_time": "4:50:54", "throughput": 2334.63, "total_tokens": 36264112} {"current_steps": 18840, "total_steps": 40000, "loss": 0.0008, "lr": 2.7276460442778446e-05, "epoch": 3.073415449873562, "percentage": 47.1, "elapsed_time": "4:18:55", "remaining_time": "4:50:48", "throughput": 2334.94, "total_tokens": 36273664} {"current_steps": 18845, "total_steps": 40000, "loss": 0.0425, "lr": 2.726668357665017e-05, "epoch": 3.0742311770943798, "percentage": 47.11, "elapsed_time": "4:18:57", "remaining_time": "4:50:41", "throughput": 2335.32, "total_tokens": 36284384} {"current_steps": 18850, "total_steps": 40000, "loss": 0.0023, "lr": 2.7256906360970808e-05, "epoch": 3.075046904315197, "percentage": 47.12, "elapsed_time": "4:18:59", "remaining_time": "4:50:35", "throughput": 2335.65, "total_tokens": 36294464} {"current_steps": 18855, "total_steps": 40000, "loss": 0.0489, "lr": 2.7247128797248117e-05, "epoch": 3.0758626315360145, "percentage": 47.14, "elapsed_time": "4:19:01", "remaining_time": "4:50:28", "throughput": 2335.92, "total_tokens": 36303424} {"current_steps": 18860, "total_steps": 40000, "loss": 0.0042, "lr": 2.7237350886989925e-05, "epoch": 3.0766783587568316, "percentage": 47.15, "elapsed_time": "4:19:03", "remaining_time": "4:50:22", "throughput": 2336.17, "total_tokens": 36312240} {"current_steps": 18865, "total_steps": 40000, "loss": 0.0078, "lr": 2.7227572631704107e-05, "epoch": 3.077494085977649, "percentage": 47.16, "elapsed_time": "4:19:05", "remaining_time": "4:50:16", "throughput": 2336.46, "total_tokens": 36321456} {"current_steps": 18870, "total_steps": 40000, "loss": 0.0011, "lr": 2.7217794032898596e-05, "epoch": 3.0783098131984663, "percentage": 47.17, "elapsed_time": "4:19:07", "remaining_time": "4:50:09", "throughput": 2336.78, "total_tokens": 36331296} {"current_steps": 18875, "total_steps": 40000, "loss": 0.0725, "lr": 2.7208015092081384e-05, "epoch": 3.079125540419284, "percentage": 47.19, "elapsed_time": "4:19:09", "remaining_time": "4:50:03", "throughput": 2337.11, "total_tokens": 36341344} {"current_steps": 18880, "total_steps": 40000, "loss": 0.0797, "lr": 2.719823581076049e-05, "epoch": 3.079941267640101, "percentage": 47.2, "elapsed_time": "4:19:11", "remaining_time": "4:49:56", "throughput": 2337.33, "total_tokens": 36349552} {"current_steps": 18885, "total_steps": 40000, "loss": 0.0775, "lr": 2.718845619044401e-05, "epoch": 3.0807569948609186, "percentage": 47.21, "elapsed_time": "4:19:13", "remaining_time": "4:49:50", "throughput": 2337.62, "total_tokens": 36358880} {"current_steps": 18890, "total_steps": 40000, "loss": 0.0006, "lr": 2.7178676232640088e-05, "epoch": 3.0815727220817357, "percentage": 47.23, "elapsed_time": "4:19:15", "remaining_time": "4:49:44", "throughput": 2337.93, "total_tokens": 36368576} {"current_steps": 18895, "total_steps": 40000, "loss": 0.001, "lr": 2.716889593885691e-05, "epoch": 3.0823884493025533, "percentage": 47.24, "elapsed_time": "4:19:17", "remaining_time": "4:49:37", "throughput": 2338.15, "total_tokens": 36376832} {"current_steps": 18900, "total_steps": 40000, "loss": 0.0325, "lr": 2.7159115310602716e-05, "epoch": 3.0832041765233704, "percentage": 47.25, "elapsed_time": "4:19:20", "remaining_time": "4:49:31", "throughput": 2338.53, "total_tokens": 36387712} {"current_steps": 18905, "total_steps": 40000, "loss": 0.0472, "lr": 2.7149334349385814e-05, "epoch": 3.084019903744188, "percentage": 47.26, "elapsed_time": "4:19:22", "remaining_time": "4:49:24", "throughput": 2338.85, "total_tokens": 36397456} {"current_steps": 18910, "total_steps": 40000, "loss": 0.0005, "lr": 2.713955305671454e-05, "epoch": 3.084835630965005, "percentage": 47.27, "elapsed_time": "4:19:24", "remaining_time": "4:49:18", "throughput": 2339.15, "total_tokens": 36406944} {"current_steps": 18915, "total_steps": 40000, "loss": 0.0636, "lr": 2.71297714340973e-05, "epoch": 3.0856513581858227, "percentage": 47.29, "elapsed_time": "4:19:26", "remaining_time": "4:49:12", "throughput": 2339.47, "total_tokens": 36416864} {"current_steps": 18920, "total_steps": 40000, "loss": 0.0342, "lr": 2.7119989483042545e-05, "epoch": 3.08646708540664, "percentage": 47.3, "elapsed_time": "4:19:28", "remaining_time": "4:49:05", "throughput": 2339.81, "total_tokens": 36426880} {"current_steps": 18925, "total_steps": 40000, "loss": 0.0004, "lr": 2.7110207205058768e-05, "epoch": 3.0872828126274574, "percentage": 47.31, "elapsed_time": "4:19:30", "remaining_time": "4:48:59", "throughput": 2340.07, "total_tokens": 36435840} {"current_steps": 18930, "total_steps": 40000, "loss": 0.0017, "lr": 2.7100424601654517e-05, "epoch": 3.0880985398482745, "percentage": 47.33, "elapsed_time": "4:19:32", "remaining_time": "4:48:52", "throughput": 2340.32, "total_tokens": 36444672} {"current_steps": 18935, "total_steps": 40000, "loss": 0.0033, "lr": 2.7090641674338403e-05, "epoch": 3.088914267069092, "percentage": 47.34, "elapsed_time": "4:19:34", "remaining_time": "4:48:46", "throughput": 2340.6, "total_tokens": 36453744} {"current_steps": 18940, "total_steps": 40000, "loss": 0.0952, "lr": 2.7080858424619072e-05, "epoch": 3.0897299942899092, "percentage": 47.35, "elapsed_time": "4:19:36", "remaining_time": "4:48:40", "throughput": 2340.95, "total_tokens": 36464016} {"current_steps": 18945, "total_steps": 40000, "loss": 0.0496, "lr": 2.707107485400521e-05, "epoch": 3.090545721510727, "percentage": 47.36, "elapsed_time": "4:19:38", "remaining_time": "4:48:33", "throughput": 2341.2, "total_tokens": 36472800} {"current_steps": 18950, "total_steps": 40000, "loss": 0.0018, "lr": 2.7061290964005586e-05, "epoch": 3.0913614487315444, "percentage": 47.38, "elapsed_time": "4:19:40", "remaining_time": "4:48:27", "throughput": 2341.52, "total_tokens": 36482672} {"current_steps": 18955, "total_steps": 40000, "loss": 0.1169, "lr": 2.7051506756129e-05, "epoch": 3.0921771759523615, "percentage": 47.39, "elapsed_time": "4:19:42", "remaining_time": "4:48:21", "throughput": 2341.88, "total_tokens": 36493216} {"current_steps": 18960, "total_steps": 40000, "loss": 0.0683, "lr": 2.704172223188428e-05, "epoch": 3.092992903173179, "percentage": 47.4, "elapsed_time": "4:19:44", "remaining_time": "4:48:14", "throughput": 2342.12, "total_tokens": 36501840} {"current_steps": 18965, "total_steps": 40000, "loss": 0.0035, "lr": 2.7031937392780334e-05, "epoch": 3.0938086303939962, "percentage": 47.41, "elapsed_time": "4:19:46", "remaining_time": "4:48:08", "throughput": 2342.43, "total_tokens": 36511456} {"current_steps": 18970, "total_steps": 40000, "loss": 0.1277, "lr": 2.702215224032611e-05, "epoch": 3.094624357614814, "percentage": 47.42, "elapsed_time": "4:19:49", "remaining_time": "4:48:01", "throughput": 2342.71, "total_tokens": 36520640} {"current_steps": 18975, "total_steps": 40000, "loss": 0.0899, "lr": 2.70123667760306e-05, "epoch": 3.095440084835631, "percentage": 47.44, "elapsed_time": "4:19:51", "remaining_time": "4:47:55", "throughput": 2343.04, "total_tokens": 36530608} {"current_steps": 18980, "total_steps": 40000, "loss": 0.0193, "lr": 2.7002581001402845e-05, "epoch": 3.0962558120564485, "percentage": 47.45, "elapsed_time": "4:19:53", "remaining_time": "4:47:49", "throughput": 2343.42, "total_tokens": 36541360} {"current_steps": 18985, "total_steps": 40000, "loss": 0.0635, "lr": 2.6992794917951923e-05, "epoch": 3.0970715392772656, "percentage": 47.46, "elapsed_time": "4:19:55", "remaining_time": "4:47:42", "throughput": 2343.72, "total_tokens": 36550992} {"current_steps": 18990, "total_steps": 40000, "loss": 0.0187, "lr": 2.6983008527187e-05, "epoch": 3.097887266498083, "percentage": 47.48, "elapsed_time": "4:19:57", "remaining_time": "4:47:36", "throughput": 2344.03, "total_tokens": 36560656} {"current_steps": 18995, "total_steps": 40000, "loss": 0.0029, "lr": 2.697322183061723e-05, "epoch": 3.0987029937189003, "percentage": 47.49, "elapsed_time": "4:19:59", "remaining_time": "4:47:30", "throughput": 2344.37, "total_tokens": 36570736} {"current_steps": 19000, "total_steps": 40000, "loss": 0.1001, "lr": 2.696343482975186e-05, "epoch": 3.099518720939718, "percentage": 47.5, "elapsed_time": "4:20:01", "remaining_time": "4:47:23", "throughput": 2344.66, "total_tokens": 36580112} {"current_steps": 19000, "total_steps": 40000, "eval_loss": 0.22107887268066406, "epoch": 3.099518720939718, "percentage": 47.5, "elapsed_time": "4:21:22", "remaining_time": "4:48:53", "throughput": 2332.58, "total_tokens": 36580112} {"current_steps": 19005, "total_steps": 40000, "loss": 0.03, "lr": 2.695364752610016e-05, "epoch": 3.100334448160535, "percentage": 47.51, "elapsed_time": "4:21:25", "remaining_time": "4:48:48", "throughput": 2332.67, "total_tokens": 36590096} {"current_steps": 19010, "total_steps": 40000, "loss": 0.0017, "lr": 2.6943859921171467e-05, "epoch": 3.1011501753813526, "percentage": 47.52, "elapsed_time": "4:21:27", "remaining_time": "4:48:41", "throughput": 2332.99, "total_tokens": 36599936} {"current_steps": 19015, "total_steps": 40000, "loss": 0.0024, "lr": 2.6934072016475143e-05, "epoch": 3.1019659026021698, "percentage": 47.54, "elapsed_time": "4:21:30", "remaining_time": "4:48:35", "throughput": 2333.28, "total_tokens": 36609280} {"current_steps": 19020, "total_steps": 40000, "loss": 0.0015, "lr": 2.6924283813520606e-05, "epoch": 3.1027816298229873, "percentage": 47.55, "elapsed_time": "4:21:32", "remaining_time": "4:48:29", "throughput": 2333.57, "total_tokens": 36618704} {"current_steps": 19025, "total_steps": 40000, "loss": 0.0697, "lr": 2.691449531381733e-05, "epoch": 3.1035973570438045, "percentage": 47.56, "elapsed_time": "4:21:34", "remaining_time": "4:48:22", "throughput": 2333.86, "total_tokens": 36628144} {"current_steps": 19030, "total_steps": 40000, "loss": 0.0053, "lr": 2.6904706518874816e-05, "epoch": 3.104413084264622, "percentage": 47.58, "elapsed_time": "4:21:36", "remaining_time": "4:48:16", "throughput": 2334.23, "total_tokens": 36638704} {"current_steps": 19035, "total_steps": 40000, "loss": 0.0051, "lr": 2.6894917430202615e-05, "epoch": 3.105228811485439, "percentage": 47.59, "elapsed_time": "4:21:38", "remaining_time": "4:48:10", "throughput": 2334.49, "total_tokens": 36647680} {"current_steps": 19040, "total_steps": 40000, "loss": 0.0598, "lr": 2.6885128049310343e-05, "epoch": 3.1060445387062567, "percentage": 47.6, "elapsed_time": "4:21:40", "remaining_time": "4:48:03", "throughput": 2334.93, "total_tokens": 36659360} {"current_steps": 19045, "total_steps": 40000, "loss": 0.1114, "lr": 2.687533837770762e-05, "epoch": 3.106860265927074, "percentage": 47.61, "elapsed_time": "4:21:42", "remaining_time": "4:47:57", "throughput": 2335.22, "total_tokens": 36668752} {"current_steps": 19050, "total_steps": 40000, "loss": 0.0041, "lr": 2.6865548416904162e-05, "epoch": 3.1076759931478914, "percentage": 47.62, "elapsed_time": "4:21:44", "remaining_time": "4:47:50", "throughput": 2335.53, "total_tokens": 36678480} {"current_steps": 19055, "total_steps": 40000, "loss": 0.0025, "lr": 2.68557581684097e-05, "epoch": 3.1084917203687086, "percentage": 47.64, "elapsed_time": "4:21:46", "remaining_time": "4:47:44", "throughput": 2335.74, "total_tokens": 36686720} {"current_steps": 19060, "total_steps": 40000, "loss": 0.1347, "lr": 2.6845967633733998e-05, "epoch": 3.109307447589526, "percentage": 47.65, "elapsed_time": "4:21:48", "remaining_time": "4:47:38", "throughput": 2336.02, "total_tokens": 36695920} {"current_steps": 19065, "total_steps": 40000, "loss": 0.0939, "lr": 2.683617681438689e-05, "epoch": 3.1101231748103433, "percentage": 47.66, "elapsed_time": "4:21:50", "remaining_time": "4:47:31", "throughput": 2336.36, "total_tokens": 36706064} {"current_steps": 19070, "total_steps": 40000, "loss": 0.0343, "lr": 2.682638571187825e-05, "epoch": 3.110938902031161, "percentage": 47.67, "elapsed_time": "4:21:52", "remaining_time": "4:47:25", "throughput": 2336.57, "total_tokens": 36714176} {"current_steps": 19075, "total_steps": 40000, "loss": 0.0019, "lr": 2.6816594327717976e-05, "epoch": 3.111754629251978, "percentage": 47.69, "elapsed_time": "4:21:54", "remaining_time": "4:47:19", "throughput": 2336.94, "total_tokens": 36724848} {"current_steps": 19080, "total_steps": 40000, "loss": 0.0864, "lr": 2.680680266341603e-05, "epoch": 3.1125703564727956, "percentage": 47.7, "elapsed_time": "4:21:57", "remaining_time": "4:47:12", "throughput": 2337.21, "total_tokens": 36733984} {"current_steps": 19085, "total_steps": 40000, "loss": 0.054, "lr": 2.67970107204824e-05, "epoch": 3.1133860836936127, "percentage": 47.71, "elapsed_time": "4:21:59", "remaining_time": "4:47:06", "throughput": 2337.44, "total_tokens": 36742496} {"current_steps": 19090, "total_steps": 40000, "loss": 0.0006, "lr": 2.6787218500427142e-05, "epoch": 3.1142018109144303, "percentage": 47.73, "elapsed_time": "4:22:01", "remaining_time": "4:47:00", "throughput": 2337.76, "total_tokens": 36752368} {"current_steps": 19095, "total_steps": 40000, "loss": 0.1711, "lr": 2.6777426004760332e-05, "epoch": 3.1150175381352474, "percentage": 47.74, "elapsed_time": "4:22:03", "remaining_time": "4:46:53", "throughput": 2338.15, "total_tokens": 36763344} {"current_steps": 19100, "total_steps": 40000, "loss": 0.002, "lr": 2.6767633234992094e-05, "epoch": 3.115833265356065, "percentage": 47.75, "elapsed_time": "4:22:05", "remaining_time": "4:46:47", "throughput": 2338.36, "total_tokens": 36771520} {"current_steps": 19105, "total_steps": 40000, "loss": 0.002, "lr": 2.6757840192632598e-05, "epoch": 3.116648992576882, "percentage": 47.76, "elapsed_time": "4:22:07", "remaining_time": "4:46:40", "throughput": 2338.68, "total_tokens": 36781312} {"current_steps": 19110, "total_steps": 40000, "loss": 0.1435, "lr": 2.6748046879192052e-05, "epoch": 3.1174647197976997, "percentage": 47.77, "elapsed_time": "4:22:09", "remaining_time": "4:46:34", "throughput": 2338.92, "total_tokens": 36789984} {"current_steps": 19115, "total_steps": 40000, "loss": 0.001, "lr": 2.673825329618071e-05, "epoch": 3.118280447018517, "percentage": 47.79, "elapsed_time": "4:22:11", "remaining_time": "4:46:28", "throughput": 2339.22, "total_tokens": 36799520} {"current_steps": 19120, "total_steps": 40000, "loss": 0.0682, "lr": 2.6728459445108866e-05, "epoch": 3.1190961742393344, "percentage": 47.8, "elapsed_time": "4:22:13", "remaining_time": "4:46:21", "throughput": 2339.55, "total_tokens": 36809568} {"current_steps": 19125, "total_steps": 40000, "loss": 0.0779, "lr": 2.6718665327486854e-05, "epoch": 3.1199119014601515, "percentage": 47.81, "elapsed_time": "4:22:15", "remaining_time": "4:46:15", "throughput": 2339.88, "total_tokens": 36819632} {"current_steps": 19130, "total_steps": 40000, "loss": 0.0033, "lr": 2.6708870944825048e-05, "epoch": 3.120727628680969, "percentage": 47.83, "elapsed_time": "4:22:17", "remaining_time": "4:46:09", "throughput": 2340.2, "total_tokens": 36829568} {"current_steps": 19135, "total_steps": 40000, "loss": 0.0723, "lr": 2.6699076298633874e-05, "epoch": 3.121543355901786, "percentage": 47.84, "elapsed_time": "4:22:19", "remaining_time": "4:46:02", "throughput": 2340.52, "total_tokens": 36839328} {"current_steps": 19140, "total_steps": 40000, "loss": 0.1543, "lr": 2.6689281390423788e-05, "epoch": 3.122359083122604, "percentage": 47.85, "elapsed_time": "4:22:21", "remaining_time": "4:45:56", "throughput": 2340.8, "total_tokens": 36848672} {"current_steps": 19145, "total_steps": 40000, "loss": 0.0464, "lr": 2.667948622170527e-05, "epoch": 3.1231748103434214, "percentage": 47.86, "elapsed_time": "4:22:23", "remaining_time": "4:45:50", "throughput": 2341.13, "total_tokens": 36858624} {"current_steps": 19150, "total_steps": 40000, "loss": 0.0054, "lr": 2.6669690793988873e-05, "epoch": 3.1239905375642385, "percentage": 47.88, "elapsed_time": "4:22:26", "remaining_time": "4:45:43", "throughput": 2341.4, "total_tokens": 36867728} {"current_steps": 19155, "total_steps": 40000, "loss": 0.0056, "lr": 2.665989510878518e-05, "epoch": 3.124806264785056, "percentage": 47.89, "elapsed_time": "4:22:28", "remaining_time": "4:45:37", "throughput": 2341.67, "total_tokens": 36876832} {"current_steps": 19160, "total_steps": 40000, "loss": 0.0706, "lr": 2.6650099167604793e-05, "epoch": 3.125621992005873, "percentage": 47.9, "elapsed_time": "4:22:30", "remaining_time": "4:45:31", "throughput": 2342.0, "total_tokens": 36886944} {"current_steps": 19165, "total_steps": 40000, "loss": 0.0703, "lr": 2.6640302971958376e-05, "epoch": 3.126437719226691, "percentage": 47.91, "elapsed_time": "4:22:32", "remaining_time": "4:45:24", "throughput": 2342.24, "total_tokens": 36895472} {"current_steps": 19170, "total_steps": 40000, "loss": 0.055, "lr": 2.6630506523356635e-05, "epoch": 3.127253446447508, "percentage": 47.93, "elapsed_time": "4:22:34", "remaining_time": "4:45:18", "throughput": 2342.53, "total_tokens": 36904912} {"current_steps": 19175, "total_steps": 40000, "loss": 0.0909, "lr": 2.6620709823310297e-05, "epoch": 3.1280691736683255, "percentage": 47.94, "elapsed_time": "4:22:36", "remaining_time": "4:45:12", "throughput": 2342.89, "total_tokens": 36915424} {"current_steps": 19180, "total_steps": 40000, "loss": 0.0067, "lr": 2.661091287333014e-05, "epoch": 3.1288849008891426, "percentage": 47.95, "elapsed_time": "4:22:38", "remaining_time": "4:45:05", "throughput": 2343.14, "total_tokens": 36924240} {"current_steps": 19185, "total_steps": 40000, "loss": 0.0031, "lr": 2.660111567492696e-05, "epoch": 3.12970062810996, "percentage": 47.96, "elapsed_time": "4:22:40", "remaining_time": "4:44:59", "throughput": 2343.48, "total_tokens": 36934448} {"current_steps": 19190, "total_steps": 40000, "loss": 0.0015, "lr": 2.6591318229611635e-05, "epoch": 3.1305163553307773, "percentage": 47.98, "elapsed_time": "4:22:42", "remaining_time": "4:44:53", "throughput": 2343.76, "total_tokens": 36943664} {"current_steps": 19195, "total_steps": 40000, "loss": 0.0026, "lr": 2.6581520538895037e-05, "epoch": 3.131332082551595, "percentage": 47.99, "elapsed_time": "4:22:44", "remaining_time": "4:44:46", "throughput": 2344.11, "total_tokens": 36954064} {"current_steps": 19200, "total_steps": 40000, "loss": 0.1011, "lr": 2.6571722604288102e-05, "epoch": 3.132147809772412, "percentage": 48.0, "elapsed_time": "4:22:46", "remaining_time": "4:44:40", "throughput": 2344.3, "total_tokens": 36961872} {"current_steps": 19200, "total_steps": 40000, "eval_loss": 0.22590163350105286, "epoch": 3.132147809772412, "percentage": 48.0, "elapsed_time": "4:24:07", "remaining_time": "4:46:08", "throughput": 2332.34, "total_tokens": 36961872} {"current_steps": 19205, "total_steps": 40000, "loss": 0.0019, "lr": 2.656192442730179e-05, "epoch": 3.1329635369932296, "percentage": 48.01, "elapsed_time": "4:24:11", "remaining_time": "4:46:03", "throughput": 2332.4, "total_tokens": 36971328} {"current_steps": 19210, "total_steps": 40000, "loss": 0.0024, "lr": 2.6552126009447098e-05, "epoch": 3.1337792642140467, "percentage": 48.02, "elapsed_time": "4:24:13", "remaining_time": "4:45:57", "throughput": 2332.66, "total_tokens": 36980224} {"current_steps": 19215, "total_steps": 40000, "loss": 0.1223, "lr": 2.654232735223507e-05, "epoch": 3.1345949914348643, "percentage": 48.04, "elapsed_time": "4:24:15", "remaining_time": "4:45:50", "throughput": 2332.91, "total_tokens": 36989024} {"current_steps": 19220, "total_steps": 40000, "loss": 0.0517, "lr": 2.6532528457176787e-05, "epoch": 3.1354107186556814, "percentage": 48.05, "elapsed_time": "4:24:17", "remaining_time": "4:45:44", "throughput": 2333.16, "total_tokens": 36997968} {"current_steps": 19225, "total_steps": 40000, "loss": 0.08, "lr": 2.6522729325783348e-05, "epoch": 3.136226445876499, "percentage": 48.06, "elapsed_time": "4:24:19", "remaining_time": "4:45:38", "throughput": 2333.44, "total_tokens": 37007136} {"current_steps": 19230, "total_steps": 40000, "loss": 0.1886, "lr": 2.6512929959565914e-05, "epoch": 3.137042173097316, "percentage": 48.08, "elapsed_time": "4:24:21", "remaining_time": "4:45:31", "throughput": 2333.77, "total_tokens": 37017264} {"current_steps": 19235, "total_steps": 40000, "loss": 0.1244, "lr": 2.6503130360035673e-05, "epoch": 3.1378579003181337, "percentage": 48.09, "elapsed_time": "4:24:23", "remaining_time": "4:45:25", "throughput": 2334.07, "total_tokens": 37026880} {"current_steps": 19240, "total_steps": 40000, "loss": 0.0289, "lr": 2.6493330528703835e-05, "epoch": 3.138673627538951, "percentage": 48.1, "elapsed_time": "4:24:25", "remaining_time": "4:45:19", "throughput": 2334.34, "total_tokens": 37035984} {"current_steps": 19245, "total_steps": 40000, "loss": 0.1105, "lr": 2.648353046708167e-05, "epoch": 3.1394893547597684, "percentage": 48.11, "elapsed_time": "4:24:27", "remaining_time": "4:45:12", "throughput": 2334.78, "total_tokens": 37047760} {"current_steps": 19250, "total_steps": 40000, "loss": 0.0058, "lr": 2.647373017668046e-05, "epoch": 3.1403050819805856, "percentage": 48.12, "elapsed_time": "4:24:29", "remaining_time": "4:45:06", "throughput": 2335.13, "total_tokens": 37058144} {"current_steps": 19255, "total_steps": 40000, "loss": 0.1345, "lr": 2.6463929659011537e-05, "epoch": 3.141120809201403, "percentage": 48.14, "elapsed_time": "4:24:31", "remaining_time": "4:45:00", "throughput": 2335.43, "total_tokens": 37067760} {"current_steps": 19260, "total_steps": 40000, "loss": 0.0031, "lr": 2.6454128915586262e-05, "epoch": 3.1419365364222203, "percentage": 48.15, "elapsed_time": "4:24:34", "remaining_time": "4:44:53", "throughput": 2335.67, "total_tokens": 37076384} {"current_steps": 19265, "total_steps": 40000, "loss": 0.0012, "lr": 2.6444327947916036e-05, "epoch": 3.142752263643038, "percentage": 48.16, "elapsed_time": "4:24:36", "remaining_time": "4:44:47", "throughput": 2335.97, "total_tokens": 37085968} {"current_steps": 19270, "total_steps": 40000, "loss": 0.0662, "lr": 2.6434526757512292e-05, "epoch": 3.143567990863855, "percentage": 48.18, "elapsed_time": "4:24:38", "remaining_time": "4:44:41", "throughput": 2336.3, "total_tokens": 37096128} {"current_steps": 19275, "total_steps": 40000, "loss": 0.061, "lr": 2.6424725345886486e-05, "epoch": 3.1443837180846725, "percentage": 48.19, "elapsed_time": "4:24:40", "remaining_time": "4:44:34", "throughput": 2336.6, "total_tokens": 37105664} {"current_steps": 19280, "total_steps": 40000, "loss": 0.0019, "lr": 2.641492371455014e-05, "epoch": 3.1451994453054897, "percentage": 48.2, "elapsed_time": "4:24:42", "remaining_time": "4:44:28", "throughput": 2336.86, "total_tokens": 37114688} {"current_steps": 19285, "total_steps": 40000, "loss": 0.0114, "lr": 2.640512186501477e-05, "epoch": 3.1460151725263072, "percentage": 48.21, "elapsed_time": "4:24:44", "remaining_time": "4:44:22", "throughput": 2337.17, "total_tokens": 37124416} {"current_steps": 19290, "total_steps": 40000, "loss": 0.1005, "lr": 2.639531979879195e-05, "epoch": 3.1468308997471244, "percentage": 48.23, "elapsed_time": "4:24:46", "remaining_time": "4:44:15", "throughput": 2337.45, "total_tokens": 37133680} {"current_steps": 19295, "total_steps": 40000, "loss": 0.0021, "lr": 2.638551751739328e-05, "epoch": 3.147646626967942, "percentage": 48.24, "elapsed_time": "4:24:48", "remaining_time": "4:44:09", "throughput": 2337.8, "total_tokens": 37144144} {"current_steps": 19300, "total_steps": 40000, "loss": 0.0099, "lr": 2.6375715022330404e-05, "epoch": 3.148462354188759, "percentage": 48.25, "elapsed_time": "4:24:50", "remaining_time": "4:44:03", "throughput": 2338.15, "total_tokens": 37154560} {"current_steps": 19305, "total_steps": 40000, "loss": 0.0036, "lr": 2.6365912315114976e-05, "epoch": 3.1492780814095767, "percentage": 48.26, "elapsed_time": "4:24:52", "remaining_time": "4:43:56", "throughput": 2338.49, "total_tokens": 37164848} {"current_steps": 19310, "total_steps": 40000, "loss": 0.0981, "lr": 2.6356109397258704e-05, "epoch": 3.150093808630394, "percentage": 48.27, "elapsed_time": "4:24:54", "remaining_time": "4:43:50", "throughput": 2338.89, "total_tokens": 37176000} {"current_steps": 19315, "total_steps": 40000, "loss": 0.0011, "lr": 2.6346306270273325e-05, "epoch": 3.1509095358512114, "percentage": 48.29, "elapsed_time": "4:24:56", "remaining_time": "4:43:44", "throughput": 2339.19, "total_tokens": 37185680} {"current_steps": 19320, "total_steps": 40000, "loss": 0.0705, "lr": 2.6336502935670608e-05, "epoch": 3.151725263072029, "percentage": 48.3, "elapsed_time": "4:24:58", "remaining_time": "4:43:38", "throughput": 2339.51, "total_tokens": 37195600} {"current_steps": 19325, "total_steps": 40000, "loss": 0.0011, "lr": 2.6326699394962333e-05, "epoch": 3.152540990292846, "percentage": 48.31, "elapsed_time": "4:25:00", "remaining_time": "4:43:31", "throughput": 2339.77, "total_tokens": 37204464} {"current_steps": 19330, "total_steps": 40000, "loss": 0.1484, "lr": 2.6316895649660334e-05, "epoch": 3.153356717513663, "percentage": 48.33, "elapsed_time": "4:25:03", "remaining_time": "4:43:25", "throughput": 2340.02, "total_tokens": 37213360} {"current_steps": 19335, "total_steps": 40000, "loss": 0.0459, "lr": 2.6307091701276486e-05, "epoch": 3.1541724447344808, "percentage": 48.34, "elapsed_time": "4:25:05", "remaining_time": "4:43:19", "throughput": 2340.3, "total_tokens": 37222608} {"current_steps": 19340, "total_steps": 40000, "loss": 0.0522, "lr": 2.629728755132267e-05, "epoch": 3.1549881719552983, "percentage": 48.35, "elapsed_time": "4:25:07", "remaining_time": "4:43:12", "throughput": 2340.65, "total_tokens": 37233072} {"current_steps": 19345, "total_steps": 40000, "loss": 0.0582, "lr": 2.628748320131081e-05, "epoch": 3.1558038991761155, "percentage": 48.36, "elapsed_time": "4:25:09", "remaining_time": "4:43:06", "throughput": 2340.91, "total_tokens": 37241984} {"current_steps": 19350, "total_steps": 40000, "loss": 0.0719, "lr": 2.6277678652752856e-05, "epoch": 3.156619626396933, "percentage": 48.38, "elapsed_time": "4:25:11", "remaining_time": "4:43:00", "throughput": 2341.16, "total_tokens": 37250832} {"current_steps": 19355, "total_steps": 40000, "loss": 0.0008, "lr": 2.6267873907160807e-05, "epoch": 3.15743535361775, "percentage": 48.39, "elapsed_time": "4:25:13", "remaining_time": "4:42:53", "throughput": 2341.47, "total_tokens": 37260608} {"current_steps": 19360, "total_steps": 40000, "loss": 0.0017, "lr": 2.6258068966046668e-05, "epoch": 3.1582510808385678, "percentage": 48.4, "elapsed_time": "4:25:15", "remaining_time": "4:42:47", "throughput": 2341.82, "total_tokens": 37271056} {"current_steps": 19365, "total_steps": 40000, "loss": 0.0007, "lr": 2.6248263830922475e-05, "epoch": 3.159066808059385, "percentage": 48.41, "elapsed_time": "4:25:17", "remaining_time": "4:42:41", "throughput": 2342.13, "total_tokens": 37280864} {"current_steps": 19370, "total_steps": 40000, "loss": 0.1153, "lr": 2.6238458503300318e-05, "epoch": 3.1598825352802025, "percentage": 48.43, "elapsed_time": "4:25:19", "remaining_time": "4:42:35", "throughput": 2342.44, "total_tokens": 37290592} {"current_steps": 19375, "total_steps": 40000, "loss": 0.1214, "lr": 2.6228652984692292e-05, "epoch": 3.1606982625010196, "percentage": 48.44, "elapsed_time": "4:25:21", "remaining_time": "4:42:28", "throughput": 2342.67, "total_tokens": 37299136} {"current_steps": 19380, "total_steps": 40000, "loss": 0.0679, "lr": 2.621884727661054e-05, "epoch": 3.161513989721837, "percentage": 48.45, "elapsed_time": "4:25:23", "remaining_time": "4:42:22", "throughput": 2342.94, "total_tokens": 37308272} {"current_steps": 19385, "total_steps": 40000, "loss": 0.0019, "lr": 2.6209041380567222e-05, "epoch": 3.1623297169426543, "percentage": 48.46, "elapsed_time": "4:25:25", "remaining_time": "4:42:16", "throughput": 2343.23, "total_tokens": 37317696} {"current_steps": 19390, "total_steps": 40000, "loss": 0.0524, "lr": 2.6199235298074527e-05, "epoch": 3.163145444163472, "percentage": 48.48, "elapsed_time": "4:25:27", "remaining_time": "4:42:10", "throughput": 2343.53, "total_tokens": 37327312} {"current_steps": 19395, "total_steps": 40000, "loss": 0.0023, "lr": 2.618942903064468e-05, "epoch": 3.163961171384289, "percentage": 48.49, "elapsed_time": "4:25:29", "remaining_time": "4:42:03", "throughput": 2343.79, "total_tokens": 37336384} {"current_steps": 19400, "total_steps": 40000, "loss": 0.0699, "lr": 2.6179622579789932e-05, "epoch": 3.1647768986051066, "percentage": 48.5, "elapsed_time": "4:25:31", "remaining_time": "4:41:57", "throughput": 2344.03, "total_tokens": 37345136} {"current_steps": 19400, "total_steps": 40000, "eval_loss": 0.21505676209926605, "epoch": 3.1647768986051066, "percentage": 48.5, "elapsed_time": "4:26:52", "remaining_time": "4:43:23", "throughput": 2332.19, "total_tokens": 37345136} {"current_steps": 19405, "total_steps": 40000, "loss": 0.4192, "lr": 2.6169815947022553e-05, "epoch": 3.1655926258259237, "percentage": 48.51, "elapsed_time": "4:26:56", "remaining_time": "4:43:18", "throughput": 2332.26, "total_tokens": 37354864} {"current_steps": 19410, "total_steps": 40000, "loss": 0.0068, "lr": 2.6160009133854853e-05, "epoch": 3.1664083530467413, "percentage": 48.52, "elapsed_time": "4:26:58", "remaining_time": "4:43:12", "throughput": 2332.62, "total_tokens": 37365408} {"current_steps": 19415, "total_steps": 40000, "loss": 0.066, "lr": 2.6150202141799168e-05, "epoch": 3.1672240802675584, "percentage": 48.54, "elapsed_time": "4:27:00", "remaining_time": "4:43:06", "throughput": 2332.97, "total_tokens": 37375952} {"current_steps": 19420, "total_steps": 40000, "loss": 0.047, "lr": 2.614039497236786e-05, "epoch": 3.168039807488376, "percentage": 48.55, "elapsed_time": "4:27:02", "remaining_time": "4:42:59", "throughput": 2333.29, "total_tokens": 37385808} {"current_steps": 19425, "total_steps": 40000, "loss": 0.0024, "lr": 2.6130587627073315e-05, "epoch": 3.168855534709193, "percentage": 48.56, "elapsed_time": "4:27:04", "remaining_time": "4:42:53", "throughput": 2333.61, "total_tokens": 37395808} {"current_steps": 19430, "total_steps": 40000, "loss": 0.0501, "lr": 2.6120780107427956e-05, "epoch": 3.1696712619300107, "percentage": 48.58, "elapsed_time": "4:27:06", "remaining_time": "4:42:47", "throughput": 2333.85, "total_tokens": 37404464} {"current_steps": 19435, "total_steps": 40000, "loss": 0.0867, "lr": 2.6110972414944214e-05, "epoch": 3.170486989150828, "percentage": 48.59, "elapsed_time": "4:27:09", "remaining_time": "4:42:41", "throughput": 2334.11, "total_tokens": 37414128} {"current_steps": 19440, "total_steps": 40000, "loss": 0.0029, "lr": 2.6101164551134565e-05, "epoch": 3.1713027163716454, "percentage": 48.6, "elapsed_time": "4:27:11", "remaining_time": "4:42:34", "throughput": 2334.44, "total_tokens": 37424240} {"current_steps": 19445, "total_steps": 40000, "loss": 0.0132, "lr": 2.6091356517511505e-05, "epoch": 3.1721184435924625, "percentage": 48.61, "elapsed_time": "4:27:13", "remaining_time": "4:42:28", "throughput": 2334.73, "total_tokens": 37433696} {"current_steps": 19450, "total_steps": 40000, "loss": 0.0025, "lr": 2.608154831558755e-05, "epoch": 3.17293417081328, "percentage": 48.62, "elapsed_time": "4:27:15", "remaining_time": "4:42:22", "throughput": 2335.03, "total_tokens": 37443392} {"current_steps": 19455, "total_steps": 40000, "loss": 0.2242, "lr": 2.607173994687526e-05, "epoch": 3.1737498980340972, "percentage": 48.64, "elapsed_time": "4:27:17", "remaining_time": "4:42:16", "throughput": 2335.39, "total_tokens": 37453968} {"current_steps": 19460, "total_steps": 40000, "loss": 0.1665, "lr": 2.6061931412887196e-05, "epoch": 3.174565625254915, "percentage": 48.65, "elapsed_time": "4:27:19", "remaining_time": "4:42:09", "throughput": 2335.61, "total_tokens": 37462336} {"current_steps": 19465, "total_steps": 40000, "loss": 0.0034, "lr": 2.6052122715135973e-05, "epoch": 3.175381352475732, "percentage": 48.66, "elapsed_time": "4:27:21", "remaining_time": "4:42:03", "throughput": 2335.97, "total_tokens": 37472944} {"current_steps": 19470, "total_steps": 40000, "loss": 0.0022, "lr": 2.60423138551342e-05, "epoch": 3.1761970796965495, "percentage": 48.68, "elapsed_time": "4:27:23", "remaining_time": "4:41:57", "throughput": 2336.28, "total_tokens": 37482736} {"current_steps": 19475, "total_steps": 40000, "loss": 0.0367, "lr": 2.6032504834394527e-05, "epoch": 3.1770128069173666, "percentage": 48.69, "elapsed_time": "4:27:25", "remaining_time": "4:41:50", "throughput": 2336.55, "total_tokens": 37491872} {"current_steps": 19480, "total_steps": 40000, "loss": 0.0623, "lr": 2.602269565442964e-05, "epoch": 3.1778285341381842, "percentage": 48.7, "elapsed_time": "4:27:27", "remaining_time": "4:41:44", "throughput": 2336.88, "total_tokens": 37502096} {"current_steps": 19485, "total_steps": 40000, "loss": 0.0712, "lr": 2.6012886316752227e-05, "epoch": 3.1786442613590014, "percentage": 48.71, "elapsed_time": "4:27:30", "remaining_time": "4:41:38", "throughput": 2337.2, "total_tokens": 37512032} {"current_steps": 19490, "total_steps": 40000, "loss": 0.0048, "lr": 2.6003076822875018e-05, "epoch": 3.179459988579819, "percentage": 48.73, "elapsed_time": "4:27:32", "remaining_time": "4:41:32", "throughput": 2337.55, "total_tokens": 37522656} {"current_steps": 19495, "total_steps": 40000, "loss": 0.0466, "lr": 2.5993267174310755e-05, "epoch": 3.180275715800636, "percentage": 48.74, "elapsed_time": "4:27:34", "remaining_time": "4:41:25", "throughput": 2337.82, "total_tokens": 37531840} {"current_steps": 19500, "total_steps": 40000, "loss": 0.0592, "lr": 2.5983457372572218e-05, "epoch": 3.1810914430214536, "percentage": 48.75, "elapsed_time": "4:27:36", "remaining_time": "4:41:19", "throughput": 2338.08, "total_tokens": 37540736} {"current_steps": 19505, "total_steps": 40000, "loss": 0.0277, "lr": 2.597364741917219e-05, "epoch": 3.1819071702422708, "percentage": 48.76, "elapsed_time": "4:27:38", "remaining_time": "4:41:13", "throughput": 2338.31, "total_tokens": 37549248} {"current_steps": 19510, "total_steps": 40000, "loss": 0.0085, "lr": 2.5963837315623492e-05, "epoch": 3.1827228974630883, "percentage": 48.77, "elapsed_time": "4:27:40", "remaining_time": "4:41:07", "throughput": 2338.59, "total_tokens": 37558640} {"current_steps": 19515, "total_steps": 40000, "loss": 0.002, "lr": 2.595402706343897e-05, "epoch": 3.183538624683906, "percentage": 48.79, "elapsed_time": "4:27:42", "remaining_time": "4:41:00", "throughput": 2338.9, "total_tokens": 37568416} {"current_steps": 19520, "total_steps": 40000, "loss": 0.0211, "lr": 2.594421666413148e-05, "epoch": 3.184354351904723, "percentage": 48.8, "elapsed_time": "4:27:44", "remaining_time": "4:40:54", "throughput": 2339.18, "total_tokens": 37577808} {"current_steps": 19525, "total_steps": 40000, "loss": 0.0017, "lr": 2.5934406119213928e-05, "epoch": 3.1851700791255406, "percentage": 48.81, "elapsed_time": "4:27:46", "remaining_time": "4:40:48", "throughput": 2339.56, "total_tokens": 37588768} {"current_steps": 19530, "total_steps": 40000, "loss": 0.0681, "lr": 2.5924595430199193e-05, "epoch": 3.1859858063463578, "percentage": 48.83, "elapsed_time": "4:27:48", "remaining_time": "4:40:42", "throughput": 2339.87, "total_tokens": 37598560} {"current_steps": 19535, "total_steps": 40000, "loss": 0.0607, "lr": 2.5914784598600238e-05, "epoch": 3.1868015335671753, "percentage": 48.84, "elapsed_time": "4:27:50", "remaining_time": "4:40:35", "throughput": 2340.21, "total_tokens": 37608992} {"current_steps": 19540, "total_steps": 40000, "loss": 0.002, "lr": 2.5904973625930002e-05, "epoch": 3.1876172607879925, "percentage": 48.85, "elapsed_time": "4:27:52", "remaining_time": "4:40:29", "throughput": 2340.57, "total_tokens": 37619648} {"current_steps": 19545, "total_steps": 40000, "loss": 0.1104, "lr": 2.5895162513701456e-05, "epoch": 3.18843298800881, "percentage": 48.86, "elapsed_time": "4:27:54", "remaining_time": "4:40:23", "throughput": 2340.94, "total_tokens": 37630352} {"current_steps": 19550, "total_steps": 40000, "loss": 0.0493, "lr": 2.5885351263427593e-05, "epoch": 3.189248715229627, "percentage": 48.88, "elapsed_time": "4:27:56", "remaining_time": "4:40:17", "throughput": 2341.2, "total_tokens": 37639440} {"current_steps": 19555, "total_steps": 40000, "loss": 0.0782, "lr": 2.5875539876621448e-05, "epoch": 3.1900644424504447, "percentage": 48.89, "elapsed_time": "4:27:59", "remaining_time": "4:40:10", "throughput": 2341.55, "total_tokens": 37649872} {"current_steps": 19560, "total_steps": 40000, "loss": 0.0267, "lr": 2.586572835479605e-05, "epoch": 3.190880169671262, "percentage": 48.9, "elapsed_time": "4:28:01", "remaining_time": "4:40:04", "throughput": 2341.82, "total_tokens": 37659088} {"current_steps": 19565, "total_steps": 40000, "loss": 0.0451, "lr": 2.585591669946446e-05, "epoch": 3.1916958968920794, "percentage": 48.91, "elapsed_time": "4:28:03", "remaining_time": "4:39:58", "throughput": 2342.08, "total_tokens": 37668080} {"current_steps": 19570, "total_steps": 40000, "loss": 0.0687, "lr": 2.5846104912139756e-05, "epoch": 3.1925116241128966, "percentage": 48.93, "elapsed_time": "4:28:05", "remaining_time": "4:39:52", "throughput": 2342.4, "total_tokens": 37678032} {"current_steps": 19575, "total_steps": 40000, "loss": 0.0364, "lr": 2.583629299433505e-05, "epoch": 3.193327351333714, "percentage": 48.94, "elapsed_time": "4:28:07", "remaining_time": "4:39:45", "throughput": 2342.7, "total_tokens": 37687728} {"current_steps": 19580, "total_steps": 40000, "loss": 0.0034, "lr": 2.582648094756345e-05, "epoch": 3.1941430785545313, "percentage": 48.95, "elapsed_time": "4:28:09", "remaining_time": "4:39:39", "throughput": 2342.98, "total_tokens": 37697104} {"current_steps": 19585, "total_steps": 40000, "loss": 0.0265, "lr": 2.5816668773338098e-05, "epoch": 3.194958805775349, "percentage": 48.96, "elapsed_time": "4:28:11", "remaining_time": "4:39:33", "throughput": 2343.2, "total_tokens": 37705536} {"current_steps": 19590, "total_steps": 40000, "loss": 0.0017, "lr": 2.580685647317216e-05, "epoch": 3.195774532996166, "percentage": 48.98, "elapsed_time": "4:28:13", "remaining_time": "4:39:27", "throughput": 2343.49, "total_tokens": 37714944} {"current_steps": 19595, "total_steps": 40000, "loss": 0.1226, "lr": 2.5797044048578818e-05, "epoch": 3.1965902602169836, "percentage": 48.99, "elapsed_time": "4:28:15", "remaining_time": "4:39:20", "throughput": 2343.78, "total_tokens": 37724512} {"current_steps": 19600, "total_steps": 40000, "loss": 0.0018, "lr": 2.5787231501071262e-05, "epoch": 3.1974059874378007, "percentage": 49.0, "elapsed_time": "4:28:17", "remaining_time": "4:39:14", "throughput": 2344.0, "total_tokens": 37732992} {"current_steps": 19600, "total_steps": 40000, "eval_loss": 0.22466830909252167, "epoch": 3.1974059874378007, "percentage": 49.0, "elapsed_time": "4:29:38", "remaining_time": "4:40:38", "throughput": 2332.29, "total_tokens": 37732992} {"current_steps": 19605, "total_steps": 40000, "loss": 0.0015, "lr": 2.577741883216272e-05, "epoch": 3.1982217146586183, "percentage": 49.01, "elapsed_time": "4:29:42", "remaining_time": "4:40:34", "throughput": 2332.39, "total_tokens": 37743120} {"current_steps": 19610, "total_steps": 40000, "loss": 0.0022, "lr": 2.576760604336642e-05, "epoch": 3.1990374418794354, "percentage": 49.02, "elapsed_time": "4:29:44", "remaining_time": "4:40:27", "throughput": 2332.56, "total_tokens": 37750736} {"current_steps": 19615, "total_steps": 40000, "loss": 0.0295, "lr": 2.575779313619563e-05, "epoch": 3.199853169100253, "percentage": 49.04, "elapsed_time": "4:29:46", "remaining_time": "4:40:21", "throughput": 2332.93, "total_tokens": 37761600} {"current_steps": 19620, "total_steps": 40000, "loss": 0.0689, "lr": 2.5747980112163605e-05, "epoch": 3.20066889632107, "percentage": 49.05, "elapsed_time": "4:29:48", "remaining_time": "4:40:15", "throughput": 2333.24, "total_tokens": 37771328} {"current_steps": 19625, "total_steps": 40000, "loss": 0.1619, "lr": 2.5738166972783656e-05, "epoch": 3.2014846235418877, "percentage": 49.06, "elapsed_time": "4:29:50", "remaining_time": "4:40:09", "throughput": 2333.54, "total_tokens": 37781104} {"current_steps": 19630, "total_steps": 40000, "loss": 0.0379, "lr": 2.5728353719569075e-05, "epoch": 3.202300350762705, "percentage": 49.08, "elapsed_time": "4:29:52", "remaining_time": "4:40:02", "throughput": 2333.88, "total_tokens": 37791424} {"current_steps": 19635, "total_steps": 40000, "loss": 0.0462, "lr": 2.57185403540332e-05, "epoch": 3.2031160779835224, "percentage": 49.09, "elapsed_time": "4:29:54", "remaining_time": "4:39:56", "throughput": 2334.12, "total_tokens": 37800160} {"current_steps": 19640, "total_steps": 40000, "loss": 0.001, "lr": 2.5708726877689375e-05, "epoch": 3.2039318052043395, "percentage": 49.1, "elapsed_time": "4:29:56", "remaining_time": "4:39:50", "throughput": 2334.44, "total_tokens": 37810064} {"current_steps": 19645, "total_steps": 40000, "loss": 0.0803, "lr": 2.5698913292050964e-05, "epoch": 3.204747532425157, "percentage": 49.11, "elapsed_time": "4:29:58", "remaining_time": "4:39:44", "throughput": 2334.65, "total_tokens": 37818368} {"current_steps": 19650, "total_steps": 40000, "loss": 0.0009, "lr": 2.568909959863133e-05, "epoch": 3.205563259645974, "percentage": 49.12, "elapsed_time": "4:30:00", "remaining_time": "4:39:37", "throughput": 2334.95, "total_tokens": 37828192} {"current_steps": 19655, "total_steps": 40000, "loss": 0.0009, "lr": 2.5679285798943887e-05, "epoch": 3.206378986866792, "percentage": 49.14, "elapsed_time": "4:30:02", "remaining_time": "4:39:31", "throughput": 2335.16, "total_tokens": 37836416} {"current_steps": 19660, "total_steps": 40000, "loss": 0.0006, "lr": 2.5669471894502035e-05, "epoch": 3.207194714087609, "percentage": 49.15, "elapsed_time": "4:30:04", "remaining_time": "4:39:25", "throughput": 2335.5, "total_tokens": 37846736} {"current_steps": 19665, "total_steps": 40000, "loss": 0.044, "lr": 2.56596578868192e-05, "epoch": 3.2080104413084265, "percentage": 49.16, "elapsed_time": "4:30:07", "remaining_time": "4:39:19", "throughput": 2335.77, "total_tokens": 37855920} {"current_steps": 19670, "total_steps": 40000, "loss": 0.0035, "lr": 2.564984377740883e-05, "epoch": 3.2088261685292436, "percentage": 49.18, "elapsed_time": "4:30:09", "remaining_time": "4:39:12", "throughput": 2336.07, "total_tokens": 37865632} {"current_steps": 19675, "total_steps": 40000, "loss": 0.0023, "lr": 2.564002956778438e-05, "epoch": 3.209641895750061, "percentage": 49.19, "elapsed_time": "4:30:11", "remaining_time": "4:39:06", "throughput": 2336.31, "total_tokens": 37874336} {"current_steps": 19680, "total_steps": 40000, "loss": 0.1804, "lr": 2.563021525945934e-05, "epoch": 3.2104576229708783, "percentage": 49.2, "elapsed_time": "4:30:13", "remaining_time": "4:39:00", "throughput": 2336.69, "total_tokens": 37885264} {"current_steps": 19685, "total_steps": 40000, "loss": 0.0873, "lr": 2.562040085394718e-05, "epoch": 3.211273350191696, "percentage": 49.21, "elapsed_time": "4:30:15", "remaining_time": "4:38:54", "throughput": 2336.94, "total_tokens": 37894240} {"current_steps": 19690, "total_steps": 40000, "loss": 0.1621, "lr": 2.56105863527614e-05, "epoch": 3.2120890774125135, "percentage": 49.23, "elapsed_time": "4:30:17", "remaining_time": "4:38:48", "throughput": 2337.34, "total_tokens": 37905600} {"current_steps": 19695, "total_steps": 40000, "loss": 0.0397, "lr": 2.5600771757415548e-05, "epoch": 3.2129048046333306, "percentage": 49.24, "elapsed_time": "4:30:19", "remaining_time": "4:38:41", "throughput": 2337.61, "total_tokens": 37914800} {"current_steps": 19700, "total_steps": 40000, "loss": 0.0011, "lr": 2.5590957069423134e-05, "epoch": 3.2137205318541477, "percentage": 49.25, "elapsed_time": "4:30:21", "remaining_time": "4:38:35", "throughput": 2337.85, "total_tokens": 37923520} {"current_steps": 19705, "total_steps": 40000, "loss": 0.1061, "lr": 2.5581142290297716e-05, "epoch": 3.2145362590749653, "percentage": 49.26, "elapsed_time": "4:30:23", "remaining_time": "4:38:29", "throughput": 2338.14, "total_tokens": 37933072} {"current_steps": 19710, "total_steps": 40000, "loss": 0.0522, "lr": 2.557132742155285e-05, "epoch": 3.215351986295783, "percentage": 49.28, "elapsed_time": "4:30:25", "remaining_time": "4:38:23", "throughput": 2338.48, "total_tokens": 37943440} {"current_steps": 19715, "total_steps": 40000, "loss": 0.0241, "lr": 2.556151246470212e-05, "epoch": 3.2161677135166, "percentage": 49.29, "elapsed_time": "4:30:27", "remaining_time": "4:38:16", "throughput": 2338.77, "total_tokens": 37952928} {"current_steps": 19720, "total_steps": 40000, "loss": 0.063, "lr": 2.5551697421259114e-05, "epoch": 3.2169834407374176, "percentage": 49.3, "elapsed_time": "4:30:29", "remaining_time": "4:38:10", "throughput": 2339.11, "total_tokens": 37963232} {"current_steps": 19725, "total_steps": 40000, "loss": 0.0022, "lr": 2.554188229273743e-05, "epoch": 3.2177991679582347, "percentage": 49.31, "elapsed_time": "4:30:31", "remaining_time": "4:38:04", "throughput": 2339.4, "total_tokens": 37972864} {"current_steps": 19730, "total_steps": 40000, "loss": 0.0633, "lr": 2.5532067080650678e-05, "epoch": 3.2186148951790523, "percentage": 49.33, "elapsed_time": "4:30:33", "remaining_time": "4:37:58", "throughput": 2339.72, "total_tokens": 37982896} {"current_steps": 19735, "total_steps": 40000, "loss": 0.051, "lr": 2.55222517865125e-05, "epoch": 3.2194306223998694, "percentage": 49.34, "elapsed_time": "4:30:36", "remaining_time": "4:37:52", "throughput": 2340.03, "total_tokens": 37992720} {"current_steps": 19740, "total_steps": 40000, "loss": 0.0924, "lr": 2.5512436411836538e-05, "epoch": 3.220246349620687, "percentage": 49.35, "elapsed_time": "4:30:38", "remaining_time": "4:37:45", "throughput": 2340.41, "total_tokens": 38003840} {"current_steps": 19745, "total_steps": 40000, "loss": 0.0054, "lr": 2.5502620958136443e-05, "epoch": 3.221062076841504, "percentage": 49.36, "elapsed_time": "4:30:40", "remaining_time": "4:37:39", "throughput": 2340.76, "total_tokens": 38014336} {"current_steps": 19750, "total_steps": 40000, "loss": 0.0275, "lr": 2.5492805426925874e-05, "epoch": 3.2218778040623217, "percentage": 49.38, "elapsed_time": "4:30:42", "remaining_time": "4:37:33", "throughput": 2341.06, "total_tokens": 38024048} {"current_steps": 19755, "total_steps": 40000, "loss": 0.0026, "lr": 2.5482989819718523e-05, "epoch": 3.222693531283139, "percentage": 49.39, "elapsed_time": "4:30:44", "remaining_time": "4:37:27", "throughput": 2341.34, "total_tokens": 38033440} {"current_steps": 19760, "total_steps": 40000, "loss": 0.1551, "lr": 2.5473174138028065e-05, "epoch": 3.2235092585039564, "percentage": 49.4, "elapsed_time": "4:30:46", "remaining_time": "4:37:21", "throughput": 2341.6, "total_tokens": 38042592} {"current_steps": 19765, "total_steps": 40000, "loss": 0.0047, "lr": 2.5463358383368212e-05, "epoch": 3.2243249857247736, "percentage": 49.41, "elapsed_time": "4:30:48", "remaining_time": "4:37:14", "throughput": 2341.83, "total_tokens": 38051200} {"current_steps": 19770, "total_steps": 40000, "loss": 0.0259, "lr": 2.545354255725267e-05, "epoch": 3.225140712945591, "percentage": 49.43, "elapsed_time": "4:30:50", "remaining_time": "4:37:08", "throughput": 2342.03, "total_tokens": 38059200} {"current_steps": 19775, "total_steps": 40000, "loss": 0.1374, "lr": 2.5443726661195165e-05, "epoch": 3.2259564401664083, "percentage": 49.44, "elapsed_time": "4:30:52", "remaining_time": "4:37:02", "throughput": 2342.32, "total_tokens": 38068848} {"current_steps": 19780, "total_steps": 40000, "loss": 0.0451, "lr": 2.543391069670944e-05, "epoch": 3.226772167387226, "percentage": 49.45, "elapsed_time": "4:30:54", "remaining_time": "4:36:56", "throughput": 2342.69, "total_tokens": 38079632} {"current_steps": 19785, "total_steps": 40000, "loss": 0.0276, "lr": 2.5424094665309228e-05, "epoch": 3.227587894608043, "percentage": 49.46, "elapsed_time": "4:30:56", "remaining_time": "4:36:50", "throughput": 2342.99, "total_tokens": 38089344} {"current_steps": 19790, "total_steps": 40000, "loss": 0.1633, "lr": 2.5414278568508292e-05, "epoch": 3.2284036218288605, "percentage": 49.48, "elapsed_time": "4:30:58", "remaining_time": "4:36:43", "throughput": 2343.3, "total_tokens": 38099200} {"current_steps": 19795, "total_steps": 40000, "loss": 0.0024, "lr": 2.540446240782039e-05, "epoch": 3.2292193490496777, "percentage": 49.49, "elapsed_time": "4:31:00", "remaining_time": "4:36:37", "throughput": 2343.55, "total_tokens": 38108160} {"current_steps": 19800, "total_steps": 40000, "loss": 0.0134, "lr": 2.5394646184759307e-05, "epoch": 3.2300350762704952, "percentage": 49.5, "elapsed_time": "4:31:02", "remaining_time": "4:36:31", "throughput": 2343.91, "total_tokens": 38118784} {"current_steps": 19800, "total_steps": 40000, "eval_loss": 0.232109934091568, "epoch": 3.2300350762704952, "percentage": 49.5, "elapsed_time": "4:32:23", "remaining_time": "4:37:53", "throughput": 2332.33, "total_tokens": 38118784} {"current_steps": 19805, "total_steps": 40000, "loss": 0.1011, "lr": 2.538482990083882e-05, "epoch": 3.2308508034913124, "percentage": 49.51, "elapsed_time": "4:32:27", "remaining_time": "4:37:49", "throughput": 2332.34, "total_tokens": 38127328} {"current_steps": 19810, "total_steps": 40000, "loss": 0.0369, "lr": 2.5375013557572725e-05, "epoch": 3.23166653071213, "percentage": 49.53, "elapsed_time": "4:32:29", "remaining_time": "4:37:42", "throughput": 2332.63, "total_tokens": 38136864} {"current_steps": 19815, "total_steps": 40000, "loss": 0.0826, "lr": 2.536519715647483e-05, "epoch": 3.232482257932947, "percentage": 49.54, "elapsed_time": "4:32:31", "remaining_time": "4:37:36", "throughput": 2332.91, "total_tokens": 38146304} {"current_steps": 19820, "total_steps": 40000, "loss": 0.0892, "lr": 2.535538069905894e-05, "epoch": 3.2332979851537647, "percentage": 49.55, "elapsed_time": "4:32:33", "remaining_time": "4:37:30", "throughput": 2333.17, "total_tokens": 38155520} {"current_steps": 19825, "total_steps": 40000, "loss": 0.0021, "lr": 2.534556418683888e-05, "epoch": 3.234113712374582, "percentage": 49.56, "elapsed_time": "4:32:35", "remaining_time": "4:37:24", "throughput": 2333.45, "total_tokens": 38164896} {"current_steps": 19830, "total_steps": 40000, "loss": 0.0013, "lr": 2.5335747621328486e-05, "epoch": 3.2349294395953994, "percentage": 49.58, "elapsed_time": "4:32:37", "remaining_time": "4:37:18", "throughput": 2333.76, "total_tokens": 38174736} {"current_steps": 19835, "total_steps": 40000, "loss": 0.1307, "lr": 2.5325931004041586e-05, "epoch": 3.2357451668162165, "percentage": 49.59, "elapsed_time": "4:32:39", "remaining_time": "4:37:11", "throughput": 2334.04, "total_tokens": 38184240} {"current_steps": 19840, "total_steps": 40000, "loss": 0.0134, "lr": 2.5316114336492032e-05, "epoch": 3.236560894037034, "percentage": 49.6, "elapsed_time": "4:32:41", "remaining_time": "4:37:05", "throughput": 2334.38, "total_tokens": 38194560} {"current_steps": 19845, "total_steps": 40000, "loss": 0.1344, "lr": 2.530629762019367e-05, "epoch": 3.237376621257851, "percentage": 49.61, "elapsed_time": "4:32:43", "remaining_time": "4:36:59", "throughput": 2334.71, "total_tokens": 38204752} {"current_steps": 19850, "total_steps": 40000, "loss": 0.0016, "lr": 2.5296480856660364e-05, "epoch": 3.2381923484786688, "percentage": 49.62, "elapsed_time": "4:32:45", "remaining_time": "4:36:53", "throughput": 2335.03, "total_tokens": 38214880} {"current_steps": 19855, "total_steps": 40000, "loss": 0.1216, "lr": 2.528666404740599e-05, "epoch": 3.239008075699486, "percentage": 49.64, "elapsed_time": "4:32:47", "remaining_time": "4:36:47", "throughput": 2335.31, "total_tokens": 38224352} {"current_steps": 19860, "total_steps": 40000, "loss": 0.0031, "lr": 2.527684719394442e-05, "epoch": 3.2398238029203035, "percentage": 49.65, "elapsed_time": "4:32:50", "remaining_time": "4:36:40", "throughput": 2335.5, "total_tokens": 38232208} {"current_steps": 19865, "total_steps": 40000, "loss": 0.1519, "lr": 2.526703029778953e-05, "epoch": 3.2406395301411206, "percentage": 49.66, "elapsed_time": "4:32:52", "remaining_time": "4:36:34", "throughput": 2335.81, "total_tokens": 38242160} {"current_steps": 19870, "total_steps": 40000, "loss": 0.0009, "lr": 2.5257213360455208e-05, "epoch": 3.241455257361938, "percentage": 49.68, "elapsed_time": "4:32:54", "remaining_time": "4:36:28", "throughput": 2336.11, "total_tokens": 38251952} {"current_steps": 19875, "total_steps": 40000, "loss": 0.0531, "lr": 2.5247396383455353e-05, "epoch": 3.2422709845827553, "percentage": 49.69, "elapsed_time": "4:32:56", "remaining_time": "4:36:22", "throughput": 2336.45, "total_tokens": 38262272} {"current_steps": 19880, "total_steps": 40000, "loss": 0.0248, "lr": 2.523757936830387e-05, "epoch": 3.243086711803573, "percentage": 49.7, "elapsed_time": "4:32:58", "remaining_time": "4:36:16", "throughput": 2336.75, "total_tokens": 38271952} {"current_steps": 19885, "total_steps": 40000, "loss": 0.1279, "lr": 2.5227762316514662e-05, "epoch": 3.2439024390243905, "percentage": 49.71, "elapsed_time": "4:33:00", "remaining_time": "4:36:09", "throughput": 2337.09, "total_tokens": 38282528} {"current_steps": 19890, "total_steps": 40000, "loss": 0.0568, "lr": 2.5217945229601648e-05, "epoch": 3.2447181662452076, "percentage": 49.73, "elapsed_time": "4:33:02", "remaining_time": "4:36:03", "throughput": 2337.38, "total_tokens": 38291952} {"current_steps": 19895, "total_steps": 40000, "loss": 0.0743, "lr": 2.5208128109078738e-05, "epoch": 3.245533893466025, "percentage": 49.74, "elapsed_time": "4:33:04", "remaining_time": "4:35:57", "throughput": 2337.69, "total_tokens": 38301920} {"current_steps": 19900, "total_steps": 40000, "loss": 0.0024, "lr": 2.5198310956459853e-05, "epoch": 3.2463496206868423, "percentage": 49.75, "elapsed_time": "4:33:06", "remaining_time": "4:35:51", "throughput": 2338.01, "total_tokens": 38312128} {"current_steps": 19905, "total_steps": 40000, "loss": 0.091, "lr": 2.518849377325893e-05, "epoch": 3.24716534790766, "percentage": 49.76, "elapsed_time": "4:33:08", "remaining_time": "4:35:45", "throughput": 2338.26, "total_tokens": 38321024} {"current_steps": 19910, "total_steps": 40000, "loss": 0.0951, "lr": 2.51786765609899e-05, "epoch": 3.247981075128477, "percentage": 49.78, "elapsed_time": "4:33:10", "remaining_time": "4:35:38", "throughput": 2338.61, "total_tokens": 38331520} {"current_steps": 19915, "total_steps": 40000, "loss": 0.0157, "lr": 2.5168859321166694e-05, "epoch": 3.2487968023492946, "percentage": 49.79, "elapsed_time": "4:33:12", "remaining_time": "4:35:32", "throughput": 2338.89, "total_tokens": 38341008} {"current_steps": 19920, "total_steps": 40000, "loss": 0.0807, "lr": 2.515904205530326e-05, "epoch": 3.2496125295701117, "percentage": 49.8, "elapsed_time": "4:33:14", "remaining_time": "4:35:26", "throughput": 2339.2, "total_tokens": 38350880} {"current_steps": 19925, "total_steps": 40000, "loss": 0.0664, "lr": 2.514922476491355e-05, "epoch": 3.2504282567909293, "percentage": 49.81, "elapsed_time": "4:33:16", "remaining_time": "4:35:20", "throughput": 2339.45, "total_tokens": 38359920} {"current_steps": 19930, "total_steps": 40000, "loss": 0.0018, "lr": 2.51394074515115e-05, "epoch": 3.2512439840117464, "percentage": 49.83, "elapsed_time": "4:33:19", "remaining_time": "4:35:14", "throughput": 2339.76, "total_tokens": 38369840} {"current_steps": 19935, "total_steps": 40000, "loss": 0.0173, "lr": 2.5129590116611067e-05, "epoch": 3.252059711232564, "percentage": 49.84, "elapsed_time": "4:33:21", "remaining_time": "4:35:08", "throughput": 2340.1, "total_tokens": 38380224} {"current_steps": 19940, "total_steps": 40000, "loss": 0.0619, "lr": 2.5119772761726212e-05, "epoch": 3.252875438453381, "percentage": 49.85, "elapsed_time": "4:33:23", "remaining_time": "4:35:01", "throughput": 2340.39, "total_tokens": 38389904} {"current_steps": 19945, "total_steps": 40000, "loss": 0.0054, "lr": 2.5109955388370893e-05, "epoch": 3.2536911656741987, "percentage": 49.86, "elapsed_time": "4:33:25", "remaining_time": "4:34:55", "throughput": 2340.72, "total_tokens": 38400032} {"current_steps": 19950, "total_steps": 40000, "loss": 0.0597, "lr": 2.510013799805907e-05, "epoch": 3.254506892895016, "percentage": 49.88, "elapsed_time": "4:33:27", "remaining_time": "4:34:49", "throughput": 2340.94, "total_tokens": 38408544} {"current_steps": 19955, "total_steps": 40000, "loss": 0.0013, "lr": 2.5090320592304706e-05, "epoch": 3.2553226201158334, "percentage": 49.89, "elapsed_time": "4:33:29", "remaining_time": "4:34:43", "throughput": 2341.18, "total_tokens": 38417312} {"current_steps": 19960, "total_steps": 40000, "loss": 0.007, "lr": 2.5080503172621777e-05, "epoch": 3.2561383473366505, "percentage": 49.9, "elapsed_time": "4:33:31", "remaining_time": "4:34:37", "throughput": 2341.44, "total_tokens": 38426416} {"current_steps": 19965, "total_steps": 40000, "loss": 0.0689, "lr": 2.5070685740524246e-05, "epoch": 3.256954074557468, "percentage": 49.91, "elapsed_time": "4:33:33", "remaining_time": "4:34:31", "throughput": 2341.81, "total_tokens": 38437264} {"current_steps": 19970, "total_steps": 40000, "loss": 0.0021, "lr": 2.5060868297526084e-05, "epoch": 3.2577698017782852, "percentage": 49.93, "elapsed_time": "4:33:35", "remaining_time": "4:34:24", "throughput": 2342.08, "total_tokens": 38446576} {"current_steps": 19975, "total_steps": 40000, "loss": 0.1109, "lr": 2.5051050845141267e-05, "epoch": 3.258585528999103, "percentage": 49.94, "elapsed_time": "4:33:37", "remaining_time": "4:34:18", "throughput": 2342.42, "total_tokens": 38457056} {"current_steps": 19980, "total_steps": 40000, "loss": 0.1549, "lr": 2.5041233384883765e-05, "epoch": 3.25940125621992, "percentage": 49.95, "elapsed_time": "4:33:39", "remaining_time": "4:34:12", "throughput": 2342.66, "total_tokens": 38465808} {"current_steps": 19985, "total_steps": 40000, "loss": 0.0052, "lr": 2.5031415918267564e-05, "epoch": 3.2602169834407375, "percentage": 49.96, "elapsed_time": "4:33:41", "remaining_time": "4:34:06", "throughput": 2342.9, "total_tokens": 38474592} {"current_steps": 19990, "total_steps": 40000, "loss": 0.0738, "lr": 2.5021598446806626e-05, "epoch": 3.2610327106615546, "percentage": 49.98, "elapsed_time": "4:33:43", "remaining_time": "4:34:00", "throughput": 2343.25, "total_tokens": 38485120} {"current_steps": 19995, "total_steps": 40000, "loss": 0.0011, "lr": 2.5011780972014937e-05, "epoch": 3.261848437882372, "percentage": 49.99, "elapsed_time": "4:33:45", "remaining_time": "4:33:54", "throughput": 2343.46, "total_tokens": 38493408} {"current_steps": 20000, "total_steps": 40000, "loss": 0.0028, "lr": 2.5001963495406478e-05, "epoch": 3.2626641651031894, "percentage": 50.0, "elapsed_time": "4:33:47", "remaining_time": "4:33:47", "throughput": 2343.77, "total_tokens": 38503392} {"current_steps": 20000, "total_steps": 40000, "eval_loss": 0.21684901416301727, "epoch": 3.2626641651031894, "percentage": 50.0, "elapsed_time": "4:35:08", "remaining_time": "4:35:08", "throughput": 2332.31, "total_tokens": 38503392} {"current_steps": 20005, "total_steps": 40000, "loss": 0.0093, "lr": 2.499214601849522e-05, "epoch": 3.263479892324007, "percentage": 50.01, "elapsed_time": "4:35:12", "remaining_time": "4:35:04", "throughput": 2332.28, "total_tokens": 38512656} {"current_steps": 20010, "total_steps": 40000, "loss": 0.1169, "lr": 2.4982328542795148e-05, "epoch": 3.264295619544824, "percentage": 50.02, "elapsed_time": "4:35:14", "remaining_time": "4:34:58", "throughput": 2332.55, "total_tokens": 38521952} {"current_steps": 20015, "total_steps": 40000, "loss": 0.0025, "lr": 2.497251106982024e-05, "epoch": 3.2651113467656416, "percentage": 50.04, "elapsed_time": "4:35:17", "remaining_time": "4:34:52", "throughput": 2332.89, "total_tokens": 38532384} {"current_steps": 20020, "total_steps": 40000, "loss": 0.0041, "lr": 2.4962693601084458e-05, "epoch": 3.2659270739864588, "percentage": 50.05, "elapsed_time": "4:35:19", "remaining_time": "4:34:46", "throughput": 2333.11, "total_tokens": 38540800} {"current_steps": 20025, "total_steps": 40000, "loss": 0.1156, "lr": 2.4952876138101794e-05, "epoch": 3.2667428012072763, "percentage": 50.06, "elapsed_time": "4:35:21", "remaining_time": "4:34:39", "throughput": 2333.4, "total_tokens": 38550448} {"current_steps": 20030, "total_steps": 40000, "loss": 0.1904, "lr": 2.4943058682386233e-05, "epoch": 3.2675585284280935, "percentage": 50.08, "elapsed_time": "4:35:23", "remaining_time": "4:34:33", "throughput": 2333.69, "total_tokens": 38560144} {"current_steps": 20035, "total_steps": 40000, "loss": 0.1544, "lr": 2.493324123545173e-05, "epoch": 3.268374255648911, "percentage": 50.09, "elapsed_time": "4:35:25", "remaining_time": "4:34:27", "throughput": 2333.98, "total_tokens": 38569728} {"current_steps": 20040, "total_steps": 40000, "loss": 0.0032, "lr": 2.4923423798812272e-05, "epoch": 3.269189982869728, "percentage": 50.1, "elapsed_time": "4:35:27", "remaining_time": "4:34:21", "throughput": 2334.35, "total_tokens": 38580544} {"current_steps": 20045, "total_steps": 40000, "loss": 0.1796, "lr": 2.4913606373981825e-05, "epoch": 3.2700057100905457, "percentage": 50.11, "elapsed_time": "4:35:29", "remaining_time": "4:34:15", "throughput": 2334.68, "total_tokens": 38590880} {"current_steps": 20050, "total_steps": 40000, "loss": 0.0034, "lr": 2.4903788962474357e-05, "epoch": 3.270821437311363, "percentage": 50.12, "elapsed_time": "4:35:31", "remaining_time": "4:34:09", "throughput": 2334.9, "total_tokens": 38599328} {"current_steps": 20055, "total_steps": 40000, "loss": 0.0067, "lr": 2.489397156580385e-05, "epoch": 3.2716371645321805, "percentage": 50.14, "elapsed_time": "4:35:33", "remaining_time": "4:34:02", "throughput": 2335.09, "total_tokens": 38607440} {"current_steps": 20060, "total_steps": 40000, "loss": 0.0579, "lr": 2.4884154185484246e-05, "epoch": 3.272452891752998, "percentage": 50.15, "elapsed_time": "4:35:35", "remaining_time": "4:33:56", "throughput": 2335.38, "total_tokens": 38617024} {"current_steps": 20065, "total_steps": 40000, "loss": 0.0025, "lr": 2.4874336823029526e-05, "epoch": 3.273268618973815, "percentage": 50.16, "elapsed_time": "4:35:37", "remaining_time": "4:33:50", "throughput": 2335.67, "total_tokens": 38626576} {"current_steps": 20070, "total_steps": 40000, "loss": 0.1053, "lr": 2.4864519479953656e-05, "epoch": 3.2740843461946323, "percentage": 50.18, "elapsed_time": "4:35:39", "remaining_time": "4:33:44", "throughput": 2335.93, "total_tokens": 38635760} {"current_steps": 20075, "total_steps": 40000, "loss": 0.0019, "lr": 2.485470215777058e-05, "epoch": 3.27490007341545, "percentage": 50.19, "elapsed_time": "4:35:41", "remaining_time": "4:33:38", "throughput": 2336.13, "total_tokens": 38643952} {"current_steps": 20080, "total_steps": 40000, "loss": 0.0159, "lr": 2.4844884857994258e-05, "epoch": 3.2757158006362674, "percentage": 50.2, "elapsed_time": "4:35:43", "remaining_time": "4:33:32", "throughput": 2336.46, "total_tokens": 38654272} {"current_steps": 20085, "total_steps": 40000, "loss": 0.0643, "lr": 2.4835067582138638e-05, "epoch": 3.2765315278570846, "percentage": 50.21, "elapsed_time": "4:35:46", "remaining_time": "4:33:25", "throughput": 2336.78, "total_tokens": 38664336} {"current_steps": 20090, "total_steps": 40000, "loss": 0.1383, "lr": 2.4825250331717666e-05, "epoch": 3.2773472550779017, "percentage": 50.22, "elapsed_time": "4:35:48", "remaining_time": "4:33:19", "throughput": 2337.08, "total_tokens": 38674128} {"current_steps": 20095, "total_steps": 40000, "loss": 0.0072, "lr": 2.4815433108245298e-05, "epoch": 3.2781629822987193, "percentage": 50.24, "elapsed_time": "4:35:50", "remaining_time": "4:33:13", "throughput": 2337.43, "total_tokens": 38684768} {"current_steps": 20100, "total_steps": 40000, "loss": 0.0584, "lr": 2.4805615913235456e-05, "epoch": 3.278978709519537, "percentage": 50.25, "elapsed_time": "4:35:52", "remaining_time": "4:33:07", "throughput": 2337.69, "total_tokens": 38693904} {"current_steps": 20105, "total_steps": 40000, "loss": 0.0828, "lr": 2.479579874820208e-05, "epoch": 3.279794436740354, "percentage": 50.26, "elapsed_time": "4:35:54", "remaining_time": "4:33:01", "throughput": 2338.05, "total_tokens": 38704752} {"current_steps": 20110, "total_steps": 40000, "loss": 0.0588, "lr": 2.4785981614659115e-05, "epoch": 3.2806101639611716, "percentage": 50.28, "elapsed_time": "4:35:56", "remaining_time": "4:32:55", "throughput": 2338.32, "total_tokens": 38714064} {"current_steps": 20115, "total_steps": 40000, "loss": 0.0603, "lr": 2.477616451412047e-05, "epoch": 3.2814258911819887, "percentage": 50.29, "elapsed_time": "4:35:58", "remaining_time": "4:32:49", "throughput": 2338.67, "total_tokens": 38724768} {"current_steps": 20120, "total_steps": 40000, "loss": 0.0531, "lr": 2.476634744810007e-05, "epoch": 3.2822416184028063, "percentage": 50.3, "elapsed_time": "4:36:00", "remaining_time": "4:32:42", "throughput": 2338.92, "total_tokens": 38733680} {"current_steps": 20125, "total_steps": 40000, "loss": 0.0785, "lr": 2.475653041811183e-05, "epoch": 3.2830573456236234, "percentage": 50.31, "elapsed_time": "4:36:02", "remaining_time": "4:32:36", "throughput": 2339.21, "total_tokens": 38743424} {"current_steps": 20130, "total_steps": 40000, "loss": 0.0005, "lr": 2.4746713425669652e-05, "epoch": 3.283873072844441, "percentage": 50.32, "elapsed_time": "4:36:04", "remaining_time": "4:32:30", "throughput": 2339.55, "total_tokens": 38753760} {"current_steps": 20135, "total_steps": 40000, "loss": 0.0034, "lr": 2.4736896472287458e-05, "epoch": 3.284688800065258, "percentage": 50.34, "elapsed_time": "4:36:06", "remaining_time": "4:32:24", "throughput": 2339.78, "total_tokens": 38762528} {"current_steps": 20140, "total_steps": 40000, "loss": 0.0017, "lr": 2.4727079559479124e-05, "epoch": 3.2855045272860757, "percentage": 50.35, "elapsed_time": "4:36:08", "remaining_time": "4:32:18", "throughput": 2340.03, "total_tokens": 38771472} {"current_steps": 20145, "total_steps": 40000, "loss": 0.1659, "lr": 2.4717262688758557e-05, "epoch": 3.286320254506893, "percentage": 50.36, "elapsed_time": "4:36:10", "remaining_time": "4:32:12", "throughput": 2340.32, "total_tokens": 38781104} {"current_steps": 20150, "total_steps": 40000, "loss": 0.0023, "lr": 2.4707445861639637e-05, "epoch": 3.2871359817277104, "percentage": 50.38, "elapsed_time": "4:36:12", "remaining_time": "4:32:06", "throughput": 2340.58, "total_tokens": 38790208} {"current_steps": 20155, "total_steps": 40000, "loss": 0.149, "lr": 2.4697629079636244e-05, "epoch": 3.2879517089485275, "percentage": 50.39, "elapsed_time": "4:36:15", "remaining_time": "4:32:00", "throughput": 2340.83, "total_tokens": 38799312} {"current_steps": 20160, "total_steps": 40000, "loss": 0.0131, "lr": 2.4687812344262244e-05, "epoch": 3.288767436169345, "percentage": 50.4, "elapsed_time": "4:36:17", "remaining_time": "4:31:53", "throughput": 2341.1, "total_tokens": 38808656} {"current_steps": 20165, "total_steps": 40000, "loss": 0.1419, "lr": 2.46779956570315e-05, "epoch": 3.289583163390162, "percentage": 50.41, "elapsed_time": "4:36:19", "remaining_time": "4:31:47", "throughput": 2341.35, "total_tokens": 38817584} {"current_steps": 20170, "total_steps": 40000, "loss": 0.0646, "lr": 2.466817901945787e-05, "epoch": 3.29039889061098, "percentage": 50.42, "elapsed_time": "4:36:21", "remaining_time": "4:31:41", "throughput": 2341.64, "total_tokens": 38827248} {"current_steps": 20175, "total_steps": 40000, "loss": 0.0073, "lr": 2.4658362433055217e-05, "epoch": 3.291214617831797, "percentage": 50.44, "elapsed_time": "4:36:23", "remaining_time": "4:31:35", "throughput": 2341.92, "total_tokens": 38836720} {"current_steps": 20180, "total_steps": 40000, "loss": 0.1365, "lr": 2.4648545899337356e-05, "epoch": 3.2920303450526145, "percentage": 50.45, "elapsed_time": "4:36:25", "remaining_time": "4:31:29", "throughput": 2342.18, "total_tokens": 38845888} {"current_steps": 20185, "total_steps": 40000, "loss": 0.003, "lr": 2.4638729419818143e-05, "epoch": 3.2928460722734316, "percentage": 50.46, "elapsed_time": "4:36:27", "remaining_time": "4:31:23", "throughput": 2342.46, "total_tokens": 38855408} {"current_steps": 20190, "total_steps": 40000, "loss": 0.0039, "lr": 2.46289129960114e-05, "epoch": 3.293661799494249, "percentage": 50.48, "elapsed_time": "4:36:29", "remaining_time": "4:31:17", "throughput": 2342.79, "total_tokens": 38865744} {"current_steps": 20195, "total_steps": 40000, "loss": 0.0537, "lr": 2.4619096629430924e-05, "epoch": 3.2944775267150663, "percentage": 50.49, "elapsed_time": "4:36:31", "remaining_time": "4:31:11", "throughput": 2343.16, "total_tokens": 38876688} {"current_steps": 20200, "total_steps": 40000, "loss": 0.2017, "lr": 2.4609280321590543e-05, "epoch": 3.295293253935884, "percentage": 50.5, "elapsed_time": "4:36:33", "remaining_time": "4:31:05", "throughput": 2343.41, "total_tokens": 38885696} {"current_steps": 20200, "total_steps": 40000, "eval_loss": 0.20447805523872375, "epoch": 3.295293253935884, "percentage": 50.5, "elapsed_time": "4:37:54", "remaining_time": "4:32:24", "throughput": 2332.05, "total_tokens": 38885696} {"current_steps": 20205, "total_steps": 40000, "loss": 0.0513, "lr": 2.4599464074004037e-05, "epoch": 3.296108981156701, "percentage": 50.51, "elapsed_time": "4:37:58", "remaining_time": "4:32:19", "throughput": 2332.15, "total_tokens": 38895952} {"current_steps": 20210, "total_steps": 40000, "loss": 0.0042, "lr": 2.4589647888185204e-05, "epoch": 3.2969247083775186, "percentage": 50.52, "elapsed_time": "4:38:00", "remaining_time": "4:32:13", "throughput": 2332.41, "total_tokens": 38905152} {"current_steps": 20215, "total_steps": 40000, "loss": 0.0032, "lr": 2.4579831765647836e-05, "epoch": 3.2977404355983357, "percentage": 50.54, "elapsed_time": "4:38:02", "remaining_time": "4:32:07", "throughput": 2332.73, "total_tokens": 38915424} {"current_steps": 20220, "total_steps": 40000, "loss": 0.0027, "lr": 2.4570015707905676e-05, "epoch": 3.2985561628191533, "percentage": 50.55, "elapsed_time": "4:38:04", "remaining_time": "4:32:01", "throughput": 2332.94, "total_tokens": 38923648} {"current_steps": 20225, "total_steps": 40000, "loss": 0.069, "lr": 2.4560199716472508e-05, "epoch": 3.2993718900399704, "percentage": 50.56, "elapsed_time": "4:38:06", "remaining_time": "4:31:55", "throughput": 2333.21, "total_tokens": 38933072} {"current_steps": 20230, "total_steps": 40000, "loss": 0.1926, "lr": 2.455038379286207e-05, "epoch": 3.300187617260788, "percentage": 50.58, "elapsed_time": "4:38:08", "remaining_time": "4:31:49", "throughput": 2333.58, "total_tokens": 38943952} {"current_steps": 20235, "total_steps": 40000, "loss": 0.1319, "lr": 2.4540567938588095e-05, "epoch": 3.3010033444816056, "percentage": 50.59, "elapsed_time": "4:38:10", "remaining_time": "4:31:42", "throughput": 2333.94, "total_tokens": 38954800} {"current_steps": 20240, "total_steps": 40000, "loss": 0.0041, "lr": 2.4530752155164328e-05, "epoch": 3.3018190717024227, "percentage": 50.6, "elapsed_time": "4:38:12", "remaining_time": "4:31:36", "throughput": 2334.25, "total_tokens": 38964880} {"current_steps": 20245, "total_steps": 40000, "loss": 0.0016, "lr": 2.4520936444104463e-05, "epoch": 3.30263479892324, "percentage": 50.61, "elapsed_time": "4:38:14", "remaining_time": "4:31:30", "throughput": 2334.51, "total_tokens": 38974064} {"current_steps": 20250, "total_steps": 40000, "loss": 0.002, "lr": 2.4511120806922218e-05, "epoch": 3.3034505261440574, "percentage": 50.62, "elapsed_time": "4:38:16", "remaining_time": "4:31:24", "throughput": 2334.86, "total_tokens": 38984752} {"current_steps": 20255, "total_steps": 40000, "loss": 0.0128, "lr": 2.45013052451313e-05, "epoch": 3.304266253364875, "percentage": 50.64, "elapsed_time": "4:38:18", "remaining_time": "4:31:18", "throughput": 2335.21, "total_tokens": 38995456} {"current_steps": 20260, "total_steps": 40000, "loss": 0.1157, "lr": 2.4491489760245376e-05, "epoch": 3.305081980585692, "percentage": 50.65, "elapsed_time": "4:38:20", "remaining_time": "4:31:12", "throughput": 2335.42, "total_tokens": 39003808} {"current_steps": 20265, "total_steps": 40000, "loss": 0.0314, "lr": 2.4481674353778115e-05, "epoch": 3.3058977078065093, "percentage": 50.66, "elapsed_time": "4:38:23", "remaining_time": "4:31:06", "throughput": 2335.68, "total_tokens": 39012992} {"current_steps": 20270, "total_steps": 40000, "loss": 0.0999, "lr": 2.447185902724319e-05, "epoch": 3.306713435027327, "percentage": 50.68, "elapsed_time": "4:38:25", "remaining_time": "4:31:00", "throughput": 2335.95, "total_tokens": 39022240} {"current_steps": 20275, "total_steps": 40000, "loss": 0.1569, "lr": 2.4462043782154233e-05, "epoch": 3.3075291622481444, "percentage": 50.69, "elapsed_time": "4:38:27", "remaining_time": "4:30:53", "throughput": 2336.24, "total_tokens": 39031920} {"current_steps": 20280, "total_steps": 40000, "loss": 0.0918, "lr": 2.4452228620024895e-05, "epoch": 3.3083448894689615, "percentage": 50.7, "elapsed_time": "4:38:29", "remaining_time": "4:30:47", "throughput": 2336.53, "total_tokens": 39041632} {"current_steps": 20285, "total_steps": 40000, "loss": 0.0456, "lr": 2.4442413542368776e-05, "epoch": 3.309160616689779, "percentage": 50.71, "elapsed_time": "4:38:31", "remaining_time": "4:30:41", "throughput": 2336.79, "total_tokens": 39050848} {"current_steps": 20290, "total_steps": 40000, "loss": 0.0028, "lr": 2.4432598550699502e-05, "epoch": 3.3099763439105963, "percentage": 50.72, "elapsed_time": "4:38:33", "remaining_time": "4:30:35", "throughput": 2337.12, "total_tokens": 39061184} {"current_steps": 20295, "total_steps": 40000, "loss": 0.0303, "lr": 2.4422783646530663e-05, "epoch": 3.310792071131414, "percentage": 50.74, "elapsed_time": "4:38:35", "remaining_time": "4:30:29", "throughput": 2337.41, "total_tokens": 39070816} {"current_steps": 20300, "total_steps": 40000, "loss": 0.1267, "lr": 2.441296883137584e-05, "epoch": 3.311607798352231, "percentage": 50.75, "elapsed_time": "4:38:37", "remaining_time": "4:30:23", "throughput": 2337.75, "total_tokens": 39081312} {"current_steps": 20305, "total_steps": 40000, "loss": 0.029, "lr": 2.4403154106748592e-05, "epoch": 3.3124235255730485, "percentage": 50.76, "elapsed_time": "4:38:39", "remaining_time": "4:30:17", "throughput": 2337.99, "total_tokens": 39090192} {"current_steps": 20310, "total_steps": 40000, "loss": 0.0041, "lr": 2.4393339474162494e-05, "epoch": 3.3132392527938657, "percentage": 50.78, "elapsed_time": "4:38:41", "remaining_time": "4:30:11", "throughput": 2338.25, "total_tokens": 39099488} {"current_steps": 20315, "total_steps": 40000, "loss": 0.0566, "lr": 2.4383524935131062e-05, "epoch": 3.3140549800146832, "percentage": 50.79, "elapsed_time": "4:38:43", "remaining_time": "4:30:05", "throughput": 2338.54, "total_tokens": 39109104} {"current_steps": 20320, "total_steps": 40000, "loss": 0.0094, "lr": 2.437371049116784e-05, "epoch": 3.3148707072355004, "percentage": 50.8, "elapsed_time": "4:38:45", "remaining_time": "4:29:59", "throughput": 2338.84, "total_tokens": 39119088} {"current_steps": 20325, "total_steps": 40000, "loss": 0.003, "lr": 2.436389614378632e-05, "epoch": 3.315686434456318, "percentage": 50.81, "elapsed_time": "4:38:47", "remaining_time": "4:29:52", "throughput": 2339.15, "total_tokens": 39129040} {"current_steps": 20330, "total_steps": 40000, "loss": 0.0198, "lr": 2.435408189450002e-05, "epoch": 3.316502161677135, "percentage": 50.82, "elapsed_time": "4:38:49", "remaining_time": "4:29:46", "throughput": 2339.42, "total_tokens": 39138416} {"current_steps": 20335, "total_steps": 40000, "loss": 0.0015, "lr": 2.4344267744822406e-05, "epoch": 3.3173178888979526, "percentage": 50.84, "elapsed_time": "4:38:52", "remaining_time": "4:29:40", "throughput": 2339.73, "total_tokens": 39148384} {"current_steps": 20340, "total_steps": 40000, "loss": 0.0386, "lr": 2.4334453696266944e-05, "epoch": 3.31813361611877, "percentage": 50.85, "elapsed_time": "4:38:54", "remaining_time": "4:29:34", "throughput": 2340.0, "total_tokens": 39157840} {"current_steps": 20345, "total_steps": 40000, "loss": 0.1552, "lr": 2.432463975034708e-05, "epoch": 3.3189493433395874, "percentage": 50.86, "elapsed_time": "4:38:56", "remaining_time": "4:29:28", "throughput": 2340.31, "total_tokens": 39167728} {"current_steps": 20350, "total_steps": 40000, "loss": 0.1798, "lr": 2.4314825908576265e-05, "epoch": 3.3197650705604045, "percentage": 50.88, "elapsed_time": "4:38:58", "remaining_time": "4:29:22", "throughput": 2340.56, "total_tokens": 39176752} {"current_steps": 20355, "total_steps": 40000, "loss": 0.0566, "lr": 2.4305012172467897e-05, "epoch": 3.320580797781222, "percentage": 50.89, "elapsed_time": "4:39:00", "remaining_time": "4:29:16", "throughput": 2340.85, "total_tokens": 39186480} {"current_steps": 20360, "total_steps": 40000, "loss": 0.0897, "lr": 2.4295198543535393e-05, "epoch": 3.321396525002039, "percentage": 50.9, "elapsed_time": "4:39:02", "remaining_time": "4:29:10", "throughput": 2341.16, "total_tokens": 39196512} {"current_steps": 20365, "total_steps": 40000, "loss": 0.0455, "lr": 2.4285385023292124e-05, "epoch": 3.3222122522228568, "percentage": 50.91, "elapsed_time": "4:39:04", "remaining_time": "4:29:04", "throughput": 2341.39, "total_tokens": 39205312} {"current_steps": 20370, "total_steps": 40000, "loss": 0.0424, "lr": 2.427557161325147e-05, "epoch": 3.323027979443674, "percentage": 50.92, "elapsed_time": "4:39:06", "remaining_time": "4:28:58", "throughput": 2341.67, "total_tokens": 39214880} {"current_steps": 20375, "total_steps": 40000, "loss": 0.0708, "lr": 2.4265758314926778e-05, "epoch": 3.3238437066644915, "percentage": 50.94, "elapsed_time": "4:39:08", "remaining_time": "4:28:52", "throughput": 2342.02, "total_tokens": 39225520} {"current_steps": 20380, "total_steps": 40000, "loss": 0.0028, "lr": 2.4255945129831373e-05, "epoch": 3.3246594338853086, "percentage": 50.95, "elapsed_time": "4:39:10", "remaining_time": "4:28:45", "throughput": 2342.3, "total_tokens": 39235072} {"current_steps": 20385, "total_steps": 40000, "loss": 0.0711, "lr": 2.4246132059478578e-05, "epoch": 3.325475161106126, "percentage": 50.96, "elapsed_time": "4:39:12", "remaining_time": "4:28:39", "throughput": 2342.58, "total_tokens": 39244592} {"current_steps": 20390, "total_steps": 40000, "loss": 0.0032, "lr": 2.4236319105381706e-05, "epoch": 3.3262908883269433, "percentage": 50.98, "elapsed_time": "4:39:14", "remaining_time": "4:28:33", "throughput": 2342.76, "total_tokens": 39252432} {"current_steps": 20395, "total_steps": 40000, "loss": 0.1068, "lr": 2.422650626905401e-05, "epoch": 3.327106615547761, "percentage": 50.99, "elapsed_time": "4:39:16", "remaining_time": "4:28:27", "throughput": 2343.01, "total_tokens": 39261520} {"current_steps": 20400, "total_steps": 40000, "loss": 0.0441, "lr": 2.4216693552008785e-05, "epoch": 3.327922342768578, "percentage": 51.0, "elapsed_time": "4:39:18", "remaining_time": "4:28:21", "throughput": 2343.25, "total_tokens": 39270320} {"current_steps": 20400, "total_steps": 40000, "eval_loss": 0.20466327667236328, "epoch": 3.327922342768578, "percentage": 51.0, "elapsed_time": "4:40:39", "remaining_time": "4:29:39", "throughput": 2332.0, "total_tokens": 39270320} {"current_steps": 20405, "total_steps": 40000, "loss": 0.0574, "lr": 2.4206880955759247e-05, "epoch": 3.3287380699893956, "percentage": 51.01, "elapsed_time": "4:40:43", "remaining_time": "4:29:35", "throughput": 2332.05, "total_tokens": 39280288} {"current_steps": 20410, "total_steps": 40000, "loss": 0.073, "lr": 2.419706848181863e-05, "epoch": 3.3295537972102127, "percentage": 51.02, "elapsed_time": "4:40:45", "remaining_time": "4:29:28", "throughput": 2332.35, "total_tokens": 39290240} {"current_steps": 20415, "total_steps": 40000, "loss": 0.002, "lr": 2.4187256131700153e-05, "epoch": 3.3303695244310303, "percentage": 51.04, "elapsed_time": "4:40:47", "remaining_time": "4:29:22", "throughput": 2332.63, "total_tokens": 39299616} {"current_steps": 20420, "total_steps": 40000, "loss": 0.0677, "lr": 2.4177443906916985e-05, "epoch": 3.3311852516518474, "percentage": 51.05, "elapsed_time": "4:40:49", "remaining_time": "4:29:16", "throughput": 2332.95, "total_tokens": 39309920} {"current_steps": 20425, "total_steps": 40000, "loss": 0.0704, "lr": 2.4167631808982303e-05, "epoch": 3.332000978872665, "percentage": 51.06, "elapsed_time": "4:40:51", "remaining_time": "4:29:10", "throughput": 2333.25, "total_tokens": 39319824} {"current_steps": 20430, "total_steps": 40000, "loss": 0.0022, "lr": 2.4157819839409264e-05, "epoch": 3.3328167060934826, "percentage": 51.08, "elapsed_time": "4:40:54", "remaining_time": "4:29:04", "throughput": 2333.57, "total_tokens": 39330032} {"current_steps": 20435, "total_steps": 40000, "loss": 0.002, "lr": 2.414800799971098e-05, "epoch": 3.3336324333142997, "percentage": 51.09, "elapsed_time": "4:40:56", "remaining_time": "4:28:58", "throughput": 2333.84, "total_tokens": 39339408} {"current_steps": 20440, "total_steps": 40000, "loss": 0.0209, "lr": 2.4138196291400582e-05, "epoch": 3.334448160535117, "percentage": 51.1, "elapsed_time": "4:40:58", "remaining_time": "4:28:52", "throughput": 2334.16, "total_tokens": 39349616} {"current_steps": 20445, "total_steps": 40000, "loss": 0.0022, "lr": 2.412838471599114e-05, "epoch": 3.3352638877559344, "percentage": 51.11, "elapsed_time": "4:41:00", "remaining_time": "4:28:46", "throughput": 2334.45, "total_tokens": 39359312} {"current_steps": 20450, "total_steps": 40000, "loss": 0.0021, "lr": 2.411857327499572e-05, "epoch": 3.336079614976752, "percentage": 51.12, "elapsed_time": "4:41:02", "remaining_time": "4:28:40", "throughput": 2334.68, "total_tokens": 39368080} {"current_steps": 20455, "total_steps": 40000, "loss": 0.0013, "lr": 2.410876196992739e-05, "epoch": 3.336895342197569, "percentage": 51.14, "elapsed_time": "4:41:04", "remaining_time": "4:28:34", "throughput": 2334.98, "total_tokens": 39377952} {"current_steps": 20460, "total_steps": 40000, "loss": 0.0223, "lr": 2.4098950802299156e-05, "epoch": 3.3377110694183862, "percentage": 51.15, "elapsed_time": "4:41:06", "remaining_time": "4:28:28", "throughput": 2335.29, "total_tokens": 39388064} {"current_steps": 20465, "total_steps": 40000, "loss": 0.0875, "lr": 2.4089139773624027e-05, "epoch": 3.338526796639204, "percentage": 51.16, "elapsed_time": "4:41:08", "remaining_time": "4:28:21", "throughput": 2335.64, "total_tokens": 39398704} {"current_steps": 20470, "total_steps": 40000, "loss": 0.0742, "lr": 2.4079328885415007e-05, "epoch": 3.3393425238600214, "percentage": 51.18, "elapsed_time": "4:41:10", "remaining_time": "4:28:15", "throughput": 2335.89, "total_tokens": 39407776} {"current_steps": 20475, "total_steps": 40000, "loss": 0.0729, "lr": 2.4069518139185036e-05, "epoch": 3.3401582510808385, "percentage": 51.19, "elapsed_time": "4:41:12", "remaining_time": "4:28:09", "throughput": 2336.19, "total_tokens": 39417776} {"current_steps": 20480, "total_steps": 40000, "loss": 0.0501, "lr": 2.405970753644706e-05, "epoch": 3.340973978301656, "percentage": 51.2, "elapsed_time": "4:41:14", "remaining_time": "4:28:03", "throughput": 2336.5, "total_tokens": 39427776} {"current_steps": 20485, "total_steps": 40000, "loss": 0.0385, "lr": 2.4049897078714e-05, "epoch": 3.3417897055224732, "percentage": 51.21, "elapsed_time": "4:41:16", "remaining_time": "4:27:57", "throughput": 2336.79, "total_tokens": 39437584} {"current_steps": 20490, "total_steps": 40000, "loss": 0.0484, "lr": 2.404008676749874e-05, "epoch": 3.342605432743291, "percentage": 51.23, "elapsed_time": "4:41:18", "remaining_time": "4:27:51", "throughput": 2337.13, "total_tokens": 39448064} {"current_steps": 20495, "total_steps": 40000, "loss": 0.0028, "lr": 2.403027660431418e-05, "epoch": 3.343421159964108, "percentage": 51.24, "elapsed_time": "4:41:20", "remaining_time": "4:27:45", "throughput": 2337.41, "total_tokens": 39457712} {"current_steps": 20500, "total_steps": 40000, "loss": 0.0621, "lr": 2.402046659067314e-05, "epoch": 3.3442368871849255, "percentage": 51.25, "elapsed_time": "4:41:23", "remaining_time": "4:27:39", "throughput": 2337.72, "total_tokens": 39467744} {"current_steps": 20505, "total_steps": 40000, "loss": 0.0955, "lr": 2.401065672808847e-05, "epoch": 3.3450526144057426, "percentage": 51.26, "elapsed_time": "4:41:25", "remaining_time": "4:27:33", "throughput": 2337.99, "total_tokens": 39477152} {"current_steps": 20510, "total_steps": 40000, "loss": 0.0006, "lr": 2.400084701807296e-05, "epoch": 3.34586834162656, "percentage": 51.28, "elapsed_time": "4:41:27", "remaining_time": "4:27:27", "throughput": 2338.38, "total_tokens": 39488576} {"current_steps": 20515, "total_steps": 40000, "loss": 0.1154, "lr": 2.39910374621394e-05, "epoch": 3.3466840688473773, "percentage": 51.29, "elapsed_time": "4:41:29", "remaining_time": "4:27:21", "throughput": 2338.68, "total_tokens": 39498416} {"current_steps": 20520, "total_steps": 40000, "loss": 0.1114, "lr": 2.3981228061800544e-05, "epoch": 3.347499796068195, "percentage": 51.3, "elapsed_time": "4:41:31", "remaining_time": "4:27:15", "throughput": 2338.94, "total_tokens": 39507712} {"current_steps": 20525, "total_steps": 40000, "loss": 0.0013, "lr": 2.3971418818569115e-05, "epoch": 3.348315523289012, "percentage": 51.31, "elapsed_time": "4:41:33", "remaining_time": "4:27:09", "throughput": 2339.21, "total_tokens": 39517184} {"current_steps": 20530, "total_steps": 40000, "loss": 0.0625, "lr": 2.3961609733957832e-05, "epoch": 3.3491312505098296, "percentage": 51.32, "elapsed_time": "4:41:35", "remaining_time": "4:27:03", "throughput": 2339.48, "total_tokens": 39526576} {"current_steps": 20535, "total_steps": 40000, "loss": 0.0942, "lr": 2.395180080947939e-05, "epoch": 3.3499469777306468, "percentage": 51.34, "elapsed_time": "4:41:37", "remaining_time": "4:26:57", "throughput": 2339.77, "total_tokens": 39536288} {"current_steps": 20540, "total_steps": 40000, "loss": 0.0693, "lr": 2.394199204664642e-05, "epoch": 3.3507627049514643, "percentage": 51.35, "elapsed_time": "4:41:39", "remaining_time": "4:26:50", "throughput": 2340.11, "total_tokens": 39546880} {"current_steps": 20545, "total_steps": 40000, "loss": 0.0568, "lr": 2.3932183446971583e-05, "epoch": 3.3515784321722815, "percentage": 51.36, "elapsed_time": "4:41:41", "remaining_time": "4:26:44", "throughput": 2340.39, "total_tokens": 39556448} {"current_steps": 20550, "total_steps": 40000, "loss": 0.1404, "lr": 2.3922375011967473e-05, "epoch": 3.352394159393099, "percentage": 51.38, "elapsed_time": "4:41:43", "remaining_time": "4:26:38", "throughput": 2340.71, "total_tokens": 39566736} {"current_steps": 20555, "total_steps": 40000, "loss": 0.0231, "lr": 2.3912566743146676e-05, "epoch": 3.353209886613916, "percentage": 51.39, "elapsed_time": "4:41:45", "remaining_time": "4:26:32", "throughput": 2341.06, "total_tokens": 39577472} {"current_steps": 20560, "total_steps": 40000, "loss": 0.0529, "lr": 2.390275864202176e-05, "epoch": 3.3540256138347337, "percentage": 51.4, "elapsed_time": "4:41:47", "remaining_time": "4:26:26", "throughput": 2341.32, "total_tokens": 39586736} {"current_steps": 20565, "total_steps": 40000, "loss": 0.1349, "lr": 2.3892950710105243e-05, "epoch": 3.354841341055551, "percentage": 51.41, "elapsed_time": "4:41:49", "remaining_time": "4:26:20", "throughput": 2341.62, "total_tokens": 39596640} {"current_steps": 20570, "total_steps": 40000, "loss": 0.128, "lr": 2.3883142948909635e-05, "epoch": 3.3556570682763684, "percentage": 51.42, "elapsed_time": "4:41:51", "remaining_time": "4:26:14", "throughput": 2341.93, "total_tokens": 39606800} {"current_steps": 20575, "total_steps": 40000, "loss": 0.0448, "lr": 2.3873335359947433e-05, "epoch": 3.3564727954971856, "percentage": 51.44, "elapsed_time": "4:41:54", "remaining_time": "4:26:08", "throughput": 2342.18, "total_tokens": 39615808} {"current_steps": 20580, "total_steps": 40000, "loss": 0.0441, "lr": 2.3863527944731066e-05, "epoch": 3.357288522718003, "percentage": 51.45, "elapsed_time": "4:41:56", "remaining_time": "4:26:02", "throughput": 2342.5, "total_tokens": 39625984} {"current_steps": 20585, "total_steps": 40000, "loss": 0.0158, "lr": 2.385372070477298e-05, "epoch": 3.3581042499388203, "percentage": 51.46, "elapsed_time": "4:41:58", "remaining_time": "4:25:56", "throughput": 2342.85, "total_tokens": 39636832} {"current_steps": 20590, "total_steps": 40000, "loss": 0.0521, "lr": 2.384391364158556e-05, "epoch": 3.358919977159638, "percentage": 51.48, "elapsed_time": "4:42:00", "remaining_time": "4:25:50", "throughput": 2343.13, "total_tokens": 39646416} {"current_steps": 20595, "total_steps": 40000, "loss": 0.0065, "lr": 2.3834106756681185e-05, "epoch": 3.359735704380455, "percentage": 51.49, "elapsed_time": "4:42:02", "remaining_time": "4:25:44", "throughput": 2343.42, "total_tokens": 39656128} {"current_steps": 20600, "total_steps": 40000, "loss": 0.0007, "lr": 2.3824300051572206e-05, "epoch": 3.3605514316012726, "percentage": 51.5, "elapsed_time": "4:42:04", "remaining_time": "4:25:38", "throughput": 2343.68, "total_tokens": 39665472} {"current_steps": 20600, "total_steps": 40000, "eval_loss": 0.213675394654274, "epoch": 3.3605514316012726, "percentage": 51.5, "elapsed_time": "4:43:25", "remaining_time": "4:26:54", "throughput": 2332.56, "total_tokens": 39665472} {"current_steps": 20605, "total_steps": 40000, "loss": 0.0064, "lr": 2.3814493527770923e-05, "epoch": 3.36136715882209, "percentage": 51.51, "elapsed_time": "4:43:29", "remaining_time": "4:26:50", "throughput": 2332.57, "total_tokens": 39674736} {"current_steps": 20610, "total_steps": 40000, "loss": 0.0882, "lr": 2.3804687186789637e-05, "epoch": 3.3621828860429073, "percentage": 51.52, "elapsed_time": "4:43:31", "remaining_time": "4:26:44", "throughput": 2332.88, "total_tokens": 39684928} {"current_steps": 20615, "total_steps": 40000, "loss": 0.0014, "lr": 2.379488103014062e-05, "epoch": 3.3629986132637244, "percentage": 51.54, "elapsed_time": "4:43:33", "remaining_time": "4:26:38", "throughput": 2333.1, "total_tokens": 39693504} {"current_steps": 20620, "total_steps": 40000, "loss": 0.0074, "lr": 2.3785075059336086e-05, "epoch": 3.363814340484542, "percentage": 51.55, "elapsed_time": "4:43:35", "remaining_time": "4:26:32", "throughput": 2333.41, "total_tokens": 39703632} {"current_steps": 20625, "total_steps": 40000, "loss": 0.0835, "lr": 2.3775269275888248e-05, "epoch": 3.3646300677053596, "percentage": 51.56, "elapsed_time": "4:43:37", "remaining_time": "4:26:25", "throughput": 2333.78, "total_tokens": 39714736} {"current_steps": 20630, "total_steps": 40000, "loss": 0.0676, "lr": 2.3765463681309274e-05, "epoch": 3.3654457949261767, "percentage": 51.58, "elapsed_time": "4:43:39", "remaining_time": "4:26:19", "throughput": 2334.03, "total_tokens": 39723840} {"current_steps": 20635, "total_steps": 40000, "loss": 0.001, "lr": 2.3755658277111313e-05, "epoch": 3.366261522146994, "percentage": 51.59, "elapsed_time": "4:43:41", "remaining_time": "4:26:13", "throughput": 2334.34, "total_tokens": 39733888} {"current_steps": 20640, "total_steps": 40000, "loss": 0.1029, "lr": 2.374585306480649e-05, "epoch": 3.3670772493678114, "percentage": 51.6, "elapsed_time": "4:43:43", "remaining_time": "4:26:07", "throughput": 2334.66, "total_tokens": 39744208} {"current_steps": 20645, "total_steps": 40000, "loss": 0.1377, "lr": 2.3736048045906877e-05, "epoch": 3.367892976588629, "percentage": 51.61, "elapsed_time": "4:43:45", "remaining_time": "4:26:01", "throughput": 2334.85, "total_tokens": 39752192} {"current_steps": 20650, "total_steps": 40000, "loss": 0.1548, "lr": 2.372624322192454e-05, "epoch": 3.368708703809446, "percentage": 51.62, "elapsed_time": "4:43:47", "remaining_time": "4:25:55", "throughput": 2335.15, "total_tokens": 39762288} {"current_steps": 20655, "total_steps": 40000, "loss": 0.1041, "lr": 2.3716438594371516e-05, "epoch": 3.3695244310302637, "percentage": 51.64, "elapsed_time": "4:43:49", "remaining_time": "4:25:49", "throughput": 2335.46, "total_tokens": 39772256} {"current_steps": 20660, "total_steps": 40000, "loss": 0.0174, "lr": 2.3706634164759784e-05, "epoch": 3.370340158251081, "percentage": 51.65, "elapsed_time": "4:43:51", "remaining_time": "4:25:43", "throughput": 2335.75, "total_tokens": 39782128} {"current_steps": 20665, "total_steps": 40000, "loss": 0.0013, "lr": 2.3696829934601323e-05, "epoch": 3.3711558854718984, "percentage": 51.66, "elapsed_time": "4:43:53", "remaining_time": "4:25:37", "throughput": 2336.03, "total_tokens": 39791696} {"current_steps": 20670, "total_steps": 40000, "loss": 0.0099, "lr": 2.3687025905408053e-05, "epoch": 3.3719716126927155, "percentage": 51.68, "elapsed_time": "4:43:55", "remaining_time": "4:25:31", "throughput": 2336.31, "total_tokens": 39801280} {"current_steps": 20675, "total_steps": 40000, "loss": 0.0878, "lr": 2.3677222078691886e-05, "epoch": 3.372787339913533, "percentage": 51.69, "elapsed_time": "4:43:58", "remaining_time": "4:25:25", "throughput": 2336.62, "total_tokens": 39811456} {"current_steps": 20680, "total_steps": 40000, "loss": 0.0653, "lr": 2.366741845596471e-05, "epoch": 3.37360306713435, "percentage": 51.7, "elapsed_time": "4:44:00", "remaining_time": "4:25:19", "throughput": 2336.89, "total_tokens": 39820880} {"current_steps": 20685, "total_steps": 40000, "loss": 0.0562, "lr": 2.3657615038738343e-05, "epoch": 3.374418794355168, "percentage": 51.71, "elapsed_time": "4:44:02", "remaining_time": "4:25:13", "throughput": 2337.23, "total_tokens": 39831504} {"current_steps": 20690, "total_steps": 40000, "loss": 0.0023, "lr": 2.3647811828524614e-05, "epoch": 3.375234521575985, "percentage": 51.73, "elapsed_time": "4:44:04", "remaining_time": "4:25:07", "throughput": 2337.56, "total_tokens": 39842032} {"current_steps": 20695, "total_steps": 40000, "loss": 0.013, "lr": 2.363800882683529e-05, "epoch": 3.3760502487968025, "percentage": 51.74, "elapsed_time": "4:44:06", "remaining_time": "4:25:01", "throughput": 2337.91, "total_tokens": 39852848} {"current_steps": 20700, "total_steps": 40000, "loss": 0.0018, "lr": 2.3628206035182125e-05, "epoch": 3.3768659760176196, "percentage": 51.75, "elapsed_time": "4:44:08", "remaining_time": "4:24:55", "throughput": 2338.18, "total_tokens": 39862288} {"current_steps": 20705, "total_steps": 40000, "loss": 0.0253, "lr": 2.361840345507683e-05, "epoch": 3.377681703238437, "percentage": 51.76, "elapsed_time": "4:44:10", "remaining_time": "4:24:49", "throughput": 2338.48, "total_tokens": 39872208} {"current_steps": 20710, "total_steps": 40000, "loss": 0.1156, "lr": 2.3608601088031073e-05, "epoch": 3.3784974304592543, "percentage": 51.78, "elapsed_time": "4:44:12", "remaining_time": "4:24:43", "throughput": 2338.77, "total_tokens": 39881936} {"current_steps": 20715, "total_steps": 40000, "loss": 0.0112, "lr": 2.3598798935556516e-05, "epoch": 3.379313157680072, "percentage": 51.79, "elapsed_time": "4:44:14", "remaining_time": "4:24:37", "throughput": 2339.04, "total_tokens": 39891376} {"current_steps": 20720, "total_steps": 40000, "loss": 0.0006, "lr": 2.3588996999164784e-05, "epoch": 3.380128884900889, "percentage": 51.8, "elapsed_time": "4:44:16", "remaining_time": "4:24:31", "throughput": 2339.25, "total_tokens": 39899920} {"current_steps": 20725, "total_steps": 40000, "loss": 0.0121, "lr": 2.3579195280367434e-05, "epoch": 3.3809446121217066, "percentage": 51.81, "elapsed_time": "4:44:18", "remaining_time": "4:24:25", "throughput": 2339.55, "total_tokens": 39909840} {"current_steps": 20730, "total_steps": 40000, "loss": 0.0102, "lr": 2.356939378067603e-05, "epoch": 3.3817603393425237, "percentage": 51.82, "elapsed_time": "4:44:20", "remaining_time": "4:24:19", "throughput": 2339.88, "total_tokens": 39920352} {"current_steps": 20735, "total_steps": 40000, "loss": 0.0004, "lr": 2.3559592501602092e-05, "epoch": 3.3825760665633413, "percentage": 51.84, "elapsed_time": "4:44:22", "remaining_time": "4:24:13", "throughput": 2340.16, "total_tokens": 39930032} {"current_steps": 20740, "total_steps": 40000, "loss": 0.0517, "lr": 2.3549791444657076e-05, "epoch": 3.3833917937841584, "percentage": 51.85, "elapsed_time": "4:44:24", "remaining_time": "4:24:07", "throughput": 2340.41, "total_tokens": 39939104} {"current_steps": 20745, "total_steps": 40000, "loss": 0.001, "lr": 2.353999061135246e-05, "epoch": 3.384207521004976, "percentage": 51.86, "elapsed_time": "4:44:27", "remaining_time": "4:24:01", "throughput": 2340.64, "total_tokens": 39947920} {"current_steps": 20750, "total_steps": 40000, "loss": 0.0011, "lr": 2.3530190003199626e-05, "epoch": 3.385023248225793, "percentage": 51.88, "elapsed_time": "4:44:29", "remaining_time": "4:23:55", "throughput": 2340.87, "total_tokens": 39956640} {"current_steps": 20755, "total_steps": 40000, "loss": 0.0847, "lr": 2.3520389621709965e-05, "epoch": 3.3858389754466107, "percentage": 51.89, "elapsed_time": "4:44:31", "remaining_time": "4:23:49", "throughput": 2341.2, "total_tokens": 39967072} {"current_steps": 20760, "total_steps": 40000, "loss": 0.0022, "lr": 2.351058946839483e-05, "epoch": 3.386654702667428, "percentage": 51.9, "elapsed_time": "4:44:33", "remaining_time": "4:23:43", "throughput": 2341.48, "total_tokens": 39976832} {"current_steps": 20765, "total_steps": 40000, "loss": 0.0014, "lr": 2.350078954476551e-05, "epoch": 3.3874704298882454, "percentage": 51.91, "elapsed_time": "4:44:35", "remaining_time": "4:23:37", "throughput": 2341.79, "total_tokens": 39986816} {"current_steps": 20770, "total_steps": 40000, "loss": 0.1399, "lr": 2.3490989852333272e-05, "epoch": 3.3882861571090626, "percentage": 51.92, "elapsed_time": "4:44:37", "remaining_time": "4:23:31", "throughput": 2341.97, "total_tokens": 39994800} {"current_steps": 20775, "total_steps": 40000, "loss": 0.0819, "lr": 2.3481190392609377e-05, "epoch": 3.38910188432988, "percentage": 51.94, "elapsed_time": "4:44:39", "remaining_time": "4:23:25", "throughput": 2342.22, "total_tokens": 40003968} {"current_steps": 20780, "total_steps": 40000, "loss": 0.0662, "lr": 2.3471391167105e-05, "epoch": 3.3899176115506973, "percentage": 51.95, "elapsed_time": "4:44:41", "remaining_time": "4:23:19", "throughput": 2342.43, "total_tokens": 40012352} {"current_steps": 20785, "total_steps": 40000, "loss": 0.1087, "lr": 2.3461592177331325e-05, "epoch": 3.390733338771515, "percentage": 51.96, "elapsed_time": "4:44:43", "remaining_time": "4:23:13", "throughput": 2342.65, "total_tokens": 40020928} {"current_steps": 20790, "total_steps": 40000, "loss": 0.1017, "lr": 2.345179342479946e-05, "epoch": 3.391549065992332, "percentage": 51.98, "elapsed_time": "4:44:45", "remaining_time": "4:23:07", "throughput": 2342.96, "total_tokens": 40031184} {"current_steps": 20795, "total_steps": 40000, "loss": 0.0799, "lr": 2.3441994911020503e-05, "epoch": 3.3923647932131495, "percentage": 51.99, "elapsed_time": "4:44:47", "remaining_time": "4:23:01", "throughput": 2343.24, "total_tokens": 40040784} {"current_steps": 20800, "total_steps": 40000, "loss": 0.0745, "lr": 2.3432196637505522e-05, "epoch": 3.393180520433967, "percentage": 52.0, "elapsed_time": "4:44:49", "remaining_time": "4:22:55", "throughput": 2343.48, "total_tokens": 40049680} {"current_steps": 20800, "total_steps": 40000, "eval_loss": 0.21692736446857452, "epoch": 3.393180520433967, "percentage": 52.0, "elapsed_time": "4:46:10", "remaining_time": "4:24:09", "throughput": 2332.45, "total_tokens": 40049680} {"current_steps": 20805, "total_steps": 40000, "loss": 0.0826, "lr": 2.3422398605765515e-05, "epoch": 3.3939962476547842, "percentage": 52.01, "elapsed_time": "4:46:14", "remaining_time": "4:24:05", "throughput": 2332.48, "total_tokens": 40058896} {"current_steps": 20810, "total_steps": 40000, "loss": 0.1913, "lr": 2.3412600817311462e-05, "epoch": 3.3948119748756014, "percentage": 52.02, "elapsed_time": "4:46:16", "remaining_time": "4:23:59", "throughput": 2332.78, "total_tokens": 40068944} {"current_steps": 20815, "total_steps": 40000, "loss": 0.0026, "lr": 2.3402803273654326e-05, "epoch": 3.395627702096419, "percentage": 52.04, "elapsed_time": "4:46:18", "remaining_time": "4:23:53", "throughput": 2333.03, "total_tokens": 40078144} {"current_steps": 20820, "total_steps": 40000, "loss": 0.1525, "lr": 2.3393005976304983e-05, "epoch": 3.3964434293172365, "percentage": 52.05, "elapsed_time": "4:46:20", "remaining_time": "4:23:47", "throughput": 2333.32, "total_tokens": 40087888} {"current_steps": 20825, "total_steps": 40000, "loss": 0.0029, "lr": 2.338320892677432e-05, "epoch": 3.3972591565380537, "percentage": 52.06, "elapsed_time": "4:46:22", "remaining_time": "4:23:41", "throughput": 2333.63, "total_tokens": 40098096} {"current_steps": 20830, "total_steps": 40000, "loss": 0.129, "lr": 2.3373412126573155e-05, "epoch": 3.398074883758871, "percentage": 52.08, "elapsed_time": "4:46:24", "remaining_time": "4:23:35", "throughput": 2333.97, "total_tokens": 40108816} {"current_steps": 20835, "total_steps": 40000, "loss": 0.0027, "lr": 2.3363615577212285e-05, "epoch": 3.3988906109796884, "percentage": 52.09, "elapsed_time": "4:46:26", "remaining_time": "4:23:29", "throughput": 2334.22, "total_tokens": 40117888} {"current_steps": 20840, "total_steps": 40000, "loss": 0.0077, "lr": 2.3353819280202455e-05, "epoch": 3.399706338200506, "percentage": 52.1, "elapsed_time": "4:46:28", "remaining_time": "4:23:23", "throughput": 2334.43, "total_tokens": 40126368} {"current_steps": 20845, "total_steps": 40000, "loss": 0.0015, "lr": 2.334402323705438e-05, "epoch": 3.400522065421323, "percentage": 52.11, "elapsed_time": "4:46:31", "remaining_time": "4:23:17", "throughput": 2334.69, "total_tokens": 40135728} {"current_steps": 20850, "total_steps": 40000, "loss": 0.0683, "lr": 2.3334227449278725e-05, "epoch": 3.4013377926421406, "percentage": 52.12, "elapsed_time": "4:46:33", "remaining_time": "4:23:11", "throughput": 2334.98, "total_tokens": 40145504} {"current_steps": 20855, "total_steps": 40000, "loss": 0.1568, "lr": 2.3324431918386143e-05, "epoch": 3.4021535198629578, "percentage": 52.14, "elapsed_time": "4:46:35", "remaining_time": "4:23:05", "throughput": 2335.27, "total_tokens": 40155312} {"current_steps": 20860, "total_steps": 40000, "loss": 0.086, "lr": 2.3314636645887207e-05, "epoch": 3.4029692470837754, "percentage": 52.15, "elapsed_time": "4:46:37", "remaining_time": "4:22:59", "throughput": 2335.48, "total_tokens": 40163840} {"current_steps": 20865, "total_steps": 40000, "loss": 0.0051, "lr": 2.3304841633292487e-05, "epoch": 3.4037849743045925, "percentage": 52.16, "elapsed_time": "4:46:39", "remaining_time": "4:22:53", "throughput": 2335.8, "total_tokens": 40174128} {"current_steps": 20870, "total_steps": 40000, "loss": 0.0528, "lr": 2.329504688211248e-05, "epoch": 3.40460070152541, "percentage": 52.18, "elapsed_time": "4:46:41", "remaining_time": "4:22:47", "throughput": 2336.0, "total_tokens": 40182416} {"current_steps": 20875, "total_steps": 40000, "loss": 0.0487, "lr": 2.3285252393857677e-05, "epoch": 3.405416428746227, "percentage": 52.19, "elapsed_time": "4:46:43", "remaining_time": "4:22:41", "throughput": 2336.33, "total_tokens": 40192848} {"current_steps": 20880, "total_steps": 40000, "loss": 0.0024, "lr": 2.327545817003851e-05, "epoch": 3.4062321559670448, "percentage": 52.2, "elapsed_time": "4:46:45", "remaining_time": "4:22:35", "throughput": 2336.59, "total_tokens": 40202240} {"current_steps": 20885, "total_steps": 40000, "loss": 0.1604, "lr": 2.326566421216535e-05, "epoch": 3.407047883187862, "percentage": 52.21, "elapsed_time": "4:46:47", "remaining_time": "4:22:29", "throughput": 2336.86, "total_tokens": 40211760} {"current_steps": 20890, "total_steps": 40000, "loss": 0.0984, "lr": 2.3255870521748565e-05, "epoch": 3.4078636104086795, "percentage": 52.23, "elapsed_time": "4:46:49", "remaining_time": "4:22:23", "throughput": 2337.16, "total_tokens": 40221808} {"current_steps": 20895, "total_steps": 40000, "loss": 0.1517, "lr": 2.3246077100298474e-05, "epoch": 3.4086793376294966, "percentage": 52.24, "elapsed_time": "4:46:51", "remaining_time": "4:22:17", "throughput": 2337.43, "total_tokens": 40231152} {"current_steps": 20900, "total_steps": 40000, "loss": 0.0018, "lr": 2.3236283949325328e-05, "epoch": 3.409495064850314, "percentage": 52.25, "elapsed_time": "4:46:53", "remaining_time": "4:22:11", "throughput": 2337.81, "total_tokens": 40242512} {"current_steps": 20905, "total_steps": 40000, "loss": 0.0054, "lr": 2.3226491070339368e-05, "epoch": 3.4103107920711313, "percentage": 52.26, "elapsed_time": "4:46:55", "remaining_time": "4:22:05", "throughput": 2338.08, "total_tokens": 40252160} {"current_steps": 20910, "total_steps": 40000, "loss": 0.0011, "lr": 2.3216698464850762e-05, "epoch": 3.411126519291949, "percentage": 52.28, "elapsed_time": "4:46:57", "remaining_time": "4:21:59", "throughput": 2338.33, "total_tokens": 40261264} {"current_steps": 20915, "total_steps": 40000, "loss": 0.005, "lr": 2.320690613436967e-05, "epoch": 3.411942246512766, "percentage": 52.29, "elapsed_time": "4:47:00", "remaining_time": "4:21:53", "throughput": 2338.64, "total_tokens": 40271472} {"current_steps": 20920, "total_steps": 40000, "loss": 0.0015, "lr": 2.3197114080406192e-05, "epoch": 3.4127579737335836, "percentage": 52.3, "elapsed_time": "4:47:02", "remaining_time": "4:21:47", "throughput": 2338.99, "total_tokens": 40282208} {"current_steps": 20925, "total_steps": 40000, "loss": 0.0036, "lr": 2.3187322304470365e-05, "epoch": 3.4135737009544007, "percentage": 52.31, "elapsed_time": "4:47:04", "remaining_time": "4:21:41", "throughput": 2339.22, "total_tokens": 40291008} {"current_steps": 20930, "total_steps": 40000, "loss": 0.002, "lr": 2.3177530808072222e-05, "epoch": 3.4143894281752183, "percentage": 52.33, "elapsed_time": "4:47:06", "remaining_time": "4:21:35", "throughput": 2339.54, "total_tokens": 40301440} {"current_steps": 20935, "total_steps": 40000, "loss": 0.0248, "lr": 2.316773959272174e-05, "epoch": 3.4152051553960354, "percentage": 52.34, "elapsed_time": "4:47:08", "remaining_time": "4:21:29", "throughput": 2339.83, "total_tokens": 40311328} {"current_steps": 20940, "total_steps": 40000, "loss": 0.0255, "lr": 2.3157948659928823e-05, "epoch": 3.416020882616853, "percentage": 52.35, "elapsed_time": "4:47:10", "remaining_time": "4:21:23", "throughput": 2340.07, "total_tokens": 40320208} {"current_steps": 20945, "total_steps": 40000, "loss": 0.0903, "lr": 2.3148158011203388e-05, "epoch": 3.41683660983767, "percentage": 52.36, "elapsed_time": "4:47:12", "remaining_time": "4:21:17", "throughput": 2340.34, "total_tokens": 40329808} {"current_steps": 20950, "total_steps": 40000, "loss": 0.0011, "lr": 2.3138367648055253e-05, "epoch": 3.4176523370584877, "percentage": 52.38, "elapsed_time": "4:47:14", "remaining_time": "4:21:11", "throughput": 2340.58, "total_tokens": 40338720} {"current_steps": 20955, "total_steps": 40000, "loss": 0.003, "lr": 2.312857757199422e-05, "epoch": 3.418468064279305, "percentage": 52.39, "elapsed_time": "4:47:16", "remaining_time": "4:21:05", "throughput": 2340.88, "total_tokens": 40348832} {"current_steps": 20960, "total_steps": 40000, "loss": 0.0011, "lr": 2.3118787784530048e-05, "epoch": 3.4192837915001224, "percentage": 52.4, "elapsed_time": "4:47:18", "remaining_time": "4:20:59", "throughput": 2341.13, "total_tokens": 40357872} {"current_steps": 20965, "total_steps": 40000, "loss": 0.0014, "lr": 2.310899828717243e-05, "epoch": 3.4200995187209395, "percentage": 52.41, "elapsed_time": "4:47:20", "remaining_time": "4:20:53", "throughput": 2341.41, "total_tokens": 40367584} {"current_steps": 20970, "total_steps": 40000, "loss": 0.0147, "lr": 2.309920908143104e-05, "epoch": 3.420915245941757, "percentage": 52.42, "elapsed_time": "4:47:22", "remaining_time": "4:20:47", "throughput": 2341.74, "total_tokens": 40378064} {"current_steps": 20975, "total_steps": 40000, "loss": 0.1209, "lr": 2.308942016881551e-05, "epoch": 3.4217309731625742, "percentage": 52.44, "elapsed_time": "4:47:24", "remaining_time": "4:20:41", "throughput": 2342.02, "total_tokens": 40387760} {"current_steps": 20980, "total_steps": 40000, "loss": 0.0044, "lr": 2.307963155083539e-05, "epoch": 3.422546700383392, "percentage": 52.45, "elapsed_time": "4:47:26", "remaining_time": "4:20:35", "throughput": 2342.32, "total_tokens": 40397824} {"current_steps": 20985, "total_steps": 40000, "loss": 0.0177, "lr": 2.306984322900022e-05, "epoch": 3.423362427604209, "percentage": 52.46, "elapsed_time": "4:47:28", "remaining_time": "4:20:29", "throughput": 2342.67, "total_tokens": 40408608} {"current_steps": 20990, "total_steps": 40000, "loss": 0.0008, "lr": 2.3060055204819482e-05, "epoch": 3.4241781548250265, "percentage": 52.48, "elapsed_time": "4:47:31", "remaining_time": "4:20:23", "throughput": 2342.92, "total_tokens": 40417936} {"current_steps": 20995, "total_steps": 40000, "loss": 0.0675, "lr": 2.3050267479802604e-05, "epoch": 3.424993882045844, "percentage": 52.49, "elapsed_time": "4:47:33", "remaining_time": "4:20:17", "throughput": 2343.2, "total_tokens": 40427600} {"current_steps": 21000, "total_steps": 40000, "loss": 0.0049, "lr": 2.304048005545899e-05, "epoch": 3.4258096092666612, "percentage": 52.5, "elapsed_time": "4:47:35", "remaining_time": "4:20:11", "throughput": 2343.44, "total_tokens": 40436560} {"current_steps": 21000, "total_steps": 40000, "eval_loss": 0.24008458852767944, "epoch": 3.4258096092666612, "percentage": 52.5, "elapsed_time": "4:48:56", "remaining_time": "4:21:24", "throughput": 2332.52, "total_tokens": 40436560} {"current_steps": 21005, "total_steps": 40000, "loss": 0.0969, "lr": 2.3030692933297972e-05, "epoch": 3.4266253364874784, "percentage": 52.51, "elapsed_time": "4:48:59", "remaining_time": "4:21:20", "throughput": 2332.6, "total_tokens": 40446528} {"current_steps": 21010, "total_steps": 40000, "loss": 0.1491, "lr": 2.3020906114828843e-05, "epoch": 3.427441063708296, "percentage": 52.52, "elapsed_time": "4:49:01", "remaining_time": "4:21:14", "throughput": 2332.83, "total_tokens": 40455296} {"current_steps": 21015, "total_steps": 40000, "loss": 0.2425, "lr": 2.301111960156088e-05, "epoch": 3.4282567909291135, "percentage": 52.54, "elapsed_time": "4:49:03", "remaining_time": "4:21:08", "throughput": 2333.15, "total_tokens": 40465712} {"current_steps": 21020, "total_steps": 40000, "loss": 0.076, "lr": 2.300133339500326e-05, "epoch": 3.4290725181499306, "percentage": 52.55, "elapsed_time": "4:49:05", "remaining_time": "4:21:02", "throughput": 2333.4, "total_tokens": 40474880} {"current_steps": 21025, "total_steps": 40000, "loss": 0.1318, "lr": 2.2991547496665148e-05, "epoch": 3.429888245370748, "percentage": 52.56, "elapsed_time": "4:49:07", "remaining_time": "4:20:56", "throughput": 2333.61, "total_tokens": 40483392} {"current_steps": 21030, "total_steps": 40000, "loss": 0.0037, "lr": 2.298176190805565e-05, "epoch": 3.4307039725915653, "percentage": 52.58, "elapsed_time": "4:49:10", "remaining_time": "4:20:50", "throughput": 2333.85, "total_tokens": 40492400} {"current_steps": 21035, "total_steps": 40000, "loss": 0.0022, "lr": 2.2971976630683826e-05, "epoch": 3.431519699812383, "percentage": 52.59, "elapsed_time": "4:49:12", "remaining_time": "4:20:44", "throughput": 2334.17, "total_tokens": 40502704} {"current_steps": 21040, "total_steps": 40000, "loss": 0.0009, "lr": 2.29621916660587e-05, "epoch": 3.4323354270332, "percentage": 52.6, "elapsed_time": "4:49:14", "remaining_time": "4:20:38", "throughput": 2334.37, "total_tokens": 40511152} {"current_steps": 21045, "total_steps": 40000, "loss": 0.0013, "lr": 2.295240701568922e-05, "epoch": 3.4331511542540176, "percentage": 52.61, "elapsed_time": "4:49:16", "remaining_time": "4:20:32", "throughput": 2334.67, "total_tokens": 40521648} {"current_steps": 21050, "total_steps": 40000, "loss": 0.0015, "lr": 2.2942622681084312e-05, "epoch": 3.4339668814748348, "percentage": 52.62, "elapsed_time": "4:49:18", "remaining_time": "4:20:26", "throughput": 2334.95, "total_tokens": 40531360} {"current_steps": 21055, "total_steps": 40000, "loss": 0.0451, "lr": 2.293283866375284e-05, "epoch": 3.4347826086956523, "percentage": 52.64, "elapsed_time": "4:49:20", "remaining_time": "4:20:20", "throughput": 2335.24, "total_tokens": 40541200} {"current_steps": 21060, "total_steps": 40000, "loss": 0.0023, "lr": 2.2923054965203627e-05, "epoch": 3.4355983359164695, "percentage": 52.65, "elapsed_time": "4:49:22", "remaining_time": "4:20:14", "throughput": 2335.53, "total_tokens": 40551104} {"current_steps": 21065, "total_steps": 40000, "loss": 0.0017, "lr": 2.2913271586945443e-05, "epoch": 3.436414063137287, "percentage": 52.66, "elapsed_time": "4:49:24", "remaining_time": "4:20:08", "throughput": 2335.8, "total_tokens": 40560608} {"current_steps": 21070, "total_steps": 40000, "loss": 0.0107, "lr": 2.290348853048699e-05, "epoch": 3.437229790358104, "percentage": 52.68, "elapsed_time": "4:49:26", "remaining_time": "4:20:02", "throughput": 2336.07, "total_tokens": 40570208} {"current_steps": 21075, "total_steps": 40000, "loss": 0.0011, "lr": 2.2893705797336956e-05, "epoch": 3.4380455175789217, "percentage": 52.69, "elapsed_time": "4:49:28", "remaining_time": "4:19:57", "throughput": 2336.25, "total_tokens": 40578160} {"current_steps": 21080, "total_steps": 40000, "loss": 0.1454, "lr": 2.288392338900397e-05, "epoch": 3.438861244799739, "percentage": 52.7, "elapsed_time": "4:49:31", "remaining_time": "4:19:51", "throughput": 2336.52, "total_tokens": 40587792} {"current_steps": 21085, "total_steps": 40000, "loss": 0.003, "lr": 2.2874141306996576e-05, "epoch": 3.4396769720205564, "percentage": 52.71, "elapsed_time": "4:49:33", "remaining_time": "4:19:45", "throughput": 2336.76, "total_tokens": 40596800} {"current_steps": 21090, "total_steps": 40000, "loss": 0.001, "lr": 2.2864359552823312e-05, "epoch": 3.4404926992413736, "percentage": 52.73, "elapsed_time": "4:49:35", "remaining_time": "4:19:39", "throughput": 2337.01, "total_tokens": 40605952} {"current_steps": 21095, "total_steps": 40000, "loss": 0.0833, "lr": 2.2854578127992648e-05, "epoch": 3.441308426462191, "percentage": 52.74, "elapsed_time": "4:49:37", "remaining_time": "4:19:33", "throughput": 2337.28, "total_tokens": 40615456} {"current_steps": 21100, "total_steps": 40000, "loss": 0.1153, "lr": 2.2844797034012988e-05, "epoch": 3.4421241536830083, "percentage": 52.75, "elapsed_time": "4:49:39", "remaining_time": "4:19:27", "throughput": 2337.58, "total_tokens": 40625536} {"current_steps": 21105, "total_steps": 40000, "loss": 0.0662, "lr": 2.2835016272392722e-05, "epoch": 3.442939880903826, "percentage": 52.76, "elapsed_time": "4:49:41", "remaining_time": "4:19:21", "throughput": 2337.91, "total_tokens": 40636000} {"current_steps": 21110, "total_steps": 40000, "loss": 0.0032, "lr": 2.2825235844640142e-05, "epoch": 3.443755608124643, "percentage": 52.78, "elapsed_time": "4:49:43", "remaining_time": "4:19:15", "throughput": 2338.21, "total_tokens": 40646032} {"current_steps": 21115, "total_steps": 40000, "loss": 0.0029, "lr": 2.2815455752263522e-05, "epoch": 3.4445713353454606, "percentage": 52.79, "elapsed_time": "4:49:45", "remaining_time": "4:19:09", "throughput": 2338.5, "total_tokens": 40656016} {"current_steps": 21120, "total_steps": 40000, "loss": 0.02, "lr": 2.2805675996771092e-05, "epoch": 3.4453870625662777, "percentage": 52.8, "elapsed_time": "4:49:47", "remaining_time": "4:19:03", "throughput": 2338.89, "total_tokens": 40667584} {"current_steps": 21125, "total_steps": 40000, "loss": 0.1826, "lr": 2.2795896579670987e-05, "epoch": 3.4462027897870953, "percentage": 52.81, "elapsed_time": "4:49:49", "remaining_time": "4:18:57", "throughput": 2339.22, "total_tokens": 40678176} {"current_steps": 21130, "total_steps": 40000, "loss": 0.0969, "lr": 2.2786117502471337e-05, "epoch": 3.4470185170079124, "percentage": 52.83, "elapsed_time": "4:49:51", "remaining_time": "4:18:51", "throughput": 2339.41, "total_tokens": 40686480} {"current_steps": 21135, "total_steps": 40000, "loss": 0.0345, "lr": 2.2776338766680185e-05, "epoch": 3.44783424422873, "percentage": 52.84, "elapsed_time": "4:49:53", "remaining_time": "4:18:45", "throughput": 2339.73, "total_tokens": 40696816} {"current_steps": 21140, "total_steps": 40000, "loss": 0.0011, "lr": 2.2766560373805533e-05, "epoch": 3.448649971449547, "percentage": 52.85, "elapsed_time": "4:49:55", "remaining_time": "4:18:39", "throughput": 2340.02, "total_tokens": 40706736} {"current_steps": 21145, "total_steps": 40000, "loss": 0.0919, "lr": 2.2756782325355353e-05, "epoch": 3.4494656986703647, "percentage": 52.86, "elapsed_time": "4:49:57", "remaining_time": "4:18:33", "throughput": 2340.29, "total_tokens": 40716192} {"current_steps": 21150, "total_steps": 40000, "loss": 0.0015, "lr": 2.2747004622837514e-05, "epoch": 3.450281425891182, "percentage": 52.88, "elapsed_time": "4:50:00", "remaining_time": "4:18:27", "throughput": 2340.54, "total_tokens": 40725520} {"current_steps": 21155, "total_steps": 40000, "loss": 0.09, "lr": 2.2737227267759878e-05, "epoch": 3.4510971531119994, "percentage": 52.89, "elapsed_time": "4:50:02", "remaining_time": "4:18:21", "throughput": 2340.8, "total_tokens": 40734800} {"current_steps": 21160, "total_steps": 40000, "loss": 0.0676, "lr": 2.272745026163024e-05, "epoch": 3.4519128803328165, "percentage": 52.9, "elapsed_time": "4:50:04", "remaining_time": "4:18:15", "throughput": 2341.06, "total_tokens": 40744208} {"current_steps": 21165, "total_steps": 40000, "loss": 0.0481, "lr": 2.271767360595633e-05, "epoch": 3.452728607553634, "percentage": 52.91, "elapsed_time": "4:50:06", "remaining_time": "4:18:10", "throughput": 2341.34, "total_tokens": 40753936} {"current_steps": 21170, "total_steps": 40000, "loss": 0.0046, "lr": 2.270789730224583e-05, "epoch": 3.4535443347744517, "percentage": 52.92, "elapsed_time": "4:50:08", "remaining_time": "4:18:04", "throughput": 2341.65, "total_tokens": 40764208} {"current_steps": 21175, "total_steps": 40000, "loss": 0.3085, "lr": 2.2698121352006367e-05, "epoch": 3.454360061995269, "percentage": 52.94, "elapsed_time": "4:50:10", "remaining_time": "4:17:58", "throughput": 2341.95, "total_tokens": 40774272} {"current_steps": 21180, "total_steps": 40000, "loss": 0.0096, "lr": 2.2688345756745517e-05, "epoch": 3.455175789216086, "percentage": 52.95, "elapsed_time": "4:50:12", "remaining_time": "4:17:52", "throughput": 2342.22, "total_tokens": 40783696} {"current_steps": 21185, "total_steps": 40000, "loss": 0.0034, "lr": 2.267857051797081e-05, "epoch": 3.4559915164369035, "percentage": 52.96, "elapsed_time": "4:50:14", "remaining_time": "4:17:46", "throughput": 2342.49, "total_tokens": 40793376} {"current_steps": 21190, "total_steps": 40000, "loss": 0.0019, "lr": 2.2668795637189695e-05, "epoch": 3.456807243657721, "percentage": 52.98, "elapsed_time": "4:50:16", "remaining_time": "4:17:40", "throughput": 2342.7, "total_tokens": 40801920} {"current_steps": 21195, "total_steps": 40000, "loss": 0.0908, "lr": 2.2659021115909586e-05, "epoch": 3.457622970878538, "percentage": 52.99, "elapsed_time": "4:50:18", "remaining_time": "4:17:34", "throughput": 2342.94, "total_tokens": 40810880} {"current_steps": 21200, "total_steps": 40000, "loss": 0.0513, "lr": 2.2649246955637847e-05, "epoch": 3.4584386980993553, "percentage": 53.0, "elapsed_time": "4:50:20", "remaining_time": "4:17:28", "throughput": 2343.23, "total_tokens": 40820704} {"current_steps": 21200, "total_steps": 40000, "eval_loss": 0.2144426554441452, "epoch": 3.4584386980993553, "percentage": 53.0, "elapsed_time": "4:51:41", "remaining_time": "4:18:40", "throughput": 2332.41, "total_tokens": 40820704} {"current_steps": 21205, "total_steps": 40000, "loss": 0.0028, "lr": 2.2639473157881766e-05, "epoch": 3.459254425320173, "percentage": 53.01, "elapsed_time": "4:51:45", "remaining_time": "4:18:35", "throughput": 2332.44, "total_tokens": 40830592} {"current_steps": 21210, "total_steps": 40000, "loss": 0.09, "lr": 2.2629699724148594e-05, "epoch": 3.4600701525409905, "percentage": 53.02, "elapsed_time": "4:51:47", "remaining_time": "4:18:30", "throughput": 2332.71, "total_tokens": 40840160} {"current_steps": 21215, "total_steps": 40000, "loss": 0.0887, "lr": 2.26199266559455e-05, "epoch": 3.4608858797618076, "percentage": 53.04, "elapsed_time": "4:51:49", "remaining_time": "4:18:24", "throughput": 2332.95, "total_tokens": 40849248} {"current_steps": 21220, "total_steps": 40000, "loss": 0.0012, "lr": 2.2610153954779625e-05, "epoch": 3.461701606982625, "percentage": 53.05, "elapsed_time": "4:51:51", "remaining_time": "4:18:18", "throughput": 2333.27, "total_tokens": 40859648} {"current_steps": 21225, "total_steps": 40000, "loss": 0.0014, "lr": 2.2600381622158056e-05, "epoch": 3.4625173342034423, "percentage": 53.06, "elapsed_time": "4:51:53", "remaining_time": "4:18:12", "throughput": 2333.51, "total_tokens": 40868672} {"current_steps": 21230, "total_steps": 40000, "loss": 0.1636, "lr": 2.2590609659587783e-05, "epoch": 3.46333306142426, "percentage": 53.08, "elapsed_time": "4:51:55", "remaining_time": "4:18:06", "throughput": 2333.74, "total_tokens": 40877552} {"current_steps": 21235, "total_steps": 40000, "loss": 0.0016, "lr": 2.2580838068575787e-05, "epoch": 3.464148788645077, "percentage": 53.09, "elapsed_time": "4:51:57", "remaining_time": "4:18:00", "throughput": 2333.96, "total_tokens": 40886224} {"current_steps": 21240, "total_steps": 40000, "loss": 0.0486, "lr": 2.257106685062896e-05, "epoch": 3.4649645158658946, "percentage": 53.1, "elapsed_time": "4:52:00", "remaining_time": "4:17:54", "throughput": 2334.15, "total_tokens": 40894464} {"current_steps": 21245, "total_steps": 40000, "loss": 0.191, "lr": 2.256129600725415e-05, "epoch": 3.4657802430867117, "percentage": 53.11, "elapsed_time": "4:52:02", "remaining_time": "4:17:48", "throughput": 2334.46, "total_tokens": 40904688} {"current_steps": 21250, "total_steps": 40000, "loss": 0.093, "lr": 2.2551525539958145e-05, "epoch": 3.4665959703075293, "percentage": 53.12, "elapsed_time": "4:52:04", "remaining_time": "4:17:42", "throughput": 2334.73, "total_tokens": 40914304} {"current_steps": 21255, "total_steps": 40000, "loss": 0.0664, "lr": 2.2541755450247663e-05, "epoch": 3.4674116975283464, "percentage": 53.14, "elapsed_time": "4:52:06", "remaining_time": "4:17:36", "throughput": 2334.98, "total_tokens": 40923504} {"current_steps": 21260, "total_steps": 40000, "loss": 0.031, "lr": 2.2531985739629382e-05, "epoch": 3.468227424749164, "percentage": 53.15, "elapsed_time": "4:52:08", "remaining_time": "4:17:30", "throughput": 2335.28, "total_tokens": 40933536} {"current_steps": 21265, "total_steps": 40000, "loss": 0.061, "lr": 2.2522216409609924e-05, "epoch": 3.469043151969981, "percentage": 53.16, "elapsed_time": "4:52:10", "remaining_time": "4:17:24", "throughput": 2335.51, "total_tokens": 40942336} {"current_steps": 21270, "total_steps": 40000, "loss": 0.1712, "lr": 2.2512447461695826e-05, "epoch": 3.4698588791907987, "percentage": 53.17, "elapsed_time": "4:52:12", "remaining_time": "4:17:18", "throughput": 2335.79, "total_tokens": 40952096} {"current_steps": 21275, "total_steps": 40000, "loss": 0.034, "lr": 2.2502678897393593e-05, "epoch": 3.470674606411616, "percentage": 53.19, "elapsed_time": "4:52:14", "remaining_time": "4:17:12", "throughput": 2336.1, "total_tokens": 40962416} {"current_steps": 21280, "total_steps": 40000, "loss": 0.0803, "lr": 2.2492910718209665e-05, "epoch": 3.4714903336324334, "percentage": 53.2, "elapsed_time": "4:52:16", "remaining_time": "4:17:06", "throughput": 2336.35, "total_tokens": 40971680} {"current_steps": 21285, "total_steps": 40000, "loss": 0.0092, "lr": 2.2483142925650398e-05, "epoch": 3.4723060608532506, "percentage": 53.21, "elapsed_time": "4:52:18", "remaining_time": "4:17:01", "throughput": 2336.7, "total_tokens": 40982656} {"current_steps": 21290, "total_steps": 40000, "loss": 0.0017, "lr": 2.247337552122213e-05, "epoch": 3.473121788074068, "percentage": 53.23, "elapsed_time": "4:52:20", "remaining_time": "4:16:55", "throughput": 2336.88, "total_tokens": 40990560} {"current_steps": 21295, "total_steps": 40000, "loss": 0.0562, "lr": 2.24636085064311e-05, "epoch": 3.4739375152948853, "percentage": 53.24, "elapsed_time": "4:52:22", "remaining_time": "4:16:49", "throughput": 2337.16, "total_tokens": 41000416} {"current_steps": 21300, "total_steps": 40000, "loss": 0.0076, "lr": 2.245384188278351e-05, "epoch": 3.474753242515703, "percentage": 53.25, "elapsed_time": "4:52:24", "remaining_time": "4:16:43", "throughput": 2337.44, "total_tokens": 41010144} {"current_steps": 21305, "total_steps": 40000, "loss": 0.0026, "lr": 2.2444075651785513e-05, "epoch": 3.47556896973652, "percentage": 53.26, "elapsed_time": "4:52:26", "remaining_time": "4:16:37", "throughput": 2337.74, "total_tokens": 41020288} {"current_steps": 21310, "total_steps": 40000, "loss": 0.0077, "lr": 2.243430981494316e-05, "epoch": 3.4763846969573375, "percentage": 53.27, "elapsed_time": "4:52:29", "remaining_time": "4:16:31", "throughput": 2337.97, "total_tokens": 41029040} {"current_steps": 21315, "total_steps": 40000, "loss": 0.0016, "lr": 2.2424544373762475e-05, "epoch": 3.4772004241781547, "percentage": 53.29, "elapsed_time": "4:52:31", "remaining_time": "4:16:25", "throughput": 2338.3, "total_tokens": 41039728} {"current_steps": 21320, "total_steps": 40000, "loss": 0.0604, "lr": 2.2414779329749418e-05, "epoch": 3.4780161513989722, "percentage": 53.3, "elapsed_time": "4:52:33", "remaining_time": "4:16:19", "throughput": 2338.54, "total_tokens": 41048736} {"current_steps": 21325, "total_steps": 40000, "loss": 0.0048, "lr": 2.2405014684409873e-05, "epoch": 3.4788318786197894, "percentage": 53.31, "elapsed_time": "4:52:35", "remaining_time": "4:16:13", "throughput": 2338.78, "total_tokens": 41057824} {"current_steps": 21330, "total_steps": 40000, "loss": 0.0021, "lr": 2.239525043924968e-05, "epoch": 3.479647605840607, "percentage": 53.33, "elapsed_time": "4:52:37", "remaining_time": "4:16:07", "throughput": 2339.07, "total_tokens": 41067824} {"current_steps": 21335, "total_steps": 40000, "loss": 0.0927, "lr": 2.2385486595774592e-05, "epoch": 3.480463333061424, "percentage": 53.34, "elapsed_time": "4:52:39", "remaining_time": "4:16:01", "throughput": 2339.39, "total_tokens": 41078144} {"current_steps": 21340, "total_steps": 40000, "loss": 0.0009, "lr": 2.237572315549033e-05, "epoch": 3.4812790602822417, "percentage": 53.35, "elapsed_time": "4:52:41", "remaining_time": "4:15:55", "throughput": 2339.61, "total_tokens": 41086896} {"current_steps": 21345, "total_steps": 40000, "loss": 0.1047, "lr": 2.2365960119902545e-05, "epoch": 3.482094787503059, "percentage": 53.36, "elapsed_time": "4:52:43", "remaining_time": "4:15:50", "throughput": 2339.9, "total_tokens": 41096944} {"current_steps": 21350, "total_steps": 40000, "loss": 0.0059, "lr": 2.2356197490516806e-05, "epoch": 3.4829105147238764, "percentage": 53.37, "elapsed_time": "4:52:45", "remaining_time": "4:15:44", "throughput": 2340.25, "total_tokens": 41107904} {"current_steps": 21355, "total_steps": 40000, "loss": 0.0066, "lr": 2.234643526883863e-05, "epoch": 3.4837262419446935, "percentage": 53.39, "elapsed_time": "4:52:47", "remaining_time": "4:15:38", "throughput": 2340.54, "total_tokens": 41117920} {"current_steps": 21360, "total_steps": 40000, "loss": 0.0036, "lr": 2.2336673456373497e-05, "epoch": 3.484541969165511, "percentage": 53.4, "elapsed_time": "4:52:49", "remaining_time": "4:15:32", "throughput": 2340.77, "total_tokens": 41126784} {"current_steps": 21365, "total_steps": 40000, "loss": 0.0012, "lr": 2.2326912054626772e-05, "epoch": 3.4853576963863286, "percentage": 53.41, "elapsed_time": "4:52:51", "remaining_time": "4:15:26", "throughput": 2341.01, "total_tokens": 41135792} {"current_steps": 21370, "total_steps": 40000, "loss": 0.0048, "lr": 2.2317151065103813e-05, "epoch": 3.4861734236071458, "percentage": 53.42, "elapsed_time": "4:52:53", "remaining_time": "4:15:20", "throughput": 2341.31, "total_tokens": 41145840} {"current_steps": 21375, "total_steps": 40000, "loss": 0.1579, "lr": 2.2307390489309865e-05, "epoch": 3.486989150827963, "percentage": 53.44, "elapsed_time": "4:52:55", "remaining_time": "4:15:14", "throughput": 2341.62, "total_tokens": 41156224} {"current_steps": 21380, "total_steps": 40000, "loss": 0.2276, "lr": 2.2297630328750146e-05, "epoch": 3.4878048780487805, "percentage": 53.45, "elapsed_time": "4:52:58", "remaining_time": "4:15:08", "throughput": 2341.89, "total_tokens": 41165776} {"current_steps": 21385, "total_steps": 40000, "loss": 0.0886, "lr": 2.228787058492979e-05, "epoch": 3.488620605269598, "percentage": 53.46, "elapsed_time": "4:53:00", "remaining_time": "4:15:02", "throughput": 2342.15, "total_tokens": 41175216} {"current_steps": 21390, "total_steps": 40000, "loss": 0.0925, "lr": 2.2278111259353875e-05, "epoch": 3.489436332490415, "percentage": 53.47, "elapsed_time": "4:53:02", "remaining_time": "4:14:57", "throughput": 2342.41, "total_tokens": 41184656} {"current_steps": 21395, "total_steps": 40000, "loss": 0.0263, "lr": 2.2268352353527395e-05, "epoch": 3.4902520597112328, "percentage": 53.49, "elapsed_time": "4:53:04", "remaining_time": "4:14:51", "throughput": 2342.66, "total_tokens": 41193888} {"current_steps": 21400, "total_steps": 40000, "loss": 0.0909, "lr": 2.225859386895533e-05, "epoch": 3.49106778693205, "percentage": 53.5, "elapsed_time": "4:53:06", "remaining_time": "4:14:45", "throughput": 2342.85, "total_tokens": 41202080} {"current_steps": 21400, "total_steps": 40000, "eval_loss": 0.23057134449481964, "epoch": 3.49106778693205, "percentage": 53.5, "elapsed_time": "4:54:27", "remaining_time": "4:15:55", "throughput": 2332.14, "total_tokens": 41202080} {"current_steps": 21405, "total_steps": 40000, "loss": 0.0502, "lr": 2.2248835807142525e-05, "epoch": 3.4918835141528675, "percentage": 53.51, "elapsed_time": "4:54:30", "remaining_time": "4:15:50", "throughput": 2332.21, "total_tokens": 41211888} {"current_steps": 21410, "total_steps": 40000, "loss": 0.0815, "lr": 2.2239078169593826e-05, "epoch": 3.4926992413736846, "percentage": 53.52, "elapsed_time": "4:54:32", "remaining_time": "4:15:45", "throughput": 2332.44, "total_tokens": 41220832} {"current_steps": 21415, "total_steps": 40000, "loss": 0.0011, "lr": 2.222932095781396e-05, "epoch": 3.493514968594502, "percentage": 53.54, "elapsed_time": "4:54:34", "remaining_time": "4:15:39", "throughput": 2332.67, "total_tokens": 41229600} {"current_steps": 21420, "total_steps": 40000, "loss": 0.0021, "lr": 2.221956417330762e-05, "epoch": 3.4943306958153193, "percentage": 53.55, "elapsed_time": "4:54:36", "remaining_time": "4:15:33", "throughput": 2332.89, "total_tokens": 41238416} {"current_steps": 21425, "total_steps": 40000, "loss": 0.1323, "lr": 2.2209807817579438e-05, "epoch": 3.495146423036137, "percentage": 53.56, "elapsed_time": "4:54:39", "remaining_time": "4:15:27", "throughput": 2333.21, "total_tokens": 41248816} {"current_steps": 21430, "total_steps": 40000, "loss": 0.0121, "lr": 2.220005189213394e-05, "epoch": 3.495962150256954, "percentage": 53.57, "elapsed_time": "4:54:41", "remaining_time": "4:15:21", "throughput": 2333.44, "total_tokens": 41257872} {"current_steps": 21435, "total_steps": 40000, "loss": 0.1264, "lr": 2.2190296398475624e-05, "epoch": 3.4967778774777716, "percentage": 53.59, "elapsed_time": "4:54:43", "remaining_time": "4:15:15", "throughput": 2333.74, "total_tokens": 41267920} {"current_steps": 21440, "total_steps": 40000, "loss": 0.018, "lr": 2.2180541338108926e-05, "epoch": 3.4975936046985887, "percentage": 53.6, "elapsed_time": "4:54:45", "remaining_time": "4:15:09", "throughput": 2334.03, "total_tokens": 41278000} {"current_steps": 21445, "total_steps": 40000, "loss": 0.056, "lr": 2.2170786712538176e-05, "epoch": 3.4984093319194063, "percentage": 53.61, "elapsed_time": "4:54:47", "remaining_time": "4:15:03", "throughput": 2334.32, "total_tokens": 41287872} {"current_steps": 21450, "total_steps": 40000, "loss": 0.0044, "lr": 2.216103252326768e-05, "epoch": 3.4992250591402234, "percentage": 53.62, "elapsed_time": "4:54:49", "remaining_time": "4:14:57", "throughput": 2334.61, "total_tokens": 41297840} {"current_steps": 21455, "total_steps": 40000, "loss": 0.0205, "lr": 2.2151278771801635e-05, "epoch": 3.500040786361041, "percentage": 53.64, "elapsed_time": "4:54:51", "remaining_time": "4:14:51", "throughput": 2334.88, "total_tokens": 41307440} {"current_steps": 21460, "total_steps": 40000, "loss": 0.0625, "lr": 2.21415254596442e-05, "epoch": 3.500856513581858, "percentage": 53.65, "elapsed_time": "4:54:53", "remaining_time": "4:14:46", "throughput": 2335.08, "total_tokens": 41315840} {"current_steps": 21465, "total_steps": 40000, "loss": 0.0022, "lr": 2.213177258829947e-05, "epoch": 3.5016722408026757, "percentage": 53.66, "elapsed_time": "4:54:55", "remaining_time": "4:14:40", "throughput": 2335.34, "total_tokens": 41325216} {"current_steps": 21470, "total_steps": 40000, "loss": 0.0989, "lr": 2.2122020159271445e-05, "epoch": 3.502487968023493, "percentage": 53.67, "elapsed_time": "4:54:57", "remaining_time": "4:14:34", "throughput": 2335.6, "total_tokens": 41334768} {"current_steps": 21475, "total_steps": 40000, "loss": 0.0018, "lr": 2.2112268174064075e-05, "epoch": 3.5033036952443104, "percentage": 53.69, "elapsed_time": "4:54:59", "remaining_time": "4:14:28", "throughput": 2335.87, "total_tokens": 41344432} {"current_steps": 21480, "total_steps": 40000, "loss": 0.1293, "lr": 2.2102516634181253e-05, "epoch": 3.5041194224651275, "percentage": 53.7, "elapsed_time": "4:55:01", "remaining_time": "4:14:22", "throughput": 2336.11, "total_tokens": 41353408} {"current_steps": 21485, "total_steps": 40000, "loss": 0.3536, "lr": 2.209276554112677e-05, "epoch": 3.504935149685945, "percentage": 53.71, "elapsed_time": "4:55:03", "remaining_time": "4:14:16", "throughput": 2336.33, "total_tokens": 41362272} {"current_steps": 21490, "total_steps": 40000, "loss": 0.0022, "lr": 2.2083014896404384e-05, "epoch": 3.5057508769067622, "percentage": 53.73, "elapsed_time": "4:55:05", "remaining_time": "4:14:10", "throughput": 2336.63, "total_tokens": 41372336} {"current_steps": 21495, "total_steps": 40000, "loss": 0.1453, "lr": 2.207326470151775e-05, "epoch": 3.50656660412758, "percentage": 53.74, "elapsed_time": "4:55:08", "remaining_time": "4:14:04", "throughput": 2336.92, "total_tokens": 41382368} {"current_steps": 21500, "total_steps": 40000, "loss": 0.0508, "lr": 2.2063514957970477e-05, "epoch": 3.507382331348397, "percentage": 53.75, "elapsed_time": "4:55:10", "remaining_time": "4:13:58", "throughput": 2337.23, "total_tokens": 41392672} {"current_steps": 21505, "total_steps": 40000, "loss": 0.0037, "lr": 2.205376566726611e-05, "epoch": 3.5081980585692145, "percentage": 53.76, "elapsed_time": "4:55:12", "remaining_time": "4:13:53", "throughput": 2337.54, "total_tokens": 41402992} {"current_steps": 21510, "total_steps": 40000, "loss": 0.1605, "lr": 2.204401683090809e-05, "epoch": 3.5090137857900316, "percentage": 53.77, "elapsed_time": "4:55:14", "remaining_time": "4:13:47", "throughput": 2337.85, "total_tokens": 41413312} {"current_steps": 21515, "total_steps": 40000, "loss": 0.0033, "lr": 2.203426845039982e-05, "epoch": 3.5098295130108492, "percentage": 53.79, "elapsed_time": "4:55:16", "remaining_time": "4:13:41", "throughput": 2338.09, "total_tokens": 41422448} {"current_steps": 21520, "total_steps": 40000, "loss": 0.0901, "lr": 2.202452052724464e-05, "epoch": 3.510645240231667, "percentage": 53.8, "elapsed_time": "4:55:18", "remaining_time": "4:13:35", "throughput": 2338.37, "total_tokens": 41432224} {"current_steps": 21525, "total_steps": 40000, "loss": 0.0671, "lr": 2.2014773062945777e-05, "epoch": 3.511460967452484, "percentage": 53.81, "elapsed_time": "4:55:20", "remaining_time": "4:13:29", "throughput": 2338.63, "total_tokens": 41441712} {"current_steps": 21530, "total_steps": 40000, "loss": 0.1109, "lr": 2.2005026059006427e-05, "epoch": 3.512276694673301, "percentage": 53.83, "elapsed_time": "4:55:22", "remaining_time": "4:13:23", "throughput": 2338.89, "total_tokens": 41451232} {"current_steps": 21535, "total_steps": 40000, "loss": 0.0607, "lr": 2.1995279516929695e-05, "epoch": 3.5130924218941186, "percentage": 53.84, "elapsed_time": "4:55:24", "remaining_time": "4:13:17", "throughput": 2339.17, "total_tokens": 41460960} {"current_steps": 21540, "total_steps": 40000, "loss": 0.0036, "lr": 2.1985533438218613e-05, "epoch": 3.513908149114936, "percentage": 53.85, "elapsed_time": "4:55:26", "remaining_time": "4:13:11", "throughput": 2339.47, "total_tokens": 41471072} {"current_steps": 21545, "total_steps": 40000, "loss": 0.191, "lr": 2.197578782437617e-05, "epoch": 3.5147238763357533, "percentage": 53.86, "elapsed_time": "4:55:28", "remaining_time": "4:13:06", "throughput": 2339.79, "total_tokens": 41481680} {"current_steps": 21550, "total_steps": 40000, "loss": 0.0029, "lr": 2.196604267690524e-05, "epoch": 3.5155396035565705, "percentage": 53.87, "elapsed_time": "4:55:30", "remaining_time": "4:13:00", "throughput": 2340.05, "total_tokens": 41491216} {"current_steps": 21555, "total_steps": 40000, "loss": 0.0594, "lr": 2.195629799730865e-05, "epoch": 3.516355330777388, "percentage": 53.89, "elapsed_time": "4:55:32", "remaining_time": "4:12:54", "throughput": 2340.35, "total_tokens": 41501232} {"current_steps": 21560, "total_steps": 40000, "loss": 0.0033, "lr": 2.1946553787089173e-05, "epoch": 3.5171710579982056, "percentage": 53.9, "elapsed_time": "4:55:35", "remaining_time": "4:12:48", "throughput": 2340.6, "total_tokens": 41510656} {"current_steps": 21565, "total_steps": 40000, "loss": 0.0016, "lr": 2.193681004774947e-05, "epoch": 3.5179867852190227, "percentage": 53.91, "elapsed_time": "4:55:37", "remaining_time": "4:12:42", "throughput": 2340.91, "total_tokens": 41520864} {"current_steps": 21570, "total_steps": 40000, "loss": 0.0685, "lr": 2.1927066780792154e-05, "epoch": 3.51880251243984, "percentage": 53.92, "elapsed_time": "4:55:39", "remaining_time": "4:12:36", "throughput": 2341.11, "total_tokens": 41529264} {"current_steps": 21575, "total_steps": 40000, "loss": 0.0019, "lr": 2.191732398771975e-05, "epoch": 3.5196182396606575, "percentage": 53.94, "elapsed_time": "4:55:41", "remaining_time": "4:12:30", "throughput": 2341.45, "total_tokens": 41540144} {"current_steps": 21580, "total_steps": 40000, "loss": 0.0433, "lr": 2.1907581670034725e-05, "epoch": 3.520433966881475, "percentage": 53.95, "elapsed_time": "4:55:43", "remaining_time": "4:12:25", "throughput": 2341.72, "total_tokens": 41549808} {"current_steps": 21585, "total_steps": 40000, "loss": 0.0581, "lr": 2.189783982923948e-05, "epoch": 3.521249694102292, "percentage": 53.96, "elapsed_time": "4:55:45", "remaining_time": "4:12:19", "throughput": 2341.92, "total_tokens": 41558192} {"current_steps": 21590, "total_steps": 40000, "loss": 0.0016, "lr": 2.1888098466836303e-05, "epoch": 3.5220654213231093, "percentage": 53.97, "elapsed_time": "4:55:47", "remaining_time": "4:12:13", "throughput": 2342.23, "total_tokens": 41568624} {"current_steps": 21595, "total_steps": 40000, "loss": 0.1447, "lr": 2.1878357584327457e-05, "epoch": 3.522881148543927, "percentage": 53.99, "elapsed_time": "4:55:49", "remaining_time": "4:12:07", "throughput": 2342.54, "total_tokens": 41578848} {"current_steps": 21600, "total_steps": 40000, "loss": 0.0033, "lr": 2.1868617183215103e-05, "epoch": 3.5236968757647444, "percentage": 54.0, "elapsed_time": "4:55:51", "remaining_time": "4:12:01", "throughput": 2342.81, "total_tokens": 41588560} {"current_steps": 21600, "total_steps": 40000, "eval_loss": 0.22020629048347473, "epoch": 3.5236968757647444, "percentage": 54.0, "elapsed_time": "4:57:12", "remaining_time": "4:13:10", "throughput": 2332.2, "total_tokens": 41588560} {"current_steps": 21605, "total_steps": 40000, "loss": 0.0025, "lr": 2.1858877265001327e-05, "epoch": 3.5245126029855616, "percentage": 54.01, "elapsed_time": "4:57:15", "remaining_time": "4:13:05", "throughput": 2332.22, "total_tokens": 41597536} {"current_steps": 21610, "total_steps": 40000, "loss": 0.1039, "lr": 2.184913783118816e-05, "epoch": 3.525328330206379, "percentage": 54.02, "elapsed_time": "4:57:18", "remaining_time": "4:13:00", "throughput": 2332.43, "total_tokens": 41606080} {"current_steps": 21615, "total_steps": 40000, "loss": 0.0347, "lr": 2.1839398883277522e-05, "epoch": 3.5261440574271963, "percentage": 54.04, "elapsed_time": "4:57:20", "remaining_time": "4:12:54", "throughput": 2332.71, "total_tokens": 41615952} {"current_steps": 21620, "total_steps": 40000, "loss": 0.0026, "lr": 2.182966042277129e-05, "epoch": 3.526959784648014, "percentage": 54.05, "elapsed_time": "4:57:22", "remaining_time": "4:12:48", "throughput": 2332.92, "total_tokens": 41624480} {"current_steps": 21625, "total_steps": 40000, "loss": 0.1642, "lr": 2.181992245117128e-05, "epoch": 3.527775511868831, "percentage": 54.06, "elapsed_time": "4:57:24", "remaining_time": "4:12:42", "throughput": 2333.18, "total_tokens": 41633904} {"current_steps": 21630, "total_steps": 40000, "loss": 0.0689, "lr": 2.181018496997918e-05, "epoch": 3.5285912390896486, "percentage": 54.07, "elapsed_time": "4:57:26", "remaining_time": "4:12:36", "throughput": 2333.47, "total_tokens": 41644000} {"current_steps": 21635, "total_steps": 40000, "loss": 0.2096, "lr": 2.1800447980696648e-05, "epoch": 3.5294069663104657, "percentage": 54.09, "elapsed_time": "4:57:28", "remaining_time": "4:12:30", "throughput": 2333.69, "total_tokens": 41652720} {"current_steps": 21640, "total_steps": 40000, "loss": 0.0997, "lr": 2.1790711484825248e-05, "epoch": 3.5302226935312833, "percentage": 54.1, "elapsed_time": "4:57:30", "remaining_time": "4:12:24", "throughput": 2333.95, "total_tokens": 41662144} {"current_steps": 21645, "total_steps": 40000, "loss": 0.0455, "lr": 2.178097548386646e-05, "epoch": 3.5310384207521004, "percentage": 54.11, "elapsed_time": "4:57:32", "remaining_time": "4:12:19", "throughput": 2334.23, "total_tokens": 41671984} {"current_steps": 21650, "total_steps": 40000, "loss": 0.1018, "lr": 2.1771239979321712e-05, "epoch": 3.531854147972918, "percentage": 54.12, "elapsed_time": "4:57:34", "remaining_time": "4:12:13", "throughput": 2334.48, "total_tokens": 41681312} {"current_steps": 21655, "total_steps": 40000, "loss": 0.0026, "lr": 2.1761504972692327e-05, "epoch": 3.532669875193735, "percentage": 54.14, "elapsed_time": "4:57:36", "remaining_time": "4:12:07", "throughput": 2334.75, "total_tokens": 41691056} {"current_steps": 21660, "total_steps": 40000, "loss": 0.0139, "lr": 2.1751770465479572e-05, "epoch": 3.5334856024145527, "percentage": 54.15, "elapsed_time": "4:57:38", "remaining_time": "4:12:01", "throughput": 2335.07, "total_tokens": 41701520} {"current_steps": 21665, "total_steps": 40000, "loss": 0.0733, "lr": 2.174203645918464e-05, "epoch": 3.53430132963537, "percentage": 54.16, "elapsed_time": "4:57:40", "remaining_time": "4:11:55", "throughput": 2335.31, "total_tokens": 41710752} {"current_steps": 21670, "total_steps": 40000, "loss": 0.0027, "lr": 2.1732302955308624e-05, "epoch": 3.5351170568561874, "percentage": 54.17, "elapsed_time": "4:57:42", "remaining_time": "4:11:49", "throughput": 2335.6, "total_tokens": 41720752} {"current_steps": 21675, "total_steps": 40000, "loss": 0.1476, "lr": 2.172256995535255e-05, "epoch": 3.5359327840770045, "percentage": 54.19, "elapsed_time": "4:57:45", "remaining_time": "4:11:43", "throughput": 2335.93, "total_tokens": 41731504} {"current_steps": 21680, "total_steps": 40000, "loss": 0.161, "lr": 2.171283746081739e-05, "epoch": 3.536748511297822, "percentage": 54.2, "elapsed_time": "4:57:47", "remaining_time": "4:11:38", "throughput": 2336.24, "total_tokens": 41741936} {"current_steps": 21685, "total_steps": 40000, "loss": 0.0691, "lr": 2.1703105473203988e-05, "epoch": 3.537564238518639, "percentage": 54.21, "elapsed_time": "4:57:49", "remaining_time": "4:11:32", "throughput": 2336.53, "total_tokens": 41751968} {"current_steps": 21690, "total_steps": 40000, "loss": 0.0451, "lr": 2.1693373994013168e-05, "epoch": 3.538379965739457, "percentage": 54.23, "elapsed_time": "4:57:51", "remaining_time": "4:11:26", "throughput": 2336.81, "total_tokens": 41761808} {"current_steps": 21695, "total_steps": 40000, "loss": 0.1123, "lr": 2.168364302474562e-05, "epoch": 3.539195692960274, "percentage": 54.24, "elapsed_time": "4:57:53", "remaining_time": "4:11:20", "throughput": 2337.08, "total_tokens": 41771504} {"current_steps": 21700, "total_steps": 40000, "loss": 0.0846, "lr": 2.167391256690199e-05, "epoch": 3.5400114201810915, "percentage": 54.25, "elapsed_time": "4:57:55", "remaining_time": "4:11:14", "throughput": 2337.41, "total_tokens": 41782160} {"current_steps": 21705, "total_steps": 40000, "loss": 0.0026, "lr": 2.1664182621982855e-05, "epoch": 3.5408271474019086, "percentage": 54.26, "elapsed_time": "4:57:57", "remaining_time": "4:11:08", "throughput": 2337.67, "total_tokens": 41791664} {"current_steps": 21710, "total_steps": 40000, "loss": 0.0938, "lr": 2.1654453191488673e-05, "epoch": 3.541642874622726, "percentage": 54.27, "elapsed_time": "4:57:59", "remaining_time": "4:11:02", "throughput": 2337.92, "total_tokens": 41800896} {"current_steps": 21715, "total_steps": 40000, "loss": 0.0045, "lr": 2.1644724276919846e-05, "epoch": 3.5424586018435438, "percentage": 54.29, "elapsed_time": "4:58:01", "remaining_time": "4:10:57", "throughput": 2338.23, "total_tokens": 41811424} {"current_steps": 21720, "total_steps": 40000, "loss": 0.003, "lr": 2.1634995879776715e-05, "epoch": 3.543274329064361, "percentage": 54.3, "elapsed_time": "4:58:03", "remaining_time": "4:10:51", "throughput": 2338.52, "total_tokens": 41821440} {"current_steps": 21725, "total_steps": 40000, "loss": 0.0876, "lr": 2.162526800155949e-05, "epoch": 3.544090056285178, "percentage": 54.31, "elapsed_time": "4:58:05", "remaining_time": "4:10:45", "throughput": 2338.72, "total_tokens": 41829776} {"current_steps": 21730, "total_steps": 40000, "loss": 0.067, "lr": 2.1615540643768363e-05, "epoch": 3.5449057835059956, "percentage": 54.33, "elapsed_time": "4:58:07", "remaining_time": "4:10:39", "throughput": 2339.05, "total_tokens": 41840512} {"current_steps": 21735, "total_steps": 40000, "loss": 0.1605, "lr": 2.160581380790339e-05, "epoch": 3.545721510726813, "percentage": 54.34, "elapsed_time": "4:58:09", "remaining_time": "4:10:33", "throughput": 2339.3, "total_tokens": 41849952} {"current_steps": 21740, "total_steps": 40000, "loss": 0.0107, "lr": 2.1596087495464586e-05, "epoch": 3.5465372379476303, "percentage": 54.35, "elapsed_time": "4:58:11", "remaining_time": "4:10:27", "throughput": 2339.54, "total_tokens": 41858944} {"current_steps": 21745, "total_steps": 40000, "loss": 0.0935, "lr": 2.1586361707951866e-05, "epoch": 3.5473529651684474, "percentage": 54.36, "elapsed_time": "4:58:14", "remaining_time": "4:10:22", "throughput": 2339.87, "total_tokens": 41869760} {"current_steps": 21750, "total_steps": 40000, "loss": 0.0151, "lr": 2.157663644686507e-05, "epoch": 3.548168692389265, "percentage": 54.37, "elapsed_time": "4:58:16", "remaining_time": "4:10:16", "throughput": 2340.12, "total_tokens": 41879072} {"current_steps": 21755, "total_steps": 40000, "loss": 0.004, "lr": 2.156691171370396e-05, "epoch": 3.5489844196100826, "percentage": 54.39, "elapsed_time": "4:58:18", "remaining_time": "4:10:10", "throughput": 2340.4, "total_tokens": 41888912} {"current_steps": 21760, "total_steps": 40000, "loss": 0.002, "lr": 2.1557187509968195e-05, "epoch": 3.5498001468308997, "percentage": 54.4, "elapsed_time": "4:58:20", "remaining_time": "4:10:04", "throughput": 2340.64, "total_tokens": 41898176} {"current_steps": 21765, "total_steps": 40000, "loss": 0.2004, "lr": 2.1547463837157382e-05, "epoch": 3.550615874051717, "percentage": 54.41, "elapsed_time": "4:58:22", "remaining_time": "4:09:58", "throughput": 2340.83, "total_tokens": 41906336} {"current_steps": 21770, "total_steps": 40000, "loss": 0.0413, "lr": 2.1537740696771045e-05, "epoch": 3.5514316012725344, "percentage": 54.43, "elapsed_time": "4:58:24", "remaining_time": "4:09:53", "throughput": 2341.13, "total_tokens": 41916640} {"current_steps": 21775, "total_steps": 40000, "loss": 0.0295, "lr": 2.1528018090308587e-05, "epoch": 3.552247328493352, "percentage": 54.44, "elapsed_time": "4:58:26", "remaining_time": "4:09:47", "throughput": 2341.43, "total_tokens": 41926880} {"current_steps": 21780, "total_steps": 40000, "loss": 0.0351, "lr": 2.151829601926938e-05, "epoch": 3.553063055714169, "percentage": 54.45, "elapsed_time": "4:58:28", "remaining_time": "4:09:41", "throughput": 2341.69, "total_tokens": 41936240} {"current_steps": 21785, "total_steps": 40000, "loss": 0.054, "lr": 2.1508574485152684e-05, "epoch": 3.5538787829349863, "percentage": 54.46, "elapsed_time": "4:58:30", "remaining_time": "4:09:35", "throughput": 2342.02, "total_tokens": 41947136} {"current_steps": 21790, "total_steps": 40000, "loss": 0.0414, "lr": 2.1498853489457667e-05, "epoch": 3.554694510155804, "percentage": 54.47, "elapsed_time": "4:58:32", "remaining_time": "4:09:29", "throughput": 2342.33, "total_tokens": 41957440} {"current_steps": 21795, "total_steps": 40000, "loss": 0.0031, "lr": 2.1489133033683455e-05, "epoch": 3.5555102373766214, "percentage": 54.49, "elapsed_time": "4:58:34", "remaining_time": "4:09:23", "throughput": 2342.65, "total_tokens": 41968000} {"current_steps": 21800, "total_steps": 40000, "loss": 0.0445, "lr": 2.1479413119329038e-05, "epoch": 3.5563259645974385, "percentage": 54.5, "elapsed_time": "4:58:36", "remaining_time": "4:09:18", "throughput": 2342.93, "total_tokens": 41977888} {"current_steps": 21800, "total_steps": 40000, "eval_loss": 0.22205375134944916, "epoch": 3.5563259645974385, "percentage": 54.5, "elapsed_time": "4:59:57", "remaining_time": "4:10:25", "throughput": 2332.41, "total_tokens": 41977888} {"current_steps": 21805, "total_steps": 40000, "loss": 0.0128, "lr": 2.1469693747893355e-05, "epoch": 3.557141691818256, "percentage": 54.51, "elapsed_time": "5:00:01", "remaining_time": "4:10:21", "throughput": 2332.5, "total_tokens": 41988144} {"current_steps": 21810, "total_steps": 40000, "loss": 0.0533, "lr": 2.1459974920875274e-05, "epoch": 3.5579574190390733, "percentage": 54.52, "elapsed_time": "5:00:03", "remaining_time": "4:10:15", "throughput": 2332.77, "total_tokens": 41997824} {"current_steps": 21815, "total_steps": 40000, "loss": 0.0019, "lr": 2.145025663977354e-05, "epoch": 3.558773146259891, "percentage": 54.54, "elapsed_time": "5:00:05", "remaining_time": "4:10:09", "throughput": 2333.06, "total_tokens": 42007808} {"current_steps": 21820, "total_steps": 40000, "loss": 0.0013, "lr": 2.1440538906086844e-05, "epoch": 3.559588873480708, "percentage": 54.55, "elapsed_time": "5:00:07", "remaining_time": "4:10:03", "throughput": 2333.29, "total_tokens": 42016816} {"current_steps": 21825, "total_steps": 40000, "loss": 0.0023, "lr": 2.1430821721313782e-05, "epoch": 3.5604046007015255, "percentage": 54.56, "elapsed_time": "5:00:09", "remaining_time": "4:09:57", "throughput": 2333.6, "total_tokens": 42027264} {"current_steps": 21830, "total_steps": 40000, "loss": 0.1673, "lr": 2.142110508695286e-05, "epoch": 3.5612203279223427, "percentage": 54.57, "elapsed_time": "5:00:11", "remaining_time": "4:09:51", "throughput": 2333.89, "total_tokens": 42037312} {"current_steps": 21835, "total_steps": 40000, "loss": 0.0017, "lr": 2.1411389004502515e-05, "epoch": 3.5620360551431602, "percentage": 54.59, "elapsed_time": "5:00:13", "remaining_time": "4:09:46", "throughput": 2334.16, "total_tokens": 42047008} {"current_steps": 21840, "total_steps": 40000, "loss": 0.0016, "lr": 2.140167347546107e-05, "epoch": 3.5628517823639774, "percentage": 54.6, "elapsed_time": "5:00:15", "remaining_time": "4:09:40", "throughput": 2334.45, "total_tokens": 42057056} {"current_steps": 21845, "total_steps": 40000, "loss": 0.0825, "lr": 2.1391958501326793e-05, "epoch": 3.563667509584795, "percentage": 54.61, "elapsed_time": "5:00:17", "remaining_time": "4:09:34", "throughput": 2334.71, "total_tokens": 42066512} {"current_steps": 21850, "total_steps": 40000, "loss": 0.1055, "lr": 2.1382244083597873e-05, "epoch": 3.564483236805612, "percentage": 54.62, "elapsed_time": "5:00:19", "remaining_time": "4:09:28", "throughput": 2334.93, "total_tokens": 42075280} {"current_steps": 21855, "total_steps": 40000, "loss": 0.0562, "lr": 2.137253022377237e-05, "epoch": 3.5652989640264297, "percentage": 54.64, "elapsed_time": "5:00:22", "remaining_time": "4:09:22", "throughput": 2335.22, "total_tokens": 42085392} {"current_steps": 21860, "total_steps": 40000, "loss": 0.0014, "lr": 2.136281692334829e-05, "epoch": 3.566114691247247, "percentage": 54.65, "elapsed_time": "5:00:24", "remaining_time": "4:09:16", "throughput": 2335.51, "total_tokens": 42095504} {"current_steps": 21865, "total_steps": 40000, "loss": 0.1531, "lr": 2.135310418382356e-05, "epoch": 3.5669304184680644, "percentage": 54.66, "elapsed_time": "5:00:26", "remaining_time": "4:09:11", "throughput": 2335.77, "total_tokens": 42104912} {"current_steps": 21870, "total_steps": 40000, "loss": 0.0008, "lr": 2.134339200669598e-05, "epoch": 3.5677461456888815, "percentage": 54.67, "elapsed_time": "5:00:28", "remaining_time": "4:09:05", "throughput": 2336.04, "total_tokens": 42114736} {"current_steps": 21875, "total_steps": 40000, "loss": 0.0599, "lr": 2.133368039346331e-05, "epoch": 3.568561872909699, "percentage": 54.69, "elapsed_time": "5:00:30", "remaining_time": "4:08:59", "throughput": 2336.27, "total_tokens": 42123760} {"current_steps": 21880, "total_steps": 40000, "loss": 0.031, "lr": 2.1323969345623195e-05, "epoch": 3.569377600130516, "percentage": 54.7, "elapsed_time": "5:00:32", "remaining_time": "4:08:53", "throughput": 2336.62, "total_tokens": 42134752} {"current_steps": 21885, "total_steps": 40000, "loss": 0.0835, "lr": 2.1314258864673207e-05, "epoch": 3.5701933273513338, "percentage": 54.71, "elapsed_time": "5:00:34", "remaining_time": "4:08:47", "throughput": 2336.89, "total_tokens": 42144592} {"current_steps": 21890, "total_steps": 40000, "loss": 0.0536, "lr": 2.130454895211082e-05, "epoch": 3.5710090545721513, "percentage": 54.73, "elapsed_time": "5:00:36", "remaining_time": "4:08:41", "throughput": 2337.11, "total_tokens": 42153296} {"current_steps": 21895, "total_steps": 40000, "loss": 0.04, "lr": 2.129483960943342e-05, "epoch": 3.5718247817929685, "percentage": 54.74, "elapsed_time": "5:00:38", "remaining_time": "4:08:36", "throughput": 2337.43, "total_tokens": 42163904} {"current_steps": 21900, "total_steps": 40000, "loss": 0.1574, "lr": 2.128513083813831e-05, "epoch": 3.5726405090137856, "percentage": 54.75, "elapsed_time": "5:00:40", "remaining_time": "4:08:30", "throughput": 2337.69, "total_tokens": 42173488} {"current_steps": 21905, "total_steps": 40000, "loss": 0.0602, "lr": 2.1275422639722724e-05, "epoch": 3.573456236234603, "percentage": 54.76, "elapsed_time": "5:00:42", "remaining_time": "4:08:24", "throughput": 2337.95, "total_tokens": 42182976} {"current_steps": 21910, "total_steps": 40000, "loss": 0.0426, "lr": 2.126571501568376e-05, "epoch": 3.5742719634554208, "percentage": 54.77, "elapsed_time": "5:00:44", "remaining_time": "4:08:18", "throughput": 2338.19, "total_tokens": 42192240} {"current_steps": 21915, "total_steps": 40000, "loss": 0.0859, "lr": 2.1256007967518478e-05, "epoch": 3.575087690676238, "percentage": 54.79, "elapsed_time": "5:00:46", "remaining_time": "4:08:12", "throughput": 2338.39, "total_tokens": 42200720} {"current_steps": 21920, "total_steps": 40000, "loss": 0.0846, "lr": 2.124630149672381e-05, "epoch": 3.575903417897055, "percentage": 54.8, "elapsed_time": "5:00:48", "remaining_time": "4:08:07", "throughput": 2338.67, "total_tokens": 42210544} {"current_steps": 21925, "total_steps": 40000, "loss": 0.0834, "lr": 2.1236595604796624e-05, "epoch": 3.5767191451178726, "percentage": 54.81, "elapsed_time": "5:00:51", "remaining_time": "4:08:01", "throughput": 2338.93, "total_tokens": 42220128} {"current_steps": 21930, "total_steps": 40000, "loss": 0.0892, "lr": 2.1226890293233693e-05, "epoch": 3.57753487233869, "percentage": 54.83, "elapsed_time": "5:00:53", "remaining_time": "4:07:55", "throughput": 2339.16, "total_tokens": 42229072} {"current_steps": 21935, "total_steps": 40000, "loss": 0.0829, "lr": 2.1217185563531694e-05, "epoch": 3.5783505995595073, "percentage": 54.84, "elapsed_time": "5:00:55", "remaining_time": "4:07:49", "throughput": 2339.44, "total_tokens": 42239088} {"current_steps": 21940, "total_steps": 40000, "loss": 0.0577, "lr": 2.120748141718721e-05, "epoch": 3.5791663267803244, "percentage": 54.85, "elapsed_time": "5:00:57", "remaining_time": "4:07:43", "throughput": 2339.72, "total_tokens": 42248864} {"current_steps": 21945, "total_steps": 40000, "loss": 0.1177, "lr": 2.1197777855696765e-05, "epoch": 3.579982054001142, "percentage": 54.86, "elapsed_time": "5:00:59", "remaining_time": "4:07:38", "throughput": 2339.89, "total_tokens": 42256960} {"current_steps": 21950, "total_steps": 40000, "loss": 0.0031, "lr": 2.1188074880556746e-05, "epoch": 3.5807977812219596, "percentage": 54.87, "elapsed_time": "5:01:01", "remaining_time": "4:07:32", "throughput": 2340.14, "total_tokens": 42266208} {"current_steps": 21955, "total_steps": 40000, "loss": 0.0876, "lr": 2.1178372493263495e-05, "epoch": 3.5816135084427767, "percentage": 54.89, "elapsed_time": "5:01:03", "remaining_time": "4:07:26", "throughput": 2340.41, "total_tokens": 42276016} {"current_steps": 21960, "total_steps": 40000, "loss": 0.0246, "lr": 2.116867069531322e-05, "epoch": 3.582429235663594, "percentage": 54.9, "elapsed_time": "5:01:05", "remaining_time": "4:07:20", "throughput": 2340.7, "total_tokens": 42286128} {"current_steps": 21965, "total_steps": 40000, "loss": 0.0439, "lr": 2.1158969488202073e-05, "epoch": 3.5832449628844114, "percentage": 54.91, "elapsed_time": "5:01:07", "remaining_time": "4:07:14", "throughput": 2340.93, "total_tokens": 42295120} {"current_steps": 21970, "total_steps": 40000, "loss": 0.0942, "lr": 2.114926887342611e-05, "epoch": 3.584060690105229, "percentage": 54.93, "elapsed_time": "5:01:09", "remaining_time": "4:07:09", "throughput": 2341.24, "total_tokens": 42305456} {"current_steps": 21975, "total_steps": 40000, "loss": 0.0026, "lr": 2.113956885248127e-05, "epoch": 3.584876417326046, "percentage": 54.94, "elapsed_time": "5:01:11", "remaining_time": "4:07:03", "throughput": 2341.48, "total_tokens": 42314688} {"current_steps": 21980, "total_steps": 40000, "loss": 0.0015, "lr": 2.112986942686342e-05, "epoch": 3.5856921445468637, "percentage": 54.95, "elapsed_time": "5:01:13", "remaining_time": "4:06:57", "throughput": 2341.73, "total_tokens": 42323936} {"current_steps": 21985, "total_steps": 40000, "loss": 0.001, "lr": 2.112017059806835e-05, "epoch": 3.586507871767681, "percentage": 54.96, "elapsed_time": "5:01:15", "remaining_time": "4:06:51", "throughput": 2341.93, "total_tokens": 42332496} {"current_steps": 21990, "total_steps": 40000, "loss": 0.1494, "lr": 2.1110472367591724e-05, "epoch": 3.5873235989884984, "percentage": 54.97, "elapsed_time": "5:01:17", "remaining_time": "4:06:46", "throughput": 2342.21, "total_tokens": 42342432} {"current_steps": 21995, "total_steps": 40000, "loss": 0.009, "lr": 2.1100774736929145e-05, "epoch": 3.5881393262093155, "percentage": 54.99, "elapsed_time": "5:01:20", "remaining_time": "4:06:40", "throughput": 2342.45, "total_tokens": 42351584} {"current_steps": 22000, "total_steps": 40000, "loss": 0.0054, "lr": 2.10910777075761e-05, "epoch": 3.588955053430133, "percentage": 55.0, "elapsed_time": "5:01:22", "remaining_time": "4:06:34", "throughput": 2342.72, "total_tokens": 42361392} {"current_steps": 22000, "total_steps": 40000, "eval_loss": 0.2167072296142578, "epoch": 3.588955053430133, "percentage": 55.0, "elapsed_time": "5:02:42", "remaining_time": "4:07:40", "throughput": 2332.3, "total_tokens": 42361392} {"current_steps": 22005, "total_steps": 40000, "loss": 0.0014, "lr": 2.108138128102799e-05, "epoch": 3.5897707806509502, "percentage": 55.01, "elapsed_time": "5:02:46", "remaining_time": "4:07:36", "throughput": 2332.36, "total_tokens": 42371904} {"current_steps": 22010, "total_steps": 40000, "loss": 0.0065, "lr": 2.107168545878014e-05, "epoch": 3.590586507871768, "percentage": 55.02, "elapsed_time": "5:02:49", "remaining_time": "4:07:30", "throughput": 2332.58, "total_tokens": 42380688} {"current_steps": 22015, "total_steps": 40000, "loss": 0.1269, "lr": 2.106199024232775e-05, "epoch": 3.591402235092585, "percentage": 55.04, "elapsed_time": "5:02:51", "remaining_time": "4:07:24", "throughput": 2332.87, "total_tokens": 42390864} {"current_steps": 22020, "total_steps": 40000, "loss": 0.0018, "lr": 2.105229563316595e-05, "epoch": 3.5922179623134025, "percentage": 55.05, "elapsed_time": "5:02:53", "remaining_time": "4:07:18", "throughput": 2333.09, "total_tokens": 42399696} {"current_steps": 22025, "total_steps": 40000, "loss": 0.0014, "lr": 2.1042601632789784e-05, "epoch": 3.5930336895342196, "percentage": 55.06, "elapsed_time": "5:02:55", "remaining_time": "4:07:13", "throughput": 2333.36, "total_tokens": 42409360} {"current_steps": 22030, "total_steps": 40000, "loss": 0.0055, "lr": 2.103290824269417e-05, "epoch": 3.593849416755037, "percentage": 55.07, "elapsed_time": "5:02:57", "remaining_time": "4:07:07", "throughput": 2333.63, "total_tokens": 42419168} {"current_steps": 22035, "total_steps": 40000, "loss": 0.1788, "lr": 2.1023215464373965e-05, "epoch": 3.5946651439758543, "percentage": 55.09, "elapsed_time": "5:02:59", "remaining_time": "4:07:01", "throughput": 2333.93, "total_tokens": 42429456} {"current_steps": 22040, "total_steps": 40000, "loss": 0.0042, "lr": 2.1013523299323908e-05, "epoch": 3.595480871196672, "percentage": 55.1, "elapsed_time": "5:03:01", "remaining_time": "4:06:55", "throughput": 2334.25, "total_tokens": 42440112} {"current_steps": 22045, "total_steps": 40000, "loss": 0.1266, "lr": 2.1003831749038654e-05, "epoch": 3.596296598417489, "percentage": 55.11, "elapsed_time": "5:03:03", "remaining_time": "4:06:49", "throughput": 2334.56, "total_tokens": 42450576} {"current_steps": 22050, "total_steps": 40000, "loss": 0.0285, "lr": 2.099414081501277e-05, "epoch": 3.5971123256383066, "percentage": 55.12, "elapsed_time": "5:03:05", "remaining_time": "4:06:44", "throughput": 2334.79, "total_tokens": 42459488} {"current_steps": 22055, "total_steps": 40000, "loss": 0.0017, "lr": 2.09844504987407e-05, "epoch": 3.5979280528591238, "percentage": 55.14, "elapsed_time": "5:03:07", "remaining_time": "4:06:38", "throughput": 2334.95, "total_tokens": 42467280} {"current_steps": 22060, "total_steps": 40000, "loss": 0.0513, "lr": 2.097476080171683e-05, "epoch": 3.5987437800799413, "percentage": 55.15, "elapsed_time": "5:03:09", "remaining_time": "4:06:32", "throughput": 2335.22, "total_tokens": 42477104} {"current_steps": 22065, "total_steps": 40000, "loss": 0.0022, "lr": 2.0965071725435436e-05, "epoch": 3.5995595073007585, "percentage": 55.16, "elapsed_time": "5:03:11", "remaining_time": "4:06:26", "throughput": 2335.5, "total_tokens": 42486960} {"current_steps": 22070, "total_steps": 40000, "loss": 0.0255, "lr": 2.0955383271390684e-05, "epoch": 3.600375234521576, "percentage": 55.17, "elapsed_time": "5:03:13", "remaining_time": "4:06:20", "throughput": 2335.68, "total_tokens": 42495104} {"current_steps": 22075, "total_steps": 40000, "loss": 0.0534, "lr": 2.094569544107666e-05, "epoch": 3.601190961742393, "percentage": 55.19, "elapsed_time": "5:03:15", "remaining_time": "4:06:15", "throughput": 2335.88, "total_tokens": 42503664} {"current_steps": 22080, "total_steps": 40000, "loss": 0.0032, "lr": 2.093600823598735e-05, "epoch": 3.6020066889632107, "percentage": 55.2, "elapsed_time": "5:03:18", "remaining_time": "4:06:09", "throughput": 2336.16, "total_tokens": 42513584} {"current_steps": 22085, "total_steps": 40000, "loss": 0.0828, "lr": 2.092632165761663e-05, "epoch": 3.6028224161840283, "percentage": 55.21, "elapsed_time": "5:03:20", "remaining_time": "4:06:03", "throughput": 2336.48, "total_tokens": 42524240} {"current_steps": 22090, "total_steps": 40000, "loss": 0.0745, "lr": 2.091663570745832e-05, "epoch": 3.6036381434048455, "percentage": 55.23, "elapsed_time": "5:03:22", "remaining_time": "4:05:57", "throughput": 2336.74, "total_tokens": 42533680} {"current_steps": 22095, "total_steps": 40000, "loss": 0.07, "lr": 2.0906950387006086e-05, "epoch": 3.6044538706256626, "percentage": 55.24, "elapsed_time": "5:03:24", "remaining_time": "4:05:52", "throughput": 2337.03, "total_tokens": 42543904} {"current_steps": 22100, "total_steps": 40000, "loss": 0.0352, "lr": 2.0897265697753543e-05, "epoch": 3.60526959784648, "percentage": 55.25, "elapsed_time": "5:03:26", "remaining_time": "4:05:46", "throughput": 2337.29, "total_tokens": 42553344} {"current_steps": 22105, "total_steps": 40000, "loss": 0.0015, "lr": 2.088758164119419e-05, "epoch": 3.6060853250672977, "percentage": 55.26, "elapsed_time": "5:03:28", "remaining_time": "4:05:40", "throughput": 2337.51, "total_tokens": 42562256} {"current_steps": 22110, "total_steps": 40000, "loss": 0.1651, "lr": 2.0877898218821428e-05, "epoch": 3.606901052288115, "percentage": 55.27, "elapsed_time": "5:03:30", "remaining_time": "4:05:34", "throughput": 2337.75, "total_tokens": 42571504} {"current_steps": 22115, "total_steps": 40000, "loss": 0.0011, "lr": 2.0868215432128565e-05, "epoch": 3.607716779508932, "percentage": 55.29, "elapsed_time": "5:03:32", "remaining_time": "4:05:28", "throughput": 2338.04, "total_tokens": 42581712} {"current_steps": 22120, "total_steps": 40000, "loss": 0.0201, "lr": 2.0858533282608796e-05, "epoch": 3.6085325067297496, "percentage": 55.3, "elapsed_time": "5:03:34", "remaining_time": "4:05:23", "throughput": 2338.31, "total_tokens": 42591440} {"current_steps": 22125, "total_steps": 40000, "loss": 0.0048, "lr": 2.084885177175524e-05, "epoch": 3.609348233950567, "percentage": 55.31, "elapsed_time": "5:03:36", "remaining_time": "4:05:17", "throughput": 2338.56, "total_tokens": 42600720} {"current_steps": 22130, "total_steps": 40000, "loss": 0.0015, "lr": 2.0839170901060917e-05, "epoch": 3.6101639611713843, "percentage": 55.33, "elapsed_time": "5:03:38", "remaining_time": "4:05:11", "throughput": 2338.87, "total_tokens": 42611248} {"current_steps": 22135, "total_steps": 40000, "loss": 0.0262, "lr": 2.082949067201872e-05, "epoch": 3.6109796883922014, "percentage": 55.34, "elapsed_time": "5:03:40", "remaining_time": "4:05:05", "throughput": 2339.1, "total_tokens": 42620304} {"current_steps": 22140, "total_steps": 40000, "loss": 0.0018, "lr": 2.0819811086121475e-05, "epoch": 3.611795415613019, "percentage": 55.35, "elapsed_time": "5:03:42", "remaining_time": "4:05:00", "throughput": 2339.36, "total_tokens": 42629936} {"current_steps": 22145, "total_steps": 40000, "loss": 0.0845, "lr": 2.08101321448619e-05, "epoch": 3.6126111428338366, "percentage": 55.36, "elapsed_time": "5:03:44", "remaining_time": "4:04:54", "throughput": 2339.65, "total_tokens": 42639968} {"current_steps": 22150, "total_steps": 40000, "loss": 0.001, "lr": 2.080045384973259e-05, "epoch": 3.6134268700546537, "percentage": 55.38, "elapsed_time": "5:03:47", "remaining_time": "4:04:48", "throughput": 2339.94, "total_tokens": 42650176} {"current_steps": 22155, "total_steps": 40000, "loss": 0.0032, "lr": 2.0790776202226082e-05, "epoch": 3.614242597275471, "percentage": 55.39, "elapsed_time": "5:03:49", "remaining_time": "4:04:42", "throughput": 2340.23, "total_tokens": 42660368} {"current_steps": 22160, "total_steps": 40000, "loss": 0.0175, "lr": 2.078109920383477e-05, "epoch": 3.6150583244962884, "percentage": 55.4, "elapsed_time": "5:03:51", "remaining_time": "4:04:37", "throughput": 2340.54, "total_tokens": 42670736} {"current_steps": 22165, "total_steps": 40000, "loss": 0.2362, "lr": 2.0771422856050978e-05, "epoch": 3.615874051717106, "percentage": 55.41, "elapsed_time": "5:03:53", "remaining_time": "4:04:31", "throughput": 2340.78, "total_tokens": 42679952} {"current_steps": 22170, "total_steps": 40000, "loss": 0.0029, "lr": 2.076174716036693e-05, "epoch": 3.616689778937923, "percentage": 55.43, "elapsed_time": "5:03:55", "remaining_time": "4:04:25", "throughput": 2341.02, "total_tokens": 42689216} {"current_steps": 22175, "total_steps": 40000, "loss": 0.001, "lr": 2.075207211827472e-05, "epoch": 3.6175055061587407, "percentage": 55.44, "elapsed_time": "5:03:57", "remaining_time": "4:04:19", "throughput": 2341.31, "total_tokens": 42699392} {"current_steps": 22180, "total_steps": 40000, "loss": 0.223, "lr": 2.074239773126638e-05, "epoch": 3.618321233379558, "percentage": 55.45, "elapsed_time": "5:03:59", "remaining_time": "4:04:14", "throughput": 2341.62, "total_tokens": 42709920} {"current_steps": 22185, "total_steps": 40000, "loss": 0.0013, "lr": 2.073272400083382e-05, "epoch": 3.6191369606003754, "percentage": 55.46, "elapsed_time": "5:04:01", "remaining_time": "4:04:08", "throughput": 2341.88, "total_tokens": 42719616} {"current_steps": 22190, "total_steps": 40000, "loss": 0.0818, "lr": 2.072305092846883e-05, "epoch": 3.6199526878211925, "percentage": 55.47, "elapsed_time": "5:04:03", "remaining_time": "4:04:02", "throughput": 2342.11, "total_tokens": 42728624} {"current_steps": 22195, "total_steps": 40000, "loss": 0.1258, "lr": 2.0713378515663152e-05, "epoch": 3.62076841504201, "percentage": 55.49, "elapsed_time": "5:04:05", "remaining_time": "4:03:56", "throughput": 2342.35, "total_tokens": 42737840} {"current_steps": 22200, "total_steps": 40000, "loss": 0.0047, "lr": 2.070370676390836e-05, "epoch": 3.621584142262827, "percentage": 55.5, "elapsed_time": "5:04:07", "remaining_time": "4:03:51", "throughput": 2342.56, "total_tokens": 42746416} {"current_steps": 22200, "total_steps": 40000, "eval_loss": 0.22714954614639282, "epoch": 3.621584142262827, "percentage": 55.5, "elapsed_time": "5:05:28", "remaining_time": "4:04:55", "throughput": 2332.23, "total_tokens": 42746416} {"current_steps": 22205, "total_steps": 40000, "loss": 0.0309, "lr": 2.0694035674695974e-05, "epoch": 3.622399869483645, "percentage": 55.51, "elapsed_time": "5:05:32", "remaining_time": "4:04:51", "throughput": 2332.29, "total_tokens": 42756288} {"current_steps": 22210, "total_steps": 40000, "loss": 0.0294, "lr": 2.0684365249517416e-05, "epoch": 3.623215596704462, "percentage": 55.53, "elapsed_time": "5:05:34", "remaining_time": "4:04:45", "throughput": 2332.59, "total_tokens": 42766688} {"current_steps": 22215, "total_steps": 40000, "loss": 0.1816, "lr": 2.067469548986396e-05, "epoch": 3.6240313239252795, "percentage": 55.54, "elapsed_time": "5:05:36", "remaining_time": "4:04:39", "throughput": 2332.81, "total_tokens": 42775520} {"current_steps": 22220, "total_steps": 40000, "loss": 0.0043, "lr": 2.066502639722681e-05, "epoch": 3.6248470511460966, "percentage": 55.55, "elapsed_time": "5:05:38", "remaining_time": "4:04:34", "throughput": 2333.09, "total_tokens": 42785360} {"current_steps": 22225, "total_steps": 40000, "loss": 0.0499, "lr": 2.065535797309708e-05, "epoch": 3.625662778366914, "percentage": 55.56, "elapsed_time": "5:05:40", "remaining_time": "4:04:28", "throughput": 2333.32, "total_tokens": 42794432} {"current_steps": 22230, "total_steps": 40000, "loss": 0.0181, "lr": 2.0645690218965736e-05, "epoch": 3.6264785055877313, "percentage": 55.57, "elapsed_time": "5:05:42", "remaining_time": "4:04:22", "throughput": 2333.59, "total_tokens": 42804272} {"current_steps": 22235, "total_steps": 40000, "loss": 0.1281, "lr": 2.063602313632369e-05, "epoch": 3.627294232808549, "percentage": 55.59, "elapsed_time": "5:05:44", "remaining_time": "4:04:16", "throughput": 2333.89, "total_tokens": 42814544} {"current_steps": 22240, "total_steps": 40000, "loss": 0.0652, "lr": 2.0626356726661704e-05, "epoch": 3.628109960029366, "percentage": 55.6, "elapsed_time": "5:05:46", "remaining_time": "4:04:11", "throughput": 2334.16, "total_tokens": 42824496} {"current_steps": 22245, "total_steps": 40000, "loss": 0.0041, "lr": 2.0616690991470477e-05, "epoch": 3.6289256872501836, "percentage": 55.61, "elapsed_time": "5:05:48", "remaining_time": "4:04:05", "throughput": 2334.42, "total_tokens": 42834032} {"current_steps": 22250, "total_steps": 40000, "loss": 0.1532, "lr": 2.0607025932240595e-05, "epoch": 3.6297414144710007, "percentage": 55.62, "elapsed_time": "5:05:50", "remaining_time": "4:03:59", "throughput": 2334.56, "total_tokens": 42841344} {"current_steps": 22255, "total_steps": 40000, "loss": 0.084, "lr": 2.059736155046251e-05, "epoch": 3.6305571416918183, "percentage": 55.64, "elapsed_time": "5:05:53", "remaining_time": "4:03:53", "throughput": 2334.75, "total_tokens": 42849776} {"current_steps": 22260, "total_steps": 40000, "loss": 0.028, "lr": 2.0587697847626603e-05, "epoch": 3.631372868912636, "percentage": 55.65, "elapsed_time": "5:05:55", "remaining_time": "4:03:47", "throughput": 2335.01, "total_tokens": 42859280} {"current_steps": 22265, "total_steps": 40000, "loss": 0.0014, "lr": 2.057803482522314e-05, "epoch": 3.632188596133453, "percentage": 55.66, "elapsed_time": "5:05:57", "remaining_time": "4:03:42", "throughput": 2335.26, "total_tokens": 42868800} {"current_steps": 22270, "total_steps": 40000, "loss": 0.2009, "lr": 2.056837248474227e-05, "epoch": 3.63300432335427, "percentage": 55.67, "elapsed_time": "5:05:59", "remaining_time": "4:03:36", "throughput": 2335.55, "total_tokens": 42878848} {"current_steps": 22275, "total_steps": 40000, "loss": 0.0455, "lr": 2.0558710827674064e-05, "epoch": 3.6338200505750877, "percentage": 55.69, "elapsed_time": "5:06:01", "remaining_time": "4:03:30", "throughput": 2335.76, "total_tokens": 42887648} {"current_steps": 22280, "total_steps": 40000, "loss": 0.0038, "lr": 2.054904985550845e-05, "epoch": 3.6346357777959053, "percentage": 55.7, "elapsed_time": "5:06:03", "remaining_time": "4:03:24", "throughput": 2336.03, "total_tokens": 42897392} {"current_steps": 22285, "total_steps": 40000, "loss": 0.0021, "lr": 2.0539389569735287e-05, "epoch": 3.6354515050167224, "percentage": 55.71, "elapsed_time": "5:06:05", "remaining_time": "4:03:19", "throughput": 2336.26, "total_tokens": 42906384} {"current_steps": 22290, "total_steps": 40000, "loss": 0.0015, "lr": 2.052972997184431e-05, "epoch": 3.6362672322375396, "percentage": 55.73, "elapsed_time": "5:06:07", "remaining_time": "4:03:13", "throughput": 2336.46, "total_tokens": 42915072} {"current_steps": 22295, "total_steps": 40000, "loss": 0.0013, "lr": 2.0520071063325146e-05, "epoch": 3.637082959458357, "percentage": 55.74, "elapsed_time": "5:06:09", "remaining_time": "4:03:07", "throughput": 2336.69, "total_tokens": 42924032} {"current_steps": 22300, "total_steps": 40000, "loss": 0.1234, "lr": 2.051041284566732e-05, "epoch": 3.6378986866791747, "percentage": 55.75, "elapsed_time": "5:06:11", "remaining_time": "4:03:01", "throughput": 2336.92, "total_tokens": 42933168} {"current_steps": 22305, "total_steps": 40000, "loss": 0.0032, "lr": 2.050075532036026e-05, "epoch": 3.638714413899992, "percentage": 55.76, "elapsed_time": "5:06:13", "remaining_time": "4:02:56", "throughput": 2337.19, "total_tokens": 42942944} {"current_steps": 22310, "total_steps": 40000, "loss": 0.0576, "lr": 2.0491098488893264e-05, "epoch": 3.639530141120809, "percentage": 55.77, "elapsed_time": "5:06:15", "remaining_time": "4:02:50", "throughput": 2337.45, "total_tokens": 42952464} {"current_steps": 22315, "total_steps": 40000, "loss": 0.1142, "lr": 2.0481442352755546e-05, "epoch": 3.6403458683416265, "percentage": 55.79, "elapsed_time": "5:06:17", "remaining_time": "4:02:44", "throughput": 2337.75, "total_tokens": 42962880} {"current_steps": 22320, "total_steps": 40000, "loss": 0.0106, "lr": 2.0471786913436198e-05, "epoch": 3.641161595562444, "percentage": 55.8, "elapsed_time": "5:06:19", "remaining_time": "4:02:39", "throughput": 2338.04, "total_tokens": 42972976} {"current_steps": 22325, "total_steps": 40000, "loss": 0.0059, "lr": 2.0462132172424218e-05, "epoch": 3.6419773227832613, "percentage": 55.81, "elapsed_time": "5:06:22", "remaining_time": "4:02:33", "throughput": 2338.32, "total_tokens": 42983024} {"current_steps": 22330, "total_steps": 40000, "loss": 0.1628, "lr": 2.0452478131208484e-05, "epoch": 3.6427930500040784, "percentage": 55.83, "elapsed_time": "5:06:24", "remaining_time": "4:02:27", "throughput": 2338.63, "total_tokens": 42993472} {"current_steps": 22335, "total_steps": 40000, "loss": 0.0712, "lr": 2.0442824791277765e-05, "epoch": 3.643608777224896, "percentage": 55.84, "elapsed_time": "5:06:26", "remaining_time": "4:02:21", "throughput": 2338.9, "total_tokens": 43003344} {"current_steps": 22340, "total_steps": 40000, "loss": 0.0047, "lr": 2.0433172154120727e-05, "epoch": 3.6444245044457135, "percentage": 55.85, "elapsed_time": "5:06:28", "remaining_time": "4:02:16", "throughput": 2339.11, "total_tokens": 43012080} {"current_steps": 22345, "total_steps": 40000, "loss": 0.0621, "lr": 2.0423520221225947e-05, "epoch": 3.6452402316665307, "percentage": 55.86, "elapsed_time": "5:06:30", "remaining_time": "4:02:10", "throughput": 2339.28, "total_tokens": 43020128} {"current_steps": 22350, "total_steps": 40000, "loss": 0.0013, "lr": 2.0413868994081848e-05, "epoch": 3.6460559588873482, "percentage": 55.88, "elapsed_time": "5:06:32", "remaining_time": "4:02:04", "throughput": 2339.54, "total_tokens": 43029712} {"current_steps": 22355, "total_steps": 40000, "loss": 0.0053, "lr": 2.0404218474176795e-05, "epoch": 3.6468716861081654, "percentage": 55.89, "elapsed_time": "5:06:34", "remaining_time": "4:01:58", "throughput": 2339.78, "total_tokens": 43039024} {"current_steps": 22360, "total_steps": 40000, "loss": 0.0105, "lr": 2.0394568662999002e-05, "epoch": 3.647687413328983, "percentage": 55.9, "elapsed_time": "5:06:36", "remaining_time": "4:01:53", "throughput": 2340.05, "total_tokens": 43048816} {"current_steps": 22365, "total_steps": 40000, "loss": 0.0187, "lr": 2.0384919562036593e-05, "epoch": 3.6485031405498, "percentage": 55.91, "elapsed_time": "5:06:38", "remaining_time": "4:01:47", "throughput": 2340.35, "total_tokens": 43059136} {"current_steps": 22370, "total_steps": 40000, "loss": 0.0012, "lr": 2.0375271172777593e-05, "epoch": 3.6493188677706176, "percentage": 55.93, "elapsed_time": "5:06:40", "remaining_time": "4:01:41", "throughput": 2340.56, "total_tokens": 43067904} {"current_steps": 22375, "total_steps": 40000, "loss": 0.0017, "lr": 2.0365623496709885e-05, "epoch": 3.6501345949914348, "percentage": 55.94, "elapsed_time": "5:06:42", "remaining_time": "4:01:36", "throughput": 2340.84, "total_tokens": 43077776} {"current_steps": 22380, "total_steps": 40000, "loss": 0.0052, "lr": 2.0355976535321283e-05, "epoch": 3.6509503222122524, "percentage": 55.95, "elapsed_time": "5:06:44", "remaining_time": "4:01:30", "throughput": 2341.12, "total_tokens": 43087824} {"current_steps": 22385, "total_steps": 40000, "loss": 0.0052, "lr": 2.034633029009945e-05, "epoch": 3.6517660494330695, "percentage": 55.96, "elapsed_time": "5:06:46", "remaining_time": "4:01:24", "throughput": 2341.39, "total_tokens": 43097760} {"current_steps": 22390, "total_steps": 40000, "loss": 0.0965, "lr": 2.0336684762531972e-05, "epoch": 3.652581776653887, "percentage": 55.97, "elapsed_time": "5:06:48", "remaining_time": "4:01:18", "throughput": 2341.65, "total_tokens": 43107264} {"current_steps": 22395, "total_steps": 40000, "loss": 0.0172, "lr": 2.032703995410631e-05, "epoch": 3.653397503874704, "percentage": 55.99, "elapsed_time": "5:06:51", "remaining_time": "4:01:13", "throughput": 2341.86, "total_tokens": 43115984} {"current_steps": 22400, "total_steps": 40000, "loss": 0.0009, "lr": 2.031739586630981e-05, "epoch": 3.6542132310955218, "percentage": 56.0, "elapsed_time": "5:06:53", "remaining_time": "4:01:07", "throughput": 2342.16, "total_tokens": 43126400} {"current_steps": 22400, "total_steps": 40000, "eval_loss": 0.25593921542167664, "epoch": 3.6542132310955218, "percentage": 56.0, "elapsed_time": "5:08:13", "remaining_time": "4:02:10", "throughput": 2331.92, "total_tokens": 43126400} {"current_steps": 22405, "total_steps": 40000, "loss": 0.045, "lr": 2.0307752500629707e-05, "epoch": 3.655028958316339, "percentage": 56.01, "elapsed_time": "5:08:17", "remaining_time": "4:02:06", "throughput": 2332.01, "total_tokens": 43136672} {"current_steps": 22410, "total_steps": 40000, "loss": 0.089, "lr": 2.0298109858553144e-05, "epoch": 3.6558446855371565, "percentage": 56.03, "elapsed_time": "5:08:19", "remaining_time": "4:02:00", "throughput": 2332.21, "total_tokens": 43145216} {"current_steps": 22415, "total_steps": 40000, "loss": 0.0026, "lr": 2.028846794156712e-05, "epoch": 3.6566604127579736, "percentage": 56.04, "elapsed_time": "5:08:21", "remaining_time": "4:01:55", "throughput": 2332.44, "total_tokens": 43154480} {"current_steps": 22420, "total_steps": 40000, "loss": 0.0238, "lr": 2.027882675115856e-05, "epoch": 3.657476139978791, "percentage": 56.05, "elapsed_time": "5:08:23", "remaining_time": "4:01:49", "throughput": 2332.72, "total_tokens": 43164496} {"current_steps": 22425, "total_steps": 40000, "loss": 0.0253, "lr": 2.026918628881423e-05, "epoch": 3.6582918671996083, "percentage": 56.06, "elapsed_time": "5:08:25", "remaining_time": "4:01:43", "throughput": 2332.99, "total_tokens": 43174192} {"current_steps": 22430, "total_steps": 40000, "loss": 0.0619, "lr": 2.0259546556020833e-05, "epoch": 3.659107594420426, "percentage": 56.07, "elapsed_time": "5:08:28", "remaining_time": "4:01:37", "throughput": 2333.29, "total_tokens": 43184688} {"current_steps": 22435, "total_steps": 40000, "loss": 0.1641, "lr": 2.024990755426493e-05, "epoch": 3.659923321641243, "percentage": 56.09, "elapsed_time": "5:08:30", "remaining_time": "4:01:32", "throughput": 2333.56, "total_tokens": 43194448} {"current_steps": 22440, "total_steps": 40000, "loss": 0.0006, "lr": 2.0240269285032975e-05, "epoch": 3.6607390488620606, "percentage": 56.1, "elapsed_time": "5:08:32", "remaining_time": "4:01:26", "throughput": 2333.87, "total_tokens": 43205184} {"current_steps": 22445, "total_steps": 40000, "loss": 0.0904, "lr": 2.0230631749811306e-05, "epoch": 3.6615547760828777, "percentage": 56.11, "elapsed_time": "5:08:34", "remaining_time": "4:01:20", "throughput": 2334.12, "total_tokens": 43214528} {"current_steps": 22450, "total_steps": 40000, "loss": 0.103, "lr": 2.0220994950086162e-05, "epoch": 3.6623705033036953, "percentage": 56.12, "elapsed_time": "5:08:36", "remaining_time": "4:01:14", "throughput": 2334.36, "total_tokens": 43223808} {"current_steps": 22455, "total_steps": 40000, "loss": 0.0417, "lr": 2.021135888734365e-05, "epoch": 3.663186230524513, "percentage": 56.14, "elapsed_time": "5:08:38", "remaining_time": "4:01:09", "throughput": 2334.63, "total_tokens": 43233712} {"current_steps": 22460, "total_steps": 40000, "loss": 0.001, "lr": 2.0201723563069783e-05, "epoch": 3.66400195774533, "percentage": 56.15, "elapsed_time": "5:08:40", "remaining_time": "4:01:03", "throughput": 2334.87, "total_tokens": 43243008} {"current_steps": 22465, "total_steps": 40000, "loss": 0.0694, "lr": 2.0192088978750433e-05, "epoch": 3.664817684966147, "percentage": 56.16, "elapsed_time": "5:08:42", "remaining_time": "4:00:57", "throughput": 2335.17, "total_tokens": 43253344} {"current_steps": 22470, "total_steps": 40000, "loss": 0.0007, "lr": 2.0182455135871385e-05, "epoch": 3.6656334121869647, "percentage": 56.17, "elapsed_time": "5:08:44", "remaining_time": "4:00:52", "throughput": 2335.37, "total_tokens": 43261904} {"current_steps": 22475, "total_steps": 40000, "loss": 0.0587, "lr": 2.0172822035918305e-05, "epoch": 3.6664491394077823, "percentage": 56.19, "elapsed_time": "5:08:46", "remaining_time": "4:00:46", "throughput": 2335.6, "total_tokens": 43271040} {"current_steps": 22480, "total_steps": 40000, "loss": 0.0023, "lr": 2.016318968037671e-05, "epoch": 3.6672648666285994, "percentage": 56.2, "elapsed_time": "5:08:48", "remaining_time": "4:00:40", "throughput": 2335.89, "total_tokens": 43281248} {"current_steps": 22485, "total_steps": 40000, "loss": 0.01, "lr": 2.015355807073206e-05, "epoch": 3.6680805938494165, "percentage": 56.21, "elapsed_time": "5:08:50", "remaining_time": "4:00:34", "throughput": 2336.07, "total_tokens": 43289312} {"current_steps": 22490, "total_steps": 40000, "loss": 0.1259, "lr": 2.0143927208469664e-05, "epoch": 3.668896321070234, "percentage": 56.23, "elapsed_time": "5:08:52", "remaining_time": "4:00:29", "throughput": 2336.38, "total_tokens": 43300016} {"current_steps": 22495, "total_steps": 40000, "loss": 0.0021, "lr": 2.0134297095074708e-05, "epoch": 3.6697120482910517, "percentage": 56.24, "elapsed_time": "5:08:55", "remaining_time": "4:00:23", "throughput": 2336.69, "total_tokens": 43310560} {"current_steps": 22500, "total_steps": 40000, "loss": 0.0684, "lr": 2.0124667732032297e-05, "epoch": 3.670527775511869, "percentage": 56.25, "elapsed_time": "5:08:57", "remaining_time": "4:00:17", "throughput": 2336.95, "total_tokens": 43320272} {"current_steps": 22505, "total_steps": 40000, "loss": 0.0013, "lr": 2.011503912082738e-05, "epoch": 3.671343502732686, "percentage": 56.26, "elapsed_time": "5:08:59", "remaining_time": "4:00:12", "throughput": 2337.18, "total_tokens": 43329264} {"current_steps": 22510, "total_steps": 40000, "loss": 0.0796, "lr": 2.0105411262944823e-05, "epoch": 3.6721592299535035, "percentage": 56.27, "elapsed_time": "5:09:01", "remaining_time": "4:00:06", "throughput": 2337.45, "total_tokens": 43339136} {"current_steps": 22515, "total_steps": 40000, "loss": 0.0328, "lr": 2.0095784159869366e-05, "epoch": 3.672974957174321, "percentage": 56.29, "elapsed_time": "5:09:03", "remaining_time": "4:00:00", "throughput": 2337.67, "total_tokens": 43348160} {"current_steps": 22520, "total_steps": 40000, "loss": 0.001, "lr": 2.0086157813085608e-05, "epoch": 3.6737906843951382, "percentage": 56.3, "elapsed_time": "5:09:05", "remaining_time": "3:59:54", "throughput": 2337.88, "total_tokens": 43356848} {"current_steps": 22525, "total_steps": 40000, "loss": 0.0069, "lr": 2.0076532224078068e-05, "epoch": 3.6746064116159554, "percentage": 56.31, "elapsed_time": "5:09:07", "remaining_time": "3:59:49", "throughput": 2338.16, "total_tokens": 43366944} {"current_steps": 22530, "total_steps": 40000, "loss": 0.1037, "lr": 2.0066907394331142e-05, "epoch": 3.675422138836773, "percentage": 56.33, "elapsed_time": "5:09:09", "remaining_time": "3:59:43", "throughput": 2338.44, "total_tokens": 43377024} {"current_steps": 22535, "total_steps": 40000, "loss": 0.0041, "lr": 2.0057283325329077e-05, "epoch": 3.6762378660575905, "percentage": 56.34, "elapsed_time": "5:09:11", "remaining_time": "3:59:37", "throughput": 2338.72, "total_tokens": 43386976} {"current_steps": 22540, "total_steps": 40000, "loss": 0.0681, "lr": 2.0047660018556047e-05, "epoch": 3.6770535932784076, "percentage": 56.35, "elapsed_time": "5:09:13", "remaining_time": "3:59:32", "throughput": 2338.97, "total_tokens": 43396496} {"current_steps": 22545, "total_steps": 40000, "loss": 0.1047, "lr": 2.0038037475496075e-05, "epoch": 3.677869320499225, "percentage": 56.36, "elapsed_time": "5:09:15", "remaining_time": "3:59:26", "throughput": 2339.25, "total_tokens": 43406592} {"current_steps": 22550, "total_steps": 40000, "loss": 0.1597, "lr": 2.0028415697633073e-05, "epoch": 3.6786850477200423, "percentage": 56.38, "elapsed_time": "5:09:17", "remaining_time": "3:59:20", "throughput": 2339.49, "total_tokens": 43415984} {"current_steps": 22555, "total_steps": 40000, "loss": 0.0872, "lr": 2.0018794686450858e-05, "epoch": 3.67950077494086, "percentage": 56.39, "elapsed_time": "5:09:19", "remaining_time": "3:59:15", "throughput": 2339.66, "total_tokens": 43423888} {"current_steps": 22560, "total_steps": 40000, "loss": 0.0004, "lr": 2.0009174443433088e-05, "epoch": 3.680316502161677, "percentage": 56.4, "elapsed_time": "5:09:21", "remaining_time": "3:59:09", "throughput": 2339.97, "total_tokens": 43434592} {"current_steps": 22565, "total_steps": 40000, "loss": 0.0864, "lr": 1.999955497006334e-05, "epoch": 3.6811322293824946, "percentage": 56.41, "elapsed_time": "5:09:24", "remaining_time": "3:59:03", "throughput": 2340.22, "total_tokens": 43443968} {"current_steps": 22570, "total_steps": 40000, "loss": 0.0816, "lr": 1.9989936267825067e-05, "epoch": 3.6819479566033118, "percentage": 56.43, "elapsed_time": "5:09:26", "remaining_time": "3:58:57", "throughput": 2340.47, "total_tokens": 43453488} {"current_steps": 22575, "total_steps": 40000, "loss": 0.1659, "lr": 1.9980318338201572e-05, "epoch": 3.6827636838241293, "percentage": 56.44, "elapsed_time": "5:09:28", "remaining_time": "3:58:52", "throughput": 2340.69, "total_tokens": 43462304} {"current_steps": 22580, "total_steps": 40000, "loss": 0.0027, "lr": 1.997070118267607e-05, "epoch": 3.6835794110449465, "percentage": 56.45, "elapsed_time": "5:09:30", "remaining_time": "3:58:46", "throughput": 2341.02, "total_tokens": 43473376} {"current_steps": 22585, "total_steps": 40000, "loss": 0.0714, "lr": 1.9961084802731654e-05, "epoch": 3.684395138265764, "percentage": 56.46, "elapsed_time": "5:09:32", "remaining_time": "3:58:40", "throughput": 2341.32, "total_tokens": 43483776} {"current_steps": 22590, "total_steps": 40000, "loss": 0.0019, "lr": 1.9951469199851273e-05, "epoch": 3.685210865486581, "percentage": 56.47, "elapsed_time": "5:09:34", "remaining_time": "3:58:35", "throughput": 2341.62, "total_tokens": 43494208} {"current_steps": 22595, "total_steps": 40000, "loss": 0.0505, "lr": 1.99418543755178e-05, "epoch": 3.6860265927073987, "percentage": 56.49, "elapsed_time": "5:09:36", "remaining_time": "3:58:29", "throughput": 2341.92, "total_tokens": 43504736} {"current_steps": 22600, "total_steps": 40000, "loss": 0.1059, "lr": 1.9932240331213936e-05, "epoch": 3.686842319928216, "percentage": 56.5, "elapsed_time": "5:09:38", "remaining_time": "3:58:23", "throughput": 2342.12, "total_tokens": 43513248} {"current_steps": 22600, "total_steps": 40000, "eval_loss": 0.22804485261440277, "epoch": 3.686842319928216, "percentage": 56.5, "elapsed_time": "5:10:59", "remaining_time": "3:59:26", "throughput": 2331.97, "total_tokens": 43513248} {"current_steps": 22605, "total_steps": 40000, "loss": 0.001, "lr": 1.9922627068422297e-05, "epoch": 3.6876580471490334, "percentage": 56.51, "elapsed_time": "5:11:03", "remaining_time": "3:59:21", "throughput": 2331.94, "total_tokens": 43521248} {"current_steps": 22610, "total_steps": 40000, "loss": 0.0669, "lr": 1.991301458862538e-05, "epoch": 3.6884737743698506, "percentage": 56.53, "elapsed_time": "5:11:05", "remaining_time": "3:59:15", "throughput": 2332.27, "total_tokens": 43532192} {"current_steps": 22615, "total_steps": 40000, "loss": 0.0282, "lr": 1.9903402893305536e-05, "epoch": 3.689289501590668, "percentage": 56.54, "elapsed_time": "5:11:07", "remaining_time": "3:59:10", "throughput": 2332.5, "total_tokens": 43541424} {"current_steps": 22620, "total_steps": 40000, "loss": 0.0053, "lr": 1.9893791983945016e-05, "epoch": 3.6901052288114853, "percentage": 56.55, "elapsed_time": "5:11:09", "remaining_time": "3:59:04", "throughput": 2332.83, "total_tokens": 43552416} {"current_steps": 22625, "total_steps": 40000, "loss": 0.0027, "lr": 1.988418186202594e-05, "epoch": 3.690920956032303, "percentage": 56.56, "elapsed_time": "5:11:11", "remaining_time": "3:58:58", "throughput": 2333.1, "total_tokens": 43562176} {"current_steps": 22630, "total_steps": 40000, "loss": 0.0682, "lr": 1.98745725290303e-05, "epoch": 3.6917366832531204, "percentage": 56.57, "elapsed_time": "5:11:13", "remaining_time": "3:58:53", "throughput": 2333.42, "total_tokens": 43572944} {"current_steps": 22635, "total_steps": 40000, "loss": 0.1211, "lr": 1.986496398644e-05, "epoch": 3.6925524104739376, "percentage": 56.59, "elapsed_time": "5:11:15", "remaining_time": "3:58:47", "throughput": 2333.61, "total_tokens": 43581360} {"current_steps": 22640, "total_steps": 40000, "loss": 0.0065, "lr": 1.9855356235736777e-05, "epoch": 3.6933681376947547, "percentage": 56.6, "elapsed_time": "5:11:17", "remaining_time": "3:58:41", "throughput": 2333.92, "total_tokens": 43592064} {"current_steps": 22645, "total_steps": 40000, "loss": 0.0499, "lr": 1.9845749278402277e-05, "epoch": 3.6941838649155723, "percentage": 56.61, "elapsed_time": "5:11:19", "remaining_time": "3:58:35", "throughput": 2334.2, "total_tokens": 43602064} {"current_steps": 22650, "total_steps": 40000, "loss": 0.0443, "lr": 1.9836143115918006e-05, "epoch": 3.69499959213639, "percentage": 56.62, "elapsed_time": "5:11:21", "remaining_time": "3:58:30", "throughput": 2334.4, "total_tokens": 43610592} {"current_steps": 22655, "total_steps": 40000, "loss": 0.0684, "lr": 1.9826537749765367e-05, "epoch": 3.695815319357207, "percentage": 56.64, "elapsed_time": "5:11:23", "remaining_time": "3:58:24", "throughput": 2334.64, "total_tokens": 43619968} {"current_steps": 22660, "total_steps": 40000, "loss": 0.0977, "lr": 1.9816933181425625e-05, "epoch": 3.696631046578024, "percentage": 56.65, "elapsed_time": "5:11:25", "remaining_time": "3:58:18", "throughput": 2334.9, "total_tokens": 43629616} {"current_steps": 22665, "total_steps": 40000, "loss": 0.0256, "lr": 1.9807329412379903e-05, "epoch": 3.6974467737988417, "percentage": 56.66, "elapsed_time": "5:11:27", "remaining_time": "3:58:13", "throughput": 2335.17, "total_tokens": 43639600} {"current_steps": 22670, "total_steps": 40000, "loss": 0.0067, "lr": 1.9797726444109247e-05, "epoch": 3.6982625010196593, "percentage": 56.67, "elapsed_time": "5:11:30", "remaining_time": "3:58:07", "throughput": 2335.45, "total_tokens": 43649568} {"current_steps": 22675, "total_steps": 40000, "loss": 0.0199, "lr": 1.9788124278094557e-05, "epoch": 3.6990782282404764, "percentage": 56.69, "elapsed_time": "5:11:32", "remaining_time": "3:58:01", "throughput": 2335.66, "total_tokens": 43658368} {"current_steps": 22680, "total_steps": 40000, "loss": 0.0012, "lr": 1.9778522915816594e-05, "epoch": 3.6998939554612935, "percentage": 56.7, "elapsed_time": "5:11:34", "remaining_time": "3:57:56", "throughput": 2335.88, "total_tokens": 43667888} {"current_steps": 22685, "total_steps": 40000, "loss": 0.1376, "lr": 1.9768922358756014e-05, "epoch": 3.700709682682111, "percentage": 56.71, "elapsed_time": "5:11:36", "remaining_time": "3:57:50", "throughput": 2336.15, "total_tokens": 43677776} {"current_steps": 22690, "total_steps": 40000, "loss": 0.0925, "lr": 1.9759322608393353e-05, "epoch": 3.7015254099029287, "percentage": 56.73, "elapsed_time": "5:11:38", "remaining_time": "3:57:44", "throughput": 2336.36, "total_tokens": 43686608} {"current_steps": 22695, "total_steps": 40000, "loss": 0.1578, "lr": 1.9749723666208992e-05, "epoch": 3.702341137123746, "percentage": 56.74, "elapsed_time": "5:11:40", "remaining_time": "3:57:39", "throughput": 2336.65, "total_tokens": 43696880} {"current_steps": 22700, "total_steps": 40000, "loss": 0.1091, "lr": 1.9740125533683235e-05, "epoch": 3.703156864344563, "percentage": 56.75, "elapsed_time": "5:11:42", "remaining_time": "3:57:33", "throughput": 2336.94, "total_tokens": 43707040} {"current_steps": 22705, "total_steps": 40000, "loss": 0.0966, "lr": 1.9730528212296208e-05, "epoch": 3.7039725915653805, "percentage": 56.76, "elapsed_time": "5:11:44", "remaining_time": "3:57:27", "throughput": 2337.24, "total_tokens": 43717568} {"current_steps": 22710, "total_steps": 40000, "loss": 0.0849, "lr": 1.9720931703527945e-05, "epoch": 3.704788318786198, "percentage": 56.77, "elapsed_time": "5:11:46", "remaining_time": "3:57:22", "throughput": 2337.48, "total_tokens": 43726800} {"current_steps": 22715, "total_steps": 40000, "loss": 0.059, "lr": 1.9711336008858373e-05, "epoch": 3.705604046007015, "percentage": 56.79, "elapsed_time": "5:11:48", "remaining_time": "3:57:16", "throughput": 2337.74, "total_tokens": 43736496} {"current_steps": 22720, "total_steps": 40000, "loss": 0.0626, "lr": 1.9701741129767233e-05, "epoch": 3.7064197732278323, "percentage": 56.8, "elapsed_time": "5:11:50", "remaining_time": "3:57:10", "throughput": 2338.03, "total_tokens": 43746816} {"current_steps": 22725, "total_steps": 40000, "loss": 0.1484, "lr": 1.9692147067734202e-05, "epoch": 3.70723550044865, "percentage": 56.81, "elapsed_time": "5:11:53", "remaining_time": "3:57:05", "throughput": 2338.25, "total_tokens": 43755696} {"current_steps": 22730, "total_steps": 40000, "loss": 0.0429, "lr": 1.96825538242388e-05, "epoch": 3.7080512276694675, "percentage": 56.83, "elapsed_time": "5:11:55", "remaining_time": "3:56:59", "throughput": 2338.52, "total_tokens": 43765600} {"current_steps": 22735, "total_steps": 40000, "loss": 0.0088, "lr": 1.967296140076041e-05, "epoch": 3.7088669548902846, "percentage": 56.84, "elapsed_time": "5:11:57", "remaining_time": "3:56:53", "throughput": 2338.75, "total_tokens": 43774832} {"current_steps": 22740, "total_steps": 40000, "loss": 0.0041, "lr": 1.966336979877833e-05, "epoch": 3.709682682111102, "percentage": 56.85, "elapsed_time": "5:11:59", "remaining_time": "3:56:48", "throughput": 2339.03, "total_tokens": 43784912} {"current_steps": 22745, "total_steps": 40000, "loss": 0.0504, "lr": 1.9653779019771678e-05, "epoch": 3.7104984093319193, "percentage": 56.86, "elapsed_time": "5:12:01", "remaining_time": "3:56:42", "throughput": 2339.2, "total_tokens": 43793024} {"current_steps": 22750, "total_steps": 40000, "loss": 0.0267, "lr": 1.9644189065219488e-05, "epoch": 3.711314136552737, "percentage": 56.88, "elapsed_time": "5:12:03", "remaining_time": "3:56:36", "throughput": 2339.47, "total_tokens": 43802896} {"current_steps": 22755, "total_steps": 40000, "loss": 0.0952, "lr": 1.9634599936600655e-05, "epoch": 3.712129863773554, "percentage": 56.89, "elapsed_time": "5:12:05", "remaining_time": "3:56:31", "throughput": 2339.7, "total_tokens": 43812032} {"current_steps": 22760, "total_steps": 40000, "loss": 0.0015, "lr": 1.9625011635393935e-05, "epoch": 3.7129455909943716, "percentage": 56.9, "elapsed_time": "5:12:07", "remaining_time": "3:56:25", "throughput": 2339.9, "total_tokens": 43820592} {"current_steps": 22765, "total_steps": 40000, "loss": 0.0492, "lr": 1.9615424163077963e-05, "epoch": 3.7137613182151887, "percentage": 56.91, "elapsed_time": "5:12:09", "remaining_time": "3:56:19", "throughput": 2340.12, "total_tokens": 43829648} {"current_steps": 22770, "total_steps": 40000, "loss": 0.0352, "lr": 1.9605837521131263e-05, "epoch": 3.7145770454360063, "percentage": 56.93, "elapsed_time": "5:12:11", "remaining_time": "3:56:14", "throughput": 2340.37, "total_tokens": 43839184} {"current_steps": 22775, "total_steps": 40000, "loss": 0.0028, "lr": 1.9596251711032192e-05, "epoch": 3.7153927726568234, "percentage": 56.94, "elapsed_time": "5:12:13", "remaining_time": "3:56:08", "throughput": 2340.67, "total_tokens": 43849488} {"current_steps": 22780, "total_steps": 40000, "loss": 0.0012, "lr": 1.958666673425903e-05, "epoch": 3.716208499877641, "percentage": 56.95, "elapsed_time": "5:12:15", "remaining_time": "3:56:02", "throughput": 2340.87, "total_tokens": 43858128} {"current_steps": 22785, "total_steps": 40000, "loss": 0.0059, "lr": 1.957708259228987e-05, "epoch": 3.717024227098458, "percentage": 56.96, "elapsed_time": "5:12:17", "remaining_time": "3:55:57", "throughput": 2341.2, "total_tokens": 43869136} {"current_steps": 22790, "total_steps": 40000, "loss": 0.034, "lr": 1.956749928660273e-05, "epoch": 3.7178399543192757, "percentage": 56.97, "elapsed_time": "5:12:19", "remaining_time": "3:55:51", "throughput": 2341.4, "total_tokens": 43877680} {"current_steps": 22795, "total_steps": 40000, "loss": 0.0015, "lr": 1.955791681867547e-05, "epoch": 3.718655681540093, "percentage": 56.99, "elapsed_time": "5:12:22", "remaining_time": "3:55:45", "throughput": 2341.63, "total_tokens": 43886960} {"current_steps": 22800, "total_steps": 40000, "loss": 0.1354, "lr": 1.9548335189985824e-05, "epoch": 3.7194714087609104, "percentage": 57.0, "elapsed_time": "5:12:24", "remaining_time": "3:55:40", "throughput": 2341.9, "total_tokens": 43896720} {"current_steps": 22800, "total_steps": 40000, "eval_loss": 0.22966504096984863, "epoch": 3.7194714087609104, "percentage": 57.0, "elapsed_time": "5:13:44", "remaining_time": "3:56:41", "throughput": 2331.84, "total_tokens": 43896720} {"current_steps": 22805, "total_steps": 40000, "loss": 0.0888, "lr": 1.9538754402011396e-05, "epoch": 3.7202871359817276, "percentage": 57.01, "elapsed_time": "5:13:48", "remaining_time": "3:56:36", "throughput": 2331.93, "total_tokens": 43906848} {"current_steps": 22810, "total_steps": 40000, "loss": 0.2289, "lr": 1.952917445622968e-05, "epoch": 3.721102863202545, "percentage": 57.03, "elapsed_time": "5:13:50", "remaining_time": "3:56:31", "throughput": 2332.17, "total_tokens": 43916304} {"current_steps": 22815, "total_steps": 40000, "loss": 0.0678, "lr": 1.9519595354118005e-05, "epoch": 3.7219185904233623, "percentage": 57.04, "elapsed_time": "5:13:52", "remaining_time": "3:56:25", "throughput": 2332.42, "total_tokens": 43925808} {"current_steps": 22820, "total_steps": 40000, "loss": 0.1333, "lr": 1.951001709715361e-05, "epoch": 3.72273431764418, "percentage": 57.05, "elapsed_time": "5:13:54", "remaining_time": "3:56:19", "throughput": 2332.67, "total_tokens": 43935408} {"current_steps": 22825, "total_steps": 40000, "loss": 0.0009, "lr": 1.9500439686813556e-05, "epoch": 3.7235500448649974, "percentage": 57.06, "elapsed_time": "5:13:56", "remaining_time": "3:56:14", "throughput": 2332.96, "total_tokens": 43945712} {"current_steps": 22830, "total_steps": 40000, "loss": 0.0929, "lr": 1.949086312457482e-05, "epoch": 3.7243657720858145, "percentage": 57.07, "elapsed_time": "5:13:58", "remaining_time": "3:56:08", "throughput": 2333.29, "total_tokens": 43956832} {"current_steps": 22835, "total_steps": 40000, "loss": 0.0079, "lr": 1.9481287411914223e-05, "epoch": 3.7251814993066317, "percentage": 57.09, "elapsed_time": "5:14:01", "remaining_time": "3:56:02", "throughput": 2333.55, "total_tokens": 43966512} {"current_steps": 22840, "total_steps": 40000, "loss": 0.0087, "lr": 1.9471712550308457e-05, "epoch": 3.7259972265274492, "percentage": 57.1, "elapsed_time": "5:14:03", "remaining_time": "3:55:57", "throughput": 2333.85, "total_tokens": 43977152} {"current_steps": 22845, "total_steps": 40000, "loss": 0.1391, "lr": 1.946213854123409e-05, "epoch": 3.726812953748267, "percentage": 57.11, "elapsed_time": "5:14:05", "remaining_time": "3:55:51", "throughput": 2334.1, "total_tokens": 43986592} {"current_steps": 22850, "total_steps": 40000, "loss": 0.0025, "lr": 1.9452565386167554e-05, "epoch": 3.727628680969084, "percentage": 57.12, "elapsed_time": "5:14:07", "remaining_time": "3:55:45", "throughput": 2334.35, "total_tokens": 43996080} {"current_steps": 22855, "total_steps": 40000, "loss": 0.0966, "lr": 1.9442993086585142e-05, "epoch": 3.728444408189901, "percentage": 57.14, "elapsed_time": "5:14:09", "remaining_time": "3:55:40", "throughput": 2334.57, "total_tokens": 44005232} {"current_steps": 22860, "total_steps": 40000, "loss": 0.0295, "lr": 1.9433421643963043e-05, "epoch": 3.7292601354107187, "percentage": 57.15, "elapsed_time": "5:14:11", "remaining_time": "3:55:34", "throughput": 2334.82, "total_tokens": 44014784} {"current_steps": 22865, "total_steps": 40000, "loss": 0.0734, "lr": 1.942385105977727e-05, "epoch": 3.7300758626315362, "percentage": 57.16, "elapsed_time": "5:14:13", "remaining_time": "3:55:28", "throughput": 2335.12, "total_tokens": 44025248} {"current_steps": 22870, "total_steps": 40000, "loss": 0.2226, "lr": 1.9414281335503743e-05, "epoch": 3.7308915898523534, "percentage": 57.17, "elapsed_time": "5:14:15", "remaining_time": "3:55:23", "throughput": 2335.4, "total_tokens": 44035328} {"current_steps": 22875, "total_steps": 40000, "loss": 0.1527, "lr": 1.9404712472618232e-05, "epoch": 3.7317073170731705, "percentage": 57.19, "elapsed_time": "5:14:17", "remaining_time": "3:55:17", "throughput": 2335.61, "total_tokens": 44044192} {"current_steps": 22880, "total_steps": 40000, "loss": 0.1022, "lr": 1.939514447259636e-05, "epoch": 3.732523044293988, "percentage": 57.2, "elapsed_time": "5:14:19", "remaining_time": "3:55:11", "throughput": 2335.86, "total_tokens": 44053712} {"current_steps": 22885, "total_steps": 40000, "loss": 0.0026, "lr": 1.938557733691365e-05, "epoch": 3.7333387715148056, "percentage": 57.21, "elapsed_time": "5:14:21", "remaining_time": "3:55:06", "throughput": 2336.11, "total_tokens": 44063280} {"current_steps": 22890, "total_steps": 40000, "loss": 0.0043, "lr": 1.9376011067045476e-05, "epoch": 3.7341544987356228, "percentage": 57.23, "elapsed_time": "5:14:23", "remaining_time": "3:55:00", "throughput": 2336.34, "total_tokens": 44072320} {"current_steps": 22895, "total_steps": 40000, "loss": 0.0271, "lr": 1.9366445664467065e-05, "epoch": 3.73497022595644, "percentage": 57.24, "elapsed_time": "5:14:25", "remaining_time": "3:54:54", "throughput": 2336.53, "total_tokens": 44080848} {"current_steps": 22900, "total_steps": 40000, "loss": 0.1796, "lr": 1.9356881130653533e-05, "epoch": 3.7357859531772575, "percentage": 57.25, "elapsed_time": "5:14:28", "remaining_time": "3:54:49", "throughput": 2336.76, "total_tokens": 44090080} {"current_steps": 22905, "total_steps": 40000, "loss": 0.0101, "lr": 1.9347317467079846e-05, "epoch": 3.736601680398075, "percentage": 57.26, "elapsed_time": "5:14:30", "remaining_time": "3:54:43", "throughput": 2337.0, "total_tokens": 44099408} {"current_steps": 22910, "total_steps": 40000, "loss": 0.0847, "lr": 1.9337754675220836e-05, "epoch": 3.737417407618892, "percentage": 57.27, "elapsed_time": "5:14:32", "remaining_time": "3:54:37", "throughput": 2337.21, "total_tokens": 44108256} {"current_steps": 22915, "total_steps": 40000, "loss": 0.064, "lr": 1.9328192756551218e-05, "epoch": 3.7382331348397098, "percentage": 57.29, "elapsed_time": "5:14:34", "remaining_time": "3:54:32", "throughput": 2337.43, "total_tokens": 44117200} {"current_steps": 22920, "total_steps": 40000, "loss": 0.0035, "lr": 1.931863171254555e-05, "epoch": 3.739048862060527, "percentage": 57.3, "elapsed_time": "5:14:36", "remaining_time": "3:54:26", "throughput": 2337.71, "total_tokens": 44127408} {"current_steps": 22925, "total_steps": 40000, "loss": 0.0068, "lr": 1.930907154467826e-05, "epoch": 3.7398645892813445, "percentage": 57.31, "elapsed_time": "5:14:38", "remaining_time": "3:54:20", "throughput": 2337.97, "total_tokens": 44137136} {"current_steps": 22930, "total_steps": 40000, "loss": 0.0796, "lr": 1.9299512254423673e-05, "epoch": 3.7406803165021616, "percentage": 57.33, "elapsed_time": "5:14:40", "remaining_time": "3:54:15", "throughput": 2338.13, "total_tokens": 44144944} {"current_steps": 22935, "total_steps": 40000, "loss": 0.0022, "lr": 1.9289953843255914e-05, "epoch": 3.741496043722979, "percentage": 57.34, "elapsed_time": "5:14:42", "remaining_time": "3:54:09", "throughput": 2338.39, "total_tokens": 44154656} {"current_steps": 22940, "total_steps": 40000, "loss": 0.0661, "lr": 1.9280396312649048e-05, "epoch": 3.7423117709437963, "percentage": 57.35, "elapsed_time": "5:14:44", "remaining_time": "3:54:04", "throughput": 2338.65, "total_tokens": 44164416} {"current_steps": 22945, "total_steps": 40000, "loss": 0.0013, "lr": 1.9270839664076936e-05, "epoch": 3.743127498164614, "percentage": 57.36, "elapsed_time": "5:14:46", "remaining_time": "3:53:58", "throughput": 2338.86, "total_tokens": 44173232} {"current_steps": 22950, "total_steps": 40000, "loss": 0.0032, "lr": 1.9261283899013345e-05, "epoch": 3.743943225385431, "percentage": 57.38, "elapsed_time": "5:14:48", "remaining_time": "3:53:52", "throughput": 2339.1, "total_tokens": 44182592} {"current_steps": 22955, "total_steps": 40000, "loss": 0.0014, "lr": 1.92517290189319e-05, "epoch": 3.7447589526062486, "percentage": 57.39, "elapsed_time": "5:14:50", "remaining_time": "3:53:47", "throughput": 2339.32, "total_tokens": 44191712} {"current_steps": 22960, "total_steps": 40000, "loss": 0.0794, "lr": 1.924217502530607e-05, "epoch": 3.7455746798270657, "percentage": 57.4, "elapsed_time": "5:14:52", "remaining_time": "3:53:41", "throughput": 2339.57, "total_tokens": 44201168} {"current_steps": 22965, "total_steps": 40000, "loss": 0.0019, "lr": 1.9232621919609207e-05, "epoch": 3.7463904070478833, "percentage": 57.41, "elapsed_time": "5:14:54", "remaining_time": "3:53:35", "throughput": 2339.86, "total_tokens": 44211632} {"current_steps": 22970, "total_steps": 40000, "loss": 0.0318, "lr": 1.9223069703314534e-05, "epoch": 3.7472061342687004, "percentage": 57.43, "elapsed_time": "5:14:57", "remaining_time": "3:53:30", "throughput": 2340.1, "total_tokens": 44221024} {"current_steps": 22975, "total_steps": 40000, "loss": 0.0289, "lr": 1.92135183778951e-05, "epoch": 3.748021861489518, "percentage": 57.44, "elapsed_time": "5:14:59", "remaining_time": "3:53:24", "throughput": 2340.39, "total_tokens": 44231280} {"current_steps": 22980, "total_steps": 40000, "loss": 0.0562, "lr": 1.9203967944823857e-05, "epoch": 3.748837588710335, "percentage": 57.45, "elapsed_time": "5:15:01", "remaining_time": "3:53:19", "throughput": 2340.65, "total_tokens": 44240960} {"current_steps": 22985, "total_steps": 40000, "loss": 0.0544, "lr": 1.9194418405573588e-05, "epoch": 3.7496533159311527, "percentage": 57.46, "elapsed_time": "5:15:03", "remaining_time": "3:53:13", "throughput": 2340.86, "total_tokens": 44249936} {"current_steps": 22990, "total_steps": 40000, "loss": 0.0584, "lr": 1.9184869761616954e-05, "epoch": 3.75046904315197, "percentage": 57.48, "elapsed_time": "5:15:05", "remaining_time": "3:53:07", "throughput": 2341.1, "total_tokens": 44259296} {"current_steps": 22995, "total_steps": 40000, "loss": 0.0978, "lr": 1.9175322014426495e-05, "epoch": 3.7512847703727874, "percentage": 57.49, "elapsed_time": "5:15:07", "remaining_time": "3:53:02", "throughput": 2341.35, "total_tokens": 44268800} {"current_steps": 23000, "total_steps": 40000, "loss": 0.1154, "lr": 1.9165775165474565e-05, "epoch": 3.7521004975936045, "percentage": 57.5, "elapsed_time": "5:15:09", "remaining_time": "3:52:56", "throughput": 2341.61, "total_tokens": 44278640} {"current_steps": 23000, "total_steps": 40000, "eval_loss": 0.2092617154121399, "epoch": 3.7521004975936045, "percentage": 57.5, "elapsed_time": "5:16:30", "remaining_time": "3:53:56", "throughput": 2331.65, "total_tokens": 44278640} {"current_steps": 23005, "total_steps": 40000, "loss": 0.0021, "lr": 1.9156229216233434e-05, "epoch": 3.752916224814422, "percentage": 57.51, "elapsed_time": "5:16:33", "remaining_time": "3:53:51", "throughput": 2331.69, "total_tokens": 44287952} {"current_steps": 23010, "total_steps": 40000, "loss": 0.0021, "lr": 1.9146684168175184e-05, "epoch": 3.7537319520352392, "percentage": 57.53, "elapsed_time": "5:16:36", "remaining_time": "3:53:46", "throughput": 2331.97, "total_tokens": 44298192} {"current_steps": 23015, "total_steps": 40000, "loss": 0.0031, "lr": 1.9137140022771796e-05, "epoch": 3.754547679256057, "percentage": 57.54, "elapsed_time": "5:16:38", "remaining_time": "3:53:40", "throughput": 2332.27, "total_tokens": 44308640} {"current_steps": 23020, "total_steps": 40000, "loss": 0.002, "lr": 1.9127596781495103e-05, "epoch": 3.7553634064768744, "percentage": 57.55, "elapsed_time": "5:16:40", "remaining_time": "3:53:34", "throughput": 2332.57, "total_tokens": 44319088} {"current_steps": 23025, "total_steps": 40000, "loss": 0.0091, "lr": 1.9118054445816767e-05, "epoch": 3.7561791336976915, "percentage": 57.56, "elapsed_time": "5:16:42", "remaining_time": "3:53:29", "throughput": 2332.8, "total_tokens": 44328336} {"current_steps": 23030, "total_steps": 40000, "loss": 0.0566, "lr": 1.9108513017208356e-05, "epoch": 3.7569948609185086, "percentage": 57.57, "elapsed_time": "5:16:44", "remaining_time": "3:53:23", "throughput": 2333.06, "total_tokens": 44338176} {"current_steps": 23035, "total_steps": 40000, "loss": 0.1442, "lr": 1.9098972497141287e-05, "epoch": 3.7578105881393262, "percentage": 57.59, "elapsed_time": "5:16:46", "remaining_time": "3:53:17", "throughput": 2333.3, "total_tokens": 44347552} {"current_steps": 23040, "total_steps": 40000, "loss": 0.1095, "lr": 1.9089432887086806e-05, "epoch": 3.758626315360144, "percentage": 57.6, "elapsed_time": "5:16:48", "remaining_time": "3:53:12", "throughput": 2333.52, "total_tokens": 44356560} {"current_steps": 23045, "total_steps": 40000, "loss": 0.1818, "lr": 1.9079894188516056e-05, "epoch": 3.759442042580961, "percentage": 57.61, "elapsed_time": "5:16:50", "remaining_time": "3:53:06", "throughput": 2333.79, "total_tokens": 44366560} {"current_steps": 23050, "total_steps": 40000, "loss": 0.0993, "lr": 1.907035640290002e-05, "epoch": 3.760257769801778, "percentage": 57.63, "elapsed_time": "5:16:52", "remaining_time": "3:53:01", "throughput": 2334.0, "total_tokens": 44375264} {"current_steps": 23055, "total_steps": 40000, "loss": 0.0922, "lr": 1.9060819531709534e-05, "epoch": 3.7610734970225956, "percentage": 57.64, "elapsed_time": "5:16:54", "remaining_time": "3:52:55", "throughput": 2334.31, "total_tokens": 44385968} {"current_steps": 23060, "total_steps": 40000, "loss": 0.0033, "lr": 1.9051283576415325e-05, "epoch": 3.761889224243413, "percentage": 57.65, "elapsed_time": "5:16:56", "remaining_time": "3:52:49", "throughput": 2334.49, "total_tokens": 44394208} {"current_steps": 23065, "total_steps": 40000, "loss": 0.0124, "lr": 1.904174853848793e-05, "epoch": 3.7627049514642303, "percentage": 57.66, "elapsed_time": "5:16:58", "remaining_time": "3:52:44", "throughput": 2334.74, "total_tokens": 44403808} {"current_steps": 23070, "total_steps": 40000, "loss": 0.0032, "lr": 1.903221441939779e-05, "epoch": 3.7635206786850475, "percentage": 57.67, "elapsed_time": "5:17:00", "remaining_time": "3:52:38", "throughput": 2335.0, "total_tokens": 44413728} {"current_steps": 23075, "total_steps": 40000, "loss": 0.0487, "lr": 1.9022681220615194e-05, "epoch": 3.764336405905865, "percentage": 57.69, "elapsed_time": "5:17:02", "remaining_time": "3:52:32", "throughput": 2335.3, "total_tokens": 44424256} {"current_steps": 23080, "total_steps": 40000, "loss": 0.0017, "lr": 1.9013148943610255e-05, "epoch": 3.7651521331266826, "percentage": 57.7, "elapsed_time": "5:17:04", "remaining_time": "3:52:27", "throughput": 2335.59, "total_tokens": 44434592} {"current_steps": 23085, "total_steps": 40000, "loss": 0.0046, "lr": 1.9003617589852998e-05, "epoch": 3.7659678603474998, "percentage": 57.71, "elapsed_time": "5:17:07", "remaining_time": "3:52:21", "throughput": 2335.82, "total_tokens": 44443824} {"current_steps": 23090, "total_steps": 40000, "loss": 0.0836, "lr": 1.899408716081326e-05, "epoch": 3.766783587568317, "percentage": 57.73, "elapsed_time": "5:17:09", "remaining_time": "3:52:16", "throughput": 2336.08, "total_tokens": 44453616} {"current_steps": 23095, "total_steps": 40000, "loss": 0.0026, "lr": 1.898455765796075e-05, "epoch": 3.7675993147891345, "percentage": 57.74, "elapsed_time": "5:17:11", "remaining_time": "3:52:10", "throughput": 2336.3, "total_tokens": 44462544} {"current_steps": 23100, "total_steps": 40000, "loss": 0.0575, "lr": 1.8975029082765053e-05, "epoch": 3.768415042009952, "percentage": 57.75, "elapsed_time": "5:17:13", "remaining_time": "3:52:04", "throughput": 2336.64, "total_tokens": 44473872} {"current_steps": 23105, "total_steps": 40000, "loss": 0.0013, "lr": 1.8965501436695577e-05, "epoch": 3.769230769230769, "percentage": 57.76, "elapsed_time": "5:17:15", "remaining_time": "3:51:59", "throughput": 2336.9, "total_tokens": 44483664} {"current_steps": 23110, "total_steps": 40000, "loss": 0.1022, "lr": 1.895597472122161e-05, "epoch": 3.7700464964515867, "percentage": 57.77, "elapsed_time": "5:17:17", "remaining_time": "3:51:53", "throughput": 2337.14, "total_tokens": 44493120} {"current_steps": 23115, "total_steps": 40000, "loss": 0.1499, "lr": 1.894644893781231e-05, "epoch": 3.770862223672404, "percentage": 57.79, "elapsed_time": "5:17:19", "remaining_time": "3:51:47", "throughput": 2337.36, "total_tokens": 44502048} {"current_steps": 23120, "total_steps": 40000, "loss": 0.0624, "lr": 1.893692408793665e-05, "epoch": 3.7716779508932214, "percentage": 57.8, "elapsed_time": "5:17:21", "remaining_time": "3:51:42", "throughput": 2337.62, "total_tokens": 44511920} {"current_steps": 23125, "total_steps": 40000, "loss": 0.1807, "lr": 1.8927400173063493e-05, "epoch": 3.7724936781140386, "percentage": 57.81, "elapsed_time": "5:17:23", "remaining_time": "3:51:36", "throughput": 2337.89, "total_tokens": 44521936} {"current_steps": 23130, "total_steps": 40000, "loss": 0.0124, "lr": 1.891787719466154e-05, "epoch": 3.773309405334856, "percentage": 57.83, "elapsed_time": "5:17:25", "remaining_time": "3:51:31", "throughput": 2338.16, "total_tokens": 44531840} {"current_steps": 23135, "total_steps": 40000, "loss": 0.0593, "lr": 1.8908355154199346e-05, "epoch": 3.7741251325556733, "percentage": 57.84, "elapsed_time": "5:17:27", "remaining_time": "3:51:25", "throughput": 2338.35, "total_tokens": 44540224} {"current_steps": 23140, "total_steps": 40000, "loss": 0.0048, "lr": 1.8898834053145357e-05, "epoch": 3.774940859776491, "percentage": 57.85, "elapsed_time": "5:17:29", "remaining_time": "3:51:19", "throughput": 2338.57, "total_tokens": 44549312} {"current_steps": 23145, "total_steps": 40000, "loss": 0.0979, "lr": 1.8889313892967813e-05, "epoch": 3.775756586997308, "percentage": 57.86, "elapsed_time": "5:17:31", "remaining_time": "3:51:14", "throughput": 2338.86, "total_tokens": 44559664} {"current_steps": 23150, "total_steps": 40000, "loss": 0.1041, "lr": 1.8879794675134863e-05, "epoch": 3.7765723142181256, "percentage": 57.88, "elapsed_time": "5:17:33", "remaining_time": "3:51:08", "throughput": 2339.09, "total_tokens": 44568800} {"current_steps": 23155, "total_steps": 40000, "loss": 0.0016, "lr": 1.8870276401114494e-05, "epoch": 3.7773880414389427, "percentage": 57.89, "elapsed_time": "5:17:36", "remaining_time": "3:51:03", "throughput": 2339.36, "total_tokens": 44578896} {"current_steps": 23160, "total_steps": 40000, "loss": 0.1231, "lr": 1.886075907237453e-05, "epoch": 3.7782037686597603, "percentage": 57.9, "elapsed_time": "5:17:38", "remaining_time": "3:50:57", "throughput": 2339.65, "total_tokens": 44589344} {"current_steps": 23165, "total_steps": 40000, "loss": 0.0249, "lr": 1.8851242690382672e-05, "epoch": 3.7790194958805774, "percentage": 57.91, "elapsed_time": "5:17:40", "remaining_time": "3:50:51", "throughput": 2339.94, "total_tokens": 44599744} {"current_steps": 23170, "total_steps": 40000, "loss": 0.0012, "lr": 1.884172725660645e-05, "epoch": 3.779835223101395, "percentage": 57.93, "elapsed_time": "5:17:42", "remaining_time": "3:50:46", "throughput": 2340.21, "total_tokens": 44609600} {"current_steps": 23175, "total_steps": 40000, "loss": 0.0747, "lr": 1.8832212772513277e-05, "epoch": 3.780650950322212, "percentage": 57.94, "elapsed_time": "5:17:44", "remaining_time": "3:50:40", "throughput": 2340.46, "total_tokens": 44619296} {"current_steps": 23180, "total_steps": 40000, "loss": 0.1168, "lr": 1.8822699239570414e-05, "epoch": 3.7814666775430297, "percentage": 57.95, "elapsed_time": "5:17:46", "remaining_time": "3:50:35", "throughput": 2340.71, "total_tokens": 44628928} {"current_steps": 23185, "total_steps": 40000, "loss": 0.0043, "lr": 1.8813186659244943e-05, "epoch": 3.782282404763847, "percentage": 57.96, "elapsed_time": "5:17:48", "remaining_time": "3:50:29", "throughput": 2340.93, "total_tokens": 44638016} {"current_steps": 23190, "total_steps": 40000, "loss": 0.0294, "lr": 1.880367503300385e-05, "epoch": 3.7830981319846644, "percentage": 57.98, "elapsed_time": "5:17:50", "remaining_time": "3:50:23", "throughput": 2341.18, "total_tokens": 44647520} {"current_steps": 23195, "total_steps": 40000, "loss": 0.0084, "lr": 1.8794164362313927e-05, "epoch": 3.783913859205482, "percentage": 57.99, "elapsed_time": "5:17:52", "remaining_time": "3:50:18", "throughput": 2341.41, "total_tokens": 44656784} {"current_steps": 23200, "total_steps": 40000, "loss": 0.0173, "lr": 1.878465464864185e-05, "epoch": 3.784729586426299, "percentage": 58.0, "elapsed_time": "5:17:54", "remaining_time": "3:50:12", "throughput": 2341.66, "total_tokens": 44666464} {"current_steps": 23200, "total_steps": 40000, "eval_loss": 0.21155737340450287, "epoch": 3.784729586426299, "percentage": 58.0, "elapsed_time": "5:19:15", "remaining_time": "3:51:11", "throughput": 2331.78, "total_tokens": 44666464} {"current_steps": 23205, "total_steps": 40000, "loss": 0.0478, "lr": 1.877514589345414e-05, "epoch": 3.785545313647116, "percentage": 58.01, "elapsed_time": "5:19:19", "remaining_time": "3:51:06", "throughput": 2331.89, "total_tokens": 44676944} {"current_steps": 23210, "total_steps": 40000, "loss": 0.0009, "lr": 1.876563809821715e-05, "epoch": 3.786361040867934, "percentage": 58.03, "elapsed_time": "5:19:21", "remaining_time": "3:51:01", "throughput": 2332.15, "total_tokens": 44686912} {"current_steps": 23215, "total_steps": 40000, "loss": 0.0022, "lr": 1.8756131264397106e-05, "epoch": 3.7871767680887514, "percentage": 58.04, "elapsed_time": "5:19:23", "remaining_time": "3:50:55", "throughput": 2332.38, "total_tokens": 44696080} {"current_steps": 23220, "total_steps": 40000, "loss": 0.0013, "lr": 1.87466253934601e-05, "epoch": 3.7879924953095685, "percentage": 58.05, "elapsed_time": "5:19:25", "remaining_time": "3:50:49", "throughput": 2332.61, "total_tokens": 44705408} {"current_steps": 23225, "total_steps": 40000, "loss": 0.0786, "lr": 1.8737120486872033e-05, "epoch": 3.7888082225303856, "percentage": 58.06, "elapsed_time": "5:19:27", "remaining_time": "3:50:44", "throughput": 2332.88, "total_tokens": 44715424} {"current_steps": 23230, "total_steps": 40000, "loss": 0.0824, "lr": 1.8727616546098696e-05, "epoch": 3.789623949751203, "percentage": 58.07, "elapsed_time": "5:19:29", "remaining_time": "3:50:38", "throughput": 2333.16, "total_tokens": 44725536} {"current_steps": 23235, "total_steps": 40000, "loss": 0.0083, "lr": 1.8718113572605716e-05, "epoch": 3.7904396769720208, "percentage": 58.09, "elapsed_time": "5:19:31", "remaining_time": "3:50:33", "throughput": 2333.44, "total_tokens": 44735840} {"current_steps": 23240, "total_steps": 40000, "loss": 0.0031, "lr": 1.8708611567858554e-05, "epoch": 3.791255404192838, "percentage": 58.1, "elapsed_time": "5:19:33", "remaining_time": "3:50:27", "throughput": 2333.71, "total_tokens": 44745792} {"current_steps": 23245, "total_steps": 40000, "loss": 0.2173, "lr": 1.8699110533322565e-05, "epoch": 3.792071131413655, "percentage": 58.11, "elapsed_time": "5:19:35", "remaining_time": "3:50:21", "throughput": 2333.96, "total_tokens": 44755424} {"current_steps": 23250, "total_steps": 40000, "loss": 0.0738, "lr": 1.8689610470462897e-05, "epoch": 3.7928868586344726, "percentage": 58.13, "elapsed_time": "5:19:37", "remaining_time": "3:50:16", "throughput": 2334.22, "total_tokens": 44765280} {"current_steps": 23255, "total_steps": 40000, "loss": 0.0009, "lr": 1.8680111380744604e-05, "epoch": 3.79370258585529, "percentage": 58.14, "elapsed_time": "5:19:39", "remaining_time": "3:50:10", "throughput": 2334.41, "total_tokens": 44773760} {"current_steps": 23260, "total_steps": 40000, "loss": 0.0402, "lr": 1.8670613265632564e-05, "epoch": 3.7945183130761073, "percentage": 58.15, "elapsed_time": "5:19:41", "remaining_time": "3:50:05", "throughput": 2334.65, "total_tokens": 44783280} {"current_steps": 23265, "total_steps": 40000, "loss": 0.0665, "lr": 1.866111612659149e-05, "epoch": 3.7953340402969244, "percentage": 58.16, "elapsed_time": "5:19:44", "remaining_time": "3:49:59", "throughput": 2334.91, "total_tokens": 44793040} {"current_steps": 23270, "total_steps": 40000, "loss": 0.0036, "lr": 1.8651619965085967e-05, "epoch": 3.796149767517742, "percentage": 58.17, "elapsed_time": "5:19:46", "remaining_time": "3:49:53", "throughput": 2335.18, "total_tokens": 44803072} {"current_steps": 23275, "total_steps": 40000, "loss": 0.0031, "lr": 1.8642124782580433e-05, "epoch": 3.7969654947385596, "percentage": 58.19, "elapsed_time": "5:19:48", "remaining_time": "3:49:48", "throughput": 2335.44, "total_tokens": 44812848} {"current_steps": 23280, "total_steps": 40000, "loss": 0.0258, "lr": 1.8632630580539144e-05, "epoch": 3.7977812219593767, "percentage": 58.2, "elapsed_time": "5:19:50", "remaining_time": "3:49:42", "throughput": 2335.64, "total_tokens": 44821664} {"current_steps": 23285, "total_steps": 40000, "loss": 0.0759, "lr": 1.862313736042625e-05, "epoch": 3.7985969491801943, "percentage": 58.21, "elapsed_time": "5:19:52", "remaining_time": "3:49:37", "throughput": 2335.89, "total_tokens": 44831312} {"current_steps": 23290, "total_steps": 40000, "loss": 0.0971, "lr": 1.8613645123705703e-05, "epoch": 3.7994126764010114, "percentage": 58.23, "elapsed_time": "5:19:54", "remaining_time": "3:49:31", "throughput": 2336.15, "total_tokens": 44841072} {"current_steps": 23295, "total_steps": 40000, "loss": 0.0621, "lr": 1.8604153871841328e-05, "epoch": 3.800228403621829, "percentage": 58.24, "elapsed_time": "5:19:56", "remaining_time": "3:49:25", "throughput": 2336.41, "total_tokens": 44850896} {"current_steps": 23300, "total_steps": 40000, "loss": 0.0954, "lr": 1.859466360629682e-05, "epoch": 3.801044130842646, "percentage": 58.25, "elapsed_time": "5:19:58", "remaining_time": "3:49:20", "throughput": 2336.6, "total_tokens": 44859344} {"current_steps": 23305, "total_steps": 40000, "loss": 0.0127, "lr": 1.8585174328535666e-05, "epoch": 3.8018598580634637, "percentage": 58.26, "elapsed_time": "5:20:00", "remaining_time": "3:49:14", "throughput": 2336.85, "total_tokens": 44868992} {"current_steps": 23310, "total_steps": 40000, "loss": 0.0022, "lr": 1.857568604002124e-05, "epoch": 3.802675585284281, "percentage": 58.27, "elapsed_time": "5:20:02", "remaining_time": "3:49:09", "throughput": 2337.18, "total_tokens": 44880224} {"current_steps": 23315, "total_steps": 40000, "loss": 0.0902, "lr": 1.8566198742216774e-05, "epoch": 3.8034913125050984, "percentage": 58.29, "elapsed_time": "5:20:04", "remaining_time": "3:49:03", "throughput": 2337.38, "total_tokens": 44888976} {"current_steps": 23320, "total_steps": 40000, "loss": 0.0617, "lr": 1.85567124365853e-05, "epoch": 3.8043070397259156, "percentage": 58.3, "elapsed_time": "5:20:06", "remaining_time": "3:48:58", "throughput": 2337.6, "total_tokens": 44898064} {"current_steps": 23325, "total_steps": 40000, "loss": 0.1426, "lr": 1.854722712458975e-05, "epoch": 3.805122766946733, "percentage": 58.31, "elapsed_time": "5:20:08", "remaining_time": "3:48:52", "throughput": 2337.87, "total_tokens": 44908000} {"current_steps": 23330, "total_steps": 40000, "loss": 0.0018, "lr": 1.853774280769286e-05, "epoch": 3.8059384941675503, "percentage": 58.33, "elapsed_time": "5:20:11", "remaining_time": "3:48:46", "throughput": 2338.07, "total_tokens": 44916768} {"current_steps": 23335, "total_steps": 40000, "loss": 0.0015, "lr": 1.852825948735724e-05, "epoch": 3.806754221388368, "percentage": 58.34, "elapsed_time": "5:20:13", "remaining_time": "3:48:41", "throughput": 2338.31, "total_tokens": 44926192} {"current_steps": 23340, "total_steps": 40000, "loss": 0.003, "lr": 1.851877716504534e-05, "epoch": 3.807569948609185, "percentage": 58.35, "elapsed_time": "5:20:15", "remaining_time": "3:48:35", "throughput": 2338.54, "total_tokens": 44935472} {"current_steps": 23345, "total_steps": 40000, "loss": 0.1638, "lr": 1.8509295842219448e-05, "epoch": 3.8083856758300025, "percentage": 58.36, "elapsed_time": "5:20:17", "remaining_time": "3:48:30", "throughput": 2338.76, "total_tokens": 44944560} {"current_steps": 23350, "total_steps": 40000, "loss": 0.1135, "lr": 1.8499815520341697e-05, "epoch": 3.8092014030508197, "percentage": 58.38, "elapsed_time": "5:20:19", "remaining_time": "3:48:24", "throughput": 2338.93, "total_tokens": 44952576} {"current_steps": 23355, "total_steps": 40000, "loss": 0.0489, "lr": 1.8490336200874094e-05, "epoch": 3.8100171302716372, "percentage": 58.39, "elapsed_time": "5:20:21", "remaining_time": "3:48:19", "throughput": 2339.19, "total_tokens": 44962432} {"current_steps": 23360, "total_steps": 40000, "loss": 0.0014, "lr": 1.848085788527844e-05, "epoch": 3.8108328574924544, "percentage": 58.4, "elapsed_time": "5:20:23", "remaining_time": "3:48:13", "throughput": 2339.46, "total_tokens": 44972480} {"current_steps": 23365, "total_steps": 40000, "loss": 0.1449, "lr": 1.847138057501644e-05, "epoch": 3.811648584713272, "percentage": 58.41, "elapsed_time": "5:20:25", "remaining_time": "3:48:07", "throughput": 2339.75, "total_tokens": 44983040} {"current_steps": 23370, "total_steps": 40000, "loss": 0.0822, "lr": 1.8461904271549582e-05, "epoch": 3.812464311934089, "percentage": 58.43, "elapsed_time": "5:20:27", "remaining_time": "3:48:02", "throughput": 2339.98, "total_tokens": 44992304} {"current_steps": 23375, "total_steps": 40000, "loss": 0.1184, "lr": 1.845242897633926e-05, "epoch": 3.8132800391549067, "percentage": 58.44, "elapsed_time": "5:20:29", "remaining_time": "3:47:56", "throughput": 2340.21, "total_tokens": 45001552} {"current_steps": 23380, "total_steps": 40000, "loss": 0.0324, "lr": 1.844295469084667e-05, "epoch": 3.814095766375724, "percentage": 58.45, "elapsed_time": "5:20:31", "remaining_time": "3:47:51", "throughput": 2340.46, "total_tokens": 45011152} {"current_steps": 23385, "total_steps": 40000, "loss": 0.0528, "lr": 1.843348141653286e-05, "epoch": 3.8149114935965414, "percentage": 58.46, "elapsed_time": "5:20:33", "remaining_time": "3:47:45", "throughput": 2340.69, "total_tokens": 45020496} {"current_steps": 23390, "total_steps": 40000, "loss": 0.0489, "lr": 1.842400915485874e-05, "epoch": 3.815727220817359, "percentage": 58.48, "elapsed_time": "5:20:35", "remaining_time": "3:47:40", "throughput": 2340.92, "total_tokens": 45029632} {"current_steps": 23395, "total_steps": 40000, "loss": 0.0679, "lr": 1.8414537907285053e-05, "epoch": 3.816542948038176, "percentage": 58.49, "elapsed_time": "5:20:37", "remaining_time": "3:47:34", "throughput": 2341.11, "total_tokens": 45038288} {"current_steps": 23400, "total_steps": 40000, "loss": 0.0032, "lr": 1.840506767527237e-05, "epoch": 3.817358675258993, "percentage": 58.5, "elapsed_time": "5:20:40", "remaining_time": "3:47:28", "throughput": 2341.33, "total_tokens": 45047360} {"current_steps": 23400, "total_steps": 40000, "eval_loss": 0.20784857869148254, "epoch": 3.817358675258993, "percentage": 58.5, "elapsed_time": "5:22:00", "remaining_time": "3:48:26", "throughput": 2331.54, "total_tokens": 45047360} {"current_steps": 23405, "total_steps": 40000, "loss": 0.073, "lr": 1.8395598460281137e-05, "epoch": 3.8181744024798108, "percentage": 58.51, "elapsed_time": "5:22:05", "remaining_time": "3:48:22", "throughput": 2331.49, "total_tokens": 45056672} {"current_steps": 23410, "total_steps": 40000, "loss": 0.0017, "lr": 1.838613026377161e-05, "epoch": 3.8189901297006283, "percentage": 58.53, "elapsed_time": "5:22:07", "remaining_time": "3:48:16", "throughput": 2331.7, "total_tokens": 45065712} {"current_steps": 23415, "total_steps": 40000, "loss": 0.0013, "lr": 1.8376663087203917e-05, "epoch": 3.8198058569214455, "percentage": 58.54, "elapsed_time": "5:22:09", "remaining_time": "3:48:11", "throughput": 2331.89, "total_tokens": 45074112} {"current_steps": 23420, "total_steps": 40000, "loss": 0.0041, "lr": 1.8367196932038014e-05, "epoch": 3.8206215841422626, "percentage": 58.55, "elapsed_time": "5:22:11", "remaining_time": "3:48:05", "throughput": 2332.04, "total_tokens": 45081824} {"current_steps": 23425, "total_steps": 40000, "loss": 0.0105, "lr": 1.8357731799733686e-05, "epoch": 3.82143731136308, "percentage": 58.56, "elapsed_time": "5:22:13", "remaining_time": "3:48:00", "throughput": 2332.28, "total_tokens": 45091424} {"current_steps": 23430, "total_steps": 40000, "loss": 0.0038, "lr": 1.8348267691750586e-05, "epoch": 3.8222530385838978, "percentage": 58.58, "elapsed_time": "5:22:15", "remaining_time": "3:47:54", "throughput": 2332.56, "total_tokens": 45101664} {"current_steps": 23435, "total_steps": 40000, "loss": 0.0028, "lr": 1.833880460954821e-05, "epoch": 3.823068765804715, "percentage": 58.59, "elapsed_time": "5:22:17", "remaining_time": "3:47:48", "throughput": 2332.86, "total_tokens": 45112192} {"current_steps": 23440, "total_steps": 40000, "loss": 0.0566, "lr": 1.8329342554585866e-05, "epoch": 3.823884493025532, "percentage": 58.6, "elapsed_time": "5:22:19", "remaining_time": "3:47:43", "throughput": 2333.18, "total_tokens": 45123392} {"current_steps": 23445, "total_steps": 40000, "loss": 0.0958, "lr": 1.8319881528322735e-05, "epoch": 3.8247002202463496, "percentage": 58.61, "elapsed_time": "5:22:21", "remaining_time": "3:47:37", "throughput": 2333.46, "total_tokens": 45133584} {"current_steps": 23450, "total_steps": 40000, "loss": 0.019, "lr": 1.8310421532217815e-05, "epoch": 3.825515947467167, "percentage": 58.63, "elapsed_time": "5:22:23", "remaining_time": "3:47:32", "throughput": 2333.73, "total_tokens": 45143552} {"current_steps": 23455, "total_steps": 40000, "loss": 0.0221, "lr": 1.8300962567729958e-05, "epoch": 3.8263316746879843, "percentage": 58.64, "elapsed_time": "5:22:26", "remaining_time": "3:47:26", "throughput": 2333.96, "total_tokens": 45152832} {"current_steps": 23460, "total_steps": 40000, "loss": 0.0628, "lr": 1.8291504636317866e-05, "epoch": 3.8271474019088014, "percentage": 58.65, "elapsed_time": "5:22:28", "remaining_time": "3:47:20", "throughput": 2334.17, "total_tokens": 45161776} {"current_steps": 23465, "total_steps": 40000, "loss": 0.0536, "lr": 1.8282047739440055e-05, "epoch": 3.827963129129619, "percentage": 58.66, "elapsed_time": "5:22:30", "remaining_time": "3:47:15", "throughput": 2334.47, "total_tokens": 45172352} {"current_steps": 23470, "total_steps": 40000, "loss": 0.0043, "lr": 1.8272591878554903e-05, "epoch": 3.8287788563504366, "percentage": 58.67, "elapsed_time": "5:22:32", "remaining_time": "3:47:09", "throughput": 2334.73, "total_tokens": 45182288} {"current_steps": 23475, "total_steps": 40000, "loss": 0.1149, "lr": 1.8263137055120638e-05, "epoch": 3.8295945835712537, "percentage": 58.69, "elapsed_time": "5:22:34", "remaining_time": "3:47:04", "throughput": 2334.96, "total_tokens": 45191664} {"current_steps": 23480, "total_steps": 40000, "loss": 0.1386, "lr": 1.8253683270595295e-05, "epoch": 3.8304103107920713, "percentage": 58.7, "elapsed_time": "5:22:36", "remaining_time": "3:46:58", "throughput": 2335.16, "total_tokens": 45200240} {"current_steps": 23485, "total_steps": 40000, "loss": 0.0038, "lr": 1.824423052643677e-05, "epoch": 3.8312260380128884, "percentage": 58.71, "elapsed_time": "5:22:38", "remaining_time": "3:46:53", "throughput": 2335.41, "total_tokens": 45209968} {"current_steps": 23490, "total_steps": 40000, "loss": 0.0688, "lr": 1.82347788241028e-05, "epoch": 3.832041765233706, "percentage": 58.73, "elapsed_time": "5:22:40", "remaining_time": "3:46:47", "throughput": 2335.65, "total_tokens": 45219520} {"current_steps": 23495, "total_steps": 40000, "loss": 0.1909, "lr": 1.8225328165050942e-05, "epoch": 3.832857492454523, "percentage": 58.74, "elapsed_time": "5:22:42", "remaining_time": "3:46:42", "throughput": 2335.88, "total_tokens": 45228832} {"current_steps": 23500, "total_steps": 40000, "loss": 0.0799, "lr": 1.821587855073863e-05, "epoch": 3.8336732196753407, "percentage": 58.75, "elapsed_time": "5:22:44", "remaining_time": "3:46:36", "throughput": 2336.14, "total_tokens": 45238704} {"current_steps": 23505, "total_steps": 40000, "loss": 0.0119, "lr": 1.8206429982623086e-05, "epoch": 3.834488946896158, "percentage": 58.76, "elapsed_time": "5:22:46", "remaining_time": "3:46:30", "throughput": 2336.38, "total_tokens": 45248208} {"current_steps": 23510, "total_steps": 40000, "loss": 0.0698, "lr": 1.8196982462161416e-05, "epoch": 3.8353046741169754, "percentage": 58.77, "elapsed_time": "5:22:48", "remaining_time": "3:46:25", "throughput": 2336.6, "total_tokens": 45257296} {"current_steps": 23515, "total_steps": 40000, "loss": 0.0097, "lr": 1.818753599081055e-05, "epoch": 3.8361204013377925, "percentage": 58.79, "elapsed_time": "5:22:50", "remaining_time": "3:46:19", "throughput": 2336.83, "total_tokens": 45266672} {"current_steps": 23520, "total_steps": 40000, "loss": 0.0022, "lr": 1.817809057002724e-05, "epoch": 3.83693612855861, "percentage": 58.8, "elapsed_time": "5:22:53", "remaining_time": "3:46:14", "throughput": 2337.09, "total_tokens": 45276528} {"current_steps": 23525, "total_steps": 40000, "loss": 0.0779, "lr": 1.8168646201268096e-05, "epoch": 3.8377518557794272, "percentage": 58.81, "elapsed_time": "5:22:55", "remaining_time": "3:46:08", "throughput": 2337.25, "total_tokens": 45284384} {"current_steps": 23530, "total_steps": 40000, "loss": 0.0009, "lr": 1.8159202885989557e-05, "epoch": 3.838567583000245, "percentage": 58.83, "elapsed_time": "5:22:57", "remaining_time": "3:46:03", "throughput": 2337.44, "total_tokens": 45292896} {"current_steps": 23535, "total_steps": 40000, "loss": 0.0157, "lr": 1.814976062564789e-05, "epoch": 3.839383310221062, "percentage": 58.84, "elapsed_time": "5:22:59", "remaining_time": "3:45:57", "throughput": 2337.72, "total_tokens": 45303168} {"current_steps": 23540, "total_steps": 40000, "loss": 0.0429, "lr": 1.8140319421699234e-05, "epoch": 3.8401990374418795, "percentage": 58.85, "elapsed_time": "5:23:01", "remaining_time": "3:45:52", "throughput": 2337.97, "total_tokens": 45312992} {"current_steps": 23545, "total_steps": 40000, "loss": 0.0007, "lr": 1.8130879275599515e-05, "epoch": 3.8410147646626966, "percentage": 58.86, "elapsed_time": "5:23:03", "remaining_time": "3:45:46", "throughput": 2338.26, "total_tokens": 45323296} {"current_steps": 23550, "total_steps": 40000, "loss": 0.0025, "lr": 1.8121440188804544e-05, "epoch": 3.841830491883514, "percentage": 58.88, "elapsed_time": "5:23:05", "remaining_time": "3:45:41", "throughput": 2338.43, "total_tokens": 45331424} {"current_steps": 23555, "total_steps": 40000, "loss": 0.1873, "lr": 1.811200216276993e-05, "epoch": 3.8426462191043314, "percentage": 58.89, "elapsed_time": "5:23:07", "remaining_time": "3:45:35", "throughput": 2338.61, "total_tokens": 45339904} {"current_steps": 23560, "total_steps": 40000, "loss": 0.0666, "lr": 1.810256519895115e-05, "epoch": 3.843461946325149, "percentage": 58.9, "elapsed_time": "5:23:09", "remaining_time": "3:45:29", "throughput": 2338.82, "total_tokens": 45348704} {"current_steps": 23565, "total_steps": 40000, "loss": 0.0763, "lr": 1.8093129298803494e-05, "epoch": 3.8442776735459665, "percentage": 58.91, "elapsed_time": "5:23:11", "remaining_time": "3:45:24", "throughput": 2339.04, "total_tokens": 45357840} {"current_steps": 23570, "total_steps": 40000, "loss": 0.0707, "lr": 1.808369446378209e-05, "epoch": 3.8450934007667836, "percentage": 58.93, "elapsed_time": "5:23:13", "remaining_time": "3:45:18", "throughput": 2339.27, "total_tokens": 45367216} {"current_steps": 23575, "total_steps": 40000, "loss": 0.04, "lr": 1.8074260695341914e-05, "epoch": 3.8459091279876008, "percentage": 58.94, "elapsed_time": "5:23:15", "remaining_time": "3:45:13", "throughput": 2339.43, "total_tokens": 45375296} {"current_steps": 23580, "total_steps": 40000, "loss": 0.0054, "lr": 1.8064827994937782e-05, "epoch": 3.8467248552084183, "percentage": 58.95, "elapsed_time": "5:23:17", "remaining_time": "3:45:07", "throughput": 2339.72, "total_tokens": 45385632} {"current_steps": 23585, "total_steps": 40000, "loss": 0.1558, "lr": 1.8055396364024317e-05, "epoch": 3.847540582429236, "percentage": 58.96, "elapsed_time": "5:23:19", "remaining_time": "3:45:02", "throughput": 2339.98, "total_tokens": 45395536} {"current_steps": 23590, "total_steps": 40000, "loss": 0.0009, "lr": 1.804596580405601e-05, "epoch": 3.848356309650053, "percentage": 58.98, "elapsed_time": "5:23:22", "remaining_time": "3:44:56", "throughput": 2340.22, "total_tokens": 45405072} {"current_steps": 23595, "total_steps": 40000, "loss": 0.0719, "lr": 1.8036536316487174e-05, "epoch": 3.84917203687087, "percentage": 58.99, "elapsed_time": "5:23:24", "remaining_time": "3:44:51", "throughput": 2340.54, "total_tokens": 45416080} {"current_steps": 23600, "total_steps": 40000, "loss": 0.0808, "lr": 1.802710790277193e-05, "epoch": 3.8499877640916877, "percentage": 59.0, "elapsed_time": "5:23:26", "remaining_time": "3:44:45", "throughput": 2340.82, "total_tokens": 45426496} {"current_steps": 23600, "total_steps": 40000, "eval_loss": 0.22055284678936005, "epoch": 3.8499877640916877, "percentage": 59.0, "elapsed_time": "5:24:47", "remaining_time": "3:45:41", "throughput": 2331.11, "total_tokens": 45426496} {"current_steps": 23605, "total_steps": 40000, "loss": 0.0038, "lr": 1.801768056436429e-05, "epoch": 3.8508034913125053, "percentage": 59.01, "elapsed_time": "5:24:50", "remaining_time": "3:45:37", "throughput": 2331.15, "total_tokens": 45435712} {"current_steps": 23610, "total_steps": 40000, "loss": 0.0411, "lr": 1.8008254302718035e-05, "epoch": 3.8516192185333225, "percentage": 59.03, "elapsed_time": "5:24:52", "remaining_time": "3:45:31", "throughput": 2331.45, "total_tokens": 45446432} {"current_steps": 23615, "total_steps": 40000, "loss": 0.0024, "lr": 1.7998829119286837e-05, "epoch": 3.8524349457541396, "percentage": 59.04, "elapsed_time": "5:24:54", "remaining_time": "3:45:26", "throughput": 2331.68, "total_tokens": 45455760} {"current_steps": 23620, "total_steps": 40000, "loss": 0.0022, "lr": 1.798940501552418e-05, "epoch": 3.853250672974957, "percentage": 59.05, "elapsed_time": "5:24:56", "remaining_time": "3:45:20", "throughput": 2331.97, "total_tokens": 45466304} {"current_steps": 23625, "total_steps": 40000, "loss": 0.0028, "lr": 1.797998199288336e-05, "epoch": 3.8540664001957747, "percentage": 59.06, "elapsed_time": "5:24:59", "remaining_time": "3:45:15", "throughput": 2332.14, "total_tokens": 45474544} {"current_steps": 23630, "total_steps": 40000, "loss": 0.0543, "lr": 1.7970560052817543e-05, "epoch": 3.854882127416592, "percentage": 59.08, "elapsed_time": "5:25:01", "remaining_time": "3:45:09", "throughput": 2332.42, "total_tokens": 45484736} {"current_steps": 23635, "total_steps": 40000, "loss": 0.015, "lr": 1.7961139196779702e-05, "epoch": 3.855697854637409, "percentage": 59.09, "elapsed_time": "5:25:03", "remaining_time": "3:45:04", "throughput": 2332.65, "total_tokens": 45494064} {"current_steps": 23640, "total_steps": 40000, "loss": 0.082, "lr": 1.7951719426222647e-05, "epoch": 3.8565135818582266, "percentage": 59.1, "elapsed_time": "5:25:05", "remaining_time": "3:44:58", "throughput": 2332.91, "total_tokens": 45504016} {"current_steps": 23645, "total_steps": 40000, "loss": 0.0746, "lr": 1.794230074259904e-05, "epoch": 3.857329309079044, "percentage": 59.11, "elapsed_time": "5:25:07", "remaining_time": "3:44:53", "throughput": 2333.11, "total_tokens": 45512720} {"current_steps": 23650, "total_steps": 40000, "loss": 0.095, "lr": 1.7932883147361336e-05, "epoch": 3.8581450362998613, "percentage": 59.13, "elapsed_time": "5:25:09", "remaining_time": "3:44:47", "throughput": 2333.39, "total_tokens": 45523056} {"current_steps": 23655, "total_steps": 40000, "loss": 0.0346, "lr": 1.7923466641961865e-05, "epoch": 3.858960763520679, "percentage": 59.14, "elapsed_time": "5:25:11", "remaining_time": "3:44:41", "throughput": 2333.63, "total_tokens": 45532672} {"current_steps": 23660, "total_steps": 40000, "loss": 0.1098, "lr": 1.791405122785278e-05, "epoch": 3.859776490741496, "percentage": 59.15, "elapsed_time": "5:25:13", "remaining_time": "3:44:36", "throughput": 2333.84, "total_tokens": 45541632} {"current_steps": 23665, "total_steps": 40000, "loss": 0.1102, "lr": 1.7904636906486037e-05, "epoch": 3.8605922179623136, "percentage": 59.16, "elapsed_time": "5:25:15", "remaining_time": "3:44:30", "throughput": 2334.13, "total_tokens": 45552016} {"current_steps": 23670, "total_steps": 40000, "loss": 0.0027, "lr": 1.7895223679313448e-05, "epoch": 3.8614079451831307, "percentage": 59.17, "elapsed_time": "5:25:17", "remaining_time": "3:44:25", "throughput": 2334.39, "total_tokens": 45562048} {"current_steps": 23675, "total_steps": 40000, "loss": 0.0124, "lr": 1.7885811547786653e-05, "epoch": 3.8622236724039483, "percentage": 59.19, "elapsed_time": "5:25:19", "remaining_time": "3:44:19", "throughput": 2334.68, "total_tokens": 45572544} {"current_steps": 23680, "total_steps": 40000, "loss": 0.0011, "lr": 1.7876400513357115e-05, "epoch": 3.8630393996247654, "percentage": 59.2, "elapsed_time": "5:25:21", "remaining_time": "3:44:14", "throughput": 2334.95, "total_tokens": 45582672} {"current_steps": 23685, "total_steps": 40000, "loss": 0.0016, "lr": 1.7866990577476146e-05, "epoch": 3.863855126845583, "percentage": 59.21, "elapsed_time": "5:25:23", "remaining_time": "3:44:08", "throughput": 2335.21, "total_tokens": 45592464} {"current_steps": 23690, "total_steps": 40000, "loss": 0.2124, "lr": 1.7857581741594863e-05, "epoch": 3.8646708540664, "percentage": 59.23, "elapsed_time": "5:25:26", "remaining_time": "3:44:03", "throughput": 2335.45, "total_tokens": 45602160} {"current_steps": 23695, "total_steps": 40000, "loss": 0.0014, "lr": 1.7848174007164237e-05, "epoch": 3.8654865812872177, "percentage": 59.24, "elapsed_time": "5:25:28", "remaining_time": "3:43:57", "throughput": 2335.69, "total_tokens": 45611504} {"current_steps": 23700, "total_steps": 40000, "loss": 0.0762, "lr": 1.7838767375635052e-05, "epoch": 3.866302308508035, "percentage": 59.25, "elapsed_time": "5:25:30", "remaining_time": "3:43:52", "throughput": 2335.93, "total_tokens": 45621232} {"current_steps": 23705, "total_steps": 40000, "loss": 0.002, "lr": 1.782936184845793e-05, "epoch": 3.8671180357288524, "percentage": 59.26, "elapsed_time": "5:25:32", "remaining_time": "3:43:46", "throughput": 2336.17, "total_tokens": 45630576} {"current_steps": 23710, "total_steps": 40000, "loss": 0.0019, "lr": 1.7819957427083334e-05, "epoch": 3.8679337629496695, "percentage": 59.27, "elapsed_time": "5:25:34", "remaining_time": "3:43:41", "throughput": 2336.41, "total_tokens": 45640240} {"current_steps": 23715, "total_steps": 40000, "loss": 0.0209, "lr": 1.7810554112961516e-05, "epoch": 3.868749490170487, "percentage": 59.29, "elapsed_time": "5:25:36", "remaining_time": "3:43:35", "throughput": 2336.65, "total_tokens": 45649680} {"current_steps": 23720, "total_steps": 40000, "loss": 0.0034, "lr": 1.7801151907542607e-05, "epoch": 3.869565217391304, "percentage": 59.3, "elapsed_time": "5:25:38", "remaining_time": "3:43:30", "throughput": 2336.87, "total_tokens": 45658832} {"current_steps": 23725, "total_steps": 40000, "loss": 0.0805, "lr": 1.7791750812276547e-05, "epoch": 3.870380944612122, "percentage": 59.31, "elapsed_time": "5:25:40", "remaining_time": "3:43:24", "throughput": 2337.08, "total_tokens": 45667744} {"current_steps": 23730, "total_steps": 40000, "loss": 0.1158, "lr": 1.778235082861309e-05, "epoch": 3.871196671832939, "percentage": 59.33, "elapsed_time": "5:25:42", "remaining_time": "3:43:18", "throughput": 2337.34, "total_tokens": 45677760} {"current_steps": 23735, "total_steps": 40000, "loss": 0.0019, "lr": 1.777295195800184e-05, "epoch": 3.8720123990537565, "percentage": 59.34, "elapsed_time": "5:25:44", "remaining_time": "3:43:13", "throughput": 2337.57, "total_tokens": 45687088} {"current_steps": 23740, "total_steps": 40000, "loss": 0.0065, "lr": 1.7763554201892215e-05, "epoch": 3.8728281262745736, "percentage": 59.35, "elapsed_time": "5:25:46", "remaining_time": "3:43:07", "throughput": 2337.82, "total_tokens": 45696816} {"current_steps": 23745, "total_steps": 40000, "loss": 0.0342, "lr": 1.7754157561733476e-05, "epoch": 3.873643853495391, "percentage": 59.36, "elapsed_time": "5:25:48", "remaining_time": "3:43:02", "throughput": 2338.07, "total_tokens": 45706544} {"current_steps": 23750, "total_steps": 40000, "loss": 0.0018, "lr": 1.7744762038974702e-05, "epoch": 3.8744595807162083, "percentage": 59.38, "elapsed_time": "5:25:50", "remaining_time": "3:42:56", "throughput": 2338.32, "total_tokens": 45716320} {"current_steps": 23755, "total_steps": 40000, "loss": 0.042, "lr": 1.7735367635064788e-05, "epoch": 3.875275307937026, "percentage": 59.39, "elapsed_time": "5:25:53", "remaining_time": "3:42:51", "throughput": 2338.62, "total_tokens": 45726992} {"current_steps": 23760, "total_steps": 40000, "loss": 0.1142, "lr": 1.7725974351452474e-05, "epoch": 3.8760910351578435, "percentage": 59.4, "elapsed_time": "5:25:55", "remaining_time": "3:42:45", "throughput": 2338.84, "total_tokens": 45736160} {"current_steps": 23765, "total_steps": 40000, "loss": 0.062, "lr": 1.771658218958634e-05, "epoch": 3.8769067623786606, "percentage": 59.41, "elapsed_time": "5:25:57", "remaining_time": "3:42:40", "throughput": 2339.04, "total_tokens": 45744976} {"current_steps": 23770, "total_steps": 40000, "loss": 0.0018, "lr": 1.770719115091475e-05, "epoch": 3.8777224895994777, "percentage": 59.42, "elapsed_time": "5:25:59", "remaining_time": "3:42:34", "throughput": 2339.3, "total_tokens": 45754912} {"current_steps": 23775, "total_steps": 40000, "loss": 0.0184, "lr": 1.7697801236885935e-05, "epoch": 3.8785382168202953, "percentage": 59.44, "elapsed_time": "5:26:01", "remaining_time": "3:42:29", "throughput": 2339.58, "total_tokens": 45765200} {"current_steps": 23780, "total_steps": 40000, "loss": 0.0037, "lr": 1.7688412448947944e-05, "epoch": 3.879353944041113, "percentage": 59.45, "elapsed_time": "5:26:03", "remaining_time": "3:42:23", "throughput": 2339.84, "total_tokens": 45775280} {"current_steps": 23785, "total_steps": 40000, "loss": 0.0014, "lr": 1.767902478854862e-05, "epoch": 3.88016967126193, "percentage": 59.46, "elapsed_time": "5:26:05", "remaining_time": "3:42:18", "throughput": 2340.09, "total_tokens": 45784832} {"current_steps": 23790, "total_steps": 40000, "loss": 0.0011, "lr": 1.766963825713569e-05, "epoch": 3.880985398482747, "percentage": 59.48, "elapsed_time": "5:26:07", "remaining_time": "3:42:12", "throughput": 2340.31, "total_tokens": 45794032} {"current_steps": 23795, "total_steps": 40000, "loss": 0.0714, "lr": 1.766025285615665e-05, "epoch": 3.8818011257035647, "percentage": 59.49, "elapsed_time": "5:26:09", "remaining_time": "3:42:07", "throughput": 2340.54, "total_tokens": 45803360} {"current_steps": 23800, "total_steps": 40000, "loss": 0.1363, "lr": 1.7650868587058854e-05, "epoch": 3.8826168529243823, "percentage": 59.5, "elapsed_time": "5:26:11", "remaining_time": "3:42:01", "throughput": 2340.81, "total_tokens": 45813536} {"current_steps": 23800, "total_steps": 40000, "eval_loss": 0.2621123790740967, "epoch": 3.8826168529243823, "percentage": 59.5, "elapsed_time": "5:27:32", "remaining_time": "3:42:56", "throughput": 2331.19, "total_tokens": 45813536} {"current_steps": 23805, "total_steps": 40000, "loss": 0.2097, "lr": 1.7641485451289484e-05, "epoch": 3.8834325801451994, "percentage": 59.51, "elapsed_time": "5:27:36", "remaining_time": "3:42:52", "throughput": 2331.24, "total_tokens": 45823008} {"current_steps": 23810, "total_steps": 40000, "loss": 0.0006, "lr": 1.7632103450295534e-05, "epoch": 3.8842483073660166, "percentage": 59.52, "elapsed_time": "5:27:38", "remaining_time": "3:42:46", "throughput": 2331.36, "total_tokens": 45830208} {"current_steps": 23815, "total_steps": 40000, "loss": 0.2892, "lr": 1.762272258552381e-05, "epoch": 3.885064034586834, "percentage": 59.54, "elapsed_time": "5:27:40", "remaining_time": "3:42:41", "throughput": 2331.65, "total_tokens": 45840928} {"current_steps": 23820, "total_steps": 40000, "loss": 0.0584, "lr": 1.7613342858420988e-05, "epoch": 3.8858797618076517, "percentage": 59.55, "elapsed_time": "5:27:42", "remaining_time": "3:42:35", "throughput": 2331.92, "total_tokens": 45850992} {"current_steps": 23825, "total_steps": 40000, "loss": 0.0637, "lr": 1.760396427043351e-05, "epoch": 3.886695489028469, "percentage": 59.56, "elapsed_time": "5:27:44", "remaining_time": "3:42:30", "throughput": 2332.15, "total_tokens": 45860320} {"current_steps": 23830, "total_steps": 40000, "loss": 0.0016, "lr": 1.7594586823007696e-05, "epoch": 3.887511216249286, "percentage": 59.58, "elapsed_time": "5:27:46", "remaining_time": "3:42:24", "throughput": 2332.41, "total_tokens": 45870368} {"current_steps": 23835, "total_steps": 40000, "loss": 0.2168, "lr": 1.7585210517589646e-05, "epoch": 3.8883269434701035, "percentage": 59.59, "elapsed_time": "5:27:48", "remaining_time": "3:42:19", "throughput": 2332.64, "total_tokens": 45879744} {"current_steps": 23840, "total_steps": 40000, "loss": 0.1038, "lr": 1.7575835355625314e-05, "epoch": 3.889142670690921, "percentage": 59.6, "elapsed_time": "5:27:50", "remaining_time": "3:42:13", "throughput": 2332.89, "total_tokens": 45889424} {"current_steps": 23845, "total_steps": 40000, "loss": 0.0787, "lr": 1.756646133856048e-05, "epoch": 3.8899583979117383, "percentage": 59.61, "elapsed_time": "5:27:52", "remaining_time": "3:42:08", "throughput": 2333.15, "total_tokens": 45899344} {"current_steps": 23850, "total_steps": 40000, "loss": 0.0426, "lr": 1.7557088467840714e-05, "epoch": 3.890774125132556, "percentage": 59.62, "elapsed_time": "5:27:54", "remaining_time": "3:42:02", "throughput": 2333.29, "total_tokens": 45907088} {"current_steps": 23855, "total_steps": 40000, "loss": 0.0734, "lr": 1.7547716744911438e-05, "epoch": 3.891589852353373, "percentage": 59.64, "elapsed_time": "5:27:56", "remaining_time": "3:41:57", "throughput": 2333.48, "total_tokens": 45915600} {"current_steps": 23860, "total_steps": 40000, "loss": 0.0727, "lr": 1.7538346171217902e-05, "epoch": 3.8924055795741905, "percentage": 59.65, "elapsed_time": "5:27:58", "remaining_time": "3:41:51", "throughput": 2333.76, "total_tokens": 45925856} {"current_steps": 23865, "total_steps": 40000, "loss": 0.0487, "lr": 1.7528976748205146e-05, "epoch": 3.8932213067950077, "percentage": 59.66, "elapsed_time": "5:28:01", "remaining_time": "3:41:46", "throughput": 2333.99, "total_tokens": 45935328} {"current_steps": 23870, "total_steps": 40000, "loss": 0.0038, "lr": 1.751960847731807e-05, "epoch": 3.8940370340158252, "percentage": 59.67, "elapsed_time": "5:28:03", "remaining_time": "3:41:40", "throughput": 2334.23, "total_tokens": 45944832} {"current_steps": 23875, "total_steps": 40000, "loss": 0.0551, "lr": 1.7510241360001362e-05, "epoch": 3.8948527612366424, "percentage": 59.69, "elapsed_time": "5:28:05", "remaining_time": "3:41:35", "throughput": 2334.51, "total_tokens": 45955264} {"current_steps": 23880, "total_steps": 40000, "loss": 0.0329, "lr": 1.7500875397699562e-05, "epoch": 3.89566848845746, "percentage": 59.7, "elapsed_time": "5:28:07", "remaining_time": "3:41:29", "throughput": 2334.68, "total_tokens": 45963312} {"current_steps": 23885, "total_steps": 40000, "loss": 0.0059, "lr": 1.7491510591857015e-05, "epoch": 3.896484215678277, "percentage": 59.71, "elapsed_time": "5:28:09", "remaining_time": "3:41:24", "throughput": 2334.92, "total_tokens": 45973056} {"current_steps": 23890, "total_steps": 40000, "loss": 0.1032, "lr": 1.7482146943917896e-05, "epoch": 3.8972999428990946, "percentage": 59.72, "elapsed_time": "5:28:11", "remaining_time": "3:41:18", "throughput": 2335.15, "total_tokens": 45982416} {"current_steps": 23895, "total_steps": 40000, "loss": 0.1281, "lr": 1.7472784455326185e-05, "epoch": 3.898115670119912, "percentage": 59.74, "elapsed_time": "5:28:13", "remaining_time": "3:41:13", "throughput": 2335.45, "total_tokens": 45993104} {"current_steps": 23900, "total_steps": 40000, "loss": 0.0313, "lr": 1.746342312752572e-05, "epoch": 3.8989313973407294, "percentage": 59.75, "elapsed_time": "5:28:15", "remaining_time": "3:41:07", "throughput": 2335.68, "total_tokens": 46002528} {"current_steps": 23905, "total_steps": 40000, "loss": 0.027, "lr": 1.74540629619601e-05, "epoch": 3.8997471245615465, "percentage": 59.76, "elapsed_time": "5:28:17", "remaining_time": "3:41:02", "throughput": 2335.89, "total_tokens": 46011408} {"current_steps": 23910, "total_steps": 40000, "loss": 0.0139, "lr": 1.7444703960072815e-05, "epoch": 3.900562851782364, "percentage": 59.77, "elapsed_time": "5:28:19", "remaining_time": "3:40:56", "throughput": 2336.04, "total_tokens": 46019280} {"current_steps": 23915, "total_steps": 40000, "loss": 0.0162, "lr": 1.7435346123307118e-05, "epoch": 3.901378579003181, "percentage": 59.79, "elapsed_time": "5:28:21", "remaining_time": "3:40:51", "throughput": 2336.27, "total_tokens": 46028720} {"current_steps": 23920, "total_steps": 40000, "loss": 0.0054, "lr": 1.742598945310611e-05, "epoch": 3.9021943062239988, "percentage": 59.8, "elapsed_time": "5:28:23", "remaining_time": "3:40:45", "throughput": 2336.51, "total_tokens": 46038192} {"current_steps": 23925, "total_steps": 40000, "loss": 0.0019, "lr": 1.741663395091272e-05, "epoch": 3.903010033444816, "percentage": 59.81, "elapsed_time": "5:28:25", "remaining_time": "3:40:40", "throughput": 2336.72, "total_tokens": 46047120} {"current_steps": 23930, "total_steps": 40000, "loss": 0.1203, "lr": 1.7407279618169657e-05, "epoch": 3.9038257606656335, "percentage": 59.82, "elapsed_time": "5:28:27", "remaining_time": "3:40:34", "throughput": 2336.97, "total_tokens": 46057008} {"current_steps": 23935, "total_steps": 40000, "loss": 0.0364, "lr": 1.73979264563195e-05, "epoch": 3.904641487886451, "percentage": 59.84, "elapsed_time": "5:28:30", "remaining_time": "3:40:29", "throughput": 2337.19, "total_tokens": 46066176} {"current_steps": 23940, "total_steps": 40000, "loss": 0.0016, "lr": 1.7388574466804625e-05, "epoch": 3.905457215107268, "percentage": 59.85, "elapsed_time": "5:28:32", "remaining_time": "3:40:23", "throughput": 2337.47, "total_tokens": 46076464} {"current_steps": 23945, "total_steps": 40000, "loss": 0.1013, "lr": 1.7379223651067207e-05, "epoch": 3.9062729423280853, "percentage": 59.86, "elapsed_time": "5:28:34", "remaining_time": "3:40:18", "throughput": 2337.74, "total_tokens": 46086656} {"current_steps": 23950, "total_steps": 40000, "loss": 0.003, "lr": 1.736987401054928e-05, "epoch": 3.907088669548903, "percentage": 59.88, "elapsed_time": "5:28:36", "remaining_time": "3:40:12", "throughput": 2337.95, "total_tokens": 46095648} {"current_steps": 23955, "total_steps": 40000, "loss": 0.1237, "lr": 1.736052554669266e-05, "epoch": 3.9079043967697205, "percentage": 59.89, "elapsed_time": "5:28:38", "remaining_time": "3:40:07", "throughput": 2338.17, "total_tokens": 46104960} {"current_steps": 23960, "total_steps": 40000, "loss": 0.0302, "lr": 1.7351178260939007e-05, "epoch": 3.9087201239905376, "percentage": 59.9, "elapsed_time": "5:28:40", "remaining_time": "3:40:01", "throughput": 2338.44, "total_tokens": 46114992} {"current_steps": 23965, "total_steps": 40000, "loss": 0.0077, "lr": 1.7341832154729794e-05, "epoch": 3.9095358512113547, "percentage": 59.91, "elapsed_time": "5:28:42", "remaining_time": "3:39:56", "throughput": 2338.64, "total_tokens": 46123728} {"current_steps": 23970, "total_steps": 40000, "loss": 0.062, "lr": 1.7332487229506286e-05, "epoch": 3.9103515784321723, "percentage": 59.92, "elapsed_time": "5:28:44", "remaining_time": "3:39:50", "throughput": 2338.91, "total_tokens": 46134016} {"current_steps": 23975, "total_steps": 40000, "loss": 0.0828, "lr": 1.732314348670961e-05, "epoch": 3.91116730565299, "percentage": 59.94, "elapsed_time": "5:28:46", "remaining_time": "3:39:45", "throughput": 2339.22, "total_tokens": 46145088} {"current_steps": 23980, "total_steps": 40000, "loss": 0.0028, "lr": 1.7313800927780686e-05, "epoch": 3.911983032873807, "percentage": 59.95, "elapsed_time": "5:28:48", "remaining_time": "3:39:39", "throughput": 2339.51, "total_tokens": 46155728} {"current_steps": 23985, "total_steps": 40000, "loss": 0.074, "lr": 1.7304459554160245e-05, "epoch": 3.912798760094624, "percentage": 59.96, "elapsed_time": "5:28:50", "remaining_time": "3:39:34", "throughput": 2339.74, "total_tokens": 46165072} {"current_steps": 23990, "total_steps": 40000, "loss": 0.1266, "lr": 1.7295119367288853e-05, "epoch": 3.9136144873154417, "percentage": 59.98, "elapsed_time": "5:28:52", "remaining_time": "3:39:28", "throughput": 2339.97, "total_tokens": 46174416} {"current_steps": 23995, "total_steps": 40000, "loss": 0.0008, "lr": 1.728578036860688e-05, "epoch": 3.9144302145362593, "percentage": 59.99, "elapsed_time": "5:28:54", "remaining_time": "3:39:23", "throughput": 2340.16, "total_tokens": 46182912} {"current_steps": 24000, "total_steps": 40000, "loss": 0.1103, "lr": 1.7276442559554513e-05, "epoch": 3.9152459417570764, "percentage": 60.0, "elapsed_time": "5:28:57", "remaining_time": "3:39:18", "throughput": 2340.4, "total_tokens": 46192656} {"current_steps": 24000, "total_steps": 40000, "eval_loss": 0.22623005509376526, "epoch": 3.9152459417570764, "percentage": 60.0, "elapsed_time": "5:30:17", "remaining_time": "3:40:11", "throughput": 2330.85, "total_tokens": 46192656} {"current_steps": 24005, "total_steps": 40000, "loss": 0.0017, "lr": 1.726710594157177e-05, "epoch": 3.9160616689778935, "percentage": 60.01, "elapsed_time": "5:30:21", "remaining_time": "3:40:07", "throughput": 2330.86, "total_tokens": 46201376} {"current_steps": 24010, "total_steps": 40000, "loss": 0.1399, "lr": 1.725777051609846e-05, "epoch": 3.916877396198711, "percentage": 60.02, "elapsed_time": "5:30:23", "remaining_time": "3:40:02", "throughput": 2331.14, "total_tokens": 46211728} {"current_steps": 24015, "total_steps": 40000, "loss": 0.0012, "lr": 1.7248436284574228e-05, "epoch": 3.9176931234195287, "percentage": 60.04, "elapsed_time": "5:30:25", "remaining_time": "3:39:56", "throughput": 2331.27, "total_tokens": 46219136} {"current_steps": 24020, "total_steps": 40000, "loss": 0.0824, "lr": 1.723910324843855e-05, "epoch": 3.918508850640346, "percentage": 60.05, "elapsed_time": "5:30:27", "remaining_time": "3:39:51", "throughput": 2331.49, "total_tokens": 46228384} {"current_steps": 24025, "total_steps": 40000, "loss": 0.1015, "lr": 1.722977140913067e-05, "epoch": 3.919324577861163, "percentage": 60.06, "elapsed_time": "5:30:29", "remaining_time": "3:39:45", "throughput": 2331.75, "total_tokens": 46238288} {"current_steps": 24030, "total_steps": 40000, "loss": 0.0911, "lr": 1.7220440768089688e-05, "epoch": 3.9201403050819805, "percentage": 60.08, "elapsed_time": "5:30:31", "remaining_time": "3:39:40", "throughput": 2332.0, "total_tokens": 46248096} {"current_steps": 24035, "total_steps": 40000, "loss": 0.0933, "lr": 1.7211111326754505e-05, "epoch": 3.920956032302798, "percentage": 60.09, "elapsed_time": "5:30:34", "remaining_time": "3:39:34", "throughput": 2332.22, "total_tokens": 46257248} {"current_steps": 24040, "total_steps": 40000, "loss": 0.1284, "lr": 1.720178308656383e-05, "epoch": 3.9217717595236152, "percentage": 60.1, "elapsed_time": "5:30:36", "remaining_time": "3:39:29", "throughput": 2332.48, "total_tokens": 46267296} {"current_steps": 24045, "total_steps": 40000, "loss": 0.0235, "lr": 1.719245604895621e-05, "epoch": 3.922587486744433, "percentage": 60.11, "elapsed_time": "5:30:38", "remaining_time": "3:39:23", "throughput": 2332.67, "total_tokens": 46276000} {"current_steps": 24050, "total_steps": 40000, "loss": 0.0287, "lr": 1.7183130215369972e-05, "epoch": 3.92340321396525, "percentage": 60.12, "elapsed_time": "5:30:40", "remaining_time": "3:39:18", "throughput": 2332.94, "total_tokens": 46286080} {"current_steps": 24055, "total_steps": 40000, "loss": 0.0957, "lr": 1.7173805587243292e-05, "epoch": 3.9242189411860675, "percentage": 60.14, "elapsed_time": "5:30:42", "remaining_time": "3:39:12", "throughput": 2333.19, "total_tokens": 46295824} {"current_steps": 24060, "total_steps": 40000, "loss": 0.0016, "lr": 1.7164482166014147e-05, "epoch": 3.9250346684068846, "percentage": 60.15, "elapsed_time": "5:30:44", "remaining_time": "3:39:07", "throughput": 2333.42, "total_tokens": 46305376} {"current_steps": 24065, "total_steps": 40000, "loss": 0.0024, "lr": 1.7155159953120313e-05, "epoch": 3.925850395627702, "percentage": 60.16, "elapsed_time": "5:30:46", "remaining_time": "3:39:01", "throughput": 2333.67, "total_tokens": 46315104} {"current_steps": 24070, "total_steps": 40000, "loss": 0.31, "lr": 1.714583894999941e-05, "epoch": 3.9266661228485193, "percentage": 60.17, "elapsed_time": "5:30:48", "remaining_time": "3:38:56", "throughput": 2333.91, "total_tokens": 46324624} {"current_steps": 24075, "total_steps": 40000, "loss": 0.0022, "lr": 1.7136519158088826e-05, "epoch": 3.927481850069337, "percentage": 60.19, "elapsed_time": "5:30:50", "remaining_time": "3:38:50", "throughput": 2334.14, "total_tokens": 46334080} {"current_steps": 24080, "total_steps": 40000, "loss": 0.1074, "lr": 1.712720057882581e-05, "epoch": 3.928297577290154, "percentage": 60.2, "elapsed_time": "5:30:52", "remaining_time": "3:38:45", "throughput": 2334.43, "total_tokens": 46344608} {"current_steps": 24085, "total_steps": 40000, "loss": 0.0048, "lr": 1.7117883213647413e-05, "epoch": 3.9291133045109716, "percentage": 60.21, "elapsed_time": "5:30:54", "remaining_time": "3:38:39", "throughput": 2334.67, "total_tokens": 46354288} {"current_steps": 24090, "total_steps": 40000, "loss": 0.0763, "lr": 1.710856706399046e-05, "epoch": 3.9299290317317888, "percentage": 60.22, "elapsed_time": "5:30:56", "remaining_time": "3:38:34", "throughput": 2334.92, "total_tokens": 46363984} {"current_steps": 24095, "total_steps": 40000, "loss": 0.004, "lr": 1.7099252131291648e-05, "epoch": 3.9307447589526063, "percentage": 60.24, "elapsed_time": "5:30:58", "remaining_time": "3:38:28", "throughput": 2335.19, "total_tokens": 46374208} {"current_steps": 24100, "total_steps": 40000, "loss": 0.078, "lr": 1.708993841698744e-05, "epoch": 3.9315604861734235, "percentage": 60.25, "elapsed_time": "5:31:00", "remaining_time": "3:38:23", "throughput": 2335.46, "total_tokens": 46384544} {"current_steps": 24105, "total_steps": 40000, "loss": 0.0933, "lr": 1.7080625922514132e-05, "epoch": 3.932376213394241, "percentage": 60.26, "elapsed_time": "5:31:03", "remaining_time": "3:38:17", "throughput": 2335.73, "total_tokens": 46394592} {"current_steps": 24110, "total_steps": 40000, "loss": 0.0536, "lr": 1.7071314649307836e-05, "epoch": 3.933191940615058, "percentage": 60.27, "elapsed_time": "5:31:05", "remaining_time": "3:38:12", "throughput": 2336.0, "total_tokens": 46404864} {"current_steps": 24115, "total_steps": 40000, "loss": 0.1214, "lr": 1.7062004598804448e-05, "epoch": 3.9340076678358757, "percentage": 60.29, "elapsed_time": "5:31:07", "remaining_time": "3:38:06", "throughput": 2336.28, "total_tokens": 46415312} {"current_steps": 24120, "total_steps": 40000, "loss": 0.0657, "lr": 1.7052695772439702e-05, "epoch": 3.934823395056693, "percentage": 60.3, "elapsed_time": "5:31:09", "remaining_time": "3:38:01", "throughput": 2336.54, "total_tokens": 46425360} {"current_steps": 24125, "total_steps": 40000, "loss": 0.0769, "lr": 1.7043388171649154e-05, "epoch": 3.9356391222775104, "percentage": 60.31, "elapsed_time": "5:31:11", "remaining_time": "3:37:55", "throughput": 2336.78, "total_tokens": 46434960} {"current_steps": 24130, "total_steps": 40000, "loss": 0.0026, "lr": 1.7034081797868127e-05, "epoch": 3.936454849498328, "percentage": 60.32, "elapsed_time": "5:31:13", "remaining_time": "3:37:50", "throughput": 2337.07, "total_tokens": 46445424} {"current_steps": 24135, "total_steps": 40000, "loss": 0.1627, "lr": 1.70247766525318e-05, "epoch": 3.937270576719145, "percentage": 60.34, "elapsed_time": "5:31:15", "remaining_time": "3:37:45", "throughput": 2337.33, "total_tokens": 46455504} {"current_steps": 24140, "total_steps": 40000, "loss": 0.0018, "lr": 1.701547273707514e-05, "epoch": 3.9380863039399623, "percentage": 60.35, "elapsed_time": "5:31:17", "remaining_time": "3:37:39", "throughput": 2337.5, "total_tokens": 46463696} {"current_steps": 24145, "total_steps": 40000, "loss": 0.0476, "lr": 1.7006170052932916e-05, "epoch": 3.93890203116078, "percentage": 60.36, "elapsed_time": "5:31:19", "remaining_time": "3:37:34", "throughput": 2337.74, "total_tokens": 46473424} {"current_steps": 24150, "total_steps": 40000, "loss": 0.0043, "lr": 1.6996868601539735e-05, "epoch": 3.9397177583815974, "percentage": 60.38, "elapsed_time": "5:31:21", "remaining_time": "3:37:28", "throughput": 2337.97, "total_tokens": 46482768} {"current_steps": 24155, "total_steps": 40000, "loss": 0.0033, "lr": 1.6987568384329977e-05, "epoch": 3.9405334856024146, "percentage": 60.39, "elapsed_time": "5:31:23", "remaining_time": "3:37:23", "throughput": 2338.15, "total_tokens": 46491216} {"current_steps": 24160, "total_steps": 40000, "loss": 0.1896, "lr": 1.6978269402737866e-05, "epoch": 3.9413492128232317, "percentage": 60.4, "elapsed_time": "5:31:25", "remaining_time": "3:37:17", "throughput": 2338.41, "total_tokens": 46501120} {"current_steps": 24165, "total_steps": 40000, "loss": 0.0022, "lr": 1.696897165819743e-05, "epoch": 3.9421649400440493, "percentage": 60.41, "elapsed_time": "5:31:27", "remaining_time": "3:37:12", "throughput": 2338.71, "total_tokens": 46512080} {"current_steps": 24170, "total_steps": 40000, "loss": 0.0089, "lr": 1.6959675152142487e-05, "epoch": 3.942980667264867, "percentage": 60.42, "elapsed_time": "5:31:29", "remaining_time": "3:37:06", "throughput": 2338.92, "total_tokens": 46520960} {"current_steps": 24175, "total_steps": 40000, "loss": 0.0044, "lr": 1.6950379886006667e-05, "epoch": 3.943796394485684, "percentage": 60.44, "elapsed_time": "5:31:32", "remaining_time": "3:37:01", "throughput": 2339.19, "total_tokens": 46531184} {"current_steps": 24180, "total_steps": 40000, "loss": 0.0624, "lr": 1.6941085861223438e-05, "epoch": 3.944612121706501, "percentage": 60.45, "elapsed_time": "5:31:34", "remaining_time": "3:36:55", "throughput": 2339.44, "total_tokens": 46541008} {"current_steps": 24185, "total_steps": 40000, "loss": 0.062, "lr": 1.6931793079226034e-05, "epoch": 3.9454278489273187, "percentage": 60.46, "elapsed_time": "5:31:36", "remaining_time": "3:36:50", "throughput": 2339.69, "total_tokens": 46550832} {"current_steps": 24190, "total_steps": 40000, "loss": 0.1405, "lr": 1.692250154144754e-05, "epoch": 3.9462435761481363, "percentage": 60.48, "elapsed_time": "5:31:38", "remaining_time": "3:36:45", "throughput": 2339.93, "total_tokens": 46560576} {"current_steps": 24195, "total_steps": 40000, "loss": 0.1524, "lr": 1.6913211249320807e-05, "epoch": 3.9470593033689534, "percentage": 60.49, "elapsed_time": "5:31:40", "remaining_time": "3:36:39", "throughput": 2340.13, "total_tokens": 46569312} {"current_steps": 24200, "total_steps": 40000, "loss": 0.0641, "lr": 1.6903922204278522e-05, "epoch": 3.9478750305897705, "percentage": 60.5, "elapsed_time": "5:31:42", "remaining_time": "3:36:34", "throughput": 2340.27, "total_tokens": 46576928} {"current_steps": 24200, "total_steps": 40000, "eval_loss": 0.2038833200931549, "epoch": 3.9478750305897705, "percentage": 60.5, "elapsed_time": "5:33:03", "remaining_time": "3:37:26", "throughput": 2330.79, "total_tokens": 46576928} {"current_steps": 24205, "total_steps": 40000, "loss": 0.0715, "lr": 1.6894634407753186e-05, "epoch": 3.948690757810588, "percentage": 60.51, "elapsed_time": "5:33:07", "remaining_time": "3:37:22", "throughput": 2330.92, "total_tokens": 46588144} {"current_steps": 24210, "total_steps": 40000, "loss": 0.0024, "lr": 1.6885347861177077e-05, "epoch": 3.9495064850314057, "percentage": 60.52, "elapsed_time": "5:33:09", "remaining_time": "3:37:17", "throughput": 2331.15, "total_tokens": 46597680} {"current_steps": 24215, "total_steps": 40000, "loss": 0.0688, "lr": 1.6876062565982298e-05, "epoch": 3.950322212252223, "percentage": 60.54, "elapsed_time": "5:33:11", "remaining_time": "3:37:11", "throughput": 2331.44, "total_tokens": 46608272} {"current_steps": 24220, "total_steps": 40000, "loss": 0.0942, "lr": 1.6866778523600774e-05, "epoch": 3.9511379394730404, "percentage": 60.55, "elapsed_time": "5:33:13", "remaining_time": "3:37:06", "throughput": 2331.7, "total_tokens": 46618416} {"current_steps": 24225, "total_steps": 40000, "loss": 0.0778, "lr": 1.6857495735464195e-05, "epoch": 3.9519536666938575, "percentage": 60.56, "elapsed_time": "5:33:15", "remaining_time": "3:37:00", "throughput": 2331.93, "total_tokens": 46627824} {"current_steps": 24230, "total_steps": 40000, "loss": 0.0618, "lr": 1.6848214203004115e-05, "epoch": 3.952769393914675, "percentage": 60.58, "elapsed_time": "5:33:17", "remaining_time": "3:36:55", "throughput": 2332.18, "total_tokens": 46637680} {"current_steps": 24235, "total_steps": 40000, "loss": 0.0033, "lr": 1.6838933927651835e-05, "epoch": 3.953585121135492, "percentage": 60.59, "elapsed_time": "5:33:19", "remaining_time": "3:36:49", "throughput": 2332.41, "total_tokens": 46647056} {"current_steps": 24240, "total_steps": 40000, "loss": 0.0131, "lr": 1.6829654910838506e-05, "epoch": 3.95440084835631, "percentage": 60.6, "elapsed_time": "5:33:21", "remaining_time": "3:36:44", "throughput": 2332.65, "total_tokens": 46656672} {"current_steps": 24245, "total_steps": 40000, "loss": 0.0098, "lr": 1.6820377153995065e-05, "epoch": 3.955216575577127, "percentage": 60.61, "elapsed_time": "5:33:23", "remaining_time": "3:36:38", "throughput": 2332.92, "total_tokens": 46666848} {"current_steps": 24250, "total_steps": 40000, "loss": 0.221, "lr": 1.681110065855226e-05, "epoch": 3.9560323027979445, "percentage": 60.62, "elapsed_time": "5:33:25", "remaining_time": "3:36:33", "throughput": 2333.14, "total_tokens": 46676192} {"current_steps": 24255, "total_steps": 40000, "loss": 0.0371, "lr": 1.6801825425940642e-05, "epoch": 3.9568480300187616, "percentage": 60.64, "elapsed_time": "5:33:27", "remaining_time": "3:36:27", "throughput": 2333.35, "total_tokens": 46685216} {"current_steps": 24260, "total_steps": 40000, "loss": 0.1321, "lr": 1.679255145759056e-05, "epoch": 3.957663757239579, "percentage": 60.65, "elapsed_time": "5:33:29", "remaining_time": "3:36:22", "throughput": 2333.56, "total_tokens": 46694352} {"current_steps": 24265, "total_steps": 40000, "loss": 0.0375, "lr": 1.6783278754932187e-05, "epoch": 3.9584794844603963, "percentage": 60.66, "elapsed_time": "5:33:31", "remaining_time": "3:36:17", "throughput": 2333.83, "total_tokens": 46704416} {"current_steps": 24270, "total_steps": 40000, "loss": 0.0184, "lr": 1.6774007319395496e-05, "epoch": 3.959295211681214, "percentage": 60.68, "elapsed_time": "5:33:34", "remaining_time": "3:36:11", "throughput": 2334.05, "total_tokens": 46713744} {"current_steps": 24275, "total_steps": 40000, "loss": 0.0771, "lr": 1.6764737152410243e-05, "epoch": 3.960110938902031, "percentage": 60.69, "elapsed_time": "5:33:36", "remaining_time": "3:36:06", "throughput": 2334.27, "total_tokens": 46722896} {"current_steps": 24280, "total_steps": 40000, "loss": 0.1907, "lr": 1.6755468255406016e-05, "epoch": 3.9609266661228486, "percentage": 60.7, "elapsed_time": "5:33:38", "remaining_time": "3:36:00", "throughput": 2334.5, "total_tokens": 46732320} {"current_steps": 24285, "total_steps": 40000, "loss": 0.0067, "lr": 1.674620062981219e-05, "epoch": 3.9617423933436657, "percentage": 60.71, "elapsed_time": "5:33:40", "remaining_time": "3:35:55", "throughput": 2334.71, "total_tokens": 46741504} {"current_steps": 24290, "total_steps": 40000, "loss": 0.0331, "lr": 1.6736934277057947e-05, "epoch": 3.9625581205644833, "percentage": 60.72, "elapsed_time": "5:33:42", "remaining_time": "3:35:49", "throughput": 2334.93, "total_tokens": 46750656} {"current_steps": 24295, "total_steps": 40000, "loss": 0.0043, "lr": 1.6727669198572286e-05, "epoch": 3.9633738477853004, "percentage": 60.74, "elapsed_time": "5:33:44", "remaining_time": "3:35:44", "throughput": 2335.18, "total_tokens": 46760608} {"current_steps": 24300, "total_steps": 40000, "loss": 0.0338, "lr": 1.6718405395783984e-05, "epoch": 3.964189575006118, "percentage": 60.75, "elapsed_time": "5:33:46", "remaining_time": "3:35:38", "throughput": 2335.42, "total_tokens": 46770288} {"current_steps": 24305, "total_steps": 40000, "loss": 0.1299, "lr": 1.6709142870121643e-05, "epoch": 3.965005302226935, "percentage": 60.76, "elapsed_time": "5:33:48", "remaining_time": "3:35:33", "throughput": 2335.62, "total_tokens": 46779024} {"current_steps": 24310, "total_steps": 40000, "loss": 0.0183, "lr": 1.669988162301367e-05, "epoch": 3.9658210294477527, "percentage": 60.77, "elapsed_time": "5:33:50", "remaining_time": "3:35:28", "throughput": 2335.91, "total_tokens": 46789648} {"current_steps": 24315, "total_steps": 40000, "loss": 0.1044, "lr": 1.6690621655888243e-05, "epoch": 3.96663675666857, "percentage": 60.79, "elapsed_time": "5:33:52", "remaining_time": "3:35:22", "throughput": 2336.19, "total_tokens": 46800224} {"current_steps": 24320, "total_steps": 40000, "loss": 0.0026, "lr": 1.6681362970173386e-05, "epoch": 3.9674524838893874, "percentage": 60.8, "elapsed_time": "5:33:54", "remaining_time": "3:35:17", "throughput": 2336.37, "total_tokens": 46809264} {"current_steps": 24325, "total_steps": 40000, "loss": 0.1385, "lr": 1.6672105567296904e-05, "epoch": 3.968268211110205, "percentage": 60.81, "elapsed_time": "5:33:57", "remaining_time": "3:35:11", "throughput": 2336.5, "total_tokens": 46816656} {"current_steps": 24330, "total_steps": 40000, "loss": 0.0034, "lr": 1.666284944868639e-05, "epoch": 3.969083938331022, "percentage": 60.82, "elapsed_time": "5:33:59", "remaining_time": "3:35:06", "throughput": 2336.84, "total_tokens": 46828224} {"current_steps": 24335, "total_steps": 40000, "loss": 0.0115, "lr": 1.665359461576927e-05, "epoch": 3.9698996655518393, "percentage": 60.84, "elapsed_time": "5:34:01", "remaining_time": "3:35:01", "throughput": 2337.08, "total_tokens": 46838032} {"current_steps": 24340, "total_steps": 40000, "loss": 0.0599, "lr": 1.6644341069972736e-05, "epoch": 3.970715392772657, "percentage": 60.85, "elapsed_time": "5:34:03", "remaining_time": "3:34:55", "throughput": 2337.32, "total_tokens": 46847696} {"current_steps": 24345, "total_steps": 40000, "loss": 0.1187, "lr": 1.6635088812723813e-05, "epoch": 3.9715311199934744, "percentage": 60.86, "elapsed_time": "5:34:05", "remaining_time": "3:34:50", "throughput": 2337.56, "total_tokens": 46857232} {"current_steps": 24350, "total_steps": 40000, "loss": 0.0026, "lr": 1.6625837845449328e-05, "epoch": 3.9723468472142915, "percentage": 60.88, "elapsed_time": "5:34:07", "remaining_time": "3:34:44", "throughput": 2337.81, "total_tokens": 46867040} {"current_steps": 24355, "total_steps": 40000, "loss": 0.0025, "lr": 1.6616588169575874e-05, "epoch": 3.9731625744351087, "percentage": 60.89, "elapsed_time": "5:34:09", "remaining_time": "3:34:39", "throughput": 2338.05, "total_tokens": 46876704} {"current_steps": 24360, "total_steps": 40000, "loss": 0.0641, "lr": 1.6607339786529878e-05, "epoch": 3.9739783016559262, "percentage": 60.9, "elapsed_time": "5:34:11", "remaining_time": "3:34:33", "throughput": 2338.33, "total_tokens": 46887216} {"current_steps": 24365, "total_steps": 40000, "loss": 0.0019, "lr": 1.659809269773756e-05, "epoch": 3.974794028876744, "percentage": 60.91, "elapsed_time": "5:34:13", "remaining_time": "3:34:28", "throughput": 2338.54, "total_tokens": 46896288} {"current_steps": 24370, "total_steps": 40000, "loss": 0.1313, "lr": 1.658884690462493e-05, "epoch": 3.975609756097561, "percentage": 60.92, "elapsed_time": "5:34:15", "remaining_time": "3:34:22", "throughput": 2338.79, "total_tokens": 46906176} {"current_steps": 24375, "total_steps": 40000, "loss": 0.0013, "lr": 1.6579602408617813e-05, "epoch": 3.976425483318378, "percentage": 60.94, "elapsed_time": "5:34:17", "remaining_time": "3:34:17", "throughput": 2339.08, "total_tokens": 46916768} {"current_steps": 24380, "total_steps": 40000, "loss": 0.2353, "lr": 1.657035921114181e-05, "epoch": 3.9772412105391957, "percentage": 60.95, "elapsed_time": "5:34:19", "remaining_time": "3:34:12", "throughput": 2339.3, "total_tokens": 46926224} {"current_steps": 24385, "total_steps": 40000, "loss": 0.0011, "lr": 1.656111731362236e-05, "epoch": 3.9780569377600132, "percentage": 60.96, "elapsed_time": "5:34:21", "remaining_time": "3:34:06", "throughput": 2339.51, "total_tokens": 46935280} {"current_steps": 24390, "total_steps": 40000, "loss": 0.0037, "lr": 1.6551876717484666e-05, "epoch": 3.9788726649808304, "percentage": 60.98, "elapsed_time": "5:34:24", "remaining_time": "3:34:01", "throughput": 2339.79, "total_tokens": 46945616} {"current_steps": 24395, "total_steps": 40000, "loss": 0.201, "lr": 1.6542637424153752e-05, "epoch": 3.9796883922016475, "percentage": 60.99, "elapsed_time": "5:34:26", "remaining_time": "3:33:55", "throughput": 2340.06, "total_tokens": 46955920} {"current_steps": 24400, "total_steps": 40000, "loss": 0.1402, "lr": 1.6533399435054418e-05, "epoch": 3.980504119422465, "percentage": 61.0, "elapsed_time": "5:34:28", "remaining_time": "3:33:50", "throughput": 2340.28, "total_tokens": 46965120} {"current_steps": 24400, "total_steps": 40000, "eval_loss": 0.21300624310970306, "epoch": 3.980504119422465, "percentage": 61.0, "elapsed_time": "5:35:49", "remaining_time": "3:34:42", "throughput": 2330.89, "total_tokens": 46965120} {"current_steps": 24405, "total_steps": 40000, "loss": 0.2035, "lr": 1.6524162751611304e-05, "epoch": 3.9813198466432826, "percentage": 61.01, "elapsed_time": "5:35:52", "remaining_time": "3:34:37", "throughput": 2331.01, "total_tokens": 46976032} {"current_steps": 24410, "total_steps": 40000, "loss": 0.0447, "lr": 1.6514927375248796e-05, "epoch": 3.9821355738640998, "percentage": 61.02, "elapsed_time": "5:35:54", "remaining_time": "3:34:32", "throughput": 2331.19, "total_tokens": 46984528} {"current_steps": 24415, "total_steps": 40000, "loss": 0.052, "lr": 1.6505693307391127e-05, "epoch": 3.9829513010849174, "percentage": 61.04, "elapsed_time": "5:35:56", "remaining_time": "3:34:26", "throughput": 2331.41, "total_tokens": 46993936} {"current_steps": 24420, "total_steps": 40000, "loss": 0.0574, "lr": 1.6496460549462288e-05, "epoch": 3.9837670283057345, "percentage": 61.05, "elapsed_time": "5:35:58", "remaining_time": "3:34:21", "throughput": 2331.66, "total_tokens": 47003776} {"current_steps": 24425, "total_steps": 40000, "loss": 0.2164, "lr": 1.6487229102886097e-05, "epoch": 3.984582755526552, "percentage": 61.06, "elapsed_time": "5:36:00", "remaining_time": "3:34:15", "throughput": 2331.89, "total_tokens": 47013088} {"current_steps": 24430, "total_steps": 40000, "loss": 0.0818, "lr": 1.6477998969086155e-05, "epoch": 3.985398482747369, "percentage": 61.08, "elapsed_time": "5:36:03", "remaining_time": "3:34:10", "throughput": 2332.1, "total_tokens": 47022256} {"current_steps": 24435, "total_steps": 40000, "loss": 0.0048, "lr": 1.646877014948587e-05, "epoch": 3.9862142099681868, "percentage": 61.09, "elapsed_time": "5:36:05", "remaining_time": "3:34:05", "throughput": 2332.33, "total_tokens": 47031648} {"current_steps": 24440, "total_steps": 40000, "loss": 0.0919, "lr": 1.6459542645508433e-05, "epoch": 3.987029937189004, "percentage": 61.1, "elapsed_time": "5:36:07", "remaining_time": "3:33:59", "throughput": 2332.6, "total_tokens": 47041984} {"current_steps": 24445, "total_steps": 40000, "loss": 0.0992, "lr": 1.6450316458576852e-05, "epoch": 3.9878456644098215, "percentage": 61.11, "elapsed_time": "5:36:09", "remaining_time": "3:33:54", "throughput": 2332.82, "total_tokens": 47051264} {"current_steps": 24450, "total_steps": 40000, "loss": 0.0717, "lr": 1.6441091590113912e-05, "epoch": 3.9886613916306386, "percentage": 61.12, "elapsed_time": "5:36:11", "remaining_time": "3:33:48", "throughput": 2333.02, "total_tokens": 47060240} {"current_steps": 24455, "total_steps": 40000, "loss": 0.0826, "lr": 1.6431868041542213e-05, "epoch": 3.989477118851456, "percentage": 61.14, "elapsed_time": "5:36:13", "remaining_time": "3:33:43", "throughput": 2333.24, "total_tokens": 47069392} {"current_steps": 24460, "total_steps": 40000, "loss": 0.0056, "lr": 1.6422645814284123e-05, "epoch": 3.9902928460722733, "percentage": 61.15, "elapsed_time": "5:36:15", "remaining_time": "3:33:37", "throughput": 2333.46, "total_tokens": 47078688} {"current_steps": 24465, "total_steps": 40000, "loss": 0.0277, "lr": 1.6413424909761846e-05, "epoch": 3.991108573293091, "percentage": 61.16, "elapsed_time": "5:36:17", "remaining_time": "3:33:32", "throughput": 2333.7, "total_tokens": 47088400} {"current_steps": 24470, "total_steps": 40000, "loss": 0.0019, "lr": 1.640420532939736e-05, "epoch": 3.991924300513908, "percentage": 61.18, "elapsed_time": "5:36:19", "remaining_time": "3:33:27", "throughput": 2333.95, "total_tokens": 47098304} {"current_steps": 24475, "total_steps": 40000, "loss": 0.005, "lr": 1.639498707461242e-05, "epoch": 3.9927400277347256, "percentage": 61.19, "elapsed_time": "5:36:21", "remaining_time": "3:33:21", "throughput": 2334.19, "total_tokens": 47107872} {"current_steps": 24480, "total_steps": 40000, "loss": 0.0441, "lr": 1.6385770146828614e-05, "epoch": 3.9935557549555427, "percentage": 61.2, "elapsed_time": "5:36:23", "remaining_time": "3:33:16", "throughput": 2334.43, "total_tokens": 47117664} {"current_steps": 24485, "total_steps": 40000, "loss": 0.0029, "lr": 1.637655454746731e-05, "epoch": 3.9943714821763603, "percentage": 61.21, "elapsed_time": "5:36:25", "remaining_time": "3:33:10", "throughput": 2334.61, "total_tokens": 47126080} {"current_steps": 24490, "total_steps": 40000, "loss": 0.0406, "lr": 1.6367340277949658e-05, "epoch": 3.9951872093971774, "percentage": 61.22, "elapsed_time": "5:36:27", "remaining_time": "3:33:05", "throughput": 2334.84, "total_tokens": 47135600} {"current_steps": 24495, "total_steps": 40000, "loss": 0.0024, "lr": 1.635812733969663e-05, "epoch": 3.996002936617995, "percentage": 61.24, "elapsed_time": "5:36:30", "remaining_time": "3:33:00", "throughput": 2335.1, "total_tokens": 47145696} {"current_steps": 24500, "total_steps": 40000, "loss": 0.0668, "lr": 1.634891573412896e-05, "epoch": 3.9968186638388126, "percentage": 61.25, "elapsed_time": "5:36:32", "remaining_time": "3:32:54", "throughput": 2335.37, "total_tokens": 47156032} {"current_steps": 24505, "total_steps": 40000, "loss": 0.0035, "lr": 1.6339705462667196e-05, "epoch": 3.9976343910596297, "percentage": 61.26, "elapsed_time": "5:36:34", "remaining_time": "3:32:49", "throughput": 2335.6, "total_tokens": 47165424} {"current_steps": 24510, "total_steps": 40000, "loss": 0.1197, "lr": 1.633049652673169e-05, "epoch": 3.998450118280447, "percentage": 61.27, "elapsed_time": "5:36:36", "remaining_time": "3:32:43", "throughput": 2335.81, "total_tokens": 47174544} {"current_steps": 24515, "total_steps": 40000, "loss": 0.0497, "lr": 1.632128892774256e-05, "epoch": 3.9992658455012644, "percentage": 61.29, "elapsed_time": "5:36:38", "remaining_time": "3:32:38", "throughput": 2336.05, "total_tokens": 47184320} {"current_steps": 24520, "total_steps": 40000, "loss": 0.0014, "lr": 1.6312082667119737e-05, "epoch": 4.0, "percentage": 61.3, "elapsed_time": "5:36:40", "remaining_time": "3:32:32", "throughput": 2336.25, "total_tokens": 47193024} {"current_steps": 24525, "total_steps": 40000, "loss": 0.0405, "lr": 1.630287774628296e-05, "epoch": 4.000815727220817, "percentage": 61.31, "elapsed_time": "5:36:42", "remaining_time": "3:32:27", "throughput": 2336.51, "total_tokens": 47203712} {"current_steps": 24530, "total_steps": 40000, "loss": 0.0022, "lr": 1.6293674166651718e-05, "epoch": 4.001631454441635, "percentage": 61.32, "elapsed_time": "5:36:44", "remaining_time": "3:32:22", "throughput": 2336.78, "total_tokens": 47214032} {"current_steps": 24535, "total_steps": 40000, "loss": 0.0012, "lr": 1.6284471929645338e-05, "epoch": 4.002447181662452, "percentage": 61.34, "elapsed_time": "5:36:46", "remaining_time": "3:32:16", "throughput": 2336.93, "total_tokens": 47221840} {"current_steps": 24540, "total_steps": 40000, "loss": 0.0315, "lr": 1.627527103668291e-05, "epoch": 4.003262908883269, "percentage": 61.35, "elapsed_time": "5:36:48", "remaining_time": "3:32:11", "throughput": 2337.08, "total_tokens": 47229776} {"current_steps": 24545, "total_steps": 40000, "loss": 0.0158, "lr": 1.6266071489183327e-05, "epoch": 4.0040786361040865, "percentage": 61.36, "elapsed_time": "5:36:50", "remaining_time": "3:32:06", "throughput": 2337.35, "total_tokens": 47240128} {"current_steps": 24550, "total_steps": 40000, "loss": 0.003, "lr": 1.6256873288565283e-05, "epoch": 4.004894363324905, "percentage": 61.38, "elapsed_time": "5:36:53", "remaining_time": "3:32:00", "throughput": 2337.61, "total_tokens": 47250112} {"current_steps": 24555, "total_steps": 40000, "loss": 0.0073, "lr": 1.6247676436247245e-05, "epoch": 4.005710090545722, "percentage": 61.39, "elapsed_time": "5:36:55", "remaining_time": "3:31:55", "throughput": 2337.84, "total_tokens": 47259568} {"current_steps": 24560, "total_steps": 40000, "loss": 0.0046, "lr": 1.6238480933647486e-05, "epoch": 4.006525817766539, "percentage": 61.4, "elapsed_time": "5:36:57", "remaining_time": "3:31:49", "throughput": 2338.03, "total_tokens": 47268240} {"current_steps": 24565, "total_steps": 40000, "loss": 0.002, "lr": 1.6229286782184083e-05, "epoch": 4.007341544987356, "percentage": 61.41, "elapsed_time": "5:36:59", "remaining_time": "3:31:44", "throughput": 2338.26, "total_tokens": 47277920} {"current_steps": 24570, "total_steps": 40000, "loss": 0.0463, "lr": 1.622009398327487e-05, "epoch": 4.008157272208174, "percentage": 61.42, "elapsed_time": "5:37:01", "remaining_time": "3:31:39", "throughput": 2338.56, "total_tokens": 47288704} {"current_steps": 24575, "total_steps": 40000, "loss": 0.029, "lr": 1.6210902538337502e-05, "epoch": 4.008972999428991, "percentage": 61.44, "elapsed_time": "5:37:03", "remaining_time": "3:31:33", "throughput": 2338.83, "total_tokens": 47299136} {"current_steps": 24580, "total_steps": 40000, "loss": 0.0004, "lr": 1.6201712448789413e-05, "epoch": 4.009788726649808, "percentage": 61.45, "elapsed_time": "5:37:05", "remaining_time": "3:31:28", "throughput": 2339.07, "total_tokens": 47308784} {"current_steps": 24585, "total_steps": 40000, "loss": 0.0061, "lr": 1.6192523716047827e-05, "epoch": 4.010604453870625, "percentage": 61.46, "elapsed_time": "5:37:07", "remaining_time": "3:31:22", "throughput": 2339.33, "total_tokens": 47318864} {"current_steps": 24590, "total_steps": 40000, "loss": 0.0021, "lr": 1.6183336341529776e-05, "epoch": 4.011420181091443, "percentage": 61.48, "elapsed_time": "5:37:09", "remaining_time": "3:31:17", "throughput": 2339.53, "total_tokens": 47327840} {"current_steps": 24595, "total_steps": 40000, "loss": 0.0621, "lr": 1.6174150326652047e-05, "epoch": 4.0122359083122605, "percentage": 61.49, "elapsed_time": "5:37:11", "remaining_time": "3:31:12", "throughput": 2339.76, "total_tokens": 47337280} {"current_steps": 24600, "total_steps": 40000, "loss": 0.0794, "lr": 1.6164965672831256e-05, "epoch": 4.013051635533078, "percentage": 61.5, "elapsed_time": "5:37:13", "remaining_time": "3:31:06", "throughput": 2340.05, "total_tokens": 47347920} {"current_steps": 24600, "total_steps": 40000, "eval_loss": 0.24521394073963165, "epoch": 4.013051635533078, "percentage": 61.5, "elapsed_time": "5:38:34", "remaining_time": "3:31:57", "throughput": 2330.73, "total_tokens": 47347920} {"current_steps": 24605, "total_steps": 40000, "loss": 0.0018, "lr": 1.6155782381483784e-05, "epoch": 4.013867362753895, "percentage": 61.51, "elapsed_time": "5:38:38", "remaining_time": "3:31:52", "throughput": 2330.82, "total_tokens": 47358416} {"current_steps": 24610, "total_steps": 40000, "loss": 0.0031, "lr": 1.6146600454025813e-05, "epoch": 4.014683089974713, "percentage": 61.52, "elapsed_time": "5:38:40", "remaining_time": "3:31:47", "throughput": 2331.04, "total_tokens": 47367632} {"current_steps": 24615, "total_steps": 40000, "loss": 0.0006, "lr": 1.6137419891873317e-05, "epoch": 4.01549881719553, "percentage": 61.54, "elapsed_time": "5:38:42", "remaining_time": "3:31:42", "throughput": 2331.28, "total_tokens": 47377488} {"current_steps": 24620, "total_steps": 40000, "loss": 0.0012, "lr": 1.6128240696442038e-05, "epoch": 4.016314544416347, "percentage": 61.55, "elapsed_time": "5:38:44", "remaining_time": "3:31:36", "throughput": 2331.49, "total_tokens": 47386464} {"current_steps": 24625, "total_steps": 40000, "loss": 0.0019, "lr": 1.611906286914753e-05, "epoch": 4.017130271637164, "percentage": 61.56, "elapsed_time": "5:38:46", "remaining_time": "3:31:31", "throughput": 2331.71, "total_tokens": 47395904} {"current_steps": 24630, "total_steps": 40000, "loss": 0.0128, "lr": 1.6109886411405144e-05, "epoch": 4.017945998857982, "percentage": 61.58, "elapsed_time": "5:38:48", "remaining_time": "3:31:25", "throughput": 2331.96, "total_tokens": 47405760} {"current_steps": 24635, "total_steps": 40000, "loss": 0.0864, "lr": 1.6100711324629985e-05, "epoch": 4.018761726078799, "percentage": 61.59, "elapsed_time": "5:38:50", "remaining_time": "3:31:20", "throughput": 2332.26, "total_tokens": 47416688} {"current_steps": 24640, "total_steps": 40000, "loss": 0.0005, "lr": 1.609153761023698e-05, "epoch": 4.0195774532996165, "percentage": 61.6, "elapsed_time": "5:38:52", "remaining_time": "3:31:15", "throughput": 2332.53, "total_tokens": 47427024} {"current_steps": 24645, "total_steps": 40000, "loss": 0.0014, "lr": 1.608236526964083e-05, "epoch": 4.020393180520434, "percentage": 61.61, "elapsed_time": "5:38:54", "remaining_time": "3:31:09", "throughput": 2332.78, "total_tokens": 47436880} {"current_steps": 24650, "total_steps": 40000, "loss": 0.0578, "lr": 1.607319430425601e-05, "epoch": 4.021208907741252, "percentage": 61.62, "elapsed_time": "5:38:57", "remaining_time": "3:31:04", "throughput": 2332.98, "total_tokens": 47445840} {"current_steps": 24655, "total_steps": 40000, "loss": 0.0011, "lr": 1.606402471549682e-05, "epoch": 4.022024634962069, "percentage": 61.64, "elapsed_time": "5:38:59", "remaining_time": "3:30:58", "throughput": 2333.18, "total_tokens": 47454832} {"current_steps": 24660, "total_steps": 40000, "loss": 0.002, "lr": 1.6054856504777312e-05, "epoch": 4.022840362182886, "percentage": 61.65, "elapsed_time": "5:39:01", "remaining_time": "3:30:53", "throughput": 2333.42, "total_tokens": 47464496} {"current_steps": 24665, "total_steps": 40000, "loss": 0.0007, "lr": 1.6045689673511334e-05, "epoch": 4.023656089403703, "percentage": 61.66, "elapsed_time": "5:39:03", "remaining_time": "3:30:48", "throughput": 2333.68, "total_tokens": 47474512} {"current_steps": 24670, "total_steps": 40000, "loss": 0.1045, "lr": 1.6036524223112548e-05, "epoch": 4.024471816624521, "percentage": 61.68, "elapsed_time": "5:39:05", "remaining_time": "3:30:42", "throughput": 2333.94, "total_tokens": 47484752} {"current_steps": 24675, "total_steps": 40000, "loss": 0.0008, "lr": 1.602736015499436e-05, "epoch": 4.025287543845338, "percentage": 61.69, "elapsed_time": "5:39:07", "remaining_time": "3:30:37", "throughput": 2334.19, "total_tokens": 47494592} {"current_steps": 24680, "total_steps": 40000, "loss": 0.0017, "lr": 1.601819747057e-05, "epoch": 4.026103271066155, "percentage": 61.7, "elapsed_time": "5:39:09", "remaining_time": "3:30:31", "throughput": 2334.42, "total_tokens": 47504128} {"current_steps": 24685, "total_steps": 40000, "loss": 0.0009, "lr": 1.6009036171252465e-05, "epoch": 4.026918998286972, "percentage": 61.71, "elapsed_time": "5:39:11", "remaining_time": "3:30:26", "throughput": 2334.71, "total_tokens": 47514896} {"current_steps": 24690, "total_steps": 40000, "loss": 0.0006, "lr": 1.599987625845453e-05, "epoch": 4.02773472550779, "percentage": 61.72, "elapsed_time": "5:39:13", "remaining_time": "3:30:21", "throughput": 2335.03, "total_tokens": 47526160} {"current_steps": 24695, "total_steps": 40000, "loss": 0.0006, "lr": 1.599071773358879e-05, "epoch": 4.028550452728608, "percentage": 61.74, "elapsed_time": "5:39:15", "remaining_time": "3:30:15", "throughput": 2335.26, "total_tokens": 47535744} {"current_steps": 24700, "total_steps": 40000, "loss": 0.0034, "lr": 1.598156059806758e-05, "epoch": 4.029366179949425, "percentage": 61.75, "elapsed_time": "5:39:17", "remaining_time": "3:30:10", "throughput": 2335.52, "total_tokens": 47545840} {"current_steps": 24705, "total_steps": 40000, "loss": 0.0021, "lr": 1.5972404853303062e-05, "epoch": 4.030181907170242, "percentage": 61.76, "elapsed_time": "5:39:19", "remaining_time": "3:30:04", "throughput": 2335.76, "total_tokens": 47555680} {"current_steps": 24710, "total_steps": 40000, "loss": 0.0007, "lr": 1.5963250500707172e-05, "epoch": 4.03099763439106, "percentage": 61.78, "elapsed_time": "5:39:21", "remaining_time": "3:29:59", "throughput": 2335.99, "total_tokens": 47565200} {"current_steps": 24715, "total_steps": 40000, "loss": 0.0016, "lr": 1.5954097541691612e-05, "epoch": 4.031813361611877, "percentage": 61.79, "elapsed_time": "5:39:23", "remaining_time": "3:29:54", "throughput": 2336.19, "total_tokens": 47574112} {"current_steps": 24720, "total_steps": 40000, "loss": 0.0002, "lr": 1.5944945977667884e-05, "epoch": 4.032629088832694, "percentage": 61.8, "elapsed_time": "5:39:26", "remaining_time": "3:29:48", "throughput": 2336.42, "total_tokens": 47583664} {"current_steps": 24725, "total_steps": 40000, "loss": 0.0663, "lr": 1.593579581004729e-05, "epoch": 4.033444816053512, "percentage": 61.81, "elapsed_time": "5:39:28", "remaining_time": "3:29:43", "throughput": 2336.64, "total_tokens": 47592880} {"current_steps": 24730, "total_steps": 40000, "loss": 0.0961, "lr": 1.592664704024088e-05, "epoch": 4.034260543274329, "percentage": 61.82, "elapsed_time": "5:39:30", "remaining_time": "3:29:37", "throughput": 2336.83, "total_tokens": 47601744} {"current_steps": 24735, "total_steps": 40000, "loss": 0.0473, "lr": 1.591749966965953e-05, "epoch": 4.035076270495146, "percentage": 61.84, "elapsed_time": "5:39:32", "remaining_time": "3:29:32", "throughput": 2337.12, "total_tokens": 47612464} {"current_steps": 24740, "total_steps": 40000, "loss": 0.0002, "lr": 1.5908353699713856e-05, "epoch": 4.0358919977159635, "percentage": 61.85, "elapsed_time": "5:39:34", "remaining_time": "3:29:27", "throughput": 2337.37, "total_tokens": 47622368} {"current_steps": 24745, "total_steps": 40000, "loss": 0.0004, "lr": 1.5899209131814298e-05, "epoch": 4.0367077249367815, "percentage": 61.86, "elapsed_time": "5:39:36", "remaining_time": "3:29:21", "throughput": 2337.58, "total_tokens": 47631488} {"current_steps": 24750, "total_steps": 40000, "loss": 0.013, "lr": 1.5890065967371067e-05, "epoch": 4.037523452157599, "percentage": 61.88, "elapsed_time": "5:39:38", "remaining_time": "3:29:16", "throughput": 2337.88, "total_tokens": 47642400} {"current_steps": 24755, "total_steps": 40000, "loss": 0.069, "lr": 1.5880924207794144e-05, "epoch": 4.038339179378416, "percentage": 61.89, "elapsed_time": "5:39:40", "remaining_time": "3:29:11", "throughput": 2338.02, "total_tokens": 47650224} {"current_steps": 24760, "total_steps": 40000, "loss": 0.0007, "lr": 1.5871783854493298e-05, "epoch": 4.039154906599233, "percentage": 61.9, "elapsed_time": "5:39:42", "remaining_time": "3:29:05", "throughput": 2338.33, "total_tokens": 47661280} {"current_steps": 24765, "total_steps": 40000, "loss": 0.0014, "lr": 1.5862644908878106e-05, "epoch": 4.039970633820051, "percentage": 61.91, "elapsed_time": "5:39:44", "remaining_time": "3:29:00", "throughput": 2338.57, "total_tokens": 47671152} {"current_steps": 24770, "total_steps": 40000, "loss": 0.0771, "lr": 1.5853507372357885e-05, "epoch": 4.040786361040868, "percentage": 61.92, "elapsed_time": "5:39:46", "remaining_time": "3:28:54", "throughput": 2338.88, "total_tokens": 47682336} {"current_steps": 24775, "total_steps": 40000, "loss": 0.0011, "lr": 1.5844371246341776e-05, "epoch": 4.041602088261685, "percentage": 61.94, "elapsed_time": "5:39:48", "remaining_time": "3:28:49", "throughput": 2339.09, "total_tokens": 47691424} {"current_steps": 24780, "total_steps": 40000, "loss": 0.0003, "lr": 1.5835236532238674e-05, "epoch": 4.042417815482502, "percentage": 61.95, "elapsed_time": "5:39:50", "remaining_time": "3:28:44", "throughput": 2339.31, "total_tokens": 47700608} {"current_steps": 24785, "total_steps": 40000, "loss": 0.0034, "lr": 1.582610323145727e-05, "epoch": 4.04323354270332, "percentage": 61.96, "elapsed_time": "5:39:53", "remaining_time": "3:28:38", "throughput": 2339.54, "total_tokens": 47710288} {"current_steps": 24790, "total_steps": 40000, "loss": 0.0492, "lr": 1.5816971345406035e-05, "epoch": 4.0440492699241375, "percentage": 61.98, "elapsed_time": "5:39:55", "remaining_time": "3:28:33", "throughput": 2339.8, "total_tokens": 47720528} {"current_steps": 24795, "total_steps": 40000, "loss": 0.0637, "lr": 1.5807840875493225e-05, "epoch": 4.044864997144955, "percentage": 61.99, "elapsed_time": "5:39:57", "remaining_time": "3:28:28", "throughput": 2340.08, "total_tokens": 47730912} {"current_steps": 24800, "total_steps": 40000, "loss": 0.0025, "lr": 1.5798711823126854e-05, "epoch": 4.045680724365772, "percentage": 62.0, "elapsed_time": "5:39:59", "remaining_time": "3:28:22", "throughput": 2340.35, "total_tokens": 47741360} {"current_steps": 24800, "total_steps": 40000, "eval_loss": 0.28094929456710815, "epoch": 4.045680724365772, "percentage": 62.0, "elapsed_time": "5:41:20", "remaining_time": "3:29:12", "throughput": 2331.12, "total_tokens": 47741360} {"current_steps": 24805, "total_steps": 40000, "loss": 0.0007, "lr": 1.578958418971477e-05, "epoch": 4.04649645158659, "percentage": 62.01, "elapsed_time": "5:41:23", "remaining_time": "3:29:07", "throughput": 2331.12, "total_tokens": 47750032} {"current_steps": 24810, "total_steps": 40000, "loss": 0.1573, "lr": 1.578045797666453e-05, "epoch": 4.047312178807407, "percentage": 62.02, "elapsed_time": "5:41:25", "remaining_time": "3:29:02", "throughput": 2331.36, "total_tokens": 47759648} {"current_steps": 24815, "total_steps": 40000, "loss": 0.0614, "lr": 1.5771333185383548e-05, "epoch": 4.048127906028224, "percentage": 62.04, "elapsed_time": "5:41:27", "remaining_time": "3:28:57", "throughput": 2331.61, "total_tokens": 47769680} {"current_steps": 24820, "total_steps": 40000, "loss": 0.001, "lr": 1.576220981727895e-05, "epoch": 4.048943633249041, "percentage": 62.05, "elapsed_time": "5:41:29", "remaining_time": "3:28:51", "throughput": 2331.9, "total_tokens": 47780448} {"current_steps": 24825, "total_steps": 40000, "loss": 0.0006, "lr": 1.575308787375769e-05, "epoch": 4.049759360469859, "percentage": 62.06, "elapsed_time": "5:41:32", "remaining_time": "3:28:46", "throughput": 2332.06, "total_tokens": 47788704} {"current_steps": 24830, "total_steps": 40000, "loss": 0.001, "lr": 1.5743967356226492e-05, "epoch": 4.050575087690676, "percentage": 62.08, "elapsed_time": "5:41:34", "remaining_time": "3:28:40", "throughput": 2332.34, "total_tokens": 47799168} {"current_steps": 24835, "total_steps": 40000, "loss": 0.0801, "lr": 1.5734848266091835e-05, "epoch": 4.051390814911493, "percentage": 62.09, "elapsed_time": "5:41:36", "remaining_time": "3:28:35", "throughput": 2332.64, "total_tokens": 47810176} {"current_steps": 24840, "total_steps": 40000, "loss": 0.0256, "lr": 1.572573060476001e-05, "epoch": 4.052206542132311, "percentage": 62.1, "elapsed_time": "5:41:38", "remaining_time": "3:28:30", "throughput": 2332.87, "total_tokens": 47819728} {"current_steps": 24845, "total_steps": 40000, "loss": 0.001, "lr": 1.5716614373637085e-05, "epoch": 4.053022269353129, "percentage": 62.11, "elapsed_time": "5:41:40", "remaining_time": "3:28:24", "throughput": 2333.1, "total_tokens": 47829264} {"current_steps": 24850, "total_steps": 40000, "loss": 0.0003, "lr": 1.570749957412887e-05, "epoch": 4.053837996573946, "percentage": 62.12, "elapsed_time": "5:41:42", "remaining_time": "3:28:19", "throughput": 2333.3, "total_tokens": 47838288} {"current_steps": 24855, "total_steps": 40000, "loss": 0.0398, "lr": 1.5698386207641013e-05, "epoch": 4.054653723794763, "percentage": 62.14, "elapsed_time": "5:41:44", "remaining_time": "3:28:14", "throughput": 2333.54, "total_tokens": 47848000} {"current_steps": 24860, "total_steps": 40000, "loss": 0.0032, "lr": 1.5689274275578884e-05, "epoch": 4.05546945101558, "percentage": 62.15, "elapsed_time": "5:41:46", "remaining_time": "3:28:08", "throughput": 2333.82, "total_tokens": 47858656} {"current_steps": 24865, "total_steps": 40000, "loss": 0.0394, "lr": 1.5680163779347667e-05, "epoch": 4.056285178236398, "percentage": 62.16, "elapsed_time": "5:41:48", "remaining_time": "3:28:03", "throughput": 2334.07, "total_tokens": 47868592} {"current_steps": 24870, "total_steps": 40000, "loss": 0.0012, "lr": 1.5671054720352327e-05, "epoch": 4.057100905457215, "percentage": 62.18, "elapsed_time": "5:41:50", "remaining_time": "3:27:57", "throughput": 2334.3, "total_tokens": 47878112} {"current_steps": 24875, "total_steps": 40000, "loss": 0.0555, "lr": 1.566194709999757e-05, "epoch": 4.057916632678032, "percentage": 62.19, "elapsed_time": "5:41:52", "remaining_time": "3:27:52", "throughput": 2334.58, "total_tokens": 47888736} {"current_steps": 24880, "total_steps": 40000, "loss": 0.0004, "lr": 1.5652840919687933e-05, "epoch": 4.058732359898849, "percentage": 62.2, "elapsed_time": "5:41:54", "remaining_time": "3:27:47", "throughput": 2334.8, "total_tokens": 47897968} {"current_steps": 24885, "total_steps": 40000, "loss": 0.0002, "lr": 1.5643736180827676e-05, "epoch": 4.059548087119667, "percentage": 62.21, "elapsed_time": "5:41:56", "remaining_time": "3:27:41", "throughput": 2335.0, "total_tokens": 47906912} {"current_steps": 24890, "total_steps": 40000, "loss": 0.0016, "lr": 1.5634632884820878e-05, "epoch": 4.0603638143404845, "percentage": 62.22, "elapsed_time": "5:41:58", "remaining_time": "3:27:36", "throughput": 2335.23, "total_tokens": 47916656} {"current_steps": 24895, "total_steps": 40000, "loss": 0.0115, "lr": 1.5625531033071395e-05, "epoch": 4.061179541561302, "percentage": 62.24, "elapsed_time": "5:42:01", "remaining_time": "3:27:31", "throughput": 2335.49, "total_tokens": 47926688} {"current_steps": 24900, "total_steps": 40000, "loss": 0.0873, "lr": 1.5616430626982828e-05, "epoch": 4.06199526878212, "percentage": 62.25, "elapsed_time": "5:42:03", "remaining_time": "3:27:25", "throughput": 2335.74, "total_tokens": 47936656} {"current_steps": 24905, "total_steps": 40000, "loss": 0.0392, "lr": 1.5607331667958575e-05, "epoch": 4.062810996002937, "percentage": 62.26, "elapsed_time": "5:42:05", "remaining_time": "3:27:20", "throughput": 2335.95, "total_tokens": 47945760} {"current_steps": 24910, "total_steps": 40000, "loss": 0.0005, "lr": 1.5598234157401824e-05, "epoch": 4.063626723223754, "percentage": 62.28, "elapsed_time": "5:42:07", "remaining_time": "3:27:15", "throughput": 2336.18, "total_tokens": 47955328} {"current_steps": 24915, "total_steps": 40000, "loss": 0.065, "lr": 1.5589138096715503e-05, "epoch": 4.064442450444571, "percentage": 62.29, "elapsed_time": "5:42:09", "remaining_time": "3:27:09", "throughput": 2336.39, "total_tokens": 47964512} {"current_steps": 24920, "total_steps": 40000, "loss": 0.1193, "lr": 1.5580043487302365e-05, "epoch": 4.065258177665389, "percentage": 62.3, "elapsed_time": "5:42:11", "remaining_time": "3:27:04", "throughput": 2336.59, "total_tokens": 47973600} {"current_steps": 24925, "total_steps": 40000, "loss": 0.1218, "lr": 1.5570950330564888e-05, "epoch": 4.066073904886206, "percentage": 62.31, "elapsed_time": "5:42:13", "remaining_time": "3:26:58", "throughput": 2336.78, "total_tokens": 47982256} {"current_steps": 24930, "total_steps": 40000, "loss": 0.0004, "lr": 1.5561858627905367e-05, "epoch": 4.066889632107023, "percentage": 62.32, "elapsed_time": "5:42:15", "remaining_time": "3:26:53", "throughput": 2337.05, "total_tokens": 47992736} {"current_steps": 24935, "total_steps": 40000, "loss": 0.0004, "lr": 1.5552768380725857e-05, "epoch": 4.0677053593278405, "percentage": 62.34, "elapsed_time": "5:42:17", "remaining_time": "3:26:48", "throughput": 2337.31, "total_tokens": 48002816} {"current_steps": 24940, "total_steps": 40000, "loss": 0.0005, "lr": 1.5543679590428183e-05, "epoch": 4.0685210865486585, "percentage": 62.35, "elapsed_time": "5:42:19", "remaining_time": "3:26:42", "throughput": 2337.51, "total_tokens": 48011792} {"current_steps": 24945, "total_steps": 40000, "loss": 0.002, "lr": 1.5534592258413943e-05, "epoch": 4.069336813769476, "percentage": 62.36, "elapsed_time": "5:42:21", "remaining_time": "3:26:37", "throughput": 2337.74, "total_tokens": 48021296} {"current_steps": 24950, "total_steps": 40000, "loss": 0.0004, "lr": 1.5525506386084538e-05, "epoch": 4.070152540990293, "percentage": 62.38, "elapsed_time": "5:42:23", "remaining_time": "3:26:32", "throughput": 2337.95, "total_tokens": 48030528} {"current_steps": 24955, "total_steps": 40000, "loss": 0.0019, "lr": 1.55164219748411e-05, "epoch": 4.07096826821111, "percentage": 62.39, "elapsed_time": "5:42:25", "remaining_time": "3:26:26", "throughput": 2338.24, "total_tokens": 48041312} {"current_steps": 24960, "total_steps": 40000, "loss": 0.0008, "lr": 1.550733902608459e-05, "epoch": 4.071783995431928, "percentage": 62.4, "elapsed_time": "5:42:28", "remaining_time": "3:26:21", "throughput": 2338.48, "total_tokens": 48051056} {"current_steps": 24965, "total_steps": 40000, "loss": 0.0059, "lr": 1.549825754121568e-05, "epoch": 4.072599722652745, "percentage": 62.41, "elapsed_time": "5:42:30", "remaining_time": "3:26:16", "throughput": 2338.7, "total_tokens": 48060560} {"current_steps": 24970, "total_steps": 40000, "loss": 0.0026, "lr": 1.5489177521634864e-05, "epoch": 4.073415449873562, "percentage": 62.42, "elapsed_time": "5:42:32", "remaining_time": "3:26:10", "throughput": 2338.94, "total_tokens": 48070368} {"current_steps": 24975, "total_steps": 40000, "loss": 0.0576, "lr": 1.5480098968742402e-05, "epoch": 4.074231177094379, "percentage": 62.44, "elapsed_time": "5:42:34", "remaining_time": "3:26:05", "throughput": 2339.17, "total_tokens": 48079968} {"current_steps": 24980, "total_steps": 40000, "loss": 0.0005, "lr": 1.5471021883938304e-05, "epoch": 4.075046904315197, "percentage": 62.45, "elapsed_time": "5:42:36", "remaining_time": "3:26:00", "throughput": 2339.43, "total_tokens": 48090000} {"current_steps": 24985, "total_steps": 40000, "loss": 0.001, "lr": 1.546194626862238e-05, "epoch": 4.0758626315360145, "percentage": 62.46, "elapsed_time": "5:42:38", "remaining_time": "3:25:54", "throughput": 2339.71, "total_tokens": 48100672} {"current_steps": 24990, "total_steps": 40000, "loss": 0.0008, "lr": 1.5452872124194216e-05, "epoch": 4.076678358756832, "percentage": 62.48, "elapsed_time": "5:42:40", "remaining_time": "3:25:49", "throughput": 2339.98, "total_tokens": 48111088} {"current_steps": 24995, "total_steps": 40000, "loss": 0.0015, "lr": 1.5443799452053136e-05, "epoch": 4.077494085977649, "percentage": 62.49, "elapsed_time": "5:42:42", "remaining_time": "3:25:44", "throughput": 2340.25, "total_tokens": 48121360} {"current_steps": 25000, "total_steps": 40000, "loss": 0.0002, "lr": 1.543472825359828e-05, "epoch": 4.078309813198467, "percentage": 62.5, "elapsed_time": "5:42:44", "remaining_time": "3:25:38", "throughput": 2340.48, "total_tokens": 48131120} {"current_steps": 25000, "total_steps": 40000, "eval_loss": 0.30152493715286255, "epoch": 4.078309813198467, "percentage": 62.5, "elapsed_time": "5:44:05", "remaining_time": "3:26:27", "throughput": 2331.32, "total_tokens": 48131120} {"current_steps": 25005, "total_steps": 40000, "loss": 0.0009, "lr": 1.5425658530228522e-05, "epoch": 4.079125540419284, "percentage": 62.51, "elapsed_time": "5:44:09", "remaining_time": "3:26:22", "throughput": 2331.38, "total_tokens": 48140752} {"current_steps": 25010, "total_steps": 40000, "loss": 0.0008, "lr": 1.5416590283342546e-05, "epoch": 4.079941267640101, "percentage": 62.52, "elapsed_time": "5:44:11", "remaining_time": "3:26:17", "throughput": 2331.6, "total_tokens": 48150208} {"current_steps": 25015, "total_steps": 40000, "loss": 0.0001, "lr": 1.5407523514338783e-05, "epoch": 4.080756994860918, "percentage": 62.54, "elapsed_time": "5:44:13", "remaining_time": "3:26:12", "throughput": 2331.86, "total_tokens": 48160288} {"current_steps": 25020, "total_steps": 40000, "loss": 0.0003, "lr": 1.539845822461543e-05, "epoch": 4.081572722081736, "percentage": 62.55, "elapsed_time": "5:44:15", "remaining_time": "3:26:06", "throughput": 2332.08, "total_tokens": 48169744} {"current_steps": 25025, "total_steps": 40000, "loss": 0.0846, "lr": 1.538939441557048e-05, "epoch": 4.082388449302553, "percentage": 62.56, "elapsed_time": "5:44:17", "remaining_time": "3:26:01", "throughput": 2332.27, "total_tokens": 48178528} {"current_steps": 25030, "total_steps": 40000, "loss": 0.116, "lr": 1.5380332088601696e-05, "epoch": 4.08320417652337, "percentage": 62.58, "elapsed_time": "5:44:19", "remaining_time": "3:25:56", "throughput": 2332.47, "total_tokens": 48187376} {"current_steps": 25035, "total_steps": 40000, "loss": 0.0002, "lr": 1.537127124510658e-05, "epoch": 4.0840199037441876, "percentage": 62.59, "elapsed_time": "5:44:21", "remaining_time": "3:25:50", "throughput": 2332.75, "total_tokens": 48198176} {"current_steps": 25040, "total_steps": 40000, "loss": 0.0652, "lr": 1.5362211886482457e-05, "epoch": 4.084835630965006, "percentage": 62.6, "elapsed_time": "5:44:23", "remaining_time": "3:25:45", "throughput": 2332.98, "total_tokens": 48207760} {"current_steps": 25045, "total_steps": 40000, "loss": 0.0671, "lr": 1.5353154014126363e-05, "epoch": 4.085651358185823, "percentage": 62.61, "elapsed_time": "5:44:25", "remaining_time": "3:25:39", "throughput": 2333.22, "total_tokens": 48217376} {"current_steps": 25050, "total_steps": 40000, "loss": 0.0009, "lr": 1.534409762943515e-05, "epoch": 4.08646708540664, "percentage": 62.62, "elapsed_time": "5:44:27", "remaining_time": "3:25:34", "throughput": 2333.41, "total_tokens": 48226304} {"current_steps": 25055, "total_steps": 40000, "loss": 0.0005, "lr": 1.5335042733805438e-05, "epoch": 4.087282812627457, "percentage": 62.64, "elapsed_time": "5:44:29", "remaining_time": "3:25:29", "throughput": 2333.63, "total_tokens": 48235712} {"current_steps": 25060, "total_steps": 40000, "loss": 0.0511, "lr": 1.532598932863358e-05, "epoch": 4.088098539848275, "percentage": 62.65, "elapsed_time": "5:44:31", "remaining_time": "3:25:23", "throughput": 2333.87, "total_tokens": 48245328} {"current_steps": 25065, "total_steps": 40000, "loss": 0.0004, "lr": 1.531693741531574e-05, "epoch": 4.088914267069092, "percentage": 62.66, "elapsed_time": "5:44:33", "remaining_time": "3:25:18", "throughput": 2334.13, "total_tokens": 48255680} {"current_steps": 25070, "total_steps": 40000, "loss": 0.001, "lr": 1.5307886995247844e-05, "epoch": 4.089729994289909, "percentage": 62.68, "elapsed_time": "5:44:36", "remaining_time": "3:25:13", "throughput": 2334.31, "total_tokens": 48264272} {"current_steps": 25075, "total_steps": 40000, "loss": 0.0028, "lr": 1.529883806982557e-05, "epoch": 4.090545721510727, "percentage": 62.69, "elapsed_time": "5:44:38", "remaining_time": "3:25:07", "throughput": 2334.53, "total_tokens": 48273600} {"current_steps": 25080, "total_steps": 40000, "loss": 0.0005, "lr": 1.5289790640444376e-05, "epoch": 4.091361448731544, "percentage": 62.7, "elapsed_time": "5:44:40", "remaining_time": "3:25:02", "throughput": 2334.85, "total_tokens": 48285040} {"current_steps": 25085, "total_steps": 40000, "loss": 0.0001, "lr": 1.5280744708499494e-05, "epoch": 4.0921771759523615, "percentage": 62.71, "elapsed_time": "5:44:42", "remaining_time": "3:24:57", "throughput": 2335.09, "total_tokens": 48294784} {"current_steps": 25090, "total_steps": 40000, "loss": 0.001, "lr": 1.527170027538591e-05, "epoch": 4.092992903173179, "percentage": 62.72, "elapsed_time": "5:44:44", "remaining_time": "3:24:51", "throughput": 2335.33, "total_tokens": 48304640} {"current_steps": 25095, "total_steps": 40000, "loss": 0.0053, "lr": 1.5262657342498407e-05, "epoch": 4.093808630393997, "percentage": 62.74, "elapsed_time": "5:44:46", "remaining_time": "3:24:46", "throughput": 2335.59, "total_tokens": 48314976} {"current_steps": 25100, "total_steps": 40000, "loss": 0.0014, "lr": 1.52536159112315e-05, "epoch": 4.094624357614814, "percentage": 62.75, "elapsed_time": "5:44:48", "remaining_time": "3:24:41", "throughput": 2335.79, "total_tokens": 48323808} {"current_steps": 25105, "total_steps": 40000, "loss": 0.0005, "lr": 1.5244575982979497e-05, "epoch": 4.095440084835631, "percentage": 62.76, "elapsed_time": "5:44:50", "remaining_time": "3:24:35", "throughput": 2335.99, "total_tokens": 48332832} {"current_steps": 25110, "total_steps": 40000, "loss": 0.0002, "lr": 1.5235537559136487e-05, "epoch": 4.096255812056448, "percentage": 62.78, "elapsed_time": "5:44:52", "remaining_time": "3:24:30", "throughput": 2336.18, "total_tokens": 48341696} {"current_steps": 25115, "total_steps": 40000, "loss": 0.0006, "lr": 1.5226500641096286e-05, "epoch": 4.097071539277266, "percentage": 62.79, "elapsed_time": "5:44:54", "remaining_time": "3:24:25", "throughput": 2336.37, "total_tokens": 48350400} {"current_steps": 25120, "total_steps": 40000, "loss": 0.0001, "lr": 1.5217465230252509e-05, "epoch": 4.097887266498083, "percentage": 62.8, "elapsed_time": "5:44:56", "remaining_time": "3:24:19", "throughput": 2336.67, "total_tokens": 48361424} {"current_steps": 25125, "total_steps": 40000, "loss": 0.167, "lr": 1.5208431327998523e-05, "epoch": 4.0987029937189, "percentage": 62.81, "elapsed_time": "5:44:58", "remaining_time": "3:24:14", "throughput": 2336.85, "total_tokens": 48370064} {"current_steps": 25130, "total_steps": 40000, "loss": 0.0018, "lr": 1.5199398935727477e-05, "epoch": 4.0995187209397175, "percentage": 62.82, "elapsed_time": "5:45:00", "remaining_time": "3:24:09", "throughput": 2337.13, "total_tokens": 48380784} {"current_steps": 25135, "total_steps": 40000, "loss": 0.0009, "lr": 1.5190368054832282e-05, "epoch": 4.1003344481605355, "percentage": 62.84, "elapsed_time": "5:45:02", "remaining_time": "3:24:03", "throughput": 2337.37, "total_tokens": 48390480} {"current_steps": 25140, "total_steps": 40000, "loss": 0.0006, "lr": 1.5181338686705601e-05, "epoch": 4.101150175381353, "percentage": 62.85, "elapsed_time": "5:45:05", "remaining_time": "3:23:58", "throughput": 2337.63, "total_tokens": 48400720} {"current_steps": 25145, "total_steps": 40000, "loss": 0.0003, "lr": 1.5172310832739889e-05, "epoch": 4.10196590260217, "percentage": 62.86, "elapsed_time": "5:45:07", "remaining_time": "3:23:53", "throughput": 2337.85, "total_tokens": 48410208} {"current_steps": 25150, "total_steps": 40000, "loss": 0.0003, "lr": 1.5163284494327346e-05, "epoch": 4.102781629822987, "percentage": 62.88, "elapsed_time": "5:45:09", "remaining_time": "3:23:47", "throughput": 2338.08, "total_tokens": 48419776} {"current_steps": 25155, "total_steps": 40000, "loss": 0.0024, "lr": 1.5154259672859952e-05, "epoch": 4.103597357043805, "percentage": 62.89, "elapsed_time": "5:45:11", "remaining_time": "3:23:42", "throughput": 2338.27, "total_tokens": 48428624} {"current_steps": 25160, "total_steps": 40000, "loss": 0.0001, "lr": 1.5145236369729452e-05, "epoch": 4.104413084264622, "percentage": 62.9, "elapsed_time": "5:45:13", "remaining_time": "3:23:37", "throughput": 2338.48, "total_tokens": 48437680} {"current_steps": 25165, "total_steps": 40000, "loss": 0.0029, "lr": 1.5136214586327335e-05, "epoch": 4.105228811485439, "percentage": 62.91, "elapsed_time": "5:45:15", "remaining_time": "3:23:31", "throughput": 2338.67, "total_tokens": 48446560} {"current_steps": 25170, "total_steps": 40000, "loss": 0.0002, "lr": 1.5127194324044885e-05, "epoch": 4.106044538706256, "percentage": 62.92, "elapsed_time": "5:45:17", "remaining_time": "3:23:26", "throughput": 2338.86, "total_tokens": 48455328} {"current_steps": 25175, "total_steps": 40000, "loss": 0.0873, "lr": 1.5118175584273148e-05, "epoch": 4.106860265927074, "percentage": 62.94, "elapsed_time": "5:45:19", "remaining_time": "3:23:21", "throughput": 2339.06, "total_tokens": 48464176} {"current_steps": 25180, "total_steps": 40000, "loss": 0.0738, "lr": 1.5109158368402909e-05, "epoch": 4.1076759931478914, "percentage": 62.95, "elapsed_time": "5:45:21", "remaining_time": "3:23:15", "throughput": 2339.3, "total_tokens": 48474096} {"current_steps": 25185, "total_steps": 40000, "loss": 0.0002, "lr": 1.5100142677824753e-05, "epoch": 4.108491720368709, "percentage": 62.96, "elapsed_time": "5:45:23", "remaining_time": "3:23:10", "throughput": 2339.55, "total_tokens": 48484144} {"current_steps": 25190, "total_steps": 40000, "loss": 0.0286, "lr": 1.509112851392901e-05, "epoch": 4.109307447589526, "percentage": 62.98, "elapsed_time": "5:45:25", "remaining_time": "3:23:05", "throughput": 2339.75, "total_tokens": 48493200} {"current_steps": 25195, "total_steps": 40000, "loss": 0.0983, "lr": 1.5082115878105763e-05, "epoch": 4.110123174810344, "percentage": 62.99, "elapsed_time": "5:45:27", "remaining_time": "3:23:00", "throughput": 2340.0, "total_tokens": 48503104} {"current_steps": 25200, "total_steps": 40000, "loss": 0.0777, "lr": 1.5073104771744892e-05, "epoch": 4.110938902031161, "percentage": 63.0, "elapsed_time": "5:45:29", "remaining_time": "3:22:54", "throughput": 2340.25, "total_tokens": 48513200} {"current_steps": 25200, "total_steps": 40000, "eval_loss": 0.34385573863983154, "epoch": 4.110938902031161, "percentage": 63.0, "elapsed_time": "5:46:50", "remaining_time": "3:23:42", "throughput": 2331.16, "total_tokens": 48513200} {"current_steps": 25205, "total_steps": 40000, "loss": 0.0712, "lr": 1.5064095196236006e-05, "epoch": 4.111754629251978, "percentage": 63.01, "elapsed_time": "5:46:54", "remaining_time": "3:23:37", "throughput": 2331.1, "total_tokens": 48521232} {"current_steps": 25210, "total_steps": 40000, "loss": 0.0712, "lr": 1.50550871529685e-05, "epoch": 4.112570356472795, "percentage": 63.02, "elapsed_time": "5:46:56", "remaining_time": "3:23:32", "throughput": 2331.38, "total_tokens": 48531776} {"current_steps": 25215, "total_steps": 40000, "loss": 0.0, "lr": 1.5046080643331546e-05, "epoch": 4.113386083693613, "percentage": 63.04, "elapsed_time": "5:46:58", "remaining_time": "3:23:27", "throughput": 2331.53, "total_tokens": 48539856} {"current_steps": 25220, "total_steps": 40000, "loss": 0.0001, "lr": 1.5037075668714028e-05, "epoch": 4.11420181091443, "percentage": 63.05, "elapsed_time": "5:47:00", "remaining_time": "3:23:21", "throughput": 2331.83, "total_tokens": 48550928} {"current_steps": 25225, "total_steps": 40000, "loss": 0.0, "lr": 1.5028072230504656e-05, "epoch": 4.115017538135247, "percentage": 63.06, "elapsed_time": "5:47:03", "remaining_time": "3:23:16", "throughput": 2332.03, "total_tokens": 48560000} {"current_steps": 25230, "total_steps": 40000, "loss": 0.0476, "lr": 1.5019070330091861e-05, "epoch": 4.1158332653560645, "percentage": 63.08, "elapsed_time": "5:47:05", "remaining_time": "3:23:11", "throughput": 2332.25, "total_tokens": 48569328} {"current_steps": 25235, "total_steps": 40000, "loss": 0.0003, "lr": 1.5010069968863843e-05, "epoch": 4.1166489925768825, "percentage": 63.09, "elapsed_time": "5:47:07", "remaining_time": "3:23:05", "throughput": 2332.49, "total_tokens": 48579296} {"current_steps": 25240, "total_steps": 40000, "loss": 0.001, "lr": 1.5001071148208584e-05, "epoch": 4.1174647197977, "percentage": 63.1, "elapsed_time": "5:47:09", "remaining_time": "3:23:00", "throughput": 2332.75, "total_tokens": 48589536} {"current_steps": 25245, "total_steps": 40000, "loss": 0.0003, "lr": 1.49920738695138e-05, "epoch": 4.118280447018517, "percentage": 63.11, "elapsed_time": "5:47:11", "remaining_time": "3:22:55", "throughput": 2333.0, "total_tokens": 48599552} {"current_steps": 25250, "total_steps": 40000, "loss": 0.0002, "lr": 1.4983078134166995e-05, "epoch": 4.119096174239334, "percentage": 63.12, "elapsed_time": "5:47:13", "remaining_time": "3:22:50", "throughput": 2333.26, "total_tokens": 48609776} {"current_steps": 25255, "total_steps": 40000, "loss": 0.045, "lr": 1.4974083943555428e-05, "epoch": 4.119911901460152, "percentage": 63.14, "elapsed_time": "5:47:15", "remaining_time": "3:22:44", "throughput": 2333.52, "total_tokens": 48620016} {"current_steps": 25260, "total_steps": 40000, "loss": 0.0005, "lr": 1.496509129906611e-05, "epoch": 4.120727628680969, "percentage": 63.15, "elapsed_time": "5:47:17", "remaining_time": "3:22:39", "throughput": 2333.79, "total_tokens": 48630576} {"current_steps": 25265, "total_steps": 40000, "loss": 0.0374, "lr": 1.4956100202085809e-05, "epoch": 4.121543355901786, "percentage": 63.16, "elapsed_time": "5:47:19", "remaining_time": "3:22:34", "throughput": 2333.96, "total_tokens": 48638896} {"current_steps": 25270, "total_steps": 40000, "loss": 0.0003, "lr": 1.4947110654001093e-05, "epoch": 4.122359083122603, "percentage": 63.18, "elapsed_time": "5:47:21", "remaining_time": "3:22:28", "throughput": 2334.22, "total_tokens": 48649088} {"current_steps": 25275, "total_steps": 40000, "loss": 0.0002, "lr": 1.4938122656198234e-05, "epoch": 4.123174810343421, "percentage": 63.19, "elapsed_time": "5:47:23", "remaining_time": "3:22:23", "throughput": 2334.41, "total_tokens": 48658000} {"current_steps": 25280, "total_steps": 40000, "loss": 0.0001, "lr": 1.4929136210063316e-05, "epoch": 4.1239905375642385, "percentage": 63.2, "elapsed_time": "5:47:25", "remaining_time": "3:22:18", "throughput": 2334.62, "total_tokens": 48667216} {"current_steps": 25285, "total_steps": 40000, "loss": 0.0001, "lr": 1.4920151316982146e-05, "epoch": 4.124806264785056, "percentage": 63.21, "elapsed_time": "5:47:27", "remaining_time": "3:22:12", "throughput": 2334.82, "total_tokens": 48676240} {"current_steps": 25290, "total_steps": 40000, "loss": 0.1107, "lr": 1.4911167978340312e-05, "epoch": 4.125621992005874, "percentage": 63.22, "elapsed_time": "5:47:30", "remaining_time": "3:22:07", "throughput": 2335.03, "total_tokens": 48685488} {"current_steps": 25295, "total_steps": 40000, "loss": 0.0002, "lr": 1.4902186195523166e-05, "epoch": 4.126437719226691, "percentage": 63.24, "elapsed_time": "5:47:32", "remaining_time": "3:22:02", "throughput": 2335.3, "total_tokens": 48695808} {"current_steps": 25300, "total_steps": 40000, "loss": 0.0003, "lr": 1.4893205969915805e-05, "epoch": 4.127253446447508, "percentage": 63.25, "elapsed_time": "5:47:34", "remaining_time": "3:21:56", "throughput": 2335.54, "total_tokens": 48705840} {"current_steps": 25305, "total_steps": 40000, "loss": 0.0431, "lr": 1.4884227302903086e-05, "epoch": 4.128069173668325, "percentage": 63.26, "elapsed_time": "5:47:36", "remaining_time": "3:21:51", "throughput": 2335.8, "total_tokens": 48716000} {"current_steps": 25310, "total_steps": 40000, "loss": 0.0992, "lr": 1.4875250195869653e-05, "epoch": 4.128884900889143, "percentage": 63.28, "elapsed_time": "5:47:38", "remaining_time": "3:21:46", "throughput": 2335.95, "total_tokens": 48723920} {"current_steps": 25315, "total_steps": 40000, "loss": 0.1372, "lr": 1.4866274650199862e-05, "epoch": 4.12970062810996, "percentage": 63.29, "elapsed_time": "5:47:40", "remaining_time": "3:21:40", "throughput": 2336.16, "total_tokens": 48733248} {"current_steps": 25320, "total_steps": 40000, "loss": 0.0012, "lr": 1.485730066727788e-05, "epoch": 4.130516355330777, "percentage": 63.3, "elapsed_time": "5:47:42", "remaining_time": "3:21:35", "throughput": 2336.41, "total_tokens": 48743296} {"current_steps": 25325, "total_steps": 40000, "loss": 0.0005, "lr": 1.4848328248487586e-05, "epoch": 4.1313320825515945, "percentage": 63.31, "elapsed_time": "5:47:44", "remaining_time": "3:21:30", "throughput": 2336.61, "total_tokens": 48752224} {"current_steps": 25330, "total_steps": 40000, "loss": 0.0888, "lr": 1.4839357395212656e-05, "epoch": 4.1321478097724125, "percentage": 63.32, "elapsed_time": "5:47:46", "remaining_time": "3:21:25", "throughput": 2336.8, "total_tokens": 48761152} {"current_steps": 25335, "total_steps": 40000, "loss": 0.0005, "lr": 1.4830388108836502e-05, "epoch": 4.13296353699323, "percentage": 63.34, "elapsed_time": "5:47:48", "remaining_time": "3:21:19", "throughput": 2337.03, "total_tokens": 48770768} {"current_steps": 25340, "total_steps": 40000, "loss": 0.0003, "lr": 1.4821420390742299e-05, "epoch": 4.133779264214047, "percentage": 63.35, "elapsed_time": "5:47:50", "remaining_time": "3:21:14", "throughput": 2337.26, "total_tokens": 48780496} {"current_steps": 25345, "total_steps": 40000, "loss": 0.0001, "lr": 1.4812454242312979e-05, "epoch": 4.134594991434864, "percentage": 63.36, "elapsed_time": "5:47:52", "remaining_time": "3:21:09", "throughput": 2337.48, "total_tokens": 48789808} {"current_steps": 25350, "total_steps": 40000, "loss": 0.0009, "lr": 1.4803489664931253e-05, "epoch": 4.135410718655682, "percentage": 63.38, "elapsed_time": "5:47:54", "remaining_time": "3:21:03", "throughput": 2337.69, "total_tokens": 48798976} {"current_steps": 25355, "total_steps": 40000, "loss": 0.0001, "lr": 1.4794526659979544e-05, "epoch": 4.136226445876499, "percentage": 63.39, "elapsed_time": "5:47:56", "remaining_time": "3:20:58", "throughput": 2337.88, "total_tokens": 48807808} {"current_steps": 25360, "total_steps": 40000, "loss": 0.1227, "lr": 1.4785565228840086e-05, "epoch": 4.137042173097316, "percentage": 63.4, "elapsed_time": "5:47:59", "remaining_time": "3:20:53", "throughput": 2338.07, "total_tokens": 48816672} {"current_steps": 25365, "total_steps": 40000, "loss": 0.0007, "lr": 1.4776605372894819e-05, "epoch": 4.137857900318133, "percentage": 63.41, "elapsed_time": "5:48:01", "remaining_time": "3:20:47", "throughput": 2338.32, "total_tokens": 48826720} {"current_steps": 25370, "total_steps": 40000, "loss": 0.0002, "lr": 1.4767647093525488e-05, "epoch": 4.138673627538951, "percentage": 63.42, "elapsed_time": "5:48:03", "remaining_time": "3:20:42", "throughput": 2338.55, "total_tokens": 48836464} {"current_steps": 25375, "total_steps": 40000, "loss": 0.0008, "lr": 1.4758690392113566e-05, "epoch": 4.139489354759768, "percentage": 63.44, "elapsed_time": "5:48:05", "remaining_time": "3:20:37", "throughput": 2338.77, "total_tokens": 48845808} {"current_steps": 25380, "total_steps": 40000, "loss": 0.0005, "lr": 1.4749735270040276e-05, "epoch": 4.1403050819805856, "percentage": 63.45, "elapsed_time": "5:48:07", "remaining_time": "3:20:32", "throughput": 2338.99, "total_tokens": 48855328} {"current_steps": 25385, "total_steps": 40000, "loss": 0.0001, "lr": 1.4740781728686623e-05, "epoch": 4.141120809201403, "percentage": 63.46, "elapsed_time": "5:48:09", "remaining_time": "3:20:26", "throughput": 2339.25, "total_tokens": 48865504} {"current_steps": 25390, "total_steps": 40000, "loss": 0.0006, "lr": 1.4731829769433358e-05, "epoch": 4.141936536422221, "percentage": 63.48, "elapsed_time": "5:48:11", "remaining_time": "3:20:21", "throughput": 2339.42, "total_tokens": 48874016} {"current_steps": 25395, "total_steps": 40000, "loss": 0.0179, "lr": 1.4722879393660976e-05, "epoch": 4.142752263643038, "percentage": 63.49, "elapsed_time": "5:48:13", "remaining_time": "3:20:16", "throughput": 2339.68, "total_tokens": 48884336} {"current_steps": 25400, "total_steps": 40000, "loss": 0.0003, "lr": 1.4713930602749748e-05, "epoch": 4.143567990863855, "percentage": 63.5, "elapsed_time": "5:48:15", "remaining_time": "3:20:10", "throughput": 2339.94, "total_tokens": 48894496} {"current_steps": 25400, "total_steps": 40000, "eval_loss": 0.3471934497356415, "epoch": 4.143567990863855, "percentage": 63.5, "elapsed_time": "5:49:36", "remaining_time": "3:20:57", "throughput": 2330.91, "total_tokens": 48894496} {"current_steps": 25405, "total_steps": 40000, "loss": 0.0006, "lr": 1.470498339807968e-05, "epoch": 4.144383718084672, "percentage": 63.51, "elapsed_time": "5:49:40", "remaining_time": "3:20:53", "throughput": 2330.93, "total_tokens": 48903408} {"current_steps": 25410, "total_steps": 40000, "loss": 0.1551, "lr": 1.4696037781030542e-05, "epoch": 4.14519944530549, "percentage": 63.52, "elapsed_time": "5:49:42", "remaining_time": "3:20:47", "throughput": 2331.13, "total_tokens": 48912624} {"current_steps": 25415, "total_steps": 40000, "loss": 0.0002, "lr": 1.4687093752981876e-05, "epoch": 4.146015172526307, "percentage": 63.54, "elapsed_time": "5:49:44", "remaining_time": "3:20:42", "throughput": 2331.38, "total_tokens": 48922544} {"current_steps": 25420, "total_steps": 40000, "loss": 0.0001, "lr": 1.4678151315312943e-05, "epoch": 4.146830899747124, "percentage": 63.55, "elapsed_time": "5:49:46", "remaining_time": "3:20:37", "throughput": 2331.59, "total_tokens": 48931984} {"current_steps": 25425, "total_steps": 40000, "loss": 0.0027, "lr": 1.4669210469402789e-05, "epoch": 4.1476466269679415, "percentage": 63.56, "elapsed_time": "5:49:48", "remaining_time": "3:20:31", "throughput": 2331.83, "total_tokens": 48941872} {"current_steps": 25430, "total_steps": 40000, "loss": 0.0002, "lr": 1.4660271216630218e-05, "epoch": 4.1484623541887595, "percentage": 63.58, "elapsed_time": "5:49:50", "remaining_time": "3:20:26", "throughput": 2332.11, "total_tokens": 48952576} {"current_steps": 25435, "total_steps": 40000, "loss": 0.0003, "lr": 1.4651333558373748e-05, "epoch": 4.149278081409577, "percentage": 63.59, "elapsed_time": "5:49:52", "remaining_time": "3:20:21", "throughput": 2332.36, "total_tokens": 48962560} {"current_steps": 25440, "total_steps": 40000, "loss": 0.1335, "lr": 1.4642397496011707e-05, "epoch": 4.150093808630394, "percentage": 63.6, "elapsed_time": "5:49:54", "remaining_time": "3:20:15", "throughput": 2332.54, "total_tokens": 48971136} {"current_steps": 25445, "total_steps": 40000, "loss": 0.0001, "lr": 1.4633463030922129e-05, "epoch": 4.150909535851211, "percentage": 63.61, "elapsed_time": "5:49:56", "remaining_time": "3:20:10", "throughput": 2332.73, "total_tokens": 48980144} {"current_steps": 25450, "total_steps": 40000, "loss": 0.0001, "lr": 1.462453016448282e-05, "epoch": 4.151725263072029, "percentage": 63.62, "elapsed_time": "5:49:58", "remaining_time": "3:20:05", "throughput": 2332.97, "total_tokens": 48989856} {"current_steps": 25455, "total_steps": 40000, "loss": 0.0003, "lr": 1.4615598898071354e-05, "epoch": 4.152540990292846, "percentage": 63.64, "elapsed_time": "5:50:01", "remaining_time": "3:20:00", "throughput": 2333.24, "total_tokens": 49000528} {"current_steps": 25460, "total_steps": 40000, "loss": 0.0977, "lr": 1.4606669233065026e-05, "epoch": 4.153356717513663, "percentage": 63.65, "elapsed_time": "5:50:03", "remaining_time": "3:19:54", "throughput": 2333.45, "total_tokens": 49009744} {"current_steps": 25465, "total_steps": 40000, "loss": 0.0002, "lr": 1.4597741170840914e-05, "epoch": 4.154172444734481, "percentage": 63.66, "elapsed_time": "5:50:05", "remaining_time": "3:19:49", "throughput": 2333.65, "total_tokens": 49018816} {"current_steps": 25470, "total_steps": 40000, "loss": 0.0005, "lr": 1.4588814712775853e-05, "epoch": 4.154988171955298, "percentage": 63.68, "elapsed_time": "5:50:07", "remaining_time": "3:19:44", "throughput": 2333.9, "total_tokens": 49028912} {"current_steps": 25475, "total_steps": 40000, "loss": 0.0003, "lr": 1.4579889860246382e-05, "epoch": 4.1558038991761155, "percentage": 63.69, "elapsed_time": "5:50:09", "remaining_time": "3:19:38", "throughput": 2334.17, "total_tokens": 49039440} {"current_steps": 25480, "total_steps": 40000, "loss": 0.0003, "lr": 1.457096661462885e-05, "epoch": 4.156619626396933, "percentage": 63.7, "elapsed_time": "5:50:11", "remaining_time": "3:19:33", "throughput": 2334.45, "total_tokens": 49050176} {"current_steps": 25485, "total_steps": 40000, "loss": 0.0011, "lr": 1.4562044977299322e-05, "epoch": 4.157435353617751, "percentage": 63.71, "elapsed_time": "5:50:13", "remaining_time": "3:19:28", "throughput": 2334.67, "total_tokens": 49059504} {"current_steps": 25490, "total_steps": 40000, "loss": 0.0342, "lr": 1.4553124949633623e-05, "epoch": 4.158251080838568, "percentage": 63.73, "elapsed_time": "5:50:15", "remaining_time": "3:19:22", "throughput": 2334.9, "total_tokens": 49069280} {"current_steps": 25495, "total_steps": 40000, "loss": 0.0001, "lr": 1.4544206533007354e-05, "epoch": 4.159066808059385, "percentage": 63.74, "elapsed_time": "5:50:17", "remaining_time": "3:19:17", "throughput": 2335.06, "total_tokens": 49077360} {"current_steps": 25500, "total_steps": 40000, "loss": 0.0061, "lr": 1.4535289728795821e-05, "epoch": 4.159882535280202, "percentage": 63.75, "elapsed_time": "5:50:19", "remaining_time": "3:19:12", "throughput": 2335.3, "total_tokens": 49087312} {"current_steps": 25505, "total_steps": 40000, "loss": 0.002, "lr": 1.4526374538374132e-05, "epoch": 4.16069826250102, "percentage": 63.76, "elapsed_time": "5:50:21", "remaining_time": "3:19:07", "throughput": 2335.57, "total_tokens": 49097776} {"current_steps": 25510, "total_steps": 40000, "loss": 0.0002, "lr": 1.4517460963117097e-05, "epoch": 4.161513989721837, "percentage": 63.78, "elapsed_time": "5:50:23", "remaining_time": "3:19:01", "throughput": 2335.78, "total_tokens": 49107024} {"current_steps": 25515, "total_steps": 40000, "loss": 0.0006, "lr": 1.4508549004399314e-05, "epoch": 4.162329716942654, "percentage": 63.79, "elapsed_time": "5:50:25", "remaining_time": "3:18:56", "throughput": 2335.99, "total_tokens": 49116400} {"current_steps": 25520, "total_steps": 40000, "loss": 0.0004, "lr": 1.449963866359513e-05, "epoch": 4.163145444163471, "percentage": 63.8, "elapsed_time": "5:50:28", "remaining_time": "3:18:51", "throughput": 2336.22, "total_tokens": 49126048} {"current_steps": 25525, "total_steps": 40000, "loss": 0.0981, "lr": 1.4490729942078607e-05, "epoch": 4.1639611713842895, "percentage": 63.81, "elapsed_time": "5:50:30", "remaining_time": "3:18:45", "throughput": 2336.47, "total_tokens": 49136176} {"current_steps": 25530, "total_steps": 40000, "loss": 0.0752, "lr": 1.4481822841223608e-05, "epoch": 4.164776898605107, "percentage": 63.82, "elapsed_time": "5:50:32", "remaining_time": "3:18:40", "throughput": 2336.7, "total_tokens": 49145952} {"current_steps": 25535, "total_steps": 40000, "loss": 0.0177, "lr": 1.4472917362403704e-05, "epoch": 4.165592625825924, "percentage": 63.84, "elapsed_time": "5:50:34", "remaining_time": "3:18:35", "throughput": 2336.93, "total_tokens": 49155536} {"current_steps": 25540, "total_steps": 40000, "loss": 0.0001, "lr": 1.4464013506992224e-05, "epoch": 4.166408353046741, "percentage": 63.85, "elapsed_time": "5:50:36", "remaining_time": "3:18:30", "throughput": 2337.13, "total_tokens": 49164624} {"current_steps": 25545, "total_steps": 40000, "loss": 0.0008, "lr": 1.4455111276362277e-05, "epoch": 4.167224080267559, "percentage": 63.86, "elapsed_time": "5:50:38", "remaining_time": "3:18:24", "throughput": 2337.33, "total_tokens": 49173552} {"current_steps": 25550, "total_steps": 40000, "loss": 0.0288, "lr": 1.4446210671886676e-05, "epoch": 4.168039807488376, "percentage": 63.88, "elapsed_time": "5:50:40", "remaining_time": "3:18:19", "throughput": 2337.51, "total_tokens": 49182240} {"current_steps": 25555, "total_steps": 40000, "loss": 0.0004, "lr": 1.4437311694938015e-05, "epoch": 4.168855534709193, "percentage": 63.89, "elapsed_time": "5:50:42", "remaining_time": "3:18:14", "throughput": 2337.78, "total_tokens": 49192784} {"current_steps": 25560, "total_steps": 40000, "loss": 0.0386, "lr": 1.442841434688864e-05, "epoch": 4.16967126193001, "percentage": 63.9, "elapsed_time": "5:50:44", "remaining_time": "3:18:09", "throughput": 2338.03, "total_tokens": 49202992} {"current_steps": 25565, "total_steps": 40000, "loss": 0.0006, "lr": 1.4419518629110615e-05, "epoch": 4.170486989150828, "percentage": 63.91, "elapsed_time": "5:50:46", "remaining_time": "3:18:03", "throughput": 2338.31, "total_tokens": 49213648} {"current_steps": 25570, "total_steps": 40000, "loss": 0.0001, "lr": 1.4410624542975778e-05, "epoch": 4.171302716371645, "percentage": 63.92, "elapsed_time": "5:50:48", "remaining_time": "3:17:58", "throughput": 2338.45, "total_tokens": 49221568} {"current_steps": 25575, "total_steps": 40000, "loss": 0.0001, "lr": 1.4401732089855724e-05, "epoch": 4.1721184435924625, "percentage": 63.94, "elapsed_time": "5:50:50", "remaining_time": "3:17:53", "throughput": 2338.7, "total_tokens": 49231568} {"current_steps": 25580, "total_steps": 40000, "loss": 0.0006, "lr": 1.4392841271121754e-05, "epoch": 4.17293417081328, "percentage": 63.95, "elapsed_time": "5:50:52", "remaining_time": "3:17:47", "throughput": 2338.94, "total_tokens": 49241472} {"current_steps": 25585, "total_steps": 40000, "loss": 0.0171, "lr": 1.438395208814497e-05, "epoch": 4.173749898034098, "percentage": 63.96, "elapsed_time": "5:50:54", "remaining_time": "3:17:42", "throughput": 2339.15, "total_tokens": 49250816} {"current_steps": 25590, "total_steps": 40000, "loss": 0.0001, "lr": 1.4375064542296174e-05, "epoch": 4.174565625254915, "percentage": 63.98, "elapsed_time": "5:50:57", "remaining_time": "3:17:37", "throughput": 2339.35, "total_tokens": 49259792} {"current_steps": 25595, "total_steps": 40000, "loss": 0.0001, "lr": 1.4366178634945946e-05, "epoch": 4.175381352475732, "percentage": 63.99, "elapsed_time": "5:50:59", "remaining_time": "3:17:32", "throughput": 2339.63, "total_tokens": 49270624} {"current_steps": 25600, "total_steps": 40000, "loss": 0.059, "lr": 1.4357294367464616e-05, "epoch": 4.176197079696549, "percentage": 64.0, "elapsed_time": "5:51:01", "remaining_time": "3:17:26", "throughput": 2339.88, "total_tokens": 49280736} {"current_steps": 25600, "total_steps": 40000, "eval_loss": 0.34600019454956055, "epoch": 4.176197079696549, "percentage": 64.0, "elapsed_time": "5:52:21", "remaining_time": "3:18:12", "throughput": 2330.95, "total_tokens": 49280736} {"current_steps": 25605, "total_steps": 40000, "loss": 0.0001, "lr": 1.434841174122224e-05, "epoch": 4.177012806917367, "percentage": 64.01, "elapsed_time": "5:52:25", "remaining_time": "3:18:07", "throughput": 2330.96, "total_tokens": 49289696} {"current_steps": 25610, "total_steps": 40000, "loss": 0.0016, "lr": 1.4339530757588615e-05, "epoch": 4.177828534138184, "percentage": 64.03, "elapsed_time": "5:52:27", "remaining_time": "3:18:02", "throughput": 2331.2, "total_tokens": 49299648} {"current_steps": 25615, "total_steps": 40000, "loss": 0.0044, "lr": 1.433065141793333e-05, "epoch": 4.178644261359001, "percentage": 64.04, "elapsed_time": "5:52:29", "remaining_time": "3:17:57", "throughput": 2331.42, "total_tokens": 49309136} {"current_steps": 25620, "total_steps": 40000, "loss": 0.002, "lr": 1.4321773723625665e-05, "epoch": 4.1794599885798185, "percentage": 64.05, "elapsed_time": "5:52:31", "remaining_time": "3:17:52", "throughput": 2331.65, "total_tokens": 49318960} {"current_steps": 25625, "total_steps": 40000, "loss": 0.1691, "lr": 1.4312897676034693e-05, "epoch": 4.1802757158006365, "percentage": 64.06, "elapsed_time": "5:52:34", "remaining_time": "3:17:46", "throughput": 2331.88, "total_tokens": 49328624} {"current_steps": 25630, "total_steps": 40000, "loss": 0.0007, "lr": 1.4304023276529188e-05, "epoch": 4.181091443021454, "percentage": 64.08, "elapsed_time": "5:52:36", "remaining_time": "3:17:41", "throughput": 2332.17, "total_tokens": 49339472} {"current_steps": 25635, "total_steps": 40000, "loss": 0.0692, "lr": 1.4295150526477712e-05, "epoch": 4.181907170242271, "percentage": 64.09, "elapsed_time": "5:52:38", "remaining_time": "3:17:36", "throughput": 2332.37, "total_tokens": 49348704} {"current_steps": 25640, "total_steps": 40000, "loss": 0.0535, "lr": 1.4286279427248562e-05, "epoch": 4.182722897463089, "percentage": 64.1, "elapsed_time": "5:52:40", "remaining_time": "3:17:31", "throughput": 2332.57, "total_tokens": 49357600} {"current_steps": 25645, "total_steps": 40000, "loss": 0.1285, "lr": 1.4277409980209747e-05, "epoch": 4.183538624683906, "percentage": 64.11, "elapsed_time": "5:52:42", "remaining_time": "3:17:25", "throughput": 2332.78, "total_tokens": 49366896} {"current_steps": 25650, "total_steps": 40000, "loss": 0.0005, "lr": 1.4268542186729061e-05, "epoch": 4.184354351904723, "percentage": 64.12, "elapsed_time": "5:52:44", "remaining_time": "3:17:20", "throughput": 2332.97, "total_tokens": 49375792} {"current_steps": 25655, "total_steps": 40000, "loss": 0.0242, "lr": 1.4259676048174043e-05, "epoch": 4.18517007912554, "percentage": 64.14, "elapsed_time": "5:52:46", "remaining_time": "3:17:15", "throughput": 2333.19, "total_tokens": 49385328} {"current_steps": 25660, "total_steps": 40000, "loss": 0.0292, "lr": 1.4250811565911937e-05, "epoch": 4.185985806346358, "percentage": 64.15, "elapsed_time": "5:52:48", "remaining_time": "3:17:09", "throughput": 2333.4, "total_tokens": 49394640} {"current_steps": 25665, "total_steps": 40000, "loss": 0.0003, "lr": 1.4241948741309782e-05, "epoch": 4.186801533567175, "percentage": 64.16, "elapsed_time": "5:52:50", "remaining_time": "3:17:04", "throughput": 2333.61, "total_tokens": 49404000} {"current_steps": 25670, "total_steps": 40000, "loss": 0.0074, "lr": 1.4233087575734317e-05, "epoch": 4.1876172607879925, "percentage": 64.18, "elapsed_time": "5:52:52", "remaining_time": "3:16:59", "throughput": 2333.78, "total_tokens": 49412400} {"current_steps": 25675, "total_steps": 40000, "loss": 0.0028, "lr": 1.422422807055206e-05, "epoch": 4.18843298800881, "percentage": 64.19, "elapsed_time": "5:52:54", "remaining_time": "3:16:54", "throughput": 2334.08, "total_tokens": 49423504} {"current_steps": 25680, "total_steps": 40000, "loss": 0.0006, "lr": 1.4215370227129243e-05, "epoch": 4.189248715229628, "percentage": 64.2, "elapsed_time": "5:52:56", "remaining_time": "3:16:48", "throughput": 2334.29, "total_tokens": 49432816} {"current_steps": 25685, "total_steps": 40000, "loss": 0.0006, "lr": 1.4206514046831876e-05, "epoch": 4.190064442450445, "percentage": 64.21, "elapsed_time": "5:52:58", "remaining_time": "3:16:43", "throughput": 2334.56, "total_tokens": 49443440} {"current_steps": 25690, "total_steps": 40000, "loss": 0.115, "lr": 1.419765953102567e-05, "epoch": 4.190880169671262, "percentage": 64.22, "elapsed_time": "5:53:00", "remaining_time": "3:16:38", "throughput": 2334.78, "total_tokens": 49452928} {"current_steps": 25695, "total_steps": 40000, "loss": 0.0256, "lr": 1.4188806681076125e-05, "epoch": 4.191695896892079, "percentage": 64.24, "elapsed_time": "5:53:03", "remaining_time": "3:16:33", "throughput": 2335.0, "total_tokens": 49462480} {"current_steps": 25700, "total_steps": 40000, "loss": 0.001, "lr": 1.4179955498348443e-05, "epoch": 4.192511624112897, "percentage": 64.25, "elapsed_time": "5:53:05", "remaining_time": "3:16:27", "throughput": 2335.19, "total_tokens": 49471344} {"current_steps": 25705, "total_steps": 40000, "loss": 0.0004, "lr": 1.4171105984207605e-05, "epoch": 4.193327351333714, "percentage": 64.26, "elapsed_time": "5:53:07", "remaining_time": "3:16:22", "throughput": 2335.41, "total_tokens": 49480880} {"current_steps": 25710, "total_steps": 40000, "loss": 0.0002, "lr": 1.4162258140018304e-05, "epoch": 4.194143078554531, "percentage": 64.28, "elapsed_time": "5:53:09", "remaining_time": "3:16:17", "throughput": 2335.69, "total_tokens": 49491520} {"current_steps": 25715, "total_steps": 40000, "loss": 0.0029, "lr": 1.4153411967144986e-05, "epoch": 4.194958805775348, "percentage": 64.29, "elapsed_time": "5:53:11", "remaining_time": "3:16:12", "throughput": 2335.93, "total_tokens": 49501584} {"current_steps": 25720, "total_steps": 40000, "loss": 0.0613, "lr": 1.4144567466951864e-05, "epoch": 4.195774532996166, "percentage": 64.3, "elapsed_time": "5:53:13", "remaining_time": "3:16:06", "throughput": 2336.15, "total_tokens": 49510944} {"current_steps": 25725, "total_steps": 40000, "loss": 0.0002, "lr": 1.4135724640802844e-05, "epoch": 4.196590260216984, "percentage": 64.31, "elapsed_time": "5:53:15", "remaining_time": "3:16:01", "throughput": 2336.36, "total_tokens": 49520272} {"current_steps": 25730, "total_steps": 40000, "loss": 0.0103, "lr": 1.4126883490061615e-05, "epoch": 4.197405987437801, "percentage": 64.33, "elapsed_time": "5:53:17", "remaining_time": "3:15:56", "throughput": 2336.61, "total_tokens": 49530544} {"current_steps": 25735, "total_steps": 40000, "loss": 0.0001, "lr": 1.4118044016091603e-05, "epoch": 4.198221714658618, "percentage": 64.34, "elapsed_time": "5:53:19", "remaining_time": "3:15:51", "throughput": 2336.86, "total_tokens": 49540560} {"current_steps": 25740, "total_steps": 40000, "loss": 0.0002, "lr": 1.410920622025594e-05, "epoch": 4.199037441879436, "percentage": 64.35, "elapsed_time": "5:53:21", "remaining_time": "3:15:45", "throughput": 2337.03, "total_tokens": 49548928} {"current_steps": 25745, "total_steps": 40000, "loss": 0.0671, "lr": 1.4100370103917554e-05, "epoch": 4.199853169100253, "percentage": 64.36, "elapsed_time": "5:53:23", "remaining_time": "3:15:40", "throughput": 2337.21, "total_tokens": 49557744} {"current_steps": 25750, "total_steps": 40000, "loss": 0.0003, "lr": 1.409153566843907e-05, "epoch": 4.20066889632107, "percentage": 64.38, "elapsed_time": "5:53:25", "remaining_time": "3:15:35", "throughput": 2337.44, "total_tokens": 49567376} {"current_steps": 25755, "total_steps": 40000, "loss": 0.0002, "lr": 1.408270291518286e-05, "epoch": 4.201484623541887, "percentage": 64.39, "elapsed_time": "5:53:27", "remaining_time": "3:15:30", "throughput": 2337.69, "total_tokens": 49577632} {"current_steps": 25760, "total_steps": 40000, "loss": 0.0205, "lr": 1.407387184551107e-05, "epoch": 4.202300350762705, "percentage": 64.4, "elapsed_time": "5:53:30", "remaining_time": "3:15:24", "throughput": 2337.93, "total_tokens": 49587600} {"current_steps": 25765, "total_steps": 40000, "loss": 0.001, "lr": 1.4065042460785532e-05, "epoch": 4.203116077983522, "percentage": 64.41, "elapsed_time": "5:53:32", "remaining_time": "3:15:19", "throughput": 2338.12, "total_tokens": 49596480} {"current_steps": 25770, "total_steps": 40000, "loss": 0.1191, "lr": 1.405621476236787e-05, "epoch": 4.2039318052043395, "percentage": 64.42, "elapsed_time": "5:53:34", "remaining_time": "3:15:14", "throughput": 2338.29, "total_tokens": 49604896} {"current_steps": 25775, "total_steps": 40000, "loss": 0.0001, "lr": 1.4047388751619423e-05, "epoch": 4.204747532425157, "percentage": 64.44, "elapsed_time": "5:53:36", "remaining_time": "3:15:09", "throughput": 2338.55, "total_tokens": 49615248} {"current_steps": 25780, "total_steps": 40000, "loss": 0.0835, "lr": 1.4038564429901264e-05, "epoch": 4.205563259645975, "percentage": 64.45, "elapsed_time": "5:53:38", "remaining_time": "3:15:03", "throughput": 2338.79, "total_tokens": 49625136} {"current_steps": 25785, "total_steps": 40000, "loss": 0.0054, "lr": 1.4029741798574227e-05, "epoch": 4.206378986866792, "percentage": 64.46, "elapsed_time": "5:53:40", "remaining_time": "3:14:58", "throughput": 2338.99, "total_tokens": 49634240} {"current_steps": 25790, "total_steps": 40000, "loss": 0.0003, "lr": 1.402092085899886e-05, "epoch": 4.207194714087609, "percentage": 64.48, "elapsed_time": "5:53:42", "remaining_time": "3:14:53", "throughput": 2339.2, "total_tokens": 49643456} {"current_steps": 25795, "total_steps": 40000, "loss": 0.0005, "lr": 1.4012101612535464e-05, "epoch": 4.208010441308426, "percentage": 64.49, "elapsed_time": "5:53:44", "remaining_time": "3:14:48", "throughput": 2339.42, "total_tokens": 49652896} {"current_steps": 25800, "total_steps": 40000, "loss": 0.0571, "lr": 1.4003284060544092e-05, "epoch": 4.208826168529244, "percentage": 64.5, "elapsed_time": "5:53:46", "remaining_time": "3:14:42", "throughput": 2339.63, "total_tokens": 49662304} {"current_steps": 25800, "total_steps": 40000, "eval_loss": 0.3478054702281952, "epoch": 4.208826168529244, "percentage": 64.5, "elapsed_time": "5:55:07", "remaining_time": "3:15:27", "throughput": 2330.76, "total_tokens": 49662304} {"current_steps": 25805, "total_steps": 40000, "loss": 0.0002, "lr": 1.3994468204384504e-05, "epoch": 4.209641895750061, "percentage": 64.51, "elapsed_time": "5:55:11", "remaining_time": "3:15:22", "throughput": 2330.79, "total_tokens": 49671504} {"current_steps": 25810, "total_steps": 40000, "loss": 0.1403, "lr": 1.398565404541622e-05, "epoch": 4.210457622970878, "percentage": 64.53, "elapsed_time": "5:55:13", "remaining_time": "3:15:17", "throughput": 2331.03, "total_tokens": 49681504} {"current_steps": 25815, "total_steps": 40000, "loss": 0.0492, "lr": 1.3976841584998513e-05, "epoch": 4.211273350191696, "percentage": 64.54, "elapsed_time": "5:55:15", "remaining_time": "3:15:12", "throughput": 2331.28, "total_tokens": 49691616} {"current_steps": 25820, "total_steps": 40000, "loss": 0.0486, "lr": 1.3968030824490352e-05, "epoch": 4.2120890774125135, "percentage": 64.55, "elapsed_time": "5:55:17", "remaining_time": "3:15:07", "throughput": 2331.48, "total_tokens": 49700848} {"current_steps": 25825, "total_steps": 40000, "loss": 0.0001, "lr": 1.3959221765250469e-05, "epoch": 4.212904804633331, "percentage": 64.56, "elapsed_time": "5:55:19", "remaining_time": "3:15:01", "throughput": 2331.75, "total_tokens": 49711344} {"current_steps": 25830, "total_steps": 40000, "loss": 0.0021, "lr": 1.3950414408637343e-05, "epoch": 4.213720531854148, "percentage": 64.58, "elapsed_time": "5:55:21", "remaining_time": "3:14:56", "throughput": 2331.93, "total_tokens": 49719952} {"current_steps": 25835, "total_steps": 40000, "loss": 0.0003, "lr": 1.3941608756009166e-05, "epoch": 4.214536259074965, "percentage": 64.59, "elapsed_time": "5:55:23", "remaining_time": "3:14:51", "throughput": 2332.12, "total_tokens": 49728832} {"current_steps": 25840, "total_steps": 40000, "loss": 0.0561, "lr": 1.3932804808723898e-05, "epoch": 4.215351986295783, "percentage": 64.6, "elapsed_time": "5:55:25", "remaining_time": "3:14:46", "throughput": 2332.38, "total_tokens": 49739264} {"current_steps": 25845, "total_steps": 40000, "loss": 0.0, "lr": 1.3924002568139194e-05, "epoch": 4.2161677135166, "percentage": 64.61, "elapsed_time": "5:55:27", "remaining_time": "3:14:40", "throughput": 2332.6, "total_tokens": 49748720} {"current_steps": 25850, "total_steps": 40000, "loss": 0.0001, "lr": 1.3915202035612485e-05, "epoch": 4.216983440737417, "percentage": 64.62, "elapsed_time": "5:55:29", "remaining_time": "3:14:35", "throughput": 2332.8, "total_tokens": 49757920} {"current_steps": 25855, "total_steps": 40000, "loss": 0.1883, "lr": 1.3906403212500935e-05, "epoch": 4.217799167958235, "percentage": 64.64, "elapsed_time": "5:55:31", "remaining_time": "3:14:30", "throughput": 2333.04, "total_tokens": 49767808} {"current_steps": 25860, "total_steps": 40000, "loss": 0.0092, "lr": 1.3897606100161409e-05, "epoch": 4.218614895179052, "percentage": 64.65, "elapsed_time": "5:55:33", "remaining_time": "3:14:25", "throughput": 2333.29, "total_tokens": 49777952} {"current_steps": 25865, "total_steps": 40000, "loss": 0.0005, "lr": 1.388881069995055e-05, "epoch": 4.219430622399869, "percentage": 64.66, "elapsed_time": "5:55:36", "remaining_time": "3:14:20", "throughput": 2333.55, "total_tokens": 49788928} {"current_steps": 25870, "total_steps": 40000, "loss": 0.0004, "lr": 1.3880017013224708e-05, "epoch": 4.220246349620687, "percentage": 64.68, "elapsed_time": "5:55:38", "remaining_time": "3:14:14", "throughput": 2333.79, "total_tokens": 49799008} {"current_steps": 25875, "total_steps": 40000, "loss": 0.0168, "lr": 1.3871225041339984e-05, "epoch": 4.221062076841505, "percentage": 64.69, "elapsed_time": "5:55:40", "remaining_time": "3:14:09", "throughput": 2334.03, "total_tokens": 49808976} {"current_steps": 25880, "total_steps": 40000, "loss": 0.0006, "lr": 1.386243478565222e-05, "epoch": 4.221877804062322, "percentage": 64.7, "elapsed_time": "5:55:42", "remaining_time": "3:14:04", "throughput": 2334.3, "total_tokens": 49819456} {"current_steps": 25885, "total_steps": 40000, "loss": 0.0004, "lr": 1.3853646247516966e-05, "epoch": 4.222693531283139, "percentage": 64.71, "elapsed_time": "5:55:44", "remaining_time": "3:13:59", "throughput": 2334.52, "total_tokens": 49829152} {"current_steps": 25890, "total_steps": 40000, "loss": 0.0003, "lr": 1.3844859428289545e-05, "epoch": 4.223509258503956, "percentage": 64.72, "elapsed_time": "5:55:46", "remaining_time": "3:13:53", "throughput": 2334.73, "total_tokens": 49838336} {"current_steps": 25895, "total_steps": 40000, "loss": 0.0053, "lr": 1.3836074329324984e-05, "epoch": 4.224324985724774, "percentage": 64.74, "elapsed_time": "5:55:48", "remaining_time": "3:13:48", "throughput": 2334.96, "total_tokens": 49848048} {"current_steps": 25900, "total_steps": 40000, "loss": 0.0147, "lr": 1.3827290951978044e-05, "epoch": 4.225140712945591, "percentage": 64.75, "elapsed_time": "5:55:50", "remaining_time": "3:13:43", "throughput": 2335.24, "total_tokens": 49858880} {"current_steps": 25905, "total_steps": 40000, "loss": 0.0002, "lr": 1.381850929760326e-05, "epoch": 4.225956440166408, "percentage": 64.76, "elapsed_time": "5:55:52", "remaining_time": "3:13:38", "throughput": 2335.48, "total_tokens": 49868992} {"current_steps": 25910, "total_steps": 40000, "loss": 0.0001, "lr": 1.3809729367554842e-05, "epoch": 4.226772167387225, "percentage": 64.78, "elapsed_time": "5:55:54", "remaining_time": "3:13:32", "throughput": 2335.68, "total_tokens": 49878032} {"current_steps": 25915, "total_steps": 40000, "loss": 0.1404, "lr": 1.3800951163186784e-05, "epoch": 4.227587894608043, "percentage": 64.79, "elapsed_time": "5:55:56", "remaining_time": "3:13:27", "throughput": 2335.91, "total_tokens": 49887792} {"current_steps": 25920, "total_steps": 40000, "loss": 0.001, "lr": 1.3792174685852801e-05, "epoch": 4.2284036218288605, "percentage": 64.8, "elapsed_time": "5:55:58", "remaining_time": "3:13:22", "throughput": 2336.13, "total_tokens": 49897392} {"current_steps": 25925, "total_steps": 40000, "loss": 0.0044, "lr": 1.378339993690632e-05, "epoch": 4.229219349049678, "percentage": 64.81, "elapsed_time": "5:56:01", "remaining_time": "3:13:17", "throughput": 2336.38, "total_tokens": 49907600} {"current_steps": 25930, "total_steps": 40000, "loss": 0.0012, "lr": 1.3774626917700523e-05, "epoch": 4.230035076270495, "percentage": 64.83, "elapsed_time": "5:56:03", "remaining_time": "3:13:11", "throughput": 2336.67, "total_tokens": 49918480} {"current_steps": 25935, "total_steps": 40000, "loss": 0.0582, "lr": 1.3765855629588334e-05, "epoch": 4.230850803491313, "percentage": 64.84, "elapsed_time": "5:56:05", "remaining_time": "3:13:06", "throughput": 2336.91, "total_tokens": 49928576} {"current_steps": 25940, "total_steps": 40000, "loss": 0.0177, "lr": 1.3757086073922374e-05, "epoch": 4.23166653071213, "percentage": 64.85, "elapsed_time": "5:56:07", "remaining_time": "3:13:01", "throughput": 2337.12, "total_tokens": 49937888} {"current_steps": 25945, "total_steps": 40000, "loss": 0.0019, "lr": 1.3748318252055038e-05, "epoch": 4.232482257932947, "percentage": 64.86, "elapsed_time": "5:56:09", "remaining_time": "3:12:56", "throughput": 2337.3, "total_tokens": 49946496} {"current_steps": 25950, "total_steps": 40000, "loss": 0.0005, "lr": 1.3739552165338416e-05, "epoch": 4.233297985153764, "percentage": 64.88, "elapsed_time": "5:56:11", "remaining_time": "3:12:51", "throughput": 2337.54, "total_tokens": 49956528} {"current_steps": 25955, "total_steps": 40000, "loss": 0.0013, "lr": 1.3730787815124354e-05, "epoch": 4.234113712374582, "percentage": 64.89, "elapsed_time": "5:56:13", "remaining_time": "3:12:45", "throughput": 2337.76, "total_tokens": 49966080} {"current_steps": 25960, "total_steps": 40000, "loss": 0.0005, "lr": 1.3722025202764443e-05, "epoch": 4.234929439595399, "percentage": 64.9, "elapsed_time": "5:56:15", "remaining_time": "3:12:40", "throughput": 2337.98, "total_tokens": 49975680} {"current_steps": 25965, "total_steps": 40000, "loss": 0.0001, "lr": 1.371326432960997e-05, "epoch": 4.2357451668162165, "percentage": 64.91, "elapsed_time": "5:56:17", "remaining_time": "3:12:35", "throughput": 2338.17, "total_tokens": 49984560} {"current_steps": 25970, "total_steps": 40000, "loss": 0.0002, "lr": 1.3704505197011969e-05, "epoch": 4.236560894037034, "percentage": 64.92, "elapsed_time": "5:56:19", "remaining_time": "3:12:30", "throughput": 2338.32, "total_tokens": 49992448} {"current_steps": 25975, "total_steps": 40000, "loss": 0.0006, "lr": 1.3695747806321224e-05, "epoch": 4.237376621257852, "percentage": 64.94, "elapsed_time": "5:56:21", "remaining_time": "3:12:24", "throughput": 2338.59, "total_tokens": 50003184} {"current_steps": 25980, "total_steps": 40000, "loss": 0.0002, "lr": 1.3686992158888212e-05, "epoch": 4.238192348478669, "percentage": 64.95, "elapsed_time": "5:56:23", "remaining_time": "3:12:19", "throughput": 2338.83, "total_tokens": 50013104} {"current_steps": 25985, "total_steps": 40000, "loss": 0.2253, "lr": 1.367823825606319e-05, "epoch": 4.239008075699486, "percentage": 64.96, "elapsed_time": "5:56:25", "remaining_time": "3:12:14", "throughput": 2339.0, "total_tokens": 50021680} {"current_steps": 25990, "total_steps": 40000, "loss": 0.0002, "lr": 1.36694860991961e-05, "epoch": 4.239823802920303, "percentage": 64.98, "elapsed_time": "5:56:27", "remaining_time": "3:12:09", "throughput": 2339.2, "total_tokens": 50030752} {"current_steps": 25995, "total_steps": 40000, "loss": 0.0002, "lr": 1.3660735689636636e-05, "epoch": 4.240639530141121, "percentage": 64.99, "elapsed_time": "5:56:30", "remaining_time": "3:12:04", "throughput": 2339.39, "total_tokens": 50039664} {"current_steps": 26000, "total_steps": 40000, "loss": 0.0, "lr": 1.365198702873424e-05, "epoch": 4.241455257361938, "percentage": 65.0, "elapsed_time": "5:56:32", "remaining_time": "3:11:58", "throughput": 2339.62, "total_tokens": 50049312} {"current_steps": 26000, "total_steps": 40000, "eval_loss": 0.33171892166137695, "epoch": 4.241455257361938, "percentage": 65.0, "elapsed_time": "5:57:52", "remaining_time": "3:12:42", "throughput": 2330.81, "total_tokens": 50049312} {"current_steps": 26005, "total_steps": 40000, "loss": 0.0001, "lr": 1.364324011783804e-05, "epoch": 4.242270984582755, "percentage": 65.01, "elapsed_time": "5:57:56", "remaining_time": "3:12:38", "throughput": 2330.81, "total_tokens": 50058128} {"current_steps": 26010, "total_steps": 40000, "loss": 0.0236, "lr": 1.3634494958296934e-05, "epoch": 4.243086711803572, "percentage": 65.03, "elapsed_time": "5:57:58", "remaining_time": "3:12:32", "throughput": 2331.08, "total_tokens": 50068784} {"current_steps": 26015, "total_steps": 40000, "loss": 0.0002, "lr": 1.3625751551459542e-05, "epoch": 4.2439024390243905, "percentage": 65.04, "elapsed_time": "5:58:00", "remaining_time": "3:12:27", "throughput": 2331.33, "total_tokens": 50078960} {"current_steps": 26020, "total_steps": 40000, "loss": 0.0004, "lr": 1.3617009898674188e-05, "epoch": 4.244718166245208, "percentage": 65.05, "elapsed_time": "5:58:02", "remaining_time": "3:12:22", "throughput": 2331.57, "total_tokens": 50088928} {"current_steps": 26025, "total_steps": 40000, "loss": 0.0001, "lr": 1.3608270001288967e-05, "epoch": 4.245533893466025, "percentage": 65.06, "elapsed_time": "5:58:05", "remaining_time": "3:12:17", "throughput": 2331.77, "total_tokens": 50098240} {"current_steps": 26030, "total_steps": 40000, "loss": 0.0003, "lr": 1.359953186065166e-05, "epoch": 4.246349620686843, "percentage": 65.08, "elapsed_time": "5:58:07", "remaining_time": "3:12:11", "throughput": 2331.98, "total_tokens": 50107440} {"current_steps": 26035, "total_steps": 40000, "loss": 0.0002, "lr": 1.3590795478109814e-05, "epoch": 4.24716534790766, "percentage": 65.09, "elapsed_time": "5:58:09", "remaining_time": "3:12:06", "throughput": 2332.17, "total_tokens": 50116448} {"current_steps": 26040, "total_steps": 40000, "loss": 0.0002, "lr": 1.3582060855010675e-05, "epoch": 4.247981075128477, "percentage": 65.1, "elapsed_time": "5:58:11", "remaining_time": "3:12:01", "throughput": 2332.41, "total_tokens": 50126432} {"current_steps": 26045, "total_steps": 40000, "loss": 0.0001, "lr": 1.3573327992701245e-05, "epoch": 4.248796802349294, "percentage": 65.11, "elapsed_time": "5:58:13", "remaining_time": "3:11:56", "throughput": 2332.62, "total_tokens": 50135744} {"current_steps": 26050, "total_steps": 40000, "loss": 0.1114, "lr": 1.356459689252823e-05, "epoch": 4.249612529570112, "percentage": 65.12, "elapsed_time": "5:58:15", "remaining_time": "3:11:50", "throughput": 2332.9, "total_tokens": 50146608} {"current_steps": 26055, "total_steps": 40000, "loss": 0.1589, "lr": 1.3555867555838087e-05, "epoch": 4.250428256790929, "percentage": 65.14, "elapsed_time": "5:58:17", "remaining_time": "3:11:45", "throughput": 2333.14, "total_tokens": 50156752} {"current_steps": 26060, "total_steps": 40000, "loss": 0.0001, "lr": 1.3547139983976975e-05, "epoch": 4.251243984011746, "percentage": 65.15, "elapsed_time": "5:58:19", "remaining_time": "3:11:40", "throughput": 2333.35, "total_tokens": 50166112} {"current_steps": 26065, "total_steps": 40000, "loss": 0.0003, "lr": 1.3538414178290815e-05, "epoch": 4.2520597112325635, "percentage": 65.16, "elapsed_time": "5:58:21", "remaining_time": "3:11:35", "throughput": 2333.62, "total_tokens": 50176800} {"current_steps": 26070, "total_steps": 40000, "loss": 0.0002, "lr": 1.3529690140125209e-05, "epoch": 4.252875438453382, "percentage": 65.18, "elapsed_time": "5:58:23", "remaining_time": "3:11:30", "throughput": 2333.85, "total_tokens": 50186576} {"current_steps": 26075, "total_steps": 40000, "loss": 0.0006, "lr": 1.352096787082553e-05, "epoch": 4.253691165674199, "percentage": 65.19, "elapsed_time": "5:58:25", "remaining_time": "3:11:24", "throughput": 2334.13, "total_tokens": 50197280} {"current_steps": 26080, "total_steps": 40000, "loss": 0.0015, "lr": 1.3512247371736871e-05, "epoch": 4.254506892895016, "percentage": 65.2, "elapsed_time": "5:58:27", "remaining_time": "3:11:19", "throughput": 2334.3, "total_tokens": 50205840} {"current_steps": 26085, "total_steps": 40000, "loss": 0.0001, "lr": 1.3503528644204022e-05, "epoch": 4.255322620115833, "percentage": 65.21, "elapsed_time": "5:58:29", "remaining_time": "3:11:14", "throughput": 2334.56, "total_tokens": 50216320} {"current_steps": 26090, "total_steps": 40000, "loss": 0.0008, "lr": 1.349481168957153e-05, "epoch": 4.256138347336651, "percentage": 65.22, "elapsed_time": "5:58:32", "remaining_time": "3:11:09", "throughput": 2334.76, "total_tokens": 50225584} {"current_steps": 26095, "total_steps": 40000, "loss": 0.0004, "lr": 1.3486096509183665e-05, "epoch": 4.256954074557468, "percentage": 65.24, "elapsed_time": "5:58:34", "remaining_time": "3:11:04", "throughput": 2334.92, "total_tokens": 50233808} {"current_steps": 26100, "total_steps": 40000, "loss": 0.0001, "lr": 1.3477383104384406e-05, "epoch": 4.257769801778285, "percentage": 65.25, "elapsed_time": "5:58:36", "remaining_time": "3:10:58", "throughput": 2335.2, "total_tokens": 50244672} {"current_steps": 26105, "total_steps": 40000, "loss": 0.0835, "lr": 1.3468671476517481e-05, "epoch": 4.258585528999102, "percentage": 65.26, "elapsed_time": "5:58:38", "remaining_time": "3:10:53", "throughput": 2335.42, "total_tokens": 50254288} {"current_steps": 26110, "total_steps": 40000, "loss": 0.0002, "lr": 1.3459961626926326e-05, "epoch": 4.25940125621992, "percentage": 65.28, "elapsed_time": "5:58:40", "remaining_time": "3:10:48", "throughput": 2335.64, "total_tokens": 50263760} {"current_steps": 26115, "total_steps": 40000, "loss": 0.013, "lr": 1.3451253556954101e-05, "epoch": 4.2602169834407375, "percentage": 65.29, "elapsed_time": "5:58:42", "remaining_time": "3:10:43", "throughput": 2335.85, "total_tokens": 50273120} {"current_steps": 26120, "total_steps": 40000, "loss": 0.0004, "lr": 1.3442547267943717e-05, "epoch": 4.261032710661555, "percentage": 65.3, "elapsed_time": "5:58:44", "remaining_time": "3:10:37", "throughput": 2336.03, "total_tokens": 50281872} {"current_steps": 26125, "total_steps": 40000, "loss": 0.0814, "lr": 1.3433842761237774e-05, "epoch": 4.261848437882372, "percentage": 65.31, "elapsed_time": "5:58:46", "remaining_time": "3:10:32", "throughput": 2336.2, "total_tokens": 50290464} {"current_steps": 26130, "total_steps": 40000, "loss": 0.0001, "lr": 1.3425140038178639e-05, "epoch": 4.26266416510319, "percentage": 65.33, "elapsed_time": "5:58:48", "remaining_time": "3:10:27", "throughput": 2336.38, "total_tokens": 50299200} {"current_steps": 26135, "total_steps": 40000, "loss": 0.0001, "lr": 1.3416439100108358e-05, "epoch": 4.263479892324007, "percentage": 65.34, "elapsed_time": "5:58:50", "remaining_time": "3:10:22", "throughput": 2336.59, "total_tokens": 50308576} {"current_steps": 26140, "total_steps": 40000, "loss": 0.0002, "lr": 1.3407739948368734e-05, "epoch": 4.264295619544824, "percentage": 65.35, "elapsed_time": "5:58:52", "remaining_time": "3:10:17", "throughput": 2336.81, "total_tokens": 50317968} {"current_steps": 26145, "total_steps": 40000, "loss": 0.0001, "lr": 1.3399042584301298e-05, "epoch": 4.265111346765641, "percentage": 65.36, "elapsed_time": "5:58:54", "remaining_time": "3:10:11", "throughput": 2336.94, "total_tokens": 50325792} {"current_steps": 26150, "total_steps": 40000, "loss": 0.0002, "lr": 1.3390347009247272e-05, "epoch": 4.265927073986459, "percentage": 65.38, "elapsed_time": "5:58:56", "remaining_time": "3:10:06", "throughput": 2337.19, "total_tokens": 50335984} {"current_steps": 26155, "total_steps": 40000, "loss": 0.1463, "lr": 1.3381653224547635e-05, "epoch": 4.266742801207276, "percentage": 65.39, "elapsed_time": "5:58:59", "remaining_time": "3:10:01", "throughput": 2337.43, "total_tokens": 50346048} {"current_steps": 26160, "total_steps": 40000, "loss": 0.0631, "lr": 1.3372961231543086e-05, "epoch": 4.2675585284280935, "percentage": 65.4, "elapsed_time": "5:59:01", "remaining_time": "3:09:56", "throughput": 2337.6, "total_tokens": 50354400} {"current_steps": 26165, "total_steps": 40000, "loss": 0.1624, "lr": 1.3364271031574016e-05, "epoch": 4.268374255648911, "percentage": 65.41, "elapsed_time": "5:59:03", "remaining_time": "3:09:51", "throughput": 2337.87, "total_tokens": 50365248} {"current_steps": 26170, "total_steps": 40000, "loss": 0.0002, "lr": 1.335558262598059e-05, "epoch": 4.269189982869729, "percentage": 65.42, "elapsed_time": "5:59:05", "remaining_time": "3:09:45", "throughput": 2338.15, "total_tokens": 50376128} {"current_steps": 26175, "total_steps": 40000, "loss": 0.0001, "lr": 1.3346896016102645e-05, "epoch": 4.270005710090546, "percentage": 65.44, "elapsed_time": "5:59:07", "remaining_time": "3:09:40", "throughput": 2338.35, "total_tokens": 50385168} {"current_steps": 26180, "total_steps": 40000, "loss": 0.128, "lr": 1.3338211203279788e-05, "epoch": 4.270821437311363, "percentage": 65.45, "elapsed_time": "5:59:09", "remaining_time": "3:09:35", "throughput": 2338.54, "total_tokens": 50394160} {"current_steps": 26185, "total_steps": 40000, "loss": 0.0003, "lr": 1.3329528188851303e-05, "epoch": 4.27163716453218, "percentage": 65.46, "elapsed_time": "5:59:11", "remaining_time": "3:09:30", "throughput": 2338.79, "total_tokens": 50404288} {"current_steps": 26190, "total_steps": 40000, "loss": 0.1106, "lr": 1.3320846974156242e-05, "epoch": 4.272452891752998, "percentage": 65.48, "elapsed_time": "5:59:13", "remaining_time": "3:09:25", "throughput": 2339.02, "total_tokens": 50414160} {"current_steps": 26195, "total_steps": 40000, "loss": 0.0003, "lr": 1.3312167560533337e-05, "epoch": 4.273268618973815, "percentage": 65.49, "elapsed_time": "5:59:15", "remaining_time": "3:09:20", "throughput": 2339.24, "total_tokens": 50423760} {"current_steps": 26200, "total_steps": 40000, "loss": 0.0008, "lr": 1.3303489949321082e-05, "epoch": 4.274084346194632, "percentage": 65.5, "elapsed_time": "5:59:17", "remaining_time": "3:09:14", "throughput": 2339.44, "total_tokens": 50433008} {"current_steps": 26200, "total_steps": 40000, "eval_loss": 0.2933138310909271, "epoch": 4.274084346194632, "percentage": 65.5, "elapsed_time": "6:00:38", "remaining_time": "3:09:57", "throughput": 2330.7, "total_tokens": 50433008} {"current_steps": 26205, "total_steps": 40000, "loss": 0.0861, "lr": 1.3294814141857653e-05, "epoch": 4.27490007341545, "percentage": 65.51, "elapsed_time": "6:00:42", "remaining_time": "3:09:53", "throughput": 2330.74, "total_tokens": 50442320} {"current_steps": 26210, "total_steps": 40000, "loss": 0.0009, "lr": 1.3286140139480992e-05, "epoch": 4.275715800636267, "percentage": 65.53, "elapsed_time": "6:00:44", "remaining_time": "3:09:47", "throughput": 2330.92, "total_tokens": 50451120} {"current_steps": 26215, "total_steps": 40000, "loss": 0.0942, "lr": 1.3277467943528719e-05, "epoch": 4.276531527857085, "percentage": 65.54, "elapsed_time": "6:00:46", "remaining_time": "3:09:42", "throughput": 2331.15, "total_tokens": 50461024} {"current_steps": 26220, "total_steps": 40000, "loss": 0.117, "lr": 1.3268797555338203e-05, "epoch": 4.277347255077902, "percentage": 65.55, "elapsed_time": "6:00:48", "remaining_time": "3:09:37", "throughput": 2331.38, "total_tokens": 50470672} {"current_steps": 26225, "total_steps": 40000, "loss": 0.0007, "lr": 1.3260128976246533e-05, "epoch": 4.27816298229872, "percentage": 65.56, "elapsed_time": "6:00:50", "remaining_time": "3:09:32", "throughput": 2331.6, "total_tokens": 50480304} {"current_steps": 26230, "total_steps": 40000, "loss": 0.002, "lr": 1.32514622075905e-05, "epoch": 4.278978709519537, "percentage": 65.58, "elapsed_time": "6:00:52", "remaining_time": "3:09:26", "throughput": 2331.79, "total_tokens": 50489328} {"current_steps": 26235, "total_steps": 40000, "loss": 0.0346, "lr": 1.3242797250706638e-05, "epoch": 4.279794436740354, "percentage": 65.59, "elapsed_time": "6:00:54", "remaining_time": "3:09:21", "throughput": 2332.01, "total_tokens": 50498944} {"current_steps": 26240, "total_steps": 40000, "loss": 0.0083, "lr": 1.3234134106931195e-05, "epoch": 4.280610163961171, "percentage": 65.6, "elapsed_time": "6:00:56", "remaining_time": "3:09:16", "throughput": 2332.26, "total_tokens": 50509168} {"current_steps": 26245, "total_steps": 40000, "loss": 0.0002, "lr": 1.322547277760013e-05, "epoch": 4.281425891181989, "percentage": 65.61, "elapsed_time": "6:00:58", "remaining_time": "3:09:11", "throughput": 2332.5, "total_tokens": 50519248} {"current_steps": 26250, "total_steps": 40000, "loss": 0.0001, "lr": 1.3216813264049132e-05, "epoch": 4.282241618402806, "percentage": 65.62, "elapsed_time": "6:01:00", "remaining_time": "3:09:06", "throughput": 2332.72, "total_tokens": 50528816} {"current_steps": 26255, "total_steps": 40000, "loss": 0.0008, "lr": 1.32081555676136e-05, "epoch": 4.283057345623623, "percentage": 65.64, "elapsed_time": "6:01:02", "remaining_time": "3:09:00", "throughput": 2332.95, "total_tokens": 50538528} {"current_steps": 26260, "total_steps": 40000, "loss": 0.0011, "lr": 1.3199499689628674e-05, "epoch": 4.2838730728444405, "percentage": 65.65, "elapsed_time": "6:01:05", "remaining_time": "3:08:55", "throughput": 2333.19, "total_tokens": 50548608} {"current_steps": 26265, "total_steps": 40000, "loss": 0.0004, "lr": 1.3190845631429192e-05, "epoch": 4.2846888000652585, "percentage": 65.66, "elapsed_time": "6:01:07", "remaining_time": "3:08:50", "throughput": 2333.4, "total_tokens": 50558128} {"current_steps": 26270, "total_steps": 40000, "loss": 0.0026, "lr": 1.3182193394349704e-05, "epoch": 4.285504527286076, "percentage": 65.67, "elapsed_time": "6:01:09", "remaining_time": "3:08:45", "throughput": 2333.65, "total_tokens": 50568304} {"current_steps": 26275, "total_steps": 40000, "loss": 0.0016, "lr": 1.3173542979724507e-05, "epoch": 4.286320254506893, "percentage": 65.69, "elapsed_time": "6:01:11", "remaining_time": "3:08:40", "throughput": 2333.84, "total_tokens": 50577232} {"current_steps": 26280, "total_steps": 40000, "loss": 0.0009, "lr": 1.3164894388887617e-05, "epoch": 4.28713598172771, "percentage": 65.7, "elapsed_time": "6:01:13", "remaining_time": "3:08:34", "throughput": 2334.04, "total_tokens": 50586448} {"current_steps": 26285, "total_steps": 40000, "loss": 0.0499, "lr": 1.3156247623172727e-05, "epoch": 4.287951708948528, "percentage": 65.71, "elapsed_time": "6:01:15", "remaining_time": "3:08:29", "throughput": 2334.29, "total_tokens": 50596640} {"current_steps": 26290, "total_steps": 40000, "loss": 0.0005, "lr": 1.3147602683913302e-05, "epoch": 4.288767436169345, "percentage": 65.72, "elapsed_time": "6:01:17", "remaining_time": "3:08:24", "throughput": 2334.53, "total_tokens": 50606784} {"current_steps": 26295, "total_steps": 40000, "loss": 0.1147, "lr": 1.3138959572442481e-05, "epoch": 4.289583163390162, "percentage": 65.74, "elapsed_time": "6:01:19", "remaining_time": "3:08:19", "throughput": 2334.72, "total_tokens": 50615696} {"current_steps": 26300, "total_steps": 40000, "loss": 0.0025, "lr": 1.3130318290093146e-05, "epoch": 4.290398890610979, "percentage": 65.75, "elapsed_time": "6:01:21", "remaining_time": "3:08:14", "throughput": 2334.89, "total_tokens": 50624176} {"current_steps": 26305, "total_steps": 40000, "loss": 0.0006, "lr": 1.3121678838197909e-05, "epoch": 4.291214617831797, "percentage": 65.76, "elapsed_time": "6:01:23", "remaining_time": "3:08:09", "throughput": 2335.14, "total_tokens": 50634432} {"current_steps": 26310, "total_steps": 40000, "loss": 0.0002, "lr": 1.3113041218089056e-05, "epoch": 4.2920303450526145, "percentage": 65.77, "elapsed_time": "6:01:25", "remaining_time": "3:08:03", "throughput": 2335.37, "total_tokens": 50644256} {"current_steps": 26315, "total_steps": 40000, "loss": 0.0006, "lr": 1.3104405431098626e-05, "epoch": 4.292846072273432, "percentage": 65.79, "elapsed_time": "6:01:27", "remaining_time": "3:07:58", "throughput": 2335.59, "total_tokens": 50653760} {"current_steps": 26320, "total_steps": 40000, "loss": 0.0009, "lr": 1.3095771478558377e-05, "epoch": 4.293661799494249, "percentage": 65.8, "elapsed_time": "6:01:29", "remaining_time": "3:07:53", "throughput": 2335.8, "total_tokens": 50663264} {"current_steps": 26325, "total_steps": 40000, "loss": 0.0005, "lr": 1.3087139361799766e-05, "epoch": 4.294477526715067, "percentage": 65.81, "elapsed_time": "6:01:31", "remaining_time": "3:07:48", "throughput": 2336.01, "total_tokens": 50672704} {"current_steps": 26330, "total_steps": 40000, "loss": 0.0004, "lr": 1.3078509082153964e-05, "epoch": 4.295293253935884, "percentage": 65.83, "elapsed_time": "6:01:34", "remaining_time": "3:07:43", "throughput": 2336.23, "total_tokens": 50682160} {"current_steps": 26335, "total_steps": 40000, "loss": 0.1173, "lr": 1.3069880640951885e-05, "epoch": 4.296108981156701, "percentage": 65.84, "elapsed_time": "6:01:36", "remaining_time": "3:07:37", "throughput": 2336.41, "total_tokens": 50690960} {"current_steps": 26340, "total_steps": 40000, "loss": 0.0755, "lr": 1.3061254039524123e-05, "epoch": 4.296924708377518, "percentage": 65.85, "elapsed_time": "6:01:38", "remaining_time": "3:07:32", "throughput": 2336.62, "total_tokens": 50700496} {"current_steps": 26345, "total_steps": 40000, "loss": 0.1825, "lr": 1.3052629279201028e-05, "epoch": 4.297740435598336, "percentage": 65.86, "elapsed_time": "6:01:40", "remaining_time": "3:07:27", "throughput": 2336.86, "total_tokens": 50710480} {"current_steps": 26350, "total_steps": 40000, "loss": 0.0007, "lr": 1.3044006361312633e-05, "epoch": 4.298556162819153, "percentage": 65.88, "elapsed_time": "6:01:42", "remaining_time": "3:07:22", "throughput": 2337.1, "total_tokens": 50720528} {"current_steps": 26355, "total_steps": 40000, "loss": 0.0661, "lr": 1.30353852871887e-05, "epoch": 4.2993718900399704, "percentage": 65.89, "elapsed_time": "6:01:44", "remaining_time": "3:07:17", "throughput": 2337.26, "total_tokens": 50728880} {"current_steps": 26360, "total_steps": 40000, "loss": 0.0015, "lr": 1.302676605815873e-05, "epoch": 4.300187617260788, "percentage": 65.9, "elapsed_time": "6:01:46", "remaining_time": "3:07:12", "throughput": 2337.5, "total_tokens": 50738960} {"current_steps": 26365, "total_steps": 40000, "loss": 0.0867, "lr": 1.3018148675551884e-05, "epoch": 4.301003344481606, "percentage": 65.91, "elapsed_time": "6:01:48", "remaining_time": "3:07:06", "throughput": 2337.72, "total_tokens": 50748496} {"current_steps": 26370, "total_steps": 40000, "loss": 0.0836, "lr": 1.3009533140697094e-05, "epoch": 4.301819071702423, "percentage": 65.92, "elapsed_time": "6:01:50", "remaining_time": "3:07:01", "throughput": 2337.93, "total_tokens": 50757904} {"current_steps": 26375, "total_steps": 40000, "loss": 0.2654, "lr": 1.3000919454922966e-05, "epoch": 4.30263479892324, "percentage": 65.94, "elapsed_time": "6:01:52", "remaining_time": "3:06:56", "throughput": 2338.14, "total_tokens": 50767328} {"current_steps": 26380, "total_steps": 40000, "loss": 0.013, "lr": 1.299230761955785e-05, "epoch": 4.303450526144058, "percentage": 65.95, "elapsed_time": "6:01:54", "remaining_time": "3:06:51", "throughput": 2338.42, "total_tokens": 50778288} {"current_steps": 26385, "total_steps": 40000, "loss": 0.0054, "lr": 1.2983697635929807e-05, "epoch": 4.304266253364875, "percentage": 65.96, "elapsed_time": "6:01:56", "remaining_time": "3:06:46", "throughput": 2338.64, "total_tokens": 50787888} {"current_steps": 26390, "total_steps": 40000, "loss": 0.1406, "lr": 1.2975089505366584e-05, "epoch": 4.305081980585692, "percentage": 65.97, "elapsed_time": "6:01:58", "remaining_time": "3:06:40", "throughput": 2338.81, "total_tokens": 50796336} {"current_steps": 26395, "total_steps": 40000, "loss": 0.002, "lr": 1.2966483229195683e-05, "epoch": 4.305897707806509, "percentage": 65.99, "elapsed_time": "6:02:00", "remaining_time": "3:06:35", "throughput": 2339.03, "total_tokens": 50805968} {"current_steps": 26400, "total_steps": 40000, "loss": 0.0881, "lr": 1.2957878808744283e-05, "epoch": 4.306713435027326, "percentage": 66.0, "elapsed_time": "6:02:03", "remaining_time": "3:06:30", "throughput": 2339.26, "total_tokens": 50815824} {"current_steps": 26400, "total_steps": 40000, "eval_loss": 0.2597741484642029, "epoch": 4.306713435027326, "percentage": 66.0, "elapsed_time": "6:03:23", "remaining_time": "3:07:12", "throughput": 2330.6, "total_tokens": 50815824} {"current_steps": 26405, "total_steps": 40000, "loss": 0.0588, "lr": 1.294927624533931e-05, "epoch": 4.307529162248144, "percentage": 66.01, "elapsed_time": "6:03:27", "remaining_time": "3:07:07", "throughput": 2330.64, "total_tokens": 50825488} {"current_steps": 26410, "total_steps": 40000, "loss": 0.0009, "lr": 1.2940675540307378e-05, "epoch": 4.3083448894689615, "percentage": 66.03, "elapsed_time": "6:03:29", "remaining_time": "3:07:02", "throughput": 2330.88, "total_tokens": 50835568} {"current_steps": 26415, "total_steps": 40000, "loss": 0.0739, "lr": 1.2932076694974814e-05, "epoch": 4.309160616689779, "percentage": 66.04, "elapsed_time": "6:03:31", "remaining_time": "3:06:57", "throughput": 2331.08, "total_tokens": 50844688} {"current_steps": 26420, "total_steps": 40000, "loss": 0.0011, "lr": 1.2923479710667682e-05, "epoch": 4.309976343910597, "percentage": 66.05, "elapsed_time": "6:03:33", "remaining_time": "3:06:52", "throughput": 2331.27, "total_tokens": 50853760} {"current_steps": 26425, "total_steps": 40000, "loss": 0.001, "lr": 1.2914884588711751e-05, "epoch": 4.310792071131414, "percentage": 66.06, "elapsed_time": "6:03:35", "remaining_time": "3:06:47", "throughput": 2331.48, "total_tokens": 50862960} {"current_steps": 26430, "total_steps": 40000, "loss": 0.0798, "lr": 1.2906291330432475e-05, "epoch": 4.311607798352231, "percentage": 66.07, "elapsed_time": "6:03:37", "remaining_time": "3:06:41", "throughput": 2331.68, "total_tokens": 50872288} {"current_steps": 26435, "total_steps": 40000, "loss": 0.052, "lr": 1.2897699937155055e-05, "epoch": 4.312423525573048, "percentage": 66.09, "elapsed_time": "6:03:39", "remaining_time": "3:06:36", "throughput": 2331.93, "total_tokens": 50882560} {"current_steps": 26440, "total_steps": 40000, "loss": 0.0138, "lr": 1.2889110410204403e-05, "epoch": 4.313239252793866, "percentage": 66.1, "elapsed_time": "6:03:41", "remaining_time": "3:06:31", "throughput": 2332.14, "total_tokens": 50891968} {"current_steps": 26445, "total_steps": 40000, "loss": 0.0699, "lr": 1.2880522750905111e-05, "epoch": 4.314054980014683, "percentage": 66.11, "elapsed_time": "6:03:44", "remaining_time": "3:06:26", "throughput": 2332.39, "total_tokens": 50902160} {"current_steps": 26450, "total_steps": 40000, "loss": 0.0384, "lr": 1.2871936960581523e-05, "epoch": 4.3148707072355, "percentage": 66.12, "elapsed_time": "6:03:46", "remaining_time": "3:06:21", "throughput": 2332.61, "total_tokens": 50911872} {"current_steps": 26455, "total_steps": 40000, "loss": 0.0702, "lr": 1.2863353040557658e-05, "epoch": 4.3156864344563175, "percentage": 66.14, "elapsed_time": "6:03:48", "remaining_time": "3:06:16", "throughput": 2332.8, "total_tokens": 50920832} {"current_steps": 26460, "total_steps": 40000, "loss": 0.0021, "lr": 1.2854770992157273e-05, "epoch": 4.3165021616771355, "percentage": 66.15, "elapsed_time": "6:03:50", "remaining_time": "3:06:10", "throughput": 2333.02, "total_tokens": 50930592} {"current_steps": 26465, "total_steps": 40000, "loss": 0.0204, "lr": 1.2846190816703835e-05, "epoch": 4.317317888897953, "percentage": 66.16, "elapsed_time": "6:03:52", "remaining_time": "3:06:05", "throughput": 2333.25, "total_tokens": 50940272} {"current_steps": 26470, "total_steps": 40000, "loss": 0.0005, "lr": 1.2837612515520498e-05, "epoch": 4.31813361611877, "percentage": 66.17, "elapsed_time": "6:03:54", "remaining_time": "3:06:00", "throughput": 2333.47, "total_tokens": 50950016} {"current_steps": 26475, "total_steps": 40000, "loss": 0.0012, "lr": 1.2829036089930163e-05, "epoch": 4.318949343339587, "percentage": 66.19, "elapsed_time": "6:03:56", "remaining_time": "3:05:55", "throughput": 2333.72, "total_tokens": 50960336} {"current_steps": 26480, "total_steps": 40000, "loss": 0.0011, "lr": 1.2820461541255412e-05, "epoch": 4.319765070560405, "percentage": 66.2, "elapsed_time": "6:03:58", "remaining_time": "3:05:50", "throughput": 2333.93, "total_tokens": 50969680} {"current_steps": 26485, "total_steps": 40000, "loss": 0.0421, "lr": 1.2811888870818543e-05, "epoch": 4.320580797781222, "percentage": 66.21, "elapsed_time": "6:04:00", "remaining_time": "3:05:45", "throughput": 2334.12, "total_tokens": 50978800} {"current_steps": 26490, "total_steps": 40000, "loss": 0.0027, "lr": 1.2803318079941581e-05, "epoch": 4.321396525002039, "percentage": 66.22, "elapsed_time": "6:04:02", "remaining_time": "3:05:39", "throughput": 2334.35, "total_tokens": 50988640} {"current_steps": 26495, "total_steps": 40000, "loss": 0.0022, "lr": 1.2794749169946235e-05, "epoch": 4.322212252222856, "percentage": 66.24, "elapsed_time": "6:04:04", "remaining_time": "3:05:34", "throughput": 2334.59, "total_tokens": 50998720} {"current_steps": 26500, "total_steps": 40000, "loss": 0.073, "lr": 1.2786182142153952e-05, "epoch": 4.323027979443674, "percentage": 66.25, "elapsed_time": "6:04:06", "remaining_time": "3:05:29", "throughput": 2334.86, "total_tokens": 51009456} {"current_steps": 26505, "total_steps": 40000, "loss": 0.0022, "lr": 1.2777616997885878e-05, "epoch": 4.3238437066644915, "percentage": 66.26, "elapsed_time": "6:04:08", "remaining_time": "3:05:24", "throughput": 2335.09, "total_tokens": 51019328} {"current_steps": 26510, "total_steps": 40000, "loss": 0.0009, "lr": 1.2769053738462847e-05, "epoch": 4.324659433885309, "percentage": 66.27, "elapsed_time": "6:04:11", "remaining_time": "3:05:19", "throughput": 2335.3, "total_tokens": 51028784} {"current_steps": 26515, "total_steps": 40000, "loss": 0.002, "lr": 1.2760492365205434e-05, "epoch": 4.325475161106126, "percentage": 66.29, "elapsed_time": "6:04:13", "remaining_time": "3:05:14", "throughput": 2335.58, "total_tokens": 51039696} {"current_steps": 26520, "total_steps": 40000, "loss": 0.0002, "lr": 1.2751932879433919e-05, "epoch": 4.326290888326944, "percentage": 66.3, "elapsed_time": "6:04:15", "remaining_time": "3:05:08", "throughput": 2335.76, "total_tokens": 51048496} {"current_steps": 26525, "total_steps": 40000, "loss": 0.0094, "lr": 1.2743375282468267e-05, "epoch": 4.327106615547761, "percentage": 66.31, "elapsed_time": "6:04:17", "remaining_time": "3:05:03", "throughput": 2335.96, "total_tokens": 51057648} {"current_steps": 26530, "total_steps": 40000, "loss": 0.0006, "lr": 1.2734819575628182e-05, "epoch": 4.327922342768578, "percentage": 66.33, "elapsed_time": "6:04:19", "remaining_time": "3:04:58", "throughput": 2336.13, "total_tokens": 51066304} {"current_steps": 26535, "total_steps": 40000, "loss": 0.0134, "lr": 1.2726265760233039e-05, "epoch": 4.328738069989395, "percentage": 66.34, "elapsed_time": "6:04:21", "remaining_time": "3:04:53", "throughput": 2336.32, "total_tokens": 51075360} {"current_steps": 26540, "total_steps": 40000, "loss": 0.0785, "lr": 1.271771383760197e-05, "epoch": 4.329553797210213, "percentage": 66.35, "elapsed_time": "6:04:23", "remaining_time": "3:04:48", "throughput": 2336.56, "total_tokens": 51085296} {"current_steps": 26545, "total_steps": 40000, "loss": 0.0006, "lr": 1.2709163809053764e-05, "epoch": 4.33036952443103, "percentage": 66.36, "elapsed_time": "6:04:25", "remaining_time": "3:04:43", "throughput": 2336.83, "total_tokens": 51096064} {"current_steps": 26550, "total_steps": 40000, "loss": 0.0004, "lr": 1.2700615675906963e-05, "epoch": 4.331185251651847, "percentage": 66.38, "elapsed_time": "6:04:27", "remaining_time": "3:04:37", "throughput": 2337.03, "total_tokens": 51105392} {"current_steps": 26555, "total_steps": 40000, "loss": 0.0002, "lr": 1.269206943947978e-05, "epoch": 4.332000978872665, "percentage": 66.39, "elapsed_time": "6:04:29", "remaining_time": "3:04:32", "throughput": 2337.26, "total_tokens": 51115136} {"current_steps": 26560, "total_steps": 40000, "loss": 0.0233, "lr": 1.2683525101090177e-05, "epoch": 4.332816706093483, "percentage": 66.4, "elapsed_time": "6:04:31", "remaining_time": "3:04:27", "throughput": 2337.44, "total_tokens": 51123984} {"current_steps": 26565, "total_steps": 40000, "loss": 0.0021, "lr": 1.2674982662055765e-05, "epoch": 4.3336324333143, "percentage": 66.41, "elapsed_time": "6:04:33", "remaining_time": "3:04:22", "throughput": 2337.69, "total_tokens": 51134208} {"current_steps": 26570, "total_steps": 40000, "loss": 0.0003, "lr": 1.2666442123693922e-05, "epoch": 4.334448160535117, "percentage": 66.42, "elapsed_time": "6:04:35", "remaining_time": "3:04:17", "throughput": 2337.89, "total_tokens": 51143504} {"current_steps": 26575, "total_steps": 40000, "loss": 0.0559, "lr": 1.265790348732169e-05, "epoch": 4.335263887755934, "percentage": 66.44, "elapsed_time": "6:04:37", "remaining_time": "3:04:12", "throughput": 2338.1, "total_tokens": 51152928} {"current_steps": 26580, "total_steps": 40000, "loss": 0.0286, "lr": 1.264936675425584e-05, "epoch": 4.336079614976752, "percentage": 66.45, "elapsed_time": "6:04:40", "remaining_time": "3:04:07", "throughput": 2338.32, "total_tokens": 51162512} {"current_steps": 26585, "total_steps": 40000, "loss": 0.0016, "lr": 1.2640831925812852e-05, "epoch": 4.336895342197569, "percentage": 66.46, "elapsed_time": "6:04:42", "remaining_time": "3:04:01", "throughput": 2338.56, "total_tokens": 51172704} {"current_steps": 26590, "total_steps": 40000, "loss": 0.0003, "lr": 1.263229900330889e-05, "epoch": 4.337711069418386, "percentage": 66.47, "elapsed_time": "6:04:44", "remaining_time": "3:03:56", "throughput": 2338.74, "total_tokens": 51181488} {"current_steps": 26595, "total_steps": 40000, "loss": 0.0315, "lr": 1.2623767988059843e-05, "epoch": 4.338526796639204, "percentage": 66.49, "elapsed_time": "6:04:46", "remaining_time": "3:03:51", "throughput": 2338.98, "total_tokens": 51191648} {"current_steps": 26600, "total_steps": 40000, "loss": 0.0003, "lr": 1.2615238881381309e-05, "epoch": 4.339342523860021, "percentage": 66.5, "elapsed_time": "6:04:48", "remaining_time": "3:03:46", "throughput": 2339.16, "total_tokens": 51200224} {"current_steps": 26600, "total_steps": 40000, "eval_loss": 0.3100602626800537, "epoch": 4.339342523860021, "percentage": 66.5, "elapsed_time": "6:06:09", "remaining_time": "3:04:27", "throughput": 2330.56, "total_tokens": 51200224} {"current_steps": 26605, "total_steps": 40000, "loss": 0.0004, "lr": 1.2606711684588568e-05, "epoch": 4.3401582510808385, "percentage": 66.51, "elapsed_time": "6:06:12", "remaining_time": "3:04:22", "throughput": 2330.66, "total_tokens": 51210992} {"current_steps": 26610, "total_steps": 40000, "loss": 0.0098, "lr": 1.2598186398996636e-05, "epoch": 4.340973978301656, "percentage": 66.53, "elapsed_time": "6:06:14", "remaining_time": "3:04:17", "throughput": 2330.87, "total_tokens": 51220400} {"current_steps": 26615, "total_steps": 40000, "loss": 0.0557, "lr": 1.2589663025920207e-05, "epoch": 4.341789705522474, "percentage": 66.54, "elapsed_time": "6:06:16", "remaining_time": "3:04:12", "throughput": 2331.06, "total_tokens": 51229600} {"current_steps": 26620, "total_steps": 40000, "loss": 0.0227, "lr": 1.2581141566673705e-05, "epoch": 4.342605432743291, "percentage": 66.55, "elapsed_time": "6:06:19", "remaining_time": "3:04:07", "throughput": 2331.3, "total_tokens": 51239584} {"current_steps": 26625, "total_steps": 40000, "loss": 0.0024, "lr": 1.257262202257124e-05, "epoch": 4.343421159964108, "percentage": 66.56, "elapsed_time": "6:06:21", "remaining_time": "3:04:02", "throughput": 2331.57, "total_tokens": 51250352} {"current_steps": 26630, "total_steps": 40000, "loss": 0.0211, "lr": 1.2564104394926618e-05, "epoch": 4.344236887184925, "percentage": 66.57, "elapsed_time": "6:06:23", "remaining_time": "3:03:56", "throughput": 2331.83, "total_tokens": 51261056} {"current_steps": 26635, "total_steps": 40000, "loss": 0.0002, "lr": 1.2555588685053383e-05, "epoch": 4.345052614405743, "percentage": 66.59, "elapsed_time": "6:06:25", "remaining_time": "3:03:51", "throughput": 2332.09, "total_tokens": 51271472} {"current_steps": 26640, "total_steps": 40000, "loss": 0.0111, "lr": 1.2547074894264762e-05, "epoch": 4.34586834162656, "percentage": 66.6, "elapsed_time": "6:06:27", "remaining_time": "3:03:46", "throughput": 2332.31, "total_tokens": 51281168} {"current_steps": 26645, "total_steps": 40000, "loss": 0.0005, "lr": 1.2538563023873679e-05, "epoch": 4.346684068847377, "percentage": 66.61, "elapsed_time": "6:06:29", "remaining_time": "3:03:41", "throughput": 2332.51, "total_tokens": 51290512} {"current_steps": 26650, "total_steps": 40000, "loss": 0.0002, "lr": 1.2530053075192789e-05, "epoch": 4.3474997960681945, "percentage": 66.62, "elapsed_time": "6:06:31", "remaining_time": "3:03:36", "throughput": 2332.73, "total_tokens": 51300096} {"current_steps": 26655, "total_steps": 40000, "loss": 0.0002, "lr": 1.252154504953441e-05, "epoch": 4.3483155232890125, "percentage": 66.64, "elapsed_time": "6:06:33", "remaining_time": "3:03:31", "throughput": 2332.99, "total_tokens": 51310592} {"current_steps": 26660, "total_steps": 40000, "loss": 0.0001, "lr": 1.25130389482106e-05, "epoch": 4.34913125050983, "percentage": 66.65, "elapsed_time": "6:06:35", "remaining_time": "3:03:26", "throughput": 2333.2, "total_tokens": 51320112} {"current_steps": 26665, "total_steps": 40000, "loss": 0.1606, "lr": 1.2504534772533116e-05, "epoch": 4.349946977730647, "percentage": 66.66, "elapsed_time": "6:06:37", "remaining_time": "3:03:20", "throughput": 2333.41, "total_tokens": 51329680} {"current_steps": 26670, "total_steps": 40000, "loss": 0.073, "lr": 1.2496032523813387e-05, "epoch": 4.350762704951464, "percentage": 66.67, "elapsed_time": "6:06:39", "remaining_time": "3:03:15", "throughput": 2333.64, "total_tokens": 51339472} {"current_steps": 26675, "total_steps": 40000, "loss": 0.0592, "lr": 1.2487532203362576e-05, "epoch": 4.351578432172282, "percentage": 66.69, "elapsed_time": "6:06:41", "remaining_time": "3:03:10", "throughput": 2333.84, "total_tokens": 51348784} {"current_steps": 26680, "total_steps": 40000, "loss": 0.0964, "lr": 1.247903381249155e-05, "epoch": 4.352394159393099, "percentage": 66.7, "elapsed_time": "6:06:43", "remaining_time": "3:03:05", "throughput": 2334.09, "total_tokens": 51359104} {"current_steps": 26685, "total_steps": 40000, "loss": 0.153, "lr": 1.2470537352510853e-05, "epoch": 4.353209886613916, "percentage": 66.71, "elapsed_time": "6:06:45", "remaining_time": "3:03:00", "throughput": 2334.34, "total_tokens": 51369360} {"current_steps": 26690, "total_steps": 40000, "loss": 0.0003, "lr": 1.2462042824730758e-05, "epoch": 4.354025613834733, "percentage": 66.72, "elapsed_time": "6:06:48", "remaining_time": "3:02:55", "throughput": 2334.56, "total_tokens": 51379184} {"current_steps": 26695, "total_steps": 40000, "loss": 0.0183, "lr": 1.245355023046122e-05, "epoch": 4.354841341055551, "percentage": 66.74, "elapsed_time": "6:06:50", "remaining_time": "3:02:50", "throughput": 2334.76, "total_tokens": 51388240} {"current_steps": 26700, "total_steps": 40000, "loss": 0.0033, "lr": 1.2445059571011896e-05, "epoch": 4.3556570682763684, "percentage": 66.75, "elapsed_time": "6:06:52", "remaining_time": "3:02:44", "throughput": 2334.99, "total_tokens": 51398192} {"current_steps": 26705, "total_steps": 40000, "loss": 0.0105, "lr": 1.2436570847692173e-05, "epoch": 4.356472795497186, "percentage": 66.76, "elapsed_time": "6:06:54", "remaining_time": "3:02:39", "throughput": 2335.22, "total_tokens": 51408064} {"current_steps": 26710, "total_steps": 40000, "loss": 0.0656, "lr": 1.2428084061811096e-05, "epoch": 4.357288522718003, "percentage": 66.77, "elapsed_time": "6:06:56", "remaining_time": "3:02:34", "throughput": 2335.41, "total_tokens": 51417088} {"current_steps": 26715, "total_steps": 40000, "loss": 0.0402, "lr": 1.2419599214677447e-05, "epoch": 4.358104249938821, "percentage": 66.79, "elapsed_time": "6:06:58", "remaining_time": "3:02:29", "throughput": 2335.62, "total_tokens": 51426624} {"current_steps": 26720, "total_steps": 40000, "loss": 0.0652, "lr": 1.2411116307599702e-05, "epoch": 4.358919977159638, "percentage": 66.8, "elapsed_time": "6:07:00", "remaining_time": "3:02:24", "throughput": 2335.85, "total_tokens": 51436416} {"current_steps": 26725, "total_steps": 40000, "loss": 0.0007, "lr": 1.2402635341886016e-05, "epoch": 4.359735704380455, "percentage": 66.81, "elapsed_time": "6:07:02", "remaining_time": "3:02:19", "throughput": 2336.01, "total_tokens": 51444832} {"current_steps": 26730, "total_steps": 40000, "loss": 0.0798, "lr": 1.2394156318844278e-05, "epoch": 4.360551431601272, "percentage": 66.83, "elapsed_time": "6:07:04", "remaining_time": "3:02:14", "throughput": 2336.23, "total_tokens": 51454624} {"current_steps": 26735, "total_steps": 40000, "loss": 0.0008, "lr": 1.2385679239782039e-05, "epoch": 4.36136715882209, "percentage": 66.84, "elapsed_time": "6:07:06", "remaining_time": "3:02:08", "throughput": 2336.41, "total_tokens": 51463520} {"current_steps": 26740, "total_steps": 40000, "loss": 0.0857, "lr": 1.2377204106006585e-05, "epoch": 4.362182886042907, "percentage": 66.85, "elapsed_time": "6:07:08", "remaining_time": "3:02:03", "throughput": 2336.66, "total_tokens": 51473760} {"current_steps": 26745, "total_steps": 40000, "loss": 0.0012, "lr": 1.2368730918824891e-05, "epoch": 4.362998613263724, "percentage": 66.86, "elapsed_time": "6:07:10", "remaining_time": "3:01:58", "throughput": 2336.89, "total_tokens": 51483760} {"current_steps": 26750, "total_steps": 40000, "loss": 0.0003, "lr": 1.236025967954362e-05, "epoch": 4.3638143404845415, "percentage": 66.88, "elapsed_time": "6:07:12", "remaining_time": "3:01:53", "throughput": 2337.06, "total_tokens": 51492240} {"current_steps": 26755, "total_steps": 40000, "loss": 0.0009, "lr": 1.2351790389469153e-05, "epoch": 4.3646300677053596, "percentage": 66.89, "elapsed_time": "6:07:15", "remaining_time": "3:01:48", "throughput": 2337.29, "total_tokens": 51502336} {"current_steps": 26760, "total_steps": 40000, "loss": 0.0002, "lr": 1.234332304990755e-05, "epoch": 4.365445794926177, "percentage": 66.9, "elapsed_time": "6:07:17", "remaining_time": "3:01:43", "throughput": 2337.46, "total_tokens": 51510960} {"current_steps": 26765, "total_steps": 40000, "loss": 0.001, "lr": 1.2334857662164593e-05, "epoch": 4.366261522146994, "percentage": 66.91, "elapsed_time": "6:07:19", "remaining_time": "3:01:38", "throughput": 2337.67, "total_tokens": 51520352} {"current_steps": 26770, "total_steps": 40000, "loss": 0.0067, "lr": 1.2326394227545743e-05, "epoch": 4.367077249367812, "percentage": 66.92, "elapsed_time": "6:07:21", "remaining_time": "3:01:33", "throughput": 2337.91, "total_tokens": 51530432} {"current_steps": 26775, "total_steps": 40000, "loss": 0.0907, "lr": 1.2317932747356162e-05, "epoch": 4.367892976588629, "percentage": 66.94, "elapsed_time": "6:07:23", "remaining_time": "3:01:27", "throughput": 2338.1, "total_tokens": 51539456} {"current_steps": 26780, "total_steps": 40000, "loss": 0.0007, "lr": 1.2309473222900726e-05, "epoch": 4.368708703809446, "percentage": 66.95, "elapsed_time": "6:07:25", "remaining_time": "3:01:22", "throughput": 2338.37, "total_tokens": 51550240} {"current_steps": 26785, "total_steps": 40000, "loss": 0.0882, "lr": 1.2301015655484006e-05, "epoch": 4.369524431030263, "percentage": 66.96, "elapsed_time": "6:07:27", "remaining_time": "3:01:17", "throughput": 2338.59, "total_tokens": 51559920} {"current_steps": 26790, "total_steps": 40000, "loss": 0.0358, "lr": 1.2292560046410245e-05, "epoch": 4.370340158251081, "percentage": 66.97, "elapsed_time": "6:07:29", "remaining_time": "3:01:12", "throughput": 2338.75, "total_tokens": 51568432} {"current_steps": 26795, "total_steps": 40000, "loss": 0.0011, "lr": 1.228410639698343e-05, "epoch": 4.371155885471898, "percentage": 66.99, "elapsed_time": "6:07:31", "remaining_time": "3:01:07", "throughput": 2338.93, "total_tokens": 51577184} {"current_steps": 26800, "total_steps": 40000, "loss": 0.0034, "lr": 1.2275654708507195e-05, "epoch": 4.3719716126927155, "percentage": 67.0, "elapsed_time": "6:07:33", "remaining_time": "3:01:02", "throughput": 2339.1, "total_tokens": 51585680} {"current_steps": 26800, "total_steps": 40000, "eval_loss": 0.2956853210926056, "epoch": 4.3719716126927155, "percentage": 67.0, "elapsed_time": "6:08:54", "remaining_time": "3:01:42", "throughput": 2330.57, "total_tokens": 51585680} {"current_steps": 26805, "total_steps": 40000, "loss": 0.0243, "lr": 1.2267204982284908e-05, "epoch": 4.372787339913533, "percentage": 67.01, "elapsed_time": "6:08:58", "remaining_time": "3:01:37", "throughput": 2330.61, "total_tokens": 51595104} {"current_steps": 26810, "total_steps": 40000, "loss": 0.0489, "lr": 1.2258757219619635e-05, "epoch": 4.373603067134351, "percentage": 67.03, "elapsed_time": "6:09:00", "remaining_time": "3:01:32", "throughput": 2330.85, "total_tokens": 51605376} {"current_steps": 26815, "total_steps": 40000, "loss": 0.0377, "lr": 1.2250311421814104e-05, "epoch": 4.374418794355168, "percentage": 67.04, "elapsed_time": "6:09:02", "remaining_time": "3:01:27", "throughput": 2331.04, "total_tokens": 51614352} {"current_steps": 26820, "total_steps": 40000, "loss": 0.0006, "lr": 1.2241867590170772e-05, "epoch": 4.375234521575985, "percentage": 67.05, "elapsed_time": "6:09:04", "remaining_time": "3:01:22", "throughput": 2331.29, "total_tokens": 51624832} {"current_steps": 26825, "total_steps": 40000, "loss": 0.0008, "lr": 1.2233425725991799e-05, "epoch": 4.376050248796802, "percentage": 67.06, "elapsed_time": "6:09:06", "remaining_time": "3:01:17", "throughput": 2331.47, "total_tokens": 51633616} {"current_steps": 26830, "total_steps": 40000, "loss": 0.115, "lr": 1.2224985830579003e-05, "epoch": 4.37686597601762, "percentage": 67.07, "elapsed_time": "6:09:08", "remaining_time": "3:01:11", "throughput": 2331.68, "total_tokens": 51643120} {"current_steps": 26835, "total_steps": 40000, "loss": 0.0006, "lr": 1.2216547905233944e-05, "epoch": 4.377681703238437, "percentage": 67.09, "elapsed_time": "6:09:10", "remaining_time": "3:01:06", "throughput": 2331.93, "total_tokens": 51653376} {"current_steps": 26840, "total_steps": 40000, "loss": 0.073, "lr": 1.2208111951257842e-05, "epoch": 4.378497430459254, "percentage": 67.1, "elapsed_time": "6:09:12", "remaining_time": "3:01:01", "throughput": 2332.14, "total_tokens": 51662848} {"current_steps": 26845, "total_steps": 40000, "loss": 0.0356, "lr": 1.2199677969951622e-05, "epoch": 4.3793131576800715, "percentage": 67.11, "elapsed_time": "6:09:14", "remaining_time": "3:00:56", "throughput": 2332.39, "total_tokens": 51673312} {"current_steps": 26850, "total_steps": 40000, "loss": 0.0343, "lr": 1.2191245962615927e-05, "epoch": 4.3801288849008895, "percentage": 67.12, "elapsed_time": "6:09:16", "remaining_time": "3:00:51", "throughput": 2332.62, "total_tokens": 51683328} {"current_steps": 26855, "total_steps": 40000, "loss": 0.001, "lr": 1.218281593055106e-05, "epoch": 4.380944612121707, "percentage": 67.14, "elapsed_time": "6:09:18", "remaining_time": "3:00:46", "throughput": 2332.82, "total_tokens": 51692432} {"current_steps": 26860, "total_steps": 40000, "loss": 0.0573, "lr": 1.217438787505705e-05, "epoch": 4.381760339342524, "percentage": 67.15, "elapsed_time": "6:09:20", "remaining_time": "3:00:41", "throughput": 2333.02, "total_tokens": 51701760} {"current_steps": 26865, "total_steps": 40000, "loss": 0.0485, "lr": 1.2165961797433615e-05, "epoch": 4.382576066563341, "percentage": 67.16, "elapsed_time": "6:09:22", "remaining_time": "3:00:36", "throughput": 2333.26, "total_tokens": 51712000} {"current_steps": 26870, "total_steps": 40000, "loss": 0.0002, "lr": 1.215753769898014e-05, "epoch": 4.383391793784159, "percentage": 67.17, "elapsed_time": "6:09:25", "remaining_time": "3:00:30", "throughput": 2333.47, "total_tokens": 51721536} {"current_steps": 26875, "total_steps": 40000, "loss": 0.0007, "lr": 1.2149115580995755e-05, "epoch": 4.384207521004976, "percentage": 67.19, "elapsed_time": "6:09:27", "remaining_time": "3:00:25", "throughput": 2333.65, "total_tokens": 51730224} {"current_steps": 26880, "total_steps": 40000, "loss": 0.0003, "lr": 1.2140695444779227e-05, "epoch": 4.385023248225793, "percentage": 67.2, "elapsed_time": "6:09:29", "remaining_time": "3:00:20", "throughput": 2333.84, "total_tokens": 51739376} {"current_steps": 26885, "total_steps": 40000, "loss": 0.0023, "lr": 1.2132277291629066e-05, "epoch": 4.38583897544661, "percentage": 67.21, "elapsed_time": "6:09:31", "remaining_time": "3:00:15", "throughput": 2334.09, "total_tokens": 51749728} {"current_steps": 26890, "total_steps": 40000, "loss": 0.0013, "lr": 1.2123861122843458e-05, "epoch": 4.386654702667428, "percentage": 67.22, "elapsed_time": "6:09:33", "remaining_time": "3:00:10", "throughput": 2334.31, "total_tokens": 51759456} {"current_steps": 26895, "total_steps": 40000, "loss": 0.0005, "lr": 1.2115446939720271e-05, "epoch": 4.387470429888245, "percentage": 67.24, "elapsed_time": "6:09:35", "remaining_time": "3:00:05", "throughput": 2334.57, "total_tokens": 51769984} {"current_steps": 26900, "total_steps": 40000, "loss": 0.0039, "lr": 1.210703474355708e-05, "epoch": 4.388286157109063, "percentage": 67.25, "elapsed_time": "6:09:37", "remaining_time": "3:00:00", "throughput": 2334.77, "total_tokens": 51779344} {"current_steps": 26905, "total_steps": 40000, "loss": 0.0126, "lr": 1.2098624535651164e-05, "epoch": 4.38910188432988, "percentage": 67.26, "elapsed_time": "6:09:39", "remaining_time": "2:59:55", "throughput": 2335.02, "total_tokens": 51789792} {"current_steps": 26910, "total_steps": 40000, "loss": 0.0005, "lr": 1.2090216317299477e-05, "epoch": 4.389917611550698, "percentage": 67.27, "elapsed_time": "6:09:41", "remaining_time": "2:59:49", "throughput": 2335.22, "total_tokens": 51799136} {"current_steps": 26915, "total_steps": 40000, "loss": 0.0003, "lr": 1.2081810089798668e-05, "epoch": 4.390733338771515, "percentage": 67.29, "elapsed_time": "6:09:43", "remaining_time": "2:59:44", "throughput": 2335.46, "total_tokens": 51809168} {"current_steps": 26920, "total_steps": 40000, "loss": 0.0123, "lr": 1.2073405854445072e-05, "epoch": 4.391549065992332, "percentage": 67.3, "elapsed_time": "6:09:45", "remaining_time": "2:59:39", "throughput": 2335.65, "total_tokens": 51818160} {"current_steps": 26925, "total_steps": 40000, "loss": 0.0008, "lr": 1.206500361253474e-05, "epoch": 4.392364793213149, "percentage": 67.31, "elapsed_time": "6:09:47", "remaining_time": "2:59:34", "throughput": 2335.84, "total_tokens": 51827280} {"current_steps": 26930, "total_steps": 40000, "loss": 0.0002, "lr": 1.2056603365363409e-05, "epoch": 4.393180520433967, "percentage": 67.33, "elapsed_time": "6:09:49", "remaining_time": "2:59:29", "throughput": 2336.02, "total_tokens": 51836048} {"current_steps": 26935, "total_steps": 40000, "loss": 0.0798, "lr": 1.2048205114226487e-05, "epoch": 4.393996247654784, "percentage": 67.34, "elapsed_time": "6:09:52", "remaining_time": "2:59:24", "throughput": 2336.23, "total_tokens": 51845680} {"current_steps": 26940, "total_steps": 40000, "loss": 0.0476, "lr": 1.2039808860419102e-05, "epoch": 4.394811974875601, "percentage": 67.35, "elapsed_time": "6:09:54", "remaining_time": "2:59:19", "throughput": 2336.46, "total_tokens": 51855552} {"current_steps": 26945, "total_steps": 40000, "loss": 0.0002, "lr": 1.2031414605236066e-05, "epoch": 4.395627702096419, "percentage": 67.36, "elapsed_time": "6:09:56", "remaining_time": "2:59:14", "throughput": 2336.7, "total_tokens": 51865856} {"current_steps": 26950, "total_steps": 40000, "loss": 0.061, "lr": 1.2023022349971862e-05, "epoch": 4.3964434293172365, "percentage": 67.38, "elapsed_time": "6:09:58", "remaining_time": "2:59:09", "throughput": 2336.86, "total_tokens": 51874112} {"current_steps": 26955, "total_steps": 40000, "loss": 0.0006, "lr": 1.20146320959207e-05, "epoch": 4.397259156538054, "percentage": 67.39, "elapsed_time": "6:10:00", "remaining_time": "2:59:03", "throughput": 2337.05, "total_tokens": 51883296} {"current_steps": 26960, "total_steps": 40000, "loss": 0.0005, "lr": 1.2006243844376445e-05, "epoch": 4.398074883758871, "percentage": 67.4, "elapsed_time": "6:10:02", "remaining_time": "2:58:58", "throughput": 2337.31, "total_tokens": 51893776} {"current_steps": 26965, "total_steps": 40000, "loss": 0.0008, "lr": 1.1997857596632678e-05, "epoch": 4.398890610979688, "percentage": 67.41, "elapsed_time": "6:10:04", "remaining_time": "2:58:53", "throughput": 2337.54, "total_tokens": 51903776} {"current_steps": 26970, "total_steps": 40000, "loss": 0.0042, "lr": 1.1989473353982672e-05, "epoch": 4.399706338200506, "percentage": 67.42, "elapsed_time": "6:10:06", "remaining_time": "2:58:48", "throughput": 2337.69, "total_tokens": 51911888} {"current_steps": 26975, "total_steps": 40000, "loss": 0.0603, "lr": 1.198109111771937e-05, "epoch": 4.400522065421323, "percentage": 67.44, "elapsed_time": "6:10:08", "remaining_time": "2:58:43", "throughput": 2337.89, "total_tokens": 51921280} {"current_steps": 26980, "total_steps": 40000, "loss": 0.0004, "lr": 1.197271088913543e-05, "epoch": 4.40133779264214, "percentage": 67.45, "elapsed_time": "6:10:10", "remaining_time": "2:58:38", "throughput": 2338.1, "total_tokens": 51930896} {"current_steps": 26985, "total_steps": 40000, "loss": 0.0617, "lr": 1.1964332669523182e-05, "epoch": 4.402153519862958, "percentage": 67.46, "elapsed_time": "6:10:12", "remaining_time": "2:58:33", "throughput": 2338.32, "total_tokens": 51940560} {"current_steps": 26990, "total_steps": 40000, "loss": 0.065, "lr": 1.1955956460174645e-05, "epoch": 4.402969247083775, "percentage": 67.47, "elapsed_time": "6:10:14", "remaining_time": "2:58:28", "throughput": 2338.53, "total_tokens": 51950016} {"current_steps": 26995, "total_steps": 40000, "loss": 0.1264, "lr": 1.1947582262381552e-05, "epoch": 4.4037849743045925, "percentage": 67.49, "elapsed_time": "6:10:16", "remaining_time": "2:58:23", "throughput": 2338.74, "total_tokens": 51959568} {"current_steps": 27000, "total_steps": 40000, "loss": 0.0809, "lr": 1.1939210077435293e-05, "epoch": 4.40460070152541, "percentage": 67.5, "elapsed_time": "6:10:18", "remaining_time": "2:58:18", "throughput": 2338.95, "total_tokens": 51969184} {"current_steps": 27000, "total_steps": 40000, "eval_loss": 0.2965461015701294, "epoch": 4.40460070152541, "percentage": 67.5, "elapsed_time": "6:11:39", "remaining_time": "2:58:56", "throughput": 2330.48, "total_tokens": 51969184} {"current_steps": 27005, "total_steps": 40000, "loss": 0.0007, "lr": 1.193083990662697e-05, "epoch": 4.405416428746228, "percentage": 67.51, "elapsed_time": "6:11:43", "remaining_time": "2:58:52", "throughput": 2330.53, "total_tokens": 51978944} {"current_steps": 27010, "total_steps": 40000, "loss": 0.0004, "lr": 1.192247175124738e-05, "epoch": 4.406232155967045, "percentage": 67.53, "elapsed_time": "6:11:45", "remaining_time": "2:58:47", "throughput": 2330.78, "total_tokens": 51989328} {"current_steps": 27015, "total_steps": 40000, "loss": 0.0037, "lr": 1.191410561258698e-05, "epoch": 4.407047883187862, "percentage": 67.54, "elapsed_time": "6:11:47", "remaining_time": "2:58:42", "throughput": 2331.04, "total_tokens": 52000128} {"current_steps": 27020, "total_steps": 40000, "loss": 0.0003, "lr": 1.1905741491935944e-05, "epoch": 4.407863610408679, "percentage": 67.55, "elapsed_time": "6:11:49", "remaining_time": "2:58:37", "throughput": 2331.29, "total_tokens": 52010432} {"current_steps": 27025, "total_steps": 40000, "loss": 0.0755, "lr": 1.1897379390584129e-05, "epoch": 4.408679337629497, "percentage": 67.56, "elapsed_time": "6:11:51", "remaining_time": "2:58:32", "throughput": 2331.49, "total_tokens": 52019696} {"current_steps": 27030, "total_steps": 40000, "loss": 0.1275, "lr": 1.1889019309821062e-05, "epoch": 4.409495064850314, "percentage": 67.58, "elapsed_time": "6:11:53", "remaining_time": "2:58:27", "throughput": 2331.66, "total_tokens": 52028384} {"current_steps": 27035, "total_steps": 40000, "loss": 0.0342, "lr": 1.188066125093599e-05, "epoch": 4.410310792071131, "percentage": 67.59, "elapsed_time": "6:11:55", "remaining_time": "2:58:21", "throughput": 2331.91, "total_tokens": 52038720} {"current_steps": 27040, "total_steps": 40000, "loss": 0.0009, "lr": 1.1872305215217811e-05, "epoch": 4.411126519291948, "percentage": 67.6, "elapsed_time": "6:11:58", "remaining_time": "2:58:16", "throughput": 2332.1, "total_tokens": 52047856} {"current_steps": 27045, "total_steps": 40000, "loss": 0.0003, "lr": 1.186395120395514e-05, "epoch": 4.4119422465127665, "percentage": 67.61, "elapsed_time": "6:12:00", "remaining_time": "2:58:11", "throughput": 2332.31, "total_tokens": 52057520} {"current_steps": 27050, "total_steps": 40000, "loss": 0.0008, "lr": 1.1855599218436283e-05, "epoch": 4.412757973733584, "percentage": 67.62, "elapsed_time": "6:12:02", "remaining_time": "2:58:06", "throughput": 2332.48, "total_tokens": 52065984} {"current_steps": 27055, "total_steps": 40000, "loss": 0.0024, "lr": 1.1847249259949209e-05, "epoch": 4.413573700954401, "percentage": 67.64, "elapsed_time": "6:12:04", "remaining_time": "2:58:01", "throughput": 2332.71, "total_tokens": 52076000} {"current_steps": 27060, "total_steps": 40000, "loss": 0.0002, "lr": 1.1838901329781574e-05, "epoch": 4.414389428175218, "percentage": 67.65, "elapsed_time": "6:12:06", "remaining_time": "2:57:56", "throughput": 2332.91, "total_tokens": 52085424} {"current_steps": 27065, "total_steps": 40000, "loss": 0.0005, "lr": 1.1830555429220758e-05, "epoch": 4.415205155396036, "percentage": 67.66, "elapsed_time": "6:12:08", "remaining_time": "2:57:51", "throughput": 2333.1, "total_tokens": 52094384} {"current_steps": 27070, "total_steps": 40000, "loss": 0.0026, "lr": 1.1822211559553784e-05, "epoch": 4.416020882616853, "percentage": 67.67, "elapsed_time": "6:12:10", "remaining_time": "2:57:46", "throughput": 2333.33, "total_tokens": 52104304} {"current_steps": 27075, "total_steps": 40000, "loss": 0.0757, "lr": 1.18138697220674e-05, "epoch": 4.41683660983767, "percentage": 67.69, "elapsed_time": "6:12:12", "remaining_time": "2:57:41", "throughput": 2333.58, "total_tokens": 52114928} {"current_steps": 27080, "total_steps": 40000, "loss": 0.031, "lr": 1.1805529918048e-05, "epoch": 4.417652337058487, "percentage": 67.7, "elapsed_time": "6:12:14", "remaining_time": "2:57:35", "throughput": 2333.84, "total_tokens": 52125456} {"current_steps": 27085, "total_steps": 40000, "loss": 0.0007, "lr": 1.1797192148781702e-05, "epoch": 4.418468064279305, "percentage": 67.71, "elapsed_time": "6:12:16", "remaining_time": "2:57:30", "throughput": 2334.03, "total_tokens": 52134512} {"current_steps": 27090, "total_steps": 40000, "loss": 0.0017, "lr": 1.1788856415554297e-05, "epoch": 4.419283791500122, "percentage": 67.73, "elapsed_time": "6:12:18", "remaining_time": "2:57:25", "throughput": 2334.27, "total_tokens": 52144816} {"current_steps": 27095, "total_steps": 40000, "loss": 0.0005, "lr": 1.1780522719651249e-05, "epoch": 4.4200995187209395, "percentage": 67.74, "elapsed_time": "6:12:20", "remaining_time": "2:57:20", "throughput": 2334.46, "total_tokens": 52153936} {"current_steps": 27100, "total_steps": 40000, "loss": 0.0009, "lr": 1.1772191062357721e-05, "epoch": 4.420915245941757, "percentage": 67.75, "elapsed_time": "6:12:22", "remaining_time": "2:57:15", "throughput": 2334.7, "total_tokens": 52164032} {"current_steps": 27105, "total_steps": 40000, "loss": 0.1691, "lr": 1.1763861444958573e-05, "epoch": 4.421730973162575, "percentage": 67.76, "elapsed_time": "6:12:25", "remaining_time": "2:57:10", "throughput": 2334.94, "total_tokens": 52174160} {"current_steps": 27110, "total_steps": 40000, "loss": 0.0019, "lr": 1.1755533868738317e-05, "epoch": 4.422546700383392, "percentage": 67.77, "elapsed_time": "6:12:27", "remaining_time": "2:57:05", "throughput": 2335.2, "total_tokens": 52185008} {"current_steps": 27115, "total_steps": 40000, "loss": 0.0883, "lr": 1.1747208334981185e-05, "epoch": 4.423362427604209, "percentage": 67.79, "elapsed_time": "6:12:29", "remaining_time": "2:57:00", "throughput": 2335.41, "total_tokens": 52194432} {"current_steps": 27120, "total_steps": 40000, "loss": 0.1209, "lr": 1.1738884844971067e-05, "epoch": 4.424178154825027, "percentage": 67.8, "elapsed_time": "6:12:31", "remaining_time": "2:56:55", "throughput": 2335.63, "total_tokens": 52204176} {"current_steps": 27125, "total_steps": 40000, "loss": 0.0538, "lr": 1.1730563399991563e-05, "epoch": 4.424993882045844, "percentage": 67.81, "elapsed_time": "6:12:33", "remaining_time": "2:56:50", "throughput": 2335.85, "total_tokens": 52214048} {"current_steps": 27130, "total_steps": 40000, "loss": 0.0005, "lr": 1.1722244001325938e-05, "epoch": 4.425809609266661, "percentage": 67.83, "elapsed_time": "6:12:35", "remaining_time": "2:56:45", "throughput": 2336.07, "total_tokens": 52223872} {"current_steps": 27135, "total_steps": 40000, "loss": 0.0003, "lr": 1.1713926650257137e-05, "epoch": 4.426625336487478, "percentage": 67.84, "elapsed_time": "6:12:37", "remaining_time": "2:56:39", "throughput": 2336.28, "total_tokens": 52233376} {"current_steps": 27140, "total_steps": 40000, "loss": 0.0006, "lr": 1.170561134806781e-05, "epoch": 4.4274410637082955, "percentage": 67.85, "elapsed_time": "6:12:39", "remaining_time": "2:56:34", "throughput": 2336.52, "total_tokens": 52243424} {"current_steps": 27145, "total_steps": 40000, "loss": 0.0006, "lr": 1.1697298096040287e-05, "epoch": 4.4282567909291135, "percentage": 67.86, "elapsed_time": "6:12:41", "remaining_time": "2:56:29", "throughput": 2336.75, "total_tokens": 52253408} {"current_steps": 27150, "total_steps": 40000, "loss": 0.0005, "lr": 1.1688986895456567e-05, "epoch": 4.429072518149931, "percentage": 67.88, "elapsed_time": "6:12:43", "remaining_time": "2:56:24", "throughput": 2336.9, "total_tokens": 52261808} {"current_steps": 27155, "total_steps": 40000, "loss": 0.0829, "lr": 1.1680677747598349e-05, "epoch": 4.429888245370748, "percentage": 67.89, "elapsed_time": "6:12:45", "remaining_time": "2:56:19", "throughput": 2337.1, "total_tokens": 52271120} {"current_steps": 27160, "total_steps": 40000, "loss": 0.0004, "lr": 1.1672370653746995e-05, "epoch": 4.430703972591566, "percentage": 67.9, "elapsed_time": "6:12:47", "remaining_time": "2:56:14", "throughput": 2337.32, "total_tokens": 52280896} {"current_steps": 27165, "total_steps": 40000, "loss": 0.0009, "lr": 1.166406561518357e-05, "epoch": 4.431519699812383, "percentage": 67.91, "elapsed_time": "6:12:49", "remaining_time": "2:56:09", "throughput": 2337.51, "total_tokens": 52289920} {"current_steps": 27170, "total_steps": 40000, "loss": 0.0671, "lr": 1.1655762633188826e-05, "epoch": 4.4323354270332, "percentage": 67.92, "elapsed_time": "6:12:51", "remaining_time": "2:56:04", "throughput": 2337.73, "total_tokens": 52299664} {"current_steps": 27175, "total_steps": 40000, "loss": 0.001, "lr": 1.1647461709043172e-05, "epoch": 4.433151154254017, "percentage": 67.94, "elapsed_time": "6:12:54", "remaining_time": "2:55:59", "throughput": 2337.99, "total_tokens": 52310336} {"current_steps": 27180, "total_steps": 40000, "loss": 0.0994, "lr": 1.1639162844026722e-05, "epoch": 4.433966881474835, "percentage": 67.95, "elapsed_time": "6:12:56", "remaining_time": "2:55:54", "throughput": 2338.27, "total_tokens": 52321552} {"current_steps": 27185, "total_steps": 40000, "loss": 0.0018, "lr": 1.163086603941927e-05, "epoch": 4.434782608695652, "percentage": 67.96, "elapsed_time": "6:12:58", "remaining_time": "2:55:49", "throughput": 2338.54, "total_tokens": 52332352} {"current_steps": 27190, "total_steps": 40000, "loss": 0.0016, "lr": 1.1622571296500273e-05, "epoch": 4.4355983359164695, "percentage": 67.97, "elapsed_time": "6:13:00", "remaining_time": "2:55:44", "throughput": 2338.79, "total_tokens": 52342816} {"current_steps": 27195, "total_steps": 40000, "loss": 0.0001, "lr": 1.1614278616548904e-05, "epoch": 4.436414063137287, "percentage": 67.99, "elapsed_time": "6:13:02", "remaining_time": "2:55:38", "throughput": 2339.05, "total_tokens": 52353376} {"current_steps": 27200, "total_steps": 40000, "loss": 0.0639, "lr": 1.1605988000843986e-05, "epoch": 4.437229790358105, "percentage": 68.0, "elapsed_time": "6:13:04", "remaining_time": "2:55:33", "throughput": 2339.27, "total_tokens": 52363216} {"current_steps": 27200, "total_steps": 40000, "eval_loss": 0.31266775727272034, "epoch": 4.437229790358105, "percentage": 68.0, "elapsed_time": "6:14:25", "remaining_time": "2:56:11", "throughput": 2330.85, "total_tokens": 52363216} {"current_steps": 27205, "total_steps": 40000, "loss": 0.0005, "lr": 1.1597699450664028e-05, "epoch": 4.438045517578922, "percentage": 68.01, "elapsed_time": "6:14:28", "remaining_time": "2:56:07", "throughput": 2330.86, "total_tokens": 52371984} {"current_steps": 27210, "total_steps": 40000, "loss": 0.0922, "lr": 1.1589412967287252e-05, "epoch": 4.438861244799739, "percentage": 68.03, "elapsed_time": "6:14:31", "remaining_time": "2:56:02", "throughput": 2331.02, "total_tokens": 52380368} {"current_steps": 27215, "total_steps": 40000, "loss": 0.0004, "lr": 1.1581128551991514e-05, "epoch": 4.439676972020556, "percentage": 68.04, "elapsed_time": "6:14:33", "remaining_time": "2:55:57", "throughput": 2331.22, "total_tokens": 52389744} {"current_steps": 27220, "total_steps": 40000, "loss": 0.0624, "lr": 1.1572846206054383e-05, "epoch": 4.440492699241374, "percentage": 68.05, "elapsed_time": "6:14:35", "remaining_time": "2:55:52", "throughput": 2331.41, "total_tokens": 52398784} {"current_steps": 27225, "total_steps": 40000, "loss": 0.0806, "lr": 1.1564565930753113e-05, "epoch": 4.441308426462191, "percentage": 68.06, "elapsed_time": "6:14:37", "remaining_time": "2:55:47", "throughput": 2331.66, "total_tokens": 52409248} {"current_steps": 27230, "total_steps": 40000, "loss": 0.0663, "lr": 1.1556287727364606e-05, "epoch": 4.442124153683008, "percentage": 68.08, "elapsed_time": "6:14:39", "remaining_time": "2:55:42", "throughput": 2331.89, "total_tokens": 52419376} {"current_steps": 27235, "total_steps": 40000, "loss": 0.1111, "lr": 1.1548011597165489e-05, "epoch": 4.442939880903825, "percentage": 68.09, "elapsed_time": "6:14:41", "remaining_time": "2:55:36", "throughput": 2332.11, "total_tokens": 52429152} {"current_steps": 27240, "total_steps": 40000, "loss": 0.1147, "lr": 1.1539737541432019e-05, "epoch": 4.443755608124643, "percentage": 68.1, "elapsed_time": "6:14:43", "remaining_time": "2:55:31", "throughput": 2332.28, "total_tokens": 52437760} {"current_steps": 27245, "total_steps": 40000, "loss": 0.1255, "lr": 1.1531465561440174e-05, "epoch": 4.444571335345461, "percentage": 68.11, "elapsed_time": "6:14:45", "remaining_time": "2:55:26", "throughput": 2332.46, "total_tokens": 52446736} {"current_steps": 27250, "total_steps": 40000, "loss": 0.002, "lr": 1.1523195658465605e-05, "epoch": 4.445387062566278, "percentage": 68.12, "elapsed_time": "6:14:47", "remaining_time": "2:55:21", "throughput": 2332.66, "total_tokens": 52456032} {"current_steps": 27255, "total_steps": 40000, "loss": 0.0561, "lr": 1.1514927833783618e-05, "epoch": 4.446202789787095, "percentage": 68.14, "elapsed_time": "6:14:49", "remaining_time": "2:55:16", "throughput": 2332.86, "total_tokens": 52465312} {"current_steps": 27260, "total_steps": 40000, "loss": 0.0007, "lr": 1.150666208866922e-05, "epoch": 4.447018517007913, "percentage": 68.15, "elapsed_time": "6:14:51", "remaining_time": "2:55:11", "throughput": 2333.07, "total_tokens": 52474944} {"current_steps": 27265, "total_steps": 40000, "loss": 0.0015, "lr": 1.1498398424397106e-05, "epoch": 4.44783424422873, "percentage": 68.16, "elapsed_time": "6:14:53", "remaining_time": "2:55:06", "throughput": 2333.28, "total_tokens": 52484352} {"current_steps": 27270, "total_steps": 40000, "loss": 0.0016, "lr": 1.1490136842241628e-05, "epoch": 4.448649971449547, "percentage": 68.17, "elapsed_time": "6:14:55", "remaining_time": "2:55:01", "throughput": 2333.51, "total_tokens": 52494464} {"current_steps": 27275, "total_steps": 40000, "loss": 0.0006, "lr": 1.1481877343476813e-05, "epoch": 4.449465698670364, "percentage": 68.19, "elapsed_time": "6:14:57", "remaining_time": "2:54:56", "throughput": 2333.68, "total_tokens": 52503024} {"current_steps": 27280, "total_steps": 40000, "loss": 0.0007, "lr": 1.14736199293764e-05, "epoch": 4.450281425891182, "percentage": 68.2, "elapsed_time": "6:15:00", "remaining_time": "2:54:51", "throughput": 2333.9, "total_tokens": 52512896} {"current_steps": 27285, "total_steps": 40000, "loss": 0.1013, "lr": 1.1465364601213771e-05, "epoch": 4.451097153111999, "percentage": 68.21, "elapsed_time": "6:15:02", "remaining_time": "2:54:46", "throughput": 2334.14, "total_tokens": 52523072} {"current_steps": 27290, "total_steps": 40000, "loss": 0.0805, "lr": 1.1457111360262012e-05, "epoch": 4.4519128803328165, "percentage": 68.23, "elapsed_time": "6:15:04", "remaining_time": "2:54:41", "throughput": 2334.32, "total_tokens": 52532016} {"current_steps": 27295, "total_steps": 40000, "loss": 0.0008, "lr": 1.1448860207793869e-05, "epoch": 4.4527286075536345, "percentage": 68.24, "elapsed_time": "6:15:06", "remaining_time": "2:54:36", "throughput": 2334.54, "total_tokens": 52541744} {"current_steps": 27300, "total_steps": 40000, "loss": 0.059, "lr": 1.144061114508177e-05, "epoch": 4.453544334774452, "percentage": 68.25, "elapsed_time": "6:15:08", "remaining_time": "2:54:30", "throughput": 2334.75, "total_tokens": 52551520} {"current_steps": 27305, "total_steps": 40000, "loss": 0.0005, "lr": 1.1432364173397842e-05, "epoch": 4.454360061995269, "percentage": 68.26, "elapsed_time": "6:15:10", "remaining_time": "2:54:25", "throughput": 2334.93, "total_tokens": 52560320} {"current_steps": 27310, "total_steps": 40000, "loss": 0.0005, "lr": 1.1424119294013852e-05, "epoch": 4.455175789216086, "percentage": 68.27, "elapsed_time": "6:15:12", "remaining_time": "2:54:20", "throughput": 2335.12, "total_tokens": 52569408} {"current_steps": 27315, "total_steps": 40000, "loss": 0.0012, "lr": 1.1415876508201279e-05, "epoch": 4.455991516436903, "percentage": 68.29, "elapsed_time": "6:15:14", "remaining_time": "2:54:15", "throughput": 2335.36, "total_tokens": 52579632} {"current_steps": 27320, "total_steps": 40000, "loss": 0.0008, "lr": 1.140763581723125e-05, "epoch": 4.456807243657721, "percentage": 68.3, "elapsed_time": "6:15:16", "remaining_time": "2:54:10", "throughput": 2335.53, "total_tokens": 52588336} {"current_steps": 27325, "total_steps": 40000, "loss": 0.0008, "lr": 1.1399397222374588e-05, "epoch": 4.457622970878538, "percentage": 68.31, "elapsed_time": "6:15:18", "remaining_time": "2:54:05", "throughput": 2335.73, "total_tokens": 52597840} {"current_steps": 27330, "total_steps": 40000, "loss": 0.0013, "lr": 1.1391160724901804e-05, "epoch": 4.458438698099355, "percentage": 68.33, "elapsed_time": "6:15:20", "remaining_time": "2:54:00", "throughput": 2336.0, "total_tokens": 52608624} {"current_steps": 27335, "total_steps": 40000, "loss": 0.0013, "lr": 1.138292632608304e-05, "epoch": 4.459254425320173, "percentage": 68.34, "elapsed_time": "6:15:22", "remaining_time": "2:53:55", "throughput": 2336.17, "total_tokens": 52617328} {"current_steps": 27340, "total_steps": 40000, "loss": 0.0018, "lr": 1.1374694027188174e-05, "epoch": 4.4600701525409905, "percentage": 68.35, "elapsed_time": "6:15:24", "remaining_time": "2:53:50", "throughput": 2336.38, "total_tokens": 52626912} {"current_steps": 27345, "total_steps": 40000, "loss": 0.0003, "lr": 1.1366463829486711e-05, "epoch": 4.460885879761808, "percentage": 68.36, "elapsed_time": "6:15:27", "remaining_time": "2:53:45", "throughput": 2336.53, "total_tokens": 52635184} {"current_steps": 27350, "total_steps": 40000, "loss": 0.055, "lr": 1.1358235734247849e-05, "epoch": 4.461701606982625, "percentage": 68.38, "elapsed_time": "6:15:29", "remaining_time": "2:53:40", "throughput": 2336.7, "total_tokens": 52643920} {"current_steps": 27355, "total_steps": 40000, "loss": 0.053, "lr": 1.1350009742740478e-05, "epoch": 4.462517334203443, "percentage": 68.39, "elapsed_time": "6:15:31", "remaining_time": "2:53:35", "throughput": 2336.89, "total_tokens": 52652880} {"current_steps": 27360, "total_steps": 40000, "loss": 0.1384, "lr": 1.134178585623313e-05, "epoch": 4.46333306142426, "percentage": 68.4, "elapsed_time": "6:15:33", "remaining_time": "2:53:30", "throughput": 2337.11, "total_tokens": 52662848} {"current_steps": 27365, "total_steps": 40000, "loss": 0.0374, "lr": 1.1333564075994047e-05, "epoch": 4.464148788645077, "percentage": 68.41, "elapsed_time": "6:15:35", "remaining_time": "2:53:25", "throughput": 2337.29, "total_tokens": 52671616} {"current_steps": 27370, "total_steps": 40000, "loss": 0.0004, "lr": 1.1325344403291133e-05, "epoch": 4.464964515865894, "percentage": 68.42, "elapsed_time": "6:15:37", "remaining_time": "2:53:19", "throughput": 2337.49, "total_tokens": 52680992} {"current_steps": 27375, "total_steps": 40000, "loss": 0.0005, "lr": 1.1317126839391951e-05, "epoch": 4.465780243086712, "percentage": 68.44, "elapsed_time": "6:15:39", "remaining_time": "2:53:14", "throughput": 2337.68, "total_tokens": 52690128} {"current_steps": 27380, "total_steps": 40000, "loss": 0.0482, "lr": 1.1308911385563766e-05, "epoch": 4.466595970307529, "percentage": 68.45, "elapsed_time": "6:15:41", "remaining_time": "2:53:09", "throughput": 2337.82, "total_tokens": 52698144} {"current_steps": 27385, "total_steps": 40000, "loss": 0.0045, "lr": 1.1300698043073494e-05, "epoch": 4.467411697528346, "percentage": 68.46, "elapsed_time": "6:15:43", "remaining_time": "2:53:04", "throughput": 2337.99, "total_tokens": 52706720} {"current_steps": 27390, "total_steps": 40000, "loss": 0.0003, "lr": 1.1292486813187736e-05, "epoch": 4.468227424749164, "percentage": 68.47, "elapsed_time": "6:15:45", "remaining_time": "2:52:59", "throughput": 2338.23, "total_tokens": 52717040} {"current_steps": 27395, "total_steps": 40000, "loss": 0.0048, "lr": 1.1284277697172782e-05, "epoch": 4.469043151969982, "percentage": 68.49, "elapsed_time": "6:15:47", "remaining_time": "2:52:54", "throughput": 2338.47, "total_tokens": 52727280} {"current_steps": 27400, "total_steps": 40000, "loss": 0.0635, "lr": 1.127607069629456e-05, "epoch": 4.469858879190799, "percentage": 68.5, "elapsed_time": "6:15:49", "remaining_time": "2:52:49", "throughput": 2338.71, "total_tokens": 52737552} {"current_steps": 27400, "total_steps": 40000, "eval_loss": 0.30705398321151733, "epoch": 4.469858879190799, "percentage": 68.5, "elapsed_time": "6:17:10", "remaining_time": "2:53:26", "throughput": 2330.35, "total_tokens": 52737552} {"current_steps": 27405, "total_steps": 40000, "loss": 0.0015, "lr": 1.1267865811818701e-05, "epoch": 4.470674606411616, "percentage": 68.51, "elapsed_time": "6:17:14", "remaining_time": "2:53:22", "throughput": 2330.41, "total_tokens": 52747440} {"current_steps": 27410, "total_steps": 40000, "loss": 0.0146, "lr": 1.1259663045010513e-05, "epoch": 4.471490333632433, "percentage": 68.53, "elapsed_time": "6:17:16", "remaining_time": "2:53:17", "throughput": 2330.59, "total_tokens": 52756400} {"current_steps": 27415, "total_steps": 40000, "loss": 0.1024, "lr": 1.1251462397134957e-05, "epoch": 4.472306060853251, "percentage": 68.54, "elapsed_time": "6:17:18", "remaining_time": "2:53:12", "throughput": 2330.81, "total_tokens": 52766176} {"current_steps": 27420, "total_steps": 40000, "loss": 0.0001, "lr": 1.1243263869456664e-05, "epoch": 4.473121788074068, "percentage": 68.55, "elapsed_time": "6:17:20", "remaining_time": "2:53:07", "throughput": 2331.03, "total_tokens": 52775952} {"current_steps": 27425, "total_steps": 40000, "loss": 0.0007, "lr": 1.1235067463239967e-05, "epoch": 4.473937515294885, "percentage": 68.56, "elapsed_time": "6:17:22", "remaining_time": "2:53:02", "throughput": 2331.2, "total_tokens": 52784800} {"current_steps": 27430, "total_steps": 40000, "loss": 0.0003, "lr": 1.122687317974884e-05, "epoch": 4.474753242515702, "percentage": 68.58, "elapsed_time": "6:17:24", "remaining_time": "2:52:57", "throughput": 2331.35, "total_tokens": 52792928} {"current_steps": 27435, "total_steps": 40000, "loss": 0.0024, "lr": 1.1218681020246963e-05, "epoch": 4.47556896973652, "percentage": 68.59, "elapsed_time": "6:17:26", "remaining_time": "2:52:52", "throughput": 2331.56, "total_tokens": 52802624} {"current_steps": 27440, "total_steps": 40000, "loss": 0.0004, "lr": 1.1210490985997652e-05, "epoch": 4.4763846969573375, "percentage": 68.6, "elapsed_time": "6:17:28", "remaining_time": "2:52:47", "throughput": 2331.8, "total_tokens": 52812832} {"current_steps": 27445, "total_steps": 40000, "loss": 0.0014, "lr": 1.1202303078263917e-05, "epoch": 4.477200424178155, "percentage": 68.61, "elapsed_time": "6:17:31", "remaining_time": "2:52:41", "throughput": 2331.96, "total_tokens": 52821392} {"current_steps": 27450, "total_steps": 40000, "loss": 0.0005, "lr": 1.1194117298308451e-05, "epoch": 4.478016151398972, "percentage": 68.62, "elapsed_time": "6:17:33", "remaining_time": "2:52:36", "throughput": 2332.17, "total_tokens": 52830912} {"current_steps": 27455, "total_steps": 40000, "loss": 0.1572, "lr": 1.1185933647393585e-05, "epoch": 4.47883187861979, "percentage": 68.64, "elapsed_time": "6:17:35", "remaining_time": "2:52:31", "throughput": 2332.39, "total_tokens": 52840752} {"current_steps": 27460, "total_steps": 40000, "loss": 0.0609, "lr": 1.1177752126781354e-05, "epoch": 4.479647605840607, "percentage": 68.65, "elapsed_time": "6:17:37", "remaining_time": "2:52:26", "throughput": 2332.62, "total_tokens": 52850720} {"current_steps": 27465, "total_steps": 40000, "loss": 0.0007, "lr": 1.1169572737733441e-05, "epoch": 4.480463333061424, "percentage": 68.66, "elapsed_time": "6:17:39", "remaining_time": "2:52:21", "throughput": 2332.83, "total_tokens": 52860848} {"current_steps": 27470, "total_steps": 40000, "loss": 0.0959, "lr": 1.1161395481511216e-05, "epoch": 4.481279060282241, "percentage": 68.67, "elapsed_time": "6:17:41", "remaining_time": "2:52:16", "throughput": 2333.03, "total_tokens": 52870336} {"current_steps": 27475, "total_steps": 40000, "loss": 0.0015, "lr": 1.1153220359375722e-05, "epoch": 4.482094787503059, "percentage": 68.69, "elapsed_time": "6:17:43", "remaining_time": "2:52:11", "throughput": 2333.23, "total_tokens": 52879584} {"current_steps": 27480, "total_steps": 40000, "loss": 0.0006, "lr": 1.114504737258765e-05, "epoch": 4.482910514723876, "percentage": 68.7, "elapsed_time": "6:17:45", "remaining_time": "2:52:06", "throughput": 2333.41, "total_tokens": 52888608} {"current_steps": 27485, "total_steps": 40000, "loss": 0.0002, "lr": 1.1136876522407393e-05, "epoch": 4.4837262419446935, "percentage": 68.71, "elapsed_time": "6:17:47", "remaining_time": "2:52:01", "throughput": 2333.57, "total_tokens": 52896928} {"current_steps": 27490, "total_steps": 40000, "loss": 0.0021, "lr": 1.1128707810094985e-05, "epoch": 4.484541969165511, "percentage": 68.73, "elapsed_time": "6:17:49", "remaining_time": "2:51:56", "throughput": 2333.74, "total_tokens": 52905680} {"current_steps": 27495, "total_steps": 40000, "loss": 0.0545, "lr": 1.1120541236910157e-05, "epoch": 4.485357696386329, "percentage": 68.74, "elapsed_time": "6:17:51", "remaining_time": "2:51:51", "throughput": 2333.91, "total_tokens": 52914448} {"current_steps": 27500, "total_steps": 40000, "loss": 0.0013, "lr": 1.111237680411229e-05, "epoch": 4.486173423607146, "percentage": 68.75, "elapsed_time": "6:17:54", "remaining_time": "2:51:46", "throughput": 2334.12, "total_tokens": 52923904} {"current_steps": 27505, "total_steps": 40000, "loss": 0.0002, "lr": 1.1104214512960433e-05, "epoch": 4.486989150827963, "percentage": 68.76, "elapsed_time": "6:17:56", "remaining_time": "2:51:41", "throughput": 2334.3, "total_tokens": 52932832} {"current_steps": 27510, "total_steps": 40000, "loss": 0.0025, "lr": 1.1096054364713327e-05, "epoch": 4.487804878048781, "percentage": 68.77, "elapsed_time": "6:17:58", "remaining_time": "2:51:36", "throughput": 2334.46, "total_tokens": 52941440} {"current_steps": 27515, "total_steps": 40000, "loss": 0.13, "lr": 1.1087896360629371e-05, "epoch": 4.488620605269598, "percentage": 68.79, "elapsed_time": "6:18:00", "remaining_time": "2:51:31", "throughput": 2334.6, "total_tokens": 52949488} {"current_steps": 27520, "total_steps": 40000, "loss": 0.0004, "lr": 1.107974050196662e-05, "epoch": 4.489436332490415, "percentage": 68.8, "elapsed_time": "6:18:02", "remaining_time": "2:51:26", "throughput": 2334.84, "total_tokens": 52959600} {"current_steps": 27525, "total_steps": 40000, "loss": 0.1203, "lr": 1.1071586789982816e-05, "epoch": 4.490252059711232, "percentage": 68.81, "elapsed_time": "6:18:04", "remaining_time": "2:51:21", "throughput": 2335.1, "total_tokens": 52970464} {"current_steps": 27530, "total_steps": 40000, "loss": 0.0004, "lr": 1.1063435225935373e-05, "epoch": 4.49106778693205, "percentage": 68.83, "elapsed_time": "6:18:06", "remaining_time": "2:51:16", "throughput": 2335.27, "total_tokens": 52979104} {"current_steps": 27535, "total_steps": 40000, "loss": 0.0015, "lr": 1.1055285811081348e-05, "epoch": 4.4918835141528675, "percentage": 68.84, "elapsed_time": "6:18:08", "remaining_time": "2:51:11", "throughput": 2335.5, "total_tokens": 52989296} {"current_steps": 27540, "total_steps": 40000, "loss": 0.0004, "lr": 1.1047138546677499e-05, "epoch": 4.492699241373685, "percentage": 68.85, "elapsed_time": "6:18:10", "remaining_time": "2:51:05", "throughput": 2335.69, "total_tokens": 52998352} {"current_steps": 27545, "total_steps": 40000, "loss": 0.0152, "lr": 1.1038993433980219e-05, "epoch": 4.493514968594502, "percentage": 68.86, "elapsed_time": "6:18:12", "remaining_time": "2:51:00", "throughput": 2335.89, "total_tokens": 53007632} {"current_steps": 27550, "total_steps": 40000, "loss": 0.0652, "lr": 1.1030850474245597e-05, "epoch": 4.49433069581532, "percentage": 68.88, "elapsed_time": "6:18:14", "remaining_time": "2:50:55", "throughput": 2336.08, "total_tokens": 53016992} {"current_steps": 27555, "total_steps": 40000, "loss": 0.0464, "lr": 1.102270966872939e-05, "epoch": 4.495146423036137, "percentage": 68.89, "elapsed_time": "6:18:16", "remaining_time": "2:50:50", "throughput": 2336.27, "total_tokens": 53026032} {"current_steps": 27560, "total_steps": 40000, "loss": 0.1826, "lr": 1.1014571018687e-05, "epoch": 4.495962150256954, "percentage": 68.9, "elapsed_time": "6:18:18", "remaining_time": "2:50:45", "throughput": 2336.46, "total_tokens": 53035296} {"current_steps": 27565, "total_steps": 40000, "loss": 0.0006, "lr": 1.1006434525373502e-05, "epoch": 4.496777877477771, "percentage": 68.91, "elapsed_time": "6:18:21", "remaining_time": "2:50:40", "throughput": 2336.66, "total_tokens": 53044560} {"current_steps": 27570, "total_steps": 40000, "loss": 0.0002, "lr": 1.0998300190043664e-05, "epoch": 4.497593604698589, "percentage": 68.92, "elapsed_time": "6:18:23", "remaining_time": "2:50:35", "throughput": 2336.91, "total_tokens": 53054992} {"current_steps": 27575, "total_steps": 40000, "loss": 0.0715, "lr": 1.0990168013951882e-05, "epoch": 4.498409331919406, "percentage": 68.94, "elapsed_time": "6:18:25", "remaining_time": "2:50:30", "throughput": 2337.15, "total_tokens": 53065456} {"current_steps": 27580, "total_steps": 40000, "loss": 0.0003, "lr": 1.0982037998352263e-05, "epoch": 4.499225059140223, "percentage": 68.95, "elapsed_time": "6:18:27", "remaining_time": "2:50:25", "throughput": 2337.35, "total_tokens": 53074800} {"current_steps": 27585, "total_steps": 40000, "loss": 0.034, "lr": 1.0973910144498534e-05, "epoch": 4.5000407863610405, "percentage": 68.96, "elapsed_time": "6:18:29", "remaining_time": "2:50:20", "throughput": 2337.58, "total_tokens": 53084912} {"current_steps": 27590, "total_steps": 40000, "loss": 0.0003, "lr": 1.0965784453644123e-05, "epoch": 4.500856513581859, "percentage": 68.97, "elapsed_time": "6:18:31", "remaining_time": "2:50:15", "throughput": 2337.78, "total_tokens": 53094288} {"current_steps": 27595, "total_steps": 40000, "loss": 0.024, "lr": 1.0957660927042127e-05, "epoch": 4.501672240802676, "percentage": 68.99, "elapsed_time": "6:18:33", "remaining_time": "2:50:10", "throughput": 2337.98, "total_tokens": 53103616} {"current_steps": 27600, "total_steps": 40000, "loss": 0.0045, "lr": 1.094953956594527e-05, "epoch": 4.502487968023493, "percentage": 69.0, "elapsed_time": "6:18:35", "remaining_time": "2:50:05", "throughput": 2338.14, "total_tokens": 53112128} {"current_steps": 27600, "total_steps": 40000, "eval_loss": 0.30987748503685, "epoch": 4.502487968023493, "percentage": 69.0, "elapsed_time": "6:19:56", "remaining_time": "2:50:41", "throughput": 2329.87, "total_tokens": 53112128} {"current_steps": 27605, "total_steps": 40000, "loss": 0.0002, "lr": 1.0941420371605981e-05, "epoch": 4.50330369524431, "percentage": 69.01, "elapsed_time": "6:19:59", "remaining_time": "2:50:37", "throughput": 2329.93, "total_tokens": 53122464} {"current_steps": 27610, "total_steps": 40000, "loss": 0.0011, "lr": 1.0933303345276354e-05, "epoch": 4.504119422465128, "percentage": 69.03, "elapsed_time": "6:20:02", "remaining_time": "2:50:32", "throughput": 2330.16, "total_tokens": 53132464} {"current_steps": 27615, "total_steps": 40000, "loss": 0.2077, "lr": 1.0925188488208112e-05, "epoch": 4.504935149685945, "percentage": 69.04, "elapsed_time": "6:20:04", "remaining_time": "2:50:27", "throughput": 2330.38, "total_tokens": 53142384} {"current_steps": 27620, "total_steps": 40000, "loss": 0.0055, "lr": 1.0917075801652694e-05, "epoch": 4.505750876906762, "percentage": 69.05, "elapsed_time": "6:20:06", "remaining_time": "2:50:22", "throughput": 2330.59, "total_tokens": 53151984} {"current_steps": 27625, "total_steps": 40000, "loss": 0.0007, "lr": 1.0908965286861151e-05, "epoch": 4.506566604127579, "percentage": 69.06, "elapsed_time": "6:20:08", "remaining_time": "2:50:17", "throughput": 2330.77, "total_tokens": 53160896} {"current_steps": 27630, "total_steps": 40000, "loss": 0.0589, "lr": 1.090085694508425e-05, "epoch": 4.507382331348397, "percentage": 69.08, "elapsed_time": "6:20:10", "remaining_time": "2:50:12", "throughput": 2330.95, "total_tokens": 53169792} {"current_steps": 27635, "total_steps": 40000, "loss": 0.0014, "lr": 1.089275077757238e-05, "epoch": 4.5081980585692145, "percentage": 69.09, "elapsed_time": "6:20:12", "remaining_time": "2:50:07", "throughput": 2331.13, "total_tokens": 53178656} {"current_steps": 27640, "total_steps": 40000, "loss": 0.0006, "lr": 1.0884646785575633e-05, "epoch": 4.509013785790032, "percentage": 69.1, "elapsed_time": "6:20:14", "remaining_time": "2:50:02", "throughput": 2331.28, "total_tokens": 53186880} {"current_steps": 27645, "total_steps": 40000, "loss": 0.0755, "lr": 1.0876544970343728e-05, "epoch": 4.50982951301085, "percentage": 69.11, "elapsed_time": "6:20:16", "remaining_time": "2:49:57", "throughput": 2331.44, "total_tokens": 53195440} {"current_steps": 27650, "total_steps": 40000, "loss": 0.0902, "lr": 1.0868445333126082e-05, "epoch": 4.510645240231667, "percentage": 69.12, "elapsed_time": "6:20:18", "remaining_time": "2:49:52", "throughput": 2331.65, "total_tokens": 53205008} {"current_steps": 27655, "total_steps": 40000, "loss": 0.0026, "lr": 1.0860347875171745e-05, "epoch": 4.511460967452484, "percentage": 69.14, "elapsed_time": "6:20:20", "remaining_time": "2:49:47", "throughput": 2331.81, "total_tokens": 53213536} {"current_steps": 27660, "total_steps": 40000, "loss": 0.0003, "lr": 1.0852252597729465e-05, "epoch": 4.512276694673301, "percentage": 69.15, "elapsed_time": "6:20:22", "remaining_time": "2:49:41", "throughput": 2331.97, "total_tokens": 53222080} {"current_steps": 27665, "total_steps": 40000, "loss": 0.0003, "lr": 1.0844159502047615e-05, "epoch": 4.513092421894118, "percentage": 69.16, "elapsed_time": "6:20:24", "remaining_time": "2:49:36", "throughput": 2332.19, "total_tokens": 53232000} {"current_steps": 27670, "total_steps": 40000, "loss": 0.0002, "lr": 1.0836068589374265e-05, "epoch": 4.513908149114936, "percentage": 69.17, "elapsed_time": "6:20:26", "remaining_time": "2:49:31", "throughput": 2332.37, "total_tokens": 53240944} {"current_steps": 27675, "total_steps": 40000, "loss": 0.0017, "lr": 1.0827979860957144e-05, "epoch": 4.514723876335753, "percentage": 69.19, "elapsed_time": "6:20:29", "remaining_time": "2:49:26", "throughput": 2332.65, "total_tokens": 53252144} {"current_steps": 27680, "total_steps": 40000, "loss": 0.0001, "lr": 1.0819893318043615e-05, "epoch": 4.5155396035565705, "percentage": 69.2, "elapsed_time": "6:20:31", "remaining_time": "2:49:21", "throughput": 2332.84, "total_tokens": 53261360} {"current_steps": 27685, "total_steps": 40000, "loss": 0.0003, "lr": 1.0811808961880734e-05, "epoch": 4.5163553307773885, "percentage": 69.21, "elapsed_time": "6:20:33", "remaining_time": "2:49:16", "throughput": 2333.08, "total_tokens": 53271712} {"current_steps": 27690, "total_steps": 40000, "loss": 0.0045, "lr": 1.080372679371522e-05, "epoch": 4.517171057998206, "percentage": 69.23, "elapsed_time": "6:20:35", "remaining_time": "2:49:11", "throughput": 2333.23, "total_tokens": 53279984} {"current_steps": 27695, "total_steps": 40000, "loss": 0.0034, "lr": 1.0795646814793428e-05, "epoch": 4.517986785219023, "percentage": 69.24, "elapsed_time": "6:20:37", "remaining_time": "2:49:06", "throughput": 2333.41, "total_tokens": 53288880} {"current_steps": 27700, "total_steps": 40000, "loss": 0.0007, "lr": 1.078756902636141e-05, "epoch": 4.51880251243984, "percentage": 69.25, "elapsed_time": "6:20:39", "remaining_time": "2:49:01", "throughput": 2333.61, "total_tokens": 53298272} {"current_steps": 27705, "total_steps": 40000, "loss": 0.0001, "lr": 1.077949342966485e-05, "epoch": 4.519618239660657, "percentage": 69.26, "elapsed_time": "6:20:41", "remaining_time": "2:48:56", "throughput": 2333.81, "total_tokens": 53307600} {"current_steps": 27710, "total_steps": 40000, "loss": 0.0302, "lr": 1.0771420025949103e-05, "epoch": 4.520433966881475, "percentage": 69.27, "elapsed_time": "6:20:43", "remaining_time": "2:48:51", "throughput": 2333.96, "total_tokens": 53315968} {"current_steps": 27715, "total_steps": 40000, "loss": 0.0002, "lr": 1.0763348816459204e-05, "epoch": 4.521249694102292, "percentage": 69.29, "elapsed_time": "6:20:45", "remaining_time": "2:48:46", "throughput": 2334.13, "total_tokens": 53324864} {"current_steps": 27720, "total_steps": 40000, "loss": 0.0006, "lr": 1.0755279802439816e-05, "epoch": 4.522065421323109, "percentage": 69.3, "elapsed_time": "6:20:47", "remaining_time": "2:48:41", "throughput": 2334.38, "total_tokens": 53335360} {"current_steps": 27725, "total_steps": 40000, "loss": 0.059, "lr": 1.0747212985135293e-05, "epoch": 4.522881148543927, "percentage": 69.31, "elapsed_time": "6:20:49", "remaining_time": "2:48:36", "throughput": 2334.57, "total_tokens": 53344400} {"current_steps": 27730, "total_steps": 40000, "loss": 0.0001, "lr": 1.073914836578965e-05, "epoch": 4.523696875764744, "percentage": 69.33, "elapsed_time": "6:20:51", "remaining_time": "2:48:31", "throughput": 2334.77, "total_tokens": 53353904} {"current_steps": 27735, "total_steps": 40000, "loss": 0.0646, "lr": 1.0731085945646529e-05, "epoch": 4.524512602985562, "percentage": 69.34, "elapsed_time": "6:20:53", "remaining_time": "2:48:26", "throughput": 2334.98, "total_tokens": 53363408} {"current_steps": 27740, "total_steps": 40000, "loss": 0.0007, "lr": 1.0723025725949285e-05, "epoch": 4.525328330206379, "percentage": 69.35, "elapsed_time": "6:20:56", "remaining_time": "2:48:21", "throughput": 2335.21, "total_tokens": 53373520} {"current_steps": 27745, "total_steps": 40000, "loss": 0.0692, "lr": 1.0714967707940875e-05, "epoch": 4.526144057427197, "percentage": 69.36, "elapsed_time": "6:20:58", "remaining_time": "2:48:16", "throughput": 2335.46, "total_tokens": 53384256} {"current_steps": 27750, "total_steps": 40000, "loss": 0.0005, "lr": 1.0706911892863963e-05, "epoch": 4.526959784648014, "percentage": 69.38, "elapsed_time": "6:21:00", "remaining_time": "2:48:11", "throughput": 2335.65, "total_tokens": 53393328} {"current_steps": 27755, "total_steps": 40000, "loss": 0.1024, "lr": 1.0698858281960866e-05, "epoch": 4.527775511868831, "percentage": 69.39, "elapsed_time": "6:21:02", "remaining_time": "2:48:06", "throughput": 2335.84, "total_tokens": 53402464} {"current_steps": 27760, "total_steps": 40000, "loss": 0.0922, "lr": 1.069080687647353e-05, "epoch": 4.528591239089648, "percentage": 69.4, "elapsed_time": "6:21:04", "remaining_time": "2:48:01", "throughput": 2336.06, "total_tokens": 53412448} {"current_steps": 27765, "total_steps": 40000, "loss": 0.0918, "lr": 1.0682757677643596e-05, "epoch": 4.529406966310466, "percentage": 69.41, "elapsed_time": "6:21:06", "remaining_time": "2:47:56", "throughput": 2336.23, "total_tokens": 53421200} {"current_steps": 27770, "total_steps": 40000, "loss": 0.1218, "lr": 1.0674710686712359e-05, "epoch": 4.530222693531283, "percentage": 69.42, "elapsed_time": "6:21:08", "remaining_time": "2:47:51", "throughput": 2336.44, "total_tokens": 53430704} {"current_steps": 27775, "total_steps": 40000, "loss": 0.0004, "lr": 1.0666665904920756e-05, "epoch": 4.5310384207521, "percentage": 69.44, "elapsed_time": "6:21:10", "remaining_time": "2:47:46", "throughput": 2336.69, "total_tokens": 53441264} {"current_steps": 27780, "total_steps": 40000, "loss": 0.0004, "lr": 1.0658623333509385e-05, "epoch": 4.5318541479729175, "percentage": 69.45, "elapsed_time": "6:21:12", "remaining_time": "2:47:41", "throughput": 2336.92, "total_tokens": 53451344} {"current_steps": 27785, "total_steps": 40000, "loss": 0.0004, "lr": 1.0650582973718532e-05, "epoch": 4.5326698751937355, "percentage": 69.46, "elapsed_time": "6:21:14", "remaining_time": "2:47:36", "throughput": 2337.11, "total_tokens": 53460576} {"current_steps": 27790, "total_steps": 40000, "loss": 0.1264, "lr": 1.0642544826788098e-05, "epoch": 4.533485602414553, "percentage": 69.47, "elapsed_time": "6:21:16", "remaining_time": "2:47:31", "throughput": 2337.31, "total_tokens": 53469968} {"current_steps": 27795, "total_steps": 40000, "loss": 0.0004, "lr": 1.063450889395769e-05, "epoch": 4.53430132963537, "percentage": 69.49, "elapsed_time": "6:21:18", "remaining_time": "2:47:26", "throughput": 2337.53, "total_tokens": 53479840} {"current_steps": 27800, "total_steps": 40000, "loss": 0.053, "lr": 1.062647517646653e-05, "epoch": 4.535117056856187, "percentage": 69.5, "elapsed_time": "6:21:20", "remaining_time": "2:47:21", "throughput": 2337.72, "total_tokens": 53489200} {"current_steps": 27800, "total_steps": 40000, "eval_loss": 0.29390978813171387, "epoch": 4.535117056856187, "percentage": 69.5, "elapsed_time": "6:22:41", "remaining_time": "2:47:56", "throughput": 2329.5, "total_tokens": 53489200} {"current_steps": 27805, "total_steps": 40000, "loss": 0.0015, "lr": 1.0618443675553527e-05, "epoch": 4.535932784077005, "percentage": 69.51, "elapsed_time": "6:22:45", "remaining_time": "2:47:52", "throughput": 2329.52, "total_tokens": 53498336} {"current_steps": 27810, "total_steps": 40000, "loss": 0.0012, "lr": 1.0610414392457247e-05, "epoch": 4.536748511297822, "percentage": 69.53, "elapsed_time": "6:22:47", "remaining_time": "2:47:47", "throughput": 2329.68, "total_tokens": 53506784} {"current_steps": 27815, "total_steps": 40000, "loss": 0.0009, "lr": 1.0602387328415888e-05, "epoch": 4.537564238518639, "percentage": 69.54, "elapsed_time": "6:22:49", "remaining_time": "2:47:42", "throughput": 2329.95, "total_tokens": 53517936} {"current_steps": 27820, "total_steps": 40000, "loss": 0.0545, "lr": 1.0594362484667347e-05, "epoch": 4.538379965739456, "percentage": 69.55, "elapsed_time": "6:22:51", "remaining_time": "2:47:37", "throughput": 2330.2, "total_tokens": 53528432} {"current_steps": 27825, "total_steps": 40000, "loss": 0.1314, "lr": 1.0586339862449132e-05, "epoch": 4.539195692960274, "percentage": 69.56, "elapsed_time": "6:22:53", "remaining_time": "2:47:32", "throughput": 2330.4, "total_tokens": 53537824} {"current_steps": 27830, "total_steps": 40000, "loss": 0.0005, "lr": 1.0578319462998445e-05, "epoch": 4.5400114201810915, "percentage": 69.58, "elapsed_time": "6:22:55", "remaining_time": "2:47:27", "throughput": 2330.57, "total_tokens": 53546608} {"current_steps": 27835, "total_steps": 40000, "loss": 0.0003, "lr": 1.057030128755214e-05, "epoch": 4.540827147401909, "percentage": 69.59, "elapsed_time": "6:22:57", "remaining_time": "2:47:22", "throughput": 2330.81, "total_tokens": 53556896} {"current_steps": 27840, "total_steps": 40000, "loss": 0.0754, "lr": 1.0562285337346703e-05, "epoch": 4.541642874622726, "percentage": 69.6, "elapsed_time": "6:22:59", "remaining_time": "2:47:17", "throughput": 2330.96, "total_tokens": 53565328} {"current_steps": 27845, "total_steps": 40000, "loss": 0.0692, "lr": 1.0554271613618308e-05, "epoch": 4.542458601843544, "percentage": 69.61, "elapsed_time": "6:23:01", "remaining_time": "2:47:12", "throughput": 2331.14, "total_tokens": 53574272} {"current_steps": 27850, "total_steps": 40000, "loss": 0.0474, "lr": 1.054626011760276e-05, "epoch": 4.543274329064361, "percentage": 69.62, "elapsed_time": "6:23:04", "remaining_time": "2:47:07", "throughput": 2331.34, "total_tokens": 53583680} {"current_steps": 27855, "total_steps": 40000, "loss": 0.0838, "lr": 1.0538250850535549e-05, "epoch": 4.544090056285178, "percentage": 69.64, "elapsed_time": "6:23:06", "remaining_time": "2:47:02", "throughput": 2331.55, "total_tokens": 53593232} {"current_steps": 27860, "total_steps": 40000, "loss": 0.0003, "lr": 1.0530243813651794e-05, "epoch": 4.544905783505996, "percentage": 69.65, "elapsed_time": "6:23:08", "remaining_time": "2:46:57", "throughput": 2331.77, "total_tokens": 53603248} {"current_steps": 27865, "total_steps": 40000, "loss": 0.0006, "lr": 1.0522239008186271e-05, "epoch": 4.545721510726813, "percentage": 69.66, "elapsed_time": "6:23:10", "remaining_time": "2:46:52", "throughput": 2331.97, "total_tokens": 53612656} {"current_steps": 27870, "total_steps": 40000, "loss": 0.0004, "lr": 1.0514236435373434e-05, "epoch": 4.54653723794763, "percentage": 69.67, "elapsed_time": "6:23:12", "remaining_time": "2:46:47", "throughput": 2332.18, "total_tokens": 53622240} {"current_steps": 27875, "total_steps": 40000, "loss": 0.0008, "lr": 1.0506236096447386e-05, "epoch": 4.5473529651684474, "percentage": 69.69, "elapsed_time": "6:23:14", "remaining_time": "2:46:42", "throughput": 2332.47, "total_tokens": 53633776} {"current_steps": 27880, "total_steps": 40000, "loss": 0.001, "lr": 1.049823799264186e-05, "epoch": 4.548168692389265, "percentage": 69.7, "elapsed_time": "6:23:16", "remaining_time": "2:46:37", "throughput": 2332.61, "total_tokens": 53641872} {"current_steps": 27885, "total_steps": 40000, "loss": 0.0014, "lr": 1.049024212519028e-05, "epoch": 4.548984419610083, "percentage": 69.71, "elapsed_time": "6:23:18", "remaining_time": "2:46:32", "throughput": 2332.85, "total_tokens": 53652256} {"current_steps": 27890, "total_steps": 40000, "loss": 0.1147, "lr": 1.0482248495325713e-05, "epoch": 4.5498001468309, "percentage": 69.73, "elapsed_time": "6:23:20", "remaining_time": "2:46:27", "throughput": 2333.05, "total_tokens": 53661648} {"current_steps": 27895, "total_steps": 40000, "loss": 0.0435, "lr": 1.047425710428086e-05, "epoch": 4.550615874051717, "percentage": 69.74, "elapsed_time": "6:23:22", "remaining_time": "2:46:21", "throughput": 2333.25, "total_tokens": 53671072} {"current_steps": 27900, "total_steps": 40000, "loss": 0.0322, "lr": 1.0466267953288114e-05, "epoch": 4.551431601272535, "percentage": 69.75, "elapsed_time": "6:23:24", "remaining_time": "2:46:16", "throughput": 2333.45, "total_tokens": 53680592} {"current_steps": 27905, "total_steps": 40000, "loss": 0.124, "lr": 1.0458281043579482e-05, "epoch": 4.552247328493352, "percentage": 69.76, "elapsed_time": "6:23:26", "remaining_time": "2:46:11", "throughput": 2333.63, "total_tokens": 53689568} {"current_steps": 27910, "total_steps": 40000, "loss": 0.0054, "lr": 1.0450296376386657e-05, "epoch": 4.553063055714169, "percentage": 69.77, "elapsed_time": "6:23:28", "remaining_time": "2:46:06", "throughput": 2333.88, "total_tokens": 53700096} {"current_steps": 27915, "total_steps": 40000, "loss": 0.0013, "lr": 1.044231395294098e-05, "epoch": 4.553878782934986, "percentage": 69.79, "elapsed_time": "6:23:31", "remaining_time": "2:46:01", "throughput": 2334.09, "total_tokens": 53709776} {"current_steps": 27920, "total_steps": 40000, "loss": 0.0005, "lr": 1.0434333774473435e-05, "epoch": 4.554694510155803, "percentage": 69.8, "elapsed_time": "6:23:33", "remaining_time": "2:45:56", "throughput": 2334.27, "total_tokens": 53718640} {"current_steps": 27925, "total_steps": 40000, "loss": 0.0615, "lr": 1.0426355842214657e-05, "epoch": 4.555510237376621, "percentage": 69.81, "elapsed_time": "6:23:35", "remaining_time": "2:45:51", "throughput": 2334.44, "total_tokens": 53727536} {"current_steps": 27930, "total_steps": 40000, "loss": 0.0003, "lr": 1.0418380157394963e-05, "epoch": 4.5563259645974385, "percentage": 69.83, "elapsed_time": "6:23:37", "remaining_time": "2:45:46", "throughput": 2334.7, "total_tokens": 53738224} {"current_steps": 27935, "total_steps": 40000, "loss": 0.0033, "lr": 1.0410406721244281e-05, "epoch": 4.557141691818256, "percentage": 69.84, "elapsed_time": "6:23:39", "remaining_time": "2:45:41", "throughput": 2334.88, "total_tokens": 53747168} {"current_steps": 27940, "total_steps": 40000, "loss": 0.0004, "lr": 1.0402435534992238e-05, "epoch": 4.557957419039074, "percentage": 69.85, "elapsed_time": "6:23:41", "remaining_time": "2:45:36", "throughput": 2335.08, "total_tokens": 53756656} {"current_steps": 27945, "total_steps": 40000, "loss": 0.0003, "lr": 1.0394466599868071e-05, "epoch": 4.558773146259891, "percentage": 69.86, "elapsed_time": "6:23:43", "remaining_time": "2:45:31", "throughput": 2335.2, "total_tokens": 53764208} {"current_steps": 27950, "total_steps": 40000, "loss": 0.0136, "lr": 1.0386499917100697e-05, "epoch": 4.559588873480708, "percentage": 69.88, "elapsed_time": "6:23:45", "remaining_time": "2:45:26", "throughput": 2335.4, "total_tokens": 53773872} {"current_steps": 27955, "total_steps": 40000, "loss": 0.0006, "lr": 1.0378535487918692e-05, "epoch": 4.560404600701525, "percentage": 69.89, "elapsed_time": "6:23:47", "remaining_time": "2:45:21", "throughput": 2335.61, "total_tokens": 53783536} {"current_steps": 27960, "total_steps": 40000, "loss": 0.0678, "lr": 1.037057331355025e-05, "epoch": 4.561220327922343, "percentage": 69.9, "elapsed_time": "6:23:49", "remaining_time": "2:45:16", "throughput": 2335.85, "total_tokens": 53793872} {"current_steps": 27965, "total_steps": 40000, "loss": 0.0004, "lr": 1.0362613395223247e-05, "epoch": 4.56203605514316, "percentage": 69.91, "elapsed_time": "6:23:51", "remaining_time": "2:45:11", "throughput": 2336.05, "total_tokens": 53803200} {"current_steps": 27970, "total_steps": 40000, "loss": 0.0003, "lr": 1.0354655734165212e-05, "epoch": 4.562851782363977, "percentage": 69.92, "elapsed_time": "6:23:53", "remaining_time": "2:45:06", "throughput": 2336.24, "total_tokens": 53812416} {"current_steps": 27975, "total_steps": 40000, "loss": 0.0801, "lr": 1.03467003316033e-05, "epoch": 4.5636675095847945, "percentage": 69.94, "elapsed_time": "6:23:55", "remaining_time": "2:45:01", "throughput": 2336.46, "total_tokens": 53822352} {"current_steps": 27980, "total_steps": 40000, "loss": 0.096, "lr": 1.033874718876435e-05, "epoch": 4.5644832368056125, "percentage": 69.95, "elapsed_time": "6:23:57", "remaining_time": "2:44:56", "throughput": 2336.69, "total_tokens": 53832448} {"current_steps": 27985, "total_steps": 40000, "loss": 0.07, "lr": 1.0330796306874818e-05, "epoch": 4.56529896402643, "percentage": 69.96, "elapsed_time": "6:23:59", "remaining_time": "2:44:51", "throughput": 2336.86, "total_tokens": 53841264} {"current_steps": 27990, "total_steps": 40000, "loss": 0.0029, "lr": 1.032284768716085e-05, "epoch": 4.566114691247247, "percentage": 69.97, "elapsed_time": "6:24:02", "remaining_time": "2:44:46", "throughput": 2337.07, "total_tokens": 53850960} {"current_steps": 27995, "total_steps": 40000, "loss": 0.0365, "lr": 1.0314901330848206e-05, "epoch": 4.566930418468064, "percentage": 69.99, "elapsed_time": "6:24:04", "remaining_time": "2:44:41", "throughput": 2337.28, "total_tokens": 53860592} {"current_steps": 28000, "total_steps": 40000, "loss": 0.011, "lr": 1.030695723916233e-05, "epoch": 4.567746145688882, "percentage": 70.0, "elapsed_time": "6:24:06", "remaining_time": "2:44:36", "throughput": 2337.51, "total_tokens": 53870832} {"current_steps": 28000, "total_steps": 40000, "eval_loss": 0.28987744450569153, "epoch": 4.567746145688882, "percentage": 70.0, "elapsed_time": "6:25:26", "remaining_time": "2:45:11", "throughput": 2329.35, "total_tokens": 53870832} {"current_steps": 28005, "total_steps": 40000, "loss": 0.0007, "lr": 1.0299015413328289e-05, "epoch": 4.568561872909699, "percentage": 70.01, "elapsed_time": "6:25:30", "remaining_time": "2:45:07", "throughput": 2329.34, "total_tokens": 53879408} {"current_steps": 28010, "total_steps": 40000, "loss": 0.0865, "lr": 1.0291075854570809e-05, "epoch": 4.569377600130516, "percentage": 70.03, "elapsed_time": "6:25:32", "remaining_time": "2:45:02", "throughput": 2329.55, "total_tokens": 53889104} {"current_steps": 28015, "total_steps": 40000, "loss": 0.0015, "lr": 1.0283138564114275e-05, "epoch": 4.570193327351333, "percentage": 70.04, "elapsed_time": "6:25:34", "remaining_time": "2:44:57", "throughput": 2329.79, "total_tokens": 53899424} {"current_steps": 28020, "total_steps": 40000, "loss": 0.0166, "lr": 1.027520354318273e-05, "epoch": 4.571009054572151, "percentage": 70.05, "elapsed_time": "6:25:36", "remaining_time": "2:44:52", "throughput": 2330.0, "total_tokens": 53909056} {"current_steps": 28025, "total_steps": 40000, "loss": 0.0012, "lr": 1.0267270792999828e-05, "epoch": 4.5718247817929685, "percentage": 70.06, "elapsed_time": "6:25:39", "remaining_time": "2:44:47", "throughput": 2330.25, "total_tokens": 53919744} {"current_steps": 28030, "total_steps": 40000, "loss": 0.001, "lr": 1.0259340314788919e-05, "epoch": 4.572640509013786, "percentage": 70.08, "elapsed_time": "6:25:41", "remaining_time": "2:44:42", "throughput": 2330.48, "total_tokens": 53929888} {"current_steps": 28035, "total_steps": 40000, "loss": 0.0002, "lr": 1.0251412109772979e-05, "epoch": 4.573456236234604, "percentage": 70.09, "elapsed_time": "6:25:43", "remaining_time": "2:44:37", "throughput": 2330.72, "total_tokens": 53940144} {"current_steps": 28040, "total_steps": 40000, "loss": 0.0555, "lr": 1.0243486179174627e-05, "epoch": 4.574271963455421, "percentage": 70.1, "elapsed_time": "6:25:45", "remaining_time": "2:44:32", "throughput": 2330.9, "total_tokens": 53949200} {"current_steps": 28045, "total_steps": 40000, "loss": 0.001, "lr": 1.0235562524216158e-05, "epoch": 4.575087690676238, "percentage": 70.11, "elapsed_time": "6:25:47", "remaining_time": "2:44:27", "throughput": 2331.08, "total_tokens": 53958256} {"current_steps": 28050, "total_steps": 40000, "loss": 0.0049, "lr": 1.022764114611948e-05, "epoch": 4.575903417897055, "percentage": 70.12, "elapsed_time": "6:25:49", "remaining_time": "2:44:22", "throughput": 2331.28, "total_tokens": 53967744} {"current_steps": 28055, "total_steps": 40000, "loss": 0.0018, "lr": 1.0219722046106178e-05, "epoch": 4.576719145117872, "percentage": 70.14, "elapsed_time": "6:25:51", "remaining_time": "2:44:17", "throughput": 2331.43, "total_tokens": 53976096} {"current_steps": 28060, "total_steps": 40000, "loss": 0.0798, "lr": 1.0211805225397486e-05, "epoch": 4.57753487233869, "percentage": 70.15, "elapsed_time": "6:25:53", "remaining_time": "2:44:12", "throughput": 2331.69, "total_tokens": 53986784} {"current_steps": 28065, "total_steps": 40000, "loss": 0.175, "lr": 1.020389068521426e-05, "epoch": 4.578350599559507, "percentage": 70.16, "elapsed_time": "6:25:55", "remaining_time": "2:44:07", "throughput": 2331.91, "total_tokens": 53996640} {"current_steps": 28070, "total_steps": 40000, "loss": 0.0001, "lr": 1.0195978426777039e-05, "epoch": 4.579166326780324, "percentage": 70.17, "elapsed_time": "6:25:57", "remaining_time": "2:44:02", "throughput": 2332.06, "total_tokens": 54005072} {"current_steps": 28075, "total_steps": 40000, "loss": 0.0004, "lr": 1.0188068451305982e-05, "epoch": 4.5799820540011424, "percentage": 70.19, "elapsed_time": "6:25:59", "remaining_time": "2:43:57", "throughput": 2332.28, "total_tokens": 54015072} {"current_steps": 28080, "total_steps": 40000, "loss": 0.0656, "lr": 1.0180160760020902e-05, "epoch": 4.58079778122196, "percentage": 70.2, "elapsed_time": "6:26:01", "remaining_time": "2:43:52", "throughput": 2332.43, "total_tokens": 54023424} {"current_steps": 28085, "total_steps": 40000, "loss": 0.113, "lr": 1.0172255354141278e-05, "epoch": 4.581613508442777, "percentage": 70.21, "elapsed_time": "6:26:03", "remaining_time": "2:43:47", "throughput": 2332.58, "total_tokens": 54031712} {"current_steps": 28090, "total_steps": 40000, "loss": 0.0026, "lr": 1.0164352234886205e-05, "epoch": 4.582429235663594, "percentage": 70.23, "elapsed_time": "6:26:05", "remaining_time": "2:43:42", "throughput": 2332.76, "total_tokens": 54040672} {"current_steps": 28095, "total_steps": 40000, "loss": 0.001, "lr": 1.0156451403474454e-05, "epoch": 4.583244962884411, "percentage": 70.24, "elapsed_time": "6:26:08", "remaining_time": "2:43:37", "throughput": 2333.07, "total_tokens": 54052528} {"current_steps": 28100, "total_steps": 40000, "loss": 0.0014, "lr": 1.0148552861124443e-05, "epoch": 4.584060690105229, "percentage": 70.25, "elapsed_time": "6:26:10", "remaining_time": "2:43:32", "throughput": 2333.24, "total_tokens": 54061328} {"current_steps": 28105, "total_steps": 40000, "loss": 0.0338, "lr": 1.0140656609054205e-05, "epoch": 4.584876417326046, "percentage": 70.26, "elapsed_time": "6:26:12", "remaining_time": "2:43:27", "throughput": 2333.45, "total_tokens": 54071216} {"current_steps": 28110, "total_steps": 40000, "loss": 0.0012, "lr": 1.0132762648481455e-05, "epoch": 4.585692144546863, "percentage": 70.28, "elapsed_time": "6:26:14", "remaining_time": "2:43:22", "throughput": 2333.68, "total_tokens": 54081216} {"current_steps": 28115, "total_steps": 40000, "loss": 0.0435, "lr": 1.0124870980623543e-05, "epoch": 4.586507871767681, "percentage": 70.29, "elapsed_time": "6:26:16", "remaining_time": "2:43:17", "throughput": 2333.93, "total_tokens": 54092064} {"current_steps": 28120, "total_steps": 40000, "loss": 0.0007, "lr": 1.0116981606697453e-05, "epoch": 4.587323598988498, "percentage": 70.3, "elapsed_time": "6:26:18", "remaining_time": "2:43:12", "throughput": 2334.16, "total_tokens": 54102112} {"current_steps": 28125, "total_steps": 40000, "loss": 0.1356, "lr": 1.0109094527919838e-05, "epoch": 4.5881393262093155, "percentage": 70.31, "elapsed_time": "6:26:20", "remaining_time": "2:43:07", "throughput": 2334.4, "total_tokens": 54112656} {"current_steps": 28130, "total_steps": 40000, "loss": 0.0003, "lr": 1.010120974550697e-05, "epoch": 4.588955053430133, "percentage": 70.33, "elapsed_time": "6:26:22", "remaining_time": "2:43:02", "throughput": 2334.61, "total_tokens": 54122256} {"current_steps": 28135, "total_steps": 40000, "loss": 0.0307, "lr": 1.0093327260674795e-05, "epoch": 4.589770780650951, "percentage": 70.34, "elapsed_time": "6:26:24", "remaining_time": "2:42:57", "throughput": 2334.82, "total_tokens": 54131984} {"current_steps": 28140, "total_steps": 40000, "loss": 0.0615, "lr": 1.0085447074638878e-05, "epoch": 4.590586507871768, "percentage": 70.35, "elapsed_time": "6:26:26", "remaining_time": "2:42:52", "throughput": 2335.05, "total_tokens": 54142000} {"current_steps": 28145, "total_steps": 40000, "loss": 0.0537, "lr": 1.0077569188614461e-05, "epoch": 4.591402235092585, "percentage": 70.36, "elapsed_time": "6:26:28", "remaining_time": "2:42:47", "throughput": 2335.23, "total_tokens": 54151120} {"current_steps": 28150, "total_steps": 40000, "loss": 0.1538, "lr": 1.0069693603816393e-05, "epoch": 4.592217962313402, "percentage": 70.38, "elapsed_time": "6:26:30", "remaining_time": "2:42:42", "throughput": 2335.46, "total_tokens": 54161328} {"current_steps": 28155, "total_steps": 40000, "loss": 0.0336, "lr": 1.0061820321459204e-05, "epoch": 4.59303368953422, "percentage": 70.39, "elapsed_time": "6:26:32", "remaining_time": "2:42:37", "throughput": 2335.7, "total_tokens": 54171760} {"current_steps": 28160, "total_steps": 40000, "loss": 0.0012, "lr": 1.0053949342757038e-05, "epoch": 4.593849416755037, "percentage": 70.4, "elapsed_time": "6:26:35", "remaining_time": "2:42:32", "throughput": 2335.93, "total_tokens": 54181872} {"current_steps": 28165, "total_steps": 40000, "loss": 0.0862, "lr": 1.0046080668923717e-05, "epoch": 4.594665143975854, "percentage": 70.41, "elapsed_time": "6:26:37", "remaining_time": "2:42:27", "throughput": 2336.18, "total_tokens": 54192608} {"current_steps": 28170, "total_steps": 40000, "loss": 0.0004, "lr": 1.003821430117267e-05, "epoch": 4.5954808711966715, "percentage": 70.43, "elapsed_time": "6:26:39", "remaining_time": "2:42:22", "throughput": 2336.41, "total_tokens": 54202816} {"current_steps": 28175, "total_steps": 40000, "loss": 0.0007, "lr": 1.0030350240716999e-05, "epoch": 4.5962965984174895, "percentage": 70.44, "elapsed_time": "6:26:41", "remaining_time": "2:42:17", "throughput": 2336.64, "total_tokens": 54212816} {"current_steps": 28180, "total_steps": 40000, "loss": 0.0032, "lr": 1.0022488488769449e-05, "epoch": 4.597112325638307, "percentage": 70.45, "elapsed_time": "6:26:43", "remaining_time": "2:42:12", "throughput": 2336.83, "total_tokens": 54222032} {"current_steps": 28185, "total_steps": 40000, "loss": 0.1109, "lr": 1.0014629046542387e-05, "epoch": 4.597928052859124, "percentage": 70.46, "elapsed_time": "6:26:45", "remaining_time": "2:42:07", "throughput": 2337.04, "total_tokens": 54231872} {"current_steps": 28190, "total_steps": 40000, "loss": 0.0089, "lr": 1.0006771915247842e-05, "epoch": 4.598743780079941, "percentage": 70.47, "elapsed_time": "6:26:47", "remaining_time": "2:42:02", "throughput": 2337.24, "total_tokens": 54241296} {"current_steps": 28195, "total_steps": 40000, "loss": 0.0011, "lr": 9.998917096097495e-06, "epoch": 4.599559507300759, "percentage": 70.49, "elapsed_time": "6:26:49", "remaining_time": "2:41:57", "throughput": 2337.45, "total_tokens": 54251024} {"current_steps": 28200, "total_steps": 40000, "loss": 0.1026, "lr": 9.991064590302638e-06, "epoch": 4.600375234521576, "percentage": 70.5, "elapsed_time": "6:26:51", "remaining_time": "2:41:52", "throughput": 2337.66, "total_tokens": 54260848} {"current_steps": 28200, "total_steps": 40000, "eval_loss": 0.2872140109539032, "epoch": 4.600375234521576, "percentage": 70.5, "elapsed_time": "6:28:12", "remaining_time": "2:42:26", "throughput": 2329.56, "total_tokens": 54260848} {"current_steps": 28205, "total_steps": 40000, "loss": 0.1035, "lr": 9.983214399074241e-06, "epoch": 4.601190961742393, "percentage": 70.51, "elapsed_time": "6:28:16", "remaining_time": "2:42:22", "throughput": 2329.59, "total_tokens": 54270624} {"current_steps": 28210, "total_steps": 40000, "loss": 0.0022, "lr": 9.975366523622893e-06, "epoch": 4.602006688963211, "percentage": 70.53, "elapsed_time": "6:28:18", "remaining_time": "2:42:17", "throughput": 2329.79, "total_tokens": 54280048} {"current_steps": 28215, "total_steps": 40000, "loss": 0.0637, "lr": 9.967520965158841e-06, "epoch": 4.602822416184028, "percentage": 70.54, "elapsed_time": "6:28:20", "remaining_time": "2:42:12", "throughput": 2330.02, "total_tokens": 54290096} {"current_steps": 28220, "total_steps": 40000, "loss": 0.0862, "lr": 9.95967772489197e-06, "epoch": 4.6036381434048455, "percentage": 70.55, "elapsed_time": "6:28:22", "remaining_time": "2:42:07", "throughput": 2330.2, "total_tokens": 54299120} {"current_steps": 28225, "total_steps": 40000, "loss": 0.0004, "lr": 9.951836804031794e-06, "epoch": 4.604453870625663, "percentage": 70.56, "elapsed_time": "6:28:24", "remaining_time": "2:42:02", "throughput": 2330.45, "total_tokens": 54309840} {"current_steps": 28230, "total_steps": 40000, "loss": 0.0025, "lr": 9.943998203787489e-06, "epoch": 4.60526959784648, "percentage": 70.58, "elapsed_time": "6:28:26", "remaining_time": "2:41:57", "throughput": 2330.64, "total_tokens": 54319200} {"current_steps": 28235, "total_steps": 40000, "loss": 0.0268, "lr": 9.936161925367874e-06, "epoch": 4.606085325067298, "percentage": 70.59, "elapsed_time": "6:28:28", "remaining_time": "2:41:52", "throughput": 2330.79, "total_tokens": 54327520} {"current_steps": 28240, "total_steps": 40000, "loss": 0.0009, "lr": 9.928327969981386e-06, "epoch": 4.606901052288115, "percentage": 70.6, "elapsed_time": "6:28:30", "remaining_time": "2:41:47", "throughput": 2331.02, "total_tokens": 54337792} {"current_steps": 28245, "total_steps": 40000, "loss": 0.006, "lr": 9.920496338836135e-06, "epoch": 4.607716779508932, "percentage": 70.61, "elapsed_time": "6:28:32", "remaining_time": "2:41:42", "throughput": 2331.26, "total_tokens": 54348080} {"current_steps": 28250, "total_steps": 40000, "loss": 0.0003, "lr": 9.912667033139844e-06, "epoch": 4.60853250672975, "percentage": 70.62, "elapsed_time": "6:28:34", "remaining_time": "2:41:37", "throughput": 2331.46, "total_tokens": 54357680} {"current_steps": 28255, "total_steps": 40000, "loss": 0.0005, "lr": 9.904840054099893e-06, "epoch": 4.609348233950567, "percentage": 70.64, "elapsed_time": "6:28:36", "remaining_time": "2:41:32", "throughput": 2331.69, "total_tokens": 54367744} {"current_steps": 28260, "total_steps": 40000, "loss": 0.0381, "lr": 9.897015402923312e-06, "epoch": 4.610163961171384, "percentage": 70.65, "elapsed_time": "6:28:38", "remaining_time": "2:41:27", "throughput": 2331.85, "total_tokens": 54376320} {"current_steps": 28265, "total_steps": 40000, "loss": 0.1276, "lr": 9.889193080816744e-06, "epoch": 4.610979688392201, "percentage": 70.66, "elapsed_time": "6:28:41", "remaining_time": "2:41:22", "throughput": 2332.04, "total_tokens": 54385648} {"current_steps": 28270, "total_steps": 40000, "loss": 0.0004, "lr": 9.881373088986498e-06, "epoch": 4.6117954156130185, "percentage": 70.67, "elapsed_time": "6:28:43", "remaining_time": "2:41:17", "throughput": 2332.2, "total_tokens": 54394256} {"current_steps": 28275, "total_steps": 40000, "loss": 0.093, "lr": 9.873555428638523e-06, "epoch": 4.6126111428338366, "percentage": 70.69, "elapsed_time": "6:28:45", "remaining_time": "2:41:12", "throughput": 2332.38, "total_tokens": 54403248} {"current_steps": 28280, "total_steps": 40000, "loss": 0.0008, "lr": 9.865740100978383e-06, "epoch": 4.613426870054654, "percentage": 70.7, "elapsed_time": "6:28:47", "remaining_time": "2:41:07", "throughput": 2332.6, "total_tokens": 54413248} {"current_steps": 28285, "total_steps": 40000, "loss": 0.0008, "lr": 9.857927107211315e-06, "epoch": 4.614242597275471, "percentage": 70.71, "elapsed_time": "6:28:49", "remaining_time": "2:41:02", "throughput": 2332.86, "total_tokens": 54424160} {"current_steps": 28290, "total_steps": 40000, "loss": 0.0009, "lr": 9.850116448542177e-06, "epoch": 4.615058324496289, "percentage": 70.73, "elapsed_time": "6:28:51", "remaining_time": "2:40:57", "throughput": 2333.05, "total_tokens": 54433344} {"current_steps": 28295, "total_steps": 40000, "loss": 0.0004, "lr": 9.842308126175457e-06, "epoch": 4.615874051717106, "percentage": 70.74, "elapsed_time": "6:28:53", "remaining_time": "2:40:52", "throughput": 2333.23, "total_tokens": 54442320} {"current_steps": 28300, "total_steps": 40000, "loss": 0.0001, "lr": 9.834502141315315e-06, "epoch": 4.616689778937923, "percentage": 70.75, "elapsed_time": "6:28:55", "remaining_time": "2:40:47", "throughput": 2333.4, "total_tokens": 54451360} {"current_steps": 28305, "total_steps": 40000, "loss": 0.0003, "lr": 9.82669849516552e-06, "epoch": 4.61750550615874, "percentage": 70.76, "elapsed_time": "6:28:57", "remaining_time": "2:40:42", "throughput": 2333.62, "total_tokens": 54461216} {"current_steps": 28310, "total_steps": 40000, "loss": 0.0339, "lr": 9.818897188929493e-06, "epoch": 4.618321233379558, "percentage": 70.78, "elapsed_time": "6:28:59", "remaining_time": "2:40:37", "throughput": 2333.87, "total_tokens": 54471824} {"current_steps": 28315, "total_steps": 40000, "loss": 0.0414, "lr": 9.811098223810309e-06, "epoch": 4.619136960600375, "percentage": 70.79, "elapsed_time": "6:29:01", "remaining_time": "2:40:32", "throughput": 2334.06, "total_tokens": 54481120} {"current_steps": 28320, "total_steps": 40000, "loss": 0.0181, "lr": 9.803301601010641e-06, "epoch": 4.6199526878211925, "percentage": 70.8, "elapsed_time": "6:29:03", "remaining_time": "2:40:27", "throughput": 2334.3, "total_tokens": 54491712} {"current_steps": 28325, "total_steps": 40000, "loss": 0.0007, "lr": 9.795507321732853e-06, "epoch": 4.62076841504201, "percentage": 70.81, "elapsed_time": "6:29:05", "remaining_time": "2:40:22", "throughput": 2334.52, "total_tokens": 54501488} {"current_steps": 28330, "total_steps": 40000, "loss": 0.0703, "lr": 9.787715387178898e-06, "epoch": 4.621584142262828, "percentage": 70.83, "elapsed_time": "6:29:08", "remaining_time": "2:40:17", "throughput": 2334.78, "total_tokens": 54512544} {"current_steps": 28335, "total_steps": 40000, "loss": 0.0012, "lr": 9.779925798550399e-06, "epoch": 4.622399869483645, "percentage": 70.84, "elapsed_time": "6:29:10", "remaining_time": "2:40:12", "throughput": 2334.99, "total_tokens": 54522240} {"current_steps": 28340, "total_steps": 40000, "loss": 0.0006, "lr": 9.772138557048619e-06, "epoch": 4.623215596704462, "percentage": 70.85, "elapsed_time": "6:29:12", "remaining_time": "2:40:07", "throughput": 2335.2, "total_tokens": 54531872} {"current_steps": 28345, "total_steps": 40000, "loss": 0.0007, "lr": 9.764353663874426e-06, "epoch": 4.624031323925279, "percentage": 70.86, "elapsed_time": "6:29:14", "remaining_time": "2:40:02", "throughput": 2335.39, "total_tokens": 54541264} {"current_steps": 28350, "total_steps": 40000, "loss": 0.001, "lr": 9.756571120228375e-06, "epoch": 4.624847051146097, "percentage": 70.88, "elapsed_time": "6:29:16", "remaining_time": "2:39:57", "throughput": 2335.55, "total_tokens": 54549920} {"current_steps": 28355, "total_steps": 40000, "loss": 0.0013, "lr": 9.748790927310605e-06, "epoch": 4.625662778366914, "percentage": 70.89, "elapsed_time": "6:29:18", "remaining_time": "2:39:52", "throughput": 2335.73, "total_tokens": 54558832} {"current_steps": 28360, "total_steps": 40000, "loss": 0.1047, "lr": 9.741013086320946e-06, "epoch": 4.626478505587731, "percentage": 70.9, "elapsed_time": "6:29:20", "remaining_time": "2:39:48", "throughput": 2335.95, "total_tokens": 54568864} {"current_steps": 28365, "total_steps": 40000, "loss": 0.0006, "lr": 9.733237598458821e-06, "epoch": 4.6272942328085485, "percentage": 70.91, "elapsed_time": "6:29:22", "remaining_time": "2:39:43", "throughput": 2336.14, "total_tokens": 54578144} {"current_steps": 28370, "total_steps": 40000, "loss": 0.0003, "lr": 9.725464464923308e-06, "epoch": 4.6281099600293665, "percentage": 70.93, "elapsed_time": "6:29:24", "remaining_time": "2:39:38", "throughput": 2336.37, "total_tokens": 54588400} {"current_steps": 28375, "total_steps": 40000, "loss": 0.0716, "lr": 9.717693686913123e-06, "epoch": 4.628925687250184, "percentage": 70.94, "elapsed_time": "6:29:26", "remaining_time": "2:39:33", "throughput": 2336.59, "total_tokens": 54598464} {"current_steps": 28380, "total_steps": 40000, "loss": 0.0006, "lr": 9.709925265626632e-06, "epoch": 4.629741414471001, "percentage": 70.95, "elapsed_time": "6:29:28", "remaining_time": "2:39:28", "throughput": 2336.82, "total_tokens": 54608528} {"current_steps": 28385, "total_steps": 40000, "loss": 0.0574, "lr": 9.702159202261801e-06, "epoch": 4.630557141691818, "percentage": 70.96, "elapsed_time": "6:29:30", "remaining_time": "2:39:23", "throughput": 2337.02, "total_tokens": 54618032} {"current_steps": 28390, "total_steps": 40000, "loss": 0.0003, "lr": 9.694395498016268e-06, "epoch": 4.631372868912636, "percentage": 70.97, "elapsed_time": "6:29:32", "remaining_time": "2:39:18", "throughput": 2337.26, "total_tokens": 54628608} {"current_steps": 28395, "total_steps": 40000, "loss": 0.0005, "lr": 9.686634154087298e-06, "epoch": 4.632188596133453, "percentage": 70.99, "elapsed_time": "6:29:34", "remaining_time": "2:39:13", "throughput": 2337.48, "total_tokens": 54638432} {"current_steps": 28400, "total_steps": 40000, "loss": 0.1132, "lr": 9.678875171671776e-06, "epoch": 4.63300432335427, "percentage": 71.0, "elapsed_time": "6:29:37", "remaining_time": "2:39:08", "throughput": 2337.67, "total_tokens": 54647840} {"current_steps": 28400, "total_steps": 40000, "eval_loss": 0.30900079011917114, "epoch": 4.63300432335427, "percentage": 71.0, "elapsed_time": "6:30:58", "remaining_time": "2:39:41", "throughput": 2329.6, "total_tokens": 54647840} {"current_steps": 28405, "total_steps": 40000, "loss": 0.0005, "lr": 9.671118551966246e-06, "epoch": 4.633820050575087, "percentage": 71.01, "elapsed_time": "6:31:01", "remaining_time": "2:39:37", "throughput": 2329.61, "total_tokens": 54656816} {"current_steps": 28410, "total_steps": 40000, "loss": 0.059, "lr": 9.66336429616686e-06, "epoch": 4.634635777795905, "percentage": 71.03, "elapsed_time": "6:31:03", "remaining_time": "2:39:32", "throughput": 2329.79, "total_tokens": 54666112} {"current_steps": 28415, "total_steps": 40000, "loss": 0.0007, "lr": 9.655612405469436e-06, "epoch": 4.635451505016722, "percentage": 71.04, "elapsed_time": "6:31:06", "remaining_time": "2:39:27", "throughput": 2329.99, "total_tokens": 54675440} {"current_steps": 28420, "total_steps": 40000, "loss": 0.0005, "lr": 9.647862881069413e-06, "epoch": 4.63626723223754, "percentage": 71.05, "elapsed_time": "6:31:08", "remaining_time": "2:39:22", "throughput": 2330.2, "total_tokens": 54685392} {"current_steps": 28425, "total_steps": 40000, "loss": 0.0006, "lr": 9.640115724161855e-06, "epoch": 4.637082959458358, "percentage": 71.06, "elapsed_time": "6:31:10", "remaining_time": "2:39:17", "throughput": 2330.46, "total_tokens": 54696320} {"current_steps": 28430, "total_steps": 40000, "loss": 0.0006, "lr": 9.632370935941483e-06, "epoch": 4.637898686679175, "percentage": 71.08, "elapsed_time": "6:31:12", "remaining_time": "2:39:12", "throughput": 2330.67, "total_tokens": 54705904} {"current_steps": 28435, "total_steps": 40000, "loss": 0.0003, "lr": 9.624628517602634e-06, "epoch": 4.638714413899992, "percentage": 71.09, "elapsed_time": "6:31:14", "remaining_time": "2:39:07", "throughput": 2330.89, "total_tokens": 54716016} {"current_steps": 28440, "total_steps": 40000, "loss": 0.0024, "lr": 9.61688847033928e-06, "epoch": 4.639530141120809, "percentage": 71.1, "elapsed_time": "6:31:16", "remaining_time": "2:39:02", "throughput": 2331.11, "total_tokens": 54725968} {"current_steps": 28445, "total_steps": 40000, "loss": 0.1235, "lr": 9.609150795345051e-06, "epoch": 4.640345868341626, "percentage": 71.11, "elapsed_time": "6:31:18", "remaining_time": "2:38:57", "throughput": 2331.29, "total_tokens": 54735152} {"current_steps": 28450, "total_steps": 40000, "loss": 0.0004, "lr": 9.601415493813171e-06, "epoch": 4.641161595562444, "percentage": 71.12, "elapsed_time": "6:31:20", "remaining_time": "2:38:52", "throughput": 2331.47, "total_tokens": 54744192} {"current_steps": 28455, "total_steps": 40000, "loss": 0.0928, "lr": 9.593682566936533e-06, "epoch": 4.641977322783261, "percentage": 71.14, "elapsed_time": "6:31:22", "remaining_time": "2:38:47", "throughput": 2331.67, "total_tokens": 54753728} {"current_steps": 28460, "total_steps": 40000, "loss": 0.1213, "lr": 9.58595201590766e-06, "epoch": 4.642793050004078, "percentage": 71.15, "elapsed_time": "6:31:24", "remaining_time": "2:38:42", "throughput": 2331.81, "total_tokens": 54761808} {"current_steps": 28465, "total_steps": 40000, "loss": 0.0893, "lr": 9.578223841918681e-06, "epoch": 4.643608777224896, "percentage": 71.16, "elapsed_time": "6:31:26", "remaining_time": "2:38:37", "throughput": 2332.03, "total_tokens": 54771936} {"current_steps": 28470, "total_steps": 40000, "loss": 0.1654, "lr": 9.570498046161389e-06, "epoch": 4.6444245044457135, "percentage": 71.17, "elapsed_time": "6:31:28", "remaining_time": "2:38:32", "throughput": 2332.26, "total_tokens": 54782032} {"current_steps": 28475, "total_steps": 40000, "loss": 0.0021, "lr": 9.562774629827206e-06, "epoch": 4.645240231666531, "percentage": 71.19, "elapsed_time": "6:31:30", "remaining_time": "2:38:27", "throughput": 2332.48, "total_tokens": 54792112} {"current_steps": 28480, "total_steps": 40000, "loss": 0.0817, "lr": 9.555053594107163e-06, "epoch": 4.646055958887348, "percentage": 71.2, "elapsed_time": "6:31:32", "remaining_time": "2:38:22", "throughput": 2332.7, "total_tokens": 54802160} {"current_steps": 28485, "total_steps": 40000, "loss": 0.1712, "lr": 9.547334940191957e-06, "epoch": 4.646871686108166, "percentage": 71.21, "elapsed_time": "6:31:35", "remaining_time": "2:38:17", "throughput": 2332.85, "total_tokens": 54810352} {"current_steps": 28490, "total_steps": 40000, "loss": 0.1437, "lr": 9.539618669271886e-06, "epoch": 4.647687413328983, "percentage": 71.23, "elapsed_time": "6:31:37", "remaining_time": "2:38:12", "throughput": 2333.03, "total_tokens": 54819616} {"current_steps": 28495, "total_steps": 40000, "loss": 0.0006, "lr": 9.531904782536904e-06, "epoch": 4.6485031405498, "percentage": 71.24, "elapsed_time": "6:31:39", "remaining_time": "2:38:07", "throughput": 2333.25, "total_tokens": 54829600} {"current_steps": 28500, "total_steps": 40000, "loss": 0.0581, "lr": 9.524193281176597e-06, "epoch": 4.649318867770617, "percentage": 71.25, "elapsed_time": "6:31:41", "remaining_time": "2:38:02", "throughput": 2333.48, "total_tokens": 54839872} {"current_steps": 28505, "total_steps": 40000, "loss": 0.0794, "lr": 9.516484166380165e-06, "epoch": 4.650134594991435, "percentage": 71.26, "elapsed_time": "6:31:43", "remaining_time": "2:37:58", "throughput": 2333.69, "total_tokens": 54849568} {"current_steps": 28510, "total_steps": 40000, "loss": 0.0005, "lr": 9.508777439336447e-06, "epoch": 4.650950322212252, "percentage": 71.28, "elapsed_time": "6:31:45", "remaining_time": "2:37:53", "throughput": 2333.94, "total_tokens": 54860304} {"current_steps": 28515, "total_steps": 40000, "loss": 0.0006, "lr": 9.50107310123393e-06, "epoch": 4.6517660494330695, "percentage": 71.29, "elapsed_time": "6:31:47", "remaining_time": "2:37:48", "throughput": 2334.17, "total_tokens": 54870512} {"current_steps": 28520, "total_steps": 40000, "loss": 0.0012, "lr": 9.493371153260702e-06, "epoch": 4.652581776653887, "percentage": 71.3, "elapsed_time": "6:31:49", "remaining_time": "2:37:43", "throughput": 2334.36, "total_tokens": 54879856} {"current_steps": 28525, "total_steps": 40000, "loss": 0.0923, "lr": 9.485671596604523e-06, "epoch": 4.653397503874705, "percentage": 71.31, "elapsed_time": "6:31:51", "remaining_time": "2:37:38", "throughput": 2334.53, "total_tokens": 54888720} {"current_steps": 28530, "total_steps": 40000, "loss": 0.066, "lr": 9.477974432452738e-06, "epoch": 4.654213231095522, "percentage": 71.33, "elapsed_time": "6:31:53", "remaining_time": "2:37:33", "throughput": 2334.73, "total_tokens": 54898352} {"current_steps": 28535, "total_steps": 40000, "loss": 0.0055, "lr": 9.470279661992356e-06, "epoch": 4.655028958316339, "percentage": 71.34, "elapsed_time": "6:31:55", "remaining_time": "2:37:28", "throughput": 2334.95, "total_tokens": 54908288} {"current_steps": 28540, "total_steps": 40000, "loss": 0.0007, "lr": 9.462587286410021e-06, "epoch": 4.655844685537156, "percentage": 71.35, "elapsed_time": "6:31:57", "remaining_time": "2:37:23", "throughput": 2335.15, "total_tokens": 54917840} {"current_steps": 28545, "total_steps": 40000, "loss": 0.0004, "lr": 9.454897306891972e-06, "epoch": 4.656660412757974, "percentage": 71.36, "elapsed_time": "6:32:00", "remaining_time": "2:37:18", "throughput": 2335.37, "total_tokens": 54927968} {"current_steps": 28550, "total_steps": 40000, "loss": 0.1412, "lr": 9.44720972462411e-06, "epoch": 4.657476139978791, "percentage": 71.38, "elapsed_time": "6:32:02", "remaining_time": "2:37:13", "throughput": 2335.59, "total_tokens": 54938096} {"current_steps": 28555, "total_steps": 40000, "loss": 0.2182, "lr": 9.439524540791964e-06, "epoch": 4.658291867199608, "percentage": 71.39, "elapsed_time": "6:32:04", "remaining_time": "2:37:08", "throughput": 2335.82, "total_tokens": 54948256} {"current_steps": 28560, "total_steps": 40000, "loss": 0.0477, "lr": 9.431841756580673e-06, "epoch": 4.659107594420425, "percentage": 71.4, "elapsed_time": "6:32:06", "remaining_time": "2:37:03", "throughput": 2336.02, "total_tokens": 54957920} {"current_steps": 28565, "total_steps": 40000, "loss": 0.0577, "lr": 9.42416137317503e-06, "epoch": 4.6599233216412435, "percentage": 71.41, "elapsed_time": "6:32:08", "remaining_time": "2:36:58", "throughput": 2336.22, "total_tokens": 54967296} {"current_steps": 28570, "total_steps": 40000, "loss": 0.0013, "lr": 9.416483391759437e-06, "epoch": 4.660739048862061, "percentage": 71.43, "elapsed_time": "6:32:10", "remaining_time": "2:36:53", "throughput": 2336.44, "total_tokens": 54977376} {"current_steps": 28575, "total_steps": 40000, "loss": 0.0007, "lr": 9.408807813517945e-06, "epoch": 4.661554776082878, "percentage": 71.44, "elapsed_time": "6:32:12", "remaining_time": "2:36:48", "throughput": 2336.63, "total_tokens": 54986688} {"current_steps": 28580, "total_steps": 40000, "loss": 0.1211, "lr": 9.401134639634221e-06, "epoch": 4.662370503303695, "percentage": 71.45, "elapsed_time": "6:32:14", "remaining_time": "2:36:43", "throughput": 2336.82, "total_tokens": 54996000} {"current_steps": 28585, "total_steps": 40000, "loss": 0.0751, "lr": 9.393463871291555e-06, "epoch": 4.663186230524513, "percentage": 71.46, "elapsed_time": "6:32:16", "remaining_time": "2:36:39", "throughput": 2337.04, "total_tokens": 55006080} {"current_steps": 28590, "total_steps": 40000, "loss": 0.0006, "lr": 9.385795509672881e-06, "epoch": 4.66400195774533, "percentage": 71.47, "elapsed_time": "6:32:18", "remaining_time": "2:36:34", "throughput": 2337.23, "total_tokens": 55015440} {"current_steps": 28595, "total_steps": 40000, "loss": 0.0012, "lr": 9.378129555960771e-06, "epoch": 4.664817684966147, "percentage": 71.49, "elapsed_time": "6:32:20", "remaining_time": "2:36:29", "throughput": 2337.45, "total_tokens": 55025440} {"current_steps": 28600, "total_steps": 40000, "loss": 0.0013, "lr": 9.370466011337392e-06, "epoch": 4.665633412186965, "percentage": 71.5, "elapsed_time": "6:32:22", "remaining_time": "2:36:24", "throughput": 2337.67, "total_tokens": 55035376} {"current_steps": 28600, "total_steps": 40000, "eval_loss": 0.259128212928772, "epoch": 4.665633412186965, "percentage": 71.5, "elapsed_time": "6:33:43", "remaining_time": "2:36:56", "throughput": 2329.68, "total_tokens": 55035376} {"current_steps": 28605, "total_steps": 40000, "loss": 0.0008, "lr": 9.362804876984573e-06, "epoch": 4.666449139407782, "percentage": 71.51, "elapsed_time": "6:33:47", "remaining_time": "2:36:52", "throughput": 2329.75, "total_tokens": 55046064} {"current_steps": 28610, "total_steps": 40000, "loss": 0.0474, "lr": 9.355146154083747e-06, "epoch": 4.667264866628599, "percentage": 71.53, "elapsed_time": "6:33:49", "remaining_time": "2:36:47", "throughput": 2329.98, "total_tokens": 55056288} {"current_steps": 28615, "total_steps": 40000, "loss": 0.06, "lr": 9.347489843815987e-06, "epoch": 4.6680805938494165, "percentage": 71.54, "elapsed_time": "6:33:51", "remaining_time": "2:36:42", "throughput": 2330.18, "total_tokens": 55066032} {"current_steps": 28620, "total_steps": 40000, "loss": 0.0016, "lr": 9.339835947362002e-06, "epoch": 4.668896321070234, "percentage": 71.55, "elapsed_time": "6:33:53", "remaining_time": "2:36:37", "throughput": 2330.36, "total_tokens": 55075088} {"current_steps": 28625, "total_steps": 40000, "loss": 0.0492, "lr": 9.332184465902105e-06, "epoch": 4.669712048291052, "percentage": 71.56, "elapsed_time": "6:33:55", "remaining_time": "2:36:32", "throughput": 2330.57, "total_tokens": 55084896} {"current_steps": 28630, "total_steps": 40000, "loss": 0.0007, "lr": 9.324535400616266e-06, "epoch": 4.670527775511869, "percentage": 71.58, "elapsed_time": "6:33:57", "remaining_time": "2:36:27", "throughput": 2330.8, "total_tokens": 55095040} {"current_steps": 28635, "total_steps": 40000, "loss": 0.0009, "lr": 9.31688875268405e-06, "epoch": 4.671343502732686, "percentage": 71.59, "elapsed_time": "6:33:59", "remaining_time": "2:36:22", "throughput": 2331.0, "total_tokens": 55104736} {"current_steps": 28640, "total_steps": 40000, "loss": 0.0015, "lr": 9.309244523284674e-06, "epoch": 4.672159229953504, "percentage": 71.6, "elapsed_time": "6:34:01", "remaining_time": "2:36:17", "throughput": 2331.24, "total_tokens": 55115184} {"current_steps": 28645, "total_steps": 40000, "loss": 0.0518, "lr": 9.301602713596982e-06, "epoch": 4.672974957174321, "percentage": 71.61, "elapsed_time": "6:34:04", "remaining_time": "2:36:12", "throughput": 2331.41, "total_tokens": 55124112} {"current_steps": 28650, "total_steps": 40000, "loss": 0.0376, "lr": 9.293963324799432e-06, "epoch": 4.673790684395138, "percentage": 71.62, "elapsed_time": "6:34:06", "remaining_time": "2:36:07", "throughput": 2331.61, "total_tokens": 55133568} {"current_steps": 28655, "total_steps": 40000, "loss": 0.0004, "lr": 9.286326358070104e-06, "epoch": 4.674606411615955, "percentage": 71.64, "elapsed_time": "6:34:08", "remaining_time": "2:36:02", "throughput": 2331.8, "total_tokens": 55142992} {"current_steps": 28660, "total_steps": 40000, "loss": 0.0694, "lr": 9.278691814586729e-06, "epoch": 4.6754221388367725, "percentage": 71.65, "elapsed_time": "6:34:10", "remaining_time": "2:35:57", "throughput": 2332.0, "total_tokens": 55152400} {"current_steps": 28665, "total_steps": 40000, "loss": 0.0106, "lr": 9.271059695526635e-06, "epoch": 4.6762378660575905, "percentage": 71.66, "elapsed_time": "6:34:12", "remaining_time": "2:35:52", "throughput": 2332.2, "total_tokens": 55162080} {"current_steps": 28670, "total_steps": 40000, "loss": 0.0005, "lr": 9.263430002066805e-06, "epoch": 4.677053593278408, "percentage": 71.67, "elapsed_time": "6:34:14", "remaining_time": "2:35:47", "throughput": 2332.38, "total_tokens": 55171104} {"current_steps": 28675, "total_steps": 40000, "loss": 0.0004, "lr": 9.25580273538382e-06, "epoch": 4.677869320499225, "percentage": 71.69, "elapsed_time": "6:34:16", "remaining_time": "2:35:42", "throughput": 2332.57, "total_tokens": 55180544} {"current_steps": 28680, "total_steps": 40000, "loss": 0.001, "lr": 9.248177896653907e-06, "epoch": 4.678685047720043, "percentage": 71.7, "elapsed_time": "6:34:18", "remaining_time": "2:35:38", "throughput": 2332.77, "total_tokens": 55190032} {"current_steps": 28685, "total_steps": 40000, "loss": 0.2217, "lr": 9.240555487052918e-06, "epoch": 4.67950077494086, "percentage": 71.71, "elapsed_time": "6:34:20", "remaining_time": "2:35:33", "throughput": 2332.95, "total_tokens": 55199024} {"current_steps": 28690, "total_steps": 40000, "loss": 0.1517, "lr": 9.232935507756313e-06, "epoch": 4.680316502161677, "percentage": 71.73, "elapsed_time": "6:34:22", "remaining_time": "2:35:28", "throughput": 2333.16, "total_tokens": 55208944} {"current_steps": 28695, "total_steps": 40000, "loss": 0.1509, "lr": 9.225317959939193e-06, "epoch": 4.681132229382494, "percentage": 71.74, "elapsed_time": "6:34:24", "remaining_time": "2:35:23", "throughput": 2333.35, "total_tokens": 55218256} {"current_steps": 28700, "total_steps": 40000, "loss": 0.037, "lr": 9.217702844776287e-06, "epoch": 4.681947956603312, "percentage": 71.75, "elapsed_time": "6:34:26", "remaining_time": "2:35:18", "throughput": 2333.51, "total_tokens": 55226736} {"current_steps": 28705, "total_steps": 40000, "loss": 0.0825, "lr": 9.210090163441929e-06, "epoch": 4.682763683824129, "percentage": 71.76, "elapsed_time": "6:34:28", "remaining_time": "2:35:13", "throughput": 2333.65, "total_tokens": 55234896} {"current_steps": 28710, "total_steps": 40000, "loss": 0.0013, "lr": 9.202479917110105e-06, "epoch": 4.6835794110449465, "percentage": 71.78, "elapsed_time": "6:34:31", "remaining_time": "2:35:08", "throughput": 2333.85, "total_tokens": 55244544} {"current_steps": 28715, "total_steps": 40000, "loss": 0.0039, "lr": 9.194872106954392e-06, "epoch": 4.684395138265764, "percentage": 71.79, "elapsed_time": "6:34:33", "remaining_time": "2:35:03", "throughput": 2334.09, "total_tokens": 55255136} {"current_steps": 28720, "total_steps": 40000, "loss": 0.0011, "lr": 9.187266734148029e-06, "epoch": 4.685210865486582, "percentage": 71.8, "elapsed_time": "6:34:35", "remaining_time": "2:34:58", "throughput": 2334.29, "total_tokens": 55264752} {"current_steps": 28725, "total_steps": 40000, "loss": 0.0495, "lr": 9.179663799863849e-06, "epoch": 4.686026592707399, "percentage": 71.81, "elapsed_time": "6:34:37", "remaining_time": "2:34:53", "throughput": 2334.5, "total_tokens": 55274464} {"current_steps": 28730, "total_steps": 40000, "loss": 0.0019, "lr": 9.172063305274317e-06, "epoch": 4.686842319928216, "percentage": 71.83, "elapsed_time": "6:34:39", "remaining_time": "2:34:48", "throughput": 2334.73, "total_tokens": 55284880} {"current_steps": 28735, "total_steps": 40000, "loss": 0.002, "lr": 9.164465251551527e-06, "epoch": 4.687658047149033, "percentage": 71.84, "elapsed_time": "6:34:41", "remaining_time": "2:34:43", "throughput": 2334.95, "total_tokens": 55294800} {"current_steps": 28740, "total_steps": 40000, "loss": 0.043, "lr": 9.156869639867205e-06, "epoch": 4.688473774369851, "percentage": 71.85, "elapsed_time": "6:34:43", "remaining_time": "2:34:38", "throughput": 2335.1, "total_tokens": 55303280} {"current_steps": 28745, "total_steps": 40000, "loss": 0.0005, "lr": 9.149276471392677e-06, "epoch": 4.689289501590668, "percentage": 71.86, "elapsed_time": "6:34:45", "remaining_time": "2:34:33", "throughput": 2335.29, "total_tokens": 55312624} {"current_steps": 28750, "total_steps": 40000, "loss": 0.001, "lr": 9.141685747298914e-06, "epoch": 4.690105228811485, "percentage": 71.88, "elapsed_time": "6:34:47", "remaining_time": "2:34:29", "throughput": 2335.52, "total_tokens": 55322784} {"current_steps": 28755, "total_steps": 40000, "loss": 0.0006, "lr": 9.13409746875649e-06, "epoch": 4.690920956032302, "percentage": 71.89, "elapsed_time": "6:34:49", "remaining_time": "2:34:24", "throughput": 2335.74, "total_tokens": 55332960} {"current_steps": 28760, "total_steps": 40000, "loss": 0.1309, "lr": 9.12651163693562e-06, "epoch": 4.69173668325312, "percentage": 71.9, "elapsed_time": "6:34:51", "remaining_time": "2:34:19", "throughput": 2335.94, "total_tokens": 55342512} {"current_steps": 28765, "total_steps": 40000, "loss": 0.0009, "lr": 9.11892825300614e-06, "epoch": 4.692552410473938, "percentage": 71.91, "elapsed_time": "6:34:53", "remaining_time": "2:34:14", "throughput": 2336.14, "total_tokens": 55352016} {"current_steps": 28770, "total_steps": 40000, "loss": 0.0006, "lr": 9.111347318137491e-06, "epoch": 4.693368137694755, "percentage": 71.92, "elapsed_time": "6:34:55", "remaining_time": "2:34:09", "throughput": 2336.36, "total_tokens": 55362016} {"current_steps": 28775, "total_steps": 40000, "loss": 0.0049, "lr": 9.103768833498755e-06, "epoch": 4.694183864915573, "percentage": 71.94, "elapsed_time": "6:34:57", "remaining_time": "2:34:04", "throughput": 2336.6, "total_tokens": 55372592} {"current_steps": 28780, "total_steps": 40000, "loss": 0.0007, "lr": 9.096192800258639e-06, "epoch": 4.69499959213639, "percentage": 71.95, "elapsed_time": "6:35:00", "remaining_time": "2:33:59", "throughput": 2336.79, "total_tokens": 55382080} {"current_steps": 28785, "total_steps": 40000, "loss": 0.0003, "lr": 9.088619219585443e-06, "epoch": 4.695815319357207, "percentage": 71.96, "elapsed_time": "6:35:02", "remaining_time": "2:33:54", "throughput": 2337.01, "total_tokens": 55392128} {"current_steps": 28790, "total_steps": 40000, "loss": 0.0575, "lr": 9.081048092647127e-06, "epoch": 4.696631046578024, "percentage": 71.97, "elapsed_time": "6:35:04", "remaining_time": "2:33:49", "throughput": 2337.23, "total_tokens": 55402176} {"current_steps": 28795, "total_steps": 40000, "loss": 0.0009, "lr": 9.073479420611245e-06, "epoch": 4.697446773798841, "percentage": 71.99, "elapsed_time": "6:35:06", "remaining_time": "2:33:44", "throughput": 2337.41, "total_tokens": 55411200} {"current_steps": 28800, "total_steps": 40000, "loss": 0.0017, "lr": 9.065913204644974e-06, "epoch": 4.698262501019659, "percentage": 72.0, "elapsed_time": "6:35:08", "remaining_time": "2:33:39", "throughput": 2337.63, "total_tokens": 55421296} {"current_steps": 28800, "total_steps": 40000, "eval_loss": 0.2716529071331024, "epoch": 4.698262501019659, "percentage": 72.0, "elapsed_time": "6:36:29", "remaining_time": "2:34:11", "throughput": 2329.69, "total_tokens": 55421296} {"current_steps": 28805, "total_steps": 40000, "loss": 0.0019, "lr": 9.058349445915135e-06, "epoch": 4.699078228240476, "percentage": 72.01, "elapsed_time": "6:36:32", "remaining_time": "2:34:07", "throughput": 2329.71, "total_tokens": 55430512} {"current_steps": 28810, "total_steps": 40000, "loss": 0.0007, "lr": 9.050788145588138e-06, "epoch": 4.6998939554612935, "percentage": 72.02, "elapsed_time": "6:36:34", "remaining_time": "2:34:02", "throughput": 2329.94, "total_tokens": 55440848} {"current_steps": 28815, "total_steps": 40000, "loss": 0.0006, "lr": 9.043229304830039e-06, "epoch": 4.7007096826821115, "percentage": 72.04, "elapsed_time": "6:36:37", "remaining_time": "2:33:57", "throughput": 2330.16, "total_tokens": 55450880} {"current_steps": 28820, "total_steps": 40000, "loss": 0.0008, "lr": 9.035672924806515e-06, "epoch": 4.701525409902929, "percentage": 72.05, "elapsed_time": "6:36:39", "remaining_time": "2:33:52", "throughput": 2330.38, "total_tokens": 55460880} {"current_steps": 28825, "total_steps": 40000, "loss": 0.0165, "lr": 9.028119006682839e-06, "epoch": 4.702341137123746, "percentage": 72.06, "elapsed_time": "6:36:41", "remaining_time": "2:33:47", "throughput": 2330.56, "total_tokens": 55469952} {"current_steps": 28830, "total_steps": 40000, "loss": 0.0542, "lr": 9.020567551623935e-06, "epoch": 4.703156864344563, "percentage": 72.08, "elapsed_time": "6:36:43", "remaining_time": "2:33:42", "throughput": 2330.79, "total_tokens": 55480256} {"current_steps": 28835, "total_steps": 40000, "loss": 0.0004, "lr": 9.013018560794318e-06, "epoch": 4.70397259156538, "percentage": 72.09, "elapsed_time": "6:36:45", "remaining_time": "2:33:37", "throughput": 2330.97, "total_tokens": 55489584} {"current_steps": 28840, "total_steps": 40000, "loss": 0.1458, "lr": 9.005472035358139e-06, "epoch": 4.704788318786198, "percentage": 72.1, "elapsed_time": "6:36:47", "remaining_time": "2:33:32", "throughput": 2331.17, "total_tokens": 55499056} {"current_steps": 28845, "total_steps": 40000, "loss": 0.0006, "lr": 8.997927976479185e-06, "epoch": 4.705604046007015, "percentage": 72.11, "elapsed_time": "6:36:49", "remaining_time": "2:33:27", "throughput": 2331.39, "total_tokens": 55509264} {"current_steps": 28850, "total_steps": 40000, "loss": 0.0008, "lr": 8.99038638532082e-06, "epoch": 4.706419773227832, "percentage": 72.12, "elapsed_time": "6:36:51", "remaining_time": "2:33:22", "throughput": 2331.59, "total_tokens": 55518816} {"current_steps": 28855, "total_steps": 40000, "loss": 0.0007, "lr": 8.982847263046065e-06, "epoch": 4.70723550044865, "percentage": 72.14, "elapsed_time": "6:36:53", "remaining_time": "2:33:17", "throughput": 2331.82, "total_tokens": 55529024} {"current_steps": 28860, "total_steps": 40000, "loss": 0.0008, "lr": 8.975310610817555e-06, "epoch": 4.7080512276694675, "percentage": 72.15, "elapsed_time": "6:36:55", "remaining_time": "2:33:12", "throughput": 2332.01, "total_tokens": 55538352} {"current_steps": 28865, "total_steps": 40000, "loss": 0.0567, "lr": 8.967776429797528e-06, "epoch": 4.708866954890285, "percentage": 72.16, "elapsed_time": "6:36:57", "remaining_time": "2:33:07", "throughput": 2332.21, "total_tokens": 55548016} {"current_steps": 28870, "total_steps": 40000, "loss": 0.0009, "lr": 8.960244721147842e-06, "epoch": 4.709682682111102, "percentage": 72.17, "elapsed_time": "6:36:59", "remaining_time": "2:33:03", "throughput": 2332.48, "total_tokens": 55559376} {"current_steps": 28875, "total_steps": 40000, "loss": 0.0699, "lr": 8.952715486029995e-06, "epoch": 4.71049840933192, "percentage": 72.19, "elapsed_time": "6:37:01", "remaining_time": "2:32:58", "throughput": 2332.68, "total_tokens": 55568864} {"current_steps": 28880, "total_steps": 40000, "loss": 0.1046, "lr": 8.945188725605075e-06, "epoch": 4.711314136552737, "percentage": 72.2, "elapsed_time": "6:37:03", "remaining_time": "2:32:53", "throughput": 2332.92, "total_tokens": 55579360} {"current_steps": 28885, "total_steps": 40000, "loss": 0.0006, "lr": 8.937664441033817e-06, "epoch": 4.712129863773554, "percentage": 72.21, "elapsed_time": "6:37:06", "remaining_time": "2:32:48", "throughput": 2333.08, "total_tokens": 55588000} {"current_steps": 28890, "total_steps": 40000, "loss": 0.0004, "lr": 8.930142633476549e-06, "epoch": 4.712945590994371, "percentage": 72.22, "elapsed_time": "6:37:08", "remaining_time": "2:32:43", "throughput": 2333.23, "total_tokens": 55596576} {"current_steps": 28895, "total_steps": 40000, "loss": 0.0966, "lr": 8.92262330409323e-06, "epoch": 4.713761318215189, "percentage": 72.24, "elapsed_time": "6:37:10", "remaining_time": "2:32:38", "throughput": 2333.42, "total_tokens": 55605856} {"current_steps": 28900, "total_steps": 40000, "loss": 0.0359, "lr": 8.915106454043448e-06, "epoch": 4.714577045436006, "percentage": 72.25, "elapsed_time": "6:37:12", "remaining_time": "2:32:33", "throughput": 2333.61, "total_tokens": 55615168} {"current_steps": 28905, "total_steps": 40000, "loss": 0.0007, "lr": 8.90759208448638e-06, "epoch": 4.715392772656823, "percentage": 72.26, "elapsed_time": "6:37:14", "remaining_time": "2:32:28", "throughput": 2333.81, "total_tokens": 55624816} {"current_steps": 28910, "total_steps": 40000, "loss": 0.0003, "lr": 8.900080196580848e-06, "epoch": 4.716208499877641, "percentage": 72.28, "elapsed_time": "6:37:16", "remaining_time": "2:32:23", "throughput": 2333.99, "total_tokens": 55633920} {"current_steps": 28915, "total_steps": 40000, "loss": 0.0009, "lr": 8.892570791485267e-06, "epoch": 4.717024227098459, "percentage": 72.29, "elapsed_time": "6:37:18", "remaining_time": "2:32:18", "throughput": 2334.25, "total_tokens": 55644912} {"current_steps": 28920, "total_steps": 40000, "loss": 0.0007, "lr": 8.885063870357688e-06, "epoch": 4.717839954319276, "percentage": 72.3, "elapsed_time": "6:37:20", "remaining_time": "2:32:13", "throughput": 2334.43, "total_tokens": 55653952} {"current_steps": 28925, "total_steps": 40000, "loss": 0.0007, "lr": 8.87755943435578e-06, "epoch": 4.718655681540093, "percentage": 72.31, "elapsed_time": "6:37:22", "remaining_time": "2:32:09", "throughput": 2334.59, "total_tokens": 55662800} {"current_steps": 28930, "total_steps": 40000, "loss": 0.0025, "lr": 8.87005748463681e-06, "epoch": 4.71947140876091, "percentage": 72.32, "elapsed_time": "6:37:24", "remaining_time": "2:32:04", "throughput": 2334.8, "total_tokens": 55672544} {"current_steps": 28935, "total_steps": 40000, "loss": 0.0004, "lr": 8.862558022357681e-06, "epoch": 4.720287135981728, "percentage": 72.34, "elapsed_time": "6:37:26", "remaining_time": "2:31:59", "throughput": 2334.93, "total_tokens": 55680560} {"current_steps": 28940, "total_steps": 40000, "loss": 0.0008, "lr": 8.855061048674903e-06, "epoch": 4.721102863202545, "percentage": 72.35, "elapsed_time": "6:37:28", "remaining_time": "2:31:54", "throughput": 2335.12, "total_tokens": 55689984} {"current_steps": 28945, "total_steps": 40000, "loss": 0.1053, "lr": 8.847566564744595e-06, "epoch": 4.721918590423362, "percentage": 72.36, "elapsed_time": "6:37:30", "remaining_time": "2:31:49", "throughput": 2335.29, "total_tokens": 55698800} {"current_steps": 28950, "total_steps": 40000, "loss": 0.0237, "lr": 8.840074571722512e-06, "epoch": 4.72273431764418, "percentage": 72.38, "elapsed_time": "6:37:33", "remaining_time": "2:31:44", "throughput": 2335.49, "total_tokens": 55708512} {"current_steps": 28955, "total_steps": 40000, "loss": 0.0006, "lr": 8.832585070764002e-06, "epoch": 4.723550044864997, "percentage": 72.39, "elapsed_time": "6:37:35", "remaining_time": "2:31:39", "throughput": 2335.75, "total_tokens": 55719424} {"current_steps": 28960, "total_steps": 40000, "loss": 0.0007, "lr": 8.825098063024045e-06, "epoch": 4.7243657720858145, "percentage": 72.4, "elapsed_time": "6:37:37", "remaining_time": "2:31:34", "throughput": 2335.95, "total_tokens": 55729216} {"current_steps": 28965, "total_steps": 40000, "loss": 0.1673, "lr": 8.817613549657244e-06, "epoch": 4.725181499306632, "percentage": 72.41, "elapsed_time": "6:37:39", "remaining_time": "2:31:29", "throughput": 2336.15, "total_tokens": 55738752} {"current_steps": 28970, "total_steps": 40000, "loss": 0.0007, "lr": 8.810131531817783e-06, "epoch": 4.725997226527449, "percentage": 72.42, "elapsed_time": "6:37:41", "remaining_time": "2:31:24", "throughput": 2336.39, "total_tokens": 55749232} {"current_steps": 28975, "total_steps": 40000, "loss": 0.0006, "lr": 8.802652010659496e-06, "epoch": 4.726812953748267, "percentage": 72.44, "elapsed_time": "6:37:43", "remaining_time": "2:31:20", "throughput": 2336.59, "total_tokens": 55758816} {"current_steps": 28980, "total_steps": 40000, "loss": 0.0013, "lr": 8.795174987335827e-06, "epoch": 4.727628680969084, "percentage": 72.45, "elapsed_time": "6:37:45", "remaining_time": "2:31:15", "throughput": 2336.76, "total_tokens": 55767728} {"current_steps": 28985, "total_steps": 40000, "loss": 0.0024, "lr": 8.787700462999807e-06, "epoch": 4.728444408189901, "percentage": 72.46, "elapsed_time": "6:37:47", "remaining_time": "2:31:10", "throughput": 2336.98, "total_tokens": 55778000} {"current_steps": 28990, "total_steps": 40000, "loss": 0.0016, "lr": 8.780228438804122e-06, "epoch": 4.729260135410719, "percentage": 72.47, "elapsed_time": "6:37:49", "remaining_time": "2:31:05", "throughput": 2337.19, "total_tokens": 55787664} {"current_steps": 28995, "total_steps": 40000, "loss": 0.0905, "lr": 8.772758915901032e-06, "epoch": 4.730075862631536, "percentage": 72.49, "elapsed_time": "6:37:51", "remaining_time": "2:31:00", "throughput": 2337.39, "total_tokens": 55797392} {"current_steps": 29000, "total_steps": 40000, "loss": 0.0021, "lr": 8.765291895442443e-06, "epoch": 4.730891589852353, "percentage": 72.5, "elapsed_time": "6:37:53", "remaining_time": "2:30:55", "throughput": 2337.62, "total_tokens": 55807776} {"current_steps": 29000, "total_steps": 40000, "eval_loss": 0.31047356128692627, "epoch": 4.730891589852353, "percentage": 72.5, "elapsed_time": "6:39:14", "remaining_time": "2:31:26", "throughput": 2329.74, "total_tokens": 55807776} {"current_steps": 29005, "total_steps": 40000, "loss": 0.0004, "lr": 8.75782737857987e-06, "epoch": 4.7317073170731705, "percentage": 72.51, "elapsed_time": "6:39:18", "remaining_time": "2:31:21", "throughput": 2329.81, "total_tokens": 55818000} {"current_steps": 29010, "total_steps": 40000, "loss": 0.0006, "lr": 8.750365366464425e-06, "epoch": 4.732523044293988, "percentage": 72.52, "elapsed_time": "6:39:20", "remaining_time": "2:31:16", "throughput": 2330.01, "total_tokens": 55827584} {"current_steps": 29015, "total_steps": 40000, "loss": 0.0018, "lr": 8.742905860246838e-06, "epoch": 4.733338771514806, "percentage": 72.54, "elapsed_time": "6:39:22", "remaining_time": "2:31:12", "throughput": 2330.15, "total_tokens": 55835744} {"current_steps": 29020, "total_steps": 40000, "loss": 0.0009, "lr": 8.735448861077478e-06, "epoch": 4.734154498735623, "percentage": 72.55, "elapsed_time": "6:39:24", "remaining_time": "2:31:07", "throughput": 2330.35, "total_tokens": 55845472} {"current_steps": 29025, "total_steps": 40000, "loss": 0.0168, "lr": 8.727994370106288e-06, "epoch": 4.73497022595644, "percentage": 72.56, "elapsed_time": "6:39:26", "remaining_time": "2:31:02", "throughput": 2330.54, "total_tokens": 55854848} {"current_steps": 29030, "total_steps": 40000, "loss": 0.004, "lr": 8.720542388482861e-06, "epoch": 4.735785953177258, "percentage": 72.58, "elapsed_time": "6:39:28", "remaining_time": "2:30:57", "throughput": 2330.72, "total_tokens": 55863984} {"current_steps": 29035, "total_steps": 40000, "loss": 0.0107, "lr": 8.71309291735637e-06, "epoch": 4.736601680398075, "percentage": 72.59, "elapsed_time": "6:39:30", "remaining_time": "2:30:52", "throughput": 2330.85, "total_tokens": 55872624} {"current_steps": 29040, "total_steps": 40000, "loss": 0.1092, "lr": 8.705645957875621e-06, "epoch": 4.737417407618892, "percentage": 72.6, "elapsed_time": "6:39:32", "remaining_time": "2:30:47", "throughput": 2331.07, "total_tokens": 55882576} {"current_steps": 29045, "total_steps": 40000, "loss": 0.0009, "lr": 8.698201511189048e-06, "epoch": 4.738233134839709, "percentage": 72.61, "elapsed_time": "6:39:35", "remaining_time": "2:30:42", "throughput": 2331.28, "total_tokens": 55892496} {"current_steps": 29050, "total_steps": 40000, "loss": 0.0005, "lr": 8.690759578444649e-06, "epoch": 4.739048862060527, "percentage": 72.62, "elapsed_time": "6:39:37", "remaining_time": "2:30:37", "throughput": 2331.42, "total_tokens": 55900656} {"current_steps": 29055, "total_steps": 40000, "loss": 0.0002, "lr": 8.68332016079008e-06, "epoch": 4.7398645892813445, "percentage": 72.64, "elapsed_time": "6:39:39", "remaining_time": "2:30:32", "throughput": 2331.6, "total_tokens": 55909824} {"current_steps": 29060, "total_steps": 40000, "loss": 0.0631, "lr": 8.6758832593726e-06, "epoch": 4.740680316502162, "percentage": 72.65, "elapsed_time": "6:39:41", "remaining_time": "2:30:28", "throughput": 2331.84, "total_tokens": 55920576} {"current_steps": 29065, "total_steps": 40000, "loss": 0.0303, "lr": 8.668448875339053e-06, "epoch": 4.741496043722979, "percentage": 72.66, "elapsed_time": "6:39:43", "remaining_time": "2:30:23", "throughput": 2332.01, "total_tokens": 55929408} {"current_steps": 29070, "total_steps": 40000, "loss": 0.0008, "lr": 8.661017009835933e-06, "epoch": 4.742311770943797, "percentage": 72.67, "elapsed_time": "6:39:45", "remaining_time": "2:30:18", "throughput": 2332.21, "total_tokens": 55938928} {"current_steps": 29075, "total_steps": 40000, "loss": 0.0338, "lr": 8.653587664009311e-06, "epoch": 4.743127498164614, "percentage": 72.69, "elapsed_time": "6:39:47", "remaining_time": "2:30:13", "throughput": 2332.35, "total_tokens": 55947312} {"current_steps": 29080, "total_steps": 40000, "loss": 0.0008, "lr": 8.646160839004902e-06, "epoch": 4.743943225385431, "percentage": 72.7, "elapsed_time": "6:39:49", "remaining_time": "2:30:08", "throughput": 2332.55, "total_tokens": 55956912} {"current_steps": 29085, "total_steps": 40000, "loss": 0.0836, "lr": 8.638736535967998e-06, "epoch": 4.744758952606248, "percentage": 72.71, "elapsed_time": "6:39:51", "remaining_time": "2:30:03", "throughput": 2332.77, "total_tokens": 55966912} {"current_steps": 29090, "total_steps": 40000, "loss": 0.0005, "lr": 8.631314756043535e-06, "epoch": 4.745574679827066, "percentage": 72.72, "elapsed_time": "6:39:53", "remaining_time": "2:29:58", "throughput": 2333.0, "total_tokens": 55977264} {"current_steps": 29095, "total_steps": 40000, "loss": 0.0002, "lr": 8.62389550037603e-06, "epoch": 4.746390407047883, "percentage": 72.74, "elapsed_time": "6:39:55", "remaining_time": "2:29:53", "throughput": 2333.16, "total_tokens": 55986000} {"current_steps": 29100, "total_steps": 40000, "loss": 0.0002, "lr": 8.616478770109646e-06, "epoch": 4.7472061342687, "percentage": 72.75, "elapsed_time": "6:39:57", "remaining_time": "2:29:48", "throughput": 2333.34, "total_tokens": 55995184} {"current_steps": 29105, "total_steps": 40000, "loss": 0.0004, "lr": 8.609064566388111e-06, "epoch": 4.7480218614895175, "percentage": 72.76, "elapsed_time": "6:39:59", "remaining_time": "2:29:43", "throughput": 2333.58, "total_tokens": 56005712} {"current_steps": 29110, "total_steps": 40000, "loss": 0.1046, "lr": 8.601652890354815e-06, "epoch": 4.748837588710336, "percentage": 72.78, "elapsed_time": "6:40:01", "remaining_time": "2:29:39", "throughput": 2333.76, "total_tokens": 56014992} {"current_steps": 29115, "total_steps": 40000, "loss": 0.0006, "lr": 8.594243743152705e-06, "epoch": 4.749653315931153, "percentage": 72.79, "elapsed_time": "6:40:04", "remaining_time": "2:29:34", "throughput": 2333.95, "total_tokens": 56024288} {"current_steps": 29120, "total_steps": 40000, "loss": 0.0001, "lr": 8.58683712592438e-06, "epoch": 4.75046904315197, "percentage": 72.8, "elapsed_time": "6:40:06", "remaining_time": "2:29:29", "throughput": 2334.19, "total_tokens": 56034880} {"current_steps": 29125, "total_steps": 40000, "loss": 0.0411, "lr": 8.579433039812037e-06, "epoch": 4.751284770372787, "percentage": 72.81, "elapsed_time": "6:40:08", "remaining_time": "2:29:24", "throughput": 2334.4, "total_tokens": 56044896} {"current_steps": 29130, "total_steps": 40000, "loss": 0.0001, "lr": 8.572031485957466e-06, "epoch": 4.752100497593605, "percentage": 72.82, "elapsed_time": "6:40:10", "remaining_time": "2:29:19", "throughput": 2334.57, "total_tokens": 56053792} {"current_steps": 29135, "total_steps": 40000, "loss": 0.2008, "lr": 8.564632465502084e-06, "epoch": 4.752916224814422, "percentage": 72.84, "elapsed_time": "6:40:12", "remaining_time": "2:29:14", "throughput": 2334.77, "total_tokens": 56063344} {"current_steps": 29140, "total_steps": 40000, "loss": 0.0013, "lr": 8.557235979586928e-06, "epoch": 4.753731952035239, "percentage": 72.85, "elapsed_time": "6:40:14", "remaining_time": "2:29:09", "throughput": 2334.97, "total_tokens": 56073008} {"current_steps": 29145, "total_steps": 40000, "loss": 0.0002, "lr": 8.549842029352606e-06, "epoch": 4.754547679256056, "percentage": 72.86, "elapsed_time": "6:40:16", "remaining_time": "2:29:04", "throughput": 2335.22, "total_tokens": 56083952} {"current_steps": 29150, "total_steps": 40000, "loss": 0.032, "lr": 8.542450615939376e-06, "epoch": 4.755363406476874, "percentage": 72.88, "elapsed_time": "6:40:18", "remaining_time": "2:29:00", "throughput": 2335.48, "total_tokens": 56094928} {"current_steps": 29155, "total_steps": 40000, "loss": 0.0783, "lr": 8.535061740487082e-06, "epoch": 4.7561791336976915, "percentage": 72.89, "elapsed_time": "6:40:20", "remaining_time": "2:28:55", "throughput": 2335.67, "total_tokens": 56104352} {"current_steps": 29160, "total_steps": 40000, "loss": 0.0008, "lr": 8.527675404135168e-06, "epoch": 4.756994860918509, "percentage": 72.9, "elapsed_time": "6:40:22", "remaining_time": "2:28:50", "throughput": 2335.86, "total_tokens": 56113712} {"current_steps": 29165, "total_steps": 40000, "loss": 0.0001, "lr": 8.520291608022724e-06, "epoch": 4.757810588139327, "percentage": 72.91, "elapsed_time": "6:40:24", "remaining_time": "2:28:45", "throughput": 2335.99, "total_tokens": 56121776} {"current_steps": 29170, "total_steps": 40000, "loss": 0.055, "lr": 8.512910353288398e-06, "epoch": 4.758626315360144, "percentage": 72.92, "elapsed_time": "6:40:26", "remaining_time": "2:28:40", "throughput": 2336.21, "total_tokens": 56131744} {"current_steps": 29175, "total_steps": 40000, "loss": 0.0207, "lr": 8.505531641070486e-06, "epoch": 4.759442042580961, "percentage": 72.94, "elapsed_time": "6:40:28", "remaining_time": "2:28:35", "throughput": 2336.44, "total_tokens": 56142160} {"current_steps": 29180, "total_steps": 40000, "loss": 0.0013, "lr": 8.498155472506885e-06, "epoch": 4.760257769801778, "percentage": 72.95, "elapsed_time": "6:40:31", "remaining_time": "2:28:30", "throughput": 2336.62, "total_tokens": 56151488} {"current_steps": 29185, "total_steps": 40000, "loss": 0.0135, "lr": 8.49078184873508e-06, "epoch": 4.761073497022595, "percentage": 72.96, "elapsed_time": "6:40:33", "remaining_time": "2:28:25", "throughput": 2336.84, "total_tokens": 56161376} {"current_steps": 29190, "total_steps": 40000, "loss": 0.1196, "lr": 8.483410770892188e-06, "epoch": 4.761889224243413, "percentage": 72.97, "elapsed_time": "6:40:35", "remaining_time": "2:28:20", "throughput": 2337.0, "total_tokens": 56170288} {"current_steps": 29195, "total_steps": 40000, "loss": 0.091, "lr": 8.476042240114909e-06, "epoch": 4.76270495146423, "percentage": 72.99, "elapsed_time": "6:40:37", "remaining_time": "2:28:16", "throughput": 2337.21, "total_tokens": 56180048} {"current_steps": 29200, "total_steps": 40000, "loss": 0.0026, "lr": 8.468676257539568e-06, "epoch": 4.7635206786850475, "percentage": 73.0, "elapsed_time": "6:40:39", "remaining_time": "2:28:11", "throughput": 2337.38, "total_tokens": 56188960} {"current_steps": 29200, "total_steps": 40000, "eval_loss": 0.3253553509712219, "epoch": 4.7635206786850475, "percentage": 73.0, "elapsed_time": "6:42:00", "remaining_time": "2:28:41", "throughput": 2329.55, "total_tokens": 56188960} {"current_steps": 29205, "total_steps": 40000, "loss": 0.0007, "lr": 8.4613128243021e-06, "epoch": 4.7643364059058655, "percentage": 73.01, "elapsed_time": "6:42:03", "remaining_time": "2:28:36", "throughput": 2329.6, "total_tokens": 56198912} {"current_steps": 29210, "total_steps": 40000, "loss": 0.126, "lr": 8.453951941538028e-06, "epoch": 4.765152133126683, "percentage": 73.02, "elapsed_time": "6:42:05", "remaining_time": "2:28:31", "throughput": 2329.77, "total_tokens": 56207792} {"current_steps": 29215, "total_steps": 40000, "loss": 0.0638, "lr": 8.446593610382495e-06, "epoch": 4.7659678603475, "percentage": 73.04, "elapsed_time": "6:42:07", "remaining_time": "2:28:27", "throughput": 2329.95, "total_tokens": 56216784} {"current_steps": 29220, "total_steps": 40000, "loss": 0.0007, "lr": 8.439237831970259e-06, "epoch": 4.766783587568317, "percentage": 73.05, "elapsed_time": "6:42:10", "remaining_time": "2:28:22", "throughput": 2330.15, "total_tokens": 56226576} {"current_steps": 29225, "total_steps": 40000, "loss": 0.1036, "lr": 8.431884607435667e-06, "epoch": 4.767599314789135, "percentage": 73.06, "elapsed_time": "6:42:12", "remaining_time": "2:28:17", "throughput": 2330.34, "total_tokens": 56236032} {"current_steps": 29230, "total_steps": 40000, "loss": 0.0006, "lr": 8.424533937912665e-06, "epoch": 4.768415042009952, "percentage": 73.08, "elapsed_time": "6:42:14", "remaining_time": "2:28:12", "throughput": 2330.52, "total_tokens": 56245152} {"current_steps": 29235, "total_steps": 40000, "loss": 0.1592, "lr": 8.41718582453484e-06, "epoch": 4.769230769230769, "percentage": 73.09, "elapsed_time": "6:42:16", "remaining_time": "2:28:07", "throughput": 2330.77, "total_tokens": 56255920} {"current_steps": 29240, "total_steps": 40000, "loss": 0.0007, "lr": 8.409840268435346e-06, "epoch": 4.770046496451586, "percentage": 73.1, "elapsed_time": "6:42:18", "remaining_time": "2:28:02", "throughput": 2330.94, "total_tokens": 56264864} {"current_steps": 29245, "total_steps": 40000, "loss": 0.1003, "lr": 8.402497270746976e-06, "epoch": 4.770862223672404, "percentage": 73.11, "elapsed_time": "6:42:20", "remaining_time": "2:27:57", "throughput": 2331.14, "total_tokens": 56274496} {"current_steps": 29250, "total_steps": 40000, "loss": 0.0005, "lr": 8.395156832602095e-06, "epoch": 4.771677950893221, "percentage": 73.12, "elapsed_time": "6:42:22", "remaining_time": "2:27:52", "throughput": 2331.3, "total_tokens": 56283328} {"current_steps": 29255, "total_steps": 40000, "loss": 0.0003, "lr": 8.387818955132707e-06, "epoch": 4.772493678114039, "percentage": 73.14, "elapsed_time": "6:42:24", "remaining_time": "2:27:47", "throughput": 2331.5, "total_tokens": 56292992} {"current_steps": 29260, "total_steps": 40000, "loss": 0.0007, "lr": 8.38048363947039e-06, "epoch": 4.773309405334856, "percentage": 73.15, "elapsed_time": "6:42:26", "remaining_time": "2:27:43", "throughput": 2331.66, "total_tokens": 56301648} {"current_steps": 29265, "total_steps": 40000, "loss": 0.0003, "lr": 8.373150886746351e-06, "epoch": 4.774125132555674, "percentage": 73.16, "elapsed_time": "6:42:28", "remaining_time": "2:27:38", "throughput": 2331.83, "total_tokens": 56310560} {"current_steps": 29270, "total_steps": 40000, "loss": 0.0027, "lr": 8.365820698091397e-06, "epoch": 4.774940859776491, "percentage": 73.17, "elapsed_time": "6:42:30", "remaining_time": "2:27:33", "throughput": 2332.08, "total_tokens": 56321632} {"current_steps": 29275, "total_steps": 40000, "loss": 0.0005, "lr": 8.358493074635922e-06, "epoch": 4.775756586997308, "percentage": 73.19, "elapsed_time": "6:42:32", "remaining_time": "2:27:28", "throughput": 2332.3, "total_tokens": 56331568} {"current_steps": 29280, "total_steps": 40000, "loss": 0.0195, "lr": 8.351168017509948e-06, "epoch": 4.776572314218125, "percentage": 73.2, "elapsed_time": "6:42:34", "remaining_time": "2:27:23", "throughput": 2332.53, "total_tokens": 56342016} {"current_steps": 29285, "total_steps": 40000, "loss": 0.0007, "lr": 8.343845527843094e-06, "epoch": 4.777388041438943, "percentage": 73.21, "elapsed_time": "6:42:36", "remaining_time": "2:27:18", "throughput": 2332.73, "total_tokens": 56351584} {"current_steps": 29290, "total_steps": 40000, "loss": 0.0837, "lr": 8.336525606764566e-06, "epoch": 4.77820376865976, "percentage": 73.22, "elapsed_time": "6:42:39", "remaining_time": "2:27:13", "throughput": 2332.89, "total_tokens": 56360480} {"current_steps": 29295, "total_steps": 40000, "loss": 0.0008, "lr": 8.329208255403204e-06, "epoch": 4.779019495880577, "percentage": 73.24, "elapsed_time": "6:42:41", "remaining_time": "2:27:08", "throughput": 2333.09, "total_tokens": 56370160} {"current_steps": 29300, "total_steps": 40000, "loss": 0.0018, "lr": 8.321893474887426e-06, "epoch": 4.7798352231013945, "percentage": 73.25, "elapsed_time": "6:42:43", "remaining_time": "2:27:04", "throughput": 2333.28, "total_tokens": 56379488} {"current_steps": 29305, "total_steps": 40000, "loss": 0.0712, "lr": 8.31458126634526e-06, "epoch": 4.7806509503222125, "percentage": 73.26, "elapsed_time": "6:42:45", "remaining_time": "2:26:59", "throughput": 2333.46, "total_tokens": 56388800} {"current_steps": 29310, "total_steps": 40000, "loss": 0.0513, "lr": 8.30727163090435e-06, "epoch": 4.78146667754303, "percentage": 73.28, "elapsed_time": "6:42:47", "remaining_time": "2:26:54", "throughput": 2333.68, "total_tokens": 56398816} {"current_steps": 29315, "total_steps": 40000, "loss": 0.0481, "lr": 8.29996456969192e-06, "epoch": 4.782282404763847, "percentage": 73.29, "elapsed_time": "6:42:49", "remaining_time": "2:26:49", "throughput": 2333.9, "total_tokens": 56409152} {"current_steps": 29320, "total_steps": 40000, "loss": 0.0513, "lr": 8.292660083834818e-06, "epoch": 4.783098131984664, "percentage": 73.3, "elapsed_time": "6:42:51", "remaining_time": "2:26:44", "throughput": 2334.06, "total_tokens": 56417632} {"current_steps": 29325, "total_steps": 40000, "loss": 0.0596, "lr": 8.2853581744595e-06, "epoch": 4.783913859205482, "percentage": 73.31, "elapsed_time": "6:42:53", "remaining_time": "2:26:39", "throughput": 2334.21, "total_tokens": 56426144} {"current_steps": 29330, "total_steps": 40000, "loss": 0.0792, "lr": 8.278058842691991e-06, "epoch": 4.784729586426299, "percentage": 73.32, "elapsed_time": "6:42:55", "remaining_time": "2:26:34", "throughput": 2334.44, "total_tokens": 56436736} {"current_steps": 29335, "total_steps": 40000, "loss": 0.0254, "lr": 8.27076208965796e-06, "epoch": 4.785545313647116, "percentage": 73.34, "elapsed_time": "6:42:57", "remaining_time": "2:26:30", "throughput": 2334.64, "total_tokens": 56446208} {"current_steps": 29340, "total_steps": 40000, "loss": 0.1842, "lr": 8.263467916482637e-06, "epoch": 4.786361040867934, "percentage": 73.35, "elapsed_time": "6:42:59", "remaining_time": "2:26:25", "throughput": 2334.88, "total_tokens": 56457056} {"current_steps": 29345, "total_steps": 40000, "loss": 0.0736, "lr": 8.256176324290885e-06, "epoch": 4.787176768088751, "percentage": 73.36, "elapsed_time": "6:43:01", "remaining_time": "2:26:20", "throughput": 2335.14, "total_tokens": 56468032} {"current_steps": 29350, "total_steps": 40000, "loss": 0.1676, "lr": 8.248887314207168e-06, "epoch": 4.7879924953095685, "percentage": 73.38, "elapsed_time": "6:43:03", "remaining_time": "2:26:15", "throughput": 2335.38, "total_tokens": 56478768} {"current_steps": 29355, "total_steps": 40000, "loss": 0.0901, "lr": 8.24160088735553e-06, "epoch": 4.788808222530386, "percentage": 73.39, "elapsed_time": "6:43:06", "remaining_time": "2:26:10", "throughput": 2335.59, "total_tokens": 56488624} {"current_steps": 29360, "total_steps": 40000, "loss": 0.0437, "lr": 8.234317044859629e-06, "epoch": 4.789623949751203, "percentage": 73.4, "elapsed_time": "6:43:08", "remaining_time": "2:26:05", "throughput": 2335.81, "total_tokens": 56498832} {"current_steps": 29365, "total_steps": 40000, "loss": 0.0158, "lr": 8.227035787842744e-06, "epoch": 4.790439676972021, "percentage": 73.41, "elapsed_time": "6:43:10", "remaining_time": "2:26:00", "throughput": 2335.99, "total_tokens": 56507920} {"current_steps": 29370, "total_steps": 40000, "loss": 0.0609, "lr": 8.219757117427721e-06, "epoch": 4.791255404192838, "percentage": 73.42, "elapsed_time": "6:43:12", "remaining_time": "2:25:55", "throughput": 2336.22, "total_tokens": 56518464} {"current_steps": 29375, "total_steps": 40000, "loss": 0.0014, "lr": 8.212481034737014e-06, "epoch": 4.792071131413655, "percentage": 73.44, "elapsed_time": "6:43:14", "remaining_time": "2:25:51", "throughput": 2336.39, "total_tokens": 56527456} {"current_steps": 29380, "total_steps": 40000, "loss": 0.0356, "lr": 8.205207540892707e-06, "epoch": 4.792886858634473, "percentage": 73.45, "elapsed_time": "6:43:16", "remaining_time": "2:25:46", "throughput": 2336.6, "total_tokens": 56537280} {"current_steps": 29385, "total_steps": 40000, "loss": 0.004, "lr": 8.197936637016442e-06, "epoch": 4.79370258585529, "percentage": 73.46, "elapsed_time": "6:43:18", "remaining_time": "2:25:41", "throughput": 2336.81, "total_tokens": 56547168} {"current_steps": 29390, "total_steps": 40000, "loss": 0.0028, "lr": 8.190668324229508e-06, "epoch": 4.794518313076107, "percentage": 73.47, "elapsed_time": "6:43:20", "remaining_time": "2:25:36", "throughput": 2337.02, "total_tokens": 56557264} {"current_steps": 29395, "total_steps": 40000, "loss": 0.0003, "lr": 8.183402603652749e-06, "epoch": 4.7953340402969244, "percentage": 73.49, "elapsed_time": "6:43:22", "remaining_time": "2:25:31", "throughput": 2337.3, "total_tokens": 56568656} {"current_steps": 29400, "total_steps": 40000, "loss": 0.0, "lr": 8.176139476406635e-06, "epoch": 4.796149767517742, "percentage": 73.5, "elapsed_time": "6:43:24", "remaining_time": "2:25:26", "throughput": 2337.43, "total_tokens": 56576864} {"current_steps": 29400, "total_steps": 40000, "eval_loss": 0.3105483651161194, "epoch": 4.796149767517742, "percentage": 73.5, "elapsed_time": "6:44:45", "remaining_time": "2:25:56", "throughput": 2329.65, "total_tokens": 56576864} {"current_steps": 29405, "total_steps": 40000, "loss": 0.0002, "lr": 8.16887894361125e-06, "epoch": 4.79696549473856, "percentage": 73.51, "elapsed_time": "6:44:50", "remaining_time": "2:25:52", "throughput": 2329.58, "total_tokens": 56585440} {"current_steps": 29410, "total_steps": 40000, "loss": 0.0056, "lr": 8.161621006386233e-06, "epoch": 4.797781221959377, "percentage": 73.52, "elapsed_time": "6:44:52", "remaining_time": "2:25:47", "throughput": 2329.77, "total_tokens": 56594992} {"current_steps": 29415, "total_steps": 40000, "loss": 0.1377, "lr": 8.154365665850869e-06, "epoch": 4.798596949180194, "percentage": 73.54, "elapsed_time": "6:44:54", "remaining_time": "2:25:42", "throughput": 2329.91, "total_tokens": 56603232} {"current_steps": 29420, "total_steps": 40000, "loss": 0.0779, "lr": 8.147112923124005e-06, "epoch": 4.799412676401012, "percentage": 73.55, "elapsed_time": "6:44:56", "remaining_time": "2:25:37", "throughput": 2330.12, "total_tokens": 56613200} {"current_steps": 29425, "total_steps": 40000, "loss": 0.0005, "lr": 8.13986277932412e-06, "epoch": 4.800228403621829, "percentage": 73.56, "elapsed_time": "6:44:58", "remaining_time": "2:25:32", "throughput": 2330.34, "total_tokens": 56623504} {"current_steps": 29430, "total_steps": 40000, "loss": 0.0002, "lr": 8.132615235569277e-06, "epoch": 4.801044130842646, "percentage": 73.58, "elapsed_time": "6:45:00", "remaining_time": "2:25:27", "throughput": 2330.51, "total_tokens": 56632480} {"current_steps": 29435, "total_steps": 40000, "loss": 0.1125, "lr": 8.125370292977124e-06, "epoch": 4.801859858063463, "percentage": 73.59, "elapsed_time": "6:45:02", "remaining_time": "2:25:22", "throughput": 2330.66, "total_tokens": 56640848} {"current_steps": 29440, "total_steps": 40000, "loss": 0.0648, "lr": 8.118127952664944e-06, "epoch": 4.802675585284281, "percentage": 73.6, "elapsed_time": "6:45:04", "remaining_time": "2:25:17", "throughput": 2330.86, "total_tokens": 56650496} {"current_steps": 29445, "total_steps": 40000, "loss": 0.0004, "lr": 8.110888215749574e-06, "epoch": 4.803491312505098, "percentage": 73.61, "elapsed_time": "6:45:06", "remaining_time": "2:25:13", "throughput": 2331.03, "total_tokens": 56659424} {"current_steps": 29450, "total_steps": 40000, "loss": 0.0003, "lr": 8.10365108334749e-06, "epoch": 4.8043070397259156, "percentage": 73.62, "elapsed_time": "6:45:08", "remaining_time": "2:25:08", "throughput": 2331.25, "total_tokens": 56669632} {"current_steps": 29455, "total_steps": 40000, "loss": 0.0026, "lr": 8.096416556574743e-06, "epoch": 4.805122766946733, "percentage": 73.64, "elapsed_time": "6:45:10", "remaining_time": "2:25:03", "throughput": 2331.47, "total_tokens": 56679872} {"current_steps": 29460, "total_steps": 40000, "loss": 0.0778, "lr": 8.08918463654698e-06, "epoch": 4.805938494167551, "percentage": 73.65, "elapsed_time": "6:45:12", "remaining_time": "2:24:58", "throughput": 2331.64, "total_tokens": 56688912} {"current_steps": 29465, "total_steps": 40000, "loss": 0.0016, "lr": 8.081955324379458e-06, "epoch": 4.806754221388368, "percentage": 73.66, "elapsed_time": "6:45:14", "remaining_time": "2:24:53", "throughput": 2331.87, "total_tokens": 56699184} {"current_steps": 29470, "total_steps": 40000, "loss": 0.0001, "lr": 8.074728621187039e-06, "epoch": 4.807569948609185, "percentage": 73.67, "elapsed_time": "6:45:17", "remaining_time": "2:24:48", "throughput": 2332.12, "total_tokens": 56710160} {"current_steps": 29475, "total_steps": 40000, "loss": 0.0803, "lr": 8.067504528084158e-06, "epoch": 4.808385675830002, "percentage": 73.69, "elapsed_time": "6:45:19", "remaining_time": "2:24:43", "throughput": 2332.32, "total_tokens": 56719888} {"current_steps": 29480, "total_steps": 40000, "loss": 0.085, "lr": 8.060283046184861e-06, "epoch": 4.80920140305082, "percentage": 73.7, "elapsed_time": "6:45:21", "remaining_time": "2:24:39", "throughput": 2332.52, "total_tokens": 56729488} {"current_steps": 29485, "total_steps": 40000, "loss": 0.0006, "lr": 8.053064176602806e-06, "epoch": 4.810017130271637, "percentage": 73.71, "elapsed_time": "6:45:23", "remaining_time": "2:24:34", "throughput": 2332.74, "total_tokens": 56739840} {"current_steps": 29490, "total_steps": 40000, "loss": 0.0004, "lr": 8.045847920451216e-06, "epoch": 4.810832857492454, "percentage": 73.72, "elapsed_time": "6:45:25", "remaining_time": "2:24:29", "throughput": 2332.91, "total_tokens": 56748816} {"current_steps": 29495, "total_steps": 40000, "loss": 0.0017, "lr": 8.038634278842944e-06, "epoch": 4.8116485847132715, "percentage": 73.74, "elapsed_time": "6:45:27", "remaining_time": "2:24:24", "throughput": 2333.12, "total_tokens": 56758736} {"current_steps": 29500, "total_steps": 40000, "loss": 0.0005, "lr": 8.031423252890408e-06, "epoch": 4.8124643119340895, "percentage": 73.75, "elapsed_time": "6:45:29", "remaining_time": "2:24:19", "throughput": 2333.33, "total_tokens": 56768720} {"current_steps": 29505, "total_steps": 40000, "loss": 0.0005, "lr": 8.024214843705646e-06, "epoch": 4.813280039154907, "percentage": 73.76, "elapsed_time": "6:45:31", "remaining_time": "2:24:14", "throughput": 2333.52, "total_tokens": 56778208} {"current_steps": 29510, "total_steps": 40000, "loss": 0.0935, "lr": 8.017009052400295e-06, "epoch": 4.814095766375724, "percentage": 73.78, "elapsed_time": "6:45:33", "remaining_time": "2:24:09", "throughput": 2333.72, "total_tokens": 56787744} {"current_steps": 29515, "total_steps": 40000, "loss": 0.1589, "lr": 8.00980588008557e-06, "epoch": 4.814911493596542, "percentage": 73.79, "elapsed_time": "6:45:35", "remaining_time": "2:24:05", "throughput": 2333.94, "total_tokens": 56798048} {"current_steps": 29520, "total_steps": 40000, "loss": 0.0322, "lr": 8.002605327872282e-06, "epoch": 4.815727220817359, "percentage": 73.8, "elapsed_time": "6:45:37", "remaining_time": "2:24:00", "throughput": 2334.11, "total_tokens": 56806848} {"current_steps": 29525, "total_steps": 40000, "loss": 0.0018, "lr": 7.995407396870862e-06, "epoch": 4.816542948038176, "percentage": 73.81, "elapsed_time": "6:45:39", "remaining_time": "2:23:55", "throughput": 2334.3, "total_tokens": 56816416} {"current_steps": 29530, "total_steps": 40000, "loss": 0.0041, "lr": 7.988212088191307e-06, "epoch": 4.817358675258993, "percentage": 73.83, "elapsed_time": "6:45:41", "remaining_time": "2:23:50", "throughput": 2334.48, "total_tokens": 56825744} {"current_steps": 29535, "total_steps": 40000, "loss": 0.1069, "lr": 7.98101940294324e-06, "epoch": 4.81817440247981, "percentage": 73.84, "elapsed_time": "6:45:43", "remaining_time": "2:23:45", "throughput": 2334.72, "total_tokens": 56836384} {"current_steps": 29540, "total_steps": 40000, "loss": 0.0003, "lr": 7.973829342235847e-06, "epoch": 4.818990129700628, "percentage": 73.85, "elapsed_time": "6:45:46", "remaining_time": "2:23:40", "throughput": 2334.92, "total_tokens": 56846096} {"current_steps": 29545, "total_steps": 40000, "loss": 0.0006, "lr": 7.966641907177936e-06, "epoch": 4.8198058569214455, "percentage": 73.86, "elapsed_time": "6:45:48", "remaining_time": "2:23:35", "throughput": 2335.1, "total_tokens": 56855312} {"current_steps": 29550, "total_steps": 40000, "loss": 0.0002, "lr": 7.959457098877901e-06, "epoch": 4.820621584142263, "percentage": 73.88, "elapsed_time": "6:45:50", "remaining_time": "2:23:31", "throughput": 2335.28, "total_tokens": 56864416} {"current_steps": 29555, "total_steps": 40000, "loss": 0.0062, "lr": 7.952274918443719e-06, "epoch": 4.821437311363081, "percentage": 73.89, "elapsed_time": "6:45:52", "remaining_time": "2:23:26", "throughput": 2335.47, "total_tokens": 56873888} {"current_steps": 29560, "total_steps": 40000, "loss": 0.0002, "lr": 7.945095366982983e-06, "epoch": 4.822253038583898, "percentage": 73.9, "elapsed_time": "6:45:54", "remaining_time": "2:23:21", "throughput": 2335.67, "total_tokens": 56883696} {"current_steps": 29565, "total_steps": 40000, "loss": 0.1003, "lr": 7.937918445602871e-06, "epoch": 4.823068765804715, "percentage": 73.91, "elapsed_time": "6:45:56", "remaining_time": "2:23:16", "throughput": 2335.86, "total_tokens": 56893120} {"current_steps": 29570, "total_steps": 40000, "loss": 0.1932, "lr": 7.930744155410145e-06, "epoch": 4.823884493025532, "percentage": 73.92, "elapsed_time": "6:45:58", "remaining_time": "2:23:11", "throughput": 2336.02, "total_tokens": 56901952} {"current_steps": 29575, "total_steps": 40000, "loss": 0.0008, "lr": 7.923572497511181e-06, "epoch": 4.824700220246349, "percentage": 73.94, "elapsed_time": "6:46:00", "remaining_time": "2:23:06", "throughput": 2336.22, "total_tokens": 56911632} {"current_steps": 29580, "total_steps": 40000, "loss": 0.0191, "lr": 7.916403473011927e-06, "epoch": 4.825515947467167, "percentage": 73.95, "elapsed_time": "6:46:02", "remaining_time": "2:23:02", "throughput": 2336.41, "total_tokens": 56921072} {"current_steps": 29585, "total_steps": 40000, "loss": 0.0044, "lr": 7.909237083017953e-06, "epoch": 4.826331674687984, "percentage": 73.96, "elapsed_time": "6:46:04", "remaining_time": "2:22:57", "throughput": 2336.59, "total_tokens": 56930272} {"current_steps": 29590, "total_steps": 40000, "loss": 0.0683, "lr": 7.902073328634389e-06, "epoch": 4.827147401908801, "percentage": 73.98, "elapsed_time": "6:46:06", "remaining_time": "2:22:52", "throughput": 2336.81, "total_tokens": 56940416} {"current_steps": 29595, "total_steps": 40000, "loss": 0.0018, "lr": 7.894912210965987e-06, "epoch": 4.8279631291296194, "percentage": 73.99, "elapsed_time": "6:46:08", "remaining_time": "2:22:47", "throughput": 2336.99, "total_tokens": 56949584} {"current_steps": 29600, "total_steps": 40000, "loss": 0.0004, "lr": 7.887753731117075e-06, "epoch": 4.828778856350437, "percentage": 74.0, "elapsed_time": "6:46:10", "remaining_time": "2:22:42", "throughput": 2337.21, "total_tokens": 56959888} {"current_steps": 29600, "total_steps": 40000, "eval_loss": 0.29312625527381897, "epoch": 4.828778856350437, "percentage": 74.0, "elapsed_time": "6:47:31", "remaining_time": "2:23:11", "throughput": 2329.48, "total_tokens": 56959888} {"current_steps": 29605, "total_steps": 40000, "loss": 0.0001, "lr": 7.880597890191587e-06, "epoch": 4.829594583571254, "percentage": 74.01, "elapsed_time": "6:47:35", "remaining_time": "2:23:06", "throughput": 2329.46, "total_tokens": 56968656} {"current_steps": 29610, "total_steps": 40000, "loss": 0.0006, "lr": 7.873444689293036e-06, "epoch": 4.830410310792071, "percentage": 74.02, "elapsed_time": "6:47:37", "remaining_time": "2:23:02", "throughput": 2329.67, "total_tokens": 56978640} {"current_steps": 29615, "total_steps": 40000, "loss": 0.1001, "lr": 7.866294129524548e-06, "epoch": 4.831226038012889, "percentage": 74.04, "elapsed_time": "6:47:39", "remaining_time": "2:22:57", "throughput": 2329.85, "total_tokens": 56987712} {"current_steps": 29620, "total_steps": 40000, "loss": 0.0899, "lr": 7.859146211988811e-06, "epoch": 4.832041765233706, "percentage": 74.05, "elapsed_time": "6:47:41", "remaining_time": "2:22:52", "throughput": 2330.04, "total_tokens": 56997392} {"current_steps": 29625, "total_steps": 40000, "loss": 0.0011, "lr": 7.852000937788134e-06, "epoch": 4.832857492454523, "percentage": 74.06, "elapsed_time": "6:47:44", "remaining_time": "2:22:47", "throughput": 2330.28, "total_tokens": 57008048} {"current_steps": 29630, "total_steps": 40000, "loss": 0.0014, "lr": 7.844858308024416e-06, "epoch": 4.83367321967534, "percentage": 74.08, "elapsed_time": "6:47:46", "remaining_time": "2:22:42", "throughput": 2330.49, "total_tokens": 57018064} {"current_steps": 29635, "total_steps": 40000, "loss": 0.0011, "lr": 7.837718323799122e-06, "epoch": 4.834488946896158, "percentage": 74.09, "elapsed_time": "6:47:48", "remaining_time": "2:22:37", "throughput": 2330.67, "total_tokens": 57027248} {"current_steps": 29640, "total_steps": 40000, "loss": 0.0007, "lr": 7.83058098621334e-06, "epoch": 4.835304674116975, "percentage": 74.1, "elapsed_time": "6:47:50", "remaining_time": "2:22:33", "throughput": 2330.83, "total_tokens": 57036032} {"current_steps": 29645, "total_steps": 40000, "loss": 0.0012, "lr": 7.823446296367739e-06, "epoch": 4.8361204013377925, "percentage": 74.11, "elapsed_time": "6:47:52", "remaining_time": "2:22:28", "throughput": 2331.07, "total_tokens": 57046720} {"current_steps": 29650, "total_steps": 40000, "loss": 0.0472, "lr": 7.81631425536257e-06, "epoch": 4.83693612855861, "percentage": 74.12, "elapsed_time": "6:47:54", "remaining_time": "2:22:23", "throughput": 2331.3, "total_tokens": 57057200} {"current_steps": 29655, "total_steps": 40000, "loss": 0.0477, "lr": 7.809184864297689e-06, "epoch": 4.837751855779428, "percentage": 74.14, "elapsed_time": "6:47:56", "remaining_time": "2:22:18", "throughput": 2331.51, "total_tokens": 57067168} {"current_steps": 29660, "total_steps": 40000, "loss": 0.0272, "lr": 7.802058124272532e-06, "epoch": 4.838567583000245, "percentage": 74.15, "elapsed_time": "6:47:58", "remaining_time": "2:22:13", "throughput": 2331.66, "total_tokens": 57075584} {"current_steps": 29665, "total_steps": 40000, "loss": 0.0001, "lr": 7.79493403638614e-06, "epoch": 4.839383310221062, "percentage": 74.16, "elapsed_time": "6:48:00", "remaining_time": "2:22:08", "throughput": 2331.89, "total_tokens": 57086032} {"current_steps": 29670, "total_steps": 40000, "loss": 0.0028, "lr": 7.787812601737132e-06, "epoch": 4.840199037441879, "percentage": 74.17, "elapsed_time": "6:48:02", "remaining_time": "2:22:03", "throughput": 2332.09, "total_tokens": 57095824} {"current_steps": 29675, "total_steps": 40000, "loss": 0.0005, "lr": 7.780693821423715e-06, "epoch": 4.841014764662697, "percentage": 74.19, "elapsed_time": "6:48:04", "remaining_time": "2:21:59", "throughput": 2332.31, "total_tokens": 57105936} {"current_steps": 29680, "total_steps": 40000, "loss": 0.0288, "lr": 7.773577696543705e-06, "epoch": 4.841830491883514, "percentage": 74.2, "elapsed_time": "6:48:06", "remaining_time": "2:21:54", "throughput": 2332.57, "total_tokens": 57117280} {"current_steps": 29685, "total_steps": 40000, "loss": 0.0002, "lr": 7.7664642281945e-06, "epoch": 4.842646219104331, "percentage": 74.21, "elapsed_time": "6:48:08", "remaining_time": "2:21:49", "throughput": 2332.79, "total_tokens": 57127488} {"current_steps": 29690, "total_steps": 40000, "loss": 0.1044, "lr": 7.759353417473072e-06, "epoch": 4.8434619463251485, "percentage": 74.22, "elapsed_time": "6:48:10", "remaining_time": "2:21:44", "throughput": 2333.01, "total_tokens": 57137760} {"current_steps": 29695, "total_steps": 40000, "loss": 0.0012, "lr": 7.752245265476016e-06, "epoch": 4.8442776735459665, "percentage": 74.24, "elapsed_time": "6:48:13", "remaining_time": "2:21:39", "throughput": 2333.25, "total_tokens": 57148432} {"current_steps": 29700, "total_steps": 40000, "loss": 0.0003, "lr": 7.745139773299481e-06, "epoch": 4.845093400766784, "percentage": 74.25, "elapsed_time": "6:48:15", "remaining_time": "2:21:34", "throughput": 2333.46, "total_tokens": 57158352} {"current_steps": 29705, "total_steps": 40000, "loss": 0.0037, "lr": 7.738036942039232e-06, "epoch": 4.845909127987601, "percentage": 74.26, "elapsed_time": "6:48:17", "remaining_time": "2:21:30", "throughput": 2333.65, "total_tokens": 57167968} {"current_steps": 29710, "total_steps": 40000, "loss": 0.0749, "lr": 7.73093677279062e-06, "epoch": 4.846724855208418, "percentage": 74.28, "elapsed_time": "6:48:19", "remaining_time": "2:21:25", "throughput": 2333.85, "total_tokens": 57177536} {"current_steps": 29715, "total_steps": 40000, "loss": 0.001, "lr": 7.72383926664857e-06, "epoch": 4.847540582429236, "percentage": 74.29, "elapsed_time": "6:48:21", "remaining_time": "2:21:20", "throughput": 2334.05, "total_tokens": 57187344} {"current_steps": 29720, "total_steps": 40000, "loss": 0.0008, "lr": 7.716744424707606e-06, "epoch": 4.848356309650053, "percentage": 74.3, "elapsed_time": "6:48:23", "remaining_time": "2:21:15", "throughput": 2334.22, "total_tokens": 57196320} {"current_steps": 29725, "total_steps": 40000, "loss": 0.0011, "lr": 7.709652248061858e-06, "epoch": 4.84917203687087, "percentage": 74.31, "elapsed_time": "6:48:25", "remaining_time": "2:21:10", "throughput": 2334.4, "total_tokens": 57205472} {"current_steps": 29730, "total_steps": 40000, "loss": 0.0007, "lr": 7.702562737805017e-06, "epoch": 4.849987764091688, "percentage": 74.33, "elapsed_time": "6:48:27", "remaining_time": "2:21:05", "throughput": 2334.63, "total_tokens": 57215984} {"current_steps": 29735, "total_steps": 40000, "loss": 0.0357, "lr": 7.695475895030365e-06, "epoch": 4.850803491312505, "percentage": 74.34, "elapsed_time": "6:48:29", "remaining_time": "2:21:01", "throughput": 2334.77, "total_tokens": 57224288} {"current_steps": 29740, "total_steps": 40000, "loss": 0.0012, "lr": 7.6883917208308e-06, "epoch": 4.8516192185333225, "percentage": 74.35, "elapsed_time": "6:48:31", "remaining_time": "2:20:56", "throughput": 2334.93, "total_tokens": 57233248} {"current_steps": 29745, "total_steps": 40000, "loss": 0.0004, "lr": 7.681310216298778e-06, "epoch": 4.85243494575414, "percentage": 74.36, "elapsed_time": "6:48:33", "remaining_time": "2:20:51", "throughput": 2335.15, "total_tokens": 57243456} {"current_steps": 29750, "total_steps": 40000, "loss": 0.0004, "lr": 7.674231382526367e-06, "epoch": 4.853250672974957, "percentage": 74.38, "elapsed_time": "6:48:35", "remaining_time": "2:20:46", "throughput": 2335.36, "total_tokens": 57253408} {"current_steps": 29755, "total_steps": 40000, "loss": 0.0021, "lr": 7.667155220605198e-06, "epoch": 4.854066400195775, "percentage": 74.39, "elapsed_time": "6:48:37", "remaining_time": "2:20:41", "throughput": 2335.56, "total_tokens": 57263072} {"current_steps": 29760, "total_steps": 40000, "loss": 0.1532, "lr": 7.660081731626515e-06, "epoch": 4.854882127416592, "percentage": 74.4, "elapsed_time": "6:48:39", "remaining_time": "2:20:36", "throughput": 2335.79, "total_tokens": 57273552} {"current_steps": 29765, "total_steps": 40000, "loss": 0.0128, "lr": 7.653010916681141e-06, "epoch": 4.855697854637409, "percentage": 74.41, "elapsed_time": "6:48:42", "remaining_time": "2:20:32", "throughput": 2335.97, "total_tokens": 57282752} {"current_steps": 29770, "total_steps": 40000, "loss": 0.0703, "lr": 7.645942776859472e-06, "epoch": 4.856513581858227, "percentage": 74.42, "elapsed_time": "6:48:44", "remaining_time": "2:20:27", "throughput": 2336.14, "total_tokens": 57291872} {"current_steps": 29775, "total_steps": 40000, "loss": 0.0004, "lr": 7.63887731325152e-06, "epoch": 4.857329309079044, "percentage": 74.44, "elapsed_time": "6:48:46", "remaining_time": "2:20:22", "throughput": 2336.3, "total_tokens": 57300496} {"current_steps": 29780, "total_steps": 40000, "loss": 0.0006, "lr": 7.63181452694685e-06, "epoch": 4.858145036299861, "percentage": 74.45, "elapsed_time": "6:48:48", "remaining_time": "2:20:17", "throughput": 2336.47, "total_tokens": 57309616} {"current_steps": 29785, "total_steps": 40000, "loss": 0.0018, "lr": 7.624754419034644e-06, "epoch": 4.858960763520678, "percentage": 74.46, "elapsed_time": "6:48:50", "remaining_time": "2:20:12", "throughput": 2336.65, "total_tokens": 57318752} {"current_steps": 29790, "total_steps": 40000, "loss": 0.0003, "lr": 7.6176969906036645e-06, "epoch": 4.859776490741496, "percentage": 74.48, "elapsed_time": "6:48:52", "remaining_time": "2:20:08", "throughput": 2336.85, "total_tokens": 57328608} {"current_steps": 29795, "total_steps": 40000, "loss": 0.1093, "lr": 7.610642242742242e-06, "epoch": 4.8605922179623136, "percentage": 74.49, "elapsed_time": "6:48:54", "remaining_time": "2:20:03", "throughput": 2337.02, "total_tokens": 57337552} {"current_steps": 29800, "total_steps": 40000, "loss": 0.002, "lr": 7.603590176538322e-06, "epoch": 4.861407945183131, "percentage": 74.5, "elapsed_time": "6:48:56", "remaining_time": "2:19:58", "throughput": 2337.24, "total_tokens": 57347776} {"current_steps": 29800, "total_steps": 40000, "eval_loss": 0.3229916989803314, "epoch": 4.861407945183131, "percentage": 74.5, "elapsed_time": "6:50:17", "remaining_time": "2:20:26", "throughput": 2329.56, "total_tokens": 57347776} {"current_steps": 29805, "total_steps": 40000, "loss": 0.0259, "lr": 7.596540793079404e-06, "epoch": 4.862223672403948, "percentage": 74.51, "elapsed_time": "6:50:21", "remaining_time": "2:20:21", "throughput": 2329.64, "total_tokens": 57358224} {"current_steps": 29810, "total_steps": 40000, "loss": 0.0002, "lr": 7.5894940934526125e-06, "epoch": 4.863039399624766, "percentage": 74.52, "elapsed_time": "6:50:23", "remaining_time": "2:20:16", "throughput": 2329.79, "total_tokens": 57366768} {"current_steps": 29815, "total_steps": 40000, "loss": 0.0927, "lr": 7.582450078744621e-06, "epoch": 4.863855126845583, "percentage": 74.54, "elapsed_time": "6:50:25", "remaining_time": "2:20:12", "throughput": 2330.01, "total_tokens": 57377024} {"current_steps": 29820, "total_steps": 40000, "loss": 0.0899, "lr": 7.575408750041707e-06, "epoch": 4.8646708540664, "percentage": 74.55, "elapsed_time": "6:50:27", "remaining_time": "2:20:07", "throughput": 2330.22, "total_tokens": 57386976} {"current_steps": 29825, "total_steps": 40000, "loss": 0.0002, "lr": 7.568370108429732e-06, "epoch": 4.865486581287217, "percentage": 74.56, "elapsed_time": "6:50:29", "remaining_time": "2:20:02", "throughput": 2330.41, "total_tokens": 57396480} {"current_steps": 29830, "total_steps": 40000, "loss": 0.0471, "lr": 7.561334154994154e-06, "epoch": 4.866302308508035, "percentage": 74.58, "elapsed_time": "6:50:31", "remaining_time": "2:19:57", "throughput": 2330.64, "total_tokens": 57407072} {"current_steps": 29835, "total_steps": 40000, "loss": 0.0879, "lr": 7.55430089081999e-06, "epoch": 4.867118035728852, "percentage": 74.59, "elapsed_time": "6:50:33", "remaining_time": "2:19:52", "throughput": 2330.84, "total_tokens": 57416768} {"current_steps": 29840, "total_steps": 40000, "loss": 0.0693, "lr": 7.547270316991864e-06, "epoch": 4.8679337629496695, "percentage": 74.6, "elapsed_time": "6:50:35", "remaining_time": "2:19:48", "throughput": 2331.02, "total_tokens": 57426256} {"current_steps": 29845, "total_steps": 40000, "loss": 0.0152, "lr": 7.5402424345939884e-06, "epoch": 4.868749490170487, "percentage": 74.61, "elapsed_time": "6:50:37", "remaining_time": "2:19:43", "throughput": 2331.24, "total_tokens": 57436480} {"current_steps": 29850, "total_steps": 40000, "loss": 0.0843, "lr": 7.533217244710133e-06, "epoch": 4.869565217391305, "percentage": 74.62, "elapsed_time": "6:50:39", "remaining_time": "2:19:38", "throughput": 2331.4, "total_tokens": 57445200} {"current_steps": 29855, "total_steps": 40000, "loss": 0.0006, "lr": 7.52619474842369e-06, "epoch": 4.870380944612122, "percentage": 74.64, "elapsed_time": "6:50:41", "remaining_time": "2:19:33", "throughput": 2331.59, "total_tokens": 57454704} {"current_steps": 29860, "total_steps": 40000, "loss": 0.0002, "lr": 7.519174946817597e-06, "epoch": 4.871196671832939, "percentage": 74.65, "elapsed_time": "6:50:43", "remaining_time": "2:19:28", "throughput": 2331.81, "total_tokens": 57464864} {"current_steps": 29865, "total_steps": 40000, "loss": 0.0002, "lr": 7.512157840974407e-06, "epoch": 4.872012399053756, "percentage": 74.66, "elapsed_time": "6:50:46", "remaining_time": "2:19:23", "throughput": 2332.02, "total_tokens": 57474880} {"current_steps": 29870, "total_steps": 40000, "loss": 0.0006, "lr": 7.5051434319762496e-06, "epoch": 4.872828126274574, "percentage": 74.67, "elapsed_time": "6:50:48", "remaining_time": "2:19:19", "throughput": 2332.17, "total_tokens": 57483408} {"current_steps": 29875, "total_steps": 40000, "loss": 0.0942, "lr": 7.498131720904822e-06, "epoch": 4.873643853495391, "percentage": 74.69, "elapsed_time": "6:50:50", "remaining_time": "2:19:14", "throughput": 2332.35, "total_tokens": 57492800} {"current_steps": 29880, "total_steps": 40000, "loss": 0.1977, "lr": 7.491122708841433e-06, "epoch": 4.874459580716208, "percentage": 74.7, "elapsed_time": "6:50:52", "remaining_time": "2:19:09", "throughput": 2332.56, "total_tokens": 57502720} {"current_steps": 29885, "total_steps": 40000, "loss": 0.0005, "lr": 7.4841163968669524e-06, "epoch": 4.8752753079370255, "percentage": 74.71, "elapsed_time": "6:50:54", "remaining_time": "2:19:04", "throughput": 2332.75, "total_tokens": 57512272} {"current_steps": 29890, "total_steps": 40000, "loss": 0.1864, "lr": 7.4771127860618355e-06, "epoch": 4.8760910351578435, "percentage": 74.72, "elapsed_time": "6:50:56", "remaining_time": "2:18:59", "throughput": 2332.89, "total_tokens": 57520496} {"current_steps": 29895, "total_steps": 40000, "loss": 0.0008, "lr": 7.470111877506139e-06, "epoch": 4.876906762378661, "percentage": 74.74, "elapsed_time": "6:50:58", "remaining_time": "2:18:54", "throughput": 2333.03, "total_tokens": 57528912} {"current_steps": 29900, "total_steps": 40000, "loss": 0.0008, "lr": 7.463113672279479e-06, "epoch": 4.877722489599478, "percentage": 74.75, "elapsed_time": "6:51:00", "remaining_time": "2:18:50", "throughput": 2333.2, "total_tokens": 57538048} {"current_steps": 29905, "total_steps": 40000, "loss": 0.0009, "lr": 7.456118171461071e-06, "epoch": 4.878538216820296, "percentage": 74.76, "elapsed_time": "6:51:02", "remaining_time": "2:18:45", "throughput": 2333.38, "total_tokens": 57547104} {"current_steps": 29910, "total_steps": 40000, "loss": 0.0009, "lr": 7.449125376129721e-06, "epoch": 4.879353944041113, "percentage": 74.78, "elapsed_time": "6:51:04", "remaining_time": "2:18:40", "throughput": 2333.55, "total_tokens": 57556352} {"current_steps": 29915, "total_steps": 40000, "loss": 0.0003, "lr": 7.442135287363788e-06, "epoch": 4.88016967126193, "percentage": 74.79, "elapsed_time": "6:51:06", "remaining_time": "2:18:35", "throughput": 2333.75, "total_tokens": 57566032} {"current_steps": 29920, "total_steps": 40000, "loss": 0.0002, "lr": 7.435147906241247e-06, "epoch": 4.880985398482747, "percentage": 74.8, "elapsed_time": "6:51:08", "remaining_time": "2:18:30", "throughput": 2333.97, "total_tokens": 57576320} {"current_steps": 29925, "total_steps": 40000, "loss": 0.0009, "lr": 7.428163233839624e-06, "epoch": 4.881801125703564, "percentage": 74.81, "elapsed_time": "6:51:10", "remaining_time": "2:18:26", "throughput": 2334.13, "total_tokens": 57584960} {"current_steps": 29930, "total_steps": 40000, "loss": 0.0701, "lr": 7.4211812712360525e-06, "epoch": 4.882616852924382, "percentage": 74.83, "elapsed_time": "6:51:12", "remaining_time": "2:18:21", "throughput": 2334.32, "total_tokens": 57594624} {"current_steps": 29935, "total_steps": 40000, "loss": 0.1055, "lr": 7.4142020195072464e-06, "epoch": 4.883432580145199, "percentage": 74.84, "elapsed_time": "6:51:15", "remaining_time": "2:18:16", "throughput": 2334.48, "total_tokens": 57603424} {"current_steps": 29940, "total_steps": 40000, "loss": 0.0067, "lr": 7.407225479729479e-06, "epoch": 4.884248307366017, "percentage": 74.85, "elapsed_time": "6:51:17", "remaining_time": "2:18:11", "throughput": 2334.67, "total_tokens": 57612944} {"current_steps": 29945, "total_steps": 40000, "loss": 0.0002, "lr": 7.400251652978632e-06, "epoch": 4.885064034586835, "percentage": 74.86, "elapsed_time": "6:51:19", "remaining_time": "2:18:06", "throughput": 2334.88, "total_tokens": 57623024} {"current_steps": 29950, "total_steps": 40000, "loss": 0.0335, "lr": 7.393280540330147e-06, "epoch": 4.885879761807652, "percentage": 74.88, "elapsed_time": "6:51:21", "remaining_time": "2:18:02", "throughput": 2335.1, "total_tokens": 57633120} {"current_steps": 29955, "total_steps": 40000, "loss": 0.0044, "lr": 7.386312142859069e-06, "epoch": 4.886695489028469, "percentage": 74.89, "elapsed_time": "6:51:23", "remaining_time": "2:17:57", "throughput": 2335.24, "total_tokens": 57641552} {"current_steps": 29960, "total_steps": 40000, "loss": 0.0027, "lr": 7.379346461640008e-06, "epoch": 4.887511216249286, "percentage": 74.9, "elapsed_time": "6:51:25", "remaining_time": "2:17:52", "throughput": 2335.46, "total_tokens": 57651888} {"current_steps": 29965, "total_steps": 40000, "loss": 0.0822, "lr": 7.372383497747149e-06, "epoch": 4.888326943470103, "percentage": 74.91, "elapsed_time": "6:51:27", "remaining_time": "2:17:47", "throughput": 2335.61, "total_tokens": 57660416} {"current_steps": 29970, "total_steps": 40000, "loss": 0.0004, "lr": 7.3654232522542775e-06, "epoch": 4.889142670690921, "percentage": 74.92, "elapsed_time": "6:51:29", "remaining_time": "2:17:42", "throughput": 2335.8, "total_tokens": 57669904} {"current_steps": 29975, "total_steps": 40000, "loss": 0.0325, "lr": 7.358465726234756e-06, "epoch": 4.889958397911738, "percentage": 74.94, "elapsed_time": "6:51:31", "remaining_time": "2:17:38", "throughput": 2335.97, "total_tokens": 57678800} {"current_steps": 29980, "total_steps": 40000, "loss": 0.0003, "lr": 7.351510920761512e-06, "epoch": 4.890774125132555, "percentage": 74.95, "elapsed_time": "6:51:33", "remaining_time": "2:17:33", "throughput": 2336.17, "total_tokens": 57688768} {"current_steps": 29985, "total_steps": 40000, "loss": 0.0004, "lr": 7.344558836907067e-06, "epoch": 4.891589852353373, "percentage": 74.96, "elapsed_time": "6:51:35", "remaining_time": "2:17:28", "throughput": 2336.39, "total_tokens": 57698960} {"current_steps": 29990, "total_steps": 40000, "loss": 0.0005, "lr": 7.3376094757435285e-06, "epoch": 4.8924055795741905, "percentage": 74.98, "elapsed_time": "6:51:37", "remaining_time": "2:17:23", "throughput": 2336.6, "total_tokens": 57708896} {"current_steps": 29995, "total_steps": 40000, "loss": 0.0099, "lr": 7.330662838342561e-06, "epoch": 4.893221306795008, "percentage": 74.99, "elapsed_time": "6:51:39", "remaining_time": "2:17:18", "throughput": 2336.79, "total_tokens": 57718464} {"current_steps": 30000, "total_steps": 40000, "loss": 0.0001, "lr": 7.323718925775438e-06, "epoch": 4.894037034015825, "percentage": 75.0, "elapsed_time": "6:51:41", "remaining_time": "2:17:13", "throughput": 2336.94, "total_tokens": 57727072} {"current_steps": 30000, "total_steps": 40000, "eval_loss": 0.31458866596221924, "epoch": 4.894037034015825, "percentage": 75.0, "elapsed_time": "6:53:02", "remaining_time": "2:17:40", "throughput": 2329.31, "total_tokens": 57727072} {"current_steps": 30005, "total_steps": 40000, "loss": 0.1091, "lr": 7.316777739112985e-06, "epoch": 4.894852761236643, "percentage": 75.01, "elapsed_time": "6:53:08", "remaining_time": "2:17:37", "throughput": 2329.11, "total_tokens": 57734992} {"current_steps": 30010, "total_steps": 40000, "loss": 0.002, "lr": 7.309839279425626e-06, "epoch": 4.89566848845746, "percentage": 75.02, "elapsed_time": "6:53:10", "remaining_time": "2:17:32", "throughput": 2329.34, "total_tokens": 57745408} {"current_steps": 30015, "total_steps": 40000, "loss": 0.0004, "lr": 7.302903547783366e-06, "epoch": 4.896484215678277, "percentage": 75.04, "elapsed_time": "6:53:12", "remaining_time": "2:17:27", "throughput": 2329.56, "total_tokens": 57755888} {"current_steps": 30020, "total_steps": 40000, "loss": 0.0871, "lr": 7.2959705452557644e-06, "epoch": 4.897299942899094, "percentage": 75.05, "elapsed_time": "6:53:14", "remaining_time": "2:17:22", "throughput": 2329.82, "total_tokens": 57766992} {"current_steps": 30025, "total_steps": 40000, "loss": 0.0005, "lr": 7.289040272911996e-06, "epoch": 4.898115670119912, "percentage": 75.06, "elapsed_time": "6:53:16", "remaining_time": "2:17:18", "throughput": 2330.05, "total_tokens": 57777712} {"current_steps": 30030, "total_steps": 40000, "loss": 0.0833, "lr": 7.282112731820789e-06, "epoch": 4.898931397340729, "percentage": 75.08, "elapsed_time": "6:53:18", "remaining_time": "2:17:13", "throughput": 2330.29, "total_tokens": 57788448} {"current_steps": 30035, "total_steps": 40000, "loss": 0.0002, "lr": 7.275187923050447e-06, "epoch": 4.8997471245615465, "percentage": 75.09, "elapsed_time": "6:53:20", "remaining_time": "2:17:08", "throughput": 2330.49, "total_tokens": 57798192} {"current_steps": 30040, "total_steps": 40000, "loss": 0.0008, "lr": 7.268265847668879e-06, "epoch": 4.900562851782364, "percentage": 75.1, "elapsed_time": "6:53:22", "remaining_time": "2:17:03", "throughput": 2330.71, "total_tokens": 57808400} {"current_steps": 30045, "total_steps": 40000, "loss": 0.0143, "lr": 7.261346506743538e-06, "epoch": 4.901378579003182, "percentage": 75.11, "elapsed_time": "6:53:25", "remaining_time": "2:16:58", "throughput": 2330.93, "total_tokens": 57818720} {"current_steps": 30050, "total_steps": 40000, "loss": 0.1486, "lr": 7.254429901341486e-06, "epoch": 4.902194306223999, "percentage": 75.12, "elapsed_time": "6:53:27", "remaining_time": "2:16:53", "throughput": 2331.11, "total_tokens": 57828032} {"current_steps": 30055, "total_steps": 40000, "loss": 0.011, "lr": 7.247516032529356e-06, "epoch": 4.903010033444816, "percentage": 75.14, "elapsed_time": "6:53:29", "remaining_time": "2:16:49", "throughput": 2331.27, "total_tokens": 57836992} {"current_steps": 30060, "total_steps": 40000, "loss": 0.0009, "lr": 7.240604901373338e-06, "epoch": 4.903825760665633, "percentage": 75.15, "elapsed_time": "6:53:31", "remaining_time": "2:16:44", "throughput": 2331.45, "total_tokens": 57846080} {"current_steps": 30065, "total_steps": 40000, "loss": 0.027, "lr": 7.233696508939223e-06, "epoch": 4.904641487886451, "percentage": 75.16, "elapsed_time": "6:53:33", "remaining_time": "2:16:39", "throughput": 2331.64, "total_tokens": 57855776} {"current_steps": 30070, "total_steps": 40000, "loss": 0.0006, "lr": 7.226790856292376e-06, "epoch": 4.905457215107268, "percentage": 75.17, "elapsed_time": "6:53:35", "remaining_time": "2:16:34", "throughput": 2331.85, "total_tokens": 57865872} {"current_steps": 30075, "total_steps": 40000, "loss": 0.0006, "lr": 7.219887944497727e-06, "epoch": 4.906272942328085, "percentage": 75.19, "elapsed_time": "6:53:37", "remaining_time": "2:16:29", "throughput": 2332.06, "total_tokens": 57875728} {"current_steps": 30080, "total_steps": 40000, "loss": 0.0002, "lr": 7.2129877746198e-06, "epoch": 4.907088669548903, "percentage": 75.2, "elapsed_time": "6:53:39", "remaining_time": "2:16:25", "throughput": 2332.22, "total_tokens": 57884688} {"current_steps": 30085, "total_steps": 40000, "loss": 0.0821, "lr": 7.20609034772268e-06, "epoch": 4.9079043967697205, "percentage": 75.21, "elapsed_time": "6:53:41", "remaining_time": "2:16:20", "throughput": 2332.44, "total_tokens": 57894944} {"current_steps": 30090, "total_steps": 40000, "loss": 0.1919, "lr": 7.19919566487004e-06, "epoch": 4.908720123990538, "percentage": 75.22, "elapsed_time": "6:53:43", "remaining_time": "2:16:15", "throughput": 2332.63, "total_tokens": 57904448} {"current_steps": 30095, "total_steps": 40000, "loss": 0.0031, "lr": 7.192303727125132e-06, "epoch": 4.909535851211355, "percentage": 75.24, "elapsed_time": "6:53:45", "remaining_time": "2:16:10", "throughput": 2332.82, "total_tokens": 57914096} {"current_steps": 30100, "total_steps": 40000, "loss": 0.0461, "lr": 7.185414535550777e-06, "epoch": 4.910351578432172, "percentage": 75.25, "elapsed_time": "6:53:47", "remaining_time": "2:16:05", "throughput": 2333.04, "total_tokens": 57924352} {"current_steps": 30105, "total_steps": 40000, "loss": 0.0715, "lr": 7.178528091209363e-06, "epoch": 4.91116730565299, "percentage": 75.26, "elapsed_time": "6:53:49", "remaining_time": "2:16:01", "throughput": 2333.27, "total_tokens": 57935024} {"current_steps": 30110, "total_steps": 40000, "loss": 0.0016, "lr": 7.171644395162888e-06, "epoch": 4.911983032873807, "percentage": 75.28, "elapsed_time": "6:53:51", "remaining_time": "2:15:56", "throughput": 2333.51, "total_tokens": 57945696} {"current_steps": 30115, "total_steps": 40000, "loss": 0.0544, "lr": 7.164763448472881e-06, "epoch": 4.912798760094624, "percentage": 75.29, "elapsed_time": "6:53:54", "remaining_time": "2:15:51", "throughput": 2333.73, "total_tokens": 57955984} {"current_steps": 30120, "total_steps": 40000, "loss": 0.0005, "lr": 7.157885252200491e-06, "epoch": 4.913614487315442, "percentage": 75.3, "elapsed_time": "6:53:56", "remaining_time": "2:15:46", "throughput": 2333.93, "total_tokens": 57965856} {"current_steps": 30125, "total_steps": 40000, "loss": 0.1112, "lr": 7.151009807406403e-06, "epoch": 4.914430214536259, "percentage": 75.31, "elapsed_time": "6:53:58", "remaining_time": "2:15:41", "throughput": 2334.12, "total_tokens": 57975328} {"current_steps": 30130, "total_steps": 40000, "loss": 0.0115, "lr": 7.144137115150909e-06, "epoch": 4.915245941757076, "percentage": 75.33, "elapsed_time": "6:54:00", "remaining_time": "2:15:37", "throughput": 2334.37, "total_tokens": 57986352} {"current_steps": 30135, "total_steps": 40000, "loss": 0.0108, "lr": 7.1372671764938725e-06, "epoch": 4.9160616689778935, "percentage": 75.34, "elapsed_time": "6:54:02", "remaining_time": "2:15:32", "throughput": 2334.6, "total_tokens": 57997008} {"current_steps": 30140, "total_steps": 40000, "loss": 0.0493, "lr": 7.130399992494705e-06, "epoch": 4.916877396198711, "percentage": 75.35, "elapsed_time": "6:54:04", "remaining_time": "2:15:27", "throughput": 2334.74, "total_tokens": 58005376} {"current_steps": 30145, "total_steps": 40000, "loss": 0.0012, "lr": 7.123535564212419e-06, "epoch": 4.917693123419529, "percentage": 75.36, "elapsed_time": "6:54:06", "remaining_time": "2:15:22", "throughput": 2334.95, "total_tokens": 58015488} {"current_steps": 30150, "total_steps": 40000, "loss": 0.14, "lr": 7.116673892705611e-06, "epoch": 4.918508850640346, "percentage": 75.38, "elapsed_time": "6:54:08", "remaining_time": "2:15:18", "throughput": 2335.16, "total_tokens": 58025536} {"current_steps": 30155, "total_steps": 40000, "loss": 0.0361, "lr": 7.109814979032415e-06, "epoch": 4.919324577861163, "percentage": 75.39, "elapsed_time": "6:54:10", "remaining_time": "2:15:13", "throughput": 2335.38, "total_tokens": 58035760} {"current_steps": 30160, "total_steps": 40000, "loss": 0.2172, "lr": 7.102958824250577e-06, "epoch": 4.920140305081981, "percentage": 75.4, "elapsed_time": "6:54:12", "remaining_time": "2:15:08", "throughput": 2335.55, "total_tokens": 58044944} {"current_steps": 30165, "total_steps": 40000, "loss": 0.0471, "lr": 7.096105429417393e-06, "epoch": 4.920956032302798, "percentage": 75.41, "elapsed_time": "6:54:14", "remaining_time": "2:15:03", "throughput": 2335.75, "total_tokens": 58054672} {"current_steps": 30170, "total_steps": 40000, "loss": 0.0011, "lr": 7.0892547955897506e-06, "epoch": 4.921771759523615, "percentage": 75.42, "elapsed_time": "6:54:16", "remaining_time": "2:14:58", "throughput": 2335.9, "total_tokens": 58063248} {"current_steps": 30175, "total_steps": 40000, "loss": 0.0044, "lr": 7.0824069238241e-06, "epoch": 4.922587486744432, "percentage": 75.44, "elapsed_time": "6:54:18", "remaining_time": "2:14:54", "throughput": 2336.1, "total_tokens": 58073168} {"current_steps": 30180, "total_steps": 40000, "loss": 0.1065, "lr": 7.075561815176462e-06, "epoch": 4.92340321396525, "percentage": 75.45, "elapsed_time": "6:54:21", "remaining_time": "2:14:49", "throughput": 2336.27, "total_tokens": 58082112} {"current_steps": 30185, "total_steps": 40000, "loss": 0.1087, "lr": 7.068719470702445e-06, "epoch": 4.9242189411860675, "percentage": 75.46, "elapsed_time": "6:54:23", "remaining_time": "2:14:44", "throughput": 2336.44, "total_tokens": 58091280} {"current_steps": 30190, "total_steps": 40000, "loss": 0.001, "lr": 7.061879891457229e-06, "epoch": 4.925034668406885, "percentage": 75.48, "elapsed_time": "6:54:25", "remaining_time": "2:14:39", "throughput": 2336.66, "total_tokens": 58101456} {"current_steps": 30195, "total_steps": 40000, "loss": 0.0845, "lr": 7.0550430784955515e-06, "epoch": 4.925850395627702, "percentage": 75.49, "elapsed_time": "6:54:27", "remaining_time": "2:14:34", "throughput": 2336.82, "total_tokens": 58110432} {"current_steps": 30200, "total_steps": 40000, "loss": 0.0019, "lr": 7.048209032871752e-06, "epoch": 4.92666612284852, "percentage": 75.5, "elapsed_time": "6:54:29", "remaining_time": "2:14:30", "throughput": 2337.01, "total_tokens": 58119904} {"current_steps": 30200, "total_steps": 40000, "eval_loss": 0.2772332429885864, "epoch": 4.92666612284852, "percentage": 75.5, "elapsed_time": "6:55:50", "remaining_time": "2:14:56", "throughput": 2329.45, "total_tokens": 58119904} {"current_steps": 30205, "total_steps": 40000, "loss": 0.0037, "lr": 7.0413777556397055e-06, "epoch": 4.927481850069337, "percentage": 75.51, "elapsed_time": "6:55:54", "remaining_time": "2:14:52", "throughput": 2329.43, "total_tokens": 58129072} {"current_steps": 30210, "total_steps": 40000, "loss": 0.0008, "lr": 7.0345492478528925e-06, "epoch": 4.928297577290154, "percentage": 75.52, "elapsed_time": "6:55:56", "remaining_time": "2:14:47", "throughput": 2329.62, "total_tokens": 58138832} {"current_steps": 30215, "total_steps": 40000, "loss": 0.0005, "lr": 7.02772351056436e-06, "epoch": 4.929113304510971, "percentage": 75.54, "elapsed_time": "6:55:58", "remaining_time": "2:14:42", "throughput": 2329.82, "total_tokens": 58148592} {"current_steps": 30220, "total_steps": 40000, "loss": 0.0738, "lr": 7.020900544826709e-06, "epoch": 4.929929031731789, "percentage": 75.55, "elapsed_time": "6:56:00", "remaining_time": "2:14:37", "throughput": 2330.03, "total_tokens": 58158576} {"current_steps": 30225, "total_steps": 40000, "loss": 0.1231, "lr": 7.014080351692134e-06, "epoch": 4.930744758952606, "percentage": 75.56, "elapsed_time": "6:56:02", "remaining_time": "2:14:33", "throughput": 2330.23, "total_tokens": 58168384} {"current_steps": 30230, "total_steps": 40000, "loss": 0.0492, "lr": 7.0072629322124024e-06, "epoch": 4.9315604861734235, "percentage": 75.58, "elapsed_time": "6:56:04", "remaining_time": "2:14:28", "throughput": 2330.41, "total_tokens": 58177856} {"current_steps": 30235, "total_steps": 40000, "loss": 0.0012, "lr": 7.000448287438827e-06, "epoch": 4.932376213394241, "percentage": 75.59, "elapsed_time": "6:56:06", "remaining_time": "2:14:23", "throughput": 2330.61, "total_tokens": 58187584} {"current_steps": 30240, "total_steps": 40000, "loss": 0.1283, "lr": 6.993636418422331e-06, "epoch": 4.933191940615059, "percentage": 75.6, "elapsed_time": "6:56:08", "remaining_time": "2:14:18", "throughput": 2330.81, "total_tokens": 58197312} {"current_steps": 30245, "total_steps": 40000, "loss": 0.0655, "lr": 6.986827326213383e-06, "epoch": 4.934007667835876, "percentage": 75.61, "elapsed_time": "6:56:10", "remaining_time": "2:14:13", "throughput": 2330.94, "total_tokens": 58205520} {"current_steps": 30250, "total_steps": 40000, "loss": 0.0693, "lr": 6.9800210118620205e-06, "epoch": 4.934823395056693, "percentage": 75.62, "elapsed_time": "6:56:12", "remaining_time": "2:14:09", "throughput": 2331.09, "total_tokens": 58214000} {"current_steps": 30255, "total_steps": 40000, "loss": 0.0592, "lr": 6.973217476417876e-06, "epoch": 4.935639122277511, "percentage": 75.64, "elapsed_time": "6:56:14", "remaining_time": "2:14:04", "throughput": 2331.31, "total_tokens": 58224400} {"current_steps": 30260, "total_steps": 40000, "loss": 0.1248, "lr": 6.96641672093013e-06, "epoch": 4.936454849498328, "percentage": 75.65, "elapsed_time": "6:56:17", "remaining_time": "2:13:59", "throughput": 2331.53, "total_tokens": 58234720} {"current_steps": 30265, "total_steps": 40000, "loss": 0.0295, "lr": 6.95961874644755e-06, "epoch": 4.937270576719145, "percentage": 75.66, "elapsed_time": "6:56:19", "remaining_time": "2:13:54", "throughput": 2331.75, "total_tokens": 58244928} {"current_steps": 30270, "total_steps": 40000, "loss": 0.0155, "lr": 6.952823554018476e-06, "epoch": 4.938086303939962, "percentage": 75.67, "elapsed_time": "6:56:21", "remaining_time": "2:13:49", "throughput": 2331.95, "total_tokens": 58254912} {"current_steps": 30275, "total_steps": 40000, "loss": 0.0191, "lr": 6.946031144690798e-06, "epoch": 4.938902031160779, "percentage": 75.69, "elapsed_time": "6:56:23", "remaining_time": "2:13:45", "throughput": 2332.11, "total_tokens": 58263712} {"current_steps": 30280, "total_steps": 40000, "loss": 0.0027, "lr": 6.939241519512005e-06, "epoch": 4.939717758381597, "percentage": 75.7, "elapsed_time": "6:56:25", "remaining_time": "2:13:40", "throughput": 2332.31, "total_tokens": 58273504} {"current_steps": 30285, "total_steps": 40000, "loss": 0.0255, "lr": 6.932454679529129e-06, "epoch": 4.940533485602415, "percentage": 75.71, "elapsed_time": "6:56:27", "remaining_time": "2:13:35", "throughput": 2332.53, "total_tokens": 58283808} {"current_steps": 30290, "total_steps": 40000, "loss": 0.0366, "lr": 6.925670625788791e-06, "epoch": 4.941349212823232, "percentage": 75.72, "elapsed_time": "6:56:29", "remaining_time": "2:13:30", "throughput": 2332.75, "total_tokens": 58294192} {"current_steps": 30295, "total_steps": 40000, "loss": 0.099, "lr": 6.918889359337186e-06, "epoch": 4.94216494004405, "percentage": 75.74, "elapsed_time": "6:56:31", "remaining_time": "2:13:26", "throughput": 2332.97, "total_tokens": 58304512} {"current_steps": 30300, "total_steps": 40000, "loss": 0.0003, "lr": 6.912110881220058e-06, "epoch": 4.942980667264867, "percentage": 75.75, "elapsed_time": "6:56:33", "remaining_time": "2:13:21", "throughput": 2333.15, "total_tokens": 58313760} {"current_steps": 30305, "total_steps": 40000, "loss": 0.0039, "lr": 6.905335192482735e-06, "epoch": 4.943796394485684, "percentage": 75.76, "elapsed_time": "6:56:35", "remaining_time": "2:13:16", "throughput": 2333.34, "total_tokens": 58323328} {"current_steps": 30310, "total_steps": 40000, "loss": 0.0466, "lr": 6.8985622941701275e-06, "epoch": 4.944612121706501, "percentage": 75.78, "elapsed_time": "6:56:37", "remaining_time": "2:13:11", "throughput": 2333.57, "total_tokens": 58333872} {"current_steps": 30315, "total_steps": 40000, "loss": 0.0007, "lr": 6.89179218732669e-06, "epoch": 4.945427848927318, "percentage": 75.79, "elapsed_time": "6:56:39", "remaining_time": "2:13:06", "throughput": 2333.72, "total_tokens": 58342624} {"current_steps": 30320, "total_steps": 40000, "loss": 0.0132, "lr": 6.8850248729964595e-06, "epoch": 4.946243576148136, "percentage": 75.8, "elapsed_time": "6:56:41", "remaining_time": "2:13:02", "throughput": 2333.91, "total_tokens": 58352272} {"current_steps": 30325, "total_steps": 40000, "loss": 0.0006, "lr": 6.8782603522230314e-06, "epoch": 4.947059303368953, "percentage": 75.81, "elapsed_time": "6:56:43", "remaining_time": "2:12:57", "throughput": 2334.06, "total_tokens": 58360704} {"current_steps": 30330, "total_steps": 40000, "loss": 0.0114, "lr": 6.871498626049591e-06, "epoch": 4.9478750305897705, "percentage": 75.83, "elapsed_time": "6:56:46", "remaining_time": "2:12:52", "throughput": 2334.25, "total_tokens": 58370464} {"current_steps": 30335, "total_steps": 40000, "loss": 0.1579, "lr": 6.8647396955188875e-06, "epoch": 4.9486907578105885, "percentage": 75.84, "elapsed_time": "6:56:48", "remaining_time": "2:12:47", "throughput": 2334.47, "total_tokens": 58380688} {"current_steps": 30340, "total_steps": 40000, "loss": 0.0003, "lr": 6.857983561673218e-06, "epoch": 4.949506485031406, "percentage": 75.85, "elapsed_time": "6:56:50", "remaining_time": "2:12:43", "throughput": 2334.61, "total_tokens": 58389168} {"current_steps": 30345, "total_steps": 40000, "loss": 0.0167, "lr": 6.851230225554467e-06, "epoch": 4.950322212252223, "percentage": 75.86, "elapsed_time": "6:56:52", "remaining_time": "2:12:38", "throughput": 2334.79, "total_tokens": 58398336} {"current_steps": 30350, "total_steps": 40000, "loss": 0.0021, "lr": 6.8444796882040946e-06, "epoch": 4.95113793947304, "percentage": 75.88, "elapsed_time": "6:56:54", "remaining_time": "2:12:33", "throughput": 2334.96, "total_tokens": 58407472} {"current_steps": 30355, "total_steps": 40000, "loss": 0.0383, "lr": 6.837731950663106e-06, "epoch": 4.951953666693858, "percentage": 75.89, "elapsed_time": "6:56:56", "remaining_time": "2:12:28", "throughput": 2335.1, "total_tokens": 58415936} {"current_steps": 30360, "total_steps": 40000, "loss": 0.1121, "lr": 6.830987013972098e-06, "epoch": 4.952769393914675, "percentage": 75.9, "elapsed_time": "6:56:58", "remaining_time": "2:12:23", "throughput": 2335.31, "total_tokens": 58425920} {"current_steps": 30365, "total_steps": 40000, "loss": 0.0224, "lr": 6.82424487917121e-06, "epoch": 4.953585121135492, "percentage": 75.91, "elapsed_time": "6:57:00", "remaining_time": "2:12:19", "throughput": 2335.46, "total_tokens": 58434608} {"current_steps": 30370, "total_steps": 40000, "loss": 0.001, "lr": 6.8175055473001735e-06, "epoch": 4.954400848356309, "percentage": 75.92, "elapsed_time": "6:57:02", "remaining_time": "2:12:14", "throughput": 2335.66, "total_tokens": 58444288} {"current_steps": 30375, "total_steps": 40000, "loss": 0.0011, "lr": 6.8107690193982855e-06, "epoch": 4.955216575577127, "percentage": 75.94, "elapsed_time": "6:57:04", "remaining_time": "2:12:09", "throughput": 2335.86, "total_tokens": 58454256} {"current_steps": 30380, "total_steps": 40000, "loss": 0.0185, "lr": 6.804035296504385e-06, "epoch": 4.9560323027979445, "percentage": 75.95, "elapsed_time": "6:57:06", "remaining_time": "2:12:04", "throughput": 2336.07, "total_tokens": 58464288} {"current_steps": 30385, "total_steps": 40000, "loss": 0.0006, "lr": 6.797304379656916e-06, "epoch": 4.956848030018762, "percentage": 75.96, "elapsed_time": "6:57:08", "remaining_time": "2:12:00", "throughput": 2336.29, "total_tokens": 58474720} {"current_steps": 30390, "total_steps": 40000, "loss": 0.0069, "lr": 6.790576269893861e-06, "epoch": 4.957663757239579, "percentage": 75.98, "elapsed_time": "6:57:10", "remaining_time": "2:11:55", "throughput": 2336.53, "total_tokens": 58485472} {"current_steps": 30395, "total_steps": 40000, "loss": 0.0011, "lr": 6.783850968252772e-06, "epoch": 4.958479484460397, "percentage": 75.99, "elapsed_time": "6:57:12", "remaining_time": "2:11:50", "throughput": 2336.71, "total_tokens": 58494768} {"current_steps": 30400, "total_steps": 40000, "loss": 0.0009, "lr": 6.777128475770789e-06, "epoch": 4.959295211681214, "percentage": 76.0, "elapsed_time": "6:57:15", "remaining_time": "2:11:45", "throughput": 2336.87, "total_tokens": 58503776} {"current_steps": 30400, "total_steps": 40000, "eval_loss": 0.2871687114238739, "epoch": 4.959295211681214, "percentage": 76.0, "elapsed_time": "6:58:35", "remaining_time": "2:12:11", "throughput": 2329.36, "total_tokens": 58503776} {"current_steps": 30405, "total_steps": 40000, "loss": 0.0005, "lr": 6.77040879348459e-06, "epoch": 4.960110938902031, "percentage": 76.01, "elapsed_time": "6:58:39", "remaining_time": "2:12:07", "throughput": 2329.44, "total_tokens": 58514288} {"current_steps": 30410, "total_steps": 40000, "loss": 0.0003, "lr": 6.763691922430443e-06, "epoch": 4.960926666122848, "percentage": 76.02, "elapsed_time": "6:58:41", "remaining_time": "2:12:02", "throughput": 2329.64, "total_tokens": 58524288} {"current_steps": 30415, "total_steps": 40000, "loss": 0.1112, "lr": 6.756977863644178e-06, "epoch": 4.961742393343666, "percentage": 76.04, "elapsed_time": "6:58:43", "remaining_time": "2:11:57", "throughput": 2329.85, "total_tokens": 58534400} {"current_steps": 30420, "total_steps": 40000, "loss": 0.0005, "lr": 6.7502666181611804e-06, "epoch": 4.962558120564483, "percentage": 76.05, "elapsed_time": "6:58:45", "remaining_time": "2:11:52", "throughput": 2330.02, "total_tokens": 58543424} {"current_steps": 30425, "total_steps": 40000, "loss": 0.0704, "lr": 6.743558187016405e-06, "epoch": 4.9633738477853, "percentage": 76.06, "elapsed_time": "6:58:47", "remaining_time": "2:11:47", "throughput": 2330.21, "total_tokens": 58553072} {"current_steps": 30430, "total_steps": 40000, "loss": 0.067, "lr": 6.7368525712443925e-06, "epoch": 4.964189575006118, "percentage": 76.08, "elapsed_time": "6:58:49", "remaining_time": "2:11:43", "throughput": 2330.39, "total_tokens": 58562384} {"current_steps": 30435, "total_steps": 40000, "loss": 0.0005, "lr": 6.7301497718792155e-06, "epoch": 4.965005302226936, "percentage": 76.09, "elapsed_time": "6:58:51", "remaining_time": "2:11:38", "throughput": 2330.59, "total_tokens": 58572256} {"current_steps": 30440, "total_steps": 40000, "loss": 0.0359, "lr": 6.723449789954544e-06, "epoch": 4.965821029447753, "percentage": 76.1, "elapsed_time": "6:58:54", "remaining_time": "2:11:33", "throughput": 2330.78, "total_tokens": 58582000} {"current_steps": 30445, "total_steps": 40000, "loss": 0.0983, "lr": 6.716752626503586e-06, "epoch": 4.96663675666857, "percentage": 76.11, "elapsed_time": "6:58:56", "remaining_time": "2:11:28", "throughput": 2330.94, "total_tokens": 58590848} {"current_steps": 30450, "total_steps": 40000, "loss": 0.0005, "lr": 6.710058282559131e-06, "epoch": 4.967452483889387, "percentage": 76.12, "elapsed_time": "6:58:58", "remaining_time": "2:11:24", "throughput": 2331.2, "total_tokens": 58602192} {"current_steps": 30455, "total_steps": 40000, "loss": 0.0007, "lr": 6.703366759153545e-06, "epoch": 4.968268211110205, "percentage": 76.14, "elapsed_time": "6:59:00", "remaining_time": "2:11:19", "throughput": 2331.37, "total_tokens": 58611264} {"current_steps": 30460, "total_steps": 40000, "loss": 0.0002, "lr": 6.6966780573187335e-06, "epoch": 4.969083938331022, "percentage": 76.15, "elapsed_time": "6:59:02", "remaining_time": "2:11:14", "throughput": 2331.56, "total_tokens": 58620720} {"current_steps": 30465, "total_steps": 40000, "loss": 0.0011, "lr": 6.689992178086174e-06, "epoch": 4.969899665551839, "percentage": 76.16, "elapsed_time": "6:59:04", "remaining_time": "2:11:09", "throughput": 2331.78, "total_tokens": 58631104} {"current_steps": 30470, "total_steps": 40000, "loss": 0.1266, "lr": 6.683309122486925e-06, "epoch": 4.970715392772657, "percentage": 76.17, "elapsed_time": "6:59:06", "remaining_time": "2:11:04", "throughput": 2331.97, "total_tokens": 58640672} {"current_steps": 30475, "total_steps": 40000, "loss": 0.0653, "lr": 6.676628891551584e-06, "epoch": 4.971531119993474, "percentage": 76.19, "elapsed_time": "6:59:08", "remaining_time": "2:11:00", "throughput": 2332.16, "total_tokens": 58650480} {"current_steps": 30480, "total_steps": 40000, "loss": 0.0003, "lr": 6.6699514863103385e-06, "epoch": 4.9723468472142915, "percentage": 76.2, "elapsed_time": "6:59:10", "remaining_time": "2:10:55", "throughput": 2332.4, "total_tokens": 58661280} {"current_steps": 30485, "total_steps": 40000, "loss": 0.0599, "lr": 6.663276907792921e-06, "epoch": 4.973162574435109, "percentage": 76.21, "elapsed_time": "6:59:12", "remaining_time": "2:10:50", "throughput": 2332.59, "total_tokens": 58670832} {"current_steps": 30490, "total_steps": 40000, "loss": 0.09, "lr": 6.656605157028634e-06, "epoch": 4.973978301655926, "percentage": 76.22, "elapsed_time": "6:59:14", "remaining_time": "2:10:45", "throughput": 2332.75, "total_tokens": 58679776} {"current_steps": 30495, "total_steps": 40000, "loss": 0.0056, "lr": 6.649936235046358e-06, "epoch": 4.974794028876744, "percentage": 76.24, "elapsed_time": "6:59:16", "remaining_time": "2:10:41", "throughput": 2332.92, "total_tokens": 58688880} {"current_steps": 30500, "total_steps": 40000, "loss": 0.0253, "lr": 6.643270142874508e-06, "epoch": 4.975609756097561, "percentage": 76.25, "elapsed_time": "6:59:18", "remaining_time": "2:10:36", "throughput": 2333.11, "total_tokens": 58698480} {"current_steps": 30505, "total_steps": 40000, "loss": 0.0003, "lr": 6.636606881541094e-06, "epoch": 4.976425483318378, "percentage": 76.26, "elapsed_time": "6:59:20", "remaining_time": "2:10:31", "throughput": 2333.26, "total_tokens": 58706944} {"current_steps": 30510, "total_steps": 40000, "loss": 0.0008, "lr": 6.629946452073662e-06, "epoch": 4.977241210539196, "percentage": 76.28, "elapsed_time": "6:59:23", "remaining_time": "2:10:26", "throughput": 2333.44, "total_tokens": 58716368} {"current_steps": 30515, "total_steps": 40000, "loss": 0.0005, "lr": 6.6232888554993375e-06, "epoch": 4.978056937760013, "percentage": 76.29, "elapsed_time": "6:59:25", "remaining_time": "2:10:22", "throughput": 2333.61, "total_tokens": 58725392} {"current_steps": 30520, "total_steps": 40000, "loss": 0.2031, "lr": 6.616634092844817e-06, "epoch": 4.97887266498083, "percentage": 76.3, "elapsed_time": "6:59:27", "remaining_time": "2:10:17", "throughput": 2333.8, "total_tokens": 58735152} {"current_steps": 30525, "total_steps": 40000, "loss": 0.0012, "lr": 6.609982165136331e-06, "epoch": 4.9796883922016475, "percentage": 76.31, "elapsed_time": "6:59:29", "remaining_time": "2:10:12", "throughput": 2333.97, "total_tokens": 58744224} {"current_steps": 30530, "total_steps": 40000, "loss": 0.0881, "lr": 6.603333073399706e-06, "epoch": 4.9805041194224655, "percentage": 76.33, "elapsed_time": "6:59:31", "remaining_time": "2:10:07", "throughput": 2334.15, "total_tokens": 58753552} {"current_steps": 30535, "total_steps": 40000, "loss": 0.0009, "lr": 6.596686818660308e-06, "epoch": 4.981319846643283, "percentage": 76.34, "elapsed_time": "6:59:33", "remaining_time": "2:10:03", "throughput": 2334.38, "total_tokens": 58764160} {"current_steps": 30540, "total_steps": 40000, "loss": 0.0004, "lr": 6.590043401943066e-06, "epoch": 4.9821355738641, "percentage": 76.35, "elapsed_time": "6:59:35", "remaining_time": "2:09:58", "throughput": 2334.58, "total_tokens": 58774000} {"current_steps": 30545, "total_steps": 40000, "loss": 0.0019, "lr": 6.583402824272494e-06, "epoch": 4.982951301084917, "percentage": 76.36, "elapsed_time": "6:59:37", "remaining_time": "2:09:53", "throughput": 2334.79, "total_tokens": 58784320} {"current_steps": 30550, "total_steps": 40000, "loss": 0.0007, "lr": 6.576765086672634e-06, "epoch": 4.983767028305735, "percentage": 76.38, "elapsed_time": "6:59:39", "remaining_time": "2:09:48", "throughput": 2334.98, "total_tokens": 58794032} {"current_steps": 30555, "total_steps": 40000, "loss": 0.0006, "lr": 6.57013019016712e-06, "epoch": 4.984582755526552, "percentage": 76.39, "elapsed_time": "6:59:41", "remaining_time": "2:09:44", "throughput": 2335.18, "total_tokens": 58803808} {"current_steps": 30560, "total_steps": 40000, "loss": 0.0009, "lr": 6.563498135779142e-06, "epoch": 4.985398482747369, "percentage": 76.4, "elapsed_time": "6:59:43", "remaining_time": "2:09:39", "throughput": 2335.34, "total_tokens": 58812704} {"current_steps": 30565, "total_steps": 40000, "loss": 0.0006, "lr": 6.556868924531431e-06, "epoch": 4.986214209968186, "percentage": 76.41, "elapsed_time": "6:59:45", "remaining_time": "2:09:34", "throughput": 2335.53, "total_tokens": 58822160} {"current_steps": 30570, "total_steps": 40000, "loss": 0.0007, "lr": 6.550242557446304e-06, "epoch": 4.987029937189004, "percentage": 76.42, "elapsed_time": "6:59:47", "remaining_time": "2:09:29", "throughput": 2335.77, "total_tokens": 58833168} {"current_steps": 30575, "total_steps": 40000, "loss": 0.0003, "lr": 6.543619035545634e-06, "epoch": 4.9878456644098215, "percentage": 76.44, "elapsed_time": "6:59:49", "remaining_time": "2:09:25", "throughput": 2335.97, "total_tokens": 58843072} {"current_steps": 30580, "total_steps": 40000, "loss": 0.0503, "lr": 6.53699835985084e-06, "epoch": 4.988661391630639, "percentage": 76.45, "elapsed_time": "6:59:52", "remaining_time": "2:09:20", "throughput": 2336.17, "total_tokens": 58852832} {"current_steps": 30585, "total_steps": 40000, "loss": 0.031, "lr": 6.530380531382927e-06, "epoch": 4.989477118851456, "percentage": 76.46, "elapsed_time": "6:59:54", "remaining_time": "2:09:15", "throughput": 2336.35, "total_tokens": 58862160} {"current_steps": 30590, "total_steps": 40000, "loss": 0.0017, "lr": 6.523765551162433e-06, "epoch": 4.990292846072274, "percentage": 76.48, "elapsed_time": "6:59:56", "remaining_time": "2:09:10", "throughput": 2336.56, "total_tokens": 58872320} {"current_steps": 30595, "total_steps": 40000, "loss": 0.0006, "lr": 6.517153420209476e-06, "epoch": 4.991108573293091, "percentage": 76.49, "elapsed_time": "6:59:58", "remaining_time": "2:09:06", "throughput": 2336.77, "total_tokens": 58882432} {"current_steps": 30600, "total_steps": 40000, "loss": 0.0006, "lr": 6.510544139543739e-06, "epoch": 4.991924300513908, "percentage": 76.5, "elapsed_time": "7:00:00", "remaining_time": "2:09:01", "throughput": 2336.98, "total_tokens": 58892528} {"current_steps": 30600, "total_steps": 40000, "eval_loss": 0.2907790243625641, "epoch": 4.991924300513908, "percentage": 76.5, "elapsed_time": "7:01:21", "remaining_time": "2:09:26", "throughput": 2329.5, "total_tokens": 58892528} {"current_steps": 30605, "total_steps": 40000, "loss": 0.0011, "lr": 6.503937710184452e-06, "epoch": 4.992740027734725, "percentage": 76.51, "elapsed_time": "7:01:25", "remaining_time": "2:09:21", "throughput": 2329.51, "total_tokens": 58901888} {"current_steps": 30610, "total_steps": 40000, "loss": 0.0004, "lr": 6.4973341331503954e-06, "epoch": 4.993555754955543, "percentage": 76.53, "elapsed_time": "7:01:27", "remaining_time": "2:09:17", "throughput": 2329.68, "total_tokens": 58911040} {"current_steps": 30615, "total_steps": 40000, "loss": 0.1623, "lr": 6.490733409459942e-06, "epoch": 4.99437148217636, "percentage": 76.54, "elapsed_time": "7:01:29", "remaining_time": "2:09:12", "throughput": 2329.88, "total_tokens": 58920976} {"current_steps": 30620, "total_steps": 40000, "loss": 0.1114, "lr": 6.484135540130995e-06, "epoch": 4.995187209397177, "percentage": 76.55, "elapsed_time": "7:01:31", "remaining_time": "2:09:07", "throughput": 2330.06, "total_tokens": 58930176} {"current_steps": 30625, "total_steps": 40000, "loss": 0.1429, "lr": 6.4775405261810364e-06, "epoch": 4.9960029366179945, "percentage": 76.56, "elapsed_time": "7:01:33", "remaining_time": "2:09:02", "throughput": 2330.21, "total_tokens": 58938928} {"current_steps": 30630, "total_steps": 40000, "loss": 0.0978, "lr": 6.470948368627092e-06, "epoch": 4.996818663838813, "percentage": 76.58, "elapsed_time": "7:01:35", "remaining_time": "2:08:58", "throughput": 2330.44, "total_tokens": 58949632} {"current_steps": 30635, "total_steps": 40000, "loss": 0.0001, "lr": 6.464359068485756e-06, "epoch": 4.99763439105963, "percentage": 76.59, "elapsed_time": "7:01:37", "remaining_time": "2:08:53", "throughput": 2330.62, "total_tokens": 58958944} {"current_steps": 30640, "total_steps": 40000, "loss": 0.0083, "lr": 6.457772626773195e-06, "epoch": 4.998450118280447, "percentage": 76.6, "elapsed_time": "7:01:39", "remaining_time": "2:08:48", "throughput": 2330.76, "total_tokens": 58967296} {"current_steps": 30645, "total_steps": 40000, "loss": 0.0003, "lr": 6.451189044505104e-06, "epoch": 4.999265845501265, "percentage": 76.61, "elapsed_time": "7:01:41", "remaining_time": "2:08:43", "throughput": 2331.0, "total_tokens": 58978032} {"current_steps": 30650, "total_steps": 40000, "loss": 0.0013, "lr": 6.44460832269676e-06, "epoch": 5.0, "percentage": 76.62, "elapsed_time": "7:01:43", "remaining_time": "2:08:39", "throughput": 2331.17, "total_tokens": 58987136} {"current_steps": 30655, "total_steps": 40000, "loss": 0.0008, "lr": 6.438030462363001e-06, "epoch": 5.000815727220817, "percentage": 76.64, "elapsed_time": "7:01:45", "remaining_time": "2:08:34", "throughput": 2331.35, "total_tokens": 58997088} {"current_steps": 30660, "total_steps": 40000, "loss": 0.0005, "lr": 6.431455464518205e-06, "epoch": 5.001631454441635, "percentage": 76.65, "elapsed_time": "7:01:48", "remaining_time": "2:08:29", "throughput": 2331.53, "total_tokens": 59006448} {"current_steps": 30665, "total_steps": 40000, "loss": 0.0288, "lr": 6.424883330176326e-06, "epoch": 5.002447181662452, "percentage": 76.66, "elapsed_time": "7:01:50", "remaining_time": "2:08:24", "throughput": 2331.73, "total_tokens": 59016400} {"current_steps": 30670, "total_steps": 40000, "loss": 0.0002, "lr": 6.418314060350864e-06, "epoch": 5.003262908883269, "percentage": 76.68, "elapsed_time": "7:01:52", "remaining_time": "2:08:20", "throughput": 2331.95, "total_tokens": 59026784} {"current_steps": 30675, "total_steps": 40000, "loss": 0.0004, "lr": 6.4117476560548895e-06, "epoch": 5.0040786361040865, "percentage": 76.69, "elapsed_time": "7:01:54", "remaining_time": "2:08:15", "throughput": 2332.16, "total_tokens": 59036960} {"current_steps": 30680, "total_steps": 40000, "loss": 0.059, "lr": 6.405184118301016e-06, "epoch": 5.004894363324905, "percentage": 76.7, "elapsed_time": "7:01:56", "remaining_time": "2:08:10", "throughput": 2332.34, "total_tokens": 59046400} {"current_steps": 30685, "total_steps": 40000, "loss": 0.0005, "lr": 6.398623448101434e-06, "epoch": 5.005710090545722, "percentage": 76.71, "elapsed_time": "7:01:58", "remaining_time": "2:08:05", "throughput": 2332.54, "total_tokens": 59056208} {"current_steps": 30690, "total_steps": 40000, "loss": 0.0001, "lr": 6.392065646467871e-06, "epoch": 5.006525817766539, "percentage": 76.72, "elapsed_time": "7:02:00", "remaining_time": "2:08:01", "throughput": 2332.71, "total_tokens": 59065392} {"current_steps": 30695, "total_steps": 40000, "loss": 0.055, "lr": 6.385510714411632e-06, "epoch": 5.007341544987356, "percentage": 76.74, "elapsed_time": "7:02:02", "remaining_time": "2:07:56", "throughput": 2332.96, "total_tokens": 59076560} {"current_steps": 30700, "total_steps": 40000, "loss": 0.0003, "lr": 6.378958652943559e-06, "epoch": 5.008157272208174, "percentage": 76.75, "elapsed_time": "7:02:04", "remaining_time": "2:07:51", "throughput": 2333.14, "total_tokens": 59085824} {"current_steps": 30705, "total_steps": 40000, "loss": 0.0002, "lr": 6.3724094630740776e-06, "epoch": 5.008972999428991, "percentage": 76.76, "elapsed_time": "7:02:06", "remaining_time": "2:07:46", "throughput": 2333.34, "total_tokens": 59095840} {"current_steps": 30710, "total_steps": 40000, "loss": 0.0511, "lr": 6.365863145813136e-06, "epoch": 5.009788726649808, "percentage": 76.78, "elapsed_time": "7:02:08", "remaining_time": "2:07:42", "throughput": 2333.5, "total_tokens": 59104816} {"current_steps": 30715, "total_steps": 40000, "loss": 0.0008, "lr": 6.359319702170269e-06, "epoch": 5.010604453870625, "percentage": 76.79, "elapsed_time": "7:02:10", "remaining_time": "2:07:37", "throughput": 2333.66, "total_tokens": 59113680} {"current_steps": 30720, "total_steps": 40000, "loss": 0.001, "lr": 6.352779133154566e-06, "epoch": 5.011420181091443, "percentage": 76.8, "elapsed_time": "7:02:12", "remaining_time": "2:07:32", "throughput": 2333.81, "total_tokens": 59122144} {"current_steps": 30725, "total_steps": 40000, "loss": 0.0005, "lr": 6.346241439774648e-06, "epoch": 5.0122359083122605, "percentage": 76.81, "elapsed_time": "7:02:14", "remaining_time": "2:07:27", "throughput": 2334.01, "total_tokens": 59132240} {"current_steps": 30730, "total_steps": 40000, "loss": 0.0093, "lr": 6.339706623038716e-06, "epoch": 5.013051635533078, "percentage": 76.83, "elapsed_time": "7:02:17", "remaining_time": "2:07:23", "throughput": 2334.2, "total_tokens": 59141792} {"current_steps": 30735, "total_steps": 40000, "loss": 0.0006, "lr": 6.333174683954532e-06, "epoch": 5.013867362753895, "percentage": 76.84, "elapsed_time": "7:02:19", "remaining_time": "2:07:18", "throughput": 2334.4, "total_tokens": 59151728} {"current_steps": 30740, "total_steps": 40000, "loss": 0.0003, "lr": 6.326645623529387e-06, "epoch": 5.014683089974713, "percentage": 76.85, "elapsed_time": "7:02:21", "remaining_time": "2:07:13", "throughput": 2334.6, "total_tokens": 59161472} {"current_steps": 30745, "total_steps": 40000, "loss": 0.0004, "lr": 6.320119442770156e-06, "epoch": 5.01549881719553, "percentage": 76.86, "elapsed_time": "7:02:23", "remaining_time": "2:07:08", "throughput": 2334.77, "total_tokens": 59170672} {"current_steps": 30750, "total_steps": 40000, "loss": 0.0005, "lr": 6.313596142683254e-06, "epoch": 5.016314544416347, "percentage": 76.88, "elapsed_time": "7:02:25", "remaining_time": "2:07:04", "throughput": 2334.98, "total_tokens": 59180784} {"current_steps": 30755, "total_steps": 40000, "loss": 0.0329, "lr": 6.307075724274647e-06, "epoch": 5.017130271637164, "percentage": 76.89, "elapsed_time": "7:02:27", "remaining_time": "2:06:59", "throughput": 2335.17, "total_tokens": 59190624} {"current_steps": 30760, "total_steps": 40000, "loss": 0.0014, "lr": 6.300558188549882e-06, "epoch": 5.017945998857982, "percentage": 76.9, "elapsed_time": "7:02:29", "remaining_time": "2:06:54", "throughput": 2335.38, "total_tokens": 59200672} {"current_steps": 30765, "total_steps": 40000, "loss": 0.1448, "lr": 6.29404353651403e-06, "epoch": 5.018761726078799, "percentage": 76.91, "elapsed_time": "7:02:31", "remaining_time": "2:06:50", "throughput": 2335.62, "total_tokens": 59211680} {"current_steps": 30770, "total_steps": 40000, "loss": 0.0004, "lr": 6.287531769171737e-06, "epoch": 5.0195774532996165, "percentage": 76.92, "elapsed_time": "7:02:33", "remaining_time": "2:06:45", "throughput": 2335.78, "total_tokens": 59220528} {"current_steps": 30775, "total_steps": 40000, "loss": 0.0002, "lr": 6.2810228875272045e-06, "epoch": 5.020393180520434, "percentage": 76.94, "elapsed_time": "7:02:35", "remaining_time": "2:06:40", "throughput": 2335.99, "total_tokens": 59230688} {"current_steps": 30780, "total_steps": 40000, "loss": 0.0004, "lr": 6.274516892584179e-06, "epoch": 5.021208907741252, "percentage": 76.95, "elapsed_time": "7:02:37", "remaining_time": "2:06:35", "throughput": 2336.15, "total_tokens": 59239568} {"current_steps": 30785, "total_steps": 40000, "loss": 0.1358, "lr": 6.268013785345969e-06, "epoch": 5.022024634962069, "percentage": 76.96, "elapsed_time": "7:02:39", "remaining_time": "2:06:31", "throughput": 2336.35, "total_tokens": 59249376} {"current_steps": 30790, "total_steps": 40000, "loss": 0.0353, "lr": 6.26151356681543e-06, "epoch": 5.022840362182886, "percentage": 76.98, "elapsed_time": "7:02:41", "remaining_time": "2:06:26", "throughput": 2336.53, "total_tokens": 59258864} {"current_steps": 30795, "total_steps": 40000, "loss": 0.0003, "lr": 6.255016237994981e-06, "epoch": 5.023656089403703, "percentage": 76.99, "elapsed_time": "7:02:43", "remaining_time": "2:06:21", "throughput": 2336.79, "total_tokens": 59270192} {"current_steps": 30800, "total_steps": 40000, "loss": 0.0736, "lr": 6.248521799886603e-06, "epoch": 5.024471816624521, "percentage": 77.0, "elapsed_time": "7:02:46", "remaining_time": "2:06:16", "throughput": 2336.91, "total_tokens": 59278112} {"current_steps": 30800, "total_steps": 40000, "eval_loss": 0.2989204525947571, "epoch": 5.024471816624521, "percentage": 77.0, "elapsed_time": "7:04:06", "remaining_time": "2:06:40", "throughput": 2329.5, "total_tokens": 59278112} {"current_steps": 30805, "total_steps": 40000, "loss": 0.0007, "lr": 6.242030253491798e-06, "epoch": 5.025287543845338, "percentage": 77.01, "elapsed_time": "7:04:10", "remaining_time": "2:06:36", "throughput": 2329.54, "total_tokens": 59288064} {"current_steps": 30810, "total_steps": 40000, "loss": 0.0003, "lr": 6.235541599811656e-06, "epoch": 5.026103271066155, "percentage": 77.03, "elapsed_time": "7:04:12", "remaining_time": "2:06:32", "throughput": 2329.69, "total_tokens": 59296688} {"current_steps": 30815, "total_steps": 40000, "loss": 0.0003, "lr": 6.229055839846814e-06, "epoch": 5.026918998286972, "percentage": 77.04, "elapsed_time": "7:04:14", "remaining_time": "2:06:27", "throughput": 2329.9, "total_tokens": 59306880} {"current_steps": 30820, "total_steps": 40000, "loss": 0.0004, "lr": 6.222572974597455e-06, "epoch": 5.02773472550779, "percentage": 77.05, "elapsed_time": "7:04:16", "remaining_time": "2:06:22", "throughput": 2330.03, "total_tokens": 59315184} {"current_steps": 30825, "total_steps": 40000, "loss": 0.0002, "lr": 6.216093005063306e-06, "epoch": 5.028550452728608, "percentage": 77.06, "elapsed_time": "7:04:18", "remaining_time": "2:06:17", "throughput": 2330.2, "total_tokens": 59324208} {"current_steps": 30830, "total_steps": 40000, "loss": 0.001, "lr": 6.209615932243678e-06, "epoch": 5.029366179949425, "percentage": 77.08, "elapsed_time": "7:04:20", "remaining_time": "2:06:13", "throughput": 2330.38, "total_tokens": 59333712} {"current_steps": 30835, "total_steps": 40000, "loss": 0.0004, "lr": 6.203141757137399e-06, "epoch": 5.030181907170242, "percentage": 77.09, "elapsed_time": "7:04:23", "remaining_time": "2:06:08", "throughput": 2330.49, "total_tokens": 59341360} {"current_steps": 30840, "total_steps": 40000, "loss": 0.1127, "lr": 6.196670480742886e-06, "epoch": 5.03099763439106, "percentage": 77.1, "elapsed_time": "7:04:25", "remaining_time": "2:06:03", "throughput": 2330.69, "total_tokens": 59351184} {"current_steps": 30845, "total_steps": 40000, "loss": 0.001, "lr": 6.190202104058074e-06, "epoch": 5.031813361611877, "percentage": 77.11, "elapsed_time": "7:04:27", "remaining_time": "2:05:58", "throughput": 2330.9, "total_tokens": 59361520} {"current_steps": 30850, "total_steps": 40000, "loss": 0.0206, "lr": 6.183736628080475e-06, "epoch": 5.032629088832694, "percentage": 77.12, "elapsed_time": "7:04:29", "remaining_time": "2:05:54", "throughput": 2331.08, "total_tokens": 59370736} {"current_steps": 30855, "total_steps": 40000, "loss": 0.0692, "lr": 6.177274053807155e-06, "epoch": 5.033444816053512, "percentage": 77.14, "elapsed_time": "7:04:31", "remaining_time": "2:05:49", "throughput": 2331.25, "total_tokens": 59379920} {"current_steps": 30860, "total_steps": 40000, "loss": 0.0003, "lr": 6.170814382234713e-06, "epoch": 5.034260543274329, "percentage": 77.15, "elapsed_time": "7:04:33", "remaining_time": "2:05:44", "throughput": 2331.43, "total_tokens": 59389552} {"current_steps": 30865, "total_steps": 40000, "loss": 0.0338, "lr": 6.16435761435932e-06, "epoch": 5.035076270495146, "percentage": 77.16, "elapsed_time": "7:04:35", "remaining_time": "2:05:39", "throughput": 2331.65, "total_tokens": 59399792} {"current_steps": 30870, "total_steps": 40000, "loss": 0.0005, "lr": 6.157903751176681e-06, "epoch": 5.0358919977159635, "percentage": 77.18, "elapsed_time": "7:04:37", "remaining_time": "2:05:35", "throughput": 2331.83, "total_tokens": 59409216} {"current_steps": 30875, "total_steps": 40000, "loss": 0.0004, "lr": 6.151452793682066e-06, "epoch": 5.0367077249367815, "percentage": 77.19, "elapsed_time": "7:04:39", "remaining_time": "2:05:30", "throughput": 2332.05, "total_tokens": 59419760} {"current_steps": 30880, "total_steps": 40000, "loss": 0.0491, "lr": 6.145004742870305e-06, "epoch": 5.037523452157599, "percentage": 77.2, "elapsed_time": "7:04:41", "remaining_time": "2:05:25", "throughput": 2332.22, "total_tokens": 59428928} {"current_steps": 30885, "total_steps": 40000, "loss": 0.0002, "lr": 6.138559599735752e-06, "epoch": 5.038339179378416, "percentage": 77.21, "elapsed_time": "7:04:43", "remaining_time": "2:05:20", "throughput": 2332.44, "total_tokens": 59439440} {"current_steps": 30890, "total_steps": 40000, "loss": 0.0045, "lr": 6.132117365272344e-06, "epoch": 5.039154906599233, "percentage": 77.22, "elapsed_time": "7:04:45", "remaining_time": "2:05:16", "throughput": 2332.61, "total_tokens": 59448544} {"current_steps": 30895, "total_steps": 40000, "loss": 0.0008, "lr": 6.125678040473545e-06, "epoch": 5.039970633820051, "percentage": 77.24, "elapsed_time": "7:04:47", "remaining_time": "2:05:11", "throughput": 2332.79, "total_tokens": 59457904} {"current_steps": 30900, "total_steps": 40000, "loss": 0.0003, "lr": 6.1192416263323755e-06, "epoch": 5.040786361040868, "percentage": 77.25, "elapsed_time": "7:04:50", "remaining_time": "2:05:06", "throughput": 2332.94, "total_tokens": 59466768} {"current_steps": 30905, "total_steps": 40000, "loss": 0.001, "lr": 6.112808123841424e-06, "epoch": 5.041602088261685, "percentage": 77.26, "elapsed_time": "7:04:52", "remaining_time": "2:05:02", "throughput": 2333.12, "total_tokens": 59476176} {"current_steps": 30910, "total_steps": 40000, "loss": 0.0012, "lr": 6.106377533992805e-06, "epoch": 5.042417815482502, "percentage": 77.28, "elapsed_time": "7:04:54", "remaining_time": "2:04:57", "throughput": 2333.31, "total_tokens": 59485728} {"current_steps": 30915, "total_steps": 40000, "loss": 0.0004, "lr": 6.099949857778204e-06, "epoch": 5.04323354270332, "percentage": 77.29, "elapsed_time": "7:04:56", "remaining_time": "2:04:52", "throughput": 2333.49, "total_tokens": 59495120} {"current_steps": 30920, "total_steps": 40000, "loss": 0.0005, "lr": 6.093525096188852e-06, "epoch": 5.0440492699241375, "percentage": 77.3, "elapsed_time": "7:04:58", "remaining_time": "2:04:47", "throughput": 2333.7, "total_tokens": 59505328} {"current_steps": 30925, "total_steps": 40000, "loss": 0.0006, "lr": 6.087103250215518e-06, "epoch": 5.044864997144955, "percentage": 77.31, "elapsed_time": "7:05:00", "remaining_time": "2:04:43", "throughput": 2333.9, "total_tokens": 59515232} {"current_steps": 30930, "total_steps": 40000, "loss": 0.0004, "lr": 6.080684320848537e-06, "epoch": 5.045680724365772, "percentage": 77.33, "elapsed_time": "7:05:02", "remaining_time": "2:04:38", "throughput": 2334.07, "total_tokens": 59524640} {"current_steps": 30935, "total_steps": 40000, "loss": 0.0786, "lr": 6.074268309077794e-06, "epoch": 5.04649645158659, "percentage": 77.34, "elapsed_time": "7:05:04", "remaining_time": "2:04:33", "throughput": 2334.28, "total_tokens": 59534624} {"current_steps": 30940, "total_steps": 40000, "loss": 0.0012, "lr": 6.067855215892709e-06, "epoch": 5.047312178807407, "percentage": 77.35, "elapsed_time": "7:05:06", "remaining_time": "2:04:28", "throughput": 2334.46, "total_tokens": 59544128} {"current_steps": 30945, "total_steps": 40000, "loss": 0.0054, "lr": 6.061445042282271e-06, "epoch": 5.048127906028224, "percentage": 77.36, "elapsed_time": "7:05:08", "remaining_time": "2:04:24", "throughput": 2334.71, "total_tokens": 59555344} {"current_steps": 30950, "total_steps": 40000, "loss": 0.0643, "lr": 6.055037789234999e-06, "epoch": 5.048943633249041, "percentage": 77.38, "elapsed_time": "7:05:10", "remaining_time": "2:04:19", "throughput": 2334.89, "total_tokens": 59564832} {"current_steps": 30955, "total_steps": 40000, "loss": 0.0002, "lr": 6.048633457738975e-06, "epoch": 5.049759360469859, "percentage": 77.39, "elapsed_time": "7:05:12", "remaining_time": "2:04:14", "throughput": 2335.07, "total_tokens": 59574176} {"current_steps": 30960, "total_steps": 40000, "loss": 0.0002, "lr": 6.042232048781837e-06, "epoch": 5.050575087690676, "percentage": 77.4, "elapsed_time": "7:05:14", "remaining_time": "2:04:10", "throughput": 2335.24, "total_tokens": 59583504} {"current_steps": 30965, "total_steps": 40000, "loss": 0.0002, "lr": 6.035833563350757e-06, "epoch": 5.051390814911493, "percentage": 77.41, "elapsed_time": "7:05:16", "remaining_time": "2:04:05", "throughput": 2335.5, "total_tokens": 59594816} {"current_steps": 30970, "total_steps": 40000, "loss": 0.0002, "lr": 6.0294380024324525e-06, "epoch": 5.052206542132311, "percentage": 77.42, "elapsed_time": "7:05:19", "remaining_time": "2:04:00", "throughput": 2335.68, "total_tokens": 59604288} {"current_steps": 30975, "total_steps": 40000, "loss": 0.029, "lr": 6.023045367013213e-06, "epoch": 5.053022269353129, "percentage": 77.44, "elapsed_time": "7:05:21", "remaining_time": "2:03:55", "throughput": 2335.91, "total_tokens": 59614960} {"current_steps": 30980, "total_steps": 40000, "loss": 0.0014, "lr": 6.016655658078851e-06, "epoch": 5.053837996573946, "percentage": 77.45, "elapsed_time": "7:05:23", "remaining_time": "2:03:51", "throughput": 2336.1, "total_tokens": 59624720} {"current_steps": 30985, "total_steps": 40000, "loss": 0.0006, "lr": 6.010268876614753e-06, "epoch": 5.054653723794763, "percentage": 77.46, "elapsed_time": "7:05:25", "remaining_time": "2:03:46", "throughput": 2336.28, "total_tokens": 59634224} {"current_steps": 30990, "total_steps": 40000, "loss": 0.0021, "lr": 6.0038850236058266e-06, "epoch": 5.05546945101558, "percentage": 77.48, "elapsed_time": "7:05:27", "remaining_time": "2:03:41", "throughput": 2336.46, "total_tokens": 59643664} {"current_steps": 30995, "total_steps": 40000, "loss": 0.0002, "lr": 5.997504100036549e-06, "epoch": 5.056285178236398, "percentage": 77.49, "elapsed_time": "7:05:29", "remaining_time": "2:03:37", "throughput": 2336.68, "total_tokens": 59654096} {"current_steps": 31000, "total_steps": 40000, "loss": 0.0017, "lr": 5.991126106890949e-06, "epoch": 5.057100905457215, "percentage": 77.5, "elapsed_time": "7:05:31", "remaining_time": "2:03:32", "throughput": 2336.85, "total_tokens": 59663264} {"current_steps": 31000, "total_steps": 40000, "eval_loss": 0.3235328495502472, "epoch": 5.057100905457215, "percentage": 77.5, "elapsed_time": "7:06:52", "remaining_time": "2:03:55", "throughput": 2329.48, "total_tokens": 59663264} {"current_steps": 31005, "total_steps": 40000, "loss": 0.0836, "lr": 5.984751045152576e-06, "epoch": 5.057916632678032, "percentage": 77.51, "elapsed_time": "7:06:56", "remaining_time": "2:03:51", "throughput": 2329.45, "total_tokens": 59672192} {"current_steps": 31010, "total_steps": 40000, "loss": 0.0011, "lr": 5.978378915804553e-06, "epoch": 5.058732359898849, "percentage": 77.53, "elapsed_time": "7:06:58", "remaining_time": "2:03:46", "throughput": 2329.66, "total_tokens": 59682496} {"current_steps": 31015, "total_steps": 40000, "loss": 0.0016, "lr": 5.972009719829547e-06, "epoch": 5.059548087119667, "percentage": 77.54, "elapsed_time": "7:07:00", "remaining_time": "2:03:42", "throughput": 2329.86, "total_tokens": 59692608} {"current_steps": 31020, "total_steps": 40000, "loss": 0.0236, "lr": 5.965643458209755e-06, "epoch": 5.0603638143404845, "percentage": 77.55, "elapsed_time": "7:07:02", "remaining_time": "2:03:37", "throughput": 2330.05, "total_tokens": 59702304} {"current_steps": 31025, "total_steps": 40000, "loss": 0.0007, "lr": 5.95928013192695e-06, "epoch": 5.061179541561302, "percentage": 77.56, "elapsed_time": "7:07:04", "remaining_time": "2:03:32", "throughput": 2330.19, "total_tokens": 59710688} {"current_steps": 31030, "total_steps": 40000, "loss": 0.0004, "lr": 5.952919741962423e-06, "epoch": 5.06199526878212, "percentage": 77.58, "elapsed_time": "7:07:06", "remaining_time": "2:03:28", "throughput": 2330.35, "total_tokens": 59719696} {"current_steps": 31035, "total_steps": 40000, "loss": 0.0002, "lr": 5.946562289297042e-06, "epoch": 5.062810996002937, "percentage": 77.59, "elapsed_time": "7:07:08", "remaining_time": "2:03:23", "throughput": 2330.58, "total_tokens": 59730336} {"current_steps": 31040, "total_steps": 40000, "loss": 0.059, "lr": 5.9402077749111855e-06, "epoch": 5.063626723223754, "percentage": 77.6, "elapsed_time": "7:07:11", "remaining_time": "2:03:18", "throughput": 2330.7, "total_tokens": 59738304} {"current_steps": 31045, "total_steps": 40000, "loss": 0.0002, "lr": 5.933856199784821e-06, "epoch": 5.064442450444571, "percentage": 77.61, "elapsed_time": "7:07:13", "remaining_time": "2:03:13", "throughput": 2330.89, "total_tokens": 59748032} {"current_steps": 31050, "total_steps": 40000, "loss": 0.0034, "lr": 5.927507564897419e-06, "epoch": 5.065258177665389, "percentage": 77.62, "elapsed_time": "7:07:15", "remaining_time": "2:03:09", "throughput": 2331.08, "total_tokens": 59757712} {"current_steps": 31055, "total_steps": 40000, "loss": 0.0001, "lr": 5.9211618712280395e-06, "epoch": 5.066073904886206, "percentage": 77.64, "elapsed_time": "7:07:17", "remaining_time": "2:03:04", "throughput": 2331.28, "total_tokens": 59767632} {"current_steps": 31060, "total_steps": 40000, "loss": 0.0004, "lr": 5.914819119755255e-06, "epoch": 5.066889632107023, "percentage": 77.65, "elapsed_time": "7:07:19", "remaining_time": "2:02:59", "throughput": 2331.46, "total_tokens": 59777104} {"current_steps": 31065, "total_steps": 40000, "loss": 0.0001, "lr": 5.908479311457205e-06, "epoch": 5.0677053593278405, "percentage": 77.66, "elapsed_time": "7:07:21", "remaining_time": "2:02:55", "throughput": 2331.66, "total_tokens": 59787040} {"current_steps": 31070, "total_steps": 40000, "loss": 0.0004, "lr": 5.902142447311559e-06, "epoch": 5.0685210865486585, "percentage": 77.68, "elapsed_time": "7:07:23", "remaining_time": "2:02:50", "throughput": 2331.85, "total_tokens": 59796688} {"current_steps": 31075, "total_steps": 40000, "loss": 0.0002, "lr": 5.895808528295546e-06, "epoch": 5.069336813769476, "percentage": 77.69, "elapsed_time": "7:07:25", "remaining_time": "2:02:45", "throughput": 2332.05, "total_tokens": 59806720} {"current_steps": 31080, "total_steps": 40000, "loss": 0.0005, "lr": 5.889477555385941e-06, "epoch": 5.070152540990293, "percentage": 77.7, "elapsed_time": "7:07:27", "remaining_time": "2:02:40", "throughput": 2332.24, "total_tokens": 59816432} {"current_steps": 31085, "total_steps": 40000, "loss": 0.0002, "lr": 5.883149529559051e-06, "epoch": 5.07096826821111, "percentage": 77.71, "elapsed_time": "7:07:29", "remaining_time": "2:02:36", "throughput": 2332.41, "total_tokens": 59825552} {"current_steps": 31090, "total_steps": 40000, "loss": 0.0001, "lr": 5.876824451790738e-06, "epoch": 5.071783995431928, "percentage": 77.72, "elapsed_time": "7:07:31", "remaining_time": "2:02:31", "throughput": 2332.59, "total_tokens": 59835040} {"current_steps": 31095, "total_steps": 40000, "loss": 0.0958, "lr": 5.87050232305642e-06, "epoch": 5.072599722652745, "percentage": 77.74, "elapsed_time": "7:07:33", "remaining_time": "2:02:26", "throughput": 2332.79, "total_tokens": 59844928} {"current_steps": 31100, "total_steps": 40000, "loss": 0.0012, "lr": 5.864183144331034e-06, "epoch": 5.073415449873562, "percentage": 77.75, "elapsed_time": "7:07:35", "remaining_time": "2:02:22", "throughput": 2333.01, "total_tokens": 59855408} {"current_steps": 31105, "total_steps": 40000, "loss": 0.0734, "lr": 5.857866916589089e-06, "epoch": 5.074231177094379, "percentage": 77.76, "elapsed_time": "7:07:37", "remaining_time": "2:02:17", "throughput": 2333.22, "total_tokens": 59865776} {"current_steps": 31110, "total_steps": 40000, "loss": 0.0003, "lr": 5.8515536408046216e-06, "epoch": 5.075046904315197, "percentage": 77.78, "elapsed_time": "7:07:40", "remaining_time": "2:02:12", "throughput": 2333.42, "total_tokens": 59875584} {"current_steps": 31115, "total_steps": 40000, "loss": 0.0003, "lr": 5.845243317951208e-06, "epoch": 5.0758626315360145, "percentage": 77.79, "elapsed_time": "7:07:42", "remaining_time": "2:02:07", "throughput": 2333.59, "total_tokens": 59884928} {"current_steps": 31120, "total_steps": 40000, "loss": 0.0014, "lr": 5.838935949001997e-06, "epoch": 5.076678358756832, "percentage": 77.8, "elapsed_time": "7:07:44", "remaining_time": "2:02:03", "throughput": 2333.75, "total_tokens": 59893936} {"current_steps": 31125, "total_steps": 40000, "loss": 0.0001, "lr": 5.8326315349296476e-06, "epoch": 5.077494085977649, "percentage": 77.81, "elapsed_time": "7:07:46", "remaining_time": "2:01:58", "throughput": 2333.92, "total_tokens": 59903120} {"current_steps": 31130, "total_steps": 40000, "loss": 0.0004, "lr": 5.826330076706396e-06, "epoch": 5.078309813198467, "percentage": 77.83, "elapsed_time": "7:07:48", "remaining_time": "2:01:53", "throughput": 2334.07, "total_tokens": 59911824} {"current_steps": 31135, "total_steps": 40000, "loss": 0.0001, "lr": 5.820031575303988e-06, "epoch": 5.079125540419284, "percentage": 77.84, "elapsed_time": "7:07:50", "remaining_time": "2:01:49", "throughput": 2334.25, "total_tokens": 59921168} {"current_steps": 31140, "total_steps": 40000, "loss": 0.0001, "lr": 5.813736031693745e-06, "epoch": 5.079941267640101, "percentage": 77.85, "elapsed_time": "7:07:52", "remaining_time": "2:01:44", "throughput": 2334.41, "total_tokens": 59930144} {"current_steps": 31145, "total_steps": 40000, "loss": 0.0003, "lr": 5.807443446846522e-06, "epoch": 5.080756994860918, "percentage": 77.86, "elapsed_time": "7:07:54", "remaining_time": "2:01:39", "throughput": 2334.64, "total_tokens": 59940800} {"current_steps": 31150, "total_steps": 40000, "loss": 0.0001, "lr": 5.801153821732699e-06, "epoch": 5.081572722081736, "percentage": 77.88, "elapsed_time": "7:07:56", "remaining_time": "2:01:34", "throughput": 2334.81, "total_tokens": 59950048} {"current_steps": 31155, "total_steps": 40000, "loss": 0.0001, "lr": 5.794867157322229e-06, "epoch": 5.082388449302553, "percentage": 77.89, "elapsed_time": "7:07:58", "remaining_time": "2:01:30", "throughput": 2334.99, "total_tokens": 59959584} {"current_steps": 31160, "total_steps": 40000, "loss": 0.0005, "lr": 5.788583454584593e-06, "epoch": 5.08320417652337, "percentage": 77.9, "elapsed_time": "7:08:00", "remaining_time": "2:01:25", "throughput": 2335.18, "total_tokens": 59969216} {"current_steps": 31165, "total_steps": 40000, "loss": 0.0456, "lr": 5.7823027144888075e-06, "epoch": 5.0840199037441876, "percentage": 77.91, "elapsed_time": "7:08:02", "remaining_time": "2:01:20", "throughput": 2335.32, "total_tokens": 59977616} {"current_steps": 31170, "total_steps": 40000, "loss": 0.0178, "lr": 5.776024938003455e-06, "epoch": 5.084835630965006, "percentage": 77.92, "elapsed_time": "7:08:04", "remaining_time": "2:01:16", "throughput": 2335.48, "total_tokens": 59986768} {"current_steps": 31175, "total_steps": 40000, "loss": 0.0732, "lr": 5.7697501260966345e-06, "epoch": 5.085651358185823, "percentage": 77.94, "elapsed_time": "7:08:07", "remaining_time": "2:01:11", "throughput": 2335.73, "total_tokens": 59997792} {"current_steps": 31180, "total_steps": 40000, "loss": 0.0571, "lr": 5.7634782797360145e-06, "epoch": 5.08646708540664, "percentage": 77.95, "elapsed_time": "7:08:09", "remaining_time": "2:01:06", "throughput": 2335.93, "total_tokens": 60007920} {"current_steps": 31185, "total_steps": 40000, "loss": 0.0116, "lr": 5.757209399888777e-06, "epoch": 5.087282812627457, "percentage": 77.96, "elapsed_time": "7:08:11", "remaining_time": "2:01:02", "throughput": 2336.09, "total_tokens": 60016752} {"current_steps": 31190, "total_steps": 40000, "loss": 0.0478, "lr": 5.750943487521679e-06, "epoch": 5.088098539848275, "percentage": 77.98, "elapsed_time": "7:08:13", "remaining_time": "2:00:57", "throughput": 2336.29, "total_tokens": 60026752} {"current_steps": 31195, "total_steps": 40000, "loss": 0.088, "lr": 5.744680543600986e-06, "epoch": 5.088914267069092, "percentage": 77.99, "elapsed_time": "7:08:15", "remaining_time": "2:00:52", "throughput": 2336.48, "total_tokens": 60036560} {"current_steps": 31200, "total_steps": 40000, "loss": 0.0006, "lr": 5.738420569092537e-06, "epoch": 5.089729994289909, "percentage": 78.0, "elapsed_time": "7:08:17", "remaining_time": "2:00:47", "throughput": 2336.7, "total_tokens": 60047056} {"current_steps": 31200, "total_steps": 40000, "eval_loss": 0.33572661876678467, "epoch": 5.089729994289909, "percentage": 78.0, "elapsed_time": "7:09:38", "remaining_time": "2:01:10", "throughput": 2329.39, "total_tokens": 60047056} {"current_steps": 31205, "total_steps": 40000, "loss": 0.0003, "lr": 5.732163564961684e-06, "epoch": 5.090545721510727, "percentage": 78.01, "elapsed_time": "7:09:41", "remaining_time": "2:01:06", "throughput": 2329.45, "total_tokens": 60057440} {"current_steps": 31210, "total_steps": 40000, "loss": 0.0002, "lr": 5.725909532173354e-06, "epoch": 5.091361448731544, "percentage": 78.03, "elapsed_time": "7:09:43", "remaining_time": "2:01:01", "throughput": 2329.62, "total_tokens": 60066672} {"current_steps": 31215, "total_steps": 40000, "loss": 0.055, "lr": 5.719658471691977e-06, "epoch": 5.0921771759523615, "percentage": 78.04, "elapsed_time": "7:09:46", "remaining_time": "2:00:57", "throughput": 2329.83, "total_tokens": 60077088} {"current_steps": 31220, "total_steps": 40000, "loss": 0.0005, "lr": 5.71341038448156e-06, "epoch": 5.092992903173179, "percentage": 78.05, "elapsed_time": "7:09:48", "remaining_time": "2:00:52", "throughput": 2330.04, "total_tokens": 60087248} {"current_steps": 31225, "total_steps": 40000, "loss": 0.001, "lr": 5.707165271505635e-06, "epoch": 5.093808630393997, "percentage": 78.06, "elapsed_time": "7:09:50", "remaining_time": "2:00:47", "throughput": 2330.24, "total_tokens": 60097344} {"current_steps": 31230, "total_steps": 40000, "loss": 0.0013, "lr": 5.700923133727271e-06, "epoch": 5.094624357614814, "percentage": 78.08, "elapsed_time": "7:09:52", "remaining_time": "2:00:42", "throughput": 2330.45, "total_tokens": 60107488} {"current_steps": 31235, "total_steps": 40000, "loss": 0.0856, "lr": 5.694683972109083e-06, "epoch": 5.095440084835631, "percentage": 78.09, "elapsed_time": "7:09:54", "remaining_time": "2:00:38", "throughput": 2330.63, "total_tokens": 60116960} {"current_steps": 31240, "total_steps": 40000, "loss": 0.0008, "lr": 5.688447787613241e-06, "epoch": 5.096255812056448, "percentage": 78.1, "elapsed_time": "7:09:56", "remaining_time": "2:00:33", "throughput": 2330.8, "total_tokens": 60126160} {"current_steps": 31245, "total_steps": 40000, "loss": 0.0652, "lr": 5.6822145812014285e-06, "epoch": 5.097071539277266, "percentage": 78.11, "elapsed_time": "7:09:58", "remaining_time": "2:00:28", "throughput": 2330.99, "total_tokens": 60135872} {"current_steps": 31250, "total_steps": 40000, "loss": 0.0004, "lr": 5.675984353834896e-06, "epoch": 5.097887266498083, "percentage": 78.12, "elapsed_time": "7:10:00", "remaining_time": "2:00:24", "throughput": 2331.16, "total_tokens": 60145184} {"current_steps": 31255, "total_steps": 40000, "loss": 0.0002, "lr": 5.66975710647441e-06, "epoch": 5.0987029937189, "percentage": 78.14, "elapsed_time": "7:10:02", "remaining_time": "2:00:19", "throughput": 2331.38, "total_tokens": 60155664} {"current_steps": 31260, "total_steps": 40000, "loss": 0.0001, "lr": 5.663532840080304e-06, "epoch": 5.0995187209397175, "percentage": 78.15, "elapsed_time": "7:10:04", "remaining_time": "2:00:14", "throughput": 2331.58, "total_tokens": 60165680} {"current_steps": 31265, "total_steps": 40000, "loss": 0.0001, "lr": 5.6573115556124325e-06, "epoch": 5.1003344481605355, "percentage": 78.16, "elapsed_time": "7:10:06", "remaining_time": "2:00:10", "throughput": 2331.76, "total_tokens": 60175008} {"current_steps": 31270, "total_steps": 40000, "loss": 0.0001, "lr": 5.651093254030185e-06, "epoch": 5.101150175381353, "percentage": 78.17, "elapsed_time": "7:10:08", "remaining_time": "2:00:05", "throughput": 2331.91, "total_tokens": 60183776} {"current_steps": 31275, "total_steps": 40000, "loss": 0.0698, "lr": 5.644877936292514e-06, "epoch": 5.10196590260217, "percentage": 78.19, "elapsed_time": "7:10:10", "remaining_time": "2:00:00", "throughput": 2332.08, "total_tokens": 60192928} {"current_steps": 31280, "total_steps": 40000, "loss": 0.0007, "lr": 5.638665603357901e-06, "epoch": 5.102781629822987, "percentage": 78.2, "elapsed_time": "7:10:12", "remaining_time": "1:59:55", "throughput": 2332.25, "total_tokens": 60202288} {"current_steps": 31285, "total_steps": 40000, "loss": 0.0001, "lr": 5.632456256184357e-06, "epoch": 5.103597357043805, "percentage": 78.21, "elapsed_time": "7:10:15", "remaining_time": "1:59:51", "throughput": 2332.44, "total_tokens": 60211872} {"current_steps": 31290, "total_steps": 40000, "loss": 0.0004, "lr": 5.626249895729452e-06, "epoch": 5.104413084264622, "percentage": 78.22, "elapsed_time": "7:10:17", "remaining_time": "1:59:46", "throughput": 2332.57, "total_tokens": 60220112} {"current_steps": 31295, "total_steps": 40000, "loss": 0.018, "lr": 5.620046522950273e-06, "epoch": 5.105228811485439, "percentage": 78.24, "elapsed_time": "7:10:19", "remaining_time": "1:59:41", "throughput": 2332.76, "total_tokens": 60229856} {"current_steps": 31300, "total_steps": 40000, "loss": 0.0691, "lr": 5.613846138803464e-06, "epoch": 5.106044538706256, "percentage": 78.25, "elapsed_time": "7:10:21", "remaining_time": "1:59:37", "throughput": 2332.93, "total_tokens": 60239248} {"current_steps": 31305, "total_steps": 40000, "loss": 0.0005, "lr": 5.607648744245206e-06, "epoch": 5.106860265927074, "percentage": 78.26, "elapsed_time": "7:10:23", "remaining_time": "1:59:32", "throughput": 2333.13, "total_tokens": 60249168} {"current_steps": 31310, "total_steps": 40000, "loss": 0.0011, "lr": 5.601454340231207e-06, "epoch": 5.1076759931478914, "percentage": 78.27, "elapsed_time": "7:10:25", "remaining_time": "1:59:27", "throughput": 2333.32, "total_tokens": 60258896} {"current_steps": 31315, "total_steps": 40000, "loss": 0.0002, "lr": 5.595262927716724e-06, "epoch": 5.108491720368709, "percentage": 78.29, "elapsed_time": "7:10:27", "remaining_time": "1:59:23", "throughput": 2333.5, "total_tokens": 60268400} {"current_steps": 31320, "total_steps": 40000, "loss": 0.0044, "lr": 5.589074507656561e-06, "epoch": 5.109307447589526, "percentage": 78.3, "elapsed_time": "7:10:29", "remaining_time": "1:59:18", "throughput": 2333.72, "total_tokens": 60278784} {"current_steps": 31325, "total_steps": 40000, "loss": 0.0006, "lr": 5.582889081005044e-06, "epoch": 5.110123174810344, "percentage": 78.31, "elapsed_time": "7:10:31", "remaining_time": "1:59:13", "throughput": 2333.94, "total_tokens": 60289472} {"current_steps": 31330, "total_steps": 40000, "loss": 0.113, "lr": 5.5767066487160316e-06, "epoch": 5.110938902031161, "percentage": 78.33, "elapsed_time": "7:10:33", "remaining_time": "1:59:08", "throughput": 2334.1, "total_tokens": 60298432} {"current_steps": 31335, "total_steps": 40000, "loss": 0.0004, "lr": 5.570527211742949e-06, "epoch": 5.111754629251978, "percentage": 78.34, "elapsed_time": "7:10:35", "remaining_time": "1:59:04", "throughput": 2334.3, "total_tokens": 60308352} {"current_steps": 31340, "total_steps": 40000, "loss": 0.0001, "lr": 5.564350771038731e-06, "epoch": 5.112570356472795, "percentage": 78.35, "elapsed_time": "7:10:37", "remaining_time": "1:58:59", "throughput": 2334.47, "total_tokens": 60317600} {"current_steps": 31345, "total_steps": 40000, "loss": 0.0006, "lr": 5.558177327555875e-06, "epoch": 5.113386083693613, "percentage": 78.36, "elapsed_time": "7:10:39", "remaining_time": "1:58:54", "throughput": 2334.65, "total_tokens": 60327232} {"current_steps": 31350, "total_steps": 40000, "loss": 0.0004, "lr": 5.552006882246388e-06, "epoch": 5.11420181091443, "percentage": 78.38, "elapsed_time": "7:10:41", "remaining_time": "1:58:50", "throughput": 2334.87, "total_tokens": 60337664} {"current_steps": 31355, "total_steps": 40000, "loss": 0.0974, "lr": 5.545839436061839e-06, "epoch": 5.115017538135247, "percentage": 78.39, "elapsed_time": "7:10:44", "remaining_time": "1:58:45", "throughput": 2335.07, "total_tokens": 60347760} {"current_steps": 31360, "total_steps": 40000, "loss": 0.027, "lr": 5.539674989953331e-06, "epoch": 5.1158332653560645, "percentage": 78.4, "elapsed_time": "7:10:46", "remaining_time": "1:58:40", "throughput": 2335.29, "total_tokens": 60358272} {"current_steps": 31365, "total_steps": 40000, "loss": 0.0003, "lr": 5.533513544871488e-06, "epoch": 5.1166489925768825, "percentage": 78.41, "elapsed_time": "7:10:48", "remaining_time": "1:58:36", "throughput": 2335.49, "total_tokens": 60368208} {"current_steps": 31370, "total_steps": 40000, "loss": 0.0064, "lr": 5.527355101766493e-06, "epoch": 5.1174647197977, "percentage": 78.42, "elapsed_time": "7:10:50", "remaining_time": "1:58:31", "throughput": 2335.65, "total_tokens": 60377216} {"current_steps": 31375, "total_steps": 40000, "loss": 0.1457, "lr": 5.521199661588044e-06, "epoch": 5.118280447018517, "percentage": 78.44, "elapsed_time": "7:10:52", "remaining_time": "1:58:26", "throughput": 2335.8, "total_tokens": 60385888} {"current_steps": 31380, "total_steps": 40000, "loss": 0.0068, "lr": 5.5150472252853944e-06, "epoch": 5.119096174239334, "percentage": 78.45, "elapsed_time": "7:10:54", "remaining_time": "1:58:22", "throughput": 2335.98, "total_tokens": 60395456} {"current_steps": 31385, "total_steps": 40000, "loss": 0.0416, "lr": 5.50889779380733e-06, "epoch": 5.119911901460152, "percentage": 78.46, "elapsed_time": "7:10:56", "remaining_time": "1:58:17", "throughput": 2336.16, "total_tokens": 60404944} {"current_steps": 31390, "total_steps": 40000, "loss": 0.0002, "lr": 5.5027513681021605e-06, "epoch": 5.120727628680969, "percentage": 78.47, "elapsed_time": "7:10:58", "remaining_time": "1:58:12", "throughput": 2336.35, "total_tokens": 60414816} {"current_steps": 31395, "total_steps": 40000, "loss": 0.0004, "lr": 5.4966079491177545e-06, "epoch": 5.121543355901786, "percentage": 78.49, "elapsed_time": "7:11:00", "remaining_time": "1:58:08", "throughput": 2336.54, "total_tokens": 60424448} {"current_steps": 31400, "total_steps": 40000, "loss": 0.0002, "lr": 5.490467537801491e-06, "epoch": 5.122359083122603, "percentage": 78.5, "elapsed_time": "7:11:02", "remaining_time": "1:58:03", "throughput": 2336.71, "total_tokens": 60433680} {"current_steps": 31400, "total_steps": 40000, "eval_loss": 0.34602808952331543, "epoch": 5.122359083122603, "percentage": 78.5, "elapsed_time": "7:12:23", "remaining_time": "1:58:25", "throughput": 2329.44, "total_tokens": 60433680} {"current_steps": 31405, "total_steps": 40000, "loss": 0.0001, "lr": 5.484330135100313e-06, "epoch": 5.123174810343421, "percentage": 78.51, "elapsed_time": "7:12:27", "remaining_time": "1:58:21", "throughput": 2329.43, "total_tokens": 60442608} {"current_steps": 31410, "total_steps": 40000, "loss": 0.0243, "lr": 5.4781957419606785e-06, "epoch": 5.1239905375642385, "percentage": 78.53, "elapsed_time": "7:12:29", "remaining_time": "1:58:16", "throughput": 2329.64, "total_tokens": 60452880} {"current_steps": 31415, "total_steps": 40000, "loss": 0.0001, "lr": 5.472064359328577e-06, "epoch": 5.124806264785056, "percentage": 78.54, "elapsed_time": "7:12:31", "remaining_time": "1:58:11", "throughput": 2329.84, "total_tokens": 60463152} {"current_steps": 31420, "total_steps": 40000, "loss": 0.0001, "lr": 5.4659359881495565e-06, "epoch": 5.125621992005874, "percentage": 78.55, "elapsed_time": "7:12:33", "remaining_time": "1:58:07", "throughput": 2330.05, "total_tokens": 60473280} {"current_steps": 31425, "total_steps": 40000, "loss": 0.0001, "lr": 5.4598106293686916e-06, "epoch": 5.126437719226691, "percentage": 78.56, "elapsed_time": "7:12:35", "remaining_time": "1:58:02", "throughput": 2330.23, "total_tokens": 60482816} {"current_steps": 31430, "total_steps": 40000, "loss": 0.0571, "lr": 5.45368828393058e-06, "epoch": 5.127253446447508, "percentage": 78.57, "elapsed_time": "7:12:37", "remaining_time": "1:57:57", "throughput": 2330.35, "total_tokens": 60490704} {"current_steps": 31435, "total_steps": 40000, "loss": 0.0006, "lr": 5.44756895277937e-06, "epoch": 5.128069173668325, "percentage": 78.59, "elapsed_time": "7:12:39", "remaining_time": "1:57:53", "throughput": 2330.52, "total_tokens": 60500032} {"current_steps": 31440, "total_steps": 40000, "loss": 0.121, "lr": 5.441452636858746e-06, "epoch": 5.128884900889143, "percentage": 78.6, "elapsed_time": "7:12:41", "remaining_time": "1:57:48", "throughput": 2330.67, "total_tokens": 60508752} {"current_steps": 31445, "total_steps": 40000, "loss": 0.1349, "lr": 5.435339337111905e-06, "epoch": 5.12970062810996, "percentage": 78.61, "elapsed_time": "7:12:44", "remaining_time": "1:57:43", "throughput": 2330.85, "total_tokens": 60518352} {"current_steps": 31450, "total_steps": 40000, "loss": 0.0006, "lr": 5.42922905448161e-06, "epoch": 5.130516355330777, "percentage": 78.62, "elapsed_time": "7:12:46", "remaining_time": "1:57:39", "throughput": 2331.01, "total_tokens": 60527248} {"current_steps": 31455, "total_steps": 40000, "loss": 0.0457, "lr": 5.423121789910129e-06, "epoch": 5.1313320825515945, "percentage": 78.64, "elapsed_time": "7:12:48", "remaining_time": "1:57:34", "throughput": 2331.14, "total_tokens": 60535472} {"current_steps": 31460, "total_steps": 40000, "loss": 0.0005, "lr": 5.417017544339287e-06, "epoch": 5.1321478097724125, "percentage": 78.65, "elapsed_time": "7:12:50", "remaining_time": "1:57:29", "throughput": 2331.24, "total_tokens": 60542848} {"current_steps": 31465, "total_steps": 40000, "loss": 0.0007, "lr": 5.410916318710443e-06, "epoch": 5.13296353699323, "percentage": 78.66, "elapsed_time": "7:12:52", "remaining_time": "1:57:25", "throughput": 2331.44, "total_tokens": 60552976} {"current_steps": 31470, "total_steps": 40000, "loss": 0.0236, "lr": 5.404818113964466e-06, "epoch": 5.133779264214047, "percentage": 78.67, "elapsed_time": "7:12:54", "remaining_time": "1:57:20", "throughput": 2331.61, "total_tokens": 60562144} {"current_steps": 31475, "total_steps": 40000, "loss": 0.0004, "lr": 5.398722931041792e-06, "epoch": 5.134594991434864, "percentage": 78.69, "elapsed_time": "7:12:56", "remaining_time": "1:57:15", "throughput": 2331.83, "total_tokens": 60572768} {"current_steps": 31480, "total_steps": 40000, "loss": 0.0008, "lr": 5.392630770882367e-06, "epoch": 5.135410718655682, "percentage": 78.7, "elapsed_time": "7:12:58", "remaining_time": "1:57:11", "throughput": 2332.06, "total_tokens": 60583472} {"current_steps": 31485, "total_steps": 40000, "loss": 0.0001, "lr": 5.3865416344256705e-06, "epoch": 5.136226445876499, "percentage": 78.71, "elapsed_time": "7:13:00", "remaining_time": "1:57:06", "throughput": 2332.23, "total_tokens": 60592832} {"current_steps": 31490, "total_steps": 40000, "loss": 0.0002, "lr": 5.380455522610742e-06, "epoch": 5.137042173097316, "percentage": 78.72, "elapsed_time": "7:13:02", "remaining_time": "1:57:01", "throughput": 2332.37, "total_tokens": 60601184} {"current_steps": 31495, "total_steps": 40000, "loss": 0.0417, "lr": 5.374372436376116e-06, "epoch": 5.137857900318133, "percentage": 78.74, "elapsed_time": "7:13:04", "remaining_time": "1:56:56", "throughput": 2332.54, "total_tokens": 60610512} {"current_steps": 31500, "total_steps": 40000, "loss": 0.11, "lr": 5.368292376659895e-06, "epoch": 5.138673627538951, "percentage": 78.75, "elapsed_time": "7:13:06", "remaining_time": "1:56:52", "throughput": 2332.72, "total_tokens": 60620048} {"current_steps": 31505, "total_steps": 40000, "loss": 0.0001, "lr": 5.362215344399701e-06, "epoch": 5.139489354759768, "percentage": 78.76, "elapsed_time": "7:13:08", "remaining_time": "1:56:47", "throughput": 2332.87, "total_tokens": 60628768} {"current_steps": 31510, "total_steps": 40000, "loss": 0.0006, "lr": 5.356141340532678e-06, "epoch": 5.1403050819805856, "percentage": 78.77, "elapsed_time": "7:13:10", "remaining_time": "1:56:42", "throughput": 2333.06, "total_tokens": 60638416} {"current_steps": 31515, "total_steps": 40000, "loss": 0.0001, "lr": 5.350070365995522e-06, "epoch": 5.141120809201403, "percentage": 78.79, "elapsed_time": "7:13:13", "remaining_time": "1:56:38", "throughput": 2333.25, "total_tokens": 60648224} {"current_steps": 31520, "total_steps": 40000, "loss": 0.0009, "lr": 5.344002421724459e-06, "epoch": 5.141936536422221, "percentage": 78.8, "elapsed_time": "7:13:15", "remaining_time": "1:56:33", "throughput": 2333.46, "total_tokens": 60658624} {"current_steps": 31525, "total_steps": 40000, "loss": 0.0001, "lr": 5.337937508655228e-06, "epoch": 5.142752263643038, "percentage": 78.81, "elapsed_time": "7:13:17", "remaining_time": "1:56:28", "throughput": 2333.63, "total_tokens": 60667984} {"current_steps": 31530, "total_steps": 40000, "loss": 0.0003, "lr": 5.331875627723126e-06, "epoch": 5.143567990863855, "percentage": 78.83, "elapsed_time": "7:13:19", "remaining_time": "1:56:24", "throughput": 2333.78, "total_tokens": 60676800} {"current_steps": 31535, "total_steps": 40000, "loss": 0.0, "lr": 5.325816779862963e-06, "epoch": 5.144383718084672, "percentage": 78.84, "elapsed_time": "7:13:21", "remaining_time": "1:56:19", "throughput": 2333.98, "total_tokens": 60686928} {"current_steps": 31540, "total_steps": 40000, "loss": 0.0001, "lr": 5.319760966009102e-06, "epoch": 5.14519944530549, "percentage": 78.85, "elapsed_time": "7:13:23", "remaining_time": "1:56:14", "throughput": 2334.14, "total_tokens": 60696048} {"current_steps": 31545, "total_steps": 40000, "loss": 0.0003, "lr": 5.3137081870954096e-06, "epoch": 5.146015172526307, "percentage": 78.86, "elapsed_time": "7:13:25", "remaining_time": "1:56:10", "throughput": 2334.35, "total_tokens": 60706400} {"current_steps": 31550, "total_steps": 40000, "loss": 0.1213, "lr": 5.307658444055313e-06, "epoch": 5.146830899747124, "percentage": 78.88, "elapsed_time": "7:13:27", "remaining_time": "1:56:05", "throughput": 2334.54, "total_tokens": 60716288} {"current_steps": 31555, "total_steps": 40000, "loss": 0.0009, "lr": 5.301611737821749e-06, "epoch": 5.1476466269679415, "percentage": 78.89, "elapsed_time": "7:13:29", "remaining_time": "1:56:00", "throughput": 2334.73, "total_tokens": 60726256} {"current_steps": 31560, "total_steps": 40000, "loss": 0.0001, "lr": 5.295568069327206e-06, "epoch": 5.1484623541887595, "percentage": 78.9, "elapsed_time": "7:13:32", "remaining_time": "1:55:56", "throughput": 2334.89, "total_tokens": 60735360} {"current_steps": 31565, "total_steps": 40000, "loss": 0.0002, "lr": 5.289527439503683e-06, "epoch": 5.149278081409577, "percentage": 78.91, "elapsed_time": "7:13:34", "remaining_time": "1:55:51", "throughput": 2335.05, "total_tokens": 60744432} {"current_steps": 31570, "total_steps": 40000, "loss": 0.0002, "lr": 5.28348984928273e-06, "epoch": 5.150093808630394, "percentage": 78.92, "elapsed_time": "7:13:36", "remaining_time": "1:55:47", "throughput": 2335.21, "total_tokens": 60753536} {"current_steps": 31575, "total_steps": 40000, "loss": 0.0004, "lr": 5.27745529959541e-06, "epoch": 5.150909535851211, "percentage": 78.94, "elapsed_time": "7:13:38", "remaining_time": "1:55:42", "throughput": 2335.43, "total_tokens": 60764112} {"current_steps": 31580, "total_steps": 40000, "loss": 0.0002, "lr": 5.271423791372335e-06, "epoch": 5.151725263072029, "percentage": 78.95, "elapsed_time": "7:13:40", "remaining_time": "1:55:37", "throughput": 2335.55, "total_tokens": 60772080} {"current_steps": 31585, "total_steps": 40000, "loss": 0.0001, "lr": 5.26539532554364e-06, "epoch": 5.152540990292846, "percentage": 78.96, "elapsed_time": "7:13:42", "remaining_time": "1:55:33", "throughput": 2335.73, "total_tokens": 60781712} {"current_steps": 31590, "total_steps": 40000, "loss": 0.0109, "lr": 5.25936990303898e-06, "epoch": 5.153356717513663, "percentage": 78.97, "elapsed_time": "7:13:44", "remaining_time": "1:55:28", "throughput": 2335.9, "total_tokens": 60791104} {"current_steps": 31595, "total_steps": 40000, "loss": 0.0005, "lr": 5.253347524787555e-06, "epoch": 5.154172444734481, "percentage": 78.99, "elapsed_time": "7:13:46", "remaining_time": "1:55:23", "throughput": 2336.08, "total_tokens": 60800720} {"current_steps": 31600, "total_steps": 40000, "loss": 0.0001, "lr": 5.2473281917181035e-06, "epoch": 5.154988171955298, "percentage": 79.0, "elapsed_time": "7:13:48", "remaining_time": "1:55:19", "throughput": 2336.23, "total_tokens": 60809376} {"current_steps": 31600, "total_steps": 40000, "eval_loss": 0.3556883633136749, "epoch": 5.154988171955298, "percentage": 79.0, "elapsed_time": "7:15:09", "remaining_time": "1:55:40", "throughput": 2329.0, "total_tokens": 60809376} {"current_steps": 31605, "total_steps": 40000, "loss": 0.0, "lr": 5.241311904758864e-06, "epoch": 5.1558038991761155, "percentage": 79.01, "elapsed_time": "7:15:13", "remaining_time": "1:55:36", "throughput": 2329.02, "total_tokens": 60819392} {"current_steps": 31610, "total_steps": 40000, "loss": 0.0001, "lr": 5.23529866483764e-06, "epoch": 5.156619626396933, "percentage": 79.03, "elapsed_time": "7:15:15", "remaining_time": "1:55:31", "throughput": 2329.16, "total_tokens": 60827888} {"current_steps": 31615, "total_steps": 40000, "loss": 0.0002, "lr": 5.229288472881732e-06, "epoch": 5.157435353617751, "percentage": 79.04, "elapsed_time": "7:15:17", "remaining_time": "1:55:27", "throughput": 2329.3, "total_tokens": 60836384} {"current_steps": 31620, "total_steps": 40000, "loss": 0.0, "lr": 5.2232813298180025e-06, "epoch": 5.158251080838568, "percentage": 79.05, "elapsed_time": "7:15:20", "remaining_time": "1:55:22", "throughput": 2329.45, "total_tokens": 60845296} {"current_steps": 31625, "total_steps": 40000, "loss": 0.0011, "lr": 5.217277236572824e-06, "epoch": 5.159066808059385, "percentage": 79.06, "elapsed_time": "7:15:22", "remaining_time": "1:55:17", "throughput": 2329.63, "total_tokens": 60854768} {"current_steps": 31630, "total_steps": 40000, "loss": 0.0412, "lr": 5.211276194072093e-06, "epoch": 5.159882535280202, "percentage": 79.07, "elapsed_time": "7:15:24", "remaining_time": "1:55:13", "throughput": 2329.8, "total_tokens": 60864208} {"current_steps": 31635, "total_steps": 40000, "loss": 0.0002, "lr": 5.205278203241254e-06, "epoch": 5.16069826250102, "percentage": 79.09, "elapsed_time": "7:15:26", "remaining_time": "1:55:08", "throughput": 2330.0, "total_tokens": 60874400} {"current_steps": 31640, "total_steps": 40000, "loss": 0.0002, "lr": 5.199283265005278e-06, "epoch": 5.161513989721837, "percentage": 79.1, "elapsed_time": "7:15:28", "remaining_time": "1:55:03", "throughput": 2330.19, "total_tokens": 60884176} {"current_steps": 31645, "total_steps": 40000, "loss": 0.0009, "lr": 5.193291380288648e-06, "epoch": 5.162329716942654, "percentage": 79.11, "elapsed_time": "7:15:30", "remaining_time": "1:54:59", "throughput": 2330.39, "total_tokens": 60894448} {"current_steps": 31650, "total_steps": 40000, "loss": 0.0004, "lr": 5.1873025500153995e-06, "epoch": 5.163145444163471, "percentage": 79.12, "elapsed_time": "7:15:32", "remaining_time": "1:54:54", "throughput": 2330.6, "total_tokens": 60904816} {"current_steps": 31655, "total_steps": 40000, "loss": 0.0003, "lr": 5.181316775109071e-06, "epoch": 5.1639611713842895, "percentage": 79.14, "elapsed_time": "7:15:34", "remaining_time": "1:54:49", "throughput": 2330.78, "total_tokens": 60914416} {"current_steps": 31660, "total_steps": 40000, "loss": 0.0002, "lr": 5.1753340564927564e-06, "epoch": 5.164776898605107, "percentage": 79.15, "elapsed_time": "7:15:36", "remaining_time": "1:54:45", "throughput": 2330.91, "total_tokens": 60922800} {"current_steps": 31665, "total_steps": 40000, "loss": 0.0007, "lr": 5.169354395089068e-06, "epoch": 5.165592625825924, "percentage": 79.16, "elapsed_time": "7:15:39", "remaining_time": "1:54:40", "throughput": 2331.09, "total_tokens": 60932432} {"current_steps": 31670, "total_steps": 40000, "loss": 0.0006, "lr": 5.1633777918201346e-06, "epoch": 5.166408353046741, "percentage": 79.17, "elapsed_time": "7:15:41", "remaining_time": "1:54:35", "throughput": 2331.3, "total_tokens": 60942816} {"current_steps": 31675, "total_steps": 40000, "loss": 0.0001, "lr": 5.157404247607625e-06, "epoch": 5.167224080267559, "percentage": 79.19, "elapsed_time": "7:15:43", "remaining_time": "1:54:31", "throughput": 2331.44, "total_tokens": 60951264} {"current_steps": 31680, "total_steps": 40000, "loss": 0.0008, "lr": 5.1514337633727454e-06, "epoch": 5.168039807488376, "percentage": 79.2, "elapsed_time": "7:15:45", "remaining_time": "1:54:26", "throughput": 2331.56, "total_tokens": 60959440} {"current_steps": 31685, "total_steps": 40000, "loss": 0.0, "lr": 5.145466340036206e-06, "epoch": 5.168855534709193, "percentage": 79.21, "elapsed_time": "7:15:47", "remaining_time": "1:54:21", "throughput": 2331.76, "total_tokens": 60969552} {"current_steps": 31690, "total_steps": 40000, "loss": 0.0003, "lr": 5.139501978518274e-06, "epoch": 5.16967126193001, "percentage": 79.22, "elapsed_time": "7:15:49", "remaining_time": "1:54:17", "throughput": 2331.99, "total_tokens": 60980432} {"current_steps": 31695, "total_steps": 40000, "loss": 0.0012, "lr": 5.133540679738716e-06, "epoch": 5.170486989150828, "percentage": 79.24, "elapsed_time": "7:15:51", "remaining_time": "1:54:12", "throughput": 2332.12, "total_tokens": 60988704} {"current_steps": 31700, "total_steps": 40000, "loss": 0.0002, "lr": 5.127582444616838e-06, "epoch": 5.171302716371645, "percentage": 79.25, "elapsed_time": "7:15:53", "remaining_time": "1:54:07", "throughput": 2332.27, "total_tokens": 60997536} {"current_steps": 31705, "total_steps": 40000, "loss": 0.0856, "lr": 5.121627274071486e-06, "epoch": 5.1721184435924625, "percentage": 79.26, "elapsed_time": "7:15:55", "remaining_time": "1:54:03", "throughput": 2332.47, "total_tokens": 61007856} {"current_steps": 31710, "total_steps": 40000, "loss": 0.0002, "lr": 5.115675169021009e-06, "epoch": 5.17293417081328, "percentage": 79.27, "elapsed_time": "7:15:57", "remaining_time": "1:53:58", "throughput": 2332.66, "total_tokens": 61017600} {"current_steps": 31715, "total_steps": 40000, "loss": 0.0002, "lr": 5.1097261303832994e-06, "epoch": 5.173749898034098, "percentage": 79.29, "elapsed_time": "7:16:00", "remaining_time": "1:53:53", "throughput": 2332.85, "total_tokens": 61027568} {"current_steps": 31720, "total_steps": 40000, "loss": 0.0322, "lr": 5.103780159075788e-06, "epoch": 5.174565625254915, "percentage": 79.3, "elapsed_time": "7:16:02", "remaining_time": "1:53:49", "throughput": 2333.01, "total_tokens": 61036672} {"current_steps": 31725, "total_steps": 40000, "loss": 0.0005, "lr": 5.0978372560154e-06, "epoch": 5.175381352475732, "percentage": 79.31, "elapsed_time": "7:16:04", "remaining_time": "1:53:44", "throughput": 2333.15, "total_tokens": 61045232} {"current_steps": 31730, "total_steps": 40000, "loss": 0.0, "lr": 5.091897422118619e-06, "epoch": 5.176197079696549, "percentage": 79.33, "elapsed_time": "7:16:06", "remaining_time": "1:53:39", "throughput": 2333.34, "total_tokens": 61055024} {"current_steps": 31735, "total_steps": 40000, "loss": 0.0012, "lr": 5.0859606583014305e-06, "epoch": 5.177012806917367, "percentage": 79.34, "elapsed_time": "7:16:08", "remaining_time": "1:53:35", "throughput": 2333.52, "total_tokens": 61064624} {"current_steps": 31740, "total_steps": 40000, "loss": 0.0002, "lr": 5.080026965479365e-06, "epoch": 5.177828534138184, "percentage": 79.35, "elapsed_time": "7:16:10", "remaining_time": "1:53:30", "throughput": 2333.71, "total_tokens": 61074624} {"current_steps": 31745, "total_steps": 40000, "loss": 0.0006, "lr": 5.074096344567475e-06, "epoch": 5.178644261359001, "percentage": 79.36, "elapsed_time": "7:16:12", "remaining_time": "1:53:25", "throughput": 2333.84, "total_tokens": 61082928} {"current_steps": 31750, "total_steps": 40000, "loss": 0.0337, "lr": 5.0681687964803294e-06, "epoch": 5.1794599885798185, "percentage": 79.38, "elapsed_time": "7:16:14", "remaining_time": "1:53:21", "throughput": 2333.98, "total_tokens": 61091632} {"current_steps": 31755, "total_steps": 40000, "loss": 0.0489, "lr": 5.06224432213204e-06, "epoch": 5.1802757158006365, "percentage": 79.39, "elapsed_time": "7:16:16", "remaining_time": "1:53:16", "throughput": 2334.1, "total_tokens": 61099520} {"current_steps": 31760, "total_steps": 40000, "loss": 0.0001, "lr": 5.056322922436224e-06, "epoch": 5.181091443021454, "percentage": 79.4, "elapsed_time": "7:16:19", "remaining_time": "1:53:12", "throughput": 2334.26, "total_tokens": 61108784} {"current_steps": 31765, "total_steps": 40000, "loss": 0.0355, "lr": 5.0504045983060465e-06, "epoch": 5.181907170242271, "percentage": 79.41, "elapsed_time": "7:16:21", "remaining_time": "1:53:07", "throughput": 2334.41, "total_tokens": 61117712} {"current_steps": 31770, "total_steps": 40000, "loss": 0.0576, "lr": 5.044489350654183e-06, "epoch": 5.182722897463089, "percentage": 79.42, "elapsed_time": "7:16:23", "remaining_time": "1:53:02", "throughput": 2334.56, "total_tokens": 61126384} {"current_steps": 31775, "total_steps": 40000, "loss": 0.0414, "lr": 5.038577180392831e-06, "epoch": 5.183538624683906, "percentage": 79.44, "elapsed_time": "7:16:25", "remaining_time": "1:52:58", "throughput": 2334.72, "total_tokens": 61135744} {"current_steps": 31780, "total_steps": 40000, "loss": 0.0001, "lr": 5.032668088433729e-06, "epoch": 5.184354351904723, "percentage": 79.45, "elapsed_time": "7:16:27", "remaining_time": "1:52:53", "throughput": 2334.89, "total_tokens": 61144864} {"current_steps": 31785, "total_steps": 40000, "loss": 0.1322, "lr": 5.02676207568814e-06, "epoch": 5.18517007912554, "percentage": 79.46, "elapsed_time": "7:16:29", "remaining_time": "1:52:48", "throughput": 2335.07, "total_tokens": 61154688} {"current_steps": 31790, "total_steps": 40000, "loss": 0.0119, "lr": 5.02085914306683e-06, "epoch": 5.185985806346358, "percentage": 79.47, "elapsed_time": "7:16:31", "remaining_time": "1:52:44", "throughput": 2335.28, "total_tokens": 61165184} {"current_steps": 31795, "total_steps": 40000, "loss": 0.0001, "lr": 5.014959291480123e-06, "epoch": 5.186801533567175, "percentage": 79.49, "elapsed_time": "7:16:33", "remaining_time": "1:52:39", "throughput": 2335.53, "total_tokens": 61176512} {"current_steps": 31800, "total_steps": 40000, "loss": 0.0002, "lr": 5.009062521837835e-06, "epoch": 5.1876172607879925, "percentage": 79.5, "elapsed_time": "7:16:35", "remaining_time": "1:52:34", "throughput": 2335.73, "total_tokens": 61186608} {"current_steps": 31800, "total_steps": 40000, "eval_loss": 0.35577070713043213, "epoch": 5.1876172607879925, "percentage": 79.5, "elapsed_time": "7:17:56", "remaining_time": "1:52:55", "throughput": 2328.55, "total_tokens": 61186608} {"current_steps": 31805, "total_steps": 40000, "loss": 0.0008, "lr": 5.003168835049324e-06, "epoch": 5.18843298800881, "percentage": 79.51, "elapsed_time": "7:18:00", "remaining_time": "1:52:51", "throughput": 2328.58, "total_tokens": 61196400} {"current_steps": 31810, "total_steps": 40000, "loss": 0.0002, "lr": 4.997278232023483e-06, "epoch": 5.189248715229628, "percentage": 79.53, "elapsed_time": "7:18:02", "remaining_time": "1:52:46", "throughput": 2328.74, "total_tokens": 61205504} {"current_steps": 31815, "total_steps": 40000, "loss": 0.0001, "lr": 4.9913907136687036e-06, "epoch": 5.190064442450445, "percentage": 79.54, "elapsed_time": "7:18:04", "remaining_time": "1:52:42", "throughput": 2328.89, "total_tokens": 61214400} {"current_steps": 31820, "total_steps": 40000, "loss": 0.0356, "lr": 4.985506280892918e-06, "epoch": 5.190880169671262, "percentage": 79.55, "elapsed_time": "7:18:06", "remaining_time": "1:52:37", "throughput": 2329.06, "total_tokens": 61223904} {"current_steps": 31825, "total_steps": 40000, "loss": 0.0017, "lr": 4.979624934603589e-06, "epoch": 5.191695896892079, "percentage": 79.56, "elapsed_time": "7:18:09", "remaining_time": "1:52:32", "throughput": 2329.22, "total_tokens": 61232928} {"current_steps": 31830, "total_steps": 40000, "loss": 0.0002, "lr": 4.97374667570768e-06, "epoch": 5.192511624112897, "percentage": 79.57, "elapsed_time": "7:18:11", "remaining_time": "1:52:28", "throughput": 2329.41, "total_tokens": 61242816} {"current_steps": 31835, "total_steps": 40000, "loss": 0.0022, "lr": 4.967871505111704e-06, "epoch": 5.193327351333714, "percentage": 79.59, "elapsed_time": "7:18:13", "remaining_time": "1:52:23", "throughput": 2329.59, "total_tokens": 61252512} {"current_steps": 31840, "total_steps": 40000, "loss": 0.0002, "lr": 4.961999423721686e-06, "epoch": 5.194143078554531, "percentage": 79.6, "elapsed_time": "7:18:15", "remaining_time": "1:52:19", "throughput": 2329.76, "total_tokens": 61262016} {"current_steps": 31845, "total_steps": 40000, "loss": 0.0002, "lr": 4.956130432443159e-06, "epoch": 5.194958805775348, "percentage": 79.61, "elapsed_time": "7:18:17", "remaining_time": "1:52:14", "throughput": 2329.95, "total_tokens": 61271904} {"current_steps": 31850, "total_steps": 40000, "loss": 0.0005, "lr": 4.950264532181215e-06, "epoch": 5.195774532996166, "percentage": 79.62, "elapsed_time": "7:18:19", "remaining_time": "1:52:09", "throughput": 2330.07, "total_tokens": 61279952} {"current_steps": 31855, "total_steps": 40000, "loss": 0.1338, "lr": 4.944401723840433e-06, "epoch": 5.196590260216984, "percentage": 79.64, "elapsed_time": "7:18:21", "remaining_time": "1:52:05", "throughput": 2330.28, "total_tokens": 61290272} {"current_steps": 31860, "total_steps": 40000, "loss": 0.0002, "lr": 4.938542008324942e-06, "epoch": 5.197405987437801, "percentage": 79.65, "elapsed_time": "7:18:23", "remaining_time": "1:52:00", "throughput": 2330.43, "total_tokens": 61299264} {"current_steps": 31865, "total_steps": 40000, "loss": 0.0002, "lr": 4.9326853865383855e-06, "epoch": 5.198221714658618, "percentage": 79.66, "elapsed_time": "7:18:25", "remaining_time": "1:51:55", "throughput": 2330.58, "total_tokens": 61308032} {"current_steps": 31870, "total_steps": 40000, "loss": 0.0066, "lr": 4.926831859383918e-06, "epoch": 5.199037441879436, "percentage": 79.67, "elapsed_time": "7:18:28", "remaining_time": "1:51:51", "throughput": 2330.77, "total_tokens": 61318000} {"current_steps": 31875, "total_steps": 40000, "loss": 0.0009, "lr": 4.92098142776424e-06, "epoch": 5.199853169100253, "percentage": 79.69, "elapsed_time": "7:18:30", "remaining_time": "1:51:46", "throughput": 2330.93, "total_tokens": 61327072} {"current_steps": 31880, "total_steps": 40000, "loss": 0.0031, "lr": 4.91513409258155e-06, "epoch": 5.20066889632107, "percentage": 79.7, "elapsed_time": "7:18:32", "remaining_time": "1:51:41", "throughput": 2331.13, "total_tokens": 61337456} {"current_steps": 31885, "total_steps": 40000, "loss": 0.0003, "lr": 4.909289854737581e-06, "epoch": 5.201484623541887, "percentage": 79.71, "elapsed_time": "7:18:34", "remaining_time": "1:51:37", "throughput": 2331.27, "total_tokens": 61345952} {"current_steps": 31890, "total_steps": 40000, "loss": 0.0001, "lr": 4.903448715133602e-06, "epoch": 5.202300350762705, "percentage": 79.72, "elapsed_time": "7:18:36", "remaining_time": "1:51:32", "throughput": 2331.45, "total_tokens": 61355584} {"current_steps": 31895, "total_steps": 40000, "loss": 0.0003, "lr": 4.897610674670372e-06, "epoch": 5.203116077983522, "percentage": 79.74, "elapsed_time": "7:18:38", "remaining_time": "1:51:27", "throughput": 2331.6, "total_tokens": 61364512} {"current_steps": 31900, "total_steps": 40000, "loss": 0.0305, "lr": 4.8917757342482e-06, "epoch": 5.2039318052043395, "percentage": 79.75, "elapsed_time": "7:18:40", "remaining_time": "1:51:23", "throughput": 2331.74, "total_tokens": 61373072} {"current_steps": 31905, "total_steps": 40000, "loss": 0.0009, "lr": 4.885943894766909e-06, "epoch": 5.204747532425157, "percentage": 79.76, "elapsed_time": "7:18:42", "remaining_time": "1:51:18", "throughput": 2331.92, "total_tokens": 61382768} {"current_steps": 31910, "total_steps": 40000, "loss": 0.0569, "lr": 4.880115157125842e-06, "epoch": 5.205563259645975, "percentage": 79.77, "elapsed_time": "7:18:44", "remaining_time": "1:51:14", "throughput": 2332.15, "total_tokens": 61393824} {"current_steps": 31915, "total_steps": 40000, "loss": 0.0004, "lr": 4.874289522223857e-06, "epoch": 5.206378986866792, "percentage": 79.79, "elapsed_time": "7:18:47", "remaining_time": "1:51:09", "throughput": 2332.35, "total_tokens": 61403952} {"current_steps": 31920, "total_steps": 40000, "loss": 0.027, "lr": 4.868466990959339e-06, "epoch": 5.207194714087609, "percentage": 79.8, "elapsed_time": "7:18:49", "remaining_time": "1:51:04", "throughput": 2332.51, "total_tokens": 61412896} {"current_steps": 31925, "total_steps": 40000, "loss": 0.1009, "lr": 4.8626475642301964e-06, "epoch": 5.208010441308426, "percentage": 79.81, "elapsed_time": "7:18:51", "remaining_time": "1:51:00", "throughput": 2332.67, "total_tokens": 61422160} {"current_steps": 31930, "total_steps": 40000, "loss": 0.0004, "lr": 4.856831242933871e-06, "epoch": 5.208826168529244, "percentage": 79.83, "elapsed_time": "7:18:53", "remaining_time": "1:50:55", "throughput": 2332.92, "total_tokens": 61433728} {"current_steps": 31935, "total_steps": 40000, "loss": 0.0001, "lr": 4.851018027967294e-06, "epoch": 5.209641895750061, "percentage": 79.84, "elapsed_time": "7:18:55", "remaining_time": "1:50:50", "throughput": 2333.09, "total_tokens": 61443136} {"current_steps": 31940, "total_steps": 40000, "loss": 0.0001, "lr": 4.845207920226946e-06, "epoch": 5.210457622970878, "percentage": 79.85, "elapsed_time": "7:18:57", "remaining_time": "1:50:46", "throughput": 2333.29, "total_tokens": 61453088} {"current_steps": 31945, "total_steps": 40000, "loss": 0.0002, "lr": 4.839400920608825e-06, "epoch": 5.211273350191696, "percentage": 79.86, "elapsed_time": "7:18:59", "remaining_time": "1:50:41", "throughput": 2333.47, "total_tokens": 61462992} {"current_steps": 31950, "total_steps": 40000, "loss": 0.0043, "lr": 4.83359703000843e-06, "epoch": 5.2120890774125135, "percentage": 79.88, "elapsed_time": "7:19:01", "remaining_time": "1:50:36", "throughput": 2333.65, "total_tokens": 61472608} {"current_steps": 31955, "total_steps": 40000, "loss": 0.1025, "lr": 4.827796249320804e-06, "epoch": 5.212904804633331, "percentage": 79.89, "elapsed_time": "7:19:03", "remaining_time": "1:50:32", "throughput": 2333.86, "total_tokens": 61482896} {"current_steps": 31960, "total_steps": 40000, "loss": 0.0004, "lr": 4.82199857944049e-06, "epoch": 5.213720531854148, "percentage": 79.9, "elapsed_time": "7:19:06", "remaining_time": "1:50:27", "throughput": 2333.98, "total_tokens": 61491088} {"current_steps": 31965, "total_steps": 40000, "loss": 0.0002, "lr": 4.8162040212615695e-06, "epoch": 5.214536259074965, "percentage": 79.91, "elapsed_time": "7:19:08", "remaining_time": "1:50:23", "throughput": 2334.16, "total_tokens": 61500880} {"current_steps": 31970, "total_steps": 40000, "loss": 0.0002, "lr": 4.810412575677639e-06, "epoch": 5.215351986295783, "percentage": 79.92, "elapsed_time": "7:19:10", "remaining_time": "1:50:18", "throughput": 2334.36, "total_tokens": 61510848} {"current_steps": 31975, "total_steps": 40000, "loss": 0.0498, "lr": 4.804624243581801e-06, "epoch": 5.2161677135166, "percentage": 79.94, "elapsed_time": "7:19:12", "remaining_time": "1:50:13", "throughput": 2334.53, "total_tokens": 61520192} {"current_steps": 31980, "total_steps": 40000, "loss": 0.0007, "lr": 4.798839025866703e-06, "epoch": 5.216983440737417, "percentage": 79.95, "elapsed_time": "7:19:14", "remaining_time": "1:50:09", "throughput": 2334.73, "total_tokens": 61530480} {"current_steps": 31985, "total_steps": 40000, "loss": 0.0107, "lr": 4.793056923424491e-06, "epoch": 5.217799167958235, "percentage": 79.96, "elapsed_time": "7:19:16", "remaining_time": "1:50:04", "throughput": 2334.85, "total_tokens": 61538704} {"current_steps": 31990, "total_steps": 40000, "loss": 0.0001, "lr": 4.78727793714683e-06, "epoch": 5.218614895179052, "percentage": 79.97, "elapsed_time": "7:19:18", "remaining_time": "1:49:59", "throughput": 2335.01, "total_tokens": 61547792} {"current_steps": 31995, "total_steps": 40000, "loss": 0.0107, "lr": 4.7815020679249285e-06, "epoch": 5.219430622399869, "percentage": 79.99, "elapsed_time": "7:19:20", "remaining_time": "1:49:55", "throughput": 2335.17, "total_tokens": 61556976} {"current_steps": 32000, "total_steps": 40000, "loss": 0.0001, "lr": 4.775729316649483e-06, "epoch": 5.220246349620687, "percentage": 80.0, "elapsed_time": "7:19:22", "remaining_time": "1:49:50", "throughput": 2335.39, "total_tokens": 61567504} {"current_steps": 32000, "total_steps": 40000, "eval_loss": 0.3639886975288391, "epoch": 5.220246349620687, "percentage": 80.0, "elapsed_time": "7:20:43", "remaining_time": "1:50:10", "throughput": 2328.26, "total_tokens": 61567504} {"current_steps": 32005, "total_steps": 40000, "loss": 0.0007, "lr": 4.769959684210728e-06, "epoch": 5.221062076841505, "percentage": 80.01, "elapsed_time": "7:20:47", "remaining_time": "1:50:06", "throughput": 2328.27, "total_tokens": 61576880} {"current_steps": 32010, "total_steps": 40000, "loss": 0.0025, "lr": 4.764193171498426e-06, "epoch": 5.221877804062322, "percentage": 80.03, "elapsed_time": "7:20:49", "remaining_time": "1:50:02", "throughput": 2328.49, "total_tokens": 61587680} {"current_steps": 32015, "total_steps": 40000, "loss": 0.047, "lr": 4.75842977940183e-06, "epoch": 5.222693531283139, "percentage": 80.04, "elapsed_time": "7:20:51", "remaining_time": "1:49:57", "throughput": 2328.66, "total_tokens": 61597088} {"current_steps": 32020, "total_steps": 40000, "loss": 0.0002, "lr": 4.752669508809729e-06, "epoch": 5.223509258503956, "percentage": 80.05, "elapsed_time": "7:20:53", "remaining_time": "1:49:52", "throughput": 2328.88, "total_tokens": 61607760} {"current_steps": 32025, "total_steps": 40000, "loss": 0.0002, "lr": 4.746912360610445e-06, "epoch": 5.224324985724774, "percentage": 80.06, "elapsed_time": "7:20:55", "remaining_time": "1:49:48", "throughput": 2329.08, "total_tokens": 61617840} {"current_steps": 32030, "total_steps": 40000, "loss": 0.0284, "lr": 4.741158335691781e-06, "epoch": 5.225140712945591, "percentage": 80.08, "elapsed_time": "7:20:58", "remaining_time": "1:49:43", "throughput": 2329.24, "total_tokens": 61627232} {"current_steps": 32035, "total_steps": 40000, "loss": 0.0003, "lr": 4.7354074349410994e-06, "epoch": 5.225956440166408, "percentage": 80.09, "elapsed_time": "7:21:00", "remaining_time": "1:49:38", "throughput": 2329.4, "total_tokens": 61636512} {"current_steps": 32040, "total_steps": 40000, "loss": 0.0001, "lr": 4.729659659245245e-06, "epoch": 5.226772167387225, "percentage": 80.1, "elapsed_time": "7:21:02", "remaining_time": "1:49:34", "throughput": 2329.64, "total_tokens": 61647504} {"current_steps": 32045, "total_steps": 40000, "loss": 0.0002, "lr": 4.723915009490601e-06, "epoch": 5.227587894608043, "percentage": 80.11, "elapsed_time": "7:21:04", "remaining_time": "1:49:29", "throughput": 2329.79, "total_tokens": 61656528} {"current_steps": 32050, "total_steps": 40000, "loss": 0.1338, "lr": 4.718173486563077e-06, "epoch": 5.2284036218288605, "percentage": 80.12, "elapsed_time": "7:21:06", "remaining_time": "1:49:25", "throughput": 2329.98, "total_tokens": 61666336} {"current_steps": 32055, "total_steps": 40000, "loss": 0.0754, "lr": 4.71243509134808e-06, "epoch": 5.229219349049678, "percentage": 80.14, "elapsed_time": "7:21:08", "remaining_time": "1:49:20", "throughput": 2330.15, "total_tokens": 61675888} {"current_steps": 32060, "total_steps": 40000, "loss": 0.0589, "lr": 4.706699824730532e-06, "epoch": 5.230035076270495, "percentage": 80.15, "elapsed_time": "7:21:10", "remaining_time": "1:49:15", "throughput": 2330.35, "total_tokens": 61685920} {"current_steps": 32065, "total_steps": 40000, "loss": 0.1147, "lr": 4.700967687594901e-06, "epoch": 5.230850803491313, "percentage": 80.16, "elapsed_time": "7:21:12", "remaining_time": "1:49:11", "throughput": 2330.57, "total_tokens": 61696720} {"current_steps": 32070, "total_steps": 40000, "loss": 0.0002, "lr": 4.69523868082514e-06, "epoch": 5.23166653071213, "percentage": 80.17, "elapsed_time": "7:21:14", "remaining_time": "1:49:06", "throughput": 2330.79, "total_tokens": 61707632} {"current_steps": 32075, "total_steps": 40000, "loss": 0.0002, "lr": 4.689512805304747e-06, "epoch": 5.232482257932947, "percentage": 80.19, "elapsed_time": "7:21:17", "remaining_time": "1:49:01", "throughput": 2330.98, "total_tokens": 61717408} {"current_steps": 32080, "total_steps": 40000, "loss": 0.0673, "lr": 4.683790061916707e-06, "epoch": 5.233297985153764, "percentage": 80.2, "elapsed_time": "7:21:19", "remaining_time": "1:48:57", "throughput": 2331.16, "total_tokens": 61727232} {"current_steps": 32085, "total_steps": 40000, "loss": 0.0066, "lr": 4.678070451543551e-06, "epoch": 5.234113712374582, "percentage": 80.21, "elapsed_time": "7:21:21", "remaining_time": "1:48:52", "throughput": 2331.37, "total_tokens": 61737648} {"current_steps": 32090, "total_steps": 40000, "loss": 0.0001, "lr": 4.6723539750673204e-06, "epoch": 5.234929439595399, "percentage": 80.23, "elapsed_time": "7:21:23", "remaining_time": "1:48:48", "throughput": 2331.52, "total_tokens": 61746640} {"current_steps": 32095, "total_steps": 40000, "loss": 0.0004, "lr": 4.666640633369551e-06, "epoch": 5.2357451668162165, "percentage": 80.24, "elapsed_time": "7:21:25", "remaining_time": "1:48:43", "throughput": 2331.67, "total_tokens": 61755536} {"current_steps": 32100, "total_steps": 40000, "loss": 0.0001, "lr": 4.660930427331323e-06, "epoch": 5.236560894037034, "percentage": 80.25, "elapsed_time": "7:21:27", "remaining_time": "1:48:38", "throughput": 2331.88, "total_tokens": 61765712} {"current_steps": 32105, "total_steps": 40000, "loss": 0.0001, "lr": 4.6552233578332244e-06, "epoch": 5.237376621257852, "percentage": 80.26, "elapsed_time": "7:21:29", "remaining_time": "1:48:34", "throughput": 2332.1, "total_tokens": 61776784} {"current_steps": 32110, "total_steps": 40000, "loss": 0.0166, "lr": 4.649519425755347e-06, "epoch": 5.238192348478669, "percentage": 80.27, "elapsed_time": "7:21:31", "remaining_time": "1:48:29", "throughput": 2332.27, "total_tokens": 61786336} {"current_steps": 32115, "total_steps": 40000, "loss": 0.0006, "lr": 4.64381863197732e-06, "epoch": 5.239008075699486, "percentage": 80.29, "elapsed_time": "7:21:33", "remaining_time": "1:48:24", "throughput": 2332.43, "total_tokens": 61795328} {"current_steps": 32120, "total_steps": 40000, "loss": 0.0589, "lr": 4.638120977378269e-06, "epoch": 5.239823802920303, "percentage": 80.3, "elapsed_time": "7:21:36", "remaining_time": "1:48:20", "throughput": 2332.64, "total_tokens": 61805600} {"current_steps": 32125, "total_steps": 40000, "loss": 0.0611, "lr": 4.632426462836848e-06, "epoch": 5.240639530141121, "percentage": 80.31, "elapsed_time": "7:21:38", "remaining_time": "1:48:15", "throughput": 2332.8, "total_tokens": 61815056} {"current_steps": 32130, "total_steps": 40000, "loss": 0.0002, "lr": 4.626735089231224e-06, "epoch": 5.241455257361938, "percentage": 80.33, "elapsed_time": "7:21:40", "remaining_time": "1:48:11", "throughput": 2332.99, "total_tokens": 61825024} {"current_steps": 32135, "total_steps": 40000, "loss": 0.1493, "lr": 4.621046857439068e-06, "epoch": 5.242270984582755, "percentage": 80.34, "elapsed_time": "7:21:42", "remaining_time": "1:48:06", "throughput": 2333.17, "total_tokens": 61834544} {"current_steps": 32140, "total_steps": 40000, "loss": 0.0001, "lr": 4.615361768337587e-06, "epoch": 5.243086711803572, "percentage": 80.35, "elapsed_time": "7:21:44", "remaining_time": "1:48:01", "throughput": 2333.37, "total_tokens": 61844816} {"current_steps": 32145, "total_steps": 40000, "loss": 0.0006, "lr": 4.6096798228034946e-06, "epoch": 5.2439024390243905, "percentage": 80.36, "elapsed_time": "7:21:46", "remaining_time": "1:47:57", "throughput": 2333.55, "total_tokens": 61854400} {"current_steps": 32150, "total_steps": 40000, "loss": 0.0, "lr": 4.604001021713008e-06, "epoch": 5.244718166245208, "percentage": 80.38, "elapsed_time": "7:21:48", "remaining_time": "1:47:52", "throughput": 2333.71, "total_tokens": 61863824} {"current_steps": 32155, "total_steps": 40000, "loss": 0.0082, "lr": 4.598325365941883e-06, "epoch": 5.245533893466025, "percentage": 80.39, "elapsed_time": "7:21:50", "remaining_time": "1:47:47", "throughput": 2333.88, "total_tokens": 61873152} {"current_steps": 32160, "total_steps": 40000, "loss": 0.0629, "lr": 4.5926528563653645e-06, "epoch": 5.246349620686843, "percentage": 80.4, "elapsed_time": "7:21:52", "remaining_time": "1:47:43", "throughput": 2334.05, "total_tokens": 61882480} {"current_steps": 32165, "total_steps": 40000, "loss": 0.0001, "lr": 4.5869834938582295e-06, "epoch": 5.24716534790766, "percentage": 80.41, "elapsed_time": "7:21:55", "remaining_time": "1:47:38", "throughput": 2334.26, "total_tokens": 61893024} {"current_steps": 32170, "total_steps": 40000, "loss": 0.0031, "lr": 4.581317279294772e-06, "epoch": 5.247981075128477, "percentage": 80.42, "elapsed_time": "7:21:57", "remaining_time": "1:47:34", "throughput": 2334.42, "total_tokens": 61902208} {"current_steps": 32175, "total_steps": 40000, "loss": 0.0168, "lr": 4.57565421354878e-06, "epoch": 5.248796802349294, "percentage": 80.44, "elapsed_time": "7:21:59", "remaining_time": "1:47:29", "throughput": 2334.56, "total_tokens": 61910800} {"current_steps": 32180, "total_steps": 40000, "loss": 0.0024, "lr": 4.569994297493579e-06, "epoch": 5.249612529570112, "percentage": 80.45, "elapsed_time": "7:22:01", "remaining_time": "1:47:24", "throughput": 2334.71, "total_tokens": 61919776} {"current_steps": 32185, "total_steps": 40000, "loss": 0.0008, "lr": 4.564337532002002e-06, "epoch": 5.250428256790929, "percentage": 80.46, "elapsed_time": "7:22:03", "remaining_time": "1:47:20", "throughput": 2334.93, "total_tokens": 61930528} {"current_steps": 32190, "total_steps": 40000, "loss": 0.0715, "lr": 4.55868391794638e-06, "epoch": 5.251243984011746, "percentage": 80.47, "elapsed_time": "7:22:05", "remaining_time": "1:47:15", "throughput": 2335.12, "total_tokens": 61940416} {"current_steps": 32195, "total_steps": 40000, "loss": 0.0047, "lr": 4.553033456198588e-06, "epoch": 5.2520597112325635, "percentage": 80.49, "elapsed_time": "7:22:07", "remaining_time": "1:47:11", "throughput": 2335.29, "total_tokens": 61949904} {"current_steps": 32200, "total_steps": 40000, "loss": 0.0008, "lr": 4.54738614762999e-06, "epoch": 5.252875438453382, "percentage": 80.5, "elapsed_time": "7:22:09", "remaining_time": "1:47:06", "throughput": 2335.45, "total_tokens": 61958976} {"current_steps": 32200, "total_steps": 40000, "eval_loss": 0.3641124665737152, "epoch": 5.252875438453382, "percentage": 80.5, "elapsed_time": "7:23:30", "remaining_time": "1:47:26", "throughput": 2328.35, "total_tokens": 61958976} {"current_steps": 32205, "total_steps": 40000, "loss": 0.1192, "lr": 4.541741993111465e-06, "epoch": 5.253691165674199, "percentage": 80.51, "elapsed_time": "7:23:34", "remaining_time": "1:47:21", "throughput": 2328.32, "total_tokens": 61967648} {"current_steps": 32210, "total_steps": 40000, "loss": 0.0712, "lr": 4.536100993513423e-06, "epoch": 5.254506892895016, "percentage": 80.53, "elapsed_time": "7:23:36", "remaining_time": "1:47:17", "throughput": 2328.44, "total_tokens": 61975904} {"current_steps": 32215, "total_steps": 40000, "loss": 0.0002, "lr": 4.530463149705768e-06, "epoch": 5.255322620115833, "percentage": 80.54, "elapsed_time": "7:23:39", "remaining_time": "1:47:12", "throughput": 2328.61, "total_tokens": 61985200} {"current_steps": 32220, "total_steps": 40000, "loss": 0.0001, "lr": 4.524828462557934e-06, "epoch": 5.256138347336651, "percentage": 80.55, "elapsed_time": "7:23:41", "remaining_time": "1:47:08", "throughput": 2328.8, "total_tokens": 61995184} {"current_steps": 32225, "total_steps": 40000, "loss": 0.0003, "lr": 4.5191969329388625e-06, "epoch": 5.256954074557468, "percentage": 80.56, "elapsed_time": "7:23:43", "remaining_time": "1:47:03", "throughput": 2328.97, "total_tokens": 62005168} {"current_steps": 32230, "total_steps": 40000, "loss": 0.0003, "lr": 4.5135685617169965e-06, "epoch": 5.257769801778285, "percentage": 80.58, "elapsed_time": "7:23:45", "remaining_time": "1:46:58", "throughput": 2329.17, "total_tokens": 62015536} {"current_steps": 32235, "total_steps": 40000, "loss": 0.0006, "lr": 4.507943349760313e-06, "epoch": 5.258585528999102, "percentage": 80.59, "elapsed_time": "7:23:47", "remaining_time": "1:46:54", "throughput": 2329.36, "total_tokens": 62025520} {"current_steps": 32240, "total_steps": 40000, "loss": 0.1022, "lr": 4.502321297936277e-06, "epoch": 5.25940125621992, "percentage": 80.6, "elapsed_time": "7:23:49", "remaining_time": "1:46:49", "throughput": 2329.55, "total_tokens": 62035488} {"current_steps": 32245, "total_steps": 40000, "loss": 0.1099, "lr": 4.496702407111888e-06, "epoch": 5.2602169834407375, "percentage": 80.61, "elapsed_time": "7:23:51", "remaining_time": "1:46:45", "throughput": 2329.76, "total_tokens": 62046032} {"current_steps": 32250, "total_steps": 40000, "loss": 0.0001, "lr": 4.491086678153653e-06, "epoch": 5.261032710661555, "percentage": 80.62, "elapsed_time": "7:23:53", "remaining_time": "1:46:40", "throughput": 2329.92, "total_tokens": 62054960} {"current_steps": 32255, "total_steps": 40000, "loss": 0.0002, "lr": 4.485474111927579e-06, "epoch": 5.261848437882372, "percentage": 80.64, "elapsed_time": "7:23:56", "remaining_time": "1:46:35", "throughput": 2330.06, "total_tokens": 62063744} {"current_steps": 32260, "total_steps": 40000, "loss": 0.0919, "lr": 4.479864709299197e-06, "epoch": 5.26266416510319, "percentage": 80.65, "elapsed_time": "7:23:58", "remaining_time": "1:46:31", "throughput": 2330.29, "total_tokens": 62075056} {"current_steps": 32265, "total_steps": 40000, "loss": 0.0004, "lr": 4.474258471133555e-06, "epoch": 5.263479892324007, "percentage": 80.66, "elapsed_time": "7:24:00", "remaining_time": "1:46:26", "throughput": 2330.46, "total_tokens": 62084224} {"current_steps": 32270, "total_steps": 40000, "loss": 0.0, "lr": 4.4686553982952014e-06, "epoch": 5.264295619544824, "percentage": 80.67, "elapsed_time": "7:24:02", "remaining_time": "1:46:21", "throughput": 2330.64, "total_tokens": 62094032} {"current_steps": 32275, "total_steps": 40000, "loss": 0.0065, "lr": 4.463055491648191e-06, "epoch": 5.265111346765641, "percentage": 80.69, "elapsed_time": "7:24:04", "remaining_time": "1:46:17", "throughput": 2330.81, "total_tokens": 62103312} {"current_steps": 32280, "total_steps": 40000, "loss": 0.001, "lr": 4.457458752056112e-06, "epoch": 5.265927073986459, "percentage": 80.7, "elapsed_time": "7:24:06", "remaining_time": "1:46:12", "throughput": 2330.99, "total_tokens": 62113152} {"current_steps": 32285, "total_steps": 40000, "loss": 0.0002, "lr": 4.451865180382042e-06, "epoch": 5.266742801207276, "percentage": 80.71, "elapsed_time": "7:24:08", "remaining_time": "1:46:08", "throughput": 2331.15, "total_tokens": 62122496} {"current_steps": 32290, "total_steps": 40000, "loss": 0.123, "lr": 4.4462747774885936e-06, "epoch": 5.2675585284280935, "percentage": 80.73, "elapsed_time": "7:24:10", "remaining_time": "1:46:03", "throughput": 2331.36, "total_tokens": 62132880} {"current_steps": 32295, "total_steps": 40000, "loss": 0.0775, "lr": 4.440687544237859e-06, "epoch": 5.268374255648911, "percentage": 80.74, "elapsed_time": "7:24:12", "remaining_time": "1:45:58", "throughput": 2331.58, "total_tokens": 62143488} {"current_steps": 32300, "total_steps": 40000, "loss": 0.0481, "lr": 4.435103481491471e-06, "epoch": 5.269189982869729, "percentage": 80.75, "elapsed_time": "7:24:15", "remaining_time": "1:45:54", "throughput": 2331.67, "total_tokens": 62151056} {"current_steps": 32305, "total_steps": 40000, "loss": 0.0002, "lr": 4.429522590110569e-06, "epoch": 5.270005710090546, "percentage": 80.76, "elapsed_time": "7:24:17", "remaining_time": "1:45:49", "throughput": 2331.86, "total_tokens": 62161104} {"current_steps": 32310, "total_steps": 40000, "loss": 0.001, "lr": 4.423944870955779e-06, "epoch": 5.270821437311363, "percentage": 80.77, "elapsed_time": "7:24:19", "remaining_time": "1:45:45", "throughput": 2332.07, "total_tokens": 62171408} {"current_steps": 32315, "total_steps": 40000, "loss": 0.0, "lr": 4.418370324887272e-06, "epoch": 5.27163716453218, "percentage": 80.79, "elapsed_time": "7:24:21", "remaining_time": "1:45:40", "throughput": 2332.24, "total_tokens": 62180880} {"current_steps": 32320, "total_steps": 40000, "loss": 0.0005, "lr": 4.412798952764699e-06, "epoch": 5.272452891752998, "percentage": 80.8, "elapsed_time": "7:24:23", "remaining_time": "1:45:35", "throughput": 2332.41, "total_tokens": 62190240} {"current_steps": 32325, "total_steps": 40000, "loss": 0.0002, "lr": 4.407230755447245e-06, "epoch": 5.273268618973815, "percentage": 80.81, "elapsed_time": "7:24:25", "remaining_time": "1:45:31", "throughput": 2332.61, "total_tokens": 62200704} {"current_steps": 32330, "total_steps": 40000, "loss": 0.0002, "lr": 4.401665733793598e-06, "epoch": 5.274084346194632, "percentage": 80.83, "elapsed_time": "7:24:27", "remaining_time": "1:45:26", "throughput": 2332.79, "total_tokens": 62210320} {"current_steps": 32335, "total_steps": 40000, "loss": 0.0002, "lr": 4.3961038886619425e-06, "epoch": 5.27490007341545, "percentage": 80.84, "elapsed_time": "7:24:29", "remaining_time": "1:45:22", "throughput": 2332.96, "total_tokens": 62219936} {"current_steps": 32340, "total_steps": 40000, "loss": 0.0001, "lr": 4.39054522091e-06, "epoch": 5.275715800636267, "percentage": 80.85, "elapsed_time": "7:24:32", "remaining_time": "1:45:17", "throughput": 2333.17, "total_tokens": 62230432} {"current_steps": 32345, "total_steps": 40000, "loss": 0.001, "lr": 4.384989731394979e-06, "epoch": 5.276531527857085, "percentage": 80.86, "elapsed_time": "7:24:34", "remaining_time": "1:45:12", "throughput": 2333.34, "total_tokens": 62239760} {"current_steps": 32350, "total_steps": 40000, "loss": 0.0002, "lr": 4.379437420973598e-06, "epoch": 5.277347255077902, "percentage": 80.88, "elapsed_time": "7:24:36", "remaining_time": "1:45:08", "throughput": 2333.46, "total_tokens": 62248064} {"current_steps": 32355, "total_steps": 40000, "loss": 0.0001, "lr": 4.373888290502107e-06, "epoch": 5.27816298229872, "percentage": 80.89, "elapsed_time": "7:24:38", "remaining_time": "1:45:03", "throughput": 2333.68, "total_tokens": 62258896} {"current_steps": 32360, "total_steps": 40000, "loss": 0.0002, "lr": 4.36834234083624e-06, "epoch": 5.278978709519537, "percentage": 80.9, "elapsed_time": "7:24:40", "remaining_time": "1:44:59", "throughput": 2333.91, "total_tokens": 62269856} {"current_steps": 32365, "total_steps": 40000, "loss": 0.0007, "lr": 4.362799572831258e-06, "epoch": 5.279794436740354, "percentage": 80.91, "elapsed_time": "7:24:42", "remaining_time": "1:44:54", "throughput": 2334.1, "total_tokens": 62279824} {"current_steps": 32370, "total_steps": 40000, "loss": 0.0021, "lr": 4.35725998734193e-06, "epoch": 5.280610163961171, "percentage": 80.92, "elapsed_time": "7:24:44", "remaining_time": "1:44:49", "throughput": 2334.26, "total_tokens": 62288896} {"current_steps": 32375, "total_steps": 40000, "loss": 0.0001, "lr": 4.3517235852225195e-06, "epoch": 5.281425891181989, "percentage": 80.94, "elapsed_time": "7:24:46", "remaining_time": "1:44:45", "throughput": 2334.39, "total_tokens": 62297312} {"current_steps": 32380, "total_steps": 40000, "loss": 0.0, "lr": 4.346190367326822e-06, "epoch": 5.282241618402806, "percentage": 80.95, "elapsed_time": "7:24:48", "remaining_time": "1:44:40", "throughput": 2334.57, "total_tokens": 62307136} {"current_steps": 32385, "total_steps": 40000, "loss": 0.0006, "lr": 4.340660334508115e-06, "epoch": 5.283057345623623, "percentage": 80.96, "elapsed_time": "7:24:51", "remaining_time": "1:44:36", "throughput": 2334.73, "total_tokens": 62316320} {"current_steps": 32390, "total_steps": 40000, "loss": 0.0001, "lr": 4.335133487619206e-06, "epoch": 5.2838730728444405, "percentage": 80.97, "elapsed_time": "7:24:53", "remaining_time": "1:44:31", "throughput": 2334.92, "total_tokens": 62326288} {"current_steps": 32395, "total_steps": 40000, "loss": 0.0001, "lr": 4.329609827512409e-06, "epoch": 5.2846888000652585, "percentage": 80.99, "elapsed_time": "7:24:55", "remaining_time": "1:44:26", "throughput": 2335.1, "total_tokens": 62335936} {"current_steps": 32400, "total_steps": 40000, "loss": 0.0005, "lr": 4.324089355039531e-06, "epoch": 5.285504527286076, "percentage": 81.0, "elapsed_time": "7:24:57", "remaining_time": "1:44:22", "throughput": 2335.3, "total_tokens": 62346176} {"current_steps": 32400, "total_steps": 40000, "eval_loss": 0.36028480529785156, "epoch": 5.285504527286076, "percentage": 81.0, "elapsed_time": "7:26:18", "remaining_time": "1:44:41", "throughput": 2328.25, "total_tokens": 62346176} {"current_steps": 32405, "total_steps": 40000, "loss": 0.0858, "lr": 4.3185720710519075e-06, "epoch": 5.286320254506893, "percentage": 81.01, "elapsed_time": "7:26:21", "remaining_time": "1:44:37", "throughput": 2328.3, "total_tokens": 62356384} {"current_steps": 32410, "total_steps": 40000, "loss": 0.0128, "lr": 4.3130579764003724e-06, "epoch": 5.28713598172771, "percentage": 81.03, "elapsed_time": "7:26:24", "remaining_time": "1:44:32", "throughput": 2328.49, "total_tokens": 62366400} {"current_steps": 32415, "total_steps": 40000, "loss": 0.0471, "lr": 4.307547071935267e-06, "epoch": 5.287951708948528, "percentage": 81.04, "elapsed_time": "7:26:26", "remaining_time": "1:44:27", "throughput": 2328.64, "total_tokens": 62375216} {"current_steps": 32420, "total_steps": 40000, "loss": 0.0001, "lr": 4.302039358506435e-06, "epoch": 5.288767436169345, "percentage": 81.05, "elapsed_time": "7:26:28", "remaining_time": "1:44:23", "throughput": 2328.79, "total_tokens": 62384240} {"current_steps": 32425, "total_steps": 40000, "loss": 0.0001, "lr": 4.296534836963245e-06, "epoch": 5.289583163390162, "percentage": 81.06, "elapsed_time": "7:26:30", "remaining_time": "1:44:18", "throughput": 2328.96, "total_tokens": 62393520} {"current_steps": 32430, "total_steps": 40000, "loss": 0.0412, "lr": 4.291033508154555e-06, "epoch": 5.290398890610979, "percentage": 81.08, "elapsed_time": "7:26:32", "remaining_time": "1:44:14", "throughput": 2329.13, "total_tokens": 62403296} {"current_steps": 32435, "total_steps": 40000, "loss": 0.0002, "lr": 4.285535372928748e-06, "epoch": 5.291214617831797, "percentage": 81.09, "elapsed_time": "7:26:34", "remaining_time": "1:44:09", "throughput": 2329.31, "total_tokens": 62413040} {"current_steps": 32440, "total_steps": 40000, "loss": 0.1638, "lr": 4.280040432133695e-06, "epoch": 5.2920303450526145, "percentage": 81.1, "elapsed_time": "7:26:36", "remaining_time": "1:44:04", "throughput": 2329.5, "total_tokens": 62423056} {"current_steps": 32445, "total_steps": 40000, "loss": 0.0008, "lr": 4.274548686616789e-06, "epoch": 5.292846072273432, "percentage": 81.11, "elapsed_time": "7:26:38", "remaining_time": "1:44:00", "throughput": 2329.71, "total_tokens": 62433344} {"current_steps": 32450, "total_steps": 40000, "loss": 0.0655, "lr": 4.2690601372249364e-06, "epoch": 5.293661799494249, "percentage": 81.12, "elapsed_time": "7:26:40", "remaining_time": "1:43:55", "throughput": 2329.86, "total_tokens": 62442224} {"current_steps": 32455, "total_steps": 40000, "loss": 0.0005, "lr": 4.263574784804525e-06, "epoch": 5.294477526715067, "percentage": 81.14, "elapsed_time": "7:26:43", "remaining_time": "1:43:51", "throughput": 2330.07, "total_tokens": 62452880} {"current_steps": 32460, "total_steps": 40000, "loss": 0.0001, "lr": 4.258092630201479e-06, "epoch": 5.295293253935884, "percentage": 81.15, "elapsed_time": "7:26:45", "remaining_time": "1:43:46", "throughput": 2330.22, "total_tokens": 62461824} {"current_steps": 32465, "total_steps": 40000, "loss": 0.0412, "lr": 4.252613674261202e-06, "epoch": 5.296108981156701, "percentage": 81.16, "elapsed_time": "7:26:47", "remaining_time": "1:43:41", "throughput": 2330.38, "total_tokens": 62471136} {"current_steps": 32470, "total_steps": 40000, "loss": 0.0005, "lr": 4.2471379178286224e-06, "epoch": 5.296924708377518, "percentage": 81.17, "elapsed_time": "7:26:49", "remaining_time": "1:43:37", "throughput": 2330.55, "total_tokens": 62480480} {"current_steps": 32475, "total_steps": 40000, "loss": 0.0002, "lr": 4.241665361748181e-06, "epoch": 5.297740435598336, "percentage": 81.19, "elapsed_time": "7:26:51", "remaining_time": "1:43:32", "throughput": 2330.7, "total_tokens": 62489504} {"current_steps": 32480, "total_steps": 40000, "loss": 0.0001, "lr": 4.2361960068637994e-06, "epoch": 5.298556162819153, "percentage": 81.2, "elapsed_time": "7:26:53", "remaining_time": "1:43:28", "throughput": 2330.84, "total_tokens": 62498032} {"current_steps": 32485, "total_steps": 40000, "loss": 0.0001, "lr": 4.230729854018933e-06, "epoch": 5.2993718900399704, "percentage": 81.21, "elapsed_time": "7:26:55", "remaining_time": "1:43:23", "throughput": 2330.97, "total_tokens": 62506448} {"current_steps": 32490, "total_steps": 40000, "loss": 0.0001, "lr": 4.225266904056521e-06, "epoch": 5.300187617260788, "percentage": 81.23, "elapsed_time": "7:26:57", "remaining_time": "1:43:18", "throughput": 2331.16, "total_tokens": 62516704} {"current_steps": 32495, "total_steps": 40000, "loss": 0.0001, "lr": 4.21980715781903e-06, "epoch": 5.301003344481606, "percentage": 81.24, "elapsed_time": "7:26:59", "remaining_time": "1:43:14", "throughput": 2331.36, "total_tokens": 62526896} {"current_steps": 32500, "total_steps": 40000, "loss": 0.0123, "lr": 4.214350616148416e-06, "epoch": 5.301819071702423, "percentage": 81.25, "elapsed_time": "7:27:02", "remaining_time": "1:43:09", "throughput": 2331.59, "total_tokens": 62537984} {"current_steps": 32505, "total_steps": 40000, "loss": 0.0127, "lr": 4.20889727988614e-06, "epoch": 5.30263479892324, "percentage": 81.26, "elapsed_time": "7:27:04", "remaining_time": "1:43:05", "throughput": 2331.79, "total_tokens": 62548144} {"current_steps": 32510, "total_steps": 40000, "loss": 0.0251, "lr": 4.20344714987318e-06, "epoch": 5.303450526144058, "percentage": 81.27, "elapsed_time": "7:27:06", "remaining_time": "1:43:00", "throughput": 2331.94, "total_tokens": 62557216} {"current_steps": 32515, "total_steps": 40000, "loss": 0.0002, "lr": 4.198000226950022e-06, "epoch": 5.304266253364875, "percentage": 81.29, "elapsed_time": "7:27:08", "remaining_time": "1:42:55", "throughput": 2332.16, "total_tokens": 62568016} {"current_steps": 32520, "total_steps": 40000, "loss": 0.0, "lr": 4.192556511956635e-06, "epoch": 5.305081980585692, "percentage": 81.3, "elapsed_time": "7:27:10", "remaining_time": "1:42:51", "throughput": 2332.28, "total_tokens": 62576336} {"current_steps": 32525, "total_steps": 40000, "loss": 0.0176, "lr": 4.18711600573252e-06, "epoch": 5.305897707806509, "percentage": 81.31, "elapsed_time": "7:27:12", "remaining_time": "1:42:46", "throughput": 2332.49, "total_tokens": 62586768} {"current_steps": 32530, "total_steps": 40000, "loss": 0.0004, "lr": 4.181678709116671e-06, "epoch": 5.306713435027326, "percentage": 81.33, "elapsed_time": "7:27:14", "remaining_time": "1:42:42", "throughput": 2332.67, "total_tokens": 62596448} {"current_steps": 32535, "total_steps": 40000, "loss": 0.0002, "lr": 4.1762446229475785e-06, "epoch": 5.307529162248144, "percentage": 81.34, "elapsed_time": "7:27:16", "remaining_time": "1:42:37", "throughput": 2332.85, "total_tokens": 62606160} {"current_steps": 32540, "total_steps": 40000, "loss": 0.0002, "lr": 4.17081374806326e-06, "epoch": 5.3083448894689615, "percentage": 81.35, "elapsed_time": "7:27:18", "remaining_time": "1:42:32", "throughput": 2333.02, "total_tokens": 62615600} {"current_steps": 32545, "total_steps": 40000, "loss": 0.1212, "lr": 4.165386085301212e-06, "epoch": 5.309160616689779, "percentage": 81.36, "elapsed_time": "7:27:21", "remaining_time": "1:42:28", "throughput": 2333.18, "total_tokens": 62624912} {"current_steps": 32550, "total_steps": 40000, "loss": 0.0001, "lr": 4.1599616354984525e-06, "epoch": 5.309976343910597, "percentage": 81.38, "elapsed_time": "7:27:23", "remaining_time": "1:42:23", "throughput": 2333.35, "total_tokens": 62634400} {"current_steps": 32555, "total_steps": 40000, "loss": 0.0002, "lr": 4.154540399491508e-06, "epoch": 5.310792071131414, "percentage": 81.39, "elapsed_time": "7:27:25", "remaining_time": "1:42:19", "throughput": 2333.52, "total_tokens": 62643904} {"current_steps": 32560, "total_steps": 40000, "loss": 0.0002, "lr": 4.149122378116394e-06, "epoch": 5.311607798352231, "percentage": 81.4, "elapsed_time": "7:27:27", "remaining_time": "1:42:14", "throughput": 2333.69, "total_tokens": 62653504} {"current_steps": 32565, "total_steps": 40000, "loss": 0.0028, "lr": 4.14370757220863e-06, "epoch": 5.312423525573048, "percentage": 81.41, "elapsed_time": "7:27:29", "remaining_time": "1:42:10", "throughput": 2333.87, "total_tokens": 62663168} {"current_steps": 32570, "total_steps": 40000, "loss": 0.0003, "lr": 4.138295982603263e-06, "epoch": 5.313239252793866, "percentage": 81.42, "elapsed_time": "7:27:31", "remaining_time": "1:42:05", "throughput": 2334.06, "total_tokens": 62673104} {"current_steps": 32575, "total_steps": 40000, "loss": 0.0005, "lr": 4.132887610134814e-06, "epoch": 5.314054980014683, "percentage": 81.44, "elapsed_time": "7:27:33", "remaining_time": "1:42:00", "throughput": 2334.27, "total_tokens": 62683728} {"current_steps": 32580, "total_steps": 40000, "loss": 0.0001, "lr": 4.127482455637335e-06, "epoch": 5.3148707072355, "percentage": 81.45, "elapsed_time": "7:27:35", "remaining_time": "1:41:56", "throughput": 2334.45, "total_tokens": 62693648} {"current_steps": 32585, "total_steps": 40000, "loss": 0.096, "lr": 4.1220805199443545e-06, "epoch": 5.3156864344563175, "percentage": 81.46, "elapsed_time": "7:27:37", "remaining_time": "1:41:51", "throughput": 2334.63, "total_tokens": 62703536} {"current_steps": 32590, "total_steps": 40000, "loss": 0.0004, "lr": 4.116681803888925e-06, "epoch": 5.3165021616771355, "percentage": 81.47, "elapsed_time": "7:27:40", "remaining_time": "1:41:47", "throughput": 2334.84, "total_tokens": 62714048} {"current_steps": 32595, "total_steps": 40000, "loss": 0.1139, "lr": 4.111286308303605e-06, "epoch": 5.317317888897953, "percentage": 81.49, "elapsed_time": "7:27:42", "remaining_time": "1:41:42", "throughput": 2335.03, "total_tokens": 62723792} {"current_steps": 32600, "total_steps": 40000, "loss": 0.0829, "lr": 4.105894034020433e-06, "epoch": 5.31813361611877, "percentage": 81.5, "elapsed_time": "7:27:44", "remaining_time": "1:41:38", "throughput": 2335.23, "total_tokens": 62734064} {"current_steps": 32600, "total_steps": 40000, "eval_loss": 0.3541121482849121, "epoch": 5.31813361611877, "percentage": 81.5, "elapsed_time": "7:29:05", "remaining_time": "1:41:56", "throughput": 2328.22, "total_tokens": 62734064} {"current_steps": 32605, "total_steps": 40000, "loss": 0.0021, "lr": 4.100504981870975e-06, "epoch": 5.318949343339587, "percentage": 81.51, "elapsed_time": "7:29:08", "remaining_time": "1:41:52", "throughput": 2328.28, "total_tokens": 62744320} {"current_steps": 32610, "total_steps": 40000, "loss": 0.0002, "lr": 4.0951191526862915e-06, "epoch": 5.319765070560405, "percentage": 81.53, "elapsed_time": "7:29:10", "remaining_time": "1:41:47", "throughput": 2328.49, "total_tokens": 62755072} {"current_steps": 32615, "total_steps": 40000, "loss": 0.0003, "lr": 4.089736547296938e-06, "epoch": 5.320580797781222, "percentage": 81.54, "elapsed_time": "7:29:13", "remaining_time": "1:41:42", "throughput": 2328.69, "total_tokens": 62765504} {"current_steps": 32620, "total_steps": 40000, "loss": 0.0002, "lr": 4.08435716653299e-06, "epoch": 5.321396525002039, "percentage": 81.55, "elapsed_time": "7:29:15", "remaining_time": "1:41:38", "throughput": 2328.88, "total_tokens": 62775504} {"current_steps": 32625, "total_steps": 40000, "loss": 0.0015, "lr": 4.0789810112240005e-06, "epoch": 5.322212252222856, "percentage": 81.56, "elapsed_time": "7:29:17", "remaining_time": "1:41:33", "throughput": 2329.09, "total_tokens": 62785984} {"current_steps": 32630, "total_steps": 40000, "loss": 0.0033, "lr": 4.073608082199057e-06, "epoch": 5.323027979443674, "percentage": 81.58, "elapsed_time": "7:29:19", "remaining_time": "1:41:29", "throughput": 2329.22, "total_tokens": 62794304} {"current_steps": 32635, "total_steps": 40000, "loss": 0.0002, "lr": 4.068238380286718e-06, "epoch": 5.3238437066644915, "percentage": 81.59, "elapsed_time": "7:29:21", "remaining_time": "1:41:24", "throughput": 2329.43, "total_tokens": 62804944} {"current_steps": 32640, "total_steps": 40000, "loss": 0.0004, "lr": 4.062871906315072e-06, "epoch": 5.324659433885309, "percentage": 81.6, "elapsed_time": "7:29:23", "remaining_time": "1:41:20", "throughput": 2329.63, "total_tokens": 62815312} {"current_steps": 32645, "total_steps": 40000, "loss": 0.0001, "lr": 4.057508661111686e-06, "epoch": 5.325475161106126, "percentage": 81.61, "elapsed_time": "7:29:25", "remaining_time": "1:41:15", "throughput": 2329.81, "total_tokens": 62825168} {"current_steps": 32650, "total_steps": 40000, "loss": 0.0033, "lr": 4.052148645503648e-06, "epoch": 5.326290888326944, "percentage": 81.62, "elapsed_time": "7:29:27", "remaining_time": "1:41:10", "throughput": 2329.99, "total_tokens": 62834976} {"current_steps": 32655, "total_steps": 40000, "loss": 0.018, "lr": 4.046791860317531e-06, "epoch": 5.327106615547761, "percentage": 81.64, "elapsed_time": "7:29:29", "remaining_time": "1:41:06", "throughput": 2330.16, "total_tokens": 62844256} {"current_steps": 32660, "total_steps": 40000, "loss": 0.0004, "lr": 4.041438306379431e-06, "epoch": 5.327922342768578, "percentage": 81.65, "elapsed_time": "7:29:32", "remaining_time": "1:41:01", "throughput": 2330.31, "total_tokens": 62853152} {"current_steps": 32665, "total_steps": 40000, "loss": 0.0256, "lr": 4.036087984514916e-06, "epoch": 5.328738069989395, "percentage": 81.66, "elapsed_time": "7:29:34", "remaining_time": "1:40:57", "throughput": 2330.5, "total_tokens": 62863248} {"current_steps": 32670, "total_steps": 40000, "loss": 0.0002, "lr": 4.030740895549084e-06, "epoch": 5.329553797210213, "percentage": 81.67, "elapsed_time": "7:29:36", "remaining_time": "1:40:52", "throughput": 2330.65, "total_tokens": 62872144} {"current_steps": 32675, "total_steps": 40000, "loss": 0.0004, "lr": 4.025397040306531e-06, "epoch": 5.33036952443103, "percentage": 81.69, "elapsed_time": "7:29:38", "remaining_time": "1:40:47", "throughput": 2330.8, "total_tokens": 62881248} {"current_steps": 32680, "total_steps": 40000, "loss": 0.0471, "lr": 4.0200564196113285e-06, "epoch": 5.331185251651847, "percentage": 81.7, "elapsed_time": "7:29:40", "remaining_time": "1:40:43", "throughput": 2331.0, "total_tokens": 62891616} {"current_steps": 32685, "total_steps": 40000, "loss": 0.0003, "lr": 4.014719034287079e-06, "epoch": 5.332000978872665, "percentage": 81.71, "elapsed_time": "7:29:42", "remaining_time": "1:40:38", "throughput": 2331.17, "total_tokens": 62901184} {"current_steps": 32690, "total_steps": 40000, "loss": 0.0001, "lr": 4.0093848851568775e-06, "epoch": 5.332816706093483, "percentage": 81.73, "elapsed_time": "7:29:44", "remaining_time": "1:40:34", "throughput": 2331.32, "total_tokens": 62910080} {"current_steps": 32695, "total_steps": 40000, "loss": 0.0005, "lr": 4.004053973043304e-06, "epoch": 5.3336324333143, "percentage": 81.74, "elapsed_time": "7:29:46", "remaining_time": "1:40:29", "throughput": 2331.56, "total_tokens": 62921520} {"current_steps": 32700, "total_steps": 40000, "loss": 0.0046, "lr": 3.998726298768465e-06, "epoch": 5.334448160535117, "percentage": 81.75, "elapsed_time": "7:29:48", "remaining_time": "1:40:25", "throughput": 2331.79, "total_tokens": 62932480} {"current_steps": 32705, "total_steps": 40000, "loss": 0.0301, "lr": 3.99340186315395e-06, "epoch": 5.335263887755934, "percentage": 81.76, "elapsed_time": "7:29:51", "remaining_time": "1:40:20", "throughput": 2332.01, "total_tokens": 62943168} {"current_steps": 32710, "total_steps": 40000, "loss": 0.0004, "lr": 3.988080667020849e-06, "epoch": 5.336079614976752, "percentage": 81.77, "elapsed_time": "7:29:53", "remaining_time": "1:40:15", "throughput": 2332.18, "total_tokens": 62952848} {"current_steps": 32715, "total_steps": 40000, "loss": 0.0671, "lr": 3.982762711189766e-06, "epoch": 5.336895342197569, "percentage": 81.79, "elapsed_time": "7:29:55", "remaining_time": "1:40:11", "throughput": 2332.36, "total_tokens": 62962656} {"current_steps": 32720, "total_steps": 40000, "loss": 0.0278, "lr": 3.977447996480785e-06, "epoch": 5.337711069418386, "percentage": 81.8, "elapsed_time": "7:29:57", "remaining_time": "1:40:06", "throughput": 2332.51, "total_tokens": 62971840} {"current_steps": 32725, "total_steps": 40000, "loss": 0.0177, "lr": 3.97213652371351e-06, "epoch": 5.338526796639204, "percentage": 81.81, "elapsed_time": "7:29:59", "remaining_time": "1:40:02", "throughput": 2332.66, "total_tokens": 62980816} {"current_steps": 32730, "total_steps": 40000, "loss": 0.0, "lr": 3.966828293707042e-06, "epoch": 5.339342523860021, "percentage": 81.83, "elapsed_time": "7:30:01", "remaining_time": "1:39:57", "throughput": 2332.85, "total_tokens": 62990720} {"current_steps": 32735, "total_steps": 40000, "loss": 0.0, "lr": 3.961523307279963e-06, "epoch": 5.3401582510808385, "percentage": 81.84, "elapsed_time": "7:30:03", "remaining_time": "1:39:53", "throughput": 2332.98, "total_tokens": 62999152} {"current_steps": 32740, "total_steps": 40000, "loss": 0.0001, "lr": 3.956221565250382e-06, "epoch": 5.340973978301656, "percentage": 81.85, "elapsed_time": "7:30:05", "remaining_time": "1:39:48", "throughput": 2333.17, "total_tokens": 63009040} {"current_steps": 32745, "total_steps": 40000, "loss": 0.0012, "lr": 3.950923068435883e-06, "epoch": 5.341789705522474, "percentage": 81.86, "elapsed_time": "7:30:07", "remaining_time": "1:39:43", "throughput": 2333.32, "total_tokens": 63018048} {"current_steps": 32750, "total_steps": 40000, "loss": 0.0142, "lr": 3.945627817653566e-06, "epoch": 5.342605432743291, "percentage": 81.88, "elapsed_time": "7:30:10", "remaining_time": "1:39:39", "throughput": 2333.47, "total_tokens": 63027216} {"current_steps": 32755, "total_steps": 40000, "loss": 0.0001, "lr": 3.9403358137200335e-06, "epoch": 5.343421159964108, "percentage": 81.89, "elapsed_time": "7:30:12", "remaining_time": "1:39:34", "throughput": 2333.65, "total_tokens": 63036912} {"current_steps": 32760, "total_steps": 40000, "loss": 0.0001, "lr": 3.9350470574513605e-06, "epoch": 5.344236887184925, "percentage": 81.9, "elapsed_time": "7:30:14", "remaining_time": "1:39:30", "throughput": 2333.83, "total_tokens": 63046928} {"current_steps": 32765, "total_steps": 40000, "loss": 0.0003, "lr": 3.9297615496631525e-06, "epoch": 5.345052614405743, "percentage": 81.91, "elapsed_time": "7:30:16", "remaining_time": "1:39:25", "throughput": 2333.99, "total_tokens": 63056144} {"current_steps": 32770, "total_steps": 40000, "loss": 0.0003, "lr": 3.924479291170505e-06, "epoch": 5.34586834162656, "percentage": 81.92, "elapsed_time": "7:30:18", "remaining_time": "1:39:21", "throughput": 2334.19, "total_tokens": 63066208} {"current_steps": 32775, "total_steps": 40000, "loss": 0.0622, "lr": 3.919200282788002e-06, "epoch": 5.346684068847377, "percentage": 81.94, "elapsed_time": "7:30:20", "remaining_time": "1:39:16", "throughput": 2334.37, "total_tokens": 63076080} {"current_steps": 32780, "total_steps": 40000, "loss": 0.0008, "lr": 3.913924525329726e-06, "epoch": 5.3474997960681945, "percentage": 81.95, "elapsed_time": "7:30:22", "remaining_time": "1:39:11", "throughput": 2334.6, "total_tokens": 63087168} {"current_steps": 32785, "total_steps": 40000, "loss": 0.0002, "lr": 3.908652019609279e-06, "epoch": 5.3483155232890125, "percentage": 81.96, "elapsed_time": "7:30:24", "remaining_time": "1:39:07", "throughput": 2334.81, "total_tokens": 63097792} {"current_steps": 32790, "total_steps": 40000, "loss": 0.0082, "lr": 3.9033827664397364e-06, "epoch": 5.34913125050983, "percentage": 81.97, "elapsed_time": "7:30:26", "remaining_time": "1:39:02", "throughput": 2334.95, "total_tokens": 63106496} {"current_steps": 32795, "total_steps": 40000, "loss": 0.0001, "lr": 3.898116766633694e-06, "epoch": 5.349946977730647, "percentage": 81.99, "elapsed_time": "7:30:29", "remaining_time": "1:38:58", "throughput": 2335.12, "total_tokens": 63116016} {"current_steps": 32800, "total_steps": 40000, "loss": 0.0001, "lr": 3.8928540210032225e-06, "epoch": 5.350762704951464, "percentage": 82.0, "elapsed_time": "7:30:31", "remaining_time": "1:38:53", "throughput": 2335.26, "total_tokens": 63124752} {"current_steps": 32800, "total_steps": 40000, "eval_loss": 0.3812498450279236, "epoch": 5.350762704951464, "percentage": 82.0, "elapsed_time": "7:31:51", "remaining_time": "1:39:11", "throughput": 2328.3, "total_tokens": 63124752} {"current_steps": 32805, "total_steps": 40000, "loss": 0.0005, "lr": 3.887594530359909e-06, "epoch": 5.351578432172282, "percentage": 82.01, "elapsed_time": "7:31:55", "remaining_time": "1:39:07", "throughput": 2328.33, "total_tokens": 63134736} {"current_steps": 32810, "total_steps": 40000, "loss": 0.0001, "lr": 3.88233829551484e-06, "epoch": 5.352394159393099, "percentage": 82.03, "elapsed_time": "7:31:58", "remaining_time": "1:39:02", "throughput": 2328.49, "total_tokens": 63143984} {"current_steps": 32815, "total_steps": 40000, "loss": 0.0001, "lr": 3.877085317278581e-06, "epoch": 5.353209886613916, "percentage": 82.04, "elapsed_time": "7:32:00", "remaining_time": "1:38:58", "throughput": 2328.68, "total_tokens": 63154128} {"current_steps": 32820, "total_steps": 40000, "loss": 0.0028, "lr": 3.87183559646122e-06, "epoch": 5.354025613834733, "percentage": 82.05, "elapsed_time": "7:32:02", "remaining_time": "1:38:53", "throughput": 2328.86, "total_tokens": 63163712} {"current_steps": 32825, "total_steps": 40000, "loss": 0.0003, "lr": 3.866589133872317e-06, "epoch": 5.354841341055551, "percentage": 82.06, "elapsed_time": "7:32:04", "remaining_time": "1:38:48", "throughput": 2329.02, "total_tokens": 63173120} {"current_steps": 32830, "total_steps": 40000, "loss": 0.0027, "lr": 3.861345930320948e-06, "epoch": 5.3556570682763684, "percentage": 82.08, "elapsed_time": "7:32:06", "remaining_time": "1:38:44", "throughput": 2329.21, "total_tokens": 63183232} {"current_steps": 32835, "total_steps": 40000, "loss": 0.0002, "lr": 3.856105986615688e-06, "epoch": 5.356472795497186, "percentage": 82.09, "elapsed_time": "7:32:08", "remaining_time": "1:38:39", "throughput": 2329.36, "total_tokens": 63192304} {"current_steps": 32840, "total_steps": 40000, "loss": 0.0014, "lr": 3.850869303564589e-06, "epoch": 5.357288522718003, "percentage": 82.1, "elapsed_time": "7:32:10", "remaining_time": "1:38:35", "throughput": 2329.55, "total_tokens": 63202560} {"current_steps": 32845, "total_steps": 40000, "loss": 0.0004, "lr": 3.845635881975226e-06, "epoch": 5.358104249938821, "percentage": 82.11, "elapsed_time": "7:32:12", "remaining_time": "1:38:30", "throughput": 2329.72, "total_tokens": 63211856} {"current_steps": 32850, "total_steps": 40000, "loss": 0.0858, "lr": 3.840405722654647e-06, "epoch": 5.358919977159638, "percentage": 82.12, "elapsed_time": "7:32:14", "remaining_time": "1:38:26", "throughput": 2329.89, "total_tokens": 63221344} {"current_steps": 32855, "total_steps": 40000, "loss": 0.0001, "lr": 3.835178826409419e-06, "epoch": 5.359735704380455, "percentage": 82.14, "elapsed_time": "7:32:16", "remaining_time": "1:38:21", "throughput": 2330.07, "total_tokens": 63231056} {"current_steps": 32860, "total_steps": 40000, "loss": 0.0001, "lr": 3.8299551940455895e-06, "epoch": 5.360551431601272, "percentage": 82.15, "elapsed_time": "7:32:19", "remaining_time": "1:38:16", "throughput": 2330.27, "total_tokens": 63241472} {"current_steps": 32865, "total_steps": 40000, "loss": 0.0019, "lr": 3.824734826368703e-06, "epoch": 5.36136715882209, "percentage": 82.16, "elapsed_time": "7:32:21", "remaining_time": "1:38:12", "throughput": 2330.45, "total_tokens": 63251120} {"current_steps": 32870, "total_steps": 40000, "loss": 0.0001, "lr": 3.819517724183813e-06, "epoch": 5.362182886042907, "percentage": 82.17, "elapsed_time": "7:32:23", "remaining_time": "1:38:07", "throughput": 2330.63, "total_tokens": 63261152} {"current_steps": 32875, "total_steps": 40000, "loss": 0.0002, "lr": 3.8143038882954648e-06, "epoch": 5.362998613263724, "percentage": 82.19, "elapsed_time": "7:32:25", "remaining_time": "1:38:03", "throughput": 2330.85, "total_tokens": 63272096} {"current_steps": 32880, "total_steps": 40000, "loss": 0.0001, "lr": 3.8090933195076867e-06, "epoch": 5.3638143404845415, "percentage": 82.2, "elapsed_time": "7:32:27", "remaining_time": "1:37:58", "throughput": 2330.99, "total_tokens": 63280832} {"current_steps": 32885, "total_steps": 40000, "loss": 0.0002, "lr": 3.8038860186240198e-06, "epoch": 5.3646300677053596, "percentage": 82.21, "elapsed_time": "7:32:29", "remaining_time": "1:37:54", "throughput": 2331.19, "total_tokens": 63290992} {"current_steps": 32890, "total_steps": 40000, "loss": 0.0938, "lr": 3.7986819864475026e-06, "epoch": 5.365445794926177, "percentage": 82.23, "elapsed_time": "7:32:31", "remaining_time": "1:37:49", "throughput": 2331.36, "total_tokens": 63300656} {"current_steps": 32895, "total_steps": 40000, "loss": 0.0019, "lr": 3.793481223780651e-06, "epoch": 5.366261522146994, "percentage": 82.24, "elapsed_time": "7:32:33", "remaining_time": "1:37:44", "throughput": 2331.52, "total_tokens": 63309712} {"current_steps": 32900, "total_steps": 40000, "loss": 0.0, "lr": 3.788283731425496e-06, "epoch": 5.367077249367812, "percentage": 82.25, "elapsed_time": "7:32:35", "remaining_time": "1:37:40", "throughput": 2331.71, "total_tokens": 63319664} {"current_steps": 32905, "total_steps": 40000, "loss": 0.0006, "lr": 3.7830895101835488e-06, "epoch": 5.367892976588629, "percentage": 82.26, "elapsed_time": "7:32:38", "remaining_time": "1:37:35", "throughput": 2331.93, "total_tokens": 63330624} {"current_steps": 32910, "total_steps": 40000, "loss": 0.0002, "lr": 3.7778985608558274e-06, "epoch": 5.368708703809446, "percentage": 82.27, "elapsed_time": "7:32:40", "remaining_time": "1:37:31", "throughput": 2332.1, "total_tokens": 63340384} {"current_steps": 32915, "total_steps": 40000, "loss": 0.1272, "lr": 3.7727108842428443e-06, "epoch": 5.369524431030263, "percentage": 82.29, "elapsed_time": "7:32:42", "remaining_time": "1:37:26", "throughput": 2332.26, "total_tokens": 63349504} {"current_steps": 32920, "total_steps": 40000, "loss": 0.0026, "lr": 3.7675264811446065e-06, "epoch": 5.370340158251081, "percentage": 82.3, "elapsed_time": "7:32:44", "remaining_time": "1:37:22", "throughput": 2332.41, "total_tokens": 63358640} {"current_steps": 32925, "total_steps": 40000, "loss": 0.0005, "lr": 3.7623453523605994e-06, "epoch": 5.371155885471898, "percentage": 82.31, "elapsed_time": "7:32:46", "remaining_time": "1:37:17", "throughput": 2332.59, "total_tokens": 63368448} {"current_steps": 32930, "total_steps": 40000, "loss": 0.0007, "lr": 3.757167498689834e-06, "epoch": 5.3719716126927155, "percentage": 82.33, "elapsed_time": "7:32:48", "remaining_time": "1:37:13", "throughput": 2332.74, "total_tokens": 63377376} {"current_steps": 32935, "total_steps": 40000, "loss": 0.0008, "lr": 3.7519929209307914e-06, "epoch": 5.372787339913533, "percentage": 82.34, "elapsed_time": "7:32:50", "remaining_time": "1:37:08", "throughput": 2332.91, "total_tokens": 63386784} {"current_steps": 32940, "total_steps": 40000, "loss": 0.0002, "lr": 3.746821619881463e-06, "epoch": 5.373603067134351, "percentage": 82.35, "elapsed_time": "7:32:52", "remaining_time": "1:37:03", "throughput": 2333.09, "total_tokens": 63396720} {"current_steps": 32945, "total_steps": 40000, "loss": 0.0001, "lr": 3.74165359633932e-06, "epoch": 5.374418794355168, "percentage": 82.36, "elapsed_time": "7:32:54", "remaining_time": "1:36:59", "throughput": 2333.23, "total_tokens": 63405152} {"current_steps": 32950, "total_steps": 40000, "loss": 0.092, "lr": 3.736488851101341e-06, "epoch": 5.375234521575985, "percentage": 82.38, "elapsed_time": "7:32:56", "remaining_time": "1:36:54", "throughput": 2333.42, "total_tokens": 63415280} {"current_steps": 32955, "total_steps": 40000, "loss": 0.0, "lr": 3.7313273849640035e-06, "epoch": 5.376050248796802, "percentage": 82.39, "elapsed_time": "7:32:59", "remaining_time": "1:36:50", "throughput": 2333.59, "total_tokens": 63424864} {"current_steps": 32960, "total_steps": 40000, "loss": 0.0001, "lr": 3.7261691987232533e-06, "epoch": 5.37686597601762, "percentage": 82.4, "elapsed_time": "7:33:01", "remaining_time": "1:36:45", "throughput": 2333.83, "total_tokens": 63436464} {"current_steps": 32965, "total_steps": 40000, "loss": 0.0004, "lr": 3.7210142931745575e-06, "epoch": 5.377681703238437, "percentage": 82.41, "elapsed_time": "7:33:03", "remaining_time": "1:36:41", "throughput": 2333.97, "total_tokens": 63445488} {"current_steps": 32970, "total_steps": 40000, "loss": 0.0, "lr": 3.7158626691128712e-06, "epoch": 5.378497430459254, "percentage": 82.42, "elapsed_time": "7:33:05", "remaining_time": "1:36:36", "throughput": 2334.16, "total_tokens": 63455472} {"current_steps": 32975, "total_steps": 40000, "loss": 0.0001, "lr": 3.710714327332629e-06, "epoch": 5.3793131576800715, "percentage": 82.44, "elapsed_time": "7:33:07", "remaining_time": "1:36:32", "throughput": 2334.38, "total_tokens": 63466464} {"current_steps": 32980, "total_steps": 40000, "loss": 0.0, "lr": 3.7055692686277815e-06, "epoch": 5.3801288849008895, "percentage": 82.45, "elapsed_time": "7:33:09", "remaining_time": "1:36:27", "throughput": 2334.62, "total_tokens": 63477856} {"current_steps": 32985, "total_steps": 40000, "loss": 0.0001, "lr": 3.70042749379175e-06, "epoch": 5.380944612121707, "percentage": 82.46, "elapsed_time": "7:33:11", "remaining_time": "1:36:22", "throughput": 2334.82, "total_tokens": 63487984} {"current_steps": 32990, "total_steps": 40000, "loss": 0.0001, "lr": 3.6952890036174693e-06, "epoch": 5.381760339342524, "percentage": 82.47, "elapsed_time": "7:33:13", "remaining_time": "1:36:18", "throughput": 2335.01, "total_tokens": 63497888} {"current_steps": 32995, "total_steps": 40000, "loss": 0.0, "lr": 3.690153798897353e-06, "epoch": 5.382576066563341, "percentage": 82.49, "elapsed_time": "7:33:15", "remaining_time": "1:36:13", "throughput": 2335.19, "total_tokens": 63507920} {"current_steps": 33000, "total_steps": 40000, "loss": 0.0588, "lr": 3.6850218804233225e-06, "epoch": 5.383391793784159, "percentage": 82.5, "elapsed_time": "7:33:18", "remaining_time": "1:36:09", "throughput": 2335.37, "total_tokens": 63517792} {"current_steps": 33000, "total_steps": 40000, "eval_loss": 0.4010331332683563, "epoch": 5.383391793784159, "percentage": 82.5, "elapsed_time": "7:34:38", "remaining_time": "1:36:26", "throughput": 2328.46, "total_tokens": 63517792} {"current_steps": 33005, "total_steps": 40000, "loss": 0.0001, "lr": 3.679893248986779e-06, "epoch": 5.384207521004976, "percentage": 82.51, "elapsed_time": "7:34:42", "remaining_time": "1:36:22", "throughput": 2328.49, "total_tokens": 63527312} {"current_steps": 33010, "total_steps": 40000, "loss": 0.0001, "lr": 3.6747679053786147e-06, "epoch": 5.385023248225793, "percentage": 82.53, "elapsed_time": "7:34:44", "remaining_time": "1:36:17", "throughput": 2328.65, "total_tokens": 63536768} {"current_steps": 33015, "total_steps": 40000, "loss": 0.0009, "lr": 3.669645850389228e-06, "epoch": 5.38583897544661, "percentage": 82.54, "elapsed_time": "7:34:46", "remaining_time": "1:36:13", "throughput": 2328.79, "total_tokens": 63545520} {"current_steps": 33020, "total_steps": 40000, "loss": 0.0, "lr": 3.664527084808514e-06, "epoch": 5.386654702667428, "percentage": 82.55, "elapsed_time": "7:34:49", "remaining_time": "1:36:08", "throughput": 2328.96, "total_tokens": 63555120} {"current_steps": 33025, "total_steps": 40000, "loss": 0.0166, "lr": 3.6594116094258337e-06, "epoch": 5.387470429888245, "percentage": 82.56, "elapsed_time": "7:34:51", "remaining_time": "1:36:03", "throughput": 2329.13, "total_tokens": 63564496} {"current_steps": 33030, "total_steps": 40000, "loss": 0.0268, "lr": 3.6542994250300665e-06, "epoch": 5.388286157109063, "percentage": 82.58, "elapsed_time": "7:34:53", "remaining_time": "1:35:59", "throughput": 2329.32, "total_tokens": 63574736} {"current_steps": 33035, "total_steps": 40000, "loss": 0.0001, "lr": 3.6491905324095825e-06, "epoch": 5.38910188432988, "percentage": 82.59, "elapsed_time": "7:34:55", "remaining_time": "1:35:54", "throughput": 2329.52, "total_tokens": 63584976} {"current_steps": 33040, "total_steps": 40000, "loss": 0.0002, "lr": 3.644084932352221e-06, "epoch": 5.389917611550698, "percentage": 82.6, "elapsed_time": "7:34:57", "remaining_time": "1:35:50", "throughput": 2329.7, "total_tokens": 63594528} {"current_steps": 33045, "total_steps": 40000, "loss": 0.0373, "lr": 3.6389826256453457e-06, "epoch": 5.390733338771515, "percentage": 82.61, "elapsed_time": "7:34:59", "remaining_time": "1:35:45", "throughput": 2329.9, "total_tokens": 63604992} {"current_steps": 33050, "total_steps": 40000, "loss": 0.0002, "lr": 3.633883613075781e-06, "epoch": 5.391549065992332, "percentage": 82.62, "elapsed_time": "7:35:01", "remaining_time": "1:35:41", "throughput": 2330.05, "total_tokens": 63614048} {"current_steps": 33055, "total_steps": 40000, "loss": 0.0005, "lr": 3.6287878954298693e-06, "epoch": 5.392364793213149, "percentage": 82.64, "elapsed_time": "7:35:03", "remaining_time": "1:35:36", "throughput": 2330.23, "total_tokens": 63624112} {"current_steps": 33060, "total_steps": 40000, "loss": 0.0002, "lr": 3.6236954734934354e-06, "epoch": 5.393180520433967, "percentage": 82.65, "elapsed_time": "7:35:05", "remaining_time": "1:35:32", "throughput": 2330.39, "total_tokens": 63633504} {"current_steps": 33065, "total_steps": 40000, "loss": 0.0001, "lr": 3.618606348051784e-06, "epoch": 5.393996247654784, "percentage": 82.66, "elapsed_time": "7:35:08", "remaining_time": "1:35:27", "throughput": 2330.5, "total_tokens": 63641520} {"current_steps": 33070, "total_steps": 40000, "loss": 0.0001, "lr": 3.6135205198897376e-06, "epoch": 5.394811974875601, "percentage": 82.67, "elapsed_time": "7:35:10", "remaining_time": "1:35:22", "throughput": 2330.68, "total_tokens": 63651280} {"current_steps": 33075, "total_steps": 40000, "loss": 0.0001, "lr": 3.6084379897915854e-06, "epoch": 5.395627702096419, "percentage": 82.69, "elapsed_time": "7:35:12", "remaining_time": "1:35:18", "throughput": 2330.88, "total_tokens": 63661536} {"current_steps": 33080, "total_steps": 40000, "loss": 0.0011, "lr": 3.6033587585411115e-06, "epoch": 5.3964434293172365, "percentage": 82.7, "elapsed_time": "7:35:14", "remaining_time": "1:35:13", "throughput": 2331.01, "total_tokens": 63670064} {"current_steps": 33085, "total_steps": 40000, "loss": 0.0, "lr": 3.5982828269216117e-06, "epoch": 5.397259156538054, "percentage": 82.71, "elapsed_time": "7:35:16", "remaining_time": "1:35:09", "throughput": 2331.2, "total_tokens": 63680096} {"current_steps": 33090, "total_steps": 40000, "loss": 0.0099, "lr": 3.593210195715843e-06, "epoch": 5.398074883758871, "percentage": 82.73, "elapsed_time": "7:35:18", "remaining_time": "1:35:04", "throughput": 2331.4, "total_tokens": 63690208} {"current_steps": 33095, "total_steps": 40000, "loss": 0.0013, "lr": 3.5881408657060773e-06, "epoch": 5.398890610979688, "percentage": 82.74, "elapsed_time": "7:35:20", "remaining_time": "1:35:00", "throughput": 2331.6, "total_tokens": 63700576} {"current_steps": 33100, "total_steps": 40000, "loss": 0.065, "lr": 3.583074837674075e-06, "epoch": 5.399706338200506, "percentage": 82.75, "elapsed_time": "7:35:22", "remaining_time": "1:34:55", "throughput": 2331.73, "total_tokens": 63709216} {"current_steps": 33105, "total_steps": 40000, "loss": 0.0001, "lr": 3.578012112401069e-06, "epoch": 5.400522065421323, "percentage": 82.76, "elapsed_time": "7:35:24", "remaining_time": "1:34:51", "throughput": 2331.88, "total_tokens": 63718320} {"current_steps": 33110, "total_steps": 40000, "loss": 0.0009, "lr": 3.5729526906677996e-06, "epoch": 5.40133779264214, "percentage": 82.78, "elapsed_time": "7:35:26", "remaining_time": "1:34:46", "throughput": 2332.06, "total_tokens": 63728128} {"current_steps": 33115, "total_steps": 40000, "loss": 0.0001, "lr": 3.5678965732545007e-06, "epoch": 5.402153519862958, "percentage": 82.79, "elapsed_time": "7:35:29", "remaining_time": "1:34:42", "throughput": 2332.22, "total_tokens": 63737472} {"current_steps": 33120, "total_steps": 40000, "loss": 0.0001, "lr": 3.562843760940876e-06, "epoch": 5.402969247083775, "percentage": 82.8, "elapsed_time": "7:35:31", "remaining_time": "1:34:37", "throughput": 2332.34, "total_tokens": 63745968} {"current_steps": 33125, "total_steps": 40000, "loss": 0.0009, "lr": 3.5577942545061473e-06, "epoch": 5.4037849743045925, "percentage": 82.81, "elapsed_time": "7:35:33", "remaining_time": "1:34:32", "throughput": 2332.5, "total_tokens": 63755056} {"current_steps": 33130, "total_steps": 40000, "loss": 0.1232, "lr": 3.5527480547289967e-06, "epoch": 5.40460070152541, "percentage": 82.83, "elapsed_time": "7:35:35", "remaining_time": "1:34:28", "throughput": 2332.68, "total_tokens": 63764832} {"current_steps": 33135, "total_steps": 40000, "loss": 0.0003, "lr": 3.547705162387624e-06, "epoch": 5.405416428746228, "percentage": 82.84, "elapsed_time": "7:35:37", "remaining_time": "1:34:23", "throughput": 2332.84, "total_tokens": 63774064} {"current_steps": 33140, "total_steps": 40000, "loss": 0.1211, "lr": 3.542665578259699e-06, "epoch": 5.406232155967045, "percentage": 82.85, "elapsed_time": "7:35:39", "remaining_time": "1:34:19", "throughput": 2333.01, "total_tokens": 63783680} {"current_steps": 33145, "total_steps": 40000, "loss": 0.0042, "lr": 3.5376293031223945e-06, "epoch": 5.407047883187862, "percentage": 82.86, "elapsed_time": "7:35:41", "remaining_time": "1:34:14", "throughput": 2333.21, "total_tokens": 63793744} {"current_steps": 33150, "total_steps": 40000, "loss": 0.0158, "lr": 3.5325963377523614e-06, "epoch": 5.407863610408679, "percentage": 82.88, "elapsed_time": "7:35:43", "remaining_time": "1:34:10", "throughput": 2333.3, "total_tokens": 63801264} {"current_steps": 33155, "total_steps": 40000, "loss": 0.0004, "lr": 3.5275666829257536e-06, "epoch": 5.408679337629497, "percentage": 82.89, "elapsed_time": "7:35:45", "remaining_time": "1:34:05", "throughput": 2333.46, "total_tokens": 63810608} {"current_steps": 33160, "total_steps": 40000, "loss": 0.0001, "lr": 3.5225403394181955e-06, "epoch": 5.409495064850314, "percentage": 82.9, "elapsed_time": "7:35:48", "remaining_time": "1:34:01", "throughput": 2333.61, "total_tokens": 63819760} {"current_steps": 33165, "total_steps": 40000, "loss": 0.0, "lr": 3.517517308004828e-06, "epoch": 5.410310792071131, "percentage": 82.91, "elapsed_time": "7:35:50", "remaining_time": "1:33:56", "throughput": 2333.77, "total_tokens": 63829040} {"current_steps": 33170, "total_steps": 40000, "loss": 0.0509, "lr": 3.512497589460251e-06, "epoch": 5.411126519291948, "percentage": 82.93, "elapsed_time": "7:35:52", "remaining_time": "1:33:52", "throughput": 2333.94, "total_tokens": 63838608} {"current_steps": 33175, "total_steps": 40000, "loss": 0.0, "lr": 3.5074811845585727e-06, "epoch": 5.4119422465127665, "percentage": 82.94, "elapsed_time": "7:35:54", "remaining_time": "1:33:47", "throughput": 2334.07, "total_tokens": 63847296} {"current_steps": 33180, "total_steps": 40000, "loss": 0.0003, "lr": 3.5024680940733937e-06, "epoch": 5.412757973733584, "percentage": 82.95, "elapsed_time": "7:35:56", "remaining_time": "1:33:43", "throughput": 2334.26, "total_tokens": 63857456} {"current_steps": 33185, "total_steps": 40000, "loss": 0.0001, "lr": 3.4974583187777852e-06, "epoch": 5.413573700954401, "percentage": 82.96, "elapsed_time": "7:35:58", "remaining_time": "1:33:38", "throughput": 2334.43, "total_tokens": 63866848} {"current_steps": 33190, "total_steps": 40000, "loss": 0.0251, "lr": 3.4924518594443204e-06, "epoch": 5.414389428175218, "percentage": 82.97, "elapsed_time": "7:36:00", "remaining_time": "1:33:33", "throughput": 2334.64, "total_tokens": 63877488} {"current_steps": 33195, "total_steps": 40000, "loss": 0.0002, "lr": 3.4874487168450682e-06, "epoch": 5.415205155396036, "percentage": 82.99, "elapsed_time": "7:36:02", "remaining_time": "1:33:29", "throughput": 2334.77, "total_tokens": 63885824} {"current_steps": 33200, "total_steps": 40000, "loss": 0.0015, "lr": 3.482448891751558e-06, "epoch": 5.416020882616853, "percentage": 83.0, "elapsed_time": "7:36:04", "remaining_time": "1:33:24", "throughput": 2334.92, "total_tokens": 63894896} {"current_steps": 33200, "total_steps": 40000, "eval_loss": 0.4077255129814148, "epoch": 5.416020882616853, "percentage": 83.0, "elapsed_time": "7:37:25", "remaining_time": "1:33:41", "throughput": 2328.04, "total_tokens": 63894896} {"current_steps": 33205, "total_steps": 40000, "loss": 0.0001, "lr": 3.477452384934843e-06, "epoch": 5.41683660983767, "percentage": 83.01, "elapsed_time": "7:37:29", "remaining_time": "1:33:37", "throughput": 2328.06, "total_tokens": 63904192} {"current_steps": 33210, "total_steps": 40000, "loss": 0.0001, "lr": 3.472459197165434e-06, "epoch": 5.417652337058487, "percentage": 83.03, "elapsed_time": "7:37:31", "remaining_time": "1:33:32", "throughput": 2328.21, "total_tokens": 63913296} {"current_steps": 33215, "total_steps": 40000, "loss": 0.0016, "lr": 3.4674693292133518e-06, "epoch": 5.418468064279305, "percentage": 83.04, "elapsed_time": "7:37:33", "remaining_time": "1:33:28", "throughput": 2328.37, "total_tokens": 63922624} {"current_steps": 33220, "total_steps": 40000, "loss": 0.0001, "lr": 3.4624827818480977e-06, "epoch": 5.419283791500122, "percentage": 83.05, "elapsed_time": "7:37:35", "remaining_time": "1:33:23", "throughput": 2328.48, "total_tokens": 63930640} {"current_steps": 33225, "total_steps": 40000, "loss": 0.0002, "lr": 3.4574995558386474e-06, "epoch": 5.4200995187209395, "percentage": 83.06, "elapsed_time": "7:37:38", "remaining_time": "1:33:19", "throughput": 2328.66, "total_tokens": 63940528} {"current_steps": 33230, "total_steps": 40000, "loss": 0.0011, "lr": 3.452519651953487e-06, "epoch": 5.420915245941757, "percentage": 83.08, "elapsed_time": "7:37:40", "remaining_time": "1:33:14", "throughput": 2328.86, "total_tokens": 63951168} {"current_steps": 33235, "total_steps": 40000, "loss": 0.0014, "lr": 3.447543070960585e-06, "epoch": 5.421730973162575, "percentage": 83.09, "elapsed_time": "7:37:42", "remaining_time": "1:33:09", "throughput": 2329.01, "total_tokens": 63960320} {"current_steps": 33240, "total_steps": 40000, "loss": 0.0007, "lr": 3.4425698136273778e-06, "epoch": 5.422546700383392, "percentage": 83.1, "elapsed_time": "7:37:44", "remaining_time": "1:33:05", "throughput": 2329.18, "total_tokens": 63969888} {"current_steps": 33245, "total_steps": 40000, "loss": 0.0001, "lr": 3.437599880720821e-06, "epoch": 5.423362427604209, "percentage": 83.11, "elapsed_time": "7:37:46", "remaining_time": "1:33:00", "throughput": 2329.34, "total_tokens": 63979104} {"current_steps": 33250, "total_steps": 40000, "loss": 0.0008, "lr": 3.4326332730073267e-06, "epoch": 5.424178154825027, "percentage": 83.12, "elapsed_time": "7:37:48", "remaining_time": "1:32:56", "throughput": 2329.52, "total_tokens": 63988864} {"current_steps": 33255, "total_steps": 40000, "loss": 0.0815, "lr": 3.427669991252813e-06, "epoch": 5.424993882045844, "percentage": 83.14, "elapsed_time": "7:37:50", "remaining_time": "1:32:51", "throughput": 2329.73, "total_tokens": 63999488} {"current_steps": 33260, "total_steps": 40000, "loss": 0.0005, "lr": 3.42271003622269e-06, "epoch": 5.425809609266661, "percentage": 83.15, "elapsed_time": "7:37:52", "remaining_time": "1:32:47", "throughput": 2329.92, "total_tokens": 64009568} {"current_steps": 33265, "total_steps": 40000, "loss": 0.0003, "lr": 3.4177534086818286e-06, "epoch": 5.426625336487478, "percentage": 83.16, "elapsed_time": "7:37:54", "remaining_time": "1:32:42", "throughput": 2330.08, "total_tokens": 64018704} {"current_steps": 33270, "total_steps": 40000, "loss": 0.0, "lr": 3.412800109394612e-06, "epoch": 5.4274410637082955, "percentage": 83.17, "elapsed_time": "7:37:56", "remaining_time": "1:32:38", "throughput": 2330.26, "total_tokens": 64028560} {"current_steps": 33275, "total_steps": 40000, "loss": 0.0, "lr": 3.4078501391249044e-06, "epoch": 5.4282567909291135, "percentage": 83.19, "elapsed_time": "7:37:59", "remaining_time": "1:32:33", "throughput": 2330.44, "total_tokens": 64038416} {"current_steps": 33280, "total_steps": 40000, "loss": 0.0, "lr": 3.4029034986360453e-06, "epoch": 5.429072518149931, "percentage": 83.2, "elapsed_time": "7:38:01", "remaining_time": "1:32:29", "throughput": 2330.6, "total_tokens": 64047712} {"current_steps": 33285, "total_steps": 40000, "loss": 0.0, "lr": 3.397960188690877e-06, "epoch": 5.429888245370748, "percentage": 83.21, "elapsed_time": "7:38:03", "remaining_time": "1:32:24", "throughput": 2330.75, "total_tokens": 64057024} {"current_steps": 33290, "total_steps": 40000, "loss": 0.0001, "lr": 3.393020210051717e-06, "epoch": 5.430703972591566, "percentage": 83.23, "elapsed_time": "7:38:05", "remaining_time": "1:32:20", "throughput": 2330.89, "total_tokens": 64065648} {"current_steps": 33295, "total_steps": 40000, "loss": 0.0002, "lr": 3.3880835634803655e-06, "epoch": 5.431519699812383, "percentage": 83.24, "elapsed_time": "7:38:07", "remaining_time": "1:32:15", "throughput": 2331.05, "total_tokens": 64075248} {"current_steps": 33300, "total_steps": 40000, "loss": 0.0001, "lr": 3.383150249738126e-06, "epoch": 5.4323354270332, "percentage": 83.25, "elapsed_time": "7:38:09", "remaining_time": "1:32:10", "throughput": 2331.2, "total_tokens": 64084368} {"current_steps": 33305, "total_steps": 40000, "loss": 0.0003, "lr": 3.3782202695857663e-06, "epoch": 5.433151154254017, "percentage": 83.26, "elapsed_time": "7:38:11", "remaining_time": "1:32:06", "throughput": 2331.35, "total_tokens": 64093424} {"current_steps": 33310, "total_steps": 40000, "loss": 0.0001, "lr": 3.373293623783558e-06, "epoch": 5.433966881474835, "percentage": 83.28, "elapsed_time": "7:38:14", "remaining_time": "1:32:01", "throughput": 2331.58, "total_tokens": 64104384} {"current_steps": 33315, "total_steps": 40000, "loss": 0.0, "lr": 3.368370313091257e-06, "epoch": 5.434782608695652, "percentage": 83.29, "elapsed_time": "7:38:16", "remaining_time": "1:31:57", "throughput": 2331.74, "total_tokens": 64113664} {"current_steps": 33320, "total_steps": 40000, "loss": 0.0005, "lr": 3.363450338268087e-06, "epoch": 5.4355983359164695, "percentage": 83.3, "elapsed_time": "7:38:18", "remaining_time": "1:31:52", "throughput": 2331.92, "total_tokens": 64123392} {"current_steps": 33325, "total_steps": 40000, "loss": 0.0003, "lr": 3.358533700072783e-06, "epoch": 5.436414063137287, "percentage": 83.31, "elapsed_time": "7:38:20", "remaining_time": "1:31:48", "throughput": 2332.07, "total_tokens": 64132352} {"current_steps": 33330, "total_steps": 40000, "loss": 0.0027, "lr": 3.3536203992635377e-06, "epoch": 5.437229790358105, "percentage": 83.33, "elapsed_time": "7:38:22", "remaining_time": "1:31:43", "throughput": 2332.26, "total_tokens": 64142416} {"current_steps": 33335, "total_steps": 40000, "loss": 0.0001, "lr": 3.348710436598057e-06, "epoch": 5.438045517578922, "percentage": 83.34, "elapsed_time": "7:38:24", "remaining_time": "1:31:39", "throughput": 2332.44, "total_tokens": 64152256} {"current_steps": 33340, "total_steps": 40000, "loss": 0.0001, "lr": 3.3438038128335155e-06, "epoch": 5.438861244799739, "percentage": 83.35, "elapsed_time": "7:38:26", "remaining_time": "1:31:34", "throughput": 2332.61, "total_tokens": 64161856} {"current_steps": 33345, "total_steps": 40000, "loss": 0.0001, "lr": 3.338900528726571e-06, "epoch": 5.439676972020556, "percentage": 83.36, "elapsed_time": "7:38:28", "remaining_time": "1:31:30", "throughput": 2332.79, "total_tokens": 64171952} {"current_steps": 33350, "total_steps": 40000, "loss": 0.0002, "lr": 3.3340005850333812e-06, "epoch": 5.440492699241374, "percentage": 83.38, "elapsed_time": "7:38:30", "remaining_time": "1:31:25", "throughput": 2332.98, "total_tokens": 64182176} {"current_steps": 33355, "total_steps": 40000, "loss": 0.0, "lr": 3.329103982509568e-06, "epoch": 5.441308426462191, "percentage": 83.39, "elapsed_time": "7:38:32", "remaining_time": "1:31:21", "throughput": 2333.16, "total_tokens": 64192016} {"current_steps": 33360, "total_steps": 40000, "loss": 0.0001, "lr": 3.324210721910259e-06, "epoch": 5.442124153683008, "percentage": 83.4, "elapsed_time": "7:38:35", "remaining_time": "1:31:16", "throughput": 2333.31, "total_tokens": 64201392} {"current_steps": 33365, "total_steps": 40000, "loss": 0.0001, "lr": 3.319320803990053e-06, "epoch": 5.442939880903825, "percentage": 83.41, "elapsed_time": "7:38:37", "remaining_time": "1:31:12", "throughput": 2333.49, "total_tokens": 64211360} {"current_steps": 33370, "total_steps": 40000, "loss": 0.0, "lr": 3.3144342295030274e-06, "epoch": 5.443755608124643, "percentage": 83.43, "elapsed_time": "7:38:39", "remaining_time": "1:31:07", "throughput": 2333.67, "total_tokens": 64221264} {"current_steps": 33375, "total_steps": 40000, "loss": 0.0284, "lr": 3.309550999202765e-06, "epoch": 5.444571335345461, "percentage": 83.44, "elapsed_time": "7:38:41", "remaining_time": "1:31:03", "throughput": 2333.87, "total_tokens": 64231440} {"current_steps": 33380, "total_steps": 40000, "loss": 0.0, "lr": 3.3046711138423197e-06, "epoch": 5.445387062566278, "percentage": 83.45, "elapsed_time": "7:38:43", "remaining_time": "1:30:58", "throughput": 2333.98, "total_tokens": 64239520} {"current_steps": 33385, "total_steps": 40000, "loss": 0.0008, "lr": 3.2997945741742255e-06, "epoch": 5.446202789787095, "percentage": 83.46, "elapsed_time": "7:38:45", "remaining_time": "1:30:54", "throughput": 2334.19, "total_tokens": 64250096} {"current_steps": 33390, "total_steps": 40000, "loss": 0.0032, "lr": 3.2949213809505082e-06, "epoch": 5.447018517007913, "percentage": 83.47, "elapsed_time": "7:38:47", "remaining_time": "1:30:49", "throughput": 2334.35, "total_tokens": 64259184} {"current_steps": 33395, "total_steps": 40000, "loss": 0.0252, "lr": 3.2900515349226834e-06, "epoch": 5.44783424422873, "percentage": 83.49, "elapsed_time": "7:38:49", "remaining_time": "1:30:44", "throughput": 2334.52, "total_tokens": 64268912} {"current_steps": 33400, "total_steps": 40000, "loss": 0.0001, "lr": 3.285185036841731e-06, "epoch": 5.448649971449547, "percentage": 83.5, "elapsed_time": "7:38:51", "remaining_time": "1:30:40", "throughput": 2334.66, "total_tokens": 64277584} {"current_steps": 33400, "total_steps": 40000, "eval_loss": 0.42310479283332825, "epoch": 5.448649971449547, "percentage": 83.5, "elapsed_time": "7:40:12", "remaining_time": "1:30:56", "throughput": 2327.83, "total_tokens": 64277584} {"current_steps": 33405, "total_steps": 40000, "loss": 0.0, "lr": 3.2803218874581377e-06, "epoch": 5.449465698670364, "percentage": 83.51, "elapsed_time": "7:40:16", "remaining_time": "1:30:52", "throughput": 2327.8, "total_tokens": 64286688} {"current_steps": 33410, "total_steps": 40000, "loss": 0.0016, "lr": 3.2754620875218494e-06, "epoch": 5.450281425891182, "percentage": 83.53, "elapsed_time": "7:40:19", "remaining_time": "1:30:47", "throughput": 2327.96, "total_tokens": 64296240} {"current_steps": 33415, "total_steps": 40000, "loss": 0.0001, "lr": 3.2706056377823146e-06, "epoch": 5.451097153111999, "percentage": 83.54, "elapsed_time": "7:40:21", "remaining_time": "1:30:43", "throughput": 2328.11, "total_tokens": 64305248} {"current_steps": 33420, "total_steps": 40000, "loss": 0.0, "lr": 3.2657525389884647e-06, "epoch": 5.4519128803328165, "percentage": 83.55, "elapsed_time": "7:40:23", "remaining_time": "1:30:38", "throughput": 2328.28, "total_tokens": 64315008} {"current_steps": 33425, "total_steps": 40000, "loss": 0.0002, "lr": 3.260902791888698e-06, "epoch": 5.4527286075536345, "percentage": 83.56, "elapsed_time": "7:40:25", "remaining_time": "1:30:34", "throughput": 2328.44, "total_tokens": 64324480} {"current_steps": 33430, "total_steps": 40000, "loss": 0.0013, "lr": 3.2560563972309166e-06, "epoch": 5.453544334774452, "percentage": 83.58, "elapsed_time": "7:40:27", "remaining_time": "1:30:29", "throughput": 2328.61, "total_tokens": 64334096} {"current_steps": 33435, "total_steps": 40000, "loss": 0.0009, "lr": 3.251213355762489e-06, "epoch": 5.454360061995269, "percentage": 83.59, "elapsed_time": "7:40:29", "remaining_time": "1:30:25", "throughput": 2328.8, "total_tokens": 64344528} {"current_steps": 33440, "total_steps": 40000, "loss": 0.0551, "lr": 3.2463736682302707e-06, "epoch": 5.455175789216086, "percentage": 83.6, "elapsed_time": "7:40:32", "remaining_time": "1:30:20", "throughput": 2328.97, "total_tokens": 64354160} {"current_steps": 33445, "total_steps": 40000, "loss": 0.0004, "lr": 3.2415373353806124e-06, "epoch": 5.455991516436903, "percentage": 83.61, "elapsed_time": "7:40:34", "remaining_time": "1:30:16", "throughput": 2329.13, "total_tokens": 64363360} {"current_steps": 33450, "total_steps": 40000, "loss": 0.0011, "lr": 3.236704357959322e-06, "epoch": 5.456807243657721, "percentage": 83.62, "elapsed_time": "7:40:36", "remaining_time": "1:30:11", "throughput": 2329.28, "total_tokens": 64372336} {"current_steps": 33455, "total_steps": 40000, "loss": 0.0001, "lr": 3.2318747367117154e-06, "epoch": 5.457622970878538, "percentage": 83.64, "elapsed_time": "7:40:38", "remaining_time": "1:30:07", "throughput": 2329.43, "total_tokens": 64381488} {"current_steps": 33460, "total_steps": 40000, "loss": 0.0001, "lr": 3.227048472382585e-06, "epoch": 5.458438698099355, "percentage": 83.65, "elapsed_time": "7:40:40", "remaining_time": "1:30:02", "throughput": 2329.58, "total_tokens": 64390272} {"current_steps": 33465, "total_steps": 40000, "loss": 0.0001, "lr": 3.2222255657161915e-06, "epoch": 5.459254425320173, "percentage": 83.66, "elapsed_time": "7:40:42", "remaining_time": "1:29:57", "throughput": 2329.73, "total_tokens": 64399344} {"current_steps": 33470, "total_steps": 40000, "loss": 0.0001, "lr": 3.2174060174562924e-06, "epoch": 5.4600701525409905, "percentage": 83.67, "elapsed_time": "7:40:44", "remaining_time": "1:29:53", "throughput": 2329.92, "total_tokens": 64409488} {"current_steps": 33475, "total_steps": 40000, "loss": 0.0, "lr": 3.2125898283461298e-06, "epoch": 5.460885879761808, "percentage": 83.69, "elapsed_time": "7:40:46", "remaining_time": "1:29:48", "throughput": 2330.11, "total_tokens": 64419456} {"current_steps": 33480, "total_steps": 40000, "loss": 0.0, "lr": 3.207776999128406e-06, "epoch": 5.461701606982625, "percentage": 83.7, "elapsed_time": "7:40:48", "remaining_time": "1:29:44", "throughput": 2330.29, "total_tokens": 64429296} {"current_steps": 33485, "total_steps": 40000, "loss": 0.0001, "lr": 3.202967530545331e-06, "epoch": 5.462517334203443, "percentage": 83.71, "elapsed_time": "7:40:50", "remaining_time": "1:29:39", "throughput": 2330.49, "total_tokens": 64439744} {"current_steps": 33490, "total_steps": 40000, "loss": 0.0089, "lr": 3.1981614233385778e-06, "epoch": 5.46333306142426, "percentage": 83.73, "elapsed_time": "7:40:52", "remaining_time": "1:29:35", "throughput": 2330.68, "total_tokens": 64450048} {"current_steps": 33495, "total_steps": 40000, "loss": 0.0001, "lr": 3.1933586782493115e-06, "epoch": 5.464148788645077, "percentage": 83.74, "elapsed_time": "7:40:55", "remaining_time": "1:29:30", "throughput": 2330.79, "total_tokens": 64458128} {"current_steps": 33500, "total_steps": 40000, "loss": 0.0027, "lr": 3.188559296018184e-06, "epoch": 5.464964515865894, "percentage": 83.75, "elapsed_time": "7:40:57", "remaining_time": "1:29:26", "throughput": 2330.97, "total_tokens": 64468112} {"current_steps": 33505, "total_steps": 40000, "loss": 0.0001, "lr": 3.1837632773853098e-06, "epoch": 5.465780243086712, "percentage": 83.76, "elapsed_time": "7:40:59", "remaining_time": "1:29:21", "throughput": 2331.12, "total_tokens": 64477232} {"current_steps": 33510, "total_steps": 40000, "loss": 0.0007, "lr": 3.178970623090294e-06, "epoch": 5.466595970307529, "percentage": 83.78, "elapsed_time": "7:41:01", "remaining_time": "1:29:17", "throughput": 2331.27, "total_tokens": 64486512} {"current_steps": 33515, "total_steps": 40000, "loss": 0.0053, "lr": 3.174181333872234e-06, "epoch": 5.467411697528346, "percentage": 83.79, "elapsed_time": "7:41:03", "remaining_time": "1:29:12", "throughput": 2331.43, "total_tokens": 64495696} {"current_steps": 33520, "total_steps": 40000, "loss": 0.0001, "lr": 3.169395410469686e-06, "epoch": 5.468227424749164, "percentage": 83.8, "elapsed_time": "7:41:05", "remaining_time": "1:29:08", "throughput": 2331.6, "total_tokens": 64505440} {"current_steps": 33525, "total_steps": 40000, "loss": 0.0, "lr": 3.164612853620713e-06, "epoch": 5.469043151969982, "percentage": 83.81, "elapsed_time": "7:41:07", "remaining_time": "1:29:03", "throughput": 2331.79, "total_tokens": 64515728} {"current_steps": 33530, "total_steps": 40000, "loss": 0.0, "lr": 3.1598336640628333e-06, "epoch": 5.469858879190799, "percentage": 83.83, "elapsed_time": "7:41:09", "remaining_time": "1:28:59", "throughput": 2331.98, "total_tokens": 64525616} {"current_steps": 33535, "total_steps": 40000, "loss": 0.0955, "lr": 3.155057842533063e-06, "epoch": 5.470674606411616, "percentage": 83.84, "elapsed_time": "7:41:11", "remaining_time": "1:28:54", "throughput": 2332.16, "total_tokens": 64535504} {"current_steps": 33540, "total_steps": 40000, "loss": 0.0163, "lr": 3.1502853897678984e-06, "epoch": 5.471490333632433, "percentage": 83.85, "elapsed_time": "7:41:14", "remaining_time": "1:28:50", "throughput": 2332.32, "total_tokens": 64544864} {"current_steps": 33545, "total_steps": 40000, "loss": 0.0048, "lr": 3.1455163065033017e-06, "epoch": 5.472306060853251, "percentage": 83.86, "elapsed_time": "7:41:16", "remaining_time": "1:28:45", "throughput": 2332.46, "total_tokens": 64553504} {"current_steps": 33550, "total_steps": 40000, "loss": 0.0001, "lr": 3.140750593474734e-06, "epoch": 5.473121788074068, "percentage": 83.88, "elapsed_time": "7:41:18", "remaining_time": "1:28:41", "throughput": 2332.6, "total_tokens": 64562112} {"current_steps": 33555, "total_steps": 40000, "loss": 0.0163, "lr": 3.1359882514171294e-06, "epoch": 5.473937515294885, "percentage": 83.89, "elapsed_time": "7:41:20", "remaining_time": "1:28:36", "throughput": 2332.8, "total_tokens": 64572576} {"current_steps": 33560, "total_steps": 40000, "loss": 0.0418, "lr": 3.1312292810648903e-06, "epoch": 5.474753242515702, "percentage": 83.9, "elapsed_time": "7:41:22", "remaining_time": "1:28:32", "throughput": 2333.02, "total_tokens": 64583456} {"current_steps": 33565, "total_steps": 40000, "loss": 0.0, "lr": 3.1264736831519204e-06, "epoch": 5.47556896973652, "percentage": 83.91, "elapsed_time": "7:41:24", "remaining_time": "1:28:27", "throughput": 2333.22, "total_tokens": 64593824} {"current_steps": 33570, "total_steps": 40000, "loss": 0.0001, "lr": 3.1217214584115863e-06, "epoch": 5.4763846969573375, "percentage": 83.93, "elapsed_time": "7:41:26", "remaining_time": "1:28:23", "throughput": 2333.4, "total_tokens": 64603968} {"current_steps": 33575, "total_steps": 40000, "loss": 0.0002, "lr": 3.116972607576746e-06, "epoch": 5.477200424178155, "percentage": 83.94, "elapsed_time": "7:41:28", "remaining_time": "1:28:18", "throughput": 2333.58, "total_tokens": 64613808} {"current_steps": 33580, "total_steps": 40000, "loss": 0.0002, "lr": 3.1122271313797303e-06, "epoch": 5.478016151398972, "percentage": 83.95, "elapsed_time": "7:41:30", "remaining_time": "1:28:14", "throughput": 2333.76, "total_tokens": 64623952} {"current_steps": 33585, "total_steps": 40000, "loss": 0.0809, "lr": 3.107485030552343e-06, "epoch": 5.47883187861979, "percentage": 83.96, "elapsed_time": "7:41:33", "remaining_time": "1:28:09", "throughput": 2333.94, "total_tokens": 64633872} {"current_steps": 33590, "total_steps": 40000, "loss": 0.0, "lr": 3.1027463058258848e-06, "epoch": 5.479647605840607, "percentage": 83.97, "elapsed_time": "7:41:35", "remaining_time": "1:28:05", "throughput": 2334.08, "total_tokens": 64642800} {"current_steps": 33595, "total_steps": 40000, "loss": 0.0, "lr": 3.0980109579311273e-06, "epoch": 5.480463333061424, "percentage": 83.99, "elapsed_time": "7:41:37", "remaining_time": "1:28:00", "throughput": 2334.23, "total_tokens": 64651808} {"current_steps": 33600, "total_steps": 40000, "loss": 0.0001, "lr": 3.093278987598314e-06, "epoch": 5.481279060282241, "percentage": 84.0, "elapsed_time": "7:41:39", "remaining_time": "1:27:56", "throughput": 2334.41, "total_tokens": 64661856} {"current_steps": 33600, "total_steps": 40000, "eval_loss": 0.4215812385082245, "epoch": 5.481279060282241, "percentage": 84.0, "elapsed_time": "7:43:00", "remaining_time": "1:28:11", "throughput": 2327.62, "total_tokens": 64661856} {"current_steps": 33605, "total_steps": 40000, "loss": 0.0001, "lr": 3.0885503955571826e-06, "epoch": 5.482094787503059, "percentage": 84.01, "elapsed_time": "7:43:04", "remaining_time": "1:28:07", "throughput": 2327.66, "total_tokens": 64671856} {"current_steps": 33610, "total_steps": 40000, "loss": 0.0, "lr": 3.0838251825369313e-06, "epoch": 5.482910514723876, "percentage": 84.03, "elapsed_time": "7:43:06", "remaining_time": "1:28:02", "throughput": 2327.83, "total_tokens": 64681632} {"current_steps": 33615, "total_steps": 40000, "loss": 0.0001, "lr": 3.0791033492662517e-06, "epoch": 5.4837262419446935, "percentage": 84.04, "elapsed_time": "7:43:08", "remaining_time": "1:27:58", "throughput": 2328.02, "total_tokens": 64691728} {"current_steps": 33620, "total_steps": 40000, "loss": 0.0003, "lr": 3.0743848964733203e-06, "epoch": 5.484541969165511, "percentage": 84.05, "elapsed_time": "7:43:10", "remaining_time": "1:27:53", "throughput": 2328.23, "total_tokens": 64702208} {"current_steps": 33625, "total_steps": 40000, "loss": 0.0002, "lr": 3.0696698248857625e-06, "epoch": 5.485357696386329, "percentage": 84.06, "elapsed_time": "7:43:12", "remaining_time": "1:27:49", "throughput": 2328.41, "total_tokens": 64712208} {"current_steps": 33630, "total_steps": 40000, "loss": 0.0008, "lr": 3.0649581352307192e-06, "epoch": 5.486173423607146, "percentage": 84.08, "elapsed_time": "7:43:14", "remaining_time": "1:27:44", "throughput": 2328.59, "total_tokens": 64722080} {"current_steps": 33635, "total_steps": 40000, "loss": 0.0001, "lr": 3.060249828234776e-06, "epoch": 5.486989150827963, "percentage": 84.09, "elapsed_time": "7:43:16", "remaining_time": "1:27:40", "throughput": 2328.78, "total_tokens": 64731984} {"current_steps": 33640, "total_steps": 40000, "loss": 0.0002, "lr": 3.055544904624025e-06, "epoch": 5.487804878048781, "percentage": 84.1, "elapsed_time": "7:43:18", "remaining_time": "1:27:35", "throughput": 2328.96, "total_tokens": 64741936} {"current_steps": 33645, "total_steps": 40000, "loss": 0.0001, "lr": 3.050843365124026e-06, "epoch": 5.488620605269598, "percentage": 84.11, "elapsed_time": "7:43:20", "remaining_time": "1:27:31", "throughput": 2329.15, "total_tokens": 64752048} {"current_steps": 33650, "total_steps": 40000, "loss": 0.0001, "lr": 3.0461452104598083e-06, "epoch": 5.489436332490415, "percentage": 84.12, "elapsed_time": "7:43:22", "remaining_time": "1:27:26", "throughput": 2329.32, "total_tokens": 64761600} {"current_steps": 33655, "total_steps": 40000, "loss": 0.0007, "lr": 3.0414504413558836e-06, "epoch": 5.490252059711232, "percentage": 84.14, "elapsed_time": "7:43:24", "remaining_time": "1:27:22", "throughput": 2329.47, "total_tokens": 64770816} {"current_steps": 33660, "total_steps": 40000, "loss": 0.0001, "lr": 3.0367590585362564e-06, "epoch": 5.49106778693205, "percentage": 84.15, "elapsed_time": "7:43:27", "remaining_time": "1:27:17", "throughput": 2329.61, "total_tokens": 64779536} {"current_steps": 33665, "total_steps": 40000, "loss": 0.0001, "lr": 3.0320710627243813e-06, "epoch": 5.4918835141528675, "percentage": 84.16, "elapsed_time": "7:43:29", "remaining_time": "1:27:13", "throughput": 2329.79, "total_tokens": 64789664} {"current_steps": 33670, "total_steps": 40000, "loss": 0.0676, "lr": 3.027386454643222e-06, "epoch": 5.492699241373685, "percentage": 84.17, "elapsed_time": "7:43:31", "remaining_time": "1:27:08", "throughput": 2329.98, "total_tokens": 64799760} {"current_steps": 33675, "total_steps": 40000, "loss": 0.0, "lr": 3.0227052350151914e-06, "epoch": 5.493514968594502, "percentage": 84.19, "elapsed_time": "7:43:33", "remaining_time": "1:27:04", "throughput": 2330.14, "total_tokens": 64809264} {"current_steps": 33680, "total_steps": 40000, "loss": 0.097, "lr": 3.0180274045621957e-06, "epoch": 5.49433069581532, "percentage": 84.2, "elapsed_time": "7:43:35", "remaining_time": "1:26:59", "throughput": 2330.31, "total_tokens": 64819152} {"current_steps": 33685, "total_steps": 40000, "loss": 0.1313, "lr": 3.013352964005625e-06, "epoch": 5.495146423036137, "percentage": 84.21, "elapsed_time": "7:43:37", "remaining_time": "1:26:55", "throughput": 2330.45, "total_tokens": 64827968} {"current_steps": 33690, "total_steps": 40000, "loss": 0.0, "lr": 3.0086819140663218e-06, "epoch": 5.495962150256954, "percentage": 84.23, "elapsed_time": "7:43:39", "remaining_time": "1:26:50", "throughput": 2330.6, "total_tokens": 64837088} {"current_steps": 33695, "total_steps": 40000, "loss": 0.0001, "lr": 3.0040142554646265e-06, "epoch": 5.496777877477771, "percentage": 84.24, "elapsed_time": "7:43:42", "remaining_time": "1:26:46", "throughput": 2330.75, "total_tokens": 64846336} {"current_steps": 33700, "total_steps": 40000, "loss": 0.0005, "lr": 2.999349988920361e-06, "epoch": 5.497593604698589, "percentage": 84.25, "elapsed_time": "7:43:44", "remaining_time": "1:26:41", "throughput": 2330.93, "total_tokens": 64856224} {"current_steps": 33705, "total_steps": 40000, "loss": 0.0191, "lr": 2.994689115152796e-06, "epoch": 5.498409331919406, "percentage": 84.26, "elapsed_time": "7:43:46", "remaining_time": "1:26:37", "throughput": 2331.08, "total_tokens": 64865392} {"current_steps": 33710, "total_steps": 40000, "loss": 0.0065, "lr": 2.9900316348807105e-06, "epoch": 5.499225059140223, "percentage": 84.28, "elapsed_time": "7:43:48", "remaining_time": "1:26:32", "throughput": 2331.25, "total_tokens": 64874848} {"current_steps": 33715, "total_steps": 40000, "loss": 0.0001, "lr": 2.985377548822338e-06, "epoch": 5.5000407863610405, "percentage": 84.29, "elapsed_time": "7:43:50", "remaining_time": "1:26:28", "throughput": 2331.42, "total_tokens": 64884496} {"current_steps": 33720, "total_steps": 40000, "loss": 0.0002, "lr": 2.980726857695404e-06, "epoch": 5.500856513581859, "percentage": 84.3, "elapsed_time": "7:43:52", "remaining_time": "1:26:23", "throughput": 2331.61, "total_tokens": 64894464} {"current_steps": 33725, "total_steps": 40000, "loss": 0.0001, "lr": 2.9760795622171017e-06, "epoch": 5.501672240802676, "percentage": 84.31, "elapsed_time": "7:43:54", "remaining_time": "1:26:19", "throughput": 2331.71, "total_tokens": 64902192} {"current_steps": 33730, "total_steps": 40000, "loss": 0.0589, "lr": 2.971435663104094e-06, "epoch": 5.502487968023493, "percentage": 84.33, "elapsed_time": "7:43:56", "remaining_time": "1:26:14", "throughput": 2331.83, "total_tokens": 64910208} {"current_steps": 33735, "total_steps": 40000, "loss": 0.0001, "lr": 2.9667951610725385e-06, "epoch": 5.50330369524431, "percentage": 84.34, "elapsed_time": "7:43:58", "remaining_time": "1:26:09", "throughput": 2331.97, "total_tokens": 64918928} {"current_steps": 33740, "total_steps": 40000, "loss": 0.0006, "lr": 2.9621580568380575e-06, "epoch": 5.504119422465128, "percentage": 84.35, "elapsed_time": "7:44:00", "remaining_time": "1:26:05", "throughput": 2332.13, "total_tokens": 64928320} {"current_steps": 33745, "total_steps": 40000, "loss": 0.0, "lr": 2.9575243511157453e-06, "epoch": 5.504935149685945, "percentage": 84.36, "elapsed_time": "7:44:02", "remaining_time": "1:26:00", "throughput": 2332.32, "total_tokens": 64938304} {"current_steps": 33750, "total_steps": 40000, "loss": 0.0, "lr": 2.952894044620186e-06, "epoch": 5.505750876906762, "percentage": 84.38, "elapsed_time": "7:44:04", "remaining_time": "1:25:56", "throughput": 2332.51, "total_tokens": 64948464} {"current_steps": 33755, "total_steps": 40000, "loss": 0.002, "lr": 2.948267138065419e-06, "epoch": 5.506566604127579, "percentage": 84.39, "elapsed_time": "7:44:07", "remaining_time": "1:25:51", "throughput": 2332.66, "total_tokens": 64957856} {"current_steps": 33760, "total_steps": 40000, "loss": 0.0002, "lr": 2.943643632164983e-06, "epoch": 5.507382331348397, "percentage": 84.4, "elapsed_time": "7:44:09", "remaining_time": "1:25:47", "throughput": 2332.79, "total_tokens": 64966464} {"current_steps": 33765, "total_steps": 40000, "loss": 0.0002, "lr": 2.939023527631879e-06, "epoch": 5.5081980585692145, "percentage": 84.41, "elapsed_time": "7:44:11", "remaining_time": "1:25:42", "throughput": 2332.97, "total_tokens": 64976512} {"current_steps": 33770, "total_steps": 40000, "loss": 0.0001, "lr": 2.934406825178576e-06, "epoch": 5.509013785790032, "percentage": 84.42, "elapsed_time": "7:44:13", "remaining_time": "1:25:38", "throughput": 2333.17, "total_tokens": 64986880} {"current_steps": 33775, "total_steps": 40000, "loss": 0.0, "lr": 2.9297935255170357e-06, "epoch": 5.50982951301085, "percentage": 84.44, "elapsed_time": "7:44:15", "remaining_time": "1:25:34", "throughput": 2333.3, "total_tokens": 64995648} {"current_steps": 33780, "total_steps": 40000, "loss": 0.0001, "lr": 2.925183629358691e-06, "epoch": 5.510645240231667, "percentage": 84.45, "elapsed_time": "7:44:17", "remaining_time": "1:25:29", "throughput": 2333.48, "total_tokens": 65005568} {"current_steps": 33785, "total_steps": 40000, "loss": 0.0728, "lr": 2.9205771374144346e-06, "epoch": 5.511460967452484, "percentage": 84.46, "elapsed_time": "7:44:19", "remaining_time": "1:25:25", "throughput": 2333.62, "total_tokens": 65014432} {"current_steps": 33790, "total_steps": 40000, "loss": 0.0, "lr": 2.915974050394657e-06, "epoch": 5.512276694673301, "percentage": 84.47, "elapsed_time": "7:44:22", "remaining_time": "1:25:20", "throughput": 2333.77, "total_tokens": 65023872} {"current_steps": 33795, "total_steps": 40000, "loss": 0.0509, "lr": 2.9113743690092067e-06, "epoch": 5.513092421894118, "percentage": 84.49, "elapsed_time": "7:44:24", "remaining_time": "1:25:16", "throughput": 2333.93, "total_tokens": 65033280} {"current_steps": 33800, "total_steps": 40000, "loss": 0.0, "lr": 2.906778093967402e-06, "epoch": 5.513908149114936, "percentage": 84.5, "elapsed_time": "7:44:26", "remaining_time": "1:25:11", "throughput": 2334.11, "total_tokens": 65043136} {"current_steps": 33800, "total_steps": 40000, "eval_loss": 0.42182645201683044, "epoch": 5.513908149114936, "percentage": 84.5, "elapsed_time": "7:45:47", "remaining_time": "1:25:26", "throughput": 2327.36, "total_tokens": 65043136} {"current_steps": 33805, "total_steps": 40000, "loss": 0.0, "lr": 2.9021852259780656e-06, "epoch": 5.514723876335753, "percentage": 84.51, "elapsed_time": "7:45:50", "remaining_time": "1:25:22", "throughput": 2327.42, "total_tokens": 65053504} {"current_steps": 33810, "total_steps": 40000, "loss": 0.1008, "lr": 2.8975957657494583e-06, "epoch": 5.5155396035565705, "percentage": 84.52, "elapsed_time": "7:45:52", "remaining_time": "1:25:17", "throughput": 2327.59, "total_tokens": 65063120} {"current_steps": 33815, "total_steps": 40000, "loss": 0.0022, "lr": 2.8930097139893417e-06, "epoch": 5.5163553307773885, "percentage": 84.54, "elapsed_time": "7:45:55", "remaining_time": "1:25:13", "throughput": 2327.8, "total_tokens": 65073824} {"current_steps": 33820, "total_steps": 40000, "loss": 0.0002, "lr": 2.888427071404945e-06, "epoch": 5.517171057998206, "percentage": 84.55, "elapsed_time": "7:45:57", "remaining_time": "1:25:08", "throughput": 2327.99, "total_tokens": 65083952} {"current_steps": 33825, "total_steps": 40000, "loss": 0.0001, "lr": 2.8838478387029606e-06, "epoch": 5.517986785219023, "percentage": 84.56, "elapsed_time": "7:45:59", "remaining_time": "1:25:04", "throughput": 2328.2, "total_tokens": 65094704} {"current_steps": 33830, "total_steps": 40000, "loss": 0.0, "lr": 2.8792720165895737e-06, "epoch": 5.51880251243984, "percentage": 84.58, "elapsed_time": "7:46:01", "remaining_time": "1:24:59", "throughput": 2328.41, "total_tokens": 65105392} {"current_steps": 33835, "total_steps": 40000, "loss": 0.0053, "lr": 2.874699605770423e-06, "epoch": 5.519618239660657, "percentage": 84.59, "elapsed_time": "7:46:03", "remaining_time": "1:24:55", "throughput": 2328.58, "total_tokens": 65114976} {"current_steps": 33840, "total_steps": 40000, "loss": 0.032, "lr": 2.8701306069506383e-06, "epoch": 5.520433966881475, "percentage": 84.6, "elapsed_time": "7:46:05", "remaining_time": "1:24:50", "throughput": 2328.79, "total_tokens": 65125664} {"current_steps": 33845, "total_steps": 40000, "loss": 0.0004, "lr": 2.8655650208348178e-06, "epoch": 5.521249694102292, "percentage": 84.61, "elapsed_time": "7:46:07", "remaining_time": "1:24:46", "throughput": 2328.98, "total_tokens": 65135888} {"current_steps": 33850, "total_steps": 40000, "loss": 0.0002, "lr": 2.8610028481270257e-06, "epoch": 5.522065421323109, "percentage": 84.62, "elapsed_time": "7:46:09", "remaining_time": "1:24:41", "throughput": 2329.18, "total_tokens": 65146208} {"current_steps": 33855, "total_steps": 40000, "loss": 0.0022, "lr": 2.856444089530813e-06, "epoch": 5.522881148543927, "percentage": 84.64, "elapsed_time": "7:46:11", "remaining_time": "1:24:37", "throughput": 2329.38, "total_tokens": 65156512} {"current_steps": 33860, "total_steps": 40000, "loss": 0.013, "lr": 2.8518887457491955e-06, "epoch": 5.523696875764744, "percentage": 84.65, "elapsed_time": "7:46:13", "remaining_time": "1:24:32", "throughput": 2329.56, "total_tokens": 65166656} {"current_steps": 33865, "total_steps": 40000, "loss": 0.0001, "lr": 2.8473368174846666e-06, "epoch": 5.524512602985562, "percentage": 84.66, "elapsed_time": "7:46:15", "remaining_time": "1:24:28", "throughput": 2329.7, "total_tokens": 65175664} {"current_steps": 33870, "total_steps": 40000, "loss": 0.0, "lr": 2.842788305439184e-06, "epoch": 5.525328330206379, "percentage": 84.67, "elapsed_time": "7:46:18", "remaining_time": "1:24:23", "throughput": 2329.91, "total_tokens": 65186400} {"current_steps": 33875, "total_steps": 40000, "loss": 0.0002, "lr": 2.8382432103141925e-06, "epoch": 5.526144057427197, "percentage": 84.69, "elapsed_time": "7:46:20", "remaining_time": "1:24:19", "throughput": 2330.06, "total_tokens": 65195664} {"current_steps": 33880, "total_steps": 40000, "loss": 0.0025, "lr": 2.833701532810598e-06, "epoch": 5.526959784648014, "percentage": 84.7, "elapsed_time": "7:46:22", "remaining_time": "1:24:14", "throughput": 2330.23, "total_tokens": 65205408} {"current_steps": 33885, "total_steps": 40000, "loss": 0.0001, "lr": 2.8291632736287877e-06, "epoch": 5.527775511868831, "percentage": 84.71, "elapsed_time": "7:46:24", "remaining_time": "1:24:10", "throughput": 2330.45, "total_tokens": 65216448} {"current_steps": 33890, "total_steps": 40000, "loss": 0.0, "lr": 2.824628433468615e-06, "epoch": 5.528591239089648, "percentage": 84.72, "elapsed_time": "7:46:26", "remaining_time": "1:24:05", "throughput": 2330.63, "total_tokens": 65226480} {"current_steps": 33895, "total_steps": 40000, "loss": 0.0003, "lr": 2.8200970130294073e-06, "epoch": 5.529406966310466, "percentage": 84.74, "elapsed_time": "7:46:28", "remaining_time": "1:24:01", "throughput": 2330.77, "total_tokens": 65235328} {"current_steps": 33900, "total_steps": 40000, "loss": 0.0001, "lr": 2.8155690130099775e-06, "epoch": 5.530222693531283, "percentage": 84.75, "elapsed_time": "7:46:30", "remaining_time": "1:23:56", "throughput": 2330.89, "total_tokens": 65243792} {"current_steps": 33905, "total_steps": 40000, "loss": 0.0652, "lr": 2.8110444341085895e-06, "epoch": 5.5310384207521, "percentage": 84.76, "elapsed_time": "7:46:33", "remaining_time": "1:23:52", "throughput": 2331.1, "total_tokens": 65254624} {"current_steps": 33910, "total_steps": 40000, "loss": 0.0, "lr": 2.806523277022996e-06, "epoch": 5.5318541479729175, "percentage": 84.78, "elapsed_time": "7:46:35", "remaining_time": "1:23:47", "throughput": 2331.25, "total_tokens": 65263872} {"current_steps": 33915, "total_steps": 40000, "loss": 0.0, "lr": 2.802005542450409e-06, "epoch": 5.5326698751937355, "percentage": 84.79, "elapsed_time": "7:46:37", "remaining_time": "1:23:43", "throughput": 2331.38, "total_tokens": 65272480} {"current_steps": 33920, "total_steps": 40000, "loss": 0.0001, "lr": 2.797491231087526e-06, "epoch": 5.533485602414553, "percentage": 84.8, "elapsed_time": "7:46:39", "remaining_time": "1:23:38", "throughput": 2331.52, "total_tokens": 65281328} {"current_steps": 33925, "total_steps": 40000, "loss": 0.0006, "lr": 2.7929803436305137e-06, "epoch": 5.53430132963537, "percentage": 84.81, "elapsed_time": "7:46:41", "remaining_time": "1:23:34", "throughput": 2331.7, "total_tokens": 65291344} {"current_steps": 33930, "total_steps": 40000, "loss": 0.0001, "lr": 2.788472880774998e-06, "epoch": 5.535117056856187, "percentage": 84.82, "elapsed_time": "7:46:43", "remaining_time": "1:23:29", "throughput": 2331.85, "total_tokens": 65300304} {"current_steps": 33935, "total_steps": 40000, "loss": 0.0001, "lr": 2.7839688432160977e-06, "epoch": 5.535932784077005, "percentage": 84.84, "elapsed_time": "7:46:45", "remaining_time": "1:23:25", "throughput": 2332.03, "total_tokens": 65310272} {"current_steps": 33940, "total_steps": 40000, "loss": 0.0011, "lr": 2.779468231648383e-06, "epoch": 5.536748511297822, "percentage": 84.85, "elapsed_time": "7:46:47", "remaining_time": "1:23:20", "throughput": 2332.24, "total_tokens": 65320928} {"current_steps": 33945, "total_steps": 40000, "loss": 0.0025, "lr": 2.774971046765906e-06, "epoch": 5.537564238518639, "percentage": 84.86, "elapsed_time": "7:46:49", "remaining_time": "1:23:16", "throughput": 2332.41, "total_tokens": 65330592} {"current_steps": 33950, "total_steps": 40000, "loss": 0.0, "lr": 2.770477289262194e-06, "epoch": 5.538379965739456, "percentage": 84.88, "elapsed_time": "7:46:51", "remaining_time": "1:23:11", "throughput": 2332.64, "total_tokens": 65341744} {"current_steps": 33955, "total_steps": 40000, "loss": 0.0001, "lr": 2.765986959830233e-06, "epoch": 5.539195692960274, "percentage": 84.89, "elapsed_time": "7:46:53", "remaining_time": "1:23:07", "throughput": 2332.76, "total_tokens": 65349856} {"current_steps": 33960, "total_steps": 40000, "loss": 0.0002, "lr": 2.761500059162492e-06, "epoch": 5.5400114201810915, "percentage": 84.9, "elapsed_time": "7:46:56", "remaining_time": "1:23:02", "throughput": 2332.87, "total_tokens": 65357968} {"current_steps": 33965, "total_steps": 40000, "loss": 0.0005, "lr": 2.757016587950914e-06, "epoch": 5.540827147401909, "percentage": 84.91, "elapsed_time": "7:46:58", "remaining_time": "1:22:58", "throughput": 2333.07, "total_tokens": 65368368} {"current_steps": 33970, "total_steps": 40000, "loss": 0.0548, "lr": 2.752536546886897e-06, "epoch": 5.541642874622726, "percentage": 84.92, "elapsed_time": "7:47:00", "remaining_time": "1:22:53", "throughput": 2333.25, "total_tokens": 65378064} {"current_steps": 33975, "total_steps": 40000, "loss": 0.0001, "lr": 2.7480599366613234e-06, "epoch": 5.542458601843544, "percentage": 84.94, "elapsed_time": "7:47:02", "remaining_time": "1:22:49", "throughput": 2333.43, "total_tokens": 65388016} {"current_steps": 33980, "total_steps": 40000, "loss": 0.0, "lr": 2.7435867579645473e-06, "epoch": 5.543274329064361, "percentage": 84.95, "elapsed_time": "7:47:04", "remaining_time": "1:22:44", "throughput": 2333.65, "total_tokens": 65399056} {"current_steps": 33985, "total_steps": 40000, "loss": 0.0, "lr": 2.739117011486378e-06, "epoch": 5.544090056285178, "percentage": 84.96, "elapsed_time": "7:47:06", "remaining_time": "1:22:40", "throughput": 2333.81, "total_tokens": 65408576} {"current_steps": 33990, "total_steps": 40000, "loss": 0.0001, "lr": 2.7346506979161216e-06, "epoch": 5.544905783505996, "percentage": 84.97, "elapsed_time": "7:47:08", "remaining_time": "1:22:35", "throughput": 2334.02, "total_tokens": 65419584} {"current_steps": 33995, "total_steps": 40000, "loss": 0.0001, "lr": 2.7301878179425227e-06, "epoch": 5.545721510726813, "percentage": 84.99, "elapsed_time": "7:47:10", "remaining_time": "1:22:31", "throughput": 2334.22, "total_tokens": 65429952} {"current_steps": 34000, "total_steps": 40000, "loss": 0.0, "lr": 2.7257283722538244e-06, "epoch": 5.54653723794763, "percentage": 85.0, "elapsed_time": "7:47:12", "remaining_time": "1:22:26", "throughput": 2334.37, "total_tokens": 65439360} {"current_steps": 34000, "total_steps": 40000, "eval_loss": 0.42869704961776733, "epoch": 5.54653723794763, "percentage": 85.0, "elapsed_time": "7:48:33", "remaining_time": "1:22:41", "throughput": 2327.66, "total_tokens": 65439360} {"current_steps": 34005, "total_steps": 40000, "loss": 0.0002, "lr": 2.7212723615377326e-06, "epoch": 5.5473529651684474, "percentage": 85.01, "elapsed_time": "7:48:37", "remaining_time": "1:22:37", "throughput": 2327.67, "total_tokens": 65449056} {"current_steps": 34010, "total_steps": 40000, "loss": 0.0004, "lr": 2.7168197864814145e-06, "epoch": 5.548168692389265, "percentage": 85.02, "elapsed_time": "7:48:40", "remaining_time": "1:22:32", "throughput": 2327.85, "total_tokens": 65459152} {"current_steps": 34015, "total_steps": 40000, "loss": 0.0034, "lr": 2.712370647771509e-06, "epoch": 5.548984419610083, "percentage": 85.04, "elapsed_time": "7:48:42", "remaining_time": "1:22:28", "throughput": 2328.01, "total_tokens": 65468800} {"current_steps": 34020, "total_steps": 40000, "loss": 0.0002, "lr": 2.707924946094137e-06, "epoch": 5.5498001468309, "percentage": 85.05, "elapsed_time": "7:48:44", "remaining_time": "1:22:23", "throughput": 2328.16, "total_tokens": 65477872} {"current_steps": 34025, "total_steps": 40000, "loss": 0.0917, "lr": 2.7034826821348723e-06, "epoch": 5.550615874051717, "percentage": 85.06, "elapsed_time": "7:48:46", "remaining_time": "1:22:19", "throughput": 2328.29, "total_tokens": 65486512} {"current_steps": 34030, "total_steps": 40000, "loss": 0.0, "lr": 2.6990438565787786e-06, "epoch": 5.551431601272535, "percentage": 85.08, "elapsed_time": "7:48:48", "remaining_time": "1:22:14", "throughput": 2328.41, "total_tokens": 65494816} {"current_steps": 34035, "total_steps": 40000, "loss": 0.0001, "lr": 2.6946084701103714e-06, "epoch": 5.552247328493352, "percentage": 85.09, "elapsed_time": "7:48:50", "remaining_time": "1:22:10", "throughput": 2328.57, "total_tokens": 65504528} {"current_steps": 34040, "total_steps": 40000, "loss": 0.0, "lr": 2.6901765234136428e-06, "epoch": 5.553063055714169, "percentage": 85.1, "elapsed_time": "7:48:52", "remaining_time": "1:22:05", "throughput": 2328.74, "total_tokens": 65514032} {"current_steps": 34045, "total_steps": 40000, "loss": 0.0, "lr": 2.685748017172063e-06, "epoch": 5.553878782934986, "percentage": 85.11, "elapsed_time": "7:48:55", "remaining_time": "1:22:01", "throughput": 2328.9, "total_tokens": 65523632} {"current_steps": 34050, "total_steps": 40000, "loss": 0.0001, "lr": 2.681322952068549e-06, "epoch": 5.554694510155803, "percentage": 85.12, "elapsed_time": "7:48:57", "remaining_time": "1:21:56", "throughput": 2329.05, "total_tokens": 65532800} {"current_steps": 34055, "total_steps": 40000, "loss": 0.0001, "lr": 2.6769013287855137e-06, "epoch": 5.555510237376621, "percentage": 85.14, "elapsed_time": "7:48:59", "remaining_time": "1:21:52", "throughput": 2329.2, "total_tokens": 65541712} {"current_steps": 34060, "total_steps": 40000, "loss": 0.0001, "lr": 2.6724831480048286e-06, "epoch": 5.5563259645974385, "percentage": 85.15, "elapsed_time": "7:49:01", "remaining_time": "1:21:47", "throughput": 2329.39, "total_tokens": 65551968} {"current_steps": 34065, "total_steps": 40000, "loss": 0.0079, "lr": 2.66806841040782e-06, "epoch": 5.557141691818256, "percentage": 85.16, "elapsed_time": "7:49:03", "remaining_time": "1:21:43", "throughput": 2329.61, "total_tokens": 65562880} {"current_steps": 34070, "total_steps": 40000, "loss": 0.015, "lr": 2.6636571166753083e-06, "epoch": 5.557957419039074, "percentage": 85.17, "elapsed_time": "7:49:05", "remaining_time": "1:21:38", "throughput": 2329.82, "total_tokens": 65573552} {"current_steps": 34075, "total_steps": 40000, "loss": 0.0, "lr": 2.6592492674875598e-06, "epoch": 5.558773146259891, "percentage": 85.19, "elapsed_time": "7:49:07", "remaining_time": "1:21:34", "throughput": 2329.97, "total_tokens": 65582688} {"current_steps": 34080, "total_steps": 40000, "loss": 0.0877, "lr": 2.6548448635243305e-06, "epoch": 5.559588873480708, "percentage": 85.2, "elapsed_time": "7:49:09", "remaining_time": "1:21:29", "throughput": 2330.14, "total_tokens": 65592320} {"current_steps": 34085, "total_steps": 40000, "loss": 0.0007, "lr": 2.650443905464828e-06, "epoch": 5.560404600701525, "percentage": 85.21, "elapsed_time": "7:49:11", "remaining_time": "1:21:25", "throughput": 2330.32, "total_tokens": 65602112} {"current_steps": 34090, "total_steps": 40000, "loss": 0.0004, "lr": 2.646046393987739e-06, "epoch": 5.561220327922343, "percentage": 85.22, "elapsed_time": "7:49:13", "remaining_time": "1:21:20", "throughput": 2330.47, "total_tokens": 65611328} {"current_steps": 34095, "total_steps": 40000, "loss": 0.0773, "lr": 2.64165232977121e-06, "epoch": 5.56203605514316, "percentage": 85.24, "elapsed_time": "7:49:15", "remaining_time": "1:21:16", "throughput": 2330.64, "total_tokens": 65620992} {"current_steps": 34100, "total_steps": 40000, "loss": 0.0, "lr": 2.6372617134928695e-06, "epoch": 5.562851782363977, "percentage": 85.25, "elapsed_time": "7:49:17", "remaining_time": "1:21:11", "throughput": 2330.84, "total_tokens": 65631200} {"current_steps": 34105, "total_steps": 40000, "loss": 0.1856, "lr": 2.6328745458297943e-06, "epoch": 5.5636675095847945, "percentage": 85.26, "elapsed_time": "7:49:19", "remaining_time": "1:21:07", "throughput": 2330.95, "total_tokens": 65639168} {"current_steps": 34110, "total_steps": 40000, "loss": 0.0808, "lr": 2.6284908274585546e-06, "epoch": 5.5644832368056125, "percentage": 85.28, "elapsed_time": "7:49:21", "remaining_time": "1:21:02", "throughput": 2331.09, "total_tokens": 65647872} {"current_steps": 34115, "total_steps": 40000, "loss": 0.0002, "lr": 2.6241105590551595e-06, "epoch": 5.56529896402643, "percentage": 85.29, "elapsed_time": "7:49:24", "remaining_time": "1:20:58", "throughput": 2331.21, "total_tokens": 65656096} {"current_steps": 34120, "total_steps": 40000, "loss": 0.0004, "lr": 2.6197337412951105e-06, "epoch": 5.566114691247247, "percentage": 85.3, "elapsed_time": "7:49:26", "remaining_time": "1:20:53", "throughput": 2331.4, "total_tokens": 65666528} {"current_steps": 34125, "total_steps": 40000, "loss": 0.0004, "lr": 2.6153603748533705e-06, "epoch": 5.566930418468064, "percentage": 85.31, "elapsed_time": "7:49:28", "remaining_time": "1:20:49", "throughput": 2331.57, "total_tokens": 65676240} {"current_steps": 34130, "total_steps": 40000, "loss": 0.0014, "lr": 2.6109904604043585e-06, "epoch": 5.567746145688882, "percentage": 85.32, "elapsed_time": "7:49:30", "remaining_time": "1:20:45", "throughput": 2331.72, "total_tokens": 65685424} {"current_steps": 34135, "total_steps": 40000, "loss": 0.0, "lr": 2.6066239986219765e-06, "epoch": 5.568561872909699, "percentage": 85.34, "elapsed_time": "7:49:32", "remaining_time": "1:20:40", "throughput": 2331.87, "total_tokens": 65694768} {"current_steps": 34140, "total_steps": 40000, "loss": 0.1023, "lr": 2.602260990179592e-06, "epoch": 5.569377600130516, "percentage": 85.35, "elapsed_time": "7:49:34", "remaining_time": "1:20:36", "throughput": 2332.04, "total_tokens": 65704560} {"current_steps": 34145, "total_steps": 40000, "loss": 0.0, "lr": 2.5979014357500248e-06, "epoch": 5.570193327351333, "percentage": 85.36, "elapsed_time": "7:49:36", "remaining_time": "1:20:31", "throughput": 2332.21, "total_tokens": 65714336} {"current_steps": 34150, "total_steps": 40000, "loss": 0.0008, "lr": 2.5935453360055844e-06, "epoch": 5.571009054572151, "percentage": 85.38, "elapsed_time": "7:49:39", "remaining_time": "1:20:27", "throughput": 2332.38, "total_tokens": 65724176} {"current_steps": 34155, "total_steps": 40000, "loss": 0.0017, "lr": 2.5891926916180283e-06, "epoch": 5.5718247817929685, "percentage": 85.39, "elapsed_time": "7:49:41", "remaining_time": "1:20:22", "throughput": 2332.55, "total_tokens": 65734048} {"current_steps": 34160, "total_steps": 40000, "loss": 0.0, "lr": 2.5848435032585883e-06, "epoch": 5.572640509013786, "percentage": 85.4, "elapsed_time": "7:49:43", "remaining_time": "1:20:18", "throughput": 2332.7, "total_tokens": 65743312} {"current_steps": 34165, "total_steps": 40000, "loss": 0.0001, "lr": 2.58049777159797e-06, "epoch": 5.573456236234604, "percentage": 85.41, "elapsed_time": "7:49:45", "remaining_time": "1:20:13", "throughput": 2332.85, "total_tokens": 65752368} {"current_steps": 34170, "total_steps": 40000, "loss": 0.0001, "lr": 2.576155497306332e-06, "epoch": 5.574271963455421, "percentage": 85.42, "elapsed_time": "7:49:47", "remaining_time": "1:20:09", "throughput": 2333.0, "total_tokens": 65761696} {"current_steps": 34175, "total_steps": 40000, "loss": 0.0671, "lr": 2.57181668105331e-06, "epoch": 5.575087690676238, "percentage": 85.44, "elapsed_time": "7:49:49", "remaining_time": "1:20:04", "throughput": 2333.19, "total_tokens": 65771904} {"current_steps": 34180, "total_steps": 40000, "loss": 0.0005, "lr": 2.567481323508014e-06, "epoch": 5.575903417897055, "percentage": 85.45, "elapsed_time": "7:49:51", "remaining_time": "1:20:00", "throughput": 2333.32, "total_tokens": 65780624} {"current_steps": 34185, "total_steps": 40000, "loss": 0.0002, "lr": 2.5631494253389954e-06, "epoch": 5.576719145117872, "percentage": 85.46, "elapsed_time": "7:49:54", "remaining_time": "1:19:55", "throughput": 2333.5, "total_tokens": 65790640} {"current_steps": 34190, "total_steps": 40000, "loss": 0.0742, "lr": 2.5588209872142997e-06, "epoch": 5.57753487233869, "percentage": 85.47, "elapsed_time": "7:49:56", "remaining_time": "1:19:51", "throughput": 2333.69, "total_tokens": 65800976} {"current_steps": 34195, "total_steps": 40000, "loss": 0.0007, "lr": 2.5544960098014186e-06, "epoch": 5.578350599559507, "percentage": 85.49, "elapsed_time": "7:49:58", "remaining_time": "1:19:46", "throughput": 2333.85, "total_tokens": 65810448} {"current_steps": 34200, "total_steps": 40000, "loss": 0.0002, "lr": 2.550174493767318e-06, "epoch": 5.579166326780324, "percentage": 85.5, "elapsed_time": "7:50:00", "remaining_time": "1:19:42", "throughput": 2334.0, "total_tokens": 65819600} {"current_steps": 34200, "total_steps": 40000, "eval_loss": 0.4232428967952728, "epoch": 5.579166326780324, "percentage": 85.5, "elapsed_time": "7:51:21", "remaining_time": "1:19:56", "throughput": 2327.34, "total_tokens": 65819600} {"current_steps": 34205, "total_steps": 40000, "loss": 0.0001, "lr": 2.545856439778438e-06, "epoch": 5.5799820540011424, "percentage": 85.51, "elapsed_time": "7:51:25", "remaining_time": "1:19:52", "throughput": 2327.33, "total_tokens": 65829056} {"current_steps": 34210, "total_steps": 40000, "loss": 0.0, "lr": 2.541541848500667e-06, "epoch": 5.58079778122196, "percentage": 85.52, "elapsed_time": "7:51:27", "remaining_time": "1:19:47", "throughput": 2327.52, "total_tokens": 65839104} {"current_steps": 34215, "total_steps": 40000, "loss": 0.0, "lr": 2.5372307205993733e-06, "epoch": 5.581613508442777, "percentage": 85.54, "elapsed_time": "7:51:29", "remaining_time": "1:19:43", "throughput": 2327.66, "total_tokens": 65848048} {"current_steps": 34220, "total_steps": 40000, "loss": 0.0, "lr": 2.5329230567393917e-06, "epoch": 5.582429235663594, "percentage": 85.55, "elapsed_time": "7:51:31", "remaining_time": "1:19:38", "throughput": 2327.82, "total_tokens": 65857312} {"current_steps": 34225, "total_steps": 40000, "loss": 0.0001, "lr": 2.5286188575850164e-06, "epoch": 5.583244962884411, "percentage": 85.56, "elapsed_time": "7:51:33", "remaining_time": "1:19:34", "throughput": 2328.01, "total_tokens": 65867664} {"current_steps": 34230, "total_steps": 40000, "loss": 0.0005, "lr": 2.5243181237999984e-06, "epoch": 5.584060690105229, "percentage": 85.58, "elapsed_time": "7:51:35", "remaining_time": "1:19:29", "throughput": 2328.18, "total_tokens": 65877312} {"current_steps": 34235, "total_steps": 40000, "loss": 0.0006, "lr": 2.520020856047578e-06, "epoch": 5.584876417326046, "percentage": 85.59, "elapsed_time": "7:51:37", "remaining_time": "1:19:25", "throughput": 2328.33, "total_tokens": 65886320} {"current_steps": 34240, "total_steps": 40000, "loss": 0.0003, "lr": 2.515727054990438e-06, "epoch": 5.585692144546863, "percentage": 85.6, "elapsed_time": "7:51:39", "remaining_time": "1:19:20", "throughput": 2328.56, "total_tokens": 65897552} {"current_steps": 34245, "total_steps": 40000, "loss": 0.0007, "lr": 2.511436721290747e-06, "epoch": 5.586507871767681, "percentage": 85.61, "elapsed_time": "7:51:41", "remaining_time": "1:19:16", "throughput": 2328.74, "total_tokens": 65907616} {"current_steps": 34250, "total_steps": 40000, "loss": 0.0, "lr": 2.5071498556101164e-06, "epoch": 5.587323598988498, "percentage": 85.62, "elapsed_time": "7:51:43", "remaining_time": "1:19:11", "throughput": 2328.89, "total_tokens": 65916528} {"current_steps": 34255, "total_steps": 40000, "loss": 0.0001, "lr": 2.5028664586096485e-06, "epoch": 5.5881393262093155, "percentage": 85.64, "elapsed_time": "7:51:45", "remaining_time": "1:19:07", "throughput": 2329.04, "total_tokens": 65925696} {"current_steps": 34260, "total_steps": 40000, "loss": 0.0, "lr": 2.498586530949881e-06, "epoch": 5.588955053430133, "percentage": 85.65, "elapsed_time": "7:51:48", "remaining_time": "1:19:02", "throughput": 2329.23, "total_tokens": 65935888} {"current_steps": 34265, "total_steps": 40000, "loss": 0.0001, "lr": 2.4943100732908427e-06, "epoch": 5.589770780650951, "percentage": 85.66, "elapsed_time": "7:51:50", "remaining_time": "1:18:58", "throughput": 2329.32, "total_tokens": 65943296} {"current_steps": 34270, "total_steps": 40000, "loss": 0.0005, "lr": 2.4900370862920188e-06, "epoch": 5.590586507871768, "percentage": 85.67, "elapsed_time": "7:51:52", "remaining_time": "1:18:53", "throughput": 2329.51, "total_tokens": 65953344} {"current_steps": 34275, "total_steps": 40000, "loss": 0.2063, "lr": 2.4857675706123518e-06, "epoch": 5.591402235092585, "percentage": 85.69, "elapsed_time": "7:51:54", "remaining_time": "1:18:49", "throughput": 2329.67, "total_tokens": 65962736} {"current_steps": 34280, "total_steps": 40000, "loss": 0.0001, "lr": 2.4815015269102543e-06, "epoch": 5.592217962313402, "percentage": 85.7, "elapsed_time": "7:51:56", "remaining_time": "1:18:44", "throughput": 2329.81, "total_tokens": 65971808} {"current_steps": 34285, "total_steps": 40000, "loss": 0.0001, "lr": 2.477238955843611e-06, "epoch": 5.59303368953422, "percentage": 85.71, "elapsed_time": "7:51:58", "remaining_time": "1:18:40", "throughput": 2329.95, "total_tokens": 65980784} {"current_steps": 34290, "total_steps": 40000, "loss": 0.0087, "lr": 2.4729798580697573e-06, "epoch": 5.593849416755037, "percentage": 85.72, "elapsed_time": "7:52:00", "remaining_time": "1:18:35", "throughput": 2330.14, "total_tokens": 65991104} {"current_steps": 34295, "total_steps": 40000, "loss": 0.043, "lr": 2.4687242342455034e-06, "epoch": 5.594665143975854, "percentage": 85.74, "elapsed_time": "7:52:02", "remaining_time": "1:18:31", "throughput": 2330.29, "total_tokens": 66000288} {"current_steps": 34300, "total_steps": 40000, "loss": 0.0, "lr": 2.4644720850271196e-06, "epoch": 5.5954808711966715, "percentage": 85.75, "elapsed_time": "7:52:04", "remaining_time": "1:18:27", "throughput": 2330.45, "total_tokens": 66009760} {"current_steps": 34305, "total_steps": 40000, "loss": 0.0015, "lr": 2.4602234110703364e-06, "epoch": 5.5962965984174895, "percentage": 85.76, "elapsed_time": "7:52:07", "remaining_time": "1:18:22", "throughput": 2330.61, "total_tokens": 66019488} {"current_steps": 34310, "total_steps": 40000, "loss": 0.0, "lr": 2.4559782130303576e-06, "epoch": 5.597112325638307, "percentage": 85.78, "elapsed_time": "7:52:09", "remaining_time": "1:18:18", "throughput": 2330.76, "total_tokens": 66028656} {"current_steps": 34315, "total_steps": 40000, "loss": 0.0012, "lr": 2.451736491561843e-06, "epoch": 5.597928052859124, "percentage": 85.79, "elapsed_time": "7:52:11", "remaining_time": "1:18:13", "throughput": 2330.9, "total_tokens": 66037616} {"current_steps": 34320, "total_steps": 40000, "loss": 0.0001, "lr": 2.4474982473189163e-06, "epoch": 5.598743780079941, "percentage": 85.8, "elapsed_time": "7:52:13", "remaining_time": "1:18:09", "throughput": 2331.1, "total_tokens": 66048224} {"current_steps": 34325, "total_steps": 40000, "loss": 0.0529, "lr": 2.4432634809551796e-06, "epoch": 5.599559507300759, "percentage": 85.81, "elapsed_time": "7:52:15", "remaining_time": "1:18:04", "throughput": 2331.22, "total_tokens": 66056656} {"current_steps": 34330, "total_steps": 40000, "loss": 0.0002, "lr": 2.439032193123675e-06, "epoch": 5.600375234521576, "percentage": 85.82, "elapsed_time": "7:52:17", "remaining_time": "1:18:00", "throughput": 2331.41, "total_tokens": 66067040} {"current_steps": 34335, "total_steps": 40000, "loss": 0.0609, "lr": 2.4348043844769297e-06, "epoch": 5.601190961742393, "percentage": 85.84, "elapsed_time": "7:52:19", "remaining_time": "1:17:55", "throughput": 2331.57, "total_tokens": 66076816} {"current_steps": 34340, "total_steps": 40000, "loss": 0.0, "lr": 2.4305800556669146e-06, "epoch": 5.602006688963211, "percentage": 85.85, "elapsed_time": "7:52:22", "remaining_time": "1:17:51", "throughput": 2331.7, "total_tokens": 66085328} {"current_steps": 34345, "total_steps": 40000, "loss": 0.0001, "lr": 2.426359207345083e-06, "epoch": 5.602822416184028, "percentage": 85.86, "elapsed_time": "7:52:24", "remaining_time": "1:17:46", "throughput": 2331.88, "total_tokens": 66095408} {"current_steps": 34350, "total_steps": 40000, "loss": 0.0001, "lr": 2.4221418401623396e-06, "epoch": 5.6036381434048455, "percentage": 85.88, "elapsed_time": "7:52:26", "remaining_time": "1:17:42", "throughput": 2332.02, "total_tokens": 66104544} {"current_steps": 34355, "total_steps": 40000, "loss": 0.0004, "lr": 2.4179279547690557e-06, "epoch": 5.604453870625663, "percentage": 85.89, "elapsed_time": "7:52:28", "remaining_time": "1:17:38", "throughput": 2332.17, "total_tokens": 66113792} {"current_steps": 34360, "total_steps": 40000, "loss": 0.0, "lr": 2.413717551815062e-06, "epoch": 5.60526959784648, "percentage": 85.9, "elapsed_time": "7:52:30", "remaining_time": "1:17:33", "throughput": 2332.32, "total_tokens": 66122992} {"current_steps": 34365, "total_steps": 40000, "loss": 0.0003, "lr": 2.409510631949666e-06, "epoch": 5.606085325067298, "percentage": 85.91, "elapsed_time": "7:52:32", "remaining_time": "1:17:29", "throughput": 2332.49, "total_tokens": 66132992} {"current_steps": 34370, "total_steps": 40000, "loss": 0.1605, "lr": 2.405307195821618e-06, "epoch": 5.606901052288115, "percentage": 85.92, "elapsed_time": "7:52:35", "remaining_time": "1:17:24", "throughput": 2332.67, "total_tokens": 66142992} {"current_steps": 34375, "total_steps": 40000, "loss": 0.063, "lr": 2.4011072440791372e-06, "epoch": 5.607716779508932, "percentage": 85.94, "elapsed_time": "7:52:37", "remaining_time": "1:17:20", "throughput": 2332.81, "total_tokens": 66151680} {"current_steps": 34380, "total_steps": 40000, "loss": 0.0, "lr": 2.3969107773699233e-06, "epoch": 5.60853250672975, "percentage": 85.95, "elapsed_time": "7:52:39", "remaining_time": "1:17:15", "throughput": 2332.98, "total_tokens": 66161232} {"current_steps": 34385, "total_steps": 40000, "loss": 0.0001, "lr": 2.3927177963411096e-06, "epoch": 5.609348233950567, "percentage": 85.96, "elapsed_time": "7:52:41", "remaining_time": "1:17:11", "throughput": 2333.15, "total_tokens": 66171040} {"current_steps": 34390, "total_steps": 40000, "loss": 0.0001, "lr": 2.3885283016393144e-06, "epoch": 5.610163961171384, "percentage": 85.97, "elapsed_time": "7:52:43", "remaining_time": "1:17:06", "throughput": 2333.34, "total_tokens": 66181152} {"current_steps": 34395, "total_steps": 40000, "loss": 0.0004, "lr": 2.3843422939106076e-06, "epoch": 5.610979688392201, "percentage": 85.99, "elapsed_time": "7:52:45", "remaining_time": "1:17:02", "throughput": 2333.5, "total_tokens": 66190528} {"current_steps": 34400, "total_steps": 40000, "loss": 0.0002, "lr": 2.380159773800525e-06, "epoch": 5.6117954156130185, "percentage": 86.0, "elapsed_time": "7:52:47", "remaining_time": "1:16:57", "throughput": 2333.64, "total_tokens": 66199376} {"current_steps": 34400, "total_steps": 40000, "eval_loss": 0.43207570910453796, "epoch": 5.6117954156130185, "percentage": 86.0, "elapsed_time": "7:54:08", "remaining_time": "1:17:11", "throughput": 2327.01, "total_tokens": 66199376} {"current_steps": 34405, "total_steps": 40000, "loss": 0.1126, "lr": 2.3759807419540675e-06, "epoch": 5.6126111428338366, "percentage": 86.01, "elapsed_time": "7:54:12", "remaining_time": "1:17:06", "throughput": 2327.01, "total_tokens": 66208752} {"current_steps": 34410, "total_steps": 40000, "loss": 0.0003, "lr": 2.3718051990156835e-06, "epoch": 5.613426870054654, "percentage": 86.02, "elapsed_time": "7:54:14", "remaining_time": "1:17:02", "throughput": 2327.13, "total_tokens": 66217152} {"current_steps": 34415, "total_steps": 40000, "loss": 0.0005, "lr": 2.367633145629311e-06, "epoch": 5.614242597275471, "percentage": 86.04, "elapsed_time": "7:54:16", "remaining_time": "1:16:58", "throughput": 2327.33, "total_tokens": 66227520} {"current_steps": 34420, "total_steps": 40000, "loss": 0.0001, "lr": 2.363464582438316e-06, "epoch": 5.615058324496289, "percentage": 86.05, "elapsed_time": "7:54:18", "remaining_time": "1:16:53", "throughput": 2327.49, "total_tokens": 66236816} {"current_steps": 34425, "total_steps": 40000, "loss": 0.0001, "lr": 2.3592995100855526e-06, "epoch": 5.615874051717106, "percentage": 86.06, "elapsed_time": "7:54:20", "remaining_time": "1:16:49", "throughput": 2327.65, "total_tokens": 66246416} {"current_steps": 34430, "total_steps": 40000, "loss": 0.0, "lr": 2.3551379292133273e-06, "epoch": 5.616689778937923, "percentage": 86.08, "elapsed_time": "7:54:22", "remaining_time": "1:16:44", "throughput": 2327.82, "total_tokens": 66255920} {"current_steps": 34435, "total_steps": 40000, "loss": 0.0001, "lr": 2.3509798404634047e-06, "epoch": 5.61750550615874, "percentage": 86.09, "elapsed_time": "7:54:24", "remaining_time": "1:16:40", "throughput": 2328.0, "total_tokens": 66265856} {"current_steps": 34440, "total_steps": 40000, "loss": 0.0006, "lr": 2.346825244477019e-06, "epoch": 5.618321233379558, "percentage": 86.1, "elapsed_time": "7:54:26", "remaining_time": "1:16:35", "throughput": 2328.14, "total_tokens": 66274656} {"current_steps": 34445, "total_steps": 40000, "loss": 0.0003, "lr": 2.3426741418948545e-06, "epoch": 5.619136960600375, "percentage": 86.11, "elapsed_time": "7:54:28", "remaining_time": "1:16:31", "throughput": 2328.35, "total_tokens": 66285424} {"current_steps": 34450, "total_steps": 40000, "loss": 0.0001, "lr": 2.3385265333570715e-06, "epoch": 5.6199526878211925, "percentage": 86.12, "elapsed_time": "7:54:30", "remaining_time": "1:16:26", "throughput": 2328.53, "total_tokens": 66295456} {"current_steps": 34455, "total_steps": 40000, "loss": 0.0001, "lr": 2.334382419503278e-06, "epoch": 5.62076841504201, "percentage": 86.14, "elapsed_time": "7:54:33", "remaining_time": "1:16:22", "throughput": 2328.69, "total_tokens": 66304928} {"current_steps": 34460, "total_steps": 40000, "loss": 0.0235, "lr": 2.3302418009725465e-06, "epoch": 5.621584142262828, "percentage": 86.15, "elapsed_time": "7:54:35", "remaining_time": "1:16:17", "throughput": 2328.84, "total_tokens": 66314000} {"current_steps": 34465, "total_steps": 40000, "loss": 0.0355, "lr": 2.326104678403415e-06, "epoch": 5.622399869483645, "percentage": 86.16, "elapsed_time": "7:54:37", "remaining_time": "1:16:13", "throughput": 2329.04, "total_tokens": 66324448} {"current_steps": 34470, "total_steps": 40000, "loss": 0.0002, "lr": 2.321971052433883e-06, "epoch": 5.623215596704462, "percentage": 86.17, "elapsed_time": "7:54:39", "remaining_time": "1:16:08", "throughput": 2329.21, "total_tokens": 66334256} {"current_steps": 34475, "total_steps": 40000, "loss": 0.0001, "lr": 2.3178409237014004e-06, "epoch": 5.624031323925279, "percentage": 86.19, "elapsed_time": "7:54:41", "remaining_time": "1:16:04", "throughput": 2329.37, "total_tokens": 66343728} {"current_steps": 34480, "total_steps": 40000, "loss": 0.0069, "lr": 2.313714292842889e-06, "epoch": 5.624847051146097, "percentage": 86.2, "elapsed_time": "7:54:43", "remaining_time": "1:16:00", "throughput": 2329.58, "total_tokens": 66354800} {"current_steps": 34485, "total_steps": 40000, "loss": 0.0, "lr": 2.309591160494734e-06, "epoch": 5.625662778366914, "percentage": 86.21, "elapsed_time": "7:54:45", "remaining_time": "1:15:55", "throughput": 2329.74, "total_tokens": 66364256} {"current_steps": 34490, "total_steps": 40000, "loss": 0.0, "lr": 2.305471527292763e-06, "epoch": 5.626478505587731, "percentage": 86.22, "elapsed_time": "7:54:47", "remaining_time": "1:15:51", "throughput": 2329.83, "total_tokens": 66371792} {"current_steps": 34495, "total_steps": 40000, "loss": 0.0, "lr": 2.3013553938722817e-06, "epoch": 5.6272942328085485, "percentage": 86.24, "elapsed_time": "7:54:50", "remaining_time": "1:15:46", "throughput": 2330.0, "total_tokens": 66381760} {"current_steps": 34500, "total_steps": 40000, "loss": 0.1563, "lr": 2.297242760868043e-06, "epoch": 5.6281099600293665, "percentage": 86.25, "elapsed_time": "7:54:52", "remaining_time": "1:15:42", "throughput": 2330.16, "total_tokens": 66391184} {"current_steps": 34505, "total_steps": 40000, "loss": 0.0001, "lr": 2.2931336289142735e-06, "epoch": 5.628925687250184, "percentage": 86.26, "elapsed_time": "7:54:54", "remaining_time": "1:15:37", "throughput": 2330.32, "total_tokens": 66400816} {"current_steps": 34510, "total_steps": 40000, "loss": 0.0001, "lr": 2.289027998644655e-06, "epoch": 5.629741414471001, "percentage": 86.28, "elapsed_time": "7:54:56", "remaining_time": "1:15:33", "throughput": 2330.43, "total_tokens": 66408848} {"current_steps": 34515, "total_steps": 40000, "loss": 0.0001, "lr": 2.2849258706923228e-06, "epoch": 5.630557141691818, "percentage": 86.29, "elapsed_time": "7:54:58", "remaining_time": "1:15:28", "throughput": 2330.57, "total_tokens": 66417792} {"current_steps": 34520, "total_steps": 40000, "loss": 0.0, "lr": 2.2808272456898705e-06, "epoch": 5.631372868912636, "percentage": 86.3, "elapsed_time": "7:55:00", "remaining_time": "1:15:24", "throughput": 2330.73, "total_tokens": 66427408} {"current_steps": 34525, "total_steps": 40000, "loss": 0.0004, "lr": 2.2767321242693707e-06, "epoch": 5.632188596133453, "percentage": 86.31, "elapsed_time": "7:55:02", "remaining_time": "1:15:20", "throughput": 2330.92, "total_tokens": 66437824} {"current_steps": 34530, "total_steps": 40000, "loss": 0.0002, "lr": 2.272640507062329e-06, "epoch": 5.63300432335427, "percentage": 86.33, "elapsed_time": "7:55:05", "remaining_time": "1:15:15", "throughput": 2331.04, "total_tokens": 66446448} {"current_steps": 34535, "total_steps": 40000, "loss": 0.0, "lr": 2.2685523946997382e-06, "epoch": 5.633820050575087, "percentage": 86.34, "elapsed_time": "7:55:07", "remaining_time": "1:15:11", "throughput": 2331.25, "total_tokens": 66457488} {"current_steps": 34540, "total_steps": 40000, "loss": 0.0002, "lr": 2.2644677878120245e-06, "epoch": 5.634635777795905, "percentage": 86.35, "elapsed_time": "7:55:09", "remaining_time": "1:15:06", "throughput": 2331.44, "total_tokens": 66467744} {"current_steps": 34545, "total_steps": 40000, "loss": 0.0001, "lr": 2.2603866870290897e-06, "epoch": 5.635451505016722, "percentage": 86.36, "elapsed_time": "7:55:11", "remaining_time": "1:15:02", "throughput": 2331.62, "total_tokens": 66477776} {"current_steps": 34550, "total_steps": 40000, "loss": 0.0, "lr": 2.256309092980294e-06, "epoch": 5.63626723223754, "percentage": 86.38, "elapsed_time": "7:55:13", "remaining_time": "1:14:57", "throughput": 2331.82, "total_tokens": 66488608} {"current_steps": 34555, "total_steps": 40000, "loss": 0.0001, "lr": 2.252235006294448e-06, "epoch": 5.637082959458358, "percentage": 86.39, "elapsed_time": "7:55:15", "remaining_time": "1:14:53", "throughput": 2331.99, "total_tokens": 66498544} {"current_steps": 34560, "total_steps": 40000, "loss": 0.0001, "lr": 2.2481644275998333e-06, "epoch": 5.637898686679175, "percentage": 86.4, "elapsed_time": "7:55:17", "remaining_time": "1:14:48", "throughput": 2332.14, "total_tokens": 66507776} {"current_steps": 34565, "total_steps": 40000, "loss": 0.0001, "lr": 2.2440973575241832e-06, "epoch": 5.638714413899992, "percentage": 86.41, "elapsed_time": "7:55:20", "remaining_time": "1:14:44", "throughput": 2332.31, "total_tokens": 66517792} {"current_steps": 34570, "total_steps": 40000, "loss": 0.0001, "lr": 2.240033796694685e-06, "epoch": 5.639530141120809, "percentage": 86.42, "elapsed_time": "7:55:22", "remaining_time": "1:14:40", "throughput": 2332.5, "total_tokens": 66528128} {"current_steps": 34575, "total_steps": 40000, "loss": 0.165, "lr": 2.235973745737999e-06, "epoch": 5.640345868341626, "percentage": 86.44, "elapsed_time": "7:55:24", "remaining_time": "1:14:35", "throughput": 2332.67, "total_tokens": 66537984} {"current_steps": 34580, "total_steps": 40000, "loss": 0.0, "lr": 2.2319172052802263e-06, "epoch": 5.641161595562444, "percentage": 86.45, "elapsed_time": "7:55:26", "remaining_time": "1:14:31", "throughput": 2332.85, "total_tokens": 66547904} {"current_steps": 34585, "total_steps": 40000, "loss": 0.0001, "lr": 2.2278641759469477e-06, "epoch": 5.641977322783261, "percentage": 86.46, "elapsed_time": "7:55:28", "remaining_time": "1:14:26", "throughput": 2332.99, "total_tokens": 66556720} {"current_steps": 34590, "total_steps": 40000, "loss": 0.0002, "lr": 2.2238146583631825e-06, "epoch": 5.642793050004078, "percentage": 86.48, "elapsed_time": "7:55:30", "remaining_time": "1:14:22", "throughput": 2333.12, "total_tokens": 66565488} {"current_steps": 34595, "total_steps": 40000, "loss": 0.0004, "lr": 2.2197686531534256e-06, "epoch": 5.643608777224896, "percentage": 86.49, "elapsed_time": "7:55:32", "remaining_time": "1:14:17", "throughput": 2333.26, "total_tokens": 66574256} {"current_steps": 34600, "total_steps": 40000, "loss": 0.0015, "lr": 2.2157261609416087e-06, "epoch": 5.6444245044457135, "percentage": 86.5, "elapsed_time": "7:55:34", "remaining_time": "1:14:13", "throughput": 2333.43, "total_tokens": 66583936} {"current_steps": 34600, "total_steps": 40000, "eval_loss": 0.43429622054100037, "epoch": 5.6444245044457135, "percentage": 86.5, "elapsed_time": "7:56:55", "remaining_time": "1:14:26", "throughput": 2326.84, "total_tokens": 66583936} {"current_steps": 34605, "total_steps": 40000, "loss": 0.0734, "lr": 2.211687182351149e-06, "epoch": 5.645240231666531, "percentage": 86.51, "elapsed_time": "7:56:59", "remaining_time": "1:14:21", "throughput": 2326.89, "total_tokens": 66594032} {"current_steps": 34610, "total_steps": 40000, "loss": 0.0096, "lr": 2.2076517180048993e-06, "epoch": 5.646055958887348, "percentage": 86.52, "elapsed_time": "7:57:01", "remaining_time": "1:14:17", "throughput": 2327.07, "total_tokens": 66603968} {"current_steps": 34615, "total_steps": 40000, "loss": 0.0876, "lr": 2.2036197685251834e-06, "epoch": 5.646871686108166, "percentage": 86.54, "elapsed_time": "7:57:03", "remaining_time": "1:14:12", "throughput": 2327.23, "total_tokens": 66613360} {"current_steps": 34620, "total_steps": 40000, "loss": 0.0449, "lr": 2.199591334533771e-06, "epoch": 5.647687413328983, "percentage": 86.55, "elapsed_time": "7:57:05", "remaining_time": "1:14:08", "throughput": 2327.38, "total_tokens": 66622496} {"current_steps": 34625, "total_steps": 40000, "loss": 0.0017, "lr": 2.1955664166519036e-06, "epoch": 5.6485031405498, "percentage": 86.56, "elapsed_time": "7:57:07", "remaining_time": "1:14:03", "throughput": 2327.56, "total_tokens": 66632544} {"current_steps": 34630, "total_steps": 40000, "loss": 0.0001, "lr": 2.1915450155002793e-06, "epoch": 5.649318867770617, "percentage": 86.58, "elapsed_time": "7:57:09", "remaining_time": "1:13:59", "throughput": 2327.75, "total_tokens": 66642688} {"current_steps": 34635, "total_steps": 40000, "loss": 0.0001, "lr": 2.187527131699038e-06, "epoch": 5.650134594991435, "percentage": 86.59, "elapsed_time": "7:57:11", "remaining_time": "1:13:55", "throughput": 2327.94, "total_tokens": 66653168} {"current_steps": 34640, "total_steps": 40000, "loss": 0.0005, "lr": 2.18351276586779e-06, "epoch": 5.650950322212252, "percentage": 86.6, "elapsed_time": "7:57:13", "remaining_time": "1:13:50", "throughput": 2328.11, "total_tokens": 66662848} {"current_steps": 34645, "total_steps": 40000, "loss": 0.0, "lr": 2.1795019186256092e-06, "epoch": 5.6517660494330695, "percentage": 86.61, "elapsed_time": "7:57:15", "remaining_time": "1:13:46", "throughput": 2328.28, "total_tokens": 66672528} {"current_steps": 34650, "total_steps": 40000, "loss": 0.0, "lr": 2.1754945905910094e-06, "epoch": 5.652581776653887, "percentage": 86.62, "elapsed_time": "7:57:17", "remaining_time": "1:13:41", "throughput": 2328.46, "total_tokens": 66682320} {"current_steps": 34655, "total_steps": 40000, "loss": 0.0067, "lr": 2.171490782381977e-06, "epoch": 5.653397503874705, "percentage": 86.64, "elapsed_time": "7:57:20", "remaining_time": "1:13:37", "throughput": 2328.61, "total_tokens": 66691520} {"current_steps": 34660, "total_steps": 40000, "loss": 0.0, "lr": 2.1674904946159425e-06, "epoch": 5.654213231095522, "percentage": 86.65, "elapsed_time": "7:57:22", "remaining_time": "1:13:32", "throughput": 2328.75, "total_tokens": 66700416} {"current_steps": 34665, "total_steps": 40000, "loss": 0.0001, "lr": 2.16349372790981e-06, "epoch": 5.655028958316339, "percentage": 86.66, "elapsed_time": "7:57:24", "remaining_time": "1:13:28", "throughput": 2328.93, "total_tokens": 66710224} {"current_steps": 34670, "total_steps": 40000, "loss": 0.0163, "lr": 2.159500482879928e-06, "epoch": 5.655844685537156, "percentage": 86.67, "elapsed_time": "7:57:26", "remaining_time": "1:13:23", "throughput": 2329.07, "total_tokens": 66719072} {"current_steps": 34675, "total_steps": 40000, "loss": 0.0001, "lr": 2.155510760142096e-06, "epoch": 5.656660412757974, "percentage": 86.69, "elapsed_time": "7:57:28", "remaining_time": "1:13:19", "throughput": 2329.22, "total_tokens": 66728400} {"current_steps": 34680, "total_steps": 40000, "loss": 0.0003, "lr": 2.151524560311588e-06, "epoch": 5.657476139978791, "percentage": 86.7, "elapsed_time": "7:57:30", "remaining_time": "1:13:15", "throughput": 2329.38, "total_tokens": 66737824} {"current_steps": 34685, "total_steps": 40000, "loss": 0.0, "lr": 2.147541884003129e-06, "epoch": 5.658291867199608, "percentage": 86.71, "elapsed_time": "7:57:32", "remaining_time": "1:13:10", "throughput": 2329.51, "total_tokens": 66746160} {"current_steps": 34690, "total_steps": 40000, "loss": 0.0003, "lr": 2.1435627318308895e-06, "epoch": 5.659107594420425, "percentage": 86.72, "elapsed_time": "7:57:34", "remaining_time": "1:13:06", "throughput": 2329.7, "total_tokens": 66756640} {"current_steps": 34695, "total_steps": 40000, "loss": 0.0001, "lr": 2.139587104408511e-06, "epoch": 5.6599233216412435, "percentage": 86.74, "elapsed_time": "7:57:36", "remaining_time": "1:13:01", "throughput": 2329.88, "total_tokens": 66766496} {"current_steps": 34700, "total_steps": 40000, "loss": 0.0001, "lr": 2.1356150023490783e-06, "epoch": 5.660739048862061, "percentage": 86.75, "elapsed_time": "7:57:38", "remaining_time": "1:12:57", "throughput": 2330.03, "total_tokens": 66775520} {"current_steps": 34705, "total_steps": 40000, "loss": 0.0, "lr": 2.1316464262651464e-06, "epoch": 5.661554776082878, "percentage": 86.76, "elapsed_time": "7:57:40", "remaining_time": "1:12:52", "throughput": 2330.24, "total_tokens": 66786624} {"current_steps": 34710, "total_steps": 40000, "loss": 0.0002, "lr": 2.1276813767687224e-06, "epoch": 5.662370503303695, "percentage": 86.78, "elapsed_time": "7:57:42", "remaining_time": "1:12:48", "throughput": 2330.41, "total_tokens": 66796528} {"current_steps": 34715, "total_steps": 40000, "loss": 0.0003, "lr": 2.123719854471254e-06, "epoch": 5.663186230524513, "percentage": 86.79, "elapsed_time": "7:57:45", "remaining_time": "1:12:43", "throughput": 2330.58, "total_tokens": 66806336} {"current_steps": 34720, "total_steps": 40000, "loss": 0.0, "lr": 2.119761859983668e-06, "epoch": 5.66400195774533, "percentage": 86.8, "elapsed_time": "7:57:47", "remaining_time": "1:12:39", "throughput": 2330.73, "total_tokens": 66815728} {"current_steps": 34725, "total_steps": 40000, "loss": 0.0004, "lr": 2.1158073939163386e-06, "epoch": 5.664817684966147, "percentage": 86.81, "elapsed_time": "7:57:49", "remaining_time": "1:12:35", "throughput": 2330.9, "total_tokens": 66825392} {"current_steps": 34730, "total_steps": 40000, "loss": 0.0629, "lr": 2.111856456879088e-06, "epoch": 5.665633412186965, "percentage": 86.83, "elapsed_time": "7:57:51", "remaining_time": "1:12:30", "throughput": 2331.05, "total_tokens": 66834624} {"current_steps": 34735, "total_steps": 40000, "loss": 0.0001, "lr": 2.1079090494811993e-06, "epoch": 5.666449139407782, "percentage": 86.84, "elapsed_time": "7:57:53", "remaining_time": "1:12:26", "throughput": 2331.19, "total_tokens": 66843856} {"current_steps": 34740, "total_steps": 40000, "loss": 0.0003, "lr": 2.103965172331418e-06, "epoch": 5.667264866628599, "percentage": 86.85, "elapsed_time": "7:57:55", "remaining_time": "1:12:21", "throughput": 2331.34, "total_tokens": 66853040} {"current_steps": 34745, "total_steps": 40000, "loss": 0.0, "lr": 2.100024826037933e-06, "epoch": 5.6680805938494165, "percentage": 86.86, "elapsed_time": "7:57:57", "remaining_time": "1:12:17", "throughput": 2331.49, "total_tokens": 66862240} {"current_steps": 34750, "total_steps": 40000, "loss": 0.1752, "lr": 2.0960880112084027e-06, "epoch": 5.668896321070234, "percentage": 86.88, "elapsed_time": "7:58:00", "remaining_time": "1:12:12", "throughput": 2331.66, "total_tokens": 66872128} {"current_steps": 34755, "total_steps": 40000, "loss": 0.0001, "lr": 2.092154728449927e-06, "epoch": 5.669712048291052, "percentage": 86.89, "elapsed_time": "7:58:02", "remaining_time": "1:12:08", "throughput": 2331.8, "total_tokens": 66881344} {"current_steps": 34760, "total_steps": 40000, "loss": 0.0, "lr": 2.0882249783690687e-06, "epoch": 5.670527775511869, "percentage": 86.9, "elapsed_time": "7:58:04", "remaining_time": "1:12:04", "throughput": 2331.95, "total_tokens": 66890656} {"current_steps": 34765, "total_steps": 40000, "loss": 0.1042, "lr": 2.084298761571851e-06, "epoch": 5.671343502732686, "percentage": 86.91, "elapsed_time": "7:58:06", "remaining_time": "1:11:59", "throughput": 2332.13, "total_tokens": 66900720} {"current_steps": 34770, "total_steps": 40000, "loss": 0.0, "lr": 2.080376078663737e-06, "epoch": 5.672159229953504, "percentage": 86.92, "elapsed_time": "7:58:08", "remaining_time": "1:11:55", "throughput": 2332.3, "total_tokens": 66910624} {"current_steps": 34775, "total_steps": 40000, "loss": 0.1396, "lr": 2.0764569302496593e-06, "epoch": 5.672974957174321, "percentage": 86.94, "elapsed_time": "7:58:10", "remaining_time": "1:11:50", "throughput": 2332.5, "total_tokens": 66921440} {"current_steps": 34780, "total_steps": 40000, "loss": 0.045, "lr": 2.0725413169339957e-06, "epoch": 5.673790684395138, "percentage": 86.95, "elapsed_time": "7:58:12", "remaining_time": "1:11:46", "throughput": 2332.69, "total_tokens": 66931808} {"current_steps": 34785, "total_steps": 40000, "loss": 0.0, "lr": 2.068629239320588e-06, "epoch": 5.674606411615955, "percentage": 86.96, "elapsed_time": "7:58:15", "remaining_time": "1:11:41", "throughput": 2332.82, "total_tokens": 66940576} {"current_steps": 34790, "total_steps": 40000, "loss": 0.0014, "lr": 2.064720698012726e-06, "epoch": 5.6754221388367725, "percentage": 86.98, "elapsed_time": "7:58:17", "remaining_time": "1:11:37", "throughput": 2332.99, "total_tokens": 66950304} {"current_steps": 34795, "total_steps": 40000, "loss": 0.0, "lr": 2.0608156936131522e-06, "epoch": 5.6762378660575905, "percentage": 86.99, "elapsed_time": "7:58:19", "remaining_time": "1:11:33", "throughput": 2333.14, "total_tokens": 66959792} {"current_steps": 34800, "total_steps": 40000, "loss": 0.0, "lr": 2.056914226724074e-06, "epoch": 5.677053593278408, "percentage": 87.0, "elapsed_time": "7:58:21", "remaining_time": "1:11:28", "throughput": 2333.29, "total_tokens": 66968960} {"current_steps": 34800, "total_steps": 40000, "eval_loss": 0.4236864447593689, "epoch": 5.677053593278408, "percentage": 87.0, "elapsed_time": "7:59:42", "remaining_time": "1:11:40", "throughput": 2326.75, "total_tokens": 66968960} {"current_steps": 34805, "total_steps": 40000, "loss": 0.0191, "lr": 2.0530162979471385e-06, "epoch": 5.677869320499225, "percentage": 87.01, "elapsed_time": "7:59:46", "remaining_time": "1:11:36", "throughput": 2326.77, "total_tokens": 66978448} {"current_steps": 34810, "total_steps": 40000, "loss": 0.019, "lr": 2.0491219078834667e-06, "epoch": 5.678685047720043, "percentage": 87.02, "elapsed_time": "7:59:48", "remaining_time": "1:11:32", "throughput": 2327.01, "total_tokens": 66990528} {"current_steps": 34815, "total_steps": 40000, "loss": 0.0794, "lr": 2.045231057133612e-06, "epoch": 5.67950077494086, "percentage": 87.04, "elapsed_time": "7:59:50", "remaining_time": "1:11:27", "throughput": 2327.18, "total_tokens": 67000368} {"current_steps": 34820, "total_steps": 40000, "loss": 0.1084, "lr": 2.0413437462975944e-06, "epoch": 5.680316502161677, "percentage": 87.05, "elapsed_time": "7:59:52", "remaining_time": "1:11:23", "throughput": 2327.32, "total_tokens": 67009280} {"current_steps": 34825, "total_steps": 40000, "loss": 0.0, "lr": 2.0374599759748843e-06, "epoch": 5.681132229382494, "percentage": 87.06, "elapsed_time": "7:59:54", "remaining_time": "1:11:18", "throughput": 2327.48, "total_tokens": 67018960} {"current_steps": 34830, "total_steps": 40000, "loss": 0.0, "lr": 2.033579746764419e-06, "epoch": 5.681947956603312, "percentage": 87.08, "elapsed_time": "7:59:56", "remaining_time": "1:11:14", "throughput": 2327.59, "total_tokens": 67027024} {"current_steps": 34835, "total_steps": 40000, "loss": 0.0786, "lr": 2.029703059264565e-06, "epoch": 5.682763683824129, "percentage": 87.09, "elapsed_time": "7:59:58", "remaining_time": "1:11:10", "throughput": 2327.78, "total_tokens": 67037456} {"current_steps": 34840, "total_steps": 40000, "loss": 0.0772, "lr": 2.02582991407316e-06, "epoch": 5.6835794110449465, "percentage": 87.1, "elapsed_time": "8:00:01", "remaining_time": "1:11:05", "throughput": 2327.94, "total_tokens": 67047088} {"current_steps": 34845, "total_steps": 40000, "loss": 0.0002, "lr": 2.0219603117874992e-06, "epoch": 5.684395138265764, "percentage": 87.11, "elapsed_time": "8:00:03", "remaining_time": "1:11:01", "throughput": 2328.08, "total_tokens": 67056208} {"current_steps": 34850, "total_steps": 40000, "loss": 0.0917, "lr": 2.0180942530043156e-06, "epoch": 5.685210865486582, "percentage": 87.12, "elapsed_time": "8:00:05", "remaining_time": "1:10:56", "throughput": 2328.23, "total_tokens": 67065344} {"current_steps": 34855, "total_steps": 40000, "loss": 0.0006, "lr": 2.0142317383198107e-06, "epoch": 5.686026592707399, "percentage": 87.14, "elapsed_time": "8:00:07", "remaining_time": "1:10:52", "throughput": 2328.41, "total_tokens": 67075344} {"current_steps": 34860, "total_steps": 40000, "loss": 0.0, "lr": 2.0103727683296243e-06, "epoch": 5.686842319928216, "percentage": 87.15, "elapsed_time": "8:00:09", "remaining_time": "1:10:47", "throughput": 2328.61, "total_tokens": 67085776} {"current_steps": 34865, "total_steps": 40000, "loss": 0.0001, "lr": 2.0065173436288636e-06, "epoch": 5.687658047149033, "percentage": 87.16, "elapsed_time": "8:00:11", "remaining_time": "1:10:43", "throughput": 2328.74, "total_tokens": 67094448} {"current_steps": 34870, "total_steps": 40000, "loss": 0.0815, "lr": 2.002665464812087e-06, "epoch": 5.688473774369851, "percentage": 87.17, "elapsed_time": "8:00:13", "remaining_time": "1:10:38", "throughput": 2328.94, "total_tokens": 67105088} {"current_steps": 34875, "total_steps": 40000, "loss": 0.0001, "lr": 1.998817132473291e-06, "epoch": 5.689289501590668, "percentage": 87.19, "elapsed_time": "8:00:15", "remaining_time": "1:10:34", "throughput": 2329.14, "total_tokens": 67115760} {"current_steps": 34880, "total_steps": 40000, "loss": 0.1438, "lr": 1.9949723472059507e-06, "epoch": 5.690105228811485, "percentage": 87.2, "elapsed_time": "8:00:17", "remaining_time": "1:10:30", "throughput": 2329.31, "total_tokens": 67125280} {"current_steps": 34885, "total_steps": 40000, "loss": 0.0017, "lr": 1.9911311096029726e-06, "epoch": 5.690920956032302, "percentage": 87.21, "elapsed_time": "8:00:19", "remaining_time": "1:10:25", "throughput": 2329.51, "total_tokens": 67136096} {"current_steps": 34890, "total_steps": 40000, "loss": 0.0023, "lr": 1.9872934202567224e-06, "epoch": 5.69173668325312, "percentage": 87.22, "elapsed_time": "8:00:21", "remaining_time": "1:10:21", "throughput": 2329.66, "total_tokens": 67145056} {"current_steps": 34895, "total_steps": 40000, "loss": 0.0412, "lr": 1.9834592797590257e-06, "epoch": 5.692552410473938, "percentage": 87.24, "elapsed_time": "8:00:23", "remaining_time": "1:10:16", "throughput": 2329.82, "total_tokens": 67154656} {"current_steps": 34900, "total_steps": 40000, "loss": 0.0001, "lr": 1.979628688701149e-06, "epoch": 5.693368137694755, "percentage": 87.25, "elapsed_time": "8:00:25", "remaining_time": "1:10:12", "throughput": 2330.0, "total_tokens": 67164656} {"current_steps": 34905, "total_steps": 40000, "loss": 0.0, "lr": 1.9758016476738193e-06, "epoch": 5.694183864915573, "percentage": 87.26, "elapsed_time": "8:00:28", "remaining_time": "1:10:07", "throughput": 2330.24, "total_tokens": 67176400} {"current_steps": 34910, "total_steps": 40000, "loss": 0.1461, "lr": 1.971978157267221e-06, "epoch": 5.69499959213639, "percentage": 87.28, "elapsed_time": "8:00:30", "remaining_time": "1:10:03", "throughput": 2330.45, "total_tokens": 67187376} {"current_steps": 34915, "total_steps": 40000, "loss": 0.0007, "lr": 1.968158218070973e-06, "epoch": 5.695815319357207, "percentage": 87.29, "elapsed_time": "8:00:32", "remaining_time": "1:09:59", "throughput": 2330.62, "total_tokens": 67197072} {"current_steps": 34920, "total_steps": 40000, "loss": 0.0001, "lr": 1.9643418306741682e-06, "epoch": 5.696631046578024, "percentage": 87.3, "elapsed_time": "8:00:34", "remaining_time": "1:09:54", "throughput": 2330.81, "total_tokens": 67207328} {"current_steps": 34925, "total_steps": 40000, "loss": 0.0, "lr": 1.9605289956653337e-06, "epoch": 5.697446773798841, "percentage": 87.31, "elapsed_time": "8:00:36", "remaining_time": "1:09:50", "throughput": 2330.97, "total_tokens": 67216768} {"current_steps": 34930, "total_steps": 40000, "loss": 0.0028, "lr": 1.9567197136324626e-06, "epoch": 5.698262501019659, "percentage": 87.33, "elapsed_time": "8:00:38", "remaining_time": "1:09:45", "throughput": 2331.15, "total_tokens": 67226704} {"current_steps": 34935, "total_steps": 40000, "loss": 0.0, "lr": 1.9529139851629935e-06, "epoch": 5.699078228240476, "percentage": 87.34, "elapsed_time": "8:00:40", "remaining_time": "1:09:41", "throughput": 2331.36, "total_tokens": 67237584} {"current_steps": 34940, "total_steps": 40000, "loss": 0.0001, "lr": 1.949111810843812e-06, "epoch": 5.6998939554612935, "percentage": 87.35, "elapsed_time": "8:00:42", "remaining_time": "1:09:36", "throughput": 2331.53, "total_tokens": 67247264} {"current_steps": 34945, "total_steps": 40000, "loss": 0.0001, "lr": 1.9453131912612694e-06, "epoch": 5.7007096826821115, "percentage": 87.36, "elapsed_time": "8:00:44", "remaining_time": "1:09:32", "throughput": 2331.7, "total_tokens": 67257040} {"current_steps": 34950, "total_steps": 40000, "loss": 0.0, "lr": 1.941518127001149e-06, "epoch": 5.701525409902929, "percentage": 87.38, "elapsed_time": "8:00:46", "remaining_time": "1:09:28", "throughput": 2331.91, "total_tokens": 67267888} {"current_steps": 34955, "total_steps": 40000, "loss": 0.0, "lr": 1.9377266186487107e-06, "epoch": 5.702341137123746, "percentage": 87.39, "elapsed_time": "8:00:48", "remaining_time": "1:09:23", "throughput": 2332.07, "total_tokens": 67277328} {"current_steps": 34960, "total_steps": 40000, "loss": 0.0, "lr": 1.9339386667886483e-06, "epoch": 5.703156864344563, "percentage": 87.4, "elapsed_time": "8:00:50", "remaining_time": "1:09:19", "throughput": 2332.26, "total_tokens": 67287712} {"current_steps": 34965, "total_steps": 40000, "loss": 0.0, "lr": 1.9301542720051024e-06, "epoch": 5.70397259156538, "percentage": 87.41, "elapsed_time": "8:00:52", "remaining_time": "1:09:14", "throughput": 2332.41, "total_tokens": 67296864} {"current_steps": 34970, "total_steps": 40000, "loss": 0.0004, "lr": 1.926373434881684e-06, "epoch": 5.704788318786198, "percentage": 87.42, "elapsed_time": "8:00:55", "remaining_time": "1:09:10", "throughput": 2332.52, "total_tokens": 67305008} {"current_steps": 34975, "total_steps": 40000, "loss": 0.0, "lr": 1.9225961560014468e-06, "epoch": 5.705604046007015, "percentage": 87.44, "elapsed_time": "8:00:57", "remaining_time": "1:09:06", "throughput": 2332.64, "total_tokens": 67313264} {"current_steps": 34980, "total_steps": 40000, "loss": 0.0001, "lr": 1.918822435946885e-06, "epoch": 5.706419773227832, "percentage": 87.45, "elapsed_time": "8:00:59", "remaining_time": "1:09:01", "throughput": 2332.85, "total_tokens": 67324128} {"current_steps": 34985, "total_steps": 40000, "loss": 0.0002, "lr": 1.915052275299961e-06, "epoch": 5.70723550044865, "percentage": 87.46, "elapsed_time": "8:01:01", "remaining_time": "1:08:57", "throughput": 2332.99, "total_tokens": 67333056} {"current_steps": 34990, "total_steps": 40000, "loss": 0.0, "lr": 1.9112856746420854e-06, "epoch": 5.7080512276694675, "percentage": 87.48, "elapsed_time": "8:01:03", "remaining_time": "1:08:52", "throughput": 2333.17, "total_tokens": 67343120} {"current_steps": 34995, "total_steps": 40000, "loss": 0.0002, "lr": 1.907522634554104e-06, "epoch": 5.708866954890285, "percentage": 87.49, "elapsed_time": "8:01:05", "remaining_time": "1:08:48", "throughput": 2333.34, "total_tokens": 67352720} {"current_steps": 35000, "total_steps": 40000, "loss": 0.0001, "lr": 1.9037631556163337e-06, "epoch": 5.709682682111102, "percentage": 87.5, "elapsed_time": "8:01:07", "remaining_time": "1:08:43", "throughput": 2333.47, "total_tokens": 67361344} {"current_steps": 35000, "total_steps": 40000, "eval_loss": 0.4073232114315033, "epoch": 5.709682682111102, "percentage": 87.5, "elapsed_time": "8:02:28", "remaining_time": "1:08:55", "throughput": 2326.95, "total_tokens": 67361344} {"current_steps": 35005, "total_steps": 40000, "loss": 0.0001, "lr": 1.9000072384085272e-06, "epoch": 5.71049840933192, "percentage": 87.51, "elapsed_time": "8:02:32", "remaining_time": "1:08:51", "throughput": 2326.97, "total_tokens": 67370688} {"current_steps": 35010, "total_steps": 40000, "loss": 0.1837, "lr": 1.8962548835098987e-06, "epoch": 5.711314136552737, "percentage": 87.52, "elapsed_time": "8:02:34", "remaining_time": "1:08:46", "throughput": 2327.08, "total_tokens": 67378512} {"current_steps": 35015, "total_steps": 40000, "loss": 0.0001, "lr": 1.8925060914991077e-06, "epoch": 5.712129863773554, "percentage": 87.54, "elapsed_time": "8:02:36", "remaining_time": "1:08:42", "throughput": 2327.22, "total_tokens": 67387536} {"current_steps": 35020, "total_steps": 40000, "loss": 0.0, "lr": 1.888760862954264e-06, "epoch": 5.712945590994371, "percentage": 87.55, "elapsed_time": "8:02:38", "remaining_time": "1:08:38", "throughput": 2327.37, "total_tokens": 67396752} {"current_steps": 35025, "total_steps": 40000, "loss": 0.0019, "lr": 1.8850191984529309e-06, "epoch": 5.713761318215189, "percentage": 87.56, "elapsed_time": "8:02:40", "remaining_time": "1:08:33", "throughput": 2327.5, "total_tokens": 67405632} {"current_steps": 35030, "total_steps": 40000, "loss": 0.0005, "lr": 1.8812810985721186e-06, "epoch": 5.714577045436006, "percentage": 87.58, "elapsed_time": "8:02:42", "remaining_time": "1:08:29", "throughput": 2327.7, "total_tokens": 67416496} {"current_steps": 35035, "total_steps": 40000, "loss": 0.0001, "lr": 1.8775465638882856e-06, "epoch": 5.715392772656823, "percentage": 87.59, "elapsed_time": "8:02:44", "remaining_time": "1:08:24", "throughput": 2327.85, "total_tokens": 67425696} {"current_steps": 35040, "total_steps": 40000, "loss": 0.0, "lr": 1.8738155949773517e-06, "epoch": 5.716208499877641, "percentage": 87.6, "elapsed_time": "8:02:46", "remaining_time": "1:08:20", "throughput": 2327.99, "total_tokens": 67434704} {"current_steps": 35045, "total_steps": 40000, "loss": 0.0005, "lr": 1.8700881924146707e-06, "epoch": 5.717024227098459, "percentage": 87.61, "elapsed_time": "8:02:49", "remaining_time": "1:08:15", "throughput": 2328.1, "total_tokens": 67443088} {"current_steps": 35050, "total_steps": 40000, "loss": 0.01, "lr": 1.8663643567750577e-06, "epoch": 5.717839954319276, "percentage": 87.62, "elapsed_time": "8:02:51", "remaining_time": "1:08:11", "throughput": 2328.31, "total_tokens": 67453984} {"current_steps": 35055, "total_steps": 40000, "loss": 0.0, "lr": 1.8626440886327813e-06, "epoch": 5.718655681540093, "percentage": 87.64, "elapsed_time": "8:02:53", "remaining_time": "1:08:07", "throughput": 2328.47, "total_tokens": 67463584} {"current_steps": 35060, "total_steps": 40000, "loss": 0.0005, "lr": 1.8589273885615432e-06, "epoch": 5.71947140876091, "percentage": 87.65, "elapsed_time": "8:02:55", "remaining_time": "1:08:02", "throughput": 2328.67, "total_tokens": 67474416} {"current_steps": 35065, "total_steps": 40000, "loss": 0.0001, "lr": 1.8552142571345133e-06, "epoch": 5.720287135981728, "percentage": 87.66, "elapsed_time": "8:02:57", "remaining_time": "1:07:58", "throughput": 2328.82, "total_tokens": 67483920} {"current_steps": 35070, "total_steps": 40000, "loss": 0.0, "lr": 1.8515046949243025e-06, "epoch": 5.721102863202545, "percentage": 87.67, "elapsed_time": "8:02:59", "remaining_time": "1:07:53", "throughput": 2328.99, "total_tokens": 67493824} {"current_steps": 35075, "total_steps": 40000, "loss": 0.0001, "lr": 1.8477987025029674e-06, "epoch": 5.721918590423362, "percentage": 87.69, "elapsed_time": "8:03:01", "remaining_time": "1:07:49", "throughput": 2329.18, "total_tokens": 67504336} {"current_steps": 35080, "total_steps": 40000, "loss": 0.0005, "lr": 1.8440962804420232e-06, "epoch": 5.72273431764418, "percentage": 87.7, "elapsed_time": "8:03:04", "remaining_time": "1:07:45", "throughput": 2329.34, "total_tokens": 67513920} {"current_steps": 35085, "total_steps": 40000, "loss": 0.0001, "lr": 1.8403974293124265e-06, "epoch": 5.723550044864997, "percentage": 87.71, "elapsed_time": "8:03:06", "remaining_time": "1:07:40", "throughput": 2329.47, "total_tokens": 67522608} {"current_steps": 35090, "total_steps": 40000, "loss": 0.0059, "lr": 1.8367021496845854e-06, "epoch": 5.7243657720858145, "percentage": 87.72, "elapsed_time": "8:03:08", "remaining_time": "1:07:36", "throughput": 2329.66, "total_tokens": 67533040} {"current_steps": 35095, "total_steps": 40000, "loss": 0.0778, "lr": 1.8330104421283662e-06, "epoch": 5.725181499306632, "percentage": 87.74, "elapsed_time": "8:03:10", "remaining_time": "1:07:31", "throughput": 2329.81, "total_tokens": 67542624} {"current_steps": 35100, "total_steps": 40000, "loss": 0.0, "lr": 1.8293223072130717e-06, "epoch": 5.725997226527449, "percentage": 87.75, "elapsed_time": "8:03:12", "remaining_time": "1:07:27", "throughput": 2329.99, "total_tokens": 67552720} {"current_steps": 35105, "total_steps": 40000, "loss": 0.0004, "lr": 1.8256377455074525e-06, "epoch": 5.726812953748267, "percentage": 87.76, "elapsed_time": "8:03:14", "remaining_time": "1:07:23", "throughput": 2330.16, "total_tokens": 67562576} {"current_steps": 35110, "total_steps": 40000, "loss": 0.0003, "lr": 1.8219567575797263e-06, "epoch": 5.727628680969084, "percentage": 87.78, "elapsed_time": "8:03:17", "remaining_time": "1:07:18", "throughput": 2330.33, "total_tokens": 67572512} {"current_steps": 35115, "total_steps": 40000, "loss": 0.0, "lr": 1.8182793439975365e-06, "epoch": 5.728444408189901, "percentage": 87.79, "elapsed_time": "8:03:19", "remaining_time": "1:07:14", "throughput": 2330.49, "total_tokens": 67582240} {"current_steps": 35120, "total_steps": 40000, "loss": 0.0059, "lr": 1.8146055053279958e-06, "epoch": 5.729260135410719, "percentage": 87.8, "elapsed_time": "8:03:21", "remaining_time": "1:07:09", "throughput": 2330.66, "total_tokens": 67592096} {"current_steps": 35125, "total_steps": 40000, "loss": 0.069, "lr": 1.8109352421376486e-06, "epoch": 5.730075862631536, "percentage": 87.81, "elapsed_time": "8:03:23", "remaining_time": "1:07:05", "throughput": 2330.78, "total_tokens": 67600672} {"current_steps": 35130, "total_steps": 40000, "loss": 0.0006, "lr": 1.8072685549924972e-06, "epoch": 5.730891589852353, "percentage": 87.83, "elapsed_time": "8:03:25", "remaining_time": "1:07:00", "throughput": 2330.96, "total_tokens": 67610816} {"current_steps": 35135, "total_steps": 40000, "loss": 0.0, "lr": 1.8036054444579982e-06, "epoch": 5.7317073170731705, "percentage": 87.84, "elapsed_time": "8:03:27", "remaining_time": "1:06:56", "throughput": 2331.1, "total_tokens": 67619984} {"current_steps": 35140, "total_steps": 40000, "loss": 0.0001, "lr": 1.7999459110990407e-06, "epoch": 5.732523044293988, "percentage": 87.85, "elapsed_time": "8:03:29", "remaining_time": "1:06:52", "throughput": 2331.31, "total_tokens": 67631120} {"current_steps": 35145, "total_steps": 40000, "loss": 0.0001, "lr": 1.7962899554799712e-06, "epoch": 5.733338771514806, "percentage": 87.86, "elapsed_time": "8:03:32", "remaining_time": "1:06:47", "throughput": 2331.49, "total_tokens": 67641232} {"current_steps": 35150, "total_steps": 40000, "loss": 0.0001, "lr": 1.7926375781645937e-06, "epoch": 5.734154498735623, "percentage": 87.88, "elapsed_time": "8:03:34", "remaining_time": "1:06:43", "throughput": 2331.67, "total_tokens": 67651568} {"current_steps": 35155, "total_steps": 40000, "loss": 0.0, "lr": 1.7889887797161359e-06, "epoch": 5.73497022595644, "percentage": 87.89, "elapsed_time": "8:03:36", "remaining_time": "1:06:38", "throughput": 2331.88, "total_tokens": 67662560} {"current_steps": 35160, "total_steps": 40000, "loss": 0.0602, "lr": 1.7853435606973028e-06, "epoch": 5.735785953177258, "percentage": 87.9, "elapsed_time": "8:03:38", "remaining_time": "1:06:34", "throughput": 2332.05, "total_tokens": 67672400} {"current_steps": 35165, "total_steps": 40000, "loss": 0.0346, "lr": 1.781701921670223e-06, "epoch": 5.736601680398075, "percentage": 87.91, "elapsed_time": "8:03:40", "remaining_time": "1:06:30", "throughput": 2332.21, "total_tokens": 67682272} {"current_steps": 35170, "total_steps": 40000, "loss": 0.0, "lr": 1.7780638631964886e-06, "epoch": 5.737417407618892, "percentage": 87.92, "elapsed_time": "8:03:42", "remaining_time": "1:06:25", "throughput": 2332.38, "total_tokens": 67692128} {"current_steps": 35175, "total_steps": 40000, "loss": 0.0001, "lr": 1.7744293858371314e-06, "epoch": 5.738233134839709, "percentage": 87.94, "elapsed_time": "8:03:44", "remaining_time": "1:06:21", "throughput": 2332.51, "total_tokens": 67700880} {"current_steps": 35180, "total_steps": 40000, "loss": 0.0176, "lr": 1.770798490152631e-06, "epoch": 5.739048862060527, "percentage": 87.95, "elapsed_time": "8:03:47", "remaining_time": "1:06:16", "throughput": 2332.67, "total_tokens": 67710432} {"current_steps": 35185, "total_steps": 40000, "loss": 0.0003, "lr": 1.767171176702917e-06, "epoch": 5.7398645892813445, "percentage": 87.96, "elapsed_time": "8:03:49", "remaining_time": "1:06:12", "throughput": 2332.81, "total_tokens": 67719680} {"current_steps": 35190, "total_steps": 40000, "loss": 0.0001, "lr": 1.7635474460473755e-06, "epoch": 5.740680316502162, "percentage": 87.98, "elapsed_time": "8:03:51", "remaining_time": "1:06:08", "throughput": 2332.94, "total_tokens": 67728400} {"current_steps": 35195, "total_steps": 40000, "loss": 0.0001, "lr": 1.7599272987448206e-06, "epoch": 5.741496043722979, "percentage": 87.99, "elapsed_time": "8:03:53", "remaining_time": "1:06:03", "throughput": 2333.07, "total_tokens": 67736960} {"current_steps": 35200, "total_steps": 40000, "loss": 0.0024, "lr": 1.7563107353535362e-06, "epoch": 5.742311770943797, "percentage": 88.0, "elapsed_time": "8:03:55", "remaining_time": "1:05:59", "throughput": 2333.21, "total_tokens": 67746288} {"current_steps": 35200, "total_steps": 40000, "eval_loss": 0.4150191843509674, "epoch": 5.742311770943797, "percentage": 88.0, "elapsed_time": "8:05:16", "remaining_time": "1:06:10", "throughput": 2326.73, "total_tokens": 67746288} {"current_steps": 35205, "total_steps": 40000, "loss": 0.0, "lr": 1.7526977564312263e-06, "epoch": 5.743127498164614, "percentage": 88.01, "elapsed_time": "8:05:21", "remaining_time": "1:06:06", "throughput": 2326.71, "total_tokens": 67756384} {"current_steps": 35210, "total_steps": 40000, "loss": 0.0771, "lr": 1.7490883625350701e-06, "epoch": 5.743943225385431, "percentage": 88.02, "elapsed_time": "8:05:23", "remaining_time": "1:06:01", "throughput": 2326.92, "total_tokens": 67767360} {"current_steps": 35215, "total_steps": 40000, "loss": 0.0001, "lr": 1.7454825542216807e-06, "epoch": 5.744758952606248, "percentage": 88.04, "elapsed_time": "8:05:25", "remaining_time": "1:05:57", "throughput": 2327.09, "total_tokens": 67777456} {"current_steps": 35220, "total_steps": 40000, "loss": 0.0001, "lr": 1.7418803320471105e-06, "epoch": 5.745574679827066, "percentage": 88.05, "elapsed_time": "8:05:27", "remaining_time": "1:05:53", "throughput": 2327.27, "total_tokens": 67787472} {"current_steps": 35225, "total_steps": 40000, "loss": 0.0028, "lr": 1.7382816965668737e-06, "epoch": 5.746390407047883, "percentage": 88.06, "elapsed_time": "8:05:29", "remaining_time": "1:05:48", "throughput": 2327.45, "total_tokens": 67797360} {"current_steps": 35230, "total_steps": 40000, "loss": 0.0981, "lr": 1.7346866483359285e-06, "epoch": 5.7472061342687, "percentage": 88.08, "elapsed_time": "8:05:31", "remaining_time": "1:05:44", "throughput": 2327.62, "total_tokens": 67807184} {"current_steps": 35235, "total_steps": 40000, "loss": 0.0007, "lr": 1.7310951879086657e-06, "epoch": 5.7480218614895175, "percentage": 88.09, "elapsed_time": "8:05:33", "remaining_time": "1:05:39", "throughput": 2327.82, "total_tokens": 67817840} {"current_steps": 35240, "total_steps": 40000, "loss": 0.0, "lr": 1.7275073158389471e-06, "epoch": 5.748837588710336, "percentage": 88.1, "elapsed_time": "8:05:35", "remaining_time": "1:05:35", "throughput": 2327.97, "total_tokens": 67827136} {"current_steps": 35245, "total_steps": 40000, "loss": 0.0001, "lr": 1.723923032680061e-06, "epoch": 5.749653315931153, "percentage": 88.11, "elapsed_time": "8:05:37", "remaining_time": "1:05:31", "throughput": 2328.15, "total_tokens": 67837280} {"current_steps": 35250, "total_steps": 40000, "loss": 0.175, "lr": 1.7203423389847428e-06, "epoch": 5.75046904315197, "percentage": 88.12, "elapsed_time": "8:05:39", "remaining_time": "1:05:26", "throughput": 2328.28, "total_tokens": 67845888} {"current_steps": 35255, "total_steps": 40000, "loss": 0.0002, "lr": 1.7167652353051928e-06, "epoch": 5.751284770372787, "percentage": 88.14, "elapsed_time": "8:05:41", "remaining_time": "1:05:22", "throughput": 2328.49, "total_tokens": 67856640} {"current_steps": 35260, "total_steps": 40000, "loss": 0.0918, "lr": 1.7131917221930333e-06, "epoch": 5.752100497593605, "percentage": 88.15, "elapsed_time": "8:05:44", "remaining_time": "1:05:17", "throughput": 2328.64, "total_tokens": 67865872} {"current_steps": 35265, "total_steps": 40000, "loss": 0.0001, "lr": 1.7096218001993513e-06, "epoch": 5.752916224814422, "percentage": 88.16, "elapsed_time": "8:05:46", "remaining_time": "1:05:13", "throughput": 2328.79, "total_tokens": 67875104} {"current_steps": 35270, "total_steps": 40000, "loss": 0.0002, "lr": 1.706055469874676e-06, "epoch": 5.753731952035239, "percentage": 88.17, "elapsed_time": "8:05:48", "remaining_time": "1:05:09", "throughput": 2328.91, "total_tokens": 67883520} {"current_steps": 35275, "total_steps": 40000, "loss": 0.0, "lr": 1.702492731768976e-06, "epoch": 5.754547679256056, "percentage": 88.19, "elapsed_time": "8:05:50", "remaining_time": "1:05:04", "throughput": 2329.1, "total_tokens": 67893632} {"current_steps": 35280, "total_steps": 40000, "loss": 0.0003, "lr": 1.6989335864316724e-06, "epoch": 5.755363406476874, "percentage": 88.2, "elapsed_time": "8:05:52", "remaining_time": "1:05:00", "throughput": 2329.25, "total_tokens": 67902912} {"current_steps": 35285, "total_steps": 40000, "loss": 0.0, "lr": 1.6953780344116265e-06, "epoch": 5.7561791336976915, "percentage": 88.21, "elapsed_time": "8:05:54", "remaining_time": "1:04:55", "throughput": 2329.43, "total_tokens": 67912960} {"current_steps": 35290, "total_steps": 40000, "loss": 0.045, "lr": 1.6918260762571497e-06, "epoch": 5.756994860918509, "percentage": 88.22, "elapsed_time": "8:05:56", "remaining_time": "1:04:51", "throughput": 2329.62, "total_tokens": 67923456} {"current_steps": 35295, "total_steps": 40000, "loss": 0.0, "lr": 1.6882777125160093e-06, "epoch": 5.757810588139327, "percentage": 88.24, "elapsed_time": "8:05:58", "remaining_time": "1:04:46", "throughput": 2329.77, "total_tokens": 67932544} {"current_steps": 35300, "total_steps": 40000, "loss": 0.0571, "lr": 1.6847329437353899e-06, "epoch": 5.758626315360144, "percentage": 88.25, "elapsed_time": "8:06:00", "remaining_time": "1:04:42", "throughput": 2329.92, "total_tokens": 67941888} {"current_steps": 35305, "total_steps": 40000, "loss": 0.0672, "lr": 1.6811917704619511e-06, "epoch": 5.759442042580961, "percentage": 88.26, "elapsed_time": "8:06:02", "remaining_time": "1:04:38", "throughput": 2330.07, "total_tokens": 67950944} {"current_steps": 35310, "total_steps": 40000, "loss": 0.0004, "lr": 1.67765419324179e-06, "epoch": 5.760257769801778, "percentage": 88.28, "elapsed_time": "8:06:04", "remaining_time": "1:04:33", "throughput": 2330.18, "total_tokens": 67959216} {"current_steps": 35315, "total_steps": 40000, "loss": 0.002, "lr": 1.6741202126204364e-06, "epoch": 5.761073497022595, "percentage": 88.29, "elapsed_time": "8:06:06", "remaining_time": "1:04:29", "throughput": 2330.35, "total_tokens": 67968912} {"current_steps": 35320, "total_steps": 40000, "loss": 0.0508, "lr": 1.6705898291428767e-06, "epoch": 5.761889224243413, "percentage": 88.3, "elapsed_time": "8:06:08", "remaining_time": "1:04:24", "throughput": 2330.51, "total_tokens": 67978224} {"current_steps": 35325, "total_steps": 40000, "loss": 0.001, "lr": 1.6670630433535395e-06, "epoch": 5.76270495146423, "percentage": 88.31, "elapsed_time": "8:06:10", "remaining_time": "1:04:20", "throughput": 2330.69, "total_tokens": 67988352} {"current_steps": 35330, "total_steps": 40000, "loss": 0.0, "lr": 1.6635398557962979e-06, "epoch": 5.7635206786850475, "percentage": 88.33, "elapsed_time": "8:06:13", "remaining_time": "1:04:16", "throughput": 2330.8, "total_tokens": 67996544} {"current_steps": 35335, "total_steps": 40000, "loss": 0.0, "lr": 1.660020267014481e-06, "epoch": 5.7643364059058655, "percentage": 88.34, "elapsed_time": "8:06:15", "remaining_time": "1:04:11", "throughput": 2330.97, "total_tokens": 68006208} {"current_steps": 35340, "total_steps": 40000, "loss": 0.0015, "lr": 1.6565042775508438e-06, "epoch": 5.765152133126683, "percentage": 88.35, "elapsed_time": "8:06:17", "remaining_time": "1:04:07", "throughput": 2331.12, "total_tokens": 68015392} {"current_steps": 35345, "total_steps": 40000, "loss": 0.0192, "lr": 1.6529918879475997e-06, "epoch": 5.7659678603475, "percentage": 88.36, "elapsed_time": "8:06:19", "remaining_time": "1:04:02", "throughput": 2331.3, "total_tokens": 68025632} {"current_steps": 35350, "total_steps": 40000, "loss": 0.0, "lr": 1.6494830987464043e-06, "epoch": 5.766783587568317, "percentage": 88.38, "elapsed_time": "8:06:21", "remaining_time": "1:03:58", "throughput": 2331.46, "total_tokens": 68035104} {"current_steps": 35355, "total_steps": 40000, "loss": 0.1272, "lr": 1.6459779104883555e-06, "epoch": 5.767599314789135, "percentage": 88.39, "elapsed_time": "8:06:23", "remaining_time": "1:03:54", "throughput": 2331.59, "total_tokens": 68043504} {"current_steps": 35360, "total_steps": 40000, "loss": 0.0025, "lr": 1.6424763237140013e-06, "epoch": 5.768415042009952, "percentage": 88.4, "elapsed_time": "8:06:25", "remaining_time": "1:03:49", "throughput": 2331.72, "total_tokens": 68052288} {"current_steps": 35365, "total_steps": 40000, "loss": 0.0002, "lr": 1.6389783389633207e-06, "epoch": 5.769230769230769, "percentage": 88.41, "elapsed_time": "8:06:27", "remaining_time": "1:03:45", "throughput": 2331.91, "total_tokens": 68062592} {"current_steps": 35370, "total_steps": 40000, "loss": 0.0153, "lr": 1.6354839567757546e-06, "epoch": 5.770046496451586, "percentage": 88.42, "elapsed_time": "8:06:29", "remaining_time": "1:03:40", "throughput": 2332.08, "total_tokens": 68072432} {"current_steps": 35375, "total_steps": 40000, "loss": 0.0204, "lr": 1.6319931776901831e-06, "epoch": 5.770862223672404, "percentage": 88.44, "elapsed_time": "8:06:31", "remaining_time": "1:03:36", "throughput": 2332.27, "total_tokens": 68082816} {"current_steps": 35380, "total_steps": 40000, "loss": 0.0008, "lr": 1.6285060022449229e-06, "epoch": 5.771677950893221, "percentage": 88.45, "elapsed_time": "8:06:33", "remaining_time": "1:03:32", "throughput": 2332.43, "total_tokens": 68092512} {"current_steps": 35385, "total_steps": 40000, "loss": 0.0508, "lr": 1.6250224309777434e-06, "epoch": 5.772493678114039, "percentage": 88.46, "elapsed_time": "8:06:35", "remaining_time": "1:03:27", "throughput": 2332.65, "total_tokens": 68103472} {"current_steps": 35390, "total_steps": 40000, "loss": 0.0037, "lr": 1.6215424644258515e-06, "epoch": 5.773309405334856, "percentage": 88.48, "elapsed_time": "8:06:37", "remaining_time": "1:03:23", "throughput": 2332.76, "total_tokens": 68111536} {"current_steps": 35395, "total_steps": 40000, "loss": 0.0002, "lr": 1.6180661031259036e-06, "epoch": 5.774125132555674, "percentage": 88.49, "elapsed_time": "8:06:39", "remaining_time": "1:03:19", "throughput": 2332.93, "total_tokens": 68121568} {"current_steps": 35400, "total_steps": 40000, "loss": 0.0138, "lr": 1.614593347613999e-06, "epoch": 5.774940859776491, "percentage": 88.5, "elapsed_time": "8:06:42", "remaining_time": "1:03:14", "throughput": 2333.12, "total_tokens": 68131952} {"current_steps": 35400, "total_steps": 40000, "eval_loss": 0.4070012867450714, "epoch": 5.774940859776491, "percentage": 88.5, "elapsed_time": "8:08:02", "remaining_time": "1:03:25", "throughput": 2326.69, "total_tokens": 68131952} {"current_steps": 35405, "total_steps": 40000, "loss": 0.1366, "lr": 1.6111241984256758e-06, "epoch": 5.775756586997308, "percentage": 88.51, "elapsed_time": "8:08:06", "remaining_time": "1:03:20", "throughput": 2326.72, "total_tokens": 68141456} {"current_steps": 35410, "total_steps": 40000, "loss": 0.0003, "lr": 1.6076586560959257e-06, "epoch": 5.776572314218125, "percentage": 88.52, "elapsed_time": "8:08:08", "remaining_time": "1:03:16", "throughput": 2326.89, "total_tokens": 68151312} {"current_steps": 35415, "total_steps": 40000, "loss": 0.0005, "lr": 1.604196721159182e-06, "epoch": 5.777388041438943, "percentage": 88.54, "elapsed_time": "8:08:10", "remaining_time": "1:03:12", "throughput": 2327.03, "total_tokens": 68160224} {"current_steps": 35420, "total_steps": 40000, "loss": 0.0002, "lr": 1.6007383941493092e-06, "epoch": 5.77820376865976, "percentage": 88.55, "elapsed_time": "8:08:12", "remaining_time": "1:03:07", "throughput": 2327.2, "total_tokens": 68170112} {"current_steps": 35425, "total_steps": 40000, "loss": 0.0795, "lr": 1.5972836755996285e-06, "epoch": 5.779019495880577, "percentage": 88.56, "elapsed_time": "8:08:14", "remaining_time": "1:03:03", "throughput": 2327.32, "total_tokens": 68178400} {"current_steps": 35430, "total_steps": 40000, "loss": 0.0002, "lr": 1.5938325660429076e-06, "epoch": 5.7798352231013945, "percentage": 88.58, "elapsed_time": "8:08:16", "remaining_time": "1:02:58", "throughput": 2327.48, "total_tokens": 68188208} {"current_steps": 35435, "total_steps": 40000, "loss": 0.0054, "lr": 1.5903850660113378e-06, "epoch": 5.7806509503222125, "percentage": 88.59, "elapsed_time": "8:08:19", "remaining_time": "1:02:54", "throughput": 2327.64, "total_tokens": 68197728} {"current_steps": 35440, "total_steps": 40000, "loss": 0.0, "lr": 1.5869411760365826e-06, "epoch": 5.78146667754303, "percentage": 88.6, "elapsed_time": "8:08:21", "remaining_time": "1:02:50", "throughput": 2327.79, "total_tokens": 68206768} {"current_steps": 35445, "total_steps": 40000, "loss": 0.0, "lr": 1.58350089664972e-06, "epoch": 5.782282404763847, "percentage": 88.61, "elapsed_time": "8:08:23", "remaining_time": "1:02:45", "throughput": 2327.95, "total_tokens": 68216432} {"current_steps": 35450, "total_steps": 40000, "loss": 0.0001, "lr": 1.5800642283812865e-06, "epoch": 5.783098131984664, "percentage": 88.62, "elapsed_time": "8:08:25", "remaining_time": "1:02:41", "throughput": 2328.13, "total_tokens": 68226512} {"current_steps": 35455, "total_steps": 40000, "loss": 0.0, "lr": 1.5766311717612698e-06, "epoch": 5.783913859205482, "percentage": 88.64, "elapsed_time": "8:08:27", "remaining_time": "1:02:36", "throughput": 2328.31, "total_tokens": 68236448} {"current_steps": 35460, "total_steps": 40000, "loss": 0.0001, "lr": 1.5732017273190818e-06, "epoch": 5.784729586426299, "percentage": 88.65, "elapsed_time": "8:08:29", "remaining_time": "1:02:32", "throughput": 2328.49, "total_tokens": 68246624} {"current_steps": 35465, "total_steps": 40000, "loss": 0.0005, "lr": 1.5697758955835806e-06, "epoch": 5.785545313647116, "percentage": 88.66, "elapsed_time": "8:08:31", "remaining_time": "1:02:28", "throughput": 2328.67, "total_tokens": 68256688} {"current_steps": 35470, "total_steps": 40000, "loss": 0.0005, "lr": 1.566353677083085e-06, "epoch": 5.786361040867934, "percentage": 88.67, "elapsed_time": "8:08:33", "remaining_time": "1:02:23", "throughput": 2328.82, "total_tokens": 68266160} {"current_steps": 35475, "total_steps": 40000, "loss": 0.0001, "lr": 1.562935072345334e-06, "epoch": 5.787176768088751, "percentage": 88.69, "elapsed_time": "8:08:35", "remaining_time": "1:02:19", "throughput": 2328.97, "total_tokens": 68275552} {"current_steps": 35480, "total_steps": 40000, "loss": 0.0009, "lr": 1.5595200818975281e-06, "epoch": 5.7879924953095685, "percentage": 88.7, "elapsed_time": "8:08:37", "remaining_time": "1:02:14", "throughput": 2329.11, "total_tokens": 68284576} {"current_steps": 35485, "total_steps": 40000, "loss": 0.0096, "lr": 1.5561087062662905e-06, "epoch": 5.788808222530386, "percentage": 88.71, "elapsed_time": "8:08:40", "remaining_time": "1:02:10", "throughput": 2329.25, "total_tokens": 68293680} {"current_steps": 35490, "total_steps": 40000, "loss": 0.0609, "lr": 1.5527009459777087e-06, "epoch": 5.789623949751203, "percentage": 88.72, "elapsed_time": "8:08:42", "remaining_time": "1:02:06", "throughput": 2329.4, "total_tokens": 68303056} {"current_steps": 35495, "total_steps": 40000, "loss": 0.0017, "lr": 1.5492968015572984e-06, "epoch": 5.790439676972021, "percentage": 88.74, "elapsed_time": "8:08:44", "remaining_time": "1:02:01", "throughput": 2329.61, "total_tokens": 68314208} {"current_steps": 35500, "total_steps": 40000, "loss": 0.0003, "lr": 1.5458962735300203e-06, "epoch": 5.791255404192838, "percentage": 88.75, "elapsed_time": "8:08:46", "remaining_time": "1:01:57", "throughput": 2329.81, "total_tokens": 68325040} {"current_steps": 35505, "total_steps": 40000, "loss": 0.0, "lr": 1.54249936242028e-06, "epoch": 5.792071131413655, "percentage": 88.76, "elapsed_time": "8:08:48", "remaining_time": "1:01:53", "throughput": 2329.96, "total_tokens": 68334320} {"current_steps": 35510, "total_steps": 40000, "loss": 0.0046, "lr": 1.5391060687519222e-06, "epoch": 5.792886858634473, "percentage": 88.78, "elapsed_time": "8:08:50", "remaining_time": "1:01:48", "throughput": 2330.12, "total_tokens": 68344112} {"current_steps": 35515, "total_steps": 40000, "loss": 0.0001, "lr": 1.5357163930482367e-06, "epoch": 5.79370258585529, "percentage": 88.79, "elapsed_time": "8:08:52", "remaining_time": "1:01:44", "throughput": 2330.29, "total_tokens": 68354176} {"current_steps": 35520, "total_steps": 40000, "loss": 0.0002, "lr": 1.532330335831955e-06, "epoch": 5.794518313076107, "percentage": 88.8, "elapsed_time": "8:08:55", "remaining_time": "1:01:39", "throughput": 2330.46, "total_tokens": 68364016} {"current_steps": 35525, "total_steps": 40000, "loss": 0.0007, "lr": 1.5289478976252491e-06, "epoch": 5.7953340402969244, "percentage": 88.81, "elapsed_time": "8:08:57", "remaining_time": "1:01:35", "throughput": 2330.58, "total_tokens": 68372544} {"current_steps": 35530, "total_steps": 40000, "loss": 0.1002, "lr": 1.5255690789497345e-06, "epoch": 5.796149767517742, "percentage": 88.83, "elapsed_time": "8:08:59", "remaining_time": "1:01:31", "throughput": 2330.73, "total_tokens": 68381936} {"current_steps": 35535, "total_steps": 40000, "loss": 0.0001, "lr": 1.5221938803264641e-06, "epoch": 5.79696549473856, "percentage": 88.84, "elapsed_time": "8:09:01", "remaining_time": "1:01:26", "throughput": 2330.91, "total_tokens": 68392272} {"current_steps": 35540, "total_steps": 40000, "loss": 0.0115, "lr": 1.518822302275938e-06, "epoch": 5.797781221959377, "percentage": 88.85, "elapsed_time": "8:09:03", "remaining_time": "1:01:22", "throughput": 2331.07, "total_tokens": 68402000} {"current_steps": 35545, "total_steps": 40000, "loss": 0.0011, "lr": 1.5154543453180958e-06, "epoch": 5.798596949180194, "percentage": 88.86, "elapsed_time": "8:09:05", "remaining_time": "1:01:18", "throughput": 2331.2, "total_tokens": 68410896} {"current_steps": 35550, "total_steps": 40000, "loss": 0.0127, "lr": 1.5120900099723167e-06, "epoch": 5.799412676401012, "percentage": 88.88, "elapsed_time": "8:09:07", "remaining_time": "1:01:13", "throughput": 2331.38, "total_tokens": 68420944} {"current_steps": 35555, "total_steps": 40000, "loss": 0.0008, "lr": 1.5087292967574273e-06, "epoch": 5.800228403621829, "percentage": 88.89, "elapsed_time": "8:09:10", "remaining_time": "1:01:09", "throughput": 2331.5, "total_tokens": 68429664} {"current_steps": 35560, "total_steps": 40000, "loss": 0.0006, "lr": 1.5053722061916908e-06, "epoch": 5.801044130842646, "percentage": 88.9, "elapsed_time": "8:09:12", "remaining_time": "1:01:04", "throughput": 2331.64, "total_tokens": 68439200} {"current_steps": 35565, "total_steps": 40000, "loss": 0.0489, "lr": 1.5020187387928124e-06, "epoch": 5.801859858063463, "percentage": 88.91, "elapsed_time": "8:09:14", "remaining_time": "1:01:00", "throughput": 2331.76, "total_tokens": 68447856} {"current_steps": 35570, "total_steps": 40000, "loss": 0.0001, "lr": 1.4986688950779343e-06, "epoch": 5.802675585284281, "percentage": 88.92, "elapsed_time": "8:09:16", "remaining_time": "1:00:56", "throughput": 2331.9, "total_tokens": 68456720} {"current_steps": 35575, "total_steps": 40000, "loss": 0.0072, "lr": 1.495322675563654e-06, "epoch": 5.803491312505098, "percentage": 88.94, "elapsed_time": "8:09:18", "remaining_time": "1:00:51", "throughput": 2332.07, "total_tokens": 68466800} {"current_steps": 35580, "total_steps": 40000, "loss": 0.061, "lr": 1.4919800807659922e-06, "epoch": 5.8043070397259156, "percentage": 88.95, "elapsed_time": "8:09:20", "remaining_time": "1:00:47", "throughput": 2332.23, "total_tokens": 68476496} {"current_steps": 35585, "total_steps": 40000, "loss": 0.0001, "lr": 1.4886411112004255e-06, "epoch": 5.805122766946733, "percentage": 88.96, "elapsed_time": "8:09:23", "remaining_time": "1:00:43", "throughput": 2332.4, "total_tokens": 68486432} {"current_steps": 35590, "total_steps": 40000, "loss": 0.0001, "lr": 1.4853057673818588e-06, "epoch": 5.805938494167551, "percentage": 88.98, "elapsed_time": "8:09:25", "remaining_time": "1:00:38", "throughput": 2332.54, "total_tokens": 68495568} {"current_steps": 35595, "total_steps": 40000, "loss": 0.1403, "lr": 1.481974049824647e-06, "epoch": 5.806754221388368, "percentage": 88.99, "elapsed_time": "8:09:27", "remaining_time": "1:00:34", "throughput": 2332.67, "total_tokens": 68504480} {"current_steps": 35600, "total_steps": 40000, "loss": 0.0, "lr": 1.4786459590425849e-06, "epoch": 5.807569948609185, "percentage": 89.0, "elapsed_time": "8:09:29", "remaining_time": "1:00:29", "throughput": 2332.85, "total_tokens": 68514656} {"current_steps": 35600, "total_steps": 40000, "eval_loss": 0.4128159284591675, "epoch": 5.807569948609185, "percentage": 89.0, "elapsed_time": "8:10:50", "remaining_time": "1:00:39", "throughput": 2326.45, "total_tokens": 68514656} {"current_steps": 35605, "total_steps": 40000, "loss": 0.0001, "lr": 1.4753214955489036e-06, "epoch": 5.808385675830002, "percentage": 89.01, "elapsed_time": "8:10:54", "remaining_time": "1:00:35", "throughput": 2326.47, "total_tokens": 68524416} {"current_steps": 35610, "total_steps": 40000, "loss": 0.0836, "lr": 1.4720006598562737e-06, "epoch": 5.80920140305082, "percentage": 89.03, "elapsed_time": "8:10:56", "remaining_time": "1:00:31", "throughput": 2326.66, "total_tokens": 68534848} {"current_steps": 35615, "total_steps": 40000, "loss": 0.0488, "lr": 1.4686834524768185e-06, "epoch": 5.810017130271637, "percentage": 89.04, "elapsed_time": "8:10:58", "remaining_time": "1:00:26", "throughput": 2326.85, "total_tokens": 68545344} {"current_steps": 35620, "total_steps": 40000, "loss": 0.0, "lr": 1.4653698739220844e-06, "epoch": 5.810832857492454, "percentage": 89.05, "elapsed_time": "8:11:00", "remaining_time": "1:00:22", "throughput": 2326.97, "total_tokens": 68553952} {"current_steps": 35625, "total_steps": 40000, "loss": 0.0001, "lr": 1.4620599247030715e-06, "epoch": 5.8116485847132715, "percentage": 89.06, "elapsed_time": "8:11:02", "remaining_time": "1:00:18", "throughput": 2327.15, "total_tokens": 68564416} {"current_steps": 35630, "total_steps": 40000, "loss": 0.0003, "lr": 1.4587536053302125e-06, "epoch": 5.8124643119340895, "percentage": 89.08, "elapsed_time": "8:11:04", "remaining_time": "1:00:13", "throughput": 2327.29, "total_tokens": 68573360} {"current_steps": 35635, "total_steps": 40000, "loss": 0.0003, "lr": 1.4554509163133862e-06, "epoch": 5.813280039154907, "percentage": 89.09, "elapsed_time": "8:11:07", "remaining_time": "1:00:09", "throughput": 2327.41, "total_tokens": 68581856} {"current_steps": 35640, "total_steps": 40000, "loss": 0.0002, "lr": 1.4521518581619098e-06, "epoch": 5.814095766375724, "percentage": 89.1, "elapsed_time": "8:11:09", "remaining_time": "1:00:05", "throughput": 2327.57, "total_tokens": 68591456} {"current_steps": 35645, "total_steps": 40000, "loss": 0.0876, "lr": 1.4488564313845348e-06, "epoch": 5.814911493596542, "percentage": 89.11, "elapsed_time": "8:11:11", "remaining_time": "1:00:00", "throughput": 2327.7, "total_tokens": 68600400} {"current_steps": 35650, "total_steps": 40000, "loss": 0.0002, "lr": 1.4455646364894603e-06, "epoch": 5.815727220817359, "percentage": 89.12, "elapsed_time": "8:11:13", "remaining_time": "0:59:56", "throughput": 2327.84, "total_tokens": 68609504} {"current_steps": 35655, "total_steps": 40000, "loss": 0.0002, "lr": 1.4422764739843247e-06, "epoch": 5.816542948038176, "percentage": 89.14, "elapsed_time": "8:11:15", "remaining_time": "0:59:51", "throughput": 2327.97, "total_tokens": 68618256} {"current_steps": 35660, "total_steps": 40000, "loss": 0.0001, "lr": 1.4389919443762e-06, "epoch": 5.817358675258993, "percentage": 89.15, "elapsed_time": "8:11:17", "remaining_time": "0:59:47", "throughput": 2328.09, "total_tokens": 68626992} {"current_steps": 35665, "total_steps": 40000, "loss": 0.0007, "lr": 1.4357110481716063e-06, "epoch": 5.81817440247981, "percentage": 89.16, "elapsed_time": "8:11:19", "remaining_time": "0:59:43", "throughput": 2328.3, "total_tokens": 68638032} {"current_steps": 35670, "total_steps": 40000, "loss": 0.0728, "lr": 1.4324337858764941e-06, "epoch": 5.818990129700628, "percentage": 89.18, "elapsed_time": "8:11:22", "remaining_time": "0:59:38", "throughput": 2328.43, "total_tokens": 68646784} {"current_steps": 35675, "total_steps": 40000, "loss": 0.0005, "lr": 1.4291601579962622e-06, "epoch": 5.8198058569214455, "percentage": 89.19, "elapsed_time": "8:11:24", "remaining_time": "0:59:34", "throughput": 2328.59, "total_tokens": 68656560} {"current_steps": 35680, "total_steps": 40000, "loss": 0.0004, "lr": 1.42589016503574e-06, "epoch": 5.820621584142263, "percentage": 89.2, "elapsed_time": "8:11:26", "remaining_time": "0:59:30", "throughput": 2328.75, "total_tokens": 68666272} {"current_steps": 35685, "total_steps": 40000, "loss": 0.0691, "lr": 1.4226238074992099e-06, "epoch": 5.821437311363081, "percentage": 89.21, "elapsed_time": "8:11:28", "remaining_time": "0:59:25", "throughput": 2328.95, "total_tokens": 68677104} {"current_steps": 35690, "total_steps": 40000, "loss": 0.0005, "lr": 1.4193610858903778e-06, "epoch": 5.822253038583898, "percentage": 89.22, "elapsed_time": "8:11:30", "remaining_time": "0:59:21", "throughput": 2329.09, "total_tokens": 68686176} {"current_steps": 35695, "total_steps": 40000, "loss": 0.0001, "lr": 1.416102000712402e-06, "epoch": 5.823068765804715, "percentage": 89.24, "elapsed_time": "8:11:32", "remaining_time": "0:59:16", "throughput": 2329.26, "total_tokens": 68696352} {"current_steps": 35700, "total_steps": 40000, "loss": 0.0001, "lr": 1.4128465524678668e-06, "epoch": 5.823884493025532, "percentage": 89.25, "elapsed_time": "8:11:34", "remaining_time": "0:59:12", "throughput": 2329.33, "total_tokens": 68703376} {"current_steps": 35705, "total_steps": 40000, "loss": 0.0001, "lr": 1.4095947416588124e-06, "epoch": 5.824700220246349, "percentage": 89.26, "elapsed_time": "8:11:37", "remaining_time": "0:59:08", "throughput": 2329.5, "total_tokens": 68713296} {"current_steps": 35710, "total_steps": 40000, "loss": 0.0001, "lr": 1.4063465687866983e-06, "epoch": 5.825515947467167, "percentage": 89.28, "elapsed_time": "8:11:39", "remaining_time": "0:59:03", "throughput": 2329.71, "total_tokens": 68724608} {"current_steps": 35715, "total_steps": 40000, "loss": 0.0002, "lr": 1.4031020343524438e-06, "epoch": 5.826331674687984, "percentage": 89.29, "elapsed_time": "8:11:41", "remaining_time": "0:58:59", "throughput": 2329.89, "total_tokens": 68734736} {"current_steps": 35720, "total_steps": 40000, "loss": 0.0, "lr": 1.3998611388563926e-06, "epoch": 5.827147401908801, "percentage": 89.3, "elapsed_time": "8:11:43", "remaining_time": "0:58:55", "throughput": 2330.09, "total_tokens": 68745744} {"current_steps": 35725, "total_steps": 40000, "loss": 0.0002, "lr": 1.3966238827983314e-06, "epoch": 5.8279631291296194, "percentage": 89.31, "elapsed_time": "8:11:45", "remaining_time": "0:58:50", "throughput": 2330.29, "total_tokens": 68756720} {"current_steps": 35730, "total_steps": 40000, "loss": 0.0019, "lr": 1.393390266677483e-06, "epoch": 5.828778856350437, "percentage": 89.33, "elapsed_time": "8:11:47", "remaining_time": "0:58:46", "throughput": 2330.41, "total_tokens": 68765376} {"current_steps": 35735, "total_steps": 40000, "loss": 0.0, "lr": 1.3901602909925204e-06, "epoch": 5.829594583571254, "percentage": 89.34, "elapsed_time": "8:11:49", "remaining_time": "0:58:42", "throughput": 2330.62, "total_tokens": 68776352} {"current_steps": 35740, "total_steps": 40000, "loss": 0.001, "lr": 1.3869339562415373e-06, "epoch": 5.830410310792071, "percentage": 89.35, "elapsed_time": "8:11:52", "remaining_time": "0:58:37", "throughput": 2330.79, "total_tokens": 68786368} {"current_steps": 35745, "total_steps": 40000, "loss": 0.0, "lr": 1.38371126292208e-06, "epoch": 5.831226038012889, "percentage": 89.36, "elapsed_time": "8:11:54", "remaining_time": "0:58:33", "throughput": 2330.97, "total_tokens": 68796800} {"current_steps": 35750, "total_steps": 40000, "loss": 0.0672, "lr": 1.3804922115311286e-06, "epoch": 5.832041765233706, "percentage": 89.38, "elapsed_time": "8:11:56", "remaining_time": "0:58:28", "throughput": 2331.12, "total_tokens": 68806240} {"current_steps": 35755, "total_steps": 40000, "loss": 0.0003, "lr": 1.3772768025650945e-06, "epoch": 5.832857492454523, "percentage": 89.39, "elapsed_time": "8:11:58", "remaining_time": "0:58:24", "throughput": 2331.26, "total_tokens": 68815184} {"current_steps": 35760, "total_steps": 40000, "loss": 0.0002, "lr": 1.3740650365198448e-06, "epoch": 5.83367321967534, "percentage": 89.4, "elapsed_time": "8:12:00", "remaining_time": "0:58:20", "throughput": 2331.43, "total_tokens": 68825488} {"current_steps": 35765, "total_steps": 40000, "loss": 0.001, "lr": 1.3708569138906612e-06, "epoch": 5.834488946896158, "percentage": 89.41, "elapsed_time": "8:12:02", "remaining_time": "0:58:15", "throughput": 2331.57, "total_tokens": 68834560} {"current_steps": 35770, "total_steps": 40000, "loss": 0.0003, "lr": 1.367652435172287e-06, "epoch": 5.835304674116975, "percentage": 89.42, "elapsed_time": "8:12:04", "remaining_time": "0:58:11", "throughput": 2331.76, "total_tokens": 68845072} {"current_steps": 35775, "total_steps": 40000, "loss": 0.0002, "lr": 1.364451600858893e-06, "epoch": 5.8361204013377925, "percentage": 89.44, "elapsed_time": "8:12:07", "remaining_time": "0:58:07", "throughput": 2331.94, "total_tokens": 68855392} {"current_steps": 35780, "total_steps": 40000, "loss": 0.0006, "lr": 1.3612544114440823e-06, "epoch": 5.83693612855861, "percentage": 89.45, "elapsed_time": "8:12:09", "remaining_time": "0:58:02", "throughput": 2332.13, "total_tokens": 68866128} {"current_steps": 35785, "total_steps": 40000, "loss": 0.0001, "lr": 1.3580608674209072e-06, "epoch": 5.837751855779428, "percentage": 89.46, "elapsed_time": "8:12:11", "remaining_time": "0:57:58", "throughput": 2332.28, "total_tokens": 68875280} {"current_steps": 35790, "total_steps": 40000, "loss": 0.0033, "lr": 1.3548709692818434e-06, "epoch": 5.838567583000245, "percentage": 89.48, "elapsed_time": "8:12:13", "remaining_time": "0:57:54", "throughput": 2332.37, "total_tokens": 68883088} {"current_steps": 35795, "total_steps": 40000, "loss": 0.0072, "lr": 1.3516847175188223e-06, "epoch": 5.839383310221062, "percentage": 89.49, "elapsed_time": "8:12:15", "remaining_time": "0:57:49", "throughput": 2332.57, "total_tokens": 68894112} {"current_steps": 35800, "total_steps": 40000, "loss": 0.0003, "lr": 1.348502112623204e-06, "epoch": 5.840199037441879, "percentage": 89.5, "elapsed_time": "8:12:17", "remaining_time": "0:57:45", "throughput": 2332.76, "total_tokens": 68904544} {"current_steps": 35800, "total_steps": 40000, "eval_loss": 0.41697007417678833, "epoch": 5.840199037441879, "percentage": 89.5, "elapsed_time": "8:13:38", "remaining_time": "0:57:54", "throughput": 2326.4, "total_tokens": 68904544} {"current_steps": 35805, "total_steps": 40000, "loss": 0.15, "lr": 1.3453231550857787e-06, "epoch": 5.841014764662697, "percentage": 89.51, "elapsed_time": "8:13:42", "remaining_time": "0:57:50", "throughput": 2326.41, "total_tokens": 68914592} {"current_steps": 35810, "total_steps": 40000, "loss": 0.0001, "lr": 1.3421478453967878e-06, "epoch": 5.841830491883514, "percentage": 89.53, "elapsed_time": "8:13:44", "remaining_time": "0:57:46", "throughput": 2326.58, "total_tokens": 68924256} {"current_steps": 35815, "total_steps": 40000, "loss": 0.0001, "lr": 1.3389761840459065e-06, "epoch": 5.842646219104331, "percentage": 89.54, "elapsed_time": "8:13:46", "remaining_time": "0:57:41", "throughput": 2326.74, "total_tokens": 68934032} {"current_steps": 35820, "total_steps": 40000, "loss": 0.0, "lr": 1.3358081715222376e-06, "epoch": 5.8434619463251485, "percentage": 89.55, "elapsed_time": "8:13:48", "remaining_time": "0:57:37", "throughput": 2326.9, "total_tokens": 68943408} {"current_steps": 35825, "total_steps": 40000, "loss": 0.0003, "lr": 1.3326438083143295e-06, "epoch": 5.8442776735459665, "percentage": 89.56, "elapsed_time": "8:13:50", "remaining_time": "0:57:33", "throughput": 2327.03, "total_tokens": 68952304} {"current_steps": 35830, "total_steps": 40000, "loss": 0.0001, "lr": 1.3294830949101723e-06, "epoch": 5.845093400766784, "percentage": 89.58, "elapsed_time": "8:13:53", "remaining_time": "0:57:28", "throughput": 2327.18, "total_tokens": 68961488} {"current_steps": 35835, "total_steps": 40000, "loss": 0.098, "lr": 1.3263260317971815e-06, "epoch": 5.845909127987601, "percentage": 89.59, "elapsed_time": "8:13:55", "remaining_time": "0:57:24", "throughput": 2327.34, "total_tokens": 68971008} {"current_steps": 35840, "total_steps": 40000, "loss": 0.0001, "lr": 1.3231726194622208e-06, "epoch": 5.846724855208418, "percentage": 89.6, "elapsed_time": "8:13:57", "remaining_time": "0:57:20", "throughput": 2327.46, "total_tokens": 68979376} {"current_steps": 35845, "total_steps": 40000, "loss": 0.0001, "lr": 1.3200228583915814e-06, "epoch": 5.847540582429236, "percentage": 89.61, "elapsed_time": "8:13:59", "remaining_time": "0:57:15", "throughput": 2327.61, "total_tokens": 68988640} {"current_steps": 35850, "total_steps": 40000, "loss": 0.0016, "lr": 1.3168767490709971e-06, "epoch": 5.848356309650053, "percentage": 89.62, "elapsed_time": "8:14:01", "remaining_time": "0:57:11", "throughput": 2327.74, "total_tokens": 68997440} {"current_steps": 35855, "total_steps": 40000, "loss": 0.0001, "lr": 1.3137342919856437e-06, "epoch": 5.84917203687087, "percentage": 89.64, "elapsed_time": "8:14:03", "remaining_time": "0:57:06", "throughput": 2327.9, "total_tokens": 69006896} {"current_steps": 35860, "total_steps": 40000, "loss": 0.0877, "lr": 1.310595487620117e-06, "epoch": 5.849987764091688, "percentage": 89.65, "elapsed_time": "8:14:05", "remaining_time": "0:57:02", "throughput": 2328.06, "total_tokens": 69016464} {"current_steps": 35865, "total_steps": 40000, "loss": 0.1919, "lr": 1.3074603364584715e-06, "epoch": 5.850803491312505, "percentage": 89.66, "elapsed_time": "8:14:07", "remaining_time": "0:56:58", "throughput": 2328.2, "total_tokens": 69025424} {"current_steps": 35870, "total_steps": 40000, "loss": 0.0338, "lr": 1.3043288389841758e-06, "epoch": 5.8516192185333225, "percentage": 89.68, "elapsed_time": "8:14:09", "remaining_time": "0:56:53", "throughput": 2328.38, "total_tokens": 69035440} {"current_steps": 35875, "total_steps": 40000, "loss": 0.0005, "lr": 1.3012009956801546e-06, "epoch": 5.85243494575414, "percentage": 89.69, "elapsed_time": "8:14:11", "remaining_time": "0:56:49", "throughput": 2328.51, "total_tokens": 69044128} {"current_steps": 35880, "total_steps": 40000, "loss": 0.0008, "lr": 1.2980768070287586e-06, "epoch": 5.853250672974957, "percentage": 89.7, "elapsed_time": "8:14:13", "remaining_time": "0:56:45", "throughput": 2328.7, "total_tokens": 69054752} {"current_steps": 35885, "total_steps": 40000, "loss": 0.0001, "lr": 1.2949562735117716e-06, "epoch": 5.854066400195775, "percentage": 89.71, "elapsed_time": "8:14:15", "remaining_time": "0:56:40", "throughput": 2328.82, "total_tokens": 69063216} {"current_steps": 35890, "total_steps": 40000, "loss": 0.0, "lr": 1.291839395610428e-06, "epoch": 5.854882127416592, "percentage": 89.72, "elapsed_time": "8:14:17", "remaining_time": "0:56:36", "throughput": 2328.97, "total_tokens": 69072288} {"current_steps": 35895, "total_steps": 40000, "loss": 0.0027, "lr": 1.2887261738053852e-06, "epoch": 5.855697854637409, "percentage": 89.74, "elapsed_time": "8:14:19", "remaining_time": "0:56:31", "throughput": 2329.14, "total_tokens": 69082304} {"current_steps": 35900, "total_steps": 40000, "loss": 0.0, "lr": 1.2856166085767396e-06, "epoch": 5.856513581858227, "percentage": 89.75, "elapsed_time": "8:14:22", "remaining_time": "0:56:27", "throughput": 2329.32, "total_tokens": 69092528} {"current_steps": 35905, "total_steps": 40000, "loss": 0.0006, "lr": 1.2825107004040272e-06, "epoch": 5.857329309079044, "percentage": 89.76, "elapsed_time": "8:14:24", "remaining_time": "0:56:23", "throughput": 2329.44, "total_tokens": 69100800} {"current_steps": 35910, "total_steps": 40000, "loss": 0.0301, "lr": 1.2794084497662146e-06, "epoch": 5.858145036299861, "percentage": 89.78, "elapsed_time": "8:14:26", "remaining_time": "0:56:18", "throughput": 2329.59, "total_tokens": 69110192} {"current_steps": 35915, "total_steps": 40000, "loss": 0.0, "lr": 1.276309857141711e-06, "epoch": 5.858960763520678, "percentage": 89.79, "elapsed_time": "8:14:28", "remaining_time": "0:56:14", "throughput": 2329.76, "total_tokens": 69119808} {"current_steps": 35920, "total_steps": 40000, "loss": 0.0, "lr": 1.273214923008359e-06, "epoch": 5.859776490741496, "percentage": 89.8, "elapsed_time": "8:14:30", "remaining_time": "0:56:10", "throughput": 2329.94, "total_tokens": 69130016} {"current_steps": 35925, "total_steps": 40000, "loss": 0.0003, "lr": 1.2701236478434352e-06, "epoch": 5.8605922179623136, "percentage": 89.81, "elapsed_time": "8:14:32", "remaining_time": "0:56:05", "throughput": 2330.05, "total_tokens": 69138272} {"current_steps": 35930, "total_steps": 40000, "loss": 0.0001, "lr": 1.2670360321236502e-06, "epoch": 5.861407945183131, "percentage": 89.83, "elapsed_time": "8:14:34", "remaining_time": "0:56:01", "throughput": 2330.24, "total_tokens": 69148512} {"current_steps": 35935, "total_steps": 40000, "loss": 0.0609, "lr": 1.2639520763251617e-06, "epoch": 5.862223672403948, "percentage": 89.84, "elapsed_time": "8:14:36", "remaining_time": "0:55:57", "throughput": 2330.42, "total_tokens": 69158784} {"current_steps": 35940, "total_steps": 40000, "loss": 0.0002, "lr": 1.2608717809235448e-06, "epoch": 5.863039399624766, "percentage": 89.85, "elapsed_time": "8:14:38", "remaining_time": "0:55:52", "throughput": 2330.6, "total_tokens": 69168912} {"current_steps": 35945, "total_steps": 40000, "loss": 0.0002, "lr": 1.2577951463938282e-06, "epoch": 5.863855126845583, "percentage": 89.86, "elapsed_time": "8:14:40", "remaining_time": "0:55:48", "throughput": 2330.82, "total_tokens": 69180240} {"current_steps": 35950, "total_steps": 40000, "loss": 0.0008, "lr": 1.2547221732104569e-06, "epoch": 5.8646708540664, "percentage": 89.88, "elapsed_time": "8:14:42", "remaining_time": "0:55:43", "throughput": 2331.01, "total_tokens": 69190752} {"current_steps": 35955, "total_steps": 40000, "loss": 0.0, "lr": 1.25165286184733e-06, "epoch": 5.865486581287217, "percentage": 89.89, "elapsed_time": "8:14:44", "remaining_time": "0:55:39", "throughput": 2331.16, "total_tokens": 69199968} {"current_steps": 35960, "total_steps": 40000, "loss": 0.0001, "lr": 1.248587212777777e-06, "epoch": 5.866302308508035, "percentage": 89.9, "elapsed_time": "8:14:46", "remaining_time": "0:55:35", "throughput": 2331.32, "total_tokens": 69209552} {"current_steps": 35965, "total_steps": 40000, "loss": 0.0007, "lr": 1.2455252264745532e-06, "epoch": 5.867118035728852, "percentage": 89.91, "elapsed_time": "8:14:48", "remaining_time": "0:55:30", "throughput": 2331.47, "total_tokens": 69218976} {"current_steps": 35970, "total_steps": 40000, "loss": 0.0002, "lr": 1.2424669034098528e-06, "epoch": 5.8679337629496695, "percentage": 89.92, "elapsed_time": "8:14:51", "remaining_time": "0:55:26", "throughput": 2331.64, "total_tokens": 69228976} {"current_steps": 35975, "total_steps": 40000, "loss": 0.0001, "lr": 1.2394122440553185e-06, "epoch": 5.868749490170487, "percentage": 89.94, "elapsed_time": "8:14:53", "remaining_time": "0:55:22", "throughput": 2331.8, "total_tokens": 69238416} {"current_steps": 35980, "total_steps": 40000, "loss": 0.0002, "lr": 1.2363612488820037e-06, "epoch": 5.869565217391305, "percentage": 89.95, "elapsed_time": "8:14:55", "remaining_time": "0:55:17", "throughput": 2331.96, "total_tokens": 69248032} {"current_steps": 35985, "total_steps": 40000, "loss": 0.0003, "lr": 1.2333139183604208e-06, "epoch": 5.870380944612122, "percentage": 89.96, "elapsed_time": "8:14:57", "remaining_time": "0:55:13", "throughput": 2332.11, "total_tokens": 69257328} {"current_steps": 35990, "total_steps": 40000, "loss": 0.0037, "lr": 1.2302702529604998e-06, "epoch": 5.871196671832939, "percentage": 89.98, "elapsed_time": "8:14:59", "remaining_time": "0:55:09", "throughput": 2332.28, "total_tokens": 69267056} {"current_steps": 35995, "total_steps": 40000, "loss": 0.0001, "lr": 1.227230253151615e-06, "epoch": 5.872012399053756, "percentage": 89.99, "elapsed_time": "8:15:01", "remaining_time": "0:55:04", "throughput": 2332.48, "total_tokens": 69277760} {"current_steps": 36000, "total_steps": 40000, "loss": 0.0001, "lr": 1.2241939194025748e-06, "epoch": 5.872828126274574, "percentage": 90.0, "elapsed_time": "8:15:03", "remaining_time": "0:55:00", "throughput": 2332.6, "total_tokens": 69286320} {"current_steps": 36000, "total_steps": 40000, "eval_loss": 0.4163505733013153, "epoch": 5.872828126274574, "percentage": 90.0, "elapsed_time": "8:16:24", "remaining_time": "0:55:09", "throughput": 2326.26, "total_tokens": 69286320} {"current_steps": 36005, "total_steps": 40000, "loss": 0.0001, "lr": 1.2211612521816156e-06, "epoch": 5.873643853495391, "percentage": 90.01, "elapsed_time": "8:16:28", "remaining_time": "0:55:05", "throughput": 2326.3, "total_tokens": 69296240} {"current_steps": 36010, "total_steps": 40000, "loss": 0.0, "lr": 1.2181322519564137e-06, "epoch": 5.874459580716208, "percentage": 90.03, "elapsed_time": "8:16:30", "remaining_time": "0:55:00", "throughput": 2326.46, "total_tokens": 69306032} {"current_steps": 36015, "total_steps": 40000, "loss": 0.0, "lr": 1.2151069191940839e-06, "epoch": 5.8752753079370255, "percentage": 90.04, "elapsed_time": "8:16:32", "remaining_time": "0:54:56", "throughput": 2326.68, "total_tokens": 69317312} {"current_steps": 36020, "total_steps": 40000, "loss": 0.0001, "lr": 1.2120852543611644e-06, "epoch": 5.8760910351578435, "percentage": 90.05, "elapsed_time": "8:16:34", "remaining_time": "0:54:52", "throughput": 2326.87, "total_tokens": 69327952} {"current_steps": 36025, "total_steps": 40000, "loss": 0.0006, "lr": 1.2090672579236379e-06, "epoch": 5.876906762378661, "percentage": 90.06, "elapsed_time": "8:16:36", "remaining_time": "0:54:47", "throughput": 2327.04, "total_tokens": 69337728} {"current_steps": 36030, "total_steps": 40000, "loss": 0.0001, "lr": 1.2060529303469126e-06, "epoch": 5.877722489599478, "percentage": 90.08, "elapsed_time": "8:16:38", "remaining_time": "0:54:43", "throughput": 2327.18, "total_tokens": 69346736} {"current_steps": 36035, "total_steps": 40000, "loss": 0.0922, "lr": 1.2030422720958445e-06, "epoch": 5.878538216820296, "percentage": 90.09, "elapsed_time": "8:16:40", "remaining_time": "0:54:39", "throughput": 2327.31, "total_tokens": 69355456} {"current_steps": 36040, "total_steps": 40000, "loss": 0.0001, "lr": 1.200035283634704e-06, "epoch": 5.879353944041113, "percentage": 90.1, "elapsed_time": "8:16:42", "remaining_time": "0:54:34", "throughput": 2327.52, "total_tokens": 69366448} {"current_steps": 36045, "total_steps": 40000, "loss": 0.0002, "lr": 1.1970319654272144e-06, "epoch": 5.88016967126193, "percentage": 90.11, "elapsed_time": "8:16:44", "remaining_time": "0:54:30", "throughput": 2327.66, "total_tokens": 69375488} {"current_steps": 36050, "total_steps": 40000, "loss": 0.0, "lr": 1.1940323179365192e-06, "epoch": 5.880985398482747, "percentage": 90.12, "elapsed_time": "8:16:46", "remaining_time": "0:54:25", "throughput": 2327.79, "total_tokens": 69384240} {"current_steps": 36055, "total_steps": 40000, "loss": 0.0222, "lr": 1.1910363416252095e-06, "epoch": 5.881801125703564, "percentage": 90.14, "elapsed_time": "8:16:48", "remaining_time": "0:54:21", "throughput": 2327.93, "total_tokens": 69393120} {"current_steps": 36060, "total_steps": 40000, "loss": 0.045, "lr": 1.1880440369552964e-06, "epoch": 5.882616852924382, "percentage": 90.15, "elapsed_time": "8:16:51", "remaining_time": "0:54:17", "throughput": 2328.09, "total_tokens": 69402928} {"current_steps": 36065, "total_steps": 40000, "loss": 0.0001, "lr": 1.1850554043882328e-06, "epoch": 5.883432580145199, "percentage": 90.16, "elapsed_time": "8:16:53", "remaining_time": "0:54:12", "throughput": 2328.27, "total_tokens": 69412992} {"current_steps": 36070, "total_steps": 40000, "loss": 0.0017, "lr": 1.1820704443849028e-06, "epoch": 5.884248307366017, "percentage": 90.18, "elapsed_time": "8:16:55", "remaining_time": "0:54:08", "throughput": 2328.45, "total_tokens": 69423200} {"current_steps": 36075, "total_steps": 40000, "loss": 0.0001, "lr": 1.1790891574056219e-06, "epoch": 5.885064034586835, "percentage": 90.19, "elapsed_time": "8:16:57", "remaining_time": "0:54:04", "throughput": 2328.62, "total_tokens": 69433168} {"current_steps": 36080, "total_steps": 40000, "loss": 0.0007, "lr": 1.1761115439101523e-06, "epoch": 5.885879761807652, "percentage": 90.2, "elapsed_time": "8:16:59", "remaining_time": "0:53:59", "throughput": 2328.79, "total_tokens": 69442976} {"current_steps": 36085, "total_steps": 40000, "loss": 0.0, "lr": 1.1731376043576659e-06, "epoch": 5.886695489028469, "percentage": 90.21, "elapsed_time": "8:17:01", "remaining_time": "0:53:55", "throughput": 2328.96, "total_tokens": 69452960} {"current_steps": 36090, "total_steps": 40000, "loss": 0.0003, "lr": 1.1701673392067875e-06, "epoch": 5.887511216249286, "percentage": 90.22, "elapsed_time": "8:17:03", "remaining_time": "0:53:51", "throughput": 2329.11, "total_tokens": 69462080} {"current_steps": 36095, "total_steps": 40000, "loss": 0.0001, "lr": 1.1672007489155757e-06, "epoch": 5.888326943470103, "percentage": 90.24, "elapsed_time": "8:17:05", "remaining_time": "0:53:46", "throughput": 2329.26, "total_tokens": 69471616} {"current_steps": 36100, "total_steps": 40000, "loss": 0.0, "lr": 1.164237833941506e-06, "epoch": 5.889142670690921, "percentage": 90.25, "elapsed_time": "8:17:07", "remaining_time": "0:53:42", "throughput": 2329.45, "total_tokens": 69482048} {"current_steps": 36105, "total_steps": 40000, "loss": 0.0002, "lr": 1.1612785947415022e-06, "epoch": 5.889958397911738, "percentage": 90.26, "elapsed_time": "8:17:09", "remaining_time": "0:53:38", "throughput": 2329.63, "total_tokens": 69492144} {"current_steps": 36110, "total_steps": 40000, "loss": 0.0373, "lr": 1.1583230317719185e-06, "epoch": 5.890774125132555, "percentage": 90.28, "elapsed_time": "8:17:11", "remaining_time": "0:53:33", "throughput": 2329.81, "total_tokens": 69502256} {"current_steps": 36115, "total_steps": 40000, "loss": 0.0, "lr": 1.1553711454885318e-06, "epoch": 5.891589852353373, "percentage": 90.29, "elapsed_time": "8:17:13", "remaining_time": "0:53:29", "throughput": 2329.95, "total_tokens": 69511472} {"current_steps": 36120, "total_steps": 40000, "loss": 0.0001, "lr": 1.152422936346567e-06, "epoch": 5.8924055795741905, "percentage": 90.3, "elapsed_time": "8:17:15", "remaining_time": "0:53:24", "throughput": 2330.1, "total_tokens": 69520768} {"current_steps": 36125, "total_steps": 40000, "loss": 0.0251, "lr": 1.1494784048006718e-06, "epoch": 5.893221306795008, "percentage": 90.31, "elapsed_time": "8:17:18", "remaining_time": "0:53:20", "throughput": 2330.26, "total_tokens": 69530304} {"current_steps": 36130, "total_steps": 40000, "loss": 0.0, "lr": 1.1465375513049326e-06, "epoch": 5.894037034015825, "percentage": 90.33, "elapsed_time": "8:17:20", "remaining_time": "0:53:16", "throughput": 2330.42, "total_tokens": 69539872} {"current_steps": 36135, "total_steps": 40000, "loss": 0.0001, "lr": 1.1436003763128616e-06, "epoch": 5.894852761236643, "percentage": 90.34, "elapsed_time": "8:17:22", "remaining_time": "0:53:11", "throughput": 2330.6, "total_tokens": 69550224} {"current_steps": 36140, "total_steps": 40000, "loss": 0.0001, "lr": 1.1406668802774106e-06, "epoch": 5.89566848845746, "percentage": 90.35, "elapsed_time": "8:17:24", "remaining_time": "0:53:07", "throughput": 2330.77, "total_tokens": 69559936} {"current_steps": 36145, "total_steps": 40000, "loss": 0.0, "lr": 1.137737063650965e-06, "epoch": 5.896484215678277, "percentage": 90.36, "elapsed_time": "8:17:26", "remaining_time": "0:53:03", "throughput": 2330.9, "total_tokens": 69568848} {"current_steps": 36150, "total_steps": 40000, "loss": 0.0204, "lr": 1.1348109268853323e-06, "epoch": 5.897299942899094, "percentage": 90.38, "elapsed_time": "8:17:28", "remaining_time": "0:52:58", "throughput": 2331.11, "total_tokens": 69579824} {"current_steps": 36155, "total_steps": 40000, "loss": 0.0, "lr": 1.1318884704317634e-06, "epoch": 5.898115670119912, "percentage": 90.39, "elapsed_time": "8:17:30", "remaining_time": "0:52:54", "throughput": 2331.3, "total_tokens": 69590336} {"current_steps": 36160, "total_steps": 40000, "loss": 0.0001, "lr": 1.1289696947409417e-06, "epoch": 5.898931397340729, "percentage": 90.4, "elapsed_time": "8:17:32", "remaining_time": "0:52:50", "throughput": 2331.49, "total_tokens": 69601056} {"current_steps": 36165, "total_steps": 40000, "loss": 0.0001, "lr": 1.126054600262974e-06, "epoch": 5.8997471245615465, "percentage": 90.41, "elapsed_time": "8:17:34", "remaining_time": "0:52:45", "throughput": 2331.63, "total_tokens": 69609984} {"current_steps": 36170, "total_steps": 40000, "loss": 0.0, "lr": 1.1231431874474064e-06, "epoch": 5.900562851782364, "percentage": 90.42, "elapsed_time": "8:17:36", "remaining_time": "0:52:41", "throughput": 2331.78, "total_tokens": 69619248} {"current_steps": 36175, "total_steps": 40000, "loss": 0.0, "lr": 1.12023545674321e-06, "epoch": 5.901378579003182, "percentage": 90.44, "elapsed_time": "8:17:38", "remaining_time": "0:52:37", "throughput": 2331.92, "total_tokens": 69628304} {"current_steps": 36180, "total_steps": 40000, "loss": 0.0, "lr": 1.117331408598804e-06, "epoch": 5.902194306223999, "percentage": 90.45, "elapsed_time": "8:17:40", "remaining_time": "0:52:32", "throughput": 2332.06, "total_tokens": 69637184} {"current_steps": 36185, "total_steps": 40000, "loss": 0.0, "lr": 1.1144310434620191e-06, "epoch": 5.903010033444816, "percentage": 90.46, "elapsed_time": "8:17:42", "remaining_time": "0:52:28", "throughput": 2332.25, "total_tokens": 69647664} {"current_steps": 36190, "total_steps": 40000, "loss": 0.0018, "lr": 1.1115343617801365e-06, "epoch": 5.903825760665633, "percentage": 90.48, "elapsed_time": "8:17:44", "remaining_time": "0:52:24", "throughput": 2332.42, "total_tokens": 69657728} {"current_steps": 36195, "total_steps": 40000, "loss": 0.0037, "lr": 1.1086413639998515e-06, "epoch": 5.904641487886451, "percentage": 90.49, "elapsed_time": "8:17:47", "remaining_time": "0:52:19", "throughput": 2332.6, "total_tokens": 69667952} {"current_steps": 36200, "total_steps": 40000, "loss": 0.1023, "lr": 1.1057520505673103e-06, "epoch": 5.905457215107268, "percentage": 90.5, "elapsed_time": "8:17:49", "remaining_time": "0:52:15", "throughput": 2332.73, "total_tokens": 69676640} {"current_steps": 36200, "total_steps": 40000, "eval_loss": 0.4185701906681061, "epoch": 5.905457215107268, "percentage": 90.5, "elapsed_time": "8:19:09", "remaining_time": "0:52:23", "throughput": 2326.44, "total_tokens": 69676640} {"current_steps": 36205, "total_steps": 40000, "loss": 0.0001, "lr": 1.1028664219280727e-06, "epoch": 5.906272942328085, "percentage": 90.51, "elapsed_time": "8:19:13", "remaining_time": "0:52:19", "throughput": 2326.48, "total_tokens": 69686592} {"current_steps": 36210, "total_steps": 40000, "loss": 0.0001, "lr": 1.0999844785271468e-06, "epoch": 5.907088669548903, "percentage": 90.53, "elapsed_time": "8:19:15", "remaining_time": "0:52:15", "throughput": 2326.63, "total_tokens": 69695824} {"current_steps": 36215, "total_steps": 40000, "loss": 0.0369, "lr": 1.097106220808955e-06, "epoch": 5.9079043967697205, "percentage": 90.54, "elapsed_time": "8:19:17", "remaining_time": "0:52:11", "throughput": 2326.75, "total_tokens": 69704240} {"current_steps": 36220, "total_steps": 40000, "loss": 0.0, "lr": 1.0942316492173698e-06, "epoch": 5.908720123990538, "percentage": 90.55, "elapsed_time": "8:19:19", "remaining_time": "0:52:06", "throughput": 2326.9, "total_tokens": 69713584} {"current_steps": 36225, "total_steps": 40000, "loss": 0.0034, "lr": 1.0913607641956841e-06, "epoch": 5.909535851211355, "percentage": 90.56, "elapsed_time": "8:19:21", "remaining_time": "0:52:02", "throughput": 2327.06, "total_tokens": 69723328} {"current_steps": 36230, "total_steps": 40000, "loss": 0.0, "lr": 1.0884935661866213e-06, "epoch": 5.910351578432172, "percentage": 90.58, "elapsed_time": "8:19:24", "remaining_time": "0:51:57", "throughput": 2327.21, "total_tokens": 69732736} {"current_steps": 36235, "total_steps": 40000, "loss": 0.002, "lr": 1.0856300556323418e-06, "epoch": 5.91116730565299, "percentage": 90.59, "elapsed_time": "8:19:26", "remaining_time": "0:51:53", "throughput": 2327.38, "total_tokens": 69742528} {"current_steps": 36240, "total_steps": 40000, "loss": 0.001, "lr": 1.0827702329744365e-06, "epoch": 5.911983032873807, "percentage": 90.6, "elapsed_time": "8:19:28", "remaining_time": "0:51:49", "throughput": 2327.51, "total_tokens": 69751232} {"current_steps": 36245, "total_steps": 40000, "loss": 0.0001, "lr": 1.0799140986539197e-06, "epoch": 5.912798760094624, "percentage": 90.61, "elapsed_time": "8:19:30", "remaining_time": "0:51:44", "throughput": 2327.7, "total_tokens": 69761984} {"current_steps": 36250, "total_steps": 40000, "loss": 0.0003, "lr": 1.0770616531112526e-06, "epoch": 5.913614487315442, "percentage": 90.62, "elapsed_time": "8:19:32", "remaining_time": "0:51:40", "throughput": 2327.86, "total_tokens": 69771408} {"current_steps": 36255, "total_steps": 40000, "loss": 0.0017, "lr": 1.0742128967863085e-06, "epoch": 5.914430214536259, "percentage": 90.64, "elapsed_time": "8:19:34", "remaining_time": "0:51:36", "throughput": 2327.98, "total_tokens": 69779920} {"current_steps": 36260, "total_steps": 40000, "loss": 0.0, "lr": 1.071367830118411e-06, "epoch": 5.915245941757076, "percentage": 90.65, "elapsed_time": "8:19:36", "remaining_time": "0:51:31", "throughput": 2328.14, "total_tokens": 69789648} {"current_steps": 36265, "total_steps": 40000, "loss": 0.0002, "lr": 1.068526453546298e-06, "epoch": 5.9160616689778935, "percentage": 90.66, "elapsed_time": "8:19:38", "remaining_time": "0:51:27", "throughput": 2328.33, "total_tokens": 69799936} {"current_steps": 36270, "total_steps": 40000, "loss": 0.0, "lr": 1.0656887675081467e-06, "epoch": 5.916877396198711, "percentage": 90.67, "elapsed_time": "8:19:40", "remaining_time": "0:51:23", "throughput": 2328.51, "total_tokens": 69810096} {"current_steps": 36275, "total_steps": 40000, "loss": 0.0, "lr": 1.0628547724415628e-06, "epoch": 5.917693123419529, "percentage": 90.69, "elapsed_time": "8:19:42", "remaining_time": "0:51:18", "throughput": 2328.62, "total_tokens": 69818256} {"current_steps": 36280, "total_steps": 40000, "loss": 0.0961, "lr": 1.0600244687835881e-06, "epoch": 5.918508850640346, "percentage": 90.7, "elapsed_time": "8:19:44", "remaining_time": "0:51:14", "throughput": 2328.77, "total_tokens": 69827568} {"current_steps": 36285, "total_steps": 40000, "loss": 0.0001, "lr": 1.0571978569706876e-06, "epoch": 5.919324577861163, "percentage": 90.71, "elapsed_time": "8:19:46", "remaining_time": "0:51:10", "throughput": 2328.92, "total_tokens": 69836944} {"current_steps": 36290, "total_steps": 40000, "loss": 0.0109, "lr": 1.0543749374387652e-06, "epoch": 5.920140305081981, "percentage": 90.72, "elapsed_time": "8:19:48", "remaining_time": "0:51:05", "throughput": 2329.05, "total_tokens": 69845680} {"current_steps": 36295, "total_steps": 40000, "loss": 0.1085, "lr": 1.051555710623142e-06, "epoch": 5.920956032302798, "percentage": 90.74, "elapsed_time": "8:19:51", "remaining_time": "0:51:01", "throughput": 2329.22, "total_tokens": 69855840} {"current_steps": 36300, "total_steps": 40000, "loss": 0.0001, "lr": 1.0487401769585847e-06, "epoch": 5.921771759523615, "percentage": 90.75, "elapsed_time": "8:19:53", "remaining_time": "0:50:57", "throughput": 2329.42, "total_tokens": 69866448} {"current_steps": 36305, "total_steps": 40000, "loss": 0.0003, "lr": 1.0459283368792845e-06, "epoch": 5.922587486744432, "percentage": 90.76, "elapsed_time": "8:19:55", "remaining_time": "0:50:52", "throughput": 2329.56, "total_tokens": 69875440} {"current_steps": 36310, "total_steps": 40000, "loss": 0.0, "lr": 1.043120190818858e-06, "epoch": 5.92340321396525, "percentage": 90.77, "elapsed_time": "8:19:57", "remaining_time": "0:50:48", "throughput": 2329.73, "total_tokens": 69885456} {"current_steps": 36315, "total_steps": 40000, "loss": 0.0043, "lr": 1.0403157392103596e-06, "epoch": 5.9242189411860675, "percentage": 90.79, "elapsed_time": "8:19:59", "remaining_time": "0:50:44", "throughput": 2329.9, "total_tokens": 69895600} {"current_steps": 36320, "total_steps": 40000, "loss": 0.019, "lr": 1.0375149824862735e-06, "epoch": 5.925034668406885, "percentage": 90.8, "elapsed_time": "8:20:01", "remaining_time": "0:50:39", "throughput": 2330.07, "total_tokens": 69905680} {"current_steps": 36325, "total_steps": 40000, "loss": 0.0041, "lr": 1.034717921078507e-06, "epoch": 5.925850395627702, "percentage": 90.81, "elapsed_time": "8:20:03", "remaining_time": "0:50:35", "throughput": 2330.25, "total_tokens": 69915840} {"current_steps": 36330, "total_steps": 40000, "loss": 0.0006, "lr": 1.0319245554184009e-06, "epoch": 5.92666612284852, "percentage": 90.83, "elapsed_time": "8:20:05", "remaining_time": "0:50:31", "throughput": 2330.36, "total_tokens": 69924384} {"current_steps": 36335, "total_steps": 40000, "loss": 0.0151, "lr": 1.0291348859367361e-06, "epoch": 5.927481850069337, "percentage": 90.84, "elapsed_time": "8:20:07", "remaining_time": "0:50:26", "throughput": 2330.53, "total_tokens": 69934272} {"current_steps": 36340, "total_steps": 40000, "loss": 0.0008, "lr": 1.0263489130637016e-06, "epoch": 5.928297577290154, "percentage": 90.85, "elapsed_time": "8:20:10", "remaining_time": "0:50:22", "throughput": 2330.7, "total_tokens": 69944336} {"current_steps": 36345, "total_steps": 40000, "loss": 0.0052, "lr": 1.0235666372289427e-06, "epoch": 5.929113304510971, "percentage": 90.86, "elapsed_time": "8:20:12", "remaining_time": "0:50:18", "throughput": 2330.84, "total_tokens": 69953744} {"current_steps": 36350, "total_steps": 40000, "loss": 0.0001, "lr": 1.0207880588615076e-06, "epoch": 5.929929031731789, "percentage": 90.88, "elapsed_time": "8:20:14", "remaining_time": "0:50:13", "throughput": 2330.96, "total_tokens": 69962224} {"current_steps": 36355, "total_steps": 40000, "loss": 0.0, "lr": 1.0180131783898984e-06, "epoch": 5.930744758952606, "percentage": 90.89, "elapsed_time": "8:20:16", "remaining_time": "0:50:09", "throughput": 2331.13, "total_tokens": 69972400} {"current_steps": 36360, "total_steps": 40000, "loss": 0.0, "lr": 1.0152419962420362e-06, "epoch": 5.9315604861734235, "percentage": 90.9, "elapsed_time": "8:20:18", "remaining_time": "0:50:05", "throughput": 2331.3, "total_tokens": 69982448} {"current_steps": 36365, "total_steps": 40000, "loss": 0.0, "lr": 1.0124745128452685e-06, "epoch": 5.932376213394241, "percentage": 90.91, "elapsed_time": "8:20:20", "remaining_time": "0:50:00", "throughput": 2331.45, "total_tokens": 69991968} {"current_steps": 36370, "total_steps": 40000, "loss": 0.0, "lr": 1.0097107286263758e-06, "epoch": 5.933191940615059, "percentage": 90.92, "elapsed_time": "8:20:22", "remaining_time": "0:49:56", "throughput": 2331.58, "total_tokens": 70000880} {"current_steps": 36375, "total_steps": 40000, "loss": 0.0002, "lr": 1.00695064401157e-06, "epoch": 5.934007667835876, "percentage": 90.94, "elapsed_time": "8:20:25", "remaining_time": "0:49:52", "throughput": 2331.69, "total_tokens": 70009136} {"current_steps": 36380, "total_steps": 40000, "loss": 0.0002, "lr": 1.0041942594264886e-06, "epoch": 5.934823395056693, "percentage": 90.95, "elapsed_time": "8:20:27", "remaining_time": "0:49:47", "throughput": 2331.83, "total_tokens": 70018368} {"current_steps": 36385, "total_steps": 40000, "loss": 0.0, "lr": 1.001441575296208e-06, "epoch": 5.935639122277511, "percentage": 90.96, "elapsed_time": "8:20:29", "remaining_time": "0:49:43", "throughput": 2331.95, "total_tokens": 70027024} {"current_steps": 36390, "total_steps": 40000, "loss": 0.1755, "lr": 9.986925920452139e-07, "epoch": 5.936454849498328, "percentage": 90.97, "elapsed_time": "8:20:31", "remaining_time": "0:49:39", "throughput": 2332.14, "total_tokens": 70037536} {"current_steps": 36395, "total_steps": 40000, "loss": 0.0001, "lr": 9.959473100974475e-07, "epoch": 5.937270576719145, "percentage": 90.99, "elapsed_time": "8:20:33", "remaining_time": "0:49:34", "throughput": 2332.3, "total_tokens": 70047360} {"current_steps": 36400, "total_steps": 40000, "loss": 0.0001, "lr": 9.932057298762564e-07, "epoch": 5.938086303939962, "percentage": 91.0, "elapsed_time": "8:20:35", "remaining_time": "0:49:30", "throughput": 2332.45, "total_tokens": 70057024} {"current_steps": 36400, "total_steps": 40000, "eval_loss": 0.42632344365119934, "epoch": 5.938086303939962, "percentage": 91.0, "elapsed_time": "8:21:56", "remaining_time": "0:49:38", "throughput": 2326.19, "total_tokens": 70057024} {"current_steps": 36405, "total_steps": 40000, "loss": 0.0, "lr": 9.90467851804433e-07, "epoch": 5.938902031160779, "percentage": 91.01, "elapsed_time": "8:22:00", "remaining_time": "0:49:34", "throughput": 2326.22, "total_tokens": 70067024} {"current_steps": 36410, "total_steps": 40000, "loss": 0.0816, "lr": 9.877336763041895e-07, "epoch": 5.939717758381597, "percentage": 91.03, "elapsed_time": "8:22:02", "remaining_time": "0:49:30", "throughput": 2326.36, "total_tokens": 70075968} {"current_steps": 36415, "total_steps": 40000, "loss": 0.0001, "lr": 9.850032037971662e-07, "epoch": 5.940533485602415, "percentage": 91.04, "elapsed_time": "8:22:04", "remaining_time": "0:49:25", "throughput": 2326.5, "total_tokens": 70085328} {"current_steps": 36420, "total_steps": 40000, "loss": 0.048, "lr": 9.822764347044406e-07, "epoch": 5.941349212823232, "percentage": 91.05, "elapsed_time": "8:22:06", "remaining_time": "0:49:21", "throughput": 2326.64, "total_tokens": 70094592} {"current_steps": 36425, "total_steps": 40000, "loss": 0.0805, "lr": 9.795533694465175e-07, "epoch": 5.94216494004405, "percentage": 91.06, "elapsed_time": "8:22:09", "remaining_time": "0:49:17", "throughput": 2326.82, "total_tokens": 70104880} {"current_steps": 36430, "total_steps": 40000, "loss": 0.0, "lr": 9.768340084433197e-07, "epoch": 5.942980667264867, "percentage": 91.07, "elapsed_time": "8:22:11", "remaining_time": "0:49:12", "throughput": 2326.89, "total_tokens": 70111904} {"current_steps": 36435, "total_steps": 40000, "loss": 0.0017, "lr": 9.741183521142143e-07, "epoch": 5.943796394485684, "percentage": 91.09, "elapsed_time": "8:22:13", "remaining_time": "0:49:08", "throughput": 2327.0, "total_tokens": 70120224} {"current_steps": 36440, "total_steps": 40000, "loss": 0.0, "lr": 9.714064008779889e-07, "epoch": 5.944612121706501, "percentage": 91.1, "elapsed_time": "8:22:15", "remaining_time": "0:49:04", "throughput": 2327.14, "total_tokens": 70129552} {"current_steps": 36445, "total_steps": 40000, "loss": 0.0, "lr": 9.686981551528584e-07, "epoch": 5.945427848927318, "percentage": 91.11, "elapsed_time": "8:22:17", "remaining_time": "0:48:59", "throughput": 2327.33, "total_tokens": 70140112} {"current_steps": 36450, "total_steps": 40000, "loss": 0.0001, "lr": 9.65993615356467e-07, "epoch": 5.946243576148136, "percentage": 91.12, "elapsed_time": "8:22:19", "remaining_time": "0:48:55", "throughput": 2327.51, "total_tokens": 70150624} {"current_steps": 36455, "total_steps": 40000, "loss": 0.1274, "lr": 9.632927819058917e-07, "epoch": 5.947059303368953, "percentage": 91.14, "elapsed_time": "8:22:21", "remaining_time": "0:48:51", "throughput": 2327.63, "total_tokens": 70159120} {"current_steps": 36460, "total_steps": 40000, "loss": 0.0, "lr": 9.605956552176305e-07, "epoch": 5.9478750305897705, "percentage": 91.15, "elapsed_time": "8:22:24", "remaining_time": "0:48:46", "throughput": 2327.78, "total_tokens": 70168752} {"current_steps": 36465, "total_steps": 40000, "loss": 0.0, "lr": 9.579022357076223e-07, "epoch": 5.9486907578105885, "percentage": 91.16, "elapsed_time": "8:22:26", "remaining_time": "0:48:42", "throughput": 2327.92, "total_tokens": 70177824} {"current_steps": 36470, "total_steps": 40000, "loss": 0.0001, "lr": 9.552125237912158e-07, "epoch": 5.949506485031406, "percentage": 91.17, "elapsed_time": "8:22:28", "remaining_time": "0:48:38", "throughput": 2328.04, "total_tokens": 70186688} {"current_steps": 36475, "total_steps": 40000, "loss": 0.0, "lr": 9.525265198832096e-07, "epoch": 5.950322212252223, "percentage": 91.19, "elapsed_time": "8:22:30", "remaining_time": "0:48:33", "throughput": 2328.2, "total_tokens": 70196384} {"current_steps": 36480, "total_steps": 40000, "loss": 0.0001, "lr": 9.498442243978112e-07, "epoch": 5.95113793947304, "percentage": 91.2, "elapsed_time": "8:22:32", "remaining_time": "0:48:29", "throughput": 2328.34, "total_tokens": 70205584} {"current_steps": 36485, "total_steps": 40000, "loss": 0.0011, "lr": 9.471656377486649e-07, "epoch": 5.951953666693858, "percentage": 91.21, "elapsed_time": "8:22:34", "remaining_time": "0:48:25", "throughput": 2328.51, "total_tokens": 70215600} {"current_steps": 36490, "total_steps": 40000, "loss": 0.1459, "lr": 9.444907603488456e-07, "epoch": 5.952769393914675, "percentage": 91.22, "elapsed_time": "8:22:36", "remaining_time": "0:48:20", "throughput": 2328.67, "total_tokens": 70225408} {"current_steps": 36495, "total_steps": 40000, "loss": 0.0954, "lr": 9.418195926108514e-07, "epoch": 5.953585121135492, "percentage": 91.24, "elapsed_time": "8:22:39", "remaining_time": "0:48:16", "throughput": 2328.8, "total_tokens": 70234592} {"current_steps": 36500, "total_steps": 40000, "loss": 0.0006, "lr": 9.391521349466053e-07, "epoch": 5.954400848356309, "percentage": 91.25, "elapsed_time": "8:22:41", "remaining_time": "0:48:12", "throughput": 2328.92, "total_tokens": 70243024} {"current_steps": 36505, "total_steps": 40000, "loss": 0.0001, "lr": 9.364883877674758e-07, "epoch": 5.955216575577127, "percentage": 91.26, "elapsed_time": "8:22:43", "remaining_time": "0:48:07", "throughput": 2329.07, "total_tokens": 70252672} {"current_steps": 36510, "total_steps": 40000, "loss": 0.0001, "lr": 9.33828351484231e-07, "epoch": 5.9560323027979445, "percentage": 91.27, "elapsed_time": "8:22:45", "remaining_time": "0:48:03", "throughput": 2329.22, "total_tokens": 70262160} {"current_steps": 36515, "total_steps": 40000, "loss": 0.0002, "lr": 9.311720265070906e-07, "epoch": 5.956848030018762, "percentage": 91.29, "elapsed_time": "8:22:47", "remaining_time": "0:47:59", "throughput": 2329.32, "total_tokens": 70270304} {"current_steps": 36520, "total_steps": 40000, "loss": 0.0528, "lr": 9.285194132456931e-07, "epoch": 5.957663757239579, "percentage": 91.3, "elapsed_time": "8:22:49", "remaining_time": "0:47:54", "throughput": 2329.5, "total_tokens": 70280592} {"current_steps": 36525, "total_steps": 40000, "loss": 0.0814, "lr": 9.258705121091032e-07, "epoch": 5.958479484460397, "percentage": 91.31, "elapsed_time": "8:22:51", "remaining_time": "0:47:50", "throughput": 2329.61, "total_tokens": 70288800} {"current_steps": 36530, "total_steps": 40000, "loss": 0.059, "lr": 9.232253235058136e-07, "epoch": 5.959295211681214, "percentage": 91.33, "elapsed_time": "8:22:54", "remaining_time": "0:47:46", "throughput": 2329.77, "total_tokens": 70298608} {"current_steps": 36535, "total_steps": 40000, "loss": 0.0, "lr": 9.205838478437478e-07, "epoch": 5.960110938902031, "percentage": 91.34, "elapsed_time": "8:22:56", "remaining_time": "0:47:41", "throughput": 2329.96, "total_tokens": 70309456} {"current_steps": 36540, "total_steps": 40000, "loss": 0.0529, "lr": 9.179460855302524e-07, "epoch": 5.960926666122848, "percentage": 91.35, "elapsed_time": "8:22:58", "remaining_time": "0:47:37", "throughput": 2330.12, "total_tokens": 70319168} {"current_steps": 36545, "total_steps": 40000, "loss": 0.0, "lr": 9.153120369721046e-07, "epoch": 5.961742393343666, "percentage": 91.36, "elapsed_time": "8:23:00", "remaining_time": "0:47:33", "throughput": 2330.27, "total_tokens": 70328832} {"current_steps": 36550, "total_steps": 40000, "loss": 0.0002, "lr": 9.126817025755103e-07, "epoch": 5.962558120564483, "percentage": 91.38, "elapsed_time": "8:23:02", "remaining_time": "0:47:28", "throughput": 2330.38, "total_tokens": 70337232} {"current_steps": 36555, "total_steps": 40000, "loss": 0.0002, "lr": 9.100550827460947e-07, "epoch": 5.9633738477853, "percentage": 91.39, "elapsed_time": "8:23:04", "remaining_time": "0:47:24", "throughput": 2330.56, "total_tokens": 70347504} {"current_steps": 36560, "total_steps": 40000, "loss": 0.0178, "lr": 9.0743217788892e-07, "epoch": 5.964189575006118, "percentage": 91.4, "elapsed_time": "8:23:06", "remaining_time": "0:47:20", "throughput": 2330.66, "total_tokens": 70355600} {"current_steps": 36565, "total_steps": 40000, "loss": 0.0001, "lr": 9.048129884084683e-07, "epoch": 5.965005302226936, "percentage": 91.41, "elapsed_time": "8:23:09", "remaining_time": "0:47:16", "throughput": 2330.8, "total_tokens": 70364768} {"current_steps": 36570, "total_steps": 40000, "loss": 0.0022, "lr": 9.021975147086553e-07, "epoch": 5.965821029447753, "percentage": 91.42, "elapsed_time": "8:23:11", "remaining_time": "0:47:11", "throughput": 2330.97, "total_tokens": 70374816} {"current_steps": 36575, "total_steps": 40000, "loss": 0.0001, "lr": 8.995857571928141e-07, "epoch": 5.96663675666857, "percentage": 91.44, "elapsed_time": "8:23:13", "remaining_time": "0:47:07", "throughput": 2331.14, "total_tokens": 70385120} {"current_steps": 36580, "total_steps": 40000, "loss": 0.0002, "lr": 8.969777162637139e-07, "epoch": 5.967452483889387, "percentage": 91.45, "elapsed_time": "8:23:15", "remaining_time": "0:47:03", "throughput": 2331.3, "total_tokens": 70394816} {"current_steps": 36585, "total_steps": 40000, "loss": 0.0002, "lr": 8.943733923235525e-07, "epoch": 5.968268211110205, "percentage": 91.46, "elapsed_time": "8:23:17", "remaining_time": "0:46:58", "throughput": 2331.44, "total_tokens": 70404000} {"current_steps": 36590, "total_steps": 40000, "loss": 0.0, "lr": 8.917727857739394e-07, "epoch": 5.969083938331022, "percentage": 91.47, "elapsed_time": "8:23:19", "remaining_time": "0:46:54", "throughput": 2331.57, "total_tokens": 70413008} {"current_steps": 36595, "total_steps": 40000, "loss": 0.0, "lr": 8.891758970159258e-07, "epoch": 5.969899665551839, "percentage": 91.49, "elapsed_time": "8:23:21", "remaining_time": "0:46:50", "throughput": 2331.73, "total_tokens": 70422720} {"current_steps": 36600, "total_steps": 40000, "loss": 0.0, "lr": 8.86582726449986e-07, "epoch": 5.970715392772657, "percentage": 91.5, "elapsed_time": "8:23:24", "remaining_time": "0:46:45", "throughput": 2331.9, "total_tokens": 70432848} {"current_steps": 36600, "total_steps": 40000, "eval_loss": 0.4205092191696167, "epoch": 5.970715392772657, "percentage": 91.5, "elapsed_time": "8:24:44", "remaining_time": "0:46:53", "throughput": 2325.68, "total_tokens": 70432848} {"current_steps": 36605, "total_steps": 40000, "loss": 0.0005, "lr": 8.839932744760165e-07, "epoch": 5.971531119993474, "percentage": 91.51, "elapsed_time": "8:24:48", "remaining_time": "0:46:49", "throughput": 2325.71, "total_tokens": 70442848} {"current_steps": 36610, "total_steps": 40000, "loss": 0.0, "lr": 8.814075414933482e-07, "epoch": 5.9723468472142915, "percentage": 91.53, "elapsed_time": "8:24:50", "remaining_time": "0:46:44", "throughput": 2325.85, "total_tokens": 70452112} {"current_steps": 36615, "total_steps": 40000, "loss": 0.0022, "lr": 8.788255279007257e-07, "epoch": 5.973162574435109, "percentage": 91.54, "elapsed_time": "8:24:53", "remaining_time": "0:46:40", "throughput": 2326.0, "total_tokens": 70461680} {"current_steps": 36620, "total_steps": 40000, "loss": 0.0002, "lr": 8.762472340963362e-07, "epoch": 5.973978301655926, "percentage": 91.55, "elapsed_time": "8:24:55", "remaining_time": "0:46:36", "throughput": 2326.16, "total_tokens": 70471680} {"current_steps": 36625, "total_steps": 40000, "loss": 0.0024, "lr": 8.736726604777811e-07, "epoch": 5.974794028876744, "percentage": 91.56, "elapsed_time": "8:24:57", "remaining_time": "0:46:31", "throughput": 2326.28, "total_tokens": 70480176} {"current_steps": 36630, "total_steps": 40000, "loss": 0.0001, "lr": 8.711018074420901e-07, "epoch": 5.975609756097561, "percentage": 91.57, "elapsed_time": "8:24:59", "remaining_time": "0:46:27", "throughput": 2326.46, "total_tokens": 70490528} {"current_steps": 36635, "total_steps": 40000, "loss": 0.0001, "lr": 8.685346753857209e-07, "epoch": 5.976425483318378, "percentage": 91.59, "elapsed_time": "8:25:01", "remaining_time": "0:46:23", "throughput": 2326.63, "total_tokens": 70500896} {"current_steps": 36640, "total_steps": 40000, "loss": 0.0002, "lr": 8.659712647045654e-07, "epoch": 5.977241210539196, "percentage": 91.6, "elapsed_time": "8:25:03", "remaining_time": "0:46:18", "throughput": 2326.79, "total_tokens": 70510656} {"current_steps": 36645, "total_steps": 40000, "loss": 0.0001, "lr": 8.634115757939209e-07, "epoch": 5.978056937760013, "percentage": 91.61, "elapsed_time": "8:25:05", "remaining_time": "0:46:14", "throughput": 2326.94, "total_tokens": 70520176} {"current_steps": 36650, "total_steps": 40000, "loss": 0.0001, "lr": 8.608556090485387e-07, "epoch": 5.97887266498083, "percentage": 91.62, "elapsed_time": "8:25:08", "remaining_time": "0:46:10", "throughput": 2327.09, "total_tokens": 70529808} {"current_steps": 36655, "total_steps": 40000, "loss": 0.0001, "lr": 8.583033648625671e-07, "epoch": 5.9796883922016475, "percentage": 91.64, "elapsed_time": "8:25:10", "remaining_time": "0:46:06", "throughput": 2327.27, "total_tokens": 70540064} {"current_steps": 36660, "total_steps": 40000, "loss": 0.0, "lr": 8.557548436295998e-07, "epoch": 5.9805041194224655, "percentage": 91.65, "elapsed_time": "8:25:12", "remaining_time": "0:46:01", "throughput": 2327.44, "total_tokens": 70550352} {"current_steps": 36665, "total_steps": 40000, "loss": 0.0003, "lr": 8.532100457426556e-07, "epoch": 5.981319846643283, "percentage": 91.66, "elapsed_time": "8:25:14", "remaining_time": "0:45:57", "throughput": 2327.59, "total_tokens": 70559856} {"current_steps": 36670, "total_steps": 40000, "loss": 0.0, "lr": 8.506689715941679e-07, "epoch": 5.9821355738641, "percentage": 91.67, "elapsed_time": "8:25:16", "remaining_time": "0:45:53", "throughput": 2327.7, "total_tokens": 70568048} {"current_steps": 36675, "total_steps": 40000, "loss": 0.0149, "lr": 8.481316215760011e-07, "epoch": 5.982951301084917, "percentage": 91.69, "elapsed_time": "8:25:18", "remaining_time": "0:45:48", "throughput": 2327.88, "total_tokens": 70578640} {"current_steps": 36680, "total_steps": 40000, "loss": 0.0027, "lr": 8.455979960794558e-07, "epoch": 5.983767028305735, "percentage": 91.7, "elapsed_time": "8:25:20", "remaining_time": "0:45:44", "throughput": 2328.0, "total_tokens": 70587232} {"current_steps": 36685, "total_steps": 40000, "loss": 0.0, "lr": 8.430680954952364e-07, "epoch": 5.984582755526552, "percentage": 91.71, "elapsed_time": "8:25:23", "remaining_time": "0:45:40", "throughput": 2328.16, "total_tokens": 70597088} {"current_steps": 36690, "total_steps": 40000, "loss": 0.1191, "lr": 8.405419202134974e-07, "epoch": 5.985398482747369, "percentage": 91.72, "elapsed_time": "8:25:25", "remaining_time": "0:45:35", "throughput": 2328.32, "total_tokens": 70606992} {"current_steps": 36695, "total_steps": 40000, "loss": 0.047, "lr": 8.380194706237993e-07, "epoch": 5.986214209968186, "percentage": 91.74, "elapsed_time": "8:25:27", "remaining_time": "0:45:31", "throughput": 2328.5, "total_tokens": 70617440} {"current_steps": 36700, "total_steps": 40000, "loss": 0.0001, "lr": 8.355007471151366e-07, "epoch": 5.987029937189004, "percentage": 91.75, "elapsed_time": "8:25:29", "remaining_time": "0:45:27", "throughput": 2328.66, "total_tokens": 70627088} {"current_steps": 36705, "total_steps": 40000, "loss": 0.0001, "lr": 8.329857500759292e-07, "epoch": 5.9878456644098215, "percentage": 91.76, "elapsed_time": "8:25:31", "remaining_time": "0:45:22", "throughput": 2328.83, "total_tokens": 70637296} {"current_steps": 36710, "total_steps": 40000, "loss": 0.0007, "lr": 8.304744798940194e-07, "epoch": 5.988661391630639, "percentage": 91.77, "elapsed_time": "8:25:33", "remaining_time": "0:45:18", "throughput": 2328.99, "total_tokens": 70647104} {"current_steps": 36715, "total_steps": 40000, "loss": 0.069, "lr": 8.279669369566756e-07, "epoch": 5.989477118851456, "percentage": 91.79, "elapsed_time": "8:25:35", "remaining_time": "0:45:14", "throughput": 2329.12, "total_tokens": 70656080} {"current_steps": 36720, "total_steps": 40000, "loss": 0.0001, "lr": 8.254631216505993e-07, "epoch": 5.990292846072274, "percentage": 91.8, "elapsed_time": "8:25:38", "remaining_time": "0:45:09", "throughput": 2329.24, "total_tokens": 70664832} {"current_steps": 36725, "total_steps": 40000, "loss": 0.0, "lr": 8.229630343619038e-07, "epoch": 5.991108573293091, "percentage": 91.81, "elapsed_time": "8:25:40", "remaining_time": "0:45:05", "throughput": 2329.4, "total_tokens": 70674400} {"current_steps": 36730, "total_steps": 40000, "loss": 0.0, "lr": 8.204666754761392e-07, "epoch": 5.991924300513908, "percentage": 91.83, "elapsed_time": "8:25:42", "remaining_time": "0:45:01", "throughput": 2329.58, "total_tokens": 70684848} {"current_steps": 36735, "total_steps": 40000, "loss": 0.0001, "lr": 8.179740453782669e-07, "epoch": 5.992740027734725, "percentage": 91.84, "elapsed_time": "8:25:44", "remaining_time": "0:44:57", "throughput": 2329.72, "total_tokens": 70694224} {"current_steps": 36740, "total_steps": 40000, "loss": 0.0691, "lr": 8.154851444526907e-07, "epoch": 5.993555754955543, "percentage": 91.85, "elapsed_time": "8:25:46", "remaining_time": "0:44:52", "throughput": 2329.91, "total_tokens": 70705168} {"current_steps": 36745, "total_steps": 40000, "loss": 0.0003, "lr": 8.129999730832283e-07, "epoch": 5.99437148217636, "percentage": 91.86, "elapsed_time": "8:25:48", "remaining_time": "0:44:48", "throughput": 2330.06, "total_tokens": 70714576} {"current_steps": 36750, "total_steps": 40000, "loss": 0.0393, "lr": 8.105185316531178e-07, "epoch": 5.995187209397177, "percentage": 91.88, "elapsed_time": "8:25:50", "remaining_time": "0:44:44", "throughput": 2330.17, "total_tokens": 70722832} {"current_steps": 36755, "total_steps": 40000, "loss": 0.0003, "lr": 8.08040820545039e-07, "epoch": 5.9960029366179945, "percentage": 91.89, "elapsed_time": "8:25:53", "remaining_time": "0:44:39", "throughput": 2330.34, "total_tokens": 70732944} {"current_steps": 36760, "total_steps": 40000, "loss": 0.0671, "lr": 8.055668401410782e-07, "epoch": 5.996818663838813, "percentage": 91.9, "elapsed_time": "8:25:55", "remaining_time": "0:44:35", "throughput": 2330.49, "total_tokens": 70742704} {"current_steps": 36765, "total_steps": 40000, "loss": 0.0001, "lr": 8.030965908227578e-07, "epoch": 5.99763439105963, "percentage": 91.91, "elapsed_time": "8:25:57", "remaining_time": "0:44:31", "throughput": 2330.64, "total_tokens": 70752240} {"current_steps": 36770, "total_steps": 40000, "loss": 0.0002, "lr": 8.006300729710203e-07, "epoch": 5.998450118280447, "percentage": 91.92, "elapsed_time": "8:25:59", "remaining_time": "0:44:26", "throughput": 2330.77, "total_tokens": 70761120} {"current_steps": 36775, "total_steps": 40000, "loss": 0.0001, "lr": 7.981672869662337e-07, "epoch": 5.999265845501265, "percentage": 91.94, "elapsed_time": "8:26:01", "remaining_time": "0:44:22", "throughput": 2330.89, "total_tokens": 70769664} {"current_steps": 36780, "total_steps": 40000, "loss": 0.0001, "lr": 7.957082331881888e-07, "epoch": 6.0, "percentage": 91.95, "elapsed_time": "8:26:03", "remaining_time": "0:44:18", "throughput": 2331.01, "total_tokens": 70778304} {"current_steps": 36785, "total_steps": 40000, "loss": 0.0, "lr": 7.932529120161069e-07, "epoch": 6.000815727220817, "percentage": 91.96, "elapsed_time": "8:26:06", "remaining_time": "0:44:13", "throughput": 2331.15, "total_tokens": 70788096} {"current_steps": 36790, "total_steps": 40000, "loss": 0.0, "lr": 7.908013238286243e-07, "epoch": 6.001631454441635, "percentage": 91.97, "elapsed_time": "8:26:08", "remaining_time": "0:44:09", "throughput": 2331.34, "total_tokens": 70798704} {"current_steps": 36795, "total_steps": 40000, "loss": 0.0002, "lr": 7.883534690038136e-07, "epoch": 6.002447181662452, "percentage": 91.99, "elapsed_time": "8:26:10", "remaining_time": "0:44:05", "throughput": 2331.53, "total_tokens": 70809536} {"current_steps": 36800, "total_steps": 40000, "loss": 0.0001, "lr": 7.859093479191559e-07, "epoch": 6.003262908883269, "percentage": 92.0, "elapsed_time": "8:26:12", "remaining_time": "0:44:01", "throughput": 2331.69, "total_tokens": 70819440} {"current_steps": 36800, "total_steps": 40000, "eval_loss": 0.4214898943901062, "epoch": 6.003262908883269, "percentage": 92.0, "elapsed_time": "8:27:33", "remaining_time": "0:44:08", "throughput": 2325.49, "total_tokens": 70819440} {"current_steps": 36805, "total_steps": 40000, "loss": 0.0002, "lr": 7.834689609515722e-07, "epoch": 6.0040786361040865, "percentage": 92.01, "elapsed_time": "8:27:37", "remaining_time": "0:44:03", "throughput": 2325.5, "total_tokens": 70828736} {"current_steps": 36810, "total_steps": 40000, "loss": 0.0006, "lr": 7.810323084774002e-07, "epoch": 6.004894363324905, "percentage": 92.03, "elapsed_time": "8:27:39", "remaining_time": "0:43:59", "throughput": 2325.65, "total_tokens": 70838128} {"current_steps": 36815, "total_steps": 40000, "loss": 0.0001, "lr": 7.785993908723976e-07, "epoch": 6.005710090545722, "percentage": 92.04, "elapsed_time": "8:27:41", "remaining_time": "0:43:55", "throughput": 2325.78, "total_tokens": 70847104} {"current_steps": 36820, "total_steps": 40000, "loss": 0.0002, "lr": 7.761702085117534e-07, "epoch": 6.006525817766539, "percentage": 92.05, "elapsed_time": "8:27:43", "remaining_time": "0:43:51", "throughput": 2325.89, "total_tokens": 70855520} {"current_steps": 36825, "total_steps": 40000, "loss": 0.0, "lr": 7.737447617700844e-07, "epoch": 6.007341544987356, "percentage": 92.06, "elapsed_time": "8:27:45", "remaining_time": "0:43:46", "throughput": 2326.07, "total_tokens": 70865984} {"current_steps": 36830, "total_steps": 40000, "loss": 0.0001, "lr": 7.713230510214136e-07, "epoch": 6.008157272208174, "percentage": 92.07, "elapsed_time": "8:27:48", "remaining_time": "0:43:42", "throughput": 2326.2, "total_tokens": 70874800} {"current_steps": 36835, "total_steps": 40000, "loss": 0.1205, "lr": 7.689050766392092e-07, "epoch": 6.008972999428991, "percentage": 92.09, "elapsed_time": "8:27:50", "remaining_time": "0:43:38", "throughput": 2326.3, "total_tokens": 70882912} {"current_steps": 36840, "total_steps": 40000, "loss": 0.0, "lr": 7.664908389963477e-07, "epoch": 6.009788726649808, "percentage": 92.1, "elapsed_time": "8:27:52", "remaining_time": "0:43:33", "throughput": 2326.47, "total_tokens": 70892960} {"current_steps": 36845, "total_steps": 40000, "loss": 0.0004, "lr": 7.64080338465134e-07, "epoch": 6.010604453870625, "percentage": 92.11, "elapsed_time": "8:27:54", "remaining_time": "0:43:29", "throughput": 2326.59, "total_tokens": 70901744} {"current_steps": 36850, "total_steps": 40000, "loss": 0.0002, "lr": 7.616735754173043e-07, "epoch": 6.011420181091443, "percentage": 92.12, "elapsed_time": "8:27:56", "remaining_time": "0:43:25", "throughput": 2326.76, "total_tokens": 70911872} {"current_steps": 36855, "total_steps": 40000, "loss": 0.0001, "lr": 7.592705502240005e-07, "epoch": 6.0122359083122605, "percentage": 92.14, "elapsed_time": "8:27:58", "remaining_time": "0:43:20", "throughput": 2326.92, "total_tokens": 70921696} {"current_steps": 36860, "total_steps": 40000, "loss": 0.0354, "lr": 7.568712632558095e-07, "epoch": 6.013051635533078, "percentage": 92.15, "elapsed_time": "8:28:00", "remaining_time": "0:43:16", "throughput": 2327.09, "total_tokens": 70932000} {"current_steps": 36865, "total_steps": 40000, "loss": 0.0284, "lr": 7.544757148827297e-07, "epoch": 6.013867362753895, "percentage": 92.16, "elapsed_time": "8:28:03", "remaining_time": "0:43:12", "throughput": 2327.27, "total_tokens": 70942496} {"current_steps": 36870, "total_steps": 40000, "loss": 0.0, "lr": 7.520839054741797e-07, "epoch": 6.014683089974713, "percentage": 92.17, "elapsed_time": "8:28:05", "remaining_time": "0:43:07", "throughput": 2327.47, "total_tokens": 70953536} {"current_steps": 36875, "total_steps": 40000, "loss": 0.0, "lr": 7.496958353990113e-07, "epoch": 6.01549881719553, "percentage": 92.19, "elapsed_time": "8:28:07", "remaining_time": "0:43:03", "throughput": 2327.65, "total_tokens": 70963952} {"current_steps": 36880, "total_steps": 40000, "loss": 0.0002, "lr": 7.473115050254941e-07, "epoch": 6.016314544416347, "percentage": 92.2, "elapsed_time": "8:28:09", "remaining_time": "0:42:59", "throughput": 2327.83, "total_tokens": 70974400} {"current_steps": 36885, "total_steps": 40000, "loss": 0.0001, "lr": 7.449309147213173e-07, "epoch": 6.017130271637164, "percentage": 92.21, "elapsed_time": "8:28:11", "remaining_time": "0:42:55", "throughput": 2328.02, "total_tokens": 70985296} {"current_steps": 36890, "total_steps": 40000, "loss": 0.0001, "lr": 7.425540648536067e-07, "epoch": 6.017945998857982, "percentage": 92.22, "elapsed_time": "8:28:13", "remaining_time": "0:42:50", "throughput": 2328.12, "total_tokens": 70993104} {"current_steps": 36895, "total_steps": 40000, "loss": 0.0, "lr": 7.40180955788894e-07, "epoch": 6.018761726078799, "percentage": 92.24, "elapsed_time": "8:28:15", "remaining_time": "0:42:46", "throughput": 2328.26, "total_tokens": 71002528} {"current_steps": 36900, "total_steps": 40000, "loss": 0.0001, "lr": 7.378115878931474e-07, "epoch": 6.0195774532996165, "percentage": 92.25, "elapsed_time": "8:28:18", "remaining_time": "0:42:42", "throughput": 2328.4, "total_tokens": 71011712} {"current_steps": 36905, "total_steps": 40000, "loss": 0.0002, "lr": 7.354459615317527e-07, "epoch": 6.020393180520434, "percentage": 92.26, "elapsed_time": "8:28:20", "remaining_time": "0:42:37", "throughput": 2328.52, "total_tokens": 71020528} {"current_steps": 36910, "total_steps": 40000, "loss": 0.094, "lr": 7.33084077069518e-07, "epoch": 6.021208907741252, "percentage": 92.27, "elapsed_time": "8:28:22", "remaining_time": "0:42:33", "throughput": 2328.66, "total_tokens": 71029680} {"current_steps": 36915, "total_steps": 40000, "loss": 0.0, "lr": 7.307259348706768e-07, "epoch": 6.022024634962069, "percentage": 92.29, "elapsed_time": "8:28:24", "remaining_time": "0:42:29", "throughput": 2328.75, "total_tokens": 71037536} {"current_steps": 36920, "total_steps": 40000, "loss": 0.0, "lr": 7.283715352988801e-07, "epoch": 6.022840362182886, "percentage": 92.3, "elapsed_time": "8:28:26", "remaining_time": "0:42:24", "throughput": 2328.88, "total_tokens": 71046480} {"current_steps": 36925, "total_steps": 40000, "loss": 0.0025, "lr": 7.260208787172068e-07, "epoch": 6.023656089403703, "percentage": 92.31, "elapsed_time": "8:28:28", "remaining_time": "0:42:20", "throughput": 2329.05, "total_tokens": 71056592} {"current_steps": 36930, "total_steps": 40000, "loss": 0.0001, "lr": 7.23673965488167e-07, "epoch": 6.024471816624521, "percentage": 92.33, "elapsed_time": "8:28:30", "remaining_time": "0:42:16", "throughput": 2329.19, "total_tokens": 71065952} {"current_steps": 36935, "total_steps": 40000, "loss": 0.0001, "lr": 7.213307959736709e-07, "epoch": 6.025287543845338, "percentage": 92.34, "elapsed_time": "8:28:33", "remaining_time": "0:42:12", "throughput": 2329.38, "total_tokens": 71076672} {"current_steps": 36940, "total_steps": 40000, "loss": 0.0, "lr": 7.189913705350715e-07, "epoch": 6.026103271066155, "percentage": 92.35, "elapsed_time": "8:28:35", "remaining_time": "0:42:07", "throughput": 2329.52, "total_tokens": 71085760} {"current_steps": 36945, "total_steps": 40000, "loss": 0.0, "lr": 7.166556895331411e-07, "epoch": 6.026918998286972, "percentage": 92.36, "elapsed_time": "8:28:37", "remaining_time": "0:42:03", "throughput": 2329.69, "total_tokens": 71095920} {"current_steps": 36950, "total_steps": 40000, "loss": 0.0004, "lr": 7.143237533280639e-07, "epoch": 6.02773472550779, "percentage": 92.38, "elapsed_time": "8:28:39", "remaining_time": "0:41:59", "throughput": 2329.83, "total_tokens": 71105264} {"current_steps": 36955, "total_steps": 40000, "loss": 0.0001, "lr": 7.119955622794578e-07, "epoch": 6.028550452728608, "percentage": 92.39, "elapsed_time": "8:28:41", "remaining_time": "0:41:54", "throughput": 2329.98, "total_tokens": 71114736} {"current_steps": 36960, "total_steps": 40000, "loss": 0.0163, "lr": 7.096711167463577e-07, "epoch": 6.029366179949425, "percentage": 92.4, "elapsed_time": "8:28:43", "remaining_time": "0:41:50", "throughput": 2330.14, "total_tokens": 71124736} {"current_steps": 36965, "total_steps": 40000, "loss": 0.0006, "lr": 7.073504170872213e-07, "epoch": 6.030181907170242, "percentage": 92.41, "elapsed_time": "8:28:45", "remaining_time": "0:41:46", "throughput": 2330.31, "total_tokens": 71135136} {"current_steps": 36970, "total_steps": 40000, "loss": 0.0001, "lr": 7.05033463659932e-07, "epoch": 6.03099763439106, "percentage": 92.42, "elapsed_time": "8:28:48", "remaining_time": "0:41:42", "throughput": 2330.43, "total_tokens": 71143680} {"current_steps": 36975, "total_steps": 40000, "loss": 0.0026, "lr": 7.027202568217928e-07, "epoch": 6.031813361611877, "percentage": 92.44, "elapsed_time": "8:28:50", "remaining_time": "0:41:37", "throughput": 2330.61, "total_tokens": 71154176} {"current_steps": 36980, "total_steps": 40000, "loss": 0.0002, "lr": 7.004107969295293e-07, "epoch": 6.032629088832694, "percentage": 92.45, "elapsed_time": "8:28:52", "remaining_time": "0:41:33", "throughput": 2330.74, "total_tokens": 71163104} {"current_steps": 36985, "total_steps": 40000, "loss": 0.0001, "lr": 6.9810508433929e-07, "epoch": 6.033444816053512, "percentage": 92.46, "elapsed_time": "8:28:54", "remaining_time": "0:41:29", "throughput": 2330.92, "total_tokens": 71173616} {"current_steps": 36990, "total_steps": 40000, "loss": 0.0001, "lr": 6.958031194066406e-07, "epoch": 6.034260543274329, "percentage": 92.47, "elapsed_time": "8:28:56", "remaining_time": "0:41:24", "throughput": 2331.06, "total_tokens": 71182832} {"current_steps": 36995, "total_steps": 40000, "loss": 0.0097, "lr": 6.935049024865776e-07, "epoch": 6.035076270495146, "percentage": 92.49, "elapsed_time": "8:28:58", "remaining_time": "0:41:20", "throughput": 2331.24, "total_tokens": 71193280} {"current_steps": 37000, "total_steps": 40000, "loss": 0.0001, "lr": 6.912104339335118e-07, "epoch": 6.0358919977159635, "percentage": 92.5, "elapsed_time": "8:29:00", "remaining_time": "0:41:16", "throughput": 2331.39, "total_tokens": 71203008} {"current_steps": 37000, "total_steps": 40000, "eval_loss": 0.42486536502838135, "epoch": 6.0358919977159635, "percentage": 92.5, "elapsed_time": "8:30:21", "remaining_time": "0:41:22", "throughput": 2325.24, "total_tokens": 71203008} {"current_steps": 37005, "total_steps": 40000, "loss": 0.0001, "lr": 6.889197141012799e-07, "epoch": 6.0367077249367815, "percentage": 92.51, "elapsed_time": "8:30:25", "remaining_time": "0:41:18", "throughput": 2325.21, "total_tokens": 71210928} {"current_steps": 37010, "total_steps": 40000, "loss": 0.0001, "lr": 6.866327433431435e-07, "epoch": 6.037523452157599, "percentage": 92.53, "elapsed_time": "8:30:27", "remaining_time": "0:41:14", "throughput": 2325.34, "total_tokens": 71220064} {"current_steps": 37015, "total_steps": 40000, "loss": 0.0001, "lr": 6.843495220117735e-07, "epoch": 6.038339179378416, "percentage": 92.54, "elapsed_time": "8:30:29", "remaining_time": "0:41:10", "throughput": 2325.52, "total_tokens": 71230384} {"current_steps": 37020, "total_steps": 40000, "loss": 0.0, "lr": 6.820700504592798e-07, "epoch": 6.039154906599233, "percentage": 92.55, "elapsed_time": "8:30:32", "remaining_time": "0:41:05", "throughput": 2325.69, "total_tokens": 71240672} {"current_steps": 37025, "total_steps": 40000, "loss": 0.0449, "lr": 6.797943290371839e-07, "epoch": 6.039970633820051, "percentage": 92.56, "elapsed_time": "8:30:34", "remaining_time": "0:41:01", "throughput": 2325.85, "total_tokens": 71250528} {"current_steps": 37030, "total_steps": 40000, "loss": 0.0049, "lr": 6.775223580964274e-07, "epoch": 6.040786361040868, "percentage": 92.58, "elapsed_time": "8:30:36", "remaining_time": "0:40:57", "throughput": 2326.06, "total_tokens": 71262000} {"current_steps": 37035, "total_steps": 40000, "loss": 0.0001, "lr": 6.7525413798738e-07, "epoch": 6.041602088261685, "percentage": 92.59, "elapsed_time": "8:30:38", "remaining_time": "0:40:52", "throughput": 2326.23, "total_tokens": 71272176} {"current_steps": 37040, "total_steps": 40000, "loss": 0.0, "lr": 6.729896690598259e-07, "epoch": 6.042417815482502, "percentage": 92.6, "elapsed_time": "8:30:40", "remaining_time": "0:40:48", "throughput": 2326.35, "total_tokens": 71280784} {"current_steps": 37045, "total_steps": 40000, "loss": 0.0, "lr": 6.707289516629772e-07, "epoch": 6.04323354270332, "percentage": 92.61, "elapsed_time": "8:30:42", "remaining_time": "0:40:44", "throughput": 2326.5, "total_tokens": 71290576} {"current_steps": 37050, "total_steps": 40000, "loss": 0.0096, "lr": 6.684719861454692e-07, "epoch": 6.0440492699241375, "percentage": 92.62, "elapsed_time": "8:30:44", "remaining_time": "0:40:40", "throughput": 2326.69, "total_tokens": 71301216} {"current_steps": 37055, "total_steps": 40000, "loss": 0.0001, "lr": 6.662187728553481e-07, "epoch": 6.044864997144955, "percentage": 92.64, "elapsed_time": "8:30:47", "remaining_time": "0:40:35", "throughput": 2326.81, "total_tokens": 71310096} {"current_steps": 37060, "total_steps": 40000, "loss": 0.0, "lr": 6.639693121400892e-07, "epoch": 6.045680724365772, "percentage": 92.65, "elapsed_time": "8:30:49", "remaining_time": "0:40:31", "throughput": 2327.01, "total_tokens": 71321200} {"current_steps": 37065, "total_steps": 40000, "loss": 0.0002, "lr": 6.617236043465868e-07, "epoch": 6.04649645158659, "percentage": 92.66, "elapsed_time": "8:30:51", "remaining_time": "0:40:27", "throughput": 2327.19, "total_tokens": 71331552} {"current_steps": 37070, "total_steps": 40000, "loss": 0.0007, "lr": 6.594816498211587e-07, "epoch": 6.047312178807407, "percentage": 92.67, "elapsed_time": "8:30:53", "remaining_time": "0:40:22", "throughput": 2327.33, "total_tokens": 71340672} {"current_steps": 37075, "total_steps": 40000, "loss": 0.0, "lr": 6.572434489095447e-07, "epoch": 6.048127906028224, "percentage": 92.69, "elapsed_time": "8:30:55", "remaining_time": "0:40:18", "throughput": 2327.49, "total_tokens": 71350688} {"current_steps": 37080, "total_steps": 40000, "loss": 0.0004, "lr": 6.550090019568994e-07, "epoch": 6.048943633249041, "percentage": 92.7, "elapsed_time": "8:30:57", "remaining_time": "0:40:14", "throughput": 2327.66, "total_tokens": 71360800} {"current_steps": 37085, "total_steps": 40000, "loss": 0.0001, "lr": 6.527783093078027e-07, "epoch": 6.049759360469859, "percentage": 92.71, "elapsed_time": "8:30:59", "remaining_time": "0:40:09", "throughput": 2327.84, "total_tokens": 71371344} {"current_steps": 37090, "total_steps": 40000, "loss": 0.0551, "lr": 6.5055137130626e-07, "epoch": 6.050575087690676, "percentage": 92.73, "elapsed_time": "8:31:02", "remaining_time": "0:40:05", "throughput": 2327.99, "total_tokens": 71380912} {"current_steps": 37095, "total_steps": 40000, "loss": 0.0, "lr": 6.483281882956854e-07, "epoch": 6.051390814911493, "percentage": 92.74, "elapsed_time": "8:31:04", "remaining_time": "0:40:01", "throughput": 2328.17, "total_tokens": 71391504} {"current_steps": 37100, "total_steps": 40000, "loss": 0.0001, "lr": 6.461087606189298e-07, "epoch": 6.052206542132311, "percentage": 92.75, "elapsed_time": "8:31:06", "remaining_time": "0:39:57", "throughput": 2328.29, "total_tokens": 71400112} {"current_steps": 37105, "total_steps": 40000, "loss": 0.0589, "lr": 6.438930886182554e-07, "epoch": 6.053022269353129, "percentage": 92.76, "elapsed_time": "8:31:08", "remaining_time": "0:39:52", "throughput": 2328.41, "total_tokens": 71408832} {"current_steps": 37110, "total_steps": 40000, "loss": 0.0002, "lr": 6.416811726353417e-07, "epoch": 6.053837996573946, "percentage": 92.77, "elapsed_time": "8:31:10", "remaining_time": "0:39:48", "throughput": 2328.55, "total_tokens": 71418096} {"current_steps": 37115, "total_steps": 40000, "loss": 0.0001, "lr": 6.394730130112991e-07, "epoch": 6.054653723794763, "percentage": 92.79, "elapsed_time": "8:31:13", "remaining_time": "0:39:44", "throughput": 2328.66, "total_tokens": 71427056} {"current_steps": 37120, "total_steps": 40000, "loss": 0.0002, "lr": 6.372686100866471e-07, "epoch": 6.05546945101558, "percentage": 92.8, "elapsed_time": "8:31:15", "remaining_time": "0:39:39", "throughput": 2328.79, "total_tokens": 71436272} {"current_steps": 37125, "total_steps": 40000, "loss": 0.0, "lr": 6.350679642013413e-07, "epoch": 6.056285178236398, "percentage": 92.81, "elapsed_time": "8:31:17", "remaining_time": "0:39:35", "throughput": 2328.94, "total_tokens": 71445808} {"current_steps": 37130, "total_steps": 40000, "loss": 0.0001, "lr": 6.328710756947437e-07, "epoch": 6.057100905457215, "percentage": 92.83, "elapsed_time": "8:31:19", "remaining_time": "0:39:31", "throughput": 2329.09, "total_tokens": 71455408} {"current_steps": 37135, "total_steps": 40000, "loss": 0.0001, "lr": 6.306779449056416e-07, "epoch": 6.057916632678032, "percentage": 92.84, "elapsed_time": "8:31:21", "remaining_time": "0:39:27", "throughput": 2329.24, "total_tokens": 71464960} {"current_steps": 37140, "total_steps": 40000, "loss": 0.0, "lr": 6.284885721722422e-07, "epoch": 6.058732359898849, "percentage": 92.85, "elapsed_time": "8:31:23", "remaining_time": "0:39:22", "throughput": 2329.43, "total_tokens": 71475696} {"current_steps": 37145, "total_steps": 40000, "loss": 0.0, "lr": 6.26302957832181e-07, "epoch": 6.059548087119667, "percentage": 92.86, "elapsed_time": "8:31:25", "remaining_time": "0:39:18", "throughput": 2329.61, "total_tokens": 71486368} {"current_steps": 37150, "total_steps": 40000, "loss": 0.0002, "lr": 6.241211022224997e-07, "epoch": 6.0603638143404845, "percentage": 92.88, "elapsed_time": "8:31:28", "remaining_time": "0:39:14", "throughput": 2329.78, "total_tokens": 71496368} {"current_steps": 37155, "total_steps": 40000, "loss": 0.0, "lr": 6.219430056796732e-07, "epoch": 6.061179541561302, "percentage": 92.89, "elapsed_time": "8:31:30", "remaining_time": "0:39:09", "throughput": 2329.92, "total_tokens": 71505712} {"current_steps": 37160, "total_steps": 40000, "loss": 0.0001, "lr": 6.19768668539586e-07, "epoch": 6.06199526878212, "percentage": 92.9, "elapsed_time": "8:31:32", "remaining_time": "0:39:05", "throughput": 2330.09, "total_tokens": 71515968} {"current_steps": 37165, "total_steps": 40000, "loss": 0.0918, "lr": 6.175980911375528e-07, "epoch": 6.062810996002937, "percentage": 92.91, "elapsed_time": "8:31:34", "remaining_time": "0:39:01", "throughput": 2330.25, "total_tokens": 71525824} {"current_steps": 37170, "total_steps": 40000, "loss": 0.0002, "lr": 6.154312738083034e-07, "epoch": 6.063626723223754, "percentage": 92.92, "elapsed_time": "8:31:36", "remaining_time": "0:38:57", "throughput": 2330.41, "total_tokens": 71535712} {"current_steps": 37175, "total_steps": 40000, "loss": 0.0002, "lr": 6.132682168859843e-07, "epoch": 6.064442450444571, "percentage": 92.94, "elapsed_time": "8:31:38", "remaining_time": "0:38:52", "throughput": 2330.52, "total_tokens": 71544224} {"current_steps": 37180, "total_steps": 40000, "loss": 0.123, "lr": 6.111089207041704e-07, "epoch": 6.065258177665389, "percentage": 92.95, "elapsed_time": "8:31:40", "remaining_time": "0:38:48", "throughput": 2330.65, "total_tokens": 71552992} {"current_steps": 37185, "total_steps": 40000, "loss": 0.0001, "lr": 6.089533855958507e-07, "epoch": 6.066073904886206, "percentage": 92.96, "elapsed_time": "8:31:43", "remaining_time": "0:38:44", "throughput": 2330.78, "total_tokens": 71562176} {"current_steps": 37190, "total_steps": 40000, "loss": 0.0, "lr": 6.068016118934372e-07, "epoch": 6.066889632107023, "percentage": 92.97, "elapsed_time": "8:31:45", "remaining_time": "0:38:40", "throughput": 2330.88, "total_tokens": 71570256} {"current_steps": 37195, "total_steps": 40000, "loss": 0.0, "lr": 6.04653599928759e-07, "epoch": 6.0677053593278405, "percentage": 92.99, "elapsed_time": "8:31:47", "remaining_time": "0:38:35", "throughput": 2331.02, "total_tokens": 71579392} {"current_steps": 37200, "total_steps": 40000, "loss": 0.1001, "lr": 6.025093500330675e-07, "epoch": 6.0685210865486585, "percentage": 93.0, "elapsed_time": "8:31:49", "remaining_time": "0:38:31", "throughput": 2331.16, "total_tokens": 71588672} {"current_steps": 37200, "total_steps": 40000, "eval_loss": 0.42549702525138855, "epoch": 6.0685210865486585, "percentage": 93.0, "elapsed_time": "8:33:10", "remaining_time": "0:38:37", "throughput": 2325.05, "total_tokens": 71588672} {"current_steps": 37205, "total_steps": 40000, "loss": 0.0, "lr": 6.003688625370291e-07, "epoch": 6.069336813769476, "percentage": 93.01, "elapsed_time": "8:33:14", "remaining_time": "0:38:33", "throughput": 2325.05, "total_tokens": 71597856} {"current_steps": 37210, "total_steps": 40000, "loss": 0.0003, "lr": 5.982321377707406e-07, "epoch": 6.070152540990293, "percentage": 93.03, "elapsed_time": "8:33:16", "remaining_time": "0:38:29", "throughput": 2325.2, "total_tokens": 71607360} {"current_steps": 37215, "total_steps": 40000, "loss": 0.0001, "lr": 5.96099176063708e-07, "epoch": 6.07096826821111, "percentage": 93.04, "elapsed_time": "8:33:18", "remaining_time": "0:38:24", "throughput": 2325.33, "total_tokens": 71616368} {"current_steps": 37220, "total_steps": 40000, "loss": 0.0001, "lr": 5.93969977744857e-07, "epoch": 6.071783995431928, "percentage": 93.05, "elapsed_time": "8:33:20", "remaining_time": "0:38:20", "throughput": 2325.51, "total_tokens": 71626864} {"current_steps": 37225, "total_steps": 40000, "loss": 0.0001, "lr": 5.918445431425445e-07, "epoch": 6.072599722652745, "percentage": 93.06, "elapsed_time": "8:33:22", "remaining_time": "0:38:16", "throughput": 2325.63, "total_tokens": 71635472} {"current_steps": 37230, "total_steps": 40000, "loss": 0.0003, "lr": 5.897228725845333e-07, "epoch": 6.073415449873562, "percentage": 93.08, "elapsed_time": "8:33:24", "remaining_time": "0:38:11", "throughput": 2325.78, "total_tokens": 71645168} {"current_steps": 37235, "total_steps": 40000, "loss": 0.0001, "lr": 5.876049663980171e-07, "epoch": 6.074231177094379, "percentage": 93.09, "elapsed_time": "8:33:26", "remaining_time": "0:38:07", "throughput": 2325.9, "total_tokens": 71653776} {"current_steps": 37240, "total_steps": 40000, "loss": 0.0289, "lr": 5.854908249095959e-07, "epoch": 6.075046904315197, "percentage": 93.1, "elapsed_time": "8:33:29", "remaining_time": "0:38:03", "throughput": 2326.01, "total_tokens": 71662368} {"current_steps": 37245, "total_steps": 40000, "loss": 0.0004, "lr": 5.833804484453031e-07, "epoch": 6.0758626315360145, "percentage": 93.11, "elapsed_time": "8:33:31", "remaining_time": "0:37:59", "throughput": 2326.15, "total_tokens": 71671520} {"current_steps": 37250, "total_steps": 40000, "loss": 0.0002, "lr": 5.81273837330587e-07, "epoch": 6.076678358756832, "percentage": 93.12, "elapsed_time": "8:33:33", "remaining_time": "0:37:54", "throughput": 2326.31, "total_tokens": 71681344} {"current_steps": 37255, "total_steps": 40000, "loss": 0.0002, "lr": 5.791709918903071e-07, "epoch": 6.077494085977649, "percentage": 93.14, "elapsed_time": "8:33:35", "remaining_time": "0:37:50", "throughput": 2326.45, "total_tokens": 71690624} {"current_steps": 37260, "total_steps": 40000, "loss": 0.0001, "lr": 5.770719124487483e-07, "epoch": 6.078309813198467, "percentage": 93.15, "elapsed_time": "8:33:37", "remaining_time": "0:37:46", "throughput": 2326.61, "total_tokens": 71700592} {"current_steps": 37265, "total_steps": 40000, "loss": 0.0002, "lr": 5.749765993296241e-07, "epoch": 6.079125540419284, "percentage": 93.16, "elapsed_time": "8:33:39", "remaining_time": "0:37:41", "throughput": 2326.76, "total_tokens": 71710112} {"current_steps": 37270, "total_steps": 40000, "loss": 0.0003, "lr": 5.728850528560509e-07, "epoch": 6.079941267640101, "percentage": 93.17, "elapsed_time": "8:33:41", "remaining_time": "0:37:37", "throughput": 2326.93, "total_tokens": 71720464} {"current_steps": 37275, "total_steps": 40000, "loss": 0.0, "lr": 5.707972733505707e-07, "epoch": 6.080756994860918, "percentage": 93.19, "elapsed_time": "8:33:44", "remaining_time": "0:37:33", "throughput": 2327.08, "total_tokens": 71730240} {"current_steps": 37280, "total_steps": 40000, "loss": 0.0001, "lr": 5.687132611351509e-07, "epoch": 6.081572722081736, "percentage": 93.2, "elapsed_time": "8:33:46", "remaining_time": "0:37:29", "throughput": 2327.24, "total_tokens": 71739888} {"current_steps": 37285, "total_steps": 40000, "loss": 0.0001, "lr": 5.666330165311651e-07, "epoch": 6.082388449302553, "percentage": 93.21, "elapsed_time": "8:33:48", "remaining_time": "0:37:24", "throughput": 2327.39, "total_tokens": 71749552} {"current_steps": 37290, "total_steps": 40000, "loss": 0.0, "lr": 5.645565398594204e-07, "epoch": 6.08320417652337, "percentage": 93.23, "elapsed_time": "8:33:50", "remaining_time": "0:37:20", "throughput": 2327.55, "total_tokens": 71759424} {"current_steps": 37295, "total_steps": 40000, "loss": 0.0003, "lr": 5.624838314401304e-07, "epoch": 6.0840199037441876, "percentage": 93.24, "elapsed_time": "8:33:52", "remaining_time": "0:37:16", "throughput": 2327.66, "total_tokens": 71768080} {"current_steps": 37300, "total_steps": 40000, "loss": 0.0002, "lr": 5.604148915929336e-07, "epoch": 6.084835630965006, "percentage": 93.25, "elapsed_time": "8:33:54", "remaining_time": "0:37:12", "throughput": 2327.83, "total_tokens": 71778208} {"current_steps": 37305, "total_steps": 40000, "loss": 0.0002, "lr": 5.583497206368887e-07, "epoch": 6.085651358185823, "percentage": 93.26, "elapsed_time": "8:33:56", "remaining_time": "0:37:07", "throughput": 2327.97, "total_tokens": 71787552} {"current_steps": 37310, "total_steps": 40000, "loss": 0.0223, "lr": 5.562883188904688e-07, "epoch": 6.08646708540664, "percentage": 93.27, "elapsed_time": "8:33:59", "remaining_time": "0:37:03", "throughput": 2328.15, "total_tokens": 71797888} {"current_steps": 37315, "total_steps": 40000, "loss": 0.0, "lr": 5.542306866715724e-07, "epoch": 6.087282812627457, "percentage": 93.29, "elapsed_time": "8:34:01", "remaining_time": "0:36:59", "throughput": 2328.3, "total_tokens": 71807616} {"current_steps": 37320, "total_steps": 40000, "loss": 0.0014, "lr": 5.52176824297504e-07, "epoch": 6.088098539848275, "percentage": 93.3, "elapsed_time": "8:34:03", "remaining_time": "0:36:54", "throughput": 2328.47, "total_tokens": 71817984} {"current_steps": 37325, "total_steps": 40000, "loss": 0.0002, "lr": 5.501267320850018e-07, "epoch": 6.088914267069092, "percentage": 93.31, "elapsed_time": "8:34:05", "remaining_time": "0:36:50", "throughput": 2328.6, "total_tokens": 71826768} {"current_steps": 37330, "total_steps": 40000, "loss": 0.0001, "lr": 5.480804103502157e-07, "epoch": 6.089729994289909, "percentage": 93.33, "elapsed_time": "8:34:07", "remaining_time": "0:36:46", "throughput": 2328.78, "total_tokens": 71837328} {"current_steps": 37335, "total_steps": 40000, "loss": 0.0004, "lr": 5.460378594087101e-07, "epoch": 6.090545721510727, "percentage": 93.34, "elapsed_time": "8:34:09", "remaining_time": "0:36:42", "throughput": 2328.91, "total_tokens": 71846528} {"current_steps": 37340, "total_steps": 40000, "loss": 0.0, "lr": 5.439990795754773e-07, "epoch": 6.091361448731544, "percentage": 93.35, "elapsed_time": "8:34:11", "remaining_time": "0:36:37", "throughput": 2329.04, "total_tokens": 71855280} {"current_steps": 37345, "total_steps": 40000, "loss": 0.0, "lr": 5.419640711649188e-07, "epoch": 6.0921771759523615, "percentage": 93.36, "elapsed_time": "8:34:14", "remaining_time": "0:36:33", "throughput": 2329.18, "total_tokens": 71864656} {"current_steps": 37350, "total_steps": 40000, "loss": 0.0, "lr": 5.399328344908583e-07, "epoch": 6.092992903173179, "percentage": 93.38, "elapsed_time": "8:34:16", "remaining_time": "0:36:29", "throughput": 2329.33, "total_tokens": 71874400} {"current_steps": 37355, "total_steps": 40000, "loss": 0.0, "lr": 5.379053698665399e-07, "epoch": 6.093808630393997, "percentage": 93.39, "elapsed_time": "8:34:18", "remaining_time": "0:36:24", "throughput": 2329.48, "total_tokens": 71883984} {"current_steps": 37360, "total_steps": 40000, "loss": 0.0004, "lr": 5.358816776046216e-07, "epoch": 6.094624357614814, "percentage": 93.4, "elapsed_time": "8:34:20", "remaining_time": "0:36:20", "throughput": 2329.64, "total_tokens": 71893856} {"current_steps": 37365, "total_steps": 40000, "loss": 0.1048, "lr": 5.338617580171817e-07, "epoch": 6.095440084835631, "percentage": 93.41, "elapsed_time": "8:34:22", "remaining_time": "0:36:16", "throughput": 2329.82, "total_tokens": 71904368} {"current_steps": 37370, "total_steps": 40000, "loss": 0.0003, "lr": 5.318456114157239e-07, "epoch": 6.096255812056448, "percentage": 93.42, "elapsed_time": "8:34:24", "remaining_time": "0:36:12", "throughput": 2329.95, "total_tokens": 71913408} {"current_steps": 37375, "total_steps": 40000, "loss": 0.0878, "lr": 5.298332381111576e-07, "epoch": 6.097071539277266, "percentage": 93.44, "elapsed_time": "8:34:26", "remaining_time": "0:36:07", "throughput": 2330.11, "total_tokens": 71923296} {"current_steps": 37380, "total_steps": 40000, "loss": 0.0, "lr": 5.27824638413818e-07, "epoch": 6.097887266498083, "percentage": 93.45, "elapsed_time": "8:34:29", "remaining_time": "0:36:03", "throughput": 2330.29, "total_tokens": 71934032} {"current_steps": 37385, "total_steps": 40000, "loss": 0.0691, "lr": 5.258198126334546e-07, "epoch": 6.0987029937189, "percentage": 93.46, "elapsed_time": "8:34:31", "remaining_time": "0:35:59", "throughput": 2330.44, "total_tokens": 71943488} {"current_steps": 37390, "total_steps": 40000, "loss": 0.0005, "lr": 5.238187610792367e-07, "epoch": 6.0995187209397175, "percentage": 93.47, "elapsed_time": "8:34:33", "remaining_time": "0:35:55", "throughput": 2330.6, "total_tokens": 71953616} {"current_steps": 37395, "total_steps": 40000, "loss": 0.0001, "lr": 5.218214840597563e-07, "epoch": 6.1003344481605355, "percentage": 93.49, "elapsed_time": "8:34:35", "remaining_time": "0:35:50", "throughput": 2330.74, "total_tokens": 71962928} {"current_steps": 37400, "total_steps": 40000, "loss": 0.0, "lr": 5.198279818830115e-07, "epoch": 6.101150175381353, "percentage": 93.5, "elapsed_time": "8:34:37", "remaining_time": "0:35:46", "throughput": 2330.9, "total_tokens": 71972608} {"current_steps": 37400, "total_steps": 40000, "eval_loss": 0.42471539974212646, "epoch": 6.101150175381353, "percentage": 93.5, "elapsed_time": "8:35:58", "remaining_time": "0:35:52", "throughput": 2324.81, "total_tokens": 71972608} {"current_steps": 37405, "total_steps": 40000, "loss": 0.0002, "lr": 5.178382548564287e-07, "epoch": 6.10196590260217, "percentage": 93.51, "elapsed_time": "8:36:02", "remaining_time": "0:35:48", "throughput": 2324.8, "total_tokens": 71981168} {"current_steps": 37410, "total_steps": 40000, "loss": 0.0001, "lr": 5.15852303286854e-07, "epoch": 6.102781629822987, "percentage": 93.53, "elapsed_time": "8:36:04", "remaining_time": "0:35:43", "throughput": 2324.96, "total_tokens": 71991280} {"current_steps": 37415, "total_steps": 40000, "loss": 0.0001, "lr": 5.138701274805396e-07, "epoch": 6.103597357043805, "percentage": 93.54, "elapsed_time": "8:36:06", "remaining_time": "0:35:39", "throughput": 2325.14, "total_tokens": 72001664} {"current_steps": 37420, "total_steps": 40000, "loss": 0.001, "lr": 5.118917277431606e-07, "epoch": 6.104413084264622, "percentage": 93.55, "elapsed_time": "8:36:08", "remaining_time": "0:35:35", "throughput": 2325.27, "total_tokens": 72010560} {"current_steps": 37425, "total_steps": 40000, "loss": 0.0001, "lr": 5.099171043798145e-07, "epoch": 6.105228811485439, "percentage": 93.56, "elapsed_time": "8:36:10", "remaining_time": "0:35:30", "throughput": 2325.4, "total_tokens": 72019392} {"current_steps": 37430, "total_steps": 40000, "loss": 0.0, "lr": 5.079462576950133e-07, "epoch": 6.106044538706256, "percentage": 93.58, "elapsed_time": "8:36:12", "remaining_time": "0:35:26", "throughput": 2325.53, "total_tokens": 72028224} {"current_steps": 37435, "total_steps": 40000, "loss": 0.0001, "lr": 5.059791879926862e-07, "epoch": 6.106860265927074, "percentage": 93.59, "elapsed_time": "8:36:14", "remaining_time": "0:35:22", "throughput": 2325.69, "total_tokens": 72037968} {"current_steps": 37440, "total_steps": 40000, "loss": 0.0002, "lr": 5.040158955761793e-07, "epoch": 6.1076759931478914, "percentage": 93.6, "elapsed_time": "8:36:16", "remaining_time": "0:35:18", "throughput": 2325.85, "total_tokens": 72047888} {"current_steps": 37445, "total_steps": 40000, "loss": 0.0509, "lr": 5.020563807482559e-07, "epoch": 6.108491720368709, "percentage": 93.61, "elapsed_time": "8:36:19", "remaining_time": "0:35:13", "throughput": 2325.97, "total_tokens": 72056208} {"current_steps": 37450, "total_steps": 40000, "loss": 0.0003, "lr": 5.001006438110995e-07, "epoch": 6.109307447589526, "percentage": 93.62, "elapsed_time": "8:36:21", "remaining_time": "0:35:09", "throughput": 2326.09, "total_tokens": 72064944} {"current_steps": 37455, "total_steps": 40000, "loss": 0.0001, "lr": 4.981486850663075e-07, "epoch": 6.110123174810344, "percentage": 93.64, "elapsed_time": "8:36:23", "remaining_time": "0:35:05", "throughput": 2326.24, "total_tokens": 72074448} {"current_steps": 37460, "total_steps": 40000, "loss": 0.0028, "lr": 4.962005048149005e-07, "epoch": 6.110938902031161, "percentage": 93.65, "elapsed_time": "8:36:25", "remaining_time": "0:35:00", "throughput": 2326.41, "total_tokens": 72084352} {"current_steps": 37465, "total_steps": 40000, "loss": 0.0002, "lr": 4.942561033573073e-07, "epoch": 6.111754629251978, "percentage": 93.66, "elapsed_time": "8:36:27", "remaining_time": "0:34:56", "throughput": 2326.55, "total_tokens": 72093776} {"current_steps": 37470, "total_steps": 40000, "loss": 0.0629, "lr": 4.923154809933827e-07, "epoch": 6.112570356472795, "percentage": 93.67, "elapsed_time": "8:36:29", "remaining_time": "0:34:52", "throughput": 2326.67, "total_tokens": 72102208} {"current_steps": 37475, "total_steps": 40000, "loss": 0.0002, "lr": 4.903786380223957e-07, "epoch": 6.113386083693613, "percentage": 93.69, "elapsed_time": "8:36:31", "remaining_time": "0:34:48", "throughput": 2326.81, "total_tokens": 72111488} {"current_steps": 37480, "total_steps": 40000, "loss": 0.0001, "lr": 4.884455747430266e-07, "epoch": 6.11420181091443, "percentage": 93.7, "elapsed_time": "8:36:33", "remaining_time": "0:34:43", "throughput": 2327.0, "total_tokens": 72121968} {"current_steps": 37485, "total_steps": 40000, "loss": 0.0003, "lr": 4.865162914533816e-07, "epoch": 6.115017538135247, "percentage": 93.71, "elapsed_time": "8:36:35", "remaining_time": "0:34:39", "throughput": 2327.19, "total_tokens": 72132832} {"current_steps": 37490, "total_steps": 40000, "loss": 0.0002, "lr": 4.845907884509809e-07, "epoch": 6.1158332653560645, "percentage": 93.73, "elapsed_time": "8:36:37", "remaining_time": "0:34:35", "throughput": 2327.35, "total_tokens": 72142448} {"current_steps": 37495, "total_steps": 40000, "loss": 0.0003, "lr": 4.82669066032762e-07, "epoch": 6.1166489925768825, "percentage": 93.74, "elapsed_time": "8:36:39", "remaining_time": "0:34:31", "throughput": 2327.48, "total_tokens": 72151504} {"current_steps": 37500, "total_steps": 40000, "loss": 0.0005, "lr": 4.807511244950768e-07, "epoch": 6.1174647197977, "percentage": 93.75, "elapsed_time": "8:36:41", "remaining_time": "0:34:26", "throughput": 2327.65, "total_tokens": 72161472} {"current_steps": 37505, "total_steps": 40000, "loss": 0.0004, "lr": 4.788369641336943e-07, "epoch": 6.118280447018517, "percentage": 93.76, "elapsed_time": "8:36:43", "remaining_time": "0:34:22", "throughput": 2327.8, "total_tokens": 72171056} {"current_steps": 37510, "total_steps": 40000, "loss": 0.0, "lr": 4.769265852438032e-07, "epoch": 6.119096174239334, "percentage": 93.77, "elapsed_time": "8:36:46", "remaining_time": "0:34:18", "throughput": 2327.96, "total_tokens": 72180816} {"current_steps": 37515, "total_steps": 40000, "loss": 0.0001, "lr": 4.750199881200124e-07, "epoch": 6.119911901460152, "percentage": 93.79, "elapsed_time": "8:36:48", "remaining_time": "0:34:13", "throughput": 2328.13, "total_tokens": 72190800} {"current_steps": 37520, "total_steps": 40000, "loss": 0.0, "lr": 4.7311717305633664e-07, "epoch": 6.120727628680969, "percentage": 93.8, "elapsed_time": "8:36:50", "remaining_time": "0:34:09", "throughput": 2328.27, "total_tokens": 72200224} {"current_steps": 37525, "total_steps": 40000, "loss": 0.0856, "lr": 4.7121814034621623e-07, "epoch": 6.121543355901786, "percentage": 93.81, "elapsed_time": "8:36:52", "remaining_time": "0:34:05", "throughput": 2328.46, "total_tokens": 72210912} {"current_steps": 37530, "total_steps": 40000, "loss": 0.0001, "lr": 4.693228902825114e-07, "epoch": 6.122359083122603, "percentage": 93.83, "elapsed_time": "8:36:54", "remaining_time": "0:34:01", "throughput": 2328.63, "total_tokens": 72220960} {"current_steps": 37535, "total_steps": 40000, "loss": 0.0002, "lr": 4.6743142315748277e-07, "epoch": 6.123174810343421, "percentage": 93.84, "elapsed_time": "8:36:56", "remaining_time": "0:33:56", "throughput": 2328.82, "total_tokens": 72231696} {"current_steps": 37540, "total_steps": 40000, "loss": 0.0001, "lr": 4.655437392628276e-07, "epoch": 6.1239905375642385, "percentage": 93.85, "elapsed_time": "8:36:58", "remaining_time": "0:33:52", "throughput": 2328.96, "total_tokens": 72240736} {"current_steps": 37545, "total_steps": 40000, "loss": 0.0003, "lr": 4.636598388896463e-07, "epoch": 6.124806264785056, "percentage": 93.86, "elapsed_time": "8:37:00", "remaining_time": "0:33:48", "throughput": 2329.12, "total_tokens": 72250736} {"current_steps": 37550, "total_steps": 40000, "loss": 0.0, "lr": 4.6177972232845925e-07, "epoch": 6.125621992005874, "percentage": 93.88, "elapsed_time": "8:37:02", "remaining_time": "0:33:44", "throughput": 2329.27, "total_tokens": 72260272} {"current_steps": 37555, "total_steps": 40000, "loss": 0.0001, "lr": 4.5990338986920953e-07, "epoch": 6.126437719226691, "percentage": 93.89, "elapsed_time": "8:37:04", "remaining_time": "0:33:39", "throughput": 2329.45, "total_tokens": 72270432} {"current_steps": 37560, "total_steps": 40000, "loss": 0.0, "lr": 4.5803084180124633e-07, "epoch": 6.127253446447508, "percentage": 93.9, "elapsed_time": "8:37:06", "remaining_time": "0:33:35", "throughput": 2329.59, "total_tokens": 72279840} {"current_steps": 37565, "total_steps": 40000, "loss": 0.0, "lr": 4.561620784133386e-07, "epoch": 6.128069173668325, "percentage": 93.91, "elapsed_time": "8:37:08", "remaining_time": "0:33:31", "throughput": 2329.79, "total_tokens": 72290784} {"current_steps": 37570, "total_steps": 40000, "loss": 0.0, "lr": 4.5429709999367796e-07, "epoch": 6.128884900889143, "percentage": 93.92, "elapsed_time": "8:37:10", "remaining_time": "0:33:27", "throughput": 2329.99, "total_tokens": 72301952} {"current_steps": 37575, "total_steps": 40000, "loss": 0.0006, "lr": 4.5243590682986223e-07, "epoch": 6.12970062810996, "percentage": 93.94, "elapsed_time": "8:37:13", "remaining_time": "0:33:22", "throughput": 2330.12, "total_tokens": 72310704} {"current_steps": 37580, "total_steps": 40000, "loss": 0.0009, "lr": 4.5057849920891735e-07, "epoch": 6.130516355330777, "percentage": 93.95, "elapsed_time": "8:37:15", "remaining_time": "0:33:18", "throughput": 2330.26, "total_tokens": 72319808} {"current_steps": 37585, "total_steps": 40000, "loss": 0.0001, "lr": 4.487248774172698e-07, "epoch": 6.1313320825515945, "percentage": 93.96, "elapsed_time": "8:37:17", "remaining_time": "0:33:14", "throughput": 2330.42, "total_tokens": 72329776} {"current_steps": 37590, "total_steps": 40000, "loss": 0.0003, "lr": 4.4687504174077965e-07, "epoch": 6.1321478097724125, "percentage": 93.97, "elapsed_time": "8:37:19", "remaining_time": "0:33:10", "throughput": 2330.58, "total_tokens": 72339424} {"current_steps": 37595, "total_steps": 40000, "loss": 0.0053, "lr": 4.450289924647133e-07, "epoch": 6.13296353699323, "percentage": 93.99, "elapsed_time": "8:37:21", "remaining_time": "0:33:05", "throughput": 2330.72, "total_tokens": 72348624} {"current_steps": 37600, "total_steps": 40000, "loss": 0.0, "lr": 4.431867298737513e-07, "epoch": 6.133779264214047, "percentage": 94.0, "elapsed_time": "8:37:23", "remaining_time": "0:33:01", "throughput": 2330.87, "total_tokens": 72358032} {"current_steps": 37600, "total_steps": 40000, "eval_loss": 0.4275158941745758, "epoch": 6.133779264214047, "percentage": 94.0, "elapsed_time": "8:38:44", "remaining_time": "0:33:06", "throughput": 2324.81, "total_tokens": 72358032} {"current_steps": 37605, "total_steps": 40000, "loss": 0.0627, "lr": 4.41348254251997e-07, "epoch": 6.134594991434864, "percentage": 94.01, "elapsed_time": "8:38:48", "remaining_time": "0:33:02", "throughput": 2324.82, "total_tokens": 72366992} {"current_steps": 37610, "total_steps": 40000, "loss": 0.0, "lr": 4.395135658829652e-07, "epoch": 6.135410718655682, "percentage": 94.03, "elapsed_time": "8:38:50", "remaining_time": "0:32:58", "throughput": 2324.96, "total_tokens": 72376112} {"current_steps": 37615, "total_steps": 40000, "loss": 0.0001, "lr": 4.376826650495852e-07, "epoch": 6.136226445876499, "percentage": 94.04, "elapsed_time": "8:38:52", "remaining_time": "0:32:53", "throughput": 2325.14, "total_tokens": 72386656} {"current_steps": 37620, "total_steps": 40000, "loss": 0.0, "lr": 4.358555520342117e-07, "epoch": 6.137042173097316, "percentage": 94.05, "elapsed_time": "8:38:54", "remaining_time": "0:32:49", "throughput": 2325.27, "total_tokens": 72395536} {"current_steps": 37625, "total_steps": 40000, "loss": 0.0003, "lr": 4.3403222711860257e-07, "epoch": 6.137857900318133, "percentage": 94.06, "elapsed_time": "8:38:56", "remaining_time": "0:32:45", "throughput": 2325.43, "total_tokens": 72405408} {"current_steps": 37630, "total_steps": 40000, "loss": 0.0, "lr": 4.3221269058394133e-07, "epoch": 6.138673627538951, "percentage": 94.08, "elapsed_time": "8:38:58", "remaining_time": "0:32:41", "throughput": 2325.63, "total_tokens": 72416240} {"current_steps": 37635, "total_steps": 40000, "loss": 0.0001, "lr": 4.303969427108173e-07, "epoch": 6.139489354759768, "percentage": 94.09, "elapsed_time": "8:39:00", "remaining_time": "0:32:36", "throughput": 2325.81, "total_tokens": 72426896} {"current_steps": 37640, "total_steps": 40000, "loss": 0.0, "lr": 4.2858498377924825e-07, "epoch": 6.1403050819805856, "percentage": 94.1, "elapsed_time": "8:39:02", "remaining_time": "0:32:32", "throughput": 2326.01, "total_tokens": 72437952} {"current_steps": 37645, "total_steps": 40000, "loss": 0.0003, "lr": 4.267768140686579e-07, "epoch": 6.141120809201403, "percentage": 94.11, "elapsed_time": "8:39:04", "remaining_time": "0:32:28", "throughput": 2326.17, "total_tokens": 72447552} {"current_steps": 37650, "total_steps": 40000, "loss": 0.0005, "lr": 4.2497243385788975e-07, "epoch": 6.141936536422221, "percentage": 94.12, "elapsed_time": "8:39:06", "remaining_time": "0:32:24", "throughput": 2326.31, "total_tokens": 72456800} {"current_steps": 37655, "total_steps": 40000, "loss": 0.0001, "lr": 4.231718434251991e-07, "epoch": 6.142752263643038, "percentage": 94.14, "elapsed_time": "8:39:08", "remaining_time": "0:32:19", "throughput": 2326.5, "total_tokens": 72467520} {"current_steps": 37660, "total_steps": 40000, "loss": 0.0002, "lr": 4.213750430482666e-07, "epoch": 6.143567990863855, "percentage": 94.15, "elapsed_time": "8:39:10", "remaining_time": "0:32:15", "throughput": 2326.64, "total_tokens": 72476816} {"current_steps": 37665, "total_steps": 40000, "loss": 0.0001, "lr": 4.1958203300417054e-07, "epoch": 6.144383718084672, "percentage": 94.16, "elapsed_time": "8:39:12", "remaining_time": "0:32:11", "throughput": 2326.79, "total_tokens": 72486208} {"current_steps": 37670, "total_steps": 40000, "loss": 0.0002, "lr": 4.177928135694259e-07, "epoch": 6.14519944530549, "percentage": 94.17, "elapsed_time": "8:39:14", "remaining_time": "0:32:07", "throughput": 2326.98, "total_tokens": 72496896} {"current_steps": 37675, "total_steps": 40000, "loss": 0.0, "lr": 4.1600738501994807e-07, "epoch": 6.146015172526307, "percentage": 94.19, "elapsed_time": "8:39:17", "remaining_time": "0:32:02", "throughput": 2327.14, "total_tokens": 72506816} {"current_steps": 37680, "total_steps": 40000, "loss": 0.0, "lr": 4.1422574763107237e-07, "epoch": 6.146830899747124, "percentage": 94.2, "elapsed_time": "8:39:19", "remaining_time": "0:31:58", "throughput": 2327.3, "total_tokens": 72516528} {"current_steps": 37685, "total_steps": 40000, "loss": 0.0002, "lr": 4.124479016775512e-07, "epoch": 6.1476466269679415, "percentage": 94.21, "elapsed_time": "8:39:21", "remaining_time": "0:31:54", "throughput": 2327.47, "total_tokens": 72526656} {"current_steps": 37690, "total_steps": 40000, "loss": 0.0001, "lr": 4.106738474335514e-07, "epoch": 6.1484623541887595, "percentage": 94.23, "elapsed_time": "8:39:23", "remaining_time": "0:31:49", "throughput": 2327.62, "total_tokens": 72536320} {"current_steps": 37695, "total_steps": 40000, "loss": 0.0001, "lr": 4.089035851726486e-07, "epoch": 6.149278081409577, "percentage": 94.24, "elapsed_time": "8:39:25", "remaining_time": "0:31:45", "throughput": 2327.76, "total_tokens": 72545392} {"current_steps": 37700, "total_steps": 40000, "loss": 0.0, "lr": 4.0713711516784937e-07, "epoch": 6.150093808630394, "percentage": 94.25, "elapsed_time": "8:39:27", "remaining_time": "0:31:41", "throughput": 2327.94, "total_tokens": 72555840} {"current_steps": 37705, "total_steps": 40000, "loss": 0.0001, "lr": 4.05374437691558e-07, "epoch": 6.150909535851211, "percentage": 94.26, "elapsed_time": "8:39:29", "remaining_time": "0:31:37", "throughput": 2328.09, "total_tokens": 72565232} {"current_steps": 37710, "total_steps": 40000, "loss": 0.0001, "lr": 4.036155530156044e-07, "epoch": 6.151725263072029, "percentage": 94.27, "elapsed_time": "8:39:31", "remaining_time": "0:31:32", "throughput": 2328.24, "total_tokens": 72574944} {"current_steps": 37715, "total_steps": 40000, "loss": 0.0003, "lr": 4.018604614112298e-07, "epoch": 6.152540990292846, "percentage": 94.29, "elapsed_time": "8:39:33", "remaining_time": "0:31:28", "throughput": 2328.41, "total_tokens": 72585088} {"current_steps": 37720, "total_steps": 40000, "loss": 0.0021, "lr": 4.0010916314908996e-07, "epoch": 6.153356717513663, "percentage": 94.3, "elapsed_time": "8:39:35", "remaining_time": "0:31:24", "throughput": 2328.61, "total_tokens": 72595920} {"current_steps": 37725, "total_steps": 40000, "loss": 0.0176, "lr": 3.983616584992578e-07, "epoch": 6.154172444734481, "percentage": 94.31, "elapsed_time": "8:39:37", "remaining_time": "0:31:20", "throughput": 2328.77, "total_tokens": 72605872} {"current_steps": 37730, "total_steps": 40000, "loss": 0.0, "lr": 3.9661794773122595e-07, "epoch": 6.154988171955298, "percentage": 94.33, "elapsed_time": "8:39:39", "remaining_time": "0:31:15", "throughput": 2328.92, "total_tokens": 72615328} {"current_steps": 37735, "total_steps": 40000, "loss": 0.0001, "lr": 3.9487803111388777e-07, "epoch": 6.1558038991761155, "percentage": 94.34, "elapsed_time": "8:39:41", "remaining_time": "0:31:11", "throughput": 2329.08, "total_tokens": 72625168} {"current_steps": 37740, "total_steps": 40000, "loss": 0.0, "lr": 3.9314190891556747e-07, "epoch": 6.156619626396933, "percentage": 94.35, "elapsed_time": "8:39:44", "remaining_time": "0:31:07", "throughput": 2329.21, "total_tokens": 72634128} {"current_steps": 37745, "total_steps": 40000, "loss": 0.0004, "lr": 3.914095814039925e-07, "epoch": 6.157435353617751, "percentage": 94.36, "elapsed_time": "8:39:46", "remaining_time": "0:31:03", "throughput": 2329.36, "total_tokens": 72643712} {"current_steps": 37750, "total_steps": 40000, "loss": 0.0003, "lr": 3.896810488463104e-07, "epoch": 6.158251080838568, "percentage": 94.38, "elapsed_time": "8:39:48", "remaining_time": "0:30:58", "throughput": 2329.54, "total_tokens": 72653984} {"current_steps": 37755, "total_steps": 40000, "loss": 0.0, "lr": 3.8795631150908565e-07, "epoch": 6.159066808059385, "percentage": 94.39, "elapsed_time": "8:39:50", "remaining_time": "0:30:54", "throughput": 2329.71, "total_tokens": 72664272} {"current_steps": 37760, "total_steps": 40000, "loss": 0.0001, "lr": 3.862353696582888e-07, "epoch": 6.159882535280202, "percentage": 94.4, "elapsed_time": "8:39:52", "remaining_time": "0:30:50", "throughput": 2329.84, "total_tokens": 72673232} {"current_steps": 37765, "total_steps": 40000, "loss": 0.0001, "lr": 3.8451822355931313e-07, "epoch": 6.16069826250102, "percentage": 94.41, "elapsed_time": "8:39:54", "remaining_time": "0:30:46", "throughput": 2330.0, "total_tokens": 72682848} {"current_steps": 37770, "total_steps": 40000, "loss": 0.0006, "lr": 3.82804873476969e-07, "epoch": 6.161513989721837, "percentage": 94.42, "elapsed_time": "8:39:56", "remaining_time": "0:30:41", "throughput": 2330.18, "total_tokens": 72693344} {"current_steps": 37775, "total_steps": 40000, "loss": 0.0, "lr": 3.810953196754702e-07, "epoch": 6.162329716942654, "percentage": 94.44, "elapsed_time": "8:39:58", "remaining_time": "0:30:37", "throughput": 2330.32, "total_tokens": 72702448} {"current_steps": 37780, "total_steps": 40000, "loss": 0.0, "lr": 3.793895624184529e-07, "epoch": 6.163145444163471, "percentage": 94.45, "elapsed_time": "8:40:00", "remaining_time": "0:30:33", "throughput": 2330.47, "total_tokens": 72712208} {"current_steps": 37785, "total_steps": 40000, "loss": 0.0003, "lr": 3.776876019689679e-07, "epoch": 6.1639611713842895, "percentage": 94.46, "elapsed_time": "8:40:02", "remaining_time": "0:30:29", "throughput": 2330.63, "total_tokens": 72722064} {"current_steps": 37790, "total_steps": 40000, "loss": 0.0034, "lr": 3.7598943858947743e-07, "epoch": 6.164776898605107, "percentage": 94.47, "elapsed_time": "8:40:04", "remaining_time": "0:30:24", "throughput": 2330.76, "total_tokens": 72730864} {"current_steps": 37795, "total_steps": 40000, "loss": 0.0007, "lr": 3.742950725418637e-07, "epoch": 6.165592625825924, "percentage": 94.49, "elapsed_time": "8:40:06", "remaining_time": "0:30:20", "throughput": 2330.92, "total_tokens": 72740576} {"current_steps": 37800, "total_steps": 40000, "loss": 0.0205, "lr": 3.726045040874093e-07, "epoch": 6.166408353046741, "percentage": 94.5, "elapsed_time": "8:40:08", "remaining_time": "0:30:16", "throughput": 2331.06, "total_tokens": 72749840} {"current_steps": 37800, "total_steps": 40000, "eval_loss": 0.43063226342201233, "epoch": 6.166408353046741, "percentage": 94.5, "elapsed_time": "8:41:29", "remaining_time": "0:30:21", "throughput": 2325.03, "total_tokens": 72749840} {"current_steps": 37805, "total_steps": 40000, "loss": 0.0001, "lr": 3.709177334868308e-07, "epoch": 6.167224080267559, "percentage": 94.51, "elapsed_time": "8:41:33", "remaining_time": "0:30:16", "throughput": 2325.06, "total_tokens": 72759584} {"current_steps": 37810, "total_steps": 40000, "loss": 0.0002, "lr": 3.692347610002478e-07, "epoch": 6.168039807488376, "percentage": 94.53, "elapsed_time": "8:41:35", "remaining_time": "0:30:12", "throughput": 2325.2, "total_tokens": 72768576} {"current_steps": 37815, "total_steps": 40000, "loss": 0.0, "lr": 3.675555868871916e-07, "epoch": 6.168855534709193, "percentage": 94.54, "elapsed_time": "8:41:37", "remaining_time": "0:30:08", "throughput": 2325.32, "total_tokens": 72777216} {"current_steps": 37820, "total_steps": 40000, "loss": 0.0002, "lr": 3.658802114066162e-07, "epoch": 6.16967126193001, "percentage": 94.55, "elapsed_time": "8:41:39", "remaining_time": "0:30:04", "throughput": 2325.5, "total_tokens": 72787584} {"current_steps": 37825, "total_steps": 40000, "loss": 0.0001, "lr": 3.6420863481688437e-07, "epoch": 6.170486989150828, "percentage": 94.56, "elapsed_time": "8:41:41", "remaining_time": "0:29:59", "throughput": 2325.65, "total_tokens": 72797328} {"current_steps": 37830, "total_steps": 40000, "loss": 0.0, "lr": 3.625408573757705e-07, "epoch": 6.171302716371645, "percentage": 94.58, "elapsed_time": "8:41:43", "remaining_time": "0:29:55", "throughput": 2325.81, "total_tokens": 72807056} {"current_steps": 37835, "total_steps": 40000, "loss": 0.0002, "lr": 3.608768793404743e-07, "epoch": 6.1721184435924625, "percentage": 94.59, "elapsed_time": "8:41:46", "remaining_time": "0:29:51", "throughput": 2325.95, "total_tokens": 72816288} {"current_steps": 37840, "total_steps": 40000, "loss": 0.0, "lr": 3.592167009675934e-07, "epoch": 6.17293417081328, "percentage": 94.6, "elapsed_time": "8:41:48", "remaining_time": "0:29:47", "throughput": 2326.1, "total_tokens": 72825888} {"current_steps": 37845, "total_steps": 40000, "loss": 0.0, "lr": 3.575603225131563e-07, "epoch": 6.173749898034098, "percentage": 94.61, "elapsed_time": "8:41:50", "remaining_time": "0:29:42", "throughput": 2326.25, "total_tokens": 72835232} {"current_steps": 37850, "total_steps": 40000, "loss": 0.0489, "lr": 3.55907744232592e-07, "epoch": 6.174565625254915, "percentage": 94.62, "elapsed_time": "8:41:52", "remaining_time": "0:29:38", "throughput": 2326.41, "total_tokens": 72845168} {"current_steps": 37855, "total_steps": 40000, "loss": 0.0001, "lr": 3.5425896638075217e-07, "epoch": 6.175381352475732, "percentage": 94.64, "elapsed_time": "8:41:54", "remaining_time": "0:29:34", "throughput": 2326.55, "total_tokens": 72854240} {"current_steps": 37860, "total_steps": 40000, "loss": 0.0, "lr": 3.5261398921189736e-07, "epoch": 6.176197079696549, "percentage": 94.65, "elapsed_time": "8:41:56", "remaining_time": "0:29:30", "throughput": 2326.67, "total_tokens": 72863024} {"current_steps": 37865, "total_steps": 40000, "loss": 0.0004, "lr": 3.509728129797024e-07, "epoch": 6.177012806917367, "percentage": 94.66, "elapsed_time": "8:41:58", "remaining_time": "0:29:25", "throughput": 2326.84, "total_tokens": 72873056} {"current_steps": 37870, "total_steps": 40000, "loss": 0.0005, "lr": 3.4933543793725656e-07, "epoch": 6.177828534138184, "percentage": 94.67, "elapsed_time": "8:42:00", "remaining_time": "0:29:21", "throughput": 2326.99, "total_tokens": 72882768} {"current_steps": 37875, "total_steps": 40000, "loss": 0.0001, "lr": 3.4770186433707163e-07, "epoch": 6.178644261359001, "percentage": 94.69, "elapsed_time": "8:42:02", "remaining_time": "0:29:17", "throughput": 2327.16, "total_tokens": 72892832} {"current_steps": 37880, "total_steps": 40000, "loss": 0.0002, "lr": 3.4607209243105453e-07, "epoch": 6.1794599885798185, "percentage": 94.7, "elapsed_time": "8:42:04", "remaining_time": "0:29:13", "throughput": 2327.31, "total_tokens": 72902480} {"current_steps": 37885, "total_steps": 40000, "loss": 0.0001, "lr": 3.444461224705431e-07, "epoch": 6.1802757158006365, "percentage": 94.71, "elapsed_time": "8:42:06", "remaining_time": "0:29:08", "throughput": 2327.47, "total_tokens": 72912208} {"current_steps": 37890, "total_steps": 40000, "loss": 0.0001, "lr": 3.4282395470628116e-07, "epoch": 6.181091443021454, "percentage": 94.73, "elapsed_time": "8:42:08", "remaining_time": "0:29:04", "throughput": 2327.6, "total_tokens": 72920992} {"current_steps": 37895, "total_steps": 40000, "loss": 0.0002, "lr": 3.4120558938842417e-07, "epoch": 6.181907170242271, "percentage": 94.74, "elapsed_time": "8:42:10", "remaining_time": "0:29:00", "throughput": 2327.76, "total_tokens": 72930880} {"current_steps": 37900, "total_steps": 40000, "loss": 0.0, "lr": 3.395910267665503e-07, "epoch": 6.182722897463089, "percentage": 94.75, "elapsed_time": "8:42:13", "remaining_time": "0:28:56", "throughput": 2327.91, "total_tokens": 72940512} {"current_steps": 37905, "total_steps": 40000, "loss": 0.0001, "lr": 3.3798026708964094e-07, "epoch": 6.183538624683906, "percentage": 94.76, "elapsed_time": "8:42:15", "remaining_time": "0:28:51", "throughput": 2328.06, "total_tokens": 72950144} {"current_steps": 37910, "total_steps": 40000, "loss": 0.0001, "lr": 3.3637331060609456e-07, "epoch": 6.184354351904723, "percentage": 94.77, "elapsed_time": "8:42:17", "remaining_time": "0:28:47", "throughput": 2328.25, "total_tokens": 72960832} {"current_steps": 37915, "total_steps": 40000, "loss": 0.0455, "lr": 3.3477015756372966e-07, "epoch": 6.18517007912554, "percentage": 94.79, "elapsed_time": "8:42:19", "remaining_time": "0:28:43", "throughput": 2328.37, "total_tokens": 72969392} {"current_steps": 37920, "total_steps": 40000, "loss": 0.0166, "lr": 3.3317080820976785e-07, "epoch": 6.185985806346358, "percentage": 94.8, "elapsed_time": "8:42:21", "remaining_time": "0:28:39", "throughput": 2328.51, "total_tokens": 72978512} {"current_steps": 37925, "total_steps": 40000, "loss": 0.0001, "lr": 3.315752627908508e-07, "epoch": 6.186801533567175, "percentage": 94.81, "elapsed_time": "8:42:23", "remaining_time": "0:28:34", "throughput": 2328.66, "total_tokens": 72988032} {"current_steps": 37930, "total_steps": 40000, "loss": 0.0005, "lr": 3.299835215530317e-07, "epoch": 6.1876172607879925, "percentage": 94.83, "elapsed_time": "8:42:25", "remaining_time": "0:28:30", "throughput": 2328.8, "total_tokens": 72997360} {"current_steps": 37935, "total_steps": 40000, "loss": 0.0, "lr": 3.2839558474177245e-07, "epoch": 6.18843298800881, "percentage": 94.84, "elapsed_time": "8:42:27", "remaining_time": "0:28:26", "throughput": 2328.96, "total_tokens": 73007088} {"current_steps": 37940, "total_steps": 40000, "loss": 0.0003, "lr": 3.2681145260196056e-07, "epoch": 6.189248715229628, "percentage": 94.85, "elapsed_time": "8:42:29", "remaining_time": "0:28:22", "throughput": 2329.09, "total_tokens": 73016064} {"current_steps": 37945, "total_steps": 40000, "loss": 0.0002, "lr": 3.252311253778839e-07, "epoch": 6.190064442450445, "percentage": 94.86, "elapsed_time": "8:42:31", "remaining_time": "0:28:17", "throughput": 2329.27, "total_tokens": 73026656} {"current_steps": 37950, "total_steps": 40000, "loss": 0.0005, "lr": 3.2365460331325034e-07, "epoch": 6.190880169671262, "percentage": 94.88, "elapsed_time": "8:42:33", "remaining_time": "0:28:13", "throughput": 2329.45, "total_tokens": 73037088} {"current_steps": 37955, "total_steps": 40000, "loss": 0.0, "lr": 3.2208188665117934e-07, "epoch": 6.191695896892079, "percentage": 94.89, "elapsed_time": "8:42:35", "remaining_time": "0:28:09", "throughput": 2329.6, "total_tokens": 73046544} {"current_steps": 37960, "total_steps": 40000, "loss": 0.0002, "lr": 3.205129756342018e-07, "epoch": 6.192511624112897, "percentage": 94.9, "elapsed_time": "8:42:37", "remaining_time": "0:28:05", "throughput": 2329.7, "total_tokens": 73054640} {"current_steps": 37965, "total_steps": 40000, "loss": 0.0007, "lr": 3.189478705042659e-07, "epoch": 6.193327351333714, "percentage": 94.91, "elapsed_time": "8:42:40", "remaining_time": "0:28:00", "throughput": 2329.79, "total_tokens": 73062160} {"current_steps": 37970, "total_steps": 40000, "loss": 0.0001, "lr": 3.173865715027341e-07, "epoch": 6.194143078554531, "percentage": 94.92, "elapsed_time": "8:42:42", "remaining_time": "0:27:56", "throughput": 2329.92, "total_tokens": 73071152} {"current_steps": 37975, "total_steps": 40000, "loss": 0.0003, "lr": 3.158290788703694e-07, "epoch": 6.194958805775348, "percentage": 94.94, "elapsed_time": "8:42:44", "remaining_time": "0:27:52", "throughput": 2330.04, "total_tokens": 73079808} {"current_steps": 37980, "total_steps": 40000, "loss": 0.0, "lr": 3.1427539284736297e-07, "epoch": 6.195774532996166, "percentage": 94.95, "elapsed_time": "8:42:46", "remaining_time": "0:27:48", "throughput": 2330.22, "total_tokens": 73090320} {"current_steps": 37985, "total_steps": 40000, "loss": 0.0374, "lr": 3.127255136733093e-07, "epoch": 6.196590260216984, "percentage": 94.96, "elapsed_time": "8:42:48", "remaining_time": "0:27:44", "throughput": 2330.35, "total_tokens": 73099200} {"current_steps": 37990, "total_steps": 40000, "loss": 0.0001, "lr": 3.1117944158722544e-07, "epoch": 6.197405987437801, "percentage": 94.97, "elapsed_time": "8:42:50", "remaining_time": "0:27:39", "throughput": 2330.52, "total_tokens": 73109456} {"current_steps": 37995, "total_steps": 40000, "loss": 0.0116, "lr": 3.0963717682752635e-07, "epoch": 6.198221714658618, "percentage": 94.99, "elapsed_time": "8:42:52", "remaining_time": "0:27:35", "throughput": 2330.67, "total_tokens": 73118912} {"current_steps": 38000, "total_steps": 40000, "loss": 0.0, "lr": 3.080987196320578e-07, "epoch": 6.199037441879436, "percentage": 95.0, "elapsed_time": "8:42:54", "remaining_time": "0:27:31", "throughput": 2330.82, "total_tokens": 73128448} {"current_steps": 38000, "total_steps": 40000, "eval_loss": 0.43132612109184265, "epoch": 6.199037441879436, "percentage": 95.0, "elapsed_time": "8:44:15", "remaining_time": "0:27:35", "throughput": 2324.84, "total_tokens": 73128448} {"current_steps": 38005, "total_steps": 40000, "loss": 0.0001, "lr": 3.065640702380607e-07, "epoch": 6.199853169100253, "percentage": 95.01, "elapsed_time": "8:44:19", "remaining_time": "0:27:31", "throughput": 2324.86, "total_tokens": 73138224} {"current_steps": 38010, "total_steps": 40000, "loss": 0.0, "lr": 3.050332288822011e-07, "epoch": 6.20066889632107, "percentage": 95.03, "elapsed_time": "8:44:21", "remaining_time": "0:27:27", "throughput": 2325.0, "total_tokens": 73147328} {"current_steps": 38015, "total_steps": 40000, "loss": 0.0001, "lr": 3.035061958005542e-07, "epoch": 6.201484623541887, "percentage": 95.04, "elapsed_time": "8:44:23", "remaining_time": "0:27:22", "throughput": 2325.2, "total_tokens": 73158528} {"current_steps": 38020, "total_steps": 40000, "loss": 0.0001, "lr": 3.019829712286093e-07, "epoch": 6.202300350762705, "percentage": 95.05, "elapsed_time": "8:44:25", "remaining_time": "0:27:18", "throughput": 2325.39, "total_tokens": 73169184} {"current_steps": 38025, "total_steps": 40000, "loss": 0.003, "lr": 3.004635554012647e-07, "epoch": 6.203116077983522, "percentage": 95.06, "elapsed_time": "8:44:27", "remaining_time": "0:27:14", "throughput": 2325.53, "total_tokens": 73178384} {"current_steps": 38030, "total_steps": 40000, "loss": 0.0001, "lr": 2.9894794855283017e-07, "epoch": 6.2039318052043395, "percentage": 95.08, "elapsed_time": "8:44:29", "remaining_time": "0:27:10", "throughput": 2325.71, "total_tokens": 73188864} {"current_steps": 38035, "total_steps": 40000, "loss": 0.0, "lr": 2.9743615091703816e-07, "epoch": 6.204747532425157, "percentage": 95.09, "elapsed_time": "8:44:31", "remaining_time": "0:27:05", "throughput": 2325.84, "total_tokens": 73197872} {"current_steps": 38040, "total_steps": 40000, "loss": 0.0, "lr": 2.959281627270216e-07, "epoch": 6.205563259645975, "percentage": 95.1, "elapsed_time": "8:44:33", "remaining_time": "0:27:01", "throughput": 2325.97, "total_tokens": 73206752} {"current_steps": 38045, "total_steps": 40000, "loss": 0.0, "lr": 2.944239842153362e-07, "epoch": 6.206378986866792, "percentage": 95.11, "elapsed_time": "8:44:35", "remaining_time": "0:26:57", "throughput": 2326.12, "total_tokens": 73216480} {"current_steps": 38050, "total_steps": 40000, "loss": 0.0191, "lr": 2.929236156139381e-07, "epoch": 6.207194714087609, "percentage": 95.12, "elapsed_time": "8:44:37", "remaining_time": "0:26:53", "throughput": 2326.26, "total_tokens": 73225552} {"current_steps": 38055, "total_steps": 40000, "loss": 0.0, "lr": 2.9142705715420883e-07, "epoch": 6.208010441308426, "percentage": 95.14, "elapsed_time": "8:44:39", "remaining_time": "0:26:48", "throughput": 2326.41, "total_tokens": 73235264} {"current_steps": 38060, "total_steps": 40000, "loss": 0.0001, "lr": 2.8993430906693595e-07, "epoch": 6.208826168529244, "percentage": 95.15, "elapsed_time": "8:44:41", "remaining_time": "0:26:44", "throughput": 2326.58, "total_tokens": 73245216} {"current_steps": 38065, "total_steps": 40000, "loss": 0.0001, "lr": 2.88445371582316e-07, "epoch": 6.209641895750061, "percentage": 95.16, "elapsed_time": "8:44:44", "remaining_time": "0:26:40", "throughput": 2326.72, "total_tokens": 73254512} {"current_steps": 38070, "total_steps": 40000, "loss": 0.0, "lr": 2.8696024492996796e-07, "epoch": 6.210457622970878, "percentage": 95.17, "elapsed_time": "8:44:46", "remaining_time": "0:26:36", "throughput": 2326.86, "total_tokens": 73263824} {"current_steps": 38075, "total_steps": 40000, "loss": 0.0, "lr": 2.854789293389115e-07, "epoch": 6.211273350191696, "percentage": 95.19, "elapsed_time": "8:44:48", "remaining_time": "0:26:31", "throughput": 2327.01, "total_tokens": 73273232} {"current_steps": 38080, "total_steps": 40000, "loss": 0.1444, "lr": 2.8400142503758606e-07, "epoch": 6.2120890774125135, "percentage": 95.2, "elapsed_time": "8:44:50", "remaining_time": "0:26:27", "throughput": 2327.16, "total_tokens": 73282976} {"current_steps": 38085, "total_steps": 40000, "loss": 0.0002, "lr": 2.8252773225384276e-07, "epoch": 6.212904804633331, "percentage": 95.21, "elapsed_time": "8:44:52", "remaining_time": "0:26:23", "throughput": 2327.34, "total_tokens": 73293360} {"current_steps": 38090, "total_steps": 40000, "loss": 0.0007, "lr": 2.8105785121494143e-07, "epoch": 6.213720531854148, "percentage": 95.23, "elapsed_time": "8:44:54", "remaining_time": "0:26:19", "throughput": 2327.53, "total_tokens": 73304032} {"current_steps": 38095, "total_steps": 40000, "loss": 0.0, "lr": 2.795917821475563e-07, "epoch": 6.214536259074965, "percentage": 95.24, "elapsed_time": "8:44:56", "remaining_time": "0:26:15", "throughput": 2327.67, "total_tokens": 73313424} {"current_steps": 38100, "total_steps": 40000, "loss": 0.0, "lr": 2.78129525277776e-07, "epoch": 6.215351986295783, "percentage": 95.25, "elapsed_time": "8:44:58", "remaining_time": "0:26:10", "throughput": 2327.83, "total_tokens": 73323152} {"current_steps": 38105, "total_steps": 40000, "loss": 0.0877, "lr": 2.766710808310952e-07, "epoch": 6.2161677135166, "percentage": 95.26, "elapsed_time": "8:45:00", "remaining_time": "0:26:06", "throughput": 2327.98, "total_tokens": 73332736} {"current_steps": 38110, "total_steps": 40000, "loss": 0.1026, "lr": 2.7521644903242827e-07, "epoch": 6.216983440737417, "percentage": 95.28, "elapsed_time": "8:45:02", "remaining_time": "0:26:02", "throughput": 2328.13, "total_tokens": 73342512} {"current_steps": 38115, "total_steps": 40000, "loss": 0.0339, "lr": 2.7376563010609593e-07, "epoch": 6.217799167958235, "percentage": 95.29, "elapsed_time": "8:45:04", "remaining_time": "0:25:58", "throughput": 2328.26, "total_tokens": 73351424} {"current_steps": 38120, "total_steps": 40000, "loss": 0.0, "lr": 2.72318624275833e-07, "epoch": 6.218614895179052, "percentage": 95.3, "elapsed_time": "8:45:06", "remaining_time": "0:25:53", "throughput": 2328.41, "total_tokens": 73360976} {"current_steps": 38125, "total_steps": 40000, "loss": 0.069, "lr": 2.7087543176478324e-07, "epoch": 6.219430622399869, "percentage": 95.31, "elapsed_time": "8:45:08", "remaining_time": "0:25:49", "throughput": 2328.59, "total_tokens": 73371184} {"current_steps": 38130, "total_steps": 40000, "loss": 0.0, "lr": 2.694360527955103e-07, "epoch": 6.220246349620687, "percentage": 95.33, "elapsed_time": "8:45:10", "remaining_time": "0:25:45", "throughput": 2328.72, "total_tokens": 73380128} {"current_steps": 38135, "total_steps": 40000, "loss": 0.001, "lr": 2.680004875899811e-07, "epoch": 6.221062076841505, "percentage": 95.34, "elapsed_time": "8:45:13", "remaining_time": "0:25:41", "throughput": 2328.89, "total_tokens": 73390528} {"current_steps": 38140, "total_steps": 40000, "loss": 0.0004, "lr": 2.665687363695768e-07, "epoch": 6.221877804062322, "percentage": 95.35, "elapsed_time": "8:45:15", "remaining_time": "0:25:36", "throughput": 2329.03, "total_tokens": 73399680} {"current_steps": 38145, "total_steps": 40000, "loss": 0.065, "lr": 2.6514079935509584e-07, "epoch": 6.222693531283139, "percentage": 95.36, "elapsed_time": "8:45:17", "remaining_time": "0:25:32", "throughput": 2329.19, "total_tokens": 73409408} {"current_steps": 38150, "total_steps": 40000, "loss": 0.001, "lr": 2.6371667676673983e-07, "epoch": 6.223509258503956, "percentage": 95.38, "elapsed_time": "8:45:19", "remaining_time": "0:25:28", "throughput": 2329.35, "total_tokens": 73419296} {"current_steps": 38155, "total_steps": 40000, "loss": 0.0649, "lr": 2.6229636882412755e-07, "epoch": 6.224324985724774, "percentage": 95.39, "elapsed_time": "8:45:21", "remaining_time": "0:25:24", "throughput": 2329.51, "total_tokens": 73429152} {"current_steps": 38160, "total_steps": 40000, "loss": 0.0001, "lr": 2.6087987574628935e-07, "epoch": 6.225140712945591, "percentage": 95.4, "elapsed_time": "8:45:23", "remaining_time": "0:25:19", "throughput": 2329.67, "total_tokens": 73439040} {"current_steps": 38165, "total_steps": 40000, "loss": 0.0002, "lr": 2.5946719775166437e-07, "epoch": 6.225956440166408, "percentage": 95.41, "elapsed_time": "8:45:25", "remaining_time": "0:25:15", "throughput": 2329.8, "total_tokens": 73448112} {"current_steps": 38170, "total_steps": 40000, "loss": 0.0961, "lr": 2.5805833505810616e-07, "epoch": 6.226772167387225, "percentage": 95.43, "elapsed_time": "8:45:27", "remaining_time": "0:25:11", "throughput": 2329.98, "total_tokens": 73458480} {"current_steps": 38175, "total_steps": 40000, "loss": 0.0007, "lr": 2.566532878828798e-07, "epoch": 6.227587894608043, "percentage": 95.44, "elapsed_time": "8:45:29", "remaining_time": "0:25:07", "throughput": 2330.15, "total_tokens": 73468800} {"current_steps": 38180, "total_steps": 40000, "loss": 0.0, "lr": 2.552520564426619e-07, "epoch": 6.2284036218288605, "percentage": 95.45, "elapsed_time": "8:45:31", "remaining_time": "0:25:03", "throughput": 2330.33, "total_tokens": 73479216} {"current_steps": 38185, "total_steps": 40000, "loss": 0.0, "lr": 2.5385464095353803e-07, "epoch": 6.229219349049678, "percentage": 95.46, "elapsed_time": "8:45:33", "remaining_time": "0:24:58", "throughput": 2330.47, "total_tokens": 73488416} {"current_steps": 38190, "total_steps": 40000, "loss": 0.1545, "lr": 2.5246104163100804e-07, "epoch": 6.230035076270495, "percentage": 95.47, "elapsed_time": "8:45:35", "remaining_time": "0:24:54", "throughput": 2330.61, "total_tokens": 73497872} {"current_steps": 38195, "total_steps": 40000, "loss": 0.0001, "lr": 2.510712586899833e-07, "epoch": 6.230850803491313, "percentage": 95.49, "elapsed_time": "8:45:37", "remaining_time": "0:24:50", "throughput": 2330.79, "total_tokens": 73508160} {"current_steps": 38200, "total_steps": 40000, "loss": 0.0489, "lr": 2.4968529234478124e-07, "epoch": 6.23166653071213, "percentage": 95.5, "elapsed_time": "8:45:39", "remaining_time": "0:24:46", "throughput": 2330.95, "total_tokens": 73518048} {"current_steps": 38200, "total_steps": 40000, "eval_loss": 0.42790207266807556, "epoch": 6.23166653071213, "percentage": 95.5, "elapsed_time": "8:47:00", "remaining_time": "0:24:49", "throughput": 2324.98, "total_tokens": 73518048} {"current_steps": 38205, "total_steps": 40000, "loss": 0.0002, "lr": 2.483031428091448e-07, "epoch": 6.232482257932947, "percentage": 95.51, "elapsed_time": "8:47:04", "remaining_time": "0:24:45", "throughput": 2325.01, "total_tokens": 73527520} {"current_steps": 38210, "total_steps": 40000, "loss": 0.0, "lr": 2.469248102962091e-07, "epoch": 6.233297985153764, "percentage": 95.53, "elapsed_time": "8:47:06", "remaining_time": "0:24:41", "throughput": 2325.17, "total_tokens": 73537616} {"current_steps": 38215, "total_steps": 40000, "loss": 0.0589, "lr": 2.4555029501853455e-07, "epoch": 6.234113712374582, "percentage": 95.54, "elapsed_time": "8:47:08", "remaining_time": "0:24:37", "throughput": 2325.33, "total_tokens": 73547584} {"current_steps": 38220, "total_steps": 40000, "loss": 0.0005, "lr": 2.441795971880906e-07, "epoch": 6.234929439595399, "percentage": 95.55, "elapsed_time": "8:47:10", "remaining_time": "0:24:33", "throughput": 2325.49, "total_tokens": 73557472} {"current_steps": 38225, "total_steps": 40000, "loss": 0.0, "lr": 2.4281271701625255e-07, "epoch": 6.2357451668162165, "percentage": 95.56, "elapsed_time": "8:47:12", "remaining_time": "0:24:28", "throughput": 2325.65, "total_tokens": 73567200} {"current_steps": 38230, "total_steps": 40000, "loss": 0.0, "lr": 2.4144965471381007e-07, "epoch": 6.236560894037034, "percentage": 95.58, "elapsed_time": "8:47:15", "remaining_time": "0:24:24", "throughput": 2325.82, "total_tokens": 73577520} {"current_steps": 38235, "total_steps": 40000, "loss": 0.0, "lr": 2.400904104909674e-07, "epoch": 6.237376621257852, "percentage": 95.59, "elapsed_time": "8:47:17", "remaining_time": "0:24:20", "throughput": 2325.98, "total_tokens": 73587488} {"current_steps": 38240, "total_steps": 40000, "loss": 0.0001, "lr": 2.3873498455733725e-07, "epoch": 6.238192348478669, "percentage": 95.6, "elapsed_time": "8:47:19", "remaining_time": "0:24:16", "throughput": 2326.13, "total_tokens": 73596992} {"current_steps": 38245, "total_steps": 40000, "loss": 0.0002, "lr": 2.3738337712194137e-07, "epoch": 6.239008075699486, "percentage": 95.61, "elapsed_time": "8:47:21", "remaining_time": "0:24:11", "throughput": 2326.27, "total_tokens": 73606176} {"current_steps": 38250, "total_steps": 40000, "loss": 0.0001, "lr": 2.3603558839321305e-07, "epoch": 6.239823802920303, "percentage": 95.62, "elapsed_time": "8:47:23", "remaining_time": "0:24:07", "throughput": 2326.43, "total_tokens": 73616192} {"current_steps": 38255, "total_steps": 40000, "loss": 0.0006, "lr": 2.3469161857900267e-07, "epoch": 6.240639530141121, "percentage": 95.64, "elapsed_time": "8:47:25", "remaining_time": "0:24:03", "throughput": 2326.59, "total_tokens": 73625840} {"current_steps": 38260, "total_steps": 40000, "loss": 0.0651, "lr": 2.3335146788656393e-07, "epoch": 6.241455257361938, "percentage": 95.65, "elapsed_time": "8:47:27", "remaining_time": "0:23:59", "throughput": 2326.76, "total_tokens": 73636272} {"current_steps": 38265, "total_steps": 40000, "loss": 0.0002, "lr": 2.3201513652256757e-07, "epoch": 6.242270984582755, "percentage": 95.66, "elapsed_time": "8:47:29", "remaining_time": "0:23:55", "throughput": 2326.91, "total_tokens": 73645728} {"current_steps": 38270, "total_steps": 40000, "loss": 0.0002, "lr": 2.3068262469308766e-07, "epoch": 6.243086711803572, "percentage": 95.67, "elapsed_time": "8:47:31", "remaining_time": "0:23:50", "throughput": 2327.03, "total_tokens": 73654352} {"current_steps": 38275, "total_steps": 40000, "loss": 0.0001, "lr": 2.2935393260362093e-07, "epoch": 6.2439024390243905, "percentage": 95.69, "elapsed_time": "8:47:33", "remaining_time": "0:23:46", "throughput": 2327.19, "total_tokens": 73664336} {"current_steps": 38280, "total_steps": 40000, "loss": 0.0001, "lr": 2.2802906045906458e-07, "epoch": 6.244718166245208, "percentage": 95.7, "elapsed_time": "8:47:35", "remaining_time": "0:23:42", "throughput": 2327.32, "total_tokens": 73673168} {"current_steps": 38285, "total_steps": 40000, "loss": 0.0001, "lr": 2.2670800846373018e-07, "epoch": 6.245533893466025, "percentage": 95.71, "elapsed_time": "8:47:37", "remaining_time": "0:23:38", "throughput": 2327.42, "total_tokens": 73681296} {"current_steps": 38290, "total_steps": 40000, "loss": 0.0284, "lr": 2.2539077682134367e-07, "epoch": 6.246349620686843, "percentage": 95.73, "elapsed_time": "8:47:39", "remaining_time": "0:23:33", "throughput": 2327.6, "total_tokens": 73691808} {"current_steps": 38295, "total_steps": 40000, "loss": 0.0, "lr": 2.2407736573503423e-07, "epoch": 6.24716534790766, "percentage": 95.74, "elapsed_time": "8:47:42", "remaining_time": "0:23:29", "throughput": 2327.77, "total_tokens": 73701904} {"current_steps": 38300, "total_steps": 40000, "loss": 0.0001, "lr": 2.2276777540735093e-07, "epoch": 6.247981075128477, "percentage": 95.75, "elapsed_time": "8:47:44", "remaining_time": "0:23:25", "throughput": 2327.95, "total_tokens": 73712496} {"current_steps": 38305, "total_steps": 40000, "loss": 0.0001, "lr": 2.2146200604024613e-07, "epoch": 6.248796802349294, "percentage": 95.76, "elapsed_time": "8:47:46", "remaining_time": "0:23:21", "throughput": 2328.13, "total_tokens": 73723040} {"current_steps": 38310, "total_steps": 40000, "loss": 0.0, "lr": 2.2016005783508375e-07, "epoch": 6.249612529570112, "percentage": 95.78, "elapsed_time": "8:47:48", "remaining_time": "0:23:17", "throughput": 2328.27, "total_tokens": 73732192} {"current_steps": 38315, "total_steps": 40000, "loss": 0.0457, "lr": 2.1886193099264763e-07, "epoch": 6.250428256790929, "percentage": 95.79, "elapsed_time": "8:47:50", "remaining_time": "0:23:12", "throughput": 2328.41, "total_tokens": 73741536} {"current_steps": 38320, "total_steps": 40000, "loss": 0.0011, "lr": 2.175676257131165e-07, "epoch": 6.251243984011746, "percentage": 95.8, "elapsed_time": "8:47:52", "remaining_time": "0:23:08", "throughput": 2328.6, "total_tokens": 73752432} {"current_steps": 38325, "total_steps": 40000, "loss": 0.0001, "lr": 2.162771421960974e-07, "epoch": 6.2520597112325635, "percentage": 95.81, "elapsed_time": "8:47:54", "remaining_time": "0:23:04", "throughput": 2328.76, "total_tokens": 73762176} {"current_steps": 38330, "total_steps": 40000, "loss": 0.0, "lr": 2.1499048064059224e-07, "epoch": 6.252875438453382, "percentage": 95.83, "elapsed_time": "8:47:56", "remaining_time": "0:23:00", "throughput": 2328.92, "total_tokens": 73771904} {"current_steps": 38335, "total_steps": 40000, "loss": 0.0, "lr": 2.1370764124502285e-07, "epoch": 6.253691165674199, "percentage": 95.84, "elapsed_time": "8:47:58", "remaining_time": "0:22:55", "throughput": 2329.01, "total_tokens": 73779840} {"current_steps": 38340, "total_steps": 40000, "loss": 0.0003, "lr": 2.1242862420721988e-07, "epoch": 6.254506892895016, "percentage": 95.85, "elapsed_time": "8:48:00", "remaining_time": "0:22:51", "throughput": 2329.19, "total_tokens": 73790368} {"current_steps": 38345, "total_steps": 40000, "loss": 0.1429, "lr": 2.1115342972442276e-07, "epoch": 6.255322620115833, "percentage": 95.86, "elapsed_time": "8:48:02", "remaining_time": "0:22:47", "throughput": 2329.4, "total_tokens": 73801872} {"current_steps": 38350, "total_steps": 40000, "loss": 0.0002, "lr": 2.0988205799328252e-07, "epoch": 6.256138347336651, "percentage": 95.88, "elapsed_time": "8:48:04", "remaining_time": "0:22:43", "throughput": 2329.54, "total_tokens": 73810944} {"current_steps": 38355, "total_steps": 40000, "loss": 0.0, "lr": 2.0861450920986182e-07, "epoch": 6.256954074557468, "percentage": 95.89, "elapsed_time": "8:48:06", "remaining_time": "0:22:39", "throughput": 2329.7, "total_tokens": 73820944} {"current_steps": 38360, "total_steps": 40000, "loss": 0.0, "lr": 2.07350783569632e-07, "epoch": 6.257769801778285, "percentage": 95.9, "elapsed_time": "8:48:08", "remaining_time": "0:22:34", "throughput": 2329.87, "total_tokens": 73831216} {"current_steps": 38365, "total_steps": 40000, "loss": 0.0469, "lr": 2.060908812674761e-07, "epoch": 6.258585528999102, "percentage": 95.91, "elapsed_time": "8:48:11", "remaining_time": "0:22:30", "throughput": 2330.03, "total_tokens": 73841024} {"current_steps": 38370, "total_steps": 40000, "loss": 0.0002, "lr": 2.0483480249768317e-07, "epoch": 6.25940125621992, "percentage": 95.93, "elapsed_time": "8:48:13", "remaining_time": "0:22:26", "throughput": 2330.18, "total_tokens": 73850672} {"current_steps": 38375, "total_steps": 40000, "loss": 0.0004, "lr": 2.035825474539621e-07, "epoch": 6.2602169834407375, "percentage": 95.94, "elapsed_time": "8:48:15", "remaining_time": "0:22:22", "throughput": 2330.35, "total_tokens": 73860928} {"current_steps": 38380, "total_steps": 40000, "loss": 0.0014, "lr": 2.0233411632942235e-07, "epoch": 6.261032710661555, "percentage": 95.95, "elapsed_time": "8:48:17", "remaining_time": "0:22:17", "throughput": 2330.48, "total_tokens": 73869824} {"current_steps": 38385, "total_steps": 40000, "loss": 0.0001, "lr": 2.0108950931658764e-07, "epoch": 6.261848437882372, "percentage": 95.96, "elapsed_time": "8:48:19", "remaining_time": "0:22:13", "throughput": 2330.68, "total_tokens": 73880880} {"current_steps": 38390, "total_steps": 40000, "loss": 0.0014, "lr": 1.998487266073934e-07, "epoch": 6.26266416510319, "percentage": 95.97, "elapsed_time": "8:48:21", "remaining_time": "0:22:09", "throughput": 2330.82, "total_tokens": 73890352} {"current_steps": 38395, "total_steps": 40000, "loss": 0.002, "lr": 1.986117683931865e-07, "epoch": 6.263479892324007, "percentage": 95.99, "elapsed_time": "8:48:23", "remaining_time": "0:22:05", "throughput": 2331.0, "total_tokens": 73900624} {"current_steps": 38400, "total_steps": 40000, "loss": 0.0027, "lr": 1.9737863486471442e-07, "epoch": 6.264295619544824, "percentage": 96.0, "elapsed_time": "8:48:25", "remaining_time": "0:22:01", "throughput": 2331.18, "total_tokens": 73911328} {"current_steps": 38400, "total_steps": 40000, "eval_loss": 0.43132084608078003, "epoch": 6.264295619544824, "percentage": 96.0, "elapsed_time": "8:49:46", "remaining_time": "0:22:04", "throughput": 2325.24, "total_tokens": 73911328} {"current_steps": 38405, "total_steps": 40000, "loss": 0.0001, "lr": 1.9614932621215e-07, "epoch": 6.265111346765641, "percentage": 96.01, "elapsed_time": "8:49:50", "remaining_time": "0:22:00", "throughput": 2325.24, "total_tokens": 73919952} {"current_steps": 38410, "total_steps": 40000, "loss": 0.0549, "lr": 1.9492384262506102e-07, "epoch": 6.265927073986459, "percentage": 96.03, "elapsed_time": "8:49:52", "remaining_time": "0:21:56", "throughput": 2325.36, "total_tokens": 73928752} {"current_steps": 38415, "total_steps": 40000, "loss": 0.0, "lr": 1.9370218429243524e-07, "epoch": 6.266742801207276, "percentage": 96.04, "elapsed_time": "8:49:54", "remaining_time": "0:21:51", "throughput": 2325.53, "total_tokens": 73938816} {"current_steps": 38420, "total_steps": 40000, "loss": 0.0633, "lr": 1.9248435140267197e-07, "epoch": 6.2675585284280935, "percentage": 96.05, "elapsed_time": "8:49:56", "remaining_time": "0:21:47", "throughput": 2325.69, "total_tokens": 73948752} {"current_steps": 38425, "total_steps": 40000, "loss": 0.0002, "lr": 1.9127034414356814e-07, "epoch": 6.268374255648911, "percentage": 96.06, "elapsed_time": "8:49:58", "remaining_time": "0:21:43", "throughput": 2325.82, "total_tokens": 73957808} {"current_steps": 38430, "total_steps": 40000, "loss": 0.1023, "lr": 1.9006016270234627e-07, "epoch": 6.269189982869729, "percentage": 96.08, "elapsed_time": "8:50:00", "remaining_time": "0:21:39", "throughput": 2325.95, "total_tokens": 73966640} {"current_steps": 38435, "total_steps": 40000, "loss": 0.049, "lr": 1.888538072656293e-07, "epoch": 6.270005710090546, "percentage": 96.09, "elapsed_time": "8:50:02", "remaining_time": "0:21:34", "throughput": 2326.15, "total_tokens": 73977776} {"current_steps": 38440, "total_steps": 40000, "loss": 0.0, "lr": 1.8765127801944893e-07, "epoch": 6.270821437311363, "percentage": 96.1, "elapsed_time": "8:50:04", "remaining_time": "0:21:30", "throughput": 2326.28, "total_tokens": 73986848} {"current_steps": 38445, "total_steps": 40000, "loss": 0.0168, "lr": 1.8645257514925406e-07, "epoch": 6.27163716453218, "percentage": 96.11, "elapsed_time": "8:50:06", "remaining_time": "0:21:26", "throughput": 2326.44, "total_tokens": 73996672} {"current_steps": 38450, "total_steps": 40000, "loss": 0.0, "lr": 1.8525769883989685e-07, "epoch": 6.272452891752998, "percentage": 96.12, "elapsed_time": "8:50:08", "remaining_time": "0:21:22", "throughput": 2326.6, "total_tokens": 74006656} {"current_steps": 38455, "total_steps": 40000, "loss": 0.0001, "lr": 1.8406664927564654e-07, "epoch": 6.273268618973815, "percentage": 96.14, "elapsed_time": "8:50:11", "remaining_time": "0:21:18", "throughput": 2326.72, "total_tokens": 74015248} {"current_steps": 38460, "total_steps": 40000, "loss": 0.0, "lr": 1.8287942664017566e-07, "epoch": 6.274084346194632, "percentage": 96.15, "elapsed_time": "8:50:13", "remaining_time": "0:21:13", "throughput": 2326.87, "total_tokens": 74024848} {"current_steps": 38465, "total_steps": 40000, "loss": 0.1751, "lr": 1.8169603111656552e-07, "epoch": 6.27490007341545, "percentage": 96.16, "elapsed_time": "8:50:15", "remaining_time": "0:21:09", "throughput": 2326.99, "total_tokens": 74033632} {"current_steps": 38470, "total_steps": 40000, "loss": 0.0001, "lr": 1.805164628873146e-07, "epoch": 6.275715800636267, "percentage": 96.17, "elapsed_time": "8:50:17", "remaining_time": "0:21:05", "throughput": 2327.14, "total_tokens": 74043120} {"current_steps": 38475, "total_steps": 40000, "loss": 0.0001, "lr": 1.793407221343274e-07, "epoch": 6.276531527857085, "percentage": 96.19, "elapsed_time": "8:50:19", "remaining_time": "0:21:01", "throughput": 2327.29, "total_tokens": 74052656} {"current_steps": 38480, "total_steps": 40000, "loss": 0.0002, "lr": 1.781688090389172e-07, "epoch": 6.277347255077902, "percentage": 96.2, "elapsed_time": "8:50:21", "remaining_time": "0:20:56", "throughput": 2327.39, "total_tokens": 74060864} {"current_steps": 38485, "total_steps": 40000, "loss": 0.0001, "lr": 1.770007237818061e-07, "epoch": 6.27816298229872, "percentage": 96.21, "elapsed_time": "8:50:23", "remaining_time": "0:20:52", "throughput": 2327.57, "total_tokens": 74071360} {"current_steps": 38490, "total_steps": 40000, "loss": 0.0163, "lr": 1.7583646654313059e-07, "epoch": 6.278978709519537, "percentage": 96.23, "elapsed_time": "8:50:25", "remaining_time": "0:20:48", "throughput": 2327.71, "total_tokens": 74080736} {"current_steps": 38495, "total_steps": 40000, "loss": 0.0001, "lr": 1.7467603750242757e-07, "epoch": 6.279794436740354, "percentage": 96.24, "elapsed_time": "8:50:27", "remaining_time": "0:20:44", "throughput": 2327.87, "total_tokens": 74090400} {"current_steps": 38500, "total_steps": 40000, "loss": 0.0002, "lr": 1.7351943683865944e-07, "epoch": 6.280610163961171, "percentage": 96.25, "elapsed_time": "8:50:29", "remaining_time": "0:20:40", "throughput": 2328.06, "total_tokens": 74101312} {"current_steps": 38505, "total_steps": 40000, "loss": 0.0, "lr": 1.723666647301808e-07, "epoch": 6.281425891181989, "percentage": 96.26, "elapsed_time": "8:50:31", "remaining_time": "0:20:35", "throughput": 2328.2, "total_tokens": 74110832} {"current_steps": 38510, "total_steps": 40000, "loss": 0.0003, "lr": 1.712177213547661e-07, "epoch": 6.282241618402806, "percentage": 96.28, "elapsed_time": "8:50:33", "remaining_time": "0:20:31", "throughput": 2328.33, "total_tokens": 74119520} {"current_steps": 38515, "total_steps": 40000, "loss": 0.0, "lr": 1.7007260688959581e-07, "epoch": 6.283057345623623, "percentage": 96.29, "elapsed_time": "8:50:35", "remaining_time": "0:20:27", "throughput": 2328.49, "total_tokens": 74129584} {"current_steps": 38520, "total_steps": 40000, "loss": 0.0, "lr": 1.68931321511262e-07, "epoch": 6.2838730728444405, "percentage": 96.3, "elapsed_time": "8:50:37", "remaining_time": "0:20:23", "throughput": 2328.63, "total_tokens": 74138784} {"current_steps": 38525, "total_steps": 40000, "loss": 0.0, "lr": 1.6779386539576835e-07, "epoch": 6.2846888000652585, "percentage": 96.31, "elapsed_time": "8:50:40", "remaining_time": "0:20:19", "throughput": 2328.76, "total_tokens": 74147776} {"current_steps": 38530, "total_steps": 40000, "loss": 0.0002, "lr": 1.666602387185162e-07, "epoch": 6.285504527286076, "percentage": 96.33, "elapsed_time": "8:50:42", "remaining_time": "0:20:14", "throughput": 2328.9, "total_tokens": 74157008} {"current_steps": 38535, "total_steps": 40000, "loss": 0.0014, "lr": 1.655304416543352e-07, "epoch": 6.286320254506893, "percentage": 96.34, "elapsed_time": "8:50:44", "remaining_time": "0:20:10", "throughput": 2329.02, "total_tokens": 74165888} {"current_steps": 38540, "total_steps": 40000, "loss": 0.0001, "lr": 1.6440447437744698e-07, "epoch": 6.28713598172771, "percentage": 96.35, "elapsed_time": "8:50:46", "remaining_time": "0:20:06", "throughput": 2329.16, "total_tokens": 74175232} {"current_steps": 38545, "total_steps": 40000, "loss": 0.047, "lr": 1.6328233706149332e-07, "epoch": 6.287951708948528, "percentage": 96.36, "elapsed_time": "8:50:48", "remaining_time": "0:20:02", "throughput": 2329.33, "total_tokens": 74185360} {"current_steps": 38550, "total_steps": 40000, "loss": 0.0003, "lr": 1.6216402987951906e-07, "epoch": 6.288767436169345, "percentage": 96.38, "elapsed_time": "8:50:50", "remaining_time": "0:19:58", "throughput": 2329.46, "total_tokens": 74194368} {"current_steps": 38555, "total_steps": 40000, "loss": 0.0001, "lr": 1.6104955300398627e-07, "epoch": 6.289583163390162, "percentage": 96.39, "elapsed_time": "8:50:52", "remaining_time": "0:19:53", "throughput": 2329.63, "total_tokens": 74204448} {"current_steps": 38560, "total_steps": 40000, "loss": 0.0, "lr": 1.5993890660675748e-07, "epoch": 6.290398890610979, "percentage": 96.4, "elapsed_time": "8:50:54", "remaining_time": "0:19:49", "throughput": 2329.78, "total_tokens": 74214272} {"current_steps": 38565, "total_steps": 40000, "loss": 0.0001, "lr": 1.5883209085910678e-07, "epoch": 6.291214617831797, "percentage": 96.41, "elapsed_time": "8:50:56", "remaining_time": "0:19:45", "throughput": 2329.95, "total_tokens": 74224416} {"current_steps": 38570, "total_steps": 40000, "loss": 0.0, "lr": 1.5772910593172264e-07, "epoch": 6.2920303450526145, "percentage": 96.43, "elapsed_time": "8:50:58", "remaining_time": "0:19:41", "throughput": 2330.1, "total_tokens": 74233920} {"current_steps": 38575, "total_steps": 40000, "loss": 0.0007, "lr": 1.5662995199469954e-07, "epoch": 6.292846072273432, "percentage": 96.44, "elapsed_time": "8:51:00", "remaining_time": "0:19:36", "throughput": 2330.25, "total_tokens": 74243600} {"current_steps": 38580, "total_steps": 40000, "loss": 0.0001, "lr": 1.5553462921753802e-07, "epoch": 6.293661799494249, "percentage": 96.45, "elapsed_time": "8:51:02", "remaining_time": "0:19:32", "throughput": 2330.38, "total_tokens": 74252464} {"current_steps": 38585, "total_steps": 40000, "loss": 0.0138, "lr": 1.544431377691502e-07, "epoch": 6.294477526715067, "percentage": 96.46, "elapsed_time": "8:51:04", "remaining_time": "0:19:28", "throughput": 2330.51, "total_tokens": 74261504} {"current_steps": 38590, "total_steps": 40000, "loss": 0.0004, "lr": 1.5335547781785975e-07, "epoch": 6.295293253935884, "percentage": 96.47, "elapsed_time": "8:51:07", "remaining_time": "0:19:24", "throughput": 2330.66, "total_tokens": 74271296} {"current_steps": 38595, "total_steps": 40000, "loss": 0.0001, "lr": 1.5227164953139917e-07, "epoch": 6.296108981156701, "percentage": 96.49, "elapsed_time": "8:51:09", "remaining_time": "0:19:20", "throughput": 2330.88, "total_tokens": 74283088} {"current_steps": 38600, "total_steps": 40000, "loss": 0.0022, "lr": 1.511916530769042e-07, "epoch": 6.296924708377518, "percentage": 96.5, "elapsed_time": "8:51:11", "remaining_time": "0:19:15", "throughput": 2331.05, "total_tokens": 74293168} {"current_steps": 38600, "total_steps": 40000, "eval_loss": 0.43207141757011414, "epoch": 6.296924708377518, "percentage": 96.5, "elapsed_time": "8:52:32", "remaining_time": "0:19:18", "throughput": 2325.15, "total_tokens": 74293168} {"current_steps": 38605, "total_steps": 40000, "loss": 0.0, "lr": 1.5011548862092773e-07, "epoch": 6.297740435598336, "percentage": 96.51, "elapsed_time": "8:52:35", "remaining_time": "0:19:14", "throughput": 2325.15, "total_tokens": 74302128} {"current_steps": 38610, "total_steps": 40000, "loss": 0.0009, "lr": 1.490431563294231e-07, "epoch": 6.298556162819153, "percentage": 96.53, "elapsed_time": "8:52:37", "remaining_time": "0:19:10", "throughput": 2325.32, "total_tokens": 74312512} {"current_steps": 38615, "total_steps": 40000, "loss": 0.0204, "lr": 1.4797465636776365e-07, "epoch": 6.2993718900399704, "percentage": 96.54, "elapsed_time": "8:52:40", "remaining_time": "0:19:06", "throughput": 2325.49, "total_tokens": 74322560} {"current_steps": 38620, "total_steps": 40000, "loss": 0.0002, "lr": 1.4690998890072027e-07, "epoch": 6.300187617260788, "percentage": 96.55, "elapsed_time": "8:52:42", "remaining_time": "0:19:02", "throughput": 2325.66, "total_tokens": 74333008} {"current_steps": 38625, "total_steps": 40000, "loss": 0.0, "lr": 1.4584915409248112e-07, "epoch": 6.301003344481606, "percentage": 96.56, "elapsed_time": "8:52:44", "remaining_time": "0:18:57", "throughput": 2325.79, "total_tokens": 74341984} {"current_steps": 38630, "total_steps": 40000, "loss": 0.0, "lr": 1.4479215210663754e-07, "epoch": 6.301819071702423, "percentage": 96.58, "elapsed_time": "8:52:46", "remaining_time": "0:18:53", "throughput": 2325.95, "total_tokens": 74351888} {"current_steps": 38635, "total_steps": 40000, "loss": 0.0031, "lr": 1.4373898310619528e-07, "epoch": 6.30263479892324, "percentage": 96.59, "elapsed_time": "8:52:48", "remaining_time": "0:18:49", "throughput": 2326.08, "total_tokens": 74360832} {"current_steps": 38640, "total_steps": 40000, "loss": 0.0009, "lr": 1.4268964725356604e-07, "epoch": 6.303450526144058, "percentage": 96.6, "elapsed_time": "8:52:50", "remaining_time": "0:18:45", "throughput": 2326.23, "total_tokens": 74370576} {"current_steps": 38645, "total_steps": 40000, "loss": 0.0, "lr": 1.4164414471056764e-07, "epoch": 6.304266253364875, "percentage": 96.61, "elapsed_time": "8:52:52", "remaining_time": "0:18:41", "throughput": 2326.35, "total_tokens": 74379120} {"current_steps": 38650, "total_steps": 40000, "loss": 0.0001, "lr": 1.4060247563843497e-07, "epoch": 6.305081980585692, "percentage": 96.62, "elapsed_time": "8:52:54", "remaining_time": "0:18:36", "throughput": 2326.5, "total_tokens": 74388704} {"current_steps": 38655, "total_steps": 40000, "loss": 0.0, "lr": 1.3956464019780068e-07, "epoch": 6.305897707806509, "percentage": 96.64, "elapsed_time": "8:52:56", "remaining_time": "0:18:32", "throughput": 2326.63, "total_tokens": 74397840} {"current_steps": 38660, "total_steps": 40000, "loss": 0.0002, "lr": 1.385306385487145e-07, "epoch": 6.306713435027326, "percentage": 96.65, "elapsed_time": "8:52:58", "remaining_time": "0:18:28", "throughput": 2326.75, "total_tokens": 74406608} {"current_steps": 38665, "total_steps": 40000, "loss": 0.0002, "lr": 1.3750047085063222e-07, "epoch": 6.307529162248144, "percentage": 96.66, "elapsed_time": "8:53:00", "remaining_time": "0:18:24", "throughput": 2326.91, "total_tokens": 74416336} {"current_steps": 38670, "total_steps": 40000, "loss": 0.0, "lr": 1.3647413726242119e-07, "epoch": 6.3083448894689615, "percentage": 96.67, "elapsed_time": "8:53:02", "remaining_time": "0:18:20", "throughput": 2327.04, "total_tokens": 74425520} {"current_steps": 38675, "total_steps": 40000, "loss": 0.0, "lr": 1.3545163794235205e-07, "epoch": 6.309160616689779, "percentage": 96.69, "elapsed_time": "8:53:04", "remaining_time": "0:18:15", "throughput": 2327.21, "total_tokens": 74435584} {"current_steps": 38680, "total_steps": 40000, "loss": 0.0, "lr": 1.3443297304810698e-07, "epoch": 6.309976343910597, "percentage": 96.7, "elapsed_time": "8:53:07", "remaining_time": "0:18:11", "throughput": 2327.35, "total_tokens": 74445120} {"current_steps": 38685, "total_steps": 40000, "loss": 0.0, "lr": 1.3341814273677977e-07, "epoch": 6.310792071131414, "percentage": 96.71, "elapsed_time": "8:53:09", "remaining_time": "0:18:07", "throughput": 2327.47, "total_tokens": 74453584} {"current_steps": 38690, "total_steps": 40000, "loss": 0.0508, "lr": 1.324071471648647e-07, "epoch": 6.311607798352231, "percentage": 96.73, "elapsed_time": "8:53:11", "remaining_time": "0:18:03", "throughput": 2327.61, "total_tokens": 74463088} {"current_steps": 38695, "total_steps": 40000, "loss": 0.0001, "lr": 1.3139998648827312e-07, "epoch": 6.312423525573048, "percentage": 96.74, "elapsed_time": "8:53:13", "remaining_time": "0:17:58", "throughput": 2327.76, "total_tokens": 74472640} {"current_steps": 38700, "total_steps": 40000, "loss": 0.0001, "lr": 1.3039666086232526e-07, "epoch": 6.313239252793866, "percentage": 96.75, "elapsed_time": "8:53:15", "remaining_time": "0:17:54", "throughput": 2327.95, "total_tokens": 74483376} {"current_steps": 38705, "total_steps": 40000, "loss": 0.0, "lr": 1.2939717044174183e-07, "epoch": 6.314054980014683, "percentage": 96.76, "elapsed_time": "8:53:17", "remaining_time": "0:17:50", "throughput": 2328.07, "total_tokens": 74492144} {"current_steps": 38710, "total_steps": 40000, "loss": 0.0001, "lr": 1.284015153806578e-07, "epoch": 6.3148707072355, "percentage": 96.78, "elapsed_time": "8:53:19", "remaining_time": "0:17:46", "throughput": 2328.15, "total_tokens": 74499600} {"current_steps": 38715, "total_steps": 40000, "loss": 0.0, "lr": 1.274096958326171e-07, "epoch": 6.3156864344563175, "percentage": 96.79, "elapsed_time": "8:53:21", "remaining_time": "0:17:42", "throughput": 2328.3, "total_tokens": 74509200} {"current_steps": 38720, "total_steps": 40000, "loss": 0.0, "lr": 1.2642171195056952e-07, "epoch": 6.3165021616771355, "percentage": 96.8, "elapsed_time": "8:53:23", "remaining_time": "0:17:37", "throughput": 2328.42, "total_tokens": 74517664} {"current_steps": 38725, "total_steps": 40000, "loss": 0.0002, "lr": 1.2543756388687377e-07, "epoch": 6.317317888897953, "percentage": 96.81, "elapsed_time": "8:53:25", "remaining_time": "0:17:33", "throughput": 2328.56, "total_tokens": 74527152} {"current_steps": 38730, "total_steps": 40000, "loss": 0.0001, "lr": 1.2445725179330014e-07, "epoch": 6.31813361611877, "percentage": 96.83, "elapsed_time": "8:53:27", "remaining_time": "0:17:29", "throughput": 2328.72, "total_tokens": 74537024} {"current_steps": 38735, "total_steps": 40000, "loss": 0.0392, "lr": 1.2348077582102212e-07, "epoch": 6.318949343339587, "percentage": 96.84, "elapsed_time": "8:53:29", "remaining_time": "0:17:25", "throughput": 2328.87, "total_tokens": 74546800} {"current_steps": 38740, "total_steps": 40000, "loss": 0.0002, "lr": 1.2250813612062762e-07, "epoch": 6.319765070560405, "percentage": 96.85, "elapsed_time": "8:53:31", "remaining_time": "0:17:21", "throughput": 2329.03, "total_tokens": 74556496} {"current_steps": 38745, "total_steps": 40000, "loss": 0.0001, "lr": 1.215393328421105e-07, "epoch": 6.320580797781222, "percentage": 96.86, "elapsed_time": "8:53:33", "remaining_time": "0:17:16", "throughput": 2329.17, "total_tokens": 74565808} {"current_steps": 38750, "total_steps": 40000, "loss": 0.0001, "lr": 1.2057436613486796e-07, "epoch": 6.321396525002039, "percentage": 96.88, "elapsed_time": "8:53:36", "remaining_time": "0:17:12", "throughput": 2329.29, "total_tokens": 74574560} {"current_steps": 38755, "total_steps": 40000, "loss": 0.0002, "lr": 1.1961323614771424e-07, "epoch": 6.322212252222856, "percentage": 96.89, "elapsed_time": "8:53:38", "remaining_time": "0:17:08", "throughput": 2329.44, "total_tokens": 74584384} {"current_steps": 38760, "total_steps": 40000, "loss": 0.0004, "lr": 1.1865594302886418e-07, "epoch": 6.323027979443674, "percentage": 96.9, "elapsed_time": "8:53:40", "remaining_time": "0:17:04", "throughput": 2329.59, "total_tokens": 74593808} {"current_steps": 38765, "total_steps": 40000, "loss": 0.0003, "lr": 1.1770248692594687e-07, "epoch": 6.3238437066644915, "percentage": 96.91, "elapsed_time": "8:53:42", "remaining_time": "0:17:00", "throughput": 2329.7, "total_tokens": 74602256} {"current_steps": 38770, "total_steps": 40000, "loss": 0.049, "lr": 1.167528679859975e-07, "epoch": 6.324659433885309, "percentage": 96.92, "elapsed_time": "8:53:44", "remaining_time": "0:16:55", "throughput": 2329.84, "total_tokens": 74611376} {"current_steps": 38775, "total_steps": 40000, "loss": 0.0002, "lr": 1.1580708635545446e-07, "epoch": 6.325475161106126, "percentage": 96.94, "elapsed_time": "8:53:46", "remaining_time": "0:16:51", "throughput": 2329.99, "total_tokens": 74621088} {"current_steps": 38780, "total_steps": 40000, "loss": 0.0001, "lr": 1.1486514218017885e-07, "epoch": 6.326290888326944, "percentage": 96.95, "elapsed_time": "8:53:48", "remaining_time": "0:16:47", "throughput": 2330.15, "total_tokens": 74631120} {"current_steps": 38785, "total_steps": 40000, "loss": 0.0, "lr": 1.1392703560542117e-07, "epoch": 6.327106615547761, "percentage": 96.96, "elapsed_time": "8:53:50", "remaining_time": "0:16:43", "throughput": 2330.3, "total_tokens": 74640768} {"current_steps": 38790, "total_steps": 40000, "loss": 0.0002, "lr": 1.129927667758518e-07, "epoch": 6.327922342768578, "percentage": 96.97, "elapsed_time": "8:53:52", "remaining_time": "0:16:39", "throughput": 2330.44, "total_tokens": 74650480} {"current_steps": 38795, "total_steps": 40000, "loss": 0.0, "lr": 1.1206233583554992e-07, "epoch": 6.328738069989395, "percentage": 96.99, "elapsed_time": "8:53:54", "remaining_time": "0:16:35", "throughput": 2330.59, "total_tokens": 74660368} {"current_steps": 38800, "total_steps": 40000, "loss": 0.0001, "lr": 1.1113574292799523e-07, "epoch": 6.329553797210213, "percentage": 97.0, "elapsed_time": "8:53:56", "remaining_time": "0:16:30", "throughput": 2330.71, "total_tokens": 74668864} {"current_steps": 38800, "total_steps": 40000, "eval_loss": 0.43222880363464355, "epoch": 6.329553797210213, "percentage": 97.0, "elapsed_time": "8:55:17", "remaining_time": "0:16:33", "throughput": 2324.84, "total_tokens": 74668864} {"current_steps": 38805, "total_steps": 40000, "loss": 0.0001, "lr": 1.1021298819608449e-07, "epoch": 6.33036952443103, "percentage": 97.01, "elapsed_time": "8:55:21", "remaining_time": "0:16:29", "throughput": 2324.89, "total_tokens": 74678944} {"current_steps": 38810, "total_steps": 40000, "loss": 0.0001, "lr": 1.0929407178211226e-07, "epoch": 6.331185251651847, "percentage": 97.02, "elapsed_time": "8:55:23", "remaining_time": "0:16:24", "throughput": 2325.02, "total_tokens": 74688112} {"current_steps": 38815, "total_steps": 40000, "loss": 0.0, "lr": 1.0837899382779293e-07, "epoch": 6.332000978872665, "percentage": 97.04, "elapsed_time": "8:55:25", "remaining_time": "0:16:20", "throughput": 2325.17, "total_tokens": 74697840} {"current_steps": 38820, "total_steps": 40000, "loss": 0.0691, "lr": 1.0746775447423862e-07, "epoch": 6.332816706093483, "percentage": 97.05, "elapsed_time": "8:55:27", "remaining_time": "0:16:16", "throughput": 2325.31, "total_tokens": 74707040} {"current_steps": 38825, "total_steps": 40000, "loss": 0.0002, "lr": 1.0656035386197583e-07, "epoch": 6.3336324333143, "percentage": 97.06, "elapsed_time": "8:55:29", "remaining_time": "0:16:12", "throughput": 2325.5, "total_tokens": 74717936} {"current_steps": 38830, "total_steps": 40000, "loss": 0.0, "lr": 1.0565679213093982e-07, "epoch": 6.334448160535117, "percentage": 97.08, "elapsed_time": "8:55:31", "remaining_time": "0:16:08", "throughput": 2325.66, "total_tokens": 74727856} {"current_steps": 38835, "total_steps": 40000, "loss": 0.0, "lr": 1.0475706942046638e-07, "epoch": 6.335263887755934, "percentage": 97.09, "elapsed_time": "8:55:34", "remaining_time": "0:16:03", "throughput": 2325.85, "total_tokens": 74739008} {"current_steps": 38840, "total_steps": 40000, "loss": 0.0012, "lr": 1.0386118586930282e-07, "epoch": 6.336079614976752, "percentage": 97.1, "elapsed_time": "8:55:36", "remaining_time": "0:15:59", "throughput": 2326.0, "total_tokens": 74748544} {"current_steps": 38845, "total_steps": 40000, "loss": 0.0002, "lr": 1.0296914161561367e-07, "epoch": 6.336895342197569, "percentage": 97.11, "elapsed_time": "8:55:38", "remaining_time": "0:15:55", "throughput": 2326.2, "total_tokens": 74759920} {"current_steps": 38850, "total_steps": 40000, "loss": 0.0, "lr": 1.0208093679695552e-07, "epoch": 6.337711069418386, "percentage": 97.12, "elapsed_time": "8:55:40", "remaining_time": "0:15:51", "throughput": 2326.39, "total_tokens": 74770832} {"current_steps": 38855, "total_steps": 40000, "loss": 0.0, "lr": 1.0119657155030493e-07, "epoch": 6.338526796639204, "percentage": 97.14, "elapsed_time": "8:55:42", "remaining_time": "0:15:47", "throughput": 2326.56, "total_tokens": 74781008} {"current_steps": 38860, "total_steps": 40000, "loss": 0.0002, "lr": 1.003160460120417e-07, "epoch": 6.339342523860021, "percentage": 97.15, "elapsed_time": "8:55:44", "remaining_time": "0:15:42", "throughput": 2326.73, "total_tokens": 74791392} {"current_steps": 38865, "total_steps": 40000, "loss": 0.1084, "lr": 9.943936031795165e-08, "epoch": 6.3401582510808385, "percentage": 97.16, "elapsed_time": "8:55:46", "remaining_time": "0:15:38", "throughput": 2326.91, "total_tokens": 74801888} {"current_steps": 38870, "total_steps": 40000, "loss": 0.0001, "lr": 9.856651460323219e-08, "epoch": 6.340973978301656, "percentage": 97.17, "elapsed_time": "8:55:48", "remaining_time": "0:15:34", "throughput": 2327.07, "total_tokens": 74811872} {"current_steps": 38875, "total_steps": 40000, "loss": 0.0002, "lr": 9.769750900248953e-08, "epoch": 6.341789705522474, "percentage": 97.19, "elapsed_time": "8:55:50", "remaining_time": "0:15:30", "throughput": 2327.21, "total_tokens": 74821312} {"current_steps": 38880, "total_steps": 40000, "loss": 0.0018, "lr": 9.683234364973038e-08, "epoch": 6.342605432743291, "percentage": 97.2, "elapsed_time": "8:55:52", "remaining_time": "0:15:26", "throughput": 2327.3, "total_tokens": 74829152} {"current_steps": 38885, "total_steps": 40000, "loss": 0.0711, "lr": 9.597101867837854e-08, "epoch": 6.343421159964108, "percentage": 97.21, "elapsed_time": "8:55:54", "remaining_time": "0:15:22", "throughput": 2327.47, "total_tokens": 74839424} {"current_steps": 38890, "total_steps": 40000, "loss": 0.0, "lr": 9.511353422125835e-08, "epoch": 6.344236887184925, "percentage": 97.22, "elapsed_time": "8:55:56", "remaining_time": "0:15:17", "throughput": 2327.61, "total_tokens": 74848800} {"current_steps": 38895, "total_steps": 40000, "loss": 0.0001, "lr": 9.42598904106029e-08, "epoch": 6.345052614405743, "percentage": 97.24, "elapsed_time": "8:55:58", "remaining_time": "0:15:13", "throughput": 2327.81, "total_tokens": 74859744} {"current_steps": 38900, "total_steps": 40000, "loss": 0.0001, "lr": 9.341008737806245e-08, "epoch": 6.34586834162656, "percentage": 97.25, "elapsed_time": "8:56:01", "remaining_time": "0:15:09", "throughput": 2327.95, "total_tokens": 74869168} {"current_steps": 38905, "total_steps": 40000, "loss": 0.0001, "lr": 9.256412525467661e-08, "epoch": 6.346684068847377, "percentage": 97.26, "elapsed_time": "8:56:03", "remaining_time": "0:15:05", "throughput": 2328.11, "total_tokens": 74879104} {"current_steps": 38910, "total_steps": 40000, "loss": 0.0001, "lr": 9.172200417091326e-08, "epoch": 6.3474997960681945, "percentage": 97.28, "elapsed_time": "8:56:05", "remaining_time": "0:15:01", "throughput": 2328.28, "total_tokens": 74889440} {"current_steps": 38915, "total_steps": 40000, "loss": 0.015, "lr": 9.088372425663239e-08, "epoch": 6.3483155232890125, "percentage": 97.29, "elapsed_time": "8:56:07", "remaining_time": "0:14:56", "throughput": 2328.4, "total_tokens": 74898160} {"current_steps": 38920, "total_steps": 40000, "loss": 0.0998, "lr": 9.004928564110837e-08, "epoch": 6.34913125050983, "percentage": 97.3, "elapsed_time": "8:56:09", "remaining_time": "0:14:52", "throughput": 2328.54, "total_tokens": 74907456} {"current_steps": 38925, "total_steps": 40000, "loss": 0.0001, "lr": 8.92186884530244e-08, "epoch": 6.349946977730647, "percentage": 97.31, "elapsed_time": "8:56:11", "remaining_time": "0:14:48", "throughput": 2328.68, "total_tokens": 74916752} {"current_steps": 38930, "total_steps": 40000, "loss": 0.0, "lr": 8.83919328204641e-08, "epoch": 6.350762704951464, "percentage": 97.32, "elapsed_time": "8:56:13", "remaining_time": "0:14:44", "throughput": 2328.84, "total_tokens": 74926864} {"current_steps": 38935, "total_steps": 40000, "loss": 0.0098, "lr": 8.756901887093105e-08, "epoch": 6.351578432172282, "percentage": 97.34, "elapsed_time": "8:56:15", "remaining_time": "0:14:40", "throughput": 2329.0, "total_tokens": 74936976} {"current_steps": 38940, "total_steps": 40000, "loss": 0.0001, "lr": 8.674994673132098e-08, "epoch": 6.352394159393099, "percentage": 97.35, "elapsed_time": "8:56:17", "remaining_time": "0:14:35", "throughput": 2329.17, "total_tokens": 74947168} {"current_steps": 38945, "total_steps": 40000, "loss": 0.0, "lr": 8.593471652794949e-08, "epoch": 6.353209886613916, "percentage": 97.36, "elapsed_time": "8:56:19", "remaining_time": "0:14:31", "throughput": 2329.33, "total_tokens": 74957056} {"current_steps": 38950, "total_steps": 40000, "loss": 0.0285, "lr": 8.512332838653548e-08, "epoch": 6.354025613834733, "percentage": 97.38, "elapsed_time": "8:56:21", "remaining_time": "0:14:27", "throughput": 2329.45, "total_tokens": 74965824} {"current_steps": 38955, "total_steps": 40000, "loss": 0.0001, "lr": 8.431578243220106e-08, "epoch": 6.354841341055551, "percentage": 97.39, "elapsed_time": "8:56:23", "remaining_time": "0:14:23", "throughput": 2329.57, "total_tokens": 74974528} {"current_steps": 38960, "total_steps": 40000, "loss": 0.065, "lr": 8.351207878948552e-08, "epoch": 6.3556570682763684, "percentage": 97.4, "elapsed_time": "8:56:25", "remaining_time": "0:14:19", "throughput": 2329.69, "total_tokens": 74983248} {"current_steps": 38965, "total_steps": 40000, "loss": 0.0004, "lr": 8.271221758232583e-08, "epoch": 6.356472795497186, "percentage": 97.41, "elapsed_time": "8:56:27", "remaining_time": "0:14:14", "throughput": 2329.85, "total_tokens": 74993056} {"current_steps": 38970, "total_steps": 40000, "loss": 0.0001, "lr": 8.191619893407332e-08, "epoch": 6.357288522718003, "percentage": 97.42, "elapsed_time": "8:56:30", "remaining_time": "0:14:10", "throughput": 2330.02, "total_tokens": 75003616} {"current_steps": 38975, "total_steps": 40000, "loss": 0.0, "lr": 8.112402296748534e-08, "epoch": 6.358104249938821, "percentage": 97.44, "elapsed_time": "8:56:32", "remaining_time": "0:14:06", "throughput": 2330.15, "total_tokens": 75012576} {"current_steps": 38980, "total_steps": 40000, "loss": 0.0002, "lr": 8.033568980471973e-08, "epoch": 6.358919977159638, "percentage": 97.45, "elapsed_time": "8:56:34", "remaining_time": "0:14:02", "throughput": 2330.25, "total_tokens": 75020416} {"current_steps": 38985, "total_steps": 40000, "loss": 0.001, "lr": 7.955119956735146e-08, "epoch": 6.359735704380455, "percentage": 97.46, "elapsed_time": "8:56:36", "remaining_time": "0:13:58", "throughput": 2330.41, "total_tokens": 75030560} {"current_steps": 38990, "total_steps": 40000, "loss": 0.0007, "lr": 7.877055237636155e-08, "epoch": 6.360551431601272, "percentage": 97.47, "elapsed_time": "8:56:38", "remaining_time": "0:13:54", "throughput": 2330.58, "total_tokens": 75040880} {"current_steps": 38995, "total_steps": 40000, "loss": 0.0, "lr": 7.79937483521287e-08, "epoch": 6.36136715882209, "percentage": 97.49, "elapsed_time": "8:56:40", "remaining_time": "0:13:49", "throughput": 2330.73, "total_tokens": 75050464} {"current_steps": 39000, "total_steps": 40000, "loss": 0.0815, "lr": 7.722078761444873e-08, "epoch": 6.362182886042907, "percentage": 97.5, "elapsed_time": "8:56:42", "remaining_time": "0:13:45", "throughput": 2330.83, "total_tokens": 75058640} {"current_steps": 39000, "total_steps": 40000, "eval_loss": 0.43038803339004517, "epoch": 6.362182886042907, "percentage": 97.5, "elapsed_time": "8:58:03", "remaining_time": "0:13:47", "throughput": 2324.99, "total_tokens": 75058640} {"current_steps": 39005, "total_steps": 40000, "loss": 0.0001, "lr": 7.645167028252631e-08, "epoch": 6.362998613263724, "percentage": 97.51, "elapsed_time": "8:58:07", "remaining_time": "0:13:43", "throughput": 2325.03, "total_tokens": 75068624} {"current_steps": 39010, "total_steps": 40000, "loss": 0.0139, "lr": 7.568639647496379e-08, "epoch": 6.3638143404845415, "percentage": 97.52, "elapsed_time": "8:58:09", "remaining_time": "0:13:39", "throughput": 2325.17, "total_tokens": 75078016} {"current_steps": 39015, "total_steps": 40000, "loss": 0.0, "lr": 7.492496630977508e-08, "epoch": 6.3646300677053596, "percentage": 97.54, "elapsed_time": "8:58:11", "remaining_time": "0:13:35", "throughput": 2325.33, "total_tokens": 75088048} {"current_steps": 39020, "total_steps": 40000, "loss": 0.0001, "lr": 7.416737990438571e-08, "epoch": 6.365445794926177, "percentage": 97.55, "elapsed_time": "8:58:13", "remaining_time": "0:13:31", "throughput": 2325.49, "total_tokens": 75098080} {"current_steps": 39025, "total_steps": 40000, "loss": 0.0671, "lr": 7.341363737562445e-08, "epoch": 6.366261522146994, "percentage": 97.56, "elapsed_time": "8:58:15", "remaining_time": "0:13:26", "throughput": 2325.62, "total_tokens": 75106912} {"current_steps": 39030, "total_steps": 40000, "loss": 0.0, "lr": 7.266373883972887e-08, "epoch": 6.367077249367812, "percentage": 97.58, "elapsed_time": "8:58:17", "remaining_time": "0:13:22", "throughput": 2325.76, "total_tokens": 75116416} {"current_steps": 39035, "total_steps": 40000, "loss": 0.0, "lr": 7.191768441233981e-08, "epoch": 6.367892976588629, "percentage": 97.59, "elapsed_time": "8:58:19", "remaining_time": "0:13:18", "throughput": 2325.92, "total_tokens": 75126352} {"current_steps": 39040, "total_steps": 40000, "loss": 0.0002, "lr": 7.11754742085069e-08, "epoch": 6.368708703809446, "percentage": 97.6, "elapsed_time": "8:58:21", "remaining_time": "0:13:14", "throughput": 2326.03, "total_tokens": 75134768} {"current_steps": 39045, "total_steps": 40000, "loss": 0.0001, "lr": 7.043710834269413e-08, "epoch": 6.369524431030263, "percentage": 97.61, "elapsed_time": "8:58:23", "remaining_time": "0:13:10", "throughput": 2326.19, "total_tokens": 75144512} {"current_steps": 39050, "total_steps": 40000, "loss": 0.0096, "lr": 6.970258692876319e-08, "epoch": 6.370340158251081, "percentage": 97.62, "elapsed_time": "8:58:25", "remaining_time": "0:13:05", "throughput": 2326.29, "total_tokens": 75152752} {"current_steps": 39055, "total_steps": 40000, "loss": 0.0, "lr": 6.897191007998738e-08, "epoch": 6.371155885471898, "percentage": 97.64, "elapsed_time": "8:58:27", "remaining_time": "0:13:01", "throughput": 2326.45, "total_tokens": 75162736} {"current_steps": 39060, "total_steps": 40000, "loss": 0.0002, "lr": 6.824507790904599e-08, "epoch": 6.3719716126927155, "percentage": 97.65, "elapsed_time": "8:58:29", "remaining_time": "0:12:57", "throughput": 2326.61, "total_tokens": 75172544} {"current_steps": 39065, "total_steps": 40000, "loss": 0.0001, "lr": 6.752209052802439e-08, "epoch": 6.372787339913533, "percentage": 97.66, "elapsed_time": "8:58:32", "remaining_time": "0:12:53", "throughput": 2326.77, "total_tokens": 75182800} {"current_steps": 39070, "total_steps": 40000, "loss": 0.0001, "lr": 6.680294804841946e-08, "epoch": 6.373603067134351, "percentage": 97.67, "elapsed_time": "8:58:34", "remaining_time": "0:12:49", "throughput": 2326.91, "total_tokens": 75192144} {"current_steps": 39075, "total_steps": 40000, "loss": 0.0, "lr": 6.608765058112865e-08, "epoch": 6.374418794355168, "percentage": 97.69, "elapsed_time": "8:58:36", "remaining_time": "0:12:45", "throughput": 2327.05, "total_tokens": 75201312} {"current_steps": 39080, "total_steps": 40000, "loss": 0.0, "lr": 6.537619823646368e-08, "epoch": 6.375234521575985, "percentage": 97.7, "elapsed_time": "8:58:38", "remaining_time": "0:12:40", "throughput": 2327.21, "total_tokens": 75211440} {"current_steps": 39085, "total_steps": 40000, "loss": 0.0, "lr": 6.466859112413404e-08, "epoch": 6.376050248796802, "percentage": 97.71, "elapsed_time": "8:58:40", "remaining_time": "0:12:36", "throughput": 2327.35, "total_tokens": 75220704} {"current_steps": 39090, "total_steps": 40000, "loss": 0.0, "lr": 6.39648293532663e-08, "epoch": 6.37686597601762, "percentage": 97.72, "elapsed_time": "8:58:42", "remaining_time": "0:12:32", "throughput": 2327.5, "total_tokens": 75230368} {"current_steps": 39095, "total_steps": 40000, "loss": 0.0005, "lr": 6.32649130323848e-08, "epoch": 6.377681703238437, "percentage": 97.74, "elapsed_time": "8:58:44", "remaining_time": "0:12:28", "throughput": 2327.61, "total_tokens": 75238832} {"current_steps": 39100, "total_steps": 40000, "loss": 0.0, "lr": 6.256884226943094e-08, "epoch": 6.378497430459254, "percentage": 97.75, "elapsed_time": "8:58:46", "remaining_time": "0:12:24", "throughput": 2327.72, "total_tokens": 75247280} {"current_steps": 39105, "total_steps": 40000, "loss": 0.0, "lr": 6.187661717174386e-08, "epoch": 6.3793131576800715, "percentage": 97.76, "elapsed_time": "8:58:48", "remaining_time": "0:12:19", "throughput": 2327.89, "total_tokens": 75257568} {"current_steps": 39110, "total_steps": 40000, "loss": 0.0, "lr": 6.118823784607708e-08, "epoch": 6.3801288849008895, "percentage": 97.78, "elapsed_time": "8:58:50", "remaining_time": "0:12:15", "throughput": 2328.01, "total_tokens": 75266160} {"current_steps": 39115, "total_steps": 40000, "loss": 0.0, "lr": 6.050370439858178e-08, "epoch": 6.380944612121707, "percentage": 97.79, "elapsed_time": "8:58:52", "remaining_time": "0:12:11", "throughput": 2328.14, "total_tokens": 75275376} {"current_steps": 39120, "total_steps": 40000, "loss": 0.0001, "lr": 5.98230169348235e-08, "epoch": 6.381760339342524, "percentage": 97.8, "elapsed_time": "8:58:54", "remaining_time": "0:12:07", "throughput": 2328.29, "total_tokens": 75284880} {"current_steps": 39125, "total_steps": 40000, "loss": 0.0003, "lr": 5.914617555977664e-08, "epoch": 6.382576066563341, "percentage": 97.81, "elapsed_time": "8:58:56", "remaining_time": "0:12:03", "throughput": 2328.44, "total_tokens": 75294544} {"current_steps": 39130, "total_steps": 40000, "loss": 0.0001, "lr": 5.8473180377816017e-08, "epoch": 6.383391793784159, "percentage": 97.82, "elapsed_time": "8:58:59", "remaining_time": "0:11:59", "throughput": 2328.61, "total_tokens": 75305056} {"current_steps": 39135, "total_steps": 40000, "loss": 0.0001, "lr": 5.780403149272251e-08, "epoch": 6.384207521004976, "percentage": 97.84, "elapsed_time": "8:59:01", "remaining_time": "0:11:54", "throughput": 2328.8, "total_tokens": 75315792} {"current_steps": 39140, "total_steps": 40000, "loss": 0.0001, "lr": 5.7138729007694126e-08, "epoch": 6.385023248225793, "percentage": 97.85, "elapsed_time": "8:59:03", "remaining_time": "0:11:50", "throughput": 2328.96, "total_tokens": 75325824} {"current_steps": 39145, "total_steps": 40000, "loss": 0.0001, "lr": 5.64772730253238e-08, "epoch": 6.38583897544661, "percentage": 97.86, "elapsed_time": "8:59:05", "remaining_time": "0:11:46", "throughput": 2329.13, "total_tokens": 75336112} {"current_steps": 39150, "total_steps": 40000, "loss": 0.0001, "lr": 5.5819663647618814e-08, "epoch": 6.386654702667428, "percentage": 97.88, "elapsed_time": "8:59:07", "remaining_time": "0:11:42", "throughput": 2329.27, "total_tokens": 75345536} {"current_steps": 39155, "total_steps": 40000, "loss": 0.0009, "lr": 5.5165900975989723e-08, "epoch": 6.387470429888245, "percentage": 97.89, "elapsed_time": "8:59:09", "remaining_time": "0:11:38", "throughput": 2329.42, "total_tokens": 75355232} {"current_steps": 39160, "total_steps": 40000, "loss": 0.0014, "lr": 5.451598511125311e-08, "epoch": 6.388286157109063, "percentage": 97.9, "elapsed_time": "8:59:11", "remaining_time": "0:11:33", "throughput": 2329.55, "total_tokens": 75364368} {"current_steps": 39165, "total_steps": 40000, "loss": 0.0002, "lr": 5.3869916153637124e-08, "epoch": 6.38910188432988, "percentage": 97.91, "elapsed_time": "8:59:13", "remaining_time": "0:11:29", "throughput": 2329.71, "total_tokens": 75374464} {"current_steps": 39170, "total_steps": 40000, "loss": 0.0, "lr": 5.322769420277318e-08, "epoch": 6.389917611550698, "percentage": 97.92, "elapsed_time": "8:59:15", "remaining_time": "0:11:25", "throughput": 2329.87, "total_tokens": 75384224} {"current_steps": 39175, "total_steps": 40000, "loss": 0.0001, "lr": 5.258931935769873e-08, "epoch": 6.390733338771515, "percentage": 97.94, "elapsed_time": "8:59:17", "remaining_time": "0:11:21", "throughput": 2330.02, "total_tokens": 75394144} {"current_steps": 39180, "total_steps": 40000, "loss": 0.0, "lr": 5.19547917168628e-08, "epoch": 6.391549065992332, "percentage": 97.95, "elapsed_time": "8:59:19", "remaining_time": "0:11:17", "throughput": 2330.17, "total_tokens": 75403552} {"current_steps": 39185, "total_steps": 40000, "loss": 0.0004, "lr": 5.13241113781121e-08, "epoch": 6.392364793213149, "percentage": 97.96, "elapsed_time": "8:59:21", "remaining_time": "0:11:13", "throughput": 2330.32, "total_tokens": 75413424} {"current_steps": 39190, "total_steps": 40000, "loss": 0.0002, "lr": 5.0697278438707755e-08, "epoch": 6.393180520433967, "percentage": 97.97, "elapsed_time": "8:59:23", "remaining_time": "0:11:08", "throughput": 2330.43, "total_tokens": 75421776} {"current_steps": 39195, "total_steps": 40000, "loss": 0.0691, "lr": 5.0074292995316854e-08, "epoch": 6.393996247654784, "percentage": 97.99, "elapsed_time": "8:59:25", "remaining_time": "0:11:04", "throughput": 2330.59, "total_tokens": 75431552} {"current_steps": 39200, "total_steps": 40000, "loss": 0.0, "lr": 4.945515514400978e-08, "epoch": 6.394811974875601, "percentage": 98.0, "elapsed_time": "8:59:27", "remaining_time": "0:11:00", "throughput": 2330.72, "total_tokens": 75440784} {"current_steps": 39200, "total_steps": 40000, "eval_loss": 0.4322725534439087, "epoch": 6.394811974875601, "percentage": 98.0, "elapsed_time": "9:00:48", "remaining_time": "0:11:02", "throughput": 2324.92, "total_tokens": 75440784} {"current_steps": 39205, "total_steps": 40000, "loss": 0.0691, "lr": 4.883986498026571e-08, "epoch": 6.395627702096419, "percentage": 98.01, "elapsed_time": "9:00:52", "remaining_time": "0:10:58", "throughput": 2324.92, "total_tokens": 75449888} {"current_steps": 39210, "total_steps": 40000, "loss": 0.0005, "lr": 4.822842259896987e-08, "epoch": 6.3964434293172365, "percentage": 98.02, "elapsed_time": "9:00:54", "remaining_time": "0:10:53", "throughput": 2325.08, "total_tokens": 75459856} {"current_steps": 39215, "total_steps": 40000, "loss": 0.0, "lr": 4.762082809441626e-08, "epoch": 6.397259156538054, "percentage": 98.04, "elapsed_time": "9:00:56", "remaining_time": "0:10:49", "throughput": 2325.24, "total_tokens": 75469872} {"current_steps": 39220, "total_steps": 40000, "loss": 0.0, "lr": 4.7017081560302156e-08, "epoch": 6.398074883758871, "percentage": 98.05, "elapsed_time": "9:00:58", "remaining_time": "0:10:45", "throughput": 2325.4, "total_tokens": 75479680} {"current_steps": 39225, "total_steps": 40000, "loss": 0.0008, "lr": 4.6417183089730866e-08, "epoch": 6.398890610979688, "percentage": 98.06, "elapsed_time": "9:01:00", "remaining_time": "0:10:41", "throughput": 2325.51, "total_tokens": 75488208} {"current_steps": 39230, "total_steps": 40000, "loss": 0.0001, "lr": 4.5821132775217265e-08, "epoch": 6.399706338200506, "percentage": 98.08, "elapsed_time": "9:01:02", "remaining_time": "0:10:37", "throughput": 2325.63, "total_tokens": 75496928} {"current_steps": 39235, "total_steps": 40000, "loss": 0.0, "lr": 4.5228930708679504e-08, "epoch": 6.400522065421323, "percentage": 98.09, "elapsed_time": "9:01:05", "remaining_time": "0:10:33", "throughput": 2325.74, "total_tokens": 75505184} {"current_steps": 39240, "total_steps": 40000, "loss": 0.0, "lr": 4.464057698144175e-08, "epoch": 6.40133779264214, "percentage": 98.1, "elapsed_time": "9:01:07", "remaining_time": "0:10:28", "throughput": 2325.87, "total_tokens": 75514464} {"current_steps": 39245, "total_steps": 40000, "loss": 0.0001, "lr": 4.4056071684236974e-08, "epoch": 6.402153519862958, "percentage": 98.11, "elapsed_time": "9:01:09", "remaining_time": "0:10:24", "throughput": 2326.01, "total_tokens": 75523584} {"current_steps": 39250, "total_steps": 40000, "loss": 0.0773, "lr": 4.347541490719864e-08, "epoch": 6.402969247083775, "percentage": 98.12, "elapsed_time": "9:01:11", "remaining_time": "0:10:20", "throughput": 2326.16, "total_tokens": 75533280} {"current_steps": 39255, "total_steps": 40000, "loss": 0.0001, "lr": 4.2898606739877336e-08, "epoch": 6.4037849743045925, "percentage": 98.14, "elapsed_time": "9:01:13", "remaining_time": "0:10:16", "throughput": 2326.32, "total_tokens": 75543536} {"current_steps": 39260, "total_steps": 40000, "loss": 0.0, "lr": 4.232564727122135e-08, "epoch": 6.40460070152541, "percentage": 98.15, "elapsed_time": "9:01:15", "remaining_time": "0:10:12", "throughput": 2326.47, "total_tokens": 75552992} {"current_steps": 39265, "total_steps": 40000, "loss": 0.0018, "lr": 4.1756536589585004e-08, "epoch": 6.405416428746228, "percentage": 98.16, "elapsed_time": "9:01:17", "remaining_time": "0:10:07", "throughput": 2326.6, "total_tokens": 75562112} {"current_steps": 39270, "total_steps": 40000, "loss": 0.0001, "lr": 4.119127478273976e-08, "epoch": 6.406232155967045, "percentage": 98.17, "elapsed_time": "9:01:19", "remaining_time": "0:10:03", "throughput": 2326.75, "total_tokens": 75571840} {"current_steps": 39275, "total_steps": 40000, "loss": 0.0, "lr": 4.062986193784923e-08, "epoch": 6.407047883187862, "percentage": 98.19, "elapsed_time": "9:01:21", "remaining_time": "0:09:59", "throughput": 2326.85, "total_tokens": 75580048} {"current_steps": 39280, "total_steps": 40000, "loss": 0.0, "lr": 4.007229814149416e-08, "epoch": 6.407863610408679, "percentage": 98.2, "elapsed_time": "9:01:23", "remaining_time": "0:09:55", "throughput": 2327.0, "total_tokens": 75589552} {"current_steps": 39285, "total_steps": 40000, "loss": 0.1048, "lr": 3.951858347965576e-08, "epoch": 6.408679337629497, "percentage": 98.21, "elapsed_time": "9:01:25", "remaining_time": "0:09:51", "throughput": 2327.09, "total_tokens": 75597440} {"current_steps": 39290, "total_steps": 40000, "loss": 0.0, "lr": 3.896871803772684e-08, "epoch": 6.409495064850314, "percentage": 98.22, "elapsed_time": "9:01:27", "remaining_time": "0:09:47", "throughput": 2327.25, "total_tokens": 75607232} {"current_steps": 39295, "total_steps": 40000, "loss": 0.0001, "lr": 3.842270190050068e-08, "epoch": 6.410310792071131, "percentage": 98.24, "elapsed_time": "9:01:29", "remaining_time": "0:09:42", "throughput": 2327.38, "total_tokens": 75616464} {"current_steps": 39300, "total_steps": 40000, "loss": 0.1314, "lr": 3.7880535152179376e-08, "epoch": 6.411126519291948, "percentage": 98.25, "elapsed_time": "9:01:32", "remaining_time": "0:09:38", "throughput": 2327.51, "total_tokens": 75625520} {"current_steps": 39305, "total_steps": 40000, "loss": 0.0001, "lr": 3.734221787637382e-08, "epoch": 6.4119422465127665, "percentage": 98.26, "elapsed_time": "9:01:34", "remaining_time": "0:09:34", "throughput": 2327.64, "total_tokens": 75634480} {"current_steps": 39310, "total_steps": 40000, "loss": 0.0001, "lr": 3.680775015609817e-08, "epoch": 6.412757973733584, "percentage": 98.28, "elapsed_time": "9:01:36", "remaining_time": "0:09:30", "throughput": 2327.83, "total_tokens": 75645504} {"current_steps": 39315, "total_steps": 40000, "loss": 0.0004, "lr": 3.627713207377537e-08, "epoch": 6.413573700954401, "percentage": 98.29, "elapsed_time": "9:01:38", "remaining_time": "0:09:26", "throughput": 2327.96, "total_tokens": 75654736} {"current_steps": 39320, "total_steps": 40000, "loss": 0.0006, "lr": 3.575036371123164e-08, "epoch": 6.414389428175218, "percentage": 98.3, "elapsed_time": "9:01:40", "remaining_time": "0:09:22", "throughput": 2328.15, "total_tokens": 75665584} {"current_steps": 39325, "total_steps": 40000, "loss": 0.0002, "lr": 3.5227445149704776e-08, "epoch": 6.415205155396036, "percentage": 98.31, "elapsed_time": "9:01:42", "remaining_time": "0:09:17", "throughput": 2328.29, "total_tokens": 75674896} {"current_steps": 39330, "total_steps": 40000, "loss": 0.0262, "lr": 3.470837646983027e-08, "epoch": 6.416020882616853, "percentage": 98.32, "elapsed_time": "9:01:44", "remaining_time": "0:09:13", "throughput": 2328.45, "total_tokens": 75684992} {"current_steps": 39335, "total_steps": 40000, "loss": 0.0002, "lr": 3.419315775165799e-08, "epoch": 6.41683660983767, "percentage": 98.34, "elapsed_time": "9:01:46", "remaining_time": "0:09:09", "throughput": 2328.6, "total_tokens": 75694816} {"current_steps": 39340, "total_steps": 40000, "loss": 0.0, "lr": 3.368178907464103e-08, "epoch": 6.417652337058487, "percentage": 98.35, "elapsed_time": "9:01:48", "remaining_time": "0:09:05", "throughput": 2328.78, "total_tokens": 75705248} {"current_steps": 39345, "total_steps": 40000, "loss": 0.0002, "lr": 3.317427051763855e-08, "epoch": 6.418468064279305, "percentage": 98.36, "elapsed_time": "9:01:50", "remaining_time": "0:09:01", "throughput": 2328.91, "total_tokens": 75714608} {"current_steps": 39350, "total_steps": 40000, "loss": 0.0001, "lr": 3.267060215891571e-08, "epoch": 6.419283791500122, "percentage": 98.38, "elapsed_time": "9:01:52", "remaining_time": "0:08:57", "throughput": 2329.09, "total_tokens": 75725152} {"current_steps": 39355, "total_steps": 40000, "loss": 0.0413, "lr": 3.217078407614649e-08, "epoch": 6.4200995187209395, "percentage": 98.39, "elapsed_time": "9:01:54", "remaining_time": "0:08:52", "throughput": 2329.21, "total_tokens": 75733744} {"current_steps": 39360, "total_steps": 40000, "loss": 0.0004, "lr": 3.1674816346405345e-08, "epoch": 6.420915245941757, "percentage": 98.4, "elapsed_time": "9:01:56", "remaining_time": "0:08:48", "throughput": 2329.33, "total_tokens": 75742560} {"current_steps": 39365, "total_steps": 40000, "loss": 0.0, "lr": 3.11826990461811e-08, "epoch": 6.421730973162575, "percentage": 98.41, "elapsed_time": "9:01:58", "remaining_time": "0:08:44", "throughput": 2329.52, "total_tokens": 75753744} {"current_steps": 39370, "total_steps": 40000, "loss": 0.0001, "lr": 3.069443225136304e-08, "epoch": 6.422546700383392, "percentage": 98.42, "elapsed_time": "9:02:01", "remaining_time": "0:08:40", "throughput": 2329.69, "total_tokens": 75764064} {"current_steps": 39375, "total_steps": 40000, "loss": 0.0002, "lr": 3.021001603724372e-08, "epoch": 6.423362427604209, "percentage": 98.44, "elapsed_time": "9:02:03", "remaining_time": "0:08:36", "throughput": 2329.86, "total_tokens": 75774416} {"current_steps": 39380, "total_steps": 40000, "loss": 0.0, "lr": 2.9729450478532818e-08, "epoch": 6.424178154825027, "percentage": 98.45, "elapsed_time": "9:02:05", "remaining_time": "0:08:32", "throughput": 2330.01, "total_tokens": 75784112} {"current_steps": 39385, "total_steps": 40000, "loss": 0.0001, "lr": 2.9252735649337726e-08, "epoch": 6.424993882045844, "percentage": 98.46, "elapsed_time": "9:02:07", "remaining_time": "0:08:27", "throughput": 2330.17, "total_tokens": 75793904} {"current_steps": 39390, "total_steps": 40000, "loss": 0.0, "lr": 2.8779871623171863e-08, "epoch": 6.425809609266661, "percentage": 98.47, "elapsed_time": "9:02:09", "remaining_time": "0:08:23", "throughput": 2330.3, "total_tokens": 75803088} {"current_steps": 39395, "total_steps": 40000, "loss": 0.0, "lr": 2.8310858472957448e-08, "epoch": 6.426625336487478, "percentage": 98.49, "elapsed_time": "9:02:11", "remaining_time": "0:08:19", "throughput": 2330.44, "total_tokens": 75812512} {"current_steps": 39400, "total_steps": 40000, "loss": 0.0003, "lr": 2.784569627101996e-08, "epoch": 6.4274410637082955, "percentage": 98.5, "elapsed_time": "9:02:13", "remaining_time": "0:08:15", "throughput": 2330.6, "total_tokens": 75822528} {"current_steps": 39400, "total_steps": 40000, "eval_loss": 0.43134525418281555, "epoch": 6.4274410637082955, "percentage": 98.5, "elapsed_time": "9:03:34", "remaining_time": "0:08:16", "throughput": 2324.83, "total_tokens": 75822528} {"current_steps": 39405, "total_steps": 40000, "loss": 0.0001, "lr": 2.738438508909924e-08, "epoch": 6.4282567909291135, "percentage": 98.51, "elapsed_time": "9:03:38", "remaining_time": "0:08:12", "throughput": 2324.85, "total_tokens": 75832176} {"current_steps": 39410, "total_steps": 40000, "loss": 0.069, "lr": 2.692692499833005e-08, "epoch": 6.429072518149931, "percentage": 98.52, "elapsed_time": "9:03:40", "remaining_time": "0:08:08", "throughput": 2325.01, "total_tokens": 75842256} {"current_steps": 39415, "total_steps": 40000, "loss": 0.0619, "lr": 2.647331606926151e-08, "epoch": 6.429888245370748, "percentage": 98.54, "elapsed_time": "9:03:42", "remaining_time": "0:08:04", "throughput": 2325.16, "total_tokens": 75851920} {"current_steps": 39420, "total_steps": 40000, "loss": 0.0001, "lr": 2.6023558371843225e-08, "epoch": 6.430703972591566, "percentage": 98.55, "elapsed_time": "9:03:44", "remaining_time": "0:08:00", "throughput": 2325.29, "total_tokens": 75860928} {"current_steps": 39425, "total_steps": 40000, "loss": 0.0, "lr": 2.557765197543638e-08, "epoch": 6.431519699812383, "percentage": 98.56, "elapsed_time": "9:03:46", "remaining_time": "0:07:55", "throughput": 2325.46, "total_tokens": 75871200} {"current_steps": 39430, "total_steps": 40000, "loss": 0.0001, "lr": 2.513559694880263e-08, "epoch": 6.4323354270332, "percentage": 98.58, "elapsed_time": "9:03:48", "remaining_time": "0:07:51", "throughput": 2325.56, "total_tokens": 75879296} {"current_steps": 39435, "total_steps": 40000, "loss": 0.0001, "lr": 2.469739336011523e-08, "epoch": 6.433151154254017, "percentage": 98.59, "elapsed_time": "9:03:50", "remaining_time": "0:07:47", "throughput": 2325.66, "total_tokens": 75887344} {"current_steps": 39440, "total_steps": 40000, "loss": 0.0, "lr": 2.4263041276947894e-08, "epoch": 6.433966881474835, "percentage": 98.6, "elapsed_time": "9:03:52", "remaining_time": "0:07:43", "throughput": 2325.75, "total_tokens": 75895312} {"current_steps": 39445, "total_steps": 40000, "loss": 0.0214, "lr": 2.3832540766283164e-08, "epoch": 6.434782608695652, "percentage": 98.61, "elapsed_time": "9:03:54", "remaining_time": "0:07:39", "throughput": 2325.9, "total_tokens": 75904752} {"current_steps": 39450, "total_steps": 40000, "loss": 0.0003, "lr": 2.3405891894512366e-08, "epoch": 6.4355983359164695, "percentage": 98.62, "elapsed_time": "9:03:56", "remaining_time": "0:07:35", "throughput": 2326.0, "total_tokens": 75912960} {"current_steps": 39455, "total_steps": 40000, "loss": 0.0, "lr": 2.29830947274301e-08, "epoch": 6.436414063137287, "percentage": 98.64, "elapsed_time": "9:03:58", "remaining_time": "0:07:30", "throughput": 2326.17, "total_tokens": 75923440} {"current_steps": 39460, "total_steps": 40000, "loss": 0.0001, "lr": 2.2564149330231432e-08, "epoch": 6.437229790358105, "percentage": 98.65, "elapsed_time": "9:04:00", "remaining_time": "0:07:26", "throughput": 2326.37, "total_tokens": 75934720} {"current_steps": 39465, "total_steps": 40000, "loss": 0.0097, "lr": 2.2149055767528572e-08, "epoch": 6.438045517578922, "percentage": 98.66, "elapsed_time": "9:04:02", "remaining_time": "0:07:22", "throughput": 2326.5, "total_tokens": 75943648} {"current_steps": 39470, "total_steps": 40000, "loss": 0.0001, "lr": 2.1737814103334197e-08, "epoch": 6.438861244799739, "percentage": 98.67, "elapsed_time": "9:04:05", "remaining_time": "0:07:18", "throughput": 2326.63, "total_tokens": 75952736} {"current_steps": 39475, "total_steps": 40000, "loss": 0.0, "lr": 2.1330424401064253e-08, "epoch": 6.439676972020556, "percentage": 98.69, "elapsed_time": "9:04:07", "remaining_time": "0:07:14", "throughput": 2326.74, "total_tokens": 75961376} {"current_steps": 39480, "total_steps": 40000, "loss": 0.0001, "lr": 2.092688672354348e-08, "epoch": 6.440492699241374, "percentage": 98.7, "elapsed_time": "9:04:09", "remaining_time": "0:07:10", "throughput": 2326.89, "total_tokens": 75971040} {"current_steps": 39485, "total_steps": 40000, "loss": 0.0001, "lr": 2.0527201133005435e-08, "epoch": 6.441308426462191, "percentage": 98.71, "elapsed_time": "9:04:11", "remaining_time": "0:07:05", "throughput": 2326.99, "total_tokens": 75979136} {"current_steps": 39490, "total_steps": 40000, "loss": 0.0001, "lr": 2.0131367691084148e-08, "epoch": 6.442124153683008, "percentage": 98.72, "elapsed_time": "9:04:13", "remaining_time": "0:07:01", "throughput": 2327.12, "total_tokens": 75988176} {"current_steps": 39495, "total_steps": 40000, "loss": 0.0001, "lr": 1.9739386458819675e-08, "epoch": 6.442939880903825, "percentage": 98.74, "elapsed_time": "9:04:15", "remaining_time": "0:06:57", "throughput": 2327.25, "total_tokens": 75997360} {"current_steps": 39500, "total_steps": 40000, "loss": 0.0004, "lr": 1.9351257496666442e-08, "epoch": 6.443755608124643, "percentage": 98.75, "elapsed_time": "9:04:17", "remaining_time": "0:06:53", "throughput": 2327.43, "total_tokens": 76007952} {"current_steps": 39505, "total_steps": 40000, "loss": 0.0003, "lr": 1.896698086447657e-08, "epoch": 6.444571335345461, "percentage": 98.76, "elapsed_time": "9:04:19", "remaining_time": "0:06:49", "throughput": 2327.63, "total_tokens": 76019344} {"current_steps": 39510, "total_steps": 40000, "loss": 0.0, "lr": 1.8586556621505436e-08, "epoch": 6.445387062566278, "percentage": 98.78, "elapsed_time": "9:04:21", "remaining_time": "0:06:45", "throughput": 2327.73, "total_tokens": 76027248} {"current_steps": 39515, "total_steps": 40000, "loss": 0.0205, "lr": 1.820998482642833e-08, "epoch": 6.446202789787095, "percentage": 98.79, "elapsed_time": "9:04:23", "remaining_time": "0:06:40", "throughput": 2327.9, "total_tokens": 76037808} {"current_steps": 39520, "total_steps": 40000, "loss": 0.0001, "lr": 1.7837265537309912e-08, "epoch": 6.447018517007913, "percentage": 98.8, "elapsed_time": "9:04:25", "remaining_time": "0:06:36", "throughput": 2328.03, "total_tokens": 76046928} {"current_steps": 39525, "total_steps": 40000, "loss": 0.0, "lr": 1.7468398811629206e-08, "epoch": 6.44783424422873, "percentage": 98.81, "elapsed_time": "9:04:27", "remaining_time": "0:06:32", "throughput": 2328.19, "total_tokens": 76056928} {"current_steps": 39530, "total_steps": 40000, "loss": 0.1476, "lr": 1.710338470627404e-08, "epoch": 6.448649971449547, "percentage": 98.83, "elapsed_time": "9:04:29", "remaining_time": "0:06:28", "throughput": 2328.34, "total_tokens": 76066656} {"current_steps": 39535, "total_steps": 40000, "loss": 0.0, "lr": 1.6742223277529945e-08, "epoch": 6.449465698670364, "percentage": 98.84, "elapsed_time": "9:04:31", "remaining_time": "0:06:24", "throughput": 2328.49, "total_tokens": 76076256} {"current_steps": 39540, "total_steps": 40000, "loss": 0.0001, "lr": 1.6384914581094036e-08, "epoch": 6.450281425891182, "percentage": 98.85, "elapsed_time": "9:04:34", "remaining_time": "0:06:20", "throughput": 2328.57, "total_tokens": 76083840} {"current_steps": 39545, "total_steps": 40000, "loss": 0.0, "lr": 1.6031458672069455e-08, "epoch": 6.451097153111999, "percentage": 98.86, "elapsed_time": "9:04:36", "remaining_time": "0:06:15", "throughput": 2328.72, "total_tokens": 76093440} {"current_steps": 39550, "total_steps": 40000, "loss": 0.0001, "lr": 1.5681855604962602e-08, "epoch": 6.4519128803328165, "percentage": 98.88, "elapsed_time": "9:04:38", "remaining_time": "0:06:11", "throughput": 2328.86, "total_tokens": 76103008} {"current_steps": 39555, "total_steps": 40000, "loss": 0.0003, "lr": 1.5336105433683135e-08, "epoch": 6.4527286075536345, "percentage": 98.89, "elapsed_time": "9:04:40", "remaining_time": "0:06:07", "throughput": 2328.97, "total_tokens": 76111200} {"current_steps": 39560, "total_steps": 40000, "loss": 0.0549, "lr": 1.499420821155506e-08, "epoch": 6.453544334774452, "percentage": 98.9, "elapsed_time": "9:04:42", "remaining_time": "0:06:03", "throughput": 2329.1, "total_tokens": 76120384} {"current_steps": 39565, "total_steps": 40000, "loss": 0.0002, "lr": 1.4656163991302874e-08, "epoch": 6.454360061995269, "percentage": 98.91, "elapsed_time": "9:04:44", "remaining_time": "0:05:59", "throughput": 2329.26, "total_tokens": 76130464} {"current_steps": 39570, "total_steps": 40000, "loss": 0.0002, "lr": 1.4321972825051544e-08, "epoch": 6.455175789216086, "percentage": 98.92, "elapsed_time": "9:04:46", "remaining_time": "0:05:55", "throughput": 2329.42, "total_tokens": 76140464} {"current_steps": 39575, "total_steps": 40000, "loss": 0.0, "lr": 1.3991634764345951e-08, "epoch": 6.455991516436903, "percentage": 98.94, "elapsed_time": "9:04:48", "remaining_time": "0:05:51", "throughput": 2329.59, "total_tokens": 76150736} {"current_steps": 39580, "total_steps": 40000, "loss": 0.0, "lr": 1.3665149860120352e-08, "epoch": 6.456807243657721, "percentage": 98.95, "elapsed_time": "9:04:50", "remaining_time": "0:05:46", "throughput": 2329.74, "total_tokens": 76160512} {"current_steps": 39585, "total_steps": 40000, "loss": 0.0, "lr": 1.3342518162728912e-08, "epoch": 6.457622970878538, "percentage": 98.96, "elapsed_time": "9:04:52", "remaining_time": "0:05:42", "throughput": 2329.86, "total_tokens": 76169312} {"current_steps": 39590, "total_steps": 40000, "loss": 0.0, "lr": 1.30237397219235e-08, "epoch": 6.458438698099355, "percentage": 98.98, "elapsed_time": "9:04:54", "remaining_time": "0:05:38", "throughput": 2330.02, "total_tokens": 76179264} {"current_steps": 39595, "total_steps": 40000, "loss": 0.024, "lr": 1.2708814586862016e-08, "epoch": 6.459254425320173, "percentage": 98.99, "elapsed_time": "9:04:56", "remaining_time": "0:05:34", "throughput": 2330.15, "total_tokens": 76188512} {"current_steps": 39600, "total_steps": 40000, "loss": 0.0001, "lr": 1.2397742806111168e-08, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "9:04:58", "remaining_time": "0:05:30", "throughput": 2330.31, "total_tokens": 76198368} {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.43227317929267883, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "9:06:19", "remaining_time": "0:05:31", "throughput": 2324.55, "total_tokens": 76198368} {"current_steps": 39605, "total_steps": 40000, "loss": 0.0001, "lr": 1.209052442764369e-08, "epoch": 6.460885879761808, "percentage": 99.01, "elapsed_time": "9:06:23", "remaining_time": "0:05:26", "throughput": 2324.61, "total_tokens": 76209312} {"current_steps": 39610, "total_steps": 40000, "loss": 0.0162, "lr": 1.17871594988328e-08, "epoch": 6.461701606982625, "percentage": 99.02, "elapsed_time": "9:06:25", "remaining_time": "0:05:22", "throughput": 2324.74, "total_tokens": 76218528} {"current_steps": 39615, "total_steps": 40000, "loss": 0.0028, "lr": 1.1487648066466072e-08, "epoch": 6.462517334203443, "percentage": 99.04, "elapsed_time": "9:06:27", "remaining_time": "0:05:18", "throughput": 2324.92, "total_tokens": 76229056} {"current_steps": 39620, "total_steps": 40000, "loss": 0.0001, "lr": 1.1191990176728784e-08, "epoch": 6.46333306142426, "percentage": 99.05, "elapsed_time": "9:06:29", "remaining_time": "0:05:14", "throughput": 2325.07, "total_tokens": 76238912} {"current_steps": 39625, "total_steps": 40000, "loss": 0.0002, "lr": 1.0900185875215018e-08, "epoch": 6.464148788645077, "percentage": 99.06, "elapsed_time": "9:06:32", "remaining_time": "0:05:10", "throughput": 2325.25, "total_tokens": 76249632} {"current_steps": 39630, "total_steps": 40000, "loss": 0.0002, "lr": 1.0612235206924891e-08, "epoch": 6.464964515865894, "percentage": 99.08, "elapsed_time": "9:06:34", "remaining_time": "0:05:06", "throughput": 2325.36, "total_tokens": 76258144} {"current_steps": 39635, "total_steps": 40000, "loss": 0.0001, "lr": 1.0328138216264549e-08, "epoch": 6.465780243086712, "percentage": 99.09, "elapsed_time": "9:06:36", "remaining_time": "0:05:02", "throughput": 2325.5, "total_tokens": 76267360} {"current_steps": 39640, "total_steps": 40000, "loss": 0.1334, "lr": 1.004789494704339e-08, "epoch": 6.466595970307529, "percentage": 99.1, "elapsed_time": "9:06:38", "remaining_time": "0:04:57", "throughput": 2325.61, "total_tokens": 76276000} {"current_steps": 39645, "total_steps": 40000, "loss": 0.055, "lr": 9.771505442482397e-09, "epoch": 6.467411697528346, "percentage": 99.11, "elapsed_time": "9:06:40", "remaining_time": "0:04:53", "throughput": 2325.76, "total_tokens": 76285744} {"current_steps": 39650, "total_steps": 40000, "loss": 0.0001, "lr": 9.498969745200259e-09, "epoch": 6.468227424749164, "percentage": 99.12, "elapsed_time": "9:06:42", "remaining_time": "0:04:49", "throughput": 2325.91, "total_tokens": 76295280} {"current_steps": 39655, "total_steps": 40000, "loss": 0.0006, "lr": 9.230287897230017e-09, "epoch": 6.469043151969982, "percentage": 99.14, "elapsed_time": "9:06:44", "remaining_time": "0:04:45", "throughput": 2326.03, "total_tokens": 76304304} {"current_steps": 39660, "total_steps": 40000, "loss": 0.0002, "lr": 8.965459940002419e-09, "epoch": 6.469858879190799, "percentage": 99.15, "elapsed_time": "9:06:46", "remaining_time": "0:04:41", "throughput": 2326.17, "total_tokens": 76313600} {"current_steps": 39665, "total_steps": 40000, "loss": 0.0001, "lr": 8.704485914357019e-09, "epoch": 6.470674606411616, "percentage": 99.16, "elapsed_time": "9:06:48", "remaining_time": "0:04:37", "throughput": 2326.29, "total_tokens": 76322496} {"current_steps": 39670, "total_steps": 40000, "loss": 0.0001, "lr": 8.447365860539402e-09, "epoch": 6.471490333632433, "percentage": 99.17, "elapsed_time": "9:06:50", "remaining_time": "0:04:32", "throughput": 2326.42, "total_tokens": 76331520} {"current_steps": 39675, "total_steps": 40000, "loss": 0.0001, "lr": 8.194099818201184e-09, "epoch": 6.472306060853251, "percentage": 99.19, "elapsed_time": "9:06:52", "remaining_time": "0:04:28", "throughput": 2326.57, "total_tokens": 76341344} {"current_steps": 39680, "total_steps": 40000, "loss": 0.0793, "lr": 7.944687826400011e-09, "epoch": 6.473121788074068, "percentage": 99.2, "elapsed_time": "9:06:54", "remaining_time": "0:04:24", "throughput": 2326.74, "total_tokens": 76351664} {"current_steps": 39685, "total_steps": 40000, "loss": 0.0002, "lr": 7.699129923599557e-09, "epoch": 6.473937515294885, "percentage": 99.21, "elapsed_time": "9:06:56", "remaining_time": "0:04:20", "throughput": 2326.86, "total_tokens": 76360352} {"current_steps": 39690, "total_steps": 40000, "loss": 0.0, "lr": 7.457426147663982e-09, "epoch": 6.474753242515702, "percentage": 99.22, "elapsed_time": "9:06:58", "remaining_time": "0:04:16", "throughput": 2327.01, "total_tokens": 76370240} {"current_steps": 39695, "total_steps": 40000, "loss": 0.0002, "lr": 7.219576535871797e-09, "epoch": 6.47556896973652, "percentage": 99.24, "elapsed_time": "9:07:01", "remaining_time": "0:04:12", "throughput": 2327.13, "total_tokens": 76379024} {"current_steps": 39700, "total_steps": 40000, "loss": 0.0004, "lr": 6.985581124896445e-09, "epoch": 6.4763846969573375, "percentage": 99.25, "elapsed_time": "9:07:03", "remaining_time": "0:04:08", "throughput": 2327.29, "total_tokens": 76388832} {"current_steps": 39705, "total_steps": 40000, "loss": 0.0002, "lr": 6.755439950828501e-09, "epoch": 6.477200424178155, "percentage": 99.26, "elapsed_time": "9:07:05", "remaining_time": "0:04:03", "throughput": 2327.43, "total_tokens": 76398400} {"current_steps": 39710, "total_steps": 40000, "loss": 0.0, "lr": 6.5291530491562444e-09, "epoch": 6.478016151398972, "percentage": 99.28, "elapsed_time": "9:07:07", "remaining_time": "0:03:59", "throughput": 2327.58, "total_tokens": 76408144} {"current_steps": 39715, "total_steps": 40000, "loss": 0.0002, "lr": 6.3067204547739845e-09, "epoch": 6.47883187861979, "percentage": 99.29, "elapsed_time": "9:07:09", "remaining_time": "0:03:55", "throughput": 2327.69, "total_tokens": 76416624} {"current_steps": 39720, "total_steps": 40000, "loss": 0.0001, "lr": 6.088142201987612e-09, "epoch": 6.479647605840607, "percentage": 99.3, "elapsed_time": "9:07:11", "remaining_time": "0:03:51", "throughput": 2327.86, "total_tokens": 76426912} {"current_steps": 39725, "total_steps": 40000, "loss": 0.0002, "lr": 5.873418324503499e-09, "epoch": 6.480463333061424, "percentage": 99.31, "elapsed_time": "9:07:13", "remaining_time": "0:03:47", "throughput": 2328.04, "total_tokens": 76437568} {"current_steps": 39730, "total_steps": 40000, "loss": 0.0, "lr": 5.6625488554340465e-09, "epoch": 6.481279060282241, "percentage": 99.33, "elapsed_time": "9:07:15", "remaining_time": "0:03:43", "throughput": 2328.19, "total_tokens": 76447328} {"current_steps": 39735, "total_steps": 40000, "loss": 0.0, "lr": 5.455533827297688e-09, "epoch": 6.482094787503059, "percentage": 99.34, "elapsed_time": "9:07:17", "remaining_time": "0:03:39", "throughput": 2328.35, "total_tokens": 76457584} {"current_steps": 39740, "total_steps": 40000, "loss": 0.0053, "lr": 5.252373272018885e-09, "epoch": 6.482910514723876, "percentage": 99.35, "elapsed_time": "9:07:19", "remaining_time": "0:03:34", "throughput": 2328.5, "total_tokens": 76467152} {"current_steps": 39745, "total_steps": 40000, "loss": 0.0072, "lr": 5.053067220925356e-09, "epoch": 6.4837262419446935, "percentage": 99.36, "elapsed_time": "9:07:21", "remaining_time": "0:03:30", "throughput": 2328.69, "total_tokens": 76478304} {"current_steps": 39750, "total_steps": 40000, "loss": 0.0, "lr": 4.857615704759177e-09, "epoch": 6.484541969165511, "percentage": 99.38, "elapsed_time": "9:07:23", "remaining_time": "0:03:26", "throughput": 2328.79, "total_tokens": 76486464} {"current_steps": 39755, "total_steps": 40000, "loss": 0.0001, "lr": 4.666018753654577e-09, "epoch": 6.485357696386329, "percentage": 99.39, "elapsed_time": "9:07:25", "remaining_time": "0:03:22", "throughput": 2328.94, "total_tokens": 76496176} {"current_steps": 39760, "total_steps": 40000, "loss": 0.0, "lr": 4.478276397162917e-09, "epoch": 6.486173423607146, "percentage": 99.4, "elapsed_time": "9:07:28", "remaining_time": "0:03:18", "throughput": 2329.06, "total_tokens": 76505120} {"current_steps": 39765, "total_steps": 40000, "loss": 0.0005, "lr": 4.294388664233262e-09, "epoch": 6.486989150827963, "percentage": 99.41, "elapsed_time": "9:07:30", "remaining_time": "0:03:14", "throughput": 2329.19, "total_tokens": 76513968} {"current_steps": 39770, "total_steps": 40000, "loss": 0.0001, "lr": 4.114355583223484e-09, "epoch": 6.487804878048781, "percentage": 99.42, "elapsed_time": "9:07:32", "remaining_time": "0:03:09", "throughput": 2329.31, "total_tokens": 76522912} {"current_steps": 39775, "total_steps": 40000, "loss": 0.0035, "lr": 3.9381771818974845e-09, "epoch": 6.488620605269598, "percentage": 99.44, "elapsed_time": "9:07:34", "remaining_time": "0:03:05", "throughput": 2329.48, "total_tokens": 76533248} {"current_steps": 39780, "total_steps": 40000, "loss": 0.0, "lr": 3.765853487427973e-09, "epoch": 6.489436332490415, "percentage": 99.45, "elapsed_time": "9:07:36", "remaining_time": "0:03:01", "throughput": 2329.64, "total_tokens": 76543376} {"current_steps": 39785, "total_steps": 40000, "loss": 0.0, "lr": 3.5973845263825857e-09, "epoch": 6.490252059711232, "percentage": 99.46, "elapsed_time": "9:07:38", "remaining_time": "0:02:57", "throughput": 2329.76, "total_tokens": 76552096} {"current_steps": 39790, "total_steps": 40000, "loss": 0.0001, "lr": 3.4327703247488684e-09, "epoch": 6.49106778693205, "percentage": 99.48, "elapsed_time": "9:07:40", "remaining_time": "0:02:53", "throughput": 2329.88, "total_tokens": 76560832} {"current_steps": 39795, "total_steps": 40000, "loss": 0.0, "lr": 3.2720109079037443e-09, "epoch": 6.4918835141528675, "percentage": 99.49, "elapsed_time": "9:07:42", "remaining_time": "0:02:49", "throughput": 2330.02, "total_tokens": 76570480} {"current_steps": 39800, "total_steps": 40000, "loss": 0.0001, "lr": 3.1151063006468193e-09, "epoch": 6.492699241373685, "percentage": 99.5, "elapsed_time": "9:07:44", "remaining_time": "0:02:45", "throughput": 2330.2, "total_tokens": 76581104} {"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.4304603636264801, "epoch": 6.492699241373685, "percentage": 99.5, "elapsed_time": "9:09:05", "remaining_time": "0:02:45", "throughput": 2324.47, "total_tokens": 76581104} {"current_steps": 39805, "total_steps": 40000, "loss": 0.0, "lr": 2.962056527169854e-09, "epoch": 6.493514968594502, "percentage": 99.51, "elapsed_time": "9:09:09", "remaining_time": "0:02:41", "throughput": 2324.5, "total_tokens": 76590896} {"current_steps": 39810, "total_steps": 40000, "loss": 0.0109, "lr": 2.8128616110761898e-09, "epoch": 6.49433069581532, "percentage": 99.52, "elapsed_time": "9:09:11", "remaining_time": "0:02:37", "throughput": 2324.65, "total_tokens": 76600464} {"current_steps": 39815, "total_steps": 40000, "loss": 0.0002, "lr": 2.6675215753724223e-09, "epoch": 6.495146423036137, "percentage": 99.54, "elapsed_time": "9:09:13", "remaining_time": "0:02:33", "throughput": 2324.82, "total_tokens": 76610864} {"current_steps": 39820, "total_steps": 40000, "loss": 0.0, "lr": 2.5260364424739557e-09, "epoch": 6.495962150256954, "percentage": 99.55, "elapsed_time": "9:09:15", "remaining_time": "0:02:28", "throughput": 2324.97, "total_tokens": 76620688} {"current_steps": 39825, "total_steps": 40000, "loss": 0.0, "lr": 2.3884062341994475e-09, "epoch": 6.496777877477771, "percentage": 99.56, "elapsed_time": "9:09:17", "remaining_time": "0:02:24", "throughput": 2325.11, "total_tokens": 76630128} {"current_steps": 39830, "total_steps": 40000, "loss": 0.0006, "lr": 2.25463097177081e-09, "epoch": 6.497593604698589, "percentage": 99.58, "elapsed_time": "9:09:19", "remaining_time": "0:02:20", "throughput": 2325.21, "total_tokens": 76638128} {"current_steps": 39835, "total_steps": 40000, "loss": 0.0001, "lr": 2.1247106758215397e-09, "epoch": 6.498409331919406, "percentage": 99.59, "elapsed_time": "9:09:21", "remaining_time": "0:02:16", "throughput": 2325.3, "total_tokens": 76646032} {"current_steps": 39840, "total_steps": 40000, "loss": 0.0005, "lr": 1.998645366382834e-09, "epoch": 6.499225059140223, "percentage": 99.6, "elapsed_time": "9:09:23", "remaining_time": "0:02:12", "throughput": 2325.48, "total_tokens": 76656768} {"current_steps": 39845, "total_steps": 40000, "loss": 0.0002, "lr": 1.876435062897475e-09, "epoch": 6.5000407863610405, "percentage": 99.61, "elapsed_time": "9:09:25", "remaining_time": "0:02:08", "throughput": 2325.67, "total_tokens": 76667984} {"current_steps": 39850, "total_steps": 40000, "loss": 0.0002, "lr": 1.758079784211497e-09, "epoch": 6.500856513581859, "percentage": 99.62, "elapsed_time": "9:09:28", "remaining_time": "0:02:04", "throughput": 2325.81, "total_tokens": 76677392} {"current_steps": 39855, "total_steps": 40000, "loss": 0.043, "lr": 1.6435795485797434e-09, "epoch": 6.501672240802676, "percentage": 99.64, "elapsed_time": "9:09:30", "remaining_time": "0:01:59", "throughput": 2325.96, "total_tokens": 76687072} {"current_steps": 39860, "total_steps": 40000, "loss": 0.0001, "lr": 1.5329343736547596e-09, "epoch": 6.502487968023493, "percentage": 99.65, "elapsed_time": "9:09:32", "remaining_time": "0:01:55", "throughput": 2326.11, "total_tokens": 76696976} {"current_steps": 39865, "total_steps": 40000, "loss": 0.0001, "lr": 1.4261442765006739e-09, "epoch": 6.50330369524431, "percentage": 99.66, "elapsed_time": "9:09:34", "remaining_time": "0:01:51", "throughput": 2326.25, "total_tokens": 76706512} {"current_steps": 39870, "total_steps": 40000, "loss": 0.0, "lr": 1.3232092735876445e-09, "epoch": 6.504119422465128, "percentage": 99.67, "elapsed_time": "9:09:36", "remaining_time": "0:01:47", "throughput": 2326.41, "total_tokens": 76716496} {"current_steps": 39875, "total_steps": 40000, "loss": 0.0001, "lr": 1.2241293807918607e-09, "epoch": 6.504935149685945, "percentage": 99.69, "elapsed_time": "9:09:38", "remaining_time": "0:01:43", "throughput": 2326.55, "total_tokens": 76725744} {"current_steps": 39880, "total_steps": 40000, "loss": 0.0001, "lr": 1.128904613387216e-09, "epoch": 6.505750876906762, "percentage": 99.7, "elapsed_time": "9:09:40", "remaining_time": "0:01:39", "throughput": 2326.72, "total_tokens": 76736368} {"current_steps": 39885, "total_steps": 40000, "loss": 0.0002, "lr": 1.0375349860591853e-09, "epoch": 6.506566604127579, "percentage": 99.71, "elapsed_time": "9:09:42", "remaining_time": "0:01:35", "throughput": 2326.9, "total_tokens": 76747024} {"current_steps": 39890, "total_steps": 40000, "loss": 0.0, "lr": 9.5002051290205e-10, "epoch": 6.507382331348397, "percentage": 99.72, "elapsed_time": "9:09:44", "remaining_time": "0:01:30", "throughput": 2326.99, "total_tokens": 76754944} {"current_steps": 39895, "total_steps": 40000, "loss": 0.0, "lr": 8.663612074077954e-10, "epoch": 6.5081980585692145, "percentage": 99.74, "elapsed_time": "9:09:46", "remaining_time": "0:01:26", "throughput": 2327.13, "total_tokens": 76764368} {"current_steps": 39900, "total_steps": 40000, "loss": 0.0001, "lr": 7.865570824799884e-10, "epoch": 6.509013785790032, "percentage": 99.75, "elapsed_time": "9:09:48", "remaining_time": "0:01:22", "throughput": 2327.29, "total_tokens": 76774384} {"current_steps": 39905, "total_steps": 40000, "loss": 0.0, "lr": 7.106081504254514e-10, "epoch": 6.50982951301085, "percentage": 99.76, "elapsed_time": "9:09:50", "remaining_time": "0:01:18", "throughput": 2327.44, "total_tokens": 76784208} {"current_steps": 39910, "total_steps": 40000, "loss": 0.0, "lr": 6.385144229570372e-10, "epoch": 6.510645240231667, "percentage": 99.78, "elapsed_time": "9:09:52", "remaining_time": "0:01:14", "throughput": 2327.58, "total_tokens": 76793600} {"current_steps": 39915, "total_steps": 40000, "loss": 0.0138, "lr": 5.70275911190854e-10, "epoch": 6.511460967452484, "percentage": 99.79, "elapsed_time": "9:09:55", "remaining_time": "0:01:10", "throughput": 2327.69, "total_tokens": 76802192} {"current_steps": 39920, "total_steps": 40000, "loss": 0.0001, "lr": 5.058926256490403e-10, "epoch": 6.512276694673301, "percentage": 99.8, "elapsed_time": "9:09:57", "remaining_time": "0:01:06", "throughput": 2327.82, "total_tokens": 76811264} {"current_steps": 39925, "total_steps": 40000, "loss": 0.0, "lr": 4.4536457626254134e-10, "epoch": 6.513092421894118, "percentage": 99.81, "elapsed_time": "9:09:59", "remaining_time": "0:01:01", "throughput": 2327.94, "total_tokens": 76820224} {"current_steps": 39930, "total_steps": 40000, "loss": 0.0, "lr": 3.88691772365557e-10, "epoch": 6.513908149114936, "percentage": 99.83, "elapsed_time": "9:10:01", "remaining_time": "0:00:57", "throughput": 2328.09, "total_tokens": 76829824} {"current_steps": 39935, "total_steps": 40000, "loss": 0.0, "lr": 3.358742226955425e-10, "epoch": 6.514723876335753, "percentage": 99.84, "elapsed_time": "9:10:03", "remaining_time": "0:00:53", "throughput": 2328.22, "total_tokens": 76838992} {"current_steps": 39940, "total_steps": 40000, "loss": 0.0163, "lr": 2.8691193539875925e-10, "epoch": 6.5155396035565705, "percentage": 99.85, "elapsed_time": "9:10:05", "remaining_time": "0:00:49", "throughput": 2328.36, "total_tokens": 76848464} {"current_steps": 39945, "total_steps": 40000, "loss": 0.0004, "lr": 2.418049180274995e-10, "epoch": 6.5163553307773885, "percentage": 99.86, "elapsed_time": "9:10:07", "remaining_time": "0:00:45", "throughput": 2328.51, "total_tokens": 76858288} {"current_steps": 39950, "total_steps": 40000, "loss": 0.1063, "lr": 2.005531775373104e-10, "epoch": 6.517171057998206, "percentage": 99.88, "elapsed_time": "9:10:09", "remaining_time": "0:00:41", "throughput": 2328.64, "total_tokens": 76867280} {"current_steps": 39955, "total_steps": 40000, "loss": 0.0177, "lr": 1.6315672028699435e-10, "epoch": 6.517986785219023, "percentage": 99.89, "elapsed_time": "9:10:11", "remaining_time": "0:00:37", "throughput": 2328.79, "total_tokens": 76876992} {"current_steps": 39960, "total_steps": 40000, "loss": 0.0001, "lr": 1.2961555204693555e-10, "epoch": 6.51880251243984, "percentage": 99.9, "elapsed_time": "9:10:13", "remaining_time": "0:00:33", "throughput": 2328.94, "total_tokens": 76887008} {"current_steps": 39965, "total_steps": 40000, "loss": 0.0002, "lr": 9.992967798799768e-11, "epoch": 6.519618239660657, "percentage": 99.91, "elapsed_time": "9:10:15", "remaining_time": "0:00:28", "throughput": 2329.08, "total_tokens": 76896288} {"current_steps": 39970, "total_steps": 40000, "loss": 0.0, "lr": 7.409910268707521e-11, "epoch": 6.520433966881475, "percentage": 99.92, "elapsed_time": "9:10:17", "remaining_time": "0:00:24", "throughput": 2329.23, "total_tokens": 76906288} {"current_steps": 39975, "total_steps": 40000, "loss": 0.0003, "lr": 5.212383012986877e-11, "epoch": 6.521249694102292, "percentage": 99.94, "elapsed_time": "9:10:19", "remaining_time": "0:00:20", "throughput": 2329.32, "total_tokens": 76914000} {"current_steps": 39980, "total_steps": 40000, "loss": 0.0, "lr": 3.400386370533415e-11, "epoch": 6.522065421323109, "percentage": 99.95, "elapsed_time": "9:10:21", "remaining_time": "0:00:16", "throughput": 2329.45, "total_tokens": 76923104} {"current_steps": 39985, "total_steps": 40000, "loss": 0.0252, "lr": 1.9739206205682258e-11, "epoch": 6.522881148543927, "percentage": 99.96, "elapsed_time": "9:10:24", "remaining_time": "0:00:12", "throughput": 2329.62, "total_tokens": 76933424} {"current_steps": 39990, "total_steps": 40000, "loss": 0.001, "lr": 9.329859829154685e-12, "epoch": 6.523696875764744, "percentage": 99.98, "elapsed_time": "9:10:26", "remaining_time": "0:00:08", "throughput": 2329.78, "total_tokens": 76943616} {"current_steps": 39995, "total_steps": 40000, "loss": 0.0005, "lr": 2.7758261855748148e-12, "epoch": 6.524512602985562, "percentage": 99.99, "elapsed_time": "9:10:28", "remaining_time": "0:00:04", "throughput": 2329.93, "total_tokens": 76953296} {"current_steps": 40000, "total_steps": 40000, "loss": 0.0001, "lr": 7.710628524559838e-14, "epoch": 6.525328330206379, "percentage": 100.0, "elapsed_time": "9:10:30", "remaining_time": "0:00:00", "throughput": 2330.07, "total_tokens": 76963024} {"current_steps": 40000, "total_steps": 40000, "eval_loss": 0.42914924025535583, "epoch": 6.525328330206379, "percentage": 100.0, "elapsed_time": "9:11:51", "remaining_time": "0:00:00", "throughput": 2324.39, "total_tokens": 76963024} {"current_steps": 40000, "total_steps": 40000, "epoch": 6.525328330206379, "percentage": 100.0, "elapsed_time": "9:11:52", "remaining_time": "0:00:00", "throughput": 2324.27, "total_tokens": 76963024}