{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9983155530600785, "eval_steps": 500, "global_step": 5340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002807411566535654, "grad_norm": 77.0, "learning_rate": 1.1337142857142857e-05, "loss": 3.9069, "mean_token_accuracy": 0.6125709056854248, "num_tokens": 549878.0, "step": 5 }, { "epoch": 0.005614823133071308, "grad_norm": 76.0, "learning_rate": 2.5508571428571426e-05, "loss": 3.167, "mean_token_accuracy": 0.6638837218284607, "num_tokens": 1097019.0, "step": 10 }, { "epoch": 0.008422234699606962, "grad_norm": 86.5, "learning_rate": 3.968e-05, "loss": 2.5639, "mean_token_accuracy": 0.6666666626930237, "num_tokens": 1652837.0, "step": 15 }, { "epoch": 0.011229646266142616, "grad_norm": 75.5, "learning_rate": 5.3851428571428566e-05, "loss": 1.8468, "mean_token_accuracy": 0.9177083373069763, "num_tokens": 2195895.0, "step": 20 }, { "epoch": 0.01403705783267827, "grad_norm": 56.25, "learning_rate": 6.802285714285715e-05, "loss": 0.8546, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 2748699.0, "step": 25 }, { "epoch": 0.016844469399213923, "grad_norm": 2.8125, "learning_rate": 8.219428571428572e-05, "loss": 0.1105, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 3302441.0, "step": 30 }, { "epoch": 0.019651880965749578, "grad_norm": 0.02294921875, "learning_rate": 9.636571428571428e-05, "loss": 0.0015, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 3858150.0, "step": 35 }, { "epoch": 0.022459292532285232, "grad_norm": 0.0009613037109375, "learning_rate": 9.919989575130165e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 4410061.0, "step": 40 }, { "epoch": 0.025266704098820886, "grad_norm": 0.00016498565673828125, "learning_rate": 9.919947224196606e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 4958280.0, "step": 45 }, { "epoch": 0.02807411566535654, "grad_norm": 7.581710815429688e-05, "learning_rate": 9.919872296015554e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 5500380.0, "step": 50 }, { "epoch": 0.030881527231892195, "grad_norm": 6.031990051269531e-05, "learning_rate": 9.91976479124319e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 6053192.0, "step": 55 }, { "epoch": 0.033688938798427846, "grad_norm": 4.76837158203125e-05, "learning_rate": 9.919624710820983e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 6614263.0, "step": 60 }, { "epoch": 0.0364963503649635, "grad_norm": 4.5299530029296875e-05, "learning_rate": 9.91945205597568e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 7163358.0, "step": 65 }, { "epoch": 0.039303761931499155, "grad_norm": 4.7206878662109375e-05, "learning_rate": 9.919246828219295e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 7711995.0, "step": 70 }, { "epoch": 0.04211117349803481, "grad_norm": 4.792213439941406e-05, "learning_rate": 9.919009029349102e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 8258045.0, "step": 75 }, { "epoch": 0.044918585064570464, "grad_norm": 4.696846008300781e-05, "learning_rate": 9.918738661447612e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 8814280.0, "step": 80 }, { "epoch": 0.04772599663110612, "grad_norm": 4.506111145019531e-05, "learning_rate": 9.918435726882557e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 9371728.0, "step": 85 }, { "epoch": 0.05053340819764177, "grad_norm": 4.601478576660156e-05, "learning_rate": 9.918100228306871e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 9924338.0, "step": 90 }, { "epoch": 0.05334081976417743, "grad_norm": 4.673004150390625e-05, "learning_rate": 9.917732168658667e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 10478099.0, "step": 95 }, { "epoch": 0.05614823133071308, "grad_norm": 4.601478576660156e-05, "learning_rate": 9.917331551161207e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 11030371.0, "step": 100 }, { "epoch": 0.058955642897248736, "grad_norm": 4.410743713378906e-05, "learning_rate": 9.91689837932288e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 11584083.0, "step": 105 }, { "epoch": 0.06176305446378439, "grad_norm": 4.458427429199219e-05, "learning_rate": 9.916432656937164e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 12131615.0, "step": 110 }, { "epoch": 0.06457046603032005, "grad_norm": 4.553794860839844e-05, "learning_rate": 9.9159343880826e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 12680511.0, "step": 115 }, { "epoch": 0.06737787759685569, "grad_norm": 4.3392181396484375e-05, "learning_rate": 9.915403577122753e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 13232222.0, "step": 120 }, { "epoch": 0.07018528916339135, "grad_norm": 4.649162292480469e-05, "learning_rate": 9.914840228706172e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 13783380.0, "step": 125 }, { "epoch": 0.072992700729927, "grad_norm": 4.38690185546875e-05, "learning_rate": 9.914244347766351e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 14335613.0, "step": 130 }, { "epoch": 0.07580011229646266, "grad_norm": 4.482269287109375e-05, "learning_rate": 9.913615939521687e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 14887914.0, "step": 135 }, { "epoch": 0.07860752386299831, "grad_norm": 4.3392181396484375e-05, "learning_rate": 9.912955009475434e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 15436839.0, "step": 140 }, { "epoch": 0.08141493542953397, "grad_norm": 4.5299530029296875e-05, "learning_rate": 9.912261563415655e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 15986614.0, "step": 145 }, { "epoch": 0.08422234699606962, "grad_norm": 4.482269287109375e-05, "learning_rate": 9.911535607415163e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 16539455.0, "step": 150 }, { "epoch": 0.08702975856260528, "grad_norm": 4.38690185546875e-05, "learning_rate": 9.910777147831485e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 17093848.0, "step": 155 }, { "epoch": 0.08983717012914093, "grad_norm": 4.744529724121094e-05, "learning_rate": 9.90998619130679e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 17650597.0, "step": 160 }, { "epoch": 0.09264458169567659, "grad_norm": 4.1961669921875e-05, "learning_rate": 9.90916274476784e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 18201538.0, "step": 165 }, { "epoch": 0.09545199326221224, "grad_norm": 4.2438507080078125e-05, "learning_rate": 9.908306815425927e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 18756478.0, "step": 170 }, { "epoch": 0.0982594048287479, "grad_norm": 4.3392181396484375e-05, "learning_rate": 9.907418410776807e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 19310705.0, "step": 175 }, { "epoch": 0.10106681639528355, "grad_norm": 4.1961669921875e-05, "learning_rate": 9.906497538600639e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 19865940.0, "step": 180 }, { "epoch": 0.10387422796181921, "grad_norm": 4.267692565917969e-05, "learning_rate": 9.905544206961913e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 20413429.0, "step": 185 }, { "epoch": 0.10668163952835485, "grad_norm": 4.291534423828125e-05, "learning_rate": 9.904558424209383e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 20961011.0, "step": 190 }, { "epoch": 0.10948905109489052, "grad_norm": 4.291534423828125e-05, "learning_rate": 9.90354019897599e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 21511901.0, "step": 195 }, { "epoch": 0.11229646266142616, "grad_norm": 4.38690185546875e-05, "learning_rate": 9.90248954017879e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 22066800.0, "step": 200 }, { "epoch": 0.11510387422796182, "grad_norm": 4.172325134277344e-05, "learning_rate": 9.901406457018874e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 22619701.0, "step": 205 }, { "epoch": 0.11791128579449747, "grad_norm": 4.38690185546875e-05, "learning_rate": 9.900290958981288e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 23175387.0, "step": 210 }, { "epoch": 0.12071869736103313, "grad_norm": 4.2438507080078125e-05, "learning_rate": 9.899143055834947e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 23728008.0, "step": 215 }, { "epoch": 0.12352610892756878, "grad_norm": 4.315376281738281e-05, "learning_rate": 9.897962757632554e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 24271472.0, "step": 220 }, { "epoch": 0.12633352049410443, "grad_norm": 4.3392181396484375e-05, "learning_rate": 9.896750074710513e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 24822499.0, "step": 225 }, { "epoch": 0.1291409320606401, "grad_norm": 4.38690185546875e-05, "learning_rate": 9.89550501768883e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 25378674.0, "step": 230 }, { "epoch": 0.13194834362717575, "grad_norm": 4.1961669921875e-05, "learning_rate": 9.89422759747103e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 25932191.0, "step": 235 }, { "epoch": 0.13475575519371139, "grad_norm": 4.4345855712890625e-05, "learning_rate": 9.892917825244055e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 26481304.0, "step": 240 }, { "epoch": 0.13756316676024705, "grad_norm": 4.1961669921875e-05, "learning_rate": 9.891575712478165e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 27032305.0, "step": 245 }, { "epoch": 0.1403705783267827, "grad_norm": 4.291534423828125e-05, "learning_rate": 9.890201270926846e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 27589288.0, "step": 250 }, { "epoch": 0.14317798989331837, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.888794512626705e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 28142842.0, "step": 255 }, { "epoch": 0.145985401459854, "grad_norm": 4.100799560546875e-05, "learning_rate": 9.887355449897346e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 28700410.0, "step": 260 }, { "epoch": 0.14879281302638966, "grad_norm": 4.1961669921875e-05, "learning_rate": 9.885884095341294e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 29255425.0, "step": 265 }, { "epoch": 0.15160022459292533, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.884380461843857e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 29808897.0, "step": 270 }, { "epoch": 0.154407636159461, "grad_norm": 4.124641418457031e-05, "learning_rate": 9.882844562573032e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 30359156.0, "step": 275 }, { "epoch": 0.15721504772599662, "grad_norm": 4.2438507080078125e-05, "learning_rate": 9.881276410979378e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 30909372.0, "step": 280 }, { "epoch": 0.16002245929253228, "grad_norm": 4.2438507080078125e-05, "learning_rate": 9.8796760207959e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 31460071.0, "step": 285 }, { "epoch": 0.16282987085906794, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.878043406037935e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 32015395.0, "step": 290 }, { "epoch": 0.1656372824256036, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.876378581003024e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 32567631.0, "step": 295 }, { "epoch": 0.16844469399213924, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.874681560270783e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 33119041.0, "step": 300 }, { "epoch": 0.1712521055586749, "grad_norm": 4.1961669921875e-05, "learning_rate": 9.872952358702788e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 33674218.0, "step": 305 }, { "epoch": 0.17405951712521056, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.871190991442434e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 34224167.0, "step": 310 }, { "epoch": 0.17686692869174622, "grad_norm": 4.100799560546875e-05, "learning_rate": 9.8693974739148e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 34772495.0, "step": 315 }, { "epoch": 0.17967434025828186, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.867571821826528e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 35323371.0, "step": 320 }, { "epoch": 0.18248175182481752, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.865714051165673e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 35887058.0, "step": 325 }, { "epoch": 0.18528916339135318, "grad_norm": 4.124641418457031e-05, "learning_rate": 9.863824178201563e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 36435682.0, "step": 330 }, { "epoch": 0.18809657495788884, "grad_norm": 4.220008850097656e-05, "learning_rate": 9.861902219484668e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 36992097.0, "step": 335 }, { "epoch": 0.19090398652442447, "grad_norm": 4.076957702636719e-05, "learning_rate": 9.85994819184644e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 37540885.0, "step": 340 }, { "epoch": 0.19371139809096014, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.857962112399176e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 38099629.0, "step": 345 }, { "epoch": 0.1965188096574958, "grad_norm": 4.100799560546875e-05, "learning_rate": 9.85594399853587e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 38652150.0, "step": 350 }, { "epoch": 0.19932622122403143, "grad_norm": 4.100799560546875e-05, "learning_rate": 9.853893867930045e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 39210797.0, "step": 355 }, { "epoch": 0.2021336327905671, "grad_norm": 4.076957702636719e-05, "learning_rate": 9.851811738535616e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 39769370.0, "step": 360 }, { "epoch": 0.20494104435710275, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.849697628586727e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 40326502.0, "step": 365 }, { "epoch": 0.20774845592363841, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.847551556597587e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 40878514.0, "step": 370 }, { "epoch": 0.21055586749017405, "grad_norm": 4.076957702636719e-05, "learning_rate": 9.84537354136231e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 41433424.0, "step": 375 }, { "epoch": 0.2133632790567097, "grad_norm": 4.00543212890625e-05, "learning_rate": 9.843163601954753e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 41984536.0, "step": 380 }, { "epoch": 0.21617069062324537, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.84092175772835e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 42541222.0, "step": 385 }, { "epoch": 0.21897810218978103, "grad_norm": 4.100799560546875e-05, "learning_rate": 9.838648028315934e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 43094450.0, "step": 390 }, { "epoch": 0.22178551375631667, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.836342433629578e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 43650911.0, "step": 395 }, { "epoch": 0.22459292532285233, "grad_norm": 4.100799560546875e-05, "learning_rate": 9.834004993860406e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 44200053.0, "step": 400 }, { "epoch": 0.227400336889388, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.831635729478427e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 44750969.0, "step": 405 }, { "epoch": 0.23020774845592365, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.829234661232353e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 45307102.0, "step": 410 }, { "epoch": 0.23301516002245928, "grad_norm": 4.076957702636719e-05, "learning_rate": 9.82680181014942e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 45859589.0, "step": 415 }, { "epoch": 0.23582257158899494, "grad_norm": 4.1484832763671875e-05, "learning_rate": 9.824337197535193e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 46422855.0, "step": 420 }, { "epoch": 0.2386299831555306, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.821840844973392e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 46975002.0, "step": 425 }, { "epoch": 0.24143739472206627, "grad_norm": 4.076957702636719e-05, "learning_rate": 9.819312774325696e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 47533307.0, "step": 430 }, { "epoch": 0.2442448062886019, "grad_norm": 4.00543212890625e-05, "learning_rate": 9.816753007731553e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 48082587.0, "step": 435 }, { "epoch": 0.24705221785513756, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.814161567607994e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 48642378.0, "step": 440 }, { "epoch": 0.24985962942167322, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.811538476649417e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 49189920.0, "step": 445 }, { "epoch": 0.25266704098820886, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.808883757827411e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 49743946.0, "step": 450 }, { "epoch": 0.25547445255474455, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.806197434390536e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 50292671.0, "step": 455 }, { "epoch": 0.2582818641212802, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.803479529864135e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 50846963.0, "step": 460 }, { "epoch": 0.2610892756878158, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.80073006805012e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 51395702.0, "step": 465 }, { "epoch": 0.2638966872543515, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.797949073026756e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 51946439.0, "step": 470 }, { "epoch": 0.26670409882088714, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.795136569148469e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 52499846.0, "step": 475 }, { "epoch": 0.26951151038742277, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.792292581045619e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 53050389.0, "step": 480 }, { "epoch": 0.27231892195395846, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.789417133624282e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 53600167.0, "step": 485 }, { "epoch": 0.2751263335204941, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.786510252066044e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 54152064.0, "step": 490 }, { "epoch": 0.2779337450870298, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.783571961827773e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 54705723.0, "step": 495 }, { "epoch": 0.2807411566535654, "grad_norm": 4.00543212890625e-05, "learning_rate": 9.780602288641392e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 55264866.0, "step": 500 }, { "epoch": 0.28354856822010105, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.777601258513665e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 55825872.0, "step": 505 }, { "epoch": 0.28635597978663674, "grad_norm": 4.00543212890625e-05, "learning_rate": 9.774568897725958e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 56380766.0, "step": 510 }, { "epoch": 0.28916339135317237, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.771505232834017e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 56931871.0, "step": 515 }, { "epoch": 0.291970802919708, "grad_norm": 4.0531158447265625e-05, "learning_rate": 9.76841029066773e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 57484684.0, "step": 520 }, { "epoch": 0.2947782144862437, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.765284098330893e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 58038977.0, "step": 525 }, { "epoch": 0.29758562605277933, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.762126683200977e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 58587675.0, "step": 530 }, { "epoch": 0.300393037619315, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.758938072928884e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 59139287.0, "step": 535 }, { "epoch": 0.30320044918585065, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.755718295438705e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 59689179.0, "step": 540 }, { "epoch": 0.3060078607523863, "grad_norm": 4.00543212890625e-05, "learning_rate": 9.752467378927475e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 60240055.0, "step": 545 }, { "epoch": 0.308815272318922, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.74918535186493e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 60799372.0, "step": 550 }, { "epoch": 0.3116226838854576, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.745872242993255e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 61347609.0, "step": 555 }, { "epoch": 0.31443009545199324, "grad_norm": 4.076957702636719e-05, "learning_rate": 9.742528081326832e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 61905365.0, "step": 560 }, { "epoch": 0.31723750701852893, "grad_norm": 4.029273986816406e-05, "learning_rate": 9.739152896151981e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 62461111.0, "step": 565 }, { "epoch": 0.32004491858506456, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.735746717026719e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 63012068.0, "step": 570 }, { "epoch": 0.3228523301516002, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.732309573780484e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 63558400.0, "step": 575 }, { "epoch": 0.3256597417181359, "grad_norm": 3.933906555175781e-05, "learning_rate": 9.72884149651388e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 64112362.0, "step": 580 }, { "epoch": 0.3284671532846715, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.725342515598419e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 64660119.0, "step": 585 }, { "epoch": 0.3312745648512072, "grad_norm": 3.981590270996094e-05, "learning_rate": 9.721812661676245e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 65210665.0, "step": 590 }, { "epoch": 0.33408197641774284, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.718251965659874e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 65765080.0, "step": 595 }, { "epoch": 0.3368893879842785, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.71466045873192e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 66309812.0, "step": 600 }, { "epoch": 0.33969679955081417, "grad_norm": 3.933906555175781e-05, "learning_rate": 9.71103817234482e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 66863854.0, "step": 605 }, { "epoch": 0.3425042111173498, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.707385138220563e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 67412995.0, "step": 610 }, { "epoch": 0.34531162268388543, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.703701388350407e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 67970391.0, "step": 615 }, { "epoch": 0.3481190342504211, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.699986954994604e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 68526499.0, "step": 620 }, { "epoch": 0.35092644581695676, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.696241870682114e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 69084325.0, "step": 625 }, { "epoch": 0.35373385738349244, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.692466168210319e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 69636193.0, "step": 630 }, { "epoch": 0.3565412689500281, "grad_norm": 3.933906555175781e-05, "learning_rate": 9.688659880644745e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 70194359.0, "step": 635 }, { "epoch": 0.3593486805165637, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.684823041318754e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 70743263.0, "step": 640 }, { "epoch": 0.3621560920830994, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.680955683833278e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 71296779.0, "step": 645 }, { "epoch": 0.36496350364963503, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.677057842056495e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 71849630.0, "step": 650 }, { "epoch": 0.36777091521617067, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.673129550123562e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 72400218.0, "step": 655 }, { "epoch": 0.37057832678270636, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.669170842436287e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 72951048.0, "step": 660 }, { "epoch": 0.373385738349242, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.665181753662856e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 73502936.0, "step": 665 }, { "epoch": 0.3761931499157777, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.661162318737506e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 74056383.0, "step": 670 }, { "epoch": 0.3790005614823133, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.657112572860237e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 74603137.0, "step": 675 }, { "epoch": 0.38180797304884895, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.653032551496485e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 75158374.0, "step": 680 }, { "epoch": 0.38461538461538464, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.648922290376834e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 75704949.0, "step": 685 }, { "epoch": 0.38742279618192027, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.644781825496684e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 76261312.0, "step": 690 }, { "epoch": 0.3902302077484559, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.640611193115943e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 76805842.0, "step": 695 }, { "epoch": 0.3930376193149916, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.636410429758712e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 77363890.0, "step": 700 }, { "epoch": 0.3958450308815272, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.632179572212961e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 77921673.0, "step": 705 }, { "epoch": 0.39865244244806286, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.627918657530207e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 78471021.0, "step": 710 }, { "epoch": 0.40145985401459855, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.623627723025194e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 79023143.0, "step": 715 }, { "epoch": 0.4042672655811342, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.619306806275562e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 79582921.0, "step": 720 }, { "epoch": 0.40707467714766987, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.614955945121515e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 80133549.0, "step": 725 }, { "epoch": 0.4098820887142055, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.610575177665501e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 80680396.0, "step": 730 }, { "epoch": 0.41268950028074114, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.606164542271863e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 81234731.0, "step": 735 }, { "epoch": 0.41549691184727683, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.601724077566519e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 81784418.0, "step": 740 }, { "epoch": 0.41830432341381246, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.59725382243661e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 82340997.0, "step": 745 }, { "epoch": 0.4211117349803481, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.592753816030163e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 82892452.0, "step": 750 }, { "epoch": 0.4239191465468838, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.58822409775576e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 83445481.0, "step": 755 }, { "epoch": 0.4267265581134194, "grad_norm": 3.933906555175781e-05, "learning_rate": 9.583664707282172e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 83995491.0, "step": 760 }, { "epoch": 0.4295339696799551, "grad_norm": 3.814697265625e-05, "learning_rate": 9.57907568453803e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 84546988.0, "step": 765 }, { "epoch": 0.43234138124649074, "grad_norm": 3.9577484130859375e-05, "learning_rate": 9.574457069711466e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 85093367.0, "step": 770 }, { "epoch": 0.4351487928130264, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.56980890324976e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 85646824.0, "step": 775 }, { "epoch": 0.43795620437956206, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.565131225858998e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 86199422.0, "step": 780 }, { "epoch": 0.4407636159460977, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.560424078503694e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 86750405.0, "step": 785 }, { "epoch": 0.44357102751263333, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.555687502406456e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 87305490.0, "step": 790 }, { "epoch": 0.446378439079169, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.550921539047603e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 87855292.0, "step": 795 }, { "epoch": 0.44918585064570465, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.546126230164816e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 88405488.0, "step": 800 }, { "epoch": 0.4519932622122403, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.541301617752766e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 88958933.0, "step": 805 }, { "epoch": 0.454800673778776, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.536447744062752e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 89509187.0, "step": 810 }, { "epoch": 0.4576080853453116, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.531564651602323e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 90064116.0, "step": 815 }, { "epoch": 0.4604154969118473, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.526652383134911e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 90614975.0, "step": 820 }, { "epoch": 0.46322290847838293, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.521710981679458e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 91163118.0, "step": 825 }, { "epoch": 0.46603032004491857, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.516740490510031e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 91709280.0, "step": 830 }, { "epoch": 0.46883773161145426, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.511740953155456e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 92251306.0, "step": 835 }, { "epoch": 0.4716451431779899, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.506712413398922e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 92804191.0, "step": 840 }, { "epoch": 0.4744525547445255, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.501654915277611e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 93352960.0, "step": 845 }, { "epoch": 0.4772599663110612, "grad_norm": 3.910064697265625e-05, "learning_rate": 9.496568503082302e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 93904048.0, "step": 850 }, { "epoch": 0.48006737787759685, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.491453221356992e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 94450198.0, "step": 855 }, { "epoch": 0.48287478944413254, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.486309114898497e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 95010470.0, "step": 860 }, { "epoch": 0.48568220101066817, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.481136228756068e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 95565971.0, "step": 865 }, { "epoch": 0.4884896125772038, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.475934608230988e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 96115146.0, "step": 870 }, { "epoch": 0.4912970241437395, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.470704298876186e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 96665705.0, "step": 875 }, { "epoch": 0.4941044357102751, "grad_norm": 3.886222839355469e-05, "learning_rate": 9.465445346495826e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 97225452.0, "step": 880 }, { "epoch": 0.49691184727681076, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.460157797144915e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 97783773.0, "step": 885 }, { "epoch": 0.49971925884334645, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.454841697128895e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 98343857.0, "step": 890 }, { "epoch": 0.5025266704098821, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.449497093003244e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 98893955.0, "step": 895 }, { "epoch": 0.5053340819764177, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.444124031573053e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 99449900.0, "step": 900 }, { "epoch": 0.5081414935429533, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.438722559892638e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 99997493.0, "step": 905 }, { "epoch": 0.5109489051094891, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.433292725265108e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 100547188.0, "step": 910 }, { "epoch": 0.5137563166760247, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.427834575241962e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 101099643.0, "step": 915 }, { "epoch": 0.5165637282425604, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.42234815762267e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 101651500.0, "step": 920 }, { "epoch": 0.519371139809096, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.416833520454256e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 102203775.0, "step": 925 }, { "epoch": 0.5221785513756316, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.411290712030869e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 102748636.0, "step": 930 }, { "epoch": 0.5249859629421674, "grad_norm": 3.814697265625e-05, "learning_rate": 9.405719780893371e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 103304242.0, "step": 935 }, { "epoch": 0.527793374508703, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.400120775828907e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 103858388.0, "step": 940 }, { "epoch": 0.5306007860752386, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.394493745870479e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 104403364.0, "step": 945 }, { "epoch": 0.5334081976417743, "grad_norm": 3.814697265625e-05, "learning_rate": 9.388838740296514e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 104951146.0, "step": 950 }, { "epoch": 0.5362156092083099, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.38315580863043e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 105502998.0, "step": 955 }, { "epoch": 0.5390230207748455, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.377445000640214e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 106056029.0, "step": 960 }, { "epoch": 0.5418304323413813, "grad_norm": 3.8623809814453125e-05, "learning_rate": 9.371706366337973e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 106614456.0, "step": 965 }, { "epoch": 0.5446378439079169, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.365939955979505e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 107168795.0, "step": 970 }, { "epoch": 0.5474452554744526, "grad_norm": 3.814697265625e-05, "learning_rate": 9.360145820063852e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 107725110.0, "step": 975 }, { "epoch": 0.5502526670409882, "grad_norm": 3.814697265625e-05, "learning_rate": 9.354324009332864e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 108277854.0, "step": 980 }, { "epoch": 0.5530600786075238, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.348474574770748e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 108833252.0, "step": 985 }, { "epoch": 0.5558674901740596, "grad_norm": 3.814697265625e-05, "learning_rate": 9.342597567603632e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 109388680.0, "step": 990 }, { "epoch": 0.5586749017405952, "grad_norm": 3.814697265625e-05, "learning_rate": 9.336693039299103e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 109942919.0, "step": 995 }, { "epoch": 0.5614823133071308, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.330761041565767e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 110494393.0, "step": 1000 }, { "epoch": 0.5642897248736665, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.324801626352788e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 111053294.0, "step": 1005 }, { "epoch": 0.5670971364402021, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.318814845849443e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 111605069.0, "step": 1010 }, { "epoch": 0.5699045480067377, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.312800752484653e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 112157540.0, "step": 1015 }, { "epoch": 0.5727119595732735, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.306759398926535e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 112709965.0, "step": 1020 }, { "epoch": 0.5755193711398091, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.300690838081935e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 113264609.0, "step": 1025 }, { "epoch": 0.5783267827063447, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.29459512309596e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 113819280.0, "step": 1030 }, { "epoch": 0.5811341942728804, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.288472307351525e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 114374246.0, "step": 1035 }, { "epoch": 0.583941605839416, "grad_norm": 3.814697265625e-05, "learning_rate": 9.282322444468875e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 114930242.0, "step": 1040 }, { "epoch": 0.5867490174059518, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.276145588305121e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 115478310.0, "step": 1045 }, { "epoch": 0.5895564289724874, "grad_norm": 3.814697265625e-05, "learning_rate": 9.26994179295376e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 116026101.0, "step": 1050 }, { "epoch": 0.592363840539023, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.263711112744218e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 116582979.0, "step": 1055 }, { "epoch": 0.5951712521055587, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.257453602241356e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 117136441.0, "step": 1060 }, { "epoch": 0.5979786636720943, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.251169316245001e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 117688166.0, "step": 1065 }, { "epoch": 0.60078607523863, "grad_norm": 3.814697265625e-05, "learning_rate": 9.244858309789468e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 118234800.0, "step": 1070 }, { "epoch": 0.6035934868051657, "grad_norm": 3.814697265625e-05, "learning_rate": 9.238520638143072e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 118788259.0, "step": 1075 }, { "epoch": 0.6064008983717013, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.232156356807648e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 119335601.0, "step": 1080 }, { "epoch": 0.6092083099382369, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.225765521518065e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 119880870.0, "step": 1085 }, { "epoch": 0.6120157215047726, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.219348188241737e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 120434836.0, "step": 1090 }, { "epoch": 0.6148231330713082, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.212904413178128e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 120982263.0, "step": 1095 }, { "epoch": 0.617630544637844, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.206434252758272e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 121531079.0, "step": 1100 }, { "epoch": 0.6204379562043796, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.199937763644266e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 122089548.0, "step": 1105 }, { "epoch": 0.6232453677709152, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.193415002728783e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 122643175.0, "step": 1110 }, { "epoch": 0.6260527793374508, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.186866027134565e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 123195979.0, "step": 1115 }, { "epoch": 0.6288601909039865, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.180290894213934e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 123752298.0, "step": 1120 }, { "epoch": 0.6316676024705222, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.173689661548278e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 124313602.0, "step": 1125 }, { "epoch": 0.6344750140370579, "grad_norm": 3.814697265625e-05, "learning_rate": 9.167062386947555e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 124860278.0, "step": 1130 }, { "epoch": 0.6372824256035935, "grad_norm": 3.790855407714844e-05, "learning_rate": 9.160409128449784e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 125410347.0, "step": 1135 }, { "epoch": 0.6400898371701291, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.153729944320533e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 125968306.0, "step": 1140 }, { "epoch": 0.6428972487366648, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.147024893052419e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 126520924.0, "step": 1145 }, { "epoch": 0.6457046603032004, "grad_norm": 3.814697265625e-05, "learning_rate": 9.140294033364585e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 127070792.0, "step": 1150 }, { "epoch": 0.6485120718697361, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.133537424202186e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 127619525.0, "step": 1155 }, { "epoch": 0.6513194834362718, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.126755124735887e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 128173299.0, "step": 1160 }, { "epoch": 0.6541268950028074, "grad_norm": 3.814697265625e-05, "learning_rate": 9.119947194361324e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 128726691.0, "step": 1165 }, { "epoch": 0.656934306569343, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.1131136926986e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 129277597.0, "step": 1170 }, { "epoch": 0.6597417181358787, "grad_norm": 3.814697265625e-05, "learning_rate": 9.10625467959176e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 129833579.0, "step": 1175 }, { "epoch": 0.6625491297024144, "grad_norm": 3.814697265625e-05, "learning_rate": 9.099370215108254e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 130386568.0, "step": 1180 }, { "epoch": 0.66535654126895, "grad_norm": 3.814697265625e-05, "learning_rate": 9.092460359538433e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 130940117.0, "step": 1185 }, { "epoch": 0.6681639528354857, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.085525173395e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 131494349.0, "step": 1190 }, { "epoch": 0.6709713644020213, "grad_norm": 3.814697265625e-05, "learning_rate": 9.078564717412495e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 132044064.0, "step": 1195 }, { "epoch": 0.673778775968557, "grad_norm": 3.790855407714844e-05, "learning_rate": 9.071579052546754e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 132600861.0, "step": 1200 }, { "epoch": 0.6765861875350927, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.064568239974379e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 133149250.0, "step": 1205 }, { "epoch": 0.6793935991016283, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.057532341092203e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 133701108.0, "step": 1210 }, { "epoch": 0.682201010668164, "grad_norm": 3.814697265625e-05, "learning_rate": 9.050471417516754e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 134253832.0, "step": 1215 }, { "epoch": 0.6850084222346996, "grad_norm": 3.790855407714844e-05, "learning_rate": 9.043385531083703e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 134807691.0, "step": 1220 }, { "epoch": 0.6878158338012352, "grad_norm": 3.838539123535156e-05, "learning_rate": 9.036274743847342e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 135355206.0, "step": 1225 }, { "epoch": 0.6906232453677709, "grad_norm": 3.814697265625e-05, "learning_rate": 9.029139118080024e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 135908658.0, "step": 1230 }, { "epoch": 0.6934306569343066, "grad_norm": 3.814697265625e-05, "learning_rate": 9.021978716271629e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 136459115.0, "step": 1235 }, { "epoch": 0.6962380685008422, "grad_norm": 3.790855407714844e-05, "learning_rate": 9.014793601129006e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 137010285.0, "step": 1240 }, { "epoch": 0.6990454800673779, "grad_norm": 3.790855407714844e-05, "learning_rate": 9.007583835575437e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 137563309.0, "step": 1245 }, { "epoch": 0.7018528916339135, "grad_norm": 3.814697265625e-05, "learning_rate": 9.000349482750074e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 138115722.0, "step": 1250 }, { "epoch": 0.7046603032004491, "grad_norm": 3.814697265625e-05, "learning_rate": 8.99309060600739e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 138662903.0, "step": 1255 }, { "epoch": 0.7074677147669849, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.985807268916628e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 139215094.0, "step": 1260 }, { "epoch": 0.7102751263335205, "grad_norm": 3.814697265625e-05, "learning_rate": 8.978499535261239e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 139768126.0, "step": 1265 }, { "epoch": 0.7130825379000562, "grad_norm": 3.838539123535156e-05, "learning_rate": 8.971167469038328e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 140322542.0, "step": 1270 }, { "epoch": 0.7158899494665918, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.96381113445809e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 140875658.0, "step": 1275 }, { "epoch": 0.7186973610331274, "grad_norm": 3.814697265625e-05, "learning_rate": 8.956430595943248e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 141423350.0, "step": 1280 }, { "epoch": 0.7215047725996631, "grad_norm": 3.814697265625e-05, "learning_rate": 8.949025918128489e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 141973889.0, "step": 1285 }, { "epoch": 0.7243121841661988, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.941597165859902e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 142529488.0, "step": 1290 }, { "epoch": 0.7271195957327344, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.934144404194404e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 143086636.0, "step": 1295 }, { "epoch": 0.7299270072992701, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.926667698399173e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 143635935.0, "step": 1300 }, { "epoch": 0.7327344188658057, "grad_norm": 3.814697265625e-05, "learning_rate": 8.919167113951081e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 144190580.0, "step": 1305 }, { "epoch": 0.7355418304323413, "grad_norm": 3.814697265625e-05, "learning_rate": 8.911642716536109e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 144741265.0, "step": 1310 }, { "epoch": 0.7383492419988771, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.904094572048783e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 145291693.0, "step": 1315 }, { "epoch": 0.7411566535654127, "grad_norm": 3.814697265625e-05, "learning_rate": 8.896522746591595e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 145839264.0, "step": 1320 }, { "epoch": 0.7439640651319483, "grad_norm": 3.814697265625e-05, "learning_rate": 8.888927306474415e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 146394351.0, "step": 1325 }, { "epoch": 0.746771476698484, "grad_norm": 3.838539123535156e-05, "learning_rate": 8.881308318213924e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 146948626.0, "step": 1330 }, { "epoch": 0.7495788882650196, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.873665848533021e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 147503679.0, "step": 1335 }, { "epoch": 0.7523862998315554, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.865999964360243e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 148050994.0, "step": 1340 }, { "epoch": 0.755193711398091, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.858310732829179e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 148609443.0, "step": 1345 }, { "epoch": 0.7580011229646266, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.85059822127788e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 149162196.0, "step": 1350 }, { "epoch": 0.7608085345311623, "grad_norm": 3.814697265625e-05, "learning_rate": 8.842862497248272e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 149711561.0, "step": 1355 }, { "epoch": 0.7636159460976979, "grad_norm": 3.814697265625e-05, "learning_rate": 8.835103628485561e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 150260709.0, "step": 1360 }, { "epoch": 0.7664233576642335, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.827321682937645e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 150817752.0, "step": 1365 }, { "epoch": 0.7692307692307693, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.819516728754514e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 151371055.0, "step": 1370 }, { "epoch": 0.7720381807973049, "grad_norm": 3.814697265625e-05, "learning_rate": 8.811688834287654e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 151929409.0, "step": 1375 }, { "epoch": 0.7748455923638405, "grad_norm": 3.814697265625e-05, "learning_rate": 8.803838068089448e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 152481972.0, "step": 1380 }, { "epoch": 0.7776530039303762, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.795964498912585e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 153033911.0, "step": 1385 }, { "epoch": 0.7804604154969118, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.78806819570944e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 153583648.0, "step": 1390 }, { "epoch": 0.7832678270634476, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.780149227631485e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 154140515.0, "step": 1395 }, { "epoch": 0.7860752386299832, "grad_norm": 3.814697265625e-05, "learning_rate": 8.772207664028678e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 154700589.0, "step": 1400 }, { "epoch": 0.7888826501965188, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.764243574448856e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 155251826.0, "step": 1405 }, { "epoch": 0.7916900617630545, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.756257028637125e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 155807861.0, "step": 1410 }, { "epoch": 0.7944974733295901, "grad_norm": 3.814697265625e-05, "learning_rate": 8.748248096535255e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 156358780.0, "step": 1415 }, { "epoch": 0.7973048848961257, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.740216848281055e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 156920999.0, "step": 1420 }, { "epoch": 0.8001122964626615, "grad_norm": 3.814697265625e-05, "learning_rate": 8.732163354207774e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 157477110.0, "step": 1425 }, { "epoch": 0.8029197080291971, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.724087684843469e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 158021585.0, "step": 1430 }, { "epoch": 0.8057271195957327, "grad_norm": 3.814697265625e-05, "learning_rate": 8.715989910910409e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 158571838.0, "step": 1435 }, { "epoch": 0.8085345311622684, "grad_norm": 3.743171691894531e-05, "learning_rate": 8.707870103324428e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 159125749.0, "step": 1440 }, { "epoch": 0.811341942728804, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.699728333194328e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 159674555.0, "step": 1445 }, { "epoch": 0.8141493542953397, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.691564671821246e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 160227381.0, "step": 1450 }, { "epoch": 0.8169567658618754, "grad_norm": 3.814697265625e-05, "learning_rate": 8.683379190698027e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 160778083.0, "step": 1455 }, { "epoch": 0.819764177428411, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.675171961508604e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 161328123.0, "step": 1460 }, { "epoch": 0.8225715889949466, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.666943056127365e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 161886243.0, "step": 1465 }, { "epoch": 0.8253790005614823, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.658692546618528e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 162443002.0, "step": 1470 }, { "epoch": 0.8281864121280179, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.65042050523551e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 163004851.0, "step": 1475 }, { "epoch": 0.8309938236945537, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.642127004420289e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 163556926.0, "step": 1480 }, { "epoch": 0.8338012352610893, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.633812116802776e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 164108873.0, "step": 1485 }, { "epoch": 0.8366086468276249, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.625475915200171e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 164665480.0, "step": 1490 }, { "epoch": 0.8394160583941606, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.617118472616333e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 165218320.0, "step": 1495 }, { "epoch": 0.8422234699606962, "grad_norm": 3.814697265625e-05, "learning_rate": 8.60873986224114e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 165780227.0, "step": 1500 }, { "epoch": 0.8450308815272319, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.600340157449844e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 166330966.0, "step": 1505 }, { "epoch": 0.8478382930937676, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.591919431802425e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 166886726.0, "step": 1510 }, { "epoch": 0.8506457046603032, "grad_norm": 3.743171691894531e-05, "learning_rate": 8.583477759042965e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 167443045.0, "step": 1515 }, { "epoch": 0.8534531162268388, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.575015213098974e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 167988006.0, "step": 1520 }, { "epoch": 0.8562605277933745, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.566531868080768e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 168542825.0, "step": 1525 }, { "epoch": 0.8590679393599102, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.558027798280808e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 169088910.0, "step": 1530 }, { "epoch": 0.8618753509264458, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.54950307817305e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 169640258.0, "step": 1535 }, { "epoch": 0.8646827624929815, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.540957782412291e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 170192821.0, "step": 1540 }, { "epoch": 0.8674901740595171, "grad_norm": 3.814697265625e-05, "learning_rate": 8.532391985833525e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 170744205.0, "step": 1545 }, { "epoch": 0.8702975856260527, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.523805763451276e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 171295984.0, "step": 1550 }, { "epoch": 0.8731049971925884, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.515199190458947e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 171849931.0, "step": 1555 }, { "epoch": 0.8759124087591241, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.50657234222816e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 172408275.0, "step": 1560 }, { "epoch": 0.8787198203256598, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.497925294308102e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 172964043.0, "step": 1565 }, { "epoch": 0.8815272318921954, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.489258122424846e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 173514585.0, "step": 1570 }, { "epoch": 0.884334643458731, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.480570902480709e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 174059607.0, "step": 1575 }, { "epoch": 0.8871420550252667, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.471863710553575e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 174614319.0, "step": 1580 }, { "epoch": 0.8899494665918024, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.463136622896231e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 175172735.0, "step": 1585 }, { "epoch": 0.892756878158338, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.454389715935704e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 175728018.0, "step": 1590 }, { "epoch": 0.8955642897248737, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.445623066272581e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 176280757.0, "step": 1595 }, { "epoch": 0.8983717012914093, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.436836750680346e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 176829094.0, "step": 1600 }, { "epoch": 0.9011791128579449, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.428030846104714e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 177382344.0, "step": 1605 }, { "epoch": 0.9039865244244806, "grad_norm": 3.814697265625e-05, "learning_rate": 8.41920542966294e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 177937865.0, "step": 1610 }, { "epoch": 0.9067939359910163, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.410360578643157e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 178489432.0, "step": 1615 }, { "epoch": 0.909601347557552, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.401496370503698e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 179042734.0, "step": 1620 }, { "epoch": 0.9124087591240876, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.392612882872409e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 179593096.0, "step": 1625 }, { "epoch": 0.9152161706906232, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.383710193545979e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 180143839.0, "step": 1630 }, { "epoch": 0.9180235822571589, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.374788380489258e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 180698105.0, "step": 1635 }, { "epoch": 0.9208309938236946, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.365847521834561e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 181250339.0, "step": 1640 }, { "epoch": 0.9236384053902302, "grad_norm": 3.814697265625e-05, "learning_rate": 8.356887695881005e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 181802391.0, "step": 1645 }, { "epoch": 0.9264458169567659, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.347908981093806e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 182355394.0, "step": 1650 }, { "epoch": 0.9292532285233015, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.338911456103598e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 182901242.0, "step": 1655 }, { "epoch": 0.9320606400898371, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.329895199705748e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 183449668.0, "step": 1660 }, { "epoch": 0.9348680516563729, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.320860290859659e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 183997705.0, "step": 1665 }, { "epoch": 0.9376754632229085, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.311806808688083e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 184552272.0, "step": 1670 }, { "epoch": 0.9404828747894441, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.302734832476427e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 185095192.0, "step": 1675 }, { "epoch": 0.9432902863559798, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.29364444167206e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 185650699.0, "step": 1680 }, { "epoch": 0.9460976979225154, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.284535715883611e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 186199858.0, "step": 1685 }, { "epoch": 0.948905109489051, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.275408734880283e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 186749316.0, "step": 1690 }, { "epoch": 0.9517125210555868, "grad_norm": 3.814697265625e-05, "learning_rate": 8.266263578591144e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 187294846.0, "step": 1695 }, { "epoch": 0.9545199326221224, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.257100327104433e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 187853983.0, "step": 1700 }, { "epoch": 0.9573273441886581, "grad_norm": 3.719329833984375e-05, "learning_rate": 8.247919060666855e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 188407871.0, "step": 1705 }, { "epoch": 0.9601347557551937, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.238719859682882e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 188956349.0, "step": 1710 }, { "epoch": 0.9629421673217293, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.229502804714045e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 189509134.0, "step": 1715 }, { "epoch": 0.9657495788882651, "grad_norm": 3.743171691894531e-05, "learning_rate": 8.220267976478232e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 190063240.0, "step": 1720 }, { "epoch": 0.9685569904548007, "grad_norm": 3.719329833984375e-05, "learning_rate": 8.211015455848978e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 190622603.0, "step": 1725 }, { "epoch": 0.9713644020213363, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.20174532385476e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 191170189.0, "step": 1730 }, { "epoch": 0.974171813587872, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.192457661678286e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 191726109.0, "step": 1735 }, { "epoch": 0.9769792251544076, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.18315255065578e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 192280509.0, "step": 1740 }, { "epoch": 0.9797866367209432, "grad_norm": 3.790855407714844e-05, "learning_rate": 8.173830072276275e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 192824519.0, "step": 1745 }, { "epoch": 0.982594048287479, "grad_norm": 3.719329833984375e-05, "learning_rate": 8.1644903081809e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 193383680.0, "step": 1750 }, { "epoch": 0.9854014598540146, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.155133340162162e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 193933074.0, "step": 1755 }, { "epoch": 0.9882088714205502, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.145759250163229e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 194487752.0, "step": 1760 }, { "epoch": 0.9910162829870859, "grad_norm": 3.743171691894531e-05, "learning_rate": 8.136368120277213e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 195041134.0, "step": 1765 }, { "epoch": 0.9938236945536215, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.126960032746456e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 195594894.0, "step": 1770 }, { "epoch": 0.9966311061201573, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.117535069961801e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 196147513.0, "step": 1775 }, { "epoch": 0.9994385176866929, "grad_norm": 3.719329833984375e-05, "learning_rate": 8.10809331446188e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 196690606.0, "step": 1780 }, { "epoch": 1.0022459292532284, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.098634848932381e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 197200444.0, "step": 1785 }, { "epoch": 1.0050533408197642, "grad_norm": 3.743171691894531e-05, "learning_rate": 8.089159756205334e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 197749842.0, "step": 1790 }, { "epoch": 1.0078607523863, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.079668119258376e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 198304764.0, "step": 1795 }, { "epoch": 1.0106681639528354, "grad_norm": 3.814697265625e-05, "learning_rate": 8.070160021214034e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 198855059.0, "step": 1800 }, { "epoch": 1.0134755755193712, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.06063554533899e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 199415427.0, "step": 1805 }, { "epoch": 1.0162829870859067, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.051094775043355e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 199964096.0, "step": 1810 }, { "epoch": 1.0190903986524424, "grad_norm": 3.719329833984375e-05, "learning_rate": 8.041537793879934e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 200508470.0, "step": 1815 }, { "epoch": 1.0218978102189782, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.031964685543505e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 201065408.0, "step": 1820 }, { "epoch": 1.0247052217855137, "grad_norm": 3.743171691894531e-05, "learning_rate": 8.02237553387007e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 201612886.0, "step": 1825 }, { "epoch": 1.0275126333520495, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.012770422836136e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 202169264.0, "step": 1830 }, { "epoch": 1.030320044918585, "grad_norm": 3.7670135498046875e-05, "learning_rate": 8.003149436557972e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 202717508.0, "step": 1835 }, { "epoch": 1.0331274564851207, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.993512659290872e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 203268972.0, "step": 1840 }, { "epoch": 1.0359348680516565, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.98386017542842e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 203815735.0, "step": 1845 }, { "epoch": 1.038742279618192, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.974192069501751e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 204361307.0, "step": 1850 }, { "epoch": 1.0415496911847277, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.964508426178806e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 204915912.0, "step": 1855 }, { "epoch": 1.0443571027512633, "grad_norm": 3.790855407714844e-05, "learning_rate": 7.954809330263598e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 205471973.0, "step": 1860 }, { "epoch": 1.047164514317799, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.945094866695461e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 206029063.0, "step": 1865 }, { "epoch": 1.0499719258843347, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.935365120548316e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 206577216.0, "step": 1870 }, { "epoch": 1.0527793374508703, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.925620177029913e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 207134150.0, "step": 1875 }, { "epoch": 1.055586749017406, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.915860121481098e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 207682819.0, "step": 1880 }, { "epoch": 1.0583941605839415, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.906085039375058e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 208241654.0, "step": 1885 }, { "epoch": 1.0612015721504773, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.896295016316577e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 208802767.0, "step": 1890 }, { "epoch": 1.064008983717013, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.886490138041277e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 209355390.0, "step": 1895 }, { "epoch": 1.0668163952835485, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.87667049041488e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 209912022.0, "step": 1900 }, { "epoch": 1.0696238068500843, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.866836159432447e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 210463772.0, "step": 1905 }, { "epoch": 1.0724312184166198, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.85698723121763e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 211010865.0, "step": 1910 }, { "epoch": 1.0752386299831556, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.847123792021912e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 211558586.0, "step": 1915 }, { "epoch": 1.078046041549691, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.837245928223856e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 212116504.0, "step": 1920 }, { "epoch": 1.0808534531162268, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.827353726328352e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 212666296.0, "step": 1925 }, { "epoch": 1.0836608646827626, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.81744727296585e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 213221364.0, "step": 1930 }, { "epoch": 1.086468276249298, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.80752665489161e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 213768066.0, "step": 1935 }, { "epoch": 1.0892756878158338, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.797591958984938e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 214320096.0, "step": 1940 }, { "epoch": 1.0920830993823694, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.787643272248419e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 214868275.0, "step": 1945 }, { "epoch": 1.094890510948905, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.777680681807175e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 215417147.0, "step": 1950 }, { "epoch": 1.0976979225154408, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.767704274908079e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 215972838.0, "step": 1955 }, { "epoch": 1.1005053340819764, "grad_norm": 3.790855407714844e-05, "learning_rate": 7.757714138919005e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 216522140.0, "step": 1960 }, { "epoch": 1.1033127456485121, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.747710361328056e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 217070563.0, "step": 1965 }, { "epoch": 1.1061201572150476, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.737693029742805e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 217617212.0, "step": 1970 }, { "epoch": 1.1089275687815834, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.727662231889518e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 218169207.0, "step": 1975 }, { "epoch": 1.1117349803481191, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.717618055612397e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 218723495.0, "step": 1980 }, { "epoch": 1.1145423919146547, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.7075605888728e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 219274880.0, "step": 1985 }, { "epoch": 1.1173498034811904, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.69748991974848e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 219824766.0, "step": 1990 }, { "epoch": 1.120157215047726, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.687406136432802e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 220383177.0, "step": 1995 }, { "epoch": 1.1229646266142617, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.677309327233985e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 220934640.0, "step": 2000 }, { "epoch": 1.1257720381807972, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.667199580574319e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 221487359.0, "step": 2005 }, { "epoch": 1.128579449747333, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.657076984989392e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 222040267.0, "step": 2010 }, { "epoch": 1.1313868613138687, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.64694162912732e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 222596288.0, "step": 2015 }, { "epoch": 1.1341942728804042, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.63679360174796e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 223157044.0, "step": 2020 }, { "epoch": 1.13700168444694, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.626632991722144e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 223713806.0, "step": 2025 }, { "epoch": 1.1398090960134755, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.616459888030895e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 224263145.0, "step": 2030 }, { "epoch": 1.1426165075800112, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.606274379764647e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 224822104.0, "step": 2035 }, { "epoch": 1.145423919146547, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.596076556122467e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 225376423.0, "step": 2040 }, { "epoch": 1.1482313307130825, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.585866506411275e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 225928705.0, "step": 2045 }, { "epoch": 1.1510387422796182, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.575644320045061e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 226475014.0, "step": 2050 }, { "epoch": 1.1538461538461537, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.565410086544095e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 227024366.0, "step": 2055 }, { "epoch": 1.1566535654126895, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.555163895534155e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 227580327.0, "step": 2060 }, { "epoch": 1.1594609769792252, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.544905836745734e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 228135136.0, "step": 2065 }, { "epoch": 1.1622683885457608, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.53463600001326e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 228688467.0, "step": 2070 }, { "epoch": 1.1650758001122965, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.524354475274298e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 229243340.0, "step": 2075 }, { "epoch": 1.167883211678832, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.514061352568778e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 229791445.0, "step": 2080 }, { "epoch": 1.1706906232453678, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.503756722038194e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 230348082.0, "step": 2085 }, { "epoch": 1.1734980348119035, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.493440673924822e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 230897423.0, "step": 2090 }, { "epoch": 1.176305446378439, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.483113298570925e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 231452200.0, "step": 2095 }, { "epoch": 1.1791128579449748, "grad_norm": 3.6716461181640625e-05, "learning_rate": 7.472774686417964e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 232005564.0, "step": 2100 }, { "epoch": 1.1819202695115103, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.462424928005804e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 232553803.0, "step": 2105 }, { "epoch": 1.184727681078046, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.45206411397193e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 233112910.0, "step": 2110 }, { "epoch": 1.1875350926445818, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.441692335050637e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 233662235.0, "step": 2115 }, { "epoch": 1.1903425042111173, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.431309682072249e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 234211802.0, "step": 2120 }, { "epoch": 1.193149915777653, "grad_norm": 3.6716461181640625e-05, "learning_rate": 7.420916245962317e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 234766357.0, "step": 2125 }, { "epoch": 1.1959573273441886, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.410512117740829e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 235312658.0, "step": 2130 }, { "epoch": 1.1987647389107243, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.4000973885214e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 235858427.0, "step": 2135 }, { "epoch": 1.20157215047726, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.389672149510497e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 236414940.0, "step": 2140 }, { "epoch": 1.2043795620437956, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.379236492006609e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 236968181.0, "step": 2145 }, { "epoch": 1.2071869736103313, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.368790507399478e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 237522495.0, "step": 2150 }, { "epoch": 1.2099943851768669, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.358334287169277e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 238071824.0, "step": 2155 }, { "epoch": 1.2128017967434026, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.347867922885818e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 238624403.0, "step": 2160 }, { "epoch": 1.2156092083099383, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.337391506207755e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 239172889.0, "step": 2165 }, { "epoch": 1.2184166198764739, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.326905128881771e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 239726973.0, "step": 2170 }, { "epoch": 1.2212240314430096, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.316408882741774e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 240281485.0, "step": 2175 }, { "epoch": 1.2240314430095451, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.305902859708108e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 240834091.0, "step": 2180 }, { "epoch": 1.2268388545760809, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.295387151786728e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 241380490.0, "step": 2185 }, { "epoch": 1.2296462661426166, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.284861851068411e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 241925513.0, "step": 2190 }, { "epoch": 1.2324536777091522, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.274327049727938e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 242473886.0, "step": 2195 }, { "epoch": 1.235261089275688, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.263782840023293e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 243028174.0, "step": 2200 }, { "epoch": 1.2380685008422234, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.253229314294854e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 243583784.0, "step": 2205 }, { "epoch": 1.2408759124087592, "grad_norm": 3.6716461181640625e-05, "learning_rate": 7.242666564964582e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 244137045.0, "step": 2210 }, { "epoch": 1.2436833239752947, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.232094684535214e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 244682853.0, "step": 2215 }, { "epoch": 1.2464907355418304, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.22151376558945e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 245236916.0, "step": 2220 }, { "epoch": 1.2492981471083662, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.210923900789148e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 245789883.0, "step": 2225 }, { "epoch": 1.2521055586749017, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.200325182874507e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 246345364.0, "step": 2230 }, { "epoch": 1.2549129702414374, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.189717704663257e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 246892607.0, "step": 2235 }, { "epoch": 1.2577203818079732, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.179101559049847e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 247442091.0, "step": 2240 }, { "epoch": 1.2605277933745087, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.168476839004628e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 248002077.0, "step": 2245 }, { "epoch": 1.2633352049410442, "grad_norm": 3.743171691894531e-05, "learning_rate": 7.15784363757304e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 248552163.0, "step": 2250 }, { "epoch": 1.26614261650758, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.147202047874803e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 249099457.0, "step": 2255 }, { "epoch": 1.2689500280741157, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.136552163103095e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 249653165.0, "step": 2260 }, { "epoch": 1.2717574396406512, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.125894076523733e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 250202923.0, "step": 2265 }, { "epoch": 1.274564851207187, "grad_norm": 3.6716461181640625e-05, "learning_rate": 7.115227881474371e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 250749595.0, "step": 2270 }, { "epoch": 1.2773722627737225, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.104553671363664e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 251300649.0, "step": 2275 }, { "epoch": 1.2801796743402583, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.093871539670455e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 251863817.0, "step": 2280 }, { "epoch": 1.282987085906794, "grad_norm": 3.6716461181640625e-05, "learning_rate": 7.083181579942975e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 252414100.0, "step": 2285 }, { "epoch": 1.2857944974733295, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.072483885797993e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 252963019.0, "step": 2290 }, { "epoch": 1.2886019090398653, "grad_norm": 3.695487976074219e-05, "learning_rate": 7.061778550920021e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 253514748.0, "step": 2295 }, { "epoch": 1.2914093206064008, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.051065669060481e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 254066475.0, "step": 2300 }, { "epoch": 1.2942167321729365, "grad_norm": 3.6716461181640625e-05, "learning_rate": 7.040345334036888e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 254617738.0, "step": 2305 }, { "epoch": 1.2970241437394723, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.029617639732026e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 255166490.0, "step": 2310 }, { "epoch": 1.2998315553060078, "grad_norm": 3.7670135498046875e-05, "learning_rate": 7.018882680093131e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 255719454.0, "step": 2315 }, { "epoch": 1.3026389668725435, "grad_norm": 3.719329833984375e-05, "learning_rate": 7.008140549131061e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 256278704.0, "step": 2320 }, { "epoch": 1.305446378439079, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.99739134091948e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 256835827.0, "step": 2325 }, { "epoch": 1.3082537900056148, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.986635149594029e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 257390899.0, "step": 2330 }, { "epoch": 1.3110612015721506, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.9758720693515e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 257935358.0, "step": 2335 }, { "epoch": 1.313868613138686, "grad_norm": 3.743171691894531e-05, "learning_rate": 6.965102194449021e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 258484634.0, "step": 2340 }, { "epoch": 1.3166760247052218, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.954325619203218e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 259038074.0, "step": 2345 }, { "epoch": 1.3194834362717573, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.943542437989402e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 259590605.0, "step": 2350 }, { "epoch": 1.322290847838293, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.932752745240725e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 260139109.0, "step": 2355 }, { "epoch": 1.3250982594048288, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.921956635447372e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 260684120.0, "step": 2360 }, { "epoch": 1.3279056709713644, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.911154203155722e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 261238273.0, "step": 2365 }, { "epoch": 1.3307130825379, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.900345542967523e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 261790081.0, "step": 2370 }, { "epoch": 1.3335204941044356, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.889530749539062e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 262344651.0, "step": 2375 }, { "epoch": 1.3363279056709714, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.878709917580342e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 262895947.0, "step": 2380 }, { "epoch": 1.3391353172375071, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.867883141854245e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 263444696.0, "step": 2385 }, { "epoch": 1.3419427288040426, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.857050517175702e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 263985620.0, "step": 2390 }, { "epoch": 1.3447501403705784, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.846212138410873e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 264541949.0, "step": 2395 }, { "epoch": 1.347557551937114, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.835368100476305e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 265091725.0, "step": 2400 }, { "epoch": 1.3503649635036497, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.824518498338104e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 265646692.0, "step": 2405 }, { "epoch": 1.3531723750701854, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.813663427011106e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 266200325.0, "step": 2410 }, { "epoch": 1.355979786636721, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.802802981558042e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 266751956.0, "step": 2415 }, { "epoch": 1.3587871982032567, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.79193725708871e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 267305428.0, "step": 2420 }, { "epoch": 1.3615946097697922, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.781066348759134e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 267863237.0, "step": 2425 }, { "epoch": 1.364402021336328, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.770190351770737e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 268405424.0, "step": 2430 }, { "epoch": 1.3672094329028637, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.75930936136951e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 268962476.0, "step": 2435 }, { "epoch": 1.3700168444693992, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.748423472845165e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 269511277.0, "step": 2440 }, { "epoch": 1.372824256035935, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.737532781530317e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 270067794.0, "step": 2445 }, { "epoch": 1.3756316676024705, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.726637382799634e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 270617188.0, "step": 2450 }, { "epoch": 1.3784390791690062, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.715737372069017e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 271171384.0, "step": 2455 }, { "epoch": 1.381246490735542, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.704832844794752e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 271723504.0, "step": 2460 }, { "epoch": 1.3840539023020775, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.693923896472678e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 272276911.0, "step": 2465 }, { "epoch": 1.3868613138686132, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.68301062263735e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 272831546.0, "step": 2470 }, { "epoch": 1.3896687254351487, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.672093118861207e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 273386934.0, "step": 2475 }, { "epoch": 1.3924761370016845, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.66117148075373e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 273942988.0, "step": 2480 }, { "epoch": 1.3952835485682202, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.650245803960605e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 274492948.0, "step": 2485 }, { "epoch": 1.3980909601347558, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.639316184162887e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 275048430.0, "step": 2490 }, { "epoch": 1.4008983717012913, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.628382717076166e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 275609903.0, "step": 2495 }, { "epoch": 1.403705783267827, "grad_norm": 3.600120544433594e-05, "learning_rate": 6.617445498449715e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 276162794.0, "step": 2500 }, { "epoch": 1.4065131948343628, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.606504624065669e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 276720878.0, "step": 2505 }, { "epoch": 1.4093206064008983, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.595560189738178e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 277280489.0, "step": 2510 }, { "epoch": 1.412128017967434, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.584612291312562e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 277833958.0, "step": 2515 }, { "epoch": 1.4149354295339696, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.573661024664484e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 278390354.0, "step": 2520 }, { "epoch": 1.4177428411005053, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.5627064856991e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 278946349.0, "step": 2525 }, { "epoch": 1.420550252667041, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.551748770350222e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 279496523.0, "step": 2530 }, { "epoch": 1.4233576642335766, "grad_norm": 3.743171691894531e-05, "learning_rate": 6.540787974579485e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 280054235.0, "step": 2535 }, { "epoch": 1.4261650758001123, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.529824194375499e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 280608058.0, "step": 2540 }, { "epoch": 1.4289724873666478, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.518857525753006e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 281168287.0, "step": 2545 }, { "epoch": 1.4317798989331836, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.507888064752043e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 281729266.0, "step": 2550 }, { "epoch": 1.4345873104997193, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.496915907437106e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 282283631.0, "step": 2555 }, { "epoch": 1.4373947220662548, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.485941149896301e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 282834189.0, "step": 2560 }, { "epoch": 1.4402021336327906, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.474963888240505e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 283389944.0, "step": 2565 }, { "epoch": 1.4430095451993261, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.463984218602527e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 283947933.0, "step": 2570 }, { "epoch": 1.4458169567658619, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.453002237136261e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 284499561.0, "step": 2575 }, { "epoch": 1.4486243683323976, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.442018040015847e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 285045837.0, "step": 2580 }, { "epoch": 1.4514317798989331, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.43103172343483e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 285601567.0, "step": 2585 }, { "epoch": 1.4542391914654689, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.420043383605316e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 286153887.0, "step": 2590 }, { "epoch": 1.4570466030320044, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.409053116757128e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 286704964.0, "step": 2595 }, { "epoch": 1.4598540145985401, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.398061019136963e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 287260332.0, "step": 2600 }, { "epoch": 1.4626614261650759, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.38706718700756e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 287813982.0, "step": 2605 }, { "epoch": 1.4654688377316114, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.376071716646837e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 288366276.0, "step": 2610 }, { "epoch": 1.4682762492981472, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.365074704347064e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 288916157.0, "step": 2615 }, { "epoch": 1.4710836608646827, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.354076246414013e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 289466792.0, "step": 2620 }, { "epoch": 1.4738910724312184, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.343076439166117e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 290019432.0, "step": 2625 }, { "epoch": 1.4766984839977542, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.332075378933626e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 290567948.0, "step": 2630 }, { "epoch": 1.4795058955642897, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.32107316205776e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 291118208.0, "step": 2635 }, { "epoch": 1.4823133071308254, "grad_norm": 3.647804260253906e-05, "learning_rate": 6.310069884889873e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 291669947.0, "step": 2640 }, { "epoch": 1.485120718697361, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.2990656437906e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 292222023.0, "step": 2645 }, { "epoch": 1.4879281302638967, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.28806053512902e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 292779409.0, "step": 2650 }, { "epoch": 1.4907355418304324, "grad_norm": 3.504753112792969e-05, "learning_rate": 6.27705465528181e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 293327558.0, "step": 2655 }, { "epoch": 1.493542953396968, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.266048100632398e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 293874913.0, "step": 2660 }, { "epoch": 1.4963503649635037, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.255040967570123e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 294430550.0, "step": 2665 }, { "epoch": 1.4991577765300392, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.244033352489392e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 294985975.0, "step": 2670 }, { "epoch": 1.501965188096575, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.233025351788829e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 295534856.0, "step": 2675 }, { "epoch": 1.5047725996631107, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.222017061870437e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 296085702.0, "step": 2680 }, { "epoch": 1.5075800112296462, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.211008579138753e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 296638985.0, "step": 2685 }, { "epoch": 1.5103874227961818, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.2e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 297190692.0, "step": 2690 }, { "epoch": 1.5131948343627175, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.188991420861248e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 297744984.0, "step": 2695 }, { "epoch": 1.5160022459292533, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.177982938129562e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 298295767.0, "step": 2700 }, { "epoch": 1.518809657495789, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.166974648211172e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 298850175.0, "step": 2705 }, { "epoch": 1.5216170690623245, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.155966647510609e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 299398676.0, "step": 2710 }, { "epoch": 1.52442448062886, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.144959032429878e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 299955257.0, "step": 2715 }, { "epoch": 1.5272318921953958, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.133951899367604e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 300510596.0, "step": 2720 }, { "epoch": 1.5300393037619315, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.122945344718191e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 301060103.0, "step": 2725 }, { "epoch": 1.5328467153284673, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.11193946487098e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 301614426.0, "step": 2730 }, { "epoch": 1.5356541268950028, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.1009343562094015e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 302165808.0, "step": 2735 }, { "epoch": 1.5384615384615383, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.089930115110129e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 302713974.0, "step": 2740 }, { "epoch": 1.541268950028074, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.07892683794224e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 303264275.0, "step": 2745 }, { "epoch": 1.5440763615946098, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.0679246210663754e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 303810859.0, "step": 2750 }, { "epoch": 1.5468837731611456, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.056923560833883e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 304365824.0, "step": 2755 }, { "epoch": 1.549691184727681, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.045923753585987e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 304915806.0, "step": 2760 }, { "epoch": 1.5524985962942166, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.034925295652936e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 305466950.0, "step": 2765 }, { "epoch": 1.5553060078607523, "grad_norm": 3.719329833984375e-05, "learning_rate": 6.023928283353163e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 306017854.0, "step": 2770 }, { "epoch": 1.558113419427288, "grad_norm": 3.695487976074219e-05, "learning_rate": 6.0129328129924395e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 306565847.0, "step": 2775 }, { "epoch": 1.5609208309938238, "grad_norm": 3.6716461181640625e-05, "learning_rate": 6.001938980863035e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 307117825.0, "step": 2780 }, { "epoch": 1.5637282425603594, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.990946883242872e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 307671452.0, "step": 2785 }, { "epoch": 1.5665356541268949, "grad_norm": 3.647804260253906e-05, "learning_rate": 5.979956616394685e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 308220590.0, "step": 2790 }, { "epoch": 1.5693430656934306, "grad_norm": 3.743171691894531e-05, "learning_rate": 5.9689682765651705e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 308779145.0, "step": 2795 }, { "epoch": 1.5721504772599664, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.9579819599841534e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 309326580.0, "step": 2800 }, { "epoch": 1.5749578888265021, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.94699776286374e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 309877499.0, "step": 2805 }, { "epoch": 1.5777653003930376, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.9360157813974725e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 310430339.0, "step": 2810 }, { "epoch": 1.5805727119595732, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.9250361117594944e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 310984529.0, "step": 2815 }, { "epoch": 1.583380123526109, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.914058850103699e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 311538779.0, "step": 2820 }, { "epoch": 1.5861875350926447, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.9030840925628945e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 312095458.0, "step": 2825 }, { "epoch": 1.5889949466591804, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.892111935247957e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 312642688.0, "step": 2830 }, { "epoch": 1.591802358225716, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.881142474246995e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 313191732.0, "step": 2835 }, { "epoch": 1.5946097697922514, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.8701758056245006e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 313739280.0, "step": 2840 }, { "epoch": 1.5974171813587872, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.8592120254205144e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 314288851.0, "step": 2845 }, { "epoch": 1.600224592925323, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.8482512296497785e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 314838453.0, "step": 2850 }, { "epoch": 1.6030320044918585, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.837293514300903e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 315386608.0, "step": 2855 }, { "epoch": 1.6058394160583942, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.826338975335519e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 315936204.0, "step": 2860 }, { "epoch": 1.6086468276249297, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.81538770868744e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 316478496.0, "step": 2865 }, { "epoch": 1.6114542391914655, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.804439810261824e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 317035720.0, "step": 2870 }, { "epoch": 1.6142616507580012, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.7934953759343324e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 317586504.0, "step": 2875 }, { "epoch": 1.6170690623245367, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.782554501550286e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 318141325.0, "step": 2880 }, { "epoch": 1.6198764738910725, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.7716172829238355e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 318689676.0, "step": 2885 }, { "epoch": 1.622683885457608, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.760683815837112e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 319241350.0, "step": 2890 }, { "epoch": 1.6254912970241437, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.749754196039396e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 319790792.0, "step": 2895 }, { "epoch": 1.6282987085906795, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.738828519246271e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 320343884.0, "step": 2900 }, { "epoch": 1.631106120157215, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.727906881138793e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 320898677.0, "step": 2905 }, { "epoch": 1.6339135317237508, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.716989377362651e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 321450248.0, "step": 2910 }, { "epoch": 1.6367209432902863, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.706076103527323e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 322005047.0, "step": 2915 }, { "epoch": 1.639528354856822, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.6951671552052476e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 322555438.0, "step": 2920 }, { "epoch": 1.6423357664233578, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.684262627930982e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 323106990.0, "step": 2925 }, { "epoch": 1.6451431779898933, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.673362617200365e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 323653012.0, "step": 2930 }, { "epoch": 1.6479505895564288, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.6624672184696846e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 324209101.0, "step": 2935 }, { "epoch": 1.6507580011229646, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.651576527154836e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 324759021.0, "step": 2940 }, { "epoch": 1.6535654126895003, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.640690638630491e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 325307331.0, "step": 2945 }, { "epoch": 1.656372824256036, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.629809648229262e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 325864942.0, "step": 2950 }, { "epoch": 1.6591802358225716, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.618933651240866e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 326418194.0, "step": 2955 }, { "epoch": 1.661987647389107, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.608062742911291e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 326967964.0, "step": 2960 }, { "epoch": 1.6647950589556428, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.597197018441958e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 327521938.0, "step": 2965 }, { "epoch": 1.6676024705221786, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.586336572988896e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 328081736.0, "step": 2970 }, { "epoch": 1.6704098820887143, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.5754815016618974e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 328633509.0, "step": 2975 }, { "epoch": 1.6732172936552498, "grad_norm": 3.62396240234375e-05, "learning_rate": 5.564631899523696e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 329189101.0, "step": 2980 }, { "epoch": 1.6760247052217854, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.5537878615891265e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 329744365.0, "step": 2985 }, { "epoch": 1.6788321167883211, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.542949482824298e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 330300716.0, "step": 2990 }, { "epoch": 1.6816395283548569, "grad_norm": 3.743171691894531e-05, "learning_rate": 5.5321168581457565e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 330854482.0, "step": 2995 }, { "epoch": 1.6844469399213926, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.521290082419658e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 331408338.0, "step": 3000 }, { "epoch": 1.6872543514879281, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.510469250460936e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 331966602.0, "step": 3005 }, { "epoch": 1.6900617630544637, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.499654457032477e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 332518416.0, "step": 3010 }, { "epoch": 1.6928691746209994, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.488845796844277e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 333072716.0, "step": 3015 }, { "epoch": 1.6956765861875351, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.478043364552627e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 333623915.0, "step": 3020 }, { "epoch": 1.6984839977540709, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.467247254759275e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 334174562.0, "step": 3025 }, { "epoch": 1.7012914093206064, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.456457562010599e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 334731138.0, "step": 3030 }, { "epoch": 1.704098820887142, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.445674380796781e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 335289277.0, "step": 3035 }, { "epoch": 1.7069062324536777, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.4348978055509787e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 335837634.0, "step": 3040 }, { "epoch": 1.7097136440202134, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.4241279306484995e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 336393892.0, "step": 3045 }, { "epoch": 1.7125210555867492, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.413364850405972e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 336950326.0, "step": 3050 }, { "epoch": 1.7153284671532847, "grad_norm": 3.743171691894531e-05, "learning_rate": 5.402608659080519e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 337494390.0, "step": 3055 }, { "epoch": 1.7181358787198202, "grad_norm": 3.743171691894531e-05, "learning_rate": 5.391859450868939e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 338046291.0, "step": 3060 }, { "epoch": 1.720943290286356, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.38111731990687e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 338598239.0, "step": 3065 }, { "epoch": 1.7237507018528917, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.370382360267973e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 339156514.0, "step": 3070 }, { "epoch": 1.7265581134194274, "grad_norm": 3.62396240234375e-05, "learning_rate": 5.359654665963112e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 339711188.0, "step": 3075 }, { "epoch": 1.729365524985963, "grad_norm": 3.62396240234375e-05, "learning_rate": 5.348934330939518e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 340262751.0, "step": 3080 }, { "epoch": 1.7321729365524985, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.338221449079979e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 340818765.0, "step": 3085 }, { "epoch": 1.7349803481190342, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.327516114202007e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 341370966.0, "step": 3090 }, { "epoch": 1.73778775968557, "grad_norm": 3.62396240234375e-05, "learning_rate": 5.316818420057026e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 341925735.0, "step": 3095 }, { "epoch": 1.7405951712521057, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.306128460329545e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 342480144.0, "step": 3100 }, { "epoch": 1.7434025828186412, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.295446328636339e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 343034774.0, "step": 3105 }, { "epoch": 1.7462099943851768, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.28477211852563e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 343589175.0, "step": 3110 }, { "epoch": 1.7490174059517125, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.274105923476266e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 344134755.0, "step": 3115 }, { "epoch": 1.7518248175182483, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.263447836896906e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 344697282.0, "step": 3120 }, { "epoch": 1.7546322290847838, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.2527979521251985e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 345253230.0, "step": 3125 }, { "epoch": 1.7574396406513195, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.242156362426959e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 345807371.0, "step": 3130 }, { "epoch": 1.760247052217855, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.2315231609953726e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 346360453.0, "step": 3135 }, { "epoch": 1.7630544637843908, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.2208984409501525e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 346909029.0, "step": 3140 }, { "epoch": 1.7658618753509265, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.210282295336742e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 347469332.0, "step": 3145 }, { "epoch": 1.768669286917462, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.199674817125492e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 348017936.0, "step": 3150 }, { "epoch": 1.7714766984839978, "grad_norm": 3.647804260253906e-05, "learning_rate": 5.189076099210852e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 348573678.0, "step": 3155 }, { "epoch": 1.7742841100505333, "grad_norm": 3.62396240234375e-05, "learning_rate": 5.1784862344105504e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 349130130.0, "step": 3160 }, { "epoch": 1.777091521617069, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.1679053154647877e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 349678519.0, "step": 3165 }, { "epoch": 1.7798989331836048, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.157333435035418e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 350234527.0, "step": 3170 }, { "epoch": 1.7827063447501403, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.1467706857051455e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 350787725.0, "step": 3175 }, { "epoch": 1.7855137563166759, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.1362171599767064e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 351340627.0, "step": 3180 }, { "epoch": 1.7883211678832116, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.1256729502720627e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 351897843.0, "step": 3185 }, { "epoch": 1.7911285794497473, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.115138148931589e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 352447744.0, "step": 3190 }, { "epoch": 1.793935991016283, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.104612848213272e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 352995330.0, "step": 3195 }, { "epoch": 1.7967434025828186, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.094097140291892e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 353549685.0, "step": 3200 }, { "epoch": 1.7995508141493541, "grad_norm": 3.719329833984375e-05, "learning_rate": 5.083591117258226e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 354103367.0, "step": 3205 }, { "epoch": 1.8023582257158899, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.0730948711182304e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 354648418.0, "step": 3210 }, { "epoch": 1.8051656372824256, "grad_norm": 3.695487976074219e-05, "learning_rate": 5.0626084937922445e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 355196624.0, "step": 3215 }, { "epoch": 1.8079730488489614, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.052132077114181e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 355750522.0, "step": 3220 }, { "epoch": 1.810780460415497, "grad_norm": 3.600120544433594e-05, "learning_rate": 5.041665712830725e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 356307216.0, "step": 3225 }, { "epoch": 1.8135878719820324, "grad_norm": 3.647804260253906e-05, "learning_rate": 5.031209492600523e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 356857378.0, "step": 3230 }, { "epoch": 1.8163952835485682, "grad_norm": 3.6716461181640625e-05, "learning_rate": 5.0207635079933916e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 357404700.0, "step": 3235 }, { "epoch": 1.819202695115104, "grad_norm": 3.647804260253906e-05, "learning_rate": 5.010327850489505e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 357952668.0, "step": 3240 }, { "epoch": 1.8220101066816397, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.9999026114785986e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 358503357.0, "step": 3245 }, { "epoch": 1.8248175182481752, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.9894878822591726e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 359056222.0, "step": 3250 }, { "epoch": 1.8276249298147107, "grad_norm": 3.552436828613281e-05, "learning_rate": 4.979083754037683e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 359613705.0, "step": 3255 }, { "epoch": 1.8304323413812464, "grad_norm": 3.5762786865234375e-05, "learning_rate": 4.9686903179277513e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 360174863.0, "step": 3260 }, { "epoch": 1.8332397529477822, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.9583076649493626e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 360725928.0, "step": 3265 }, { "epoch": 1.836047164514318, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.947935886028069e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 361277355.0, "step": 3270 }, { "epoch": 1.8388545760808535, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.937575071994194e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 361828968.0, "step": 3275 }, { "epoch": 1.841661987647389, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.927225313582036e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 362377333.0, "step": 3280 }, { "epoch": 1.8444693992139247, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.916886701429075e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 362927237.0, "step": 3285 }, { "epoch": 1.8472768107804605, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.906559326075178e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 363485255.0, "step": 3290 }, { "epoch": 1.8500842223469962, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.896243277961806e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 364043415.0, "step": 3295 }, { "epoch": 1.8528916339135317, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.885938647431222e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 364596780.0, "step": 3300 }, { "epoch": 1.8556990454800673, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.875645524725702e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 365147209.0, "step": 3305 }, { "epoch": 1.858506457046603, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.865363999986741e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 365707178.0, "step": 3310 }, { "epoch": 1.8613138686131387, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.855094163254265e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 366266619.0, "step": 3315 }, { "epoch": 1.8641212801796745, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.844836104465845e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 366819911.0, "step": 3320 }, { "epoch": 1.86692869174621, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.8345899134559056e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 367367717.0, "step": 3325 }, { "epoch": 1.8697361033127455, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.824355679954939e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 367919977.0, "step": 3330 }, { "epoch": 1.8725435148792813, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.8141334935887245e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 368469172.0, "step": 3335 }, { "epoch": 1.875350926445817, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.803923443877533e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 369023251.0, "step": 3340 }, { "epoch": 1.8781583380123528, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.793725620235354e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 369575387.0, "step": 3345 }, { "epoch": 1.8809657495788883, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.7835401119691063e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 370124260.0, "step": 3350 }, { "epoch": 1.8837731611454238, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.773367008277857e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 370681760.0, "step": 3355 }, { "epoch": 1.8865805727119596, "grad_norm": 3.719329833984375e-05, "learning_rate": 4.763206398252041e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 371233812.0, "step": 3360 }, { "epoch": 1.8893879842784953, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.7530583708726814e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 371786264.0, "step": 3365 }, { "epoch": 1.892195395845031, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.742923015010608e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 372340592.0, "step": 3370 }, { "epoch": 1.8950028074115666, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.7328004194256824e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 372901718.0, "step": 3375 }, { "epoch": 1.897810218978102, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.722690672766016e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 373459814.0, "step": 3380 }, { "epoch": 1.9006176305446378, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.712593863567198e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 374012660.0, "step": 3385 }, { "epoch": 1.9034250421111736, "grad_norm": 3.719329833984375e-05, "learning_rate": 4.702510080251521e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 374563720.0, "step": 3390 }, { "epoch": 1.906232453677709, "grad_norm": 3.528594970703125e-05, "learning_rate": 4.692439411127199e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 375120157.0, "step": 3395 }, { "epoch": 1.9090398652442448, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.682381944387602e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 375674586.0, "step": 3400 }, { "epoch": 1.9118472768107804, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.672337768110481e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 376225975.0, "step": 3405 }, { "epoch": 1.9146546883773161, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.662306970257195e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 376775438.0, "step": 3410 }, { "epoch": 1.9174620999438519, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.6522896386719435e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 377330791.0, "step": 3415 }, { "epoch": 1.9202695115103874, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.642285861080997e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 377881155.0, "step": 3420 }, { "epoch": 1.9230769230769231, "grad_norm": 3.5762786865234375e-05, "learning_rate": 4.632295725091922e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 378428703.0, "step": 3425 }, { "epoch": 1.9258843346434587, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.6223193181928266e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 378981316.0, "step": 3430 }, { "epoch": 1.9286917462099944, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.612356727751581e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 379532283.0, "step": 3435 }, { "epoch": 1.9314991577765301, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.602408041015065e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 380085586.0, "step": 3440 }, { "epoch": 1.9343065693430657, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.59247334510839e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 380641970.0, "step": 3445 }, { "epoch": 1.9371139809096012, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.5825527270341506e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 381190156.0, "step": 3450 }, { "epoch": 1.939921392476137, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.572646273671649e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 381739880.0, "step": 3455 }, { "epoch": 1.9427288040426727, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.562754071776145e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 382287862.0, "step": 3460 }, { "epoch": 1.9455362156092084, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.5528762079780894e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 382839030.0, "step": 3465 }, { "epoch": 1.948343627175744, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.543012768782372e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 383395963.0, "step": 3470 }, { "epoch": 1.9511510387422795, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.533163840567553e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 383948951.0, "step": 3475 }, { "epoch": 1.9539584503088152, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.523329509585121e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 384502145.0, "step": 3480 }, { "epoch": 1.956765861875351, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.5135098619587235e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 385059056.0, "step": 3485 }, { "epoch": 1.9595732734418867, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.503704983683424e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 385610430.0, "step": 3490 }, { "epoch": 1.9623806850084222, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.493914960624941e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 386163521.0, "step": 3495 }, { "epoch": 1.9651880965749577, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.484139878518903e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 386717202.0, "step": 3500 }, { "epoch": 1.9679955081414935, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.474379822970086e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 387270797.0, "step": 3505 }, { "epoch": 1.9708029197080292, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.464634879451685e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 387825786.0, "step": 3510 }, { "epoch": 1.973610331274565, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.454905133304538e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 388381012.0, "step": 3515 }, { "epoch": 1.9764177428411005, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.445190669736402e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 388934798.0, "step": 3520 }, { "epoch": 1.979225154407636, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.435491573821194e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 389492165.0, "step": 3525 }, { "epoch": 1.9820325659741718, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.425807930498249e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 390045964.0, "step": 3530 }, { "epoch": 1.9848399775407075, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.41613982457158e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 390596385.0, "step": 3535 }, { "epoch": 1.9876473891072433, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.406487340709128e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 391153904.0, "step": 3540 }, { "epoch": 1.9904548006737788, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.3968505634420296e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 391703027.0, "step": 3545 }, { "epoch": 1.9932622122403143, "grad_norm": 3.5762786865234375e-05, "learning_rate": 4.387229577163864e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 392251032.0, "step": 3550 }, { "epoch": 1.99606962380685, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.377624466129931e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 392796150.0, "step": 3555 }, { "epoch": 1.9988770353733858, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.368035314456496e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 393342272.0, "step": 3560 }, { "epoch": 2.0016844469399215, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.358462206120066e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 393855386.0, "step": 3565 }, { "epoch": 2.004491858506457, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.348905224956645e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 394415587.0, "step": 3570 }, { "epoch": 2.0072992700729926, "grad_norm": 3.62396240234375e-05, "learning_rate": 4.339364454661011e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 394974360.0, "step": 3575 }, { "epoch": 2.0101066816395283, "grad_norm": 3.62396240234375e-05, "learning_rate": 4.329839978785966e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 395525472.0, "step": 3580 }, { "epoch": 2.012914093206064, "grad_norm": 3.5762786865234375e-05, "learning_rate": 4.320331880741626e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 396074688.0, "step": 3585 }, { "epoch": 2.0157215047726, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.310840243794667e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 396626100.0, "step": 3590 }, { "epoch": 2.018528916339135, "grad_norm": 3.62396240234375e-05, "learning_rate": 4.30136515106762e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 397182264.0, "step": 3595 }, { "epoch": 2.021336327905671, "grad_norm": 3.719329833984375e-05, "learning_rate": 4.29190668553812e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 397736977.0, "step": 3600 }, { "epoch": 2.0241437394722066, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.2824649300382e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 398291111.0, "step": 3605 }, { "epoch": 2.0269511510387423, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.273039967253544e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 398843183.0, "step": 3610 }, { "epoch": 2.029758562605278, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.263631879722787e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 399400041.0, "step": 3615 }, { "epoch": 2.0325659741718134, "grad_norm": 3.62396240234375e-05, "learning_rate": 4.254240749836771e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 399954035.0, "step": 3620 }, { "epoch": 2.035373385738349, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.244866659837838e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 400511951.0, "step": 3625 }, { "epoch": 2.038180797304885, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.235509691819098e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 401060249.0, "step": 3630 }, { "epoch": 2.0409882088714206, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.2261699277237244e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 401604794.0, "step": 3635 }, { "epoch": 2.0437956204379564, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.21684744934422e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 402161143.0, "step": 3640 }, { "epoch": 2.0466030320044917, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.207542338321714e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 402708815.0, "step": 3645 }, { "epoch": 2.0494104435710274, "grad_norm": 3.695487976074219e-05, "learning_rate": 4.198254676145238e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 403255060.0, "step": 3650 }, { "epoch": 2.052217855137563, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.1889845441510214e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 403812590.0, "step": 3655 }, { "epoch": 2.055025266704099, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.179732023521768e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 404367889.0, "step": 3660 }, { "epoch": 2.0578326782706347, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.170497195285955e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 404923862.0, "step": 3665 }, { "epoch": 2.06064008983717, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.1612801403171195e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 405475338.0, "step": 3670 }, { "epoch": 2.0634475014037057, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.1520809393331454e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 406028058.0, "step": 3675 }, { "epoch": 2.0662549129702414, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.142899672895568e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 406578786.0, "step": 3680 }, { "epoch": 2.069062324536777, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.1337364214088556e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 407132846.0, "step": 3685 }, { "epoch": 2.071869736103313, "grad_norm": 3.719329833984375e-05, "learning_rate": 4.124591265119717e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 407682770.0, "step": 3690 }, { "epoch": 2.0746771476698482, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.1154642841163885e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 408230483.0, "step": 3695 }, { "epoch": 2.077484559236384, "grad_norm": 3.5762786865234375e-05, "learning_rate": 4.106355558327942e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 408781964.0, "step": 3700 }, { "epoch": 2.0802919708029197, "grad_norm": 3.5762786865234375e-05, "learning_rate": 4.097265167523573e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 409337735.0, "step": 3705 }, { "epoch": 2.0830993823694555, "grad_norm": 3.647804260253906e-05, "learning_rate": 4.088193191311917e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 409891184.0, "step": 3710 }, { "epoch": 2.085906793935991, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.0791397091403416e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 410443472.0, "step": 3715 }, { "epoch": 2.0887142055025265, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.070104800294253e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 410994454.0, "step": 3720 }, { "epoch": 2.0915216170690623, "grad_norm": 3.5762786865234375e-05, "learning_rate": 4.061088543896403e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 411549641.0, "step": 3725 }, { "epoch": 2.094329028635598, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.052091018906196e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 412093816.0, "step": 3730 }, { "epoch": 2.0971364402021337, "grad_norm": 3.600120544433594e-05, "learning_rate": 4.0431123041189955e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 412641540.0, "step": 3735 }, { "epoch": 2.0999438517686695, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.034152478165441e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 413187544.0, "step": 3740 }, { "epoch": 2.102751263335205, "grad_norm": 3.62396240234375e-05, "learning_rate": 4.025211619510744e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 413743197.0, "step": 3745 }, { "epoch": 2.1055586749017405, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.016289806454021e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 414292096.0, "step": 3750 }, { "epoch": 2.1083660864682763, "grad_norm": 3.6716461181640625e-05, "learning_rate": 4.00738711712759e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 414844835.0, "step": 3755 }, { "epoch": 2.111173498034812, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.998503629496302e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 415395689.0, "step": 3760 }, { "epoch": 2.1139809096013478, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.989639421356841e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 415945234.0, "step": 3765 }, { "epoch": 2.116788321167883, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.9807945703370595e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 416502352.0, "step": 3770 }, { "epoch": 2.119595732734419, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.971969153895285e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 417056171.0, "step": 3775 }, { "epoch": 2.1224031443009546, "grad_norm": 3.62396240234375e-05, "learning_rate": 3.963163249319653e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 417606441.0, "step": 3780 }, { "epoch": 2.1252105558674903, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.95437693372742e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 418165002.0, "step": 3785 }, { "epoch": 2.128017967434026, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.9456102840642973e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 418723608.0, "step": 3790 }, { "epoch": 2.1308253790005613, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.9368633771037685e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 419270681.0, "step": 3795 }, { "epoch": 2.133632790567097, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.928136289446426e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 419819889.0, "step": 3800 }, { "epoch": 2.136440202133633, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.919429097519291e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 420375837.0, "step": 3805 }, { "epoch": 2.1392476137001686, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.9107418775751554e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 420924439.0, "step": 3810 }, { "epoch": 2.1420550252667043, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.902074705691898e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 421479805.0, "step": 3815 }, { "epoch": 2.1448624368332396, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.8934276577718387e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 422037428.0, "step": 3820 }, { "epoch": 2.1476698483997754, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.884800809541053e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 422591274.0, "step": 3825 }, { "epoch": 2.150477259966311, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.8761942365487236e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 423148403.0, "step": 3830 }, { "epoch": 2.153284671532847, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.8676080141664755e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 423700476.0, "step": 3835 }, { "epoch": 2.156092083099382, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.859042217587709e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 424253758.0, "step": 3840 }, { "epoch": 2.158899494665918, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.8504969218269515e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 424804087.0, "step": 3845 }, { "epoch": 2.1617069062324537, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.8419722017191924e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 425365636.0, "step": 3850 }, { "epoch": 2.1645143177989894, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.8334681319192324e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 425920145.0, "step": 3855 }, { "epoch": 2.167321729365525, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.824984786901027e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 426462062.0, "step": 3860 }, { "epoch": 2.1701291409320604, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.8165222409570366e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 427011236.0, "step": 3865 }, { "epoch": 2.172936552498596, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.808080568197574e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 427564650.0, "step": 3870 }, { "epoch": 2.175743964065132, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.799659842550158e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 428110369.0, "step": 3875 }, { "epoch": 2.1785513756316677, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.791260137758859e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 428657566.0, "step": 3880 }, { "epoch": 2.1813587871982034, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.782881527383666e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 429208293.0, "step": 3885 }, { "epoch": 2.1841661987647387, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.774524084799829e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 429764935.0, "step": 3890 }, { "epoch": 2.1869736103312745, "grad_norm": 3.62396240234375e-05, "learning_rate": 3.766187883197224e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 430313872.0, "step": 3895 }, { "epoch": 2.18978102189781, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.757872995579709e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 430868907.0, "step": 3900 }, { "epoch": 2.192588433464346, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.749579494764489e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 431419983.0, "step": 3905 }, { "epoch": 2.1953958450308817, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.74130745338147e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 431974999.0, "step": 3910 }, { "epoch": 2.198203256597417, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.733056943872636e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 432530614.0, "step": 3915 }, { "epoch": 2.2010106681639527, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.724828038491397e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 433082665.0, "step": 3920 }, { "epoch": 2.2038180797304885, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.7166208093019734e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 433636089.0, "step": 3925 }, { "epoch": 2.2066254912970242, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.708435328178753e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 434185821.0, "step": 3930 }, { "epoch": 2.20943290286356, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.7002716668056716e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 434740820.0, "step": 3935 }, { "epoch": 2.2122403144300953, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.692129896675571e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 435291751.0, "step": 3940 }, { "epoch": 2.215047725996631, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.684010089089591e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 435843376.0, "step": 3945 }, { "epoch": 2.2178551375631668, "grad_norm": 3.552436828613281e-05, "learning_rate": 3.6759123151565285e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 436392388.0, "step": 3950 }, { "epoch": 2.2206625491297025, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.6678366457922266e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 436941750.0, "step": 3955 }, { "epoch": 2.2234699606962383, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.6597831517189436e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 437496915.0, "step": 3960 }, { "epoch": 2.2262773722627736, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.651751903464745e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 438052514.0, "step": 3965 }, { "epoch": 2.2290847838293093, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.643742971362873e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 438608859.0, "step": 3970 }, { "epoch": 2.231892195395845, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.635756425551144e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 439161038.0, "step": 3975 }, { "epoch": 2.234699606962381, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.6277923359713226e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 439715521.0, "step": 3980 }, { "epoch": 2.2375070185289165, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.619850772368516e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 440270663.0, "step": 3985 }, { "epoch": 2.240314430095452, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.6119318042905615e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 440820622.0, "step": 3990 }, { "epoch": 2.2431218416619876, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.604035501087416e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 441372621.0, "step": 3995 }, { "epoch": 2.2459292532285233, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.5961619319105524e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 441922822.0, "step": 4000 }, { "epoch": 2.248736664795059, "grad_norm": 3.4332275390625e-05, "learning_rate": 3.588311165712346e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 442468147.0, "step": 4005 }, { "epoch": 2.2515440763615944, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.580483271245486e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 443021043.0, "step": 4010 }, { "epoch": 2.25435148792813, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.5726783170623544e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 443568908.0, "step": 4015 }, { "epoch": 2.257158899494666, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.564896371514439e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 444118978.0, "step": 4020 }, { "epoch": 2.2599663110612016, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.557137502751728e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 444672648.0, "step": 4025 }, { "epoch": 2.2627737226277373, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.549401778722121e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 445220135.0, "step": 4030 }, { "epoch": 2.2655811341942727, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.541689267170821e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 445769695.0, "step": 4035 }, { "epoch": 2.2683885457608084, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.534000035639757e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 446322562.0, "step": 4040 }, { "epoch": 2.271195957327344, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.5263341514669786e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 446875618.0, "step": 4045 }, { "epoch": 2.27400336889388, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.518691681786076e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 447424094.0, "step": 4050 }, { "epoch": 2.2768107804604156, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.511072693525584e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 447970798.0, "step": 4055 }, { "epoch": 2.279618192026951, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.503477253408406e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 448530070.0, "step": 4060 }, { "epoch": 2.2824256035934867, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.495905427951216e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 449080883.0, "step": 4065 }, { "epoch": 2.2852330151600224, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.488357283463892e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 449631606.0, "step": 4070 }, { "epoch": 2.288040426726558, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.480832886048919e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 450180177.0, "step": 4075 }, { "epoch": 2.290847838293094, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.473332301600827e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 450734393.0, "step": 4080 }, { "epoch": 2.293655249859629, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.465855595805597e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 451286197.0, "step": 4085 }, { "epoch": 2.296462661426165, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.458402834140099e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 451840444.0, "step": 4090 }, { "epoch": 2.2992700729927007, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.450974081871512e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 452396502.0, "step": 4095 }, { "epoch": 2.3020774845592364, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.4435694040567535e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 452947008.0, "step": 4100 }, { "epoch": 2.304884896125772, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.43618886554191e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 453507120.0, "step": 4105 }, { "epoch": 2.3076923076923075, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.428832530961672e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 454056290.0, "step": 4110 }, { "epoch": 2.3104997192588432, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.42150046473876e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 454610107.0, "step": 4115 }, { "epoch": 2.313307130825379, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.414192731083373e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 455163278.0, "step": 4120 }, { "epoch": 2.3161145423919147, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.4069093939926105e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 455708115.0, "step": 4125 }, { "epoch": 2.3189219539584505, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.399650517249926e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 456254633.0, "step": 4130 }, { "epoch": 2.3217293655249858, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.3924161644245626e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 456804402.0, "step": 4135 }, { "epoch": 2.3245367770915215, "grad_norm": 3.528594970703125e-05, "learning_rate": 3.3852063988709934e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 457356066.0, "step": 4140 }, { "epoch": 2.3273441886580573, "grad_norm": 3.719329833984375e-05, "learning_rate": 3.378021283728372e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 457909549.0, "step": 4145 }, { "epoch": 2.330151600224593, "grad_norm": 3.504753112792969e-05, "learning_rate": 3.3708608819199756e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 458462908.0, "step": 4150 }, { "epoch": 2.3329590117911287, "grad_norm": 3.62396240234375e-05, "learning_rate": 3.363725256152659e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 459019310.0, "step": 4155 }, { "epoch": 2.335766423357664, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.3566144689162964e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 459569856.0, "step": 4160 }, { "epoch": 2.3385738349242, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.349528582483247e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 460122283.0, "step": 4165 }, { "epoch": 2.3413812464907355, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.342467658907796e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 460671252.0, "step": 4170 }, { "epoch": 2.3441886580572713, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.3354317600256214e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 461224519.0, "step": 4175 }, { "epoch": 2.346996069623807, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.328420947453246e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 461774292.0, "step": 4180 }, { "epoch": 2.3498034811903423, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.321435282587506e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 462324971.0, "step": 4185 }, { "epoch": 2.352610892756878, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.314474826605e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 462882605.0, "step": 4190 }, { "epoch": 2.355418304323414, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.307539640461568e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 463434314.0, "step": 4195 }, { "epoch": 2.3582257158899496, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.300629784891745e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 463983117.0, "step": 4200 }, { "epoch": 2.3610331274564853, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.2937453204082417e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 464531840.0, "step": 4205 }, { "epoch": 2.3638405390230206, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.286886307301399e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 465086777.0, "step": 4210 }, { "epoch": 2.3666479505895563, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.280052805638677e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 465637200.0, "step": 4215 }, { "epoch": 2.369455362156092, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.273244875264113e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 466187734.0, "step": 4220 }, { "epoch": 2.372262773722628, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.266462575797813e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 466738928.0, "step": 4225 }, { "epoch": 2.3750701852891636, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.259705966635416e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 467293894.0, "step": 4230 }, { "epoch": 2.377877596855699, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.252975106947581e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 467844924.0, "step": 4235 }, { "epoch": 2.3806850084222346, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.2462700556794665e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 468406343.0, "step": 4240 }, { "epoch": 2.3834924199887704, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.239590871550217e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 468955243.0, "step": 4245 }, { "epoch": 2.386299831555306, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.2329376130524454e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 469510244.0, "step": 4250 }, { "epoch": 2.389107243121842, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.226310338451722e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 470065469.0, "step": 4255 }, { "epoch": 2.391914654688377, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.2197091057860664e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 470621191.0, "step": 4260 }, { "epoch": 2.394722066254913, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.213133972865434e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 471173960.0, "step": 4265 }, { "epoch": 2.3975294778214487, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.206584997271219e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 471721395.0, "step": 4270 }, { "epoch": 2.4003368893879844, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.2000622363557336e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 472271956.0, "step": 4275 }, { "epoch": 2.40314430095452, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.193565747241729e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 472820362.0, "step": 4280 }, { "epoch": 2.4059517125210554, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.187095586821872e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 473370960.0, "step": 4285 }, { "epoch": 2.408759124087591, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.1806518117582644e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 473922648.0, "step": 4290 }, { "epoch": 2.411566535654127, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.174234478481934e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 474470657.0, "step": 4295 }, { "epoch": 2.4143739472206627, "grad_norm": 3.409385681152344e-05, "learning_rate": 3.167843643192352e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 475021208.0, "step": 4300 }, { "epoch": 2.4171813587871984, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.161479361856928e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 475572371.0, "step": 4305 }, { "epoch": 2.4199887703537337, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.155141690210532e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 476129454.0, "step": 4310 }, { "epoch": 2.4227961819202695, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.148830683754998e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 476684137.0, "step": 4315 }, { "epoch": 2.425603593486805, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.1425463977586445e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 477240244.0, "step": 4320 }, { "epoch": 2.428411005053341, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.136288887255781e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 477787923.0, "step": 4325 }, { "epoch": 2.4312184166198767, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.13005820704624e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 478337856.0, "step": 4330 }, { "epoch": 2.434025828186412, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.12385441169488e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 478892564.0, "step": 4335 }, { "epoch": 2.4368332397529477, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.117677555531126e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 479444225.0, "step": 4340 }, { "epoch": 2.4396406513194835, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.111527692648475e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 480002116.0, "step": 4345 }, { "epoch": 2.4424480628860192, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.1054048769040406e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 480542313.0, "step": 4350 }, { "epoch": 2.445255474452555, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.099309161918066e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 481093218.0, "step": 4355 }, { "epoch": 2.4480628860190903, "grad_norm": 3.5762786865234375e-05, "learning_rate": 3.093240601073465e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 481638497.0, "step": 4360 }, { "epoch": 2.450870297585626, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.087199247515347e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 482191203.0, "step": 4365 }, { "epoch": 2.4536777091521618, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.081185154150558e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 482744679.0, "step": 4370 }, { "epoch": 2.4564851207186975, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.075198373647212e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 483297916.0, "step": 4375 }, { "epoch": 2.4592925322852333, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.069238958434235e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 483853225.0, "step": 4380 }, { "epoch": 2.4620999438517686, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.063306960700897e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 484401633.0, "step": 4385 }, { "epoch": 2.4649073554183043, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.0574024323963676e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 484958826.0, "step": 4390 }, { "epoch": 2.46771476698484, "grad_norm": 3.62396240234375e-05, "learning_rate": 3.0515254252292517e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 485519579.0, "step": 4395 }, { "epoch": 2.470522178551376, "grad_norm": 3.695487976074219e-05, "learning_rate": 3.045675990667137e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 486074867.0, "step": 4400 }, { "epoch": 2.473329590117911, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.039854179936149e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 486622230.0, "step": 4405 }, { "epoch": 2.476137001684447, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.0340600440204953e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 487168498.0, "step": 4410 }, { "epoch": 2.4789444132509826, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.0282936336620264e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 487722028.0, "step": 4415 }, { "epoch": 2.4817518248175183, "grad_norm": 3.600120544433594e-05, "learning_rate": 3.0225549993597855e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 488285750.0, "step": 4420 }, { "epoch": 2.484559236384054, "grad_norm": 3.647804260253906e-05, "learning_rate": 3.01684419136957e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 488839075.0, "step": 4425 }, { "epoch": 2.4873666479505894, "grad_norm": 3.6716461181640625e-05, "learning_rate": 3.0111612597034867e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 489389237.0, "step": 4430 }, { "epoch": 2.490174059517125, "grad_norm": 3.62396240234375e-05, "learning_rate": 3.0055062541295205e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 489939543.0, "step": 4435 }, { "epoch": 2.492981471083661, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.999879224171091e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 490498563.0, "step": 4440 }, { "epoch": 2.4957888826501966, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.994280219106629e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 491054881.0, "step": 4445 }, { "epoch": 2.4985962942167323, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.9887092879691318e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 491610500.0, "step": 4450 }, { "epoch": 2.501403705783268, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.9831664795457446e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 492166822.0, "step": 4455 }, { "epoch": 2.5042111173498034, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.977651842377329e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 492718184.0, "step": 4460 }, { "epoch": 2.507018528916339, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.9721654247580383e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 493269291.0, "step": 4465 }, { "epoch": 2.509825940482875, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.9667072747348922e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 493824296.0, "step": 4470 }, { "epoch": 2.51263335204941, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.961277440107363e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 494379518.0, "step": 4475 }, { "epoch": 2.5154407636159464, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.9558759684269465e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 494935559.0, "step": 4480 }, { "epoch": 2.5182481751824817, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.9505029069967577e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 495490094.0, "step": 4485 }, { "epoch": 2.5210555867490174, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.945158302871104e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 496047710.0, "step": 4490 }, { "epoch": 2.523862998315553, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.9398422028550858e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 496596973.0, "step": 4495 }, { "epoch": 2.5266704098820885, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.934554653504175e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 497144172.0, "step": 4500 }, { "epoch": 2.529477821448624, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.9292957011238153e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 497698226.0, "step": 4505 }, { "epoch": 2.53228523301516, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.9240653917690116e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 498249131.0, "step": 4510 }, { "epoch": 2.5350926445816957, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.918863771243932e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 498798216.0, "step": 4515 }, { "epoch": 2.5379000561482314, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.913690885101503e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 499351434.0, "step": 4520 }, { "epoch": 2.5407074677147667, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.9085467786430075e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 499910114.0, "step": 4525 }, { "epoch": 2.5435148792813025, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.9034314969176974e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 500467379.0, "step": 4530 }, { "epoch": 2.5463222908478382, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.8983450847223893e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 501014665.0, "step": 4535 }, { "epoch": 2.549129702414374, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.8932875866010782e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 501570276.0, "step": 4540 }, { "epoch": 2.5519371139809097, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.888259046844544e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 502113861.0, "step": 4545 }, { "epoch": 2.554744525547445, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.8832595094899683e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 502671432.0, "step": 4550 }, { "epoch": 2.5575519371139808, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.878289018320542e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 503231399.0, "step": 4555 }, { "epoch": 2.5603593486805165, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.8733476168650887e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 503787894.0, "step": 4560 }, { "epoch": 2.5631667602470523, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.8684353483976765e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 504343672.0, "step": 4565 }, { "epoch": 2.565974171813588, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.8635522559372482e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 504899932.0, "step": 4570 }, { "epoch": 2.5687815833801233, "grad_norm": 3.62396240234375e-05, "learning_rate": 2.858698382247234e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 505456280.0, "step": 4575 }, { "epoch": 2.571588994946659, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.853873769835185e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 506011141.0, "step": 4580 }, { "epoch": 2.574396406513195, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.8490784609523975e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 506563586.0, "step": 4585 }, { "epoch": 2.5772038180797305, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.8443124975935442e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 507120320.0, "step": 4590 }, { "epoch": 2.5800112296462663, "grad_norm": 3.62396240234375e-05, "learning_rate": 2.8395759214963045e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 507675333.0, "step": 4595 }, { "epoch": 2.5828186412128016, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.8348687741410022e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 508225578.0, "step": 4600 }, { "epoch": 2.5856260527793373, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.8301910967502386e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 508779780.0, "step": 4605 }, { "epoch": 2.588433464345873, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.825542930288535e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 509334766.0, "step": 4610 }, { "epoch": 2.591240875912409, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.8209243154619703e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 509885085.0, "step": 4615 }, { "epoch": 2.5940482874789446, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.8163352927178284e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 510434191.0, "step": 4620 }, { "epoch": 2.59685569904548, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.81177590224424e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 510992007.0, "step": 4625 }, { "epoch": 2.5996631106120156, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.807246183969836e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 511535458.0, "step": 4630 }, { "epoch": 2.6024705221785513, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.80274617756339e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 512096261.0, "step": 4635 }, { "epoch": 2.605277933745087, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.79827592243348e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 512650203.0, "step": 4640 }, { "epoch": 2.608085345311623, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.793835457728136e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 513200923.0, "step": 4645 }, { "epoch": 2.610892756878158, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.789424822334499e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 513756141.0, "step": 4650 }, { "epoch": 2.613700168444694, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.785044054878485e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 514318777.0, "step": 4655 }, { "epoch": 2.6165075800112296, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.780693193724439e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 514877191.0, "step": 4660 }, { "epoch": 2.6193149915777654, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.7763722769748062e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 515423610.0, "step": 4665 }, { "epoch": 2.622122403144301, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.772081342469793e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 515970305.0, "step": 4670 }, { "epoch": 2.6249298147108364, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.76782042778704e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 516524519.0, "step": 4675 }, { "epoch": 2.627737226277372, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.7635895702412877e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 517071784.0, "step": 4680 }, { "epoch": 2.630544637843908, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.759388806884057e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 517625722.0, "step": 4685 }, { "epoch": 2.6333520494104437, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.7552181745033163e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 518181488.0, "step": 4690 }, { "epoch": 2.6361594609769794, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.7510777096231655e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 518735057.0, "step": 4695 }, { "epoch": 2.6389668725435147, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.7469674485035143e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 519284341.0, "step": 4700 }, { "epoch": 2.6417742841100504, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.7428874271397647e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 519844052.0, "step": 4705 }, { "epoch": 2.644581695676586, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.7388376812624932e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 520393755.0, "step": 4710 }, { "epoch": 2.647389107243122, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.7348182463371448e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 520939485.0, "step": 4715 }, { "epoch": 2.6501965188096577, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.7308291575637122e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 521490972.0, "step": 4720 }, { "epoch": 2.653003930376193, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.726870449876439e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 522046812.0, "step": 4725 }, { "epoch": 2.6558113419427287, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.7229421579435037e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 522599824.0, "step": 4730 }, { "epoch": 2.6586187535092645, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.719044316166723e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 523152229.0, "step": 4735 }, { "epoch": 2.6614261650758, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.7151769586812447e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 523710114.0, "step": 4740 }, { "epoch": 2.664233576642336, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.7113401193552564e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 524259379.0, "step": 4745 }, { "epoch": 2.6670409882088713, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.707533831789681e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 524811545.0, "step": 4750 }, { "epoch": 2.669848399775407, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.7037581293178877e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 525362716.0, "step": 4755 }, { "epoch": 2.6726558113419427, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.700013045005396e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 525914647.0, "step": 4760 }, { "epoch": 2.6754632229084785, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.696298611649593e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 526460363.0, "step": 4765 }, { "epoch": 2.6782706344750142, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.6926148617794374e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 527013619.0, "step": 4770 }, { "epoch": 2.6810780460415495, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.6889618276551795e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 527569667.0, "step": 4775 }, { "epoch": 2.6838854576080853, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.6853395412680797e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 528117310.0, "step": 4780 }, { "epoch": 2.686692869174621, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.6817480343401255e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 528672741.0, "step": 4785 }, { "epoch": 2.6895002807411568, "grad_norm": 3.62396240234375e-05, "learning_rate": 2.6781873383237548e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 529231565.0, "step": 4790 }, { "epoch": 2.6923076923076925, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.6746574844015817e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 529784538.0, "step": 4795 }, { "epoch": 2.695115103874228, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.67115850348612e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999523162841, "num_tokens": 530344021.0, "step": 4800 }, { "epoch": 2.6979225154407636, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6676904262195166e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 530898167.0, "step": 4805 }, { "epoch": 2.7007299270072993, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6642532829732803e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 531445249.0, "step": 4810 }, { "epoch": 2.703537338573835, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6608471038480187e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 532000652.0, "step": 4815 }, { "epoch": 2.706344750140371, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6574719186731696e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 532546443.0, "step": 4820 }, { "epoch": 2.709152161706906, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.6541277570067448e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 533100076.0, "step": 4825 }, { "epoch": 2.711959573273442, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.65081464813507e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 533648568.0, "step": 4830 }, { "epoch": 2.7147669848399776, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6475326210725255e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 534205768.0, "step": 4835 }, { "epoch": 2.7175743964065133, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6442817045612965e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 534761319.0, "step": 4840 }, { "epoch": 2.720381807973049, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.6410619270711157e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 535316032.0, "step": 4845 }, { "epoch": 2.7231892195395844, "grad_norm": 3.4809112548828125e-05, "learning_rate": 2.6378733167990227e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 535871594.0, "step": 4850 }, { "epoch": 2.72599663110612, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.6347159016691074e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 536425028.0, "step": 4855 }, { "epoch": 2.728804042672656, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.631589709332271e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 536980077.0, "step": 4860 }, { "epoch": 2.7316114542391916, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.6284947671659832e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 537532742.0, "step": 4865 }, { "epoch": 2.7344188658057273, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.625431102274042e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 538084924.0, "step": 4870 }, { "epoch": 2.7372262773722627, "grad_norm": 3.337860107421875e-05, "learning_rate": 2.622398741486335e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 538639412.0, "step": 4875 }, { "epoch": 2.7400336889387984, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6193977113586082e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 539191297.0, "step": 4880 }, { "epoch": 2.742841100505334, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.6164280381722277e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 539744336.0, "step": 4885 }, { "epoch": 2.74564851207187, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.613489747933956e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 540297982.0, "step": 4890 }, { "epoch": 2.7484559236384056, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.6105828663757183e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 540853227.0, "step": 4895 }, { "epoch": 2.751263335204941, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.6077074189543822e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 541409567.0, "step": 4900 }, { "epoch": 2.7540707467714767, "grad_norm": 3.695487976074219e-05, "learning_rate": 2.6048634308515305e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 541966124.0, "step": 4905 }, { "epoch": 2.7568781583380124, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.6020509269732445e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 542521286.0, "step": 4910 }, { "epoch": 2.759685569904548, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.5992699319498815e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 543063982.0, "step": 4915 }, { "epoch": 2.762492981471084, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.5965204701358646e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 543618225.0, "step": 4920 }, { "epoch": 2.765300393037619, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.593802565609464e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 544171246.0, "step": 4925 }, { "epoch": 2.768107804604155, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.5911162421725903e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 544710993.0, "step": 4930 }, { "epoch": 2.7709152161706907, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.588461523350583e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 545264957.0, "step": 4935 }, { "epoch": 2.7737226277372264, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.585838432392007e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 545817184.0, "step": 4940 }, { "epoch": 2.776530039303762, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.5832469922684454e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 546368057.0, "step": 4945 }, { "epoch": 2.7793374508702975, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.5806872256743047e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 546921971.0, "step": 4950 }, { "epoch": 2.7821448624368332, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.5781591550266094e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 547476877.0, "step": 4955 }, { "epoch": 2.784952274003369, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.5756628024648076e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 548027287.0, "step": 4960 }, { "epoch": 2.7877596855699043, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.57319818985058e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 548572683.0, "step": 4965 }, { "epoch": 2.7905670971364405, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.570765338767646e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 549127107.0, "step": 4970 }, { "epoch": 2.7933745087029758, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.568364270521573e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 549684823.0, "step": 4975 }, { "epoch": 2.7961819202695115, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.5659950061395948e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 550234676.0, "step": 4980 }, { "epoch": 2.7989893318360473, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.5636575663704226e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 550779576.0, "step": 4985 }, { "epoch": 2.8017967434025826, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.5613519716840653e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 551334644.0, "step": 4990 }, { "epoch": 2.8046041549691187, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.55907824227165e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 551881166.0, "step": 4995 }, { "epoch": 2.807411566535654, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.556836398045247e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 552430559.0, "step": 5000 }, { "epoch": 2.81021897810219, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.554626458637691e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 552987223.0, "step": 5005 }, { "epoch": 2.8130263896687255, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.552448443402414e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 553531494.0, "step": 5010 }, { "epoch": 2.815833801235261, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.550302371413273e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 554084782.0, "step": 5015 }, { "epoch": 2.8186412128017966, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.548188261464384e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 554632393.0, "step": 5020 }, { "epoch": 2.8214486243683323, "grad_norm": 3.4809112548828125e-05, "learning_rate": 2.5461061320699555e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 555190522.0, "step": 5025 }, { "epoch": 2.824256035934868, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.5440560014641302e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 555744351.0, "step": 5030 }, { "epoch": 2.827063447501404, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.542037887600822e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 556297519.0, "step": 5035 }, { "epoch": 2.829870859067939, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.5400518081535596e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 556845909.0, "step": 5040 }, { "epoch": 2.832678270634475, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.5380977805153318e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 557400612.0, "step": 5045 }, { "epoch": 2.8354856822010106, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.5361758217984356e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 557957824.0, "step": 5050 }, { "epoch": 2.8382930937675463, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.5342859488343268e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 558507879.0, "step": 5055 }, { "epoch": 2.841100505334082, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.5324281781734712e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 559053864.0, "step": 5060 }, { "epoch": 2.8439079169006174, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.5306025260851995e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 559608757.0, "step": 5065 }, { "epoch": 2.846715328467153, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.528809008557567e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 560152508.0, "step": 5070 }, { "epoch": 2.849522740033689, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.527047641297212e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 560709603.0, "step": 5075 }, { "epoch": 2.8523301516002246, "grad_norm": 3.62396240234375e-05, "learning_rate": 2.5253184397292168e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 561264021.0, "step": 5080 }, { "epoch": 2.8551375631667604, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.5236214189969777e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 561819249.0, "step": 5085 }, { "epoch": 2.8579449747332957, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.521956593962065e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 562363845.0, "step": 5090 }, { "epoch": 2.8607523862998314, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.5203239792040996e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 562907165.0, "step": 5095 }, { "epoch": 2.863559797866367, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.518723589020622e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 563464021.0, "step": 5100 }, { "epoch": 2.866367209432903, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.517155437426968e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 564013299.0, "step": 5105 }, { "epoch": 2.8691746209994387, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.5156195381561432e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 564569494.0, "step": 5110 }, { "epoch": 2.871982032565974, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.5141159046587077e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 565127735.0, "step": 5115 }, { "epoch": 2.8747894441325097, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.5126445501026548e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 565678334.0, "step": 5120 }, { "epoch": 2.8775968556990454, "grad_norm": 3.504753112792969e-05, "learning_rate": 2.5112054873732968e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 566229333.0, "step": 5125 }, { "epoch": 2.880404267265581, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.5097987290731527e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 566783012.0, "step": 5130 }, { "epoch": 2.883211678832117, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.5084242875218346e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 567334001.0, "step": 5135 }, { "epoch": 2.8860190903986522, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.507082174755946e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 567888452.0, "step": 5140 }, { "epoch": 2.888826501965188, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.5057724025289695e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 568436097.0, "step": 5145 }, { "epoch": 2.8916339135317237, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.504494982311169e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 568987904.0, "step": 5150 }, { "epoch": 2.8944413250982595, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.5032499252894858e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 569539803.0, "step": 5155 }, { "epoch": 2.897248736664795, "grad_norm": 3.4809112548828125e-05, "learning_rate": 2.5020372423674444e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 570098733.0, "step": 5160 }, { "epoch": 2.9000561482313305, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.500856944165053e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 570646446.0, "step": 5165 }, { "epoch": 2.9028635597978663, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4997090410187124e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 571200890.0, "step": 5170 }, { "epoch": 2.905670971364402, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.498593542981125e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 571755411.0, "step": 5175 }, { "epoch": 2.9084783829309377, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4975104598212094e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 572309313.0, "step": 5180 }, { "epoch": 2.9112857944974735, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4964598010240096e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 572857278.0, "step": 5185 }, { "epoch": 2.914093206064009, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.4954415757906173e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 573405011.0, "step": 5190 }, { "epoch": 2.9169006176305445, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4944557930380868e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 573954973.0, "step": 5195 }, { "epoch": 2.9197080291970803, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.493502461399361e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 574501076.0, "step": 5200 }, { "epoch": 2.922515440763616, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.4925815892231925e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 575052705.0, "step": 5205 }, { "epoch": 2.9253228523301518, "grad_norm": 3.504753112792969e-05, "learning_rate": 2.491693184574072e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 575606312.0, "step": 5210 }, { "epoch": 2.928130263896687, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.490837255232159e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 576159265.0, "step": 5215 }, { "epoch": 2.930937675463223, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.49001380869321e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 576711256.0, "step": 5220 }, { "epoch": 2.9337450870297586, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.4892228521685148e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 577257255.0, "step": 5225 }, { "epoch": 2.9365524985962943, "grad_norm": 3.504753112792969e-05, "learning_rate": 2.4884643925848374e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 577811491.0, "step": 5230 }, { "epoch": 2.93935991016283, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.4877384365843467e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 578366927.0, "step": 5235 }, { "epoch": 2.9421673217293653, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4870449905245658e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 578922020.0, "step": 5240 }, { "epoch": 2.944974733295901, "grad_norm": 3.504753112792969e-05, "learning_rate": 2.4863840604783134e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 579470485.0, "step": 5245 }, { "epoch": 2.947782144862437, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4857556522336498e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 580024767.0, "step": 5250 }, { "epoch": 2.9505895564289726, "grad_norm": 3.600120544433594e-05, "learning_rate": 2.485159771293829e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 580576669.0, "step": 5255 }, { "epoch": 2.9533969679955083, "grad_norm": 3.528594970703125e-05, "learning_rate": 2.4845964228772473e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 581130341.0, "step": 5260 }, { "epoch": 2.9562043795620436, "grad_norm": 3.504753112792969e-05, "learning_rate": 2.4840656119173992e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 581687010.0, "step": 5265 }, { "epoch": 2.9590117911285794, "grad_norm": 3.504753112792969e-05, "learning_rate": 2.483567343062836e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999642372132, "num_tokens": 582240438.0, "step": 5270 }, { "epoch": 2.961819202695115, "grad_norm": 3.4332275390625e-05, "learning_rate": 2.4831016206771202e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 582806082.0, "step": 5275 }, { "epoch": 2.964626614261651, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4826684488387922e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 583363300.0, "step": 5280 }, { "epoch": 2.9674340258281866, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.4822678313413326e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 583914687.0, "step": 5285 }, { "epoch": 2.970241437394722, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.481899771693128e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 584471274.0, "step": 5290 }, { "epoch": 2.9730488489612577, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.4815642731174427e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 585030923.0, "step": 5295 }, { "epoch": 2.9758562605277934, "grad_norm": 3.6716461181640625e-05, "learning_rate": 2.4812613385523877e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 585584791.0, "step": 5300 }, { "epoch": 2.978663672094329, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4809909706508978e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 586135923.0, "step": 5305 }, { "epoch": 2.981471083660865, "grad_norm": 3.552436828613281e-05, "learning_rate": 2.4807531717807047e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 586685667.0, "step": 5310 }, { "epoch": 2.9842784952274, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.4805479440243207e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999761581421, "num_tokens": 587238144.0, "step": 5315 }, { "epoch": 2.987085906793936, "grad_norm": 3.647804260253906e-05, "learning_rate": 2.480375289179017e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 587789476.0, "step": 5320 }, { "epoch": 2.9898933183604717, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.4802352087568106e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 588345607.0, "step": 5325 }, { "epoch": 2.9927007299270074, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.4801277039844473e-05, "loss": 0.0, "mean_token_accuracy": 0.9999999880790711, "num_tokens": 588898982.0, "step": 5330 }, { "epoch": 2.995508141493543, "grad_norm": 3.4809112548828125e-05, "learning_rate": 2.4800527758033947e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 589442767.0, "step": 5335 }, { "epoch": 2.9983155530600785, "grad_norm": 3.5762786865234375e-05, "learning_rate": 2.480010424869834e-05, "loss": 0.0, "mean_token_accuracy": 1.0, "num_tokens": 589989550.0, "step": 5340 } ], "logging_steps": 5, "max_steps": 5343, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2669368762683776e+18, "train_batch_size": 140, "trial_name": null, "trial_params": null }