env_44ce73d_env / trainer_state.json
bimabk's picture
Upload task output 1
36a3eb3 verified
Raw
History Blame Contribute Delete
270 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9983155530600785,
"eval_steps": 500,
"global_step": 5340,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002807411566535654,
"grad_norm": 77.0,
"learning_rate": 1.1337142857142857e-05,
"loss": 3.9069,
"mean_token_accuracy": 0.6125709056854248,
"num_tokens": 549878.0,
"step": 5
},
{
"epoch": 0.005614823133071308,
"grad_norm": 76.0,
"learning_rate": 2.5508571428571426e-05,
"loss": 3.167,
"mean_token_accuracy": 0.6638837218284607,
"num_tokens": 1097019.0,
"step": 10
},
{
"epoch": 0.008422234699606962,
"grad_norm": 86.5,
"learning_rate": 3.968e-05,
"loss": 2.5639,
"mean_token_accuracy": 0.6666666626930237,
"num_tokens": 1652837.0,
"step": 15
},
{
"epoch": 0.011229646266142616,
"grad_norm": 75.5,
"learning_rate": 5.3851428571428566e-05,
"loss": 1.8468,
"mean_token_accuracy": 0.9177083373069763,
"num_tokens": 2195895.0,
"step": 20
},
{
"epoch": 0.01403705783267827,
"grad_norm": 56.25,
"learning_rate": 6.802285714285715e-05,
"loss": 0.8546,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 2748699.0,
"step": 25
},
{
"epoch": 0.016844469399213923,
"grad_norm": 2.8125,
"learning_rate": 8.219428571428572e-05,
"loss": 0.1105,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 3302441.0,
"step": 30
},
{
"epoch": 0.019651880965749578,
"grad_norm": 0.02294921875,
"learning_rate": 9.636571428571428e-05,
"loss": 0.0015,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 3858150.0,
"step": 35
},
{
"epoch": 0.022459292532285232,
"grad_norm": 0.0009613037109375,
"learning_rate": 9.919989575130165e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 4410061.0,
"step": 40
},
{
"epoch": 0.025266704098820886,
"grad_norm": 0.00016498565673828125,
"learning_rate": 9.919947224196606e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 4958280.0,
"step": 45
},
{
"epoch": 0.02807411566535654,
"grad_norm": 7.581710815429688e-05,
"learning_rate": 9.919872296015554e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 5500380.0,
"step": 50
},
{
"epoch": 0.030881527231892195,
"grad_norm": 6.031990051269531e-05,
"learning_rate": 9.91976479124319e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 6053192.0,
"step": 55
},
{
"epoch": 0.033688938798427846,
"grad_norm": 4.76837158203125e-05,
"learning_rate": 9.919624710820983e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 6614263.0,
"step": 60
},
{
"epoch": 0.0364963503649635,
"grad_norm": 4.5299530029296875e-05,
"learning_rate": 9.91945205597568e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 7163358.0,
"step": 65
},
{
"epoch": 0.039303761931499155,
"grad_norm": 4.7206878662109375e-05,
"learning_rate": 9.919246828219295e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 7711995.0,
"step": 70
},
{
"epoch": 0.04211117349803481,
"grad_norm": 4.792213439941406e-05,
"learning_rate": 9.919009029349102e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 8258045.0,
"step": 75
},
{
"epoch": 0.044918585064570464,
"grad_norm": 4.696846008300781e-05,
"learning_rate": 9.918738661447612e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 8814280.0,
"step": 80
},
{
"epoch": 0.04772599663110612,
"grad_norm": 4.506111145019531e-05,
"learning_rate": 9.918435726882557e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 9371728.0,
"step": 85
},
{
"epoch": 0.05053340819764177,
"grad_norm": 4.601478576660156e-05,
"learning_rate": 9.918100228306871e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 9924338.0,
"step": 90
},
{
"epoch": 0.05334081976417743,
"grad_norm": 4.673004150390625e-05,
"learning_rate": 9.917732168658667e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 10478099.0,
"step": 95
},
{
"epoch": 0.05614823133071308,
"grad_norm": 4.601478576660156e-05,
"learning_rate": 9.917331551161207e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 11030371.0,
"step": 100
},
{
"epoch": 0.058955642897248736,
"grad_norm": 4.410743713378906e-05,
"learning_rate": 9.91689837932288e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 11584083.0,
"step": 105
},
{
"epoch": 0.06176305446378439,
"grad_norm": 4.458427429199219e-05,
"learning_rate": 9.916432656937164e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 12131615.0,
"step": 110
},
{
"epoch": 0.06457046603032005,
"grad_norm": 4.553794860839844e-05,
"learning_rate": 9.9159343880826e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 12680511.0,
"step": 115
},
{
"epoch": 0.06737787759685569,
"grad_norm": 4.3392181396484375e-05,
"learning_rate": 9.915403577122753e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 13232222.0,
"step": 120
},
{
"epoch": 0.07018528916339135,
"grad_norm": 4.649162292480469e-05,
"learning_rate": 9.914840228706172e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 13783380.0,
"step": 125
},
{
"epoch": 0.072992700729927,
"grad_norm": 4.38690185546875e-05,
"learning_rate": 9.914244347766351e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 14335613.0,
"step": 130
},
{
"epoch": 0.07580011229646266,
"grad_norm": 4.482269287109375e-05,
"learning_rate": 9.913615939521687e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 14887914.0,
"step": 135
},
{
"epoch": 0.07860752386299831,
"grad_norm": 4.3392181396484375e-05,
"learning_rate": 9.912955009475434e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 15436839.0,
"step": 140
},
{
"epoch": 0.08141493542953397,
"grad_norm": 4.5299530029296875e-05,
"learning_rate": 9.912261563415655e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 15986614.0,
"step": 145
},
{
"epoch": 0.08422234699606962,
"grad_norm": 4.482269287109375e-05,
"learning_rate": 9.911535607415163e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 16539455.0,
"step": 150
},
{
"epoch": 0.08702975856260528,
"grad_norm": 4.38690185546875e-05,
"learning_rate": 9.910777147831485e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 17093848.0,
"step": 155
},
{
"epoch": 0.08983717012914093,
"grad_norm": 4.744529724121094e-05,
"learning_rate": 9.90998619130679e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 17650597.0,
"step": 160
},
{
"epoch": 0.09264458169567659,
"grad_norm": 4.1961669921875e-05,
"learning_rate": 9.90916274476784e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 18201538.0,
"step": 165
},
{
"epoch": 0.09545199326221224,
"grad_norm": 4.2438507080078125e-05,
"learning_rate": 9.908306815425927e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 18756478.0,
"step": 170
},
{
"epoch": 0.0982594048287479,
"grad_norm": 4.3392181396484375e-05,
"learning_rate": 9.907418410776807e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 19310705.0,
"step": 175
},
{
"epoch": 0.10106681639528355,
"grad_norm": 4.1961669921875e-05,
"learning_rate": 9.906497538600639e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 19865940.0,
"step": 180
},
{
"epoch": 0.10387422796181921,
"grad_norm": 4.267692565917969e-05,
"learning_rate": 9.905544206961913e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 20413429.0,
"step": 185
},
{
"epoch": 0.10668163952835485,
"grad_norm": 4.291534423828125e-05,
"learning_rate": 9.904558424209383e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 20961011.0,
"step": 190
},
{
"epoch": 0.10948905109489052,
"grad_norm": 4.291534423828125e-05,
"learning_rate": 9.90354019897599e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 21511901.0,
"step": 195
},
{
"epoch": 0.11229646266142616,
"grad_norm": 4.38690185546875e-05,
"learning_rate": 9.90248954017879e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 22066800.0,
"step": 200
},
{
"epoch": 0.11510387422796182,
"grad_norm": 4.172325134277344e-05,
"learning_rate": 9.901406457018874e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 22619701.0,
"step": 205
},
{
"epoch": 0.11791128579449747,
"grad_norm": 4.38690185546875e-05,
"learning_rate": 9.900290958981288e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 23175387.0,
"step": 210
},
{
"epoch": 0.12071869736103313,
"grad_norm": 4.2438507080078125e-05,
"learning_rate": 9.899143055834947e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 23728008.0,
"step": 215
},
{
"epoch": 0.12352610892756878,
"grad_norm": 4.315376281738281e-05,
"learning_rate": 9.897962757632554e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 24271472.0,
"step": 220
},
{
"epoch": 0.12633352049410443,
"grad_norm": 4.3392181396484375e-05,
"learning_rate": 9.896750074710513e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 24822499.0,
"step": 225
},
{
"epoch": 0.1291409320606401,
"grad_norm": 4.38690185546875e-05,
"learning_rate": 9.89550501768883e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 25378674.0,
"step": 230
},
{
"epoch": 0.13194834362717575,
"grad_norm": 4.1961669921875e-05,
"learning_rate": 9.89422759747103e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 25932191.0,
"step": 235
},
{
"epoch": 0.13475575519371139,
"grad_norm": 4.4345855712890625e-05,
"learning_rate": 9.892917825244055e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 26481304.0,
"step": 240
},
{
"epoch": 0.13756316676024705,
"grad_norm": 4.1961669921875e-05,
"learning_rate": 9.891575712478165e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 27032305.0,
"step": 245
},
{
"epoch": 0.1403705783267827,
"grad_norm": 4.291534423828125e-05,
"learning_rate": 9.890201270926846e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 27589288.0,
"step": 250
},
{
"epoch": 0.14317798989331837,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.888794512626705e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 28142842.0,
"step": 255
},
{
"epoch": 0.145985401459854,
"grad_norm": 4.100799560546875e-05,
"learning_rate": 9.887355449897346e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 28700410.0,
"step": 260
},
{
"epoch": 0.14879281302638966,
"grad_norm": 4.1961669921875e-05,
"learning_rate": 9.885884095341294e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 29255425.0,
"step": 265
},
{
"epoch": 0.15160022459292533,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.884380461843857e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 29808897.0,
"step": 270
},
{
"epoch": 0.154407636159461,
"grad_norm": 4.124641418457031e-05,
"learning_rate": 9.882844562573032e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 30359156.0,
"step": 275
},
{
"epoch": 0.15721504772599662,
"grad_norm": 4.2438507080078125e-05,
"learning_rate": 9.881276410979378e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 30909372.0,
"step": 280
},
{
"epoch": 0.16002245929253228,
"grad_norm": 4.2438507080078125e-05,
"learning_rate": 9.8796760207959e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 31460071.0,
"step": 285
},
{
"epoch": 0.16282987085906794,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.878043406037935e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 32015395.0,
"step": 290
},
{
"epoch": 0.1656372824256036,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.876378581003024e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 32567631.0,
"step": 295
},
{
"epoch": 0.16844469399213924,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.874681560270783e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 33119041.0,
"step": 300
},
{
"epoch": 0.1712521055586749,
"grad_norm": 4.1961669921875e-05,
"learning_rate": 9.872952358702788e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 33674218.0,
"step": 305
},
{
"epoch": 0.17405951712521056,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.871190991442434e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 34224167.0,
"step": 310
},
{
"epoch": 0.17686692869174622,
"grad_norm": 4.100799560546875e-05,
"learning_rate": 9.8693974739148e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 34772495.0,
"step": 315
},
{
"epoch": 0.17967434025828186,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.867571821826528e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 35323371.0,
"step": 320
},
{
"epoch": 0.18248175182481752,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.865714051165673e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 35887058.0,
"step": 325
},
{
"epoch": 0.18528916339135318,
"grad_norm": 4.124641418457031e-05,
"learning_rate": 9.863824178201563e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 36435682.0,
"step": 330
},
{
"epoch": 0.18809657495788884,
"grad_norm": 4.220008850097656e-05,
"learning_rate": 9.861902219484668e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 36992097.0,
"step": 335
},
{
"epoch": 0.19090398652442447,
"grad_norm": 4.076957702636719e-05,
"learning_rate": 9.85994819184644e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 37540885.0,
"step": 340
},
{
"epoch": 0.19371139809096014,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.857962112399176e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 38099629.0,
"step": 345
},
{
"epoch": 0.1965188096574958,
"grad_norm": 4.100799560546875e-05,
"learning_rate": 9.85594399853587e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 38652150.0,
"step": 350
},
{
"epoch": 0.19932622122403143,
"grad_norm": 4.100799560546875e-05,
"learning_rate": 9.853893867930045e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 39210797.0,
"step": 355
},
{
"epoch": 0.2021336327905671,
"grad_norm": 4.076957702636719e-05,
"learning_rate": 9.851811738535616e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 39769370.0,
"step": 360
},
{
"epoch": 0.20494104435710275,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.849697628586727e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 40326502.0,
"step": 365
},
{
"epoch": 0.20774845592363841,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.847551556597587e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 40878514.0,
"step": 370
},
{
"epoch": 0.21055586749017405,
"grad_norm": 4.076957702636719e-05,
"learning_rate": 9.84537354136231e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 41433424.0,
"step": 375
},
{
"epoch": 0.2133632790567097,
"grad_norm": 4.00543212890625e-05,
"learning_rate": 9.843163601954753e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 41984536.0,
"step": 380
},
{
"epoch": 0.21617069062324537,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.84092175772835e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 42541222.0,
"step": 385
},
{
"epoch": 0.21897810218978103,
"grad_norm": 4.100799560546875e-05,
"learning_rate": 9.838648028315934e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 43094450.0,
"step": 390
},
{
"epoch": 0.22178551375631667,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.836342433629578e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 43650911.0,
"step": 395
},
{
"epoch": 0.22459292532285233,
"grad_norm": 4.100799560546875e-05,
"learning_rate": 9.834004993860406e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 44200053.0,
"step": 400
},
{
"epoch": 0.227400336889388,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.831635729478427e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 44750969.0,
"step": 405
},
{
"epoch": 0.23020774845592365,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.829234661232353e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 45307102.0,
"step": 410
},
{
"epoch": 0.23301516002245928,
"grad_norm": 4.076957702636719e-05,
"learning_rate": 9.82680181014942e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 45859589.0,
"step": 415
},
{
"epoch": 0.23582257158899494,
"grad_norm": 4.1484832763671875e-05,
"learning_rate": 9.824337197535193e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 46422855.0,
"step": 420
},
{
"epoch": 0.2386299831555306,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.821840844973392e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 46975002.0,
"step": 425
},
{
"epoch": 0.24143739472206627,
"grad_norm": 4.076957702636719e-05,
"learning_rate": 9.819312774325696e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 47533307.0,
"step": 430
},
{
"epoch": 0.2442448062886019,
"grad_norm": 4.00543212890625e-05,
"learning_rate": 9.816753007731553e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 48082587.0,
"step": 435
},
{
"epoch": 0.24705221785513756,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.814161567607994e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 48642378.0,
"step": 440
},
{
"epoch": 0.24985962942167322,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.811538476649417e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 49189920.0,
"step": 445
},
{
"epoch": 0.25266704098820886,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.808883757827411e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 49743946.0,
"step": 450
},
{
"epoch": 0.25547445255474455,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.806197434390536e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 50292671.0,
"step": 455
},
{
"epoch": 0.2582818641212802,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.803479529864135e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 50846963.0,
"step": 460
},
{
"epoch": 0.2610892756878158,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.80073006805012e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 51395702.0,
"step": 465
},
{
"epoch": 0.2638966872543515,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.797949073026756e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 51946439.0,
"step": 470
},
{
"epoch": 0.26670409882088714,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.795136569148469e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 52499846.0,
"step": 475
},
{
"epoch": 0.26951151038742277,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.792292581045619e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 53050389.0,
"step": 480
},
{
"epoch": 0.27231892195395846,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.789417133624282e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 53600167.0,
"step": 485
},
{
"epoch": 0.2751263335204941,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.786510252066044e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 54152064.0,
"step": 490
},
{
"epoch": 0.2779337450870298,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.783571961827773e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 54705723.0,
"step": 495
},
{
"epoch": 0.2807411566535654,
"grad_norm": 4.00543212890625e-05,
"learning_rate": 9.780602288641392e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 55264866.0,
"step": 500
},
{
"epoch": 0.28354856822010105,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.777601258513665e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 55825872.0,
"step": 505
},
{
"epoch": 0.28635597978663674,
"grad_norm": 4.00543212890625e-05,
"learning_rate": 9.774568897725958e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 56380766.0,
"step": 510
},
{
"epoch": 0.28916339135317237,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.771505232834017e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 56931871.0,
"step": 515
},
{
"epoch": 0.291970802919708,
"grad_norm": 4.0531158447265625e-05,
"learning_rate": 9.76841029066773e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 57484684.0,
"step": 520
},
{
"epoch": 0.2947782144862437,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.765284098330893e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 58038977.0,
"step": 525
},
{
"epoch": 0.29758562605277933,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.762126683200977e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 58587675.0,
"step": 530
},
{
"epoch": 0.300393037619315,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.758938072928884e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 59139287.0,
"step": 535
},
{
"epoch": 0.30320044918585065,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.755718295438705e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 59689179.0,
"step": 540
},
{
"epoch": 0.3060078607523863,
"grad_norm": 4.00543212890625e-05,
"learning_rate": 9.752467378927475e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 60240055.0,
"step": 545
},
{
"epoch": 0.308815272318922,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.74918535186493e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 60799372.0,
"step": 550
},
{
"epoch": 0.3116226838854576,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.745872242993255e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 61347609.0,
"step": 555
},
{
"epoch": 0.31443009545199324,
"grad_norm": 4.076957702636719e-05,
"learning_rate": 9.742528081326832e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 61905365.0,
"step": 560
},
{
"epoch": 0.31723750701852893,
"grad_norm": 4.029273986816406e-05,
"learning_rate": 9.739152896151981e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 62461111.0,
"step": 565
},
{
"epoch": 0.32004491858506456,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.735746717026719e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 63012068.0,
"step": 570
},
{
"epoch": 0.3228523301516002,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.732309573780484e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 63558400.0,
"step": 575
},
{
"epoch": 0.3256597417181359,
"grad_norm": 3.933906555175781e-05,
"learning_rate": 9.72884149651388e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 64112362.0,
"step": 580
},
{
"epoch": 0.3284671532846715,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.725342515598419e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 64660119.0,
"step": 585
},
{
"epoch": 0.3312745648512072,
"grad_norm": 3.981590270996094e-05,
"learning_rate": 9.721812661676245e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 65210665.0,
"step": 590
},
{
"epoch": 0.33408197641774284,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.718251965659874e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 65765080.0,
"step": 595
},
{
"epoch": 0.3368893879842785,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.71466045873192e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 66309812.0,
"step": 600
},
{
"epoch": 0.33969679955081417,
"grad_norm": 3.933906555175781e-05,
"learning_rate": 9.71103817234482e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 66863854.0,
"step": 605
},
{
"epoch": 0.3425042111173498,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.707385138220563e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 67412995.0,
"step": 610
},
{
"epoch": 0.34531162268388543,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.703701388350407e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 67970391.0,
"step": 615
},
{
"epoch": 0.3481190342504211,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.699986954994604e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 68526499.0,
"step": 620
},
{
"epoch": 0.35092644581695676,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.696241870682114e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 69084325.0,
"step": 625
},
{
"epoch": 0.35373385738349244,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.692466168210319e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 69636193.0,
"step": 630
},
{
"epoch": 0.3565412689500281,
"grad_norm": 3.933906555175781e-05,
"learning_rate": 9.688659880644745e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 70194359.0,
"step": 635
},
{
"epoch": 0.3593486805165637,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.684823041318754e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 70743263.0,
"step": 640
},
{
"epoch": 0.3621560920830994,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.680955683833278e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 71296779.0,
"step": 645
},
{
"epoch": 0.36496350364963503,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.677057842056495e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 71849630.0,
"step": 650
},
{
"epoch": 0.36777091521617067,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.673129550123562e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 72400218.0,
"step": 655
},
{
"epoch": 0.37057832678270636,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.669170842436287e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 72951048.0,
"step": 660
},
{
"epoch": 0.373385738349242,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.665181753662856e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 73502936.0,
"step": 665
},
{
"epoch": 0.3761931499157777,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.661162318737506e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 74056383.0,
"step": 670
},
{
"epoch": 0.3790005614823133,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.657112572860237e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 74603137.0,
"step": 675
},
{
"epoch": 0.38180797304884895,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.653032551496485e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 75158374.0,
"step": 680
},
{
"epoch": 0.38461538461538464,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.648922290376834e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 75704949.0,
"step": 685
},
{
"epoch": 0.38742279618192027,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.644781825496684e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 76261312.0,
"step": 690
},
{
"epoch": 0.3902302077484559,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.640611193115943e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 76805842.0,
"step": 695
},
{
"epoch": 0.3930376193149916,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.636410429758712e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 77363890.0,
"step": 700
},
{
"epoch": 0.3958450308815272,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.632179572212961e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 77921673.0,
"step": 705
},
{
"epoch": 0.39865244244806286,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.627918657530207e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 78471021.0,
"step": 710
},
{
"epoch": 0.40145985401459855,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.623627723025194e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 79023143.0,
"step": 715
},
{
"epoch": 0.4042672655811342,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.619306806275562e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 79582921.0,
"step": 720
},
{
"epoch": 0.40707467714766987,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.614955945121515e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 80133549.0,
"step": 725
},
{
"epoch": 0.4098820887142055,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.610575177665501e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 80680396.0,
"step": 730
},
{
"epoch": 0.41268950028074114,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.606164542271863e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 81234731.0,
"step": 735
},
{
"epoch": 0.41549691184727683,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.601724077566519e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 81784418.0,
"step": 740
},
{
"epoch": 0.41830432341381246,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.59725382243661e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 82340997.0,
"step": 745
},
{
"epoch": 0.4211117349803481,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.592753816030163e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 82892452.0,
"step": 750
},
{
"epoch": 0.4239191465468838,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.58822409775576e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 83445481.0,
"step": 755
},
{
"epoch": 0.4267265581134194,
"grad_norm": 3.933906555175781e-05,
"learning_rate": 9.583664707282172e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 83995491.0,
"step": 760
},
{
"epoch": 0.4295339696799551,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.57907568453803e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 84546988.0,
"step": 765
},
{
"epoch": 0.43234138124649074,
"grad_norm": 3.9577484130859375e-05,
"learning_rate": 9.574457069711466e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 85093367.0,
"step": 770
},
{
"epoch": 0.4351487928130264,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.56980890324976e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 85646824.0,
"step": 775
},
{
"epoch": 0.43795620437956206,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.565131225858998e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 86199422.0,
"step": 780
},
{
"epoch": 0.4407636159460977,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.560424078503694e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 86750405.0,
"step": 785
},
{
"epoch": 0.44357102751263333,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.555687502406456e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 87305490.0,
"step": 790
},
{
"epoch": 0.446378439079169,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.550921539047603e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 87855292.0,
"step": 795
},
{
"epoch": 0.44918585064570465,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.546126230164816e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 88405488.0,
"step": 800
},
{
"epoch": 0.4519932622122403,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.541301617752766e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 88958933.0,
"step": 805
},
{
"epoch": 0.454800673778776,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.536447744062752e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 89509187.0,
"step": 810
},
{
"epoch": 0.4576080853453116,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.531564651602323e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 90064116.0,
"step": 815
},
{
"epoch": 0.4604154969118473,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.526652383134911e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 90614975.0,
"step": 820
},
{
"epoch": 0.46322290847838293,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.521710981679458e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 91163118.0,
"step": 825
},
{
"epoch": 0.46603032004491857,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.516740490510031e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 91709280.0,
"step": 830
},
{
"epoch": 0.46883773161145426,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.511740953155456e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 92251306.0,
"step": 835
},
{
"epoch": 0.4716451431779899,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.506712413398922e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 92804191.0,
"step": 840
},
{
"epoch": 0.4744525547445255,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.501654915277611e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 93352960.0,
"step": 845
},
{
"epoch": 0.4772599663110612,
"grad_norm": 3.910064697265625e-05,
"learning_rate": 9.496568503082302e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 93904048.0,
"step": 850
},
{
"epoch": 0.48006737787759685,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.491453221356992e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 94450198.0,
"step": 855
},
{
"epoch": 0.48287478944413254,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.486309114898497e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 95010470.0,
"step": 860
},
{
"epoch": 0.48568220101066817,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.481136228756068e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 95565971.0,
"step": 865
},
{
"epoch": 0.4884896125772038,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.475934608230988e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 96115146.0,
"step": 870
},
{
"epoch": 0.4912970241437395,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.470704298876186e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 96665705.0,
"step": 875
},
{
"epoch": 0.4941044357102751,
"grad_norm": 3.886222839355469e-05,
"learning_rate": 9.465445346495826e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 97225452.0,
"step": 880
},
{
"epoch": 0.49691184727681076,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.460157797144915e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 97783773.0,
"step": 885
},
{
"epoch": 0.49971925884334645,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.454841697128895e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 98343857.0,
"step": 890
},
{
"epoch": 0.5025266704098821,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.449497093003244e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 98893955.0,
"step": 895
},
{
"epoch": 0.5053340819764177,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.444124031573053e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 99449900.0,
"step": 900
},
{
"epoch": 0.5081414935429533,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.438722559892638e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 99997493.0,
"step": 905
},
{
"epoch": 0.5109489051094891,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.433292725265108e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 100547188.0,
"step": 910
},
{
"epoch": 0.5137563166760247,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.427834575241962e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 101099643.0,
"step": 915
},
{
"epoch": 0.5165637282425604,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.42234815762267e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 101651500.0,
"step": 920
},
{
"epoch": 0.519371139809096,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.416833520454256e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 102203775.0,
"step": 925
},
{
"epoch": 0.5221785513756316,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.411290712030869e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 102748636.0,
"step": 930
},
{
"epoch": 0.5249859629421674,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.405719780893371e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 103304242.0,
"step": 935
},
{
"epoch": 0.527793374508703,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.400120775828907e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 103858388.0,
"step": 940
},
{
"epoch": 0.5306007860752386,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.394493745870479e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 104403364.0,
"step": 945
},
{
"epoch": 0.5334081976417743,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.388838740296514e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 104951146.0,
"step": 950
},
{
"epoch": 0.5362156092083099,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.38315580863043e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 105502998.0,
"step": 955
},
{
"epoch": 0.5390230207748455,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.377445000640214e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 106056029.0,
"step": 960
},
{
"epoch": 0.5418304323413813,
"grad_norm": 3.8623809814453125e-05,
"learning_rate": 9.371706366337973e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 106614456.0,
"step": 965
},
{
"epoch": 0.5446378439079169,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.365939955979505e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 107168795.0,
"step": 970
},
{
"epoch": 0.5474452554744526,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.360145820063852e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 107725110.0,
"step": 975
},
{
"epoch": 0.5502526670409882,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.354324009332864e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 108277854.0,
"step": 980
},
{
"epoch": 0.5530600786075238,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.348474574770748e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 108833252.0,
"step": 985
},
{
"epoch": 0.5558674901740596,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.342597567603632e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 109388680.0,
"step": 990
},
{
"epoch": 0.5586749017405952,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.336693039299103e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 109942919.0,
"step": 995
},
{
"epoch": 0.5614823133071308,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.330761041565767e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 110494393.0,
"step": 1000
},
{
"epoch": 0.5642897248736665,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.324801626352788e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 111053294.0,
"step": 1005
},
{
"epoch": 0.5670971364402021,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.318814845849443e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 111605069.0,
"step": 1010
},
{
"epoch": 0.5699045480067377,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.312800752484653e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 112157540.0,
"step": 1015
},
{
"epoch": 0.5727119595732735,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.306759398926535e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 112709965.0,
"step": 1020
},
{
"epoch": 0.5755193711398091,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.300690838081935e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 113264609.0,
"step": 1025
},
{
"epoch": 0.5783267827063447,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.29459512309596e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 113819280.0,
"step": 1030
},
{
"epoch": 0.5811341942728804,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.288472307351525e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 114374246.0,
"step": 1035
},
{
"epoch": 0.583941605839416,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.282322444468875e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 114930242.0,
"step": 1040
},
{
"epoch": 0.5867490174059518,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.276145588305121e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 115478310.0,
"step": 1045
},
{
"epoch": 0.5895564289724874,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.26994179295376e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 116026101.0,
"step": 1050
},
{
"epoch": 0.592363840539023,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.263711112744218e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 116582979.0,
"step": 1055
},
{
"epoch": 0.5951712521055587,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.257453602241356e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 117136441.0,
"step": 1060
},
{
"epoch": 0.5979786636720943,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.251169316245001e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 117688166.0,
"step": 1065
},
{
"epoch": 0.60078607523863,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.244858309789468e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 118234800.0,
"step": 1070
},
{
"epoch": 0.6035934868051657,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.238520638143072e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 118788259.0,
"step": 1075
},
{
"epoch": 0.6064008983717013,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.232156356807648e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 119335601.0,
"step": 1080
},
{
"epoch": 0.6092083099382369,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.225765521518065e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 119880870.0,
"step": 1085
},
{
"epoch": 0.6120157215047726,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.219348188241737e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 120434836.0,
"step": 1090
},
{
"epoch": 0.6148231330713082,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.212904413178128e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 120982263.0,
"step": 1095
},
{
"epoch": 0.617630544637844,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.206434252758272e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 121531079.0,
"step": 1100
},
{
"epoch": 0.6204379562043796,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.199937763644266e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 122089548.0,
"step": 1105
},
{
"epoch": 0.6232453677709152,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.193415002728783e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 122643175.0,
"step": 1110
},
{
"epoch": 0.6260527793374508,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.186866027134565e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 123195979.0,
"step": 1115
},
{
"epoch": 0.6288601909039865,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.180290894213934e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 123752298.0,
"step": 1120
},
{
"epoch": 0.6316676024705222,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.173689661548278e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 124313602.0,
"step": 1125
},
{
"epoch": 0.6344750140370579,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.167062386947555e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 124860278.0,
"step": 1130
},
{
"epoch": 0.6372824256035935,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 9.160409128449784e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 125410347.0,
"step": 1135
},
{
"epoch": 0.6400898371701291,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.153729944320533e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 125968306.0,
"step": 1140
},
{
"epoch": 0.6428972487366648,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.147024893052419e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 126520924.0,
"step": 1145
},
{
"epoch": 0.6457046603032004,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.140294033364585e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 127070792.0,
"step": 1150
},
{
"epoch": 0.6485120718697361,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.133537424202186e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 127619525.0,
"step": 1155
},
{
"epoch": 0.6513194834362718,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.126755124735887e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 128173299.0,
"step": 1160
},
{
"epoch": 0.6541268950028074,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.119947194361324e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 128726691.0,
"step": 1165
},
{
"epoch": 0.656934306569343,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.1131136926986e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 129277597.0,
"step": 1170
},
{
"epoch": 0.6597417181358787,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.10625467959176e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 129833579.0,
"step": 1175
},
{
"epoch": 0.6625491297024144,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.099370215108254e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 130386568.0,
"step": 1180
},
{
"epoch": 0.66535654126895,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.092460359538433e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 130940117.0,
"step": 1185
},
{
"epoch": 0.6681639528354857,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.085525173395e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 131494349.0,
"step": 1190
},
{
"epoch": 0.6709713644020213,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.078564717412495e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 132044064.0,
"step": 1195
},
{
"epoch": 0.673778775968557,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 9.071579052546754e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 132600861.0,
"step": 1200
},
{
"epoch": 0.6765861875350927,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.064568239974379e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 133149250.0,
"step": 1205
},
{
"epoch": 0.6793935991016283,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.057532341092203e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 133701108.0,
"step": 1210
},
{
"epoch": 0.682201010668164,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.050471417516754e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 134253832.0,
"step": 1215
},
{
"epoch": 0.6850084222346996,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 9.043385531083703e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 134807691.0,
"step": 1220
},
{
"epoch": 0.6878158338012352,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 9.036274743847342e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 135355206.0,
"step": 1225
},
{
"epoch": 0.6906232453677709,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.029139118080024e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 135908658.0,
"step": 1230
},
{
"epoch": 0.6934306569343066,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.021978716271629e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 136459115.0,
"step": 1235
},
{
"epoch": 0.6962380685008422,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 9.014793601129006e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 137010285.0,
"step": 1240
},
{
"epoch": 0.6990454800673779,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 9.007583835575437e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 137563309.0,
"step": 1245
},
{
"epoch": 0.7018528916339135,
"grad_norm": 3.814697265625e-05,
"learning_rate": 9.000349482750074e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 138115722.0,
"step": 1250
},
{
"epoch": 0.7046603032004491,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.99309060600739e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 138662903.0,
"step": 1255
},
{
"epoch": 0.7074677147669849,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.985807268916628e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 139215094.0,
"step": 1260
},
{
"epoch": 0.7102751263335205,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.978499535261239e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 139768126.0,
"step": 1265
},
{
"epoch": 0.7130825379000562,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 8.971167469038328e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 140322542.0,
"step": 1270
},
{
"epoch": 0.7158899494665918,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.96381113445809e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 140875658.0,
"step": 1275
},
{
"epoch": 0.7186973610331274,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.956430595943248e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 141423350.0,
"step": 1280
},
{
"epoch": 0.7215047725996631,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.949025918128489e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 141973889.0,
"step": 1285
},
{
"epoch": 0.7243121841661988,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.941597165859902e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 142529488.0,
"step": 1290
},
{
"epoch": 0.7271195957327344,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.934144404194404e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 143086636.0,
"step": 1295
},
{
"epoch": 0.7299270072992701,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.926667698399173e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 143635935.0,
"step": 1300
},
{
"epoch": 0.7327344188658057,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.919167113951081e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 144190580.0,
"step": 1305
},
{
"epoch": 0.7355418304323413,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.911642716536109e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 144741265.0,
"step": 1310
},
{
"epoch": 0.7383492419988771,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.904094572048783e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 145291693.0,
"step": 1315
},
{
"epoch": 0.7411566535654127,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.896522746591595e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 145839264.0,
"step": 1320
},
{
"epoch": 0.7439640651319483,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.888927306474415e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 146394351.0,
"step": 1325
},
{
"epoch": 0.746771476698484,
"grad_norm": 3.838539123535156e-05,
"learning_rate": 8.881308318213924e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 146948626.0,
"step": 1330
},
{
"epoch": 0.7495788882650196,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.873665848533021e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 147503679.0,
"step": 1335
},
{
"epoch": 0.7523862998315554,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.865999964360243e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 148050994.0,
"step": 1340
},
{
"epoch": 0.755193711398091,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.858310732829179e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 148609443.0,
"step": 1345
},
{
"epoch": 0.7580011229646266,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.85059822127788e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 149162196.0,
"step": 1350
},
{
"epoch": 0.7608085345311623,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.842862497248272e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 149711561.0,
"step": 1355
},
{
"epoch": 0.7636159460976979,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.835103628485561e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 150260709.0,
"step": 1360
},
{
"epoch": 0.7664233576642335,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.827321682937645e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 150817752.0,
"step": 1365
},
{
"epoch": 0.7692307692307693,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.819516728754514e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 151371055.0,
"step": 1370
},
{
"epoch": 0.7720381807973049,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.811688834287654e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 151929409.0,
"step": 1375
},
{
"epoch": 0.7748455923638405,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.803838068089448e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 152481972.0,
"step": 1380
},
{
"epoch": 0.7776530039303762,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.795964498912585e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 153033911.0,
"step": 1385
},
{
"epoch": 0.7804604154969118,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.78806819570944e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 153583648.0,
"step": 1390
},
{
"epoch": 0.7832678270634476,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.780149227631485e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 154140515.0,
"step": 1395
},
{
"epoch": 0.7860752386299832,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.772207664028678e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 154700589.0,
"step": 1400
},
{
"epoch": 0.7888826501965188,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.764243574448856e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 155251826.0,
"step": 1405
},
{
"epoch": 0.7916900617630545,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.756257028637125e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 155807861.0,
"step": 1410
},
{
"epoch": 0.7944974733295901,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.748248096535255e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 156358780.0,
"step": 1415
},
{
"epoch": 0.7973048848961257,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.740216848281055e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 156920999.0,
"step": 1420
},
{
"epoch": 0.8001122964626615,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.732163354207774e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 157477110.0,
"step": 1425
},
{
"epoch": 0.8029197080291971,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.724087684843469e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 158021585.0,
"step": 1430
},
{
"epoch": 0.8057271195957327,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.715989910910409e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 158571838.0,
"step": 1435
},
{
"epoch": 0.8085345311622684,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 8.707870103324428e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 159125749.0,
"step": 1440
},
{
"epoch": 0.811341942728804,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.699728333194328e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 159674555.0,
"step": 1445
},
{
"epoch": 0.8141493542953397,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.691564671821246e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 160227381.0,
"step": 1450
},
{
"epoch": 0.8169567658618754,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.683379190698027e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 160778083.0,
"step": 1455
},
{
"epoch": 0.819764177428411,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.675171961508604e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 161328123.0,
"step": 1460
},
{
"epoch": 0.8225715889949466,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.666943056127365e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 161886243.0,
"step": 1465
},
{
"epoch": 0.8253790005614823,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.658692546618528e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 162443002.0,
"step": 1470
},
{
"epoch": 0.8281864121280179,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.65042050523551e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 163004851.0,
"step": 1475
},
{
"epoch": 0.8309938236945537,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.642127004420289e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 163556926.0,
"step": 1480
},
{
"epoch": 0.8338012352610893,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.633812116802776e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 164108873.0,
"step": 1485
},
{
"epoch": 0.8366086468276249,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.625475915200171e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 164665480.0,
"step": 1490
},
{
"epoch": 0.8394160583941606,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.617118472616333e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 165218320.0,
"step": 1495
},
{
"epoch": 0.8422234699606962,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.60873986224114e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 165780227.0,
"step": 1500
},
{
"epoch": 0.8450308815272319,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.600340157449844e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 166330966.0,
"step": 1505
},
{
"epoch": 0.8478382930937676,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.591919431802425e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 166886726.0,
"step": 1510
},
{
"epoch": 0.8506457046603032,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 8.583477759042965e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 167443045.0,
"step": 1515
},
{
"epoch": 0.8534531162268388,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.575015213098974e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 167988006.0,
"step": 1520
},
{
"epoch": 0.8562605277933745,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.566531868080768e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 168542825.0,
"step": 1525
},
{
"epoch": 0.8590679393599102,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.558027798280808e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 169088910.0,
"step": 1530
},
{
"epoch": 0.8618753509264458,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.54950307817305e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 169640258.0,
"step": 1535
},
{
"epoch": 0.8646827624929815,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.540957782412291e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 170192821.0,
"step": 1540
},
{
"epoch": 0.8674901740595171,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.532391985833525e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 170744205.0,
"step": 1545
},
{
"epoch": 0.8702975856260527,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.523805763451276e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 171295984.0,
"step": 1550
},
{
"epoch": 0.8731049971925884,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.515199190458947e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 171849931.0,
"step": 1555
},
{
"epoch": 0.8759124087591241,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.50657234222816e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 172408275.0,
"step": 1560
},
{
"epoch": 0.8787198203256598,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.497925294308102e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 172964043.0,
"step": 1565
},
{
"epoch": 0.8815272318921954,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.489258122424846e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 173514585.0,
"step": 1570
},
{
"epoch": 0.884334643458731,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.480570902480709e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 174059607.0,
"step": 1575
},
{
"epoch": 0.8871420550252667,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.471863710553575e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 174614319.0,
"step": 1580
},
{
"epoch": 0.8899494665918024,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.463136622896231e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 175172735.0,
"step": 1585
},
{
"epoch": 0.892756878158338,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.454389715935704e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 175728018.0,
"step": 1590
},
{
"epoch": 0.8955642897248737,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.445623066272581e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 176280757.0,
"step": 1595
},
{
"epoch": 0.8983717012914093,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.436836750680346e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 176829094.0,
"step": 1600
},
{
"epoch": 0.9011791128579449,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.428030846104714e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 177382344.0,
"step": 1605
},
{
"epoch": 0.9039865244244806,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.41920542966294e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 177937865.0,
"step": 1610
},
{
"epoch": 0.9067939359910163,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.410360578643157e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 178489432.0,
"step": 1615
},
{
"epoch": 0.909601347557552,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.401496370503698e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 179042734.0,
"step": 1620
},
{
"epoch": 0.9124087591240876,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.392612882872409e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 179593096.0,
"step": 1625
},
{
"epoch": 0.9152161706906232,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.383710193545979e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 180143839.0,
"step": 1630
},
{
"epoch": 0.9180235822571589,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.374788380489258e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 180698105.0,
"step": 1635
},
{
"epoch": 0.9208309938236946,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.365847521834561e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 181250339.0,
"step": 1640
},
{
"epoch": 0.9236384053902302,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.356887695881005e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 181802391.0,
"step": 1645
},
{
"epoch": 0.9264458169567659,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.347908981093806e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 182355394.0,
"step": 1650
},
{
"epoch": 0.9292532285233015,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.338911456103598e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 182901242.0,
"step": 1655
},
{
"epoch": 0.9320606400898371,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.329895199705748e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 183449668.0,
"step": 1660
},
{
"epoch": 0.9348680516563729,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.320860290859659e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 183997705.0,
"step": 1665
},
{
"epoch": 0.9376754632229085,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.311806808688083e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 184552272.0,
"step": 1670
},
{
"epoch": 0.9404828747894441,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.302734832476427e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 185095192.0,
"step": 1675
},
{
"epoch": 0.9432902863559798,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.29364444167206e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 185650699.0,
"step": 1680
},
{
"epoch": 0.9460976979225154,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.284535715883611e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 186199858.0,
"step": 1685
},
{
"epoch": 0.948905109489051,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.275408734880283e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 186749316.0,
"step": 1690
},
{
"epoch": 0.9517125210555868,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.266263578591144e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 187294846.0,
"step": 1695
},
{
"epoch": 0.9545199326221224,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.257100327104433e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 187853983.0,
"step": 1700
},
{
"epoch": 0.9573273441886581,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 8.247919060666855e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 188407871.0,
"step": 1705
},
{
"epoch": 0.9601347557551937,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.238719859682882e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 188956349.0,
"step": 1710
},
{
"epoch": 0.9629421673217293,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.229502804714045e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 189509134.0,
"step": 1715
},
{
"epoch": 0.9657495788882651,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 8.220267976478232e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 190063240.0,
"step": 1720
},
{
"epoch": 0.9685569904548007,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 8.211015455848978e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 190622603.0,
"step": 1725
},
{
"epoch": 0.9713644020213363,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.20174532385476e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 191170189.0,
"step": 1730
},
{
"epoch": 0.974171813587872,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.192457661678286e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 191726109.0,
"step": 1735
},
{
"epoch": 0.9769792251544076,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.18315255065578e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 192280509.0,
"step": 1740
},
{
"epoch": 0.9797866367209432,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 8.173830072276275e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 192824519.0,
"step": 1745
},
{
"epoch": 0.982594048287479,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 8.1644903081809e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 193383680.0,
"step": 1750
},
{
"epoch": 0.9854014598540146,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.155133340162162e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 193933074.0,
"step": 1755
},
{
"epoch": 0.9882088714205502,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.145759250163229e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 194487752.0,
"step": 1760
},
{
"epoch": 0.9910162829870859,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 8.136368120277213e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 195041134.0,
"step": 1765
},
{
"epoch": 0.9938236945536215,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.126960032746456e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 195594894.0,
"step": 1770
},
{
"epoch": 0.9966311061201573,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.117535069961801e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 196147513.0,
"step": 1775
},
{
"epoch": 0.9994385176866929,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 8.10809331446188e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 196690606.0,
"step": 1780
},
{
"epoch": 1.0022459292532284,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.098634848932381e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 197200444.0,
"step": 1785
},
{
"epoch": 1.0050533408197642,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 8.089159756205334e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 197749842.0,
"step": 1790
},
{
"epoch": 1.0078607523863,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.079668119258376e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 198304764.0,
"step": 1795
},
{
"epoch": 1.0106681639528354,
"grad_norm": 3.814697265625e-05,
"learning_rate": 8.070160021214034e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 198855059.0,
"step": 1800
},
{
"epoch": 1.0134755755193712,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.06063554533899e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 199415427.0,
"step": 1805
},
{
"epoch": 1.0162829870859067,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.051094775043355e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 199964096.0,
"step": 1810
},
{
"epoch": 1.0190903986524424,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 8.041537793879934e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 200508470.0,
"step": 1815
},
{
"epoch": 1.0218978102189782,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.031964685543505e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 201065408.0,
"step": 1820
},
{
"epoch": 1.0247052217855137,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 8.02237553387007e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 201612886.0,
"step": 1825
},
{
"epoch": 1.0275126333520495,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.012770422836136e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 202169264.0,
"step": 1830
},
{
"epoch": 1.030320044918585,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 8.003149436557972e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 202717508.0,
"step": 1835
},
{
"epoch": 1.0331274564851207,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.993512659290872e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 203268972.0,
"step": 1840
},
{
"epoch": 1.0359348680516565,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.98386017542842e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 203815735.0,
"step": 1845
},
{
"epoch": 1.038742279618192,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.974192069501751e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 204361307.0,
"step": 1850
},
{
"epoch": 1.0415496911847277,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.964508426178806e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 204915912.0,
"step": 1855
},
{
"epoch": 1.0443571027512633,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 7.954809330263598e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 205471973.0,
"step": 1860
},
{
"epoch": 1.047164514317799,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.945094866695461e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 206029063.0,
"step": 1865
},
{
"epoch": 1.0499719258843347,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.935365120548316e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 206577216.0,
"step": 1870
},
{
"epoch": 1.0527793374508703,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.925620177029913e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 207134150.0,
"step": 1875
},
{
"epoch": 1.055586749017406,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.915860121481098e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 207682819.0,
"step": 1880
},
{
"epoch": 1.0583941605839415,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.906085039375058e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 208241654.0,
"step": 1885
},
{
"epoch": 1.0612015721504773,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.896295016316577e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 208802767.0,
"step": 1890
},
{
"epoch": 1.064008983717013,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.886490138041277e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 209355390.0,
"step": 1895
},
{
"epoch": 1.0668163952835485,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.87667049041488e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 209912022.0,
"step": 1900
},
{
"epoch": 1.0696238068500843,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.866836159432447e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 210463772.0,
"step": 1905
},
{
"epoch": 1.0724312184166198,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.85698723121763e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 211010865.0,
"step": 1910
},
{
"epoch": 1.0752386299831556,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.847123792021912e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 211558586.0,
"step": 1915
},
{
"epoch": 1.078046041549691,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.837245928223856e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 212116504.0,
"step": 1920
},
{
"epoch": 1.0808534531162268,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.827353726328352e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 212666296.0,
"step": 1925
},
{
"epoch": 1.0836608646827626,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.81744727296585e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 213221364.0,
"step": 1930
},
{
"epoch": 1.086468276249298,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.80752665489161e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 213768066.0,
"step": 1935
},
{
"epoch": 1.0892756878158338,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.797591958984938e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 214320096.0,
"step": 1940
},
{
"epoch": 1.0920830993823694,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.787643272248419e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 214868275.0,
"step": 1945
},
{
"epoch": 1.094890510948905,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.777680681807175e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 215417147.0,
"step": 1950
},
{
"epoch": 1.0976979225154408,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.767704274908079e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 215972838.0,
"step": 1955
},
{
"epoch": 1.1005053340819764,
"grad_norm": 3.790855407714844e-05,
"learning_rate": 7.757714138919005e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 216522140.0,
"step": 1960
},
{
"epoch": 1.1033127456485121,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.747710361328056e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 217070563.0,
"step": 1965
},
{
"epoch": 1.1061201572150476,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.737693029742805e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 217617212.0,
"step": 1970
},
{
"epoch": 1.1089275687815834,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.727662231889518e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 218169207.0,
"step": 1975
},
{
"epoch": 1.1117349803481191,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.717618055612397e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 218723495.0,
"step": 1980
},
{
"epoch": 1.1145423919146547,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.7075605888728e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 219274880.0,
"step": 1985
},
{
"epoch": 1.1173498034811904,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.69748991974848e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 219824766.0,
"step": 1990
},
{
"epoch": 1.120157215047726,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.687406136432802e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 220383177.0,
"step": 1995
},
{
"epoch": 1.1229646266142617,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.677309327233985e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 220934640.0,
"step": 2000
},
{
"epoch": 1.1257720381807972,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.667199580574319e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 221487359.0,
"step": 2005
},
{
"epoch": 1.128579449747333,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.657076984989392e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 222040267.0,
"step": 2010
},
{
"epoch": 1.1313868613138687,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.64694162912732e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 222596288.0,
"step": 2015
},
{
"epoch": 1.1341942728804042,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.63679360174796e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 223157044.0,
"step": 2020
},
{
"epoch": 1.13700168444694,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.626632991722144e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 223713806.0,
"step": 2025
},
{
"epoch": 1.1398090960134755,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.616459888030895e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 224263145.0,
"step": 2030
},
{
"epoch": 1.1426165075800112,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.606274379764647e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 224822104.0,
"step": 2035
},
{
"epoch": 1.145423919146547,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.596076556122467e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 225376423.0,
"step": 2040
},
{
"epoch": 1.1482313307130825,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.585866506411275e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 225928705.0,
"step": 2045
},
{
"epoch": 1.1510387422796182,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.575644320045061e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 226475014.0,
"step": 2050
},
{
"epoch": 1.1538461538461537,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.565410086544095e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 227024366.0,
"step": 2055
},
{
"epoch": 1.1566535654126895,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.555163895534155e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 227580327.0,
"step": 2060
},
{
"epoch": 1.1594609769792252,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.544905836745734e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 228135136.0,
"step": 2065
},
{
"epoch": 1.1622683885457608,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.53463600001326e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 228688467.0,
"step": 2070
},
{
"epoch": 1.1650758001122965,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.524354475274298e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 229243340.0,
"step": 2075
},
{
"epoch": 1.167883211678832,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.514061352568778e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 229791445.0,
"step": 2080
},
{
"epoch": 1.1706906232453678,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.503756722038194e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 230348082.0,
"step": 2085
},
{
"epoch": 1.1734980348119035,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.493440673924822e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 230897423.0,
"step": 2090
},
{
"epoch": 1.176305446378439,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.483113298570925e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 231452200.0,
"step": 2095
},
{
"epoch": 1.1791128579449748,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 7.472774686417964e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 232005564.0,
"step": 2100
},
{
"epoch": 1.1819202695115103,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.462424928005804e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 232553803.0,
"step": 2105
},
{
"epoch": 1.184727681078046,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.45206411397193e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 233112910.0,
"step": 2110
},
{
"epoch": 1.1875350926445818,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.441692335050637e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 233662235.0,
"step": 2115
},
{
"epoch": 1.1903425042111173,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.431309682072249e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 234211802.0,
"step": 2120
},
{
"epoch": 1.193149915777653,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 7.420916245962317e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 234766357.0,
"step": 2125
},
{
"epoch": 1.1959573273441886,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.410512117740829e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 235312658.0,
"step": 2130
},
{
"epoch": 1.1987647389107243,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.4000973885214e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 235858427.0,
"step": 2135
},
{
"epoch": 1.20157215047726,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.389672149510497e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 236414940.0,
"step": 2140
},
{
"epoch": 1.2043795620437956,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.379236492006609e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 236968181.0,
"step": 2145
},
{
"epoch": 1.2071869736103313,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.368790507399478e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 237522495.0,
"step": 2150
},
{
"epoch": 1.2099943851768669,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.358334287169277e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 238071824.0,
"step": 2155
},
{
"epoch": 1.2128017967434026,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.347867922885818e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 238624403.0,
"step": 2160
},
{
"epoch": 1.2156092083099383,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.337391506207755e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 239172889.0,
"step": 2165
},
{
"epoch": 1.2184166198764739,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.326905128881771e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 239726973.0,
"step": 2170
},
{
"epoch": 1.2212240314430096,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.316408882741774e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 240281485.0,
"step": 2175
},
{
"epoch": 1.2240314430095451,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.305902859708108e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 240834091.0,
"step": 2180
},
{
"epoch": 1.2268388545760809,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.295387151786728e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 241380490.0,
"step": 2185
},
{
"epoch": 1.2296462661426166,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.284861851068411e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 241925513.0,
"step": 2190
},
{
"epoch": 1.2324536777091522,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.274327049727938e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 242473886.0,
"step": 2195
},
{
"epoch": 1.235261089275688,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.263782840023293e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 243028174.0,
"step": 2200
},
{
"epoch": 1.2380685008422234,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.253229314294854e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 243583784.0,
"step": 2205
},
{
"epoch": 1.2408759124087592,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 7.242666564964582e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 244137045.0,
"step": 2210
},
{
"epoch": 1.2436833239752947,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.232094684535214e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 244682853.0,
"step": 2215
},
{
"epoch": 1.2464907355418304,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.22151376558945e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 245236916.0,
"step": 2220
},
{
"epoch": 1.2492981471083662,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.210923900789148e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 245789883.0,
"step": 2225
},
{
"epoch": 1.2521055586749017,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.200325182874507e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 246345364.0,
"step": 2230
},
{
"epoch": 1.2549129702414374,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.189717704663257e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 246892607.0,
"step": 2235
},
{
"epoch": 1.2577203818079732,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.179101559049847e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 247442091.0,
"step": 2240
},
{
"epoch": 1.2605277933745087,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.168476839004628e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 248002077.0,
"step": 2245
},
{
"epoch": 1.2633352049410442,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 7.15784363757304e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 248552163.0,
"step": 2250
},
{
"epoch": 1.26614261650758,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.147202047874803e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 249099457.0,
"step": 2255
},
{
"epoch": 1.2689500280741157,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.136552163103095e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 249653165.0,
"step": 2260
},
{
"epoch": 1.2717574396406512,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.125894076523733e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 250202923.0,
"step": 2265
},
{
"epoch": 1.274564851207187,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 7.115227881474371e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 250749595.0,
"step": 2270
},
{
"epoch": 1.2773722627737225,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.104553671363664e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 251300649.0,
"step": 2275
},
{
"epoch": 1.2801796743402583,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.093871539670455e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 251863817.0,
"step": 2280
},
{
"epoch": 1.282987085906794,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 7.083181579942975e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 252414100.0,
"step": 2285
},
{
"epoch": 1.2857944974733295,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.072483885797993e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 252963019.0,
"step": 2290
},
{
"epoch": 1.2886019090398653,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 7.061778550920021e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 253514748.0,
"step": 2295
},
{
"epoch": 1.2914093206064008,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.051065669060481e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 254066475.0,
"step": 2300
},
{
"epoch": 1.2942167321729365,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 7.040345334036888e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 254617738.0,
"step": 2305
},
{
"epoch": 1.2970241437394723,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.029617639732026e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 255166490.0,
"step": 2310
},
{
"epoch": 1.2998315553060078,
"grad_norm": 3.7670135498046875e-05,
"learning_rate": 7.018882680093131e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 255719454.0,
"step": 2315
},
{
"epoch": 1.3026389668725435,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 7.008140549131061e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 256278704.0,
"step": 2320
},
{
"epoch": 1.305446378439079,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.99739134091948e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 256835827.0,
"step": 2325
},
{
"epoch": 1.3082537900056148,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.986635149594029e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 257390899.0,
"step": 2330
},
{
"epoch": 1.3110612015721506,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.9758720693515e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 257935358.0,
"step": 2335
},
{
"epoch": 1.313868613138686,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 6.965102194449021e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 258484634.0,
"step": 2340
},
{
"epoch": 1.3166760247052218,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.954325619203218e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 259038074.0,
"step": 2345
},
{
"epoch": 1.3194834362717573,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.943542437989402e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 259590605.0,
"step": 2350
},
{
"epoch": 1.322290847838293,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.932752745240725e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 260139109.0,
"step": 2355
},
{
"epoch": 1.3250982594048288,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.921956635447372e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 260684120.0,
"step": 2360
},
{
"epoch": 1.3279056709713644,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.911154203155722e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 261238273.0,
"step": 2365
},
{
"epoch": 1.3307130825379,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.900345542967523e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 261790081.0,
"step": 2370
},
{
"epoch": 1.3335204941044356,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.889530749539062e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 262344651.0,
"step": 2375
},
{
"epoch": 1.3363279056709714,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.878709917580342e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 262895947.0,
"step": 2380
},
{
"epoch": 1.3391353172375071,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.867883141854245e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 263444696.0,
"step": 2385
},
{
"epoch": 1.3419427288040426,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.857050517175702e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 263985620.0,
"step": 2390
},
{
"epoch": 1.3447501403705784,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.846212138410873e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 264541949.0,
"step": 2395
},
{
"epoch": 1.347557551937114,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.835368100476305e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 265091725.0,
"step": 2400
},
{
"epoch": 1.3503649635036497,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.824518498338104e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 265646692.0,
"step": 2405
},
{
"epoch": 1.3531723750701854,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.813663427011106e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 266200325.0,
"step": 2410
},
{
"epoch": 1.355979786636721,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.802802981558042e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 266751956.0,
"step": 2415
},
{
"epoch": 1.3587871982032567,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.79193725708871e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 267305428.0,
"step": 2420
},
{
"epoch": 1.3615946097697922,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.781066348759134e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 267863237.0,
"step": 2425
},
{
"epoch": 1.364402021336328,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.770190351770737e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 268405424.0,
"step": 2430
},
{
"epoch": 1.3672094329028637,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.75930936136951e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 268962476.0,
"step": 2435
},
{
"epoch": 1.3700168444693992,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.748423472845165e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 269511277.0,
"step": 2440
},
{
"epoch": 1.372824256035935,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.737532781530317e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 270067794.0,
"step": 2445
},
{
"epoch": 1.3756316676024705,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.726637382799634e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 270617188.0,
"step": 2450
},
{
"epoch": 1.3784390791690062,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.715737372069017e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 271171384.0,
"step": 2455
},
{
"epoch": 1.381246490735542,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.704832844794752e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 271723504.0,
"step": 2460
},
{
"epoch": 1.3840539023020775,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.693923896472678e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 272276911.0,
"step": 2465
},
{
"epoch": 1.3868613138686132,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.68301062263735e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 272831546.0,
"step": 2470
},
{
"epoch": 1.3896687254351487,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.672093118861207e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 273386934.0,
"step": 2475
},
{
"epoch": 1.3924761370016845,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.66117148075373e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 273942988.0,
"step": 2480
},
{
"epoch": 1.3952835485682202,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.650245803960605e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 274492948.0,
"step": 2485
},
{
"epoch": 1.3980909601347558,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.639316184162887e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 275048430.0,
"step": 2490
},
{
"epoch": 1.4008983717012913,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.628382717076166e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 275609903.0,
"step": 2495
},
{
"epoch": 1.403705783267827,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 6.617445498449715e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 276162794.0,
"step": 2500
},
{
"epoch": 1.4065131948343628,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.606504624065669e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 276720878.0,
"step": 2505
},
{
"epoch": 1.4093206064008983,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.595560189738178e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 277280489.0,
"step": 2510
},
{
"epoch": 1.412128017967434,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.584612291312562e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 277833958.0,
"step": 2515
},
{
"epoch": 1.4149354295339696,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.573661024664484e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 278390354.0,
"step": 2520
},
{
"epoch": 1.4177428411005053,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.5627064856991e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 278946349.0,
"step": 2525
},
{
"epoch": 1.420550252667041,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.551748770350222e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 279496523.0,
"step": 2530
},
{
"epoch": 1.4233576642335766,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 6.540787974579485e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 280054235.0,
"step": 2535
},
{
"epoch": 1.4261650758001123,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.529824194375499e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 280608058.0,
"step": 2540
},
{
"epoch": 1.4289724873666478,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.518857525753006e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 281168287.0,
"step": 2545
},
{
"epoch": 1.4317798989331836,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.507888064752043e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 281729266.0,
"step": 2550
},
{
"epoch": 1.4345873104997193,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.496915907437106e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 282283631.0,
"step": 2555
},
{
"epoch": 1.4373947220662548,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.485941149896301e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 282834189.0,
"step": 2560
},
{
"epoch": 1.4402021336327906,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.474963888240505e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 283389944.0,
"step": 2565
},
{
"epoch": 1.4430095451993261,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.463984218602527e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 283947933.0,
"step": 2570
},
{
"epoch": 1.4458169567658619,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.453002237136261e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 284499561.0,
"step": 2575
},
{
"epoch": 1.4486243683323976,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.442018040015847e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 285045837.0,
"step": 2580
},
{
"epoch": 1.4514317798989331,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.43103172343483e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 285601567.0,
"step": 2585
},
{
"epoch": 1.4542391914654689,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.420043383605316e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 286153887.0,
"step": 2590
},
{
"epoch": 1.4570466030320044,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.409053116757128e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 286704964.0,
"step": 2595
},
{
"epoch": 1.4598540145985401,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.398061019136963e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 287260332.0,
"step": 2600
},
{
"epoch": 1.4626614261650759,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.38706718700756e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 287813982.0,
"step": 2605
},
{
"epoch": 1.4654688377316114,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.376071716646837e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 288366276.0,
"step": 2610
},
{
"epoch": 1.4682762492981472,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.365074704347064e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 288916157.0,
"step": 2615
},
{
"epoch": 1.4710836608646827,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.354076246414013e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 289466792.0,
"step": 2620
},
{
"epoch": 1.4738910724312184,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.343076439166117e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 290019432.0,
"step": 2625
},
{
"epoch": 1.4766984839977542,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.332075378933626e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 290567948.0,
"step": 2630
},
{
"epoch": 1.4795058955642897,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.32107316205776e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 291118208.0,
"step": 2635
},
{
"epoch": 1.4823133071308254,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 6.310069884889873e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 291669947.0,
"step": 2640
},
{
"epoch": 1.485120718697361,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.2990656437906e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 292222023.0,
"step": 2645
},
{
"epoch": 1.4879281302638967,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.28806053512902e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 292779409.0,
"step": 2650
},
{
"epoch": 1.4907355418304324,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 6.27705465528181e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 293327558.0,
"step": 2655
},
{
"epoch": 1.493542953396968,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.266048100632398e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 293874913.0,
"step": 2660
},
{
"epoch": 1.4963503649635037,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.255040967570123e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 294430550.0,
"step": 2665
},
{
"epoch": 1.4991577765300392,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.244033352489392e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 294985975.0,
"step": 2670
},
{
"epoch": 1.501965188096575,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.233025351788829e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 295534856.0,
"step": 2675
},
{
"epoch": 1.5047725996631107,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.222017061870437e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 296085702.0,
"step": 2680
},
{
"epoch": 1.5075800112296462,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.211008579138753e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 296638985.0,
"step": 2685
},
{
"epoch": 1.5103874227961818,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.2e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 297190692.0,
"step": 2690
},
{
"epoch": 1.5131948343627175,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.188991420861248e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 297744984.0,
"step": 2695
},
{
"epoch": 1.5160022459292533,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.177982938129562e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 298295767.0,
"step": 2700
},
{
"epoch": 1.518809657495789,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.166974648211172e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 298850175.0,
"step": 2705
},
{
"epoch": 1.5216170690623245,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.155966647510609e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 299398676.0,
"step": 2710
},
{
"epoch": 1.52442448062886,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.144959032429878e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 299955257.0,
"step": 2715
},
{
"epoch": 1.5272318921953958,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.133951899367604e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 300510596.0,
"step": 2720
},
{
"epoch": 1.5300393037619315,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.122945344718191e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 301060103.0,
"step": 2725
},
{
"epoch": 1.5328467153284673,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.11193946487098e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 301614426.0,
"step": 2730
},
{
"epoch": 1.5356541268950028,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.1009343562094015e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 302165808.0,
"step": 2735
},
{
"epoch": 1.5384615384615383,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.089930115110129e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 302713974.0,
"step": 2740
},
{
"epoch": 1.541268950028074,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.07892683794224e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 303264275.0,
"step": 2745
},
{
"epoch": 1.5440763615946098,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.0679246210663754e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 303810859.0,
"step": 2750
},
{
"epoch": 1.5468837731611456,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.056923560833883e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 304365824.0,
"step": 2755
},
{
"epoch": 1.549691184727681,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.045923753585987e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 304915806.0,
"step": 2760
},
{
"epoch": 1.5524985962942166,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.034925295652936e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 305466950.0,
"step": 2765
},
{
"epoch": 1.5553060078607523,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 6.023928283353163e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 306017854.0,
"step": 2770
},
{
"epoch": 1.558113419427288,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 6.0129328129924395e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 306565847.0,
"step": 2775
},
{
"epoch": 1.5609208309938238,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 6.001938980863035e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 307117825.0,
"step": 2780
},
{
"epoch": 1.5637282425603594,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.990946883242872e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 307671452.0,
"step": 2785
},
{
"epoch": 1.5665356541268949,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 5.979956616394685e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 308220590.0,
"step": 2790
},
{
"epoch": 1.5693430656934306,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 5.9689682765651705e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 308779145.0,
"step": 2795
},
{
"epoch": 1.5721504772599664,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.9579819599841534e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 309326580.0,
"step": 2800
},
{
"epoch": 1.5749578888265021,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.94699776286374e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 309877499.0,
"step": 2805
},
{
"epoch": 1.5777653003930376,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.9360157813974725e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 310430339.0,
"step": 2810
},
{
"epoch": 1.5805727119595732,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.9250361117594944e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 310984529.0,
"step": 2815
},
{
"epoch": 1.583380123526109,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.914058850103699e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 311538779.0,
"step": 2820
},
{
"epoch": 1.5861875350926447,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.9030840925628945e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 312095458.0,
"step": 2825
},
{
"epoch": 1.5889949466591804,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.892111935247957e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 312642688.0,
"step": 2830
},
{
"epoch": 1.591802358225716,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.881142474246995e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 313191732.0,
"step": 2835
},
{
"epoch": 1.5946097697922514,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.8701758056245006e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 313739280.0,
"step": 2840
},
{
"epoch": 1.5974171813587872,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.8592120254205144e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 314288851.0,
"step": 2845
},
{
"epoch": 1.600224592925323,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.8482512296497785e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 314838453.0,
"step": 2850
},
{
"epoch": 1.6030320044918585,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.837293514300903e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 315386608.0,
"step": 2855
},
{
"epoch": 1.6058394160583942,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.826338975335519e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 315936204.0,
"step": 2860
},
{
"epoch": 1.6086468276249297,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.81538770868744e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 316478496.0,
"step": 2865
},
{
"epoch": 1.6114542391914655,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.804439810261824e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 317035720.0,
"step": 2870
},
{
"epoch": 1.6142616507580012,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.7934953759343324e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 317586504.0,
"step": 2875
},
{
"epoch": 1.6170690623245367,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.782554501550286e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 318141325.0,
"step": 2880
},
{
"epoch": 1.6198764738910725,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.7716172829238355e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 318689676.0,
"step": 2885
},
{
"epoch": 1.622683885457608,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.760683815837112e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 319241350.0,
"step": 2890
},
{
"epoch": 1.6254912970241437,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.749754196039396e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 319790792.0,
"step": 2895
},
{
"epoch": 1.6282987085906795,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.738828519246271e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 320343884.0,
"step": 2900
},
{
"epoch": 1.631106120157215,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.727906881138793e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 320898677.0,
"step": 2905
},
{
"epoch": 1.6339135317237508,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.716989377362651e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 321450248.0,
"step": 2910
},
{
"epoch": 1.6367209432902863,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.706076103527323e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 322005047.0,
"step": 2915
},
{
"epoch": 1.639528354856822,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.6951671552052476e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 322555438.0,
"step": 2920
},
{
"epoch": 1.6423357664233578,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.684262627930982e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 323106990.0,
"step": 2925
},
{
"epoch": 1.6451431779898933,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.673362617200365e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 323653012.0,
"step": 2930
},
{
"epoch": 1.6479505895564288,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.6624672184696846e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 324209101.0,
"step": 2935
},
{
"epoch": 1.6507580011229646,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.651576527154836e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 324759021.0,
"step": 2940
},
{
"epoch": 1.6535654126895003,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.640690638630491e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 325307331.0,
"step": 2945
},
{
"epoch": 1.656372824256036,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.629809648229262e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 325864942.0,
"step": 2950
},
{
"epoch": 1.6591802358225716,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.618933651240866e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 326418194.0,
"step": 2955
},
{
"epoch": 1.661987647389107,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.608062742911291e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 326967964.0,
"step": 2960
},
{
"epoch": 1.6647950589556428,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.597197018441958e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 327521938.0,
"step": 2965
},
{
"epoch": 1.6676024705221786,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.586336572988896e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 328081736.0,
"step": 2970
},
{
"epoch": 1.6704098820887143,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.5754815016618974e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 328633509.0,
"step": 2975
},
{
"epoch": 1.6732172936552498,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 5.564631899523696e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 329189101.0,
"step": 2980
},
{
"epoch": 1.6760247052217854,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.5537878615891265e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 329744365.0,
"step": 2985
},
{
"epoch": 1.6788321167883211,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.542949482824298e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 330300716.0,
"step": 2990
},
{
"epoch": 1.6816395283548569,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 5.5321168581457565e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 330854482.0,
"step": 2995
},
{
"epoch": 1.6844469399213926,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.521290082419658e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 331408338.0,
"step": 3000
},
{
"epoch": 1.6872543514879281,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.510469250460936e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 331966602.0,
"step": 3005
},
{
"epoch": 1.6900617630544637,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.499654457032477e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 332518416.0,
"step": 3010
},
{
"epoch": 1.6928691746209994,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.488845796844277e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 333072716.0,
"step": 3015
},
{
"epoch": 1.6956765861875351,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.478043364552627e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 333623915.0,
"step": 3020
},
{
"epoch": 1.6984839977540709,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.467247254759275e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 334174562.0,
"step": 3025
},
{
"epoch": 1.7012914093206064,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.456457562010599e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 334731138.0,
"step": 3030
},
{
"epoch": 1.704098820887142,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.445674380796781e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 335289277.0,
"step": 3035
},
{
"epoch": 1.7069062324536777,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.4348978055509787e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 335837634.0,
"step": 3040
},
{
"epoch": 1.7097136440202134,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.4241279306484995e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 336393892.0,
"step": 3045
},
{
"epoch": 1.7125210555867492,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.413364850405972e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 336950326.0,
"step": 3050
},
{
"epoch": 1.7153284671532847,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 5.402608659080519e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 337494390.0,
"step": 3055
},
{
"epoch": 1.7181358787198202,
"grad_norm": 3.743171691894531e-05,
"learning_rate": 5.391859450868939e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 338046291.0,
"step": 3060
},
{
"epoch": 1.720943290286356,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.38111731990687e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 338598239.0,
"step": 3065
},
{
"epoch": 1.7237507018528917,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.370382360267973e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 339156514.0,
"step": 3070
},
{
"epoch": 1.7265581134194274,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 5.359654665963112e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 339711188.0,
"step": 3075
},
{
"epoch": 1.729365524985963,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 5.348934330939518e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 340262751.0,
"step": 3080
},
{
"epoch": 1.7321729365524985,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.338221449079979e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 340818765.0,
"step": 3085
},
{
"epoch": 1.7349803481190342,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.327516114202007e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 341370966.0,
"step": 3090
},
{
"epoch": 1.73778775968557,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 5.316818420057026e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 341925735.0,
"step": 3095
},
{
"epoch": 1.7405951712521057,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.306128460329545e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 342480144.0,
"step": 3100
},
{
"epoch": 1.7434025828186412,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.295446328636339e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 343034774.0,
"step": 3105
},
{
"epoch": 1.7462099943851768,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.28477211852563e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 343589175.0,
"step": 3110
},
{
"epoch": 1.7490174059517125,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.274105923476266e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 344134755.0,
"step": 3115
},
{
"epoch": 1.7518248175182483,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.263447836896906e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 344697282.0,
"step": 3120
},
{
"epoch": 1.7546322290847838,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.2527979521251985e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 345253230.0,
"step": 3125
},
{
"epoch": 1.7574396406513195,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.242156362426959e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 345807371.0,
"step": 3130
},
{
"epoch": 1.760247052217855,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.2315231609953726e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 346360453.0,
"step": 3135
},
{
"epoch": 1.7630544637843908,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.2208984409501525e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 346909029.0,
"step": 3140
},
{
"epoch": 1.7658618753509265,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.210282295336742e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 347469332.0,
"step": 3145
},
{
"epoch": 1.768669286917462,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.199674817125492e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 348017936.0,
"step": 3150
},
{
"epoch": 1.7714766984839978,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 5.189076099210852e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 348573678.0,
"step": 3155
},
{
"epoch": 1.7742841100505333,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 5.1784862344105504e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 349130130.0,
"step": 3160
},
{
"epoch": 1.777091521617069,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.1679053154647877e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 349678519.0,
"step": 3165
},
{
"epoch": 1.7798989331836048,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.157333435035418e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 350234527.0,
"step": 3170
},
{
"epoch": 1.7827063447501403,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.1467706857051455e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 350787725.0,
"step": 3175
},
{
"epoch": 1.7855137563166759,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.1362171599767064e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 351340627.0,
"step": 3180
},
{
"epoch": 1.7883211678832116,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.1256729502720627e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 351897843.0,
"step": 3185
},
{
"epoch": 1.7911285794497473,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.115138148931589e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 352447744.0,
"step": 3190
},
{
"epoch": 1.793935991016283,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.104612848213272e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 352995330.0,
"step": 3195
},
{
"epoch": 1.7967434025828186,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.094097140291892e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 353549685.0,
"step": 3200
},
{
"epoch": 1.7995508141493541,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 5.083591117258226e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 354103367.0,
"step": 3205
},
{
"epoch": 1.8023582257158899,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.0730948711182304e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 354648418.0,
"step": 3210
},
{
"epoch": 1.8051656372824256,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 5.0626084937922445e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 355196624.0,
"step": 3215
},
{
"epoch": 1.8079730488489614,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.052132077114181e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 355750522.0,
"step": 3220
},
{
"epoch": 1.810780460415497,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 5.041665712830725e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 356307216.0,
"step": 3225
},
{
"epoch": 1.8135878719820324,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 5.031209492600523e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 356857378.0,
"step": 3230
},
{
"epoch": 1.8163952835485682,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 5.0207635079933916e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 357404700.0,
"step": 3235
},
{
"epoch": 1.819202695115104,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 5.010327850489505e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 357952668.0,
"step": 3240
},
{
"epoch": 1.8220101066816397,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.9999026114785986e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 358503357.0,
"step": 3245
},
{
"epoch": 1.8248175182481752,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.9894878822591726e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 359056222.0,
"step": 3250
},
{
"epoch": 1.8276249298147107,
"grad_norm": 3.552436828613281e-05,
"learning_rate": 4.979083754037683e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 359613705.0,
"step": 3255
},
{
"epoch": 1.8304323413812464,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 4.9686903179277513e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 360174863.0,
"step": 3260
},
{
"epoch": 1.8332397529477822,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.9583076649493626e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 360725928.0,
"step": 3265
},
{
"epoch": 1.836047164514318,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.947935886028069e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 361277355.0,
"step": 3270
},
{
"epoch": 1.8388545760808535,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.937575071994194e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 361828968.0,
"step": 3275
},
{
"epoch": 1.841661987647389,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.927225313582036e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 362377333.0,
"step": 3280
},
{
"epoch": 1.8444693992139247,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.916886701429075e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 362927237.0,
"step": 3285
},
{
"epoch": 1.8472768107804605,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.906559326075178e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 363485255.0,
"step": 3290
},
{
"epoch": 1.8500842223469962,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.896243277961806e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 364043415.0,
"step": 3295
},
{
"epoch": 1.8528916339135317,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.885938647431222e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 364596780.0,
"step": 3300
},
{
"epoch": 1.8556990454800673,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.875645524725702e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 365147209.0,
"step": 3305
},
{
"epoch": 1.858506457046603,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.865363999986741e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 365707178.0,
"step": 3310
},
{
"epoch": 1.8613138686131387,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.855094163254265e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 366266619.0,
"step": 3315
},
{
"epoch": 1.8641212801796745,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.844836104465845e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 366819911.0,
"step": 3320
},
{
"epoch": 1.86692869174621,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.8345899134559056e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 367367717.0,
"step": 3325
},
{
"epoch": 1.8697361033127455,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.824355679954939e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 367919977.0,
"step": 3330
},
{
"epoch": 1.8725435148792813,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.8141334935887245e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 368469172.0,
"step": 3335
},
{
"epoch": 1.875350926445817,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.803923443877533e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 369023251.0,
"step": 3340
},
{
"epoch": 1.8781583380123528,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.793725620235354e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 369575387.0,
"step": 3345
},
{
"epoch": 1.8809657495788883,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.7835401119691063e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 370124260.0,
"step": 3350
},
{
"epoch": 1.8837731611454238,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.773367008277857e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 370681760.0,
"step": 3355
},
{
"epoch": 1.8865805727119596,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 4.763206398252041e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 371233812.0,
"step": 3360
},
{
"epoch": 1.8893879842784953,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.7530583708726814e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 371786264.0,
"step": 3365
},
{
"epoch": 1.892195395845031,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.742923015010608e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 372340592.0,
"step": 3370
},
{
"epoch": 1.8950028074115666,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.7328004194256824e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 372901718.0,
"step": 3375
},
{
"epoch": 1.897810218978102,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.722690672766016e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 373459814.0,
"step": 3380
},
{
"epoch": 1.9006176305446378,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.712593863567198e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 374012660.0,
"step": 3385
},
{
"epoch": 1.9034250421111736,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 4.702510080251521e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 374563720.0,
"step": 3390
},
{
"epoch": 1.906232453677709,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 4.692439411127199e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 375120157.0,
"step": 3395
},
{
"epoch": 1.9090398652442448,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.682381944387602e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 375674586.0,
"step": 3400
},
{
"epoch": 1.9118472768107804,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.672337768110481e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 376225975.0,
"step": 3405
},
{
"epoch": 1.9146546883773161,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.662306970257195e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 376775438.0,
"step": 3410
},
{
"epoch": 1.9174620999438519,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.6522896386719435e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 377330791.0,
"step": 3415
},
{
"epoch": 1.9202695115103874,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.642285861080997e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 377881155.0,
"step": 3420
},
{
"epoch": 1.9230769230769231,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 4.632295725091922e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 378428703.0,
"step": 3425
},
{
"epoch": 1.9258843346434587,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.6223193181928266e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 378981316.0,
"step": 3430
},
{
"epoch": 1.9286917462099944,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.612356727751581e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 379532283.0,
"step": 3435
},
{
"epoch": 1.9314991577765301,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.602408041015065e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 380085586.0,
"step": 3440
},
{
"epoch": 1.9343065693430657,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.59247334510839e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 380641970.0,
"step": 3445
},
{
"epoch": 1.9371139809096012,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.5825527270341506e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 381190156.0,
"step": 3450
},
{
"epoch": 1.939921392476137,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.572646273671649e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 381739880.0,
"step": 3455
},
{
"epoch": 1.9427288040426727,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.562754071776145e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 382287862.0,
"step": 3460
},
{
"epoch": 1.9455362156092084,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.5528762079780894e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 382839030.0,
"step": 3465
},
{
"epoch": 1.948343627175744,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.543012768782372e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 383395963.0,
"step": 3470
},
{
"epoch": 1.9511510387422795,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.533163840567553e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 383948951.0,
"step": 3475
},
{
"epoch": 1.9539584503088152,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.523329509585121e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 384502145.0,
"step": 3480
},
{
"epoch": 1.956765861875351,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.5135098619587235e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 385059056.0,
"step": 3485
},
{
"epoch": 1.9595732734418867,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.503704983683424e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 385610430.0,
"step": 3490
},
{
"epoch": 1.9623806850084222,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.493914960624941e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 386163521.0,
"step": 3495
},
{
"epoch": 1.9651880965749577,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.484139878518903e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 386717202.0,
"step": 3500
},
{
"epoch": 1.9679955081414935,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.474379822970086e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 387270797.0,
"step": 3505
},
{
"epoch": 1.9708029197080292,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.464634879451685e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 387825786.0,
"step": 3510
},
{
"epoch": 1.973610331274565,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.454905133304538e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 388381012.0,
"step": 3515
},
{
"epoch": 1.9764177428411005,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.445190669736402e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 388934798.0,
"step": 3520
},
{
"epoch": 1.979225154407636,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.435491573821194e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 389492165.0,
"step": 3525
},
{
"epoch": 1.9820325659741718,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.425807930498249e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 390045964.0,
"step": 3530
},
{
"epoch": 1.9848399775407075,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.41613982457158e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 390596385.0,
"step": 3535
},
{
"epoch": 1.9876473891072433,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.406487340709128e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 391153904.0,
"step": 3540
},
{
"epoch": 1.9904548006737788,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.3968505634420296e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 391703027.0,
"step": 3545
},
{
"epoch": 1.9932622122403143,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 4.387229577163864e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 392251032.0,
"step": 3550
},
{
"epoch": 1.99606962380685,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.377624466129931e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 392796150.0,
"step": 3555
},
{
"epoch": 1.9988770353733858,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.368035314456496e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 393342272.0,
"step": 3560
},
{
"epoch": 2.0016844469399215,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.358462206120066e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 393855386.0,
"step": 3565
},
{
"epoch": 2.004491858506457,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.348905224956645e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 394415587.0,
"step": 3570
},
{
"epoch": 2.0072992700729926,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 4.339364454661011e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 394974360.0,
"step": 3575
},
{
"epoch": 2.0101066816395283,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 4.329839978785966e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 395525472.0,
"step": 3580
},
{
"epoch": 2.012914093206064,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 4.320331880741626e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 396074688.0,
"step": 3585
},
{
"epoch": 2.0157215047726,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.310840243794667e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 396626100.0,
"step": 3590
},
{
"epoch": 2.018528916339135,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 4.30136515106762e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 397182264.0,
"step": 3595
},
{
"epoch": 2.021336327905671,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 4.29190668553812e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 397736977.0,
"step": 3600
},
{
"epoch": 2.0241437394722066,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.2824649300382e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 398291111.0,
"step": 3605
},
{
"epoch": 2.0269511510387423,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.273039967253544e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 398843183.0,
"step": 3610
},
{
"epoch": 2.029758562605278,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.263631879722787e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 399400041.0,
"step": 3615
},
{
"epoch": 2.0325659741718134,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 4.254240749836771e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 399954035.0,
"step": 3620
},
{
"epoch": 2.035373385738349,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.244866659837838e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 400511951.0,
"step": 3625
},
{
"epoch": 2.038180797304885,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.235509691819098e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 401060249.0,
"step": 3630
},
{
"epoch": 2.0409882088714206,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.2261699277237244e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 401604794.0,
"step": 3635
},
{
"epoch": 2.0437956204379564,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.21684744934422e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 402161143.0,
"step": 3640
},
{
"epoch": 2.0466030320044917,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.207542338321714e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 402708815.0,
"step": 3645
},
{
"epoch": 2.0494104435710274,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 4.198254676145238e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 403255060.0,
"step": 3650
},
{
"epoch": 2.052217855137563,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.1889845441510214e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 403812590.0,
"step": 3655
},
{
"epoch": 2.055025266704099,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.179732023521768e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 404367889.0,
"step": 3660
},
{
"epoch": 2.0578326782706347,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.170497195285955e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 404923862.0,
"step": 3665
},
{
"epoch": 2.06064008983717,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.1612801403171195e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 405475338.0,
"step": 3670
},
{
"epoch": 2.0634475014037057,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.1520809393331454e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 406028058.0,
"step": 3675
},
{
"epoch": 2.0662549129702414,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.142899672895568e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 406578786.0,
"step": 3680
},
{
"epoch": 2.069062324536777,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.1337364214088556e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 407132846.0,
"step": 3685
},
{
"epoch": 2.071869736103313,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 4.124591265119717e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 407682770.0,
"step": 3690
},
{
"epoch": 2.0746771476698482,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.1154642841163885e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 408230483.0,
"step": 3695
},
{
"epoch": 2.077484559236384,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 4.106355558327942e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 408781964.0,
"step": 3700
},
{
"epoch": 2.0802919708029197,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 4.097265167523573e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 409337735.0,
"step": 3705
},
{
"epoch": 2.0830993823694555,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 4.088193191311917e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 409891184.0,
"step": 3710
},
{
"epoch": 2.085906793935991,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.0791397091403416e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 410443472.0,
"step": 3715
},
{
"epoch": 2.0887142055025265,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.070104800294253e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 410994454.0,
"step": 3720
},
{
"epoch": 2.0915216170690623,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 4.061088543896403e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 411549641.0,
"step": 3725
},
{
"epoch": 2.094329028635598,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.052091018906196e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 412093816.0,
"step": 3730
},
{
"epoch": 2.0971364402021337,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 4.0431123041189955e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 412641540.0,
"step": 3735
},
{
"epoch": 2.0999438517686695,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.034152478165441e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 413187544.0,
"step": 3740
},
{
"epoch": 2.102751263335205,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 4.025211619510744e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 413743197.0,
"step": 3745
},
{
"epoch": 2.1055586749017405,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.016289806454021e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 414292096.0,
"step": 3750
},
{
"epoch": 2.1083660864682763,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 4.00738711712759e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 414844835.0,
"step": 3755
},
{
"epoch": 2.111173498034812,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.998503629496302e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 415395689.0,
"step": 3760
},
{
"epoch": 2.1139809096013478,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.989639421356841e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 415945234.0,
"step": 3765
},
{
"epoch": 2.116788321167883,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.9807945703370595e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 416502352.0,
"step": 3770
},
{
"epoch": 2.119595732734419,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.971969153895285e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 417056171.0,
"step": 3775
},
{
"epoch": 2.1224031443009546,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 3.963163249319653e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 417606441.0,
"step": 3780
},
{
"epoch": 2.1252105558674903,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.95437693372742e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 418165002.0,
"step": 3785
},
{
"epoch": 2.128017967434026,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.9456102840642973e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 418723608.0,
"step": 3790
},
{
"epoch": 2.1308253790005613,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.9368633771037685e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 419270681.0,
"step": 3795
},
{
"epoch": 2.133632790567097,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.928136289446426e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 419819889.0,
"step": 3800
},
{
"epoch": 2.136440202133633,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.919429097519291e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 420375837.0,
"step": 3805
},
{
"epoch": 2.1392476137001686,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.9107418775751554e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 420924439.0,
"step": 3810
},
{
"epoch": 2.1420550252667043,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.902074705691898e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 421479805.0,
"step": 3815
},
{
"epoch": 2.1448624368332396,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.8934276577718387e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 422037428.0,
"step": 3820
},
{
"epoch": 2.1476698483997754,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.884800809541053e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 422591274.0,
"step": 3825
},
{
"epoch": 2.150477259966311,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.8761942365487236e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 423148403.0,
"step": 3830
},
{
"epoch": 2.153284671532847,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.8676080141664755e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 423700476.0,
"step": 3835
},
{
"epoch": 2.156092083099382,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.859042217587709e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 424253758.0,
"step": 3840
},
{
"epoch": 2.158899494665918,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.8504969218269515e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 424804087.0,
"step": 3845
},
{
"epoch": 2.1617069062324537,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.8419722017191924e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 425365636.0,
"step": 3850
},
{
"epoch": 2.1645143177989894,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.8334681319192324e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 425920145.0,
"step": 3855
},
{
"epoch": 2.167321729365525,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.824984786901027e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 426462062.0,
"step": 3860
},
{
"epoch": 2.1701291409320604,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.8165222409570366e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 427011236.0,
"step": 3865
},
{
"epoch": 2.172936552498596,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.808080568197574e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 427564650.0,
"step": 3870
},
{
"epoch": 2.175743964065132,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.799659842550158e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 428110369.0,
"step": 3875
},
{
"epoch": 2.1785513756316677,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.791260137758859e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 428657566.0,
"step": 3880
},
{
"epoch": 2.1813587871982034,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.782881527383666e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 429208293.0,
"step": 3885
},
{
"epoch": 2.1841661987647387,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.774524084799829e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 429764935.0,
"step": 3890
},
{
"epoch": 2.1869736103312745,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 3.766187883197224e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 430313872.0,
"step": 3895
},
{
"epoch": 2.18978102189781,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.757872995579709e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 430868907.0,
"step": 3900
},
{
"epoch": 2.192588433464346,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.749579494764489e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 431419983.0,
"step": 3905
},
{
"epoch": 2.1953958450308817,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.74130745338147e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 431974999.0,
"step": 3910
},
{
"epoch": 2.198203256597417,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.733056943872636e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 432530614.0,
"step": 3915
},
{
"epoch": 2.2010106681639527,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.724828038491397e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 433082665.0,
"step": 3920
},
{
"epoch": 2.2038180797304885,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.7166208093019734e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 433636089.0,
"step": 3925
},
{
"epoch": 2.2066254912970242,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.708435328178753e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 434185821.0,
"step": 3930
},
{
"epoch": 2.20943290286356,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.7002716668056716e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 434740820.0,
"step": 3935
},
{
"epoch": 2.2122403144300953,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.692129896675571e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 435291751.0,
"step": 3940
},
{
"epoch": 2.215047725996631,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.684010089089591e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 435843376.0,
"step": 3945
},
{
"epoch": 2.2178551375631668,
"grad_norm": 3.552436828613281e-05,
"learning_rate": 3.6759123151565285e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 436392388.0,
"step": 3950
},
{
"epoch": 2.2206625491297025,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.6678366457922266e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 436941750.0,
"step": 3955
},
{
"epoch": 2.2234699606962383,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.6597831517189436e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 437496915.0,
"step": 3960
},
{
"epoch": 2.2262773722627736,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.651751903464745e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 438052514.0,
"step": 3965
},
{
"epoch": 2.2290847838293093,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.643742971362873e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 438608859.0,
"step": 3970
},
{
"epoch": 2.231892195395845,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.635756425551144e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 439161038.0,
"step": 3975
},
{
"epoch": 2.234699606962381,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.6277923359713226e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 439715521.0,
"step": 3980
},
{
"epoch": 2.2375070185289165,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.619850772368516e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 440270663.0,
"step": 3985
},
{
"epoch": 2.240314430095452,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.6119318042905615e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 440820622.0,
"step": 3990
},
{
"epoch": 2.2431218416619876,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.604035501087416e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 441372621.0,
"step": 3995
},
{
"epoch": 2.2459292532285233,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.5961619319105524e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 441922822.0,
"step": 4000
},
{
"epoch": 2.248736664795059,
"grad_norm": 3.4332275390625e-05,
"learning_rate": 3.588311165712346e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 442468147.0,
"step": 4005
},
{
"epoch": 2.2515440763615944,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.580483271245486e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 443021043.0,
"step": 4010
},
{
"epoch": 2.25435148792813,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.5726783170623544e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 443568908.0,
"step": 4015
},
{
"epoch": 2.257158899494666,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.564896371514439e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 444118978.0,
"step": 4020
},
{
"epoch": 2.2599663110612016,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.557137502751728e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 444672648.0,
"step": 4025
},
{
"epoch": 2.2627737226277373,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.549401778722121e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 445220135.0,
"step": 4030
},
{
"epoch": 2.2655811341942727,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.541689267170821e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 445769695.0,
"step": 4035
},
{
"epoch": 2.2683885457608084,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.534000035639757e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 446322562.0,
"step": 4040
},
{
"epoch": 2.271195957327344,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.5263341514669786e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 446875618.0,
"step": 4045
},
{
"epoch": 2.27400336889388,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.518691681786076e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 447424094.0,
"step": 4050
},
{
"epoch": 2.2768107804604156,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.511072693525584e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 447970798.0,
"step": 4055
},
{
"epoch": 2.279618192026951,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.503477253408406e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 448530070.0,
"step": 4060
},
{
"epoch": 2.2824256035934867,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.495905427951216e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 449080883.0,
"step": 4065
},
{
"epoch": 2.2852330151600224,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.488357283463892e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 449631606.0,
"step": 4070
},
{
"epoch": 2.288040426726558,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.480832886048919e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 450180177.0,
"step": 4075
},
{
"epoch": 2.290847838293094,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.473332301600827e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 450734393.0,
"step": 4080
},
{
"epoch": 2.293655249859629,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.465855595805597e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 451286197.0,
"step": 4085
},
{
"epoch": 2.296462661426165,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.458402834140099e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 451840444.0,
"step": 4090
},
{
"epoch": 2.2992700729927007,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.450974081871512e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 452396502.0,
"step": 4095
},
{
"epoch": 2.3020774845592364,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.4435694040567535e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 452947008.0,
"step": 4100
},
{
"epoch": 2.304884896125772,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.43618886554191e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 453507120.0,
"step": 4105
},
{
"epoch": 2.3076923076923075,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.428832530961672e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 454056290.0,
"step": 4110
},
{
"epoch": 2.3104997192588432,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.42150046473876e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 454610107.0,
"step": 4115
},
{
"epoch": 2.313307130825379,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.414192731083373e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 455163278.0,
"step": 4120
},
{
"epoch": 2.3161145423919147,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.4069093939926105e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 455708115.0,
"step": 4125
},
{
"epoch": 2.3189219539584505,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.399650517249926e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 456254633.0,
"step": 4130
},
{
"epoch": 2.3217293655249858,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.3924161644245626e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 456804402.0,
"step": 4135
},
{
"epoch": 2.3245367770915215,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 3.3852063988709934e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 457356066.0,
"step": 4140
},
{
"epoch": 2.3273441886580573,
"grad_norm": 3.719329833984375e-05,
"learning_rate": 3.378021283728372e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 457909549.0,
"step": 4145
},
{
"epoch": 2.330151600224593,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 3.3708608819199756e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 458462908.0,
"step": 4150
},
{
"epoch": 2.3329590117911287,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 3.363725256152659e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 459019310.0,
"step": 4155
},
{
"epoch": 2.335766423357664,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.3566144689162964e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 459569856.0,
"step": 4160
},
{
"epoch": 2.3385738349242,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.349528582483247e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 460122283.0,
"step": 4165
},
{
"epoch": 2.3413812464907355,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.342467658907796e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 460671252.0,
"step": 4170
},
{
"epoch": 2.3441886580572713,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.3354317600256214e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 461224519.0,
"step": 4175
},
{
"epoch": 2.346996069623807,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.328420947453246e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 461774292.0,
"step": 4180
},
{
"epoch": 2.3498034811903423,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.321435282587506e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 462324971.0,
"step": 4185
},
{
"epoch": 2.352610892756878,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.314474826605e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 462882605.0,
"step": 4190
},
{
"epoch": 2.355418304323414,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.307539640461568e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 463434314.0,
"step": 4195
},
{
"epoch": 2.3582257158899496,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.300629784891745e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 463983117.0,
"step": 4200
},
{
"epoch": 2.3610331274564853,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.2937453204082417e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 464531840.0,
"step": 4205
},
{
"epoch": 2.3638405390230206,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.286886307301399e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 465086777.0,
"step": 4210
},
{
"epoch": 2.3666479505895563,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.280052805638677e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 465637200.0,
"step": 4215
},
{
"epoch": 2.369455362156092,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.273244875264113e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 466187734.0,
"step": 4220
},
{
"epoch": 2.372262773722628,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.266462575797813e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 466738928.0,
"step": 4225
},
{
"epoch": 2.3750701852891636,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.259705966635416e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 467293894.0,
"step": 4230
},
{
"epoch": 2.377877596855699,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.252975106947581e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 467844924.0,
"step": 4235
},
{
"epoch": 2.3806850084222346,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.2462700556794665e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 468406343.0,
"step": 4240
},
{
"epoch": 2.3834924199887704,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.239590871550217e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 468955243.0,
"step": 4245
},
{
"epoch": 2.386299831555306,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.2329376130524454e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 469510244.0,
"step": 4250
},
{
"epoch": 2.389107243121842,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.226310338451722e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 470065469.0,
"step": 4255
},
{
"epoch": 2.391914654688377,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.2197091057860664e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 470621191.0,
"step": 4260
},
{
"epoch": 2.394722066254913,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.213133972865434e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 471173960.0,
"step": 4265
},
{
"epoch": 2.3975294778214487,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.206584997271219e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 471721395.0,
"step": 4270
},
{
"epoch": 2.4003368893879844,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.2000622363557336e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 472271956.0,
"step": 4275
},
{
"epoch": 2.40314430095452,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.193565747241729e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 472820362.0,
"step": 4280
},
{
"epoch": 2.4059517125210554,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.187095586821872e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 473370960.0,
"step": 4285
},
{
"epoch": 2.408759124087591,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.1806518117582644e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 473922648.0,
"step": 4290
},
{
"epoch": 2.411566535654127,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.174234478481934e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 474470657.0,
"step": 4295
},
{
"epoch": 2.4143739472206627,
"grad_norm": 3.409385681152344e-05,
"learning_rate": 3.167843643192352e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 475021208.0,
"step": 4300
},
{
"epoch": 2.4171813587871984,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.161479361856928e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 475572371.0,
"step": 4305
},
{
"epoch": 2.4199887703537337,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.155141690210532e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 476129454.0,
"step": 4310
},
{
"epoch": 2.4227961819202695,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.148830683754998e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 476684137.0,
"step": 4315
},
{
"epoch": 2.425603593486805,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.1425463977586445e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 477240244.0,
"step": 4320
},
{
"epoch": 2.428411005053341,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.136288887255781e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 477787923.0,
"step": 4325
},
{
"epoch": 2.4312184166198767,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.13005820704624e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 478337856.0,
"step": 4330
},
{
"epoch": 2.434025828186412,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.12385441169488e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 478892564.0,
"step": 4335
},
{
"epoch": 2.4368332397529477,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.117677555531126e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 479444225.0,
"step": 4340
},
{
"epoch": 2.4396406513194835,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.111527692648475e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 480002116.0,
"step": 4345
},
{
"epoch": 2.4424480628860192,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.1054048769040406e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 480542313.0,
"step": 4350
},
{
"epoch": 2.445255474452555,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.099309161918066e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 481093218.0,
"step": 4355
},
{
"epoch": 2.4480628860190903,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 3.093240601073465e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 481638497.0,
"step": 4360
},
{
"epoch": 2.450870297585626,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.087199247515347e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 482191203.0,
"step": 4365
},
{
"epoch": 2.4536777091521618,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.081185154150558e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 482744679.0,
"step": 4370
},
{
"epoch": 2.4564851207186975,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.075198373647212e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 483297916.0,
"step": 4375
},
{
"epoch": 2.4592925322852333,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.069238958434235e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 483853225.0,
"step": 4380
},
{
"epoch": 2.4620999438517686,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.063306960700897e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 484401633.0,
"step": 4385
},
{
"epoch": 2.4649073554183043,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.0574024323963676e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 484958826.0,
"step": 4390
},
{
"epoch": 2.46771476698484,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 3.0515254252292517e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 485519579.0,
"step": 4395
},
{
"epoch": 2.470522178551376,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 3.045675990667137e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 486074867.0,
"step": 4400
},
{
"epoch": 2.473329590117911,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.039854179936149e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 486622230.0,
"step": 4405
},
{
"epoch": 2.476137001684447,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.0340600440204953e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 487168498.0,
"step": 4410
},
{
"epoch": 2.4789444132509826,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.0282936336620264e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 487722028.0,
"step": 4415
},
{
"epoch": 2.4817518248175183,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 3.0225549993597855e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 488285750.0,
"step": 4420
},
{
"epoch": 2.484559236384054,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 3.01684419136957e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 488839075.0,
"step": 4425
},
{
"epoch": 2.4873666479505894,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 3.0111612597034867e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 489389237.0,
"step": 4430
},
{
"epoch": 2.490174059517125,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 3.0055062541295205e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 489939543.0,
"step": 4435
},
{
"epoch": 2.492981471083661,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.999879224171091e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 490498563.0,
"step": 4440
},
{
"epoch": 2.4957888826501966,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.994280219106629e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 491054881.0,
"step": 4445
},
{
"epoch": 2.4985962942167323,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.9887092879691318e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 491610500.0,
"step": 4450
},
{
"epoch": 2.501403705783268,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.9831664795457446e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 492166822.0,
"step": 4455
},
{
"epoch": 2.5042111173498034,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.977651842377329e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 492718184.0,
"step": 4460
},
{
"epoch": 2.507018528916339,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.9721654247580383e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 493269291.0,
"step": 4465
},
{
"epoch": 2.509825940482875,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.9667072747348922e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 493824296.0,
"step": 4470
},
{
"epoch": 2.51263335204941,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.961277440107363e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 494379518.0,
"step": 4475
},
{
"epoch": 2.5154407636159464,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.9558759684269465e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 494935559.0,
"step": 4480
},
{
"epoch": 2.5182481751824817,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.9505029069967577e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 495490094.0,
"step": 4485
},
{
"epoch": 2.5210555867490174,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.945158302871104e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 496047710.0,
"step": 4490
},
{
"epoch": 2.523862998315553,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.9398422028550858e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 496596973.0,
"step": 4495
},
{
"epoch": 2.5266704098820885,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.934554653504175e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 497144172.0,
"step": 4500
},
{
"epoch": 2.529477821448624,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.9292957011238153e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 497698226.0,
"step": 4505
},
{
"epoch": 2.53228523301516,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.9240653917690116e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 498249131.0,
"step": 4510
},
{
"epoch": 2.5350926445816957,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.918863771243932e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 498798216.0,
"step": 4515
},
{
"epoch": 2.5379000561482314,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.913690885101503e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 499351434.0,
"step": 4520
},
{
"epoch": 2.5407074677147667,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.9085467786430075e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 499910114.0,
"step": 4525
},
{
"epoch": 2.5435148792813025,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.9034314969176974e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 500467379.0,
"step": 4530
},
{
"epoch": 2.5463222908478382,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.8983450847223893e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 501014665.0,
"step": 4535
},
{
"epoch": 2.549129702414374,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.8932875866010782e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 501570276.0,
"step": 4540
},
{
"epoch": 2.5519371139809097,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.888259046844544e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 502113861.0,
"step": 4545
},
{
"epoch": 2.554744525547445,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.8832595094899683e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 502671432.0,
"step": 4550
},
{
"epoch": 2.5575519371139808,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.878289018320542e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 503231399.0,
"step": 4555
},
{
"epoch": 2.5603593486805165,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.8733476168650887e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 503787894.0,
"step": 4560
},
{
"epoch": 2.5631667602470523,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.8684353483976765e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 504343672.0,
"step": 4565
},
{
"epoch": 2.565974171813588,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.8635522559372482e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 504899932.0,
"step": 4570
},
{
"epoch": 2.5687815833801233,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 2.858698382247234e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 505456280.0,
"step": 4575
},
{
"epoch": 2.571588994946659,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.853873769835185e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 506011141.0,
"step": 4580
},
{
"epoch": 2.574396406513195,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.8490784609523975e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 506563586.0,
"step": 4585
},
{
"epoch": 2.5772038180797305,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.8443124975935442e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 507120320.0,
"step": 4590
},
{
"epoch": 2.5800112296462663,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 2.8395759214963045e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 507675333.0,
"step": 4595
},
{
"epoch": 2.5828186412128016,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.8348687741410022e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 508225578.0,
"step": 4600
},
{
"epoch": 2.5856260527793373,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.8301910967502386e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 508779780.0,
"step": 4605
},
{
"epoch": 2.588433464345873,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.825542930288535e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 509334766.0,
"step": 4610
},
{
"epoch": 2.591240875912409,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.8209243154619703e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 509885085.0,
"step": 4615
},
{
"epoch": 2.5940482874789446,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.8163352927178284e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 510434191.0,
"step": 4620
},
{
"epoch": 2.59685569904548,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.81177590224424e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 510992007.0,
"step": 4625
},
{
"epoch": 2.5996631106120156,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.807246183969836e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 511535458.0,
"step": 4630
},
{
"epoch": 2.6024705221785513,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.80274617756339e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 512096261.0,
"step": 4635
},
{
"epoch": 2.605277933745087,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.79827592243348e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 512650203.0,
"step": 4640
},
{
"epoch": 2.608085345311623,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.793835457728136e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 513200923.0,
"step": 4645
},
{
"epoch": 2.610892756878158,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.789424822334499e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 513756141.0,
"step": 4650
},
{
"epoch": 2.613700168444694,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.785044054878485e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 514318777.0,
"step": 4655
},
{
"epoch": 2.6165075800112296,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.780693193724439e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 514877191.0,
"step": 4660
},
{
"epoch": 2.6193149915777654,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.7763722769748062e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 515423610.0,
"step": 4665
},
{
"epoch": 2.622122403144301,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.772081342469793e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 515970305.0,
"step": 4670
},
{
"epoch": 2.6249298147108364,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.76782042778704e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 516524519.0,
"step": 4675
},
{
"epoch": 2.627737226277372,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.7635895702412877e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 517071784.0,
"step": 4680
},
{
"epoch": 2.630544637843908,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.759388806884057e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 517625722.0,
"step": 4685
},
{
"epoch": 2.6333520494104437,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.7552181745033163e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 518181488.0,
"step": 4690
},
{
"epoch": 2.6361594609769794,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.7510777096231655e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 518735057.0,
"step": 4695
},
{
"epoch": 2.6389668725435147,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.7469674485035143e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 519284341.0,
"step": 4700
},
{
"epoch": 2.6417742841100504,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.7428874271397647e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 519844052.0,
"step": 4705
},
{
"epoch": 2.644581695676586,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.7388376812624932e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 520393755.0,
"step": 4710
},
{
"epoch": 2.647389107243122,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.7348182463371448e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 520939485.0,
"step": 4715
},
{
"epoch": 2.6501965188096577,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.7308291575637122e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 521490972.0,
"step": 4720
},
{
"epoch": 2.653003930376193,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.726870449876439e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 522046812.0,
"step": 4725
},
{
"epoch": 2.6558113419427287,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.7229421579435037e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 522599824.0,
"step": 4730
},
{
"epoch": 2.6586187535092645,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.719044316166723e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 523152229.0,
"step": 4735
},
{
"epoch": 2.6614261650758,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.7151769586812447e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 523710114.0,
"step": 4740
},
{
"epoch": 2.664233576642336,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.7113401193552564e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 524259379.0,
"step": 4745
},
{
"epoch": 2.6670409882088713,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.707533831789681e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 524811545.0,
"step": 4750
},
{
"epoch": 2.669848399775407,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.7037581293178877e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 525362716.0,
"step": 4755
},
{
"epoch": 2.6726558113419427,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.700013045005396e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 525914647.0,
"step": 4760
},
{
"epoch": 2.6754632229084785,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.696298611649593e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 526460363.0,
"step": 4765
},
{
"epoch": 2.6782706344750142,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.6926148617794374e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 527013619.0,
"step": 4770
},
{
"epoch": 2.6810780460415495,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.6889618276551795e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 527569667.0,
"step": 4775
},
{
"epoch": 2.6838854576080853,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.6853395412680797e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 528117310.0,
"step": 4780
},
{
"epoch": 2.686692869174621,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.6817480343401255e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 528672741.0,
"step": 4785
},
{
"epoch": 2.6895002807411568,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 2.6781873383237548e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 529231565.0,
"step": 4790
},
{
"epoch": 2.6923076923076925,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.6746574844015817e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 529784538.0,
"step": 4795
},
{
"epoch": 2.695115103874228,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.67115850348612e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999523162841,
"num_tokens": 530344021.0,
"step": 4800
},
{
"epoch": 2.6979225154407636,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6676904262195166e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 530898167.0,
"step": 4805
},
{
"epoch": 2.7007299270072993,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6642532829732803e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 531445249.0,
"step": 4810
},
{
"epoch": 2.703537338573835,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6608471038480187e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 532000652.0,
"step": 4815
},
{
"epoch": 2.706344750140371,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6574719186731696e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 532546443.0,
"step": 4820
},
{
"epoch": 2.709152161706906,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.6541277570067448e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 533100076.0,
"step": 4825
},
{
"epoch": 2.711959573273442,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.65081464813507e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 533648568.0,
"step": 4830
},
{
"epoch": 2.7147669848399776,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6475326210725255e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 534205768.0,
"step": 4835
},
{
"epoch": 2.7175743964065133,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6442817045612965e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 534761319.0,
"step": 4840
},
{
"epoch": 2.720381807973049,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.6410619270711157e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 535316032.0,
"step": 4845
},
{
"epoch": 2.7231892195395844,
"grad_norm": 3.4809112548828125e-05,
"learning_rate": 2.6378733167990227e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 535871594.0,
"step": 4850
},
{
"epoch": 2.72599663110612,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.6347159016691074e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 536425028.0,
"step": 4855
},
{
"epoch": 2.728804042672656,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.631589709332271e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 536980077.0,
"step": 4860
},
{
"epoch": 2.7316114542391916,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.6284947671659832e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 537532742.0,
"step": 4865
},
{
"epoch": 2.7344188658057273,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.625431102274042e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 538084924.0,
"step": 4870
},
{
"epoch": 2.7372262773722627,
"grad_norm": 3.337860107421875e-05,
"learning_rate": 2.622398741486335e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 538639412.0,
"step": 4875
},
{
"epoch": 2.7400336889387984,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6193977113586082e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 539191297.0,
"step": 4880
},
{
"epoch": 2.742841100505334,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.6164280381722277e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 539744336.0,
"step": 4885
},
{
"epoch": 2.74564851207187,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.613489747933956e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 540297982.0,
"step": 4890
},
{
"epoch": 2.7484559236384056,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.6105828663757183e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 540853227.0,
"step": 4895
},
{
"epoch": 2.751263335204941,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.6077074189543822e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 541409567.0,
"step": 4900
},
{
"epoch": 2.7540707467714767,
"grad_norm": 3.695487976074219e-05,
"learning_rate": 2.6048634308515305e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 541966124.0,
"step": 4905
},
{
"epoch": 2.7568781583380124,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.6020509269732445e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 542521286.0,
"step": 4910
},
{
"epoch": 2.759685569904548,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.5992699319498815e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 543063982.0,
"step": 4915
},
{
"epoch": 2.762492981471084,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.5965204701358646e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 543618225.0,
"step": 4920
},
{
"epoch": 2.765300393037619,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.593802565609464e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 544171246.0,
"step": 4925
},
{
"epoch": 2.768107804604155,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.5911162421725903e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 544710993.0,
"step": 4930
},
{
"epoch": 2.7709152161706907,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.588461523350583e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 545264957.0,
"step": 4935
},
{
"epoch": 2.7737226277372264,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.585838432392007e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 545817184.0,
"step": 4940
},
{
"epoch": 2.776530039303762,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.5832469922684454e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 546368057.0,
"step": 4945
},
{
"epoch": 2.7793374508702975,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.5806872256743047e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 546921971.0,
"step": 4950
},
{
"epoch": 2.7821448624368332,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.5781591550266094e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 547476877.0,
"step": 4955
},
{
"epoch": 2.784952274003369,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.5756628024648076e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 548027287.0,
"step": 4960
},
{
"epoch": 2.7877596855699043,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.57319818985058e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 548572683.0,
"step": 4965
},
{
"epoch": 2.7905670971364405,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.570765338767646e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 549127107.0,
"step": 4970
},
{
"epoch": 2.7933745087029758,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.568364270521573e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 549684823.0,
"step": 4975
},
{
"epoch": 2.7961819202695115,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.5659950061395948e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 550234676.0,
"step": 4980
},
{
"epoch": 2.7989893318360473,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.5636575663704226e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 550779576.0,
"step": 4985
},
{
"epoch": 2.8017967434025826,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.5613519716840653e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 551334644.0,
"step": 4990
},
{
"epoch": 2.8046041549691187,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.55907824227165e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 551881166.0,
"step": 4995
},
{
"epoch": 2.807411566535654,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.556836398045247e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 552430559.0,
"step": 5000
},
{
"epoch": 2.81021897810219,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.554626458637691e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 552987223.0,
"step": 5005
},
{
"epoch": 2.8130263896687255,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.552448443402414e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 553531494.0,
"step": 5010
},
{
"epoch": 2.815833801235261,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.550302371413273e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 554084782.0,
"step": 5015
},
{
"epoch": 2.8186412128017966,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.548188261464384e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 554632393.0,
"step": 5020
},
{
"epoch": 2.8214486243683323,
"grad_norm": 3.4809112548828125e-05,
"learning_rate": 2.5461061320699555e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 555190522.0,
"step": 5025
},
{
"epoch": 2.824256035934868,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.5440560014641302e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 555744351.0,
"step": 5030
},
{
"epoch": 2.827063447501404,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.542037887600822e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 556297519.0,
"step": 5035
},
{
"epoch": 2.829870859067939,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.5400518081535596e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 556845909.0,
"step": 5040
},
{
"epoch": 2.832678270634475,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.5380977805153318e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 557400612.0,
"step": 5045
},
{
"epoch": 2.8354856822010106,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.5361758217984356e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 557957824.0,
"step": 5050
},
{
"epoch": 2.8382930937675463,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.5342859488343268e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 558507879.0,
"step": 5055
},
{
"epoch": 2.841100505334082,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.5324281781734712e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 559053864.0,
"step": 5060
},
{
"epoch": 2.8439079169006174,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.5306025260851995e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 559608757.0,
"step": 5065
},
{
"epoch": 2.846715328467153,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.528809008557567e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 560152508.0,
"step": 5070
},
{
"epoch": 2.849522740033689,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.527047641297212e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 560709603.0,
"step": 5075
},
{
"epoch": 2.8523301516002246,
"grad_norm": 3.62396240234375e-05,
"learning_rate": 2.5253184397292168e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 561264021.0,
"step": 5080
},
{
"epoch": 2.8551375631667604,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.5236214189969777e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 561819249.0,
"step": 5085
},
{
"epoch": 2.8579449747332957,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.521956593962065e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 562363845.0,
"step": 5090
},
{
"epoch": 2.8607523862998314,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.5203239792040996e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 562907165.0,
"step": 5095
},
{
"epoch": 2.863559797866367,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.518723589020622e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 563464021.0,
"step": 5100
},
{
"epoch": 2.866367209432903,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.517155437426968e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 564013299.0,
"step": 5105
},
{
"epoch": 2.8691746209994387,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.5156195381561432e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 564569494.0,
"step": 5110
},
{
"epoch": 2.871982032565974,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.5141159046587077e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 565127735.0,
"step": 5115
},
{
"epoch": 2.8747894441325097,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.5126445501026548e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 565678334.0,
"step": 5120
},
{
"epoch": 2.8775968556990454,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 2.5112054873732968e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 566229333.0,
"step": 5125
},
{
"epoch": 2.880404267265581,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.5097987290731527e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 566783012.0,
"step": 5130
},
{
"epoch": 2.883211678832117,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.5084242875218346e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 567334001.0,
"step": 5135
},
{
"epoch": 2.8860190903986522,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.507082174755946e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 567888452.0,
"step": 5140
},
{
"epoch": 2.888826501965188,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.5057724025289695e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 568436097.0,
"step": 5145
},
{
"epoch": 2.8916339135317237,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.504494982311169e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 568987904.0,
"step": 5150
},
{
"epoch": 2.8944413250982595,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.5032499252894858e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 569539803.0,
"step": 5155
},
{
"epoch": 2.897248736664795,
"grad_norm": 3.4809112548828125e-05,
"learning_rate": 2.5020372423674444e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 570098733.0,
"step": 5160
},
{
"epoch": 2.9000561482313305,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.500856944165053e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 570646446.0,
"step": 5165
},
{
"epoch": 2.9028635597978663,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4997090410187124e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 571200890.0,
"step": 5170
},
{
"epoch": 2.905670971364402,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.498593542981125e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 571755411.0,
"step": 5175
},
{
"epoch": 2.9084783829309377,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4975104598212094e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 572309313.0,
"step": 5180
},
{
"epoch": 2.9112857944974735,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4964598010240096e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 572857278.0,
"step": 5185
},
{
"epoch": 2.914093206064009,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.4954415757906173e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 573405011.0,
"step": 5190
},
{
"epoch": 2.9169006176305445,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4944557930380868e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 573954973.0,
"step": 5195
},
{
"epoch": 2.9197080291970803,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.493502461399361e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 574501076.0,
"step": 5200
},
{
"epoch": 2.922515440763616,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.4925815892231925e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 575052705.0,
"step": 5205
},
{
"epoch": 2.9253228523301518,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 2.491693184574072e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 575606312.0,
"step": 5210
},
{
"epoch": 2.928130263896687,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.490837255232159e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 576159265.0,
"step": 5215
},
{
"epoch": 2.930937675463223,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.49001380869321e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 576711256.0,
"step": 5220
},
{
"epoch": 2.9337450870297586,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.4892228521685148e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 577257255.0,
"step": 5225
},
{
"epoch": 2.9365524985962943,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 2.4884643925848374e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 577811491.0,
"step": 5230
},
{
"epoch": 2.93935991016283,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.4877384365843467e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 578366927.0,
"step": 5235
},
{
"epoch": 2.9421673217293653,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4870449905245658e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 578922020.0,
"step": 5240
},
{
"epoch": 2.944974733295901,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 2.4863840604783134e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 579470485.0,
"step": 5245
},
{
"epoch": 2.947782144862437,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4857556522336498e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 580024767.0,
"step": 5250
},
{
"epoch": 2.9505895564289726,
"grad_norm": 3.600120544433594e-05,
"learning_rate": 2.485159771293829e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 580576669.0,
"step": 5255
},
{
"epoch": 2.9533969679955083,
"grad_norm": 3.528594970703125e-05,
"learning_rate": 2.4845964228772473e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 581130341.0,
"step": 5260
},
{
"epoch": 2.9562043795620436,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 2.4840656119173992e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 581687010.0,
"step": 5265
},
{
"epoch": 2.9590117911285794,
"grad_norm": 3.504753112792969e-05,
"learning_rate": 2.483567343062836e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999642372132,
"num_tokens": 582240438.0,
"step": 5270
},
{
"epoch": 2.961819202695115,
"grad_norm": 3.4332275390625e-05,
"learning_rate": 2.4831016206771202e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 582806082.0,
"step": 5275
},
{
"epoch": 2.964626614261651,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4826684488387922e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 583363300.0,
"step": 5280
},
{
"epoch": 2.9674340258281866,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.4822678313413326e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 583914687.0,
"step": 5285
},
{
"epoch": 2.970241437394722,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.481899771693128e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 584471274.0,
"step": 5290
},
{
"epoch": 2.9730488489612577,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.4815642731174427e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 585030923.0,
"step": 5295
},
{
"epoch": 2.9758562605277934,
"grad_norm": 3.6716461181640625e-05,
"learning_rate": 2.4812613385523877e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 585584791.0,
"step": 5300
},
{
"epoch": 2.978663672094329,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4809909706508978e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 586135923.0,
"step": 5305
},
{
"epoch": 2.981471083660865,
"grad_norm": 3.552436828613281e-05,
"learning_rate": 2.4807531717807047e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 586685667.0,
"step": 5310
},
{
"epoch": 2.9842784952274,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.4805479440243207e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999761581421,
"num_tokens": 587238144.0,
"step": 5315
},
{
"epoch": 2.987085906793936,
"grad_norm": 3.647804260253906e-05,
"learning_rate": 2.480375289179017e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 587789476.0,
"step": 5320
},
{
"epoch": 2.9898933183604717,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.4802352087568106e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 588345607.0,
"step": 5325
},
{
"epoch": 2.9927007299270074,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.4801277039844473e-05,
"loss": 0.0,
"mean_token_accuracy": 0.9999999880790711,
"num_tokens": 588898982.0,
"step": 5330
},
{
"epoch": 2.995508141493543,
"grad_norm": 3.4809112548828125e-05,
"learning_rate": 2.4800527758033947e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 589442767.0,
"step": 5335
},
{
"epoch": 2.9983155530600785,
"grad_norm": 3.5762786865234375e-05,
"learning_rate": 2.480010424869834e-05,
"loss": 0.0,
"mean_token_accuracy": 1.0,
"num_tokens": 589989550.0,
"step": 5340
}
],
"logging_steps": 5,
"max_steps": 5343,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2669368762683776e+18,
"train_batch_size": 140,
"trial_name": null,
"trial_params": null
}