Video-Text-to-Text
Safetensors
qwen2_5_vl
robotic-manipulation
reinforcement-learning
chain-of-thought
PRIMO-COT-SFT-7B / trainer_state.json
LeonOverload's picture
Upload folder using huggingface_hub
74c9d81 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1476,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006777363605557439,
"grad_norm": 215.31928086277176,
"learning_rate": 2e-06,
"loss": 10.8319,
"mean_token_accuracy": 0.22483410313725471,
"num_tokens": 152858.0,
"step": 1
},
{
"epoch": 0.0013554727211114877,
"grad_norm": 175.0501040640227,
"learning_rate": 1.9986449864498644e-06,
"loss": 10.2378,
"mean_token_accuracy": 0.24036189168691635,
"num_tokens": 300109.0,
"step": 2
},
{
"epoch": 0.0020332090816672314,
"grad_norm": 199.45107221580295,
"learning_rate": 1.997289972899729e-06,
"loss": 9.9628,
"mean_token_accuracy": 0.2516961731016636,
"num_tokens": 451536.0,
"step": 3
},
{
"epoch": 0.0027109454422229754,
"grad_norm": 276.70813993268683,
"learning_rate": 1.9959349593495935e-06,
"loss": 9.6748,
"mean_token_accuracy": 0.25994390062987804,
"num_tokens": 601536.0,
"step": 4
},
{
"epoch": 0.003388681802778719,
"grad_norm": 327.52259849403754,
"learning_rate": 1.994579945799458e-06,
"loss": 8.4163,
"mean_token_accuracy": 0.28762012347579,
"num_tokens": 751272.0,
"step": 5
},
{
"epoch": 0.004066418163334463,
"grad_norm": 161.92592352250892,
"learning_rate": 1.9932249322493225e-06,
"loss": 8.3087,
"mean_token_accuracy": 0.29531916975975037,
"num_tokens": 899935.0,
"step": 6
},
{
"epoch": 0.004744154523890207,
"grad_norm": 243.14567162148077,
"learning_rate": 1.991869918699187e-06,
"loss": 7.9566,
"mean_token_accuracy": 0.3023368716239929,
"num_tokens": 1046913.0,
"step": 7
},
{
"epoch": 0.005421890884445951,
"grad_norm": 176.82871333029541,
"learning_rate": 1.990514905149051e-06,
"loss": 8.108,
"mean_token_accuracy": 0.2935122326016426,
"num_tokens": 1201554.0,
"step": 8
},
{
"epoch": 0.006099627245001694,
"grad_norm": 113.28849091621012,
"learning_rate": 1.9891598915989156e-06,
"loss": 6.8465,
"mean_token_accuracy": 0.3109285309910774,
"num_tokens": 1349568.0,
"step": 9
},
{
"epoch": 0.006777363605557438,
"grad_norm": 254.49554351760855,
"learning_rate": 1.9878048780487806e-06,
"loss": 6.3518,
"mean_token_accuracy": 0.3181378021836281,
"num_tokens": 1497459.0,
"step": 10
},
{
"epoch": 0.007455099966113182,
"grad_norm": 83.50614405450463,
"learning_rate": 1.986449864498645e-06,
"loss": 6.2805,
"mean_token_accuracy": 0.3154120370745659,
"num_tokens": 1646573.0,
"step": 11
},
{
"epoch": 0.008132836326668925,
"grad_norm": 86.39368529977068,
"learning_rate": 1.9850948509485096e-06,
"loss": 6.145,
"mean_token_accuracy": 0.31414467468857765,
"num_tokens": 1792422.0,
"step": 12
},
{
"epoch": 0.00881057268722467,
"grad_norm": 119.19627809594022,
"learning_rate": 1.9837398373983737e-06,
"loss": 6.0264,
"mean_token_accuracy": 0.3126797378063202,
"num_tokens": 1940761.0,
"step": 13
},
{
"epoch": 0.009488309047780414,
"grad_norm": 73.76070751225097,
"learning_rate": 1.9823848238482382e-06,
"loss": 5.6976,
"mean_token_accuracy": 0.32139725238084793,
"num_tokens": 2088013.0,
"step": 14
},
{
"epoch": 0.010166045408336157,
"grad_norm": 96.93314452315771,
"learning_rate": 1.9810298102981028e-06,
"loss": 5.7813,
"mean_token_accuracy": 0.30942872911691666,
"num_tokens": 2239304.0,
"step": 15
},
{
"epoch": 0.010843781768891902,
"grad_norm": 166.31622316662109,
"learning_rate": 1.9796747967479673e-06,
"loss": 5.623,
"mean_token_accuracy": 0.3139219619333744,
"num_tokens": 2387468.0,
"step": 16
},
{
"epoch": 0.011521518129447645,
"grad_norm": 66.0493117054842,
"learning_rate": 1.978319783197832e-06,
"loss": 5.282,
"mean_token_accuracy": 0.32881826534867287,
"num_tokens": 2535035.0,
"step": 17
},
{
"epoch": 0.012199254490003388,
"grad_norm": 64.22483298490378,
"learning_rate": 1.9769647696476963e-06,
"loss": 5.0493,
"mean_token_accuracy": 0.3398439697921276,
"num_tokens": 2680182.0,
"step": 18
},
{
"epoch": 0.012876990850559133,
"grad_norm": 75.86806695541563,
"learning_rate": 1.975609756097561e-06,
"loss": 5.0178,
"mean_token_accuracy": 0.34127549827098846,
"num_tokens": 2827956.0,
"step": 19
},
{
"epoch": 0.013554727211114876,
"grad_norm": 77.56337630320212,
"learning_rate": 1.9742547425474254e-06,
"loss": 4.9786,
"mean_token_accuracy": 0.34324514865875244,
"num_tokens": 2976812.0,
"step": 20
},
{
"epoch": 0.01423246357167062,
"grad_norm": 77.81656184616301,
"learning_rate": 1.97289972899729e-06,
"loss": 4.7973,
"mean_token_accuracy": 0.351806353777647,
"num_tokens": 3125150.0,
"step": 21
},
{
"epoch": 0.014910199932226365,
"grad_norm": 80.89049380537493,
"learning_rate": 1.9715447154471544e-06,
"loss": 4.7448,
"mean_token_accuracy": 0.3536057360470295,
"num_tokens": 3272181.0,
"step": 22
},
{
"epoch": 0.015587936292782108,
"grad_norm": 85.30272719046583,
"learning_rate": 1.970189701897019e-06,
"loss": 4.6602,
"mean_token_accuracy": 0.3579399660229683,
"num_tokens": 3421575.0,
"step": 23
},
{
"epoch": 0.01626567265333785,
"grad_norm": 89.74092507095774,
"learning_rate": 1.9688346883468834e-06,
"loss": 4.5848,
"mean_token_accuracy": 0.36106717213988304,
"num_tokens": 3570850.0,
"step": 24
},
{
"epoch": 0.016943409013893594,
"grad_norm": 92.46296984633125,
"learning_rate": 1.967479674796748e-06,
"loss": 4.4245,
"mean_token_accuracy": 0.37010491639375687,
"num_tokens": 3720110.0,
"step": 25
},
{
"epoch": 0.01762114537444934,
"grad_norm": 99.31637778401333,
"learning_rate": 1.9661246612466125e-06,
"loss": 4.3483,
"mean_token_accuracy": 0.37695401161909103,
"num_tokens": 3867948.0,
"step": 26
},
{
"epoch": 0.018298881735005084,
"grad_norm": 107.76203073450068,
"learning_rate": 1.964769647696477e-06,
"loss": 4.3199,
"mean_token_accuracy": 0.37547387182712555,
"num_tokens": 4016986.0,
"step": 27
},
{
"epoch": 0.018976618095560827,
"grad_norm": 110.05813690039194,
"learning_rate": 1.9634146341463415e-06,
"loss": 4.1004,
"mean_token_accuracy": 0.39141304790973663,
"num_tokens": 4162483.0,
"step": 28
},
{
"epoch": 0.01965435445611657,
"grad_norm": 122.62831228475048,
"learning_rate": 1.962059620596206e-06,
"loss": 4.1895,
"mean_token_accuracy": 0.3831036686897278,
"num_tokens": 4311489.0,
"step": 29
},
{
"epoch": 0.020332090816672314,
"grad_norm": 124.97452075345946,
"learning_rate": 1.9607046070460706e-06,
"loss": 4.1468,
"mean_token_accuracy": 0.38225793465971947,
"num_tokens": 4464098.0,
"step": 30
},
{
"epoch": 0.021009827177228057,
"grad_norm": 124.1659063102063,
"learning_rate": 1.9593495934959347e-06,
"loss": 4.006,
"mean_token_accuracy": 0.3971566930413246,
"num_tokens": 4611556.0,
"step": 31
},
{
"epoch": 0.021687563537783804,
"grad_norm": 125.00815818704322,
"learning_rate": 1.957994579945799e-06,
"loss": 3.9493,
"mean_token_accuracy": 0.40315019339323044,
"num_tokens": 4760310.0,
"step": 32
},
{
"epoch": 0.022365299898339547,
"grad_norm": 122.07591947126842,
"learning_rate": 1.9566395663956637e-06,
"loss": 3.7965,
"mean_token_accuracy": 0.4153985045850277,
"num_tokens": 4906715.0,
"step": 33
},
{
"epoch": 0.02304303625889529,
"grad_norm": 126.74800270999756,
"learning_rate": 1.955284552845528e-06,
"loss": 3.8472,
"mean_token_accuracy": 0.4084292873740196,
"num_tokens": 5056083.0,
"step": 34
},
{
"epoch": 0.023720772619451033,
"grad_norm": 125.49345325581528,
"learning_rate": 1.953929539295393e-06,
"loss": 3.7707,
"mean_token_accuracy": 0.4145648442208767,
"num_tokens": 5205556.0,
"step": 35
},
{
"epoch": 0.024398508980006776,
"grad_norm": 130.54186032107705,
"learning_rate": 1.9525745257452573e-06,
"loss": 3.8357,
"mean_token_accuracy": 0.40865280851721764,
"num_tokens": 5358374.0,
"step": 36
},
{
"epoch": 0.02507624534056252,
"grad_norm": 123.00299156664755,
"learning_rate": 1.9512195121951218e-06,
"loss": 3.6205,
"mean_token_accuracy": 0.4278510734438896,
"num_tokens": 5504445.0,
"step": 37
},
{
"epoch": 0.025753981701118266,
"grad_norm": 124.8763734549625,
"learning_rate": 1.9498644986449863e-06,
"loss": 3.6099,
"mean_token_accuracy": 0.4292087107896805,
"num_tokens": 5655196.0,
"step": 38
},
{
"epoch": 0.02643171806167401,
"grad_norm": 122.93711702855603,
"learning_rate": 1.948509485094851e-06,
"loss": 3.5454,
"mean_token_accuracy": 0.4364416375756264,
"num_tokens": 5804319.0,
"step": 39
},
{
"epoch": 0.027109454422229753,
"grad_norm": 127.34644717949502,
"learning_rate": 1.9471544715447153e-06,
"loss": 3.5861,
"mean_token_accuracy": 0.431281503289938,
"num_tokens": 5956589.0,
"step": 40
},
{
"epoch": 0.027787190782785496,
"grad_norm": 120.67004246260448,
"learning_rate": 1.94579945799458e-06,
"loss": 3.4181,
"mean_token_accuracy": 0.4462522640824318,
"num_tokens": 6104451.0,
"step": 41
},
{
"epoch": 0.02846492714334124,
"grad_norm": 124.66691702087084,
"learning_rate": 1.9444444444444444e-06,
"loss": 3.4574,
"mean_token_accuracy": 0.44873106479644775,
"num_tokens": 6253336.0,
"step": 42
},
{
"epoch": 0.029142663503896982,
"grad_norm": 127.90260387508701,
"learning_rate": 1.943089430894309e-06,
"loss": 3.4951,
"mean_token_accuracy": 0.44188638776540756,
"num_tokens": 6405404.0,
"step": 43
},
{
"epoch": 0.02982039986445273,
"grad_norm": 122.09081961349227,
"learning_rate": 1.9417344173441734e-06,
"loss": 3.3221,
"mean_token_accuracy": 0.4585568234324455,
"num_tokens": 6553474.0,
"step": 44
},
{
"epoch": 0.030498136225008472,
"grad_norm": 122.73792960023344,
"learning_rate": 1.940379403794038e-06,
"loss": 3.3127,
"mean_token_accuracy": 0.4573376663029194,
"num_tokens": 6700108.0,
"step": 45
},
{
"epoch": 0.031175872585564215,
"grad_norm": 122.62720593333567,
"learning_rate": 1.9390243902439024e-06,
"loss": 3.2842,
"mean_token_accuracy": 0.45810314640402794,
"num_tokens": 6848463.0,
"step": 46
},
{
"epoch": 0.03185360894611996,
"grad_norm": 121.2286239246027,
"learning_rate": 1.937669376693767e-06,
"loss": 3.2317,
"mean_token_accuracy": 0.46193112805485725,
"num_tokens": 6998690.0,
"step": 47
},
{
"epoch": 0.0325313453066757,
"grad_norm": 123.66939663555446,
"learning_rate": 1.9363143631436315e-06,
"loss": 3.2559,
"mean_token_accuracy": 0.45676320046186447,
"num_tokens": 7149009.0,
"step": 48
},
{
"epoch": 0.033209081667231445,
"grad_norm": 120.83625724001882,
"learning_rate": 1.934959349593496e-06,
"loss": 3.1631,
"mean_token_accuracy": 0.4640156216919422,
"num_tokens": 7296015.0,
"step": 49
},
{
"epoch": 0.03388681802778719,
"grad_norm": 115.86241664463016,
"learning_rate": 1.93360433604336e-06,
"loss": 3.0535,
"mean_token_accuracy": 0.47255614027380943,
"num_tokens": 7440154.0,
"step": 50
},
{
"epoch": 0.03456455438834293,
"grad_norm": 120.77679179022189,
"learning_rate": 1.9322493224932246e-06,
"loss": 3.1193,
"mean_token_accuracy": 0.46199216321110725,
"num_tokens": 7589986.0,
"step": 51
},
{
"epoch": 0.03524229074889868,
"grad_norm": 119.75325748406044,
"learning_rate": 1.9308943089430896e-06,
"loss": 3.0699,
"mean_token_accuracy": 0.4629558362066746,
"num_tokens": 7738759.0,
"step": 52
},
{
"epoch": 0.035920027109454425,
"grad_norm": 122.81516564789821,
"learning_rate": 1.929539295392954e-06,
"loss": 3.0929,
"mean_token_accuracy": 0.4590213857591152,
"num_tokens": 7888442.0,
"step": 53
},
{
"epoch": 0.03659776347001017,
"grad_norm": 116.60973705379047,
"learning_rate": 1.9281842818428186e-06,
"loss": 2.9478,
"mean_token_accuracy": 0.47187361493706703,
"num_tokens": 8035766.0,
"step": 54
},
{
"epoch": 0.03727549983056591,
"grad_norm": 122.05879455164165,
"learning_rate": 1.9268292682926827e-06,
"loss": 3.0327,
"mean_token_accuracy": 0.45789875090122223,
"num_tokens": 8187621.0,
"step": 55
},
{
"epoch": 0.037953236191121655,
"grad_norm": 121.61169807853662,
"learning_rate": 1.9254742547425472e-06,
"loss": 3.0006,
"mean_token_accuracy": 0.45924459397792816,
"num_tokens": 8336909.0,
"step": 56
},
{
"epoch": 0.0386309725516774,
"grad_norm": 117.02544106479719,
"learning_rate": 1.9241192411924117e-06,
"loss": 2.8853,
"mean_token_accuracy": 0.46758873015642166,
"num_tokens": 8484120.0,
"step": 57
},
{
"epoch": 0.03930870891223314,
"grad_norm": 118.50453238861088,
"learning_rate": 1.9227642276422763e-06,
"loss": 2.8773,
"mean_token_accuracy": 0.4679280035197735,
"num_tokens": 8632765.0,
"step": 58
},
{
"epoch": 0.039986445272788884,
"grad_norm": 119.75315383821146,
"learning_rate": 1.9214092140921408e-06,
"loss": 2.8609,
"mean_token_accuracy": 0.46564289554953575,
"num_tokens": 8780158.0,
"step": 59
},
{
"epoch": 0.04066418163334463,
"grad_norm": 121.82700024099637,
"learning_rate": 1.9200542005420053e-06,
"loss": 2.8858,
"mean_token_accuracy": 0.45738009735941887,
"num_tokens": 8931674.0,
"step": 60
},
{
"epoch": 0.04134191799390037,
"grad_norm": 120.37404301611848,
"learning_rate": 1.91869918699187e-06,
"loss": 2.8305,
"mean_token_accuracy": 0.4612518399953842,
"num_tokens": 9083330.0,
"step": 61
},
{
"epoch": 0.042019654354456114,
"grad_norm": 120.55110506672673,
"learning_rate": 1.9173441734417343e-06,
"loss": 2.7999,
"mean_token_accuracy": 0.46009667590260506,
"num_tokens": 9234569.0,
"step": 62
},
{
"epoch": 0.04269739071501186,
"grad_norm": 120.16994369985942,
"learning_rate": 1.915989159891599e-06,
"loss": 2.7573,
"mean_token_accuracy": 0.4637075141072273,
"num_tokens": 9383301.0,
"step": 63
},
{
"epoch": 0.04337512707556761,
"grad_norm": 123.56031740474211,
"learning_rate": 1.9146341463414634e-06,
"loss": 2.7766,
"mean_token_accuracy": 0.45710835233330727,
"num_tokens": 9535886.0,
"step": 64
},
{
"epoch": 0.04405286343612335,
"grad_norm": 122.98514880561626,
"learning_rate": 1.913279132791328e-06,
"loss": 2.7324,
"mean_token_accuracy": 0.4671022370457649,
"num_tokens": 9687879.0,
"step": 65
},
{
"epoch": 0.044730599796679094,
"grad_norm": 125.43736740720358,
"learning_rate": 1.9119241192411924e-06,
"loss": 2.7436,
"mean_token_accuracy": 0.47459762170910835,
"num_tokens": 9842571.0,
"step": 66
},
{
"epoch": 0.04540833615723484,
"grad_norm": 114.11746508121755,
"learning_rate": 1.910569105691057e-06,
"loss": 2.5244,
"mean_token_accuracy": 0.48580894619226456,
"num_tokens": 9987540.0,
"step": 67
},
{
"epoch": 0.04608607251779058,
"grad_norm": 121.82433617300515,
"learning_rate": 1.909214092140921e-06,
"loss": 2.6134,
"mean_token_accuracy": 0.4700146056711674,
"num_tokens": 10138378.0,
"step": 68
},
{
"epoch": 0.04676380887834632,
"grad_norm": 120.89387496367533,
"learning_rate": 1.907859078590786e-06,
"loss": 2.569,
"mean_token_accuracy": 0.511335089802742,
"num_tokens": 10287979.0,
"step": 69
},
{
"epoch": 0.047441545238902066,
"grad_norm": 122.26976503869227,
"learning_rate": 1.9065040650406503e-06,
"loss": 2.5595,
"mean_token_accuracy": 0.5703656449913979,
"num_tokens": 10438939.0,
"step": 70
},
{
"epoch": 0.04811928159945781,
"grad_norm": 119.11292329257235,
"learning_rate": 1.9051490514905148e-06,
"loss": 2.4608,
"mean_token_accuracy": 0.4872521534562111,
"num_tokens": 10586175.0,
"step": 71
},
{
"epoch": 0.04879701796001355,
"grad_norm": 120.66894079757309,
"learning_rate": 1.9037940379403793e-06,
"loss": 2.4675,
"mean_token_accuracy": 0.5670045763254166,
"num_tokens": 10735195.0,
"step": 72
},
{
"epoch": 0.049474754320569296,
"grad_norm": 126.57059301982753,
"learning_rate": 1.9024390243902436e-06,
"loss": 2.5366,
"mean_token_accuracy": 0.6708709970116615,
"num_tokens": 10890747.0,
"step": 73
},
{
"epoch": 0.05015249068112504,
"grad_norm": 119.11823841769932,
"learning_rate": 1.9010840108401084e-06,
"loss": 2.3787,
"mean_token_accuracy": 0.5503224208950996,
"num_tokens": 11038790.0,
"step": 74
},
{
"epoch": 0.05083022704168079,
"grad_norm": 120.87828788657913,
"learning_rate": 1.8997289972899729e-06,
"loss": 2.3944,
"mean_token_accuracy": 0.7692296281456947,
"num_tokens": 11188775.0,
"step": 75
},
{
"epoch": 0.05150796340223653,
"grad_norm": 118.25074374900122,
"learning_rate": 1.8983739837398374e-06,
"loss": 2.3129,
"mean_token_accuracy": 0.7943554669618607,
"num_tokens": 11336216.0,
"step": 76
},
{
"epoch": 0.052185699762792276,
"grad_norm": 119.89244525861773,
"learning_rate": 1.897018970189702e-06,
"loss": 2.3178,
"mean_token_accuracy": 0.8039621710777283,
"num_tokens": 11487643.0,
"step": 77
},
{
"epoch": 0.05286343612334802,
"grad_norm": 117.61570660235944,
"learning_rate": 1.8956639566395662e-06,
"loss": 2.2518,
"mean_token_accuracy": 0.8842473700642586,
"num_tokens": 11636350.0,
"step": 78
},
{
"epoch": 0.05354117248390376,
"grad_norm": 119.18861910334302,
"learning_rate": 1.8943089430894307e-06,
"loss": 2.2523,
"mean_token_accuracy": 0.9015108346939087,
"num_tokens": 11786908.0,
"step": 79
},
{
"epoch": 0.054218908844459505,
"grad_norm": 115.79762500453249,
"learning_rate": 1.8929539295392953e-06,
"loss": 2.1631,
"mean_token_accuracy": 0.8935829252004623,
"num_tokens": 11932736.0,
"step": 80
},
{
"epoch": 0.05489664520501525,
"grad_norm": 119.09989693599702,
"learning_rate": 1.8915989159891598e-06,
"loss": 2.1722,
"mean_token_accuracy": 0.913075864315033,
"num_tokens": 12081616.0,
"step": 81
},
{
"epoch": 0.05557438156557099,
"grad_norm": 118.06996069036731,
"learning_rate": 1.8902439024390243e-06,
"loss": 2.1496,
"mean_token_accuracy": 0.904613807797432,
"num_tokens": 12230175.0,
"step": 82
},
{
"epoch": 0.056252117926126735,
"grad_norm": 123.85320199345325,
"learning_rate": 1.8888888888888888e-06,
"loss": 2.1906,
"mean_token_accuracy": 0.9205794930458069,
"num_tokens": 12383751.0,
"step": 83
},
{
"epoch": 0.05692985428668248,
"grad_norm": 121.52517225759287,
"learning_rate": 1.8875338753387533e-06,
"loss": 2.1203,
"mean_token_accuracy": 0.9293738752603531,
"num_tokens": 12534184.0,
"step": 84
},
{
"epoch": 0.05760759064723822,
"grad_norm": 120.65452449500275,
"learning_rate": 1.8861788617886179e-06,
"loss": 2.0948,
"mean_token_accuracy": 0.9251657500863075,
"num_tokens": 12687333.0,
"step": 85
},
{
"epoch": 0.058285327007793965,
"grad_norm": 119.22615191510779,
"learning_rate": 1.8848238482384824e-06,
"loss": 2.0499,
"mean_token_accuracy": 0.9273019582033157,
"num_tokens": 12837011.0,
"step": 86
},
{
"epoch": 0.058963063368349715,
"grad_norm": 118.57976808894816,
"learning_rate": 1.8834688346883467e-06,
"loss": 2.0156,
"mean_token_accuracy": 0.9246799051761627,
"num_tokens": 12986181.0,
"step": 87
},
{
"epoch": 0.05964079972890546,
"grad_norm": 118.99893633401761,
"learning_rate": 1.8821138211382112e-06,
"loss": 1.9919,
"mean_token_accuracy": 0.9291153773665428,
"num_tokens": 13134214.0,
"step": 88
},
{
"epoch": 0.0603185360894612,
"grad_norm": 121.00759692804243,
"learning_rate": 1.8807588075880757e-06,
"loss": 1.9796,
"mean_token_accuracy": 0.9294244274497032,
"num_tokens": 13286608.0,
"step": 89
},
{
"epoch": 0.060996272450016945,
"grad_norm": 122.96705505458303,
"learning_rate": 1.8794037940379405e-06,
"loss": 1.9793,
"mean_token_accuracy": 0.9320042505860329,
"num_tokens": 13438982.0,
"step": 90
},
{
"epoch": 0.06167400881057269,
"grad_norm": 115.45042378077322,
"learning_rate": 1.8780487804878048e-06,
"loss": 1.8714,
"mean_token_accuracy": 0.9282395839691162,
"num_tokens": 13585428.0,
"step": 91
},
{
"epoch": 0.06235174517112843,
"grad_norm": 111.7836219513804,
"learning_rate": 1.8766937669376693e-06,
"loss": 1.7863,
"mean_token_accuracy": 0.9293386787176132,
"num_tokens": 13730964.0,
"step": 92
},
{
"epoch": 0.06302948153168418,
"grad_norm": 118.87630401245544,
"learning_rate": 1.8753387533875338e-06,
"loss": 1.8495,
"mean_token_accuracy": 0.9302572533488274,
"num_tokens": 13879984.0,
"step": 93
},
{
"epoch": 0.06370721789223992,
"grad_norm": 114.52334966185592,
"learning_rate": 1.8739837398373983e-06,
"loss": 1.775,
"mean_token_accuracy": 0.9275632426142693,
"num_tokens": 14029215.0,
"step": 94
},
{
"epoch": 0.06438495425279567,
"grad_norm": 121.46158002135849,
"learning_rate": 1.8726287262872629e-06,
"loss": 1.8224,
"mean_token_accuracy": 0.9329173788428307,
"num_tokens": 14182230.0,
"step": 95
},
{
"epoch": 0.0650626906133514,
"grad_norm": 112.07899271050006,
"learning_rate": 1.8712737127371272e-06,
"loss": 1.6955,
"mean_token_accuracy": 0.929063692688942,
"num_tokens": 14324540.0,
"step": 96
},
{
"epoch": 0.06574042697390715,
"grad_norm": 118.60172402243633,
"learning_rate": 1.8699186991869917e-06,
"loss": 1.747,
"mean_token_accuracy": 0.9302254170179367,
"num_tokens": 14476642.0,
"step": 97
},
{
"epoch": 0.06641816333446289,
"grad_norm": 118.1561917036615,
"learning_rate": 1.8685636856368562e-06,
"loss": 1.714,
"mean_token_accuracy": 0.930650383234024,
"num_tokens": 14627769.0,
"step": 98
},
{
"epoch": 0.06709589969501864,
"grad_norm": 117.87828039655399,
"learning_rate": 1.867208672086721e-06,
"loss": 1.6863,
"mean_token_accuracy": 0.9286526739597321,
"num_tokens": 14779314.0,
"step": 99
},
{
"epoch": 0.06777363605557438,
"grad_norm": 118.70756510736268,
"learning_rate": 1.8658536585365854e-06,
"loss": 1.6616,
"mean_token_accuracy": 0.9325949177145958,
"num_tokens": 14930361.0,
"step": 100
},
{
"epoch": 0.06845137241613013,
"grad_norm": 115.0705408214617,
"learning_rate": 1.8644986449864498e-06,
"loss": 1.6001,
"mean_token_accuracy": 0.9325196817517281,
"num_tokens": 15076964.0,
"step": 101
},
{
"epoch": 0.06912910877668586,
"grad_norm": 112.04482720947613,
"learning_rate": 1.8631436314363143e-06,
"loss": 1.5575,
"mean_token_accuracy": 0.9281066954135895,
"num_tokens": 15224576.0,
"step": 102
},
{
"epoch": 0.06980684513724161,
"grad_norm": 114.97883851594864,
"learning_rate": 1.8617886178861788e-06,
"loss": 1.5598,
"mean_token_accuracy": 0.9284133464097977,
"num_tokens": 15374847.0,
"step": 103
},
{
"epoch": 0.07048458149779736,
"grad_norm": 113.64949116247662,
"learning_rate": 1.8604336043360433e-06,
"loss": 1.5186,
"mean_token_accuracy": 0.9293715506792068,
"num_tokens": 15525505.0,
"step": 104
},
{
"epoch": 0.0711623178583531,
"grad_norm": 109.63343026661512,
"learning_rate": 1.8590785907859076e-06,
"loss": 1.4655,
"mean_token_accuracy": 0.927336260676384,
"num_tokens": 15672609.0,
"step": 105
},
{
"epoch": 0.07184005421890885,
"grad_norm": 114.18318380830236,
"learning_rate": 1.8577235772357721e-06,
"loss": 1.4822,
"mean_token_accuracy": 0.9287517815828323,
"num_tokens": 15822989.0,
"step": 106
},
{
"epoch": 0.07251779057946459,
"grad_norm": 110.2006577907072,
"learning_rate": 1.8563685636856367e-06,
"loss": 1.4193,
"mean_token_accuracy": 0.9296880438923836,
"num_tokens": 15970564.0,
"step": 107
},
{
"epoch": 0.07319552694002034,
"grad_norm": 108.26203421754022,
"learning_rate": 1.8550135501355014e-06,
"loss": 1.3794,
"mean_token_accuracy": 0.9290289804339409,
"num_tokens": 16116146.0,
"step": 108
},
{
"epoch": 0.07387326330057607,
"grad_norm": 111.01200634554529,
"learning_rate": 1.853658536585366e-06,
"loss": 1.3787,
"mean_token_accuracy": 0.9306840002536774,
"num_tokens": 16264186.0,
"step": 109
},
{
"epoch": 0.07455099966113182,
"grad_norm": 111.43380256907831,
"learning_rate": 1.8523035230352302e-06,
"loss": 1.3651,
"mean_token_accuracy": 0.9310031309723854,
"num_tokens": 16413040.0,
"step": 110
},
{
"epoch": 0.07522873602168756,
"grad_norm": 112.26968519229395,
"learning_rate": 1.8509485094850947e-06,
"loss": 1.3468,
"mean_token_accuracy": 0.931459404528141,
"num_tokens": 16564213.0,
"step": 111
},
{
"epoch": 0.07590647238224331,
"grad_norm": 109.02323402522909,
"learning_rate": 1.8495934959349593e-06,
"loss": 1.3021,
"mean_token_accuracy": 0.9300208985805511,
"num_tokens": 16713339.0,
"step": 112
},
{
"epoch": 0.07658420874279905,
"grad_norm": 107.38997170228535,
"learning_rate": 1.8482384823848238e-06,
"loss": 1.2712,
"mean_token_accuracy": 0.9297583177685738,
"num_tokens": 16860845.0,
"step": 113
},
{
"epoch": 0.0772619451033548,
"grad_norm": 105.96279047024431,
"learning_rate": 1.8468834688346883e-06,
"loss": 1.2345,
"mean_token_accuracy": 0.9307239279150963,
"num_tokens": 17008447.0,
"step": 114
},
{
"epoch": 0.07793968146391055,
"grad_norm": 107.1270259224278,
"learning_rate": 1.8455284552845526e-06,
"loss": 1.2204,
"mean_token_accuracy": 0.9324908629059792,
"num_tokens": 17156449.0,
"step": 115
},
{
"epoch": 0.07861741782446628,
"grad_norm": 107.24961194416598,
"learning_rate": 1.8441734417344173e-06,
"loss": 1.2074,
"mean_token_accuracy": 0.9318635389208794,
"num_tokens": 17305874.0,
"step": 116
},
{
"epoch": 0.07929515418502203,
"grad_norm": 101.95987989009163,
"learning_rate": 1.8428184281842819e-06,
"loss": 1.1589,
"mean_token_accuracy": 0.9266369640827179,
"num_tokens": 17453331.0,
"step": 117
},
{
"epoch": 0.07997289054557777,
"grad_norm": 105.82055210123028,
"learning_rate": 1.8414634146341464e-06,
"loss": 1.1611,
"mean_token_accuracy": 0.930683083832264,
"num_tokens": 17602041.0,
"step": 118
},
{
"epoch": 0.08065062690613352,
"grad_norm": 99.44019225376582,
"learning_rate": 1.8401084010840107e-06,
"loss": 1.1027,
"mean_token_accuracy": 0.9283188283443451,
"num_tokens": 17748531.0,
"step": 119
},
{
"epoch": 0.08132836326668925,
"grad_norm": 104.3596685159921,
"learning_rate": 1.8387533875338752e-06,
"loss": 1.1165,
"mean_token_accuracy": 0.9300511553883553,
"num_tokens": 17899989.0,
"step": 120
},
{
"epoch": 0.082006099627245,
"grad_norm": 100.81191503355579,
"learning_rate": 1.8373983739837397e-06,
"loss": 1.0777,
"mean_token_accuracy": 0.9295774847269058,
"num_tokens": 18048993.0,
"step": 121
},
{
"epoch": 0.08268383598780074,
"grad_norm": 93.88135822464984,
"learning_rate": 1.8360433604336042e-06,
"loss": 1.02,
"mean_token_accuracy": 0.9281311184167862,
"num_tokens": 18190797.0,
"step": 122
},
{
"epoch": 0.08336157234835649,
"grad_norm": 100.17923887033267,
"learning_rate": 1.8346883468834688e-06,
"loss": 1.0306,
"mean_token_accuracy": 0.9315191507339478,
"num_tokens": 18338862.0,
"step": 123
},
{
"epoch": 0.08403930870891223,
"grad_norm": 96.97512207450372,
"learning_rate": 1.833333333333333e-06,
"loss": 0.9934,
"mean_token_accuracy": 0.9317428171634674,
"num_tokens": 18486594.0,
"step": 124
},
{
"epoch": 0.08471704506946798,
"grad_norm": 96.55200278181182,
"learning_rate": 1.8319783197831978e-06,
"loss": 0.9818,
"mean_token_accuracy": 0.9299175664782524,
"num_tokens": 18636214.0,
"step": 125
},
{
"epoch": 0.08539478143002371,
"grad_norm": 95.99010101143081,
"learning_rate": 1.8306233062330623e-06,
"loss": 0.9599,
"mean_token_accuracy": 0.9313259571790695,
"num_tokens": 18786296.0,
"step": 126
},
{
"epoch": 0.08607251779057946,
"grad_norm": 93.57149619949969,
"learning_rate": 1.8292682926829268e-06,
"loss": 0.9285,
"mean_token_accuracy": 0.93116744607687,
"num_tokens": 18934560.0,
"step": 127
},
{
"epoch": 0.08675025415113521,
"grad_norm": 92.01277288501751,
"learning_rate": 1.8279132791327912e-06,
"loss": 0.9117,
"mean_token_accuracy": 0.9301353469491005,
"num_tokens": 19084031.0,
"step": 128
},
{
"epoch": 0.08742799051169095,
"grad_norm": 93.17793996368636,
"learning_rate": 1.8265582655826557e-06,
"loss": 0.8974,
"mean_token_accuracy": 0.9323903545737267,
"num_tokens": 19234417.0,
"step": 129
},
{
"epoch": 0.0881057268722467,
"grad_norm": 87.91463270355133,
"learning_rate": 1.8252032520325202e-06,
"loss": 0.8588,
"mean_token_accuracy": 0.9307873845100403,
"num_tokens": 19380999.0,
"step": 130
},
{
"epoch": 0.08878346323280244,
"grad_norm": 88.1638527981959,
"learning_rate": 1.8238482384823847e-06,
"loss": 0.8553,
"mean_token_accuracy": 0.9293078556656837,
"num_tokens": 19528898.0,
"step": 131
},
{
"epoch": 0.08946119959335819,
"grad_norm": 88.94710264026781,
"learning_rate": 1.8224932249322492e-06,
"loss": 0.835,
"mean_token_accuracy": 0.9326649755239487,
"num_tokens": 19681222.0,
"step": 132
},
{
"epoch": 0.09013893595391392,
"grad_norm": 85.90974131550782,
"learning_rate": 1.8211382113821138e-06,
"loss": 0.8063,
"mean_token_accuracy": 0.932416245341301,
"num_tokens": 19830868.0,
"step": 133
},
{
"epoch": 0.09081667231446967,
"grad_norm": 84.86967745400749,
"learning_rate": 1.8197831978319783e-06,
"loss": 0.784,
"mean_token_accuracy": 0.9324630126357079,
"num_tokens": 19980203.0,
"step": 134
},
{
"epoch": 0.09149440867502541,
"grad_norm": 83.25372427993219,
"learning_rate": 1.8184281842818428e-06,
"loss": 0.7671,
"mean_token_accuracy": 0.9325378760695457,
"num_tokens": 20127736.0,
"step": 135
},
{
"epoch": 0.09217214503558116,
"grad_norm": 82.15717791516646,
"learning_rate": 1.8170731707317073e-06,
"loss": 0.7513,
"mean_token_accuracy": 0.9318142458796501,
"num_tokens": 20278996.0,
"step": 136
},
{
"epoch": 0.0928498813961369,
"grad_norm": 77.51250554472611,
"learning_rate": 1.8157181571815718e-06,
"loss": 0.7255,
"mean_token_accuracy": 0.9290755987167358,
"num_tokens": 20425353.0,
"step": 137
},
{
"epoch": 0.09352761775669265,
"grad_norm": 78.21321425269326,
"learning_rate": 1.8143631436314361e-06,
"loss": 0.7069,
"mean_token_accuracy": 0.9344506710767746,
"num_tokens": 20574453.0,
"step": 138
},
{
"epoch": 0.0942053541172484,
"grad_norm": 74.35012855974352,
"learning_rate": 1.8130081300813007e-06,
"loss": 0.6941,
"mean_token_accuracy": 0.9283354431390762,
"num_tokens": 20721900.0,
"step": 139
},
{
"epoch": 0.09488309047780413,
"grad_norm": 73.22218164338094,
"learning_rate": 1.8116531165311652e-06,
"loss": 0.6646,
"mean_token_accuracy": 0.9332837462425232,
"num_tokens": 20868804.0,
"step": 140
},
{
"epoch": 0.09556082683835988,
"grad_norm": 71.46610642433735,
"learning_rate": 1.81029810298103e-06,
"loss": 0.6465,
"mean_token_accuracy": 0.9334042221307755,
"num_tokens": 21015885.0,
"step": 141
},
{
"epoch": 0.09623856319891562,
"grad_norm": 72.70207166228874,
"learning_rate": 1.8089430894308942e-06,
"loss": 0.645,
"mean_token_accuracy": 0.9326634481549263,
"num_tokens": 21168376.0,
"step": 142
},
{
"epoch": 0.09691629955947137,
"grad_norm": 71.05676304804858,
"learning_rate": 1.8075880758807587e-06,
"loss": 0.6231,
"mean_token_accuracy": 0.9345277771353722,
"num_tokens": 21319503.0,
"step": 143
},
{
"epoch": 0.0975940359200271,
"grad_norm": 70.89133381935596,
"learning_rate": 1.8062330623306233e-06,
"loss": 0.614,
"mean_token_accuracy": 0.9351460039615631,
"num_tokens": 21474123.0,
"step": 144
},
{
"epoch": 0.09827177228058286,
"grad_norm": 64.58516129851412,
"learning_rate": 1.8048780487804878e-06,
"loss": 0.5968,
"mean_token_accuracy": 0.9304576441645622,
"num_tokens": 21624512.0,
"step": 145
},
{
"epoch": 0.09894950864113859,
"grad_norm": 65.24856190459862,
"learning_rate": 1.8035230352303523e-06,
"loss": 0.5809,
"mean_token_accuracy": 0.9337568357586861,
"num_tokens": 21776372.0,
"step": 146
},
{
"epoch": 0.09962724500169434,
"grad_norm": 62.568471114426266,
"learning_rate": 1.8021680216802166e-06,
"loss": 0.5723,
"mean_token_accuracy": 0.9315716549754143,
"num_tokens": 21926956.0,
"step": 147
},
{
"epoch": 0.10030498136225008,
"grad_norm": 59.965025191532895,
"learning_rate": 1.8008130081300811e-06,
"loss": 0.5625,
"mean_token_accuracy": 0.9297163262963295,
"num_tokens": 22075445.0,
"step": 148
},
{
"epoch": 0.10098271772280583,
"grad_norm": 59.35204164689335,
"learning_rate": 1.7994579945799456e-06,
"loss": 0.5394,
"mean_token_accuracy": 0.9344234243035316,
"num_tokens": 22224724.0,
"step": 149
},
{
"epoch": 0.10166045408336158,
"grad_norm": 56.05971904812392,
"learning_rate": 1.7981029810298104e-06,
"loss": 0.5217,
"mean_token_accuracy": 0.932852178812027,
"num_tokens": 22372664.0,
"step": 150
},
{
"epoch": 0.10233819044391732,
"grad_norm": 55.899238426042885,
"learning_rate": 1.7967479674796747e-06,
"loss": 0.5111,
"mean_token_accuracy": 0.9346337839961052,
"num_tokens": 22522123.0,
"step": 151
},
{
"epoch": 0.10301592680447307,
"grad_norm": 53.66256786236335,
"learning_rate": 1.7953929539295392e-06,
"loss": 0.5019,
"mean_token_accuracy": 0.9335299357771873,
"num_tokens": 22672150.0,
"step": 152
},
{
"epoch": 0.1036936631650288,
"grad_norm": 50.73144865079355,
"learning_rate": 1.7940379403794037e-06,
"loss": 0.484,
"mean_token_accuracy": 0.9336813315749168,
"num_tokens": 22818967.0,
"step": 153
},
{
"epoch": 0.10437139952558455,
"grad_norm": 49.08098043135844,
"learning_rate": 1.7926829268292682e-06,
"loss": 0.4711,
"mean_token_accuracy": 0.9337323307991028,
"num_tokens": 22966978.0,
"step": 154
},
{
"epoch": 0.10504913588614029,
"grad_norm": 46.55240197600489,
"learning_rate": 1.7913279132791328e-06,
"loss": 0.4667,
"mean_token_accuracy": 0.9307694062590599,
"num_tokens": 23113941.0,
"step": 155
},
{
"epoch": 0.10572687224669604,
"grad_norm": 45.4207324250192,
"learning_rate": 1.789972899728997e-06,
"loss": 0.4495,
"mean_token_accuracy": 0.9333298355340958,
"num_tokens": 23260864.0,
"step": 156
},
{
"epoch": 0.10640460860725177,
"grad_norm": 43.93823803845357,
"learning_rate": 1.7886178861788616e-06,
"loss": 0.4405,
"mean_token_accuracy": 0.9329454302787781,
"num_tokens": 23410477.0,
"step": 157
},
{
"epoch": 0.10708234496780752,
"grad_norm": 43.85611117671106,
"learning_rate": 1.7872628726287263e-06,
"loss": 0.4286,
"mean_token_accuracy": 0.9351244196295738,
"num_tokens": 23562882.0,
"step": 158
},
{
"epoch": 0.10776008132836326,
"grad_norm": 41.32401336746093,
"learning_rate": 1.7859078590785908e-06,
"loss": 0.4164,
"mean_token_accuracy": 0.9357188642024994,
"num_tokens": 23713602.0,
"step": 159
},
{
"epoch": 0.10843781768891901,
"grad_norm": 39.5175400314298,
"learning_rate": 1.7845528455284554e-06,
"loss": 0.406,
"mean_token_accuracy": 0.9362591058015823,
"num_tokens": 23860727.0,
"step": 160
},
{
"epoch": 0.10911555404947476,
"grad_norm": 38.416625804623116,
"learning_rate": 1.7831978319783197e-06,
"loss": 0.4068,
"mean_token_accuracy": 0.9329937174916267,
"num_tokens": 24012425.0,
"step": 161
},
{
"epoch": 0.1097932904100305,
"grad_norm": 37.59441870071182,
"learning_rate": 1.7818428184281842e-06,
"loss": 0.3909,
"mean_token_accuracy": 0.9354848563671112,
"num_tokens": 24164863.0,
"step": 162
},
{
"epoch": 0.11047102677058625,
"grad_norm": 33.91348570223861,
"learning_rate": 1.7804878048780487e-06,
"loss": 0.3914,
"mean_token_accuracy": 0.9322528839111328,
"num_tokens": 24311333.0,
"step": 163
},
{
"epoch": 0.11114876313114198,
"grad_norm": 35.04551034637384,
"learning_rate": 1.7791327913279132e-06,
"loss": 0.3714,
"mean_token_accuracy": 0.9384682103991508,
"num_tokens": 24463590.0,
"step": 164
},
{
"epoch": 0.11182649949169773,
"grad_norm": 33.68435544945808,
"learning_rate": 1.7777777777777775e-06,
"loss": 0.3835,
"mean_token_accuracy": 0.932737372815609,
"num_tokens": 24619168.0,
"step": 165
},
{
"epoch": 0.11250423585225347,
"grad_norm": 29.82505772722471,
"learning_rate": 1.776422764227642e-06,
"loss": 0.3639,
"mean_token_accuracy": 0.933692567050457,
"num_tokens": 24765859.0,
"step": 166
},
{
"epoch": 0.11318197221280922,
"grad_norm": 30.525423505810934,
"learning_rate": 1.7750677506775068e-06,
"loss": 0.3507,
"mean_token_accuracy": 0.9373074173927307,
"num_tokens": 24917083.0,
"step": 167
},
{
"epoch": 0.11385970857336496,
"grad_norm": 29.204223791728122,
"learning_rate": 1.7737127371273713e-06,
"loss": 0.3529,
"mean_token_accuracy": 0.9359963908791542,
"num_tokens": 25070216.0,
"step": 168
},
{
"epoch": 0.1145374449339207,
"grad_norm": 26.974547673145388,
"learning_rate": 1.7723577235772358e-06,
"loss": 0.3535,
"mean_token_accuracy": 0.9311786666512489,
"num_tokens": 25222236.0,
"step": 169
},
{
"epoch": 0.11521518129447644,
"grad_norm": 25.967670455879,
"learning_rate": 1.7710027100271001e-06,
"loss": 0.3434,
"mean_token_accuracy": 0.9331553354859352,
"num_tokens": 25374346.0,
"step": 170
},
{
"epoch": 0.1158929176550322,
"grad_norm": 24.357263220519712,
"learning_rate": 1.7696476964769647e-06,
"loss": 0.3335,
"mean_token_accuracy": 0.9339867532253265,
"num_tokens": 25522811.0,
"step": 171
},
{
"epoch": 0.11657065401558793,
"grad_norm": 24.239286311319983,
"learning_rate": 1.7682926829268292e-06,
"loss": 0.3158,
"mean_token_accuracy": 0.9381037876009941,
"num_tokens": 25674231.0,
"step": 172
},
{
"epoch": 0.11724839037614368,
"grad_norm": 23.38288846125647,
"learning_rate": 1.7669376693766937e-06,
"loss": 0.3186,
"mean_token_accuracy": 0.9358685091137886,
"num_tokens": 25825637.0,
"step": 173
},
{
"epoch": 0.11792612673669943,
"grad_norm": 21.22164404336873,
"learning_rate": 1.765582655826558e-06,
"loss": 0.3261,
"mean_token_accuracy": 0.9328601211309433,
"num_tokens": 25974206.0,
"step": 174
},
{
"epoch": 0.11860386309725517,
"grad_norm": 20.66910608762449,
"learning_rate": 1.7642276422764225e-06,
"loss": 0.3041,
"mean_token_accuracy": 0.9370677098631859,
"num_tokens": 26122520.0,
"step": 175
},
{
"epoch": 0.11928159945781092,
"grad_norm": 19.929368999525767,
"learning_rate": 1.7628726287262872e-06,
"loss": 0.3054,
"mean_token_accuracy": 0.9352571219205856,
"num_tokens": 26270638.0,
"step": 176
},
{
"epoch": 0.11995933581836665,
"grad_norm": 18.77625094705047,
"learning_rate": 1.7615176151761518e-06,
"loss": 0.3008,
"mean_token_accuracy": 0.9352380633354187,
"num_tokens": 26419854.0,
"step": 177
},
{
"epoch": 0.1206370721789224,
"grad_norm": 18.135479252465245,
"learning_rate": 1.7601626016260163e-06,
"loss": 0.2888,
"mean_token_accuracy": 0.9373323991894722,
"num_tokens": 26570929.0,
"step": 178
},
{
"epoch": 0.12131480853947814,
"grad_norm": 17.09097110378426,
"learning_rate": 1.7588075880758806e-06,
"loss": 0.2904,
"mean_token_accuracy": 0.9367010816931725,
"num_tokens": 26721450.0,
"step": 179
},
{
"epoch": 0.12199254490003389,
"grad_norm": 16.371606132788596,
"learning_rate": 1.7574525745257451e-06,
"loss": 0.2923,
"mean_token_accuracy": 0.9353242516517639,
"num_tokens": 26872626.0,
"step": 180
},
{
"epoch": 0.12267028126058963,
"grad_norm": 15.248691516705478,
"learning_rate": 1.7560975609756096e-06,
"loss": 0.2826,
"mean_token_accuracy": 0.9366849288344383,
"num_tokens": 27021185.0,
"step": 181
},
{
"epoch": 0.12334801762114538,
"grad_norm": 14.665572209611303,
"learning_rate": 1.7547425474254742e-06,
"loss": 0.28,
"mean_token_accuracy": 0.9356379881501198,
"num_tokens": 27171480.0,
"step": 182
},
{
"epoch": 0.12402575398170111,
"grad_norm": 13.630257963311905,
"learning_rate": 1.7533875338753387e-06,
"loss": 0.2911,
"mean_token_accuracy": 0.9309459328651428,
"num_tokens": 27322106.0,
"step": 183
},
{
"epoch": 0.12470349034225686,
"grad_norm": 13.34907992144106,
"learning_rate": 1.7520325203252032e-06,
"loss": 0.2692,
"mean_token_accuracy": 0.9370318055152893,
"num_tokens": 27472644.0,
"step": 184
},
{
"epoch": 0.1253812267028126,
"grad_norm": 12.514192586765324,
"learning_rate": 1.7506775067750677e-06,
"loss": 0.2869,
"mean_token_accuracy": 0.9315063208341599,
"num_tokens": 27624217.0,
"step": 185
},
{
"epoch": 0.12605896306336836,
"grad_norm": 11.80365697230253,
"learning_rate": 1.7493224932249322e-06,
"loss": 0.2657,
"mean_token_accuracy": 0.9369841367006302,
"num_tokens": 27770421.0,
"step": 186
},
{
"epoch": 0.1267366994239241,
"grad_norm": 11.288559694543885,
"learning_rate": 1.7479674796747968e-06,
"loss": 0.2629,
"mean_token_accuracy": 0.9366028532385826,
"num_tokens": 27919419.0,
"step": 187
},
{
"epoch": 0.12741443578447983,
"grad_norm": 11.256329471358255,
"learning_rate": 1.746612466124661e-06,
"loss": 0.2608,
"mean_token_accuracy": 0.9365546107292175,
"num_tokens": 28073242.0,
"step": 188
},
{
"epoch": 0.12809217214503557,
"grad_norm": 10.423222021208382,
"learning_rate": 1.7452574525745256e-06,
"loss": 0.2613,
"mean_token_accuracy": 0.9360240176320076,
"num_tokens": 28224007.0,
"step": 189
},
{
"epoch": 0.12876990850559134,
"grad_norm": 10.212095148631771,
"learning_rate": 1.74390243902439e-06,
"loss": 0.2663,
"mean_token_accuracy": 0.9340936243534088,
"num_tokens": 28371178.0,
"step": 190
},
{
"epoch": 0.12944764486614707,
"grad_norm": 9.412927667433392,
"learning_rate": 1.7425474254742546e-06,
"loss": 0.2542,
"mean_token_accuracy": 0.9363159984350204,
"num_tokens": 28519559.0,
"step": 191
},
{
"epoch": 0.1301253812267028,
"grad_norm": 8.826276779662683,
"learning_rate": 1.7411924119241194e-06,
"loss": 0.2592,
"mean_token_accuracy": 0.9346078857779503,
"num_tokens": 28670550.0,
"step": 192
},
{
"epoch": 0.13080311758725854,
"grad_norm": 8.135940057692663,
"learning_rate": 1.7398373983739837e-06,
"loss": 0.2505,
"mean_token_accuracy": 0.9358848333358765,
"num_tokens": 28816237.0,
"step": 193
},
{
"epoch": 0.1314808539478143,
"grad_norm": 8.043989763018512,
"learning_rate": 1.7384823848238482e-06,
"loss": 0.2469,
"mean_token_accuracy": 0.9357665106654167,
"num_tokens": 28966298.0,
"step": 194
},
{
"epoch": 0.13215859030837004,
"grad_norm": 7.767089209326023,
"learning_rate": 1.7371273712737127e-06,
"loss": 0.2392,
"mean_token_accuracy": 0.9387388676404953,
"num_tokens": 29116059.0,
"step": 195
},
{
"epoch": 0.13283632666892578,
"grad_norm": 7.318706294635459,
"learning_rate": 1.7357723577235772e-06,
"loss": 0.2398,
"mean_token_accuracy": 0.9373445063829422,
"num_tokens": 29266370.0,
"step": 196
},
{
"epoch": 0.13351406302948154,
"grad_norm": 6.936442211517777,
"learning_rate": 1.7344173441734417e-06,
"loss": 0.2389,
"mean_token_accuracy": 0.9369916915893555,
"num_tokens": 29418266.0,
"step": 197
},
{
"epoch": 0.13419179939003728,
"grad_norm": 6.779837554902574,
"learning_rate": 1.733062330623306e-06,
"loss": 0.2323,
"mean_token_accuracy": 0.9391230568289757,
"num_tokens": 29569892.0,
"step": 198
},
{
"epoch": 0.13486953575059302,
"grad_norm": 6.186085198068867,
"learning_rate": 1.7317073170731706e-06,
"loss": 0.2383,
"mean_token_accuracy": 0.9360450059175491,
"num_tokens": 29719244.0,
"step": 199
},
{
"epoch": 0.13554727211114875,
"grad_norm": 6.019009969504502,
"learning_rate": 1.730352303523035e-06,
"loss": 0.2406,
"mean_token_accuracy": 0.935395322740078,
"num_tokens": 29871381.0,
"step": 200
},
{
"epoch": 0.13622500847170452,
"grad_norm": 5.727962603690222,
"learning_rate": 1.7289972899728998e-06,
"loss": 0.2288,
"mean_token_accuracy": 0.9381909817457199,
"num_tokens": 30021540.0,
"step": 201
},
{
"epoch": 0.13690274483226025,
"grad_norm": 5.253235694977242,
"learning_rate": 1.7276422764227641e-06,
"loss": 0.2361,
"mean_token_accuracy": 0.936265304684639,
"num_tokens": 30169377.0,
"step": 202
},
{
"epoch": 0.137580481192816,
"grad_norm": 5.22232253508078,
"learning_rate": 1.7262872628726286e-06,
"loss": 0.2273,
"mean_token_accuracy": 0.9385531917214394,
"num_tokens": 30321117.0,
"step": 203
},
{
"epoch": 0.13825821755337173,
"grad_norm": 4.648804691637109,
"learning_rate": 1.7249322493224932e-06,
"loss": 0.2298,
"mean_token_accuracy": 0.9360805526375771,
"num_tokens": 30467494.0,
"step": 204
},
{
"epoch": 0.1389359539139275,
"grad_norm": 4.164262717598353,
"learning_rate": 1.7235772357723577e-06,
"loss": 0.2397,
"mean_token_accuracy": 0.9347240626811981,
"num_tokens": 30606325.0,
"step": 205
},
{
"epoch": 0.13961369027448323,
"grad_norm": 4.536463490792273,
"learning_rate": 1.7222222222222222e-06,
"loss": 0.2195,
"mean_token_accuracy": 0.938719667494297,
"num_tokens": 30758254.0,
"step": 206
},
{
"epoch": 0.14029142663503896,
"grad_norm": 4.538769291529138,
"learning_rate": 1.7208672086720865e-06,
"loss": 0.2254,
"mean_token_accuracy": 0.9374769926071167,
"num_tokens": 30905389.0,
"step": 207
},
{
"epoch": 0.14096916299559473,
"grad_norm": 4.033711929104982,
"learning_rate": 1.719512195121951e-06,
"loss": 0.2157,
"mean_token_accuracy": 0.9390349760651588,
"num_tokens": 31055026.0,
"step": 208
},
{
"epoch": 0.14164689935615046,
"grad_norm": 3.6409458860202712,
"learning_rate": 1.7181571815718158e-06,
"loss": 0.222,
"mean_token_accuracy": 0.937271773815155,
"num_tokens": 31200100.0,
"step": 209
},
{
"epoch": 0.1423246357167062,
"grad_norm": 3.4863786559982337,
"learning_rate": 1.7168021680216803e-06,
"loss": 0.2288,
"mean_token_accuracy": 0.9352766647934914,
"num_tokens": 31346837.0,
"step": 210
},
{
"epoch": 0.14300237207726194,
"grad_norm": 3.383281767656107,
"learning_rate": 1.7154471544715446e-06,
"loss": 0.2197,
"mean_token_accuracy": 0.9381493553519249,
"num_tokens": 31493121.0,
"step": 211
},
{
"epoch": 0.1436801084378177,
"grad_norm": 3.2964314796805243,
"learning_rate": 1.7140921409214091e-06,
"loss": 0.2084,
"mean_token_accuracy": 0.9412015900015831,
"num_tokens": 31641799.0,
"step": 212
},
{
"epoch": 0.14435784479837344,
"grad_norm": 3.4016540123991397,
"learning_rate": 1.7127371273712736e-06,
"loss": 0.2144,
"mean_token_accuracy": 0.938295342028141,
"num_tokens": 31787984.0,
"step": 213
},
{
"epoch": 0.14503558115892917,
"grad_norm": 3.0325880459408734,
"learning_rate": 1.7113821138211381e-06,
"loss": 0.2087,
"mean_token_accuracy": 0.9392581135034561,
"num_tokens": 31939062.0,
"step": 214
},
{
"epoch": 0.1457133175194849,
"grad_norm": 3.0832469321783913,
"learning_rate": 1.7100271002710027e-06,
"loss": 0.2163,
"mean_token_accuracy": 0.93733299523592,
"num_tokens": 32089393.0,
"step": 215
},
{
"epoch": 0.14639105388004067,
"grad_norm": 3.2888382634875866,
"learning_rate": 1.708672086720867e-06,
"loss": 0.205,
"mean_token_accuracy": 0.9410371333360672,
"num_tokens": 32238112.0,
"step": 216
},
{
"epoch": 0.1470687902405964,
"grad_norm": 2.7808473921102803,
"learning_rate": 1.7073170731707315e-06,
"loss": 0.2108,
"mean_token_accuracy": 0.9397686347365379,
"num_tokens": 32385200.0,
"step": 217
},
{
"epoch": 0.14774652660115214,
"grad_norm": 2.5104602082156737,
"learning_rate": 1.7059620596205962e-06,
"loss": 0.2164,
"mean_token_accuracy": 0.9379466697573662,
"num_tokens": 32535204.0,
"step": 218
},
{
"epoch": 0.1484242629617079,
"grad_norm": 2.735127283922612,
"learning_rate": 1.7046070460704607e-06,
"loss": 0.1965,
"mean_token_accuracy": 0.9441072791814804,
"num_tokens": 32687702.0,
"step": 219
},
{
"epoch": 0.14910199932226365,
"grad_norm": 2.4352256213682324,
"learning_rate": 1.7032520325203253e-06,
"loss": 0.2168,
"mean_token_accuracy": 0.9381761774420738,
"num_tokens": 32840837.0,
"step": 220
},
{
"epoch": 0.14977973568281938,
"grad_norm": 2.197431115010913,
"learning_rate": 1.7018970189701896e-06,
"loss": 0.2077,
"mean_token_accuracy": 0.9409635365009308,
"num_tokens": 32985864.0,
"step": 221
},
{
"epoch": 0.15045747204337512,
"grad_norm": 2.3023481059782136,
"learning_rate": 1.700542005420054e-06,
"loss": 0.1935,
"mean_token_accuracy": 0.9441099762916565,
"num_tokens": 33136575.0,
"step": 222
},
{
"epoch": 0.15113520840393088,
"grad_norm": 2.1703415601449567,
"learning_rate": 1.6991869918699186e-06,
"loss": 0.1991,
"mean_token_accuracy": 0.9431856349110603,
"num_tokens": 33285733.0,
"step": 223
},
{
"epoch": 0.15181294476448662,
"grad_norm": 2.0999864868513454,
"learning_rate": 1.6978319783197831e-06,
"loss": 0.1987,
"mean_token_accuracy": 0.9425452724099159,
"num_tokens": 33434357.0,
"step": 224
},
{
"epoch": 0.15249068112504235,
"grad_norm": 1.982812903952784,
"learning_rate": 1.6964769647696474e-06,
"loss": 0.2032,
"mean_token_accuracy": 0.9406230673193932,
"num_tokens": 33581539.0,
"step": 225
},
{
"epoch": 0.1531684174855981,
"grad_norm": 1.988849372609138,
"learning_rate": 1.6951219512195122e-06,
"loss": 0.2023,
"mean_token_accuracy": 0.9410624876618385,
"num_tokens": 33731875.0,
"step": 226
},
{
"epoch": 0.15384615384615385,
"grad_norm": 2.0620676978301242,
"learning_rate": 1.6937669376693767e-06,
"loss": 0.1904,
"mean_token_accuracy": 0.944817841053009,
"num_tokens": 33883783.0,
"step": 227
},
{
"epoch": 0.1545238902067096,
"grad_norm": 1.8858609906294919,
"learning_rate": 1.6924119241192412e-06,
"loss": 0.1954,
"mean_token_accuracy": 0.9423842057585716,
"num_tokens": 34026743.0,
"step": 228
},
{
"epoch": 0.15520162656726533,
"grad_norm": 1.7701044225179388,
"learning_rate": 1.6910569105691057e-06,
"loss": 0.198,
"mean_token_accuracy": 0.9419083893299103,
"num_tokens": 34174186.0,
"step": 229
},
{
"epoch": 0.1558793629278211,
"grad_norm": 1.8072093712894957,
"learning_rate": 1.68970189701897e-06,
"loss": 0.1852,
"mean_token_accuracy": 0.9456257075071335,
"num_tokens": 34323042.0,
"step": 230
},
{
"epoch": 0.15655709928837683,
"grad_norm": 1.7231337811365375,
"learning_rate": 1.6883468834688346e-06,
"loss": 0.1934,
"mean_token_accuracy": 0.9430926144123077,
"num_tokens": 34466803.0,
"step": 231
},
{
"epoch": 0.15723483564893256,
"grad_norm": 1.7500461493474793,
"learning_rate": 1.686991869918699e-06,
"loss": 0.1955,
"mean_token_accuracy": 0.9432472810149193,
"num_tokens": 34614121.0,
"step": 232
},
{
"epoch": 0.1579125720094883,
"grad_norm": 1.8031685403461577,
"learning_rate": 1.6856368563685636e-06,
"loss": 0.1995,
"mean_token_accuracy": 0.9413378089666367,
"num_tokens": 34768119.0,
"step": 233
},
{
"epoch": 0.15859030837004406,
"grad_norm": 1.5907404865528574,
"learning_rate": 1.684281842818428e-06,
"loss": 0.177,
"mean_token_accuracy": 0.9471688345074654,
"num_tokens": 34916728.0,
"step": 234
},
{
"epoch": 0.1592680447305998,
"grad_norm": 1.9739387942567423,
"learning_rate": 1.6829268292682926e-06,
"loss": 0.1968,
"mean_token_accuracy": 0.942169301211834,
"num_tokens": 35064413.0,
"step": 235
},
{
"epoch": 0.15994578109115554,
"grad_norm": 1.8386525906268687,
"learning_rate": 1.6815718157181572e-06,
"loss": 0.203,
"mean_token_accuracy": 0.9395218342542648,
"num_tokens": 35215765.0,
"step": 236
},
{
"epoch": 0.16062351745171127,
"grad_norm": 1.429285344989454,
"learning_rate": 1.6802168021680217e-06,
"loss": 0.1981,
"mean_token_accuracy": 0.9406212717294693,
"num_tokens": 35359898.0,
"step": 237
},
{
"epoch": 0.16130125381226704,
"grad_norm": 1.4973311916596244,
"learning_rate": 1.6788617886178862e-06,
"loss": 0.1949,
"mean_token_accuracy": 0.9427109137177467,
"num_tokens": 35507980.0,
"step": 238
},
{
"epoch": 0.16197899017282277,
"grad_norm": 1.4499758005643015,
"learning_rate": 1.6775067750677505e-06,
"loss": 0.1906,
"mean_token_accuracy": 0.9435393437743187,
"num_tokens": 35660165.0,
"step": 239
},
{
"epoch": 0.1626567265333785,
"grad_norm": 1.380569873662013,
"learning_rate": 1.676151761517615e-06,
"loss": 0.1961,
"mean_token_accuracy": 0.9422763735055923,
"num_tokens": 35808219.0,
"step": 240
},
{
"epoch": 0.16333446289393425,
"grad_norm": 1.4096173759739132,
"learning_rate": 1.6747967479674795e-06,
"loss": 0.191,
"mean_token_accuracy": 0.942513681948185,
"num_tokens": 35955460.0,
"step": 241
},
{
"epoch": 0.16401219925449,
"grad_norm": 1.4868939861076391,
"learning_rate": 1.673441734417344e-06,
"loss": 0.1988,
"mean_token_accuracy": 0.9399052634835243,
"num_tokens": 36103261.0,
"step": 242
},
{
"epoch": 0.16468993561504575,
"grad_norm": 1.4375727799110902,
"learning_rate": 1.6720867208672088e-06,
"loss": 0.1973,
"mean_token_accuracy": 0.9417018443346024,
"num_tokens": 36248189.0,
"step": 243
},
{
"epoch": 0.16536767197560148,
"grad_norm": 1.314070869390369,
"learning_rate": 1.670731707317073e-06,
"loss": 0.1893,
"mean_token_accuracy": 0.9426815882325172,
"num_tokens": 36393544.0,
"step": 244
},
{
"epoch": 0.16604540833615725,
"grad_norm": 1.3499549232471146,
"learning_rate": 1.6693766937669376e-06,
"loss": 0.1932,
"mean_token_accuracy": 0.9427401125431061,
"num_tokens": 36543518.0,
"step": 245
},
{
"epoch": 0.16672314469671298,
"grad_norm": 1.9729404580437042,
"learning_rate": 1.6680216802168021e-06,
"loss": 0.1859,
"mean_token_accuracy": 0.9439921900629997,
"num_tokens": 36694721.0,
"step": 246
},
{
"epoch": 0.16740088105726872,
"grad_norm": 1.3458282419895553,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.1958,
"mean_token_accuracy": 0.9415561556816101,
"num_tokens": 36841930.0,
"step": 247
},
{
"epoch": 0.16807861741782446,
"grad_norm": 1.3064160320363043,
"learning_rate": 1.665311653116531e-06,
"loss": 0.182,
"mean_token_accuracy": 0.9455081224441528,
"num_tokens": 36990131.0,
"step": 248
},
{
"epoch": 0.16875635377838022,
"grad_norm": 1.261676946128337,
"learning_rate": 1.6639566395663955e-06,
"loss": 0.1833,
"mean_token_accuracy": 0.944699801504612,
"num_tokens": 37137578.0,
"step": 249
},
{
"epoch": 0.16943409013893596,
"grad_norm": 1.2203540224127716,
"learning_rate": 1.66260162601626e-06,
"loss": 0.194,
"mean_token_accuracy": 0.9413745477795601,
"num_tokens": 37286524.0,
"step": 250
},
{
"epoch": 0.1701118264994917,
"grad_norm": 1.3724047567386704,
"learning_rate": 1.6612466124661245e-06,
"loss": 0.1891,
"mean_token_accuracy": 0.9431008100509644,
"num_tokens": 37438074.0,
"step": 251
},
{
"epoch": 0.17078956286004743,
"grad_norm": 1.3174705549054002,
"learning_rate": 1.6598915989159893e-06,
"loss": 0.1927,
"mean_token_accuracy": 0.9415145292878151,
"num_tokens": 37589233.0,
"step": 252
},
{
"epoch": 0.1714672992206032,
"grad_norm": 1.1838612410896918,
"learning_rate": 1.6585365853658536e-06,
"loss": 0.1837,
"mean_token_accuracy": 0.9449172392487526,
"num_tokens": 37739989.0,
"step": 253
},
{
"epoch": 0.17214503558115893,
"grad_norm": 1.2635613106193353,
"learning_rate": 1.657181571815718e-06,
"loss": 0.1877,
"mean_token_accuracy": 0.9437820985913277,
"num_tokens": 37893549.0,
"step": 254
},
{
"epoch": 0.17282277194171466,
"grad_norm": 1.282954852219653,
"learning_rate": 1.6558265582655826e-06,
"loss": 0.169,
"mean_token_accuracy": 0.9490370899438858,
"num_tokens": 38040724.0,
"step": 255
},
{
"epoch": 0.17350050830227043,
"grad_norm": 1.2530699631714797,
"learning_rate": 1.6544715447154471e-06,
"loss": 0.1827,
"mean_token_accuracy": 0.9446689784526825,
"num_tokens": 38188254.0,
"step": 256
},
{
"epoch": 0.17417824466282616,
"grad_norm": 1.100306273258646,
"learning_rate": 1.6531165311653114e-06,
"loss": 0.1815,
"mean_token_accuracy": 0.9455656632781029,
"num_tokens": 38330425.0,
"step": 257
},
{
"epoch": 0.1748559810233819,
"grad_norm": 1.5264947374361602,
"learning_rate": 1.651761517615176e-06,
"loss": 0.1861,
"mean_token_accuracy": 0.943930372595787,
"num_tokens": 38477837.0,
"step": 258
},
{
"epoch": 0.17553371738393764,
"grad_norm": 1.1532449440630994,
"learning_rate": 1.6504065040650405e-06,
"loss": 0.1861,
"mean_token_accuracy": 0.9444294348359108,
"num_tokens": 38626257.0,
"step": 259
},
{
"epoch": 0.1762114537444934,
"grad_norm": 1.6544229249884137,
"learning_rate": 1.6490514905149052e-06,
"loss": 0.1892,
"mean_token_accuracy": 0.9426649659872055,
"num_tokens": 38776569.0,
"step": 260
},
{
"epoch": 0.17688919010504914,
"grad_norm": 1.0694243551952667,
"learning_rate": 1.6476964769647697e-06,
"loss": 0.2002,
"mean_token_accuracy": 0.9409672617912292,
"num_tokens": 38925925.0,
"step": 261
},
{
"epoch": 0.17756692646560487,
"grad_norm": 1.3080074316972092,
"learning_rate": 1.646341463414634e-06,
"loss": 0.1925,
"mean_token_accuracy": 0.9425214007496834,
"num_tokens": 39077640.0,
"step": 262
},
{
"epoch": 0.1782446628261606,
"grad_norm": 1.3980933975114367,
"learning_rate": 1.6449864498644986e-06,
"loss": 0.1806,
"mean_token_accuracy": 0.9455323368310928,
"num_tokens": 39224918.0,
"step": 263
},
{
"epoch": 0.17892239918671637,
"grad_norm": 1.1857997540693603,
"learning_rate": 1.643631436314363e-06,
"loss": 0.1884,
"mean_token_accuracy": 0.9434381946921349,
"num_tokens": 39376315.0,
"step": 264
},
{
"epoch": 0.1796001355472721,
"grad_norm": 1.134544558489507,
"learning_rate": 1.6422764227642276e-06,
"loss": 0.1791,
"mean_token_accuracy": 0.9452700912952423,
"num_tokens": 39527098.0,
"step": 265
},
{
"epoch": 0.18027787190782785,
"grad_norm": 1.020597711825539,
"learning_rate": 1.6409214092140921e-06,
"loss": 0.1774,
"mean_token_accuracy": 0.9458664432168007,
"num_tokens": 39677407.0,
"step": 266
},
{
"epoch": 0.1809556082683836,
"grad_norm": 1.0196216805968796,
"learning_rate": 1.6395663956639564e-06,
"loss": 0.1938,
"mean_token_accuracy": 0.9419092014431953,
"num_tokens": 39824965.0,
"step": 267
},
{
"epoch": 0.18163334462893935,
"grad_norm": 1.0375652692946211,
"learning_rate": 1.638211382113821e-06,
"loss": 0.1725,
"mean_token_accuracy": 0.9472092837095261,
"num_tokens": 39973184.0,
"step": 268
},
{
"epoch": 0.18231108098949508,
"grad_norm": 1.069543030178723,
"learning_rate": 1.6368563685636857e-06,
"loss": 0.1835,
"mean_token_accuracy": 0.9444401264190674,
"num_tokens": 40126749.0,
"step": 269
},
{
"epoch": 0.18298881735005082,
"grad_norm": 2.6087291009006095,
"learning_rate": 1.6355013550135502e-06,
"loss": 0.1849,
"mean_token_accuracy": 0.9445175155997276,
"num_tokens": 40274296.0,
"step": 270
},
{
"epoch": 0.18366655371060658,
"grad_norm": 1.145610129912151,
"learning_rate": 1.6341463414634145e-06,
"loss": 0.1763,
"mean_token_accuracy": 0.9467235654592514,
"num_tokens": 40424327.0,
"step": 271
},
{
"epoch": 0.18434429007116232,
"grad_norm": 0.9442070840190216,
"learning_rate": 1.632791327913279e-06,
"loss": 0.1799,
"mean_token_accuracy": 0.9448632001876831,
"num_tokens": 40576373.0,
"step": 272
},
{
"epoch": 0.18502202643171806,
"grad_norm": 1.1101817736320692,
"learning_rate": 1.6314363143631435e-06,
"loss": 0.1864,
"mean_token_accuracy": 0.9435540661215782,
"num_tokens": 40726947.0,
"step": 273
},
{
"epoch": 0.1856997627922738,
"grad_norm": 1.0278047553144887,
"learning_rate": 1.630081300813008e-06,
"loss": 0.1857,
"mean_token_accuracy": 0.9442450702190399,
"num_tokens": 40881512.0,
"step": 274
},
{
"epoch": 0.18637749915282956,
"grad_norm": 1.0047281423735306,
"learning_rate": 1.6287262872628726e-06,
"loss": 0.1781,
"mean_token_accuracy": 0.9466114342212677,
"num_tokens": 41032071.0,
"step": 275
},
{
"epoch": 0.1870552355133853,
"grad_norm": 0.9207829057447015,
"learning_rate": 1.6273712737127369e-06,
"loss": 0.1793,
"mean_token_accuracy": 0.9454649612307549,
"num_tokens": 41178572.0,
"step": 276
},
{
"epoch": 0.18773297187394103,
"grad_norm": 1.0459821704366787,
"learning_rate": 1.6260162601626016e-06,
"loss": 0.1714,
"mean_token_accuracy": 0.9479309245944023,
"num_tokens": 41326245.0,
"step": 277
},
{
"epoch": 0.1884107082344968,
"grad_norm": 0.8895774959436806,
"learning_rate": 1.6246612466124661e-06,
"loss": 0.1895,
"mean_token_accuracy": 0.9422756433486938,
"num_tokens": 41477100.0,
"step": 278
},
{
"epoch": 0.18908844459505253,
"grad_norm": 1.0128601516532234,
"learning_rate": 1.6233062330623307e-06,
"loss": 0.1849,
"mean_token_accuracy": 0.9443082809448242,
"num_tokens": 41630341.0,
"step": 279
},
{
"epoch": 0.18976618095560827,
"grad_norm": 2.0424823903284413,
"learning_rate": 1.6219512195121952e-06,
"loss": 0.1851,
"mean_token_accuracy": 0.9452182948589325,
"num_tokens": 41780343.0,
"step": 280
},
{
"epoch": 0.190443917316164,
"grad_norm": 0.8145971834620163,
"learning_rate": 1.6205962059620595e-06,
"loss": 0.1792,
"mean_token_accuracy": 0.9458454251289368,
"num_tokens": 41933317.0,
"step": 281
},
{
"epoch": 0.19112165367671977,
"grad_norm": 1.078154403405584,
"learning_rate": 1.619241192411924e-06,
"loss": 0.182,
"mean_token_accuracy": 0.9441016316413879,
"num_tokens": 42079542.0,
"step": 282
},
{
"epoch": 0.1917993900372755,
"grad_norm": 0.8513071000906206,
"learning_rate": 1.6178861788617885e-06,
"loss": 0.1886,
"mean_token_accuracy": 0.9428616538643837,
"num_tokens": 42228028.0,
"step": 283
},
{
"epoch": 0.19247712639783124,
"grad_norm": 0.8207607308186832,
"learning_rate": 1.616531165311653e-06,
"loss": 0.1873,
"mean_token_accuracy": 0.9433973506093025,
"num_tokens": 42377327.0,
"step": 284
},
{
"epoch": 0.19315486275838697,
"grad_norm": 0.9210360751803185,
"learning_rate": 1.6151761517615173e-06,
"loss": 0.182,
"mean_token_accuracy": 0.9449851289391518,
"num_tokens": 42529591.0,
"step": 285
},
{
"epoch": 0.19383259911894274,
"grad_norm": 1.2912492016134,
"learning_rate": 1.613821138211382e-06,
"loss": 0.1852,
"mean_token_accuracy": 0.9434697777032852,
"num_tokens": 42676420.0,
"step": 286
},
{
"epoch": 0.19451033547949848,
"grad_norm": 0.8456164080511251,
"learning_rate": 1.6124661246612466e-06,
"loss": 0.1909,
"mean_token_accuracy": 0.9428432658314705,
"num_tokens": 42827210.0,
"step": 287
},
{
"epoch": 0.1951880718400542,
"grad_norm": 0.9723505555850847,
"learning_rate": 1.6111111111111111e-06,
"loss": 0.1817,
"mean_token_accuracy": 0.9461212381720543,
"num_tokens": 42976660.0,
"step": 288
},
{
"epoch": 0.19586580820060998,
"grad_norm": 0.8654379574437804,
"learning_rate": 1.6097560975609756e-06,
"loss": 0.1815,
"mean_token_accuracy": 0.9445760920643806,
"num_tokens": 43125512.0,
"step": 289
},
{
"epoch": 0.1965435445611657,
"grad_norm": 0.8652948188343491,
"learning_rate": 1.60840108401084e-06,
"loss": 0.1807,
"mean_token_accuracy": 0.9456342980265617,
"num_tokens": 43277205.0,
"step": 290
},
{
"epoch": 0.19722128092172145,
"grad_norm": 0.8533413578650831,
"learning_rate": 1.6070460704607045e-06,
"loss": 0.1835,
"mean_token_accuracy": 0.9441514536738396,
"num_tokens": 43423965.0,
"step": 291
},
{
"epoch": 0.19789901728227718,
"grad_norm": 2.515713642686306,
"learning_rate": 1.605691056910569e-06,
"loss": 0.1967,
"mean_token_accuracy": 0.9408286511898041,
"num_tokens": 43575854.0,
"step": 292
},
{
"epoch": 0.19857675364283295,
"grad_norm": 0.795648973749908,
"learning_rate": 1.6043360433604335e-06,
"loss": 0.1813,
"mean_token_accuracy": 0.9449711665511131,
"num_tokens": 43721651.0,
"step": 293
},
{
"epoch": 0.19925449000338868,
"grad_norm": 0.8159918175759459,
"learning_rate": 1.602981029810298e-06,
"loss": 0.1799,
"mean_token_accuracy": 0.9449080228805542,
"num_tokens": 43869979.0,
"step": 294
},
{
"epoch": 0.19993222636394442,
"grad_norm": 0.8163105160373558,
"learning_rate": 1.6016260162601625e-06,
"loss": 0.1567,
"mean_token_accuracy": 0.952298603951931,
"num_tokens": 44018298.0,
"step": 295
},
{
"epoch": 0.20060996272450016,
"grad_norm": 0.8492083507814324,
"learning_rate": 1.600271002710027e-06,
"loss": 0.1937,
"mean_token_accuracy": 0.9416857361793518,
"num_tokens": 44172138.0,
"step": 296
},
{
"epoch": 0.20128769908505592,
"grad_norm": 0.8226773724748727,
"learning_rate": 1.5989159891598916e-06,
"loss": 0.1756,
"mean_token_accuracy": 0.9463808164000511,
"num_tokens": 44322425.0,
"step": 297
},
{
"epoch": 0.20196543544561166,
"grad_norm": 0.8167617610295194,
"learning_rate": 1.597560975609756e-06,
"loss": 0.1752,
"mean_token_accuracy": 0.9460206627845764,
"num_tokens": 44471714.0,
"step": 298
},
{
"epoch": 0.2026431718061674,
"grad_norm": 0.8121474009282322,
"learning_rate": 1.5962059620596204e-06,
"loss": 0.1793,
"mean_token_accuracy": 0.945349395275116,
"num_tokens": 44620706.0,
"step": 299
},
{
"epoch": 0.20332090816672316,
"grad_norm": 0.8203925606623038,
"learning_rate": 1.594850948509485e-06,
"loss": 0.1798,
"mean_token_accuracy": 0.9457396641373634,
"num_tokens": 44768001.0,
"step": 300
},
{
"epoch": 0.2039986445272789,
"grad_norm": 1.1162539595032475,
"learning_rate": 1.5934959349593495e-06,
"loss": 0.183,
"mean_token_accuracy": 0.9439538642764091,
"num_tokens": 44917279.0,
"step": 301
},
{
"epoch": 0.20467638088783463,
"grad_norm": 0.9438985030858859,
"learning_rate": 1.592140921409214e-06,
"loss": 0.175,
"mean_token_accuracy": 0.9461761340498924,
"num_tokens": 45064408.0,
"step": 302
},
{
"epoch": 0.20535411724839037,
"grad_norm": 0.7513340518725357,
"learning_rate": 1.5907859078590787e-06,
"loss": 0.1854,
"mean_token_accuracy": 0.943101279437542,
"num_tokens": 45211245.0,
"step": 303
},
{
"epoch": 0.20603185360894613,
"grad_norm": 0.7923678369199532,
"learning_rate": 1.589430894308943e-06,
"loss": 0.1961,
"mean_token_accuracy": 0.9411340057849884,
"num_tokens": 45360319.0,
"step": 304
},
{
"epoch": 0.20670958996950187,
"grad_norm": 0.8234845112435402,
"learning_rate": 1.5880758807588075e-06,
"loss": 0.1754,
"mean_token_accuracy": 0.9461505860090256,
"num_tokens": 45508437.0,
"step": 305
},
{
"epoch": 0.2073873263300576,
"grad_norm": 0.8035375957173344,
"learning_rate": 1.586720867208672e-06,
"loss": 0.175,
"mean_token_accuracy": 0.9456483274698257,
"num_tokens": 45657849.0,
"step": 306
},
{
"epoch": 0.20806506269061334,
"grad_norm": 0.7798304235915292,
"learning_rate": 1.5853658536585366e-06,
"loss": 0.1781,
"mean_token_accuracy": 0.9458611235022545,
"num_tokens": 45806799.0,
"step": 307
},
{
"epoch": 0.2087427990511691,
"grad_norm": 1.027733000497745,
"learning_rate": 1.5840108401084009e-06,
"loss": 0.1792,
"mean_token_accuracy": 0.9450256898999214,
"num_tokens": 45956514.0,
"step": 308
},
{
"epoch": 0.20942053541172484,
"grad_norm": 0.9708179518638089,
"learning_rate": 1.5826558265582654e-06,
"loss": 0.1672,
"mean_token_accuracy": 0.9480339214205742,
"num_tokens": 46101153.0,
"step": 309
},
{
"epoch": 0.21009827177228058,
"grad_norm": 1.0101673484960263,
"learning_rate": 1.58130081300813e-06,
"loss": 0.1745,
"mean_token_accuracy": 0.9472529590129852,
"num_tokens": 46250830.0,
"step": 310
},
{
"epoch": 0.21077600813283634,
"grad_norm": 0.8055269982671405,
"learning_rate": 1.5799457994579946e-06,
"loss": 0.1857,
"mean_token_accuracy": 0.9427757039666176,
"num_tokens": 46399689.0,
"step": 311
},
{
"epoch": 0.21145374449339208,
"grad_norm": 0.8131150374890923,
"learning_rate": 1.5785907859078592e-06,
"loss": 0.1828,
"mean_token_accuracy": 0.9429053366184235,
"num_tokens": 46550048.0,
"step": 312
},
{
"epoch": 0.2121314808539478,
"grad_norm": 0.9916815324998841,
"learning_rate": 1.5772357723577235e-06,
"loss": 0.1849,
"mean_token_accuracy": 0.9444558545947075,
"num_tokens": 46700214.0,
"step": 313
},
{
"epoch": 0.21280921721450355,
"grad_norm": 0.8218886276826911,
"learning_rate": 1.575880758807588e-06,
"loss": 0.1757,
"mean_token_accuracy": 0.9461935609579086,
"num_tokens": 46850904.0,
"step": 314
},
{
"epoch": 0.2134869535750593,
"grad_norm": 1.5051826126447845,
"learning_rate": 1.5745257452574525e-06,
"loss": 0.1942,
"mean_token_accuracy": 0.9406725689768791,
"num_tokens": 46995645.0,
"step": 315
},
{
"epoch": 0.21416468993561505,
"grad_norm": 1.1804298128417674,
"learning_rate": 1.573170731707317e-06,
"loss": 0.1697,
"mean_token_accuracy": 0.9470863491296768,
"num_tokens": 47144834.0,
"step": 316
},
{
"epoch": 0.21484242629617079,
"grad_norm": 1.8709738855807754,
"learning_rate": 1.5718157181571813e-06,
"loss": 0.1767,
"mean_token_accuracy": 0.9464762061834335,
"num_tokens": 47295935.0,
"step": 317
},
{
"epoch": 0.21552016265672652,
"grad_norm": 0.9948866957339356,
"learning_rate": 1.5704607046070459e-06,
"loss": 0.1926,
"mean_token_accuracy": 0.9422658383846283,
"num_tokens": 47448767.0,
"step": 318
},
{
"epoch": 0.21619789901728229,
"grad_norm": 0.6859056336958299,
"learning_rate": 1.5691056910569104e-06,
"loss": 0.1768,
"mean_token_accuracy": 0.9466421827673912,
"num_tokens": 47597776.0,
"step": 319
},
{
"epoch": 0.21687563537783802,
"grad_norm": 0.9534355123013837,
"learning_rate": 1.5677506775067751e-06,
"loss": 0.1741,
"mean_token_accuracy": 0.9466301873326302,
"num_tokens": 47749593.0,
"step": 320
},
{
"epoch": 0.21755337173839376,
"grad_norm": 0.6601057176223992,
"learning_rate": 1.5663956639566396e-06,
"loss": 0.1706,
"mean_token_accuracy": 0.9476458579301834,
"num_tokens": 47899535.0,
"step": 321
},
{
"epoch": 0.21823110809894952,
"grad_norm": 1.4849205670776673,
"learning_rate": 1.565040650406504e-06,
"loss": 0.1831,
"mean_token_accuracy": 0.9445011243224144,
"num_tokens": 48047352.0,
"step": 322
},
{
"epoch": 0.21890884445950526,
"grad_norm": 0.743404066147728,
"learning_rate": 1.5636856368563685e-06,
"loss": 0.1804,
"mean_token_accuracy": 0.9452903419733047,
"num_tokens": 48197212.0,
"step": 323
},
{
"epoch": 0.219586580820061,
"grad_norm": 0.6631613920135498,
"learning_rate": 1.562330623306233e-06,
"loss": 0.167,
"mean_token_accuracy": 0.9489706978201866,
"num_tokens": 48347691.0,
"step": 324
},
{
"epoch": 0.22026431718061673,
"grad_norm": 0.6597316081566086,
"learning_rate": 1.5609756097560975e-06,
"loss": 0.1734,
"mean_token_accuracy": 0.9466495141386986,
"num_tokens": 48494182.0,
"step": 325
},
{
"epoch": 0.2209420535411725,
"grad_norm": 0.7592815379964866,
"learning_rate": 1.559620596205962e-06,
"loss": 0.1875,
"mean_token_accuracy": 0.9426940456032753,
"num_tokens": 48644558.0,
"step": 326
},
{
"epoch": 0.22161978990172823,
"grad_norm": 0.7647348178811313,
"learning_rate": 1.5582655826558263e-06,
"loss": 0.1738,
"mean_token_accuracy": 0.9461934119462967,
"num_tokens": 48795672.0,
"step": 327
},
{
"epoch": 0.22229752626228397,
"grad_norm": 0.7808653399303861,
"learning_rate": 1.556910569105691e-06,
"loss": 0.1751,
"mean_token_accuracy": 0.9463725537061691,
"num_tokens": 48943974.0,
"step": 328
},
{
"epoch": 0.2229752626228397,
"grad_norm": 0.756952990843898,
"learning_rate": 1.5555555555555556e-06,
"loss": 0.175,
"mean_token_accuracy": 0.9455836415290833,
"num_tokens": 49098021.0,
"step": 329
},
{
"epoch": 0.22365299898339547,
"grad_norm": 0.709020202356822,
"learning_rate": 1.55420054200542e-06,
"loss": 0.1772,
"mean_token_accuracy": 0.9447627812623978,
"num_tokens": 49244636.0,
"step": 330
},
{
"epoch": 0.2243307353439512,
"grad_norm": 2.5406344783012953,
"learning_rate": 1.5528455284552844e-06,
"loss": 0.1845,
"mean_token_accuracy": 0.9430216625332832,
"num_tokens": 49393214.0,
"step": 331
},
{
"epoch": 0.22500847170450694,
"grad_norm": 1.8544287599202158,
"learning_rate": 1.551490514905149e-06,
"loss": 0.1702,
"mean_token_accuracy": 0.9466702789068222,
"num_tokens": 49543539.0,
"step": 332
},
{
"epoch": 0.2256862080650627,
"grad_norm": 1.2263258425274202,
"learning_rate": 1.5501355013550134e-06,
"loss": 0.1733,
"mean_token_accuracy": 0.9458054676651955,
"num_tokens": 49694060.0,
"step": 333
},
{
"epoch": 0.22636394442561844,
"grad_norm": 0.8958912135833915,
"learning_rate": 1.548780487804878e-06,
"loss": 0.1793,
"mean_token_accuracy": 0.9444833248853683,
"num_tokens": 49838822.0,
"step": 334
},
{
"epoch": 0.22704168078617418,
"grad_norm": 0.7500636106488427,
"learning_rate": 1.5474254742547425e-06,
"loss": 0.171,
"mean_token_accuracy": 0.9464510008692741,
"num_tokens": 49993407.0,
"step": 335
},
{
"epoch": 0.2277194171467299,
"grad_norm": 0.8422231147582026,
"learning_rate": 1.5460704607046068e-06,
"loss": 0.1649,
"mean_token_accuracy": 0.9492187052965164,
"num_tokens": 50139231.0,
"step": 336
},
{
"epoch": 0.22839715350728568,
"grad_norm": 0.7618479553430881,
"learning_rate": 1.5447154471544715e-06,
"loss": 0.1649,
"mean_token_accuracy": 0.9489440247416496,
"num_tokens": 50285172.0,
"step": 337
},
{
"epoch": 0.2290748898678414,
"grad_norm": 3.9635844853206668,
"learning_rate": 1.543360433604336e-06,
"loss": 0.1815,
"mean_token_accuracy": 0.9444872289896011,
"num_tokens": 50434641.0,
"step": 338
},
{
"epoch": 0.22975262622839715,
"grad_norm": 1.5173197037721815,
"learning_rate": 1.5420054200542006e-06,
"loss": 0.1751,
"mean_token_accuracy": 0.9459080919623375,
"num_tokens": 50586423.0,
"step": 339
},
{
"epoch": 0.2304303625889529,
"grad_norm": 0.8715123124413551,
"learning_rate": 1.540650406504065e-06,
"loss": 0.1815,
"mean_token_accuracy": 0.9437100365757942,
"num_tokens": 50739282.0,
"step": 340
},
{
"epoch": 0.23110809894950865,
"grad_norm": 0.713851985898761,
"learning_rate": 1.5392953929539294e-06,
"loss": 0.1737,
"mean_token_accuracy": 0.9452726021409035,
"num_tokens": 50889759.0,
"step": 341
},
{
"epoch": 0.2317858353100644,
"grad_norm": 1.7227816114940597,
"learning_rate": 1.537940379403794e-06,
"loss": 0.1782,
"mean_token_accuracy": 0.9447159990668297,
"num_tokens": 51039115.0,
"step": 342
},
{
"epoch": 0.23246357167062012,
"grad_norm": 0.6357869321933041,
"learning_rate": 1.5365853658536584e-06,
"loss": 0.1654,
"mean_token_accuracy": 0.9484410360455513,
"num_tokens": 51191408.0,
"step": 343
},
{
"epoch": 0.23314130803117586,
"grad_norm": 0.7641072965945757,
"learning_rate": 1.535230352303523e-06,
"loss": 0.1788,
"mean_token_accuracy": 0.9448902904987335,
"num_tokens": 51342797.0,
"step": 344
},
{
"epoch": 0.23381904439173162,
"grad_norm": 1.426367810145877,
"learning_rate": 1.5338753387533875e-06,
"loss": 0.1669,
"mean_token_accuracy": 0.9472499415278435,
"num_tokens": 51494034.0,
"step": 345
},
{
"epoch": 0.23449678075228736,
"grad_norm": 0.8277340659798751,
"learning_rate": 1.532520325203252e-06,
"loss": 0.1779,
"mean_token_accuracy": 0.9454124942421913,
"num_tokens": 51645287.0,
"step": 346
},
{
"epoch": 0.2351745171128431,
"grad_norm": 0.6225560087724911,
"learning_rate": 1.5311653116531165e-06,
"loss": 0.1782,
"mean_token_accuracy": 0.944624200463295,
"num_tokens": 51794927.0,
"step": 347
},
{
"epoch": 0.23585225347339886,
"grad_norm": 0.6105718263558104,
"learning_rate": 1.529810298102981e-06,
"loss": 0.1676,
"mean_token_accuracy": 0.9470618143677711,
"num_tokens": 51949310.0,
"step": 348
},
{
"epoch": 0.2365299898339546,
"grad_norm": 0.8282440344375993,
"learning_rate": 1.5284552845528455e-06,
"loss": 0.1836,
"mean_token_accuracy": 0.9437334463000298,
"num_tokens": 52098351.0,
"step": 349
},
{
"epoch": 0.23720772619451033,
"grad_norm": 0.8045671772384535,
"learning_rate": 1.5271002710027099e-06,
"loss": 0.1684,
"mean_token_accuracy": 0.9471992254257202,
"num_tokens": 52243926.0,
"step": 350
},
{
"epoch": 0.23788546255506607,
"grad_norm": 0.7650637135387357,
"learning_rate": 1.5257452574525744e-06,
"loss": 0.1666,
"mean_token_accuracy": 0.9483982622623444,
"num_tokens": 52392176.0,
"step": 351
},
{
"epoch": 0.23856319891562183,
"grad_norm": 0.5877138058616375,
"learning_rate": 1.524390243902439e-06,
"loss": 0.1664,
"mean_token_accuracy": 0.9483258500695229,
"num_tokens": 52539091.0,
"step": 352
},
{
"epoch": 0.23924093527617757,
"grad_norm": 0.9492412162850925,
"learning_rate": 1.5230352303523036e-06,
"loss": 0.1798,
"mean_token_accuracy": 0.94503004103899,
"num_tokens": 52685643.0,
"step": 353
},
{
"epoch": 0.2399186716367333,
"grad_norm": 0.7914693335736157,
"learning_rate": 1.521680216802168e-06,
"loss": 0.1685,
"mean_token_accuracy": 0.9475009217858315,
"num_tokens": 52836703.0,
"step": 354
},
{
"epoch": 0.24059640799728904,
"grad_norm": 0.6318669285353299,
"learning_rate": 1.5203252032520325e-06,
"loss": 0.1826,
"mean_token_accuracy": 0.9431716948747635,
"num_tokens": 52987407.0,
"step": 355
},
{
"epoch": 0.2412741443578448,
"grad_norm": 0.6525553458237484,
"learning_rate": 1.518970189701897e-06,
"loss": 0.1719,
"mean_token_accuracy": 0.9459177628159523,
"num_tokens": 53138015.0,
"step": 356
},
{
"epoch": 0.24195188071840054,
"grad_norm": 1.0182590701495176,
"learning_rate": 1.5176151761517615e-06,
"loss": 0.1714,
"mean_token_accuracy": 0.9465559273958206,
"num_tokens": 53286706.0,
"step": 357
},
{
"epoch": 0.24262961707895628,
"grad_norm": 0.7419379260166533,
"learning_rate": 1.516260162601626e-06,
"loss": 0.1688,
"mean_token_accuracy": 0.9479071423411369,
"num_tokens": 53435927.0,
"step": 358
},
{
"epoch": 0.24330735343951204,
"grad_norm": 0.6434143063693157,
"learning_rate": 1.5149051490514903e-06,
"loss": 0.1639,
"mean_token_accuracy": 0.9488667771220207,
"num_tokens": 53583285.0,
"step": 359
},
{
"epoch": 0.24398508980006778,
"grad_norm": 0.7064489736323913,
"learning_rate": 1.5135501355013548e-06,
"loss": 0.1712,
"mean_token_accuracy": 0.9466379284858704,
"num_tokens": 53729652.0,
"step": 360
},
{
"epoch": 0.24466282616062351,
"grad_norm": 0.57356802923614,
"learning_rate": 1.5121951219512194e-06,
"loss": 0.1814,
"mean_token_accuracy": 0.9437981992959976,
"num_tokens": 53879309.0,
"step": 361
},
{
"epoch": 0.24534056252117925,
"grad_norm": 0.6642508244898545,
"learning_rate": 1.510840108401084e-06,
"loss": 0.1743,
"mean_token_accuracy": 0.945423923432827,
"num_tokens": 54026614.0,
"step": 362
},
{
"epoch": 0.24601829888173501,
"grad_norm": 0.7069645718402103,
"learning_rate": 1.5094850948509486e-06,
"loss": 0.1613,
"mean_token_accuracy": 0.9490662589669228,
"num_tokens": 54176925.0,
"step": 363
},
{
"epoch": 0.24669603524229075,
"grad_norm": 0.6324630784607443,
"learning_rate": 1.508130081300813e-06,
"loss": 0.1789,
"mean_token_accuracy": 0.9442641958594322,
"num_tokens": 54325352.0,
"step": 364
},
{
"epoch": 0.2473737716028465,
"grad_norm": 0.8384176307680217,
"learning_rate": 1.5067750677506774e-06,
"loss": 0.1807,
"mean_token_accuracy": 0.9429530650377274,
"num_tokens": 54470233.0,
"step": 365
},
{
"epoch": 0.24805150796340222,
"grad_norm": 0.6954590281573189,
"learning_rate": 1.505420054200542e-06,
"loss": 0.1755,
"mean_token_accuracy": 0.9451927840709686,
"num_tokens": 54618289.0,
"step": 366
},
{
"epoch": 0.248729244323958,
"grad_norm": 0.6399857892270593,
"learning_rate": 1.5040650406504065e-06,
"loss": 0.1776,
"mean_token_accuracy": 0.9442391991615295,
"num_tokens": 54769472.0,
"step": 367
},
{
"epoch": 0.24940698068451372,
"grad_norm": 0.5810678934700249,
"learning_rate": 1.5027100271002708e-06,
"loss": 0.1806,
"mean_token_accuracy": 0.9441807121038437,
"num_tokens": 54918059.0,
"step": 368
},
{
"epoch": 0.25008471704506946,
"grad_norm": 0.5748648220171113,
"learning_rate": 1.5013550135501353e-06,
"loss": 0.1702,
"mean_token_accuracy": 0.9472423642873764,
"num_tokens": 55061935.0,
"step": 369
},
{
"epoch": 0.2507624534056252,
"grad_norm": 0.6204651627049123,
"learning_rate": 1.5e-06,
"loss": 0.1779,
"mean_token_accuracy": 0.9443566277623177,
"num_tokens": 55207909.0,
"step": 370
},
{
"epoch": 0.25144018976618093,
"grad_norm": 1.9313730908634967,
"learning_rate": 1.4986449864498646e-06,
"loss": 0.1668,
"mean_token_accuracy": 0.9468053802847862,
"num_tokens": 55359342.0,
"step": 371
},
{
"epoch": 0.2521179261267367,
"grad_norm": 0.7053643966816144,
"learning_rate": 1.497289972899729e-06,
"loss": 0.1694,
"mean_token_accuracy": 0.9480549320578575,
"num_tokens": 55509924.0,
"step": 372
},
{
"epoch": 0.25279566248729246,
"grad_norm": 0.5711946424889177,
"learning_rate": 1.4959349593495934e-06,
"loss": 0.1761,
"mean_token_accuracy": 0.9459714740514755,
"num_tokens": 55655563.0,
"step": 373
},
{
"epoch": 0.2534733988478482,
"grad_norm": 0.7057929562458208,
"learning_rate": 1.494579945799458e-06,
"loss": 0.1861,
"mean_token_accuracy": 0.9424010515213013,
"num_tokens": 55805408.0,
"step": 374
},
{
"epoch": 0.25415113520840393,
"grad_norm": 0.6779480981631576,
"learning_rate": 1.4932249322493224e-06,
"loss": 0.1647,
"mean_token_accuracy": 0.9481048583984375,
"num_tokens": 55954355.0,
"step": 375
},
{
"epoch": 0.25482887156895967,
"grad_norm": 0.6446360496909933,
"learning_rate": 1.491869918699187e-06,
"loss": 0.1738,
"mean_token_accuracy": 0.9460531622171402,
"num_tokens": 56104752.0,
"step": 376
},
{
"epoch": 0.2555066079295154,
"grad_norm": 0.5592091503855612,
"learning_rate": 1.4905149051490513e-06,
"loss": 0.1686,
"mean_token_accuracy": 0.9460347890853882,
"num_tokens": 56253865.0,
"step": 377
},
{
"epoch": 0.25618434429007114,
"grad_norm": 0.7691679868780944,
"learning_rate": 1.4891598915989158e-06,
"loss": 0.1762,
"mean_token_accuracy": 0.9456475153565407,
"num_tokens": 56401545.0,
"step": 378
},
{
"epoch": 0.25686208065062693,
"grad_norm": 0.5847739125186556,
"learning_rate": 1.4878048780487805e-06,
"loss": 0.1744,
"mean_token_accuracy": 0.9454444199800491,
"num_tokens": 56548503.0,
"step": 379
},
{
"epoch": 0.25753981701118267,
"grad_norm": 0.736275038780874,
"learning_rate": 1.486449864498645e-06,
"loss": 0.1673,
"mean_token_accuracy": 0.9481697604060173,
"num_tokens": 56698713.0,
"step": 380
},
{
"epoch": 0.2582175533717384,
"grad_norm": 0.5901455728571308,
"learning_rate": 1.4850948509485095e-06,
"loss": 0.1653,
"mean_token_accuracy": 0.9482420459389687,
"num_tokens": 56842679.0,
"step": 381
},
{
"epoch": 0.25889528973229414,
"grad_norm": 0.6090115372442366,
"learning_rate": 1.4837398373983739e-06,
"loss": 0.1674,
"mean_token_accuracy": 0.9481484293937683,
"num_tokens": 56990745.0,
"step": 382
},
{
"epoch": 0.2595730260928499,
"grad_norm": 0.617661866412643,
"learning_rate": 1.4823848238482384e-06,
"loss": 0.1626,
"mean_token_accuracy": 0.9488430246710777,
"num_tokens": 57139222.0,
"step": 383
},
{
"epoch": 0.2602507624534056,
"grad_norm": 0.700865862234029,
"learning_rate": 1.4810298102981029e-06,
"loss": 0.1646,
"mean_token_accuracy": 0.9490372464060783,
"num_tokens": 57285381.0,
"step": 384
},
{
"epoch": 0.26092849881396135,
"grad_norm": 0.6651593095325409,
"learning_rate": 1.4796747967479674e-06,
"loss": 0.1764,
"mean_token_accuracy": 0.9455696046352386,
"num_tokens": 57436822.0,
"step": 385
},
{
"epoch": 0.2616062351745171,
"grad_norm": 0.5245056227383542,
"learning_rate": 1.478319783197832e-06,
"loss": 0.164,
"mean_token_accuracy": 0.9481494948267937,
"num_tokens": 57580522.0,
"step": 386
},
{
"epoch": 0.2622839715350729,
"grad_norm": 0.7261567302725815,
"learning_rate": 1.4769647696476962e-06,
"loss": 0.1735,
"mean_token_accuracy": 0.9464136362075806,
"num_tokens": 57726389.0,
"step": 387
},
{
"epoch": 0.2629617078956286,
"grad_norm": 0.5153986387448213,
"learning_rate": 1.475609756097561e-06,
"loss": 0.1648,
"mean_token_accuracy": 0.9486708492040634,
"num_tokens": 57876226.0,
"step": 388
},
{
"epoch": 0.26363944425618435,
"grad_norm": 0.5414010058573772,
"learning_rate": 1.4742547425474255e-06,
"loss": 0.1772,
"mean_token_accuracy": 0.9455103054642677,
"num_tokens": 58027117.0,
"step": 389
},
{
"epoch": 0.2643171806167401,
"grad_norm": 0.628858128487318,
"learning_rate": 1.47289972899729e-06,
"loss": 0.1718,
"mean_token_accuracy": 0.9465411677956581,
"num_tokens": 58173593.0,
"step": 390
},
{
"epoch": 0.2649949169772958,
"grad_norm": 1.1610350160200895,
"learning_rate": 1.4715447154471543e-06,
"loss": 0.1924,
"mean_token_accuracy": 0.9406460449099541,
"num_tokens": 58316482.0,
"step": 391
},
{
"epoch": 0.26567265333785156,
"grad_norm": 0.5892258484112674,
"learning_rate": 1.4701897018970188e-06,
"loss": 0.1781,
"mean_token_accuracy": 0.9436300992965698,
"num_tokens": 58467767.0,
"step": 392
},
{
"epoch": 0.2663503896984073,
"grad_norm": 2.1658319104198,
"learning_rate": 1.4688346883468834e-06,
"loss": 0.1837,
"mean_token_accuracy": 0.9428680464625359,
"num_tokens": 58619811.0,
"step": 393
},
{
"epoch": 0.2670281260589631,
"grad_norm": 0.5878236554873534,
"learning_rate": 1.4674796747967479e-06,
"loss": 0.1675,
"mean_token_accuracy": 0.947679303586483,
"num_tokens": 58770262.0,
"step": 394
},
{
"epoch": 0.2677058624195188,
"grad_norm": 1.014211328981339,
"learning_rate": 1.4661246612466124e-06,
"loss": 0.1708,
"mean_token_accuracy": 0.9462408125400543,
"num_tokens": 58918504.0,
"step": 395
},
{
"epoch": 0.26838359878007456,
"grad_norm": 0.573535814041953,
"learning_rate": 1.464769647696477e-06,
"loss": 0.1829,
"mean_token_accuracy": 0.9431460350751877,
"num_tokens": 59069941.0,
"step": 396
},
{
"epoch": 0.2690613351406303,
"grad_norm": 0.5211265319964212,
"learning_rate": 1.4634146341463414e-06,
"loss": 0.1696,
"mean_token_accuracy": 0.9468951299786568,
"num_tokens": 59217661.0,
"step": 397
},
{
"epoch": 0.26973907150118603,
"grad_norm": 0.47731716171111105,
"learning_rate": 1.462059620596206e-06,
"loss": 0.1626,
"mean_token_accuracy": 0.9483945071697235,
"num_tokens": 59368209.0,
"step": 398
},
{
"epoch": 0.27041680786174177,
"grad_norm": 0.6369462410545568,
"learning_rate": 1.4607046070460705e-06,
"loss": 0.177,
"mean_token_accuracy": 0.9451368376612663,
"num_tokens": 59516528.0,
"step": 399
},
{
"epoch": 0.2710945442222975,
"grad_norm": 0.6314122894037728,
"learning_rate": 1.4593495934959348e-06,
"loss": 0.1743,
"mean_token_accuracy": 0.9454843327403069,
"num_tokens": 59670956.0,
"step": 400
},
{
"epoch": 0.27177228058285324,
"grad_norm": 0.6436872262414639,
"learning_rate": 1.4579945799457993e-06,
"loss": 0.1638,
"mean_token_accuracy": 0.9488751068711281,
"num_tokens": 59818251.0,
"step": 401
},
{
"epoch": 0.27245001694340903,
"grad_norm": 0.7202028539827626,
"learning_rate": 1.4566395663956638e-06,
"loss": 0.1667,
"mean_token_accuracy": 0.9471313506364822,
"num_tokens": 59963581.0,
"step": 402
},
{
"epoch": 0.27312775330396477,
"grad_norm": 0.6133438385683547,
"learning_rate": 1.4552845528455283e-06,
"loss": 0.1754,
"mean_token_accuracy": 0.9452999755740166,
"num_tokens": 60112835.0,
"step": 403
},
{
"epoch": 0.2738054896645205,
"grad_norm": 0.5430933799454143,
"learning_rate": 1.453929539295393e-06,
"loss": 0.1785,
"mean_token_accuracy": 0.9441541954874992,
"num_tokens": 60262081.0,
"step": 404
},
{
"epoch": 0.27448322602507624,
"grad_norm": 0.5688106499270535,
"learning_rate": 1.4525745257452574e-06,
"loss": 0.1769,
"mean_token_accuracy": 0.9446316137909889,
"num_tokens": 60406861.0,
"step": 405
},
{
"epoch": 0.275160962385632,
"grad_norm": 0.7473940935553454,
"learning_rate": 1.451219512195122e-06,
"loss": 0.1723,
"mean_token_accuracy": 0.9457308053970337,
"num_tokens": 60559494.0,
"step": 406
},
{
"epoch": 0.2758386987461877,
"grad_norm": 0.801132251747668,
"learning_rate": 1.4498644986449864e-06,
"loss": 0.1745,
"mean_token_accuracy": 0.9456846341490746,
"num_tokens": 60710003.0,
"step": 407
},
{
"epoch": 0.27651643510674345,
"grad_norm": 0.6208498730487398,
"learning_rate": 1.448509485094851e-06,
"loss": 0.1764,
"mean_token_accuracy": 0.9447103142738342,
"num_tokens": 60856286.0,
"step": 408
},
{
"epoch": 0.27719417146729924,
"grad_norm": 0.6774623538519904,
"learning_rate": 1.4471544715447155e-06,
"loss": 0.1684,
"mean_token_accuracy": 0.9472365826368332,
"num_tokens": 61006327.0,
"step": 409
},
{
"epoch": 0.277871907827855,
"grad_norm": 0.6357023280654596,
"learning_rate": 1.4457994579945798e-06,
"loss": 0.1748,
"mean_token_accuracy": 0.9449824169278145,
"num_tokens": 61155663.0,
"step": 410
},
{
"epoch": 0.2785496441884107,
"grad_norm": 0.586651942204708,
"learning_rate": 1.4444444444444443e-06,
"loss": 0.1809,
"mean_token_accuracy": 0.9449451044201851,
"num_tokens": 61306989.0,
"step": 411
},
{
"epoch": 0.27922738054896645,
"grad_norm": 0.5943615384568424,
"learning_rate": 1.4430894308943088e-06,
"loss": 0.1725,
"mean_token_accuracy": 0.945733904838562,
"num_tokens": 61454320.0,
"step": 412
},
{
"epoch": 0.2799051169095222,
"grad_norm": 0.535959689403948,
"learning_rate": 1.4417344173441735e-06,
"loss": 0.1764,
"mean_token_accuracy": 0.9447762593626976,
"num_tokens": 61604384.0,
"step": 413
},
{
"epoch": 0.2805828532700779,
"grad_norm": 1.1108469224632183,
"learning_rate": 1.4403794037940378e-06,
"loss": 0.1881,
"mean_token_accuracy": 0.9409819021821022,
"num_tokens": 61749668.0,
"step": 414
},
{
"epoch": 0.28126058963063366,
"grad_norm": 1.4220143979516637,
"learning_rate": 1.4390243902439024e-06,
"loss": 0.1621,
"mean_token_accuracy": 0.948139026761055,
"num_tokens": 61896561.0,
"step": 415
},
{
"epoch": 0.28193832599118945,
"grad_norm": 5.664714263912324,
"learning_rate": 1.4376693766937669e-06,
"loss": 0.167,
"mean_token_accuracy": 0.9476560726761818,
"num_tokens": 62046096.0,
"step": 416
},
{
"epoch": 0.2826160623517452,
"grad_norm": 0.5408574541103001,
"learning_rate": 1.4363143631436314e-06,
"loss": 0.1737,
"mean_token_accuracy": 0.9460098519921303,
"num_tokens": 62196352.0,
"step": 417
},
{
"epoch": 0.2832937987123009,
"grad_norm": 0.5171848037398799,
"learning_rate": 1.434959349593496e-06,
"loss": 0.1648,
"mean_token_accuracy": 0.947636604309082,
"num_tokens": 62342590.0,
"step": 418
},
{
"epoch": 0.28397153507285666,
"grad_norm": 0.7681427596162449,
"learning_rate": 1.4336043360433602e-06,
"loss": 0.1765,
"mean_token_accuracy": 0.9449510797858238,
"num_tokens": 62492065.0,
"step": 419
},
{
"epoch": 0.2846492714334124,
"grad_norm": 0.5576209119650561,
"learning_rate": 1.4322493224932248e-06,
"loss": 0.1714,
"mean_token_accuracy": 0.9463492035865784,
"num_tokens": 62642728.0,
"step": 420
},
{
"epoch": 0.28532700779396813,
"grad_norm": 1.554946021967147,
"learning_rate": 1.4308943089430895e-06,
"loss": 0.1698,
"mean_token_accuracy": 0.9462010189890862,
"num_tokens": 62791436.0,
"step": 421
},
{
"epoch": 0.28600474415452387,
"grad_norm": 0.5922041226434441,
"learning_rate": 1.429539295392954e-06,
"loss": 0.1694,
"mean_token_accuracy": 0.9464942514896393,
"num_tokens": 62936005.0,
"step": 422
},
{
"epoch": 0.2866824805150796,
"grad_norm": 0.5037684488003914,
"learning_rate": 1.4281842818428185e-06,
"loss": 0.1739,
"mean_token_accuracy": 0.9458468109369278,
"num_tokens": 63082667.0,
"step": 423
},
{
"epoch": 0.2873602168756354,
"grad_norm": 1.6124042876955846,
"learning_rate": 1.4268292682926828e-06,
"loss": 0.1661,
"mean_token_accuracy": 0.9480357691645622,
"num_tokens": 63236677.0,
"step": 424
},
{
"epoch": 0.28803795323619114,
"grad_norm": 0.5859070705046109,
"learning_rate": 1.4254742547425473e-06,
"loss": 0.1826,
"mean_token_accuracy": 0.9433834180235863,
"num_tokens": 63386648.0,
"step": 425
},
{
"epoch": 0.28871568959674687,
"grad_norm": 0.5369683321519448,
"learning_rate": 1.4241192411924119e-06,
"loss": 0.1664,
"mean_token_accuracy": 0.9470108896493912,
"num_tokens": 63535840.0,
"step": 426
},
{
"epoch": 0.2893934259573026,
"grad_norm": 0.564420602002199,
"learning_rate": 1.4227642276422764e-06,
"loss": 0.1705,
"mean_token_accuracy": 0.9462280347943306,
"num_tokens": 63682575.0,
"step": 427
},
{
"epoch": 0.29007116231785834,
"grad_norm": 0.5839667069470125,
"learning_rate": 1.4214092140921407e-06,
"loss": 0.1709,
"mean_token_accuracy": 0.9467422887682915,
"num_tokens": 63831495.0,
"step": 428
},
{
"epoch": 0.2907488986784141,
"grad_norm": 0.9956637241221521,
"learning_rate": 1.4200542005420052e-06,
"loss": 0.1682,
"mean_token_accuracy": 0.9462997168302536,
"num_tokens": 63979852.0,
"step": 429
},
{
"epoch": 0.2914266350389698,
"grad_norm": 0.5704586511896154,
"learning_rate": 1.41869918699187e-06,
"loss": 0.1746,
"mean_token_accuracy": 0.9454977512359619,
"num_tokens": 64129698.0,
"step": 430
},
{
"epoch": 0.2921043713995256,
"grad_norm": 1.871088990317711,
"learning_rate": 1.4173441734417345e-06,
"loss": 0.1588,
"mean_token_accuracy": 0.9499670192599297,
"num_tokens": 64279200.0,
"step": 431
},
{
"epoch": 0.29278210776008134,
"grad_norm": 1.5476889433417227,
"learning_rate": 1.415989159891599e-06,
"loss": 0.1723,
"mean_token_accuracy": 0.9459821656346321,
"num_tokens": 64425862.0,
"step": 432
},
{
"epoch": 0.2934598441206371,
"grad_norm": 0.5123528384746117,
"learning_rate": 1.4146341463414633e-06,
"loss": 0.1533,
"mean_token_accuracy": 0.9508016780018806,
"num_tokens": 64571479.0,
"step": 433
},
{
"epoch": 0.2941375804811928,
"grad_norm": 0.5428371445057838,
"learning_rate": 1.4132791327913278e-06,
"loss": 0.1758,
"mean_token_accuracy": 0.9448861479759216,
"num_tokens": 64724369.0,
"step": 434
},
{
"epoch": 0.29481531684174855,
"grad_norm": 0.5636544542076198,
"learning_rate": 1.4119241192411923e-06,
"loss": 0.1647,
"mean_token_accuracy": 0.9479606598615646,
"num_tokens": 64874118.0,
"step": 435
},
{
"epoch": 0.2954930532023043,
"grad_norm": 0.6426992034644753,
"learning_rate": 1.4105691056910569e-06,
"loss": 0.1637,
"mean_token_accuracy": 0.9485922604799271,
"num_tokens": 65022090.0,
"step": 436
},
{
"epoch": 0.29617078956286,
"grad_norm": 0.5686534096575024,
"learning_rate": 1.4092140921409212e-06,
"loss": 0.1674,
"mean_token_accuracy": 0.947353720664978,
"num_tokens": 65173700.0,
"step": 437
},
{
"epoch": 0.2968485259234158,
"grad_norm": 0.663170523983989,
"learning_rate": 1.4078590785907859e-06,
"loss": 0.1683,
"mean_token_accuracy": 0.9468541517853737,
"num_tokens": 65321565.0,
"step": 438
},
{
"epoch": 0.29752626228397155,
"grad_norm": 0.9033226102569267,
"learning_rate": 1.4065040650406504e-06,
"loss": 0.1769,
"mean_token_accuracy": 0.9448807463049889,
"num_tokens": 65471086.0,
"step": 439
},
{
"epoch": 0.2982039986445273,
"grad_norm": 0.6238168471167909,
"learning_rate": 1.405149051490515e-06,
"loss": 0.1799,
"mean_token_accuracy": 0.9433454647660255,
"num_tokens": 65622215.0,
"step": 440
},
{
"epoch": 0.298881735005083,
"grad_norm": 0.65657877294606,
"learning_rate": 1.4037940379403795e-06,
"loss": 0.161,
"mean_token_accuracy": 0.9487375542521477,
"num_tokens": 65773020.0,
"step": 441
},
{
"epoch": 0.29955947136563876,
"grad_norm": 0.5451737092122348,
"learning_rate": 1.4024390243902438e-06,
"loss": 0.1751,
"mean_token_accuracy": 0.9447740465402603,
"num_tokens": 65925286.0,
"step": 442
},
{
"epoch": 0.3002372077261945,
"grad_norm": 0.9603213545168497,
"learning_rate": 1.4010840108401083e-06,
"loss": 0.1771,
"mean_token_accuracy": 0.944790817797184,
"num_tokens": 66071253.0,
"step": 443
},
{
"epoch": 0.30091494408675024,
"grad_norm": 0.5051328972393605,
"learning_rate": 1.3997289972899728e-06,
"loss": 0.1694,
"mean_token_accuracy": 0.9472986534237862,
"num_tokens": 66221084.0,
"step": 444
},
{
"epoch": 0.30159268044730597,
"grad_norm": 0.5285524624142262,
"learning_rate": 1.3983739837398373e-06,
"loss": 0.172,
"mean_token_accuracy": 0.9456663802266121,
"num_tokens": 66374673.0,
"step": 445
},
{
"epoch": 0.30227041680786176,
"grad_norm": 0.5569235706497918,
"learning_rate": 1.3970189701897018e-06,
"loss": 0.1556,
"mean_token_accuracy": 0.9494876861572266,
"num_tokens": 66523000.0,
"step": 446
},
{
"epoch": 0.3029481531684175,
"grad_norm": 0.6230296052835673,
"learning_rate": 1.3956639566395664e-06,
"loss": 0.1759,
"mean_token_accuracy": 0.9450423941016197,
"num_tokens": 66671260.0,
"step": 447
},
{
"epoch": 0.30362588952897324,
"grad_norm": 0.7398089733547972,
"learning_rate": 1.3943089430894309e-06,
"loss": 0.1686,
"mean_token_accuracy": 0.9479307159781456,
"num_tokens": 66820184.0,
"step": 448
},
{
"epoch": 0.304303625889529,
"grad_norm": 0.5061983222632233,
"learning_rate": 1.3929539295392954e-06,
"loss": 0.1655,
"mean_token_accuracy": 0.9474298283457756,
"num_tokens": 66965488.0,
"step": 449
},
{
"epoch": 0.3049813622500847,
"grad_norm": 0.5750619870591818,
"learning_rate": 1.39159891598916e-06,
"loss": 0.1741,
"mean_token_accuracy": 0.9448145478963852,
"num_tokens": 67114457.0,
"step": 450
},
{
"epoch": 0.30565909861064045,
"grad_norm": 0.6895145752098988,
"learning_rate": 1.3902439024390242e-06,
"loss": 0.1717,
"mean_token_accuracy": 0.9465706199407578,
"num_tokens": 67267546.0,
"step": 451
},
{
"epoch": 0.3063368349711962,
"grad_norm": 0.5779351191860994,
"learning_rate": 1.3888888888888887e-06,
"loss": 0.1793,
"mean_token_accuracy": 0.9438166320323944,
"num_tokens": 67414865.0,
"step": 452
},
{
"epoch": 0.307014571331752,
"grad_norm": 0.5393943677688567,
"learning_rate": 1.3875338753387533e-06,
"loss": 0.1743,
"mean_token_accuracy": 0.9451542571187019,
"num_tokens": 67561255.0,
"step": 453
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.516057277111362,
"learning_rate": 1.3861788617886178e-06,
"loss": 0.1699,
"mean_token_accuracy": 0.9466835260391235,
"num_tokens": 67713911.0,
"step": 454
},
{
"epoch": 0.30837004405286345,
"grad_norm": 0.5651895866813262,
"learning_rate": 1.3848238482384825e-06,
"loss": 0.1837,
"mean_token_accuracy": 0.9428005740046501,
"num_tokens": 67862459.0,
"step": 455
},
{
"epoch": 0.3090477804134192,
"grad_norm": 1.8607347681619975,
"learning_rate": 1.3834688346883468e-06,
"loss": 0.1669,
"mean_token_accuracy": 0.9473821371793747,
"num_tokens": 68015157.0,
"step": 456
},
{
"epoch": 0.3097255167739749,
"grad_norm": 0.6787472041403066,
"learning_rate": 1.3821138211382113e-06,
"loss": 0.171,
"mean_token_accuracy": 0.9469644278287888,
"num_tokens": 68163700.0,
"step": 457
},
{
"epoch": 0.31040325313453065,
"grad_norm": 0.5242628755897727,
"learning_rate": 1.3807588075880759e-06,
"loss": 0.1797,
"mean_token_accuracy": 0.9444685280323029,
"num_tokens": 68313252.0,
"step": 458
},
{
"epoch": 0.3110809894950864,
"grad_norm": 0.6132130405405823,
"learning_rate": 1.3794037940379404e-06,
"loss": 0.1659,
"mean_token_accuracy": 0.9476732388138771,
"num_tokens": 68465402.0,
"step": 459
},
{
"epoch": 0.3117587258556422,
"grad_norm": 0.6238582052168113,
"learning_rate": 1.3780487804878047e-06,
"loss": 0.1659,
"mean_token_accuracy": 0.9477816596627235,
"num_tokens": 68617127.0,
"step": 460
},
{
"epoch": 0.3124364622161979,
"grad_norm": 0.5029242618624353,
"learning_rate": 1.3766937669376692e-06,
"loss": 0.1635,
"mean_token_accuracy": 0.9483131021261215,
"num_tokens": 68767063.0,
"step": 461
},
{
"epoch": 0.31311419857675366,
"grad_norm": 0.5515295054346878,
"learning_rate": 1.3753387533875337e-06,
"loss": 0.1671,
"mean_token_accuracy": 0.9469945877790451,
"num_tokens": 68917560.0,
"step": 462
},
{
"epoch": 0.3137919349373094,
"grad_norm": 0.6068528780075827,
"learning_rate": 1.3739837398373982e-06,
"loss": 0.1659,
"mean_token_accuracy": 0.9475928917527199,
"num_tokens": 69068425.0,
"step": 463
},
{
"epoch": 0.3144696712978651,
"grad_norm": 0.5545208173289021,
"learning_rate": 1.372628726287263e-06,
"loss": 0.1671,
"mean_token_accuracy": 0.9473569318652153,
"num_tokens": 69217619.0,
"step": 464
},
{
"epoch": 0.31514740765842086,
"grad_norm": 0.4733218472766618,
"learning_rate": 1.3712737127371273e-06,
"loss": 0.1634,
"mean_token_accuracy": 0.9482020065188408,
"num_tokens": 69368394.0,
"step": 465
},
{
"epoch": 0.3158251440189766,
"grad_norm": 0.4955677510087951,
"learning_rate": 1.3699186991869918e-06,
"loss": 0.1704,
"mean_token_accuracy": 0.9457170516252518,
"num_tokens": 69514936.0,
"step": 466
},
{
"epoch": 0.31650288037953234,
"grad_norm": 0.7772199753715142,
"learning_rate": 1.3685636856368563e-06,
"loss": 0.1641,
"mean_token_accuracy": 0.947662316262722,
"num_tokens": 69668437.0,
"step": 467
},
{
"epoch": 0.31718061674008813,
"grad_norm": 0.5756478947859188,
"learning_rate": 1.3672086720867208e-06,
"loss": 0.171,
"mean_token_accuracy": 0.9456316977739334,
"num_tokens": 69818454.0,
"step": 468
},
{
"epoch": 0.31785835310064386,
"grad_norm": 0.8871875668287942,
"learning_rate": 1.3658536585365854e-06,
"loss": 0.1768,
"mean_token_accuracy": 0.9445038959383965,
"num_tokens": 69970526.0,
"step": 469
},
{
"epoch": 0.3185360894611996,
"grad_norm": 0.547639293799732,
"learning_rate": 1.3644986449864497e-06,
"loss": 0.1772,
"mean_token_accuracy": 0.9447048604488373,
"num_tokens": 70118479.0,
"step": 470
},
{
"epoch": 0.31921382582175534,
"grad_norm": 0.5942792200494496,
"learning_rate": 1.3631436314363142e-06,
"loss": 0.165,
"mean_token_accuracy": 0.9484960064291954,
"num_tokens": 70264592.0,
"step": 471
},
{
"epoch": 0.3198915621823111,
"grad_norm": 0.8867268905803408,
"learning_rate": 1.361788617886179e-06,
"loss": 0.1804,
"mean_token_accuracy": 0.9441796317696571,
"num_tokens": 70413918.0,
"step": 472
},
{
"epoch": 0.3205692985428668,
"grad_norm": 0.5964766492892464,
"learning_rate": 1.3604336043360434e-06,
"loss": 0.1598,
"mean_token_accuracy": 0.9500296339392662,
"num_tokens": 70557619.0,
"step": 473
},
{
"epoch": 0.32124703490342255,
"grad_norm": 0.496311307208984,
"learning_rate": 1.3590785907859078e-06,
"loss": 0.1725,
"mean_token_accuracy": 0.9461822360754013,
"num_tokens": 70712848.0,
"step": 474
},
{
"epoch": 0.32192477126397834,
"grad_norm": 1.0881675555317671,
"learning_rate": 1.3577235772357723e-06,
"loss": 0.1696,
"mean_token_accuracy": 0.9468296840786934,
"num_tokens": 70860911.0,
"step": 475
},
{
"epoch": 0.3226025076245341,
"grad_norm": 0.7588836627706635,
"learning_rate": 1.3563685636856368e-06,
"loss": 0.1767,
"mean_token_accuracy": 0.9439148157835007,
"num_tokens": 71007181.0,
"step": 476
},
{
"epoch": 0.3232802439850898,
"grad_norm": 0.7597258614999529,
"learning_rate": 1.3550135501355013e-06,
"loss": 0.1676,
"mean_token_accuracy": 0.94772619754076,
"num_tokens": 71156741.0,
"step": 477
},
{
"epoch": 0.32395798034564555,
"grad_norm": 0.5079000156085575,
"learning_rate": 1.3536585365853658e-06,
"loss": 0.186,
"mean_token_accuracy": 0.9424601569771767,
"num_tokens": 71305246.0,
"step": 478
},
{
"epoch": 0.3246357167062013,
"grad_norm": 0.7809688395216372,
"learning_rate": 1.3523035230352301e-06,
"loss": 0.1532,
"mean_token_accuracy": 0.9514161571860313,
"num_tokens": 71456332.0,
"step": 479
},
{
"epoch": 0.325313453066757,
"grad_norm": 1.8673061353136242,
"learning_rate": 1.3509485094850947e-06,
"loss": 0.1882,
"mean_token_accuracy": 0.9417855143547058,
"num_tokens": 71606935.0,
"step": 480
},
{
"epoch": 0.32599118942731276,
"grad_norm": 0.48000691431047476,
"learning_rate": 1.3495934959349594e-06,
"loss": 0.159,
"mean_token_accuracy": 0.9492316693067551,
"num_tokens": 71757554.0,
"step": 481
},
{
"epoch": 0.3266689257878685,
"grad_norm": 0.5014062000611049,
"learning_rate": 1.348238482384824e-06,
"loss": 0.1785,
"mean_token_accuracy": 0.943628765642643,
"num_tokens": 71903853.0,
"step": 482
},
{
"epoch": 0.3273466621484243,
"grad_norm": 0.6987254900902669,
"learning_rate": 1.3468834688346884e-06,
"loss": 0.1709,
"mean_token_accuracy": 0.9468587040901184,
"num_tokens": 72054020.0,
"step": 483
},
{
"epoch": 0.32802439850898,
"grad_norm": 0.5985256675867555,
"learning_rate": 1.3455284552845527e-06,
"loss": 0.171,
"mean_token_accuracy": 0.9468069896101952,
"num_tokens": 72206495.0,
"step": 484
},
{
"epoch": 0.32870213486953576,
"grad_norm": 1.3290797142590758,
"learning_rate": 1.3441734417344173e-06,
"loss": 0.1597,
"mean_token_accuracy": 0.9495629146695137,
"num_tokens": 72353685.0,
"step": 485
},
{
"epoch": 0.3293798712300915,
"grad_norm": 0.5119654251077203,
"learning_rate": 1.3428184281842818e-06,
"loss": 0.1724,
"mean_token_accuracy": 0.9460138157010078,
"num_tokens": 72504327.0,
"step": 486
},
{
"epoch": 0.33005760759064723,
"grad_norm": 0.4880586016116781,
"learning_rate": 1.3414634146341463e-06,
"loss": 0.1726,
"mean_token_accuracy": 0.945694237947464,
"num_tokens": 72654840.0,
"step": 487
},
{
"epoch": 0.33073534395120296,
"grad_norm": 0.5043166922170219,
"learning_rate": 1.3401084010840106e-06,
"loss": 0.1627,
"mean_token_accuracy": 0.9485412240028381,
"num_tokens": 72799372.0,
"step": 488
},
{
"epoch": 0.3314130803117587,
"grad_norm": 0.535117165226787,
"learning_rate": 1.3387533875338753e-06,
"loss": 0.1695,
"mean_token_accuracy": 0.9474276155233383,
"num_tokens": 72948960.0,
"step": 489
},
{
"epoch": 0.3320908166723145,
"grad_norm": 0.519763234225128,
"learning_rate": 1.3373983739837399e-06,
"loss": 0.1688,
"mean_token_accuracy": 0.9468537047505379,
"num_tokens": 73100760.0,
"step": 490
},
{
"epoch": 0.33276855303287023,
"grad_norm": 0.5711715197411761,
"learning_rate": 1.3360433604336044e-06,
"loss": 0.183,
"mean_token_accuracy": 0.9430373981595039,
"num_tokens": 73246508.0,
"step": 491
},
{
"epoch": 0.33344628939342597,
"grad_norm": 1.1701182936544605,
"learning_rate": 1.334688346883469e-06,
"loss": 0.1705,
"mean_token_accuracy": 0.947103701531887,
"num_tokens": 73397884.0,
"step": 492
},
{
"epoch": 0.3341240257539817,
"grad_norm": 0.50623069387712,
"learning_rate": 1.3333333333333332e-06,
"loss": 0.1712,
"mean_token_accuracy": 0.9459755271673203,
"num_tokens": 73544836.0,
"step": 493
},
{
"epoch": 0.33480176211453744,
"grad_norm": 0.5201173073498514,
"learning_rate": 1.3319783197831977e-06,
"loss": 0.1698,
"mean_token_accuracy": 0.9458015710115433,
"num_tokens": 73694370.0,
"step": 494
},
{
"epoch": 0.3354794984750932,
"grad_norm": 0.5538352573416628,
"learning_rate": 1.3306233062330622e-06,
"loss": 0.1604,
"mean_token_accuracy": 0.9487332031130791,
"num_tokens": 73844473.0,
"step": 495
},
{
"epoch": 0.3361572348356489,
"grad_norm": 0.6168876470878009,
"learning_rate": 1.3292682926829268e-06,
"loss": 0.1568,
"mean_token_accuracy": 0.9499210342764854,
"num_tokens": 73996402.0,
"step": 496
},
{
"epoch": 0.3368349711962047,
"grad_norm": 0.4550808255041631,
"learning_rate": 1.327913279132791e-06,
"loss": 0.1643,
"mean_token_accuracy": 0.9480182603001595,
"num_tokens": 74149961.0,
"step": 497
},
{
"epoch": 0.33751270755676044,
"grad_norm": 0.49242815426502456,
"learning_rate": 1.3265582655826558e-06,
"loss": 0.1742,
"mean_token_accuracy": 0.9449413493275642,
"num_tokens": 74300652.0,
"step": 498
},
{
"epoch": 0.3381904439173162,
"grad_norm": 0.6538467978588217,
"learning_rate": 1.3252032520325203e-06,
"loss": 0.1656,
"mean_token_accuracy": 0.9482651427388191,
"num_tokens": 74452182.0,
"step": 499
},
{
"epoch": 0.3388681802778719,
"grad_norm": 0.5554154903588379,
"learning_rate": 1.3238482384823848e-06,
"loss": 0.1788,
"mean_token_accuracy": 0.9439527839422226,
"num_tokens": 74602966.0,
"step": 500
},
{
"epoch": 0.33954591663842765,
"grad_norm": 0.45834856596079654,
"learning_rate": 1.3224932249322494e-06,
"loss": 0.1661,
"mean_token_accuracy": 0.9474470615386963,
"num_tokens": 74752679.0,
"step": 501
},
{
"epoch": 0.3402236529989834,
"grad_norm": 0.482857087748798,
"learning_rate": 1.3211382113821137e-06,
"loss": 0.1637,
"mean_token_accuracy": 0.9479742795228958,
"num_tokens": 74900663.0,
"step": 502
},
{
"epoch": 0.3409013893595391,
"grad_norm": 0.4688544076647359,
"learning_rate": 1.3197831978319782e-06,
"loss": 0.1653,
"mean_token_accuracy": 0.948046438395977,
"num_tokens": 75053468.0,
"step": 503
},
{
"epoch": 0.34157912572009486,
"grad_norm": 0.8131967901070724,
"learning_rate": 1.3184281842818427e-06,
"loss": 0.1675,
"mean_token_accuracy": 0.9472577646374702,
"num_tokens": 75206169.0,
"step": 504
},
{
"epoch": 0.34225686208065065,
"grad_norm": 0.45932339378365955,
"learning_rate": 1.3170731707317072e-06,
"loss": 0.1706,
"mean_token_accuracy": 0.9472535997629166,
"num_tokens": 75353645.0,
"step": 505
},
{
"epoch": 0.3429345984412064,
"grad_norm": 0.49131482848831964,
"learning_rate": 1.315718157181572e-06,
"loss": 0.1811,
"mean_token_accuracy": 0.9429889023303986,
"num_tokens": 75506144.0,
"step": 506
},
{
"epoch": 0.3436123348017621,
"grad_norm": 0.7735749197588243,
"learning_rate": 1.3143631436314363e-06,
"loss": 0.1637,
"mean_token_accuracy": 0.948386162519455,
"num_tokens": 75653020.0,
"step": 507
},
{
"epoch": 0.34429007116231786,
"grad_norm": 0.5290018312058388,
"learning_rate": 1.3130081300813008e-06,
"loss": 0.179,
"mean_token_accuracy": 0.9440815150737762,
"num_tokens": 75806317.0,
"step": 508
},
{
"epoch": 0.3449678075228736,
"grad_norm": 0.8792037383181599,
"learning_rate": 1.3116531165311653e-06,
"loss": 0.1666,
"mean_token_accuracy": 0.9479077309370041,
"num_tokens": 75954710.0,
"step": 509
},
{
"epoch": 0.34564554388342933,
"grad_norm": 1.2379023033712115,
"learning_rate": 1.3102981029810298e-06,
"loss": 0.163,
"mean_token_accuracy": 0.948448158800602,
"num_tokens": 76102081.0,
"step": 510
},
{
"epoch": 0.34632328024398507,
"grad_norm": 0.7047671011914056,
"learning_rate": 1.3089430894308941e-06,
"loss": 0.1706,
"mean_token_accuracy": 0.9455461129546165,
"num_tokens": 76253124.0,
"step": 511
},
{
"epoch": 0.34700101660454086,
"grad_norm": 0.5215665430937292,
"learning_rate": 1.3075880758807587e-06,
"loss": 0.1723,
"mean_token_accuracy": 0.9454864710569382,
"num_tokens": 76401400.0,
"step": 512
},
{
"epoch": 0.3476787529650966,
"grad_norm": 0.6254105625236192,
"learning_rate": 1.3062330623306232e-06,
"loss": 0.1669,
"mean_token_accuracy": 0.9473327398300171,
"num_tokens": 76557019.0,
"step": 513
},
{
"epoch": 0.34835648932565233,
"grad_norm": 0.5188097653856689,
"learning_rate": 1.3048780487804877e-06,
"loss": 0.1769,
"mean_token_accuracy": 0.9443547651171684,
"num_tokens": 76706741.0,
"step": 514
},
{
"epoch": 0.34903422568620807,
"grad_norm": 0.4675221964626805,
"learning_rate": 1.3035230352303524e-06,
"loss": 0.1603,
"mean_token_accuracy": 0.9501334503293037,
"num_tokens": 76854200.0,
"step": 515
},
{
"epoch": 0.3497119620467638,
"grad_norm": 0.49070216913278825,
"learning_rate": 1.3021680216802167e-06,
"loss": 0.1735,
"mean_token_accuracy": 0.94585020840168,
"num_tokens": 77003380.0,
"step": 516
},
{
"epoch": 0.35038969840731954,
"grad_norm": 0.6566218821573573,
"learning_rate": 1.3008130081300813e-06,
"loss": 0.1647,
"mean_token_accuracy": 0.9479044005274773,
"num_tokens": 77153163.0,
"step": 517
},
{
"epoch": 0.3510674347678753,
"grad_norm": 0.7789379307919548,
"learning_rate": 1.2994579945799458e-06,
"loss": 0.1634,
"mean_token_accuracy": 0.9485658556222916,
"num_tokens": 77303522.0,
"step": 518
},
{
"epoch": 0.35174517112843107,
"grad_norm": 1.6255929271886427,
"learning_rate": 1.2981029810298103e-06,
"loss": 0.1752,
"mean_token_accuracy": 0.9446841180324554,
"num_tokens": 77454740.0,
"step": 519
},
{
"epoch": 0.3524229074889868,
"grad_norm": 0.7616210624348961,
"learning_rate": 1.2967479674796746e-06,
"loss": 0.1652,
"mean_token_accuracy": 0.9477170780301094,
"num_tokens": 77601927.0,
"step": 520
},
{
"epoch": 0.35310064384954254,
"grad_norm": 0.5266794391641887,
"learning_rate": 1.2953929539295391e-06,
"loss": 0.1741,
"mean_token_accuracy": 0.944876454770565,
"num_tokens": 77751294.0,
"step": 521
},
{
"epoch": 0.3537783802100983,
"grad_norm": 0.5950589959591587,
"learning_rate": 1.2940379403794036e-06,
"loss": 0.1579,
"mean_token_accuracy": 0.9505382627248764,
"num_tokens": 77902036.0,
"step": 522
},
{
"epoch": 0.354456116570654,
"grad_norm": 0.5787471802269085,
"learning_rate": 1.2926829268292684e-06,
"loss": 0.1776,
"mean_token_accuracy": 0.9443177506327629,
"num_tokens": 78050230.0,
"step": 523
},
{
"epoch": 0.35513385293120975,
"grad_norm": 0.5137344606578952,
"learning_rate": 1.2913279132791329e-06,
"loss": 0.1589,
"mean_token_accuracy": 0.9494662508368492,
"num_tokens": 78193724.0,
"step": 524
},
{
"epoch": 0.3558115892917655,
"grad_norm": 0.5758157648566467,
"learning_rate": 1.2899728997289972e-06,
"loss": 0.1673,
"mean_token_accuracy": 0.9469494745135307,
"num_tokens": 78344604.0,
"step": 525
},
{
"epoch": 0.3564893256523212,
"grad_norm": 0.4823293650140685,
"learning_rate": 1.2886178861788617e-06,
"loss": 0.1733,
"mean_token_accuracy": 0.9464617371559143,
"num_tokens": 78493386.0,
"step": 526
},
{
"epoch": 0.357167062012877,
"grad_norm": 1.3093734334897686,
"learning_rate": 1.2872628726287262e-06,
"loss": 0.1736,
"mean_token_accuracy": 0.9449149370193481,
"num_tokens": 78643722.0,
"step": 527
},
{
"epoch": 0.35784479837343275,
"grad_norm": 0.9479862338053053,
"learning_rate": 1.2859078590785908e-06,
"loss": 0.1617,
"mean_token_accuracy": 0.949364185333252,
"num_tokens": 78794768.0,
"step": 528
},
{
"epoch": 0.3585225347339885,
"grad_norm": 0.4910511709067298,
"learning_rate": 1.2845528455284553e-06,
"loss": 0.1739,
"mean_token_accuracy": 0.9452231824398041,
"num_tokens": 78945067.0,
"step": 529
},
{
"epoch": 0.3592002710945442,
"grad_norm": 0.6780819218651569,
"learning_rate": 1.2831978319783196e-06,
"loss": 0.1764,
"mean_token_accuracy": 0.9451046735048294,
"num_tokens": 79093659.0,
"step": 530
},
{
"epoch": 0.35987800745509996,
"grad_norm": 0.6028299913157514,
"learning_rate": 1.281842818428184e-06,
"loss": 0.1692,
"mean_token_accuracy": 0.9468301609158516,
"num_tokens": 79245168.0,
"step": 531
},
{
"epoch": 0.3605557438156557,
"grad_norm": 0.47202114885701624,
"learning_rate": 1.2804878048780488e-06,
"loss": 0.1703,
"mean_token_accuracy": 0.946605496108532,
"num_tokens": 79392112.0,
"step": 532
},
{
"epoch": 0.36123348017621143,
"grad_norm": 0.44640985325082555,
"learning_rate": 1.2791327913279134e-06,
"loss": 0.1553,
"mean_token_accuracy": 0.9503597840666771,
"num_tokens": 79540450.0,
"step": 533
},
{
"epoch": 0.3619112165367672,
"grad_norm": 0.4932596703182334,
"learning_rate": 1.2777777777777777e-06,
"loss": 0.1572,
"mean_token_accuracy": 0.9503260552883148,
"num_tokens": 79688916.0,
"step": 534
},
{
"epoch": 0.36258895289732296,
"grad_norm": 0.5831851813680463,
"learning_rate": 1.2764227642276422e-06,
"loss": 0.1642,
"mean_token_accuracy": 0.9486764073371887,
"num_tokens": 79838845.0,
"step": 535
},
{
"epoch": 0.3632666892578787,
"grad_norm": 0.61181600071,
"learning_rate": 1.2750677506775067e-06,
"loss": 0.1653,
"mean_token_accuracy": 0.9481100291013718,
"num_tokens": 79988221.0,
"step": 536
},
{
"epoch": 0.36394442561843443,
"grad_norm": 0.5506545480633555,
"learning_rate": 1.2737127371273712e-06,
"loss": 0.1648,
"mean_token_accuracy": 0.9479293003678322,
"num_tokens": 80137590.0,
"step": 537
},
{
"epoch": 0.36462216197899017,
"grad_norm": 0.43887901796596895,
"learning_rate": 1.2723577235772357e-06,
"loss": 0.1639,
"mean_token_accuracy": 0.9483424499630928,
"num_tokens": 80287101.0,
"step": 538
},
{
"epoch": 0.3652998983395459,
"grad_norm": 0.4955011682450888,
"learning_rate": 1.2710027100271e-06,
"loss": 0.1628,
"mean_token_accuracy": 0.9483226388692856,
"num_tokens": 80436322.0,
"step": 539
},
{
"epoch": 0.36597763470010164,
"grad_norm": 0.5239537864203889,
"learning_rate": 1.2696476964769648e-06,
"loss": 0.1715,
"mean_token_accuracy": 0.9458038881421089,
"num_tokens": 80581684.0,
"step": 540
},
{
"epoch": 0.36665537106065743,
"grad_norm": 0.5346227062984173,
"learning_rate": 1.2682926829268293e-06,
"loss": 0.1615,
"mean_token_accuracy": 0.948853574693203,
"num_tokens": 80734232.0,
"step": 541
},
{
"epoch": 0.36733310742121317,
"grad_norm": 0.43618818869719533,
"learning_rate": 1.2669376693766938e-06,
"loss": 0.1687,
"mean_token_accuracy": 0.9466055184602737,
"num_tokens": 80885624.0,
"step": 542
},
{
"epoch": 0.3680108437817689,
"grad_norm": 0.5126139849918319,
"learning_rate": 1.2655826558265581e-06,
"loss": 0.1725,
"mean_token_accuracy": 0.9452295154333115,
"num_tokens": 81032518.0,
"step": 543
},
{
"epoch": 0.36868858014232464,
"grad_norm": 0.5156027227638869,
"learning_rate": 1.2642276422764226e-06,
"loss": 0.1691,
"mean_token_accuracy": 0.9462386891245842,
"num_tokens": 81183730.0,
"step": 544
},
{
"epoch": 0.3693663165028804,
"grad_norm": 0.4953647785801021,
"learning_rate": 1.2628726287262872e-06,
"loss": 0.1779,
"mean_token_accuracy": 0.9446133226156235,
"num_tokens": 81332553.0,
"step": 545
},
{
"epoch": 0.3700440528634361,
"grad_norm": 0.6205616037290729,
"learning_rate": 1.2615176151761517e-06,
"loss": 0.1658,
"mean_token_accuracy": 0.9468551427125931,
"num_tokens": 81481317.0,
"step": 546
},
{
"epoch": 0.37072178922399185,
"grad_norm": 0.5144257052051068,
"learning_rate": 1.2601626016260162e-06,
"loss": 0.1709,
"mean_token_accuracy": 0.9458277970552444,
"num_tokens": 81629307.0,
"step": 547
},
{
"epoch": 0.3713995255845476,
"grad_norm": 0.49628657698403855,
"learning_rate": 1.2588075880758805e-06,
"loss": 0.1731,
"mean_token_accuracy": 0.9464028999209404,
"num_tokens": 81780418.0,
"step": 548
},
{
"epoch": 0.3720772619451034,
"grad_norm": 0.5360410530877545,
"learning_rate": 1.2574525745257452e-06,
"loss": 0.1653,
"mean_token_accuracy": 0.9468945264816284,
"num_tokens": 81931133.0,
"step": 549
},
{
"epoch": 0.3727549983056591,
"grad_norm": 0.5154118695419517,
"learning_rate": 1.2560975609756098e-06,
"loss": 0.1802,
"mean_token_accuracy": 0.9430196806788445,
"num_tokens": 82077166.0,
"step": 550
},
{
"epoch": 0.37343273466621485,
"grad_norm": 0.5054293298675031,
"learning_rate": 1.2547425474254743e-06,
"loss": 0.1737,
"mean_token_accuracy": 0.9454409256577492,
"num_tokens": 82227375.0,
"step": 551
},
{
"epoch": 0.3741104710267706,
"grad_norm": 0.8108065600612613,
"learning_rate": 1.2533875338753388e-06,
"loss": 0.1638,
"mean_token_accuracy": 0.9477621614933014,
"num_tokens": 82372453.0,
"step": 552
},
{
"epoch": 0.3747882073873263,
"grad_norm": 0.4999817635617397,
"learning_rate": 1.2520325203252031e-06,
"loss": 0.1708,
"mean_token_accuracy": 0.9459622874855995,
"num_tokens": 82518157.0,
"step": 553
},
{
"epoch": 0.37546594374788206,
"grad_norm": 0.6126446568489133,
"learning_rate": 1.2506775067750676e-06,
"loss": 0.1717,
"mean_token_accuracy": 0.9467468857765198,
"num_tokens": 82669176.0,
"step": 554
},
{
"epoch": 0.3761436801084378,
"grad_norm": 0.4092562831332532,
"learning_rate": 1.2493224932249322e-06,
"loss": 0.1663,
"mean_token_accuracy": 0.9466510340571404,
"num_tokens": 82815386.0,
"step": 555
},
{
"epoch": 0.3768214164689936,
"grad_norm": 0.5544136499451818,
"learning_rate": 1.2479674796747967e-06,
"loss": 0.1436,
"mean_token_accuracy": 0.9543255716562271,
"num_tokens": 82963781.0,
"step": 556
},
{
"epoch": 0.3774991528295493,
"grad_norm": 0.6577759537029808,
"learning_rate": 1.2466124661246612e-06,
"loss": 0.171,
"mean_token_accuracy": 0.9457321241497993,
"num_tokens": 83114702.0,
"step": 557
},
{
"epoch": 0.37817688919010506,
"grad_norm": 0.6669400570725771,
"learning_rate": 1.2452574525745257e-06,
"loss": 0.1628,
"mean_token_accuracy": 0.9488318488001823,
"num_tokens": 83262766.0,
"step": 558
},
{
"epoch": 0.3788546255506608,
"grad_norm": 2.110564198277507,
"learning_rate": 1.2439024390243902e-06,
"loss": 0.1611,
"mean_token_accuracy": 0.9489534422755241,
"num_tokens": 83414310.0,
"step": 559
},
{
"epoch": 0.37953236191121653,
"grad_norm": 0.5862482748277763,
"learning_rate": 1.2425474254742547e-06,
"loss": 0.1678,
"mean_token_accuracy": 0.9466524496674538,
"num_tokens": 83565463.0,
"step": 560
},
{
"epoch": 0.38021009827177227,
"grad_norm": 0.44976063765840474,
"learning_rate": 1.2411924119241193e-06,
"loss": 0.1637,
"mean_token_accuracy": 0.948176383972168,
"num_tokens": 83716123.0,
"step": 561
},
{
"epoch": 0.380887834632328,
"grad_norm": 0.4871383532266955,
"learning_rate": 1.2398373983739836e-06,
"loss": 0.1651,
"mean_token_accuracy": 0.9474801197648048,
"num_tokens": 83867770.0,
"step": 562
},
{
"epoch": 0.3815655709928838,
"grad_norm": 0.4875394052722114,
"learning_rate": 1.238482384823848e-06,
"loss": 0.1648,
"mean_token_accuracy": 0.9471545964479446,
"num_tokens": 84013176.0,
"step": 563
},
{
"epoch": 0.38224330735343953,
"grad_norm": 0.4812873126942103,
"learning_rate": 1.2371273712737126e-06,
"loss": 0.1784,
"mean_token_accuracy": 0.9448759853839874,
"num_tokens": 84159306.0,
"step": 564
},
{
"epoch": 0.38292104371399527,
"grad_norm": 0.48087624001771767,
"learning_rate": 1.2357723577235773e-06,
"loss": 0.1672,
"mean_token_accuracy": 0.9471018239855766,
"num_tokens": 84306663.0,
"step": 565
},
{
"epoch": 0.383598780074551,
"grad_norm": 0.49836516202253933,
"learning_rate": 1.2344173441734419e-06,
"loss": 0.1711,
"mean_token_accuracy": 0.9457863420248032,
"num_tokens": 84457537.0,
"step": 566
},
{
"epoch": 0.38427651643510674,
"grad_norm": 0.8028630495125755,
"learning_rate": 1.2330623306233062e-06,
"loss": 0.1715,
"mean_token_accuracy": 0.945386491715908,
"num_tokens": 84604736.0,
"step": 567
},
{
"epoch": 0.3849542527956625,
"grad_norm": 0.517032288094208,
"learning_rate": 1.2317073170731707e-06,
"loss": 0.1705,
"mean_token_accuracy": 0.9462244659662247,
"num_tokens": 84750138.0,
"step": 568
},
{
"epoch": 0.3856319891562182,
"grad_norm": 0.524515202262513,
"learning_rate": 1.2303523035230352e-06,
"loss": 0.1734,
"mean_token_accuracy": 0.9456737115979195,
"num_tokens": 84901947.0,
"step": 569
},
{
"epoch": 0.38630972551677395,
"grad_norm": 0.5016321881100445,
"learning_rate": 1.2289972899728997e-06,
"loss": 0.1646,
"mean_token_accuracy": 0.9482433423399925,
"num_tokens": 85050141.0,
"step": 570
},
{
"epoch": 0.38698746187732974,
"grad_norm": 0.701544567243051,
"learning_rate": 1.227642276422764e-06,
"loss": 0.168,
"mean_token_accuracy": 0.9463346377015114,
"num_tokens": 85197933.0,
"step": 571
},
{
"epoch": 0.3876651982378855,
"grad_norm": 0.9260298597170125,
"learning_rate": 1.2262872628726286e-06,
"loss": 0.1746,
"mean_token_accuracy": 0.9452116563916206,
"num_tokens": 85344354.0,
"step": 572
},
{
"epoch": 0.3883429345984412,
"grad_norm": 0.4665785964639984,
"learning_rate": 1.224932249322493e-06,
"loss": 0.1667,
"mean_token_accuracy": 0.946522019803524,
"num_tokens": 85493157.0,
"step": 573
},
{
"epoch": 0.38902067095899695,
"grad_norm": 0.47890919766848145,
"learning_rate": 1.2235772357723578e-06,
"loss": 0.1605,
"mean_token_accuracy": 0.9488857388496399,
"num_tokens": 85640984.0,
"step": 574
},
{
"epoch": 0.3896984073195527,
"grad_norm": 0.5237978003925238,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.175,
"mean_token_accuracy": 0.9449859485030174,
"num_tokens": 85787937.0,
"step": 575
},
{
"epoch": 0.3903761436801084,
"grad_norm": 0.5044327985275826,
"learning_rate": 1.2208672086720866e-06,
"loss": 0.1643,
"mean_token_accuracy": 0.947325699031353,
"num_tokens": 85934883.0,
"step": 576
},
{
"epoch": 0.39105388004066416,
"grad_norm": 1.212018009924748,
"learning_rate": 1.2195121951219512e-06,
"loss": 0.1674,
"mean_token_accuracy": 0.9467579498887062,
"num_tokens": 86079914.0,
"step": 577
},
{
"epoch": 0.39173161640121995,
"grad_norm": 0.5117113977489923,
"learning_rate": 1.2181571815718157e-06,
"loss": 0.1635,
"mean_token_accuracy": 0.9482778683304787,
"num_tokens": 86228740.0,
"step": 578
},
{
"epoch": 0.3924093527617757,
"grad_norm": 0.6494915285028472,
"learning_rate": 1.2168021680216802e-06,
"loss": 0.157,
"mean_token_accuracy": 0.9500811025500298,
"num_tokens": 86377532.0,
"step": 579
},
{
"epoch": 0.3930870891223314,
"grad_norm": 0.5973179488736141,
"learning_rate": 1.2154471544715445e-06,
"loss": 0.1575,
"mean_token_accuracy": 0.9502118080854416,
"num_tokens": 86526682.0,
"step": 580
},
{
"epoch": 0.39376482548288716,
"grad_norm": 0.5808424568038079,
"learning_rate": 1.214092140921409e-06,
"loss": 0.1539,
"mean_token_accuracy": 0.9514239802956581,
"num_tokens": 86676044.0,
"step": 581
},
{
"epoch": 0.3944425618434429,
"grad_norm": 0.4953674203579119,
"learning_rate": 1.2127371273712735e-06,
"loss": 0.1684,
"mean_token_accuracy": 0.9472720921039581,
"num_tokens": 86825381.0,
"step": 582
},
{
"epoch": 0.39512029820399863,
"grad_norm": 0.4322370643094621,
"learning_rate": 1.2113821138211383e-06,
"loss": 0.1589,
"mean_token_accuracy": 0.9499673321843147,
"num_tokens": 86970825.0,
"step": 583
},
{
"epoch": 0.39579803456455437,
"grad_norm": 0.5358177146908822,
"learning_rate": 1.2100271002710028e-06,
"loss": 0.1671,
"mean_token_accuracy": 0.9472456872463226,
"num_tokens": 87118763.0,
"step": 584
},
{
"epoch": 0.3964757709251101,
"grad_norm": 0.5193297152368992,
"learning_rate": 1.208672086720867e-06,
"loss": 0.1752,
"mean_token_accuracy": 0.9457727372646332,
"num_tokens": 87267978.0,
"step": 585
},
{
"epoch": 0.3971535072856659,
"grad_norm": 0.5008798591237139,
"learning_rate": 1.2073170731707316e-06,
"loss": 0.1539,
"mean_token_accuracy": 0.950521744787693,
"num_tokens": 87413903.0,
"step": 586
},
{
"epoch": 0.39783124364622163,
"grad_norm": 0.5845390262447139,
"learning_rate": 1.2059620596205961e-06,
"loss": 0.1729,
"mean_token_accuracy": 0.9458159878849983,
"num_tokens": 87567058.0,
"step": 587
},
{
"epoch": 0.39850898000677737,
"grad_norm": 0.44423455283346175,
"learning_rate": 1.2046070460704607e-06,
"loss": 0.1713,
"mean_token_accuracy": 0.945778027176857,
"num_tokens": 87719494.0,
"step": 588
},
{
"epoch": 0.3991867163673331,
"grad_norm": 0.47221544465230036,
"learning_rate": 1.2032520325203252e-06,
"loss": 0.1695,
"mean_token_accuracy": 0.9476176202297211,
"num_tokens": 87870107.0,
"step": 589
},
{
"epoch": 0.39986445272788884,
"grad_norm": 0.49885913591469067,
"learning_rate": 1.2018970189701895e-06,
"loss": 0.1681,
"mean_token_accuracy": 0.9467541128396988,
"num_tokens": 88016868.0,
"step": 590
},
{
"epoch": 0.4005421890884446,
"grad_norm": 0.7890592585591757,
"learning_rate": 1.2005420054200542e-06,
"loss": 0.1682,
"mean_token_accuracy": 0.946752056479454,
"num_tokens": 88164592.0,
"step": 591
},
{
"epoch": 0.4012199254490003,
"grad_norm": 0.7369518492575368,
"learning_rate": 1.1991869918699187e-06,
"loss": 0.157,
"mean_token_accuracy": 0.949407272040844,
"num_tokens": 88312487.0,
"step": 592
},
{
"epoch": 0.4018976618095561,
"grad_norm": 0.4484504676130025,
"learning_rate": 1.1978319783197833e-06,
"loss": 0.1631,
"mean_token_accuracy": 0.9482650607824326,
"num_tokens": 88460693.0,
"step": 593
},
{
"epoch": 0.40257539817011184,
"grad_norm": 0.8485155855645619,
"learning_rate": 1.1964769647696476e-06,
"loss": 0.1605,
"mean_token_accuracy": 0.9483138248324394,
"num_tokens": 88609733.0,
"step": 594
},
{
"epoch": 0.4032531345306676,
"grad_norm": 0.4480372776691949,
"learning_rate": 1.195121951219512e-06,
"loss": 0.1612,
"mean_token_accuracy": 0.9484245628118515,
"num_tokens": 88761086.0,
"step": 595
},
{
"epoch": 0.4039308708912233,
"grad_norm": 0.4329784742071409,
"learning_rate": 1.1937669376693766e-06,
"loss": 0.1717,
"mean_token_accuracy": 0.9458352103829384,
"num_tokens": 88906999.0,
"step": 596
},
{
"epoch": 0.40460860725177905,
"grad_norm": 0.5096564027934126,
"learning_rate": 1.1924119241192411e-06,
"loss": 0.1722,
"mean_token_accuracy": 0.9450919255614281,
"num_tokens": 89058412.0,
"step": 597
},
{
"epoch": 0.4052863436123348,
"grad_norm": 0.45356808602832843,
"learning_rate": 1.1910569105691056e-06,
"loss": 0.1752,
"mean_token_accuracy": 0.9447236880660057,
"num_tokens": 89205873.0,
"step": 598
},
{
"epoch": 0.4059640799728905,
"grad_norm": 0.4789037831314169,
"learning_rate": 1.18970189701897e-06,
"loss": 0.1726,
"mean_token_accuracy": 0.94582499563694,
"num_tokens": 89352986.0,
"step": 599
},
{
"epoch": 0.4066418163334463,
"grad_norm": 0.4903512536874018,
"learning_rate": 1.1883468834688347e-06,
"loss": 0.1653,
"mean_token_accuracy": 0.9469872713088989,
"num_tokens": 89502021.0,
"step": 600
},
{
"epoch": 0.40731955269400205,
"grad_norm": 0.5036605840464546,
"learning_rate": 1.1869918699186992e-06,
"loss": 0.1761,
"mean_token_accuracy": 0.9450404793024063,
"num_tokens": 89654563.0,
"step": 601
},
{
"epoch": 0.4079972890545578,
"grad_norm": 0.5970722709739015,
"learning_rate": 1.1856368563685637e-06,
"loss": 0.1655,
"mean_token_accuracy": 0.9475810378789902,
"num_tokens": 89807709.0,
"step": 602
},
{
"epoch": 0.4086750254151135,
"grad_norm": 0.4818755636606668,
"learning_rate": 1.184281842818428e-06,
"loss": 0.165,
"mean_token_accuracy": 0.9479880854487419,
"num_tokens": 89957680.0,
"step": 603
},
{
"epoch": 0.40935276177566926,
"grad_norm": 0.49513417717067487,
"learning_rate": 1.1829268292682926e-06,
"loss": 0.1653,
"mean_token_accuracy": 0.9484023600816727,
"num_tokens": 90104372.0,
"step": 604
},
{
"epoch": 0.410030498136225,
"grad_norm": 0.4705947908265269,
"learning_rate": 1.181571815718157e-06,
"loss": 0.175,
"mean_token_accuracy": 0.9449072778224945,
"num_tokens": 90254315.0,
"step": 605
},
{
"epoch": 0.41070823449678073,
"grad_norm": 0.6062525288818309,
"learning_rate": 1.1802168021680216e-06,
"loss": 0.1622,
"mean_token_accuracy": 0.9490978792309761,
"num_tokens": 90406409.0,
"step": 606
},
{
"epoch": 0.41138597085733647,
"grad_norm": 0.4887642188601793,
"learning_rate": 1.1788617886178861e-06,
"loss": 0.1556,
"mean_token_accuracy": 0.9504926428198814,
"num_tokens": 90555846.0,
"step": 607
},
{
"epoch": 0.41206370721789226,
"grad_norm": 0.6695846625041596,
"learning_rate": 1.1775067750677506e-06,
"loss": 0.1607,
"mean_token_accuracy": 0.9490882977843285,
"num_tokens": 90703080.0,
"step": 608
},
{
"epoch": 0.412741443578448,
"grad_norm": 0.626157768978341,
"learning_rate": 1.1761517615176152e-06,
"loss": 0.1616,
"mean_token_accuracy": 0.9481958895921707,
"num_tokens": 90849340.0,
"step": 609
},
{
"epoch": 0.41341917993900373,
"grad_norm": 0.49981917199068726,
"learning_rate": 1.1747967479674797e-06,
"loss": 0.1686,
"mean_token_accuracy": 0.9468593597412109,
"num_tokens": 90998272.0,
"step": 610
},
{
"epoch": 0.41409691629955947,
"grad_norm": 0.4809733175759335,
"learning_rate": 1.1734417344173442e-06,
"loss": 0.1663,
"mean_token_accuracy": 0.9470062106847763,
"num_tokens": 91147727.0,
"step": 611
},
{
"epoch": 0.4147746526601152,
"grad_norm": 0.5546230985106029,
"learning_rate": 1.1720867208672087e-06,
"loss": 0.1606,
"mean_token_accuracy": 0.9482028186321259,
"num_tokens": 91298332.0,
"step": 612
},
{
"epoch": 0.41545238902067094,
"grad_norm": 1.369314542898872,
"learning_rate": 1.170731707317073e-06,
"loss": 0.1628,
"mean_token_accuracy": 0.9486950933933258,
"num_tokens": 91448033.0,
"step": 613
},
{
"epoch": 0.4161301253812267,
"grad_norm": 0.47642657411701406,
"learning_rate": 1.1693766937669375e-06,
"loss": 0.1778,
"mean_token_accuracy": 0.9440385848283768,
"num_tokens": 91594992.0,
"step": 614
},
{
"epoch": 0.41680786174178247,
"grad_norm": 0.47669213610459643,
"learning_rate": 1.168021680216802e-06,
"loss": 0.1697,
"mean_token_accuracy": 0.9455291479825974,
"num_tokens": 91737805.0,
"step": 615
},
{
"epoch": 0.4174855981023382,
"grad_norm": 0.5797787298320769,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.1641,
"mean_token_accuracy": 0.947659395635128,
"num_tokens": 91887143.0,
"step": 616
},
{
"epoch": 0.41816333446289394,
"grad_norm": 0.5345821378971174,
"learning_rate": 1.165311653116531e-06,
"loss": 0.1651,
"mean_token_accuracy": 0.9479231685400009,
"num_tokens": 92033481.0,
"step": 617
},
{
"epoch": 0.4188410708234497,
"grad_norm": 0.4900914190375703,
"learning_rate": 1.1639566395663956e-06,
"loss": 0.1694,
"mean_token_accuracy": 0.9459358528256416,
"num_tokens": 92186016.0,
"step": 618
},
{
"epoch": 0.4195188071840054,
"grad_norm": 0.5466231230181472,
"learning_rate": 1.1626016260162601e-06,
"loss": 0.1616,
"mean_token_accuracy": 0.9476613327860832,
"num_tokens": 92337069.0,
"step": 619
},
{
"epoch": 0.42019654354456115,
"grad_norm": 0.48693997965582553,
"learning_rate": 1.1612466124661247e-06,
"loss": 0.1672,
"mean_token_accuracy": 0.9474040865898132,
"num_tokens": 92485244.0,
"step": 620
},
{
"epoch": 0.4208742799051169,
"grad_norm": 0.43074697827839376,
"learning_rate": 1.1598915989159892e-06,
"loss": 0.1725,
"mean_token_accuracy": 0.9458062797784805,
"num_tokens": 92637765.0,
"step": 621
},
{
"epoch": 0.4215520162656727,
"grad_norm": 0.5096419473326914,
"learning_rate": 1.1585365853658535e-06,
"loss": 0.1702,
"mean_token_accuracy": 0.9466564804315567,
"num_tokens": 92781503.0,
"step": 622
},
{
"epoch": 0.4222297526262284,
"grad_norm": 0.5259582342847179,
"learning_rate": 1.157181571815718e-06,
"loss": 0.1718,
"mean_token_accuracy": 0.9453662484884262,
"num_tokens": 92933443.0,
"step": 623
},
{
"epoch": 0.42290748898678415,
"grad_norm": 0.47284404987635503,
"learning_rate": 1.1558265582655825e-06,
"loss": 0.1601,
"mean_token_accuracy": 0.9497467800974846,
"num_tokens": 93083711.0,
"step": 624
},
{
"epoch": 0.4235852253473399,
"grad_norm": 0.4602475722385104,
"learning_rate": 1.1544715447154473e-06,
"loss": 0.181,
"mean_token_accuracy": 0.9428821057081223,
"num_tokens": 93233232.0,
"step": 625
},
{
"epoch": 0.4242629617078956,
"grad_norm": 0.5936717548555265,
"learning_rate": 1.1531165311653116e-06,
"loss": 0.1843,
"mean_token_accuracy": 0.943098396062851,
"num_tokens": 93382753.0,
"step": 626
},
{
"epoch": 0.42494069806845136,
"grad_norm": 0.5047803415992738,
"learning_rate": 1.151761517615176e-06,
"loss": 0.1671,
"mean_token_accuracy": 0.9477358534932137,
"num_tokens": 93530158.0,
"step": 627
},
{
"epoch": 0.4256184344290071,
"grad_norm": 0.40107744619021507,
"learning_rate": 1.1504065040650406e-06,
"loss": 0.171,
"mean_token_accuracy": 0.9463073089718819,
"num_tokens": 93680192.0,
"step": 628
},
{
"epoch": 0.42629617078956283,
"grad_norm": 1.5652127560562183,
"learning_rate": 1.1490514905149051e-06,
"loss": 0.176,
"mean_token_accuracy": 0.9450653791427612,
"num_tokens": 93826231.0,
"step": 629
},
{
"epoch": 0.4269739071501186,
"grad_norm": 0.5639073605989375,
"learning_rate": 1.1476964769647696e-06,
"loss": 0.1549,
"mean_token_accuracy": 0.9503551051020622,
"num_tokens": 93978792.0,
"step": 630
},
{
"epoch": 0.42765164351067436,
"grad_norm": 0.598966036130003,
"learning_rate": 1.146341463414634e-06,
"loss": 0.1656,
"mean_token_accuracy": 0.9473350793123245,
"num_tokens": 94127961.0,
"step": 631
},
{
"epoch": 0.4283293798712301,
"grad_norm": 0.44224050193023334,
"learning_rate": 1.1449864498644985e-06,
"loss": 0.1707,
"mean_token_accuracy": 0.9464718252420425,
"num_tokens": 94275820.0,
"step": 632
},
{
"epoch": 0.42900711623178583,
"grad_norm": 0.43676189629999784,
"learning_rate": 1.1436314363143632e-06,
"loss": 0.1686,
"mean_token_accuracy": 0.9459518492221832,
"num_tokens": 94427764.0,
"step": 633
},
{
"epoch": 0.42968485259234157,
"grad_norm": 0.4853901597305064,
"learning_rate": 1.1422764227642277e-06,
"loss": 0.1683,
"mean_token_accuracy": 0.9467138350009918,
"num_tokens": 94578347.0,
"step": 634
},
{
"epoch": 0.4303625889528973,
"grad_norm": 0.8511262690132398,
"learning_rate": 1.1409214092140922e-06,
"loss": 0.1727,
"mean_token_accuracy": 0.9447790756821632,
"num_tokens": 94724699.0,
"step": 635
},
{
"epoch": 0.43104032531345304,
"grad_norm": 0.5749925214374794,
"learning_rate": 1.1395663956639565e-06,
"loss": 0.1611,
"mean_token_accuracy": 0.9488679245114326,
"num_tokens": 94871151.0,
"step": 636
},
{
"epoch": 0.43171806167400884,
"grad_norm": 0.4064827921884073,
"learning_rate": 1.138211382113821e-06,
"loss": 0.1659,
"mean_token_accuracy": 0.9467856511473656,
"num_tokens": 95016678.0,
"step": 637
},
{
"epoch": 0.43239579803456457,
"grad_norm": 0.4499177007745121,
"learning_rate": 1.1368563685636856e-06,
"loss": 0.1686,
"mean_token_accuracy": 0.9464468285441399,
"num_tokens": 95169906.0,
"step": 638
},
{
"epoch": 0.4330735343951203,
"grad_norm": 0.43655958974808085,
"learning_rate": 1.1355013550135501e-06,
"loss": 0.1707,
"mean_token_accuracy": 0.9456497430801392,
"num_tokens": 95314234.0,
"step": 639
},
{
"epoch": 0.43375127075567604,
"grad_norm": 0.4486256023161831,
"learning_rate": 1.1341463414634144e-06,
"loss": 0.1657,
"mean_token_accuracy": 0.9474812969565392,
"num_tokens": 95462991.0,
"step": 640
},
{
"epoch": 0.4344290071162318,
"grad_norm": 0.697692666673496,
"learning_rate": 1.132791327913279e-06,
"loss": 0.1787,
"mean_token_accuracy": 0.9435675144195557,
"num_tokens": 95612865.0,
"step": 641
},
{
"epoch": 0.4351067434767875,
"grad_norm": 0.47686709968057667,
"learning_rate": 1.1314363143631437e-06,
"loss": 0.1623,
"mean_token_accuracy": 0.9478402659296989,
"num_tokens": 95762009.0,
"step": 642
},
{
"epoch": 0.43578447983734325,
"grad_norm": 0.5222106123204958,
"learning_rate": 1.1300813008130082e-06,
"loss": 0.1752,
"mean_token_accuracy": 0.9447394981980324,
"num_tokens": 95913932.0,
"step": 643
},
{
"epoch": 0.43646221619789904,
"grad_norm": 0.4408805629947386,
"learning_rate": 1.1287262872628727e-06,
"loss": 0.1525,
"mean_token_accuracy": 0.9512403458356857,
"num_tokens": 96062886.0,
"step": 644
},
{
"epoch": 0.4371399525584548,
"grad_norm": 0.5189327331310518,
"learning_rate": 1.127371273712737e-06,
"loss": 0.1677,
"mean_token_accuracy": 0.9463458731770515,
"num_tokens": 96209877.0,
"step": 645
},
{
"epoch": 0.4378176889190105,
"grad_norm": 0.56504508409712,
"learning_rate": 1.1260162601626015e-06,
"loss": 0.1622,
"mean_token_accuracy": 0.948399268090725,
"num_tokens": 96353812.0,
"step": 646
},
{
"epoch": 0.43849542527956625,
"grad_norm": 0.6696376390441588,
"learning_rate": 1.124661246612466e-06,
"loss": 0.1599,
"mean_token_accuracy": 0.948715977370739,
"num_tokens": 96503467.0,
"step": 647
},
{
"epoch": 0.439173161640122,
"grad_norm": 8.77885251280563,
"learning_rate": 1.1233062330623306e-06,
"loss": 0.17,
"mean_token_accuracy": 0.9463560730218887,
"num_tokens": 96653851.0,
"step": 648
},
{
"epoch": 0.4398508980006777,
"grad_norm": 0.4390179148821473,
"learning_rate": 1.121951219512195e-06,
"loss": 0.1589,
"mean_token_accuracy": 0.9486561864614487,
"num_tokens": 96806318.0,
"step": 649
},
{
"epoch": 0.44052863436123346,
"grad_norm": 0.4538980859024063,
"learning_rate": 1.1205962059620594e-06,
"loss": 0.1682,
"mean_token_accuracy": 0.9472699463367462,
"num_tokens": 96957317.0,
"step": 650
},
{
"epoch": 0.4412063707217892,
"grad_norm": 0.5309930041301483,
"learning_rate": 1.1192411924119241e-06,
"loss": 0.1603,
"mean_token_accuracy": 0.9494251385331154,
"num_tokens": 97106739.0,
"step": 651
},
{
"epoch": 0.441884107082345,
"grad_norm": 1.1385250470817276,
"learning_rate": 1.1178861788617887e-06,
"loss": 0.1579,
"mean_token_accuracy": 0.9499758258461952,
"num_tokens": 97252810.0,
"step": 652
},
{
"epoch": 0.4425618434429007,
"grad_norm": 0.46732463165861593,
"learning_rate": 1.1165311653116532e-06,
"loss": 0.1701,
"mean_token_accuracy": 0.9466474801301956,
"num_tokens": 97401105.0,
"step": 653
},
{
"epoch": 0.44323957980345646,
"grad_norm": 0.4512173042208031,
"learning_rate": 1.1151761517615175e-06,
"loss": 0.1626,
"mean_token_accuracy": 0.9485284760594368,
"num_tokens": 97547875.0,
"step": 654
},
{
"epoch": 0.4439173161640122,
"grad_norm": 0.5145343354995293,
"learning_rate": 1.113821138211382e-06,
"loss": 0.1624,
"mean_token_accuracy": 0.9473702013492584,
"num_tokens": 97697488.0,
"step": 655
},
{
"epoch": 0.44459505252456794,
"grad_norm": 0.41260423688312087,
"learning_rate": 1.1124661246612465e-06,
"loss": 0.1738,
"mean_token_accuracy": 0.9457203596830368,
"num_tokens": 97846993.0,
"step": 656
},
{
"epoch": 0.44527278888512367,
"grad_norm": 0.5102664687253794,
"learning_rate": 1.111111111111111e-06,
"loss": 0.18,
"mean_token_accuracy": 0.9433509930968285,
"num_tokens": 97996404.0,
"step": 657
},
{
"epoch": 0.4459505252456794,
"grad_norm": 0.506990574421379,
"learning_rate": 1.1097560975609756e-06,
"loss": 0.1883,
"mean_token_accuracy": 0.9421600103378296,
"num_tokens": 98147309.0,
"step": 658
},
{
"epoch": 0.4466282616062352,
"grad_norm": 0.5881766043352495,
"learning_rate": 1.10840108401084e-06,
"loss": 0.1625,
"mean_token_accuracy": 0.9479449465870857,
"num_tokens": 98297012.0,
"step": 659
},
{
"epoch": 0.44730599796679094,
"grad_norm": 0.5533879121735672,
"learning_rate": 1.1070460704607046e-06,
"loss": 0.1542,
"mean_token_accuracy": 0.9507003426551819,
"num_tokens": 98445075.0,
"step": 660
},
{
"epoch": 0.44798373432734667,
"grad_norm": 0.5201112017138473,
"learning_rate": 1.1056910569105691e-06,
"loss": 0.1689,
"mean_token_accuracy": 0.9461495503783226,
"num_tokens": 98594676.0,
"step": 661
},
{
"epoch": 0.4486614706879024,
"grad_norm": 0.44444711040968005,
"learning_rate": 1.1043360433604336e-06,
"loss": 0.1652,
"mean_token_accuracy": 0.9470791891217232,
"num_tokens": 98744305.0,
"step": 662
},
{
"epoch": 0.44933920704845814,
"grad_norm": 0.41457573882593707,
"learning_rate": 1.102981029810298e-06,
"loss": 0.1554,
"mean_token_accuracy": 0.9500356465578079,
"num_tokens": 98894145.0,
"step": 663
},
{
"epoch": 0.4500169434090139,
"grad_norm": 1.2907711098136603,
"learning_rate": 1.1016260162601625e-06,
"loss": 0.1635,
"mean_token_accuracy": 0.9473171010613441,
"num_tokens": 99041859.0,
"step": 664
},
{
"epoch": 0.4506946797695696,
"grad_norm": 0.46720116943273754,
"learning_rate": 1.100271002710027e-06,
"loss": 0.1718,
"mean_token_accuracy": 0.9454536214470863,
"num_tokens": 99193325.0,
"step": 665
},
{
"epoch": 0.4513724161301254,
"grad_norm": 0.5735650882180938,
"learning_rate": 1.0989159891598915e-06,
"loss": 0.1781,
"mean_token_accuracy": 0.944037027657032,
"num_tokens": 99344245.0,
"step": 666
},
{
"epoch": 0.45205015249068115,
"grad_norm": 0.42171149150991766,
"learning_rate": 1.0975609756097562e-06,
"loss": 0.1719,
"mean_token_accuracy": 0.9449406191706657,
"num_tokens": 99493777.0,
"step": 667
},
{
"epoch": 0.4527278888512369,
"grad_norm": 2.622371719944133,
"learning_rate": 1.0962059620596205e-06,
"loss": 0.1673,
"mean_token_accuracy": 0.9471249580383301,
"num_tokens": 99639666.0,
"step": 668
},
{
"epoch": 0.4534056252117926,
"grad_norm": 0.4308619134194267,
"learning_rate": 1.094850948509485e-06,
"loss": 0.1668,
"mean_token_accuracy": 0.9482819065451622,
"num_tokens": 99791278.0,
"step": 669
},
{
"epoch": 0.45408336157234835,
"grad_norm": 0.44887095282109707,
"learning_rate": 1.0934959349593496e-06,
"loss": 0.1661,
"mean_token_accuracy": 0.9470083937048912,
"num_tokens": 99941775.0,
"step": 670
},
{
"epoch": 0.4547610979329041,
"grad_norm": 0.6272156675456803,
"learning_rate": 1.092140921409214e-06,
"loss": 0.164,
"mean_token_accuracy": 0.9480069652199745,
"num_tokens": 100091771.0,
"step": 671
},
{
"epoch": 0.4554388342934598,
"grad_norm": 0.5381560149540484,
"learning_rate": 1.0907859078590786e-06,
"loss": 0.1584,
"mean_token_accuracy": 0.9485157206654549,
"num_tokens": 100239960.0,
"step": 672
},
{
"epoch": 0.45611657065401556,
"grad_norm": 0.5687429876872409,
"learning_rate": 1.089430894308943e-06,
"loss": 0.1641,
"mean_token_accuracy": 0.947501078248024,
"num_tokens": 100387617.0,
"step": 673
},
{
"epoch": 0.45679430701457135,
"grad_norm": 0.4784796211552843,
"learning_rate": 1.0880758807588074e-06,
"loss": 0.1709,
"mean_token_accuracy": 0.9460517168045044,
"num_tokens": 100535043.0,
"step": 674
},
{
"epoch": 0.4574720433751271,
"grad_norm": 0.5820901146665393,
"learning_rate": 1.086720867208672e-06,
"loss": 0.166,
"mean_token_accuracy": 0.946740947663784,
"num_tokens": 100685344.0,
"step": 675
},
{
"epoch": 0.4581497797356828,
"grad_norm": 0.49825430441063895,
"learning_rate": 1.0853658536585367e-06,
"loss": 0.1749,
"mean_token_accuracy": 0.9447141662240028,
"num_tokens": 100834993.0,
"step": 676
},
{
"epoch": 0.45882751609623856,
"grad_norm": 0.4517472676766605,
"learning_rate": 1.084010840108401e-06,
"loss": 0.1554,
"mean_token_accuracy": 0.9495163634419441,
"num_tokens": 100980820.0,
"step": 677
},
{
"epoch": 0.4595052524567943,
"grad_norm": 4.281733626493249,
"learning_rate": 1.0826558265582655e-06,
"loss": 0.1508,
"mean_token_accuracy": 0.9506775587797165,
"num_tokens": 101125284.0,
"step": 678
},
{
"epoch": 0.46018298881735004,
"grad_norm": 0.3922966399191834,
"learning_rate": 1.08130081300813e-06,
"loss": 0.1614,
"mean_token_accuracy": 0.9481890052556992,
"num_tokens": 101272992.0,
"step": 679
},
{
"epoch": 0.4608607251779058,
"grad_norm": 0.4595732751799743,
"learning_rate": 1.0799457994579946e-06,
"loss": 0.1731,
"mean_token_accuracy": 0.9453582316637039,
"num_tokens": 101421582.0,
"step": 680
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.5865357648842143,
"learning_rate": 1.078590785907859e-06,
"loss": 0.1575,
"mean_token_accuracy": 0.9493934810161591,
"num_tokens": 101569696.0,
"step": 681
},
{
"epoch": 0.4622161978990173,
"grad_norm": 0.4907597552813253,
"learning_rate": 1.0772357723577234e-06,
"loss": 0.1643,
"mean_token_accuracy": 0.9481813237071037,
"num_tokens": 101718242.0,
"step": 682
},
{
"epoch": 0.46289393425957304,
"grad_norm": 0.5738496429651249,
"learning_rate": 1.075880758807588e-06,
"loss": 0.1652,
"mean_token_accuracy": 0.948225773870945,
"num_tokens": 101868574.0,
"step": 683
},
{
"epoch": 0.4635716706201288,
"grad_norm": 0.5747780283701054,
"learning_rate": 1.0745257452574526e-06,
"loss": 0.1619,
"mean_token_accuracy": 0.9486450627446175,
"num_tokens": 102016272.0,
"step": 684
},
{
"epoch": 0.4642494069806845,
"grad_norm": 0.5059577085300414,
"learning_rate": 1.0731707317073172e-06,
"loss": 0.1592,
"mean_token_accuracy": 0.949810229241848,
"num_tokens": 102165015.0,
"step": 685
},
{
"epoch": 0.46492714334124025,
"grad_norm": 0.5835022953962732,
"learning_rate": 1.0718157181571815e-06,
"loss": 0.161,
"mean_token_accuracy": 0.9493243172764778,
"num_tokens": 102311940.0,
"step": 686
},
{
"epoch": 0.465604879701796,
"grad_norm": 0.7781826625681301,
"learning_rate": 1.070460704607046e-06,
"loss": 0.1595,
"mean_token_accuracy": 0.9490118324756622,
"num_tokens": 102465237.0,
"step": 687
},
{
"epoch": 0.4662826160623517,
"grad_norm": 0.47757570477744504,
"learning_rate": 1.0691056910569105e-06,
"loss": 0.1627,
"mean_token_accuracy": 0.9480254128575325,
"num_tokens": 102610588.0,
"step": 688
},
{
"epoch": 0.4669603524229075,
"grad_norm": 0.4806833949582325,
"learning_rate": 1.067750677506775e-06,
"loss": 0.1699,
"mean_token_accuracy": 0.945494756102562,
"num_tokens": 102763615.0,
"step": 689
},
{
"epoch": 0.46763808878346325,
"grad_norm": 0.4122436220594733,
"learning_rate": 1.0663956639566396e-06,
"loss": 0.167,
"mean_token_accuracy": 0.9464166983962059,
"num_tokens": 102910842.0,
"step": 690
},
{
"epoch": 0.468315825144019,
"grad_norm": 0.4394930622319954,
"learning_rate": 1.0650406504065039e-06,
"loss": 0.1743,
"mean_token_accuracy": 0.9447741508483887,
"num_tokens": 103058541.0,
"step": 691
},
{
"epoch": 0.4689935615045747,
"grad_norm": 0.43543158560650574,
"learning_rate": 1.0636856368563684e-06,
"loss": 0.1586,
"mean_token_accuracy": 0.9491621479392052,
"num_tokens": 103212920.0,
"step": 692
},
{
"epoch": 0.46967129786513045,
"grad_norm": 0.6038558938086582,
"learning_rate": 1.0623306233062331e-06,
"loss": 0.1656,
"mean_token_accuracy": 0.9469928592443466,
"num_tokens": 103366294.0,
"step": 693
},
{
"epoch": 0.4703490342256862,
"grad_norm": 0.5169428891483752,
"learning_rate": 1.0609756097560976e-06,
"loss": 0.1676,
"mean_token_accuracy": 0.9460704624652863,
"num_tokens": 103516086.0,
"step": 694
},
{
"epoch": 0.4710267705862419,
"grad_norm": 0.4204143777115915,
"learning_rate": 1.0596205962059621e-06,
"loss": 0.1606,
"mean_token_accuracy": 0.9489069432020187,
"num_tokens": 103663772.0,
"step": 695
},
{
"epoch": 0.4717045069467977,
"grad_norm": 0.53680032839607,
"learning_rate": 1.0582655826558265e-06,
"loss": 0.1556,
"mean_token_accuracy": 0.9499735161662102,
"num_tokens": 103809403.0,
"step": 696
},
{
"epoch": 0.47238224330735346,
"grad_norm": 0.41928447429862764,
"learning_rate": 1.056910569105691e-06,
"loss": 0.1547,
"mean_token_accuracy": 0.9501360580325127,
"num_tokens": 103958959.0,
"step": 697
},
{
"epoch": 0.4730599796679092,
"grad_norm": 0.5315795339532013,
"learning_rate": 1.0555555555555555e-06,
"loss": 0.1707,
"mean_token_accuracy": 0.9456048160791397,
"num_tokens": 104105483.0,
"step": 698
},
{
"epoch": 0.47373771602846493,
"grad_norm": 0.5954558763645185,
"learning_rate": 1.05420054200542e-06,
"loss": 0.1713,
"mean_token_accuracy": 0.9454812332987785,
"num_tokens": 104253981.0,
"step": 699
},
{
"epoch": 0.47441545238902066,
"grad_norm": 0.548448631040149,
"learning_rate": 1.0528455284552843e-06,
"loss": 0.1687,
"mean_token_accuracy": 0.9472458362579346,
"num_tokens": 104400658.0,
"step": 700
},
{
"epoch": 0.4750931887495764,
"grad_norm": 0.7249286327431037,
"learning_rate": 1.051490514905149e-06,
"loss": 0.1639,
"mean_token_accuracy": 0.9484637081623077,
"num_tokens": 104546254.0,
"step": 701
},
{
"epoch": 0.47577092511013214,
"grad_norm": 0.47880559600558203,
"learning_rate": 1.0501355013550136e-06,
"loss": 0.1655,
"mean_token_accuracy": 0.9477341920137405,
"num_tokens": 104697706.0,
"step": 702
},
{
"epoch": 0.47644866147068793,
"grad_norm": 0.532284214873716,
"learning_rate": 1.048780487804878e-06,
"loss": 0.1653,
"mean_token_accuracy": 0.9469640702009201,
"num_tokens": 104850387.0,
"step": 703
},
{
"epoch": 0.47712639783124366,
"grad_norm": 0.8313245354091897,
"learning_rate": 1.0474254742547426e-06,
"loss": 0.1688,
"mean_token_accuracy": 0.9462714120745659,
"num_tokens": 104998229.0,
"step": 704
},
{
"epoch": 0.4778041341917994,
"grad_norm": 0.4642162638190659,
"learning_rate": 1.046070460704607e-06,
"loss": 0.1595,
"mean_token_accuracy": 0.9481769949197769,
"num_tokens": 105142197.0,
"step": 705
},
{
"epoch": 0.47848187055235514,
"grad_norm": 0.5041207986876783,
"learning_rate": 1.0447154471544714e-06,
"loss": 0.1543,
"mean_token_accuracy": 0.9502139016985893,
"num_tokens": 105292849.0,
"step": 706
},
{
"epoch": 0.4791596069129109,
"grad_norm": 0.48408064640935866,
"learning_rate": 1.043360433604336e-06,
"loss": 0.1672,
"mean_token_accuracy": 0.9469591826200485,
"num_tokens": 105442224.0,
"step": 707
},
{
"epoch": 0.4798373432734666,
"grad_norm": 0.49214635864367573,
"learning_rate": 1.0420054200542005e-06,
"loss": 0.169,
"mean_token_accuracy": 0.9472545087337494,
"num_tokens": 105588113.0,
"step": 708
},
{
"epoch": 0.48051507963402235,
"grad_norm": 0.47558616757666466,
"learning_rate": 1.040650406504065e-06,
"loss": 0.1679,
"mean_token_accuracy": 0.9467742890119553,
"num_tokens": 105735195.0,
"step": 709
},
{
"epoch": 0.4811928159945781,
"grad_norm": 0.4988404932381961,
"learning_rate": 1.0392953929539295e-06,
"loss": 0.1756,
"mean_token_accuracy": 0.9449669793248177,
"num_tokens": 105884493.0,
"step": 710
},
{
"epoch": 0.4818705523551339,
"grad_norm": 0.49847447518114224,
"learning_rate": 1.037940379403794e-06,
"loss": 0.1638,
"mean_token_accuracy": 0.9476760029792786,
"num_tokens": 106033097.0,
"step": 711
},
{
"epoch": 0.4825482887156896,
"grad_norm": 0.4738878804902704,
"learning_rate": 1.0365853658536586e-06,
"loss": 0.1541,
"mean_token_accuracy": 0.9495199620723724,
"num_tokens": 106180475.0,
"step": 712
},
{
"epoch": 0.48322602507624535,
"grad_norm": 0.4549647737418487,
"learning_rate": 1.035230352303523e-06,
"loss": 0.171,
"mean_token_accuracy": 0.9463451281189919,
"num_tokens": 106324243.0,
"step": 713
},
{
"epoch": 0.4839037614368011,
"grad_norm": 0.5457516421527091,
"learning_rate": 1.0338753387533874e-06,
"loss": 0.1574,
"mean_token_accuracy": 0.9500289857387543,
"num_tokens": 106476157.0,
"step": 714
},
{
"epoch": 0.4845814977973568,
"grad_norm": 0.5482377070075686,
"learning_rate": 1.032520325203252e-06,
"loss": 0.1731,
"mean_token_accuracy": 0.9448134079575539,
"num_tokens": 106628472.0,
"step": 715
},
{
"epoch": 0.48525923415791256,
"grad_norm": 0.5467205293980786,
"learning_rate": 1.0311653116531164e-06,
"loss": 0.1699,
"mean_token_accuracy": 0.9459547698497772,
"num_tokens": 106774603.0,
"step": 716
},
{
"epoch": 0.4859369705184683,
"grad_norm": 0.625319792565366,
"learning_rate": 1.029810298102981e-06,
"loss": 0.1671,
"mean_token_accuracy": 0.9472379386425018,
"num_tokens": 106925585.0,
"step": 717
},
{
"epoch": 0.4866147068790241,
"grad_norm": 0.46862713118482585,
"learning_rate": 1.0284552845528457e-06,
"loss": 0.1569,
"mean_token_accuracy": 0.9494001865386963,
"num_tokens": 107073596.0,
"step": 718
},
{
"epoch": 0.4872924432395798,
"grad_norm": 0.4729214433145921,
"learning_rate": 1.02710027100271e-06,
"loss": 0.1594,
"mean_token_accuracy": 0.949380025267601,
"num_tokens": 107223256.0,
"step": 719
},
{
"epoch": 0.48797017960013556,
"grad_norm": 0.4821712382621107,
"learning_rate": 1.0257452574525745e-06,
"loss": 0.1616,
"mean_token_accuracy": 0.9479318857192993,
"num_tokens": 107373846.0,
"step": 720
},
{
"epoch": 0.4886479159606913,
"grad_norm": 0.42839181331142523,
"learning_rate": 1.024390243902439e-06,
"loss": 0.1608,
"mean_token_accuracy": 0.9478693678975105,
"num_tokens": 107523614.0,
"step": 721
},
{
"epoch": 0.48932565232124703,
"grad_norm": 1.4382103336661227,
"learning_rate": 1.0230352303523035e-06,
"loss": 0.1589,
"mean_token_accuracy": 0.9502756372094154,
"num_tokens": 107672510.0,
"step": 722
},
{
"epoch": 0.49000338868180277,
"grad_norm": 0.4478990985518304,
"learning_rate": 1.0216802168021679e-06,
"loss": 0.1545,
"mean_token_accuracy": 0.950567439198494,
"num_tokens": 107821486.0,
"step": 723
},
{
"epoch": 0.4906811250423585,
"grad_norm": 0.49546135852173817,
"learning_rate": 1.0203252032520324e-06,
"loss": 0.1597,
"mean_token_accuracy": 0.9489807188510895,
"num_tokens": 107971346.0,
"step": 724
},
{
"epoch": 0.4913588614029143,
"grad_norm": 0.5268621271334399,
"learning_rate": 1.0189701897018969e-06,
"loss": 0.1686,
"mean_token_accuracy": 0.9466768205165863,
"num_tokens": 108117296.0,
"step": 725
},
{
"epoch": 0.49203659776347003,
"grad_norm": 0.5302331386916604,
"learning_rate": 1.0176151761517614e-06,
"loss": 0.1715,
"mean_token_accuracy": 0.9459145441651344,
"num_tokens": 108269363.0,
"step": 726
},
{
"epoch": 0.49271433412402577,
"grad_norm": 0.6950355569365599,
"learning_rate": 1.0162601626016261e-06,
"loss": 0.1617,
"mean_token_accuracy": 0.9490808993577957,
"num_tokens": 108418896.0,
"step": 727
},
{
"epoch": 0.4933920704845815,
"grad_norm": 0.6291340372645929,
"learning_rate": 1.0149051490514905e-06,
"loss": 0.1553,
"mean_token_accuracy": 0.9506272599101067,
"num_tokens": 108568146.0,
"step": 728
},
{
"epoch": 0.49406980684513724,
"grad_norm": 0.4823368965206394,
"learning_rate": 1.013550135501355e-06,
"loss": 0.1666,
"mean_token_accuracy": 0.9474935382604599,
"num_tokens": 108717754.0,
"step": 729
},
{
"epoch": 0.494747543205693,
"grad_norm": 1.1298261109243466,
"learning_rate": 1.0121951219512195e-06,
"loss": 0.1767,
"mean_token_accuracy": 0.9445156082510948,
"num_tokens": 108865813.0,
"step": 730
},
{
"epoch": 0.4954252795662487,
"grad_norm": 0.5542227313632503,
"learning_rate": 1.010840108401084e-06,
"loss": 0.17,
"mean_token_accuracy": 0.9461698085069656,
"num_tokens": 109017876.0,
"step": 731
},
{
"epoch": 0.49610301592680445,
"grad_norm": 0.6317319587465365,
"learning_rate": 1.0094850948509485e-06,
"loss": 0.1663,
"mean_token_accuracy": 0.9471932277083397,
"num_tokens": 109165966.0,
"step": 732
},
{
"epoch": 0.49678075228736024,
"grad_norm": 0.5709037252451544,
"learning_rate": 1.0081300813008128e-06,
"loss": 0.1696,
"mean_token_accuracy": 0.9459073320031166,
"num_tokens": 109319510.0,
"step": 733
},
{
"epoch": 0.497458488647916,
"grad_norm": 0.4745034952273246,
"learning_rate": 1.0067750677506774e-06,
"loss": 0.1719,
"mean_token_accuracy": 0.945509634912014,
"num_tokens": 109471374.0,
"step": 734
},
{
"epoch": 0.4981362250084717,
"grad_norm": 0.5997054541531742,
"learning_rate": 1.005420054200542e-06,
"loss": 0.1597,
"mean_token_accuracy": 0.9486510381102562,
"num_tokens": 109619134.0,
"step": 735
},
{
"epoch": 0.49881396136902745,
"grad_norm": 0.44582034839420726,
"learning_rate": 1.0040650406504066e-06,
"loss": 0.1676,
"mean_token_accuracy": 0.9460132345557213,
"num_tokens": 109771208.0,
"step": 736
},
{
"epoch": 0.4994916977295832,
"grad_norm": 0.46505806035131303,
"learning_rate": 1.002710027100271e-06,
"loss": 0.1646,
"mean_token_accuracy": 0.9484423398971558,
"num_tokens": 109923524.0,
"step": 737
},
{
"epoch": 0.5001694340901389,
"grad_norm": 0.534117786954477,
"learning_rate": 1.0013550135501354e-06,
"loss": 0.1743,
"mean_token_accuracy": 0.9447778537869453,
"num_tokens": 110074329.0,
"step": 738
},
{
"epoch": 0.5008471704506947,
"grad_norm": 0.4969310333618879,
"learning_rate": 1e-06,
"loss": 0.1762,
"mean_token_accuracy": 0.9441555514931679,
"num_tokens": 110223675.0,
"step": 739
},
{
"epoch": 0.5015249068112504,
"grad_norm": 0.43004524784944254,
"learning_rate": 9.986449864498645e-07,
"loss": 0.1717,
"mean_token_accuracy": 0.9462130591273308,
"num_tokens": 110371658.0,
"step": 740
},
{
"epoch": 0.5022026431718062,
"grad_norm": 0.5280386035661061,
"learning_rate": 9.97289972899729e-07,
"loss": 0.1602,
"mean_token_accuracy": 0.9489178732037544,
"num_tokens": 110522893.0,
"step": 741
},
{
"epoch": 0.5028803795323619,
"grad_norm": 0.487926791997932,
"learning_rate": 9.959349593495935e-07,
"loss": 0.1505,
"mean_token_accuracy": 0.9517373815178871,
"num_tokens": 110667680.0,
"step": 742
},
{
"epoch": 0.5035581158929177,
"grad_norm": 0.4887678950646701,
"learning_rate": 9.945799457994578e-07,
"loss": 0.1638,
"mean_token_accuracy": 0.9478684067726135,
"num_tokens": 110816521.0,
"step": 743
},
{
"epoch": 0.5042358522534734,
"grad_norm": 1.0072774482193543,
"learning_rate": 9.932249322493226e-07,
"loss": 0.1685,
"mean_token_accuracy": 0.9473919719457626,
"num_tokens": 110965132.0,
"step": 744
},
{
"epoch": 0.5049135886140291,
"grad_norm": 0.504093174335246,
"learning_rate": 9.918699186991869e-07,
"loss": 0.1678,
"mean_token_accuracy": 0.9465838596224785,
"num_tokens": 111116073.0,
"step": 745
},
{
"epoch": 0.5055913249745849,
"grad_norm": 0.49005593530631875,
"learning_rate": 9.905149051490514e-07,
"loss": 0.1784,
"mean_token_accuracy": 0.9438984841108322,
"num_tokens": 111263209.0,
"step": 746
},
{
"epoch": 0.5062690613351406,
"grad_norm": 0.4547236242174316,
"learning_rate": 9.89159891598916e-07,
"loss": 0.1575,
"mean_token_accuracy": 0.9499284625053406,
"num_tokens": 111411766.0,
"step": 747
},
{
"epoch": 0.5069467976956964,
"grad_norm": 0.6009114812592581,
"learning_rate": 9.878048780487804e-07,
"loss": 0.1689,
"mean_token_accuracy": 0.9469569846987724,
"num_tokens": 111557025.0,
"step": 748
},
{
"epoch": 0.5076245340562521,
"grad_norm": 0.42135854041604787,
"learning_rate": 9.86449864498645e-07,
"loss": 0.1561,
"mean_token_accuracy": 0.9499689266085625,
"num_tokens": 111707742.0,
"step": 749
},
{
"epoch": 0.5083022704168079,
"grad_norm": 0.4338767955990597,
"learning_rate": 9.850948509485095e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9490743651986122,
"num_tokens": 111858573.0,
"step": 750
},
{
"epoch": 0.5089800067773637,
"grad_norm": 0.48802928227265013,
"learning_rate": 9.83739837398374e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9479610547423363,
"num_tokens": 112006134.0,
"step": 751
},
{
"epoch": 0.5096577431379193,
"grad_norm": 0.5253666701851781,
"learning_rate": 9.823848238482385e-07,
"loss": 0.153,
"mean_token_accuracy": 0.9508332163095474,
"num_tokens": 112156014.0,
"step": 752
},
{
"epoch": 0.5103354794984751,
"grad_norm": 0.6510987326952798,
"learning_rate": 9.81029810298103e-07,
"loss": 0.1602,
"mean_token_accuracy": 0.9488585442304611,
"num_tokens": 112307072.0,
"step": 753
},
{
"epoch": 0.5110132158590308,
"grad_norm": 0.42952608876024273,
"learning_rate": 9.796747967479673e-07,
"loss": 0.167,
"mean_token_accuracy": 0.9463882446289062,
"num_tokens": 112457138.0,
"step": 754
},
{
"epoch": 0.5116909522195866,
"grad_norm": 0.44738605619561317,
"learning_rate": 9.783197831978318e-07,
"loss": 0.1564,
"mean_token_accuracy": 0.9506853669881821,
"num_tokens": 112606569.0,
"step": 755
},
{
"epoch": 0.5123686885801423,
"grad_norm": 0.5123927765966358,
"learning_rate": 9.769647696476966e-07,
"loss": 0.163,
"mean_token_accuracy": 0.948229692876339,
"num_tokens": 112755571.0,
"step": 756
},
{
"epoch": 0.5130464249406981,
"grad_norm": 0.45086208832924035,
"learning_rate": 9.756097560975609e-07,
"loss": 0.1719,
"mean_token_accuracy": 0.9456148147583008,
"num_tokens": 112904410.0,
"step": 757
},
{
"epoch": 0.5137241613012539,
"grad_norm": 0.630544474001028,
"learning_rate": 9.742547425474254e-07,
"loss": 0.1679,
"mean_token_accuracy": 0.946696400642395,
"num_tokens": 113054622.0,
"step": 758
},
{
"epoch": 0.5144018976618095,
"grad_norm": 0.47191984856805846,
"learning_rate": 9.7289972899729e-07,
"loss": 0.1715,
"mean_token_accuracy": 0.9447854086756706,
"num_tokens": 113204101.0,
"step": 759
},
{
"epoch": 0.5150796340223653,
"grad_norm": 0.7466705426242991,
"learning_rate": 9.715447154471544e-07,
"loss": 0.1644,
"mean_token_accuracy": 0.9477136209607124,
"num_tokens": 113353757.0,
"step": 760
},
{
"epoch": 0.515757370382921,
"grad_norm": 0.4424803125673622,
"learning_rate": 9.70189701897019e-07,
"loss": 0.1588,
"mean_token_accuracy": 0.9492787793278694,
"num_tokens": 113501991.0,
"step": 761
},
{
"epoch": 0.5164351067434768,
"grad_norm": 0.5154548055150858,
"learning_rate": 9.688346883468835e-07,
"loss": 0.1647,
"mean_token_accuracy": 0.9481583312153816,
"num_tokens": 113656775.0,
"step": 762
},
{
"epoch": 0.5171128431040325,
"grad_norm": 0.5138120980499192,
"learning_rate": 9.67479674796748e-07,
"loss": 0.1572,
"mean_token_accuracy": 0.949316069483757,
"num_tokens": 113804739.0,
"step": 763
},
{
"epoch": 0.5177905794645883,
"grad_norm": 0.597476859195241,
"learning_rate": 9.661246612466123e-07,
"loss": 0.1679,
"mean_token_accuracy": 0.9463634565472603,
"num_tokens": 113956619.0,
"step": 764
},
{
"epoch": 0.518468315825144,
"grad_norm": 0.3915756552113162,
"learning_rate": 9.64769647696477e-07,
"loss": 0.1627,
"mean_token_accuracy": 0.9484176561236382,
"num_tokens": 114110011.0,
"step": 765
},
{
"epoch": 0.5191460521856998,
"grad_norm": 0.5299306971347804,
"learning_rate": 9.634146341463414e-07,
"loss": 0.1587,
"mean_token_accuracy": 0.9498151689767838,
"num_tokens": 114256287.0,
"step": 766
},
{
"epoch": 0.5198237885462555,
"grad_norm": 0.45604200441024956,
"learning_rate": 9.620596205962059e-07,
"loss": 0.1577,
"mean_token_accuracy": 0.949748583137989,
"num_tokens": 114405384.0,
"step": 767
},
{
"epoch": 0.5205015249068112,
"grad_norm": 0.5501039593422863,
"learning_rate": 9.607046070460704e-07,
"loss": 0.1673,
"mean_token_accuracy": 0.947017602622509,
"num_tokens": 114558364.0,
"step": 768
},
{
"epoch": 0.521179261267367,
"grad_norm": 0.425048989890096,
"learning_rate": 9.59349593495935e-07,
"loss": 0.1582,
"mean_token_accuracy": 0.9495890736579895,
"num_tokens": 114706272.0,
"step": 769
},
{
"epoch": 0.5218569976279227,
"grad_norm": 0.4962916899714615,
"learning_rate": 9.579945799457994e-07,
"loss": 0.1563,
"mean_token_accuracy": 0.9498101100325584,
"num_tokens": 114851238.0,
"step": 770
},
{
"epoch": 0.5225347339884785,
"grad_norm": 1.0919307561207405,
"learning_rate": 9.56639566395664e-07,
"loss": 0.1599,
"mean_token_accuracy": 0.9484520703554153,
"num_tokens": 115002037.0,
"step": 771
},
{
"epoch": 0.5232124703490342,
"grad_norm": 0.47581107966778774,
"learning_rate": 9.552845528455285e-07,
"loss": 0.1554,
"mean_token_accuracy": 0.9501020833849907,
"num_tokens": 115150459.0,
"step": 772
},
{
"epoch": 0.52389020670959,
"grad_norm": 0.431562811821882,
"learning_rate": 9.53929539295393e-07,
"loss": 0.1759,
"mean_token_accuracy": 0.9442851468920708,
"num_tokens": 115294986.0,
"step": 773
},
{
"epoch": 0.5245679430701458,
"grad_norm": 0.4078301321714335,
"learning_rate": 9.525745257452574e-07,
"loss": 0.1677,
"mean_token_accuracy": 0.9461727514863014,
"num_tokens": 115443301.0,
"step": 774
},
{
"epoch": 0.5252456794307014,
"grad_norm": 0.5082886880179815,
"learning_rate": 9.512195121951218e-07,
"loss": 0.1645,
"mean_token_accuracy": 0.947071261703968,
"num_tokens": 115591069.0,
"step": 775
},
{
"epoch": 0.5259234157912572,
"grad_norm": 0.4666195259750703,
"learning_rate": 9.498644986449864e-07,
"loss": 0.175,
"mean_token_accuracy": 0.9454357475042343,
"num_tokens": 115743380.0,
"step": 776
},
{
"epoch": 0.5266011521518129,
"grad_norm": 0.4919611574214216,
"learning_rate": 9.48509485094851e-07,
"loss": 0.1703,
"mean_token_accuracy": 0.9463658332824707,
"num_tokens": 115889361.0,
"step": 777
},
{
"epoch": 0.5272788885123687,
"grad_norm": 1.7430472687651062,
"learning_rate": 9.471544715447154e-07,
"loss": 0.1581,
"mean_token_accuracy": 0.9499025717377663,
"num_tokens": 116038571.0,
"step": 778
},
{
"epoch": 0.5279566248729244,
"grad_norm": 0.5108511243101571,
"learning_rate": 9.457994579945799e-07,
"loss": 0.1611,
"mean_token_accuracy": 0.9482218623161316,
"num_tokens": 116186871.0,
"step": 779
},
{
"epoch": 0.5286343612334802,
"grad_norm": 1.2775624635609895,
"learning_rate": 9.444444444444444e-07,
"loss": 0.1648,
"mean_token_accuracy": 0.9478557929396629,
"num_tokens": 116333923.0,
"step": 780
},
{
"epoch": 0.529312097594036,
"grad_norm": 0.7218332579746664,
"learning_rate": 9.430894308943089e-07,
"loss": 0.1636,
"mean_token_accuracy": 0.9476354792714119,
"num_tokens": 116484514.0,
"step": 781
},
{
"epoch": 0.5299898339545916,
"grad_norm": 0.47382289757475327,
"learning_rate": 9.417344173441733e-07,
"loss": 0.1699,
"mean_token_accuracy": 0.9467662200331688,
"num_tokens": 116633843.0,
"step": 782
},
{
"epoch": 0.5306675703151474,
"grad_norm": 0.7320079657643676,
"learning_rate": 9.403794037940379e-07,
"loss": 0.1567,
"mean_token_accuracy": 0.9489927589893341,
"num_tokens": 116784397.0,
"step": 783
},
{
"epoch": 0.5313453066757031,
"grad_norm": 0.4630461591729857,
"learning_rate": 9.390243902439024e-07,
"loss": 0.157,
"mean_token_accuracy": 0.949583537876606,
"num_tokens": 116937107.0,
"step": 784
},
{
"epoch": 0.5320230430362589,
"grad_norm": 0.47230814693707296,
"learning_rate": 9.376693766937669e-07,
"loss": 0.167,
"mean_token_accuracy": 0.9474940076470375,
"num_tokens": 117086319.0,
"step": 785
},
{
"epoch": 0.5327007793968146,
"grad_norm": 0.4415373162153277,
"learning_rate": 9.363143631436314e-07,
"loss": 0.1657,
"mean_token_accuracy": 0.9468573704361916,
"num_tokens": 117234587.0,
"step": 786
},
{
"epoch": 0.5333785157573704,
"grad_norm": 0.42824798111784507,
"learning_rate": 9.349593495934958e-07,
"loss": 0.1666,
"mean_token_accuracy": 0.947014681994915,
"num_tokens": 117383847.0,
"step": 787
},
{
"epoch": 0.5340562521179262,
"grad_norm": 0.44986339654153784,
"learning_rate": 9.336043360433605e-07,
"loss": 0.1607,
"mean_token_accuracy": 0.9481543377041817,
"num_tokens": 117534768.0,
"step": 788
},
{
"epoch": 0.5347339884784819,
"grad_norm": 0.6592995054568456,
"learning_rate": 9.322493224932249e-07,
"loss": 0.166,
"mean_token_accuracy": 0.9465798437595367,
"num_tokens": 117686438.0,
"step": 789
},
{
"epoch": 0.5354117248390377,
"grad_norm": 0.543648944178695,
"learning_rate": 9.308943089430894e-07,
"loss": 0.1681,
"mean_token_accuracy": 0.9476120993494987,
"num_tokens": 117840225.0,
"step": 790
},
{
"epoch": 0.5360894611995933,
"grad_norm": 0.5019318009561816,
"learning_rate": 9.295392953929538e-07,
"loss": 0.1636,
"mean_token_accuracy": 0.9476380497217178,
"num_tokens": 117986407.0,
"step": 791
},
{
"epoch": 0.5367671975601491,
"grad_norm": 0.4633489551285423,
"learning_rate": 9.281842818428183e-07,
"loss": 0.1737,
"mean_token_accuracy": 0.9449344500899315,
"num_tokens": 118135942.0,
"step": 792
},
{
"epoch": 0.5374449339207048,
"grad_norm": 0.4902355737543805,
"learning_rate": 9.26829268292683e-07,
"loss": 0.1634,
"mean_token_accuracy": 0.9482391402125359,
"num_tokens": 118287895.0,
"step": 793
},
{
"epoch": 0.5381226702812606,
"grad_norm": 0.9371752420840535,
"learning_rate": 9.254742547425474e-07,
"loss": 0.1724,
"mean_token_accuracy": 0.9446223452687263,
"num_tokens": 118432630.0,
"step": 794
},
{
"epoch": 0.5388004066418164,
"grad_norm": 0.4481821620387517,
"learning_rate": 9.241192411924119e-07,
"loss": 0.1557,
"mean_token_accuracy": 0.9504266083240509,
"num_tokens": 118580254.0,
"step": 795
},
{
"epoch": 0.5394781430023721,
"grad_norm": 0.4433316238279581,
"learning_rate": 9.227642276422763e-07,
"loss": 0.1629,
"mean_token_accuracy": 0.9479897543787956,
"num_tokens": 118727619.0,
"step": 796
},
{
"epoch": 0.5401558793629279,
"grad_norm": 0.46876476388476396,
"learning_rate": 9.214092140921409e-07,
"loss": 0.1706,
"mean_token_accuracy": 0.9457740485668182,
"num_tokens": 118874120.0,
"step": 797
},
{
"epoch": 0.5408336157234835,
"grad_norm": 0.3922350527541487,
"learning_rate": 9.200542005420053e-07,
"loss": 0.1585,
"mean_token_accuracy": 0.9497921913862228,
"num_tokens": 119023831.0,
"step": 798
},
{
"epoch": 0.5415113520840393,
"grad_norm": 0.46069590927505677,
"learning_rate": 9.186991869918699e-07,
"loss": 0.1581,
"mean_token_accuracy": 0.9491604790091515,
"num_tokens": 119172402.0,
"step": 799
},
{
"epoch": 0.542189088444595,
"grad_norm": 1.5163101510803785,
"learning_rate": 9.173441734417344e-07,
"loss": 0.1568,
"mean_token_accuracy": 0.9501603320240974,
"num_tokens": 119321631.0,
"step": 800
},
{
"epoch": 0.5428668248051508,
"grad_norm": 0.47039412722321516,
"learning_rate": 9.159891598915989e-07,
"loss": 0.1678,
"mean_token_accuracy": 0.9467765465378761,
"num_tokens": 119473130.0,
"step": 801
},
{
"epoch": 0.5435445611657065,
"grad_norm": 0.500634772226534,
"learning_rate": 9.146341463414634e-07,
"loss": 0.1735,
"mean_token_accuracy": 0.9450068846344948,
"num_tokens": 119624002.0,
"step": 802
},
{
"epoch": 0.5442222975262623,
"grad_norm": 0.546366210975246,
"learning_rate": 9.132791327913278e-07,
"loss": 0.1616,
"mean_token_accuracy": 0.9482316449284554,
"num_tokens": 119770921.0,
"step": 803
},
{
"epoch": 0.5449000338868181,
"grad_norm": 0.444178937700613,
"learning_rate": 9.119241192411924e-07,
"loss": 0.1656,
"mean_token_accuracy": 0.9471771121025085,
"num_tokens": 119921288.0,
"step": 804
},
{
"epoch": 0.5455777702473738,
"grad_norm": 0.7038113748485348,
"learning_rate": 9.105691056910569e-07,
"loss": 0.1684,
"mean_token_accuracy": 0.9470604583621025,
"num_tokens": 120063297.0,
"step": 805
},
{
"epoch": 0.5462555066079295,
"grad_norm": 0.4750752606409635,
"learning_rate": 9.092140921409214e-07,
"loss": 0.1574,
"mean_token_accuracy": 0.9491050541400909,
"num_tokens": 120214728.0,
"step": 806
},
{
"epoch": 0.5469332429684852,
"grad_norm": 0.46993115257183415,
"learning_rate": 9.078590785907859e-07,
"loss": 0.1589,
"mean_token_accuracy": 0.9494856968522072,
"num_tokens": 120359269.0,
"step": 807
},
{
"epoch": 0.547610979329041,
"grad_norm": 2.162229906542572,
"learning_rate": 9.065040650406503e-07,
"loss": 0.1518,
"mean_token_accuracy": 0.9509051218628883,
"num_tokens": 120511262.0,
"step": 808
},
{
"epoch": 0.5482887156895967,
"grad_norm": 0.5212946679760524,
"learning_rate": 9.05149051490515e-07,
"loss": 0.1691,
"mean_token_accuracy": 0.9471752345561981,
"num_tokens": 120664787.0,
"step": 809
},
{
"epoch": 0.5489664520501525,
"grad_norm": 0.6422543732085594,
"learning_rate": 9.037940379403794e-07,
"loss": 0.1657,
"mean_token_accuracy": 0.9477086663246155,
"num_tokens": 120816729.0,
"step": 810
},
{
"epoch": 0.5496441884107083,
"grad_norm": 0.44358840772713953,
"learning_rate": 9.024390243902439e-07,
"loss": 0.1687,
"mean_token_accuracy": 0.9464196562767029,
"num_tokens": 120965272.0,
"step": 811
},
{
"epoch": 0.550321924771264,
"grad_norm": 0.4511515056931182,
"learning_rate": 9.010840108401083e-07,
"loss": 0.1657,
"mean_token_accuracy": 0.9473424032330513,
"num_tokens": 121111754.0,
"step": 812
},
{
"epoch": 0.5509996611318198,
"grad_norm": 0.38817856013106755,
"learning_rate": 8.997289972899728e-07,
"loss": 0.1588,
"mean_token_accuracy": 0.9496878236532211,
"num_tokens": 121261973.0,
"step": 813
},
{
"epoch": 0.5516773974923754,
"grad_norm": 0.4061986477027123,
"learning_rate": 8.983739837398373e-07,
"loss": 0.164,
"mean_token_accuracy": 0.9469241499900818,
"num_tokens": 121410011.0,
"step": 814
},
{
"epoch": 0.5523551338529312,
"grad_norm": 1.015324252743301,
"learning_rate": 8.970189701897019e-07,
"loss": 0.1566,
"mean_token_accuracy": 0.9484916105866432,
"num_tokens": 121550744.0,
"step": 815
},
{
"epoch": 0.5530328702134869,
"grad_norm": 0.44049449649746186,
"learning_rate": 8.956639566395664e-07,
"loss": 0.1572,
"mean_token_accuracy": 0.9494698345661163,
"num_tokens": 121700363.0,
"step": 816
},
{
"epoch": 0.5537106065740427,
"grad_norm": 0.6340088199439193,
"learning_rate": 8.943089430894308e-07,
"loss": 0.1656,
"mean_token_accuracy": 0.9485754668712616,
"num_tokens": 121848454.0,
"step": 817
},
{
"epoch": 0.5543883429345985,
"grad_norm": 0.48586459859651804,
"learning_rate": 8.929539295392954e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.9491472393274307,
"num_tokens": 121998493.0,
"step": 818
},
{
"epoch": 0.5550660792951542,
"grad_norm": 1.4709351530109251,
"learning_rate": 8.915989159891598e-07,
"loss": 0.1622,
"mean_token_accuracy": 0.9486441239714622,
"num_tokens": 122145352.0,
"step": 819
},
{
"epoch": 0.55574381565571,
"grad_norm": 0.4324305757569167,
"learning_rate": 8.902439024390244e-07,
"loss": 0.1671,
"mean_token_accuracy": 0.9465356022119522,
"num_tokens": 122297716.0,
"step": 820
},
{
"epoch": 0.5564215520162656,
"grad_norm": 0.6959982735475431,
"learning_rate": 8.888888888888888e-07,
"loss": 0.1711,
"mean_token_accuracy": 0.9456816613674164,
"num_tokens": 122450816.0,
"step": 821
},
{
"epoch": 0.5570992883768214,
"grad_norm": 0.7003745818253505,
"learning_rate": 8.875338753387534e-07,
"loss": 0.1726,
"mean_token_accuracy": 0.9444479048252106,
"num_tokens": 122600489.0,
"step": 822
},
{
"epoch": 0.5577770247373771,
"grad_norm": 0.9742519698585274,
"learning_rate": 8.861788617886179e-07,
"loss": 0.1683,
"mean_token_accuracy": 0.9471156671643257,
"num_tokens": 122750619.0,
"step": 823
},
{
"epoch": 0.5584547610979329,
"grad_norm": 0.5382653005082321,
"learning_rate": 8.848238482384823e-07,
"loss": 0.1711,
"mean_token_accuracy": 0.9445811286568642,
"num_tokens": 122899835.0,
"step": 824
},
{
"epoch": 0.5591324974584887,
"grad_norm": 0.5746386344906712,
"learning_rate": 8.834688346883468e-07,
"loss": 0.1571,
"mean_token_accuracy": 0.9500567018985748,
"num_tokens": 123048046.0,
"step": 825
},
{
"epoch": 0.5598102338190444,
"grad_norm": 0.49272665751581907,
"learning_rate": 8.821138211382113e-07,
"loss": 0.1595,
"mean_token_accuracy": 0.9488162100315094,
"num_tokens": 123196978.0,
"step": 826
},
{
"epoch": 0.5604879701796002,
"grad_norm": 0.49751566301965017,
"learning_rate": 8.807588075880759e-07,
"loss": 0.1622,
"mean_token_accuracy": 0.9485758990049362,
"num_tokens": 123346407.0,
"step": 827
},
{
"epoch": 0.5611657065401559,
"grad_norm": 0.5289804379300486,
"learning_rate": 8.794037940379403e-07,
"loss": 0.1714,
"mean_token_accuracy": 0.9460392519831657,
"num_tokens": 123500062.0,
"step": 828
},
{
"epoch": 0.5618434429007116,
"grad_norm": 0.45654638803491393,
"learning_rate": 8.780487804878048e-07,
"loss": 0.1575,
"mean_token_accuracy": 0.9497273415327072,
"num_tokens": 123649294.0,
"step": 829
},
{
"epoch": 0.5625211792612673,
"grad_norm": 0.44613375128695365,
"learning_rate": 8.766937669376693e-07,
"loss": 0.1628,
"mean_token_accuracy": 0.948423445224762,
"num_tokens": 123800787.0,
"step": 830
},
{
"epoch": 0.5631989156218231,
"grad_norm": 1.0831604948180953,
"learning_rate": 8.753387533875339e-07,
"loss": 0.1655,
"mean_token_accuracy": 0.9475297853350639,
"num_tokens": 123952990.0,
"step": 831
},
{
"epoch": 0.5638766519823789,
"grad_norm": 0.9942947603234078,
"learning_rate": 8.739837398373984e-07,
"loss": 0.1658,
"mean_token_accuracy": 0.9475270807743073,
"num_tokens": 124099716.0,
"step": 832
},
{
"epoch": 0.5645543883429346,
"grad_norm": 0.517614231215208,
"learning_rate": 8.726287262872628e-07,
"loss": 0.1563,
"mean_token_accuracy": 0.9498177841305733,
"num_tokens": 124252344.0,
"step": 833
},
{
"epoch": 0.5652321247034904,
"grad_norm": 0.5317023085654892,
"learning_rate": 8.712737127371273e-07,
"loss": 0.1612,
"mean_token_accuracy": 0.9490551054477692,
"num_tokens": 124401448.0,
"step": 834
},
{
"epoch": 0.5659098610640461,
"grad_norm": 0.5185688681175784,
"learning_rate": 8.699186991869918e-07,
"loss": 0.1674,
"mean_token_accuracy": 0.9469140246510506,
"num_tokens": 124546582.0,
"step": 835
},
{
"epoch": 0.5665875974246019,
"grad_norm": 0.41557332220052745,
"learning_rate": 8.685636856368563e-07,
"loss": 0.1664,
"mean_token_accuracy": 0.9459367915987968,
"num_tokens": 124695136.0,
"step": 836
},
{
"epoch": 0.5672653337851575,
"grad_norm": 0.6431221859186249,
"learning_rate": 8.672086720867209e-07,
"loss": 0.1654,
"mean_token_accuracy": 0.9466153234243393,
"num_tokens": 124848280.0,
"step": 837
},
{
"epoch": 0.5679430701457133,
"grad_norm": 0.4597423272540076,
"learning_rate": 8.658536585365853e-07,
"loss": 0.1735,
"mean_token_accuracy": 0.9452999532222748,
"num_tokens": 124995930.0,
"step": 838
},
{
"epoch": 0.5686208065062691,
"grad_norm": 0.40694430784656294,
"learning_rate": 8.644986449864499e-07,
"loss": 0.1685,
"mean_token_accuracy": 0.9465653151273727,
"num_tokens": 125141313.0,
"step": 839
},
{
"epoch": 0.5692985428668248,
"grad_norm": 0.6509343776039687,
"learning_rate": 8.631436314363143e-07,
"loss": 0.1576,
"mean_token_accuracy": 0.9495387375354767,
"num_tokens": 125289634.0,
"step": 840
},
{
"epoch": 0.5699762792273806,
"grad_norm": 0.4304540404614368,
"learning_rate": 8.617886178861788e-07,
"loss": 0.1649,
"mean_token_accuracy": 0.9472305327653885,
"num_tokens": 125440651.0,
"step": 841
},
{
"epoch": 0.5706540155879363,
"grad_norm": 0.5442253821672075,
"learning_rate": 8.604336043360433e-07,
"loss": 0.1579,
"mean_token_accuracy": 0.9492665678262711,
"num_tokens": 125587031.0,
"step": 842
},
{
"epoch": 0.5713317519484921,
"grad_norm": 1.0424392574624841,
"learning_rate": 8.590785907859079e-07,
"loss": 0.1564,
"mean_token_accuracy": 0.9497368782758713,
"num_tokens": 125734286.0,
"step": 843
},
{
"epoch": 0.5720094883090477,
"grad_norm": 0.6985477539533492,
"learning_rate": 8.577235772357723e-07,
"loss": 0.1593,
"mean_token_accuracy": 0.9490614905953407,
"num_tokens": 125879250.0,
"step": 844
},
{
"epoch": 0.5726872246696035,
"grad_norm": 0.3914382877262425,
"learning_rate": 8.563685636856368e-07,
"loss": 0.1561,
"mean_token_accuracy": 0.9510177820920944,
"num_tokens": 126030729.0,
"step": 845
},
{
"epoch": 0.5733649610301592,
"grad_norm": 0.5134766215553107,
"learning_rate": 8.550135501355013e-07,
"loss": 0.1621,
"mean_token_accuracy": 0.9482594132423401,
"num_tokens": 126180883.0,
"step": 846
},
{
"epoch": 0.574042697390715,
"grad_norm": 0.7396491074742783,
"learning_rate": 8.536585365853657e-07,
"loss": 0.1505,
"mean_token_accuracy": 0.9510187357664108,
"num_tokens": 126336047.0,
"step": 847
},
{
"epoch": 0.5747204337512708,
"grad_norm": 0.4231370791240071,
"learning_rate": 8.523035230352304e-07,
"loss": 0.1668,
"mean_token_accuracy": 0.9463280215859413,
"num_tokens": 126483799.0,
"step": 848
},
{
"epoch": 0.5753981701118265,
"grad_norm": 4.232746679945196,
"learning_rate": 8.509485094850948e-07,
"loss": 0.1593,
"mean_token_accuracy": 0.948805071413517,
"num_tokens": 126634319.0,
"step": 849
},
{
"epoch": 0.5760759064723823,
"grad_norm": 0.4184667470094849,
"learning_rate": 8.495934959349593e-07,
"loss": 0.1682,
"mean_token_accuracy": 0.9463841244578362,
"num_tokens": 126785223.0,
"step": 850
},
{
"epoch": 0.576753642832938,
"grad_norm": 0.4315358083805072,
"learning_rate": 8.482384823848237e-07,
"loss": 0.1662,
"mean_token_accuracy": 0.9478829950094223,
"num_tokens": 126932745.0,
"step": 851
},
{
"epoch": 0.5774313791934937,
"grad_norm": 0.8449257861756138,
"learning_rate": 8.468834688346883e-07,
"loss": 0.1625,
"mean_token_accuracy": 0.9479701891541481,
"num_tokens": 127083705.0,
"step": 852
},
{
"epoch": 0.5781091155540494,
"grad_norm": 0.40974519696433026,
"learning_rate": 8.455284552845529e-07,
"loss": 0.17,
"mean_token_accuracy": 0.9462849348783493,
"num_tokens": 127235425.0,
"step": 853
},
{
"epoch": 0.5787868519146052,
"grad_norm": 0.4195578859819862,
"learning_rate": 8.441734417344173e-07,
"loss": 0.1579,
"mean_token_accuracy": 0.949033334851265,
"num_tokens": 127388201.0,
"step": 854
},
{
"epoch": 0.579464588275161,
"grad_norm": 0.49000810389007354,
"learning_rate": 8.428184281842818e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9488765150308609,
"num_tokens": 127534594.0,
"step": 855
},
{
"epoch": 0.5801423246357167,
"grad_norm": 1.0802510638369283,
"learning_rate": 8.414634146341463e-07,
"loss": 0.1615,
"mean_token_accuracy": 0.9483487829566002,
"num_tokens": 127683017.0,
"step": 856
},
{
"epoch": 0.5808200609962725,
"grad_norm": 0.6578855599046194,
"learning_rate": 8.401084010840108e-07,
"loss": 0.1512,
"mean_token_accuracy": 0.9515197053551674,
"num_tokens": 127832419.0,
"step": 857
},
{
"epoch": 0.5814977973568282,
"grad_norm": 0.4359659612480235,
"learning_rate": 8.387533875338753e-07,
"loss": 0.1639,
"mean_token_accuracy": 0.9471932053565979,
"num_tokens": 127982085.0,
"step": 858
},
{
"epoch": 0.582175533717384,
"grad_norm": 1.819776022271404,
"learning_rate": 8.373983739837398e-07,
"loss": 0.1702,
"mean_token_accuracy": 0.9458101093769073,
"num_tokens": 128128922.0,
"step": 859
},
{
"epoch": 0.5828532700779396,
"grad_norm": 0.4258732751942105,
"learning_rate": 8.360433604336044e-07,
"loss": 0.1711,
"mean_token_accuracy": 0.9463748782873154,
"num_tokens": 128277048.0,
"step": 860
},
{
"epoch": 0.5835310064384954,
"grad_norm": 0.4183924777076949,
"learning_rate": 8.346883468834688e-07,
"loss": 0.1599,
"mean_token_accuracy": 0.9493186771869659,
"num_tokens": 128423094.0,
"step": 861
},
{
"epoch": 0.5842087427990512,
"grad_norm": 0.4758718297804327,
"learning_rate": 8.333333333333333e-07,
"loss": 0.1626,
"mean_token_accuracy": 0.9486329630017281,
"num_tokens": 128574980.0,
"step": 862
},
{
"epoch": 0.5848864791596069,
"grad_norm": 0.4797506718095942,
"learning_rate": 8.319783197831977e-07,
"loss": 0.16,
"mean_token_accuracy": 0.9494662657380104,
"num_tokens": 128725510.0,
"step": 863
},
{
"epoch": 0.5855642155201627,
"grad_norm": 0.6113872026540533,
"learning_rate": 8.306233062330623e-07,
"loss": 0.1668,
"mean_token_accuracy": 0.9471870213747025,
"num_tokens": 128876751.0,
"step": 864
},
{
"epoch": 0.5862419518807184,
"grad_norm": 0.9150236072650709,
"learning_rate": 8.292682926829268e-07,
"loss": 0.1697,
"mean_token_accuracy": 0.9455385357141495,
"num_tokens": 129030714.0,
"step": 865
},
{
"epoch": 0.5869196882412742,
"grad_norm": 0.37306588743257885,
"learning_rate": 8.279132791327913e-07,
"loss": 0.1637,
"mean_token_accuracy": 0.9470183029770851,
"num_tokens": 129180043.0,
"step": 866
},
{
"epoch": 0.5875974246018298,
"grad_norm": 0.46435908405163356,
"learning_rate": 8.265582655826557e-07,
"loss": 0.176,
"mean_token_accuracy": 0.9439493343234062,
"num_tokens": 129332537.0,
"step": 867
},
{
"epoch": 0.5882751609623856,
"grad_norm": 0.465667821102405,
"learning_rate": 8.252032520325202e-07,
"loss": 0.1732,
"mean_token_accuracy": 0.9455002173781395,
"num_tokens": 129482278.0,
"step": 868
},
{
"epoch": 0.5889528973229414,
"grad_norm": 0.4219842630697802,
"learning_rate": 8.238482384823849e-07,
"loss": 0.1683,
"mean_token_accuracy": 0.9466238841414452,
"num_tokens": 129629550.0,
"step": 869
},
{
"epoch": 0.5896306336834971,
"grad_norm": 0.4745502832196809,
"learning_rate": 8.224932249322493e-07,
"loss": 0.1679,
"mean_token_accuracy": 0.94708351790905,
"num_tokens": 129776202.0,
"step": 870
},
{
"epoch": 0.5903083700440529,
"grad_norm": 0.42594833229248413,
"learning_rate": 8.211382113821138e-07,
"loss": 0.1555,
"mean_token_accuracy": 0.9506156072020531,
"num_tokens": 129923307.0,
"step": 871
},
{
"epoch": 0.5909861064046086,
"grad_norm": 0.3876767907960687,
"learning_rate": 8.197831978319782e-07,
"loss": 0.169,
"mean_token_accuracy": 0.9464268982410431,
"num_tokens": 130071563.0,
"step": 872
},
{
"epoch": 0.5916638427651644,
"grad_norm": 0.4452387059716283,
"learning_rate": 8.184281842818428e-07,
"loss": 0.1543,
"mean_token_accuracy": 0.9506272077560425,
"num_tokens": 130218754.0,
"step": 873
},
{
"epoch": 0.59234157912572,
"grad_norm": 0.39458214992094115,
"learning_rate": 8.170731707317072e-07,
"loss": 0.1486,
"mean_token_accuracy": 0.9521933421492577,
"num_tokens": 130372245.0,
"step": 874
},
{
"epoch": 0.5930193154862758,
"grad_norm": 0.4060057020635777,
"learning_rate": 8.157181571815718e-07,
"loss": 0.1643,
"mean_token_accuracy": 0.9476899206638336,
"num_tokens": 130522066.0,
"step": 875
},
{
"epoch": 0.5936970518468316,
"grad_norm": 0.3740875728540713,
"learning_rate": 8.143631436314363e-07,
"loss": 0.1567,
"mean_token_accuracy": 0.949752576649189,
"num_tokens": 130670822.0,
"step": 876
},
{
"epoch": 0.5943747882073873,
"grad_norm": 0.449478208310373,
"learning_rate": 8.130081300813008e-07,
"loss": 0.1618,
"mean_token_accuracy": 0.9482812359929085,
"num_tokens": 130823135.0,
"step": 877
},
{
"epoch": 0.5950525245679431,
"grad_norm": 0.470574147535195,
"learning_rate": 8.116531165311653e-07,
"loss": 0.1693,
"mean_token_accuracy": 0.9464894384145737,
"num_tokens": 130976061.0,
"step": 878
},
{
"epoch": 0.5957302609284988,
"grad_norm": 0.43273814562418705,
"learning_rate": 8.102981029810297e-07,
"loss": 0.1664,
"mean_token_accuracy": 0.946991890668869,
"num_tokens": 131130287.0,
"step": 879
},
{
"epoch": 0.5964079972890546,
"grad_norm": 0.5133374187671007,
"learning_rate": 8.089430894308943e-07,
"loss": 0.1674,
"mean_token_accuracy": 0.9469396248459816,
"num_tokens": 131279600.0,
"step": 880
},
{
"epoch": 0.5970857336496103,
"grad_norm": 0.5527391693964944,
"learning_rate": 8.075880758807587e-07,
"loss": 0.1639,
"mean_token_accuracy": 0.9478160068392754,
"num_tokens": 131429003.0,
"step": 881
},
{
"epoch": 0.597763470010166,
"grad_norm": 0.5869559180286896,
"learning_rate": 8.062330623306233e-07,
"loss": 0.1614,
"mean_token_accuracy": 0.9487715065479279,
"num_tokens": 131581100.0,
"step": 882
},
{
"epoch": 0.5984412063707217,
"grad_norm": 0.5139028440956621,
"learning_rate": 8.048780487804878e-07,
"loss": 0.1657,
"mean_token_accuracy": 0.9471326619386673,
"num_tokens": 131730402.0,
"step": 883
},
{
"epoch": 0.5991189427312775,
"grad_norm": 0.4782042342078006,
"learning_rate": 8.035230352303522e-07,
"loss": 0.1687,
"mean_token_accuracy": 0.945782758295536,
"num_tokens": 131883450.0,
"step": 884
},
{
"epoch": 0.5997966790918333,
"grad_norm": 0.5420589297463521,
"learning_rate": 8.021680216802168e-07,
"loss": 0.1546,
"mean_token_accuracy": 0.9501096978783607,
"num_tokens": 132035968.0,
"step": 885
},
{
"epoch": 0.600474415452389,
"grad_norm": 0.5659045444746059,
"learning_rate": 8.008130081300813e-07,
"loss": 0.1591,
"mean_token_accuracy": 0.9486903175711632,
"num_tokens": 132185887.0,
"step": 886
},
{
"epoch": 0.6011521518129448,
"grad_norm": 0.5846227300914831,
"learning_rate": 7.994579945799458e-07,
"loss": 0.1734,
"mean_token_accuracy": 0.945100449025631,
"num_tokens": 132333916.0,
"step": 887
},
{
"epoch": 0.6018298881735005,
"grad_norm": 0.43980703857602094,
"learning_rate": 7.981029810298102e-07,
"loss": 0.1666,
"mean_token_accuracy": 0.946213386952877,
"num_tokens": 132484111.0,
"step": 888
},
{
"epoch": 0.6025076245340563,
"grad_norm": 0.38862752445000626,
"learning_rate": 7.967479674796747e-07,
"loss": 0.171,
"mean_token_accuracy": 0.9452003985643387,
"num_tokens": 132638890.0,
"step": 889
},
{
"epoch": 0.6031853608946119,
"grad_norm": 0.5018014213821751,
"learning_rate": 7.953929539295394e-07,
"loss": 0.1591,
"mean_token_accuracy": 0.9488074406981468,
"num_tokens": 132787007.0,
"step": 890
},
{
"epoch": 0.6038630972551677,
"grad_norm": 0.6087439529267555,
"learning_rate": 7.940379403794038e-07,
"loss": 0.1635,
"mean_token_accuracy": 0.9478660523891449,
"num_tokens": 132936200.0,
"step": 891
},
{
"epoch": 0.6045408336157235,
"grad_norm": 0.49604043868352027,
"learning_rate": 7.926829268292683e-07,
"loss": 0.1598,
"mean_token_accuracy": 0.9487058073282242,
"num_tokens": 133088672.0,
"step": 892
},
{
"epoch": 0.6052185699762792,
"grad_norm": 0.7340801375571371,
"learning_rate": 7.913279132791327e-07,
"loss": 0.1695,
"mean_token_accuracy": 0.9449058994650841,
"num_tokens": 133239608.0,
"step": 893
},
{
"epoch": 0.605896306336835,
"grad_norm": 0.39721191202967127,
"learning_rate": 7.899728997289973e-07,
"loss": 0.164,
"mean_token_accuracy": 0.9476265981793404,
"num_tokens": 133387750.0,
"step": 894
},
{
"epoch": 0.6065740426973907,
"grad_norm": 0.4376674787899762,
"learning_rate": 7.886178861788617e-07,
"loss": 0.1627,
"mean_token_accuracy": 0.947635717689991,
"num_tokens": 133535888.0,
"step": 895
},
{
"epoch": 0.6072517790579465,
"grad_norm": 0.42423057779823,
"learning_rate": 7.872628726287263e-07,
"loss": 0.1678,
"mean_token_accuracy": 0.9462937116622925,
"num_tokens": 133681883.0,
"step": 896
},
{
"epoch": 0.6079295154185022,
"grad_norm": 0.5936497940257488,
"learning_rate": 7.859078590785907e-07,
"loss": 0.1646,
"mean_token_accuracy": 0.946974903345108,
"num_tokens": 133833687.0,
"step": 897
},
{
"epoch": 0.608607251779058,
"grad_norm": 0.5219859690946337,
"learning_rate": 7.845528455284552e-07,
"loss": 0.1736,
"mean_token_accuracy": 0.9455397203564644,
"num_tokens": 133986910.0,
"step": 898
},
{
"epoch": 0.6092849881396137,
"grad_norm": 0.4774722891619204,
"learning_rate": 7.831978319783198e-07,
"loss": 0.1698,
"mean_token_accuracy": 0.9463390782475471,
"num_tokens": 134132997.0,
"step": 899
},
{
"epoch": 0.6099627245001694,
"grad_norm": 0.4850631210301614,
"learning_rate": 7.818428184281842e-07,
"loss": 0.154,
"mean_token_accuracy": 0.9509309977293015,
"num_tokens": 134279331.0,
"step": 900
},
{
"epoch": 0.6106404608607252,
"grad_norm": 2.0643418841530607,
"learning_rate": 7.804878048780488e-07,
"loss": 0.1629,
"mean_token_accuracy": 0.9475974664092064,
"num_tokens": 134430596.0,
"step": 901
},
{
"epoch": 0.6113181972212809,
"grad_norm": 0.4523057522995995,
"learning_rate": 7.791327913279132e-07,
"loss": 0.1571,
"mean_token_accuracy": 0.9492429792881012,
"num_tokens": 134582264.0,
"step": 902
},
{
"epoch": 0.6119959335818367,
"grad_norm": 0.772571598377388,
"learning_rate": 7.777777777777778e-07,
"loss": 0.1632,
"mean_token_accuracy": 0.9482496008276939,
"num_tokens": 134729586.0,
"step": 903
},
{
"epoch": 0.6126736699423924,
"grad_norm": 0.9569695012936315,
"learning_rate": 7.764227642276422e-07,
"loss": 0.1637,
"mean_token_accuracy": 0.9479860961437225,
"num_tokens": 134876147.0,
"step": 904
},
{
"epoch": 0.6133514063029482,
"grad_norm": 3.4385418189852968,
"learning_rate": 7.750677506775067e-07,
"loss": 0.161,
"mean_token_accuracy": 0.9483773708343506,
"num_tokens": 135027603.0,
"step": 905
},
{
"epoch": 0.614029142663504,
"grad_norm": 0.4428624807588454,
"learning_rate": 7.737127371273712e-07,
"loss": 0.1696,
"mean_token_accuracy": 0.9465060532093048,
"num_tokens": 135175544.0,
"step": 906
},
{
"epoch": 0.6147068790240596,
"grad_norm": 0.42033595784536043,
"learning_rate": 7.723577235772358e-07,
"loss": 0.1613,
"mean_token_accuracy": 0.9488836824893951,
"num_tokens": 135325813.0,
"step": 907
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.4431305406629688,
"learning_rate": 7.710027100271003e-07,
"loss": 0.1607,
"mean_token_accuracy": 0.9485266506671906,
"num_tokens": 135475422.0,
"step": 908
},
{
"epoch": 0.6160623517451711,
"grad_norm": 0.46267044020984877,
"learning_rate": 7.696476964769647e-07,
"loss": 0.1711,
"mean_token_accuracy": 0.946107029914856,
"num_tokens": 135622188.0,
"step": 909
},
{
"epoch": 0.6167400881057269,
"grad_norm": 0.7193235514746738,
"learning_rate": 7.682926829268292e-07,
"loss": 0.1643,
"mean_token_accuracy": 0.9476191028952599,
"num_tokens": 135771377.0,
"step": 910
},
{
"epoch": 0.6174178244662826,
"grad_norm": 0.38960900983203894,
"learning_rate": 7.669376693766937e-07,
"loss": 0.1633,
"mean_token_accuracy": 0.9476170986890793,
"num_tokens": 135920418.0,
"step": 911
},
{
"epoch": 0.6180955608268384,
"grad_norm": 0.7216784295260695,
"learning_rate": 7.655826558265583e-07,
"loss": 0.1718,
"mean_token_accuracy": 0.9461710155010223,
"num_tokens": 136068122.0,
"step": 912
},
{
"epoch": 0.6187732971873942,
"grad_norm": 0.4510715884297431,
"learning_rate": 7.642276422764228e-07,
"loss": 0.1721,
"mean_token_accuracy": 0.9446103274822235,
"num_tokens": 136217084.0,
"step": 913
},
{
"epoch": 0.6194510335479498,
"grad_norm": 0.5186949964426198,
"learning_rate": 7.628726287262872e-07,
"loss": 0.1581,
"mean_token_accuracy": 0.9492061957716942,
"num_tokens": 136362608.0,
"step": 914
},
{
"epoch": 0.6201287699085056,
"grad_norm": 0.4111352524011199,
"learning_rate": 7.615176151761518e-07,
"loss": 0.1579,
"mean_token_accuracy": 0.9499277547001839,
"num_tokens": 136515985.0,
"step": 915
},
{
"epoch": 0.6208065062690613,
"grad_norm": 0.5139528769121537,
"learning_rate": 7.601626016260162e-07,
"loss": 0.1616,
"mean_token_accuracy": 0.9490419402718544,
"num_tokens": 136666846.0,
"step": 916
},
{
"epoch": 0.6214842426296171,
"grad_norm": 0.6677050444967101,
"learning_rate": 7.588075880758807e-07,
"loss": 0.1599,
"mean_token_accuracy": 0.9486491605639458,
"num_tokens": 136814955.0,
"step": 917
},
{
"epoch": 0.6221619789901728,
"grad_norm": 0.5997639548038106,
"learning_rate": 7.574525745257452e-07,
"loss": 0.1558,
"mean_token_accuracy": 0.9490111693739891,
"num_tokens": 136962819.0,
"step": 918
},
{
"epoch": 0.6228397153507286,
"grad_norm": 0.3898808395054342,
"learning_rate": 7.560975609756097e-07,
"loss": 0.1588,
"mean_token_accuracy": 0.9494509026408195,
"num_tokens": 137107360.0,
"step": 919
},
{
"epoch": 0.6235174517112844,
"grad_norm": 0.5185812674164358,
"learning_rate": 7.547425474254743e-07,
"loss": 0.1582,
"mean_token_accuracy": 0.9492890685796738,
"num_tokens": 137254114.0,
"step": 920
},
{
"epoch": 0.62419518807184,
"grad_norm": 0.4597891887499145,
"learning_rate": 7.533875338753387e-07,
"loss": 0.1654,
"mean_token_accuracy": 0.9472557231783867,
"num_tokens": 137407322.0,
"step": 921
},
{
"epoch": 0.6248729244323958,
"grad_norm": 0.43904471022087876,
"learning_rate": 7.520325203252032e-07,
"loss": 0.1713,
"mean_token_accuracy": 0.9465183466672897,
"num_tokens": 137552436.0,
"step": 922
},
{
"epoch": 0.6255506607929515,
"grad_norm": 0.5280919024943563,
"learning_rate": 7.506775067750677e-07,
"loss": 0.175,
"mean_token_accuracy": 0.9440080150961876,
"num_tokens": 137698616.0,
"step": 923
},
{
"epoch": 0.6262283971535073,
"grad_norm": 0.5085519075809602,
"learning_rate": 7.493224932249323e-07,
"loss": 0.1669,
"mean_token_accuracy": 0.9469470083713531,
"num_tokens": 137852417.0,
"step": 924
},
{
"epoch": 0.626906133514063,
"grad_norm": 0.9381387752413748,
"learning_rate": 7.479674796747967e-07,
"loss": 0.1551,
"mean_token_accuracy": 0.9501704648137093,
"num_tokens": 138002264.0,
"step": 925
},
{
"epoch": 0.6275838698746188,
"grad_norm": 0.4065937650418371,
"learning_rate": 7.466124661246612e-07,
"loss": 0.1689,
"mean_token_accuracy": 0.9459371268749237,
"num_tokens": 138150177.0,
"step": 926
},
{
"epoch": 0.6282616062351745,
"grad_norm": 0.6068927611317052,
"learning_rate": 7.452574525745256e-07,
"loss": 0.164,
"mean_token_accuracy": 0.9475051537156105,
"num_tokens": 138299546.0,
"step": 927
},
{
"epoch": 0.6289393425957303,
"grad_norm": 0.48424123680078385,
"learning_rate": 7.439024390243903e-07,
"loss": 0.1648,
"mean_token_accuracy": 0.9467552751302719,
"num_tokens": 138451328.0,
"step": 928
},
{
"epoch": 0.629617078956286,
"grad_norm": 0.5033477602408023,
"learning_rate": 7.425474254742548e-07,
"loss": 0.1657,
"mean_token_accuracy": 0.9471101090312004,
"num_tokens": 138597072.0,
"step": 929
},
{
"epoch": 0.6302948153168417,
"grad_norm": 0.4618401305691291,
"learning_rate": 7.411924119241192e-07,
"loss": 0.1742,
"mean_token_accuracy": 0.9449163228273392,
"num_tokens": 138746702.0,
"step": 930
},
{
"epoch": 0.6309725516773975,
"grad_norm": 0.5859131515689481,
"learning_rate": 7.398373983739837e-07,
"loss": 0.1723,
"mean_token_accuracy": 0.9446366801857948,
"num_tokens": 138898702.0,
"step": 931
},
{
"epoch": 0.6316502880379532,
"grad_norm": 0.4646050979072507,
"learning_rate": 7.384823848238481e-07,
"loss": 0.1547,
"mean_token_accuracy": 0.9500670656561852,
"num_tokens": 139050482.0,
"step": 932
},
{
"epoch": 0.632328024398509,
"grad_norm": 0.4938179522850119,
"learning_rate": 7.371273712737127e-07,
"loss": 0.1655,
"mean_token_accuracy": 0.9475302547216415,
"num_tokens": 139202406.0,
"step": 933
},
{
"epoch": 0.6330057607590647,
"grad_norm": 0.4587172954884775,
"learning_rate": 7.357723577235772e-07,
"loss": 0.1567,
"mean_token_accuracy": 0.9493889287114143,
"num_tokens": 139349833.0,
"step": 934
},
{
"epoch": 0.6336834971196205,
"grad_norm": 0.6670031930804362,
"learning_rate": 7.344173441734417e-07,
"loss": 0.1596,
"mean_token_accuracy": 0.9493798017501831,
"num_tokens": 139500833.0,
"step": 935
},
{
"epoch": 0.6343612334801763,
"grad_norm": 0.4389808125315095,
"learning_rate": 7.330623306233062e-07,
"loss": 0.1664,
"mean_token_accuracy": 0.9476785436272621,
"num_tokens": 139648168.0,
"step": 936
},
{
"epoch": 0.6350389698407319,
"grad_norm": 0.37509769597300396,
"learning_rate": 7.317073170731707e-07,
"loss": 0.1572,
"mean_token_accuracy": 0.9489102885127068,
"num_tokens": 139794445.0,
"step": 937
},
{
"epoch": 0.6357167062012877,
"grad_norm": 0.53982506710144,
"learning_rate": 7.303523035230352e-07,
"loss": 0.1505,
"mean_token_accuracy": 0.9502668455243111,
"num_tokens": 139942868.0,
"step": 938
},
{
"epoch": 0.6363944425618434,
"grad_norm": 0.4635865192147379,
"learning_rate": 7.289972899728997e-07,
"loss": 0.1618,
"mean_token_accuracy": 0.9481147155165672,
"num_tokens": 140093883.0,
"step": 939
},
{
"epoch": 0.6370721789223992,
"grad_norm": 0.41885841355844794,
"learning_rate": 7.276422764227642e-07,
"loss": 0.1658,
"mean_token_accuracy": 0.9470746591687202,
"num_tokens": 140240725.0,
"step": 940
},
{
"epoch": 0.6377499152829549,
"grad_norm": 0.47462858953487475,
"learning_rate": 7.262872628726287e-07,
"loss": 0.172,
"mean_token_accuracy": 0.9460104629397392,
"num_tokens": 140388967.0,
"step": 941
},
{
"epoch": 0.6384276516435107,
"grad_norm": 1.1343146325099123,
"learning_rate": 7.249322493224932e-07,
"loss": 0.1735,
"mean_token_accuracy": 0.9451970756053925,
"num_tokens": 140541231.0,
"step": 942
},
{
"epoch": 0.6391053880040665,
"grad_norm": 0.6104215798107635,
"learning_rate": 7.235772357723577e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.9492672756314278,
"num_tokens": 140692424.0,
"step": 943
},
{
"epoch": 0.6397831243646221,
"grad_norm": 0.421253631362635,
"learning_rate": 7.222222222222221e-07,
"loss": 0.1625,
"mean_token_accuracy": 0.9471209272742271,
"num_tokens": 140841186.0,
"step": 944
},
{
"epoch": 0.6404608607251779,
"grad_norm": 0.38741060193677335,
"learning_rate": 7.208672086720868e-07,
"loss": 0.1604,
"mean_token_accuracy": 0.9487796202301979,
"num_tokens": 140991562.0,
"step": 945
},
{
"epoch": 0.6411385970857336,
"grad_norm": 0.41674417095219957,
"learning_rate": 7.195121951219512e-07,
"loss": 0.1742,
"mean_token_accuracy": 0.9446077048778534,
"num_tokens": 141139839.0,
"step": 946
},
{
"epoch": 0.6418163334462894,
"grad_norm": 0.4176168006364939,
"learning_rate": 7.181571815718157e-07,
"loss": 0.1524,
"mean_token_accuracy": 0.9500337019562721,
"num_tokens": 141288450.0,
"step": 947
},
{
"epoch": 0.6424940698068451,
"grad_norm": 0.47308408138568686,
"learning_rate": 7.168021680216801e-07,
"loss": 0.162,
"mean_token_accuracy": 0.9485318809747696,
"num_tokens": 141439179.0,
"step": 948
},
{
"epoch": 0.6431718061674009,
"grad_norm": 0.39253531614222453,
"learning_rate": 7.154471544715447e-07,
"loss": 0.1556,
"mean_token_accuracy": 0.9496328011155128,
"num_tokens": 141591861.0,
"step": 949
},
{
"epoch": 0.6438495425279567,
"grad_norm": 0.4843422610365838,
"learning_rate": 7.140921409214093e-07,
"loss": 0.1629,
"mean_token_accuracy": 0.9474169239401817,
"num_tokens": 141741485.0,
"step": 950
},
{
"epoch": 0.6445272788885124,
"grad_norm": 0.5257492858880942,
"learning_rate": 7.127371273712737e-07,
"loss": 0.1658,
"mean_token_accuracy": 0.9472004100680351,
"num_tokens": 141890794.0,
"step": 951
},
{
"epoch": 0.6452050152490681,
"grad_norm": 0.5382929426731695,
"learning_rate": 7.113821138211382e-07,
"loss": 0.1562,
"mean_token_accuracy": 0.9496374428272247,
"num_tokens": 142041227.0,
"step": 952
},
{
"epoch": 0.6458827516096238,
"grad_norm": 0.4590965631424657,
"learning_rate": 7.100271002710026e-07,
"loss": 0.1557,
"mean_token_accuracy": 0.949472963809967,
"num_tokens": 142189286.0,
"step": 953
},
{
"epoch": 0.6465604879701796,
"grad_norm": 0.9315953451855179,
"learning_rate": 7.086720867208672e-07,
"loss": 0.1675,
"mean_token_accuracy": 0.9469330534338951,
"num_tokens": 142335353.0,
"step": 954
},
{
"epoch": 0.6472382243307353,
"grad_norm": 0.49292357092040273,
"learning_rate": 7.073170731707316e-07,
"loss": 0.1599,
"mean_token_accuracy": 0.9490496292710304,
"num_tokens": 142487994.0,
"step": 955
},
{
"epoch": 0.6479159606912911,
"grad_norm": 0.6003853214589224,
"learning_rate": 7.059620596205962e-07,
"loss": 0.1739,
"mean_token_accuracy": 0.94477578997612,
"num_tokens": 142641052.0,
"step": 956
},
{
"epoch": 0.6485936970518469,
"grad_norm": 0.43738049928150774,
"learning_rate": 7.046070460704606e-07,
"loss": 0.1597,
"mean_token_accuracy": 0.948681466281414,
"num_tokens": 142785857.0,
"step": 957
},
{
"epoch": 0.6492714334124026,
"grad_norm": 0.5945071762572889,
"learning_rate": 7.032520325203252e-07,
"loss": 0.1637,
"mean_token_accuracy": 0.947319395840168,
"num_tokens": 142934275.0,
"step": 958
},
{
"epoch": 0.6499491697729584,
"grad_norm": 0.39907508200233105,
"learning_rate": 7.018970189701897e-07,
"loss": 0.1717,
"mean_token_accuracy": 0.944616362452507,
"num_tokens": 143085778.0,
"step": 959
},
{
"epoch": 0.650626906133514,
"grad_norm": 0.43112091341931374,
"learning_rate": 7.005420054200541e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.9482121244072914,
"num_tokens": 143237406.0,
"step": 960
},
{
"epoch": 0.6513046424940698,
"grad_norm": 0.37422520350770055,
"learning_rate": 6.991869918699187e-07,
"loss": 0.1534,
"mean_token_accuracy": 0.9510421678423882,
"num_tokens": 143388807.0,
"step": 961
},
{
"epoch": 0.6519823788546255,
"grad_norm": 0.5373532478650356,
"learning_rate": 6.978319783197832e-07,
"loss": 0.1584,
"mean_token_accuracy": 0.9494762420654297,
"num_tokens": 143538958.0,
"step": 962
},
{
"epoch": 0.6526601152151813,
"grad_norm": 0.5160251596916773,
"learning_rate": 6.964769647696477e-07,
"loss": 0.1713,
"mean_token_accuracy": 0.9450407698750496,
"num_tokens": 143689416.0,
"step": 963
},
{
"epoch": 0.653337851575737,
"grad_norm": 0.43329708914509557,
"learning_rate": 6.951219512195121e-07,
"loss": 0.162,
"mean_token_accuracy": 0.9481780678033829,
"num_tokens": 143839600.0,
"step": 964
},
{
"epoch": 0.6540155879362928,
"grad_norm": 0.7329749772251899,
"learning_rate": 6.937669376693766e-07,
"loss": 0.1697,
"mean_token_accuracy": 0.9457468092441559,
"num_tokens": 143990179.0,
"step": 965
},
{
"epoch": 0.6546933242968486,
"grad_norm": 0.7453207951547401,
"learning_rate": 6.924119241192413e-07,
"loss": 0.159,
"mean_token_accuracy": 0.9484611824154854,
"num_tokens": 144142458.0,
"step": 966
},
{
"epoch": 0.6553710606574042,
"grad_norm": 0.42955950036507906,
"learning_rate": 6.910569105691057e-07,
"loss": 0.1614,
"mean_token_accuracy": 0.9487689658999443,
"num_tokens": 144293182.0,
"step": 967
},
{
"epoch": 0.65604879701796,
"grad_norm": 0.46246879113498524,
"learning_rate": 6.897018970189702e-07,
"loss": 0.1673,
"mean_token_accuracy": 0.9465522468090057,
"num_tokens": 144441849.0,
"step": 968
},
{
"epoch": 0.6567265333785157,
"grad_norm": 0.4728480565761358,
"learning_rate": 6.883468834688346e-07,
"loss": 0.1601,
"mean_token_accuracy": 0.9483184814453125,
"num_tokens": 144594086.0,
"step": 969
},
{
"epoch": 0.6574042697390715,
"grad_norm": 0.5700572199360658,
"learning_rate": 6.869918699186991e-07,
"loss": 0.1624,
"mean_token_accuracy": 0.948683463037014,
"num_tokens": 144743175.0,
"step": 970
},
{
"epoch": 0.6580820060996272,
"grad_norm": 0.4478620978369501,
"learning_rate": 6.856368563685636e-07,
"loss": 0.1657,
"mean_token_accuracy": 0.9477500319480896,
"num_tokens": 144890797.0,
"step": 971
},
{
"epoch": 0.658759742460183,
"grad_norm": 0.4471516180312603,
"learning_rate": 6.842818428184282e-07,
"loss": 0.161,
"mean_token_accuracy": 0.9481164589524269,
"num_tokens": 145039973.0,
"step": 972
},
{
"epoch": 0.6594374788207388,
"grad_norm": 0.43607758337967034,
"learning_rate": 6.829268292682927e-07,
"loss": 0.161,
"mean_token_accuracy": 0.9476160705089569,
"num_tokens": 145186940.0,
"step": 973
},
{
"epoch": 0.6601152151812945,
"grad_norm": 0.6017629461734204,
"learning_rate": 6.815718157181571e-07,
"loss": 0.1717,
"mean_token_accuracy": 0.945083349943161,
"num_tokens": 145337724.0,
"step": 974
},
{
"epoch": 0.6607929515418502,
"grad_norm": 0.5675225086716628,
"learning_rate": 6.802168021680217e-07,
"loss": 0.1585,
"mean_token_accuracy": 0.9487828463315964,
"num_tokens": 145488495.0,
"step": 975
},
{
"epoch": 0.6614706879024059,
"grad_norm": 0.7115168554273109,
"learning_rate": 6.788617886178861e-07,
"loss": 0.1625,
"mean_token_accuracy": 0.9486016631126404,
"num_tokens": 145639262.0,
"step": 976
},
{
"epoch": 0.6621484242629617,
"grad_norm": 0.41618104203918604,
"learning_rate": 6.775067750677507e-07,
"loss": 0.1564,
"mean_token_accuracy": 0.9503029733896255,
"num_tokens": 145786917.0,
"step": 977
},
{
"epoch": 0.6628261606235174,
"grad_norm": 0.539733540745076,
"learning_rate": 6.761517615176151e-07,
"loss": 0.166,
"mean_token_accuracy": 0.9473312497138977,
"num_tokens": 145937276.0,
"step": 978
},
{
"epoch": 0.6635038969840732,
"grad_norm": 0.4949885821070727,
"learning_rate": 6.747967479674797e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.948186106979847,
"num_tokens": 146086934.0,
"step": 979
},
{
"epoch": 0.664181633344629,
"grad_norm": 0.43336406807299116,
"learning_rate": 6.734417344173442e-07,
"loss": 0.1688,
"mean_token_accuracy": 0.946333147585392,
"num_tokens": 146233812.0,
"step": 980
},
{
"epoch": 0.6648593697051847,
"grad_norm": 0.49588130939765285,
"learning_rate": 6.720867208672086e-07,
"loss": 0.1709,
"mean_token_accuracy": 0.9457742869853973,
"num_tokens": 146380514.0,
"step": 981
},
{
"epoch": 0.6655371060657405,
"grad_norm": 0.4596389273240423,
"learning_rate": 6.707317073170731e-07,
"loss": 0.1562,
"mean_token_accuracy": 0.9501648396253586,
"num_tokens": 146527322.0,
"step": 982
},
{
"epoch": 0.6662148424262961,
"grad_norm": 0.6372506840384964,
"learning_rate": 6.693766937669377e-07,
"loss": 0.1697,
"mean_token_accuracy": 0.946533665060997,
"num_tokens": 146675600.0,
"step": 983
},
{
"epoch": 0.6668925787868519,
"grad_norm": 0.3954764119023803,
"learning_rate": 6.680216802168022e-07,
"loss": 0.1573,
"mean_token_accuracy": 0.9492572247982025,
"num_tokens": 146826945.0,
"step": 984
},
{
"epoch": 0.6675703151474076,
"grad_norm": 0.5041113695619949,
"learning_rate": 6.666666666666666e-07,
"loss": 0.1682,
"mean_token_accuracy": 0.946345716714859,
"num_tokens": 146977048.0,
"step": 985
},
{
"epoch": 0.6682480515079634,
"grad_norm": 0.42733097214955723,
"learning_rate": 6.653116531165311e-07,
"loss": 0.173,
"mean_token_accuracy": 0.9447240754961967,
"num_tokens": 147126835.0,
"step": 986
},
{
"epoch": 0.6689257878685192,
"grad_norm": 0.43474026258546133,
"learning_rate": 6.639566395663955e-07,
"loss": 0.1618,
"mean_token_accuracy": 0.9477177634835243,
"num_tokens": 147279051.0,
"step": 987
},
{
"epoch": 0.6696035242290749,
"grad_norm": 0.3979093996109721,
"learning_rate": 6.626016260162602e-07,
"loss": 0.1611,
"mean_token_accuracy": 0.9484685435891151,
"num_tokens": 147429614.0,
"step": 988
},
{
"epoch": 0.6702812605896307,
"grad_norm": 0.6075977010738575,
"learning_rate": 6.612466124661247e-07,
"loss": 0.1604,
"mean_token_accuracy": 0.9487278908491135,
"num_tokens": 147578177.0,
"step": 989
},
{
"epoch": 0.6709589969501863,
"grad_norm": 0.6626901297393074,
"learning_rate": 6.598915989159891e-07,
"loss": 0.1554,
"mean_token_accuracy": 0.9505260586738586,
"num_tokens": 147728883.0,
"step": 990
},
{
"epoch": 0.6716367333107421,
"grad_norm": 0.44245900135479793,
"learning_rate": 6.585365853658536e-07,
"loss": 0.1643,
"mean_token_accuracy": 0.9478070139884949,
"num_tokens": 147880083.0,
"step": 991
},
{
"epoch": 0.6723144696712978,
"grad_norm": 0.44930521995867917,
"learning_rate": 6.571815718157181e-07,
"loss": 0.1515,
"mean_token_accuracy": 0.9511049762368202,
"num_tokens": 148029594.0,
"step": 992
},
{
"epoch": 0.6729922060318536,
"grad_norm": 0.47838561589504064,
"learning_rate": 6.558265582655827e-07,
"loss": 0.1626,
"mean_token_accuracy": 0.9485039636492729,
"num_tokens": 148176040.0,
"step": 993
},
{
"epoch": 0.6736699423924094,
"grad_norm": 0.45346399169896656,
"learning_rate": 6.544715447154471e-07,
"loss": 0.1672,
"mean_token_accuracy": 0.9471649006009102,
"num_tokens": 148328243.0,
"step": 994
},
{
"epoch": 0.6743476787529651,
"grad_norm": 0.40105875616680847,
"learning_rate": 6.531165311653116e-07,
"loss": 0.1714,
"mean_token_accuracy": 0.9450387135148048,
"num_tokens": 148478098.0,
"step": 995
},
{
"epoch": 0.6750254151135209,
"grad_norm": 0.4691349395187444,
"learning_rate": 6.517615176151762e-07,
"loss": 0.1733,
"mean_token_accuracy": 0.9437250271439552,
"num_tokens": 148626030.0,
"step": 996
},
{
"epoch": 0.6757031514740766,
"grad_norm": 0.4670006204550999,
"learning_rate": 6.504065040650406e-07,
"loss": 0.1654,
"mean_token_accuracy": 0.9475633949041367,
"num_tokens": 148773461.0,
"step": 997
},
{
"epoch": 0.6763808878346323,
"grad_norm": 0.4226966853445537,
"learning_rate": 6.490514905149051e-07,
"loss": 0.1603,
"mean_token_accuracy": 0.947656437754631,
"num_tokens": 148921060.0,
"step": 998
},
{
"epoch": 0.677058624195188,
"grad_norm": 0.37248207883522355,
"learning_rate": 6.476964769647696e-07,
"loss": 0.1574,
"mean_token_accuracy": 0.9487950205802917,
"num_tokens": 149070708.0,
"step": 999
},
{
"epoch": 0.6777363605557438,
"grad_norm": 0.8744351429100297,
"learning_rate": 6.463414634146342e-07,
"loss": 0.1551,
"mean_token_accuracy": 0.9497079774737358,
"num_tokens": 149221242.0,
"step": 1000
},
{
"epoch": 0.6784140969162996,
"grad_norm": 0.5697798300327783,
"learning_rate": 6.449864498644986e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.9479104578495026,
"num_tokens": 149373988.0,
"step": 1001
},
{
"epoch": 0.6790918332768553,
"grad_norm": 1.2588346663017063,
"learning_rate": 6.436314363143631e-07,
"loss": 0.1632,
"mean_token_accuracy": 0.9477178603410721,
"num_tokens": 149525452.0,
"step": 1002
},
{
"epoch": 0.6797695696374111,
"grad_norm": 0.5467253562349623,
"learning_rate": 6.422764227642276e-07,
"loss": 0.145,
"mean_token_accuracy": 0.952636294066906,
"num_tokens": 149677061.0,
"step": 1003
},
{
"epoch": 0.6804473059979668,
"grad_norm": 0.42502847488356554,
"learning_rate": 6.40921409214092e-07,
"loss": 0.1736,
"mean_token_accuracy": 0.9454517439007759,
"num_tokens": 149827751.0,
"step": 1004
},
{
"epoch": 0.6811250423585226,
"grad_norm": 0.524162218685377,
"learning_rate": 6.395663956639567e-07,
"loss": 0.1667,
"mean_token_accuracy": 0.9461118578910828,
"num_tokens": 149974862.0,
"step": 1005
},
{
"epoch": 0.6818027787190782,
"grad_norm": 0.4066395805542919,
"learning_rate": 6.382113821138211e-07,
"loss": 0.1672,
"mean_token_accuracy": 0.9463948607444763,
"num_tokens": 150124235.0,
"step": 1006
},
{
"epoch": 0.682480515079634,
"grad_norm": 0.39312368414912413,
"learning_rate": 6.368563685636856e-07,
"loss": 0.1597,
"mean_token_accuracy": 0.9479557573795319,
"num_tokens": 150269048.0,
"step": 1007
},
{
"epoch": 0.6831582514401897,
"grad_norm": 0.4971750767713653,
"learning_rate": 6.3550135501355e-07,
"loss": 0.1692,
"mean_token_accuracy": 0.9455204978585243,
"num_tokens": 150414464.0,
"step": 1008
},
{
"epoch": 0.6838359878007455,
"grad_norm": 0.523589931416947,
"learning_rate": 6.341463414634146e-07,
"loss": 0.1597,
"mean_token_accuracy": 0.9485000520944595,
"num_tokens": 150559944.0,
"step": 1009
},
{
"epoch": 0.6845137241613013,
"grad_norm": 0.42982723923499433,
"learning_rate": 6.327913279132791e-07,
"loss": 0.1639,
"mean_token_accuracy": 0.947691835463047,
"num_tokens": 150709472.0,
"step": 1010
},
{
"epoch": 0.685191460521857,
"grad_norm": 0.4800134554668242,
"learning_rate": 6.314363143631436e-07,
"loss": 0.1673,
"mean_token_accuracy": 0.9475793689489365,
"num_tokens": 150857897.0,
"step": 1011
},
{
"epoch": 0.6858691968824128,
"grad_norm": 0.4965764094852088,
"learning_rate": 6.300813008130081e-07,
"loss": 0.1723,
"mean_token_accuracy": 0.9453277885913849,
"num_tokens": 151009033.0,
"step": 1012
},
{
"epoch": 0.6865469332429684,
"grad_norm": 0.9911853882172641,
"learning_rate": 6.287262872628726e-07,
"loss": 0.1601,
"mean_token_accuracy": 0.9489800333976746,
"num_tokens": 151158813.0,
"step": 1013
},
{
"epoch": 0.6872246696035242,
"grad_norm": 0.4913798882103245,
"learning_rate": 6.273712737127371e-07,
"loss": 0.1586,
"mean_token_accuracy": 0.9487544074654579,
"num_tokens": 151306636.0,
"step": 1014
},
{
"epoch": 0.6879024059640799,
"grad_norm": 0.42742009951783383,
"learning_rate": 6.260162601626016e-07,
"loss": 0.1585,
"mean_token_accuracy": 0.9494217410683632,
"num_tokens": 151457560.0,
"step": 1015
},
{
"epoch": 0.6885801423246357,
"grad_norm": 0.434739576969294,
"learning_rate": 6.246612466124661e-07,
"loss": 0.167,
"mean_token_accuracy": 0.9475802183151245,
"num_tokens": 151604728.0,
"step": 1016
},
{
"epoch": 0.6892578786851915,
"grad_norm": 0.46381836616803435,
"learning_rate": 6.233062330623306e-07,
"loss": 0.173,
"mean_token_accuracy": 0.9461763128638268,
"num_tokens": 151754272.0,
"step": 1017
},
{
"epoch": 0.6899356150457472,
"grad_norm": 0.4280497647125328,
"learning_rate": 6.219512195121951e-07,
"loss": 0.1719,
"mean_token_accuracy": 0.9453966617584229,
"num_tokens": 151902351.0,
"step": 1018
},
{
"epoch": 0.690613351406303,
"grad_norm": 0.6707571359671862,
"learning_rate": 6.205962059620596e-07,
"loss": 0.165,
"mean_token_accuracy": 0.9466883912682533,
"num_tokens": 152052072.0,
"step": 1019
},
{
"epoch": 0.6912910877668587,
"grad_norm": 0.4956643167635072,
"learning_rate": 6.19241192411924e-07,
"loss": 0.161,
"mean_token_accuracy": 0.9491880983114243,
"num_tokens": 152204352.0,
"step": 1020
},
{
"epoch": 0.6919688241274145,
"grad_norm": 0.3824262140069849,
"learning_rate": 6.178861788617887e-07,
"loss": 0.1621,
"mean_token_accuracy": 0.9476993232965469,
"num_tokens": 152354692.0,
"step": 1021
},
{
"epoch": 0.6926465604879701,
"grad_norm": 0.47128833676415405,
"learning_rate": 6.165311653116531e-07,
"loss": 0.1678,
"mean_token_accuracy": 0.9471416920423508,
"num_tokens": 152505270.0,
"step": 1022
},
{
"epoch": 0.6933242968485259,
"grad_norm": 0.435155481364805,
"learning_rate": 6.151761517615176e-07,
"loss": 0.1609,
"mean_token_accuracy": 0.948992021381855,
"num_tokens": 152656768.0,
"step": 1023
},
{
"epoch": 0.6940020332090817,
"grad_norm": 0.48425829900282286,
"learning_rate": 6.13821138211382e-07,
"loss": 0.1688,
"mean_token_accuracy": 0.9452779963612556,
"num_tokens": 152798483.0,
"step": 1024
},
{
"epoch": 0.6946797695696374,
"grad_norm": 0.4958286938733244,
"learning_rate": 6.124661246612465e-07,
"loss": 0.1775,
"mean_token_accuracy": 0.9441835582256317,
"num_tokens": 152945144.0,
"step": 1025
},
{
"epoch": 0.6953575059301932,
"grad_norm": 0.46234975698526665,
"learning_rate": 6.111111111111112e-07,
"loss": 0.1738,
"mean_token_accuracy": 0.9445149600505829,
"num_tokens": 153097031.0,
"step": 1026
},
{
"epoch": 0.6960352422907489,
"grad_norm": 1.057014143853164,
"learning_rate": 6.097560975609756e-07,
"loss": 0.1646,
"mean_token_accuracy": 0.9487510249018669,
"num_tokens": 153245418.0,
"step": 1027
},
{
"epoch": 0.6967129786513047,
"grad_norm": 0.4154254841226032,
"learning_rate": 6.084010840108401e-07,
"loss": 0.1554,
"mean_token_accuracy": 0.9500058144330978,
"num_tokens": 153393588.0,
"step": 1028
},
{
"epoch": 0.6973907150118603,
"grad_norm": 0.494308254553982,
"learning_rate": 6.070460704607045e-07,
"loss": 0.1588,
"mean_token_accuracy": 0.9491009786725044,
"num_tokens": 153544720.0,
"step": 1029
},
{
"epoch": 0.6980684513724161,
"grad_norm": 0.6031422994604925,
"learning_rate": 6.056910569105691e-07,
"loss": 0.1771,
"mean_token_accuracy": 0.9448559284210205,
"num_tokens": 153693950.0,
"step": 1030
},
{
"epoch": 0.6987461877329719,
"grad_norm": 0.41629799805370477,
"learning_rate": 6.043360433604336e-07,
"loss": 0.1605,
"mean_token_accuracy": 0.9477561861276627,
"num_tokens": 153840032.0,
"step": 1031
},
{
"epoch": 0.6994239240935276,
"grad_norm": 0.6530039841126879,
"learning_rate": 6.029810298102981e-07,
"loss": 0.165,
"mean_token_accuracy": 0.9468976333737373,
"num_tokens": 153987990.0,
"step": 1032
},
{
"epoch": 0.7001016604540834,
"grad_norm": 0.4861004204726015,
"learning_rate": 6.016260162601626e-07,
"loss": 0.1634,
"mean_token_accuracy": 0.9479655027389526,
"num_tokens": 154141733.0,
"step": 1033
},
{
"epoch": 0.7007793968146391,
"grad_norm": 0.414327671266201,
"learning_rate": 6.002710027100271e-07,
"loss": 0.1618,
"mean_token_accuracy": 0.9477419853210449,
"num_tokens": 154294962.0,
"step": 1034
},
{
"epoch": 0.7014571331751949,
"grad_norm": 0.583808333818519,
"learning_rate": 5.989159891598916e-07,
"loss": 0.1626,
"mean_token_accuracy": 0.9477186799049377,
"num_tokens": 154444151.0,
"step": 1035
},
{
"epoch": 0.7021348695357505,
"grad_norm": 0.4599533579612761,
"learning_rate": 5.97560975609756e-07,
"loss": 0.1617,
"mean_token_accuracy": 0.9488044008612633,
"num_tokens": 154596863.0,
"step": 1036
},
{
"epoch": 0.7028126058963063,
"grad_norm": 0.7288151095568319,
"learning_rate": 5.962059620596206e-07,
"loss": 0.1647,
"mean_token_accuracy": 0.9469533413648605,
"num_tokens": 154743904.0,
"step": 1037
},
{
"epoch": 0.7034903422568621,
"grad_norm": 0.40988753181716203,
"learning_rate": 5.94850948509485e-07,
"loss": 0.1626,
"mean_token_accuracy": 0.9466472268104553,
"num_tokens": 154891324.0,
"step": 1038
},
{
"epoch": 0.7041680786174178,
"grad_norm": 0.4404920504893847,
"learning_rate": 5.934959349593496e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.9487411081790924,
"num_tokens": 155042210.0,
"step": 1039
},
{
"epoch": 0.7048458149779736,
"grad_norm": 0.5766288185187523,
"learning_rate": 5.92140921409214e-07,
"loss": 0.1622,
"mean_token_accuracy": 0.9482933580875397,
"num_tokens": 155189432.0,
"step": 1040
},
{
"epoch": 0.7055235513385293,
"grad_norm": 0.412030134764044,
"learning_rate": 5.907859078590785e-07,
"loss": 0.1702,
"mean_token_accuracy": 0.9456660151481628,
"num_tokens": 155341723.0,
"step": 1041
},
{
"epoch": 0.7062012876990851,
"grad_norm": 0.44728187011697396,
"learning_rate": 5.894308943089431e-07,
"loss": 0.1711,
"mean_token_accuracy": 0.9455131962895393,
"num_tokens": 155489554.0,
"step": 1042
},
{
"epoch": 0.7068790240596408,
"grad_norm": 0.5074896720012357,
"learning_rate": 5.880758807588076e-07,
"loss": 0.1558,
"mean_token_accuracy": 0.9492698237299919,
"num_tokens": 155635822.0,
"step": 1043
},
{
"epoch": 0.7075567604201966,
"grad_norm": 0.4326441243027523,
"learning_rate": 5.867208672086721e-07,
"loss": 0.1572,
"mean_token_accuracy": 0.9493276998400688,
"num_tokens": 155780829.0,
"step": 1044
},
{
"epoch": 0.7082344967807523,
"grad_norm": 0.40335399154054274,
"learning_rate": 5.853658536585365e-07,
"loss": 0.1581,
"mean_token_accuracy": 0.9495497569441795,
"num_tokens": 155930299.0,
"step": 1045
},
{
"epoch": 0.708912233141308,
"grad_norm": 0.446254509446425,
"learning_rate": 5.84010840108401e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.9480220675468445,
"num_tokens": 156079491.0,
"step": 1046
},
{
"epoch": 0.7095899695018638,
"grad_norm": 0.4839320678771503,
"learning_rate": 5.826558265582655e-07,
"loss": 0.1577,
"mean_token_accuracy": 0.9495791122317314,
"num_tokens": 156231042.0,
"step": 1047
},
{
"epoch": 0.7102677058624195,
"grad_norm": 0.39007295050050605,
"learning_rate": 5.813008130081301e-07,
"loss": 0.1588,
"mean_token_accuracy": 0.9492901787161827,
"num_tokens": 156381147.0,
"step": 1048
},
{
"epoch": 0.7109454422229753,
"grad_norm": 0.4476484023763023,
"learning_rate": 5.799457994579946e-07,
"loss": 0.1677,
"mean_token_accuracy": 0.9462874010205269,
"num_tokens": 156527359.0,
"step": 1049
},
{
"epoch": 0.711623178583531,
"grad_norm": 0.36657558143799696,
"learning_rate": 5.78590785907859e-07,
"loss": 0.1624,
"mean_token_accuracy": 0.9479619190096855,
"num_tokens": 156675494.0,
"step": 1050
},
{
"epoch": 0.7123009149440868,
"grad_norm": 0.3907366683635014,
"learning_rate": 5.772357723577236e-07,
"loss": 0.168,
"mean_token_accuracy": 0.9459411054849625,
"num_tokens": 156824525.0,
"step": 1051
},
{
"epoch": 0.7129786513046424,
"grad_norm": 0.5010217700696592,
"learning_rate": 5.75880758807588e-07,
"loss": 0.168,
"mean_token_accuracy": 0.9472585767507553,
"num_tokens": 156972539.0,
"step": 1052
},
{
"epoch": 0.7136563876651982,
"grad_norm": 0.4552239071847725,
"learning_rate": 5.745257452574526e-07,
"loss": 0.1571,
"mean_token_accuracy": 0.9496041089296341,
"num_tokens": 157126178.0,
"step": 1053
},
{
"epoch": 0.714334124025754,
"grad_norm": 0.5803431503580072,
"learning_rate": 5.73170731707317e-07,
"loss": 0.1667,
"mean_token_accuracy": 0.9461987987160683,
"num_tokens": 157271554.0,
"step": 1054
},
{
"epoch": 0.7150118603863097,
"grad_norm": 0.41125032613778734,
"learning_rate": 5.718157181571816e-07,
"loss": 0.1603,
"mean_token_accuracy": 0.948636420071125,
"num_tokens": 157420011.0,
"step": 1055
},
{
"epoch": 0.7156895967468655,
"grad_norm": 0.49381781540478925,
"learning_rate": 5.704607046070461e-07,
"loss": 0.1618,
"mean_token_accuracy": 0.9482694193720818,
"num_tokens": 157572976.0,
"step": 1056
},
{
"epoch": 0.7163673331074212,
"grad_norm": 0.4063972540938144,
"learning_rate": 5.691056910569105e-07,
"loss": 0.1565,
"mean_token_accuracy": 0.9495553001761436,
"num_tokens": 157726207.0,
"step": 1057
},
{
"epoch": 0.717045069467977,
"grad_norm": 0.47874259116197987,
"learning_rate": 5.677506775067751e-07,
"loss": 0.1611,
"mean_token_accuracy": 0.9484956189990044,
"num_tokens": 157876946.0,
"step": 1058
},
{
"epoch": 0.7177228058285327,
"grad_norm": 0.48396877389491794,
"learning_rate": 5.663956639566395e-07,
"loss": 0.1574,
"mean_token_accuracy": 0.9495202451944351,
"num_tokens": 158026932.0,
"step": 1059
},
{
"epoch": 0.7184005421890884,
"grad_norm": 0.391289028399714,
"learning_rate": 5.650406504065041e-07,
"loss": 0.1626,
"mean_token_accuracy": 0.9484245777130127,
"num_tokens": 158174397.0,
"step": 1060
},
{
"epoch": 0.7190782785496442,
"grad_norm": 0.4424697534472181,
"learning_rate": 5.636856368563685e-07,
"loss": 0.1568,
"mean_token_accuracy": 0.9496257454156876,
"num_tokens": 158326039.0,
"step": 1061
},
{
"epoch": 0.7197560149101999,
"grad_norm": 0.5089271154160393,
"learning_rate": 5.62330623306233e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9480353966355324,
"num_tokens": 158469566.0,
"step": 1062
},
{
"epoch": 0.7204337512707557,
"grad_norm": 0.4985784920100805,
"learning_rate": 5.609756097560975e-07,
"loss": 0.1613,
"mean_token_accuracy": 0.9490131065249443,
"num_tokens": 158619431.0,
"step": 1063
},
{
"epoch": 0.7211114876313114,
"grad_norm": 0.44218019553298804,
"learning_rate": 5.596205962059621e-07,
"loss": 0.1615,
"mean_token_accuracy": 0.9480287060141563,
"num_tokens": 158771302.0,
"step": 1064
},
{
"epoch": 0.7217892239918672,
"grad_norm": 0.6204884275566048,
"learning_rate": 5.582655826558266e-07,
"loss": 0.1507,
"mean_token_accuracy": 0.9509203135967255,
"num_tokens": 158916649.0,
"step": 1065
},
{
"epoch": 0.7224669603524229,
"grad_norm": 0.609368393923901,
"learning_rate": 5.56910569105691e-07,
"loss": 0.1632,
"mean_token_accuracy": 0.9473355263471603,
"num_tokens": 159066742.0,
"step": 1066
},
{
"epoch": 0.7231446967129787,
"grad_norm": 0.49336244834060305,
"learning_rate": 5.555555555555555e-07,
"loss": 0.1519,
"mean_token_accuracy": 0.950840562582016,
"num_tokens": 159216948.0,
"step": 1067
},
{
"epoch": 0.7238224330735344,
"grad_norm": 0.4084779841187189,
"learning_rate": 5.5420054200542e-07,
"loss": 0.1574,
"mean_token_accuracy": 0.949260413646698,
"num_tokens": 159367626.0,
"step": 1068
},
{
"epoch": 0.7245001694340901,
"grad_norm": 0.4875070606551302,
"learning_rate": 5.528455284552846e-07,
"loss": 0.1568,
"mean_token_accuracy": 0.9500613510608673,
"num_tokens": 159518997.0,
"step": 1069
},
{
"epoch": 0.7251779057946459,
"grad_norm": 0.621033279364186,
"learning_rate": 5.51490514905149e-07,
"loss": 0.1621,
"mean_token_accuracy": 0.947950966656208,
"num_tokens": 159667046.0,
"step": 1070
},
{
"epoch": 0.7258556421552016,
"grad_norm": 0.5091231347925989,
"learning_rate": 5.501355013550135e-07,
"loss": 0.1597,
"mean_token_accuracy": 0.9486712962388992,
"num_tokens": 159818866.0,
"step": 1071
},
{
"epoch": 0.7265333785157574,
"grad_norm": 0.42381328311317457,
"learning_rate": 5.487804878048781e-07,
"loss": 0.1577,
"mean_token_accuracy": 0.9498942792415619,
"num_tokens": 159969008.0,
"step": 1072
},
{
"epoch": 0.7272111148763131,
"grad_norm": 0.4825256924296279,
"learning_rate": 5.474254742547425e-07,
"loss": 0.1586,
"mean_token_accuracy": 0.9494803622364998,
"num_tokens": 160110210.0,
"step": 1073
},
{
"epoch": 0.7278888512368689,
"grad_norm": 0.4265890277238117,
"learning_rate": 5.46070460704607e-07,
"loss": 0.1579,
"mean_token_accuracy": 0.9486462771892548,
"num_tokens": 160257767.0,
"step": 1074
},
{
"epoch": 0.7285665875974247,
"grad_norm": 0.47929271348046776,
"learning_rate": 5.447154471544715e-07,
"loss": 0.1517,
"mean_token_accuracy": 0.9511135593056679,
"num_tokens": 160403786.0,
"step": 1075
},
{
"epoch": 0.7292443239579803,
"grad_norm": 0.45652629692790414,
"learning_rate": 5.43360433604336e-07,
"loss": 0.1572,
"mean_token_accuracy": 0.9493985250592232,
"num_tokens": 160555117.0,
"step": 1076
},
{
"epoch": 0.7299220603185361,
"grad_norm": 1.9342607817523356,
"learning_rate": 5.420054200542005e-07,
"loss": 0.1549,
"mean_token_accuracy": 0.9498350769281387,
"num_tokens": 160702467.0,
"step": 1077
},
{
"epoch": 0.7305997966790918,
"grad_norm": 0.4618242595459399,
"learning_rate": 5.40650406504065e-07,
"loss": 0.1714,
"mean_token_accuracy": 0.9453785419464111,
"num_tokens": 160852318.0,
"step": 1078
},
{
"epoch": 0.7312775330396476,
"grad_norm": 0.9002974906877117,
"learning_rate": 5.392953929539295e-07,
"loss": 0.1587,
"mean_token_accuracy": 0.9489387944340706,
"num_tokens": 161005141.0,
"step": 1079
},
{
"epoch": 0.7319552694002033,
"grad_norm": 0.4539144950148489,
"learning_rate": 5.37940379403794e-07,
"loss": 0.1609,
"mean_token_accuracy": 0.9478288516402245,
"num_tokens": 161155555.0,
"step": 1080
},
{
"epoch": 0.7326330057607591,
"grad_norm": 0.717476527687285,
"learning_rate": 5.365853658536586e-07,
"loss": 0.1654,
"mean_token_accuracy": 0.9471670761704445,
"num_tokens": 161307804.0,
"step": 1081
},
{
"epoch": 0.7333107421213149,
"grad_norm": 0.5286492849743774,
"learning_rate": 5.35230352303523e-07,
"loss": 0.16,
"mean_token_accuracy": 0.9486302956938744,
"num_tokens": 161453779.0,
"step": 1082
},
{
"epoch": 0.7339884784818705,
"grad_norm": 0.44520556756003754,
"learning_rate": 5.338753387533875e-07,
"loss": 0.1664,
"mean_token_accuracy": 0.947560265660286,
"num_tokens": 161602603.0,
"step": 1083
},
{
"epoch": 0.7346662148424263,
"grad_norm": 0.44339790141119606,
"learning_rate": 5.325203252032519e-07,
"loss": 0.1712,
"mean_token_accuracy": 0.9448318034410477,
"num_tokens": 161752988.0,
"step": 1084
},
{
"epoch": 0.735343951202982,
"grad_norm": 0.6641488603681329,
"learning_rate": 5.311653116531166e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9483387470245361,
"num_tokens": 161902180.0,
"step": 1085
},
{
"epoch": 0.7360216875635378,
"grad_norm": 0.4147540319296875,
"learning_rate": 5.298102981029811e-07,
"loss": 0.1681,
"mean_token_accuracy": 0.9463905096054077,
"num_tokens": 162052450.0,
"step": 1086
},
{
"epoch": 0.7366994239240935,
"grad_norm": 0.4571766444054861,
"learning_rate": 5.284552845528455e-07,
"loss": 0.1633,
"mean_token_accuracy": 0.9472680240869522,
"num_tokens": 162204506.0,
"step": 1087
},
{
"epoch": 0.7373771602846493,
"grad_norm": 0.5567352989309443,
"learning_rate": 5.2710027100271e-07,
"loss": 0.1652,
"mean_token_accuracy": 0.9466003924608231,
"num_tokens": 162356075.0,
"step": 1088
},
{
"epoch": 0.738054896645205,
"grad_norm": 0.4095996116976388,
"learning_rate": 5.257452574525745e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9489855617284775,
"num_tokens": 162505952.0,
"step": 1089
},
{
"epoch": 0.7387326330057608,
"grad_norm": 0.42251631468174644,
"learning_rate": 5.24390243902439e-07,
"loss": 0.172,
"mean_token_accuracy": 0.9460385143756866,
"num_tokens": 162655204.0,
"step": 1090
},
{
"epoch": 0.7394103693663165,
"grad_norm": 0.5142282362855055,
"learning_rate": 5.230352303523035e-07,
"loss": 0.1471,
"mean_token_accuracy": 0.9530572295188904,
"num_tokens": 162806577.0,
"step": 1091
},
{
"epoch": 0.7400881057268722,
"grad_norm": 0.47517812677105936,
"learning_rate": 5.21680216802168e-07,
"loss": 0.1589,
"mean_token_accuracy": 0.9492161795496941,
"num_tokens": 162953245.0,
"step": 1092
},
{
"epoch": 0.740765842087428,
"grad_norm": 0.47577509346551555,
"learning_rate": 5.203252032520325e-07,
"loss": 0.1659,
"mean_token_accuracy": 0.947672463953495,
"num_tokens": 163103640.0,
"step": 1093
},
{
"epoch": 0.7414435784479837,
"grad_norm": 0.4931157254060953,
"learning_rate": 5.18970189701897e-07,
"loss": 0.1614,
"mean_token_accuracy": 0.9485186189413071,
"num_tokens": 163251527.0,
"step": 1094
},
{
"epoch": 0.7421213148085395,
"grad_norm": 0.46813396306553173,
"learning_rate": 5.176151761517615e-07,
"loss": 0.1525,
"mean_token_accuracy": 0.9509926736354828,
"num_tokens": 163401281.0,
"step": 1095
},
{
"epoch": 0.7427990511690952,
"grad_norm": 0.44488095188988325,
"learning_rate": 5.16260162601626e-07,
"loss": 0.1684,
"mean_token_accuracy": 0.9467815607786179,
"num_tokens": 163548949.0,
"step": 1096
},
{
"epoch": 0.743476787529651,
"grad_norm": 0.37545263145077906,
"learning_rate": 5.149051490514905e-07,
"loss": 0.1532,
"mean_token_accuracy": 0.9503208845853806,
"num_tokens": 163696654.0,
"step": 1097
},
{
"epoch": 0.7441545238902068,
"grad_norm": 0.451614998294825,
"learning_rate": 5.13550135501355e-07,
"loss": 0.165,
"mean_token_accuracy": 0.9479142129421234,
"num_tokens": 163844051.0,
"step": 1098
},
{
"epoch": 0.7448322602507624,
"grad_norm": 0.3906206502589162,
"learning_rate": 5.121951219512195e-07,
"loss": 0.1531,
"mean_token_accuracy": 0.9506673514842987,
"num_tokens": 163997319.0,
"step": 1099
},
{
"epoch": 0.7455099966113182,
"grad_norm": 0.39475390707276214,
"learning_rate": 5.108401084010839e-07,
"loss": 0.1599,
"mean_token_accuracy": 0.9487632811069489,
"num_tokens": 164149806.0,
"step": 1100
},
{
"epoch": 0.7461877329718739,
"grad_norm": 0.45195993303656146,
"learning_rate": 5.094850948509484e-07,
"loss": 0.1701,
"mean_token_accuracy": 0.9452178552746773,
"num_tokens": 164300498.0,
"step": 1101
},
{
"epoch": 0.7468654693324297,
"grad_norm": 0.9283345187428643,
"learning_rate": 5.081300813008131e-07,
"loss": 0.1617,
"mean_token_accuracy": 0.9482429027557373,
"num_tokens": 164453163.0,
"step": 1102
},
{
"epoch": 0.7475432056929854,
"grad_norm": 0.4160790574433991,
"learning_rate": 5.067750677506775e-07,
"loss": 0.1632,
"mean_token_accuracy": 0.9479247257113457,
"num_tokens": 164603381.0,
"step": 1103
},
{
"epoch": 0.7482209420535412,
"grad_norm": 0.5972610753011809,
"learning_rate": 5.05420054200542e-07,
"loss": 0.1595,
"mean_token_accuracy": 0.9486266598105431,
"num_tokens": 164753482.0,
"step": 1104
},
{
"epoch": 0.748898678414097,
"grad_norm": 0.50765690623194,
"learning_rate": 5.040650406504064e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9477174207568169,
"num_tokens": 164901803.0,
"step": 1105
},
{
"epoch": 0.7495764147746526,
"grad_norm": 0.6244395867041137,
"learning_rate": 5.02710027100271e-07,
"loss": 0.1709,
"mean_token_accuracy": 0.945775531232357,
"num_tokens": 165051724.0,
"step": 1106
},
{
"epoch": 0.7502541511352084,
"grad_norm": 0.4230661986575455,
"learning_rate": 5.013550135501355e-07,
"loss": 0.159,
"mean_token_accuracy": 0.9486871957778931,
"num_tokens": 165201307.0,
"step": 1107
},
{
"epoch": 0.7509318874957641,
"grad_norm": 0.4138709790482835,
"learning_rate": 5e-07,
"loss": 0.1759,
"mean_token_accuracy": 0.944426141679287,
"num_tokens": 165348364.0,
"step": 1108
},
{
"epoch": 0.7516096238563199,
"grad_norm": 0.6258168441746877,
"learning_rate": 4.986449864498645e-07,
"loss": 0.1593,
"mean_token_accuracy": 0.9489279463887215,
"num_tokens": 165498683.0,
"step": 1109
},
{
"epoch": 0.7522873602168756,
"grad_norm": 0.44625048459307576,
"learning_rate": 4.972899728997289e-07,
"loss": 0.163,
"mean_token_accuracy": 0.9476623311638832,
"num_tokens": 165649766.0,
"step": 1110
},
{
"epoch": 0.7529650965774314,
"grad_norm": 0.4055254667064464,
"learning_rate": 4.959349593495934e-07,
"loss": 0.1573,
"mean_token_accuracy": 0.9490974396467209,
"num_tokens": 165799500.0,
"step": 1111
},
{
"epoch": 0.7536428329379872,
"grad_norm": 0.5897618277261697,
"learning_rate": 4.94579945799458e-07,
"loss": 0.162,
"mean_token_accuracy": 0.9483003690838814,
"num_tokens": 165943803.0,
"step": 1112
},
{
"epoch": 0.7543205692985429,
"grad_norm": 0.4931572511555608,
"learning_rate": 4.932249322493225e-07,
"loss": 0.1565,
"mean_token_accuracy": 0.9494629874825478,
"num_tokens": 166093921.0,
"step": 1113
},
{
"epoch": 0.7549983056590986,
"grad_norm": 0.44369073250658586,
"learning_rate": 4.91869918699187e-07,
"loss": 0.1629,
"mean_token_accuracy": 0.9481725171208382,
"num_tokens": 166240636.0,
"step": 1114
},
{
"epoch": 0.7556760420196543,
"grad_norm": 0.3982843037368229,
"learning_rate": 4.905149051490515e-07,
"loss": 0.1646,
"mean_token_accuracy": 0.9479377120733261,
"num_tokens": 166388546.0,
"step": 1115
},
{
"epoch": 0.7563537783802101,
"grad_norm": 0.48976846833393406,
"learning_rate": 4.891598915989159e-07,
"loss": 0.1518,
"mean_token_accuracy": 0.9502234533429146,
"num_tokens": 166538021.0,
"step": 1116
},
{
"epoch": 0.7570315147407658,
"grad_norm": 0.4933497477703194,
"learning_rate": 4.878048780487804e-07,
"loss": 0.1777,
"mean_token_accuracy": 0.944856159389019,
"num_tokens": 166688000.0,
"step": 1117
},
{
"epoch": 0.7577092511013216,
"grad_norm": 0.859665555701841,
"learning_rate": 4.86449864498645e-07,
"loss": 0.1561,
"mean_token_accuracy": 0.949802003800869,
"num_tokens": 166839080.0,
"step": 1118
},
{
"epoch": 0.7583869874618774,
"grad_norm": 0.48352933866150305,
"learning_rate": 4.850948509485095e-07,
"loss": 0.1779,
"mean_token_accuracy": 0.9442877918481827,
"num_tokens": 166991404.0,
"step": 1119
},
{
"epoch": 0.7590647238224331,
"grad_norm": 0.5251387210642391,
"learning_rate": 4.83739837398374e-07,
"loss": 0.1638,
"mean_token_accuracy": 0.9475614503026009,
"num_tokens": 167136411.0,
"step": 1120
},
{
"epoch": 0.7597424601829889,
"grad_norm": 0.43023629072445974,
"learning_rate": 4.823848238482385e-07,
"loss": 0.1634,
"mean_token_accuracy": 0.9484768286347389,
"num_tokens": 167285781.0,
"step": 1121
},
{
"epoch": 0.7604201965435445,
"grad_norm": 0.3962967805842696,
"learning_rate": 4.810298102981029e-07,
"loss": 0.1687,
"mean_token_accuracy": 0.9462398812174797,
"num_tokens": 167435299.0,
"step": 1122
},
{
"epoch": 0.7610979329041003,
"grad_norm": 0.5046873268117023,
"learning_rate": 4.796747967479675e-07,
"loss": 0.1693,
"mean_token_accuracy": 0.9459140375256538,
"num_tokens": 167580175.0,
"step": 1123
},
{
"epoch": 0.761775669264656,
"grad_norm": 0.44880163261180067,
"learning_rate": 4.78319783197832e-07,
"loss": 0.1626,
"mean_token_accuracy": 0.9481295719742775,
"num_tokens": 167731384.0,
"step": 1124
},
{
"epoch": 0.7624534056252118,
"grad_norm": 0.476587035527155,
"learning_rate": 4.769647696476965e-07,
"loss": 0.1602,
"mean_token_accuracy": 0.9479389265179634,
"num_tokens": 167878163.0,
"step": 1125
},
{
"epoch": 0.7631311419857676,
"grad_norm": 0.4058674637764776,
"learning_rate": 4.756097560975609e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9488006085157394,
"num_tokens": 168024384.0,
"step": 1126
},
{
"epoch": 0.7638088783463233,
"grad_norm": 0.4528273730595995,
"learning_rate": 4.742547425474255e-07,
"loss": 0.1671,
"mean_token_accuracy": 0.9465280771255493,
"num_tokens": 168174116.0,
"step": 1127
},
{
"epoch": 0.7644866147068791,
"grad_norm": 0.43105944053847295,
"learning_rate": 4.7289972899728995e-07,
"loss": 0.1631,
"mean_token_accuracy": 0.9475222527980804,
"num_tokens": 168322628.0,
"step": 1128
},
{
"epoch": 0.7651643510674347,
"grad_norm": 0.6121472847360002,
"learning_rate": 4.7154471544715447e-07,
"loss": 0.1803,
"mean_token_accuracy": 0.9420292302966118,
"num_tokens": 168472390.0,
"step": 1129
},
{
"epoch": 0.7658420874279905,
"grad_norm": 0.4119363405573088,
"learning_rate": 4.7018970189701893e-07,
"loss": 0.1617,
"mean_token_accuracy": 0.9473756477236748,
"num_tokens": 168625150.0,
"step": 1130
},
{
"epoch": 0.7665198237885462,
"grad_norm": 0.6418528742178927,
"learning_rate": 4.6883468834688345e-07,
"loss": 0.1539,
"mean_token_accuracy": 0.9500811100006104,
"num_tokens": 168772427.0,
"step": 1131
},
{
"epoch": 0.767197560149102,
"grad_norm": 0.6202551640812881,
"learning_rate": 4.674796747967479e-07,
"loss": 0.1669,
"mean_token_accuracy": 0.9462482109665871,
"num_tokens": 168917877.0,
"step": 1132
},
{
"epoch": 0.7678752965096577,
"grad_norm": 0.4660477467600856,
"learning_rate": 4.6612466124661244e-07,
"loss": 0.1721,
"mean_token_accuracy": 0.9449072405695915,
"num_tokens": 169065824.0,
"step": 1133
},
{
"epoch": 0.7685530328702135,
"grad_norm": 0.4556578992177878,
"learning_rate": 4.647696476964769e-07,
"loss": 0.163,
"mean_token_accuracy": 0.9484978765249252,
"num_tokens": 169216911.0,
"step": 1134
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.40157746071753253,
"learning_rate": 4.634146341463415e-07,
"loss": 0.1568,
"mean_token_accuracy": 0.9495006650686264,
"num_tokens": 169364869.0,
"step": 1135
},
{
"epoch": 0.769908505591325,
"grad_norm": 0.7454765323943013,
"learning_rate": 4.6205962059620595e-07,
"loss": 0.1732,
"mean_token_accuracy": 0.9446614757180214,
"num_tokens": 169513768.0,
"step": 1136
},
{
"epoch": 0.7705862419518807,
"grad_norm": 0.45367573148015927,
"learning_rate": 4.6070460704607046e-07,
"loss": 0.1472,
"mean_token_accuracy": 0.9525748193264008,
"num_tokens": 169662682.0,
"step": 1137
},
{
"epoch": 0.7712639783124364,
"grad_norm": 0.46148330013702105,
"learning_rate": 4.5934959349593493e-07,
"loss": 0.1691,
"mean_token_accuracy": 0.9462109357118607,
"num_tokens": 169812200.0,
"step": 1138
},
{
"epoch": 0.7719417146729922,
"grad_norm": 0.49333201810275873,
"learning_rate": 4.5799457994579945e-07,
"loss": 0.1408,
"mean_token_accuracy": 0.9537433162331581,
"num_tokens": 169963321.0,
"step": 1139
},
{
"epoch": 0.7726194510335479,
"grad_norm": 0.47938082073994764,
"learning_rate": 4.566395663956639e-07,
"loss": 0.1692,
"mean_token_accuracy": 0.9459724575281143,
"num_tokens": 170111961.0,
"step": 1140
},
{
"epoch": 0.7732971873941037,
"grad_norm": 0.4136374831731232,
"learning_rate": 4.5528455284552844e-07,
"loss": 0.1624,
"mean_token_accuracy": 0.9475755989551544,
"num_tokens": 170260622.0,
"step": 1141
},
{
"epoch": 0.7739749237546595,
"grad_norm": 0.4048615679339811,
"learning_rate": 4.5392953929539296e-07,
"loss": 0.1638,
"mean_token_accuracy": 0.947485126554966,
"num_tokens": 170407835.0,
"step": 1142
},
{
"epoch": 0.7746526601152152,
"grad_norm": 1.00471158613149,
"learning_rate": 4.525745257452575e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.9490302726626396,
"num_tokens": 170562121.0,
"step": 1143
},
{
"epoch": 0.775330396475771,
"grad_norm": 0.4249072808237762,
"learning_rate": 4.5121951219512194e-07,
"loss": 0.1658,
"mean_token_accuracy": 0.9463138654828072,
"num_tokens": 170708352.0,
"step": 1144
},
{
"epoch": 0.7760081328363266,
"grad_norm": 0.6615241980486258,
"learning_rate": 4.498644986449864e-07,
"loss": 0.1672,
"mean_token_accuracy": 0.9470024704933167,
"num_tokens": 170859060.0,
"step": 1145
},
{
"epoch": 0.7766858691968824,
"grad_norm": 0.4079743575969547,
"learning_rate": 4.4850948509485093e-07,
"loss": 0.1683,
"mean_token_accuracy": 0.9460372775793076,
"num_tokens": 171006613.0,
"step": 1146
},
{
"epoch": 0.7773636055574381,
"grad_norm": 0.47673565387001,
"learning_rate": 4.471544715447154e-07,
"loss": 0.1612,
"mean_token_accuracy": 0.9476036727428436,
"num_tokens": 171152018.0,
"step": 1147
},
{
"epoch": 0.7780413419179939,
"grad_norm": 0.48209638608165467,
"learning_rate": 4.457994579945799e-07,
"loss": 0.1608,
"mean_token_accuracy": 0.9482689946889877,
"num_tokens": 171298623.0,
"step": 1148
},
{
"epoch": 0.7787190782785497,
"grad_norm": 0.4632142775384579,
"learning_rate": 4.444444444444444e-07,
"loss": 0.1692,
"mean_token_accuracy": 0.9462221264839172,
"num_tokens": 171449245.0,
"step": 1149
},
{
"epoch": 0.7793968146391054,
"grad_norm": 0.6531764939466912,
"learning_rate": 4.4308943089430896e-07,
"loss": 0.1563,
"mean_token_accuracy": 0.9493897035717964,
"num_tokens": 171600165.0,
"step": 1150
},
{
"epoch": 0.7800745509996612,
"grad_norm": 0.45547498077652343,
"learning_rate": 4.417344173441734e-07,
"loss": 0.1575,
"mean_token_accuracy": 0.9489210993051529,
"num_tokens": 171751080.0,
"step": 1151
},
{
"epoch": 0.7807522873602168,
"grad_norm": 0.484511345927537,
"learning_rate": 4.4037940379403794e-07,
"loss": 0.1706,
"mean_token_accuracy": 0.9453775733709335,
"num_tokens": 171901129.0,
"step": 1152
},
{
"epoch": 0.7814300237207726,
"grad_norm": 0.41706546335957123,
"learning_rate": 4.390243902439024e-07,
"loss": 0.1575,
"mean_token_accuracy": 0.9489161521196365,
"num_tokens": 172050783.0,
"step": 1153
},
{
"epoch": 0.7821077600813283,
"grad_norm": 0.4308068569401677,
"learning_rate": 4.3766937669376693e-07,
"loss": 0.1712,
"mean_token_accuracy": 0.9455830752849579,
"num_tokens": 172198177.0,
"step": 1154
},
{
"epoch": 0.7827854964418841,
"grad_norm": 0.604842624030898,
"learning_rate": 4.363143631436314e-07,
"loss": 0.1634,
"mean_token_accuracy": 0.9486095905303955,
"num_tokens": 172350203.0,
"step": 1155
},
{
"epoch": 0.7834632328024399,
"grad_norm": 0.3932324798137123,
"learning_rate": 4.349593495934959e-07,
"loss": 0.1663,
"mean_token_accuracy": 0.9465008825063705,
"num_tokens": 172500435.0,
"step": 1156
},
{
"epoch": 0.7841409691629956,
"grad_norm": 0.4495398266781922,
"learning_rate": 4.3360433604336043e-07,
"loss": 0.1651,
"mean_token_accuracy": 0.9467159286141396,
"num_tokens": 172650483.0,
"step": 1157
},
{
"epoch": 0.7848187055235514,
"grad_norm": 0.6357691155168428,
"learning_rate": 4.3224932249322495e-07,
"loss": 0.1612,
"mean_token_accuracy": 0.9480964988470078,
"num_tokens": 172795246.0,
"step": 1158
},
{
"epoch": 0.785496441884107,
"grad_norm": 0.4374942015220935,
"learning_rate": 4.308943089430894e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9480918347835541,
"num_tokens": 172942985.0,
"step": 1159
},
{
"epoch": 0.7861741782446628,
"grad_norm": 0.47143774102574604,
"learning_rate": 4.2953929539295394e-07,
"loss": 0.1546,
"mean_token_accuracy": 0.9506010115146637,
"num_tokens": 173092290.0,
"step": 1160
},
{
"epoch": 0.7868519146052185,
"grad_norm": 0.6559537232580721,
"learning_rate": 4.281842818428184e-07,
"loss": 0.1641,
"mean_token_accuracy": 0.9472885355353355,
"num_tokens": 173242462.0,
"step": 1161
},
{
"epoch": 0.7875296509657743,
"grad_norm": 0.42550464871987237,
"learning_rate": 4.268292682926829e-07,
"loss": 0.1609,
"mean_token_accuracy": 0.9479214176535606,
"num_tokens": 173391533.0,
"step": 1162
},
{
"epoch": 0.7882073873263301,
"grad_norm": 0.4178771851406128,
"learning_rate": 4.254742547425474e-07,
"loss": 0.1663,
"mean_token_accuracy": 0.9468031749129295,
"num_tokens": 173541939.0,
"step": 1163
},
{
"epoch": 0.7888851236868858,
"grad_norm": 3.0851874844884506,
"learning_rate": 4.2411924119241186e-07,
"loss": 0.1735,
"mean_token_accuracy": 0.9444401487708092,
"num_tokens": 173691382.0,
"step": 1164
},
{
"epoch": 0.7895628600474416,
"grad_norm": 0.541872071246644,
"learning_rate": 4.2276422764227643e-07,
"loss": 0.1668,
"mean_token_accuracy": 0.9471112638711929,
"num_tokens": 173839231.0,
"step": 1165
},
{
"epoch": 0.7902405964079973,
"grad_norm": 0.4587060057310284,
"learning_rate": 4.214092140921409e-07,
"loss": 0.1792,
"mean_token_accuracy": 0.9437511041760445,
"num_tokens": 173989261.0,
"step": 1166
},
{
"epoch": 0.7909183327685531,
"grad_norm": 0.4225958028880353,
"learning_rate": 4.200542005420054e-07,
"loss": 0.1727,
"mean_token_accuracy": 0.9445930123329163,
"num_tokens": 174138486.0,
"step": 1167
},
{
"epoch": 0.7915960691291087,
"grad_norm": 0.4800932551942435,
"learning_rate": 4.186991869918699e-07,
"loss": 0.1591,
"mean_token_accuracy": 0.9493310451507568,
"num_tokens": 174291947.0,
"step": 1168
},
{
"epoch": 0.7922738054896645,
"grad_norm": 0.5128346029772238,
"learning_rate": 4.173441734417344e-07,
"loss": 0.1571,
"mean_token_accuracy": 0.9485156983137131,
"num_tokens": 174440595.0,
"step": 1169
},
{
"epoch": 0.7929515418502202,
"grad_norm": 0.40254641736714897,
"learning_rate": 4.1598915989159887e-07,
"loss": 0.1544,
"mean_token_accuracy": 0.9498544856905937,
"num_tokens": 174590649.0,
"step": 1170
},
{
"epoch": 0.793629278210776,
"grad_norm": 0.4033616840760745,
"learning_rate": 4.146341463414634e-07,
"loss": 0.1571,
"mean_token_accuracy": 0.948909617960453,
"num_tokens": 174742023.0,
"step": 1171
},
{
"epoch": 0.7943070145713318,
"grad_norm": 0.845072571792667,
"learning_rate": 4.1327913279132786e-07,
"loss": 0.1494,
"mean_token_accuracy": 0.9514944478869438,
"num_tokens": 174890895.0,
"step": 1172
},
{
"epoch": 0.7949847509318875,
"grad_norm": 0.40710652669557085,
"learning_rate": 4.1192411924119243e-07,
"loss": 0.1658,
"mean_token_accuracy": 0.9479440152645111,
"num_tokens": 175039721.0,
"step": 1173
},
{
"epoch": 0.7956624872924433,
"grad_norm": 0.5258012808838943,
"learning_rate": 4.105691056910569e-07,
"loss": 0.1597,
"mean_token_accuracy": 0.9484201744198799,
"num_tokens": 175192953.0,
"step": 1174
},
{
"epoch": 0.796340223652999,
"grad_norm": 0.44523121927967174,
"learning_rate": 4.092140921409214e-07,
"loss": 0.1562,
"mean_token_accuracy": 0.9496227726340294,
"num_tokens": 175345855.0,
"step": 1175
},
{
"epoch": 0.7970179600135547,
"grad_norm": 0.5030347427449846,
"learning_rate": 4.078590785907859e-07,
"loss": 0.1491,
"mean_token_accuracy": 0.9523748084902763,
"num_tokens": 175494658.0,
"step": 1176
},
{
"epoch": 0.7976956963741104,
"grad_norm": 0.3941068989311148,
"learning_rate": 4.065040650406504e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.9482178464531898,
"num_tokens": 175644012.0,
"step": 1177
},
{
"epoch": 0.7983734327346662,
"grad_norm": 0.46684232352389565,
"learning_rate": 4.0514905149051487e-07,
"loss": 0.1659,
"mean_token_accuracy": 0.9468219578266144,
"num_tokens": 175792869.0,
"step": 1178
},
{
"epoch": 0.799051169095222,
"grad_norm": 1.1655751155677825,
"learning_rate": 4.0379403794037934e-07,
"loss": 0.1669,
"mean_token_accuracy": 0.9474190697073936,
"num_tokens": 175942224.0,
"step": 1179
},
{
"epoch": 0.7997289054557777,
"grad_norm": 0.5365055431684573,
"learning_rate": 4.024390243902439e-07,
"loss": 0.1609,
"mean_token_accuracy": 0.94867292791605,
"num_tokens": 176092741.0,
"step": 1180
},
{
"epoch": 0.8004066418163335,
"grad_norm": 0.3826394929998182,
"learning_rate": 4.010840108401084e-07,
"loss": 0.1575,
"mean_token_accuracy": 0.9491298869252205,
"num_tokens": 176246685.0,
"step": 1181
},
{
"epoch": 0.8010843781768892,
"grad_norm": 1.0506281171730445,
"learning_rate": 3.997289972899729e-07,
"loss": 0.173,
"mean_token_accuracy": 0.9438380673527718,
"num_tokens": 176394970.0,
"step": 1182
},
{
"epoch": 0.801762114537445,
"grad_norm": 0.7703779432047844,
"learning_rate": 3.9837398373983736e-07,
"loss": 0.1664,
"mean_token_accuracy": 0.9469498619437218,
"num_tokens": 176544055.0,
"step": 1183
},
{
"epoch": 0.8024398508980006,
"grad_norm": 0.5262594689854897,
"learning_rate": 3.970189701897019e-07,
"loss": 0.1601,
"mean_token_accuracy": 0.948112279176712,
"num_tokens": 176688198.0,
"step": 1184
},
{
"epoch": 0.8031175872585564,
"grad_norm": 0.3950576329095008,
"learning_rate": 3.9566395663956635e-07,
"loss": 0.1622,
"mean_token_accuracy": 0.9483528733253479,
"num_tokens": 176837103.0,
"step": 1185
},
{
"epoch": 0.8037953236191122,
"grad_norm": 0.605347918350154,
"learning_rate": 3.9430894308943087e-07,
"loss": 0.1697,
"mean_token_accuracy": 0.9456880316138268,
"num_tokens": 176981904.0,
"step": 1186
},
{
"epoch": 0.8044730599796679,
"grad_norm": 0.41717484581072717,
"learning_rate": 3.9295392953929534e-07,
"loss": 0.1588,
"mean_token_accuracy": 0.9493725001811981,
"num_tokens": 177129587.0,
"step": 1187
},
{
"epoch": 0.8051507963402237,
"grad_norm": 0.46778880805281214,
"learning_rate": 3.915989159891599e-07,
"loss": 0.1601,
"mean_token_accuracy": 0.9488052427768707,
"num_tokens": 177282050.0,
"step": 1188
},
{
"epoch": 0.8058285327007794,
"grad_norm": 0.7785220244255328,
"learning_rate": 3.902439024390244e-07,
"loss": 0.1642,
"mean_token_accuracy": 0.9476650431752205,
"num_tokens": 177429549.0,
"step": 1189
},
{
"epoch": 0.8065062690613352,
"grad_norm": 0.42360438924275595,
"learning_rate": 3.888888888888889e-07,
"loss": 0.1532,
"mean_token_accuracy": 0.9501416981220245,
"num_tokens": 177580668.0,
"step": 1190
},
{
"epoch": 0.8071840054218908,
"grad_norm": 0.6166756515285686,
"learning_rate": 3.8753387533875336e-07,
"loss": 0.1564,
"mean_token_accuracy": 0.9494942203164101,
"num_tokens": 177733477.0,
"step": 1191
},
{
"epoch": 0.8078617417824466,
"grad_norm": 0.40998304298923804,
"learning_rate": 3.861788617886179e-07,
"loss": 0.1691,
"mean_token_accuracy": 0.9466608390212059,
"num_tokens": 177880290.0,
"step": 1192
},
{
"epoch": 0.8085394781430024,
"grad_norm": 0.42628251502407266,
"learning_rate": 3.8482384823848235e-07,
"loss": 0.1614,
"mean_token_accuracy": 0.9487505033612251,
"num_tokens": 178034375.0,
"step": 1193
},
{
"epoch": 0.8092172145035581,
"grad_norm": 0.7174704285276794,
"learning_rate": 3.8346883468834687e-07,
"loss": 0.1732,
"mean_token_accuracy": 0.9452232122421265,
"num_tokens": 178186543.0,
"step": 1194
},
{
"epoch": 0.8098949508641139,
"grad_norm": 0.6932593178487101,
"learning_rate": 3.821138211382114e-07,
"loss": 0.1655,
"mean_token_accuracy": 0.9474772363901138,
"num_tokens": 178337693.0,
"step": 1195
},
{
"epoch": 0.8105726872246696,
"grad_norm": 0.45951252039517,
"learning_rate": 3.807588075880759e-07,
"loss": 0.1599,
"mean_token_accuracy": 0.9495590180158615,
"num_tokens": 178489898.0,
"step": 1196
},
{
"epoch": 0.8112504235852254,
"grad_norm": 0.4309023019212715,
"learning_rate": 3.794037940379404e-07,
"loss": 0.1608,
"mean_token_accuracy": 0.9492277428507805,
"num_tokens": 178636952.0,
"step": 1197
},
{
"epoch": 0.811928159945781,
"grad_norm": 0.4792060714652919,
"learning_rate": 3.7804878048780484e-07,
"loss": 0.1735,
"mean_token_accuracy": 0.945383831858635,
"num_tokens": 178788213.0,
"step": 1198
},
{
"epoch": 0.8126058963063368,
"grad_norm": 0.5703820152757423,
"learning_rate": 3.7669376693766936e-07,
"loss": 0.1669,
"mean_token_accuracy": 0.9468568116426468,
"num_tokens": 178938091.0,
"step": 1199
},
{
"epoch": 0.8132836326668926,
"grad_norm": 1.0307293977795946,
"learning_rate": 3.7533875338753383e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9486866071820259,
"num_tokens": 179083665.0,
"step": 1200
},
{
"epoch": 0.8139613690274483,
"grad_norm": 0.5343963015335058,
"learning_rate": 3.7398373983739835e-07,
"loss": 0.1501,
"mean_token_accuracy": 0.9515577852725983,
"num_tokens": 179234580.0,
"step": 1201
},
{
"epoch": 0.8146391053880041,
"grad_norm": 0.47269386542652564,
"learning_rate": 3.726287262872628e-07,
"loss": 0.1688,
"mean_token_accuracy": 0.9467170462012291,
"num_tokens": 179384638.0,
"step": 1202
},
{
"epoch": 0.8153168417485598,
"grad_norm": 0.622932001873934,
"learning_rate": 3.712737127371274e-07,
"loss": 0.1541,
"mean_token_accuracy": 0.94975396245718,
"num_tokens": 179532313.0,
"step": 1203
},
{
"epoch": 0.8159945781091156,
"grad_norm": 1.30977819206103,
"learning_rate": 3.6991869918699185e-07,
"loss": 0.1593,
"mean_token_accuracy": 0.9485208690166473,
"num_tokens": 179678201.0,
"step": 1204
},
{
"epoch": 0.8166723144696713,
"grad_norm": 0.49551797663980024,
"learning_rate": 3.6856368563685637e-07,
"loss": 0.1598,
"mean_token_accuracy": 0.9494869783520699,
"num_tokens": 179825896.0,
"step": 1205
},
{
"epoch": 0.817350050830227,
"grad_norm": 0.39597804024706906,
"learning_rate": 3.6720867208672084e-07,
"loss": 0.1529,
"mean_token_accuracy": 0.9507526159286499,
"num_tokens": 179975719.0,
"step": 1206
},
{
"epoch": 0.8180277871907828,
"grad_norm": 0.8735766662793337,
"learning_rate": 3.6585365853658536e-07,
"loss": 0.1685,
"mean_token_accuracy": 0.9466816782951355,
"num_tokens": 180123444.0,
"step": 1207
},
{
"epoch": 0.8187055235513385,
"grad_norm": 0.7259156579762289,
"learning_rate": 3.644986449864498e-07,
"loss": 0.161,
"mean_token_accuracy": 0.9495271146297455,
"num_tokens": 180273836.0,
"step": 1208
},
{
"epoch": 0.8193832599118943,
"grad_norm": 0.42037044850466443,
"learning_rate": 3.6314363143631434e-07,
"loss": 0.1615,
"mean_token_accuracy": 0.9484260380268097,
"num_tokens": 180419061.0,
"step": 1209
},
{
"epoch": 0.82006099627245,
"grad_norm": 1.2533075540817267,
"learning_rate": 3.6178861788617886e-07,
"loss": 0.1678,
"mean_token_accuracy": 0.9464852139353752,
"num_tokens": 180570458.0,
"step": 1210
},
{
"epoch": 0.8207387326330058,
"grad_norm": 0.5627775260496694,
"learning_rate": 3.604336043360434e-07,
"loss": 0.1627,
"mean_token_accuracy": 0.9483892321586609,
"num_tokens": 180719063.0,
"step": 1211
},
{
"epoch": 0.8214164689935615,
"grad_norm": 0.5266555398373794,
"learning_rate": 3.5907859078590785e-07,
"loss": 0.1674,
"mean_token_accuracy": 0.9465538933873177,
"num_tokens": 180869555.0,
"step": 1212
},
{
"epoch": 0.8220942053541173,
"grad_norm": 0.6971738026269859,
"learning_rate": 3.5772357723577237e-07,
"loss": 0.1758,
"mean_token_accuracy": 0.945346049964428,
"num_tokens": 181021477.0,
"step": 1213
},
{
"epoch": 0.8227719417146729,
"grad_norm": 0.5589925515652632,
"learning_rate": 3.5636856368563684e-07,
"loss": 0.161,
"mean_token_accuracy": 0.9486629068851471,
"num_tokens": 181171918.0,
"step": 1214
},
{
"epoch": 0.8234496780752287,
"grad_norm": 0.9941559869041463,
"learning_rate": 3.550135501355013e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.948122650384903,
"num_tokens": 181318018.0,
"step": 1215
},
{
"epoch": 0.8241274144357845,
"grad_norm": 0.4344537392230086,
"learning_rate": 3.536585365853658e-07,
"loss": 0.1577,
"mean_token_accuracy": 0.9494742602109909,
"num_tokens": 181467329.0,
"step": 1216
},
{
"epoch": 0.8248051507963402,
"grad_norm": 0.38916370294651115,
"learning_rate": 3.523035230352303e-07,
"loss": 0.152,
"mean_token_accuracy": 0.9506790786981583,
"num_tokens": 181613107.0,
"step": 1217
},
{
"epoch": 0.825482887156896,
"grad_norm": 0.46380417163685994,
"learning_rate": 3.5094850948509486e-07,
"loss": 0.1568,
"mean_token_accuracy": 0.9499632716178894,
"num_tokens": 181761115.0,
"step": 1218
},
{
"epoch": 0.8261606235174517,
"grad_norm": 0.3606566170578932,
"learning_rate": 3.4959349593495933e-07,
"loss": 0.154,
"mean_token_accuracy": 0.9500089287757874,
"num_tokens": 181911928.0,
"step": 1219
},
{
"epoch": 0.8268383598780075,
"grad_norm": 0.5330045233966407,
"learning_rate": 3.4823848238482385e-07,
"loss": 0.1565,
"mean_token_accuracy": 0.9503036737442017,
"num_tokens": 182057199.0,
"step": 1220
},
{
"epoch": 0.8275160962385631,
"grad_norm": 0.383571091647365,
"learning_rate": 3.468834688346883e-07,
"loss": 0.1527,
"mean_token_accuracy": 0.9508719816803932,
"num_tokens": 182208138.0,
"step": 1221
},
{
"epoch": 0.8281938325991189,
"grad_norm": 0.46665590559776177,
"learning_rate": 3.4552845528455284e-07,
"loss": 0.1695,
"mean_token_accuracy": 0.9455024227499962,
"num_tokens": 182360961.0,
"step": 1222
},
{
"epoch": 0.8288715689596747,
"grad_norm": 0.8179154615736234,
"learning_rate": 3.441734417344173e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9481576010584831,
"num_tokens": 182510718.0,
"step": 1223
},
{
"epoch": 0.8295493053202304,
"grad_norm": 0.4497187886461492,
"learning_rate": 3.428184281842818e-07,
"loss": 0.1732,
"mean_token_accuracy": 0.9444357454776764,
"num_tokens": 182658094.0,
"step": 1224
},
{
"epoch": 0.8302270416807862,
"grad_norm": 0.5485640710688537,
"learning_rate": 3.4146341463414634e-07,
"loss": 0.158,
"mean_token_accuracy": 0.9486449137330055,
"num_tokens": 182809300.0,
"step": 1225
},
{
"epoch": 0.8309047780413419,
"grad_norm": 0.6068099348141154,
"learning_rate": 3.4010840108401086e-07,
"loss": 0.1504,
"mean_token_accuracy": 0.9518176093697548,
"num_tokens": 182960527.0,
"step": 1226
},
{
"epoch": 0.8315825144018977,
"grad_norm": 0.5377496770350054,
"learning_rate": 3.3875338753387533e-07,
"loss": 0.1656,
"mean_token_accuracy": 0.94700937718153,
"num_tokens": 183109254.0,
"step": 1227
},
{
"epoch": 0.8322602507624534,
"grad_norm": 0.41807441815831087,
"learning_rate": 3.3739837398373985e-07,
"loss": 0.1718,
"mean_token_accuracy": 0.9449028000235558,
"num_tokens": 183261181.0,
"step": 1228
},
{
"epoch": 0.8329379871230091,
"grad_norm": 0.4427331296597803,
"learning_rate": 3.360433604336043e-07,
"loss": 0.1545,
"mean_token_accuracy": 0.9506580010056496,
"num_tokens": 183407209.0,
"step": 1229
},
{
"epoch": 0.8336157234835649,
"grad_norm": 0.9700740822615448,
"learning_rate": 3.3468834688346883e-07,
"loss": 0.1608,
"mean_token_accuracy": 0.9479135498404503,
"num_tokens": 183553470.0,
"step": 1230
},
{
"epoch": 0.8342934598441206,
"grad_norm": 1.0011705970363838,
"learning_rate": 3.333333333333333e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.9486960023641586,
"num_tokens": 183705918.0,
"step": 1231
},
{
"epoch": 0.8349711962046764,
"grad_norm": 1.2599432294982404,
"learning_rate": 3.3197831978319777e-07,
"loss": 0.1678,
"mean_token_accuracy": 0.9466327428817749,
"num_tokens": 183856899.0,
"step": 1232
},
{
"epoch": 0.8356489325652321,
"grad_norm": 0.44463240271531623,
"learning_rate": 3.3062330623306234e-07,
"loss": 0.1632,
"mean_token_accuracy": 0.9478116035461426,
"num_tokens": 184006826.0,
"step": 1233
},
{
"epoch": 0.8363266689257879,
"grad_norm": 0.3853389407085814,
"learning_rate": 3.292682926829268e-07,
"loss": 0.1569,
"mean_token_accuracy": 0.949841283261776,
"num_tokens": 184160764.0,
"step": 1234
},
{
"epoch": 0.8370044052863436,
"grad_norm": 0.45767149703589827,
"learning_rate": 3.279132791327913e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.9479725807905197,
"num_tokens": 184303969.0,
"step": 1235
},
{
"epoch": 0.8376821416468994,
"grad_norm": 1.9773177245102982,
"learning_rate": 3.265582655826558e-07,
"loss": 0.1654,
"mean_token_accuracy": 0.9471398890018463,
"num_tokens": 184450526.0,
"step": 1236
},
{
"epoch": 0.8383598780074552,
"grad_norm": 0.4918123062692637,
"learning_rate": 3.252032520325203e-07,
"loss": 0.1714,
"mean_token_accuracy": 0.944765530526638,
"num_tokens": 184602809.0,
"step": 1237
},
{
"epoch": 0.8390376143680108,
"grad_norm": 0.3901587167469587,
"learning_rate": 3.238482384823848e-07,
"loss": 0.1614,
"mean_token_accuracy": 0.9484593421220779,
"num_tokens": 184754853.0,
"step": 1238
},
{
"epoch": 0.8397153507285666,
"grad_norm": 1.287255346321022,
"learning_rate": 3.224932249322493e-07,
"loss": 0.1601,
"mean_token_accuracy": 0.9491341561079025,
"num_tokens": 184904472.0,
"step": 1239
},
{
"epoch": 0.8403930870891223,
"grad_norm": 0.4942027850122778,
"learning_rate": 3.211382113821138e-07,
"loss": 0.1686,
"mean_token_accuracy": 0.9465633928775787,
"num_tokens": 185051283.0,
"step": 1240
},
{
"epoch": 0.8410708234496781,
"grad_norm": 0.4421386762305037,
"learning_rate": 3.1978319783197834e-07,
"loss": 0.1627,
"mean_token_accuracy": 0.9489489793777466,
"num_tokens": 185198011.0,
"step": 1241
},
{
"epoch": 0.8417485598102338,
"grad_norm": 0.5126437317253629,
"learning_rate": 3.184281842818428e-07,
"loss": 0.1704,
"mean_token_accuracy": 0.9451634883880615,
"num_tokens": 185348283.0,
"step": 1242
},
{
"epoch": 0.8424262961707896,
"grad_norm": 0.4640809390185053,
"learning_rate": 3.170731707317073e-07,
"loss": 0.1662,
"mean_token_accuracy": 0.9478862881660461,
"num_tokens": 185503045.0,
"step": 1243
},
{
"epoch": 0.8431040325313454,
"grad_norm": 0.5888631565883243,
"learning_rate": 3.157181571815718e-07,
"loss": 0.163,
"mean_token_accuracy": 0.9481711536645889,
"num_tokens": 185650437.0,
"step": 1244
},
{
"epoch": 0.843781768891901,
"grad_norm": 0.6289341440241704,
"learning_rate": 3.143631436314363e-07,
"loss": 0.1649,
"mean_token_accuracy": 0.947802871465683,
"num_tokens": 185803124.0,
"step": 1245
},
{
"epoch": 0.8444595052524568,
"grad_norm": 0.587262406475338,
"learning_rate": 3.130081300813008e-07,
"loss": 0.156,
"mean_token_accuracy": 0.9502067714929581,
"num_tokens": 185949400.0,
"step": 1246
},
{
"epoch": 0.8451372416130125,
"grad_norm": 0.4966697299947886,
"learning_rate": 3.116531165311653e-07,
"loss": 0.1568,
"mean_token_accuracy": 0.9491457492113113,
"num_tokens": 186101066.0,
"step": 1247
},
{
"epoch": 0.8458149779735683,
"grad_norm": 0.4232902254992847,
"learning_rate": 3.102981029810298e-07,
"loss": 0.1668,
"mean_token_accuracy": 0.9468086063861847,
"num_tokens": 186252511.0,
"step": 1248
},
{
"epoch": 0.846492714334124,
"grad_norm": 0.3604518735048993,
"learning_rate": 3.0894308943089434e-07,
"loss": 0.157,
"mean_token_accuracy": 0.9492731615900993,
"num_tokens": 186404888.0,
"step": 1249
},
{
"epoch": 0.8471704506946798,
"grad_norm": 0.40100425292989167,
"learning_rate": 3.075880758807588e-07,
"loss": 0.1546,
"mean_token_accuracy": 0.9506191238760948,
"num_tokens": 186552971.0,
"step": 1250
},
{
"epoch": 0.8478481870552355,
"grad_norm": 0.37038645677460275,
"learning_rate": 3.0623306233062327e-07,
"loss": 0.1668,
"mean_token_accuracy": 0.9471093341708183,
"num_tokens": 186701736.0,
"step": 1251
},
{
"epoch": 0.8485259234157913,
"grad_norm": 0.5248022281276231,
"learning_rate": 3.048780487804878e-07,
"loss": 0.1675,
"mean_token_accuracy": 0.9471896588802338,
"num_tokens": 186849104.0,
"step": 1252
},
{
"epoch": 0.849203659776347,
"grad_norm": 0.8811872116727292,
"learning_rate": 3.0352303523035226e-07,
"loss": 0.1596,
"mean_token_accuracy": 0.9485187977552414,
"num_tokens": 186998111.0,
"step": 1253
},
{
"epoch": 0.8498813961369027,
"grad_norm": 0.5996673555873404,
"learning_rate": 3.021680216802168e-07,
"loss": 0.1613,
"mean_token_accuracy": 0.9474183395504951,
"num_tokens": 187142004.0,
"step": 1254
},
{
"epoch": 0.8505591324974585,
"grad_norm": 0.42858683016562826,
"learning_rate": 3.008130081300813e-07,
"loss": 0.1646,
"mean_token_accuracy": 0.9466542750597,
"num_tokens": 187292467.0,
"step": 1255
},
{
"epoch": 0.8512368688580142,
"grad_norm": 0.39053228229918563,
"learning_rate": 2.994579945799458e-07,
"loss": 0.1586,
"mean_token_accuracy": 0.9496741071343422,
"num_tokens": 187438951.0,
"step": 1256
},
{
"epoch": 0.85191460521857,
"grad_norm": 0.3980012222167013,
"learning_rate": 2.981029810298103e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.948141522705555,
"num_tokens": 187586449.0,
"step": 1257
},
{
"epoch": 0.8525923415791257,
"grad_norm": 0.48893145810215405,
"learning_rate": 2.967479674796748e-07,
"loss": 0.1632,
"mean_token_accuracy": 0.9474975019693375,
"num_tokens": 187733743.0,
"step": 1258
},
{
"epoch": 0.8532700779396815,
"grad_norm": 0.4166335922064512,
"learning_rate": 2.9539295392953927e-07,
"loss": 0.1653,
"mean_token_accuracy": 0.9472394436597824,
"num_tokens": 187880144.0,
"step": 1259
},
{
"epoch": 0.8539478143002373,
"grad_norm": 0.5566663810368183,
"learning_rate": 2.940379403794038e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9473065361380577,
"num_tokens": 188031774.0,
"step": 1260
},
{
"epoch": 0.8546255506607929,
"grad_norm": 0.44170510987096306,
"learning_rate": 2.9268292682926825e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.9481263235211372,
"num_tokens": 188180795.0,
"step": 1261
},
{
"epoch": 0.8553032870213487,
"grad_norm": 0.456152885141962,
"learning_rate": 2.913279132791328e-07,
"loss": 0.1674,
"mean_token_accuracy": 0.9467766508460045,
"num_tokens": 188327661.0,
"step": 1262
},
{
"epoch": 0.8559810233819044,
"grad_norm": 0.48010268631689385,
"learning_rate": 2.899728997289973e-07,
"loss": 0.1603,
"mean_token_accuracy": 0.949536144733429,
"num_tokens": 188478001.0,
"step": 1263
},
{
"epoch": 0.8566587597424602,
"grad_norm": 0.4825523038347356,
"learning_rate": 2.886178861788618e-07,
"loss": 0.1777,
"mean_token_accuracy": 0.9437452927231789,
"num_tokens": 188626348.0,
"step": 1264
},
{
"epoch": 0.8573364961030159,
"grad_norm": 0.5861677090234955,
"learning_rate": 2.872628726287263e-07,
"loss": 0.1747,
"mean_token_accuracy": 0.9444176256656647,
"num_tokens": 188776297.0,
"step": 1265
},
{
"epoch": 0.8580142324635717,
"grad_norm": 0.39427539151202734,
"learning_rate": 2.859078590785908e-07,
"loss": 0.1646,
"mean_token_accuracy": 0.9467851668596268,
"num_tokens": 188923403.0,
"step": 1266
},
{
"epoch": 0.8586919688241275,
"grad_norm": 0.40564905252247896,
"learning_rate": 2.8455284552845527e-07,
"loss": 0.1648,
"mean_token_accuracy": 0.9476913884282112,
"num_tokens": 189076475.0,
"step": 1267
},
{
"epoch": 0.8593697051846831,
"grad_norm": 0.4186772728839254,
"learning_rate": 2.8319783197831973e-07,
"loss": 0.1674,
"mean_token_accuracy": 0.9472432062029839,
"num_tokens": 189230576.0,
"step": 1268
},
{
"epoch": 0.8600474415452389,
"grad_norm": 0.4390604343607371,
"learning_rate": 2.8184281842818425e-07,
"loss": 0.1674,
"mean_token_accuracy": 0.9469037428498268,
"num_tokens": 189381092.0,
"step": 1269
},
{
"epoch": 0.8607251779057946,
"grad_norm": 0.40477830196201936,
"learning_rate": 2.8048780487804877e-07,
"loss": 0.1651,
"mean_token_accuracy": 0.9463135749101639,
"num_tokens": 189529988.0,
"step": 1270
},
{
"epoch": 0.8614029142663504,
"grad_norm": 0.4461257655942589,
"learning_rate": 2.791327913279133e-07,
"loss": 0.1569,
"mean_token_accuracy": 0.9497584477066994,
"num_tokens": 189679438.0,
"step": 1271
},
{
"epoch": 0.8620806506269061,
"grad_norm": 0.627751042354744,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.1758,
"mean_token_accuracy": 0.9447125047445297,
"num_tokens": 189827211.0,
"step": 1272
},
{
"epoch": 0.8627583869874619,
"grad_norm": 0.5029254430147256,
"learning_rate": 2.764227642276423e-07,
"loss": 0.1681,
"mean_token_accuracy": 0.9464574307203293,
"num_tokens": 189975220.0,
"step": 1273
},
{
"epoch": 0.8634361233480177,
"grad_norm": 0.9232385351058474,
"learning_rate": 2.7506775067750675e-07,
"loss": 0.1551,
"mean_token_accuracy": 0.9502605646848679,
"num_tokens": 190127470.0,
"step": 1274
},
{
"epoch": 0.8641138597085734,
"grad_norm": 0.44449553850505596,
"learning_rate": 2.7371273712737127e-07,
"loss": 0.1616,
"mean_token_accuracy": 0.9485994949936867,
"num_tokens": 190280037.0,
"step": 1275
},
{
"epoch": 0.8647915960691291,
"grad_norm": 2.4731755358390917,
"learning_rate": 2.7235772357723573e-07,
"loss": 0.158,
"mean_token_accuracy": 0.9491154477000237,
"num_tokens": 190429590.0,
"step": 1276
},
{
"epoch": 0.8654693324296848,
"grad_norm": 0.43476345107395914,
"learning_rate": 2.7100271002710025e-07,
"loss": 0.1616,
"mean_token_accuracy": 0.9486151933670044,
"num_tokens": 190580748.0,
"step": 1277
},
{
"epoch": 0.8661470687902406,
"grad_norm": 0.4611579965695597,
"learning_rate": 2.6964769647696477e-07,
"loss": 0.1633,
"mean_token_accuracy": 0.948381170630455,
"num_tokens": 190732409.0,
"step": 1278
},
{
"epoch": 0.8668248051507963,
"grad_norm": 0.3853333697956996,
"learning_rate": 2.682926829268293e-07,
"loss": 0.1569,
"mean_token_accuracy": 0.9489934965968132,
"num_tokens": 190883434.0,
"step": 1279
},
{
"epoch": 0.8675025415113521,
"grad_norm": 0.5793024017953724,
"learning_rate": 2.6693766937669376e-07,
"loss": 0.1661,
"mean_token_accuracy": 0.9469688385725021,
"num_tokens": 191031276.0,
"step": 1280
},
{
"epoch": 0.8681802778719079,
"grad_norm": 0.4653940866114761,
"learning_rate": 2.655826558265583e-07,
"loss": 0.174,
"mean_token_accuracy": 0.944032609462738,
"num_tokens": 191180550.0,
"step": 1281
},
{
"epoch": 0.8688580142324636,
"grad_norm": 0.5295147327059975,
"learning_rate": 2.6422764227642274e-07,
"loss": 0.1539,
"mean_token_accuracy": 0.950570173561573,
"num_tokens": 191328672.0,
"step": 1282
},
{
"epoch": 0.8695357505930194,
"grad_norm": 0.4839416614450446,
"learning_rate": 2.6287262872628726e-07,
"loss": 0.1604,
"mean_token_accuracy": 0.9487968757748604,
"num_tokens": 191476358.0,
"step": 1283
},
{
"epoch": 0.870213486953575,
"grad_norm": 0.5506681952992758,
"learning_rate": 2.6151761517615173e-07,
"loss": 0.1591,
"mean_token_accuracy": 0.9497069045901299,
"num_tokens": 191616680.0,
"step": 1284
},
{
"epoch": 0.8708912233141308,
"grad_norm": 0.5124878675054456,
"learning_rate": 2.6016260162601625e-07,
"loss": 0.1675,
"mean_token_accuracy": 0.9464941918849945,
"num_tokens": 191762437.0,
"step": 1285
},
{
"epoch": 0.8715689596746865,
"grad_norm": 0.6914476460367316,
"learning_rate": 2.5880758807588077e-07,
"loss": 0.1652,
"mean_token_accuracy": 0.948076955974102,
"num_tokens": 191912068.0,
"step": 1286
},
{
"epoch": 0.8722466960352423,
"grad_norm": 0.513420792494206,
"learning_rate": 2.5745257452574524e-07,
"loss": 0.1609,
"mean_token_accuracy": 0.9484427720308304,
"num_tokens": 192062536.0,
"step": 1287
},
{
"epoch": 0.8729244323957981,
"grad_norm": 0.44814166594814653,
"learning_rate": 2.5609756097560976e-07,
"loss": 0.1602,
"mean_token_accuracy": 0.9484685808420181,
"num_tokens": 192211249.0,
"step": 1288
},
{
"epoch": 0.8736021687563538,
"grad_norm": 0.471365632422765,
"learning_rate": 2.547425474254742e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9492049291729927,
"num_tokens": 192353299.0,
"step": 1289
},
{
"epoch": 0.8742799051169096,
"grad_norm": 0.46964537723716343,
"learning_rate": 2.5338753387533874e-07,
"loss": 0.1607,
"mean_token_accuracy": 0.9494825899600983,
"num_tokens": 192502090.0,
"step": 1290
},
{
"epoch": 0.8749576414774652,
"grad_norm": 0.778237123761203,
"learning_rate": 2.520325203252032e-07,
"loss": 0.1649,
"mean_token_accuracy": 0.9470194801688194,
"num_tokens": 192653755.0,
"step": 1291
},
{
"epoch": 0.875635377838021,
"grad_norm": 1.4437998607866556,
"learning_rate": 2.5067750677506773e-07,
"loss": 0.1706,
"mean_token_accuracy": 0.9462285861372948,
"num_tokens": 192802868.0,
"step": 1292
},
{
"epoch": 0.8763131141985767,
"grad_norm": 0.39360519635219227,
"learning_rate": 2.4932249322493225e-07,
"loss": 0.1641,
"mean_token_accuracy": 0.9470438733696938,
"num_tokens": 192949636.0,
"step": 1293
},
{
"epoch": 0.8769908505591325,
"grad_norm": 0.4870040325471479,
"learning_rate": 2.479674796747967e-07,
"loss": 0.1493,
"mean_token_accuracy": 0.9510429948568344,
"num_tokens": 193097651.0,
"step": 1294
},
{
"epoch": 0.8776685869196882,
"grad_norm": 0.5810436594415671,
"learning_rate": 2.4661246612466123e-07,
"loss": 0.1628,
"mean_token_accuracy": 0.9486610442399979,
"num_tokens": 193244672.0,
"step": 1295
},
{
"epoch": 0.878346323280244,
"grad_norm": 0.4662810869506531,
"learning_rate": 2.4525745257452575e-07,
"loss": 0.1519,
"mean_token_accuracy": 0.9504307880997658,
"num_tokens": 193395885.0,
"step": 1296
},
{
"epoch": 0.8790240596407998,
"grad_norm": 0.367052086711362,
"learning_rate": 2.439024390243902e-07,
"loss": 0.1617,
"mean_token_accuracy": 0.947607047855854,
"num_tokens": 193544593.0,
"step": 1297
},
{
"epoch": 0.8797017960013555,
"grad_norm": 0.4134829891271465,
"learning_rate": 2.4254742547425474e-07,
"loss": 0.1654,
"mean_token_accuracy": 0.9463229477405548,
"num_tokens": 193694382.0,
"step": 1298
},
{
"epoch": 0.8803795323619112,
"grad_norm": 0.5142223217380762,
"learning_rate": 2.4119241192411926e-07,
"loss": 0.1629,
"mean_token_accuracy": 0.9481227025389671,
"num_tokens": 193844950.0,
"step": 1299
},
{
"epoch": 0.8810572687224669,
"grad_norm": 0.5725821844973095,
"learning_rate": 2.3983739837398373e-07,
"loss": 0.1761,
"mean_token_accuracy": 0.944548599421978,
"num_tokens": 193991353.0,
"step": 1300
},
{
"epoch": 0.8817350050830227,
"grad_norm": 0.5356025903004101,
"learning_rate": 2.3848238482384825e-07,
"loss": 0.172,
"mean_token_accuracy": 0.9450171962380409,
"num_tokens": 194136677.0,
"step": 1301
},
{
"epoch": 0.8824127414435784,
"grad_norm": 0.48877085346696164,
"learning_rate": 2.3712737127371274e-07,
"loss": 0.1659,
"mean_token_accuracy": 0.9471217319369316,
"num_tokens": 194286050.0,
"step": 1302
},
{
"epoch": 0.8830904778041342,
"grad_norm": 0.376632408570397,
"learning_rate": 2.3577235772357723e-07,
"loss": 0.1648,
"mean_token_accuracy": 0.9473475515842438,
"num_tokens": 194436091.0,
"step": 1303
},
{
"epoch": 0.88376821416469,
"grad_norm": 0.45486507931417874,
"learning_rate": 2.3441734417344173e-07,
"loss": 0.1607,
"mean_token_accuracy": 0.9485864788293839,
"num_tokens": 194586531.0,
"step": 1304
},
{
"epoch": 0.8844459505252457,
"grad_norm": 0.44784500489937396,
"learning_rate": 2.3306233062330622e-07,
"loss": 0.1624,
"mean_token_accuracy": 0.9483805522322655,
"num_tokens": 194737102.0,
"step": 1305
},
{
"epoch": 0.8851236868858015,
"grad_norm": 0.7993291555150185,
"learning_rate": 2.3170731707317074e-07,
"loss": 0.1674,
"mean_token_accuracy": 0.9464853033423424,
"num_tokens": 194889119.0,
"step": 1306
},
{
"epoch": 0.8858014232463571,
"grad_norm": 0.4379267853744643,
"learning_rate": 2.3035230352303523e-07,
"loss": 0.1671,
"mean_token_accuracy": 0.9470188841223717,
"num_tokens": 195036049.0,
"step": 1307
},
{
"epoch": 0.8864791596069129,
"grad_norm": 0.40040270745053264,
"learning_rate": 2.2899728997289973e-07,
"loss": 0.1623,
"mean_token_accuracy": 0.947534941136837,
"num_tokens": 195184127.0,
"step": 1308
},
{
"epoch": 0.8871568959674686,
"grad_norm": 0.36793694793042225,
"learning_rate": 2.2764227642276422e-07,
"loss": 0.1548,
"mean_token_accuracy": 0.9490689262747765,
"num_tokens": 195334385.0,
"step": 1309
},
{
"epoch": 0.8878346323280244,
"grad_norm": 0.36498410723940267,
"learning_rate": 2.2628726287262874e-07,
"loss": 0.1559,
"mean_token_accuracy": 0.949463352560997,
"num_tokens": 195482527.0,
"step": 1310
},
{
"epoch": 0.8885123686885802,
"grad_norm": 0.9601269538119754,
"learning_rate": 2.249322493224932e-07,
"loss": 0.1568,
"mean_token_accuracy": 0.949680283665657,
"num_tokens": 195633680.0,
"step": 1311
},
{
"epoch": 0.8891901050491359,
"grad_norm": 0.3973604680490306,
"learning_rate": 2.235772357723577e-07,
"loss": 0.1572,
"mean_token_accuracy": 0.9494841918349266,
"num_tokens": 195784698.0,
"step": 1312
},
{
"epoch": 0.8898678414096917,
"grad_norm": 0.4244837770458849,
"learning_rate": 2.222222222222222e-07,
"loss": 0.1647,
"mean_token_accuracy": 0.9473633095622063,
"num_tokens": 195930878.0,
"step": 1313
},
{
"epoch": 0.8905455777702473,
"grad_norm": 0.6840023007911671,
"learning_rate": 2.208672086720867e-07,
"loss": 0.1745,
"mean_token_accuracy": 0.94522675126791,
"num_tokens": 196081665.0,
"step": 1314
},
{
"epoch": 0.8912233141308031,
"grad_norm": 0.46451158595442055,
"learning_rate": 2.195121951219512e-07,
"loss": 0.1676,
"mean_token_accuracy": 0.9465636387467384,
"num_tokens": 196233517.0,
"step": 1315
},
{
"epoch": 0.8919010504913588,
"grad_norm": 0.40412358259357356,
"learning_rate": 2.181571815718157e-07,
"loss": 0.1698,
"mean_token_accuracy": 0.9449945688247681,
"num_tokens": 196383614.0,
"step": 1316
},
{
"epoch": 0.8925787868519146,
"grad_norm": 0.5062261543786651,
"learning_rate": 2.1680216802168022e-07,
"loss": 0.1637,
"mean_token_accuracy": 0.9466878697276115,
"num_tokens": 196532369.0,
"step": 1317
},
{
"epoch": 0.8932565232124704,
"grad_norm": 0.6978443716648501,
"learning_rate": 2.154471544715447e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9484983906149864,
"num_tokens": 196676914.0,
"step": 1318
},
{
"epoch": 0.8939342595730261,
"grad_norm": 0.6617405961208453,
"learning_rate": 2.140921409214092e-07,
"loss": 0.1597,
"mean_token_accuracy": 0.9493363499641418,
"num_tokens": 196822084.0,
"step": 1319
},
{
"epoch": 0.8946119959335819,
"grad_norm": 0.5560405235740569,
"learning_rate": 2.127371273712737e-07,
"loss": 0.1722,
"mean_token_accuracy": 0.9459304660558701,
"num_tokens": 196970332.0,
"step": 1320
},
{
"epoch": 0.8952897322941376,
"grad_norm": 0.40050381837066196,
"learning_rate": 2.1138211382113822e-07,
"loss": 0.1663,
"mean_token_accuracy": 0.9466271102428436,
"num_tokens": 197119174.0,
"step": 1321
},
{
"epoch": 0.8959674686546933,
"grad_norm": 0.6321057538339169,
"learning_rate": 2.100271002710027e-07,
"loss": 0.1605,
"mean_token_accuracy": 0.9481549188494682,
"num_tokens": 197268637.0,
"step": 1322
},
{
"epoch": 0.896645205015249,
"grad_norm": 0.45622751792515764,
"learning_rate": 2.086720867208672e-07,
"loss": 0.1672,
"mean_token_accuracy": 0.9462346211075783,
"num_tokens": 197415386.0,
"step": 1323
},
{
"epoch": 0.8973229413758048,
"grad_norm": 0.3687854548382046,
"learning_rate": 2.073170731707317e-07,
"loss": 0.1668,
"mean_token_accuracy": 0.9470949769020081,
"num_tokens": 197558590.0,
"step": 1324
},
{
"epoch": 0.8980006777363606,
"grad_norm": 0.3640691148557062,
"learning_rate": 2.0596205962059622e-07,
"loss": 0.1598,
"mean_token_accuracy": 0.9491933286190033,
"num_tokens": 197708876.0,
"step": 1325
},
{
"epoch": 0.8986784140969163,
"grad_norm": 0.43915756263748035,
"learning_rate": 2.046070460704607e-07,
"loss": 0.1642,
"mean_token_accuracy": 0.9470717161893845,
"num_tokens": 197857476.0,
"step": 1326
},
{
"epoch": 0.8993561504574721,
"grad_norm": 0.3884628942763983,
"learning_rate": 2.032520325203252e-07,
"loss": 0.1452,
"mean_token_accuracy": 0.9524314031004906,
"num_tokens": 198004317.0,
"step": 1327
},
{
"epoch": 0.9000338868180278,
"grad_norm": 0.559885511493827,
"learning_rate": 2.0189701897018967e-07,
"loss": 0.1635,
"mean_token_accuracy": 0.9473034217953682,
"num_tokens": 198157023.0,
"step": 1328
},
{
"epoch": 0.9007116231785836,
"grad_norm": 1.286366814416818,
"learning_rate": 2.005420054200542e-07,
"loss": 0.1465,
"mean_token_accuracy": 0.9523212388157845,
"num_tokens": 198301609.0,
"step": 1329
},
{
"epoch": 0.9013893595391392,
"grad_norm": 0.4470361145569467,
"learning_rate": 1.9918699186991868e-07,
"loss": 0.172,
"mean_token_accuracy": 0.9452895820140839,
"num_tokens": 198450025.0,
"step": 1330
},
{
"epoch": 0.902067095899695,
"grad_norm": 0.5056161190236214,
"learning_rate": 1.9783197831978317e-07,
"loss": 0.1559,
"mean_token_accuracy": 0.9498795047402382,
"num_tokens": 198595265.0,
"step": 1331
},
{
"epoch": 0.9027448322602508,
"grad_norm": 0.4705626378527346,
"learning_rate": 1.9647696476964767e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.9484021738171577,
"num_tokens": 198743782.0,
"step": 1332
},
{
"epoch": 0.9034225686208065,
"grad_norm": 0.7139510920836756,
"learning_rate": 1.951219512195122e-07,
"loss": 0.1548,
"mean_token_accuracy": 0.9500160440802574,
"num_tokens": 198892693.0,
"step": 1333
},
{
"epoch": 0.9041003049813623,
"grad_norm": 0.4346157528632144,
"learning_rate": 1.9376693766937668e-07,
"loss": 0.174,
"mean_token_accuracy": 0.9452803283929825,
"num_tokens": 199047544.0,
"step": 1334
},
{
"epoch": 0.904778041341918,
"grad_norm": 0.46040849660604394,
"learning_rate": 1.9241192411924117e-07,
"loss": 0.1604,
"mean_token_accuracy": 0.9492721632122993,
"num_tokens": 199197511.0,
"step": 1335
},
{
"epoch": 0.9054557777024738,
"grad_norm": 0.5077735426247331,
"learning_rate": 1.910569105691057e-07,
"loss": 0.1583,
"mean_token_accuracy": 0.9483174160122871,
"num_tokens": 199350131.0,
"step": 1336
},
{
"epoch": 0.9061335140630294,
"grad_norm": 0.5822238393967466,
"learning_rate": 1.897018970189702e-07,
"loss": 0.162,
"mean_token_accuracy": 0.9482871666550636,
"num_tokens": 199498888.0,
"step": 1337
},
{
"epoch": 0.9068112504235852,
"grad_norm": 0.9963446979708105,
"learning_rate": 1.8834688346883468e-07,
"loss": 0.1676,
"mean_token_accuracy": 0.9468878507614136,
"num_tokens": 199650041.0,
"step": 1338
},
{
"epoch": 0.9074889867841409,
"grad_norm": 0.4094642484081531,
"learning_rate": 1.8699186991869917e-07,
"loss": 0.1583,
"mean_token_accuracy": 0.9493989273905754,
"num_tokens": 199799651.0,
"step": 1339
},
{
"epoch": 0.9081667231446967,
"grad_norm": 0.4044904055612835,
"learning_rate": 1.856368563685637e-07,
"loss": 0.173,
"mean_token_accuracy": 0.9452235251665115,
"num_tokens": 199949211.0,
"step": 1340
},
{
"epoch": 0.9088444595052525,
"grad_norm": 0.43063638556652495,
"learning_rate": 1.8428184281842819e-07,
"loss": 0.1547,
"mean_token_accuracy": 0.9505686908960342,
"num_tokens": 200095988.0,
"step": 1341
},
{
"epoch": 0.9095221958658082,
"grad_norm": 0.4829193370579189,
"learning_rate": 1.8292682926829268e-07,
"loss": 0.1544,
"mean_token_accuracy": 0.9502275586128235,
"num_tokens": 200250528.0,
"step": 1342
},
{
"epoch": 0.910199932226364,
"grad_norm": 0.4802569071665241,
"learning_rate": 1.8157181571815717e-07,
"loss": 0.1624,
"mean_token_accuracy": 0.9475988522171974,
"num_tokens": 200403099.0,
"step": 1343
},
{
"epoch": 0.9108776685869197,
"grad_norm": 0.8898908368334608,
"learning_rate": 1.802168021680217e-07,
"loss": 0.1566,
"mean_token_accuracy": 0.9496802464127541,
"num_tokens": 200553689.0,
"step": 1344
},
{
"epoch": 0.9115554049474754,
"grad_norm": 0.9086836656117131,
"learning_rate": 1.7886178861788619e-07,
"loss": 0.1607,
"mean_token_accuracy": 0.947704590857029,
"num_tokens": 200701501.0,
"step": 1345
},
{
"epoch": 0.9122331413080311,
"grad_norm": 0.5054363586713749,
"learning_rate": 1.7750677506775065e-07,
"loss": 0.1569,
"mean_token_accuracy": 0.9495393559336662,
"num_tokens": 200850046.0,
"step": 1346
},
{
"epoch": 0.9129108776685869,
"grad_norm": 0.48926660077240064,
"learning_rate": 1.7615176151761515e-07,
"loss": 0.1552,
"mean_token_accuracy": 0.9491599351167679,
"num_tokens": 201000638.0,
"step": 1347
},
{
"epoch": 0.9135886140291427,
"grad_norm": 0.48134549461282383,
"learning_rate": 1.7479674796747966e-07,
"loss": 0.1629,
"mean_token_accuracy": 0.9479363709688187,
"num_tokens": 201149168.0,
"step": 1348
},
{
"epoch": 0.9142663503896984,
"grad_norm": 0.8390624871312155,
"learning_rate": 1.7344173441734416e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9488432630896568,
"num_tokens": 201294436.0,
"step": 1349
},
{
"epoch": 0.9149440867502542,
"grad_norm": 0.5602856755239709,
"learning_rate": 1.7208672086720865e-07,
"loss": 0.1624,
"mean_token_accuracy": 0.9480102583765984,
"num_tokens": 201441840.0,
"step": 1350
},
{
"epoch": 0.9156218231108099,
"grad_norm": 0.4292552573013406,
"learning_rate": 1.7073170731707317e-07,
"loss": 0.1629,
"mean_token_accuracy": 0.9480935409665108,
"num_tokens": 201588791.0,
"step": 1351
},
{
"epoch": 0.9162995594713657,
"grad_norm": 0.425749476838467,
"learning_rate": 1.6937669376693766e-07,
"loss": 0.1609,
"mean_token_accuracy": 0.9492569714784622,
"num_tokens": 201737553.0,
"step": 1352
},
{
"epoch": 0.9169772958319213,
"grad_norm": 0.7023335491453301,
"learning_rate": 1.6802168021680216e-07,
"loss": 0.1761,
"mean_token_accuracy": 0.9445386901497841,
"num_tokens": 201885478.0,
"step": 1353
},
{
"epoch": 0.9176550321924771,
"grad_norm": 0.4780401071887224,
"learning_rate": 1.6666666666666665e-07,
"loss": 0.1712,
"mean_token_accuracy": 0.9456515088677406,
"num_tokens": 202035458.0,
"step": 1354
},
{
"epoch": 0.9183327685530329,
"grad_norm": 0.46240498917097805,
"learning_rate": 1.6531165311653117e-07,
"loss": 0.1556,
"mean_token_accuracy": 0.9500665068626404,
"num_tokens": 202184449.0,
"step": 1355
},
{
"epoch": 0.9190105049135886,
"grad_norm": 0.809794434334873,
"learning_rate": 1.6395663956639566e-07,
"loss": 0.1647,
"mean_token_accuracy": 0.9476128816604614,
"num_tokens": 202337184.0,
"step": 1356
},
{
"epoch": 0.9196882412741444,
"grad_norm": 0.7156898794358771,
"learning_rate": 1.6260162601626016e-07,
"loss": 0.155,
"mean_token_accuracy": 0.9504127278923988,
"num_tokens": 202490847.0,
"step": 1357
},
{
"epoch": 0.9203659776347001,
"grad_norm": 0.43154704578495007,
"learning_rate": 1.6124661246612465e-07,
"loss": 0.1481,
"mean_token_accuracy": 0.9516681507229805,
"num_tokens": 202642145.0,
"step": 1358
},
{
"epoch": 0.9210437139952559,
"grad_norm": 0.4127900350922141,
"learning_rate": 1.5989159891598917e-07,
"loss": 0.1583,
"mean_token_accuracy": 0.9486039876937866,
"num_tokens": 202793032.0,
"step": 1359
},
{
"epoch": 0.9217214503558115,
"grad_norm": 0.5934644429599674,
"learning_rate": 1.5853658536585366e-07,
"loss": 0.164,
"mean_token_accuracy": 0.9477408677339554,
"num_tokens": 202941213.0,
"step": 1360
},
{
"epoch": 0.9223991867163673,
"grad_norm": 0.5187626748589975,
"learning_rate": 1.5718157181571816e-07,
"loss": 0.165,
"mean_token_accuracy": 0.9467665180563927,
"num_tokens": 203089723.0,
"step": 1361
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.4700994490692931,
"learning_rate": 1.5582655826558265e-07,
"loss": 0.1616,
"mean_token_accuracy": 0.9481227323412895,
"num_tokens": 203239273.0,
"step": 1362
},
{
"epoch": 0.9237546594374788,
"grad_norm": 0.4619249541167841,
"learning_rate": 1.5447154471544717e-07,
"loss": 0.1693,
"mean_token_accuracy": 0.945893757045269,
"num_tokens": 203389451.0,
"step": 1363
},
{
"epoch": 0.9244323957980346,
"grad_norm": 0.5623005777167278,
"learning_rate": 1.5311653116531164e-07,
"loss": 0.1538,
"mean_token_accuracy": 0.9502845928072929,
"num_tokens": 203536972.0,
"step": 1364
},
{
"epoch": 0.9251101321585903,
"grad_norm": 0.3768508973610597,
"learning_rate": 1.5176151761517613e-07,
"loss": 0.1472,
"mean_token_accuracy": 0.952195554971695,
"num_tokens": 203688790.0,
"step": 1365
},
{
"epoch": 0.9257878685191461,
"grad_norm": 0.42153147866492585,
"learning_rate": 1.5040650406504065e-07,
"loss": 0.1624,
"mean_token_accuracy": 0.9478074163198471,
"num_tokens": 203832292.0,
"step": 1366
},
{
"epoch": 0.9264656048797018,
"grad_norm": 0.4192583902969117,
"learning_rate": 1.4905149051490514e-07,
"loss": 0.1646,
"mean_token_accuracy": 0.946540355682373,
"num_tokens": 203983349.0,
"step": 1367
},
{
"epoch": 0.9271433412402575,
"grad_norm": 0.4001297016006573,
"learning_rate": 1.4769647696476963e-07,
"loss": 0.1586,
"mean_token_accuracy": 0.9498896673321724,
"num_tokens": 204132144.0,
"step": 1368
},
{
"epoch": 0.9278210776008133,
"grad_norm": 0.4395702296591971,
"learning_rate": 1.4634146341463413e-07,
"loss": 0.1611,
"mean_token_accuracy": 0.9474482089281082,
"num_tokens": 204285881.0,
"step": 1369
},
{
"epoch": 0.928498813961369,
"grad_norm": 0.44906172664238575,
"learning_rate": 1.4498644986449865e-07,
"loss": 0.1549,
"mean_token_accuracy": 0.950348399579525,
"num_tokens": 204434052.0,
"step": 1370
},
{
"epoch": 0.9291765503219248,
"grad_norm": 0.4713204739330186,
"learning_rate": 1.4363143631436314e-07,
"loss": 0.1627,
"mean_token_accuracy": 0.9476472660899162,
"num_tokens": 204582273.0,
"step": 1371
},
{
"epoch": 0.9298542866824805,
"grad_norm": 0.4670922599752331,
"learning_rate": 1.4227642276422763e-07,
"loss": 0.1733,
"mean_token_accuracy": 0.944943904876709,
"num_tokens": 204734486.0,
"step": 1372
},
{
"epoch": 0.9305320230430363,
"grad_norm": 0.5480059042541625,
"learning_rate": 1.4092140921409213e-07,
"loss": 0.1578,
"mean_token_accuracy": 0.9495535492897034,
"num_tokens": 204882430.0,
"step": 1373
},
{
"epoch": 0.931209759403592,
"grad_norm": 0.40076036586833685,
"learning_rate": 1.3956639566395665e-07,
"loss": 0.1617,
"mean_token_accuracy": 0.9486064985394478,
"num_tokens": 205026873.0,
"step": 1374
},
{
"epoch": 0.9318874957641478,
"grad_norm": 0.47472929118812235,
"learning_rate": 1.3821138211382114e-07,
"loss": 0.1615,
"mean_token_accuracy": 0.9485347419977188,
"num_tokens": 205174583.0,
"step": 1375
},
{
"epoch": 0.9325652321247034,
"grad_norm": 0.5345554264982604,
"learning_rate": 1.3685636856368563e-07,
"loss": 0.1555,
"mean_token_accuracy": 0.9495607689023018,
"num_tokens": 205322381.0,
"step": 1376
},
{
"epoch": 0.9332429684852592,
"grad_norm": 0.5403128710953481,
"learning_rate": 1.3550135501355013e-07,
"loss": 0.1545,
"mean_token_accuracy": 0.9499097019433975,
"num_tokens": 205470320.0,
"step": 1377
},
{
"epoch": 0.933920704845815,
"grad_norm": 0.4233625714626548,
"learning_rate": 1.3414634146341465e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.9478924572467804,
"num_tokens": 205618922.0,
"step": 1378
},
{
"epoch": 0.9345984412063707,
"grad_norm": 0.3872504699509748,
"learning_rate": 1.3279132791327914e-07,
"loss": 0.1569,
"mean_token_accuracy": 0.9503503814339638,
"num_tokens": 205769081.0,
"step": 1379
},
{
"epoch": 0.9352761775669265,
"grad_norm": 0.5570599491065501,
"learning_rate": 1.3143631436314363e-07,
"loss": 0.1567,
"mean_token_accuracy": 0.9494674950838089,
"num_tokens": 205920667.0,
"step": 1380
},
{
"epoch": 0.9359539139274822,
"grad_norm": 1.5008149195955116,
"learning_rate": 1.3008130081300813e-07,
"loss": 0.1523,
"mean_token_accuracy": 0.9510042071342468,
"num_tokens": 206070639.0,
"step": 1381
},
{
"epoch": 0.936631650288038,
"grad_norm": 0.3892147335254059,
"learning_rate": 1.2872628726287262e-07,
"loss": 0.1617,
"mean_token_accuracy": 0.9483537450432777,
"num_tokens": 206217381.0,
"step": 1382
},
{
"epoch": 0.9373093866485936,
"grad_norm": 0.5219739171434112,
"learning_rate": 1.273712737127371e-07,
"loss": 0.1625,
"mean_token_accuracy": 0.9483264535665512,
"num_tokens": 206370560.0,
"step": 1383
},
{
"epoch": 0.9379871230091494,
"grad_norm": 0.41583308606483665,
"learning_rate": 1.260162601626016e-07,
"loss": 0.1619,
"mean_token_accuracy": 0.9483330249786377,
"num_tokens": 206518013.0,
"step": 1384
},
{
"epoch": 0.9386648593697052,
"grad_norm": 0.4161834454505529,
"learning_rate": 1.2466124661246612e-07,
"loss": 0.164,
"mean_token_accuracy": 0.9477608054876328,
"num_tokens": 206667634.0,
"step": 1385
},
{
"epoch": 0.9393425957302609,
"grad_norm": 1.122778950661849,
"learning_rate": 1.2330623306233062e-07,
"loss": 0.1587,
"mean_token_accuracy": 0.9485376551747322,
"num_tokens": 206815383.0,
"step": 1386
},
{
"epoch": 0.9400203320908167,
"grad_norm": 1.0992782123231566,
"learning_rate": 1.219512195121951e-07,
"loss": 0.1708,
"mean_token_accuracy": 0.945359356701374,
"num_tokens": 206963461.0,
"step": 1387
},
{
"epoch": 0.9406980684513724,
"grad_norm": 0.5225790434291085,
"learning_rate": 1.2059620596205963e-07,
"loss": 0.1518,
"mean_token_accuracy": 0.9507160186767578,
"num_tokens": 207113679.0,
"step": 1388
},
{
"epoch": 0.9413758048119282,
"grad_norm": 0.4292828147249612,
"learning_rate": 1.1924119241192412e-07,
"loss": 0.15,
"mean_token_accuracy": 0.9513011053204536,
"num_tokens": 207260255.0,
"step": 1389
},
{
"epoch": 0.9420535411724839,
"grad_norm": 0.4518478115324446,
"learning_rate": 1.1788617886178862e-07,
"loss": 0.1592,
"mean_token_accuracy": 0.9490228891372681,
"num_tokens": 207408464.0,
"step": 1390
},
{
"epoch": 0.9427312775330396,
"grad_norm": 0.3926518572267984,
"learning_rate": 1.1653116531165311e-07,
"loss": 0.1649,
"mean_token_accuracy": 0.9475235939025879,
"num_tokens": 207555440.0,
"step": 1391
},
{
"epoch": 0.9434090138935954,
"grad_norm": 0.5210429994208757,
"learning_rate": 1.1517615176151762e-07,
"loss": 0.1594,
"mean_token_accuracy": 0.9485410302877426,
"num_tokens": 207701598.0,
"step": 1392
},
{
"epoch": 0.9440867502541511,
"grad_norm": 0.5924782618800354,
"learning_rate": 1.1382113821138211e-07,
"loss": 0.1709,
"mean_token_accuracy": 0.94605902582407,
"num_tokens": 207850093.0,
"step": 1393
},
{
"epoch": 0.9447644866147069,
"grad_norm": 0.39958163207143554,
"learning_rate": 1.124661246612466e-07,
"loss": 0.167,
"mean_token_accuracy": 0.9468234106898308,
"num_tokens": 208002086.0,
"step": 1394
},
{
"epoch": 0.9454422229752626,
"grad_norm": 0.5160991026450638,
"learning_rate": 1.111111111111111e-07,
"loss": 0.1643,
"mean_token_accuracy": 0.947230651974678,
"num_tokens": 208154266.0,
"step": 1395
},
{
"epoch": 0.9461199593358184,
"grad_norm": 0.8555265857717597,
"learning_rate": 1.097560975609756e-07,
"loss": 0.1634,
"mean_token_accuracy": 0.947859637439251,
"num_tokens": 208298242.0,
"step": 1396
},
{
"epoch": 0.9467976956963741,
"grad_norm": 0.6694579789663303,
"learning_rate": 1.0840108401084011e-07,
"loss": 0.1522,
"mean_token_accuracy": 0.9503393620252609,
"num_tokens": 208446923.0,
"step": 1397
},
{
"epoch": 0.9474754320569299,
"grad_norm": 0.7662756104885292,
"learning_rate": 1.070460704607046e-07,
"loss": 0.1647,
"mean_token_accuracy": 0.9478938356041908,
"num_tokens": 208595226.0,
"step": 1398
},
{
"epoch": 0.9481531684174856,
"grad_norm": 0.6299424424389714,
"learning_rate": 1.0569105691056911e-07,
"loss": 0.1695,
"mean_token_accuracy": 0.9457604214549065,
"num_tokens": 208747043.0,
"step": 1399
},
{
"epoch": 0.9488309047780413,
"grad_norm": 0.6996370626099346,
"learning_rate": 1.043360433604336e-07,
"loss": 0.1666,
"mean_token_accuracy": 0.946971595287323,
"num_tokens": 208894774.0,
"step": 1400
},
{
"epoch": 0.9495086411385971,
"grad_norm": 0.6276628265777034,
"learning_rate": 1.0298102981029811e-07,
"loss": 0.158,
"mean_token_accuracy": 0.9496228843927383,
"num_tokens": 209041360.0,
"step": 1401
},
{
"epoch": 0.9501863774991528,
"grad_norm": 0.4483022817684672,
"learning_rate": 1.016260162601626e-07,
"loss": 0.1661,
"mean_token_accuracy": 0.9469497203826904,
"num_tokens": 209189529.0,
"step": 1402
},
{
"epoch": 0.9508641138597086,
"grad_norm": 0.45151447318384536,
"learning_rate": 1.002710027100271e-07,
"loss": 0.1751,
"mean_token_accuracy": 0.9444667249917984,
"num_tokens": 209336530.0,
"step": 1403
},
{
"epoch": 0.9515418502202643,
"grad_norm": 0.4861060434456673,
"learning_rate": 9.891598915989159e-08,
"loss": 0.1597,
"mean_token_accuracy": 0.9496123939752579,
"num_tokens": 209485343.0,
"step": 1404
},
{
"epoch": 0.9522195865808201,
"grad_norm": 0.4596168635267935,
"learning_rate": 9.75609756097561e-08,
"loss": 0.1745,
"mean_token_accuracy": 0.9443343281745911,
"num_tokens": 209633536.0,
"step": 1405
},
{
"epoch": 0.9528973229413759,
"grad_norm": 0.5019914406626255,
"learning_rate": 9.620596205962059e-08,
"loss": 0.1554,
"mean_token_accuracy": 0.9496943727135658,
"num_tokens": 209782949.0,
"step": 1406
},
{
"epoch": 0.9535750593019315,
"grad_norm": 0.41667165269758527,
"learning_rate": 9.48509485094851e-08,
"loss": 0.1603,
"mean_token_accuracy": 0.9488124921917915,
"num_tokens": 209934473.0,
"step": 1407
},
{
"epoch": 0.9542527956624873,
"grad_norm": 0.4249710160121378,
"learning_rate": 9.349593495934959e-08,
"loss": 0.1603,
"mean_token_accuracy": 0.9486833810806274,
"num_tokens": 210084504.0,
"step": 1408
},
{
"epoch": 0.954930532023043,
"grad_norm": 0.42678826928356306,
"learning_rate": 9.214092140921409e-08,
"loss": 0.1603,
"mean_token_accuracy": 0.9479594007134438,
"num_tokens": 210233550.0,
"step": 1409
},
{
"epoch": 0.9556082683835988,
"grad_norm": 0.44287513677740237,
"learning_rate": 9.078590785907859e-08,
"loss": 0.1681,
"mean_token_accuracy": 0.9466651305556297,
"num_tokens": 210377294.0,
"step": 1410
},
{
"epoch": 0.9562860047441545,
"grad_norm": 0.5190623192817143,
"learning_rate": 8.943089430894309e-08,
"loss": 0.1687,
"mean_token_accuracy": 0.9465017914772034,
"num_tokens": 210528792.0,
"step": 1411
},
{
"epoch": 0.9569637411047103,
"grad_norm": 0.45243101031406463,
"learning_rate": 8.807588075880757e-08,
"loss": 0.1647,
"mean_token_accuracy": 0.9479076936841011,
"num_tokens": 210673424.0,
"step": 1412
},
{
"epoch": 0.9576414774652661,
"grad_norm": 0.5749862594910748,
"learning_rate": 8.672086720867208e-08,
"loss": 0.1575,
"mean_token_accuracy": 0.949820950627327,
"num_tokens": 210823368.0,
"step": 1413
},
{
"epoch": 0.9583192138258217,
"grad_norm": 0.4382410240273075,
"learning_rate": 8.536585365853659e-08,
"loss": 0.1586,
"mean_token_accuracy": 0.9487280175089836,
"num_tokens": 210975510.0,
"step": 1414
},
{
"epoch": 0.9589969501863775,
"grad_norm": 0.4427876374521735,
"learning_rate": 8.401084010840108e-08,
"loss": 0.1626,
"mean_token_accuracy": 0.9473810121417046,
"num_tokens": 211127158.0,
"step": 1415
},
{
"epoch": 0.9596746865469332,
"grad_norm": 0.4804162197215828,
"learning_rate": 8.265582655826558e-08,
"loss": 0.1606,
"mean_token_accuracy": 0.9485824853181839,
"num_tokens": 211275571.0,
"step": 1416
},
{
"epoch": 0.960352422907489,
"grad_norm": 0.5330105982182631,
"learning_rate": 8.130081300813008e-08,
"loss": 0.1613,
"mean_token_accuracy": 0.9479814395308495,
"num_tokens": 211424065.0,
"step": 1417
},
{
"epoch": 0.9610301592680447,
"grad_norm": 0.4616732204432621,
"learning_rate": 7.994579945799458e-08,
"loss": 0.1576,
"mean_token_accuracy": 0.9496120512485504,
"num_tokens": 211573949.0,
"step": 1418
},
{
"epoch": 0.9617078956286005,
"grad_norm": 0.4145030612625962,
"learning_rate": 7.859078590785908e-08,
"loss": 0.1589,
"mean_token_accuracy": 0.9490218088030815,
"num_tokens": 211724572.0,
"step": 1419
},
{
"epoch": 0.9623856319891562,
"grad_norm": 0.7476389113371616,
"learning_rate": 7.723577235772358e-08,
"loss": 0.1689,
"mean_token_accuracy": 0.9456061944365501,
"num_tokens": 211874433.0,
"step": 1420
},
{
"epoch": 0.963063368349712,
"grad_norm": 0.4511632542604127,
"learning_rate": 7.588075880758806e-08,
"loss": 0.1677,
"mean_token_accuracy": 0.9462789595127106,
"num_tokens": 212024712.0,
"step": 1421
},
{
"epoch": 0.9637411047102677,
"grad_norm": 0.42374716464969436,
"learning_rate": 7.452574525745257e-08,
"loss": 0.1484,
"mean_token_accuracy": 0.9522485435009003,
"num_tokens": 212170030.0,
"step": 1422
},
{
"epoch": 0.9644188410708234,
"grad_norm": 0.44484121070820676,
"learning_rate": 7.317073170731706e-08,
"loss": 0.1629,
"mean_token_accuracy": 0.9475429728627205,
"num_tokens": 212318731.0,
"step": 1423
},
{
"epoch": 0.9650965774313792,
"grad_norm": 0.40622925709319363,
"learning_rate": 7.181571815718157e-08,
"loss": 0.1804,
"mean_token_accuracy": 0.9423197209835052,
"num_tokens": 212467624.0,
"step": 1424
},
{
"epoch": 0.9657743137919349,
"grad_norm": 0.5531226991163463,
"learning_rate": 7.046070460704606e-08,
"loss": 0.1574,
"mean_token_accuracy": 0.9494439512491226,
"num_tokens": 212617827.0,
"step": 1425
},
{
"epoch": 0.9664520501524907,
"grad_norm": 0.7000362874893901,
"learning_rate": 6.910569105691057e-08,
"loss": 0.1652,
"mean_token_accuracy": 0.9473242685198784,
"num_tokens": 212765526.0,
"step": 1426
},
{
"epoch": 0.9671297865130464,
"grad_norm": 0.6038971992664033,
"learning_rate": 6.775067750677506e-08,
"loss": 0.1696,
"mean_token_accuracy": 0.9467999115586281,
"num_tokens": 212916763.0,
"step": 1427
},
{
"epoch": 0.9678075228736022,
"grad_norm": 0.3759979601319053,
"learning_rate": 6.639566395663957e-08,
"loss": 0.1564,
"mean_token_accuracy": 0.9499575644731522,
"num_tokens": 213066865.0,
"step": 1428
},
{
"epoch": 0.968485259234158,
"grad_norm": 0.4273989848141862,
"learning_rate": 6.504065040650406e-08,
"loss": 0.1498,
"mean_token_accuracy": 0.9508862793445587,
"num_tokens": 213218281.0,
"step": 1429
},
{
"epoch": 0.9691629955947136,
"grad_norm": 0.3797911718383248,
"learning_rate": 6.368563685636856e-08,
"loss": 0.1571,
"mean_token_accuracy": 0.9494618847966194,
"num_tokens": 213368122.0,
"step": 1430
},
{
"epoch": 0.9698407319552694,
"grad_norm": 1.0814754748509061,
"learning_rate": 6.233062330623306e-08,
"loss": 0.1591,
"mean_token_accuracy": 0.9488984197378159,
"num_tokens": 213516074.0,
"step": 1431
},
{
"epoch": 0.9705184683158251,
"grad_norm": 0.45371909333167076,
"learning_rate": 6.097560975609756e-08,
"loss": 0.1526,
"mean_token_accuracy": 0.9507407993078232,
"num_tokens": 213661704.0,
"step": 1432
},
{
"epoch": 0.9711962046763809,
"grad_norm": 0.723580284990279,
"learning_rate": 5.962059620596206e-08,
"loss": 0.1524,
"mean_token_accuracy": 0.9498827084898949,
"num_tokens": 213810239.0,
"step": 1433
},
{
"epoch": 0.9718739410369366,
"grad_norm": 0.38236820047757286,
"learning_rate": 5.8265582655826555e-08,
"loss": 0.1647,
"mean_token_accuracy": 0.9473016634583473,
"num_tokens": 213960363.0,
"step": 1434
},
{
"epoch": 0.9725516773974924,
"grad_norm": 0.4881616063054681,
"learning_rate": 5.6910569105691055e-08,
"loss": 0.1548,
"mean_token_accuracy": 0.9498519450426102,
"num_tokens": 214106047.0,
"step": 1435
},
{
"epoch": 0.9732294137580482,
"grad_norm": 0.4358638712210395,
"learning_rate": 5.555555555555555e-08,
"loss": 0.162,
"mean_token_accuracy": 0.9484395757317543,
"num_tokens": 214253705.0,
"step": 1436
},
{
"epoch": 0.9739071501186038,
"grad_norm": 0.4715153035751415,
"learning_rate": 5.4200542005420054e-08,
"loss": 0.1609,
"mean_token_accuracy": 0.9482447728514671,
"num_tokens": 214404671.0,
"step": 1437
},
{
"epoch": 0.9745848864791596,
"grad_norm": 0.46315128761956975,
"learning_rate": 5.2845528455284554e-08,
"loss": 0.159,
"mean_token_accuracy": 0.9491038843989372,
"num_tokens": 214556711.0,
"step": 1438
},
{
"epoch": 0.9752626228397153,
"grad_norm": 0.39816199018947296,
"learning_rate": 5.1490514905149054e-08,
"loss": 0.1618,
"mean_token_accuracy": 0.9482710883021355,
"num_tokens": 214706564.0,
"step": 1439
},
{
"epoch": 0.9759403592002711,
"grad_norm": 0.5013092964160424,
"learning_rate": 5.013550135501355e-08,
"loss": 0.1566,
"mean_token_accuracy": 0.9488778188824654,
"num_tokens": 214852299.0,
"step": 1440
},
{
"epoch": 0.9766180955608268,
"grad_norm": 0.7003737733221979,
"learning_rate": 4.878048780487805e-08,
"loss": 0.158,
"mean_token_accuracy": 0.9493415355682373,
"num_tokens": 214999570.0,
"step": 1441
},
{
"epoch": 0.9772958319213826,
"grad_norm": 0.6975526090391319,
"learning_rate": 4.742547425474255e-08,
"loss": 0.168,
"mean_token_accuracy": 0.945052981376648,
"num_tokens": 215147006.0,
"step": 1442
},
{
"epoch": 0.9779735682819384,
"grad_norm": 0.4318385766579875,
"learning_rate": 4.6070460704607046e-08,
"loss": 0.1657,
"mean_token_accuracy": 0.9462201297283173,
"num_tokens": 215296933.0,
"step": 1443
},
{
"epoch": 0.9786513046424941,
"grad_norm": 0.4686644205335301,
"learning_rate": 4.4715447154471546e-08,
"loss": 0.1552,
"mean_token_accuracy": 0.9505117833614349,
"num_tokens": 215444571.0,
"step": 1444
},
{
"epoch": 0.9793290410030498,
"grad_norm": 0.5516950584174143,
"learning_rate": 4.336043360433604e-08,
"loss": 0.1595,
"mean_token_accuracy": 0.9482467696070671,
"num_tokens": 215590422.0,
"step": 1445
},
{
"epoch": 0.9800067773636055,
"grad_norm": 0.40927146742899334,
"learning_rate": 4.200542005420054e-08,
"loss": 0.1526,
"mean_token_accuracy": 0.9507933631539345,
"num_tokens": 215740373.0,
"step": 1446
},
{
"epoch": 0.9806845137241613,
"grad_norm": 1.3874657143664766,
"learning_rate": 4.065040650406504e-08,
"loss": 0.1619,
"mean_token_accuracy": 0.9482349902391434,
"num_tokens": 215890354.0,
"step": 1447
},
{
"epoch": 0.981362250084717,
"grad_norm": 0.39288518372515746,
"learning_rate": 3.929539295392954e-08,
"loss": 0.1571,
"mean_token_accuracy": 0.9491913393139839,
"num_tokens": 216042293.0,
"step": 1448
},
{
"epoch": 0.9820399864452728,
"grad_norm": 0.4142757769437737,
"learning_rate": 3.794037940379403e-08,
"loss": 0.1616,
"mean_token_accuracy": 0.9484345316886902,
"num_tokens": 216192721.0,
"step": 1449
},
{
"epoch": 0.9827177228058286,
"grad_norm": 0.513825824397314,
"learning_rate": 3.658536585365853e-08,
"loss": 0.1449,
"mean_token_accuracy": 0.9520828351378441,
"num_tokens": 216342654.0,
"step": 1450
},
{
"epoch": 0.9833954591663843,
"grad_norm": 0.4753898397147626,
"learning_rate": 3.523035230352303e-08,
"loss": 0.1616,
"mean_token_accuracy": 0.9478369429707527,
"num_tokens": 216491843.0,
"step": 1451
},
{
"epoch": 0.9840731955269401,
"grad_norm": 0.42824168874566454,
"learning_rate": 3.387533875338753e-08,
"loss": 0.1673,
"mean_token_accuracy": 0.9474748447537422,
"num_tokens": 216637358.0,
"step": 1452
},
{
"epoch": 0.9847509318874957,
"grad_norm": 0.4235882744447348,
"learning_rate": 3.252032520325203e-08,
"loss": 0.1617,
"mean_token_accuracy": 0.9481077715754509,
"num_tokens": 216780615.0,
"step": 1453
},
{
"epoch": 0.9854286682480515,
"grad_norm": 0.42767560637753677,
"learning_rate": 3.116531165311653e-08,
"loss": 0.1645,
"mean_token_accuracy": 0.9475177302956581,
"num_tokens": 216933270.0,
"step": 1454
},
{
"epoch": 0.9861064046086072,
"grad_norm": 0.4693239469006344,
"learning_rate": 2.981029810298103e-08,
"loss": 0.1524,
"mean_token_accuracy": 0.9508577063679695,
"num_tokens": 217083629.0,
"step": 1455
},
{
"epoch": 0.986784140969163,
"grad_norm": 0.383108569963258,
"learning_rate": 2.8455284552845527e-08,
"loss": 0.1606,
"mean_token_accuracy": 0.9483126401901245,
"num_tokens": 217234233.0,
"step": 1456
},
{
"epoch": 0.9874618773297187,
"grad_norm": 1.029164567918242,
"learning_rate": 2.7100271002710027e-08,
"loss": 0.1587,
"mean_token_accuracy": 0.9489754140377045,
"num_tokens": 217383089.0,
"step": 1457
},
{
"epoch": 0.9881396136902745,
"grad_norm": 0.4222264677475407,
"learning_rate": 2.5745257452574527e-08,
"loss": 0.1643,
"mean_token_accuracy": 0.9470364972949028,
"num_tokens": 217529730.0,
"step": 1458
},
{
"epoch": 0.9888173500508303,
"grad_norm": 0.42748278256522887,
"learning_rate": 2.4390243902439023e-08,
"loss": 0.166,
"mean_token_accuracy": 0.9459366276860237,
"num_tokens": 217676151.0,
"step": 1459
},
{
"epoch": 0.989495086411386,
"grad_norm": 0.7811521591588235,
"learning_rate": 2.3035230352303523e-08,
"loss": 0.1608,
"mean_token_accuracy": 0.9484688863158226,
"num_tokens": 217821534.0,
"step": 1460
},
{
"epoch": 0.9901728227719417,
"grad_norm": 0.4365643706529922,
"learning_rate": 2.168021680216802e-08,
"loss": 0.1526,
"mean_token_accuracy": 0.951580099761486,
"num_tokens": 217968730.0,
"step": 1461
},
{
"epoch": 0.9908505591324974,
"grad_norm": 0.41791200977441884,
"learning_rate": 2.032520325203252e-08,
"loss": 0.1663,
"mean_token_accuracy": 0.9464643821120262,
"num_tokens": 218119818.0,
"step": 1462
},
{
"epoch": 0.9915282954930532,
"grad_norm": 0.39231804146546523,
"learning_rate": 1.8970189701897016e-08,
"loss": 0.1593,
"mean_token_accuracy": 0.9486127719283104,
"num_tokens": 218269352.0,
"step": 1463
},
{
"epoch": 0.9922060318536089,
"grad_norm": 0.7878637845281535,
"learning_rate": 1.7615176151761516e-08,
"loss": 0.1546,
"mean_token_accuracy": 0.9497353583574295,
"num_tokens": 218416325.0,
"step": 1464
},
{
"epoch": 0.9928837682141647,
"grad_norm": 0.4620398358770985,
"learning_rate": 1.6260162601626016e-08,
"loss": 0.1674,
"mean_token_accuracy": 0.9468031600117683,
"num_tokens": 218561324.0,
"step": 1465
},
{
"epoch": 0.9935615045747205,
"grad_norm": 0.5178289109201032,
"learning_rate": 1.4905149051490515e-08,
"loss": 0.1596,
"mean_token_accuracy": 0.9484723061323166,
"num_tokens": 218709700.0,
"step": 1466
},
{
"epoch": 0.9942392409352762,
"grad_norm": 0.40435189291017193,
"learning_rate": 1.3550135501355014e-08,
"loss": 0.157,
"mean_token_accuracy": 0.9486423879861832,
"num_tokens": 218852741.0,
"step": 1467
},
{
"epoch": 0.994916977295832,
"grad_norm": 0.5105890387057939,
"learning_rate": 1.2195121951219512e-08,
"loss": 0.1665,
"mean_token_accuracy": 0.9470389634370804,
"num_tokens": 218999345.0,
"step": 1468
},
{
"epoch": 0.9955947136563876,
"grad_norm": 0.5170350599741357,
"learning_rate": 1.084010840108401e-08,
"loss": 0.1533,
"mean_token_accuracy": 0.9501299187541008,
"num_tokens": 219148132.0,
"step": 1469
},
{
"epoch": 0.9962724500169434,
"grad_norm": 0.459774640406125,
"learning_rate": 9.485094850948508e-09,
"loss": 0.1541,
"mean_token_accuracy": 0.949880562722683,
"num_tokens": 219295719.0,
"step": 1470
},
{
"epoch": 0.9969501863774991,
"grad_norm": 0.3986996249321926,
"learning_rate": 8.130081300813008e-09,
"loss": 0.1693,
"mean_token_accuracy": 0.9457027688622475,
"num_tokens": 219443499.0,
"step": 1471
},
{
"epoch": 0.9976279227380549,
"grad_norm": 0.4286006846978446,
"learning_rate": 6.775067750677507e-09,
"loss": 0.1535,
"mean_token_accuracy": 0.9497988075017929,
"num_tokens": 219591042.0,
"step": 1472
},
{
"epoch": 0.9983056590986107,
"grad_norm": 0.5770627540228075,
"learning_rate": 5.420054200542005e-09,
"loss": 0.1596,
"mean_token_accuracy": 0.9486493095755577,
"num_tokens": 219741118.0,
"step": 1473
},
{
"epoch": 0.9989833954591664,
"grad_norm": 0.5074089071295785,
"learning_rate": 4.065040650406504e-09,
"loss": 0.1576,
"mean_token_accuracy": 0.9491940215229988,
"num_tokens": 219889941.0,
"step": 1474
},
{
"epoch": 0.9996611318197222,
"grad_norm": 0.3843164178142288,
"learning_rate": 2.7100271002710025e-09,
"loss": 0.1552,
"mean_token_accuracy": 0.9503316506743431,
"num_tokens": 220044144.0,
"step": 1475
},
{
"epoch": 1.0,
"grad_norm": 1.0722423111346728,
"learning_rate": 1.3550135501355012e-09,
"loss": 0.1607,
"mean_token_accuracy": 0.947552278637886,
"num_tokens": 220100184.0,
"step": 1476
}
],
"logging_steps": 1.0,
"max_steps": 1476,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.436682675001754e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}